diff --git a/datastore_server/source/java/ch/systemsx/cisd/etlserver/validation/AbstractValidator.java b/datastore_server/source/java/ch/systemsx/cisd/etlserver/validation/AbstractValidator.java index 35b0d85f594480a20dd3d6010e5c9088b64ddfa2..867417a5563b35773cc9a3395a3d34b2cf95eec9 100644 --- a/datastore_server/source/java/ch/systemsx/cisd/etlserver/validation/AbstractValidator.java +++ b/datastore_server/source/java/ch/systemsx/cisd/etlserver/validation/AbstractValidator.java @@ -21,7 +21,7 @@ import org.apache.commons.lang.StringUtils; import ch.systemsx.cisd.common.exceptions.UserFailureException; /** - * + * Super class for validators which handles empty cells in the same way. * * @author Franz-Josef Elmer */ diff --git a/datastore_server/source/java/ch/systemsx/cisd/etlserver/validation/ColumnDefinition.java b/datastore_server/source/java/ch/systemsx/cisd/etlserver/validation/ColumnDefinition.java index 84c298db50b312e8d6d879c8a10905df517bd8c9..fa6a47bcbf42a63de5f91e69ec07bd5a62c73c3f 100644 --- a/datastore_server/source/java/ch/systemsx/cisd/etlserver/validation/ColumnDefinition.java +++ b/datastore_server/source/java/ch/systemsx/cisd/etlserver/validation/ColumnDefinition.java @@ -24,7 +24,7 @@ import ch.systemsx.cisd.common.utilities.ClassUtils; import ch.systemsx.cisd.common.utilities.PropertyUtils; /** - * + * Definition of a column used by {@link DataSetValidatorForTSV}. 
* * @author Franz-Josef Elmer */ @@ -35,17 +35,20 @@ class ColumnDefinition static final String HEADER_VALIDATOR_KEY = "header-validator"; static final String HEADER_PATTERN_KEY = "header-pattern"; static final String VALUE_VALIDATOR_KEY = "value-validator"; + static final String CAN_DEFINE_MULTIPLE_COLUMNS_KEY = "can-define-multiple-columns"; private final String name; private final IColumnHeaderValidator headerValidator; private final IValidatorFactory valueValidatorFactory; private final boolean mandatory; private final Integer orderOrNull; + private final boolean canDefineMultipleColumns; static ColumnDefinition create(String name, Properties properties) { boolean mandatory = PropertyUtils.getBoolean(properties, MANDATORY_KEY, false); Integer order = null; + boolean canDefineMultipleColumns; if (properties.getProperty(ORDER_KEY) != null) { order = PropertyUtils.getInt(properties, ORDER_KEY, 0); @@ -53,6 +56,11 @@ class ColumnDefinition { throw new ConfigurationFailureException("Order value has to be positive: " + order); } + canDefineMultipleColumns = false; + } else + { + canDefineMultipleColumns = + PropertyUtils.getBoolean(properties, CAN_DEFINE_MULTIPLE_COLUMNS_KEY, false); } String headerValidatorName = properties.getProperty(HEADER_VALIDATOR_KEY); IColumnHeaderValidator headerValidator; @@ -71,17 +79,25 @@ class ColumnDefinition .getName()); IValidatorFactory factory = ClassUtils.create(IValidatorFactory.class, validatorFactoryName, properties); - return new ColumnDefinition(name, headerValidator, factory, mandatory, order); + return new ColumnDefinition(name, headerValidator, factory, mandatory, order, + canDefineMultipleColumns); } private ColumnDefinition(String name, IColumnHeaderValidator headerValidator, - IValidatorFactory valueValidatorFactory, boolean mandatory, Integer orderOrNull) + IValidatorFactory valueValidatorFactory, boolean mandatory, Integer orderOrNull, + boolean canDefineMultipleColumns) { this.name = name; this.headerValidator = 
headerValidator; this.valueValidatorFactory = valueValidatorFactory; this.mandatory = mandatory; this.orderOrNull = orderOrNull; + this.canDefineMultipleColumns = canDefineMultipleColumns; + } + + boolean canDefineMultipleColumns() + { + return canDefineMultipleColumns; } boolean isMandatory() diff --git a/datastore_server/source/java/ch/systemsx/cisd/etlserver/validation/DataSetValidatorForTSV.java b/datastore_server/source/java/ch/systemsx/cisd/etlserver/validation/DataSetValidatorForTSV.java index 01f80ac901132f8d6c268979cdb181d9c44b959b..b6927b51128744e19fcedc0ae9b57452a7ece2c6 100644 --- a/datastore_server/source/java/ch/systemsx/cisd/etlserver/validation/DataSetValidatorForTSV.java +++ b/datastore_server/source/java/ch/systemsx/cisd/etlserver/validation/DataSetValidatorForTSV.java @@ -20,6 +20,7 @@ import java.io.File; import java.io.FileReader; import java.io.IOException; import java.util.ArrayList; +import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.LinkedList; @@ -41,8 +42,14 @@ import ch.systemsx.cisd.openbis.dss.generic.shared.utils.PropertyParametersUtil. import ch.systemsx.cisd.openbis.generic.shared.basic.dto.DataSetType; /** - * Validator for data sets containing TAB-separated value (TSV) files. - * + * Validator for data sets containing TAB-separated value (TSV) files. Such a file is valid if + * <ul> + * <li>All column headers are unique. + * <li>For each column a {@link ColumnDefinition} as specified by the properties is found. + * <li>All values of a column are valid in accordance with the {@link IValidator} created by the + * {@link ColumnDefinition} instance of the column. 
+ * </ul> + * * @author Franz-Josef Elmer */ class DataSetValidatorForTSV implements IDataSetValidator @@ -128,6 +135,7 @@ class DataSetValidatorForTSV implements IDataSetValidator throw new IOException("Empty file '" + file); } String[] headers = getRowCells(lineIterator.nextLine()); + assertUniqueHeaders(headers); ColumnDefinition[] definitions = findColumnDefinitions(headers); IValidator[] validators = new IValidator[definitions.length]; for (int i = 0; i < validators.length; i++) @@ -166,7 +174,19 @@ class DataSetValidatorForTSV implements IDataSetValidator { IOUtils.closeQuietly(reader); } - + } + + private void assertUniqueHeaders(String[] headers) + { + HashSet<String> headerSet = new HashSet<String>(); + for (String header : headers) + { + if (headerSet.contains(header)) + { + throw new UserFailureException("Column header '" + header + "' appeared twice."); + } + headerSet.add(header); + } } private ColumnDefinition[] findColumnDefinitions(String[] columnHeaders) @@ -239,7 +259,10 @@ class DataSetValidatorForTSV implements IDataSetValidator ColumnDefinition columnDefinition = iterator.next(); if (columnDefinition.isValidHeader(columnHeader)) { - iterator.remove(); + if (columnDefinition.canDefineMultipleColumns() == false) + { + iterator.remove(); + } return columnDefinition; } } diff --git a/datastore_server/source/java/ch/systemsx/cisd/etlserver/validation/DefaultValueValidatorFactory.java b/datastore_server/source/java/ch/systemsx/cisd/etlserver/validation/DefaultValueValidatorFactory.java index fa127b029b01e176829dd45a985fa5c4986a65e3..c6241da68ec8ad3d528145584d46c6acb6c347dd 100644 --- a/datastore_server/source/java/ch/systemsx/cisd/etlserver/validation/DefaultValueValidatorFactory.java +++ b/datastore_server/source/java/ch/systemsx/cisd/etlserver/validation/DefaultValueValidatorFactory.java @@ -21,7 +21,7 @@ import java.util.Properties; import ch.systemsx.cisd.common.exceptions.ConfigurationFailureException; /** - * + * Default factory for {@link 
IValidator} instances. * * @author Franz-Josef Elmer */ diff --git a/datastore_server/source/java/ch/systemsx/cisd/etlserver/validation/IColumnHeaderValidator.java b/datastore_server/source/java/ch/systemsx/cisd/etlserver/validation/IColumnHeaderValidator.java index 590915b4e7ea841744df0280f09bbb354da647a1..3c96effebac8722b7e137ea1002bde55777579fe 100644 --- a/datastore_server/source/java/ch/systemsx/cisd/etlserver/validation/IColumnHeaderValidator.java +++ b/datastore_server/source/java/ch/systemsx/cisd/etlserver/validation/IColumnHeaderValidator.java @@ -17,11 +17,14 @@ package ch.systemsx.cisd.etlserver.validation; /** - * + * Interface of a validator for the header of a column. * * @author Franz-Josef Elmer */ public interface IColumnHeaderValidator { + /** + * Returns <code>true</code> if the specified header is valid. + */ public boolean isValidHeader(String header); } diff --git a/datastore_server/source/java/ch/systemsx/cisd/etlserver/validation/IValidator.java b/datastore_server/source/java/ch/systemsx/cisd/etlserver/validation/IValidator.java index 644a7c87dbc2f8a8e0db10e42f13cac4143450de..4e2b3cf188dd04317631b54cadbdf9156c7fe08e 100644 --- a/datastore_server/source/java/ch/systemsx/cisd/etlserver/validation/IValidator.java +++ b/datastore_server/source/java/ch/systemsx/cisd/etlserver/validation/IValidator.java @@ -16,12 +16,19 @@ package ch.systemsx.cisd.etlserver.validation; +import ch.systemsx.cisd.common.exceptions.UserFailureException; + /** - * + * Interface for a validator of a table cell value. * * @author Franz-Josef Elmer */ public interface IValidator { + /** + * Asserts valid table cell value. + * + * @throws UserFailureException if the specified value is not valid. 
+ */ public void assertValid(String value); } diff --git a/datastore_server/source/java/ch/systemsx/cisd/etlserver/validation/IValidatorFactory.java b/datastore_server/source/java/ch/systemsx/cisd/etlserver/validation/IValidatorFactory.java index d770118b1de305b37d98f949870da260ad1c9580..4e42035830d4badc2e0472032cf3052c4b1edff2 100644 --- a/datastore_server/source/java/ch/systemsx/cisd/etlserver/validation/IValidatorFactory.java +++ b/datastore_server/source/java/ch/systemsx/cisd/etlserver/validation/IValidatorFactory.java @@ -17,11 +17,15 @@ package ch.systemsx.cisd.etlserver.validation; /** - * + * Factory for table cell validators. * * @author Franz-Josef Elmer */ public interface IValidatorFactory { + /** + * Creates a validator. The same instance can be returned as in a previous call if the + * validator object is immutable. + */ public IValidator createValidator(); } diff --git a/datastore_server/sourceTest/java/ch/systemsx/cisd/etlserver/validation/DataSetValidatorForTSVTest.java b/datastore_server/sourceTest/java/ch/systemsx/cisd/etlserver/validation/DataSetValidatorForTSVTest.java index 0807d3f6f1e7a62b2b2a9a39c946908e8a9e5e8a..5bdc717b6f1006b2c58e7ab3e1237bd73ff9f66b 100644 --- a/datastore_server/sourceTest/java/ch/systemsx/cisd/etlserver/validation/DataSetValidatorForTSVTest.java +++ b/datastore_server/sourceTest/java/ch/systemsx/cisd/etlserver/validation/DataSetValidatorForTSVTest.java @@ -307,6 +307,53 @@ public class DataSetValidatorForTSVTest extends AbstractFileSystemTestCase MockValidatorFactory.assertSatisfied(); } + @Test + public void testUniqueColumnHeaders() + { + Properties properties = new Properties(); + properties.setProperty(DataSetValidatorForTSV.PATH_PATTERNS_KEY, "*"); + DataSetValidatorForTSV validator = new DataSetValidatorForTSV(properties); + FileUtilities.writeToFile(new File(workingDirectory, "a.txt"), "A\tA\n"); + + try + { + validator.assertValidDataSet(null, workingDirectory); + fail("UserFailureException expected"); + } catch 
(UserFailureException ex) + { + assertEquals("Column header 'A' appeared twice.", ex.getMessage()); + } + + MockValidatorFactory.assertSatisfied(); + } + + @Test + public void testColumnDefinitionWhichCanBeUsedToDefineMultipleColumnsOrder() + { + Properties properties = new Properties(); + properties.setProperty(DataSetValidatorForTSV.PATH_PATTERNS_KEY, "a.txt"); + properties.setProperty(DataSetValidatorForTSV.COLUMNS_KEY, "c1, c2, c3, c4"); + properties.setProperty("c1." + ColumnDefinition.HEADER_PATTERN_KEY, "ID"); + properties.setProperty("c1." + ColumnDefinition.MANDATORY_KEY, "yes"); + properties.setProperty("c1." + ColumnDefinition.ORDER_KEY, "1"); + properties.setProperty("c1." + ColumnDefinition.VALUE_VALIDATOR_KEY, MOCK_FACTORY); + properties.setProperty("c1." + NAME_KEY, "c1"); + properties.setProperty("c1." + EXPECTED_VALUES_KEY, "1,2"); + properties.setProperty("c2." + ColumnDefinition.HEADER_PATTERN_KEY, "A[0-9]*"); + properties.setProperty("c2." + ColumnDefinition.CAN_DEFINE_MULTIPLE_COLUMNS_KEY, "true"); + properties.setProperty("c2." + ColumnDefinition.VALUE_VALIDATOR_KEY, MOCK_FACTORY); + properties.setProperty("c2." + NAME_KEY, "c2"); + properties.setProperty("c2." + EXPECTED_VALUES_KEY, "a,b,c,d"); + DataSetValidatorForTSV validator = new DataSetValidatorForTSV(properties); + + FileUtilities.writeToFile(new File(workingDirectory, "a.txt"), "ID\tA6\tA42\n" + + "1\ta\tb\n" + "2\tc\td\n"); + + validator.assertValidDataSet(null, workingDirectory); + + MockValidatorFactory.assertSatisfied(); + } + @Test public void testMissingColumnWithOrder() {