From ba2fd2ab990a4c1fad63ce1d0b6c31414b8c2a67 Mon Sep 17 00:00:00 2001 From: tpylak <tpylak> Date: Tue, 10 Nov 2009 23:16:39 +0000 Subject: [PATCH] LMS-1192 BatchDataSetHandler needs to support data sets without sample SVN: 13298 --- .../yeastx/etl/BatchDataSetInfoExtractor.java | 52 ++++++++----- .../yeastx/etl/DataSetMappingInformation.java | 20 ++--- .../yeastx/etl/DatasetMappingResolver.java | 73 +++++++++++++++++-- .../etl/DataSetInformationParserTest.java | 12 ++- 4 files changed, 116 insertions(+), 41 deletions(-) diff --git a/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/etl/BatchDataSetInfoExtractor.java b/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/etl/BatchDataSetInfoExtractor.java index f24c1f334fe..2e43acfc430 100644 --- a/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/etl/BatchDataSetInfoExtractor.java +++ b/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/etl/BatchDataSetInfoExtractor.java @@ -17,7 +17,7 @@ package ch.systemsx.cisd.yeastx.etl; import java.io.File; -import java.util.Collections; +import java.util.Arrays; import java.util.Properties; import org.apache.commons.lang.StringUtils; @@ -28,6 +28,7 @@ import ch.systemsx.cisd.common.utilities.ExtendedProperties; import ch.systemsx.cisd.etlserver.IDataSetInfoExtractor; import ch.systemsx.cisd.openbis.dss.generic.shared.IEncapsulatedOpenBISService; import ch.systemsx.cisd.openbis.dss.generic.shared.dto.DataSetInformation; +import ch.systemsx.cisd.openbis.generic.shared.dto.identifier.ExperimentIdentifier; /** * @author Tomasz Pylak @@ -56,14 +57,17 @@ public class BatchDataSetInfoExtractor implements IDataSetInfoExtractor DataSetInformationYeastX info = new DataSetInformationYeastX(); info.setComplete(true); info.setDataSetProperties(plainInfo.getProperties()); - String sampleCode = getSampleCode(plainInfo, openbisService, log); - info.setSampleCode(sampleCode); + setSampleOrExperiment(openbisService, log, plainInfo, info); info.setGroupCode(plainInfo.getGroupCode()); MLConversionType conversion = getConversion(plainInfo.getConversion()); info.setConversion(conversion); - if (StringUtils.isNotBlank(plainInfo.getParentDataSetCode())) + String parentDataSetCodes = plainInfo.getParentDataSetCodes(); + if (StringUtils.isNotBlank(parentDataSetCodes)) { - info.setParentDataSetCodes(Collections.singletonList(plainInfo.getParentDataSetCode())); + String[] parentCodes = + parentDataSetCodes + .split(DataSetMappingInformation.PARENT_DATASETS_SEPARATOR); + info.setParentDataSetCodes(Arrays.asList(parentCodes)); } fileNameDecorator.enrich(info, incomingDataSetPath); return info; @@ -75,6 +79,29 @@ public class BatchDataSetInfoExtractor implements IDataSetInfoExtractor } } + private void setSampleOrExperiment(IEncapsulatedOpenBISService openbisService, LogUtils log, + DataSetMappingInformation mapping, DataSetInformationYeastX info) + { + DatasetMappingResolver mappingResolver = + new DatasetMappingResolver(properties, openbisService); + String sampleCode = mappingResolver.tryFigureSampleCode(mapping, log); + if (sampleCode != null) + { + info.setSampleCode(sampleCode); + } else + { + ExperimentIdentifier experimentIdentifier = + DatasetMappingResolver.tryFigureExperimentIdentifier(mapping); + if (experimentIdentifier == null) + { + throw new UserFailureException( + "Both sample and experiment are not provided for the file " + + mapping.getFileName()); + } + info.setExperimentIdentifier(experimentIdentifier); + } + } + private static MLConversionType getConversion(String conversion) { MLConversionType conversionType = MLConversionType.tryCreate(conversion); @@ -85,19 +112,4 @@ public class BatchDataSetInfoExtractor implements IDataSetInfoExtractor } return conversionType; } - - private String getSampleCode(DataSetMappingInformation mapping, - IEncapsulatedOpenBISService openbisService, LogUtils log) - { - String sampleCode = - new DatasetMappingResolver(properties, openbisService).tryFigureSampleCode(mapping, - log); - if (sampleCode == null) - { - // should not happen, the dataset handler should skip datasets with incorrect mapping - throw UserFailureException.fromTemplate("Cannot find a sample for the file '%s'.", - mapping.getFileName()); - } - return sampleCode; - } } diff --git a/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/etl/DataSetMappingInformation.java b/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/etl/DataSetMappingInformation.java index 675949f5652..126e2238e22 100644 --- a/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/etl/DataSetMappingInformation.java +++ b/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/etl/DataSetMappingInformation.java @@ -31,6 +31,8 @@ import ch.systemsx.cisd.openbis.generic.shared.dto.NewProperty; */ public class DataSetMappingInformation { + public static final String PARENT_DATASETS_SEPARATOR = ","; + private String fileName; private String sampleCodeOrLabel; @@ -42,8 +44,8 @@ public class DataSetMappingInformation private String groupCode; private String conversion; - - private String parentDataSetCode; + + private String parentDataSetCodes; private List<NewProperty> properties; @@ -63,24 +65,24 @@ public class DataSetMappingInformation return sampleCodeOrLabel; } - @BeanProperty(label = "sample") + @BeanProperty(label = "sample", optional = true) public void setSampleCodeOrLabel(String sampleCodeOrLabel) { this.sampleCodeOrLabel = StringUtils.trimToNull(sampleCodeOrLabel); } /** - * Returns the code of the parent data set, if any. + * Returns the codes of the parent data sets, separated by {@link #PARENT_DATASETS_SEPARATOR}. */ - public final String getParentDataSetCode() + public final String getParentDataSetCodes() { - return parentDataSetCode; + return parentDataSetCodes; } - @BeanProperty(label="parent", optional = true) - public final void setParentDataSetCode(String parentCode) + @BeanProperty(label = "parent", optional = true) + public final void setParentDataSetCode(String parentCodes) { - this.parentDataSetCode = parentCode; + this.parentDataSetCodes = parentCodes; } public String getExperimentName() diff --git a/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/etl/DatasetMappingResolver.java b/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/etl/DatasetMappingResolver.java index 6f4d799b83e..0caea447868 100644 --- a/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/etl/DatasetMappingResolver.java +++ b/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/etl/DatasetMappingResolver.java @@ -25,8 +25,10 @@ import ch.systemsx.cisd.common.collections.CollectionUtils; import ch.systemsx.cisd.common.exceptions.EnvironmentFailureException; import ch.systemsx.cisd.common.exceptions.UserFailureException; import ch.systemsx.cisd.openbis.dss.generic.shared.IEncapsulatedOpenBISService; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Experiment; import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Sample; import ch.systemsx.cisd.openbis.generic.shared.dto.ListSamplesByPropertyCriteria; +import ch.systemsx.cisd.openbis.generic.shared.dto.identifier.ExperimentIdentifier; import ch.systemsx.cisd.openbis.generic.shared.dto.identifier.GroupIdentifier; import ch.systemsx.cisd.openbis.generic.shared.dto.identifier.LocalExperimentIdentifier; import ch.systemsx.cisd.openbis.generic.shared.dto.identifier.SampleIdentifier; @@ -106,6 +108,10 @@ class DatasetMappingResolver public String tryFigureSampleCode(DataSetMappingInformation mapping, LogUtils log) { String sampleCodeOrLabel = mapping.getSampleCodeOrLabel(); + if (sampleCodeOrLabel == null) + { + return null; + } if (samplePropertyCodeOrNull == null) { return sampleCodeOrLabel; @@ -204,13 +210,70 @@ class DatasetMappingResolver { return false; } + if (isConversionColumnValid(mapping, log) == false) + { + return false; + } String sampleCode = tryFigureSampleCode(mapping, log); + ExperimentIdentifier experimentIdentifier = tryFigureExperimentIdentifier(mapping); if (sampleCode == null) { + // sample can be skipped only if experiment identifier is supplied + if (experimentIdentifier == null) + { + log.datasetMappingError(mapping, + "neither sample nor experiment has been specified."); + return false; + } else + { + return experimentExists(mapping, log, experimentIdentifier); + } + } else + { + return sampleExistsAndBelongsToExperiment(mapping, log, sampleCode); + } + } + + private boolean experimentExists(DataSetMappingInformation mapping, LogUtils log, + ExperimentIdentifier experimentIdentifier) + { + try + { + Experiment experiment = openbisService.tryToGetExperiment(experimentIdentifier); + if (experiment == null) + { + log.datasetMappingError(mapping, "experiment '%s' does not exist", + experimentIdentifier); + return false; + } else + { + return true; + } + } catch (UserFailureException ex) + { + // if project or group is unknown then an exception is thrown + log.datasetMappingError(mapping, "experiment '%s' does not exist: %s", + experimentIdentifier, ex.getMessage()); return false; } - return isConversionColumnValid(mapping, log) - && existsAndBelongsToExperiment(mapping, log, sampleCode); + } + + /** + * NOTE: we do not support experiment names if the dataset has to be connected to the experiment + * directly. + */ + public static ExperimentIdentifier tryFigureExperimentIdentifier( + DataSetMappingInformation mapping) + { + String project = mapping.getProjectCode(); + String experimentCode = mapping.getExperimentName(); + if (project != null && experimentCode != null) + { + return new ExperimentIdentifier(null, mapping.getGroupCode(), project, experimentCode); + } else + { + return null; + } } private static boolean isConversionColumnValid(final DataSetMappingInformation mapping, @@ -235,7 +298,7 @@ class DatasetMappingResolver log.datasetMappingError(mapping, "conversion column must be empty " + "for this type of file."); return false; - + } return true; } @@ -247,8 +310,8 @@ class DatasetMappingResolver return conversionRequired; } - private boolean existsAndBelongsToExperiment(DataSetMappingInformation mapping, LogUtils log, - String sampleCode) + private boolean sampleExistsAndBelongsToExperiment(DataSetMappingInformation mapping, + LogUtils log, String sampleCode) { if (isConnectedToExperiment(sampleCode, mapping, log) == false) { diff --git a/rtd_yeastx/sourceTest/java/ch/systemsx/cisd/yeastx/etl/DataSetInformationParserTest.java b/rtd_yeastx/sourceTest/java/ch/systemsx/cisd/yeastx/etl/DataSetInformationParserTest.java index 97275e3a8b7..54be1114e87 100644 --- a/rtd_yeastx/sourceTest/java/ch/systemsx/cisd/yeastx/etl/DataSetInformationParserTest.java +++ b/rtd_yeastx/sourceTest/java/ch/systemsx/cisd/yeastx/etl/DataSetInformationParserTest.java @@ -36,7 +36,7 @@ import ch.systemsx.cisd.openbis.generic.shared.dto.NewProperty; */ public class DataSetInformationParserTest extends AbstractFileSystemTestCase { - private static final String MANDATORY_HEADER = "file_name sample group\n"; + private static final String MANDATORY_HEADER_SAMPLE = "file_name sample group\n"; private static final String HEADER = // "# user@gmail.com\n+"+ @@ -54,7 +54,7 @@ public class DataSetInformationParserTest extends AbstractFileSystemTestCase AssertJUnit.assertEquals(1, list.size()); DataSetMappingInformation elem = list.get(0); AssertJUnit.assertEquals("group1", elem.getGroupCode()); - AssertJUnit.assertEquals("parentCode", elem.getParentDataSetCode()); + AssertJUnit.assertEquals("parentCode", elem.getParentDataSetCodes()); AssertJUnit.assertEquals("sample1", elem.getSampleCodeOrLabel()); AssertJUnit.assertEquals("data.txt", elem.getFileName()); AssertJUnit.assertEquals("experiment1", elem.getExperimentName()); @@ -68,9 +68,7 @@ public class DataSetInformationParserTest extends AbstractFileSystemTestCase @Test public void testLoadIndexFileMandatoryColumnsOnly() { - File indexFile = - writeMappingFile(MANDATORY_HEADER - + "data2.txt sample2 group2"); + File indexFile = writeMappingFile(MANDATORY_HEADER_SAMPLE + "data2.txt sample2 group2"); List<DataSetMappingInformation> list = tryParse(indexFile); AssertJUnit.assertEquals(1, list.size()); DataSetMappingInformation elem = list.get(0); @@ -101,8 +99,8 @@ public class DataSetInformationParserTest extends AbstractFileSystemTestCase AssertJUnit.assertNull("error during parsing expected", result); List<String> logLines = readLogFile(); AssertJUnit.assertEquals(2, logLines.size()); - AssertionUtil.assertContains( - "Mandatory column(s) 'group', 'sample', 'file_name' are missing", logLines.get(1)); + AssertionUtil.assertContains("Mandatory column(s) 'group', 'file_name' are missing", + logLines.get(1)); } private List<DataSetMappingInformation> tryParse(File indexFile) -- GitLab