From ba2fd2ab990a4c1fad63ce1d0b6c31414b8c2a67 Mon Sep 17 00:00:00 2001
From: tpylak <tpylak>
Date: Tue, 10 Nov 2009 23:16:39 +0000
Subject: [PATCH] LMS-1192 	 BatchDataSetHandler needs to support data
 sets without sample

SVN: 13298
---
 .../yeastx/etl/BatchDataSetInfoExtractor.java | 52 ++++++++-----
 .../yeastx/etl/DataSetMappingInformation.java | 20 ++---
 .../yeastx/etl/DatasetMappingResolver.java    | 73 +++++++++++++++++--
 .../etl/DataSetInformationParserTest.java     | 12 ++-
 4 files changed, 116 insertions(+), 41 deletions(-)

diff --git a/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/etl/BatchDataSetInfoExtractor.java b/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/etl/BatchDataSetInfoExtractor.java
index f24c1f334fe..2e43acfc430 100644
--- a/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/etl/BatchDataSetInfoExtractor.java
+++ b/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/etl/BatchDataSetInfoExtractor.java
@@ -17,7 +17,7 @@
 package ch.systemsx.cisd.yeastx.etl;
 
 import java.io.File;
-import java.util.Collections;
+import java.util.Arrays;
 import java.util.Properties;
 
 import org.apache.commons.lang.StringUtils;
@@ -28,6 +28,7 @@ import ch.systemsx.cisd.common.utilities.ExtendedProperties;
 import ch.systemsx.cisd.etlserver.IDataSetInfoExtractor;
 import ch.systemsx.cisd.openbis.dss.generic.shared.IEncapsulatedOpenBISService;
 import ch.systemsx.cisd.openbis.dss.generic.shared.dto.DataSetInformation;
+import ch.systemsx.cisd.openbis.generic.shared.dto.identifier.ExperimentIdentifier;
 
 /**
  * @author Tomasz Pylak
@@ -56,14 +57,17 @@ public class BatchDataSetInfoExtractor implements IDataSetInfoExtractor
             DataSetInformationYeastX info = new DataSetInformationYeastX();
             info.setComplete(true);
             info.setDataSetProperties(plainInfo.getProperties());
-            String sampleCode = getSampleCode(plainInfo, openbisService, log);
-            info.setSampleCode(sampleCode);
+            setSampleOrExperiment(openbisService, log, plainInfo, info);
             info.setGroupCode(plainInfo.getGroupCode());
             MLConversionType conversion = getConversion(plainInfo.getConversion());
             info.setConversion(conversion);
-            if (StringUtils.isNotBlank(plainInfo.getParentDataSetCode()))
+            String parentDataSetCodes = plainInfo.getParentDataSetCodes();
+            if (StringUtils.isNotBlank(parentDataSetCodes))
             {
-                info.setParentDataSetCodes(Collections.singletonList(plainInfo.getParentDataSetCode()));
+                String[] parentCodes =
+                        parentDataSetCodes
+                                .split(DataSetMappingInformation.PARENT_DATASETS_SEPARATOR);
+                info.setParentDataSetCodes(Arrays.asList(parentCodes));
             }
             fileNameDecorator.enrich(info, incomingDataSetPath);
             return info;
@@ -75,6 +79,29 @@ public class BatchDataSetInfoExtractor implements IDataSetInfoExtractor
         }
     }
 
+    /** Sets the resolved sample code on info or, failing that, the experiment identifier. */
+    private void setSampleOrExperiment(IEncapsulatedOpenBISService openbisService, LogUtils log,
+            DataSetMappingInformation mapping, DataSetInformationYeastX info)
+    {
+        DatasetMappingResolver resolver = new DatasetMappingResolver(properties, openbisService);
+        String resolvedSample = resolver.tryFigureSampleCode(mapping, log);
+        if (resolvedSample != null)
+        {
+            info.setSampleCode(resolvedSample);
+            return;
+        }
+        // no sample code resolved: fall back to the experiment identifier from the mapping
+        ExperimentIdentifier identifier =
+                DatasetMappingResolver.tryFigureExperimentIdentifier(mapping);
+        if (identifier == null)
+        {
+            throw new UserFailureException(
+                    "Both sample and experiment are not provided for the file "
+                            + mapping.getFileName());
+        }
+        info.setExperimentIdentifier(identifier);
+    }
+
     private static MLConversionType getConversion(String conversion)
     {
         MLConversionType conversionType = MLConversionType.tryCreate(conversion);
@@ -85,19 +112,4 @@ public class BatchDataSetInfoExtractor implements IDataSetInfoExtractor
         }
         return conversionType;
     }
-
-    private String getSampleCode(DataSetMappingInformation mapping,
-            IEncapsulatedOpenBISService openbisService, LogUtils log)
-    {
-        String sampleCode =
-                new DatasetMappingResolver(properties, openbisService).tryFigureSampleCode(mapping,
-                        log);
-        if (sampleCode == null)
-        {
-            // should not happen, the dataset handler should skip datasets with incorrect mapping
-            throw UserFailureException.fromTemplate("Cannot find a sample for the file '%s'.",
-                    mapping.getFileName());
-        }
-        return sampleCode;
-    }
 }
diff --git a/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/etl/DataSetMappingInformation.java b/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/etl/DataSetMappingInformation.java
index 675949f5652..126e2238e22 100644
--- a/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/etl/DataSetMappingInformation.java
+++ b/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/etl/DataSetMappingInformation.java
@@ -31,6 +31,8 @@ import ch.systemsx.cisd.openbis.generic.shared.dto.NewProperty;
  */
 public class DataSetMappingInformation
 {
+    public static final String PARENT_DATASETS_SEPARATOR = ",";
+
     private String fileName;
 
     private String sampleCodeOrLabel;
@@ -42,8 +44,8 @@ public class DataSetMappingInformation
     private String groupCode;
 
     private String conversion;
-    
-    private String parentDataSetCode;
+
+    private String parentDataSetCodes;
 
     private List<NewProperty> properties;
 
@@ -63,24 +65,24 @@ public class DataSetMappingInformation
         return sampleCodeOrLabel;
     }
 
-    @BeanProperty(label = "sample")
+    @BeanProperty(label = "sample", optional = true)
     public void setSampleCodeOrLabel(String sampleCodeOrLabel)
     {
         this.sampleCodeOrLabel = StringUtils.trimToNull(sampleCodeOrLabel);
     }
 
     /**
-     * Returns the code of the parent data set, if any.
+     * Returns the codes of the parent data sets, separated by {@link #PARENT_DATASETS_SEPARATOR}.
      */
-    public final String getParentDataSetCode()
+    public final String getParentDataSetCodes()
     {
-        return parentDataSetCode;
+        return parentDataSetCodes;
     }
 
-    @BeanProperty(label="parent", optional = true)
-    public final void setParentDataSetCode(String parentCode)
+    @BeanProperty(label = "parent", optional = true)
+    public final void setParentDataSetCode(String parentCodes)
     {
-        this.parentDataSetCode = parentCode;
+        this.parentDataSetCodes = parentCodes;
     }
 
     public String getExperimentName()
diff --git a/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/etl/DatasetMappingResolver.java b/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/etl/DatasetMappingResolver.java
index 6f4d799b83e..0caea447868 100644
--- a/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/etl/DatasetMappingResolver.java
+++ b/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/etl/DatasetMappingResolver.java
@@ -25,8 +25,10 @@ import ch.systemsx.cisd.common.collections.CollectionUtils;
 import ch.systemsx.cisd.common.exceptions.EnvironmentFailureException;
 import ch.systemsx.cisd.common.exceptions.UserFailureException;
 import ch.systemsx.cisd.openbis.dss.generic.shared.IEncapsulatedOpenBISService;
+import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Experiment;
 import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Sample;
 import ch.systemsx.cisd.openbis.generic.shared.dto.ListSamplesByPropertyCriteria;
+import ch.systemsx.cisd.openbis.generic.shared.dto.identifier.ExperimentIdentifier;
 import ch.systemsx.cisd.openbis.generic.shared.dto.identifier.GroupIdentifier;
 import ch.systemsx.cisd.openbis.generic.shared.dto.identifier.LocalExperimentIdentifier;
 import ch.systemsx.cisd.openbis.generic.shared.dto.identifier.SampleIdentifier;
@@ -106,6 +108,10 @@ class DatasetMappingResolver
     public String tryFigureSampleCode(DataSetMappingInformation mapping, LogUtils log)
     {
         String sampleCodeOrLabel = mapping.getSampleCodeOrLabel();
+        if (sampleCodeOrLabel == null)
+        {
+            return null;
+        }
         if (samplePropertyCodeOrNull == null)
         {
             return sampleCodeOrLabel;
@@ -204,13 +210,70 @@ class DatasetMappingResolver
         {
             return false;
         }
+        if (isConversionColumnValid(mapping, log) == false)
+        {
+            return false;
+        }
         String sampleCode = tryFigureSampleCode(mapping, log);
+        ExperimentIdentifier experimentIdentifier = tryFigureExperimentIdentifier(mapping);
         if (sampleCode == null)
         {
+            // sample can be skipped only if experiment identifier is supplied
+            if (experimentIdentifier == null)
+            {
+                log.datasetMappingError(mapping,
+                        "neither sample nor experiment has been specified.");
+                return false;
+            } else
+            {
+                return experimentExists(mapping, log, experimentIdentifier);
+            }
+        } else
+        {
+            return sampleExistsAndBelongsToExperiment(mapping, log, sampleCode);
+        }
+    }
+
+    /** Returns true if the openBIS service finds the experiment, logs a mapping error if not. */
+    private boolean experimentExists(DataSetMappingInformation mapping, LogUtils log,
+            ExperimentIdentifier experimentIdentifier)
+    {
+        try
+        {
+            Experiment found = openbisService.tryToGetExperiment(experimentIdentifier);
+            if (found != null)
+            {
+                return true;
+            }
+            log.datasetMappingError(mapping, "experiment '%s' does not exist",
+                    experimentIdentifier);
+            return false;
+        } catch (UserFailureException ex)
+        {
+            // the lookup throws instead of returning null when the project or group
+            // is unknown
+            log.datasetMappingError(mapping, "experiment '%s' does not exist: %s",
+                    experimentIdentifier, ex.getMessage());
             return false;
         }
-        return isConversionColumnValid(mapping, log)
-                && existsAndBelongsToExperiment(mapping, log, sampleCode);
+    }
+
+    /**
+     * Creates the identifier from the group, project and experiment of the mapping. Returns
+     * null if the project or the experiment is missing. NOTE: we do not support experiment names
+     * if the dataset has to be connected to the experiment directly.
+     */
+    public static ExperimentIdentifier tryFigureExperimentIdentifier(
+            DataSetMappingInformation mapping)
+    {
+        String projectCode = mapping.getProjectCode();
+        String experimentCode = mapping.getExperimentName();
+        if (projectCode == null || experimentCode == null)
+        {
+            return null;
+        }
+        String groupCode = mapping.getGroupCode();
+        return new ExperimentIdentifier(null, groupCode, projectCode, experimentCode);
     }
 
     private static boolean isConversionColumnValid(final DataSetMappingInformation mapping,
@@ -235,7 +298,7 @@ class DatasetMappingResolver
             log.datasetMappingError(mapping, "conversion column must be empty "
                     + "for this type of file.");
             return false;
-            
+
         }
         return true;
     }
@@ -247,8 +310,8 @@ class DatasetMappingResolver
         return conversionRequired;
     }
 
-    private boolean existsAndBelongsToExperiment(DataSetMappingInformation mapping, LogUtils log,
-            String sampleCode)
+    private boolean sampleExistsAndBelongsToExperiment(DataSetMappingInformation mapping,
+            LogUtils log, String sampleCode)
     {
         if (isConnectedToExperiment(sampleCode, mapping, log) == false)
         {
diff --git a/rtd_yeastx/sourceTest/java/ch/systemsx/cisd/yeastx/etl/DataSetInformationParserTest.java b/rtd_yeastx/sourceTest/java/ch/systemsx/cisd/yeastx/etl/DataSetInformationParserTest.java
index 97275e3a8b7..54be1114e87 100644
--- a/rtd_yeastx/sourceTest/java/ch/systemsx/cisd/yeastx/etl/DataSetInformationParserTest.java
+++ b/rtd_yeastx/sourceTest/java/ch/systemsx/cisd/yeastx/etl/DataSetInformationParserTest.java
@@ -36,7 +36,7 @@ import ch.systemsx.cisd.openbis.generic.shared.dto.NewProperty;
  */
 public class DataSetInformationParserTest extends AbstractFileSystemTestCase
 {
-    private static final String MANDATORY_HEADER = "file_name sample group\n";
+    private static final String MANDATORY_HEADER_SAMPLE = "file_name sample group\n";
 
     private static final String HEADER =
     // "# user@gmail.com\n+"+
@@ -54,7 +54,7 @@ public class DataSetInformationParserTest extends AbstractFileSystemTestCase
         AssertJUnit.assertEquals(1, list.size());
         DataSetMappingInformation elem = list.get(0);
         AssertJUnit.assertEquals("group1", elem.getGroupCode());
-        AssertJUnit.assertEquals("parentCode", elem.getParentDataSetCode());
+        AssertJUnit.assertEquals("parentCode", elem.getParentDataSetCodes());
         AssertJUnit.assertEquals("sample1", elem.getSampleCodeOrLabel());
         AssertJUnit.assertEquals("data.txt", elem.getFileName());
         AssertJUnit.assertEquals("experiment1", elem.getExperimentName());
@@ -68,9 +68,7 @@ public class DataSetInformationParserTest extends AbstractFileSystemTestCase
     @Test
     public void testLoadIndexFileMandatoryColumnsOnly()
     {
-        File indexFile =
-                writeMappingFile(MANDATORY_HEADER
-                        + "data2.txt sample2 group2");
+        File indexFile = writeMappingFile(MANDATORY_HEADER_SAMPLE + "data2.txt sample2 group2");
         List<DataSetMappingInformation> list = tryParse(indexFile);
         AssertJUnit.assertEquals(1, list.size());
         DataSetMappingInformation elem = list.get(0);
@@ -101,8 +99,8 @@ public class DataSetInformationParserTest extends AbstractFileSystemTestCase
         AssertJUnit.assertNull("error during parsing expected", result);
         List<String> logLines = readLogFile();
         AssertJUnit.assertEquals(2, logLines.size());
-        AssertionUtil.assertContains(
-                "Mandatory column(s) 'group', 'sample', 'file_name' are missing", logLines.get(1));
+        AssertionUtil.assertContains("Mandatory column(s) 'group', 'file_name' are missing",
+                logLines.get(1));
     }
 
     private List<DataSetMappingInformation> tryParse(File indexFile)
-- 
GitLab