From e05c823ea2f6ef4249bb82102bf02a1de3cdd6a3 Mon Sep 17 00:00:00 2001
From: tpylak <tpylak>
Date: Mon, 20 Dec 2010 12:18:01 +0000
Subject: [PATCH] LMS-1939 improve specific DSS uploading plugin

SVN: 19194
---
 .../etl/AbstractImageStorageProcessor.java    |  9 ++-
 .../openbis/dss/etl/HCSImageCheckList.java    | 69 ++--------------
 .../dss/etl/UnparsedImageFileInfoLexer.java   |  2 +-
 .../biozentrum/BZDataSetInfoExtractor.java    | 29 ++++---
 .../BZDatasetDirectoryNameTokenizer.java      | 32 +++++---
 .../openbis/dss/etl/dto/ImageSeriesPoint.java | 81 +++++++++++++++++++
 .../BZDatasetDirectoryNameTokenizerTest.java  | 38 +++++++++
 7 files changed, 171 insertions(+), 89 deletions(-)
 create mode 100644 screening/source/java/ch/systemsx/cisd/openbis/dss/etl/dto/ImageSeriesPoint.java
 create mode 100644 screening/sourceTest/java/ch/systemsx/cisd/openbis/dss/etl/biozentrum/BZDatasetDirectoryNameTokenizerTest.java

diff --git a/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/AbstractImageStorageProcessor.java b/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/AbstractImageStorageProcessor.java
index 9498a1d8914..bb5ae6b1252 100644
--- a/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/AbstractImageStorageProcessor.java
+++ b/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/AbstractImageStorageProcessor.java
@@ -20,8 +20,10 @@ import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Properties;
+import java.util.Set;
 
 import javax.sql.DataSource;
 
@@ -50,6 +52,7 @@ import ch.systemsx.cisd.etlserver.hdf5.Hdf5Container;
 import ch.systemsx.cisd.etlserver.hdf5.HierarchicalStructureDuplicatorFileToHdf5;
 import ch.systemsx.cisd.openbis.dss.Constants;
 import ch.systemsx.cisd.openbis.dss.etl.dataaccess.IImagingQueryDAO;
+import ch.systemsx.cisd.openbis.dss.etl.dto.ImageSeriesPoint;
 import ch.systemsx.cisd.openbis.dss.generic.shared.ServiceProvider;
 import ch.systemsx.cisd.openbis.dss.generic.shared.dto.DataSetInformation;
 import ch.systemsx.cisd.openbis.generic.shared.dto.StorageFormat;
@@ -711,15 +714,17 @@ abstract class AbstractImageStorageProcessor extends AbstractStorageProcessor
 
     protected static boolean hasImageSeries(List<AcquiredSingleImage> images)
     {
+        Set<ImageSeriesPoint> points = new HashSet<ImageSeriesPoint>(); // distinct series points seen
         for (AcquiredSingleImage image : images)
         {
             if (image.tryGetTimePoint() != null || image.tryGetDepth() != null
                     || image.tryGetSeriesNumber() != null)
             {
-                return true;
+                points.add(new ImageSeriesPoint(image.tryGetTimePoint(), image.tryGetDepth(), image
+                        .tryGetSeriesNumber()));
             }
         }
-        return false;
+        return points.size() > 1; // a series requires at least two different points
     }
 
 }
diff --git a/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/HCSImageCheckList.java b/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/HCSImageCheckList.java
index 05ebadacd74..564716c4b8b 100644
--- a/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/HCSImageCheckList.java
+++ b/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/HCSImageCheckList.java
@@ -25,6 +25,7 @@ import java.util.Set;
 
 import ch.systemsx.cisd.bds.hcs.Geometry;
 import ch.systemsx.cisd.common.utilities.AbstractHashable;
+import ch.systemsx.cisd.openbis.dss.etl.dto.ImageSeriesPoint;
 import ch.systemsx.cisd.openbis.plugin.screening.shared.dto.PlateDimension;
 
 /**
@@ -119,85 +120,25 @@ public final class HCSImageCheckList
     // Helper classes
     //
 
-    private static final class CheckDimension
-    {
-        private final Float timeOrNull;
-
-        private final Float depthOrNull;
-
-        private final Integer seriesNumberOrNull;
-
-        public CheckDimension(Float timeOrNull, Float depthOrNull, Integer seriesNumberOrNull)
-        {
-            this.timeOrNull = timeOrNull;
-            this.depthOrNull = depthOrNull;
-            this.seriesNumberOrNull = seriesNumberOrNull;
-        }
-
-        @Override
-        public int hashCode()
-        {
-            final int prime = 31;
-            int result = 1;
-            result = prime * result + ((depthOrNull == null) ? 0 : depthOrNull.hashCode());
-            result = prime * result + ((timeOrNull == null) ? 0 : timeOrNull.hashCode());
-            result =
-                    prime * result
-                            + ((seriesNumberOrNull == null) ? 0 : seriesNumberOrNull.hashCode());
-            return result;
-        }
-
-        @Override
-        public boolean equals(Object obj)
-        {
-            if (this == obj)
-                return true;
-            if (obj == null)
-                return false;
-            if (getClass() != obj.getClass())
-                return false;
-            CheckDimension other = (CheckDimension) obj;
-            if (depthOrNull == null)
-            {
-                if (other.depthOrNull != null)
-                    return false;
-            } else if (!depthOrNull.equals(other.depthOrNull))
-                return false;
-            if (timeOrNull == null)
-            {
-                if (other.timeOrNull != null)
-                    return false;
-            } else if (!timeOrNull.equals(other.timeOrNull))
-                return false;
-            if (seriesNumberOrNull == null)
-            {
-                if (other.seriesNumberOrNull != null)
-                    return false;
-            } else if (!seriesNumberOrNull.equals(other.seriesNumberOrNull))
-                return false;
-            return true;
-        }
-    }
-
     private static final class Check
     {
         private boolean checkedOff;
 
-        private final Set<CheckDimension> dimensions = new HashSet<CheckDimension>();
+        private final Set<ImageSeriesPoint> dimensions = new HashSet<ImageSeriesPoint>();
 
         final void checkOff(Float timepointOrNull, Float depthOrNull, Integer seriesNumberOrNull)
         {
-            dimensions.add(new CheckDimension(timepointOrNull, depthOrNull, seriesNumberOrNull));
+            dimensions.add(new ImageSeriesPoint(timepointOrNull, depthOrNull, seriesNumberOrNull)); // remember this point
             checkedOff = true;
         }
 
         final boolean isCheckedOff(Float timepointOrNull, Float depthOrNull,
                 Integer seriesNumberOrNull)
         {
-            CheckDimension dim = null;
+            ImageSeriesPoint dim = null; // stays null when all three coordinates are null
             if (timepointOrNull != null || depthOrNull != null || seriesNumberOrNull != null)
             {
-                dim = new CheckDimension(timepointOrNull, depthOrNull, seriesNumberOrNull);
+                dim = new ImageSeriesPoint(timepointOrNull, depthOrNull, seriesNumberOrNull);
             }
             return checkedOff && (dim == null || dimensions.contains(dim));
         }
diff --git a/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/UnparsedImageFileInfoLexer.java b/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/UnparsedImageFileInfoLexer.java
index f7b3521f22c..ef708fb3c93 100644
--- a/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/UnparsedImageFileInfoLexer.java
+++ b/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/UnparsedImageFileInfoLexer.java
@@ -121,7 +121,7 @@ public class UnparsedImageFileInfoLexer
         return info;
     }
 
-    private static Map<Character, String> extractTokensMap(String text)
+    public static Map<Character, String> extractTokensMap(String text)
     {
         Map<Character, String> tokensMap = new HashMap<Character, String>();
         String[] tokens = StringUtils.split(text, TOKENS_SEPARATOR);
diff --git a/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/biozentrum/BZDataSetInfoExtractor.java b/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/biozentrum/BZDataSetInfoExtractor.java
index bab4a42c23e..b45fe0113f0 100644
--- a/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/biozentrum/BZDataSetInfoExtractor.java
+++ b/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/biozentrum/BZDataSetInfoExtractor.java
@@ -66,17 +66,22 @@ public class BZDataSetInfoExtractor implements IDataSetInfoExtractor
 
     static final String PLATE_GEOMETRY = "plate-geometry";
 
+    static final String SEPARATOR = "separator";
+
     private final String spaceCode;
 
     private final String projectCode;
 
     private final String defaultPlateGeometryOrNull;
 
+    private final String separatorOrNull;
+
     public BZDataSetInfoExtractor(final Properties properties)
     {
         spaceCode = PropertyUtils.getMandatoryProperty(properties, SPACE_CODE);
         projectCode = PropertyUtils.getMandatoryProperty(properties, PROJECT_CODE);
         defaultPlateGeometryOrNull = properties.getProperty(PLATE_GEOMETRY);
+        separatorOrNull = properties.getProperty(SEPARATOR); // optional property; null if not configured
     }
 
     public DataSetInformation getDataSetInformation(File incomingDataSetPath,
@@ -84,9 +89,16 @@ public class BZDataSetInfoExtractor implements IDataSetInfoExtractor
             EnvironmentFailureException
     {
 
-        BZDatasetDirectoryNameTokenizer tokens =
-                new BZDatasetDirectoryNameTokenizer(FilenameUtils.getBaseName(incomingDataSetPath
-                        .getPath()));
+        String fileBaseName = FilenameUtils.getBaseName(incomingDataSetPath.getPath());
+        if (separatorOrNull != null)
+        {
+            int separatorIndex = fileBaseName.indexOf(separatorOrNull);
+            if (separatorIndex != -1)
+            {
+                fileBaseName = fileBaseName.substring(0, separatorIndex);
+            }
+        }
+        BZDatasetDirectoryNameTokenizer tokens = new BZDatasetDirectoryNameTokenizer(fileBaseName);
         String sampleCode = getSampleCode(tokens);
         String experimentCode = getExperiment(tokens);
         ExperimentIdentifier experimentIdentifier =
@@ -155,12 +167,6 @@ public class BZDataSetInfoExtractor implements IDataSetInfoExtractor
         if (experimentOrNull == null)
         {
             openbisService.registerExperiment(createExperimentSIRNAHCS(experimentIdentifier));
-            experimentOrNull = openbisService.tryToGetExperiment(experimentIdentifier);
-            if (experimentOrNull == null)
-            {
-                throw new UserFailureException(String.format("Experiment '%s' could not be found",
-                        experimentIdentifier));
-            }
         }
         openbisService.registerSample(
                 createPlate(sampleIdentifier, experimentIdentifier, plateGeometry), null);
@@ -173,7 +179,7 @@ public class BZDataSetInfoExtractor implements IDataSetInfoExtractor
 
     private static String getSampleCode(BZDatasetDirectoryNameTokenizer tokens)
     {
-        return "P_" + tokens.getExperimentToken() + "_" + tokens.getTimestampToken();
+        return "PLATE_" + tokens.getPlateBarcodeToken(); // NOTE(review): gives "PLATE_null" if the 'b' token is missing - confirm
     }
 
     private static IEntityProperty[] createVocabularyProperty(String propertyTypeCode,
@@ -231,7 +237,8 @@ public class BZDataSetInfoExtractor implements IDataSetInfoExtractor
         for (File imageFile : imageFiles)
         {
             UnparsedImageFileInfo imageInfo =
-                    UnparsedImageFileInfoLexer.tryExtractHCSImageFileInfo(imageFile, incomingDataSetPath);
+                    UnparsedImageFileInfoLexer.tryExtractHCSImageFileInfo(imageFile,
+                            incomingDataSetPath);
             if (imageInfo != null)
             {
                 String wellLocationToken = imageInfo.getWellLocationToken();
diff --git a/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/biozentrum/BZDatasetDirectoryNameTokenizer.java b/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/biozentrum/BZDatasetDirectoryNameTokenizer.java
index 30e90f38f78..ca2763be302 100644
--- a/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/biozentrum/BZDatasetDirectoryNameTokenizer.java
+++ b/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/biozentrum/BZDatasetDirectoryNameTokenizer.java
@@ -16,7 +16,9 @@
 
 package ch.systemsx.cisd.openbis.dss.etl.biozentrum;
 
-import org.apache.commons.lang.StringUtils;
+import java.util.Map;
+
+import ch.systemsx.cisd.openbis.dss.etl.UnparsedImageFileInfoLexer;
 
 /**
  * Extracts useful information from dataset directory name specific to iBrain2.
@@ -25,9 +27,17 @@ import org.apache.commons.lang.StringUtils;
  */
 public class BZDatasetDirectoryNameTokenizer
 {
+    private static final char EXPERIMENT_MARKER = 'i';
+
+    private static final char MICROSCOPE_MARKER = 'm';
+
+    private static final char PLATE_BARCODE_MARKER = 'b';
+
+    private static final char UNIQUE_ID_MARKER = 'u';
+
     private final String experimentToken;
 
-    private final String plateToken;
+    private final String microscopeToken;
 
     private final String barcodeToken;
 
@@ -35,11 +45,11 @@ public class BZDatasetDirectoryNameTokenizer
 
     BZDatasetDirectoryNameTokenizer(String identifier)
     {
-        String[] namedParts = StringUtils.split(identifier, "_");
-        experimentToken = StringUtils.split(namedParts[0], "-")[1];
-        plateToken = StringUtils.split(namedParts[1], "-")[1];
-        barcodeToken = StringUtils.split(namedParts[2], "-")[1];
-        timestampToken = StringUtils.split(namedParts[3], "-")[1];
+        Map<Character, String> tokensMap = UnparsedImageFileInfoLexer.extractTokensMap(identifier);
+        experimentToken = tokensMap.get(EXPERIMENT_MARKER); // each token is null if the map has no entry for its marker
+        microscopeToken = tokensMap.get(MICROSCOPE_MARKER);
+        barcodeToken = tokensMap.get(PLATE_BARCODE_MARKER);
+        timestampToken = tokensMap.get(UNIQUE_ID_MARKER); // field keeps its old name but now holds the unique id token
     }
 
     public String getExperimentToken()
@@ -47,17 +57,17 @@ public class BZDatasetDirectoryNameTokenizer
         return experimentToken;
     }
 
-    public String getPlateToken()
+    public String getMicroscopeToken()
     {
-        return plateToken;
+        return microscopeToken; // value read for the 'm' marker in the constructor
     }
 
-    public String getBarcodeToken()
+    public String getPlateBarcodeToken() // value read for the 'b' marker in the constructor
     {
         return barcodeToken;
     }
 
-    public String getTimestampToken()
+    public String getUniqueIdToken() // value read for the 'u' marker; the backing field is still named timestampToken
     {
         return timestampToken;
     }
diff --git a/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/dto/ImageSeriesPoint.java b/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/dto/ImageSeriesPoint.java
new file mode 100644
index 00000000000..813d74b6df0
--- /dev/null
+++ b/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/dto/ImageSeriesPoint.java
@@ -0,0 +1,81 @@
+/*
+ * Copyright 2010 ETH Zuerich, CISD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package ch.systemsx.cisd.openbis.dss.etl.dto;
+
+/**
+ * Immutable DTO which stores time, depth and series number (all optional, i.e. may be null).
+ * 
+ * @author Tomasz Pylak
+ */
+public final class ImageSeriesPoint
+{
+    private final Float timeOrNull; // time point, null if not specified
+
+    private final Float depthOrNull; // depth, null if not specified
+
+    private final Integer seriesNumberOrNull; // series number, null if not specified
+
+    public ImageSeriesPoint(Float timeOrNull, Float depthOrNull, Integer seriesNumberOrNull)
+    {
+        this.timeOrNull = timeOrNull;
+        this.depthOrNull = depthOrNull;
+        this.seriesNumberOrNull = seriesNumberOrNull;
+    }
+
+    @Override
+    public int hashCode() // combines all three fields; a null field contributes 0
+    {
+        final int prime = 31;
+        int result = 1;
+        result = prime * result + ((depthOrNull == null) ? 0 : depthOrNull.hashCode());
+        result = prime * result + ((timeOrNull == null) ? 0 : timeOrNull.hashCode());
+        result =
+                prime * result + ((seriesNumberOrNull == null) ? 0 : seriesNumberOrNull.hashCode());
+        return result;
+    }
+
+    @Override
+    public boolean equals(Object obj) // true iff all three values match; null matches only null
+    {
+        if (this == obj)
+            return true;
+        if (obj == null)
+            return false;
+        if (getClass() != obj.getClass())
+            return false;
+        ImageSeriesPoint other = (ImageSeriesPoint) obj;
+        if (depthOrNull == null)
+        {
+            if (other.depthOrNull != null)
+                return false;
+        } else if (!depthOrNull.equals(other.depthOrNull))
+            return false;
+        if (timeOrNull == null)
+        {
+            if (other.timeOrNull != null)
+                return false;
+        } else if (!timeOrNull.equals(other.timeOrNull))
+            return false;
+        if (seriesNumberOrNull == null)
+        {
+            if (other.seriesNumberOrNull != null)
+                return false;
+        } else if (!seriesNumberOrNull.equals(other.seriesNumberOrNull))
+            return false;
+        return true;
+    }
+}
\ No newline at end of file
diff --git a/screening/sourceTest/java/ch/systemsx/cisd/openbis/dss/etl/biozentrum/BZDatasetDirectoryNameTokenizerTest.java b/screening/sourceTest/java/ch/systemsx/cisd/openbis/dss/etl/biozentrum/BZDatasetDirectoryNameTokenizerTest.java
new file mode 100644
index 00000000000..e03b91f3f2d
--- /dev/null
+++ b/screening/sourceTest/java/ch/systemsx/cisd/openbis/dss/etl/biozentrum/BZDatasetDirectoryNameTokenizerTest.java
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2010 ETH Zuerich, CISD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package ch.systemsx.cisd.openbis.dss.etl.biozentrum;
+
+import org.testng.AssertJUnit;
+import org.testng.annotations.Test;
+
+/**
+ * @author Tomasz Pylak
+ */
+public class BZDatasetDirectoryNameTokenizerTest extends AssertJUnit
+{
+    @Test
+    public void test() // checks that all four marker-prefixed tokens are extracted from one name
+    {
+        BZDatasetDirectoryNameTokenizer tokenizer =
+                new BZDatasetDirectoryNameTokenizer(
+                        "iBrucellaInfectionWF10_mBS-IX2_bRCB024_u0265626F"); // i=experiment, m=microscope, b=plate barcode, u=unique id
+        assertEquals("BrucellaInfectionWF10", tokenizer.getExperimentToken());
+        assertEquals("BS-IX2", tokenizer.getMicroscopeToken());
+        assertEquals("RCB024", tokenizer.getPlateBarcodeToken());
+        assertEquals("0265626F", tokenizer.getUniqueIdToken());
+    }
+}
-- 
GitLab