From 4f2ceeaef60b83f900452cf67709e7dc1afebca8 Mon Sep 17 00:00:00 2001
From: tpylak <tpylak>
Date: Wed, 2 Mar 2011 10:30:13 +0000
Subject: [PATCH] LMS-2081 example dropbox for feature vectors

SVN: 20193
---
 .../data-set-handler.py                       | 85 +++++++++++++++----
 1 file changed, 70 insertions(+), 15 deletions(-)

diff --git a/screening/etc/example-dropbox-configuration/jython-feature-vectors-biozentrum/data-set-handler.py b/screening/etc/example-dropbox-configuration/jython-feature-vectors-biozentrum/data-set-handler.py
index 60efa81fb81..76b9575cba4 100755
--- a/screening/etc/example-dropbox-configuration/jython-feature-vectors-biozentrum/data-set-handler.py
+++ b/screening/etc/example-dropbox-configuration/jython-feature-vectors-biozentrum/data-set-handler.py
@@ -4,18 +4,16 @@
 import os
 from ch.systemsx.cisd.openbis.dss.etl.dto.api.v1 import *
 
-DEFAULT_SPACE = "TEST"
-SEP = ","
+SEPARATOR = ","
 
-def parseFeaturesFile(featuresBuilder, incoming):
+def defineFeaturesBiozentrum(featuresBuilder, incoming):
     file = open(incoming.getPath())
     for header in file:
-        headerTokens = header.split(SEP)
+        headerTokens = header.split(SEPARATOR)
         featureCode = headerTokens[0]
-        featureDef = FeatureDefinition(featureCode)
-        featureValues = featuresBuilder.defineFeature(featureDef)
+        featureValues = featuresBuilder.defineFeature(featureCode)
         for rowValues in file:
-            rowTokens = rowValues.split(SEP)
+            rowTokens = rowValues.split(SEPARATOR)
             rowLabel = rowTokens[0].strip()
             if len(rowLabel) == 0:
                 break
@@ -25,18 +23,75 @@ def parseFeaturesFile(featuresBuilder, incoming):
                 #print featureCode, well, value
                 featureValues.addValue(well, value)
 
+
+# Specific code which defines the feature vector values for the dataset.
+# Usually you will parse the content of the incoming file or directory to get the values.
+# Here all the values are hard-coded for simplicity, 
+# but the example shows which calls you need to perform in your parser.
+# Parameters 
+#     incoming: java.io.File which points to the incoming dataset
+def defineFeatures(featuresBuilder, incoming):
+        # define INFECTION_INDEX feature
+        infectionFeature = featuresBuilder.defineFeature("INFECTION_INDEX")
+        # optionally you can set the label and description of the feature
+        infectionFeature.setFeatureLabel("Infection Index")
+        infectionFeature.setFeatureDescription("What percentage of the cells in the well has been infected?")
+        # set values for each well
+        infectionFeature.addValue("A1", "3.432")
+        # Instead of the well code you can use row and column numbers. For B1 it would be (2,1)
+        infectionFeature.addValue(2, 1, "5.343")
+        infectionFeature.addValue("C1", "0.987")
+
+        # define QUALITY feature
+        qualityFeature = featuresBuilder.defineFeature("QUALITY")
+        qualityFeature.addValue("A1", "GOOD")
+        qualityFeature.addValue("B1", "BAD")
+        qualityFeature.addValue("C1", "GOOD")
+
+def defineFeaturesForTimepoints(featuresBuilder, incoming):
+        # define INFECTION_INDEX feature
+        infectionFeature = featuresBuilder.defineFeature("INFECTION_INDEX")
+        # Define the feature values for the timepoint 100. 
+        # The second argument is the depth and can be used if depth-scans are performed.
+        infectionFeature.changeSeries(100, None)
+        infectionFeature.addValue("A1", "3.432")
+        infectionFeature.addValue("B1", "5.343")
+        infectionFeature.addValue("C1", "0.987")
+        # Define the feature values for the timepoint 200. 
+        infectionFeature.changeSeries(200, None)
+        infectionFeature.addValue("A1", "1.652")
+        infectionFeature.addValue("B1", "2.321")
+        infectionFeature.addValue("C1", "0.121")
+
+# Returns the code of the plate to which the dataset should be connected.
+# Parameters 
+#     incoming: java.io.File which points to the incoming dataset
+def extractPlateCode(incoming):
+    return os.path.splitext(incoming.getName())[0]
+
+def extractSpaceCode(incoming):
+    return "TEST"
+                
+# --- boilerplate code which registers one dataset with image analysis results on the well level
 featuresBuilder = factory.createFeaturesBuilder()
-parseFeaturesFile(featuresBuilder, incoming) 
-analysisRegistrationDetails = factory.createFeatureVectorRegistrationDetails(featuresBuilder, incoming)
+defineFeaturesBiozentrum(featuresBuilder, incoming) 
+#defineFeatures(featuresBuilder, incoming) 
+#defineFeaturesForTimepoints(featuresBuilder, incoming) 
 
+analysisRegistrationDetails = factory.createFeatureVectorRegistrationDetails(featuresBuilder, incoming)
 tr = service.transaction(incoming, factory)
+analysisDataset = tr.createNewDataSet(analysisRegistrationDetails)
 
-plateCode = os.path.splitext(incoming.getName())[0]
-sampleIdentifier = "/"+DEFAULT_SPACE+"/"+plateCode
+# set the plate to which the dataset should be connected
+sampleIdentifier = "/"+extractSpaceCode(incoming)+"/"+extractPlateCode(incoming)
 plate = tr.getSample(sampleIdentifier)
-
-analysisDataset = tr.createNewDataSet(analysisRegistrationDetails)
-analysisDataset.setPropertyValue("DESCRIPTION", "my dataset")
 analysisDataset.setSample(plate)
+
+# store the original file in the dataset.
 tr.moveFile(incoming.getPath(), analysisDataset)
-print "Registered dataset:", analysisDataset.getDataSetCode()
+
+# --- optional: other standard operations on analysisDataset can be performed (see IDataSet interface)
+#analysisDataset.setFileFormatType("CSV")
+#analysisDataset.setDataSetType("HCS_ANALYSIS_WELL_FEATURES")
+#analysisDataset.setPropertyValue("DESCRIPTION", incoming.getName())
+#analysisDataset.setParentDatasets(["20110302085840150-90"])
-- 
GitLab