From 4f2ceeaef60b83f900452cf67709e7dc1afebca8 Mon Sep 17 00:00:00 2001 From: tpylak <tpylak> Date: Wed, 2 Mar 2011 10:30:13 +0000 Subject: [PATCH] LMS-2081 example dropbox for feature vectors SVN: 20193 --- .../data-set-handler.py | 85 +++++++++++++++---- 1 file changed, 70 insertions(+), 15 deletions(-) diff --git a/screening/etc/example-dropbox-configuration/jython-feature-vectors-biozentrum/data-set-handler.py b/screening/etc/example-dropbox-configuration/jython-feature-vectors-biozentrum/data-set-handler.py index 60efa81fb81..76b9575cba4 100755 --- a/screening/etc/example-dropbox-configuration/jython-feature-vectors-biozentrum/data-set-handler.py +++ b/screening/etc/example-dropbox-configuration/jython-feature-vectors-biozentrum/data-set-handler.py @@ -4,18 +4,16 @@ import os from ch.systemsx.cisd.openbis.dss.etl.dto.api.v1 import * -DEFAULT_SPACE = "TEST" -SEP = "," +SEPARATOR = "," -def parseFeaturesFile(featuresBuilder, incoming): +def defineFeaturesBiozentrum(featuresBuilder, incoming): file = open(incoming.getPath()) for header in file: - headerTokens = header.split(SEP) + headerTokens = header.split(SEPARATOR) featureCode = headerTokens[0] - featureDef = FeatureDefinition(featureCode) - featureValues = featuresBuilder.defineFeature(featureDef) + featureValues = featuresBuilder.defineFeature(featureCode) for rowValues in file: - rowTokens = rowValues.split(SEP) + rowTokens = rowValues.split(SEPARATOR) rowLabel = rowTokens[0].strip() if len(rowLabel) == 0: break @@ -25,18 +23,75 @@ def parseFeaturesFile(featuresBuilder, incoming): #print featureCode, well, value featureValues.addValue(well, value) + +# Specific code which defines the feature vector values for the dataset. +# Usually you will parse the content of the incoming file or directory to get the values. +# Here all the values are hard-coded for simplicity, +# but the example shows which calls you need to perform in your parser. 
+# Parameters +# incoming: java.io.File which points to the incoming dataset +def defineFeatures(featuresBuilder, incoming): + # define INFECTION_INDEX feature + infectionFeature = featuresBuilder.defineFeature("INFECTION_INDEX") + # optionally you can set the label and description of the feature + infectionFeature.setFeatureLabel("Infection Index") + infectionFeature.setFeatureDescription("What percentage of the cells in the well has been infected?") + # set values for each well + infectionFeature.addValue("A1", "3.432") + # Instead of the well code you can use row and column numbers. For B1 it would be (2,1) + infectionFeature.addValue(2, 1, "5.343") + infectionFeature.addValue("C1", "0.987") + + # define QUALITY feature + qualityFeature = featuresBuilder.defineFeature("QUALITY") + qualityFeature.addValue("A1", "GOOD") + qualityFeature.addValue("B1", "BAD") + qualityFeature.addValue("C1", "GOOD") + +def defineFeaturesForTimepoints(featuresBuilder, incoming): + # define INFECTION_INDEX feature + infectionFeature = featuresBuilder.defineFeature("INFECTION_INDEX") + # Define the feature values for the timepoint 100. + # The second argument is the depth and can be used if depth-scans are performed. + infectionFeature.changeSeries(100, None) + infectionFeature.addValue("A1", "3.432") + infectionFeature.addValue("B1", "5.343") + infectionFeature.addValue("C1", "0.987") + # Define the feature values for the timepoint 200. + infectionFeature.changeSeries(200, None) + infectionFeature.addValue("A1", "1.652") + infectionFeature.addValue("B1", "2.321") + infectionFeature.addValue("C1", "0.121") + +# Returns the code of the plate to which the dataset should be connected. 
+# Parameters +# incoming: java.io.File which points to the incoming dataset +def extractPlateCode(incoming): + return os.path.splitext(incoming.getName())[0] + +def extractSpaceCode(incoming): + return "TEST" + +# --- boilerplate code which registers one dataset with image analysis results on the well level featuresBuilder = factory.createFeaturesBuilder() -parseFeaturesFile(featuresBuilder, incoming) -analysisRegistrationDetails = factory.createFeatureVectorRegistrationDetails(featuresBuilder, incoming) +defineFeaturesBiozentrum(featuresBuilder, incoming) +#defineFeatures(featuresBuilder, incoming) +#defineFeaturesForTimepoints(featuresBuilder, incoming) +analysisRegistrationDetails = factory.createFeatureVectorRegistrationDetails(featuresBuilder, incoming) tr = service.transaction(incoming, factory) +analysisDataset = tr.createNewDataSet(analysisRegistrationDetails) -plateCode = os.path.splitext(incoming.getName())[0] -sampleIdentifier = "/"+DEFAULT_SPACE+"/"+plateCode +# set plate to which the dataset should be connected +sampleIdentifier = "/"+extractSpaceCode(incoming)+"/"+extractPlateCode(incoming) plate = tr.getSample(sampleIdentifier) - -analysisDataset = tr.createNewDataSet(analysisRegistrationDetails) -analysisDataset.setPropertyValue("DESCRIPTION", "my dataset") analysisDataset.setSample(plate) + +# store the original file in the dataset. tr.moveFile(incoming.getPath(), analysisDataset) -print "Registered dataset:", analysisDataset.getDataSetCode() + +# --- optional: other standard operations on analysisDataset can be performed (see IDataSet interface) +#analysisDataset.setFileFormatType("CSV") +#analysisDataset.setDataSetType("HCS_ANALYSIS_WELL_FEATURES") +#analysisDataset.setPropertyValue("DESCRIPTION", incoming.getName()) +#analysisDataset.setParentDatasets(["20110302085840150-90"]) -- GitLab