Skip to content
Snippets Groups Projects
Commit d92d870f authored by tpylak's avatar tpylak
Browse files

LMS-2069, LMS-2085 ibrain dropboxes (draft)

SVN: 20302
parent 9bf1d525
No related branches found
No related tags found
No related merge requests found
Showing
with 490 additions and 0 deletions
inputs = hcs_image_raw, hcs_image_overview, hcs_image_segmentation, hcs_analysis_well_features, hcs_analysis_well_quality, hcs_analysis_cell_features_csv, hcs_analysis_cell_features_mat
root = ~/data/dropboxes
ibrain2-dropboxes-dir = ${root}/ibrain2-dropboxes-test
staging-dir = ${root}/tmp
scripts-dir = ${root}/scripts
# --- hcs_image_raw ------------
hcs_image_raw.incoming-dir = ${ibrain2-dropboxes-dir}/hcs_image_raw
hcs_image_raw.script-path = ${scripts-dir}/hcs_image_raw.py
hcs_image_raw.incoming-data-completeness-condition = auto-detection
hcs_image_raw.top-level-data-set-handler = ch.systemsx.cisd.openbis.dss.etl.jython.JythonPlateDataSetHandler
hcs_image_raw.staging-dir = ${staging-dir}
hcs_image_raw.storage-processor = ch.systemsx.cisd.openbis.dss.etl.PlateStorageProcessor
hcs_image_raw.storage-processor.data-source = imaging-db
# --- hcs_image_overview
# --- hcs_image_segmentation
# --- hcs_analysis_cell_features_csv
# --- hcs_analysis_cell_features_mat
# --- hcs_analysis_well_quality -----------
hcs_analysis_well_quality.incoming-dir = ${ibrain2-dropboxes-dir}/hcs_analysis_well_quality
hcs_analysis_well_quality.script-path = ${scripts-dir}/hcs_analysis_well_quality.py
hcs_analysis_well_quality.incoming-data-completeness-condition = auto-detection
hcs_analysis_well_quality.top-level-data-set-handler = ch.systemsx.cisd.openbis.dss.etl.jython.JythonPlateDataSetHandler
hcs_analysis_well_quality.staging-dir = ${staging-dir}
hcs_analysis_well_quality.storage-processor = ch.systemsx.cisd.openbis.dss.etl.featurevector.FeatureVectorStorageProcessor
hcs_analysis_well_quality.storage-processor.processor = ch.systemsx.cisd.etlserver.DefaultStorageProcessor
hcs_analysis_well_quality.storage-processor.data-source = imaging-db
hcs_analysis_well_quality.storage-processor.separator = ,
hcs_analysis_well_quality.storage-processor.ignore-comments = true
hcs_analysis_well_quality.storage-processor.well-name-col-is-alphanum = true
hcs_analysis_well_quality.storage-processor.well-name-row = File_Name
hcs_analysis_well_quality.storage-processor.well-name-col = File_Name
# --- hcs_analysis_well_features -----------
hcs_analysis_well_features.incoming-dir = ${ibrain2-dropboxes-dir}/hcs_analysis_well_features
hcs_analysis_well_features.script-path = ${scripts-dir}/hcs_analysis_well_features.py
hcs_analysis_well_features.incoming-data-completeness-condition = auto-detection
hcs_analysis_well_features.top-level-data-set-handler = ch.systemsx.cisd.openbis.dss.etl.jython.JythonPlateDataSetHandler
hcs_analysis_well_features.staging-dir = ${staging-dir}
hcs_analysis_well_features.storage-processor = ch.systemsx.cisd.openbis.dss.etl.featurevector.FeatureVectorStorageProcessor
hcs_analysis_well_features.storage-processor.processor = ch.systemsx.cisd.etlserver.DefaultStorageProcessor
hcs_analysis_well_features.storage-processor.data-source = imaging-db
# --- hcs_analysis_well_metadata -----------
hcs_analysis_well_metadata.incoming-dir = ${ibrain2-dropboxes-dir}/hcs_analysis_well_metadata
hcs_analysis_well_metadata.script-path = ${scripts-dir}/hcs_analysis_well_metadata.py
hcs_analysis_well_metadata.incoming-data-completeness-condition = auto-detection
hcs_analysis_well_metadata.top-level-data-set-handler = ch.systemsx.cisd.openbis.dss.etl.jython.JythonPlateDataSetHandler
hcs_analysis_well_metadata.staging-dir = ${staging-dir}
hcs_analysis_well_metadata.storage-processor = ch.systemsx.cisd.openbis.dss.etl.featurevector.FeatureVectorStorageProcessor
hcs_analysis_well_metadata.storage-processor.processor = ch.systemsx.cisd.etlserver.DefaultStorageProcessor
hcs_analysis_well_metadata.storage-processor.data-source = imaging-db
File added
#! /usr/bin/env python
import os
class AbstractPropertiesParser:
    """Base class for parsers of 'key = value' properties files.

    Subclasses choose which file in the incoming directory is parsed and may
    define DATASET_PROPERTY_PREFIX (see getDatasetPropertiesIter).
    """
    _propertiesDict = None

    def __init__(self, incoming, fileName):
        # incoming: directory containing the properties file named 'fileName'
        path = os.path.join(incoming, fileName)
        self._propertiesDict = self._parseMetadata(path)

    # Parses the metadata file from the given incoming directory.
    # Each line should have a form:
    #   key = value
    # Empty lines and lines starting with '#' are ignored.
    # Keys should be unique in the file.
    # Returns:
    #   a dictionary with keys and values from the file.
    # Raises an exception on a line without '=' or on a duplicated key.
    def _parseMetadata(self, path):
        f = open(path)
        # try/finally (not 'with') keeps compatibility with old Jython while
        # fixing the previously leaked file handle
        try:
            myDict = {}
            for line in f:
                line = line.strip()
                if len(line) == 0 or line.startswith("#"):
                    continue
                ix = line.find("=")
                if ix == -1:
                    raise Exception("Cannot find '=' in line '"+line+"' in file: "+path)
                key = line[:ix].strip()
                value = line[ix+1:].strip()
                if key in myDict:
                    raise Exception("Duplicated key '"+key+"' in file: "+path)
                myDict[key] = value
            return myDict
        finally:
            f.close()

    def get(self, propertyName):
        """Returns the value of the property; raises KeyError when absent."""
        return self._propertiesDict[propertyName]

    # All properties in the file.
    # Returns:
    #   an iterator which yields (propertyName, propertyValue) pairs
    def getPropertiesIter(self):
        # items() instead of iteritems(): identical iteration behaviour on
        # Jython/Python 2 and also valid on Python 3
        for key, value in self._propertiesDict.items():
            yield (key, value)

    # All dataset properties.
    # Returns:
    #   an iterator which yields (propertyCode, propertyValue) pairs
    def getDatasetPropertiesIter(self):
        # NOTE: DATASET_PROPERTY_PREFIX is only defined by subclasses
        # (e.g. AbstractMetadataParser); calling this on the base class fails.
        for key, value in self._propertiesDict.items():
            if key.startswith(self.DATASET_PROPERTY_PREFIX):
                yield (key, value)
class AbstractMetadataParser(AbstractPropertiesParser):
    """Parses the 'metadata.properties' file found in an incoming dataset directory."""
    # name of the properties file expected inside the incoming directory
    METADATA_FILE="metadata.properties"
    # NOTE(review): "brain2.dataset.id" looks like a typo for "ibrain2.dataset.id"
    # (cf. DATASET_PROPERTY_PREFIX below) — confirm against the iBrain2 side
    # before changing, the value is written into confirmation files.
    IBRAIN2_DATASET_ID_PROPERTY = "brain2.dataset.id"
    # properties with this prefix are treated as openBIS dataset properties
    DATASET_PROPERTY_PREFIX = "ibrain2."
    DATASET_TYPE_PROPERTY = "dataset.type"

    def __init__(self, incoming):
        # incoming: path of the incoming dataset directory containing METADATA_FILE
        AbstractPropertiesParser.__init__(self, incoming, self.METADATA_FILE)

    def getDatasetType(self):
        """Returns the openBIS dataset type code."""
        return self.get(self.DATASET_TYPE_PROPERTY)

    def getIBrain2DatasetId(self):
        """Returns the iBrain2-side id of the dataset."""
        return self.get(self.IBRAIN2_DATASET_ID_PROPERTY)
# --- concrete parser classes ----------------------
class AcquiredDatasetMetadataParser(AbstractMetadataParser):
    """Parser for metadata of datasets acquired directly from an instrument."""
    # NOTE: 'PRPOPERTY' is a typo; the misspelled name is kept for backward
    # compatibility, new code should use the correctly spelled alias below.
    PLATE_CODE_PRPOPERTY = "barcode"
    PLATE_CODE_PROPERTY = PLATE_CODE_PRPOPERTY
    INSTRUMENT_PROPERTY = "instrument.id"
    TIMESTAMP_PROPERTY = "timestamp" # not used

    # All dataset properties.
    # Returns:
    #   an iterator which yields (propertyCode, propertyValue) pairs;
    #   the 'ibrain2.*' properties plus the instrument id.
    def getDatasetPropertiesIter(self):
        for propertyCode, value in AbstractPropertiesParser.getDatasetPropertiesIter(self):
            yield (propertyCode, value)
        yield (self.INSTRUMENT_PROPERTY, self.get(self.INSTRUMENT_PROPERTY))

    def getPlateCode(self):
        """Returns the barcode of the plate the dataset was acquired from."""
        return self.get(self.PLATE_CODE_PROPERTY)
class DerivedDatasetMetadataParser(AbstractMetadataParser):
    """Parser for metadata of datasets derived from other (parent) datasets."""
    # NOTE: 'DATSASET'/'PRPOPERTY' are typos; the misspelled name is kept for
    # backward compatibility, new code should use the alias below.
    PARENT_DATSASET_PERMID_PRPOPERTY = "storage_provider.parent.dataset.id"
    PARENT_DATASET_PERMID_PROPERTY = PARENT_DATSASET_PERMID_PRPOPERTY
    # same value as in the base class; kept to preserve the original interface
    DATASET_TYPE_PROPERTY = "dataset.type"

    def getDatasetPropertiesIter(self):
        """Yield (propertyCode, propertyValue) pairs of the 'ibrain2.*' properties."""
        return AbstractMetadataParser.getDatasetPropertiesIter(self)

    def getParentDatasetPermId(self):
        """Returns the openBIS perm id of the parent dataset."""
        return self.get(self.PARENT_DATASET_PERMID_PROPERTY)

    def getDatasetType(self):
        return self.get(self.DATASET_TYPE_PROPERTY)
class AssayParser(AbstractPropertiesParser):
    """Parser for the 'assay_*' properties file describing the assay of an incoming dataset."""
    ASSAY_FILE_PREFIX="assay_"
    ASSAY_ID_PROPERTY = "assay.id"
    ASSAY_TYPE_PROPERTY = "assay.type"
    ASSAY_DESC_PROPERTY = "assay.description"
    LAB_LEADER_PROPERTY = "labinfo.pi"
    EXPERIMENTER_PROPERTY = "experimenter.login"
    WORKFLOW_NAME_PROPERTY = "workflow.name"
    WORKFLOW_AUTHOR_PROPERTY = "workflow.author"

    # Returns the name of the first file in 'incoming' starting with ASSAY_FILE_PREFIX.
    # Raises an exception when no such file exists.
    # (fix: removed leftover debug 'print' which also shadowed the builtin 'file')
    def _findAssayFile(self, incoming):
        for fileName in os.listdir(incoming):
            if fileName.startswith(self.ASSAY_FILE_PREFIX):
                return fileName
        raise Exception("Assay file not found!")

    def __init__(self, incoming):
        AbstractPropertiesParser.__init__(self, incoming, self._findAssayFile(incoming))
class IBRAIN2Utils:
    """Writes registration status (confirmation) files which are read back by iBrain2."""
    # path to the registration confirmation directory relative to the incoming dataset
    CONFIRMATION_DIRECTORY = "../../registration-status"

    STATUS_PROPERTY = "storage_provider.storage.status"
    OK = "STORAGE_SUCCESS"
    ERROR = "STORAGE_FAILED"
    ERROR_MSG_PROPERTY = "storage_provider.message"
    OPENBIS_DATASET_ID_PROPERTY = "storage_provider.dataset.id"
    IBRAIN2_STATUS_FILE_PREFIX = "ibrain2_dataset_id_"
    IBRAIN2_STATUS_FILE_SUFFIX = ".properties"

    # Returns the path of the status file for the given iBrain2 dataset id,
    # relative to the incoming dataset directory.
    def _getStatusFileName(self, ibrain2DatasetId, incoming):
        return incoming + "/" + self.CONFIRMATION_DIRECTORY + "/" + self.IBRAIN2_STATUS_FILE_PREFIX + ibrain2DatasetId + self.IBRAIN2_STATUS_FILE_SUFFIX

    # Formats one 'name = value' properties line.
    def _prop(self, name, value):
        return name + " = " + value + "\n"

    def _writeConfirmationFile(self, ibrain2DatasetId, fileContent, incoming):
        confirmationFile = self._getStatusFileName(ibrain2DatasetId, incoming)
        out = open(confirmationFile, "w")
        # try/finally fixes the handle previously leaked when write() failed
        # (and avoids shadowing the builtin 'file')
        try:
            out.write(fileContent)
        finally:
            out.close()

    def createSuccessStatus(self, ibrain2DatasetId, openbisDatasetId, incoming):
        """Writes a STORAGE_SUCCESS status file containing the openBIS dataset id."""
        fileContent = self._prop(self.STATUS_PROPERTY, self.OK)
        fileContent += self._prop(AbstractMetadataParser.IBRAIN2_DATASET_ID_PROPERTY, ibrain2DatasetId)
        fileContent += self._prop(self.OPENBIS_DATASET_ID_PROPERTY, openbisDatasetId)
        self._writeConfirmationFile(ibrain2DatasetId, fileContent, incoming)

    def createFailureStatus(self, ibrain2DatasetId, errorMessage, incoming):
        """Writes a STORAGE_FAILED status file containing the error message."""
        fileContent = self._prop(self.STATUS_PROPERTY, self.ERROR)
        fileContent += self._prop(AbstractMetadataParser.IBRAIN2_DATASET_ID_PROPERTY, ibrain2DatasetId)
        fileContent += self._prop(self.ERROR_MSG_PROPERTY, errorMessage)
        self._writeConfirmationFile(ibrain2DatasetId, fileContent, incoming)
# -------------- TODO: remove tests
# Manual smoke tests against developer-local sample data; not executed in
# production (the calls at the bottom are commented out).
TEST_DIR = "/Users/tpylak/main/src/screening-demo/biozentrum/dropboxes/ibrain2-dropboxes-test"

# Prints the parsed metadata of one acquired and one derived sample dataset.
def testMetadataParsers():
    print "-- acquired ---------------------------------"
    parser = AcquiredDatasetMetadataParser(TEST_DIR+"/HCS_IMAGE_RAW/ibrain2_dataset_id_32")
    print "dataset type:", parser.getDatasetType()
    print "plate:", parser.getPlateCode()
    print "properties"
    for key, value in parser.getDatasetPropertiesIter():
        print key, value
    print "\n-- derived ---------------------------------"
    parser = DerivedDatasetMetadataParser(TEST_DIR+"/HCS_IMAGE_OVERVIEW/ibrain2_dataset_id_48")
    print "dataset type:", parser.getDatasetType()
    print "parent perm id:", parser.getParentDatasetPermId()
    print "properties"
    for key, value in parser.getDatasetPropertiesIter():
        print key, value

# Prints all properties of the assay_* file of a sample dataset.
def testAssayParsers():
    print "-- assay ---------------------------------"
    parser = AssayParser(TEST_DIR+"/HCS_IMAGE_RAW/ibrain2_dataset_id_32")
    print "properties"
    for key, value in parser.getPropertiesIter():
        print key, value

# Writes one success and one failure confirmation file next to the sample data.
def testConfirmationFiles():
    IBRAIN2Utils().createSuccessStatus("123", "123123123123-12312", TEST_DIR+"/HCS_IMAGE_RAW/ibrain2_dataset_id_32")
    IBRAIN2Utils().createFailureStatus("321", "Global catastrophy!", TEST_DIR+"/HCS_IMAGE_RAW/ibrain2_dataset_id_32")

#testAssayParsers()
#testMetadataParsers()
#testConfirmationFiles()
#! /usr/bin/env python
import os
from ch.systemsx.cisd.openbis.dss.etl.dto.api.v1 import *
from ch.systemsx.cisd.openbis.plugin.screening.shared.api.v1.dto import Geometry
class IBrain2ImageDataSetConfig(SimpleImageDataConfig):
    """Image dataset configuration for iBrain2: extracts well, tile and channel
    from image file names and computes the tile geometry of a well."""
    # NOTE: 'THUMBANAIL' is a typo kept for backward compatibility (other
    # dropbox scripts reference it); prefer the alias below in new code.
    THUMBANAIL_SIZE = 200
    THUMBNAIL_SIZE = THUMBANAIL_SIZE

    def extractImageMetadata(self, imagePath):
        """Parses well, tile number and channel from an image file name whose
        base name consists of '_'-separated tokens, each starting with a
        one-letter tag: 'w' = well, 's' = tile (field), 'c' = channel.
        Raises an exception when the tile number is not an integer."""
        image_tokens = ImageMetadata()
        basename = os.path.splitext(imagePath)[0]
        # map the first letter of each token to its remainder, e.g. "wA01" -> {"w": "A01"}
        token_dict = {}
        for token in basename.split("_"):
            token_dict[token[:1]] = token[1:]
        image_tokens.well = token_dict["w"]
        fieldText = token_dict["s"]
        try:
            image_tokens.tileNumber = int(fieldText)
        except ValueError:
            raise Exception("Cannot parse field number from '" + fieldText + "' in '" + basename + "' file name.")
        # channel code: last token of the base name plus the 'c' token in parentheses
        image_tokens.channelCode = basename.split("_")[-1] + " ("+ token_dict["c"] + ")"
        return image_tokens

    def geom(self, row, col):
        return Geometry.createFromRowColDimensions(row, col)

    def getTileGeometry(self, imageTokens, maxTileNumber):
        """Returns the (rows, columns) Geometry describing the matrix of tiles
        (aka fields or sides) in the well, derived from the highest tile number
        seen. (This text was previously a free-floating string between methods.)"""
        # if the number of tiles is strange, assume that one tile is missing
        if maxTileNumber == 5 or maxTileNumber == 7 or maxTileNumber == 11 or maxTileNumber == 13:
            maxTileNumber = maxTileNumber + 1
        # '//' keeps the division integral on both Jython 2.x and Python 3
        if maxTileNumber % 4 == 0 and maxTileNumber != 4:
            return self.geom(4, maxTileNumber // 4) # (4,2), (4,4)
        elif maxTileNumber % 3 == 0:
            return self.geom(maxTileNumber // 3, 3) # (3,3), (4,3), (5,3)
        elif maxTileNumber % 2 == 0:
            return self.geom(maxTileNumber // 2, 2) # (2,2), (3,2), (5,2), (7,2)
        else:
            return self.geom(maxTileNumber, 1)
\ No newline at end of file
#! /usr/bin/env python
# This is an example Jython dropbox for importing feature vectors coming from analysis of image datasets
import os
from ch.systemsx.cisd.openbis.dss.etl.dto.api.v1 import *
SEPARATOR = ","

# Defines the feature vector values for the dataset by parsing the incoming CSV file.
# The file may contain several feature sections. Each section starts with a header line
#   <featureCode><SEPARATOR><column-label>...
# followed by row lines
#   <rowLabel><SEPARATOR><value>...
# and is terminated by a line whose first column is empty (or by end of file).
# Well codes are built as <rowLabel> + <column index>.
# Parameters
#   featuresBuilder: builder object collecting the features
#   incoming: java.io.File which points to the incoming dataset
def defineFeatures(featuresBuilder, incoming):
    # renamed from 'file' (shadowed the builtin); try/finally fixes the leaked handle
    f = open(incoming.getPath())
    try:
        for header in f:
            headerTokens = header.split(SEPARATOR)
            featureCode = headerTokens[0]
            featureValues = featuresBuilder.defineFeature(featureCode)
            # the inner loop shares the file iterator with the outer loop, so after
            # the blank row the outer loop resumes at the next section's header
            for rowValues in f:
                rowTokens = rowValues.split(SEPARATOR)
                rowLabel = rowTokens[0].strip()
                if len(rowLabel) == 0:
                    break
                for column in range(1,len(headerTokens)):
                    value = rowTokens[column].strip()
                    well = rowLabel + str(column)
                    featureValues.addValue(well, value)
    finally:
        f.close()
# Returns the code of the plate to which the dataset should be connected:
# the incoming file's name without its extension.
# Parameters
#   incoming: java.io.File which points to the incoming dataset
def extractPlateCode(incoming):
    base, _extension = os.path.splitext(incoming.getName())
    return base

# Returns the code of the openBIS space containing the plate.
def extractSpaceCode(incoming):
    return "TEST"
# ----------------------------
# --- boilerplate code which register one dataset with image analysis results on the well level
# --- Nothing has to be modified if your case is not complicated.
# ----------------------------
# NOTE(review): 'factory', 'service' and 'incoming' are not defined in this file;
# presumably they are injected by the openBIS DSS Jython dropbox framework — confirm.
featuresBuilder = factory.createFeaturesBuilder()
defineFeatures(featuresBuilder, incoming)
analysisRegistrationDetails = factory.createFeatureVectorRegistrationDetails(featuresBuilder, incoming)
tr = service.transaction(incoming, factory)
analysisDataset = tr.createNewDataSet(analysisRegistrationDetails)
# set plate to which the dataset should be connected
sampleIdentifier = "/"+extractSpaceCode(incoming)+"/"+extractPlateCode(incoming)
plate = tr.getSample(sampleIdentifier)
analysisDataset.setSample(plate)
# store the original file in the dataset.
tr.moveFile(incoming.getPath(), analysisDataset)
# ----------------------------
# --- optional: other standard operations on analysisDataset can be performed (see IDataSet interface)
# ----------------------------
analysisDataset.setFileFormatType("CSV")
analysisDataset.setDataSetType("HCS_ANALYSIS_WELL_FEATURES")
#analysisDataset.setParentDatasets(["20110302085840150-90"])
\ No newline at end of file
#! /usr/bin/env python
from commonDropbox import AcquiredDatasetMetadataParser, AssayParser, IBRAIN2Utils
from commonImageDropbox import IBrain2ImageDataSetConfig
# Plate geometry which will be used. Other possible value: 96_WELLS_8X12
PLATE_GEOMETRY = "384_WELLS_16X24"
# Sample type code of the plate, needed if a new sample is registered automatically.
PLATE_TYPE_CODE = "PLATE"
SIRNA_EXP_TYPE = "SIRNA_HCS"
PLATE_GEOMETRY_PROPERTY_CODE = "$PLATE_GEOMETRY"

# Fetches the plate sample and, when it does not exist yet, creates it together
# with its experiment (the experiment is itself created on demand).
# Parameters
#   transaction: DSS registration transaction
#   assayParser: AssayParser holding the assay_* file properties
#   plate: plate (sample) code
#   space: openBIS space code
def createPlateWithExperimentIfNeeded(transaction, assayParser, plate, space):
    # NOTE(review): the experimenter login is used as the project code — confirm intended
    project = assayParser.get(assayParser.EXPERIMENTER_PROPERTY)
    experiment = assayParser.get(assayParser.ASSAY_ID_PROPERTY)
    experimentDesc = assayParser.get(assayParser.ASSAY_DESC_PROPERTY)
    experimentType = assayParser.get(assayParser.ASSAY_TYPE_PROPERTY)
    sampleIdentifier = "/"+space+"/"+plate
    plate = transaction.getSample(sampleIdentifier)
    if plate is None:  # 'is None' instead of '== None'
        expIdentifier = "/"+space+"/"+project+"/"+experiment
        experiment = transaction.getExperiment(expIdentifier)
        if experiment is None:
            experiment = transaction.createNewExperiment(expIdentifier, SIRNA_EXP_TYPE)
            openbisExpDesc = experimentDesc + "\ntype: "+experimentType
            experiment.setPropertyValue("DESCRIPTION", openbisExpDesc)
        plate = transaction.createNewSample(sampleIdentifier, PLATE_TYPE_CODE)
        plate.setPropertyValue(PLATE_GEOMETRY_PROPERTY_CODE, PLATE_GEOMETRY)
        plate.setExperiment(experiment)
"""
TODO:
-
"""
if incoming.isDirectory():
imageDataset = IBrain2ImageDataSetConfig()
imageDataset.setRawImageDatasetType()
metadataParser = AcquiredDatasetMetadataParser(incoming)
assayParser = AssayParser(incoming)
plate = metadataParser.getPlateCode()
space = assayParser.get(assayParser.LAB_LEADER_PROPERTY)
imageDataset.setPlate(space, plate)
imageDataset.setFileFormatType("TIFF")
imageDataset.setGenerateThumbnails(True)
imageDataset.setMaxThumbnailWidthAndHeight(imageDataset.THUMBANAIL_SIZE)
imageDataset.setRecognizedImageExtensions(["tif, tiff"])
imageDataset.setStoreChannelsOnExperimentLevel(False)
imageRegistrationDetails = factory.createImageRegistrationDetails(imageDataset, incoming)
for propertyCode, value in metadataParser.getPropertiesIter():
imageRegistrationDetails.setPropertyValue(propertyCode, value)
tr = service.transaction(incoming, factory)
createPlateWithExperimentIfNeeded(tr, assayParser, plate, space)
dataset = tr.createNewDataSet(imageRegistrationDetails)
imageDataSetFolder = tr.moveFile(incoming.getPath(), dataset)
imageDatasetCode = dataset.getDataSetCode()
IBRAIN2Utils().createSuccessStatus(metadataParser.getIBrain2DatasetId(), imageDatasetCode, incoming)
print "Registered dataset:", imageDatasetCode
# TODO: test this !!!
# Called by the DSS framework when a registration transaction fails; reports the
# failure back to iBrain2 by writing a STORAGE_FAILED confirmation file.
def rollback_transaction(service, transaction, algorithmRunner, throwable):
    failedDatasetDir = service.incomingDataSetFile
    ibrain2DatasetId = AcquiredDatasetMetadataParser(failedDatasetDir).getIBrain2DatasetId()
    IBRAIN2Utils().createFailureStatus(ibrain2DatasetId, throwable.getMessage(), failedDatasetDir)
#! /usr/bin/env python
# This is a dropbox for importing HCS segmentation image datasets
import IBrain2ImageDataSetConfig from common-image-dropbox
"""
TODO:
- check if parent exists and exit otherwise (ask Eva)
-
"""
if incoming.isDirectory():
imageDataset = IBrain2ImageDataSetConfig()
imageDataset.setSegmentationImageDatasetType()
plate = incoming.getName().split("_")[2][1:]
space = "IBRAIN2"
#space = "TEST"
imageDataset.setPlate(space, plate)
imageDataset.setFileFormatType("PNG")
imageDataset.setGenerateThumbnails(True)
imageDataset.setMaxThumbnailWidthAndHeight(imageDataset.THUMBANAIL_SIZE)
imageDataset.setRecognizedImageExtensions(["png"])
imageDataset.setStoreChannelsOnExperimentLevel(False)
imageDataset.setOriginalDataStorageFormat(OriginalDataStorageFormat.HDF5)
imageRegistrationDetails = factory.createImageRegistrationDetails(imageDataset, incoming)
info = imageRegistrationDetails.getDataSetInformation()
info.getImageStorageConfiguraton().getThumbnailsStorageFormat().setHighQuality(True)
tr = service.transaction(incoming, factory)
dataset = tr.createNewDataSet(imageRegistrationDetails)
imageDataSetFolder = tr.moveFile(incoming.getPath(), dataset)
imageDatasetCode = dataset.getDataSetCode()
print "Registered dataset:", imageDatasetCode
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment