diff --git a/sanofi/dist/etc/sanofi-dropbox/dropbox-all-in-one-with-library.py b/sanofi/dist/etc/sanofi-dropbox/dropbox-all-in-one-with-library.py index efbb42045ff8afc963b66ad8d4b8f4a95932d78c..b9bfa97dba855487338cefed497e33c550e9948d 100644 --- a/sanofi/dist/etc/sanofi-dropbox/dropbox-all-in-one-with-library.py +++ b/sanofi/dist/etc/sanofi-dropbox/dropbox-all-in-one-with-library.py @@ -1,19 +1,22 @@ import re import os +import utilfunctions as util +import plateinitializer as plateinit + +from java.lang import RuntimeException from java.io import File from java.util import Properties -from ch.systemsx.cisd.common.geometry import Point, ConversionUtils from ch.systemsx.cisd.common.mail import From from ch.systemsx.cisd.common.fileconverter import FileConverter, Tiff2PngConversionStrategy +# TODO KE: this is somewhat ugly, maybe we need an exception class in the etlserver package ?! +from ch.systemsx.cisd.openbis.generic.shared.basic.dto.api import ValidationException from ch.systemsx.cisd.openbis.generic.shared.api.v1.dto import SearchCriteria from ch.systemsx.cisd.openbis.generic.shared.api.v1.dto.SearchCriteria import MatchClause, MatchClauseAttribute -from ch.systemsx.cisd.openbis.dss.generic.shared.api.internal.v1 import MaterialIdentifierCollection from ch.systemsx.cisd.openbis.dss.etl.dto.api.v1 import SimpleImageDataConfig, ImageMetadata, OriginalDataStorageFormat, Location from ch.systemsx.cisd.openbis.dss.etl.custom.geexplorer import GEExplorerImageAnalysisResultParser -from ch.systemsx.cisd.openbis.plugin.screening.shared.basic.dto import ScreeningConstants from ch.systemsx.cisd.openbis.plugin.screening.shared.api.v1.dto import Geometry """ Switch to False for the Sanofi production environment """ @@ -62,80 +65,6 @@ STORE_CHANNELS_ON_EXPERIMENT_LEVEL = False """ should the original data be stored in the original form or should we pack them into one container? 
""" ORIGINAL_DATA_STORAGE_FORMAT = OriginalDataStorageFormat.UNCHANGED -# ================================= -# Generic utility functions -# ================================= - -""" -Finds first occurence of the patter from the right. -Throws exception if the pattern cannot be found. -""" -def rfind(text, pattern): - ix = text.rfind(pattern) - ensurePatternFound(ix, text, pattern) - return ix - -""" -Finds first occurence of the patter from the left. -Throws exception if the pattern cannot be found. -""" -def find(text, pattern): - ix = text.find(pattern) - ensurePatternFound(ix, text, pattern) - return ix - -def ensurePatternFound(ix, file, pattern): - if ix == -1: - raise Exception("Cannot find '" + pattern + "' pattern in file name '" + file + "'") - -""" Returns: name of the file without the extension """ -def extractFileBasename(filename): - lastDot = filename.rfind(".") - if lastDot != -1: - return filename[0:lastDot] - else: - return filename - -""" Returns: extension of the file """ -def getFileExt(file): - return os.path.splitext(file)[1][1:].lower() - -""" Returns: java.io.File - first file with the specified extension or None if no file matches """ -def findFileByExt(incomingFile, expectedExt): - if not incomingFile.isDirectory(): - return None - incomingPath = incomingFile.getPath() - for file in os.listdir(incomingPath): - ext = getFileExt(file) - if ext.upper() == expectedExt.upper(): - return File(incomingPath, file) - return None - -""" Returns: java.io.File - subdirectory which contains the specified marker in the name """ -def findDir(incomingFile, dirNameMarker): - if not incomingFile.isDirectory(): - return None - incomingPath = incomingFile.getPath() - for file in os.listdir(incomingPath): - if dirNameMarker.upper() in file.upper(): - return File(incomingPath, file) - return None - -""" Removes trailing empty strings from a list """ -def removeTrailingEmptyElements(list): - pos = len(list) - while (pos > 0): - pos = pos - 1 - if not 
list[pos].strip(): - del list[pos] - else: - break - return list - - -# ====================================== -# end generic utility functions -# ====================================== def rollback_service(service, ex): global plateCode @@ -233,7 +162,7 @@ def findPlateByCode(code): platesFound = list(searchService.searchForSamples(criteria)) if not platesFound: - raise RuntimeError("No plate with code '%(code)s' found in the openBIS database" % vars()) + raise ValidationException("No plate with code '%(code)s' found in the openBIS database" % vars()) return platesFound[0] @@ -244,262 +173,12 @@ def parseIncomingDirname(dirName): """ tokens = dirName.split("_") if len(tokens) < 2: - raise RuntimeError("Data set directory name does not match the pattern '<ACQUISITION_BATCH_NAME>_<BAR_CODE>_<TIMESTAMP>': " + dirName) + raise ValidationException("Data set directory name does not match the pattern '<ACQUISITION_BATCH_NAME>_<BAR_CODE>_<TIMESTAMP>': " + dirName) acquisitionBatch = tokens[0] plateCode = tokens[1].split('.')[0] return (acquisitionBatch, plateCode) -class SanofiMaterial: - """ - A data structure class holding compound materials as they exist in the Abase (Sanofi) database. - """ - def __init__(self, wellCode, materialCode, sanofiId, sanofiBatchId): - self.wellCode = self.normalizeWellCode(wellCode) - self.materialCode = materialCode - self.sanofiId = sanofiId - self.sanofiBatchId = sanofiBatchId - - def normalizeWellCode(self, wellCode): - """ normalizes Sanofi wellCodes openBIS wellCodes e.g. 
AB007 to AB7 """ - return re.sub("(?<=\w)(0+)(?=\d)", "", wellCode) - -class PlateInitializer: - ABASE_DATA_SOURCE = "abase-datasource" - ABASE_PRODUCTION_QUERY = """select - ptodwellreference WELL_CODE, - translate(objdbatchref,'{/:()+','{_____') MATERIAL_CODE, - objdbatchref ABASE_COMPOUND_BATCH_ID, - objdid ABASE_COMPOUND_ID, - olptid ABASE_PLATE_CODE - from sysadmin.plteobjd - where olptid = ?{1}""" - - # used for integration testing from openBIS team members - ABASE_TEST_MODE_QUERY = """select - WELL_CODE, MATERIAL_CODE, ABASE_COMPOUND_ID, - ABASE_COMPOUND_BATCH_ID, ABASE_PLATE_CODE - from plates - where ABASE_PLATE_CODE = ?{1}""" - - LIBRARY_TEMPLATE_PROPNAME = "LIBRARY_TEMPLATE" - - POSITIVE_CONTROL_TYPE = "POSITIVE_CONTROL" - NEGATIVE_CONTROL_TYPE = "NEGATIVE_CONTROL" - - COMPOUND_WELL_TYPE = "COMPOUND_WELL" - COMPOUND_WELL_CONCENTRATION_PROPNAME = "CONCENTRATION_M" - COMPOUND_WELL_MATERIAL_PROPNAME = "COMPOUND" - - MATERIAL_TYPE = "COMPOUND" - MATERIAL_ID_PROPNAME = "COMPOUND_ID" - MATERIAL_BATCH_ID_PROPNAME = "COMPOUND_BATCH_ID" - - def __init__(self, transaction, plate): - self.transaction = transaction - self.plate = plate - self.plateCode = plate.getCode() - self.experimentId = plate.getExperiment().getExperimentIdentifier() - - def getWellCode(self, x, y): - return ConversionUtils.convertToSpreadsheetLocation(Point(x,y)) - - def getPlateGeometryDimensions(self): - plateGeometryString = self.plate.getPropertyValue(ScreeningConstants.PLATE_GEOMETRY) - geometry = Geometry.createFromPlateGeometryString(plateGeometryString) - return (geometry.height, geometry.width) - - def validateLibraryDimensions(self, tsvLines): - (plateHeight, plateWidth) = self.getPlateGeometryDimensions() - - numLines = len(tsvLines) - if plateHeight < len(tsvLines) : - raise RuntimeError("The property %s of experiment '%s' contains %s rows, but the" - " geometry of plate '%s' allows a maximum of %s rows. 
You should either reduce" - " the number of rows in the library template or change the plate geometry." % - (self.LIBRARY_TEMPLATE_PROPNAME, self.experimentId, numLines, self.plateCode, plateHeight)) - - for i in range(0, len(tsvLines)): - lineWidth = len(tsvLines[i]) - if plateWidth < lineWidth: - raise RuntimeError("The property %s of experiment '%s' contains %s columns in row %s, but the" - " geometry of plate '%s' allows a maximum of %s columns. You should either reduce" - " the number of columns in the library template or change the plate geometry." % - (self.LIBRARY_TEMPLATE_PROPNAME, self.experimentId, lineWidth, (i + 1), self.plateCode, plateHeight)) - - def parseLibraryTemplate(self): - template = experiment.getPropertyValue(self.LIBRARY_TEMPLATE_PROPNAME) - if not template: - raise RuntimeError("Experiment %s has no library template value in property %s" \ - % (self.experimentId, self.LIBRARY_TEMPLATE_PROPNAME)) - - lines = template.splitlines() - lines = removeTrailingEmptyElements(lines) - tsvLists = [ removeTrailingEmptyElements(line.split("\t")) for line in lines ] - - self.validateLibraryDimensions(tsvLists) - - library = {} - for x in range(0, len(tsvLists)): - for y in range(0, len(tsvLists[0])): - wellCode = self.getWellCode(x,y) - library[wellCode] = tsvLists[x][y].strip() - - return library - - def upperCaseKeys(self, map): - result = {} - for entry in map.entrySet(): - result[entry.key.upper()] = entry.value - return result - - def fetchPlateCompounds(self): - """ - Fetch well metadata from the Abase database. - - @return: a list of tuples (one per well) in the form - (wellCode, openBisCompoundCode, abaseCompoundBatchId, abaseCompoundId). - In case the plate is not found in Abase return None. 
- """ - if TEST_MODE: - query = self.ABASE_TEST_MODE_QUERY - else: - query = self.ABASE_PRODUCTION_QUERY - - queryService = state.getDataSourceQueryService() - queryResult = queryService.select(self.ABASE_DATA_SOURCE, query, [self.plateCode]) - - sanofiMaterials = [] - for resultMap in list(queryResult): - materialMap = self.upperCaseKeys(resultMap) - def val(code): - if code in materialMap: - return str(materialMap[code]) - else: - raise RuntimeError("No column '%s' in the query results from the ABASE Database" % (code)) - - material = SanofiMaterial(val('WELL_CODE'), val('MATERIAL_CODE'), \ - val('ABASE_COMPOUND_ID'), val('ABASE_COMPOUND_BATCH_ID')) - - sanofiMaterials.append(material) - - queryResult.close() - - return sanofiMaterials - - def createMaterial(self, sanofiMaterial): - material = self.transaction.createNewMaterial(sanofiMaterial.materialCode, self.MATERIAL_TYPE) - material.setPropertyValue(self.MATERIAL_ID_PROPNAME, sanofiMaterial.sanofiId) - material.setPropertyValue(self.MATERIAL_BATCH_ID_PROPNAME, sanofiMaterial.sanofiBatchId) - return material - - def getOrCreateMaterials(self, template, materialsByCode): - materialIdentifiers = MaterialIdentifierCollection() - for materialCode in materialsByCode: - materialIdentifiers.addIdentifier(self.MATERIAL_TYPE, materialCode) - searchService = self.transaction.getSearchService() - existingMaterials = list(searchService.listMaterials(materialIdentifiers)) - - existingMaterialsByCode = {} - for material in existingMaterials: - existingMaterialsByCode[ material.getCode() ] = material - - for materialCode in materialsByCode: - if not materialCode in existingMaterialsByCode: - sanofiMaterial = materialsByCode[materialCode] - openbisMaterial = self.createMaterial(sanofiMaterial) - existingMaterialsByCode[materialCode] = openbisMaterial - - return existingMaterialsByCode - - - def getByWellCode(self, wellCode, materialsByCode): - for sanofiMaterial in materialsByCode.values(): - if wellCode == 
sanofiMaterial.wellCode: - return sanofiMaterial - - return None - - def isCompoundWell(self, libraryValue): - try: - float(libraryValue) - return True - except ValueError: - return False - - def createWells(self, template, sanofiMaterials, openbisMaterials): - controlWellTypes = { "H" : self.POSITIVE_CONTROL_TYPE, - "L" : self.NEGATIVE_CONTROL_TYPE}; - - for wellCode in template: - if template[wellCode] in ["", "-"]: - continue - - templateValue = template[wellCode].upper() - wellIdentifier = self.plate.getSampleIdentifier() + ":" + wellCode - - if templateValue in controlWellTypes: - # CONTROL_WELL - wellType = controlWellTypes[templateValue] - well = self.transaction.createNewSample(wellIdentifier, wellType) - well.setContainer(self.plate) - - elif self.isCompoundWell(templateValue): - # COMPOUND_WELL - well = self.transaction.createNewSample(wellIdentifier, self.COMPOUND_WELL_TYPE) - well.setContainer(self.plate) - well.setPropertyValue(self.COMPOUND_WELL_CONCENTRATION_PROPNAME, templateValue) - sanofiMaterial = self.getByWellCode(wellCode, sanofiMaterials) - materialCode = sanofiMaterial.materialCode - material = openbisMaterials[materialCode] - well.setPropertyValue(self.COMPOUND_WELL_MATERIAL_PROPNAME, material.getMaterialIdentifier()) - - else: - raise RuntimeError("The specified value for well '%s' in the property " - " '%s' of experiment '%s' is invalid. Allowed values are 'H', 'L'" - " or a number, but '%s' was found." % - (wellCode, self.LIBRARY_TEMPLATE_PROPNAME, self.experimentId, templateValue)) - - def validate(self, template, sanofiMaterialsByCode): - for wellCode in template: - if self.isCompoundWell(template[wellCode]): - sanofiMaterial = self.getByWellCode(wellCode, sanofiMaterialsByCode) - if not sanofiMaterial: - raise RuntimeError("Error registering library for plate '%s'. 
The library template" - " specified in property '%s' of experiment '%s' contains" - " concentration value for well '%s', but no" - " mapping to a material was found in the ABASE DB." % - (self.plateCode, self.LIBRARY_TEMPLATE_PROPNAME, self.experimentId, wellCode)) - - for sanofiMaterial in sanofiMaterialsByCode.values(): - wellCode = sanofiMaterial.wellCode - templateValue = template.get(wellCode, None) - - if not templateValue or not self.isCompoundWell(templateValue): - val = templateValue and ("'%s'" % templateValue) or "no value" - raise RuntimeError("Error registering library for plate '%s'. The ABASE DB contains" - " a material definition for well '%s', but no valid concentration" - " was found in the library template of experiment '%s'. The library" - " template should contain a number for '%s' but %s was found" % - (self.plateCode, wellCode, self.experimentId, wellCode, val)) - - - def createWellsAndMaterials(self): - template = self.parseLibraryTemplate() - sanofiMaterials = self.fetchPlateCompounds() - - materialsByCode = {} - for sanofiMaterial in sanofiMaterials: - materialsByCode[ sanofiMaterial.materialCode ] = sanofiMaterial - - self.validate(template, materialsByCode) - - openbisMaterials = self.getOrCreateMaterials(template, materialsByCode) - self.createWells(template, materialsByCode, openbisMaterials) - -# ------------ -# Image dataset registration -# ------------ def convertToPng(dir, transparentColor): delete_original_files = True @@ -509,7 +188,7 @@ def convertToPng(dir, transparentColor): maxThreads = 100 errorMsg = FileConverter.performConversion(File(dir), strategy, machineLoad, maxThreads) if errorMsg != None: - raise Exception("Error", errorMsg) + raise RuntimeException("Error converting overlays:", errorMsg) # --------------------- @@ -544,20 +223,20 @@ class MyImageDataSetConfig(SimpleImageDataConfig): return None basename = os.path.splitext(imageFile.name)[0] - wellText = basename[0:find(basename, "(")] # A - 1 + wellText = 
basename[0:util.find(basename, "(")] # A - 1 imageTokens.well = wellText.replace(" - ", "") if " wv " in basename: - fieldText = basename[find(basename, "fld ") + 4 : find(basename, " wv")] - imageTokens.channelCode = basename[rfind(basename, " - ") + 3 :-1] + fieldText = basename[util.find(basename, "fld ") + 4 : util.find(basename, " wv")] + imageTokens.channelCode = basename[util.rfind(basename, " - ") + 3 :-1] else: - fieldText = basename[find(basename, "fld ") + 4 : find(basename, ")")] + fieldText = basename[util.find(basename, "fld ") + 4 : util.find(basename, ")")] imageTokens.channelCode = "DEFAULT" try: imageTokens.tileNumber = int(fieldText) except ValueError: - raise Exception("Cannot parse field number from '" + fieldText + "' in '" + basename + "' file name.") + raise ValidationException("Cannot parse field number from '" + fieldText + "' in '" + basename + "' file name.") return imageTokens @@ -590,7 +269,7 @@ if incoming.isDirectory(): (batchName, plateCode) = parseIncomingDirname(incoming.getName()) plate = findPlateByCode(plateCode) if not plate.getExperiment(): - raise RuntimeError("Plate with code '%(plateCode)s' is not associated with experiment" % vars()) + raise ValidationException("Plate with code '%(plateCode)s' is not associated with experiment" % vars()) experimentId = plate.getExperiment().getExperimentIdentifier() experiment = transaction.getExperiment(experimentId) @@ -598,7 +277,7 @@ if incoming.isDirectory(): # reload the sample with all contained samples plate = transaction.getSample(plate.getSampleIdentifier()) if len(plate.getContainedSamples()) == 0: - plateInitializer = PlateInitializer(transaction, plate) + plateInitializer = plateinit.PlateInitializer(transaction, state, plate, experiment, TEST_MODE) plateInitializer.createWellsAndMaterials() imageDatasetConfig = MyImageDataSetConfig(incoming, incoming) @@ -610,7 +289,7 @@ if incoming.isDirectory(): imageDataSet.setSample(plate) # check for overlays folder - overlaysDir = 
findDir(incoming, OVERLAYS_DIR_PATTERN) + overlaysDir = util.findDir(incoming, OVERLAYS_DIR_PATTERN) if overlaysDir is not None: convertToPng(overlaysDir.getPath(), OVERLAYS_TRANSPARENT_COLOR) overlayDatasetConfig = MyImageDataSetConfig(overlaysDir, overlaysDir) @@ -623,7 +302,7 @@ if incoming.isDirectory(): transaction.moveFile(overlaysDir.getPath(), overlayDataset, "overlays") # transform and move analysis file - analysisFile = findFileByExt(incoming, "xml") + analysisFile = util.findFileByExt(incoming, "xml") if analysisFile is not None: analysisCSVFile = File(analysisFile.getPath() + ".csv") GEExplorerImageAnalysisResultParser(analysisFile.getPath()).writeCSV(analysisCSVFile) @@ -632,11 +311,9 @@ if incoming.isDirectory(): featureProps.setProperty("separator", ",") featureProps.setProperty("well-name-row", "Well") featureProps.setProperty("well-name-col", "Well") - # TODO KE: Tomek, this string is not used anywhere in the Java code. Are you sure we need it ? - featureProps.setProperty("well-name-col-is-alphanum", "true") analysisDataSetDetails = factory.createFeatureVectorRegistrationDetails(analysisCSVFile.getPath(), featureProps) - analysisProcedureCode = extractFileBasename(analysisFile.getName()) + analysisProcedureCode = util.extractFileBasename(analysisFile.getName()) analysisDataSetDetails.getDataSetInformation().setAnalysisProcedure(analysisProcedureCode) analysisDataSet = transaction.createNewDataSet(analysisDataSetDetails) analysisDataSet.setSample(imageDataSet.getSample()) diff --git a/sanofi/dist/etc/sanofi-dropbox/plateinitializer.py b/sanofi/dist/etc/sanofi-dropbox/plateinitializer.py new file mode 100644 index 0000000000000000000000000000000000000000..bb7a7bfb4a7fb00b350987cfa8c9462adbf044a8 --- /dev/null +++ b/sanofi/dist/etc/sanofi-dropbox/plateinitializer.py @@ -0,0 +1,265 @@ +import re +import os + +import utilfunctions as util + +from java.lang import RuntimeException + +from ch.systemsx.cisd.common.geometry import Point, ConversionUtils +# 
class SanofiMaterial:
    """
    A data structure class holding compound materials as they exist in the Abase (Sanofi) database.

    Attributes:
        wellCode      -- well code normalized to the openBIS notation (see normalizeWellCode)
        materialCode  -- code under which the material is registered in openBIS
        sanofiId      -- compound id as delivered by the Abase query
        sanofiBatchId -- compound batch id as delivered by the Abase query
    """
    def __init__(self, wellCode, materialCode, sanofiId, sanofiBatchId):
        self.wellCode = self.normalizeWellCode(wellCode)
        self.materialCode = materialCode
        self.sanofiId = sanofiId
        self.sanofiBatchId = sanofiBatchId

    def normalizeWellCode(self, wellCode):
        """
        Normalizes Sanofi wellCodes to openBIS wellCodes e.g. AB007 to AB7.

        Only zeros directly following a letter are removed, so zeros that are
        part of the column number itself (e.g. A100, A10) are left untouched.
        """
        return re.sub(r"(?<=[A-Za-z])0+(?=\d)", "", wellCode)


class PlateInitializer:
    """
    Creates the wells (and any missing compound materials) of a plate from the
    library template stored on the experiment and the compound mapping read
    from the ABASE data source.
    """
    ABASE_DATA_SOURCE = "abase-datasource"
    ABASE_PRODUCTION_QUERY = """select
                                    ptodwellreference WELL_CODE,
                                    translate(objdbatchref,'{/:()+','{_____') MATERIAL_CODE,
                                    objdbatchref ABASE_COMPOUND_BATCH_ID,
                                    objdid ABASE_COMPOUND_ID,
                                    olptid ABASE_PLATE_CODE
                                from sysadmin.plteobjd
                                    where olptid = ?{1}"""

    # used for integration testing from openBIS team members
    ABASE_TEST_MODE_QUERY = """select
                                   WELL_CODE, MATERIAL_CODE, ABASE_COMPOUND_ID,
                                   ABASE_COMPOUND_BATCH_ID, ABASE_PLATE_CODE
                               from plates
                                   where ABASE_PLATE_CODE = ?{1}"""

    LIBRARY_TEMPLATE_PROPNAME = "LIBRARY_TEMPLATE"

    POSITIVE_CONTROL_TYPE = "POSITIVE_CONTROL"
    NEGATIVE_CONTROL_TYPE = "NEGATIVE_CONTROL"

    COMPOUND_WELL_TYPE = "COMPOUND_WELL"
    COMPOUND_WELL_CONCENTRATION_PROPNAME = "CONCENTRATION_M"
    COMPOUND_WELL_MATERIAL_PROPNAME = "COMPOUND"

    MATERIAL_TYPE = "COMPOUND"
    MATERIAL_ID_PROPNAME = "COMPOUND_ID"
    MATERIAL_BATCH_ID_PROPNAME = "COMPOUND_BATCH_ID"

    def __init__(self, transaction, state, plate, experiment, testMode):
        """
        @param transaction: registration transaction used to create samples/materials
        @param state: object providing getDataSourceQueryService()
        @param plate: the plate sample whose wells are to be created
        @param experiment: the experiment holding the library template property
        @param testMode: when true the test-mode query is used instead of the production one
        """
        self.transaction = transaction
        self.state = state
        self.plate = plate
        self.plateCode = plate.getCode()
        self.experiment = experiment
        self.experimentId = experiment.getExperimentIdentifier()
        self.testMode = testMode

    def getWellCode(self, x, y):
        """ Returns: the spreadsheet-style well code for the zero-based position (x, y) """
        return ConversionUtils.convertToSpreadsheetLocation(Point(x,y))

    def getPlateGeometryDimensions(self):
        """ Returns: tuple (height, width) parsed from the plate geometry property of the plate """
        plateGeometryString = self.plate.getPropertyValue(ScreeningConstants.PLATE_GEOMETRY)
        geometry = Geometry.createFromPlateGeometryString(plateGeometryString)
        return (geometry.height, geometry.width)

    def validateLibraryDimensions(self, tsvLines):
        """
        Ensures the parsed library template fits into the plate geometry.

        @param tsvLines: list of rows, each row being a list of cell values
        @raise ValidationException: if the template has more rows than the plate
               height or any row has more columns than the plate width
        """
        (plateHeight, plateWidth) = self.getPlateGeometryDimensions()

        numLines = len(tsvLines)
        if plateHeight < numLines:
            raise ValidationException("The property %s of experiment '%s' contains %s rows, but the"
                 " geometry of plate '%s' allows a maximum of %s rows. You should either reduce"
                 " the number of rows in the library template or change the plate geometry." %
                 (self.LIBRARY_TEMPLATE_PROPNAME, self.experimentId, numLines, self.plateCode, plateHeight))

        for rowIndex, row in enumerate(tsvLines):
            lineWidth = len(row)
            if plateWidth < lineWidth:
                # report the violated limit (the plate width), not the plate height
                raise ValidationException("The property %s of experiment '%s' contains %s columns in row %s, but the"
                     " geometry of plate '%s' allows a maximum of %s columns. You should either reduce"
                     " the number of columns in the library template or change the plate geometry." %
                     (self.LIBRARY_TEMPLATE_PROPNAME, self.experimentId, lineWidth, (rowIndex + 1), self.plateCode, plateWidth))

    def parseLibraryTemplate(self):
        """
        Parses the tab-separated library template of the experiment.

        @return: dictionary mapping well code to the stripped template value
        @raise ValidationException: if the template property is empty or does
               not fit into the plate geometry
        """
        template = self.experiment.getPropertyValue(self.LIBRARY_TEMPLATE_PROPNAME)
        if not template:
            raise ValidationException("Experiment %s has no library template value in property %s" \
                               % (self.experimentId, self.LIBRARY_TEMPLATE_PROPNAME))

        lines = template.splitlines()
        lines = util.removeTrailingEmptyElements(lines)
        tsvLists = [ util.removeTrailingEmptyElements(line.split("\t")) for line in lines ]

        self.validateLibraryDimensions(tsvLists)

        library = {}
        for x, row in enumerate(tsvLists):
            # trailing empty cells are trimmed per row, so rows may differ in
            # length; each row must be walked using its own length
            for y, cell in enumerate(row):
                library[self.getWellCode(x, y)] = cell.strip()

        return library

    def upperCaseKeys(self, map):
        """ Returns: a dictionary with the entries of the specified (Java) map and upper-cased keys """
        result = {}
        for entry in map.entrySet():
            result[entry.key.upper()] = entry.value
        return result

    def _columnValue(self, materialMap, code):
        """ Returns: the value of column 'code' as string; raises RuntimeException if the column is missing """
        if code in materialMap:
            return str(materialMap[code])
        else:
            raise RuntimeException("No column '%s' in the query results from the ABASE Database" % (code))

    def fetchPlateCompounds(self):
        """
        Fetch well metadata from the Abase database.

        @return: a list of SanofiMaterial objects, one per row returned by the
                 query. The list is empty if the query returns no rows.
        @raise RuntimeException: if an expected column is missing in the query result
        """
        if self.testMode:
            query = self.ABASE_TEST_MODE_QUERY
        else:
            query = self.ABASE_PRODUCTION_QUERY

        queryService = self.state.getDataSourceQueryService()
        queryResult = queryService.select(self.ABASE_DATA_SOURCE, query, [self.plateCode])

        sanofiMaterials = []
        try:
            for resultMap in list(queryResult):
                materialMap = self.upperCaseKeys(resultMap)
                material = SanofiMaterial(self._columnValue(materialMap, 'WELL_CODE'),
                                          self._columnValue(materialMap, 'MATERIAL_CODE'),
                                          self._columnValue(materialMap, 'ABASE_COMPOUND_ID'),
                                          self._columnValue(materialMap, 'ABASE_COMPOUND_BATCH_ID'))
                sanofiMaterials.append(material)
        finally:
            # release the query result even when a row cannot be converted
            queryResult.close()

        return sanofiMaterials

    def createMaterial(self, sanofiMaterial):
        """ Creates a new openBIS material carrying the Abase compound id and batch id """
        material = self.transaction.createNewMaterial(sanofiMaterial.materialCode, self.MATERIAL_TYPE)
        material.setPropertyValue(self.MATERIAL_ID_PROPNAME, sanofiMaterial.sanofiId)
        material.setPropertyValue(self.MATERIAL_BATCH_ID_PROPNAME, sanofiMaterial.sanofiBatchId)
        return material

    def getOrCreateMaterials(self, template, materialsByCode):
        """
        Looks up the materials by code and creates those not returned by the search.

        @param template: not used; kept for interface compatibility
        @param materialsByCode: dictionary materialCode -> SanofiMaterial
        @return: dictionary materialCode -> openBIS material (existing or newly created)
        """
        materialIdentifiers = MaterialIdentifierCollection()
        for materialCode in materialsByCode:
            materialIdentifiers.addIdentifier(self.MATERIAL_TYPE, materialCode)
        searchService = self.transaction.getSearchService()
        existingMaterials = list(searchService.listMaterials(materialIdentifiers))

        existingMaterialsByCode = {}
        for material in existingMaterials:
            existingMaterialsByCode[ material.getCode() ] = material

        for materialCode in materialsByCode:
            if materialCode not in existingMaterialsByCode:
                sanofiMaterial = materialsByCode[materialCode]
                openbisMaterial = self.createMaterial(sanofiMaterial)
                existingMaterialsByCode[materialCode] = openbisMaterial

        return existingMaterialsByCode

    def getByWellCode(self, wellCode, materialsByCode):
        """ Returns: the SanofiMaterial located in the specified well or None if there is none """
        for sanofiMaterial in materialsByCode.values():
            if wellCode == sanofiMaterial.wellCode:
                return sanofiMaterial

        return None

    def isCompoundWell(self, libraryValue):
        """ Returns: True if the template value can be parsed as a number (a concentration) """
        try:
            float(libraryValue)
            return True
        except ValueError:
            return False

    def createWells(self, template, sanofiMaterials, openbisMaterials):
        """
        Creates one well sample per non-empty template entry.

        'H' and 'L' create positive/negative control wells, a number creates a
        compound well with the number as concentration; '' and '-' are skipped.

        @param template: dictionary wellCode -> template value
        @param sanofiMaterials: dictionary materialCode -> SanofiMaterial
        @param openbisMaterials: dictionary materialCode -> openBIS material
        @raise ValidationException: if a template value is none of the above
        """
        controlWellTypes = { "H" : self.POSITIVE_CONTROL_TYPE,
                             "L" : self.NEGATIVE_CONTROL_TYPE}

        for wellCode in template:
            if template[wellCode] in ["", "-"]:
                continue

            templateValue = template[wellCode].upper()
            wellIdentifier = self.plate.getSampleIdentifier() + ":" + wellCode

            if templateValue in controlWellTypes:
                # CONTROL_WELL
                wellType = controlWellTypes[templateValue]
                well = self.transaction.createNewSample(wellIdentifier, wellType)
                well.setContainer(self.plate)

            elif self.isCompoundWell(templateValue):
                # COMPOUND_WELL
                well = self.transaction.createNewSample(wellIdentifier, self.COMPOUND_WELL_TYPE)
                well.setContainer(self.plate)
                well.setPropertyValue(self.COMPOUND_WELL_CONCENTRATION_PROPNAME, templateValue)
                # validate() is expected to have ensured that a material exists for this well
                sanofiMaterial = self.getByWellCode(wellCode, sanofiMaterials)
                materialCode = sanofiMaterial.materialCode
                material = openbisMaterials[materialCode]
                well.setPropertyValue(self.COMPOUND_WELL_MATERIAL_PROPNAME, material.getMaterialIdentifier())

            else:
                raise ValidationException("The specified value for well '%s' in the property "
                                   " '%s' of experiment '%s' is invalid. Allowed values are 'H', 'L'"
                                   " or a number, but '%s' was found." %
                                   (wellCode, self.LIBRARY_TEMPLATE_PROPNAME, self.experimentId, templateValue))

    def validate(self, template, sanofiMaterialsByCode):
        """
        Cross-checks the library template against the materials read from ABASE.

        @raise ValidationException: if a well with a concentration has no
               material, or a material is mapped to a well without a concentration
        """
        for wellCode in template:
            if self.isCompoundWell(template[wellCode]):
                sanofiMaterial = self.getByWellCode(wellCode, sanofiMaterialsByCode)
                if not sanofiMaterial:
                    raise ValidationException("Error registering library for plate '%s'. The library template"
                                       " specified in property '%s' of experiment '%s' contains"
                                       " concentration value for well '%s', but no"
                                       " mapping to a material was found in the ABASE DB." %
                                       (self.plateCode, self.LIBRARY_TEMPLATE_PROPNAME, self.experimentId, wellCode))

        for sanofiMaterial in sanofiMaterialsByCode.values():
            wellCode = sanofiMaterial.wellCode
            templateValue = template.get(wellCode, None)

            if not templateValue or not self.isCompoundWell(templateValue):
                if templateValue:
                    val = "'%s'" % templateValue
                else:
                    val = "no value"
                raise ValidationException("Error registering library for plate '%s'. The ABASE DB contains"
                                   " a material definition for well '%s', but no valid concentration"
                                   " was found in the library template of experiment '%s'. The library"
                                   " template should contain a number for '%s' but %s was found" %
                                   (self.plateCode, wellCode, self.experimentId, wellCode, val))

    def createWellsAndMaterials(self):
        """ Entry point: parses the template, fetches the compounds, validates both and registers wells and materials """
        template = self.parseLibraryTemplate()
        sanofiMaterials = self.fetchPlateCompounds()

        materialsByCode = {}
        for sanofiMaterial in sanofiMaterials:
            materialsByCode[ sanofiMaterial.materialCode ] = sanofiMaterial

        self.validate(template, materialsByCode)

        openbisMaterials = self.getOrCreateMaterials(template, materialsByCode)
        self.createWells(template, materialsByCode, openbisMaterials)
def rfind(text, pattern):
    """
    Finds the last occurrence of the pattern (searching from the right).
    Throws exception if the pattern cannot be found.

    Returns: index of the match in 'text'
    """
    ix = text.rfind(pattern)
    ensurePatternFound(ix, text, pattern)
    return ix


def find(text, pattern):
    """
    Finds the first occurrence of the pattern (searching from the left).
    Throws exception if the pattern cannot be found.

    Returns: index of the match in 'text'
    """
    ix = text.find(pattern)
    ensurePatternFound(ix, text, pattern)
    return ix


def ensurePatternFound(ix, file, pattern):
    """ Raises an exception if 'ix' is -1, i.e. the pattern was not found in the file name """
    if ix == -1:
        raise Exception("Cannot find '" + pattern + "' pattern in file name '" + file + "'")


def extractFileBasename(filename):
    """ Returns: name of the file without the extension (everything before the last dot) """
    lastDot = filename.rfind(".")
    if lastDot != -1:
        return filename[0:lastDot]
    else:
        return filename


def getFileExt(file):
    """ Returns: lower-cased extension of the file without the leading dot ('' if there is none) """
    return os.path.splitext(file)[1][1:].lower()


def findFileByExt(incomingFile, expectedExt):
    """
    Returns: java.io.File - first file (in alphabetical order) with the specified
    extension or None if no file matches. The comparison is case-insensitive.
    """
    if not incomingFile.isDirectory():
        return None
    incomingPath = incomingFile.getPath()
    # os.listdir returns entries in arbitrary order; sort to get a stable result
    for name in sorted(os.listdir(incomingPath)):
        ext = getFileExt(name)
        if ext.upper() == expectedExt.upper():
            return File(incomingPath, name)
    return None


def findDir(incomingFile, dirNameMarker):
    """
    Returns: java.io.File - first subdirectory (in alphabetical order) which contains
    the specified marker in the name (case-insensitive) or None if there is none.
    """
    if not incomingFile.isDirectory():
        return None
    incomingPath = incomingFile.getPath()
    # os.listdir returns entries in arbitrary order; sort to get a stable result
    for name in sorted(os.listdir(incomingPath)):
        if dirNameMarker.upper() in name.upper():
            candidate = File(incomingPath, name)
            # a regular file with a matching name is not a subdirectory
            if candidate.isDirectory():
                return candidate
    return None


def removeTrailingEmptyElements(list):
    """
    Removes trailing empty (or whitespace-only) strings from a list.
    The list is modified in place and also returned.
    """
    while list and not list[-1].strip():
        del list[-1]
    return list
ccc38f77da780b2eb97c4051fed8077b910d97ef..90990f83ae07cdd572cc15543c624f2bf19f22a4 100644 --- a/sanofi/sourceTest/java/ch/systemsx/cisd/sanofi/dss/test/SanofiDropboxJythonTest.java +++ b/sanofi/sourceTest/java/ch/systemsx/cisd/sanofi/dss/test/SanofiDropboxJythonTest.java @@ -38,8 +38,10 @@ import ch.systemsx.cisd.base.exceptions.CheckedExceptionTunnel; import ch.systemsx.cisd.common.eodsql.MockDataSet; import ch.systemsx.cisd.common.exceptions.UserFailureException; import ch.systemsx.cisd.common.filesystem.FileUtilities; +import ch.systemsx.cisd.common.logging.LogCategory; import ch.systemsx.cisd.common.mail.From; import ch.systemsx.cisd.common.test.AssertionUtil; +import ch.systemsx.cisd.common.test.LogMonitoringAppender; import ch.systemsx.cisd.common.test.RecordingMatcher; import ch.systemsx.cisd.etlserver.TopLevelDataSetRegistratorGlobalState; import ch.systemsx.cisd.etlserver.registrator.AbstractJythonDataSetHandlerTest; @@ -74,7 +76,6 @@ import ch.systemsx.cisd.openbis.plugin.screening.shared.basic.dto.ScreeningConst * Things not tested * - skip well creation when plate library already exists * - skip material creation for preexisting materials - * - error cases * </pre> * * @author Kaloyan Enimanev @@ -106,7 +107,7 @@ public class SanofiDropboxJythonTest extends AbstractJythonDataSetHandlerTest private static final String OVERLAYS_DATA_SET_DIR_NAME = "overlays"; - private static final String ANALYSIS_DATA_SET_FILE_NAME = "LC80463-RS101117.xml"; + private static final String ANALYSIS_DATA_SET_FILE_NAME = "LC80463-RS101117.xml.csv"; private static final String IMAGE_DATA_SET_CODE = "data-set-code"; @@ -138,10 +139,14 @@ public class SanofiDropboxJythonTest extends AbstractJythonDataSetHandlerTest public void setUp() throws IOException { super.setUp(); + + extendJythonLibPath(getRegistrationScriptsFolderPath()); + atomicatOperationDetails = new RecordingMatcher<ch.systemsx.cisd.openbis.generic.shared.dto.AtomicEntityOperationDetails>(); materialCriteria = new 
RecordingMatcher<ListMaterialCriteria>(); email = new RecordingMatcher<String>(); + } @Test @@ -165,21 +170,19 @@ public class SanofiDropboxJythonTest extends AbstractJythonDataSetHandlerTest } }); - try - { - handler.handle(markerFile); - fail("Registration should fail with library validation error"); - } catch (RuntimeException rex) - { - final String error = - "The property LIBRARY_TEMPLATE of experiment '/SANOFI/PROJECT/EXP' contains 2 rows, " - + "but the geometry of plate 'TEST-PLATE' allows a maximum of 1 rows. You should either reduce the " - + "number of rows in the library template or change the plate geometry."; - assertContains(error, rex.getMessage()); - assertContains(error, email.recordedObject()); - assertContains(IMAGE_DATA_SET_DIR_NAME, email.recordedObject()); - } + final String error = + "The property LIBRARY_TEMPLATE of experiment '/SANOFI/PROJECT/EXP' contains 2 rows, " + + "but the geometry of plate 'TEST-PLATE' allows a maximum of 1 rows. You should either reduce the " + + "number of rows in the library template or change the plate geometry."; + LogMonitoringAppender appender = + LogMonitoringAppender.addAppender(LogCategory.OPERATION, error); + + handler.handle(markerFile); + + assertContains(error, email.recordedObject()); + assertContains(IMAGE_DATA_SET_DIR_NAME, email.recordedObject()); + appender.verifyLogHasHappened(); context.assertIsSatisfied(); } @@ -204,22 +207,20 @@ public class SanofiDropboxJythonTest extends AbstractJythonDataSetHandlerTest } }); - try - { - handler.handle(markerFile); - fail("Registration should fail with library validation error"); - } catch (RuntimeException rex) - { - final String error = - "The property LIBRARY_TEMPLATE of experiment '/SANOFI/PROJECT/EXP' contains 3 " - + "columns in row 1, but the geometry of plate 'TEST-PLATE' allows a maximum of " - + "5 columns. 
You should either reduce the number of columns in the library " - + "template or change the plate geometry."; - assertContains(error, rex.getMessage()); - assertContains(error, email.recordedObject()); - assertContains(IMAGE_DATA_SET_DIR_NAME, email.recordedObject()); - } + final String error = + "The property LIBRARY_TEMPLATE of experiment '/SANOFI/PROJECT/EXP' contains 3 " + + "columns in row 1, but the geometry of plate 'TEST-PLATE' allows a maximum of " + + "5 columns. You should either reduce the number of columns in the library " + + "template or change the plate geometry."; + LogMonitoringAppender appender = + LogMonitoringAppender.addAppender(LogCategory.OPERATION, error); + + handler.handle(markerFile); + assertContains(error, email.recordedObject()); + assertContains(IMAGE_DATA_SET_DIR_NAME, email.recordedObject()); + + appender.verifyLogHasHappened(); context.assertIsSatisfied(); } @@ -250,19 +251,16 @@ public class SanofiDropboxJythonTest extends AbstractJythonDataSetHandlerTest } }); - try - { - handler.handle(markerFile); - fail("Registration should fail validation error"); - } catch (RuntimeException rex) - { - assertContains( - "Error registering library for plate 'TEST-PLATE'. The library template specified in " - + "property 'LIBRARY_TEMPLATE' of experiment '/SANOFI/PROJECT/EXP' contains concentration value " - + "for well 'B1', but no mapping to a material was found in the ABASE DB.", - rex.getMessage()); - } + final String error = + "Error registering library for plate 'TEST-PLATE'. 
The library template specified in " + + "property 'LIBRARY_TEMPLATE' of experiment '/SANOFI/PROJECT/EXP' contains concentration value " + + "for well 'B1', but no mapping to a material was found in the ABASE DB."; + LogMonitoringAppender appender = + LogMonitoringAppender.addAppender(LogCategory.OPERATION, error); + handler.handle(markerFile); + + appender.verifyLogHasHappened(); context.assertIsSatisfied(); } @@ -294,19 +292,16 @@ public class SanofiDropboxJythonTest extends AbstractJythonDataSetHandlerTest } }); - try - { - handler.handle(markerFile); - fail("Registration should fail validation error"); - } catch (RuntimeException rex) - { - assertContains( - " Error registering library for plate 'TEST-PLATE'. The ABASE DB contains a material definition " - + "for well 'A3', but no valid concentration was found in the library template of experiment " - + "'/SANOFI/PROJECT/EXP'. The library template should contain a number for 'A3' but no value was found", - rex.getMessage()); - } + final String error = + " Error registering library for plate 'TEST-PLATE'. The ABASE DB contains a material definition " + + "for well 'A3', but no valid concentration was found in the library template of experiment " + + "'/SANOFI/PROJECT/EXP'. The library template should contain a number for 'A3' but no value was found"; + LogMonitoringAppender appender = + LogMonitoringAppender.addAppender(LogCategory.OPERATION, error); + + handler.handle(markerFile); + appender.verifyLogHasHappened(); context.assertIsSatisfied(); } @@ -612,8 +607,8 @@ public class SanofiDropboxJythonTest extends AbstractJythonDataSetHandlerTest handler = new TestingPlateDataSetHandler(globalState, registrationShouldFail, shouldReThrowException); + } - private class TestingPlateDataSetHandler extends JythonPlateDataSetHandler {