Newer
Older
class AbstractPropertiesParser:
    """Loads a "key = value" properties file and gives access to its entries.

    The file is read once, in the constructor; afterwards all lookups are
    served from an in-memory dictionary.
    """

    _propertiesDict = None

    def __init__(self, incoming, fileName):
        """Parse the file called fileName located in the directory incoming."""
        path = os.path.join(incoming, fileName)
        self._propertiesDict = self._parseMetadata(path)

    # Returns: name of the file
    def _findFile(self, incoming, prefix):
        """Return the name of the first entry of incoming starting with prefix.

        Entries are examined in the order os.listdir() reports them.
        Raises Exception if no entry matches.
        """
        for fileName in os.listdir(incoming):
            if fileName.startswith(prefix):
                return fileName
        raise Exception("No file with prefix '"+prefix+"' has been found!")

    # Parses the metadata file from the given incoming directory.
    # Each line should have a form:
    #   key = value
    # Keys should be unique in the file.
    # Returns:
    #   a dictionary with keys and values from the file.
    def _parseMetadata(self, path):
        """Parse the properties file at path into a dictionary.

        Blank lines and lines starting with '#' are skipped. Every other line
        is split at its first '='; key and value are stripped of whitespace.
        Raises Exception for a line without '=' or for a repeated key.
        """
        myDict = {}
        f = open(path)
        # try/finally guarantees the handle is released even when a malformed
        # line makes the parser raise.
        try:
            for line in f:
                line = line.strip()
                if len(line) == 0 or line.startswith("#"):
                    continue
                ix = line.find("=")
                if ix == -1:
                    raise Exception("Cannot find '=' in line '"+line+"' in file: "+path)
                key = line[:ix].strip()
                value = line[ix+1:].strip()
                if key in myDict:
                    raise Exception("Duplicated key '"+key+"' in file: "+path)
                myDict[key] = value
        finally:
            f.close()
        return myDict

    def get(self, propertyName):
        """Return the value stored under propertyName (KeyError if absent)."""
        return self._propertiesDict[propertyName]

    # All properties in the file.
    # Returns:
    #   a list of (propertyName, propertyValue) pairs
    def getPropertiesIter(self):
        """Return every property of the file as a list of (name, value) pairs."""
        # items() yields the same pairs as iteritems() and also exists on
        # Python 3; list() keeps the return type a real list.
        return list(self._propertiesDict.items())

    # All dataset properties.
    # Returns:
    #   a list of (propertyCode, propertyValue) pairs
    def getDatasetPropertiesIter(self):
        """Return the (code, value) pairs whose key starts with DATASET_PROPERTY_PREFIX.

        DATASET_PROPERTY_PREFIX is not defined by this class; it has to be
        provided by the subclass this method is called on.
        """
        prefix = self.DATASET_PROPERTY_PREFIX
        return [(key, value) for key, value in self._propertiesDict.items() if key.startswith(prefix)]
class AbstractMetadataParser(AbstractPropertiesParser):
    """Properties parser bound to the "metadata.properties" file of a directory."""

    # Name of the metadata file looked up inside the incoming directory.
    METADATA_FILE = "metadata.properties"
    # Keys read by the accessors below.
    DATASET_TYPE_PROPERTY = "dataset.type"
    IBRAIN2_DATASET_ID_PROPERTY = "ibrain2.dataset.id"
    # Keys starting with this prefix are reported as dataset properties.
    DATASET_PROPERTY_PREFIX = "ibrain2."

    def __init__(self, incoming):
        """Parse METADATA_FILE found in the directory incoming."""
        metadataFileName = self.METADATA_FILE
        AbstractPropertiesParser.__init__(self, incoming, metadataFileName)

    def getIBrain2DatasetId(self):
        """Return the value of the "ibrain2.dataset.id" property."""
        propertyKey = self.IBRAIN2_DATASET_ID_PROPERTY
        return self.get(propertyKey)

    def getDatasetType(self):
        """Return the value of the "dataset.type" property."""
        propertyKey = self.DATASET_TYPE_PROPERTY
        return self.get(propertyKey)
# --- concrete parser classes ----------------------
class AcquiredDatasetMetadataParser(AbstractMetadataParser):
    """Metadata parser for acquired datasets (plate barcode and instrument id)."""

    PLATE_CODE_PRPOPERTY = "barcode"
    INSTRUMENT_PROPERTY = "instrument.id"
    TIMESTAMP_PROPERTY = "timestamp"  # not used

    def getPlateCode(self):
        """Return the value of the "barcode" property."""
        return self.get(self.PLATE_CODE_PRPOPERTY)

    # All dataset properties.
    # Returns:
    #   a list of (propertyCode, propertyValue) pairs
    def getDatasetPropertiesIter(self):
        """Return the prefixed dataset properties plus the instrument id.

        The "ibrain2.assay.id" entry is left out of the result, and an
        ("instrument.id", value) pair is appended at the end.
        """
        collected = []
        for code, value in AbstractPropertiesParser.getDatasetPropertiesIter(self):
            if code == "ibrain2.assay.id":
                continue
            collected.append((code, value))
        instrument = self.get(self.INSTRUMENT_PROPERTY)
        collected.append((self.INSTRUMENT_PROPERTY, instrument))
        return collected
class DerivedDatasetMetadataParser(AbstractMetadataParser):
    """Metadata parser for derived datasets.

    Besides the metadata file it inspects the name of the workflow file in
    the incoming directory, which is expected to look like
    "workflow_<name>_<author>" followed by an optional extension.
    """

    WORKFLOW_FILE_PREFIX = "workflow_"
    PARENT_DATSASET_PERMID_PRPOPERTY = "storage_provider.parent.dataset.id"
    DATASET_TYPE_PROPERTY = "dataset.type"
    WORKFLOW_NAME_PROPERTY = "ibrain2.workflow.name"
    WORKFLOW_AUTHOR_PROPERTY = "ibrain2.workflow.author"

    _workflowName = None
    _workflowAuthor = None

    def __init__(self, incoming):
        """Parse the metadata file and the workflow file name found in incoming.

        Raises Exception when the workflow file name has fewer than three
        "_"-separated parts.
        """
        AbstractMetadataParser.__init__(self, incoming)
        workflowFile = self._findFile(incoming, self.WORKFLOW_FILE_PREFIX)
        nameWithoutExtension = os.path.splitext(workflowFile)[0]
        parts = nameWithoutExtension.split("_")
        if not len(parts) >= 3:
            raise Exception("Cannot parse workflow name and author from: "+workflowFile)
        # parts[0] is the "workflow" prefix itself.
        self._workflowName, self._workflowAuthor = parts[1], parts[2]

    def getDatasetPropertiesIter(self):
        """Return the prefixed dataset properties plus workflow name and author."""
        result = AbstractMetadataParser.getDatasetPropertiesIter(self)
        result.extend([
            (self.WORKFLOW_NAME_PROPERTY, self._workflowName),
            (self.WORKFLOW_AUTHOR_PROPERTY, self._workflowAuthor),
        ])
        return result

    def getParentDatasetPermId(self):
        """Return the value of the "storage_provider.parent.dataset.id" property."""
        propertyKey = self.PARENT_DATSASET_PERMID_PRPOPERTY
        return self.get(propertyKey)

    def getDatasetType(self):
        """Return the value of the "dataset.type" property."""
        propertyKey = self.DATASET_TYPE_PROPERTY
        return self.get(propertyKey)
class AssayParser(AbstractPropertiesParser):
    """Properties parser for the assay file of an incoming directory.

    The assay file is the first directory entry whose name starts with
    "assay_".
    """

    ASSAY_FILE_PREFIX = "assay_"

    # Property keys; none of them is referenced inside this class.
    ASSAY_ID_PROPERTY = "assay.id"
    ASSAY_TYPE_PROPERTY = "assay.type"
    ASSAY_DESC_PROPERTY = "assay.description"
    LAB_LEADER_PROPERTY = "labinfo.pi"
    EXPERIMENTER_PROPERTY = "experimenter.login"
    WORKFLOW_NAME_PROPERTY = "workflow.name"
    WORKFLOW_AUTHOR_PROPERTY = "workflow.author"

    def __init__(self, incoming):
        """Locate the assay file inside incoming and parse it."""
        assayFileName = self._findFile(incoming, self.ASSAY_FILE_PREFIX)
        AbstractPropertiesParser.__init__(self, incoming, assayFileName)
""" path to the registration confirmation directory relative to the incoming dataset """
class RegistrationConfirmationUtils:
    """Writes status files that confirm or reject a dataset registration.

    A status file is a small "key = value" properties file named
    "ibrain2_dataset_id_<id>.properties" and stored in the confirmation
    directory derived from the incoming dataset path.
    """

    # Name of the confirmation directory; see _getDestinationDir for where
    # it is located relative to the incoming dataset.
    CONFIRMATION_DIRECTORY = "registration-status"

    STATUS_PROPERTY = "storage_provider.storage.status"
    STATUS_OK = "STORAGE_SUCCESS"
    STATUS_ERROR = "STORAGE_FAILED"
    ERROR_MSG_PROPERTY = "storage_provider.message"
    OPENBIS_DATASET_ID_PROPERTY = "storage_provider.dataset.id"

    IBRAIN2_STATUS_FILE_PREFIX = "ibrain2_dataset_id_"
    IBRAIN2_STATUS_FILE_SUFFIX = ".properties"

    def _getDestinationDir(self, incoming):
        """Return the confirmation directory path for the given incoming path.

        The directory is CONFIRMATION_DIRECTORY placed under the third-level
        ancestor (parent of the parent of the parent) of incoming.
        """
        # NOTE(review): File is presumably java.io.File imported at the top
        # of the script (Jython) - the import is not visible here, confirm.
        return File(incoming).getParentFile().getParentFile().getParent() + "/" + self.CONFIRMATION_DIRECTORY

    def _getConfirmationFileName(self, ibrain2DatasetId):
        """Return the status file name for the given iBrain2 dataset id."""
        return self.IBRAIN2_STATUS_FILE_PREFIX + ibrain2DatasetId + self.IBRAIN2_STATUS_FILE_SUFFIX

    def _getStatusFilePath(self, ibrain2DatasetId, incoming):
        """Return the full path of the status file for the given dataset."""
        return self._getDestinationDir(incoming) + "/" + self._getConfirmationFileName(ibrain2DatasetId)

    def _prop(self, name, value):
        """Format one property as a "name = value" line ending with a newline."""
        return "" + name + " = " + value + "\n"

    def _writeConfirmationFile(self, ibrain2DatasetId, fileContent, incoming):
        """Write fileContent to the status file of the given dataset."""
        confirmationFile = self._getStatusFilePath(ibrain2DatasetId, incoming)
        self._writeFile(confirmationFile, fileContent)

    def _writeFile(self, file, fileContent):
        """Create or overwrite the file at path file with fileContent."""
        handle = open(file, "w")
        # Close the handle even if the write itself fails.
        try:
            handle.write(fileContent)
        finally:
            handle.close()

    def createSuccessStatus(self, ibrain2DatasetId, openbisDatasetId, incoming):
        """Write a STORAGE_SUCCESS status file carrying both dataset ids."""
        fileContent = self._prop(self.STATUS_PROPERTY, self.STATUS_OK)
        fileContent += self._prop(AbstractMetadataParser.IBRAIN2_DATASET_ID_PROPERTY, ibrain2DatasetId)
        fileContent += self._prop(self.OPENBIS_DATASET_ID_PROPERTY, openbisDatasetId)
        self._writeConfirmationFile(ibrain2DatasetId, fileContent, incoming)

    def createFailureStatus(self, ibrain2DatasetId, errorMessage, incoming):
        """Write a STORAGE_FAILED status file carrying the error message."""
        fileContent = self._prop(self.STATUS_PROPERTY, self.STATUS_ERROR)
        fileContent += self._prop(AbstractMetadataParser.IBRAIN2_DATASET_ID_PROPERTY, ibrain2DatasetId)
        fileContent += self._prop(self.ERROR_MSG_PROPERTY, errorMessage)
        self._writeConfirmationFile(ibrain2DatasetId, fileContent, incoming)
def setImageDatasetPropertiesAndRegister(imageDataset, metadataParser, incoming, service, factory):
    """Register an image dataset together with the properties from its metadata.

    Registration details are created through factory, filled with the
    dataset properties reported by metadataParser, and a new dataset is
    created in a transaction with the parent dataset set. The incoming
    path is moved into the dataset; if the commit reports success, a
    success status is written via createSuccessStatus.
    """
    iBrain2DatasetId = metadataParser.getIBrain2DatasetId()

    details = factory.createImageRegistrationDetails(imageDataset, incoming)
    for code, propertyValue in metadataParser.getDatasetPropertiesIter():
        details.setPropertyValue(code, propertyValue)

    transaction = service.transaction(incoming, factory)
    newDataset = transaction.createNewDataSet(details)
    parentPermId = metadataParser.getParentDatasetPermId()
    newDataset.setParentDatasets([parentPermId])
    transaction.moveFile(incoming.getPath(), newDataset)

    if not transaction.commit():
        return
    createSuccessStatus(iBrain2DatasetId, newDataset, incoming.getPath())
def registerDerivedBlackBoxDataset(state, service, factory, incoming, metadataParser, datasetType, fileFormatType):
    """Create a dataset of the given type and file format and register it.

    A transaction is opened for incoming, a new dataset is created in it and
    configured, and the rest of the work is delegated to
    registerDerivedDataset.
    """
    tr = service.transaction(incoming, factory)
    newDataset = tr.createNewDataSet()
    newDataset.setDataSetType(datasetType)
    newDataset.setFileFormatType(fileFormatType)
    registerDerivedDataset(state, tr, newDataset, incoming, metadataParser)
def registerDerivedDataset(state, transaction, dataset, incoming, metadataParser):
    """Attach a derived dataset to the plate of its parent and commit it.

    The plate is looked up through the parent dataset named in the metadata.
    When no plate is found the function returns without committing anything.
    Otherwise the dataset gets the plate as its sample, is marked as not
    measured, receives its properties and parent, the incoming path is moved
    into it and the transaction is committed. A success status is written
    if the commit reports success.
    """
    iBrain2DatasetId = metadataParser.getIBrain2DatasetId()
    parentPermId = metadataParser.getParentDatasetPermId()

    space, plate = tryGetConnectedPlate(state, parentPermId, iBrain2DatasetId, incoming.getPath())
    if plate is None:
        return

    sampleIdentifier = '/'+space+'/'+plate
    dataset.setSample(transaction.getSample(sampleIdentifier))
    dataset.setMeasuredData(False)
    for code, propertyValue in metadataParser.getDatasetPropertiesIter():
        dataset.setPropertyValue(code, propertyValue)
    dataset.setParentDatasets([metadataParser.getParentDatasetPermId()])

    transaction.moveFile(incoming.getPath(), dataset)
    committed = transaction.commit()
    if committed:
        createSuccessStatus(iBrain2DatasetId, dataset, incoming.getPath())
def findCSVFile(dir):
    """Return dir + "/" + name for the first ".csv" entry found in dir.

    Entries are considered in the order os.listdir() reports them.
    Raises Exception when the directory contains no such entry.
    """
    csvNames = [name for name in os.listdir(dir) if name.endswith(".csv")]
    if not csvNames:
        raise Exception("No CSV file has been found in "+dir)
    return dir + "/" + csvNames[0]
def tryGetConnectedPlate(state, openbisDatasetId, iBrain2DatasetId, incomingPath):
    """Look up the plate connected with the specified dataset.

    Returns:
       (plateSpace, plateCode) tuple for the plate connected with the specified dataset
       or (None, None) if the dataset does not exist or is not connected to the plate.

    In the (None, None) case a failure status describing the reason is
    written through RegistrationConfirmationUtils before returning.
    """
    openbis = state.getOpenBisService()
    dataset = openbis.tryGetDataSet(openbisDatasetId)
    if dataset is not None:
        plate = dataset.getSample()
        if plate is not None:
            return (plate.getSpace().getCode(), plate.getCode())
        errorMsg = "No plate is connected to the dataset: "+openbisDatasetId+"."
    else:
        # Kept in its own branch so that it does not replace the more
        # specific "no plate" message above.
        errorMsg = "Dataset does not exist or is not accessible: "+openbisDatasetId+". Maybe the dataset has not been registered yet. Try again later."
    RegistrationConfirmationUtils().createFailureStatus(iBrain2DatasetId, errorMsg, incomingPath)
    return (None, None)
def createSuccessStatus(iBrain2DatasetId, dataset, incomingPath):
    """Write a success status file that carries the code of the given dataset."""
    openbisCode = dataset.getDataSetCode()
    confirmation = RegistrationConfirmationUtils()
    confirmation.createSuccessStatus(iBrain2DatasetId, openbisCode, incomingPath)
def createFailureStatus(iBrain2DatasetId, throwable, incoming):
    """Write a failure status file with the message of the given throwable."""
    confirmation = RegistrationConfirmationUtils()
    message = throwable.getMessage()
    incomingPath = incoming.getPath()
    confirmation.createFailureStatus(iBrain2DatasetId, message, incomingPath)
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
# -------------- TODO: remove tests
# Root directory of the sample data read by the ad-hoc test functions below.
# NOTE(review): hard-coded absolute path that looks machine-specific - the
# test functions can only run where this directory exists.
TEST_DIR = "/Users/tpylak/main/src/screening-demo/biozentrum/dropboxes/ibrain2-dropboxes-test"
def testMetadataParsers():
    """Print what the acquired and derived metadata parsers read from TEST_DIR."""
    print("-- acquired ---------------------------------")
    acquired = AcquiredDatasetMetadataParser(TEST_DIR+"/HCS_IMAGE_RAW/ibrain2_dataset_id_32")
    print("dataset type: %s" % (acquired.getDatasetType(),))
    print("plate: %s" % (acquired.getPlateCode(),))
    print("properties")
    for code, propertyValue in acquired.getDatasetPropertiesIter():
        print("%s %s" % (code, propertyValue))

    print("\n-- derived ---------------------------------")
    derived = DerivedDatasetMetadataParser(TEST_DIR+"/HCS_IMAGE_OVERVIEW/ibrain2_dataset_id_48")
    print("dataset type: %s" % (derived.getDatasetType(),))
    print("parent perm id: %s" % (derived.getParentDatasetPermId(),))
    print("properties")
    for code, propertyValue in derived.getDatasetPropertiesIter():
        print("%s %s" % (code, propertyValue))
def testAssayParsers():
    """Print every property the assay parser reads from the raw-image test dataset."""
    print("-- assay ---------------------------------")
    assayParser = AssayParser(TEST_DIR+"/HCS_IMAGE_RAW/ibrain2_dataset_id_32")
    print("properties")
    for name, propertyValue in assayParser.getPropertiesIter():
        print("%s %s" % (name, propertyValue))
def testConfirmationFiles():
    """Write one success and one failure status file for the raw-image test dataset."""
    incomingPath = TEST_DIR+"/HCS_IMAGE_RAW/ibrain2_dataset_id_32"
    # RegistrationConfirmationUtils is the class that provides these
    # three-argument methods; the name previously used here (IBRAIN2Utils)
    # is not defined in the visible part of this file.
    RegistrationConfirmationUtils().createSuccessStatus("123", "123123123123-12312", incomingPath)
    RegistrationConfirmationUtils().createFailureStatus("321", "Global catastrophy!", incomingPath)
#testAssayParsers()
#testMetadataParsers()
#testConfirmationFiles()