Newer
Older
class AbstractPropertiesParser:
    """Reads a 'key = value' properties file and gives access to its entries.

    The file is parsed once, in the constructor; the resulting dictionary is
    stored in ``_propertiesDict``.
    """

    _propertiesDict = None

    def __init__(self, incoming, fileName):
        """Parses the file ``fileName`` located in the ``incoming`` directory."""
        path = os.path.join(incoming, fileName)
        self._propertiesDict = self._parseMetadata(path)

    def _findFile(self, incoming, prefix):
        """Finds a directory entry by name prefix.

        Returns:
            the name (not the full path) of the first entry reported by
            os.listdir(incoming) which starts with ``prefix``.
        Raises:
            Exception: if no entry starts with ``prefix``.
        """
        for candidate in os.listdir(incoming):
            if candidate.startswith(prefix):
                return candidate
        raise Exception("No file with prefix '"+prefix+"' has been found!")

    def _parseMetadata(self, path):
        """Parses the properties file at ``path``.

        Each line should have a form:
            key = value
        Empty lines and lines starting with '#' are skipped. Only the first
        '=' separates the key from the value, so values may contain '='.
        Keys should be unique in the file.

        Returns:
            a dictionary with keys and values from the file.
        Raises:
            Exception: if a line has no '=' or a key occurs more than once.
        """
        f = open(path)
        # try/finally (rather than 'with') keeps this working on old Jython
        # versions and guarantees the handle is released even when a
        # malformed line makes us raise.
        try:
            myDict = {}
            for line in f:
                line = line.strip()
                if len(line) == 0 or line.startswith("#"):
                    continue
                ix = line.find("=")
                if ix == -1:
                    raise Exception("Cannot find '=' in line '"+line+"' in file: "+path)
                key = line[:ix].strip()
                value = line[ix+1:].strip()
                if key in myDict:
                    raise Exception("Duplicated key '"+key+"' in file: "+path)
                myDict[key] = value
            return myDict
        finally:
            f.close()

    def get(self, propertyName):
        """Returns the value of the given property; raises KeyError if it is absent."""
        return self._propertiesDict[propertyName]

    def getPropertiesIter(self):
        """All properties in the file.

        Returns:
            a list of (propertyName, propertyValue) pairs
        """
        # items() yields the same pairs as iteritems() and exists on every
        # Python version; the list copy keeps callers from touching the dict.
        return list(self._propertiesDict.items())

    def getDatasetPropertiesIter(self):
        """All dataset properties, i.e. those whose key starts with DATASET_PROPERTY_PREFIX.

        DATASET_PROPERTY_PREFIX is not defined in this class; it has to be
        provided by the subclass on which this method is called.

        Returns:
            a new list of (propertyCode, propertyValue) pairs (subclasses append to it)
        """
        prefix = self.DATASET_PROPERTY_PREFIX
        return [(key, value) for key, value in self._propertiesDict.items() if key.startswith(prefix)]
class AbstractMetadataParser(AbstractPropertiesParser):
    """Parser of the 'metadata.properties' file found in an incoming directory."""

    # name of the parsed file, looked up directly in the incoming directory
    METADATA_FILE = "metadata.properties"
    # keys starting with this prefix are reported as dataset properties
    DATASET_PROPERTY_PREFIX = "ibrain2."
    # key of the dataset type entry
    DATASET_TYPE_PROPERTY = "dataset.type"
    # key of the iBrain2 dataset id entry
    IBRAIN2_DATASET_ID_PROPERTY = "ibrain2.dataset.id"

    def __init__(self, incoming):
        """Parses METADATA_FILE from the ``incoming`` directory."""
        metadataFileName = self.METADATA_FILE
        AbstractPropertiesParser.__init__(self, incoming, metadataFileName)

    def getIBrain2DatasetId(self):
        """Returns the value stored under IBRAIN2_DATASET_ID_PROPERTY."""
        propertyKey = self.IBRAIN2_DATASET_ID_PROPERTY
        return self.get(propertyKey)

    def getDatasetType(self):
        """Returns the value stored under DATASET_TYPE_PROPERTY."""
        propertyKey = self.DATASET_TYPE_PROPERTY
        return self.get(propertyKey)
# --- concrete parser classes ----------------------
class AcquiredDatasetMetadataParser(AbstractMetadataParser):
    """Metadata parser which additionally exposes the plate code and the instrument id."""

    INSTRUMENT_PROPERTY = "instrument.id"
    PLATE_CODE_PRPOPERTY = "barcode"
    TIMESTAMP_PROPERTY = "timestamp" # not used

    def getPlateCode(self):
        """Returns the value stored under the 'barcode' key."""
        return self.get(self.PLATE_CODE_PRPOPERTY)

    def getDatasetPropertiesIter(self):
        """All dataset properties.

        The inherited properties without "ibrain2.assay.id", followed by the
        instrument id entry.

        Returns:
            a list of (propertyCode, propertyValue) pairs
        """
        collected = []
        for key, value in AbstractPropertiesParser.getDatasetPropertiesIter(self):
            if key == "ibrain2.assay.id":
                continue
            collected.append((key, value))
        instrumentKey = self.INSTRUMENT_PROPERTY
        collected.append((instrumentKey, self.get(instrumentKey)))
        return collected
class DerivedDatasetMetadataParser(AbstractMetadataParser):
    """Metadata parser which also derives the workflow name and author from a file name.

    The incoming directory has to contain an entry whose name starts with
    'workflow_'. Its name without the extension is split on '_': the second
    token is the workflow name and the third one is the workflow author.
    """

    WORKFLOW_FILE_PREFIX = "workflow_"
    PARENT_DATSASET_PERMID_PRPOPERTY = "storage_provider.parent.dataset.id"
    DATASET_TYPE_PROPERTY = "dataset.type"
    # codes under which the workflow name/author are reported as dataset properties
    WORKFLOW_NAME_PROPERTY = "ibrain2.workflow.name"
    WORKFLOW_AUTHOR_PROPERTY = "ibrain2.workflow.author"

    _workflowName = None
    _workflowAuthor = None

    def __init__(self, incoming):
        """Parses the metadata file and the workflow file name from ``incoming``.

        Raises:
            Exception: if there is no workflow file or its name has fewer than 3 tokens.
        """
        AbstractMetadataParser.__init__(self, incoming)
        workflowFile = self._findFile(incoming, self.WORKFLOW_FILE_PREFIX)
        nameParts = os.path.splitext(workflowFile)[0].split("_")
        if not len(nameParts) >= 3:
            raise Exception("Cannot parse workflow name and author from: "+workflowFile)
        self._workflowName, self._workflowAuthor = nameParts[1], nameParts[2]

    def getDatasetType(self):
        """Returns the value stored under DATASET_TYPE_PROPERTY."""
        return self.get(self.DATASET_TYPE_PROPERTY)

    def getParentDatasetPermId(self):
        """Returns the value stored under 'storage_provider.parent.dataset.id'."""
        return self.get(self.PARENT_DATSASET_PERMID_PRPOPERTY)

    def getDatasetPropertiesIter(self):
        """All dataset properties: the inherited ones plus the workflow name and author.

        Returns:
            a list of (propertyCode, propertyValue) pairs
        """
        properties = AbstractMetadataParser.getDatasetPropertiesIter(self)
        properties.extend([
            (self.WORKFLOW_NAME_PROPERTY, self._workflowName),
            (self.WORKFLOW_AUTHOR_PROPERTY, self._workflowAuthor),
        ])
        return properties
class AssayParser(AbstractPropertiesParser):
    """Parser of the assay properties file.

    The parsed file is the entry of the incoming directory whose name starts
    with 'assay_'. The constants below are the keys used in that file.
    """

    ASSAY_FILE_PREFIX = "assay_"

    ASSAY_ID_PROPERTY = "assay.id"
    ASSAY_TYPE_PROPERTY = "assay.type"
    ASSAY_DESC_PROPERTY = "assay.description"
    LAB_LEADER_PROPERTY = "labinfo.pi"
    EXPERIMENTER_PROPERTY = "experimenter.login"
    WORKFLOW_NAME_PROPERTY = "workflow.name"
    WORKFLOW_AUTHOR_PROPERTY = "workflow.author"

    def __init__(self, incoming):
        """Locates the assay file in ``incoming`` and parses it.

        Raises:
            Exception: if no entry with the 'assay_' prefix exists.
        """
        assayFileName = self._findFile(incoming, self.ASSAY_FILE_PREFIX)
        AbstractPropertiesParser.__init__(self, incoming, assayFileName)
""" path to the registration confirmation directory relative to the incoming dataset """
CONFIRMATION_DIRECTORY = "registration-status"
STATUS_PROPERTY = "storage_provider.storage.status"
STATUS_OK = "STORAGE_SUCCESS"
STATUS_ERROR = "STORAGE_FAILED"
ERROR_MSG_PROPERTY = "storage_provider.message"
OPENBIS_DATASET_ID_PROPERTY = "storage_provider.dataset.id"
IBRAIN2_STATUS_FILE_PREFIX = "ibrain2_dataset_id_"
IBRAIN2_STATUS_FILE_SUFFIX = ".properties"
def _getDestinationDir(self, incoming):
return File(incoming).getParentFile().getParentFile().getParent() + "/" + self.CONFIRMATION_DIRECTORY
def _getConfirmationFileName(self, ibrain2DatasetId):
return self.IBRAIN2_STATUS_FILE_PREFIX + ibrain2DatasetId + self.IBRAIN2_STATUS_FILE_SUFFIX
def _getStatusFilePath(self, ibrain2DatasetId, incoming):
return self._getDestinationDir(incoming) + "/" + self._getConfirmationFileName(ibrain2DatasetId)
return "" + name + " = " + value + "\n"
def _writeConfirmationFile(self, ibrain2DatasetId, fileContent, incoming):
confirmationFile = self._getStatusFilePath(ibrain2DatasetId, incoming)
self._writeFile(confirmationFile, fileContent)
def _writeFile(self, file, fileContent):
file = open(file, "w")
file.write(fileContent)
file.close()
def createSuccessStatus(self, ibrain2DatasetId, openbisDatasetId, incoming):
fileContent = self._prop(self.STATUS_PROPERTY, self.STATUS_OK)
fileContent += self._prop(AbstractMetadataParser.IBRAIN2_DATASET_ID_PROPERTY, ibrain2DatasetId)
fileContent += self._prop(self.OPENBIS_DATASET_ID_PROPERTY, openbisDatasetId)
self._writeConfirmationFile(ibrain2DatasetId, fileContent, incoming)
def createFailureStatus(self, ibrain2DatasetId, errorMessage, incoming):
fileContent = self._prop(self.STATUS_PROPERTY, self.STATUS_ERROR)
fileContent += self._prop(AbstractMetadataParser.IBRAIN2_DATASET_ID_PROPERTY, ibrain2DatasetId)
fileContent += self._prop(self.ERROR_MSG_PROPERTY, errorMessage)
self._writeConfirmationFile(ibrain2DatasetId, fileContent, incoming)
def setImageDatasetPropertiesAndRegister(imageDataset, iBrain2DatasetId, metadataParser, incoming, service, factory):
    """Registers the incoming directory as an image dataset and confirms success.

    The registration details created by ``factory`` receive every dataset
    property reported by ``metadataParser``. The new dataset gets the parent
    dataset named in the metadata, and the incoming path is moved into it.
    A success status file is written only if the transaction commit
    returns a true value.

    ``metadataParser`` has to offer getDatasetPropertiesIter() and
    getParentDatasetPermId().
    """
    registrationDetails = factory.createImageRegistrationDetails(imageDataset, incoming)
    for code, propertyValue in metadataParser.getDatasetPropertiesIter():
        registrationDetails.setPropertyValue(code, propertyValue)

    transaction = service.transaction(incoming, factory)
    newDataset = transaction.createNewDataSet(registrationDetails)
    parentPermId = metadataParser.getParentDatasetPermId()
    newDataset.setParentDatasets([parentPermId])
    # the value returned by moveFile is not needed here
    transaction.moveFile(incoming.getPath(), newDataset)

    committed = transaction.commit()
    if not committed:
        return
    createSuccessStatus(iBrain2DatasetId, newDataset, incoming.getPath())
def registerDerivedBlackBoxDataset(state, service, factory, incoming, metadataParser, datasetType, fileFormatType):
    """Registers the incoming directory as a derived dataset attached to the parent's plate.

    The plate is the sample connected to the parent dataset named in the
    metadata. If it cannot be determined, nothing is registered:
    tryGetConnectedPlate has already written the failure status file.
    Otherwise a new dataset is created with the given type and file format,
    the plate as its sample, all dataset properties from ``metadataParser``
    and the parent dataset. A success status file is written only if the
    transaction commit returns a true value.
    """
    iBrain2DatasetId = metadataParser.getIBrain2DatasetId()
    parentPermId = metadataParser.getParentDatasetPermId()
    connected = tryGetConnectedPlate(state, parentPermId, iBrain2DatasetId, incoming.getPath())
    space, plate = connected
    if plate is None:
        return

    transaction = service.transaction(incoming, factory)
    newDataset = transaction.createNewDataSet()
    newDataset.setDataSetType(datasetType)
    newDataset.setFileFormatType(fileFormatType)
    sampleIdentifier = '/'+space+'/'+plate
    newDataset.setSample(transaction.getSample(sampleIdentifier))
    newDataset.setMeasuredData(False)
    for code, propertyValue in metadataParser.getDatasetPropertiesIter():
        newDataset.setPropertyValue(code, propertyValue)
    newDataset.setParentDatasets([metadataParser.getParentDatasetPermId()])
    transaction.moveFile(incoming.getPath(), newDataset)

    committed = transaction.commit()
    if committed:
        createSuccessStatus(iBrain2DatasetId, newDataset, incoming.getPath())
"""
Returns:
(plateSpace, plateCode) tuple for the plate connected with the specified dataset
or (None, None) if the dataset does not exist or is not connected to the plate.
def tryGetConnectedPlate(state, openbisDatasetId, iBrain2DatasetId, incomingPath):
openbis = state.getOpenBisService()
dataset = openbis.tryGetDataSet(openbisDatasetId)
if dataset != None:
plate = dataset.getSample()
if plate != None:
return (plate.getSpace().getCode(), plate.getCode())
else:
errorMsg = "No plate is connected to the dataset: "+openbisDatasetId+"."
errorMsg = "Dataset does not exist or is not accessible: "+openbisDatasetId+". Maybe the dataset has not been registered yet. Try again later."
RegistrationConfirmationUtils().createFailureStatus(iBrain2DatasetId, errorMsg, incomingPath)
return (None, None)
def createSuccessStatus(iBrain2DatasetId, dataset, incomingPath):
    """Writes the success status file, reporting the code of the registered ``dataset``."""
    registeredCode = dataset.getDataSetCode()
    confirmation = RegistrationConfirmationUtils()
    confirmation.createSuccessStatus(iBrain2DatasetId, registeredCode, incomingPath)
def createFailureStatus(iBrain2DatasetId, throwable, incoming):
    """Writes the failure status file with the message of ``throwable``.

    ``throwable`` has to offer getMessage() and ``incoming`` getPath().
    """
    confirmation = RegistrationConfirmationUtils()
    message = throwable.getMessage()
    incomingPath = incoming.getPath()
    confirmation.createFailureStatus(iBrain2DatasetId, message, incomingPath)
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
# -------------- TODO: remove tests
# Root directory of the sample data read by the ad-hoc test functions below.
# This is a developer-specific absolute path.
TEST_DIR = "/Users/tpylak/main/src/screening-demo/biozentrum/dropboxes/ibrain2-dropboxes-test"
def testMetadataParsers():
    """Ad-hoc check: prints what the acquired and derived metadata parsers read from TEST_DIR."""
    print("-- acquired ---------------------------------")
    acquired = AcquiredDatasetMetadataParser(TEST_DIR+"/HCS_IMAGE_RAW/ibrain2_dataset_id_32")
    print("dataset type: %s" % (acquired.getDatasetType(),))
    print("plate: %s" % (acquired.getPlateCode(),))
    print("properties")
    for pair in acquired.getDatasetPropertiesIter():
        print("%s %s" % pair)

    print("\n-- derived ---------------------------------")
    derived = DerivedDatasetMetadataParser(TEST_DIR+"/HCS_IMAGE_OVERVIEW/ibrain2_dataset_id_48")
    print("dataset type: %s" % (derived.getDatasetType(),))
    print("parent perm id: %s" % (derived.getParentDatasetPermId(),))
    print("properties")
    for pair in derived.getDatasetPropertiesIter():
        print("%s %s" % pair)
def testAssayParsers():
    """Ad-hoc check: prints every property the assay parser reads from TEST_DIR."""
    print("-- assay ---------------------------------")
    assay = AssayParser(TEST_DIR+"/HCS_IMAGE_RAW/ibrain2_dataset_id_32")
    print("properties")
    for pair in assay.getPropertiesIter():
        print("%s %s" % pair)
def testConfirmationFiles():
    """Ad-hoc check: writes one success and one failure status file for a sample dataset."""
    # NOTE(review): IBRAIN2Utils is not defined in the reviewed part of the
    # file; the registration code uses RegistrationConfirmationUtils for the
    # same calls -- confirm whether this name is stale.
    incomingDir = TEST_DIR+"/HCS_IMAGE_RAW/ibrain2_dataset_id_32"
    IBRAIN2Utils().createSuccessStatus("123", "123123123123-12312", incomingDir)
    IBRAIN2Utils().createFailureStatus("321", "Global catastrophy!", incomingDir)
#testAssayParsers()
#testMetadataParsers()
#testConfirmationFiles()