From 0da45e96d0556acc762c95b2f140a10af6a9dce9 Mon Sep 17 00:00:00 2001
From: barillac <barillac>
Date: Wed, 7 May 2014 11:29:29 +0000
Subject: [PATCH] script for uploading FACS data for Yeastlab

SVN: 31464
---
 .../facs_data/BDLSRFortessaDropbox.py         | 601 ++++++++++++++++++
 .../drop-boxes/facs_data/import_facs.py       | 144 ++++
 .../drop-boxes/facs_data/plugin.properties    |  21 +
 3 files changed, 766 insertions(+)
 create mode 100644 plasmid/source/drop-boxes/facs_data/BDLSRFortessaDropbox.py
 create mode 100644 plasmid/source/drop-boxes/facs_data/import_facs.py
 create mode 100644 plasmid/source/drop-boxes/facs_data/plugin.properties

diff --git a/plasmid/source/drop-boxes/facs_data/BDLSRFortessaDropbox.py b/plasmid/source/drop-boxes/facs_data/BDLSRFortessaDropbox.py
new file mode 100644
index 00000000000..57ffbe2b18e
--- /dev/null
+++ b/plasmid/source/drop-boxes/facs_data/BDLSRFortessaDropbox.py
@@ -0,0 +1,601 @@
+"""
+@author: Aaron Ponti
+"""
+import re
+import os
+import logging
+import xml.etree.ElementTree as xml
+from datetime import datetime
+
+class Processor:
+    """The Processor class performs all steps required for registering datasets
+    from the assigned dropbox folder."""
+
+    # A transaction object passed by openBIS
+    transaction = None
+
+    # The incoming folder to process (a java.io.File object)
+    incoming = ""
+
+    # Constructor
+    def __init__(self, transaction, logFile):
+
+        self.transaction = transaction
+        self.incoming = transaction.getIncoming()
+
+        # Set up logging
+        logging.basicConfig(filename=logFile, level=logging.DEBUG)
+
+    def createExperiment(self, expId, expName,
+                         expType="LSR_FORTESSA_EXPERIMENT"):
+        """Create an experiment with given Experiment ID extended with the addition
+        of a string composed from current date and time.
+
+        @param expId, the experiment ID
+        @param expName, the experiment name
+        @param expType, the experiment type that must already exist; optional,
+        default is "LSR_FORTESSA_EXPERIMENT"
+        """
+
+        # Make sure to keep the code length within the limits imposed by
+        # openBIS for codes
+        if len(expId) > 41:
+            expId = expId[0:41]
+
+        # Create univocal ID
+        expId = expId + "_" + self.getCustomTimeStamp()
+
+        # Create the experiment
+        logging.info("Register experiment %s" % expId)
+        exp = self.transaction.createNewExperiment(expId, expType)
+        if not exp:
+            msg = "Could not create experiment " + expId + "!"
+            logging.error(msg)
+            raise Exception(msg)
+        else:
+            logging.info("Created experiment with ID " + expId + ".")
+
+        # Store the name
+        exp.setPropertyValue("LSR_FORTESSA_EXPERIMENT_NAME", expName)
+
+        return exp
+
+    def createSampleWithGenCode(self, spaceCode,
+                                sampleType="LSR_FORTESSA_PLATE"):
+        """Create a sample with automatically generated code.
+
+        @param spaceCode, the code of the space
+        @param sampleType, the sample type that must already exist
+        @return sample An ISample
+        """
+
+        # Make sure there are not slashes in the spaceCode
+        spaceCode = spaceCode.replace("/", "")
+
+        # Create the sample
+        sample = self.transaction.createNewSampleWithGeneratedCode(spaceCode, sampleType)
+        if not sample:
+            msg = "Could not create sample with generated code"
+            logging.error(msg)
+            raise Exception(msg)
+
+        return sample
+
+    def formatExpDateForPostgreSQL(self, expDate):
+        """Format the experiment date to be compatible with postgreSQL's
+        'timestamp' data type
+
+        @param expDate Date stored in the FCS file, in the form 01-JAN-2013
+        @return Date in the form 2013-01-01, or 1970-01-01 if expDate is
+        missing or cannot be parsed
+        """
+
+        monthMapper = {'JAN': '01', 'FEB': '02', 'MAR': '03', 'APR': '04',
+                       'MAY': '05', 'JUN': '06', 'JUL': '07', 'AUG': '08',
+                       'SEP': '09', 'OCT': '10', 'NOV': '11', 'DEC': '12'}
+
+        # Separate the date into day, month, and year. A missing attribute
+        # (None) or a value with the wrong number of fields must not abort
+        # the whole registration: it is handled like an unknown month below.
+        parts = (expDate or "").strip().split("-")
+        month = None
+        if len(parts) == 3:
+            (day, month, year) = parts
+            # Map the month to digits (e.g. "06"), ignoring case
+            month = monthMapper.get(month.strip().upper())
+
+        # Build the date in the correct format. If the date could not be
+        # parsed, return 1970-01-01
+        if month is None:
+            logging.info("Invalid experiment date %s found. " \
+                         "Reverting to 1970/01/01." % expDate)
+            return "1970-01-01"
+        else:
+            return (year + "-" + month + "-" + day)
+
+    def getCustomTimeStamp(self):
+        """Create an univocal time stamp based on the current date and time
+        (works around incomplete API of Jython 2.5).
+
+        @return String of 12 digits (year, day, month, hour, minute, second)
+        followed by the 6-digit microseconds
+        """
+
+        t = datetime.now()
+        # The microseconds are formatted explicitly: slicing the string form
+        # of t yields nothing when t.microsecond is 0, because the fractional
+        # part is then omitted from the string.
+        return (t.strftime("%y%d%m%H%M%S") + ("%06d" % t.microsecond))
+
+    def getSubFolders(self):
+        """Returns a list of subfolders of the passed incoming directory.
+
+        @return list of subfolders (String)
+        """
+
+        incomingStr = self.incoming.getAbsolutePath()
+        return [name for name in os.listdir(incomingStr)
+                if os.path.isdir(os.path.join(incomingStr, name))]
+
+    def processExperiment(self, experimentNode,
+                          openBISExpType="LSR_FORTESSA_EXPERIMENT"):
+        """Register an IExperimentUpdatable based on the Experiment XML node.
+
+        @param experimentNode An XML node corresponding to an Experiment
+        @param openBISExpType The experiment type
+        @return IExperimentUpdatable experiment
+        """
+
+        # Get the openBIS identifier
+        openBISIdentifier = experimentNode.attrib.get("openBISIdentifier")
+
+        # Get the experiment name
+        expName = experimentNode.attrib.get("name")
+
+        # Get the experiment date and reformat it to be compatible
+        # with postgreSQL
+        expDate = self.formatExpDateForPostgreSQL(experimentNode.attrib.get("date"))
+
+        # Get the description
+        description = experimentNode.attrib.get("description")
+
+        # Get the acquisition hardware
+        acqHardware = experimentNode.attrib.get("acq_hardware")
+
+        # Get the acquisition software
+        acqSoftware = experimentNode.attrib.get("acq_software")
+
+        # Get the owner name
+        owner = experimentNode.attrib.get("owner_name")
+
+        # Create the experiment (with corrected ID if needed: see above)
+        openBISExperiment = self.createExperiment(openBISIdentifier,
+                                                  expName, openBISExpType)
+        if not openBISExperiment:
+            msg = "Could not create experiment " + openBISIdentifier
+            logging.error(msg)
+            raise Exception(msg)
+
+        # Set the date
+        openBISExperiment.setPropertyValue("LSR_FORTESSA_EXPERIMENT_DATE",
+                                           expDate)
+        # Set the description
+        openBISExperiment.setPropertyValue("LSR_FORTESSA_EXPERIMENT_DESCRIPTION",
+                                           description)
+
+        # Set the acquisition hardware
+        openBISExperiment.setPropertyValue("LSR_FORTESSA_EXPERIMENT_ACQ_HARDWARE",
+                                           acqHardware)
+
+        # Set the acquisition software
+        openBISExperiment.setPropertyValue("LSR_FORTESSA_EXPERIMENT_ACQ_SOFTWARE",
+                                           acqSoftware)
+
+        # Set the experiment owner
+        openBISExperiment.setPropertyValue("LSR_FORTESSA_EXPERIMENT_OWNER",
+                                           owner)
+
+        # Return the openBIS Experiment object
+        return openBISExperiment
+
+    def processFCSFile(self, fcsFileNode, openBISTube, openBISExperiment):
+        """Register the FCS File using the parsed properties file
+
+        @param fcsFileNode An XML node corresponding to an FCS file (dataset)
+        @param openBISTube An ISample object representing a Tube or Well
+        @param openBISExperiment An IExperiment object representing an Experiment
+        """
+
+        # Dataset type
+        datasetType = "LSR_FORTESSA_FCSFILE"
+
+        # Create a new dataset
+        dataset = self.transaction.createNewDataSet()
+        if not dataset:
+            msg = "Could not get or create dataset"
+            logging.error(msg)
+            raise Exception(msg)
+
+        # Set the dataset type
+        dataset.setDataSetType(datasetType)
+
+        # Assign the dataset to the sample
+        dataset.setSample(openBISTube)
+
+        # Assign the dataset to the experiment
+        dataset.setExperiment(openBISExperiment)
+
+        # Set the file type
+        dataset.setFileFormatType("FCS")
+
+        # Assign the file to the dataset (we will use the absolute path)
+        fileName = fcsFileNode.attrib.get("relativeFileName")
+        fileName = os.path.join(self.incoming.getAbsolutePath(), fileName)
+
+        # Log
+        logging.info("PROCESSFCSFILE: Registering file: " + fileName)
+
+        # Move the file
+        self.transaction.moveFile(fileName, dataset)
+
+
+    def processTray(self, trayNode, openBISExperiment):
+        """Register a Tray (Plate) based on the Tray XML node
+        and an IExperimentUpdatable object
+
+        @param trayNode An XML node corresponding to a Tray (Plate)
+        @param openBISExperiment An IExperimentUpdatable object
+        @return ISample sample
+        """
+
+        # openBIS sample type
+        openBISSampleType = "LSR_FORTESSA_PLATE"
+
+        # Get the identifier of the space
+        openBISSpaceIdentifier = \
+            trayNode.attrib.get("openBISSpaceIdentifier")
+
+        # Get the tray name
+        name = trayNode.attrib.get("name")
+
+        # Get the tray geometry
+        trayGeometry = trayNode.attrib.get("trayGeometry")
+
+        # Create the sample. The Plate is configured in openBIS to
+        # auto-generate its own identifier.
+        openBISTray = self.createSampleWithGenCode(openBISSpaceIdentifier,
+                                                   openBISSampleType)
+        if not openBISTray:
+            msg = "Could not create plate sample."
+            logging.error(msg)
+            raise Exception(msg)
+
+        # Set the experiment for the sample
+        openBISTray.setExperiment(openBISExperiment)
+
+        # Set the plate name
+        openBISTray.setPropertyValue("LSR_FORTESSA_PLATE_NAME", name)
+
+        # Set the tray geometry
+        openBISTray.setPropertyValue("LSR_FORTESSA_PLATE_GEOMETRY", trayGeometry)
+
+        # Return the openBIS ISample object
+        return openBISTray
+
+
+    def processTubeOrWell(self, tubeNode, openBISContainerSample,
+                          specimenName, openBISExperiment):
+        """Register a Tube or Well (as a child of a Specimen) based on the Tube or
+        Well XML node and an ISample object.
+
+        The associated fcs file is attached as a IDataset
+
+        @param tubeNode An XML node corresponding to a Tube or Well
+        @param openBISContainerSample An ISample object that will contain
+        the Tube or Well
+        @param specimenName Name of the specimen associated to the Tube or Well
+        @param openBISExperiment The IExperiment to which the Tube belongs
+        @return ISample sample
+        """
+
+        # Get the name
+        name = tubeNode.attrib.get("name")
+
+        # openBIS type: LSR_FORTESSA_TUBE or LSR_FORTESSA_WELL, from the tag
+        if tubeNode.tag == "Tube":
+            openBISSpecimenType = "LSR_FORTESSA_TUBE"
+        elif tubeNode.tag == "Well":
+            openBISSpecimenType = "LSR_FORTESSA_WELL"
+        else:
+            msg = "Unknown tube type " + tubeNode.tag
+            logging.error(msg)
+            raise Exception(msg)
+
+        # Get the identifier of the space
+        openBISSpaceIdentifier = \
+            tubeNode.attrib.get("openBISSpaceIdentifier")
+
+        # Create the sample. The Tube/Well is configured in openBIS to
+        # auto-generate its own identifier.
+        openBISTube = self.createSampleWithGenCode(openBISSpaceIdentifier,
+                                                   openBISSpecimenType)
+        if not openBISTube:
+            msg = "Could not create sample with auto-generated identifier"
+            logging.error(msg)
+            raise Exception(msg)
+
+        # Set the experiment to which it belongs
+        openBISTube.setExperiment(openBISExperiment)
+
+        # Set the Specimen name as a property
+        openBISTube.setPropertyValue("LSR_FORTESSA_SPECIMEN", specimenName)
+
+        # Set the name
+        if openBISSpecimenType == "LSR_FORTESSA_TUBE":
+            openBISTube.setPropertyValue("LSR_FORTESSA_TUBE_NAME", name)
+        elif openBISSpecimenType == "LSR_FORTESSA_WELL":
+            openBISTube.setPropertyValue("LSR_FORTESSA_WELL_NAME", name)
+        else:
+            msg = "Unknown value for openBISSpecimenType."
+            logging.error(msg)
+            raise Exception(msg)
+
+        # Set the container (the TubeSet for a Tube, the Plate for a Well)
+        openBISTube.setContainer(openBISContainerSample)
+
+        # Return the openBIS ISample
+        return openBISTube
+
+    def processTubeSet(self, experimentNode, openBISExperiment):
+        """Register a TubeSet (virtual tube container).
+
+        @param experimentNode An XML node corresponding to an Experiment
+        @param openBISExperiment An IExperimentUpdatable object
+        @return ISample sample
+        """
+
+        # Sample type
+        openBISSampleType = "LSR_FORTESSA_TUBESET"
+
+        # Get the identifier of the space
+        openBISSpaceIdentifier = \
+            experimentNode.attrib.get("openBISSpaceIdentifier")
+
+        # Create the sample. The Tubeset is configured in openBIS to
+        # auto-generate its own identifier.
+        openBISTubeSet = self.createSampleWithGenCode(openBISSpaceIdentifier,
+                                                      openBISSampleType)
+        if not openBISTubeSet:
+            msg = "Could not get or create TubeSet"
+            logging.error(msg)
+            raise Exception(msg)
+
+        # Set the experiment for the sample
+        openBISTubeSet.setExperiment(openBISExperiment)
+
+        logging.info("PROCESS_TUBESET: Created new TubeSet " \
+                     "with identifier %s, sample type %s" \
+                     % (openBISTubeSet.getSampleIdentifier(),
+                        openBISSampleType))
+
+        # Return the openBIS ISample object
+        return openBISTubeSet
+
+    def register(self, tree):
+        """Register the Experiment using the parsed properties file.
+
+        @param tree ElementTree parsed from the properties XML file
+        """
+
+        # Get the root node (obitXML)
+        root = tree.getroot()
+
+        # Iterate over the children (Experiments)
+        for experimentNode in root:
+
+            # The tag of the immediate children of the root experimentNode
+            # must be Experiment
+            if experimentNode.tag != "Experiment":
+                msg = "Expected Experiment node, found " + experimentNode.tag
+                logging.error(msg)
+                raise Exception(msg)
+
+            # Process an Experiment XML node and get/create an IExperimentUpdatable
+            openBISExperiment = self.processExperiment(experimentNode,
+                                                       "LSR_FORTESSA_EXPERIMENT")
+
+            # The virtual TubeSet is created lazily and belongs to exactly one
+            # experiment: it must be reset here, otherwise the Tubes of this
+            # experiment would end up in the TubeSet of a previous one.
+            openBISTubeSet = None
+
+            # Process children of the Experiment
+            for childNode in experimentNode:
+
+                # The child of an Experiment can be a Tray or a Specimen
+                nodeType = childNode.tag
+
+                if nodeType == "Specimen":
+
+                    # A specimen is a direct child of an experiment if there
+                    # is no plate, and the FCS files are therefore associated
+                    # to tubes. In this case, we create a virtual TubeSet
+                    # sample container (one for all Tubes in the experiment).
+                    if openBISTubeSet is None:
+                        openBISTubeSet = self.processTubeSet(experimentNode,
+                                                             openBISExperiment)
+
+                    # The only information we need from the Specimen is its
+                    # name to associate to the Tubes as property
+                    specimenNameProperty = childNode.attrib.get("name")
+
+                    # Now iterate over the children of the Specimen
+                    for tubeNode in childNode:
+
+                        # The child of a Specimen is a Tube
+                        if tubeNode.tag != "Tube":
+                            msg = "Expected Tube node!"
+                            logging.error(msg)
+                            raise Exception(msg)
+
+                        # Process the tube node and get the openBIS object
+                        openBISTube = self.processTubeOrWell(tubeNode,
+                                                             openBISTubeSet,
+                                                             specimenNameProperty,
+                                                             openBISExperiment)
+
+                        # Now process the FCS file
+                        for fcsNode in tubeNode:
+
+                            # The child of a Tube is an FCSFile
+                            if fcsNode.tag != "FCSFile":
+                                msg = "Expected FCS File node!"
+                                logging.error(msg)
+                                raise Exception(msg)
+
+                            # Process the FCS file node
+                            self.processFCSFile(fcsNode, openBISTube,
+                                                openBISExperiment)
+
+                elif nodeType == "Tray":
+
+                    # Process the tray node and get the openBIS object
+                    openBISTray = self.processTray(childNode,
+                                                   openBISExperiment)
+
+                    # Now iterate over the children of the Tray
+                    for specimenNode in childNode:
+
+                        # The child of a Tray is a Specimen
+                        if specimenNode.tag != "Specimen":
+                            msg = "Expected Specimen node!"
+                            logging.error(msg)
+                            raise Exception(msg)
+
+                        # The only information we need from the Specimen is its
+                        # name to associate to the Wells as property
+                        specimenNameProperty = specimenNode.attrib.get("name")
+
+                        for wellNode in specimenNode:
+
+                            # The child of a Specimen is a Well
+                            if wellNode.tag != "Well":
+                                msg = "Expected Well node!"
+                                logging.error(msg)
+                                raise Exception(msg)
+
+                            # Process the well node and get the openBIS object
+                            openBISWell = self.processTubeOrWell(wellNode,
+                                                                 openBISTray,
+                                                                 specimenNameProperty,
+                                                                 openBISExperiment)
+
+                            # Now process the FCS file
+                            for fcsNode in wellNode:
+
+                                # The child of a Well is an FCSFile
+                                if fcsNode.tag != "FCSFile":
+                                    msg = "Expected FCS File node!"
+                                    logging.error(msg)
+                                    raise Exception(msg)
+
+                                # Process the FCS file node
+                                self.processFCSFile(fcsNode, openBISWell,
+                                                    openBISExperiment)
+
+                else:
+
+                    msg = "The Node must be either a Specimen or a Tray"
+                    logging.error(msg)
+                    raise Exception(msg)
+
+        # Log that we are finished with the registration
+        logging.info("REGISTER: Registration completed")
+
+
+    def run(self):
+        """Run the registration."""
+
+        # Make sure that incoming is a folder
+        if not self.incoming.isDirectory():
+            msg = "Incoming MUST be a folder!"
+            logging.error(msg)
+            raise Exception(msg)
+
+        # Log
+        logging.info("Incoming folder: " + self.incoming.getAbsolutePath())
+
+        # There must be just one subfolder: the user subfolder
+        subFolders = self.getSubFolders()
+        if len(subFolders) != 1:
+            msg = "Expected user subfolder!"
+            logging.error(msg)
+            raise Exception(msg)
+
+        # Set the user folder
+        userFolder = os.path.join(self.incoming.getAbsolutePath(),
+                                  subFolders[0])
+
+        # In the user subfolder we must find the data_structure.ois file
+        dataFileName = os.path.join(userFolder, "data_structure.ois")
+        if not os.path.exists(dataFileName):
+            msg = "File data_structure.ois not found!"
+            logging.error(msg)
+            raise Exception(msg)
+
+        # Now read the data structure file and store all the pointers to
+        # the properties files. The paths are stored relative to self.incoming,
+        # so we can easily build the full file paths.
+        propertiesFileList = []
+        f = open(dataFileName)
+        try:
+            for line in f:
+                line = re.sub('[\r\n]', '', line)
+                # Skip empty lines (e.g. a trailing newline at the end of the
+                # file): they would resolve to the incoming folder itself
+                if line.strip() == "":
+                    continue
+                propertiesFile = os.path.join(self.incoming.getAbsolutePath(),
+                                              line)
+                propertiesFileList.append(propertiesFile)
+        finally:
+            f.close()
+
+        # Process (and ultimately register) all experiments
+        for propertiesFile in propertiesFileList:
+
+            # Log
+            logging.info("* * * Processing: " + propertiesFile)
+
+            # Read the properties file into an ElementTree
+            tree = xml.parse(propertiesFile)
+
+            # Now register the experiment
+            self.register(tree)
+
+
+def process(transaction):
+    """Dropbox entry point.
+
+    @param transaction, the transaction object
+    """
+
+    # Get path to containing folder
+    # __file__ does not work (reliably) in Jython
+    dbPath = "../core-plugins/microscopy/1/dss/drop-boxes/BDLSRFortessaDropbox"
+
+    # Path to the logs subfolder
+    logPath = os.path.join(dbPath, "logs")
+
+    # Make sure the logs subfolder exists
+    if not os.path.exists(logPath):
+        os.makedirs(logPath)
+
+    # Path for the log file
+    logFile = os.path.join(logPath, "registration_log.txt")
+
+    # Create a Processor
+    processor = Processor(transaction, logFile)
+
+    # Run
+    processor.run()
diff --git a/plasmid/source/drop-boxes/facs_data/import_facs.py b/plasmid/source/drop-boxes/facs_data/import_facs.py
new file mode 100644
index 00000000000..e9a6e693c29
--- /dev/null
+++ b/plasmid/source/drop-boxes/facs_data/import_facs.py
@@ -0,0 +1,144 @@
+#! /usr/bin/env python
+"""
+  Script for uploading FACS data for the YeastLab.
+
+  FACS data are organized in one main folder which contains a folder with the username. Inside this folder there are folders and subfolders that correspond
+  to different FACS experiments. Inside the username folder there is a file called data_structure.ois which contains the structure of the FACS experiments.
+  This script creates a sample in openBIS for each FACS experiment and uploads the corresponding data in one dataset connected to the samples.
+  The space where the sample should go is detected from the username. The project and experiment are detected from the name of the folders given by the user.
+  The FACS experiment should be named by the users as PROJECT-EXPERIMENT-SAMPLE, or as PROJECT-EXPERIMENT
+  if the code of the sample is to be generated by openBIS.
+
+"""
+
+import os, glob, re, csv, time, shutil
+from java.io import File
+from ch.systemsx.cisd.openbis.dss.etl.dto.api.v1 import SimpleImageDataConfig, ImageMetadata, Location
+from ch.systemsx.cisd.openbis.plugin.screening.shared.api.v1.dto import Geometry
+from ch.systemsx.cisd.openbis.dss.etl.dto.api.v1 import SimpleImageContainerDataConfig, ChannelColor
+from loci.formats import ImageReader
+from time import *
+from datetime import *
+
+
+# openBIS space code for each known user folder name
+USER_SPACES = {"pontia": "AARON", "ottozd": "DIANA", "elfstrok": "KRISTINA"}
+
+# Suffix of the properties files listed in data_structure.ois
+PROPERTIES_SUFFIX = "_properties.oix"
+
+# openBIS type used for the experiments, samples and data sets
+FACS_TYPE = "FACS_DATA"
+
+
+def parseStructureLine(line):
+    """Parse one line of data_structure.ois.
+
+    The last path element is expected to be named
+    PROJECT-EXPERIMENT[-SAMPLE]_properties.oix.
+
+    @param line, path of a properties file relative to the incoming folder
+    @return tuple (relativeDirectory, project, experiment, sample); sample
+    is None if the name has no SAMPLE part
+    """
+
+    path = line.strip().split("/")
+    name = path[-1]
+    if name.endswith(PROPERTIES_SUFFIX):
+        name = name[:-len(PROPERTIES_SUFFIX)]
+
+    # Split at most twice, so that a sample name may itself contain dashes
+    parts = name.split("-", 2)
+    if len(parts) < 2 or "" in parts:
+        raise Exception("Expected PROJECT-EXPERIMENT[-SAMPLE], found " + path[-1])
+
+    sample = None
+    if len(parts) == 3:
+        sample = parts[2]
+
+    return ("/".join(path[:-1]), parts[0], parts[1], sample)
+
+
+def readDataStructure(incoming):
+    """Read the data_structure.ois file of every user folder.
+
+    @param incoming, path of the incoming folder
+    @return list of tuples (userName, directoryToImport, project,
+    experiment, sample), one per FACS experiment
+    """
+
+    entries = []
+    for userName in os.listdir(incoming):
+
+        # Skip hidden entries
+        if userName.startswith('.'):
+            continue
+
+        dataStructure = incoming + "/" + userName + "/data_structure.ois"
+        f = open(dataStructure)
+        try:
+            for line in f:
+                # Skip empty lines
+                if line.strip() == "":
+                    continue
+                (directory, project, experiment, sample) = parseStructureLine(line)
+                entries.append((userName, incoming + "/" + directory,
+                                project, experiment, sample))
+        finally:
+            f.close()
+
+    return entries
+
+
+def process(transaction):
+    """Dropbox entry point: register one sample with one data set for each
+    FACS experiment listed in data_structure.ois.
+
+    @param transaction, the transaction object
+    """
+
+    incoming = transaction.getIncoming()
+
+    # Projects and experiments already retrieved or created in this
+    # transaction, by identifier. Each line of data_structure.ois is
+    # registered in its own project and experiment only.
+    projects = {}
+    experiments = {}
+
+    for (userName, directory, projectName, experimentName, sampleName) \
+            in readDataStructure(incoming.getPath()):
+
+        # The space is detected from the user name
+        space = USER_SPACES.get(userName)
+        if space is None:
+            raise Exception("No openBIS space is known for user " + userName)
+
+        # Get or create the project
+        projectId = "/" + space + "/" + projectName
+        if projectId not in projects:
+            project = transaction.getProject(projectId)
+            if not project:
+                project = transaction.createNewProject(projectId)
+            projects[projectId] = project
+
+        # Get or create the experiment
+        experimentId = projectId + "/" + experimentName
+        if experimentId not in experiments:
+            experiment = transaction.getExperiment(experimentId)
+            if not experiment:
+                experiment = transaction.createNewExperiment(experimentId, FACS_TYPE)
+            experiments[experimentId] = experiment
+        experiment = experiments[experimentId]
+
+        # Create the sample; openBIS generates its code if none was given
+        if sampleName is None:
+            sampleNew = transaction.createNewSampleWithGeneratedCode(space, FACS_TYPE)
+        else:
+            sampleNew = transaction.createNewSample("/" + space + "/" + sampleName, FACS_TYPE)
+        sampleNew.setExperiment(experiment)
+
+        # Upload the folder of the FACS experiment as one data set
+        dataSet = transaction.createNewDataSet()
+        dataSet.setDataSetType(FACS_TYPE)
+        dataSet.setSample(sampleNew)
+        transaction.moveFile(directory, dataSet)
diff --git a/plasmid/source/drop-boxes/facs_data/plugin.properties b/plasmid/source/drop-boxes/facs_data/plugin.properties
new file mode 100644
index 00000000000..d7b020e7b35
--- /dev/null
+++ b/plasmid/source/drop-boxes/facs_data/plugin.properties
@@ -0,0 +1,21 @@
+#
+# FACS drop box which registers the FACS data of the YeastLab
+#
+#
+# Variables:
+#   incoming-root-dir
+#     Path to the directory which contains incoming directories for drop boxes.
+
+#input = image-dropbox
+dropbox-name = facs_data
+
+# NOTE(review): developer-specific absolute path; on a shared installation this
+# would presumably be ${incoming-root-dir}/incoming-facs - confirm before deploying.
+incoming-dir = /Users/barillac/openbis-screening2/data/incoming-facs
+incoming-data-completeness-condition = auto-detection
+top-level-data-set-handler = ch.systemsx.cisd.etlserver.registrator.api.v2.JythonTopLevelDataSetHandlerV2
+script-path = import_facs.py
+#script-path =BDLSRFortessaDropbox.py
+program-class = ch.systemsx.cisd.etlserver.registrator.api.v2.ExampleJavaDataSetRegistrationDropboxV2
+storage-processor = ch.systemsx.cisd.etlserver.DefaultStorageProcessor
+
-- 
GitLab