From 32f024e3829b02bed1c456fccb09d192f27d755c Mon Sep 17 00:00:00 2001 From: kohleman <kohleman> Date: Mon, 25 Jun 2012 13:23:24 +0000 Subject: [PATCH] updated to V2 drop box syntax SVN: 25850 --- .../create-flowcell-dropbox.py | 220 ++++++++++-------- .../create-flowcell-dropbox/plugin.properties | 4 +- .../plugin.properties | 4 +- .../register-flowcell-dropbox.py | 76 +++--- 4 files changed, 172 insertions(+), 132 deletions(-) diff --git a/deep_sequencing_unit/source/core-plugins/illumina-ngs/1/dss/drop-boxes/create-flowcell-dropbox/create-flowcell-dropbox.py b/deep_sequencing_unit/source/core-plugins/illumina-ngs/1/dss/drop-boxes/create-flowcell-dropbox/create-flowcell-dropbox.py index e092beacec5..9b7f600f561 100755 --- a/deep_sequencing_unit/source/core-plugins/illumina-ngs/1/dss/drop-boxes/create-flowcell-dropbox/create-flowcell-dropbox.py +++ b/deep_sequencing_unit/source/core-plugins/illumina-ngs/1/dss/drop-boxes/create-flowcell-dropbox/create-flowcell-dropbox.py @@ -1,9 +1,33 @@ ''' -expected incoming Name for HiSeq2000 runs: 110715_SN792_0054_BC035RACXX +@copyright: +2012 ETH Zuerich, CISD + +@license: +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +@description: +Parses the two Illumina provided files 'runParameters.xml' and 'RunInfo.xml' +and creates one Sample of type 'ILLUMINA_FLOW_CELL' and sets Sample properties +from those two XML files. Additionally the number of lanes are read out and +are created as contained samples of type 'ILLUMINA_FLOW_LANE'. + +@note: +print statements go to: <openBIS_HOME>/datastore_server/log/startup_log.txt +expected incoming Name for HiSeq runs: 110715_SN792_0054_BC035RACXX expected incoming Name for GAII runs: 110812_6353WAAXX -Note: -print statements go to: ~openbis/sprint/datastore_server/log/startup_log.txt +@author: +Manuel Kohler ''' import os @@ -69,107 +93,103 @@ def create_openbis_timestamp (): # ----------------------------------------------------------------------------- -# Create a "transaction" -- a way of grouping operations together so they all -# happen or none of them do. -transaction = service.transaction() - -incomingPath = incoming.getAbsolutePath() -print(incomingPath) - -# Get the incoming name -name = incoming.getName() - -split=name.split("_") -if (len(split) == 4): - IS_HISEQ_RUN=True -if (len(split) == 2): - pass - -# Search for the sample and check if there is already sample with this name -search_service = transaction.getSearchService() -sc = SearchCriteria() -sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, name)); -foundSamples = search_service.searchForSamples(sc) -if foundSamples.size() > 0: - raise NameError('Already found a Flow Cell with the following name: '+ name) - -# Parse the RunInfo.xml file -runInfo = parseXmlFile(incomingPath + '/' + RUNINFO) -print(runInfo) - -# Create a new Flow Cell and set the experiment -project = transaction.getProject(FLOWCELL_PROJECT_ID) -if project == None: - space = transaction.getSpace(FLOWCELL_SPACE) - if space == None: - space = transaction.createNewSpace(FLOWCELL_SPACE, None) - space.setDescription("A test space") - project = transaction.createNewProject(FLOWCELL_PROJECT_ID) - project.setDescription("A demo project") -expID = FLOWCELL_PROJECT_ID + '/' + datetime.now().strftime("%Y.%m") -exp = transaction.getExperiment(expID) -if exp == None: - exp = transaction.createNewExperiment(expID, EXPERIMENT_TYPE_CODE) -newFlowCell = transaction.createNewSample('/' + FLOWCELL_SPACE + '/' + name, "ILLUMINA_FLOW_CELL") -newFlowCell.setExperiment(exp) - -if IS_HISEQ_RUN: - run = runInfo.getAllchildren('Run')[0].attrib - if (run['Id'] != name): - raise NameError('Flowcell names do not match between directory name '+ name + - ' and ' + RUNINFO + 'property file: ' + run['Id']) - - # The HiSeq is providing more infos, which we will parse here: - runParameters = parseXmlFile(incomingPath + '/' + RUNPARAMETERS) - - newFlowCell.setPropertyValue("ILLUMINA_PIPELINE_VERSION", runParameters.getXmlElement(RUNPARAMETERS_XML['RTAVERSION'])) - newFlowCell.setPropertyValue("FLOWCELLTYPE", runParameters.getXmlElement(RUNPARAMETERS_XML['FLOWCELL'])) - newFlowCell.setPropertyValue("CONTROL_LANE", runParameters.getXmlElement(RUNPARAMETERS_XML['CONTROLLANE'])) - newFlowCell.setPropertyValue("SBS_KIT", runParameters.getXmlElement(RUNPARAMETERS_XML['SBS'])) - - read1 = runParameters.getAllchildren('Read1') - newFlowCell.setPropertyValue("CYCLES_REQUESTED_BY_CUSTOMER", read1[0].text) - - read2 = runParameters.getAllchildren('Read2') - if (str(read2[0].text) == '0'): - newFlowCell.setPropertyValue("END_TYPE", "SINGLE_READ") - else: - newFlowCell.setPropertyValue("END_TYPE", "PAIRED_END") - newFlowCell.setPropertyValue("PAIRED_END_KIT", runParameters.getXmlElement(RUNPARAMETERS_XML['PE'])) - - indexRead1 = runParameters.getAllchildren('IndexRead1') - newFlowCell.setPropertyValue("INDEXREAD", indexRead1[0].text) - - indexRead2 = runParameters.getAllchildren('IndexRead2') - newFlowCell.setPropertyValue("INDEXREAD2", indexRead2[0].text) - - def setFcProperty(searchId, dict): - children = runInfo.getAllchildren(searchId) - for element in (dict): - if (element <> '') and (dict[element] <> ''): - newFlowCell.setPropertyValue(element, children[0].attrib[dict[element]]) - - setFcProperty('FlowcellLayout', RUNINFO_XML) - - -sequencer = runInfo.getAllchildren('Instrument') -newFlowCell.setPropertyValue("SEQUENCER", INSTRUMENT[sequencer[0].text]) - -newFlowCell.setPropertyValue("FLOW_CELL_SEQUENCED_ON", create_openbis_timestamp()) -if IS_HISEQ_RUN: - maxLanes = runInfo.getAllchildren('FlowcellLayout')[0].attrib[RUNINFO_XML['LANECOUNT']] -else: - maxLanes = len(runInfo.getAllchildren('Tiles')[0]) - -# ----------------------------------------------------------------------------- - -def registerFlowLane(a_lane): +def registerFlowLane(transaction, a_lane, name, newFlowCell): ''' Registers a new Flow lane ''' newFlowLane = transaction.createNewSample('/' + FLOWCELL_SPACE + '/' + name + ':' + str(a_lane), "ILLUMINA_FLOW_LANE") newFlowLane.setContainer(newFlowCell) + +# ----------------------------------------------------------------------------- + +def process(transaction): + + incoming = transaction.getIncoming() + incomingPath = incoming.getAbsolutePath() + + # Get the incoming name + name = incoming.getName() + + split=name.split("_") + if (len(split) == 4): + IS_HISEQ_RUN=True + if (len(split) == 2): + pass + + # Search for the sample and check if there is already sample with this name + search_service = transaction.getSearchService() + sc = SearchCriteria() + sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, name)); + foundSamples = search_service.searchForSamples(sc) + if foundSamples.size() > 0: + raise NameError('Already found a Flow Cell with the following name: '+ name) + + # Parse the RunInfo.xml file + runInfo = parseXmlFile(incomingPath + '/' + RUNINFO) + + # Create a new Flow Cell and set the experiment + project = transaction.getProject(FLOWCELL_PROJECT_ID) + if project == None: + space = transaction.getSpace(FLOWCELL_SPACE) + if space == None: + space = transaction.createNewSpace(FLOWCELL_SPACE, None) + space.setDescription("A test space") + project = transaction.createNewProject(FLOWCELL_PROJECT_ID) + project.setDescription("A demo project") + expID = FLOWCELL_PROJECT_ID + '/' + datetime.now().strftime("%Y.%m") + exp = transaction.getExperiment(expID) + if exp == None: + exp = transaction.createNewExperiment(expID, EXPERIMENT_TYPE_CODE) + newFlowCell = transaction.createNewSample('/' + FLOWCELL_SPACE + '/' + name, "ILLUMINA_FLOW_CELL") + newFlowCell.setExperiment(exp) + + if IS_HISEQ_RUN: + run = runInfo.getAllchildren('Run')[0].attrib + if (run['Id'] != name): + raise NameError('Flowcell names do not match between directory name '+ name + + ' and ' + RUNINFO + 'property file: ' + run['Id']) + + # The HiSeq is providing more infos, which we will parse here: + runParameters = parseXmlFile(incomingPath + '/' + RUNPARAMETERS) + + newFlowCell.setPropertyValue("ILLUMINA_PIPELINE_VERSION", runParameters.getXmlElement(RUNPARAMETERS_XML['RTAVERSION'])) + newFlowCell.setPropertyValue("FLOWCELLTYPE", runParameters.getXmlElement(RUNPARAMETERS_XML['FLOWCELL'])) + newFlowCell.setPropertyValue("CONTROL_LANE", runParameters.getXmlElement(RUNPARAMETERS_XML['CONTROLLANE'])) + newFlowCell.setPropertyValue("SBS_KIT", runParameters.getXmlElement(RUNPARAMETERS_XML['SBS'])) + + read1 = runParameters.getAllchildren('Read1') + newFlowCell.setPropertyValue("CYCLES_REQUESTED_BY_CUSTOMER", read1[0].text) + + read2 = runParameters.getAllchildren('Read2') + if (str(read2[0].text) == '0'): + newFlowCell.setPropertyValue("END_TYPE", "SINGLE_READ") + else: + newFlowCell.setPropertyValue("END_TYPE", "PAIRED_END") + newFlowCell.setPropertyValue("PAIRED_END_KIT", runParameters.getXmlElement(RUNPARAMETERS_XML['PE'])) + + indexRead1 = runParameters.getAllchildren('IndexRead1') + newFlowCell.setPropertyValue("INDEXREAD", indexRead1[0].text) + + indexRead2 = runParameters.getAllchildren('IndexRead2') + newFlowCell.setPropertyValue("INDEXREAD2", indexRead2[0].text) -[registerFlowLane(lane) for lane in range(1,int(maxLanes)+1)] + def setFcProperty(searchId, dict): + children = runInfo.getAllchildren(searchId) + for element in (dict): + if (element <> '') and (dict[element] <> ''): + newFlowCell.setPropertyValue(element, children[0].attrib[dict[element]]) + + setFcProperty('FlowcellLayout', RUNINFO_XML) + + sequencer = runInfo.getAllchildren('Instrument') + newFlowCell.setPropertyValue("SEQUENCER", INSTRUMENT[sequencer[0].text]) + + newFlowCell.setPropertyValue("FLOW_CELL_SEQUENCED_ON", create_openbis_timestamp()) + if IS_HISEQ_RUN: + maxLanes = runInfo.getAllchildren('FlowcellLayout')[0].attrib[RUNINFO_XML['LANECOUNT']] + else: + maxLanes = len(runInfo.getAllchildren('Tiles')[0]) + + [registerFlowLane(transaction, lane, name, newFlowCell) for lane in range(1,int(maxLanes)+1)] -shutil.rmtree(incomingPath) + shutil.rmtree(incomingPath) diff --git a/deep_sequencing_unit/source/core-plugins/illumina-ngs/1/dss/drop-boxes/create-flowcell-dropbox/plugin.properties b/deep_sequencing_unit/source/core-plugins/illumina-ngs/1/dss/drop-boxes/create-flowcell-dropbox/plugin.properties index e4d25523f27..9480a66c84c 100644 --- a/deep_sequencing_unit/source/core-plugins/illumina-ngs/1/dss/drop-boxes/create-flowcell-dropbox/plugin.properties +++ b/deep_sequencing_unit/source/core-plugins/illumina-ngs/1/dss/drop-boxes/create-flowcell-dropbox/plugin.properties @@ -7,6 +7,6 @@ # Path to the directory which contains incoming directories for drop boxes. incoming-dir = ${incoming-root-dir}/incoming-create-flowcell incoming-data-completeness-condition = auto-detection -top-level-data-set-handler = ch.systemsx.cisd.etlserver.registrator.JythonTopLevelDataSetHandler +top-level-data-set-handler = ch.systemsx.cisd.etlserver.registrator.api.v2.JythonTopLevelDataSetHandlerV2 script-path = create-flowcell-dropbox.py -storage-processor = ch.systemsx.cisd.etlserver.DefaultStorageProcessor \ No newline at end of file +storage-processor = ch.systemsx.cisd.etlserver.DefaultStorageProcessor diff --git a/deep_sequencing_unit/source/core-plugins/illumina-ngs/1/dss/drop-boxes/register-flowcell-dropbox/plugin.properties b/deep_sequencing_unit/source/core-plugins/illumina-ngs/1/dss/drop-boxes/register-flowcell-dropbox/plugin.properties index 7534e445085..9c4e3bbc2c1 100644 --- a/deep_sequencing_unit/source/core-plugins/illumina-ngs/1/dss/drop-boxes/register-flowcell-dropbox/plugin.properties +++ b/deep_sequencing_unit/source/core-plugins/illumina-ngs/1/dss/drop-boxes/register-flowcell-dropbox/plugin.properties @@ -6,6 +6,6 @@ # Path to the directory which contains incoming directories for drop boxes. incoming-dir = ${incoming-root-dir}/incoming-register-flowcell incoming-data-completeness-condition = auto-detection -top-level-data-set-handler = ch.systemsx.cisd.etlserver.registrator.JythonTopLevelDataSetHandler +top-level-data-set-handler = ch.systemsx.cisd.etlserver.registrator.api.v2.JythonTopLevelDataSetHandlerV2 script-path = register-flowcell-dropbox.py -storage-processor = ch.systemsx.cisd.etlserver.DefaultStorageProcessor \ No newline at end of file +storage-processor = ch.systemsx.cisd.etlserver.DefaultStorageProcessor diff --git a/deep_sequencing_unit/source/core-plugins/illumina-ngs/1/dss/drop-boxes/register-flowcell-dropbox/register-flowcell-dropbox.py b/deep_sequencing_unit/source/core-plugins/illumina-ngs/1/dss/drop-boxes/register-flowcell-dropbox/register-flowcell-dropbox.py index e19f4394b94..48c1f20c76e 100644 --- a/deep_sequencing_unit/source/core-plugins/illumina-ngs/1/dss/drop-boxes/register-flowcell-dropbox/register-flowcell-dropbox.py +++ b/deep_sequencing_unit/source/core-plugins/illumina-ngs/1/dss/drop-boxes/register-flowcell-dropbox/register-flowcell-dropbox.py @@ -1,9 +1,31 @@ ''' -expected incoming Name for HiSeq2000 runs: 110715_SN792_0054_BC035RACXX +@copyright: +2012 ETH Zuerich, CISD + +@license: +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +@description: +Registers an incoming directory as a data set in openBIS. The name of the directory is used to +search for the matching sample. + +@note: +print statements go to: <openBIS_HOME>/datastore_server/log/startup_log.txt +expected incoming Name for HiSeq runs: 110715_SN792_0054_BC035RACXX expected incoming Name for GAII runs: 110812_6353WAAXX -Note: -print statements go to: ~openbis/sprint/datastore_server/log/startup_log.txt +@author: +Manuel Kohler ''' from ch.systemsx.cisd.openbis.generic.shared.api.v1.dto import SearchCriteria @@ -13,34 +35,32 @@ IS_HISEQ_RUN = False DATASET_TYPE_HISEQ = "ILLUMINA_HISEQ_OUTPUT" DATASET_TYPE_GA = "ILLUMINA_GA_OUTPUT" -# Create a "transaction" -- a way of grouping operations together so they all -# happen or none of them do. -transaction = service.transaction() +def process(transaction): -incomingPath = incoming.getAbsolutePath() + incomingPath = transaction.getIncoming().getAbsolutePath() -# Get the incoming name -name = incoming.getName() + # Get the incoming name + name = transaction.getIncoming().getName() -split = name.split("_") -if (len(split) == 4): - dataSet = transaction.createNewDataSet(DATASET_TYPE_HISEQ) - IS_HISEQ_RUN = True -if (len(split) == 2): - dataSet = transaction.createNewDataSet(DATASET_TYPE_GA) + split = name.split("_") + if (len(split) == 4): + dataSet = transaction.createNewDataSet(DATASET_TYPE_HISEQ) + IS_HISEQ_RUN = True + if (len(split) == 2): + dataSet = transaction.createNewDataSet(DATASET_TYPE_GA) -# Create a data set and set type -dataSet.setMeasuredData(False) + # Create a data set and set type + dataSet.setMeasuredData(False) -# Get the search service -search_service = transaction.getSearchService() + # Get the search service + search_service = transaction.getSearchService() -# Search for the sample -sc = SearchCriteria() -sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, name)); -foundSamples = search_service.searchForSamples(sc) - -if foundSamples.size() > 0: - # Add the incoming file into the data set - transaction.moveFile(incomingPath, dataSet) - dataSet.setSample(foundSamples[0]) + # Search for the sample + sc = SearchCriteria() + sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, name)); + foundSamples = search_service.searchForSamples(sc) + + if foundSamples.size() > 0: + # Add the incoming file into the data set + transaction.moveFile(incomingPath, dataSet) + dataSet.setSample(foundSamples[0]) -- GitLab