diff --git a/deep_sequencing_unit/sourceTest/core-plugins/illumina-qgf/1/dss/drop-boxes/create-flowcell/create-flowcell.py b/deep_sequencing_unit/sourceTest/core-plugins/illumina-qgf/1/dss/drop-boxes/create-flowcell/create-flowcell.py index 11517d91798adc4d41aa693ba6805326ac4f785a..9ab05190fe88bd8b1e14c50c181c46f4dfa9ffb4 100644 --- a/deep_sequencing_unit/sourceTest/core-plugins/illumina-qgf/1/dss/drop-boxes/create-flowcell/create-flowcell.py +++ b/deep_sequencing_unit/sourceTest/core-plugins/illumina-qgf/1/dss/drop-boxes/create-flowcell/create-flowcell.py @@ -38,6 +38,10 @@ import xml.etree.ElementTree as etree from ch.systemsx.cisd.openbis.generic.shared.api.v1.dto import SearchCriteria from ch.systemsx.cisd.common.mail import EMailAddress +# The following module is located in the path defined in the datastore_server.conf +# Look for: -Dpython.path} +from gfb_utils import * + RUNPARAMETERS = 'RunParameters.xml' ALTERNATIVE_RUNPARAMETERS = 'runParameters.xml' RUNINFO = 'RunInfo.xml' @@ -57,15 +61,6 @@ RUNPARAMETERS_XML = {'FLOWCELL':'Flowcell', 'RTAVERSION':'RTAVersion', PERSISTENT_KEY_MAP = "persistent_key_map" -SEQUENCER_DICT = {'HISEQ_4000': 'Illumina HiSeq 4000', - 'HISEQ_3000': 'Illumina HiSeq 3000', - 'HISEQ_2500': 'Illumina HiSeq 2500', - 'HISEQ_2000': 'Illumina HiSeq 2000', - 'HISEQ_X': 'Illumina HiSeq X', - 'NEXTSEQ_500': 'Illumina NextSeq 500', - 'MISEQ': 'Illumina MiSeq', - 'UNIDENTIFIED': 'Unidentified'} - class parseXmlFile: @@ -93,59 +88,10 @@ class parseXmlFile: ''' for e in self.root.getchildren(): # the '//' means look recursively for all children not only direct ones - childList = self.tree.findall('//' + elementName) + childList = self.tree.findall('.//' + elementName) return childList -def get_model(run_id): - """ - Guesses the sequencer model from the run folder name - - Current Naming schema for Illumina run folders, as far as I know, - no documentation found on this, Illumina introduced a field called - <InstrumentID> on the NextSeq runParameters.xml. That might be an - option for the future. Alternatively a combination of the fields - <ApplicationName> and <ApplicationVersion>. 
- - MiSeq: 150130_M01761_0114_000000000-ACUR0 - NextSeq: 150202_NS500318_0047_AH3KLMBGXX - HiSeq 2000: 130919_SN792_0281_BD2CHRACXX - HiSeq 2500: 150203_D00535_0052_AC66RWANXX - HiSeq 3000: 150724_J00121_0017_AH2VYMBBXX - HiSeq 4000: 150210_K00111_0013_AH2372BBXX - HiSeq X: 141121_ST-E00107_0356_AH00C3CCXX - """ - date, machine_id, run_number, fc_string = os.path.basename(run_id).split("_") - - if machine_id.startswith("NS"): - model = SEQUENCER_DICT['NEXTSEQ_500'] - elif machine_id.startswith("M"): - model = SEQUENCER_DICT['MISEQ'] - elif machine_id.startswith("D"): - model = SEQUENCER_DICT['HISEQ_2500'] - elif machine_id.startswith("SN"): - model = SEQUENCER_DICT['HISEQ_2000'] - elif machine_id.startswith("J"): - model = SEQUENCER_DICT['HISEQ_3000'] - elif machine_id.startswith("K"): - model = SEQUENCER_DICT['HISEQ_4000'] - elif machine_id.startswith("ST"): - model = SEQUENCER_DICT['HISEQ_X'] - else: - model = SEQUENCER_DICT['UNIDENTIFIED'] - return model - - -def createOpenbisTimeStamp(file): - ''' - Creates a openBIS compatible time stamp of a file time stamp - ''' - mtime = os.path.getmtime(file) - lt = localtime(mtime) - tz = localtime().tm_hour - gmtime().tm_hour - return (strftime("%Y-%m-%d %H:%M:%S GMT" + "%+.2d" % tz + ":00", lt)) - - def registerFlowLane(a_lane, transaction, name, newFlowCell): ''' Registers a new Flow lane @@ -196,7 +142,6 @@ def post_storage(context): def searchSample(transaction, sampleName): - # Search for the sample and check if there is already sample with this ID search_service = transaction.getSearchService() sc = SearchCriteria() sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sampleName)); @@ -285,7 +230,7 @@ def process(transaction): run_date, sequencer_id, running_number, tray_and_fcId = run_id.split("_") tray = tray_and_fcId[0] - if model is SEQUENCER_DICT['MISEQ']: + if model in [Sequencers.MISEQ]: fc_id = tray_and_fcId else: fc_id = tray_and_fcId[1:] @@ -304,12 +249,12 @@ def process(transaction): flow_lanes = new_flowcell.getContainedSamples() if len(flow_lanes) is 0: - if model in SEQUENCER_DICT['NEXTSEQ_500']: + if model in [Sequencers.NEXTSEQ_500]: max_lanes = 1 [registerFlowLane(lane, transaction, fc_id, new_flowcell) for lane in range(1,int(max_lanes)+1)] # NextSeq specific - if model in SEQUENCER_DICT['NEXTSEQ_500']: + if model in [Sequencers.NEXTSEQ_500]: run_mode = sanitizeString(runParameters.getAllchildren('Chemistry')[0].text) set_run_mode(transaction, new_flowcell, run_mode) recipe_folder = (runParameters.getAllchildren('RecipeFolder'))[0].text @@ -318,13 +263,13 @@ def process(transaction): new_flowcell.setPropertyValue("CONTROL_SOFTWARE_VERSION", runParameters.getAllchildren('ApplicationVersion')[0].text) # MiSeq specific - if model in SEQUENCER_DICT['MISEQ']: + if model in [Sequencers.MISEQ]: ReagentKitBarcode = (runParameters.getAllchildren('ReagentKitBarcode'))[0].text new_flowcell.setPropertyValue("SBS_KIT", ReagentKitBarcode) new_flowcell.setPropertyValue("CONTROL_SOFTWARE_VERSION", runParameters.getAllchildren('ApplicationVersion')[0].text) # HiSeq specific - if model in [SEQUENCER_DICT['HISEQ_2500'], SEQUENCER_DICT['HISEQ_3000'], SEQUENCER_DICT['HISEQ_4000'], SEQUENCER_DICT['HISEQ_X']]: + if model in HISEQ_LIST: run_mode = sanitizeString(runParameters.getXmlElement(RUNPARAMETERS_XML['RUN_MODE'])) new_flowcell.setPropertyValue("FLOWCELLTYPE", runParameters.getXmlElement(RUNPARAMETERS_XML['FLOWCELL'])) new_flowcell.setPropertyValue("SBS_KIT", 
runParameters.getXmlElement(RUNPARAMETERS_XML['SBS'])) @@ -343,7 +288,7 @@ def process(transaction): sequencer = runInfo.getAllchildren('Instrument') addVocabularyTerm(transaction, "SEQUENCER", sequencer[0].text) new_flowcell.setPropertyValue("SEQUENCER", sequencer[0].text) - new_flowcell.setPropertyValue("FLOW_CELL_SEQUENCED_ON", createOpenbisTimeStamp(os.path.join(incomingPath, RUNINFO))) + new_flowcell.setPropertyValue("FLOW_CELL_SEQUENCED_ON", create_openbis_timestamp(os.path.join(incomingPath, RUNINFO))) new_flowcell.setPropertyValue("RUN_NAME_FOLDER", run_id) readMap = {} diff --git a/deep_sequencing_unit/sourceTest/core-plugins/illumina-qgf/1/dss/drop-boxes/create-flowcell/plugin.properties b/deep_sequencing_unit/sourceTest/core-plugins/illumina-qgf/1/dss/drop-boxes/create-flowcell/plugin.properties index 66a6e198d4e5d1a54588ec3d8202315f940dad04..dd75e75ff45741de5cf5c598ba18323d0613f284 100644 --- a/deep_sequencing_unit/sourceTest/core-plugins/illumina-qgf/1/dss/drop-boxes/create-flowcell/plugin.properties +++ b/deep_sequencing_unit/sourceTest/core-plugins/illumina-qgf/1/dss/drop-boxes/create-flowcell/plugin.properties @@ -5,4 +5,5 @@ incoming-dir = ${incoming-root-dir}/create-flowcell incoming-data-completeness-condition = marker-file top-level-data-set-handler = ch.systemsx.cisd.etlserver.registrator.api.v2.JythonTopLevelDataSetHandlerV2 script-path = create-flowcell.py -storage-processor = ch.systemsx.cisd.etlserver.DefaultStorageProcessor \ No newline at end of file +storage-processor = ch.systemsx.cisd.etlserver.DefaultStorageProcessor +jython-version=2.7 \ No newline at end of file diff --git a/deep_sequencing_unit/sourceTest/core-plugins/illumina-qgf/1/dss/drop-boxes/read-rta-timestamp/plugin.properties b/deep_sequencing_unit/sourceTest/core-plugins/illumina-qgf/1/dss/drop-boxes/read-rta-timestamp/plugin.properties index ef56111b768f0d12631d249469e5c942506f01f0..2c8f40225a20a6f5d644035354094806260e4a52 100644 --- a/deep_sequencing_unit/sourceTest/core-plugins/illumina-qgf/1/dss/drop-boxes/read-rta-timestamp/plugin.properties +++ b/deep_sequencing_unit/sourceTest/core-plugins/illumina-qgf/1/dss/drop-boxes/read-rta-timestamp/plugin.properties @@ -9,3 +9,4 @@ incoming-data-completeness-condition = marker-file top-level-data-set-handler = ch.systemsx.cisd.etlserver.registrator.api.v2.JythonTopLevelDataSetHandlerV2 script-path = read-rta-timestamp.py storage-processor = ch.systemsx.cisd.etlserver.DefaultStorageProcessor +jython-version=2.7 \ No newline at end of file diff --git a/deep_sequencing_unit/sourceTest/core-plugins/illumina-qgf/1/dss/drop-boxes/read-rta-timestamp/read-rta-timestamp.py b/deep_sequencing_unit/sourceTest/core-plugins/illumina-qgf/1/dss/drop-boxes/read-rta-timestamp/read-rta-timestamp.py index c701f27f3d6ab8a935f631acc79664f9b9ce3f6d..bd6fb57dfa77ad3e5804cd241e55fe35059b324e 100644 --- a/deep_sequencing_unit/sourceTest/core-plugins/illumina-qgf/1/dss/drop-boxes/read-rta-timestamp/read-rta-timestamp.py +++ b/deep_sequencing_unit/sourceTest/core-plugins/illumina-qgf/1/dss/drop-boxes/read-rta-timestamp/read-rta-timestamp.py @@ -1,49 +1,70 @@ +''' +@copyright: +2015 ETH Zuerich, SIS + +@license: +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +@description: +Reads out the timestamp of the RTAComplete.txt file to register the timestamp in openBIS + +@note: +print statements go to: <openBIS_HOME>/datastore_server/log/startup_log.txt + +@author: +Manuel Kohler +''' + import os import shutil from time import * from datetime import * from ch.systemsx.cisd.openbis.generic.shared.api.v1.dto import SearchCriteria +from os.path import basename -MarkerGAComplete = 'RTAComplete.txt' -MarkerHiSeqComplete = 'RTAComplete.txt' -MarkerNextSeqComplete = 'RTAComplete.txt' - -def createOpenbisTimeStamp(file): - ''' - Creates a openBIS compatible time stamp of a file time stamp - ''' - mtime = os.path.getmtime(file) - lt = localtime(mtime) - tz = localtime().tm_hour - gmtime().tm_hour - return (strftime("%Y-%m-%d %H:%M:%S GMT" + "%+.2d" % tz + ":00", lt)) +# The following module is located in the path defined in the datastore_server.conf +# Look for: -Dpython.path} +from gfb_utils import * -# ----------------------------------------------------------------------------- +MARKER_RUN_COMPLETE = 'RTAComplete.txt' def process(transaction): - incomingPath = transaction.getIncoming().getAbsolutePath() - # Get the incoming name - name = transaction.getIncoming().getName() + incoming = transaction.getIncoming() + incoming_path = incoming.getAbsolutePath() + run_id = incoming.getName() + model = get_model(run_id) + + thread_property_dict = get_thread_properties(transaction) + absolutePath = os.path.dirname(os.path.realpath(thread_property_dict['script-path'])) + print(basename(absolutePath) + ": Auto-detected Illumina model: " + model) + + run_date, sequencer_id, running_number, tray_and_fcId = run_id.split("_") + tray = tray_and_fcId[0] + if model in [Sequencers.MISEQ]: + fc_id = tray_and_fcId + else: + fc_id = tray_and_fcId[1:] - split=name.split("_") - if (len(split) == 4): - if (split[1].startswith("NS")): - Markerfile = incomingPath + "/" + MarkerNextSeqComplete - name = split[-1][1:] + marker_file = os.path.join(incoming_path, MARKER_RUN_COMPLETE) + + search_service = transaction.getSearchService() + sc = SearchCriteria() + sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, fc_id)); + found_samples = search_service.searchForSamples(sc) + + if found_samples.size() > 0: + sa = transaction.getSampleForUpdate(found_samples[0].getSampleIdentifier()) + sa.setPropertyValue("SEQUENCER_FINISHED", create_openbis_timestamp(marker_file)) else: - IS_HISEQ_RUN=True - Markerfile = incomingPath + "/" + MarkerHiSeqComplete - if (len(split) == 2): - Markerfile = incomingPath + "/" + MarkerGAComplete - - # Search for the sample and check if there is already sample with this name - search_service = transaction.getSearchService() - sc = SearchCriteria() - sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, name)); - foundSamples = search_service.searchForSamples(sc) - - if foundSamples.size() > 0: - sa = transaction.getSampleForUpdate(foundSamples[0].getSampleIdentifier()) - sa.setPropertyValue("SEQUENCER_FINISHED", createOpenbisTimeStamp(Markerfile)) - - 
shutil.rmtree(incomingPath) + print(fc_id + " was not found in openBIS") \ No newline at end of file diff --git a/deep_sequencing_unit/sourceTest/core-plugins/illumina-qgf/1/dss/drop-boxes/register-flowcell/plugin.properties b/deep_sequencing_unit/sourceTest/core-plugins/illumina-qgf/1/dss/drop-boxes/register-flowcell/plugin.properties new file mode 100644 index 0000000000000000000000000000000000000000..82d34c7fd9fa6be9a84252b453ee480fd32a41fa --- /dev/null +++ b/deep_sequencing_unit/sourceTest/core-plugins/illumina-qgf/1/dss/drop-boxes/register-flowcell/plugin.properties @@ -0,0 +1,6 @@ +incoming-dir = ${incoming-root-dir}/register-flowcell +incoming-data-completeness-condition = marker-file +top-level-data-set-handler = ch.systemsx.cisd.etlserver.registrator.api.v2.JythonTopLevelDataSetHandlerV2 +script-path = register-flowcell.py +storage-processor = ch.systemsx.cisd.etlserver.DefaultStorageProcessor +jython-version=2.7 diff --git a/deep_sequencing_unit/sourceTest/core-plugins/illumina-qgf/1/dss/drop-boxes/register-flowcell/register-flowcell.py b/deep_sequencing_unit/sourceTest/core-plugins/illumina-qgf/1/dss/drop-boxes/register-flowcell/register-flowcell.py new file mode 100644 index 0000000000000000000000000000000000000000..2f712bfd7944ca8a0c3cb0c0c29e368e7f079957 --- /dev/null +++ b/deep_sequencing_unit/sourceTest/core-plugins/illumina-qgf/1/dss/drop-boxes/register-flowcell/register-flowcell.py @@ -0,0 +1,106 @@ +''' +@copyright: +2015 ETH Zuerich, SIS + +@license: +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +@description: +Registers an incoming directory as a data set in openBIS. The name of the directory is used to +search for the matching sample. 
+ +@note: +print statements go to: <openBIS_HOME>/datastore_server/log/startup_log.txt + +@author: +Manuel Kohler +''' + +import re +import glob +import os +from itertools import islice +from ch.systemsx.cisd.openbis.generic.shared.api.v1.dto import SearchCriteria + +# The following module is located in the path defined in the datastore_server.conf +# Look for: -Dpython.path} +from gfb_utils import * + +ILLUMINA_HISEQ_OUTPUT_DS_TYPE = "ILLUMINA_HISEQ_OUTPUT" +ILLUMINA_MISEQ_OUTPUT_DS_TYPE = "ILLUMINA_MISEQ_OUTPUT" +ILLUMINA_NEXTSEQ_OUTPUT_DS_TYPE = "ILLUMINA_NEXTSEQ_OUTPUT" + + +def get_bcl_version(file): + pattern = re.compile("bcl2fastq") + matching_line_list = [] + bcl_version = "Not specified" + number_of_lines_to_read = 3 + + if file: + with open(file[0]) as nohup: + head = list(islice(nohup, number_of_lines_to_read)) + for line in head: + if re.search(pattern, line): + matching_line_list.append(line) + else: + print("File " + str(file) + " not found!") + + if matching_line_list: + bcl_version = matching_line_list[0].strip() + return bcl_version + + +def process(transaction): + incoming = transaction.getIncoming() + incoming_path = incoming.getAbsolutePath() + run_id = incoming.getName() + model = get_model(run_id) + + if model in HISEQ_LIST: + DATASET_TYPE = ILLUMINA_HISEQ_OUTPUT_DS_TYPE + elif model in [Sequencers.NEXTSEQ_500]: + DATASET_TYPE = ILLUMINA_NEXTSEQ_OUTPUT_DS_TYPE + elif model in [Sequencers.MISEQ]: + DATASET_TYPE = ILLUMINA_MISEQ_OUTPUT_DS_TYPE + else: + print("Could set a data set type for flowcell data!") + + thread_property_dict = get_thread_properties(transaction) + absolutePath = os.path.dirname(os.path.realpath(thread_property_dict['script-path'])) + print(os.path.basename(absolutePath) + ": Auto-detected Illumina model: " + model) + + run_date, sequencer_id, running_number, tray_and_fcId = run_id.split("_") + tray = tray_and_fcId[0] + if model in [Sequencers.MISEQ]: + fc_id = tray_and_fcId + else: + fc_id = tray_and_fcId[1:] + + file = glob.glob(os.path.join(incoming_path, "nohup*")) + bcl_version = get_bcl_version(file) + + found_flow_cell = search_unique_sample(transaction, fc_id) + + search_service = transaction.getSearchService() + get_flowcell_with_contained_samples = search_service.getSample(found_flow_cell[0].getSampleIdentifier()) + flowlanes = get_flowcell_with_contained_samples.getContainedSamples() + + for lane in flowlanes: + mutable_lane = transaction.getSampleForUpdate(lane.getSampleIdentifier()) + mutable_lane.setPropertyValue("BCL_VERSION", bcl_version) + + dataSet = transaction.createNewDataSet(DATASET_TYPE) + dataSet.setMeasuredData(False) + transaction.moveFile(incoming_path, dataSet) + dataSet.setSample(found_flow_cell[0]) diff --git a/deep_sequencing_unit/sourceTest/core-plugins/illumina-qgf/1/dss/drop-boxes/register-flowlane/lib/crc32_v2.c b/deep_sequencing_unit/sourceTest/core-plugins/illumina-qgf/1/dss/drop-boxes/register-flowlane/lib/crc32_v2.c new file mode 100644 index 0000000000000000000000000000000000000000..b38d22d40f641254661994d4cc01492bb7c8d35a --- /dev/null +++ b/deep_sequencing_unit/sourceTest/core-plugins/illumina-qgf/1/dss/drop-boxes/register-flowlane/lib/crc32_v2.c @@ -0,0 +1,206 @@ +/*----------------------------------------------------------------------------*\ + * CRC-32 version 2.0.0 by Craig Bruce, 2006-04-29. + * + * This program generates the CRC-32 values for the files named in the + * command-line arguments. These are the same CRC-32 values used by GZIP, + * PKZIP, and ZMODEM. 
The Crc32_ComputeBuf() can also be detached and + * used independently. + * + * THIS PROGRAM IS PUBLIC-DOMAIN SOFTWARE. + * + * Based on the byte-oriented implementation "File Verification Using CRC" + * by Mark R. Nelson in Dr. Dobb's Journal, May 1992, pp. 64-67. + * + * v1.0.0: original release. + * v1.0.1: fixed printf formats. + * v1.0.2: fixed something else. + * v1.0.3: replaced CRC constant table by generator function. + * v1.0.4: reformatted code, made ANSI C. 1994-12-05. + * v2.0.0: rewrote to use memory buffer & static table, 2006-04-29. +\*----------------------------------------------------------------------------*/ + +#include <stdio.h> +#include <stdlib.h> + +/*----------------------------------------------------------------------------*\ + * Local functions +\*----------------------------------------------------------------------------*/ + +static int Crc32_ComputeFile( FILE *file, unsigned long *outCrc32 ); + +static unsigned long Crc32_ComputeBuf( unsigned long inCrc32, const void *buf, + size_t bufLen ); + +/*----------------------------------------------------------------------------*\ + * NAME: + * main() - main function for CRC-32 generation + * DESCRIPTION: + * Computes the CRC-32 value for the set of files named in the command- + * line arguments. + * ARGUMENTS: + * argc - command-line-argument count + * argv - command-line-argument strings + * RETURNS: + * err - 0 on success or executes exit(1) on error + * ERRORS: + * - file errors +\*----------------------------------------------------------------------------*/ + +int main( int argc, const char *argv[] ) +{ + FILE *file = NULL; + const char *filename; + unsigned long argIdx; + unsigned long crc32; + int err; + + /** compute crcs **/ + if (argc < 2) { + /** read from 'stdin' if no arguments given **/ + err = Crc32_ComputeFile( stdin, &crc32 ); + if (err == -1) goto ERR_EXIT; + printf("crc32 = 0x%08lX for (stdin)\n", crc32 ); + } else { + /** report named files in sequence **/ + for (argIdx=1; argIdx < argc; argIdx++) { + filename = argv[argIdx]; + file = fopen( filename, "rb" ); + if (file == NULL) { + fprintf( stderr, "error opening file \"%s\"!\n", filename ); + goto ERR_EXIT; + } + err = Crc32_ComputeFile( file, &crc32 ); + if (err == -1) goto ERR_EXIT; + /*printf("crc32 = 0x%08lX for \"%s\"\n", crc32, filename );*/ + printf("%08lX", crc32); + err = fclose( file ); + file = NULL; + if (err == EOF) { + fprintf( stderr, "error closing file \"%s\"!\n", filename ); + goto ERR_EXIT; + } + } + } + return( 0 ); + + /** error exit **/ +ERR_EXIT: + if (file != NULL) fclose( file ); + exit( 1 ); +} + +/*----------------------------------------------------------------------------*\ + * NAME: + * Crc32_ComputeFile() - compute CRC-32 value for a file + * DESCRIPTION: + * Computes the CRC-32 value for an opened file. 
+ * ARGUMENTS: + * file - file pointer + * outCrc32 - (out) result CRC-32 value + * RETURNS: + * err - 0 on success or -1 on error + * ERRORS: + * - file errors +\*----------------------------------------------------------------------------*/ + +static int Crc32_ComputeFile( FILE *file, unsigned long *outCrc32 ) +{ +# define CRC_BUFFER_SIZE 8192 + unsigned char buf[CRC_BUFFER_SIZE]; + size_t bufLen; + + /** accumulate crc32 from file **/ + *outCrc32 = 0; + while (1) { + bufLen = fread( buf, 1, CRC_BUFFER_SIZE, file ); + if (bufLen == 0) { + if (ferror(file)) { + fprintf( stderr, "error reading file\n" ); + goto ERR_EXIT; + } + break; + } + *outCrc32 = Crc32_ComputeBuf( *outCrc32, buf, bufLen ); + } + return( 0 ); + + /** error exit **/ +ERR_EXIT: + return( -1 ); +} + +/*----------------------------------------------------------------------------*\ + * NAME: + * Crc32_ComputeBuf() - computes the CRC-32 value of a memory buffer + * DESCRIPTION: + * Computes or accumulates the CRC-32 value for a memory buffer. + * The 'inCrc32' gives a previously accumulated CRC-32 value to allow + * a CRC to be generated for multiple sequential buffer-fuls of data. + * The 'inCrc32' for the first buffer must be zero. + * ARGUMENTS: + * inCrc32 - accumulated CRC-32 value, must be 0 on first call + * buf - buffer to compute CRC-32 value for + * bufLen - number of bytes in buffer + * RETURNS: + * crc32 - computed CRC-32 value + * ERRORS: + * (no errors are possible) +\*----------------------------------------------------------------------------*/ + +static unsigned long Crc32_ComputeBuf( unsigned long inCrc32, const void *buf, + size_t bufLen ) +{ + static const unsigned long crcTable[256] = { + 0x00000000,0x77073096,0xEE0E612C,0x990951BA,0x076DC419,0x706AF48F,0xE963A535, + 0x9E6495A3,0x0EDB8832,0x79DCB8A4,0xE0D5E91E,0x97D2D988,0x09B64C2B,0x7EB17CBD, + 0xE7B82D07,0x90BF1D91,0x1DB71064,0x6AB020F2,0xF3B97148,0x84BE41DE,0x1ADAD47D, + 0x6DDDE4EB,0xF4D4B551,0x83D385C7,0x136C9856,0x646BA8C0,0xFD62F97A,0x8A65C9EC, + 0x14015C4F,0x63066CD9,0xFA0F3D63,0x8D080DF5,0x3B6E20C8,0x4C69105E,0xD56041E4, + 0xA2677172,0x3C03E4D1,0x4B04D447,0xD20D85FD,0xA50AB56B,0x35B5A8FA,0x42B2986C, + 0xDBBBC9D6,0xACBCF940,0x32D86CE3,0x45DF5C75,0xDCD60DCF,0xABD13D59,0x26D930AC, + 0x51DE003A,0xC8D75180,0xBFD06116,0x21B4F4B5,0x56B3C423,0xCFBA9599,0xB8BDA50F, + 0x2802B89E,0x5F058808,0xC60CD9B2,0xB10BE924,0x2F6F7C87,0x58684C11,0xC1611DAB, + 0xB6662D3D,0x76DC4190,0x01DB7106,0x98D220BC,0xEFD5102A,0x71B18589,0x06B6B51F, + 0x9FBFE4A5,0xE8B8D433,0x7807C9A2,0x0F00F934,0x9609A88E,0xE10E9818,0x7F6A0DBB, + 0x086D3D2D,0x91646C97,0xE6635C01,0x6B6B51F4,0x1C6C6162,0x856530D8,0xF262004E, + 0x6C0695ED,0x1B01A57B,0x8208F4C1,0xF50FC457,0x65B0D9C6,0x12B7E950,0x8BBEB8EA, + 0xFCB9887C,0x62DD1DDF,0x15DA2D49,0x8CD37CF3,0xFBD44C65,0x4DB26158,0x3AB551CE, + 0xA3BC0074,0xD4BB30E2,0x4ADFA541,0x3DD895D7,0xA4D1C46D,0xD3D6F4FB,0x4369E96A, + 0x346ED9FC,0xAD678846,0xDA60B8D0,0x44042D73,0x33031DE5,0xAA0A4C5F,0xDD0D7CC9, + 0x5005713C,0x270241AA,0xBE0B1010,0xC90C2086,0x5768B525,0x206F85B3,0xB966D409, + 0xCE61E49F,0x5EDEF90E,0x29D9C998,0xB0D09822,0xC7D7A8B4,0x59B33D17,0x2EB40D81, + 0xB7BD5C3B,0xC0BA6CAD,0xEDB88320,0x9ABFB3B6,0x03B6E20C,0x74B1D29A,0xEAD54739, + 0x9DD277AF,0x04DB2615,0x73DC1683,0xE3630B12,0x94643B84,0x0D6D6A3E,0x7A6A5AA8, + 0xE40ECF0B,0x9309FF9D,0x0A00AE27,0x7D079EB1,0xF00F9344,0x8708A3D2,0x1E01F268, + 0x6906C2FE,0xF762575D,0x806567CB,0x196C3671,0x6E6B06E7,0xFED41B76,0x89D32BE0, + 0x10DA7A5A,0x67DD4ACC,0xF9B9DF6F,0x8EBEEFF9,0x17B7BE43,0x60B08ED5,0xD6D6A3E8, + 
0xA1D1937E,0x38D8C2C4,0x4FDFF252,0xD1BB67F1,0xA6BC5767,0x3FB506DD,0x48B2364B, + 0xD80D2BDA,0xAF0A1B4C,0x36034AF6,0x41047A60,0xDF60EFC3,0xA867DF55,0x316E8EEF, + 0x4669BE79,0xCB61B38C,0xBC66831A,0x256FD2A0,0x5268E236,0xCC0C7795,0xBB0B4703, + 0x220216B9,0x5505262F,0xC5BA3BBE,0xB2BD0B28,0x2BB45A92,0x5CB36A04,0xC2D7FFA7, + 0xB5D0CF31,0x2CD99E8B,0x5BDEAE1D,0x9B64C2B0,0xEC63F226,0x756AA39C,0x026D930A, + 0x9C0906A9,0xEB0E363F,0x72076785,0x05005713,0x95BF4A82,0xE2B87A14,0x7BB12BAE, + 0x0CB61B38,0x92D28E9B,0xE5D5BE0D,0x7CDCEFB7,0x0BDBDF21,0x86D3D2D4,0xF1D4E242, + 0x68DDB3F8,0x1FDA836E,0x81BE16CD,0xF6B9265B,0x6FB077E1,0x18B74777,0x88085AE6, + 0xFF0F6A70,0x66063BCA,0x11010B5C,0x8F659EFF,0xF862AE69,0x616BFFD3,0x166CCF45, + 0xA00AE278,0xD70DD2EE,0x4E048354,0x3903B3C2,0xA7672661,0xD06016F7,0x4969474D, + 0x3E6E77DB,0xAED16A4A,0xD9D65ADC,0x40DF0B66,0x37D83BF0,0xA9BCAE53,0xDEBB9EC5, + 0x47B2CF7F,0x30B5FFE9,0xBDBDF21C,0xCABAC28A,0x53B39330,0x24B4A3A6,0xBAD03605, + 0xCDD70693,0x54DE5729,0x23D967BF,0xB3667A2E,0xC4614AB8,0x5D681B02,0x2A6F2B94, + 0xB40BBE37,0xC30C8EA1,0x5A05DF1B,0x2D02EF8D }; + unsigned long crc32; + unsigned char *byteBuf; + size_t i; + + /** accumulate crc32 for buffer **/ + crc32 = inCrc32 ^ 0xFFFFFFFF; + byteBuf = (unsigned char*) buf; + for (i=0; i < bufLen; i++) { + crc32 = (crc32 >> 8) ^ crcTable[ (crc32 ^ byteBuf[i]) & 0xFF ]; + } + return( crc32 ^ 0xFFFFFFFF ); +} + +/*----------------------------------------------------------------------------*\ + * END OF MODULE: crc32.c +\*----------------------------------------------------------------------------*/ diff --git a/deep_sequencing_unit/sourceTest/core-plugins/illumina-qgf/1/dss/drop-boxes/register-flowlane/plugin.properties b/deep_sequencing_unit/sourceTest/core-plugins/illumina-qgf/1/dss/drop-boxes/register-flowlane/plugin.properties new file mode 100644 index 0000000000000000000000000000000000000000..8f519c1de0b7c03a12454965c0d2de47e8184a0e --- /dev/null +++ b/deep_sequencing_unit/sourceTest/core-plugins/illumina-qgf/1/dss/drop-boxes/register-flowlane/plugin.properties @@ -0,0 +1,6 @@ +incoming-dir = ${incoming-root-dir}/register-flowlane +incoming-data-completeness-condition = marker-file +top-level-data-set-handler = ch.systemsx.cisd.etlserver.registrator.api.v2.JythonTopLevelDataSetHandlerV2 +script-path = register-flowlane.py +storage-processor = ch.systemsx.cisd.etlserver.DefaultStorageProcessor +jython-version=2.7 \ No newline at end of file diff --git a/deep_sequencing_unit/sourceTest/core-plugins/illumina-qgf/1/dss/drop-boxes/register-flowlane/register-flowlane.py b/deep_sequencing_unit/sourceTest/core-plugins/illumina-qgf/1/dss/drop-boxes/register-flowlane/register-flowlane.py new file mode 100644 index 0000000000000000000000000000000000000000..073256eee865c8f7f8d785eb07d0f183f6472214 --- /dev/null +++ b/deep_sequencing_unit/sourceTest/core-plugins/illumina-qgf/1/dss/drop-boxes/register-flowlane/register-flowlane.py @@ -0,0 +1,360 @@ +''' +@copyright: +2015 ETH Zuerich, SIS + +@license: +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+ +@note: +print statements go to: <openBIS_HOME>/datastore_server/log/startup_log.txt + +@author: +Manuel Kohler +''' + + +import os +import shutil +import re +import subprocess +from collections import OrderedDict +from ch.systemsx.cisd.openbis.generic.shared.api.v1.dto import SearchCriteria +# The following module is located in the path defined in the datastore_server.conf +# Look for: -Dpython.path} +from gfb_utils import * + +from __builtin__ import file + +FASTQ_GZ_PATTERN = "*.fastq.gz" +METADATA_FILE_SUFFIX = "_metadata.tsv" +AFFILIATION_PROPERTY_NAME='AFFILIATION' +INDEX1='BARCODE' +INDEX2='INDEX2' +EXTERNAL_SAMPLE_NAME='EXTERNAL_SAMPLE_NAME' +INDEXREAD1='INDEXREAD' +INDEXREAD2='INDEXREAD2' +SAMPLE_TYPE = 'SAMPLE_TYPE' +SAMPLE_CODE = 'SAMPLE_CODE' +NCBI_ORGANISM_TAXONOMY='NCBI_ORGANISM_TAXONOMY' +PHIX_TAXONOMY_ID='10847' +DEFAULT_INDEX='NoIndex' +CRC32_PATH='lib/crc32' +CRC32_PATH='lib/a.out' + + +def checkOnFileSize(file): + return os.stat(file).st_size == 0 + + +def CRC32_from_file(filename, transaction): + + if checkOnFileSize(filename): + raise Exception("FILE " + filename + " IS EMPTY!") + + threadPropertyDict = get_thread_properties(transaction) + absolutePath = os.path.dirname(os.path.realpath(threadPropertyDict['script-path'])) + fullPathCrc32 = (os.path.join(absolutePath, CRC32_PATH)) + if os.path.exists(fullPathCrc32): + args = [fullPathCrc32, filename] + p = subprocess.Popen(args, stdout=subprocess.PIPE) + cksum = (p.communicate()[0]) + print("Calculated crc32 checksum for: "+ os.path.basename(filename) + " " + cksum) + else: + cksum = 0 & 0xFFFFFFFF + return cksum + + +def writeMetadataFile(transaction, folder_name, meta_data_file_name, sequencing_sample_properties_dict, + fcMetaDataDict, experiment, sample_space, fastqFileList, flowLane): + ''' + Writes a file of meta data related to one sample + ''' + + sequencing_sample_properties_list = sequencing_sample_properties_dict.keys() + sequencing_sample_properties_list.sort() + + expId = experiment.getIdentifier() + try: + + + meta_data_file = open(meta_data_file_name,'w') + for propertyType in sequencing_sample_properties_list: + if (propertyType in [u'FLOW_CELL_PROPERTIES']): + continue + if propertyType in [SAMPLE_TYPE] or propertyType in [SAMPLE_CODE]: + meta_data_file.write(propertyType.encode('utf-8') + "\t" + + str(sequencing_sample_properties_dict[propertyType])+ "\n") + else: + meta_data_file.write(propertyType.encode('utf-8') + "\t" + + sequencing_sample_properties_dict[propertyType].encode('utf-8').replace('\n',',') + "\n") + + meta_data_file.write("EXPERIMENT\t" + expId + "\n".encode('utf-8')) + meta_data_file.write("\nFLOWCELL PROPERTIES\n".encode('utf-8')) + fcMetaDataDict["LANE_NUMBER"] = flowLane + keys = fcMetaDataDict.keys() + keys.sort() + + sequencer_vocabulary_description = get_vocabulary_descriptions(transaction, 'SEQUENCER') + meta_data_file.write('SEQUENCER_MODEL' + "\t" + + sequencer_vocabulary_description[fcMetaDataDict['SEQUENCER']].encode('utf-8') + "\n") + + for k in keys: + meta_data_file.write(k.encode('utf-8') + "\t" + fcMetaDataDict[k].encode('utf-8') + "\n") + + meta_data_file.write("\nFASTQ_FILES\n".encode('utf-8')) + for file in fastqFileList: + meta_data_file.write(os.path.basename(file) + "\t" + str(CRC32_from_file(file, transaction)) + "\n") + + except IOError: + print ('File error, could not write '+ file) + finally: + meta_data_file.close() + + destinationFolder = folder_name + extraCopySciCore (sample_space, meta_data_file_name, destinationFolder) + + +def extraCopySciCore 
(sample_space, filePath, destinationFolder=""): + ''' + Handles the extra copies of the data for transfer with datamover for SCICORE + ''' + + dropBoxFolder = '/Users/kohleman/tmp/scicore' + #dropBoxFolder = '/links/shared/dsu/dss/customers/biozentrum_scicore/drop-box' + + # if a sample is part of this space list then it will be transferred to sciCore + SPACE_LIST = ["UNI_BASEL_SALZBURGER", "BIOCENTER_HANDSCHIN", "BIOCENTER_ZAVOLAN", + "BIOCENTER_KELLER", "BIOCENTER_SILANDER", "ETHZ_NEUROSTEMX", + "UNI_BASEL_UTZINGER", "UNI_BASEL_GAGNEUX", "BIOZENTRUM_SPANG", + "BIOZENTRUM_JENAL"] + + basename = os.path.basename(filePath) + + if (sample_space in SPACE_LIST): + dirname = os.path.join(dropBoxFolder, destinationFolder) + if not os.path.exists(dirname): + os.mkdir(dirname) + print("COPYING " + filePath + " TO " + dirname) + shutil.copy(filePath, dirname) + else: + print(sample_space + " not in SPACE_LIST. Sample will not be copied to BC2.") + + +def get_sample_properties (transaction, sample): + + sample_properties_dict = {} + # returns Map<String, String> + sample_properties = sample.getSample().getProperties() + sequencing_sample_type = sample.getSampleType() + sequencing_sample_code = sample.getCode() + sample_properties_dict[SAMPLE_TYPE] = sequencing_sample_type + sample_properties_dict[SAMPLE_CODE] = sequencing_sample_code + + for property in sample_properties: + code = property.getPropertyType().getSimpleCode() + sample_properties_dict[code] = property.tryGetAsString() + + ordered_sample_properties_dict = OrderedDict(sorted(sample_properties_dict.items(), key=lambda t: t[0])) + return ordered_sample_properties_dict + + +def searchParents (search_service, parents): + + sc = SearchCriteria() + # set the Search Criteria to an OR condition, default is AND + sc.setOperator(SearchCriteria.SearchOperator.MATCH_ANY_CLAUSES) + # Get the codes for all parents + for parent in parents: + parentSubCode = parent.getSubCode() + sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, parentSubCode)); + # all parents of the flow lane + foundParents = search_service.searchForSamples(sc) + return foundParents + + +def renameFiles (fastq_files, undetermined, flow_cell_id): + + newFastqFileList = [] + for file in fastq_files: + if undetermined: + folder = os.path.dirname(file) + fileName = os.path.basename(file) + filepart, suffix = fileName.split('.',1) + new_file = folder + "/" + flow_cell_id + '_' + filepart + "." 
+ suffix + print ("Renaming file " + file + " to " + new_file) + os.rename(file, new_file) + else: + new_file = file + newFastqFileList.append(new_file) + return newFastqFileList + + +def put_files_to_dataset (transaction, dataSet, fastq_files, folder_name, flow_cell_id, sample_space, undetermined): + + for file in fastq_files: + extraCopySciCore (sample_space, file, folder_name) + transaction.moveFile(file, dataSet, folder_name) + +# ------------------------------------------------------------------------------- + +def split_incoming_folder_name (name): + split=name.split("_") + + # expected incoming Name, e.g.: BSSE_QGF_22266_H0W8YBGXX_1 + if (len(split) == 5): + sample_code = '-'.join([split[0], split[1], split[2]]) + flowCellId = split[3] + flowLane = split[-1] + undetermined = False + + # expected Undetermined_H0W8YBGXX + elif (len(split) == 2): + sample_code = '' + flowCellId = split[-1] + flowLane = '1' + undetermined = True + + # MiSeq BSSE_QGF_36097_000000000_AH4PH_1 + elif (len(split) == 6): + sample_code = '-'.join([split[0], split[1], split[2]]) + flowCellId = '-'.join([split[3],split[4]]) + flowLane = split[-1] + undetermined = False + + #MiSeq Undetermined_000000000_AH4PH_1 + elif (len(split) == 4): + sample_code = '' + flowCellId = '-'.join([split[1], split[2]]) + flowLane = split[-1] + undetermined = True + else: + print("Expected different naming schema!") + + incoming_sample = flowCellId + ':' + flowLane + return sample_code, flowCellId, flowLane, incoming_sample, undetermined + +# ------------------------------------------------------------------------------- + +def get_vocabulary_descriptions (transaction, vocabulary_name): + vocabulary_descriptions_dict = {} + vocabulary = transaction.getVocabulary(vocabulary_name) + vocabulary_terms = vocabulary.getTerms() + for term in vocabulary_terms: + vocabulary_descriptions_dict[term.getCode()] = term.getDescription() + return vocabulary_descriptions_dict + +# ------------------------------------------------------------------------------- + + +def process_regular_samples(transaction, name, sample_code, flowLane, fastq_files, first_fastq_file, search_unique_sample, fcMetaDataDict, dataSet): + foundSample = search_unique_sample(transaction, sample_code) + sequencing_sample = foundSample[0].getSample() + experiment = sequencing_sample.getExperiment() + sequencing_sample_code = sequencing_sample.getCode() + print "sequencing_sample_code: " + sequencing_sample_code + sequencing_sample_properties_dict = get_sample_properties(transaction, foundSample[0]) + if (INDEX1 in sequencing_sample_properties_dict) and (fcMetaDataDict[INDEXREAD1] > 0): + #print(sequencing_sample_properties_dict[INDEX1]) + dataSet.setPropertyValue(INDEX1, sequencing_sample_properties_dict[INDEX1]) + if (INDEX2 in sequencing_sample_properties_dict) and (fcMetaDataDict[INDEXREAD2] > 0): + dataSet.setPropertyValue(INDEX2, sequencing_sample_properties_dict[INDEX2]) + dataSet.setPropertyValue(EXTERNAL_SAMPLE_NAME, sequencing_sample_properties_dict[EXTERNAL_SAMPLE_NAME]) + sample_space = foundSample[0].getSpace() + filepart, suffix = first_fastq_file.split('.', 1) + meta_data_file_name = filepart.rsplit('_', 2)[0] + METADATA_FILE_SUFFIX + # get a file from the IDataSetRegistrationTransaction so it is automatically part of the data set + meta_data_file_path = transaction.createNewFile(dataSet, name, meta_data_file_name) + writeMetadataFile(transaction, name, meta_data_file_path, sequencing_sample_properties_dict, fcMetaDataDict, experiment, sample_space, fastq_files, 
flowLane) + + return fastq_files, sample_space + + +def process_undetermined(transaction, undetermined, name, flowCellId, flowLane, fastq_files, first_fastq_file, search_service, fcMetaDataDict, parents, dataSet): + sample_space = "" + newFastqFiles = [] + lane_parents = searchParents(search_service, parents) + print "Found " + str(lane_parents.size()) + " parents" + newFastqFiles = renameFiles(fastq_files, undetermined, flowCellId) + for parent in lane_parents: + sequencing_sample_properties_dict = get_sample_properties(transaction, parent) + parent_sample = parent.getSample() + sample_code = parent_sample.getCode() + experiment = parent_sample.getExperiment() + sample_space = parent.getSpace() + + # Special Sample Types without index (e.g. ILLUMINA_SEQUENCING_NEUROSTEMX_SINGLECELL) are caught here. + # as those samples do not have a NCBI ORGANISM TAXONOMY + if NCBI_ORGANISM_TAXONOMY not in sequencing_sample_properties_dict: + print sample_code + ": Processing Sample without NCBI ORGANISM TAXONOMY: ILLUMINA_SEQUENCING_NEUROSTEMX_SINGLECELL" + meta_data_file_path = transaction.createNewFile(dataSet, name, sample_code + '_' + flowCellId + '_' + first_fastq_file.split('.')[0] + METADATA_FILE_SUFFIX) + writeMetadataFile(transaction, name, meta_data_file_path, sequencing_sample_properties_dict, fcMetaDataDict, experiment, sample_space, newFastqFiles, flowLane) + + elif (INDEX1 not in sequencing_sample_properties_dict or sequencing_sample_properties_dict[INDEX1] == 'NOINDEX') and \ + (INDEX2 not in sequencing_sample_properties_dict or sequencing_sample_properties_dict[INDEX2] == 'NOINDEX') and \ + (sequencing_sample_properties_dict[NCBI_ORGANISM_TAXONOMY] != PHIX_TAXONOMY_ID): + print 'NONINDEXED sample and Taxonomy id is NOT ' + PHIX_TAXONOMY_ID + ', probably a pool: ' + sample_code + meta_data_file_path = transaction.createNewFile(dataSet, name, sample_code + '_' + flowCellId + '_' + first_fastq_file.split('.')[0] + METADATA_FILE_SUFFIX) + writeMetadataFile(transaction, name, meta_data_file_path, sequencing_sample_properties_dict, fcMetaDataDict, experiment, sample_space, newFastqFiles, flowLane) + + else: + print sample_code + ": Create parent meta data file" + meta_data_file_path = transaction.createNewFile(dataSet, name, 'PARENT_' + sample_code + '_' + flowCellId + METADATA_FILE_SUFFIX) + writeMetadataFile(transaction, name, meta_data_file_path, sequencing_sample_properties_dict, fcMetaDataDict, experiment, sample_space, [], flowLane) + + return newFastqFiles, sample_space + +def process(transaction): + + undetermined = False + print("\n" + str(datetime.now())) + + incomingPath = transaction.getIncoming().getAbsolutePath() + name = transaction.getIncoming().getName() + + sample_code, flowCellId, flowLane, incoming_sample, undetermined = split_incoming_folder_name (name) + + # get all fastqs + fastq_files = get_file_names(incomingPath, FASTQ_GZ_PATTERN) + + # BSSE-QGF-22266-H0W8YBGXX-1-654-BC3-TTAGGC_S1_L001_R1_001.fastq.gz + # BSSE-QGF-22051-H0T25AGXX-1-1-1-TAAGGCGA-CTCTCTAT_S46_L001_R1_001.fastq.gz + first_fastq_file = os.path.basename(fastq_files[0]) + + search_service = transaction.getSearchService() + + flowcell_sample_immutable = search_unique_sample (transaction, flowCellId) + fcMetaDataDict = get_sample_properties(transaction, flowcell_sample_immutable[0]) + flow_lane_immutable = search_unique_sample (transaction, incoming_sample) + + sample = flow_lane_immutable[0].getSample() + parents = sample.getParents() + + dataSet = transaction.createNewDataSet("FASTQ_GZ") + 
dataSet.setMeasuredData(False) + dataSet.setPropertyValue(INDEX1, DEFAULT_INDEX) + dataSet.setPropertyValue(INDEX2, DEFAULT_INDEX) + dirName = transaction.createNewDirectory(dataSet,name) + + if undetermined: + fastq_files, sample_space = process_undetermined(transaction, undetermined, name, flowCellId, flowLane, + fastq_files, first_fastq_file, search_service, fcMetaDataDict, parents, dataSet) + else: + fastq_files, sample_space = process_regular_samples(transaction, name, sample_code, flowLane, + fastq_files, first_fastq_file, search_unique_sample, fcMetaDataDict, dataSet) + + put_files_to_dataset (transaction, dataSet, fastq_files, name, flowCellId, sample_space, undetermined) + + sa = transaction.getSampleForUpdate(flow_lane_immutable[0].getSampleIdentifier()) + sa.setPropertyValue("DATA_TRANSFERRED", create_openbis_timestamp_now()) + dataSet.setSample(flow_lane_immutable[0]) +
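
Reviewer note: the drop-box scripts in this diff all switch to a shared gfb_utils module ("from gfb_utils import *") that is not part of the patch itself; it lives on the -Dpython.path configured in datastore_server.conf. For orientation, a minimal sketch of what that module must provide follows. The bodies of get_model() and create_openbis_timestamp() are reconstructed from the helpers this commit removes from create-flowcell.py; Sequencers, HISEQ_LIST and the remaining helpers are inferred purely from their call sites in the diff, so their names are real but their implementations here are assumptions, not the shipped code.

# gfb_utils.py -- hedged sketch, not the actual shared module
import os
import fnmatch
from time import localtime, gmtime, strftime

# Jython-only import; same openBIS API already used by the drop-box scripts above.
from ch.systemsx.cisd.openbis.generic.shared.api.v1.dto import SearchCriteria


class Sequencers:
    # Values taken from the SEQUENCER_DICT removed from create-flowcell.py.
    HISEQ_4000 = 'Illumina HiSeq 4000'
    HISEQ_3000 = 'Illumina HiSeq 3000'
    HISEQ_2500 = 'Illumina HiSeq 2500'
    HISEQ_2000 = 'Illumina HiSeq 2000'
    HISEQ_X = 'Illumina HiSeq X'
    NEXTSEQ_500 = 'Illumina NextSeq 500'
    MISEQ = 'Illumina MiSeq'
    UNIDENTIFIED = 'Unidentified'

HISEQ_LIST = [Sequencers.HISEQ_2500, Sequencers.HISEQ_3000,
              Sequencers.HISEQ_4000, Sequencers.HISEQ_X]


def get_model(run_id):
    '''
    Guesses the sequencer model from the run folder name,
    e.g. 150202_NS500318_0047_AH3KLMBGXX -> Illumina NextSeq 500.
    Logic copied from the get_model() removed from create-flowcell.py.
    '''
    date, machine_id, run_number, fc_string = os.path.basename(run_id).split("_")
    if machine_id.startswith("NS"):
        return Sequencers.NEXTSEQ_500
    elif machine_id.startswith("M"):
        return Sequencers.MISEQ
    elif machine_id.startswith("D"):
        return Sequencers.HISEQ_2500
    elif machine_id.startswith("SN"):
        return Sequencers.HISEQ_2000
    elif machine_id.startswith("J"):
        return Sequencers.HISEQ_3000
    elif machine_id.startswith("K"):
        return Sequencers.HISEQ_4000
    elif machine_id.startswith("ST"):
        return Sequencers.HISEQ_X
    return Sequencers.UNIDENTIFIED


def create_openbis_timestamp(file):
    '''
    openBIS-compatible timestamp from a file's mtime; logic of the removed
    createOpenbisTimeStamp() helpers.
    '''
    mtime = os.path.getmtime(file)
    lt = localtime(mtime)
    tz = localtime().tm_hour - gmtime().tm_hour
    return strftime("%Y-%m-%d %H:%M:%S GMT" + "%+.2d" % tz + ":00", lt)


def create_openbis_timestamp_now():
    '''Same format for the current time (only the call site is visible; assumed).'''
    lt = localtime()
    tz = localtime().tm_hour - gmtime().tm_hour
    return strftime("%Y-%m-%d %H:%M:%S GMT" + "%+.2d" % tz + ":00", lt)


def get_thread_properties(transaction):
    '''Assumed helper: exposes the DSS thread properties (e.g. 'script-path') as a dict.'''
    thread_property_dict = {}
    thread_properties = transaction.getGlobalState().getThreadParameters().getThreadProperties()
    for key in thread_properties:
        try:
            thread_property_dict[key] = thread_properties.getProperty(key)
        except:
            pass
    return thread_property_dict


def search_unique_sample(transaction, sample_code):
    '''Assumed helper: returns the search result list for the sample with this code.'''
    sc = SearchCriteria()
    sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(
        SearchCriteria.MatchClauseAttribute.CODE, sample_code))
    return transaction.getSearchService().searchForSamples(sc)


def get_file_names(path, pattern):
    '''Assumed helper: recursively collects files below path matching a glob pattern.'''
    matches = []
    for root, dirnames, filenames in os.walk(path):
        for filename in fnmatch.filter(filenames, pattern):
            matches.append(os.path.join(root, filename))
    matches.sort()
    return matches

With such a module on the python.path, get_model('150202_NS500318_0047_AH3KLMBGXX') returns Sequencers.NEXTSEQ_500, which is the value the model checks in read-rta-timestamp.py, register-flowcell.py and create-flowcell.py branch on.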