From 143ccd9e3f4600e91e5dad5a06af97a3850bcc06 Mon Sep 17 00:00:00 2001 From: kohleman <kohleman> Date: Tue, 29 Sep 2015 14:30:59 +0000 Subject: [PATCH] - some refactoring/renaming - added test for a NexSeq and a MiSeq SVN: 34742 --- .../Jython/createSampleSheet_bcl2fastq.py | 245 +++++----- .../createSampleSheet_bcl2fastq_Test.py | 421 ++++++++++++++++-- 2 files changed, 503 insertions(+), 163 deletions(-) diff --git a/deep_sequencing_unit/source/Jython/createSampleSheet_bcl2fastq.py b/deep_sequencing_unit/source/Jython/createSampleSheet_bcl2fastq.py index d59d8787e43..93832af4cca 100644 --- a/deep_sequencing_unit/source/Jython/createSampleSheet_bcl2fastq.py +++ b/deep_sequencing_unit/source/Jython/createSampleSheet_bcl2fastq.py @@ -77,12 +77,12 @@ class Sequencers: HISEQ_LIST = [Sequencers.HISEQ_2000, Sequencers.HISEQ_2500, Sequencers.HISEQ_3000, Sequencers.HISEQ_4000, Sequencers.HISEQ_X] -def login(logger, configMap): - logger.info('Logging into ' + configMap['openbisServer']) - service = OpenbisServiceFacadeFactory.tryCreate(configMap['openbisUserName'], - configMap['openbisPassword'], - configMap['openbisServer'], - configMap['connectionTimeout']) +def login(logger, config_dict): + logger.info('Logging into ' + config_dict['openbisServer']) + service = OpenbisServiceFacadeFactory.tryCreate(config_dict['openbisUserName'], + config_dict['openbisPassword'], + config_dict['openbisServer'], + config_dict['connectionTimeout']) return service @@ -143,8 +143,6 @@ def parseOptions(logger): args = parser.parse_args() - print(type(args)) - if args.outdir[-1] <> '/': args.outdir = args.outdir + '/' @@ -170,59 +168,59 @@ def readConfig(logger): ILLUMINA = 'ILLUMINA' logger.info('Reading config file') - configMap = {} + config_dict = {} configParameters = parseConfigurationFile() - configMap['facilityName'] = configParameters.get(GENERAL, 'facilityName') - configMap['facilityNameShort'] = configParameters.get(GENERAL, 'facilityNameShort') - configMap['facilityInstitution'] = configParameters.get(GENERAL, 'facilityInstitution') - configMap['mailList'] = configParameters.get(GENERAL, 'mailList') - configMap['mailFrom'] = configParameters.get(GENERAL, 'mailFrom') - configMap['smptHost'] = configParameters.get(GENERAL, 'smptHost') - configMap['SampleSheetFileName'] = configParameters.get(GENERAL, 'SampleSheetFileName') - configMap['separator'] = configParameters.get(GENERAL, 'separator') - configMap['indexSeparator'] = configParameters.get(GENERAL, 'indexSeparator') - - configMap['openbisUserName'] = configParameters.get(OPENBIS, 'openbisUserName') - configMap['openbisPassword'] = configParameters.get(OPENBIS, 'openbisPassword', raw=True) - configMap['openbisServer'] = configParameters.get(OPENBIS, 'openbisServer') - configMap['connectionTimeout'] = configParameters.getint(OPENBIS, 'connectionTimeout') - configMap['illuminaFlowCellTypeName'] = configParameters.get(OPENBIS, 'illuminaFlowCellTypeName') - configMap['index1Name'] = configParameters.get(OPENBIS, 'index1Name') - configMap['index2Name'] = configParameters.get(OPENBIS, 'index2Name') - configMap['index1Length'] = configParameters.get(OPENBIS, 'index1Length') - configMap['index2Length'] = configParameters.get(OPENBIS, 'index2Length') - configMap['endType'] = configParameters.get(OPENBIS, 'endType') - configMap['cycles'] = configParameters.get(OPENBIS, 'cycles') - configMap['controlLane'] = configParameters.get(OPENBIS, 'controlLane') - configMap['ncbi'] = configParameters.get(OPENBIS, 'ncbi') - configMap['externalSampleName'] = configParameters.get(OPENBIS, 'externalSampleName') - configMap['laneCount'] = configParameters.get(OPENBIS, 'laneCount') - configMap['kit'] = configParameters.get(OPENBIS, 'kit') - - configMap['headerSection'] = configParameters.get(ILLUMINA, 'headerSection') - configMap['readsSection'] = configParameters.get(ILLUMINA, 'readsSection') - configMap['settingsSection'] = configParameters.get(ILLUMINA, 'settingsSection') - configMap['dataSectionSingleRead'] = configParameters.get(ILLUMINA, 'dataSectionSingleRead') - configMap['dataSectionDualRead'] = configParameters.get(ILLUMINA, 'dataSectionDualRead') - configMap['workflow'] = configParameters.get(ILLUMINA, 'workflow') - configMap['application'] = configParameters.get(ILLUMINA, 'application') - configMap['chemistry'] = configParameters.get(ILLUMINA, 'chemistry') - - configMap['truSeqAdapter1'] = configParameters.get(ILLUMINA, 'truSeqAdapter1') - configMap['truSeqAdapter2'] = configParameters.get(ILLUMINA, 'truSeqAdapter2') - configMap['nexteraAdapter'] = configParameters.get(ILLUMINA, 'nexteraAdapter') - configMap['iemFileVersion'] = configParameters.get(ILLUMINA, 'iemFileVersion') - - configMap['configureBclToFastqPath'] = configParameters.get(ILLUMINA, 'configureBclToFastqPath') - configMap['failedReads'] = configParameters.get(ILLUMINA, 'failedReads') - configMap['clusterCount'] = configParameters.get(ILLUMINA, 'clusterCount') - configMap['clusterCountNumber'] = configParameters.get(ILLUMINA, 'clusterCountNumber') - configMap['outputDir'] = configParameters.get(ILLUMINA, 'outputDir') - configMap['sampleSheetName'] = configParameters.get(ILLUMINA, 'sampleSheetName') - configMap['baseMask'] = configParameters.get(ILLUMINA, 'baseMask') - - return configMap + config_dict['facilityName'] = configParameters.get(GENERAL, 'facilityName') + config_dict['facilityNameShort'] = configParameters.get(GENERAL, 'facilityNameShort') + config_dict['facilityInstitution'] = configParameters.get(GENERAL, 'facilityInstitution') + config_dict['mailList'] = configParameters.get(GENERAL, 'mailList') + config_dict['mailFrom'] = configParameters.get(GENERAL, 'mailFrom') + config_dict['smptHost'] = configParameters.get(GENERAL, 'smptHost') + config_dict['SampleSheetFileName'] = configParameters.get(GENERAL, 'SampleSheetFileName') + config_dict['separator'] = configParameters.get(GENERAL, 'separator') + config_dict['indexSeparator'] = configParameters.get(GENERAL, 'indexSeparator') + + config_dict['openbisUserName'] = configParameters.get(OPENBIS, 'openbisUserName') + config_dict['openbisPassword'] = configParameters.get(OPENBIS, 'openbisPassword', raw=True) + config_dict['openbisServer'] = configParameters.get(OPENBIS, 'openbisServer') + config_dict['connectionTimeout'] = configParameters.getint(OPENBIS, 'connectionTimeout') + config_dict['illuminaFlowCellTypeName'] = configParameters.get(OPENBIS, 'illuminaFlowCellTypeName') + config_dict['index1Name'] = configParameters.get(OPENBIS, 'index1Name') + config_dict['index2Name'] = configParameters.get(OPENBIS, 'index2Name') + config_dict['index1Length'] = configParameters.get(OPENBIS, 'index1Length') + config_dict['index2Length'] = configParameters.get(OPENBIS, 'index2Length') + config_dict['endType'] = configParameters.get(OPENBIS, 'endType') + config_dict['cycles'] = configParameters.get(OPENBIS, 'cycles') + config_dict['controlLane'] = configParameters.get(OPENBIS, 'controlLane') + config_dict['ncbi'] = configParameters.get(OPENBIS, 'ncbi') + config_dict['externalSampleName'] = configParameters.get(OPENBIS, 'externalSampleName') + config_dict['laneCount'] = configParameters.get(OPENBIS, 'laneCount') + config_dict['kit'] = configParameters.get(OPENBIS, 'kit') + + config_dict['headerSection'] = configParameters.get(ILLUMINA, 'headerSection') + config_dict['readsSection'] = configParameters.get(ILLUMINA, 'readsSection') + config_dict['settingsSection'] = configParameters.get(ILLUMINA, 'settingsSection') + config_dict['dataSectionSingleRead'] = configParameters.get(ILLUMINA, 'dataSectionSingleRead') + config_dict['dataSectionDualRead'] = configParameters.get(ILLUMINA, 'dataSectionDualRead') + config_dict['workflow'] = configParameters.get(ILLUMINA, 'workflow') + config_dict['application'] = configParameters.get(ILLUMINA, 'application') + config_dict['chemistry'] = configParameters.get(ILLUMINA, 'chemistry') + + config_dict['truSeqAdapter1'] = configParameters.get(ILLUMINA, 'truSeqAdapter1') + config_dict['truSeqAdapter2'] = configParameters.get(ILLUMINA, 'truSeqAdapter2') + config_dict['nexteraAdapter'] = configParameters.get(ILLUMINA, 'nexteraAdapter') + config_dict['iemFileVersion'] = configParameters.get(ILLUMINA, 'iemFileVersion') + + config_dict['configureBclToFastqPath'] = configParameters.get(ILLUMINA, 'configureBclToFastqPath') + config_dict['failedReads'] = configParameters.get(ILLUMINA, 'failedReads') + config_dict['clusterCount'] = configParameters.get(ILLUMINA, 'clusterCount') + config_dict['clusterCountNumber'] = configParameters.get(ILLUMINA, 'clusterCountNumber') + config_dict['outputDir'] = configParameters.get(ILLUMINA, 'outputDir') + config_dict['sampleSheetName'] = configParameters.get(ILLUMINA, 'sampleSheetName') + config_dict['baseMask'] = configParameters.get(ILLUMINA, 'baseMask') + + return config_dict def getDate(): @@ -254,7 +252,7 @@ def get_vocabulary(vocabulary_code, service): return vocabulary_dict -def send_email(emails, files, flowCellName, configMap, logger): +def send_email(emails, files, flowCellName, config_dict, logger): """ Send out an email to the specified recipients """ @@ -262,7 +260,7 @@ def send_email(emails, files, flowCellName, configMap, logger): emails_list = emails.split() msg = MIMEMultipart() - msg['From'] = configMap['mailFrom'] + msg['From'] = config_dict['mailFrom'] msg['To'] = COMMASPACE.join(emails_list) msg['Date'] = formatdate(localtime=True) msg['Subject'] = 'Generated Sample Sheet for flowcell ' + flowCellName @@ -276,8 +274,8 @@ def send_email(emails, files, flowCellName, configMap, logger): part.add_header('Content-Disposition', 'attachment; filename="%s"' % os.path.basename(f)) msg.attach(part) - smtp = smtplib.SMTP(configMap['smptHost']) - smtp.sendmail(configMap['mailFrom'], emails_list, msg.as_string()) + smtp = smtplib.SMTP(config_dict['smptHost']) + smtp.sendmail(config_dict['mailFrom'], emails_list, msg.as_string()) smtp.close() logger.info('Sent email to ' + COMMASPACE.join(emails_list)) @@ -389,7 +387,8 @@ def get_contained_sample_properties(contained_samples, service): try: assert parents.size() >= 1 except AssertionError: - print (str(parents.size()) + ' parents found for lane ' + lane.getCode()) + pass +# print (str(parents.size()) + ' parents found for lane ' + lane.getCode()) samplesPerLaneDict[lane.getCode()[-1]] = len(parents) @@ -445,13 +444,18 @@ def write_sample_sheet(sampleSheetDict, headerList, myoptions, logger, fileName) return fileName -def write_sample_sheet_single_lane(ordered_sample_sheet_dict, flowCellDict, - parentDict, configMap, myoptions, logger, csv_file): +def write_sample_sheet_single_lane(model, ordered_sample_sheet_dict, flowCellDict, index_length_dict, + parentDict, config_dict, myoptions, logger, csv_file): newline = lineending[myoptions.lineending] - header_list = create_header_section (configMap, parentDict, flowCellDict) + + if (model is Sequencers.NEXTSEQ_500): + lane_number = 2 + else: + lane_number = int(flowCellDict[config_dict['laneCount']]) + 1 - for lane in range(1, int(flowCellDict[configMap['laneCount']]) + 1): + for lane in range(1, lane_number): + header_list = create_header_section (model, config_dict, parentDict, flowCellDict, index_length_dict, lane) per_lane_dict = [ordered_sample_sheet_dict[key] for key in ordered_sample_sheet_dict.keys() if int(key[0]) == lane] csv_file_path = myoptions.outdir + csv_file + "_" + str(lane) + CSV try: @@ -465,7 +469,7 @@ def write_sample_sheet_single_lane(ordered_sample_sheet_dict, flowCellDict, print ('File error: ' + str(err)) -def create_header_section (configMap, parentDict, flowCellDict): +def create_header_section (model, config_dict, parentDict, flowCellDict, index_length_dict, lane): kitsDict = {"CHIP_SEQ_SAMPLE_PREP" : ["",""], "TRUSEQ_RNA_SAMPLEPREPKIT_V2_ILLUMINA" : ["A","TruSeq LT"], @@ -490,62 +494,71 @@ def create_header_section (configMap, parentDict, flowCellDict): "NEXTERA_DNA_SAMPLE_PREP_KIT_BUFFER_HMW" : ["N", "Nextera"] } - separator = configMap['separator'] + separator = config_dict['separator'] header_list = [] - + + len_index1 = index_length_dict[int(lane)][0] + len_index2 = index_length_dict[int(lane)][1] + + # here we take the first sample to determine the Sample Prep Kit try: - assay = kitsDict [parentDict.itervalues().next()[configMap['kit']]][1] + assay = kitsDict [parentDict.itervalues().next()[config_dict['kit']]][1] except: - print "No Kit set for sample. Will not set the assay value in the sample sheet" +# print "No Kit set for sample. Will not set the assay value in the sample sheet" assay = "" - header_section = configMap['headerSection'].split(separator) + header_section = config_dict['headerSection'].split(separator) header_section.reverse() header_list = [header_section.pop().strip()] - header_list.append(header_section.pop().strip() + separator + configMap['iemFileVersion']) - header_list.append(header_section.pop().strip() + separator + configMap['facilityInstitution']) - header_list.append(header_section.pop().strip() + separator + configMap['facilityName']) + header_list.append(header_section.pop().strip() + separator + config_dict['iemFileVersion']) + header_list.append(header_section.pop().strip() + separator + config_dict['facilityInstitution']) + header_list.append(header_section.pop().strip() + separator + config_dict['facilityName']) header_list.append(header_section.pop().strip() + separator + flowCellDict['Name']) header_list.append(header_section.pop().strip() + separator + datetime.now().strftime('%m/%d/%Y')) - header_list.append(header_section.pop().strip() + separator + configMap['workflow']) - header_list.append(header_section.pop().strip() + separator + configMap['application']) + header_list.append(header_section.pop().strip() + separator + config_dict['workflow']) + header_list.append(header_section.pop().strip() + separator + config_dict['application']) header_list.append(header_section.pop().strip() + separator + assay) - header_list.append(header_section.pop().strip() + separator + flowCellDict[configMap['endType']] + '_' + flowCellDict[configMap['cycles']]) - header_list.append(header_section.pop().strip() + separator + configMap['chemistry']) + header_list.append(header_section.pop().strip() + separator + flowCellDict[config_dict['endType']] + '_' + flowCellDict[config_dict['cycles']]) + header_list.append(header_section.pop().strip() + separator + config_dict['chemistry']) header_list.append('') - reads_section = configMap['readsSection'].split(separator) + reads_section = config_dict['readsSection'].split(separator) reads_section.reverse() header_list.append(reads_section.pop()) - header_list.append(flowCellDict[configMap['cycles']]) - if (flowCellDict[configMap['endType']] == 'PAIRED_END'): - header_list.append(flowCellDict[configMap['cycles']]) + header_list.append(flowCellDict[config_dict['cycles']]) + if (flowCellDict[config_dict['endType']] == 'PAIRED_END'): + header_list.append(flowCellDict[config_dict['cycles']]) header_list.append('') - settings_section = configMap['settingsSection'].split(separator) + settings_section = config_dict['settingsSection'].split(separator) settings_section.reverse() header_list.append(settings_section.pop()) if ('nextera' in assay.lower()): - header_list.append(configMap['nexteraAdapter']) + header_list.append(config_dict['nexteraAdapter']) if ('truseq' in assay.lower()): - header_list.append(configMap['truSeqAdapter1']) - header_list.append(configMap['truSeqAdapter2']) + header_list.append(config_dict['truSeqAdapter1']) + header_list.append(config_dict['truSeqAdapter2']) header_list.append('') - if int(flowCellDict['INDEXREAD2']) > 0: - SeqDataSection = configMap['dataSectionDualRead'].split(',') + if int(flowCellDict['INDEXREAD2']) > 0 and len_index2 > 0: + SeqDataSection = config_dict['dataSectionDualRead'].split(',') else: - SeqDataSection = configMap['dataSectionSingleRead'].split(',') + SeqDataSection = config_dict['dataSectionSingleRead'].split(',') SeqDataSection.reverse() header_list.append(SeqDataSection.pop()) - header_list.append(','.join(SeqDataSection.pop().strip().split())) + + if model in Sequencers.NEXTSEQ_500: + # leaving out the 'Lane', as there are four but treat them as one + header_list.append(','.join(SeqDataSection.pop().strip().split()[1:])) + else: + header_list.append(','.join(SeqDataSection.pop().strip().split())) return header_list -def verify_index_length (parentDict, flowCellDict, configMap, logger): +def verify_index_length (parentDict, flowCellDict, config_dict, logger): index_length_dict = {} verified_per_lane_dict = [] @@ -553,7 +566,7 @@ def verify_index_length (parentDict, flowCellDict, configMap, logger): flowcell_len_index1 = int(flowCellDict['INDEXREAD']) flowcell_len_index2 = int(flowCellDict['INDEXREAD2']) - print("Flowcell has index length [" + str(flowcell_len_index1) + ", " + str(flowcell_len_index2) + "]") + logger.info("Flowcell has index length [" + str(flowcell_len_index1) + ", " + str(flowcell_len_index2) + "]") for lane in range(1,int(flowCellDict['LANECOUNT'])+1): index1_set = set () @@ -566,12 +579,12 @@ def verify_index_length (parentDict, flowCellDict, configMap, logger): for sample in per_lane_list: # If no index then just skip this sample - if (configMap['index1Name'] not in sample) or (sample[configMap['index1Name']] == 'NOINDEX'): + if (config_dict['index1Name'] not in sample) or (sample[config_dict['index1Name']] == 'NOINDEX'): continue - index1 = sample[configMap['index1Name']] + index1 = sample[config_dict['index1Name']] index2="" - if configMap['index2Name'] in sample: - index2 = sample[configMap['index2Name']] + if config_dict['index2Name'] in sample: + index2 = sample[config_dict['index2Name']] index1_set.add(len(index1)) if index2: @@ -599,28 +612,25 @@ def verify_index_length (parentDict, flowCellDict, configMap, logger): return index_length_dict -def create_sample_sheet_dict(model, parentDict, flowCellDict, configMap, index1Vocabulary, +def create_sample_sheet_dict(model, parentDict, index_length_dict, flowCellDict, config_dict, index1Vocabulary, index2Vocabulary, flowCellName, logger): sampleSheetDict = {} - separator = configMap['separator'] - - index_length_dict = verify_index_length(parentDict, flowCellDict, configMap, logger) - print(index_length_dict) + separator = config_dict['separator'] for key in parentDict.keys(): lane = parentDict[key]['LANE'][-1:] # If no index then just skip this sample - if (configMap['index1Name'] not in parentDict[key]) or (parentDict[key][configMap['index1Name']] == 'NOINDEX'): + if (config_dict['index1Name'] not in parentDict[key]) or (parentDict[key][config_dict['index1Name']] == 'NOINDEX'): continue - index1 = parentDict[key][configMap['index1Name']] + index1 = parentDict[key][config_dict['index1Name']] index2="" - if configMap['index2Name'] in parentDict[key]: - index2 = parentDict[key][configMap['index2Name']] - indexNumber = index2Vocabulary[parentDict[key][configMap['index2Name']]].split()[2] + if config_dict['index2Name'] in parentDict[key]: + index2 = parentDict[key][config_dict['index2Name']] + indexNumber = index2Vocabulary[parentDict[key][config_dict['index2Name']]].split()[2] try: - kit = parentDict[key][configMap['kit']] + kit = parentDict[key][config_dict['kit']] prefix = kitsDict[kit][0] except: # print "Missing Kit on " + str(key) @@ -642,7 +652,7 @@ def create_sample_sheet_dict(model, parentDict, flowCellDict, configMap, index1V sampleSheetDict[lane + '_' + key] = [ lane_string + key + separator - + key + '_' + sanitize_string(parentDict[key][configMap['externalSampleName']]) + '_' + index1[0:len_index1] + '_' + index2[0:len_index2] + separator + + key + '_' + sanitize_string(parentDict[key][config_dict['externalSampleName']]) + '_' + index1[0:len_index1] + '_' + index2[0:len_index2] + separator + separator + separator + index1Vocabulary[index1].split()[1] + separator @@ -655,7 +665,7 @@ def create_sample_sheet_dict(model, parentDict, flowCellDict, configMap, index1V sampleSheetDict[lane + '_' + key] = [ lane_string + key + separator - + key + '_' + sanitize_string(parentDict[key][configMap['externalSampleName']]) + '_' + index1[0:len_index1] + separator + + key + '_' + sanitize_string(parentDict[key][config_dict['externalSampleName']]) + '_' + index1[0:len_index1] + separator + separator + separator + index1Vocabulary[index1].split()[1] + separator @@ -663,7 +673,7 @@ def create_sample_sheet_dict(model, parentDict, flowCellDict, configMap, index1V + key + separator ] - csv_file_name = configMap['SampleSheetFileName'] + '_' + flowCellName + csv_file_name = config_dict['SampleSheetFileName'] + '_' + flowCellName ordered_sample_sheet_dict = OrderedDict(sorted(sampleSheetDict.items(), key=lambda t: t[0])) return ordered_sample_sheet_dict, csv_file_name @@ -689,8 +699,6 @@ def main (): foundFlowCell, containedSamples = get_flowcell(config_dict['illuminaFlowCellTypeName'], flowCellName, service, logger) parentDict, samplesPerLaneDict = get_contained_sample_properties(containedSamples, service) - logger.info('Found ' + str(len(parentDict)) + ' samples on the flow cell ' + flowCellName) - flowCellName = foundFlowCell.getCode() flowCellDict = transform_sample_to_dict(foundFlowCell) model = get_model(flowCellDict['RUN_NAME_FOLDER']) @@ -699,14 +707,15 @@ def main (): index1Vocabulary = get_vocabulary(config_dict['index1Name'], service) index2Vocabulary = get_vocabulary(config_dict['index2Name'], service) - ordered_sample_sheet_dict, csv_file_name = create_sample_sheet_dict(model, parentDict, + index_length_dict = verify_index_length(parentDict, flowCellDict, config_dict, logger) + ordered_sample_sheet_dict, csv_file_name = create_sample_sheet_dict(model, parentDict, index_length_dict, flowCellDict, config_dict, index1Vocabulary, index2Vocabulary, flowCellName, logger) if myoptions.singlelane: - write_sample_sheet_single_lane(ordered_sample_sheet_dict, flowCellDict, + write_sample_sheet_single_lane(model, ordered_sample_sheet_dict, flowCellDict, index_length_dict, parentDict, config_dict, myoptions, logger, csv_file_name) else: - header_list = create_header_section (config_dict, parentDict, flowCellDict) + header_list = create_header_section (model, config_dict, parentDict, flowCellDict, index_length_dict, samplesPerLaneDict.keys()[0]) sampleSheetFile = write_sample_sheet(ordered_sample_sheet_dict, header_list, myoptions, logger, myoptions.outdir + csv_file_name + CSV) if myoptions.maillist: diff --git a/deep_sequencing_unit/source/Jython/createSampleSheet_bcl2fastq_Test.py b/deep_sequencing_unit/source/Jython/createSampleSheet_bcl2fastq_Test.py index 62c968ea749..b716ddd253a 100644 --- a/deep_sequencing_unit/source/Jython/createSampleSheet_bcl2fastq_Test.py +++ b/deep_sequencing_unit/source/Jython/createSampleSheet_bcl2fastq_Test.py @@ -1,60 +1,72 @@ import unittest -import re +import re +# import pytest from createSampleSheet_bcl2fastq import * -class test_sanitize_string(unittest.TestCase): +def getTodayDate(): + from datetime import date + d = date.today() + return d.strftime("%m/%d/%Y") +class test_sanitize_string(unittest.TestCase): + + def testDefault(self): self.assertEqual(sanitize_string('abc#a$v%c^D&P-'), 'abc_a_v_c_D_P_') - - + + def testOnlySpecialChars(self): self.assertEqual(sanitize_string('@#$%^&*('), '_') - - + + class test_get_model(unittest.TestCase): - + + def test_HiseqX(self): self.assertEqual(get_model('141121_ST-E00107_0356_AH00C3CCXX'), Sequencers.HISEQ_X) - + + def test_expectError(self): self.assertNotEqual(get_model('150724_J00121_0017_AH2VYMBBXX'), Sequencers.NEXTSEQ_500) - - + + class test_get_reverse_complement(unittest.TestCase): + + def test_happyCase(self): self.assertEqual(get_reverse_complement('ACTGAATTTT'), 'AAAATTCAGT', 'Reverse complement is faulty') - + + def test_failingCase(self): self.assertNotEqual(get_reverse_complement('ACTG'), 'CAGA') - - -class create_sample_sheet(unittest.TestCase): - + + + +class create_sample_sheet_C7GMNANXX(unittest.TestCase): + + def setUp(self): self.myCode = 'C7GMNANXX' self.logger = setUpLogger('log/') self.config_dict = readConfig(self.logger) - -# self.options = parseOptions(self.logger) - + import argparse import shlex parser = argparse.ArgumentParser() - + parser.add_argument('--flowcell') parser.add_argument('--lineending') parser.add_argument('--outdir') - + cmd_string = ['--flowcell', self.myCode, '--lineending', 'win32', '--outdir', '../../targets/playground'] self.options = parser.parse_args(cmd_string) - + self.service = OpenbisServiceFacadeFactory.tryCreate(self.config_dict['openbisUserName'], self.config_dict['openbisPassword'], self.config_dict['openbisServer'], self.config_dict['connectionTimeout']) - + self.flowcell, self.containedSamples = get_flowcell('ILLUMINA_FLOW_CELL', self.myCode, self.service, self.logger) self.flowCellDict = transform_sample_to_dict(self.flowcell) @@ -63,63 +75,382 @@ class create_sample_sheet(unittest.TestCase): self.flowCellName = self.flowcell.getCode() self.index1Vocabulary = get_vocabulary(self.config_dict['index1Name'], self.service) self.index2Vocabulary = get_vocabulary(self.config_dict['index2Name'], self.service) - - - + self.index_length_dict = verify_index_length(self.parentDict, self.flowCellDict, self.config_dict, self.logger) + self.model = get_model(self.flowCellDict['RUN_NAME_FOLDER']) + + def test_get_flowCell (self): self.assertEqual(self.flowcell.getCode(), self.myCode) self.assertEqual(self.containedSamples.size(), 8) - + fcProp = self.flowcell.getProperties() self.assertEqual(fcProp['SEQUENCER'], 'D00535') self.assertEqual(self.flowCellDict['FLOWCELLTYPE'], 'HiSeq Flow Cell v4') - - + + def test_get_contained_sample_properties(self): self.assertEqual(self.parentDict['BSSE_QGF_34778_C7GMNANXX_1']['BARCODE'], 'GTCCGC') - self.assertEqual(self.parentDict['BSSE_QGF_32285_C7GMNANXX_7']['CONTACT_PERSON_EMAIL'], 'yann.bourgeois@unibas.ch') self.assertEqual(self.samplesPerLaneDict['2'], 23) - - + + def test_get_vocabulary(self): self.assertEqual(self.index1Vocabulary['CACTCAA'], 'Illumina A032 CACTCAA') self.assertEqual(self.index2Vocabulary['GTAAGGAG'],'Index2 (i5) 505 GTAAGGAG') - - + + def test_verify_index_length(self): - self.index_length_dict = verify_index_length(self.parentDict, self.flowCellDict, self.config_dict, self.logger) self.assertDictEqual(self.index_length_dict, {6: [6, 0], 5: [6, 0], 7: [8, 8], 8: [8, 8], 3: [8, 0], 2: [6, 0], 1: [6, 0], 4: [8, 0]}) - - + + def test_create_sample_sheet_dict(self): - + self.model = get_model(self.flowCellDict['RUN_NAME_FOLDER']) - - self.ordered_sample_sheet_dict, self.csv_file_name = create_sample_sheet_dict(self.model, self.parentDict, + + self.ordered_sample_sheet_dict, self.csv_file_name = create_sample_sheet_dict(self.model, self.parentDict, self.index_length_dict, self.flowCellDict, self.config_dict, self.index1Vocabulary, self.index2Vocabulary, self.flowCellName, self.logger) - + self.ordered_sample_sheet_dict['5_BSSE_QGF_32303_C7GMNANXX_5'] = \ [u'5,BSSE_QGF_32303_C7GMNANXX_5,BSSE_QGF_32303_C7GMNANXX_5_TR_EG_1_GGCTAC,,,SureSelectXT,GGCTAC,BSSE_QGF_32303_C7GMNANXX_5,'] - + self.ordered_sample_sheet_dict['5_BSSE_QGF_36788_C7GMNANXX_5'] = \ [u'5,BSSE_QGF_36788_C7GMNANXX_5,BSSE_QGF_36788_C7GMNANXX_5_G_33_run2_TAAGGC,,,N701,TAAGGC,BSSE_QGF_36788_C7GMNANXX_5,'] - + self.ordered_sample_sheet_dict['BSSE_QGF_32281_C7GMNANXX_8']= \ [u'8,BSSE_QGF_32281_C7GMNANXX_8,BSSE_QGF_32281_C7GMNANXX_8_F2_18_3_P162Nextera_TAGGCATG_CTATTAAG,,,N706,TAGGCATG,518,CTATTAAG,BSSE_QGF_32281_C7GMNANXX_8,'] - + + + def test_create_header_section(self): + + self.date = getTodayDate() + self.create_header_section = create_header_section(self.model, self.config_dict, self.parentDict, self.flowCellDict, self.index_length_dict, 5) + self.assertListEqual(self.create_header_section, ['[Header]', 'IEMFileVersion,4', 'Investigator Name,ETHZ_D-BSSE', + 'Project Name,Genomics Facility Basel', u'Experiment Name,C7GMNANXX', + 'Date,'+ self.date, 'Workflow,GenerateFASTQ', 'Application,FASTQ Only', + 'Assay,', u'Description,PAIRED_END_126', 'Chemistry,Default', '', '[Reads]', + u'126', u'126', '', '[Settings]', '', '[Data]', + 'Lane,Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,Sample_Project,Description']) + + self.create_header_section = create_header_section(self.model, self.config_dict, self.parentDict, self.flowCellDict, self.index_length_dict, 8) + self.assertListEqual(self.create_header_section, ['[Header]', 'IEMFileVersion,4', 'Investigator Name,ETHZ_D-BSSE', 'Project Name,Genomics Facility Basel', + u'Experiment Name,C7GMNANXX', 'Date,'+ self.date, 'Workflow,GenerateFASTQ', 'Application,FASTQ Only', + 'Assay,', u'Description,PAIRED_END_126', 'Chemistry,Default', '', '[Reads]', u'126', u'126', '', + '[Settings]', '', '[Data]', 'Lane,Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,I5_Index_ID,index2,Sample_Project,Description']) + + def test_write_sample_sheet_single_lane(self): self.model = get_model(self.flowCellDict['RUN_NAME_FOLDER']) + + self.ordered_sample_sheet_dict, self.csv_file_name = create_sample_sheet_dict(self.model, self.parentDict, self.index_length_dict, + self.flowCellDict, self.config_dict, self.index1Vocabulary, self.index2Vocabulary, self.flowCellName, self.logger) + + write_sample_sheet_single_lane(self.model, self.ordered_sample_sheet_dict, self.flowCellDict, self.index_length_dict, + self.parentDict, self.config_dict, self.options, self.logger, self.csv_file_name) + + def tearDown(self): + self.service.logout() + self.logger.info('Logged out') + + + + + + +class create_sample_sheet_C7P5KANXX(unittest.TestCase): + + + def setUp(self): + self.myCode = 'C7P5KANXX' + self.logger = setUpLogger('log/') + self.config_dict = readConfig(self.logger) + + import argparse + import shlex + parser = argparse.ArgumentParser() + + parser.add_argument('--flowcell') + parser.add_argument('--lineending') + parser.add_argument('--outdir') + + cmd_string = ['--flowcell', self.myCode, '--lineending', 'win32', '--outdir', '../../targets/playground'] + self.options = parser.parse_args(cmd_string) + + self.service = OpenbisServiceFacadeFactory.tryCreate(self.config_dict['openbisUserName'], + self.config_dict['openbisPassword'], + self.config_dict['openbisServer'], + self.config_dict['connectionTimeout']) + + self.flowcell, self.containedSamples = get_flowcell('ILLUMINA_FLOW_CELL', + self.myCode, self.service, self.logger) + self.flowCellDict = transform_sample_to_dict(self.flowcell) + self.parentDict, self.samplesPerLaneDict = get_contained_sample_properties( + self.containedSamples, self.service) + self.flowCellName = self.flowcell.getCode() + self.index1Vocabulary = get_vocabulary(self.config_dict['index1Name'], self.service) + self.index2Vocabulary = get_vocabulary(self.config_dict['index2Name'], self.service) + self.index_length_dict = verify_index_length(self.parentDict, self.flowCellDict, self.config_dict, self.logger) + + + def test_get_flowCell (self): + self.assertEqual(self.flowcell.getCode(), self.myCode) + self.assertEqual(self.containedSamples.size(), 8) + + fcProp = self.flowcell.getProperties() + self.assertEqual(fcProp['SEQUENCER'], 'D00404') + self.assertEqual(self.flowCellDict['FLOWCELLTYPE'], 'HiSeq Flow Cell v4') + + + def test_get_contained_sample_properties(self): + self.assertEqual(self.parentDict['BSSE_QGF_36781_C7P5KANXX_8']['BARCODE'], 'CTTGTAA') + self.assertEqual(self.parentDict['BSSE_QGF_36779_C7P5KANXX_8']['NCBI_ORGANISM_TAXONOMY'], '10090') + self.assertEqual(self.samplesPerLaneDict['8'], 6) + + def test_verify_index_length(self): + self.assertDictEqual(self.index_length_dict, {6: [8, 8], 5: [8, 8], 7: [8, 8], 8: [7, 7], 3: [8, 8], 2: [8, 8], 1: [8, 8], 4: [8, 8]}) + + + def test_create_sample_sheet_dict(self): + + self.model = get_model(self.flowCellDict['RUN_NAME_FOLDER']) + + self.ordered_sample_sheet_dict, self.csv_file_name = create_sample_sheet_dict(self.model, self.parentDict, self.index_length_dict, + self.flowCellDict, self.config_dict, self.index1Vocabulary, self.index2Vocabulary, self.flowCellName, self.logger) + + self.assertDictEqual(self.ordered_sample_sheet_dict, + {u'8_BSSE_QGF_36781_C7P5KANXX_8': [u'8,BSSE_QGF_36781_C7P5KANXX_8,BSSE_QGF_36781_C7P5KANXX_8_Ribomethseq_mousecerebellum_comparison_CTTGTAA_NOINDEX,,,A012,CTTGTAA,(NoIndex),NOINDEX,BSSE_QGF_36781_C7P5KANXX_8,'], + u'8_BSSE_QGF_36780_C7P5KANXX_8': [u'8,BSSE_QGF_36780_C7P5KANXX_8,BSSE_QGF_36780_C7P5KANXX_8_Ribomethseq_HEK_comparison_GCCAATA_NOINDEX,,,A006,GCCAATA,(NoIndex),NOINDEX,BSSE_QGF_36780_C7P5KANXX_8,'], + u'8_BSSE_QGF_36552_C7P5KANXX_8': [u'8,BSSE_QGF_36552_C7P5KANXX_8,BSSE_QGF_36552_C7P5KANXX_8_CLIP_444_1_TGACCAA_NOINDEX,,,A004,TGACCAA,(NoIndex),NOINDEX,BSSE_QGF_36552_C7P5KANXX_8,'], + u'8_BSSE_QGF_36779_C7P5KANXX_8': [u'8,BSSE_QGF_36779_C7P5KANXX_8,BSSE_QGF_36779_C7P5KANXX_8_HITS_CLIP_Fibrillarin_mouseNeurons_2_CAGATCA_NOINDEX,,,A007,CAGATCA,(NoIndex),NOINDEX,BSSE_QGF_36779_C7P5KANXX_8,']}) + + + def test_create_header_section(self): + self.date = getTodayDate() + + self.model = get_model(self.flowCellDict['RUN_NAME_FOLDER']) + self.create_header_section = create_header_section(self.model, self.config_dict, self.parentDict, self.flowCellDict, self.index_length_dict, 5) + self.assertListEqual(self.create_header_section, ['[Header]', 'IEMFileVersion,4', 'Investigator Name,ETHZ_D-BSSE', + 'Project Name,Genomics Facility Basel', u'Experiment Name,C7P5KANXX', + 'Date,09/29/2015', 'Workflow,GenerateFASTQ', 'Application,FASTQ Only', + 'Assay,', u'Description,SINGLE_READ_51', 'Chemistry,Default', '', + '[Reads]', u'51', '', '[Settings]', '', '[Data]', + 'Lane,Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,I5_Index_ID,index2,Sample_Project,Description']) + + def test_write_sample_sheet_single_lane(self): + self.model = get_model(self.flowCellDict['RUN_NAME_FOLDER']) + + self.ordered_sample_sheet_dict, self.csv_file_name = create_sample_sheet_dict(self.model, self.parentDict, self.index_length_dict, + self.flowCellDict, self.config_dict, self.index1Vocabulary, self.index2Vocabulary, self.flowCellName, self.logger) + + write_sample_sheet_single_lane(self.model, self.ordered_sample_sheet_dict, self.flowCellDict, self.index_length_dict, + self.parentDict, self.config_dict, self.options, self.logger, self.csv_file_name) + + def tearDown(self): + self.service.logout() + self.logger.info('Logged out') - self.ordered_sample_sheet_dict, self.csv_file_name = create_sample_sheet_dict(self.model, self.parentDict, + +class create_sample_sheet_HJWC3BGXX(unittest.TestCase): + """ + NextSeq + """ + + + def setUp(self): + self.myCode = 'HJWC3BGXX' + self.logger = setUpLogger('log/') + self.config_dict = readConfig(self.logger) + + import argparse + import shlex + parser = argparse.ArgumentParser() + + parser.add_argument('--flowcell') + parser.add_argument('--lineending') + parser.add_argument('--outdir') + + cmd_string = ['--flowcell', self.myCode, '--lineending', 'win32', '--outdir', '../../targets/playground'] + self.options = parser.parse_args(cmd_string) + + self.service = OpenbisServiceFacadeFactory.tryCreate(self.config_dict['openbisUserName'], + self.config_dict['openbisPassword'], + self.config_dict['openbisServer'], + self.config_dict['connectionTimeout']) + + self.flowcell, self.containedSamples = get_flowcell('ILLUMINA_FLOW_CELL', + self.myCode, self.service, self.logger) + self.flowCellDict = transform_sample_to_dict(self.flowcell) + self.parentDict, self.samplesPerLaneDict = get_contained_sample_properties( + self.containedSamples, self.service) + self.flowCellName = self.flowcell.getCode() + self.index1Vocabulary = get_vocabulary(self.config_dict['index1Name'], self.service) + self.index2Vocabulary = get_vocabulary(self.config_dict['index2Name'], self.service) + self.index_length_dict = verify_index_length(self.parentDict, self.flowCellDict, self.config_dict, self.logger) + + + def test_get_flowCell (self): + self.assertEqual(self.flowcell.getCode(), self.myCode) + self.assertEqual(self.containedSamples.size(), 1) + + fcProp = self.flowcell.getProperties() + self.assertEqual(fcProp['SEQUENCER'], 'NS500318') + self.assertEqual(self.flowCellDict['ILLUMINA_PIPELINE_VERSION'], '2.4.6') + + + def test_get_contained_sample_properties(self): + self.assertEqual(self.parentDict['BSSE_QGF_37091_HJWC3BGXX_1']['BARCODE'], 'AGTCAAC') + self.assertEqual(self.parentDict['BSSE_QGF_37100_HJWC3BGXX_1']['NCBI_ORGANISM_TAXONOMY'], '9606') + self.assertEqual(self.samplesPerLaneDict['1'], 18) + + def test_verify_index_length(self): + self.assertDictEqual(self.index_length_dict, {3: [6, 0], 2: [6, 0], 1: [6, 0], 4: [6, 0]}) + + + def test_create_sample_sheet_dict(self): + + self.model = get_model(self.flowCellDict['RUN_NAME_FOLDER']) + + self.ordered_sample_sheet_dict, self.csv_file_name = create_sample_sheet_dict(self.model, self.parentDict, self.index_length_dict, + self.flowCellDict, self.config_dict, self.index1Vocabulary, self.index2Vocabulary, self.flowCellName, self.logger) + + self.assertDictEqual(self.ordered_sample_sheet_dict, + {u'1_BSSE_QGF_37098_HJWC3BGXX_1': [u'BSSE_QGF_37098_HJWC3BGXX_1,BSSE_QGF_37098_HJWC3BGXX_1_TB_358_PQR_2_0_GTTTCG,,,A021,GTTTCG,BSSE_QGF_37098_HJWC3BGXX_1,'], + u'1_BSSE_QGF_37093_HJWC3BGXX_1': [u'BSSE_QGF_37093_HJWC3BGXX_1,BSSE_QGF_37093_HJWC3BGXX_1_TB_356_PQR_1_0_ATGTCA,,,A015,ATGTCA,BSSE_QGF_37093_HJWC3BGXX_1,'], + u'1_BSSE_QGF_37094_HJWC3BGXX_1': [u'BSSE_QGF_37094_HJWC3BGXX_1,BSSE_QGF_37094_HJWC3BGXX_1_TB_357_BKM_1_0_CCGTCC,,,A016,CCGTCC,BSSE_QGF_37094_HJWC3BGXX_1,'], + u'1_BSSE_QGF_37095_HJWC3BGXX_1': [u'BSSE_QGF_37095_HJWC3BGXX_1,BSSE_QGF_37095_HJWC3BGXX_1_TB_356_MTD_1_0_GTCCGC,,,A018,GTCCGC,BSSE_QGF_37095_HJWC3BGXX_1,'], + u'1_BSSE_QGF_37087_HJWC3BGXX_1': [u'BSSE_QGF_37087_HJWC3BGXX_1,BSSE_QGF_37087_HJWC3BGXX_1_TB_355_DMSO1_GATCAG,,,A009,GATCAG,BSSE_QGF_37087_HJWC3BGXX_1,'], + u'1_BSSE_QGF_37096_HJWC3BGXX_1': [u'BSSE_QGF_37096_HJWC3BGXX_1,BSSE_QGF_37096_HJWC3BGXX_1_TB_358_GDC_1_0_GTGAAA,,,A019,GTGAAA,BSSE_QGF_37096_HJWC3BGXX_1,'], + u'1_BSSE_QGF_37092_HJWC3BGXX_1': [u'BSSE_QGF_37092_HJWC3BGXX_1,BSSE_QGF_37092_HJWC3BGXX_1_TB_357_COL_0_05_AGTTCC,,,A014,AGTTCC,BSSE_QGF_37092_HJWC3BGXX_1,'], + u'1_BSSE_QGF_37088_HJWC3BGXX_1': [u'BSSE_QGF_37088_HJWC3BGXX_1,BSSE_QGF_37088_HJWC3BGXX_1_TB_355_PQR_0_5_TAGCTT,,,A010,TAGCTT,BSSE_QGF_37088_HJWC3BGXX_1,'], + u'1_BSSE_QGF_37089_HJWC3BGXX_1': [u'BSSE_QGF_37089_HJWC3BGXX_1,BSSE_QGF_37089_HJWC3BGXX_1_TB_359_BKM_0_5_1_GGCTAC,,,A011,GGCTAC,BSSE_QGF_37089_HJWC3BGXX_1,'], + u'1_BSSE_QGF_37101_HJWC3BGXX_1': [u'BSSE_QGF_37101_HJWC3BGXX_1,BSSE_QGF_37101_HJWC3BGXX_1_TB_357_GDC_2_0_ACTGAT,,,A025,ACTGAT,BSSE_QGF_37101_HJWC3BGXX_1,'], + u'1_BSSE_QGF_37090_HJWC3BGXX_1': [u'BSSE_QGF_37090_HJWC3BGXX_1,BSSE_QGF_37090_HJWC3BGXX_1_TB_358_MTD_0_5_CTTGTA,,,A012,CTTGTA,BSSE_QGF_37090_HJWC3BGXX_1,'], + u'1_BSSE_QGF_37099_HJWC3BGXX_1': [u'BSSE_QGF_37099_HJWC3BGXX_1,BSSE_QGF_37099_HJWC3BGXX_1_TB_360_BKM_2_0_1_CGTACG,,,A022,CGTACG,BSSE_QGF_37099_HJWC3BGXX_1,'], + u'1_BSSE_QGF_37091_HJWC3BGXX_1': [u'BSSE_QGF_37091_HJWC3BGXX_1,BSSE_QGF_37091_HJWC3BGXX_1_TB_358_GDC_0_5_AGTCAA,,,A013,AGTCAA,BSSE_QGF_37091_HJWC3BGXX_1,'], + u'1_BSSE_QGF_37102_HJWC3BGXX_1': [u'BSSE_QGF_37102_HJWC3BGXX_1,BSSE_QGF_37102_HJWC3BGXX_1_TB_361_COL_0_2_1_ATTCCT,,,A027,ATTCCT,BSSE_QGF_37102_HJWC3BGXX_1,'], + u'1_BSSE_QGF_37100_HJWC3BGXX_1': [u'BSSE_QGF_37100_HJWC3BGXX_1,BSSE_QGF_37100_HJWC3BGXX_1_TB_357_MTD_2_0_GAGTGG,,,A023,GAGTGG,BSSE_QGF_37100_HJWC3BGXX_1,'], + u'1_BSSE_QGF_37097_HJWC3BGXX_1': [u'BSSE_QGF_37097_HJWC3BGXX_1,BSSE_QGF_37097_HJWC3BGXX_1_TB_358_COL_0_1_GTGGCC,,,A020,GTGGCC,BSSE_QGF_37097_HJWC3BGXX_1,']}) + + def test_create_header_section(self): + self.model = get_model(self.flowCellDict['RUN_NAME_FOLDER']) + lane = 1 + self.date = getTodayDate() + self.create_header_section = create_header_section(self.model, self.config_dict, self.parentDict, self.flowCellDict, self.index_length_dict, lane) + self.assertListEqual(self.create_header_section, ['[Header]', 'IEMFileVersion,4', 'Investigator Name,ETHZ_D-BSSE', 'Project Name,Genomics Facility Basel', + u'Experiment Name,HJWC3BGXX', 'Date,' + self.date, 'Workflow,GenerateFASTQ', 'Application,FASTQ Only', + 'Assay,', u'Description,SINGLE_READ_81', 'Chemistry,Default', '', '[Reads]', u'81', '', '[Settings]', '', + '[Data]', 'Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,Sample_Project,Description']) + + def test_write_sample_sheet_single_lane(self): + self.model = get_model(self.flowCellDict['RUN_NAME_FOLDER']) + + self.ordered_sample_sheet_dict, self.csv_file_name = create_sample_sheet_dict(self.model, self.parentDict, self.index_length_dict, self.flowCellDict, self.config_dict, self.index1Vocabulary, self.index2Vocabulary, self.flowCellName, self.logger) + + write_sample_sheet_single_lane(self.model, self.ordered_sample_sheet_dict, self.flowCellDict, self.index_length_dict, + self.parentDict, self.config_dict, self.options, self.logger, self.csv_file_name) + + def tearDown(self): + self.service.logout() + self.logger.info('Logged out') + + +class create_sample_sheet_000000000_AH5W3(unittest.TestCase): + """ + MiSeq + """ + + + def setUp(self): + self.myCode = '000000000-AH5W3' + self.logger = setUpLogger('log/') + self.config_dict = readConfig(self.logger) + + import argparse + import shlex + parser = argparse.ArgumentParser() + + parser.add_argument('--flowcell') + parser.add_argument('--lineending') + parser.add_argument('--outdir') + + cmd_string = ['--flowcell', self.myCode, '--lineending', 'win32', '--outdir', '../../targets/playground'] + self.options = parser.parse_args(cmd_string) + + self.service = OpenbisServiceFacadeFactory.tryCreate(self.config_dict['openbisUserName'], + self.config_dict['openbisPassword'], + self.config_dict['openbisServer'], + self.config_dict['connectionTimeout']) + + self.flowcell, self.containedSamples = get_flowcell('ILLUMINA_FLOW_CELL', + self.myCode, self.service, self.logger) + self.flowCellDict = transform_sample_to_dict(self.flowcell) + self.parentDict, self.samplesPerLaneDict = get_contained_sample_properties( + self.containedSamples, self.service) + self.flowCellName = self.flowcell.getCode() + self.index1Vocabulary = get_vocabulary(self.config_dict['index1Name'], self.service) + self.index2Vocabulary = get_vocabulary(self.config_dict['index2Name'], self.service) + self.index_length_dict = verify_index_length(self.parentDict, self.flowCellDict, self.config_dict, self.logger) + + + def test_get_flowCell (self): + self.assertEqual(self.flowcell.getCode(), self.myCode) + self.assertEqual(self.containedSamples.size(), 1) - write_sample_sheet_single_lane(self.ordered_sample_sheet_dict, self.flowCellDict, + fcProp = self.flowcell.getProperties() + self.assertEqual(fcProp['SEQUENCER'], 'M01761') + self.assertEqual(self.flowCellDict['ILLUMINA_PIPELINE_VERSION'], '1.18.54') + + + def test_get_contained_sample_properties(self): + self.assertEqual(self.parentDict['BSSE_QGF_36763_000000000_AH5W3_1']['BARCODE'], 'ATGTCAG') + self.assertEqual(self.parentDict['BSSE_QGF_36761_000000000_AH5W3_1']['NCBI_ORGANISM_TAXONOMY'], '10090') + self.assertEqual(self.samplesPerLaneDict['1'], 10) + + def test_verify_index_length(self): + self.assertDictEqual(self.index_length_dict,{1: [6, 0]}) + + + def test_create_sample_sheet_dict(self): + + self.model = get_model(self.flowCellDict['RUN_NAME_FOLDER']) + + self.ordered_sample_sheet_dict, self.csv_file_name = create_sample_sheet_dict(self.model, self.parentDict, self.index_length_dict, + self.flowCellDict, self.config_dict, self.index1Vocabulary, self.index2Vocabulary, self.flowCellName, self.logger) + + self.assertDictEqual(self.ordered_sample_sheet_dict, + {u'1_BSSE_QGF_36763_000000000_AH5W3_1': [u'1,BSSE_QGF_36763_000000000_AH5W3_1,BSSE_QGF_36763_000000000_AH5W3_1_H7_PRE_Idx_15_ATGTCA,,,A015,ATGTCA,BSSE_QGF_36763_000000000_AH5W3_1,'], + u'1_BSSE_QGF_36765_000000000_AH5W3_1': [u'1,BSSE_QGF_36765_000000000_AH5W3_1,BSSE_QGF_36765_000000000_AH5W3_1_H8_NBC_Idx_16_CCGTCC,,,A016,CCGTCC,BSSE_QGF_36765_000000000_AH5W3_1,'], + u'1_BSSE_QGF_36768_000000000_AH5W3_1': [u'1,BSSE_QGF_36768_000000000_AH5W3_1,BSSE_QGF_36768_000000000_AH5W3_1_H8_BMPC_Idx_19_GTGAAA,,,A019,GTGAAA,BSSE_QGF_36768_000000000_AH5W3_1,'], + u'1_BSSE_QGF_36767_000000000_AH5W3_1': [u'1,BSSE_QGF_36767_000000000_AH5W3_1,BSSE_QGF_36767_000000000_AH5W3_1_H8_PRE_Idx_20_GTGGCC,,,A020,GTGGCC,BSSE_QGF_36767_000000000_AH5W3_1,'], + u'1_BSSE_QGF_36766_000000000_AH5W3_1': [u'1,BSSE_QGF_36766_000000000_AH5W3_1,BSSE_QGF_36766_000000000_AH5W3_1_H8_SPC_Idx_18_GTCCGC,,,A018,GTCCGC,BSSE_QGF_36766_000000000_AH5W3_1,'], + u'1_BSSE_QGF_36761_000000000_AH5W3_1': [u'1,BSSE_QGF_36761_000000000_AH5W3_1,BSSE_QGF_36761_000000000_AH5W3_1_H7_NBC_Idx_12_CTTGTA,,,A012,CTTGTA,BSSE_QGF_36761_000000000_AH5W3_1,'], + u'1_BSSE_QGF_36762_000000000_AH5W3_1': [u'1,BSSE_QGF_36762_000000000_AH5W3_1,BSSE_QGF_36762_000000000_AH5W3_1_H7_SPC_Idx_13_AGTCAA,,,A013,AGTCAA,BSSE_QGF_36762_000000000_AH5W3_1,'], + u'1_BSSE_QGF_36764_000000000_AH5W3_1': [u'1,BSSE_QGF_36764_000000000_AH5W3_1,BSSE_QGF_36764_000000000_AH5W3_1_H7_BMPC_Idx_14_AGTTCC,,,A014,AGTTCC,BSSE_QGF_36764_000000000_AH5W3_1,']}) + + + def test_create_header_section(self): + self.model = get_model(self.flowCellDict['RUN_NAME_FOLDER']) + lane = 1 + self.date = getTodayDate() + self.create_header_section = create_header_section(self.model, self.config_dict, self.parentDict, self.flowCellDict, self.index_length_dict, lane) + self.assertListEqual(self.create_header_section, ['[Header]', 'IEMFileVersion,4', 'Investigator Name,ETHZ_D-BSSE', 'Project Name,Genomics Facility Basel', + u'Experiment Name,000000000-AH5W3', 'Date,' + self.date, 'Workflow,GenerateFASTQ', 'Application,FASTQ Only', + 'Assay,', u'Description,PAIRED_END_301', 'Chemistry,Default', '', '[Reads]', u'301', u'301', '', '[Settings]', '', + '[Data]', 'Lane,Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,Sample_Project,Description']) + + def test_write_sample_sheet_single_lane(self): + self.model = get_model(self.flowCellDict['RUN_NAME_FOLDER']) + + self.ordered_sample_sheet_dict, self.csv_file_name = create_sample_sheet_dict(self.model, self.parentDict, self.index_length_dict, + self.flowCellDict, self.config_dict, self.index1Vocabulary, self.index2Vocabulary, self.flowCellName, self.logger) + + write_sample_sheet_single_lane(self.model, self.ordered_sample_sheet_dict, self.flowCellDict, self.index_length_dict, self.parentDict, self.config_dict, self.options, self.logger, self.csv_file_name) def tearDown(self): self.service.logout() self.logger.info('Logged out') + def main(): unittest.main() -- GitLab