From 47f3903463ea552593ca53b83f2726d15876b41a Mon Sep 17 00:00:00 2001 From: kohleman <kohleman> Date: Thu, 15 Dec 2016 12:41:15 +0000 Subject: [PATCH] minor: small code cleanups SVN: 37508 --- .../Jython/createSampleSheet_bcl2fastq.py | 89 ++++++++++--------- 1 file changed, 48 insertions(+), 41 deletions(-) diff --git a/deep_sequencing_unit/source/Jython/createSampleSheet_bcl2fastq.py b/deep_sequencing_unit/source/Jython/createSampleSheet_bcl2fastq.py index ecebfb2db2b..774f9a843dd 100644 --- a/deep_sequencing_unit/source/Jython/createSampleSheet_bcl2fastq.py +++ b/deep_sequencing_unit/source/Jython/createSampleSheet_bcl2fastq.py @@ -71,8 +71,8 @@ CSV = ".csv" class Sequencers: - HISEQ_4000, HISEQ_3000, HISEQ_2500, HISEQ_2000, HISEQ_X, NEXTSEQ_500, MISEQ , UNIDENTIFIED= \ - ('Illumina HiSeq 4000','Illumina HiSeq 3000','Illumina HiSeq 2500','Illumina HiSeq 2000', + HISEQ_4000, HISEQ_3000, HISEQ_2500, HISEQ_2000, HISEQ_X, NEXTSEQ_500, MISEQ , UNIDENTIFIED = \ + ('Illumina HiSeq 4000', 'Illumina HiSeq 3000', 'Illumina HiSeq 2500', 'Illumina HiSeq 2000', 'Illumina HiSeq X', 'Illumina NextSeq 500', 'Illumina MiSeq', 'Unidentified') HISEQ_LIST = [Sequencers.HISEQ_2000, Sequencers.HISEQ_2500, Sequencers.HISEQ_3000, Sequencers.HISEQ_4000, Sequencers.HISEQ_X] @@ -249,10 +249,11 @@ def get_vocabulary(vocabulary_code, service): vocabulary_dict[term.getCode()] = term.getLabel() else: print ('No vocabulary found for ' + vocabulary_code) + print(vocabulary_dict) return vocabulary_dict -def send_email(emails, files, flowCellName, config_dict, logger): +def send_email(emails, flowCellName, config_dict, logger, subject, body, files=""): """ Send out an email to the specified recipients """ @@ -263,9 +264,9 @@ def send_email(emails, files, flowCellName, config_dict, logger): msg['From'] = config_dict['mailFrom'] msg['To'] = COMMASPACE.join(emails_list) msg['Date'] = formatdate(localtime=True) - msg['Subject'] = 'Generated Sample Sheet for flowcell ' + flowCellName + msg['Subject'] = subject - msg.attach(MIMEText('Sample Sheet for ' + flowCellName + ' attached.')) + msg.attach(MIMEText(body)) for f in files: part = MIMEBase('application', 'octet-stream') @@ -486,26 +487,26 @@ def write_sample_sheet_single_lane(model, ordered_sample_sheet_dict, flowCellDic def create_header_section (model, config_dict, parentDict, flowCellDict, index_length_dict, lane): - kitsDict = {"CHIP_SEQ_SAMPLE_PREP" : ["",""], - "TRUSEQ_RNA_SAMPLEPREPKIT_V2_ILLUMINA" : ["A","TruSeq LT"], + kitsDict = {"CHIP_SEQ_SAMPLE_PREP" : ["", ""], + "TRUSEQ_RNA_SAMPLEPREPKIT_V2_ILLUMINA" : ["A", "TruSeq LT"], "NEXTERA_XT_DNA_SAMPLE_PREPARATION_KIT_ILLUMINA" : ["S", "Nextera XT"], - "TRUSEQ_CHIP_SAMPLE_PREP_KIT" : ["A","TruSeq LT"], - "MRNA_SEQ_SAMPLE_PREP" : ["",""], - "TRUSEQRNA_SAMPLE_PREP_KIT" : ["A","TruSeq LT"], - "NEBNEXT_DNA_SAMPLE_PREP_MASTER_MIX_SET1" : ["A","TruSeq LT"], - "NEBNEXT_CHIP-SEQ_LIBRARY_PREP_REAGENT_SET" : ["A","TruSeq LT"], - "RIBOZERO_SCRIPTSEQ_MRNA-SEQ_KIT" : ["",""], + "TRUSEQ_CHIP_SAMPLE_PREP_KIT" : ["A", "TruSeq LT"], + "MRNA_SEQ_SAMPLE_PREP" : ["", ""], + "TRUSEQRNA_SAMPLE_PREP_KIT" : ["A", "TruSeq LT"], + "NEBNEXT_DNA_SAMPLE_PREP_MASTER_MIX_SET1" : ["A", "TruSeq LT"], + "NEBNEXT_CHIP-SEQ_LIBRARY_PREP_REAGENT_SET" : ["A", "TruSeq LT"], + "RIBOZERO_SCRIPTSEQ_MRNA-SEQ_KIT" : ["", ""], "NEXTERA_DNA_SAMPLE_PREPARATION_KIT_ILLUMINA" : ["N", "Nextera"], - "GENOMICDNA_SAMPLE_PREP" : ["",""], - "AGILENT_SURESELECTXT_AUTOMATEDLIBRARYPREP" : ["",""], - "TRUSEQ_DNA_SAMPLE_PREP_KIT" : ["A","TruSeq LT"], + "GENOMICDNA_SAMPLE_PREP" : ["", ""], + "AGILENT_SURESELECTXT_AUTOMATEDLIBRARYPREP" : ["", ""], + "TRUSEQ_DNA_SAMPLE_PREP_KIT" : ["A", "TruSeq LT"], "NEXTERA_DNA_SAMPLE_PREP_KITS" : ["N", "Nextera"], - "AGILENT_SURESELECT_ENRICHMENTSYSTEM" : ["",""], - "TRUSEQ_DNA_SAMPLE_PREP_KIT_V2" : ["A","TruSeq LT"], - "AGILENT_SURESELECT_HUMAN_ALL_EXON_V5_UTRS" : ["",""], - "POLYA_SCRIPTSEQ_MRNA-SEQ_KIT" : ["",""], - "AGILENT_SURESELECTXT2_MOUSE_ALL_EXON" : ["",""], - "PAIRED_END_DNA_SAMPLE_PREP" : ["",""], + "AGILENT_SURESELECT_ENRICHMENTSYSTEM" : ["", ""], + "TRUSEQ_DNA_SAMPLE_PREP_KIT_V2" : ["A", "TruSeq LT"], + "AGILENT_SURESELECT_HUMAN_ALL_EXON_V5_UTRS" : ["", ""], + "POLYA_SCRIPTSEQ_MRNA-SEQ_KIT" : ["", ""], + "AGILENT_SURESELECTXT2_MOUSE_ALL_EXON" : ["", ""], + "PAIRED_END_DNA_SAMPLE_PREP" : ["", ""], "NEXTERA_DNA_SAMPLE_PREP_KIT_BUFFER_HMW" : ["N", "Nextera"] } @@ -549,11 +550,11 @@ def create_header_section (model, config_dict, parentDict, flowCellDict, index_l settings_section = config_dict['settingsSection'].split(separator) settings_section.reverse() header_list.append(settings_section.pop()) - if ('nextera' in assay.lower()): - header_list.append(config_dict['nexteraAdapter']) - if ('truseq' in assay.lower()): - header_list.append(config_dict['truSeqAdapter1']) - header_list.append(config_dict['truSeqAdapter2']) +# if ('nextera' in assay.lower()): +# header_list.append(config_dict['nexteraAdapter']) +# if ('truseq' in assay.lower()): +# header_list.append(config_dict['truSeqAdapter1']) +# header_list.append(config_dict['truSeqAdapter2']) header_list.append('') if int(flowCellDict['INDEXREAD2']) > 0 and len_index2 > 0: @@ -583,7 +584,7 @@ def verify_index_length (parentDict, flowCellDict, config_dict, logger): logger.info("Flowcell has index length [" + str(flowcell_len_index1) + ", " + str(flowcell_len_index2) + "]") - for lane in range(1,int(flowCellDict['LANECOUNT'])+1): + for lane in range(1, int(flowCellDict['LANECOUNT']) + 1): index1_set = set () index2_set = set () index1_length = 0 @@ -597,7 +598,7 @@ def verify_index_length (parentDict, flowCellDict, config_dict, logger): if (config_dict['index1Name'] not in sample) or (sample[config_dict['index1Name']] == 'NOINDEX'): continue index1 = sample[config_dict['index1Name']] - index2="" + index2 = "" if config_dict['index2Name'] in sample: index2 = sample[config_dict['index2Name']] @@ -622,7 +623,7 @@ def verify_index_length (parentDict, flowCellDict, config_dict, logger): logger.info("Index2 Length Set: " + str(index2_set)) logger.info("Final length of index1 " + str(index1_length)) logger.info("Final length of index2 " + str(index2_length)) - #print("Lane " + str(lane) + " [" + str(index1_length) + "," + str(index2_length) + "]") + # print("Lane " + str(lane) + " [" + str(index1_length) + "," + str(index2_length) + "]") return index_length_dict @@ -645,11 +646,11 @@ def create_sample_sheet_dict(service, barcodesPerLaneDict, containedSamples, sam for key in lane_sample_properties.keys(): if lane_sample_properties[key][u'NCBI_ORGANISM_TAXONOMY'] != u'10847' and not single_index_set: index1 = "" - lane_string ="" + lane_string = "" if model in HISEQ_LIST or model in Sequencers.MISEQ: lane_string = lane_int + separator - line = separator.join([lane_string + key, key + '_' + sanitize_string(lane_sample_properties[key][config_dict['externalSampleName']]),"", "", "", "", key, ""]) + line = separator.join([lane_string + key, key + '_' + sanitize_string(lane_sample_properties[key][config_dict['externalSampleName']]), "", "", "", "", key, ""]) sampleSheetDict[lane_int + '_' + key] = [line] single_index_set = True @@ -660,23 +661,23 @@ def create_sample_sheet_dict(service, barcodesPerLaneDict, containedSamples, sam if ((config_dict['index1Name'] not in lane_sample_properties[key] or lane_sample_properties[key][config_dict['index1Name']] == 'NOINDEX')): continue - index1 = lane_sample_properties[key][config_dict['index1Name']] - index2="" + index1 = index1Vocabulary[lane_sample_properties[key][config_dict['index1Name']]] + index2 = "" if config_dict['index2Name'] in lane_sample_properties[key]: index2 = lane_sample_properties[key][config_dict['index2Name']] # Not needed, won't use it any more indexNumber = index2Vocabulary[lane_sample_properties[key][config_dict['index2Name']]].split()[2] - #try: + # try: # kit = lane_sample_properties[key][config_dict['kit']] # prefix = kitsDict[kit][0] - #except: + # except: # prefix = "" len_index1 = index_length_dict[int(lane_int)][0] len_index2 = index_length_dict[int(lane_int)][1] - lane_string ="" + lane_string = "" if model in HISEQ_LIST or model in Sequencers.MISEQ: lane_string = lane_int + separator @@ -688,12 +689,12 @@ def create_sample_sheet_dict(service, barcodesPerLaneDict, containedSamples, sam line = separator.join([lane_string + key, key + '_' + sanitize_string(lane_sample_properties[key][config_dict['externalSampleName']]) + '_' + index1[0:len_index1] + '_' + index2[0:len_index2], - "", "","", index1[0:len_index1],"", index2_processed, key, ""]) + "", "", "", index1[0:len_index1], "", index2_processed, key, ""]) sampleSheetDict[lane_int + '_' + key] = [line] else: line = separator.join([lane_string + key, key + '_' + sanitize_string(lane_sample_properties[key][config_dict['externalSampleName']]) + '_' + index1[0:len_index1], - "", "","", index1[0:len_index1], key, ""]) + "", "", "", index1[0:len_index1], key, ""]) sampleSheetDict[lane_int + '_' + key] = [line] csv_file_name = config_dict['SampleSheetFileName'] + '_' + flowCellName @@ -736,8 +737,14 @@ def main (): samplesPerLaneDict, model, parentDict, index_length_dict, flowCellDict, config_dict, index1Vocabulary, index2Vocabulary, flowCellName, logger) - - write_sample_sheet_single_lane(model, ordered_sample_sheet_dict, flowCellDict, index_length_dict, + if len(ordered_sample_sheet_dict) < len(containedSamples): + subject = "Warning: Creation of Sample Sheet (" + flowCellName + ") failed. No indices found." + body = "Warning: No parents/libraries assigned to one of the flowlanes of " + flowCellName + \ + ". Either it is a non-indexed lane or the parents are not set.\n" + \ + "Check in the following map for missing lanes:\n" + str(ordered_sample_sheet_dict) + send_email(config_dict['mailList'], flowCellName, config_dict, logger, subject, body) + else: + write_sample_sheet_single_lane(model, ordered_sample_sheet_dict, flowCellDict, index_length_dict, parentDict, config_dict, myoptions, logger, csv_file_name) logout(service, logger) -- GitLab