Skip to content
Snippets Groups Projects
createSampleSheet.py 27.5 KiB
Newer Older
  • Learn to ignore specific revisions
  • kohleman's avatar
    kohleman committed
    @copyright:
    
    Copyright 2014 ETH Zuerich, SIS
    
    kohleman's avatar
    kohleman committed
    Licensed under the Apache License, Version 2.0 (the 'License');
    
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at
     
    http://www.apache.org/licenses/LICENSE-2.0
     
    Unless required by applicable law or agreed to in writing, software
    
    kohleman's avatar
    kohleman committed
    distributed under the License is distributed on an 'AS IS' BASIS,
    
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
    
    @author:
    Manuel Kohler
    
    
    kohleman's avatar
    kohleman committed
    @description:
    
    Creates the SampleSheet.csv out of values from openBIS for Demultiplexing 
    used in the Illumina pipeline (configureBclToFastq.pl) 
    
    
    kohleman's avatar
    kohleman committed
    @attention:
    Runs under Jython
    
    kohleman's avatar
    kohleman committed
    @note:
    Special characters are replaced with an underscore so that the Illumina script
    does not fail
    
    HiSeq Header Description
    ========================
    
    Column Header  Description
    FCID  Flow cell ID
    Lane  Positive integer, indicating the lane number (1-8)
    SampleID  ID of the sample
    SampleRef  The reference used for alignment for the sample
    Index  Index sequences. Multiple index reads are separated by a hyphen (for example, ACCAGTAA-GGACATGA).
    Description  Description of the sample
    Control  Y indicates this lane is a control lane, N means sample
    Recipe Recipe used during sequencing
    Operator Name or ID of the operator
    SampleProject  The project the sample belongs to
    '''
    
    from __future__ import with_statement
    import os
    import logging
    import re
    import sys
    
    kohleman's avatar
    kohleman committed
    import string
    import smtplib
    
    from ConfigParser import SafeConfigParser
    from optparse import OptionParser
    from datetime import *
    
    
    kohleman's avatar
    kohleman committed
    from email.MIMEMultipart import MIMEMultipart
    from email.MIMEBase import MIMEBase
    from email.MIMEText import MIMEText
    from email.Utils import COMMASPACE, formatdate
    from email import Encoders
    
    
    from ch.systemsx.cisd.openbis.dss.client.api.v1 import OpenbisServiceFacadeFactory
    from ch.systemsx.cisd.openbis.generic.shared.api.v1.dto import SearchCriteria
    from ch.systemsx.cisd.openbis.generic.shared.api.v1.dto import SearchSubCriteria
    
    
    kohleman's avatar
    kohleman committed
    # Maps the value of the --lineending command line option to the end-of-line
    # sequence which is appended to the generated lines.
    lineending = dict(win32='\r\n', linux='\n', mac='\r')

    # Separator used when assembling Sample Sheet rows and bases masks.
    COMMA = ','
    
    kohleman's avatar
    kohleman committed
    def login(logger, configMap):
      '''
      Opens a session on the openBIS server named in the configuration.

      logger    -- logger used to report the login attempt
      configMap -- dictionary providing 'openbisUserName', 'openbisPassword',
                   'openbisServer' and 'connectionTimeout'

      Returns whatever OpenbisServiceFacadeFactory.tryCreate() hands back.
      '''
      server = configMap['openbisServer']
      logger.info('Logging into ' + server)

      return OpenbisServiceFacadeFactory.tryCreate(configMap['openbisUserName'],
                                                   configMap['openbisPassword'],
                                                   server,
                                                   configMap['connectionTimeout'])
    
    
    kohleman's avatar
    kohleman committed
    def logout (service, logger):
      '''
      Ends the openBIS session and records that in the log.

      service -- openBIS service facade to log out from
      logger  -- logger receiving the confirmation message
      '''
      service.logout()
      logger.info('Logged out')
    
    kohleman's avatar
    kohleman committed
    def setUpLogger(logPath, logLevel=logging.INFO):
      '''
      Configures logging into a time-stamped file and returns a logger.

      logPath  -- prefix put directly in front of the log file name; it is
                  concatenated as-is, so a directory needs its trailing separator
      logLevel -- threshold handed to logging.basicConfig (default: logging.INFO)

      Returns a logger named after the log file
      (createSampleSheet_<YYYY-mm-dd_HH_MM_SS>.log).
      '''
      timeStamp = datetime.now().strftime('%Y-%m-%d_%H_%M_%S')
      logFileName = '_'.join(['createSampleSheet', timeStamp]) + '.log'

      logging.basicConfig(filename=logPath + logFileName,
                          format='%(asctime)s [%(levelname)s] %(message)s',
                          level=logLevel)
      return logging.getLogger(logFileName)
    
    kohleman's avatar
    kohleman committed
    def parseOptions(logger):
      '''
      Parses the command line and returns the optparse options object.

      logger -- logger used to report that parsing takes place

      Recognised options: -f/--flowcell (mandatory), -m/--mailist,
      -l/--lineending, -o/--outdir, -s/--singlelane, -d/--debug, -v/--verbose.

      The returned outdir always ends with '/'; an empty outdir falls back to
      the default './'. When no flowcell is given the help text is printed and
      the program exits with status -1.
      '''
      logger.info('Parsing command line parameters')
      parser = OptionParser(version='%prog 1.0')
      parser.add_option('-f', '--flowcell',
                      dest='flowcell',
                      help='The flowcell which is used to create the SampleSheet.csv',
                      metavar='<flowcell>')
      parser.add_option('-m', '--mailist',
                      dest='maillist',
                      default=False,
                      action='store_true',
                      help='Generated Sample Sheet will be addtionally sent as email to the defined list of recipients',
                      metavar='<maillist>')
      parser.add_option('-l', '--lineending',
                      dest='lineending',
                      type='choice',
                      action='store',
                      choices=['win32', 'linux', 'mac'],
                      default='linux',
                      help='Specify end of line separator: win32, linux, mac. Default: linux' ,
                      metavar='<lineending>')
      parser.add_option('-o', '--outdir',
                      dest='outdir',
                      default='./',
                      help='Specify the ouput directory. Default: ./' ,
                      metavar='<outdir>')
      parser.add_option('-s', '--singlelane',
                      dest='singlelane',
                      default=False,
                      action='store_true',
                      help='Creates a single Sample Sheet for each lane. Default: False')
      parser.add_option('-d', '--debug',
                      dest='debug',
                      default=False,
                      action='store_true',
                      help='Verbose debug logging. Default: False')
      parser.add_option('-v', '--verbose',
                      dest='verbose',
                      default=False,
                      action='store_true',
                      help='Write Sample Sheet to stout. Default: False')

      options = parser.parse_args()[0]

      # An empty value (-o '') must not reach an index access; treat it like the default
      if not options.outdir:
        options.outdir = './'
      elif not options.outdir.endswith('/'):
        options.outdir = options.outdir + '/'

      if options.flowcell is None:
        parser.print_help()
        sys.exit(-1)
      return options
    
    
    kohleman's avatar
    kohleman committed
    def parseConfigurationFile(propertyFile='etc/createSampleSheet.properties'):
      '''
      Loads the given properties file into a SafeConfigParser and returns the parser.

      propertyFile -- path of the configuration file to read
      '''
      parser = SafeConfigParser()
      parser.read(propertyFile)
      return parser
    
    def readConfig(logger):
      '''
      Reads the properties file and returns its settings as a flat dictionary.

      logger -- logger used to report that the configuration is read

      Every value is returned as a string except 'connectionTimeout', which is
      converted to an int. 'openbisPassword' is read raw, i.e. without '%'
      interpolation. 'miSeqDataSection' is added only when the ILLUMINA section
      defines it.

      A section or option missing from the file raises the corresponding
      ConfigParser error.
      '''
      GENERAL = 'GENERAL'
      OPENBIS = 'OPENBIS'
      ILLUMINA = 'ILLUMINA'

      # Options to read, grouped by section and listed in reading order
      optionsBySection = (
        (GENERAL, ('facilityName', 'facilityNameShort', 'facilityInstitution',
                   'mailList', 'mailFrom', 'smptHost', 'SampleSheetFileName',
                   'separator', 'indexSeparator')),
        (OPENBIS, ('openbisUserName', 'openbisPassword', 'openbisServer',
                   'connectionTimeout', 'illuminaFlowCellTypeName',
                   'index1Name', 'index2Name', 'index1Length', 'index2Length',
                   'endType', 'cycles', 'controlLane', 'ncbi',
                   'externalSampleName', 'laneCount')),
        (ILLUMINA, ('hiSeqNames', 'miSeqNames', 'hiSeqHeader',
                    'miSeqHeaderSection', 'miSeqReadsSection',
                    'miSeqSettingsSection', 'miSeqWorkflow', 'miSeqApplication',
                    'miSeqChemistry', 'nexteraAdapter', 'iemFileVersion',
                    'configureBclToFastqPath', 'failedReads', 'clusterCount',
                    'clusterCountNumber', 'outputDir', 'sampleSheetName',
                    'baseMask')),
      )

      logger.info('Reading config file')

      configMap = {}
      configParameters = parseConfigurationFile()

      for section, names in optionsBySection:
        for name in names:
          if name == 'connectionTimeout':
            value = configParameters.getint(section, name)
          elif name == 'openbisPassword':
            # raw: a '%' in the password must not be treated as interpolation
            value = configParameters.get(section, name, raw=True)
          else:
            value = configParameters.get(section, name)
          configMap[name] = value

      # createMiSeqSampleSheet reads this key; it is optional here so that
      # property files without it keep working for the other code paths
      if configParameters.has_option(ILLUMINA, 'miSeqDataSection'):
        configMap['miSeqDataSection'] = configParameters.get(ILLUMINA, 'miSeqDataSection')

      return configMap
    
    def getDate():
      '''
      Returns the current local date as text, e.g. 'Monday, 03 of March 2014'.
      '''
      return datetime.now().strftime('%A, %d of %B %Y')
    
    def sanitizeString(myString):
      '''
      Returns myString with every run of characters other than ASCII letters
      and digits collapsed into a single underscore.
      '''
      nonAlphanumeric = re.compile('[^A-Za-z0-9]+')
      return nonAlphanumeric.sub('_', myString)
    
    kohleman's avatar
    kohleman committed
    def getVocabulary(vocabularyCode, service):
      '''
      Looks up a vocabulary by its code and returns its terms as a dictionary
      mapping term code to term label.

      vocabularyCode -- code of the vocabulary to look for
      service        -- openBIS service facade providing listVocabularies()

      When no matching vocabulary with terms is found a message is printed
      and an empty dictionary is returned.
      '''
      matchingTerms = []
      for candidate in service.listVocabularies():
        if candidate.getCode() == vocabularyCode:
          matchingTerms = candidate.getTerms()

      if not matchingTerms:
        print ('No vocabulary found for ' + vocabularyCode)
        return {}

      return dict((term.getCode(), term.getLabel()) for term in matchingTerms)
    
    def sendMail(emails, files, flowCellName, configMap, logger):
      '''
      Sends the given files as attachments of an email to the given recipients.

      emails       -- whitespace separated string of recipient addresses
      files        -- iterable of paths of the files to attach
      flowCellName -- flow cell name, used in the subject and the body text
      configMap    -- dictionary providing 'mailFrom' and 'smptHost'
      logger       -- logger used to report the sent mail
      '''
      COMMASPACE = ', '
      listofEmails = emails.split()

      msg = MIMEMultipart()
      msg['From'] = configMap['mailFrom']
      msg['To'] = COMMASPACE.join(listofEmails)
      msg['Date'] = formatdate(localtime=True)
      msg['Subject'] = 'Generated Sample Sheet for flowcell ' + flowCellName

      msg.attach(MIMEText('Sample Sheet for ' + flowCellName + ' attached.'))

      for f in files:
        # Read through a context manager so that the file handle is released again
        with open(f, 'rb') as attachment:
          payload = attachment.read()
        part = MIMEBase('application', 'octet-stream')
        part.set_payload(payload)
        Encoders.encode_base64(part)
        part.add_header('Content-Disposition', 'attachment; filename="%s"' % os.path.basename(f))
        msg.attach(part)

      smtp = smtplib.SMTP(configMap['smptHost'])
      try:
        smtp.sendmail(configMap['mailFrom'], listofEmails, msg.as_string())
      finally:
        # Close the connection even when sending fails
        smtp.close()

      logger.info('Sent email to ' + COMMASPACE.join(listofEmails))
    
    kohleman's avatar
    kohleman committed
    def getFlowCell (illuminaFlowCellTypeName, flowCellName, service, logger):
      '''
      Searches openBIS for the flow cell and for the samples it contains.

      illuminaFlowCellTypeName -- sample type used for the type match clause
      flowCellName             -- code of the flow cell to look for
      service                  -- openBIS service facade used for searching
      logger                   -- logger used to report the outcome

      Returns a tuple (flow cell sample, contained samples). If the search does
      not yield exactly one flow cell a message is printed and logged and the
      program exits with status 1.
      '''
      sc = SearchCriteria()
      sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.TYPE, illuminaFlowCellTypeName))
      sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, flowCellName))
      foundSample = service.searchForSamples(sc)

      # Explicit check instead of an assert, which is skipped when running with -O
      if foundSample.size() != 1:
        message = str(foundSample.size()) + ' flow cells found which match.'
        logger.error(message)
        print (message)
        sys.exit(1)

      logger.info('Found ' + foundSample[0].getCode() + ' in openBIS')

      # Search for contained samples
      sampleSc = SearchCriteria()
      sampleSc.addSubCriteria(SearchSubCriteria.createSampleContainerCriteria(sc))
      foundContainedSamples = service.searchForSamples(sampleSc)

      return foundSample[0], foundContainedSamples
    
    
    
    kohleman's avatar
    kohleman committed
    def getParents(sampleName, service):
      '''
      Returns the parents of the sample with the given code.

      sampleName -- code of the child sample
      service    -- openBIS service facade used for searching

      If the code does not match exactly one sample a message is printed; the
      parent search is carried out regardless.
      '''
      sc = SearchCriteria()
      sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sampleName))
      foundSample = service.searchForSamples(sc)

      # Explicit check instead of an assert, which is skipped when running with -O
      if foundSample.size() != 1:
        print (str(foundSample.size()) + ' flow lanes found which match.')

      # set the criteria for getting the parents when providing the child name
      sampleSc = SearchCriteria()
      sampleSc.addSubCriteria(SearchSubCriteria.createSampleChildCriteria(sc))
      return service.searchForSamples(sampleSc)
    
    def getContainedSampleProperties(containedSamples, service):
      # TODO: update description and function name
      '''
      Takes a list of contained samples (lanes), retrieves the parents of each
      and returns their properties.

      containedSamples -- samples contained in the flow cell
      service          -- openBIS service facade used to look up the parents

      Returns a tuple of two dictionaries:
        * sanitized '<parent code>_<lane code>' -> dictionary of the parent's
          properties plus the entry 'LANE' holding the lane code
        * last character of the lane code -> number of parents of that lane

      A message is printed for a lane without any parent.
      '''
      parentDict = {}
      samplesPerLaneDict = {}

      for lane in containedSamples:
        laneCode = lane.getCode()
        parents = getParents(laneCode, service)

        # Explicit check instead of an assert, which is skipped when running with -O
        if parents.size() < 1:
          print (str(parents.size()) + ' parents found for lane ' + laneCode)

        samplesPerLaneDict[laneCode[-1]] = len(parents)

        for parent in parents:
          parentProperties = parent.getProperties()
          propertyDict = {}
          for propertyName in parentProperties:
            propertyDict[propertyName] = parentProperties.get(propertyName)
          propertyDict['LANE'] = laneCode

          myKey = sanitizeString(parent.getCode() + '_' + laneCode)
          parentDict[myKey] = propertyDict

      return parentDict, samplesPerLaneDict
    
    def convertSampleToDict(foundFlowCell):
      '''
      Copies the properties of a
      <type 'ch.systemsx.cisd.openbis.generic.shared.api.v1.dto.Sample'> into a python dict.

      Besides the properties the dict gets two entries: 'Project' (last
      '/'-separated part of the experiment identifier) and 'Name' (last
      '/'-separated part of the sample identifier).
      '''
      sampleProperties = foundFlowCell.getProperties()
      flowCellDict = dict((name, sampleProperties.get(name)) for name in sampleProperties)

      # NOTE(review): the 'OrNull' in the getter name suggests it may return null,
      # in which case the split() below would fail - confirm
      experimentIdentifier = foundFlowCell.getExperimentIdentifierOrNull()
      flowCellDict['Project'] = experimentIdentifier.split('/')[-1]
      flowCellDict['Name'] = foundFlowCell.getIdentifier().split('/')[-1]

      return flowCellDict
    
    
    kohleman's avatar
    kohleman committed
    
    def createDemultiplexCommands(myoptions, configMap, laneIndexDict, endType, cycles, lane,
                                  myFileName, indexRead1Length, indexRead2Length):
      '''
      Assembles the command line which demultiplexes a single flow lane.

      myoptions        -- parsed command line options; 'lineending' selects the
                          end-of-line sequence appended to the command
      configMap        -- configuration dictionary providing the fixed parts of
                          the command
      laneIndexDict    -- maps a lane number to a list whose first element is
                          the index length and whose second element is
                          'DUALINDEX' for dual indexed lanes
      endType          -- 'PAIRED_END' adds a second read to the bases mask
      cycles           -- number of cycles, as a string
      lane             -- number of the lane the command is built for
      myFileName       -- Sample Sheet file name passed on to the command
      indexRead1Length -- length of the first index read, used to pad the
                          single index mask with 'n'
      indexRead2Length -- not used here

      Returns a list holding the complete command as a single string.
      '''
      newline = lineending[myoptions.lineending]
      laneEntry = laneIndexDict[lane]
      indexlength = laneEntry[0]

      maskParts = ['Y' + cycles]
      if laneEntry[1] == 'DUALINDEX':
        # each of the two index reads gets half (integer division) of the stored length
        halfIndexMask = 'I' + str(indexlength // 2) + 'n'
        maskParts.extend([halfIndexMask, halfIndexMask])
      else:
        padding = (indexRead1Length - indexlength + 1) * 'n'
        maskParts.append('I' + str(indexlength) + padding)

      if endType == 'PAIRED_END':
        maskParts.append('Y' + cycles)
      basesMask = COMMA.join(maskParts)

      command = ' '.join([configMap['configureBclToFastqPath'],
                          configMap['failedReads'],
                          configMap['clusterCount'],
                          configMap['clusterCountNumber'],
                          configMap['outputDir'],
                          '../../../Unaligned_' + str(lane),
                          configMap['sampleSheetName'],
                          myFileName,
                          configMap['baseMask'],
                          basesMask,
                          "--no-eamss",
                          newline])
      return [command]
    
    kohleman's avatar
    kohleman committed
    
    def createHiseqSampleSheet(parentDict, flowCellDict, samplesPerLaneDict, flowCellName, configMap,
                               logger, myoptions):
    
    kohleman's avatar
    kohleman committed
      Builds up a dictionary with all entries in the Sample Sheet
    
      '''
      sampleSheetDict = {}
    
    kohleman's avatar
    kohleman committed
      laneIndexDict = {}
      # the Illumina pipeline always uses one base less than the sequencer is sequencing
      DEMULTIPLEX_INDEX_LENGTH_PENALTY = -1
    
      logger.debug (parentDict)
      logger.debug(samplesPerLaneDict)
    
      # Making sure the header is always at the top of the file
    
      sampleSheetDict[u'!'] = ([configMap['hiSeqHeader']])
    
    kohleman's avatar
    kohleman committed
      endType = flowCellDict[configMap['endType']]
      cycles = flowCellDict[configMap['cycles']]
    
      index1Name = configMap['index1Name']
      index2Name = configMap['index2Name']
    
    
      indexRead1Length = int(flowCellDict[configMap['index1Length']]) + DEMULTIPLEX_INDEX_LENGTH_PENALTY
    
      try:
        indexRead2Length = int(flowCellDict[configMap['index2Length']]) + DEMULTIPLEX_INDEX_LENGTH_PENALTY
      except:
    
    kohleman's avatar
    kohleman committed
      for key in parentDict.keys():
        index = ''
    
    kohleman's avatar
    kohleman committed
        dualIndex = False
    
    kohleman's avatar
    kohleman committed
        lane = parentDict[key]['LANE'][-1:]
    
        
        if index1Name in parentDict[key] and indexRead1Length > 0 and (parentDict[key])[index1Name] != 'NOINDEX':
    
    kohleman's avatar
    kohleman committed
          index = parentDict[key][index1Name][0:indexRead1Length]
    
    
        if index2Name in parentDict[key] and indexRead2Length > 0 and (parentDict[key])[index2Name] != 'NOINDEX':
    
    kohleman's avatar
    kohleman committed
          index = index + configMap['indexSeparator'] + parentDict[key][index2Name][0:indexRead2Length]
          dualIndex = True
    
    
    kohleman's avatar
    kohleman committed
        # little hack to make the non-indexed control lane and non-indexed runs also part of
        # the sample sheet
    
    kohleman's avatar
    kohleman committed
        # eventually hide in a function
    
           
        if len(index) == 0:
          # more than one samples in an non-indexed lane 
          if samplesPerLaneDict[lane] > 1:
    
            # if it is not a PhiX  and not a Pool Sample
    
            if (parentDict[key]['NCBI_ORGANISM_TAXONOMY'] != '10847') and parentDict[key]['BARCODE_COMPLEXITY_CHECKER'] == 'No Pool':
    
              # then just use this sample and skip the others if there are more
              samplesPerLaneDict[lane] = 0
              index = ' '
          
          # only one non-indexed sample in the lane   
          elif samplesPerLaneDict[lane] == 1:
            index = ' '
        
    
    kohleman's avatar
    kohleman committed
        # Set flag if this lane is a control lane. has no influence on the result, but makes reading
        # the demultiplex statistics easier
    
        try:
          if lane == flowCellDict[configMap['controlLane']]:
            control = 'Y'
          else:
            control = 'N'
        except:
          control = 'N'     
    
    kohleman's avatar
    kohleman committed
        if len(index) > 0:
    
    kohleman's avatar
    kohleman committed
          sampleSheetDict[lane + '_' + key] = [flowCellName + COMMA
                                + lane + COMMA
                                + key + COMMA
                                + parentDict[key][configMap['ncbi']] + COMMA
                                + index + COMMA
                                + sanitizeString(parentDict[key][configMap['externalSampleName']]) + COMMA
                                + control + COMMA
                                + endType + '_' + cycles + COMMA
                                + configMap['facilityInstitution'] + COMMA
    
    kohleman's avatar
    kohleman committed
          sampleEndType = 'DUALINDEX' if dualIndex else 'SINGLEINDEX'
          laneIndexDict[int(lane)] = [len(index), sampleEndType]
    
    
      sortedSampleSheetList = sampleSheetDict.keys()
      sortedSampleSheetList.sort()
    
        # if single lane demultiplexing is activated
    
    kohleman's avatar
    kohleman committed
      if myoptions.singlelane:
    
    kohleman's avatar
    kohleman committed
        demultiplexCommandList = []
        for lane in range(1, int(flowCellDict[configMap['laneCount']]) + 1):
    
    kohleman's avatar
    kohleman committed
          laneSeparatedList = [sample for sample in sortedSampleSheetList if sample[0] == str(lane)]
          # Making sure the header is also included at the top
          laneSeparatedList.insert(0, u'!')
          logger.debug(laneSeparatedList)
    
    kohleman's avatar
    kohleman committed
          myFileName = myoptions.outdir + configMap['SampleSheetFileName'] + '_' + str(lane) + '_' + \
                       flowCellName + '.csv'
    
          demultiplexCommandList.append(createDemultiplexCommands(myoptions, configMap, laneIndexDict,
    
                                             endType, cycles, lane, myFileName, indexRead1Length, indexRead2Length))
    
          SamplesheetFile = writeSampleSheet(myoptions, logger, sampleSheetDict, laneSeparatedList,
    
    kohleman's avatar
    kohleman committed
                           fileName=myFileName)
    
        writeDemultiplexCommandList(logger, demultiplexCommandList,
    
    kohleman's avatar
    kohleman committed
                                    fileName=myoptions.outdir + flowCellName + '_DemultiplexCommandList.txt')
        pickleDemultiplexCommandList(logger, demultiplexCommandList,
                                    fileName=myoptions.outdir + flowCellName + '_DemultiplexCommandList.pickle')
    
    kohleman's avatar
    kohleman committed
      else:
    
        myFileName = myoptions.outdir + configMap['SampleSheetFileName'] + '_' + \
                       flowCellName + '.csv'
    
    kohleman's avatar
    kohleman committed
        writeSampleSheet(myoptions, logger, sampleSheetDict, sortedSampleSheetList,
                         fileName=myFileName)
    
      return sampleSheetDict, myFileName
    
    kohleman's avatar
    kohleman committed
    
    def writeDemultiplexCommandList(logger, demultiplexCommandList,
    
    kohleman's avatar
    kohleman committed
      try:
    
    kohleman's avatar
    kohleman committed
        with open(fileName, 'wb') as demuxFile:
    
    kohleman's avatar
    kohleman committed
          for listElement in demultiplexCommandList:
            demuxFile.write(*listElement)
    
          logger.info('Writing file ' + fileName)
          print('Written ' + fileName)
      except IOError, err:
        logger.error('File error: ' + str(err))
        print ('File error: ' + str(err))
    
    
    kohleman's avatar
    kohleman committed
    def pickleDemultiplexCommandList(logger, demultiplexCommandList, fileName):
      import pickle
    
      try:
        with open(fileName, 'w') as pickleDemux:
          pickle.dump(demultiplexCommandList, pickleDemux)
        logger.info('Writing file ' + fileName)
      except IOError, err:
        logger.error('File error: ' + str(err))
        print ('File error: ' + str(err))
    
    
    
    kohleman's avatar
    kohleman committed
    def writeSampleSheet(myoptions, logger, sampleSheetDict, sortedSampleSheetList, fileName):
    
    kohleman's avatar
    kohleman committed
      '''
      Writes the given dictionary out to a csv file. The additional list is sorted and is used to write
      the dictionary in a sorted order.   
      '''
      newline = lineending[myoptions.lineending]
      try:
    
    kohleman's avatar
    kohleman committed
        with open(fileName, 'w') as sampleSheetFile:
    
    kohleman's avatar
    kohleman committed
          for listElement in sortedSampleSheetList:
    
            if myoptions.verbose:
              print sampleSheetDict[listElement][0]
    
    kohleman's avatar
    kohleman committed
            sampleSheetFile.write(sampleSheetDict[listElement][0] + newline)
    
    kohleman's avatar
    kohleman committed
    
          logger.info('Writing file ' + fileName)
    
    kohleman's avatar
    kohleman committed
      except IOError, err:
        logger.error('File error: ' + str(err))
    
    kohleman's avatar
    kohleman committed
        print ('File error: ' + str(err))
    
    def writeMiSeqSampleSheet(sampleSheetDict, headerList, flowCellName, myoptions, logger, fileName):
    
    kohleman's avatar
    kohleman committed
      '''
    
    kohleman's avatar
    kohleman committed
      Writes the given dictionary to a csv file. The order does not matter. As the header is not fixed
      we first need to write the headerList in the file. This is specific to MiSeq
    
    kohleman's avatar
    kohleman committed
      '''
      newline = lineending[myoptions.lineending]
      try:
    
        with open(fileName, 'wb') as sampleSheetFile:
    
    kohleman's avatar
    kohleman committed
          for listElement in headerList:
    
            if myoptions.verbose:
              print listElement
    
    kohleman's avatar
    kohleman committed
            sampleSheetFile.write(listElement + newline)
          for sample in sampleSheetDict:
    
            if myoptions.verbose:
              print sampleSheetDict[sample][0]
    
    kohleman's avatar
    kohleman committed
            sampleSheetFile.write(sampleSheetDict[sample][0] + newline)
    
          logger.info('Writing file ' + fileName)
    
    
    kohleman's avatar
    kohleman committed
      except IOError:
    
    kohleman's avatar
    kohleman committed
        logger.error('File error: ' + str(err))
    
    kohleman's avatar
    kohleman committed
        print ('File error: ' + str(err))
    
    
    def createMiSeqSampleSheet(parentDict, flowCellDict, configMap, index1Vocabulary, index2Vocabulary,
                                flowCellName, logger, myoptions):
      '''
      Assembles the header lines and the per-sample lines of a MiSeq sample
      sheet and passes them to writeMiSeqSampleSheet.

      @param parentDict: dictionary keyed by sample code. For every sample the
        properties 'LANE', configMap['index1Name'], configMap['index2Name'] and
        configMap['externalSampleName'] are read.
      @param flowCellDict: properties of the flow cell. 'Name' and the entries
        named by configMap['endType'] and configMap['cycles'] are read.
      @param configMap: configuration values (separator, the miSeq*Section
        templates, facility data, property names, SampleSheetFileName).
      @param index1Vocabulary: maps an index1 value to the text written in the
        column preceding index1.
      @param index2Vocabulary: maps an index2 value to a text whose third
        whitespace-separated token is written in the column preceding index2.
      @param flowCellName: used in the last column of every sample line and in
        the file name.
      @param logger: only passed on to writeMiSeqSampleSheet.
      @param myoptions: parsed options; 'outdir' is used as file name prefix.
      @return: whatever writeMiSeqSampleSheet returns.

      Samples without the index1 property are skipped. Samples without the
      index2 property (single-indexed) get empty values in both index2 columns
      instead of aborting with a KeyError.
      '''
      separator = configMap['separator']
      endType = flowCellDict[configMap['endType']]
      cycles = flowCellDict[configMap['cycles']]

      # Header block: the first configured label is written on its own line
      # (presumably the section title), each following label is paired with
      # one value, in the order the labels appear in the configuration.
      headerLabels = [label.strip() for label in configMap['miSeqHeaderSection'].split(separator)]
      headerValues = [
        configMap['iemFileVersion'],
        configMap['facilityInstitution'],
        configMap['facilityName'],
        flowCellDict['Name'],
        datetime.now().strftime('%d.%m.%Y'),
        configMap['miSeqWorkflow'],
        configMap['miSeqApplication'],
        '',
        endType + '_' + cycles,
        configMap['miSeqChemistry'],
      ]
      headerList = [headerLabels[0]]
      for position, value in enumerate(headerValues):
        headerList.append(headerLabels[position + 1] + separator + value)
      headerList.append('')

      # Reads block: one cycle line, and a second identical one for paired end runs
      headerList.append(configMap['miSeqReadsSection'].split(separator)[0])
      headerList.append(cycles)
      if endType == 'PAIRED_END':
        headerList.append(cycles)
      headerList.append('')

      # Settings block: only the first configured entry is written.
      # NOTE: the kit dependent adapter line (configMap['nexteraAdapter'] or
      # configMap['truSeqAdapter'], selected by the 'KIT' property of the
      # samples) is disabled.
      headerList.append(configMap['miSeqSettingsSection'].split(separator)[0])
      headerList.append('')

      # Data block: this template is always split on ',' (not on the configured
      # separator). The second entry holds the whitespace separated column names.
      dataSection = configMap['miSeqDataSection'].split(',')
      headerList.append(dataSection[0])
      headerList.append(','.join(dataSection[1].strip().split()))

      sampleSheetDict = {}
      for key in parentDict:
        sample = parentDict[key]
        # only the last character of the LANE property is used
        lane = sample['LANE'][-1:]
        # If no index then just skip this sample
        if configMap['index1Name'] not in sample:
          continue
        index1 = sample[configMap['index1Name']]

        if configMap['index2Name'] in sample:
          index2 = sample[configMap['index2Name']]
          index2Id = index2Vocabulary[index2].split()[2]
        else:
          # single-indexed sample: keep the column count, leave index2 empty
          index2 = ''
          index2Id = ''

        columns = [key,
                   sanitizeString(sample[configMap['externalSampleName']]),
                   '',
                   '',
                   index1Vocabulary[index1],
                   index1,
                   index2Id,
                   index2,
                   '',
                   key + '_' + flowCellName]
        # the writer expects a one-element list per sample
        sampleSheetDict[lane + '_' + key] = [separator.join(columns)]

      myFileName = myoptions.outdir + configMap['SampleSheetFileName'] + '_' + flowCellName + '.csv'

      return writeMiSeqSampleSheet(sampleSheetDict, headerList, flowCellName,
                                   myoptions, logger, fileName=myFileName)
    
    kohleman's avatar
    kohleman committed
    def main ():
      '''
      Entry point: looks up the flow cell given on the command line, collects
      the properties of the samples it contains, creates the sample sheet and
      optionally mails it.

      The service session opened by login() is closed with logout() even when
      one of the steps in between fails.
      '''
      logger = setUpLogger('log/')
      logger.info('Started Creation of Sample Sheet...')

      myoptions = parseOptions(logger)
      if myoptions.debug:
        logger.setLevel(logging.DEBUG)

      flowCellName = myoptions.flowcell
      configMap = readConfig(logger)
      service = login(logger, configMap)

      try:
        foundFlowCell, containedSamples = getFlowCell(configMap['illuminaFlowCellTypeName'], flowCellName,
                                                      service, logger)
        parentDict, samplesPerLaneDict = getContainedSampleProperties(containedSamples, service)
        logger.info('Found ' + str(len(parentDict)) + ' samples on the flow cell ' + flowCellName)

        # From here on the flow cell name is the fourth '_' separated part of the
        # flow cell code. Codes without a '-' additionally lose the first
        # character of that part (presumably a position letter - to be confirmed).
        flowCellCode = foundFlowCell.getCode()
        flowCellName = flowCellCode.split('_')[3]
        if '-' not in flowCellCode:
          flowCellName = flowCellName[1:]

        flowCellDict = convertSampleToDict(foundFlowCell)

        # NOTE: the instrument detection based on configMap['hiSeqNames'] and
        # configMap['miSeqNames'] is disabled. Every run is handled by
        # createHiseqSampleSheet, createMiSeqSampleSheet is not called from here.
        _unusedSampleSheetDict, sampleSheetFile = createHiseqSampleSheet(
            parentDict, flowCellDict, samplesPerLaneDict, flowCellName, configMap,
            logger, myoptions)

        if myoptions.maillist:
          sendMail(configMap['mailList'], [sampleSheetFile], flowCellName, configMap, logger)
      finally:
        # always release the session, also after a failure
        logout(service, logger)
    
    kohleman's avatar
    kohleman committed
    # Run the sample sheet creation only when the file is executed as a script
    if __name__ == '__main__':
      main()