Skip to content
Snippets Groups Projects
Commit cec4f52e authored by kohleman's avatar kohleman
Browse files

cleaner code

SVN: 24724
parent d70b08e1
No related branches found
No related tags found
No related merge requests found
...@@ -9,15 +9,25 @@ TOTAL_READS, MAPPED_READS ...@@ -9,15 +9,25 @@ TOTAL_READS, MAPPED_READS
Obviously you need a working samtools binary Obviously you need a working samtools binary
Uses 'flagstat' and 'view -H'
Note: Note:
print statements go to: ~openbis/sprint/datastore_server/log/startup_log.txt print statements go to: ~openbis/sprint/datastore_server/log/startup_log.txt
''' '''
import os import os
import fnmatch
import re
from ch.systemsx.cisd.openbis.generic.shared.api.v1.dto import SearchCriteria from ch.systemsx.cisd.openbis.generic.shared.api.v1.dto import SearchCriteria
from ch.systemsx.cisd.openbis.generic.shared.api.v1.dto import SearchSubCriteria
FOLDER='/net/bs-dsu-data/array0/dsu/dss/incoming-jython-alignment/' FOLDER='/links/shared/dsu-dss/dss/incoming-jython-alignment/'
SAMTOOLS='/usr/local/dsu/samtools/samtools' SAMTOOLS='/usr/local/dsu/samtools/samtools'
BAM_PATTERN='*.bam'
matches = []
searchStrings = ['@PG']
programList = []
# Create a "transaction" -- a way of grouping operations together so they all # Create a "transaction" -- a way of grouping operations together so they all
# happen or none of them do. # happen or none of them do.
...@@ -30,27 +40,76 @@ dataSet.setMeasuredData(False) ...@@ -30,27 +40,76 @@ dataSet.setMeasuredData(False)
incomingPath = incoming.getAbsolutePath() incomingPath = incoming.getAbsolutePath()
# Get the incoming name # Get the incoming name
# expected:
# Project_110907_SN792_0059_AC012FACXX_3/Sample_BSSE-DSU-1662/BSSE-DSU-1662_CGATGTA_L003_R1_001_sorted.bam
name = incoming.getName() name = incoming.getName()
# expected incoming Name, e.g.:ETHZ_BSSE_110429_63558AAXX_1_sorted.bam split=name.split('_')
split=name.split("_") if (len(split) == 6):
sample=split[2]+ '_'+ split[3] + ':' + split[4] incoming_sample=split[1]+ '_'+ split[2] + '_' + split[3] + '_' + split[4]+ ':' + split[-1]
if (len(split) ==4):
incoming_sample=split[1]+ '_'+ split[2] + ':' + split[-1]
# Looking for BAMS:
for root, dirnames, filenames in os.walk(FOLDER + name):
for filename in fnmatch.filter(filenames, BAM_PATTERN):
matches.append(os.path.join(root, filename))
# -----------------------------------------------------------------------------
def listSearch(myList, searchString):
    '''
    Returns the entries of myList that match searchString at their start.

    searchString is handed to the re module, so it is interpreted as a
    regular expression anchored at the beginning of each entry (re.match
    semantics). Matching entries are returned unchanged, in their original
    order; an empty list is returned when nothing matches.
    '''
    # Compile once up front instead of passing the raw pattern per entry
    pattern = re.compile(searchString)
    return [entry for entry in myList if pattern.match(entry)]
# -----------------------------------------------------------------------------
def programParameters(programList):
    '''
    Extracts the aligner details from BAM header lines.

    Each entry of programList is split on tabs. The first field (the record
    type, e.g. '@PG') is skipped and every following field is read as
    TAG:VALUE. Returns a dictionary mapping each tag to its value. When the
    same tag occurs in several entries, the value of the last entry wins.
    Fields without a colon are ignored.
    '''
    elements = {}
    for program in programList:
        # Field 0 is the record type; only the remaining fields carry tags
        for field in program.split('\t')[1:]:
            # Split on the first colon only: a value such as the issued
            # command line may legitimately contain further colons
            key, separator, value = field.partition(':')
            if not separator:
                # No TAG:VALUE structure (e.g. empty field after a trailing tab)
                continue
            elements[key] = value
    return elements
# Extract values from a samtools view and set the results as DataSet properties # Extract values from a samtools view and set the results as DataSet properties
# Command: samtools view -H ETHZ_BSSE_110429_63558AAXX_1_sorted.bam # Command: samtools view -H ETHZ_BSSE_110429_63558AAXX_1_sorted.bam
arguments = SAMTOOLS + ' view -H ' + FOLDER + name
#print('Arguments: '+ arguments) arguments = SAMTOOLS + ' view -H ' + matches[0]
print('Arguments: '+ arguments)
cmdResult=os.popen(arguments).read() cmdResult=os.popen(arguments).read()
properties=cmdResult.split("\n")[-2].split('\t')
aligner=(properties[1].split(':')[1].upper() + '_' + properties[2].split(':')[1])
command=properties[3]
arguments = SAMTOOLS + ' flagstat ' + FOLDER + name properties=cmdResult.split("\n")
for s in range (0, len(searchStrings)):
programList = listSearch (properties, searchStrings[s])
print(programList)
e = programParameters (programList)
dataSet.setPropertyValue("ALIGNMENT_SOFTWARE", e['ID'])
dataSet.setPropertyValue("VERSION", e['VN'])
dataSet.setPropertyValue("ISSUED_COMMAND", e['CL'])
arguments = SAMTOOLS + ' flagstat ' + matches[0]
cmdResult=os.popen(arguments).read() cmdResult=os.popen(arguments).read()
totalReads=cmdResult.split('\n')[0].split(' ')[0] totalReads=cmdResult.split('\n')[0].split(' ')[0]
mappedReads=cmdResult.split('\n')[2].split(' ')[0] mappedReads=cmdResult.split('\n')[2].split(' ')[0]
dataSet.setPropertyValue("ALIGNMENT_SOFTWARE", aligner)
dataSet.setPropertyValue("ISSUED_COMMAND", command)
dataSet.setPropertyValue("SAMTOOLS_FLAGSTAT", cmdResult) dataSet.setPropertyValue("SAMTOOLS_FLAGSTAT", cmdResult)
dataSet.setPropertyValue("TOTAL_READS", totalReads) dataSet.setPropertyValue("TOTAL_READS", totalReads)
dataSet.setPropertyValue("MAPPED_READS", mappedReads) dataSet.setPropertyValue("MAPPED_READS", mappedReads)
...@@ -63,8 +122,16 @@ search_service = transaction.getSearchService() ...@@ -63,8 +122,16 @@ search_service = transaction.getSearchService()
# Search for the sample # Search for the sample
sc = SearchCriteria() sc = SearchCriteria()
sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sample)); sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, incoming_sample));
foundSamples = search_service.searchForSamples(sc) foundSamples = search_service.searchForSamples(sc)
if foundSamples.size() > 0: if foundSamples.size() > 0:
dataSet.setSample(foundSamples[0]) dataSet.setSample(foundSamples[0])
# Search for parent data set of the same sample
dataSetSc = SearchCriteria()
dataSetSc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.TYPE, 'FASTQ_GZ'))
dataSetSc.addSubCriteria(SearchSubCriteria.createSampleCriteria(sc))
foundDataSets = search_service.searchForDataSets(dataSetSc)
if foundDataSets.size() > 0:
dataSet.setParentDatasets([ds.getDataSetCode() for ds in foundDataSets])
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment