Skip to content
Snippets Groups Projects
Commit a4a7dd43 authored by kohleman's avatar kohleman
Browse files

added percentage calculation

SVN: 21826
parent 4bda36e0
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/python #!/usr/local/dsu/Python-3.2/python
''' '''
Usage: python filterOnQualityForFastqGzip.py <fastqfile.gz> Usage: python filter_on_quality_for_gzip.py <fastqfile>
Requires bcltofastq converted gzipped fastq file which are generated Requires bcltofastq converted gzipped fastq file which are generated
by the Illumina pipeline Casava 1.8+ by the Illumina pipeline Casava 1.8+
Calculates the amount of chastity filtered reads in a gzipped fastq file
@author: Manuel Kohler @author: Manuel Kohler
@copyright: ETH Zurich @copyright: ETH Zurich
@precondition: gzip, python 3.2 @precondition: gzip
''' '''
import gzip import gzip
...@@ -18,8 +17,12 @@ import argparse ...@@ -18,8 +17,12 @@ import argparse
def parseCommandLine(): def parseCommandLine():
parser = argparse.ArgumentParser(description='Counts the filtered and ' + parser = argparse.ArgumentParser(description='Counts the filtered and ' +
'non-filtered read of a gzipped fastq file') 'non-filtered read of a gzipped fastq file')
parser.add_argument('-f', '--file', dest='fastq_file', action='store', parser.add_argument('-i', '--input_file', dest='fastq_file', action='store',
required=True, help='Which fastq.gz file you want to process?') required=True, help='Which fastq.gz file you want to process?')
parser.add_argument('-o', '--output', dest='output', action='store', default='fastq_stats',
type=str, help='Output file name')
parser.add_argument('-f', '--format', dest='format', action='store', default='txt',
type=str, choices=['txt', 'json', 'both'], help='Output format')
args = parser.parse_args() args = parser.parse_args()
return(args) return(args)
...@@ -27,11 +30,13 @@ def parseCommandLine(): ...@@ -27,11 +30,13 @@ def parseCommandLine():
line_number = 1 line_number = 1
new_line = 1 new_line = 1
is_filtered = 0 is_filtered = 0
width = 20
def formatNumber(n): def formatNumber(n):
return ('{:>20}'.format('{:,}'.format(n))) return ('{:>20}'.format('{:,}'.format(n)))
def calulatePercentage(v1, v2):
    """Return v1 as a percentage of (v1 + v2), rounded to two decimals.

    Args:
        v1: The count whose share of the total is wanted.
        v2: The complementary count.

    Returns:
        100 * v1 / (v1 + v2) rounded to 2 decimal places, or 0.0 when
        both counts are zero.
    """
    total = v1 + v2
    # An empty input yields zero reads on both sides; report 0 % instead of
    # crashing with ZeroDivisionError.
    if total == 0:
        return 0.0
    return round(100 * (v1 / total), 2)
args = parseCommandLine() args = parseCommandLine()
with gzip.open(args.fastq_file, 'rb') as file: with gzip.open(args.fastq_file, 'rb') as file:
...@@ -40,12 +45,14 @@ with gzip.open(args.fastq_file, 'rb') as file: ...@@ -40,12 +45,14 @@ with gzip.open(args.fastq_file, 'rb') as file:
# fastq quadruples # fastq quadruples
new_line = line_number + 4 new_line = line_number + 4
l = line.decode('utf8') l = line.decode('utf8')
#print(l.split(':')[7])
if (l.split(':')[7] == 'Y'): if (l.split(':')[7] == 'Y'):
is_filtered += 1 is_filtered += 1
line_number += 1 line_number += 1
print('File: ' + args.fastq_file)
unfiltered = ((line_number - 1) / 4) - is_filtered unfiltered = ((line_number - 1) / 4) - is_filtered
print(str(formatNumber(is_filtered)) + ' Number of filtered reads (BAD)') print('File: ' + args.fastq_file)
print(str(formatNumber(int(unfiltered))) + ' Number of non-filtered reads (GOOD)') print(str(formatNumber(int(unfiltered))) + ' number of non-filtered reads (GOOD)')
print('\n') print(str(formatNumber(is_filtered)) + ' number of filtered reads (BAD)')
print(str(formatNumber(calulatePercentage(unfiltered, is_filtered))) + ' % of non-filtered reads (GOOD)')
print(str(formatNumber(calulatePercentage(is_filtered, unfiltered))) + ' % of filtered reads (BAD)')
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment