#!/usr/local/dsu/Python-3.2/python
'''
Usage: python filterOnQualityForFastqGzip.py -i <fastqfile.gz>

Requires bcltofastq converted gzipped fastq files which are generated by
the Illumina pipeline Casava 1.8+

Counts the filtered and non-filtered reads of a gzipped fastq file and
prints both the absolute numbers and the percentages.

@author: Manuel Kohler
@copyright: ETH Zurich
@precondition: gzip
'''

import gzip
import argparse

# A fastq record is a quadruple of lines; the first line is the header.
LINES_PER_READ = 4
# Position of the filter flag once the header is split on ':'.
FILTER_FIELD_INDEX = 7
# Flag value that marks a read as filtered (BAD).
FILTERED_FLAG = 'Y'


def parseCommandLine():
    '''
    Parse the command line options of the script.

    Returns the argparse namespace with the attributes 'fastq_file',
    'output' and 'format'.
    '''
    parser = argparse.ArgumentParser(
        description='Counts the filtered and ' +
                    'non-filtered read of a gzipped fastq file')
    parser.add_argument('-i', '--input_file', dest='fastq_file',
                        action='store', required=True,
                        help='Which fastq.gz file you want to process?')
    # NOTE(review): 'output' and 'format' are parsed here but are not
    # consumed by any code visible in this script - confirm whether the
    # writer for them lives elsewhere or is still to be implemented.
    parser.add_argument('-o', '--output', dest='output', action='store',
                        default='fastq_stats', type=str,
                        help='Output file name')
    parser.add_argument('-f', '--format', dest='format', action='store',
                        default='txt', type=str,
                        choices=['txt', 'json', 'both'],
                        help='Output format')
    return parser.parse_args()


def formatNumber(n):
    '''
    Return n with thousands separators, right aligned in 20 characters.
    '''
    return '{:>20}'.format('{:,}'.format(n))


def calulatePercentage(v1, v2):
    '''
    Return the share of v1 in (v1 + v2) as a percentage, rounded to two
    decimals. Returns 0.0 when both values are zero (e.g. an empty fastq
    file) instead of raising ZeroDivisionError.
    '''
    total = v1 + v2
    if total == 0:
        return 0.0
    return round(100 * (v1 / total), 2)


def isFiltered(header):
    '''
    Return True if the filter flag of a fastq header line is 'Y'.

    The flag is the eighth ':'-separated field of the header.
    Raises ValueError when the header has fewer fields than that.
    '''
    fields = header.split(':')
    if len(fields) <= FILTER_FIELD_INDEX:
        raise ValueError('Unexpected fastq header (no filter flag): ' +
                         header.strip())
    return fields[FILTER_FIELD_INDEX] == FILTERED_FLAG


def countReads(fastq_path):
    '''
    Count the reads of a gzipped fastq file.

    Returns a tuple (total_reads, filtered_reads).
    '''
    total_reads = 0
    filtered_reads = 0
    # Binary mode plus an explicit decode: gzip.open() has no text mode
    # on the Python 3.2 interpreter named in the shebang.
    with gzip.open(fastq_path, 'rb') as fastq:
        # NOTE(review): the original loop header was outside the visible
        # part of the change; taking every fourth line (starting with the
        # first) as a header is reconstructed from the line_number /
        # new_line counters - confirm.
        for index, line in enumerate(fastq):
            if index % LINES_PER_READ != 0:
                continue
            total_reads += 1
            # Only header lines are decoded; the rest is never inspected.
            if isFiltered(line.decode('utf8')):
                filtered_reads += 1
    return total_reads, filtered_reads


def main():
    '''
    Print the read statistics of the fastq file given on the command line.
    '''
    args = parseCommandLine()
    total_reads, filtered_reads = countReads(args.fastq_file)
    unfiltered_reads = total_reads - filtered_reads

    print('File: ' + args.fastq_file)
    print(str(formatNumber(unfiltered_reads)) +
          ' number of non-filtered reads (GOOD)')
    print(str(formatNumber(filtered_reads)) +
          ' number of filtered reads (BAD)')
    print(str(formatNumber(calulatePercentage(unfiltered_reads,
                                              filtered_reads))) +
          ' % of non-filtered reads (GOOD)')
    print(str(formatNumber(calulatePercentage(filtered_reads,
                                              unfiltered_reads))) +
          ' % of filtered reads (BAD)')


if __name__ == '__main__':
    main()