added percentage calculation

SVN: 21826

added percentage calculation
a4a7dd43 · kohleman · 4bda36e0 · a4a7dd43
Commit a4a7dd43 authored 13 years ago by kohleman
--- a/deep_sequencing_unit/source/Python/filterOnQualityForFastqGzip.py
+++ b/deep_sequencing_unit/source/Python/filterOnQualityForFastqGzip.py
-#!/usr/bin/python
+#!/usr/local/dsu/Python-3.2/python
 '''
-Usage: python filterOnQualityForFastqGzip.py <fastqfile.gz>
+Usage: python filterOnQualityForFastqGzip.py -i <fastqfile.gz> [-o <output>] [-f txt|json|both]
 Requires bcltofastq-converted gzipped fastq files, which are generated
 by the Illumina pipeline Casava 1.8+
-Calculates the amount of chastity filtered reads in a gzipped fastq file
 @author: Manuel Kohler
 @copyright: ETH Zurich
-@precondition: gzip, python 3.2
+@precondition: gzip
 '''
 import gzip 
@@ -18,8 +17,12 @@ import argparse
 def parseCommandLine():
  parser = argparse.ArgumentParser(description='Counts the filtered and ' + 
                                  'non-filtered read of a gzipped fastq file')
-  parser.add_argument('-f', '--file', dest='fastq_file', action='store',
+  parser.add_argument('-i', '--input_file', dest='fastq_file', action='store',
                   required=True, help='Which fastq.gz file you want to process?')  
+  parser.add_argument('-o', '--output', dest='output', action='store', default='fastq_stats',
+                  type=str, help='Output file name')
+  parser.add_argument('-f', '--format', dest='format', action='store', default='txt',
+                   type=str, choices=['txt', 'json', 'both'], help='Output format')
  args = parser.parse_args()
  return(args)
@@ -27,11 +30,13 @@ def parseCommandLine():
 line_number = 1
 new_line = 1
 is_filtered = 0
-width = 20
 def formatNumber(n):
   # Insert thousands separators, then right-align in a 20-character column.
   with_separators = format(n, ',')
   return with_separators.rjust(20)
+def calulatePercentage(v1, v2):
+  '''
+  Returns the share of v1 in the total (v1 + v2) as a percentage,
+  rounded to two decimal places.
+  Returns 0.0 when the total is zero (e.g. a fastq file without any
+  reads) instead of raising a ZeroDivisionError.
+  '''
+  total = v1 + v2
+  if total == 0:
+    return 0.0
+  return round(100 * (v1 / total), 2)
 args = parseCommandLine()
 with gzip.open(args.fastq_file, 'rb') as file:
@@ -40,12 +45,14 @@ with gzip.open(args.fastq_file, 'rb') as file:
      # fastq quadruples
      new_line = line_number + 4
      l = line.decode('utf8')
+      #print(l.split(':')[7]) 
      if (l.split(':')[7] == 'Y'):
        is_filtered += 1
    line_number += 1
-print('File: ' + args.fastq_file)
 unfiltered = ((line_number - 1) / 4) - is_filtered
-print(str(formatNumber(is_filtered)) + ' Number of filtered reads (BAD)')
+print('File: ' + args.fastq_file)
-print(str(formatNumber(int(unfiltered))) + ' Number of non-filtered reads (GOOD)')
+print(str(formatNumber(int(unfiltered))) + ' number of non-filtered reads (GOOD)')
-print('\n')
+print(str(formatNumber(is_filtered)) + ' number of filtered reads (BAD)')
+print(str(formatNumber(calulatePercentage(unfiltered, is_filtered))) + ' % of non-filtered reads (GOOD)')
+print(str(formatNumber(calulatePercentage(is_filtered, unfiltered))) + ' % of filtered reads (BAD)')