From 03bca043ef7cd524521241067a5c98bff3d50721 Mon Sep 17 00:00:00 2001
From: kohleman <kohleman>
Date: Wed, 22 Jun 2011 14:07:55 +0000
Subject: [PATCH] initial check in which offers basic functionality

SVN: 21818
---
 .../Python/filterOnQualityForFastqGzip.py     | 51 +++++++++++++++++++
 1 file changed, 51 insertions(+)
 create mode 100644 deep_sequencing_unit/source/Python/filterOnQualityForFastqGzip.py

diff --git a/deep_sequencing_unit/source/Python/filterOnQualityForFastqGzip.py b/deep_sequencing_unit/source/Python/filterOnQualityForFastqGzip.py
new file mode 100644
index 00000000000..7106d5c0459
--- /dev/null
+++ b/deep_sequencing_unit/source/Python/filterOnQualityForFastqGzip.py
@@ -0,0 +1,51 @@
+#!/usr/bin/python
+'''
+Usage: python filterOnQualityForFastqGzip.py <fastqfile.gz>
+Reqires bcltofastq converted gzipped fastq file which are generated 
+by the Illumina pipeline Casava 1.8+
+Calculates the amount of chastity filtered reads in a gzipped fastq file
+  
+@author: Manuel Kohler
+@copyright: ETH Zurich
+@precondition: gzip, python 3.2
+'''
+
+import gzip 
+import sys
+import argparse
+
+
+def parseCommandLine():
+  parser = argparse.ArgumentParser(description='Counts the filtered and ' + 
+                                  'non-filtered read of a gzipped fastq file')
+  parser.add_argument('-f', '--file', dest='fastq_file', action='store',
+                   required=True, help='Which fastq.gz file you want to process?')  
+
+  args = parser.parse_args()
+  return(args)
+
+line_number = 1
+new_line = 1
+is_filtered = 0
+width = 20
+
+def formatNumber(n):
+  return ('{:>20}'.format('{:,}'.format(n)))
+
+args = parseCommandLine()
+
+with gzip.open(args.fastq_file, 'rb') as file:
+  for line in file:
+    if (line_number == new_line):
+      # fastq quadruples
+      new_line = line_number + 4
+      l = line.decode('utf8')
+      if (l.split(':')[7] == 'Y'):
+        is_filtered += 1
+    line_number += 1
+
+print('File: ' + args.fastq_file)
+unfiltered = ((line_number - 1) / 4) - is_filtered
+print(str(formatNumber(is_filtered)) + ' Number of filtered reads (BAD)')
+print(str(formatNumber(int(unfiltered))) + ' Number of non-filtered reads (GOOD)')
+print('\n')
-- 
GitLab