From 0eba687ce35279cf9ebdb8bcf1068c54436fd9cb Mon Sep 17 00:00:00 2001
From: kohleman <kohleman>
Date: Wed, 22 Jan 2014 16:14:36 +0000
Subject: [PATCH] -added pattern parameter - write numbers of bars in the
 middle, more robust against small or huge differences in frequencies -renamed
 variables

SVN: 30417
---
 .../source/R/barcodeDistribution.R            | 38 ++++++++++++-------
 1 file changed, 25 insertions(+), 13 deletions(-)

diff --git a/deep_sequencing_unit/source/R/barcodeDistribution.R b/deep_sequencing_unit/source/R/barcodeDistribution.R
index ff665ca682e..33b7a9c2670 100644
--- a/deep_sequencing_unit/source/R/barcodeDistribution.R
+++ b/deep_sequencing_unit/source/R/barcodeDistribution.R
@@ -4,11 +4,15 @@ library("ShortRead")
 args <- commandArgs(TRUE)
 print(args)
 path <- (args[1])
-path <- "/Users/kohleman/PycharmProjects/qgf/barcodeDistribution/testData"  
-filenames <- list.files(path, pattern="lane6", full.names=TRUE)
+flowcell <- (args[2])
+pattern <- (args[3])
+#flowcell <- "C3CFPACXX"
+#path <- "/Users/kohleman/PycharmProjects/qgf/barcodeDistribution/testData"  
+#pattern <- 'lane4'
+filenames <- list.files(path, pattern=pattern, full.names=TRUE)
 totalIndexList <- ""
 
-plotTopIndices <- function (topIndices, xName="") {
+plotTopIndices <- function (topIndices, xName="", path) {
   
   # Creates a barplot out of a data frame.
   #
@@ -19,7 +23,11 @@ plotTopIndices <- function (topIndices, xName="") {
   #
   # Returns:
   #   an image/ a pdf
-    
+  
+  fileName <- (paste(xName,".pdf", sep=""))
+  fileFullPath <- (file.path(path, fileName))
+
+  pdf(file=fileFullPath)
   par(mfcol=c(1, 1))
   countLength<- nchar(toString(max(topIndices$Freq)))
   
@@ -28,7 +36,8 @@ plotTopIndices <- function (topIndices, xName="") {
   
   b <- barplot(topIndices$Freq, main="Top 15 indices distribution", xlab=paste("Count of Indices for", xName), horiz=TRUE,
                names=topIndices$totalIndexList, cex.names=.75, las=1)
-  text(cex=.75, x=topIndices$Freq-(countLength * 2), y=b, labels=topIndices$Freq, xpd=TRUE)
+  text(cex=.75, x=topIndices$Freq/2, y=b, labels=topIndices$Freq, xpd=TRUE)
+  dev.off()
 }
 
 # -----------------------------------------------------------------------------
@@ -38,6 +47,9 @@ multmerge <- function(datalist, mergeBy){
   Reduce(function(...) {merge(..., by = mergeBy, all=TRUE)}, datalist)
 }
 
+# Returns the string without leading or trailing whitespace
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+
 # -----------------------------------------------------------------------------
 
 streamFASTQ <- function (file, verbose = TRUE) {
@@ -82,20 +94,20 @@ if (length(ldf) > 1) {
   list1 <- multmerge(ldf, "totalIndexList")
   # Replacing all NAs with 0
   list1 [is.na(list1)] <- 0
-  # sum up the values row-wise for each column, but leave out the first and the
-  # last column 
-  list1$Freq<- apply(list1[,c(-1,-length(list1))],1,sum)
+  # Sum the values across each row, leaving out the first column (the merge key)
+  list1$Freq<- apply(list1[,c(-1)],1,sum)
 } else {
   print ("Only one FASTQ found")
   list1 <- ldf[[1]]
 }
 
-ttt1 <- list1[c("totalIndexList","Freq")]
-orderedDf <- ttt1[with(ttt1, order(- Freq)), ]
+subList <- list1[c("totalIndexList","Freq")]
+orderedDf <- subList[with(subList, order(- Freq)), ]
 topIndices <- orderedDf[1:15, ]
 #print (topIndices)
 
-fileBaseName <- (basename(filenames[1]))
-plotTopIndices (topIndices, fileBaseName)
+fileBaseName <- paste(flowcell, unlist(strsplit(basename(filenames[1]), "_"))[1], sep="_")
+#fileBaseName <- trim(fileBaseName)
+plotTopIndices (topIndices, fileBaseName, path)
 
-#write.table(list1, file = "list1.csv", sep = ",", col.names = NA, qmethod = "double")
\ No newline at end of file
+#write.table(list1, file = "list1.csv", sep = ",", col.names = NA, qmethod = "double")
-- 
GitLab