From 0eba687ce35279cf9ebdb8bcf1068c54436fd9cb Mon Sep 17 00:00:00 2001 From: kohleman <kohleman> Date: Wed, 22 Jan 2014 16:14:36 +0000 Subject: [PATCH] -added pattern parameter - write numbers of bars in the middle, more robust against small or huge differences in frequencies -renamed variables SVN: 30417 --- .../source/R/barcodeDistribution.R | 38 ++++++++++++------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/deep_sequencing_unit/source/R/barcodeDistribution.R b/deep_sequencing_unit/source/R/barcodeDistribution.R index ff665ca682e..33b7a9c2670 100644 --- a/deep_sequencing_unit/source/R/barcodeDistribution.R +++ b/deep_sequencing_unit/source/R/barcodeDistribution.R @@ -4,11 +4,15 @@ library("ShortRead") args <- commandArgs(TRUE) print(args) path <- (args[1]) -path <- "/Users/kohleman/PycharmProjects/qgf/barcodeDistribution/testData" -filenames <- list.files(path, pattern="lane6", full.names=TRUE) +flowcell <- (args[2]) +pattern <- (args[3]) +#flowcell <- "C3CFPACXX" +#path <- "/Users/kohleman/PycharmProjects/qgf/barcodeDistribution/testData" +#pattern <- 'lane4' +filenames <- list.files(path, pattern=pattern, full.names=TRUE) totalIndexList <- "" -plotTopIndices <- function (topIndices, xName="") { +plotTopIndices <- function (topIndices, xName="", path) { # Creates a barplot out of a data frame. # @@ -19,7 +23,11 @@ plotTopIndices <- function (topIndices, xName="") { # # Returns: # an image/ a pdf - + + fileName <- (paste(xName,".pdf", sep="")) + fileFullPath <- (file.path(path, fileName)) + + pdf(file=fileFullPath) par(mfcol=c(1, 1)) countLength<- nchar(toString(max(topIndices$Freq))) @@ -28,7 +36,8 @@ plotTopIndices <- function (topIndices, xName="") { b <- barplot(topIndices$Freq, main="Top 15 indices distribution", xlab=paste("Count of Indices for", xName), horiz=TRUE, names=topIndices$totalIndexList, cex.names=.75, las=1) - text(cex=.75, x=topIndices$Freq-(countLength * 2), y=b, labels=topIndices$Freq, xpd=TRUE) + text(cex=.75, x=topIndices$Freq/2, y=b, labels=topIndices$Freq, xpd=TRUE) + dev.off() } # ----------------------------------------------------------------------------- @@ -38,6 +47,9 @@ multmerge <- function(datalist, mergeBy){ Reduce(function(...) {merge(..., by = mergeBy, all=TRUE)}, datalist) } +# returns string w/o leading or trailing whitespace +trim <- function (x) gsub("^\\s+|\\s+$", "", x) + # ----------------------------------------------------------------------------- streamFASTQ <- function (file, verbose = TRUE) { @@ -82,20 +94,20 @@ if (length(ldf) > 1) { list1 <- multmerge(ldf, "totalIndexList") # Replacing all NAs with 0 list1 [is.na(list1)] <- 0 - # sum up the values row-wise for each column, but leave out the first and the - # last column - list1$Freq<- apply(list1[,c(-1,-length(list1))],1,sum) + # sum up the values row-wise for each column, but leave out the first column + list1$Freq<- apply(list1[,c(-1)],1,sum) } else { print ("Only one FASTQ found") list1 <- ldf[[1]] } -ttt1 <- list1[c("totalIndexList","Freq")] -orderedDf <- ttt1[with(ttt1, order(- Freq)), ] +subList <- list1[c("totalIndexList","Freq")] +orderedDf <- subList[with(subList, order(- Freq)), ] topIndices <- orderedDf[1:15, ] #print (topIndices) -fileBaseName <- (basename(filenames[1])) -plotTopIndices (topIndices, fileBaseName) +fileBaseName <- paste(flowcell, unlist(strsplit(basename(filenames[1]), "_"))[1], sep="_") +#fileBaseName <- trim(fileBaseName) +plotTopIndices (topIndices, fileBaseName, path) -#write.table(list1, file = "list1.csv", sep = ",", col.names = NA, qmethod = "double") \ No newline at end of file +#write.table(list1, file = "list1.csv", sep = ",", col.names = NA, qmethod = "double") -- GitLab