Skip to content
Snippets Groups Projects
Commit 50aaf9eb authored by kohleman's avatar kohleman
Browse files

initial check in, needs to be re-factored but does the job for now

SVN: 21841
parent be9fe9c2
No related branches found
No related tags found
No related merge requests found
require(multicore)
require(ShortRead)
require(RColorBrewer)
require(RSvgDevice)
# ShortRead Quality assessment ################################################
args <- commandArgs(TRUE)
fastq_file <- args[1]
# Ugly but only way to simulate a rsplit
p=""
tokens<-unlist(strsplit(fastq_file, "/"))
path_tokens<-tokens[2:(length(tokens)-1)]
fastq_only<-tokens[length(tokens)]
for (x in 1:length(path_tokens)) p<-paste(p,path_tokens[x],sep="/")
qa <- qa(p,fastq_only, type="fastq")
qa_report <- report (qa, dest=paste(fastq_file, "ShortRead_qa", sep="_"),
type="html")
# Boxplot for per cycle Phred quality
reads <- readFastq(fastq_file)
# if more than a million reads, sample randomly
if (length(reads) > 10000000) {
reads <- sample(reads,10000000)
}
qual <- FastqQuality(quality(quality(reads))) # get quality scores
readM <- as(qual, "matrix") # convert scores to matrix
devSVG(file = paste(fastq_file,"boxplot.svg", sep="_"), width = 10,
height = 8, bg = "white", fg = "black", onefile=TRUE, xmlHeader=TRUE)
boxplot(readM, outline = FALSE, main="Per Cycle Read Quality", sub=fastq_file,
xlab="Cycle", ylab="Phred Quality", col=brewer.pal(11, "Spectral")[1])
dev.off()
# Save box plot as boxplot.pdf in current folder
pdf(file=paste(fastq_file,"boxplot.pdf", sep="_"))
boxplot(readM, outline = FALSE, main="Per Cycle Read Quality", sub=fastq_file,
xlab="Cycle", ylab="Phred Quality", col=brewer.pal(11, "Spectral")[1])
dev.off()
# Nucleotides per Cycle ######################################################
a <-alphabetByCycle(sread(reads))
cycles <- dim(a)[2]
total_number_bases_first_cycle <- a[,1][[1]]+ a[,1][[2]]+a[,1][[3]]+ a[,1][[4]]
+ a[,1][[5]]
n <- c(names(a[1,1]),names(a[2,1]), names(a[3,1]), names(a[4,1]),
names(a[15,1]))
# Save box plot in current folder
pdf(file=paste(fastq_only,"nuc_per_cycle.pdf", sep="_"))
par(xpd=T, mar=par()$mar+c(0,0,0,4))
barplot(a, main="Numbers of nucleotides per cycle", sub=fastq_file,
ylab="Absolute number", col=heat.colors(5), names.arg=c(1:cycles))
legend(cycles+10, total_number_bases_first_cycle, n, fill=heat.colors(5))
dev.off()
devSVG(file = paste(fastq_only,"nuc_per_cycle.svg", sep="_"), width = 10,
height = 8, bg = "white", fg = "black", onefile=TRUE, xmlHeader=TRUE)
par(xpd=T, mar=par()$mar+c(0,0,0,4))
barplot(a, main="Numbers of nucleotides per cycle", sub=fastq_file,
ylab="Absolute number", col=heat.colors(5), names.arg=c(1:cycles))
legend(cycles+10, total_number_bases_first_cycle, n, fill=heat.colors(5))
dev.off()
# Taken from
# http://www.bioconductor.org/help/workflows/high-throughput-sequencing/
seq <- readFastq(fastq_file)
tbl <- tables(seq)[[2]]
pdf(file=paste(fastq_only,"NumberOfOccurrences.pdf", sep="_"))
## Calculate and plot cumulative reads vs. occurrences
xyplot(cumsum(nReads * nOccurrences) ~ nOccurrences, tbl,
scales=list(x=list(log=TRUE)), main=fastq_only, type="b", pch=20,
xlab="Number of Occurrences",
ylab="Cumulative Number of Reads")
dev.off()
devSVG(file = paste(fastq_only,"NumberOfOccurrences.svg", sep="_"), width = 10,
height = 8, bg = "white", fg = "black", onefile=TRUE, xmlHeader=TRUE)
xyplot(cumsum(nReads * nOccurrences) ~ nOccurrences, tbl,
scales=list(x=list(log=TRUE)), main=fastq_only, type="b", pch=20,
xlab="Number of Occurrences",
ylab="Cumulative Number of Reads")
dev.off()
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment