## ----style-knitr, eval=TRUE, echo=FALSE, results="asis"------------------ BiocStyle::latex(use.unsrturl=FALSE) ## ----setup, include=FALSE, cache=FALSE----------------------------------- library(knitr) # set global chunk options for knitr opts_chunk$set(comment=NA, warning=FALSE, message=FALSE, fig.path='figure/systemPipeR-') options(formatR.arrow=TRUE, width=95) unlink("test.db") ## ----eval=TRUE----------------------------------------------------------- library(systemPipeR) ## ----eval=FALSE---------------------------------------------------------- ## library(systemPipeRdata) ## genWorkenvir(workflow="varseq") ## setwd("varseq") ## ----eval=FALSE---------------------------------------------------------- ## source("systemPipeVARseq_Fct.R") ## ----eval=TRUE----------------------------------------------------------- targetspath <- system.file("extdata", "targetsPE.txt", package="systemPipeR") targets <- read.delim(targetspath, comment.char = "#")[,1:5] targets ## ----eval=FALSE---------------------------------------------------------- ## args <- systemArgs(sysma="param/trimPE.param", mytargets="targetsPE.txt")[1:4] # Note: subsetting! ## filterFct <- function(fq, cutoff=20, Nexceptions=0) { ## qcount <- rowSums(as(quality(fq), "matrix") <= cutoff) ## fq[qcount <= Nexceptions] # Retains reads where Phred scores are >= cutoff with N exceptions ## } ## preprocessReads(args=args, Fct="filterFct(fq, cutoff=20, Nexceptions=0)", batchsize=100000) ## writeTargetsout(x=args, file="targets_PEtrim.txt", overwrite=TRUE) ## ----eval=FALSE---------------------------------------------------------- ## args <- systemArgs(sysma="bwa.param", mytargets="targets.txt") ## fqlist <- seeFastq(fastq=infile1(args), batchsize=100000, klength=8) ## pdf("./results/fastqReport.pdf", height=18, width=4*length(fqlist)) ## seeFastqPlot(fqlist) ## dev.off() ## ----eval=FALSE---------------------------------------------------------- ## args <- systemArgs(sysma="bwa.param", mytargets="targets.txt") ## sysargs(args)[1] # Command-line parameters for first FASTQ file ## ----eval=FALSE---------------------------------------------------------- ## bampaths <- runCommandline(args=args) ## ----eval=FALSE---------------------------------------------------------- ## moduleload(modules(args)) ## system("bwa index -a bwtsw ./data/tair10.fasta") ## resources <- list(walltime="20:00:00", nodes=paste0("1:ppn=", cores(args)), memory="10gb") ## reg <- clusterRun(args, conffile=".BatchJobs.R", template="torque.tmpl", Njobs=18, runid="01", ## resourceList=resources) ## waitForJobs(reg) ## ----eval=FALSE---------------------------------------------------------- ## file.exists(outpaths(args)) ## ----eval=FALSE---------------------------------------------------------- ## library(gmapR); library(BiocParallel); library(BatchJobs) ## gmapGenome <- GmapGenome(reference(args), directory="data", name="gmap_tair10chr", create=TRUE) ## args <- systemArgs(sysma="gsnap.param", mytargets="targetsPE.txt") ## f <- function(x) { ## library(gmapR); library(systemPipeR) ## args <- systemArgs(sysma="gsnap.param", mytargets="targetsPE.txt") ## gmapGenome <- GmapGenome(reference(args), directory="data", name="gmap_tair10chr", create=FALSE) ## p <- GsnapParam(genome=gmapGenome, unique_only=TRUE, molecule="DNA", max_mismatches=3) ## o <- gsnap(input_a=infile1(args)[x], input_b=infile2(args)[x], params=p, output=outfile1(args)[x]) ## } ## funs <- makeClusterFunctionsTorque("torque.tmpl") ## param <- BatchJobsParam(length(args), resources=list(walltime="20:00:00", nodes="1:ppn=1", memory="6gb"), cluster.functions=funs) ## register(param) ## d <- bplapply(seq(along=args), f) ## writeTargetsout(x=args, file="targets_gsnap_bam.txt") ## ----eval=FALSE---------------------------------------------------------- ## read_statsDF <- alignStats(args=args) ## write.table(read_statsDF, "results/alignStats.xls", row.names=FALSE, quote=FALSE, sep="\t") ## ----eval=FALSE---------------------------------------------------------- ## symLink2bam(sysargs=args, htmldir=c("~/.html/", "somedir/"), ## urlbase="http://biocluster.ucr.edu/~tgirke/", ## urlfile="./results/IGVurl.txt") ## ----eval=FALSE---------------------------------------------------------- ## writeTargetsout(x=args, file="targets_bam.txt") ## system("java -jar CreateSequenceDictionary.jar R=./data/tair10.fasta O=./data/tair10.dict") ## # system("java -jar /opt/picard/1.81/CreateSequenceDictionary.jar R=./data/tair10.fasta O=./data/tair10.dict") ## args <- systemArgs(sysma="gatk.param", mytargets="targets_bam.txt") ## resources <- list(walltime="20:00:00", nodes=paste0("1:ppn=", 1), memory="10gb") ## reg <- clusterRun(args, conffile=".BatchJobs.R", template="torque.tmpl", Njobs=18, runid="01", ## resourceList=resources) ## waitForJobs(reg) ## writeTargetsout(x=args, file="targets_gatk.txt") ## ----eval=FALSE---------------------------------------------------------- ## args <- systemArgs(sysma="sambcf.param", mytargets="targets_bam.txt") ## resources <- list(walltime="20:00:00", nodes=paste0("1:ppn=", 1), memory="10gb") ## reg <- clusterRun(args, conffile=".BatchJobs.R", template="torque.tmpl", Njobs=18, runid="01", ## resourceList=resources) ## waitForJobs(reg) ## writeTargetsout(x=args, file="targets_sambcf.txt") ## ----eval=FALSE---------------------------------------------------------- ## library(gmapR); library(BiocParallel); library(BatchJobs) ## args <- systemArgs(sysma="vartools.param", mytargets="targets_gsnap_bam.txt") ## f <- function(x) { ## library(VariantTools); library(gmapR); library(systemPipeR) ## args <- systemArgs(sysma="vartools.param", mytargets="targets_gsnap_bam.txt") ## gmapGenome <- GmapGenome(systemPipeR::reference(args), directory="data", name="gmap_tair10chr", create=FALSE) ## tally.param <- TallyVariantsParam(gmapGenome, high_base_quality = 23L, indels = TRUE) ## bfl <- BamFileList(infile1(args)[x], index=character()) ## var <- callVariants(bfl[[1]], tally.param) ## sampleNames(var) <- names(bfl) ## writeVcf(asVCF(var), outfile1(args)[x], index = TRUE) ## } ## funs <- makeClusterFunctionsTorque("torque.tmpl") ## param <- BatchJobsParam(length(args), resources=list(walltime="20:00:00", nodes="1:ppn=1", memory="6gb"), cluster.functions=funs) ## register(param) ## d <- bplapply(seq(along=args), f) ## writeTargetsout(x=args, file="targets_vartools.txt") ## ----eval=FALSE---------------------------------------------------------- ## library(VariantAnnotation) ## args <- systemArgs(sysma="filter_gatk.param", mytargets="targets_gatk.txt") ## filter <- "totalDepth(vr) >= 2 & (altDepth(vr) / totalDepth(vr) >= 0.8) & rowSums(softFilterMatrix(vr))>=1" ## # filter <- "totalDepth(vr) >= 20 & (altDepth(vr) / totalDepth(vr) >= 0.8) & rowSums(softFilterMatrix(vr))==6" ## filterVars(args, filter, varcaller="gatk", organism="A. thaliana") ## writeTargetsout(x=args, file="targets_gatk_filtered.txt") ## ----eval=FALSE---------------------------------------------------------- ## args <- systemArgs(sysma="filter_sambcf.param", mytargets="targets_sambcf.txt") ## filter <- "rowSums(vr) >= 2 & (rowSums(vr[,3:4])/rowSums(vr[,1:4]) >= 0.8)" ## # filter <- "rowSums(vr) >= 20 & (rowSums(vr[,3:4])/rowSums(vr[,1:4]) >= 0.8)" ## filterVars(args, filter, varcaller="bcftools", organism="A. thaliana") ## writeTargetsout(x=args, file="targets_sambcf_filtered.txt") ## ----eval=FALSE---------------------------------------------------------- ## args <- systemArgs(sysma="filter_vartools.param", mytargets="targets_vartools.txt") ## filter <- "(values(vr)$n.read.pos.ref + values(vr)$n.read.pos) >= 2 & (values(vr)$n.read.pos / (values(vr)$n.read.pos.ref + values(vr)$n.read.pos) >= 0.8)" ## # filter <- "(values(vr)$n.read.pos.ref + values(vr)$n.read.pos) >= 20 & (values(vr)$n.read.pos / (values(vr)$n.read.pos.ref + values(vr)$n.read.pos) >= 0.8)" ## filterVars(args, filter, varcaller="vartools", organism="A. thaliana") ## writeTargetsout(x=args, file="targets_vartools_filtered.txt") ## ----eval=FALSE---------------------------------------------------------- ## library("GenomicFeatures") ## args <- systemArgs(sysma="annotate_vars.param", mytargets="targets_gatk_filtered.txt") ## txdb <- loadDb("./data/tair10.sqlite") ## fa <- FaFile(systemPipeR::reference(args)) ## variantReport(args=args, txdb=txdb, fa=fa, organism="A. thaliana") ## ----eval=FALSE---------------------------------------------------------- ## args <- systemArgs(sysma="annotate_vars.param", mytargets="targets_sambcf_filtered.txt") ## txdb <- loadDb("./data/tair10.sqlite") ## fa <- FaFile(systemPipeR::reference(args)) ## variantReport(args=args, txdb=txdb, fa=fa, organism="A. thaliana") ## ----eval=FALSE---------------------------------------------------------- ## args <- systemArgs(sysma="annotate_vars.param", mytargets="targets_vartools_filtered.txt") ## txdb <- loadDb("./data/tair10.sqlite") ## fa <- FaFile(systemPipeR::reference(args)) ## variantReport(args=args, txdb=txdb, fa=fa, organism="A. thaliana") ## ----eval=FALSE---------------------------------------------------------- ## args <- systemArgs(sysma="annotate_vars.param", mytargets="targets_gatk_filtered.txt") ## combineDF <- combineVarReports(args, filtercol=c(Consequence="nonsynonymous")) ## write.table(combineDF, "./results/combineDF_nonsyn_gatk.xls", quote=FALSE, row.names=FALSE, sep="\t") ## ----eval=FALSE---------------------------------------------------------- ## args <- systemArgs(sysma="annotate_vars.param", mytargets="targets_sambcf_filtered.txt") ## combineDF <- combineVarReports(args, filtercol=c(Consequence="nonsynonymous")) ## write.table(combineDF, "./results/combineDF_nonsyn_sambcf.xls", quote=FALSE, row.names=FALSE, sep="\t") ## ----eval=FALSE---------------------------------------------------------- ## args <- systemArgs(sysma="annotate_vars.param", mytargets="targets_vartools_filtered.txt") ## combineDF <- combineVarReports(args, filtercol=c(Consequence="nonsynonymous")) ## write.table(combineDF, "./results/combineDF_nonsyn_vartools.xls", quote=FALSE, row.names=FALSE, sep="\t") ## ----eval=FALSE---------------------------------------------------------- ## args <- systemArgs(sysma="annotate_vars.param", mytargets="targets_gatk_filtered.txt") ## write.table(varSummary(args), "./results/variantStats_gatk.xls", quote=FALSE, col.names = NA, sep="\t") ## ----eval=FALSE---------------------------------------------------------- ## args <- systemArgs(sysma="annotate_vars.param", mytargets="targets_sambcf_filtered.txt") ## write.table(varSummary(args), "./results/variantStats_sambcf.xls", quote=FALSE, col.names = NA, sep="\t") ## ----eval=FALSE---------------------------------------------------------- ## args <- systemArgs(sysma="annotate_vars.param", mytargets="targets_vartools_filtered.txt") ## write.table(varSummary(args), "./results/variantStats_vartools.xls", quote=FALSE, col.names = NA, sep="\t") ## ----eval=FALSE---------------------------------------------------------- ## args <- systemArgs(sysma="annotate_vars.param", mytargets="targets_gatk_filtered.txt") ## varlist <- sapply(names(outpaths(args))[1:4], function(x) as.character(read.delim(outpaths(args)[x])$VARID)) ## vennset_gatk <- overLapper(varlist, type="vennsets") ## args <- systemArgs(sysma="annotate_vars.param", mytargets="targets_sambcf_filtered.txt") ## varlist <- sapply(names(outpaths(args))[1:4], function(x) as.character(read.delim(outpaths(args)[x])$VARID)) ## vennset_bcf <- overLapper(varlist, type="vennsets") ## pdf("./results/vennplot_var.pdf") ## vennPlot(list(vennset_gatk, vennset_bcf), mymain="", mysub="GATK: red; BCFtools: blue", colmode=2, ccol=c("blue", "red")) ## dev.off() ## ----sessionInfo, results='asis'----------------------------------------- toLatex(sessionInfo())