## ----style, eval=TRUE, echo=FALSE, results='asis'------------------------ BiocStyle::latex() ## ----env, echo=FALSE,warning=FALSE,message=FALSE------------------------- suppressPackageStartupMessages(library("metaX")) #suppressPackageStartupMessages(library("R.utils")) ## ----figSetup, echo = FALSE---------------------------------------------- x <- NULL ii <- 0 # Current figure number fn <- function(x) { ii <<- ii + 1 figString <<- paste('\\textbf{Figure ', ii, '.} ', x, sep = '') return(x) } ## ----eval=FALSE---------------------------------------------------------- ## ## not run ## para <- importDataFromQI(para,file="qi.csv") ## ----eval=FALSE---------------------------------------------------------- ## ## not run ## para <- importDataFromXCMS(para,file="xcms.txt") ## ----eval=FALSE---------------------------------------------------------- ## ## not run ## para <- importDataFromMetaboAnalyst(para, ## file="http://www.metaboanalyst.ca/resources/data/lcms_table.csv") ## ----eval=TRUE----------------------------------------------------------- para <- new("metaXpara") pfile <- system.file("extdata/MTBLS79.txt",package = "metaX") rawPeaks(para) <- read.delim(pfile,check.names = FALSE) head(para@rawPeaks[,1:4]) ## ----eval=TRUE----------------------------------------------------------- ## set the sample list file path sfile <- system.file("extdata/MTBLS79_sampleList.txt",package = "metaX") sampleListFile(para) <- sfile ## print the object: para ## ----eval=TRUE,echo=FALSE------------------------------------------------ library(data.table) ss <- read.delim(para@sampleListFile) head(ss) ## ----exampleDir, eval=TRUE----------------------------------------------- list.files(system.file("cdf", package = "faahKO"), recursive = TRUE,full.names = TRUE) ## ----eval=TRUE----------------------------------------------------------- ## create a metaXpara-class object #library("metaX") para <- new("metaXpara") ## set the MS data path dir.case(para) <- system.file("cdf/KO", package = "faahKO") dir.ctrl(para) <- system.file("cdf/WT", package = "faahKO") ## ----eval=TRUE----------------------------------------------------------- ## set the sample list file path sampleFile <- system.file("extdata/faahKO_sampleList.txt", package = "metaX") sampleListFile(para) <- sampleFile samList <- read.delim(sampleFile) print(samList) ## ----eval=TRUE----------------------------------------------------------- ## set parameters for peak picking xcmsSet.peakwidth(para) <- c(20,50) xcmsSet.snthresh(para) <- 10 xcmsSet.prefilter(para) <- c(3,100) xcmsSet.noise(para) <- 0 xcmsSet.nSlaves(para) <- 4 ## ----eval=TRUE----------------------------------------------------------- ## set parameters for peak annotation group.bw(para) <- 5 group.minfrac(para) <- 0.3 group.mzwid(para) <- 0.015 group.max(para) <- 1000 ## ----eval=FALSE---------------------------------------------------------- ## p <- peakFinder(para) ## ----rawPeaks,eval=FALSE,echo=TRUE,results='hide',warning=FALSE,message=FALSE---- ## para <- new("metaXpara") ## pfile <- system.file("extdata/MTBLS79.txt",package = "metaX") ## sfile <- system.file("extdata/MTBLS79_sampleList.txt",package = "metaX") ## rawPeaks(para) <- read.delim(pfile,check.names = FALSE) ## sampleListFile(para) <- sfile ## ratioPairs(para) <- "S:C" ## plsdaPara <- new("plsDAPara") ## para@outdir <- "output" ## p <- metaXpipe(para,plsdaPara=plsdaPara,cvFilter=0.3,remveOutlier = TRUE, ## outTol = 1.2, doQA = TRUE, doROC = TRUE, qcsc = FALSE, ## nor.method = "pqn", pclean = TRUE, t = 1, scale = "uv",) ## ----missValueImputeMethod1,eval=TRUE------------------------------------ ## bpca, svdImpute, knn, rf, min missValueImputeMethod(para) <- "knn" ## ----eval=TRUE----------------------------------------------------------- para <- new("metaXpara") pfile <- system.file("extdata/MTBLS79.txt",package = "metaX") sfile <- system.file("extdata/MTBLS79_sampleList.txt",package = "metaX") rawPeaks(para) <- read.delim(pfile,check.names = FALSE) sampleListFile(para) <- sfile para <- reSetPeaksData(para) ## bpca, svdImpute, knn, rf, min para <- missingValueImpute(para,method="knn") ## ----nor11,fig.height=2,fig.width=6,fig.align='center',echo=FALSE,fig.cap=fn("\\label{qcdesign}The experiment design which contained QC samples.")---- par(mar=c(0,0,0,0)) sname <- c(rep("QC",3),rep("S",3),"...",rep("QC",1),rep("S",3),"...",rep("QC",3)) cname <- ifelse(sname=="QC","green","yellow") xname <- 1:length(sname) plot(xname,rep(1,length(xname)),type="n",axes=FALSE,ylim=c(0.6,1.2)) points(xname,rep(1,length(xname)),pch=22,cex=4.5,col=cname,bg=cname) arrows(x0 = 1,x1 = 4,y0 = 0.8,y1 = 0.8,lwd = 3) text(xname,rep(1,length(xname)),labels = sname) text(1,0.85,labels = "Sample injection order",cex=1,adj = c(0,0)) ## ----QC,eval=FALSE------------------------------------------------------- ## para <- new("metaXpara") ## pfile <- system.file("extdata/MTBLS79.txt",package = "metaX") ## sfile <- system.file("extdata/MTBLS79_sampleList.txt",package = "metaX") ## rawPeaks(para) <- read.delim(pfile,check.names = FALSE)[1:100,] ## sampleListFile(para) <- sfile ## para <- reSetPeaksData(para) ## para <- missingValueImpute(para) ## res <- doQCRLSC(para,cpu=1) ## ----eval=FALSE---------------------------------------------------------- ## para <- new("metaXpara") ## pfile <- system.file("extdata/MTBLS79.txt",package = "metaX") ## sfile <- system.file("extdata/MTBLS79_sampleList.txt",package = "metaX") ## rawPeaks(para) <- read.delim(pfile,check.names = FALSE) ## sampleListFile(para) <- sfile ## para <- reSetPeaksData(para) ## para <- missingValueImpute(para) ## para <- metaX::normalize(para,method="pqn",valueID="value") ## ----eval=FALSE---------------------------------------------------------- ## para <- new("metaXpara") ## pfile <- system.file("extdata/MTBLS79.txt",package = "metaX") ## sfile <- system.file("extdata/MTBLS79_sampleList.txt",package = "metaX") ## rawPeaks(para) <- read.delim(pfile,check.names = FALSE) ## sampleListFile(para) <- sfile ## para <- reSetPeaksData(para) ## p <- filterPeaks(para,ratio=0.2) ## p <- filterQCPeaks(para,ratio=0.5) ## ----eval=FALSE---------------------------------------------------------- ## para <- new("metaXpara") ## pfile <- system.file("extdata/MTBLS79.txt",package = "metaX") ## sfile <- system.file("extdata/MTBLS79_sampleList.txt",package = "metaX") ## rawPeaks(para) <- read.delim(pfile,check.names = FALSE) ## sampleListFile(para) <- sfile ## para <- reSetPeaksData(para) ## para <- missingValueImpute(para) ## rs <- autoRemoveOutlier(para,valueID="value") ## ----eval=TRUE,fig.align='center',fig.height=4,fig.width=6,fig.cap=fn("\\label{powerplot}The power and sample size distribution.")---- para <- new("metaXpara") pfile <- system.file("extdata/MTBLS79.txt",package = "metaX") sfile <- system.file("extdata/MTBLS79_sampleList.txt",package = "metaX") rawPeaks(para) <- read.delim(pfile,check.names = FALSE) sampleListFile(para) <- sfile para <- reSetPeaksData(para) para <- missingValueImpute(para) para <- metaX::normalize(para) para <- transformation(para,valueID = "value") para <- preProcess(para,scale = "pareto",valueID="value") res <- powerAnalyst(para,group=c("C","S"),log=FALSE, maxInd=200,showPlot = TRUE) print(res) ## ----eval=TRUE,fig.align='center',fig.height=6,fig.width=6,fig.cap=fn("\\label{netplot}The correlation network."),warning=FALSE,cache=TRUE---- para <- new("metaXpara") pfile <- system.file("extdata/MTBLS79.txt",package = "metaX") sfile <- system.file("extdata/MTBLS79_sampleList.txt",package = "metaX") rawPeaks(para) <- read.delim(pfile,check.names = FALSE) sampleListFile(para) <- sfile para <- reSetPeaksData(para) para <- missingValueImpute(para) op <- par() par(mar=c(0,0,0,0)) gg <- plotNetwork(para,group=c("S","C"),degree.thr = 10, cor.thr = 0.85,showPlot=TRUE) par(op) ## ----eval=TRUE,cache=TRUE------------------------------------------------ res <- pathwayAnalysis(id=c("HMDB00060","HMDB00056","HMDB00064"), outfile="pathway.csv") ## ----eval=TRUE,results='asis'-------------------------------------------- xtable::xtable(head(res[,2:4])) ## ----eval=TRUE,warning=FALSE,message=FALSE,cache=TRUE-------------------- para <- new("metaXpara") pfile <- system.file("extdata/MTBLS79.txt",package = "metaX") sfile <- system.file("extdata/MTBLS79_sampleList.txt",package = "metaX") rawPeaks(para) <- read.delim(pfile,check.names = FALSE)[1:100,] sampleListFile(para) <- sfile para <- reSetPeaksData(para) res <- featureSelection(para, group=c("S","C"), method = "rf", valueID = "value", fold = 5) ## ----eval=TRUE,results='asis'-------------------------------------------- xtable::xtable(head(res$results)) ## ----eval=TRUE,results='asis'-------------------------------------------- print(res$optVariables) ## ----eval=TRUE,results='asis',cache=TRUE--------------------------------- para <- new("metaXpara") pfile <- system.file("extdata/MTBLS79.txt",package = "metaX") sfile <- system.file("extdata/MTBLS79_sampleList.txt",package = "metaX") rawPeaks(para) <- read.delim(pfile,check.names = FALSE) sampleListFile(para) <- sfile para <- reSetPeaksData(para) para <- missingValueImpute(para) para <- transformation(para,valueID = "value",method=1) ## ----eval=TRUE,results='asis',cache=TRUE--------------------------------- para <- new("metaXpara") pfile <- system.file("extdata/MTBLS79.txt",package = "metaX") sfile <- system.file("extdata/MTBLS79_sampleList.txt",package = "metaX") rawPeaks(para) <- read.delim(pfile,check.names = FALSE) sampleListFile(para) <- sfile para <- reSetPeaksData(para) para <- missingValueImpute(para) para <- metaX::preProcess(para,valueID = "value",scale="uv") ## ----eval=TRUE,results='asis',cache=TRUE,message=FALSE,warning=FALSE,fig.show="hide"---- para <- new("metaXpara") pfile <- system.file("extdata/MTBLS79.txt",package = "metaX") sfile <- system.file("extdata/MTBLS79_sampleList.txt",package = "metaX") rawPeaks(para) <- read.delim(pfile,check.names = FALSE)[1:50,] sampleListFile(para) <- sfile para <- reSetPeaksData(para) para <- missingValueImpute(para) ratioPairs(para) <- "S:C" addValueNorm(para) <- para plsdaPara <- new("plsDAPara") plsdaPara@nperm <- 10 res <- peakStat(para,plsdaPara,doROC = TRUE) ## ----eval=TRUE----------------------------------------------------------- head(res@quant) ## ----eval=TRUE,warning=FALSE,cache=TRUE,message=FALSE,fig.align='center',fig.height=5,fig.width=5,fig.cap=fn("\\label{pcaplot}The PCA score plot.")---- para <- new("metaXpara") pfile <- system.file("extdata/MTBLS79.txt",package = "metaX") sfile <- system.file("extdata/MTBLS79_sampleList.txt",package = "metaX") rawPeaks(para) <- read.delim(pfile,check.names = FALSE) sampleListFile(para) <- sfile para <- reSetPeaksData(para) para <- missingValueImpute(para) para <- transformation(para,valueID = "value") metaX::plotPCA(para,valueID="value",scale="pareto",center=TRUE,rmQC = FALSE) ## ----eval=TRUE,echo=FALSE,fig.align='center',fig.height=5,fig.width=5,fig.cap=fn("\\label{pcaplot}The PCA score plot."),warning=FALSE,message=FALSE---- library(ggplot2) library(reshape2) library(dplyr) pca.res <- readRDS("metaX-pca.rds") x <- para@peaksData x$class <- as.character(x$class) x$class[is.na(x$class)] <- "QC" x<-dcast(x,ID~sample,value.var = "value") row.names(x) <- x$ID x$ID <- NULL plotData <- data.frame(x=pca.res@scores[,1], y=pca.res@scores[,2], z=pca.res@scores[,3], sample=names(x)) sampleList <- read.delim(para@sampleListFile) plotData <- merge(plotData,sampleList,by="sample",sort=FALSE) plotData$class <- as.character(plotData$class) plotData$class[is.na(plotData$class)] <- "QC" plotData$batch <- as.character(plotData$batch) ggobj <-ggplot(data = plotData,aes(x=x,y=y,colour=class))+ geom_hline(yintercept=0,colour="white",size=1)+ geom_vline(xintercept=0,colour="white",size=1)+ #geom_point()+ xlab(paste("PC1"," (",sprintf("%.2f%%",100*pca.res@R2[1]),") ",sep=""))+ ylab(paste("PC2"," (",sprintf("%.2f%%",100*pca.res@R2[2]),") ",sep=""))+ #theme_bw()+ theme(#legend.justification=c(1,1), #legend.position=c(1,1), legend.position="top", panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background=element_rect(fill="#E3E3EE"))+ #theme(legend.direction = 'horizontal', legend.position = 'top')+ stat_ellipse(geom = "polygon", type="euclid",alpha = 0.4, aes(fill = class))+ stat_ellipse(geom = "path") ggobj <- ggobj + geom_text(aes(label=order),size=4,hjust=-0.2) ggobj <- ggobj + geom_point(aes(shape=batch))+ scale_shape_manual(values=1:n_distinct(plotData$batch)) ##print(ggobj) ## ----eval=TRUE,echo=FALSE,fig.align='center',fig.height=5,fig.width=5,fig.cap=fn("\\label{3dpcaplot}The 3D PCA score plot."),warning=FALSE,message=FALSE---- library(scatterplot3d) col <- as.numeric(as.factor(plotData$class)) s3d <- scatterplot3d(plotData$x,plotData$y,plotData$z,type="h",angle = 24, col.grid="lightblue",lty.hplot=2,pch="",color="gray", xlab = paste("PC1"," (", sprintf("%.2f%%",100*pca.res@R2[1]),") ",sep=""), ylab = paste("PC2"," (", sprintf("%.2f%%",100*pca.res@R2[2]),") ",sep=""), zlab = paste("PC3"," (", sprintf("%.2f%%",100*pca.res@R2[3]),") ",sep="") )#color = as.numeric(as.factor(plotData$class))) s3d$points(plotData$x,plotData$y,plotData$z, pch = 1,col = col) s3d.coords <- s3d$xyz.convert(plotData$x,plotData$y,plotData$z) text(s3d.coords$x, s3d.coords$y, labels = plotData$order, pos = 4,cex=0.5,col = col) classLabel <- levels(as.factor(plotData$class)) legend(s3d$xyz.convert(max(plotData$x)*0.7, max(plotData$y), min(plotData$z)), col=as.numeric(as.factor(classLabel)), yjust=0,pch=1, legend = classLabel, cex = 0.8) ## ----eval=TRUE,echo=FALSE,fig.align='center',fig.height=5,fig.width=5,fig.cap=fn("\\label{pcaloadingplot}The PCA loading plot."),warning=FALSE,message=FALSE---- loadings_data <- as.data.frame(pca.res@loadings) loadings_data <- as.matrix(loadings_data) HotEllipse<-abs(cbind(metaX:::HotE(loadings_data[,1],loadings_data[,2])))*0.9 outliers<-as.numeric() for (i in 1:nrow(loadings_data)){ sample<-abs(loadings_data[i,]) out.PC1<-which(HotEllipse[,1]0,1,0)#+out.PC1.PC3+out.PC2.PC3>0,1,0) outliers<-c(outliers,outlier) } dat <- as.data.frame(loadings_data) dat$label <- row.names(dat) dat$label[outliers==0] <- "" dat$col <- as.character(outliers) dat$alllabel <- row.names(dat) gg <- ggplot(data=dat,aes(x=PC1,y=PC2,colour=col))+ geom_hline(yintercept=0,linetype=2)+ geom_vline(xintercept=0,linetype=2)+ geom_point(alpha=0.5)+ theme_bw()+ theme(legend.position="none") gg <- gg + geom_text(aes(label=label),size=2.5,vjust=0,hjust=0) print(gg) ## ----eval=TRUE,warning=FALSE,cache=TRUE,message=FALSE-------------------- para <- new("metaXpara") pfile <- system.file("extdata/MTBLS79.txt",package = "metaX") sfile <- system.file("extdata/MTBLS79_sampleList.txt",package = "metaX") rawPeaks(para) <- read.delim(pfile,check.names = FALSE) sampleListFile(para) <- sfile para <- reSetPeaksData(para) para <- missingValueImpute(para) para <- metaX::normalize(para,method="pqn") plsdaPara <- new("plsDAPara") plsdaPara@nperm <- 100 plsda.res <- runPLSDA(para = para,plsdaPara = plsdaPara, sample = c("S","C"),valueID="value") cat("R2Y: ",plsda.res$plsda$res$R2,"\n") cat("Q2Y: ",plsda.res$plsda$res$Q2,"\n") ## permutation test cat("P-value R2Y: ",plsda.res$pvalue$R2,"\n") cat("P-value Q2Y: ",plsda.res$pvalue$Q2,"\n") ## ----eval=TRUE,echo=FALSE,fig.align='center',fig.height=5,fig.width=5,fig.cap=fn("\\label{permutationtest}The permutation test plot for PLS-DA."),warning=FALSE,message=FALSE---- plotdat <- as.matrix(cbind(unlist(plsda.res$plsda$res),plsda.res$plsda$perm)) plotdat <- as.data.frame(t(plotdat)) x1 <- plotdat$cor[1] y1 <- plotdat$R2[1] y2 <- plotdat$Q2[1] plotdat$cor <- abs(plotdat$cor) plotdat <- plotdat[order(plotdat$cor),] par(mar=c(3,3,2,1),mgp=c(1.6,0.6,0),cex.lab=1.2,cex.main=0.9) plot(plotdat$cor,plotdat$R2,ylim=c(min(plotdat$R2,plotdat$Q2),1),pch=16, xlab="Cor",ylab="Value",col="blue") points(plotdat$cor,plotdat$Q2,pch=15,col="red") lm.r <- lm(I(R2-y1)~I(cor-x1)+0,data=plotdat) lm.q <- lm(I(Q2-y2)~I(cor-x1)+0,data=plotdat) #lines(plotdat$cor,predict(lm.r,data=plotdat$cor),col="blue",lty=2) #lines(plotdat$cor,predict(lm.q,data=plotdat$cor),col="red",lty=2) int.R <- predict(lm.r,newdata=list(cor=0))+y1 int.Q <- predict(lm.q,newdata=list(cor=0))+y2 abline(int.R,coef(lm.r),lty=2,col="blue") abline(int.Q,coef(lm.q),lty=2,col="red") #abline(lm.q,lty=2,col="red") legend("bottomright",pch=c(16,15),legend = c("R2","Q2"),col=c("blue","red")) title(main = paste("Intercepts:","R2=(0.0,",sprintf("%.4f",int.R), "), Q2=(0.0,",sprintf("%.4f",int.Q),")")) ## ----eval=TRUE,echo=FALSE,fig.align='center',fig.height=5,fig.width=5,fig.cap=fn("\\label{scoreplotplsda}The score plot for PLS-DA."),warning=FALSE,message=FALSE---- library(pls) result <- plsda.res sampleList <- read.delim(para@sampleListFile) peaksData <- para@peaksData peaksData <- peaksData[peaksData$class %in% c("S","C"),] peaksData$class <- as.character(peaksData$class) plsData <- dcast(peaksData,sample+class~ID, value.var = "value") plsData$class[is.na(plsData$class)] <- "QC" plotData <- data.frame(x=result$model$scores[,1], y=result$model$scores[,2], sample=plsData$sample, class=result$class$rawLabel) plotData$class <- as.character(plotData$class) plotData$class[is.na(plotData$class)] <- "QC" sampleList$class <- NULL plotData <- merge(plotData,sampleList,by="sample",sort=FALSE) ggobj <-ggplot(data = plotData,aes(x=x,y=y,colour=class))+ geom_hline(yintercept=0,colour="white",size=1)+ geom_vline(xintercept=0,colour="white",size=1)+ geom_point()+ xlab(paste("PC1"," (",sprintf("%.2f%%",explvar(result$model)[1]),") ", sep=""))+ ylab(paste("PC2"," (",sprintf("%.2f%%",explvar(result$model)[2]),") ", sep=""))+ #theme_bw()+ theme(legend.justification=c(1,1), legend.position=c(1,1), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background=element_rect(fill="#E3E3EE"))+ #theme(legend.direction = 'horizontal', legend.position = 'top')+ stat_ellipse(geom = "polygon", type="euclid",alpha = 0.4, aes(fill = class))+ stat_ellipse(geom = "path",level=0.95) ggobj <- ggobj + geom_text(aes(label=order),size=4,hjust=-0.2) print(ggobj) ## ----eval=TRUE,echo=FALSE,fig.align='center',fig.height=5,fig.width=5,fig.cap=fn("\\label{loadingplotplsda}The loading plot for PLS-DA."),warning=FALSE,message=FALSE---- loadings_data <- as.data.frame(plsda.res$model$loadings[,1:2]) names(loadings_data) <- c("PC1","PC2") loadings_data <- as.matrix(loadings_data) HotEllipse<-abs(cbind(metaX:::HotE(loadings_data[,1],loadings_data[,2])))*0.9 outliers<-as.numeric() for (i in 1:nrow(loadings_data)){ sample<-abs(loadings_data[i,]) out.PC1<-which(HotEllipse[,1]0,1,0)#+out.PC1.PC3+out.PC2.PC3>0,1,0) outliers<-c(outliers,outlier) } dat <- as.data.frame(loadings_data) dat$label <- row.names(dat) dat$label[outliers==0] <- "" dat$col <- as.character(outliers) dat$alllabel <- row.names(dat) gg <- ggplot(data=dat,aes(x=PC1,y=PC2,colour=col))+ geom_hline(yintercept=0,linetype=2)+ geom_vline(xintercept=0,linetype=2)+ geom_point(alpha=0.5)+ theme_bw()+ theme(legend.position="none") gg <- gg + geom_text(aes(label=label),size=2.5,vjust=0,hjust=0) print(gg) ## ----eval=FALSE---------------------------------------------------------- ## para <- new("metaXpara") ## pfile <- system.file("extdata/MTBLS79.txt",package = "metaX") ## sfile <- system.file("extdata/MTBLS79_sampleList.txt",package = "metaX") ## rawPeaks(para) <- read.delim(pfile,check.names = FALSE) ## sampleListFile(para) <- sfile ## para <- reSetPeaksData(para) ## para <- missingValueImpute(para) ## para <- metaX::normalize(para,method="pqn",valueID="value") ## selectBestComponent(para,np=10,sample=c("S","C"),scale="pareto",valueID="value",k=5) ## ----eval=TRUE----------------------------------------------------------- ratioPairs(para) <- "KO:WT" ## ----eval=TRUE----------------------------------------------------------- ratioPairs(para) <- "A:B;C:B;D:B" ## ----eval=TRUE----------------------------------------------------------- ## set the output parameters outdir(para) <- "test" prefix(para) <- "metaX" ## ----sessioninfo, results='asis', echo=FALSE----------------------------- toLatex(sessionInfo())