## ----style, eval=TRUE, echo = FALSE, results = 'asis'-------------------- BiocStyle::markdown() ## ---- echo=FALSE--------------------------------------------------------- library(knitr) ## ------------------------------------------------------------------------ library(Harman) library(HarmanData) library(RColorBrewer) ## ------------------------------------------------------------------------ data(OLF) ## ------------------------------------------------------------------------ table(olf.info) ## ------------------------------------------------------------------------ olf.info[1:7,] ## ------------------------------------------------------------------------ head(olf.data) ## ------------------------------------------------------------------------ olf.harman <- harman(olf.data, expt=olf.info$Treatment, batch=olf.info$Batch, limit=0.95) ## ------------------------------------------------------------------------ str(olf.harman) ## ---- fig.height=4, fig.width=7------------------------------------------ plot(olf.harman) ## ---- fig.height=5, fig.width=5------------------------------------------ arrowPlot(olf.harman) ## ------------------------------------------------------------------------ summary(olf.harman) kable(olf.harman$stats) ## ---- fig.height=5, fig.width=5------------------------------------------ arrowPlot(olf.harman, 1, 8) ## ---- fig.height=5, fig.width=5------------------------------------------ arrowPlot(olf.harman, 8, 9) ## ---- fig.height=4, fig.width=7------------------------------------------ par(mfrow=c(1,2)) pcaPlot(olf.harman, colBy='batch', pchBy='expt', main='Col by Batch') pcaPlot(olf.harman, colBy='expt', pchBy='batch', main='Col by Expt') ## ------------------------------------------------------------------------ olf.data.corrected <- reconstructData(olf.harman) ## ------------------------------------------------------------------------ olf.data.remade <- reconstructData(olf.harman, this='original') all.equal(as.matrix(olf.data), olf.data.remade) ## ---- fig.height=9, fig.width=5------------------------------------------ olf.data.diff <- abs(as.matrix(olf.data) - olf.data.corrected) diff_rowvar <- apply(olf.data.diff, 1, var) by_rowvar <- order(diff_rowvar) par(mfrow=c(1,1)) image(x=1:ncol(olf.data.diff), y=1:nrow(olf.data.diff), z=t(olf.data.diff[by_rowvar, ]), xlab='Sample', ylab='Probeset', main='Harman probe adjustments (ordered by variance)', cex.main=0.7, col=brewer.pal(9, 'Reds')) ## ---- echo=FALSE--------------------------------------------------------- rm(list=ls()[grep("olf", ls())]) ## ------------------------------------------------------------------------ require(Harman) require(HarmanData) data(IMR90) ## ---- echo=FALSE--------------------------------------------------------- kable(imr90.info) ## ------------------------------------------------------------------------ table(expt=imr90.info$Treatment, batch=imr90.info$Batch) ## ---- fig.height=4, fig.width=7------------------------------------------ # define a quick PCA and plot function pca <- function(exprs, pc_x=1, pc_y=2, cols, ...) { pca <- prcomp(t(exprs), retx=TRUE, center=TRUE) if(is.factor(cols)) { factor_names <- levels(cols) num_levels <- length(factor_names) palette <- rainbow(num_levels) mycols <- palette[cols] } else { mycols <- cols } plot(pca$x[, pc_x], pca$x[, pc_y], xlab=paste('PC', pc_x, sep=''), ylab=paste('PC', pc_y, sep=''), col=mycols, ...) if(is.factor(cols)) { legend(x=min(pca$x[, pc_x]), y=max(pca$x[, pc_y]), legend=factor_names, fill=palette, cex=0.5) } } par(mfrow=c(1,2), mar=c(4, 4, 4, 2) + 0.1) imr90.pca <- prcomp(t(imr90.data), scale. = TRUE) prcompPlot(imr90.pca, pc_x=1, pc_y=2, colFactor=imr90.info$Batch, pchFactor=imr90.info$Treatment, main='IMR90, PC1 v PC2') prcompPlot(imr90.pca, pc_x=1, pc_y=3, colFactor=imr90.info$Batch, pchFactor=imr90.info$Treatment, main='IMR90, PC1 v PC3') ## ------------------------------------------------------------------------ imr90.hm <- harman(as.matrix(imr90.data), expt=imr90.info$Treatment, batch=imr90.info$Batch) ## ---- echo=FALSE--------------------------------------------------------- kable(imr90.hm$stats) ## ---- fig.height=4, fig.width=7------------------------------------------ plot(imr90.hm, pc_x=1, pc_y=2) plot(imr90.hm, pc_x=1, pc_y=3) ## ---- fig.height=5, fig.width=5------------------------------------------ arrowPlot(imr90.hm, pc_x=1, pc_y=3) ## ---- fig.height=4, fig.width=7------------------------------------------ imr90.hm <- harman(as.matrix(imr90.data), expt=imr90.info$Treatment, batch=imr90.info$Batch, limit=0.9) plot(imr90.hm, pc_x=1, pc_y=3) ## ---- echo=FALSE--------------------------------------------------------- kable(imr90.hm$stats) ## ---- fig.height=4, fig.width=7------------------------------------------ imr90.data.corrected <- reconstructData(imr90.hm) par(mfrow=c(1,2)) #pca_cols <- rainbow(3)[imr90.hm$factors$batch] pca(imr90.data, 1, 3, cols=imr90.hm$factors$batch, cex=1.5, pch=16, main='PCA, Original') pca(imr90.data.corrected, 1, 3, cols=imr90.hm$factors$batch, cex=1.5, pch=16, main='PCA, Corrected') ## ---- echo=FALSE--------------------------------------------------------- rm(list=ls()[grep("imr90", ls())]) ## ------------------------------------------------------------------------ library(bladderbatch, quietly=TRUE) library(Harman) # This loads an ExpressionSet object called bladderEset data(bladderdata) bladderEset ## ---- echo=FALSE--------------------------------------------------------- kable(pData(bladderEset)) ## ------------------------------------------------------------------------ table(batch=pData(bladderEset)$batch, expt=pData(bladderEset)$outcome) ## ---- fig.height=4, fig.width=7------------------------------------------ par(mfrow=c(1,2)) pca(exprs(bladderEset), cols=as.factor(pData(bladderEset)$batch), pch=as.integer(pData(bladderEset)$outcome), main='Col by Batch') pca(exprs(bladderEset), cols=pData(bladderEset)$outcome, pch=pData(bladderEset)$batch, main='Col by Expt') ## ---- cache=TRUE--------------------------------------------------------- expt <- pData(bladderEset)$outcome batch <- pData(bladderEset)$batch bladder_harman <- harman(exprs(bladderEset), expt=expt, batch=batch) sum(bladder_harman$stats$correction < 1) ## ------------------------------------------------------------------------ summary(bladder_harman) ## ---- fig.height=4, fig.width=7------------------------------------------ par(mfrow=c(1,1)) plot(bladder_harman) ## ---- fig.height=4, fig.width=7------------------------------------------ plot(bladder_harman, 1, 3) ## ---- fig.height=5, fig.width=5------------------------------------------ arrowPlot(bladder_harman, 1, 3) ## ------------------------------------------------------------------------ CorrectedBladderEset <- bladderEset exprs(CorrectedBladderEset) <- reconstructData(bladder_harman) comment(bladderEset) <- 'Original' comment(CorrectedBladderEset) <- 'Corrected' ## ------------------------------------------------------------------------ library(limma, quietly=TRUE) design <- model.matrix(~0 + expt) colnames(design) <- c("Biopsy", "mTCC", "Normal", "sTCC", "sTCCwCIS") contrast_matrix <- makeContrasts(sTCCwCIS - sTCC, sTCCwCIS - Normal, Biopsy - Normal, levels=design) fit <- lmFit(exprs(bladderEset), design) fit2 <- contrasts.fit(fit, contrast_matrix) fit2 <- eBayes(fit2) summary(classifyTestsT(fit2)) ## ------------------------------------------------------------------------ fit_hm <- lmFit(exprs(CorrectedBladderEset), design) fit2_hm <- contrasts.fit(fit_hm, contrast_matrix) fit2_hm <- eBayes(fit2_hm) summary(classifyTestsT(fit2_hm)) ## ---- echo=FALSE--------------------------------------------------------- rm(list=ls()[!grepl("^pca$", ls())]) ## ------------------------------------------------------------------------ library(minfi, quietly = TRUE) logit2 ilogit2 ilogit2(Inf) ilogit2(-Inf) ## ------------------------------------------------------------------------ betas <- seq(0, 1, by=0.05) betas newBetas <- shiftBetas(betas, shiftBy=1e-4) newBetas range(betas) range(newBetas) logit2(betas) logit2(newBetas) ## ------------------------------------------------------------------------ library(minfiData, quietly = TRUE) baseDir <- system.file("extdata", package="minfiData") targets <- read.metharray.sheet(baseDir) ## ---- cache=TRUE--------------------------------------------------------- library(missMethyl, quietly = TRUE) rgSet <- read.metharray.exp(targets=targets) mRaw <- preprocessRaw(rgSet) mSetSw <- SWAN(mRaw) mSet <- preprocessIllumina(rgSet, bg.correct=TRUE, normalize="controls", reference=2) detP <- detectionP(rgSet) keep <- rowSums(detP < 0.01) == ncol(rgSet) mSetIl <- mSet[keep,] mSetSw <- mSetSw[keep,] ## ------------------------------------------------------------------------ kable(pData(mSetSw)[,c('Sample_Group', 'person', 'sex', 'status', 'Array', 'Slide')]) ## ---- fig.height=5, fig.width=5------------------------------------------ par(mfrow=c(1, 1)) cancer_by_gender_factor <- as.factor(paste(pData(mSetSw)$sex, pData(mSetSw)$status) ) mdsPlot(mSetSw, sampGroups=cancer_by_gender_factor, pch=16) mdsPlot(mSetSw, sampGroups=as.factor(pData(mSet)$Slide), pch=16) ## ------------------------------------------------------------------------ table(gender=pData(mSetSw)$sex, slide=pData(mSetSw)$Slide) ## ------------------------------------------------------------------------ cancer_by_gender_factor <- as.factor(paste(pData(mSetSw)$sex, pData(mSetSw)$status) ) table(expt=cancer_by_gender_factor, batch=pData(mSetSw)$Slide) ## ------------------------------------------------------------------------ table(expt=pData(mSetSw)$status, batch=pData(mSetSw)$Slide) ## ---- fig.height=4, fig.width=7------------------------------------------ par(mfrow=c(1,2)) library(lumi, quietly = TRUE) shifted_betas <- shiftBetas(betas=getBeta(mSetIl), shiftBy=1e-7) shifted_ms <- beta2m(shifted_betas) # same as logit2() from package minfi plot(density(shifted_ms, 0.05), main="Shifted M-values, shiftBy = 1e-7", cex.main=0.7) shifted_betas <- shiftBetas(betas=getBeta(mSetIl), shiftBy=1e-4) shifted_ms <- beta2m(shifted_betas) # same as logit2() from package minfi plot(density(shifted_ms, 0.05), main="Shifted M-values, shiftBy = 1e-4", cex.main=0.7) ## ---- fig.height=4, fig.width=7------------------------------------------ par(mfrow=c(1,2)) plot(density(shifted_betas, 0.1), main="Beta-values, shiftBy = 1e-4", cex.main=0.7) plot(density(shifted_ms, 0.1), main="M-values, shiftBy = 1e-4", cex.main=0.7) ## ---- fig.height=4, fig.width=7------------------------------------------ par(mfrow=c(1,2)) plot(density(shifted_ms, 0.1), main="GenomeStudio-like M-values, shiftBy = 1e-4", cex.main=0.7) plot(density(getM(mSetSw), 0.1), main="SWAN M-values", cex.main=0.7) ## ---- fig.height=4, fig.width=7------------------------------------------ methHarman <- harman(getM(mSetSw), expt=pData(mSetSw)$status, batch=pData(mSetSw)$Slide, limit=0.65) summary(methHarman) plot(methHarman, 2, 5) ## ------------------------------------------------------------------------ ms_hm <- reconstructData(methHarman) betas_hm <- m2beta(ms_hm) ## ------------------------------------------------------------------------ library(limma, quietly=TRUE) group <- factor(pData(mSetSw)$status, levels=c("normal", "cancer")) id <- factor(pData(mSetSw)$person) design <- model.matrix(~id + group) design ## ------------------------------------------------------------------------ fit_hm <- lmFit(ms_hm, design) fit_hm <- eBayes(fit_hm) fit <- lmFit(getM(mSetSw), design) fit <- eBayes(fit) ## ------------------------------------------------------------------------ summary_fit_hm <- summary(decideTests(fit_hm)) summary_fit <- summary(decideTests(fit)) summary_fit_hm summary_fit ## ---- echo=FALSE--------------------------------------------------------- rm(list=ls()[!grepl("^pca$", ls())]) ## ------------------------------------------------------------------------ # Call dependencies library(msmsEDA) library(Harman) data(msms.dataset) msms.dataset ## ------------------------------------------------------------------------ # Preprocess to remove rows which are all 0 and replace NA values with 0. msms_pp <- pp.msms.data(msms.dataset) ## ------------------------------------------------------------------------ kable(pData(msms_pp)) ## ------------------------------------------------------------------------ # Create experimental and batch variables expt <- pData(msms_pp)$treat batch <- pData(msms_pp)$batch table(expt, batch) ## ------------------------------------------------------------------------ # Log2 transform data, add 1 to avoid infinite values log_ms_exprs <- log(exprs(msms_pp) + 1, 2) # Correct data with Harman hm <- harman(log_ms_exprs, expt=expt, batch=batch) summary(hm) ## ---- fig.height=4, fig.width=7------------------------------------------ plot(hm) ## ------------------------------------------------------------------------ # Reconstruct data and convert from Log2, removing 1 as we added it before. log_ms_exprs_hm <- reconstructData(hm) ms_exprs_hm <- 2^log_ms_exprs_hm - 1 # Place corrected data into a new 'MSnSet' instance msms_pp_hm <- msms_pp exprs(msms_pp_hm) <- ms_exprs_hm ## ---- echo=FALSE--------------------------------------------------------- rm(list=ls()[!grepl("^pca$", ls())]) ## ---- warning=FALSE------------------------------------------------------ library(affydata, quietly = TRUE) data(Dilution) Dilution.log <- Dilution intensity(Dilution.log) <- log(intensity(Dilution)) cols <- brewer.pal(nrow(pData(Dilution)), 'Paired') ## ---- echo=FALSE--------------------------------------------------------- kable(pData(Dilution)) ## ---- fig.height=4, fig.width=4------------------------------------------ boxplot(Dilution, col=cols) ## ---- fig.height=7, fig.width=7------------------------------------------ Dilution.loess <-normalize(Dilution.log, method="loess") Dilution.qnt <-normalize(Dilution.log, method="quantiles") par(mfrow=c(2,2), mar=c(4, 4, 4, 2) + 0.1) boxplot(Dilution.loess, col=cols, main='Loess') boxplot(Dilution.qnt, col=cols, main='Quantile') pca(exprs(Dilution.loess), cols=cols, cex=1.5, pch=16, main='PCA Loess') pca(exprs(Dilution.qnt), cols=cols, cex=1.5, pch=16, main='PCA Quantile') ## ------------------------------------------------------------------------ library(Harman) loess.hm <- harman(exprs(Dilution.loess), expt=pData(Dilution.loess)$liver, batch=pData(Dilution.loess)$scanner) qnt.hm <- harman(exprs(Dilution.qnt), expt=pData(Dilution.qnt)$liver, batch=pData(Dilution.qnt)$scanner) ## ---- echo=FALSE, fig.align='left'--------------------------------------- kable(loess.hm$stats) ## ---- echo=FALSE, fig.align='left'--------------------------------------- kable(qnt.hm$stats) ## ------------------------------------------------------------------------ loess.hm <- harman(exprs(Dilution.loess), expt=pData(Dilution.loess)$liver, batch=pData(Dilution.loess)$scanner, limit=0.75) qnt.hm <- harman(exprs(Dilution.qnt), expt=pData(Dilution.qnt)$liver, batch=pData(Dilution.qnt)$scanner, limit=0.75) ## ---- echo=FALSE--------------------------------------------------------- kable(loess.hm$stats) ## ---- echo=FALSE--------------------------------------------------------- kable(qnt.hm$stats) ## ---- fig.height=3, fig.width=7------------------------------------------ par(mfrow=c(2,2), mar=c(4, 4, 4, 2) + 0.1) plot(loess.hm, 1, 2, pch=17, col=cols) plot(qnt.hm, 1, 2, pch=17, col=cols) ## ------------------------------------------------------------------------ Dilution.loess.hm <- Dilution.loess Dilution.qnt.hm <- Dilution.qnt exprs(Dilution.loess.hm) <- reconstructData(loess.hm) exprs(Dilution.qnt.hm) <- reconstructData(qnt.hm)