## ----dataset_matrix, echo=FALSE-----------------------------------------------
suppressPackageStartupMessages(library(ComplexHeatmap))

# All supported software tools and labelling strategies
sws <- c('MaxQuant', 'DIA-NN', 'Spectronaut', 'Proteome Discoverer',
         'sage', 'FragPipe', 'PEAKS')
label <- c('LFQ', 'multiplex')

# Create a table with every software/label combination
## value: 'no' for not yet processed data, 'yes' for already processed data
## dataset: what dataset will be used
d <- expand.grid(software = sws, label = label)
d$value <- 'no'
d$dataset <- ''

# Fill in manually with currently known information. The order matters:
# the DIA tools are assigned first, then every remaining empty cell of a
# label falls back to the DDA dataset.
is_dia <- d$software %in% c('DIA-NN', 'Spectronaut')
d$dataset[d$label == 'LFQ' & is_dia] <- 'A_DIA_LFQ'
d$dataset[d$label == 'LFQ' & d$dataset == ''] <- 'A_DDA_LFQ'
d$dataset[d$label == 'multiplex' & is_dia] <- 'B_DIA_plex'
d$dataset[d$dataset == ''] <- 'C_DDA_TMT'

## d$value[which(d$software %in% c('DIA-NN', 'MaxQuant', 'sage', 'FragPipe'))] <- 'yes'
## d$value[which(d$software == 'Spectronaut' & d$label == 'LFQ')] <- 'yes'
## d$value[which(d$software == 'FragPipe')] <- 'yes'
## d$value[which(d$software == 'PEAKS' & d$label == 'LFQ')] <- 'yes'
d$value <- "yes"

# Dataset to matrix for plotting
d <- tidyr::unite(d, col, label)
#lvls <- c('LFQ_PSM', 'LFQ_peptide', 'LFQ_PG',
#          'multiplex_PSM', 'multiplex_peptide', 'multiplex_PG')
#d$col <- factor(d$col, levels = lvls)
#d <- dplyr::arrange(d, col)

# Reshape the long table `x` into a character matrix with one row per
# software and one column per value of `col`, filled with the column named
# by `values_from`.
label_matrix <- function(x, values_from) {
    wide <- x[, c('software', 'col', values_from)] |>
        tidyr::pivot_wider(names_from = 'col',
                           values_from = dplyr::all_of(values_from)) |>
        ## make sure we hold a plain data.frame before setting row names
        as.data.frame()
    rownames(wide) <- wide$software
    wide$software <- NULL
    mat <- as.matrix(wide)
    ## strip the label prefixes from the column names, if present
    colnames(mat) <- gsub('LFQ_', '', fixed = TRUE, colnames(mat))
    colnames(mat) <- gsub('multiplex_', '', fixed = TRUE, colnames(mat))
    mat
}

# Matrix of 'yes'/'no' values that drives the cell colours
m <- label_matrix(d, 'value')

# Create a helper matrix storing the text annotations
m_anno <- label_matrix(d, 'dataset')

## Create a plot using the ComplexHeatmap package
## (both values map to the same colour, so only the dataset names printed
## by cell_fun distinguish the cells)
colors <- structure(rep('grey95', 2), names = c("yes", "no"))
Heatmap(m, name = "Available", col = colors,
        cluster_rows = FALSE, cluster_columns = FALSE,
        row_names_side = "left", column_names_side = "top",
        column_names_centered = TRUE, column_names_rot = 0,
        show_heatmap_legend = FALSE, border = TRUE,
        cell_fun = function(j, i, x, y, width, height, fill) {
            grid.text(m_anno[i, j], x, y, gp = gpar(fontsize = 10))
        })


## ----MsDataHub----------------------------------------------------------------
library("MsDataHub")
## titles of the MsDataHub entries whose source URL contains "19137577"
MsDataHub() |>
    dplyr::filter(grepl("19137577", SourceUrl)) |>
    dplyr::pull(Title)


## ----loadFromMsDataHub--------------------------------------------------------
vanPuyvelde_2022_LFQ_DDA_FragPipe_A_2_psm.tsv()
Derks_2022_plex_DIA_DIANN_report_subset.tsv()


## ----library, message = FALSE-------------------------------------------------
library(QFeatures)


## ----mq-lfq-psm, message=FALSE------------------------------------------------
dataMaxquantLFQevidence <-
    vanPuyvelde_2022_LFQ_DDA_MaxQuant_evidence.txt() |>
    read.delim()
nrow(dataMaxquantLFQevidence)


## ----mq-lfq-psm2, message=FALSE-----------------------------------------------
qfMaxquant <- readQFeatures(dataMaxquantLFQevidence,
                            quantCols = "Intensity",
                            runCol = "Experiment")
## prefix the set names with 'psm_'
names(qfMaxquant) <- paste('psm', names(qfMaxquant), sep = '_')
qfMaxquant


## ----mq-lfq-peptide0, message=FALSE-------------------------------------------
dataMaxquantLFQpeptide <-
    vanPuyvelde_2022_LFQ_DDA_MaxQuant_peptides.txt() |>
    read.delim()
nrow(dataMaxquantLFQpeptide)


## ----mq-lfq-peptide1, message=FALSE-------------------------------------------
## get indices of the columns whose name contains 'Intensity.'
(i <- grep('Intensity.', colnames(dataMaxquantLFQpeptide), fixed = TRUE))
colnames(dataMaxquantLFQpeptide)[i]


## ----mq-lfq-peptide2, message=FALSE-------------------------------------------
readQFeatures(dataMaxquantLFQpeptide,
              quantCols = i,
              fnames = 'Sequence')


## ----mq-lfq-peptide3, message=FALSE-------------------------------------------
pepSE <- readSummarizedExperiment(dataMaxquantLFQpeptide,
                                  quantCols = i,
                                  fnames = 'Sequence')
pepSE
qfMaxquant <- addAssay(qfMaxquant, pepSE, name = 'peptides')
qfMaxquant


## ----mq-lfq-pg, message=FALSE-------------------------------------------------
dataMaxquantLFQprotein <-
    vanPuyvelde_2022_LFQ_DDA_MaxQuant_proteinGroups.txt() |>
    read.delim()
nrow(dataMaxquantLFQprotein)
## get indices of LFQ intensity columns
(i <- grep('LFQ.intensity.', colnames(dataMaxquantLFQprotein), fixed = TRUE))
colnames(dataMaxquantLFQprotein)[i]
## load the data
protSE <- readSummarizedExperiment(dataMaxquantLFQprotein,
                                   quantCols = i,
                                   fnames = 'Protein.IDs')
protSE
qfMaxquant <- addAssay(qfMaxquant, protSE, name = 'proteinGroups')
qfMaxquant


## ----mq-tmt, message=FALSE----------------------------------------------------
dataMaxquantTMTevidence <-
    Christoforou_2016_TMT_DDA_MaxQuant_evidence.txt() |>
    read.delim()
## the trailing \\d+ restricts the match to names where digits directly
## follow 'Reporter.intensity.'
(i <- grep('Reporter.intensity.\\d+', colnames(dataMaxquantTMTevidence)))
colnames(dataMaxquantTMTevidence)[i]
qfMaxquantTMT <- readQFeatures(dataMaxquantTMTevidence,
                               quantCols = i,
                               runCol = 'Raw.file',
                               fnames = 'Sequence')
qfMaxquantTMT


## ----diann-tsv, message = FALSE-----------------------------------------------
qfDiannLFQ <- vanPuyvelde_2022_LFQ_DIA_DIANN_report.tsv() |>
    read.delim() |>
    readQFeaturesFromDIANN(runCol = 'Run')
qfDiannLFQ


## ----diann-parquet, message=FALSE---------------------------------------------
qfDiannParquet <- vanPuyvelde_2022_LFQ_DIA_DIANN_report.parquet() |>
    arrow::read_parquet() |>
    readQFeaturesFromDIANN(runCol = 'Run')
qfDiannParquet


## ----diann-plexdia, message=FALSE---------------------------------------------
qfDiannPlex <- Derks_2022_plex_DIA_DIANN_report_subset.tsv() |>
    read.delim() |>
    readQFeaturesFromDIANN(runCol = 'Run',
                           multiplexing = 'mTRAQ')
qfDiannPlex


## -----------------------------------------------------------------------------
## annotate the samples; the replicate and label vectors both have length 9
qfDiannPlex$sample <- 'mixed standard'
qfDiannPlex$rep <- rep(1:3, each = 3)
qfDiannPlex$label <- paste0('mTraq d', rep(c(0, 4, 8), times = 3))
colData(qfDiannPlex)


## ----sage-lfq, message = FALSE, warning=FALSE---------------------------------
dataSageLFQ <- vanPuyvelde_2022_LFQ_DDA_sage_lfq.tsv() |>
    read.delim()
## get indices of the columns whose name contains '.mzML'
(i <- grep('.mzML', colnames(dataSageLFQ), fixed = TRUE))
colnames(dataSageLFQ)[i]
qfSageLFQ <- readQFeatures(dataSageLFQ,
                           quantCols = i,
                           name = 'peptides')
qfSageLFQ


## ----sage-tmt, message = FALSE, warning = FALSE-------------------------------
dataSageTMT <- Christoforou_2016_TMT_DDA_sage_tmt.tsv() |>
    read.delim()


## -----------------------------------------------------------------------------
colnames(dataSageTMT)


## ----sage-tmt-2, message = FALSE, warning = FALSE-----------------------------
dataSageTMTident <- Christoforou_2016_TMT_DDA_sage_results.sage.tsv() |>
    read.delim()
## combine the two tables on their shared file name and scan number columns
dataSageTMTfinal <- merge(dataSageTMT, dataSageTMTident,
                          by = c('filename', 'scannr'))
(i <- grep('tmt_', colnames(dataSageTMTfinal), fixed = TRUE))
colnames(dataSageTMTfinal)[i]
qfSageTMT <- readQFeatures(dataSageTMTfinal, quantCols = i)
qfSageTMT


## ----sager, eval = FALSE------------------------------------------------------
# sager::sageQFeatures(
#     Christoforou_2016_TMT_DDA_sage_tmt.tsv(),
#     Christoforou_2016_TMT_DDA_sage_results.sage.tsv())


## ----fragpipe-lfq0, message = FALSE-------------------------------------------
## titles of the FragPipe LFQ entries; each title is also the name of the
## function that is called below to get the file
fls <- MsDataHub() |>
    dplyr::filter(grepl("2022_LFQ_DDA_FragPipe", Title)) |>
    dplyr::pull(Title)
fls


## ----fragpipe-lfq1, message = FALSE-------------------------------------------
## call each accessor by name, read the table and convert it
lst <- lapply(fls, function(fl) {
    call(fl) |>
        eval() |>
        read.delim() |>
        readSummarizedExperiment(quantCols = "Intensity")
})
names(lst) <- fls


## ----fragpipe-lfq2, message = FALSE-------------------------------------------
qfFpipeLFQ <- QFeatures(lst)
qfFpipeLFQ


## ----fragpipe-lfq-names, message=FALSE----------------------------------------
## shorten the set names to the part captured by the regular expression
names(qfFpipeLFQ) <- sub('vanPuyvelde_2022_LFQ_DDA_FragPipe_(\\w_\\d_psm)\\.tsv',
                         '\\1',
                         names(qfFpipeLFQ))
qfFpipeLFQ


## ----fragpipe-tmt, message=FALSE----------------------------------------------
fls <- MsDataHub() |>
    dplyr::filter(grepl("Christoforou_2016_TMT_DDA_FragPipe_Fraction", Title)) |>
    dplyr::pull(Title)
lst <- lapply(fls, function(fl) {
    x <- eval(call(fl)) |>
        read.delim()
    ## quantitative columns are those whose name contains 'Intensity.'
    i <- grep('Intensity\\.', colnames(x))
    readSummarizedExperiment(x, quantCols = i)
})
names(lst) <- fls
QFeatures(lst)