## ----dataset_matrix, echo=FALSE-----------------------------------------------
suppressPackageStartupMessages(library(ComplexHeatmap))

# All possible softwares, labels and report levels
sws <- c('MaxQuant', 'DIA-NN', 'Spectronaut', 'Proteome Discoverer',
         'sage', 'FragPipe', 'PEAKS')
label <- c('LFQ', 'multiplex')

# Create a combination dataset, by default leave value cells blank
## value: 0 for not yet processed data, 1 for already processed data
## dataset: what dataset will be used
d <- expand.grid(software = sws, label = label)
d$value <- 'no'
d$dataset <- ''

# Fill in manually with currently known information
d$dataset[which(d$label == 'LFQ' & d$software == 'Spectronaut')] <- 'A_DIA_LFQ'
d$dataset[which(d$label == 'LFQ' & d$software == 'DIA-NN')] <- 'A_DIA_LFQ'
d$dataset[which(d$label == 'LFQ' & d$dataset == '')] <- 'A_DDA_LFQ'
d$dataset[which(d$label == 'multiplex' & d$software %in% c('DIA-NN', 'Spectronaut'))] <- 'B_DIA_plex'
d$dataset[which(d$dataset == '')] <- 'C_DDA_TMT'

## d$value[which(d$software %in% c('DIA-NN', 'MaxQuant', 'sage', 'FragPipe'))] <- 'yes'
## d$value[which(d$software == 'Spectronaut' & d$label == 'LFQ')] <- 'yes'
## d$value[which(d$software == 'FragPipe')] <- 'yes'
## d$value[which(d$software == 'PEAKS' & d$label == 'LFQ')] <- 'yes'
d$value <- "yes"

# Dataset to matrix for plotting
d <- tidyr::unite(d, col, label)
#lvls <- c('LFQ_PSM', 'LFQ_peptide', 'LFQ_PG',
#          'multiplex_PSM', 'multiplex_peptide', 'multiplex_PG')
#d$col <- factor(d$col, levels = lvls)
#d <- dplyr::arrange(d, col)

m <- d |> dplyr::select(-dataset) |> tidyr::spread(key = 'col', value = 'value')
rownames(m) <- m$software
m$software <- NULL
m <- as.matrix(m)
colnames(m) <- gsub('LFQ_', '', fixed = TRUE, colnames(m))
colnames(m) <- gsub('multiplex_', '', fixed = TRUE, colnames(m))

# Create a helped matrix storing the text annotations
m_anno <- d |> dplyr::select(-value) |> tidyr::spread(key = 'col', value = 'dataset')
rownames(m_anno) <- m_anno$software
m_anno$software <- NULL
m_anno <- as.matrix(m_anno)
colnames(m_anno) <- gsub('LFQ_', '', fixed = TRUE, colnames(m_anno))
colnames(m_anno) <- gsub('multiplex_', '', fixed = TRUE, colnames(m_anno))

## Create a plot using the ComplexHeatmap package
colors <- structure(rep('grey95', 2), names = c("yes", "no"))

Heatmap(m, name = "Available",
        col = colors,
        cluster_rows = FALSE, cluster_columns = FALSE,
        row_names_side = "left",
        column_names_side = "top",
        column_names_centered = TRUE,
        column_names_rot = 0,
        show_heatmap_legend = FALSE,
        border = TRUE,
        cell_fun = function(j, i, x, y, width, height, fill) {
          grid.text(m_anno[i, j], x, y, gp = gpar(fontsize = 10))
        })

## ----MsDataHub----------------------------------------------------------------
library("MsDataHub")
MsDataHub() |>
    dplyr::filter(grepl("19137577", SourceUrl)) |>
    dplyr::pull(Title)

## ----loadFromMsDataHub--------------------------------------------------------
vanPuyvelde_2022_LFQ_DDA_FragPipe_A_2_psm.tsv()
Derks_2022_plex_DIA_DIANN_report_subset.tsv()

## ----library, message = FALSE-------------------------------------------------
library(QFeatures)

## ----mq-lfq-psm, message=FALSE------------------------------------------------
dataMaxquantLFQevidence <-
    vanPuyvelde_2022_LFQ_DDA_MaxQuant_evidence.txt() |>
    read.delim()

nrow(dataMaxquantLFQevidence)

## ----mq-lfq-psm2, message=FALSE-----------------------------------------------
qfMaxquant <- readQFeatures(dataMaxquantLFQevidence,
                            quantCols = "Intensity",
                            runCol = "Experiment")
names(qfMaxquant) <- paste('psm', names(qfMaxquant), sep = '_')

qfMaxquant

## ----mq-lfq-peptide0, message=FALSE-------------------------------------------
dataMaxquantLFQpeptide <-
    vanPuyvelde_2022_LFQ_DDA_MaxQuant_peptides.txt() |>
    read.delim()
nrow(dataMaxquantLFQpeptide)

## ----mq-lfq-peptide1, message=FALSE-------------------------------------------
(i <- grep('Intensity.', colnames(dataMaxquantLFQpeptide), fixed = TRUE))
colnames(dataMaxquantLFQpeptide)[i]

## ----mq-lfq-peptide2, message=FALSE-------------------------------------------
readQFeatures(dataMaxquantLFQpeptide, quantCols = i, fnames = 'Sequence')

## ----mq-lfq-peptide3, message=FALSE-------------------------------------------
pepSE <- readSummarizedExperiment(dataMaxquantLFQpeptide,
                                  quantCols = i,
                                  fnames = 'Sequence')
pepSE
qfMaxquant <- addAssay(qfMaxquant,
                      pepSE,
                      name = 'peptides')
qfMaxquant

## ----mq-lfq-pg, message=FALSE-------------------------------------------------
dataMaxquantLFQprotein <-
    vanPuyvelde_2022_LFQ_DDA_MaxQuant_proteinGroups.txt() |>
    read.delim()
nrow(dataMaxquantLFQprotein)

## get indices of LFQ intensity columns
(i <- grep('LFQ.intensity.', colnames(dataMaxquantLFQprotein),
           fixed = TRUE))
colnames(dataMaxquantLFQprotein)[i]

## load the data
protSE <- readSummarizedExperiment(dataMaxquantLFQprotein,
                                   quantCols = i,
                                   fnames = 'Protein.IDs')
protSE
qfMaxquant <- addAssay(qfMaxquant,
                       protSE,
                       name = 'proteinGroups')
qfMaxquant

## ----mq-tmt, message=FALSE----------------------------------------------------
dataMaxquantTMTevidence <-
    Christoforou_2016_TMT_DDA_MaxQuant_evidence.txt() |>
    read.delim()

(i <- grep('Reporter.intensity.\\d+', colnames(dataMaxquantTMTevidence)))
colnames(dataMaxquantTMTevidence)[i]

qfMaxquantTMT <- readQFeatures(dataMaxquantTMTevidence,
                               quantCols = i,
                               runCol = 'Raw.file',
                               fnames = 'Sequence')

qfMaxquantTMT

## ----diann-tsv, message = FALSE-----------------------------------------------
qfDiannLFQ <-
    vanPuyvelde_2022_LFQ_DIA_DIANN_report.tsv() |>
    read.delim() |>
    readQFeaturesFromDIANN(runCol = 'Run')

qfDiannLFQ

## ----diann-parquet, message=FALSE---------------------------------------------
qfDiannParquet <-
    vanPuyvelde_2022_LFQ_DIA_DIANN_report.parquet() |>
    arrow::read_parquet() |>
    readQFeaturesFromDIANN(runCol = 'Run')

qfDiannParquet

## ----diann-plexdia, message=FALSE---------------------------------------------
qfDiannPlex <-
    Derks_2022_plex_DIA_DIANN_report_subset.tsv() |>
    read.delim() |>
    readQFeaturesFromDIANN(runCol = 'Run',
                           multiplexing = 'mTRAQ')

qfDiannPlex

## -----------------------------------------------------------------------------
qfDiannPlex$sample <- 'mixed standard'
qfDiannPlex$rep <- rep(1:3, each = 3)
qfDiannPlex$label <- paste0('mTraq d', rep(c(0, 4, 8), times = 3))
colData(qfDiannPlex)

## ----sage-lfq, message = FALSE, warning=FALSE---------------------------------
dataSageLFQ <-
    vanPuyvelde_2022_LFQ_DDA_sage_lfq.tsv() |>
    read.delim()

(i <- grep('.mzML', colnames(dataSageLFQ), fixed = TRUE))
colnames(dataSageLFQ)[i]

qfSageLFQ <- readQFeatures(dataSageLFQ,
                           quantCols = i,
                           name = 'peptides')

qfSageLFQ

## ----sage-tmt, message = FALSE, warning = FALSE-------------------------------
dataSageTMT <-
    Christoforou_2016_TMT_DDA_sage_tmt.tsv() |>
    read.delim()

## -----------------------------------------------------------------------------
colnames(dataSageTMT)

## ----sage-tmt-2, message = FALSE, warning = FALSE-----------------------------
dataSageTMTident <-
    Christoforou_2016_TMT_DDA_sage_results.sage.tsv() |>
    read.delim()

dataSageTMTfinal <- merge(dataSageTMT, dataSageTMTident, by = c('filename', 'scannr'))

(i <- grep('tmt_', colnames(dataSageTMTfinal), fixed = TRUE))
colnames(dataSageTMTfinal)[i]


qfSageTMT <- readQFeatures(dataSageTMTfinal, quantCols = i)
qfSageTMT

## ----sager, eval = FALSE------------------------------------------------------
# sager::sageQFeatures(
#            Christoforou_2016_TMT_DDA_sage_tmt.tsv(),
#            Christoforou_2016_TMT_DDA_sage_results.sage.tsv())

## ----fragpipe-lfq0, message = FALSE-------------------------------------------
fls <- MsDataHub() |>
    dplyr::filter(grepl("2022_LFQ_DDA_FragPipe", Title)) |>
    dplyr::pull(1)
fls

## ----fragpipe-lfq1, message = FALSE-------------------------------------------
lst <- lapply(fls, function(fl) {
    call(fl) |>
        eval() |>
        read.delim() |>
        readSummarizedExperiment(quantCols = "Intensity")
})

names(lst) <- fls

## ----fragpipe-lfq2, message = FALSE-------------------------------------------
qfFpipeLFQ <- QFeatures(lst)
qfFpipeLFQ

## ----fragpipe-lfq-names, message=FALSE----------------------------------------
names(qfFpipeLFQ) <- sub('vanPuyvelde_2022_LFQ_DDA_FragPipe_(\\w_\\d_psm)\\.tsv', '\\1', names(qfFpipeLFQ))
qfFpipeLFQ

## ----fragpipe-tmt, message=FALSE----------------------------------------------

fls <- MsDataHub() |>
    dplyr::filter(grepl("Christoforou_2016_TMT_DDA_FragPipe_Fraction", Title)) |>
    dplyr::pull(1)

lst <- lapply(fls,
       function(fl) {
           x <- eval(call(fl)) |>
               read.delim()
           i <- grep('Intensity\\.', colnames(x))
           readSummarizedExperiment(x, quantCols = i)
       })

names(lst) <- fls
QFeatures(lst)