## ----chunk_setup, include = FALSE---------------------------------------------
# Global knitr chunk options for this vignette.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.crop = FALSE
)

## ----load_libraries, message=FALSE, warning=FALSE-----------------------------
# 1. Install MetaProViz from Bioconductor devel:
# if (!requireNamespace("BiocManager", quietly = TRUE)) {
#   install.packages("BiocManager")
# }
# BiocManager::install(version = "devel")
# BiocManager::install("MetaProViz")

# 2. Install the latest development version from GitHub using remotes:
# remotes::install_github("saezlab/MetaProViz")
# Install Rtools if you have not done this yet, using the appropriate version
# (e.g. Windows or macOS).

library(MetaProViz)

# Dependencies that need to be loaded:
library(magrittr)
library(dplyr)
library(rlang)
library(ggfortify)
library(tibble)

## ----load_data----------------------------------------------------------------
# Load the example data and move the sample codes into the row names.
data(intracell_raw)
Intra <- intracell_raw %>%
  column_to_rownames("Code")

## ----show_data_preview, echo=FALSE--------------------------------------------
# https://cran.r-project.org/web/packages/kableExtra/vignettes/awesome_table_in_html.html
# Preview the data:
Intra[1:5, c(1:4, 21, 44)] %>%
  kableExtra::kbl(caption = "Preview of the DF `Intra` including columns with sample information and metabolite ids with their measured values.") %>%
  kableExtra::kable_classic(full_width = FALSE, html_font = "Cambria", font_size = 12) # %>%
# kableExtra::scroll_box(width = "100%", height = "200px")

## ----processing, fig.width=6, fig.height=4.5, fig.align="left"----------------
# Row/column positions used to split `Intra` into measurements and sample
# metadata. Named once so both arguments below stay in sync.
pool_rows <- 49:58 # pool samples, excluded from pre-processing
sample_info_cols <- 1:3 # columns holding sample information

PreprocessingResults <- processing(
  # remove pool samples and columns with sample information
  data = Intra[-pool_rows, -sample_info_cols],
  # remove pool samples and columns with metabolite measurements
  metadata_sample = Intra[-pool_rows, sample_info_cols],
  metadata_info = c(
    Conditions = "Conditions",
    Biological_Replicates = "Biological_Replicates"
  ),
  featurefilt = "Modified",
  cutoff_featurefilt = 0.8,
  tic = TRUE,
  mvi = TRUE,
  # We perform outlier testing using a 0.99 confidence interval
  hotellins_confidence = 0.99,
  core = FALSE,
  save_plot = "svg",
  save_table = "csv",
  print_plot = TRUE,
  path = NULL
)

# This is the results table:
Intra_Preprocessed <- PreprocessingResults[["DF"]][["Preprocessing_output"]]

## ----show_preprocessing_results, echo=FALSE-----------------------------------
# Preview the pre-processed data:
Intra_Preprocessed[29:32, 1:9] %>%
  kableExtra::kbl(caption = "Preview of the pre-processing results, which has an additional column `Outlier` including the results of Hotellins T2.") %>%
  kableExtra::kable_classic(full_width = FALSE, html_font = "Cambria", font_size = 12) # %>%
# kableExtra::scroll_box(width = "100%", height = "200px")

## ----remove_outliers----------------------------------------------------------
# Keep only samples that were not flagged as outliers (removes MS55_29).
Intra_Preprocessed <- Intra_Preprocessed %>%
  dplyr::filter(Outliers == "no")

## ----replicate_sum------------------------------------------------------------
# Summarise analytical replicates. At this point the first four columns are
# sample metadata and the remaining columns are passed as measurements.
Intra_Preprocessed <- replicate_sum(
  data = Intra_Preprocessed[, -(1:4)],
  metadata_sample = Intra_Preprocessed[, 1:4],
  metadata_info = c(
    Conditions = "Conditions",
    Biological_Replicates = "Biological_Replicates",
    Analytical_Replicates = "Analytical_Replicates"
  )
)

# After replicate_sum() the dma() call below treats columns 1:3 as sample
# metadata and everything else as metabolite measurements. The PCA and heatmap
# previously dropped 1:5 and 1:4 respectively, which silently removed the first
# metabolite columns from the plots; all three steps now share one definition.
# NOTE(review): assumes replicate_sum() returns exactly three leading metadata
# columns - confirm against its documentation.
summed_info_cols <- 1:3

## ----pca_plot, fig.align="left", fig.width=6, fig.height=4.5, fig.cap="Figure: Do the samples cluster for the Cell type?"----
# Create the sample metadata: derive cell type and disease status from the
# condition label.
MetaData_Sample <- Intra_Preprocessed[, 1:2] %>%
  mutate(
    Celltype = case_when(
      Conditions == "HK2" ~ "Healthy",
      Conditions == "786-O" ~ "Primary Tumour",
      TRUE ~ "Metastatic Tumour"
    )
  ) %>%
  mutate(
    Status = case_when(
      Conditions == "HK2" ~ "Healthy",
      TRUE ~ "Cancer"
    )
  )

# Make PCA plot
viz_pca(
  metadata_info = c(color = "Celltype", shape = "Status"),
  metadata_sample = MetaData_Sample,
  data = Intra_Preprocessed[, -summed_info_cols],
  plot_name = "Cell type"
)

## ----heatmap_plot, fig.align="left", fig.cap="Colour for sample metadata."----
# Heatmap annotated with the sample metadata columns listed in color_Sample.
viz_heatmap(
  data = Intra_Preprocessed[, -summed_info_cols],
  metadata_sample = MetaData_Sample,
  metadata_info = c(
    color_Sample = list(
      "Conditions",
      "Biological_Replicates",
      "Celltype",
      "Status"
    )
  )
)

## ----dma, fig.width=7, fig.height=5, fig.align="left"-------------------------
# Perform a one-vs-one comparison (786-M1A versus HK2) using a t-test with
# FDR-adjusted p-values:
DMA_Res <- dma(
  # we need to remove columns that do not include metabolite measurements
  data = Intra_Preprocessed[, -summed_info_cols],
  # only maintain the information about condition and replicates
  metadata_sample = Intra_Preprocessed[, summed_info_cols],
  # we compare 786-M1A_vs_HK2
  metadata_info = c(
    Conditions = "Conditions",
    Numerator = "786-M1A",
    Denominator = "HK2"
  ),
  pval = "t.test",
  padj = "fdr"
)

# Inspect the dma results tables. `[[` returns NULL for an unknown name, which
# would only surface as a confusing error further down, so fail here instead.
comparison_name <- "786-M1A _vs_ HK2"
stopifnot(comparison_name %in% names(DMA_Res[["dma"]]))
DMA_786M1A_vs_HK2 <- DMA_Res[["dma"]][[comparison_name]]

## ----show_dma_results, echo=FALSE---------------------------------------------
# Preview selected rows of the differential analysis results:
DMA_786M1A_vs_HK2[c(7, 9, 11:12, 14), ] %>%
  kableExtra::kbl(caption = "2. Preview of the dma results for the comparison of 786-M1A versus HK2 cells.", row.names = FALSE) %>%
  kableExtra::kable_classic(full_width = FALSE, html_font = "Cambria", font_size = 12)

## ----match_ids_kegg-----------------------------------------------------------
#--------Add metabolite IDs to our example data:
# 1. Load feature meta-information of our example data
data(cellular_meta)
MappingInfo <- cellular_meta

# 2. Merge with our differential results (FYI: you can also do this
#    automatically as part of the dma function using the parameter
#    metadata_feature)
ORA_Input <- merge(
  DMA_786M1A_vs_HK2,
  MappingInfo,
  by = "Metabolite",
  all.x = TRUE
) %>%
  dplyr::filter(!is.na(KEGGCompound)) %>% # remove features without KEGG ID
  tibble::column_to_rownames("KEGGCompound") %>%
  dplyr::select(-Metabolite)

#--------Load KEGG pathways:
KEGG_Pathways <- metsigdb_kegg()

## ----run_ora------------------------------------------------------------------
# Perform ORA
# NOTE(review): the original comment stated that the input data needs the
# columns `t.val` and `Metabolite`, but `Metabolite` is dropped from ORA_Input
# above and the KEGG IDs are its row names - confirm the input requirements
# against the standard_ora() documentation.
DM_ORA_res <- standard_ora(
  data = ORA_Input,
  metadata_info = c(
    pvalColumn = "p.adj",
    percentageColumn = "t.val",
    PathwayTerm = "term",
    PathwayFeature = "Metabolite"
  ),
  # Pathway file requirements: column `term`, `Metabolite` and `Description`.
  # Above we loaded KEGG_Pathways using metsigdb_kegg()
  input_pathway = KEGG_Pathways,
  pathway_name = "KEGG"
)

# Let's check how the results look:
DM_ORA_786M1A_vs_HK2 <- DM_ORA_res[["ClusterGosummary"]]

## ----show_ora_results, echo=FALSE---------------------------------------------
# Preview the first rows of the ORA results (first column omitted):
DM_ORA_786M1A_vs_HK2[1:5, -1] %>%
  kableExtra::kbl(caption = "Preview of the ORA results for the comparison of 786-M1A versus HK2 cells.", row.names = FALSE) %>%
  kableExtra::kable_classic(full_width = FALSE, html_font = "Cambria", font_size = 12)

## ----volcano_pea--------------------------------------------------------------
# Here we select only a few pathways to make only the most important plots:
InputPEA2 <- DM_ORA_786M1A_vs_HK2 %>%
  dplyr::filter(!is.na(GeneRatio), pvalue <= 0.1) %>%
  dplyr::rename("term" = "ID")

viz_volcano(
  plot_types = "PEA",
  metadata_info = c(
    # Needs to be the same in both, metadata_feature and data2.
    PEA_Pathway = "term",
    # Column data2
    PEA_stat = "pvalue",
    # Column data2
    PEA_score = "GeneRatio",
    # Column metadata_feature (needs to be the same as row names in data)
    PEA_Feature = "Metabolite"
  ),
  # Must be the pathways used for pathway analysis
  metadata_feature = KEGG_Pathways,
  # Must be the data you have used as an input for the pathway analysis
  data = ORA_Input,
  # Must be the results of the pathway analysis
  data2 = InputPEA2,
  plot_name = "KEGG",
  select_label = NULL
)

## ----load_mca_rules-----------------------------------------------------------
# Example of all possible flows:
data(mca_twocond_rules)
MCA2Cond_Rules <- mca_twocond_rules

## ----show_mca_2cond_rules, echo=FALSE-----------------------------------------
# Show the two-condition clustering rules:
MCA2Cond_Rules %>%
  kableExtra::kbl(caption = "Metabolite Clustering Analysis: 2 Conditions.", row.names = FALSE) %>%
  kableExtra::kable_classic(full_width = FALSE, html_font = "Cambria", font_size = 12)

# easyalluvial::alluvial_wide(mca_2cond[,c(1:2,4)], fill_by = 'last_variable' )
# easyalluvial::alluvial_wide(mca_2cond[,c(1:2,5)], fill_by = 'last_variable' )

## ----load_mca_core_rules------------------------------------------------------
# Example of all possible flows:
data(mca_core_rules)
MCA_CoRe_Rule <- mca_core_rules

## ----show_mca_core_rules, echo=FALSE------------------------------------------
# Show the first six columns of the core clustering rules:
MCA_CoRe_Rule[, 1:6] %>%
  kableExtra::kbl(caption = "Metabolite Clustering Analysis: core.", row.names = FALSE) %>%
  kableExtra::kable_classic(full_width = FALSE, html_font = "Cambria", font_size = 12)

## ----session_info, echo=FALSE-----------------------------------------------------------------------------------------
# Widen the console output so the session information is not wrapped.
options(width = 120)
sessionInfo()