## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 6, fig.height = 5, eval = TRUE ) library(scGraphVerse) ## ----loaddata----------------------------------------------------------------- # Note: This vignette requires external packages from Suggests # Install if needed: # BiocManager::install(c("TENxPBMCData", "scater", "SingleR", "celldex")) # Helper function to preprocess PBMC data preprocess_pbmc <- function(pbmc_obj) { sce <- scater::logNormCounts(pbmc_obj) symbols_tenx <- SummarizedExperiment::rowData(sce)$Symbol_TENx valid <- !is.na(symbols_tenx) & symbols_tenx != "" sce <- sce[valid, ] rownames(sce) <- make.unique(symbols_tenx[valid]) SummarizedExperiment::assay(sce, "logcounts") <- as.matrix(SummarizedExperiment::assay(sce, "logcounts")) colnames(sce) <- paste0("cell_", seq_len(ncol(sce))) return(sce) } # 1. Load and preprocess PBMC data if (requireNamespace("TENxPBMCData", quietly = TRUE)) { pbmc_obj <- TENxPBMCData::TENxPBMCData("pbmc3k") pbmc_obj1 <- TENxPBMCData::TENxPBMCData("pbmc4k") sce <- preprocess_pbmc(pbmc_obj) sce1 <- preprocess_pbmc(pbmc_obj1) # 2. Cell type annotation if (requireNamespace("celldex", quietly = TRUE) && requireNamespace("SingleR", quietly = TRUE)) { ref <- celldex::HumanPrimaryCellAtlasData() pred1 <- SingleR::SingleR(test = sce1, ref = ref, labels=ref$label.main) SummarizedExperiment::colData(sce1)$pcell <- pred1$labels pred <- SingleR::SingleR(test = sce, ref = ref, labels=ref$label.main) SummarizedExperiment::colData(sce)$pcell <- pred$labels # 3. Select top 100 B-cell genes genes <- selgene( object = sce, top_n = 100, cell_type = "B_cell", cell_type_col = "pcell", remove_rib = TRUE, remove_mt = TRUE ) genes1 <- selgene( object = sce1, top_n = 100, cell_type = "B_cell", cell_type_col = "pcell", remove_rib = TRUE, remove_mt = TRUE ) # 4. Find intersection commong <- intersect(genes, genes1) # 5. Subset data pbmc1_sub <- sce[ commong, SummarizedExperiment::colData(sce)$pcell == "B_cell" ] pbmc2_sub <- sce1[ commong, SummarizedExperiment::colData(sce1)$pcell == "B_cell" ] pbmc1_sub <- pbmc1_sub[, 1:85] pbmc2_sub <- pbmc2_sub[, 1:85] # Create MultiAssayExperiment for multi-sample analysis bcell_mae <- create_mae(list(pbmc1 = pbmc1_sub, pbmc2 = pbmc2_sub)) } } ## ----genie3------------------------------------------------------------------- # Choose method: "GENIE3", "GRNBoost2", "ZILGM", "PCzinb" or "JRF" method <- "GENIE3" if (exists("bcell_mae")) { networks <- infer_networks( count_matrices_list = bcell_mae, method = method, nCores = 1 ) } ## ----adjacency, fig.alt="plot Graphs"----------------------------------------- if (exists("networks")) { # Weighted adjacency (returns SummarizedExperiment) wadj_se <- generate_adjacency(networks) # Symmetrize (returns SummarizedExperiment) swadj_se <- symmetrize(wadj_se, weight_function = "mean") # Binary cutoff (top 1%, returns SummarizedExperiment) binary_se <- cutoff_adjacency( count_matrices = bcell_mae, weighted_adjm_list = swadj_se, n = 1, method = method, quantile_threshold = 0.99, nCores = 1 ) # Plot if (requireNamespace("ggraph", quietly = TRUE)) { plotg(binary_se) } } ## ----consensus, fig.alt="plot consensus Graph"-------------------------------- if (exists("binary_se")) { # Consensus by vote consensus <- create_consensus(binary_se, method = "vote") # Plot consensus network if (requireNamespace("ggraph", quietly = TRUE)) { plotg(consensus) } } ## ----comparecommunity, fig.alt="comparing Graphs with reference"-------------- if (exists("consensus")) { # Note: For comparison with external databases like STRINGdb, # use stringdb_adjacency() to fetch reference networks # Community detection if (requireNamespace("igraph", quietly = TRUE)) { communities <- community_path(consensus) } } ## ----stringdb----------------------------------------------------------------- # Note: This requires internet connection for STRINGdb downloads # This section requires the STRINGdb package from Suggests if (exists("consensus") && requireNamespace("STRINGdb", quietly = TRUE)) { str_result <- stringdb_adjacency( genes = rownames(consensus), species = 9606, required_score = 900, keep_all_genes = TRUE ) # Extract binary adjacency and symmetrize str_binary <- str_result$binary ground_truth_se <- symmetrize( build_network_se(list(string_network = str_binary)), weight_function = "mean" ) ground_truth <- SummarizedExperiment::assay(ground_truth_se, 1) # Edge mining: TP rates (returns list of dataframes) em <- edge_mining( consensus, ground_truth = ground_truth, query_edge_types = "TP" ) # Display results if (length(em) > 0) { print(head(em[[1]])) } } ## ----sessioninfo-------------------------------------------------------------- sessionInfo()