Contents

1 Introduction

scToppR is a package that allows seamless, workflow-based interaction with ToppGene, a portal for gene enrichment analysis. Researchers can use scToppR to directly query ToppGene’s databases and conduct analysis with a few lines of code. scToppR’s availability on Bioconductor ensures easy installation and integration with other Bioconductor workflows, allowing researchers to incorporate functional enrichment analysis from ToppGene into their existing pipelines.

The use of data from ToppGene is governed by their Terms of Use: https://toppgene.cchmc.org/navigation/termsofuse.jsp

This vignette shows the use of scToppR within a differential expression workflow. Using the ‘airway’ dataset, we’ll perform a quick differential expression analysis using DESeq2. With the list of differentially expressed genes, we can easily use scToppR.

2 Installation

# Install BiocManager only if it is not already available, then use it
# to install scToppR.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
    install.packages("BiocManager")
}
BiocManager::install("scToppR")

3 Load Data and Perform Differential Expression Analysis

library(scToppR)

suppressMessages({
    library(airway)
    library(DESeq2)
})
data("airway")

# Work on a copy of the airway object and use the `gene_name` row
# annotation as row names, so results are labelled by gene name
se <- airway
rownames(se) <- rowData(se)$gene_name

# Build the DESeq2 dataset; the design formula uses the `cell` and `dex`
# sample columns
dds <- DESeqDataSet(se, design = ~ cell + dex)

# Pre-filter: keep genes with a count of at least 10 in at least
# `smallestGroupSize` samples
smallestGroupSize <- 3
keep <- rowSums(counts(dds) >= 10) >= smallestGroupSize
dds <- dds[keep, ]

# Fit the model and extract the results table
dds <- DESeq(dds)
res <- results(dds)

# add the gene names as a column in the results
res$gene <- rownames(res)

# add cluster column - here, with this bulk RNAseq data, we will only have 1 cluster
res$cluster <- "cluster0"

4 Using scToppR with Differential Expression Results

With these results, we will use scToppR to query the ToppGene database for all categories for each cluster using the toppFun() function. This function requires users to specify the relevant columns in their dataset.

# This is how you would run the analysis with live data (requires internet).
# The query is wrapped in tryCatch() so that a failure of the live request
# (e.g. the ToppGene service being unreachable even though the machine is
# online) falls back to the cached results instead of stopping the workflow.
toppdata.airway <- NULL
if (curl::has_internet()) {
    toppdata.airway <- tryCatch(
        toppFun(res,
            type = "degs",
            gene_col = "gene",
            cluster_col = "cluster",
            p_val_col = "padj",
            logFC_col = "log2FoldChange"
        ),
        error = function(e) {
            message(
                "Live ToppGene query failed (", conditionMessage(e),
                "); using the cached 'toppdata.airway' dataset instead."
            )
            NULL
        }
    )
}
# No internet, or the live query failed: load the cached results
if (is.null(toppdata.airway)) {
    data("toppdata.airway")
}
head(toppdata.airway)
##                        Category         ID
## 1 GeneOntologyMolecularFunction GO:0030545
## 2 GeneOntologyMolecularFunction GO:0030546
## 3 GeneOntologyMolecularFunction GO:0048018
## 4 GeneOntologyMolecularFunction GO:0008083
## 5 GeneOntologyMolecularFunction GO:0008201
## 6 GeneOntologyMolecularFunction GO:0005539
##                                    Name       PValue  QValueFDRBH  QValueFDRBY
## 1 signaling receptor regulator activity 1.628709e-08 1.308924e-05 0.0001032841
## 2 signaling receptor activator activity 1.948695e-08 1.308924e-05 0.0001032841
## 3              receptor ligand activity 2.617847e-08 1.308924e-05 0.0001032841
## 4                growth factor activity 3.528585e-08 1.323219e-05 0.0001044122
## 5                       heparin binding 7.683397e-07 2.305019e-04 0.0018188374
## 6             glycosaminoglycan binding 1.894662e-06 4.736656e-04 0.0037375857
##   QValueBonferroni TotalGenes GenesInTerm GenesInQuery GenesInTermInQuery
## 1     2.443063e-05      19978         662          816                 59
## 2     2.923042e-05      19978         600          816                 55
## 3     3.926771e-05      19978         589          816                 54
## 4     5.292877e-05      19978         172          816                 25
## 5     1.152510e-03      19978         188          816                 24
## 6     2.841993e-03      19978         283          816                 30
##   Source URL
## 1           
## 2           
## 3           
## 4           
## 5           
## 6           
##                                                                                                                                                                                                                                                                                                                                                                                                  Genes
## 1 GDF5, ADM2, SEMA3A, FBN2, SEMA6D, AGT, FGF10, CCL7, FGF13, CCL8, FGF14, CCL11, FST, CX3CL1, CXCL12, SFRP2, ANG, APOB, TNFSF13B, DKK2, TNFSF9, MSTN, GDNF, STC1, APLN, BTC, NGF, ICOSLG, CCK, TNFRSF11B, TGFB2, IL34, NRG1, GDF6, FNDC5, HMGB2, PGF, FLRT3, VEGFA, DLL4, GDF15, WNT2, IGFBP2, PRKCE, CCN2, TSLP, IL12A, INHBB, JAG2, TNFSF15, INHBE, PSCA, TYMP, EDA, LEP, LGALS9, LIF, GDF7, TMEM35A
## 2                           GDF5, ADM2, SEMA3A, FBN2, SEMA6D, AGT, FGF10, CCL7, FGF13, CCL8, FGF14, CCL11, CX3CL1, CXCL12, SFRP2, ANG, APOB, TNFSF13B, TNFSF9, MSTN, GDNF, STC1, APLN, BTC, NGF, ICOSLG, CCK, TNFRSF11B, TGFB2, IL34, NRG1, GDF6, FNDC5, HMGB2, PGF, FLRT3, VEGFA, DLL4, GDF15, WNT2, IGFBP2, PRKCE, CCN2, TSLP, IL12A, INHBB, JAG2, TNFSF15, INHBE, TYMP, EDA, LEP, LGALS9, LIF, GDF7
## 3                                  GDF5, ADM2, SEMA3A, FBN2, SEMA6D, AGT, FGF10, CCL7, FGF13, CCL8, FGF14, CCL11, CX3CL1, CXCL12, SFRP2, ANG, APOB, TNFSF13B, TNFSF9, MSTN, GDNF, STC1, APLN, BTC, NGF, ICOSLG, CCK, TNFRSF11B, TGFB2, IL34, NRG1, GDF6, FNDC5, HMGB2, PGF, FLRT3, VEGFA, DLL4, GDF15, WNT2, IGFBP2, CCN2, TSLP, IL12A, INHBB, JAG2, TNFSF15, INHBE, TYMP, EDA, LEP, LGALS9, LIF, GDF7
## 4                                                                                                                                                                                                                                           GDF5, AGT, FGF10, FGF13, FGF14, CXCL12, MSTN, GDNF, BTC, NGF, TGFB2, IL34, NRG1, GDF6, PGF, VEGFA, GDF15, CCN2, IL12A, INHBB, JAG2, INHBE, TYMP, LIF, GDF7
## 5                                                                                                                                                                                                                                 LGR4, FGF10, CCL7, CCL8, FGF14, FGFR2, ANG, COL28A1, APOB, POSTN, RSPO2, MSTN, ADAMTS5, THBS1, PGF, VEGFA, COL11A1, ADAMTS1, CCN1, CCN2, LXN, CRISPLD2, EVA1C, RSPO1
## 6                                                                                                                                                                               LGR4, FGF10, CCL7, CCL8, FGF14, FGFR2, ANG, COL28A1, APOB, POSTN, RSPO2, HAPLN3, MSTN, MAMDC2, ADAMTS5, TNFRSF11B, TGFBR2, THBS1, TNFAIP6, HMMR, PGF, VEGFA, COL11A1, ADAMTS1, CCN1, CCN2, LXN, CRISPLD2, EVA1C, RSPO1
##    Cluster
## 1 cluster0
## 2 cluster0
## 3 cluster0
## 4 cluster0
## 5 cluster0
## 6 cluster0

As the code reminds you, this data must be used in accordance with ToppGene’s Terms of Use. For more information, please visit: https://toppgene.cchmc.org/navigation/termsofuse.jsp

If you have an existing SummarizedExperiment or SingleCellExperiment object, scToppR also includes a function to add the toppData results to the metadata of that object. This allows users to easily access their ToppGene results within their existing data objects.

# Attach the ToppGene results to the SummarizedExperiment, then read them
# back from the `toppData` entry of its metadata
se <- addToppData(se, toppdata.airway)
print(head(metadata(se)$toppData))
##                        Category         ID
## 1 GeneOntologyMolecularFunction GO:0030545
## 2 GeneOntologyMolecularFunction GO:0030546
## 3 GeneOntologyMolecularFunction GO:0048018
## 4 GeneOntologyMolecularFunction GO:0008083
## 5 GeneOntologyMolecularFunction GO:0008201
## 6 GeneOntologyMolecularFunction GO:0005539
##                                    Name       PValue  QValueFDRBH  QValueFDRBY
## 1 signaling receptor regulator activity 1.628709e-08 1.308924e-05 0.0001032841
## 2 signaling receptor activator activity 1.948695e-08 1.308924e-05 0.0001032841
## 3              receptor ligand activity 2.617847e-08 1.308924e-05 0.0001032841
## 4                growth factor activity 3.528585e-08 1.323219e-05 0.0001044122
## 5                       heparin binding 7.683397e-07 2.305019e-04 0.0018188374
## 6             glycosaminoglycan binding 1.894662e-06 4.736656e-04 0.0037375857
##   QValueBonferroni TotalGenes GenesInTerm GenesInQuery GenesInTermInQuery
## 1     2.443063e-05      19978         662          816                 59
## 2     2.923042e-05      19978         600          816                 55
## 3     3.926771e-05      19978         589          816                 54
## 4     5.292877e-05      19978         172          816                 25
## 5     1.152510e-03      19978         188          816                 24
## 6     2.841993e-03      19978         283          816                 30
##   Source URL
## 1           
## 2           
## 3           
## 4           
## 5           
## 6           
##                                                                                                                                                                                                                                                                                                                                                                                                  Genes
## 1 GDF5, ADM2, SEMA3A, FBN2, SEMA6D, AGT, FGF10, CCL7, FGF13, CCL8, FGF14, CCL11, FST, CX3CL1, CXCL12, SFRP2, ANG, APOB, TNFSF13B, DKK2, TNFSF9, MSTN, GDNF, STC1, APLN, BTC, NGF, ICOSLG, CCK, TNFRSF11B, TGFB2, IL34, NRG1, GDF6, FNDC5, HMGB2, PGF, FLRT3, VEGFA, DLL4, GDF15, WNT2, IGFBP2, PRKCE, CCN2, TSLP, IL12A, INHBB, JAG2, TNFSF15, INHBE, PSCA, TYMP, EDA, LEP, LGALS9, LIF, GDF7, TMEM35A
## 2                           GDF5, ADM2, SEMA3A, FBN2, SEMA6D, AGT, FGF10, CCL7, FGF13, CCL8, FGF14, CCL11, CX3CL1, CXCL12, SFRP2, ANG, APOB, TNFSF13B, TNFSF9, MSTN, GDNF, STC1, APLN, BTC, NGF, ICOSLG, CCK, TNFRSF11B, TGFB2, IL34, NRG1, GDF6, FNDC5, HMGB2, PGF, FLRT3, VEGFA, DLL4, GDF15, WNT2, IGFBP2, PRKCE, CCN2, TSLP, IL12A, INHBB, JAG2, TNFSF15, INHBE, TYMP, EDA, LEP, LGALS9, LIF, GDF7
## 3                                  GDF5, ADM2, SEMA3A, FBN2, SEMA6D, AGT, FGF10, CCL7, FGF13, CCL8, FGF14, CCL11, CX3CL1, CXCL12, SFRP2, ANG, APOB, TNFSF13B, TNFSF9, MSTN, GDNF, STC1, APLN, BTC, NGF, ICOSLG, CCK, TNFRSF11B, TGFB2, IL34, NRG1, GDF6, FNDC5, HMGB2, PGF, FLRT3, VEGFA, DLL4, GDF15, WNT2, IGFBP2, CCN2, TSLP, IL12A, INHBB, JAG2, TNFSF15, INHBE, TYMP, EDA, LEP, LGALS9, LIF, GDF7
## 4                                                                                                                                                                                                                                           GDF5, AGT, FGF10, FGF13, FGF14, CXCL12, MSTN, GDNF, BTC, NGF, TGFB2, IL34, NRG1, GDF6, PGF, VEGFA, GDF15, CCN2, IL12A, INHBB, JAG2, INHBE, TYMP, LIF, GDF7
## 5                                                                                                                                                                                                                                 LGR4, FGF10, CCL7, CCL8, FGF14, FGFR2, ANG, COL28A1, APOB, POSTN, RSPO2, MSTN, ADAMTS5, THBS1, PGF, VEGFA, COL11A1, ADAMTS1, CCN1, CCN2, LXN, CRISPLD2, EVA1C, RSPO1
## 6                                                                                                                                                                               LGR4, FGF10, CCL7, CCL8, FGF14, FGFR2, ANG, COL28A1, APOB, POSTN, RSPO2, HAPLN3, MSTN, MAMDC2, ADAMTS5, TNFRSF11B, TGFBR2, THBS1, TNFAIP6, HMMR, PGF, VEGFA, COL11A1, ADAMTS1, CCN1, CCN2, LXN, CRISPLD2, EVA1C, RSPO1
##    Cluster
## 1 cluster0
## 2 cluster0
## 3 cluster0
## 4 cluster0
## 5 cluster0
## 6 cluster0

5 Visualizing ToppGene Results

The toppData dataframe (whether from live API call or cached data) includes all results from ToppGene. We can use this dataframe to quickly generate pathway analysis plots using the toppPlot() function. The function can be used to generate a single plot, for example:

# Single plot: one category, one cluster, from the toppData dataframe
toppPlot(toppdata.airway,
    category = "GeneOntologyMolecularFunction",
    clusters = "cluster0"  
)

The toppPlot() function can also be used directly with a SummarizedExperiment or SingleCellExperiment object, once the toppData results have been added to its metadata with addToppData().

# Same plot, this time passing the object returned by addToppData()
toppPlot(se,
    category = "GeneOntologyMolecularFunction",
    clusters = "cluster0"  
)

The toppPlot() function can also create a plot for each cluster for a specified category; simply assign the parameter clusters to NULL. In this case, the function will return a list of plots.

# clusters = NULL requests a plot for every cluster in the chosen category
plot_list <- toppPlot(toppdata.airway,
    category = "GeneOntologyMolecularFunction",
    clusters = NULL
)
# NOTE(review): the rendered output of this line begins with `$data`;
# confirm that `plot_list` is a list of plots here rather than a single
# plot object when the data contains only one cluster.
plot_list[1]
## $data
##                         Category         ID
## 1  GeneOntologyMolecularFunction GO:0016500
## 2  GeneOntologyMolecularFunction GO:0017017
## 3  GeneOntologyMolecularFunction GO:0003933
## 4  GeneOntologyMolecularFunction GO:0003934
## 5  GeneOntologyMolecularFunction GO:0009032
## 6  GeneOntologyMolecularFunction GO:0018708
## 7  GeneOntologyMolecularFunction GO:0031703
## 8  GeneOntologyMolecularFunction GO:0004947
## 9  GeneOntologyMolecularFunction GO:0016616
## 10 GeneOntologyMolecularFunction GO:0016160
##                                                                                     Name
## 1                                                      protein-hormone receptor activity
## 2                              MAP kinase tyrosine/serine/threonine phosphatase activity
## 3                                                            GTP cyclohydrolase activity
## 4                                                          GTP cyclohydrolase I activity
## 5                                                       thymidine phosphorylase activity
## 6                                                     thiol S-methyltransferase activity
## 7                                                    type 2 angiotensin receptor binding
## 8                                                           bradykinin receptor activity
## 9  oxidoreductase activity, acting on the CH-OH group of donors, NAD or NADP as acceptor
## 10                                                                      amylase activity
##         PValue QValueFDRBH QValueFDRBY QValueBonferroni TotalGenes GenesInTerm
## 1  0.001721303  0.05163909   0.4074721                1      19978          68
## 2  0.001469897  0.05163271   0.4074218                1      19978          13
## 3  0.001666347  0.05163271   0.4074218                1      19978           2
## 4  0.001666347  0.05163271   0.4074218                1      19978           2
## 5  0.001666347  0.05163271   0.4074218                1      19978           2
## 6  0.001666347  0.05163271   0.4074218                1      19978           2
## 7  0.001666347  0.05163271   0.4074218                1      19978           2
## 8  0.001666347  0.05163271   0.4074218                1      19978           2
## 9  0.001686669  0.05163271   0.4074218                1      19978         124
## 10 0.001237670  0.04726072   0.3729235                1      19978           6
##    GenesInQuery GenesInTermInQuery Source URL
## 1           816                  9           
## 2           816                  4           
## 3           816                  2           
## 4           816                  2           
## 5           816                  2           
## 6           816                  2           
## 7           816                  2           
## 8           816                  2           
## 9           816                 13           
## 10          816                  3           
##                                                                                                 Genes
## 1                                        EPHB2, EPHB3, LGR4, FGFR2, MERTK, MCHR1, LHCGR, EPHA4, EPHA5
## 2                                                                         DUSP10, DUSP1, DUSP5, DUSP8
## 3                                                                                         GCH1, GCHFR
## 4                                                                                         GCH1, GCHFR
## 5                                                                                          UPP1, TYMP
## 6                                                                                        TMT1A, TMT1B
## 7                                                                                         AGT, ZBTB16
## 8                                                                                      BDKRB1, BDKRB2
## 9  ADH1A, ADH1B, ADH1C, ALDH3A1, HSD17B6, BDH1, ADHFE1, DHRS3, HSD11B1, HSD11B2, PHGDH, AKR1B10, DCXR
## 10                                                                                AMY1A, AMY1B, AMY1C
##     Cluster geneRatio
## 1  cluster0 0.1323529
## 2  cluster0 0.3076923
## 3  cluster0 1.0000000
## 4  cluster0 1.0000000
## 5  cluster0 1.0000000
## 6  cluster0 1.0000000
## 7  cluster0 1.0000000
## 8  cluster0 1.0000000
## 9  cluster0 0.1048387
## 10 cluster0 0.5000000

All of these plots can also be automatically saved by the toppPlot() function. The files and their save locations can be set using the following parameters:

- save = TRUE
- save_dir = "/path/to/save_directory"
- file_prefix = "GO_Molecular_Function"

The cluster/celltype name will be automatically added to the filename prior to saving.

# Same call with saving enabled; tempdir() is passed as the save directory
plot_list <- toppPlot(toppdata.airway,
    category = "GeneOntologyMolecularFunction",
    clusters = NULL,
    save = TRUE,
    save_dir = tempdir(),
    file_prefix = "GO_molecular_function"
)

scToppR also provides the toppBalloon() function to create a balloon plot, allowing researchers to quickly compare the top terms from the ToppGene results.

# Balloon plot for the GeneOntologyBiologicalProcess category
toppBalloon(toppdata.airway,
    categories = "GeneOntologyBiologicalProcess"
)

Some advantages of using scToppR in a pipeline include access to the other categories in ToppGene. Users can quickly view results from all ToppGene categories using these plotting functions, or by examining the toppData results. For example, a user could explore any common results among celltypes in categories such as Pathway, ToppCell, and TFBS.

For example, a quick look at the toppBalloon plot for Pathway summarizes the top enriched pathways for each cluster (in this bulk RNA-seq example there is only one cluster, cluster0):

# Balloon plot for the Pathway category
toppBalloon(toppdata.airway,
    categories = "Pathway"
)

The Pubmed category also provides researchers with other papers exploring similar data:

# Balloon plot for the Pubmed category
toppBalloon(toppdata.airway,
    categories = "Pubmed"
)

6 Saving ToppGene Results

To save toppData results, scToppR also includes a toppSave() function. This function can save the toppData results as a single file, or it can split the data into different clusters/celltypes and save each individually. To do so, set split = TRUE in the function call. The function saves the files as Excel spreadsheets by default, but this can be changed to .csv or .tsv files using the format parameter.

# Save the results under tempdir() in xlsx format.
# split = TRUE presumably writes one file per cluster - confirm against
# the toppSave() documentation.
toppSave(toppdata.airway,
    filename = "airway_toppData",
    save_dir = tempdir(),
    split = TRUE,
    format = "xlsx"
)
# Record the R session used to build this vignette
sessionInfo()
## R version 4.6.0 alpha (2026-04-05 r89794)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 24.04.4 LTS
## 
## Matrix products: default
## BLAS:   /home/biocbuild/bbs-3.23-bioc/R/lib/libRblas.so 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.12.0  LAPACK version 3.12.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_GB              LC_COLLATE=C              
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: America/New_York
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] DESeq2_1.51.7               airway_1.31.0              
##  [3] SummarizedExperiment_1.41.1 Biobase_2.71.0             
##  [5] GenomicRanges_1.63.2        Seqinfo_1.1.0              
##  [7] IRanges_2.45.0              S4Vectors_0.49.2           
##  [9] BiocGenerics_0.57.1         generics_0.1.4             
## [11] MatrixGenerics_1.23.0       matrixStats_1.5.0          
## [13] scToppR_0.99.10             knitr_1.51                 
## [15] BiocStyle_2.39.0           
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.6        xfun_0.57           bslib_0.10.0       
##  [4] ggplot2_4.0.2       httr2_1.2.2         lattice_0.22-9     
##  [7] vctrs_0.7.3         tools_4.6.0         curl_7.0.0         
## [10] parallel_4.6.0      tibble_3.3.1        pkgconfig_2.0.3    
## [13] Matrix_1.7-5        RColorBrewer_1.1-3  S7_0.2.1-1         
## [16] lifecycle_1.0.5     compiler_4.6.0      farver_2.1.2       
## [19] stringr_1.6.0       textshaping_1.0.5   tinytex_0.59       
## [22] codetools_0.2-20    htmltools_0.5.9     sass_0.4.10        
## [25] yaml_2.3.12         pillar_1.11.1       jquerylib_0.1.4    
## [28] BiocParallel_1.45.0 cachem_1.1.0        DelayedArray_0.37.1
## [31] magick_2.9.1        viridis_0.6.5       abind_1.4-8        
## [34] tidyselect_1.2.1    locfit_1.5-9.12     zip_2.3.3          
## [37] digest_0.6.39       stringi_1.8.7       dplyr_1.2.1        
## [40] bookdown_0.46       labeling_0.4.3      forcats_1.0.1      
## [43] fastmap_1.2.0       grid_4.6.0          cli_3.6.6          
## [46] SparseArray_1.11.13 magrittr_2.0.5      patchwork_1.3.2    
## [49] S4Arrays_1.11.1     dichromat_2.0-0.1   withr_3.0.2        
## [52] scales_1.4.0        rappdirs_0.3.4      rmarkdown_2.31     
## [55] XVector_0.51.0      otel_0.2.0          gridExtra_2.3      
## [58] ragg_1.5.2          openxlsx_4.2.8.1    evaluate_1.0.5     
## [61] viridisLite_0.4.3   rlang_1.2.0         Rcpp_1.1.1-1       
## [64] glue_1.8.1          BiocManager_1.30.27 jsonlite_2.0.0     
## [67] R6_2.6.1            systemfonts_1.3.2