Interactive Volcano Plot

Guangchuang Yu
School of Basic Medical Sciences, Southern Medical University

2025-12-02

Example Data and Differential Analysis

library(airway)
library(DESeq2)
library(org.Hs.eg.db)
library(AnnotationDbi)

data("airway")

counts_mat <- assay(airway)
airway <- airway[rowSums(counts_mat) > 1, ]

dds <- DESeqDataSet(airway, design = ~ cell + dex)
dds <- DESeq(dds)
res <- results(dds, contrast = c("dex", "trt", "untrt"))

df <- as.data.frame(res)
df$gene_id <- rownames(df)
df$symbol <- mapIds(org.Hs.eg.db, 
                    keys = df$gene_id,
                    column = "SYMBOL", 
                    keytype = "ENSEMBL", 
                    multiVals = "first")

df <- df[!is.na(df$symbol), ]
df <- df[, c("log2FoldChange", "padj", "symbol")]

head(df)
                log2FoldChange         padj symbol
ENSG00000000003    -0.38125398 1.281209e-03 TSPAN6
ENSG00000000419     0.20681260 1.962081e-01   DPM1
ENSG00000000457     0.03792043 9.111955e-01  SCYL3
ENSG00000000460    -0.08816818 8.946325e-01  FIRRM
ENSG00000000938    -1.37822703           NA    FGR
ENSG00000000971     0.42640213 1.818075e-05    CFH

Interactive Volcano Plot Visualization

By default, it creates an interactive plot. If you pass the parameter interactive = FALSE, it will generate a regular static ggplot figure.

In the interactive plot, hovering the mouse will display gene name, logFC, and adjusted P values information.

onclick_fun parameter can be used to define custom actions when clicking on data points. Here we use the built-in function onclick_genecards to open GeneCards page for the clicked gene.

Another options for redirecting to external websites can be:

library(ivolcano)

p <- ivolcano(df,
        logFC_col = "log2FoldChange",
        pval_col = "padj",
        gene_col = "symbol",
        top_n = 5,
        onclick_fun=onclick_genecards)

print(p)

We can also use the fanyi package to retrieve gene information from NCBI and display it on the plot. Here we also translate the gene ‘summary’ information into Chinese.

library(org.Hs.eg.db)
df$gene_id <- rownames(df)
df$entrez <- mapIds(org.Hs.eg.db, 
                    keys = df$gene_id,
                    column = "ENTREZID", 
                    keytype = "ENSEMBL", 
                    multiVals = "first")
'select()' returned 1:many mapping between keys and columns
top_eg <- df$entrez[order(df$padj)][1:50]

library(fanyi)
fanyi v0.0.8 Learn more at https://yulab-smu.top/

Please cite:

D Wang, G Chen, L Li, S Wen, Z Xie, X Luo, L Zhan, S Xu, J Li, R
Wang, Q Wang, G Yu. Reducing language barriers, promoting information
absorption, and communication using fanyi. Chinese Medical Journal. 2024,
137(16):1950-1956. doi: 10.1097/CM9.0000000000003242
gs <- gene_summary(top_eg)

# not run, as it required api key
# see also https://github.com/YuLab-SMU/fanyi
#
# gs$summary_cn <- tencent_translate(gs$summary)

head(gs)
          uid    name
SPARCL1  8404 SPARCL1
CACNB2    783  CACNB2
DUSP1    1843   DUSP1
SAMHD1  25939  SAMHD1
MAOA     4128    MAOA
GPX3     2878    GPX3
                                                                            description
SPARCL1                                                                    SPARC like 1
CACNB2                           calcium voltage-gated channel auxiliary subunit beta 2
DUSP1                                                    dual specificity phosphatase 1
SAMHD1  SAM and HD domain containing deoxynucleoside triphosphate triphosphohydrolase 1
MAOA                                                                monoamine oxidase A
GPX3                                                           glutathione peroxidase 3
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              summary
SPARCL1                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  Predicted to enable calcium ion binding activity; collagen binding activity; and extracellular matrix binding activity. Predicted to be involved in regulation of synapse organization. Located in extracellular space. [provided by Alliance of Genome Resources, Jul 2025]
CACNB2                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        This gene encodes a subunit of a voltage-dependent calcium channel protein that is a member of the voltage-gated calcium channel superfamily. The gene product was originally identified as an antigen target in Lambert-Eaton myasthenic syndrome, an autoimmune disorder. Mutations in this gene are associated with Brugada syndrome. Alternatively spliced variants encoding different isoforms have been described. [provided by RefSeq, Feb 2013]
DUSP1                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        The protein encoded by this gene is a phosphatase with dual specificity for tyrosine and threonine. The encoded protein can dephosphorylate MAP kinase MAPK1/ERK2, which results in its involvement in several cellular processes. This protein appears to play an important role in the human cellular response to environmental stress as well as in the negative regulation of cellular proliferation. Finally, the encoded protein can make some solid tumors resistant to both chemotherapy and radiotherapy, making it a target for cancer therapy. [provided by RefSeq, Aug 2017]
SAMHD1                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                This gene may play a role in regulation of the innate immune response. The encoded protein is upregulated in response to viral infection and may be involved in mediation of tumor necrosis factor-alpha proinflammatory responses. Mutations in this gene have been associated with Aicardi-Goutieres syndrome. [provided by RefSeq, Mar 2010]
MAOA                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       This gene is one of two neighboring gene family members that encode mitochondrial enzymes which catalyze the oxidative deamination of amines, such as dopamine, norepinephrine, and serotonin. Mutation of this gene results in Brunner syndrome. This gene has also been associated with a variety of other psychiatric disorders, including antisocial behavior. Alternatively spliced transcript variants encoding multiple isoforms have been observed. [provided by RefSeq, Jul 2012]
GPX3    The protein encoded by this gene belongs to the glutathione peroxidase family, members of which catalyze the reduction of organic hydroperoxides and hydrogen peroxide (H2O2) by glutathione, and thereby protect cells against oxidative damage. Several isozymes of this gene family exist in vertebrates, which vary in cellular location and substrate specificity. This isozyme is secreted, and is abundantly found in plasma. Downregulation of expression of this gene by promoter hypermethylation has been observed in a wide spectrum of human malignancies, including thyroid cancer, hepatocellular carcinoma and chronic myeloid leukemia. This isozyme is also a selenoprotein, containing the rare amino acid selenocysteine (Sec) at its active site. Sec is encoded by the UGA codon, which normally signals translation termination. The 3' UTRs of selenoprotein mRNAs contain a conserved stem-loop structure, designated the Sec insertion sequence (SECIS) element, that is necessary for the recognition of UGA as a Sec codon, rather than as a stop signal. Alternatively spliced transcript variants have been found for this gene. [provided by RefSeq, Jul 2016]

Define the onclick function to select multiple columns of information for display. Here we use the gene description (full name) and the summary information. The onclick_fanyi function will return a function definition according to our requirements, which can be passed to ivolcano.

onclick_fun <- onclick_fanyi(gs, c("description", "summary"))

p2 <- ivolcano(df,
        logFC_col="log2FoldChange",
        pval_col="padj",
        gene_col="symbol",
        onclick_fun = onclick_fun)

print(p2)

Point Size Adjustment

The size_by can be used to adjust the point size based on a specific variable or column in the data frame. Options include “negLogP” for negative log (adjusted) P-values, “absLogFC” for absolute log fold changes, or a column name in the data frame.

p3 <- ivolcano(df,
        logFC_col="log2FoldChange",
        pval_col="padj",
        gene_col="symbol",
        size_by = "negLogP")

print(p3)

If size_by is set to “manual”, users can utilize the point_size parameter to define the point sizes for different categories. The default setting is list(base = 2, medium = 4, large = 6), where base sets the size for non-significant points, medium for points meeting both pval_cutoff and logFC_cutoff, and large for points meeting both pval_cutoff2 and logFC_cutoff2 if these parameters are provided.

p4 <- ivolcano(df,
        logFC_col="log2FoldChange",
        pval_col="padj",
        gene_col="symbol",
        pval_cutoff = 0.05,
        pval_cutoff2 = 0.01,
        logFC_cutoff = 1,
        logFC_cutoff2 = 2,
        size_by = "manual")

print(p4)

FigureYa Themed Volcano Plot

The ivolcano function integrates the FigureYa themed volcano plot, which can generate more aesthetically pleasing academic publication-level volcano plots. This is mainly achieved by specifying more stringent thresholds through the pval_cutoff2 and logFC_cutoff2 parameters, and the function will automatically apply the FigureYa color theme.

# load example data
f1 <- system.file('extdata/easy_input_limma.rds', package='ivolcano')
df_limma <- readRDS(f1)

p5 <- ivolcano(df_limma, 
        logFC_col = "logFC",
        pval_col = "P.Value",
        pval_cutoff = 0.05,
        pval_cutoff2 = 0.01,
        logFC_cutoff = 1,
        logFC_cutoff2 = 2,
        gene_col = "X")
print(p5)

Custom Gene Display and Styles

Highlight Top Significant Genes

The ivolcano function provide functionality for custom gene display. You can use top_n to specify displaying the top N most significant genes (by default). By default, it shows the top N most significant upregulated genes and top N most significant downregulated genes (total 2N genes). If you set label_mode = "all", it will display the top N most significant genes including both upregulated and downregulated (total N genes).

Highlight Selected Genes

If you define the filter parameter, it will filter genes based on that condition for display. In this case, the top_n parameter will not take effect.

You can specify genes of interest for highlighting.

# load selected gene data
f2 <- system.file('extdata/easy_input_selected.rds', package='ivolcano')
selected <- readRDS(f2)
genes <- selected[,1]
genes
[1] "KLK10"  "KLK7"   "KLK8"   "KRT16"  "KLK5"   "KRT6A"  "KCTD12" "KCNK1" 
[9] "KRT18" 
# display selected genes
# here X is the column in our volcano plot data frame that contains gene names

p6 <- ivolcano(df_limma, 
        logFC_col = "logFC",
        pval_col = "P.Value",
        pval_cutoff = 0.05,
        pval_cutoff2 = 0.01,
        logFC_cutoff = 1,
        logFC_cutoff2 = 2,
        gene_col = "X",
        filter = 'X %in% genes')
print(p6)

You can write judgement conditions to filter genes, for example, you can automatically mark genes with particular large LogFC values.

# Mark genes with extreme logFC values
p7 <- ivolcano(df_limma, 
        logFC_col = "logFC",
        pval_col = "P.Value",
        pval_cutoff = 0.05,
        pval_cutoff2 = 0.01,
        logFC_cutoff = 1,
        logFC_cutoff2 = 2,
        gene_col = "X",
        filter = 'logFC > 8')
        
print(p7)