## ----style-knitr, eval=TRUE, echo=FALSE, results="asis"------------------ BiocStyle::latex() ## ----echo=TRUE, eval=FALSE,tidy=TRUE,tidy.opts=list(blank=FALSE, width.cutoff=60)---- ## source("http://www.bioconductor.org/biocLite.R") ## biocLite(c("PADOG", "GSVA", "AnnotationDbi", "topGO", "pathview", "gage", ## "globaltest", "limma", "edgeR", "safe", "org.Hs.eg.db", "org.Mm.eg.db", ## "org.Rn.eg.db")) ## ----echo=TRUE, eval=FALSE,tidy=TRUE------------------------------------ ## install.packages("devtools") ## ----echo=TRUE, eval=FALSE,tidy=TRUE------------------------------------ ## library(devtools) ## install_bitbucket("malhamdoosh/egseadata", ref="Stable_Release") ## ----echo=TRUE, eval=FALSE,tidy=TRUE------------------------------------ ## source("http://bioconductor.org/biocLite.R") ## biocLite("EGSEA") ## ----echo=TRUE, eval=FALSE,tidy=TRUE------------------------------------ ## library(devtools) ## install_bitbucket("malhamdoosh/egsea", ref="Stable_Release") ## ----echo=TRUE, eval=TRUE,tidy=TRUE------------------------------------- library(EGSEA) ## ----echo=TRUE, eval=TRUE,tidy=TRUE------------------------------------- library(EGSEAdata) data(il13.data) v = il13.data$voom names(v) v$design contrasts = il13.data$contra contrasts ## ----echo=TRUE, eval=TRUE,tidy=TRUE,tidy.opts=list(blank=FALSE, width.cutoff=60)---- # prepare gene set collections gs.annots = buildIdxEZID(entrezIDs=rownames(v$E), species="human", msigdb.gsets="c5", kegg.exclude = c("Metabolism")) names(gs.annots) ## ----echo=TRUE, eval=TRUE,tidy=TRUE,tidy.opts=list(blank=FALSE, width.cutoff=60)---- # perform the EGSEA analysis # set display.top = 20 to display more gene sets. It takes longer time to run. gsa = egsea(voom.results=v, contrasts=contrasts, gs.annots=gs.annots, symbolsMap=v$genes, baseGSEAs=egsea.base()[-2], display.top = 3, sort.by="avg.rank", egsea.dir="./il13-egsea-report", num.threads = 4) topSets(gsa, contrast=1, gs.label="kegg", number = 10) topSets(gsa, contrast=1, gs.label="kegg", sort.by="ora", number = 10, names.only=FALSE) topSets(gsa, contrast="comparison", gs.label="kegg", number = 10) ## ----echo=TRUE, eval=TRUE,tidy=TRUE,tidy.opts=list(blank=FALSE, width.cutoff=60)---- gs.annots = buildIdxEZID(entrezIDs=rownames(v$E), species="human") gsetdb.annots = buildGeneSetDBIdxEZID(entrezIDs=rownames(v$E), species="human") gs.annots = c(gs.annots, gsetdb.annots) names(gs.annots) ## ----echo=TRUE, eval=FALSE,tidy=TRUE,tidy.opts=list(blank=FALSE, width.cutoff=60)---- ## # load the mammary dataset ## library(EGSEA) ## library(EGSEAdata) ## data(mam.data) ## v = mam.data$voom ## names(v) ## v$design ## contrasts = mam.data$contra ## contrasts ## # build the gene set collections ## gs.annots = buildIdxEZID(entrezIDs=rownames(v$E), species="mouse", ## msigdb.gsets = "c2", ## kegg.exclude = "all") ## names(gs.annots) ## # create Entrez IDs - Symbols map ## symbolsMap = v$genes[,c(1,3)] ## colnames(symbolsMap) = c("FeatureID", "Symbols") ## symbolsMap[, "Symbols"] = as.character(symbolsMap[, "Symbols"]) ## # replace NA Symbols with IDs ## na.sym = is.na(symbolsMap[, "Symbols"]) ## symbolsMap[na.sym, "Symbols"] = symbolsMap[na.sym, "FeatureID"] ## # perform the EGSEA analysis ## # set report = TRUE to generate the EGSEA interactive report ## gsa = egsea(voom.results=v, contrasts=contrasts, gs.annots=gs.annots, ## symbolsMap=symbolsMap, baseGSEAs=egsea.base()[-c(2,5,6,9)], ## display.top= 20, sort.by="med.rank", ## egsea.dir="./mam-egsea-report", num.threads=4, report=FALSE) ## # show top 20 comparative gene sets in C2 collection ## topSets(gsa, contrast="comparison", gs.label="c2", number = 20) ## ----echo=TRUE, eval=FALSE,tidy=TRUE,tidy.opts=list(blank=FALSE, width.cutoff=60)---- ## # load the count matrix and other relevant data ## library(EGSEAdata) ## data(il13.data.cnt) ## cnt = il13.data.cnt$counts ## group = il13.data.cnt$group ## group ## design = il13.data.cnt$design ## contrasts = il13.data.cnt$contra ## genes = il13.data.cnt$genes ## # build the gene set collections ## gs.annots = buildIdxEZID(entrezIDs=rownames(cnt), species="human", msigdb.gsets="none", ## kegg.exclude = c("Metabolism")) ## # perform the EGSEA analysis ## # set report = TRUE to generate the EGSEA interactive report ## gsa = egsea.cnt(counts=cnt, group=group, design=design, contrasts=contrasts, ## gs.annots=gs.annots, symbolsMap=genes, baseGSEAs=egsea.base()[-2], display.top = 5, ## sort.by="avg.rank", egsea.dir="./il13-egsea-cnt-report", num.threads = 4, report=FALSE) ## ----echo=TRUE, eval=TRUE, tidy=TRUE, tidy.opts=list(blank=FALSE, width.cutoff=60)---- # load IL-13 dataset library(EGSEAdata) data(il13.data) voom.results = il13.data$voom contrast = il13.data$contra # find Differentially Expressed genes library(limma) vfit = lmFit(voom.results, voom.results$design) vfit = contrasts.fit(vfit, contrast) vfit = eBayes(vfit) # select DE genes (Entrez IDs and logFC) at p-value <= 0.05 and |logFC| >= 1 top.Table = topTable(vfit, coef=1, number=Inf, p.value=0.05, lfc=1) deGenes = as.character(top.Table$FeatureID) logFC = top.Table$logFC names(logFC) = deGenes # build the gene set collection index gs.annots = buildIdxEZID(entrezIDs=deGenes, species="human", msigdb.gsets="none", kegg.exclude = c("Metabolism")) # perform the ORA analysis # set report = TRUE to generate the EGSEA interactive report gsa = egsea.ora(entrezIDs=deGenes, universe= as.character(voom.results$genes[,1]), logFC =logFC, title="X24IL13-X24", gs.annots=gs.annots, symbolsMap=top.Table[, c(1,2)], display.top = 5, egsea.dir="./il13-egsea-ora-report", num.threads = 4, report=FALSE) ## ----echo=TRUE, eval=TRUE, tidy=TRUE, tidy.opts=list(blank=FALSE, width.cutoff=60)---- library(EGSEAdata) data(il13.data) v = il13.data$voom # load KEGG pathways data(kegg.pathways) # select 50 pathways gsets = kegg.pathways$human$kg.sets[1:50] gsets[1] # build custom gene set collection using these 50 pathways gs.annots = buildCustomIdxEZID(entrezIDs=rownames(v$E), gsets= gsets, species="human") names(gs.annots) colnames(gs.annots$anno)