### R code from vignette source 'rTRM_Introduction.Rnw' ### Encoding: UTF-8 ################################################### ### code chunk number 1: rTRM_Introduction.Rnw:22-32 ################################################### library(ggplot2) library(knitr) library(rTRM) library(org.Mm.eg.db) opts_chunk$set(fig.align = 'center', fig.width = 3, fig.height = 3, fig.show = 'hold', tidy = TRUE, comment = "", highlight = FALSE, prompt = TRUE) #options(replace.assign=TRUE,width = 80) knit_hooks$set(no.mar = function(before, options, envir) { if (before) par(mar = rep(0,4)) # no margins. }) ################################################### ### code chunk number 2: rTRM_Introduction.Rnw:55-78 ################################################### # load the rTRM package library(rTRM) # load network example. load(system.file(package = "rTRM", "extra/example.rda")) # plot network plot(g,vertex.size=20,vertex.label.cex=.8,layout=layout.graphopt) # define target and query nodes: target = "N6" query = c("N7", "N12", "N28") # find TRM: s = findTRM(g, target = target, query = query, method = "nsa", max.bridge = 1) # annotate nodes: V(s)$color = "white" V(s)[ name %in% query]$color = "steelblue2" V(s)[ name %in% target]$color = "steelblue4" # plot: plot(s,vertex.size=20,vertex.label.cex=.8) ################################################### ### code chunk number 3: rTRM_Introduction.Rnw:90-92 ################################################### pwm = getMatrices() head(pwm, 1) ################################################### ### code chunk number 4: rTRM_Introduction.Rnw:96-98 ################################################### ann = getAnnotations() head(ann) ################################################### ### code chunk number 5: rTRM_Introduction.Rnw:102-104 ################################################### map = getMaps() head(map) ################################################### ### code chunk number 6: rTRM_Introduction.Rnw:108-110 ################################################### o = getOrthologs(organism = "mouse") head(o) ################################################### ### code chunk number 7: rTRM_Introduction.Rnw:115-117 ################################################### getOrthologFromMatrix("MA0009.1", organism="human") getOrthologFromMatrix("MA0009.1", organism="mouse") ################################################### ### code chunk number 8: rTRM_Introduction.Rnw:125-129 ################################################### # check statistics about the network. biogrid_mm() # load mouse PPI network: data(biogrid_mm) ################################################### ### code chunk number 9: rTRM_Introduction.Rnw:134-145 (eval = FALSE) ################################################### ## # obtain dataset. ## db = getBiogridData() # retrieves latest release. ## # db = getBiogridData("3.2.96") # to get a specific release. ## ## # check release: ## db$release ## db$data ## ## # process PPI data for different organisms (currently supported human and mouse): ## biogrid_hs = processBiogrid(db, org = "human") ## biogrid_mm = processBiogrid(db, org = "mouse") ################################################### ### code chunk number 10: rTRM_Introduction.Rnw:154-174 (eval = FALSE) ################################################### ## library(PSICQUIC) ## psicquic <- PSICQUIC() ## providers(psicquic) ## ## # obtain BioGrid human PPIs (as data.frame): ## tbl = interactions(psicquic, species="9606",provider="BioGrid") ## ## # the target and source node information needs to be polished (i.e. must be Entrez gene id only) ## biogrid_hs = data.frame(source=tbl$A,target=tbl$B) ## biogrid_hs$source = sub(".*locuslink:(.*)\\|BIOGRID:.*","\\1", biogrid_hs$source) ## biogrid_hs$target = sub(".*locuslink:(.*)\\|BIOGRID:.*","\\1", biogrid_hs$target) ## ## # create graph. ## library(igraph) ## biogrid_hs=graph.data.frame(biogrid_hs,directed=FALSE) ## biogrid_hs=simplify(biogrid_hs) ## ## # annotate with symbols. ## library(org.Hs.eg.db) ## V(biogrid_hs)$label=select(org.Hs.eg.db,keys=V(biogrid_hs)$name,columns=c("SYMBOL"))$SYMBOL ################################################### ### code chunk number 11: rTRM_Introduction.Rnw:183-188 ################################################### # read motif enrichment results. motif_file = system.file("extra/sox2_motif_list.rda", package = "rTRM") load(motif_file) length(motif_list) head(motif_list) ################################################### ### code chunk number 12: rTRM_Introduction.Rnw:193-198 ################################################### # get the corresponding gene. tfs_list = getOrthologFromMatrix(motif_list, organism = "mouse") tfs_list = unique(unlist(tfs_list, use.names = FALSE)) length(tfs_list) head(tfs_list) ################################################### ### code chunk number 13: rTRM_Introduction.Rnw:203-212 ################################################### # load expression data. eg_esc_file = system.file("extra/ESC-expressed.txt", package = "rTRM") eg_esc = scan(eg_esc_file, what = "") length(eg_esc) head(eg_esc) tfs_list_esc = tfs_list[tfs_list %in% eg_esc] length(tfs_list_esc) head(tfs_list_esc) ################################################### ### code chunk number 14: rTRM_Introduction.Rnw:217-241 ################################################### # load and process PPI data. biogrid_mm() data(biogrid_mm) ppi = biogrid_mm vcount(ppi) ecount(ppi) # remove outliers. f = c("Ubc", "Sumo1", "Sumo2", "Sumo3") f=select(org.Mm.eg.db,keys=f,columns="ENTREZID",keytype="SYMBOL")$ENTREZID f ppi = removeVertices(ppi, f) vcount(ppi) ecount(ppi) # filter by expression. ppi_esc = induced.subgraph(ppi, V(ppi)[ name %in% eg_esc ]) vcount(ppi_esc) ecount(ppi_esc) # ensure a single component. ppi_esc = getLargestComp(ppi_esc) vcount(ppi_esc) ecount(ppi_esc) ################################################### ### code chunk number 15: rTRM_Introduction.Rnw:246-254 ################################################### # define target. target = select(org.Mm.eg.db,keys="Sox2",columns="ENTREZID",keytype="SYMBOL")$ENTREZID target # find TRM. s = findTRM(ppi_esc, target, tfs_list_esc, method = "nsa", max.bridge = 1) vcount(s) ecount(s) ################################################### ### code chunk number 16: rTRM_Introduction.Rnw:259-266 ################################################### # generate layout (order by cluster, then label) cl = getConcentricList(s, target, tfs_list_esc) l = layout.concentric(s, cl, order = "label") # plot TRM. plotTRM(s, layout = l, vertex.cex = 15, label.cex = .8) plotTRMlegend(s, title = "ESC Sox2 TRM", cex = .8) ################################################### ### code chunk number 17: rTRM_Introduction.Rnw:276-285 ################################################### library(rTRM) library(BSgenome.Mmusculus.UCSC.mm8.masked) # Sox2 peaks found against mm8 library(PWMEnrich) registerCoresPWMEnrich(1) # register number of cores for parallelization in PWMEnrich library(MotifDb) # select mouse PWMs: sel.mm = values(MotifDb)$organism %in% c("Mmusculus") pwm.mm = MotifDb[sel.mm] ################################################### ### code chunk number 18: rTRM_Introduction.Rnw:290-294 ################################################### # generate logn background model of PWMs: p = as.list(pwm.mm) p = lapply(p, function(x) round(x * 100)) p = lapply(p, function(x) t(apply(x, 1, as.integer))) ################################################### ### code chunk number 19: rTRM_Introduction.Rnw:299-300 (eval = FALSE) ################################################### ## pwm_logn = makeBackground(p, Mmusculus, type = "logn") ################################################### ### code chunk number 20: rTRM_Introduction.Rnw:302-303 ################################################### load(system.file("extra/pwm_mm_logn.rda", package = "rTRM")) ################################################### ### code chunk number 21: rTRM_Introduction.Rnw:308-316 ################################################### sox2_bed = read.table(system.file("extra/ESC_Sox2_peaks.txt", package = "rTRM")) colnames(sox2_bed) = c("chr", "start", "end") sox2_seq = getSequencesFromGenome(sox2_bed, Mmusculus, append.id="Sox2") # PWMEnrich throws an error if the sequences are shorter than the motifs so we filter those sequences. min.width = max(sapply(p, ncol)) sox2_seq_filter = sox2_seq[width(sox2_seq) >= min.width] ################################################### ### code chunk number 22: rTRM_Introduction.Rnw:321-323 (eval = FALSE) ################################################### ## # find enrichment: ## sox2_enr = motifEnrichment(sox2_seq_filter, pwms=pwm_logn, group.only=TRUE) ################################################### ### code chunk number 23: rTRM_Introduction.Rnw:326-327 ################################################### load(system.file("extra/sox2_enr.rda", package = "rTRM")) ################################################### ### code chunk number 24: rTRM_Introduction.Rnw:332-340 ################################################### res = groupReport(sox2_enr) plot(density(res$raw.score),main="",log="x",xlab="log(raw.score)") abline(v=log2(5),col="red") mtext(text="log2(5)",at=log2(5),side=3,cex=.8,col="red") res.gene = unique(values(MotifDb[res$id[res$raw.score > 5]])$geneId) res.gene = unique(na.omit(res.gene)) ################################################### ### code chunk number 25: rTRM_Introduction.Rnw:345-374 ################################################### data(biogrid_mm) ppi = biogrid_mm vcount(ppi) ecount(ppi) f = c("Ubc", "Sumo1", "Sumo2", "Sumo3") f=select(org.Mm.eg.db,keys=f,columns="ENTREZID",keytype="SYMBOL")$ENTREZID ppi = removeVertices(ppi, f) vcount(ppi) ecount(ppi) # filter by expression. eg_esc = scan(system.file("extra/ESC-expressed.txt", package = "rTRM"), what = "") ppi_esc = induced.subgraph(ppi, V(ppi)[ name %in% eg_esc ]) vcount(ppi_esc) ecount(ppi_esc) # ensure a single component. ppi_esc = getLargestComp(ppi_esc) vcount(ppi_esc) ecount(ppi_esc) sox2.gene = select(org.Mm.eg.db,keys="Sox2",columns="ENTREZID",keytype="SYMBOL")$ENTREZID sox2_trm = findTRM(ppi_esc, target=sox2.gene, query = res.gene) cl = getConcentricList(sox2_trm, t=sox2.gene,e=res.gene) l = layout.concentric(sox2_trm, concentric=cl, order="label") plotTRM(sox2_trm, layout = l, vertex.cex = 15, label.cex = .8) plotTRMlegend(sox2_trm, title = "ESC Sox2 TRM", cex = .8) ################################################### ### code chunk number 26: rTRM_Introduction.Rnw:379-385 ################################################### m=getSimilarityMatrix(list(PWMEnrich=sox2_trm,HOMER=s)) m d=as.data.frame.table(m) g = ggplot(d, aes(x = Var1, y = Var2, fill = Freq)) g + geom_tile() + scale_fill_gradient2(limit = c(0, 100), low = "white", mid = "darkblue", high = "orange", guide = guide_legend("similarity", reverse=TRUE), midpoint = 50) + xlab(NULL) + ylab(NULL) + theme(aspect.ratio = 1, axis.text.x=element_text(angle = 90, vjust = .5, hjust = 1)) ################################################### ### code chunk number 27: rTRM_Introduction.Rnw:392-395 ################################################### plotTRM(sox2_trm, layout = l, vertex.cex = 15, label.cex = .7) l=layout.arc(sox2_trm,target=sox2.gene,query=res.gene) plotTRM(sox2_trm, layout=l,vertex.cex=15,label.cex=.7) ################################################### ### code chunk number 28: rTRM_Introduction.Rnw:402-403 ################################################### citation(package="rTRM") ################################################### ### code chunk number 29: rTRM_Introduction.Rnw:408-409 ################################################### sessionInfo()