## ----knitr, echo=FALSE, results="hide"----------------------------------- library("knitr") opts_chunk$set(tidy=FALSE,tidy.opts=list(width.cutoff=30),dev="png",fig.show="hide", fig.width=4,fig.height=4.5, message=FALSE) ## ----style, eval=TRUE, echo=FALSE, results="asis"------------------------ BiocStyle::latex() ## ----options, results="hide", echo=FALSE--------------------------------- options(digits=3, width=80, prompt=" ", continue=" ") ## ----install_cellTree, eval=FALSE---------------------------------------- ## source("http://bioconductor.org/biocLite.R") ## biocLite("cellTree") ## ----install_missing_bioconductor_packages, eval=FALSE------------------- ## biocLite(c("HSMMSingleCell", "org.Hs.eg.db", "biomaRt")) ## ----init_sincell, cache=FALSE, eval=TRUE,warning=FALSE------------------ library(cellTree) ## ----load_hsmm_data, eval=TRUE------------------------------------------- # load HSMMSingleCell package and load the data set: library(HSMMSingleCell) data(HSMM_expr_matrix) # Total number of genes * cells: dim(HSMM_expr_matrix) ## ----compute_lda_maptpx, eval=FALSE-------------------------------------- ## # Run LDA inference using 'maptpx' method ## # finding best number of topics k between 3 and 8: ## lda.results = compute.lda(HSMM_expr_matrix, k.topics=3:8, method="maptpx") ## ----compute_lda_gibbs, eval=FALSE--------------------------------------- ## # Run LDA inference using 'Gibbs' method for k = 6 topics: ## lda.results = compute.lda(HSMM_expr_matrix, k.topics=6, method="Gibbs") ## ----compute_lda_with_hgnc, eval=FALSE----------------------------------- ## HSMM_expr_matrix.hgnc = HSMM_expr_matrix ## ## library("biomaRt") ## ensembl.ids = sapply(strsplit(rownames(HSMM_expr_matrix), split=".",fixed=TRUE), ## "[", ## 1) ## ensembl.mart = useMart(host="www.ensembl.org", ## "ENSEMBL_MART_ENSEMBL", ## dataset = "hsapiens_gene_ensembl") ## gene.map = getBM(attributes = c("ensembl_gene_id", "entrezgene", "hgnc_symbol"), ## filters = "ensembl_gene_id", ## values = ensembl.ids, ## mart = ensembl.mart) ## idx = match(ensembl.ids, gene.map$ensembl_gene_id) ## hgnc.ids = gene.map$hgnc_symbol[idx] ## has.hgnc.ids = !is.na(hgnc.ids)&(hgnc.ids!="") ## rownames(HSMM_expr_matrix.hgnc)[has.hgnc.ids] = hgnc.ids[has.hgnc.ids] ## ## HSMM_lda_model = compute.lda(HSMM_expr_matrix.hgnc, k.topics=6) ## ----load_lda_with_hgnc, eval=TRUE--------------------------------------- # Load pre-computed LDA model for skeletal myoblast RNA-Seq data # from HSMMSingleCell package: data(HSMM_lda_model) # Number of topics of fitted model: print(HSMM_lda_model$K) # Model uses HGCN gene names: head(rownames(HSMM_lda_model$theta)) ## ----pairwise_distances, eval=TRUE--------------------------------------- # Compute pairwise distance between cells # based on topic distributions in the fitted model: dists = get.cell.dists(HSMM_lda_model) print(dists[1:5,1:5]) ## ----day_annotation, eval=TRUE------------------------------------------- # Recover sampling time point for each cell: library(HSMMSingleCell) data(HSMM_sample_sheet) days.factor = HSMM_sample_sheet$Hours days = as.numeric(levels(days.factor))[days.factor] # Our grouping annotation (in hours): print(unique(days)) ## ----mst_tree, eval=TRUE------------------------------------------------- # compute MST from a fitted LDA model: mst.tree = compute.backbone.tree(HSMM_lda_model, days, only.mst=TRUE) ## ----plot_mst_tree_with_topics, eval=TRUE, echo=TRUE, fig.show="asis", dpi=144, fig.width=5, fig.height=5, out.width="5in", out.height="5in"---- # plot the tree (showing topic distribution for each cell): mst.tree.with.layout = ct.plot.topics(mst.tree) ## ----plot_mst_tree_with_groups, echo=TRUE, fig.show="asis", dpi=144, fig.width=5, fig.height=5, out.width="5in", out.height="5in"---- # plot the tree (showing time point for each cell): mst.tree.with.layout = ct.plot.grouping(mst.tree) ## ----plot_btree_with_groups, eval=TRUE, echo=TRUE, fig.show="asis", dpi=144, fig.width=5, fig.height=5, out.width="5in", out.height="5in"---- # compute backbone tree from a fitted LDA model: b.tree = compute.backbone.tree(HSMM_lda_model, days) # plot the tree (showing time label for each cell): b.tree.with.layout = ct.plot.grouping(b.tree) ## ----plot_btree_with_groups_wider, eval=TRUE, echo=TRUE, fig.show="asis", dpi=144, fig.width=5, fig.height=5, out.width="5in", out.height="5in"---- # compute backbone tree from a fitted LDA model: b.tree = compute.backbone.tree(HSMM_lda_model, days, width.scale.factor=1.5) # plot the tree (showing time label for each cell): b.tree.with.layout = ct.plot.grouping(b.tree) ## ----plot_btree_with_topics_wider, eval=TRUE, echo=TRUE, fig.show="asis", dpi=144, fig.width=5, fig.height=5, out.width="5in", out.height="5in"---- # plot the tree (showing topic distribution for each cell): b.tree.with.layout = ct.plot.topics(b.tree) ## ----go_lib, eval=TRUE--------------------------------------------------- # Load GO mappings for human: library(org.Hs.eg.db) ## ----go_terms, eval=TRUE, results="hide"--------------------------------- # Compute GO enrichment sets (using the Cellular Components category) # for each topic go.results = compute.go.enrichment(HSMM_lda_model, org.Hs.eg.db, ontology.type="CC", bonferroni.correct=TRUE, p.val.threshold=0.01) ## ----go_terms_print, eval=TRUE------------------------------------------- # Print ranked table of significantly enriched terms for topic 1 # that do not appear in other topics: go.results$unique[[1]] ## ----go_terms_dag_files, eval=FALSE-------------------------------------- ## # Compute GO enrichment sets (using the Biological Process category) ## # for each topic and saves DAG plots to files: ## go.results.bp = compute.go.enrichment(HSMM_lda_model, ## org.Hs.eg.db, ontology.type="BP", ## bonferroni.correct=TRUE, p.val.threshold=0.01, ## dag.file.prefix="hsmm_go_") ## ----plot_go_results, eval=TRUE, echo=TRUE, fig.show="asis", dpi=144, fig.width=5, fig.height=5, out.width="5in", out.height="5in"---- # plot GO sub-DAG for topics 1 to 3: go.dag.subtree = ct.plot.go.dag(go.results, up.generations = 2, only.topics=c(1:3)) ## ----cell_ordering_table, eval=TRUE-------------------------------------- # Generate table summary of cells, ranked by tree position: cell.table = cell.ordering.table(b.tree) # Print first 5 cells: cell.table[1:5,] ## ----cell_ordering_table_latex, eval=FALSE------------------------------- ## # Generate table summary of cells, ranked by tree position: ## cell.table = cell.ordering.table(b.tree, ## write.to.tex.file="cell_summary.tex") ## ----session_info, eval=TRUE--------------------------------------------- sessionInfo()