## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 10, fig.height = 7, dpi = 300, out.width = "100%" ) ## ----setup, message = FALSE--------------------------------------------------- library(evanverse) library(dplyr) library(ggplot2) library(tidyr) ## ----gene-conversion-demo, eval = FALSE--------------------------------------- # # Example gene symbols commonly used in cancer research # cancer_genes <- c("BRCA1", "BRCA2", "TP53", "EGFR", "MYC", "RAS", "PIK3CA", "AKT1") # # # Convert gene symbols to Ensembl IDs # ensembl_ids <- convert_gene_id( # genes = cancer_genes, # from = "symbol", # to = "ensembl", # species = "human" # ) # # # Display conversion results # conversion_table <- data.frame( # Gene_Symbol = cancer_genes, # Ensembl_ID = ensembl_ids # ) # # print(conversion_table) ## ----gene-conversion-mock----------------------------------------------------- # Mock example for demonstration (since biomaRt requires internet) cancer_genes <- c("BRCA1", "BRCA2", "TP53", "EGFR", "MYC", "KRAS", "PIK3CA", "AKT1") # Simulated conversion results mock_conversion <- data.frame( Gene_Symbol = cancer_genes, Ensembl_ID = c( "ENSG00000012048", "ENSG00000139618", "ENSG00000141510", "ENSG00000146648", "ENSG00000136997", "ENSG00000133703", "ENSG00000171608", "ENSG00000142208" ), Entrez_ID = c(672, 675, 7157, 1956, 4609, 3845, 5290, 207), stringsAsFactors = FALSE ) cat("🧬 Gene ID Conversion Example\n") cat("=============================\n") print(mock_conversion) cat("\n📊 Conversion Summary:\n") cat(" • Input genes:", length(cancer_genes), "\n") cat(" • Successful conversions:", nrow(mock_conversion), "\n") cat(" • Success rate:", round(100 * nrow(mock_conversion) / length(cancer_genes), 1), "%\n") ## ----conversion-workflow------------------------------------------------------ # Simulate a real-world scenario with mixed identifier types mixed_identifiers <- c( "BRCA1", "ENSG00000139618", "7157", "EGFR", "ENSG00000136997", "3845", "PIK3CA", "207" ) # Function to detect identifier type detect_id_type <- function(ids) { sapply(ids, function(id) { if (grepl("^ENSG", id)) return("ensembl") if (grepl("^[0-9]+$", id)) return("entrez") return("symbol") }) } id_types <- detect_id_type(mixed_identifiers) cat("🔍 Identifier Type Detection:\n") print(data.frame( Identifier = mixed_identifiers, Detected_Type = id_types )) # Group by identifier type for batch conversion id_groups <- split(mixed_identifiers, id_types) cat("\n📦 Grouped Identifiers for Conversion:\n") str(id_groups) ## ----gmt-processing-demo, eval = FALSE---------------------------------------- # # Example: Process a pathway GMT file # # pathway_df <- gmt2df("path/to/c2.cp.kegg.v7.4.symbols.gmt") # # pathway_list <- gmt2list("path/to/c2.cp.kegg.v7.4.symbols.gmt") # # # Display structure # # head(pathway_df, 10) # # length(pathway_list) ## ----gmt-mock-demo------------------------------------------------------------ # Create mock GMT data to demonstrate structure mock_pathways <- list( "KEGG_GLYCOLYSIS_GLUCONEOGENESIS" = c( "HK1", "HK2", "GPI", "PFKL", "ALDOA", "TPI1", "GAPDH", "PGK1", "PGAM1", "ENO1", "PKM", "LDHA", "PDK1" ), "KEGG_CITRATE_CYCLE" = c( "CS", "ACO1", "IDH1", "OGDH", "SUCLA2", "SDHA", "FH", "MDH1", "PCK1", "PDK1", "DLAT" ), "KEGG_FATTY_ACID_SYNTHESIS" = c( "ACACA", "FASN", "ACLY", "ACC2", "ELOVL6", "SCD", "FADS1", "FADS2", "ACSL1", "GPAM" ), "KEGG_DNA_REPAIR" = c( "BRCA1", "BRCA2", "TP53", "ATM", "CHEK1", "CHEK2", "RAD51", "XRCC1", "PARP1", "MSH2", "MLH1" ) ) # Convert list to data frame format (simulating gmt2df output) mock_gmt_df <- do.call(rbind, lapply(names(mock_pathways), function(pathway) { data.frame( pathway = pathway, gene = mock_pathways[[pathway]], stringsAsFactors = FALSE ) })) cat("📋 GMT File Processing Results\n") cat("==============================\n") cat("Number of pathways:", length(mock_pathways), "\n") cat("Total gene-pathway associations:", nrow(mock_gmt_df), "\n") cat("Average genes per pathway:", round(mean(lengths(mock_pathways)), 1), "\n\n") cat("Sample pathway data frame:\n") print(head(mock_gmt_df, 12)) # Pathway size distribution pathway_sizes <- lengths(mock_pathways) cat("\n📊 Pathway Size Distribution:\n") print(data.frame( Pathway = names(pathway_sizes), Gene_Count = pathway_sizes )) ## ----gene-set-overlap, fig.cap="Gene set overlap analysis showing relationships between biological pathways"---- # Analyze overlaps between pathways pathway_genes <- mock_pathways[1:3] # Use first 3 pathways for Venn diagram # Create Venn diagram for pathway overlaps venn_plot <- plot_venn( set1 = pathway_genes[[1]], set2 = pathway_genes[[2]], set3 = pathway_genes[[3]], category.names = names(pathway_genes), fill = get_palette("vividset", type = "qualitative", n = 3), title = "Metabolic Pathway Gene Overlaps" ) print(venn_plot) # Calculate detailed overlap statistics all_genes <- unique(unlist(pathway_genes)) cat("\n🔍 Detailed Overlap Analysis:\n") cat("===============================\n") cat("Total unique genes across pathways:", length(all_genes), "\n") # Pairwise overlaps pathway_names <- names(pathway_genes) for (i in 1:(length(pathway_names) - 1)) { for (j in (i + 1):length(pathway_names)) { overlap <- length(intersect(pathway_genes[[i]], pathway_genes[[j]])) cat(sprintf("%s ∩ %s: %d genes\n", gsub("KEGG_", "", pathway_names[i]), gsub("KEGG_", "", pathway_names[j]), overlap)) } } ## ----rnaseq-workflow, fig.cap="Differential expression analysis visualization with volcano plot"---- # Simulate RNA-seq differential expression results set.seed(123) n_genes <- 2000 # Simulate log fold changes and p-values gene_results <- data.frame( Gene = paste0("Gene_", 1:n_genes), LogFC = rnorm(n_genes, mean = 0, sd = 1.2), PValue = rbeta(n_genes, shape1 = 1, shape2 = 10), stringsAsFactors = FALSE ) # Add some significant genes significant_indices <- sample(1:n_genes, 200) gene_results$LogFC[significant_indices] <- gene_results$LogFC[significant_indices] + sample(c(-2, 2), 200, replace = TRUE) gene_results$PValue[significant_indices] <- gene_results$PValue[significant_indices] * 0.01 # Calculate adjusted p-values gene_results$FDR <- p.adjust(gene_results$PValue, method = "BH") # Classify genes gene_results$Regulation <- "Not Significant" gene_results$Regulation[gene_results$FDR < 0.05 & gene_results$LogFC > 1] <- "Up-regulated" gene_results$Regulation[gene_results$FDR < 0.05 & gene_results$LogFC < -1] <- "Down-regulated" # Create volcano plot volcano_colors <- c( "Up-regulated" = get_palette("vividset", type = "qualitative", n = 3)[1], "Down-regulated" = get_palette("vividset", type = "qualitative", n = 3)[2], "Not Significant" = "#CCCCCC" ) p1 <- ggplot(gene_results, aes(x = LogFC, y = -log10(FDR), color = Regulation)) + geom_point(alpha = 0.6, size = 1.2) + scale_color_manual(values = volcano_colors) + geom_vline(xintercept = c(-1, 1), linetype = "dashed", color = "#666666") + geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "#666666") + labs( title = "Differential Gene Expression Analysis", subtitle = "Volcano plot showing treatment vs. control comparison", x = "Log₂ Fold Change", y = "-log₁₀(FDR-adjusted p-value)", color = "Regulation" ) + theme_minimal() + theme( plot.title = element_text(size = 14, face = "bold", color = "#0D47A1"), plot.subtitle = element_text(size = 11, color = "#666666"), legend.position = "bottom" ) print(p1) # Summary statistics regulation_summary <- table(gene_results$Regulation) cat("\n📊 Differential Expression Summary:\n") cat("===================================\n") print(regulation_summary) cat("\nTop 10 up-regulated genes (by fold change):\n") top_up <- gene_results[gene_results$Regulation == "Up-regulated", ] %>% arrange(desc(LogFC)) %>% head(10) print(top_up[, c("Gene", "LogFC", "FDR")]) ## ----pathway-enrichment, fig.cap="Pathway enrichment analysis showing biological processes affected by treatment"---- # Simulate pathway enrichment analysis results enrichment_results <- data.frame( Pathway = c( "Cell Cycle", "Apoptosis", "DNA Repair", "Inflammation", "Metabolism", "Signaling", "Transport", "Development" ), GeneRatio = c(0.15, 0.22, 0.18, 0.31, 0.09, 0.25, 0.12, 0.08), FDR = c(0.001, 0.003, 0.008, 0.0001, 0.045, 0.002, 0.021, 0.089), GeneCount = c(23, 34, 28, 48, 14, 39, 18, 12), stringsAsFactors = FALSE ) # Calculate enrichment score enrichment_results$EnrichmentScore <- -log10(enrichment_results$FDR) # Create enrichment plot p2 <- ggplot(enrichment_results, aes(x = GeneRatio, y = reorder(Pathway, EnrichmentScore))) + geom_point(aes(color = EnrichmentScore, size = GeneCount), alpha = 0.8) + scale_color_gradientn( colors = get_palette("warm_blush", type = "sequential", n = 4), name = "-log₁₀(FDR)" ) + scale_size_continuous(name = "Gene Count", range = c(3, 12)) + geom_vline(xintercept = 0.1, linetype = "dashed", color = "#666666", alpha = 0.7) + labs( title = "Pathway Enrichment Analysis", subtitle = "Biological processes enriched in differentially expressed genes", x = "Gene Ratio (enriched genes / pathway total)", y = "Biological Pathway" ) + theme_minimal() + theme( plot.title = element_text(size = 14, face = "bold", color = "#0D47A1"), plot.subtitle = element_text(size = 11, color = "#666666"), panel.grid.major.y = element_blank(), legend.position = "right" ) print(p2) cat("\n🎯 Pathway Enrichment Summary:\n") cat("==============================\n") significant_pathways <- enrichment_results[enrichment_results$FDR < 0.05, ] cat("Significant pathways (FDR < 0.05):", nrow(significant_pathways), "\n") cat("Most enriched pathway:", significant_pathways$Pathway[which.max(significant_pathways$EnrichmentScore)], "\n") cat("Total genes in significant pathways:", sum(significant_pathways$GeneCount), "\n") ## ----multiomics-integration, fig.cap="Multi-omics data integration showing genomic variants and expression changes"---- # Simulate multi-omics data integration set.seed(456) selected_genes <- c("BRCA1", "TP53", "EGFR", "MYC", "KRAS", "PIK3CA", "AKT1", "PTEN") # Create integrated omics data omics_data <- data.frame( Gene = rep(selected_genes, each = 3), DataType = rep(c("Mutation", "CNV", "Expression"), length(selected_genes)), Value = c( # Mutation frequencies (0-1) c(0.12, 0.34, 0.08, 0.15, 0.22, 0.09, 0.06, 0.18), # Copy number variations (-2 to 2) c(-0.5, -1.2, 1.8, 0.3, 0.8, -0.8, 1.1, -1.5), # Expression fold changes (-3 to 3) c(-1.5, -2.8, 2.1, 1.8, -1.2, 2.3, -0.8, -2.1) ), Patient_Group = rep(c("Group_A", "Group_B", "Group_C"), length(selected_genes)) ) # Normalize values for visualization omics_data$Normalized_Value <- ave(omics_data$Value, omics_data$DataType, FUN = function(x) scale(x)[,1]) # Create heatmap p3 <- ggplot(omics_data, aes(x = DataType, y = Gene, fill = Normalized_Value)) + geom_tile(color = "white", size = 0.5) + scale_fill_gradientn( colors = get_palette("gradient_rd_bu", type = "diverging", n = 11), name = "Z-score", limits = c(-2, 2), breaks = c(-2, -1, 0, 1, 2) ) + labs( title = "Multi-omics Cancer Gene Analysis", subtitle = "Integrated view of mutations, copy number, and expression", x = "Data Type", y = "Cancer-related Genes" ) + theme_minimal() + theme( plot.title = element_text(size = 14, face = "bold", color = "#0D47A1"), plot.subtitle = element_text(size = 11, color = "#666666"), panel.grid = element_blank(), axis.text.x = element_text(angle = 45, hjust = 1) ) print(p3) # Summary by data type cat("\n🧬 Multi-omics Data Summary:\n") cat("============================\n") summary_stats <- omics_data %>% group_by(DataType) %>% summarise( Mean_Value = round(mean(Value), 3), SD_Value = round(sd(Value), 3), Min_Value = round(min(Value), 3), Max_Value = round(max(Value), 3), .groups = 'drop' ) print(summary_stats) ## ----survival-analysis, fig.cap="Forest plot showing hazard ratios for genetic markers in survival analysis"---- # Simulate survival analysis results survival_data <- data.frame( Gene = c("BRCA1", "BRCA2", "TP53", "EGFR", "MYC", "KRAS", "PIK3CA", "AKT1"), HazardRatio = c(1.23, 0.87, 1.45, 1.12, 0.92, 1.67, 1.34, 0.78), CI_Lower = c(0.98, 0.71, 1.18, 0.89, 0.75, 1.32, 1.05, 0.61), CI_Upper = c(1.55, 1.07, 1.78, 1.41, 1.13, 2.11, 1.71, 0.99), PValue = c(0.067, 0.189, 0.001, 0.324, 0.445, 0.0001, 0.018, 0.041), stringsAsFactors = FALSE ) # Add significance categories survival_data$Significance <- ifelse(survival_data$PValue < 0.001, "***", ifelse(survival_data$PValue < 0.01, "**", ifelse(survival_data$PValue < 0.05, "*", "ns"))) # Create forest plot using evanverse plotting functions p4 <- plot_forest( data = survival_data, label_col = "Gene", estimate_col = "HazardRatio", lower_col = "CI_Lower", upper_col = "CI_Upper", p_col = "PValue" ) print(p4) cat("\n🎯 Survival Analysis Summary:\n") cat("=============================\n") significant_genes <- survival_data[survival_data$PValue < 0.05, ] cat("Significant prognostic markers:", nrow(significant_genes), "\n") cat("Risk factors (HR > 1):", sum(significant_genes$HazardRatio > 1), "\n") cat("Protective factors (HR < 1):", sum(significant_genes$HazardRatio < 1), "\n") print(significant_genes[, c("Gene", "HazardRatio", "PValue", "Significance")]) ## ----biomarker-discovery, fig.cap="Biomarker discovery showing gene expression patterns across clinical subtypes"---- # Simulate clinical biomarker data set.seed(789) n_patients <- 120 n_biomarkers <- 20 # Generate patient clinical data clinical_data <- data.frame( Patient_ID = paste0("P", 1:n_patients), Subtype = sample(c("Luminal_A", "Luminal_B", "HER2+", "TNBC"), n_patients, replace = TRUE, prob = c(0.4, 0.2, 0.15, 0.25)), Stage = sample(c("I", "II", "III", "IV"), n_patients, replace = TRUE, prob = c(0.3, 0.35, 0.25, 0.1)), Age = round(rnorm(n_patients, 55, 12)), Survival_Months = round(rexp(n_patients, rate = 0.02)), stringsAsFactors = FALSE ) # Generate biomarker expression data biomarker_genes <- paste0("Biomarker_", 1:n_biomarkers) expression_data <- matrix(rnorm(n_patients * n_biomarkers, mean = 5, sd = 2), nrow = n_patients, ncol = n_biomarkers) colnames(expression_data) <- biomarker_genes rownames(expression_data) <- clinical_data$Patient_ID # Add subtype-specific expression patterns luminal_a_patients <- clinical_data$Patient_ID[clinical_data$Subtype == "Luminal_A"] her2_patients <- clinical_data$Patient_ID[clinical_data$Subtype == "HER2+"] tnbc_patients <- clinical_data$Patient_ID[clinical_data$Subtype == "TNBC"] # Simulate subtype-specific biomarkers expression_data[luminal_a_patients, "Biomarker_1"] <- expression_data[luminal_a_patients, "Biomarker_1"] + 3 expression_data[her2_patients, "Biomarker_5"] <- expression_data[her2_patients, "Biomarker_5"] + 4 expression_data[tnbc_patients, "Biomarker_12"] <- expression_data[tnbc_patients, "Biomarker_12"] + 2.5 # Convert to long format for visualization expression_long <- as.data.frame(expression_data) %>% mutate(Patient_ID = rownames(.)) %>% gather(Biomarker, Expression, -Patient_ID) %>% left_join(clinical_data, by = "Patient_ID") # Select top biomarkers for visualization top_biomarkers <- c("Biomarker_1", "Biomarker_5", "Biomarker_12", "Biomarker_8") plot_data <- expression_long %>% filter(Biomarker %in% top_biomarkers) # Create biomarker expression plot p5 <- ggplot(plot_data, aes(x = Subtype, y = Expression, fill = Subtype)) + geom_boxplot(alpha = 0.7, outlier.alpha = 0.5) + geom_jitter(alpha = 0.3, width = 0.2, size = 0.8) + scale_fill_manual( values = get_palette("vividset", type = "qualitative", n = 4) ) + facet_wrap(~Biomarker, scales = "free_y", ncol = 2) + labs( title = "Biomarker Expression Across Cancer Subtypes", subtitle = "Potential subtype-specific biomarkers for precision medicine", x = "Cancer Subtype", y = "Expression Level (log2 normalized)", fill = "Subtype" ) + theme_minimal() + theme( plot.title = element_text(size = 14, face = "bold", color = "#0D47A1"), plot.subtitle = element_text(size = 11, color = "#666666"), axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "bottom", strip.background = element_rect(fill = "#E3F2FD", color = NA) ) print(p5) # Statistical summary cat("\n📊 Biomarker Analysis Summary:\n") cat("==============================\n") subtype_counts <- table(clinical_data$Subtype) print(subtype_counts) cat("\nMean expression by subtype for key biomarkers:\n") biomarker_summary <- plot_data %>% group_by(Biomarker, Subtype) %>% summarise( Mean_Expression = round(mean(Expression), 2), SD = round(sd(Expression), 2), .groups = 'drop' ) %>% arrange(Biomarker, desc(Mean_Expression)) print(biomarker_summary) ## ----data-management, eval = FALSE-------------------------------------------- # # Example of downloading reference data # # Note: These functions require internet connection and may take time # # # Download gene reference annotation # gene_ref <- download_gene_ref( # species = "human", # build = "hg38", # feature_type = "gene" # ) # # # Download GEO dataset # geo_data <- download_geo_data( # geo_id = "GSE123456", # destdir = "data/geo_downloads" # ) # # # Download pathway databases # pathway_url <- "https://data.broadinstitute.org/gsea-msigdb/msigdb/release/7.4/c2.cp.kegg.v7.4.symbols.gmt" # download_url( # url = pathway_url, # dest = "data/pathways/kegg_pathways.gmt" # ) ## ----data-management-demo----------------------------------------------------- # Demonstrate file organization for bioinformatics projects cat("📁 Recommended Project Structure for Bioinformatics:\n") cat("==================================================\n") cat("project/\n") cat("├── data/\n") cat("│ ├── raw/ # Original data files\n") cat("│ ├── processed/ # Cleaned/normalized data\n") cat("│ ├── reference/ # Genome annotations, databases\n") cat("│ └── results/ # Analysis outputs\n") cat("├── scripts/\n") cat("│ ├── preprocessing/ # Data cleaning scripts\n") cat("│ ├── analysis/ # Statistical analysis\n") cat("│ └── visualization/ # Plotting scripts\n") cat("├── docs/ # Documentation, protocols\n") cat("└── reports/ # Final reports, publications\n\n") # Demonstrate batch file handling file_extensions <- c("fastq.gz", "bam", "vcf", "gmt", "gff3", "bed") file_descriptions <- c( "Raw sequencing reads", "Aligned sequencing data", "Variant calls", "Gene set definitions", "Gene annotations", "Genomic intervals" ) file_info <- data.frame( Extension = file_extensions, Description = file_descriptions, stringsAsFactors = FALSE ) cat("🗂️ Common Bioinformatics File Types:\n") print(file_info) ## ----best-practices----------------------------------------------------------- cat("🔬 BIOINFORMATICS BEST PRACTICES\n") cat("================================\n\n") cat("📋 Data Management:\n") cat(" • Use version control (Git) for all scripts\n") cat(" • Document data provenance and processing steps\n") cat(" • Implement checkpoints and intermediate file saves\n") cat(" • Use consistent file naming conventions\n\n") cat("🧬 Gene Identifier Handling:\n") cat(" • Always validate gene ID conversions\n") cat(" • Store original identifiers alongside converted ones\n") cat(" • Document the genome build and annotation version\n") cat(" • Handle missing/ambiguous identifiers gracefully\n\n") cat("📊 Statistical Analysis:\n") cat(" • Apply appropriate multiple testing corrections\n") cat(" • Set significance thresholds before analysis\n") cat(" • Report effect sizes along with p-values\n") cat(" • Validate results with independent datasets when possible\n\n") cat("🎨 Visualization Guidelines:\n") cat(" • Use color-blind friendly palettes\n") cat(" • Include appropriate scales and legends\n") cat(" • Provide clear titles and axis labels\n") cat(" • Consider publication requirements for figures\n") ## ----qc-checklist------------------------------------------------------------- cat("✅ QUALITY CONTROL CHECKLIST\n") cat("============================\n\n") cat("🔍 Data Quality:\n") cat(" [ ] Check for missing values and outliers\n") cat(" [ ] Verify sample sizes and statistical power\n") cat(" [ ] Validate gene identifier mappings\n") cat(" [ ] Assess data distribution and normalization\n\n") cat("📈 Analysis Validation:\n") cat(" [ ] Cross-validate results with different methods\n") cat(" [ ] Perform sensitivity analyses\n") cat(" [ ] Check for batch effects and confounders\n") cat(" [ ] Compare with known biological expectations\n\n") cat("📊 Results Reporting:\n") cat(" [ ] Include sample sizes and effect sizes\n") cat(" [ ] Report confidence intervals\n") cat(" [ ] Document software versions and parameters\n") cat(" [ ] Provide supplementary data and code\n") ## ----complete-pipeline-------------------------------------------------------- cat("🔄 COMPLETE BIOINFORMATICS PIPELINE EXAMPLE\n") cat("===========================================\n\n") # Simulate a complete analysis workflow pipeline_steps <- data.frame( Step = 1:8, Process = c( "Data Import & Quality Control", "Gene ID Conversion & Mapping", "Differential Expression Analysis", "Multiple Testing Correction", "Pathway Enrichment Analysis", "Gene Set Overlap Analysis", "Visualization & Plotting", "Results Export & Reporting" ), evanverse_Functions = c( "read_table_flex(), file_info()", "convert_gene_id(), replace_void()", "User analysis + evanverse utilities", "Built-in R functions", "gmt2df(), gmt2list()", "plot_venn(), combine_logic()", "plot_forest(), get_palette()", "write_xlsx_flex(), remind()" ), Estimated_Time = c("5-10 min", "10-15 min", "30-60 min", "5 min", "15-30 min", "10-20 min", "20-40 min", "10-15 min") ) print(pipeline_steps) cat("\n⏱️ Total Estimated Pipeline Time: 2-4 hours\n") cat("🎯 Key Success Factors:\n") cat(" • Proper data validation at each step\n") cat(" • Consistent identifier handling\n") cat(" • Appropriate statistical methods\n") cat(" • Clear documentation and visualization\n") ## ----bio-quick-ref, eval = FALSE---------------------------------------------- # # Gene identifier conversion # convert_gene_id(genes, from = "symbol", to = "ensembl", species = "human") # # # Pathway analysis # pathways <- gmt2list("pathways.gmt") # plot_venn(gene_sets, colors = get_palette("vividset")) # # # Data visualization # plot_forest(survival_data, hr_col = "HazardRatio") # get_palette("gradient_rd_bu", type = "diverging", n = 11) # # # Data management # download_geo_data("GSE123456") # read_table_flex("expression_data.txt")