## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----setup, echo = FALSE------------------------------------------------------ library(BinaryDosage) ## ----files-------------------------------------------------------------------- vcf_file <- system.file("extdata", "set1a.vcf.gz", package = "BinaryDosage") bd4_file <- system.file("extdata", "vcf1a.bdose", package = "BinaryDosage") bdose_file <- file.path(tempdir(), "set1a.bdose") ## ----convert, eval = TRUE, echo = TRUE, message = FALSE, warning = FALSE------ if (requireNamespace("vcfppR", quietly = TRUE)) { vcftobd(vcffile = vcf_file, bdose_file = bdose_file) } else { updatebd(bdfiles = bd4_file, bdose_file = bdose_file) } ## ----convert_region, eval = F, echo = T--------------------------------------- # vcftobd(vcffile = vcf_file, # bdose_file = bdose_file, # region = "1:10000-15000") ## ----load_info, eval = TRUE, echo = T, message = F, warning = F--------------- bd5 <- getbdinfo(bdose_file) ## ----meta, eval = TRUE, echo = T---------------------------------------------- cat("Class: ", class(bd5), "\n") cat("Uses family ID: ", bd5$usesfid, "\n") cat("One chromosome: ", bd5$onechr, "\n") cat("SNP ID format: ", bd5$snpidformat, "\n") cat("Format: ", bd5$additionalinfo$format, "\n") cat("Header size: ", bd5$additionalinfo$headersize, "bytes\n") ## ----samples, eval = TRUE, echo = T------------------------------------------- cat("Number of samples:", nrow(bd5$samples), "\n") knitr::kable(head(bd5$samples, 5), caption = "First 5 samples") ## ----snps, eval = TRUE, echo = T---------------------------------------------- cat("Number of SNPs:", nrow(bd5$snps), "\n") knitr::kable(bd5$snps, caption = "SNP table") ## ----indices, eval = TRUE, echo = T------------------------------------------- cat("Indices (bytes):", bd5$indices, "\n") ## ----snp_by_index, eval = TRUE, echo = T, message = F, warning = F------------ snp1 <- getbd5snp(bd5info = bd5, snp = 1L) snp1_df <- data.frame( SampleID = bd5$samples$sid, Dosage = round(snp1$dosage, 4), P_00 = round(snp1$p0, 4), P_01 = round(snp1$p1, 4), P_11 = round(snp1$p2, 4) ) knitr::kable(head(snp1_df, 10), caption = paste("SNP", bd5$snps$snpid[1], "— first 10 subjects")) ## ----snp_by_id, eval = TRUE, echo = T, message = F, warning = F--------------- snp_id <- "1:12000:T:C" snp3 <- getbd5snp(bd5info = bd5, snp = snp_id) snp3_df <- data.frame( SampleID = bd5$samples$sid, Dosage = round(snp3$dosage, 4), P_00 = round(snp3$p0, 4), P_01 = round(snp3$p1, 4), P_11 = round(snp3$p2, 4) ) knitr::kable(head(snp3_df, 10), caption = paste("SNP", snp_id, "— first 10 subjects")) ## ----apply_bdapply, eval = TRUE, echo = T, message = F, warning = F----------- getaaf <- function(dosage, p0, p1, p2) mean(dosage, na.rm = TRUE) / 2 aaf_list <- bdapply(bdinfo = bd5, func = getaaf) aaf_table <- data.frame(snpid = bd5$snps$snpid, aaf = round(unlist(aaf_list), 4)) knitr::kable(aaf_table, caption = "Alternate allele frequencies via bdapply") ## ----apply_con, eval = TRUE, echo = T, message = F, warning = F--------------- n_snps <- nrow(bd5$snps) n_samp <- nrow(bd5$samples) dosage <- numeric(n_samp) p0 <- numeric(n_samp) p1 <- numeric(n_samp) p2 <- numeric(n_samp) con <- openbd5con(bd5) aaf <- numeric(n_snps) for (i in seq_len(n_snps)) { getbd5snp_con(bd5info = bd5, snp = i, dosage = dosage, p0 = p0, p1 = p1, p2 = p2, bd5con = con) aaf[i] <- mean(dosage, na.rm = TRUE) / 2 } closebd5con(con) knitr::kable(data.frame(snpid = bd5$snps$snpid, aaf = round(aaf, 4)), caption = "Alternate allele frequencies via getbd5snp_con") ## ----cleanup, include = FALSE, eval = TRUE------------------------------------ unlink(c(bdose_file, paste0(bdose_file, ".bdi")))