## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----setup-------------------------------------------------------------------- library(BinaryReplicates) library(tidyverse) set.seed(42) ## ----------------------------------------------------------------------------- theta <- .4 p <- q <- .22 n <- 50 ni <- sample(2:6, n, replace = TRUE) ti <- rbinom(n, 1, theta) si <- rbinom(n, ni, ti*(1-q) + (1-ti)*p) synth_data <- data.frame(ni = ni, si = si, ti=ti) ## ----------------------------------------------------------------------------- fit_em <- EMFit(ni, si) fit_em$parameters_hat ## ----------------------------------------------------------------------------- fit <- BayesianFit(ni, si, chains = 4, iter = 5000, refresh = 0) print(fit, pars = c("theta", "p", "q")) ## ----------------------------------------------------------------------------- synth_data <- synth_data %>% mutate(Y_A = average_scoring(ni, si), Y_M = median_scoring(ni, si), Y_L = likelihood_scoring(ni, si, list(theta = theta, p = p, q = q)), Y_MAP = MAP_scoring(ni, si, fit_em), Y_B = bayesian_scoring(ni, si, fit)) ## ----------------------------------------------------------------------------- credint(fit) ## ----------------------------------------------------------------------------- credint(fit, level = 0.95) ## ----------------------------------------------------------------------------- theta_A <- prevalence_estimate(synth_data$Y_A) theta_M <- prevalence_estimate(synth_data$Y_M) theta_MAP <- prevalence_estimate(synth_data$Y_MAP) theta_B <- bayesian_prevalence_estimate(fit) cat("True prevalence: ", theta, "\n") cat("Average-based: ", round(theta_A, 3), "\n") cat("Median-based: ", round(theta_M, 3), "\n") cat("MAP-based: ", round(theta_MAP, 3), "\n") cat("Bayesian: ", round(theta_B, 3), "\n") ## ----------------------------------------------------------------------------- v_L <- .35 v_U <- 1 - v_L synth_data <- synth_data %>% mutate(T_A = classify_with_scores(Y_A, v_L, v_U), T_M = classify_with_scores(Y_M, v_L, v_U), T_L = classify_with_scores(Y_L, v_L, v_U), T_MAP = classify_with_scores(Y_MAP, v_L, v_U), T_B = classify_with_scores(Y_B, v_L, v_U)) ## ----------------------------------------------------------------------------- confusion <- synth_data %>% mutate( Status = ifelse(ti == 1, "T=1", "T=0"), Average = T_A, Median = T_M, Likelihood = T_L, MAP = T_MAP, Bayesian = T_B) %>% pivot_longer(cols = c(Average, Median, Likelihood, MAP, Bayesian), names_to = "Method", values_to = "Decision") %>% group_by(Status, Method, Decision) %>% summarise(count = n(), .groups = "keep") %>% ungroup() %>% mutate(count = as.integer(count)) %>% pivot_wider(names_from = Decision, values_from = count, values_fill = 0) confusion ## ----------------------------------------------------------------------------- new_data <- data.frame(ni = rep(8, 9), si = 0:8) ## ----------------------------------------------------------------------------- new_data <- new_data %>% mutate(Y_B = predict_scores(ni, si, fit), T_B = classify_with_scores(Y_B, v_L, v_U)) new_data