## ----include = FALSE----------------------------------------------------------
# Global chunk options: show code and output together, silence package
# start-up warnings/messages in the rendered vignette.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  warning = FALSE,
  message = FALSE
)

## ----setup--------------------------------------------------------------------
library(metasurvey)
library(survey)
library(data.table)

# Stratified sample of California schools shipped with the survey package.
data(api, package = "survey")
api_dt <- data.table(apistrat)

# Wrap the raw data in a metasurvey Survey object; column "pw" carries the
# sampling weights used for annual estimation.
svy <- Survey$new(
  data = api_dt,
  edition = "2000",
  type = "api",
  psu = NULL,
  engine = "data.table",
  weight = add_weight(annual = "pw")
)

## ----mean---------------------------------------------------------------------
# Weighted mean of the year-2000 API score.
result <- workflow(
  list(svy),
  survey::svymean(~api00, na.rm = TRUE),
  estimation_type = "annual"
)
result

## ----total--------------------------------------------------------------------
# Weighted total enrollment.
result_total <- workflow(
  list(svy),
  survey::svytotal(~enroll, na.rm = TRUE),
  estimation_type = "annual"
)
result_total

## ----multiple-----------------------------------------------------------------
# Several estimators can be evaluated in one workflow() call.
results <- workflow(
  list(svy),
  survey::svymean(~api00, na.rm = TRUE),
  survey::svytotal(~enroll, na.rm = TRUE),
  estimation_type = "annual"
)
results

## ----domain-------------------------------------------------------------------
# Mean API score by school type (domain estimation via svyby).
api_by_type <- workflow(
  list(svy),
  survey::svyby(~api00, ~stype, survey::svymean, na.rm = TRUE),
  estimation_type = "annual"
)
api_by_type

## ----domain-award-------------------------------------------------------------
# Mean enrollment by awards status.
enroll_by_award <- workflow(
  list(svy),
  survey::svyby(~enroll, ~awards, survey::svymean, na.rm = TRUE),
  estimation_type = "annual"
)
enroll_by_award

## ----cv-----------------------------------------------------------------------
# Assess the quality of the API score estimate via its coefficient of
# variation (reported as a percentage).
cv_pct <- results$cv[1] * 100
cv_quality <- evaluate_cv(cv_pct)
cat("CV:", round(cv_pct, 2), "%\n")
cat("Quality:", cv_quality, "\n")

## ----create-wf----------------------------------------------------------------
# A RecipeWorkflow bundles the estimation calls (and any recipe ids) that
# together define a reproducible analysis.
wf <- RecipeWorkflow$new(
  name = "API Score Analysis 2000",
  description = "Mean API score estimation by school type",
  user = "Research Team",
  survey_type = "api",
  edition = "2000",
  estimation_type = "annual",
  recipe_ids = character(0),
  calls = list(
    "survey::svymean(~api00, na.rm = TRUE)",
    "survey::svyby(~api00, ~stype, survey::svymean, na.rm = TRUE)"
  )
)
wf

## ----wf-registry--------------------------------------------------------------
# Configure a local (JSON file) registry backend.
wf_path <- tempfile(fileext = ".json")
set_workflow_backend("local", path = wf_path)

# Publish the workflow to the registry.
publish_workflow(wf)

# Discover every published workflow.
all_wf <- list_workflows()
length(all_wf)

# Free-text search (no match expected in this registry).
found <- search_workflows("income")
length(found)

# Filter by survey type.
ech_wf <- filter_workflows(survey_type = "ech")
length(ech_wf)

## ----find-for-recipe----------------------------------------------------------
# Publish a second workflow that references a recipe by id.
wf2 <- RecipeWorkflow$new(
  name = "Labor Market Estimates",
  user = "Team",
  survey_type = "ech",
  edition = "2023",
  estimation_type = "annual",
  recipe_ids = c("labor_force_recipe_001"),
  calls = list("survey::svymean(~employed, na.rm = TRUE)")
)
publish_workflow(wf2)

# Look up every workflow that uses that recipe.
related <- find_workflows_for_recipe("labor_force_recipe_001")
length(related)
if (length(related) > 0) cat("Found:", related[[1]]$name, "\n")

## ----full-pipeline------------------------------------------------------------
# 1. Create a survey from real data.
dt_full <- data.table(apistrat)
svy_full <- Survey$new(
  data = dt_full,
  edition = "2000",
  type = "api",
  psu = NULL,
  engine = "data.table",
  weight = add_weight(annual = "pw")
)
# 2. Apply steps: derive new variables on the survey object.
svy_full <- step_compute(svy_full,
  api_growth = api00 - api99,
  high_growth = ifelse(api00 - api99 > 50, 1L, 0L),
  comment = "API score growth indicators"
)

svy_full <- step_recode(svy_full,
  school_level,
  stype == "E" ~ "Elementary",
  stype == "M" ~ "Middle",
  stype == "H" ~ "High",
  .default = "Other",
  comment = "School level classification"
)

# 3. Estimate means of the derived variables.
estimates <- workflow(
  list(svy_full),
  survey::svymean(~api_growth, na.rm = TRUE),
  survey::svymean(~high_growth, na.rm = TRUE),
  estimation_type = "annual"
)
estimates

## ----full-pipeline-domain-----------------------------------------------------
# 4. Domain estimation (by school type).
by_school <- workflow(
  list(svy_full),
  survey::svyby(~api00, ~stype, survey::svymean, na.rm = TRUE),
  estimation_type = "annual"
)
by_school

## ----full-pipeline-cv---------------------------------------------------------
# 5. Report the coefficient of variation and a quality label per estimate.
for (row in seq_len(nrow(estimates))) {
  cv_percent <- estimates$cv[row] * 100
  cat(
    estimates$stat[row], ":", round(cv_percent, 1), "% CV -",
    evaluate_cv(cv_percent), "\n"
  )
}

## ----provenance---------------------------------------------------------------
# Provenance is populated automatically after bake_steps()
prov <- provenance(svy_full)
prov

## ----provenance-workflow------------------------------------------------------
# Workflow results carry their own provenance record.
prov_wf <- provenance(estimates)
cat("metasurvey version:", prov_wf$environment$metasurvey_version, "\n")
cat("Steps applied:", length(prov_wf$steps), "\n")

## ----provenance-json, eval = FALSE--------------------------------------------
# provenance_to_json(prov, "audit_trail.json")

## ----provenance-diff, eval = FALSE--------------------------------------------
# diff <- provenance_diff(prov_2022, prov_2023)
# diff$steps_changed
# diff$n_final_changed

## ----workflow-table, eval = requireNamespace("gt", quietly = TRUE)------------
workflow_table(estimates)

## ----workflow-table-opts, eval = requireNamespace("gt", quietly = TRUE)-------
# Spanish locale, hide the SE column, and set a custom title/subtitle.
workflow_table(
  estimates,
  locale = "es",
  show_se = FALSE,
  title = "API Growth Indicators",
  subtitle = "California Schools, 2000"
)

## ----workflow-table-domain, eval = requireNamespace("gt", quietly = TRUE)-----
workflow_table(by_school)

## ----workflow-table-export, eval = FALSE--------------------------------------
# tbl <- workflow_table(estimates)
# gt::gtsave(tbl, "estimates.html")
# gt::gtsave(tbl, "estimates.docx")
# gt::gtsave(tbl, "estimates.png")