## ----include = FALSE----------------------------------------------------------
# Document-wide chunk defaults: silence messages and warnings, and render
# collapsed output whose lines are prefixed with "#>".
knitr::opts_chunk$set(
  message = FALSE, warning = FALSE,
  comment = "#>", collapse = TRUE
)

## ----setup, include = FALSE---------------------------------------------------
# Attach the package used throughout this script.
# The workspace is intentionally NOT cleared here: rm(list = ls()) would delete
# the caller's objects whenever this script is sourced in an interactive
# session, and nothing below relies on an empty environment.
library("DIVINE")

## ----eval=FALSE---------------------------------------------------------------
# install.packages("DIVINE")

## ----eval=FALSE---------------------------------------------------------------
# # install.packages("devtools")
# devtools::install_github("bruigtp/DIVINE")
# 
# pak::pak("bruigtp/DIVINE") # Alternative

## ----eval=FALSE---------------------------------------------------------------
# library(DIVINE)

## ----eval=FALSE---------------------------------------------------------------
# data(package = "DIVINE")

## ----eval=FALSE---------------------------------------------------------------
# ?demographic

## -----------------------------------------------------------------------------
# Load the demographic dataset into the workspace and show its first six rows.
data(demographic)
head(demographic, n = 6L)

## -----------------------------------------------------------------------------
# Build an overview of the demographic data frame and keep it for the next
# chunk, which inspects its components one by one.
ov <- data_overview(demographic)

# Display the complete overview object.
print(ov)

## -----------------------------------------------------------------------------
# Extract the individual components of the overview by name.
ov[["dimensions"]]      # number of rows and columns
ov[["variable_types"]]  # data types of each variable
ov[["missing_values"]]  # count of missing values per column
ov[["preview"]]         # a small preview of the data

## -----------------------------------------------------------------------------
# Examples of impute_missing() on the icu dataset. Each call returns a new
# object; the icu data itself is not reassigned.

# Load the dataset explicitly, as is done for the other datasets in this script.
data("icu")

# 1) Default: replace all numeric NAs with column means
cleaned_default <- impute_missing(icu)

# 2) Per-column strategies:
#    - Mean for vent_mec_start_days
#    - Constant 0 for icu_enter_days
cleaned_mix <- impute_missing(
  icu,
  method = list(
    vent_mec_start_days ~ "mean",
    icu_enter_days ~ 0
  )
)

# 3) Multiple columns at once:
#    - Medians for any column whose name ends in "_days"
#    ends_with() matches a literal suffix. starts_with() must not be used with
#    a regular expression here: it treats its argument as a literal prefix.
cleaned_days_median <- impute_missing(
  icu,
  method = list(ends_with("_days") ~ "median")
)

# 4) Factor/character imputation:
#    - Fill covid_wave with its most common value ("mode")
#    - Fill the icu column with the constant "Unknown"
cleaned_char <- impute_missing(
  icu,
  method = list(
    covid_wave ~ "mode",
    icu ~ "Unknown"
  )
)

# 5) Drop all-NA rows first, then impute numeric means
cleaned_no_empty <- impute_missing(
  icu,
  method = list(where(is.numeric) ~ "mean"),
  drop_all_na = TRUE
)
# ▶ message: Removed X rows where all values were NA

## -----------------------------------------------------------------------------
# Bring the two additional tables into the workspace in a single call.
data(list = c("vital_signs", "scores"))

# Left-join the three tables on the identifier columns they share.
joined <- multi_join(
  list(demographic, vital_signs, scores),
  join_type = "left",
  key = c("record_id", "covid_wave", "center")
)

## -----------------------------------------------------------------------------
# Table 1: age, smoker and alcohol summarised as mean (SD) within each level
# of sex, with p-values requested.
tbl1 <- stats_table(
  demographic,
  by = "sex",
  vars = c("age", "smoker", "alcohol"),
  pvalue = TRUE,
  statistic_type = "mean_sd"
)

# Table 2: median [Q1; Q3] over all observations, without a grouping variable.
tbl2 <- stats_table(demographic, statistic_type = "median_iqr")

# Table 3: mean (SD) and median [IQR] reported together.
tbl3 <- stats_table(demographic, statistic_type = "both")

## -----------------------------------------------------------------------------
# Examples of multi_plot() for three plot types.

# Histogram of age
multi_plot(
  demographic,
  x_var = "age",
  plot_type = "histogram",
  fill_color = "skyblue",
  title = "Distribution of Age"
)

# Boxplot of age by sex
multi_plot(
  demographic,
  x_var = "sex",
  y_var = "age",
  plot_type = "boxplot",
  group = "sex",
  title = "Age by Sex"
)

# Load the dataset explicitly, as is done for the other datasets in this script.
data("comorbidities")

# Spider (radar) plot across five comorbidity variables: hypertension,
# dyslipidemia, depression, mild_kidney_disease and ceiling_dico.
# NOTE(review): radar_ref_lev = "Yes" presumably selects the level summarised
# for each variable -- confirm against ?multi_plot.
multi_plot(
  comorbidities,
  x_var = "hypertension",
  y_var = "dyslipidemia",
  plot_type = "spider",
  z_var = c("depression", "mild_kidney_disease", "ceiling_dico"),
  # Axis labels, in the order x_var, y_var, z_var, converted to sentence case.
  radar_vlabels = stringr::str_to_sentence(
    c(
      "hypertension", "dyslipidemia", "depression",
      "mild_kidney_disease", "ceiling_dico"
    )
  ),
  radar_color = "blue",
  radar_ref_lev = "Yes"
)