## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width  = 7,
  fig.height = 4.5,
  warning    = FALSE,
  message    = FALSE
)
library(NonlinearDiD)
library(ggplot2)
set.seed(42)

## ----simulate-----------------------------------------------------------------
dat <- sim_binary_panel(
  n            = 800,
  nperiods     = 8,
  prop_treated = 0.6,
  n_cohorts    = 3,
  true_att     = c(0.15, 0.25, 0.20),  # heterogeneous treatment effects
  base_prob    = 0.3,
  seed         = 42
)

head(dat)
cat("Treatment cohorts:", table(dat$g[dat$period == 1]), "\n")
cat("Baseline outcome rate:", round(mean(dat$y[dat$D == 0]), 3), "\n")

## ----attgt--------------------------------------------------------------------
res <- nonlinear_attgt(
  data          = dat,
  yname         = "y",
  tname         = "period",
  idname        = "id",
  gname         = "g",
  xformla       = ~ x1 + x2,
  outcome_model = "logit",
  estimand      = "att",
  control_group = "nevertreated",
  doubly_robust = TRUE
)

summary(res)

## ----eventstudy, fig.cap="Event-study plot for binary outcome DiD"------------
agg_dynamic <- nonlinear_aggte(res, type = "dynamic")
print(agg_dynamic)
plot(agg_dynamic)

## ----groupatt-----------------------------------------------------------------
agg_group <- nonlinear_aggte(res, type = "group")
print(agg_group)

## ----pretest------------------------------------------------------------------
pt <- nonlinear_pretest(res, plot = FALSE)
print(pt)

## ----ordata-------------------------------------------------------------------
# Use simple 2-period case for clarity
dat2 <- dat[dat$period %in% c(3, 5), ]

res_or <- odds_ratio_did(
  data           = dat2,
  yname          = "y",
  tname          = "period",
  idname         = "id",
  treat_period   = 5,
  control_period = 3,
  gname          = "g"
)

print(res_or)

## ----countdata----------------------------------------------------------------
count_dat <- sim_count_panel(
  n            = 600,
  nperiods     = 6,
  prop_treated = 0.5,
  true_rr      = c(1.5, 2.0, 1.3),  # rate ratios per cohort
  base_rate    = 10,
  seed         = 7
)

summary(count_dat$y)

## ----countdid-----------------------------------------------------------------
# Staggered Poisson DiD
res_count <- nonlinear_attgt(
  data          = count_dat,
  yname         = "y",
  tname         = "period",
  idname        = "id",
  gname         = "g",
  outcome_model = "poisson",
  estimand      = "att",
  control_group = "nevertreated"
)

agg_count <- nonlinear_aggte(res_count, type = "dynamic")
plot(agg_count)

## ----pois2x2------------------------------------------------------------------
# Simple 2x2 Poisson DiD (Wooldridge QMLE)
count_sub <- count_dat[count_dat$period %in% c(2, 4), ]
res_pois  <- count_did_poisson(
  count_sub,
  yname          = "y",
  tname          = "period",
  idname         = "id",
  treat_period   = 4,
  control_period = 2,
  gname          = "g"
)
print(res_pois)

## ----drlogit------------------------------------------------------------------
res_dr <- binary_did_dr(
  data           = dat[dat$period %in% c(3, 5), ],
  yname          = "y",
  tname          = "period",
  idname         = "id",
  treat_period   = 5,
  control_period = 3,
  gname          = "g",
  xformla        = ~ x1 + x2,
  outcome_model  = "logit"
)
print(res_dr)

## ----bounds-------------------------------------------------------------------
bounds <- nonlinear_bounds(
  data          = dat,
  yname         = "y",
  tname         = "period",
  idname        = "id",
  gname         = "g",
  bound_type    = "manski",  # widest (no assumptions)
  control_group = "nevertreated"
)

# Show post-treatment bounds
post_bounds <- bounds[bounds$post, ]
head(post_bounds[, c("group", "time", "lb", "ub", "identified")])

## ----comparison, eval = FALSE-------------------------------------------------
# # Linear comparison (for illustration)
# res_linear <- nonlinear_attgt(
#   data          = dat,
#   yname         = "y",
#   tname         = "period",
#   idname        = "id",
#   gname         = "g",
#   outcome_model = "linear",
#   estimand      = "att"
# )
# 
# agg_lin  <- nonlinear_aggte(res_linear, type = "dynamic")
# agg_logit <- nonlinear_aggte(res, type = "dynamic")
# 
# # The two produce different estimates when baseline rates are moderate
# cat("Linear overall ATT:",  round(agg_lin$overall_att, 4), "\n")
# cat("Logit overall ATT:",   round(agg_logit$overall_att, 4), "\n")
# cat("True ATT (avg):    ",  round(mean(c(0.15, 0.25, 0.20)), 4), "\n")