--- title: "neun_euro_ticket_demonstrator" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{neun_euro_ticket_demonstrator} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` ```{r, setup} library(ubair) ``` ```{r set variables and dates for effect} effect_title <- "neun_euro_ticket" model_types <- c("rf", "lightgbm", "dynamic_regression") # , "fnn") station <- "DESN025" station_name <- "Leipzig-Mitte" window_size <- 14 trend <- "linear" # set dates training_start <- lubridate::ymd("20100101") training_end <- lubridate::ymd("20220301") application_start <- lubridate::ymd("20220301") application_end <- lubridate::ymd("20220831") date_effect_start <- lubridate::ymd_hm("20220601 00:00") buffer <- 0 ``` ```{r prepare data} # load data params <- load_params() env_data <- sample_data_DESN025 dt_prepared <- prepare_data_for_modelling(env_data, params) dt_prepared <- dt_prepared[complete.cases(dt_prepared)] split_data <- split_data_counterfactual( dt_prepared, application_start, application_end ) ``` ```{r run models combine results, fig.width=10, fig.height=6} predictions_all <- data.table::data.table() metrics_all <- data.table::data.table() for (model_type in model_types) { print(paste("model = ", model_type)) print(paste("trend = ", trend)) res <- run_counterfactual(split_data, params, detrending_function = trend, model_type = model_type, alpha = 0.9, log_transform = TRUE ) counterfactual_plot <- plot_counterfactual(res$prediction, params, window_size = window_size, date_effect_start, buffer = buffer ) print(counterfactual_plot) ggplot2::ggsave(paste0(model_type, "_", station, effect_title, ".png"), plot = counterfactual_plot, width = 12, height = 4 ) # evaluation metrics <- round( calc_performance_metrics(res$prediction, date_effect_start, buffer = buffer ), 2 ) metrics["effect_size"] <- estimate_effect_size(res$prediction, date_effect_start, buffer = buffer, verbose = FALSE )["absolute_effect"] print(metrics["effect_size"]) metrics["model"] <- model_type metrics["trend"] <- trend metrics["station"] <- station metrics["station_name"] <- station_name metrics["buffer_start"] <- format( date_effect_start - as.difftime(buffer, units = "hours" ), "%Y-%m-%d" ) metrics["effect_start"] <- format(date_effect_start, "%Y-%m-%d") metrics_dt <- data.table::as.data.table(t(metrics)) predictions <- data.table::copy(res$prediction) predictions[, station := station] predictions[, model := model_type] predictions[, trend := trend] predictions_all <- rbind(predictions_all, predictions) metrics_all <- rbind(metrics_all, metrics_dt) } ``` ```{r calc mean save results} predictions_save <- dplyr::select( predictions_all, c( date, value, prediction, prediction_lower, prediction_upper, station, model, trend ) ) predictions_save[, date := as.Date(date)] # Calculate the window_size rolling mean on these daily aggregated values predictions_save[, (names(predictions_save)[sapply( predictions_save, is.numeric )]) := lapply(.SD, function(x) { data.table::frollmean(x, n = window_size * 24, align = "center", na.rm = TRUE ) }), by = .(model, trend), .SDcols = names(predictions_save)[sapply(predictions_save, is.numeric)] ] # reduce to daily mean for demonstrator export daily_data <- predictions_save[, lapply(.SD, mean, na.rm = TRUE), by = .(model, trend, date), .SDcols = names(predictions_save)[sapply(predictions_save, is.numeric)] ] data.table::fwrite(daily_data, file = paste0( "preds_NO2_", effect_title, "_", station_name, window_size, "d_mean.csv" )) data.table::fwrite(metrics_all, file = paste0( "metrics_NO2_", effect_title, "_", station_name, ".csv" )) ```