## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.align = "center" ) library(rsofun) library(dplyr) library(ggplot2) library(tidyr) library(sensitivity) library(BayesianTools) ## ----------------------------------------------------------------------------- # Define log-likelihood function ll_pmodel <- function( par_v # a named vector of all calibratable parameters including errors ){ rsofun::cost_likelihood_pmodel( # reuse likelihood cost function par_v, obs = rsofun::p_model_validation, drivers = rsofun::p_model_drivers, targets = "gpp" ) } # Compute log-likelihood for a given set of parameters ll_pmodel( par_v = c( kphio = 4.102119e-02, kphio_par_a =-2.289344e-03, kphio_par_b = 1.525094e+01, soilm_thetastar = 1.577507e+02, soilm_betao = 1.169702e-04, beta_unitcostratio = 146.0, rd_to_vcmax = 0.014, tau_acclim = 20.0, kc_jmax = 0.41, error_gpp = 0.9 # value from previous simulations )) ## ----------------------------------------------------------------------------- # best parameter values (initial values, taken from Stocker et al., 2020 GMD) par_cal_best <- c( kphio = 0.09423773, kphio_par_a = -0.0025, kphio_par_b = 20, soilm_thetastar = 0.6*240, soilm_betao = 0.2, beta_unitcostratio = 146.0, rd_to_vcmax = 0.014, tau_acclim = 30.0, kc_jmax = 0.41, error_gpp = 1 ) # lower bound par_cal_min <- c( kphio = 0.03, kphio_par_a = -0.004, kphio_par_b = 10, soilm_thetastar = 0, soilm_betao = 0, beta_unitcostratio = 50.0, rd_to_vcmax = 0.01, tau_acclim = 7.0, kc_jmax = 0.2, error_gpp = 0.01 ) # upper bound par_cal_max <- c( kphio = 0.15, kphio_par_a = -0.001, kphio_par_b = 30, soilm_thetastar = 240, soilm_betao = 1, beta_unitcostratio = 200.0, rd_to_vcmax = 0.1, tau_acclim = 60.0, kc_jmax = 0.8, error_gpp = 4 ) ## ----eval = FALSE------------------------------------------------------------- # morris_setup <- BayesianTools::createBayesianSetup( # likelihood = ll_pmodel, # prior = BayesianTools::createUniformPrior(par_cal_min, par_cal_max, par_cal_best), # names = names(par_cal_best) # ) ## ----eval = FALSE, echo = TRUE------------------------------------------------ # set.seed(432) # morrisOut <- sensitivity::morris( # model = morris_setup$posterior$density, # factors = names(par_cal_best), # r = 1000, # design = list(type = "oat", levels = 20, grid.jump = 3), # binf = par_cal_min, # bsup = par_cal_max, # scale = TRUE # ) ## ----simulate_morrisOut, include = FALSE-------------------------------------- # fake output since Morris sensitivity isn't run # saveRDS(morrisOut, "files/sensitivity_analysis.Rmd__morrisOut.RDS") morrisOut <- readRDS("files/sensitivity_analysis.Rmd__morrisOut.RDS") ## ----eval = TRUE, fig.width=7, fig.height=4----------------------------------- # Summarise the morris output morrisOut.df <- data.frame( parameter = names(par_cal_best), mu.star = apply(abs(morrisOut$ee), 2, mean, na.rm = T), sigma = apply(morrisOut$ee, 2, sd, na.rm = T) ) |> arrange( mu.star ) morrisOut.df |> tidyr::pivot_longer( -parameter, names_to = "variable", values_to = "value") |> ggplot(aes( reorder(parameter, value), value, fill = variable), color = NA) + geom_bar(position = position_dodge(), stat = 'identity') + scale_fill_manual("", labels = c('mu.star' = expression(mu * "*"), 'sigma' = expression(sigma)), values = c('mu.star' = "#29a274ff", 'sigma' = "#777055ff")) + theme_classic() + theme( axis.text = element_text(size = 6), axis.title = element_blank(), legend.position = c(0.9, 0.1), legend.justification = c(0.95, 0.05) ) + coord_flip() # make horizontal ## ----eval = FALSE, echo = TRUE------------------------------------------------ # # Calibrates kphio, kphio_par_b, kc_jmax - top 3 model params # set.seed(2025) # # # Define calibration settings # settings_calib <- list( # method = "BayesianTools", # metric = rsofun::cost_likelihood_pmodel, # control = list( # sampler = "DEzs", # settings = list( # burnin = 12000, # iterations = 24000, # nrChains = 3, # number of independent chains # startValue = 3 # number of internal chains to be sampled # )), # par = list( # kphio = list(lower = 0.02, upper = 0.15, init = 0.05), # kphio_par_a =list(lower = -0.004, upper = -0.001, init = -0.0025), # kphio_par_b = list(lower = 10, upper = 30, init = 20), # soilm_thetastar = list( # lower = 0.01 * rsofun::p_model_drivers$site_info[[1]]$whc, # upper = 1.0 * rsofun::p_model_drivers$site_info[[1]]$whc, # init = 0.6 * rsofun::p_model_drivers$site_info[[1]]$whc # ), # soilm_betao = list(lower = 0.0, upper = 1.0, init = 0.0), # err_gpp = list(lower = 0.1, upper = 3, init = 0.8) # ) # ) # # par_fixed <- list( # beta_unitcostratio = 146.0, # kc_jmax = 0.41, # rd_to_vcmax = 0.014, # tau_acclim = 20.0 # ) # # # Calibrate kphio-related parameters and err_gpp # par_calib <- calib_sofun( # drivers = p_model_drivers, # obs = p_model_validation, # settings = settings_calib, # par_fixed = par_fixed, # targets = "gpp" # ) # saveRDS(par_calib, "files/sensitivity_analysis.Rmd__par_calib.RDS", compress = "xz") # # # This code takes 15 minutes to run ## ----simulate_par_calib, include = FALSE-------------------------------------- # fake output since calibration isn't run par_calib <- readRDS("files/sensitivity_analysis.Rmd__par_calib.RDS") ## ----fig.height = 10, fig.width = 7------------------------------------------- plot(par_calib$mod) ## ----fig.height = 10, fig.width = 7, eval = FALSE, echo = FALSE--------------- # # Define function for plotting chains separately # plot_acf_mcmc <- function(chains, par_names){ # # chains: from the BayesianTools output # n_chains <- length(chains) # n_internal_chains <- length(chains[[1]]$chain) # par(mfrow = c(length(par_names), n_chains)) # for(par_name in par_names){ # for(i in 1:n_chains){ # stopifnot(n_internal_chains<=3); color = c("blue", "red", "darkgreen") # spacing = 0.5/n_internal_chains # for(j in 1:n_internal_chains){ # autocorr_internal_chain <- pacf(getSample(chains[[i]]$chain[[j]])[, par_name], plot = FALSE) # if(j==1){ # plot(autocorr_internal_chain, col = color[j], # main = sprintf("Series of %s , chain (%i)", par_name, i)) # } else { # lines(autocorr_internal_chain$lag + spacing*(j-1), # autocorr_internal_chain$acf, # col = color[j], type = "h") # } # } # } # } # } # plot_acf_mcmc( # par_calib$mod, # c("kphio", "kphio_par_a", "kphio_par_b", "soilm_thetastar", "soilm_betao", "err_gpp") # ) ## ----fig.width=5, fig.height=5------------------------------------------------ correlationPlot(par_calib$mod, thin = 1) # use all samples, no thinning ## ----------------------------------------------------------------------------- gelmanDiagnostics(par_calib$mod) ## ----------------------------------------------------------------------------- summary(par_calib$mod) ## ----echo = TRUE, eval = FALSE------------------------------------------------ # # Evaluation of the uncertainty coming from the model parameters' uncertainty # # # Sample parameter values from the posterior distribution # samples_par <- getSample( # par_calib$mod, # thin = 60 # ) |> # as.data.frame() |> # dplyr::mutate(mcmc_id = 1:n()) |> # tidyr::nest(.by = mcmc_id, .key = "pars") # # run_pmodel <- function(par){ # # Function that runs the P-model for a sample of parameters # # and also adds the new observation error # # out <- runread_pmodel_f( # drivers = p_model_drivers, # par = list( # kphio = par$kphio, # kphio_par_a = par$kphio_par_a, # kphio_par_b = par$kphio_par_b, # soilm_thetastar = par$soilm_thetastar, # soilm_betao = par$soilm_betao, # beta_unitcostratio = 146.0, # rd_to_vcmax = 0.014, # tau_acclim = 20.0, # kc_jmax = 0.41 # ) # ) # # # return modelled GPP and prediction for a new GPP observation # gpp <- out$data[[1]][, "gpp"] # out <- data.frame( # gpp = gpp, # gpp_pred = rnorm( # n = length(gpp), # mean = gpp, # sd = par$err_gpp # ), # date = out$data[[1]][, "date"]) # return(out) # } # # set.seed(2025) # # # Run the P-model for each set of parameters # pmodel_runs <- samples_par |> # dplyr::mutate(sim = purrr::map(pars, ~run_pmodel(.x))) |> # # format to obtain 90% credible intervals # dplyr::select(mcmc_id, sim) |> # tidyr::unnest(sim) |> # dplyr::group_by(date) |> # # compute quantiles for each day # dplyr::summarise( # gpp_q05 = quantile(gpp, 0.05, na.rm = TRUE), # gpp_q50 = quantile(gpp, 0.5, na.rm = TRUE), # get median # gpp_q95 = quantile(gpp, 0.95, na.rm = TRUE), # gpp_pred_q05 = quantile(gpp_pred, 0.05, na.rm = TRUE), # gpp_pred_q95 = quantile(gpp_pred, 0.95, na.rm = TRUE) # ) # # # run model with maximum a posteriori parameter estimates # pmodel_run_map <- run_pmodel( # MAP(par_calib$mod)$parametersMAP |> # t() |> # as_tibble() # ) ## ----simulate_pmodel_runs, include = FALSE------------------------------------ # TODO: get rid of this and always fully run the vignettes # fake output since calibration isn't run # saveRDS(pmodel_runs, file = "files/pmodel_runs.rds") pmodel_runs <- readRDS("files/pmodel_runs.rds") ## ----fig.width=7, fig.height=5------------------------------------------------ ## add transparency to color given as a name add_alpha <- function( col, alpha ){ col <- col2rgb( col, alpha = TRUE )/255 col[4] <- alpha col <- rgb(col[1,],col[2,],col[3,],col[4,]) return( col ) } # Plot the credible intervals computed above # for the first year only data_to_plot <- pmodel_runs |> # Plot only first year dplyr::slice(1:365) |> dplyr::left_join( # Merge GPP validation data (first year) p_model_validation$data[[1]][1:365, ] |> dplyr::rename(gpp_obs = gpp), by = "date") plot_gpp_error <- ggplot(data = data_to_plot) + geom_ribbon( aes( ymin = gpp_pred_q05, ymax = gpp_pred_q95, x = date, fill = "Model uncertainty" )) + geom_ribbon( aes( ymin = gpp_q05, ymax = gpp_q95, x = date, fill = "Parameter uncertainty" )) + # Include observations in the plot geom_point( aes( x = date, y = gpp_obs, color = "Observations" ), ) + geom_line( aes( x = date, y = gpp_q50, color = "Predictions" ) ) + theme_classic() + theme(panel.grid.major.y = element_line(), legend.position = "bottom") + labs( x = 'Date', y = expression(paste("GPP (g C m"^-2, "s"^-1, ")")) ) + scale_color_manual(NULL, breaks = c("Observations", "Predictions"), values = c("black", "tomato")) + scale_fill_manual(NULL, breaks = c("Model uncertainty", "Parameter uncertainty"), values = c(add_alpha("tomato", 0.5), "#1b9e77", 0)) plot_gpp_error ## ----fig.width=7, fig.height=5, echo = FALSE, eval = FALSE-------------------- # # Define function to run model for a set of sampled parameters # run_pmodel <- function(par){ # # Function that runs the P-model for a sample of parameters # out <- runread_pmodel_f( # drivers = p_model_drivers, # par = list( # kphio = par$kphio, # kphio_par_a = par$kphio_par_a, # kphio_par_b = par$kphio_par_b, # soilm_thetastar = par$soilm_thetastar, # soilm_betao = par$soilm_betao, # beta_unitcostratio = 146.0, # rd_to_vcmax = 0.014, # tau_acclim = 20.0, # kc_jmax = 0.41 # ) # ) # # return(out) # } # # # Plot observed and predicted GPP, with a 95% confidence interval using err_gpp # # Run model with maximum a posteriori parameter estimates (not shown on plot). # pmodel_run_map <- run_pmodel( # BayesianTools::MAP(par_calib$mod)$parametersMAP |> # t() |> # as_tibble() # ) |> # dplyr::select(-site_info) |> # tidyr::unnest(data) # # # Plot the credible intervals computed above # # for the first year only # data_to_plot <- pmodel_run_map |> # # Plot only first year # dplyr::slice(1:365) |> # dplyr::left_join( # # Merge GPP validation data (first year) # p_model_validation$data[[1]][1:365, ] |> # dplyr::rename(gpp_obs = gpp), # by = "date") # # plot_gpp_error <- ggplot(data = data_to_plot) + # # Include observations in the plot # geom_point( # aes( # x = date, # y = gpp_obs, # color = "Observations" # ), # ) + # geom_line( # aes( # x = date, # y = gpp, # color = "Predictions based on MAP" # ) # ) + # theme_classic() + # theme(panel.grid.major.y = element_line(), # legend.position = "bottom") + # labs( # x = 'Date', # y = expression(paste("GPP (g C m"^-2, "s"^-1, ")")) # ) + # scale_color_manual(NULL, # breaks = c("Observations", # "Predictions based on MAP"), # values = c("black", "tomato")) # plot_gpp_error