## ----set_options, include = FALSE---------------------------------------------
knitr::opts_chunk$set(
  eval = FALSE, # Chunks of codes will not be evaluated by default
  collapse = TRUE,
  comment = "#>",
  fig.width = 7, fig.height = 5   # Set device size at rendering time (when plots are generated)
)

## ----setup, eval = TRUE, include = FALSE--------------------------------------
library(deepSTRAPP)

is_dev_version <- function (pkg = "deepSTRAPP")
{
  # # Check if ran on CRAN
  # not_cran <- identical(Sys.getenv("NOT_CRAN"), "true") # || interactive()

  # Version number check
  version <- tryCatch(as.character(utils::packageVersion(pkg)), error = function(e) "")
  dev_version <- grepl("\\.9000", version)

  # not_cran || dev_version
  
  return(dev_version)
}


## ----adjust_dpi_CRAN, include = FALSE, eval = !is_dev_version()---------------
knitr::opts_chunk$set(
  dpi = 72   # Lower DPI to save space
)

## ----adjust_dpi_dev, include = FALSE, eval = is_dev_version()-----------------
# knitr::opts_chunk$set(
#   dpi = 72   # Default DPI for the dev version
# )

## ----model_trait_evolution_cat------------------------------------------------
# 
# # ------ Step 0: Load data ------ #
# 
# ## Load phylogeny and tip data
# 
# library(phytools)
# data(eel.tree)
# data(eel.data)
# # Dataset of feeding mode and maximum total length from 61 species of elopomorph eels.
# # Source: Collar, D. C., P. C. Wainwright, M. E. Alfaro, L. J. Revell, and R. S. Mehta (2014)
# # Biting disrupts integration to spur skull evolution in eels. Nature Communications, 5, 5505.
# 
# ## Transform feeding mode data into a 3-level factor
# # This is NOT actual biological data anymore, but data adjusted for the sake of example!
# 
# eel_tip_data <- stats::setNames(object = eel.data$feed_mode, nm = rownames(eel.data))
# eel_tip_data <- as.character(eel_tip_data)
# eel_tip_data[c(1, 5, 6, 7, 10, 11, 15, 16, 17, 24, 25, 28, 30, 51, 52, 53, 55, 58, 60)] <- "kiss"
# eel_tip_data <- stats::setNames(eel_tip_data, rownames(eel.data))
# table(eel_tip_data)
# 
# plot(eel.tree)
#  ape::Ntip(eel.tree) == length(eel_tip_data)
# 
# ## Check that trait tip data and phylogeny are named and ordered similarly
# all(names(eel_tip_data) == eel.tree$tip.label)
# 
# # Reorder tip_data as in phylogeny
# eel_tip_data <- eel_tip_data[eel.tree$tip.label]
# 
# 
# ## Set colors per states
# colors_per_states <- c("limegreen", "orange", "dodgerblue")
# names(colors_per_states) <- c("bite", "kiss", "suction")
# 
# 
# # ------ Step 1: Prepare categorical trait data ------ #
# 
# ## Goal: Map categorical trait evolution on the time-calibrated phylogeny
# 
# # 1.1/ Fit evolutionary models to trait data using Maximum Likelihood.
# # 1.2/ Select the best fitting model comparing AICc.
# # 1.3/ Infer ancestral characters estimates (ACE) at nodes.
# # 1.4/ Run stochastic mapping simulations to generate evolutionary histories
# #      compatible with the best model and inferred ACE.
# # 1.5/ Infer ancestral states along branches.
# #  - Compute posterior frequencies of each state/range
# #    to produce a `densityMap` for each state/range.
# 
# library(deepSTRAPP)
# 
# # All these actions are performed by a single function: deepSTRAPP::prepare_trait_data()
# ?deepSTRAPP::prepare_trait_data()
# # Model selection is performed internally with deepSTRAPP::select_best_model_from_geiger()
# ?deepSTRAPP::select_best_model_from_geiger()
# # Plotting of all densityMaps as a unique phylogeny is carried out
# # with deepSTRAPP::plot_densityMaps_overlay()
# ?deepSTRAPP::plot_densityMaps_overlay()
# 
# ## Macroevolutionary models for categorical traits
# 
# ?geiger::fitDiscrete() # For more in-depth information on the models available
# 
# ## 5 models from geiger::fitDiscrete() are available
#  # "ER": Equal-Rates model.
#     # Default model where a single parameter governs all transition rates between states.
#     # Rates are symmetrical.
#        # Ex: A <-> B = A <-> C.
#  # "SYM": Symmetric model.
#     # Forward and reverse transitions share the same parameter,
#     # but transitions between diffrent states have different rates.
#        # Ex: (A -> B = B -> A) ≠ (A -> C = C -> A).
#  # "ARD": All-Rates-Different model.
#     # Each transition rate is a unique parameter.
#         # Ex: A -> B ≠ B -> A ≠ A -> C ≠ C -> A.
#  # "meristic": Step-stone model
#     # Transitions occur in a step-wise ordered fashion (e.g., 1 <-> 2 <-> 3).
#     # Transitions between non-adjacent states are forbidden (e.g., 1 <-> 3 is forbidden).
#     # Transitions rates are assumed to be symmetrical.
#         # Ex: (1 -> 2 = 2 -> 1) ≠ (2 -> 3 = 3 -> 2), with 1 <-> 3 set to zero.
#  # "matrix": Custom model.
#     # Allows to provide a custom "Q_matrix" defining transition classes between states.
#     # Transitions with similar integers are estimated with a shared rate parameter.
#     # Transitions with `0` represent rates that are fixed to zero (i.e., impossible transitions).
#     # Diagonal must be populated with `NA`. row.names(Q_matrix) and col.names(Q_matrix) are the states.
# 
# 
# ## Example of custom Q_matrix defining rate classes of state transitions to use in the 'matrix' model
# 
# # Does not allow transitions from state 1 ("bite") to state 2 ("kiss") or state 3 ("suction")
# # Does not allow transitions from state 3 ("suction") to state 1 ("bite")
# # Set symmetrical rates between state 2 ("kiss") and state 3 ("suction")
# Q_matrix = rbind(c(NA, 0, 0), c(1, NA, 2), c(0, 2, NA))
# 
# 
# ## Model trait data evolution
# eel_cat_3lvl_data <- prepare_trait_data(
#     tip_data = eel_tip_data,
#     trait_data_type = "categorical",
#     phylo = eel.tree,
#     seed = 1234, # Set seed for reproducibility
#     evolutionary_models = c("ER", "SYM", "ARD", "meristic", "matrix"), # All possible models
#     Q_matrix = Q_matrix, # Custom transition rate classes matrix for the "matrix" model
#     transform = "lambda", # Example of additional parameters that can be pass down
#     # to geiger::fitDiscrete() to control tree transformation.
#     res = 100, # To set the resolution of the continuous mapping of states on the densityMaps
#     # Reduce the number of Stochastic Mapping simulations to save time (Default = '1000')
#     nb_simulations = 100,
#     colors_per_levels = colors_per_states,
#     # Plot the densityMaps with plot_densityMaps_overlay() to show all states at once.
#     plot_overlay = TRUE,
#     # To export in PDF the densityMaps generated (Here a single map as 'plot_overlay = TRUE')
#     # PDF_file_path = "./eel_densityMaps_overlay.pdf",
#     return_ace = TRUE, # To include Ancestral Character Estimates (ACE) at nodes in the output
#     return_simmaps = TRUE, # To include the Stochastic Mapping simulations (simmaps) in the output
#     return_best_model_fit = TRUE, # To include the best model fit in the output
#     return_model_selection_df = TRUE, # To include the df for model selection in the output
#     verbose = TRUE) # To display progress
# 
# 
# # ------ Step 2: Explore output ------ #
# 
# ## Explore output
# str(eel_cat_3lvl_data, 1)
# 
# ## Extract the densityMaps showing posterior probabilities of states on the phylogeny
# ## as estimated from the model
# eel_densityMaps <- eel_cat_3lvl_data$densityMaps
# 
# # Plot densityMap for each state.
# # Grey represents absence of the state. Color represents presence of the state.
# plot(eel_densityMaps[[1]]) # densityMap for state n°1 ("bite")
# plot(eel_densityMaps[[2]]) # densityMap for state n°2 ("kiss")
# plot(eel_densityMaps[[3]]) # densityMap for state n°3 ("suction")
# 
# # Plot all densityMaps overlaid in on a single phylogeny.
# # Each color highlights presence of its associated state.
# plot_densityMaps_overlay(eel_densityMaps)
# 
# # Plot with a new color scheme
# new_colors_per_states <- c("red", "pink", "goldenrod")
# names(new_colors_per_states) <- c("bite", "kiss", "suction")
# 
# plot_densityMaps_overlay(
#     densityMaps = eel_densityMaps,
#     colors_per_levels = new_colors_per_states)
#     # PDF_file_path = "./eel_densityMaps_overlay_new_colors.pdf")
# 
# # The densityMaps are the main input needed to perform a deepSTRAPP run on categorical trait data.
# 
# ## Extract the Ancestral Character Estimates (ACE) = trait values at nodes
# eel_ACE <- eel_cat_3lvl_data$ace
# head(eel_ACE)
# # This is a matrix with row.names = internal node ID, colnames = ancestral states,
# # and values = posterior probabilities.
# # It can be used as an optional input in deepSTRAPP run to provide perfectly accurate estimates
# # for ancestral states at internal nodes.
# 
# ## Explore summary of model selection
# eel_cat_3lvl_data$model_selection_df # Summary of model selection
# # Models are compared using the corrected Akaike's Information Criterion (AICc)
# # Akaike's weights represent the probability that a given model is the best
# # among the set of candidate models, given the data.
# # Here, the best model is the Equal-Rates model ('ER')
# 
# ## Explore best model fit (ER model)
# eel_cat_3lvl_data$best_model_fit # Summary of best model optimization by geiger::fitContinuous()
# eel_cat_3lvl_data$best_model_fit$opt # Parameter estimates and goodness-of-fit information
# # Unique transition parameter = 0.0208 transitions per branch per My.
# 
# ## Explore simmaps
# # Since we selected 'return_simmaps = TRUE',
# # Stochastic Mapping simulations (simmaps) are included in the output
# # Each simmap represents a simulated evolutionary history with final states compatible
# # with the tip_data and estimated ACE at nodes.
# # Each simmap also follows the transition parameters of the best fit model
# # to simulate transitions along branches.
# 
# # Plot simmap n°1 using the same color scheme as in densityMaps
# plot(eel_cat_3lvl_data$simmaps[[1]], colors = colors_per_states, fsize = 0.7)
# # Plot simmap n°10 using the same color scheme as in densityMaps
# plot(eel_cat_3lvl_data$simmaps[[10]], colors = colors_per_states, fsize = 0.7)
# # Plot simmap n°100 using the same color scheme as in densityMaps
# plot(eel_cat_3lvl_data$simmaps[[100]], colors = colors_per_states, fsize = 0.7)
# 
# 
# ## Inputs needed to run deepSTRAPP are the densityMaps (eel_densityMaps), and optionally,
# ## the tip_data (eel_tip_data), and the ACE (eel_ACE)
# 
# 

## ----model_trait_evolution_cat_eval, fig.height = 7, eval = TRUE, echo = FALSE, out.width = "100%"----
# Load directly output
data(eel_cat_3lvl_data, package = "deepSTRAPP")

## Set colors per states
colors_per_states <- c("limegreen", "orange", "dodgerblue")
names(colors_per_states) <- c("bite", "kiss", "suction")

## Plot densityMaps

# Plot all densityMaps overlaid in on a single phylogeny.
plot_densityMaps_overlay(eel_cat_3lvl_data$densityMaps, fsize = 0.6)

# Plot with a new color scheme
new_colors_per_states <- c("red", "pink", "goldenrod")
names(new_colors_per_states) <- c("bite", "kiss", "suction")

plot_densityMaps_overlay(
    densityMaps = eel_cat_3lvl_data$densityMaps,
    colors_per_levels = new_colors_per_states,
    fsize = 0.6)

## Explore summary of model selection
eel_cat_3lvl_data$model_selection_df # Summary of model selection

## Explore simmaps
plot(eel_cat_3lvl_data$simmaps[[1]], colors = colors_per_states, fsize = 0.7)
title(main = "\nStochastic Mapping simulation n°1")

plot(eel_cat_3lvl_data$simmaps[[10]], colors = colors_per_states, fsize = 0.7)
title(main = "\nStochastic Mapping simulation n°10")