\documentclass[11pt]{article}

\usepackage[utf8]{inputenc}
\usepackage{indentfirst}
\usepackage{natbib}
\usepackage[colorlinks=true,allcolors=blue]{hyperref}
\usepackage{url}
\usepackage{doi}
\RequirePackage{amsmath}

\newcommand{\boldbeta}{{\boldsymbol{\beta}}}
\newcommand{\boldeta}{{\boldsymbol{\eta}}}
\newcommand{\boldtheta}{{\boldsymbol{\theta}}}
\newcommand{\boldthetahat}{{\boldsymbol{\hat{\theta}}}}
\newcommand{\boldxi}{{\boldsymbol{\xi}}}
\newcommand{\boldtau}{{\boldsymbol{\tau}}}
\newcommand{\boldvarphi}{{\boldsymbol{\varphi}}}
\newcommand{\boldzeta}{{\boldsymbol{\zeta}}}
\newcommand{\boldA}{{\mathbf{A}}}
\newcommand{\boldB}{{\mathbf{B}}}
\newcommand{\boldM}{{\mathbf{M}}}

\let\code=\texttt

% \VignetteIndexEntry{Aster Models and the Delta Method}

\begin{document}

\title{Aster Models and the Delta Method}
\author{Charles J. Geyer}
\maketitle

\begin{abstract}
There are many ways to do the calculations involved in the delta method.
Here we illustrate what we consider the easiest way to use the delta method
to obtain standard errors for functions of parameters and random effects
(if any) for models fit by R package \code{aster}.
\end{abstract}

\section{License}

This work is licensed under a Creative Commons Attribution-ShareAlike 4.0
International License \url{http://creativecommons.org/licenses/by-sa/4.0/}.

\section{R}

\begin{itemize}
\item The version of R used to make this document is \Sexpr{getRversion()}.
\item The version of the \texttt{knitr} package used to make this document is
    \Sexpr{packageVersion("knitr")}.
\item The version of the \texttt{aster} package used to make this document is
    \Sexpr{packageVersion("aster")}.
\item The version of the \texttt{numDeriv} package used to make this document is
    \Sexpr{packageVersion("numDeriv")}.
\end{itemize}

<<>>=
options(keep.source = TRUE, width = 80)
@

<<>>=
library(aster)
library(numDeriv)
@

\section{The Delta Method: Old Way and New Way}

The first paper about aster models \citep*[Section~3.3]{aster} already briefly
mentions using the delta method, along with the method of R generic function
\code{predict} that handles objects of class \code{aster}, to obtain standard
errors for nonlinear functions of parameters.  The technical report
\citep*[Appendix~A]{tr644} that provides supplementary material for that paper
gives more details.  That is the ``old way'' to apply the delta method to
aster models.

Since that time, there have been a lot of changes to R package \code{aster}
\citep{aster-package}.  Aster models with random effects \citep*{reaster} were
added, so now we also need the delta method to obtain standard errors for
estimates from model fits of class \code{reaster} as well as class
\code{aster}.  But new tools have been developed too.  R package \code{aster}
has gained methods of R generic function \code{vcov} that handle R objects of
class \code{aster} or class \code{reaster}.  R package \code{numDeriv} has
functions that calculate derivatives of nonlinear functions without users
having to know (or explain) calculus.  The ``new way'' to apply the delta
method to aster models uses these new tools.  This new way is exemplified in
\citet*{zenodo} and \citet*{realized}.

The delta method says that if a vector parameter estimate $\hat{\beta}$ has an
approximate multivariate normal distribution with mean $\beta$ (the true
unknown vector parameter value) and variance-covariance matrix $\Sigma$, and
if $g$ is a vector-to-vector function differentiable at $\beta$ with derivative
matrix (also called Jacobian matrix) $B = \nabla g(\beta)$, then the estimate
$g(\hat{\beta})$ has an approximate multivariate normal distribution with mean
$g(\beta)$ and variance-covariance matrix $B \Sigma B^T$.

The ``new way'' gets $\Sigma$ from R generic function \code{vcov}, gets the
Jacobian matrix from R function \code{jacobian} in R package \code{numDeriv},
and does the matrix multiplication \verb@B %*% Sigma %*% t(B)@ explicitly in R.
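In outline, then, every application of the new way has the same three steps.
The following chunk is only a schematic sketch (it is not executed and is not
part of any analysis in this document): \code{fit} stands for a fitted model
object of class \code{aster}, \code{theta.hat} for its estimated parameter
vector (for example, \verb@fit$coef@), and \code{g} for a user-written R
function mapping the parameter vector to the quantities of interest.
<<eval=FALSE>>=
# schematic only: fit, theta.hat, and g are placeholders, not objects
# defined anywhere in this document
Sigma <- vcov(fit)            # asymptotic variance-covariance matrix
B <- jacobian(g, theta.hat)   # numerically differentiate g at theta.hat
V <- B %*% Sigma %*% t(B)     # delta method: asymptotic variance of g(theta.hat)
sqrt(diag(V))                 # standard errors for components of g(theta.hat)
@
The examples below follow exactly this pattern, except that for random effects
models the role of \code{theta.hat} is played by a combined vector of fixed
effects, random effects, and variance components.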
For future reference (we use this in Section~\ref{sec:example-ii} below) we
note that the delta method can be applied recursively.  The composition of
differentiable functions is differentiable, and its derivative is the matrix
product of the derivatives (Jacobian matrices).  If $g_3 = g_1 \circ g_2$,
meaning
$$
   g_3(\beta) = g_1(g_2(\beta)), \qquad \text{for all $\beta$},
$$
and
$$
   B_1 = \nabla g_1(g_2(\beta))
   \qquad \text{and} \qquad
   B_2 = \nabla g_2(\beta)
$$
(each Jacobian evaluated at the appropriate point), then the chain rule says
$$
   B_3 = \nabla g_3(\beta) = B_1 B_2.
$$
So the delta method says that the variance-covariance matrix of
$g_3(\hat{\beta})$ is
$$
   (B_1 B_2) \Sigma (B_1 B_2)^T = B_1 B_2 \Sigma B_2^T B_1^T,
$$
which is just the delta method applied twice (first to $g_2$ and then to
$g_1$).

\section{Example I: No Random Effects}

\subsection{Introduction}

We redo an example from a technical report \citep{aster3tr} that is
supplementary material for the paper \citet{aster3}.  Because the analysis went
beyond what scientists had previously imagined could be done, that paper used
simulated data to expand horizons.  (Later papers did apply these methods to
real data.)
<<>>=
data(sim)
@

We fit a quadratic regression of fitness on phenotypic traits of an organism.
The corresponding mean value parameter, considered as a function of the
phenotypic traits (predictor variables), is called the \emph{fitness landscape}.
<<>>=
aout <- aster(resp ~ varb + 0 + z1 + z2 + I(z1^2) + I(z1 * z2) + I(z2^2),
    pred, fam, varb, id, root, data = redata)
summary(aout)
@

\subsection{Point Estimate}

We want the point where the fitness landscape achieves its maximum, which is
calculated by the function
<<>>=
foo <- function(beta) {
    A <- matrix(NaN, 2, 2)
    A[1, 1] <- beta["I(z1^2)"]
    A[2, 2] <- beta["I(z2^2)"]
    A[1, 2] <- beta["I(z1 * z2)"] / 2
    A[2, 1] <- beta["I(z1 * z2)"] / 2
    b <- rep(NaN, 2)
    b[1] <- beta["z1"]
    b[2] <- beta["z2"]
    solve(-2 * A, b)
}
@
Try it.
<<>>=
foo(aout$coef)
@
This agrees with the ``old way'' shown in \citet[Section~4]{aster3tr}.

\subsection{Standard Errors}

\subsubsection{The New Way}

The new way does
<<>>=
Sigma <- vcov(aout)
B <- jacobian(foo, aout$coef)
V <- B %*% Sigma %*% t(B)
V
@
This disagrees with the calculation in \citet[Section~4]{aster3tr}.  Something
is wrong.  Either R function \code{jacobian} is wrong, or \citeauthor{aster3tr}
botched their calculus (or their R implementation of that calculus).  As we
shall see, they did indeed botch the calculus.

\subsubsection{The Old Way}

This section can be skipped.  It is the old way implemented correctly.

The old way extracts \verb@aout$fisher@ with the dollar sign operator rather
than using a helper function and then explicitly inverts this matrix.
<<>>=
all.equal(Sigma, solve(aout$fisher), check.attributes = FALSE)
@
That checks that the old way and the new way do the same thing for this part
(the asymptotic variance-covariance matrix of the coefficients vector).
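The other ingredient of the old way is the Jacobian matrix, which the old way
derives analytically.  The derivation below uses the matrix calculus identity
$$
   \frac{\partial A^{-1}}{\partial \beta_k}
   =
   - A^{-1} \frac{\partial A}{\partial \beta_k} A^{-1}.
$$
The following chunk is not part of the original analysis; it is only a sanity
check that verifies this identity numerically, using R function \code{jacobian}
on a small made-up matrix-valued function (everything in it is hypothetical).
<<>>=
# numerical check of d(A^{-1})/dt = - A^{-1} (dA/dt) A^{-1} (illustration only)
A.of.t <- function(t) matrix(c(2 + t, t / 2, t / 2, 3 - t), 2, 2)
dA.dt <- matrix(c(1, 1 / 2, 1 / 2, -1), 2, 2)
t.zero <- 0.1
numerical <- matrix(jacobian(function(t) as.vector(solve(A.of.t(t))), t.zero), 2, 2)
analytic <- (- solve(A.of.t(t.zero))) %*% dA.dt %*% solve(A.of.t(t.zero))
all.equal(numerical, analytic, tolerance = 1e-6)
@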
Now we have to differentiate
$$
   c = - \tfrac{1}{2} A^{-1} b
$$
with respect to any parameter $\beta_k$, which gives
$$
   \frac{\partial c}{\partial \beta_k}
   =
   - \tfrac{1}{2} A^{-1} \frac{\partial b}{\partial \beta_k}
   + \tfrac{1}{2} A^{-1} \frac{\partial A}{\partial \beta_k} A^{-1} b.
$$
Do it.
<<>>=
beta <- aout$coef
A <- matrix(NaN, 2, 2)
A[1, 1] <- beta["I(z1^2)"]
A[2, 2] <- beta["I(z2^2)"]
A[1, 2] <- beta["I(z1 * z2)"] / 2
A[2, 1] <- beta["I(z1 * z2)"] / 2
b <- rep(NaN, 2)
b[1] <- beta["z1"]
b[2] <- beta["z2"]
jack <- matrix(0, nrow = nrow(B), ncol = ncol(B))
# d b / d beta["z1"] is c(1, 0), and A does not depend on beta["z1"]
i <- names(beta) == "z1"
jack[ , i] <- solve(-2 * A, c(1, 0))
# d b / d beta["z2"] is c(0, 1), and A does not depend on beta["z2"]
i <- names(beta) == "z2"
jack[ , i] <- solve(-2 * A, c(0, 1))
# d A / d beta["I(z1^2)"]
dA <- matrix(0, 2, 2)
dA[1, 1] <- 1
i <- names(beta) == "I(z1^2)"
jack[ , i] <- (1 / 2) * solve(A) %*% dA %*% solve(A) %*% b
# d A / d beta["I(z2^2)"]
dA <- matrix(0, 2, 2)
dA[2, 2] <- 1
i <- names(beta) == "I(z2^2)"
jack[ , i] <- (1 / 2) * solve(A) %*% dA %*% solve(A) %*% b
# d A / d beta["I(z1 * z2)"]
dA <- matrix(0, 2, 2)
dA[1, 2] <- 1 / 2
dA[2, 1] <- 1 / 2
i <- names(beta) == "I(z1 * z2)"
jack[ , i] <- (1 / 2) * solve(A) %*% dA %*% solve(A) %*% b
all.equal(jack, B)
@
This check shows that R function \code{jacobian} in R package \code{numDeriv}
does this calculus correctly.

\subsubsection{Comment}

The new way is a lot easier than the old way, which is so hard that it was
botched by \citet[Section~4]{aster3tr}.

\subsubsection{Clean Up}

Clear the R global environment, removing the trash from Example I.
<<>>=
rm(list = ls())
@

\section{Example II: With Random Effects} \label{sec:example-ii}

\subsection{Fit Aster Model with Random Effects}

Fit a random effects aster model.  Unfortunately, this takes more time than
CRAN allows, so we precompute this result and save it (so CRAN does not
notice).  To see this actually work, remove the file \code{vignettes/rout.rda}
from the aster package sources and rebuild this vignette.
<<>>=
lout <- suppressWarnings(try(load("rout.rda"), silent = TRUE))
if (inherits(lout, "try-error")) {
    data(grey_cloud_2015)
    modmat.sire <- model.matrix(~ 0 + fit:paternalID, redata)
    modmat.dam <- model.matrix(~ 0 + fit:maternalID, redata)
    modmat.siredam <- cbind(modmat.sire, modmat.dam)
    rout.time <- system.time(
        rout <- reaster(resp ~ fit + varb,
            list(parental = ~ 0 + modmat.siredam, block = ~ 0 + fit:block),
            pred, fam, varb, id, root, data = redata)
    )
    save(rout, rout.time, file = "rout.rda")
}
@

<<>>=
rout.time.seconds <- rout.time["elapsed"] %% 60
rout.time.minutes <- rout.time["elapsed"] %/% 60
@

This invocation of R function \code{reaster} on this model for these data takes
\Sexpr{rout.time.minutes} minutes and \Sexpr{round(rout.time.seconds, 1)}
seconds (on one computer).  Not really long.  We've seen worse.

The results of such a fit are the following ``estimates'' (in scare quotes):
the vector \code{alpha} of fixed effects, the vector \code{b} of random
effects, and the vector \code{nu} of variance components.  The reason for the
scare quotes is that $\alpha$ and $\nu$ are indeed parameters of the model, and
\verb@rout$alpha@ and \verb@rout$nu@ are their approximate maximum likelihood
estimates \citep{reaster}.  But, at least according to the frequentist theory
of statistics, the vector $b$ of random effects is not a parameter of the
model, so it makes no sense to estimate it, or, at least, it is very unclear
what it would even mean to ``estimate'' it (hence the scare quotes).  What
\verb@rout$b@ actually is, is the conditional mode of the distribution of the
random effects given the data and the parameters.  It is, of course, a function
of the parameters $\alpha$ and $\nu$.
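The delta method below treats the combined vector $(\alpha, b, \nu)$ as one
long ``parameter'' vector (scare quotes again).  Purely for orientation (this
chunk is not needed for the analysis), the sizes of its three blocks for this
fit are
<<>>=
# sizes of the blocks of the combined vector (alpha, b, nu); illustration only
c(fixed = length(rout$alpha), random = length(rout$b),
    variance.components = length(rout$nu))
@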
\subsection{Asymptotic Variance-Covariance Matrix}

So we can use the delta method to calculate a joint asymptotic
variance-covariance matrix for the combined vector $(\alpha, b, \nu)$, and
this is what
<<>>=
Sigma <- vcov(rout, standard.deviation = FALSE, re.too = TRUE)
@
does for us.

\subsection{Map Random Effect Estimates to Mean Values}

The vector $b$ of random effects is not scientifically interpretable.  Hence we
map it to the mean value parameter scale (response scale).  In the process, we
also correct the means for the artificial subsampling done in the experiment.
For more explanation of what is going on here, see \citet{realized} and
references cited therein.

The following R function is just copied from the supplementary material for
\citet{realized} (you are not expected to understand this).
<<>>=
map.factory <- function(rout) {
    stopifnot(inherits(rout, "reaster"))
    aout <- rout$obj
    stopifnot(inherits(aout, "aster"))
    nnode <- ncol(aout$x)
    nind <- nrow(aout$x)
    fixed <- rout$fixed
    random <- rout$random
    if (nnode == 4) {
        is.subsamp <- rep(FALSE, 4)
    } else if (nnode == 5) {
        is.subsamp <- c(FALSE, FALSE, FALSE, TRUE, FALSE)
    } else stop("can only deal with graphs for individuals with 4 or 5 nodes",
        "\nand graph is linear, and subsampling arrow is 4th of 5")
    # fake object of class aster
    randlab <- unlist(lapply(rout$random, colnames))
    include.random <- grepl("paternalID", randlab, fixed = TRUE)
    fake.out <- aout
    fake.beta <- with(rout, c(alpha, b[include.random]))
    modmat.random <- Reduce(cbind, random)
    stopifnot(ncol(modmat.random) == length(rout$b))
    # never forget drop = FALSE in programming R
    modmat.random <- modmat.random[ , include.random, drop = FALSE]
    fake.modmat <- cbind(fixed, modmat.random)
    # now have to deal with objects of class aster (as opposed to reaster)
    # thinking model matrices are three-way arrays.
    stopifnot(prod(dim(aout$modmat)[1:2]) == nrow(fake.modmat))
    fake.modmat <- array(as.vector(fake.modmat),
        dim = c(dim(aout$modmat)[1:2], ncol(fake.modmat)))
    fake.out$modmat <- fake.modmat
    nparm <- length(rout$alpha) + length(rout$b) + length(rout$nu)
    is.alpha <- 1:nparm %in% seq_along(rout$alpha)
    is.bee <- 1:nparm %in% (length(rout$alpha) + seq_along(rout$b))
    is.nu <- (! (is.alpha | is.bee))
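    # note (this comment is added in this vignette and is not in the original
    # copied function): is.alpha, is.bee, and is.nu are logical index vectors
    # that split the combined argument (alpha, b, nu) of the function returned
    # below back into its three blocks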
    # figure out individuals from each family
    m <- rout$random$parental
    dads <- grep("paternal", colnames(m))
    # get family, that is, paternalID or grandpaternalID as the case may be
    fams <- colnames(m)[dads] |> sub("^.*ID", "", x = _)
    # drop maternal effects columns (if any)
    m.dads <- m[ , dads, drop = FALSE]
    # make into 3-dimensional array, like obj$modmat
    m.dads <- array(m.dads, c(nind, nnode, ncol(m.dads)))
    # only keep fitness node
    # only works for linear graph
    m.dads <- m.dads[ , nnode, ]
    # redefine dads as families of individuals
    stopifnot(as.vector(m.dads) %in% c(0, 1))
    stopifnot(rowSums(m.dads) == 1)
    # tricky, only works because each row of m.dads is indicator vector of
    # family, so we are multiplying family number by zero or one
    dads <- drop(m.dads %*% as.integer(fams))
    # find one individual in each family
    sudads <- sort(unique(dads))
    which.ind <- match(sudads, dads)
    function(alphabeenu) {
        stopifnot(is.numeric(alphabeenu))
        stopifnot(is.finite(alphabeenu))
        stopifnot(length(alphabeenu) == nparm)
        alpha <- alphabeenu[is.alpha]
        bee <- alphabeenu[is.bee]
        nu <- alphabeenu[is.nu]
        fake.beta <- c(alpha, bee[include.random])
        fake.out$coefficients <- fake.beta
        pout <- predict(fake.out, model.type = "conditional",
            is.always.parameter = TRUE)
        xi <- matrix(pout, ncol = nnode)
        xi <- xi[ , ! is.subsamp, drop = FALSE]
        mu <- apply(xi, 1, prod)
        mu <- mu[which.ind]
        names(mu) <- paste0("PID",
            formatC(sudads, format = "d", width = 3, flag = "0"))
        return(mu)
    }
}
@
Rather than try to explain what this function does and how (see the
supplementary material for \citet{realized} for that), just notice that
\begin{itemize}
\item this function is very complicated and not easy to differentiate, and
\item this function did arise in a real application, so it cannot be avoided.
\end{itemize}
So try it out.
<<>>=
alphabeenu <- with(rout, c(alpha, b, nu))
map <- map.factory(rout)
mu.hat <- map(alphabeenu)
mu.hat
@

\subsection{One Application of the Delta Method}

Great!  Now we want an (approximate) variance-covariance matrix for this
(vector) estimate.
<<>>=
jack <- jacobian(map, alphabeenu)
Sigma.mu.hat <- jack %*% Sigma %*% t(jack)
@
So that is one application of the delta method.

\subsection{Another Application of the Delta Method}

But we actually wanted a function of these estimates.
<<>>=
fitness_change <- function(mu) mean(mu * (mu / mean(mu) - 1))
delta.fitness <- fitness_change(mu.hat)
delta.fitness
@
And, of course, we need a variance for this too.  This is the recursive use of
the delta method discussed above: we apply the delta method again, now with
\code{Sigma.mu.hat} playing the role of $\Sigma$.
<<>>=
jack <- jacobian(fitness_change, mu.hat)
Sigma.delta.fit <- jack %*% Sigma.mu.hat %*% t(jack)
Sigma.delta.fit
@
And the standard error.
<<>>=
sqrt(drop(Sigma.delta.fit))
@
This agrees with Table~1 of the supplementary material for \citet{realized}.
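To illustrate one use of this standard error (this chunk is not part of the
original analysis for \citet{realized}), an asymptotic 95\% Wald confidence
interval for this fitness change would be
<<>>=
# asymptotic 95% Wald confidence interval (illustration only)
delta.fitness + c(-1, 1) * qnorm(0.975) * sqrt(drop(Sigma.delta.fit))
@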
\section{Summary}

The new way works and requires no calculus, so it is less prone to mistakes
and easier to explain.

\begin{thebibliography}{}

\bibitem[Geyer(2025)]{aster-package}
Geyer, C.~J. (2025).
\newblock R package \texttt{aster}: Aster Models, version 1.3-6.
\newblock \url{https://cran.r-project.org/package=aster}.

\bibitem[Geyer et~al.(2022)Geyer, Kulbaba, Sheth, Pain, Eckhart, and Shaw]{zenodo}
Geyer, C.~J., Kulbaba, M.~W., Sheth, S.~N., Pain, R.~E., Eckhart, V.~M., and
    Shaw, R.~G. (2022).
\newblock Correction for Kulbaba et al. (2019).
\newblock \emph{Evolution}, \textbf{76}, 3074.
\newblock \doi{10.1111/evo.14607}.
\newblock Supplementary material, version 2.0.1.
\newblock \doi{10.5281/zenodo.7013098}.

\bibitem[Geyer et~al.(2013)Geyer, Ridley, Latta, Etterson, and Shaw]{reaster}
Geyer, C.~J., Ridley, C.~E., Latta, R.~G., Etterson, J.~R., and Shaw, R.~G.
    (2013).
\newblock Local adaptation and genetic effects on fitness: Calculations for
    exponential family models with random effects.
\newblock \emph{Annals of Applied Statistics}, \textbf{7}, 1778--1795.
\newblock \doi{10.1214/13-AOAS653}.

\bibitem[Geyer and Shaw(2010)]{aster3tr}
Geyer, C.~J., and Shaw, R.~G. (2010).
\newblock Hypothesis tests and confidence intervals involving fitness
    landscapes fit by aster models.
\newblock Technical Report No.~674 revised.  School of Statistics, University
    of Minnesota.
\newblock \url{https://hdl.handle.net/11299/56328}.

\bibitem[Geyer et~al.(2005)Geyer, Wagenius, and Shaw]{tr644}
Geyer, C.~J., Wagenius, S., and Shaw, R.~G. (2005).
\newblock Aster models for life history analysis.
\newblock Technical Report No.~644.  School of Statistics, University of
    Minnesota.
\newblock \url{http://hdl.handle.net/11299/199666}.

\bibitem[Geyer et~al.(2007)Geyer, Wagenius, and Shaw]{aster}
Geyer, C.~J., Wagenius, S., and Shaw, R.~G. (2007).
\newblock Aster models for life history analysis.
\newblock \emph{Biometrika}, \textbf{94}, 415--426.
\newblock \doi{10.1093/biomet/asm030}.

\bibitem[Shaw and Geyer(2010)]{aster3}
Shaw, R.~G., and Geyer, C.~J. (2010).
\newblock Inferring fitness landscapes.
\newblock \emph{Evolution}, \textbf{64}, 2510--2520.
\newblock \doi{10.1111/j.1558-5646.2010.01010.x}.

\bibitem[Shaw et~al.(in preparation)Shaw, Geyer, Kulbaba, Sheth, Eckhart, and
    Pain]{realized}
Shaw, R.~G., Geyer, C.~J., Kulbaba, M.~W., Sheth, S.~N., Eckhart, V.~M., and
    Pain, R.~E. (in preparation).
\newblock Realization of ongoing evolutionary adaptation in the field.
\newblock Supplementary material (data analysis) for a draft paper
    \code{analysis/realized.pdf} in this git repository:
\newblock \url{https://github.com/cjgeyer/MeanFitness}.
\newblock Permanent repository: \doi{10.5281/zenodo.17093288}.

\end{thebibliography}

\end{document}