## ----setup, include=FALSE----------------------------------------------------- # knitr::opts_chunk$set(warning = FALSE, message = FALSE) library(lme4breeding) ## ----------------------------------------------------------------------------- data(DT_example) DT <- DT_example A <- A_example ansMain <- lmebreed(Yield ~ Env + (1|Name), relmat = list(Name = A ), verbose = 0L, trace=0L, data=DT) vc <- VarCorr(ansMain); print(vc,comp=c("Variance")) ## ----------------------------------------------------------------------------- ansDG <- lmebreed(Yield ~ Env + (0+Env || Name), relmat = list(Name = A ), verbose = 0L, trace=0L, data=DT) vc <- VarCorr(ansDG); print(vc,comp=c("Variance")) ve <- attr(vc, "sc")^2; ve ## ----------------------------------------------------------------------------- DT$EnvName <- paste(DT$Env, DT$Name, sep = ":") E <- Matrix::Diagonal(length(unique(DT$Env))); colnames(E) <- rownames(E) <- unique(DT$Env);E EA <- Matrix::kronecker(E,A, make.dimnames = TRUE) ansCS <- lmebreed(Yield ~ Env + (1|Name) + (1|EnvName), relmat = list(Name = A, EnvName= EA ), verbose = 0L, trace=0L, data=DT) vc <- VarCorr(ansCS); print(vc,comp=c("Variance")) ve <- attr(vc, "sc")^2; ve ## ----------------------------------------------------------------------------- ansCSDG <- lmebreed(Yield ~ Env + (Env || Name), relmat = list(Name = A ), verbose = 0L, trace=0L, data=DT) vc <- VarCorr(ansCSDG); print(vc,comp=c("Variance")) ve <- attr(vc, "sc")^2; ve ## ----------------------------------------------------------------------------- ansUS <- lmebreed(Yield ~ Env + (0+Env | Name), relmat = list(Name = A ), verbose = 0L, trace=0L, data=DT) vc <- VarCorr(ansUS); print(vc,comp=c("Variance")) ve <- attr(vc, "sc")^2; ve ## ----------------------------------------------------------------------------- # library(orthopolynom) # DT$EnvN <- as.numeric(as.factor(DT$Env)) # # add new columns to dataset # Z <- with(DT, smm(leg(EnvN,1)) ) # for(i in 1:ncol(Z)){DT[,colnames(Z)[i]] <- Z[,i]} # ansRR <- lmebreed(Yield ~ Env + (0+leg0+leg1| Name), # relmat = list(Name = A ), # verbose = 0L, trace=0L, # data=DT) # vc <- VarCorr(ansRR); print(vc,comp=c("Variance")) # ve <- attr(vc, "sc")^2; ve ## ----------------------------------------------------------------------------- # ansRR2 <- lmebreed(Yield ~ Env + (0+leg0+leg1|| Name), # relmat = list(Name = A ), # verbose = 0L, trace=0L, # data=DT) # vc <- VarCorr(ansRR2); print(vc,comp=c("Variance")) # ve <- attr(vc, "sc")^2; ve ## ----------------------------------------------------------------------------- data(DT_h2) DT <- DT_h2 ## build the environmental index ei <- aggregate(y~Env, data=DT,FUN=mean) colnames(ei)[2] <- "envIndex" ei$envIndex <- ei$envIndex - mean(ei$envIndex,na.rm=TRUE) # center the envIndex to have clean VCs ei <- ei[with(ei, order(envIndex)), ] ## add the environmental index to the original dataset DT2 <- merge(DT,ei, by="Env") DT2 <- DT2[with(DT2, order(Name)), ] ansFW <- lmebreed(y~ Env + (envIndex || Name), verbose = 0L, trace=0L, data=DT2) vc <- VarCorr(ansFW); print(vc,comp=c("Variance")) ve <- attr(vc, "sc")^2; ve ## ----------------------------------------------------------------------------- ansFW2 <- lmebreed(y~ Env + (envIndex | Name), verbose = 0L, trace=0L, data=DT2) vc <- VarCorr(ansFW2); print(vc,comp=c("Variance")) ve <- attr(vc, "sc")^2; ve ## ----------------------------------------------------------------------------- data(DT_h2) DT <- DT_h2 DT=DT[with(DT, order(Env)), ] head(DT) indNames <- na.omit(unique(DT$Name)) A <- diag(length(indNames)) rownames(A) <- colnames(A) <- indNames # fit diagonal model first to produce a two-way table of genotype by env BLUPs Z <- with(DT, smm(Env)) diagFormula <- paste0( "y ~ Env + (0+", paste(colnames(Z), collapse = "+"), "|| Name)") for(i in 1:ncol(Z)){DT[,colnames(Z)[i]] <- Z[,i]} ans1a <- lmebreed(as.formula(diagFormula), relmat = list(Name = A ), verbose = 0L, trace=0L, data=DT) vc <- VarCorr(ans1a); H0 <- ranef(ans1a)$Name # GxE table # Use the GxE table to obtain loadings and build loadings columns in dataset nPC=3 Z <- with(DT, rrm(Env, H = H0, nPC = nPC) ) for(i in 1:ncol(Z)){DT[,colnames(Z)[i]] <- Z[,i]} # fit the FA model (rr + diag) to calculate factor scores ansFA <- lmebreed(y ~ Env + (0+PC1+PC2+PC3|Name) + (0+Env|| Name), relmat = list(Name = A ), verbose = 0L, trace=0L, data=DT) u <- ranef(ansFA)$Name # all BLUPs scores <- as.matrix(u[,1:nPC]) # extract factor scores loadings=with(DT, rrm(Env, nPC = nPC, H = H0, # lodings (latent covars) returnGamma = TRUE) )$Gamma vc <- VarCorr(ansFA); print(vc,comp=c("Variance")) # extract all varcomps vcFA <- vc[[1]] # G of PCs only vcDG <- diag( unlist(lapply(vc[2:16], function(x){x[[1]]})) ) # G of diag model vcUS <- loadings %*% vcFA %*% t(loadings) # G of unstructured model G <- vcUS + vcDG # total G as the sum of both Gs colfunc <- colorRampPalette(c("steelblue4","springgreen","yellow")) hv <- heatmap(cov2cor(G), col = colfunc(100), symm = TRUE) uFA <- scores %*% t(loadings) # recover UNS effects uDG <- as.matrix(u[,(nPC+1):ncol(u)]) # recover DIAG effects u <- uFA + uDG # total effects ## ----------------------------------------------------------------------------- ########## ## stage 1 ########## data(DT_h2) DT <- DT_h2 head(DT) envs <- unique(DT$Env) vals <- list() for(i in 1:length(envs)){ ans1 <- lmebreed(y~Name + (1|Block), trace = 0L, verbose = 0L, data= droplevels(DT[which(DT$Env == envs[i]),]) ) b <- fixef(ans1) b[2:length(b)] <- b[2:length(b)] + b[1] ids <- colnames(model.matrix(~Name-1, data=droplevels(DT[which(DT$Env == envs[i]),]) )) ids <- gsub("Name","",ids) vals[[i]] <- data.frame(Estimate=b , stdError= diag( vcov(ans1)), Effect= ids, Env= envs[i]) } DT2 <- do.call(rbind, vals) ########## ## stage 2 ########## DT2$w <- 1/DT2$stdError ans2 <- lmebreed(Estimate~Env + (1|Effect) + (1|Env:Effect), weights = w, data=DT2, verbose = 0L, trace=0L ) vc <- VarCorr(ans2); print(vc,comp=c("Variance")) ve <- attr(vc, "sc")^2; ve ## ----------------------------------------------------------------------------- # load data (phenotypes and genotypes) data(DT_big) DT = DT_big M = apply(M_big,2,as.numeric) # change from bits to numeric rownames(M) <- rownames(M_big) DT[,"envf_repf"] = paste(DT[,"envf"],DT[,"repf"],sep="_") # we will make up missing data indsNa <- sample(unique(DT$id), 200) nas <- which(DT$id %in% indsNa) # sample(1:nrow(DT), round(nrow(DT)*.25)) DT$value[nas]=NA DT$value <- imputev(DT$value, by=DT$envf_repf) DT$nas <- 0; DT$nas[nas]=1 # compute the relationship matrix MMT <- tcrossprod(M) m <- sum(diag(MMT))/nrow(MMT) MMT <- MMT/m MMT <- MMT + diag(1e-05, ncol(MMT), ncol(MMT)) ## Fit the main effect model lmod <- lmebreed(formula=value~(1|id), data = DT, relmat = list(id = MMT), verbose = 0L, trace=0L, rotation=TRUE ) # extract GEBVs ran0 <- ranef(lmod, condVar=FALSE) xx=as.data.frame(as.matrix(ran0$id) %*% matrix(rep(1,50), nrow=1) ) xx$id <- rownames(xx) # Compare against xxl <- reshape(xx, idvar = "id", varying = list(1:(ncol(xx)-1)), v.names = "gebv", direction = "long", timevar = "envf_repf", times = unique(DT$envf_repf) ) DTc <- merge(DT, xxl, by=c("id","envf_repf"), all.x = TRUE) with(DTc[which(DTc$nas ==1),], plot(gebv,gv)) with(DTc[which(DTc$nas ==1),], cor(gebv,gv)) # accuracy is 0.39 ## ----------------------------------------------------------------------------- ## Fit the diagonal model lmod <- lmebreed(formula=value~(0+envf_repf||id), data = DT, relmat = list(id = MMT), nIters = 1000, verbose = 0L, trace=0L, rotation=TRUE ) # Extract GEBVs ran0 <- ranef(lmod, condVar=FALSE) xx=as.data.frame(ran0$id)# u=H0 xx$id <- rownames(xx) # Compare against xxl <- reshape(xx, idvar = "id", varying = list(1:(ncol(xx)-1)), v.names = "gebv", direction = "long", timevar = "envf_repf", times = colnames(xx)[1:(ncol(xx)-1)]) DTc <- merge(DT, xxl, by=c("id","envf_repf"), all.x = TRUE) # with(DTc[which(DTc$nas ==1),], plot(gebv,gv)) with(DTc[which(DTc$nas ==1),], cor(gebv,gv)) # accuracy is ~0.62 ## ----------------------------------------------------------------------------- # lmodRes <- lmebreed(formula=value~(0+envf_repf||id) + (0+envf_repf||unitsR), data = DT, # relmat = list(id = MMT), nIters=1000, # verbose = 0L, trace=0L, rotation=TRUE # ) # # # Extract GEBVs # ran0 <- ranef(lmodRes, condVar=FALSE) # xx=as.data.frame(ran0$id)# u=H0 # xx$id <- rownames(xx) # # # Compare against # xxl <- reshape(xx, idvar = "id", varying = list(1:(ncol(xx)-1)), # v.names = "gebv", direction = "long", timevar = "envf_repf", # times = colnames(xx)[1:(ncol(xx)-1)]) # DTc <- merge(DT, xxl, by=c("id","envf_repf"), all.x = TRUE) # with(DTc[which(DTc$nas ==1),], plot(gebv,gv)) # with(DTc[which(DTc$nas ==1),], cor(gebv,gv)) # accuracy is ~0.62 ## ----------------------------------------------------------------------------- # # get latent covariates and add them to the dataset # Gammas <- with(DT, rrm(envf_repf, H = ran0$id, nPC = 3, returnGamma = TRUE)) # Zrr <- Gammas$Zstar # for(i in 1:ncol(Zrr)){DT[,colnames(Zrr)[i]] <- Zrr[,i]} # # fit the model # ansRR <- lmebreed(value~(0+PC1+PC2+PC3|id) + (0+envf_repf||id), data = DT, # relmat = list(id = MMT), nIters = 1000, # verbose = 0L, trace=0L, rotation=TRUE # ) # vc <- VarCorr(ansRR); print(vc,comp=c("Variance")) # ve <- attr(vc, "sc")^2; ve # # loadings=Gammas$Gamma # Gint <- loadings %*% vc$id %*% t(loadings) # Gspec <- diag( unlist(lapply(vc[2:(length(vc))], function(x){x[[1]]})) ) # G <- Gint + Gspec # u <- ranef(ansRR, condVar=FALSE)$id # uInter <- as.matrix(u[,1:3]) %*% t(as.matrix(loadings)) # uSpec <- as.matrix(u[,-c(1:3)]) # u <- uSpec + uInter # # xx=as.data.frame(u)# u=H0 # xx$id <- rownames(xx) # xxl <- reshape(xx, idvar = "id", varying = list(1:(ncol(xx)-1)), # v.names = "gebv", direction = "long", timevar = "envf_repf", # times = colnames(xx)[1:(ncol(xx)-1)]) # DTc <- merge(DT, xxl, by=c("id","envf_repf"), all.x = TRUE) # with(DTc[which(DTc$nas ==1),], plot(gebv,gv)) # with(DTc[which(DTc$nas ==1),], cor(gebv,gv)) # accuracy is 0.62