## ----include=FALSE------------------------------------------------------------ library(knitr) opts_chunk$set( concordance=TRUE ) ## ----load package, echo=FALSE, results='hide'--------------------------------- library("robCompositions") constSum <- function(x, const=1){ x / rowSums(x) * const } ## ----echo=FALSE, results='hide'----------------------------------------------- ##require(compositions) genData <- function(n=1000, out=0.05, Sigma=5*c(1,1)%*%t(c(1,1))+0.05*c(1,-1)%*%t(c(1,-1))){ ## Gruppe ohne Ausreisser: z <- mvrnorm(n, mu=c(0,2), Sigma=Sigma) N <- dim(z)[1] n1 <- N - floor(n*out) n2 <- N - floor(2*n*out) if(out > 0){ z[(n1+1):N, ] <- mvrnorm(floor(n*out), mu=c(0,6), Sigma=Sigma) ## erste Ausreissergruppe (Euclidean+Aitchison) } z <- isomLRinv(z) #ilr.inv(z) sum=runif(n1,0,1) #rnorm(n1,10,1) z[1:n1, ] <- z[1:n1,] * sum if(out > 0){ sum1=runif(floor(2*n*out),13,17) #rnorm(n2-n1,15,1) z[(n2+1):N, ] <- z[(n2+1):N, ] * sum1 z[(n1+1):n2, ] <- z[(n1+1):n2, ] * 10 } ## generate missings zmiss <- z s <- c(0.2, 0.1, 0.05, 0.05) for(i in 1:ncol(z)){ zmiss[sample(1:n2, floor(s[i]*n2)), i] <- NA #1:index } list(zmiss=data.frame(zmiss), z2=data.frame(z), good=n2) } ## ----echo=FALSE, results='hide'----------------------------------------------- genData <- function(n=1000, out=0.05, Sigma=1*c(1,1)%*%t(c(1,1))+0.05*c(1,-1)%*%t(c(1,-1))){ ## Gruppe ohne Ausreisser: z <- mvrnorm(n, mu=c(0,0), Sigma=Sigma) N <- dim(z)[1] n1 <- N - floor(n*out) n2 <- N - floor(2*n*out) if(out > 0){ z[(n1+1):N, ] <- mvrnorm(floor(n*out), mu=c(0,6), Sigma=Sigma) ## erste Ausreissergruppe (Euclidean+Aitchison) } z <- isomLRinv(z) #ilr.inv(z) sum=runif(n1,0,1) #rnorm(n1,10,1) z[1:n1, ] <- z[1:n1,] * sum if(out > 0){ sum1=runif(floor(2*n*out),13,17) #rnorm(n2-n1,15,1) z[(n2+1):N, ] <- z[(n2+1):N, ] * sum1 z[(n1+1):n2, ] <- z[(n1+1):n2, ] * 10 } ## generate missings zmiss <- z s <- c(0.2, 0.1, 0.05, 0.05) for(i in 1:ncol(z)){ zmiss[sample(1:n2, floor(s[i]*n2)), i] <- NA #1:index } list(zmiss=data.frame(zmiss), z2=data.frame(z), good=n2) } ## ----seed, echo=FALSE, message=FALSE, warning=FALSE--------------------------- set.seed(123) library("MASS") ## ----new data, echo=FALSE----------------------------------------------------- x <- genData(100) ## ----plot.acomp, echo=FALSE--------------------------------------------------- plot.acomp <- function (x, ..., labels = colnames(X), cn = colnames(X), aspanel = FALSE, id = FALSE, idlabs = NULL, idcol = 2, center = FALSE, scale = FALSE, pca = FALSE, col.pca = par("col"), margin = "acomp", add = FALSE, triangle = !add, col = par("col"), cexT=1.5, adj=-1 ) { col <- unclass(col) X <- oneOrDataset(x) oX <- X s60 <- sin(pi/3) c60 <- cos(pi/3) if (NCOL(X) > 3) { if (margin == "rcomp") infkt <- function(x, y, ...) { plot.acomp(rcompmargin(X, d = c(gsi.mapfrom01(x), gsi.mapfrom01(y)), pos = 1)[, c(3, 2, 1)], ..., aspanel = TRUE, center = center, scale = scale, col = col) } else if (margin == "acomp") { infkt <- function(x, y, ...) { plot.acomp(acompmargin(X, d = c(gsi.mapfrom01(x), gsi.mapfrom01(y)), pos = 1)[, c(3, 2, 1)], ..., aspanel = TRUE, center = center, scale = scale, col = col) } } else { if (!is.numeric(margin)) margin <- match(margin, colnames(X)) fest <- X[, margin, drop = FALSE] X <- X[, -margin] infkt <- function(x, y, ...) { plot.acomp(acomp(cbind(X[, c(gsi.mapfrom01(y), gsi.mapfrom01(x))], fest)), ..., aspanel = TRUE, center = center, scale = scale, col = col) } } nn <- NCOL(X) if (add) gsi.add2pairs(sapply(1:NCOL(X), gsi.mapin01), infkt, ...) else gsi.pairs(sapply(1:NCOL(X), gsi.mapin01), labels = labels, panel = infkt, ...) } else { if (is.null(cn)) { cn <- c(expression(x[1]), expression(x[2]), expression(x[3])) } if (aspanel) { usr <- par("usr") on.exit(par(usr)) par(usr = c(0, 1, 0, 1), pty = "s") lines(x = c(0, c60, 1, 0), y = c(0, s60, 0, 0)) text(0, 0.2, cn[1], pos = 4, offset = 0.01, xpd = TRUE, cex=2) text(1, 0.2, cn[2], pos = 2, offset = 0.01, xpd = TRUE, cex=2) text(0.5, s60, cn[3], pos = 3, offset = 0.01, xpd = TRUE, cex=2) } else { if (!add) { usr <- par("pty") on.exit(par(usr)) par(pty = "s") plot(x = c(0, c60, 1, 0), y = c(0, s60, 0, 0), xlim = c(0, 1), ylim = c(0, 1), type = "n", xlab = "", ylab = "", axes = FALSE) gsi.plots[[dev.cur()]] <<- NULL } if (triangle) { segments(x0 = c(0, 1, c60), y0 = c(0, 0, s60), x1 = c(1, c60, 0), y1 = c(0, s60, 0)) mtext(cn[1], side = 1, adj = 0, padj=adj, line = 1.5, cex=cexT) mtext(cn[2], side = 1, adj = 1, padj=adj , line = 1.5, cex=cexT) text(0.5, s60 * 1.03, cn[3], pos = 3, offset = 0.01, xpd = TRUE, cex=cexT) } } X <- acomp(X, c(1, 2, 3)) Y <- scale.acomp(X, center = center, scale = scale) gsi.setCoorInfo(mean = if (center) -mean(acomp(X)) else acomp(c(1, 1, 1)), scale = if (scale) 1/msd(X) else 1) x <- Y[, 2] + Y[, 3] * c60 y <- Y[, 3] * s60 points(x, y, ..., col = col) } return(invisible(NULL)) } ## ----knn, message=FALSE, warning=FALSE---------------------------------------- library("robCompositions") packageDescription("robCompositions")$Version xImp <- impKNNa(x$zmiss, k=6) ## ----class-------------------------------------------------------------------- class(xImp) ## ----printSummary------------------------------------------------------------- methods(class = "imp") xImp ## ----imp---------------------------------------------------------------------- xImp1 <- impCoda(x$zmiss, method='lm') xImp2 <- impCoda(x$zmiss, method='ltsReg') ## ----da, echo=FALSE----------------------------------------------------------- cda <- function(xOrig, xImp, w){ da <- function(x,y){ d <- 0 p <- length(x) for(i in 1:(p-1)){ for(j in (i+1):p){ d <- d + (log(x[i]/x[j]) - log(y[i]/y[j]))^2 } } d=d/p sqrt(d) } das <- 0 for(i in 1:nrow(xOrig)){ das <- das + da(x=xOrig[i,], y=xImp[i,]) } das/w } ## ----variations, echo=FALSE--------------------------------------------------- v1 <- variation(constSum(x$z2[1:95,]), method="Pairwise") v2 <- variation(constSum(xImp1$xImp[1:95,]), method="Pairwise") v22 <- variation(constSum(xImp2$xImp[1:95,]), method="Pairwise") variations1 <- sum(abs(v1[upper.tri(v1, diag=FALSE)] - v2[upper.tri(v2, diag=FALSE)]), na.rm=TRUE)/length(c(upper.tri(v2, diag=FALSE))) variations2 <- sum(abs(v1[upper.tri(v1, diag=FALSE)] - v22[upper.tri(v22, diag=FALSE)]), na.rm=TRUE)/length(c(upper.tri(v22, diag=FALSE))) ## ----erg-variations, echo=FALSE----------------------------------------------- paste("RDA: iterative lm approach:", round(cda(x$z2, xImp1$xImp, xImp1$w),3)) paste("RDA: iterative ltsReg approach:", round(cda(x$z2, xImp2$xImp, xImp2$w), 3)) paste("DV: iterative lm approach:", round(variations1, 3)) paste("DV: iterative ltsReg approach:", round(variations2, 3)) ## ----bootstrap-old, echo=FALSE------------------------------------------------ bootimp <- function(x, R = 100, method = "lm") { d <- dim(x)[2] n <- dim(x)[1] thetaM <- matrix(NA, ncol = 2, nrow = d) xs <- theta_hat1 <- matrix(0, nrow = n, ncol = d) med <- matrix(NA, ncol = d, nrow = R) for (i in 1:R) { ind <- sample(1:n, replace = TRUE) forbid <- c(91:100) induse <- ind[apply(outer(ind,forbid,"!=")==FALSE,1,sum)==0] s1 <- x[ind, ] simp <- impCoda(s1, method=method)$xImp med[i, ] <- apply(simp[induse,], 2, geometricmean) } thetaHat <- apply(med, 2, mean) for (i in 1:d) { thetaM[i, 1] <- quantile(med[, i], 0.025) thetaM[i, 2] <- quantile(med[, i], 0.975) } res <- list(geometricmean = thetaHat, ci = thetaM) res } ## ----bootstrap-new, echo=FALSE------------------------------------------------ bootimp <- function(x, R = 100, method = "lm") { d <- dim(x)[2] n <- dim(x)[1] thetaM <- matrix(NA, ncol = 2, nrow = d) xs <- theta_hat1 <- matrix(0, nrow = n, ncol = d) med <- matrix(NA, ncol = d, nrow = R) for (i in 1:R) { ind <- sample(1:n, replace = TRUE) forbid <- c(91:100) induse <- ind[apply(outer(ind,forbid,"!=")==FALSE,1,sum)==0] s1 <- x[ind, ] simp <- impCoda(s1, method=method)$xImp med[i, ] <- apply(simp[induse,], 2, geometricmean) } invisible(as.data.frame(constSum(med))) ##thetaHatMed <- apply(med, 2, mean) ##for (i in 1:d) { ## thetaM[i, 1] <- quantile(med[, i], 0.025) ## thetaM[i, 2] <- quantile(med[, i], 0.975) ##} ##res <- list(geometricmean = thetaHat, ci = thetaM) ##res } bootimpEM <- function(x, R = 100) { d <- dim(x)[2] n <- dim(x)[1] thetaM <- matrix(NA, ncol = 2, nrow = d) xs <- theta_hat1 <- matrix(0, nrow = n, ncol = d) med <- matrix(NA, ncol = d, nrow = R) for (i in 1:R) { ind <- sample(1:n, replace = TRUE) forbid <- c(91:100) induse <- ind[apply(outer(ind,forbid,"!=")==FALSE,1,sum)==0] s1 <- x[ind, ] ## und das hier produziert Unsinn: s <- prelim.norm(s1) thetahat <- em.norm(s, showits=FALSE) simp <- imp.norm(s, thetahat, s)[[1]] print(simp) med[i, ] <- apply(simp[induse,], 2, geometricmean) } invisible(as.data.frame(constSum(med))) } geometricmean <- function (x) { if (any(na.omit(x == 0))) 0 else exp(mean(log(unclass(x)[is.finite(x) & x > 0]))) } bootimpGM <- function(x, R = 100) { d <- dim(x)[2] n <- dim(x)[1] thetaM <- matrix(NA, ncol = 2, nrow = d) xs <- theta_hat1 <- matrix(0, nrow = n, ncol = d) med <- matrix(NA, ncol = d, nrow = R) for (i in 1:R) { ind <- sample(1:n, replace = TRUE) forbid <- c(91:100) induse <- ind[apply(outer(ind,forbid,"!=")==FALSE,1,sum)==0] s1 <- x[ind, ] w <- is.na(s1) gm <- apply(s1, 2, function(x) { geometricmean(x[complete.cases(x)]) }) xmean <- x for(j in 1:ncol(x)){ xmean[w[,j], j] <- gm[j] } med[i, ] <- apply(xmean[induse,], 2, geometricmean) } invisible(as.data.frame(constSum(med))) } bootimpM <- function(x, R = 100) { d <- dim(x)[2] n <- dim(x)[1] thetaM <- matrix(NA, ncol = 2, nrow = d) xs <- theta_hat1 <- matrix(0, nrow = n, ncol = d) med <- matrix(NA, ncol = d, nrow = R) for (i in 1:R) { ind <- sample(1:n, replace = TRUE) forbid <- c(91:100) induse <- ind[apply(outer(ind,forbid,"!=")==FALSE,1,sum)==0] s1 <- x[ind, ] simp <- impute(s1, what="mean") med[i, ] <- apply(simp[induse,], 2, geometricmean) } invisible(as.data.frame(constSum(med))) } ## ----bootstat-erg------------------------------------------------------------- R <- 5 bootimp(x$z2, R=R) ## ----message=FALSE, warning=FALSE--------------------------------------------- plot(xImp1, which=2) ## ----message=FALSE, warning=FALSE--------------------------------------------- plot(xImp1, which=3, seg1=FALSE)