## ---- include = FALSE---------------------------------------------------------
library(knitr)
set.seed(1)
is_CRAN <- !identical(Sys.getenv("NOT_CRAN"), "true")
if (!is_CRAN) {
   options(mc.cores = parallel::detectCores())
} else {
  knitr::opts_chunk$set(eval = FALSE)
  knitr::knit_hooks$set(evaluate.inline = function(x, envir) x)
}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  cache.lazy = FALSE  # https://github.com/yihui/knitr/issues/572
)
options(digits=4)
options(scipen=2)

## ---- message=FALSE-----------------------------------------------------------
library(rstan)
library(pcFactorStan)
library(loo)
library(qgraph)
library(ggplot2)
library(Matrix)

## ---- results='hide'----------------------------------------------------------
head(phyActFlowPropensity)

## ---- results='asis', echo=FALSE----------------------------------------------
kable(head(phyActFlowPropensity))

## -----------------------------------------------------------------------------
dl <- prepData(phyActFlowPropensity[,c(paste0('pa',1:2), 'skill')])
dl$varCorrection <- 5.0

## ----pcStan, message=FALSE, results='hide', cache=TRUE------------------------
fit1 <- pcStan("unidim_adapt", data=dl)

## ----pcStanDiag1, cache=TRUE--------------------------------------------------
check_hmc_diagnostics(fit1)

## ----pcStanDiag2, cache=TRUE--------------------------------------------------
allPars <- summary(fit1, probs=c())$summary
print(min(allPars[,'n_eff']))
print(max(allPars[,'Rhat']))

## ----skill, cache=TRUE--------------------------------------------------------
theta <- summary(fit1, pars=c("theta"), probs=c())$summary[,'mean']

ggplot(data.frame(x=theta, activity=dl$nameInfo$pa, y=0.47)) +
  geom_point(aes(x=x),y=0) +
  geom_text(aes(label=activity, x=x, y=y),
            angle=85, hjust=0, size=2,
            position = position_jitter(width = 0, height = 0.4)) + ylim(0,1) +
  theme(legend.position="none",
        axis.title.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank())

## -----------------------------------------------------------------------------
s50 <- summary(fit1, pars=c("scale"), probs=c(.5))$summary[,'50%']
print(s50)

## ---- results='hide', include = FALSE-----------------------------------------
rm(fit1)  # free up some memory

## ----calibrateItems, message=FALSE, results='hide', cache=TRUE----------------
result <- calibrateItems(phyActFlowPropensity, iter=1000L)

## ---- results='hide'----------------------------------------------------------
print(result)

## ---- results='asis', echo=FALSE----------------------------------------------
kable(result)

## ----covarianceData, cache=TRUE-----------------------------------------------
pafp <- phyActFlowPropensity
excl <- match(c('goal1','feedback1'), colnames(pafp))
pafp <- pafp[,-excl]
dl <- prepData(pafp)
dl$scale <- result[match(dl$nameInfo$item, result$item), 'scale']

## ----covariance, message=FALSE, results='hide', cache=TRUE--------------------
fit2 <- pcStan("correlation", data=dl, include=FALSE, pars=c('rawTheta', 'rawThetaCorChol'))

## ----covarianceDiag1, cache=TRUE----------------------------------------------
check_hmc_diagnostics(fit2)

allPars <- summary(fit2, probs=0.5)$summary
print(min(allPars[,'n_eff']))
print(max(allPars[,'Rhat']))

## ----covarianceDiag2, cache=TRUE----------------------------------------------
head(allPars[order(allPars[,'sd']),])

## ----covarianceDiag3, cache=TRUE----------------------------------------------
allPars <- allPars[allPars[,'sd'] > 1e-6,]
print(min(allPars[,'n_eff']))
print(max(allPars[,'Rhat']))

## ----corPlot, cache=TRUE------------------------------------------------------
corItemNames <- dl$nameInfo$item
tc <- summary(fit2, pars=c("thetaCor"), probs=c(.1,.5,.9))$summary
tcor <- matrix(tc, length(corItemNames), length(corItemNames))
tcor[sign(tc[,'10%']) != sign(tc[,'90%'])] <- 0  # delete faint edges
dimnames(tcor) <- list(corItemNames, corItemNames)
tcor <- nearPD(tcor, corr=TRUE)$mat

qgraph(tcor, layout = "spring", graph = "cor", labels=colnames(tcor),
       legend.cex = 0.3,
       cut = 0.3, maximum = 1, minimum = 0, esize = 20,
       vsize = 7, repulsion = 0.8, negDashed=TRUE, theme="colorblind")

## ----responseCurves, cache=TRUE-----------------------------------------------
df <- responseCurve(dl, fit2,
  item=setdiff(dl$nameInfo$item, c('spont','control','evaluated','waiting')),
  responseNames=c("much more","somewhat more", 'equal',
                  "somewhat less", "much less"))
ggplot(df) +
  geom_line(aes(x=worthDiff, y=prob, color=response,linetype=response,
                group=responseSample), size=.2, alpha=.2) +
  xlab("difference in latent worths") + ylab("probability") +
  ylim(0,1) + facet_wrap(~item, scales="free_x") +
  guides(color=guide_legend(override.aes=list(alpha = 1, size=1)))

## ----factorData1, cache=TRUE--------------------------------------------------
pafp <- pafp[,c(paste0('pa',1:2),
             setdiff(corItemNames, c('spont','control','evaluated','waiting')))]
pafp <- normalizeData(filterGraph(pafp))
dl <- prepCleanData(pafp)
dl <- prepSingleFactorModel(dl)
dl$scale <- result[match(dl$nameInfo$item, result$item), 'scale']

## ---- results='hide', include = FALSE-----------------------------------------
rm(fit2)  # free up some memory

## ----factor, message=FALSE, results='hide', cache=TRUE------------------------
fit3 <- pcStan("factor1_ll", data=dl, include=FALSE,
               pars=c('rawUnique', 'rawUniqueTheta', 'rawPerComponentVar',
	       'rawFactor', 'rawLoadings', 'rawFactorProp', 'rawThreshold',
         'rawPathProp', 'rawCumTh'))

## ----factorDiag1, cache=TRUE--------------------------------------------------
check_hmc_diagnostics(fit3)

interest <- c("threshold", "alpha", "pathProp", "factor", "residualItemCor", "lp__")

allPars <- summary(fit3, pars=interest)$summary
allPars <- allPars[allPars[,'sd'] > 1e-6,]
print(min(allPars[,'n_eff']))
print(max(allPars[,'Rhat']))

## ----factorLoo, cache=TRUE----------------------------------------------------
options(mc.cores=1)  # otherwise loo consumes too much RAM
kThreshold <- 0.1
l1 <- toLoo(fit3)
print(l1)

## -----------------------------------------------------------------------------
pa11 <- levels(filterGraph(pafp, minDifferent=11L)$pa1)
ot <- outlierTable(dl, l1, kThreshold)
ot <- subset(ot, pa1 %in% pa11 & pa2 %in% pa11)

## ---- results='hide'----------------------------------------------------------
print(ot[1:6,])

## ---- results='asis', echo=FALSE----------------------------------------------
kable(ot[1:6,], row.names=TRUE)

## ---- results='hide'----------------------------------------------------------
xx <- which(ot[,'pa1'] == 'mountain biking' & ot[,'pa2'] == 'climbing' & ot[,'item'] == 'predict' & ot[,'pick'] == -2)

## ---- results='asis', echo=FALSE----------------------------------------------
if (length(xx) == 0) {
  xx <- 1
  warning("Can't find outlier")
}
kable(ot[xx,,drop=FALSE], row.names=TRUE)

## -----------------------------------------------------------------------------
pafp[pafp$pa1 == ot[xx,'pa1'] & pafp$pa2 == ot[xx,'pa2'],
     c('pa1','pa2', as.character(ot[xx,'item']))]

## ----outlier, cache=TRUE------------------------------------------------------
loc <- sapply(ot[xx,c('pa1','pa2','item')], unfactor)
exam <- summary(fit3, pars=paste0("theta[",loc[paste0('pa',1:2)],
                          ",", loc['item'],"]"))$summary
rownames(exam) <- c(as.character(ot[xx,'pa1']), as.character(ot[xx,'pa2']))

## ---- results='asis', echo=FALSE----------------------------------------------
#exam <- data.frame(mean=c(0,0), '2.5%'=c(0,0), '97.5%'=c(0,0))
kable(exam)

## -----------------------------------------------------------------------------
sum(c(pafp$pa1 == ot[xx,'pa1'], pafp$pa2 == ot[xx,'pa1']))
sum(c(pafp$pa1 == ot[xx,'pa2'], pafp$pa2 == ot[xx,'pa2']))

## ----pathProp, cache=TRUE-----------------------------------------------------
pi <- parInterval(fit3, 'pathProp', dl$nameInfo$item, label='item')
pi <- pi[order(abs(pi$M)),]

ggplot(pi) +
  geom_vline(xintercept=0, color="green") +
  geom_jitter(data=parDistributionFor(fit3, pi),
              aes(value, item), height = 0.35, alpha=.05) +
  geom_segment(aes(y=item, yend=item, x=L, xend=U),
               color="yellow", alpha=.5) +
  geom_point(aes(x=M, y=item), color="red", size=1) +
  theme(axis.title.y=element_blank())

## ----activities, cache=TRUE---------------------------------------------------
pick <- paste0('factor[',match(pa11, dl$nameInfo$pa),',1]')
pi <- parInterval(fit3, pick, pa11, label='activity')
pi <- pi[order(pi$M),]

ggplot(pi) +
  geom_vline(xintercept=0, color="green") +
  geom_jitter(data=parDistributionFor(fit3, pi, samples=200),
              aes(value, activity), height = 0.35, alpha=.05) +
  geom_segment(aes(y=activity, yend=activity, x=L, xend=U),
               color="yellow", alpha=.5) +
  geom_point(aes(x=M, y=activity), color="red", size=1) +
  theme(axis.title.y=element_blank())

## ----residualItemCor, cache=TRUE----------------------------------------------
m <- matrix(apply(expand.grid(r=1:dl$NITEMS, c=1:dl$NITEMS), 1,
      function(x) paste0("residualItemCor[",x['r'],",",x['c'],"]")),
      dl$NITEMS, dl$NITEMS)
n <- matrix(apply(expand.grid(r=dl$nameInfo$item, c=dl$nameInfo$item), 1,
                  function(x) paste0(x['r'],":",x['c'])),
            dl$NITEMS, dl$NITEMS)
pi <- parInterval(fit3, m[lower.tri(m)], n[lower.tri(n)], label='cor')
pi <- pi[abs(pi$M) > .08,]
pi <- pi[order(-abs(pi$M)),]
ggplot(pi) +
  geom_vline(xintercept=0, color="green") +
  geom_jitter(data=parDistributionFor(fit3, pi, samples=800),
              aes(value, cor), height = 0.35, alpha=.05) +
  geom_segment(aes(y=cor, yend=cor, x=L, xend=U),
               color="yellow", alpha=.5) +
  geom_point(aes(x=M, y=cor), color="red", size=1) +
  theme(axis.title.y=element_blank())

## -----------------------------------------------------------------------------
sessionInfo()