## ----eval=FALSE---------------------------------------------------------------
# library(ggplot2)
# library(kko)
# library(knockoff)
# set.seed(12345)
#
# ### generate regression coefficient
# p=20 # number of predictors
# sig_mag=10 # signal strength
# s=5 # sparsity, number of nonzero component functions
# reg_coef=c(rep(1,s),rep(0,p-s)) # regression coefficient
# reg_coef=reg_coef*(2*(rnorm(p)>0)-1)*sig_mag
#
# ### generate response and design
# model="poly"
# n= 600 # sample size
# X=matrix(rnorm(n*p),n,p) # generate design
# X_k = create.second_order(X) # generate knockoff
# y=generate_data(X,reg_coef,model) # response

## ----eval=FALSE---------------------------------------------------------------
# rkernel="laplacian" # kernel choice
# rk_scale=1 # scaling parameter of kernel
# rfn_range=c(2,3,4) # number of random features
# cv_folds=15 # folds of cross-validation in group lasso
# n_stb=200 # number of subsampling for importance scores
# n_stb_tune=100 # number of subsampling for tuning random feature number
# frac_stb=1/2 # fraction of subsample
# nCores_para=2 # number of cores for parallelization
#
# ### KKO selection
# kko_fit=kko(X,y,X_k,rfn_range,n_stb_tune,n_stb,cv_folds,frac_stb,nCores_para,rkernel,rk_scale)

## ----echo=FALSE---------------------------------------------------------------
library(kko)
library(knockoff)
library(ggplot2)

# The two chunks above are not evaluated, so the objects used below come from
# the saved workspace instead.
# NOTE(review): presumably demo.Rdata provides `kko_fit` and `reg_coef` (neither
# is assigned in the evaluated code) -- confirm against the file's contents.
load("demo.Rdata")
p <- length(kko_fit$importance_score) # number of importance scores

## ----fig.width=6,fig.height=4-------------------------------------------------
reg_coef # true regression coefficient
W <- kko_fit$importance_score # knockoff importance scores generated by KKO
W

# Label each variable by whether its true coefficient is nonzero, so the
# histogram can be filled by group.
mydata <- data.frame(
  W = W,
  var_group = ifelse(reg_coef != 0, "Active", "Null")
)

# Histogram of the importance scores, one fill colour per group.
myplot <- ggplot(mydata, aes(W, fill = var_group)) +
  geom_histogram(color = "gray2", binwidth = 1 / p) +
  theme_bw() +
  xlab("Importance scores") +
  ylab("Number of variables") +
  xlim(-1, 1)
print(myplot)

## -----------------------------------------------------------------------------
fdr <- 0.2 # FDR control level
thres <- knockoff.threshold(W, fdr = fdr) # thresholding on scores by knockoff filter
selected <- which(W >= thres) # keep variables whose score reaches the threshold
selected # indices of selected variables