## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, message = FALSE, warning = FALSE, fig.height = 6, fig.width = 7, # fig.path = "fig/tut01-", dev = "png", comment = "##" ) # save some typing knitr::set_alias(w = "fig.width", h = "fig.height", cap = "fig.cap") # Old Sweave options # \SweaveOpts{engine=R,eps=TRUE,height=6,width=7,results=hide,fig=FALSE,echo=TRUE} # \SweaveOpts{engine=R,height=6,width=7,results=hide,fig=FALSE,echo=TRUE} # \SweaveOpts{prefix.string=fig/vcd-tut,eps=FALSE} # \SweaveOpts{keep.source=TRUE} # preload datasets ??? set.seed(1071) library(vcd) library(vcdExtra) library(ggplot2) data(HairEyeColor) data(PreSex) data(Arthritis, package="vcd") art <- xtabs(~Treatment + Improved, data = Arthritis) #if(!file.exists("fig")) dir.create("fig") ## ----case-form---------------------------------------------------------------- names(Arthritis) # show the variables str(Arthritis) # show the structure head(Arthritis,5) # first 5 observations, same as Arthritis[1:5,] ## ----frequency-form----------------------------------------------------------- # Agresti (2002), table 3.11, p. 106 GSS <- data.frame( expand.grid(sex = c("female", "male"), party = c("dem", "indep", "rep")), count = c(279,165,73,47,225,191)) GSS names(GSS) str(GSS) sum(GSS$count) ## ----table-form1-------------------------------------------------------------- str(HairEyeColor) # show the structure sum(HairEyeColor) # number of cases sapply(dimnames(HairEyeColor), length) # table dimension sizes ## ----table-form2-------------------------------------------------------------- # A 4 x 4 table Agresti (2002, Table 2.8, p. 57) Job Satisfaction JobSat <- matrix(c( 1, 2, 1, 0, 3, 3, 6, 1, 10,10,14, 9, 6, 7,12,11), 4, 4) dimnames(JobSat) = list( income = c("< 15k", "15-25k", "25-40k", "> 40k"), satisfaction = c("VeryD", "LittleD", "ModerateS", "VeryS") ) JobSat ## ----table-form3-------------------------------------------------------------- JobSat <- as.table(JobSat) str(JobSat) ## ----relevel, eval=FALSE------------------------------------------------------ # dimnames(JobSat)$income <- c(7.5,20,32.5,60) # dimnames(JobSat)$satisfaction <- 1:4 ## ----reorder1----------------------------------------------------------------- HairEyeColor <- HairEyeColor[, c(1,3,4,2), ] str(HairEyeColor) ## ----reorder2, echo=TRUE, eval=FALSE------------------------------------------ # Arthritis <- read.csv("arthritis.txt",header=TRUE) # Arthritis$Improved <- ordered(Arthritis$Improved, # levels=c("None", "Some", "Marked") # ) ## ----------------------------------------------------------------------------- data(Arthritis, package="vcd") art <- xtabs(~Treatment + Improved, data = Arthritis) mosaic(art, gp = shading_max, split_vertical = TRUE, main="Arthritis: [Treatment] [Improved]") ## ----reorder3----------------------------------------------------------------- UCB <- aperm(UCBAdmissions, c(2, 1, 3)) dimnames(UCB)[[2]] <- c("Yes", "No") names(dimnames(UCB)) <- c("Sex", "Admit?", "Department") # display as a flattened table stats::ftable(UCB) ## ----structable--------------------------------------------------------------- structable(HairEyeColor) # show the table: default structable(Hair+Sex ~ Eye, HairEyeColor) # specify col ~ row variables ## ----structable1,eval=FALSE--------------------------------------------------- # HSE < - structable(Hair+Sex ~ Eye, HairEyeColor) # save structable object # mosaic(HSE) # plot it ## ----table-setup-------------------------------------------------------------- n=500 A <- factor(sample(c("a1","a2"), n, rep=TRUE)) B <- factor(sample(c("b1","b2"), n, rep=TRUE)) C <- factor(sample(c("c1","c2"), n, rep=TRUE)) mydata <- data.frame(A,B,C) ## ----table-ex1---------------------------------------------------------------- # 2-Way Frequency Table attach(mydata) mytable <- table(A,B) # A will be rows, B will be columns mytable # print table margin.table(mytable, 1) # A frequencies (summed over B) margin.table(mytable, 2) # B frequencies (summed over A) prop.table(mytable) # cell percentages prop.table(mytable, 1) # row percentages prop.table(mytable, 2) # column percentages ## ----table-ex2---------------------------------------------------------------- # 3-Way Frequency Table mytable <- table(A, B, C) ftable(mytable) ## ----xtabs-ex1---------------------------------------------------------------- # 3-Way Frequency Table mytable <- xtabs(~A+B+C, data=mydata) ftable(mytable) # print table summary(mytable) # chi-square test of indepedence ## ----xtabs-ex2---------------------------------------------------------------- (GSStab <- xtabs(count ~ sex + party, data=GSS)) summary(GSStab) ## ----dayton1------------------------------------------------------------------ data("DaytonSurvey", package="vcdExtra") str(DaytonSurvey) head(DaytonSurvey) ## ----dayton2------------------------------------------------------------------ # data in frequency form # collapse over sex and race Dayton.ACM.df <- aggregate(Freq ~ cigarette+alcohol+marijuana, data=DaytonSurvey, FUN=sum) Dayton.ACM.df ## ----dayton3------------------------------------------------------------------ # in table form Dayton.tab <- xtabs(Freq ~ cigarette+alcohol+marijuana+sex+race, data=DaytonSurvey) structable(cigarette+alcohol+marijuana ~ sex+race, data=Dayton.tab) ## ----dayton4------------------------------------------------------------------ # collapse over sex and race Dayton.ACM.tab <- apply(Dayton.tab, MARGIN=1:3, FUN=sum) Dayton.ACM.tab <- margin.table(Dayton.tab, 1:3) # same result structable(cigarette+alcohol ~ marijuana, data=Dayton.ACM.tab) ## ----dayton5------------------------------------------------------------------ library(plyr) Dayton.ACM.df <- plyr::ddply(DaytonSurvey, .(cigarette, alcohol, marijuana), plyr::summarise, Freq=sum(Freq)) Dayton.ACM.df ## ----collapse1---------------------------------------------------------------- # create some sample data in frequency form sex <- c("Male", "Female") age <- c("10-19", "20-29", "30-39", "40-49", "50-59", "60-69") education <- c("low", 'med', 'high') data <- expand.grid(sex=sex, age=age, education=education) counts <- rpois(36, 100) # random Possion cell frequencies data <- cbind(data, counts) # make it into a 3-way table t1 <- xtabs(counts ~ sex + age + education, data=data) structable(t1) ## ----collapse2---------------------------------------------------------------- # collapse age to 3 levels, education to 2 levels t2 <- collapse.table(t1, age=c("10-29", "10-29", "30-49", "30-49", "50-69", "50-69"), education=c(" mutate(sibspF = case_match(sibsp, 0 ~ "0", 1 ~ "1", 2:max(sibsp) ~ "2+")) |> mutate(sibspF = ordered(sibspF)) |> mutate(parchF = case_match(parch, 0 ~ "0", 1 ~ "1", 2:max(parch) ~ "2+")) |> mutate(parchF = ordered(parchF)) table(Titanicp$sibspF, Titanicp$parchF) ## ----convert-ex1-------------------------------------------------------------- as.data.frame(GSStab) ## ----convert-ex2-------------------------------------------------------------- Art.tab <- with(Arthritis, table(Treatment, Sex, Improved)) str(Art.tab) ftable(Art.tab) ## ----convert-ex3-------------------------------------------------------------- Art.df <- expand.dft(Art.tab) str(Art.df) ## ----tv1---------------------------------------------------------------------- tv.data<-read.table(system.file("extdata","tv.dat", package="vcdExtra")) head(tv.data,5) ## ----tv2,eval=FALSE----------------------------------------------------------- # tv.data<-read.table("C:/R/data/tv.dat") ## ----tv3---------------------------------------------------------------------- TV <- array(tv.data[,5], dim=c(5,11,5,3)) dimnames(TV) <- list(c("Monday","Tuesday","Wednesday","Thursday","Friday"), c("8:00","8:15","8:30","8:45","9:00","9:15","9:30", "9:45","10:00","10:15","10:30"), c("ABC","CBS","NBC","Fox","Other"), c("Off","Switch","Persist")) names(dimnames(TV))<-c("Day", "Time", "Network", "State") ## ----tv3a,eval=FALSE---------------------------------------------------------- # TV <- xtabs(V5 ~ ., data=tv.data) # dimnames(TV) <- list(Day = c("Monday","Tuesday","Wednesday","Thursday","Friday"), # Time = c("8:00","8:15","8:30","8:45","9:00","9:15","9:30", # "9:45","10:00","10:15","10:30"), # Network = c("ABC","CBS","NBC","Fox","Other"), # State = c("Off","Switch","Persist")) # # # table dimensions # dim(TV) ## ----tv4---------------------------------------------------------------------- TV2 <- TV[,,1:3,] # keep only ABC, CBS, NBC TV2 <- TV2[,,,3] # keep only Persist -- now a 3 way table structable(TV2) ## ----tv5---------------------------------------------------------------------- TV.df <- as.data.frame.table(TV2) levels(TV.df$Time) <- c(rep("8:00", 2), rep("8:30", 2), rep("9:00", 2), rep("9:30", 2), rep("10:00",2), "10:30" ) TV3 <- xtabs(Freq ~ Day + Time + Network, TV.df) structable(Day ~ Time+Network, TV3) ## ----tv-mosaic1, fig.height=6, fig.width=7------------------------------------ mosaic(TV3, shade = TRUE, labeling = labeling_border(rot_labels = c(0, 0, 0, 90)))