install.packages("XLConnect") library(XLConnect) #THAI DATA dataTH <- readWorksheet(loadWorkbook(""), header = TRUE, sheet=1, startRow =0, endRow = 258) ls(dataTH) attach(dataTH) #DUTCH DATA dataNL <- readWorksheet(loadWorkbook(""), header = TRUE, sheet=1, startRow =0, endRow = 359) ls(dataNL) attach(dataNL) #################################################################################################################################### ############################################ FIGURE 1 ######################################################################################################## setRepositories() install.packages("Rgraphviz") require(Rgraphviz) require(igraph) #FOR NETHERLANDS rdsdata <- dataNL #FOR THAILAND rdsdata <- dataTH # Extract the number of coupons ncoupons <- 4 # Extract the missing code misscode <- as.character(NA) #Now generate new headers couponList <- list() couponsGiven <- rdsdata[, grep("child.", names(rdsdata))] colnames(couponsGiven) <- c("coupon.1", "coupon.2","coupon.3","coupon.4") # Now collapse coupons into a single column couponsGiven <- stack(lapply(couponsGiven,as.character)) couponIDs <- rep(rdsdata$ID, ncoupons) rdsdata$Token <- as.character(rdsdata$recruitmentid) rdsdata$coupon.1 <- as.character(rdsdata$child1) rdsdata$coupon.2 <- as.character(rdsdata$child2) rdsdata$coupon.3 <- as.character(rdsdata$child3) rdsdata$coupon.4 <- as.character(rdsdata$child4) colnames(rdsdata)[1] <- "id" colnames(rdsdata)[3] <- "mycoupon" child.vec <- NULL for (i in 1:4) { child.vec <- c(child.vec, as.character(rdsdata[,paste("coupon",i,sep=".")])) } child.vec <- child.vec[child.vec!="-1"] #length(child.vec) vec.loop <- 1:nrow(rdsdata) coupon.data <- data.frame(recruiter.origin=NA, participant=rep(NA,length(vec.loop))) for (i in vec.loop) { if (i%%10==0) { print(i) } recruiter.tmp <- as.character(rdsdata$mycoupon[i]) vec.loop.tmp <- vec.loop[1:(i-1)] hit <- 0 pos <- NA j <- 1 while (hit==0 & j < i) { coupon.tmp <- as.character(rdsdata[j,paste("coupon",1:4,sep=".")]) if (recruiter.tmp%in%coupon.tmp) { hit <- 1 } else { j <- j+1 } } coupon.data$participant[i] <- recruiter.tmp if (hit==1) { coupon.data$recruiter.origin[i] <- rdsdata[j,"mycoupon"] #coupon.data$invited[i] <- 1 } else { coupon.data$recruiter.origin[i] <- NA } } coupon.data2 <- coupon.data[is.na(coupon.data$recruiter.origin)==FALSE,] seeds.all <- rdsdata[rdsdata$wave==0,"mycoupon"] seeds.unq <- unique(coupon.data2$recruiter.origin) invited <- seeds.unq[seeds.unq%in%seeds.all] #remove seeds who did not invite contact persons invited2 <- as.character(invited) g=graph.data.frame(coupon.data2) adj.new3 <- get.adjacency(g,sparse=FALSE) rdsdata.graph <- graphAM(adjMat = adj.new3, edgemode = "directed") #MAKE AN EMPTY STRUCTERED PLOT OF RDS DATA plot(rdsdata.graph, "dot") #CREATE FIGURE 1 nodeNames <- nodes(rdsdata.graph) numNodes <- length(nodeNames) nAttrs <- list() rdsdatav2 <- rdsdata[which(rdsdata$mycoupon%in%nodeNames), ] nrow(rdsdatav2) elabel <- NULL for (i in 1:length(nodeNames)){ if (nodeNames[i]%in%as.character(rdsdatav2$mycoupon)){ elabel [i] <- as.character(rdsdatav2[rdsdatav2$mycoupon==nodeNames[i],"educationforC"]) } else { elabel [i] <- NA } } elabel <- as.character(elabel) elabel[elabel=='-1']=("M") elabel <- elabel[1:numNodes(rdsdata.graph)] names(elabel)<-nodes(rdsdata.graph) nAttrs$label <- elabel ######## #TO GET EMPTY LABELS! ########### elabel <- NULL for (i in 1:length(nodeNames)){ if (nodeNames[i]%in%as.character(rdsdatav2$mycoupon)){ elabel [i] <- "" } else { elabel [i] <- NA } } elabel <- as.character(elabel) elabel[elabel=='-1']=("") elabel <- elabel[1:numNodes(rdsdata.graph)] names(elabel)<-nodes(rdsdata.graph) nAttrs$label <- elabel ######## #TO GET EMPTY LABELS! ############### degree <- NULL for (i in 1:length(nodeNames)){ if (nodeNames[i]%in%as.character(rdsdatav2$mycoupon)){ degree [i] <- as.character(rdsdatav2[rdsdatav2$mycoupon==nodeNames[i],"Catdegree"]) } else { degree [i] <- NA } } degree[is.na(degree)] <- 0 nAttrs$shape <- c() for (i in 1:numNodes){ if (degree [i] == '0') { nAttrs$shape <- c(nAttrs$shape, "box") } else { nAttrs$shape <- c(nAttrs$shape, "circle") } } names(nAttrs$shape) <- as.character(nodeNames) ####################################NODE SIZE IF NO LABEL IS ADDED #### nAttrs$width <- c() for (i in 1:numNodes) { if (degree[i] =="1") { nAttrs$width <- c(nAttrs$width, 1.3) } else if (degree[i] =="2"){ nAttrs$width <- c(nAttrs$width, 1.6) } else if (degree[i] =="3"){ nAttrs$width <- c(nAttrs$width, 2.0) } else { nAttrs$width <- c(nAttrs$width, 1.3) } } names(nAttrs$width) <- as.character(nodeNames) nAttrs$height <- c() for (i in 1:numNodes) { if (degree[i] =="1") { nAttrs$height <- c(nAttrs$height, 1.3) } else if (degree[i] =="2"){ nAttrs$height <- c(nAttrs$height, 1.6) } else if (degree[i] =="3"){ nAttrs$height <- c(nAttrs$height, 2.0) } else { nAttrs$height <- c(nAttrs$height, 1.3) } } names(nAttrs$height) <- as.character(nodeNames) ###################################### #AGE COLOUR age <- NULL for (i in 1:length(nodeNames)){ if (nodeNames[i]%in%as.character(rdsdatav2$mycoupon)){ age [i] <- as.character(rdsdatav2[rdsdatav2$mycoupon==nodeNames[i],"AGEGROUP10"]) } else { age [i] <- NA } } age[is.na(age)] <- 0 nAttrs$fillcolor <- c() for (i in 1:numNodes) { if (age[i] == as.character("10-19")) { nAttrs$fillcolor <- c(nAttrs$fillcolor, "purple") } else if (age[i] == as.character("20-29")){ nAttrs$fillcolor <- c(nAttrs$fillcolor, "lightblue") } else if (age[i] == as.character("30-39")){ nAttrs$fillcolor <- c(nAttrs$fillcolor, "orange") } else if (age[i] == as.character("40-49")){ nAttrs$fillcolor <- c(nAttrs$fillcolor, "green") } else if (age[i] == as.character("50-59")){ nAttrs$fillcolor <- c(nAttrs$fillcolor, "yellow") } else if (age[i] == as.character("60+")){ nAttrs$fillcolor <- c(nAttrs$fillcolor, "red") } else { nAttrs$fillcolor <- c(nAttrs$fillcolor, "grey") } } names(nAttrs$fillcolor) <- as.character(nodeNames) #SEEDS WITH BLACK COLOUR seed <- NULL for (i in 1:length(nodeNames)){ if (nodeNames[i]%in%as.character(rdsdatav2$mycoupon)){ seed [i] <- as.character(rdsdatav2[rdsdatav2$mycoupon==nodeNames[i],"seeds"]) } else { seed [i] <- NA } } seed[is.na(seed)] <- 0 nAttrs$color <- c() for (i in 1:numNodes) { if (seed[i] == as.character(1)) { nAttrs$color <- c(nAttrs$color,"black") } else { nAttrs$color <- c(nAttrs$color,"darkgrey") } } names(nAttrs$color) <- as.character(nodeNames) plot(rdsdata.graph, "dot", nodeAttrs=nAttrs, attrs=list(node=list(fixedsize=FALSE), graph= list(rankdir = "TB"))) savePlot(file=" ",type="pdf") ################################## FIGURE 1A ################################################################################ ################################## FIGURE 2A ################################################################################ require(ggplot2) library(plyr) NL <- data.frame(dataNL$InvitationbyNUM,dataNL$recruitmentbyNUM) TH <- data.frame(dataTH$InvitationbyNUM,dataTH$recruitmentbyNUM) NL$country <- "Netherlands" colnames(NL) <- c("inv","rec","Country") TH$country <- "Thailand" colnames(TH) <- c("inv","rec","Country") NLTH <- rbind(NL,TH) head(NLTH) NLTH <- na.omit(NLTH) #NLTH$inv[NLTH$inv==1]="email" #NLTH$inv[NLTH$inv==2]="Facebook" #NLTH$inv[NLTH$inv==3]="recr. page" NLTH$rec[NLTH$rec==0]="no recr." NLTH$rec[NLTH$rec==1]="Facebook" NLTH$rec[NLTH$rec==2]="email B" NLTH$rec[NLTH$rec==3]="email C" table(dataTH$recruitmentbyNUM) df.new<-ddply(NLTH,.(Country),summarise, prop=prop.table(table(rec)), rec=names(table(rec))) ggplot(NLTH, aes(x=Country))+ geom_bar(data = subset(NLTH, Country=="Netherlands"), aes(fill=rec, y=(..count..)/sum(..count..)), position="stack", alpha = 1, binwidth=1) + geom_bar(data = subset(NLTH, Country=="Thailand"), aes(fill=rec, y=(..count..)/sum(..count..)), position="stack", alpha = 1, binwidth=0.5)+ geom_text(aes("Netherlands", .80, label="No recruitment"))+ geom_text(aes("Netherlands", .52, label="Facebook"))+ geom_text(aes("Netherlands", .38, label="Email C"))+ geom_text(aes("Netherlands", .15, label="Email B"))+ geom_text(aes("Thailand", .80, label="No recruitment"))+ geom_text(aes("Thailand", .30, label="Facebook"))+ geom_text(aes("Thailand", .09, label="Email C"))+ geom_text(aes("Thailand", .03, label="Email B"))+ xlab("") + ylab("proportion of total in country") + theme_bw(base_size = 15)+ theme(axis.title.x = element_text(color = "black", size = 15, family = "Arial", vjust= -0.3))+ theme(axis.title.y = element_text(color = "black", size = 15, family = "Arial", vjust= 0.2))+ theme(axis.text.x = element_text(color = "black", size = 15, family = "Arial"))+ theme(axis.text.y = element_text(color = "black", size = 15, family = "Arial")) ############################################## FIGURE 2A ############################ ############################################## FIGURE 2B ############################ require(ggplot2) library(plyr) NL <- data.frame(dataNL$wave) NL$country <- "Netherlands" colnames(NL) <- c("wave","Country") TH <- data.frame(dataTH$wave) TH$country <- "Thailand" colnames(TH) <- c("wave","Country") NLTH <- rbind(NL,TH) head(NLTH) NLTH <- na.omit(NLTH) NLTH$wave[NLTH$wave==0]="0" NLTH$wave[NLTH$wave==1]="1" NLTH$wave[NLTH$wave==2]="2" NLTH$wave[NLTH$wave==3]="3" NLTH$wave[NLTH$wave==4]="4" NLTH$wave[NLTH$wave==5]="5" NLTH$wave[NLTH$wave==6]="6" df.new<-ddply(NLTH,.(Country),summarise, prop=prop.table(table(wave)), wave=names(table(wave))) #ADD LINES #= proportion of participants per wave that invited others NL <- data.frame(dataNL$wave,dataNL$recruitmentbyNUM) colnames(NL) <- c("wave","rec") NL$rec[NL$rec>0]="4" NL <- na.omit(NL) table(NL) df.inv<-ddply(NL,.(wave),summarise, prop=prop.table(table(rec)), rec=names(table(rec))) df.inv df.invNL <- subset(df.inv, rec=="4") df.invNL$wave[df.invNL$wave==0]="0" df.invNL$wave[df.invNL$wave==1]="1" df.invNL$wave[df.invNL$wave==2]="2" df.invNL$wave[df.invNL$wave==3]="3" df.invNL$wave[df.invNL$wave==4]="4" df.invNL$wave[df.invNL$wave==5]="5" df.invNL$Country <- "Netherlands" df.invNL colnames(df.invNL) <- c("wave","prop","rec","Country") TH <- data.frame(dataTH$wave,dataTH$recruitmentbyNUM) table(TH) colnames(TH) <- c("wave","rec") TH TH$rec[TH$rec>0]="4" df.invTH<-ddply(TH,.(wave),summarise, prop=prop.table(table(rec)), rec=names(table(rec))) df.invTH <- subset(df.invTH, rec=="4") wave6 <- c(6,0,4) df.invTH <- rbind(df.invTH,wave6) df.invTH df.invTH$Country <- "Thailand" df.invTH colnames(df.invTH) <- c("wave","prop","rec","Country") df.invTH$wave[df.invTH$wave==0]="0" df.invTH$wave[df.invTH$wave==1]="1" df.invTH$wave[df.invTH$wave==2]="2" df.invTH$wave[df.invTH$wave==3]="3" df.invTH$wave[df.invTH$wave==4]="4" df.invTH$wave[df.invTH$wave==5]="5" df.invTH$wave[df.invTH$wave==6]="6" #COMBINE TWO LINES <- rbind(df.invTH,df.invNL) LINES #CREATE NEW FIGURE df.new ggplot(df.new,aes(x=wave))+ geom_bar(aes(y=prop, fill=Country),stat="identity",position='dodge', alpha=0.55)+ scale_fill_manual(values=c("lightgreen", "darkblue"))+ geom_line(data=LINES, aes(y=prop,group=Country, colour = Country, linetype=Country),position="identity",size=1)+ scale_color_manual(values=c("lightgreen", "darkblue"))+ xlab("waves") + ylab("proportions")+ theme_bw(base_size = 15)+ theme(axis.title.x = element_text(color = "black", size = 15, vjust= -0.3))+ theme(axis.title.y = element_text(color = "black", size = 15, vjust= 0.2))+ theme(axis.text.x = element_text(color = "black", size = 15))+ theme(axis.text.y = element_text(color = "black", size = 15)) savePlot(file="",type="pdf") ############################################## FIGURE 2B ############################ ############################################## FIGURE 2C ############################ require(ggplot2) require(scales) library(XLConnect) NL <- data.frame(dataNL$age,dataNL$gender) TH <- data.frame(dataTH$age, dataTH$gender) NL$country <- "NL" colnames(NL) <- c("age","gender","country") TH$country <- "TH" colnames(TH) <- c("age","gender","country") NLTH <- rbind(NL,TH) head(NLTH) NLTH <- na.omit(NLTH) NLTH$gender[NLTH$gender=="1"]= "female" NLTH$gender[NLTH$gender=="2"]= "male" #ALLE ACTIEVE SEEDS df <- cbind(dataNL$age,dataNL$wave,dataNL$no_suc_rec) df dfNL <- matrix(df[df[,1] & df[,2]==0 & df[,3]>=1],ncol=3) dfNL <- data.frame(dfNL) colnames(dfNL) <- c("age","wave","rec") dfNL df <- cbind(dataTH$age,dataTH$wave,dataTH$no_suc_rec) dfzero <- matrix(df[df[,1] & df[,2]==0 & df[,3]>=1],ncol=3) dfTH <- na.omit(dfzero) dfTH <- data.frame(dfTH) colnames(dfTH) <- c("age","wave","rec") hist(dfTH[,1],breaks=20) dfTH #CENSUS DATA df.census <- readWorksheet(loadWorkbook(""), header = TRUE, sheet=1, startRow =0, endRow = 90) ls(df.census) census.df <- data.frame(df.census) colnames(census.df) <- c("age","NL","TH") bw <- 1 ggplot() + geom_histogram(data=subset(NLTH, country=="NL"), aes(x=age,(y=..count../sum(..count..)),fill=gender), colour="black", binwidth = bw, alpha=0.6)+ geom_histogram(data=subset(NLTH, country=="TH"), aes(x=age,(y=..count../sum(..count..)*-1), fill=gender), colour="black", binwidth = bw, alpha=0.6)+ geom_density(data=dfNL, aes(x=age, (y=..count../sum(..count..))), binwidth=1, colour="black", size=0.7)+ geom_density(data=dfTH, aes(x=age, (y=..count../sum(..count..)*-1)),binwidth=1, colour="black", size=0.7)+ geom_line(data=census.df, aes(x=age,y=NL), colour="blue",linetype = "dotted", stat="identity", size=0.9,se=F)+ geom_line(data=census.df, aes(x=age,y=TH*-1), colour="blue",linetype = "dotted", stat="identity", size=0.9,se=F)+ xlab("age (years)")+ ylab("proportion of country total") + scale_y_continuous(breaks = seq(-0.5,0.5, 0.05), labels = abs(seq(-0.5,0.5, 0.05))) + scale_x_continuous(breaks = seq(10, 80, 10), labels = seq(10, 80, 10))+ theme_bw(base_size = 20)+ theme(axis.title.x = element_text(color = "black", size = 20, vjust= -0.1))+ theme(axis.title.y = element_text(color = "black", size = 20, vjust= 0.35))+ theme(axis.text.x = element_text(color = "black", size = 20))+ theme(axis.text.y = element_text(color = "black", size = 20))+ geom_text(aes(45, .06, label="Netherlands"),size=8)+ geom_text(aes(45, -.06, label="Thailand"),size=8)+ scale_fill_manual(values=c("orange", "lightblue")) savePlot(file="",type="pdf") ############################################## FIGURE 2C ############################ ############################################## FIGURE 2D ############################ install.packages("ggplot2") require(ggplot2) install.packages("colorspace") install.packages("plyr") require(plyr) THcensus <- c(0.183,0.232,0.216,0.185,0.186) #data from 2010 NLcensus <- c(2802182/7569371, 2474729/7569371, 914480/7569371, 967899/7569371,410081/7569371) #data from 2013 NL <- data.frame(dataNL$householdGROUPS) TH <- data.frame(dataTH$householdGROUPS) NL$country <- "Netherlands" colnames(NL) <- c("HS","Country") TH$country <- "Thailand" colnames(TH) <- c("HS","Country") NLTH <- rbind(NL,TH) NLTH <- na.omit(NLTH) df.new<-ddply(NLTH,.(Country),summarise, prop=prop.table(table(HS)), HS=names(table(HS))) NLadd <- c("Netherlands",0.05828221+0.09509202,"5+") THadd <- c("Thailand",0.14479638+0.19004525,"5+") df.new[5,] <- NLadd df.new[11,] <- THadd df.new2 <- df.new[-6,] df.new3 <- df.new2[-11,] df.new3[,2] <- as.numeric(df.new3[,2]) NLS <- data.frame(dataNL$symphousehold) THS <- data.frame(dataTH$symphousehold) NLS$country <- "Netherlands" colnames(NLS) <- c("HSS","Country") THS$country <- "Thailand" colnames(THS) <- c("HSS","Country") NLTHS <- rbind(NLS,THS) NLTHS <- na.omit(NLTHS) df.sym <- ddply(NLTHS,.(Country),summarise, prop=prop.table(table(HSS)), HSS=names(table(HSS))) w6 <- c("Netherlands",0,"6") df.symC <- rbind(df.sym,w6) df.symCnew <- transform(df.symC, prop = as.numeric(prop)) df.symCnewO <- df.symCnew[order(df.symCnew$Country),] df.s <- data.frame(df.symCnewO) df.s = df.s[-1,] df.s = df.s[-6,] #REMOVE UNKNOWN NETHERLANDS (10) df.s = df.s[-7,] df.s = df.s[-13,] #REMOVE UNKNOWN THAILAND (10) df.s <- df.s[-6,] df.s[10,2] <- c(0.036199095+0.018099548) df.s <- df.s[-11,] df.s$HS <- df.new3$HS df.s$HSS <- NULL df.s[,2] <- as.numeric(df.s[,2]) NLcensus <- formatC(NLcensus,digits=3, format="f") NLc <- cbind(NLcensus,THcensus) THc <- matrix(THcensus) class(df.new3$HS) # CENSUS DATA Cdfcountry <- as.vector(c("Netherlands","Netherlands","Netherlands","Netherlands","Netherlands","Thailand","Thailand", "Thailand", "Thailand", "Thailand")) CdfNL <- c(as.numeric(NLcensus),as.numeric(THcensus)) Cdfhouse <- c("1","2","3","4","5+","1","2","3","4","5+") CdfNL <- data.frame(Cdfcountry,CdfNL,Cdfhouse) colnames(CdfNL) <- c("Country","prop","HS") ggplot(df.new3,aes(x=HS))+ geom_bar(aes(y=prop, fill=Country),stat="identity",position='dodge',alpha=0.55)+ scale_fill_manual(values=c("lightgreen", "darkblue"))+ geom_line(data=df.s, aes(y=prop, group=Country,colour = Country,linetype=Country),position=position_dodge(width=1, height=0),size=1)+ geom_point(data=df.s, aes(y=prop, group=Country,colour = Country),position=position_dodge(width=1, height=0),size=2.5)+ geom_point(data=CdfNL, aes(y=prop, shape=Country, group=Country,fill=Country), position=position_dodge(width=1, height=0),size=4.1)+ scale_shape_manual(values = c(22, 24))+ scale_color_manual(values=c("lightgreen", "darkblue"))+ xlab("household size") + ylab("proportion of total in country") + theme_bw(base_size = 15)+ theme(axis.title.x = element_text(color = "black", size = 15, vjust= -0.3))+ theme(axis.title.y = element_text(color = "black", size = 15, vjust= 0.2))+ theme(axis.text.x = element_text(color = "black", size = 15))+ theme(axis.text.y = element_text(color = "black", size = 15)) savePlot(file="",type="pdf") ############################################## FIGURE 2D END ############################ ######################################### Figure 3A ######################################## require(ggplot2) dfNLt <- data.frame(dataNL$travelTotwZeros,dataNL$weekorw) dfNLt$class <- "while travelling" colnames(dfNLt) <- c("value","week","class") dfNLt$classC[dfNLt$week=="weekdays" & dfNLt$class=="while travelling"]="Travelling WD" dfNLt$classC[dfNLt$week=="weekends" & dfNLt$class=="while travelling"]="Travelling WE" dfNLl <- data.frame(dataNL$locationTotReal,dataNL$weekorw) dfNLl$class <- "at locations" colnames(dfNLl) <- c("value","week","class") dfNLl$classC[dfNLl$week=="weekdays" & dfNLl$class=="at locations"]="Locations WD" dfNLl$classC[dfNLl$week=="weekends" & dfNLl$class=="at locations"]="Locations WE" dfNLf <- data.frame(dataNL$foodTotFULL,dataNL$weekorw) dfNLf$class <- "while eating" colnames(dfNLf) <- c("value","week","class") dfNLf$classC[dfNLf$week=="weekdays" & dfNLf$class=="while eating"]="Eating WD" dfNLf$classC[dfNLf$week=="weekends" & dfNLf$class=="while eating"]="Eating WE" summary(dfNLf) DFNL <- rbind(dfNLt,dfNLl,dfNLf) DFNL$country <- "Netherlands" DFNL <- na.omit(DFNL) #THAILAND dataTHr <- dataTH[(dataTH$recruitmentid=="8L95PZN")==FALSE & (dataTH$recruitmentid=="M72V2YLZ")==FALSE, ] dfTHt <- data.frame(dataTHr$travelTotwZeros,dataTHr$weekorw ) dfTHt$class <- "while travelling" colnames(dfTHt) <- c("value","week","class") dfTHt$classC[dfTHt$week=="weekdays" & dfTHt$class=="while travelling"]="Travelling WD" dfTHt$classC[dfTHt$week=="weekends" & dfTHt$class=="while travelling"]="Travelling WE" dfTHl <- data.frame(dataTHr$locationTotReal,dataTHr$weekorw) dfTHl$class <- "at locations" colnames(dfTHl) <- c("value","week","class") dfTHl$classC[dfTHl$week=="weekdays" & dfTHl$class=="at locations"]="Locations WD" dfTHl$classC[dfTHl$week=="weekends" & dfTHl$class=="at locations"]="Locations WE" summary(dfTHl) dfTHf <- data.frame(dataTHr$foodTotFULL,dataTHr$weekorw) dfTHf$class <- "while eating" colnames(dfTHf) <- c("value","week","class") dfTHf$classC[dfTHf$week=="weekdays" & dfTHf$class=="while eating"]="Eating WD" dfTHf$classC[dfTHf$week=="weekends" & dfTHf$class=="while eating"]="Eating WE" summary(dfTHf) DFTH <- NULL DFTH <- rbind(dfTHt,dfTHl,dfTHf) DFTH$country <- "Thailand" DFTH <- na.omit(DFTH) NLTH <- NULL NLTH <- rbind(DFNL,DFTH) head(NLTH) NLTH$classC <- factor(NLTH$classC, levels= c("Eating WE","Eating WD","Travelling WE","Travelling WD", "Locations WE", "Locations WD")) nrow(NLTH) #nederland travelling MEANtravNLWD <- mean(dfNLt$value[dfNLt$week=="weekdays" & dfNLt$class=="while travelling"],na.rm=T) MEDIANtravNLWD <- median(dfNLt$value[dfNLt$week=="weekdays" & dfNLt$class=="while travelling"],na.rm=T) SDtravNLWD <- sd(dfNLt$value[dfNLt$week=="weekdays" & dfNLt$class=="while travelling"],na.rm=T) NtravNLWD <- length(na.omit(dfNLt$value[dfNLt$week=="weekdays" & dfNLt$class=="while travelling"])) MEANtravNLWE <- mean(dfNLt$value[dfNLt$week=="weekends" & dfNLt$class=="while travelling"],na.rm=T) MEDIANtravNLWE <- median(dfNLt$value[dfNLt$week=="weekends" & dfNLt$class=="while travelling"],na.rm=T) SDtravNLWE <- sd(dfNLt$value[dfNLt$week=="weekends" & dfNLt$class=="while travelling"],na.rm=T) NtravNLWE <- length(na.omit(dfNLt$value[dfNLt$week=="weekends" & dfNLt$class=="while travelling"])) #Thailand travelling MEANtravTHWD <- mean(dfTHt$value[dfTHt$week=="weekdays" & dfTHt$class=="while travelling"],na.rm=T) MEDIANtravTHWD <- median(dfTHt$value[dfTHt$week=="weekdays" & dfTHt$class=="while travelling"],na.rm=T) SDtravTHWD <- sd(dfTHt$value[dfTHt$week=="weekdays" & dfTHt$class=="while travelling"],na.rm=T) NtravTHWD <- length(na.omit(dfTHt$value[dfTHt$week=="weekdays" & dfTHt$class=="while travelling"])) MEANtravTHWE <- mean(dfTHt$value[dfTHt$week=="weekends" & dfTHt$class=="while travelling"],na.rm=T) MEDIANtravTHWE <- median(dfTHt$value[dfTHt$week=="weekends" & dfTHt$class=="while travelling"],na.rm=T) SDtravTHWE <- sd(dfTHt$value[dfTHt$week=="weekends" & dfTHt$class=="while travelling"],na.rm=T) NtravTHWE <- length(na.omit(dfTHt$value[dfTHt$week=="weekends" & dfTHt$class=="while travelling"])) #NEDERLAND LOCATIONS MEANlocNLWD <- mean(dfNLl$value[dfNLl$week=="weekdays" & dfNLl$class=="at locations"],na.rm=T) MEDIANlocNLWD <- median(dfNLl$value[dfNLl$week=="weekdays" & dfNLl$class=="at locations"],na.rm=T) SDlocNLWD <- sd(dfNLl$value[dfNLl$week=="weekdays" & dfNLl$class=="at locations"],na.rm=T) NlocNLWD <- length(dfNLl$value[dfNLl$week=="weekdays" & dfNLl$class=="at locations"]) MEANlocNLWE <- mean(dfNLl$value[dfNLl$week=="weekends" & dfNLl$class=="at locations"],na.rm=T) MEDIANlocNLWE <- median(dfNLl$value[dfNLl$week=="weekends" & dfNLl$class=="at locations"],na.rm=T) SDlocNLWE <- sd(dfNLl$value[dfNLl$week=="weekends" & dfNLl$class=="at locations"],na.rm=T) NlocNLWE <- length(na.omit(dfNLl$value[dfNLl$week=="weekends" & dfNLl$class=="at locations"])) #Thailand LOCATIONS MEANlocTHWD <- mean(dfTHl$value[dfTHl$week=="weekdays" & dfTHl$class=="at locations"],na.rm=T) MEDIANlocTHWD <- median(dfTHl$value[dfTHl$week=="weekdays" & dfTHl$class=="at locations"],na.rm=T) SDlocTHWD <- sd(dfTHl$value[dfTHl$week=="weekdays" & dfTHl$class=="at locations"],na.rm=T) NlocTHWD <- length(dfTHl$value[dfTHl$week=="weekdays" & dfTHl$class=="at locations"]) MEANlocTHWE <- mean(dfTHl$value[dfTHl$week=="weekends" & dfTHl$class=="at locations"],na.rm=T) MEDIANlocTHWE <- median(dfTHl$value[dfTHl$week=="weekends" & dfTHl$class=="at locations"],na.rm=T) SDlocTHWE <- sd(dfTHl$value[dfTHl$week=="weekends" & dfTHl$class=="at locations"],na.rm=T) NlocTHWE <- length(na.omit(dfTHl$value[dfTHl$week=="weekends" & dfTHl$class=="at locations"])) #NEDERLAND EATING FOOD WITH MEANfoodNLWD <- mean(dfNLf$value[dfNLf$week=="weekdays" & dfNLf$class=="while eating"],na.rm=T) MEDIANfoodNLWD <- median(dfNLf$value[dfNLf$week=="weekdays" & dfNLf$class=="while eating"],na.rm=T) SDfoodNLWD <- sd(dfNLf$value[dfNLf$week=="weekdays" & dfNLf$class=="while eating"],na.rm=T) NfoodNLWD <- length(na.omit(dfNLf$value[dfNLf$week=="weekdays" & dfNLf$class=="while eating"])) MEANfoodNLWE <- mean(dfNLf$value[dfNLf$week=="weekends" & dfNLf$class=="while eating"],na.rm=T) MEDIANfoodNLWE <- median(dfNLf$value[dfNLf$week=="weekends" & dfNLf$class=="while eating"],na.rm=T) SDfoodNLWE <- sd(dfNLf$value[dfNLf$week=="weekends" & dfNLf$class=="while eating"],na.rm=T) NfoodNLWE <- length(na.omit(dfNLf$value[dfNLf$week=="weekends" & dfNLf$class=="while eating"])) #Thailand EATING FOOD WITH MEANfoodTHWD <- mean(dfTHf$value[dfTHf$week=="weekdays" & dfTHf$class=="while eating"],na.rm=T) MEDIANfoodTHWD <- median(dfTHf$value[dfTHf$week=="weekdays" & dfTHf$class=="while eating"],na.rm=T) SDfoodTHWD <- sd(dfTHf$value[dfTHf$week=="weekdays" & dfTHf$class=="while eating"],na.rm=T) NfoodTHWD <- length(na.omit(dfTHf$value[dfTHf$week=="weekdays" & dfTHf$class=="while eating"])) MEANfoodTHWE <- mean(dfTHf$value[dfTHf$week=="weekends" & dfTHf$class=="while eating"],na.rm=T) MEDIANfoodTHWE <- median(dfTHf$value[dfTHf$week=="weekends" & dfTHf$class=="while eating"],na.rm=T) SDfoodTHWE <- sd(dfTHf$value[dfTHf$week=="weekends" & dfTHf$class=="while eating"],na.rm=T) NfoodTHWE <- length(na.omit(dfTHf$value[dfTHf$week=="weekends" & dfTHf$class=="while eating"])) ggplot(NLTH, aes(x=classC,y=value,fill=country))+ geom_boxplot(notch = FALSE, alpha=0.55,legend=FALSE)+ scale_fill_manual(values=c("lightgreen", "darkblue"))+ scale_y_sqrt(breaks = c(2,10,20,40,60,100,200,300,400,500),limits=c(0,670))+ coord_flip()+ geom_hline(yintercept=0,colour="grey", alpha=0.55)+ xlab("") + ylab("Number of contact persons") + theme_bw(base_size = 20)+ theme(axis.title.x = element_text(color = "black", size = 15, vjust= -0.3))+ theme(axis.title.y = element_text(color = "black", size = 15, vjust= 0.2))+ theme(axis.text.x = element_text(color = "black", size = 15))+ theme(axis.text.y = element_text(color = "black", size = 15))+ annotate("text", x="Locations WE",y=22, label=MEDIANlocTHWE, col="blue",size=5, vjust=-0.3)+ annotate("text", x="Locations WE",y=13, label=MEDIANlocNLWE, col="blue",size=5, vjust=1.3)+ annotate("text", x="Locations WE",y=580, label=round(MEANlocTHWE, digits = 1),col="black", size=5, vjust=-0.3)+ annotate("text", x="Locations WE",y=580, label=round(MEANlocNLWE, digits = 1),col="black", size=5, vjust=1.3)+ annotate("text", x="Locations WE",y=660, label="(85.5)",col="darkgrey", size=5, vjust=-.3)+ #SDlocTHWE annotate("text", x="Locations WE",y=660, label="(32.8)",col="darkgrey", size=5, vjust=1.3)+ #SDlocNLWE annotate("text", x="Locations WD",y=22, label=MEDIANlocTHWD, col="blue",size=5, vjust=-0.3)+ annotate("text", x="Locations WD",y=580, label=round(MEANlocTHWD, digits = 1),col="black", size=5, vjust=-0.3)+ annotate("text", x="Locations WD",y=14, label=MEDIANlocNLWD, col="blue",size=5, vjust=1.3)+ annotate("text", x="Locations WD",y=580, label=round(MEANlocNLWD, digits = 1),col="black", size=5, vjust=1.3)+ annotate("text", x="Locations WD",y=660, label="(55.3)",col="darkgrey", size=5, vjust=-.3)+ #SDlocTHWD annotate("text", x="Locations WD",y=660, label="(49.1)",col="darkgrey", size=5, vjust=1.3)+ #SDlocNLWD annotate("text", x="Travelling WE",y=7.5, label=MEDIANtravTHWE, col="blue",size=5, vjust=-0.3)+ annotate("text", x="Travelling WE",y=2, label=MEDIANtravNLWE, col="blue",size=5, vjust=1.3)+ annotate("text", x="Travelling WE",y=580, label=round(MEANtravTHWE, digits = 1),col="black", size=5, vjust=-0.3)+ annotate("text", x="Travelling WE",y=580, label=round(MEANtravNLWE, digits = 1),col="black", size=5, vjust=1.3)+ annotate("text", x="Travelling WE",y=660, label="(46.6)",col="darkgrey", size=5, vjust=-.3)+ #SDtravTHWE annotate("text", x="Travelling WE",y=660, label="(7.5)",col="darkgrey", size=5, vjust=1.3)+ #SDtravNLWE annotate("text", x="Travelling WD",y=7.5, label=MEDIANtravTHWD, col="blue",size=5, vjust=-0.3)+ annotate("text", x="Travelling WD",y=2, label=MEDIANtravNLWD, col="blue",size=5, vjust=1.3)+ annotate("text", x="Travelling WD",y=580, label=round(MEANtravTHWD, digits = 1),col="black", size=5, vjust=-0.3)+ annotate("text", x="Travelling WD",y=580, label=round(MEANtravNLWD, digits = 1),col="black", size=5, vjust=1.3)+ annotate("text", x="Travelling WD",y=660, label="(29.0)",col="darkgrey", size=5, vjust=-.3)+ #SDtravTHWD annotate("text", x="Travelling WD",y=660, label="(9.4)",col="darkgrey", size=5, vjust=1.3)+ #SDtravNLWD annotate("text", x="Eating WE",y=9, label=MEDIANfoodTHWE, col="blue",size=5, vjust=-0.3)+ annotate("text", x="Eating WE",y=5.5, label=MEDIANfoodNLWE, col="blue",size=5, vjust=1.3)+ annotate("text", x="Eating WE",y=580, label=round(MEANfoodTHWE, digits = 1),col="black", size=5, vjust=-0.3)+ annotate("text", x="Eating WE",y=580, label=round(MEANfoodNLWE, digits = 1),col="black", size=5, vjust=1.3)+ annotate("text", x="Eating WE",y=660, label="(15.6)",col="darkgrey", size=5, vjust=-.3)+ #SDfoodTHWE annotate("text", x="Eating WE",y=660, label="(16.8)",col="darkgrey", size=5, vjust=1.3)+ #SDfoodNLWE annotate("text", x="Eating WD",y=8.5, label=MEDIANfoodTHWD,col="blue",size=5, vjust=-0.3)+ annotate("text", x="Eating WD",y=6, label=MEDIANfoodNLWD,col="blue",size=5, vjust=1.3)+ annotate("text", x="Eating WD",y=580, label=round(MEANfoodTHWD, digits = 1),col="black", size=5, vjust=-0.3)+ annotate("text", x="Eating WD",y=580, label=round(MEANfoodNLWD, digits = 1),col="black", size=5, vjust=1.3)+ annotate("text", x="Eating WD",y=660, label="(14.7)",col="darkgrey", size=5, vjust=-.3)+ #SDfoodTHWD annotate("text", x="Eating WD",y=660, label="(6.6)",col="darkgrey", size=5, vjust=1.3) #SDfoodNLWD savePlot(file="",type="pdf") ######################################### Figure 3A END ######################################## ######################################### Figure 3B START ######################################## require(MASS) require(ggplot2) install.packages("fitdistrplus") require(fitdistrplus) dataTHr <- dataTH[(dataTH$recruitmentid=="8L95PZN")==FALSE & (dataTH$recruitmentid=="M72V2YLZ")==FALSE, ] #remove extremes (see method section) # FITTING, what are the values MU and SIZE: degree <- dataNL$degreeYourSpace degree <- dataTHr$degreeYourSpace degree <- na.omit(degree) degree <- c(degree) fg.mle <- fitdist(degree, "nbinom", method="mle") ml <- mledist(degree,"nbinom") b1 <- bootdist(fg.mle, niter=501) #FOR 95% confidence interval = parametric bootstrap summary(b1) #Create a plot summary(dataNL$degreeYourSpace) summary(dataTHr$degreeYourSpace) out.NBth <-glm.nb(dataNL$degreeYourSpace~1) out.NBnl <-glm.nb(dataTHr$degreeYourSpace~1) #Create datasets dfTH <- data.frame(x=0:533, prob=dnbinom(0:533, mu=exp(coef(out.NBth)), size=out.NBth$theta)) #create a dataset dfNL <- data.frame(x=0:522, prob=dnbinom(0:522, mu=exp(coef(out.NBnl)), size=out.NBnl$theta)) #ggplot(data=dfTH, aes(x=x,y=prob)) + geom_line() dfTH$country <- "Thailand" dfNL$country <- "Netherlands" LINES2 <- rbind(dfTH,dfNL) head(LINES2) degreeNL <- data.frame(dataNL$degreeYourSpace,"Netherlands") colnames(degreeNL) <- c("degree","country") degreeTH <- data.frame(dataTHr$degreeYourSpace,"Thailand") colnames(degreeTH) <- c("degree","country") degreeF <- rbind(degreeNL,degreeTH) ggplot(degreeF,aes(degree, fill=country))+ geom_histogram(alpha=0.55,binwidth=4, aes(y=..density..),position="identity")+ scale_fill_manual(values=c("lightgreen", "darkblue"))+ scale_x_continuous(limits=c(0,540),breaks=c(0,100,200,300,400,500))+ scale_y_continuous(limits=c(0,0.045),breaks=c(0,0.01,0.02,0.03,0.04))+ theme_bw(base_size = 20)+ xlab("degree") + ylab("density")+ theme(axis.title.x = element_text(color = "black", size = 20, vjust= -0.1))+ theme(axis.title.y = element_text(color = "black", size = 20, vjust= 0.35))+ theme(axis.text.x = element_text(color = "black", size = 20))+ theme(axis.text.y = element_text(color = "black", size = 20))+ geom_line(data=LINES2, aes(x=x, y=prob,group=country, colour = country, linetype=country),position="identity",size=1)+ scale_color_manual(values=c("lightgreen", "darkblue")) savePlot(file="",type="pdf") ######################################### Figure 3B END ######################################## ######################################### Figure 3C ########################################### dataTHr <- dataTH[(dataTH$recruitmentid=="8L95PZN")==FALSE & (dataTH$recruitmentid=="M72V2YLZ")==FALSE, ] #DELETE EXTREME VALUES, see method section degreeNL <- dataNL$degreeYourSpace degreeNL <- na.omit(degreeNL) degreeTHr <- dataTHr$degreeYourSpace degreeTH <- na.omit(degreeTHr) x <- c(log(degreeTH+0.5)) y <- c(log(degreeNL+0.5)) qqplot(x,y, xlab="degree Thailand (log)", ylab="degree Netherlands (log)", cex.lab=1.5, cex.axis=1.5, cex.sub=1.5) abline(a=0,b=1, col=2,lwd=1,lty=2) #IF the distributions are the same, then the points should be on this line #TEST WITH ANDERSON-DARLING install.packages("kSamples") require(kSamples) ad.test(list(x,y),method=("asymptotic"),dist=FALSE,Nsim=10000) ######################################### Figure 3C END ################################################### ########################################################################################################### ######################################### FIGURE 3D & LOGISTIC REGRESSION ANALYSES ####################### require(ggplot2) require(Hmisc) install.packages("rcs") require(rcs) require(stats) install.packages("rms") require(rms) #WITH SEEDS### rdsdata <- dataTH[(dataTH$recruitmentid=="8L95PAN")==FALSE & (dataTH$recruitmentid=="M7DVDBLA")==FALSE, ] rdsdata <- dataNL rdsdata$age[rdsdata$age==-1]=NA rds.data2 <- rdsdata[is.na(rdsdata$age)==FALSE,] #row eruit waar geen age is! nrow(rds.data2) #WITH SEEDS### #WITHOUT SEEDS##############################ZONDER SEEDS #THAILAND rdsdata <- dataTH[(dataTH$recruitmentid=="8L95PAN")==FALSE & (dataTH$recruitmentid=="M7DVDBLA")==FALSE, ] rdsdata$age[rdsdata$age==-1]=NA rdsdata$seeds[is.na(rdsdata$seeds)] = 2 rdsdata$seeds[rdsdata$seeds== 0] = 2 rdsdata$seeds[rdsdata$seeds==1]= NA rds.data2 <- rdsdata[is.na(rdsdata$seeds)==FALSE,] rds.data2 <- rds.data2[is.na(rds.data2$age)==FALSE,] nrow(rds.data2) #NETHERLANDS rdsdata <- dataNL rdsdata$age[rdsdata$age==-1]=NA rdsdata$seeds[is.na(rdsdata$seeds)] <- 2 rdsdata$seeds[rdsdata$seeds==1]= NA rds.data2 <- rdsdata[is.na(rdsdata$seeds)==FALSE,] rds.data2 <- rds.data2[is.na(rds.data2$age)==FALSE,] rds.data2$seeds #ZONDER SEEDS###########################ZONDER SEEDS rds.data2$recruited <- ifelse(rds.data2$recruitmentbyNUM=="0",0, ifelse(rds.data2$recruitmentbyNUM>"0",1,NA)) rds.data2$recruited <- factor(rds.data2$recruited) rds.data2$education2 <- ifelse(rds.data2$education<"4",0, ifelse(rds.data2$education>"3",1,NA)) rds.data2$education2 <- factor(rds.data2$education2) rds.data2$householdRealV2[rds.data2$householdRealV2==520]=NA rds.data2$gender <- ifelse(rds.data2$gender=="1",0, ifelse(rds.data2$gender=="2",1,NA)) rds.data2$gender <- factor(rds.data2$gender) ############################################################################################ #########################################Supplementary test for LOGISTIC REGRESSION ANALYSES####################### degree <- rds.data2$degreeYourSpace recruited <- as.numeric(rds.data2$recruited) df <- data.frame(degree,recruited) df <- na.omit(df) rcspline.plot(log(df$degree+0.5), df$recruited, nk=4, showknots=FALSE, noprint=FALSE,xlab="degree", ylim=c(-1,1.1),xrange=c(0,10)) #just to plot function before adding RCS #########################################Supplementary test for LOGISTIC REGRESSION ANALYSES####################### ############################################################################################ rds.data2 <- rds.data2[is.na(rds.data2$age)==FALSE,] rds.data2 <- rds.data2[is.na(rds.data2$degreeYourSpace)==FALSE,] nrow(rds.data2) fit1 <- glm(formula=recruited~rcs(log(degreeYourSpace+0.5),3)+ rcs(age,3) + education2 + gender+ rcs(householdRealV2,3), data=rds.data2, family=binomial("logit")) summary(fit1) ########################################################## EXPORT FIT VALUES########################################################## coef <-summary(fit1)$coefficients[,1] stE <- summary(fit1)$coefficients[,2] Z <- summary(fit1)$coefficients[,3] Pvalue <- summary(fit1)$coefficients[,4] CI <- confint.default(fit1) #CI Using Standard Errors e.table <- cbind("estimate" = coef, "SE"= stE, "z value" = Z, "Pr(>|z|)" = Pvalue, "95% CI" = CI) rownames(e.table) <- c("Constant","degree1","degree2","Age","age", "Edu Bachelor","male", "Household size1","Household size2") write.csv(e.table, ".csv") ########################################################## EXPORT FIT VALUES########################################################## newdata3 <- cbind(rds.data2,predict(fit1,type="response",se=TRUE)) newdata4 <- within(newdata3, { PredictProb <- plogis(fit) LL <- plogis(fit - (1.96*se.fit)) UL <- plogis(fit + (1.96*se.fit)) }) head(newdata4) data <- data.frame(newdata4$degreeYourSpace, newdata4$UL,newdata4$LL,newdata4$PredictProb,newdata4$fit,newdata4$gender) ##THAILAND data$country <- "Thailand" #WITH SEEDS data$Seeds <- "With Seeds" ThaiWithSeeds <- data #Without SEEDS data$Seeds <- "Without Seeds" ThaiWithoutSeeds <- data ##NETHERLANDS data$country <- "Netherlands" #WITH SEEDS data$Seeds <- "With Seeds" NLWithSeeds <- data #Without SEEDS data$Seeds <- "Without Seeds" NLWithoutSeeds <- data datafull <- rbind(ThaiWithSeeds,ThaiWithoutSeeds,NLWithSeeds,NLWithoutSeeds) colnames(datafull) <- c("degreeYourSpace","UL","LL","PredictProb","fit","gender","Seeds","country") datafull <- na.omit(datafull) datafull$gender <- as.numeric(datafull$gender) datafull$gender[datafull$gender=="1"]= "Female" datafull$gender[datafull$gender=="2"]= "Male" ####################################### FIGURE 3D ######################################### ggplot(datafull, aes(x = degreeYourSpace, y = PredictProb, colour=gender, group=gender)) + geom_smooth(size=1,se=FALSE,method="loess")+ geom_smooth(aes(y=UL,fill=gender),se=FALSE,linetype=3,size=1, method="loess")+ geom_smooth(aes(y=LL,fill=gender),se=FALSE,linetype=3,size=1, method="loess")+ scale_x_continuous(limits=c(0,150), breaks=c(0,25,50,75,100,125,150))+ facet_grid(country~Seeds, scales = "free")+ scale_color_manual(values=c("#CC6666", "#008B45"))+ scale_y_continuous(limits=c(0.4,0.8),breaks=c(0.2,0.4,0.6,0.8))+ theme_bw(base_size = 20)+ theme(strip.background = element_rect(fill = "white"))+ xlab("degree") + ylab("Predicted Probability of Inviting Contacts")+ theme(axis.title.x = element_text(color = "black", size = 20, vjust= -0.1))+ theme(axis.title.y = element_text(color = "black", size = 20, vjust= 0.35))+ theme(axis.text.x=element_text(color = "black", size = 20,angle=30,hjust=1))+ theme(axis.text.y = element_text(color = "black", size = 20))+ theme(legend.position="top")+ theme(strip.text.x= element_text(size =20))+ theme(strip.text.y= element_text(size =20)) savePlot(file="",type="pdf") ####################################### FIGURE 3D END ######################################### ####################################### FIGURE 4A ######################################### library(data.table) library(lattice) library(gplots) ##############CONTACT AT DIFFERENT LOCATIONS #NETHERLANDS DF <- data.table(dataNL$age_groups,dataNL$degreeY, dataNL$degreeS, dataNL$degreeO) tests <- DF[ ,lapply(.SD, sum, na.rm=TRUE), by=V1] tests[, young:= V2/sum(V2,V3,V4), by = list(V1)] tests[, same:= V3/sum(V2,V3,V4), by = list(V1)] tests[, older:= V4/sum(V2,V3,V4), by = list(V1)] df <- data.frame(tests$V1,tests$young,tests$same, tests$older) df <- df[-13,] df[df=="NaN"] = 0 empt <- matrix(0:0,ncol=4,nrow=1) empt[1,1] <- "10-14" mats <- as.matrix(df) dim(mats) <- c(13,4) mats <- rbind(empt,mats) matt <- data.frame(mats) order <- c("10-14","15-19","20-24","25-29","30-34","35-39","40-44","45-49","50-54","55-59","60-64","65-69","70-74","75-79") matr <- matt[order(match(matt[,1],order)),] mat <- as.matrix(matr) dim(mat) <- c(14,4) mat loc <- mat[,-1] myPanel <- function(x, y, z, ...) { panel.levelplot(x,y,z,...) # Add: panel = myPanel to levelplot panel.text(x, y, round(z,2),cex=1.5,font=1) } x <- c("10-14","15-19","20-24","25-29","30-34","35-39","40-44","45-49","50-54","55-59","60-64","65-69","70-74","75-79") y <- c("locations YA", "locations SA","locations OA") ckey <- list(labels=list(cex=1.8,cex.lab=1.5,font=1, col='black'), height=0.35) levelplot(loc, ylim=y, xlim=x,panel = myPanel, xlab=list("age participant",cex=1.8,font=1), colorkey=ckey, ylab=list("",cex=1.8,font=1),alpha.regions=0.55, col.regions=colorpanel(16,"darkgreen", "lightgreen","yellow"), scales=list(x=list(cex=1.5,font=1,rot=45),y=list(cex=1.5,font=1))) savePlot(file="",type="pdf") ##############CONTACT AT DIFFERENT LOCATIONS #THAILAND DF <- data.table(dataTH$age_groups,dataTH$degreeY, dataTH$degreeS, dataTH$degreeO) tests <- DF[ ,lapply(.SD, sum, na.rm=TRUE), by=V1] tests[, young:= V2/sum(V2,V3,V4), by = list(V1)] tests[, same:= V3/sum(V2,V3,V4), by = list(V1)] tests[, older:= V4/sum(V2,V3,V4), by = list(V1)] df <- data.frame(tests$V1,tests$young,tests$same, tests$older) df <- df[-10,] df[df=="NaN"] = 0 empt <- matrix(0:0,ncol=4,nrow=5) empt[1,1] <- "55-59" empt[2,1] <- "60-64" empt[3,1] <- "65-69" empt[4,1] <- "70-74" empt[5,1] <- "75-79" mats <- as.matrix(df) dim(mats) <- c(9,4) mats <- rbind(empt,mats) mats <- data.frame(mats) order <- c("10-14","15-19","20-24","25-29","30-34","35-39","40-44","45-49","50-54","55-59","60-64","65-69","70-74","75-79") matr <- mats[order(match(mats[,1],order)),] mat <- as.matrix(matr) dim(mat) <- c(14,4) loc <- mat[,-1] myPanel <- function(x, y, z, ...) { panel.levelplot(x,y,z,...) panel.text(x, y, round(z,2),cex=1.5,font=1) } levelplot(loc, ylim=y, xlim=x,panel = myPanel, xlab=list("age participant",cex=1.8,font=1), colorkey=ckey, ylab=list("",cex=1.8,font=1),alpha.regions=0.55, col.regions=colorpanel(16,"darkblue", "lightblue","yellow"), scales=list(x=list(cex=1.5,font=1,rot=45),y=list(cex=1.5,font=1))) savePlot(file="",type="pdf") #######CONTACT WHILE EATING CONTACTS### #NETHERLANDS DFe <- data.table(dataNL$age_groups,dataNL$FOODy, dataNL$FOODs, dataNL$FOODo) tests <- DFe[ ,lapply(.SD, sum, na.rm=TRUE), by=V1] tests[, young:= V2/sum(V2,V3,V4), by = list(V1)] tests[, same:= V3/sum(V2,V3,V4), by = list(V1)] tests[, older:= V4/sum(V2,V3,V4), by = list(V1)] df <- data.frame(tests$V1,tests$young,tests$same, tests$older) df <- df[-13,] df[df=="NaN"] = 0 empt <- matrix(0:0,ncol=4,nrow=1) empt[1,1] <- "10-14" mats <- as.matrix(df) dim(mats) <- c(13,4) mats <- rbind(empt,mats) matt <- data.frame(mats) order <- c("10-14","15-19","20-24","25-29","30-34","35-39","40-44","45-49","50-54","55-59","60-64","65-69","70-74","75-79") matr <- matt[order(match(matt[,1],order)),] mat <- as.matrix(matr) dim(mat) <- c(14,4) eat <- mat[,-1] myPanel <- function(x, y, z, ...) { panel.levelplot(x,y,z,...) panel.text(x, y, round(z,2),cex=1.5,font=1) } x <- c("10-14","15-19","20-24","25-29","30-34","35-39","40-44","45-49","50-54","55-59","60-64","65-69","70-74","75-79") y <- c("Eating YA","Eating SA","Eating OA") ckey <- list(labels=list(cex=1.5,cex.lab=1.5, col='black'), height=0.35) levelplot(eat, ylim=y, xlim=x,panel = myPanel, xlab=list("age participant",cex=1.8,font=1), colorkey=ckey, ylab=list("",cex=1.8,font=1), alpha.regions=0.55, col.regions=colorpanel(16,"darkgreen", "lightgreen","yellow"), scales=list(x=list(cex=1.5,font=1,rot=45),y=list(cex=1.5,font=1))) savePlot(file="",type="pdf") #######CONTACT WHILE EATING CONTACTS### ######THAILAND DFe <- data.table(dataTH$age_groups,dataTH$FOODy, dataTH$FOODs, dataTH$FOODo) tests <- DFe[ ,lapply(.SD, sum, na.rm=TRUE), by=V1] tests[, young:= V2/sum(V2,V3,V4), by = list(V1)] tests[, same:= V3/sum(V2,V3,V4), by = list(V1)] tests[, older:= V4/sum(V2,V3,V4), by = list(V1)] df <- data.frame(tests$V1,tests$young,tests$same, tests$older) df <- df[-10,] df[df=="NaN"] = 0 empt <- matrix(0:0,ncol=4,nrow=5) empt[1,1] <- "55-59" empt[2,1] <- "60-64" empt[3,1] <- "65-69" empt[4,1] <- "70-74" empt[5,1] <- "75-79" mats <- as.matrix(df) dim(mats) <- c(9,4) mats <- rbind(empt,mats) mats <- data.frame(mats) order <- c("10-14","15-19","20-24","25-29","30-34","35-39","40-44","45-49","50-54","55-59","60-64","65-69","70-74","75-79") matr <- mats[order(match(mats[,1],order)),] mat <- as.matrix(mats) dim(mat) <- c(14,4) eat <- mat[,-1] myPanel <- function(x, y, z, ...) { panel.levelplot(x,y,z,...) panel.text(x, y, round(z,2),cex=1.5,font=1) } x <- c("10-14","15-19","20-24","25-29","30-34","35-39","40-44","45-49","50-54","55-59","60-64","65-69","70-74","75-79") y <- c("Eating YA","Eating SA","Eating OA") ckey <- list(labels=list(cex=1.5,cex.lab=1.5, col='black'), height=0.35) levelplot(eat, ylim=y, xlim=x,panel = myPanel, xlab=list("age participant",cex=1.8,font=1), colorkey=ckey, ylab=list("",cex=1.8,font=1), alpha.regions=0.55, col.regions=colorpanel(16,"darkblue", "lightblue","yellow"), scales=list(x=list(cex=1.5,font=1,rot=45),y=list(cex=1.5,font=1))) savePlot(file="",type="pdf") ####################################### FIGURE 4A END ######################################### ####################################### FIGURE 4B ######################################### require(lattice) library(gplots) data <- dataTH #OR data <- dataNL data.koppel1 <- data[,c("recruitmentid","age", "agegrouplarge","gender","education","degreeYourSpace","Catdegree","travelTotwZeros","locationTotReal","foodTotFULL","householdRealV2", "tree", "wave", "no_suc_rec", "recruitmentbyNUM", "ntotalsym", "nnflu", "ncold", "TwoorMoreSymp","age_groups","postalcode"),] names(data.koppel1) <- c("child1","age.child1","agegrouplarge1", "gender.child1","education.child1","degree.child1","Catdegree.child1","travelTotwZeros.child1","locationTotReal.child1","foodTot.child1","householdTot.child1", "tree.child1", "wave.child1", "no_suc_rec.child1", "recruitmentbyNUM.child1", "totalsym.child1","flu.child1","cold.child1", "TwoorMoreSymp.child1","age_groups.child1","post_child1") data.merge <- merge(x=data,y=data.koppel1,by="child1",all.x=TRUE) data.merge <- data.merge[order(data.merge$ID),] data.koppel2 <- data[,c("recruitmentid","age","agegrouplarge","gender","education","degreeYourSpace","Catdegree","travelTotwZeros","locationTotReal","foodTotFULL","householdRealV2", "tree", "wave", "no_suc_rec", "recruitmentbyNUM", "ntotalsym", "nnflu", "ncold", "TwoorMoreSymp","age_groups","postalcode"),] names(data.koppel2) <- c("child2","age.child2","agegrouplarge2","gender.child2","education.child2","degree.child2","Catdegree.child2","travelTotwZeros.child2","locationTotReal.child2","foodTot.child2","householdTot.child2", "tree.child2", "wave.child2", "no_suc_rec.child2", "recruitmentbyNUM.child2", "totalsym.child2","flu.child2","cold.child2", "TwoorMoreSymp.child2","age_groups.child2","post_child2") data.merge <- merge(x=data.merge,y=data.koppel2,by="child2",all.x=TRUE) data.merge <- data.merge[order(data.merge$ID),] data.koppel3 <- data[,c("recruitmentid","age","agegrouplarge","gender","education","degreeYourSpace","Catdegree","travelTotwZeros","locationTotReal","foodTotFULL","householdRealV2", "tree", "wave", "no_suc_rec", "recruitmentbyNUM", "ntotalsym", "nnflu", "ncold", "TwoorMoreSymp","age_groups","postalcode"),] names(data.koppel3) <- c("child3","age.child3","agegrouplarge3","gender.child3","education.child3","degree.child3","Catdegree.child3","travelTotwZeros.child3","locationTotReal.child3","foodTot.child3","householdTot.child3", "tree.child3", "wave.child3", "no_suc_rec.child3", "recruitmentbyNUM.child3", "totalsym.child3","flu.child3","cold.child3", "TwoorMoreSymp.child3","age_groups.child3","post_child3") data.merge <- merge(x=data.merge,y=data.koppel3,by="child3",all.x=TRUE) data.merge <- data.merge[order(data.merge$ID),] data.koppel4 <- data[,c("recruitmentid","age","agegrouplarge", "gender","education","degreeYourSpace","Catdegree","travelTotwZeros","locationTotReal","foodTotFULL","householdRealV2", "tree", "wave", "no_suc_rec", "recruitmentbyNUM", "ntotalsym", "nnflu", "ncold", "TwoorMoreSymp","age_groups","postalcode"),] names(data.koppel4) <- c("child4","age.child4","agegrouplarge4","gender.child4","education.child4","degree.child4","Catdegree.child4","travelTotwZeros.child4","locationTotReal.child4","foodTot.child4","householdTot.child4", "tree.child4", "wave.child4", "no_suc_rec.child4", "recruitmentbyNUM.child4", "totalsym.child4","flu.child4","cold.child4", "TwoorMoreSymp.child4","age_groups.child4","post_child4") data.merge <- merge(x=data.merge,y=data.koppel4,by="child4",all.x=TRUE) data.merge <- data.merge[order(data.merge$ID),] tab <- data.frame(agerecruiter=rep(data.merge$age_groups,4),agechild=c(data.merge$age_groups.child1, data.merge$age_groups.child2,data.merge$age_groups.child3,data.merge$age_groups.child4)) tab2 <- na.omit(tab) t <- table(tab2) t <- as.matrix(t,ncol=13,nrow=13) #RUN ONLY FOR NETHERLANDS emt <- matrix(0:0, ncol=1,nrow=13) te <- cbind(emt,t) emr <- matrix(0:0,ncol=14,nrow=1) tet <- rbind(emr,te) t <- tet #RUN ONLY FOR NETHERLANDS #RUN ONLY FOR THAILAND t <- cbind(t[, 1:9], matrix(0:0, ncol = 4,nrow=9)) t <- rbind(t[1:9,],matrix(0:0, ncol = 13,nrow=4)) colnames(t) <- c("10-14","15-19","20-24","25-29","30-34","35-39","40-44","45-49","50-54","55-59","60-64","65-69","70-74") rownames(t) <- c("10-14","15-19","20-24","25-29","30-34","35-39","40-44","45-49","50-54","55-59","60-64","65-69","70-74") #RUN ONLY FOR THAILAND e <- prop.table(t) ee <- c("10-14","15-19","20-24","25-29","30-34","35-39","40-44","45-49","50-54","55-59","60-64","65-69","70-74") ckey <- list(labels=list(cex=1.5,cex.lab=1.5, col='black'), height=0.35) #FOR NETHERLANDS levelplot(e, ylim=ee, xlim=ee, xlab=list("age recruiter",cex=1.8,font=1), colorkey=ckey, ylab=list("age recruited contact person",cex=1.8,font=1), alpha.regions=0.55, col.regions=colorpanel(18,"darkgreen","gray100","yellow"), scales=list(x=list(cex=1.5,font=1,rot=45),y=list(cex=1.5,font=1))) savePlot(file="",type="pdf") #THAILAND #RUN FOR REDUCED SCALE: ee <- c("10-14","15-19","20-24","25-29","30-34","35-39","40-44","45-49","50-54") #GREEN COLOUR levelplot(e, ylim=ee, xlim=ee, xlab=list("age recruiter",cex=1.8,font=1), colorkey=ckey, ylab=list("age recruited contact person",cex=1.8,font=1), alpha.regions=0.55, col.regions=colorpanel(18,"darkgreen","gray100","orange"), scales=list(x=list(cex=1.5,font=1,rot=45),y=list(cex=1.5,font=1))) #BLUE COLOUR levelplot(e, ylim=ee, xlim=ee, xlab=list("age recruiter",cex=1.8,font=1), colorkey=ckey, ylab=list("age recruited contact person",cex=1.8,font=1), alpha.regions=0.55, col.regions=colorpanel(18,"darkblue", "cyan","yellow"), scales=list(x=list(cex=1.5,font=1,rot=45),y=list(cex=1.5,font=1))) savePlot(file="",type="pdf") ####################################### FIGURE 4B END ######################################### ####################################### FIGURE 4C ######################################### require(ggplot2) data <- dataNL data.koppel1 <- data[,c("recruitmentid","age", "agegrouplarge","gender","education","degreeYourSpace","Catdegree","travelTotwZeros","locationTotReal","foodTotFULL","householdRealV2", "tree", "wave", "no_suc_rec", "recruitmentbyNUM", "ntotalsym", "nnflu", "ncold", "TwoorMoreSymp","age_groups"),] names(data.koppel1) <- c("child1","age.child1","agegrouplarge1", "gender.child1","education.child1","degree.child1","Catdegree.child1","travelTotwZeros.child1","locationTotReal.child1","foodTot.child1","householdTot.child1", "tree.child1", "wave.child1", "no_suc_rec.child1", "recruitmentbyNUM.child1", "totalsym.child1","flu.child1","cold.child1", "TwoorMoreSymp.child1","age_groups.child1") data.merge <- merge(x=data,y=data.koppel1,by="child1",all.x=TRUE) data.merge <- data.merge[order(data.merge$ID),] data.koppel2 <- data[,c("recruitmentid","age","agegrouplarge","gender","education","degreeYourSpace","Catdegree","travelTotwZeros","locationTotReal","foodTotFULL","householdRealV2", "tree", "wave", "no_suc_rec", "recruitmentbyNUM", "ntotalsym", "nnflu", "ncold", "TwoorMoreSymp","age_groups"),] names(data.koppel2) <- c("child2","age.child2","agegrouplarge2","gender.child2","education.child2","degree.child2","Catdegree.child2","travelTotwZeros.child2","locationTotReal.child2","foodTot.child2","householdTot.child2", "tree.child2", "wave.child2", "no_suc_rec.child2", "recruitmentbyNUM.child2", "totalsym.child2","flu.child2","cold.child2", "TwoorMoreSymp.child2","age_groups.child2") data.merge <- merge(x=data.merge,y=data.koppel2,by="child2",all.x=TRUE) data.merge <- data.merge[order(data.merge$ID),] data.koppel3 <- data[,c("recruitmentid","age","agegrouplarge","gender","education","degreeYourSpace","Catdegree","travelTotwZeros","locationTotReal","foodTotFULL","householdRealV2", "tree", "wave", "no_suc_rec", "recruitmentbyNUM", "ntotalsym", "nnflu", "ncold", "TwoorMoreSymp","age_groups"),] names(data.koppel3) <- c("child3","age.child3","agegrouplarge3","gender.child3","education.child3","degree.child3","Catdegree.child3","travelTotwZeros.child3","locationTotReal.child3","foodTot.child3","householdTot.child3", "tree.child3", "wave.child3", "no_suc_rec.child3", "recruitmentbyNUM.child3", "totalsym.child3","flu.child3","cold.child3", "TwoorMoreSymp.child3","age_groups.child3") data.merge <- merge(x=data.merge,y=data.koppel3,by="child3",all.x=TRUE) data.merge <- data.merge[order(data.merge$ID),] data.koppel4 <- data[,c("recruitmentid","age","agegrouplarge", "gender","education","degreeYourSpace","Catdegree","travelTotwZeros","locationTotReal","foodTotFULL","householdRealV2", "tree", "wave", "no_suc_rec", "recruitmentbyNUM", "ntotalsym", "nnflu", "ncold", "TwoorMoreSymp","age_groups"),] names(data.koppel4) <- c("child4","age.child4","agegrouplarge4","gender.child4","education.child4","degree.child4","Catdegree.child4","travelTotwZeros.child4","locationTotReal.child4","foodTot.child4","householdTot.child4", "tree.child4", "wave.child4", "no_suc_rec.child4", "recruitmentbyNUM.child4", "totalsym.child4","flu.child4","cold.child4", "TwoorMoreSymp.child4","age_groups.child4") data.merge <- merge(x=data.merge,y=data.koppel4,by="child4",all.x=TRUE) data.merge <- data.merge[order(data.merge$ID),] DB <- rgb(0,0,139,alpha=55, maxColorValue=255) #THAILAND # COLOUR SCALE LG <- rgb(144,238,144,alpha=55, maxColorValue=255) #NETHERLANDS #RUN FOR NETHERLANDS tab <- data.frame(degreeR=rep(data.merge$degreeYourSpace,4),degreechild=c(data.merge$degree.child1, data.merge$degree.child2,data.merge$degree.child3,data.merge$degree.child4)) myplot <- qplot(degreeR, degreechild, data = tab, geom="hex", bins=40, xlim = c(0, 150), ylim = c(0,150), xlab=("degree recruiter"), ylab=("degree recruited contact person")) myplot + theme_bw(base_size = 20)+ scale_fill_gradient2(low="purple",mid="forestgreen", high="yellow", name="Count\n",breaks=c(0,2,5,8,10))+ theme(axis.title.x = element_text(color = "black", size = 25, vjust= -0.1))+ theme(axis.title.y = element_text(color = "black", size = 25, vjust= 0.3))+ theme(axis.text.x = element_text(color = "black", size = 20))+ theme(axis.text.y = element_text(color = "black", size = 20))+ theme(legend.text=element_text(size=20))+ theme(legend.title=element_text(size=20)) savePlot(file="",type="pdf") #RUN FOR THAILAND tab <- data.frame(degreeR=rep(data.merge$degreeYourSpace,4),degreechild=c(data.merge$degree.child1, data.merge$degree.child2,data.merge$degree.child3,data.merge$degree.child4)) myplot <- qplot(degreeR, degreechild, data = tab, geom="hex", bins=40, xlim = c(0, 150), ylim = c(0,150), xlab=("degree recruiter"), ylab=("degree recruited contact person")) myplot + theme_bw(base_size = 20)+ scale_fill_gradient2(low="blue3",mid="blue3", high="lightgoldenrod1", name="Count\n", breaks=c(1,2,3,4))+ theme(axis.title.x = element_text(color = "black", size = 25, vjust= -0.1))+ theme(axis.title.y = element_text(color = "black", size = 25, vjust= 0.3))+ theme(axis.text.x = element_text(color = "black", size = 20))+ theme(axis.text.y = element_text(color = "black", size = 20))+ theme(legend.text=element_text(size=20))+ theme(legend.title=element_text(size=20)) savePlot(file="",type="pdf") ####################################### FIGURE 4C ######################################### ####################################### ####################################### ####################################### ####################################### ####################################### ####################################### TABLE 2 EFFECTIVE CONTACT RATE ################################################################### dataTHr <- dataTH[(dataTH$recruitmentid=="8L95PZN")==FALSE & (dataTH$recruitmentid=="M72V2YLZ")==FALSE, ] #remove extremes (see method section) degree <- dataNL$degreeYourSpace degree <- dataTHr$degreeYourSpace m <- mean(degree,na.rm=T) v <- var(degree,na.rm=T) m+(v/m) loc <- dataNL$locationTotReal loc <- dataTHr$locationTotReal m <- mean(loc,na.rm=T) v <- var(loc,na.rm=T) m+(v/m) travel <- dataNL$travelTotwZeros travel <- dataTHr$travelTotwZeros m <- mean(travel,na.rm=T) v <- var(travel,na.rm=T) m+(v/m) food <- dataNL$foodTotFULL food <- dataTHr$foodTotFULL m <- mean(food,na.rm=T) v <- var(food,na.rm=T) m+(v/m) dataTH$householdRealV2[dataTH$householdRealV2==520]=NA house <- dataTH$householdRealV2 house <- dataNL$householdRealV2 m <- mean(house,na.rm=T) v <- var(house,na.rm=T) m+(v/m) ####################################### ####################################### TABLE 2 EFFECTIVE CONTACT RATE ################################################################### ####################################### ####################################### ####################################### ####################################### ####################################### ####################################### ####################################### ####################################### ####################################### ####################################### TABLE 3 CORRELATIONS ################################################################### install.packages("igraph") library(igraph) #Read datasets separately #THAILAND path <- "" dataset <- read.csv2(file=paste(path,"",sep="")) #NETHERLANDS path <- "" dataset <- read.csv2(file=paste(path,".csv",sep="")) dataset <- dataset[order(dataset$tree,dataset$wave,dataset$recruitmentid),] #REMOVE EMPTY ROWS dataset <- dataset[as.character(dataset$recruitmentid)!="",] #REPLACE EMPTY FIELDS CHILDREN TOKENS WITH NA's dataset[as.character(dataset$child1)=="","child1"] <- NA dataset[as.character(dataset$child2)=="","child2"] <- NA dataset[as.character(dataset$child3)=="","child3"] <- NA dataset[as.character(dataset$child4)=="","child4"] <- NA dataset.new <- data.frame(matrix(NA,nrow=0,ncol=ncol(dataset)*2+1)) names(dataset.new) <- c("degree",paste(names(dataset),"person1",sep="."),paste(names(dataset),"person2",sep=".")) #Begin loop: door voor elke boom in de dataset het volgende for (t in unique(dataset$tree)) { print(paste("Processing Tree Number",t,sep=" ")) #Bepaal (potentiele) recruiters in een boom, d.w.z. mensen binnen een boom die de vragenlijst ingevuld hebben. lab.tree <- unique(as.character(dataset[as.character(dataset$tree)==t,"recruitmentid"])) lab.tree <- lab.tree[is.na(lab.tree)==FALSE] #Bepaal aantal recruiters. len.tree <- length(lab.tree) #Als er meer dan 1 recruiter in een boom zit, doe het volgende. if (len.tree>1) { #Maak eerst drie soorten objecten per tree aan als voorbereiding. #[1] Maak een "adjacency matrix" van alleen maar nullen aan; zie verderop voor verdere uitleg. adj.mat <- matrix(0,nrow=len.tree,ncol=len.tree) rownames(adj.mat) <- lab.tree colnames(adj.mat) <- lab.tree #Voor alle individuen in de boom doe het volgende. for (i in lab.tree) { #Pas de adjacency matrix aan: vervang een 0 door 1 bij een link tussen parent/child adj.mat[rownames(adj.mat)==i,colnames(adj.mat)==as.character(dataset[dataset$recruitmentid==i,"child1"])] <- 1 adj.mat[rownames(adj.mat)==i,colnames(adj.mat)==as.character(dataset[dataset$recruitmentid==i,"child2"])] <- 1 adj.mat[rownames(adj.mat)==i,colnames(adj.mat)==as.character(dataset[dataset$recruitmentid==i,"child3"])] <- 1 adj.mat[rownames(adj.mat)==i,colnames(adj.mat)==as.character(dataset[dataset$recruitmentid==i,"child4"])] <- 1 adj.mat[rownames(adj.mat)==as.character(dataset[dataset$recruitmentid==i,"child1"]),colnames(adj.mat)==i] <- 1 adj.mat[rownames(adj.mat)==as.character(dataset[dataset$recruitmentid==i,"child2"]),colnames(adj.mat)==i] <- 1 adj.mat[rownames(adj.mat)==as.character(dataset[dataset$recruitmentid==i,"child3"]),colnames(adj.mat)==i] <- 1 adj.mat[rownames(adj.mat)==as.character(dataset[dataset$recruitmentid==i,"child4"]),colnames(adj.mat)==i] <- 1 } #[2] Maak een graaf object op basis van de graph adjacency matrix. g <- graph.adjacency(adj.mat, mode="max") #[3] Bepaal met Dijkstra's algoritme de kortste paden tussen elke persoon binnen een boom; dit is in feite de link degree. # Maak degree object aan. pth.mat <- shortest.paths(g) #Sla adj.mat, g en pth als aparte tree-specifieke objecten op (zie hieronder). assign(paste("adj.mat",t,sep=""),adj.mat) assign(paste("g",t,sep=""),g) assign(paste("pth.mat",t,sep=""),pth.mat) #Nu de eigenlijk stap van het bewerken van dataset.new #Vul nu dataset.new aan met nieuwe rijen: elke rij bevat de degree, en alle variabelen van persoon1, en alle variabelen van persoon2. #De degree kunnen we verkrijgen uit pth.mat. #Voor elke recruiter in de rijen van pth.mat doe het volgende for (i in rownames(pth.mat)) { #Bepaal de positie pos.i van persoon1 (kan de volgende waarde aanemen: 1 tot het aantal rijen) pos.i <- which(rownames(pth.mat)==i) #En pak de bijbehorende informatie van persoon1 uit de dataset. tmp.i <- (dataset[dataset$recruitmentid==i,]) #Voor elke recruiter in de kolommen van pth.mat doe het volgende for (j in colnames(pth.mat)) { #Bepaal de positie pos.j van persoon2 (kan de volgende waarde aanemen: 1 tot het aantal rijen) pos.j <- which(colnames(pth.mat)==j) #Omdat de matrix pth.mat symmetrisch is in de diagonaal, hoef je alleen een bewerking te doen voor de helft van de matrix: #In dit geval kiezen we als pos.j>pos.i (maar had evengoed kunnen zijn pos.jpos.i) { #Pak de bijbehorende informatie van persoon2 uit de dataset. tmp.j <- (dataset[dataset$recruitmentid==j,]) #Plak de eerder gemaakte kolommen tmp.i en tmp.j aan elkaar vast; #Plus voeg daarvoor ook de degree toe, nl. pth.mat[pos.i,pos.j] tmp.vec <- cbind(pth.mat[pos.i,pos.j],tmp.i,tmp.j) #Zorg ervoor dat tmp.vec dezelfde kolomnamen heeft als dataset.new names(tmp.vec) <- names(dataset.new) #Dan kan de rij met nieuwe link informatie namelijk netjes worden toegevoegd aan dataset.new. dataset.new <- rbind(dataset.new,tmp.vec) } } } } } #AGE dataset.cor <- data.frame(degree=1:max(dataset.new$degree,na.rm=T),cor=NA) for (i in 1:nrow(dataset.cor)) { dataset.cor[i,2] <- cor(dataset.new[dataset.new$degree==i,"age.person1"],dataset.new[dataset.new$degree==i,"age.person2"],use="pairwise.complete.obs") } dataset.cor plot(dataset.new[dataset.new$degree==1,"age.person1"],dataset.new[dataset.new$degree==1,"age.person2"]) cor.test(dataset.new[dataset.new$degree==3,"age.person1"],dataset.new[dataset.new$degree==3,"age.person2"],use="pairwise.complete.obs", method="pearson") cor.test(dataset.new[dataset.new$degree>=4,"age.person1"],dataset.new[dataset.new$degree>=4,"age.person2"],use="pairwise.complete.obs", method="pearson") # GENDER dataset.cor <- data.frame(degree=1:max(dataset.new$degree,na.rm=T),cor=NA) for (i in 1:nrow(dataset.cor)) { dataset.cor[i,2] <- cor(dataset.new[dataset.new$degree==i,"gender.person1"],dataset.new[dataset.new$degree==i,"gender.person2"],use="pairwise.complete.obs", method="pearson") } dataset.cor cor.test(dataset.new[dataset.new$degree==3,"gender.person1"],dataset.new[dataset.new$degree==3,"gender.person2"],use="pairwise.complete.obs",method="pearson") cor.test(dataset.new[dataset.new$degree>=4,"gender.person1"],dataset.new[dataset.new$degree>=4,"gender.person2"],use="pairwise.complete.obs",method="pearson") # EDUCATION dataset.cor <- data.frame(degree=1:max(dataset.new$degree,na.rm=T),cor=NA) for (i in 1:nrow(dataset.cor)) { dataset.cor[i,2] <- cor(dataset.new[dataset.new$degree==i,"educationforCn.person1"],dataset.new[dataset.new$degree==i,"educationforCn.person2"],use="pairwise.complete.obs", method="spearman") } dataset.cor install.packages("RVAideMemoire") require(RVAideMemoire) require(Hmisc) spearman.ci(dataset.new[dataset.new$degree==3,"educationforCn.person1"],dataset.new[dataset.new$degree==3,"educationforCn.person2"], nrep = 1000, conf.level = 0.95) spearman.ci(dataset.new[dataset.new$degree>=4,"educationforCn.person1"],dataset.new[dataset.new$degree>=4,"educationforCn.person2"], nrep = 1000, conf.level = 0.95) #cor.test(dataset.new[dataset.new$degree>=4,"educationforCn.person1"],dataset.new[dataset.new$degree>=4,"educationforCn.person2"], method=c("spearman"),conf.level = 0.95) ###DEGREE DESCRETE #Netherlands dataset.new$degreeYourSpace.person1 <- log(dataset.new$degreeYourSpace.person1+0.5) dataset.new$degreeYourSpace.person2 <- log(dataset.new$degreeYourSpace.person2+0.5) cor.test(dataset.new[dataset.new$degree==3,"degreeYourSpace.person1"],dataset.new[dataset.new$degree==3,"degreeYourSpace.person2"],use="pairwise.complete.obs", method="pearson") #THailand dataset.newR <- dataset.new[(dataset.new$recruitmentid.person1=="8L95PAN")==FALSE & (dataset.new$recruitmentid.person2=="8L95PAN")==FALSE & (dataset.new$recruitmentid.person1=="M7DVDBLA")==FALSE & (dataset.new$recruitmentid.person2=="M7DVDBLA")==FALSE, ] dataset.newR$degreeYourSpace.person1 <- log(dataset.newR$degreeYourSpace.person1+0.5) dataset.newR$degreeYourSpace.person2 <- log(dataset.newR$degreeYourSpace.person2+0.5) cor.test(dataset.newR[dataset.newR$degree==3,"degreeYourSpace.person1"],dataset.newR[dataset.newR$degree==3,"degreeYourSpace.person2"],use="pairwise.complete.obs", method="pearson") plot(dataset.newR[dataset.newR$degree==2,"degreeYourSpace.person1"],dataset.newR[dataset.newR$degree==2,"degreeYourSpace.person2"]) ###DEGREE CATEGORIES dataset.cor <- data.frame(degree=1:max(dataset.new$degree,na.rm=T),cor=NA) #NETHERLANDS for (i in 1:nrow(dataset.cor)) { dataset.cor[i,2] <- cor(dataset.new[dataset.new$degree==i,"Catdegree.person1"],dataset.new[dataset.new$degree==i,"Catdegree.person2"],use="pairwise.complete.obs", method="spearman") } dataset.cor require(RVAideMemoire) spearman.ci(dataset.new[dataset.new$degree>=4,"Catdegree.person1"],dataset.new[dataset.new$degree>=4,"Catdegree.person2"], nrep = 1000, conf.level = 0.95) cor.test(dataset.new[dataset.new$degree>=4,"Catdegree.person1"],dataset.new[dataset.new$degree>=4,"Catdegree.person2"], method=c("spearman"),conf.level = 0.95) #THAILAND dataset.cor <- data.frame(degree=1:max(dataset.new$degree,na.rm=T),cor=NA) for (i in 1:nrow(dataset.cor)) { dataset.cor[i,2] <- cor(dataset.newR[dataset.newR$degree==i,"Catdegree.person1"],dataset.newR[dataset.newR$degree==i,"Catdegree.person2"],use="pairwise.complete.obs", method="spearman") } dataset.cor spearman.ci(dataset.newR[dataset.newR$degree>=4,"Catdegree.person1"],dataset.newR[dataset.newR$degree>=4,"Catdegree.person2"], nrep = 1000, conf.level = 0.95) cor.test(dataset.newR[dataset.newR$degree>=4,"Catdegree.person1"],dataset.newR[dataset.newR$degree>=4,"Catdegree.person2"], method=c("spearman"),conf.level = 0.95) ###NUMBER OF CONTACTS WHILE EATING #NETHERLANDS dataset.new$foodTotFULL.person1 <- log(dataset.new$foodTotFULL.person1+0.5) dataset.new$foodTotFULL.person2 <- log(dataset.new$foodTotFULL.person2+0.5) cor.test(dataset.new[dataset.new$degree>=4,"foodTotFULL.person1"],dataset.new[dataset.new$degree>=4,"foodTotFULL.person2"],use="pairwise.complete.obs", method="pearson") #THAILAND dataset.newR <- dataset.new[(dataset.new$recruitmentid.person1=="8L95PAN")==FALSE & (dataset.new$recruitmentid.person2=="8L95PAN")==FALSE & (dataset.new$recruitmentid.person1=="M7DVDBLA")==FALSE & (dataset.new$recruitmentid.person2=="M7DVDBLA")==FALSE, ] dataset.newR$foodTotFULL.person1 <- log(dataset.newR$foodTotFULL.person1+0.5) dataset.newR$foodTotFULL.person2 <- log(dataset.newR$foodTotFULL.person2+0.5) cor.test(dataset.newR[dataset.newR$degree>=4,"foodTotFULL.person1"],dataset.newR[dataset.newR$degree>=4,"foodTotFULL.person2"],use="pairwise.complete.obs", method="pearson") nrow(dataset.newR) ###TWO OR MORE SYMPTOMS cor.test(dataset.new$TwoorMoreSymp.person1,dataset.new$TwoorMoreSymp.person2,use="pairwise.complete.obs",method="pearson") table(dataset.new[dataset.new$degree=3,"TwoorMoreSymp.person1"],dataset.new[dataset.new$degree==3,"TwoorMoreSymp.person2"]) cor.test(dataset.new[dataset.new$degree>=4,"TwoorMoreSymp.person1"],dataset.new[dataset.new$degree>=4,"TwoorMoreSymp.person2"],use="pairwise.complete.obs",method="spearman") spearman.ci(dataset.new[dataset.new$degree>=4,"TwoorMoreSymp.person1"],dataset.new[dataset.new$degree>=4,"TwoorMoreSymp.person2"], nrep = 1000, conf.level = 0.95) ####################################### ####################################### TABLE 3 CORRELATIONS ################################################################### ####################################### ####################################### ####################################### ####################################### ####################################### ####################################### ################################################################### ####################################### ####################################### FIGURE S1 Seasonality ####################################### ####################################### require(graphics) install.packages("xts") require(xts) require(scales) require(ggplot2) #THAILAND PARTICIPANTS timeTH <- data.frame(as.Date(dataTH$Date_participation),part=1) timeTH <- timeTH[order(timeTH$as.Date.dataTH.Date_participation., decreasing =FALSE ),] timeTH2 <- aggregate(timeTH[,2], by=list(cut(timeTH$as.Date.dataTH.Date_participation. ,"week")),sum) weeks <- c(51,52,1,2,3,4,5,6,7,8,9) timeTH2$weeks <- weeks timeTH2$data <- "Number of participants in Thailand" colnames(timeTH2) <- c("Date","freq","week","data") timeNL <- data.frame(as.Date(dataNL$date_pat),part=1) timeNL <- timeNL[order(timeNL$as.Date.dataNL.date_pat., decreasing =FALSE ),] timeNL2 <- aggregate(timeNL[,2], by=list(cut(timeNL$as.Date.dataNL.date_pat. ,"week")),sum) weeksNL2 <- c(1,2,3,4,5,6,7,8,9,10,11,12,13,14) timeNL2$weeks <- weeksNL2 timeNL2$data <- "Number of participants in Netherlands" colnames(timeNL2) <- c("Date","freq","week","data") #LOADING WHO FLUNET DATA path <- "" dataset <- read.csv(file=paste(path,".csv",sep="")) WHO <- data.frame(as.Date(dataset$Date),dataset[,4],dataset[,2],data="Influenza activity Thailand (number of specimens positive for influenza)") colnames(WHO) <- c("Date","freq","week","data") WHO <- WHO[order(WHO$Date, decreasing =FALSE ),] #LOADING NIVEL DATA path <- "" dataset <- read.csv(file=paste(path,"",sep="")) NIVEL <- data.frame(as.Date(dataset$day),dataset$week,dataset[,6]) NIVEL <- NIVEL[order(NIVEL$ as.Date.dataset.day., decreasing =FALSE ),] NIVEL <- data.frame(as.Date(NIVEL$as.Date.dataset.day.),round(NIVEL$dataset...6.,2),NIVEL$dataset.week,data="Influenza activity Netherlands (ILI / 10.000 inhabitants)") colnames(NIVEL) <- c("Date","freq","week","data") NIVEL <- NIVEL[order(NIVEL$Date, decreasing =FALSE ),] COM <- rbind(WHO, timeTH2,timeNL2,NIVEL) COM <- COM[order(COM$Date, decreasing =FALSE ),] ggplot(COM,aes(x=Date,factor(freq),fill=data)) + geom_bar(stat="identity",legend=FALSE)+ xlab("years 2012-2013") + ylab("")+ scale_color_manual(values=c("#CC6666", "#9999CC","darkgreen"))+ facet_wrap(~data, scales="free_y",ncol=1)+ theme_bw(base_size = 20)+ theme(axis.title.x = element_text(color = "black", size = 20, vjust= -0.1))+ theme(axis.title.y = element_text(color = "black", size = 20, vjust= 0.35))+ theme(axis.text.x = element_text(color = "black", size = 20))+ theme(axis.text.y = element_text(color = "black", size = 20))+ scale_x_date(breaks="2 months",labels = date_format("%b"),limit=c(as.Date("2012-01-02"),as.Date("2013-12-31")) )+ scale_y_discrete(breaks= function(n) n[floor(length(n)/4)*0:10]) savePlot(file="",type="pdf") ####################################### ####################################### FIGURE S1 Seasonality ################################################################### ####################################### ####################################### ####################################### ####################################### ############################################## FIGURE S2 AND S3 DEGREE OVER DAYS OF THE WEEK, TO CHECK DISTRIBUTIONS ############################ install.packages("ggplot2") require(ggplot2) install.packages("reshape") library(reshape) #Netherlands travel <- data.frame(dataNL$Masstransport, dataNL$bustransport, dataNL$car, dataNL$othertransport,dataNL$dayfilledin) travel$total <- rowSums(travel[,1:4]) travel <- na.omit(travel) nrow(travel) colnames(travel) <- c("mass transport","bus","car","other transport","day","Total transport") travel$day <- factor(travel$day, levels= c("Mon", "Tue", "Wed","Thu","Fri","Sat","Sun"), ordered=TRUE) travelNL <- melt(travel, id=c("day")) travelNL$country <- "NL" #Thailand dataTHr <- dataTH[(dataTH$recruitmentid=="8L95PZN")==FALSE & (dataTH$recruitmentid=="M72V2YLZ")==FALSE, ] travel <- data.frame(dataTHr$Masstransport, dataTHr$bustransport, dataTHr$car, dataTHr$othertransport,dataTHr$dayfilledin) travel$total <- rowSums(travel[,1:4]) travel <- na.omit(travel) nrow(travel) colnames(travel) <- c("mass transport","bus","car","other transport","day","Total transport") travel$day <- factor(travel$day, levels= c("Mon", "Tue", "Wed","Thu","Fri","Sat","Sun"), ordered=TRUE) travelTH <- melt(travel, id=c("day")) travelTH$country <- "TH" summary(travel) traveldata <- rbind(travelNL,travelTH) ggplot(traveldata, aes(x=day,y=value,colour=variable))+ geom_boxplot(notch = FALSE, alpha=0.55,show_guide = FALSE)+ facet_wrap(variable~country,ncol=2)+ scale_y_sqrt(breaks = c(0,5,20,50,100,150),limits=c(0,150))+ geom_hline(yintercept=0,colour="grey", alpha=0.55)+ xlab("") + ylab("Number of contact persons")+ theme_bw(base_size = 20)+ theme(axis.title.x = element_text(color = "black", size = 15, vjust= -0.3))+ theme(axis.title.y = element_text(color = "black", size = 15, vjust= 0.2))+ theme(axis.text.x = element_text(color = "black", size = 15))+ theme(axis.text.y = element_text(color = "black", size = 15)) #KS Test travelTH <- data.frame(dataTH$Masstransport, dataTH$bustransport, dataTH$car, dataTH$othertransport,dataTH$dayfilledin) travelTH$total <- rowSums(travelTH[,1:4]) travelTH <- na.omit(travelTH) ks.test(travelTH$total[travelTH$dataTH.dayfilledin=="Sat"],travelTH$total[travelTH$dataTH.dayfilledin=="Sun"]) travelNL <- data.frame(dataNL$Masstransport, dataNL$bustransport, dataNL$car, dataNL$othertransport,dataNL$dayfilledin) travelNL$total <- rowSums(travelNL[,1:4]) travelNL <- na.omit(travelNL) ks.test(travelNL$total[travelNL$dataNL.dayfilledin=="Mon"],travelNL$total[travelNL$dataNL.dayfilledin=="Fri"]) #LOCATIONS #NETHERLANDS home <- data.frame(dataNL$homeY, dataNL$homeS, dataNL$homeO) home$homeT <- rowSums(home[,1:3]) work <- data.frame(dataNL$workY, dataNL$workS, dataNL$workO) work$workT <- rowSums(work[,1:3]) school <- data.frame(dataNL$schoolY, dataNL$schoolS, dataNL$schoolO) school$schoolT <- rowSums(school[,1:3]) restaurant <- data.frame(dataNL$restaurantY, dataNL$restaurantS, dataNL$restaurantO) restaurant$restaurantT <- rowSums(restaurant[,1:3]) coffee <- data.frame(dataNL$coffeeY, dataNL$coffeeS, dataNL$coffeeO) coffee$coffeeT <- rowSums(coffee[,1:3]) sport <- data.frame(dataNL$sportY, dataNL$sportS, dataNL$sportO) sport$sportT <- rowSums(sport[,1:3]) concert <- data.frame(dataNL$concertY, dataNL$concertS, dataNL$concertO) concert$concertT <- rowSums(concert[,1:3]) otherplace <- data.frame(dataNL$otherplaceY, dataNL$otherplaceS, dataNL$otherplaceO) otherplace$otherplaceT <- rowSums(otherplace[,1:3]) loc <- data.frame(home$homeT,work$workT,school$schoolT,restaurant$restaurantT,coffee$coffeeT,sport$sportT,concert$concertT,otherplace$otherplaceT,dataNL$dayfilledin) loc$total <- rowSums(loc[,1:8]) loc <- na.omit(loc) nrow(loc) colnames(loc) <- c("home","work","school / university","restaurant","coffee shop","sport", "concert","other places","day","Total locations") loc$day <- factor(loc$day, levels= c("Mon", "Tue", "Wed","Thu","Fri","Sat","Sun"), ordered=TRUE) locNL <- melt(loc, id=c("day")) locNL$country <- "NL" tapply(loc$work, loc$day, summary) ks.test(loc$total[loc$dataNL.dayfilledin=="Mon"],loc$total[loc$dataNL.dayfilledin=="Fri"]) #THAILAND home <- data.frame(dataTHr$homeY, dataTHr$homeS, dataTHr$homeO) home$homeT <- rowSums(home[,1:3]) work <- data.frame(dataTHr$workY, dataTHr$workS, dataTHr$workO) work$workT <- rowSums(work[,1:3]) school <- data.frame(dataTHr$schoolY, dataTHr$schoolS, dataTHr$schoolO) school$schoolT <- rowSums(school[,1:3]) restaurant <- data.frame(dataTHr$restaurantY, dataTHr$restaurantS, dataTHr$restaurantO) restaurant$restaurantT <- rowSums(restaurant[,1:3]) coffee <- data.frame(dataTHr$coffeeY, dataTHr$coffeeS, dataTHr$coffeeO) coffee$coffeeT <- rowSums(coffee[,1:3]) sport <- data.frame(dataTHr$sportY, dataTHr$sportS, dataTHr$sportO) sport$sportT <- rowSums(sport[,1:3]) concert <- data.frame(dataTHr$concertY, dataTHr$concertS, dataTHr$concertO) concert$concertT <- rowSums(concert[,1:3]) otherplace <- data.frame(dataTHr$otherplaceY, dataTHr$otherplaceS, dataTHr$otherplaceO) otherplace$otherplaceT <- rowSums(otherplace[,1:3]) loc <- data.frame(home$homeT,work$workT,school$schoolT,restaurant$restaurantT,coffee$coffeeT,sport$sportT,concert$concertT,otherplace$otherplaceT,dataTHr$dayfilledin) loc$total <- rowSums(loc[,1:8]) loc <- na.omit(loc) nrow(loc) head(loc) #Test with two sample KS test whether distributions different significantly ks.test(loc$total[loc$dataTHr.dayfilledin=="Sat"],loc$total[loc$dataTHr.dayfilledin=="Sun"]) colnames(loc) <- c("home","work","school / university","restaurant","coffee shop","sport", "concert","other places","day","Total locations") loc$day <- factor(loc$day, levels= c("Mon", "Tue", "Wed","Thu","Fri","Sat","Sun"), ordered=TRUE) locTH <- melt(loc, id=c("day")) locTH$country <- "TH" locdata <- rbind(locNL,locTH) ggplot(locdata, aes(x=day,y=value,colour=variable))+ geom_boxplot(notch = FALSE, alpha=0.55,show_guide = FALSE)+ facet_wrap(variable~country,ncol=2)+ scale_y_sqrt(breaks = c(0,5,50,150),limits=c(0,150))+ geom_hline(yintercept=0,colour="grey", alpha=0.55)+ xlab("") + ylab("Number of contact persons")+ theme_bw(base_size = 20)+ theme(axis.title.x = element_text(color = "black", size = 15, vjust= -0.3))+ theme(axis.title.y = element_text(color = "black", size = 15, vjust= 0.2))+ theme(axis.text.x = element_text(color = "black", size = 15))+ theme(axis.text.y = element_text(color = "black", size = 15)) ############################################## ARTIKEL TWEE FIGURE S2 / S3 DEGREE OVER DAYS OF THE WEEK CHECK OF DISTRIBUTIONS ############################