install.packages("XLConnect")
library(XLConnect)

#THAI DATA
dataTH <- readWorksheet(loadWorkbook(""), header = TRUE, sheet=1, startRow =0, endRow = 258)					
ls(dataTH)
attach(dataTH)

#DUTCH DATA
dataNL <- readWorksheet(loadWorkbook(""), header = TRUE, sheet=1, startRow =0, endRow = 359)
ls(dataNL)
attach(dataNL)

####################################################################################################################################
############################################ FIGURE 1 ########################################################################################################
setRepositories()
install.packages("Rgraphviz")
require(Rgraphviz)
require(igraph) 

#FOR NETHERLANDS
rdsdata <- dataNL   

#FOR THAILAND
rdsdata <- dataTH 

# Extract the number of coupons
ncoupons <- 4
# Extract the missing code
misscode <- as.character(NA)
#Now generate new headers
couponList <- list()
couponsGiven <- rdsdata[, grep("child.", names(rdsdata))]
colnames(couponsGiven) <- c("coupon.1", "coupon.2","coupon.3","coupon.4")

# Now collapse coupons into a single column
couponsGiven <- stack(lapply(couponsGiven,as.character))  
couponIDs <- rep(rdsdata$ID, ncoupons)

rdsdata$Token <- as.character(rdsdata$recruitmentid)
rdsdata$coupon.1 <- as.character(rdsdata$child1)
rdsdata$coupon.2 <- as.character(rdsdata$child2)
rdsdata$coupon.3 <- as.character(rdsdata$child3)
rdsdata$coupon.4 <- as.character(rdsdata$child4)

colnames(rdsdata)[1] <- "id"
colnames(rdsdata)[3] <- "mycoupon"

child.vec <- NULL
for (i in 1:4)
	{
	child.vec <- c(child.vec, as.character(rdsdata[,paste("coupon",i,sep=".")]))
	}
child.vec <- child.vec[child.vec!="-1"]
#length(child.vec)
vec.loop <- 1:nrow(rdsdata)
coupon.data <- data.frame(recruiter.origin=NA, participant=rep(NA,length(vec.loop)))
for (i in vec.loop)
	{
	if (i%%10==0)
		{
		print(i)
		}
	recruiter.tmp <- as.character(rdsdata$mycoupon[i])
	vec.loop.tmp  <- vec.loop[1:(i-1)]
	hit 	<- 0
	pos   <- NA
	j 	<- 1
	while (hit==0 & j < i)
		{
		coupon.tmp <- as.character(rdsdata[j,paste("coupon",1:4,sep=".")])
		if (recruiter.tmp%in%coupon.tmp)
			{
			hit <- 1
			} else {
			j <- j+1
			}
		}
	coupon.data$participant[i] <- recruiter.tmp
	if (hit==1)
		{
		coupon.data$recruiter.origin[i] <- rdsdata[j,"mycoupon"]
		#coupon.data$invited[i] 		  <- 1
		} else {
		coupon.data$recruiter.origin[i] <- NA
		}
	}

coupon.data2 <- coupon.data[is.na(coupon.data$recruiter.origin)==FALSE,]
seeds.all <- rdsdata[rdsdata$wave==0,"mycoupon"]
seeds.unq <- unique(coupon.data2$recruiter.origin)
invited <- seeds.unq[seeds.unq%in%seeds.all]  		 #remove seeds who did not invite contact persons	
invited2 <- as.character(invited)

g=graph.data.frame(coupon.data2)
adj.new3 <- get.adjacency(g,sparse=FALSE)
rdsdata.graph <- graphAM(adjMat = adj.new3, edgemode = "directed")

#MAKE AN EMPTY STRUCTERED PLOT OF RDS DATA
plot(rdsdata.graph, "dot")  		

#CREATE FIGURE 1
nodeNames <- nodes(rdsdata.graph)
numNodes <- length(nodeNames)
nAttrs <- list()

rdsdatav2 <- rdsdata[which(rdsdata$mycoupon%in%nodeNames), ]
nrow(rdsdatav2)

elabel <- NULL
for (i in 1:length(nodeNames)){
	if (nodeNames[i]%in%as.character(rdsdatav2$mycoupon)){
	elabel [i] <- as.character(rdsdatav2[rdsdatav2$mycoupon==nodeNames[i],"educationforC"])
	} else {
	elabel [i] <- NA
	}
}

elabel <- as.character(elabel)
elabel[elabel=='-1']=("M")
elabel <- elabel[1:numNodes(rdsdata.graph)]
names(elabel)<-nodes(rdsdata.graph)
nAttrs$label <- elabel

########   #TO GET EMPTY LABELS!   ###########
elabel <- NULL
for (i in 1:length(nodeNames)){
	if (nodeNames[i]%in%as.character(rdsdatav2$mycoupon)){
	elabel [i] <- ""
	} else {
	elabel [i] <- NA
	}
}

elabel <- as.character(elabel)
elabel[elabel=='-1']=("")
elabel <- elabel[1:numNodes(rdsdata.graph)]
names(elabel)<-nodes(rdsdata.graph)
nAttrs$label <- elabel
########   #TO GET EMPTY LABELS!   ###############

degree <- NULL
for (i in 1:length(nodeNames)){
	if (nodeNames[i]%in%as.character(rdsdatav2$mycoupon)){
	degree [i] <- as.character(rdsdatav2[rdsdatav2$mycoupon==nodeNames[i],"Catdegree"])
	} else {
	degree [i] <- NA
	}
}

degree[is.na(degree)] <- 0

nAttrs$shape <- c()
for (i in 1:numNodes){
    if (degree [i] == '0') {
     nAttrs$shape <- c(nAttrs$shape, "box")
      } else {
	nAttrs$shape <- c(nAttrs$shape, "circle")
	}
}
names(nAttrs$shape) <- as.character(nodeNames)

####################################NODE SIZE IF NO LABEL IS ADDED ####
nAttrs$width <- c()
for (i in 1:numNodes) {
    if (degree[i] =="1") {
        nAttrs$width <- c(nAttrs$width, 1.3)
   } else if (degree[i] =="2"){
	nAttrs$width <- c(nAttrs$width, 1.6)
    } else if (degree[i] =="3"){
   nAttrs$width <- c(nAttrs$width, 2.0)
   } else {
	nAttrs$width <- c(nAttrs$width, 1.3)
	}
}
names(nAttrs$width) <- as.character(nodeNames)

nAttrs$height <- c()
for (i in 1:numNodes) {
    if (degree[i] =="1") {
        nAttrs$height <- c(nAttrs$height, 1.3)
   } else if (degree[i] =="2"){
	nAttrs$height <- c(nAttrs$height, 1.6)
    } else if (degree[i] =="3"){
   nAttrs$height <- c(nAttrs$height, 2.0)
   } else {
	nAttrs$height <- c(nAttrs$height, 1.3)
	}
}
names(nAttrs$height) <- as.character(nodeNames)
######################################

#AGE COLOUR
age <- NULL
for (i in 1:length(nodeNames)){
	if (nodeNames[i]%in%as.character(rdsdatav2$mycoupon)){
	age [i] <- as.character(rdsdatav2[rdsdatav2$mycoupon==nodeNames[i],"AGEGROUP10"])
	} else {
	age [i] <- NA
	}
}

age[is.na(age)] <- 0

nAttrs$fillcolor <- c()
for (i in 1:numNodes) {
    if (age[i] == as.character("10-19")) {
        nAttrs$fillcolor <- c(nAttrs$fillcolor, "purple")
   } else if (age[i] == as.character("20-29")){
	nAttrs$fillcolor <- c(nAttrs$fillcolor, "lightblue")
   } else if (age[i] == as.character("30-39")){
	nAttrs$fillcolor <- c(nAttrs$fillcolor, "orange")
   } else if (age[i] == as.character("40-49")){
	nAttrs$fillcolor <- c(nAttrs$fillcolor, "green")
   } else if (age[i] == as.character("50-59")){
	nAttrs$fillcolor <- c(nAttrs$fillcolor, "yellow")
   } else if (age[i] == as.character("60+")){
	nAttrs$fillcolor <- c(nAttrs$fillcolor, "red")
   } else {
        nAttrs$fillcolor <- c(nAttrs$fillcolor, "grey")
      }
}
names(nAttrs$fillcolor) <- as.character(nodeNames)

#SEEDS WITH BLACK COLOUR
seed <- NULL
for (i in 1:length(nodeNames)){
	if (nodeNames[i]%in%as.character(rdsdatav2$mycoupon)){
	seed [i] <- as.character(rdsdatav2[rdsdatav2$mycoupon==nodeNames[i],"seeds"])
	} else {
	seed [i] <- NA
	}
}

seed[is.na(seed)] <- 0

nAttrs$color <- c()
for (i in 1:numNodes) {
    if (seed[i] == as.character(1)) {
 	nAttrs$color <- c(nAttrs$color,"black")
   } else {
        nAttrs$color <- c(nAttrs$color,"darkgrey")
    }
}
names(nAttrs$color) <- as.character(nodeNames)

plot(rdsdata.graph, "dot", nodeAttrs=nAttrs, attrs=list(node=list(fixedsize=FALSE),
						graph= list(rankdir = "TB")))
savePlot(file=" ",type="pdf")
################################## FIGURE 1A ################################################################################

################################## FIGURE 2A ################################################################################
require(ggplot2)
library(plyr)

NL <- data.frame(dataNL$InvitationbyNUM,dataNL$recruitmentbyNUM)
TH <- data.frame(dataTH$InvitationbyNUM,dataTH$recruitmentbyNUM)
NL$country <- "Netherlands"
colnames(NL) <- c("inv","rec","Country")
TH$country <- "Thailand"
colnames(TH) <- c("inv","rec","Country")
NLTH <- rbind(NL,TH)
head(NLTH)
NLTH <- na.omit(NLTH)

#NLTH$inv[NLTH$inv==1]="email"
#NLTH$inv[NLTH$inv==2]="Facebook"
#NLTH$inv[NLTH$inv==3]="recr. page"

NLTH$rec[NLTH$rec==0]="no recr."
NLTH$rec[NLTH$rec==1]="Facebook"
NLTH$rec[NLTH$rec==2]="email B"
NLTH$rec[NLTH$rec==3]="email C"
table(dataTH$recruitmentbyNUM)

df.new<-ddply(NLTH,.(Country),summarise,
              prop=prop.table(table(rec)),
              rec=names(table(rec)))

ggplot(NLTH, aes(x=Country))+ 
geom_bar(data = subset(NLTH, Country=="Netherlands"), aes(fill=rec, y=(..count..)/sum(..count..)), position="stack", alpha = 1, binwidth=1) +
geom_bar(data = subset(NLTH, Country=="Thailand"), aes(fill=rec, y=(..count..)/sum(..count..)), position="stack", alpha = 1, binwidth=0.5)+
geom_text(aes("Netherlands", .80, label="No recruitment"))+
geom_text(aes("Netherlands", .52, label="Facebook"))+
geom_text(aes("Netherlands", .38, label="Email C"))+
geom_text(aes("Netherlands", .15, label="Email B"))+
geom_text(aes("Thailand", .80, label="No recruitment"))+
geom_text(aes("Thailand", .30, label="Facebook"))+
geom_text(aes("Thailand", .09, label="Email C"))+
geom_text(aes("Thailand", .03, label="Email B"))+
xlab("") +
ylab("proportion of total in country") +
theme_bw(base_size = 15)+
theme(axis.title.x = element_text(color = "black", size = 15, family = "Arial", vjust= -0.3))+ 
theme(axis.title.y = element_text(color = "black", size = 15, family = "Arial", vjust= 0.2))+ 
theme(axis.text.x = element_text(color = "black", size = 15, family = "Arial"))+ 
theme(axis.text.y = element_text(color = "black", size = 15, family = "Arial"))
############################################## FIGURE 2A ############################

############################################## FIGURE 2B ############################
require(ggplot2)
library(plyr)

NL <- data.frame(dataNL$wave)
NL$country <- "Netherlands"
colnames(NL) <- c("wave","Country")
TH <- data.frame(dataTH$wave)
TH$country <- "Thailand"
colnames(TH) <- c("wave","Country")
NLTH <- rbind(NL,TH)
head(NLTH)
NLTH <- na.omit(NLTH)

NLTH$wave[NLTH$wave==0]="0"
NLTH$wave[NLTH$wave==1]="1"
NLTH$wave[NLTH$wave==2]="2"
NLTH$wave[NLTH$wave==3]="3"
NLTH$wave[NLTH$wave==4]="4"
NLTH$wave[NLTH$wave==5]="5"
NLTH$wave[NLTH$wave==6]="6"

df.new<-ddply(NLTH,.(Country),summarise,
              prop=prop.table(table(wave)),
              wave=names(table(wave)))

#ADD LINES   #= proportion of participants per wave that invited others
NL <- data.frame(dataNL$wave,dataNL$recruitmentbyNUM)
colnames(NL) <- c("wave","rec")
NL$rec[NL$rec>0]="4"

NL <- na.omit(NL)
table(NL)
df.inv<-ddply(NL,.(wave),summarise,
              prop=prop.table(table(rec)),
              rec=names(table(rec)))
df.inv
df.invNL <- subset(df.inv, rec=="4")
df.invNL$wave[df.invNL$wave==0]="0"
df.invNL$wave[df.invNL$wave==1]="1"
df.invNL$wave[df.invNL$wave==2]="2"
df.invNL$wave[df.invNL$wave==3]="3"
df.invNL$wave[df.invNL$wave==4]="4"
df.invNL$wave[df.invNL$wave==5]="5"

df.invNL$Country <- "Netherlands"
df.invNL
colnames(df.invNL) <- c("wave","prop","rec","Country")

TH <- data.frame(dataTH$wave,dataTH$recruitmentbyNUM)
table(TH)
colnames(TH) <- c("wave","rec")
TH
TH$rec[TH$rec>0]="4"

df.invTH<-ddply(TH,.(wave),summarise,
              prop=prop.table(table(rec)),
              rec=names(table(rec)))

df.invTH <- subset(df.invTH, rec=="4")
wave6 <- c(6,0,4)
df.invTH <- rbind(df.invTH,wave6)
df.invTH

df.invTH$Country <- "Thailand"
df.invTH
colnames(df.invTH) <- c("wave","prop","rec","Country")

df.invTH$wave[df.invTH$wave==0]="0"
df.invTH$wave[df.invTH$wave==1]="1"
df.invTH$wave[df.invTH$wave==2]="2"
df.invTH$wave[df.invTH$wave==3]="3"
df.invTH$wave[df.invTH$wave==4]="4"
df.invTH$wave[df.invTH$wave==5]="5"
df.invTH$wave[df.invTH$wave==6]="6"

#COMBINE TWO
LINES <- rbind(df.invTH,df.invNL)
LINES
	
#CREATE NEW FIGURE
df.new
ggplot(df.new,aes(x=wave))+
geom_bar(aes(y=prop, fill=Country),stat="identity",position='dodge', alpha=0.55)+
scale_fill_manual(values=c("lightgreen", "darkblue"))+
geom_line(data=LINES, aes(y=prop,group=Country, colour = Country, linetype=Country),position="identity",size=1)+
scale_color_manual(values=c("lightgreen", "darkblue"))+
xlab("waves") +
ylab("proportions")+
theme_bw(base_size = 15)+
theme(axis.title.x = element_text(color = "black", size = 15, vjust= -0.3))+ 
theme(axis.title.y = element_text(color = "black", size = 15, vjust= 0.2))+ 
theme(axis.text.x = element_text(color = "black", size = 15))+ 
theme(axis.text.y = element_text(color = "black", size = 15))
savePlot(file="",type="pdf")
############################################## FIGURE 2B ############################

############################################## FIGURE 2C ############################
require(ggplot2)
require(scales)
library(XLConnect)

NL <- data.frame(dataNL$age,dataNL$gender)
TH <- data.frame(dataTH$age, dataTH$gender)
NL$country <- "NL"
colnames(NL) <- c("age","gender","country")
TH$country <- "TH"
colnames(TH) <- c("age","gender","country")
NLTH <- rbind(NL,TH)
head(NLTH)

NLTH <- na.omit(NLTH)
NLTH$gender[NLTH$gender=="1"]= "female"
NLTH$gender[NLTH$gender=="2"]= "male"

#ALLE ACTIEVE SEEDS
df <- cbind(dataNL$age,dataNL$wave,dataNL$no_suc_rec)
df
dfNL <- matrix(df[df[,1] & df[,2]==0 & df[,3]>=1],ncol=3)
dfNL <- data.frame(dfNL)
colnames(dfNL) <- c("age","wave","rec")
dfNL

df <- cbind(dataTH$age,dataTH$wave,dataTH$no_suc_rec)
dfzero <- matrix(df[df[,1] & df[,2]==0 & df[,3]>=1],ncol=3)
dfTH <- na.omit(dfzero)
dfTH <- data.frame(dfTH)
colnames(dfTH) <- c("age","wave","rec")
hist(dfTH[,1],breaks=20)
dfTH

#CENSUS DATA
df.census <- readWorksheet(loadWorkbook(""), header = TRUE, sheet=1, startRow =0, endRow = 90)					
ls(df.census)
census.df <- data.frame(df.census)
colnames(census.df) <- c("age","NL","TH")

bw <- 1
ggplot() + 
geom_histogram(data=subset(NLTH, country=="NL"), aes(x=age,(y=..count../sum(..count..)),fill=gender), colour="black", binwidth = bw, alpha=0.6)+
geom_histogram(data=subset(NLTH, country=="TH"), aes(x=age,(y=..count../sum(..count..)*-1), fill=gender), colour="black", binwidth = bw, alpha=0.6)+
geom_density(data=dfNL, aes(x=age, (y=..count../sum(..count..))), binwidth=1, colour="black", size=0.7)+
geom_density(data=dfTH, aes(x=age, (y=..count../sum(..count..)*-1)),binwidth=1, colour="black", size=0.7)+
geom_line(data=census.df, aes(x=age,y=NL), colour="blue",linetype = "dotted", stat="identity", size=0.9,se=F)+
geom_line(data=census.df, aes(x=age,y=TH*-1), colour="blue",linetype = "dotted", stat="identity", size=0.9,se=F)+
xlab("age (years)")+
ylab("proportion of country total") +
scale_y_continuous(breaks = seq(-0.5,0.5, 0.05), labels = abs(seq(-0.5,0.5, 0.05))) + 
scale_x_continuous(breaks = seq(10, 80, 10), labels = seq(10, 80, 10))+
theme_bw(base_size = 20)+
theme(axis.title.x = element_text(color = "black", size = 20, vjust= -0.1))+ 
theme(axis.title.y = element_text(color = "black", size = 20, vjust= 0.35))+ 
theme(axis.text.x = element_text(color = "black", size = 20))+ 
theme(axis.text.y = element_text(color = "black", size = 20))+
geom_text(aes(45, .06, label="Netherlands"),size=8)+
geom_text(aes(45, -.06, label="Thailand"),size=8)+
scale_fill_manual(values=c("orange", "lightblue"))
savePlot(file="",type="pdf")
############################################## FIGURE 2C ############################

############################################## FIGURE 2D ############################
install.packages("ggplot2")
require(ggplot2)
install.packages("colorspace")
install.packages("plyr")
require(plyr)

THcensus <- c(0.183,0.232,0.216,0.185,0.186) #data from 2010
NLcensus <- c(2802182/7569371, 2474729/7569371, 914480/7569371, 967899/7569371,410081/7569371)    #data from 2013
NL <- data.frame(dataNL$householdGROUPS)
TH <- data.frame(dataTH$householdGROUPS)
NL$country <- "Netherlands"
colnames(NL) <- c("HS","Country")
TH$country <- "Thailand"
colnames(TH) <- c("HS","Country")
NLTH <- rbind(NL,TH)
NLTH <- na.omit(NLTH)

df.new<-ddply(NLTH,.(Country),summarise,
              prop=prop.table(table(HS)),
              HS=names(table(HS)))

NLadd  <- c("Netherlands",0.05828221+0.09509202,"5+")
THadd  <- c("Thailand",0.14479638+0.19004525,"5+")  

df.new[5,] <- NLadd
df.new[11,] <- THadd
df.new2 <- df.new[-6,]
df.new3 <- df.new2[-11,]
df.new3[,2] <- as.numeric(df.new3[,2])

NLS <- data.frame(dataNL$symphousehold)
THS <- data.frame(dataTH$symphousehold)

NLS$country <- "Netherlands"
colnames(NLS) <- c("HSS","Country")
THS$country <- "Thailand"
colnames(THS) <- c("HSS","Country")
NLTHS <- rbind(NLS,THS)
NLTHS <- na.omit(NLTHS)

df.sym <- ddply(NLTHS,.(Country),summarise,
              prop=prop.table(table(HSS)),
              HSS=names(table(HSS)))

w6 <- c("Netherlands",0,"6")
df.symC <- rbind(df.sym,w6)
df.symCnew <- transform(df.symC, prop = as.numeric(prop))
df.symCnewO <- df.symCnew[order(df.symCnew$Country),]
df.s <- data.frame(df.symCnewO)

df.s = df.s[-1,]
df.s = df.s[-6,]  #REMOVE UNKNOWN NETHERLANDS (10) 
df.s = df.s[-7,]   
df.s = df.s[-13,]  #REMOVE UNKNOWN THAILAND (10) 
df.s <- df.s[-6,]
df.s[10,2] <- c(0.036199095+0.018099548)
df.s <- df.s[-11,]

df.s$HS <- df.new3$HS
df.s$HSS  <- NULL
df.s[,2] <- as.numeric(df.s[,2])

NLcensus <- formatC(NLcensus,digits=3, format="f")
NLc <- cbind(NLcensus,THcensus)
THc <- matrix(THcensus)
class(df.new3$HS)

# CENSUS DATA
Cdfcountry <- as.vector(c("Netherlands","Netherlands","Netherlands","Netherlands","Netherlands","Thailand","Thailand", "Thailand", "Thailand", "Thailand"))
CdfNL <- c(as.numeric(NLcensus),as.numeric(THcensus))
Cdfhouse <- c("1","2","3","4","5+","1","2","3","4","5+")
CdfNL <- data.frame(Cdfcountry,CdfNL,Cdfhouse)
colnames(CdfNL) <- c("Country","prop","HS")

ggplot(df.new3,aes(x=HS))+
geom_bar(aes(y=prop, fill=Country),stat="identity",position='dodge',alpha=0.55)+
scale_fill_manual(values=c("lightgreen", "darkblue"))+
geom_line(data=df.s, aes(y=prop, group=Country,colour = Country,linetype=Country),position=position_dodge(width=1, height=0),size=1)+
geom_point(data=df.s, aes(y=prop, group=Country,colour = Country),position=position_dodge(width=1, height=0),size=2.5)+
geom_point(data=CdfNL, aes(y=prop, shape=Country, group=Country,fill=Country), position=position_dodge(width=1, height=0),size=4.1)+
scale_shape_manual(values = c(22, 24))+
scale_color_manual(values=c("lightgreen", "darkblue"))+
xlab("household size") +
ylab("proportion of total in country") +
theme_bw(base_size = 15)+
theme(axis.title.x = element_text(color = "black", size = 15, vjust= -0.3))+ 
theme(axis.title.y = element_text(color = "black", size = 15, vjust= 0.2))+ 
theme(axis.text.x = element_text(color = "black", size = 15))+ 
theme(axis.text.y = element_text(color = "black", size = 15))
savePlot(file="",type="pdf")
############################################## FIGURE 2D END ############################

######################################### Figure 3A ########################################
require(ggplot2)

dfNLt <- data.frame(dataNL$travelTotwZeros,dataNL$weekorw)
dfNLt$class <- "while travelling" 
colnames(dfNLt) <- c("value","week","class")
dfNLt$classC[dfNLt$week=="weekdays" & dfNLt$class=="while travelling"]="Travelling WD"
dfNLt$classC[dfNLt$week=="weekends" & dfNLt$class=="while travelling"]="Travelling WE"

dfNLl <- data.frame(dataNL$locationTotReal,dataNL$weekorw)
dfNLl$class <- "at locations" 
colnames(dfNLl) <- c("value","week","class")
dfNLl$classC[dfNLl$week=="weekdays" & dfNLl$class=="at locations"]="Locations WD"
dfNLl$classC[dfNLl$week=="weekends" & dfNLl$class=="at locations"]="Locations WE"

dfNLf <- data.frame(dataNL$foodTotFULL,dataNL$weekorw)
dfNLf$class <- "while eating" 
colnames(dfNLf) <- c("value","week","class")
dfNLf$classC[dfNLf$week=="weekdays" & dfNLf$class=="while eating"]="Eating WD"
dfNLf$classC[dfNLf$week=="weekends" & dfNLf$class=="while eating"]="Eating WE"
summary(dfNLf)

DFNL <- rbind(dfNLt,dfNLl,dfNLf)
DFNL$country <- "Netherlands"
DFNL <- na.omit(DFNL)

#THAILAND
dataTHr <- dataTH[(dataTH$recruitmentid=="8L95PZN")==FALSE & (dataTH$recruitmentid=="M72V2YLZ")==FALSE, ]

dfTHt <- data.frame(dataTHr$travelTotwZeros,dataTHr$weekorw )
dfTHt$class <- "while travelling" 
colnames(dfTHt) <- c("value","week","class")
dfTHt$classC[dfTHt$week=="weekdays" & dfTHt$class=="while travelling"]="Travelling WD"
dfTHt$classC[dfTHt$week=="weekends" & dfTHt$class=="while travelling"]="Travelling WE"

dfTHl <- data.frame(dataTHr$locationTotReal,dataTHr$weekorw)
dfTHl$class <- "at locations" 
colnames(dfTHl) <- c("value","week","class")
dfTHl$classC[dfTHl$week=="weekdays" & dfTHl$class=="at locations"]="Locations WD"
dfTHl$classC[dfTHl$week=="weekends" & dfTHl$class=="at locations"]="Locations WE"
summary(dfTHl)

dfTHf <- data.frame(dataTHr$foodTotFULL,dataTHr$weekorw)
dfTHf$class <- "while eating" 
colnames(dfTHf) <- c("value","week","class")
dfTHf$classC[dfTHf$week=="weekdays" & dfTHf$class=="while eating"]="Eating WD"
dfTHf$classC[dfTHf$week=="weekends" & dfTHf$class=="while eating"]="Eating WE"
summary(dfTHf)

DFTH <- NULL
DFTH <- rbind(dfTHt,dfTHl,dfTHf)
DFTH$country <- "Thailand"
DFTH <- na.omit(DFTH)

NLTH <- NULL
NLTH <- rbind(DFNL,DFTH)
head(NLTH)
NLTH$classC <- factor(NLTH$classC, levels= c("Eating WE","Eating WD","Travelling WE","Travelling WD", "Locations WE", "Locations WD"))
nrow(NLTH)

#nederland travelling
MEANtravNLWD <-  mean(dfNLt$value[dfNLt$week=="weekdays" & dfNLt$class=="while travelling"],na.rm=T)
MEDIANtravNLWD <-  median(dfNLt$value[dfNLt$week=="weekdays" & dfNLt$class=="while travelling"],na.rm=T)
SDtravNLWD <- sd(dfNLt$value[dfNLt$week=="weekdays" & dfNLt$class=="while travelling"],na.rm=T)
NtravNLWD <-  length(na.omit(dfNLt$value[dfNLt$week=="weekdays" & dfNLt$class=="while travelling"]))
MEANtravNLWE <-  mean(dfNLt$value[dfNLt$week=="weekends" & dfNLt$class=="while travelling"],na.rm=T)
MEDIANtravNLWE <-  median(dfNLt$value[dfNLt$week=="weekends" & dfNLt$class=="while travelling"],na.rm=T)
SDtravNLWE <-  sd(dfNLt$value[dfNLt$week=="weekends" & dfNLt$class=="while travelling"],na.rm=T)
NtravNLWE <-  length(na.omit(dfNLt$value[dfNLt$week=="weekends" & dfNLt$class=="while travelling"]))

#Thailand travelling
MEANtravTHWD <-  mean(dfTHt$value[dfTHt$week=="weekdays" & dfTHt$class=="while travelling"],na.rm=T)
MEDIANtravTHWD <-  median(dfTHt$value[dfTHt$week=="weekdays" & dfTHt$class=="while travelling"],na.rm=T)
SDtravTHWD <-  sd(dfTHt$value[dfTHt$week=="weekdays" & dfTHt$class=="while travelling"],na.rm=T)
NtravTHWD <-  length(na.omit(dfTHt$value[dfTHt$week=="weekdays" & dfTHt$class=="while travelling"]))
MEANtravTHWE <-  mean(dfTHt$value[dfTHt$week=="weekends" & dfTHt$class=="while travelling"],na.rm=T)
MEDIANtravTHWE <-  median(dfTHt$value[dfTHt$week=="weekends" & dfTHt$class=="while travelling"],na.rm=T)
SDtravTHWE <-  sd(dfTHt$value[dfTHt$week=="weekends" & dfTHt$class=="while travelling"],na.rm=T)
NtravTHWE <-  length(na.omit(dfTHt$value[dfTHt$week=="weekends" & dfTHt$class=="while travelling"]))

#NEDERLAND LOCATIONS
MEANlocNLWD <-  mean(dfNLl$value[dfNLl$week=="weekdays" & dfNLl$class=="at locations"],na.rm=T)
MEDIANlocNLWD <-  median(dfNLl$value[dfNLl$week=="weekdays" & dfNLl$class=="at locations"],na.rm=T)
SDlocNLWD <-  sd(dfNLl$value[dfNLl$week=="weekdays" & dfNLl$class=="at locations"],na.rm=T)
NlocNLWD <-  length(dfNLl$value[dfNLl$week=="weekdays" & dfNLl$class=="at locations"])
MEANlocNLWE <-  mean(dfNLl$value[dfNLl$week=="weekends" & dfNLl$class=="at locations"],na.rm=T)
MEDIANlocNLWE <-  median(dfNLl$value[dfNLl$week=="weekends" & dfNLl$class=="at locations"],na.rm=T)
SDlocNLWE <-  sd(dfNLl$value[dfNLl$week=="weekends" & dfNLl$class=="at locations"],na.rm=T)
NlocNLWE <-  length(na.omit(dfNLl$value[dfNLl$week=="weekends" & dfNLl$class=="at locations"]))

#Thailand LOCATIONS
MEANlocTHWD <-  mean(dfTHl$value[dfTHl$week=="weekdays" & dfTHl$class=="at locations"],na.rm=T)
MEDIANlocTHWD <-  median(dfTHl$value[dfTHl$week=="weekdays" & dfTHl$class=="at locations"],na.rm=T)
SDlocTHWD <-  sd(dfTHl$value[dfTHl$week=="weekdays" & dfTHl$class=="at locations"],na.rm=T)
NlocTHWD <-  length(dfTHl$value[dfTHl$week=="weekdays" & dfTHl$class=="at locations"])
MEANlocTHWE <-  mean(dfTHl$value[dfTHl$week=="weekends" & dfTHl$class=="at locations"],na.rm=T)
MEDIANlocTHWE <-  median(dfTHl$value[dfTHl$week=="weekends" & dfTHl$class=="at locations"],na.rm=T)
SDlocTHWE <-  sd(dfTHl$value[dfTHl$week=="weekends" & dfTHl$class=="at locations"],na.rm=T)
NlocTHWE <-  length(na.omit(dfTHl$value[dfTHl$week=="weekends" & dfTHl$class=="at locations"]))               

#NEDERLAND EATING FOOD WITH
MEANfoodNLWD <-  mean(dfNLf$value[dfNLf$week=="weekdays" & dfNLf$class=="while eating"],na.rm=T)
MEDIANfoodNLWD <-  median(dfNLf$value[dfNLf$week=="weekdays" & dfNLf$class=="while eating"],na.rm=T)
SDfoodNLWD <-  sd(dfNLf$value[dfNLf$week=="weekdays" & dfNLf$class=="while eating"],na.rm=T)
NfoodNLWD <-  length(na.omit(dfNLf$value[dfNLf$week=="weekdays" & dfNLf$class=="while eating"]))
MEANfoodNLWE <-  mean(dfNLf$value[dfNLf$week=="weekends" & dfNLf$class=="while eating"],na.rm=T)
MEDIANfoodNLWE <-  median(dfNLf$value[dfNLf$week=="weekends" & dfNLf$class=="while eating"],na.rm=T)
SDfoodNLWE <-  sd(dfNLf$value[dfNLf$week=="weekends" & dfNLf$class=="while eating"],na.rm=T)
NfoodNLWE <-  length(na.omit(dfNLf$value[dfNLf$week=="weekends" & dfNLf$class=="while eating"]))

#Thailand EATING FOOD WITH
MEANfoodTHWD <-  mean(dfTHf$value[dfTHf$week=="weekdays" & dfTHf$class=="while eating"],na.rm=T)
MEDIANfoodTHWD <-  median(dfTHf$value[dfTHf$week=="weekdays" & dfTHf$class=="while eating"],na.rm=T)
SDfoodTHWD <-  sd(dfTHf$value[dfTHf$week=="weekdays" & dfTHf$class=="while eating"],na.rm=T)
NfoodTHWD <-  length(na.omit(dfTHf$value[dfTHf$week=="weekdays" & dfTHf$class=="while eating"]))
MEANfoodTHWE <-  mean(dfTHf$value[dfTHf$week=="weekends" & dfTHf$class=="while eating"],na.rm=T)
MEDIANfoodTHWE <-  median(dfTHf$value[dfTHf$week=="weekends" & dfTHf$class=="while eating"],na.rm=T)
SDfoodTHWE <-  sd(dfTHf$value[dfTHf$week=="weekends" & dfTHf$class=="while eating"],na.rm=T)
NfoodTHWE <-  length(na.omit(dfTHf$value[dfTHf$week=="weekends" & dfTHf$class=="while eating"]))

ggplot(NLTH, aes(x=classC,y=value,fill=country))+
geom_boxplot(notch = FALSE, alpha=0.55,legend=FALSE)+
scale_fill_manual(values=c("lightgreen", "darkblue"))+
scale_y_sqrt(breaks = c(2,10,20,40,60,100,200,300,400,500),limits=c(0,670))+ 
coord_flip()+ 
geom_hline(yintercept=0,colour="grey", alpha=0.55)+
xlab("") +
ylab("Number of contact persons") +
theme_bw(base_size = 20)+
theme(axis.title.x = element_text(color = "black", size = 15, vjust= -0.3))+ 
theme(axis.title.y = element_text(color = "black", size = 15, vjust= 0.2))+ 
theme(axis.text.x = element_text(color = "black", size = 15))+ 
theme(axis.text.y = element_text(color = "black", size = 15))+
annotate("text", x="Locations WE",y=22, label=MEDIANlocTHWE, col="blue",size=5, vjust=-0.3)+
annotate("text", x="Locations WE",y=13, label=MEDIANlocNLWE, col="blue",size=5, vjust=1.3)+
annotate("text", x="Locations WE",y=580, label=round(MEANlocTHWE, digits = 1),col="black", size=5, vjust=-0.3)+
annotate("text", x="Locations WE",y=580, label=round(MEANlocNLWE, digits = 1),col="black", size=5, vjust=1.3)+
annotate("text", x="Locations WE",y=660, label="(85.5)",col="darkgrey", size=5, vjust=-.3)+       #SDlocTHWE
annotate("text", x="Locations WE",y=660, label="(32.8)",col="darkgrey", size=5, vjust=1.3)+ 		#SDlocNLWE

annotate("text", x="Locations WD",y=22, label=MEDIANlocTHWD, col="blue",size=5, vjust=-0.3)+
annotate("text", x="Locations WD",y=580, label=round(MEANlocTHWD, digits = 1),col="black", size=5, vjust=-0.3)+
annotate("text", x="Locations WD",y=14, label=MEDIANlocNLWD, col="blue",size=5, vjust=1.3)+
annotate("text", x="Locations WD",y=580, label=round(MEANlocNLWD, digits = 1),col="black", size=5, vjust=1.3)+
annotate("text", x="Locations WD",y=660, label="(55.3)",col="darkgrey", size=5, vjust=-.3)+       #SDlocTHWD
annotate("text", x="Locations WD",y=660, label="(49.1)",col="darkgrey", size=5, vjust=1.3)+ 		#SDlocNLWD

annotate("text", x="Travelling WE",y=7.5, label=MEDIANtravTHWE, col="blue",size=5, vjust=-0.3)+
annotate("text", x="Travelling WE",y=2, label=MEDIANtravNLWE, col="blue",size=5, vjust=1.3)+
annotate("text", x="Travelling WE",y=580, label=round(MEANtravTHWE, digits = 1),col="black", size=5, vjust=-0.3)+
annotate("text", x="Travelling WE",y=580, label=round(MEANtravNLWE, digits = 1),col="black", size=5, vjust=1.3)+
annotate("text", x="Travelling WE",y=660, label="(46.6)",col="darkgrey", size=5, vjust=-.3)+       #SDtravTHWE
annotate("text", x="Travelling WE",y=660, label="(7.5)",col="darkgrey", size=5, vjust=1.3)+ 		#SDtravNLWE

annotate("text", x="Travelling WD",y=7.5, label=MEDIANtravTHWD, col="blue",size=5, vjust=-0.3)+
annotate("text", x="Travelling WD",y=2, label=MEDIANtravNLWD, col="blue",size=5, vjust=1.3)+
annotate("text", x="Travelling WD",y=580, label=round(MEANtravTHWD, digits = 1),col="black", size=5, vjust=-0.3)+
annotate("text", x="Travelling WD",y=580, label=round(MEANtravNLWD, digits = 1),col="black", size=5, vjust=1.3)+
annotate("text", x="Travelling WD",y=660, label="(29.0)",col="darkgrey", size=5, vjust=-.3)+       #SDtravTHWD
annotate("text", x="Travelling WD",y=660, label="(9.4)",col="darkgrey", size=5, vjust=1.3)+ 		#SDtravNLWD

annotate("text", x="Eating WE",y=9, label=MEDIANfoodTHWE, col="blue",size=5, vjust=-0.3)+
annotate("text", x="Eating WE",y=5.5, label=MEDIANfoodNLWE, col="blue",size=5, vjust=1.3)+
annotate("text", x="Eating WE",y=580, label=round(MEANfoodTHWE, digits = 1),col="black", size=5, vjust=-0.3)+
annotate("text", x="Eating WE",y=580, label=round(MEANfoodNLWE, digits = 1),col="black", size=5, vjust=1.3)+
annotate("text", x="Eating WE",y=660, label="(15.6)",col="darkgrey", size=5, vjust=-.3)+       #SDfoodTHWE
annotate("text", x="Eating WE",y=660, label="(16.8)",col="darkgrey", size=5, vjust=1.3)+ 		#SDfoodNLWE

annotate("text", x="Eating WD",y=8.5, label=MEDIANfoodTHWD,col="blue",size=5, vjust=-0.3)+
annotate("text", x="Eating WD",y=6, label=MEDIANfoodNLWD,col="blue",size=5, vjust=1.3)+
annotate("text", x="Eating WD",y=580, label=round(MEANfoodTHWD, digits = 1),col="black", size=5, vjust=-0.3)+
annotate("text", x="Eating WD",y=580, label=round(MEANfoodNLWD, digits = 1),col="black", size=5, vjust=1.3)+
annotate("text", x="Eating WD",y=660, label="(14.7)",col="darkgrey", size=5, vjust=-.3)+       #SDfoodTHWD
annotate("text", x="Eating WD",y=660, label="(6.6)",col="darkgrey", size=5, vjust=1.3) 		#SDfoodNLWD
savePlot(file="",type="pdf")
######################################### Figure 3A END ########################################

######################################### Figure 3B START ########################################
require(MASS)
require(ggplot2)
install.packages("fitdistrplus")
require(fitdistrplus)

dataTHr <- dataTH[(dataTH$recruitmentid=="8L95PZN")==FALSE & (dataTH$recruitmentid=="M72V2YLZ")==FALSE, ]  #remove extremes (see method section)

# FITTING, what are the values MU and SIZE:
degree <- dataNL$degreeYourSpace
degree <- dataTHr$degreeYourSpace

degree <- na.omit(degree)
degree <- c(degree)
fg.mle <- fitdist(degree, "nbinom", method="mle")
ml <- mledist(degree,"nbinom")
b1 <- bootdist(fg.mle, niter=501)   #FOR 95% confidence interval = parametric bootstrap    
summary(b1)

#Create a plot
summary(dataNL$degreeYourSpace)
summary(dataTHr$degreeYourSpace)

out.NBth <-glm.nb(dataNL$degreeYourSpace~1)
out.NBnl <-glm.nb(dataTHr$degreeYourSpace~1)

#Create datasets
dfTH <- data.frame(x=0:533, prob=dnbinom(0:533, mu=exp(coef(out.NBth)), size=out.NBth$theta))		#create a dataset
dfNL <- data.frame(x=0:522, prob=dnbinom(0:522, mu=exp(coef(out.NBnl)), size=out.NBnl$theta))
#ggplot(data=dfTH, aes(x=x,y=prob)) + geom_line() 

dfTH$country <- "Thailand"
dfNL$country <- "Netherlands"
LINES2 <- rbind(dfTH,dfNL)
head(LINES2)

degreeNL <- data.frame(dataNL$degreeYourSpace,"Netherlands")
colnames(degreeNL) <- c("degree","country")
degreeTH <- data.frame(dataTHr$degreeYourSpace,"Thailand")
colnames(degreeTH) <- c("degree","country")
degreeF <- rbind(degreeNL,degreeTH)

ggplot(degreeF,aes(degree, fill=country))+
geom_histogram(alpha=0.55,binwidth=4, aes(y=..density..),position="identity")+
scale_fill_manual(values=c("lightgreen", "darkblue"))+
scale_x_continuous(limits=c(0,540),breaks=c(0,100,200,300,400,500))+
scale_y_continuous(limits=c(0,0.045),breaks=c(0,0.01,0.02,0.03,0.04))+
theme_bw(base_size = 20)+
xlab("degree") +
ylab("density")+
theme(axis.title.x = element_text(color = "black", size = 20, vjust= -0.1))+ 
theme(axis.title.y = element_text(color = "black", size = 20, vjust= 0.35))+ 
theme(axis.text.x = element_text(color = "black", size = 20))+ 
theme(axis.text.y = element_text(color = "black", size = 20))+
geom_line(data=LINES2, aes(x=x, y=prob,group=country, colour = country, linetype=country),position="identity",size=1)+
scale_color_manual(values=c("lightgreen", "darkblue"))
savePlot(file="",type="pdf")
######################################### Figure 3B END ########################################

######################################### Figure 3C ###########################################
dataTHr <- dataTH[(dataTH$recruitmentid=="8L95PZN")==FALSE & (dataTH$recruitmentid=="M72V2YLZ")==FALSE, ]   #DELETE EXTREME VALUES, see method section

degreeNL <- dataNL$degreeYourSpace
degreeNL <- na.omit(degreeNL)

degreeTHr <- dataTHr$degreeYourSpace
degreeTH <- na.omit(degreeTHr)

x <- c(log(degreeTH+0.5))
y <- c(log(degreeNL+0.5))

qqplot(x,y, 
	xlab="degree Thailand (log)",
	ylab="degree Netherlands (log)",
	cex.lab=1.5, cex.axis=1.5, cex.sub=1.5)
abline(a=0,b=1, col=2,lwd=1,lty=2)       			#IF the distributions are the same, then the points should be on this line

#TEST WITH ANDERSON-DARLING
install.packages("kSamples")
require(kSamples)
ad.test(list(x,y),method=("asymptotic"),dist=FALSE,Nsim=10000)

######################################### Figure 3C END ###################################################

###########################################################################################################
######################################### FIGURE 3D &  LOGISTIC REGRESSION ANALYSES #######################
require(ggplot2)
require(Hmisc)
install.packages("rcs")
require(rcs)
require(stats)
install.packages("rms")
require(rms)

#WITH SEEDS###
rdsdata <- dataTH[(dataTH$recruitmentid=="8L95PAN")==FALSE & (dataTH$recruitmentid=="M7DVDBLA")==FALSE, ]
rdsdata <- dataNL

rdsdata$age[rdsdata$age==-1]=NA			    
rds.data2 <- rdsdata[is.na(rdsdata$age)==FALSE,]    #row eruit waar geen age is!
nrow(rds.data2)
#WITH SEEDS###

#WITHOUT SEEDS##############################ZONDER SEEDS
	#THAILAND
	rdsdata <- dataTH[(dataTH$recruitmentid=="8L95PAN")==FALSE & (dataTH$recruitmentid=="M7DVDBLA")==FALSE, ]
	rdsdata$age[rdsdata$age==-1]=NA
	rdsdata$seeds[is.na(rdsdata$seeds)] = 2	
	rdsdata$seeds[rdsdata$seeds== 0] = 2
	rdsdata$seeds[rdsdata$seeds==1]= NA
	rds.data2 <- rdsdata[is.na(rdsdata$seeds)==FALSE,]
	rds.data2 <- rds.data2[is.na(rds.data2$age)==FALSE,]
	nrow(rds.data2)

	#NETHERLANDS
	rdsdata <- dataNL
	rdsdata$age[rdsdata$age==-1]=NA	
	rdsdata$seeds[is.na(rdsdata$seeds)] <- 2	
	rdsdata$seeds[rdsdata$seeds==1]= NA
	rds.data2 <- rdsdata[is.na(rdsdata$seeds)==FALSE,]
	rds.data2 <- rds.data2[is.na(rds.data2$age)==FALSE,]
	rds.data2$seeds
	#ZONDER SEEDS###########################ZONDER SEEDS

rds.data2$recruited  <- ifelse(rds.data2$recruitmentbyNUM=="0",0,        
			 	      ifelse(rds.data2$recruitmentbyNUM>"0",1,NA))
rds.data2$recruited <- factor(rds.data2$recruited)

rds.data2$education2 <- ifelse(rds.data2$education<"4",0,			
				ifelse(rds.data2$education>"3",1,NA))        		
rds.data2$education2 <- factor(rds.data2$education2)
rds.data2$householdRealV2[rds.data2$householdRealV2==520]=NA
rds.data2$gender <- ifelse(rds.data2$gender=="1",0,			
				ifelse(rds.data2$gender=="2",1,NA))        		
rds.data2$gender <- factor(rds.data2$gender)
############################################################################################
#########################################Supplementary test for LOGISTIC REGRESSION ANALYSES#######################
degree <- rds.data2$degreeYourSpace
recruited <- as.numeric(rds.data2$recruited)
df <- data.frame(degree,recruited)
df <- na.omit(df)

rcspline.plot(log(df$degree+0.5), df$recruited, nk=4, showknots=FALSE, noprint=FALSE,xlab="degree", ylim=c(-1,1.1),xrange=c(0,10))    #just to plot function before adding RCS

#########################################Supplementary test for LOGISTIC REGRESSION ANALYSES#######################
############################################################################################

rds.data2 <- rds.data2[is.na(rds.data2$age)==FALSE,]
rds.data2 <- rds.data2[is.na(rds.data2$degreeYourSpace)==FALSE,]
nrow(rds.data2)

fit1 <- glm(formula=recruited~rcs(log(degreeYourSpace+0.5),3)+ rcs(age,3) + education2  + gender+ rcs(householdRealV2,3), data=rds.data2, family=binomial("logit"))
summary(fit1)

########################################################## EXPORT FIT VALUES##########################################################
coef <-summary(fit1)$coefficients[,1]   
stE <- summary(fit1)$coefficients[,2]
Z <- summary(fit1)$coefficients[,3]
Pvalue <- summary(fit1)$coefficients[,4]
CI <- confint.default(fit1)    #CI Using Standard Errors

e.table <- cbind("estimate" = coef,
                        "SE"= stE, 
                        "z value" = Z,
				"Pr(>|z|)" = Pvalue,
				"95% CI" = CI) 
rownames(e.table) <- c("Constant","degree1","degree2","Age","age", "Edu Bachelor","male", "Household size1","Household size2")
write.csv(e.table, ".csv")
########################################################## EXPORT FIT VALUES##########################################################

newdata3 <- cbind(rds.data2,predict(fit1,type="response",se=TRUE))  
newdata4 <- within(newdata3, {
PredictProb <- plogis(fit) 
LL <- plogis(fit - (1.96*se.fit))
UL <- plogis(fit + (1.96*se.fit))
})
head(newdata4)

data <- data.frame(newdata4$degreeYourSpace, newdata4$UL,newdata4$LL,newdata4$PredictProb,newdata4$fit,newdata4$gender)

##THAILAND 
data$country <- "Thailand"
	      
#WITH SEEDS
data$Seeds <- "With Seeds"
ThaiWithSeeds <- data
 	   
#Without SEEDS
data$Seeds <- "Without Seeds"
ThaiWithoutSeeds <- data	

##NETHERLANDS
data$country <- "Netherlands"
	     
#WITH SEEDS
data$Seeds <- "With Seeds"
NLWithSeeds <- data
 	 
#Without SEEDS
data$Seeds <- "Without Seeds"
NLWithoutSeeds <- data	

datafull <- rbind(ThaiWithSeeds,ThaiWithoutSeeds,NLWithSeeds,NLWithoutSeeds)			
colnames(datafull) <- c("degreeYourSpace","UL","LL","PredictProb","fit","gender","Seeds","country")
datafull <- na.omit(datafull)

datafull$gender <- as.numeric(datafull$gender)
datafull$gender[datafull$gender=="1"]= "Female"
datafull$gender[datafull$gender=="2"]= "Male"	

#######################################   FIGURE 3D #########################################
ggplot(datafull, aes(x = degreeYourSpace, y = PredictProb, colour=gender, group=gender)) + 
geom_smooth(size=1,se=FALSE,method="loess")+ 
geom_smooth(aes(y=UL,fill=gender),se=FALSE,linetype=3,size=1, method="loess")+
geom_smooth(aes(y=LL,fill=gender),se=FALSE,linetype=3,size=1, method="loess")+
scale_x_continuous(limits=c(0,150), breaks=c(0,25,50,75,100,125,150))+
facet_grid(country~Seeds, scales = "free")+
scale_color_manual(values=c("#CC6666", "#008B45"))+
scale_y_continuous(limits=c(0.4,0.8),breaks=c(0.2,0.4,0.6,0.8))+ 
theme_bw(base_size = 20)+
theme(strip.background = element_rect(fill = "white"))+
xlab("degree") +
ylab("Predicted Probability of Inviting Contacts")+
theme(axis.title.x = element_text(color = "black", size = 20, vjust= -0.1))+ 
theme(axis.title.y = element_text(color = "black", size = 20, vjust= 0.35))+ 
theme(axis.text.x=element_text(color = "black", size = 20,angle=30,hjust=1))+ 
theme(axis.text.y = element_text(color = "black", size = 20))+
theme(legend.position="top")+
theme(strip.text.x= element_text(size =20))+
theme(strip.text.y= element_text(size =20))
savePlot(file="",type="pdf")
#######################################   FIGURE 3D END #########################################

#######################################   FIGURE 4A  #########################################
library(data.table)
library(lattice)
library(gplots)

##############CONTACT AT DIFFERENT LOCATIONS  #NETHERLANDS  
DF <- data.table(dataNL$age_groups,dataNL$degreeY, dataNL$degreeS, dataNL$degreeO)
tests <- DF[ ,lapply(.SD, sum, na.rm=TRUE), by=V1]
tests[, young:= V2/sum(V2,V3,V4), by = list(V1)]
tests[, same:= V3/sum(V2,V3,V4), by = list(V1)]
tests[, older:= V4/sum(V2,V3,V4), by = list(V1)]
df <- data.frame(tests$V1,tests$young,tests$same, tests$older)
df <- df[-13,]
df[df=="NaN"] = 0

empt <- matrix(0:0,ncol=4,nrow=1)
empt[1,1] <- "10-14"
mats <- as.matrix(df)
dim(mats) <- c(13,4)
mats <- rbind(empt,mats)
matt <- data.frame(mats)
order <- c("10-14","15-19","20-24","25-29","30-34","35-39","40-44","45-49","50-54","55-59","60-64","65-69","70-74","75-79")
matr <-  matt[order(match(matt[,1],order)),]
mat <- as.matrix(matr)
dim(mat) <- c(14,4)
mat
loc <- mat[,-1]

myPanel <- function(x, y, z, ...) {			
    panel.levelplot(x,y,z,...)				# Add: panel = myPanel to levelplot
    panel.text(x, y, round(z,2),cex=1.5,font=1)
}
x <- c("10-14","15-19","20-24","25-29","30-34","35-39","40-44","45-49","50-54","55-59","60-64","65-69","70-74","75-79")
y <-  c("locations YA", "locations SA","locations OA")
ckey <- list(labels=list(cex=1.8,cex.lab=1.5,font=1, col='black'), height=0.35) 
levelplot(loc, ylim=y, xlim=x,panel = myPanel, xlab=list("age participant",cex=1.8,font=1), colorkey=ckey, ylab=list("",cex=1.8,font=1),alpha.regions=0.55, col.regions=colorpanel(16,"darkgreen", "lightgreen","yellow"), scales=list(x=list(cex=1.5,font=1,rot=45),y=list(cex=1.5,font=1)))

savePlot(file="",type="pdf")

##############CONTACT AT DIFFERENT LOCATIONS  #THAILAND
DF <- data.table(dataTH$age_groups,dataTH$degreeY, dataTH$degreeS, dataTH$degreeO)
tests <- DF[ ,lapply(.SD, sum, na.rm=TRUE), by=V1]
tests[, young:= V2/sum(V2,V3,V4), by = list(V1)]
tests[, same:= V3/sum(V2,V3,V4), by = list(V1)]
tests[, older:= V4/sum(V2,V3,V4), by = list(V1)]
df <- data.frame(tests$V1,tests$young,tests$same, tests$older)
df <- df[-10,]
df[df=="NaN"] = 0

empt <- matrix(0:0,ncol=4,nrow=5)
empt[1,1] <- "55-59"
empt[2,1] <- "60-64"
empt[3,1] <- "65-69"
empt[4,1] <- "70-74"
empt[5,1] <- "75-79"
mats <- as.matrix(df)
dim(mats) <- c(9,4)
mats <- rbind(empt,mats)
mats <- data.frame(mats)
order <- c("10-14","15-19","20-24","25-29","30-34","35-39","40-44","45-49","50-54","55-59","60-64","65-69","70-74","75-79")
matr <-  mats[order(match(mats[,1],order)),]
mat <- as.matrix(matr)
dim(mat) <- c(14,4)
loc <- mat[,-1]
myPanel <- function(x, y, z, ...) {			
    panel.levelplot(x,y,z,...)				
    panel.text(x, y, round(z,2),cex=1.5,font=1)
}
levelplot(loc, ylim=y, xlim=x,panel = myPanel, xlab=list("age participant",cex=1.8,font=1), colorkey=ckey, ylab=list("",cex=1.8,font=1),alpha.regions=0.55, col.regions=colorpanel(16,"darkblue", "lightblue","yellow"), scales=list(x=list(cex=1.5,font=1,rot=45),y=list(cex=1.5,font=1)))

savePlot(file="",type="pdf")

#######CONTACT WHILE EATING CONTACTS###   #NETHERLANDS
DFe <- data.table(dataNL$age_groups,dataNL$FOODy, dataNL$FOODs, dataNL$FOODo)
tests <- DFe[ ,lapply(.SD, sum, na.rm=TRUE), by=V1]
tests[, young:= V2/sum(V2,V3,V4), by = list(V1)]
tests[, same:= V3/sum(V2,V3,V4), by = list(V1)]
tests[, older:= V4/sum(V2,V3,V4), by = list(V1)]
df <- data.frame(tests$V1,tests$young,tests$same, tests$older)
df <- df[-13,]
df[df=="NaN"] = 0

empt <- matrix(0:0,ncol=4,nrow=1)
empt[1,1] <- "10-14"
mats <- as.matrix(df)
dim(mats) <- c(13,4)
mats <- rbind(empt,mats)
matt <- data.frame(mats)
order <- c("10-14","15-19","20-24","25-29","30-34","35-39","40-44","45-49","50-54","55-59","60-64","65-69","70-74","75-79")
matr <-  matt[order(match(matt[,1],order)),]
mat <- as.matrix(matr)
dim(mat) <- c(14,4)
eat <- mat[,-1]

myPanel <- function(x, y, z, ...) {			
    panel.levelplot(x,y,z,...)				
    panel.text(x, y, round(z,2),cex=1.5,font=1)
}

x <- c("10-14","15-19","20-24","25-29","30-34","35-39","40-44","45-49","50-54","55-59","60-64","65-69","70-74","75-79")
y <-  c("Eating YA","Eating SA","Eating OA")
ckey <- list(labels=list(cex=1.5,cex.lab=1.5, col='black'), height=0.35) 

levelplot(eat, ylim=y, xlim=x,panel = myPanel, xlab=list("age participant",cex=1.8,font=1), colorkey=ckey, ylab=list("",cex=1.8,font=1), alpha.regions=0.55, col.regions=colorpanel(16,"darkgreen", "lightgreen","yellow"), scales=list(x=list(cex=1.5,font=1,rot=45),y=list(cex=1.5,font=1)))
savePlot(file="",type="pdf")

#######CONTACT WHILE EATING CONTACTS###   ######THAILAND
DFe <- data.table(dataTH$age_groups,dataTH$FOODy, dataTH$FOODs, dataTH$FOODo)
tests <- DFe[ ,lapply(.SD, sum, na.rm=TRUE), by=V1]
tests[, young:= V2/sum(V2,V3,V4), by = list(V1)]
tests[, same:= V3/sum(V2,V3,V4), by = list(V1)]
tests[, older:= V4/sum(V2,V3,V4), by = list(V1)]
df <- data.frame(tests$V1,tests$young,tests$same, tests$older)
df <- df[-10,]
df[df=="NaN"] = 0
empt <- matrix(0:0,ncol=4,nrow=5)
empt[1,1] <- "55-59"
empt[2,1] <- "60-64"
empt[3,1] <- "65-69"
empt[4,1] <- "70-74"
empt[5,1] <- "75-79"
mats <- as.matrix(df)
dim(mats) <- c(9,4)
mats <- rbind(empt,mats)
mats <- data.frame(mats)
order <- c("10-14","15-19","20-24","25-29","30-34","35-39","40-44","45-49","50-54","55-59","60-64","65-69","70-74","75-79")
matr <-  mats[order(match(mats[,1],order)),]
mat <- as.matrix(mats)
dim(mat) <- c(14,4)
eat <- mat[,-1]

myPanel <- function(x, y, z, ...) {			
    panel.levelplot(x,y,z,...)				
    panel.text(x, y, round(z,2),cex=1.5,font=1)
}

x <- c("10-14","15-19","20-24","25-29","30-34","35-39","40-44","45-49","50-54","55-59","60-64","65-69","70-74","75-79")
y <-  c("Eating YA","Eating SA","Eating OA")
ckey <- list(labels=list(cex=1.5,cex.lab=1.5, col='black'), height=0.35) 
levelplot(eat, ylim=y, xlim=x,panel = myPanel, xlab=list("age participant",cex=1.8,font=1), colorkey=ckey, ylab=list("",cex=1.8,font=1), alpha.regions=0.55, col.regions=colorpanel(16,"darkblue", "lightblue","yellow"), scales=list(x=list(cex=1.5,font=1,rot=45),y=list(cex=1.5,font=1)))
savePlot(file="",type="pdf")
#######################################   FIGURE 4A END #########################################

#######################################   FIGURE 4B #########################################
require(lattice)
library(gplots)

data <- dataTH
#OR
data <- dataNL

data.koppel1 <- data[,c("recruitmentid","age", "agegrouplarge","gender","education","degreeYourSpace","Catdegree","travelTotwZeros","locationTotReal","foodTotFULL","householdRealV2", "tree", "wave", "no_suc_rec", "recruitmentbyNUM", "ntotalsym", "nnflu", "ncold", "TwoorMoreSymp","age_groups","postalcode"),]
names(data.koppel1) <- c("child1","age.child1","agegrouplarge1", "gender.child1","education.child1","degree.child1","Catdegree.child1","travelTotwZeros.child1","locationTotReal.child1","foodTot.child1","householdTot.child1", "tree.child1", "wave.child1", "no_suc_rec.child1", "recruitmentbyNUM.child1", "totalsym.child1","flu.child1","cold.child1", "TwoorMoreSymp.child1","age_groups.child1","post_child1")
data.merge <- merge(x=data,y=data.koppel1,by="child1",all.x=TRUE)
data.merge <- data.merge[order(data.merge$ID),]

data.koppel2 <- data[,c("recruitmentid","age","agegrouplarge","gender","education","degreeYourSpace","Catdegree","travelTotwZeros","locationTotReal","foodTotFULL","householdRealV2", "tree", "wave", "no_suc_rec", "recruitmentbyNUM", "ntotalsym", "nnflu", "ncold", "TwoorMoreSymp","age_groups","postalcode"),]
names(data.koppel2) <- c("child2","age.child2","agegrouplarge2","gender.child2","education.child2","degree.child2","Catdegree.child2","travelTotwZeros.child2","locationTotReal.child2","foodTot.child2","householdTot.child2", "tree.child2", "wave.child2", "no_suc_rec.child2", "recruitmentbyNUM.child2", "totalsym.child2","flu.child2","cold.child2", "TwoorMoreSymp.child2","age_groups.child2","post_child2")
data.merge <- merge(x=data.merge,y=data.koppel2,by="child2",all.x=TRUE)
data.merge <- data.merge[order(data.merge$ID),]   

data.koppel3 <- data[,c("recruitmentid","age","agegrouplarge","gender","education","degreeYourSpace","Catdegree","travelTotwZeros","locationTotReal","foodTotFULL","householdRealV2", "tree", "wave", "no_suc_rec", "recruitmentbyNUM", "ntotalsym", "nnflu", "ncold", "TwoorMoreSymp","age_groups","postalcode"),]
names(data.koppel3) <- c("child3","age.child3","agegrouplarge3","gender.child3","education.child3","degree.child3","Catdegree.child3","travelTotwZeros.child3","locationTotReal.child3","foodTot.child3","householdTot.child3", "tree.child3", "wave.child3", "no_suc_rec.child3", "recruitmentbyNUM.child3", "totalsym.child3","flu.child3","cold.child3", "TwoorMoreSymp.child3","age_groups.child3","post_child3")
data.merge <- merge(x=data.merge,y=data.koppel3,by="child3",all.x=TRUE)
data.merge <- data.merge[order(data.merge$ID),]

data.koppel4 <- data[,c("recruitmentid","age","agegrouplarge", "gender","education","degreeYourSpace","Catdegree","travelTotwZeros","locationTotReal","foodTotFULL","householdRealV2", "tree", "wave", "no_suc_rec", "recruitmentbyNUM", "ntotalsym", "nnflu", "ncold", "TwoorMoreSymp","age_groups","postalcode"),]
names(data.koppel4) <- c("child4","age.child4","agegrouplarge4","gender.child4","education.child4","degree.child4","Catdegree.child4","travelTotwZeros.child4","locationTotReal.child4","foodTot.child4","householdTot.child4", "tree.child4", "wave.child4", "no_suc_rec.child4", "recruitmentbyNUM.child4", "totalsym.child4","flu.child4","cold.child4", "TwoorMoreSymp.child4","age_groups.child4","post_child4")
data.merge <- merge(x=data.merge,y=data.koppel4,by="child4",all.x=TRUE)
data.merge <- data.merge[order(data.merge$ID),]

tab <- data.frame(agerecruiter=rep(data.merge$age_groups,4),agechild=c(data.merge$age_groups.child1, data.merge$age_groups.child2,data.merge$age_groups.child3,data.merge$age_groups.child4))
tab2 <- na.omit(tab)
t <- table(tab2)
t <- as.matrix(t,ncol=13,nrow=13)

#RUN ONLY FOR NETHERLANDS
emt <- matrix(0:0, ncol=1,nrow=13)
te <- cbind(emt,t)
emr <- matrix(0:0,ncol=14,nrow=1)
tet <- rbind(emr,te)
t <- tet
#RUN ONLY FOR NETHERLANDS

#RUN ONLY FOR THAILAND
t <- cbind(t[, 1:9], matrix(0:0, ncol = 4,nrow=9))
t <- rbind(t[1:9,],matrix(0:0, ncol = 13,nrow=4))
colnames(t) <- c("10-14","15-19","20-24","25-29","30-34","35-39","40-44","45-49","50-54","55-59","60-64","65-69","70-74")
rownames(t) <- c("10-14","15-19","20-24","25-29","30-34","35-39","40-44","45-49","50-54","55-59","60-64","65-69","70-74")
#RUN ONLY FOR THAILAND

e <- prop.table(t)
ee <- c("10-14","15-19","20-24","25-29","30-34","35-39","40-44","45-49","50-54","55-59","60-64","65-69","70-74")
ckey <- list(labels=list(cex=1.5,cex.lab=1.5, col='black'), height=0.35) 

#FOR NETHERLANDS
levelplot(e, ylim=ee, xlim=ee, xlab=list("age recruiter",cex=1.8,font=1), colorkey=ckey, ylab=list("age recruited contact person",cex=1.8,font=1), alpha.regions=0.55, col.regions=colorpanel(18,"darkgreen","gray100","yellow"), scales=list(x=list(cex=1.5,font=1,rot=45),y=list(cex=1.5,font=1)))
savePlot(file="",type="pdf")

#THAILAND
#RUN FOR REDUCED SCALE:
ee <- c("10-14","15-19","20-24","25-29","30-34","35-39","40-44","45-49","50-54")

#GREEN COLOUR
levelplot(e, ylim=ee, xlim=ee, xlab=list("age recruiter",cex=1.8,font=1), colorkey=ckey, ylab=list("age recruited contact person",cex=1.8,font=1), alpha.regions=0.55, col.regions=colorpanel(18,"darkgreen","gray100","orange"), scales=list(x=list(cex=1.5,font=1,rot=45),y=list(cex=1.5,font=1)))

#BLUE COLOUR
levelplot(e, ylim=ee, xlim=ee, xlab=list("age recruiter",cex=1.8,font=1), colorkey=ckey, ylab=list("age recruited contact person",cex=1.8,font=1), alpha.regions=0.55, col.regions=colorpanel(18,"darkblue", "cyan","yellow"), scales=list(x=list(cex=1.5,font=1,rot=45),y=list(cex=1.5,font=1)))
savePlot(file="",type="pdf")
#######################################   FIGURE 4B END #########################################

#######################################   FIGURE 4C #########################################
require(ggplot2)

data <- dataNL

data.koppel1 <- data[,c("recruitmentid","age", "agegrouplarge","gender","education","degreeYourSpace","Catdegree","travelTotwZeros","locationTotReal","foodTotFULL","householdRealV2", "tree", "wave", "no_suc_rec", "recruitmentbyNUM", "ntotalsym", "nnflu", "ncold", "TwoorMoreSymp","age_groups"),]
names(data.koppel1) <- c("child1","age.child1","agegrouplarge1", "gender.child1","education.child1","degree.child1","Catdegree.child1","travelTotwZeros.child1","locationTotReal.child1","foodTot.child1","householdTot.child1", "tree.child1", "wave.child1", "no_suc_rec.child1", "recruitmentbyNUM.child1", "totalsym.child1","flu.child1","cold.child1", "TwoorMoreSymp.child1","age_groups.child1")
data.merge <- merge(x=data,y=data.koppel1,by="child1",all.x=TRUE)
data.merge <- data.merge[order(data.merge$ID),]

data.koppel2 <- data[,c("recruitmentid","age","agegrouplarge","gender","education","degreeYourSpace","Catdegree","travelTotwZeros","locationTotReal","foodTotFULL","householdRealV2", "tree", "wave", "no_suc_rec", "recruitmentbyNUM", "ntotalsym", "nnflu", "ncold", "TwoorMoreSymp","age_groups"),]
names(data.koppel2) <- c("child2","age.child2","agegrouplarge2","gender.child2","education.child2","degree.child2","Catdegree.child2","travelTotwZeros.child2","locationTotReal.child2","foodTot.child2","householdTot.child2", "tree.child2", "wave.child2", "no_suc_rec.child2", "recruitmentbyNUM.child2", "totalsym.child2","flu.child2","cold.child2", "TwoorMoreSymp.child2","age_groups.child2")
data.merge <- merge(x=data.merge,y=data.koppel2,by="child2",all.x=TRUE)
data.merge <- data.merge[order(data.merge$ID),]   

data.koppel3 <- data[,c("recruitmentid","age","agegrouplarge","gender","education","degreeYourSpace","Catdegree","travelTotwZeros","locationTotReal","foodTotFULL","householdRealV2", "tree", "wave", "no_suc_rec", "recruitmentbyNUM", "ntotalsym", "nnflu", "ncold", "TwoorMoreSymp","age_groups"),]
names(data.koppel3) <- c("child3","age.child3","agegrouplarge3","gender.child3","education.child3","degree.child3","Catdegree.child3","travelTotwZeros.child3","locationTotReal.child3","foodTot.child3","householdTot.child3", "tree.child3", "wave.child3", "no_suc_rec.child3", "recruitmentbyNUM.child3", "totalsym.child3","flu.child3","cold.child3", "TwoorMoreSymp.child3","age_groups.child3")
data.merge <- merge(x=data.merge,y=data.koppel3,by="child3",all.x=TRUE)
data.merge <- data.merge[order(data.merge$ID),]

data.koppel4 <- data[,c("recruitmentid","age","agegrouplarge", "gender","education","degreeYourSpace","Catdegree","travelTotwZeros","locationTotReal","foodTotFULL","householdRealV2", "tree", "wave", "no_suc_rec", "recruitmentbyNUM", "ntotalsym", "nnflu", "ncold", "TwoorMoreSymp","age_groups"),]
names(data.koppel4) <- c("child4","age.child4","agegrouplarge4","gender.child4","education.child4","degree.child4","Catdegree.child4","travelTotwZeros.child4","locationTotReal.child4","foodTot.child4","householdTot.child4", "tree.child4", "wave.child4", "no_suc_rec.child4", "recruitmentbyNUM.child4", "totalsym.child4","flu.child4","cold.child4", "TwoorMoreSymp.child4","age_groups.child4")
data.merge <- merge(x=data.merge,y=data.koppel4,by="child4",all.x=TRUE)
data.merge <- data.merge[order(data.merge$ID),]

DB <- rgb(0,0,139,alpha=55, maxColorValue=255)	    #THAILAND               # COLOUR SCALE
LG <- rgb(144,238,144,alpha=55, maxColorValue=255)  #NETHERLANDS

#RUN FOR NETHERLANDS
tab <- data.frame(degreeR=rep(data.merge$degreeYourSpace,4),degreechild=c(data.merge$degree.child1, data.merge$degree.child2,data.merge$degree.child3,data.merge$degree.child4))
myplot <- qplot(degreeR, degreechild, data = tab, geom="hex", bins=40, xlim = c(0, 150), ylim = c(0,150), xlab=("degree recruiter"), ylab=("degree recruited contact person"))
myplot + theme_bw(base_size = 20)+
scale_fill_gradient2(low="purple",mid="forestgreen", high="yellow", name="Count\n",breaks=c(0,2,5,8,10))+
theme(axis.title.x = element_text(color = "black", size = 25, vjust= -0.1))+ 
theme(axis.title.y = element_text(color = "black", size = 25, vjust= 0.3))+ 
theme(axis.text.x = element_text(color = "black", size = 20))+ 
theme(axis.text.y = element_text(color = "black", size = 20))+
theme(legend.text=element_text(size=20))+
theme(legend.title=element_text(size=20))
savePlot(file="",type="pdf")

#RUN FOR THAILAND
tab <- data.frame(degreeR=rep(data.merge$degreeYourSpace,4),degreechild=c(data.merge$degree.child1, data.merge$degree.child2,data.merge$degree.child3,data.merge$degree.child4))
myplot <- qplot(degreeR, degreechild, data = tab, geom="hex", bins=40, xlim = c(0, 150), ylim = c(0,150), xlab=("degree recruiter"), ylab=("degree recruited contact person"))
myplot + theme_bw(base_size = 20)+
scale_fill_gradient2(low="blue3",mid="blue3", high="lightgoldenrod1", name="Count\n", breaks=c(1,2,3,4))+
theme(axis.title.x = element_text(color = "black", size = 25, vjust= -0.1))+ 
theme(axis.title.y = element_text(color = "black", size = 25, vjust= 0.3))+ 
theme(axis.text.x = element_text(color = "black", size = 20))+ 
theme(axis.text.y = element_text(color = "black", size = 20))+
theme(legend.text=element_text(size=20))+
theme(legend.title=element_text(size=20))
savePlot(file="",type="pdf")
#######################################   FIGURE 4C #########################################

####################################### ####################################### ####################################### ####################################### 
####################################### #######################################   TABLE 2 EFFECTIVE CONTACT RATE  ###################################################################
dataTHr <- dataTH[(dataTH$recruitmentid=="8L95PZN")==FALSE & (dataTH$recruitmentid=="M72V2YLZ")==FALSE, ]  #remove extremes (see method section)

degree <- dataNL$degreeYourSpace
degree <- dataTHr$degreeYourSpace
m <- mean(degree,na.rm=T)
v <- var(degree,na.rm=T)
m+(v/m)

loc <-  dataNL$locationTotReal
loc <-  dataTHr$locationTotReal
m <- mean(loc,na.rm=T)
v <- var(loc,na.rm=T)
m+(v/m)

travel <- dataNL$travelTotwZeros
travel <- dataTHr$travelTotwZeros
m <- mean(travel,na.rm=T)
v <- var(travel,na.rm=T)
m+(v/m)

food <- dataNL$foodTotFULL
food <- dataTHr$foodTotFULL
m <- mean(food,na.rm=T)
v <- var(food,na.rm=T)
m+(v/m)

dataTH$householdRealV2[dataTH$householdRealV2==520]=NA
house <- dataTH$householdRealV2
house <- dataNL$householdRealV2
m <- mean(house,na.rm=T)
v <- var(house,na.rm=T)
m+(v/m)
####################################### #######################################  TABLE 2 EFFECTIVE CONTACT RATE ###################################################################
####################################### ####################################### ####################################### ####################################### 

####################################### ####################################### ####################################### ####################################### 
####################################### #######################################  TABLE 3 CORRELATIONS ###################################################################
install.packages("igraph")
library(igraph)

#Read datasets separately
#THAILAND
path <- ""
dataset <- read.csv2(file=paste(path,"",sep=""))

#NETHERLANDS
path <- ""
dataset <- read.csv2(file=paste(path,".csv",sep=""))
dataset <- dataset[order(dataset$tree,dataset$wave,dataset$recruitmentid),]

#REMOVE EMPTY ROWS
dataset <- dataset[as.character(dataset$recruitmentid)!="",]

#REPLACE EMPTY FIELDS CHILDREN TOKENS WITH NA's
dataset[as.character(dataset$child1)=="","child1"] <- NA
dataset[as.character(dataset$child2)=="","child2"] <- NA
dataset[as.character(dataset$child3)=="","child3"] <- NA
dataset[as.character(dataset$child4)=="","child4"] <- NA

dataset.new <- data.frame(matrix(NA,nrow=0,ncol=ncol(dataset)*2+1))
names(dataset.new) <- c("degree",paste(names(dataset),"person1",sep="."),paste(names(dataset),"person2",sep="."))

#Begin loop: door voor elke boom in de dataset het volgende
for (t in unique(dataset$tree))
	{
	print(paste("Processing Tree Number",t,sep=" "))
	#Bepaal (potentiele) recruiters in een boom, d.w.z. mensen binnen een boom die de vragenlijst ingevuld hebben.
	lab.tree <- unique(as.character(dataset[as.character(dataset$tree)==t,"recruitmentid"]))
	lab.tree <- lab.tree[is.na(lab.tree)==FALSE]
	#Bepaal aantal recruiters.
	len.tree <- length(lab.tree)
	#Als er meer dan 1 recruiter in een boom zit, doe het volgende.
	if (len.tree>1)
		{	
		#Maak eerst drie soorten objecten per tree aan als voorbereiding.
		#[1] Maak een "adjacency matrix" van alleen maar nullen aan; zie verderop voor verdere uitleg.
		adj.mat <- matrix(0,nrow=len.tree,ncol=len.tree)
		rownames(adj.mat) <- lab.tree
		colnames(adj.mat) <- lab.tree
		#Voor alle individuen in de boom doe het volgende.
		for (i in lab.tree)
			{
			#Pas de adjacency matrix aan: vervang een 0 door 1 bij een link tussen parent/child
			adj.mat[rownames(adj.mat)==i,colnames(adj.mat)==as.character(dataset[dataset$recruitmentid==i,"child1"])] <- 1
			adj.mat[rownames(adj.mat)==i,colnames(adj.mat)==as.character(dataset[dataset$recruitmentid==i,"child2"])] <- 1 
			adj.mat[rownames(adj.mat)==i,colnames(adj.mat)==as.character(dataset[dataset$recruitmentid==i,"child3"])] <- 1
			adj.mat[rownames(adj.mat)==i,colnames(adj.mat)==as.character(dataset[dataset$recruitmentid==i,"child4"])] <- 1
			adj.mat[rownames(adj.mat)==as.character(dataset[dataset$recruitmentid==i,"child1"]),colnames(adj.mat)==i] <- 1
			adj.mat[rownames(adj.mat)==as.character(dataset[dataset$recruitmentid==i,"child2"]),colnames(adj.mat)==i] <- 1
			adj.mat[rownames(adj.mat)==as.character(dataset[dataset$recruitmentid==i,"child3"]),colnames(adj.mat)==i] <- 1
			adj.mat[rownames(adj.mat)==as.character(dataset[dataset$recruitmentid==i,"child4"]),colnames(adj.mat)==i] <- 1
			}
		#[2] Maak een graaf object op basis van de graph adjacency matrix.
		g <- graph.adjacency(adj.mat, mode="max")
		#[3] Bepaal met Dijkstra's algoritme de kortste paden tussen elke persoon binnen een boom; dit is in feite de link degree. 
		# Maak degree object aan.
		pth.mat <- shortest.paths(g)
		#Sla adj.mat, g en pth als aparte tree-specifieke objecten op (zie hieronder).
		assign(paste("adj.mat",t,sep=""),adj.mat)
		assign(paste("g",t,sep=""),g)
		assign(paste("pth.mat",t,sep=""),pth.mat)
		#Nu de eigenlijk stap van het bewerken van dataset.new
		#Vul nu dataset.new aan met nieuwe rijen: elke rij bevat de degree, en alle variabelen van persoon1, en alle variabelen van persoon2.
		#De degree kunnen we verkrijgen uit pth.mat.
		#Voor elke recruiter in de rijen van pth.mat doe het volgende
		for (i in rownames(pth.mat))
			{
			#Bepaal de positie pos.i van persoon1 (kan de volgende waarde aanemen: 1 tot het aantal rijen)
			pos.i <- which(rownames(pth.mat)==i)
			#En pak de bijbehorende informatie van persoon1 uit de dataset.
			tmp.i <- (dataset[dataset$recruitmentid==i,])
			#Voor elke recruiter in de kolommen van pth.mat doe het volgende
			for (j in colnames(pth.mat))
				{
				#Bepaal de positie pos.j van persoon2 (kan de volgende waarde aanemen: 1 tot het aantal rijen)
				pos.j <- which(colnames(pth.mat)==j)
				#Omdat de matrix pth.mat symmetrisch is in de diagonaal, hoef je alleen een bewerking te doen voor de helft van de matrix:
				#In dit geval kiezen we als pos.j>pos.i (maar had evengoed kunnen zijn pos.j<pos.i)
				if (pos.j>pos.i)
					{
					#Pak de bijbehorende informatie van persoon2 uit de dataset.
					tmp.j <- (dataset[dataset$recruitmentid==j,])
					#Plak de eerder gemaakte kolommen tmp.i en tmp.j aan elkaar vast; 
					#Plus voeg daarvoor ook de degree toe, nl. pth.mat[pos.i,pos.j]
					tmp.vec <- cbind(pth.mat[pos.i,pos.j],tmp.i,tmp.j)
					#Zorg ervoor dat tmp.vec dezelfde kolomnamen heeft als dataset.new
					names(tmp.vec) <- names(dataset.new)
					#Dan kan de rij met nieuwe link informatie namelijk netjes worden toegevoegd aan dataset.new.
					dataset.new <- rbind(dataset.new,tmp.vec)
					}
				}
			}
		}
	}



#AGE
dataset.cor <- data.frame(degree=1:max(dataset.new$degree,na.rm=T),cor=NA)
for (i in 1:nrow(dataset.cor))
	{
	dataset.cor[i,2] <- cor(dataset.new[dataset.new$degree==i,"age.person1"],dataset.new[dataset.new$degree==i,"age.person2"],use="pairwise.complete.obs")
	}
dataset.cor

plot(dataset.new[dataset.new$degree==1,"age.person1"],dataset.new[dataset.new$degree==1,"age.person2"])
cor.test(dataset.new[dataset.new$degree==3,"age.person1"],dataset.new[dataset.new$degree==3,"age.person2"],use="pairwise.complete.obs", method="pearson")
cor.test(dataset.new[dataset.new$degree>=4,"age.person1"],dataset.new[dataset.new$degree>=4,"age.person2"],use="pairwise.complete.obs", method="pearson")

# GENDER
dataset.cor <- data.frame(degree=1:max(dataset.new$degree,na.rm=T),cor=NA)
for (i in 1:nrow(dataset.cor))
	{
	dataset.cor[i,2] <- cor(dataset.new[dataset.new$degree==i,"gender.person1"],dataset.new[dataset.new$degree==i,"gender.person2"],use="pairwise.complete.obs", method="pearson")
	}
dataset.cor

cor.test(dataset.new[dataset.new$degree==3,"gender.person1"],dataset.new[dataset.new$degree==3,"gender.person2"],use="pairwise.complete.obs",method="pearson")
cor.test(dataset.new[dataset.new$degree>=4,"gender.person1"],dataset.new[dataset.new$degree>=4,"gender.person2"],use="pairwise.complete.obs",method="pearson")

# EDUCATION
dataset.cor <- data.frame(degree=1:max(dataset.new$degree,na.rm=T),cor=NA)
for (i in 1:nrow(dataset.cor))
	{
	dataset.cor[i,2] <- cor(dataset.new[dataset.new$degree==i,"educationforCn.person1"],dataset.new[dataset.new$degree==i,"educationforCn.person2"],use="pairwise.complete.obs", method="spearman")
	}
dataset.cor

install.packages("RVAideMemoire")
require(RVAideMemoire)
require(Hmisc)

spearman.ci(dataset.new[dataset.new$degree==3,"educationforCn.person1"],dataset.new[dataset.new$degree==3,"educationforCn.person2"], nrep = 1000, conf.level = 0.95)
spearman.ci(dataset.new[dataset.new$degree>=4,"educationforCn.person1"],dataset.new[dataset.new$degree>=4,"educationforCn.person2"], nrep = 1000, conf.level = 0.95)
#cor.test(dataset.new[dataset.new$degree>=4,"educationforCn.person1"],dataset.new[dataset.new$degree>=4,"educationforCn.person2"], method=c("spearman"),conf.level = 0.95)

###DEGREE DESCRETE
#Netherlands
dataset.new$degreeYourSpace.person1 <- log(dataset.new$degreeYourSpace.person1+0.5)
dataset.new$degreeYourSpace.person2 <- log(dataset.new$degreeYourSpace.person2+0.5)
cor.test(dataset.new[dataset.new$degree==3,"degreeYourSpace.person1"],dataset.new[dataset.new$degree==3,"degreeYourSpace.person2"],use="pairwise.complete.obs", method="pearson")

#THailand
dataset.newR <- dataset.new[(dataset.new$recruitmentid.person1=="8L95PAN")==FALSE & (dataset.new$recruitmentid.person2=="8L95PAN")==FALSE &   (dataset.new$recruitmentid.person1=="M7DVDBLA")==FALSE & (dataset.new$recruitmentid.person2=="M7DVDBLA")==FALSE, ]
dataset.newR$degreeYourSpace.person1 <- log(dataset.newR$degreeYourSpace.person1+0.5)
dataset.newR$degreeYourSpace.person2 <- log(dataset.newR$degreeYourSpace.person2+0.5)

cor.test(dataset.newR[dataset.newR$degree==3,"degreeYourSpace.person1"],dataset.newR[dataset.newR$degree==3,"degreeYourSpace.person2"],use="pairwise.complete.obs", method="pearson")
plot(dataset.newR[dataset.newR$degree==2,"degreeYourSpace.person1"],dataset.newR[dataset.newR$degree==2,"degreeYourSpace.person2"])

###DEGREE CATEGORIES
dataset.cor <- data.frame(degree=1:max(dataset.new$degree,na.rm=T),cor=NA)
#NETHERLANDS
for (i in 1:nrow(dataset.cor))
	{
	dataset.cor[i,2] <- cor(dataset.new[dataset.new$degree==i,"Catdegree.person1"],dataset.new[dataset.new$degree==i,"Catdegree.person2"],use="pairwise.complete.obs", method="spearman")
	}
dataset.cor

require(RVAideMemoire)
spearman.ci(dataset.new[dataset.new$degree>=4,"Catdegree.person1"],dataset.new[dataset.new$degree>=4,"Catdegree.person2"], nrep = 1000, conf.level = 0.95)
cor.test(dataset.new[dataset.new$degree>=4,"Catdegree.person1"],dataset.new[dataset.new$degree>=4,"Catdegree.person2"], method=c("spearman"),conf.level = 0.95)

#THAILAND
dataset.cor <- data.frame(degree=1:max(dataset.new$degree,na.rm=T),cor=NA)
for (i in 1:nrow(dataset.cor))
	{
	dataset.cor[i,2] <- cor(dataset.newR[dataset.newR$degree==i,"Catdegree.person1"],dataset.newR[dataset.newR$degree==i,"Catdegree.person2"],use="pairwise.complete.obs", method="spearman")
	}
dataset.cor

spearman.ci(dataset.newR[dataset.newR$degree>=4,"Catdegree.person1"],dataset.newR[dataset.newR$degree>=4,"Catdegree.person2"], nrep = 1000, conf.level = 0.95)
cor.test(dataset.newR[dataset.newR$degree>=4,"Catdegree.person1"],dataset.newR[dataset.newR$degree>=4,"Catdegree.person2"], method=c("spearman"),conf.level = 0.95)

###NUMBER OF CONTACTS WHILE EATING
#NETHERLANDS
dataset.new$foodTotFULL.person1 <- log(dataset.new$foodTotFULL.person1+0.5)
dataset.new$foodTotFULL.person2 <- log(dataset.new$foodTotFULL.person2+0.5)
cor.test(dataset.new[dataset.new$degree>=4,"foodTotFULL.person1"],dataset.new[dataset.new$degree>=4,"foodTotFULL.person2"],use="pairwise.complete.obs", method="pearson")

#THAILAND
dataset.newR <- dataset.new[(dataset.new$recruitmentid.person1=="8L95PAN")==FALSE & (dataset.new$recruitmentid.person2=="8L95PAN")==FALSE &   (dataset.new$recruitmentid.person1=="M7DVDBLA")==FALSE & (dataset.new$recruitmentid.person2=="M7DVDBLA")==FALSE, ]
dataset.newR$foodTotFULL.person1 <- log(dataset.newR$foodTotFULL.person1+0.5)
dataset.newR$foodTotFULL.person2 <- log(dataset.newR$foodTotFULL.person2+0.5)
cor.test(dataset.newR[dataset.newR$degree>=4,"foodTotFULL.person1"],dataset.newR[dataset.newR$degree>=4,"foodTotFULL.person2"],use="pairwise.complete.obs", method="pearson")

nrow(dataset.newR)

###TWO OR MORE SYMPTOMS
cor.test(dataset.new$TwoorMoreSymp.person1,dataset.new$TwoorMoreSymp.person2,use="pairwise.complete.obs",method="pearson")
table(dataset.new[dataset.new$degree=3,"TwoorMoreSymp.person1"],dataset.new[dataset.new$degree==3,"TwoorMoreSymp.person2"])

cor.test(dataset.new[dataset.new$degree>=4,"TwoorMoreSymp.person1"],dataset.new[dataset.new$degree>=4,"TwoorMoreSymp.person2"],use="pairwise.complete.obs",method="spearman")
spearman.ci(dataset.new[dataset.new$degree>=4,"TwoorMoreSymp.person1"],dataset.new[dataset.new$degree>=4,"TwoorMoreSymp.person2"], nrep = 1000, conf.level = 0.95)
####################################### #######################################  TABLE 3 CORRELATIONS ###################################################################
####################################### ####################################### ####################################### ####################################### 

####################################### #######################################  ###################################################################
####################################### ####################################### FIGURE S1 Seasonality ####################################### ####################################### 
require(graphics)      
install.packages("xts")
require(xts)
require(scales)
require(ggplot2)

#THAILAND PARTICIPANTS
timeTH <- data.frame(as.Date(dataTH$Date_participation),part=1)
timeTH <- timeTH[order(timeTH$as.Date.dataTH.Date_participation., decreasing =FALSE ),]
timeTH2 <- aggregate(timeTH[,2], by=list(cut(timeTH$as.Date.dataTH.Date_participation. ,"week")),sum) 
weeks <- c(51,52,1,2,3,4,5,6,7,8,9)
timeTH2$weeks <- weeks
timeTH2$data <- "Number of participants in Thailand"
colnames(timeTH2) <- c("Date","freq","week","data")

timeNL <- data.frame(as.Date(dataNL$date_pat),part=1)
timeNL <- timeNL[order(timeNL$as.Date.dataNL.date_pat., decreasing =FALSE ),]
timeNL2 <- aggregate(timeNL[,2], by=list(cut(timeNL$as.Date.dataNL.date_pat. ,"week")),sum) 
weeksNL2 <- c(1,2,3,4,5,6,7,8,9,10,11,12,13,14)
timeNL2$weeks <- weeksNL2
timeNL2$data <- "Number of participants in Netherlands"
colnames(timeNL2) <- c("Date","freq","week","data")

#LOADING WHO FLUNET DATA  
path <- ""
dataset <- read.csv(file=paste(path,".csv",sep=""))

WHO <- data.frame(as.Date(dataset$Date),dataset[,4],dataset[,2],data="Influenza activity Thailand (number of specimens positive for influenza)")
colnames(WHO) <- c("Date","freq","week","data")
WHO <- WHO[order(WHO$Date, decreasing =FALSE ),]

#LOADING NIVEL DATA  
path <- ""
dataset <- read.csv(file=paste(path,"",sep=""))

NIVEL <- data.frame(as.Date(dataset$day),dataset$week,dataset[,6])
NIVEL <- NIVEL[order(NIVEL$ as.Date.dataset.day., decreasing =FALSE ),]

NIVEL <- data.frame(as.Date(NIVEL$as.Date.dataset.day.),round(NIVEL$dataset...6.,2),NIVEL$dataset.week,data="Influenza activity Netherlands (ILI / 10.000 inhabitants)")
colnames(NIVEL) <- c("Date","freq","week","data")
NIVEL <- NIVEL[order(NIVEL$Date, decreasing =FALSE ),]

COM <-  rbind(WHO, timeTH2,timeNL2,NIVEL)
COM <- COM[order(COM$Date, decreasing =FALSE ),]

ggplot(COM,aes(x=Date,factor(freq),fill=data)) + geom_bar(stat="identity",legend=FALSE)+            
xlab("years 2012-2013") + ylab("")+
scale_color_manual(values=c("#CC6666", "#9999CC","darkgreen"))+
facet_wrap(~data, scales="free_y",ncol=1)+
theme_bw(base_size = 20)+
theme(axis.title.x = element_text(color = "black", size = 20, vjust= -0.1))+ 
theme(axis.title.y = element_text(color = "black", size = 20, vjust= 0.35))+ 
theme(axis.text.x = element_text(color = "black", size = 20))+ 
theme(axis.text.y = element_text(color = "black", size = 20))+
scale_x_date(breaks="2 months",labels = date_format("%b"),limit=c(as.Date("2012-01-02"),as.Date("2013-12-31")) )+
scale_y_discrete(breaks= function(n) n[floor(length(n)/4)*0:10])
savePlot(file="",type="pdf")
####################################### ####################################### FIGURE S1 Seasonality ###################################################################
####################################### ####################################### ####################################### ####################################### 

##############################################  FIGURE S2 AND S3 DEGREE OVER DAYS OF THE WEEK, TO CHECK DISTRIBUTIONS ############################
install.packages("ggplot2")
require(ggplot2)
install.packages("reshape")
library(reshape)

#Netherlands
travel <- data.frame(dataNL$Masstransport, dataNL$bustransport, dataNL$car, dataNL$othertransport,dataNL$dayfilledin)
travel$total <- rowSums(travel[,1:4])
travel <- na.omit(travel)
nrow(travel)
colnames(travel) <- c("mass transport","bus","car","other transport","day","Total transport")
travel$day <- factor(travel$day, levels= c("Mon", "Tue", "Wed","Thu","Fri","Sat","Sun"), ordered=TRUE)
travelNL <- melt(travel, id=c("day"))
travelNL$country <- "NL"

#Thailand
dataTHr <- dataTH[(dataTH$recruitmentid=="8L95PZN")==FALSE & (dataTH$recruitmentid=="M72V2YLZ")==FALSE, ]

travel <- data.frame(dataTHr$Masstransport, dataTHr$bustransport, dataTHr$car, dataTHr$othertransport,dataTHr$dayfilledin)
travel$total <- rowSums(travel[,1:4])
travel <- na.omit(travel)
nrow(travel)
colnames(travel) <- c("mass transport","bus","car","other transport","day","Total transport")
travel$day <- factor(travel$day, levels= c("Mon", "Tue", "Wed","Thu","Fri","Sat","Sun"), ordered=TRUE)
travelTH <- melt(travel, id=c("day"))
travelTH$country <- "TH"
summary(travel)

traveldata <- rbind(travelNL,travelTH)

ggplot(traveldata, aes(x=day,y=value,colour=variable))+
geom_boxplot(notch = FALSE, alpha=0.55,show_guide = FALSE)+
facet_wrap(variable~country,ncol=2)+					
scale_y_sqrt(breaks = c(0,5,20,50,100,150),limits=c(0,150))+
geom_hline(yintercept=0,colour="grey", alpha=0.55)+
xlab("") +
ylab("Number of contact persons")+
theme_bw(base_size = 20)+
theme(axis.title.x = element_text(color = "black", size = 15, vjust= -0.3))+ 
theme(axis.title.y = element_text(color = "black", size = 15, vjust= 0.2))+ 
theme(axis.text.x = element_text(color = "black", size = 15))+ 
theme(axis.text.y = element_text(color = "black", size = 15))

#KS Test
travelTH <- data.frame(dataTH$Masstransport, dataTH$bustransport, dataTH$car, dataTH$othertransport,dataTH$dayfilledin)
travelTH$total <- rowSums(travelTH[,1:4])
travelTH <- na.omit(travelTH)
ks.test(travelTH$total[travelTH$dataTH.dayfilledin=="Sat"],travelTH$total[travelTH$dataTH.dayfilledin=="Sun"])

travelNL <- data.frame(dataNL$Masstransport, dataNL$bustransport, dataNL$car, dataNL$othertransport,dataNL$dayfilledin)
travelNL$total <- rowSums(travelNL[,1:4])
travelNL <- na.omit(travelNL)
ks.test(travelNL$total[travelNL$dataNL.dayfilledin=="Mon"],travelNL$total[travelNL$dataNL.dayfilledin=="Fri"])

#LOCATIONS
#NETHERLANDS
home <- data.frame(dataNL$homeY, dataNL$homeS, dataNL$homeO)
home$homeT <- rowSums(home[,1:3])

work <- data.frame(dataNL$workY, dataNL$workS, dataNL$workO)
work$workT <- rowSums(work[,1:3])

school <- data.frame(dataNL$schoolY, dataNL$schoolS, dataNL$schoolO)
school$schoolT <- rowSums(school[,1:3])

restaurant <- data.frame(dataNL$restaurantY, dataNL$restaurantS, dataNL$restaurantO)
restaurant$restaurantT <- rowSums(restaurant[,1:3])

coffee <- data.frame(dataNL$coffeeY, dataNL$coffeeS, dataNL$coffeeO)
coffee$coffeeT <- rowSums(coffee[,1:3])

sport <- data.frame(dataNL$sportY, dataNL$sportS, dataNL$sportO)
sport$sportT <- rowSums(sport[,1:3])

concert <- data.frame(dataNL$concertY, dataNL$concertS, dataNL$concertO)
concert$concertT <- rowSums(concert[,1:3])

otherplace <- data.frame(dataNL$otherplaceY, dataNL$otherplaceS, dataNL$otherplaceO)
otherplace$otherplaceT <- rowSums(otherplace[,1:3])

loc <- data.frame(home$homeT,work$workT,school$schoolT,restaurant$restaurantT,coffee$coffeeT,sport$sportT,concert$concertT,otherplace$otherplaceT,dataNL$dayfilledin)
loc$total <- rowSums(loc[,1:8])
loc <- na.omit(loc)
nrow(loc)
colnames(loc) <- c("home","work","school / university","restaurant","coffee shop","sport", "concert","other places","day","Total locations")
loc$day <- factor(loc$day, levels= c("Mon", "Tue", "Wed","Thu","Fri","Sat","Sun"), ordered=TRUE)
locNL <- melt(loc, id=c("day"))
locNL$country <- "NL"

tapply(loc$work, loc$day, summary)
ks.test(loc$total[loc$dataNL.dayfilledin=="Mon"],loc$total[loc$dataNL.dayfilledin=="Fri"])

#THAILAND
home <- data.frame(dataTHr$homeY, dataTHr$homeS, dataTHr$homeO)
home$homeT <- rowSums(home[,1:3])

work <- data.frame(dataTHr$workY, dataTHr$workS, dataTHr$workO)
work$workT <- rowSums(work[,1:3])

school <- data.frame(dataTHr$schoolY, dataTHr$schoolS, dataTHr$schoolO)
school$schoolT <- rowSums(school[,1:3])

restaurant <- data.frame(dataTHr$restaurantY, dataTHr$restaurantS, dataTHr$restaurantO)
restaurant$restaurantT <- rowSums(restaurant[,1:3])

coffee <- data.frame(dataTHr$coffeeY, dataTHr$coffeeS, dataTHr$coffeeO)
coffee$coffeeT <- rowSums(coffee[,1:3])

sport <- data.frame(dataTHr$sportY, dataTHr$sportS, dataTHr$sportO)
sport$sportT <- rowSums(sport[,1:3])

concert <- data.frame(dataTHr$concertY, dataTHr$concertS, dataTHr$concertO)
concert$concertT <- rowSums(concert[,1:3])

otherplace <- data.frame(dataTHr$otherplaceY, dataTHr$otherplaceS, dataTHr$otherplaceO)
otherplace$otherplaceT <- rowSums(otherplace[,1:3])

loc <- data.frame(home$homeT,work$workT,school$schoolT,restaurant$restaurantT,coffee$coffeeT,sport$sportT,concert$concertT,otherplace$otherplaceT,dataTHr$dayfilledin)
loc$total <- rowSums(loc[,1:8])
loc <- na.omit(loc)
nrow(loc)
head(loc)

#Test with two sample KS test whether distributions different significantly
ks.test(loc$total[loc$dataTHr.dayfilledin=="Sat"],loc$total[loc$dataTHr.dayfilledin=="Sun"])

colnames(loc) <- c("home","work","school / university","restaurant","coffee shop","sport", "concert","other places","day","Total locations")
loc$day <- factor(loc$day, levels= c("Mon", "Tue", "Wed","Thu","Fri","Sat","Sun"), ordered=TRUE)
locTH <- melt(loc, id=c("day"))
locTH$country <- "TH"

locdata <- rbind(locNL,locTH)

ggplot(locdata, aes(x=day,y=value,colour=variable))+
geom_boxplot(notch = FALSE, alpha=0.55,show_guide = FALSE)+
facet_wrap(variable~country,ncol=2)+					
scale_y_sqrt(breaks = c(0,5,50,150),limits=c(0,150))+
geom_hline(yintercept=0,colour="grey", alpha=0.55)+
xlab("") +
ylab("Number of contact persons")+
theme_bw(base_size = 20)+
theme(axis.title.x = element_text(color = "black", size = 15, vjust= -0.3))+ 
theme(axis.title.y = element_text(color = "black", size = 15, vjust= 0.2))+ 
theme(axis.text.x = element_text(color = "black", size = 15))+ 
theme(axis.text.y = element_text(color = "black", size = 15))
##############################################   ARTIKEL TWEE FIGURE S2 / S3 DEGREE OVER DAYS OF THE WEEK CHECK OF DISTRIBUTIONS ############################