####Variations in the use of malaria preventive measures among pregnant women in Guinea. A secondary analysis of the 2012 and 2018 demographic and health surveys. 
#List of authors and affiliations
#Ibrahima BARRY1,2, Almamy Amara TOURE2,3¥, Oumar SANGHO4, Abdoul Habib BEAVOGUI2, Diao CISSE5, Abdourahamane DIALLO6, Aboubacar Sidiki MAGASSOUBA5, Younoussa SYLLA2,3, Lancina DOUMBIA1, Mahamoud Sama Cherif2, Alseny Yarie CAMARA1 , Fatou DIAWARA7, Moctar TOUNKARA1, Alexandre DELAMOU4,7, Seydou DOUMBIA1

#Corresponding author
#Almamy Amara TOURE, Centre National de Formation et de Recherche en Santé Rurale de Maferinyah, Forecariah, Guinea, almamy@maferinyah.org


#########Importation des données Femmes et personal recode
library(foreign)
#ir1 <- read.spss("D:\\MOOC\\BASE_2005sav.sav", to.data.frame=TRUE)
ir11 <- read.spss("D:\\MOOC\\GNIR62FL.sav", to.data.frame=TRUE)
ir112 <- read.spss("D:\\MOOC\\GNIR71FL.sav", to.data.frame=TRUE)

#PR1 <- read.spss("D:\\MOOC\\GNPR53FL.sav", to.data.frame=TRUE)
PR11 <- read.spss("D:\\MOOC\\GNPR62FL.sav", to.data.frame=TRUE)
PR112 <- read.spss("D:\\MOOC\\\\GNPR71FL.sav", to.data.frame=TRUE)


###################Combinez les bd pour rechercher la variables dernières passées sous moustiquaire
library(dplyr)
#bd <- inner_join(ir1,PR1,by=c("V001"="HV001","V002"="HV002","V003"="HV003"))

##################Recodage de la variable HVOO3 en numérique
#PR1$HV003 <- as.numeric(PR1$HV003)
PR11$HV003 <- as.numeric(PR11$HV003)
PR112$HV003 <- as.numeric(PR112$HV003)
############################################Fusionner les bases de 2005
#bd2005 <- inner_join(ir1,PR1,by=c("V001"="HV001","V002"="HV002","V003"="HVIDX"))
bd2012 <- inner_join(ir11,PR11,by=c("V001"="HV001","V002"="HV002","V003"="HVIDX"))
#bd2012 <- inner_join(ir11,PR11,by=c("V001"="HV001","V002"="HV002","V003"="HV003"))
bd2018 <- inner_join(ir112,PR112,by=c("V001"="HV001","V002"="HV002","V003"="HVIDX"))
#bd2018 <- inner_join(ir112,PR112,by=c("V001"="HV001","V002"="HV002","V003"="HV003"))

rm(ir11)
rm(ir112)
rm(PR11)
rm(PR112)

####################################Filtering for the last 3 years preceding the survey

########################################################
summary(bd2012$V238)
dhs2012 <- filter(.data = bd2012,V238>=1)
summary(bd2018$V238)
dhs2018 <- filter(.data = bd2018,V238>=1)



######################################################################clearing environment
rm(bd2012,bd2018)

####################Selecting variables on each dataframe from each DHS survery from 2005 to 2018
#ibdat2005 <- subset(bdat2005,select=c("CASEID","V005","V007","V012","V021","V022","V024","V025","V106","V136","V157","V158","V159","V190","V501","V714","V731",
# "V150","V151","V152","V153","V201","HML10","HML12","HML16","HML18","V218","V701","M49A.1","ML1.1","HV103","HV104","ML101","M13.1","M14.1","M10.1","M15.1"))

#ibdat2012 <- subset(bdat2012,select=c("CASEID","V005","V007","V012","V021","V022","V024","V025","V106","V136","V157","V158","V159","V190","V501","V714","V731",
#                                     "V150","V151","V152","V153","V201","HML10","V218","V701","M49A.1","ML1.1","HV227","ML101","M13.1","M14.1","M10.1","M15.1"))

#ibdat2018 <- subset(bdat2018,select=c("CASEID","V005","V007","V012","V021","V022","V024","V025","V106","V136","V157","V158","V159","V190","V501","V714","V731",
#                                      "V150","V151","V152","V153","V201","HML10","V218","V701","M49A.1","ML1.1","HV227","ML101","M13.1","M14.1","M10.1","M15.1"))

#ibdatfin <- rbind(ibdat2005,ibdat2012,ibdat2018)
#ibdatfin <- rbind(ibdat2012,ibdat2018)

##############################################################Filtering by Survey Year
#a) For 2012
#ibdatfin$V022 <- as.numeric(ibdatfin$V022)
#########Explorer les variables d'intérêt SP et Passé la nuit précédent l'enquête sous Moustiquaire
#summary(ibdatfin$HV103)
#summary(ibdat2012$ML1.1)
#summary(ibdat2012$M49A.1)
############Filtering dataframe for women who had had at least one dose of SP and women who had treated net 
#ibdat2012 <- filter(.data = ibdat2012,ibdat2012$M49A.1=="Yes")
ibdat2012<- filter(.data = dhs2012,dhs2012$M49A.1=="Yes"& dhs2012$HV227=="Yes")

ibdat2012 <- subset(ibdat2012,select=c("CASEID","V001","V005","V007","V012","V021","V022","V024","V025","V106","V136","V157","V158","V159","V190","V191","V501","V714","V731",
                                       "V150","V151","V152","V153","V201","HML10","V218","V701","M49A.1","ML1.1","HV227","ML101","M13.1","M14.1","M10.1","M15.1"))

############b) For 2018
#ibdatfin$V022 <- as.numeric(ibdatfin$V022)
#########Explorer les variables d'intérêt SP et Passé la nuit précédent l'enquête sous Moustiquaire
#summary(ibdatfin$HV103)
#summary(ibdat2018$ML1.1)
#summary(ibdat2018$M49A.1)
############Filtering dataframe for women who had had at least one dose of SP women who had treated net
ibdat2018 <- filter(.data = dhs2018,dhs2018$M49A.1=="Yes" & dhs2018$HV227=="Yes")

ibdat2018 <- subset(ibdat2018,select=c("CASEID","V001","V005","V007","V012","V021","V022","V024","V025","V106","V136","V157","V158","V159","V190","V191","V501","V714","V731",
                                       "V150","V151","V152","V153","V201","HML10","V218","V701","M49A.1","ML1.1","HV227","ML101","M13.1","M14.1","M10.1","M15.1"))

#######################################################Exploring,data structure
str(ibdat2012)
str(ibdat2018)

##########Renaming dataframe variables
ibdat2012 <- ibdat2012[-c(25)]
ibdat2018 <- ibdat2018[-c(25)]
dput(names(ibdat2012))
dput(names(ibdat2018))
names(ibdat2012) <- c("CASEID","idclust", "weight", "survey-year", "age","PSU","strate","region", "residence", "education", "h-number", 
                      "a-newspap", "a-radio", "a-tv", "w.quintile","w-quintile", "mat-status", "currently-w","work-lyr","link-headh","sexheadhouse", 
                      "age-headhous", "phone-h", "chldren-born", "living-child", "partner-education", "sp-pregnancy", "sp-number","own-net" ,"slept-net", 
                      "times-1ANC", "anc-number", "wanted-pregn","place-deliver")

names(ibdat2018) <- c("CASEID","idclust" ,"weight", "survey-year", "age","PSU","strate","region", "residence", "education", "h-number", 
                      "a-newspap", "a-radio", "a-tv", "w.quintile","w-quintile", "mat-status", "currently-w","work-lyr","link-headh","sexheadhouse", 
                      "age-headhous", "phone-h", "chldren-born", "living-child", "partner-education", "sp-pregnancy", "sp-number","own-net" ,"slept-net", 
                      "times-1ANC", "anc-number", "wanted-pregn","place-deliver")
############################################Variables reconding For 2012
summary(ibdat2012)
levels(ibdat2012$region)
addmargins(table(ibdat2012$region)) 
levels(ibdat2012$`a-newspap`)
levels(ibdat2012$`a-newspap`) <- c("Not at all","access","access","access")
table(ibdat2012$`a-newspap`)
levels(ibdat2012$`a-radio`) <- c("Not at all","access","access","access")
table(ibdat2012$`a-radio`)
levels(ibdat2012$`a-tv`)
levels(ibdat2012$`a-tv`) <- c("Not at all","access","access","access")
table(ibdat2012$`a-tv`)
levels(ibdat2012$`mat-status`)
#levels(ibdat2012$`mat-status`) <- c("single", "Married","in-couple", "single","single","single")
#table(ibdat2012$`mat-status`)

levels(ibdat2012$`partner-education`)
levels(ibdat2012$`partner-education`) <- c("No education", "Primary","Secondary", "Higher","No education")
table(ibdat2012$`partner-education`)

#levels(ibdat2012$`w-quitile`) <- c("Poorer","Poorer","Middle","Richer","Richer")
table(ibdat2012$`sp-number`)
class(ibdat2012$`sp-number`)
levels(ibdat2012$`sp-number`)
levels(ibdat2012$`sp-number`) <- c("1","10","12","1","1","2","1","21","3","1","4","1","1","1","60","7","8","9","1")
ibdat2012$`sp-number` <- factor((ibdat2012$`sp-number`),levels = c("1","2","3","4","7","8","9","10","12","21","60"))
table(ibdat2012$`sp-number`)
ibdat2012$`sp-number` <- as.numeric(ibdat2012$`sp-number`)
table(ibdat2012$`sp-number`)

#ibdat2012$`sp-number` <- cut(ibdat2012$`sp-number`,breaks = c(1,2,12),right = FALSE)
#table(ibdat2012$`sp-number`)
############################################Variables reconding For 2018
summary(ibdat2018)
levels(ibdat2018$region)
addmargins(table(ibdat2018$region)) 
levels(ibdat2018$`a-newspap`)
levels(ibdat2018$`a-newspap`) <- c("Not at all","access","access","access")
table(ibdat2018$`a-newspap`)
levels(ibdat2018$`a-radio`) <- c("Not at all","access","access","access")
table(ibdat2018$`a-radio`)
levels(ibdat2018$`a-tv`)
levels(ibdat2018$`a-tv`) <- c("Not at all","access","access","access")
table(ibdat2018$`a-tv`)
#levels(ibdat2018$`mat-status`)
#levels(ibdat2018$`mat-status`) <- c("single", "Married","in-couple", "single","single","single")
#table(ibdat2018$`mat-status`)

levels(ibdat2018$`partner-education`)
levels(ibdat2018$`partner-education`) <- c("No education", "Primary","Secondary", "Higher","No education")
table(ibdat2018$`partner-education`)

#levels(ibdat2018$`w-quitile`) <- c("Poorer","Poorer","Middle","Richer","Richer")
table(ibdat2018$`sp-number`)
class(ibdat2018$`sp-number`)
levels(ibdat2018$`sp-number`)
levels(ibdat2018$`sp-number`) <- c("1","1","2","3","4","1")
table(ibdat2018$`sp-number`)
ibdat2018$`sp-number` <- as.numeric(ibdat2018$`sp-number`)
table(ibdat2018$`sp-number`)
#ibdat2018$`sp-number` <- cut(ibdat2018$`sp-number`,breaks = c(1,2,4),include.lowest = TRUE)
#table(ibdat2018$`sp-number`)

###Excluding relation with head of the household
#ibdatfin <- ibdatfin[-c(1,15,18,22,25,29)]
#ibdatfin <- ibdatfin[-c(1,17,20,24,30)]

#######################################################Recoding ANC number  For 2012
class(ibdat2012$`anc-number`)
table(ibdat2012$`anc-number`)
levels(ibdat2012$`anc-number`)
levels(ibdat2012$`anc-number`) <- c("1","1","10","11","12","13","14","15","16","17","18","2","20","3","4","5","6","7","8","9","1")
table(ibdat2012$`anc-number`)
levels(ibdat2012$`anc-number`) <- c("1","1","1","1","1","1","1","1","1","1","2","1","3","4","5","6","7","8","9")
table(ibdat2012$`anc-number`)
ibdat2012$`anc-number` <- as.numeric(ibdat2012$`anc-number`)
table(ibdat2012$`anc-number`)
summary(ibdat2012$`anc-number`)

#ibdat2012$`anc-number` <- cut(ibdat2012$`anc-number`,breaks = c(1,3,9),include.lowest = TRUE)
#table(ibdat2012$`anc-number`)

########ANC number for 2018
class(ibdat2018$`anc-number`)
levels(ibdat2018$`anc-number`)
table(ibdat2018$`anc-number`)
levels(ibdat2018$`anc-number`) <- c("1","1","10","11","12","13","14","15","16","2","20","3","4","5","6","7","8","9","1")
table(ibdat2018$`anc-number`)
levels(ibdat2018$`anc-number`) <- c("1","10","1","1","13","1","15","1","2","1","3","4","5","6","7","8","9")
summary(ibdat2018$`anc-number`)
ibdat2018$`anc-number` <- factor((ibdat2018$`anc-number`),levels = c("1","2","3","4","5","6","7","8","9","10","13","15"))
table(ibdat2018$`anc-number`)
ibdat2018$`anc-number` <- as.numeric(ibdat2018$`anc-number`)
table(ibdat2018$`anc-number`)
summary(ibdat2018$`anc-number`)

#ibdat2018$`anc-number` <- cut(ibdat2018$`anc-number`,breaks = c(1,3,12),include.lowest = TRUE)
#table(ibdat2018$`anc-number`)
##################################################First ANC period for 2012
class(ibdat2012$`times-1ANC`)
levels(ibdat2012$`times-1ANC`)
table(ibdat2012$`times-1ANC`)
levels(ibdat2012$`times-1ANC`) <- c("1","1","2","3","4","5","6","7","8","1","1")
table(ibdat2012$`times-1ANC`)
ibdat2012$`times-1ANC` <- as.numeric(ibdat2012$`times-1ANC`)
table(ibdat2012$`times-1ANC`)
#############Make imputation for missing for firt ANC times
#################Checking for missing data
sapply (ibdat2012, function (x) sum(is.na (x)))
sapply (ibdat2018, function (x) sum(is.na (x)))
library(na.tools)
ibdat2012$`times-1ANC` <- na.bootstrap(ibdat2012$`times-1ANC`)
summary(ibdat2012$`times-1ANC`)

#ibdat2012$`times-1ANC` <- cut(ibdat2012$`times-1ANC`,breaks = c(1,3,6,8),include.lowest = TRUE,labels = c("first-trim","second-trim","third-trim"))
#table(ibdat2012$`times-1ANC`)

###############First ANC period for 2018
class(ibdat2018$`times-1ANC`)
levels(ibdat2018$`times-1ANC`)
table(ibdat2018$`times-1ANC`)
levels(ibdat2018$`times-1ANC`) <- c("1","1","10","2","3","4","5","6","7","8","9","1")
table(ibdat2018$`times-1ANC`)
ibdat2018$`times-1ANC` <- factor((ibdat2018$`times-1ANC`),levels = c("1","2","3","4","5","6","7","8","9","10"))
table(ibdat2018$`times-1ANC`)
ibdat2018$`times-1ANC` <- as.numeric(ibdat2018$`times-1ANC`)
summary(ibdat2018$`times-1ANC`)
set.seed(123)
ibdat2018$`times-1ANC` <- na.bootstrap(ibdat2018$`times-1ANC`)
ibdat2018$`times-1ANC` <- na.bootstrap(ibdat2018$`times-1ANC`)
summary(ibdat2018$`times-1ANC`)
#ibdat2018$`times-1ANC` <- cut(ibdat2018$`times-1ANC`,breaks = c(1,3,6,10),include.lowest = TRUE,labels = c("first-trim","second-trim","third-trim"))
#table(ibdat2018$`times-1ANC`)

#################Checking for missing data
sapply (ibdat2012, function (x) sum(is.na (x)))
sapply (ibdat2018, function (x) sum(is.na (x)))

##############Recoding for age survey 2012
summary(ibdat2012$age)
#ibdat2012$age <- na.resample(ibdat2012$age)
#ibdat2012$age_rec <- cut(ibdat2012$age,breaks = c(16,18,30,40,49),include.lowest = TRUE)
#table(ibdat2012$age_rec )

class(ibdat2012$`partner-education`)
levels(ibdat2012$`partner-education`)
ibdat2012$`partner-education` <- na.replace(ibdat2012$`partner-education`,.na = "No education")
table(ibdat2012$`partner-education`)
sapply (ibdat2012, function (x) sum(is.na (x)))


########################Dealing with NA for 2018 survey
class(ibdat2018$`partner-education`)
levels(ibdat2018$`partner-education`)
ibdat2018$`partner-education` <- na.replace(ibdat2018$`partner-education`,.na = "No education")
table(ibdat2012$`partner-education`)
ibdat2018$age <- na.median(ibdat2018$age)
sapply (ibdat2018, function (x) sum(is.na (x)))

####Recoding for age (survey 2018)
summary(ibdat2018$age)
#ibdat2018$age_rec  <- cut(ibdat2018$age,breaks = c(16,18,30,40,49),include.lowest = TRUE)
#table(ibdat2018$age_rec )


##Recoding for living child and household number for 2012 survey
summary(ibdat2012$`living-child`)
#ibdat2012$`living-child` <- cut(ibdat2012$`living-child`,breaks = c(1,2,4,7,11),right = FALSE)
#table(ibdat2012$`living-child`)
summary(ibdat2012$`h-number`)
#ibdat2012$`h-number` <- cut(ibdat2012$`h-number`,breaks = c(1,5,10,30),include.lowest = TRUE)
#table(ibdat2012$`h-number`)

##Recoding for living child and household number for 2018 survey
summary(ibdat2018$`living-child`)
#ibdat2018$`living-child` <- cut(ibdat2018$`living-child`,breaks = c(1,2,4,7,14),right = FALSE)
#table(ibdat2018$`living-child`)
summary(ibdat2018$`h-number`)
#ibdat2018$`h-number` <- cut(ibdat2018$`h-number`,breaks = c(1,5,10,38),include.lowest = TRUE)
#table(ibdat2018$`h-number`)


##Recoding for age for head of household for 2012 survey
summary(ibdat2012$`age-headhous`)
levels(ibdat2012$`age-headhous`)
table(ibdat2012$`age-headhous`)
levels(ibdat2012$`age-headhous`) <- c("16","17","18","19","20","21","22","23","24","25","26","27",
                                      "28","29","30","31","32","33","34","35","36","37","38","39", 
                                      "40","41","42","43","44","45","46","47","48","49","50","51",
                                      "52","53","54","55","56","57","58","59","60","61","62","63",
                                      "64","65","66","67","68","69","70","71","72","73","74","75",
                                      "76","77","78","79","80","81","82","83","84","85","86","87",
                                      "88","89","90","91","92","93","94","95","16","17")

ibdat2012$`age-headhous` <- factor((ibdat2012$`age-headhous`),levels = c("19","20","21","22","23","24","25","26","27",
                                                                         "28","29","30","31","32","33","34","35","36","37","38","39", 
                                                                         "40","41","42","43","44","45","46","47","48","49","50","51",
                                                                         "52","53","54","55","56","57","58","59","60","61","62","63",
                                                                         "64","65","66","67","68","69","70","71","72","73","74","75",
                                                                         "76","77","78","79","80","81","82","83","84","87","88","89","91","925"))
ibdat2012$`age-headhous`<- as.numeric(levels(ibdat2012$`age-headhous`))[ibdat2012$`age-headhous`]
table(ibdat2012$`age-headhous`)
summary(ibdat2012$`age-headhous`)
ibdat2012$`age-headhous` <- na.resample(ibdat2012$`age-headhous`)
summary(ibdat2012$`age-headhous`)
#ibdat2012$`age-headhous` <- cut(ibdat2012$`age-headhous`,breaks = c(16,25,40,60,91),include.lowest = TRUE)
#table(ibdat2012$`age-headhous`)

##Recoding for living child and household number for 2018 survey
summary(ibdat2018$`age-headhous`)
levels(ibdat2018$`age-headhous`) <- c("15","16","17","18","19","20","21","22","23","24","25","26","27",
                                      "28","29","30","31","32","33","34","35","36","37","38","39", 
                                      "40","41","42","43","44","45","46","47","48","49","50","51",
                                      "52","53","54","55","56","57","58","59","60","61","62","63",
                                      "64","65","66","67","68","69","70","71","72","73","74","75",
                                      "76","77","78","79","80","81","82","83","84","85","86","87",
                                      "88","89","90","91","92","93","94","95","15","17")
ibdat2018$`age-headhous` <- factor((ibdat2018$`age-headhous`),levels = c("16","18","20","21","22","23","24","25","26","27",
                                                                         "28","29","30","31","32","33","34","35","36","37","38","39", 
                                                                         "40","41","42","43","44","45","46","47","48","49","50","51",
                                                                         "52","53","54","55","56","57","58","59","60","61","62","63",
                                                                         "64","65","66","67","68","69","70","71","72","73","74","75",
                                                                         "76","77","78","79","80","81","82","83","84","87","89","90"))
ibdat2018$`age-headhous`<- as.numeric(levels(ibdat2018$`age-headhous`))[ibdat2018$`age-headhous`]
table(ibdat2018$`age-headhous`)
summary(ibdat2018$`age-headhous`)
ibdat2018$`age-headhous` <- na.resample(ibdat2018$`age-headhous`)
summary(ibdat2018$`age-headhous`)

#ibdat2018$`age-headhous` <- cut(ibdat2018$`age-headhous`,breaks = c(16,25,40,60,90),include.lowest = TRUE)
#table(ibdat2018$`age-headhous`)


###############################################Exporting final data into CSV format
write.csv2(ibdat2012,file = "D:\\MOOC\\ib2012_1.csv")
write.csv2(ibdat2018,file = "D:\\MOOC\\ib2018_1.csv")

#write.csv2(ibdat2012,file = "C:\\Users\\Dr Touré\\Desktop\\AAT\\MOOC\\ib2012.csv")
#write.csv2(ibdat2018,file = "C:\\Users\\Dr Touré\\Desktop\\AAT\\MOOC\\ib2018.csv")
################################################Data analysis is starting from here

#####################################################################Re-importing new data
#dhs2012 <- read.csv2("C:\\Users\\Dr Touré\\Desktop\\AAT\\MOOC\\ib2012.csv")
#dhs2018 <- read.csv2("C:\\Users\\Dr Touré\\Desktop\\AAT\\MOOC\\ib2018.csv")



#######################################################################################
dhs2012 <- read.csv2("D:\\MOOC\\ib2012_1.csv")
dhs2018 <- read.csv2("D:\\MOOC\\ib2018_1.csv")
library(na.tools)


############################Outcome variable from the two previous variables for 2012
########Remove some variables
#dhs2012 <- dhs2012[-c(1:2,7,19,23,27,29)]
dhs2012 <- dhs2012[-c(1:2,8,20,24,28,30)]
#dhs2018 <- dhs2018[-c(1:2,7,19,23,27,29)]
dhs2018 <- dhs2018[-c(1:2,8,20,24,28,30)]

sapply (dhs2012, function (x) sum(is.na (x)))
dhs2012[c(6:8,10:13,15:18,22,24,27:28)] <- lapply(dhs2012[c(6:8,10:13,15:18,22,24,27:28)],factor)
str(dhs2012)
sapply (dhs2012, function (x) sum(is.na (x)))

###########################Since percent of missing data is below 5
levels(dhs2012$a.newspap)
dhs2012$a.newspap <- na.replace(dhs2012$a.newspap,.na = "Not at all")
dhs2012$a.radio <- na.replace(dhs2012$a.radio,.na = "Not at all")
dhs2012$a.tv<- na.replace(dhs2012$a.tv,.na = "Not at all")
set.seed(123)
levels(dhs2012$sp.number)
dhs2012$sp.number <-na.bootstrap(dhs2012$sp.number) 

levels(dhs2012$wanted.pregn)
dhs2012$wanted.pregn <-na.bootstrap(dhs2012$wanted.pregn)
levels(dhs2012$living.child)
dhs2012$living.child <- na.bootstrap(dhs2012$living.child)
sapply (dhs2012, function (x) sum(is.na (x)))

##########Recoding slept net
levels(dhs2012$slept.net)
#levels(dhs2012$slept.net) <- c("No","Yes","Yes")
levels(dhs2012$slept.net) <- c("No","Yes","No")
table(dhs2012$slept.net)
levels(dhs2012$slept.net) <- c("1","2")
dhs2012$slept.net <- as.numeric(levels(dhs2012$slept.net))[dhs2012$slept.net]
table(dhs2012$slept.net)

###########################################################################For  2018
sapply (dhs2018, function (x) sum(is.na (x)))
dhs2018[c(6:8,10:13,15:18,22,24,27:28)] <- lapply(dhs2018[c(6:8,10:13,15:18,22,24,27:28)],factor)
set.seed(123)
dhs2018$times.1ANC <- na.bootstrap(dhs2018$times.1ANC)
sapply (dhs2018, function (x) sum(is.na (x)))

##########Recoding slept net
levels(dhs2018$slept.net)
#levels(dhs2018$slept.net) <- c("No","Yes","Yes")
levels(dhs2018$slept.net) <- c("No","Yes","No")
table(dhs2018$slept.net)
levels(dhs2018$slept.net) <- c("1","2")
dhs2018$slept.net <- as.numeric(levels(dhs2018$slept.net))[dhs2018$slept.net]
table(dhs2018$slept.net)
########################################################Building variables of interest
#)For 2012
summary(dhs2012$sp.number)
dhs2012$malprev[dhs2012$sp.number>=2&dhs2012$slept.net==2] <- "complete"
dhs2012$malprev[dhs2012$sp.number<2&dhs2012$slept.net==2] <- "incomplete"
dhs2012$malprev[dhs2012$sp.number>=2&dhs2012$slept.net==1] <- "incomplete"
dhs2012$malprev[dhs2012$sp.number<2&dhs2012$slept.net==1] <- "incomplete"
table(dhs2012$malprev)
dhs2012$malprev <- factor(dhs2012$malprev)
dhs2012$malprev <- relevel(dhs2012$malprev,ref ="incomplete" )
table(dhs2012$malprev)

#)For 2018
summary(dhs2018$sp.number)
dhs2018$malprev[dhs2018$sp.number>=3&dhs2018$slept.net==2] <- "complete"
dhs2018$malprev[dhs2018$sp.number<3&dhs2018$slept.net==2] <- "incomplete"
dhs2018$malprev[dhs2018$sp.number>=3&dhs2018$slept.net==1] <- "incomplete"
dhs2018$malprev[dhs2018$sp.number<3&dhs2018$slept.net==1] <- "incomplete"
table(dhs2018$malprev)
dhs2018$malprev <- factor(dhs2018$malprev)
dhs2018$malprev <- relevel(dhs2018$malprev,ref ="incomplete" )
table(dhs2018$malprev)

##########################################################Recoding outcome SP and others
summary(dhs2012$sp.number)
dhs2012$sp.number <- cut(dhs2012$sp.number,breaks = c(1,2,12),include.lowest = TRUE,right = FALSE)
table(dhs2012$sp.number)
levels(dhs2012$sp.number) <- c("No","Yes")
table(dhs2012$sp.number)

summary(dhs2018$sp.number)
dhs2018$sp.number <- cut(dhs2018$sp.number,breaks = c(1,2,4),include.lowest = TRUE)
table(dhs2018$sp.number)
levels(dhs2018$sp.number) <- c("No","Yes")
table(dhs2018$sp.number)
#######################################################Denormalisation for pooling data
####Pooling data for computing frequencies and proportions, before
dhs <- rbind(dhs2012,dhs2018)
sapply (dhs, function (x) sum(is.na (x)))
dhs$slept.net <- factor(dhs$slept.net)
table(dhs$slept.net)
levels(dhs$slept.net) <- c("No","Yes")
table(dhs$slept.net)

###############
summary(dhs$anc.number)
dhs$anc.number <- cut(dhs$anc.number,breaks = c(1,3,12),include.lowest = TRUE)
table(dhs$anc.number)
levels(dhs$anc.number) <- c("<4",">=4")
table(dhs$anc.number)

###############
summary(dhs$times.1ANC)
dhs$times.1ANC <- cut(dhs$times.1ANC,breaks = c(1,3,6,10),include.lowest = TRUE,labels = c("first-trim","second-trim","third-trim"))
table(dhs$times.1ANC)

###############
summary(dhs$age)
dhs$age_rec <- cut(dhs$age,breaks = c(15,18,30,40,49),include.lowest = TRUE)
table(dhs$age_rec )

#############
summary(dhs$living.child)
dhs$living.child <- cut(dhs$living.child,breaks = c(0,1,2,4,7,13),include.lowest = TRUE)
table(dhs$living.child)
#############
summary(dhs$chldren.born)
dhs$chldren.born <- cut(dhs$chldren.born,breaks = c(1,2,4,7,14),right = FALSE)
table(dhs$chldren.born)

########################
summary(dhs$h.number)
dhs$h.number <- cut(dhs$h.number,breaks = c(2,5,10,38),include.lowest = TRUE)
table(dhs$h.number)
################
summary(dhs$age.headhous)
dhs$age.headhous <- cut(dhs$age.headhous,breaks = c(16,25,40,60,91),include.lowest = TRUE)
table(dhs$age.headhous)
################
levels(dhs$a.newspap)
levels(dhs$a.radio)
levels(dhs$a.tv)
levels(dhs$a.newspap) <-levels(dhs$a.radio) <- levels(dhs$a.tv) <- c("access","no-access")
table(dhs$a.newspap)
table(dhs$a.radio)
table(dhs$a.tv)
######################
levels(dhs$mat.status)
table(dhs$mat.status) 
levels(dhs$mat.status)<- c("Single","Single","Married","Single","Single","Single","Married","Single")
table(dhs$mat.status) 
################
levels(dhs$link.headh)
levels(dhs$link.headh) <- c("ind-link","ind-link","d-link","ind-link","ind-link", "ind-link","d-link", "ind-link","ind-link","d-link",
                                   "d-link","ind-link","ind-link", "ind-link","ind-link","d-link","ind-link","d-link")
table(dhs$link.headh) 

################
levels(dhs$place.deliver)
levels(dhs$place.deliver) <- c("public","public","public","home","home", "private","private","home", "private","public",
                               "public","public","public", "public","home","private","private","home")
table(dhs$place.deliver) 
###############
#dhs$survey.year <- factor(dhs$survey.year)
#levels(dhs$malprev) <- c("0","1")
#table(dhs2012$malprev)
#levels(dhs2018$malprev) <- c("0","1")
#table(dhs2018$malprev)
###############################################splitting year
dhs2012 <- filter(dhs,dhs$survey.year=="2012")
dhs2018 <- filter(dhs,dhs$survey.year=="2018")

 names(dhs2012) <-c("grapp", "weight", "survey.year", "age", "PSU", "region", 
                  "residence", "education", "h.number", "a.newspap", "a.radio", 
                  "a.tv", "w.quintile", "w.quintile.1", "mat.status", "currently.w", 
                   "link.head.of.the.household", "sexheadhouse", "age.headhous", "chldren.born", 
                  "living.child", "partner.education", "sp.number", "slept.net", 
                   "times.1ANC", "antenatal.care.visits", "wanted.pregn", "place.deliver", 
                   "MPM","age.rec")


names(dhs2018) <-c("grapp", "weight", "survey.year", "age", "PSU", "region", 
                   "residence", "education", "h.number", "a.newspap", "a.radio", 
                   "a.tv", "w.quintile", "w.quintile.1", "mat.status", "currently.w", 
                   "link.headh", "sexheadhouse", "age.headhous", "chldren.born", 
                   "living.child", "partner.education", "sp.number", "slept.net", 
                   "times.1ANC", "antenatal.care.visits", "wanted.pregn", "place.deliver", 
                   "MPM","age.rec")

levels(dhs2012$link.head.of.the.household) <- c("indirect.link","direct.link")
#dhs2012$malprev <- as.numeric(levels(dhs2012$malprev))[dhs2012$malprev]
#dhs2018$malprev <- as.numeric(levels(dhs2018$malprev))[dhs2018$malprev]

#levels(dhs$survey.year)
#tab1 <- aggregate(malprev~region+idclust+,data = dhs2012,sum)
#tab2 <- aggregate(malprev~region+idclust,data = dhs2018,sum)

#tab1 <- xtabs(malprev~region+idclust,data = dhs2012)
#round(prop.table(tab1),2)
####################### complexe design
#################Descriptive statistics with weighting
library(dplyr)
library(survey)
library(party)
library(gtsummary)
library(car)
library(GGally)
library(forestmodel)
library(Epi)
library(ggeffects)
library(ggparty)


#dhs2012$idclust<- factor(dhs2012$idclust)
###########################Recoding wealth quintile
levels(dhs2012$w.quintile) <- levels(dhs2018$w.quintile) <-c("Middle","Poorer","Poorer","Richer","Richer")
#levels(dhs2012$w.quintile) <- relevel(dhs2012$w.quintile,ref = "Richer")
#levels(dhs2018$w.quintile) <- relevel(dhs2018$w.quintile,ref = "Richer")
#(sapply(dhs2012, function(x) is.factor(x)))
##############################################Variable Weighting for 2012

Wt1<- dhs2012$weight/1000000
ibdatf1<-cbind(dhs2012,Wt1)
dw1<-svydesign(ids=~PSU,data = ibdatf1 ,strata=~region+residence, weights=~Wt1,nest = TRUE)

##############################################Variable Weighting for 2018
Wt2<- dhs2018$weight/1000000
ibdatf2<-cbind(dhs2018,Wt2)
dw2<-svydesign(ids=~PSU,data = ibdatf2 ,strata=~region+residence, weights=~Wt2,nest = TRUE)

#svytable(~malprev+idclust,design = dw1,round=TRUE)
#tab1 <- xtabs(~malprev+idclust,data =dw1)


#############################################################For  2012 survey
##################Descriptive
####a) For 2012
tab2012 <- tbl_svysummary(dw1)
tab2012
tab2018 <- tbl_svysummary(dw2)
tab2018

##################Univariate Analysis stratifying by year
##b)  2012
tab2012 <- tbl_svysummary(dw1,by="malprev",type =list(sp.number~"dichotomous",slept.net~"dichotomous") )
tab2012 
##b)  2018
tab2018 <- tbl_svysummary(dw2,by="malprev",type =list(sp.number~"dichotomous",slept.net~"dichotomous") )
tab2018

###################################To add p-value
###########################To spicify test
tab1 <- add_p(tab2012,
      test = list(
        all_continuous() ~ "svy.t.test",
        all_categorical() ~ "svy.wald.test"
      )
)

add_p(tab2012)
###########################To spicify test
tab2 <- add_p(tab2018,
      test = list(
        all_continuous() ~ "svy.t.test",
        all_categorical() ~ "svy.wald.test"
      )
)

########################Mergin 2012 and 2018
tbl_merge(tbls = list(tab1, tab2)) %>%
  modify_spanning_header(everything() ~ NA_character_)


###########################################Performing logistic regression
#####################logistic univariate analysis/you can change by including all variables
#tbl_u <- dw1 %>%
#  dplyr::select(malprev,h.number,education, a.newspap,a.radio,a.tv, mat.status,link.headh,currently.w,sexheadhouse ,
  #              age.headhous,partner.education,times.1ANC,anc.number,place.deliver) %>%
#  tbl_uvregression(
#    method = glm,
 #   y = malprev,
 #   method.args = list(family = binomial),
 #   exponentiate = TRUE,
 #   pvalue_fun = ~ style_pvalue(.x, digits = 2)
 # ) %>%
  #add_global_p() %>%
  #add_nevent()
#tbl_u
########################################survey 2018
#tbl_u1 <- dhs2018 %>%
 # dplyr::select(malprev,education,h.number, a.newspap,a.radio,a.tv,mat.status,link.headh,currently.w,sexheadhouse ,
  #              age.headhous,partner.education,times.1ANC,anc.number,place.deliver) %>%
  #tbl_uvregression(
   # method = glm,
    #y = malprev,
    #method.args = list(family = binomial),
    #exponentiate = TRUE,
    #pvalue_fun = ~ style_pvalue(.x, digits = 2)
  #) %>%
  #add_global_p() %>%
  #add_nevent()
#tbl_u1

########################Mergin univariate data 2012 and 2018
#tbl_merge(tbls = list(tbl_u, tbl_u1)) %>%
 # modify_spanning_header(everything() ~ NA_character_)
########################Mergin 2012 and 2018
#tbl_merge(tbls = list(tbl_u, tbl_u1)) %>%
 # modify_spanning_header(everything() ~ NA_character_)

##############################################Multivariate analysis
############For 2012
dput(names(dhs2012))
tab6 <- svyglm(MPM~education+h.number+a.newspap+a.radio+a.tv+w.quintile+mat.status+currently.w+link.headh+place.deliver+sexheadhouse+
              age.headhous+chldren.born+living.child+partner.education+times.1ANC+anc.number+wanted.pregn+
              age.rec,dw1,family = quasibinomial()
              )

#ifelse(n <- sapply(dhs2012, function(x) length(levels(x))) == 1, "DROP", "NODROP")
###########For 2018
tab8 <- svyglm(MPM~education+h.number+a.newspap+a.radio+w.quintile+mat.status+currently.w+link.headh+place.deliver+sexheadhouse+
                 age.headhous+chldren.born+living.child+partner.education+times.1ANC+anc.number+wanted.pregn+
                 age.rec,dw2,family = quasibinomial())
#######################################Step AIC for choosing best model
tab7 <- step(tab6)
tab9 <- step(tab8)

###############################Graphical representation of the model
ggcoef_model(tab7, exponentiate = TRUE)
forest_model(tab7)
####For 2018
ggcoef_model(tab9, exponentiate = TRUE)
forest_model(tab9)
##################Variables effects for 2012/predicted values
ggeffect(tab7,"a.radio")
ggeffect(tab7,"a.tv")
ggeffect(tab7, "mat.status")
ggeffect(tab7, "place.deliver")
ggeffect(tab7, "link.headh")
ggeffect(tab7, "mat.status")
ggeffect(tab7, "w.quintile")
ggeffect(tab7, "age.headhous")
ggeffect(tab7, "partner.education")
ggeffect(tab7, "times.1ANC")
ggeffect(tab7, "anc.number")

##################Variables effects for 2018
ggeffect(tab9,"a.radio")
ggeffect(tab9,"a.newspap")
ggeffect(tab9, "w.quintile")
ggeffect(tab9,"currently.w")
ggeffect(tab9, "place.deliver")
ggeffect(tab9, "sexheadhouse")
ggeffect(tab9, "times.1ANC")
ggeffect(tab9, "anc.number")
ggeffect(tab9, "h.number")

#################Confusion matrix/ Quality of the model/2012
malprev <- predict(tab7, type = "response", newdata =dw1[["variables"]] )
head(malprev)
table(malprev > 0.5, dhs2012$malprev)
##############misclassification of the model
misclas <- (77+196)/781#####34% of misclassification

#################Confusion matrix/ Quality of the model/2018
malprev <- predict(tab9, type = "response", newdata =dw2[["variables"]] )
head(malprev)
table(malprev > 0.5, dhs2018$malprev)
##############misclassification of the model
misclas <- (7+426)/2123#####20% of misclassification

############################################################Model Evaluation
############## ROC curve
ROC(form=malprev~h.number+a.newspap+mat.status+currently.w+sexheadhouse+age.headhous+partner.education+times.1ANC+anc.number
    ,plot="ROC",data = dhs2012)

ROC(form=malprev~h.number+a.newspap+mat.status+currently.w+sexheadhouse+age.headhous+partner.education+times.1ANC+anc.number
    ,plot="ROC",data = dhs2018)
###################################################coefficients significance
anova(tab7, test = "Chisq")
anova(tab9, test = "Chisq")

##############model assessment
with(tab7,pchisq(null.deviance-deviance,df.null-df.residual,lower.tail = F))
with(tab9,pchisq(null.deviance-deviance,df.null-df.residual,lower.tail = F))

#############################################mulitcollinearity
vif(tab7)##########no collinearity
vif(tab9)
#############################Merging univariate and multivariate analysis
#########################################
tab7 <- tab7 %>%
  tbl_regression(exponentiate = TRUE) %>%
  add_global_p(type = "II")
tab7
####################################Using gtsummary
tab9 <- tbl_regression(tab9,exponentiate = TRUE)
########## Find why second-time women used more malprev measure than 1st
tab1 <- prop.table(table(dhs$times.1ANC,dhs$w.quitile),2)
########################################Merging Multivariate data
tbl_merge()
tbl_merge(tbls = list(tab7, tab9)) %>%
  modify_spanning_header(everything() ~ NA_character_)
##########################################################Testing interactions
#a) dhs 2012 models interactions
library(glmulti)
dhs2012 <- dw1[["variables"]]
#tab77 <- glmulti(malprev ~ a.radio + a.tv + w.quintile + mat.status + link.headh + 
#                   place.deliver + age.headhous + partner.education + times.1ANC + 
#                   anc.number,data = dhs2012,family=binomial())

#a) Models interaction outputs
tab77 <-svyglm(malprev~1+a.tv+w.quintile+mat.status+link.headh+place.deliver+age.headhous+link.headh:a.tv+age.headhous:a.tv+age.headhous:w.quintile,
               dw1,family = quasibinomial()
)

ROC(form=malprev~1+a.tv+w.quintile+mat.status+link.headh+place.deliver+age.headhous+link.headh:a.tv+age.headhous:a.tv+age.headhous:w.quintile,
    plot="ROC",data = dhs2012)
ggcoef_model(tab77, exponentiate = TRUE)
forest_model(tab77)
#################Confusion matrix/ Quality of the model/2012
malprev <- predict(tab77, type = "response", newdata =dw1[["variables"]] )
head(malprev)
table(malprev > 0.5, dhs2012$malprev)
##############misclassification of the model
misclas <- (64+199)/781#####33% of misclassification

########we opt for our first model because the new one perform not much


##########################b) dhs 2018 models interactions
dhs2018 <- dw2[["variables"]]
#tab78 <- glmulti(malprev ~ h.number + a.newspap + a.radio + w.quintile + currently.w + 
 #                  place.deliver + sexheadhouse + times.1ANC + anc.number,data = dhs2018,family=binomial())

#a) Models interaction outputs 
tab78 <- svyglm(malprev~1+h.number+a.newspap+a.radio+currently.w+place.deliver+sexheadhouse+a.radio:h.number+a.radio:a.newspap+currently.w:h.number
                +currently.w:a.radio+place.deliver:h.number,dw2,family = quasibinomial()
           ) 
diabetes_results %>%
  roc_curve(truth = diabetes, .pred_pos) %>%
  autoplot()


ROC(form=malprev~1+h.number+a.newspap+a.radio+currently.w+place.deliver+sexheadhouse+a.radio:h.number+a.radio:a.newspap+currently.w:h.number
    +currently.w:a.radio+place.deliver:h.number,
    plot="ROC",data = dhs2018)
#################Confusion matrix/ Quality of the model/2018
malprev <- predict(tab78, type = "response", newdata =dw2[["variables"]] )
head(malprev)
table(malprev > 0.5, dhs2018$malprev)
#################################################################End_Regression
##############misclassification of the model
misclas <- (6+431)/2123#####33% of misclassification

########we opt for our first model because the new one perform not much


#Start###################################Using CART on multivariable result
#A) For 2012
tab10 <- ctree(MPM ~ a.radio + a.tv + w.quintile + mat.status + link.head.of.the.household + 
                 place.deliver + age.headhous + partner.education + times.1ANC + 
                 antenatal.care.visits,dhs2012)

plot(tab10)
tab8 <- as.constparty(tab10)
#################################################Using###ggtree
ggparty(tab10) +
  geom_edge() +
  geom_edge_label() +
  geom_node_splitvar() +
  geom_node_plot(gglist = list(geom_bar(aes(x = "", fill = MPM),
                                        position = position_fill()),
                               xlab("")),
                 # draw only one label for each axis
                 shared_axis_labels = TRUE,
                 # draw line between tree and legend
                 legend_separator = TRUE
  )


# B)For dhs 2018
tab11 <- ctree(MPM ~ h.number + a.newspap + a.radio + w.quintile + currently.w + 
                 place.deliver + sexheadhouse + times.1ANC + antenatal.care.visits,dhs2018)

nid <- nodeids(tab11)
iid <- nid[!(nid %in% nodeids(tab11, terminal = TRUE))]
(pval <- unlist(nodeapply(tab11, ids = iid,
                          FUN = function(n) info_node(n)$p.value)))

myttnc2 <- nodeprune(tab11, ids = iid[pval > 1e-5])

plot(myttnc2)
tab11 <- as.constparty(myttnc2)

#############################################################Using###ggtree
ggparty(tab11) +
  geom_edge() +
  geom_edge_label() +
  geom_node_splitvar() +
  geom_node_plot(gglist = list(geom_bar(aes(x = "", fill = MPM),
                                        position = position_fill()),
                               xlab("")),
                 # draw only one label for each axis
                 shared_axis_labels = TRUE,
                 # draw line between tree and legend
                 legend_separator = TRUE
  )
################################################################End Barry script



#########################################Model evaluation/ Sensitivity Analyses
set.seed(123)
library(tidymodels)
# Create data split for train and test
dhs2012_1 <- initial_split(dhs2012,
                                prop = 0.75,
                                strata = malprev)

# Create training data
malprev_train <- dhs2012_1 %>%
  training()

# Create testing data
malprev_test <- dhs2012_1 %>%
  testing()
# Number of rows in train and test dataset
nrow(malprev_train)
nrow(malprev_test)

fitted_logistic_model<- logistic_reg() %>%
  # Set the engine
  set_engine("glm") %>%
  # Set the mode
  set_mode("classification") %>%
  # Fit the model
  fit(malprev~h.number+a.newspap+mat.status+currently.w+sexheadhouse+age.headhous+partner.education+times.1ANC+anc.number, data = malprev_train)
tidy(fitted_logistic_model)    # Generate Summary Table
tidy(fitted_logistic_model, exponentiate = TRUE)
tidy(fitted_logistic_model, exponentiate = TRUE) %>%
  filter(p.value < 0.05)

# Class prediction
pred_class <- predict(fitted_logistic_model,
                      new_data = malprev_test,
                      type = "class")

pred_class[1:5,]
# Prediction Probabilities
pred_proba <- predict(fitted_logistic_model,
                      new_data = malprev_test,
                      type = "prob")

pred_proba[1:5,]
##################################Final Data Preparation for Model Evaluation
malprev_results <- malprev_test %>%
  select(malprev) %>%
  bind_cols(pred_class, pred_proba)

malprev_results[1:5, ]

#################Confusion matrix
conf_mat(malprev_results, truth = malprev,
         estimate = .pred_class)

#############Accuracy
accuracy(malprev_results, truth = malprev,
         estimate = .pred_class)
##########################Sens
sens(malprev_results, truth = malprev,
     estimate = .pred_class)
###########specificity
spec(malprev_results, truth = malprev,
     estimate = .pred_class)
##########precision
precision(malprev_results, truth = malprev,
          estimate = .pred_class)

###########Recall
precision(malprev_results, truth = malprev,
          estimate = .pred_class)
#F-measure
#F-measure is a weighted harmonic mean of precision and recall with the best score of 1 and the worst score of 0. 
#F-measure score conveys the balance between precision and recall
f_meas(malprev_results, truth = malprev,
          estimate = .pred_class)
#Kappa Cohen Kappa gives information on how much better a model over the random classifier. Kappa can range from ???1 to +1. 
#The value <0 means no agreement while 1.0 shows perfect agreement. The estimated kappa statistics revealed a moderate agreement.

kap(malprev_results, truth = malprev,
       estimate = .pred_class)
#Matthews Correlation Coefficient (MCC)
#The Matthews correlation coefficient (MCC) is used as a measure of the quality of a binary classifier. The value ranges from ???1 and +1.
#MCC: -1 indicates total disagreement
#MCC: 0 indicate no agreement
#MCC: +1 indicates total aggrement

custom_metrics <- metric_set(accuracy, sens, spec, precision, recall, f_meas, kap, mcc)
custom_metrics(malprev_results, truth = malprev,
  estimate = .pred_class)

#############################ROC curve
roc_auc(malprev_results,
        truth = malprev,
        .pred_incomplete)
malprev_results %>%
  roc_curve(truth = malprev, .pred_incomplete) %>%
  autoplot()


################################Spatial Analyisis

##########################Loading libraries
library(sf)
#library(sp)
library(dplyr)
library(ggplot2)
library(tmap)
library(rgeoda)
library(spdep)


####################################################################For 2012
Guinea_12 <- st_read("D:\\MOOC\\shps_2012\\sdr_subnational_boundaries.shp",layer = "sdr_subnational_boundaries")

palu_12 <- read.csv2 ("D:\\MOOC\\malprev12m.csv")
palu_12$Malprev_12 <- palu_12$Malprev_12*100
palu_12$region <- factor(palu_12$region)
#levels(palu_12$region) <- c( "Boké","Conakry","Faranah","Kankan","Kindia","Labe","Mamou","Nzérekore")

###############################merging
OA.Census_12 <- inner_join(Guinea_12, palu_12, by = c("DHSREGFR" = "region"))

###################################################################For 2018
Guinea_18 <- st_read("D:\\MOOC\\shps_2018\\sdr_subnational_boundaries.shp",layer = "sdr_subnational_boundaries")

palu_18 <- read.csv2 ("D:\\MOOC\\malprev18m.csv")
palu_18$malprev18 <- palu_18$malprev18*100

palu_18$region <- factor(palu_18$region)
levels(palu_18$region) <- c( "Boké","Conakry","Faranah","Kankan","Kindia","Labé","Mamou","N'Zérékoré")
###############################merging
OA.Census_18 <- inner_join(Guinea_18, palu_18, by = c("DHSREGFR" = "region"))

###################################################################Making map for 2018
par(mfrow=c(1,2))

malprv2012<- tm_shape(OA.Census_12) +
  tm_fill(col = "Malprev_12", title = "% of incomplete MPM in 2012") +
  tm_compass(type ="arrow",position = "right") +
  tm_scale_bar(position = "center")+
  tm_text("DHSREGFR", size =0.80,col = "black")+
  tm_layout(bg.color = "skyblue", inner.margins = c(0, .02, .02, .02))+
  tm_add_legend(type = "symbol")+tm_borders(col = "white")+
  tm_xlab(text = "source= spatialdata.dhsprogram.com")
malprv2012
########################################################################For 2018
malprv2018  <- tm_shape(OA.Census_18) +
  tm_fill(col = "malprev18", title = "% of Incomplete MPM in 2018") +
  tm_compass(type ="arrow",position = "right") +
  tm_scale_bar(position = "center")+
  tm_text("DHSREGFR", size =0.80,col = "black")+
  tm_layout(bg.color = "skyblue", inner.margins = c(0, .02, .02, .02))+
  tm_add_legend(type = "symbol")+tm_borders(col = "white")+
  tm_xlab(text = "source= spatialdata.dhsprogram.com")
malprv2018
tmap_arrange(malprv2012,malprv2018,nrow = 1)


################################################################Cluster analysis Moran
###################Working with rgeoda

########3.1 Queen Contiguity Weights
queen_weights(sf_obj, order=1, include_lower_order = False, precision_threshold = 0)
queen_w_12 <- queen_weights(OA.Census_12)
queen_w_18 <- queen_weights(OA.Census_18)

summary(queen_w_12)
summary(queen_w_18)

###################Attributes of Weight object
is_symmetric(queen_w_12)
has_isolates(queen_w_18)
weights_sparsity(queen_w_12)
weights_sparsity(queen_w_18)

###To access the details of the weights:list the neighbors of a specified observation
nbrs <- get_neighbors(queen_w_12, idx = 1)
cat("\nNeighbors of the 1-st observation are:", nbrs)

#To compute the spatial lag of a specified observation by passing the values of the selected variable
lag12 <- spatial_lag(queen_w_12, OA.Census_12['Malprev_12'])
lag12
lag18 <- spatial_lag(queen_w_18, OA.Census_18['malprev18'])
lag18
#############3.2 Rook Contiguity Weights
rook_weights(sf_obj, order=1,include_lower_order=False, precision_threshold = 0)

### to create a Rook contiguity weights using the sf object guerry
rook_w <- rook_weights(OA.Census_12)
summary(rook_w)

#######The weights we created are in memory. To save the weights to a file, one can call the function

save_weights(gda_w, id_variable, out_path, layer_name = "")

save_weights(rook_w, OA.Census['ID_1'], out_path = 'D:\\MOOC\\OA.Census_r.gal', 
             layer_name = 'NAME_1')

############3.3 Distance Based Weights
#min_distthreshold(GeoDa gda, bool is_arc = False, is_mile = True)
##To create a Distance based weights, one can call the function `distance_weights`

###Then, with this distance threshold, we can create a distance-band weights using the function
distance_weights(geoda_obj, dist_thres, power=1.0,  is_inverse=False, is_arc=False, is_mile=True)

dist_thres <- min_distthreshold(OA.Census_12)
dist_thres

dist_w <- distance_weights(OA.Census_12, dist_thres)
summary(dist_w)

############3.4 K-Nearest Neighbor Weights
knn_weights(gda, k, power = 1.0,is_inverse = False, is_arc = False, is_mile = True)

##to create a 6-nearest neighbor weights using Guerry
knn6_w <- knn_weights(OA.Census_12, 6)
summary(knn6_w)

##To create a kernel weights with fixed bandwith
bandwidth <- min_distthreshold(OA.Census_12)
kernel_w <- kernel_weights(OA.Census_12, bandwidth, kernel_method = "uniform")
summary(kernel_w)

########To create a kernel weights with adaptive bandwidth or using max Knn distance as bandwidth

adptkernel_w = kernel_knn_weights(OA.Census_12, 6, "uniform")

summary(adptkernel_w)

###Local Indicators of Spatial Association–LISA
#guerry_path <- system.file("extdata", "Guerry.shp", package = "rgeoda")
#guerry <- st_read(guerry_path)
malprev12 <- OA.Census_12["Malprev_12"]
malprev18 <- OA.Census_18["malprev18"]

lisa12 <- local_moran(queen_w_12, malprev12)
lisa18 <- local_moran(queen_w_18, malprev18)

#######we can call the function lisa_values() to get the values of the local Moran
lms12 <- lisa_values(gda_lisa = lisa12)
lms12
lms18 <- lisa_values(gda_lisa = lisa18)
lms18
##To get the pseudo-p values of significance of local Moran computation

pvals12 <- lisa_pvalues(lisa12)
pvals12

###To get the cluster indicators of local Moran computation
##which can be accessed via the function lisa_labels()

lbls12 <- lisa_labels(lisa12)
lbls12

###re-run the above local Moran example using 9,999 permutations.
set.seed(123)
lisa12 <- local_moran(queen_w_12, malprev12, permutations = 9999)
lisa18 <- local_moran(queen_w_18, malprev18, permutations = 9999)

###6.3 Create Local Moran Map
lisa_colors <- lisa_colors(lisa12)
lisa_labels <- lisa_labels(lisa12)
lisa_clusters <- lisa_clusters(lisa12)

loclis12<- plot(st_geometry(OA.Census_12), 
                col=sapply(lisa_clusters, function(x){return(lisa_colors[[x+1]])}), 
                border = "#333333", lwd=0.2)
title(main = "")
#title(main = "Local Moran Map of incomplete malprev 2012")
legend('bottomleft', legend = lisa_labels, fill = lisa_colors, border = "#eeeeee")

############For 2018
lisa_colors <- lisa_colors(lisa18)
lisa_labels <- lisa_labels(lisa18)
lisa_clusters <- lisa_clusters(lisa18)

loclis18<-plot(st_geometry(OA.Census_18), 
               col=sapply(lisa_clusters, function(x){return(lisa_colors[[x+1]])}), 
               border = "#333333", lwd=0.2)
title(main = "")
#title(main = "Local Moran Map of malprev 2018")

legend('bottomleft', legend = lisa_labels, fill = lisa_colors, border = "#eeeeee")

################################
lisa_p <- lisa_pvalues(lisa12)
p_labels <- c("Not significant", "p <= 0.05", "p <= 0.01", "p <= 0.001")
p_colors <- c("#eeeeee", "#84f576", "#53c53c", "#348124")
plot(st_geometry(OA.Census_12), 
     col=sapply(lisa_p, function(x){
       if (x <= 0.001) return(p_colors[4])
       else if (x <= 0.01) return(p_colors[3])
       else if (x <= 0.05) return (p_colors[2])
       else return(p_colors[1])
     }), 
     border = "#333333", lwd=0.2)
#title(main = "Local Moran Map of malprev2012")
legend('bottomleft', legend = p_labels, fill = p_colors, border = "#eeeeee")


######################################
lisa_p <- lisa_pvalues(lisa18)
p_labels <- c("Not significant", "p <= 0.05", "p <= 0.01", "p <= 0.001")
p_colors <- c("#eeeeee", "#84f576", "#53c53c", "#348124")
plot(st_geometry(OA.Census_18), 
     col=sapply(lisa_p, function(x){
       if (x <= 0.001) return(p_colors[4])
       else if (x <= 0.01) return(p_colors[3])
       else if (x <= 0.05) return (p_colors[2])
       else return(p_colors[1])
     }), 
     border = "#333333", lwd=0.2)
#title(main = "Local Moran Map of malprev2018")
legend('bottomleft', legend = p_labels, fill = p_colors, border = "#eeeeee")


######################################################working with spded
#)A For 2012 survey Global Moran
#Extracting neighbors(according to Queen)
#9.1 Find queen neighbors
neighbours <- poly2nb(OA.Census_12)
neighbours
neighbours1 <- poly2nb(OA.Census_18)
neighbours1
#Crating weight matrix
neighbours11 <- nb2listw(neighbours,zero.policy=TRUE)
moran.test(OA.Census_12$Malprev_12, neighbours11)

neighbours2 <- nb2listw(neighbours1,zero.policy=TRUE)
moran.test(OA.Census_18$malprev18, neighbours2)

#neighbours11 |> nb2listw(style="B") -> lw_q_B
#neighbours2 |> nb2listw(style="W") -> lw_q_W
#########################
glance_htest <- function(ht) c(ht$estimate, 
                               "Std deviate"=unname(ht$statistic), 
                               "p.value"=unname(ht$p.value))

(OA.Census_12 |> 
    st_drop_geometry() |> 
    subset(select=DHSREGFR, drop=TRUE) -> Types) |> 
  table()


(OA.Census_12 |> 
    st_drop_geometry() |> 
    subset(select=Malprev_12, drop=TRUE) -> z) |> 
  moran.test(listw=neighbours11, randomisation=FALSE) |> 
  glance_htest()

(OA.Census_18 |> 
    st_drop_geometry() |> 
    subset(select=malprev18, drop=TRUE) -> z1) |> 
  moran.test(listw=neighbours2, randomisation=FALSE) |> 
  glance_htest()

###########Boostrap compute
(z |> 
    moran.test(listw=neighbours11) -> mtr) |> 
  glance_htest()

(z1 |> 
    moran.test(listw=neighbours2) -> mtr1) |> 
  glance_htest()

set.seed(1)
z |> 
  moran.mc(listw=neighbours11, nsim=999, return_boot = TRUE) -> mmc

z1 |> 
  moran.mc(listw=neighbours2, nsim=999, return_boot = TRUE) -> mmc1

c("Permutation bootstrap"=var(mmc$t), 
  "Analytical randomisation"=unname(mtr$estimate[3]))

c("Permutation bootstrap"=var(mmc1$t), 
  "Analytical randomisation"=unname(mtr1$estimate[3]))

#####################
# MORAN scatter plot
z |> moran.plot(listw=neighbours11, labels=OA.Census_12$ID_2, cex=1, pch=".", xlab="malprev12", ylab="lagged malprev12") -> infl_W1
z1 |> moran.plot(listw=neighbours2, labels=OA.Census_18$ID_2, cex=1, pch=".", xlab="malprev18", ylab="lagged malprev18") -> infl_W2

#####################Produce map
OA.Census_12$hat_value <- infl_W1$hat
tm_shape(OA.Census_12) + tm_fill("hat_value")

OA.Census_18$hat_value <- infl_W2$hat
tm_shape(OA.Census_18) + tm_fill("hat_value")
##############################
z |> 
  localmoran(listw=neighbours11, conditional=FALSE, alternative="two.sided") -> locm
z1 |> 
  localmoran(listw=neighbours2, conditional=FALSE, alternative="two.sided") -> locm1

####################
pva <- \(pv) cbind("none"=pv, "bonferroni"=p.adjust(pv, "bonferroni"), "fdr"=p.adjust(pv, "fdr"), "BY"=p.adjust(pv, "BY"))
locm |> 
  subset(select="Pr(z != E(Ii))", drop=TRUE) |> 
  pva() -> pvsp
f <- \(x) sum(x < 0.05)
apply(pvsp, 2, f)

pva <- \(pv) cbind("none"=pv, "bonferroni"=p.adjust(pv, "bonferroni"), "fdr"=p.adjust(pv, "fdr"), "BY"=p.adjust(pv, "BY"))
locm1 |> 
  subset(select="Pr(z != E(Ii))", drop=TRUE) |> 
  pva() -> pvsp
f <- \(x) sum(x < 0.05)
apply(pvsp, 2, f)
#####################
library(parallel)
set.coresOption(ifelse(detectCores() == 1, 1, detectCores()-1L))
###################
system.time(z |> 
              localmoran_perm(listw=neighbours11, nsim=499, alternative="two.sided", iseed=1) -> locm_p)

system.time(z1 |> 
              localmoran_perm(listw=neighbours2, nsim=499, alternative="two.sided", iseed=1) -> locm_p1)

#################
brks <- qnorm(c(0, 0.00001, 0.0001, 0.001, 0.01, 0.025, 0.5, 0.975, 0.99, 0.999, 0.9999, 0.99999, 1))
(locm_p |> 
    subset(select=Z.Ii, drop=TRUE) |> 
    cut(brks) |> 
    table()-> tab)
###############
z |> 
  localmoran(listw=neighbours11, conditional=TRUE, alternative="two.sided") -> locm_c

z1 |> 
  localmoran(listw=neighbours2, conditional=TRUE, alternative="two.sided") -> locm_c1

###################
OA.Census_12$locm_Z <- locm[, "Z.Ii"]
OA.Census_12$locm_c_Z <- locm_c[, "Z.Ii"]
OA.Census_12$locm_p_Z <- locm_p[, "Z.Ii"]
#liss12<- tm_shape(OA.Census_12) + tm_fill(c("locm_Z", "locm_c_Z", "locm_p_Z"), breaks=brks, midpoint=0, title="Standard deviates of\nLocal Moran's I") + tm_facets(free.scales=FALSE, ncol=2) + tm_layout(panel.labels=c("Analytical total", "Analytical conditional", "Conditional permutation"))
tm_shape(OA.Census_12) + tm_fill(c("locm_Z", "locm_c_Z", "locm_p_Z"), breaks=brks, midpoint=0, title="Standard deviates of\nLocal Moran's I") + tm_facets(free.scales=FALSE, ncol=2) + tm_layout(panel.labels=c("Analytical total", "Analytical conditional", "Conditional permutation"))

OA.Census_18$locm_Z1 <- locm1[, "Z.Ii"]
OA.Census_18$locm_c_Z1 <- locm_c1[, "Z.Ii"]
OA.Census_18$locm_p_Z1 <- locm_p1[, "Z.Ii"]
#liss18<- tm_shape(OA.Census_18) + tm_fill(c("locm_Z1", "locm_c_Z1", "locm_p_Z1"), breaks=brks, midpoint=0, title="Standard deviates of\nLocal Moran's I") + tm_facets(free.scales=FALSE, ncol=2) + tm_layout(panel.labels=c("Analytical total", "Analytical conditional", "Conditional permutation"))
tm_shape(OA.Census_18) + tm_fill(c("locm_Z1", "locm_c_Z1", "locm_p_Z1"), breaks=brks, midpoint=0, title="Standard deviates of\nLocal Moran's I") + tm_facets(free.scales=FALSE, ncol=2) + tm_layout(panel.labels=c("Analytical total", "Analytical conditional", "Conditional permutation"))
#tmap_arrange(liss12,liss18,nrow = 2)
######################

q_mean <- attr(locm, "quadr")$mean
OA.Census_12$hs_ac_q <- OA.Census_12$hs_cp_q <- OA.Census_12$hs_an_q <- q_mean
is.na(OA.Census_12$hs_an_q) <- !(OA.Census_12$locm_Z < brks[6] | OA.Census_12$locm_Z > brks[8])
is.na(OA.Census_12$hs_cp_q) <- !(OA.Census_12$locm_p_Z < brks[2] | OA.Census_12$locm_p_Z > brks[12])
is.na(OA.Census_12$hs_ac_q) <- !(OA.Census_12$locm_c_Z < brks[2] | OA.Census_12$locm_c_Z > brks[12])

#hotp12<- tm_shape(OA.Census_12) + tm_fill(c("hs_an_q", "hs_ac_q", "hs_cp_q"), colorNA="grey95", textNA="Not significant", title="Incomplete MPM hotspot status\nLocal Moran's I") + tm_facets(free.scales=FALSE, ncol=2) + tm_layout(panel.labels=c("Unadjusted analytical total", "Bonferroni analytical cond.", "Cond. perm. with Bonferroni"))
tm_shape(OA.Census_12) + tm_fill(c("hs_an_q", "hs_ac_q", "hs_cp_q"), colorNA="grey95", textNA="Not significant", title="Incomplete MPM hotspot status\nLocal Moran's I") + tm_facets(free.scales=FALSE, ncol=2) + tm_layout(panel.labels=c("Unadjusted analytical total", "Bonferroni analytical cond.", "Cond. perm. with Bonferroni"))
###############
q_mean <- attr(locm1, "quadr")$mean
OA.Census_18$hs_ac_q <- OA.Census_18$hs_cp_q <- OA.Census_18$hs_an_q <- q_mean
is.na(OA.Census_18$hs_an_q) <- !(OA.Census_18$locm_Z1 < brks[6] | OA.Census_18$locm_Z > brks[8])
is.na(OA.Census_18$hs_cp_q) <- !(OA.Census_18$locm_p_Z1 < brks[2] | OA.Census_18$locm_p_Z > brks[12])
is.na(OA.Census_18$hs_ac_q) <- !(OA.Census_18$locm_c_Z1 < brks[2] | OA.Census_18$locm_c_Z > brks[12])

#hotp18<- tm_shape(OA.Census_18) + tm_fill(c("hs_an_q", "hs_ac_q", "hs_cp_q"), colorNA="grey95", textNA="Not significant", title="Incomplete MPM hotspot status\nLocal Moran's I") + tm_facets(free.scales=FALSE, ncol=2) + tm_layout(panel.labels=c("Unadjusted analytical total", "Bonferroni analytical cond.", "Cond. perm. with Bonferroni"))
tm_shape(OA.Census_18) + tm_fill(c("hs_an_q", "hs_ac_q", "hs_cp_q"), colorNA="grey95", textNA="Not significant", title="Incomplete MPM hotspot status\nLocal Moran's I") + tm_facets(free.scales=FALSE, ncol=2) + tm_layout(panel.labels=c("Unadjusted analytical total", "Bonferroni analytical cond.", "Cond. perm. with Bonferroni"))
tmap_arrange(hotp12,hotp18,nrow = 2)
#save.image(file =("D:\\MOOC\\spatbarry_v2.RData"))
load("D:\\MOOC\\spatbarry_v2.RData")
############################################################Interpolation_Krigging