####Variations in the use of malaria preventive measures among pregnant women in Guinea. A secondary analysis of the 2012 and 2018 demographic and health surveys. #List of authors and affiliations #Ibrahima BARRY1,2, Almamy Amara TOURE2,3¥, Oumar SANGHO4, Abdoul Habib BEAVOGUI2, Diao CISSE5, Abdourahamane DIALLO6, Aboubacar Sidiki MAGASSOUBA5, Younoussa SYLLA2,3, Lancina DOUMBIA1, Mahamoud Sama Cherif2, Alseny Yarie CAMARA1 , Fatou DIAWARA7, Moctar TOUNKARA1, Alexandre DELAMOU4,7, Seydou DOUMBIA1 #Corresponding author #Almamy Amara TOURE, Centre National de Formation et de Recherche en Santé Rurale de Maferinyah, Forecariah, Guinea, almamy@maferinyah.org #########Importation des données Femmes et personal recode library(foreign) #ir1 <- read.spss("D:\\MOOC\\BASE_2005sav.sav", to.data.frame=TRUE) ir11 <- read.spss("D:\\MOOC\\GNIR62FL.sav", to.data.frame=TRUE) ir112 <- read.spss("D:\\MOOC\\GNIR71FL.sav", to.data.frame=TRUE) #PR1 <- read.spss("D:\\MOOC\\GNPR53FL.sav", to.data.frame=TRUE) PR11 <- read.spss("D:\\MOOC\\GNPR62FL.sav", to.data.frame=TRUE) PR112 <- read.spss("D:\\MOOC\\\\GNPR71FL.sav", to.data.frame=TRUE) ###################Combinez les bd pour rechercher la variables dernières passées sous moustiquaire library(dplyr) #bd <- inner_join(ir1,PR1,by=c("V001"="HV001","V002"="HV002","V003"="HV003")) ##################Recodage de la variable HVOO3 en numérique #PR1$HV003 <- as.numeric(PR1$HV003) PR11$HV003 <- as.numeric(PR11$HV003) PR112$HV003 <- as.numeric(PR112$HV003) ############################################Fusionner les bases de 2005 #bd2005 <- inner_join(ir1,PR1,by=c("V001"="HV001","V002"="HV002","V003"="HVIDX")) bd2012 <- inner_join(ir11,PR11,by=c("V001"="HV001","V002"="HV002","V003"="HVIDX")) #bd2012 <- inner_join(ir11,PR11,by=c("V001"="HV001","V002"="HV002","V003"="HV003")) bd2018 <- inner_join(ir112,PR112,by=c("V001"="HV001","V002"="HV002","V003"="HVIDX")) #bd2018 <- inner_join(ir112,PR112,by=c("V001"="HV001","V002"="HV002","V003"="HV003")) rm(ir11) rm(ir112) rm(PR11) rm(PR112) ####################################Filtering for the last 3 years preceding the survey ######################################################## summary(bd2012$V238) dhs2012 <- filter(.data = bd2012,V238>=1) summary(bd2018$V238) dhs2018 <- filter(.data = bd2018,V238>=1) ######################################################################clearing environment rm(bd2012,bd2018) ####################Selecting variables on each dataframe from each DHS survery from 2005 to 2018 #ibdat2005 <- subset(bdat2005,select=c("CASEID","V005","V007","V012","V021","V022","V024","V025","V106","V136","V157","V158","V159","V190","V501","V714","V731", # "V150","V151","V152","V153","V201","HML10","HML12","HML16","HML18","V218","V701","M49A.1","ML1.1","HV103","HV104","ML101","M13.1","M14.1","M10.1","M15.1")) #ibdat2012 <- subset(bdat2012,select=c("CASEID","V005","V007","V012","V021","V022","V024","V025","V106","V136","V157","V158","V159","V190","V501","V714","V731", # "V150","V151","V152","V153","V201","HML10","V218","V701","M49A.1","ML1.1","HV227","ML101","M13.1","M14.1","M10.1","M15.1")) #ibdat2018 <- subset(bdat2018,select=c("CASEID","V005","V007","V012","V021","V022","V024","V025","V106","V136","V157","V158","V159","V190","V501","V714","V731", # "V150","V151","V152","V153","V201","HML10","V218","V701","M49A.1","ML1.1","HV227","ML101","M13.1","M14.1","M10.1","M15.1")) #ibdatfin <- rbind(ibdat2005,ibdat2012,ibdat2018) #ibdatfin <- rbind(ibdat2012,ibdat2018) ##############################################################Filtering by Survey Year #a) For 2012 #ibdatfin$V022 <- as.numeric(ibdatfin$V022) #########Explorer les variables d'intérêt SP et Passé la nuit précédent l'enquête sous Moustiquaire #summary(ibdatfin$HV103) #summary(ibdat2012$ML1.1) #summary(ibdat2012$M49A.1) ############Filtering dataframe for women who had had at least one dose of SP and women who had treated net #ibdat2012 <- filter(.data = ibdat2012,ibdat2012$M49A.1=="Yes") ibdat2012<- filter(.data = dhs2012,dhs2012$M49A.1=="Yes"& dhs2012$HV227=="Yes") ibdat2012 <- subset(ibdat2012,select=c("CASEID","V001","V005","V007","V012","V021","V022","V024","V025","V106","V136","V157","V158","V159","V190","V191","V501","V714","V731", "V150","V151","V152","V153","V201","HML10","V218","V701","M49A.1","ML1.1","HV227","ML101","M13.1","M14.1","M10.1","M15.1")) ############b) For 2018 #ibdatfin$V022 <- as.numeric(ibdatfin$V022) #########Explorer les variables d'intérêt SP et Passé la nuit précédent l'enquête sous Moustiquaire #summary(ibdatfin$HV103) #summary(ibdat2018$ML1.1) #summary(ibdat2018$M49A.1) ############Filtering dataframe for women who had had at least one dose of SP women who had treated net ibdat2018 <- filter(.data = dhs2018,dhs2018$M49A.1=="Yes" & dhs2018$HV227=="Yes") ibdat2018 <- subset(ibdat2018,select=c("CASEID","V001","V005","V007","V012","V021","V022","V024","V025","V106","V136","V157","V158","V159","V190","V191","V501","V714","V731", "V150","V151","V152","V153","V201","HML10","V218","V701","M49A.1","ML1.1","HV227","ML101","M13.1","M14.1","M10.1","M15.1")) #######################################################Exploring,data structure str(ibdat2012) str(ibdat2018) ##########Renaming dataframe variables ibdat2012 <- ibdat2012[-c(25)] ibdat2018 <- ibdat2018[-c(25)] dput(names(ibdat2012)) dput(names(ibdat2018)) names(ibdat2012) <- c("CASEID","idclust", "weight", "survey-year", "age","PSU","strate","region", "residence", "education", "h-number", "a-newspap", "a-radio", "a-tv", "w.quintile","w-quintile", "mat-status", "currently-w","work-lyr","link-headh","sexheadhouse", "age-headhous", "phone-h", "chldren-born", "living-child", "partner-education", "sp-pregnancy", "sp-number","own-net" ,"slept-net", "times-1ANC", "anc-number", "wanted-pregn","place-deliver") names(ibdat2018) <- c("CASEID","idclust" ,"weight", "survey-year", "age","PSU","strate","region", "residence", "education", "h-number", "a-newspap", "a-radio", "a-tv", "w.quintile","w-quintile", "mat-status", "currently-w","work-lyr","link-headh","sexheadhouse", "age-headhous", "phone-h", "chldren-born", "living-child", "partner-education", "sp-pregnancy", "sp-number","own-net" ,"slept-net", "times-1ANC", "anc-number", "wanted-pregn","place-deliver") ############################################Variables reconding For 2012 summary(ibdat2012) levels(ibdat2012$region) addmargins(table(ibdat2012$region)) levels(ibdat2012$`a-newspap`) levels(ibdat2012$`a-newspap`) <- c("Not at all","access","access","access") table(ibdat2012$`a-newspap`) levels(ibdat2012$`a-radio`) <- c("Not at all","access","access","access") table(ibdat2012$`a-radio`) levels(ibdat2012$`a-tv`) levels(ibdat2012$`a-tv`) <- c("Not at all","access","access","access") table(ibdat2012$`a-tv`) levels(ibdat2012$`mat-status`) #levels(ibdat2012$`mat-status`) <- c("single", "Married","in-couple", "single","single","single") #table(ibdat2012$`mat-status`) levels(ibdat2012$`partner-education`) levels(ibdat2012$`partner-education`) <- c("No education", "Primary","Secondary", "Higher","No education") table(ibdat2012$`partner-education`) #levels(ibdat2012$`w-quitile`) <- c("Poorer","Poorer","Middle","Richer","Richer") table(ibdat2012$`sp-number`) class(ibdat2012$`sp-number`) levels(ibdat2012$`sp-number`) levels(ibdat2012$`sp-number`) <- c("1","10","12","1","1","2","1","21","3","1","4","1","1","1","60","7","8","9","1") ibdat2012$`sp-number` <- factor((ibdat2012$`sp-number`),levels = c("1","2","3","4","7","8","9","10","12","21","60")) table(ibdat2012$`sp-number`) ibdat2012$`sp-number` <- as.numeric(ibdat2012$`sp-number`) table(ibdat2012$`sp-number`) #ibdat2012$`sp-number` <- cut(ibdat2012$`sp-number`,breaks = c(1,2,12),right = FALSE) #table(ibdat2012$`sp-number`) ############################################Variables reconding For 2018 summary(ibdat2018) levels(ibdat2018$region) addmargins(table(ibdat2018$region)) levels(ibdat2018$`a-newspap`) levels(ibdat2018$`a-newspap`) <- c("Not at all","access","access","access") table(ibdat2018$`a-newspap`) levels(ibdat2018$`a-radio`) <- c("Not at all","access","access","access") table(ibdat2018$`a-radio`) levels(ibdat2018$`a-tv`) levels(ibdat2018$`a-tv`) <- c("Not at all","access","access","access") table(ibdat2018$`a-tv`) #levels(ibdat2018$`mat-status`) #levels(ibdat2018$`mat-status`) <- c("single", "Married","in-couple", "single","single","single") #table(ibdat2018$`mat-status`) levels(ibdat2018$`partner-education`) levels(ibdat2018$`partner-education`) <- c("No education", "Primary","Secondary", "Higher","No education") table(ibdat2018$`partner-education`) #levels(ibdat2018$`w-quitile`) <- c("Poorer","Poorer","Middle","Richer","Richer") table(ibdat2018$`sp-number`) class(ibdat2018$`sp-number`) levels(ibdat2018$`sp-number`) levels(ibdat2018$`sp-number`) <- c("1","1","2","3","4","1") table(ibdat2018$`sp-number`) ibdat2018$`sp-number` <- as.numeric(ibdat2018$`sp-number`) table(ibdat2018$`sp-number`) #ibdat2018$`sp-number` <- cut(ibdat2018$`sp-number`,breaks = c(1,2,4),include.lowest = TRUE) #table(ibdat2018$`sp-number`) ###Excluding relation with head of the household #ibdatfin <- ibdatfin[-c(1,15,18,22,25,29)] #ibdatfin <- ibdatfin[-c(1,17,20,24,30)] #######################################################Recoding ANC number For 2012 class(ibdat2012$`anc-number`) table(ibdat2012$`anc-number`) levels(ibdat2012$`anc-number`) levels(ibdat2012$`anc-number`) <- c("1","1","10","11","12","13","14","15","16","17","18","2","20","3","4","5","6","7","8","9","1") table(ibdat2012$`anc-number`) levels(ibdat2012$`anc-number`) <- c("1","1","1","1","1","1","1","1","1","1","2","1","3","4","5","6","7","8","9") table(ibdat2012$`anc-number`) ibdat2012$`anc-number` <- as.numeric(ibdat2012$`anc-number`) table(ibdat2012$`anc-number`) summary(ibdat2012$`anc-number`) #ibdat2012$`anc-number` <- cut(ibdat2012$`anc-number`,breaks = c(1,3,9),include.lowest = TRUE) #table(ibdat2012$`anc-number`) ########ANC number for 2018 class(ibdat2018$`anc-number`) levels(ibdat2018$`anc-number`) table(ibdat2018$`anc-number`) levels(ibdat2018$`anc-number`) <- c("1","1","10","11","12","13","14","15","16","2","20","3","4","5","6","7","8","9","1") table(ibdat2018$`anc-number`) levels(ibdat2018$`anc-number`) <- c("1","10","1","1","13","1","15","1","2","1","3","4","5","6","7","8","9") summary(ibdat2018$`anc-number`) ibdat2018$`anc-number` <- factor((ibdat2018$`anc-number`),levels = c("1","2","3","4","5","6","7","8","9","10","13","15")) table(ibdat2018$`anc-number`) ibdat2018$`anc-number` <- as.numeric(ibdat2018$`anc-number`) table(ibdat2018$`anc-number`) summary(ibdat2018$`anc-number`) #ibdat2018$`anc-number` <- cut(ibdat2018$`anc-number`,breaks = c(1,3,12),include.lowest = TRUE) #table(ibdat2018$`anc-number`) ##################################################First ANC period for 2012 class(ibdat2012$`times-1ANC`) levels(ibdat2012$`times-1ANC`) table(ibdat2012$`times-1ANC`) levels(ibdat2012$`times-1ANC`) <- c("1","1","2","3","4","5","6","7","8","1","1") table(ibdat2012$`times-1ANC`) ibdat2012$`times-1ANC` <- as.numeric(ibdat2012$`times-1ANC`) table(ibdat2012$`times-1ANC`) #############Make imputation for missing for firt ANC times #################Checking for missing data sapply (ibdat2012, function (x) sum(is.na (x))) sapply (ibdat2018, function (x) sum(is.na (x))) library(na.tools) ibdat2012$`times-1ANC` <- na.bootstrap(ibdat2012$`times-1ANC`) summary(ibdat2012$`times-1ANC`) #ibdat2012$`times-1ANC` <- cut(ibdat2012$`times-1ANC`,breaks = c(1,3,6,8),include.lowest = TRUE,labels = c("first-trim","second-trim","third-trim")) #table(ibdat2012$`times-1ANC`) ###############First ANC period for 2018 class(ibdat2018$`times-1ANC`) levels(ibdat2018$`times-1ANC`) table(ibdat2018$`times-1ANC`) levels(ibdat2018$`times-1ANC`) <- c("1","1","10","2","3","4","5","6","7","8","9","1") table(ibdat2018$`times-1ANC`) ibdat2018$`times-1ANC` <- factor((ibdat2018$`times-1ANC`),levels = c("1","2","3","4","5","6","7","8","9","10")) table(ibdat2018$`times-1ANC`) ibdat2018$`times-1ANC` <- as.numeric(ibdat2018$`times-1ANC`) summary(ibdat2018$`times-1ANC`) set.seed(123) ibdat2018$`times-1ANC` <- na.bootstrap(ibdat2018$`times-1ANC`) ibdat2018$`times-1ANC` <- na.bootstrap(ibdat2018$`times-1ANC`) summary(ibdat2018$`times-1ANC`) #ibdat2018$`times-1ANC` <- cut(ibdat2018$`times-1ANC`,breaks = c(1,3,6,10),include.lowest = TRUE,labels = c("first-trim","second-trim","third-trim")) #table(ibdat2018$`times-1ANC`) #################Checking for missing data sapply (ibdat2012, function (x) sum(is.na (x))) sapply (ibdat2018, function (x) sum(is.na (x))) ##############Recoding for age survey 2012 summary(ibdat2012$age) #ibdat2012$age <- na.resample(ibdat2012$age) #ibdat2012$age_rec <- cut(ibdat2012$age,breaks = c(16,18,30,40,49),include.lowest = TRUE) #table(ibdat2012$age_rec ) class(ibdat2012$`partner-education`) levels(ibdat2012$`partner-education`) ibdat2012$`partner-education` <- na.replace(ibdat2012$`partner-education`,.na = "No education") table(ibdat2012$`partner-education`) sapply (ibdat2012, function (x) sum(is.na (x))) ########################Dealing with NA for 2018 survey class(ibdat2018$`partner-education`) levels(ibdat2018$`partner-education`) ibdat2018$`partner-education` <- na.replace(ibdat2018$`partner-education`,.na = "No education") table(ibdat2012$`partner-education`) ibdat2018$age <- na.median(ibdat2018$age) sapply (ibdat2018, function (x) sum(is.na (x))) ####Recoding for age (survey 2018) summary(ibdat2018$age) #ibdat2018$age_rec <- cut(ibdat2018$age,breaks = c(16,18,30,40,49),include.lowest = TRUE) #table(ibdat2018$age_rec ) ##Recoding for living child and household number for 2012 survey summary(ibdat2012$`living-child`) #ibdat2012$`living-child` <- cut(ibdat2012$`living-child`,breaks = c(1,2,4,7,11),right = FALSE) #table(ibdat2012$`living-child`) summary(ibdat2012$`h-number`) #ibdat2012$`h-number` <- cut(ibdat2012$`h-number`,breaks = c(1,5,10,30),include.lowest = TRUE) #table(ibdat2012$`h-number`) ##Recoding for living child and household number for 2018 survey summary(ibdat2018$`living-child`) #ibdat2018$`living-child` <- cut(ibdat2018$`living-child`,breaks = c(1,2,4,7,14),right = FALSE) #table(ibdat2018$`living-child`) summary(ibdat2018$`h-number`) #ibdat2018$`h-number` <- cut(ibdat2018$`h-number`,breaks = c(1,5,10,38),include.lowest = TRUE) #table(ibdat2018$`h-number`) ##Recoding for age for head of household for 2012 survey summary(ibdat2012$`age-headhous`) levels(ibdat2012$`age-headhous`) table(ibdat2012$`age-headhous`) levels(ibdat2012$`age-headhous`) <- c("16","17","18","19","20","21","22","23","24","25","26","27", "28","29","30","31","32","33","34","35","36","37","38","39", "40","41","42","43","44","45","46","47","48","49","50","51", "52","53","54","55","56","57","58","59","60","61","62","63", "64","65","66","67","68","69","70","71","72","73","74","75", "76","77","78","79","80","81","82","83","84","85","86","87", "88","89","90","91","92","93","94","95","16","17") ibdat2012$`age-headhous` <- factor((ibdat2012$`age-headhous`),levels = c("19","20","21","22","23","24","25","26","27", "28","29","30","31","32","33","34","35","36","37","38","39", "40","41","42","43","44","45","46","47","48","49","50","51", "52","53","54","55","56","57","58","59","60","61","62","63", "64","65","66","67","68","69","70","71","72","73","74","75", "76","77","78","79","80","81","82","83","84","87","88","89","91","925")) ibdat2012$`age-headhous`<- as.numeric(levels(ibdat2012$`age-headhous`))[ibdat2012$`age-headhous`] table(ibdat2012$`age-headhous`) summary(ibdat2012$`age-headhous`) ibdat2012$`age-headhous` <- na.resample(ibdat2012$`age-headhous`) summary(ibdat2012$`age-headhous`) #ibdat2012$`age-headhous` <- cut(ibdat2012$`age-headhous`,breaks = c(16,25,40,60,91),include.lowest = TRUE) #table(ibdat2012$`age-headhous`) ##Recoding for living child and household number for 2018 survey summary(ibdat2018$`age-headhous`) levels(ibdat2018$`age-headhous`) <- c("15","16","17","18","19","20","21","22","23","24","25","26","27", "28","29","30","31","32","33","34","35","36","37","38","39", "40","41","42","43","44","45","46","47","48","49","50","51", "52","53","54","55","56","57","58","59","60","61","62","63", "64","65","66","67","68","69","70","71","72","73","74","75", "76","77","78","79","80","81","82","83","84","85","86","87", "88","89","90","91","92","93","94","95","15","17") ibdat2018$`age-headhous` <- factor((ibdat2018$`age-headhous`),levels = c("16","18","20","21","22","23","24","25","26","27", "28","29","30","31","32","33","34","35","36","37","38","39", "40","41","42","43","44","45","46","47","48","49","50","51", "52","53","54","55","56","57","58","59","60","61","62","63", "64","65","66","67","68","69","70","71","72","73","74","75", "76","77","78","79","80","81","82","83","84","87","89","90")) ibdat2018$`age-headhous`<- as.numeric(levels(ibdat2018$`age-headhous`))[ibdat2018$`age-headhous`] table(ibdat2018$`age-headhous`) summary(ibdat2018$`age-headhous`) ibdat2018$`age-headhous` <- na.resample(ibdat2018$`age-headhous`) summary(ibdat2018$`age-headhous`) #ibdat2018$`age-headhous` <- cut(ibdat2018$`age-headhous`,breaks = c(16,25,40,60,90),include.lowest = TRUE) #table(ibdat2018$`age-headhous`) ###############################################Exporting final data into CSV format write.csv2(ibdat2012,file = "D:\\MOOC\\ib2012_1.csv") write.csv2(ibdat2018,file = "D:\\MOOC\\ib2018_1.csv") #write.csv2(ibdat2012,file = "C:\\Users\\Dr Touré\\Desktop\\AAT\\MOOC\\ib2012.csv") #write.csv2(ibdat2018,file = "C:\\Users\\Dr Touré\\Desktop\\AAT\\MOOC\\ib2018.csv") ################################################Data analysis is starting from here #####################################################################Re-importing new data #dhs2012 <- read.csv2("C:\\Users\\Dr Touré\\Desktop\\AAT\\MOOC\\ib2012.csv") #dhs2018 <- read.csv2("C:\\Users\\Dr Touré\\Desktop\\AAT\\MOOC\\ib2018.csv") ####################################################################################### dhs2012 <- read.csv2("D:\\MOOC\\ib2012_1.csv") dhs2018 <- read.csv2("D:\\MOOC\\ib2018_1.csv") library(na.tools) ############################Outcome variable from the two previous variables for 2012 ########Remove some variables #dhs2012 <- dhs2012[-c(1:2,7,19,23,27,29)] dhs2012 <- dhs2012[-c(1:2,8,20,24,28,30)] #dhs2018 <- dhs2018[-c(1:2,7,19,23,27,29)] dhs2018 <- dhs2018[-c(1:2,8,20,24,28,30)] sapply (dhs2012, function (x) sum(is.na (x))) dhs2012[c(6:8,10:13,15:18,22,24,27:28)] <- lapply(dhs2012[c(6:8,10:13,15:18,22,24,27:28)],factor) str(dhs2012) sapply (dhs2012, function (x) sum(is.na (x))) ###########################Since percent of missing data is below 5 levels(dhs2012$a.newspap) dhs2012$a.newspap <- na.replace(dhs2012$a.newspap,.na = "Not at all") dhs2012$a.radio <- na.replace(dhs2012$a.radio,.na = "Not at all") dhs2012$a.tv<- na.replace(dhs2012$a.tv,.na = "Not at all") set.seed(123) levels(dhs2012$sp.number) dhs2012$sp.number <-na.bootstrap(dhs2012$sp.number) levels(dhs2012$wanted.pregn) dhs2012$wanted.pregn <-na.bootstrap(dhs2012$wanted.pregn) levels(dhs2012$living.child) dhs2012$living.child <- na.bootstrap(dhs2012$living.child) sapply (dhs2012, function (x) sum(is.na (x))) ##########Recoding slept net levels(dhs2012$slept.net) #levels(dhs2012$slept.net) <- c("No","Yes","Yes") levels(dhs2012$slept.net) <- c("No","Yes","No") table(dhs2012$slept.net) levels(dhs2012$slept.net) <- c("1","2") dhs2012$slept.net <- as.numeric(levels(dhs2012$slept.net))[dhs2012$slept.net] table(dhs2012$slept.net) ###########################################################################For 2018 sapply (dhs2018, function (x) sum(is.na (x))) dhs2018[c(6:8,10:13,15:18,22,24,27:28)] <- lapply(dhs2018[c(6:8,10:13,15:18,22,24,27:28)],factor) set.seed(123) dhs2018$times.1ANC <- na.bootstrap(dhs2018$times.1ANC) sapply (dhs2018, function (x) sum(is.na (x))) ##########Recoding slept net levels(dhs2018$slept.net) #levels(dhs2018$slept.net) <- c("No","Yes","Yes") levels(dhs2018$slept.net) <- c("No","Yes","No") table(dhs2018$slept.net) levels(dhs2018$slept.net) <- c("1","2") dhs2018$slept.net <- as.numeric(levels(dhs2018$slept.net))[dhs2018$slept.net] table(dhs2018$slept.net) ########################################################Building variables of interest #)For 2012 summary(dhs2012$sp.number) dhs2012$malprev[dhs2012$sp.number>=2&dhs2012$slept.net==2] <- "complete" dhs2012$malprev[dhs2012$sp.number<2&dhs2012$slept.net==2] <- "incomplete" dhs2012$malprev[dhs2012$sp.number>=2&dhs2012$slept.net==1] <- "incomplete" dhs2012$malprev[dhs2012$sp.number<2&dhs2012$slept.net==1] <- "incomplete" table(dhs2012$malprev) dhs2012$malprev <- factor(dhs2012$malprev) dhs2012$malprev <- relevel(dhs2012$malprev,ref ="incomplete" ) table(dhs2012$malprev) #)For 2018 summary(dhs2018$sp.number) dhs2018$malprev[dhs2018$sp.number>=3&dhs2018$slept.net==2] <- "complete" dhs2018$malprev[dhs2018$sp.number<3&dhs2018$slept.net==2] <- "incomplete" dhs2018$malprev[dhs2018$sp.number>=3&dhs2018$slept.net==1] <- "incomplete" dhs2018$malprev[dhs2018$sp.number<3&dhs2018$slept.net==1] <- "incomplete" table(dhs2018$malprev) dhs2018$malprev <- factor(dhs2018$malprev) dhs2018$malprev <- relevel(dhs2018$malprev,ref ="incomplete" ) table(dhs2018$malprev) ##########################################################Recoding outcome SP and others summary(dhs2012$sp.number) dhs2012$sp.number <- cut(dhs2012$sp.number,breaks = c(1,2,12),include.lowest = TRUE,right = FALSE) table(dhs2012$sp.number) levels(dhs2012$sp.number) <- c("No","Yes") table(dhs2012$sp.number) summary(dhs2018$sp.number) dhs2018$sp.number <- cut(dhs2018$sp.number,breaks = c(1,2,4),include.lowest = TRUE) table(dhs2018$sp.number) levels(dhs2018$sp.number) <- c("No","Yes") table(dhs2018$sp.number) #######################################################Denormalisation for pooling data ####Pooling data for computing frequencies and proportions, before dhs <- rbind(dhs2012,dhs2018) sapply (dhs, function (x) sum(is.na (x))) dhs$slept.net <- factor(dhs$slept.net) table(dhs$slept.net) levels(dhs$slept.net) <- c("No","Yes") table(dhs$slept.net) ############### summary(dhs$anc.number) dhs$anc.number <- cut(dhs$anc.number,breaks = c(1,3,12),include.lowest = TRUE) table(dhs$anc.number) levels(dhs$anc.number) <- c("<4",">=4") table(dhs$anc.number) ############### summary(dhs$times.1ANC) dhs$times.1ANC <- cut(dhs$times.1ANC,breaks = c(1,3,6,10),include.lowest = TRUE,labels = c("first-trim","second-trim","third-trim")) table(dhs$times.1ANC) ############### summary(dhs$age) dhs$age_rec <- cut(dhs$age,breaks = c(15,18,30,40,49),include.lowest = TRUE) table(dhs$age_rec ) ############# summary(dhs$living.child) dhs$living.child <- cut(dhs$living.child,breaks = c(0,1,2,4,7,13),include.lowest = TRUE) table(dhs$living.child) ############# summary(dhs$chldren.born) dhs$chldren.born <- cut(dhs$chldren.born,breaks = c(1,2,4,7,14),right = FALSE) table(dhs$chldren.born) ######################## summary(dhs$h.number) dhs$h.number <- cut(dhs$h.number,breaks = c(2,5,10,38),include.lowest = TRUE) table(dhs$h.number) ################ summary(dhs$age.headhous) dhs$age.headhous <- cut(dhs$age.headhous,breaks = c(16,25,40,60,91),include.lowest = TRUE) table(dhs$age.headhous) ################ levels(dhs$a.newspap) levels(dhs$a.radio) levels(dhs$a.tv) levels(dhs$a.newspap) <-levels(dhs$a.radio) <- levels(dhs$a.tv) <- c("access","no-access") table(dhs$a.newspap) table(dhs$a.radio) table(dhs$a.tv) ###################### levels(dhs$mat.status) table(dhs$mat.status) levels(dhs$mat.status)<- c("Single","Single","Married","Single","Single","Single","Married","Single") table(dhs$mat.status) ################ levels(dhs$link.headh) levels(dhs$link.headh) <- c("ind-link","ind-link","d-link","ind-link","ind-link", "ind-link","d-link", "ind-link","ind-link","d-link", "d-link","ind-link","ind-link", "ind-link","ind-link","d-link","ind-link","d-link") table(dhs$link.headh) ################ levels(dhs$place.deliver) levels(dhs$place.deliver) <- c("public","public","public","home","home", "private","private","home", "private","public", "public","public","public", "public","home","private","private","home") table(dhs$place.deliver) ############### #dhs$survey.year <- factor(dhs$survey.year) #levels(dhs$malprev) <- c("0","1") #table(dhs2012$malprev) #levels(dhs2018$malprev) <- c("0","1") #table(dhs2018$malprev) ###############################################splitting year dhs2012 <- filter(dhs,dhs$survey.year=="2012") dhs2018 <- filter(dhs,dhs$survey.year=="2018") names(dhs2012) <-c("grapp", "weight", "survey.year", "age", "PSU", "region", "residence", "education", "h.number", "a.newspap", "a.radio", "a.tv", "w.quintile", "w.quintile.1", "mat.status", "currently.w", "link.head.of.the.household", "sexheadhouse", "age.headhous", "chldren.born", "living.child", "partner.education", "sp.number", "slept.net", "times.1ANC", "antenatal.care.visits", "wanted.pregn", "place.deliver", "MPM","age.rec") names(dhs2018) <-c("grapp", "weight", "survey.year", "age", "PSU", "region", "residence", "education", "h.number", "a.newspap", "a.radio", "a.tv", "w.quintile", "w.quintile.1", "mat.status", "currently.w", "link.headh", "sexheadhouse", "age.headhous", "chldren.born", "living.child", "partner.education", "sp.number", "slept.net", "times.1ANC", "antenatal.care.visits", "wanted.pregn", "place.deliver", "MPM","age.rec") levels(dhs2012$link.head.of.the.household) <- c("indirect.link","direct.link") #dhs2012$malprev <- as.numeric(levels(dhs2012$malprev))[dhs2012$malprev] #dhs2018$malprev <- as.numeric(levels(dhs2018$malprev))[dhs2018$malprev] #levels(dhs$survey.year) #tab1 <- aggregate(malprev~region+idclust+,data = dhs2012,sum) #tab2 <- aggregate(malprev~region+idclust,data = dhs2018,sum) #tab1 <- xtabs(malprev~region+idclust,data = dhs2012) #round(prop.table(tab1),2) ####################### complexe design #################Descriptive statistics with weighting library(dplyr) library(survey) library(party) library(gtsummary) library(car) library(GGally) library(forestmodel) library(Epi) library(ggeffects) library(ggparty) #dhs2012$idclust<- factor(dhs2012$idclust) ###########################Recoding wealth quintile levels(dhs2012$w.quintile) <- levels(dhs2018$w.quintile) <-c("Middle","Poorer","Poorer","Richer","Richer") #levels(dhs2012$w.quintile) <- relevel(dhs2012$w.quintile,ref = "Richer") #levels(dhs2018$w.quintile) <- relevel(dhs2018$w.quintile,ref = "Richer") #(sapply(dhs2012, function(x) is.factor(x))) ##############################################Variable Weighting for 2012 Wt1<- dhs2012$weight/1000000 ibdatf1<-cbind(dhs2012,Wt1) dw1<-svydesign(ids=~PSU,data = ibdatf1 ,strata=~region+residence, weights=~Wt1,nest = TRUE) ##############################################Variable Weighting for 2018 Wt2<- dhs2018$weight/1000000 ibdatf2<-cbind(dhs2018,Wt2) dw2<-svydesign(ids=~PSU,data = ibdatf2 ,strata=~region+residence, weights=~Wt2,nest = TRUE) #svytable(~malprev+idclust,design = dw1,round=TRUE) #tab1 <- xtabs(~malprev+idclust,data =dw1) #############################################################For 2012 survey ##################Descriptive ####a) For 2012 tab2012 <- tbl_svysummary(dw1) tab2012 tab2018 <- tbl_svysummary(dw2) tab2018 ##################Univariate Analysis stratifying by year ##b) 2012 tab2012 <- tbl_svysummary(dw1,by="malprev",type =list(sp.number~"dichotomous",slept.net~"dichotomous") ) tab2012 ##b) 2018 tab2018 <- tbl_svysummary(dw2,by="malprev",type =list(sp.number~"dichotomous",slept.net~"dichotomous") ) tab2018 ###################################To add p-value ###########################To spicify test tab1 <- add_p(tab2012, test = list( all_continuous() ~ "svy.t.test", all_categorical() ~ "svy.wald.test" ) ) add_p(tab2012) ###########################To spicify test tab2 <- add_p(tab2018, test = list( all_continuous() ~ "svy.t.test", all_categorical() ~ "svy.wald.test" ) ) ########################Mergin 2012 and 2018 tbl_merge(tbls = list(tab1, tab2)) %>% modify_spanning_header(everything() ~ NA_character_) ###########################################Performing logistic regression #####################logistic univariate analysis/you can change by including all variables #tbl_u <- dw1 %>% # dplyr::select(malprev,h.number,education, a.newspap,a.radio,a.tv, mat.status,link.headh,currently.w,sexheadhouse , # age.headhous,partner.education,times.1ANC,anc.number,place.deliver) %>% # tbl_uvregression( # method = glm, # y = malprev, # method.args = list(family = binomial), # exponentiate = TRUE, # pvalue_fun = ~ style_pvalue(.x, digits = 2) # ) %>% #add_global_p() %>% #add_nevent() #tbl_u ########################################survey 2018 #tbl_u1 <- dhs2018 %>% # dplyr::select(malprev,education,h.number, a.newspap,a.radio,a.tv,mat.status,link.headh,currently.w,sexheadhouse , # age.headhous,partner.education,times.1ANC,anc.number,place.deliver) %>% #tbl_uvregression( # method = glm, #y = malprev, #method.args = list(family = binomial), #exponentiate = TRUE, #pvalue_fun = ~ style_pvalue(.x, digits = 2) #) %>% #add_global_p() %>% #add_nevent() #tbl_u1 ########################Mergin univariate data 2012 and 2018 #tbl_merge(tbls = list(tbl_u, tbl_u1)) %>% # modify_spanning_header(everything() ~ NA_character_) ########################Mergin 2012 and 2018 #tbl_merge(tbls = list(tbl_u, tbl_u1)) %>% # modify_spanning_header(everything() ~ NA_character_) ##############################################Multivariate analysis ############For 2012 dput(names(dhs2012)) tab6 <- svyglm(MPM~education+h.number+a.newspap+a.radio+a.tv+w.quintile+mat.status+currently.w+link.headh+place.deliver+sexheadhouse+ age.headhous+chldren.born+living.child+partner.education+times.1ANC+anc.number+wanted.pregn+ age.rec,dw1,family = quasibinomial() ) #ifelse(n <- sapply(dhs2012, function(x) length(levels(x))) == 1, "DROP", "NODROP") ###########For 2018 tab8 <- svyglm(MPM~education+h.number+a.newspap+a.radio+w.quintile+mat.status+currently.w+link.headh+place.deliver+sexheadhouse+ age.headhous+chldren.born+living.child+partner.education+times.1ANC+anc.number+wanted.pregn+ age.rec,dw2,family = quasibinomial()) #######################################Step AIC for choosing best model tab7 <- step(tab6) tab9 <- step(tab8) ###############################Graphical representation of the model ggcoef_model(tab7, exponentiate = TRUE) forest_model(tab7) ####For 2018 ggcoef_model(tab9, exponentiate = TRUE) forest_model(tab9) ##################Variables effects for 2012/predicted values ggeffect(tab7,"a.radio") ggeffect(tab7,"a.tv") ggeffect(tab7, "mat.status") ggeffect(tab7, "place.deliver") ggeffect(tab7, "link.headh") ggeffect(tab7, "mat.status") ggeffect(tab7, "w.quintile") ggeffect(tab7, "age.headhous") ggeffect(tab7, "partner.education") ggeffect(tab7, "times.1ANC") ggeffect(tab7, "anc.number") ##################Variables effects for 2018 ggeffect(tab9,"a.radio") ggeffect(tab9,"a.newspap") ggeffect(tab9, "w.quintile") ggeffect(tab9,"currently.w") ggeffect(tab9, "place.deliver") ggeffect(tab9, "sexheadhouse") ggeffect(tab9, "times.1ANC") ggeffect(tab9, "anc.number") ggeffect(tab9, "h.number") #################Confusion matrix/ Quality of the model/2012 malprev <- predict(tab7, type = "response", newdata =dw1[["variables"]] ) head(malprev) table(malprev > 0.5, dhs2012$malprev) ##############misclassification of the model misclas <- (77+196)/781#####34% of misclassification #################Confusion matrix/ Quality of the model/2018 malprev <- predict(tab9, type = "response", newdata =dw2[["variables"]] ) head(malprev) table(malprev > 0.5, dhs2018$malprev) ##############misclassification of the model misclas <- (7+426)/2123#####20% of misclassification ############################################################Model Evaluation ############## ROC curve ROC(form=malprev~h.number+a.newspap+mat.status+currently.w+sexheadhouse+age.headhous+partner.education+times.1ANC+anc.number ,plot="ROC",data = dhs2012) ROC(form=malprev~h.number+a.newspap+mat.status+currently.w+sexheadhouse+age.headhous+partner.education+times.1ANC+anc.number ,plot="ROC",data = dhs2018) ###################################################coefficients significance anova(tab7, test = "Chisq") anova(tab9, test = "Chisq") ##############model assessment with(tab7,pchisq(null.deviance-deviance,df.null-df.residual,lower.tail = F)) with(tab9,pchisq(null.deviance-deviance,df.null-df.residual,lower.tail = F)) #############################################mulitcollinearity vif(tab7)##########no collinearity vif(tab9) #############################Merging univariate and multivariate analysis ######################################### tab7 <- tab7 %>% tbl_regression(exponentiate = TRUE) %>% add_global_p(type = "II") tab7 ####################################Using gtsummary tab9 <- tbl_regression(tab9,exponentiate = TRUE) ########## Find why second-time women used more malprev measure than 1st tab1 <- prop.table(table(dhs$times.1ANC,dhs$w.quitile),2) ########################################Merging Multivariate data tbl_merge() tbl_merge(tbls = list(tab7, tab9)) %>% modify_spanning_header(everything() ~ NA_character_) ##########################################################Testing interactions #a) dhs 2012 models interactions library(glmulti) dhs2012 <- dw1[["variables"]] #tab77 <- glmulti(malprev ~ a.radio + a.tv + w.quintile + mat.status + link.headh + # place.deliver + age.headhous + partner.education + times.1ANC + # anc.number,data = dhs2012,family=binomial()) #a) Models interaction outputs tab77 <-svyglm(malprev~1+a.tv+w.quintile+mat.status+link.headh+place.deliver+age.headhous+link.headh:a.tv+age.headhous:a.tv+age.headhous:w.quintile, dw1,family = quasibinomial() ) ROC(form=malprev~1+a.tv+w.quintile+mat.status+link.headh+place.deliver+age.headhous+link.headh:a.tv+age.headhous:a.tv+age.headhous:w.quintile, plot="ROC",data = dhs2012) ggcoef_model(tab77, exponentiate = TRUE) forest_model(tab77) #################Confusion matrix/ Quality of the model/2012 malprev <- predict(tab77, type = "response", newdata =dw1[["variables"]] ) head(malprev) table(malprev > 0.5, dhs2012$malprev) ##############misclassification of the model misclas <- (64+199)/781#####33% of misclassification ########we opt for our first model because the new one perform not much ##########################b) dhs 2018 models interactions dhs2018 <- dw2[["variables"]] #tab78 <- glmulti(malprev ~ h.number + a.newspap + a.radio + w.quintile + currently.w + # place.deliver + sexheadhouse + times.1ANC + anc.number,data = dhs2018,family=binomial()) #a) Models interaction outputs tab78 <- svyglm(malprev~1+h.number+a.newspap+a.radio+currently.w+place.deliver+sexheadhouse+a.radio:h.number+a.radio:a.newspap+currently.w:h.number +currently.w:a.radio+place.deliver:h.number,dw2,family = quasibinomial() ) diabetes_results %>% roc_curve(truth = diabetes, .pred_pos) %>% autoplot() ROC(form=malprev~1+h.number+a.newspap+a.radio+currently.w+place.deliver+sexheadhouse+a.radio:h.number+a.radio:a.newspap+currently.w:h.number +currently.w:a.radio+place.deliver:h.number, plot="ROC",data = dhs2018) #################Confusion matrix/ Quality of the model/2018 malprev <- predict(tab78, type = "response", newdata =dw2[["variables"]] ) head(malprev) table(malprev > 0.5, dhs2018$malprev) #################################################################End_Regression ##############misclassification of the model misclas <- (6+431)/2123#####33% of misclassification ########we opt for our first model because the new one perform not much #Start###################################Using CART on multivariable result #A) For 2012 tab10 <- ctree(MPM ~ a.radio + a.tv + w.quintile + mat.status + link.head.of.the.household + place.deliver + age.headhous + partner.education + times.1ANC + antenatal.care.visits,dhs2012) plot(tab10) tab8 <- as.constparty(tab10) #################################################Using###ggtree ggparty(tab10) + geom_edge() + geom_edge_label() + geom_node_splitvar() + geom_node_plot(gglist = list(geom_bar(aes(x = "", fill = MPM), position = position_fill()), xlab("")), # draw only one label for each axis shared_axis_labels = TRUE, # draw line between tree and legend legend_separator = TRUE ) # B)For dhs 2018 tab11 <- ctree(MPM ~ h.number + a.newspap + a.radio + w.quintile + currently.w + place.deliver + sexheadhouse + times.1ANC + antenatal.care.visits,dhs2018) nid <- nodeids(tab11) iid <- nid[!(nid %in% nodeids(tab11, terminal = TRUE))] (pval <- unlist(nodeapply(tab11, ids = iid, FUN = function(n) info_node(n)$p.value))) myttnc2 <- nodeprune(tab11, ids = iid[pval > 1e-5]) plot(myttnc2) tab11 <- as.constparty(myttnc2) #############################################################Using###ggtree ggparty(tab11) + geom_edge() + geom_edge_label() + geom_node_splitvar() + geom_node_plot(gglist = list(geom_bar(aes(x = "", fill = MPM), position = position_fill()), xlab("")), # draw only one label for each axis shared_axis_labels = TRUE, # draw line between tree and legend legend_separator = TRUE ) ################################################################End Barry script #########################################Model evaluation/ Sensitivity Analyses set.seed(123) library(tidymodels) # Create data split for train and test dhs2012_1 <- initial_split(dhs2012, prop = 0.75, strata = malprev) # Create training data malprev_train <- dhs2012_1 %>% training() # Create testing data malprev_test <- dhs2012_1 %>% testing() # Number of rows in train and test dataset nrow(malprev_train) nrow(malprev_test) fitted_logistic_model<- logistic_reg() %>% # Set the engine set_engine("glm") %>% # Set the mode set_mode("classification") %>% # Fit the model fit(malprev~h.number+a.newspap+mat.status+currently.w+sexheadhouse+age.headhous+partner.education+times.1ANC+anc.number, data = malprev_train) tidy(fitted_logistic_model) # Generate Summary Table tidy(fitted_logistic_model, exponentiate = TRUE) tidy(fitted_logistic_model, exponentiate = TRUE) %>% filter(p.value < 0.05) # Class prediction pred_class <- predict(fitted_logistic_model, new_data = malprev_test, type = "class") pred_class[1:5,] # Prediction Probabilities pred_proba <- predict(fitted_logistic_model, new_data = malprev_test, type = "prob") pred_proba[1:5,] ##################################Final Data Preparation for Model Evaluation malprev_results <- malprev_test %>% select(malprev) %>% bind_cols(pred_class, pred_proba) malprev_results[1:5, ] #################Confusion matrix conf_mat(malprev_results, truth = malprev, estimate = .pred_class) #############Accuracy accuracy(malprev_results, truth = malprev, estimate = .pred_class) ##########################Sens sens(malprev_results, truth = malprev, estimate = .pred_class) ###########specificity spec(malprev_results, truth = malprev, estimate = .pred_class) ##########precision precision(malprev_results, truth = malprev, estimate = .pred_class) ###########Recall precision(malprev_results, truth = malprev, estimate = .pred_class) #F-measure #F-measure is a weighted harmonic mean of precision and recall with the best score of 1 and the worst score of 0. #F-measure score conveys the balance between precision and recall f_meas(malprev_results, truth = malprev, estimate = .pred_class) #Kappa Cohen Kappa gives information on how much better a model over the random classifier. Kappa can range from ???1 to +1. #The value <0 means no agreement while 1.0 shows perfect agreement. The estimated kappa statistics revealed a moderate agreement. kap(malprev_results, truth = malprev, estimate = .pred_class) #Matthews Correlation Coefficient (MCC) #The Matthews correlation coefficient (MCC) is used as a measure of the quality of a binary classifier. The value ranges from ???1 and +1. #MCC: -1 indicates total disagreement #MCC: 0 indicate no agreement #MCC: +1 indicates total aggrement custom_metrics <- metric_set(accuracy, sens, spec, precision, recall, f_meas, kap, mcc) custom_metrics(malprev_results, truth = malprev, estimate = .pred_class) #############################ROC curve roc_auc(malprev_results, truth = malprev, .pred_incomplete) malprev_results %>% roc_curve(truth = malprev, .pred_incomplete) %>% autoplot() ################################Spatial Analyisis ##########################Loading libraries library(sf) #library(sp) library(dplyr) library(ggplot2) library(tmap) library(rgeoda) library(spdep) ####################################################################For 2012 Guinea_12 <- st_read("D:\\MOOC\\shps_2012\\sdr_subnational_boundaries.shp",layer = "sdr_subnational_boundaries") palu_12 <- read.csv2 ("D:\\MOOC\\malprev12m.csv") palu_12$Malprev_12 <- palu_12$Malprev_12*100 palu_12$region <- factor(palu_12$region) #levels(palu_12$region) <- c( "Boké","Conakry","Faranah","Kankan","Kindia","Labe","Mamou","Nzérekore") ###############################merging OA.Census_12 <- inner_join(Guinea_12, palu_12, by = c("DHSREGFR" = "region")) ###################################################################For 2018 Guinea_18 <- st_read("D:\\MOOC\\shps_2018\\sdr_subnational_boundaries.shp",layer = "sdr_subnational_boundaries") palu_18 <- read.csv2 ("D:\\MOOC\\malprev18m.csv") palu_18$malprev18 <- palu_18$malprev18*100 palu_18$region <- factor(palu_18$region) levels(palu_18$region) <- c( "Boké","Conakry","Faranah","Kankan","Kindia","Labé","Mamou","N'Zérékoré") ###############################merging OA.Census_18 <- inner_join(Guinea_18, palu_18, by = c("DHSREGFR" = "region")) ###################################################################Making map for 2018 par(mfrow=c(1,2)) malprv2012<- tm_shape(OA.Census_12) + tm_fill(col = "Malprev_12", title = "% of incomplete MPM in 2012") + tm_compass(type ="arrow",position = "right") + tm_scale_bar(position = "center")+ tm_text("DHSREGFR", size =0.80,col = "black")+ tm_layout(bg.color = "skyblue", inner.margins = c(0, .02, .02, .02))+ tm_add_legend(type = "symbol")+tm_borders(col = "white")+ tm_xlab(text = "source= spatialdata.dhsprogram.com") malprv2012 ########################################################################For 2018 malprv2018 <- tm_shape(OA.Census_18) + tm_fill(col = "malprev18", title = "% of Incomplete MPM in 2018") + tm_compass(type ="arrow",position = "right") + tm_scale_bar(position = "center")+ tm_text("DHSREGFR", size =0.80,col = "black")+ tm_layout(bg.color = "skyblue", inner.margins = c(0, .02, .02, .02))+ tm_add_legend(type = "symbol")+tm_borders(col = "white")+ tm_xlab(text = "source= spatialdata.dhsprogram.com") malprv2018 tmap_arrange(malprv2012,malprv2018,nrow = 1) ################################################################Cluster analysis Moran ###################Working with rgeoda ########3.1 Queen Contiguity Weights queen_weights(sf_obj, order=1, include_lower_order = False, precision_threshold = 0) queen_w_12 <- queen_weights(OA.Census_12) queen_w_18 <- queen_weights(OA.Census_18) summary(queen_w_12) summary(queen_w_18) ###################Attributes of Weight object is_symmetric(queen_w_12) has_isolates(queen_w_18) weights_sparsity(queen_w_12) weights_sparsity(queen_w_18) ###To access the details of the weights:list the neighbors of a specified observation nbrs <- get_neighbors(queen_w_12, idx = 1) cat("\nNeighbors of the 1-st observation are:", nbrs) #To compute the spatial lag of a specified observation by passing the values of the selected variable lag12 <- spatial_lag(queen_w_12, OA.Census_12['Malprev_12']) lag12 lag18 <- spatial_lag(queen_w_18, OA.Census_18['malprev18']) lag18 #############3.2 Rook Contiguity Weights rook_weights(sf_obj, order=1,include_lower_order=False, precision_threshold = 0) ### to create a Rook contiguity weights using the sf object guerry rook_w <- rook_weights(OA.Census_12) summary(rook_w) #######The weights we created are in memory. To save the weights to a file, one can call the function save_weights(gda_w, id_variable, out_path, layer_name = "") save_weights(rook_w, OA.Census['ID_1'], out_path = 'D:\\MOOC\\OA.Census_r.gal', layer_name = 'NAME_1') ############3.3 Distance Based Weights #min_distthreshold(GeoDa gda, bool is_arc = False, is_mile = True) ##To create a Distance based weights, one can call the function `distance_weights` ###Then, with this distance threshold, we can create a distance-band weights using the function distance_weights(geoda_obj, dist_thres, power=1.0, is_inverse=False, is_arc=False, is_mile=True) dist_thres <- min_distthreshold(OA.Census_12) dist_thres dist_w <- distance_weights(OA.Census_12, dist_thres) summary(dist_w) ############3.4 K-Nearest Neighbor Weights knn_weights(gda, k, power = 1.0,is_inverse = False, is_arc = False, is_mile = True) ##to create a 6-nearest neighbor weights using Guerry knn6_w <- knn_weights(OA.Census_12, 6) summary(knn6_w) ##To create a kernel weights with fixed bandwith bandwidth <- min_distthreshold(OA.Census_12) kernel_w <- kernel_weights(OA.Census_12, bandwidth, kernel_method = "uniform") summary(kernel_w) ########To create a kernel weights with adaptive bandwidth or using max Knn distance as bandwidth adptkernel_w = kernel_knn_weights(OA.Census_12, 6, "uniform") summary(adptkernel_w) ###Local Indicators of Spatial Association–LISA #guerry_path <- system.file("extdata", "Guerry.shp", package = "rgeoda") #guerry <- st_read(guerry_path) malprev12 <- OA.Census_12["Malprev_12"] malprev18 <- OA.Census_18["malprev18"] lisa12 <- local_moran(queen_w_12, malprev12) lisa18 <- local_moran(queen_w_18, malprev18) #######we can call the function lisa_values() to get the values of the local Moran lms12 <- lisa_values(gda_lisa = lisa12) lms12 lms18 <- lisa_values(gda_lisa = lisa18) lms18 ##To get the pseudo-p values of significance of local Moran computation pvals12 <- lisa_pvalues(lisa12) pvals12 ###To get the cluster indicators of local Moran computation ##which can be accessed via the function lisa_labels() lbls12 <- lisa_labels(lisa12) lbls12 ###re-run the above local Moran example using 9,999 permutations. set.seed(123) lisa12 <- local_moran(queen_w_12, malprev12, permutations = 9999) lisa18 <- local_moran(queen_w_18, malprev18, permutations = 9999) ###6.3 Create Local Moran Map lisa_colors <- lisa_colors(lisa12) lisa_labels <- lisa_labels(lisa12) lisa_clusters <- lisa_clusters(lisa12) loclis12<- plot(st_geometry(OA.Census_12), col=sapply(lisa_clusters, function(x){return(lisa_colors[[x+1]])}), border = "#333333", lwd=0.2) title(main = "") #title(main = "Local Moran Map of incomplete malprev 2012") legend('bottomleft', legend = lisa_labels, fill = lisa_colors, border = "#eeeeee") ############For 2018 lisa_colors <- lisa_colors(lisa18) lisa_labels <- lisa_labels(lisa18) lisa_clusters <- lisa_clusters(lisa18) loclis18<-plot(st_geometry(OA.Census_18), col=sapply(lisa_clusters, function(x){return(lisa_colors[[x+1]])}), border = "#333333", lwd=0.2) title(main = "") #title(main = "Local Moran Map of malprev 2018") legend('bottomleft', legend = lisa_labels, fill = lisa_colors, border = "#eeeeee") ################################ lisa_p <- lisa_pvalues(lisa12) p_labels <- c("Not significant", "p <= 0.05", "p <= 0.01", "p <= 0.001") p_colors <- c("#eeeeee", "#84f576", "#53c53c", "#348124") plot(st_geometry(OA.Census_12), col=sapply(lisa_p, function(x){ if (x <= 0.001) return(p_colors[4]) else if (x <= 0.01) return(p_colors[3]) else if (x <= 0.05) return (p_colors[2]) else return(p_colors[1]) }), border = "#333333", lwd=0.2) #title(main = "Local Moran Map of malprev2012") legend('bottomleft', legend = p_labels, fill = p_colors, border = "#eeeeee") ###################################### lisa_p <- lisa_pvalues(lisa18) p_labels <- c("Not significant", "p <= 0.05", "p <= 0.01", "p <= 0.001") p_colors <- c("#eeeeee", "#84f576", "#53c53c", "#348124") plot(st_geometry(OA.Census_18), col=sapply(lisa_p, function(x){ if (x <= 0.001) return(p_colors[4]) else if (x <= 0.01) return(p_colors[3]) else if (x <= 0.05) return (p_colors[2]) else return(p_colors[1]) }), border = "#333333", lwd=0.2) #title(main = "Local Moran Map of malprev2018") legend('bottomleft', legend = p_labels, fill = p_colors, border = "#eeeeee") ######################################################working with spded #)A For 2012 survey Global Moran #Extracting neighbors(according to Queen) #9.1 Find queen neighbors neighbours <- poly2nb(OA.Census_12) neighbours neighbours1 <- poly2nb(OA.Census_18) neighbours1 #Crating weight matrix neighbours11 <- nb2listw(neighbours,zero.policy=TRUE) moran.test(OA.Census_12$Malprev_12, neighbours11) neighbours2 <- nb2listw(neighbours1,zero.policy=TRUE) moran.test(OA.Census_18$malprev18, neighbours2) #neighbours11 |> nb2listw(style="B") -> lw_q_B #neighbours2 |> nb2listw(style="W") -> lw_q_W ######################### glance_htest <- function(ht) c(ht$estimate, "Std deviate"=unname(ht$statistic), "p.value"=unname(ht$p.value)) (OA.Census_12 |> st_drop_geometry() |> subset(select=DHSREGFR, drop=TRUE) -> Types) |> table() (OA.Census_12 |> st_drop_geometry() |> subset(select=Malprev_12, drop=TRUE) -> z) |> moran.test(listw=neighbours11, randomisation=FALSE) |> glance_htest() (OA.Census_18 |> st_drop_geometry() |> subset(select=malprev18, drop=TRUE) -> z1) |> moran.test(listw=neighbours2, randomisation=FALSE) |> glance_htest() ###########Boostrap compute (z |> moran.test(listw=neighbours11) -> mtr) |> glance_htest() (z1 |> moran.test(listw=neighbours2) -> mtr1) |> glance_htest() set.seed(1) z |> moran.mc(listw=neighbours11, nsim=999, return_boot = TRUE) -> mmc z1 |> moran.mc(listw=neighbours2, nsim=999, return_boot = TRUE) -> mmc1 c("Permutation bootstrap"=var(mmc$t), "Analytical randomisation"=unname(mtr$estimate[3])) c("Permutation bootstrap"=var(mmc1$t), "Analytical randomisation"=unname(mtr1$estimate[3])) ##################### # MORAN scatter plot z |> moran.plot(listw=neighbours11, labels=OA.Census_12$ID_2, cex=1, pch=".", xlab="malprev12", ylab="lagged malprev12") -> infl_W1 z1 |> moran.plot(listw=neighbours2, labels=OA.Census_18$ID_2, cex=1, pch=".", xlab="malprev18", ylab="lagged malprev18") -> infl_W2 #####################Produce map OA.Census_12$hat_value <- infl_W1$hat tm_shape(OA.Census_12) + tm_fill("hat_value") OA.Census_18$hat_value <- infl_W2$hat tm_shape(OA.Census_18) + tm_fill("hat_value") ############################## z |> localmoran(listw=neighbours11, conditional=FALSE, alternative="two.sided") -> locm z1 |> localmoran(listw=neighbours2, conditional=FALSE, alternative="two.sided") -> locm1 #################### pva <- \(pv) cbind("none"=pv, "bonferroni"=p.adjust(pv, "bonferroni"), "fdr"=p.adjust(pv, "fdr"), "BY"=p.adjust(pv, "BY")) locm |> subset(select="Pr(z != E(Ii))", drop=TRUE) |> pva() -> pvsp f <- \(x) sum(x < 0.05) apply(pvsp, 2, f) pva <- \(pv) cbind("none"=pv, "bonferroni"=p.adjust(pv, "bonferroni"), "fdr"=p.adjust(pv, "fdr"), "BY"=p.adjust(pv, "BY")) locm1 |> subset(select="Pr(z != E(Ii))", drop=TRUE) |> pva() -> pvsp f <- \(x) sum(x < 0.05) apply(pvsp, 2, f) ##################### library(parallel) set.coresOption(ifelse(detectCores() == 1, 1, detectCores()-1L)) ################### system.time(z |> localmoran_perm(listw=neighbours11, nsim=499, alternative="two.sided", iseed=1) -> locm_p) system.time(z1 |> localmoran_perm(listw=neighbours2, nsim=499, alternative="two.sided", iseed=1) -> locm_p1) ################# brks <- qnorm(c(0, 0.00001, 0.0001, 0.001, 0.01, 0.025, 0.5, 0.975, 0.99, 0.999, 0.9999, 0.99999, 1)) (locm_p |> subset(select=Z.Ii, drop=TRUE) |> cut(brks) |> table()-> tab) ############### z |> localmoran(listw=neighbours11, conditional=TRUE, alternative="two.sided") -> locm_c z1 |> localmoran(listw=neighbours2, conditional=TRUE, alternative="two.sided") -> locm_c1 ################### OA.Census_12$locm_Z <- locm[, "Z.Ii"] OA.Census_12$locm_c_Z <- locm_c[, "Z.Ii"] OA.Census_12$locm_p_Z <- locm_p[, "Z.Ii"] #liss12<- tm_shape(OA.Census_12) + tm_fill(c("locm_Z", "locm_c_Z", "locm_p_Z"), breaks=brks, midpoint=0, title="Standard deviates of\nLocal Moran's I") + tm_facets(free.scales=FALSE, ncol=2) + tm_layout(panel.labels=c("Analytical total", "Analytical conditional", "Conditional permutation")) tm_shape(OA.Census_12) + tm_fill(c("locm_Z", "locm_c_Z", "locm_p_Z"), breaks=brks, midpoint=0, title="Standard deviates of\nLocal Moran's I") + tm_facets(free.scales=FALSE, ncol=2) + tm_layout(panel.labels=c("Analytical total", "Analytical conditional", "Conditional permutation")) OA.Census_18$locm_Z1 <- locm1[, "Z.Ii"] OA.Census_18$locm_c_Z1 <- locm_c1[, "Z.Ii"] OA.Census_18$locm_p_Z1 <- locm_p1[, "Z.Ii"] #liss18<- tm_shape(OA.Census_18) + tm_fill(c("locm_Z1", "locm_c_Z1", "locm_p_Z1"), breaks=brks, midpoint=0, title="Standard deviates of\nLocal Moran's I") + tm_facets(free.scales=FALSE, ncol=2) + tm_layout(panel.labels=c("Analytical total", "Analytical conditional", "Conditional permutation")) tm_shape(OA.Census_18) + tm_fill(c("locm_Z1", "locm_c_Z1", "locm_p_Z1"), breaks=brks, midpoint=0, title="Standard deviates of\nLocal Moran's I") + tm_facets(free.scales=FALSE, ncol=2) + tm_layout(panel.labels=c("Analytical total", "Analytical conditional", "Conditional permutation")) #tmap_arrange(liss12,liss18,nrow = 2) ###################### q_mean <- attr(locm, "quadr")$mean OA.Census_12$hs_ac_q <- OA.Census_12$hs_cp_q <- OA.Census_12$hs_an_q <- q_mean is.na(OA.Census_12$hs_an_q) <- !(OA.Census_12$locm_Z < brks[6] | OA.Census_12$locm_Z > brks[8]) is.na(OA.Census_12$hs_cp_q) <- !(OA.Census_12$locm_p_Z < brks[2] | OA.Census_12$locm_p_Z > brks[12]) is.na(OA.Census_12$hs_ac_q) <- !(OA.Census_12$locm_c_Z < brks[2] | OA.Census_12$locm_c_Z > brks[12]) #hotp12<- tm_shape(OA.Census_12) + tm_fill(c("hs_an_q", "hs_ac_q", "hs_cp_q"), colorNA="grey95", textNA="Not significant", title="Incomplete MPM hotspot status\nLocal Moran's I") + tm_facets(free.scales=FALSE, ncol=2) + tm_layout(panel.labels=c("Unadjusted analytical total", "Bonferroni analytical cond.", "Cond. perm. with Bonferroni")) tm_shape(OA.Census_12) + tm_fill(c("hs_an_q", "hs_ac_q", "hs_cp_q"), colorNA="grey95", textNA="Not significant", title="Incomplete MPM hotspot status\nLocal Moran's I") + tm_facets(free.scales=FALSE, ncol=2) + tm_layout(panel.labels=c("Unadjusted analytical total", "Bonferroni analytical cond.", "Cond. perm. with Bonferroni")) ############### q_mean <- attr(locm1, "quadr")$mean OA.Census_18$hs_ac_q <- OA.Census_18$hs_cp_q <- OA.Census_18$hs_an_q <- q_mean is.na(OA.Census_18$hs_an_q) <- !(OA.Census_18$locm_Z1 < brks[6] | OA.Census_18$locm_Z > brks[8]) is.na(OA.Census_18$hs_cp_q) <- !(OA.Census_18$locm_p_Z1 < brks[2] | OA.Census_18$locm_p_Z > brks[12]) is.na(OA.Census_18$hs_ac_q) <- !(OA.Census_18$locm_c_Z1 < brks[2] | OA.Census_18$locm_c_Z > brks[12]) #hotp18<- tm_shape(OA.Census_18) + tm_fill(c("hs_an_q", "hs_ac_q", "hs_cp_q"), colorNA="grey95", textNA="Not significant", title="Incomplete MPM hotspot status\nLocal Moran's I") + tm_facets(free.scales=FALSE, ncol=2) + tm_layout(panel.labels=c("Unadjusted analytical total", "Bonferroni analytical cond.", "Cond. perm. with Bonferroni")) tm_shape(OA.Census_18) + tm_fill(c("hs_an_q", "hs_ac_q", "hs_cp_q"), colorNA="grey95", textNA="Not significant", title="Incomplete MPM hotspot status\nLocal Moran's I") + tm_facets(free.scales=FALSE, ncol=2) + tm_layout(panel.labels=c("Unadjusted analytical total", "Bonferroni analytical cond.", "Cond. perm. with Bonferroni")) tmap_arrange(hotp12,hotp18,nrow = 2) #save.image(file =("D:\\MOOC\\spatbarry_v2.RData")) load("D:\\MOOC\\spatbarry_v2.RData") ############################################################Interpolation_Krigging