setwd("") #Home directory ################################ SET LIBRARIES ################################# library(raster) library(rgdal) library(sp) library(readxl) library(car) library(gstat) library(GSIF) library(ranger) library(ModelMap) library(caret) library(corrplot) library(devtools) library(randomForest) library(dplyr) library(ggplot2) library(GGally) library(rattle) library(rpart.plot) library(base) library(Cubist) library(automap) library(rJava) library(extraTrees) library(rworldmap) library(ggmap) library(mapproj) library(VSURF) ################################ INPUT DATASET ################################# ###Subset Data S1 file to keep only dietary variables s_comp1.1 <- readxl::read_excel("Data_S1.xls",col_names=TRUE, na="NA", sheet="Data4.1") write.csv(s_comp1,file="R_script\\Output\\Tables\\(RF2.1)C13_Initial.csv") s_avg1 <- s_comp1.1 s_comp1.2 <- readxl::read_excel("Data_S1.xls",col_names=TRUE, na="NA", sheet="Data4.2") write.csv(s_comp2,file="R_script\\Output\\Tables\\(RF2.2)C13_Initial.csv") s_avg2 <- s_comp1.2 s_comp1.3 <- readxl::read_excel("Data_S1.xls",col_names=TRUE, na="NA", sheet="Data4.3") write.csv(s_comp3,file="R_script\\Output\\Tables\\(RF2.3)C13_Initial.csv") s_proj3 <- project(as.matrix(s_comp3[,c("Lon","Lat")]), "+proj=eck4 +lon_0=0 +x_0=0 +y_0=0 +ellps=WGS84 +units=m +no_defs") plot(s_proj3) ###Project s-_comp1.3 using the location of residence of volunteers s_proj1.3 <- project(as.matrix(s_comp1.3[,c("Lon","Lat")]), "+proj=eck4 +lon_0=0 +x_0=0 +y_0=0 +ellps=WGS84 +units=m +no_defs") plot(s_proj1.3) ################################ INPUT ENVIRONMENTAL COVARIATE RASTERS ################################# ###The projected covariates are available at: https://drive.google.com/drive/folders/1g9rCGo3Kd3hz2o5JKkSbgNsGJclvsuQm?usp=sharing ###Use this link and download the Projected_rasters folder as a .zip file into your working directory (35GO) ###Unzip the file using the 7zip program https://www.7-zip.org/ which can handle large .zip file. ###Verify that the unzip folder has a name of "Projected_rasters" as below and if necessary rename the folder to the correct name ###Once this is done the following raster should load in your script ### Import Raster Files(see Material and Methods and Table S15 in Supplementary for downloading those raster) r.sugar = raster("sugar_reproj.tif") r.corn = raster("corn_reproj.tif") r.pet = raster("pet_reproj.tif") r.elevation = raster("elevation_reproj.tif") r.clay = raster("rclay_reproj.tif") r.ph = raster("rph_reproj.tif") r.WCmap = raster("map_reproj.tif") r.BCA = raster("BCA_reproj.tif") r.BCB = raster("BCB_reproj.tif") r.SSA = raster("SSA_reproj.tif") r.SUL = raster("SUL_reproj.tif") r.AOD = raster("AODpm25_reproj.tif") r.dust = raster("dust_reproj.tif") r.salt = raster("salt_reproj.tif") r.mat = raster("mat_reproj.tif") r.nman =raster("nman_reproj.tif") r.nfert =raster("nfert_reproj.tif") r.distance<-raster("D:\\clement_back-up_2017-11-14\\European_Isoscape\\Projected_rasters\\Projected_rasters\\distance.tif") ### Crop Rasters to Canada Extent canada <- as.vector(c(-12000000,-4000000,5000000,8000000)) r.pet = crop(r.pet, canada, snap='near') r.elevation = crop(r.elevation, r.pet, snap='near') r.clay = crop(r.clay, r.pet, snap='near') r.ph = crop(r.ph, r.pet, snap='near') r.WCmap = crop(r.WCmap, r.pet, snap='near') r.BCA = crop(r.BCA, r.pet, snap='near') r.BCB = crop(r.BCB, r.pet, snap='near') r.SUL = crop(r.SUL, r.pet, snap='near') r.AOD = crop(r.AOD, r.pet, snap='near') r.dust = crop(r.dust, r.pet, snap='near') r.salt = crop(r.salt, r.pet, snap='near') r.mat = crop(r.mat,r.pet,snap='near') r.nman = crop(r.nman,r.pet,snap='near') r.nfert = crop(r.nfert,r.pet,snap='near') r.sugar = crop(r.sugar,r.pet,snap='near') r.corn = crop(r.corn,r.pet,snap='near') r.distance = crop(r.distance,r.pet,snap='near') ###Resample all rasters at the same resolution #r.mat= aggregate(r.mat, fact = 1000) #r.nfert=aggregate(r.nfert, fact = 1000) #r.nman=aggregate(r.nman, fact = 1000) #r.dust = aggregate(r.dust, fact = 1000) #r.WCmap = aggregate(r.WCmap, fact = 1000) #r.salt = aggregate(r.salt, fact = 1000) #r.ai = aggregate(r.ai, fact = 1000) #r.pet = aggregate(r.pet, fact = 1000) #r.elevation = aggregate(r.elevation, fact = 1000) #r.clay = aggregate(r.clay, fact = 1000) #r.ph = aggregate(r.ph, fact = 1000) #r.BCA = aggregate(r.BCA, fact = 1000) #r.BCB = aggregate(r.BCB, fact = 1000) #r.SUL = aggregate(r.SUL, fact = 1000) #r.AOD = aggregate(r.AOD, fact = 1000) #r.sugar =aggregate(r.sugar, fact = 1000) #r.corn =aggregate(r.corn, fact = 1000) ############################# EXTRACT RASTER VALUES ############################ Mode <- function(x) { ux <- unique(x) ux[which.max(tabulate(match(x, ux)))] } ### Extract raster covariate values (with 10km2 buffer) petxy <-extract(r.pet, s_proj1.3, buffer=10000, fun=Mode,na.rm=TRUE) elevationxy <-extract(r.elevation, s_proj1.3, buffer=10000, fun=Mode, na.rm=TRUE) clayxy <-extract(r.clay, s_proj1.3, buffer=10000, fun=Mode, na.rm=TRUE) phxy <-extract(r.ph, s_proj1.3, buffer=10000, fun=Mode, na.rm=TRUE) WCmapxy <-extract(r.WCmap, s_proj1.3, buffer=10000, fun=Mode,na.rm=TRUE) BCAxy <-extract(r.BCA, s_proj1.3, buffer=10000, fun=Mode,na.rm=TRUE) BCBxy <-extract(r.BCB, s_proj1.3, buffer=10000, fun=Mode,na.rm=TRUE) SULxy <-extract(r.SUL, s_proj1.3, buffer=10000, fun=Mode,na.rm=TRUE) AODxy <-extract(r.AOD, s_proj1.3, buffer=10000, fun=Mode,na.rm=TRUE) Dustxy <-extract(r.dust, s_proj1.3, buffer=10000, fun=Mode,na.rm=TRUE) Saltxy <-extract(r.salt, s_proj1.3, buffer=10000, fun=Mode,na.rm=TRUE) matxy <-extract(r.mat, s_proj1.3, buffer=10000, fun=Mode,na.rm=TRUE) nfertxy <-extract(r.nfert,s_proj1.3, buffer=20000, fun=Mode,na.rm=TRUE) nmanxy <-extract(r.nman,s_proj1.3, buffer=60000, fun=Mode,na.rm=TRUE) sugarxy <-extract(r.sugar, s_proj1.3, buffer=10000, fun=Mode,na.rm=TRUE) cornxy <-extract(r.corn, s_proj1.3, buffer=10000, fun=Mode,na.rm=TRUE) distancexy <-extract(r.distance, s_proj1.3, buffer=10000, fun=Mode,na.rm=TRUE) ### Combine data in S34 s_proj1.3_xy <- data.frame(s_proj1.3,s_comp1.3$FID ,s_comp1.3$N15 ,s_comp1.3$C13,s_comp1.3$S34 ,s_comp1.3$Sea,s_comp1.3$Wat, s_comp1.3$Pop,s_comp1.3$Pop,petxy,petxy,elevationxy,clayxy,phxy,WCmapxy,BCAxy,BCBxy,SULxy,AODxy,Dustxy,Saltxy,matxy,nfertxy,nmanxy,sugarxy,cornxy,distancexy) s_proj1.4_xy <- data.frame(s_proj1.3,s_comp1.3$FID ,s_comp1.3$N15 ,s_comp1.3$C13,s_comp1.3$S34 ,s_comp1.3$Lat,s_comp1.3$Lon ,petxy,elevationxy,clayxy,phxy,WCmapxy,BCAxy,BCBxy,SULxy,AODxy,Dustxy,Saltxy,matxy,nfertxy,nmanxy,sugarxy,cornxy,distancexy) s_proj1.5_xy <- data.frame(s_proj1.3,s_comp1.3$FID ,s_comp1.3$N15 ,s_comp1.3$C13,s_comp1.3$S34 ,petxy,elevationxy,clayxy,phxy,WCmapxy,BCAxy,BCBxy,SULxy,AODxy,Dustxy,Saltxy,matxy,nfertxy,nmanxy,sugarxy,cornxy,distancexy) str(s_proj1.3_xy) str(s_proj1.4_xy) str(s_proj1.5_xy) colnames(s_proj1.3_xy)<-c("X","Y","FID","15N","C13","S34","Lat","Lon","Sea","wat","Pop","r.pet","r.elevation","r.clay","r.ph","r.WCmap","r.BCA","r.BCB","r.SUL","r.AOD","r.dust","r.salt","r.mat","r.nfert","r.nman","r.sugar","r.corn","r.distance") colnames(s_proj1.4_xy)<-c("X","Y","FID","15N","C13","S34","Lat","Lon","r.pet","r.elevation","r.clay","r.ph","r.WCmap","r.BCA","r.BCB","r.SUL","r.AOD","r.dust","r.salt","r.mat","r.nfert","r.nman","r.sugar","r.corn","r.distance") colnames(s_proj1.5_xy)<-c("X","Y","FID","15N","C13","S34","r.pet","r.elevation","r.clay","r.ph","r.WCmap","r.BCA","r.BCB","r.SUL","r.AOD","r.dust","r.salt","r.mat","r.nfert","r.nman","r.sugar","r.corn","r.distance") ###Dataset including dietary geographic and environmental data s_avg1.3 <- s_proj1.3_xy ### Dataset including geographic and environmental data only s_avg1.4 <- s_proj1.4_xy ### Dataset including environmental variables only s_avg1.5 <- s_proj1.5_xy ###############################CORRELATION fIGURES################################# library("dplyr") library("ggcorrplot") library("Hmisc") pdf("Correlation_d13C_diet.pdf", width=10, height=10) cor_5 <- rcorr(as.matrix(s_avg2[,2:20])) M <- cor_5$r p_mat <- cor_5$P par(mfrow=c(1,1)) corrplot(M, method = "color", col = col(200), type = "upper", addCoef.col = "black", # Add coefficient of correlation tl.col = "black", tl.srt = 45, #Text label color and rotation # Combine with significance level p.mat = p_mat, sig.level = 0.01, # hide correlation coefficient on the principal diagonal diag = FALSE ) dev.off() pdf("Correlation_d13C_EVS.pdf", width=10, height=10) cor_5 <- rcorr(as.matrix(s_avg1.5[,4:23])) M <- cor_5$r p_mat <- cor_5$P par(mfrow=c(1,1)) corrplot(M, method = "color", col = col(200), type = "upper", addCoef.col = "black", # Add coefficient of correlation tl.col = "black", tl.srt = 45, #Text label color and rotation # Combine with significance level p.mat = p_mat, sig.level = 0.01, # hide correlation coefficient on the principal diagonal diag = FALSE ) dev.off() ############################### DATA SPLITTING ################################# ### Identification of significant predictors using VSURF for RandomForest models ### Remove colinear covariates set.seed(123) s_avg1.1.vsurf<-VSURF(s_avg1[,3:16],s_avg1$S34) s_avg1.1.vsurf$varselect.pred plot(s_avg1.1.vsurf) s_avg1.1.sub<-s_avg1[,3:16] s_avg1.2.vsurf<-VSURF(s_avg2[,3:18],s_avg2$S34) s_avg1.2.vsurf$varselect.pred plot(s_avg1.2.vsurf) s_avg1.2.sub<-s_avg1.2[,3:18] s_avg1.3.vsurf<-VSURF(s_avg1.3[,5:24],s_avg1.3$S34) s_avg1.3.vsurf$varselect.pred plot(s_avg1.3.vsurf) s_avg1.3.sub<-s_avg1.3[,5:24] s_avg1.4.vsurf<-VSURF(s_avg1.4[,5:22],s_avg1.4$S34) s_avg1.4.vsurf$varselect.pred plot(s_avg1.4.vsurf) s_avg1.4.sub<-s_avg1.4[,5:21] s_avg1.5.vsurf<-VSURF(s_avg1.5[,5:20],s_avg1.5$S34) s_avg1.5.vsurf$varselect.pred plot(s_avg1.5.vsurf) s_avg1.5.sub<-s_avg1.5[,5:15] ### Important Variable Selection Using VSURF Identified Variables names(s_avg1.1.sub) names(s_avg1.2.sub) names(s_avg1.3.sub) names(s_avg1.4.sub) names(s_avg1.5.sub) ###Subset each dataset using the significant predictors only set.seed(123) ###Dietary predictors (Fig. 6A) s_avg1.1_VSURF<-subset(s_avg1, select=c("S34", "Sea")) ###Dietary + Geographic predictors (Fig. 6B) s_avg1.2_VSURF<-subset(s_avg2, select=c("S34", "Lon","Lat","Sea")) ###Dietary + Environmental predictors (Fig. 6D) s_avg1.3_VSURF<-subset(s_avg1.3, select=c("S34", "r.salt", "Sea", "r.ph")) #(Manual Selection) ###Geographic predictor only s_avg1.4_VSURF<-subset(s_avg1.4, select=c("S34", "Lon","Lat")) ###Environmental predictors only (Fig 6C) s_avg1.5_VSURF<-subset(s_avg1.3, select=c("S34", "r.salt","r.WCmap", "r.ph")) s_avg1.1_VSURF s_avg1.2_VSURF s_avg1.3_VSURF s_avg1.4_VSURF s_avg1.5_VSURF ### Splitting data for 10-fold cross-validation (Repeated 5 times) set.seed(123) training1 <- s_avg1.1_VSURF; training2 <- s_avg1.2_VSURF; training3 <- s_avg1.3_VSURF; training4 <- s_avg1.4_VSURF; training5 <- s_avg1.5_VSURF; fitControl <- trainControl(method = "repeatedcv", number = 10, repeats = 5, verboseIter=FALSE, returnResamp="final", savePredictions="all") ############################## MODEL COMPARISON ################################ ### Random forest model (Different models using caret cross-validation algorithms on the training dataset) mtry <- 6 tunegrid <- expand.grid(.mtry=mtry) set.seed(124) bestmtry <- tuneRF(training1, training1$S34, stepFactor=1, improve=1e-7, ntree=500) RF1 <- train(S34 ~ ., data = training1, method = "rf", tuneGrid=tunegrid,trControl= fitControl) set.seed(124) bestmtry <- tuneRF(training2, training2$S34, stepFactor=1, improve=1e-7, ntree=500) RF2 <- train(S34 ~ ., data = training2, method = "rf", tuneGrid=tunegrid,trControl= fitControl) set.seed(124) bestmtry <- tuneRF(training3, training3$S34, stepFactor=1, improve=1e-7, ntree=500) RF3 <- train(S34 ~ ., data = training3, method = "rf", tuneGrid=tunegrid,trControl= fitControl) set.seed(124) bestmtry <- tuneRF(training4, training4$S34, stepFactor=1, improve=1e-7, ntree=500) RF4 <- train(S34 ~ ., data = training4, method = "rf", tuneGrid=tunegrid,trControl= fitControl) set.seed(124) bestmtry <- tuneRF(training5, training5$S34, stepFactor=1, improve=1e-7, ntree=500) RF5 <- train(S34 ~ ., data = training5, method = "rf", tuneGrid=tunegrid,trControl= fitControl) ###################### MODEL COMPARISON (OBSERVATIONS) ######################### results <- resamples(list(RF1.1=RF1, RF1.2=RF2, RF1.3=RF3, RF1.4=RF4, RF1.5=RF5)) summary(results) theme1<-trellis.par.get() theme1$plot.symbol$col = rgb(.2, .2, .2, .4) theme1$plot.symbol$pch = 16 theme1$plot.line$col = rgb(1, 0, 0, .7) theme1$plot.line$lwd <- 2 trellis.par.set(theme1) trellis.par.set(theme1) bwplot(results, layout = c(1, 3), scales =list(x =list(relation = "free", limits = rep(list(c(0, 1), c(0, 1), c(0, 1)))))) splom(results) ############################ Visualize Best Model ############################## ### Final Variable Importance Plot par(mfrow=c(2,3)) varImpPlot(RF1$finalModel,main='RF1') varImpPlot(RF2$finalModel,main='RF2') varImpPlot(RF3$finalModel,main='RF3') varImpPlot(RF4$finalModel,main='RF4') varImpPlot(RF5$finalModel,main='RF5') ### Partial Dependance Plots par(mfrow = c(2,2)) partialPlot(RF1$finalModel, training1, x.var = "Sea") par(mfrow = c(2,2)) partialPlot(RF2$finalModel, training2, x.var = "Lon") partialPlot(RF2$finalModel, training2, x.var = "Sea") partialPlot(RF2$finalModel, training2, x.var = "Lat") par(mfrow = c(2,2)) partialPlot(RF3$finalModel, training3, x.var = "r.salt") partialPlot(RF3$finalModel, training3, x.var = "r.WCmap") partialPlot(RF3$finalModel, training3, x.var = "r.ph") partialPlot(RF3$finalModel, training3, x.var = "Sea") par(mfrow = c(2,2)) partialPlot(RF4$finalModel, training4, x.var = "Long") partialPlot(RF4$finalModel, training4, x.var = "Lat") par(mfrow = c(2,2)) partialPlot(RF5$finalModel, training5, x.var = "r.WCmap") partialPlot(RF5$finalModel, training5, x.var = "r.salt") partialPlot(RF5$finalModel, training5, x.var = "r.ph") ### Training Data Plot par(mfrow = c(2,2)) pred1<-as.data.frame(predict(RF1,training1)) plot(pred1$`predict(RF1, training1)`,training1$S34,pch=19,col="black",xlim=c(0,3),ylim=c(0,3.5),xlab="Modelled d34S",ylab="Observed d34S",cex.lab=1.5, cex.axis=1.5) lmA.1<-lm(training1$S34~pred1$`predict(RF1, training1)`) abline(lmA.1) pred2<-as.data.frame(predict(RF2,training2)) plot(pred2$`predict(RF2, training2)`,training2$S34,pch=19,col="black",xlim=c(0,3),ylim=c(0,3.5),xlab="Modelled d34S",ylab="Observed d34S",cex.lab=1.5, cex.axis=1.5) lmA.2<-lm(training2$S34~pred2$`predict(RF2, training2)`) abline(lmA.2) pred3<-as.data.frame(predict(RF3,training3)) plot(pred3$`predict(RF3, training3)`,training3$S34,pch=19,col="black",xlim=c(0,3),ylim=c(0,3.5),xlab="Modelled d34S",ylab="Observed d34S",cex.lab=1.5, cex.axis=1.5) lmA.3<-lm(training3$S34~pred3$`predict(RF3, training3)`) abline(lmA.3) pred4<-as.data.frame(predict(RF4,training4)) plot(pred4$`predict(RF4, training4)`,training4$S34,pch=19,col="black",xlim=c(0,3),ylim=c(0,3.5),xlab="Modelled d34S",ylab="Observed d34S",cex.lab=1.5, cex.axis=1.5) lmA.4<-lm(training4$S34~pred4$`predict(RF4, training4)`) abline(lmA.4) pred5<-as.data.frame(predict(RF5,training5)) plot(pred5$`predict(RF5, training5)`,training5$S34,pch=19,col="black",xlim=c(0,3),ylim=c(0,3.5),xlab="Modelled d34S",ylab="Observed d34S",cex.lab=1.5, cex.axis=1.5) lmA.5<-lm(training5$S34~pred5$`predict(RF5, training5)`) abline(lmA.5) ### Testing Data Plot (10-fold testing datasets) par(mfrow = c(2,2)) plot(RF1$pred$pred,RF1$pred$obs,pch=19, col="black", xlim=c(-1,5),ylim=c(-1,5),xlab="Modelled d34S",ylab="Observed d34S",cex.lab=1.5, cex.axis=1.5) lmB.1<-lm(RF1$pred$obs~RF1$pred$pred) abline(lmB.1) plot(RF2$pred$pred,RF2$pred$obs,pch=19, col="black", xlim=c(-1,5),ylim=c(-1,5),xlab="Modelled d34S",ylab="Observed d34S",cex.lab=1.5, cex.axis=1.5) lmB.2<-lm(RF2$pred$obs~RF2$pred$pred) abline(lmB.2) plot(RF4$pred$pred,RF4$pred$obs,pch=19, col="black", xlim=c(-1,5),ylim=c(-1,5),xlab="Modelled d34S",ylab="Observed d34S",cex.lab=1.5, cex.axis=1.5) lmB.4<-lm(RF4$pred$obs~RF4$pred$pred) abline(lmB.4) plot(RF3$pred$pred,RF3$pred$obs,pch=19, col="black", xlim=c(-1,5),ylim=c(-1,5),xlab="Modelled d34S",ylab="Observed d34S",cex.lab=1.5, cex.axis=1.5) lmB.3<-lm(RF3$pred$obs~RF3$pred$pred) abline(lmB.3) plot(RF5$pred$pred,RF5$pred$obs,pch=19, col="black", xlim=c(-1,5),ylim=c(-1,5),xlab="Modelled d34S",ylab="Observed d34S",cex.lab=1.5, cex.axis=1.5) lmB.5<-lm(RF5$pred$obs~RF5$pred$pred) abline(lmB.5) ############################## RESIDUALS ANALYSIS ############################## ### Extract residuals and add to original dataset par(mfrow = c(2,3)) pred_RF1_final<-predict(RF1,s_avg1.1) residuals<-s_avg1.1$S34-pred_RF1_final train1<-s_avg1.1 train1$resid<-residuals plot(train1$S34,train1$resid,pch=19, col="black", xlim=c(-2,4),ylim=c(-2,2),xlab="training1 d34S",ylab="Residuals",cex.lab=1.5, cex.axis=1.5) pred_RF2_final<-predict(RF2,s_avg1.2) residuals<-s_avg1.2$S34-pred_RF2_final train2<-s_avg1.2 train2$resid<-residuals plot(train2$S34,train2$resid,pch=19, col="black", xlim=c(-2,4),ylim=c(-2,2),xlab="training2 d34S",ylab="Residuals",cex.lab=1.5, cex.axis=1.5) pred_RF3_final<-predict(RF3,s_avg1.3) residuals<-s_avg1.3$S34-pred_RF3_final train3<-s_avg1.3 train3$resid<-residuals plot(train3$S34,train3$resid,pch=19, col="black", xlim=c(-2,4),ylim=c(-2,2),xlab="training3 d34S",ylab="Residuals",cex.lab=1.5, cex.axis=1.5) pred_RF4_final<-predict(RF4,s_avg1.4) residuals<-s_avg1.4$S34-pred_RF4_final train4<-s_avg1.4 train4$resid<-residuals plot(train4$S34,train4$resid,pch=19, col="black", xlim=c(-2,4),ylim=c(-2,2),xlab="training4 d34S",ylab="Residuals",cex.lab=1.5, cex.axis=1.5) pred_RF5_final<-predict(RF5,s_avg1.5) residuals<-s_avg1.5$S34-pred_RF5_final train5<-s_avg1.5 train5$resid<-residuals plot(train5$S34,train5$resid,pch=19, col="black", xlim=c(-2,4),ylim=c(-2,2),xlab="training5 d34S",ylab="Residuals",cex.lab=1.5, cex.axis=1.5) write.csv(train1,file="R_script//Output//Tables//(RF1.1)Residuals.csv") write.csv(train2,file="R_script//Output//Tables//(RF1.2)Residuals.csv") write.csv(train3,file="R_script//Output//Tables//(RF1.3)Residuals.csv") write.csv(train4,file="R_script//Output//Tables//(RF1.4)Residuals.csv") write.csv(train5,file="R_script//Output//Tables//(RF1.5)Residuals.csv") ###SPATIAL MODEL (Fig. 7)#### ###Create a raster stack with the environmental predictors canada_stack<-stack(r.salt, r.WCmap,r.ph) names(canada_stack)<-c("r.salt","r.WCmap", "r.ph") ####Create a grid to apply model extent canada.grid<-r.ph/r.ph ###Apply model spatially and create raster for mean d34S (Fig. 7) rf2 <- predict(canada_stack, RF5, ext=canada.grid, na.rm=TRUE, overwrite=TRUE, progress='text') writeRaster(rf2, filename="rf_enviro", format="GTiff", overwrite=TRUE) pred_RF5_final<-predict(RF5,s_avg1.5) residuals<-s_avg1.5$S34-pred_RF5_final train5<-s_avg1.5 train5$resid<-residuals plot(train5$S34,train5$resid,pch=19, col="black", xlim=c(-2,4),ylim=c(-2,2),xlab="training5 d34S",ylab="Residuals",cex.lab=1.5, cex.axis=1.5) ###Test factors influencing the residuals of the model (seafood consumption and travel history) train5<-train5[complete.cases(train5), ] train5$sea<-as.numeric(train5$sea) par(mfrow = c(1,3)) plot(train$resid~train5$S34) lm5<-lm(train5$resid~train5$S34) abline(lm5) plot(train5$sea,train5$resid,pch=1) plot(train5$travel,train5$resid) residuals_p <- SpatialPointsDataFrame(coords = train5[,c("X","Y")], data = train5, proj4string = crs(r.salt)) sp_canada_stack2<-as(canada_stack, "SpatialPixelsDataFrame") omm1 <- fit.gstatModel(residuals_p, S34~r.salt+r.ph+r.WCmap, sp_canada_stack2,method = "ranger") rk1 <- predict(omm1, sp_canada_stack2) omm2 <- fit.gstatModel(residuals_p, S34~r.salt+r.ph+r.WCmap, sp_canada_stack2,method = "randomForest") rf_2 <- predict(omm2, sp_canada_stack2) summary(rk1)$RMSE summary(rf_2)$RMSE res1 <- rk1@validation$residual res2 <- rf_2@validation$residual t_test <- t.test(res1, res2, alternative = "greater", paired = TRUE) v_test <- var.test(res1, res2, alternative = "greater") breakpoints<-c(-2,-1.5,-1,-0.5,0,0.5,1,1.5,2,2.5,3,3.5,4,4.5,5) d34S_agg$Col <- matlab.like2(15)[as.numeric(cut(d34S_agg$d34S,breaks = breakpoints))] pdf("d34S_isoscape_rk.pdf",width=7, height=5) par(mfrow=c(1,1)) plot(rk1, col= matlab.like2(11), breaks=breakpoints, axes = FALSE) plot(wrld_simpl_p,add=TRUE) points(d34S_agg$X.1,d34S_agg$Y.1,pch=21,col="black",bg=d34S_agg$Col,cex=1) #scaleBar(crs(na.dH_Hobson2012$mean), "topright",cex=1, seg.len=2,box.color = NULL) dev.off() pdf("d34S_isoscape_rf2.pdf",width=7, height=5) par(mfrow=c(1,1)) plot(rf_2, col= matlab.like2(11), breaks=breakpoints, axes = FALSE) plot(wrld_simpl_p,add=TRUE) points(d34S_agg$X.1,d34S_agg$Y.1,pch=21,col="black",bg=d34S_agg$Col,cex=1) #scaleBar(crs(na.dH_Hobson2012$mean), "topright",cex=1, seg.len=2,box.color = NULL) dev.off()