# 1) Settings ----

# R packages used. The attach order of the original script is kept, because a
# package attached later masks same-named functions of earlier ones (dplyr is
# attached after plyr on purpose). readxl was attached last in the original.
library(chron)
library(readr)
library(stringr)
library(plyr)
library(dplyr)
library(ggplot2)
library(gridExtra)
library(doBy)
library(gdata)
library(tableone)
library(ggpmisc)
library(splines)
library(lme4)
library(lmerTest)
library(effects)
library(gamm4)
library(lattice)
library(lubridate)
library(mgcv)
library(itsadug)
library(stargazer)
library(readxl)

# Helpers ----

# Return the most frequent non-missing value of `x` as a string (ties resolve
# to the first level in table() order), or NA when `x` has no non-missing value.
most_frequent <- function(x) {
  counts <- table(x)
  if (length(counts) == 0L) {
    return(NA_character_)
  }
  names(counts)[which.max(counts)]
}

# Keep, within each YEAR x GENDER group, the rows whose position in the
# time-sorted order is listed in `ranks`; returns a plain data.frame.
select_ranked <- function(db, ranks) {
  db %>%
    dplyr::arrange(time) %>%
    dplyr::group_by(YEAR, GENDER) %>%
    # `!!` injects the integer vector so it can never be mistaken for a column
    dplyr::slice(!!ranks) %>%
    as.data.frame()
}

# Keep the weather records whose `hour` (converted to a fraction of a day by
# chron) lies in [from, to]; records with an unparseable hour are dropped.
filter_race_hours <- function(weather_hours, from, to) {
  day_fraction <- as.numeric(chron(times = weather_hours$hour))
  keep <- !is.na(day_fraction) & day_fraction >= from & day_fraction <= to
  weather_hours[keep, ]
}

# Collapse hourly weather records into one row per YEAR.
summarise_weather <- function(weather_hours) {
  weather_hours %>%
    dplyr::group_by(YEAR) %>%
    dplyr::summarise(
      count = dplyr::n(),
      # average temperature
      temp_c = mean(Temperature_C, na.rm = TRUE),
      # average WBGT
      wbgt = mean(WBGT_C, na.rm = TRUE),
      # most frequent wind direction
      wind_m = most_frequent(Wind),
      # sum of the hourly amounts of precipitation
      # NOTE(review): unlike the means, this sum has no na.rm, so a single
      # missing hourly value makes the yearly total NA - confirm it is intended
      prec = sum(`Precipitation (mm)`),
      # average pressure
      press = mean(`Pressure (hPa)`, na.rm = TRUE),
      # average wind speed
      wind_sp = mean(wind_speed_km, na.rm = TRUE)
    ) %>%
    as.data.frame()
}

# Recode a wind direction by its first letter: W -> tail wind, E -> head wind,
# S or N -> side wind. Any other first letter is returned unchanged.
recode_wind_dir <- function(wind_m) {
  wind_dir <- str_sub(wind_m, 1, 1)
  wind_dir[wind_dir %in% "W"] <- "tail wind"
  wind_dir[wind_dir %in% "E"] <- "head wind"
  wind_dir[wind_dir %in% c("S", "N")] <- "side wind"
  wind_dir
}

# Left-join the yearly weather summary (without its `count` column) onto a
# performance dataset by YEAR and add the recoded `wind_dir` column.
add_weather <- function(db, weather_summary) {
  weather_cols <- setdiff(names(weather_summary), "count")
  merged <- plyr::join(
    db,
    weather_summary[, weather_cols, drop = FALSE],
    by = "YEAR"
  )
  merged$wind_dir <- recode_wind_dir(merged$wind_m)
  merged
}

# 2) Data import and data manipulation ----

# 2.1 - importing the Boston Marathon clean dataset (S1 dataset)
# Dataset already used in:
#   Knechtle B, Di Gangi S, Rüst CA, Nikolaidis PT.
#   The performance differences between the sexes in the Boston Marathon
#   from 1972 to 2017.
#   June 2018, Journal of Strength and Conditioning Research.
#
# List of variables:
#   YEAR       = year of competition
#   Name_group = name and surname of the runner
#   HOME_C     = nationality of the runner
#   GENDER     = sex of the runner
#   time       = race time (hours:minutes:seconds)
#   natio      = country area of the runner
#   Name_class = id variable with name, surname, nationality, sex, and period
#                of competition (to uniquely identify runners)
#   Num_name   = numeric code for unique Name_class

# NOTE(review): machine-specific absolute path; adjust before running elsewhere.
myfile <- "C:/Users/Stefania/Desktop/marathon_2_paper/fit/fit 2/revision/S1_dataset/S1_dataset.xlsx"

db_FM_W <- read_excel(
  myfile,
  sheet = "S1_dataset",
  col_types = c(
    "numeric", "text", "text", "text", "text", "text", "text", "numeric"
  )
)

# convert time (string) into a chron time (h:min:sec)
db_FM_W$time <- chron(times = db_FM_W$time)

# the literal string "NA" in natio stands for a missing value
db_FM_W$natio[db_FM_W$natio %in% "NA"] <- NA

# Selection of groups (ranks by finishing time within each year and sex)
# near elite group (top 101:200)
db_top101 <- select_ranked(db_FM_W, 101:200)
# near elite group (top 21:100)
db_top21 <- select_ranked(db_FM_W, 21:100)
# annual top ten
db_topten_FM <- select_ranked(db_FM_W, 1:10)
# annual winners
db_top_FM <- select_ranked(db_FM_W, 1:1)

# 2.2 - importing the weather dataset (S2 dataset)
weather <- read_excel(
  myfile,
  sheet = "S2_dataset",
  col_types = c(
    "numeric", "numeric", "numeric", "text", "numeric",
    "numeric", "text", "numeric", "numeric", "numeric"
  )
)

# Cut-offs for the race-hour windows, as fractions of a day
# (0.3708333 is about 08:54, 0.5416667 about 13:00, 0.5 is 12:00).
race_start <- 0.3708333
near_elite_end <- 0.5416667
winners_end <- 0.5

# All finishers: weather conditions "averaged" over all hourly records of the
# race (hours 9-16). For wind direction (Wind) the most frequent value over
# the interval is taken; for precipitation the hourly values are summed.
weather_avg <- summarise_weather(weather)

# Near elite groups and annual top ten: weather conditions "averaged" over the
# hours of the race (9-13)
weather_avg2 <- filter_race_hours(weather, race_start, near_elite_end)
weather_avg2 <- summarise_weather(weather_avg2)

# Annual winners: weather conditions "averaged" over the hours of the race
# (9-12)
weather_avg3 <- filter_race_hours(weather, race_start, winners_end)
weather_avg3 <- summarise_weather(weather_avg3)

# 2.3 - create the complete datasets (weather and marathon performance
# together), including the recoded wind direction (head/tail/side wind)
# All finishers
db_FM_W <- add_weather(db_FM_W, weather_avg)
# Top 101:200
db_top101 <- add_weather(db_top101, weather_avg2)
# Top 21:100
db_top21 <- add_weather(db_top21, weather_avg2)
# Top 10
db_topten_FM <- add_weather(db_topten_FM, weather_avg2)
# Annual winners
db_top_FM <- add_weather(db_top_FM, weather_avg3)

# 3) Statistical analysis ----

# defining factors and reference categories for categorical predictors
db_FM_W$GENDER <- factor(db_FM_W$GENDER)
db_top_FM$GENDER <- factor(db_top_FM$GENDER)
db_topten_FM$GENDER <- factor(db_topten_FM$GENDER)
db_top101$GENDER <- factor(db_top101$GENDER)
db_top21$GENDER <- factor(db_top21$GENDER)

db_FM_W$natio <- relevel(factor(db_FM_W$natio), ref = "KEN-ETH")
db_topten_FM$natio <- relevel(factor(db_topten_FM$natio), ref = "KEN-ETH")
db_top21$natio <- relevel(factor(db_top21$natio), ref = "KEN-ETH")

db_FM_W$wind_dir <- relevel(factor(db_FM_W$wind_dir), ref = "tail wind")
db_top_FM$wind_dir <- relevel(factor(db_top_FM$wind_dir), ref = "tail wind")
db_topten_FM$wind_dir <- relevel(
  factor(db_topten_FM$wind_dir),
  ref = "tail wind"
)
db_top101$wind_dir <- relevel(factor(db_top101$wind_dir), ref = "tail wind")
db_top21$wind_dir <- relevel(factor(db_top21$wind_dir), ref = "tail wind")

# defining country groups for annual winners: Asia, Canada and Oceania are
# merged into a single "Other" category
db_top_FM$natio2 <- as.vector(db_top_FM$natio)
db_top_FM$natio2[db_top_FM$natio2 %in% c("Asia", "Canada", "Oceania")] <- "Other"
db_top_FM$natio2 <- relevel(factor(db_top_FM$natio2), ref = "KEN-ETH")

# GAMM models: a smooth of YEAR per sex, weather covariates as fixed effects,
# and a random intercept per runner (Num_name)

# All finishers
fit_g1 <- gamm4(
  time ~ s(YEAR, by = GENDER) + natio * GENDER + temp_c + wbgt + prec +
    press + wind_sp + wind_dir,
  random = ~ (1 | Num_name),
  data = db_FM_W,
  family = gaussian
)

# Annual winners (this model has no wbgt term)
fit_g2 <- gamm4(
  time ~ s(YEAR, by = GENDER) + natio2 * GENDER + temp_c + prec + press +
    wind_sp + wind_dir,
  random = ~ (1 | Num_name),
  data = db_top_FM,
  family = gaussian
)

# Top 101:200 (this model has no nationality term)
fit_g3 <- gamm4(
  time ~ s(YEAR, by = GENDER) + GENDER + temp_c + wbgt + prec + press +
    wind_sp + wind_dir,
  random = ~ (1 | Num_name),
  data = db_top101,
  family = gaussian
)

# Top 21:100
fit_g4 <- gamm4(
  time ~ s(YEAR, by = GENDER) + natio + GENDER + temp_c + wbgt + prec +
    press + wind_sp + wind_dir,
  random = ~ (1 | Num_name),
  data = db_top21,
  family = gaussian
)

# Top 10
fit_g5 <- gamm4(
  time ~ s(YEAR, by = GENDER) + natio + GENDER + temp_c + wbgt + prec +
    press + wind_sp + wind_dir,
  random = ~ (1 | Num_name),
  data = db_topten_FM,
  family = gaussian
)