######################
#Sato et al. R script#
######################

#load data
# d_all: insect data; later code relies on its Line and Site columns and on
#        fixed column positions (see headerID), so the column order matters.
d_all = read.csv(file = "AthaInsect2016_seasonalTotal.csv", header = TRUE)
# d_gls: glucosinolate table; the accession identifier (Accession) is expected
#        in the first column, all remaining columns go into the PCA.
d_gls = read.csv(file = "AthaGlucosinolates.csv", header = TRUE)

#load library
library(vegan)
library(nlme)

#########
#GLS PCA#
#########
# Keep only the accessions that occur in the insect data, ordered by their
# first appearance in d_all$Line (same row order the former row-by-row rbind
# produced). order() is stable, so rows within an accession keep file order.
line_ids = unique(as.character(d_all$Line))
line_ids = line_ids[!is.na(line_ids)]
gls = d_gls[d_gls$Accession %in% line_ids, ]
gls = gls[order(match(gls$Accession, line_ids)), ]
rownames(gls) = gls[,1]

# PCA on all columns except the first (accession id), scaled to unit variance
pca_res = prcomp(gls[,-1], scale.=TRUE)

#fig. S1: GLS PCA
svg("FigS1_GLS_PCA.svg",height=4,width=4)
biplot(pca_res,cex=0.8,cex.axis=0.8,las=1)
dev.off()

# Scores of the first three principal components, one row per accession
pca_gls = data.frame(Line = gls$Accession,
                     PC1 = pca_res$x[,1],
                     PC2 = pca_res$x[,2],
                     PC3 = pca_res$x[,3])

# Look up the scores for every row of d_all in one vectorized step.
# A Line without glucosinolate data gets NA instead of stopping the script
# (the former per-row subset()/as.numeric() lookup failed on an empty match).
pc_row = match(d_all$Line, pca_gls$Line)
gls_PC1 = pca_gls$PC1[pc_row]
gls_PC2 = pca_gls$PC2[pc_row]
gls_PC3 = pca_gls$PC3[pc_row]

# Appended at the end: later code addresses columns of d_all by position
d_all = cbind(d_all, gls_PC1, gls_PC2, gls_PC3)


#########################################
#summarizing guild and community indices#
#########################################
# Column positions (in d_all) of the insect columns that form the community table
headerID = c(11,14,15,16:21,24:29)
comm = d_all[,headerID]

# Community-level indices per row of d_all
richness = specnumber(comm)                    # number of non-zero columns (vegan)
shannon_h = diversity(comm,index="shannon")    # Shannon diversity (vegan)
total = rowSums(comm)                          # summed abundance

# Guild totals: row sums over the named columns of each guild.
# rowSums() replaces apply(..., 1, sum): same values, no per-row R-level loop.
exoInsects = c("Ps","Pa","Ar","Px","Pr_sum","Al","Bh","Tni")
exo = rowSums(d_all[,exoInsects])

endoInsects = c("Mp_sum","Le_sum","Tt","Fo","mine")
endo = rowSums(d_all[,endoInsects])

specInsects = c("Le_sum","Ps","Pa","Ar","Px","Pr_sum")
spec = rowSums(d_all[,specInsects])

genInsects = c("Mp_sum","Tt","Fo","Al","Bh","Tni","mine")
gen = rowSums(d_all[,genInsects])

# Keep this column order: later code selects these indices by position (34:40)
d_all = cbind(d_all, exo, endo, spec, gen, richness, shannon_h, total)


##############
#RDA analysis#
##############
# Redundancy analyses (vegan::rda) of the log(x+1)-transformed insect columns
# selected by headerID; scale=T scales every response column to unit variance.
#
# Naming note: each reduced model is named after the term that was DROPPED
# from it (i.e. the term it is used to test), not after the terms it contains:
#   rda_all  : Line + Site + Line:Site (full model)
#   rda_int  : Line + Site             (interaction dropped)
#   rda_site : Line only               (Site dropped)
#   rda_geno : Site only               (Line dropped)
#   rda_null : intercept only          (no constraining term)
rda_all = rda(log(d_all[,headerID]+1)~d_all$Line*d_all$Site,scale=T)
rda_int = rda(log(d_all[,headerID]+1)~d_all$Line+d_all$Site,scale=T)
rda_site = rda(log(d_all[,headerID]+1)~d_all$Line,scale=T)
rda_geno = rda(log(d_all[,headerID]+1)~d_all$Site,scale=T)
rda_null = rda(log(d_all[,headerID]+1)~1,scale=T)

# Nested model comparisons (reduced model first):
#   interaction: additive vs. full; Site: Line-only vs. additive;
#   Line: Site-only vs. additive.
# NOTE(review): anova() on rda fits is vegan's permutation-based test, and no
# random seed is set in this script, so P-values will differ slightly
# between runs.
anova(rda_int,rda_all)
anova(rda_site,rda_int)
anova(rda_geno,rda_int)
# Overall tests of single models. rda_site has no counterpart here, and
# rda_null has no constraining term to test.
anova(rda_all)
anova(rda_int)
anova(rda_geno)
anova(rda_null)

#fig. 2: RDA plot
# Per-site data (also used by later sections) and per-site RDA on Line
d_z = subset(d_all, Site=="CHZ")
d_j = subset(d_all, Site=="JPN")
rda_chz = rda(log(d_z[,headerID]+1)~d_z$Line,scale=TRUE,data=d_z)
rda_jpn = rda(log(d_j[,headerID]+1)~d_j$Line,scale=TRUE,data=d_j)

# Draw one RDA panel: empty frame, zero lines, Line centroids, SE ellipses
# per Line, and one arrow per response column from the origin to its score.
#   fit         : fitted rda object
#   line_groups : grouping vector for the ellipses (one value per row of the data)
#   panel_title : panel heading
rda_panel = function(fit, line_groups, panel_title) {
  plot(1,1,type="n",xlim=c(-1.5,0.5),ylim=c(-1.5,1.5),las=1,xlab="RDA1",ylab="RDA2",main=panel_title,cex.axis=1.2)
  abline(h=0,lty=2,col="grey"); abline(v=0,lty=2,col="grey")
  points(fit,display="cn",pch=1)
  ordiellipse(fit, groups = line_groups, kind = "se", draw = "lines", col="grey")
  sp_scores = scores(fit)$species
  # Scalar origin is recycled to the number of responses. The former
  # rep(0,16) did not match the 15 columns in headerID, so the first arrow
  # was drawn twice.
  arrows(0,0,sp_scores[,1],sp_scores[,2],length=0.1)
}

svg("Fig2_RDA.svg",height=4,width=12)
par(mfcol=c(1,2))
rda_panel(rda_chz, d_z$Line, "Zurich")
rda_panel(rda_jpn, d_j$Line, "Otsu")
dev.off()

#fig. S2: Abundance
svg("FigS2_abundance.svg",height=4,width=8)
par(mfcol=c(1,2))
# One panel per site (d_z first, then d_j): column totals of the community
# table, bars sorted from most to least abundant, 50 units of headroom on y.
for(site_data in list(d_z, d_j)) {
  site_totals = apply(site_data[,headerID], 2, sum)
  barplot(sort(site_totals, decreasing = TRUE), las=2, ylim=c(0, max(site_totals)+50))
}
dev.off()


###################
#heritability test#
###################
# Broad-sense heritability and its likelihood-ratio test for several responses.
#
# For every response name in y_vec a random-intercept model
#   log(y+1) ~ 1, random = ~1|Line   (REML)
# is fitted to d. H2 is the among-Line variance divided by the sum of the
# among-Line and residual variances. The Line effect is tested by a
# likelihood-ratio test against an intercept-only lm (REML log-likelihood,
# 1 df).
#
# Arguments:
#   d     : data frame containing a Line column and the columns named in y_vec
#   y_vec : character vector of response column names
#   boot  : number of bootstrap resamples (rows resampled with replacement)
#           for the percentile interval of H2; 0 skips the bootstrap and
#           yields NA limits
# Value:
#   character matrix, one row per response, with columns
#   "y", "H2", "LCI95%", "UCI95%", "Chi", "P"
h_test = function(d, y_vec, boot) {

  # H2 from a fitted lme: element 1 of VarCorr() is the random-intercept
  # variance, element 2 the residual variance
  h2_of = function(fit) {
    vc = VarCorr(fit)
    g_var = as.numeric(vc[1])
    g_var/(g_var + as.numeric(vc[2]))
  }

  n_vec = c(); h_vec = c(); h2_bootL = c(); h2_bootU = c(); chi_vec = c(); p_vec = c()
  for(i in y_vec) {

    n_vec = c(n_vec, i)

    # The same formula serves the mixed model, the null model and every
    # bootstrap refit, so it is built once per response
    form_i = as.formula(paste0("log(",i,"+1)~1"))
    lme_res = lme(form_i, random=~1|Line, data=d, method="REML")
    h2 = h2_of(lme_res)
    geno_LL = logLik(lme_res)

    null_LL = logLik(lm(form_i,data=d),REML=TRUE)

    LR_chi = as.numeric(2*(geno_LL-null_LL))
    h_vec = c(h_vec, h2)
    chi_vec = c(chi_vec, LR_chi)
    p_vec = c(p_vec, pchisq(LR_chi, 1, lower.tail=FALSE))

    #Note: Below is a bootstrap code for heritability estimates
    #      This result was not included in the manuscript, because upper CIs are too broad and uninformative.
    # seq_len() makes boot = 0 run zero resamples (1:0 would run two).
    # Resamples whose model fit fails are skipped.
    h2_boot = as.numeric(unlist(lapply(seq_len(max(boot, 0)), function(j) {
      d_boot = d[sample.int(nrow(d), replace=TRUE),]
      fit_j = tryCatch(lme(form_i, random=~1|Line, data=d_boot, method="REML"),
                       error=function(e) NULL)
      if(is.null(fit_j)) NULL else h2_of(fit_j)
    })))

    if(length(h2_boot) > 0) {
      ci = quantile(h2_boot, c(0.025, 0.975))
    } else {
      ci = c("2.5%"=NA_real_, "97.5%"=NA_real_)
    }
    # The names ("2.5%") are kept on purpose: cbind() below takes the row
    # names of the result from them, as it always did
    h2_bootL = c(h2_bootL, ci[1])
    h2_bootU = c(h2_bootU, ci[2])
  }
  res = cbind(n_vec, h_vec, h2_bootL, h2_bootU, chi_vec, p_vec)
  colnames(res) = c("y", "H2", "LCI95%", "UCI95%", "Chi", "P")
  return(res)
}

# Community/guild indices are addressed by their column positions in d_all
index_cols = colnames(d_all)[34:40]

# Otsu (JPN): site-specific insect columns followed by the indices
h_jpn = h_test(subset(d_all,Site=="JPN"), c("Ar", "Px", "Pr_sum", "Mp_sum", "Le_sum", "Fo", index_cols), 999)
# adjust FDR by the number of response variables
h_jpn[,"P"] = p.adjust(as.numeric(h_jpn[,"P"]), method="BH")
write.csv(h_jpn, "H2listJPN.csv")

# Zurich (CHZ): same procedure with that site's insect columns
h_chz = h_test(subset(d_all,Site=="CHZ"), c("Hole", "Px", "Ps", "Pa", "Le_sum", "Fo", index_cols), 999)
h_chz[,"P"] = p.adjust(as.numeric(h_chz[,"P"]), method="BH")
write.csv(h_chz, "H2listCHZ.csv")


##########################################
#multiple regression (natural accessions)#
##########################################
# Standardized multiple regression of each response on the plant traits.
#
# For every name in y_list, scale(log(y+1)) is regressed on trichome density,
# the first two glucosinolate PCs, Bolting, leaf length and Block (factor).
# Coefficient rows 2-6 of the summary table (the five terms before the Block
# effects) are kept.
#
# Arguments:
#   d      : data frame with Trichome, gls_PC1, gls_PC2, Bolting, LeafLen,
#            Block and the response columns
#   y_list : character vector of response column names
# Value:
#   character matrix with five rows per response; column 1 is the response
#   name, columns 2-5 are estimate, standard error, t value and p-value.
#   NULL when y_list is empty.
coef_trait = function(d, y_list) {

  predictors = "scale(log(Trichome+1))+scale(gls_PC1)+scale(gls_PC2)+Bolting+scale(LeafLen)+factor(Block)"

  # coefficient rows for a single response, labelled with its name
  rows_for = function(resp) {
    fit = lm(paste0("scale(log(",resp,"+1))~",predictors), data=d)
    coef_tab = summary(fit)$coefficients
    cbind(rep(resp,5), coef_tab[2:6,])
  }

  do.call(rbind, lapply(y_list, rows_for))
}

#table S1: p-table, natural accessions

# BH-adjust the p-values (column 5) of a coef_trait() result separately for
# each explanatory trait, i.e. across all response variables.
# Rows come in consecutive groups of n_trait per response, so trait k sits in
# rows k, k+n_trait, k+2*n_trait, ... The row set is derived from nrow():
# the former hard-coded 5*(1:12) covered only 12 of the 13 responses, leaving
# the last response unadjusted and correcting the others for 12 tests.
fdr_by_trait = function(coef_mat, n_trait = 5) {
  stopifnot(nrow(coef_mat) %% n_trait == 0)
  for(k in seq_len(n_trait)) {
    rows = seq(k, nrow(coef_mat), by=n_trait)
    coef_mat[rows,5] = p.adjust(as.numeric(coef_mat[rows,5]),"BH")
  }
  coef_mat
}

coef_JPN = coef_trait(subset(d_all,Site=="JPN"), c("Ar", "Px", "Pr_sum", "Mp_sum", "Le_sum", "Fo", colnames(d_all)[c(34:40)]))
coef_JPN = fdr_by_trait(coef_JPN) #adjust FDR by the number of response variables
colnames(coef_JPN) = c("y", "Coef", "SE", "t", "P")
write.csv(coef_JPN, "coef_JPN.csv")

coef_CHZ = coef_trait(subset(d_all,Site=="CHZ"), c("Hole", "Px", "Ps", "Pa", "Le_sum", "Fo", colnames(d_all)[c(34:40)]))
coef_CHZ = fdr_by_trait(coef_CHZ)
colnames(coef_CHZ) = c("y", "Coef", "SE", "t", "P")
write.csv(coef_CHZ, "coef_CHZ.csv")

#fig. 3: p-heatmap

# Draw one heatmap panel from a coefficient table (column 4 = t, column 5 = P;
# consecutive groups of n_trait rows per response).
# Cell codes and colours:
#   1 white    : P >= 0.05
#   2 darkred  : P <  0.01, positive t     3 red  : 0.01 <= P < 0.05, positive t
#   4 darkblue : P <  0.01, negative t     5 blue : 0.01 <= P < 0.05, negative t
# P-values exactly equal to 0.01 or 0.05 are now classified (they used to
# stay NA); cells with missing P stay NA.
p_heat_panel = function(coef_mat, panel_title, n_trait = 5) {
  p_val = as.numeric(coef_mat[,5])
  t_val = as.numeric(coef_mat[,4])
  stopifnot(length(p_val) %% n_trait == 0)

  p_seq = rep(NA,length(p_val))
  p_seq[which(p_val>=0.05)] = 1
  p_seq[which((p_val<0.01)&(t_val>0))] = 2
  p_seq[which((p_val<0.05)&(p_val>=0.01)&(t_val>0))] = 3
  p_seq[which((p_val<0.01)&(t_val<0))] = 4
  p_seq[which((p_val<0.05)&(p_val>=0.01)&(t_val<0))] = 5

  # one row of the matrix per response, one column per trait
  n_insect = length(p_seq)/n_trait
  # Explicit breaks pin code k to colour k. Without them image() stretches the
  # palette over the observed range of codes, so colours shift whenever a
  # category is absent from the panel.
  image(x=seq_len(n_insect),y=seq_len(n_trait),z=matrix(p_seq,n_insect,n_trait,byrow=TRUE),
        col=c("white","darkred","red","darkblue","blue"),breaks=seq(0.5,5.5,by=1),
        las=1,ylab="Trait",xlab="Insect", main=panel_title)
}

svg("Fig3_pHeatmap.svg",height=4,width=8)
par(mfcol=c(1,2))
p_heat_panel(coef_CHZ, "Zurich")
p_heat_panel(coef_JPN, "Otsu")
dev.off()

###################################
#multiple regression (gl1 mutants)#
###################################
# Each gl1 mutant is paired with the line it is compared against; `group`
# identifies the pair (1: gl1-1/Ler-1, 2: gl1-2/Col-0) and is used as the
# random effect in coef_gl1(). The group vector is built from the actual
# number of rows in each subset instead of assuming 20 rows per pair.
dj_gl1 = subset(d_j, Line %in% c("gl1-1","Ler-1"))
dj_gl2 = subset(d_j, Line %in% c("gl1-2","Col-0"))
dj_gl = rbind(dj_gl1,dj_gl2)
group = rep(c(1,2), times=c(nrow(dj_gl1),nrow(dj_gl2)))
dj_gl = cbind(dj_gl,group)

dz_gl1 = subset(d_z, Line %in% c("gl1-1","Ler-1"))
dz_gl2 = subset(d_z, Line %in% c("gl1-2","Col-0"))
dz_gl = rbind(dz_gl1,dz_gl2)
group = rep(c(1,2), times=c(nrow(dz_gl1),nrow(dz_gl2)))
dz_gl = cbind(dz_gl,group)

# Mixed-model regression of each response on plant traits for the gl1 data.
#
# For every name in y_list, scale(log(y+1)) is modelled by trichome density,
# Bolting, leaf length and Block (factor) with a random intercept per `group`
# (lme, fitted by ML). Rows 2-4 of the fixed-effect table (the three terms
# before the Block effects) are kept.
#
# Arguments:
#   d      : data frame with Trichome, Bolting, LeafLen, Block, group and the
#            response columns
#   y_list : character vector of response column names
# Value:
#   character matrix with three rows per response; column 1 is the response
#   name, followed by the columns of the lme tTable. NULL when y_list is empty.
coef_gl1 = function(d, y_list) {

  predictors = "scale(log(Trichome+1))+Bolting+scale(LeafLen)+factor(Block)"

  # fixed-effect rows for a single response, labelled with its name
  rows_for = function(resp) {
    model_formula = as.formula(paste0("scale(log(",resp,"+1))~",predictors))
    fit = lme(model_formula, random=~1|group, data=d, method="ML")
    cbind(rep(resp,3), summary(fit)$tTable[2:4,])
  }

  do.call(rbind, lapply(y_list, rows_for))
}

#table S2: p-table, gl1 mutants
# Responses: one site-specific column plus the indices at positions
# 34, 36, 38, 39 and 40 of d_all (6 responses x 3 traits = 18 rows).
gl1_index_cols = colnames(d_all)[c(34,36,38,39,40)]

GL1_jpn = coef_gl1(dj_gl, c("Ar", gl1_index_cols))
colnames(GL1_jpn) = c("y", "Coef", "SE", "df", "t", "P")
# BH adjustment per trait: every third row belongs to the same trait
for(k in 0:2) {
  trait_rows = seq(3-k, by=3, length.out=6)
  GL1_jpn[trait_rows,"P"] = p.adjust(as.numeric(GL1_jpn[trait_rows,"P"]), method="BH")
}
write.csv(GL1_jpn, "coefGL1_JPN.csv")

GL1_chz = coef_gl1(dz_gl, c("Hole", gl1_index_cols))
colnames(GL1_chz) = c("y", "Coef", "SE", "df", "t", "P")
for(k in 0:2) {
  trait_rows = seq(3-k, by=3, length.out=6)
  GL1_chz[trait_rows,"P"] = p.adjust(as.numeric(GL1_chz[trait_rows,"P"]), method="BH")
}
write.csv(GL1_chz, "coefGL1_CHZ.csv")


#fig. 4: barplot
# Bar plot of the per-Line mean of one variable with +/- 1 SE error bars,
# bars sorted by increasing mean.
#
# Arguments:
#   d : data frame with a Line column and the column named by i
#   i : name of the variable to plot (character)
# Value:
#   invisibly, a data frame (Line, mean, se) in plotting order
geno_bar = function(d, i) {
  form_fig = as.formula(paste0(i,"~Line"))

  # The three aggregates share the same grouping and therefore the same row
  # order, so one ordering index can be applied to all of them
  mean_vec = aggregate(form_fig,data=d,FUN=mean)
  sd_vec = aggregate(form_fig,data=d,FUN=sd)
  n_vec = aggregate(form_fig,data=d,FUN=length)

  # Sort by mean. The SDs were previously reordered with sd_vec[mean_vec[,1],],
  # i.e. indexed by Line; that only works when Line is a factor and returns
  # NA rows for character Line (read.csv default since R 4.0).
  ord = order(mean_vec[,2])
  mean_vec = mean_vec[ord,]
  # SE uses the observed number of values per Line (was a fixed sqrt(10))
  se_vec = sd_vec[ord,2]/sqrt(n_vec[ord,2])

  mean_bar = barplot(mean_vec[,2],ylim=c(0,max(mean_vec[,2])+max(se_vec)),names.arg=as.character(mean_vec[,1]),las=2,ylab=i,cex.names=2,cex.axis=2)
  # zero-length arrow heads: plain vertical error bars
  arrows(mean_bar, mean_vec[,2]+se_vec, mean_bar, mean_vec[,2]-se_vec,code=3,length=0)

  invisible(data.frame(Line=as.character(mean_vec[,1]), mean=mean_vec[,2], se=se_vec, stringsAsFactors=FALSE))
}

svg("Fig4_barplot.svg",height=12,width=12)
# 3 x 2 layout filled column-wise: left column Zurich (d_z), right column Otsu (d_j)
par(mfcol=c(3,2))
for(resp in c("Hole","exo","richness")) {
  geno_bar(d=d_z,i=resp)
}
for(resp in c("Ar","exo","richness")) {
  geno_bar(d=d_j,i=resp)
}
dev.off()