#housekeeping rm(list=ls()) options(max.print=1000000) # to calculate IDC heritability ################################################################# ## Installing R packages ## ## A smart function ## ################################################################# check.packages <- function(pkg){ new.pkg <- pkg[!(pkg%in%installed.packages()[,"Package"])] if (length(new.pkg)) install.packages(new.pkg, dependencies = TRUE) sapply(pkg, require, character.only = TRUE) } packages<-c("tidyverse","psych", "lme4") check.packages(packages) library(tidyverse) library(psych)# use "harmonic.mean" function from library psych to calculate the harmonic mean of the reps library(lme4) #setwd("C:/Users/") setwd("/Users//") ################################################################### # obtain observed idc scores for the checks # These will be used to obtain predicted values of block effects ################################################################### # set working directory setwd("~/Box/Manuscripts/Soy/2_paper_ISU/For submission/for PLOS One submission/MS 2/re_submitted supplementaldata and R codes") # read in the data idc_cks=read.csv("S01.csv"); (dim(idc_cks)) # there are 20606 observations with IDC scores for the checks length(unique(idc_cks$Block)) # 3499 blocks # Some checks have no scores in some blocks length(unique(idc_cks$Line)) # 640 check genotypes model_for_IDC_cks = lmer(obs_IDC_score ~ Line + (1|Block), data= idc_cks, REML = T) #################################################################### # obtain predictions of random block effects based on check scores #################################################################### random_effect_blocks=as.data.frame(ranef(model_for_IDC_cks)) length(unique(random_effect_blocks$grp)) # number of predicted block effects = 3499 random_effect_blocks$Block <- random_effect_blocks$grp random_effect_blocks <- random_effect_blocks[c("Block","condval")] random_effect_blocks$blk_effect <- random_effect_blocks$.condval ######################################################## # merge the block effects into the data set # consisting of 4171 observed idc scores for # 1000 lines that had been retained based on # predicted genotypic values for other agronomic traits ########################################################0 idc4YearsSel=read.csv("S02.csv"); dim(idc4YearsSel) head(idc4YearsSel) # 4171 records with idc scores for the 1000 selected lines length(unique(idc4YearsSel$Line)) # 1000 nobspergeno_sel= nrow(idc4YearsSel)/length(unique(idc4YearsSel$Line)) nobspergeno_sel # 4.171 # please notice that block effects already added to the S02.csv merge(idc4YearsSel,random_effect_blocks, by=c("Block"),all.x = TRUE ) # write.csv(idc4YearsSel, file= "S02.csv") ################################################### # estimate variance components for use in # estimating reliability in 1000 selected entries ################################################## model = lmer(obs_IDC_score ~ (1|Line) + blk_effect, data = idc4YearsSel, REML = T) #model variance_model=data.frame(summary(model)$varcor) variance_model # create a number of obs table for Model replicate_table_model= table(idc4YearsSel$Line)%>%data.frame() head(replicate_table_model) (harmonic_mean_reps= harmonic.mean(replicate_table_model$Freq)) # harmonic mean =2.455 ############################################################ # calculate reliabiliy # ############################################################ (reliability_model_arithmetic_mean= variance_model[1,4]/(variance_model[1,4] + variance_model[2,4]/nobspergeno_sel)) # reliability by avg nobs: 0.7674872 (reliability_model_Harmonic_mean= variance_model[1,4]/(variance_model[1,4] + variance_model[2,4]/harmonic_mean_reps)) # reliability by harmonic nobs: 0.660 print(paste0("The reliability for all entries using avg number of obs per genotype is: ", round( reliability_model_arithmetic_mean,4))) print(paste0("The reliability for all entries using the Harmonic mean per genotype is: ", round( reliability_model_Harmonic_mean,4))) ####################################################### # Evaluate residuals from general linear mixed model # of IDC evaluated as ordinal scores ####################################################### # calculate the residual from model applied to only the experimental lines residuals_model=as.data.frame(resid(model)) class(residuals_model) names(residuals_model)="residuals_exp_lines" hist(residuals_model$residuals_exp_lines, col = "blue", main = "Histogram of residuals from a general linear mixed model of IDC evaluated as ordinal scores on experimental lines ", xlab = "residuals_exp_lines", ylab = "Frequency", prob=T) lines(density(residuals_model$residuals_exp_lines, bw=0.8), col="red", lwd=4) qqnorm(residuals_model$residuals_exp_lines, main = "Normal QQ plot of residuals from a general linear mixed model of IDC evaluated as ordinal scores on experimental lines ", col="blue") qqline(residuals_model$residuals_exp_lines, col="red", lwd=2)