# This code runs and analyses the Monte-Carlo simulations of the effects of QRPs in 
# meta-analyses.
#
# Author: James Spottiswoode (james.spottiswoode@gmail.com)
#
# The code is divided into 3 sections.  
#
# The first section, functions, defines functions which are called later. 
#
# The second section entitled "code to run Monte Carlo simulations of MA" runs the desired
# number of Monte Carlo simulations.  As this section uses a genetic algorithm to optimise
# parameters it can take around 20 hours to run on an 8 core computer (Mac, 2.5GHz Intel Core i7, 16GB RAM).
# 
# The final section entitled "Analyse Results" gathers data from a log file generated during the 
# simulation run and summarizes the results in a CSV file for importing into Excel.
#
# Note: This code was developed under OSX (Mac) operating system.  The parallel processing section
# may require some changes to work under Windows.

# ----------------------------------------------- Functions ---------------------------------------
### Publication probability as a function of a study's P value
# Arguments:
# pvalue = P of study
#
# Value:
# probability of publication, computed as (25 + 40 * (tanh(2 - 10 * pvalue) + 1)) / 100
pubprob <- function(pvalue) {
  tanh_term <- tanh(2 - 10 * pvalue) + 1
  (25 + 40 * tanh_term) / 100
}

### Function returns binomial tail probability
# Arguments:
# h = hits
# n = number of trials
# p = probability of a hit
# mode = 'ge' for P(X >= h) (default), 'gt' for P(X > h), 'le' for P(X <= h)
#
# Value:
# Tail probability. Results below 1e-16 are replaced by .Machine$double.eps so that
# downstream code never receives a zero (or negative) probability.
#
# The tails are taken from pbinom() rather than from 1 - sum(dbinom(a:b, ...)):
# the ranges 0:(h - 1) with h = 0 and (h + 1):n with h = n run backwards in R and
# made P(X >= 0) and P(X <= n) come out smaller than 1.
poz <- function(h, n, p, mode = 'ge') {
  r <- switch(mode,
    ge = pbinom(h - 1, n, p, lower.tail = FALSE),
    gt = pbinom(h, n, p, lower.tail = FALSE),
    le = pbinom(h, n, p),
    stop("mode must be one of 'ge', 'gt' or 'le'", call. = FALSE)
  )
  if (r < 1e-16) r <- .Machine$double.eps
  r
}

# The rest of this file calls the function under the name pob(); keep both names valid.
pob <- poz

### Function returns Z of a given P
# Arguments:
# x = upper-tail probability
#
# Value:
# Standard normal deviate z with P(Z > z) = x
#
# The upper tail is requested directly: forming 1 - x first rounds to exactly 1 for
# very small x, which made the result Inf.
zop <- function(x) qnorm(x, lower.tail = FALSE)


### Function to simulate one MA
# Arguments:
# C2P_prevalence = prevalence of confirmation to pilot QRP
# C2P_probability_threshold = running P above which a C2P study is abandoned
# C2P_trial_number = trial when conversion of confirmation to pilot occurred
# P2C_prevalence, P2C_probability_threshold, P2C_trial_number = prevalence of the pilot to
#   confirmation QRP, P below which the pilot is merged in, and number of pilot trials
# OS_prevalence, OS_probability_threshold, OS_min_trials = optional stopping: prevalence,
#   running P below which the study stops, and trial count that must be exceeded first
# OE_prevalence, OE_probability_threshold, OE_max_extra_trials = optional extension:
#   prevalence, upper P limit for extending, and maximum number of added trials
# PB_prevalence = prevalence of P dependent filedrawing (uses pubprob())
# RmSS_prevalence, RmSS_fraction = prevalence of subject removal and fraction of trials removed
# psi_hit_rate = hit probability used to generate every simulated trial
# nmonte = accepted so existing calls keep working; not used inside the function
# mce_hit_rate = chance hit rate used for the binomial P values
#
# Globals read: d (column trials), n (rows in the result frames), nstudies, pob(), pubprob()
#
# Value:
# list of two dataframes: results (one row per study, remaining rows NA, MA level counters
# repeated in the n_* columns) and diags (same layout, returned unfilled)
#
# Note: The simulation code is deliberately not written to be elegant, fast code, but rather
# to make clear the steps each simulated experiment goes through as QRP's occur, or not.
# The dataframe formed includes a comment field which logs what happened in English.
# The order of the runif() calls is part of the behaviour and must not be rearranged.

ma_sim <- function(C2P_prevalence,
      C2P_probability_threshold, C2P_trial_number, P2C_prevalence, P2C_probability_threshold, P2C_trial_number,
      OS_prevalence, OS_probability_threshold,
      OS_min_trials, OE_prevalence, OE_probability_threshold, OE_max_extra_trials, PB_prevalence,
      RmSS_prevalence, RmSS_fraction, psi_hit_rate, nmonte, mce_hit_rate) {

  # frames for diagnostics & results
  blank <- rep(NA, n)
  results <- data.frame(study = blank, actual_trials = blank, sim_trials = blank,
    hits = blank, fd = blank, comment = blank, n_FD = blank, n_OE = blank, n_OS = blank,
    n_S = blank, n_C2P = blank, n_PB = blank, n_P2C = blank)
  diags <- results

  # Row counter for results. Kept local so each call starts at row 1 regardless of
  # any global variable of the same name.
  rescnt <- 1

  # MA level counters
  n_C2P <- 0
  n_OS <- 0
  n_OE <- 0
  n_FD <- 0
  n_S <- 0
  n_PB <- 0
  n_P2C <- 0

  # loop around studies (seq_len so that an empty MA runs zero studies)
  for (study in seq_len(nstudies)) {

    # Set filedraw TRUE to get one pass through while loop
    filedraw <- TRUE   # TRUE if either kind of filedraw has happened on study

    # trials in MA
    MA_trials <- d$trials[study]
    while (filedraw) {

      # start comment
      comment <- ''
      filedraw <- FALSE   # the study is only repeated if a filedraw happens below

      # Set logicals for QRPs
      C2P <- runif(1) < C2P_prevalence
      P2C <- runif(1) < P2C_prevalence
      OS <- runif(1) < OS_prevalence
      OE <- runif(1) < OE_prevalence
      PB <- runif(1) < PB_prevalence
      RmSS <- runif(1) < RmSS_prevalence

      # While loop around trials
      stopped <- FALSE
      trial_count <- 0
      hit_count <- 0
      while (!stopped) {

        # do a trial and update counters etc
        trial_count <- trial_count + 1
        hit <- runif(1) < psi_hit_rate
        if (hit) hit_count <- hit_count + 1
        running_p <- pob(hit_count, trial_count, mce_hit_rate)

        # C2P - stop initially unsuccessful study
        if (C2P && trial_count == C2P_trial_number && running_p > C2P_probability_threshold) {
          stopped <- TRUE
          filedraw <- TRUE   # filedraw occurred - need to repeat study
          n_FD <- n_FD + 1
          n_C2P <- n_C2P + 1
          comment <- paste(comment,'In C2P trial_count',trial_count,'hits',hit_count,':')
        }

        # OS - optional stopping
        if (!stopped && OS && running_p < OS_probability_threshold && trial_count > OS_min_trials) {
          stopped <- TRUE
          n_OS <- n_OS + 1
          comment <- paste(comment,'In OS trial_count',trial_count,'P',running_p,':')
        }

        # OE - optional extension
        if (!stopped && OE && running_p < OE_probability_threshold && running_p > 0.05 &&
            trial_count == MA_trials) {
          final_extra_trials <- 0
          final_extra_hits <- 0
          final_prob <- 0.05
          new_hits <- 0
          test <- TRUE   # TRUE until the extended study first reaches P < 0.05
          for (j in seq_len(OE_max_extra_trials)) {
            # runif() is drawn on every pass, also after the extension has succeeded
            if (runif(1) < psi_hit_rate && test) new_hits <- new_hits + 1
            oe_prob <- pob(hit_count + new_hits, trial_count + j, mce_hit_rate)
            if (oe_prob < 0.05 && test) {
              test <- FALSE
              final_extra_trials <- j
              final_extra_hits <- new_hits
              final_prob <- oe_prob
            }
          }

          # Never hit 0.05
          if (final_prob >= 0.05) {
            final_extra_trials <- OE_max_extra_trials
            final_extra_hits <- new_hits
          }

          # add extra trials.  Could be the max allowed if experiment still didn't get to 0.05
          trial_count <- trial_count + final_extra_trials
          hit_count <- hit_count + final_extra_hits
          stopped <- TRUE
          comment <- paste(comment,'In OE extending by',final_extra_trials,'trials. Final P',final_prob)
          n_OE <- n_OE + 1
        }

        # stop at MA number of trials otherwise (>= so that a planned trial count
        # below 1 or a non-integer one cannot make this loop run forever)
        if (trial_count >= MA_trials) stopped <- TRUE
      }   # end trial while

      if (P2C && !filedraw) {

        # run pilot and get its stats
        # NOTE(review): the pilot uses a fixed 0.25 for both the hit probability and the
        # chance rate rather than psi_hit_rate / mce_hit_rate - confirm this is intended
        new_hits <- sum(as.numeric(runif(P2C_trial_number) < 0.25))
        pilot_prob <- pob(new_hits, P2C_trial_number, 0.25)

        # Now do the P2C only if pilot did well
        if (pilot_prob < P2C_probability_threshold) {
          trial_count <- trial_count + P2C_trial_number
          hit_count <- hit_count + new_hits
          comment <- paste(comment,'In P2C: added',new_hits,'hits in',P2C_trial_number,'trials. Pilot prob',pilot_prob)
          n_P2C <- n_P2C + 1
        }
      }

      # PB - regular filedraw
      # first conditional is whether experimenter will filedraw AT ALL
      if (PB && !filedraw) {
        running_p <- pob(hit_count, trial_count, mce_hit_rate)

        # Second conditional is P dependent
        if (runif(1) > pubprob(running_p)) {
          filedraw <- TRUE   # filedraw occurred - need to repeat study
          n_FD <- n_FD + 1
          n_PB <- n_PB + 1
          comment <- paste(comment, 'In regular filedraw using pfd. Study filedrawed with P =',running_p,':')
        } else {
          comment <- paste(comment, 'In regular filedraw using pfd. Study NOT filedrawed with P =',running_p,':')
        }
      }

      # Removing subjects: at least one trial is removed; hit_count is left unchanged
      if (RmSS && !filedraw) {
        subjects_removed <- max(floor(RmSS_fraction * trial_count), 1)
        trial_count <- trial_count - subjects_removed
        n_S <- n_S + subjects_removed
      }

    }   # end of filedraw loop

    # update results (filedraw is always FALSE here because the loop above has ended)
    results$study[rescnt] <- study
    results$actual_trials[rescnt] <- MA_trials
    results$sim_trials[rescnt] <- trial_count
    results$hits[rescnt] <- hit_count
    results$fd[rescnt] <- filedraw
    results$comment[rescnt] <- comment
    rescnt <- rescnt + 1

  }   # end study loop

  # update counters (the MA level totals are repeated on every row)
  results$n_FD <- n_FD
  results$n_OE <- n_OE
  results$n_OS <- n_OS
  results$n_S <- n_S
  results$n_C2P <- n_C2P
  results$n_PB <- n_PB
  results$n_P2C <- n_P2C

  list(results = results, diags = diags)
}


### Objective function for the GA
# Arguments:
# pars = vector of 15 parameters represented as integers. Prevalences, thresholds,
#   RmSS_fraction and psi_hit_rate are divided by rf (1e+5) to give real values; the
#   trial-count parameters (pars[3], pars[6], pars[8], pars[11]) are used as they are.
#
# Globals read: nmonte, OS_probability_threshold, mce_hit_rate, ma_dist, ma_mHR, ma_HR,
# ma_cor, and the functions ma_sim(), pob() and zop()
#
# Value:
# The fitness of the chromosome: the sum of four squared Z scores comparing nmonte
# Monte-Carlo simulations of the MA (with the QRP parameters defined by the pars vector)
# against the real MA. The four Z scores are also printed.

objfunc <- function(pars) {

  # NOTE(review): rf is fixed here while the script derives its own rf from nfloat;
  # the two must agree (both are 1e+5 with nfloat = 5)
  rf <- 1e+5

  # unpack parameters
  C2P_prevalence <- pars[1]/rf
  C2P_probability_threshold <- pars[2]/rf
  C2P_trial_number <- pars[3]
  P2C_prevalence <- pars[4]/rf
  P2C_probability_threshold <- pars[5]/rf
  P2C_trial_number <- pars[6]
  OS_prevalence <- pars[7]/rf
  OS_min_trials <- pars[8]
  OE_prevalence <- pars[9]/rf
  OE_probability_threshold <- pars[10]/rf
  OE_max_extra_trials <- pars[11]
  PB_prevalence <- pars[12]/rf
  RmSS_prevalence <- pars[13]/rf
  RmSS_fraction <- pars[14]/rf
  psi_hit_rate <- pars[15]/rf

  # Run Monte Carlo of MA

  # Frame for MC results
  res <- data.frame(mHR = rep(NA, nmonte), HR = rep(NA, nmonte),
      cor = rep(NA, nmonte), chi2z = rep(NA, nmonte))

  # vectorized pob(); built once because it does not change between simulations
  vpob <- Vectorize(pob, c('h','n'))

  # Run desired number of MC's
  for (i in seq_len(nmonte)) {
    x <- ma_sim(C2P_prevalence,
      C2P_probability_threshold,C2P_trial_number,P2C_prevalence,P2C_probability_threshold,P2C_trial_number,
      OS_prevalence,OS_probability_threshold,
      OS_min_trials,OE_prevalence,OE_probability_threshold,OE_max_extra_trials,PB_prevalence,
      RmSS_prevalence,RmSS_fraction,psi_hit_rate,  nmonte,mce_hit_rate)

    # Keep only the filled rows and add per-study hit rate and binomial P
    results <- na.omit(x$results)
    results$hr <- results$hits/results$sim_trials
    results$p <- vpob(results$hits, results$sim_trials, 0.25)

    res$mHR[i] <- mean(results$hr)
    res$HR[i] <- sum(results$hits)/sum(results$sim_trials)
    res$cor[i] <- cor(results$hr, results$sim_trials, method = 'spearman')

    # Compare the simulated P distribution (10 bins) with that of the MA
    # NOTE(review): a bin that is empty in both ma_dist and sim_dist presumably makes
    # the chi-square P value NaN, which then propagates into the fitness - confirm
    sim_dist <- hist(results$p, breaks = seq(0,1,0.1), plot = FALSE)$counts
    pv <- chisq.test(rbind(ma_dist,sim_dist))$p.value
    # P values at or below 1e-16 are scored as Z = 10
    res$chi2z[i] <- ifelse(pv > 1e-16, zop(pv), 10)

  }

  # Compute summary Z's of MC simulations wrt MA
  res <- transform(res, mean_hr = (mHR + HR)/2)
  res <- transform(res, diff_hr = mHR - HR)

  # MA values
  ma_mean_hr <- (ma_mHR + ma_HR)/2
  ma_diff_hr <- ma_mHR - ma_HR

  # Now Zs
  z_mean_hr <- (mean(res$mean_hr) - ma_mean_hr)/sd(res$mean_hr)
  z_diff_hr <- (mean(res$diff_hr) - ma_diff_hr)/sd(res$diff_hr)
  z_cor <- (mean(res$cor) - ma_cor)/sd(res$cor)
  z_chi <- mean(res$chi2z)

  # get combined Zs for fitness
  totz <- z_mean_hr^2 + z_diff_hr^2 + z_cor^2 + z_chi^2
  print(paste(z_mean_hr, z_diff_hr, z_cor, z_chi))
  totz
}


# -------------------------- code to run Monte Carlo simulations of MA -----------------------------

# Libraries
library(rgenoud)
library(xlsx)
library(parallel) 

# Set working directory
setwd("enter working directory path here")

# PARAMETERS
progname <- '_MA_sim_v11'
post_communique <- TRUE

# GA parameters
maxgen <- 100
waitgen <- 20
nparams <- 15
popsize <- 60   # was 60
nfloat <- 5     # Number of sig digits in reals

# MC parameters
nmonte <- 200   # Monte Carlos

# Cluster parameter
ncores <- 7

# new seed for RNG; one uniform draw is then taken as a seed value
set.seed(NULL, kind = NULL, normal.kind = NULL)
seed <- floor(runif(1) * 1e6)

# time stamp used in the names of the log and result files
nowstr <- format(Sys.time(), '_%m_%d_%Y_%H_%M')

# vectorized  pob()
vpob <- Vectorize(pob, c('h', 'n'))

# read GZ DB
# GFMA file
gfma_fname <- 'file of MA results.xlsx'
d <- read.xlsx(gfma_fname, sheetIndex = 1)

# per-study hit rate and binomial P against a chance rate of .25
d$hits <- as.numeric(d$hits)
d$trials <- as.numeric(d$trials)
d$hitrate <- d$hits / d$trials
d$binom.p <- vpob(d$hits, d$trials, .25)

# MA summary stats
ma_dist <- hist(d$binom.p, breaks = seq(0, 1, 0.1), plot = FALSE)$counts
ma_mHR <- mean(d$hitrate)
ma_HR <- sum(d$hits) / sum(d$trials)
ma_cor <- cor(d$hitrate, d$trials, method = 'spearman')

nstudies <- nrow(d)
n <- 10000     # rows in res
diagcnt <- 1   # counter for diags
rescnt <- 1    # results counter

		

# Here we set up the GA

# Psi Parameters
mce_hit_rate <- 0.25
psi_hit_rate <- 0.25

# QRP parameters not GA'd
OS_probability_threshold <- 0.05

# factor for integers into reals
rf <- 10^nfloat

# set up boundaries for parameters: one entry per parameter, lower bounds first.
# Real valued parameters are stored multiplied by rf.
lower_bounds <- c(0.*rf,      # C2P_prevalence * 100000
                  0*rf,       # C2P_probability_threshold
                  4,          # C2P_trial_number
                  0.*rf,      # P2C_prevalence * 100000
                  0*rf,       # P2C_probability_threshold
                  4,          # P2C_trial_number
                  0.*rf,      # OS_prevalence * 100000
                  15,         # OS_min_trials
                  0.*rf,      # OE_prevalence * 100000
                  0,          # OE_probability_threshold * 100000
                  10,         # OE_max_extra_trials
                  0.*rf,      # PB_prevalence * 100000
                  0.*rf,      # RmSS_prevalence * 100000
                  0,          # RmSS_fraction * 100000
                  0.25*rf)    # psi HR * 100000
upper_bounds <- c(1*rf,       # C2P_prevalence * 100000
                  0.7*rf,     # C2P_probability_threshold
                  10,         # C2P_trial_number
                  1*rf,       # P2C_prevalence * 100000
                  0.5*rf,     # P2C_probability_threshold
                  10,         # P2C_trial_number
                  1*rf,       # OS_prevalence * 100000
                  30,         # OS_min_trials
                  1*rf,       # OE_prevalence * 100000
                  0.15*rf,    # OE_probability_threshold * 100000
                  20,         # OE_max_extra_trials
                  1*rf,       # PB_prevalence * 100000
                  1*rf,       # RmSS_prevalence * 100000
                  0.05*rf,    # RmSS_fraction * 100000
                  0.25*rf)    # psi HR * 100000 (lower = upper, so psi HR is held fixed)

# nparams x 2 matrix: column 1 lower bound, column 2 upper bound
boundaries <- matrix(c(lower_bounds, upper_bounds), nrow = nparams, ncol = 2)


# Set up cluster
cl <- makeCluster(rep('localhost', ncores), type = "SOCK")

# Log file
log_file <- paste0('MA_log', nowstr, progname, '.txt')
sink_open <- FALSE

# Run GA. The cluster is stopped and the log file released in 'finally' so that an
# error during the run does not leave worker processes or the sink behind.
result <- tryCatch({

  # Everything objfunc() and ma_sim() look up by name must exist on the workers.
  # This includes pob() and zop(), which both functions call.
  clusterExport(cl, c("d", "ma_sim", "nmonte", "n", "nstudies", "mce_hit_rate",
    "OS_probability_threshold", "diagcnt", "rescnt", "ma_dist", "ma_mHR", "ma_HR", "ma_cor",
    "pubprob", "pob", "zop"))

  sink(file = log_file, split = TRUE)
  sink_open <- TRUE

  genoud(objfunc, nparams, max = FALSE, data.type.int = TRUE, Domains = boundaries,
    pop.size = popsize, max.generations = maxgen, wait.generations = waitgen, print.level = 3,
    cluster = cl)
}, finally = {
  stopCluster(cl)
  if (sink_open) sink()
})

very.best <- result$par
save(result, very.best, file = paste0('results', nowstr, '.dat'))


# ------------------------------------------- Analyse Results -------------------------------------

# Libraries
library(xlsx)

# PARAMETERS
post_communique <- FALSE
mce_hit_rate <- 0.25
nm_sim <- 200                                        # monte carlos in simulation <=  MUST be entered
fname <- 'MA_log_05_02_2015_06_53_MA_sim_v11.txt'    # <=  MUST be entered as the log file output during simulation run
oname <- 'reruns_5_3_PHR_25_prevalences_0_1.csv'     # <=  MUST be entered as desired output file for results summary
nfloat <- 5   # Number of sig digits in reals

# GFMA file
gfma_fname <- 'file of MA results.xlsx'

# MC parameters
nmonte <- 500   # Monte Carlos for this evaluation

# vectorized  pob()
vpob <- Vectorize(pob, c('h', 'n'))

# PARAMETERS (text sizes and PDF dimensions for the plot, plus other plot settings)
lab <- 1.3
ax <- 1.3
tit <- 1.5
delta <- 0.005
ww <- 8.57
hh <- 6
cex.ano <- 0.7
offset <- 0.02

# factor for reals
rf <- 10^nfloat

# MA data with per-study hit rate and binomial P against a chance rate of .25
d <- read.xlsx(gfma_fname, sheetIndex = 1)
d$hits <- as.numeric(d$hits)
d$trials <- as.numeric(d$trials)
d$hitrate <- d$hits / d$trials
d$binom.p <- vpob(d$hits, d$trials, .25)

n <- 1000      # rows in res
diagcnt <- 1   # counter for diags
rescnt <- 1    # results counter

# MA summary stats
ma_dist <- hist(d$binom.p, breaks = seq(0, 1, 0.1), plot = FALSE)$counts
ma_mHR <- mean(d$hitrate)
ma_HR <- sum(d$hits) / sum(d$trials)
ma_cor <- cor(d$hitrate, d$trials, method = 'spearman')

nstudies <- nrow(d)

# The GA log is read as a one column table, one log line per row
dr <- read.csv(fname, header = FALSE)

# value in characters 17 to 30 of every line containing 'Fitness'
drp <- as.numeric(substr(dr[grepl('Fitness', dr[, 1]), ], 17, 30))

# extract the values from the last gen
dr1 <- dr[grepl('GENERATION', dr[, 1]), ]
lastgen <- length(dr1) - 1
ind <- which(grepl(paste('GENERATION:', lastgen), dr[, 1]))
dr2 <- dr[ind:nrow(dr), ]
dr3 <- dr2[grepl('best', dr2)]
very.best <- as.numeric(substr(dr3, 17, 30))

# unpack the best chromosome into the QRP parameters
C2P_prevalence <- very.best[1] / rf
C2P_probability_threshold <- very.best[2] / rf
C2P_trial_number <- very.best[3]
P2C_prevalence <- very.best[4] / rf
P2C_probability_threshold <- very.best[5] / rf
P2C_trial_number <- very.best[6]
OS_prevalence <- very.best[7] / rf
OS_min_trials <- very.best[8]
OE_prevalence <- very.best[9] / rf
OE_probability_threshold <- very.best[10] / rf
OE_max_extra_trials <- very.best[11]
PB_prevalence <- very.best[12] / rf
RmSS_prevalence <- very.best[13] / rf
RmSS_fraction <- very.best[14] / rf
psi_hit_rate <- very.best[15] / rf


# QRP parameters not GA'd
OS_probability_threshold <- 0.05

# set fractions used in GA encoding (rf for real valued parameters, 1 for integer ones)
f <- c(rf, rf, 1, rf, rf, 1, rf, 1, rf, rf, 1, rf, rf, rf, rf)

# Extract ranges
vars <- c(
  'C2P_prevalence',
  'C2P_probability_threshold',
  'C2P_trial_number',
  'P2C_prevalence',
  'P2C_probability_threshold',
  'P2C_trial_number',
  'OS_prevalence',
  'OS_min_trials',
  'OE_prevalence',
  'OE_probability_threshold',
  'OE_max_extra_trials',
  'PB_prevalence',
  'RmSS_prevalence',
  'RmSS_fraction',
  'psi_hit_rate '
)
ranges <- data.frame(vars = vars, low = NA, high = NA)
for (i in seq_along(vars)) {
  # log line mentioning 'X<i> ', split at '<=': first piece is the lower bound, third the upper
  line <- dr$V1[grepl(paste0('X', i, ' '), dr$V1)]
  vals <- unlist(strsplit(line, '<=', fixed = TRUE))
  ranges$vars[i] <- vars[i]
  ranges$low[i] <- as.numeric(vals[1])
  ranges$high[i] <- as.numeric(vals[3])
}

# convert the bounds back from the GA encoding
ranges$low <- ranges$low / f
ranges$high <- ranges$high / f

# extract fitness & plot and get mean and sd
# The two log lines following each 'Fitness' line are read as the population mean and
# variance (characters 17 to 30); the square root of the variance gives the SD.
drind <- which(grepl('Fitness', dr[, 1]))
drm <- as.numeric(substr(dr[drind + 1, ], 17, 30))
drsd <- sqrt(as.numeric(substr(dr[drind + 2, ], 17, 30)))

# common y range for population mean +/- SD and the best fitness
yr <- c(min(c(drm - drsd, drp), na.rm = TRUE), max(c(drm + drsd, drp), na.rm = TRUE))

# Population mean with +/- 1 SD error bars, drawn with base graphics only
# (plotCI() is not provided by any package this file loads).
gen <- seq_along(drm)
plot(gen, drm, xlab = 'Generation', ylab = 'Fitness', ylim = yr,
  main = paste('Fitness vs Generation. Mean & SD of Population black, Best red\nPsi HR Range ',ranges[15,2],'to',ranges[15,3]),
  cex.axis = ax, cex.lab = lab, cex.main = tit)
arrows(gen, drm - drsd, gen, drm + drsd, angle = 90, code = 3, length = 0.05)

# Best fitness per generation, overlaid in red on the same axes
par(new = TRUE)
plot(seq_along(drp), drp, type = 'b', col = 'red', ylim = yr, axes = FALSE, ylab = '', xlab = '')

grid(col = 'darkblue')

# copy the current device to a PDF next to the summary file
dev.copy(pdf, width = ww, height = hh, paste(oname,'_performance.pdf',sep=''))
dev.off()

# dataframe for capturing MC results
# NOTE(review): there is no n_P2C column, so the P2C counter returned by ma_sim() is not
# summarised - confirm whether that is intended
res <- data.frame(mHR = rep(NA, nmonte), HR = rep(NA, nmonte),
  cor = rep(NA, nmonte), chi2z = rep(NA, nmonte), P1 = rep(NA, nmonte), P7 = rep(NA, nmonte), chi2 = rep(NA, nmonte),
  n_FD = rep(NA, nmonte), n_C2P = rep(NA, nmonte), n_PB = rep(NA, nmonte),
  n_OE = rep(NA, nmonte), n_OS = rep(NA, nmonte), n_S = rep(NA, nmonte))

# vectorized pob(); built once because it does not change between simulations
vpob <- Vectorize(pob, c('h','n'))

# Monte Carlo
for (i in seq_len(nmonte)) {
  x <- ma_sim(C2P_prevalence,
    C2P_probability_threshold,C2P_trial_number,P2C_prevalence,P2C_probability_threshold,P2C_trial_number,
    OS_prevalence,OS_probability_threshold,
    OS_min_trials,OE_prevalence,OE_probability_threshold,OE_max_extra_trials,PB_prevalence,
    RmSS_prevalence,RmSS_fraction,psi_hit_rate,  nmonte,mce_hit_rate )

  # keep only the filled rows and add per-study hit rate and binomial P
  results <- na.omit(x$results)
  results$hr <- results$hits/results$sim_trials
  results$p <- vpob(results$hits, results$sim_trials, 0.25)

  res$mHR[i] <- mean(results$hr)
  res$HR[i] <- sum(results$hits)/sum(results$sim_trials)
  res$cor[i] <- cor(results$hr, results$sim_trials, method = 'spearman')

  # chi-square comparison of the simulated and MA P distributions, computed once.
  # P values at or below 1e-16 are scored as Z = 10, the same rule objfunc() applies,
  # so the fitness reported here is computed the same way as the one the GA optimised.
  sim_dist <- hist(results$p, breaks = seq(0,1,0.1), plot = FALSE)$counts
  chi <- chisq.test(rbind(ma_dist,sim_dist))
  res$chi2z[i] <- ifelse(chi$p.value > 1e-16, zop(chi$p.value), 10)
  res$chi2[i] <- chi$statistic

  # studies in the first P bin (0 to 0.1) and in the last three bins (0.7 to 1)
  res$P1[i] <- sim_dist[1]
  res$P7[i] <- sum(sim_dist[8:10])

  # MA level QRP counters (identical on every row, so row 1 is read)
  res$n_FD[i] <- results$n_FD[1]
  res$n_OE[i] <- results$n_OE[1]
  res$n_OS[i] <- results$n_OS[1]
  res$n_S[i] <- results$n_S[1]
  res$n_C2P[i] <- results$n_C2P[1]
  res$n_PB[i] <- results$n_PB[1]

}

# diagnostics of the last simulation only
write.csv(na.omit(x$diags), file = 'diags.csv')

# compute fitness
# per-simulation average and difference of the two hit rate measures
res$mean_hr <- (res$mHR + res$HR) / 2
res$diff_hr <- res$mHR - res$HR

# MA values
ma_mean_hr <- (ma_mHR + ma_HR) / 2
ma_diff_hr <- ma_mHR - ma_HR

# Now Zs: distance of the simulation mean from the MA value in units of the simulation SD
z_vs_ma <- function(sim_values, ma_value) (mean(sim_values) - ma_value) / sd(sim_values)
z_mean_hr <- z_vs_ma(res$mean_hr, ma_mean_hr)
z_diff_hr <- z_vs_ma(res$diff_hr, ma_diff_hr)
z_cor <- z_vs_ma(res$cor, ma_cor)
z_chi <- mean(res$chi2z)

# fitness = z_mean_hr^2 + z_diff_hr^2 + z_cor^2 + z_chi^2
fitness <- z_mean_hr^2 + z_diff_hr^2 + z_cor^2 + z_chi^2

# put all results into csv
# Rows are collected in a list and bound together at the end, so the table grows with
# the number of GA generations instead of being limited to a fixed number of rows.

# Append one row of four text cells; cells that are not supplied stay empty.
append_row <- function(rows, metric, sim_mean = '', sim_sd = '', MA = '') {
  rows[[length(rows) + 1]] <- c(metric = as.character(metric), sim_mean = as.character(sim_mean),
    sim_sd = as.character(sim_sd), MA = as.character(MA))
  rows
}

# Append a row holding the mean and SD of a vector of simulation values.
append_stat <- function(rows, metric, sim_values, MA = '') {
  append_row(rows, metric, mean(sim_values), sd(sim_values), MA)
}

rows <- list()

# -- Metrics
# Section Header
rows <- append_row(rows, 'Key Metrics from Sim. and MA')
rows <- append_stat(rows, 'HR', res$HR, ma_HR)
rows <- append_stat(rows, 'mHR', res$mHR, ma_mHR)
rows <- append_stat(rows, 'Correlation', res$cor, ma_cor)
rows <- append_stat(rows, 'P1', res$P1, ma_dist[1])
rows <- append_stat(rows, 'P7', res$P7, sum(ma_dist[8:10]))
rows <- append_stat(rows, 'Chi2', res$chi2)
rows <- append_stat(rows, 'Number of FDs by both routes', res$n_FD)
rows <- append_stat(rows, 'Number of FDs by C2P', res$n_C2P)
rows <- append_stat(rows, 'Number of FDs by PB', res$n_PB)
rows <- append_stat(rows, 'Number of OEs', res$n_OE)
rows <- append_stat(rows, 'Number of OSs', res$n_OS)
rows <- append_stat(rows, 'Number of Ss removed', res$n_S)

# NOTE(review): this row divides by the number of studies, i.e. it reports subjects
# removed per study. tots (total MA trials) is computed but never used - confirm
# whether a fraction of all trials was intended.
tots <- sum(d$trials)
rows <- append_row(rows, 'Fraction of Ss removed per study', mean(res$n_S)/nstudies, sd(res$n_S)/nstudies)

# ----  Add fitness details
# Section Header
rows <- append_row(rows, '')
rows <- append_row(rows, 'Fitness Details')
rows <- append_row(rows, 'Fitness', fitness)
rows <- append_row(rows, 'Z Mean HR', z_mean_hr)
rows <- append_row(rows, 'Z Diff HR', z_diff_hr)
rows <- append_row(rows, 'Z Cor', z_cor)
rows <- append_row(rows, 'Z Chi2', z_chi)

# ----  Add Parameters & ranges
# Section Header
rows <- append_row(rows, '')
rows <- append_row(rows, 'Best Fit Parameters and GA Ranges')

# New header
rows <- append_row(rows, 'Parameter', 'Best Fit', 'Lower Bound', 'Upper Bound')

for (i in seq_along(vars)) {
  rows <- append_row(rows, vars[i], very.best[i]/f[i], ranges$low[i], ranges$high[i])
}

# Section Header
rows <- append_row(rows, '')
rows <- append_row(rows, 'GA Fitness by Generation')

drp <- na.omit(drp)
for (i in seq_along(drp)) {
  rows <- append_row(rows, paste('Fitness of generation',i), drp[i])
}
rows <- append_row(rows, '')

# Housekeeping vars
rows <- append_row(rows, 'GA Log file', fname)

# GA settings: the part after ':' on the matching log line
popsize <- as.numeric(unlist(strsplit(dr[grepl('Population size',dr$V1),],':')))[2]
rows <- append_row(rows, 'Population size', popsize)

maxgen <- as.numeric(unlist(strsplit(dr[grepl('HARD Maximum',dr$V1),],':')))[2]
rows <- append_row(rows, 'Max generations', maxgen)

maxconstgen <- as.numeric(unlist(strsplit(dr[grepl('Maximum Nonchanging Generations',dr$V1),],':')))[2]
rows <- append_row(rows, 'Max unimproved generations', maxconstgen)

rows <- append_row(rows, '# Monte Carlos in Simulation', nm_sim)
rows <- append_row(rows, '# Monte Carlos in Evaluation', nmonte)
rows <- append_row(rows, 'GFMA Data file', gfma_fname)
rows <- append_row(rows, '# Studies in GFMA', nrow(d))
rows <- append_row(rows, 'Post communique', post_communique)

# the table ends with one empty row
rows <- append_row(rows, '')

# output: text columns are kept as character (not factors) on every R version
output <- as.data.frame(do.call(rbind, rows), stringsAsFactors = FALSE)
write.csv(output, file = oname, row.names = FALSE)