one.generation <- function(pop1.A, pop1.B, pop2.A, pop2.B, num.loci, num.all, mut.rate, mig.rate, pop.size, ploidy=4, theta)
{	
	# divide theta by two since full tetrasomy means that half of the alleles from the A subgenome ends up in the B subgenome and vice versa
	theta = theta / 2
	# it is possible to give a whole vector with a different theta for every locus, if this is not the case, expand the single value
	if(length(theta)== 1){
		theta = rep(theta,num.loci)
	}
	
	# divide migration rate by two since in a standard island model there is migration to itself
	mig.rate=mig.rate / 2
	
	sub.size = (ploidy*pop.size/2)
	
	# initialise
	new.pop1.A = pop1.A
	new.pop1.B = pop1.B
	new.pop2.A = pop2.A
	new.pop2.B = pop2.B
	
	for(l in 1:num.loci){
		#calculate expected frequencies before migration
		exp.freqs1.A = (1-theta[l]) * (1-mut.rate) * pop1.A[l,] + theta[l] * (1-mut.rate) * pop1.B[l,] + mut.rate * (rep(sub.size/num.all, num.all))
		exp.freqs1.B = (1-theta[l]) * (1-mut.rate) * pop1.B[l,] + theta[l] * (1-mut.rate) * pop1.A[l,] + mut.rate * (rep(sub.size/num.all, num.all))

		exp.freqs2.A = (1-theta[l]) * (1-mut.rate) * pop2.A[l,] + theta[l] * (1-mut.rate) * pop2.B[l,] + mut.rate * (rep(sub.size/num.all, num.all))
		exp.freqs2.B = (1-theta[l]) * (1-mut.rate) * pop2.B[l,] + theta[l] * (1-mut.rate) * pop2.A[l,] + mut.rate * (rep(sub.size/num.all, num.all))

		#calculate expected frequencies after migration
		mig.freqs1.A = (1-mig.rate) * exp.freqs1.A + mig.rate * exp.freqs2.A
		mig.freqs1.B = (1-mig.rate) * exp.freqs1.B + mig.rate * exp.freqs2.B
		
		mig.freqs2.A = (1-mig.rate) * exp.freqs2.A + mig.rate * exp.freqs1.A
		mig.freqs2.B = (1-mig.rate) * exp.freqs2.B + mig.rate * exp.freqs1.B
		
		#draw random allele frequencies based on the expectations
		new.pop1.A[l,] = rmultinom(1, sub.size, mig.freqs1.A)
		new.pop1.B[l,] = rmultinom(1, sub.size, mig.freqs1.B)
		new.pop2.A[l,] = rmultinom(1, sub.size, mig.freqs2.A)
		new.pop2.B[l,] = rmultinom(1, sub.size, mig.freqs2.B)
	}

	return(list(p1.A= new.pop1.A, p1.B= new.pop1.B, p2.A= new.pop2.A, p2.B= new.pop2.B))
}

#define the main parameters of teh model
num.loci = 10			#the number of loci
num.all = 100			#the maximum number of alleles per locus
pop.size = 1000			#the number of individuals per population
theta = 1				#the rate of exchange of alleles between the two subgenomes
mut.rate = 0.00001		#the mutation rate
mig.rate = 0			#the migration rate between the two populations
burn.in = 10000			#how long the model should be run BEFORE the divergence of the two populations
num.gens = 20000		#how long the model should be run AFTER the divergence of the two populations
sample.size = 100		#how many individuals should be created for output to a genetic data file

#define some parameters that should not be changed
ploidy = 4
num.pops = 2
	
#to get initial frequencies, draw overall allele frequencies from an exponential distribution
#this actually does not matter too much, but will help reach equilibrium a bit quicker
all.freqs = NULL

for(l in 1:num.loci){
	draw = rexp(num.all)
	freqs = draw/sum(draw)
	all.freqs = rbind(all.freqs, freqs)
}
row.names(all.freqs) = sprintf("locus_%02d", 1:num.loci)

#convert frequencies to allele counts
all.freqs.A = all.freqs * (ploidy*pop.size/2)
all.freqs.B = all.freqs * (ploidy*pop.size/2)

#now perform the generations before the population divergence, so with a single population
g = 0
while(g < burn.in){
	#do generations, but with migration rate zero and also using all.freqs as a dummy for the second population
	#this allows us to reuse the same function as for two populations
	lret = one.generation(all.freqs.A, all.freqs.B, all.freqs.A, all.freqs.B, num.loci, num.all, mut.rate, 0, pop.size, ploidy, theta)
	all.freqs.A = lret$p1.A
	all.freqs.B = lret$p1.B
	#increment the counter and report the progress
	g =  g+1
	if(g %% 100 == 0){ print(g) }
}

#set initial frequencies after the split to the overall frequencies of the single population
pop1.A = all.freqs.A
pop1.B = all.freqs.B

pop2.A = all.freqs.A
pop2.B = all.freqs.B

#now perform the generations after the population divergence, so with a two populations
g = 0
while(g < num.gens){
	#do the magic here
	lret = one.generation(pop1.A, pop1.B, pop2.A, pop2.B, num.loci, num.all, mut.rate, mig.rate, pop.size, ploidy, theta)
	pop1.A = lret$p1.A
	pop1.B = lret$p1.B
	pop2.A = lret$p2.A
	pop2.B = lret$p2.B
	
	g = g+1
	if(g %% 100 == 0){ print(g) }
} #generations

#after simulation has finished, calculate allele frequencies for both populations
pop1 = (pop1.A + pop1.B) / (ploidy*pop.size)
pop2 = (pop2.A + pop2.B) / (ploidy*pop.size)

#set statistics to zero
Hs1 = 0
Hs2 = 0
Ht = 0
Ho1 = 0
Ho2 = 0	

#calculate summary statistics per locus
for(l in 1:num.loci){
	Hs1 = Hs1 + sum(pop1[l,]^2)
	Hs2 = Hs2 + sum(pop2[l,]^2)
	
	Ht = Ht + sum( ((pop1[l,] + pop2[l,])/2)^2 )
	
	#calculate observed heterozygosity based on the allele frequencies
	pop_f1.A = pop1.A[l,] / (ploidy*pop.size/2)
	pop_f1.B = pop1.B[l,] / (ploidy*pop.size/2)
	Ho1 = Ho1 + sum((1/6 * (pop_f1.A^2) + 1/6 * (pop_f1.B ^2) + 2/3 * (pop_f1.A * pop_f1.B)))
	
	pop_f2.A = pop2.A[l,] / (ploidy*pop.size/2)
	pop_f2.B = pop2.B[l,] / (ploidy*pop.size/2)
	Ho2 = Ho2 + sum((1/6 * (pop_f2.A^2) + 1/6 * (pop_f2.B ^2) + 2/3 * (pop_f2.A * pop_f2.B)))
}

# now calculate multilocus values of summary statistics
Hs = 1 - (Hs1 + Hs2) / (2*num.loci)
Ho = 1 - (Ho1 + Ho2) / (2*num.loci)
Fis = 1 - Ho / Hs
Ht = 1 - (Ht / num.loci) 
Dst = Ht-Hs
Dpst = num.pops / (num.pops-1) * Dst
Hpt = Hs + Dpst
Gst = Dpst / Hpt
Gpst = Gst / (1-Hs)

all.stats = cbind(theta,Hs, Ho, Gst, Ht, Fis)

write.table(all.stats, file="results.txt", col.names=TRUE, quote=FALSE, sep="\t",row.names=FALSE)

#create random individuals and write these to a file

#draw a sample of random genotypes
gendata = array(0, c(num.pops*sample.size, num.loci*ploidy))

for(l in 1:num.loci){
	#population1
	alleles1.A = sample(1:num.all, sample.size*ploidy/2, replace=TRUE, prob=pop1.A[l,])
	dim(alleles1.A) = c(sample.size,ploidy/2)
	gendata[1:sample.size,(ploidy*l-(ploidy-1)):(ploidy*l-ploidy/2)] = alleles1.A

	alleles1.B = sample(1:num.all, sample.size*ploidy/2, replace=TRUE, prob=pop1.B[l,])
	dim(alleles1.B) = c(sample.size,ploidy/2)
	gendata[1:sample.size,(ploidy*l-(ploidy/2-1)):(ploidy*l)] = alleles1.B
	
	#population2
	alleles2.A = sample(1:num.all, sample.size*ploidy/2, replace=TRUE, prob=pop2.A[l,])
	dim(alleles2.A) = c(sample.size,ploidy/2)
	gendata[(sample.size+1):(2*sample.size),(ploidy*l-(ploidy-1)):(ploidy*l-ploidy/2)] = alleles2.A

	alleles2.B = sample(1:num.all, sample.size*ploidy/2, replace=TRUE, prob=pop2.B[l,])
	dim(alleles2.B) = c(sample.size,ploidy/2)
	gendata[(sample.size+1):(2*sample.size),(ploidy*l-(ploidy/2-1)):(ploidy*l)] = alleles2.B
}


#now write to GenoDive format
full.name = sprintf("data_full_%.8f.gdv", theta)

#write comment line
cat("Data with full genotypes\n",file=full.name)
#write info line
cat(c(num.pops*sample.size, num.pops, num.loci, ploidy, 3), file=full.name, sep="\t", append=TRUE) 
#write pop names
cat("\nPop1\nPop2\n",file=full.name, append=TRUE)

#write output per individual
for(i in 1:(num.pops*sample.size)){
	pop = floor((i-1)/sample.size)+1
	cat(sprintf("%d\tind%03d", pop, i), file=full.name, append=TRUE)
	
	#now write data for all loci
	for(l in 1:num.loci){
		cat("\t", file=full.name, append=TRUE)	

		ind.data = gendata[i,(ploidy*l-(ploidy-1)):(ploidy*l)]
		#write full data
		for(p in 1:ploidy){
			cat(sprintf("%03d", ind.data[p]), file=full.name, append=TRUE)
		}
		
	}
	cat("\n", file=full.name, append=TRUE)	
} #indvidual output