one.generation <- function(pop1.A, pop1.B, pop2.A, pop2.B, num.loci, num.all, mut.rate, mig.rate, pop.size, ploidy=4, theta) { # divide theta by two since full tetrasomy means that half of the alleles from the A subgenome ends up in the B subgenome and vice versa theta = theta / 2 # it is possible to give a whole vector with a different theta for every locus, if this is not the case, expand the single value if(length(theta)== 1){ theta = rep(theta,num.loci) } # divide migration rate by two since in a standard island model there is migration to itself mig.rate=mig.rate / 2 sub.size = (ploidy*pop.size/2) # initialise new.pop1.A = pop1.A new.pop1.B = pop1.B new.pop2.A = pop2.A new.pop2.B = pop2.B for(l in 1:num.loci){ #calculate expected frequencies before migration exp.freqs1.A = (1-theta[l]) * (1-mut.rate) * pop1.A[l,] + theta[l] * (1-mut.rate) * pop1.B[l,] + mut.rate * (rep(sub.size/num.all, num.all)) exp.freqs1.B = (1-theta[l]) * (1-mut.rate) * pop1.B[l,] + theta[l] * (1-mut.rate) * pop1.A[l,] + mut.rate * (rep(sub.size/num.all, num.all)) exp.freqs2.A = (1-theta[l]) * (1-mut.rate) * pop2.A[l,] + theta[l] * (1-mut.rate) * pop2.B[l,] + mut.rate * (rep(sub.size/num.all, num.all)) exp.freqs2.B = (1-theta[l]) * (1-mut.rate) * pop2.B[l,] + theta[l] * (1-mut.rate) * pop2.A[l,] + mut.rate * (rep(sub.size/num.all, num.all)) #calculate expected frequencies after migration mig.freqs1.A = (1-mig.rate) * exp.freqs1.A + mig.rate * exp.freqs2.A mig.freqs1.B = (1-mig.rate) * exp.freqs1.B + mig.rate * exp.freqs2.B mig.freqs2.A = (1-mig.rate) * exp.freqs2.A + mig.rate * exp.freqs1.A mig.freqs2.B = (1-mig.rate) * exp.freqs2.B + mig.rate * exp.freqs1.B #draw random allele frequencies based on the expectations new.pop1.A[l,] = rmultinom(1, sub.size, mig.freqs1.A) new.pop1.B[l,] = rmultinom(1, sub.size, mig.freqs1.B) new.pop2.A[l,] = rmultinom(1, sub.size, mig.freqs2.A) new.pop2.B[l,] = rmultinom(1, sub.size, mig.freqs2.B) } return(list(p1.A= new.pop1.A, p1.B= new.pop1.B, p2.A= new.pop2.A, p2.B= new.pop2.B)) } #define the main parameters of teh model num.loci = 10 #the number of loci num.all = 100 #the maximum number of alleles per locus pop.size = 1000 #the number of individuals per population theta = 1 #the rate of exchange of alleles between the two subgenomes mut.rate = 0.00001 #the mutation rate mig.rate = 0 #the migration rate between the two populations burn.in = 10000 #how long the model should be run BEFORE the divergence of the two populations num.gens = 20000 #how long the model should be run AFTER the divergence of the two populations sample.size = 100 #how many individuals should be created for output to a genetic data file #define some parameters that should not be changed ploidy = 4 num.pops = 2 #to get initial frequencies, draw overall allele frequencies from an exponential distribution #this actually does not matter too much, but will help reach equilibrium a bit quicker all.freqs = NULL for(l in 1:num.loci){ draw = rexp(num.all) freqs = draw/sum(draw) all.freqs = rbind(all.freqs, freqs) } row.names(all.freqs) = sprintf("locus_%02d", 1:num.loci) #convert frequencies to allele counts all.freqs.A = all.freqs * (ploidy*pop.size/2) all.freqs.B = all.freqs * (ploidy*pop.size/2) #now perform the generations before the population divergence, so with a single population g = 0 while(g < burn.in){ #do generations, but with migration rate zero and also using all.freqs as a dummy for the second population #this allows us to reuse the same function as for two populations lret = one.generation(all.freqs.A, all.freqs.B, all.freqs.A, all.freqs.B, num.loci, num.all, mut.rate, 0, pop.size, ploidy, theta) all.freqs.A = lret$p1.A all.freqs.B = lret$p1.B #increment the counter and report the progress g = g+1 if(g %% 100 == 0){ print(g) } } #set initial frequencies after the split to the overall frequencies of the single population pop1.A = all.freqs.A pop1.B = all.freqs.B pop2.A = all.freqs.A pop2.B = all.freqs.B #now perform the generations after the population divergence, so with a two populations g = 0 while(g < num.gens){ #do the magic here lret = one.generation(pop1.A, pop1.B, pop2.A, pop2.B, num.loci, num.all, mut.rate, mig.rate, pop.size, ploidy, theta) pop1.A = lret$p1.A pop1.B = lret$p1.B pop2.A = lret$p2.A pop2.B = lret$p2.B g = g+1 if(g %% 100 == 0){ print(g) } } #generations #after simulation has finished, calculate allele frequencies for both populations pop1 = (pop1.A + pop1.B) / (ploidy*pop.size) pop2 = (pop2.A + pop2.B) / (ploidy*pop.size) #set statistics to zero Hs1 = 0 Hs2 = 0 Ht = 0 Ho1 = 0 Ho2 = 0 #calculate summary statistics per locus for(l in 1:num.loci){ Hs1 = Hs1 + sum(pop1[l,]^2) Hs2 = Hs2 + sum(pop2[l,]^2) Ht = Ht + sum( ((pop1[l,] + pop2[l,])/2)^2 ) #calculate observed heterozygosity based on the allele frequencies pop_f1.A = pop1.A[l,] / (ploidy*pop.size/2) pop_f1.B = pop1.B[l,] / (ploidy*pop.size/2) Ho1 = Ho1 + sum((1/6 * (pop_f1.A^2) + 1/6 * (pop_f1.B ^2) + 2/3 * (pop_f1.A * pop_f1.B))) pop_f2.A = pop2.A[l,] / (ploidy*pop.size/2) pop_f2.B = pop2.B[l,] / (ploidy*pop.size/2) Ho2 = Ho2 + sum((1/6 * (pop_f2.A^2) + 1/6 * (pop_f2.B ^2) + 2/3 * (pop_f2.A * pop_f2.B))) } # now calculate multilocus values of summary statistics Hs = 1 - (Hs1 + Hs2) / (2*num.loci) Ho = 1 - (Ho1 + Ho2) / (2*num.loci) Fis = 1 - Ho / Hs Ht = 1 - (Ht / num.loci) Dst = Ht-Hs Dpst = num.pops / (num.pops-1) * Dst Hpt = Hs + Dpst Gst = Dpst / Hpt Gpst = Gst / (1-Hs) all.stats = cbind(theta,Hs, Ho, Gst, Ht, Fis) write.table(all.stats, file="results.txt", col.names=TRUE, quote=FALSE, sep="\t",row.names=FALSE) #create random individuals and write these to a file #draw a sample of random genotypes gendata = array(0, c(num.pops*sample.size, num.loci*ploidy)) for(l in 1:num.loci){ #population1 alleles1.A = sample(1:num.all, sample.size*ploidy/2, replace=TRUE, prob=pop1.A[l,]) dim(alleles1.A) = c(sample.size,ploidy/2) gendata[1:sample.size,(ploidy*l-(ploidy-1)):(ploidy*l-ploidy/2)] = alleles1.A alleles1.B = sample(1:num.all, sample.size*ploidy/2, replace=TRUE, prob=pop1.B[l,]) dim(alleles1.B) = c(sample.size,ploidy/2) gendata[1:sample.size,(ploidy*l-(ploidy/2-1)):(ploidy*l)] = alleles1.B #population2 alleles2.A = sample(1:num.all, sample.size*ploidy/2, replace=TRUE, prob=pop2.A[l,]) dim(alleles2.A) = c(sample.size,ploidy/2) gendata[(sample.size+1):(2*sample.size),(ploidy*l-(ploidy-1)):(ploidy*l-ploidy/2)] = alleles2.A alleles2.B = sample(1:num.all, sample.size*ploidy/2, replace=TRUE, prob=pop2.B[l,]) dim(alleles2.B) = c(sample.size,ploidy/2) gendata[(sample.size+1):(2*sample.size),(ploidy*l-(ploidy/2-1)):(ploidy*l)] = alleles2.B } #now write to GenoDive format full.name = sprintf("data_full_%.8f.gdv", theta) #write comment line cat("Data with full genotypes\n",file=full.name) #write info line cat(c(num.pops*sample.size, num.pops, num.loci, ploidy, 3), file=full.name, sep="\t", append=TRUE) #write pop names cat("\nPop1\nPop2\n",file=full.name, append=TRUE) #write output per individual for(i in 1:(num.pops*sample.size)){ pop = floor((i-1)/sample.size)+1 cat(sprintf("%d\tind%03d", pop, i), file=full.name, append=TRUE) #now write data for all loci for(l in 1:num.loci){ cat("\t", file=full.name, append=TRUE) ind.data = gendata[i,(ploidy*l-(ploidy-1)):(ploidy*l)] #write full data for(p in 1:ploidy){ cat(sprintf("%03d", ind.data[p]), file=full.name, append=TRUE) } } cat("\n", file=full.name, append=TRUE) } #indvidual output