# Additional file 2.
# R code for coalescent simulation under the assumption of selective neutrality
# used in
# Title: Admixture with indigenous people helps local adaptation: admixture-enabled selection in Polynesians
# Authors: Isshiki et al.

# ---- Simulation parameters --------------------------------------------------
N0 <- 1000            # population size
rec_rate <- 1.3e-08   # recombination rate
mut_rate <- 1.2e-08   # mutation rate
adm_t <- 100          # admixture time, in generations
div_t <- 1667         # divergence time, in generations
L <- 1000000          # locus length (file names report L / 1e6 as "Mb")
# NOTE: this masks R's built-in shorthand `T` (TRUE) for the rest of the
# script, so `TRUE` must be spelled out wherever a logical is meant. The name
# is kept because the code further down refers to it.
T <- 3e+08 / L        # number of samplings

# Name of the log file that records the derived parameters.
log_file <- paste0(
  "sim_genotype_", as.character(L / 1000000), "Mb_",
  as.character(T), "t_Ne", as.character(N0), ".log"
)

# Event times rescaled by 4 * N0.
adm_T <- adm_t / (4 * N0)
div_T <- div_t / (4 * N0)

# Recombination and mutation parameters scaled by 4 * N0 and the locus length.
R <- 4 * N0 * rec_rate * L
theta <- 4 * N0 * mut_rate * L

# Write the values to the log. They are printed explicitly because bare
# expressions are only auto-printed at top level; under source() they would
# leave the log empty. The sink is closed even if printing fails.
sink(log_file)
tryCatch(
  for (logged_value in list(L, T, adm_T, div_T, R, theta)) {
    print(logged_value)
  },
  finally = sink()
)

library(scrm)

# ---- Coalescent simulation replicates --------------------------------------
#
# Ten replicates are simulated with scrm. In every replicate the simulated
# SNPs are down-sampled so that the number of SNPs in each folded allele-count
# class (computed on haplotype rows 49-94) matches the counts read from
# TNG_freq.txt, haplotypes are paired into genotypes, and the result is written
# to sim_genotype_<L/1e6>Mb_<T>t_Ne<N0>_<m>.txt.
# Replicates 1-9 use column `chrom_num` of TNG_freq.txt; replicate 10 uses
# column `last_chrom_num`.

# scrm command line. Template with the script's variables:
#   '184 T -I 3 48 46 90  -eps adm_T 2 3 0.2456 -ej adm_T 2 1 -ej div_T  3 1 -r R L -t theta -oSFS'
# NOTE: the numbers below are written out by hand (T = 300, adm_T = 0.025,
# div_T = 0.41675, R = 52, L = 1000000, theta = 48) and must be edited if the
# parameters at the top of the script are changed.
scrm_args <- "184 300 -I 3 48 46 90  -eps 0.025 2 3 0.2456 -ej 0.025 2 1 -ej 0.41675  3 1 -r 52 1000000 -t 48 -oSFS"

# Haplotype rows on which the allele counts are computed (46 chromosomes;
# presumably the second population of `-I 3 48 46 90` -- confirm against the
# scrm sample ordering).
tng_rows <- 49:94

# Genome-wide coordinate of every segregating site.
#
# seg_sites: list with one haplotype matrix per locus; column names are the
#            relative positions of the sites within the locus.
# locus_len: length of one locus.
# Returns floor((relative position + zero-based locus index) * locus_len), in
# the column order obtained by cbind-ing the matrices.
genomic_positions <- function(seg_sites, locus_len) {
  # The locus of a site is taken from the size of each per-locus matrix, not
  # guessed from a drop in position between neighbouring columns: that guess
  # misses a boundary whenever the next locus starts above the last position
  # of the previous one, and invents one when two positions are equal.
  sites_per_locus <- vapply(seg_sites, ncol, integer(1))
  locus_index <- rep(seq_along(seg_sites) - 1L, times = sites_per_locus)
  rel_pos <- as.numeric(unlist(lapply(seg_sites, colnames), use.names = FALSE))
  stopifnot(length(rel_pos) == length(locus_index))
  floor((rel_pos + locus_index) * locus_len)
}

# Column indices of the SNPs to keep.
#
# allele_count: allele count of every site among `n_chrom` chromosomes.
# n_chrom:      number of chromosomes the counts were computed on.
# wanted:       number of SNPs to keep per folded count class; element k + 1
#               belongs to class k (count k or n_chrom - k).
# shuffle_rank: random permutation of the site indices; within a class the
#               sites with the smallest rank are kept.
# Stops with an explicit message when a class holds fewer sites than wanted.
select_sites <- function(allele_count, n_chrom, wanted, shuffle_rank) {
  classes <- 0:(n_chrom %/% 2)
  if (is.null(wanted) || length(wanted) < length(classes) ||
        anyNA(wanted[seq_along(classes)])) {
    stop("TNG_freq.txt must provide a SNP count for every folded class 0-",
         max(classes), call. = FALSE)
  }
  folded <- pmin(unname(allele_count), n_chrom - unname(allele_count))
  picked <- lapply(classes, function(k) {
    candidates <- which(folded == k)
    n_wanted <- wanted[k + 1L]
    if (length(candidates) < n_wanted) {
      stop(sprintf(
        "only %s simulated SNPs in folded allele-count class %s, %s required",
        length(candidates), k, n_wanted
      ), call. = FALSE)
    }
    head(candidates[order(shuffle_rank[candidates])], n_wanted)
  })
  unlist(picked, use.names = FALSE)
}

# Run one replicate and write its genotype table.
#
# replicate_id: number used as the last component of the output file name.
# wanted:       SNP counts per folded allele-count class (see select_sites).
# Returns, invisibly, the number of genotype rows written.
simulate_replicate <- function(replicate_id, wanted) {
  out_file <- sprintf(
    "sim_genotype_%sMb_%st_Ne%s_%s.txt",
    as.character(L / 1000000), as.character(T), as.character(N0),
    as.character(replicate_id)
  )

  sim <- scrm(scrm_args)
  hap <- do.call(cbind, sim$seg_sites)   # haplotypes x sites, all loci
  pos <- genomic_positions(sim$seg_sites, L)
  stopifnot(length(pos) == ncol(hap))

  tng_count <- colSums(hap[tng_rows, , drop = FALSE])
  shuffle_rank <- sample.int(ncol(hap))

  # Sites are carried as column indices, not as position names: two SNPs can
  # floor to the same base pair, and selecting by name would then return the
  # first of them for both.
  keep <- select_sites(tng_count, length(tng_rows), wanted, shuffle_rank)
  keep <- keep[order(pos[keep], keep)]
  hap_kept <- hap[, keep, drop = FALSE]

  # Alleles to genotypes: sum each pair of consecutive haplotype rows.
  first_hap <- seq(1L, nrow(hap_kept), by = 2L)
  geno <- hap_kept[first_hap, , drop = FALSE] +
    hap_kept[first_hap + 1L, , drop = FALSE]

  x <- as.data.frame(geno)
  names(x) <- make.unique(as.character(pos[keep]))
  rownames(x) <- NULL

  write.table(x, out_file)
  invisible(nrow(x))
}

# Target SNP numbers taken from the real data; read once for all replicates.
# `header = TRUE` is spelled out because `T` is reassigned earlier in this
# script and no longer means TRUE.
# Layout of TNG_freq.txt:
#num Total chrom_num last_chrom_num
#0 52098 5209 5217
#1 10571 1057 1058
#2 7731 773 774
#3 6323 632 635
#4 5903 590 593
#5 5824 582 586
#6 5446 544 550
#7 5003 500 503
#8 4869 486 495
#9 4693 469 472
#10 4694 469 473
#11 4470 447 447
#12 4381 438 439
#13 4265 426 431
#14 4329 432 441
#15 4254 425 429
#16 4125 412 417
#17 4001 400 401
#18 3960 396 396
#19 3830 383 383
#20 3808 380 388
#21 3569 356 365
#22 2950 295 295
#23 1261 126 127
snp_counts <- read.table("TNG_freq.txt", header = TRUE)

for (m in 1:9) {
  simulate_replicate(m, snp_counts$chrom_num)
}

# The tenth replicate takes its SNP numbers from `last_chrom_num`.
m <- 10
n <- simulate_replicate(m, snp_counts$last_chrom_num)
# Number of genotype rows in the last replicate, reported on standard output.
print(n)