# Additional file 2.
# R code for coalescent simulation under the assumption of selective neutrality
# used in
# Title: Admixture with indigenous people helps local adaptation:
#        admixture-enabled selection in Polynesians
# Authors: Isshiki et al.
#
# Inputs : "TNG_freq.txt" (whitespace-separated table with a header, read from
#          the working directory; its expected content is listed further down).
# Outputs: one log file with the derived parameters and ten genotype tables
#          "sim_genotype_<Mb>Mb_<loci>t_Ne<N0>_<m>.txt" (m = 1..10).

library(scrm)

# Parameters ----

N0 <- 1000           # population size
rec_rate <- 1.3e-08  # recombination rate
mut_rate <- 1.2e-08  # mutation rate
adm_t <- 100         # admixed generation
div_t <- 1667        # diverged generation
L <- 1000000         # length of one simulated locus (second value of scrm -r)
# Number of loci simulated per scrm call (scrm's second positional argument).
# Previously stored in a variable named `T`, which masked the `T` shorthand
# for TRUE (and was then passed as `header = T` to read.table()).
n_loci <- 3e+08 / L

# Haplotypes sampled from populations 1, 2 and 3 (scrm -I). Population 2 is
# the one whose allele counts ("TNG") drive the SNP selection below.
n_hap <- c(48, 46, 90)
adm_prop <- 0.2456   # fraction argument of scrm -eps

# Derived parameters, scaled as in the scrm command line.
adm_T <- adm_t / (4 * N0)
div_T <- div_t / (4 * N0)
R <- 4 * N0 * rec_rate * L
theta <- 4 * N0 * mut_rate * L

file_stem <- sprintf(
  "sim_genotype_%sMb_%st_Ne%s",
  as.character(L / 1000000), as.character(n_loci), as.character(N0)
)
log_file <- paste0(file_stem, ".log")

# Explicit print() so that the log is also filled when the script is run via
# source(), where top-level auto-printing is disabled.
sink(log_file)
tryCatch(
  for (value in list(L, n_loci, adm_T, div_T, R, theta)) {
    print(value)
  },
  finally = sink()
)

# scrm command ----

# Format a number for the scrm command line without scientific notation.
fmt_num <- function(x) {
  format(x, digits = 15, scientific = FALSE, trim = TRUE)
}

# Built from the parameters above instead of repeating their values. With the
# values in this file it evaluates to exactly
# '184 300 -I 3 48 46 90 -eps 0.025 2 3 0.2456 -ej 0.025 2 1 -ej 0.41675 3 1
#  -r 52 1000000 -t 48 -oSFS'
scrm_cmd <- paste(
  sum(n_hap), fmt_num(n_loci),
  "-I", length(n_hap), paste(n_hap, collapse = " "),
  "-eps", fmt_num(adm_T), "2 3", fmt_num(adm_prop),
  "-ej", fmt_num(adm_T), "2 1",
  "-ej", fmt_num(div_T), "3 1",
  "-r", fmt_num(R), fmt_num(L),
  "-t", fmt_num(theta),
  "-oSFS"
)

# Target SNP counts (adjust the SNP number to the real data) ----

# Expected content of TNG_freq.txt. Row k + 1 belongs to allele-count class k.
# In the table below Total = 9 * chrom_num + last_chrom_num, i.e. replicates
# 1-9 use `chrom_num` and replicate 10 uses `last_chrom_num`.
# num Total chrom_num last_chrom_num
# 0  52098 5209 5217
# 1  10571 1057 1058
# 2   7731  773  774
# 3   6323  632  635
# 4   5903  590  593
# 5   5824  582  586
# 6   5446  544  550
# 7   5003  500  503
# 8   4869  486  495
# 9   4693  469  472
# 10  4694  469  473
# 11  4470  447  447
# 12  4381  438  439
# 13  4265  426  431
# 14  4329  432  441
# 15  4254  425  429
# 16  4125  412  417
# 17  4001  400  401
# 18  3960  396  396
# 19  3830  383  383
# 20  3808  380  388
# 21  3569  356  365
# 22  2950  295  295
# 23  1261  126  127
# Read once; the table does not change between replicates.
snp_targets <- read.table("TNG_freq.txt", header = TRUE)

# Simulation ----

# Run one scrm simulation, thin the SNPs to the requested number per
# allele-count class and write the diploid genotype table.
#
# out_file  : path of the genotype table to write.
# n_target  : number of SNPs to keep per class; element k + 1 is used for the
#             class "count in `tng_rows` is k or length(tng_rows) - k".
# cmd       : scrm command string.
# locus_len : length of one locus; scales relative positions to integers.
# tng_rows  : rows (haplotypes) of the segregating-sites matrix used for the
#             allele counts.
#
# Returns (invisibly) the genotype matrix that was written: one row per
# pair of consecutive haplotypes, one column per kept SNP.
simulate_genotypes <- function(out_file, n_target, cmd, locus_len, tng_rows) {
  n_tng <- length(tng_rows)
  classes <- 0:(n_tng %/% 2)
  if (length(n_target) < length(classes)) {
    stop("Need ", length(classes), " target counts, got ", length(n_target),
         call. = FALSE)
  }

  sim <- scrm(cmd)
  loci <- sim$seg_sites

  # Haplotypes in rows, SNPs of all loci side by side in columns.
  hap <- do.call(cbind, loci)
  n_sites <- ncol(hap)
  rel_pos <- as.numeric(unlist(lapply(loci, colnames)))
  stopifnot(length(rel_pos) == n_sites)

  # Locus index of every column, taken from the per-locus column counts.
  # (Guessing the boundary from a drop in relative position misses it when the
  # next locus starts further right, and cannot see loci without any SNP.)
  sites_per_locus <- vapply(loci, ncol, integer(1))
  locus_id <- rep(seq_along(loci), times = sites_per_locus)
  pos <- floor((rel_pos + locus_id - 1) * locus_len)

  # Derived-allele count of every SNP among the `tng_rows` haplotypes.
  tng_count <- colSums(hap[tng_rows, , drop = FALSE])

  # Random rank per SNP: the first n SNPs of a class in this order are kept.
  rand_rank <- sample.int(n_sites)

  picked <- vector("list", length(classes))
  for (j in seq_along(classes)) {
    k <- classes[j]
    in_class <- which(tng_count == k | tng_count == n_tng - k)
    want <- n_target[j]
    if (length(in_class) < want) {
      stop("Class ", k, ": ", want, " SNPs requested but only ",
           length(in_class), " simulated", call. = FALSE)
    }
    in_class <- in_class[order(rand_rank[in_class])]
    picked[[j]] <- in_class[seq_len(want)]
  }

  # Select by column index, not by position name: two SNPs can share the same
  # floored position, and name lookup would return the first one twice.
  keep <- unlist(picked)
  keep <- keep[order(pos[keep], keep)]
  hap_keep <- hap[, keep, drop = FALSE]

  # Allele to genotype: sum each pair of consecutive haplotypes.
  n_ind <- nrow(hap_keep) %/% 2
  first <- seq(1, by = 2, length.out = n_ind)
  geno <- hap_keep[first, , drop = FALSE] + hap_keep[first + 1, , drop = FALSE]
  # "%.0f" keeps round positions such as 100000 out of scientific notation.
  dimnames(geno) <- list(NULL, sprintf("%.0f", pos[keep]))

  write.table(geno, out_file)
  invisible(geno)
}

tng_rows <- seq(n_hap[1] + 1, length.out = n_hap[2])  # rows 49..94

# Replicates 1-9 use `chrom_num`, replicate 10 uses `last_chrom_num`.
for (m in 1:10) {
  o_file <- sprintf("%s_%s.txt", file_stem, as.character(m))
  n_target <- if (m < 10) {
    snp_targets$chrom_num
  } else {
    snp_targets$last_chrom_num
  }
  simulate_genotypes(o_file, n_target, scrm_cmd, L, tng_rows)
}