# The scripts used in this study:

# 1. Call SNPs using the TASSEL software
# Build the tag database from the raw reads listed in the key file.
perl ./tassel-5.2.51-standalone/run_pipeline.pl -Xmx1024g -fork1 \
  -GBSSeqToTagDBPlugin -e EcoRI -i ./data/ -db gbs_F.db \
  -k keyfile_eco_mse.txt -minKmerL 30 -kmerLength 60 -c 3 -batchSize 8 \
  -mxKmerNum 600000000 -endPlugin -runfork1 1>> gbs.log
# Export the tags as FASTQ so they can be aligned with bwa.
perl ./tassel-5.2.51-standalone/run_pipeline.pl -Xmx256g -fork1 \
  -TagExportToFastqPlugin -db gbs_F.db -o gbs.fq.gz -c 3 \
  -endPlugin -runfork1 1>> gbs.log
./bwa-0.7.15/bwa aln -n 3 -k 1 -t 10 Gallus6.fa gbs.fq.gz > gbs.sai
./bwa-0.7.15/bwa samse -n 1 Gallus6.fa gbs.sai gbs.fq.gz > gbs.sam
# Load the alignments back into the database, then discover and call SNPs.
perl ./tassel-5.2.51-standalone/run_pipeline.pl -Xmx256g -fork1 \
  -SAMToGBSdbPlugin -i gbs.sam -db gbs_F.db -endPlugin -runfork1 1>> gbs.log
perl ./tassel-5.2.51-standalone/run_pipeline.pl -Xmx256g -fork1 \
  -DiscoverySNPCallerPluginV2 -db gbs_F.db -ref Gallus6.fa \
  -gapAlignRatio 0.04 -maxTagsCutSite 128 -sC 1 -mnLCov 0.1 -mnMAF 0.01 \
  -endPlugin -runfork1 1>> gbs.log
perl ./tassel-5.2.51-standalone/run_pipeline.pl -Xmx256g -fork1 \
  -ProductionSNPCallerPluginV2 -batchSize 8 -db gbs_F.db -e EcoRI \
  -i ./data/ -k keyfile_eco_mse.txt -kmerLength 60 -o gbs.raw.vcf \
  -endPlugin -runfork1 1>> gbs.log
perl ./tassel-5.2.51-standalone/run_pipeline.pl -Xmx256g -fork1 \
  -SNPQualityProfilerPlugin -db gbs_F.db -statFile SNPquality_gbs.stat \
  -endPlugin -runfork1 1>> gbs.log

# 2. Quality control and phasing
./vcftools-v0.1.16/bin/vcftools --vcf gbs.raw.vcf --maf 0.01 \
  --max-alleles 2 --min-alleles 2 --minDP 5 --minGQ 98 \
  --recode --out gbs_all
./vcftools-v0.1.16/bin/vcftools --vcf gbs_all.recode.vcf --max-missing 0.2 \
  --recode --out gbs_all_max
# Keep the VCF header lines plus the records whose first field starts with a
# digit. The header test is anchored (^#) so that a data record that merely
# contains a '#' somewhere is not mistaken for a header line.
awk '{if($0~/^#/){print $0}else {if($1 ~ /^[0-9]/){print $0}}}' \
  gbs_all_max.recode.vcf > gbs_all_max_filter.vcf
java -Xmx150g -jar ./beagle_5.0/beagle.28Sep18.793.jar \
  gt=gbs_all_max_filter.vcf out=gbs gp=true nthreads=20 impute=true

# 3.
# Construction of linkage map (shell commands, Lep-MAP3)
java -cp ./LepMap3/binary/bin ParentCall2 data=pedigree.txt \
  vcfFile=chr.recode.vcf removeNonInformative=1 ignoreParentOrder=1 > p.call
java -cp ./LepMap3/binary/bin Filtering2 data=p.call removeNonInformative=1 \
  > p_fil.call
java -cp ./LepMap3/binary/bin SeparateChromosomes2 data=p_fil.call \
  lodLimit=10 numThreads=20 minLod=3 > map.txt
java -cp ./LepMap3/binary/bin OrderMarkers2 data=p_fil.call map=map.txt \
  numThreads=20 > genetic_map.txt

# 4. Infer local ancestry for each haplotype of each F9 individual using
#    RFMix (shell command)
./rfmix -f F9.vcf -r F0.vcf -m sample.map -g genetic_map.txt -c 5 -G 9 \
  -o result --chromosome=chr

# 5. SNP-based GWAS and ancestral-haplotype-based GWAS (shell commands, GCTA)
./gcta_1.91.1beta/gcta64 --grm F9 --autosome --autosome-num 33 \
  --pheno F9_ind.phe --covar F9_ind_sex_patch --reml --out BW8_snp
./gcta_1.91.1beta/gcta64 --mlma --bfile F9 --grm F9 --autosome \
  --autosome-num 33 --pheno F9_ind.phe --covar F9_ind_sex_patch --out BW8_snp

# 6. Haplotype-based GWAS (R from here on)
# a. Haplotype-based GRM
library(vcfR)
library(stringr)
library(dplyr)

# input vcf file
vcf_file <- "test.vcf"
vcf <- read.vcfR(vcf_file)

#' Build the per-haplotype SNP matrix from a vcfR object.
#'
#' Every sample contributes two rows: the allele before the genotype
#' separator (`<sample>_1`) and the allele after it (`<sample>_2`).
#'
#' @param vcf Object with a `gt` slot whose first column is dropped and whose
#'   remaining columns hold one genotype string per SNP (rows) and sample
#'   (columns), e.g. the result of `read.vcfR()`.
#' @return A data.frame with `2 * n_sample` rows and `n_snp` columns of allele
#'   codes; row names are the sample names suffixed with `_1` / `_2`.
make_snp_matrix <- function(vcf) {
  # drop = FALSE keeps a matrix even when the file holds a single sample.
  gt <- vcf@gt[, -1, drop = FALSE]
  sample_ids <- colnames(gt)
  n_sample <- ncol(gt)
  n_snp <- nrow(gt)

  # Keep only the text before the first ':' and split it on the genotype
  # separator. The previous strsplit(x, '|') treated '|' as a regular
  # expression and split into single characters, which only works for
  # one-character allele codes.
  # NOTE(review): assumes the genotype is the first ':'-separated subfield.
  gt_field <- sub(":.*$", "", as.character(gt))
  allele_parts <- strsplit(gt_field, "[|/]")
  first_allele <- vapply(allele_parts, function(p) p[1], character(1))
  second_allele <- vapply(allele_parts, function(p) p[2], character(1))

  # as.character() flattened the matrix column by column, so reshaping with
  # n_snp rows restores one column per sample.
  first_allele <- matrix(first_allele, nrow = n_snp, ncol = n_sample)
  second_allele <- matrix(second_allele, nrow = n_snp, ncol = n_sample)

  # Rows 2i-1 and 2i carry the two haplotypes of sample i.
  hap <- matrix(NA_character_, nrow = 2 * n_sample, ncol = n_snp)
  hap[seq(1, by = 2, length.out = n_sample), ] <- t(first_allele)
  hap[seq(2, by = 2, length.out = n_sample), ] <- t(second_allele)

  # write row names: add _1 and _2 to sample names
  xh <- as.data.frame(hap)
  rownames(xh) <- as.vector(
    rbind(paste0(sample_ids, "_1"), paste0(sample_ids, "_2"))
  )
  xh
}

xh <- make_snp_matrix(vcf)
write.table(xh, "F9_chr28snp_matrix.txt", col.names = FALSE,
            row.names = FALSE, sep = "\t", quote = FALSE)

# xh is the SNP matrix: the number of rows is 2 * the number of samples and
# the number of columns is the number of SNPs
# input: xh, the length of window
xh <- read.table("F9_chr28snp_matrix.txt")

#' Haplotype-based relationship matrix from fixed-size SNP windows.
#'
#' SNP columns are cut into consecutive windows of `window` SNPs (trailing
#' SNPs that do not fill a window are ignored). Within each window the allele
#' codes of a row are pasted into a haplotype string; two rows are scored 1 if
#' their strings are identical and 0 otherwise. The scores are averaged over
#' windows and the two rows of each individual are then summed.
#'
#' @param xh SNP matrix or data.frame with two consecutive rows per
#'   individual and one column per SNP.
#' @param window Number of SNPs per window (positive whole number).
#' @return A numeric matrix with `nrow(xh) / 2` rows and columns.
haplotype_grm_fix_window <- function(xh, window) {
  xh <- as.data.frame(xh)
  stopifnot(
    is.numeric(window), length(window) == 1L, !is.na(window),
    window >= 1, window == trunc(window)
  )
  # nc is the number of SNPs
  nc <- ncol(xh)
  # nh is the total number of haplotype windows
  nh <- trunc(nc / window)
  # nr is the number of haplotype rows: 2 * number of individuals
  nr <- nrow(xh)
  if (nh < 1) {
    stop("`window` is larger than the number of SNP columns", call. = FALSE)
  }
  if (nr < 2 || nr %% 2 != 0) {
    stop("`xh` must have two rows per individual", call. = FALSE)
  }

  # One haplotype string per row and window. Pasting the window's columns
  # directly (instead of slicing one long string with substr) keeps windows
  # aligned even when an allele code is not exactly one character, and it
  # also works when only a single window fits (nh == 1), where the former
  # seq(1 + window, window * nh, window) call failed.
  hap_ids <- vapply(
    seq_len(nh),
    function(w) {
      cols <- seq.int((w - 1) * window + 1, w * window)
      # unname() so column names are never taken as paste0() arguments.
      do.call(paste0, unname(as.list(xh[cols])))
    },
    character(nr)
  )

  # Count, for every pair of rows, the windows with identical haplotypes.
  match_count <- matrix(0, nrow = nr, ncol = nr)
  for (w in seq_len(nh)) {
    match_count <- match_count + outer(hap_ids[, w], hap_ids[, w], "==")
  }
  # hh is the gametic relationship matrix
  hh <- match_count / nh

  # K sums the two haplotype rows belonging to the same individual.
  K <- kronecker(diag(nr / 2), matrix(1, nrow = 1, ncol = 2))
  # H11 is the haplotype matrix
  K %*% hh %*% t(K) / 2
}

hap <- haplotype_grm_fix_window(xh, 5)
write.table(hap, "hap_matrix.txt", col.names = FALSE, row.names = FALSE,
            sep = "\t", quote = FALSE)

# b. Haplotype-based GWAS
library(lme4qtl)
geno_in <- read.table("all_hap.txt")
### Each row represents an individual, and each column represents a haplotype
### ID. If the haplotype frequency is less than 0.1, the haplotype ID is
### marked as NA.
phe <- read.table("BW8.phe", header = FALSE, sep = "\t") # Body weight phenotype
sex_covar <- read.table("sex_patch.txt", header = FALSE, sep = "\t")
# Calculated from script [a. Haplotype-based GRM]
Z1 <- read.table("hap_matrix.txt", header = FALSE, sep = "\t")
Z2 <- as.matrix(Z1)
row.names(Z2) <- colnames(Z2) <- phe$V1
dat <- data.frame(ID = phe$V1, trait = phe$V3,
                  PATCH = sex_covar$V3, SEX = sex_covar$V4)
## remove individuals with haplotype frequencies below 10%.
#' Blank out the data rows of individuals whose genotype is missing.
#'
#' @param genotype Vector with one entry per row of `dat`.
#' @param dat Data frame of phenotype and covariates.
#' @return `dat` with every column set to NA in the rows where `genotype`
#'   is NA.
motif_dat <- function(genotype, dat) {
  delete <- which(is.na(genotype))
  dat[delete, ] <- NA
  dat
}

#' Blank out the relationship-matrix entries among missing-genotype
#' individuals.
#'
#' @param genotype Vector with one entry per row/column of `Z`.
#' @param Z Square relationship matrix.
#' @return `Z` with the sub-matrix of rows and columns where `genotype` is NA
#'   set to NA.
motif_Z <- function(genotype, Z) {
  delete <- which(is.na(genotype))
  Z[delete, delete] <- NA
  Z
}

# Test every haplotype column: fit the null model, add the haplotype as a
# fixed effect and compare the two fits.
# seq_len() keeps the loop from running when geno_in has no columns.
for (i in seq_len(ncol(geno_in))) {
  genotype <- as.factor(geno_in[, i])
  dat1 <- motif_dat(genotype, dat)
  Z <- motif_Z(genotype, Z2)
  ## Build a null model
  m0 <- relmatLmer(trait ~ PATCH + SEX + (1 | ID), dat1,
                   relmat = list(ID = Z))
  # try() lets the scan continue when one haplotype model cannot be fitted.
  m1 <- try(update(m0, . ~ . + genotype))
  result <- try(anova(m0, m1))
  print(result)
}

# 7. Adjust the phenotype of F9 individuals
phe <- read.table("phe.txt", sep = " ", header = TRUE,
                  stringsAsFactors = FALSE, check.names = FALSE)
phe$sex <- factor(phe$sex)
phe$patch <- factor(phe$patch)
# na.exclude makes residuals() return one value per input row (NA where bw8,
# sex or patch is missing). The output is written without IDs, so with the
# default na.omit the rows would silently stop lining up with the individuals
# in phe.txt.
fit <- lm(bw8 ~ sex + patch, data = phe, na.action = na.exclude)
y.res <- residuals(fit)
write.table(y.res, file = "phe_adjust.txt", append = FALSE,
            row.names = FALSE)

# 8. Estimation of haplotype effect size
library(hglm)
library(rrBLUP)
Infile <- read.table("hap_info.txt", sep = " ", stringsAsFactors = FALSE,
                     header = FALSE)
# each column: sample_id phe sex patch haplotype_allele1 haplotype_allele2...

#' Split the input table into response, fixed-effect and random-effect parts.
#'
#' Columns are taken by position, so the function does not depend on the
#' default `V1`, `V2`, ... column names.
#'
#' @param Infile Data frame with at least five columns: column 2 is the
#'   response, columns 3 and 4 the two covariates, columns 5 onward the
#'   haplotype columns.
#' @return A list with `y` (column 2), `X` (a constant 1 plus columns 3
#'   and 4) and `Z` (columns 5 to the last, always a data frame).
Prep_data <- function(Infile) {
  stopifnot(ncol(Infile) >= 5)
  y <- Infile[[2]]
  X <- data.frame(1, Infile[[3]], Infile[[4]])
  # Keep the column names the original construction produced.
  names(X) <- c("X1", "X1.V3", "X1.V4")
  # drop = FALSE keeps Z two-dimensional with a single haplotype column.
  Z <- Infile[, 5:ncol(Infile), drop = FALSE]
  list("y" = y, "X" = X, "Z" = Z)
}

prep <- Prep_data(Infile = Infile)
hg <- hglm(X = as.matrix(prep$X), y = prep$y, Z = as.matrix(prep$Z))
S <- summary(hg)
out <- S$RandCoefMat
write.table(out, file = "result.txt", row.names = FALSE, col.names = TRUE,
            quote = FALSE, sep = "\t")