# Before you start, download the supplementary files and move them to
# your working directory.

#### Installation ####
# There are three ways to install geneHapR:
#   1. from CRAN;
#   2. from Gitee;
#   3. from GitHub.
# Each installation step below only runs while geneHapR is still missing, so
# the Gitee and GitHub sources act as fallbacks instead of reinstalling the
# package three times in a row.

## Install geneHapR from CRAN
# Automatic installation of the dependencies sometimes fails in R, so we
# suggest installing the Bioconductor dependencies manually first.
if (!requireNamespace("geneHapR", quietly = TRUE)) {
  if (!requireNamespace("BiocManager", quietly = TRUE)) {
    install.packages("BiocManager")
  }
  BiocManager::install(
    c("Biostrings", "GenomicRanges", "IRanges", "rtracklayer")
  )

  # Install the geneHapR package from CRAN
  install.packages("geneHapR")
}

## Or install geneHapR from Gitee
if (!requireNamespace("geneHapR", quietly = TRUE)) {
  if (!requireNamespace("devtools", quietly = TRUE)) {
    install.packages("devtools")
  }
  devtools::install_git("https://gitee.com/zhangrenl/genehapr")
}

## Or install geneHapR from GitHub
if (!requireNamespace("geneHapR", quietly = TRUE)) {
  if (!requireNamespace("devtools", quietly = TRUE)) {
    install.packages("devtools")
  }
  devtools::install_github("zhangrenl/genehapr")
}


#### Haplotype identification and visualization of OsGHD7.1 ####
# Load the package
library(geneHapR)

# Set the working directory
# Please change the directory to the location of your data files.
setwd("./")


#### 1. Importing data ####
# 1.1. Import genotypic data
# In this example, the genotypes are stored in table format.
geno <- read.csv(file = "PATH_to_additional_file_2")
head(geno[, 1:20])

# 1.2. Import annotation
# In this example, the annotation was prepared in BED6 format according to the
# annotation retrieved from the Rice Genome Annotation Project.
# "http://rice.uga.edu/pub/data/Eukaryotic_Projects/o_sativa/annotation_dbs/pseudomolecules/version_7.0/chr07.dir/Chr7.gff3"
bed <- import_bed("PATH_to_additional_file_3")
bed

# 1.3. Import accession information (phenotypic data)
# In this example, the phenotype data was prepared in a TSV file
# (tab-separated table).
pheno <- import_AccINFO("PATH_to_additional_file_4")
head(pheno)

# 1.4. Import accession information (subpopulation, geo-coordinates...)
# In this example, the dataset was prepared in a CSV file
# (comma-separated values).
# The first column of the file is used as row names (row.names = 1).
AccINFO <- read.csv("PATH_to_additional_file_5", row.names = 1)
head(AccINFO)


#### 2. Haplotype identification ####
hapResult <- table2hap(
  geno,
  hapPrefix = "H",       # Prefix of haplotype names
  pad = 3,               # Padding length of the number in haplotype names
  hetero_remove = TRUE,  # Remove accessions with heterozygous sites
  na_drop = TRUE         # Remove accessions with missing genotypes
)
plotHapTable(hapResult)  # Visualize the original result.


#### 3. Haplotype result adjustment ####
# 3.1. Set position of ATG as zero
# Both the haplotype result and the annotation are shifted with the same
# arguments so that they can be plotted together later.
hapRe0 <- hapSetATGas0(
  gff = bed,                  # The original annotation
  hap = hapResult,            # The original haplotype result
  geneID = "LOC_Os07g15770",  # The geneID provided in annotations
  Chr = "Chr7",               # The chromosome name
  POS = c(0, 9999999)         # The start and end position
)
gff0 <- gffSetATGas0(
  gff = bed,
  hap = hapResult,
  geneID = "LOC_Os07g15770",
  Chr = "Chr7",
  POS = c(0, 9999999)
)
plotHapTable(hapRe0)  # Visualize the result with coordinate adjustment

# 3.2. Eliminating rare haplotypes (frequency less than 5)
hapRe0 <- filter_hap(
  hapRe0,
  rm.mode = "freq",  # The adjustment mode, one of "freq", "haplotype",
                     # "position" and "accession"
  freq.min = 5       # The minimum accession number
)
plotHapTable(hapRe0)  # Visualize the result after removing rare haplotypes


#### 4. Visualization ####
# 4.1 Haplotype variants
# Visualization of the haplotypes' genotypes and frequencies
plotHapTable(
  hapRe0,
  hapPrefix = "H",   # Prefix of haplotype names
  title = "OsGHD7",  # Figure title
  angle = 0          # Angle of numeric coordinates
)

# Display the variants on the gene model
displayVarOnGeneModel(
  gff = gff0,    # Annotations
  hap = hapRe0,  # Haplotype result
  Chr = "Chr7",  # Chromosome name
  start = -200,  # Start position of gene model
  end = 3000     # End position of gene model
)

# 4.2 Haplotype Network
# In this step all characters should be in the nucleotide alphabet,
# so "DEL" (which means deletion) is reassigned as "N".
# Replace every "DEL" entry with "N" before building the network.
hapRe0[hapRe0 == "DEL"] <- "N"
colnames(AccINFO)
hapnet <- get_hapNet(
  hapRe0,
  AccINFO = AccINFO,            # Accession information
  groupName = "Subpopulation",  # The column name containing accession groups
  na.label = "Unknown"          # The label for unknown individuals
)
plotHapNet(
  hapNet = hapnet,
  scale = "log10",     # Scale method, one of none, log2 and log10
  show.mutation = 2,   # Mutation symbols
  # xlim = c(-14, 19), ylim = c(-10, 4),
  pie.lim = c(2, 10),
  legend = TRUE        # Legend position
)

# 4.3 Major haplotype Geo-distribution
hapDistribution(
  hap = hapRe0,
  AccINFO = AccINFO,
  hapNames = c("H001", "H002", "H003"),  # Haplotypes to display on the map
  LON.col = "Longitude",  # The column name of Longitude
  LAT.col = "Latitude",   # The column name of Latitude
  symbolSize = 0.9,       # The pie size
  lty.pie = 0,            # Border type of pies
  borderCol.pie = 1,      # Border color of pies
  lwd.pie = 1,            # Border width of pies
  lwd = 1,                # Border width of countries
  cex.legend = 1,         # The size of legend
  symbol.lim = c(3, 6),   # The circle size
  label.col = "black",    # The text color in circle
  label.cex = 0.8,        # The text size
  label.font = 2,         # Text font, 1 for normal and 2 for bold...
  hap.color = c("red", "green", "blue")  # Colors of each haplotype
)

# 4.4 Phenotype differences
# Cut points and the matching symbols used to mark significance levels.
symnum.args <- list(
  cutpoints = c(0, 0.001, 0.01, 0.05, 1),
  symbols = c("***", "**", "*", "ns")
)
names(pheno)
fig.GW <- hapVsPheno(
  hap = hapResult,
  pheno = pheno,
  symnum.args = symnum.args,  # Symbols for significance
  phenoName = "Grain_width",  # The phenotype name
  freq.min = 5,               # Minimum number of accessions for display
  mergeFigs = FALSE           # Merge the p-value heatmap and violin plot or not
)
fig.GW$plotHap     # Haplotype with accession number
fig.GW$T.Result    # P value of pairwise comparison using Student's t test
fig.GW$fig_pvalue  # Heatmap of p value
fig.GW$fig_Violin  # Violin plot of phenotypes
# fig.GW$figs

# 4.5 Visualization of LD-Block
# Haplotype "H004" is removed because the LD calculation only supports
# bi-allelic sites so far.
hap <- filter_hap(hapRe0, rm.mode = "haplotype", haplotype.rm = "H004")
plotHapTable(hap)
plot_LDheatmap(
  hap = hap,
  gff = gff0,
  Chr = "Chr7",             # Chromosome name
  start = -200,
  end = 3000,               # The start and end position of gene map
  SNP.name = TRUE,          # Show the SNP names or not
  snpmarks_height = 0.01,   # The top of SNP markers
  title = "OsGHD7",         # Title
  geneMapLabelY = 0.8,      # The position of "geneMap" label
  geneMapLabelX = 0.1,      # The position of "geneMap" label
  map.height = 0.02,        # The height of "geneMap"
  geneMapLocation = 0.3,    # The distance between gene model and heat map
  cex_snpname = 0.7,        # Size of SNP names
  add.map = TRUE,           # Whether to add the gene model
  color_snp = "black",      # Color of SNP segment
  color_gmodel = "grey",    # Fill color of "geneMap"
  color_snpname = "black",  # Color of SNP names
  colorLegend = TRUE,       # Whether to add the color legend
  LDmeasure = "r"           # LD measure method, "r" or "D'"
)