#####
##
## Analyses H3K4me3
##
## Marion Aguirrebengoa
##
## Last modification : 10/17/2016
##
## R version 3.2.5 (2016-04-14)
##
#####


## REQUIREMENTS
library( "rtracklayer" );
library( "BSgenome.Hsapiens.UCSC.hg19" );
seqlens = seqlengths( Hsapiens );


## READ DATA
# Random sites
rdm346 = import.bed( "Rdm_346_filtered_by_histone.bed" )
# NOTE(review): rand.Rdata is expected to define `rand`, a named list of
# random-site ranges with an element "80" (both are used below) - confirm.
load( "rand.Rdata" );

# Read AsiSI 80 best cleaved sites
asi80 = "BLESS_80best_JunFragPE_Rmdups_pm500bp.bed";
asi = list();
asi[["80"]] = import.bed( asi80 );

# H3K4me3
bcbw = import.bw( "H3K4me3_BGI_RUN_3_bin50_log2ratio.bw", as = "RleList" );
bw = list();
bw[["pOHT"]] = import.bw( "H3K4me3_OHT_normalized_hg19.bw", as = "RleList" );
bw[["mOHT"]] = import.bw( "H3K4me3_normalized_hg19.bw", as = "RleList" );

# Genes list
sh = read.table( "liste_best_299.bed", sep = "\t", header = TRUE );
# Recode the numeric strand (-1 / 1) into the "-" / "+" symbols GRanges expects.
# The first assignment coerces the vector to character; the second comparison
# still matches because 1 is compared as "1".
str = sh[, "Strand"];
str[str == -1] = "-";
str[str == 1] = "+";
shgr = GRanges(
  seqnames = paste0( "chr", sh[, "Chromosome.Name"] ),
  strand = str,
  ranges = IRanges( start = sh[, "Gene.Start..bp."], end = sh[, "Gene.End..bp."] ),
  name = sh[, "Ensembl.Gene.ID"]
);


## FUNCTIONS
# Compute 1 value per site
#
# For every range of `bed`, summarises the coverage found in a window of
# +/- `w` bp around the site centre.
#
# Args:
#   bed     : ranges of the sites (one value is returned per range).
#   wig     : coverage as a SimpleRleList, indexed by chromosome name.
#   w       : half-width of the window, in bp.
#   seqlens : named vector of chromosome lengths, used to clamp the window.
#   fun     : "sum" or "mean", the summary applied to the window.
#
# Returns:
#   A numeric vector with one value per range of `bed`, in the same order.
#
# Stops if `wig` is not a SimpleRleList or if `fun` is not "sum" / "mean".
compute1ValPerSite = function( bed, wig, w = 20000, seqlens, fun = "sum" ){
  if( !is( wig, "SimpleRleList" ) ){
    stop( "ERROR : unknown class of wig, please provide a SimpleRleList" );
  }
  # Validate once, up front, instead of on every iteration
  if( !( fun %in% c( "sum", "mean" ) ) ){
    stop( "ERROR : unknown function fun = ", fun, " - fun must be 'sum' or 'mean'" );
  }

  nSites = length( bed );
  vec = numeric( nSites );  # preallocated: no repeated copy while filling
  for( i in seq_len( nSites ) ){
    if( i %% 100 == 0 ){
      message( i, "/", nSites );
    }
    bedi = bed[i, ];
    chr = as.character( seqnames( bedi ) );
    cov = wig[[chr]];

    # NOTE(review): the centre is taken as start + 4, presumably the middle of
    # an 8-bp site - confirm against the BED files used.
    center = start( bedi ) + 4;

    # Clamp the window to [1, min( coverage length, chromosome length )]
    stW = max( center - w, 1 );
    edW = min( center + w, length( cov ), seqlens[chr], na.rm = TRUE );

    v = Views( cov, start = stW, end = edW );
    vec[i] = if( fun == "sum" ){ sum( v ) }else{ mean( v ) };
  }
  return( vec );
}

# Cut Rle in bin
#
# Splits `rle` into `binSize` consecutive bins of (almost) equal width and
# returns the mean of each bin.
#
# Args:
#   rle     : an Rle of values, or NULL.
#   binSize : number of bins to produce (not the width of a bin).
#
# Returns:
#   A vector of `binSize` bin means; `binSize` NA values when `rle` is NULL.
resizeRle = function( rle, binSize ){
  if( is.null( rle ) ){
    return( rep( NA, binSize ) );
  }
  len = length( rle );
  step = len / binSize;
  # Fractional bin boundaries are floored to integer positions
  st = floor( seq( 1, len, step ) );
  ed = floor( seq( step, len + step - 1, step ) );
  ed[ed > len] = len;
  v = Views( rle, start = st, end = ed );
  vm = viewApply( v, function( x ){ mean( as.numeric( x ) ) } );
  return( vm );
}

# Create a matrix from a list of Rle
#
# Args:
#   rleList : list of Rle (NULL entries allowed, they give a row of NA).
#   binSize : number of bins, i.e. number of columns of the result.
#
# Returns:
#   A matrix with one row per element of `rleList` and `binSize` columns.
#   An empty list gives a 0-row matrix instead of an error.
computeMatrix = function( rleList, binSize ){
  nrleList = lapply( rleList, resizeRle, binSize = binSize );
  # as.numeric() turns the NULL produced by an empty list into numeric( 0 ),
  # which matrix() accepts (it rejects NULL)
  return( matrix( as.numeric( unlist( nrleList ) ), nrow = length( nrleList ), ncol = binSize, byrow = TRUE ) );
}

# Get values on genes and intergenic zones
#
# Builds the average profile over the genes of `bed`: `w` bp before the gene,
# the gene body, then `w` bp after the gene, each part being binned.
#
# Args:
#   coverage : coverage indexed by chromosome name (list of Rle).
#   bed      : gene ranges, with strand.
#   chrList  : currently unused; kept so existing calls keep working.
#   binSize  : number of bins for the gene body. Genes whose width is not
#              greater than `binSize` are ignored.
#   w        : size of each flank in bp; flanks are cut in w / 100 bins.
#
# Returns:
#   A numeric vector of length w / 100 + binSize + w / 100 holding the mean
#   over genes of each bin, oriented 5' -> 3'. Any strand other than "+"
#   (including "*") is treated as reverse. Genes whose flanks fall outside the
#   coverage, or that contain NA in any part, are left out.
getGeneBody = function( coverage, bed, chrList = paste0( "chr", c( 1:22, "X" ) ), binSize = 100, w = 3000 ){
  sbed = bed[width( bed ) > binSize, ];
  nGenes = length( sbed );

  # Entries left NULL (skipped genes) become NA rows and are filtered below
  rv = rvav = rvap = vector( "list", nGenes );

  for( i in seq_len( nGenes ) ){
    if( i %% 100 == 0 ){
      message( i, "/", nGenes );
    }
    sbedi = sbed[i, ];
    geneStrand = as.character( strand( sbedi ) );
    chr = as.character( seqnames( sbedi ) );
    scov = coverage[[chr]];
    geneStart = start( sbedi );
    geneEnd = end( sbedi );

    # Skip genes whose flanks do not fit inside the available coverage
    if( ( geneStart - w > 0 ) && ( geneEnd + w < length( scov ) ) ){
      left = Views( scov, start = ( geneStart - w ), end = geneStart )[[1]];
      body = Views( scov, start = geneStart, end = geneEnd )[[1]];
      right = Views( scov, start = geneEnd, end = ( geneEnd + w ) )[[1]];

      if( geneStrand == "+" ){
        rvav[[i]] = left;
        rv[[i]] = body;
        rvap[[i]] = right;
      }else{
        # Reverse strand: swap the flanks and flip every part
        rvav[[i]] = rev( right );
        rv[[i]] = rev( body );
        rvap[[i]] = rev( left );
      }
    }
  }

  matav = computeMatrix( rvav, binSize = w / 100 );
  mat = computeMatrix( rv, binSize = binSize );
  matap = computeMatrix( rvap, binSize = w / 100 );

  # Filter on the joined matrix so the three parts of a gene are kept or
  # dropped together, and keep the matrix shape even when one gene is left
  cb = cbind( matav, mat, matap );
  cb = cb[rowSums( is.na( cb ) ) == 0, , drop = FALSE];
  if( nrow( cb ) == 0 ){
    warning( "getGeneBody : no gene left after filtering, the profile is NaN" );
  }
  res = colMeans( cb );
  return( res );
}


## RUN BOXPLOT
# Compute values
resAsiBC = list();
for( a in names( asi ) ){
  message( a );
  resAsiBC[[a]] = compute1ValPerSite( bed = asi[[a]], wig = bcbw, w = 500, seqlens = seqlens, fun = "mean" );
}

resRandBC = list();
for( r in names( rand ) ){
  message( r );
  resRandBC[[r]] = compute1ValPerSite( bed = rand[[r]], wig = bcbw, w = 500, seqlens = seqlens, fun = "mean" );
}

resAsi = list();
for( o in names( bw ) ){
  resAsi[[o]] = list();
  for( a in names( asi ) ){
    message( o, " - ", a );
    resAsi[[o]][[a]] = compute1ValPerSite( bed = asi[[a]], wig = bw[[o]], w = 500, seqlens = seqlens, fun = "mean" );
  }
}

resRand = list();
for( o in names( bw ) ){
  resRand[[o]] = list();
  for( r in names( rand ) ){
    message( o, " - ", r );
    resRand[[o]][[r]] = compute1ValPerSite( bed = rand[[r]], wig = bw[[o]], w = 500, seqlens = seqlens, fun = "mean" );
  }
}

# Draw boxplot
postscript( "Boxplot_bamCompare_SitesAndRandoms.eps" );
par( mfrow = c( 1, 2 ) );
# xaxt = "n" : the default "1" / "2" labels would be overprinted by axis()
boxplot( resAsiBC[["80"]], resRandBC[["80"]], col = c( "dodgerblue3", "grey" ), main = "Boxplot_bamCompare_80SitesAndRandoms", xaxt = "n" );
axis( 1, 1:2, c( "AsiSIsites", "Random" ) );
dev.off();

# Compute P-values
wilcox.test( resAsi[["mOHT"]][["80"]], resAsi[["pOHT"]][["80"]], paired = TRUE )$p.value; # 9.380713e-07
wilcox.test( resAsiBC[["80"]], mu = 0 )$p.value; # 4.425968e-05
wilcox.test( resRand[["mOHT"]][["80"]], resRand[["pOHT"]][["80"]], paired = TRUE )$p.value;
wilcox.test( resRandBC[["80"]], mu = 0 )$p.value;


## RUN PROFILE
# Compute Profile on Shiloh genes
profsh = list();
for( o in names( bw ) ){
  message( o );
  profsh[[o]] = getGeneBody( coverage = bw[[o]], bed = shgr, binSize = 100, w = 3000 );
}

# Draw all profiles
postscript( "Profiles.eps" );
plot( 1:length( profsh[["pOHT"]] ), profsh[["pOHT"]], type = "l", col = "red", ylim = c( 0, 4 ), xaxt = "n", xlab = "", ylab = "" )
points( 1:length( profsh[["mOHT"]] ), profsh[["mOHT"]], type = "l", col = "blue" )
title( "Shiloh" )
# Limits between the flanks (30 bins each) and the gene body (100 bins)
abline( v = 30, col = "grey" );
abline( v = 130, col = "grey" );
dev.off();