##### RNA-seq analysis for emf33 comb libraries (OSC)
#
# Builds `te_dm6_comb_reads_log2`: one row per dm6 gene / transposable element
# (TE) with log2(RPM) for the gfp, piwi, panx and nxf2 libraries, a `class`
# column, and logical flags for > 4-fold up / < 0.25-fold down in each of the
# six pairwise comparisons (flags are only filled in for TEs).
#
# Other objects left in the workspace: te_class, te_reads, test,
# fc4_te_osc(_neg), dm6_reads_<sample>, dm6_reads, dm6_reads_rpm,
# fc4_dm6_osc(_neg), dm6_reads_rpm_log2, dummy, fc_all, fc_all_neg.

library(ggrepel)
library(stringr)
library(ggplot2)

# ---- Settings ----

seq_dir <- "~/Dropbox (hannonlab)/Sequencing Data"
count_dir <- file.path(seq_dir, "Nxf2_final/OSC/RNA-seq/comb/counting")

# Columns holding the per-library values, in the order gfp, piwi, panx, nxf2.
count_cols <- 2:5

# Per-library scaling factors applied to TE RPM (te/dm6 reads).
te_scaling <- c(
  gfp = 0.076810813,
  piwi = 0.1851656,
  panx = 0.114993838,
  nxf2 = 0.132746382
)

# Number of leading rows kept from the per-gene count tables.
# NOTE(review): presumably this drops trailing summary rows of the count
# files; confirm against the files before changing it.
n_dm6_genes <- 17622

# Comparison labels, "<numerator>_<denominator>".
fc_pair_names <- c(
  "piwi_gfp", "panx_gfp", "nxf2_gfp",
  "panx_piwi", "nxf2_piwi", "nxf2_panx"
)

# ---- Helpers ----

# Scale a vector of read counts to reads per million.
to_rpm <- function(counts) {
  counts * (1000000 / sum(counts))
}

# Drop every row in which at least one library (columns 2:5) is below 1.
# Only the count columns are tested; the id column is not compared to 1.
keep_expressed <- function(rpm) {
  rpm[rowSums(rpm[, count_cols] < 1) == 0, ]
}

# Ratios for the six pairwise comparisons, columns ordered as fc_pair_names.
# `rpm` must hold gfp, piwi, panx, nxf2 in columns 2:5.
pairwise_ratios <- function(rpm) {
  cbind(
    rpm[, 3] / rpm[, 2],
    rpm[, 4] / rpm[, 2],
    rpm[, 5] / rpm[, 2],
    rpm[, 4] / rpm[, 3],
    rpm[, 5] / rpm[, 3],
    rpm[, 5] / rpm[, 4]
  )
}

# Read one per-gene count table (two columns: id, count).
read_htseq <- function(sample) {
  read.table(
    file.path(count_dir, paste0(sample, "_rna_.count.htseq")),
    as.is = TRUE
  )
}

# ---- RNA-seq for TE only ----

te_class <- read.table(
  file.path(seq_dir, "deeptools_annotation", "te_list_gl_vs_soma_merged.txt"),
  as.is = TRUE
)
te_reads <- read.table(
  file.path(count_dir, "emf33_te_reads.txt"),
  as.is = TRUE,
  header = TRUE
)
rownames(te_reads) <- te_class[, 1]

# RPM, then normalize each library with its scaling factor.
te_reads[count_cols] <- Map(
  function(counts, scale) to_rpm(counts) * scale,
  te_reads[count_cols],
  te_scaling
)

# Remove all rows with at least one value < 1 RPM.
te_reads <- keep_expressed(te_reads)

# Fold-change flags, computed on linear values (before the log2 below).
te_ratios <- pairwise_ratios(te_reads)
fc4_te_osc <- te_ratios > 4
colnames(fc4_te_osc) <- fc_pair_names
fc4_te_osc_neg <- te_ratios < 0.25
# NOTE(review): same names as the "up" flags (the dm6 "down" flags carry a
# "_neg" suffix), so the final table has duplicated column names. Kept as is
# because later code may depend on it, and rbind() with `dummy` below matches
# on these names.
colnames(fc4_te_osc_neg) <- fc_pair_names

# Split row names at the first "_": text before it replaces column 1, text
# after it becomes the class column.
test <- str_split_fixed(rownames(te_reads), "_", 2)
te_reads[, 1] <- test[, 1]
te_reads <- cbind(te_reads, test[, 2])
colnames(te_reads) <- c("te", "gfp", "piwi", "panx", "nxf2", "class")
te_reads[, 2:5] <- log2(te_reads[, 2:5])

# ---- RNA-seq for dm6 ----

dm6_reads_gfp <- read_htseq("gfp")
dm6_reads_piwi <- read_htseq("piwi")
dm6_reads_panx <- read_htseq("panx")
dm6_reads_nxf2 <- read_htseq("nxf2")

# Columns are bound by position: all four files must list the same ids in
# the same order.
dm6_reads <- cbind(
  dm6_reads_gfp,
  dm6_reads_piwi[, 2],
  dm6_reads_panx[, 2],
  dm6_reads_nxf2[, 2]
)
dm6_reads <- dm6_reads[seq_len(n_dm6_genes), ]

# Calculate RPM (dm6_reads itself holds RPM from here on).
dm6_reads[count_cols] <- lapply(dm6_reads[count_cols], to_rpm)

dm6_reads_rpm <- dm6_reads
rownames(dm6_reads_rpm) <- dm6_reads[, 1]
# Five columns need five names. Supplying only the four library names would
# label the id column "gfp" and shift every library label by one column.
colnames(dm6_reads_rpm) <- c("gene", "gfp", "piwi", "panx", "nxf2")

# Remove all rows with at least one value < 1 RPM.
dm6_reads_rpm <- keep_expressed(dm6_reads_rpm)

# Fold-change flags for all pairwise comparisons.
dm6_ratios <- pairwise_ratios(dm6_reads_rpm)
fc4_dm6_osc <- dm6_ratios > 4
colnames(fc4_dm6_osc) <- fc_pair_names
fc4_dm6_osc_neg <- dm6_ratios < 0.25
colnames(fc4_dm6_osc_neg) <- paste0(fc_pair_names, "_neg")

dm6_reads_rpm_log2 <- log2(dm6_reads_rpm[2:5])

# ---- Combine genes and TE in one table ----

colnames(te_reads) <- c("gene", "gfp", "piwi", "panx", "nxf2", "class")
dm6_reads_rpm_log2 <- cbind(rownames(dm6_reads_rpm_log2), dm6_reads_rpm_log2)
dm6_reads_rpm_log2 <- data.frame(dm6_reads_rpm_log2, "gene")
colnames(dm6_reads_rpm_log2) <- c(
  "gene", "gfp", "piwi", "panx", "nxf2", "class"
)
te_dm6_comb_reads_log2 <- rbind(dm6_reads_rpm_log2, te_reads[1:6])

# ---- Flags used to plot all upregulated TE names ----

# Alternative that also flags genes:
# fc_all <- rbind(fc4_dm6_osc, fc4_te_osc)
# fc_all_neg <- rbind(fc4_dm6_osc_neg, fc4_te_osc_neg)

# Every gene row gets FALSE for every comparison, so only TE rows can be
# flagged. The row count follows the filtered gene table instead of a fixed
# number, which keeps the cbind() below aligned when the input data change.
dummy <- as.data.frame(
  matrix(
    FALSE,
    nrow = nrow(dm6_reads_rpm_log2),
    ncol = length(fc_pair_names)
  )
)
colnames(dummy) <- fc_pair_names
fc_all <- rbind(dummy, fc4_te_osc)
fc_all_neg <- rbind(dummy, fc4_te_osc_neg)
te_dm6_comb_reads_log2 <- cbind(te_dm6_comb_reads_log2, fc_all, fc_all_neg)