library(reshape2)

Attaching package: ‘reshape2’

The following objects are masked from ‘package:data.table’:

    dcast, melt
library(ggplot2)
library(gplots)
package ‘gplots’ was built under R version 3.5.2
Attaching package: ‘gplots’

The following object is masked from ‘package:IRanges’:

    space

The following object is masked from ‘package:S4Vectors’:

    space

The following object is masked from ‘package:stats’:

    lowess
library(pheatmap)
package ‘pheatmap’ was built under R version 3.5.2
library(plyr)
package ‘plyr’ was built under R version 3.5.2
Attaching package: ‘plyr’

The following object is masked from ‘package:IRanges’:

    desc

The following object is masked from ‘package:S4Vectors’:

    rename
set.seed(123)
library(data.table)
library(magrittr)
library(dplyr)
package ‘dplyr’ was built under R version 3.5.2
Attaching package: ‘dplyr’

The following objects are masked from ‘package:plyr’:

    arrange, count, desc, failwith, id, mutate, rename, summarise, summarize

The following objects are masked from ‘package:data.table’:

    between, first, last

The following object is masked from ‘package:AnnotationDbi’:

    select

The following objects are masked from ‘package:GenomicRanges’:

    intersect, setdiff, union

The following object is masked from ‘package:GenomeInfoDb’:

    intersect

The following objects are masked from ‘package:IRanges’:

    collapse, desc, intersect, setdiff, slice, union

The following objects are masked from ‘package:S4Vectors’:

    first, intersect, rename, setdiff, setequal, union

The following object is masked from ‘package:Biobase’:

    combine

The following objects are masked from ‘package:BiocGenerics’:

    combine, intersect, setdiff, union

The following object is masked from ‘package:biomaRt’:

    select

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union
library(readr)
library(cowplot)
package ‘cowplot’ was built under R version 3.5.2
********************************************************
Note: As of version 1.0.0, cowplot does not change the
  default ggplot2 theme anymore. To recover the previous
  behavior, execute:
  theme_set(theme_cowplot())
********************************************************
library(igraph)
package ‘igraph’ was built under R version 3.5.2
Attaching package: ‘igraph’

The following objects are masked from ‘package:dplyr’:

    as_data_frame, groups, union

The following object is masked from ‘package:GenomicRanges’:

    union

The following object is masked from ‘package:IRanges’:

    union

The following object is masked from ‘package:S4Vectors’:

    union

The following objects are masked from ‘package:BiocGenerics’:

    normalize, path, union

The following objects are masked from ‘package:stats’:

    decompose, spectrum

The following object is masked from ‘package:base’:

    union
theme_set(theme_classic() )


# Assign a colour to every element of `groups`.
#
# groups:    vector coerced to a factor; colours are picked by level position.
# group.col: colours to pick from (defaults to the current graphics palette).
#            When there are more levels than colours, the colours are
#            repeated so that every level still gets one.
# Returns the vector of colours, one per element of `groups`, named with the
# corresponding group value.
get_colors <- function(groups, group.col = palette()) {
  grp <- as.factor(groups)
  n_levels <- nlevels(grp)
  palette_cols <- if (n_levels > length(group.col)) {
    rep(group.col, n_levels)
  } else {
    group.col
  }
  picked <- palette_cols[as.numeric(grp)]
  names(picked) <- as.vector(grp)
  picked
}


# Return `n` evenly spaced HCL colours (luminance 65, chroma 100).
#
# n: number of colours requested; 0 yields an empty character vector.
gg_color_hue <- function(n) {
  # n + 1 hues are generated between 15 and 375; the last one is dropped
  # below so that exactly n colours are returned.
  hues <- seq(15, 375, length.out = n + 1)
  # seq_len() rather than 1:n, because 1:0 is c(1, 0) and would return one
  # colour when none was requested.
  hcl(h = hues, l = 65, c = 100)[seq_len(n)]
}

Importing metadata

metadata
     File.accession Biosample.Age Experiment.date.released Biosample.term.name Paired.end Paired.with Audit.WARNING
  1:    ENCFF405VKS      13.5 day               2015-10-13         neural tube         NA                          
  2:    ENCFF241GLU      13.5 day               2015-10-13         neural tube         NA                          
  3:    ENCFF957SPL         0 day               2015-06-23         neural tube         NA                          
  4:    ENCFF046EJC         0 day               2015-06-23         neural tube         NA                          
  5:    ENCFF085MBO         0 day               2015-06-23         neural tube         NA                          
 ---                                                                                                               
293:     SRR1805824          P30a                    16484        whole cortex         NA                          
294:     SRR1805823         P110a                    16484        whole cortex         NA                          
295:     SRR1805822         P110b                    16484        whole cortex         NA                          
296:     SRR1805821          21Ma                    16484        whole cortex         NA                          
297:     SRR1805820          21Mb                    16484        whole cortex         NA                          

# Read the high-quality microexon reports of the two replicates.
ME_final.rep1 <- fread("~/Google_Drive/Results/ME/Paper/Final_Report/Reps/Rep1/out.high_quality.txt")
ME_final.rep2 <- fread("~/Google_Drive/Results/ME/Paper/Final_Report/Reps/Rep2/out.high_quality.txt")

# Keep the replicate-1 rows whose microexon is also reported in replicate 2.
ME_final <- ME_final.rep1[ ME %in% ME_final.rep2$ME, ]


# Per-file PSI table (replicate 1); add FILE_NAME and ME as copies of the
# File and ME_coords columns so later steps can use a common naming.
ME_cov <- fread("~/Google_Drive/Results/ME/Paper/Final_Report/Reps/Rep1/out_filtered_ME.PSI.txt")
ME_cov[, `:=`(FILE_NAME=File, ME=ME_coords)]

# Number of distinct microexons retained in ME_final.
number_of_microexons <- length(ME_final[ , unique(ME)])



# Restrict the PSI table to the microexons retained in ME_final.
ME_cov_filtered <- ME_cov[ME %in% ME_final[, ME]  , ]
library(stringi)




#mat <- stri_split_fixed(ME_cov_filtered$ME_coverages, ',', simplify=T)
#mat <- `dim<-`(as.numeric(mat), dim(mat))  # convert to numeric and save dims
#rowsum(mat, na.rm=T)

#sum(mat, na.rm=T)


#sapply(strsplit(as.character(ME_cov_filtered$ME_coverages), ",", fixed=T), function(x) sum(as.numeric(x)))
#ME_cov_filtered[, strsplit( ME_coverages, ",") ]


#library(dplyr)
#library(tidyr)
#df %>%
#   separate_rows(y, z, convert = TRUE) %>%
#   group_by(x) %>% 
#   summarise_all(sum)

Obtaining PSI matrix




# Lookup vectors keyed by file accession: tissue name and sample age.
Tissues <- setNames(
  as.character(metadata$Biosample.term.name),
  as.character(metadata$File.accession)
)

Ages <- setNames(
  as.character(metadata$Biosample.Age),
  as.character(metadata$File.accession)
)

# Keep only PSI rows for files described in the metadata, without duplicates.
ME_cov_filtered <- unique(
  ME_cov_filtered[FILE_NAME %in% metadata$File.accession]
)
#ME_cov_filtered_ENCODE <- unique(ME_cov_filtered[FILE_NAME %in% metadata[ , File.accession],])



#colnames(Tissue_PSI_matrix_dcast) <- as.character( paste(sep = " ", Tissues[colnames(Tissue_PSI_matrix_dcast)], Ages[colnames(Tissue_PSI_matrix_dcast)])) #To replase the file name by the biological sample name

#Tissue_PSI_matrix_dcast <- Tissue_PSI_matrix_dcast[, order(paste(sep = " ", Tissues[colnames(Tissue_PSI_matrix_dcast)], Ages[colnames(Tissue_PSI_matrix_dcast)]))]

Probabilistic PCA


library(pcaMethods)

# Build the microexon x sample PSI matrix (rows: ME, columns: FILE_NAME).
Tissue_PSI_matrix_melt <- ME_cov_filtered[, c("ME", "FILE_NAME", "PSI") ]
Tissue_PSI_matrix_dcast <- reshape2::dcast(Tissue_PSI_matrix_melt, ME ~ FILE_NAME, value.var = "PSI")
row.names(Tissue_PSI_matrix_dcast) <- Tissue_PSI_matrix_dcast$ME
# Convert to a numeric matrix and drop the first (ME) column; the microexon
# ids are kept as row names.
Tissue_PSI_matrix_dcast <- data.matrix(Tissue_PSI_matrix_dcast)[, -1]

# Treat NaN PSI values as missing. is.nan() tests the numeric matrix directly
# instead of comparing it against the string "NaN".
Tissue_PSI_matrix_dcast[is.nan(Tissue_PSI_matrix_dcast)] <- NA

dim(Tissue_PSI_matrix_dcast)

# Number of samples with a missing PSI for each microexon, computed once and
# reused by every filter below.
missing_per_ME <- rowSums(is.na(Tissue_PSI_matrix_dcast))

# NOTE(review): 271 and 289 are hard-coded; presumably sample counts --
# confirm against ncol(Tissue_PSI_matrix_dcast).
dim(Tissue_PSI_matrix_dcast[missing_per_ME < 271 * 0.9, ])

dim(Tissue_PSI_matrix_dcast[missing_per_ME < 289 * 0.9, ])

# Probabilistic PCA (3 components) on samples x microexons, keeping only the
# microexons that pass the missing-value filter.
result <- pca(t(Tissue_PSI_matrix_dcast[missing_per_ME < 289 * 0.9, ]), method = "ppca", nPcs = 3, seed = 123)
## Get the estimated complete observations
cObs <- completeObs(result)
## Plot the scores
plotPcs(result, type = "scores")

summary(result)

# Back to microexons x samples, with the PPCA-completed values.
Tissue_PSI_matrix_dcast_ppca <- t(cObs)

# Clamp the completed values to the [0, 1] range.
Tissue_PSI_matrix_dcast_ppca[Tissue_PSI_matrix_dcast_ppca > 1] <- 1
Tissue_PSI_matrix_dcast_ppca[Tissue_PSI_matrix_dcast_ppca < 0] <- 0


##colnames(Tissue_PSI_matrix_dcast_ppca) <- as.character( paste(sep = " ", Tissues[colnames(Tissue_PSI_matrix_dcast_ppca)], Ages[colnames(Tissue_PSI_matrix_dcast_ppca)])) #To replase the file name by the biological sample name



#tissue_heatmap <- pheatmap::pheatmap(Tissue_PSI_matrix_dcast_ppca,  , fontsize = 4,  cutree_rows = 24, cutree_cols = 16, clustering_method = "ward.D2")

#Tissue_PSI_matrix_dcast_ppca <- t(cObs)

#Tissue_PSI_matrix_dcast_ppca[Tissue_PSI_matrix_dcast_ppca>1] <- 1
#Tissue_PSI_matrix_dcast_ppca[Tissue_PSI_matrix_dcast_ppca<0] <- 0


##colnames(Tissue_PSI_matrix_dcast_ppca) <- as.character( paste(sep = " ", Tissues[colnames(Tissue_PSI_matrix_dcast_ppca)], Ages[colnames(Tissue_PSI_matrix_dcast_ppca)])) #To replase the file name by the biological sample name



#tissue_heatmap <- pheatmap::pheatmap(Tissue_PSI_matrix_dcast_ppca,  , fontsize = 4,  cutree_rows = 24, cutree_cols = 17, clustering_method = "ward.D2")

# Ward.D2 hierarchical clustering of the completed PSI matrix: samples
# (columns) and microexons (rows).
hc_cols <- hclust(dist(t(Tissue_PSI_matrix_dcast_ppca)), method = "ward.D2")
hc_rows <- hclust(dist(Tissue_PSI_matrix_dcast_ppca), method = "ward.D2")

# Metadata lookups keyed by file accession: tissue name, age, release date.
Tissues_name <- setNames(
  as.character(metadata$Biosample.term.name),
  as.character(metadata$File.accession)
)

Tissues_age <- setNames(
  as.character(metadata$Biosample.Age),
  as.character(metadata$File.accession)
)

Tissue_date <- setNames(
  as.character(metadata$Experiment.date.released),
  as.character(metadata$File.accession)
)
#Tissue_clusters <- cutree(hc_cols, k = 16)
#Tissue_clusters <- cbind(Tissue_clusters, Tissues_name[names(Tissue_clusters)], Tissues_age[names(Tissue_clusters)], Tissue_date[names(Tissue_clusters)]) 

#colnames(Tissue_clusters) <- c("cluster", "name", "age", "date")

#Tissue_clusters <- data.frame(Tissue_clusters)

##Tissue_clusters_sum[which(grepl("hindbrain|midbrain|forebrain|neural\ tube", Tissue_clusters_sum$name)), ]

#Tissue_clusters$name <- factor(Tissue_clusters$name, level=c( "skeletal muscle tissue", "heart", "thymus", "spleen", "liver",  "adrenal gland", "intestine", "stomach", "lung",  "kidney", "bladder", "limb", "embryonic facial prominence", "forebrain", "hindbrain", "midbrain", "neural tube", "whole cortex"))


#Tissue_clusters$age <- mapvalues(Tissue_clusters$age, 
#          from = c("10.5 day", "11.5 day", "12.5 day", "13.5 day", "14.5 day", "15.5 day", "16.5 day", "0 day", "8 week", "P0a", "P0b", "P110a", "P110b", "P15a", "P15b", "P30a", "P30b", "P4a", #"P4a", "P7a", "P7b", "21Ma", "21Mb", "E14.5", "E16.5", "P4b"),
#         to = c(10.5, 11.5, 12.5, 13.5, 14.5, 15.5, 16.5, 21, 59, 21, 21, 131, 131, 36, 36, 41,41, 25, 25, 27,27, 651, 651, 14.5, 16.5, 25 ) )




#Tissue_clusters$age <- factor(Tissue_clusters$age, levels = as.character(sort(as.numeric(levels(Tissue_clusters$age)))))
# Cut the sample dendrogram into 16 clusters and attach, per file accession,
# the tissue name, age and release date.
Tissue_clusters <- cutree(hc_cols, k = 16)
Tissue_clusters <- cbind(Tissue_clusters, Tissues_name[names(Tissue_clusters)], Tissues_age[names(Tissue_clusters)], Tissue_date[names(Tissue_clusters)])

colnames(Tissue_clusters) <- c("cluster", "name", "age", "date")

Tissue_clusters <- data.frame(Tissue_clusters)

#Tissue_clusters_sum[which(grepl("hindbrain|midbrain|forebrain|neural\ tube", Tissue_clusters_sum$name)), ]

# Fix the tissue display order. `levels` is spelled out in full; the previous
# `level=` only worked through partial argument matching.
Tissue_clusters$name <- factor(Tissue_clusters$name, levels = c( "skeletal muscle tissue", "heart", "thymus", "spleen", "liver",  "adrenal gland", "intestine", "stomach", "lung",  "kidney", "bladder", "limb", "embryonic facial prominence", "forebrain", "hindbrain", "midbrain", "neural tube", "whole cortex"))

#Tissue_clusters$age <- factor(Tissue_clusters$age, levels=c("10.5 day", "11.5 day", "12.5 day", "13.5 day", "14.5 day", "15.5 day", "16.5 day", "0 day", "8 week"))

Tissue_clusters$File.accession <- row.names(Tissue_clusters)


Tissue_clusters <- data.table(Tissue_clusters)


# Recode the age labels as numbers. Each label appears once in `from`: the
# duplicated "P4a" entry was removed (it mapped to the same value, 25, and
# only made mapvalues() report a `from` value as not present).
Tissue_clusters$age <- mapvalues(Tissue_clusters$age,
          from = c("10.5 day", "11.5 day", "12.5 day", "13.5 day", "14.5 day", "15.5 day", "16.5 day", "0 day", "8 week", "P0a", "P0b", "P110a", "P110b", "P15a", "P15b", "P30a", "P30b", "P4a", "P4b", "P7a", "P7b", "21Ma", "21Mb", "E14.5", "E16.5"),
          to = c(10.5, 11.5, 12.5, 13.5, 14.5, 15.5, 16.5, 21, 59, 21, 21, 131, 131, 36, 36, 41, 41, 25, 25, 27, 27, 651, 651, 14.5, 16.5))

# Cut the microexon (row) dendrogram into 24 clusters.
ME_clusters <- cutree(hc_rows, k = 24)

# PCA loadings, one row per microexon, with the microexon id as a column.
PCA_loadings <- data.frame(loadings(result))
PCA_loadings$ME <- row.names(PCA_loadings)


PCA_loadings <- data.table(PCA_loadings)
# Flip the sign of the PC1 and PC2 loadings (updated by reference).
PCA_loadings[, `:=`(PC1=-PC1, PC2=-PC2)]


# Attach each microexon's cluster id by looking its name up in ME_clusters.
PCA_loadings$ME_cluster <- ME_clusters[PCA_loadings$ME]


# Mean PC1 loading per cluster; clusters sorted by it give the factor order.
PCA_loadings_stats <- PCA_loadings[ , .(PC1_mean=mean(PC1)) , by="ME_cluster"]
ME_cluster_loading_order <- PCA_loadings_stats[order(PC1_mean)]$ME_cluster

# From here on ME_cluster is a factor ordered by mean PC1 loading.
PCA_loadings$ME_cluster <- factor(PCA_loadings$ME_cluster , levels=ME_cluster_loading_order)


# Distribution of the PC1, PC2 and PC3 loadings within each cluster.
ggplot(PCA_loadings) +
  geom_boxplot(aes(as.factor(ME_cluster) , PC1))
ggplot(PCA_loadings) +
  geom_boxplot(aes(as.factor(ME_cluster) , PC2))
ggplot(PCA_loadings) +
  geom_boxplot(aes(as.factor(ME_cluster) , PC3))

# Recomputed after the factor conversion above, so ME_cluster in these
# summary tables is a factor rather than the raw cluster id.
PCA_loadings_stats <- PCA_loadings[ , .(PC1_mean=mean(PC1)) , by="ME_cluster"]

PCA_loadings_stats_PC2 <- PCA_loadings[ , .(PC2_mean=mean(PC2)) , by="ME_cluster"]



ME_cluster_loading_order <- PCA_loadings_stats[order(PC1_mean)]$ME_cluster

Neuronal Microexons

# Clusters whose mean PC1 / PC2 loading exceeds 0.03 in absolute value.
PCA_loadings_stats[abs(PC1_mean)>0.03, ][order(PC1_mean)]
PCA_loadings_stats_PC2[abs(PC2_mean)>0.03, ][order(-PC2_mean)]
# Long format of the completed PSI matrix.
# NOTE(review): both reshape2 and data.table export melt(); which one runs
# here depends on attach order -- consider calling reshape2::melt() explicitly.
Tissue_PSI_matrix_melt_ppca <- melt(Tissue_PSI_matrix_dcast_ppca)

colnames(Tissue_PSI_matrix_melt_ppca) <- c("ME", "FILE_NAME", "PSI")

# Cluster id per microexon as a table with columns ME_clusters and ME.
ME_clusters_table <- data.frame(ME_clusters)

ME_clusters_table$ME <- row.names(ME_clusters_table)
ME_clusters_table <- data.table(ME_clusters_table)


# PSI values annotated with the cluster of their microexon.
ME_clusters_PSI <-  data.table(merge(Tissue_PSI_matrix_melt_ppca, ME_clusters_table , by=c("ME")))



# NOTE(review): the output files use the extension ".tvs"; presumably ".tsv"
# was intended -- confirm before renaming, downstream readers may rely on it.
fwrite( ME_clusters_PSI, "../../Paper/Jacob/ME_clusters_PSI.tvs", quote = FALSE, col.names = TRUE, sep="\t")

# NOTE(review): ME_cluster_names is only created further down in this file,
# so this line fails when the script is run top to bottom.
fwrite( ME_cluster_names, "../../Paper/Jacob/ME_cluster_names.tvs", quote = FALSE, col.names = TRUE, sep="\t")
# NOTE(review): only ids 1:18 are renamed although cutree() above uses k = 24,
# and later code groups and merges on the numeric ME_clusters ids -- this
# relabelling looks like a leftover from an earlier run; confirm.
ME_clusters_PSI$ME_clusters <- mapvalues(ME_clusters_PSI$ME_clusters, 
          from =1:18,
          to = c("I1", "E1", "E3", "I2", "N1", "M1", "N2", "NM3", "NM2", "N5", "NM1", "N3", "N4", "NN2", "E2", "I4", "I3", "NN1"))
Tissue_PSI_matrix_melt_ppca <- data.table(Tissue_PSI_matrix_melt_ppca)

Tissue_PSI_matrix_melt_ppca

# Mean PC1 and PC2 loading per microexon cluster.
PCA_loadings_stats <- PCA_loadings[ , .(PC1_mean=mean(PC1), PC2_mean=mean(PC2) ) , by="ME_cluster"]

#PCA_loadings_stats[, PC1_mean:=-PC1_mean]

PCA_loadings_stats[order(-PC1_mean)]
PCA_loadings_stats[order(PC2_mean)]

#PCA_loadings_stats_PC2[, PC2_mean:=-PC2_mean]
PCA_loadings_stats[order(PC2_mean)]



# Cut-offs applied to the mean loadings (rounded to 3 decimals) below.
loading_threshold <- 0.010
loading_threshold_PC2 <- 0.010


# Neuronal: mean PC1 loading above the threshold, strongest first.
Neuronal <- PCA_loadings_stats[ round(PC1_mean, 3) >  loading_threshold, ][order(-PC1_mean)]$ME_cluster
# Muscular: mean PC2 loading below minus the PC2 threshold, most negative first.
Muscular <- PCA_loadings_stats_PC2[round(PC2_mean, 3) < -loading_threshold_PC2, ][order(PC2_mean)]$ME_cluster

# Non-neuronal: mean PC1 loading at or below minus the threshold.
NonNeuronal<- PCA_loadings_stats[ round(PC1_mean, 3) <= -loading_threshold, ][order(PC1_mean)]$ME_cluster

#WeakNeuronal <- PCA_loadings_stats[  round(PC1_mean, 3) < loading_threshold & round(PC1_mean, 3) >=  0.01, ][order(PC1_mean)]$ME_cluster

# Running counters used to number the cluster names of each class
# (N1, N2, ..., NM1, ..., NN1, ..., M1, ...). WN is only used by the
# commented-out weak-neuronal loop below.
N = 1 
NM = 1
NN = 1
M = 1
WN = 1

# Accumulates one row per named cluster:
# ME_cluster, ME_cluster.name, ME_cluster.type.
ME_cluster_names <- data.table()


# Neuronal clusters: those that are also in Muscular are named "NM<k>"
# (Neuro-Muscular), the others "N<k>" (Neuronal). Names are numbered in the
# order of the Neuronal vector.
# NOTE(review): if Neuronal is a factor, for() iterates over its labels as
# character strings, so ME_cluster is stored as text here.
for (ME_cluster in Neuronal){
  
  if (ME_cluster %in% Muscular){
    
    ME_cluster.name = paste0("NM", NM)
    ME_cluster.type = "Neuro-Muscular"
    
    ME_cluster_names = rbind(ME_cluster_names, cbind(ME_cluster, ME_cluster.name, ME_cluster.type  ))
    
    NM = NM + 1
    
  }

  
  else {
    
    ME_cluster.name = paste0("N", N)
    ME_cluster.type = "Neuronal"
    
    ME_cluster_names = rbind(ME_cluster_names,  cbind(ME_cluster, ME_cluster.name, ME_cluster.type ))
    
    N = N + 1
    
  }
  
}


# Non-neuronal clusters are named "NN<k>" in the order of NonNeuronal.
for (ME_cluster in NonNeuronal) {
  

  ME_cluster.name = paste0("NN", NN)
  ME_cluster.type = "Non-Neuronal"
  
  ME_cluster_names = rbind(ME_cluster_names,  cbind(ME_cluster, ME_cluster.name, ME_cluster.type ))
  
  NN = NN + 1
  
  
}


#for (ME_cluster in WeakNeuronal) {
  

#  ME_cluster.name = paste0("WN", WN)
#  ME_cluster.type = "Weak-Neuronal"
  
#  ME_cluster_names = rbind(ME_cluster_names,  cbind(ME_cluster, ME_cluster.name, ME_cluster.type ))
  
#  WN = WN + 1
  
  
#}



# Muscular clusters that have not been named by the loops above are named
# "M<k>".
for (ME_cluster in Muscular){
  
  
  if ( (ME_cluster %in% ME_cluster_names$ME_cluster)==F) {
  
  
  ME_cluster.name = paste0("M", M)
  ME_cluster.type = "Muscular"
  
  ME_cluster_names = rbind(ME_cluster_names,  cbind(ME_cluster, ME_cluster.name, ME_cluster.type ))
  
  M = M + 1
  
  }
  
  
  }


ME_cluster_names


# Mean and standard deviation of PSI per microexon cluster.
ME_clusters_mean_PSIs <-  ME_clusters_PSI[, .(mean_PSI=mean(PSI), sd_PSI=sd(PSI)), by="ME_clusters" ]

# Clusters that did not receive a name in ME_cluster_names so far.
ME_clusters_flat <- ME_clusters_mean_PSIs[ ! ME_clusters %in% ME_cluster_names$ME_cluster,  ]


# Running counters for the "E", "I" and "O" cluster names.
E = 1
I = 1
O = 1 



# Mean PSI <= 1/3: "E<k>" (Excluded), numbered by increasing mean PSI.
for (ME_cluster in ME_clusters_flat[ mean_PSI <= 1/3,  ][order(mean_PSI)]$ME_clusters) {
  
  
  ME_cluster.name = paste0("E", E)
  ME_cluster.type = "Excluded"
  
  ME_cluster_names = rbind(ME_cluster_names,  cbind(ME_cluster, ME_cluster.name, ME_cluster.type ))
  
  E = E + 1
  
  
} 



# Mean PSI >= 2/3: "I<k>" (Included), numbered by decreasing mean PSI.
for (ME_cluster in ME_clusters_flat[ mean_PSI>=2/3,  ][order(-mean_PSI)]$ME_clusters) {
  
  
  ME_cluster.name = paste0("I", I)
  ME_cluster.type = "Included"
  
  ME_cluster_names = rbind(ME_cluster_names,  cbind(ME_cluster, ME_cluster.name, ME_cluster.type ))
  
  I = I + 1
  
  
}




# Mean PSI strictly between 1/3 and 2/3: "O<k>" (Other), numbered by
# increasing mean PSI.
for (ME_cluster in ME_clusters_flat[ mean_PSI > 1/3  &  mean_PSI < 2/3  ,  ][order(mean_PSI)]$ME_clusters) {
  
  
  ME_cluster.name = paste0("O", O)
  ME_cluster.type = "Other"
  
  ME_cluster_names = rbind(ME_cluster_names,  cbind(ME_cluster, ME_cluster.name, ME_cluster.type ))
  
  O = O + 1
  
  
} 
ME_cluster_names

# Convert the cluster id to numeric so it can be used as a merge key.
ME_cluster_names$ME_cluster <- as.numeric(ME_cluster_names$ME_cluster)

# Add the per-cluster PSI mean and standard deviation.
ME_cluster_names <- merge( x=ME_cluster_names, y=ME_clusters_mean_PSIs, by.x="ME_cluster", by.y="ME_clusters")

# Go through as.character() so the cluster labels, not the internal factor
# codes, are converted to numbers.
PCA_loadings_stats$ME_cluster <- as.numeric(as.character(PCA_loadings_stats$ME_cluster))

PCA_loadings_stats_PC2$ME_cluster <- as.numeric(as.character(PCA_loadings_stats_PC2$ME_cluster))


# Add the per-cluster mean loadings.
ME_cluster_names <- merge(ME_cluster_names, PCA_loadings_stats, by="ME_cluster")
#ME_cluster_names <- merge(ME_cluster_names, PCA_loadings_stats_PC2, by="ME_cluster")

# Fix the display order of the cluster classes.
ME_cluster_names$ME_cluster.type <- factor(ME_cluster_names$ME_cluster.type, levels = c("Excluded", "Included", "Neuronal",  "Non-Neuronal", "Muscular",  "Neuro-Muscular", "Other"))
# Cluster names placed at their mean PC1 loading (x) and negated mean PC2
# loading (y), coloured by cluster class.
# NOTE(review): the PC2 loadings were already sign-flipped when PCA_loadings
# was built, so y = -PC2_mean flips them a second time -- confirm intended.
Fig5.X.2 <- ggplot(ME_cluster_names) +
  geom_text(aes(x = PC1_mean, y = -PC2_mean, label = ME_cluster.name, colour = ME_cluster.type)) +
  xlab("Mean PC1 loading") +
  ylab("Mean PC2 loading") +
  theme(legend.position = "top")


Fig5.X.2
# Cluster names placed at their mean PSI (x) and PSI standard deviation (y).
# The y-axis label typo ("standar") is corrected.
Fig5.X.3 <- ggplot(ME_cluster_names) +
  geom_text(aes(x = mean_PSI, y = sd_PSI, label = ME_cluster.name, colour = ME_cluster.type)) +
  xlab("Mean PSI") +
  ylab("PSI standard deviation") +
  theme(legend.position = "top")



Fig5.X.3


# Cluster assignment per microexon as a two-column table (cluster id, ME).
ME_clusters.table <- cbind(ME_clusters, names(ME_clusters))
colnames(ME_clusters.table) <- c("ME_clusters", "ME")

ME_clusters.table <- data.table(ME_clusters.table)
# cbind() with the names produced a character matrix, so turn the cluster ids
# back into numbers.
ME_clusters.table$ME_clusters <- as.numeric(ME_clusters.table$ME_clusters)


# Add microexon length and U2 score for every clustered microexon.
ME_cluster_info <- merge(ME_clusters.table, ME_final[, c("ME", "len_micro_exon_seq_found", "U2_scores")], by="ME")
ME_cluster_info <- data.table(ME_cluster_info)


# Per-cluster summary.
# asym counts the microexons whose length is not a multiple of 3. The test
# `%% 3 != 0` is required: summing the remainders themselves would count a
# remainder of 2 twice and understate symetrical_fraction below.
ME_cluster_info_by <- ME_cluster_info[ , .(asym = sum(len_micro_exon_seq_found %% 3 != 0),
                    mean_U2_score = mean(U2_scores),
                    total=.N
                    ), by="ME_clusters"  ]

# Fraction of microexons in the cluster whose length is a multiple of 3.
ME_cluster_info_by[, symetrical_fraction:=(total-asym)/total]


ME_cluster_names <- merge(ME_cluster_names, ME_cluster_info_by, by.x="ME_cluster", by.y="ME_clusters")




#ME_cluster_info <- merge(ME_cluster_info_by[, .(ME_clusters=unique(ME_clusters)), by=ME], ME_final[, c("ME", "len_micro_exon_seq_found", "U2_scores")], by="ME")


 






# Cluster names placed at their in-frame fraction (x) and mean U2 score (y),
# coloured by cluster class.
Fig5.X.4 <- ggplot(ME_cluster_names) +
  geom_text(
    aes(
      x = symetrical_fraction,
      y = mean_U2_score,
      label = ME_cluster.name,
      colour = ME_cluster.type
    )
  ) +
  xlab("In-frame microexon fraction") +
  ylab("Mean splicing strength") +
  theme(legend.position = "top")


Fig5.X.4
  
ME_cluster_names[ ME_cluster.type=="Excluded", ]
ME_cluster_names[ ME_cluster.type=="Included", ]
ME_cluster_names[ ME_cluster.type=="Neuronal", ]
ME_cluster_names[ ME_cluster.type=="Neuro-Muscular", ]
ME_cluster_names[ ME_cluster.type=="Neuronal", ]
ME_cluster_names[ ME_cluster.type=="Included", ]
ME_cluster_names[ ME_cluster.type=="Excluded", ]
ME_cluster_names[ ME_cluster.type=="Included", ]
ME_cluster_names[ ME_cluster.type=="Other", ]

levels(factor(ME_cluster_names$ME_cluster.type))
# Cluster names concatenated class by class, following the level order of
# ME_cluster.type.
ME_cluster_names.levels <- do.call(
  c,
  lapply(
    levels(factor(ME_cluster_names$ME_cluster.type)),
    function(type_level) {
      ME_cluster_names[ME_cluster.type == type_level, ME_cluster.name]
    }
  )
)
Fig5.X.1.legend
TableGrob (5 x 5) "guide-box": 2 grobs
                                    z     cells                  name           grob
99_4000b8f4423e777ea3de92bc832bc4ce 1 (3-3,3-3)                guides gtable[layout]
                                    0 (2-4,2-4) legend.box.background zeroGrob[NULL]

# Four panels side by side with their own legends removed; the first panel
# is given 1.7x the width of the others and panels are labelled A-D.
# Fig5.X.1 is expected to exist already; it is not created in this part of
# the file.
Fig5.X.plots <- plot_grid(Fig5.X.1 + theme(legend.position="none"), 
                    Fig5.X.2 + theme(legend.position="none"), 
                    Fig5.X.3 + theme(legend.position="none"), 
                    Fig5.X.4 + theme(legend.position="none"), 
                    nrow =1,
                    rel_widths = c(1.7,1,1,1),
                    labels = "AUTO" )


# Stack the shared legend (Fig5.X.1.legend) above the panel row.
Fig5.X <- plot_grid(Fig5.X.1.legend, Fig5.X.plots, ncol = 1, rel_heights = c(0.2, 1))

Fig5.X
# Microexon -> cluster id -> cluster name.
ME_clusters.names <- data.table(ME_clusters)
ME_clusters.names$ME <- names(ME_clusters)
ME_clusters.names <- merge(ME_clusters.names,  ME_cluster_names[, c("ME_cluster", "ME_cluster.name")],  by.x="ME_clusters", by.y="ME_cluster")
# Attach the cluster annotation to the per-microexon report.
ME_final.ME_clusters <- merge(ME_final, ME_clusters.names, by="ME")

# Mean vertebrate conservation per cluster; rows whose value is the string
# "None" are excluded before the column is converted to numeric.
ME_cluster.conservation <- ME_final.ME_clusters[ mean_conservations_vertebrates!="None" ,  .(mean_conservation=mean(as.numeric(mean_conservations_vertebrates)))  , by=c("ME_clusters", "ME_cluster.name")]


ME_cluster_names <-  merge(ME_cluster_names, ME_cluster.conservation,  by.x=c("ME_cluster", "ME_cluster.name"), by.y=c("ME_clusters", "ME_cluster.name"))

# Pearson correlation between in-frame fraction and mean conservation.
round(cor(ME_cluster_names$symetrical_fraction, ME_cluster_names$mean_conservation, method="pearson"), 2)


# Cluster names at (in-frame fraction, mean conservation) with a dashed
# linear fit.
ggplot(ME_cluster_names, aes(x=symetrical_fraction, y=mean_conservation)) +
  geom_smooth(method=lm, linetype="dashed") +
  geom_text(aes( label = ME_cluster.name, colour = ME_cluster.type ) ) +
  xlab("In-frame microexon fraction") +
  ylab("Mean Phylop conservation score") +
  theme(legend.position="top") +
 labs(color = "Microexon cluster class")

# One representative microexon per cluster: the first member of every
# cluster, in increasing cluster-id order, as a named vector
# (name = microexon id, value = cluster id).
# match() returns the position of the first occurrence of each id, which
# replaces one hand-written line per cluster and follows the number of
# clusters actually present in ME_clusters.
some_ME_clusters <- ME_clusters[match(sort(unique(ME_clusters)), ME_clusters)]


# Column annotation: tissue cluster id per file accession.
col_ann <- data.frame(  Tissue_clusters$cluster, row.names=Tissue_clusters$File.accession)
colnames(col_ann) <- "Tissue clusters"

rownames(col_ann) <- Tissue_clusters$File.accession



# Hand-curated class of each of the 16 tissue clusters.
# NOTE(review): data.frame() with default check.names presumably stores the
# second column as "Tissue.cluster.type"; the colnames() call after the merge
# restores the spaced name.
tissue_cluster_type = data.frame(cluster=c(1, 6, 8, 14, 12, 7, 15, 5, 11, 4, 9, 3, 13, 10, 16, 2), 
                                 `Tissue cluster type`=c("Non-neuronal",
                                                "Non-neuronal",
                                                "Non-neuronal",
                                                "Non-neuronal",
                                                "Embryonic neuronal",
                                                "Adrenal gland",
                                                "Skeletal muscle",
                                                "Heart",
                                                "Embryonic neuronal",
                                                "Embryonic neuronal",
                                                "Embryonic neuronal",
                                                "Postnatal neuronal",
                                                "Embryonic neuronal",
                                                "Postnatal neuronal",
                                                "Postnatal neuronal",
                                                "Postnatal neuronal"))

# merge() does not keep row names, so the file accession is carried through
# as a column and restored as row names afterwards.
col_ann$File <- rownames(col_ann)
col_ann <- merge(col_ann, tissue_cluster_type, by.x="Tissue clusters",by.y="cluster")
rownames(col_ann) <- col_ann$File

colnames(col_ann) <- c("Tissue clusters", "File", "Tissue cluster type")



# Annotation colours.
# NOTE(review): the key here is `Tissue.cluster.type` while the col_ann
# column is named "Tissue cluster type"; the `cluster` and `GeneClass`
# entries do not match any col_ann column either -- confirm before passing
# this list to a heatmap call.
ann_colors = list(
    cluster = c(black="black"),
    `Tissue.cluster.type` =  c(`Adrenal gland` = "#B79F00",
                      `Skeletal muscle` = "#00BFC4",
                      `Heart` = "#F564E3",
                      `Non-neuronal` = "#A58AFF",
                      `Embryonic neuronal` = "#00BA38",
                      `Postnatal neuronal` = "#F8766D"
                      ),
    GeneClass = c(Path1 = "#7570B3", Path2 = "#E7298A", Path3 = "#66A61E")
)

# The level "Intermediate" is declared here but is not assigned to any
# cluster in tissue_cluster_type.
col_ann$`Tissue cluster type` <- factor(col_ann$`Tissue cluster type` , levels = c("Adrenal gland", "Skeletal muscle", "Heart", "Non-neuronal", "Intermediate", "Embryonic neuronal", "Postnatal neuronal"))

# Throw-away bar plot built only to extract its fill legend.
# NOTE(review): six colours are supplied for seven declared levels; this
# presumably relies on the unused "Intermediate" level being dropped by the
# scale -- named values would make the mapping explicit.
fake_plot <- ggplot(col_ann, aes(as.numeric(`Tissue clusters`), `Tissue clusters`, fill=`Tissue cluster type` )) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values=c("#B79F00", "#00BFC4", "#F564E3", "#A58AFF", "#00BA38", "#F8766D" ), name= "Tissue cluster type") +
   theme(legend.position="top", legend.justification="center") +
  guides(fill=guide_legend(nrow=1,byrow=TRUE)) 


fake_plot.legend <- get_legend(fake_plot)

library(rlang)

#rownames(col_ann) <- rownames(Tissue_clusters)

# Rebuild the completed PSI matrix (microexons x samples) and clamp it to
# the [0, 1] range.
Tissue_PSI_matrix_dcast_ppca <- t(cObs)
Tissue_PSI_matrix_dcast_ppca[Tissue_PSI_matrix_dcast_ppca > 1] <- 1
Tissue_PSI_matrix_dcast_ppca[Tissue_PSI_matrix_dcast_ppca < 0] <- 0

# Separate copy used for the heatmap, so its row names can be replaced.
Tissue_PSI_matrix_dcast_ppca_heatmap <- duplicate(Tissue_PSI_matrix_dcast_ppca)

# Row labels: the cluster id, left-padded with spaces, for the representative
# microexon of each cluster; rows of other microexons get no match (NA).
some_ME_clusters_space <- setNames(
  paste0("      ", some_ME_clusters),
  names(some_ME_clusters)
)

row.names(Tissue_PSI_matrix_dcast_ppca_heatmap) <-
  some_ME_clusters_space[row.names(Tissue_PSI_matrix_dcast_ppca_heatmap)]

#tissue_heatmap <- pheatmap::pheatmap(Tissue_PSI_matrix_dcast_ppca_heatmap,  , fontsize = 10 ,  cutree_rows = 24, cutree_cols = 16, clustering_method = "ward.D2" , show_colnames=FALSE, annotation_col= col_ann, annotation_names_col=FALSE, treeheight_col=200, treeheight_row=100)

plot_grid(fake_plot.legend)



# PCA scores, one row per sample, with the file accession as a column.
PCA <- data.frame(scores(result))





PCA$File.accession <- row.names(PCA)
PCA <- data.table(PCA)
# Flip the sign of PC1 only.
# NOTE(review): the loadings table flips both PC1 and PC2 -- confirm that
# leaving the PC2 scores unflipped is intended.
PCA <- PCA[, `:=`(PC1=-PC1) ]


# Attach cluster, tissue name, age and date to every sample.
PCA <- merge(Tissue_clusters, PCA, by="File.accession")

# Age as a number; as.character() first so factor labels, not codes, are used.
PCA$age <- as.numeric(as.character(PCA$age))
# Mean PC1 per tissue and age.
PCA_stats <- PCA[ , .(mean_PC1=mean(PC1)) , by=c("name", "age")]


# PC1 against age for the four neural tissues: jittered samples plus a line
# through the per-age means.
Sup_PCA_age.B <- ggplot() +
  geom_jitter(data = PCA[name %in% c("forebrain", "midbrain", "hindbrain", "neural tube"), ],  aes(age, PC1, colour=name), width=0.1, height = 0 ) +
  geom_line(data = PCA_stats[name %in% c("forebrain", "midbrain", "hindbrain", "neural tube"), ], aes(age, mean_PC1, group=name, colour=name)) +
  labs(colour="Neural tissue")
PCA_stats<- PCA[ , .(mean_PC1=mean(PC1)) , by=c("name", "age")]

# Mean PC1 against log(age) for the neural tissues and whole cortex.
# The x axis is limited to log(10)..log(50), so points with an age outside
# 10..50 are not drawn.
Fig4.C <- ggplot() +
  geom_line(data = PCA_stats[name %in% c("forebrain", "midbrain", "hindbrain", "neural tube", "whole cortex"), ], aes(log(age), mean_PC1, group=name, colour=name)) +
  geom_point(data = PCA_stats[name %in% c("forebrain", "midbrain", "hindbrain", "neural tube", "whole cortex"), ], aes(log(age), mean_PC1, group=name, colour=name)) +
  xlab("log(DPC)") +
  ylab("Mean PC1") +
  xlim(c(log(10), log(50))) +
  theme(legend.position="top") +
  scale_color_discrete(name="Neuronal tissue", labels=c("Forebrain", "Hindbrain", "Midbrain", "Neural tube", "Whole cortex"))
  

Fig4.C

# Mean PC2 per age for heart samples.
PCA_stats_PC2 <- PCA[name == "heart", .(mean_PC2 = mean(PC2)), by = c("name", "age")]
# Mean PC1 per tissue cluster; clusters are then ordered by it.
PC1_cluster <- PCA[, .(PC1 = mean(PC1)), by = cluster]

tissue_cluster_PCA <- factor(PC1_cluster[order(PC1)]$cluster, levels = PC1_cluster[order(PC1)]$cluster)


# One colour per tissue cluster along the black -> red ramp, in PC1 order.
# The ramp length follows the number of clusters; a fixed length of 14 left
# get_colors() to recycle colours once there were more than 14 clusters, so
# several clusters shared a colour.
my_col <- get_colors(
  tissue_cluster_PCA,
  group.col = colorRampPalette(colors = c("black", "purple", "blue", "seagreen", "gold", "red"))(nlevels(tissue_cluster_PCA))
)

PCA$cluster <- factor(PCA$cluster, levels = PC1_cluster[order(PC1)]$cluster)


# Pairwise score plots; every sample is drawn as its cluster id.
Sup_PCA.A <- ggplot(PCA) +
  geom_text(aes(PC1, PC2, label = cluster, colour = cluster)) +
  scale_color_manual(values = my_col) +
  theme(legend.position = "none")

Sup_PCA.B <- ggplot(PCA) +
  geom_text(aes(PC1, PC3, label = cluster, colour = cluster)) +
  scale_color_manual(values = my_col) +
  theme(legend.position = "none")

Sup_PCA.C <- ggplot(PCA) +
  geom_text(aes(PC2, PC3, label = cluster, colour = cluster)) +
  scale_color_manual(values = my_col) +
  theme(legend.position = "none")

library(rlang)

# data.table::copy() is the supported way to get an independent data.table;
# the `:=` updates below then cannot touch PCA by reference.
PCA_plot <- copy(PCA)


# Coarse tissue group used for the plotting symbols/labels.
PCA_plot[name == "heart", group := "Heart"]
PCA_plot[name == "skeletal muscle tissue", group := "SKM"]
PCA_plot[name == "adrenal gland", group := "Adrenal Gland"]
PCA_plot[name == "whole cortex", group := "Cortex"]
PCA_plot[name %in% c("forebrain", "midbrain", "hindbrain", "neural tube"), group := "Neural"]

PCA_plot$age <- as.numeric(as.character(PCA_plot$age))


# Every remaining tissue is grouped as "Other" and its age is blanked.
PCA_plot[!name %in% c("forebrain", "midbrain", "hindbrain", "neural tube", "whole cortex", "heart", "skeletal muscle tissue", "adrenal gland"), `:=`(group = "Other", age = NA)]

# PC1 vs PC2 without whole cortex, coloured by age, shaped by group.
# The identical geom_point() layer that was added twice is drawn once.
ggplot(PCA_plot[name != "whole cortex", ]) +
  geom_point(aes(PC1, PC2, colour = age, shape = group)) +
  scale_color_gradient(low = "blue", high = "red")

library(stringr)

# PC1 vs PC2 coloured by age: "Other" samples as points, the remaining
# groups (whole cortex excluded) as the first letter of their group name.
Sup_PCA_age.A <- ggplot() +
  geom_point(data=PCA_plot[group=="Other"], aes(PC1, PC2, colour=age ), alpha=0.5) +
  geom_text(data=PCA_plot[group!="Other" & name!="whole cortex"], aes(PC1, PC2, colour=age, label=str_sub(group, 1,1) ), size=3.5, alpha=0.5 ) +
  scale_color_gradient(low="blue",  high="red") +
  labs(colour="Embryonic stage (DPC)")
# Same plot with whole cortex included and colour on a log(age) scale.
ggplot() +
  geom_point(data=PCA_plot[group=="Other"], aes(PC1, PC2, colour=log(age) ), alpha=0.8) +
  geom_text(data=PCA_plot[group!="Other"], aes(PC1, PC2, colour=log(age), label=str_sub(group, 1,1) ), size=3.5, alpha=0.8 ) +
  scale_color_gradient2(low="blue", mid="gold",  high="red", midpoint = 3.7)
plot_grid(Sup_PCA_age.A, Sup_PCA_age.B, labels = "AUTO")


# Add a batch number per sample: the rank of its release date among the
# distinct dates.
PCA_batch <- merge(PCA, PCA[, .N, by=c("date")][, .(date, batch=frank(date) )], by="date")

# NOTE(review): this palette is built from tissue_cluster_PCA, so its names
# are tissue cluster ids, yet it colours factor(batch) in panels G-I below --
# presumably it should be built from the batch values; confirm.
my_col_date <- get_colors(tissue_cluster_PCA, group.col=colorRampPalette(colors = c("black",  "purple", "blue", "seagreen", "gold", "red"))(10))





# Panels D-F: samples drawn as the first letter of their group, coloured by
# tissue cluster.
Sup_PCA.D <-  ggplot(PCA_plot) +
  geom_text(aes(PC1, PC2, label=str_sub(group, 1,1) , colour=factor(cluster) ) ) +
  scale_color_manual(values=my_col) + 
  theme(legend.position = "none")


Sup_PCA.E <-  ggplot(PCA_plot) +
  geom_text(aes(PC1, PC3, label=str_sub(group, 1,1) , colour=factor(cluster) ) ) +
  scale_color_manual(values=my_col) + 
  theme(legend.position = "none")

Sup_PCA.F <-  ggplot(PCA_plot) +
  geom_text(aes(PC2, PC3, label=str_sub(group, 1,1) , colour=factor(cluster) ) ) +
  scale_color_manual(values=my_col) + 
  theme(legend.position = "none")




# Panels G-I: samples drawn as their cluster id, coloured by batch.
Sup_PCA.G <-  ggplot(PCA_batch) +
  geom_text(aes(PC1, PC2, label=cluster, colour=factor(batch) ) ) +
  scale_color_manual(values=my_col_date) + 
  theme(legend.position = "none")


Sup_PCA.H <-  ggplot(PCA_batch) +
  geom_text(aes(PC1, PC3, label=cluster, colour=factor(batch) ) ) +
  scale_color_manual(values=my_col_date) + 
  theme(legend.position = "none")


Sup_PCA.I <-  ggplot(PCA_batch) +
  geom_text(aes(PC2, PC3, label=cluster, colour=factor(batch) ) ) +
  scale_color_manual(values=my_col_date) + 
  theme(legend.position = "none")
# 3 x 3 supplementary grid, panels labelled A-I.
plot_grid(Sup_PCA.A, Sup_PCA.B, Sup_PCA.C, Sup_PCA.D, Sup_PCA.E, Sup_PCA.F, Sup_PCA.G, Sup_PCA.H, Sup_PCA.I, labels="AUTO" )
#Tissue_clusters <- cutree(hc_cols, k = 18)
#Tissue_clusters <- cbind(Tissue_clusters, Tissues_name[names(Tissue_clusters)], Tissues_age[names(Tissue_clusters)], Tissue_date[names(Tissue_clusters)]) 

#colnames(Tissue_clusters) <- c("cluster", "name", "age", "date")

#Tissue_clusters <- data.frame(Tissue_clusters)

#Tissue_clusters_sum[which(grepl("hindbrain|midbrain|forebrain|neural\ tube", Tissue_clusters_sum$name)), ]

#Tissue_clusters$name <- factor(Tissue_clusters$name, level=c( "skeletal muscle tissue", "heart", "thymus", "spleen", "liver",  "adrenal gland", "intestine", "stomach", "lung",  "kidney", "bladder", "limb", "embryonic facial prominence", "forebrain", "hindbrain", "midbrain", "neural tube", "whole cortex"))


#Tissue_clusters$age <- factor(Tissue_clusters$age, levels = as.character(sort(as.numeric(levels(Tissue_clusters$age)))))


#Tissue_clusters_sum <- Tissue_clusters %>% group_by(cluster, name, age ) %>% summarise(count = n())

# Count samples per (cluster, tissue name, age) combination.
Tissue_clusters_DT <- data.table(Tissue_clusters)
Tissue_clusters_sum <- Tissue_clusters_DT[, .(count = .N), by = c("cluster", "name", "age")]

# Turn age into a factor whose levels are in numeric order.
# The candidate levels are the existing factor levels when `age` is already a
# factor, and the distinct observed values otherwise. Reading levels() from a
# non-factor column yields NULL, which would turn every age into NA.
Tissue_clusters_sum$age <- local({
  age <- Tissue_clusters_sum$age
  age_values <- if (is.factor(age)) levels(age) else unique(age)
  factor(age, levels = sort(as.numeric(as.character(age_values))))
})


## NA ###

# Order the clusters by their mean PC1 coordinate.
Tissue_clusters_sum$cluster <- factor(Tissue_clusters_sum$cluster, levels = PCA[, mean(PC1), by = "cluster"][order(V1)]$cluster)


# Attach the per-cluster mean of PC1-PC3, plus 2-decimal rounded copies
# (mean_PC1..3).
Tissue_clusters_sum <- merge(Tissue_clusters_sum, PCA[, .(mPC1 = mean(PC1), mPC2 = mean(PC2), mPC3 = mean(PC3)), by = "cluster"][order(mPC1)], by = "cluster")
Tissue_clusters_sum[, `:=`(mean_PC1 = round(mPC1, 2), mean_PC2 = round(mPC2, 2), mean_PC3 = round(mPC3, 2))]

# Heat map of sample counts: age on x, tissue name on y, one facet column per
# cluster (the facet strip also carries the rounded mean PC1-PC3 values).
Sup.Tissue_clusters.b.pre <- ggplot(
  data = Tissue_clusters_sum,
  mapping = aes(x = age, y = name)
) +
  geom_tile(mapping = aes(fill = count)) +
  scale_fill_gradient(low = "grey", high = "red") +
  facet_grid(
    . ~ cluster + mean_PC1 + mean_PC2 + mean_PC3,
    labeller = label_parsed
  ) +
  theme_bw() +
  labs(fill = "Sample count") +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0),
    # colour = NA hides the axis titles while keeping their space
    axis.title = element_text(colour = NA),
    strip.text = element_text(hjust = 0.5)
  )

# Stack fake_plot.legend (defined elsewhere in the file) above the heat map.
Sup.Tissue_clusters.c <- plot_grid(
  plotlist = list(fake_plot.legend, Sup.Tissue_clusters.b.pre),
  ncol = 1,
  rel_heights = c(0.1, 1)
)
Sup.Tissue_clusters.c

library(ggrepel)

# Store each cluster's mean PC1/PC2 position on PCA_plot (added by reference).
PCA_plot[, c("mPC1", "mPC2") := list(mean(PC1), mean(PC2)), by = "cluster"]

# PC1 vs PC2: samples, a per-cluster ellipse, and the cluster id printed at
# the cluster's mean position.
Sup.Tissue_clusters.a <- ggplot(data = PCA_plot) +
  geom_point(
    mapping = aes(x = PC1, y = PC2, colour = factor(cluster)),
    alpha = 0.3
  ) +
  stat_ellipse(mapping = aes(x = PC1, y = PC2, colour = factor(cluster))) +
  geom_text_repel(
    mapping = aes(x = mPC1, y = mPC2, label = cluster),
    data = PCA_plot[, list(mPC1 = mean(PC1), mPC2 = mean(PC2)), by = "cluster"],
    size = 7,
    point.padding = NA,
    fontface = "bold"
  ) +
  scale_colour_manual(values = my_col) +
  theme(legend.position = "none")

# PC1 vs PC3: same layout. In the label table of this panel the column named
# mPC2 holds the mean of PC3.
Sup.Tissue_clusters.b <- ggplot(data = PCA_plot) +
  geom_point(
    mapping = aes(x = PC1, y = PC3, colour = factor(cluster)),
    alpha = 0.3
  ) +
  stat_ellipse(mapping = aes(x = PC1, y = PC3, colour = factor(cluster))) +
  geom_text_repel(
    mapping = aes(x = mPC1, y = mPC2, label = cluster),
    data = PCA_plot[, list(mPC1 = mean(PC1), mPC2 = mean(PC3)), by = "cluster"],
    size = 7,
    point.padding = NA,
    fontface = "bold"
  ) +
  scale_colour_manual(values = my_col) +
  theme(legend.position = "none")

# Panels E and F side by side, then panel G (the count heat map) underneath.
Sup.Tissue_clusters.ab <- plot_grid(
  plotlist = list(Sup.Tissue_clusters.a, Sup.Tissue_clusters.b),
  labels = c("E", "F")
)
Sup.Tissue_clusters.ab
Fig4.EFG <- plot_grid(
  plotlist = list(Sup.Tissue_clusters.ab, Sup.Tissue_clusters.c),
  ncol = 1,
  labels = c("", "G"),
  rel_heights = c(0.8, 1)
)
Fig4.EFG
# PC1 vs PC2 with colour = ln(age) and point shape = tissue group.
Fig.4A <- ggplot(data = PCA_plot) +
  geom_point(
    mapping = aes(x = PC1, y = PC2, colour = log(age), shape = group),
    size = 3,
    alpha = 0.7
  ) +
  scale_colour_gradient2(
    name = "ln(Age)",
    low = "blue", mid = "yellow", high = "red",
    midpoint = 3.7
  ) +
  scale_shape(name = "Tissue") +
  theme(legend.position = "top")

# NOTE(review): `Fig4` is not defined in this part of the file (only `Fig.4A`
# is) - confirm it is built elsewhere before this line runs.
plot_grid(
  plotlist = list(Fig4, Fig4.EFG),
  ncol = 1,
  rel_heights = c(2, 1)
)

Here we sort microexons by their PC1 loadings. Because we flipped the sign of PC1, the values reported in our paper are the PCA loadings multiplied by -1.

The loading threshold is abs(0.035).


# Join per-sample microexon PSI values with the sample's tissue cluster, then
# with the microexon-cluster display names.
ME_Tissues_clusters_PSI <- merge(
  x = ME_clusters_PSI,
  y = Tissue_clusters,
  by.x = "FILE_NAME",
  by.y = "File.accession"
)
ME_Tissues_clusters_PSI[
  ,
  cluster := factor(cluster, levels = levels(Tissue_clusters_sum$cluster))
]
ME_Tissues_clusters_PSI <- merge(
  x = ME_Tissues_clusters_PSI,
  y = ME_cluster_names[, c("ME_cluster", "ME_cluster.name")],
  by.x = "ME_clusters",
  by.y = "ME_cluster"
)

# Alphabetical factor levels for the microexon cluster names.
ME_Tissues_clusters_PSI[
  ,
  ME_cluster.name := factor(
    ME_cluster.name,
    levels = sort(unique(as.character(ME_cluster.name)))
  )
]

# Keep only microexon clusters of the four listed types ...
ME_Tissues_clusters_PSI_sub <- ME_Tissues_clusters_PSI[
  ME_cluster.name %in% ME_cluster_names[
    ME_cluster.type %in% c("Neuronal", "Neuro-Muscular", "Muscular", "Non-Neuronal"),
    ME_cluster.name
  ],
]
# ... and, of those, only samples from tissue clusters 1, 8, 6, 7, 15 and 5.
ME_Tissues_clusters_PSI_non_neuronal <- ME_Tissues_clusters_PSI_sub[
  cluster %in% c(1, 8, 6, 7, 15, 5),
]

# Relabel those tissue clusters as "C<id>" and fix their display order.
ME_Tissues_clusters_PSI_non_neuronal[
  ,
  cluster := factor(
    mapvalues(
      cluster,
      from = c(1, 8, 6, 7, 15, 5),
      to = c("C1", "C8", "C6", "C7", "C15", "C5")
    ),
    levels = c("C1", "C8", "C6", "C7", "C15", "C5")
  )
]

# Mean PSI per microexon and per microexon cluster within each tissue cluster.
ME_Tissues_clusters_PSI_non_neuronal_by_ME <- ME_Tissues_clusters_PSI_non_neuronal[
  ,
  list(PSI_mean = mean(PSI)),
  by = c("ME", "cluster", "ME_cluster.name")
]
ME_Tissues_clusters_PSI_non_neuronal_by_ME_cluster <- ME_Tissues_clusters_PSI_non_neuronal[
  ,
  list(PSI_mean = mean(PSI)),
  by = c("cluster", "ME_cluster.name")
]



  
# Quick look: one grey line per microexon across the relabelled tissue
# clusters, one facet row per microexon cluster.
ggplot() +
  geom_line(
    mapping = aes(x = factor(cluster), y = PSI_mean, group = ME),
    data = ME_Tissues_clusters_PSI_non_neuronal_by_ME,
    colour = "grey"
  ) +
  facet_grid(ME_cluster.name ~ .)
  

# Stack the non-neuronal summaries under the neuronal ones (built elsewhere in
# the file); the tissue-cluster label takes the place of `age.name`.
TOTAL.ME_level <- rbind(
  ME_Tissues_clusters_PSI_non_neuronal_by_ME[
    ,
    list(ME, age.name = cluster, ME_cluster.name, PSI_mean)
  ],
  ME_Tissues_clusters_PSI_neuronal_by_ME
)

TOTAL.cluster_level <- rbind(
  ME_Tissues_clusters_PSI_neuronal_by_ME_cluster,
  ME_Tissues_clusters_PSI_non_neuronal_by_ME_cluster[
    ,
    list(age.name = cluster, ME_cluster.name, name = "Non neuronal", PSI_mean)
  ]
)

# Capitalise the tissue names and fix their legend order.
TOTAL.cluster_level[
  ,
  name := factor(
    mapvalues(
      name,
      from = c("forebrain", "midbrain", "hindbrain", "neural tube", "whole cortex"),
      to = c("Forebrain", "Midbrain", "Hindbrain", "Neural tube", "Whole cortex")
    ),
    levels = c("Non neuronal", "Forebrain", "Midbrain", "Hindbrain", "Neural tube", "Whole cortex")
  )
]



# Fig 5A: per-microexon mean PSI (thin grey lines) overlaid with the
# per-tissue mean PSI (thick coloured lines), one facet row per microexon
# cluster.
Fig5.A <- ggplot() +
  geom_line(
    mapping = aes(x = factor(age.name), y = PSI_mean, group = ME),
    data = TOTAL.ME_level,
    colour = "grey",
    alpha = 0.5
  ) +
  geom_line(
    mapping = aes(x = factor(age.name), y = PSI_mean, group = name, colour = name),
    data = TOTAL.cluster_level,
    size = 1.2
  ) +
  facet_grid(ME_cluster.name ~ .) +
  labs(colour = "Mean PSI by") +
  ylab("PSI") +
  xlab("") +
  theme(panel.background = element_rect(fill = "white", colour = "black")) +
  theme(
    axis.text.x = element_text(angle = 90),
    legend.position = "top",
    legend.direction = "horizontal"
  )

Fig5.A



# The same figure is rendered a second time; the stored object is reprinted
# rather than rebuilding an identical plot specification.
Fig5.A

Whippet-delta




# Build ./whippet_delta.yaml: a top-level `whippet_delta:` key followed by one
# entry per comparison, each made of three lines
#     <comparison_name>:
#         A : <comma-separated File.accession list>
#         B : <comma-separated File.accession list>
# All lines are collected first and the file is written once, overwriting any
# previous copy. This replaces the earlier remove-then-append sequence, which
# warned when the file did not exist yet and repeated the same three write
# calls for every comparison.

# Comma-separated accessions of the samples in tissue clusters 1, 8 and 6,
# used as group A of every "control_vs_*" comparison.
wcontrol <- paste(Tissue_clusters[cluster %in% c(1, 8, 6), ]$File.accession, collapse = ",")

# Tissues compared against the control across all of their samples.
signal <- c("adrenal gland", "skeletal muscle tissue", "heart")

local({
  # A control comparison is only written when group B has at least this many
  # samples.
  min_samples <- 4

  ages <- c(10.5, 11.5, 12.5, 13.5, 14.5, 15.5, 16.5, 21)

  # Tissue sets compared against the control at each age: the four neural
  # tissues on their own, followed by three progressively larger pooled sets.
  neural_sets <- list(
    "forebrain",
    "midbrain",
    "hindbrain",
    "neural tube",
    c("hindbrain", "neural tube"),
    c("hindbrain", "neural tube", "midbrain"),
    c("hindbrain", "neural tube", "midbrain", "forebrain")
  )

  # Format one comparison as its three YAML lines. Spaces in the label become
  # underscores; accession vectors are joined with commas.
  delta_entry <- function(label, group_a, group_b) {
    c(
      paste0("    ", gsub(" ", "_", label), ":"),
      paste0("        A : ", paste(group_a, collapse = ",")),
      paste0("        B : ", paste(group_b, collapse = ","))
    )
  }

  # Control-vs-group entry, or NULL when the group is too small.
  control_entry <- function(label, group_b) {
    if (length(group_b) < min_samples) {
      return(NULL)
    }
    delta_entry(label, wcontrol, group_b)
  }

  # Control vs each neural tissue set at each age. Members of a pooled set are
  # joined with "-" in the comparison name.
  stage_entries <- lapply(neural_sets, function(tissues) {
    lapply(ages, function(a) {
      control_entry(
        paste("control_vs", paste(tissues, collapse = "-"), a),
        Tissue_clusters[name %in% tissues & age == a, ]$File.accession
      )
    })
  })

  # Control vs each tissue in `signal`, pooling all of its samples.
  tissue_entries <- lapply(signal, function(tissue) {
    control_entry(
      paste("control_vs", tissue),
      Tissue_clusters[name == tissue, ]$File.accession
    )
  })

  # Hindbrain + neural tube vs forebrain + midbrain at age 21; written
  # regardless of group size.
  regional_entry <- delta_entry(
    "hindbrain_neural_tube_21_vs_forebrain_midbrain_21",
    Tissue_clusters[age == 21 & name %in% c("hindbrain", "neural tube"), ]$File.accession,
    Tissue_clusters[age == 21 & name %in% c("forebrain", "midbrain"), ]$File.accession
  )

  writeLines(
    c(
      "whippet_delta:",
      unlist(stage_entries),
      unlist(tissue_entries),
      regional_entry
    ),
    "./whippet_delta.yaml"
  )
})

Whippet ENCODE


#' Load the control-vs-tissue delta tables for a set of ages
#'
#' For every age in `ages` at which the global `Tissue_clusters` table holds at
#' least `min_samples` samples whose `name` is in `signal`, reads the file
#' `<path>control_vs_<signal joined by "-">_<age><extension>` (spaces replaced
#' by underscores) with `fread()` and tags its rows with that age.
#'
#' @param signal Character vector of tissue names forming the comparison group.
#' @param ages Vector of ages to look up.
#' @param path Directory prefix; concatenated as-is, so it must end with a
#'   path separator.
#' @param extension File-name suffix appended after the comparison name.
#' @param min_samples Minimum number of samples required for an age to be
#'   loaded (default 4, the previously hard-coded threshold).
#' @return A data.table with the rows of all loaded files plus an `age`
#'   column; an empty data.table when no age qualifies.
Get_delta_table <- function(signal, ages, path, extension, min_samples = 4) {
  per_age <- lapply(ages, function(a) {
    sample_files <- Tissue_clusters[name %in% signal & age == a, ]$File.accession
    if (length(sample_files) < min_samples) {
      return(NULL)
    }

    comparison <- gsub(
      " ", "_",
      paste("control_vs", paste(signal, collapse = "-"), a, sep = " ")
    )
    delta <- fread(paste0(path, comparison, extension))
    set(delta, j = "age", value = a)
    delta
  })

  # One bind at the end instead of growing the table inside the loop.
  # rbindlist() skips the NULL entries of ages that were not loaded;
  # use.names = TRUE matches columns by name, as rbind() on data.tables does.
  rbindlist(per_age, use.names = TRUE)
}
# Number of distinct exons with |DeltaPsi| >= 0.1 and Probability > 0.9 on
# both sides (.x and .y) of the merged table.
Delta_HNM_merge[
  abs(DeltaPsi.x) >= 0.1 & Probability.x > 0.9 &
    abs(DeltaPsi.y) >= 0.1 & Probability.y > 0.9,
  length(unique(exon_ID))
]
[1] 421

# Forebrain-vs-control delta tables for every age, read from the
# ".diff.microexons" and ".diff.ME.microexons" files of the same directory.
Delta_F_whippet <- Get_delta_table(
  signal = "forebrain",
  ages = c(10.5, 11.5, 12.5, 13.5, 14.5, 15.5, 16.5, 21),
  path = "~/Google_Drive/Results/ME/Paper/Final_Report/Reps/Rep1/Whippet/",
  extension = ".diff.microexons"
)

Delta_F_ME <- Get_delta_table(
  signal = "forebrain",
  ages = c(10.5, 11.5, 12.5, 13.5, 14.5, 15.5, 16.5, 21),
  path = "~/Google_Drive/Results/ME/Paper/Final_Report/Reps/Rep1/Whippet/",
  extension = ".diff.ME.microexons"
)

# Pair the two tables per exon and age (.x = first table, .y = second).
Delta_F_merge <- merge(
  x = Delta_F_whippet,
  y = Delta_F_ME,
  by = c("exon_ID", "age")
)

# Number of distinct exons passing both thresholds in both tables.
Delta_F_merge[
  abs(DeltaPsi.x) >= 0.1 & Probability.x > 0.9 &
    abs(DeltaPsi.y) >= 0.1 & Probability.y > 0.9,
  length(unique(exon_ID))
]

# Volcano-style panels, one facet per age: x = -DeltaPsi, y = Probability.
# Rows with DeltaPsi >= 0.1 and Probability >= 0.9 are drawn in brown, rows
# with DeltaPsi < -0.1 and Probability >= 0.9 in dark green; the number of
# rows in each coloured set is printed near the matching edge of the facet.
# NOTE(review): the positive cut-off is inclusive (>= 0.1) while the negative
# one is strict (< -0.1) - confirm the asymmetry is intended.
Sup_vulcano.B <- ggplot() +
  geom_point(
    mapping = aes(x = -DeltaPsi, y = Probability),
    data = Delta_HNM_ME
  ) +
  geom_point(
    mapping = aes(x = -DeltaPsi, y = Probability),
    data = Delta_HNM_ME[DeltaPsi >= 0.1 & Probability >= 0.9, ],
    colour = "brown"
  ) +
  geom_point(
    mapping = aes(x = -DeltaPsi, y = Probability),
    data = Delta_HNM_ME[DeltaPsi < (-0.1) & Probability >= 0.9, ],
    colour = "darkgreen"
  ) +
  geom_text(
    mapping = aes(x = -0.85, y = 0.9, label = N),
    data = Delta_HNM_ME[DeltaPsi >= 0.1 & Probability >= 0.9, .N, by = "age"],
    colour = "brown", fontface = "bold", size = 5
  ) +
  geom_text(
    mapping = aes(x = 0.85, y = 0.9, label = N),
    data = Delta_HNM_ME[DeltaPsi < (-0.1) & Probability >= 0.9, .N, by = "age"],
    colour = "darkgreen", fontface = "bold", size = 5
  ) +
  facet_grid(. ~ age) +
  xlim(-1, 1) +
  xlab("Delta PSI") +
  theme_bw()


Sup_vulcano.A <- ggplot() +
  geom_point(
    mapping = aes(x = -DeltaPsi, y = Probability),
    data = Delta_HNM_whippet
  ) +
  geom_point(
    mapping = aes(x = -DeltaPsi, y = Probability),
    data = Delta_HNM_whippet[DeltaPsi >= 0.1 & Probability >= 0.9, ],
    colour = "brown"
  ) +
  geom_point(
    mapping = aes(x = -DeltaPsi, y = Probability),
    data = Delta_HNM_whippet[DeltaPsi < (-0.1) & Probability >= 0.9, ],
    colour = "darkgreen"
  ) +
  geom_text(
    mapping = aes(x = -0.85, y = 0.9, label = N),
    data = Delta_HNM_whippet[DeltaPsi >= 0.1 & Probability >= 0.9, .N, by = "age"],
    colour = "brown", fontface = "bold", size = 5
  ) +
  geom_text(
    mapping = aes(x = 0.85, y = 0.9, label = N),
    data = Delta_HNM_whippet[DeltaPsi < (-0.1) & Probability >= 0.9, .N, by = "age"],
    colour = "darkgreen", fontface = "bold", size = 5
  ) +
  facet_grid(. ~ age) +
  xlim(-1, 1) +
  xlab("Delta PSI") +
  theme_bw()

Whippet vs ME

ME_AD_AA <- fread("../Final_Report/Reps/Rep1/Whippet/Quant/ME_AD_AA.txt")

# Pair the two delta tables node by node and age by age; after the merge
# DeltaPsi.x / Probability.x come from Delta_HNM_whippet and DeltaPsi.y /
# Probability.y from Delta_HNM_ME.
Delta_HNM_total <- merge(
  x = Delta_HNM_whippet[, c("exon_ID", "Gene", "Node", "Coord", "Strand", "Type", "DeltaPsi", "Probability", "age")],
  y = Delta_HNM_ME[, c("exon_ID", "Gene", "Node", "Coord", "Strand", "Type", "DeltaPsi", "Probability", "age")],
  by = c("exon_ID", "Gene", "Node", "Coord", "Strand", "Type", "age")
)

# Pearson correlation of the two DeltaPsi estimates per age, over all nodes ...
Delta_HNM_total[
  ,
  cor(DeltaPsi.x, DeltaPsi.y, method = "pearson"),
  by = age
]

# ... and restricted to the "exon_ID|age" keys listed in total_diff_HNM.
Delta_HNM_total[
  paste(exon_ID, age, sep = "|") %in% total_diff_HNM,
  cor(DeltaPsi.x, DeltaPsi.y, method = "pearson"),
  by = age
]


# Node type label: "m" + Type by default. CE nodes whose exon_ID appears in
# ME_AD_AA are relabelled according to the Type ("AA"/"AD") and exon_type
# ("E"/"ME") of the matching ME_AD_AA rows. The four updates run in this
# order, so a later match overrides an earlier one.
Delta_HNM_total[, Node_type := paste0("m", Type)]
Delta_HNM_total[
  Type == "CE" & exon_ID %in% ME_AD_AA[Type == "AA" & exon_type == "E", exon_ID],
  Node_type := "mCE_AA"
]
Delta_HNM_total[
  Type == "CE" & exon_ID %in% ME_AD_AA[Type == "AA" & exon_type == "ME", exon_ID],
  Node_type := "mCE_mAA"
]
Delta_HNM_total[
  Type == "CE" & exon_ID %in% ME_AD_AA[Type == "AD" & exon_type == "E", exon_ID],
  Node_type := "mCE_AD"
]
Delta_HNM_total[
  Type == "CE" & exon_ID %in% ME_AD_AA[Type == "AD" & exon_type == "ME", exon_ID],
  Node_type := "mCE_mAD"
]

# Per-age Pearson correlation over all nodes (first table) next to the same
# correlation over nodes passing the thresholds on at least one side.
whippet_ME_corr <- merge(
  x = Delta_HNM_total[
    ,
    cor(DeltaPsi.x, DeltaPsi.y, method = "pearson"),
    by = age
  ],
  y = Delta_HNM_total[
    (abs(DeltaPsi.x) >= 0.1 & Probability.x >= 0.9) |
      (abs(DeltaPsi.y) >= 0.1 & Probability.y >= 0.9),
    cor(DeltaPsi.x, DeltaPsi.y, method = "pearson"),
    by = age
  ],
  by = "age"
)

#whippet_ME_corr.types <- merge( Delta_HNM_total[ , .(N=.N, cor.total=(cor(DeltaPsi.x, DeltaPsi.y, method="pearson"))) , by=c("age", "Node_type") ], 
#                          Delta_HNM_total[paste( exon_ID, age, sep="|") %in% total_diff_HNM , .(N=.N, cor.sig=(cor(DeltaPsi.x, DeltaPsi.y, method="pearson"))) , by=c("age", #"Node_type") ],
#                          by=c("age", "Node_type"))

# Node count and Pearson correlation of the two DeltaPsi estimates for every
# (age, node type) pair.
whippet_ME_corr.types <- Delta_HNM_total[
  ,
  list(
    N = .N,
    cor.total = cor(DeltaPsi.x, DeltaPsi.y, method = "pearson")
  ),
  by = c("age", "Node_type")
]

#whippet_ME_corr.types[N.y<5, cor.sig:=NA]

#whippet_ME_corr.types.melted <- rbind(whippet_ME_corr.types[, .(age, Node_type, N=N.x, cor=cor.total, Group="Total") ],
 #     whippet_ME_corr.types[, .(age, Node_type, N=N.y, cor=cor.sig, Group="Significant") ])

# Fixed display order of the node types in both tables.
Delta_HNM_total[
  ,
  Node_type := factor(
    Node_type,
    levels = c("mCE", "mAA", "mAD", "mCE_mAA", "mCE_mAD", "mCE_AA", "mCE_AD")
  )
]

whippet_ME_corr.types[
  ,
  Node_type := factor(
    Node_type,
    levels = c("mCE", "mAA", "mAD", "mCE_mAA", "mCE_mAD", "mCE_AA", "mCE_AD")
  )
]


# Panel D: distribution across ages of the per-(age, node type) correlation,
# using only pairs backed by more than 5 nodes.
Sup_vulcano.D <- ggplot(data = whippet_ME_corr.types[N > 5, ]) +
  geom_boxplot(mapping = aes(x = Node_type, y = cor.total, fill = Node_type)) +
  scale_fill_discrete(name = "Microexon node type") +
  xlab("") +
  ylab("Delta PSI correlation") +
  theme_bw() +
  theme(legend.position = "top") +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.6)) +
  guides(fill = guide_legend(nrow = 1))

# Panel C: number of splicing nodes per age, stacked by node type, with the
# per-age Pearson correlation printed 200 units above the total count.
Sup_vulcano.C <- ggplot(data = Delta_HNM_total[!is.na(Node_type)]) +
  geom_bar(mapping = aes(x = as.factor(age), fill = Node_type), stat = "count") +
  geom_text(
    mapping = aes(
      x = as.factor(age),
      y = N + 200,
      label = paste0("R=", round(cor, 2))
    ),
    data = Delta_HNM_total[
      ,
      list(N = .N, cor = cor(DeltaPsi.x, DeltaPsi.y, method = "pearson")),
      by = age
    ]
  ) +
  xlab("DPC") +
  ylab("Number of splicing nodes") +
  scale_fill_discrete(name = "Microexon node type") +
  theme_bw() +
  theme(legend.position = "top") +
  guides(fill = guide_legend(nrow = 1))

# The legend of panel C is reused as the shared legend of panels C and D.
Sup_vulcano.legend <- get_legend(Sup_vulcano.C)

library(ggExtra)

ME_AD_AA

# Attach to every node the ID of its alternative exon (exon_ID.alt, taken
# from ME_AD_AA); .wp columns carry the .x side of Delta_HNM_total and .me
# columns the .y side.
Delta_HNM_total.alt_ME <- merge(
  x = Delta_HNM_total[
    ,
    list(
      exon_ID, Node_type, age,
      DeltaPsi.wp = DeltaPsi.x, Probability.wp = Probability.x,
      DeltaPsi.me = DeltaPsi.y, Probability.me = Probability.y
    )
  ],
  y = ME_AD_AA[, c("exon_ID", "exon_ID.alt")],
  by = c("exon_ID")
)

# Bring in the measurements of the alternative exon itself at the same age.
Delta_HNM_total.alt_ME.compare <- merge(
  x = Delta_HNM_total.alt_ME,
  y = Delta_HNM_total[
    ,
    list(
      exon_ID, Node_type, age,
      DeltaPsi.wp.alt = DeltaPsi.x, Probability.wp.alt = Probability.x,
      DeltaPsi.me.alt = DeltaPsi.y, Probability.me.alt = Probability.y
    )
  ],
  by.x = c("exon_ID.alt", "age"),
  by.y = c("exon_ID", "age")
)

# Correlation between an exon and its alternative, separately for the .wp and
# .me estimates, per alternative exon and node type.
Delta_HNM_total.alt_ME.compare.stats <- Delta_HNM_total.alt_ME.compare[
  ,
  list(
    alt_cor.wp = cor(DeltaPsi.wp, DeltaPsi.wp.alt),
    alt_cor.me = cor(DeltaPsi.me, DeltaPsi.me.alt)
  ),
  by = c("exon_ID.alt", "Node_type.x")
]

# Scatter of the two correlations with 2-D density contours.
p <- ggplot(data = Delta_HNM_total.alt_ME.compare.stats) +
  geom_point(mapping = aes(x = alt_cor.wp, y = alt_cor.me), alpha = 0.4) +
  theme_bw() +
  geom_density_2d(
    mapping = aes(x = alt_cor.wp, y = alt_cor.me),
    alpha = 0.4,
    n = 500,
    adjust = 1.3
  ) +
  geom_rug() +
  xlab("Whippet short/long microexon correlation") +
  ylab("MicroExonator short/long microexon correlation")

# Add marginal histograms to the scatter.
Sup_vulcano.E <- ggMarginal(p, type = "histogram", fill = "transparent")


# grid supplies nullGrob(); it is attached before its first use in this block.
library(gridExtra)
library(grid)

# Panels C and D side by side with their own legends hidden. "none" is the
# documented value for hiding a legend; the string "NA" used before is not a
# valid legend.position.
Sup_vulcano.CD <- plot_grid(Sup_vulcano.C + theme(legend.position = "none"),
                            Sup_vulcano.D + theme(legend.position = "none"),
                            nrow = 1, labels = c("C", "D"))

# Put the shared legend on top of the C/D row.
Sup_vulcano.CD <- plot_grid(Sup_vulcano.legend, Sup_vulcano.CD, ncol = 1, rel_heights = c(1, 8))

# Bottom row of the figure: C/D on the left, E on the right. An earlier
# stacked layout was assigned to the same name and immediately overwritten by
# this one, so that dead assignment is dropped.
Sup_vulcano.bottom <- plot_grid(Sup_vulcano.CD, Sup_vulcano.E, labels = c("", "E"), nrow = 1, rel_widths = c(2, 1))

# Full figure: A and B on top, the C/D/E row underneath.
plot_grid(Sup_vulcano.A, Sup_vulcano.B, Sup_vulcano.bottom, ncol = 1, labels = c("A", "B", ""), rel_heights = c(1, 1, 3))

# Alternative layout: C/D on top, E centred underneath between two empty
# spacers.
plot_grid(Sup_vulcano.CD, plot_grid(nullGrob(), Sup_vulcano.E, nullGrob(), nrow = 1, rel_widths = c(1, 3, 1), labels = c("", "E", "")), ncol = 1)

# Pair the two HNM delta tables node by node and age by age.
Delta_HNM_whippet_ME <- merge(
  x = Delta_HNM_whippet,
  y = Delta_HNM_ME,
  by = c("exon_ID", "Gene", "Node", "Coord", "Strand", "Type", "age")
)

# Rows per age passing |DeltaPsi| >= 0.1 and Probability >= 0.9 on both sides.
Delta_HNM_whippet_ME[
  (abs(DeltaPsi.x) >= 0.1 & Probability.x >= 0.9) &
    (abs(DeltaPsi.y) >= 0.1 & Probability.y >= 0.9),
  .N,
  by = age
]

# Same count split by sign: DeltaPsi <= -0.1 on both sides ("included") and
# DeltaPsi >= 0.1 on both sides ("excluded").
Delta_HNM_whippet_ME_included <- Delta_HNM_whippet_ME[
  (DeltaPsi.x <= -0.1 & Probability.x >= 0.9) &
    (DeltaPsi.y <= -0.1 & Probability.y >= 0.9),
  .N,
  by = age
]
Delta_HNM_whippet_ME_excluded <- Delta_HNM_whippet_ME[
  (DeltaPsi.x >= 0.1 & Probability.x >= 0.9) &
    (DeltaPsi.y >= 0.1 & Probability.y >= 0.9),
  .N,
  by = age
]

# Side-by-side per-age counts.
# NOTE(review): the three tables are aligned by row position after sorting on
# age, which assumes each of them has a row for every age - confirm.
Delta_HNM_included_stats <- cbind(
  Delta_HNM_whippet_included[order(age)],
  Delta_HNM_ME_included[order(age), 2],
  Delta_HNM_whippet_ME_included[order(age), 2]
)
setnames(Delta_HNM_included_stats, c("age", "Whippet", "MicroExonator", "Both"))


#' Onset age of sustained differential inclusion per microexon
#'
#' Merges two delta-PSI tables and, for every exon, finds the earliest age
#' from which the exon is called differential at every later sampled age up to
#' and including the last one.
#'
#' A row is a high-confidence call when both tables report |DeltaPsi| >= 0.1
#' with Probability >= 0.9, and a low-confidence call when both reach
#' Probability >= 0.8. Exons with at least two high-confidence calls of the
#' same sign but no high-confidence onset are "rescued": their onset is
#' searched again using the low-confidence calls.
#'
#' An onset is only ever assigned at ages before the last one, so an exon that
#' is differential at the last age alone is not reported.
#'
#' NOTE(review): the accumulated age window (`age_int`) was previously not
#' reset before the rescue pass. It already held every age, so the
#' `N == length(age_int)` test could not match and no exon was ever rescued.
#' The window is now reset, which can add rows to the result - re-check
#' downstream counts and figures.
#'
#' @param Delta_S_whippet data.table with columns exon_ID, Gene, Node, Coord,
#'   Strand, Type, age, DeltaPsi and Probability (becomes the `.x` side).
#' @param Delta_S_ME data.table with the same columns (becomes the `.y` side).
#' @return data.table with columns exon_ID, Gene, diff_age (onset age) and
#'   change_dir (minus the sign of DeltaPsi.y, taken from the first
#'   high-confidence row of the exon).
whippet_ME_age_stats <- function(Delta_S_whippet, Delta_S_ME) {
  Delta_S_whippet_ME <- merge(
    Delta_S_whippet, Delta_S_ME,
    by = c("exon_ID", "Gene", "Node", "Coord", "Strand", "Type", "age")
  )

  # Flag high- and low-confidence calls; rows where the test is NA stay FALSE.
  Delta_S_whippet_ME[, diff_high := FALSE]
  Delta_S_whippet_ME[(abs(DeltaPsi.x) >= 0.1 & Probability.x >= 0.9) & (abs(DeltaPsi.y) >= 0.1 & Probability.y >= 0.9), diff_high := TRUE]

  Delta_S_whippet_ME[, diff_low := FALSE]
  Delta_S_whippet_ME[(abs(DeltaPsi.x) >= 0.1 & Probability.x >= 0.8) & (abs(DeltaPsi.y) >= 0.1 & Probability.y >= 0.8), diff_low := TRUE]

  ages <- sort(unique(Delta_S_whippet_ME$age))
  last_age <- ages[length(ages)]
  # All ages but the last, latest first (empty when there are fewer than two).
  earlier_ages <- rev(ages[-length(ages)])

  # High-confidence pass. The window grows backwards from the last age; an
  # exon keeps receiving an earlier onset for as long as it has one call at
  # every age in the window. The column is created up front so that it exists
  # even when the loop body never runs.
  Delta_S_whippet_ME[, diff_age := NA_real_]
  age_int <- last_age
  for (a in earlier_ages) {
    age_int <- c(age_int, a)
    sustained <- Delta_S_whippet_ME[age %in% age_int & diff_high == TRUE, .N, by = c("exon_ID", "Gene")][N == length(age_int)]$exon_ID
    Delta_S_whippet_ME[age == a & diff_high == TRUE & exon_ID %in% sustained, diff_age := a]
  }

  # Earliest onset per exon; min() over only-NA values yields Inf (with a
  # warning), which marks "no onset found".
  Delta_S_whippet_ME_age_diff <- Delta_S_whippet_ME[diff_high == TRUE, .(diff_age = min(diff_age, na.rm = TRUE)), by = c("exon_ID", "Gene")]

  # Rescue candidates: >= 2 high-confidence calls of the same sign, but no
  # high-confidence onset.
  Delta_S_whippet_ME$delta_sing <- sign(Delta_S_whippet_ME$DeltaPsi.x)
  Delta_S_whippet_ME_diff_count <- Delta_S_whippet_ME[diff_high == TRUE, .N, by = c("exon_ID", "delta_sing")]
  rescue_exons <- Delta_S_whippet_ME_diff_count[N >= 2 & !exon_ID %in% Delta_S_whippet_ME_age_diff[diff_age != Inf, ]$exon_ID, ]$exon_ID

  # Low-confidence pass over the rescue candidates, starting again from the
  # last age only.
  Delta_S_whippet_ME[, diff_age_low := NA_real_]
  age_int <- last_age
  for (a in earlier_ages) {
    age_int <- c(age_int, a)
    sustained <- Delta_S_whippet_ME[age %in% age_int & diff_low == TRUE, .N, by = c("exon_ID", "Gene")][N == length(age_int)]$exon_ID
    Delta_S_whippet_ME[exon_ID %in% rescue_exons & age == a & diff_low == TRUE & exon_ID %in% sustained, diff_age_low := a]
  }

  Delta_S_whippet_ME_age_diff_low <- Delta_S_whippet_ME[diff_low == TRUE, .(diff_age = min(diff_age_low, na.rm = TRUE)), by = c("exon_ID", "Gene")]

  # Direction lookup by exon ID; with repeated names, indexing by name returns
  # the first match.
  diff_ME_sing <- Delta_S_whippet_ME[diff_high == TRUE, -sign(DeltaPsi.y)]
  names(diff_ME_sing) <- Delta_S_whippet_ME[diff_high == TRUE, exon_ID]

  # Keep only exons for which an onset was found in either pass.
  Delta_S_whippet_ME_age_diff_total <- rbind(Delta_S_whippet_ME_age_diff[diff_age != Inf], Delta_S_whippet_ME_age_diff_low[diff_age != Inf])
  Delta_S_whippet_ME_age_diff_total[, change_dir := diff_ME_sing[exon_ID]]

  Delta_S_whippet_ME_age_diff_total
}

# Onset tables for the HNM and the forebrain (F) comparisons.
Delta_HNM_whippet_ME_age_diff_total <- whippet_ME_age_stats(
  Delta_S_whippet = Delta_HNM_whippet,
  Delta_S_ME = Delta_HNM_ME
)
Delta_F_whippet_ME_age_diff_total <- whippet_ME_age_stats(
  Delta_S_whippet = Delta_F_whippet,
  Delta_S_ME = Delta_F_ME
)

# Number of exons per onset age and direction of change.
Delta_F_whippet_ME_age_diff_total_stats <- Delta_F_whippet_ME_age_diff_total[
  ,
  list(Count = .N),
  by = c("diff_age", "change_dir")
]
Delta_HNM_whippet_ME_age_diff_total_stats <- Delta_HNM_whippet_ME_age_diff_total[
  ,
  list(Count = .N),
  by = c("diff_age", "change_dir")
]

# Mirrored bar chart for the forebrain table: exons with change_dir == 1 are
# plotted upwards (sky blue), exons with change_dir == -1 downwards
# (firebrick), per onset age.
ggplot() +
  geom_col(
    mapping = aes(x = factor(diff_age), y = Count),
    data = Delta_F_whippet_ME_age_diff_total_stats[change_dir == 1],
    fill = "skyblue"
  ) +
  geom_col(
    mapping = aes(x = factor(diff_age), y = -Count),
    data = Delta_F_whippet_ME_age_diff_total_stats[change_dir == -1],
    fill = "firebrick"
  ) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0),
    axis.title = element_text(colour = NA)
  )

# Same chart for the HNM table.
ggplot() +
  geom_col(
    mapping = aes(x = factor(diff_age), y = Count),
    data = Delta_HNM_whippet_ME_age_diff_total_stats[change_dir == 1],
    fill = "skyblue"
  ) +
  geom_col(
    mapping = aes(x = factor(diff_age), y = -Count),
    data = Delta_HNM_whippet_ME_age_diff_total_stats[change_dir == -1],
    fill = "firebrick"
  ) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0),
    axis.title = element_text(colour = NA)
  )



# Named lookups (exon ID -> onset age / direction of change) built from the
# HNM onset table.
ME_diff_age <- setNames(
  Delta_HNM_whippet_ME_age_diff_total$diff_age,
  Delta_HNM_whippet_ME_age_diff_total$exon_ID
)
diff_ME_sing <- setNames(
  Delta_HNM_whippet_ME_age_diff_total$change_dir,
  Delta_HNM_whippet_ME_age_diff_total$exon_ID
)

# Annotate every microexon; exons absent from the lookup get NA. Each
# assignment replaces the whole column, so no prior reset to NA is needed.
# as.character() makes the lookup go by name even if `ME` is a factor
# (a factor would otherwise index by its integer codes).
ME_clusters_table[, diff_age := ME_diff_age[as.character(ME)]]
ME_clusters_table[, change_dir := diff_ME_sing[as.character(ME)]]

ME_clusters_table[!is.na(diff_age)]

# Size of every microexon cluster, used as the percentage denominator.
ME_clusters_table[, Total := .N, by = ME_clusters]

ME_clusters_table_stats <- ME_clusters_table[, .N, by = c("ME_clusters", "diff_age", "change_dir", "Total")]

ME_clusters_table_stats[, Percentage := N * 100 / Total]


ME_clusters_table_stats[, sum(N), by = c("change_dir")] #TOTAL
ME_clusters_table_stats[, sum(N), by = c("diff_age", "change_dir")] #TOTAL

ME_clusters_table_stats <- merge(ME_clusters_table_stats, ME_cluster_names[, c("ME_cluster", "ME_cluster.name", "ME_cluster.type")], by.x = "ME_clusters", by.y = "ME_cluster")

# Keep annotated rows only. is.na() states the intent directly; the earlier
# `!= "NA"` comparison dropped the same rows only because NA compared with a
# string is NA.
ME_clusters_table_stats_HNM <- ME_clusters_table_stats[!is.na(diff_age) & !is.na(change_dir)]

# Restrict to microexon clusters of the four listed types, with alphabetical
# facet order.
ME_clusters_table_stats_HNM_sub <- ME_clusters_table_stats_HNM[ME_cluster.name %in% ME_cluster_names[ME_cluster.type %in% c("Neuronal", "Neuro-Muscular", "Muscular", "Non-Neuronal"), ME_cluster.name], ]
ME_clusters_table_stats_HNM_sub$ME_cluster.name <- factor(ME_clusters_table_stats_HNM_sub$ME_cluster.name, levels = sort(unique(as.character(ME_clusters_table_stats_HNM_sub$ME_cluster.name))))

# Fig 5B: percentage of each microexon cluster with a given onset age;
# change_dir == 1 upwards (sky blue), change_dir == -1 downwards (firebrick).
Fig5.B <- ggplot() +
  geom_bar(data = ME_clusters_table_stats_HNM_sub[change_dir == 1], aes(factor(diff_age), Percentage), stat = "identity", fill = "skyblue") +
  geom_bar(data = ME_clusters_table_stats_HNM_sub[change_dir == -1], aes(factor(diff_age), -Percentage), stat = "identity", fill = "firebrick") +
  facet_grid(ME_cluster.name ~ .) +
  ylim(c(-30, 75)) +
  ylab("Percentage") +
  xlab("") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0))
Fig5.B



# Named lookups (exon ID -> onset age / direction of change) built from the
# forebrain onset table.
ME_diff_age <- setNames(
  Delta_F_whippet_ME_age_diff_total$diff_age,
  Delta_F_whippet_ME_age_diff_total$exon_ID
)
diff_ME_sing <- setNames(
  Delta_F_whippet_ME_age_diff_total$change_dir,
  Delta_F_whippet_ME_age_diff_total$exon_ID
)

# Re-annotate every microexon; exons absent from the lookup get NA. Each
# assignment replaces the whole column, so no prior reset to NA is needed.
# as.character() makes the lookup go by name even if `ME` is a factor
# (a factor would otherwise index by its integer codes).
ME_clusters_table[, diff_age := ME_diff_age[as.character(ME)]]
ME_clusters_table[, change_dir := diff_ME_sing[as.character(ME)]]

ME_clusters_table[!is.na(diff_age)]

# The `Total` column (cluster size) is expected to exist on ME_clusters_table
# already; its computation is left disabled here.
#ME_clusters_table[ , Total:=.N, by= ME_clusters]

ME_clusters_table_stats <- ME_clusters_table[, .N, by = c("ME_clusters", "diff_age", "change_dir", "Total")]

ME_clusters_table_stats[, Percentage := N * 100 / Total]


ME_clusters_table_stats[, sum(N), by = c("change_dir")] # Total

ME_clusters_table_stats <- merge(ME_clusters_table_stats, ME_cluster_names[, c("ME_cluster", "ME_cluster.name", "ME_cluster.type")], by.x = "ME_clusters", by.y = "ME_cluster")

# Keep annotated rows only. is.na() states the intent directly; the earlier
# `!= "NA"` comparison dropped the same rows only because NA compared with a
# string is NA.
ME_clusters_table_stats_F <- ME_clusters_table_stats[!is.na(diff_age) & !is.na(change_dir)]

# Restrict to microexon clusters of the four listed types, with alphabetical
# facet order.
ME_clusters_table_stats_F_sub <- ME_clusters_table_stats_F[ME_cluster.name %in% ME_cluster_names[ME_cluster.type %in% c("Neuronal", "Neuro-Muscular", "Muscular", "Non-Neuronal"), ME_cluster.name], ]
ME_clusters_table_stats_F_sub$ME_cluster.name <- factor(ME_clusters_table_stats_F_sub$ME_cluster.name, levels = sort(unique(as.character(ME_clusters_table_stats_F_sub$ME_cluster.name))))

# Fig 5C: percentage of each microexon cluster with a given onset age;
# change_dir == 1 upwards (sky blue), change_dir == -1 downwards (firebrick).
Fig5.C <- ggplot() +
  geom_bar(data = ME_clusters_table_stats_F_sub[change_dir == 1], aes(factor(diff_age), Percentage), stat = "identity", fill = "skyblue") +
  geom_bar(data = ME_clusters_table_stats_F_sub[change_dir == -1], aes(factor(diff_age), -Percentage), stat = "identity", fill = "firebrick") +
  facet_grid(ME_cluster.name ~ .) +
  ylim(c(-30, 75)) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0),
    axis.title = element_text(colour = NA))

Fig5.C

# For each control-vs-tissue contrast: read the two differential-inclusion
# tables, merge them on the shared exon keys (columns present in both tables
# get the .x / .y suffixes), flag exons that pass the thresholds in BOTH tables
# (|DeltaPsi| >= 0.1 and Probability >= 0.9), record the sign of DeltaPsi.x for
# the flagged exons (other rows keep change_dir = NA), and label the tissue.

# Columns shared by the two tables of every contrast.
whippet_merge_keys <- c("exon_ID", "Gene", "Node", "Coord", "Strand", "Type")

# Skeletal muscle
Delta_SKM_whippet <- fread("/Users/gp7/Google_Drive/Results/ME/Paper/Final_Report/Reps/Rep1/Whippet/control_vs_skeletal_muscle_tissue.diff.microexons")
Delta_SKM_ME <- fread("/Users/gp7/Google_Drive/Results/ME/Paper/Final_Report/Reps/Rep1/Whippet/control_vs_skeletal_muscle_tissue.diff.ME.microexons")
Delta_SKM_whippet_ME <- merge(Delta_SKM_whippet, Delta_SKM_ME, by = whippet_merge_keys)
Delta_SKM_whippet_ME[, diff_high := FALSE]
Delta_SKM_whippet_ME[
  (abs(DeltaPsi.x) >= 0.1 & Probability.x >= 0.9) &
    (abs(DeltaPsi.y) >= 0.1 & Probability.y >= 0.9),
  diff_high := TRUE
]
Delta_SKM_whippet_ME[diff_high == TRUE, change_dir := sign(DeltaPsi.x)]
Delta_SKM_whippet_ME[, Tissue := "SKM"]

# Heart
Delta_Heart_whippet <- fread("/Users/gp7/Google_Drive/Results/ME/Paper/Final_Report/Reps/Rep1/Whippet/control_vs_heart.diff.microexons")
Delta_Heart_ME <- fread("/Users/gp7/Google_Drive/Results/ME/Paper/Final_Report/Reps/Rep1/Whippet/control_vs_heart.diff.ME.microexons")
Delta_Heart_whippet_ME <- merge(Delta_Heart_whippet, Delta_Heart_ME, by = whippet_merge_keys)
Delta_Heart_whippet_ME[, diff_high := FALSE]
Delta_Heart_whippet_ME[
  (abs(DeltaPsi.x) >= 0.1 & Probability.x >= 0.9) &
    (abs(DeltaPsi.y) >= 0.1 & Probability.y >= 0.9),
  diff_high := TRUE
]
Delta_Heart_whippet_ME[diff_high == TRUE, change_dir := sign(DeltaPsi.x)]
Delta_Heart_whippet_ME[, Tissue := "Heart"]

# Adrenal gland
Delta_AG_whippet <- fread("/Users/gp7/Google_Drive/Results/ME/Paper/Final_Report/Reps/Rep1/Whippet/control_vs_adrenal_gland.diff.microexons")
Delta_AG_ME <- fread("/Users/gp7/Google_Drive/Results/ME/Paper/Final_Report/Reps/Rep1/Whippet/control_vs_adrenal_gland.diff.ME.microexons")
Delta_AG_whippet_ME <- merge(Delta_AG_whippet, Delta_AG_ME, by = whippet_merge_keys)
Delta_AG_whippet_ME[, diff_high := FALSE]
Delta_AG_whippet_ME[
  (abs(DeltaPsi.x) >= 0.1 & Probability.x >= 0.9) &
    (abs(DeltaPsi.y) >= 0.1 & Probability.y >= 0.9),
  diff_high := TRUE
]
Delta_AG_whippet_ME[diff_high == TRUE, change_dir := sign(DeltaPsi.x)]
# The adrenal gland table was previously labelled "Heart" (copy-paste slip).
Delta_AG_whippet_ME[, Tissue := "AG"]
# Psi_B.x of exons passing the thresholds in both tables of each contrast
# (|DeltaPsi| >= 0.1 and Probability > 0.9), one table per tissue.
ag_psi_significant <- Delta_AG_whippet_ME[
  abs(DeltaPsi.x) >= 0.1 & Probability.x > 0.9 &
    abs(DeltaPsi.y) >= 0.1 & Probability.y > 0.9,
  .(exon_ID, PSI_AG = Psi_B.x)
]
hnm_psi_significant <- Delta_HNM_merge[
  abs(DeltaPsi.x) >= 0.1 & Probability.x > 0.9 &
    abs(DeltaPsi.y) >= 0.1 & Probability.y > 0.9,
  .(exon_ID, age, PSI_HNM = Psi_B.x)
]
f_psi_significant <- Delta_F_merge[
  abs(DeltaPsi.x) >= 0.1 & Probability.x > 0.9 &
    abs(DeltaPsi.y) >= 0.1 & Probability.y > 0.9,
  .(exon_ID, age, PSI_F = Psi_B.x)
]

# Inner joins: AG with HNM on exon, then with F on exon and age.
neural_AG_PSI <- merge(
  merge(ag_psi_significant, hnm_psi_significant, by = "exon_ID"),
  f_psi_significant,
  by = c("exon_ID", "age")
)

table(neural_AG_PSI$age) # Over 14.5 there is the highest number of intersections

# Long format of the three PSI columns for age 14.5.
neural_AG_PSI_melted <- melt(
  neural_AG_PSI[age == 14.5, c("exon_ID", "PSI_AG", "PSI_HNM", "PSI_F")]
)

# Pairs tested by the paired two-sided Wilcoxon test, and x-axis labels.
ag_vs_brain_comparisons <- list(c("PSI_AG", "PSI_HNM"), c("PSI_AG", "PSI_F"))
ag_brain_axis_labels <- c("Adrenal Gland", "Brain MHN", "Forebrain")

# Panel A: box plot with jittered points.
Sup.AG_MHNF.A <- ggplot(neural_AG_PSI_melted, aes(variable, value)) +
  geom_boxplot() +
  geom_jitter() +
  #geom_line(aes(group=exon_ID)) +
  xlab("Tissue") +
  ylab("PSI") +
  ggsignif::geom_signif(
    test = "wilcox.test",
    comparisons = ag_vs_brain_comparisons,
    step_increase = 0.15,
    test.args = list(alternative = "two.sided", paired = TRUE),
    map_signif_level = TRUE
  ) +
  scale_x_discrete(labels = ag_brain_axis_labels)

# Panel B: one line per exon across the three tissues.
Sup.AG_MHNF.B <- ggplot(neural_AG_PSI_melted, aes(variable, value)) +
  #geom_boxplot() +
  #geom_jitter() +
  geom_line(aes(group = exon_ID)) +
  xlab("Tissue") +
  ylab("PSI") +
  ggsignif::geom_signif(
    test = "wilcox.test",
    comparisons = ag_vs_brain_comparisons,
    step_increase = 0.15,
    test.args = list(alternative = "two.sided", paired = TRUE),
    map_signif_level = TRUE
  ) +
  scale_x_discrete(labels = ag_brain_axis_labels)

plot_grid(Sup.AG_MHNF.A, Sup.AG_MHNF.B, ncol = 1, labels = "AUTO")



# ---- Flag microexons by tissue -------------------------------------------
# Each Diff_* column starts as the string "FALSE". Exons flagged diff_high with
# change_dir == 1 are marked "Excluded"; those with change_dir == -1 are then
# marked "Included" (assigned last, so it wins if an exon is in both sets).

skm_excluded_exons <- Delta_SKM_whippet_ME[diff_high == TRUE & change_dir == 1, exon_ID]
skm_included_exons <- Delta_SKM_whippet_ME[diff_high == TRUE & change_dir == -1, exon_ID]
ME_clusters_table[, Diff_SKM := "FALSE"]
ME_clusters_table[ME %in% skm_excluded_exons, Diff_SKM := "Excluded"]
ME_clusters_table[ME %in% skm_included_exons, Diff_SKM := "Included"]

heart_excluded_exons <- Delta_Heart_whippet_ME[diff_high == TRUE & change_dir == 1, exon_ID]
heart_included_exons <- Delta_Heart_whippet_ME[diff_high == TRUE & change_dir == -1, exon_ID]
ME_clusters_table[, Diff_Heart := "FALSE"]
ME_clusters_table[ME %in% heart_excluded_exons, Diff_Heart := "Excluded"]
ME_clusters_table[ME %in% heart_included_exons, Diff_Heart := "Included"]

ag_excluded_exons <- Delta_AG_whippet_ME[diff_high == TRUE & change_dir == 1, exon_ID]
ag_included_exons <- Delta_AG_whippet_ME[diff_high == TRUE & change_dir == -1, exon_ID]
ME_clusters_table[, Diff_AG := "FALSE"]
ME_clusters_table[ME %in% ag_excluded_exons, Diff_AG := "Excluded"]
ME_clusters_table[ME %in% ag_included_exons, Diff_AG := "Included"]

# ---- Per-cluster percentages for each tissue -----------------------------
# All three tables are given the same column names so they can be stacked.
tissue_stats_columns <- c("ME_clusters", "Total", "Diff", "N", "Percentage", "Tissue")

ME_clusters_table_stats_SKM <- ME_clusters_table[
  Diff_SKM != "FALSE", .N,
  by = c("ME_clusters", "Total", "Diff_SKM")
]
ME_clusters_table_stats_SKM[, `:=`(Percentage = (N * 100 / Total), Tissue = "SKM")]
setnames(ME_clusters_table_stats_SKM, tissue_stats_columns)

ME_clusters_table_stats_Heart <- ME_clusters_table[
  Diff_Heart != "FALSE", .N,
  by = c("ME_clusters", "Total", "Diff_Heart")
]
ME_clusters_table_stats_Heart[, `:=`(Percentage = (N * 100 / Total), Tissue = "HRT")]
setnames(ME_clusters_table_stats_Heart, tissue_stats_columns)

ME_clusters_table_stats_AG <- ME_clusters_table[
  Diff_AG != "FALSE", .N,
  by = c("ME_clusters", "Total", "Diff_AG")
]
ME_clusters_table_stats_AG[, `:=`(Percentage = (N * 100 / Total), Tissue = "AD")]
setnames(ME_clusters_table_stats_AG, tissue_stats_columns)

# Stack the three tissues and attach cluster names and types.
ME_clusters_table_stats_SHA <- rbind(
  ME_clusters_table_stats_SKM,
  ME_clusters_table_stats_Heart,
  ME_clusters_table_stats_AG
)
ME_clusters_table_stats_SHA <- merge(
  ME_clusters_table_stats_SHA,
  ME_cluster_names[, c("ME_cluster", "ME_cluster.name", "ME_cluster.type")],
  by.x = "ME_clusters",
  by.y = "ME_cluster"
)

# Restrict to the cluster types shown in the figure; facets sorted by name.
ME_clusters_table_stats_SHA_sub <- ME_clusters_table_stats_SHA[
  ME_cluster.name %in% ME_cluster_names[
    ME_cluster.type %in% c("Neuronal", "Neuro-Muscular", "Muscular", "Non-Neuronal"),
    ME_cluster.name
  ],
]
ME_clusters_table_stats_SHA_sub$ME_cluster.name <- factor(
  ME_clusters_table_stats_SHA_sub$ME_cluster.name,
  levels = sort(unique(as.character(ME_clusters_table_stats_SHA_sub$ME_cluster.name)))
)

# Mirrored bar chart: "Included" upwards (skyblue), "Excluded" downwards
# (firebrick), one facet row per cluster.
Fig5.D <- ggplot() +
  geom_bar(
    data = ME_clusters_table_stats_SHA_sub[Diff == "Included"],
    aes(factor(Tissue), Percentage),
    stat = "identity", fill = "skyblue"
  ) +
  geom_bar(
    data = ME_clusters_table_stats_SHA_sub[Diff == "Excluded"],
    aes(factor(Tissue), -Percentage),
    stat = "identity", fill = "firebrick"
  ) +
  facet_grid(ME_cluster.name ~ .) +
  ylim(c(-30, 75)) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0),
    axis.title = element_text(colour = NA)
  )
Fig5.D
library(UpSetR)

# Small hard-coded example list; it is not used by the figure built below.
listInput <- list(
  one = c(1, 2, 3, 5, 7, 8, 11, 12, 13),
  two = c(1, 2, 4, 5, 10),
  three = c(1, 5, 6, 7, 8, 9, 10, 12, 13)
)

# Display the exon IDs of the two brain sets.
Delta_HNM_whippet_ME_age_diff_total[, exon_ID]

Delta_F_whippet_ME_age_diff_total[, exon_ID]

# Microexon sets fed to the UpSet plot. The Diff_* columns are character
# ("FALSE" / "Excluded" / "Included"), so they are compared to the string
# "FALSE" rather than relying on coercion of the logical FALSE.
tissue_intersection <- list(
  Brain_HNM = Delta_HNM_whippet_ME_age_diff_total[, exon_ID],
  Brain_F = Delta_F_whippet_ME_age_diff_total[, exon_ID],
  SKM = ME_clusters_table[Diff_SKM != "FALSE", ME],
  Heart = ME_clusters_table[Diff_Heart != "FALSE", ME],
  AG = ME_clusters_table[Diff_AG != "FALSE", ME]
)

# Intersections ordered by frequency, showing all five sets.
Fig5.y <- upset(
  fromList(tissue_intersection),
  order.by = "freq",
  nsets = 5,
  keep.order = TRUE
)
Fig5.y
# Return `n` colours whose hues are evenly spaced between 15 and 375 degrees,
# at fixed luminance 65 and chroma 100.
#
# n: number of colours wanted (non-negative integer).
# Returns a character vector of length `n`; `character(0)` when n is 0.
gg_color_hue <- function(n) {
  # n + 1 break points: the last one (375) is the same hue as the first (15),
  # so only the first n are kept.
  hues <- seq(15, 375, length.out = n + 1)
  # seq_len() rather than 1:n, which would select one colour for n = 0.
  hcl(h = hues, l = 65, c = 100)[seq_len(n)]
}


library(eulerr)

# Four-set version of the microexon sets (brain taken from ME_clusters_table).
tissue_intersection <- list(
  Brain = ME_clusters_table[diff_age != "NA", ME],
  SKM = ME_clusters_table[Diff_SKM != FALSE, ME],
  Heart = ME_clusters_table[Diff_Heart != FALSE, ME],
  AG = ME_clusters_table[Diff_AG != FALSE, ME]
)

# Hard-coded intersection sizes for the four-set diagram.
four_set_counts <- c(
  "Brain" = 212,
  "SKM" = 35,
  "Heart" = 11,
  "AG" = 11,
  "Brain&AG" = 29,
  "SKM&Heart" = 21,
  "Brain&Heart" = 18,
  "Brain&SKM&Heart" = 18,
  "Brain&SKM" = 14,
  "Brain&SKM&Heart&AG" = 3,
  "Brain&Heart&AG" = 2,
  "Heart&AG" = 1,
  "SKM&AG" = 1,
  "Brain&SKM&AG" = 1
)
tissue_intersection.venn <- euler(four_set_counts)

# Hard-coded intersection sizes for the five-set diagram. This fit replaces the
# four-set one and is the one plotted below.
five_set_counts <- c(
  "Brain_HNM&Brain_F" = 156,
  "Brain_HNM" = 71,
  "SKM" = 34,
  "Brain_HNM&Brain_F&AG" = 29,
  "SKM&Heart" = 20,
  "Brain_HNM&Brain_F&Heart" = 19,
  "Brain_F" = 15,
  "Brain_HNM&Brain_F&SKM&Heart" = 14,
  "Brain_HNM&Brain_F&SKM" = 13,
  "Heart" = 11,
  "AG" = 11,
  "Brain_HNM&SKM" = 5,
  "Brain_HNM&Brain_F&SKM&Heart&AG" = 5,
  "Brain_HNM&SKM&Heart" = 4,
  "Brain_HNM&Heart" = 3,
  "Brain_F&AG" = 2,
  "Brain_HNM&Heart&AG" = 2,
  "Heart&AG" = 1,
  "SKM&AG" = 1,
  "Brain_F&SKM" = 1,
  "Brain_F&SKM&Heart" = 1,
  "Brain_HNM&SKM&AG" = 1,
  "Brain_HNM&Brain_F&Heart&AG" = 1,
  "Brain_HNM&Brain_F&SKM&AG" = 1
)
tissue_intersection.venn <- euler(five_set_counts)

# Draw the diagram with one gg_color_hue() colour per set.
ven <- plot(
  tissue_intersection.venn,
  counts = FALSE,
  font = 4,
  cex = 1,
  alpha = 0.5,
  fill = gg_color_hue(5),
  quantities = list(fontsize = 0)
)

ven


library(ggplotify)

# Right-hand column, top row: panels F, G and H side by side.
Fig5.col2.row1 <- plot_grid(
  Fig5.B, Fig5.C, Fig5.D,
  nrow = 1,
  labels = c("F", "G", "H"),
  rel_widths = c(2, 1.5, 1.5)
)

# Right-hand column: the bar-chart row above the UpSet plot (panel I), which is
# converted to a grob first.
Fig5.col2 <- plot_grid(
  Fig5.col2.row1, as.grob(Fig5.y),
  ncol = 1,
  labels = c("", "I"),
  rel_heights = c(3, 1)
)

# Bottom part of the figure: panel E next to the right-hand column.
Fig5.bottom <- plot_grid(
  Fig5.A, Fig5.col2,
  nrow = 1,
  rel_widths = c(1, 1),
  labels = c("E", "")
)

#Fig5 <- plot_grid(Fig5.col1, Fig5.B, Fig5.C, Fig5.D, nrow=1, rel_widths = c(10, 2, 1.5, 1.5 ), labels = c("", "F", "G", "H") )
# Full figure: Fig5.X on top, the bottom part below at four times its height.
Fig5 <- plot_grid(
  Fig5.X, Fig5.bottom,
  ncol = 1,
  rel_heights = c(1, 4)
)
Fig5

# Error  (captured console output, not code)


# Exon IDs whose diff_age in Delta_HNM_whippet_ME_age_diff is not Inf; shown
# here and reused in the filter further down.
hnm_exons_with_diff_age <- Delta_HNM_whippet_ME_age_diff[diff_age != "Inf", ]$exon_ID
hnm_exons_with_diff_age

# Sign of DeltaPsi.x for every row.
Delta_HNM_whippet_ME$delta_sing <- sign(Delta_HNM_whippet_ME[["DeltaPsi.x"]])

# Number of diff_high rows per exon and sign.
Delta_HNM_whippet_ME_diff_count <- Delta_HNM_whippet_ME[
  diff_high == TRUE, .N,
  by = c("exon_ID", "delta_sing")
]

# Display exon/sign pairs seen exactly twice that are absent from the list of
# exons with a non-Inf diff_age.
Delta_HNM_whippet_ME_diff_count[
  N == 2 & !(exon_ID %in% hnm_exons_with_diff_age),
]

Delta_HNM_whippet_ME_age_diff_low[diff_age != Inf]

library(biomaRt)

# Lookup vector exon_ID -> gene ID, with everything from the first "." onwards
# removed from the gene ID.
MicroExon_genes <- gsub("\\..*", "", Delta_HNM_whippet_ME$Gene)
names(MicroExon_genes) <- Delta_HNM_whippet_ME$exon_ID

Delta_HNM_whippet_ME_age_diff_total[, ensembl_gene_id := MicroExon_genes[exon_ID]]

ensembl <- useEnsembl(biomart = "ensembl", dataset = "mmusculus_gene_ensembl")

# Map Ensembl gene IDs to MGI symbols. The query is issued once; it used to be
# sent twice with identical arguments.
gene_table <- data.table(getBM(
  attributes = c("ensembl_gene_id", "mgi_symbol"),
  filters = "ensembl_gene_id",
  values = Delta_HNM_whippet_ME_age_diff_total$ensembl_gene_id,
  mart = ensembl
))

# Inner join: exons whose gene ID is absent from gene_table are dropped.
Delta_HNM_whippet_ME_age_diff_total <- merge(
  Delta_HNM_whippet_ME_age_diff_total,
  gene_table,
  by = "ensembl_gene_id"
)

# Print the unique gene symbols, one per line.
cat(as.character(unique(Delta_HNM_whippet_ME_age_diff_total$mgi_symbol)), sep = "\n")

Delta_HNM_whippet_ME_age_diff_total[mgi_symbol == "Dctn2"]

# Input for PPI network

# Background gene set: every gene in either merged delta table, with
# everything from the first "." onwards removed from the gene ID.
genes_background <- unique(c(Delta_HNM_merge$Gene.x, Delta_F_merge$Gene.x))
genes_background <- gsub("\\..*", "", genes_background)

# Genes of the exons in the HNM and F age-diff tables, IDs trimmed as above.
neuronal_ME_genes <- unique(c(
  Delta_HNM_whippet_ME_age_diff_total$Gene,
  Delta_F_whippet_ME_age_diff_total$Gene
))
neuronal_ME_genes <- gsub("\\..*", "", neuronal_ME_genes)

neuronal_ME_gene_names <- gene_table[ensembl_gene_id %in% neuronal_ME_genes, ]

# Print the unique gene symbols, one per line.
cat(as.character(unique(neuronal_ME_gene_names$mgi_symbol)), sep = "\n")

fwrite(
  data.table(genes_background),
  "../PPI/genes_background.txt",
  quote = FALSE, col.names = FALSE
)

# Recompute the trimmed gene ID from `Gene` (this statement used to appear
# twice in a row).
Delta_HNM_whippet_ME_age_diff_total[, ensembl_gene_id := gsub("\\..*", "", Gene)]

# Full outer join of the HNM and F tables; diff_age is the smaller of the two
# per exon, ignoring missing values.
Delta_HNMF_whippet_ME_age_diff_total <- merge(
  Delta_HNM_whippet_ME_age_diff_total,
  Delta_F_whippet_ME_age_diff_total,
  by = c("exon_ID", "Gene", "change_dir"),
  all = TRUE
)
Delta_HNMF_whippet_ME_age_diff_total[
  , diff_age := min(diff_age.y, diff_age.x, na.rm = TRUE),
  by = exon_ID
]

Delta_HNMF_whippet_ME_age_diff_total[, ensembl_gene_id := gsub("\\..*", "", Gene)]

Delta_HNMF_whippet_ME_age_diff_total <- merge(
  Delta_HNMF_whippet_ME_age_diff_total,
  neuronal_ME_gene_names,
  by = "ensembl_gene_id"
)

#merge( Delta_HNMF_whippet_ME_age_diff_total, PPI_centrality, by="mgi_symbol")

#Delta_HNMF_whippet_ME_age_diff_total[!mgi_symbol %in% PPI_centrality$mgi_symbol]

# One gene ID per line, no header.
fwrite(
  data.table(unique(Delta_HNMF_whippet_ME_age_diff_total$ensembl_gene_id)),
  "../Final_Figures/Figure5/microexons_HMNF_genes.txt",
  quote = FALSE, col.names = FALSE
)

Delta_Heart_whippet_ME[, ensembl_gene_id := gsub("\\..*", "", Gene)]
Delta_SKM_whippet_ME[, ensembl_gene_id := gsub("\\..*", "", Gene)]
Delta_AG_whippet_ME[, ensembl_gene_id := gsub("\\..*", "", Gene)]

# Inner joins: only rows whose gene is in neuronal_ME_gene_names are kept.
Delta_Heart_whippet_ME <- merge(Delta_Heart_whippet_ME, neuronal_ME_gene_names, by = "ensembl_gene_id")
Delta_SKM_whippet_ME <- merge(Delta_SKM_whippet_ME, neuronal_ME_gene_names, by = "ensembl_gene_id")
Delta_AG_whippet_ME <- merge(Delta_AG_whippet_ME, neuronal_ME_gene_names, by = "ensembl_gene_id")

# Gene IDs of the diff_high exons per tissue: one per line, no header.
fwrite(
  data.table(unique(Delta_Heart_whippet_ME[diff_high == TRUE, ensembl_gene_id])),
  "../Final_Figures/Figure5/heart_genes.txt",
  quote = FALSE, col.names = FALSE
)
fwrite(
  data.table(unique(Delta_SKM_whippet_ME[diff_high == TRUE, ensembl_gene_id])),
  "../Final_Figures/Figure5/SKM_genes.txt",
  quote = FALSE, col.names = FALSE
)
fwrite(
  data.table(unique(Delta_AG_whippet_ME[diff_high == TRUE, ensembl_gene_id])),
  "../Final_Figures/Figure5/AG_genes.txt",
  quote = FALSE, col.names = FALSE
)

Delta_Heart_whippet_ME[diff_high == TRUE, ensembl_gene_id]
Delta_SKM_whippet_ME[diff_high == TRUE, ensembl_gene_id]
Delta_AG_whippet_ME[diff_high == TRUE, ensembl_gene_id]

# Read the space-delimited, header-less VastDB file and keep its first column.
microexons_Vastdb <- read_delim(
  "~/Google_Drive/Results/ME/mm10/VastDB/VastDb.microexons.txt",
  delim = " ",
  escape_double = FALSE,
  col_names = FALSE,
  trim_ws = TRUE
)[["X1"]]

# Same for the GENCODE vM16 file.
microexons_GENCODE <- read_delim(
  "~/Google_Drive/Results/ME/mm10/gencode.vM16.annotation.microexons.txt",
  delim = " ",
  escape_double = FALSE,
  col_names = FALSE,
  trim_ws = TRUE
)[["X1"]]
# Flag the microexons listed in `me_after_10_samples_filter`.
# (A stray ")" previously made this line a parse error.)
ME_final.out[, In.10_percent_of_bulk := ME %in% me_after_10_samples_filter]
ME_final.out[In.10_percent_of_bulk == TRUE, .N, by = "type"]

# Write the table as TSV. A stray quote inside the path previously made this
# line a parse error.
fwrite(neuronal_ME_HNMF.table, "../../../../Thesis/neuronal_ME_HNMF.tsv", sep = "\t")

#library("STRINGdb")

#string_db <- STRINGdb$new( version="10", species=9606, score_threshold=0, input_directory="" )


#neuronal_ME_gene_names_mapped <- string_db$map( neuronal_ME_gene_names, "mgi_symbol", removeUnmappedRows = TRUE )

# string_db$plot_network( neuronal_ME_gene_names_mapped )



#hits <- as.character(unique(neuronal_ME_gene_names$mgi_symbol))

#example1_mapped <- string_db$map( hits, "gene", removeUnmappedRows = TRUE )

#plot_network( hits )

#data(diff_exp_example1)


#example1_mapped <- string_db$map( diff_exp_example1, "gene", removeUnmappedRows = TRUE )

#map

#?STRINGdb


# Attach gene information to each clustered microexon via its transcript ID.
neuronal_clusters_genes <- merge(
  neuronal_clusters,
  gene_info,
  by.x = "transcript",
  by.y = "ensembl_transcript_id"
)

# Replace the numeric cluster IDs 1..18 with their labels (position i in this
# vector is the label for cluster i).
cluster_id_labels <- c(
  "I1", "E1", "E3", "I2", "N1", "M1", "N2", "NM3", "NM2",
  "N5", "NM1", "N3", "N4", "NN2", "E2", "I4", "I3", "NN1"
)
neuronal_clusters_genes$ME_clusters <- mapvalues(
  neuronal_clusters_genes$ME_clusters,
  from = 1:18,
  to = cluster_id_labels
)

# Generating values for pie vertices
#
# For every row of `nodes`, collect:
#   values_clusters - microexon counts per cluster (pie slice sizes)
#   ME_per_gene     - total number of microexons of the gene
#   vertex_shape    - "circle" when exactly one cluster is present, else "pie"
#   circle_color    - fill colour for circle-shaped vertices
#   label_color     - "darkgreen" for SFARI genes with gene.score <= 3,
#                     otherwise "black"

colour_pallete <- list(c(heat.colors(8), "#1E90FFFF"))

# Order of the cluster counts inside each pie; position k is coloured with
# colour_pallete[[1]][k].
pie_cluster_order <- c("NM1", "N1", "NM2", "N2", "NM3", "N3", "N4", "N5", "NN1")

# One entry per node. The number of nodes is taken from `nodes` instead of the
# previously hard-coded 144.
n_nodes <- nrow(nodes)
values_clusters <- vector("list", n_nodes)
ME_per_gene <- vector("list", n_nodes)
vertex_shape <- vector("list", n_nodes)
circle_color <- vector("list", n_nodes)
label_color <- vector("list", n_nodes)

# Loop-invariant: symbols of the SFARI genes with gene.score <= 3.
#SFARI_Gene_animal_genes[model.species=="Mus musculus" , gene.symbol]
sfari_gene_symbols <- SFARI_Gene_human_genes[gene.score <= 3, gene.symbol]

for (i in seq_len(n_nodes)) {
  gene_name <- nodes[i, ]$id

  # NOTE(review): these two overrides are keyed on the row position in `nodes`,
  # so they target other genes if the node order changes - confirm against the
  # current `nodes` table.
  if (i == 16) {
    gene_name <- "Sptan1" # STRING DB changed the name from Spna2 to Sptan1
  }
  if (i == 49) {
    gene_name <- "Diaph1" # STRING DB changed the name from Diap1 to Diaph1
  }

  # All microexons of this gene; filtered once and reused for every cluster.
  gene_microexons <- neuronal_clusters_genes[mgi_symbol == gene_name, ]
  ME_per_gene[[i]] <- nrow(gene_microexons)

  cluster_counts <- vapply(
    pie_cluster_order,
    function(cluster_label) {
      sum(gene_microexons$ME_clusters == cluster_label, na.rm = TRUE)
    },
    integer(1),
    USE.NAMES = FALSE
  )
  values_clusters[[i]] <- cluster_counts

  present_clusters <- which(cluster_counts != 0)
  if (length(present_clusters) == 1) {
    # Nodes with only one kind of cluster need to be circles instead of pies
    # to avoid the pie border.
    vertex_shape[[i]] <- "circle"
    circle_color[[i]] <- colour_pallete[[1]][present_clusters]
  } else {
    vertex_shape[[i]] <- "pie"
    circle_color[[i]] <- "#FFFF00FF" # This colour is not taken into consideration for pies
  }

  if (gene_name %in% sfari_gene_symbols) {
    label_color[[i]] <- "darkgreen"
  } else {
    label_color[[i]] <- "black"
  }

  # Report nodes without any microexon in the plotted clusters.
  if (sum(cluster_counts) == 0) {
    print(c(i, nodes[i, ]$id))
  }
}

library(igraph)

vertex.pie.color <- list(heat.colors(9))

# Edge list read from the PPI table.
ME_PPI <- fread("../PPI/ME.PPI.tsv")

# Edges: the two node columns and the combined score, renamed for igraph.
links <- ME_PPI[, c("#node1", "node2", "combined_score")]
setnames(links, c("from", "to", "score"))

# Vertices: every identifier that appears at either end of an edge, once.
nodes.1 <- ME_PPI[, c("#node1")]
setnames(nodes.1, "id")

nodes.2 <- ME_PPI[, c("node2")]
setnames(nodes.2, "id")

# unique(..., by = "id") replaces the malformed `nodes[ , , by="id"]` call,
# which passed `by` without `j`.
nodes <- unique(rbind(nodes.1, nodes.2), by = "id")

net <- graph_from_data_frame(d = links, vertices = nodes, directed = FALSE)

# Edge attributes derived from the score: both decrease as the score grows.
E(net)$size <- log(2 - E(net)$score)

E(net)$width <- (1 - E(net)$score) * 15




#V(net)$size <- log(unlist(ME_per_gene) + 2) * 4

#E(net)$score

#plot(net, vertex.shape= unlist(vertex_shape), vertex.color = unlist(circle_color)  , vertex.pie=values_clusters, rescale=T, pie.lty=2, vertex.pie.color=colour_pallete, vertex.label.font=2, vertex.label.dist=0.4, vertex.label.color=unlist(label_color), vertex.label.cex=1.2)

#colour_pallete


#plot(net,rescale=T, vertex.label.cex=1.2, vertex.size=1 )





#harmonic_centrality <- PPI_diff_age_HNMF$harmonic_centrality
#names(harmonic_centrality) <- PPI_diff_age_HNMF$mapped_gene

#V(net)$size <- log(harmonic_centrality[names(V(net))] +1 )


#V(net)$size <- harmonic_centrality[names(V(net))]/6

#plot(net)




#PPI_diff_age_HNM_central [  %in% names(V(net))

#harmonic_centrality[names(V(net))]


#harmonic_centrality[names(V(net))]




#harmonic_centrality[names(V(net))]


#names(V(net)) 

#PPI_diff_age_HNM_central[  (mgi_symbol %in%  names(V(net)))==FALSE ,  ]

#PPI_diff_age_HNM_central[mgi_symbol=="Itsn1"]

#PPI_diff_age_HNMF
library(CINNA)

# Per-vertex centrality measures of the undirected, unweighted network.
PPI_betweenness <- estimate_betweenness(
  net,
  vids = V(net),
  directed = FALSE,
  cutoff = -1,
  weights = NULL,
  nobigint = TRUE
)

PPI_eigen_centrality <- eigen_centrality(
  net,
  directed = FALSE,
  scale = TRUE,
  weights = NULL
)$vector

#cbind(as.numeric(PPI_betweenness[V(net)]), as.numeric( PPI_eigen_centrality[V(net)]  ) )

PPI_harmonic_centrality <- harmonic_centrality(net)

PPI_degree_centrality <- setNames(centr_degree(net)$res, V(net)$name)

# One row per vertex, one column per measure; the row names of the bound
# matrix become the `mapped_gene` column.
centrality_matrix <- cbind(
  betweenness = PPI_betweenness[V(net)],
  eigen_centrality = PPI_eigen_centrality[V(net)],
  harmonic_centrality = PPI_harmonic_centrality,
  degree_centrality = PPI_degree_centrality
)
PPI_centrality <- data.frame(centrality_matrix)
PPI_centrality$mapped_gene <- row.names(PPI_centrality)
PPI_centrality <- data.table(PPI_centrality)


# Stacked-proportion bar chart of central vs non-central genes per earliest
# diff_age. `stats_table` needs the columns min_dif_age and central.
plot_central_proportion <- function(stats_table) {
  ggplot(stats_table) +
    geom_bar(aes(min_dif_age, fill = central), stat = "count", position = "fill") +
    xlab("Earliest microexon differential inclusion detection per gene (DPC)") +
    ylab("Protein type proportion") +
    scale_x_continuous(breaks = seq(10.5, 16.5, 1)) +
    scale_fill_discrete(name = "Protein type", labels = c("Non-central", "Central")) +
    theme(
      axis.text.x = element_text(angle = 90),
      legend.position = "top",
      legend.direction = "horizontal"
    )
}

# ---- Eigenvector centrality ------------------------------------------------
# Per gene: earliest diff_age.x and number of microexons, over rows with a
# non-missing diff_age.x.
PPI_diff_age_HNM_stats_EC <- PPI_diff_age_HNMF[
  !is.na(diff_age.x),
  .(min_dif_age = min(diff_age.x), Number_of_ME = .N),
  by = c("ensembl_gene_id", "eigen_centrality")
]
# Rank genes from most to least central and flag those with a rank
# percentile below 15 as central.
PPI_diff_age_HNM_stats_EC[, rank := frank(-eigen_centrality)]
PPI_diff_age_HNM_stats_EC[, max_rank := max(rank)]
PPI_diff_age_HNM_stats_EC[, percentil := rank / max_rank * 100]
PPI_diff_age_HNM_stats_EC[, central := FALSE]
PPI_diff_age_HNM_stats_EC[percentil < 15, central := TRUE]

plot_central_proportion(PPI_diff_age_HNM_stats_EC)

# ---- Harmonic centrality ---------------------------------------------------
PPI_diff_age_HNM_stats_HC <- PPI_diff_age_HNMF[
  !is.na(diff_age.x),
  .(min_dif_age = min(diff_age.x), Number_of_ME = .N),
  by = c("ensembl_gene_id", "harmonic_centrality")
]
PPI_diff_age_HNM_stats_HC[, rank := frank(-harmonic_centrality)]
PPI_diff_age_HNM_stats_HC[, max_rank := max(rank)]
PPI_diff_age_HNM_stats_HC[, percentil := rank / max_rank * 100]
PPI_diff_age_HNM_stats_HC[, central := FALSE]
PPI_diff_age_HNM_stats_HC[percentil < 15, central := TRUE]

plot_central_proportion(PPI_diff_age_HNM_stats_HC)

# ---- Logistic regressions: central ~ earliest diff_age ----------------------
# The same variable holds first the eigenvector fit, then the harmonic fit.
centrality_logistic_regression <- glm(
  formula = central ~ min_dif_age,
  family = "binomial",
  data = PPI_diff_age_HNM_stats_EC
)
summary(centrality_logistic_regression)

centrality_logistic_regression <- glm(
  formula = central ~ min_dif_age,
  family = "binomial",
  data = PPI_diff_age_HNM_stats_HC
)
summary(centrality_logistic_regression)
# Return the expression passed as `v1`, unevaluated, as a character vector
# (e.g. the name of the variable that was supplied).
var2str <- function(v1) {
  supplied_expression <- substitute(v1)
  deparse(supplied_expression)
}
#GO_Reactome <- fread("../Figures/New_Figure 5/STRING/GO/Whole_genome_background/Reactome Pathways.tsv")




# Gene labels listed for one enrichment term.
#   enrichment: table with the columns `term description` and
#               `matching proteins in your network (labels)`.
#   term_label: exact value to look up in `term description`.
# Returns the comma-separated labels of the matching row(s) as a character
# vector (NULL when no row matches).
enrichment_term_genes <- function(enrichment, term_label) {
  # which() drops NA comparisons, matching data.table's row-filter semantics.
  hits <- which(enrichment[["term description"]] == term_label)
  labels <- enrichment[["matching proteins in your network (labels)"]][hits]
  unlist(strsplit(labels, ","))
}

# NOTE(review): GO_Reactome must already be loaded when this runs; in this
# file the only active fread() for it appears further down -- confirm the
# intended execution order.
Axon_guidance <- enrichment_term_genes(GO_Reactome, "Axon guidance")
L1CAM_interactions <- enrichment_term_genes(GO_Reactome, "L1CAM interactions")
Protein_protein_interactions_at_synapses <- enrichment_term_genes(GO_Reactome, "Protein-protein interactions at synapses")
ER_to_Golgi_Anterograde_Transport <- enrichment_term_genes(GO_Reactome, "ER to Golgi Anterograde Transport")
Clathrin_mediated_endocytosis <- enrichment_term_genes(GO_Reactome, "Clathrin-mediated endocytosis")
Golgi_Associated_Vesicle_Biogenesis <- enrichment_term_genes(GO_Reactome, "Golgi Associated Vesicle Biogenesis")
Membrane_Trafficking <- enrichment_term_genes(GO_Reactome, "Membrane Trafficking")
Intra_Golgi_and_retrograde_Golgi_to_ER_traffic <- enrichment_term_genes(GO_Reactome, "Intra-Golgi and retrograde Golgi-to-ER traffic")

GOs <- list(Axon_guidance, L1CAM_interactions)

# Reactome gene sets keyed by the label stored in the `GO` column. The names
# equal the variable names, i.e. what var2str() produced in the original
# copy-pasted version.
reactome_gene_sets <- list(
  Axon_guidance = Axon_guidance,
  L1CAM_interactions = L1CAM_interactions,
  Protein_protein_interactions_at_synapses = Protein_protein_interactions_at_synapses,
  ER_to_Golgi_Anterograde_Transport = ER_to_Golgi_Anterograde_Transport,
  Clathrin_mediated_endocytosis = Clathrin_mediated_endocytosis,
  Golgi_Associated_Vesicle_Biogenesis = Golgi_Associated_Vesicle_Biogenesis,
  Membrane_Trafficking = Membrane_Trafficking,
  Intra_Golgi_and_retrograde_Golgi_to_ER_traffic = Intra_Golgi_and_retrograde_Golgi_to_ER_traffic
)

# One row per (microexon, gene set) membership: for each gene set keep the
# rows of PPI_diff_age_HNMF whose mapped_gene belongs to it, tag them with
# the set label, and stack the pieces in list order. A row whose gene is in
# several sets appears once per set.
# (The plain column subset previously assigned to PPI_centrality_GO was
# overwritten immediately and has been dropped.)
PPI_centrality_GO <- rbindlist(
  Map(
    function(set_genes, set_label) {
      PPI_diff_age_HNMF[
        mapped_gene %in% set_genes,
        .(mapped_gene, exon_ID, diff_age, betweenness, eigen_centrality,
          harmonic_centrality, degree_centrality, type, GO = set_label)
      ]
    },
    reactome_gene_sets,
    names(reactome_gene_sets)
  ),
  use.names = TRUE
)







# Natural-log betweenness per gene set: jittered points with a box plot
# drawn on top.
ggplot(
  data = PPI_centrality_GO,
  mapping = aes(x = GO, y = log(betweenness))
) +
  geom_jitter() +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0))

# Collapse to one row per gene / centrality values / gene set, keeping the
# smallest diff_age in each group.
PPI_centrality_GO_gene_centered <- PPI_centrality_GO[
  ,
  .(diff_age = min(diff_age)),
  by = .(mapped_gene, betweenness, eigen_centrality, harmonic_centrality,
         degree_centrality, GO)
]

# Harmonic centrality against diff_age, coloured by gene set.
ggplot(
  data = PPI_centrality_GO_gene_centered,
  mapping = aes(x = harmonic_centrality, y = diff_age, colour = GO)
) +
  geom_jitter(width = 0.5, height = 0.1) +
  theme(
    axis.text.x = element_text(angle = 90),
    legend.position = "top",
    legend.direction = "horizontal"
  )
# diff_age per gene set, lightly jittered.
# The leading "+" characters on the continuation lines were console prompts
# pasted into the script; they turned each layer into a unary `+`, which
# ggplot2 rejects, so they have been removed.
ggplot(PPI_centrality_GO_gene_centered, aes(GO, diff_age)) +
  geom_jitter(height = 0.1, width = 0.1) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0))

# Count rows per (gene set, diff_age) and express each count as a fraction
# of its gene set's total.
PPI_centrality_GO_gene_centered_stats <- PPI_centrality_GO_gene_centered[
  ,
  .N,
  by = .(GO, diff_age)
]
PPI_centrality_GO_gene_centered_stats[, Total := sum(N), by = GO]
PPI_centrality_GO_gene_centered_stats[, fraction := N / Total]

# Order gene sets by decreasing mean of the diff_age values present in the
# stats table.
# NOTE(review): this averages the distinct diff_age values per set and does
# not weight them by N -- confirm that is the intended ordering.
diff_age_levels <- PPI_centrality_GO_gene_centered_stats[
  ,
  .(mean_age = mean(diff_age)),
  by = GO
][order(-mean_age), GO]

PPI_centrality_GO_gene_centered_stats[, GO := factor(GO, levels = diff_age_levels)]

# Heat map of the fraction per (diff_age, gene set). Axis titles are drawn
# with colour NA, i.e. invisibly.
ggplot(
  data = PPI_centrality_GO_gene_centered_stats,
  mapping = aes(x = diff_age, y = GO, fill = fraction)
) +
  geom_tile() +
  scale_fill_gradient(low = "grey", high = "red") +
  theme_bw() +
  labs(fill = "Gene fraction") +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0),
    axis.title = element_text(colour = NA),
    strip.text = element_text(hjust = 0.5)
  )

# Reactome gene sets keyed by the label written to the `GO` column (names
# equal the variable names, as var2str() produced in the copy-pasted version).
reactome_gene_sets <- list(
  Axon_guidance = Axon_guidance,
  L1CAM_interactions = L1CAM_interactions,
  Protein_protein_interactions_at_synapses = Protein_protein_interactions_at_synapses,
  ER_to_Golgi_Anterograde_Transport = ER_to_Golgi_Anterograde_Transport,
  Clathrin_mediated_endocytosis = Clathrin_mediated_endocytosis,
  Golgi_Associated_Vesicle_Biogenesis = Golgi_Associated_Vesicle_Biogenesis,
  Membrane_Trafficking = Membrane_Trafficking,
  Intra_Golgi_and_retrograde_Golgi_to_ER_traffic = Intra_Golgi_and_retrograde_Golgi_to_ER_traffic
)

# For each gene set, keep the rows of Delta_HNMF_whippet_ME_age_diff_total_PPI
# whose mapped_gene is a member, tag them with the set label and stack the
# pieces in list order.
GO_Reactome_all <- rbindlist(
  Map(
    function(set_genes, set_label) {
      Delta_HNMF_whippet_ME_age_diff_total_PPI[
        mapped_gene %in% set_genes,
        .(mapped_gene, exon_ID, diff_age, GO = set_label)
      ]
    },
    reactome_gene_sets,
    names(reactome_gene_sets)
  ),
  use.names = TRUE
)


# Count rows per (gene set, diff_age) and convert to a fraction of each
# gene set's total.
GO_Reactome_all_stats <- GO_Reactome_all[, .(N = .N), by = c("GO", "diff_age")]
GO_Reactome_all_stats[, Total := sum(N), by = "GO"]
GO_Reactome_all_stats[, fraction := N / Total]

# Hand-curated row order for the heat map. The data-driven ordering that was
# computed here before was overwritten by this vector on the next statement,
# so that dead computation has been removed.
diff_age_levels <- c("Membrane_Trafficking",
                     "Intra_Golgi_and_retrograde_Golgi_to_ER_traffic",
                     "Golgi_Associated_Vesicle_Biogenesis",
                     "Clathrin_mediated_endocytosis",
                     "ER_to_Golgi_Anterograde_Transport",
                     "Axon_guidance",
                     "L1CAM_interactions",
                     "Protein_protein_interactions_at_synapses")

# Levels are reversed so the first entry above becomes the last factor level.
GO_Reactome_all_stats$GO <- factor(GO_Reactome_all_stats$GO, levels = rev(diff_age_levels))


# Panel G: heat map of the per-gene-set fraction at each diff_age, with the
# raw count N printed in every tile. The fill scale is fixed to [0, 0.7].
F6G <- ggplot(
  data = GO_Reactome_all_stats,
  mapping = aes(x = diff_age, y = GO)
) +
  geom_tile(aes(fill = fraction)) +
  geom_text(aes(label = N), color = "white") +
  scale_fill_gradient(low = "grey", high = "red", limits = c(0, 0.7)) +
  scale_x_continuous(breaks = seq(10.5, 16.5, by = 1)) +
  theme_bw() +
  labs(x = "DPC", y = "", fill = "Gene fraction") +
  theme(
    axis.text.x = element_text(angle = 90),
    legend.position = "top",
    legend.direction = "horizontal"
  )

F6G

# Stack panel F (built elsewhere in the file) above panel G.
F6FG <- plot_grid(
  F6F, F6G,
  ncol = 1,
  labels = c("F", "G"),
  rel_heights = c(1.3, 1)
)


# Enrichment tables (Biological Process, Cellular Component, Reactome).
Biological_Process <- fread("../PPI/enrichment.Process.tsv")
Cellular_Component <- fread("../PPI/enrichment.Component.tsv")
GO_Reactome <- fread("../PPI/enrichment.RCTM.tsv")

# Gene labels listed for one enrichment term.
#   enrichment: table with the columns `term description` and
#               `matching proteins in your network (labels)`.
#   term_label: exact value to look up in `term description`.
# Returns the comma-separated labels of the matching row(s) as a character
# vector (NULL when no row matches).
enrichment_term_genes <- function(enrichment, term_label) {
  # which() drops NA comparisons, matching data.table's row-filter semantics.
  hits <- which(enrichment[["term description"]] == term_label)
  labels <- enrichment[["matching proteins in your network (labels)"]][hits]
  unlist(strsplit(labels, ","))
}

# Biological Process terms
nervous_system_development <- enrichment_term_genes(Biological_Process, "nervous system development")
vesicle_mediated_transport <- enrichment_term_genes(Biological_Process, "vesicle-mediated transport")
cytoskeleton_organization <- enrichment_term_genes(Biological_Process, "cytoskeleton organization")
synapse_assembly <- enrichment_term_genes(Biological_Process, "synapse assembly")
signal_transduction <- enrichment_term_genes(Biological_Process, "signal transduction")

# Cellular Component terms
synapse <- enrichment_term_genes(Cellular_Component, "synapse")
somatodendritic_compartment <- enrichment_term_genes(Cellular_Component, "somatodendritic compartment")
postsynapse <- enrichment_term_genes(Cellular_Component, "postsynapse")
presynapse <- enrichment_term_genes(Cellular_Component, "presynapse")
growth_cone <- enrichment_term_genes(Cellular_Component, "growth cone")

# Stack, for every gene set in `gene_sets` (a named list of character
# vectors), the rows of Delta_HNMF_whippet_ME_age_diff_total_PPI whose
# mapped_gene belongs to the set, labelled with the set's name in `GO`.
# Replaces the previous copy-pasted rbind() of one subset per gene set; the
# list names equal the labels var2str() used to generate.
tag_microexons_by_gene_set <- function(gene_sets) {
  rbindlist(
    Map(
      function(set_genes, set_label) {
        Delta_HNMF_whippet_ME_age_diff_total_PPI[
          mapped_gene %in% set_genes,
          .(mapped_gene, exon_ID, diff_age, GO = set_label)
        ]
      },
      gene_sets,
      names(gene_sets)
    ),
    use.names = TRUE
  )
}

GO_Biological_Process <- tag_microexons_by_gene_set(list(
  nervous_system_development = nervous_system_development,
  vesicle_mediated_transport = vesicle_mediated_transport,
  cytoskeleton_organization = cytoskeleton_organization,
  synapse_assembly = synapse_assembly,
  signal_transduction = signal_transduction
))

GO_Cellular_Component <- tag_microexons_by_gene_set(list(
  synapse = synapse,
  somatodendritic_compartment = somatodendritic_compartment,
  postsynapse = postsynapse,
  presynapse = presynapse,
  growth_cone = growth_cone
))




# Count rows per (GO term, diff_age) and convert to a fraction of each
# term's total.
GO_Biological_Process_stats <- GO_Biological_Process[, .N, by = .(GO, diff_age)]
GO_Biological_Process_stats[, Total := sum(N), by = GO]
GO_Biological_Process_stats[, fraction := N / Total]

# Order terms by decreasing median of the diff_age values present in the
# stats table (one value per distinct diff_age, not weighted by N).
diff_age_levels <- GO_Biological_Process_stats[
  ,
  .(median_age = median(diff_age)),
  by = GO
][order(-median_age), GO]
GO_Biological_Process_stats[, GO := factor(GO, levels = diff_age_levels)]

# Heat map of fraction per (diff_age, term) with the count N in each tile.
# Axis titles are drawn with colour NA, i.e. invisibly.
Sup_GO_time.A <- ggplot(
  data = GO_Biological_Process_stats,
  mapping = aes(x = diff_age, y = GO)
) +
  geom_tile(aes(fill = fraction)) +
  geom_text(aes(label = N), color = "white") +
  scale_fill_gradient(low = "grey", high = "red") +
  theme_bw() +
  labs(fill = "Gene fraction") +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0),
    axis.title = element_text(colour = NA),
    strip.text = element_text(hjust = 0.5)
  )



# Count rows per (GO term, diff_age) and convert to a fraction of each
# term's total.
GO_Cellular_Component_stats <- GO_Cellular_Component[, .N, by = .(GO, diff_age)]
GO_Cellular_Component_stats[, Total := sum(N), by = GO]
GO_Cellular_Component_stats[, fraction := N / Total]

# Order terms by decreasing median of the diff_age values present in the
# stats table (one value per distinct diff_age, not weighted by N).
diff_age_levels <- GO_Cellular_Component_stats[
  ,
  .(median_age = median(diff_age)),
  by = GO
][order(-median_age), GO]
GO_Cellular_Component_stats[, GO := factor(GO, levels = diff_age_levels)]

# Heat map of fraction per (diff_age, term) with the count N in each tile.
# Axis titles are drawn with colour NA, i.e. invisibly.
Sup_GO_time.B <- ggplot(
  data = GO_Cellular_Component_stats,
  mapping = aes(x = diff_age, y = GO)
) +
  geom_tile(aes(fill = fraction)) +
  geom_text(aes(label = N), color = "white") +
  scale_fill_gradient(low = "grey", high = "red") +
  theme_bw() +
  labs(fill = "Gene fraction") +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0),
    axis.title = element_text(colour = NA),
    strip.text = element_text(hjust = 0.5)
  )

library(ggpubr)


# Attach the network centrality columns to every GO-tagged microexon,
# matching on exon and gene.
GO_Biological_Process_centrality <- merge(
  x = GO_Biological_Process,
  y = PPI_diff_age_HNMF[
    ,
    .(exon_ID, mapped_gene, betweenness, eigen_centrality,
      harmonic_centrality, degree_centrality, type)
  ],
  by = c("exon_ID", "mapped_gene")
)

# Reduce to one row per gene / GO term / centrality values, then keep only
# rows with harmonic_centrality above 10.
# (Original author's note: this filters nodes that are not connected to the
# main network.)
GO_Biological_Process_centrality <- unique(
  GO_Biological_Process_centrality[
    ,
    .(mapped_gene, GO, betweenness, eigen_centrality, harmonic_centrality,
      degree_centrality)
  ]
)
GO_Biological_Process_centrality <- GO_Biological_Process_centrality[harmonic_centrality > 10]

# Order the GO terms by decreasing median harmonic centrality.
GO_Biological_Process_centrality[
  ,
  GO := factor(
    GO,
    levels = GO_Biological_Process_centrality[
      ,
      .(median_centrality = median(harmonic_centrality)),
      by = GO
    ][order(-median_centrality), GO]
  )
]


# Harmonic centrality per GO term.
# GO_Biological_Process_centrality is already de-duplicated on these columns
# and filtered to harmonic_centrality > 10 where it is built, so the repeated
# unique()/filter step was a no-op and has been dropped.
# The y label previously read "eigencentrality" although the axis shows
# harmonic centrality; it is corrected here (the title is still drawn with
# colour NA, i.e. invisibly).
ggplot(GO_Biological_Process_centrality) +
  geom_boxplot(aes(GO, harmonic_centrality)) +
  ylab("harmonic centrality") +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.6),
        axis.title.y = element_text(colour = NA),
        strip.text = element_text(hjust = 0.5))



# Re-order the GO terms by decreasing median eigen centrality.
GO_Biological_Process_centrality[
  ,
  GO := factor(
    GO,
    levels = GO_Biological_Process_centrality[
      ,
      .(median_centrality = median(eigen_centrality)),
      by = GO
    ][order(-median_centrality), GO]
  )
]

# Eigen centrality per GO term; the x-axis title is drawn invisibly.
ggplot(
  data = GO_Biological_Process_centrality,
  mapping = aes(x = GO, y = eigen_centrality)
) +
  geom_boxplot() +
  labs(y = "eigencentrality") +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.6),
    axis.title.x = element_text(colour = NA),
    strip.text = element_text(hjust = 0.5)
  )

# Temporarily order the GO terms by decreasing median betweenness.
GO_Biological_Process_centrality$GO <- factor(
  GO_Biological_Process_centrality$GO,
  levels = GO_Biological_Process_centrality[, .(median = median(betweenness)), by = "GO"][order(-median)]$GO
)

# log10 betweenness per GO term. The table is already unique on these columns
# and filtered to harmonic_centrality > 10 where it is built, so the repeated
# unique()/filter step was redundant and has been dropped. Both axis titles
# are drawn invisibly.
ggplot(GO_Biological_Process_centrality) +
  geom_boxplot(aes(GO, log10(betweenness))) +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.6),
        axis.title = element_text(colour = NA),
        strip.text = element_text(hjust = 0.5))

# Restore the ordering by decreasing median eigen centrality for the code
# that follows.
GO_Biological_Process_centrality$GO <- factor(
  GO_Biological_Process_centrality$GO,
  levels = GO_Biological_Process_centrality[, .(median = median(eigen_centrality)), by = "GO"][order(-median)]$GO
)





# All ten pairwise comparisons between the five Biological Process terms.
# The original list contained signal_transduction vs
# nervous_system_development twice and never compared signal_transduction
# with synapse_assembly; the duplicate is replaced by the missing pair.
my_comparisons <- list(c("vesicle_mediated_transport", "cytoskeleton_organization"),
                       c("vesicle_mediated_transport", "signal_transduction"),
                       c("vesicle_mediated_transport", "nervous_system_development"),
                       c("vesicle_mediated_transport", "synapse_assembly"),
                       c("cytoskeleton_organization", "signal_transduction"),
                       c("cytoskeleton_organization", "nervous_system_development"),
                       c("cytoskeleton_organization", "synapse_assembly"),
                       c("signal_transduction", "nervous_system_development"),
                       c("signal_transduction", "synapse_assembly"),
                       c("nervous_system_development", "synapse_assembly")
                       )

# Eigen centrality per GO term with significance brackets for every pair.
# One bracket height per comparison, starting at 0.9 in steps of 0.1 (the
# previous sequence supplied one more height than there were comparisons).
ggplot(GO_Biological_Process_centrality, aes(GO, eigen_centrality)) +
  geom_boxplot() +
  ylab("eigencentrality") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.6),
        axis.title.x = element_text(colour = NA),
        strip.text = element_text(hjust = 0.5)) +
  geom_signif(comparisons = my_comparisons,
              map_signif_level = TRUE,
              y_position = seq(0.9, by = 0.1, length.out = length(my_comparisons)))


# Add, for every gene, the smallest non-missing diff_age.x found in
# Delta_HNMF_whippet_ME_age_diff_total_PPI; genes without such a value are
# dropped by the (inner) merge.
GO_Biological_Process_centrality_HNM_age <- merge(
  x = GO_Biological_Process_centrality,
  y = Delta_HNMF_whippet_ME_age_diff_total_PPI[
    !is.na(diff_age.x),
    .(min_age_HNM = min(diff_age.x)),
    by = mapped_gene
  ],
  by = "mapped_gene"
)

# Kruskal-Wallis test of min_age_HNM across GO terms.
kruskal.test(min_age_HNM ~ GO, data = GO_Biological_Process_centrality_HNM_age)


# Comparisons shown on panel I: cytoskeleton organization against three other
# terms. The remaining pairs are intentionally left commented out.
my_comparisons <- list(#c("vesicle_mediated_transport", "cytoskeleton_organization" ),
                       #c("vesicle_mediated_transport", "signal_transduction" ),
                       #c("vesicle_mediated_transport", "nervous_system_development"),
                       #c("vesicle_mediated_transport", "synapse_assembly" ),
                       c("cytoskeleton_organization", "signal_transduction" ),
                       c("cytoskeleton_organization", "nervous_system_development" ),
                       c("cytoskeleton_organization", "synapse_assembly")
                       #c("signal_transduction", "nervous_system_development" ),
                       #c("signal_transduction", "nervous_system_development" ),
                       #c("nervous_system_development", "synapse_assembly" )
                       )

# Display labels keyed by factor level. The GO factor order is data-driven
# (set from median centrality earlier), so labels are matched by name instead
# of by position; a positional vector would mislabel the boxes whenever that
# order changes. Also fixes the "transportt" typo.
go_display_labels <- c(
  vesicle_mediated_transport = "Vesicle mediated transport",
  cytoskeleton_organization = "Cytoskeleton organization",
  signal_transduction = "Signal transduction",
  nervous_system_development = "Nervous system development",
  synapse_assembly = "Synapse assembly"
)

# Panel I: min_age_HNM per GO term with one-sided ("less") unpaired Wilcoxon
# brackets for the comparisons above.
F6I <- ggplot(GO_Biological_Process_centrality_HNM_age, aes(GO, min_age_HNM)) +
  geom_boxplot() +
  ylab("Earliest microexon alternative inclusion per gene") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.6),
        axis.title.x = element_text(colour = NA),
        strip.text = element_text(hjust = 0.5)) +
  scale_y_continuous(breaks = seq(10.5, 16.5, 1)) +
  ggsignif::geom_signif(test = "wilcox.test", comparisons = my_comparisons, step_increase = 0.15, test.args = list(alternative = "less", paired = FALSE), map_signif_level = TRUE) +
  scale_x_discrete(labels = go_display_labels)



# Kruskal-Wallis test of min_age_HNM across GO terms, followed by pairwise
# Wilcoxon rank-sum tests between the terms (pairwise.wilcox.test applies its
# default p-value adjustment, since none is specified here).
kruskal.test(GO_Biological_Process_centrality_HNM_age$min_age_HNM, GO_Biological_Process_centrality_HNM_age$GO)
pairwise.wilcox.test(GO_Biological_Process_centrality_HNM_age$min_age_HNM, GO_Biological_Process_centrality_HNM_age$GO)

library(ggstatsplot)
# Non-parametric pairwise comparison table.
# NOTE(review): after the named `type` argument is matched, the positional
# arguments are (data, eigen_centrality, GO); confirm this matches the
# (grouping, response) order expected by the installed ggstatsplot version.
df_pair <- ggstatsplot::pairwise_p(GO_Biological_Process_centrality, eigen_centrality,  type = "nonparametric", GO)

# Kruskal-Wallis tests of each centrality measure across GO terms.
kruskal.test(data = GO_Biological_Process_centrality,  eigen_centrality ~ GO)
kruskal.test(data = GO_Biological_Process_centrality,  harmonic_centrality ~ GO)

# Pairwise Wilcoxon rank-sum tests between GO terms. Each call used to appear
# twice with identical arguments; the repeats have been removed.
pairwise.wilcox.test(GO_Biological_Process_centrality$eigen_centrality, GO_Biological_Process_centrality$GO)
pairwise.wilcox.test(GO_Biological_Process_centrality$harmonic_centrality, GO_Biological_Process_centrality$GO)
# Enrichment results computed against the microexon-gene background.
# These reads replace any tables previously stored under the same names.
Biological_Process <- fread("../Figures/New_Figure 5/STRING/GO/ME_genes_background/Biological_Process.tsv")
Cellular_Component <- fread("../Figures/New_Figure 5/STRING/GO/ME_genes_background/Cellular_Component.tsv")
Protein_Domains <- fread("../Figures/New_Figure 5/STRING/GO/ME_genes_background/INTERPRO Protein Domains and Features.tsv")

# Add the derived plotting columns to an enrichment table by reference:
#   minus_log10_FDR: -log10 of `false discovery rate`
#   GO_size:         observed + background gene count
#   GO_fraction:     observed / background gene count
#   GO_class:        the supplied class label
# NOTE(review): GO_size adds the observed count to the background count --
# confirm that this sum, not the background count alone, is intended.
add_enrichment_metrics <- function(enrichment, class_label) {
  enrichment[, `:=`(
    minus_log10_FDR = -log10(`false discovery rate`),
    GO_size = (`observed gene count` + `background gene count`),
    GO_fraction = (`observed gene count` / `background gene count`),
    GO_class = class_label
  )]
  invisible(enrichment)
}

add_enrichment_metrics(Biological_Process, "Biological Process")
add_enrichment_metrics(Cellular_Component, "Cellular Component")
add_enrichment_metrics(Protein_Domains, "INTERPRO Protein Domains")

# Stack the three annotated tables.
Total_GO <- rbind(Biological_Process, Cellular_Component, Protein_Domains)



# Enrichment overview for terms with GO_size above 20: GO_fraction against
# -log10 FDR, point size by log2 GO_size, one facet column per class.
ggplot(
  data = Total_GO[GO_size > 20],
  mapping = aes(
    x = GO_fraction,
    y = minus_log10_FDR,
    size = log2(GO_size),
    colour = GO_class
  )
) +
  geom_point(alpha = 0.2) +
  facet_grid(. ~ GO_class) +
  theme(
    axis.text.x = element_text(angle = 90),
    legend.position = "top",
    legend.direction = "horizontal"
  )
# Proportion of central vs non-central rows per `min_dif_age`, first for the
# EC table ...
ggplot(
  data = PPI_diff_age_HNM_stats_EC,
  mapping = aes(x = min_dif_age, fill = central)
) +
  geom_bar(position = "fill") +
  scale_x_continuous(breaks = seq(10.5, 16.5, by = 1)) +
  scale_fill_discrete(
    name = "Protein type",
    labels = c("Non-central", "Central")
  ) +
  labs(
    x = "Earliest microexon differential inclusion detection per gene (DPC)",
    y = "Protein type proportion"
  ) +
  theme(
    axis.text.x = element_text(angle = 90),
    legend.position = "top",
    legend.direction = "horizontal"
  )

# ... and then for the B table (built elsewhere in the file).
ggplot(
  data = PPI_diff_age_HNM_stats_B,
  mapping = aes(x = min_dif_age, fill = central)
) +
  geom_bar(position = "fill") +
  scale_x_continuous(breaks = seq(10.5, 16.5, by = 1)) +
  scale_fill_discrete(
    name = "Protein type",
    labels = c("Non-central", "Central")
  ) +
  labs(
    x = "Earliest microexon differential inclusion detection per gene (DPC)",
    y = "Protein type proportion"
  ) +
  theme(
    axis.text.x = element_text(angle = 90),
    legend.position = "top",
    legend.direction = "horizontal"
  )

# Zebrafish ----


# Zebrafish microexon report and its coverage table.
total_zebra <- fread("../../Zebrafish/Final_Report/out_filtered_ME.txt")
total_zebra_cov <- fread("../../Zebrafish/Final_Report/out_filtered_ME.cov.txt")

# Zebrafish (danRer11) exon annotation.
exons.zebra <- fread("../../Zebrafish/danRer11.ensembl.bed12.exons")

# Mouse <-> zebrafish liftOver overlap tables.
# Two pairs of files used to be read in a row, and the second pair ("Old")
# immediately replaced the first, so only the "Old" tables were ever used.
# The superseded reads are kept as comments to avoid the wasted I/O without
# changing which tables the rest of the script sees.
# NOTE(review): confirm that the "Old" files are the intended inputs.
# mouse_to_zebra <- fread("../../Zebrafish/Conserved/out.high_quality.txt.mm10.bed.liftOver_minMatch_0.01.danRer11.closest_d.out.high_quality.txt.danRer11.bed.formatted")
# zebra_to_mouse <- fread("../../Zebrafish/Conserved/out.high_quality.txt.danRer11.bed.liftOver_minMatch_0.01.mm10.closest_d.out.high_quality.txt.mm10.formatted")

mouse_to_zebra <- fread("../../Zebrafish/Conserved/Old/out.high_quality.mm10.bed_liftover_mm10todanRer11.overlap_wo.out.high_quality.bed")
zebra_to_mouse <- fread("../../Zebrafish/Conserved/Old/out.high_quality.bed.liftover_0.001_mm10.overlap_wo.out.high_quality.bed.liftover_0.001_mm10")

# Unique mouse/zebrafish microexon pairs found in either liftOver direction.
# In mouse_to_zebra the mouse ID is V4 and the zebrafish ID is V10; in
# zebra_to_mouse the roles are swapped.
# (Original author's note: a length filter of 0.8 is already applied here.)
Mouse_Zebra.IDs <- unique(
  rbind(
    mouse_to_zebra[, .(ME.mouse = V4, ME.zebra = V10)],
    zebra_to_mouse[, .(ME.mouse = V10, ME.zebra = V4)]
  )
)


#View(Mouse_Zebra.IDs[, .(mouse.len, zebra.len, abs(mouse.len-abs(mouse.len-zebra.len))/max(mouse.len,zebra.len) )] )


# Interval lengths (end - start) keyed by microexon ID, gathered from both
# tables.
mouse_len <- setNames(
  c(mouse_to_zebra[, V3 - V2], zebra_to_mouse[, V9 - V8]),
  c(mouse_to_zebra$V4, zebra_to_mouse$V10)
)

zebra_len <- setNames(
  c(zebra_to_mouse[, V3 - V2], mouse_to_zebra[, V9 - V8]),
  c(zebra_to_mouse$V4, mouse_to_zebra$V10)
)

# Look the lengths up by ID (a name occurring more than once resolves to its
# first entry).
Mouse_Zebra.IDs[, `:=`(
  mouse.len = mouse_len[ME.mouse],
  zebra.len = zebra_len[ME.zebra]
)]


# 2-D histogram of mouse vs zebrafish microexon length.
ggplot(
  data = Mouse_Zebra.IDs,
  mapping = aes(x = mouse.len, y = zebra.len)
) +
  geom_bin2d(bins = 30, color = "white") +
  theme_bw() +
  scale_fill_gradient(low = "#00AFBB", high = "#FC4E07")

# Keep only the pairs whose two lengths are identical.
Mouse_Zebra.IDs <- Mouse_Zebra.IDs[mouse.len == zebra.len]

# Full outer merge of the conserved pairs with the cluster name/type lookup.
# NOTE(review): this needs ME_cluster.name, type, ensemble_mouse and
# ensemble_zebra to exist in Mouse_Zebra.IDs already; in this file those
# columns are only added by statements further down -- confirm the intended
# execution order.
Mouse_Zebra.IDs.table <- merge(Mouse_Zebra.IDs, ME_cluster_names[ , c("ME_cluster.name" , "ME_cluster.type")], by="ME_cluster.name", all = TRUE)


# Select the columns and rename them with mouse./zebra. prefixes.
Mouse_Zebra.IDs.table <- Mouse_Zebra.IDs.table[ , .(mouse.coord=ME.mouse, mouse.len=mouse.len, mouse.cluster_name=ME_cluster.name, mouse.cluster_type=ME_cluster.type, 
                           mouse.anno=type, mouse.ensembl=ensemble_mouse, 
                           zebra.coord=ME.zebra, zebra.len=zebra.len, zebra.ensembl=ensemble_zebra)] 


# Sanity check, printed to the console.
is.data.table(Mouse_Zebra.IDs.table)

# Attach the MGI symbol of each mouse microexon (inner merge) and drop
# duplicated rows.
Mouse_Zebra.IDs.table <- unique(merge(Mouse_Zebra.IDs.table, gene_table.TOTAL[ , c("ME", "mgi_symbol")], by.x="mouse.coord" , by.y="ME" ))

# TRUE where `x` is a multiple of 3 (vectorised).
# The stray closing bracket that made this line a syntax error is removed.
div_3 <- function(x) x %% 3 == 0

# Print the pair table with flags for lengths that are multiples of 3.
Mouse_Zebra.IDs.table[, .(mouse.coord, mgi_symbol, mouse.cluster_name, mouse.cluster_type, mouse.len, mouse.sym = (mouse.len %% 3 == 0), mouse.ensembl, zebra.coord, zebra.len, zebra.sym = (zebra.len %% 3 == 0), zebra.ensembl)]

# One row per mouse microexon, with all matching zebrafish microexons
# collapsed into comma-separated strings.
# Changes from the previous version:
#  * mouse.coord is no longer repeated inside j; it is already the grouping
#    column, and repeating it produced two columns with the same name.
#  * the zebrafish fields are joined with paste(collapse = ",") instead of
#    deparsing a list with as.character() and stripping "c(", ")", spaces and
#    quotes with gsub() afterwards. The strings are the same for these
#    columns, but no longer depend on how R deparses vectors.
Mouse_Zebra.IDs.thesis.table <- unique(
  Mouse_Zebra.IDs.table[
    ,
    .(mgi_symbol, mouse.cluster_name, mouse.cluster_type,
      mouse.len, mouse.sym = (mouse.len %% 3 == 0), mouse.ensembl,
      zebra.num = .N,
      zebra.coords = paste(zebra.coord, collapse = ","),
      zebra.lens = paste(zebra.len, collapse = ","),
      zebra.syms = paste(zebra.len %% 3 == 0, collapse = ","),
      zebra.ensembls = paste(zebra.ensembl, collapse = ",")),
    by = mouse.coord
  ][order(mgi_symbol, mouse.coord)]
)




# Export both tables as tab-separated files.
# The first file carries a .tsv extension but was written with sep = ",";
# it now uses a tab so the content matches the extension and the second
# export.
# NOTE(review): confirm that no downstream reader relies on the old
# comma-separated content.
fwrite(Mouse_Zebra.IDs.table, "../Final_Figures/Supplementary/Mouse_Zebra_microexons.tsv", append = FALSE, quote = "auto", sep = "\t", row.names = FALSE, col.names = TRUE)
fwrite(Mouse_Zebra.IDs.thesis.table, "~/Google_Drive/Thesis/Mouse_zebrafish.tsv", append = FALSE, quote = "auto", sep = "\t", row.names = FALSE, col.names = TRUE)
Mouse_Zebra.IDs.thesis.table
                    mouse.coord                mouse.coord    mgi_symbol mouse.cluster_name mouse.cluster_type mouse.len mouse.sym mouse.ensembl zebra.num
  1: chr2_+_120495235_120495247 chr2_+_120495235_120495247                               O2              Other        12      TRUE          TRUE         1
  2: chr9_+_107346145_107346171 chr9_+_107346145_107346171 6430571L13Rik                 I1           Included        26     FALSE          TRUE         1
  3:   chr1_+_60438941_60438959   chr1_+_60438941_60438959          Abi2                 N4           Neuronal        18      TRUE          TRUE         2
  4:   chr1_+_60438952_60438959   chr1_+_60438952_60438959          Abi2                 E1           Excluded         7     FALSE         FALSE         2
  5:  chr11_+_84290287_84290311  chr11_+_84290287_84290311         Acaca                 I1           Included        24      TRUE          TRUE         1
 ---                                                                                                                                                      
312:   chr3_+_16183921_16183946   chr3_+_16183921_16183946        Ythdf3                 I1           Included        25     FALSE          TRUE         1
313: chr4_+_129340727_129340752 chr4_+_129340727_129340752       Zbtb8os                 I2           Included        25     FALSE          TRUE         1
314:  chr15_-_93395586_93395615  chr15_-_93395586_93395615         Zcrb1                 I1           Included        29     FALSE          TRUE         1
315:  chr19_+_41938807_41938836  chr19_+_41938807_41938836       Zdhhc16                 O2              Other        29     FALSE          TRUE         2
316:   chr8_-_82771338_82771358   chr8_-_82771338_82771358        Zfp330               <NA>               <NA>        20     FALSE          TRUE         1
                                          zebra.coords zebra.lens  zebra.syms zebra.ensembls
  1:                         chr22_-_26308944_26308956         12        TRUE          FALSE
  2:                          chr6_+_53364857_53364883         26       FALSE           TRUE
  3:   chr9_+_13770822_13770840,chr6_+_9985080_9985098      18,18   TRUE,TRUE     FALSE,TRUE
  4:   chr9_+_13770822_13770840,chr6_+_9985080_9985098      18,18   TRUE,TRUE     FALSE,TRUE
  5:                          chr5_-_56372282_56372306         24        TRUE           TRUE
 ---                                                                                        
312:                         chr24_+_24286425_24286450         25       FALSE           TRUE
313:                         chr13_+_44860543_44860568         25       FALSE           TRUE
314:                          chr4_-_13929692_13929721         29       FALSE           TRUE
315: chr12_-_2857061_2857090,chr13_-_31384958_31384987      29,29 FALSE,FALSE      TRUE,TRUE
316:                          chr1_-_53276279_53276299         20       FALSE           TRUE
# Print the rows that have a mouse coordinate.
Mouse_Zebra.IDs.table.summer[!is.na(mouse.coord)]
                     mouse.coord mouse.len mouse.cluster_name mouse.cluster_type         mouse.anno mouse.ensembl               zebra.coord zebra.len zebra.ensembl
  1: chr10_+_106859776_106859806        30                 I1           Included  Missing in VastDB          TRUE  chr4_-_21989573_21989603        30          TRUE
  2:   chr10_+_24883203_24883221        18                NM2     Neuro-Muscular          Annotated          TRUE   chr20_-_2566047_2566065        18         FALSE
  3:   chr10_+_28554769_28554799        30                 N4           Neuronal          Annotated          TRUE   chr20_-_1490720_1490750        30          TRUE
  4:   chr10_+_28570173_28570191        18               <NA>               <NA>          Annotated          TRUE   chr20_-_1472362_1472380        18          TRUE
  5:   chr10_+_69823077_69823101        24                 E2           Excluded          Annotated          TRUE chr17_-_20794173_20794197        24          TRUE
 ---                                                                                                                                                               
398:    chrX_-_13596685_13596694         9                 N6           Neuronal Missing in GENCODE         FALSE  chr9_-_33407086_33407104        18         FALSE
399:    chrX_-_13596685_13596703        18                 N2           Neuronal  Missing in VastDB          TRUE  chr9_-_33407086_33407104        18         FALSE
400:    chrX_-_57241805_57241826        21                 I3           Included          Annotated          TRUE chr14_-_31813517_31813538        21          TRUE
401:    chrX_-_73855651_73855663        12                NM5     Neuro-Muscular          Annotated          TRUE chr23_+_28680393_28680405        12          TRUE
402:    chrX_-_74232966_74232990        24                NM3     Neuro-Muscular          Annotated          TRUE chr23_+_19793592_19793616        24          TRUE
     MHN.diff_age MHN.change_dir F.diff_age F.change_dir
  1:           NA             NA         NA           NA
  2:         13.5              1         NA           NA
  3:         13.5              1       14.5            1
  4:           NA             NA         NA           NA
  5:           NA             NA         NA           NA
 ---                                                    
398:           NA             NA       12.5            1
399:         11.5              1       12.5            1
400:           NA             NA         NA           NA
401:         14.5              1       14.5            1
402:           NA             NA         NA           NA
# Look up the cluster of every mouse microexon that has an entry in the
# named vector ME_clusters.
Mouse_Zebra.IDs[
  ME.mouse %in% names(ME_clusters),
  ME_cluster := ME_clusters[ME.mouse]
]

# Add the cluster name, keeping pairs without a matching cluster.
Mouse_Zebra.IDs <- merge(
  x = Mouse_Zebra.IDs,
  y = ME_cluster_names[, .(ME_cluster, ME_cluster.name)],
  by = "ME_cluster",
  all.x = TRUE
)

# Number of conserved pairs per cluster name.
ggplot(data = Mouse_Zebra.IDs, mapping = aes(x = ME_cluster.name)) +
  geom_bar()




# Annotation status of each mouse microexon, derived from membership in the
# microexons_GENCODE and microexons_Vastdb ID sets. Every combination is
# assigned exactly once, so the result does not depend on statement order.
Mouse_Zebra.IDs[, type := "Annotated"]
Mouse_Zebra.IDs[ME.mouse %in% microexons_GENCODE & !ME.mouse %in% microexons_Vastdb, type := "Missing in VastDB"]
Mouse_Zebra.IDs[!ME.mouse %in% microexons_GENCODE & ME.mouse %in% microexons_Vastdb, type := "Missing in GENCODE"]
Mouse_Zebra.IDs[!ME.mouse %in% microexons_GENCODE & !ME.mouse %in% microexons_Vastdb, type := "Novel"]

# TRUE when the zebrafish microexon ID appears in column V4 of exons.zebra.
Mouse_Zebra.IDs[, ensemble_zebra := ME.zebra %in% exons.zebra$V4]

# TRUE when the mouse microexon ID appears in microexons_GENCODE.
Mouse_Zebra.IDs[, ensemble_mouse := ME.mouse %in% microexons_GENCODE]







# Rows of the differential-inclusion table whose exon is a mouse microexon
# present in the mouse/zebrafish table.
Delta_HNMF_whippet_ME_age_diff_total[exon_ID %in% Mouse_Zebra.IDs$ME.mouse]

# Print the whole table.
Delta_HNMF_whippet_ME_age_diff_total[]

# Pairs whose mouse side is in the differential-inclusion table and whose
# zebrafish side is listed in exons.zebra.
Mouse_Zebra.IDs[
  ME.mouse %in% Delta_HNMF_whippet_ME_age_diff_total$exon_ID &
    ME.zebra %in% exons.zebra$V4
]

# Proportion of each annotation type within every cluster.
ggplot(Mouse_Zebra.IDs, aes(x = ME_cluster.name, fill = type)) +
  geom_bar(stat = "count", position = "fill")

# Distinct microexon IDs on each side of the mapping.
length(unique(Mouse_Zebra.IDs$ME.mouse))
length(unique(Mouse_Zebra.IDs$ME.zebra))

# Flag PPI rows whose exon_ID is one of the mouse microexons in the
# mouse/zebrafish table.
PPI_diff_age_HNMF_central[, conserved := exon_ID %in% Mouse_Zebra.IDs$ME.mouse]

# Only rows flagged as central receive a gene label in the plot below.
central_nodes <- PPI_diff_age_HNMF_central[central == TRUE]

ggplot(PPI_diff_age_HNMF_central) +
  geom_jitter(
    aes(x = harmonic_centrality, y = diff_age, shape = type, colour = conserved),
    width = 0,
    height = 0.1
  ) +
  geom_text_repel(
    data = central_nodes,
    aes(x = harmonic_centrality, y = diff_age, colour = conserved),
    label = central_nodes$mapped_gene,
    nudge_y = 0.5,
    direction = "x",
    angle = 90,
    segment.size = 0.2,
    show.legend = FALSE
  ) +
  scale_y_continuous(breaks = seq(10.5, 16.5, 1)) +
  labs(
    x = "Harmonic Centrality",
    y = "Earliest microexon differential inclusion detection (DPC)"
  ) +
  theme(
    axis.text.x = element_text(angle = 90),
    legend.position = "top",
    legend.direction = "horizontal"
  )

# Betweenness (log2) of conserved versus non-conserved rows.
ggplot(PPI_diff_age_HNMF_central, aes(x = conserved, y = log2(betweenness))) +
  geom_boxplot()

# Pairs whose zebrafish microexon is not listed in exons.zebra.
Mouse_Zebra.IDs[ensemble_zebra == FALSE]


# Theme tweaks shared by the two per-cluster annotation bar charts.
annotation_bar_theme <- theme(
  axis.text.x = element_text(angle = 45),
  legend.position = "top",
  legend.direction = "horizontal"
)

# Rows per cluster, split by whether the zebrafish microexon is in exons.zebra.
zebra.zebra <- ggplot(Mouse_Zebra.IDs, aes(x = ME_cluster.name, fill = ensemble_zebra)) +
  geom_bar(stat = "count") +
  annotation_bar_theme

# Rows per cluster, split by whether the mouse microexon is in
# microexons_GENCODE.
zebra.mouse <- ggplot(Mouse_Zebra.IDs, aes(x = ME_cluster.name, fill = ensemble_mouse)) +
  geom_bar(stat = "count") +
  annotation_bar_theme
  



# Attach the cluster type to every mouse/zebrafish row, then count rows per
# cluster type and per annotation status in each species.
Mouse_Zebra.IDs.cluster_names <- merge(
  Mouse_Zebra.IDs,
  ME_cluster_names[, c("ME_cluster.name", "ME_cluster.type")],
  by = "ME_cluster.name"
)
Mouse_Zebra_stats <- Mouse_Zebra.IDs.cluster_names[
  , .N, by = c("ME_cluster.type", "ensemble_zebra", "ensemble_mouse")
]

# Type is "<ensemble_zebra>_<ensemble_mouse>", e.g. "FALSE_TRUE" means the
# zebrafish flag is FALSE and the mouse flag is TRUE.
Mouse_Zebra_stats[, Type := paste(ensemble_zebra, ensemble_mouse, sep = "_")]

# Order cluster types on the x axis by the number of rows missing from at
# least one annotation (ascending).
Mouse_Zebra_stats$ME_cluster.type <- factor(
  Mouse_Zebra_stats$ME_cluster.type,
  levels = Mouse_Zebra_stats[
    !ensemble_zebra | !ensemble_mouse, sum(N), by = c("ME_cluster.type")
  ][order(V1), ME_cluster.type]
)

Fig7.E <- ggplot(Mouse_Zebra_stats[!ensemble_zebra | !ensemble_mouse]) +
  geom_bar(aes(x = ME_cluster.type, y = N, fill = Type), stat = "identity", position = "stack") +
  ylab("Number of microexons") +
  xlab("Microexon clusters") +
  # Tie every label to its Type value explicitly so the legend stays correct
  # even if one of the three categories is absent from the data.
  scale_fill_discrete(
    name = "Type",
    breaks = c("FALSE_FALSE", "FALSE_TRUE", "TRUE_FALSE"),
    labels = c("Missing in both", "Missing in zebrafish", "Missing in mouse")
  ) +
  # theme_bw() is a complete theme and replaces any theme settings added
  # before it, so it must come first; the legend placement is applied after.
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 90),
    legend.position = "top",
    legend.direction = "horizontal"
  )


Fig7.E
# Number of distinct microexon IDs on each side of the mouse/zebrafish mapping.
length(unique(Mouse_Zebra.IDs$ME.mouse))
length(unique(Mouse_Zebra.IDs$ME.zebra))

# Percentage of microexons missing from each species' annotation.
# Numerator and denominator both count distinct microexon IDs taken from the
# same table. The mapping is not one-to-one (a zebrafish microexon can be
# paired with more than one mouse microexon), so summing table rows in the
# numerator would count such a microexon several times against a denominator
# that counts it once.
round(
  Mouse_Zebra.IDs[ensemble_mouse == FALSE, length(unique(ME.mouse))] * 100 /
    length(unique(Mouse_Zebra.IDs$ME.mouse)),
  1
)
round(
  Mouse_Zebra.IDs[ensemble_zebra == FALSE, length(unique(ME.zebra))] * 100 /
    length(unique(Mouse_Zebra.IDs$ME.zebra)),
  1
)
library(ggsignif)

#Mouse_Zebra.IDs[, ME_cluster.name:=ME_cluster.name.x]

# Distinct conserved mouse microexons per cluster.
conserved_by_cluster <- unique(Mouse_Zebra.IDs[, c("ME.mouse", "ME_cluster.name")])
Mouse_Zebra.conserved_frac <- conserved_by_cluster[, .N, by = ME_cluster.name]

# Rows per cluster in ME_clusters.names, used as the denominator.
# NOTE(review): ME_clusters.names is a different object from ME_cluster_names
# used further down; presumably it holds one row per microexon -- confirm.
cluster_totals <- ME_clusters.names[, .(Total = .N), by = ME_cluster.name]
Mouse_Zebra.conserved_frac <- merge(Mouse_Zebra.conserved_frac, cluster_totals, by = "ME_cluster.name")

# Percentage of each cluster that is conserved (the column name keeps its
# original spelling because it is referenced below).
Mouse_Zebra.conserved_frac[, percetage := N * 100 / Total]

Mouse_Zebra.conserved_frac[order(percetage)]

cluster_types <- ME_cluster_names[, c("ME_cluster.name", "ME_cluster.type")]
Mouse_Zebra.conserved_frac <- merge(Mouse_Zebra.conserved_frac, cluster_types, by = "ME_cluster.name")

# Collapse cluster types into three broad classes.
# NOTE(review): "Non-Neuronal" is grouped under "Neuronal" here, exactly as in
# the original code -- confirm this is intended.
Mouse_Zebra.conserved_frac[, broad_class := "Other"]
Mouse_Zebra.conserved_frac[
  ME_cluster.type %in% c("Neuronal", "Non-Neuronal", "Weak-Neuronal"),
  broad_class := "Neuronal"
]
Mouse_Zebra.conserved_frac[ME_cluster.type %in% c("Neuro-Muscular"), broad_class := "Neuro-Muscular"]

# One point per cluster; the bracket compares "Neuronal" with "Other" using an
# unpaired two-sided Wilcoxon test.
Fig7.D <- ggplot(Mouse_Zebra.conserved_frac, aes(x = broad_class, y = percetage)) +
  geom_jitter(aes(colour = ME_cluster.type), width = 0.1) +
  geom_signif(
    comparisons = list(c("Neuronal", "Other")),
    test = "wilcox.test",
    test.args = list(alternative = "two.sided", paired = FALSE),
    map_signif_level = TRUE,
    y_position = 35,
    margin_top = 5
  ) +
  labs(
    x = "Microexon cluster with neuronal pattern",
    y = "Percentage of conserved microexons"
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90))

Fig7.D

Exploring lifted-over zebrafish microexons that do not overlap with mouse microexons

# Zebrafish liftover table (mm10 coordinates, judging by the file name).
zebra.total_lift <- fread(
  "../../Zebrafish/Conserved/out.high_quality.bed.liftover_0.001_mm10"
)

# Lifted-over entries whose ID (V4) is absent from zebra_to_mouse.
zebra.total_lift[!V4 %in% zebra_to_mouse$V4]

# Stack the two Figure 7 panels vertically.
plot_grid(Fig7.D, Fig7.E, ncol = 1)

Zebrafish differential inclusion



# Example inputs carrying the same names as the arguments of diff_zebra()
# (presumably kept for running the function body line by line -- confirm).
comp_name <- "16Mo_vs_4Mo-TOTAL"
diff.W.file <- "../../Zebrafish/Final_Report/Whippet/Delta/16Mo_vs_4Mo-TOTAL.diff.microexons"
diff.ME.file <- "../../Zebrafish/Final_Report/Whippet/Delta/16Mo_vs_4Mo-TOTAL.diff.ME.microexons"


#' Combine two differential-inclusion tables for one comparison and classify
#' each exon.
#'
#' Both files are read with fread() and joined on exon_ID, Gene, Node, Coord,
#' Strand and Type. Columns shared by both tables but not used as join keys get
#' the merge suffixes: `.x` for the table read from `diff.W.file` and `.y` for
#' the table read from `diff.ME.file`.
#'
#' @param diff.W.file Path to the first delta table.
#' @param diff.ME.file Path to the second delta table.
#' @param comp_name Label stored in the `Name` column of every row.
#' @return The merged data.table with two added columns: `Name` and `diff`.
#'   `diff` is "Included" when both tables report DeltaPsi >= 0.1 with
#'   Probability >= 0.9, "Excluded" when both report DeltaPsi <= -0.1 with
#'   Probability >= 0.9, and the string "NA" otherwise.
diff_zebra <- function(diff.W.file, diff.ME.file, comp_name) {
  join_keys <- c("exon_ID", "Gene", "Node", "Coord", "Strand", "Type")
  merged <- merge(fread(diff.W.file), fread(diff.ME.file), by = join_keys)

  merged[, Name := comp_name]

  # "NA" is a string sentinel (not a missing value); callers filter on it.
  merged[, diff := "NA"]
  merged[
    Probability.x >= 0.9 & Probability.y >= 0.9 &
      DeltaPsi.x >= 0.1 & DeltaPsi.y >= 0.1,
    diff := "Included"
  ]
  merged[
    Probability.x >= 0.9 & Probability.y >= 0.9 &
      DeltaPsi.x <= -0.1 & DeltaPsi.y <= -0.1,
    diff := "Excluded"
  ]

  merged
}
# Directory holding the delta tables of every zebrafish comparison.
zebra_delta_dir <- "../../Zebrafish/Final_Report/Whippet/Delta/"

# Run diff_zebra() for one comparison. Both input files are named after the
# comparison and differ only in their suffix.
run_zebra_comparison <- function(comparison) {
  diff_zebra(
    paste0(zebra_delta_dir, comparison, ".diff.microexons"),
    paste0(zebra_delta_dir, comparison, ".diff.ME.microexons"),
    comparison
  )
}

# Age comparison, TOTAL samples.
Zebra.16Mo_vs_4Mo.TOTAL <- run_zebra_comparison("16Mo_vs_4Mo-TOTAL")

# Age comparisons, ZT16 samples.
Zebra.20Mo_vs_16Mo.ZT16 <- run_zebra_comparison("20Mo_vs_16Mo-ZT16")
Zebra.20Mo_vs_4M.ZT16 <- run_zebra_comparison("20Mo_vs_4M-ZT16")
Zebra.16Mo_vs_4Mo.ZT16 <- run_zebra_comparison("16Mo_vs_4Mo-ZT16")

# Age comparisons, ZT4 samples.
Zebra.20Mo_vs_16Mo.ZT4 <- run_zebra_comparison("20Mo_vs_16Mo-ZT4")
Zebra.20Mo_vs_4M.ZT4 <- run_zebra_comparison("20Mo_vs_4M-ZT4")
Zebra.16Mo_vs_4Mo.ZT4 <- run_zebra_comparison("16Mo_vs_4Mo-ZT4")

# ZT16 versus ZT4 within each age.
Zebra.ZT16_vs_ZT4.20M <- run_zebra_comparison("ZT16_vs_ZT4-20Mo")
Zebra.ZT16_vs_ZT4.16M <- run_zebra_comparison("ZT16_vs_ZT4-16Mo")
Zebra.ZT16_vs_ZT4.4M <- run_zebra_comparison("ZT16_vs_ZT4-4Mo")




# Comparisons pooled into the combined table.
# NOTE(review): Zebra.16Mo_vs_4Mo.TOTAL is not part of this list, exactly as
# in the original code -- confirm that leaving it out is intended.
zebra_comparison_tables <- list(
  Zebra.20Mo_vs_16Mo.ZT16,
  Zebra.20Mo_vs_4M.ZT16,
  Zebra.16Mo_vs_4Mo.ZT16,
  Zebra.20Mo_vs_16Mo.ZT4,
  Zebra.20Mo_vs_4M.ZT4,
  Zebra.16Mo_vs_4Mo.ZT4,
  Zebra.ZT16_vs_ZT4.20M,
  Zebra.ZT16_vs_ZT4.16M,
  Zebra.ZT16_vs_ZT4.4M
)

# Keep only rows classified as "Included" or "Excluded" ("NA" is the string
# sentinel set by diff_zebra()) and stack them in the order listed above.
Zebra.TOTAL_diff <- do.call(
  rbind,
  lapply(zebra_comparison_tables, function(comparison) comparison[diff != "NA"])
)

# One row per exon: the values of every other column are pasted together,
# comma-separated, across the comparisons in which the exon was called.
Zebra.TOTAL_diff <- Zebra.TOTAL_diff[
  , lapply(.SD, function(values) paste0(values, collapse = ",")),
  by = "exon_ID"
]

# Mouse/zebrafish pairs whose zebrafish microexon was called in any comparison.
Mouse_Zebra.IDs[ME.zebra %in% Zebra.TOTAL_diff$exon_ID]
# Join the two delta tables on exon identity and age. Columns present in both
# tables but not used as keys get the merge suffixes .x (Delta_HNM_whippet)
# and .y (Delta_HNM_ME).
Delta_HNM_whippet_ME <- merge(Delta_HNM_whippet, Delta_HNM_ME, by=c("exon_ID", "Gene", "Node",  "Coord", "Strand", "Type", "age") )

# Rows per age where both tables report |DeltaPsi| >= 0.1 with Probability >= 0.9.
Delta_HNM_whippet_ME[ (abs(DeltaPsi.x)>=0.1 &  Probability.x >= 0.9) &  (abs(DeltaPsi.y)>=0.1 &  Probability.y >= 0.9) , .N , by=(age)]

# Per-age counts split by sign of DeltaPsi: "included" uses DeltaPsi <= -0.1
# and "excluded" uses DeltaPsi >= 0.1.
# NOTE(review): this sign convention is the opposite of the one in
# diff_zebra(), where DeltaPsi >= 0.1 is labelled "Included" -- confirm the
# comparison direction of these delta files.
Delta_HNM_whippet_ME_included <- Delta_HNM_whippet_ME[ (DeltaPsi.x<=-0.1 &  Probability.x >= 0.9) &  (DeltaPsi.y<=-0.1 &  Probability.y >= 0.9)  , .N , by=(age)]
Delta_HNM_whippet_ME_excluded <- Delta_HNM_whippet_ME[ (DeltaPsi.x>=0.1 &  Probability.x >= 0.9) &  (DeltaPsi.y>=0.1 &  Probability.y >= 0.9)  , .N , by=(age)]


# Side-by-side per-age counts from three tables, each sorted by age; the
# second column of the last two tables is taken by position.
# NOTE(review): cbind() aligns rows by position only, so this assumes all
# three tables contain exactly the same set of ages -- confirm.
Delta_HNM_included_stats <- cbind(Delta_HNM_whippet_included[order(age)], Delta_HNM_ME_included[order(age), 2], Delta_HNM_whippet_ME_included[order(age), 2])
colnames(Delta_HNM_included_stats) <- c("age", "Whippet", "MicroExonator", "Both")

# Logistic regressions of the `central` flag on `min_dif_age`, fitted on three
# separate tables and summarised one after the other.
# NOTE(review): the HC / EC / B suffixes presumably stand for harmonic
# centrality, eigen centrality and betweenness -- confirm against where the
# PPI_diff_age_HNM_stats_* tables are built.
centrality_logistic_regression_HC <- glm(formula = central ~ min_dif_age, family = "binomial", data = PPI_diff_age_HNM_stats_HC)
summary(centrality_logistic_regression_HC)


centrality_logistic_regression_EC <- glm(formula = central ~ min_dif_age, family = "binomial", data = PPI_diff_age_HNM_stats_EC)
summary(centrality_logistic_regression_EC)


centrality_logistic_regression_B <- glm(formula = central ~ min_dif_age, family = "binomial", data = PPI_diff_age_HNM_stats_B)
summary(centrality_logistic_regression_B)

# Make TRUE the first factor level so it comes first in the colour legend.
PPI_diff_age_HNM$L1cam_and_interactors <- factor(PPI_diff_age_HNM$L1cam_and_interactors, levels = c(TRUE, FALSE))


# Eigen centrality versus betweenness, one panel per diff_age, with gene
# symbols shown for the rows where L1cam_and_interactors is TRUE.
F6G <- ggplot(PPI_diff_age_HNM) +
  geom_point(aes(eigen_centrality, betweenness, colour = L1cam_and_interactors)) +
  facet_wrap(. ~ diff_age, ncol = 2) +
  geom_text_repel(
    data = PPI_diff_age_HNM[L1cam_and_interactors == TRUE, ],
    # The label is mapped inside aes() so each symbol travels with its own
    # row; a label vector passed outside aes() is matched to rows by position
    # only, which is fragile once the data are split across facets.
    aes(x = eigen_centrality, y = betweenness, colour = L1cam_and_interactors, label = mgi_symbol),
    nudge_y = 3000,
    direction = "x",
    angle = 90,
    segment.size = 0.2,
    show.legend = FALSE
  ) +
  theme(axis.text.x = element_text(angle = 90), legend.position = "top", legend.direction = "horizontal") +
  # The mapped aesthetic is colour, so the legend title and labels must be set
  # on the colour scale (a fill scale has no effect on this plot). Breaks are
  # given explicitly so each label is tied to its factor level.
  scale_colour_discrete(
    name = "L1CAM pathway",
    breaks = c("TRUE", "FALSE"),
    labels = c("Member", "Non-member")
  )
F6G <- ggplot(PPI_diff_age_HNM) +
  geom_point(aes(eigen_centrality, betweenness, colour=L1cam_and_interactors)) +
  facet_wrap( . ~ diff_age, ncol = 2)  +
      geom_text_repel(data=PPI_diff_age_HNM[ L1cam_and_interactors==TRUE,  ], 
                  aes(x=eigen_centrality, y=betweenness, colour=L1cam_and_interactors ), 
                  nudge_y      = 3000,
                  direction    = "x",
                  angle        = 90,
                  segment.size = 0.2,
                  label=PPI_diff_age_HNM[L1cam_and_interactors==TRUE  , mgi_symbol],
                  show.legend = FALSE) +
          theme(axis.text.x = element_text(angle = 90), legend.position = "top", legend.direction = "horizontal") +
    scale_fill_discrete(name = "L1CAM pathway", labels = c("Member", "Non-member"))  
Error in ggplot(PPI_diff_age_HNM) : object 'PPI_diff_age_HNM' not found
 # diff_age against log(betweenness); rows with log(betweenness) > 6 are
 # labelled with their gene symbol. The x-axis title reads "Betweenness"
 # although the plotted values are on the natural-log scale.
 ggplot(PPI_diff_age_HNM) +
  geom_jitter(
    aes(x = log(betweenness), y = diff_age, color = type),
    width = 0,
    height = 0.1
  ) +
  geom_text_repel(
    data = PPI_diff_age_HNM[log(betweenness) > 6, ],
    aes(x = log(betweenness), y = diff_age, colour = type),
    label = PPI_diff_age_HNM[log(betweenness) > 6, mgi_symbol],
    nudge_y = 0.5,
    direction = "x",
    angle = 90,
    segment.size = 0.2,
    show.legend = FALSE
  ) +
  scale_y_continuous(breaks = seq(10.5, 16.5, 1)) +
  labs(
    x = "Betweenness",
    y = "Earliest microexon differential inclusion detection (DPC)"
  )

Tasic

# Connection to the Ensembl mouse gene dataset.
ensembl <- useEnsembl(
  biomart = "ensembl",
  dataset = "mmusculus_gene_ensembl",
  host = "www.ensembl.org"
)

# Transcript IDs with everything from the first "." onwards removed; the same
# values are stored on ME_final as the join key used below.
ME_final.transcript <- gsub("\\..*", "", ME_final$transcript)
ME_final$ensembl_transcript_id <- ME_final.transcript

# Query gene ID and MGI symbol for every non-empty transcript ID.
transcripts_to_query <- ME_final.transcript[nchar(ME_final.transcript) != 0]
gene_table.TOTAL <- data.table(
  getBM(
    attributes = c("ensembl_transcript_id", "ensembl_gene_id", "mgi_symbol"),
    filters = "ensembl_transcript_id",
    values = transcripts_to_query,
    mart = ensembl
  )
)

# Attach the microexon ID (ME) to each annotated transcript.
gene_table.TOTAL <- merge(
  gene_table.TOTAL,
  ME_final[, c("ensembl_transcript_id", "ME")],
  by = "ensembl_transcript_id"
)
ensembl = useEnsembl(biomart="ensembl", dataset="mmusculus_gene_ensembl", host = "www.ensembl.org")
ME_final.transcript <- gsub("\\..*","", ME_final$transcript)
ME_final$ensembl_transcript_id <- gsub("\\..*","", ME_final$transcript) 
gene_table.TOTAL <- data.table(getBM(attributes=c('ensembl_transcript_id', 'ensembl_gene_id', "mgi_symbol"),filters = 'ensembl_transcript_id', values = ME_final.transcript[nchar(ME_final.transcript)!=0], mart = ensembl,  ))

Batch submitting query [==================================>----------------------------------------------------------------------------------------]  29% eta: 26s
Batch submitting query [====================================================>----------------------------------------------------------------------]  43% eta: 22s
Batch submitting query [=====================================================================>-----------------------------------------------------]  57% eta: 16s
Batch submitting query [=======================================================================================>-----------------------------------]  71% eta: 11s
Batch submitting query [========================================================================================================>------------------]  86% eta:  5s
Batch submitting query [===========================================================================================================================] 100% eta:  0s
                                                                                                                                                                  
gene_table.TOTAL <- merge(gene_table.TOTAL, ME_final[, c("ensembl_transcript_id", "ME")], by="ensembl_transcript_id")
# PCA_loadings and ME_cluster_names store ME_cluster with different types (one
# integer, one factor), and data.table refuses to join an integer column to a
# factor column. The key is converted to character on copies of both tables,
# so neither input is modified; ME_cluster is therefore character in the
# merged result.
ME_clusters.info <- local({
  loadings_keyed <- copy(as.data.table(PCA_loadings))
  loadings_keyed[, ME_cluster := as.character(ME_cluster)]

  cluster_names_keyed <- copy(as.data.table(ME_cluster_names))
  cluster_names_keyed[, ME_cluster := as.character(ME_cluster)]

  merge(loadings_keyed, cluster_names_keyed, by = "ME_cluster")
})
Error in bmerge(i, x, leftcols, rightcols, roll, rollends, nomatch, mult,  : 
  Incompatible join types: x.ME_cluster (integer) and i.ME_cluster (factor). Factor columns must join to factor or character columns.





#C7_interesting <- c("chr1_-_93026831_93026855", "chr1_+_118512704_118512728", "chr11_-_106180954_106180983", "chr11_-_3352899_3352917", "chr11_+_54671654_54671678", "chr3_-_148827211_148827238"," chr4_-_76138555_76138573", "chr5_+_143703372_143703375", "chr5_+_48263640_48263667", "chr8_-_54645337_54645358")


#ggplot(ME_cov_filtered_tasic_primary_type[ ME %in% C7_interesting, ]  ) +
#  geom_pointrange(aes(x=primary_type, y=PSI, colour= broad_type, group=broad_type, shape=broad_type, ymin=lower, ymax=upper))+
#  theme(axis.text.x = element_text(angle = 90, hjust = 0, vjust = 0.5)) +
#  facet_grid( gene_names[ME] ~ . )


# Genes whose microexons are plotted across primary cell types.
target_genes <- c("Kif1a", "Dctn1", "Kdm1a", "Kdm2a", "Itsn1", "Dnm1", "Dnm3", "Trrap", "Dclk2")

# Layers shared by the two plots below: PSI with its lower/upper interval per
# primary type, one facet row per gene symbol + microexon ID.
tasic_psi_layers <- list(
  geom_pointrange(
    aes(
      x = primary_type, y = PSI, ymin = lower, ymax = upper,
      colour = broad_type, group = broad_type, shape = broad_type
    )
  ),
  facet_grid(paste0(mgi_symbol, ME) ~ .),
  theme_bw(),
  theme(axis.text.x = element_text(angle = 90, hjust = 0, vjust = 0.5))
)

# Target-gene microexons that are also in the differential-inclusion table.
ggplot(
  ME_cov_filtered_tasic_primary_type[
    mgi_symbol %in% target_genes &
      ME %in% Delta_HNMF_whippet_ME_age_diff_total$exon_ID,
  ]
) +
  tasic_psi_layers

# All microexons of the target genes.
ggplot(ME_cov_filtered_tasic_primary_type[mgi_symbol %in% target_genes, ]) +
  tasic_psi_layers

New Tasic


# Node-level results of the GABA-ergic versus glutamatergic neuron comparison.
Tasic_total_diff_nodes <- fread(
  "/Users/gp7/Google_Drive/Results/ME/Paper/Single_cell/GABA-ergic_Neuron_vs_Glutamatergic_Neuron.all_nodes.microexons.txt"
)

# Keep nodes flagged as differential that carry a microexon ID, and attach the
# gene symbol of each microexon.
diff_microexon_nodes <- Tasic_total_diff_nodes[!is.na(microexon_ID) & is.diff == "TRUE", ]
microexon_gene_symbols <- unique(gene_table.TOTAL[, c("ME", "mgi_symbol")])
Tasic_total_diff_nodes.microexons <- merge(
  diff_microexon_nodes,
  microexon_gene_symbols,
  by.x = "microexon_ID",
  by.y = "ME"
)

# Differential microexons absent from
# Sig_GABAergic_Neuron_vs_Glutamatergic_Neuron.
Tasic_total_diff_nodes.microexons[!microexon_ID %in% Sig_GABAergic_Neuron_vs_Glutamatergic_Neuron$exon_ID, ]
Tasic_total_diff_nodes <- fread("/Users/gp7/Google_Drive/Results/ME/Paper/Single_cell/GABA-ergic_Neuron_vs_Glutamatergic_Neuron.all_nodes.microexons.txt")
Tasic_total_diff_nodes.microexons <- merge( Tasic_total_diff_nodes[is.diff=="TRUE" & !is.na(microexon_ID), ], 
                                            unique(gene_table.TOTAL[, c("ME", "mgi_symbol")]),
                                            by.x="microexon_ID", by.y="ME")
Tasic_total_diff_nodes.microexons[!microexon_ID %in% Sig_GABAergic_Neuron_vs_Glutamatergic_Neuron$exon_ID, ]
                  microexon_ID                  Gene Node                     Coord Strand Type Psi_A.mean Psi_B.mean DeltaPsi.mean DeltaPsi.sd Probability.mean
1: chr10_+_127728446_127728476 ENSMUSG00000025400.11    9 chr10:127728447-127728476      +   CE  0.9988179  0.8949054     0.1039116 0.005232909        0.9864583
2:    chr2_+_76569019_76569043 ENSMUSG00000042359.18   25    chr2:76569020-76569043      +   CE  0.4229608  0.9362494    -0.5132894 0.029304946        0.9067200
   Probability.sd Probability.var N.detected.reps     cdf.beta is.diff mgi_symbol
1:    0.005295355    2.804078e-05              48 8.007671e-38    TRUE       Tac2
2:    0.026344685    6.940424e-04              50 5.305384e-04    TRUE     Osbpl6
# Literature references noted for individual genes in the sets below:
# Ref (Slc4a10): https://www.frontiersin.org/articles/10.3389/fncel.2015.00223/full
# Ref (Ank3): https://www.sciencedirect.com/science/article/pii/S0896627314009088
# Ref (KCNMA1): https://www.sciencedirect.com/science/article/pii/S0896627313001852
# Ref (Kif3a): https://www.sciencedirect.com/science/article/pii/S089662730300062X
# Ref (DLGAP): https://molecularbrain.biomedcentral.com/articles/10.1186/s13041-017-0324-9
# Gene symbols grouped into three hand-curated sets, named pre-synaptic,
# post-synaptic and both.
# NOTE(review): the assignment of each gene to a set is the author's curation
# and is not checked by any code here -- verify against the references.
Pre_synaptic <- c( "Ptprd" , "Ppfia2", "Dlgap1", "Gabrg2", "Kcnma1", "Kif3a", "Cadps") 
Post_synaptic <- c( "Nrxn3" , "Nrxn1")
PrePost_synaptic <- c("Ank3","Slc4a10")
# Print the differential microexon table with gene symbols.
Tasic_total_diff_nodes.microexons
                   microexon_ID                  Gene Node                     Coord Strand Type Psi_A.mean Psi_B.mean DeltaPsi.mean DeltaPsi.sd Probability.mean
 1: chr10_+_106882735_106882744 ENSMUSG00000053825.15   26 chr10:106882736-106882744      +   CE 0.04254298 0.40015960    -0.3576170 0.013543752        0.9423000
 2: chr10_+_127728446_127728476 ENSMUSG00000025400.11    9 chr10:127728447-127728476      +   CE 0.99881792 0.89490542     0.1039116 0.005232909        0.9864583
 3:   chr10_+_59767083_59767089 ENSMUSG00000020111.15   21   chr10:59767084-59767089      +   CE 0.50434320 0.90624060    -0.4018980 0.014786698        0.9524600
 4:   chr10_+_69980823_69980850 ENSMUSG00000069601.14   87   chr10:69980824-69980850      +   CE 0.83555840 0.14347140     0.6920866 0.020767159        0.9713200
 5:   chr11_+_53590716_53590725 ENSMUSG00000018395.19   21   chr11:53590717-53590725      +   CE 0.66200620 0.22252240     0.4394834 0.007520063        0.9907000
 6: chr11_-_112680921_112680950 ENSMUSG00000041674.16   31 chr11:112680922-112680950      -   CE 0.91463532 0.13275472     0.7818815 0.028238359        0.9804894
 7:   chr11_-_41913969_41913993 ENSMUSG00000020436.17   13   chr11:41913970-41913993      -   CE 0.78832760 0.49438140     0.2939452 0.003662291        0.9883000
 8:   chr12_+_89354452_89354479 ENSMUSG00000066392.11   28   chr12:89354453-89354479      +   CE 0.61528060 0.99319860    -0.3779194 0.015058715        0.9912800
 9: chr12_-_100340496_100340499  ENSMUSG00000033530.8   33 chr12:100340497-100340499      -   CE 0.08897032 0.45795740    -0.3689868 0.015599731        0.9196800
10:   chr14_-_12472397_12472406 ENSMUSG00000054423.14   37   chr14:12472398-12472406      -   CE 0.67774640 0.25092260     0.4268242 0.011500528        0.9458400
11:   chr14_-_23449199_23449208 ENSMUSG00000063142.15   37   chr14:23449200-23449208      -   CE 0.12223500 0.79508280    -0.6728472 0.011630571        0.9913000
12:   chr14_-_23449199_23449211 ENSMUSG00000063142.15   36   chr14:23449209-23449211      -   AA 0.12223200 0.79455880    -0.6723272 0.011685279        0.9914800
13:   chr16_+_58729215_58729223 ENSMUSG00000022744.12    9   chr16:58729216-58729223      +   CE 0.84074200 0.48995680     0.3507854 0.005506084        0.9701000
14:   chr17_+_70718195_70718225 ENSMUSG00000003279.16   29   chr17:70718196-70718225      +   CE 0.55267380 0.94271520    -0.3900420 0.010741757        0.9834200
15:   chr17_-_90561801_90561828 ENSMUSG00000024109.18   40   chr17:90561802-90561828      -   CE 0.05202180 0.42399920    -0.3719784 0.011344171        0.9733200
16:   chr17_-_90620864_90620891 ENSMUSG00000024109.18   30   chr17:90620865-90620891      -   CE 0.47302620 0.99741340    -0.5243858 0.024613597        0.9946200
17:    chr1_-_51786572_51786578 ENSMUSG00000018417.14   30    chr1:51786573-51786578      -   CE 0.28201560 0.90874460    -0.6267280 0.027523168        0.9336200
18:    chr2_+_62317497_62317514 ENSMUSG00000026904.17   35    chr2:62317498-62317514      +   CE 0.79882300 0.97748240    -0.1786596 0.005950109        0.9637600
19:    chr2_+_76569019_76569043 ENSMUSG00000042359.18   25    chr2:76569020-76569043      +   CE 0.42296080 0.93624940    -0.5132894 0.029304946        0.9067200
20:    chr3_+_69724256_69724270 ENSMUSG00000027787.14    8    chr3:69724257-69724270      +   CE 0.97109880 0.76964180     0.2014566 0.006564793        0.9468600
21:    chr4_-_76054588_76054600 ENSMUSG00000028399.18   71    chr4:76054589-76054600      -   CE 0.80153800 0.41104260     0.3904962 0.011873125        0.9194800
22:    chr4_-_76138555_76138573 ENSMUSG00000028399.18   37    chr4:76138556-76138573      -   CE 0.17602620 0.98994200    -0.8139158 0.021469308        0.9953000
23:    chr4_-_76139293_76139302 ENSMUSG00000028399.18   36    chr4:76139294-76139302      -   CE 0.07221076 0.92935940    -0.8571476 0.008669427        0.9976400
24:    chr5_+_48263640_48263667 ENSMUSG00000031558.15   66    chr5:48263641-48263667      +   CE 0.78063060 0.04944636     0.7311840 0.023120612        0.9710400
25:    chr5_-_73050866_73050875 ENSMUSG00000070733.13   85    chr5:73050867-73050875      -   CE 0.85641940 0.08213788     0.7742824 0.016144319        0.9724600
26:    chr7_+_75696321_75696338 ENSMUSG00000066406.15   37    chr7:75696322-75696338      +   CE 0.50410580 0.93627240    -0.4321666 0.027574986        0.9034200
27:  chr9_-_105493542_105493554 ENSMUSG00000032570.17   18  chr9:105493543-105493554      -   CE 0.31801920 0.93770440    -0.6196862 0.025207937        0.9588800
28:    chrX_+_71376886_71376910 ENSMUSG00000015214.14    5    chrX:71376887-71376910      +   CE 0.07470168 0.94029960    -0.8655980 0.010244770        0.9999800
29:  chrX_-_155214830_155214858 ENSMUSG00000025283.15   13  chrX:155214831-155214858      -   CE 0.25803900 0.47274340    -0.2147052 0.005831721        0.9018000
                   microexon_ID                  Gene Node                     Coord Strand Type Psi_A.mean Psi_B.mean DeltaPsi.mean DeltaPsi.sd Probability.mean
    Probability.sd Probability.var N.detected.reps      cdf.beta is.diff mgi_symbol     Location
 1:   0.0131028117    1.716837e-04              50  1.724799e-13    TRUE     Ppfia2  Presynaptic
 2:   0.0052953546    2.804078e-05              48  8.007671e-38    TRUE       Tac2         <NA>
 3:   0.0149054298    2.221718e-04              50  9.056338e-11    TRUE      Micu1         <NA>
 4:   0.0118484305    1.403853e-04              50  1.093610e-13    TRUE       Ank3         Both
 5:   0.0058458078    3.417347e-05              50  4.859664e-24    TRUE      Kif3a  Presynaptic
 6:   0.0129478424    1.676466e-04              47  7.190604e-10    TRUE   BC006965         <NA>
 7:   0.0056757630    3.221429e-05              50  5.694298e-30    TRUE     Gabrg2  Presynaptic
 8:   0.0057783551    3.338939e-05              50  2.074737e-23    TRUE      Nrxn3 Postsynaptic
 9:   0.0145116983    2.105894e-04              50  2.114484e-10    TRUE      Ttc7b         <NA>
10:   0.0147832779    2.185453e-04              50  5.108218e-11    TRUE      Cadps  Presynaptic
11:   0.0060684867    3.682653e-05              50  2.067205e-21    TRUE     Kcnma1  Presynaptic
12:   0.0063317793    4.009143e-05              50  1.461345e-19    TRUE     Kcnma1  Presynaptic
13:   0.0096895694    9.388776e-05              50  8.572693e-20    TRUE     Cldnd1         <NA>
14:   0.0092517842    8.559551e-05              50  9.431345e-16    TRUE     Dlgap1  Presynaptic
15:   0.0140006414    1.960180e-04              50  4.500230e-10    TRUE      Nrxn1 Postsynaptic
16:   0.0060132167    3.615878e-05              50  2.836469e-15    TRUE      Nrxn1 Postsynaptic
17:   0.0155404541    2.415057e-04              50  5.886836e-10    TRUE      Myo1b         <NA>
18:   0.0108035519    1.167167e-04              50  1.513312e-17    TRUE    Slc4a10         Both
19:   0.0263446854    6.940424e-04              50  5.305384e-04    TRUE     Osbpl6         <NA>
20:   0.0105907353    1.121637e-04              50  1.320554e-19    TRUE       Nmd3         <NA>
21:   0.0168925053    2.853567e-04              50  3.888772e-08    TRUE      Ptprd  Presynaptic
22:   0.0048075196    2.311224e-05              50  2.975151e-20    TRUE      Ptprd  Presynaptic
23:   0.0022835704    5.214694e-06              50  3.830921e-44    TRUE      Ptprd  Presynaptic
24:   0.0167891607    2.818759e-04              50  7.775889e-08    TRUE      Slit2         <NA>
25:   0.0154195873    2.377637e-04              50  9.252242e-09    TRUE       Fryl         <NA>
26:   0.0190135183    3.615139e-04              50  5.180120e-06    TRUE     Akap13         <NA>
27:   0.0152992463    2.340669e-04              50  4.602062e-10    TRUE     Atp2c1         <NA>
28:   0.0001414214    2.000000e-08              50 1.737462e-101    TRUE      Mtmr1         <NA>
29:   0.0184777859    3.414286e-04              50  3.573953e-06    TRUE       Sat1         <NA>
    Probability.sd Probability.var N.detected.reps      cdf.beta is.diff mgi_symbol     Location

# Join the PCA loadings with the cluster annotation on ME_cluster.
# data.table refuses to join a factor key to an integer key, so the key is
# harmonised on copies first; the input tables themselves are not modified.
# NOTE(review): assumes factor levels are the integer cluster ids written as
# text (e.g. "1", "2") - the stopifnot() below aborts if they are not.
ME_clusters.info <- local({
  as_cluster_id <- function(ids) {
    if (!is.factor(ids)) {
      return(ids)
    }
    converted <- suppressWarnings(as.integer(as.character(ids)))
    # A non-numeric level would silently become NA and drop rows in the join.
    stopifnot(!anyNA(converted[!is.na(ids)]))
    converted
  }
  loadings <- copy(as.data.table(PCA_loadings))
  cluster_names <- copy(as.data.table(ME_cluster_names))
  loadings[, ME_cluster := as_cluster_id(ME_cluster)]
  cluster_names[, ME_cluster := as_cluster_id(ME_cluster)]
  merge(loadings, cluster_names, by = "ME_cluster")
})
Error in bmerge(i, x, leftcols, rightcols, roll, rollends, nomatch, mult,  : 
  Incompatible join types: x.ME_cluster (integer) and i.ME_cluster (factor). Factor columns must join to factor or character columns.
# Display order of the primary cell types: broad types are concatenated in the
# order listed below; within "Astrocyte" and "Glutamatergic Neuron" the primary
# types are sorted alphabetically, the others keep their order of appearance.
# primary_type is converted to character before combining so that the block
# can be re-run after the column has become a factor (on R < 4.1, c() on
# factors yields integer codes, which would turn every level into a number).
primary_type_levels <- local({
  broad_type_order <- c(
    "Endothelial Cell", "Oligodendrocyte", "Microglia",
    "Oligodendrocyte Precursor Cell", "Astrocyte",
    "Glutamatergic Neuron", "GABA-ergic Neuron"
  )
  sorted_broad_types <- c("Astrocyte", "Glutamatergic Neuron")
  levels_by_broad_type <- lapply(broad_type_order, function(current_type) {
    found <- unique(as.character(
      ME_cov_filtered_tasic_primary_type[broad_type == current_type, primary_type]
    ))
    if (current_type %in% sorted_broad_types) sort(found) else found
  })
  # factor() rejects duplicated levels, so keep only the first occurrence.
  unique(unlist(levels_by_broad_type))
})
ME_cov_filtered_tasic_primary_type$primary_type <- factor(
  as.character(ME_cov_filtered_tasic_primary_type$primary_type),
  levels = primary_type_levels
)

# Genes whose microexons are inspected below.
target_genes <- c("Gabrg2", "Nrxn1", "Nrxn3", "Ptprd")
ME_cov_filtered_tasic_primary_type[(mgi_symbol %in% target_genes) & (ME %in% Sig_GABAergic_Neuron_vs_Glutamatergic_Neuron$exon_ID)]
                            ME primary_type           broad_type sum_ME_coverage sum_SJ_coverage total_cov_alternatives_3 total_cov_alternatives_5       PSI     lower
  1: chr11_-_41913969_41913993   Vip Mybpc1    GABA-ergic Neuron            5881             887                        0                        0 0.8689421 0.8606926
  2: chr11_-_41913969_41913993    Vip Parm1    GABA-ergic Neuron            5465            3692                        0                        0 0.5968112 0.5867254
  3: chr11_-_41913969_41913993     L4 Ctxn3 Glutamatergic Neuron           12660            7816                        0                        0 0.6182848 0.6116091
  4: chr11_-_41913969_41913993     Vip Chat    GABA-ergic Neuron            5390            3757                        0                        0 0.5892642 0.5791469
  5: chr11_-_41913969_41913993   L2/3 Ptgs2 Glutamatergic Neuron            8799            3910                        0                        0 0.6923440 0.6842630
 ---                                                                                                                                                                  
339:  chr4_-_76139293_76139302  Pvalb Obox3    GABA-ergic Neuron               0               5                        0                        0 0.0000000 0.0000000
340:  chr4_-_76139293_76139302     L5b Tph2 Glutamatergic Neuron               3               0                        0                        0       NaN        NA
341:  chr4_-_76139293_76139302      L5 Ucma Glutamatergic Neuron              10               0                        0                        0 1.0000000 0.7224672
342:  chr4_-_76139293_76139302    L5 Chrna6 Glutamatergic Neuron               3               0                        0                        0       NaN        NA
343:  chr4_-_76139293_76139302    L5b Cdh13 Glutamatergic Neuron              19               0                        0                        0 1.0000000 0.8318208
         upper ensembl_transcript_id    ensembl_gene_id mgi_symbol
  1: 0.8767730    ENSMUST00000070725 ENSMUSG00000020436     Gabrg2
  2: 0.6068157    ENSMUST00000070725 ENSMUSG00000020436     Gabrg2
  3: 0.6249162    ENSMUST00000070725 ENSMUSG00000020436     Gabrg2
  4: 0.5993067    ENSMUST00000070725 ENSMUSG00000020436     Gabrg2
  5: 0.7003088    ENSMUST00000070725 ENSMUSG00000020436     Gabrg2
 ---                                                              
339: 0.4344825    ENSMUST00000174180 ENSMUSG00000028399      Ptprd
340:        NA    ENSMUST00000174180 ENSMUSG00000028399      Ptprd
341: 1.0000000    ENSMUST00000174180 ENSMUSG00000028399      Ptprd
342:        NA    ENSMUST00000174180 ENSMUSG00000028399      Ptprd
343: 1.0000000    ENSMUST00000174180 ENSMUSG00000028399      Ptprd
# Rows without a PSI estimate must not carry interval bounds.
ME_cov_filtered_tasic_primary_type[is.na(PSI), c("lower", "upper") := list(NA, NA)]

# Top panel: PSI with its lower/upper bounds per primary cell type, one facet
# row per gene/microexon pair, restricted to the target genes' significant
# microexons. The x axis is blanked entirely.
Fig8.c.top <- ggplot(
  ME_cov_filtered_tasic_primary_type[
    mgi_symbol %in% target_genes &
      ME %in% Sig_GABAergic_Neuron_vs_Glutamatergic_Neuron$exon_ID,
  ]
) +
  geom_pointrange(
    aes(
      x = primary_type,
      y = PSI,
      ymin = lower,
      ymax = upper,
      colour = broad_type,
      group = broad_type
    )
  ) +
  facet_grid(paste0(mgi_symbol, ME) ~ .) +
  theme_bw() +
  theme(
    legend.position = "top",
    legend.direction = "horizontal",
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )
Fig8.c.top


# Use the same primary-type ordering as the PSI panel.
Tasic_clustering$primary_type <- factor(
  Tasic_clustering$primary_type,
  levels = primary_type_levels
)

# Bottom panel: number of rows in Tasic_clustering per primary type, coloured
# by broad type; "Unclassified" entries are left out.
Fig8.c.bottom <- ggplot(
  Tasic_clustering[
    broad_type != "Unclassified",
    .N,
    by = .(primary_type, broad_type)
  ],
  aes(x = primary_type, y = N, fill = broad_type)
) +
  geom_bar(stat = "identity") +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 0, vjust = 0.5),
    legend.position = "none",
    legend.direction = "horizontal",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

Fig8.c.bottom

Single cell stats

# Stack the PSI panel above the count panel. The top panel has no x-axis
# labels of its own and is read against the bottom panel's labels, so the two
# plotting areas are aligned vertically on their left and right edges.
plot_grid(
  Fig8.c.top,
  Fig8.c.bottom,
  ncol = 1,
  align = "v",
  axis = "lr",
  rel_heights = c(7, 2)
)
Removed 65 rows containing missing values (geom_pointrange).

nrow(Tasic_total_diff_nodes)
[1] 195441
table(Tasic_total_diff_nodes$is.microexon)

 FALSE   TRUE 
193176   2265 
# Flag every node that carries a microexon id.
Tasic_total_diff_nodes[, is.microexon := !is.na(microexon_ID)]
# Nodes without a differential call count as not differential. is.diff is a
# logical column, so a logical FALSE is assigned (the character "FALSE" used
# before was only accepted through a coercion warning).
Tasic_total_diff_nodes[is.na(is.diff), is.diff := FALSE]
Coercing 'character' RHS to 'logical' to match the type of the target column (column 15 named 'is.diff').
# Node counts per event type, differential flag and microexon flag.
Tasic_total_diff_nodes.stats <- Tasic_total_diff_nodes[
  ,
  .(N = .N),
  by = .(Type, is.diff, is.microexon)
]
# Total = all nodes of the same event type and microexon status, differential
# or not.
Tasic_total_diff_nodes.stats[, Total := sum(N), by = .(Type, is.microexon)]
Tasic_total_diff_nodes.stats[is.diff == "TRUE"]
   Type is.diff is.microexon  N  Total
1:   AF    TRUE        FALSE 36   4121
2:   CE    TRUE        FALSE 97 100168
3:   CE    TRUE         TRUE 28   2204
4:   AD    TRUE        FALSE 13   4749
5:   RI    TRUE        FALSE 18   7342
6:   AL    TRUE        FALSE  3   1708
7:   AA    TRUE        FALSE 12   5772
8:   AA    TRUE         TRUE  1     50
# Fraction of nodes in each group that are differential.
Tasic_total_diff_nodes.stats[, ratio := N / Total]
# From here on only the differential rows are kept.
Tasic_total_diff_nodes.stats <- Tasic_total_diff_nodes.stats[is.diff == "TRUE"]
Tasic_total_diff_nodes.stats[is.diff == "TRUE"]
   Type is.diff is.microexon  N  Total        ratio
1:   AF    TRUE        FALSE 36   4121 0.0087357438
2:   CE    TRUE        FALSE 97 100168 0.0009683731
3:   CE    TRUE         TRUE 28   2204 0.0127041742
4:   AD    TRUE        FALSE 13   4749 0.0027374184
5:   RI    TRUE        FALSE 18   7342 0.0024516481
6:   AL    TRUE        FALSE  3   1708 0.0017564403
7:   AA    TRUE        FALSE 12   5772 0.0020790021
8:   AA    TRUE         TRUE  1     50 0.0200000000
# 2x2 contingency table for CE nodes, filled column-wise:
#   column 1 = microexons     (differential, not differential)
#   column 2 = non-microexons (differential, not differential)
# Relies on the stats table holding only the differential rows, so that
# Total - N is the non-differential count.
CE.cont <- local({
  ce_rows <- Tasic_total_diff_nodes.stats[Type == "CE"]
  micro <- ce_rows[is.microexon == "TRUE"]
  other <- ce_rows[is.microexon == "FALSE"]
  matrix(
    c(
      micro[, N], micro[, Total - N],
      other[, N], other[, Total - N]
    ),
    nrow = 2
  )
})
chisq.test(CE.cont)
Chi-squared approximation may be incorrect

    Pearson's Chi-squared test with Yates' continuity correction

data:  CE.cont
X-squared = 234.02, df = 1, p-value < 2.2e-16
# Same 2x2 layout as the CE table, but summed over every event type:
#   column 1 = microexons     (differential, not differential)
#   column 2 = non-microexons (differential, not differential)
CE.cont.total <- local({
  micro <- Tasic_total_diff_nodes.stats[is.microexon == "TRUE"]
  other <- Tasic_total_diff_nodes.stats[is.microexon == "FALSE"]
  matrix(
    c(
      sum(micro[, N]), sum(micro[, Total - N]),
      sum(other[, N]), sum(other[, Total - N])
    ),
    nrow = 2
  )
})
chisq.test(CE.cont.total)
Chi-squared approximation may be incorrect

    Pearson's Chi-squared test with Yates' continuity correction

data:  CE.cont.total
X-squared = 168.49, df = 1, p-value < 2.2e-16
# Hypergeometric set-up over all nodes:
#   n = all nodes, a = differential nodes, b = microexon nodes,
#   t = nodes that are both (this variable shares its name with base::t()).
n <- Tasic_total_diff_nodes[, .N]
a <- Tasic_total_diff_nodes[is.diff == "TRUE", .N]
b <- Tasic_total_diff_nodes[is.microexon == "TRUE", .N]
t <- Tasic_total_diff_nodes[is.microexon == "TRUE" & is.diff == "TRUE", .N]
# Probability of exactly t differential nodes among the b microexon nodes.
dhyper(t, a, n - a, b)
[1] 1.875713e-22
sum(dhyper(t:b, a, n - a, b))
[1] 2.014586e-22
# Same hypergeometric set-up, restricted to CE nodes:
#   n = CE nodes, a = differential CE nodes, b = CE microexon nodes,
#   t = CE nodes that are both.
n <- Tasic_total_diff_nodes[Type == "CE", .N]
a <- Tasic_total_diff_nodes[Type == "CE" & is.diff == "TRUE", .N]
b <- Tasic_total_diff_nodes[Type == "CE" & is.microexon == "TRUE", .N]
t <- Tasic_total_diff_nodes[Type == "CE" & is.microexon == "TRUE" & is.diff == "TRUE", .N]
# Probability of exactly t differential nodes among the b CE microexon nodes.
dhyper(t, a, n - a, b)
[1] 1.423587e-20
sum(dhyper(t:b, a, n - a, b))
[1] 1.534839e-20
nrow(Tasic_total_diff_nodes[ is.microexon=="TRUE" &  is.diff=="TRUE", ])
[1] 29


Tasic_total_diff_nodes[is.diff==TRUE & is.microexon==TRUE, ]


# Differential microexon results from the unpooled run.
Tasic_unpooled_diff_exons <- fread("/Users/gp7/Google_Drive/Results/ME/Paper/Final_Report/Reps/Rep1/Single_Cell/Unpooled/GABA-ergic_Neuron_vs_Glutamatergic_Neuron.diff.microexons")

# Pair each unpooled result with the mean statistics of the microexon nodes
# (inner join on exon_ID); sig_pool carries the is.diff flag of those nodes.
Tasic_unpooled_pooled <- merge(
  Tasic_unpooled_diff_exons[, .(exon_ID, Probability, DeltaPsi)],
  Tasic_total_diff_nodes[
    is.microexon == TRUE,
    .(
      exon_ID = microexon_ID,
      mean_Probability = Probability.mean,
      mean_DeltaPsi = DeltaPsi.mean,
      sig_pool = is.diff
    )
  ],
  by = "exon_ID"
)

# Significant in the unpooled run: |DeltaPsi| >= 0.1 and Probability >= 0.9.
# %in% TRUE maps a missing comparison to FALSE.
Tasic_unpooled_pooled[
  ,
  sig_unpool := (abs(DeltaPsi) >= 0.1 & Probability >= 0.9) %in% TRUE
]



 Tasic_unpooled_pooled[  ,.N, by=c( "sig_unpool", "sig_pool") ]
 Tasic_unpooled_pooled[  ,.N, by=c( "sig_unpool", "sig_pool") ]
   sig_unpool sig_pool    N
1:      FALSE    FALSE 2291
2:      FALSE     TRUE   19
3:       TRUE     TRUE   10
4:       TRUE    FALSE    4

Tasic

 Tasic_unpooled_pooled[  ,.N, by=c( "sig_unpool", "sig_pool") ]
   sig_unpool sig_pool    N
1:      FALSE    FALSE 2291
2:      FALSE     TRUE   19
3:       TRUE     TRUE   10
4:       TRUE    FALSE    4
library(ggplot2)
library(data.table)

# Distribution of the Entropy column of the unpooled results (bin width 0.1).
ggplot(Tasic_unpooled_diff_exons, aes(x = Entropy)) +
  geom_histogram(binwidth = 0.1)

# Pair each unpooled result with the per-exon means of the pooled results
# (inner join on exon_ID).
Tasic_unpooled_pooled <- merge(
  Tasic_unpooled_diff_exons[, .(exon_ID, Probability, DeltaPsi)],
  Tasic_pooled_diff_exons[
    ,
    .(
      mean_Probability = mean(Probability),
      mean_DeltaPsi = mean(DeltaPsi)
    ),
    by = exon_ID
  ],
  by = "exon_ID"
)

# The same thresholds (|delta PSI| >= 0.1, probability >= 0.9) define
# significance for the unpooled values and for the pooled means.
# %in% TRUE maps a missing comparison to FALSE.
Tasic_unpooled_pooled[
  ,
  sig_unpool := (abs(DeltaPsi) >= 0.1 & Probability >= 0.9) %in% TRUE
]
Tasic_unpooled_pooled[
  ,
  sig_pool := (abs(mean_DeltaPsi) >= 0.1 & mean_Probability >= 0.9) %in% TRUE
]

# Spread of the pooled probabilities for each coordinate.
Tasic_pooled_diff_exons[, sd_probability := sd(Probability), by = Coord]

ggplot(Tasic_pooled_diff_exons, aes(x = sd_probability)) +
  geom_histogram()
# Number of exons called significant by each approach.
Tasic_unpooled_pooled[sig_unpool == "TRUE", .N]
Tasic_unpooled_pooled[sig_pool == "TRUE", .N]
# Difference between pooled means and unpooled values (delta PSI on x,
# probability on y) for every exon, with the significance classes overlaid:
#   red   = significant only in the pooled analysis
#   green = significant in both
#   blue  = significant only in the unpooled analysis
ggplot(
  Tasic_unpooled_pooled,
  aes(mean_DeltaPsi - DeltaPsi, mean_Probability - Probability)
) +
  geom_point() +
  geom_point(
    data = Tasic_unpooled_pooled[sig_pool == TRUE & sig_unpool == FALSE],
    color = "red"
  ) +
  geom_point(
    data = Tasic_unpooled_pooled[sig_pool == TRUE & sig_unpool == TRUE],
    color = "green"
  ) +
  geom_point(
    data = Tasic_unpooled_pooled[sig_pool == FALSE & sig_unpool == TRUE],
    color = "blue"
  ) +
  theme_bw()
# Keep the previous significant set for comparison before rebuilding it.
Sig_GABAergic_Neuron_vs_Glutamatergic_Neuron_old <- Sig_GABAergic_Neuron_vs_Glutamatergic_Neuron

Sig_GABAergic_Neuron_vs_Glutamatergic_Neuron_old[
  order(-abs(mean_DeltaPsi)),
  c("exon_ID", "mgi_symbol", "wikigene_description", "mean_DeltaPsi", "mean_Probability")
]

# New significant set: significant in the pooled analysis and with a pooled
# mean delta PSI within 0.25 of the unpooled value, annotated with the gene
# symbol of each microexon.
Sig_GABAergic_Neuron_vs_Glutamatergic_Neuron <- merge(
  Tasic_unpooled_pooled[abs(mean_DeltaPsi - DeltaPsi) <= 0.25 & sig_pool == TRUE],
  unique(gene_table.TOTAL[, c("ME", "mgi_symbol")]),
  by.x = "exon_ID",
  by.y = "ME"
)

#Sig_GABAergic_Neuron_vs_Glutamatergic_Neuron <- merge( Sig_GABAergic_Neuron_vs_Glutamatergic_Neuron , unique(gene_info_total[ , c("mgi_symbol", "wikigene_description")]), by= "mgi_symbol")

# Largest absolute effect first.
Sig_GABAergic_Neuron_vs_Glutamatergic_Neuron[order(-abs(mean_DeltaPsi))]

# Number of distinct genes in the significant set.
Sig_GABAergic_Neuron_vs_Glutamatergic_Neuron[, uniqueN(mgi_symbol)]

# Literature consulted for the synaptic localisation of the genes below:
#   Slc4a10: https://www.frontiersin.org/articles/10.3389/fncel.2015.00223/full
#   Ank3:    https://www.sciencedirect.com/science/article/pii/S0896627314009088
#   KCNMA1:  https://www.sciencedirect.com/science/article/pii/S0896627313001852
#   Kif3a:   https://www.sciencedirect.com/science/article/pii/S089662730300062X
#   DLGAP:   https://molecularbrain.biomedcentral.com/articles/10.1186/s13041-017-0324-9
# NOTE(review): the pre/post assignment is taken as given here; verify each
# gene against the references above.

# Gene symbols labelled as presynaptic.
Pre_synaptic <- c(
  "Ptprd",
  "Ppfia2",
  "Dlgap1",
  "Gabrg2",
  "Kcnma1",
  "Kif3a",
  "Cadps"
)
# Gene symbols labelled as postsynaptic.
Post_synaptic <- c("Nrxn3", "Nrxn1")
# Gene symbols labelled as both pre- and postsynaptic.
PrePost_synaptic <- c("Ank3", "Slc4a10")

# Significant microexons that are in neither GENCODE nor VastDB, largest
# absolute effect first. Only the requested columns that actually exist are
# selected: wikigene_description is added by a join that is currently
# commented out, and selecting a missing column would abort the expression.
local({
  wanted <- c("exon_ID", "mgi_symbol", "wikigene_description", "mean_DeltaPsi", "mean_Probability")
  available <- wanted[wanted %in% names(Sig_GABAergic_Neuron_vs_Glutamatergic_Neuron)]
  novel <- Sig_GABAergic_Neuron_vs_Glutamatergic_Neuron[
    !exon_ID %in% microexons_GENCODE & !exon_ID %in% microexons_Vastdb,
    available,
    with = FALSE
  ]
  novel[order(-abs(mean_DeltaPsi))]
})

microexons_GENCODE
microexons_Vastdb
Brain_string_interactions_out[mgi_symbol=="Akap13",]
# Export the significant set as a tab-separated file with a header row and
# without quoting or row names. TRUE/FALSE are spelled out (T and F are
# ordinary variables that can be reassigned); arguments that only restated
# write.table()'s defaults were dropped.
write.table(
  Sig_GABAergic_Neuron_vs_Glutamatergic_Neuron,
  file = "~/Desktop/Sig_GABAergic_Neuron_vs_Glutamatergic_Neuron.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)


# Mean probability against mean delta PSI for every exon, with the
# significant set drawn on top in red.
ggplot(mapping = aes(x = mean_DeltaPsi, y = mean_Probability)) +
  geom_point(data = Tasic_unpooled_pooled) +
  geom_point(
    data = Sig_GABAergic_Neuron_vs_Glutamatergic_Neuron,
    color = "red"
  )
library("ggrepel")



# Annotate the synaptic location of each gene in place. Rows whose gene is in
# none of the three sets are not touched; "Both" is written last and therefore
# wins if a gene were listed in more than one set.
Sig_GABAergic_Neuron_vs_Glutamatergic_Neuron[
  mgi_symbol %in% Pre_synaptic,
  Location := "Presynaptic"
]
Sig_GABAergic_Neuron_vs_Glutamatergic_Neuron[
  mgi_symbol %in% Post_synaptic,
  Location := "Postsynaptic"
]
Sig_GABAergic_Neuron_vs_Glutamatergic_Neuron[
  mgi_symbol %in% PrePost_synaptic,
  Location := "Both"
]



#Sig_GABAergic_Neuron_vs_Glutamatergic_Neuron <- Sig_GABAergic_Neuron_vs_Glutamatergic_Neuron[!exon_ID %in% ME_blacklist$ME]

# Mean probability against mean delta PSI: all exons in grey, the significant
# set in black, and gene-symbol labels for the three synaptic gene sets
# (presynaptic green, postsynaptic red, both gold).
# Changes from the earlier version: the three copy-pasted label layers are
# built by one function, labels are mapped from each layer's own data instead
# of a separately subset vector, geom_text_repel is namespace-qualified so the
# chunk does not depend on library(ggrepel) having been run, and the
# xlab("") that was immediately overridden is gone.
ggplot() +
  geom_point(data = Tasic_unpooled_pooled, aes(mean_DeltaPsi, mean_Probability), color = "grey") +
  geom_point(data = Sig_GABAergic_Neuron_vs_Glutamatergic_Neuron, aes(mean_DeltaPsi, mean_Probability), color = "black") +
  Map(
    function(gene_set, label_colour) {
      ggrepel::geom_text_repel(
        data = Sig_GABAergic_Neuron_vs_Glutamatergic_Neuron[mgi_symbol %in% gene_set],
        aes(x = mean_DeltaPsi, y = mean_Probability, label = mgi_symbol),
        colour = label_colour,
        nudge_y = 3,
        direction = "x",
        angle = 90,
        vjust = 1,
        segment.size = 0.2
      )
    },
    list(Pre_synaptic, Post_synaptic, PrePost_synaptic),
    c("forestgreen", "firebrick4", "darkgoldenrod")
  ) +
  ylim(0.5, 1.15) +
  theme_bw() +
  xlab("Mean delta PSI") +
  ylab("Mean probability")
# Mean probability against mean delta PSI: all exons in grey, the significant
# set in black, and gene-symbol labels for the three synaptic gene sets
# (presynaptic green, postsynaptic red, both gold).
# geom_text_repel is namespace-qualified so the plot no longer fails with
# "could not find function" when ggrepel has not been attached. The three
# copy-pasted label layers are built by one function, labels are mapped from
# each layer's own data, and the overridden xlab("") is gone.
ggplot() +
  geom_point(data = Tasic_unpooled_pooled, aes(mean_DeltaPsi, mean_Probability), color = "grey") +
  geom_point(data = Sig_GABAergic_Neuron_vs_Glutamatergic_Neuron, aes(mean_DeltaPsi, mean_Probability), color = "black") +
  Map(
    function(gene_set, label_colour) {
      ggrepel::geom_text_repel(
        data = Sig_GABAergic_Neuron_vs_Glutamatergic_Neuron[mgi_symbol %in% gene_set],
        aes(x = mean_DeltaPsi, y = mean_Probability, label = mgi_symbol),
        colour = label_colour,
        nudge_y = 3,
        direction = "x",
        angle = 90,
        vjust = 1,
        segment.size = 0.2
      )
    },
    list(Pre_synaptic, Post_synaptic, PrePost_synaptic),
    c("forestgreen", "firebrick4", "darkgoldenrod")
  ) +
  ylim(0.5, 1.15) +
  theme_bw() +
  xlab("Mean delta PSI") +
  ylab("Mean probability")
Error in geom_text_repel(data = Sig_GABAergic_Neuron_vs_Glutamatergic_Neuron[mgi_symbol %in%  : 
  could not find function "geom_text_repel"
Sig_GABAergic_Neuron_vs_Glutamatergic_Neuron[mgi_symbol %in% Pre_synaptic, Location:="Presynaptic"]
Error in .checkTypos(e, names_x) : 
  Object 'mgi_symbol' not found amongst exon_ID, Probability, DeltaPsi, mean_Probability, mean_DeltaPsi and 2 more
# Display order of the primary cell types: broad types are concatenated in the
# order listed below; within "Astrocyte" and "Glutamatergic Neuron" the primary
# types are sorted alphabetically, the others keep their order of appearance.
# The levels are computed once (the original recomputed them right after the
# factor conversion) and primary_type is converted to character before
# combining: on R < 4.1, c() on factors yields integer codes, so recomputing
# from an already-converted column would turn every level into a number.
primary_type_levels <- local({
  broad_type_order <- c(
    "Endothelial Cell", "Oligodendrocyte", "Microglia",
    "Oligodendrocyte Precursor Cell", "Astrocyte",
    "Glutamatergic Neuron", "GABA-ergic Neuron"
  )
  sorted_broad_types <- c("Astrocyte", "Glutamatergic Neuron")
  levels_by_broad_type <- lapply(broad_type_order, function(current_type) {
    found <- unique(as.character(
      ME_cov_filtered_tasic_primary_type[broad_type == current_type, primary_type]
    ))
    if (current_type %in% sorted_broad_types) sort(found) else found
  })
  # factor() rejects duplicated levels, so keep only the first occurrence.
  unique(unlist(levels_by_broad_type))
})

ME_cov_filtered_tasic_primary_type$primary_type <- factor(
  as.character(ME_cov_filtered_tasic_primary_type$primary_type),
  levels = primary_type_levels
)
Error in unique(ME_cov_filtered_tasic_primary_type[broad_type == "Endothelial Cell",  : 
  object 'ME_cov_filtered_tasic_primary_type' not found
# Genes whose significant microexons are inspected per primary cell type.
target_genes <- c(
  "Gabrg2",
  "Nrxn1",
  "Nrxn3",
  "Ptprd"
)
ME_cov_filtered_tasic_primary_type[
  mgi_symbol %in% target_genes
][
  ME %in% Sig_GABAergic_Neuron_vs_Glutamatergic_Neuron$exon_ID
]
                            ME primary_type           broad_type sum_ME_coverage sum_SJ_coverage total_cov_alternatives_3 total_cov_alternatives_5       PSI     lower
  1: chr11_-_41913969_41913993   Vip Mybpc1    GABA-ergic Neuron            5881             887                        0                        0 0.8689421 0.8606926
  2: chr11_-_41913969_41913993    Vip Parm1    GABA-ergic Neuron            5465            3692                        0                        0 0.5968112 0.5867254
  3: chr11_-_41913969_41913993     L4 Ctxn3 Glutamatergic Neuron           12660            7816                        0                        0 0.6182848 0.6116091
  4: chr11_-_41913969_41913993     Vip Chat    GABA-ergic Neuron            5390            3757                        0                        0 0.5892642 0.5791469
  5: chr11_-_41913969_41913993   L2/3 Ptgs2 Glutamatergic Neuron            8799            3910                        0                        0 0.6923440 0.6842630
 ---                                                                                                                                                                  
339:  chr4_-_76139293_76139302  Pvalb Obox3    GABA-ergic Neuron               0               5                        0                        0 0.0000000 0.0000000
340:  chr4_-_76139293_76139302     L5b Tph2 Glutamatergic Neuron               3               0                        0                        0       NaN 0.4385030
341:  chr4_-_76139293_76139302      L5 Ucma Glutamatergic Neuron              10               0                        0                        0 1.0000000 0.7224672
342:  chr4_-_76139293_76139302    L5 Chrna6 Glutamatergic Neuron               3               0                        0                        0       NaN 0.4385030
343:  chr4_-_76139293_76139302    L5b Cdh13 Glutamatergic Neuron              19               0                        0                        0 1.0000000 0.8318208
         upper ensembl_transcript_id    ensembl_gene_id mgi_symbol
  1: 0.8767730    ENSMUST00000070725 ENSMUSG00000020436     Gabrg2
  2: 0.6068157    ENSMUST00000070725 ENSMUSG00000020436     Gabrg2
  3: 0.6249162    ENSMUST00000070725 ENSMUSG00000020436     Gabrg2
  4: 0.5993067    ENSMUST00000070725 ENSMUSG00000020436     Gabrg2
  5: 0.7003088    ENSMUST00000070725 ENSMUSG00000020436     Gabrg2
 ---                                                              
339: 0.4344825    ENSMUST00000174180 ENSMUSG00000028399      Ptprd
340: 1.0000000    ENSMUST00000174180 ENSMUSG00000028399      Ptprd
341: 1.0000000    ENSMUST00000174180 ENSMUSG00000028399      Ptprd
342: 1.0000000    ENSMUST00000174180 ENSMUSG00000028399      Ptprd
343: 1.0000000    ENSMUST00000174180 ENSMUSG00000028399      Ptprd
# Rows without a PSI estimate must not carry interval bounds.
ME_cov_filtered_tasic_primary_type[is.na(PSI), c("lower", "upper") := list(NA, NA)]

# PSI with its lower/upper bounds per primary cell type, one facet row per
# gene/microexon pair, restricted to the target genes' significant microexons.
ggplot(
  ME_cov_filtered_tasic_primary_type[
    mgi_symbol %in% target_genes &
      ME %in% Sig_GABAergic_Neuron_vs_Glutamatergic_Neuron$exon_ID,
  ]
) +
  geom_pointrange(
    aes(
      x = primary_type,
      y = PSI,
      ymin = lower,
      ymax = upper,
      colour = broad_type,
      group = broad_type
    )
  ) +
  facet_grid(paste0(mgi_symbol, ME) ~ .) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 0, vjust = 0.5),
    legend.position = "top",
    legend.direction = "horizontal"
  )

Numbers


### Abstract


# Distinct input files, split by whether the file name contains "ENC".
# NOTE(review): "ENC" presumably marks ENCODE samples - confirm.
Bulk_RNA_SEQ.Table <- table(
  grepl("ENC", unique(ME_cov_filtered[["FILE_NAME"]]))
)

# Sample counts: all distinct bulk files, and one row per single cell.
Total_Bulk_RNA_seq <- sum(Bulk_RNA_SEQ.Table)
Total_scRNA_seq <- nrow(tasic_metadata)

# Number of distinct microexons in the final set.
Total_microexon.number <- uniqueN(ME_final[["ME"]])
#HNMF_microexons.number <- length(Delta_HNMF_whippet_ME_age_diff_total_PPI[ , unique(exon_ID)])



diff_spliced.brain_dev <- length(unique(Delta_HNM_whippet_ME_age_diff_total$exon_ID , Delta_F_whippet_ME_age_diff_total$exon_ID))


diff_spliced.brain_dev.novel <-  length(unique(Delta_HNM_whippet_ME_age_diff_total[ ! exon_ID %in% c(microexons_Vastdb, microexons_GENCODE) ,  exon_ID  ], Delta_F_whippet_ME_age_diff_total[ ! exon_ID %in% c(microexons_Vastdb, microexons_GENCODE) ,  exon_ID  ]))


### Introduction

# Counts quoted in the introduction.

# Distinct microexons in ME_final that are not in the GENCODE set, and their
# share of all distinct microexons.
Total_microexon.number.not_gencode <- length(
  ME_final[!ME %in% microexons_GENCODE, unique(ME)]
)
Total_microexon.number.not_gencode.percentage <-
  (Total_microexon.number.not_gencode / Total_microexon.number) * 100

# Distinct exon IDs from either the HNM or the F table that are not in VastDB.
# unique(x, y) would treat y as `incomparables`, not as additional values,
# so the two ID vectors are concatenated before de-duplicating.
# as.character() keeps c() safe if exon_ID is stored as a factor.
diff_spliced.brain_dev.not_vastdb <- length(unique(c(
  as.character(Delta_HNM_whippet_ME_age_diff_total[
    !exon_ID %in% c(microexons_Vastdb),
    exon_ID
  ]),
  as.character(Delta_F_whippet_ME_age_diff_total[
    !exon_ID %in% c(microexons_Vastdb),
    exon_ID
  ])
)))

diff_spliced.brain_dev.not_vastdb.percentage <-
  (diff_spliced.brain_dev.not_vastdb / diff_spliced.brain_dev) * 100

# Distinct zebrafish microexon IDs in the mouse/zebrafish ID table, overall
# and for rows where ensemble_zebra is FALSE.
conserved_zebra <- length(unique(Mouse_Zebra.IDs[, ME.zebra]))

conserved_zebra.not_ensembl <- length(unique(
  Mouse_Zebra.IDs[ensemble_zebra == FALSE, ME.zebra]
))




#Results 

# Row names of Tissue_PSI_matrix_dcast whose number of missing values is
# below 289 * 0.9.
# NOTE(review): 289 is presumably the number of sample columns — confirm.
me_after_10_samples_filter <- rownames(
  Tissue_PSI_matrix_dcast[
    rowSums(is.na(Tissue_PSI_matrix_dcast)) < 289 * 0.9,
  ]
)

# Microexons passing the filter, counted per annotation type.
ME_final.out[ME %in% me_after_10_samples_filter, .N, by = type]



# 100 minus the percentage sum(asym) / sum(total), over the neuronal,
# neuro-muscular and muscular cluster types.
sym_percentage.neuronal_neuromuscular_muscular <- ME_cluster_names[
  ME_cluster.type %in% c("Neuronal", "Neuro-Muscular", "Muscular"),
  100 - (sum(asym) / sum(total)) * 100
]


# Print both tables for inspection.
Delta_HNM_whippet_ME_age_diff_total
Delta_HNMF_whippet_ME_age_diff_total

# Rows of the PPI table whose exon ID is not in VastDB, as a count and as a
# percentage rounded to two decimals.
# NOTE(review): HNMF_microexons.number is not assigned in this part of the
# file (the only visible assignment is commented out) — confirm it is
# defined earlier.
HNMF_microexons.not_vastdb.number <-
  Delta_HNMF_whippet_ME_age_diff_total_PPI[
    ,
    sum(!exon_ID %in% microexons_Vastdb)
  ]
HNMF_microexons.not_vastdb.percentage <- round(
  (HNMF_microexons.not_vastdb.number / HNMF_microexons.number) * 100,
  2
)


# Distinct microexons missing from GENCODE, as a count and as a percentage of
# all distinct microexons.
Novel_gencode.number <- sum(!ME_final[, unique(ME)] %in% microexons_GENCODE)
Novel_gencode.percentage <-
  (Novel_gencode.number / Total_microexon.number) * 100


# Same for microexons missing from VastDB.
Novel_vastdb.number <- sum(!ME_final[, unique(ME)] %in% microexons_Vastdb)
Novel_vastdb.percentage <-
  (Novel_vastdb.number / Total_microexon.number) * 100


# Distinct exon IDs with |DeltaPsi| >= 0.1 and Probability >= 0.9 in the
# whippet table that also meet the same thresholds in the matching ME table.
MHN_diff.whippet_ME <- length(unique(
  Delta_HNM_whippet[
    abs(DeltaPsi) >= 0.1 & Probability >= 0.9 &
      exon_ID %in% Delta_HNM_ME[
        abs(DeltaPsi) >= 0.1 & Probability >= 0.9,
        exon_ID
      ],
    exon_ID
  ]
))
F_diff.whippet_ME <- length(unique(
  Delta_F_whippet[
    abs(DeltaPsi) >= 0.1 & Probability >= 0.9 &
      exon_ID %in% Delta_F_ME[
        abs(DeltaPsi) >= 0.1 & Probability >= 0.9,
        exon_ID
      ],
    exon_ID
  ]
))


# Number of rows flagged diff_high in each tissue table.
diff_heart <- Delta_Heart_whippet_ME[, sum(diff_high == TRUE, na.rm = TRUE)]
diff_SKM <- Delta_SKM_whippet_ME[, sum(diff_high == TRUE, na.rm = TRUE)]
diff_AG <- Delta_AG_whippet_ME[, sum(diff_high == TRUE, na.rm = TRUE)]




# Label every row with its annotation status. The assignments run from least
# to most specific, so a later match overwrites an earlier label:
# Annotated -> Missing in VastDB -> Missing in GENCODE -> Novel.
Delta_HNMF_whippet_ME_age_diff_total[, type := "Annotated"]
Delta_HNMF_whippet_ME_age_diff_total[
  !exon_ID %in% microexons_Vastdb,
  type := "Missing in VastDB"
]
Delta_HNMF_whippet_ME_age_diff_total[
  !exon_ID %in% microexons_GENCODE,
  type := "Missing in GENCODE"
]
Delta_HNMF_whippet_ME_age_diff_total[
  !(exon_ID %in% microexons_GENCODE | exon_ID %in% microexons_Vastdb),
  type := "Novel"
]

# Rows per annotation status.
table(Delta_HNMF_whippet_ME_age_diff_total[["type"]])

# Found microexon sequence lengths, named by microexon ID.
ME_sizes <- setNames(ME_final.rep1$len_micro_exon_seq_found, ME_final.rep1$ME)


# Novel microexons counted per length.
Delta_HNMF_whippet_ME_age_diff_total[
  type == "Novel",
  .N,
  by = .(type, ME_len)
]




# Print the rows flagged sig_unpool.
Sig_GABAergic_Neuron_vs_Glutamatergic_Neuron[sig_unpool == TRUE, ]

# Histogram of microexon length (one bin per unit of ME_len), one facet row
# per annotation type. Strip labels are hidden; the fill legend sits on top.
ggplot(Delta_HNMF_whippet_ME_age_diff_total) +
  geom_histogram(aes(x = ME_len, fill = type), binwidth = 1) +
  facet_grid(type ~ .) +
  scale_fill_discrete(name = "Microexon type") +
  xlab("Microexon length") +
  theme(
    legend.position = "top",
    strip.background = element_blank(),
    strip.text = element_blank()
  )
  
# Row names of Tissue_PSI_matrix_dcast whose number of missing values is
# below 289 * 0.9 (computed once; the assignment was repeated verbatim).
# NOTE(review): 289 is presumably the number of sample columns — confirm.
me_after_10_samples_filter <- rownames(
  Tissue_PSI_matrix_dcast[
    rowSums(is.na(Tissue_PSI_matrix_dcast)) < 289 * 0.9,
  ]
)
# Microexons passing the filter, counted per annotation type.
ME_final.out[ME %in% me_after_10_samples_filter, .N, by = type]
                 type    N
1:              Novel  866
2:          Annotated 1556
3: Missing in GENCODE  177

# Hypergeometric test for the overlap between the adrenal-gland diff_high
# microexons and the microexons in Delta_HNMF_whippet_ME_age_diff_total.
#rationale: https://www.gungorbudak.com/blog/2016/05/25/computing-significance-of-overlap/

# Exon IDs present in both sets.
AG_brain.intersect.number <- length(intersect(
  Delta_AG_whippet_ME[diff_high == TRUE]$exon_ID,
  Delta_HNMF_whippet_ME_age_diff_total$exon_ID
))


# Sizes of the two sets (distinct exon IDs).
AG.number <- length(unique(Delta_AG_whippet_ME[diff_high == TRUE, exon_ID]))
brain.number <- length(unique(Delta_HNMF_whippet_ME_age_diff_total$exon_ID))

# Print the population size used below.
Microexons.PPCA.number

# With lower.tail = FALSE, phyper(q, ...) returns P[X > q]. The p-value for
# seeing an overlap at least as large as the observed one is P[X >= overlap],
# so the observed overlap minus one is passed as q.
phyper(
  AG_brain.intersect.number - 1,
  AG.number,
  Microexons.PPCA.number - AG.number,
  brain.number,
  lower.tail = FALSE
)
Supplemental




# Build a per-microexon cluster table: ME_clusters becomes a data.frame, its
# row names are copied into an explicit ME column, and the result is
# converted to a data.table.
ME_clusters_table <- data.frame(ME_clusters)

ME_clusters_table$ME <- row.names(ME_clusters_table)
ME_clusters_table <- data.table(ME_clusters_table)


# Attach the cluster assignment to the long-format PSI table, joining on ME.
ME_clusters_PSI <-  data.table(merge(Tissue_PSI_matrix_melt_ppca, ME_clusters_table , by=c("ME")))


#ME_clusters_PSI <- ME_clusters_PSI[, c("ME", "FILE_NAME", "ME_clusters", "PSI")]


# Add the per-sample PCA columns, matching FILE_NAME to File.accession.
ME_Tissues_clusters_PSI<- merge(ME_clusters_PSI, PCA, by.x="FILE_NAME", by.y="File.accession")

# Copy the sample cluster column under the name Tissue_clusters.
ME_Tissues_clusters_PSI[, Tissue_clusters:=cluster]

# Convert both join keys to factors before merging in the loading statistics.
PCA_loadings_stats$ME_cluster <- factor(PCA_loadings_stats$ME_cluster)
ME_Tissues_clusters_PSI$ME_clusters <- factor(ME_Tissues_clusters_PSI$ME_clusters)

ME_Tissues_clusters_PSI <- merge(ME_Tissues_clusters_PSI, PCA_loadings_stats, by.x="ME_clusters", by.y="ME_cluster" )

# Order the sample-cluster factor levels by the mean PC1 of each cluster.
ME_Tissues_clusters_PSI$Tissue_clusters <- factor(ME_Tissues_clusters_PSI$Tissue_clusters, levels=PCA[, mean(PC1), by="cluster" ][order(V1)]$cluster) 



# Keep rows whose PC1_mean is non-zero.
# NOTE(review): PC1_mean presumably comes from PCA_loadings_stats — confirm.
sub_ME_Tissues_clusters_PSI <- ME_Tissues_clusters_PSI[abs(PC1_mean)>0, ]
sub_ME_Tissues_clusters_PSI <- data.table(sub_ME_Tissues_clusters_PSI)
# Re-level the microexon clusters using ME_cluster_loading_order.
sub_ME_Tissues_clusters_PSI$ME_clusters <- factor(sub_ME_Tissues_clusters_PSI$ME_clusters , levels=ME_cluster_loading_order )


# Mean PSI (ignoring NA) per microexon within each ME cluster / sample cluster
# pair, then the mean of those per-microexon means for each pair.
sub_ME_Tissues_clusters_PSI_aggregated <-  sub_ME_Tissues_clusters_PSI[, list(PSI=mean(PSI, na.rm = TRUE)), by=list(ME, ME_clusters,  Tissue_clusters) ]
sub_ME_Tissues_clusters_PSI_aggregated_mean <-  sub_ME_Tissues_clusters_PSI_aggregated[, list(PSI= mean(PSI, na.rm=TRUE) ), by=list(ME_clusters,  Tissue_clusters) ]


# PSI profile across sample clusters, one facet row per microexon cluster:
# a grey line per microexon and a red line for the cluster mean.
ggplot() +
  geom_line(
    data = sub_ME_Tissues_clusters_PSI_aggregated,
    mapping = aes(x = factor(Tissue_clusters), y = PSI, group = ME),
    colour = "grey"
  ) +
  geom_line(
    data = sub_ME_Tissues_clusters_PSI_aggregated_mean,
    mapping = aes(x = factor(Tissue_clusters), y = PSI, group = ME_clusters),
    colour = "red"
  ) +
  facet_grid(ME_clusters ~ .) +
  xlab("Sample clusters (sorted by PC1)") +
  theme(panel.background = element_rect(fill = "white", colour = "black"))


# Cluster names shown in the first and second column of the figure.
col1 <- c("E1", "E2", "E3", "E4", "E5", "E6", "I1", "I2", "I3", "M1", "M2", "M3")
col2 <- c("N1", "N2", "N3", "N4", "NM1", "NM2", "NM3", "NN1", "O1", "O2", "WN1", "WN2")


# Numeric copy of the cluster factor, used as the join key against
# ME_cluster_names.
sub_ME_Tissues_clusters_PSI_aggregated$ME_cluster <-
  as.numeric(as.character(sub_ME_Tissues_clusters_PSI_aggregated$ME_clusters))
sub_ME_Tissues_clusters_PSI_aggregated_mean$ME_cluster <-
  as.numeric(as.character(sub_ME_Tissues_clusters_PSI_aggregated_mean$ME_clusters))

# Two side-by-side panels of the same profile plot, faceted by cluster name.
# Evaluated inside local() so the helper objects stay out of the workspace.
local({
  # Attach the cluster names once to each aggregated table.
  per_microexon <- merge(
    sub_ME_Tissues_clusters_PSI_aggregated,
    ME_cluster_names,
    by = "ME_cluster"
  )
  per_cluster_mean <- merge(
    sub_ME_Tissues_clusters_PSI_aggregated_mean,
    ME_cluster_names,
    by = "ME_cluster"
  )

  # One panel: grey line per microexon, red line for the cluster mean,
  # limited to the requested cluster names.
  profile_panel <- function(panel_clusters) {
    ggplot() +
      geom_line(
        data = per_microexon[ME_cluster.name %in% panel_clusters],
        mapping = aes(x = factor(Tissue_clusters), y = PSI, group = ME),
        colour = "grey"
      ) +
      geom_line(
        data = per_cluster_mean[ME_cluster.name %in% panel_clusters],
        mapping = aes(x = factor(Tissue_clusters), y = PSI, group = ME_clusters),
        colour = "red"
      ) +
      facet_grid(ME_cluster.name ~ .) +
      xlab("Sample clusters (sorted by PC1)") +
      theme(panel.background = element_rect(fill = "white", colour = "black"))
  }

  plot_grid(profile_panel(col1), profile_panel(col2))
})


# Numeric copy of the cluster label, used as the join key against
# ME_cluster_names.
PCA_loadings$ME_clusters <- as.numeric(as.character(PCA_loadings$ME_cluster))


# Boxplots of the loadings per named microexon cluster, one plot per
# component. PC1 is plotted with its sign flipped.
Sup_loading.A <- ggplot(
  merge(PCA_loadings, ME_cluster_names, by.x = "ME_clusters", by.y = "ME_cluster")
) +
  geom_boxplot(aes(x = ME_cluster.name, y = -PC1)) +
  labs(x = "Microexon cluster", y = "Loading factors in PC1") +
  theme(axis.text.x = element_text(angle = 90))



Sup_loading.B <- ggplot(
  merge(PCA_loadings, ME_cluster_names, by.x = "ME_clusters", by.y = "ME_cluster")
) +
  geom_boxplot(aes(x = ME_cluster.name, y = PC2)) +
  labs(x = "Microexon cluster", y = "Loading factors in PC2") +
  theme(axis.text.x = element_text(angle = 90))



Sup_loading.C <- ggplot(
  merge(PCA_loadings, ME_cluster_names, by.x = "ME_clusters", by.y = "ME_cluster")
) +
  geom_boxplot(aes(x = ME_cluster.name, y = PC3)) +
  labs(x = "Microexon cluster", y = "Loading factors in PC3") +
  theme(axis.text.x = element_text(angle = 90))


# Stack the three panels vertically with automatic A/B/C labels.
plot_grid(Sup_loading.A, Sup_loading.B, Sup_loading.C, labels = "AUTO", ncol = 1)


# Rows per mapped gene within the "Axon_guidance" GO category.
PPI_centrality_GO[GO == "Axon_guidance", .N, by = mapped_gene]

Tables


# Supplementary table S2: the GABAergic vs glutamatergic comparison joined to
# the gene annotation table on the microexon ID, with duplicate rows dropped.
TS2 <- unique(
  merge(
    Sig_GABAergic_Neuron_vs_Glutamatergic_Neuron,
    gene_table.TOTAL,
    by.x = "exon_ID",
    by.y = "ME"
  )
)


# Human-readable headers; this assumes the merged table has exactly these
# ten columns in this order.
colnames(TS2) <- c(
  "Coordinates",
  "Probability",
  "DeltaPsi",
  "Mean Probability",
  "Mean DeltaPsi",
  "Significance unpool",
  "Significance Pool",
  "Ensembl transcript ID",
  "Ensembl gene ID",
  "Gene name"
)

fwrite(
  TS2,
  file = "../Final_Figures/Supplementary/Table_S2.csv",
  append = FALSE,
  quote = "auto",
  sep = ",",
  row.names = FALSE,
  col.names = TRUE
)
#R -e  'rmarkdown::render("src/final_filters2.Rmd",params = list(ME_table="/lustre/scratch117/cellgen/team218/gp7/Micro-exons/Runs/Paper/MicroExonator/Round2/TOTAL.ME_centric.txt", ME_coverage="/lustre/scratch117/cellgen/team218/gp7/Micro-exons/Runs/Paper/MicroExonator/Round2/TOTAL.filter1.ME_SJ_coverage", ME_matches_file="/lustre/scratch117/cellgen/team218/gp7/Micro-exons/Runs/Paper/MicroExonator/Round2/TOTAL.ME_centric.ME_matches.txt", out_filtered_ME="/lustre/scratch117/cellgen/team218/gp7/Micro-exons/Runs/Paper/MicroExonator/Report/test/out_filtered_ME.txt", out_low_scored_ME="/lustre/scratch117/cellgen/team218/gp7/Micro-exons/Runs/Paper/MicroExonator/Report/test/out_low_scored_ME.txt", out_shorter_than_3_ME="/lustre/scratch117/cellgen/team218/gp7/Micro-exons/Runs/Paper/MicroExonator/Report/test/out_shorter_than_3_ME.txt", min_number_files_detected=3, out_filtered_ME_cov="/lustre/scratch117/cellgen/team218/gp7/Micro-exons/Runs/Paper/MicroExonator/Report/test/out_filtered_ME.cov.txt" ), output_format="pdf_document", output_file="/lustre/scratch117/cellgen/team218/gp7/Micro-exons/Runs/Paper/MicroExonator/Report/report.pdf")'

BetaDist

# Exploratory comparison against the BetaDist results for the GABAergic vs
# glutamatergic contrast. Microexon IDs are underscore-separated
# (e.g. chr4_-_76139293_76139302), so they are split on "_" and rebuilt as
# "chr:start-end" strings to match the Coord column of the BetaDist table.
tasic.betadist <- fread("../../Single_cell/BetaDist/Sig_nodes/GABA-ergic_Neuron_vs_Glutamatergic_Neuron.txt")
# NOTE(review): stri_split_fixed() comes from stringi, which is not attached
# in the visible part of the file — confirm it is loaded earlier.
ME_final[, .( stri_split_fixed(ME, "_")) ]
Sig_GABAergic_Neuron_vs_Glutamatergic_Neuron

library(splitstackshape)
# Coordinates for ME_final rows whose ID has no fifth "_" field; 1 is added to
# the third field. NOTE(review): presumably a 0-based to 1-based start
# conversion — confirm.
ME_final.chords <- cSplit(ME_final, 'ME', '_')[ is.na(ME_5) , .( paste0(ME_1, ":", as.numeric(as.character(ME_3))+1,  "-", ME_4) ) ]$V1


# NOTE(review): this overwrites the ME_final.chords computed just above with
# the coordinates of the significant microexons only.
ME_final.chords <- cSplit(Sig_GABAergic_Neuron_vs_Glutamatergic_Neuron, 'exon_ID', '_')[  , .( paste0(exon_ID_1, ":", as.numeric(as.character(exon_ID_3))+1,  "-", exon_ID_4) ) ]$V1

# BetaDist rows whose coordinate matches one of the rebuilt strings.
tasic.betadist[ Coord %in%  ME_final.chords,]

# Same coordinate string as above but without the +1 on the start field.
cSplit(ME_final, 'ME', '_')[ is.na(ME_5) , .( paste0(ME_1, ":", ME_3,  "-", ME_4) ) ]


# NOTE(review): ME_final.chords is the V1 column extracted above (a plain
# vector), so this two-index subset with an empty trailing argument looks like
# leftover scratch code that cannot run as written — confirm and remove.
ME_final.chords[, .(as.numeric(as.character(ME_3)), ) ]

# NOTE(review): .(ME) in the i position is a join against a variable named ME,
# not a column selection; looks like leftover scratch code — confirm.
ME_final[ .(ME)]

Supplementary tables.


# Distinct exon IDs that pass |DeltaPsi| >= 0.1 and Probability > 0.9 on both
# sides (.x and .y) of the merged comparison tables.
# NOTE(review): Probability is compared with a strict > 0.9 here, while
# MHN_diff.whippet_ME / F_diff.whippet_ME use >= 0.9 — confirm which
# threshold is intended.
MHN.diff <- Delta_HNM_merge[
  abs(DeltaPsi.x) >= 0.1 & Probability.x > 0.9 &
    abs(DeltaPsi.y) >= 0.1 & Probability.y > 0.9,
  unique(exon_ID)
]
F.diff <- Delta_F_merge[
  abs(DeltaPsi.x) >= 0.1 & Probability.x > 0.9 &
    abs(DeltaPsi.y) >= 0.1 & Probability.y > 0.9,
  unique(exon_ID)
]

# Previews of the per-tissue columns.
Delta_HNM_whippet_ME_age_diff_total[
  ,
  .(ME = exon_ID, HNM.diff_age = diff_age, HNM.change_dir = change_dir)
]
Delta_F_whippet_ME_age_diff_total[
  ,
  .(ME = exon_ID, F.diff_age = diff_age, F.change_dir = change_dir)
]
Delta_SKM_whippet_ME[, .(ME = exon_ID, SKM.diff_age = diff_high)]
Delta_Heart_whippet_ME[, .(ME = exon_ID, Heart.diff_age = diff_high)]
Delta_AG_whippet_ME[, .(ME = exon_ID, AG.diff_age = diff_high)]

# Supplementary table 1: the per-microexon columns of ME_final.out, extended
# below with differential-inclusion flags.
TS1 <- ME_final.out[
  ,
  .(
    ME, transcript,
    sum_total_coverage, total_SJs, total_coverages,
    len_micro_exon_seq_found, micro_exon_seq_found,
    total_number_of_micro_exons_matches,
    U2_scores, mean_conservations_vertebrates,
    P_MEs, total_ME, ME_P_value, ME_type,
    ensembl_transcript_id, type, Alt5_3,
    ME_cluster.type, ME_cluster.name, ME_cluster.number,
    PC1, PC2, PC3,
    In.10_percent_of_bulk
  )
]


# Membership flags. TS1 has no MHN.diff / F.diff column yet, so the names on
# the right-hand side resolve to the ID vectors of the same name.
TS1[, MHN.diff := ME %in% MHN.diff]
TS1[, F.diff := ME %in% F.diff]

# Left-join the per-tissue results on ME; microexons without a match get NA.
TS1 <- merge(
  TS1,
  Delta_HNM_whippet_ME_age_diff_total[
    ,
    .(ME = exon_ID, HNM.diff_age = diff_age, HNM.change_dir = change_dir)
  ],
  by = "ME", all.x = TRUE
)
TS1 <- merge(
  TS1,
  Delta_F_whippet_ME_age_diff_total[
    ,
    .(ME = exon_ID, F.diff_age = diff_age, F.change_dir = change_dir)
  ],
  by = "ME", all.x = TRUE
)
TS1 <- merge(
  TS1,
  Delta_SKM_whippet_ME[, .(ME = exon_ID, SKM.diff = diff_high)],
  by = "ME", all.x = TRUE
)
TS1 <- merge(
  TS1,
  Delta_Heart_whippet_ME[, .(ME = exon_ID, Heart.diff = diff_high)],
  by = "ME", all.x = TRUE
)
TS1 <- merge(
  TS1,
  Delta_AG_whippet_ME[, .(ME = exon_ID, AG.diff = diff_high)],
  by = "ME", all.x = TRUE
)

# An unmatched microexon is reported as not differential in that tissue.
TS1[is.na(SKM.diff), SKM.diff := FALSE]
TS1[is.na(Heart.diff), Heart.diff := FALSE]
TS1[is.na(AG.diff), AG.diff := FALSE]

# Supplementary table 4 is the zebrafish table as-is.
TS4 <- total_zebra

# Supplementary table 5: mouse/zebrafish pairs that have a mouse coordinate.
TS5 <- Mouse_Zebra.IDs.table.summer[
  !is.na(mouse.coord),
  .(
    mouse.coord, mouse.len,
    mouse.cluster_name, mouse.cluster_type,
    mouse.anno, mouse.ensembl,
    zebra.coord, zebra.len, zebra.ensembl
  )
]


# Renamed view of the HNMF table.
non_published <- Delta_HNMF_whippet_ME_age_diff_total[
  ,
  .(
    ENSEMBL_gene_id = ensembl_gene_id,
    mgi_symbol = mgi_symbol.x,
    exon_ID,
    microexon.length = ME_len,
    change.direction = change_dir,
    MHN.diff_start = diff_age.x,
    F.MHN.diff_start = diff_age.y,
    annotation.status = type
  )
]


# Supplementary table 6 is the Tasic differential-node table as-is.
TS6 <- Tasic_total_diff_nodes


# Print the ENCODE metadata table for inspection.
ENCODE_metadata


# Per-sample table with file accession, tissue name, age and sample cluster.
Tissue_pool.table <- Tissue_clusters[
  ,
  .(
    File.accession,
    Tissue = name,
    Age.DPC = age,
    Tissue.cluster = cluster
  )
]


# Assign each sample to a bulk comparison group. Later statements overwrite
# earlier ones: mid/hindbrain and neural tube ("MHN_<age>") and forebrain
# ("F_<age>") samples are labelled first, then every sample in clusters 1, 8
# or 6 becomes "Control", and finally heart, skeletal muscle and adrenal gland
# samples get their own labels.
Tissue_pool.table[
  Tissue %in% c("hindbrain", "neural tube", "midbrain") &
    Age.DPC %in% c(10.5, 11.5, 12.5, 13.5, 14.5, 15.5, 16.5),
  Bulk.sample.group := paste("MHN", Age.DPC, sep = "_")
]
Tissue_pool.table[
  Tissue %in% c("forebrain") &
    Age.DPC %in% c(10.5, 11.5, 12.5, 13.5, 14.5, 15.5, 16.5, 21),
  Bulk.sample.group := paste("F", Age.DPC, sep = "_")
]

Tissue_pool.table[Tissue.cluster %in% c(1, 8, 6), Bulk.sample.group := "Control"]

Tissue_pool.table[Tissue == "heart", Bulk.sample.group := "Heart"]
Tissue_pool.table[Tissue == "skeletal muscle tissue", Bulk.sample.group := "SKM"]
# NOTE(review): the adrenal gland label is "AD" here, while related objects in
# this file use "AG" (e.g. Delta_AG_whippet_ME) — confirm the intended label.
Tissue_pool.table[Tissue == "adrenal gland", Bulk.sample.group := "AD"]

# This replaces any earlier TS2 with the sample-group table.
TS2 <- Tissue_pool.table
# View() opens a data viewer and is only useful in an interactive session;
# skip it when the script is run or rendered non-interactively.
if (interactive()) {
  View(Tissue_pool.table)
}
# Print the supplementary tables for a final check.
# NOTE(review): TS3 is not assigned in this part of the file — confirm it is
# defined earlier.
TS1
TS2
TS3
TS4
TS5
TS6[!is.na(microexon_ID) & is.diff == TRUE, ]



# Write the supplementary tables as tab-separated files with a header row.
# TS3 is not written here.
fwrite(
  TS1,
  file = "../Final_Figures/Supplementary/TS1.tsv",
  append = FALSE, quote = "auto", sep = "\t",
  row.names = FALSE, col.names = TRUE
)
fwrite(
  TS2,
  file = "../Final_Figures/Supplementary/TS2.tsv",
  append = FALSE, quote = "auto", sep = "\t",
  row.names = FALSE, col.names = TRUE
)
fwrite(
  TS4,
  file = "../Final_Figures/Supplementary/TS4.tsv",
  append = FALSE, quote = "auto", sep = "\t",
  row.names = FALSE, col.names = TRUE
)
fwrite(
  TS5,
  file = "../Final_Figures/Supplementary/TS5.tsv",
  append = FALSE, quote = "auto", sep = "\t",
  row.names = FALSE, col.names = TRUE
)
fwrite(
  TS6,
  file = "../Final_Figures/Supplementary/TS6.tsv",
  append = FALSE, quote = "auto", sep = "\t",
  row.names = FALSE, col.names = TRUE
)
