# NOTE(review): rm(list = ls()) wipes the workspace of whoever sources this
# script; it is kept only so that the script behaves as before.
rm(list = ls())
# Silences every warning for the rest of the run.
options(warn = -1)

#### PRELIMINARIES ############################################################

#* Uploads the needed libraries ------------------------------------------------
# library() (unlike require()) stops immediately when a package is missing,
# instead of returning FALSE and failing later with an unrelated error.
library(ggplot2)
library(data.table)
library(plotly)
library(DT)
library(R2HTML)
library(stringr)
library(jaccard) # used for the Jaccard coefficients and their exact tests

#** Sets the number of significant digits for the output -----------------------
sig_dig <- 4

#** Sets the working directory -------------------------------------------------
# Gets the default wd
default_wd <- getwd()
# NOTE(review): the script changes the working directory and never restores
# it; every output .csv is written relative to this directory.
setwd("C:/Users/Furse/Documents/OneDrive/Work/Cambridge/LTA -- Ozanne mouse model of GDM/PWD/100. Data/CSVs for LTA/300. Data outputs/") # <--- insert here the path to the working (output) directory
new_wd <- getwd()

# Sets the input directory (must end with "/": file names are pasted onto it)
inputdir <- "C:/Users/Furse/Documents/OneDrive/Work/Cambridge/LTA -- Ozanne mouse model of GDM/PWD/100. Data/CSVs for LTA/200. Data sheets/" # <--- insert here the path to the input directory

#### DEFINES FUNCTIONS #########################################################

# Counts how many elements of x are equal to zero (NA elements are not counted).
count_zeroes <- function(x) {
  length(which(x == 0))
}

#### RUN PARAMETERS ############################################################

# Regular expression matched against the input file names; its characters 2-4
# are also used as the mode label in the names of the outputs.
considered_mode <- "\\+ve"
# Patterns matched against the input file names (generation) and against the
# group labels stored in the data sheets (models).
considered_generation <- "PWD"
considered_model_1 <- "lean"
considered_model_2 <- "obese"
# Sets a threshold to exclude rows from the analysis: the rows containing more
# than [percentage_of_zeroes/100] zeroes will be excluded from the analysis
percentage_of_zeroes <- 33

#### DATA UPLOAD ###############################################################
# - Uploads the .csv files containing the information related to the F1A, PW, -ve datasets.
# - Separates the metadata from the main data
# - Picks only the considered_model_1 and considered_model_2 sets
#
# Overview of this part of the script:
#   1. DATA UPLOAD: reads one data sheet per tissue and builds, for each
#      model, a samples table and a copy without the zero-rich rows.
#   2. A LIPIDS: lipids found in every tissue.
#   3. U LIPIDS: lipids found in one tissue only.
#   4. B LIPIDS: lipids shared by a pair of tissues.
# For each lipid set the script writes the list/matrix of lipids, the number
# of lipids per class (first five characters of the lipid name) and a table of
# per-class Jaccard coefficients between the two models.
# Every result is stored both as a variable and as "<variable name>.csv".

#### HELPERS ###################################################################

# Returns the entries of `file_names` matching, in turn, the `mode`,
# `generation` and `tissue` regular expressions.
find_tissue_files <- function(file_names, mode, generation, tissue) {
  hits <- grep(mode, file_names, value = TRUE)
  hits <- grep(generation, hits, value = TRUE)
  grep(tissue, hits, value = TRUE)
}

# Returns the columns of `intensities` whose label in `group_labels` matches
# `model`, with the rows named after the Lipid.variable column.
select_model_samples <- function(intensities, group_labels, model) {
  samples <- intensities[, grep(model, group_labels), drop = FALSE]
  rownames(samples) <- intensities$Lipid.variable
  samples
}

# Removes the rows of `samples` whose number of zeroes is greater than
# `max_zero_percent` percent of the number of columns of `samples`.
# Logical indexing is used on purpose: `x[-which(cond), ]` returns no rows at
# all when no row satisfies `cond`.
# NOTE(review): as in the original code, zeroes are counted from the third
# column on while the threshold uses all the columns; `samples` seems to hold
# sample columns only, so the first two may be skipped by mistake - confirm.
drop_zero_rich_rows <- function(samples, max_zero_percent) {
  counted_columns <- setdiff(seq_len(ncol(samples)), 1:2)
  counted <- as.matrix(samples[, counted_columns, drop = FALSE])
  zero_counts <- rowSums(counted == 0, na.rm = TRUE)
  max_zeroes <- ncol(samples) * max_zero_percent / 100
  samples[zero_counts <= max_zeroes, , drop = FALSE]
}

# Stores `object` as the variable `name` in the calling environment and writes
# it to "<name>.csv" in the working directory.
save_result <- function(object, name) {
  assign(name, object, envir = parent.frame())
  write.csv(object, file = paste0(name, ".csv"))
}

# Unifies the classes MG, DG and TG under the class Glyc in the lipid names.
unify_glycerides <- function(lipids) {
  gsub("MGXX|DGXX|TGXX", "Glyc", lipids)
}

# Counts the lipids of each class, the class being the first five characters
# of the lipid name. Returns a one-column data frame named `column_name` with
# one row per class (in order of first appearance); zero rows for no lipids.
count_lipid_classes <- function(lipids, column_name) {
  prefixes <- substr(as.character(lipids), start = 1, stop = 5)
  classes <- unique(prefixes)
  counts <- data.frame(
    tabulate(match(prefixes, classes), nbins = length(classes)),
    row.names = classes
  )
  names(counts) <- column_name
  counts
}

# Merges the per-class counts of the two models (classes missing in one model
# get 0) and appends a "Glyc" row holding the column sums of the DG, MG and TG
# classes.
merge_class_counts <- function(counts_1, counts_2) {
  merged <- merge(counts_1, counts_2, by = "row.names", all = TRUE)
  merged[is.na(merged)] <- 0
  rownames(merged) <- merged[, 1]
  merged <- merged[, -1, drop = FALSE]
  glycerides <- merged[grep("DGX|MGX|TGX", rownames(merged)), , drop = FALSE]
  merged[nrow(merged) + 1, ] <- colSums(glycerides)
  rownames(merged)[nrow(merged)] <- "Glyc"
  merged
}

# Builds a character matrix with one row per lipid of the union of `lipids_1`
# and `lipids_2` and one column per model: a cell holds the lipid name when the
# lipid belongs to that model's set and "0" otherwise.
build_presence_matrix <- function(lipids_1, lipids_2, model_names,
                                  sort_rows = TRUE) {
  all_lipids <- as.character(union(lipids_1, lipids_2))
  if (sort_rows) {
    all_lipids <- sort(all_lipids)
  }
  presence <- matrix("0", nrow = length(all_lipids), ncol = 2)
  colnames(presence) <- model_names
  rownames(presence) <- all_lipids
  in_1 <- all_lipids %in% lipids_1
  in_2 <- all_lipids %in% lipids_2
  presence[in_1, 1] <- all_lipids[in_1]
  presence[in_2, 2] <- all_lipids[in_2]
  presence
}

# For each class, compares the presence/absence vectors of the two models over
# the rows of `presence` whose name starts with the class. Returns a numeric
# matrix with one row per class and the columns "Distance" (value returned by
# jaccard::jaccard()) and "Pvalue" (exact test); classes without any row are
# left as NA.
# NOTE(review): jaccard::jaccard() is documented as a similarity coefficient;
# confirm that the "Distance" label is the intended one.
compute_class_jaccard <- function(classes, presence) {
  result <- matrix(NA_real_, nrow = length(classes), ncol = 2)
  colnames(result) <- c("Distance", "Pvalue")
  rownames(result) <- classes
  lipid_names <- rownames(presence)
  if (is.null(lipid_names)) {
    lipid_names <- character(0)
  }
  is_present <- presence != "0"
  for (i in seq_along(classes)) {
    members <- startsWith(lipid_names, classes[i])
    if (!any(members)) {
      next
    }
    x <- as.numeric(is_present[members, 1])
    y <- as.numeric(is_present[members, 2])
    test <- jaccard::jaccard.test(x, y, method = "exact")
    result[i, ] <- c(jaccard::jaccard(x, y), test$pvalue)
  }
  result
}

#### DATA UPLOAD (continued) ###################################################

# Only the files whose name is exactly 17 characters long are considered; the
# mode, tissue and generation codes are read from fixed positions of the name.
files_names_originals <- list.files(inputdir)
files_names_originals <-
  files_names_originals[nchar(files_names_originals) == 17]
modes <- unique(substr(files_names_originals, start = 1, stop = 3))
tissues <- unique(substr(files_names_originals, start = 6, stop = 8))
generations <- unique(substr(files_names_originals, start = 11, stop = 13))

# Labels shared by the names of all the outputs
mode_label <- substr(considered_mode, 2, 4)
run_tag <- paste0(considered_generation, "_", mode_label)
models_tag <- paste0(run_tag, "_", considered_model_1, "_", considered_model_2)
model_names <- c(considered_model_1, considered_model_2)

# PRODUCES TISSUE-SPECIFIC MATRICES
# For each tissue, produces two matrices (one for each considered_model)
# in which the columns are the samples and the rows are the lipids.
# all_tissues_1 / all_tissues_2 collect, per tissue with data, the lipids that
# survive the zero filter in model 1 / model 2.
all_tissues_1 <- list()
all_tissues_2 <- list()
for (k in seq_along(tissues)) {
  tissue <- tissues[k]
  tissue_files <- find_tissue_files(
    files_names_originals, considered_mode, considered_generation, tissue
  )
  if (length(tissue_files) == 0) {
    next
  }
  if (length(tissue_files) > 1) {
    stop(
      "More than one input file matches tissue '", tissue, "': ",
      paste(tissue_files, collapse = ", "),
      call. = FALSE
    )
  }
  print(tissue)

  # The same file is read twice: once from the top (metadata, whose 4th row
  # holds the group label of each column) and once skipping the first 10 lines
  # (main data).
  input_path <- paste0(inputdir, tissue_files)
  metadata <- read.csv(input_path, stringsAsFactors = FALSE)
  intensities <- read.csv(input_path, stringsAsFactors = FALSE, skip = 10)
  intensities <- intensities[!is.na(intensities$m.z), ]
  group_labels <- vapply(metadata[4, ], as.character, character(1))

  tissue_tag <- paste0(tissue, "_", run_tag)
  assign(paste0(tissue_tag, "_metadata"), metadata)
  assign(tissue_tag, intensities)

  samples_1 <- select_model_samples(
    intensities, group_labels, considered_model_1
  )
  samples_2 <- select_model_samples(
    intensities, group_labels, considered_model_2
  )
  # Excludes the rows in which the share of zeroes is greater than
  # percentage_of_zeroes percent
  samples_1_nozeroes <- drop_zero_rich_rows(samples_1, percentage_of_zeroes)
  samples_2_nozeroes <- drop_zero_rich_rows(samples_2, percentage_of_zeroes)

  assign(paste0(tissue_tag, "_", considered_model_1), samples_1)
  assign(paste0(tissue_tag, "_", considered_model_2), samples_2)
  assign(
    paste0(tissue_tag, "_", considered_model_1, "_nozeroes"),
    samples_1_nozeroes
  )
  assign(
    paste0(tissue_tag, "_", considered_model_2, "_nozeroes"),
    samples_2_nozeroes
  )

  all_tissues_1[[tissue]] <- rownames(samples_1_nozeroes)
  all_tissues_2[[tissue]] <- rownames(samples_2_nozeroes)
  # Legacy per-tissue scratch variable, kept for backward compatibility
  assign(paste0("xx_", k), rownames(samples_2_nozeroes))
}

if (length(all_tissues_1) == 0) {
  stop(
    "No input file in '", inputdir,
    "' matches the considered mode and generation.",
    call. = FALSE
  )
}

#### A LIPIDS ##################################################################
# Finds the A-type lipids for both the considered models ----------------------
# The A-lipids of a model are the lipids present in all its tissue-specific
# matrices (intersection of the row names).
gg_1 <- Reduce(intersect, all_tissues_1)
gg_2 <- Reduce(intersect, all_tissues_2)
save_result(gg_1, paste0("A_Lipids_", run_tag, "_", considered_model_1))
save_result(gg_2, paste0("A_Lipids_", run_tag, "_", considered_model_2))

# Counts the A-lipids in each class and merges the two models
A_lipids_classes_counts_tot <- merge_class_counts(
  count_lipid_classes(gg_1, paste0("A_lipids_", considered_model_1)),
  count_lipid_classes(gg_2, paste0("A_lipids_", considered_model_2))
)
save_result(
  A_lipids_classes_counts_tot,
  paste0("A_lipids_classes_counts_", models_tag)
)

# Computes the Jaccard distances between models -------------------------------
# Matrix of the A-lipids with their original names: not needed for the Jaccard
# computation, only written out.
save_result(
  build_presence_matrix(gg_1, gg_2, model_names),
  paste0("A_Lipids_all_", models_tag)
)

# Same matrix after unifying the classes MG, DG and TG under the class Glyc
A_lipids_matrix <- build_presence_matrix(
  unify_glycerides(gg_1), unify_glycerides(gg_2), model_names
)
save_result(A_lipids_matrix, paste0("A_Lipids_", models_tag))

# In +ve mode the DG/TG/MG classes are replaced by the single class "Glyc"
# (already the last row of the counts table, hence the unique()).
classes <- rownames(A_lipids_classes_counts_tot)
if (considered_mode == "\\+ve") {
  classes <- unique(c(classes[!grepl("DG|TGX|MGX", classes)], "Glyc"))
}
Jaccard_distances <- compute_class_jaccard(classes, A_lipids_matrix)
# The double underscore in this name is historical and kept on purpose so that
# the output file name does not change.
save_result(
  Jaccard_distances,
  paste0("Jaccard_distances_A_Lipids_", "_", models_tag)
)

#### U LIPIDS ##################################################################
# Finds the U-lipids for each tissue: the lipids present in the considered
# tissue and in none of the other tissues of the same model.
for (Tissue in names(all_tissues_1)) {
  other_tissues_1 <- all_tissues_1[names(all_tissues_1) != Tissue]
  other_tissues_2 <- all_tissues_2[names(all_tissues_2) != Tissue]
  uu_1 <- setdiff(all_tissues_1[[Tissue]], Reduce(union, other_tissues_1))
  uu_2 <- setdiff(all_tissues_2[[Tissue]], Reduce(union, other_tissues_2))

  # **** Produces the Tissue-specific U-lipids matrix --------------------------
  save_result(
    build_presence_matrix(uu_1, uu_2, model_names),
    paste0("U_Lipids_", "all_", Tissue, "_", models_tag)
  )

  # **** Counts the U-lipids in each class -------------------------------------
  U_lipids_classes_counts_tot <- merge_class_counts(
    count_lipid_classes(uu_1, paste0("U_lipids_", considered_model_1)),
    count_lipid_classes(uu_2, paste0("U_lipids_", considered_model_2))
  )
  save_result(
    U_lipids_classes_counts_tot,
    paste0("U_lipids_classes_counts_", Tissue, "_", models_tag)
  )

  # **** Matrix with MG, DG and TG unified under the class Glyc ----------------
  U_lipids_matrix <- build_presence_matrix(
    unify_glycerides(uu_1), unify_glycerides(uu_2), model_names
  )
  save_result(U_lipids_matrix, paste0("U_Lipids_", Tissue, "_", models_tag))

  # **** Jaccard distances, only when both models have U-lipids ----------------
  if (length(uu_1) != 0 && length(uu_2) != 0) {
    Jaccard_distances <- compute_class_jaccard(
      rownames(U_lipids_classes_counts_tot), U_lipids_matrix
    )
    save_result(
      Jaccard_distances,
      paste0("Jaccard_distances_U_Lipids_", Tissue, "_", models_tag)
    )
  }
}

#### B LIPIDS ##################################################################
# Creates the IDs of the pairwise comparisons (only tissues with data; at
# least two tissues are needed to form a pair)
if (length(all_tissues_1) >= 2) {
  possible_pairs <- combn(names(all_tissues_1), 2)
} else {
  possible_pairs <- matrix(character(0), nrow = 2, ncol = 0)
}

# Compares the two models for each pair of tissues: the B-lipids of a model
# are the lipids present in both tissues of the pair.
for (d in seq_len(ncol(possible_pairs))) {
  tissues_to_compare <- possible_pairs[, d]
  pair_tag <- paste0(tissues_to_compare[1], "_", tissues_to_compare[2])

  B_lipids_1 <- intersect(
    all_tissues_1[[tissues_to_compare[1]]],
    all_tissues_1[[tissues_to_compare[2]]]
  )
  B_lipids_2 <- intersect(
    all_tissues_2[[tissues_to_compare[1]]],
    all_tissues_2[[tissues_to_compare[2]]]
  )

  # Matrix of the B-lipids with their original names (rows left unsorted, as
  # in the previous versions of this output)
  save_result(
    build_presence_matrix(
      B_lipids_1, B_lipids_2, model_names,
      sort_rows = FALSE
    ),
    paste0("B_Lipids_", "all_", pair_tag, "_", models_tag)
  )

  # Counts the B-lipids in each class ------------------------------------------
  B_lipids_classes_counts_tot <- merge_class_counts(
    count_lipid_classes(B_lipids_1, paste0("B_lipids_", considered_model_1)),
    count_lipid_classes(B_lipids_2, paste0("B_lipids_", considered_model_2))
  )
  save_result(
    B_lipids_classes_counts_tot,
    paste0("B_lipids_classes_counts_", pair_tag, "_", models_tag)
  )

  # Computes the Jaccard distances ---------------------------------------------
  # Matrix with MG, DG and TG unified under the class Glyc
  B_lipids_matrix <- build_presence_matrix(
    unify_glycerides(B_lipids_1), unify_glycerides(B_lipids_2), model_names
  )
  save_result(B_lipids_matrix, paste0("B_Lipids_", pair_tag, "_", models_tag))

  Jaccard_distances <- compute_class_jaccard(
    rownames(B_lipids_classes_counts_tot), B_lipids_matrix
  )
  save_result(
    Jaccard_distances,
    paste0("Jaccard_distances_B_Lipids_", pair_tag, "_", models_tag)
  )
}

# Re-enables the default warning behaviour
options(warn = 0)