#############################################################################
### InSpiRe ###
#############################################################################
# InSpiRe consists of 3 key functions:
#
# DE_JK, which integrates expression data with a protein interaction network
# and computes differential local flux entropies, evaluating significance via
# the jackknife.
#
# KL_JK, which integrates expression data with a protein interaction network
# and computes differential KL divergences, evaluating significance via the
# jackknife.
#
# Weighcomp_jk, which performs the final step of InSpiRe, computing the
# significance of differential expression correlations between a network
# subset defined by core genes across phenotypes via the jackknife.
#
# (1) DE_JK: A function which takes as input three numerical matrices, two
#     containing normalised microarray data for a single phenotype from the
#     same experiment and the final containing the adjacency matrix of a
#     network topology.
#     NOTE: the rows of the microarray datasets MUST be in the same order as
#     the rows of the network adjacency matrix.
#     This function outputs a matrix with 6 columns and the same number of
#     rows as there are nodes in the network/genes in the microarray. The
#     first column gives the gene names (extracted from the rownames of the
#     microarray), the second column gives the entropy of the gene in the
#     first phenotype, the third the entropy of the gene in the second
#     phenotype, the fourth gives the differential entropy, the fifth gives
#     the degree of the gene in the network and the sixth gives the jackknife
#     p-value of the differential entropy.
# (2) KL_JK: A function which takes as input three numerical matrices, two
#     containing normalised microarray data for a single phenotype from the
#     same experiment and the final containing the adjacency matrix of a
#     network topology.
#     This function outputs a matrix with four columns and as many rows as
#     genes in the microarray.
#     The first column gives gene names, the second gives the local
#     symmetrised KL divergence of the gene between the two phenotypes, the
#     third gives the degree of the node in the network and the fourth gives
#     the jackknife p-value of the KL divergence.
# (3) Weighcomp_jk: A function which takes 3 numeric matrices and a vector as
#     input. The first three matrices are the same as are input into DE_JK;
#     the vector is a list of genes (using the same identifier as nodes in
#     the microarray input dataset) which will be used as the core genes to
#     sparsify the full network. These are usually a reduced subset of genes
#     found to be significantly differentially rewiring by differential
#     entropy or KL divergence.
#     This function outputs a matrix with 6 columns and the same number of
#     rows as edges in the network of core genes and neighbours. The first
#     two columns give the gene names of the connected nodes in the network.
#     The third and fourth columns give the expression correlation between
#     connected genes across the first and second phenotype respectively.
#     The fifth column gives the absolute differential expression correlation
#     between the connected nodes across the phenotypes. The sixth column
#     gives the jackknife p-value evaluating the significance of the
#     differential correlation.
#############################################################################

# Normalised Shannon entropy of a non-negative weight vector.
#
# p: non-negative numeric vector (a row of a row-stochastic matrix); entries
#    equal to zero are excluded from the sum.
# Returns the entropy of p / sum(p) divided by log(length(p)) so the result
# lies in [0, 1]; a length-1 vector has entropy 0 by convention.
# NOTE(review): if sum(p) == 0 (isolated node) the result is NaN; callers
# neutralise such genes downstream via the degree-based p-value rule.
ent <- function(p) {
  p.norm <- p[p > 0] / sum(p)
  if (length(p) > 1) {
    -sum(log(p.norm) * p.norm) / log(length(p))
  } else {
    0
  }
}

# Kullback-Leibler divergence KL(p || q) between two weight vectors, each
# renormalised over its positive entries.
# NOTE(review): assumes p and q are positive at the same positions (true for
# the row-stochastic matrices built here, whose zero pattern is fixed by the
# network adjacency); if the supports differed, the terms would misalign.
KL <- function(p, q) {
  p.norm <- p[p > 0] / sum(p)
  q.norm <- q[q > 0] / sum(q)
  sum(p.norm * log(p.norm / q.norm))
}

# Internal helper: row-stochastic matrix of absolute expression correlations
# restricted to the edges of the network.
#
# x:   genes-by-samples expression matrix (rows ordered as in adj).
# adj: network adjacency matrix.
# Returns abs(cor) with non-edges zeroed, renormalised so each row sums to 1
# (rows of isolated nodes become NaN).
.stoch_mat <- function(x, adj) {
  w <- abs(cor(t(x)))
  w[as.matrix(which(adj == 0))] <- 0
  w / apply(w, 1, sum)
}

# Internal helper: per-row jackknife mean and variance from a matrix whose
# first ncol-1 columns are leave-one-out replicates and whose final column is
# the full-sample estimate, using the standard pseudo-value construction.
.jk_stats <- function(jk) {
  n <- ncol(jk) - 1
  pseudo <- mapply(function(i) {
    n * as.numeric(jk[, n + 1]) - (n - 1) * as.numeric(jk[, i])
  }, 1:n)
  list(
    mean = apply(pseudo, 1, mean),
    var  = apply(pseudo, 1, var) / n
  )
}

# Differential local flux entropy with jackknife significance.
#
# x_1, x_2: genes-by-samples expression matrices for the two phenotypes,
#           rows in the same order as the rows of int.red.
# int.red:  network adjacency matrix.
# Side effect: draws a histogram of the p-values of genes with degree > 1.
# Returns a character matrix, one row per gene, columns: gene name, entropy
# in x_1, entropy in x_2, differential entropy (x_2 - x_1), network degree,
# jackknife p-value.
DE_JK <- function(x_1, x_2, int.red) {
  d <- apply(int.red, 1, sum)

  # Leave-one-out entropies: column j drops sample j; the extra final
  # column (j == ncol + 1) holds the full-sample estimate.
  jk.ent <- function(x) {
    mapply(function(j) {
      x.r <- if (j != ncol(x) + 1) x[, -j] else x
      P.r <- .stoch_mat(x.r, int.red)
      mapply(function(i) ent(P.r[i, ]), 1:nrow(P.r))
    }, 1:(ncol(x) + 1))
  }
  e.1.jk <- jk.ent(x_1)
  e.2.jk <- jk.ent(x_2)

  s1 <- .jk_stats(e.1.jk)
  s2 <- .jk_stats(e.2.jk)

  # Two-sided z-test on the jackknife estimate of the entropy difference;
  # genes with degree <= 1 have no meaningful local entropy, so p = 1.
  z <- (s2$mean - s1$mean) / sqrt(s1$var + s2$var)
  pvalue <- ifelse(d > 1, 2 * pnorm(-abs(z), mean = 0, sd = 1), 1)

  data <- cbind(
    rownames(x_1),
    e.1.jk[, ncol(e.1.jk)],
    e.2.jk[, ncol(e.2.jk)],
    as.numeric(e.2.jk[, ncol(e.2.jk)]) - as.numeric(e.1.jk[, ncol(e.1.jk)]),
    d,
    pvalue
  )
  colnames(data) <- c("EntrezGene", "x_1 Entropy", "x_2 Entropy",
                      "Differential Entropy (x_2-x_1)", "Degree",
                      "Differential Entropy P-value")

  # FIX: the original used data[-which(d == 1), 6], which selects ZERO rows
  # when no gene has degree 1 and then crashes hist() on an empty vector.
  # Logical subsetting plus a guard handles that case.
  if (any(d > 1)) {
    hist(as.numeric(data[d > 1, 6]),
         main = "P-value histogram", xlab = "p-values")
  }
  as.matrix(data)
}

# Local symmetrised KL divergence between phenotypes with jackknife p-values.
#
# x_1, x_2: genes-by-samples expression matrices for the two phenotypes,
#           rows in the same order as the rows of int.red.
# int.red:  network adjacency matrix.
# Side effect: draws a histogram of the p-values of genes with degree > 1.
# Returns a character matrix, one row per gene, columns: gene name,
# symmetrised KL divergence, network degree, jackknife p-value.
KL_JK <- function(x_1, x_2, int.red) {
  d <- apply(int.red, 1, sum)

  # Full-sample stochastic matrices used as the fixed references.
  P_1 <- .stoch_mat(x_1, int.red)
  P_2 <- .stoch_mat(x_2, int.red)

  # Per-gene KL divergence symmetrised over both senses.
  sym.kl <- function(A, B) {
    mapply(function(i) (KL(A[i, ], B[i, ]) + KL(B[i, ], A[i, ])) / 2,
           1:nrow(A))
  }

  kl <- sym.kl(P_1, P_2)

  # Jackknife replicates: recompute one phenotype's matrix with sample j
  # removed while keeping the other phenotype's full-sample matrix fixed.
  kl.1.jk <- mapply(function(j) sym.kl(.stoch_mat(x_1[, -j], int.red), P_2),
                    1:ncol(x_1))
  kl.2.jk <- mapply(function(j) sym.kl(P_1, .stoch_mat(x_2[, -j], int.red)),
                    1:ncol(x_2))

  kl.jk <- cbind(kl.1.jk, kl.2.jk, kl)
  s <- .jk_stats(kl.jk)

  z <- s$mean / sqrt(s$var)
  pvalue <- ifelse(d > 1, 2 * pnorm(-abs(z), mean = 0, sd = 1), 1)

  data <- cbind(rownames(x_1), kl, d, pvalue)
  colnames(data) <- c("EntrezGene", "KL Divergence (averaged over sense)",
                      "Degree", "P-value")

  # FIX: same empty -which() crash as in DE_JK when no gene has degree 1.
  if (any(d > 1)) {
    hist(as.numeric(data[d > 1, 4]), main = "pvalue histogram",
         xlab = "pvalue")
  }
  as.matrix(data)
}

# Jackknife significance of differential expression correlations over the
# edges of the core-gene sub-network.
#
# x_1, x_2: genes-by-samples expression matrices for the two phenotypes,
#           rows in the same order as the rows of int.red.
# int.red:  network adjacency matrix (assumed symmetric).
# l:        vector of core gene identifiers (matched to rownames(int.red)).
# Side effect: draws a histogram of the edge p-values.
# Returns a character matrix, one row per undirected edge, columns: the two
# gene names, the edge correlation in x_1 and in x_2, the absolute
# differential correlation and its jackknife p-value.
Weighcomp_jk <- function(x_1, x_2, int.red, l) {
  # Restrict the network and expression data to the core genes.
  m <- match(l, rownames(int.red))
  int2 <- int.red[m, m]
  deg <- apply(int2, 1, sum)
  x_1 <- x_1[m, ]
  x_2 <- x_2[m, ]

  # Drop genes isolated within the core sub-network.
  # FIX: the original condition was length(which(deg == 0) != 0) — a
  # misplaced parenthesis that only worked by accident; any() states the
  # intent directly.
  if (any(deg == 0)) {
    keep <- deg != 0
    int2 <- int2[keep, keep]
    deg <- deg[keep]
    x_1 <- x_1[keep, ]
    x_2 <- x_2[keep, ]
  }

  # Vector of expression correlations over the network edges (both
  # directions of each undirected edge, in column-major order).
  edge.cor <- function(x) {
    C <- cor(t(x))
    C[as.matrix(which(int2 == 0))] <- 0
    C[which(C != 0)]
  }
  C1.z <- edge.cor(x_1)
  C2.z <- edge.cor(x_2)

  # Jackknife on the absolute differential correlation: drop one sample at
  # a time from x_1, then from x_2, keeping the other phenotype fixed.
  e.1.jk <- mapply(function(j) as.numeric(abs(edge.cor(x_1[, -j]) - C2.z)),
                   1:ncol(x_1))
  e.2.jk <- mapply(function(j) as.numeric(abs(C1.z - edge.cor(x_2[, -j]))),
                   1:ncol(x_2))

  diff.true <- as.numeric(abs(C1.z - C2.z))
  e <- cbind(e.1.jk, e.2.jk, diff.true)
  s <- .jk_stats(e)

  z <- s$mean / sqrt(s$var)
  pvalue <- 2 * pnorm(-abs(z), mean = 0, sd = 1)

  # Gene names at either end of each edge, in the same column-major order
  # as the correlation vectors above.
  idx <- which(int2 != 0, arr.ind = TRUE)
  edges <- cbind(rownames(int2)[idx[, 2]], rownames(int2)[idx[, 1]])

  # FIX: the original placed e.1.jk[, ncol(e.1.jk)] (the last jackknife
  # replicate of x_1) in column 5; the documented value is the full-sample
  # absolute differential correlation.
  data <- cbind(edges, C1.z, C2.z, diff.true, pvalue)
  colnames(data) <- c("EntrezGene_1", "EntrezGene_2", "x_1 correlation",
                      "x_2 correltaion",  # (sic) kept so existing callers keep working
                      "Absolute difference in correlation",
                      "Differential correlation P-value")

  # Each undirected edge appears twice (symmetric adjacency); after sorting
  # by p-value the two copies are adjacent, so dropping every second row
  # removes the duplicates.
  data <- data[order(as.numeric(data[, 6])), ]
  # FIX: drop = FALSE keeps data a matrix when only one edge remains.
  data <- data[-seq(2, nrow(data), by = 2), , drop = FALSE]

  hist(as.numeric(data[, 6]))
  as.matrix(data)
}