#############################################################################
### InSpiRe ###
#############################################################################
# InSpiRe consists of 3 key functions:
#
# DE_JK, which integrates expression data with a protein interaction network
# and computes differential local flux entropies, evaluating significance via
# the jackknife.
#
# KL_JK, which integrates expression data with a protein interaction network
# and computes differential KL divergences, evaluating significance via the
# jackknife.
#
# Weighcomp_jk, which performs the final step of InSpiRe, computing the
# significance of differential expression correlations between a network
# subset defined by core genes across phenotypes via the jackknife.
#
# (1) DE_JK: A function which takes as input three numerical matrices, two
#     containing normalised microarray data for a single phenotype from the
#     same experiment and the final containing the adjacency matrix of a
#     network topology.
#     NOTE: the rows of the microarray datasets MUST be in the same order as
#     the rows of the network adjacency matrix.
#     This function outputs a matrix with 6 columns and the same number of
#     rows as there are nodes in the network/genes in the microarray. The
#     first column gives the gene names (extracted from the rownames of the
#     microarray), the second column gives the entropy of the gene in the
#     first phenotype, the third the entropy of the gene in the second
#     phenotype, the fourth gives the differential entropy, the fifth gives
#     the degree of the gene in the network and the sixth gives the jackknife
#     p-value of the differential entropy.
# (2) KL_JK: A function which takes as input three numerical matrices, two
#     containing normalised microarray data for a single phenotype from the
#     same experiment and the final containing the adjacency matrix of a
#     network topology.
#     This function outputs a matrix with four columns and as many rows as
#     genes in the microarray.
#     The first column gives gene names, the second gives the local
#     symmetrised KL divergence of the gene between the two phenotypes, the
#     third gives the degree of the node in the network and the fourth gives
#     the jackknife p-value of the KL divergence.
# (3) Weighcomp_jk: A function which takes 3 numeric matrices and a vector as
#     input. The first three matrices are the same as are input into DE_JK;
#     the vector is a list of genes (using the same identifier as nodes in
#     the microarray input dataset) which will be used as the core genes to
#     sparsify the full network. These are usually a reduced subset of genes
#     found to be significantly differentially rewiring by differential
#     entropy or KL divergence.
#     This function outputs a matrix with 6 columns and the same number of
#     rows as edges in the network of core genes and neighbours. The first
#     two columns give the gene names of the connected nodes in the network.
#     The third and fourth columns give the expression correlation between
#     connected genes across the first and second phenotype respectively.
#     The fifth column gives the absolute differential expression correlation
#     between the connected nodes across the phenotypes. The sixth column
#     gives the jackknife p-value evaluating the significance of the
#     differential correlation.
#############################################################################

# Normalised Shannon entropy of a non-negative weight vector.
#
# p: non-negative numeric vector (a row of a row-stochastic matrix); entries
#    equal to zero are excluded from the sum.
# Returns the entropy of p / sum(p) divided by log(length(p)) so the result
# lies in [0, 1]; a length-1 vector has entropy 0 by convention.
# NOTE(review): if sum(p) == 0 (isolated node) the result is NaN; callers
# neutralise such genes downstream via the degree-based p-value rule.
ent <- function(p) {
  p.norm <- p[p > 0] / sum(p)
  if (length(p) > 1) {
    -sum(log(p.norm) * p.norm) / log(length(p))
  } else {
    0
  }
}

# Kullback-Leibler divergence KL(p || q) between two weight vectors, each
# renormalised over its positive entries.
# NOTE(review): assumes p and q are positive at the same positions (true for
# the row-stochastic matrices built here, whose zero pattern is fixed by the
# network adjacency); if the supports differed, the terms would misalign.
KL <- function(p, q) {
  p.norm <- p[p > 0] / sum(p)
  q.norm <- q[q > 0] / sum(q)
  sum(p.norm * log(p.norm / q.norm))
}

# Internal helper: row-stochastic matrix of absolute expression correlations
# restricted to the edges of the network.
#
# x:   genes-by-samples expression matrix (rows ordered as in adj).
# adj: network adjacency matrix.
# Returns abs(cor) with non-edges zeroed, renormalised so each row sums to 1
# (rows of isolated nodes become NaN).
.stoch_mat <- function(x, adj) {
  w <- abs(cor(t(x)))
  w[as.matrix(which(adj == 0))] <- 0
  w / apply(w, 1, sum)
}

# Internal helper: per-row jackknife mean and variance from a matrix whose
# first ncol-1 columns are leave-one-out replicates and whose final column is
# the full-sample estimate, using the standard pseudo-value construction.
.jk_stats <- function(jk) {
  n <- ncol(jk) - 1
  pseudo <- mapply(function(i) {
    n * as.numeric(jk[, n + 1]) - (n - 1) * as.numeric(jk[, i])
  }, 1:n)
  list(
    mean = apply(pseudo, 1, mean),
    var  = apply(pseudo, 1, var) / n
  )
}

# Differential local flux entropy with jackknife significance.
#
# x_1, x_2: genes-by-samples expression matrices for the two phenotypes,
#           rows in the same order as the rows of int.red.
# int.red:  network adjacency matrix.
# Side effect: draws a histogram of the p-values of genes with degree > 1.
# Returns a character matrix, one row per gene, columns: gene name, entropy
# in x_1, entropy in x_2, differential entropy (x_2 - x_1), network degree,
# jackknife p-value.
DE_JK <- function(x_1, x_2, int.red) {
  d <- apply(int.red, 1, sum)

  # Leave-one-out entropies: column j drops sample j; the extra final
  # column (j == ncol + 1) holds the full-sample estimate.
  jk.ent <- function(x) {
    mapply(function(j) {
      x.r <- if (j != ncol(x) + 1) x[, -j] else x
      P.r <- .stoch_mat(x.r, int.red)
      mapply(function(i) ent(P.r[i, ]), 1:nrow(P.r))
    }, 1:(ncol(x) + 1))
  }
  e.1.jk <- jk.ent(x_1)
  e.2.jk <- jk.ent(x_2)

  s1 <- .jk_stats(e.1.jk)
  s2 <- .jk_stats(e.2.jk)

  # Two-sided z-test on the jackknife estimate of the entropy difference;
  # genes with degree <= 1 have no meaningful local entropy, so p = 1.
  z <- (s2$mean - s1$mean) / sqrt(s1$var + s2$var)
  pvalue <- ifelse(d > 1, 2 * pnorm(-abs(z), mean = 0, sd = 1), 1)

  data <- cbind(
    rownames(x_1),
    e.1.jk[, ncol(e.1.jk)],
    e.2.jk[, ncol(e.2.jk)],
    as.numeric(e.2.jk[, ncol(e.2.jk)]) - as.numeric(e.1.jk[, ncol(e.1.jk)]),
    d,
    pvalue
  )
  colnames(data) <- c("EntrezGene", "x_1 Entropy", "x_2 Entropy",
                      "Differential Entropy (x_2-x_1)", "Degree",
                      "Differential Entropy P-value")

  # FIX: the original used data[-which(d == 1), 6], which selects ZERO rows
  # when no gene has degree 1 and then crashes hist() on an empty vector.
  # Logical subsetting plus a guard handles that case.
  if (any(d > 1)) {
    hist(as.numeric(data[d > 1, 6]),
         main = "P-value histogram", xlab = "p-values")
  }
  as.matrix(data)
}

# Local symmetrised KL divergence between phenotypes with jackknife p-values.
#
# x_1, x_2: genes-by-samples expression matrices for the two phenotypes,
#           rows in the same order as the rows of int.red.
# int.red:  network adjacency matrix.
# Side effect: draws a histogram of the p-values of genes with degree > 1.
# Returns a character matrix, one row per gene, columns: gene name,
# symmetrised KL divergence, network degree, jackknife p-value.
KL_JK <- function(x_1, x_2, int.red) {
  d <- apply(int.red, 1, sum)

  # Full-sample stochastic matrices used as the fixed references.
  P_1 <- .stoch_mat(x_1, int.red)
  P_2 <- .stoch_mat(x_2, int.red)

  # Per-gene KL divergence symmetrised over both senses.
  sym.kl <- function(A, B) {
    mapply(function(i) (KL(A[i, ], B[i, ]) + KL(B[i, ], A[i, ])) / 2,
           1:nrow(A))
  }

  kl <- sym.kl(P_1, P_2)

  # Jackknife replicates: recompute one phenotype's matrix with sample j
  # removed while keeping the other phenotype's full-sample matrix fixed.
  kl.1.jk <- mapply(function(j) sym.kl(.stoch_mat(x_1[, -j], int.red), P_2),
                    1:ncol(x_1))
  kl.2.jk <- mapply(function(j) sym.kl(P_1, .stoch_mat(x_2[, -j], int.red)),
                    1:ncol(x_2))

  kl.jk <- cbind(kl.1.jk, kl.2.jk, kl)
  s <- .jk_stats(kl.jk)

  z <- s$mean / sqrt(s$var)
  pvalue <- ifelse(d > 1, 2 * pnorm(-abs(z), mean = 0, sd = 1), 1)

  data <- cbind(rownames(x_1), kl, d, pvalue)
  colnames(data) <- c("EntrezGene", "KL Divergence (averaged over sense)",
                      "Degree", "P-value")

  # FIX: same empty -which() crash as in DE_JK when no gene has degree 1.
  if (any(d > 1)) {
    hist(as.numeric(data[d > 1, 4]), main = "pvalue histogram",
         xlab = "pvalue")
  }
  as.matrix(data)
}

# Jackknife significance of differential expression correlations over the
# edges of the core-gene sub-network.
#
# x_1, x_2: genes-by-samples expression matrices for the two phenotypes,
#           rows in the same order as the rows of int.red.
# int.red:  network adjacency matrix (assumed symmetric).
# l:        vector of core gene identifiers (matched to rownames(int.red)).
# Side effect: draws a histogram of the edge p-values.
# Returns a character matrix, one row per undirected edge, columns: the two
# gene names, the edge correlation in x_1 and in x_2, the absolute
# differential correlation and its jackknife p-value.
Weighcomp_jk <- function(x_1, x_2, int.red, l) {
  # Restrict the network and expression data to the core genes.
  m <- match(l, rownames(int.red))
  int2 <- int.red[m, m]
  deg <- apply(int2, 1, sum)
  x_1 <- x_1[m, ]
  x_2 <- x_2[m, ]

  # Drop genes isolated within the core sub-network.
  # FIX: the original condition was length(which(deg == 0) != 0) — a
  # misplaced parenthesis that only worked by accident; any() states the
  # intent directly.
  if (any(deg == 0)) {
    keep <- deg != 0
    int2 <- int2[keep, keep]
    deg <- deg[keep]
    x_1 <- x_1[keep, ]
    x_2 <- x_2[keep, ]
  }

  # Vector of expression correlations over the network edges (both
  # directions of each undirected edge, in column-major order).
  edge.cor <- function(x) {
    C <- cor(t(x))
    C[as.matrix(which(int2 == 0))] <- 0
    C[which(C != 0)]
  }
  C1.z <- edge.cor(x_1)
  C2.z <- edge.cor(x_2)

  # Jackknife on the absolute differential correlation: drop one sample at
  # a time from x_1, then from x_2, keeping the other phenotype fixed.
  e.1.jk <- mapply(function(j) as.numeric(abs(edge.cor(x_1[, -j]) - C2.z)),
                   1:ncol(x_1))
  e.2.jk <- mapply(function(j) as.numeric(abs(C1.z - edge.cor(x_2[, -j]))),
                   1:ncol(x_2))

  diff.true <- as.numeric(abs(C1.z - C2.z))
  e <- cbind(e.1.jk, e.2.jk, diff.true)
  s <- .jk_stats(e)

  z <- s$mean / sqrt(s$var)
  pvalue <- 2 * pnorm(-abs(z), mean = 0, sd = 1)

  # Gene names at either end of each edge, in the same column-major order
  # as the correlation vectors above.
  idx <- which(int2 != 0, arr.ind = TRUE)
  edges <- cbind(rownames(int2)[idx[, 2]], rownames(int2)[idx[, 1]])

  # FIX: the original placed e.1.jk[, ncol(e.1.jk)] (the last jackknife
  # replicate of x_1) in column 5; the documented value is the full-sample
  # absolute differential correlation.
  data <- cbind(edges, C1.z, C2.z, diff.true, pvalue)
  colnames(data) <- c("EntrezGene_1", "EntrezGene_2", "x_1 correlation",
                      "x_2 correltaion",  # (sic) kept so existing callers keep working
                      "Absolute difference in correlation",
                      "Differential correlation P-value")

  # Each undirected edge appears twice (symmetric adjacency); after sorting
  # by p-value the two copies are adjacent, so dropping every second row
  # removes the duplicates.
  data <- data[order(as.numeric(data[, 6])), ]
  # FIX: drop = FALSE keeps data a matrix when only one edge remains.
  data <- data[-seq(2, nrow(data), by = 2), , drop = FALSE]

  hist(as.numeric(data[, 6]))
  as.matrix(data)
}