#Load necessary libraries
library(WGCNA);
library(vbsr); 
library(cluster);


#Define custom WGCNA function
WGCNA <- function (data, regulatorIndex = NULL, powers = 1:10){
    library(WGCNA)
    getFreq <- function(power, data) {
        return(rowSums(abs(cor(data, use = "p"))^power))
    }
    getFreq2 <- function(power, data1, regIndex) {
        cm <- abs(cor(data1[, regIndex], data1))^power
        diag(cm[, regIndex]) <- 0
        return(rowSums(cm))
    }
    getrsq <- function(kvec) {
        scaleFreeFitIndex(kvec)[[1]]
    }
    if (is.null(regulatorIndex)) {
        kmat <- sapply(powers, getFreq, data)
    }
    else {
        kmat <- sapply(powers, getFreq2, data, regulatorIndex)
    }
    scalefree <- apply(kmat, 2, getrsq)
    model <- list()
    if (is.null(regulatorIndex)) {
        cm <- abs(cor(data, use = "p"))^powers[which.max(scalefree)]
        diag(cm) <- 0
        model$wg <- cm
    }
    else {
        cm <- abs(cor(data[, regulatorIndex], data))^powers[which.max(scalefree)]
        diag(cm[, regulatorIndex]) <- 0
        model$wg <- cm
    }
    model$sf <- scalefree
    return(model)
}

##Run analysis for vbsr on the DMC microarray data

#Set the random seed
set.seed(47)

#Read in the data
x <- read.csv('Supplemental File 3.tsv',header=T,sep='\t',row.names=1)

#Separate data into experimental variables and microarray data
covInd <- 1:42;
exprInd <- 43:ncol(x);

#Scale data
svd1 <- svd(scale(x[,exprInd]))
xNorm <- t(scale(t(scale(x[,exprInd]))))
svd2 <- svd(xNorm);

#Condense data into 100 clusters using a k-means clustering technique
clust1<-kmeans(t(xNorm),100,nstart=1000)

#Determine the first principal component of each cluster
redExpr <- matrix(0,47,100);
for (i in 1:100){
	redExpr[,i] <- svd(xNorm[,clust1$cluster==i])$u[,1];
}

covar <- x[,covInd]

set.seed(1)

res <- WGCNA(x2);

diag(res$wg) <- 1;
dissADJ <-  1- res$wg;
pam100 <- pam(as.dist(dissADJ),100)

df <- cbind(pam100$clustering[o1],names(pam100$clustering[o1]))
colnames(df) <- c('cluster','variable')

write.table(df,file='~/wgcnaClusteringRootN.txt',quote=F,row.names=F)

#define function to use for penalty parameter selection
f <- function(x,a){
  return(x-(2*pi*x)^0.5-a*x+2*pi*a)
}

#set sqrt(n)/m penalty
l0use <- log(uniroot(f,a=sqrt(47)/142,interval=c(0,1))$root)

buildCondNetwork <- function(Y,X){
  #sigYY <- matrix(1,ncol(Y),ncol(Y));
  sigYX <- matrix(1,ncol(Y),ncol(X)+ncol(Y));
  sigYXsign <- matrix(0,ncol(Y),ncol(X)+ncol(Y))
  for (i in 1:ncol(Y)){
    print(i);
    vbsrRes <- vbsr(y=Y[,i],X=scale(cbind(Y[,-i],X)),n_orderings=500,post=NULL,l0_path=l0use)
    #sigYY[i,-i] <- vbsrRes$pval[1:(ncol(Y)-1)];
    sigYX[i,-i] <- -log10(vbsrRes$pval);
    sigYXsign[i,-i] <- sign(vbsrRes$beta);
  }
  A <- sigYX[1:ncol(Y),1:ncol(Y)]
  sigYX[1:ncol(Y),1:ncol(Y)] <- A/2+t(A)/2
  sigYX <- sigYX > -log10(0.05/(nrow(sigYX)*ncol(sigYX)))
  model <- list();
  model$sigYX <- sigYX;
  model$sigYXsign <- sigYXsign;
  return(model)
}


#infer SPINE given data
network <- buildCondNetwork(as.matrix(redExpr),as.matrix(covar))
colnames(network$sigYX) <- c(paste('C',1:100,sep=''),colnames(covar));
rownames(network$sigYX) <- colnames(network$sigYX)[1:100]
colnames(network$sigYXsign) <- colnames(network$sigYXsign)
rownames(network$sigYXsign) <- rownames(network$sigYXsign)

network$sigYXsign <- network$sigYXsign*network$sigYX

#save network output as 
save(network,file='~/signedDMCnetwork063014.rda')
load(file='~/signedDMCnetwork063014.rda')


#reorient edges to correspond to eigendecomposition sign
redExpr2 <- redExpr;
signvec <- rep(0,100)
for (i in 1:100){
  redExpr2[,i] <- rowMeans(xNorm[,as.character(cluster$ID[cluster$C.==i])])
  signvec[i] <- sign(cor(redExpr[,i],redExpr2[,i]))
}

network$sigYXsignCor <- network$sigYXsign
for (i in 1:100){
  network$sigYXsignCor[i,] <- network$sigYXsignCor[i,]*signvec[i];
  network$sigYXsignCor[,i] <- network$sigYXsignCor[,i]*signvec[i];
}


#save corrected edges
save(network,file='~/signedDMCnetwork063014.rda')


#make a table of edges to be input by cytoscape
whichNonZero <- which((network$sigYXsignCor*upper.tri(network$sigYXsignCor))!=0,T)
edgeList<-cbind(rownames(network$sigYXsignCor)[whichNonZero[,1]],colnames(network$sigYXsignCor)[whichNonZero[,2]],diag(network$sigYXsignCor[whichNonZero[,1],whichNonZero[,2]]))
colnames(edgeList) <- c('Variable1','Variable2','Sign')


#write the table of edges
write.table(edgeList,file='~/networkStructure.csv',sep=',',quote=F,row.names=FALSE)


#To generate the heatmap
library(gplots)

brpal <- colorRampPalette(c('blue','white','red'),space='Lab');
heatmap.2((scale(cbind(redExpr,covar))),trace='none',col=brpal(50))