# R code modules to be used with code generated by iDEP for stand-alone analysis
# Steven Xijin Ge  South Dakota State University
# gexijin@gmail.com
# 3/12/2018
# latest version on github:
#  https://raw.githubusercontent.com/iDEP-SDSU/idep/master/shinyapps/idep/iDEP_core_functions.R


################################################################
# Install packages
################################################################

# dplyr complains this required libraries: libudunits2-dev, libmariadb-client-lgpl-dev
# install.packages("plotly", repos="http://cran.rstudio.com/", dependencies=TRUE)
# sometimes need to remove all installed packages: https://www.r-bloggers.com/how-to-remove-all-user-installed-packages-in-r/ 
#cat("\n Installing lots of R packages, this may take several hours ... \n
#   Each of these packages took years to develop.\n So be a patient thief.\n
#   \n Note 1: Sometimes dependencies needs to be installed manually. 
#   \n Note 2: If you are using an older version of R, and having trouble with package installation,
#   \n         sometimes, it is easier to install the new version of R and delete all old packages, and start fresh. 
#  ")

list.of.packages <- c("dendextend", "htmlwidgets","RSQLite", 
	"gplots", "ggplot2", "dplyr", "tidyverse","plotly","e1071", 
	"reshape2",  "Rcpp","flashClust","statmod",
	"biclust","igraph","Rtsne")

list.of.bio.packages  <- c("STRINGdb", "limma", "DESeq2", "edgeR", "gage", 
	"PGSEA", "fgsea", "ReactomePA", "pathview", "PREDA", "impute", "runibic",
	"QUBIC","rhdf5", "PREDAsampledata", "sfsmisc", "lokern", "multtest", 
	"GSEABase","hgu133plus2.db")

#Install Require packages
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages, repos="http://cran.rstudio.com/", dependencies=TRUE)

new.bio.packages <- list.of.bio.packages[!(list.of.bio.packages %in% installed.packages()[,"Package"])]
if(length(new.bio.packages)){
  source("https://bioconductor.org/biocLite.R")
  biocLite(new.bio.packages, suppressUpdates = T)
}	

# WGCNA must be installed after AnnotationDbi, impute, GO.db, preprocessCore
# install.packages(c("WGCNA"))

#Test Load Packages    This cause an error when too many packages are loaded.
# suc = unlist ( lapply(list.of.packages, require, character.only = TRUE) )
# if(sum(suc) < length(list.of.packages) )
# 	cat ("\n\nWarnning!!!!!! These R packages cannot be loaded:", list.of.packages[!suc] )

# suc = unlist ( lapply(list.of.bio.packages, require, character.only = TRUE) )
# if(sum(suc) < length(list.of.bio.packages) )
#	cat ("\n\nWarnning!!!!!! These Bioconductor packages cannot be loaded:", list.of.bio.packages[!suc] )


################################################################
# Global variables
################################################################

library(RSQLite,verbose=FALSE)	# for database connection
library(gplots,verbose=FALSE)		# for hierarchical clustering
library(ggplot2,verbose=FALSE)	# graphics
library(e1071,verbose=FALSE) 		# computing kurtosis
library(DT,verbose=FALSE) 		# for renderDataTable
library(plotly,verbose=FALSE) 	# for interactive heatmap
library(reshape2,verbose=FALSE) 	# for melt correlation matrix in heatmap
input_goButton = 0    # if 1, use demo data file
Min_overlap <- 2
minSetSize = 3; 
mappingCoverage = 0.60 # 60% percent genes has to be mapped for confident mapping
mappingEdge = 0.5  # Top species has 50% more genes mapped
PvalGeneInfo = 0.05; minGenes = 10 # min number of genes for ploting
kurtosis.log = 50  # log transform is enforced when kurtosis is big
kurtosis.warning = 10 # log transformation recommnded 
minGenesEnrichment = 2 # perform GO or promoter analysis only if more than this many genes
PREDA_Permutations =1000
maxGeneClustering = 12000  # max genes for hierarchical clustering and k-Means clustering. Slow if larger
maxGeneWGCNA = 2000 # max genes for co-expression network
maxFactors =6  # max number of factors in DESeq2 models
redudantGeneSetsRatio = 0.9  # remove redundant genesets in enrichment analysis
set.seed(2) # seed for random number generator
mycolors = sort(rainbow(20))[c(1,20,10,11,2,19,3,12,4,13,5,14,6,15,7,16,8,17,9,18)] # 20 colors for kNN clusters
#Each row of this matrix represents a color scheme;

hmcols <- colorRampPalette(rev(c("#D73027", "#FC8D59", "#FEE090", "#FFFFBF",
"#E0F3F8", "#91BFDB", "#4575B4")))(75)
heatColors = rbind(      greenred(75),     bluered(75),     colorpanel(75,"green","black","magenta"),colorpanel(75,"blue","yellow","red"),hmcols )
rownames(heatColors) = c("Green-Black-Red","Blue-White-Red","Green-Black-Magenta","Blue-Yellow-Red","Blue-white-brown")
colorChoices = setNames(1:dim(heatColors)[1],rownames(heatColors)) # for pull down menu

# Create a list for Select Input options
speciesChoice = list()
speciesChoice <- append( setNames( "NEW","**NEW SPECIES**"), speciesChoice  )
speciesChoice <- append( setNames( "BestMatch","Best matching species"), speciesChoice  )

################################################################
# Read data and pre-process
################################################################

readData <- function(inFile ) {

				# these packages moved here to reduce loading time
				library(edgeR,verbose=FALSE) # count data D.E.
				library(DESeq2,verbose=FALSE) # count data analysis

				dataTypeWarning =0
				dataType =c(TRUE)

				#---------------Read file
				x <- read.csv(inFile)	# try CSV
				if(dim(x)[2] <= 2 )   # if less than 3 columns, try tab-deliminated
					x <- read.table(inFile, sep="\t",header=TRUE)	
				#-------Remove non-numeric columns, except the first column
				
				for(i in 2:dim(x)[2])
					dataType = c( dataType, is.numeric(x[,i]) )
				if(sum(dataType) <=2) return (NULL)  # only less than 2 columns are numbers
				x <- x[,dataType]  # only keep numeric columns
				
				# rows with all missing values
				ix = which( apply(x[,-1],1, function(y) sum( is.na(y) ) ) != dim(x)[2]-1 )
				x <- x[ix,]
				
				dataSizeOriginal = dim(x); dataSizeOriginal[2] = dataSizeOriginal[2] -1
				
				x[,1] <- toupper(x[,1])
				x[,1] <- gsub(" ","",x[,1]) # remove spaces in gene ids
				x = x[order(- apply(x[,2:dim(x)[2]],1,sd) ),]  # sort by SD
				x <- x[!duplicated(x[,1]) ,]  # remove duplicated genes
				rownames(x) <- x[,1]
				x <- as.matrix(x[,c(-1)])
				
				# remove "-" or "." from sample names
				colnames(x) = gsub("-","",colnames(x))
				colnames(x) = gsub("\\.","",colnames(x))

				
				#cat("\nhere",dim(x))
				# missng value for median value
				if(sum(is.na(x))>0) {# if there is missing values
					if(input_missingValue =="geneMedian") { 
						rowMedians <- apply(x,1, function (y)  median(y,na.rm=T))
						for( i in 1:dim(x)[2] ) {
							ix = which(is.na(x[,i]) )
							x[ix,i] <- rowMedians[ix]						
						}
							
					} else if(input_missingValue =="treatAsZero") {
						x[is.na(x) ] <- 0					
					} else if (input_missingValue =="geneMedianInGroup") {
						sampleGroups = detectGroups( colnames(x))
						for (group in unique( sampleGroups) ){		
							samples = which( sampleGroups == group )
							rowMedians <- apply(x[,samples, drop=F],1, function (y)  median(y,na.rm=T))
							for( i in  samples ) { 
								ix = which(is.na(x[ ,i] ) )	
								if(length(ix) >0 )
									x[ix, i  ]  <- rowMedians[ix]
							}										
						}
						
						# missing for entire sample group, use median for all samples
						if(sum(is.na(x) )>0 ) { 
							rowMedians <- apply(x,1, function (y)  median(y,na.rm=T))
							for( i in 1:dim(x)[2] ) {
								ix = which(is.na(x[,i]) )
								x[ix,i] <- rowMedians[ix]						
							}						
						}
					}
				}

				# Compute kurtosis
				mean.kurtosis = mean(apply(x,2, kurtosis),na.rm=T)
				rawCounts = NULL
				pvals= NULL
				if (input_dataFileFormat == 2 ) {  # if FPKM, microarray

					if ( is.integer(x) ) dataTypeWarning = 1;  # Data appears to be read counts

					#-------------filtering
					#tem <- apply(x,1,max)
					#x <- x[which(tem > input_lowFilter),]  # max by row is at least 
					x <- x[ which( apply( x, 1,  function(y) sum(y >= input_lowFilter)) >= input_NminSamples2 ) , ] 

					
					x <- x[which(apply(x,1, function(y) max(y)- min(y) ) > 0  ),]  # remove rows with all the same levels

					#--------------Log transform
					# Takes log if log is selected OR kurtosis is big than 100
					if ( (input_transform == TRUE) | (mean.kurtosis > kurtosis.log ) ) 
						x = log(x+abs( input_logStart),2)

					tem <- apply(x,1,sd) 
					x <- x[order(-tem),]  # sort by SD

				} else 
					if( input_dataFileFormat == 1) {  # counts data


					# data not seems to be read counts
					if(!is.integer(x) & mean.kurtosis < kurtosis.log ) {
						dataTypeWarning = -1
					}
					# not used as some counts data like those from CRISPR screen
					#validate(   # if Kurtosis is less than a threshold, it is not read-count
					#	need(mean.kurtosis > kurtosis.log, "Data does not seem to be read count based on distribution. Please double check.")
					# )
					x <- round(x,0) # enforce the read counts to be integers. Sailfish outputs has decimal points.
					#x <- x[ which( apply(x,1,max) >= input_minCounts ) , ] # remove all zero counts
					

					# remove genes if it does not at least have minCounts in at least NminSamples
					#x <- x[ which( apply(x,1,function(y) sum(y>=input_minCounts)) >= input_NminSamples ) , ]  # filtering on raw counts
					# using counts per million (CPM) for filtering out genes.
                                             # CPM matrix                  #N samples > minCounts
					x <- x[ which( apply( cpm(DGEList(counts = x)), 1,  
							function(y) sum(y>=input_minCounts)) >= input_NminSamples ) , ] 
					


					rawCounts = x; # ??? 
					# construct DESeqExpression Object
					# colData = cbind(colnames(x), as.character(detectGroups( colnames(x) )) )
					tem = rep("A",dim(x)[2]); tem[1] <- "B"   # making a fake design matrix to allow process, even when there is no replicates
					colData = cbind(colnames(x), tem )
					colnames(colData)  = c("sample", "groups")
					dds <- DESeqDataSetFromMatrix(countData = x, colData = colData, design = ~ groups)
					dds <- estimateSizeFactors(dds) # estimate size factor for use in normalization later for started log method


					# regularized log  or VST transformation
					if( input_CountsTransform == 3 ) { # rlog is slow, only do it with 10 samples
						if(dim(x)[2]<=20 ) { 
						 x <- rlog(dds, blind=TRUE); x <- assay(x) } else 
						 x <- log2( counts(dds, normalized=TRUE) + input_countsLogStart ) 
						 }  

						else {
						if ( input_CountsTransform == 2 ) {    # vst is fast but aggressive
							x <- vst(dds, blind=TRUE)
							x <- assay(x)  
						} else{  # normalized by library sizes and add a constant.
							x <- log2( counts(dds, normalized=TRUE) + input_countsLogStart )   # log(x+c) 
							# This is equivalent to below. But the prior counts is more important
							#x <- cpm(DGEList(counts = x),log=TRUE, prior.count=input_countsLogStart )  #log CPM from edgeR
							#x <- x-min(x)  # shift values to avoid negative numbers
						}
					}
				} else 
					if( input_dataFileFormat == 3)	{  # other data type

						#neg_lfc neg_fdr pos_lfc pos_fdr 
						#11       1      11       1 
						
						n2 = ( dim(x)[2] %/% 2) # 5 --> 2
						# It looks like it contains P values
						# ranges of columns add 0.2 and round to whole. For P value columns this should be 1
						tem = round( apply(x, 2, function( y) max(y)- min(y))  + .2)     
						if( sum(tem[(1:n2)*2  ] ==  1 ) == n2 | 
							sum(tem[(1:n2)*2-1  ] ==  1 ) == n2 ) { 		
							x = x[,1:(2*n2) ,drop=FALSE ] # if 5, change it to 4			
							if(tem[2] == 1) { # FDR follows Fold-change
								pvals = x [,2*(1:n2 ),drop=FALSE ]  # 2, 4, 6
								x = x[, 2*(1:n2 )-1,drop=FALSE]   # 1, 3, 5

							} else {	# FDR follows Fold-change
								pvals = x [,2*(1:n2 )-1,drop=FALSE ]  # 2, 4, 6		
								x = x[, 2*(1: n2 ),drop=FALSE]   # 1, 3, 5
							}
						}
					ix =  which(apply(x,1, function(y) max(y)- min(y) ) > 0  )
					x <- x[ix,]  # remove rows with all the same levels
					if(!is.null(pvals) )
						pvals = pvals[ix,]
						
					}
					
					
				dataSize = dim(x);
				if(!(dim(x)[1]>5 & dim(x)[2]>1)) 
				stop ( "Data file not recognized. Please double check.")


				
				sampleInfoDemo=NULL
				if( input_goButton >0)
					sampleInfoDemo <- t( read.csv(demoDataFile2,row.names=1,header=T,colClasses="character") )

					finalResult <- list(data = as.matrix(x), mean.kurtosis = mean.kurtosis, rawCounts = rawCounts, dataTypeWarning=dataTypeWarning, dataSize=c(dataSizeOriginal,dataSize),sampleInfoDemo=sampleInfoDemo, pvals =pvals )
				return(finalResult)

	}

readSampleInfo <- function(inFile){

				dataTypeWarning =0
				dataType =c(TRUE)

				#---------------Read file
				x <- read.csv(inFile,row.names=1,header=T,colClasses="character")	# try CSV
				if(dim(x)[2] <= 2 )   # if less than 3 columns, try tab-deliminated
					x <- read.table(inFile, row.names=1,sep="\t",header=TRUE,colClasses="character")
				# remove "-" or "." from sample names
				colnames(x) = gsub("-","",colnames(x))
				colnames(x) = gsub("\\.","",colnames(x))	

				#----------------Matching with column names of expression file
				ix = match(toupper(colnames(readData.out$data)), toupper(colnames(x)) ) 
				ix = ix[which(!is.na(ix))] # remove NA

				if(! ( length(unique(ix) ) == dim(readData.out$data)[2] 
				       & dim(x)[1]>=1 & dim(x)[1] <500 ) ) # at least one row, it can not be more than 500 rows
					 stop( "Error!!! Sample information file not recognized. Sample names must be exactly the same. Each row is a factor. Each column represent a sample.  Please see documentation on format.")
				
				
				#-----------Double check factor levels, change if needed
				# remove "-" or "." from factor levels
				for( i in 1:dim(x)[1]) {
				   x[i,] = gsub("-","",x[i,])
				   x[i,] = gsub("\\.","",x[i,])				
				}
				# if levels from different factors match
				if( length(unique(ix) ) == dim(readData.out$data)[2]) { # matches exactly
					x = x[,ix]
					# if the levels of different factors are the same, it may cause problems
					if( sum( apply(x, 1, function(y) length(unique(y)))) > length(unique(unlist(x) ) ) ) {
						tem2 =apply(x,2, function(y) paste0( names(y),y)) # factor names are added to levels
						rownames(tem2) = rownames(x)
						x <- tem2				
					}
					return(t( x ) )			
				} else retrun(NULL)
							
				
}

textTransform <- function () { 
		k.value =  readData.out$mean.kurtosis	  
		tem = paste( "Mean Kurtosis =  ", round(k.value,2), ".\n",sep = "")
		if( k.value > kurtosis.log) tem = paste(tem, " Detected extremely large values. \n  When kurtosis >", kurtosis.log,
		", log transformation is automatically applied.\n", sep="") else if (k.value>kurtosis.warning)
		{tem = paste(tem, " Detected  large numbers with kurtosis >",
		kurtosis.warning,". \n Log transformation is recommended.", sep="") }
		if(readData.out$dataTypeWarning == 1 ) {
			tem = paste (tem, " ------Warning!!! \nData matrix contains all integers. \n It seems to be read counts!!! \n Please select appropriate data type in the previous page and reload file.")}
		if(readData.out$dataTypeWarning == -1 ) {
			tem = paste (tem, "  ------Warning!!! \nData does not look like read counts data. \n Please select appropriate data type in the previous page and reload file.")}
		tem
}


# Clean up gene sets. Remove spaces and other control characters from gene names  
cleanGeneSet <- function (x){
  # remove duplicate; upper case; remove special characters
  x <- unique( toupper( gsub("\n| ","",x) ) )
  x <- x[which( nchar(x)>1) ]  # genes should have at least two characters
  return(x)
}
findSpeciesById <- function (speciesID){ # find species name use id
  return( orgInfo[which(orgInfo$id == speciesID),]  )
}

# just return name
findSpeciesByIdName <- function (speciesID){ # find species name use id
  return( orgInfo[which(orgInfo$id == speciesID),3]  )
}

# convert gene IDs to ensembl gene ids and find species
convertID <- function (query,selectOrg, selectGO) {
	querySet <- cleanGeneSet( unlist( strsplit( toupper(query),'\t| |\n|\\,')))
	# querySet is ensgene data for example, ENSG00000198888, ENSG00000198763, ENSG00000198804
	querySTMT <- paste( "select distinct id,ens,species from mapping where id IN ('", paste(querySet,collapse="', '"),"')",sep="")
	result <- dbGetQuery(convert, querySTMT)
	if( dim(result)[1] == 0  ) return(NULL)
	if(selectOrg == speciesChoice[[1]]) {
		comb = paste( result$species,result$idType)
		sortedCounts = sort(table(comb),decreasing=T)
		recognized =names(sortedCounts[1])
		result <- result[which(comb == recognized),]
		speciesMatched=sortedCounts
		names(speciesMatched )= sapply(as.numeric(gsub(" .*","",names(sortedCounts) ) ), findSpeciesByIdName  ) 
		speciesMatched <- as.data.frame( speciesMatched )
		if(length(sortedCounts) == 1) { # if only  one species matched
		speciesMatched[1,1] <-paste( rownames(speciesMatched), "(",speciesMatched[1,1],")",sep="")
		} else {# if more than one species matched
			speciesMatched[,1] <- as.character(speciesMatched[,1])
			speciesMatched[,1] <- paste( speciesMatched[,1]," (",speciesMatched[,2], ")", sep="") 
			speciesMatched[1,1] <- paste( speciesMatched[1,1],"   ***Used in mapping***  To change, select from above and resubmit query.") 	
			speciesMatched <- as.data.frame(speciesMatched[,1])
		}
	} else { # if species is selected
		result <- result[which(result$species == selectOrg ) ,]
		if( dim(result)[1] == 0  ) return(NULL) #stop("ID not recognized!")
		speciesMatched <- as.data.frame(paste("Using selected species ", findSpeciesByIdName(selectOrg) )  )
	}
	result <- result[which(!duplicated(result[,2]) ),] # remove duplicates in ensembl_gene_id
	result <- result[which(!duplicated(result[,1]) ),] # remove duplicates in user ID
	colnames(speciesMatched) = c("Matched Species (genes)" ) 
	conversionTable <- result[,1:2]; colnames(conversionTable) = c("User_input","ensembl_gene_id")
	conversionTable$Species = sapply(result[,3], findSpeciesByIdName )

	return(list(originalIDs = querySet,IDs=unique( result[,2]), 
				species = findSpeciesById(result$species[1]), 
				#idType = findIDtypeById(result$idType[1] ),
				speciesMatched = speciesMatched,
				conversionTable = conversionTable
				) )
}


# retrieve detailed info on genes
geneInfo <- function (fileName = NULL){

	if(!is.null(fileName))  # if only one file           #WBGene0000001 some ensembl gene ids in lower case
	{ x = read.csv(as.character(fileName) ); x[,1]= toupper(x[,1]) } else # read in the chosen file 
	{ return(as.data.frame("No file.") )   }
	Set=rep("List",nrow(x))
	return( cbind(x,Set) )
 }

convertedData <- function() {
		if( is.null(converted.out ) ) return( readData.out$data) # if id or species is not recognized use original data.
			
				if(input_noIDConversion) return( readData.out$data )
				
				mapping <- converted.out$conversionTable
				# cat (paste( "\nData:",input_selectOrg) )
				x =readData.out$data

				rownames(x) = toupper(rownames(x))
				# any gene not recognized by the database is disregarded
				# x1 = merge(mapping[,1:2],x,  by.y = 'row.names', by.x = 'User_input')
				# the 3 lines keeps the unrecogized genes using original IDs
				x1 = merge(mapping[,1:2],x,  by.y = 'row.names', by.x = 'User_input', all.y=TRUE)

				# original IDs used if ID is not matched in database
				ix = which(is.na(x1[,2]) )
				x1[ix,2] = x1[ix,1] 
				
				#multiple matched IDs, use the one with highest SD
				tem = apply(x1[,3:(dim(x1)[2])],1,sd)
				x1 = x1[order(x1[,2],-tem),]
				x1 = x1[!duplicated(x1[,2]) ,]
				rownames(x1) = x1[,2]
				x1 = as.matrix(x1[,c(-1,-2)])
				tem = apply(x1,1,sd)
				x1 = x1[order(-tem),]  # sort again by SD
				return(x1)

	}

convertedCounts <- function() {
		      if( is.null(converted.out ) ) return( readData.out$rawCounts)
				mapping <- converted.out$conversionTable

				x =readData.out$rawCounts

					rownames(x) = toupper(rownames(x))
					# any gene not recognized by the database is disregarded
					# x1 = merge(mapping[,1:2],x,  by.y = 'row.names', by.x = 'User_input')
					# the 3 lines keeps the unrecogized genes using original IDs
					x1 = merge(mapping[,1:2],x,  by.y = 'row.names', by.x = 'User_input', all.y=TRUE)
					ix = which(is.na(x1[,2]) )
					x1[ix,2] = x1[ix,1] # original IDs used
					
					#multiple matched IDs, use the one with highest SD
					tem = apply(x1[,3:(dim(x1)[2])],1,sd)
					x1 = x1[order(x1[,2],-tem),]
					x1 = x1[!duplicated(x1[,2]) ,]
					rownames(x1) = x1[,2]
					x1 = as.matrix(x1[,c(-1,-2)])
					tem = apply(x1,1,sd)
					x1 = x1[order(-tem),]  # sort again by SD

					return(x1)
} # here

nGenesFilter <- function() { 
		tem = readData.out$dataSize
		ix = match( toupper( rownames(convertedData.out)), toupper(converted.out$conversionTable$ensembl_gene_id  ) )
		nMatched = sum( !is.na(ix) )
		if( input_noIDConversion) 
		return( paste(tem[1], "genes in", tem[4], "samples.", tem[3], " genes passed filter.\n Original gene IDs used." ) )  else
		return( paste(tem[1], "genes in", tem[4], "samples. Of the", tem[3], " genes passed filter, \n", 
			nMatched, " were converted to Ensembl gene IDs in our database. \n
			  The remaining ", tem[3]-nMatched, "genes were kept in the data using original IDs." ) ) 	  
}		

# Define sample groups based on column names
 detectGroups <- function (x){  # x are col names
	tem <- gsub("[0-9]*$","",x) # Remove all numbers from end
	#tem = gsub("_Rep|_rep|_REP","",tem)
	tem <- gsub("_$","",tem); # remove "_" from end
	tem <- gsub("_Rep$","",tem); # remove "_Rep" from end
	tem <- gsub("_rep$","",tem); # remove "_rep" from end
	tem <- gsub("_REP$","",tem)  # remove "_REP" from end
 	return( tem )
 }

  # EDA plots -------------------------------------
 densityPlot <- function() {
set.seed(2) # seed for random number generator
mycolors = sort(rainbow(20))[c(1,20,10,11,2,19,3,12,4,13,5,14,6,15,7,16,8,17,9,18)] # 20 colors for kNN clusters
#Each row of this matrix represents a color scheme;
	
    x <- readData.out$data

	myColors = mycolors
	plot(density(x[,1]),col = myColors[1], lwd=2,
		xlab="Expresson values", ylab="Density", main= "Distribution of transformed data",
		ylim=c(0, max(density(x[,1])$y)+.02 ) )
	  
	for( i in 2:dim(x)[2] )
	lines(density(x[,i]),col=myColors[i], lwd=2 )
	if(dim(x)[2]< 31 ) # if too many samples do not show legends
		legend("topright", cex=1,colnames(x), lty=rep(1,dim(x)[2]), col=myColors )	
   
}

 genePlot <- function() {
    x <- convertedData.out
	
	Symbols <- rownames(x)
	if( input_selectOrg != "NEW") {
		ix = match( rownames(x), allGeneInfo.out[,1])
		if( sum( is.na(allGeneInfo.out$symbol )) != dim(allGeneInfo.out )[1] ) {  # symbol really exists? 
			Symbols = as.character( allGeneInfo.out$symbol[ix] )
			Symbols[which( nchar(Symbols) <= 2 ) ] <- rownames(x) [which( nchar(Symbols) <= 2 ) ] 
			}
	   }
	x = as.data.frame(x)
	x$Genes = Symbols
	# matching from the beginning of symbol
	ix = which(regexpr(  paste("^" , toupper(input_geneSearch),sep="")   ,toupper(x$Genes)) > 0)
	
	if(grepl(" ", input_geneSearch)  )  # if there is space character, do exact match
		ix = match(gsub(" ","", toupper(input_geneSearch)),x$Genes)
	
	# too few or too many genes found
	if(length(ix) == 0 | length(ix) > 50 ) return(NULL)
	  # no genes found
	 	 
	mdf = melt(x[ix,],id.vars="Genes", value.name="value", variable.name="samples")

	p1 <- ggplot(data=mdf, aes(x=samples, y=value, group = Genes, shape=Genes, colour = Genes)) +
		geom_line() +
		geom_point( size=5,  fill="white")+ #shape=21  circle
		#theme(axis.text.x = element_text(size=16,angle = 45, hjust = 1)) +
		labs(y="Transformed expression level") +
		coord_cartesian(ylim = c(0, max(mdf$value)))
	p1 <- p1 + theme(plot.title = element_text(size = 16,hjust = 0.5)) + # theme(aspect.ratio=1) +
	 theme(axis.text.x = element_text(angle=45, size = 16, hjust=1),
	       axis.text.y = element_text( size = 16),
		   axis.title.x = element_blank(),
		   axis.title.y = element_text( size = 16) ) +
	theme(legend.text=element_text(size=12))	
	
	p1	
	}	

	
geneBarPlotError <- function() {
   x <- convertedData.out
	
	Symbols <- rownames(x)
	if( input_selectOrg != "NEW") {
		ix = match( rownames(x), allGeneInfo.out[,1])
		if( sum( is.na(allGeneInfo.out$symbol )) != dim(allGeneInfo.out )[1] ) {  # symbol really exists? 
			Symbols = as.character( allGeneInfo.out$symbol[ix] )
			Symbols[which( nchar(Symbols) <= 2 ) ] <- rownames(x) [which( nchar(Symbols) <= 2 ) ] 
			}
	   }
	x = as.data.frame(x)
	x$Genes = Symbols
    #write.csv(x,"tem.csv")
	# Search for genes
	#ix = grep("HOXA",toupper(x$Genes) )
	# ix = grep(toupper(input_geneSearch),toupper(x$Genes))  # sox --> Tsox  
	# matching from the beginning of symbol
	ix = which(regexpr(  paste("^" , toupper(input_geneSearch),sep="")   ,toupper(x$Genes)) > 0)
	
	if(grepl(" ", input_geneSearch)  )  # if there is space character, do exact match
		ix = match(gsub(" ","", toupper(input_geneSearch)),x$Genes)
	
	# too few or too many genes found
	if(length(ix) == 0 | length(ix) > 50 ) return(NULL)
	  # no genes found
	 	 
	mdf = melt(x[ix,],id.vars="Genes", value.name="value", variable.name="samples")
	mdf$count = 1
	g = detectGroups(mdf$samples)
	Means = aggregate(mdf$value,by=list( g, mdf$Genes ), FUN = mean, na.rm=TRUE  )
	SDs = aggregate(mdf$value,by=list( g, mdf$Genes ), FUN = sd, na.rm=TRUE  )
	Ns = aggregate(mdf$count, by= list(g, mdf$Genes) , FUN = sum  )
	summarized = cbind(Means,SDs[,3],Ns[,3])
	colnames(summarized)= c("Samples","Genes","Mean","SD","N")
	summarized$SE = summarized$SD / sqrt(summarized$N)	
		
	#http://www.sthda.com/english/wiki/ggplot2-barplots-quick-start-guide-r-software-and-data-visualization
	p2 <- ggplot(summarized, aes(x=Genes, y=Mean,fill=Samples) ) + # data & aesthetic mapping
		geom_bar(stat="identity", position=position_dodge()) + # bars represent average
		geom_errorbar(aes(ymin=Mean-SE, ymax=Mean+SE), width=0.2,position=position_dodge(.9)) +
		labs(y="Expression Level") 
	if(input_useSD == 1) { 
	p2 <- ggplot(summarized, aes(x=Genes, y=Mean,fill=Samples) ) + # data & aesthetic mapping
		geom_bar(stat="identity", position=position_dodge()) + # bars represent average
		geom_errorbar(aes(ymin=Mean-SD, ymax=Mean+SD), width=0.2,position=position_dodge(.9)) +
		labs(y="Expression Level") 
	}
	
	p2 <- p2 +  theme(plot.title = element_text(size = 16,hjust = 0.5)) + # theme(aspect.ratio=1) +
	 theme(axis.text.x = element_text(angle=45, size = 16, hjust=1),
	       axis.text.y = element_text( size = 16),
		   axis.title.x = element_blank(),
		   axis.title.y = element_text( size = 16) ) +
	theme(legend.text=element_text(size=16))
	
	p2
}

################################################################
# Hierarchical clustering
################################################################

# Functions for hierarchical clustering
hclust2 <- function(x, method="average", ...)  # average linkage
  hclust(x, method=method, ...)
hclust.ward.D <- function(x, method="ward.D", ...)  # average linkage
  hclust(x, method=method, ...)
hclust.ward.D2 <- function(x, method="ward.D2", ...)  # average linkage
  hclust(x, method=method, ...)
hclust.single <- function(x, method="single", ...)  # average linkage
  hclust(x, method=method, ...)
hclust.mcquitty <- function(x, method="mcquitty", ...)  # average linkage
  hclust(x, method=method, ...)
hclust.median <- function(x, method="median", ...)  # average linkage
  hclust(x, method=method, ...)
hclust.centroid <- function(x, method="centroid", ...)  # average linkage
  hclust(x, method=method, ...)
  
hclustFuns <- list( averge = hclust2, complete=hclust, single=hclust.single,
					median=hclust.median, centroid=hclust.centroid, mcquitty=hclust.mcquitty)
hclustChoices = setNames(1:length(hclustFuns),names(hclustFuns)) # for pull down menu

dist2 <- function(x, ...)   # distance function = 1-PCC (Pearson's correlation coefficient)
  as.dist(1-cor(t(x), method="pearson"))
  
dist3 <- function(x, ...)   # distance function = 1-abs(PCC) (Pearson's correlation coefficient)
  as.dist(1-abs(cor(t(x), method="pearson")))  
  
 distFuns <- list(Correlation=dist2, Euclidean=dist,AbsolutePCC=dist3) 
  # hierarchical clustering tree ----------------------------------------

hmcols <- colorRampPalette(rev(c("#D73027", "#FC8D59", "#FEE090", "#FFFFBF",
"#E0F3F8", "#91BFDB", "#4575B4")))(75)
heatColors = rbind(      greenred(75),     bluered(75),     colorpanel(75,"green","black","magenta"),colorpanel(75,"blue","yellow","red"),hmcols )
rownames(heatColors) = c("Green-Black-Red","Blue-White-Red","Green-Black-Magenta","Blue-Yellow-Red","Blue-white-brown")
colorChoices = setNames(1:dim(heatColors)[1],rownames(heatColors)) # for pull down menu
# adding sample legends to heatmap; this is for the main heatmap
# https://stackoverflow.com/questions/3932038/plot-a-legend-outside-of-the-plotting-area-in-base-graphics
add_legend <- function(...) {
  opar <- par(fig=c(0, 1, 0, 1), oma=c(0, 0, 0, 0), 
    mar=c(0, 0, 0, 6), new=TRUE)
  on.exit(par(opar))
  plot(0, 0, type='n', bty='n', xaxt='n', yaxt='n')
  legend(...)
}

correlationMatrix <- function( ) {
	# heatmap of correlation matrix
	x <- readData.out$data
	maxGene <- apply(x,1,max)
	x <- x[which(maxGene > quantile(maxGene)[1] ) ,] # remove bottom 25% lowly expressed genes, which inflate the PPC
	
	   melted_cormat <- melt(round(cor(x),2), na.rm = TRUE)
	# melted_cormat <- melted_cormat[which(melted_cormat[,1] != melted_cormat[,2] ) , ]
		# Create a ggheatmap
		ggheatmap <- ggplot(melted_cormat, aes(Var2, Var1, fill = value))+
		 geom_tile(color = "white")+
		 scale_fill_gradient2(low = "green", high = "red",  mid = "white", 
			space = "Lab",  limit = c(min(melted_cormat[,3]) ,max(melted_cormat[,3])), midpoint = median(melted_cormat[,3]),
			name="Pearson\nCorrelation"
		  ) +
		 theme_minimal()+ # minimal theme
		 theme(axis.text.x = element_text(angle = 45, vjust = 1, 
			size = 15, hjust = 1))+
		 theme(axis.text.y = element_text( 
			size = 15))+
		 coord_fixed()+
		theme(
		  axis.title.x = element_blank(),
		  axis.title.y = element_blank(),
		  panel.grid.major = element_blank(),
		  panel.border = element_blank(),
		  panel.background = element_blank(),
		  axis.ticks = element_blank(),
		 legend.justification = c(1, 0),
		  legend.position = c(0.6, 0.7),
		 legend.direction = "horizontal")+
		 guides(fill = FALSE) # + ggtitle("Pearson's Correlation Coefficient (all genes)")
		 
		 if(input_labelPCC && ncol(x)<30)
				ggheatmap <- ggheatmap +  geom_text(aes(Var2, Var1, label = value), color = "black", size = 4)

		ggheatmap
		 
}		 

staticHeatmap <- function () { 
	 x <- readData.out$data   # x = read.csv("expression1.csv")

	n=input_nGenes
	#if(n>6000) n = 6000 # max
	if(n>dim(x)[1]) n = dim(x)[1] # max	as data
	# this will cutoff very large values, which could skew the color 
	if(input_geneCentering)
		x=as.matrix(x[1:n,])-apply(x[1:n,],1,mean)
	
	# standardize by gene
	if(input_geneNormalize) 
		x <- x / apply(x,1,sd)
		
	# row centering and normalize
	x <- scale(x, center = input_sampleCentering, scale = input_sampleNormalize) 

	
	
	cutoff = median(unlist(x)) + input_heatmapCutoff * sd (unlist(x)) 
	x[x>cutoff] <- cutoff
	cutoff = median(unlist(x)) - input_heatmapCutoff *sd (unlist(x)) 
	x[x< cutoff] <- cutoff
	
    groups = detectGroups(colnames(x) )
	# if sample info file is uploaded us that info:

	if(!is.null(readSampleInfo.out) &&  !is.null(input_selectFactorsHeatmap) ) { 
		if(input_selectFactorsHeatmap == "Sample_Name" )
			groups = detectGroups(colnames(x) ) else 
			{ 	ix = match(input_selectFactorsHeatmap, colnames(readSampleInfo.out ) ) 
				groups = readSampleInfo.out[,ix]
			}
	}	
	
	groups.colors = rainbow(length(unique(groups) ) )

	#http://stackoverflow.com/questions/15351575/moving-color-key-in-r-heatmap-2-function-of-gplots-package
	lmat = rbind(c(0,4),c(0,1),c(3,2),c(5,0))
	lwid = c(2,6) # width of gene tree; width of heatmap
	lhei = c(1.5,.2,8,1.1)
	#layout matrix
	#		 [,1] [,2]
	#	[1,]    0    4
	#	[2,]    0    1
	#	[3,]    3    2
	#	[4,]    5    0
	# 4--> column tree; 1--> column color bar; 2--> heatmap; 3-> row tree; 5--> color key.
	# height of 4 rows is specified by lhei; width of columns is given by lwid


	par(mar = c(12, 4, 1.4, 0.2))


	if( n>110) 
	heatmap.2(x, distfun = distFuns[[as.integer(input_distFunctions)]]
		,hclustfun=hclustFuns[[as.integer(input_hclustFunctions)]]
		,Colv=!input_noSampleClustering
		#col=colorpanel(75,"green","black","magenta")  ,
		#col=bluered(75),
		#col=greenred(75), 
		,col= heatColors[as.integer(input_heatColors1),]
		,density.info="none", trace="none", scale="none", keysize=.5
		,key=T, symkey=F
		,ColSideColors=groups.colors[ as.factor(groups)]
		,labRow=""
		,margins=c(10,2)
		,srtCol=45
		,cexCol=2  # size of font for sample names
		,lmat = lmat, lwid = lwid, lhei = lhei
		)

	if( n<=110) 
	heatmap.2(x, distfun =  distFuns[[as.integer(input_distFunctions)]]
		,hclustfun=hclustFuns[[as.integer(input_hclustFunctions)]]
		,Colv=!input_noSampleClustering		
		,col= heatColors[as.integer(input_heatColors1),], density.info="none", trace="none", scale="none", keysize=.5
		,key=T, symkey=F,
		#,labRow=labRow
		,ColSideColors=groups.colors[ as.factor(groups)]
		,margins=c(18,12)
		,cexRow=1
		,srtCol=45
		,cexCol=1.8  # size of font for sample names
		,lmat = lmat, lwid = lwid, lhei = lhei
	)
	
	par(lend = 1)           # square line ends for the color legend
	add_legend("topleft",
		legend = unique(groups), # category labels
		col = groups.colors[ unique(as.factor(groups))],  # color key
		lty= 1,             # line style
		lwd = 10            # line width
	)
	
}


	#Interactive heatmap via Plotly plot ----------------------------------------------
	# interactive heatmap with plotly
heatmapPlotly <- function () { 	
	input_nGenesPlotly= 50
	
	

	   x <- convertedData.out

	n=input_nGenesPlotly
	#if(n>6000) n = 6000 # max
	if(n>dim(x)[1]) n = dim(x)[1] # max	as data

	if(input_geneCentering)
		x=as.matrix(x[1:n,])-apply(x[1:n,],1,mean)
	# standardize by gene
	if(input_geneNormalize) 
		x <- x / apply(x,1,sd)
	# row centering and normalize
	x <- scale(x, center = input_sampleCentering, scale = input_sampleNormalize) 

	cutoff = median(unlist(x)) + 3*sd (unlist(x)) 
	x[x>cutoff] <- cutoff
	cutoff = median(unlist(x)) - 3*sd (unlist(x)) 
	x[x< cutoff] <- cutoff

	# adding gene symbol
	ix <- match( rownames(x), allGeneInfo.out[,1])
	geneSymbols <- as.character( allGeneInfo.out$symbol)[ix]
	# if missing or duplicated, use Ensembl ID
	ix <- which(nchar( geneSymbols) <=1 | duplicated(geneSymbols ) | is.null(geneSymbols)  | is.na(geneSymbols)    );	geneSymbols[ ix ] <- rownames(x)[ix]
	rownames( x) = geneSymbols;


	clust <- x %>% 
	  dist2() %>% 
	  hclust2()

	# Get order
	ord <- clust$order

	# Re-arrange based on order
	df <- t( x[ord,] )%>%
	   melt()
	   
	colnames(df)[1:2] <- c("X","Y")
    colorNames = unlist(strsplit(tolower(rownames(heatColors)[ as.integer(input_heatColors1)   ]),"-" ) )
	p <- df %>%
	  ggplot(aes(X, Y, fill = value)) + 
		   geom_tile()+ scale_fill_gradient2(low = colorNames[1], mid = colorNames[2],high = colorNames[3]) +
		   theme(axis.title.y=element_blank(),   # remove y labels
		   # axis.text.y=element_blank.out,  # keep gene names for zooming
			axis.ticks.y=element_blank(),
			axis.title.x=element_blank()) +
			theme(axis.text.x = element_text(size=10,angle = 45, hjust = 1))


	ggplotly(p) %>% 
		layout(margin = list(b = 150,l=200))  # prevent cutoff of sample names
}


################################################################
# PCA
################################################################

PCAplot <- function() {   #PCA
	 x <- convertedData.out;
	 pca.object <- prcomp(t(x))
		
	pcaData = as.data.frame(pca.object$x[,1:2]); pcaData = cbind(pcaData,detectGroups(colnames(x)) )
	colnames(pcaData) = c("PC1", "PC2", "Sample_Name")
	percentVar=round(100*summary(pca.object)$importance[2,1:2],0)
	if(is.null(readSampleInfo.out)) { 
		p=ggplot(pcaData, aes(PC1, PC2, color=Sample_Name, shape = Sample_Name)) + geom_point(size=5) 
		} else {
		pcaData = cbind(pcaData,readSampleInfo.out )
		p=ggplot(pcaData, aes_string("PC1", "PC2", color=input_selectFactors,shape=input_selectFactors2)) + geom_point(size=5) 
		
		}
	p=p+xlab(paste0("PC1: ",percentVar[1],"% variance")) 
	p=p+ylab(paste0("PC2: ",percentVar[2],"% variance")) 
	p=p+ggtitle("Principal component analysis (PCA)")+coord_fixed(ratio=1.0)+ 
     theme(plot.title = element_text(size = 16,hjust = 0.5)) + theme(aspect.ratio=1) +
	 theme(axis.text.x = element_text( size = 16),
	       axis.text.y = element_text( size = 16),
		   axis.title.x = element_text( size = 16),
		   axis.title.y = element_text( size = 16) ) +
	theme(legend.text=element_text(size=16))
	   print(p)
	   }
	# variance chart
	# plot(pca.object,type="bar", xlab="Principal Components", main ="Variances explained")
	
	 
MDSplot <- function() {  # MDS
	 	x <- convertedData.out;
	 fit = cmdscale( dist2(t(x) ), eig=T, k=2)
	 
	# par(pin=c(5,5))
	pcaData = as.data.frame(fit$points[,1:2]); pcaData = cbind(pcaData,detectGroups(colnames(x)) )
	colnames(pcaData) = c("x1", "x2", "Sample_Name")
	

	if(is.null(readSampleInfo.out)) { 
	p=ggplot(pcaData, aes(x1, x2, color=Sample_Name, shape = Sample_Name)) + geom_point(size=5) 
	} else {
		pcaData = cbind(pcaData,readSampleInfo.out )
		p=ggplot(pcaData, aes_string("x1", "x2", color=input_selectFactors,shape=input_selectFactors2)) + geom_point(size=5) 
		}
	p=p+xlab("Dimension 1") 
	p=p+ylab("Dimension 2") 
	p=p+ggtitle("Multidimensional scaling (MDS)")+ coord_fixed(ratio=1.)+ 
     theme(plot.title = element_text(hjust = 0.5)) + theme(aspect.ratio=1) +
	 	 theme(axis.text.x = element_text( size = 16),
	       axis.text.y = element_text( size = 16),
		   axis.title.x = element_text( size = 16),
		   axis.title.y = element_text( size = 16) ) +
	theme(legend.text=element_text(size=16))
	   print(p)
	
	 }

tSNEplot <- function() {  # t-SNE
	 x <- convertedData.out;
	 library(Rtsne,verbose=FALSE)
	 set.seed(input_tsneSeed2)
	 tsne <- Rtsne(t(x), dims = 2, perplexity=1, verbose=FALSE, max_iter = 400)

	pcaData = as.data.frame(tsne$Y); pcaData = cbind(pcaData,detectGroups(colnames(x)) )
	colnames(pcaData) = c("x1", "x2", "Sample_Name")
	

	if(is.null(readSampleInfo.out)) { 
	p=ggplot(pcaData, aes(x1, x2, color=Sample_Name, shape = Sample_Name)) + geom_point(size=5) 
	} else {
		pcaData = cbind(pcaData,readSampleInfo.out )
		p=ggplot(pcaData, aes_string("x1", "x2", color=input_selectFactors,shape=input_selectFactors2)) + geom_point(size=5) 
		}
	p=p+xlab("Dimension 1") 
	p=p+ylab("Dimension 2") 
	p=p+ggtitle("t-SNE plot")+ coord_fixed(ratio=1.)+ 
     theme(plot.title = element_text(hjust = 0.5)) + theme(aspect.ratio=1) +
	 	 theme(axis.text.x = element_text( size = 16),
	       axis.text.y = element_text( size = 16),
		   axis.title.x = element_text( size = 16),
		   axis.title.y = element_text( size = 16) ) +
	theme(legend.text=element_text(size=16))
	   print(p)
	
	 }

 # read pathway data from local file, not used if using database
 # Read gene sets GMT file
# This functions cleans and converts to upper case

readGMTRobust <- function (file1) {   # size restriction
		# Read in the first file 
		x <- scan(file1, what="", sep="\n")
		# x <- gsub("\t\t.","",x)     # GMT files saved by Excel has a lot of empty cells "\t\t\t\t"   "\t." means one or more tab
		x <- gsub(" ","",x)  # remove white space
		x <- toupper(x)    # convert to upper case

		#----Process the first file
		# Separate elements by one or more whitespace
		y <- strsplit(x, "\t")
		# Extract the first vector element and set it as the list element name
		names(y) <- sapply(y, `[[`, 1)
		#names(y) <- sapply(y, function(x) x[[1]]) # same as above
		# Remove the first vector element from each list element
		y <- lapply(y, `[`, -c(1,2))
		#y <- lapply(y, function(x) x[-1]) # same as above
		# remove duplicated elements
		for ( i in 1:length(y) )  y[[i]] <- cleanGeneSet(y[[i]])
		# check the distribution of the size of gene lists sapply(y, length) hold a vector of sizes
		if( max( sapply(y,length) ) <5) cat("Warning! Gene sets have very small number of genes!\n Please double check format.")
		y <- y[which(sapply(y,length) > 1)]  # gene sets smaller than 1 is ignored!!!

		return(y)
	}


readGeneSets <- function (fileName, convertedData, GO,selectOrg, myrange) {

	pathway <- dbConnect(sqlite,fileName)
	
	if(is.null(GO)  ) GO <- "GOBP"   # initial value not properly set; enforcing  

	# get Gene sets
	querySet = rownames(convertedData)
	sqlQuery = paste( " select distinct gene,pathwayID from pathway where gene IN ('", paste(querySet,collapse="', '"),"')" ,sep="")
	# cat(paste0("\n\nhere:",GO,"There"))

	if( GO != "All") sqlQuery = paste0(sqlQuery, " AND category ='",GO,"'")
	result <- dbGetQuery( pathway, sqlQuery  )
	if( dim(result)[1] ==0) {return(list( x=as.data.frame("No matching species or gene ID file!" )) )}
	# list pathways and frequency of genes
	pathwayIDs = aggregate( result$pathwayID, by   = list(unique.values = result$pathwayID), FUN = length)
	pathwayIDs = pathwayIDs[which(pathwayIDs[,2]>= myrange[1] ),]
	pathwayIDs = pathwayIDs[which( pathwayIDs[,2] <= myrange[2] ),]
	if(dim(pathwayIDs)[1] ==0 ) geneSets = NULL;
	
	# convert pathways into lists like those generated by readGMT
	geneSets = lapply(pathwayIDs[,1], function(x)  result[which(result$pathwayID == x ),1]     )
	pathwayInfo <- dbGetQuery( pathway, paste( " select distinct id,Description from pathwayInfo where id IN ('", 
							paste(pathwayIDs[,1],collapse="', '"),   "') ",sep="") )
	ix = match( pathwayIDs[,1], pathwayInfo[,1])
	names(geneSets) <- pathwayInfo[ix,2]  
	#geneSets <- geneSets[ -which(duplicated(names(geneSets) ))] # remove geneSets with the same name
	dbDisconnect(pathway)
	return( geneSets )
} 


# Runs pathway analysis using PGSEA; this is copied and revised from PGSEA package
myPGSEA  <- function (exprs, cl, range = c(25, 500), ref = NULL, center = TRUE, 
    p.value = 0.005, weighted = TRUE, nPermutation=100, enforceRange = TRUE, ...) {
    if (is(exprs, "ExpressionSet")) 
        exprs <- exprs(exprs)
    if (!is.list(cl)) 
        stop("cl need to be a list")
    if (!is.null(ref)) {
        if (!is.numeric(ref)) 
            stop("column index's required")
    }
    if (!is.null(ref)) {
        if (options.out$verbose) 
            cat("Creating ratios...", "\n")
        ref_mean <- apply(exprs[, ref], 1, mean, na.rm = TRUE)
        exprs <- sweep(exprs, 1, ref_mean, "-")
    }
    if (center) 
        exprs <- scale(exprs, scale = FALSE)         # column centering is done
    results <- matrix(NA, length(cl), ncol(exprs))
    rownames(results) <- names(cl)
    colnames(results) <- colnames(exprs)
    mode(results) <- "numeric"
	Setsize = c(rep(0,length(cl)))     # gene set size vector
	mean2 = c(rep(0,length(cl)))     # mean of the range of means 
	meanSD = c(rep(0,length(cl)))     # SD of the range of means	
    if (is.logical(p.value)) 
        { p.results <- results; mean.results <- results;}
    for (i in 1:length(cl)) {              # for each gene list
		#cat("\nProcessing gene set",i);
        if (class(cl[[i]]) == "smc") {
            clids <- cl[[i]]@ids
        }
        else if (class(cl[[i]]) %in% c("GeneColorSet", "GeneSet")) {
            clids <- cl[[i]]@geneIds
        }
        else {
            clids <- cl[[i]]
        }
        if (options()$verbose) 
            cat("Testing region ", i, "\n")
        ix <- match(clids, rownames(exprs))
        ix <- unique(ix[!is.na(ix)])
        present <- sum(!is.na(ix))
		Setsize[i] <- present 
        if (present < range[1]) {
            if (options()$verbose) 
                cat("Skipping region ", i, " because too small-", 
                  present, ",\n")
            next
        }
        if (present > range[2]) {
            if (options()$verbose) 
                cat("Skipping region ", i, " because too large-", 
                  present, "\n")
            next
        }
        texprs <- exprs[ix, ]           # expression matrix for genes in gene set
        if (any(is.na(texprs))) 
            cat("Warning - 'NA' values within expression data, enrichment scores are estimates only.\n")
        if (!is.matrix(texprs)) 
            texprs <- as.matrix(texprs)
                            
        stat <- try(apply(texprs, 2, t.test, ...))
		means <- try(apply(texprs, 2, mean,trim=0.1))   # trim mean
		ps <- unlist(lapply(stat, function(x) x$p.value))
        stat <- unlist(lapply(stat, function(x) x$statistic))
        p.results[i, ] <- ps
		mean.results[i,] <- means
        results[i, ] <- as.numeric(stat)
		
		# permutation of gene sets of the same size
		if(nPermutation > 2 )  { # no permutation if <=2
			meansRanges = c(0, rep(nPermutation))
			for( k in 1:nPermutation ) {
				ix <- sample.int( dim(exprs)[1], length(ix) )
				texprs <- exprs[ix, ] 
				means <- try(apply(texprs, 2, mean,trim=0.1))   # trim mean
				meansRanges[k] = dynamicRange(means)
			}
			mean2[i] = mean(meansRanges)
			meanSD[i]= sd(meansRanges,na.rm=TRUE)   # NA are removed before calculating standard deviation
		}
    }
    return(list(results = results, p.results = p.results, means = mean.results, size=Setsize, mean2=mean2, meanSD=meanSD))
}

PCApathway<- function () {  # pathway
	x <- convertedData.out;
	library(PGSEA,verbose=FALSE)
	pca.object <- prcomp(t(x))
	pca = 100*pca.object$rotation 
	Npca = 5
	if (Npca > dim(pca)[2]) { Npca = dim(pca)[2] } else pca <-  pca[,1:Npca]
	#pca = pca[,1:5]
	if(is.null(GeneSets.out ) ) return(NULL)  # no species recognized
	if(length(GeneSets.out ) <= 1 ) return(NULL)
	#cat("\n\nGene Sets:",length( GeneSets.out))
	pg = myPGSEA (pca,cl=GeneSets.out,range=c(15,2000),p.value=TRUE, weighted=FALSE,nPermutation=1)

	# correcting for multiple testing
	p.matrix = pg$p.result
	tem = p.adjust(as.numeric(p.matrix),"fdr")
	p.matrix = matrix(tem, nrow=dim(p.matrix)[1], ncol = dim(p.matrix)[2] )
	rownames(p.matrix) = rownames(pg$p.result); colnames(p.matrix) = colnames(pg$p.result)


	selected =c()
	for( i in 1:dim(p.matrix)[2]) {
	  tem = which( rank(p.matrix[,i],ties.method='first') <= 5)  # rank by P value
	 #tem = which( rank(pg$result[,i],ties.method='first') >= dim(p.matrix)[1]-3.1) # rank by mean
	 names(tem) = paste("PC",i," ", rownames(p.matrix)[tem], sep="" )
	 selected = c(selected, tem)
	}
	rowids = gsub(" .*","",names(selected))
	rowids = as.numeric( gsub("PC","",rowids) )
	pvals = p.matrix[ cbind(selected,rowids) ]
	a=sprintf("%-1.0e",pvals)
	tem = pg$result[selected,]
	rownames(tem) = paste(a,names(selected)); #colnames(tem)= paste("PC",colnames(tem),sep="")
	
	tem = tem[!duplicated(selected),] 

	#tem = t(tem); tem = t( (tem - apply(tem,1,mean)) ) #/apply(tem,1,sd) )

	smcPlot(tem,scale =  c(-max(tem), max(tem)), show.grid = T, margins = c(3,1, 6, 23), col = .rwb,cex.lab=0.5, main="Pathways analysis on PCA")
	
	 }

# correlation PCA with factors  
PCA2factor <- function( ){

	if(is.null(readSampleInfo.out)) return(NULL)
	npc = 5 # number of Principal components
    x <- readData.out$data
	y <- readSampleInfo.out

	pca.object <- prcomp(t(x))
	pcaData = as.data.frame(pca.object$x[,1:npc]); 
	pvals = matrix(1,nrow=npc,ncol=ncol(y))
	for (i in 1:npc ){
		for (j in 1:ncol(y) )
			pvals[i,j] =summary( aov(pcaData[,i] ~ as.factor(y[,j])))[[1]][["Pr(>F)"]][1]
	}
	
	pvals = pvals * npc* ncol(y)   # correcting for multiple testing
	pvals[pvals>1] = 1

	colnames(pvals) = colnames(y)
	rownames(pvals) = paste0("PC",1:npc)
	a="\n Correlation between Principal Components (PCs) with factors\n"
	nchar0 = nchar(a)
	for ( i in 1:npc){
		j = which.min(pvals[i,])
		if(pvals[i,j]< 0.05) a=paste0(a,rownames(pvals)[i], 
					" is correlated with ", colnames(pvals)[j],
					" (p=",   sprintf("%-3.2e",pvals[i,j]),").\n")
	}
	if(nchar(a) == nchar0 ) return(NULL) else 
	  return( a )
		  
} 

# detecting sequencing depth bias
readCountsBias <- function( ){

	totalCounts = colSums(readData.out$rawCounts) 
	groups = as.factor( detectGroups(colnames(readData.out$rawCounts ) ) )
	tem = NULL
	# ANOVA of total read counts vs sample groups parsed by sample name
	pval = summary( aov(totalCounts ~ groups ))[[1]][["Pr(>F)"]][1]
	if(pval <0.05)
	  tem = paste("Warning! Sequencing depth bias detected. Total read counts are significantly different among sample groups (p=",
				sprintf("%-3.2e",pval),") based on ANOVA.")

	# ANOVA of total read counts vs factors in experiment design
	if(!is.null(readSampleInfo.out   )  ) {
	  y <- readSampleInfo.out
		for (j in 1:ncol(y) ) { 
		pval = summary( aov(totalCounts ~ as.factor(y[,j])))[[1]][["Pr(>F)"]][1]

		if(pval <0.05)
		tem = paste(tem, " Total read counts seem to be correlated with factor",colnames(y)[j], 
					"(p=",  sprintf("%-3.2e",pval),").  ")
	  }
	 }
	if(is.null(tem)) return( "No bias detected") else
	return( tem )
		  
} 


################################################################
# K-means clustering
################################################################
	 
  #Distribution of SDs 
distributionSD <- function() {

		SDs=apply(convertedData.out,1,sd)
		maxSD = mean(SDs)+ 4*sd(SDs)
		SDs[ SDs > maxSD] = maxSD
		
		top = input_nGenesKNN
		if(top > length(SDs)) top = length(SDs)
		Cutoff=sort(SDs,decreasing=TRUE)[top] 

		SDs = as.data.frame(SDs)

		p <- ggplot(SDs, aes(x=SDs)) + 
		  geom_density(color="darkblue", fill="lightblue") +
		  labs(x = "Standard deviations of all genes", y="Density")+
		  geom_vline(aes(xintercept=Cutoff),
					color="red", linetype="dashed", size=1) +
					annotate("text", x = Cutoff + 0.4*sd(SDs[,1]), y = 1,colour = "red", label = paste0("Top ", top))

		p
}


# Decide number of clusters
KmeansNclusters <- function() { # Kmeans clustering

    x <- convertedData.out
	#x <- readData.out
	#par(mfrow=c(1,2))
	n=input_nGenesKNN
	#if(n>6000) n = 6000 # max
	if(n>dim(x)[1]) n = dim(x)[1] # max	as data
	#x1 <- x;
	#x=as.matrix(x[1:n,])-apply(x[1:n,],1,mean)
	x = 100* x[1:n,] / apply(x[1:n,],1,sum)  # this is causing problem??????
	#x = x - apply(x,1,mean)  # this is causing problem??????
	#colnames(x) = gsub("_.*","",colnames(x))
	set.seed(2)
	
	k = 30
	wss <- (nrow(x)-1)*sum(apply(x,2,var))
	  for (i in 2:k) wss[i] <- sum(kmeans(x,centers=i,iter.max = 30)$withinss)
		par(mar=c(4,5,4,4))
	plot(1:k, wss, type="b", xlab="Number of Clusters (k)",
		 ylab="Within groups sum of squares",
		 cex=2,cex.lab=2, cex.axis=2, cex.main=2, cex.sub=2	,xaxt="n"	 )
	axis(1, at = seq(1, 30, by = 2),cex.axis=1.5,cex=1.5)
	
}
 
Kmeans <- function() { # Kmeans clustering

    x <- convertedData.out
	#x <- readData.out
	#par(mfrow=c(1,2))
	n=input_nGenesKNN
	if(n>maxGeneClustering) n = maxGeneClustering # max
	if(n>dim(x)[1]) n = dim(x)[1] # max	as data
	#x1 <- x;
	#x=as.matrix(x[1:n,])-apply(x[1:n,],1,mean)
	#x = 100* x[1:n,] / apply(x[1:n,],1,sum) 
	x = x[1:n,]
	if( input_kmeansNormalization == 'L1Norm')
		x = 100* x / apply(x,1,function(y) sum(abs(y))) else # L1 norm
	if( input_kmeansNormalization == 'geneMean')
		x = x - apply(x,1,mean)  else # this is causing problem??????
	if( input_kmeansNormalization == 'geneStandardization')	
		x = (x - apply(x,1,mean) ) / apply(x,1,sd)
	#colnames(x) = gsub("_.*","",colnames(x))
	set.seed(input_KmeansReRun)
	k=input_nClusters

	cl = kmeans(x,k,iter.max = 50)
	#myheatmap(cl$centers)	
 

	hc <- hclust2(dist2(cl$centers-apply(cl$centers,1,mean) )  )# perform cluster for the reordering of samples
	tem = match(cl$cluster,hc$order) #  new order 
	x = x[order(tem),] ; 	bar = sort(tem)

	#myheatmap2(x-apply(x,1,mean), bar,1000)
	return(list( x = x, bar = bar)) 

  } 


myheatmap2 <- function (x,bar=NULL,n=-1,mycolor=1,clusterNames=NULL,sideColors=NULL ) {
	# number of genes to show
	ngenes = as.character( table(bar))
	if(length(bar) >n && n != -1) {ix = sort( sample(1:length(bar),n) ); bar = bar[ix]; x = x[ix,]  }
	if(! is.null(bar) )
		if(is.null(sideColors) ) 
			sideColors = mycolors

	# this will cutoff very large values, which could skew the color 
	x=as.matrix(x)-apply(x,1,mean)
	cutoff = median(unlist(x)) + 3*sd (unlist(x)) 
	x[x>cutoff] <- cutoff
	cutoff = median(unlist(x)) - 3*sd (unlist(x)) 
	x[x< cutoff] <- cutoff
	#colnames(x)= detectGroups(colnames(x))
	if(is.null(bar)) # no side colors
		heatmap.2(x,  Rowv =F,Colv=F, dendrogram ="none",
			col=heatColors[as.integer(mycolor),], density.info="none", trace="none", scale="none", keysize=.3
			,key=F, labRow = F,
			#,RowSideColors = mycolors[bar]
			,margins = c(8, 24)
			,srtCol=45
		) else
		heatmap.2(x,  Rowv =F,Colv=F, dendrogram ="none",
			col=heatColors[as.integer(mycolor),], density.info="none", trace="none", scale="none", keysize=.3
			,key=F, labRow = F,
			,RowSideColors = sideColors[bar]
			,margins = c(8, 24)
			,srtCol=45
		)
		
	if(!is.null(bar)) { 

		legend.text = paste("Cluster ", toupper(letters)[unique(bar)], " (N=", ngenes,")", sep="") 
		if( !is.null( clusterNames ) && length(clusterNames)>= length( unique(bar) ) )  
			legend.text = paste(clusterNames[ 1:length( unique(bar) )  ], " (N=", ngenes,")", sep="") 
		
		par(lend = 1)           # square line ends for the color legend
		legend("topright",      # location of the legend on the heatmap plot
		legend = legend.text, # category labels
		col = sideColors,  # color key
		lty= 1,             # line style
		lwd = 10 )           # line width
		}
}

KmeansHeatmap <- function() { # Kmeans clustering

	myheatmap2(Kmeans.out$x-apply(Kmeans.out$x,1,mean), Kmeans.out$bar,1000,mycolor=input_heatColors1)
}
 

tSNEgenePlot <- function() {
			Cluster <- Kmeans.out$bar
			train <- as.data.frame( cbind(Cluster,Kmeans.out$x) )

			library(Rtsne,verbose=FALSE)

			train = unique(train)
			Cluster = train$Cluster	
			set.seed(input_seedTSNE)
			
			## Executing the algorithm on curated data
			tsne <- Rtsne(train[,-1], dims = 2, perplexity=30, verbose=FALSE, max_iter = 400)


			nClusters = length(unique(Cluster) )
			if(input_colorGenes) {			
				plot(tsne$Y[,1], tsne$Y[,2], pch = (0:(nClusters-1))[Cluster], cex = 1.,col = mycolors[Cluster], xlab="X",ylab="Y")
				legend("topright",toupper(letters)[1:nClusters], pch = 0:(nClusters-1), col=mycolors, title="Cluster"  )
			} else
				plot(tsne$Y[,1], tsne$Y[,2],  cex = 1., xlab="X",ylab="Y")

}


# Main function. Find a query set of genes enriched with functional category
FindOverlap <- function (converted,gInfo, GO,selectOrg,minFDR, reduced = FALSE) {
	maxTerms =15 # max number of enriched terms
	idNotRecognized = as.data.frame("ID not recognized!")
	
	if(is.null(converted) ) return(idNotRecognized) # no ID 
	
	# only coding
	gInfo <- gInfo[which( gInfo$gene_biotype == "protein_coding"),]  
	querySet <- intersect( converted$IDs, gInfo[,1]);
	
	if(length(querySet) == 0) return(idNotRecognized )
	
	ix = grep(converted$species[1,1],gmtFiles)
	totalGenes <- converted$species[1,7]
	
	if (length(ix) == 0 ) {return(idNotRecognized )}
	
	# If selected species is not the default "bestMatch", use that species directly
	if(selectOrg != speciesChoice[[1]]) {  
		ix = grep(findSpeciesById(selectOrg)[1,1], gmtFiles )
		if (length(ix) == 0 ) {return(idNotRecognized )}
		totalGenes <- orgInfo[which(orgInfo$id == as.numeric(selectOrg)),7]
	}
	pathway <- dbConnect(sqlite,gmtFiles[ix],flags=SQLITE_RO)
	
		
	sqlQuery = paste( " select distinct gene,pathwayID from pathway where gene IN ('", paste(querySet,collapse="', '"),"')" ,sep="")
	
	#cat(paste0("HH",GO,"HH") )
	
	if( GO != "All") sqlQuery = paste0(sqlQuery, " AND category ='",GO,"'")
	result <- dbGetQuery( pathway, sqlQuery  )
	if( dim(result)[1] ==0) {return(as.data.frame("No matching species or gene ID file!" )) }

	# given a pathway id, it finds the overlapped genes, symbol preferred
	sharedGenesPrefered <- function(pathwayID) {
		tem <- result[which(result[,2]== pathwayID ),1]
		ix = match(tem, converted$conversionTable$ensembl_gene_id) # convert back to original
		tem2 <- unique( converted$conversionTable$User_input[ix] )
		if(length(unique(gInfo$symbol) )/dim(gInfo)[1] >.7  ) # if 70% genes has symbol in geneInfo
		{ ix = match(tem, gInfo$ensembl_gene_id); 
		tem2 <- unique( gInfo$symbol[ix] )      }
	return( paste( tem2 ,collapse=" ",sep="") )}
	
	x0 = table(result$pathwayID)					
	x0 = as.data.frame( x0[which(x0>=Min_overlap)] )# remove low overlaps
	if(dim(x0)[1] <= 5 ) return(idNotRecognized) # no data
	colnames(x0)=c("pathwayID","overlap")
	pathwayInfo <- dbGetQuery( pathway, paste( " select distinct id,n,Description from pathwayInfo where id IN ('", 
							paste(x0$pathwayID,collapse="', '"),   "') ",sep="") )
	
	x = merge(x0,pathwayInfo, by.x='pathwayID', by.y='id')
	
	x$Pval=phyper(x$overlap-1,length(querySet),totalGenes - length(querySet),as.numeric(x$n), lower.tail=FALSE );
	x$FDR = p.adjust(x$Pval,method="fdr")
	x <- x[ order( x$FDR)  ,]  # sort according to FDR
	
	if(dim(x)[1] > maxTerms ) x = x[1:maxTerms,]	
	
	if(min(x$FDR) > minFDR) x=as.data.frame("No significant enrichment found!") else {
		x <- x[which(x$FDR < minFDR),] 

		x= cbind(x,sapply( x$pathwayID, sharedGenesPrefered ) )
		colnames(x)[7]= "Genes"
		x <- subset(x,select = c(FDR,overlap,n,description,Genes) )
		colnames(x) = c("Corrected P value (FDR)", "Genes in list", "Total genes in category","Functional Category","Genes"  )
		
		# remove redudant gene sets
		if(reduced != FALSE ){  # reduced=FALSE no filtering,  reduced = 0.9 filter sets overlap with 90%
			n=  nrow(x)
			tem=rep(TRUE,n )
			geneLists = lapply(x$Genes, function(y) unlist( strsplit(as.character(y)," " )   ) )
			for( i in 2:n)
				for( j in 1:(i-1) ) { 
				  if(tem[j]) { # skip if this one is already removed
					  commonGenes = length(intersect(geneLists[i] ,geneLists[j] ) )
					  if( commonGenes/ length(geneLists[j] ) > reduced )
						tem[i] = FALSE	
				  }			
				}								
			x <- x[which(tem),]		
		}
		

	}
			
	dbDisconnect(pathway)
	return(x )
} 
 
# Given a gene set, finds significant overlaps with a gene set database  object 
findOverlapGMT <- function ( query, geneSet, minFDR=.2 ,minSize=2,maxSize=10000 ){ 
	#geneSets <- readGMT("exampleData/MousePath_TF_gmt.gmt")
	#query <-  geneSets[['TF_MM_FRIARD_C-REL']] 
	#query <- query[1:60]
	total_elements = 30000
	Min_overlap <- 1
	maxTerms =10 # max number of enriched terms
	noSig <- as.data.frame("No significant enrichment found!")
	query <- cleanGeneSet(query)   # convert to upper case, unique()

	if(length(query) <=2) return(noSig)
	if(length(geneSet) <1) return(noSig)
	  geneSet <- geneSet[which(sapply(geneSet,length) > minSize)]  # gene sets smaller than 1 is ignored!!!
	  geneSet <- geneSet[which(sapply(geneSet,length) < maxSize)]  # gene sets smaller than 1 is ignored!!!
	result <- unlist( lapply(geneSet, function(x) length( intersect (query, x) ) ) )
	result <- cbind(unlist( lapply(geneSet, length) ), result )
	result <- result[ which(result[,2]>Min_overlap), ,drop=F]
	if(dim(result)[1] == 0) return( noSig)
	xx <- result[,2]
	mm <- length(query)
	nn <- total_elements - mm
	kk <- result[,1]
	Pval_enrich=phyper(xx-1,mm,nn,kk, lower.tail=FALSE );
	FDR <- p.adjust(Pval_enrich,method="fdr",n=length(geneSet) )
	result <- as.data.frame(cbind(FDR,result))
	result <- result[,c(1,3,2)]
	result$pathway = rownames(result)
	result$Genes = ""  # place holder just 
	colnames(result)= c("Corrected P value (FDR)", "Genes in list", "Total genes in category","Functional Category","Genes"  )
	result <- result[ which( result[,1] < minFDR),,drop=F]
	if( dim( result)[1] == 0) return(noSig) 
	if(min(FDR) > minFDR) return(noSig) 
	result <- result[order(result[,1] ),]
	if(dim(result)[1] > maxTerms ) result <- result[1:maxTerms,]

	return( result)
}

KmeansGO <- function() {
		pp=0
		minFDR = 0.01

		for( i in 1:input_nClusters) {

			query = rownames(Kmeans.out$x)[which(Kmeans.out$bar == i)]

				result <- findOverlapGMT( query, GeneSets.out,1) 

			if( dim(result)[2] ==1) next;   # result could be NULL
			result$Genes = toupper(letters)[i] 
			if (pp==0 ) { results <- result; pp <- 1;
			} else {
				results <- rbind(results,result)
			}
		}

		if(pp == 0) return( as.data.frame("No enrichment found."))
		results= results[,c(5,1,2,4)]
		colnames(results)= c("Cluster","FDR","Genes","Pathways")
		if(min(results$FDR) > minFDR ) results = as.data.frame("No signficant enrichment found.") else
		results = results[which(results$FDR < minFDR),]
	
	if( is.null(results) )  return ( as.matrix("No significant enrichment.") )	
	if( class(results) != "data.frame")  return ( as.matrix("No significant enrichment.") )
	if( dim(results)[2] ==1)  return ( as.matrix("No significant enrichment.") )
	colnames(results)[2] = "adj.Pval"
	results$Genes <- as.character(results$Genes)
	results$Cluster[which( duplicated(results$Cluster) ) ] <- ""
	results
  }


################################################################
# Differential expression
################################################################
 
# Differential expression using LIMMA 
DEG.limma <- function (x, maxP_limma=.1, minFC_limma=2, rawCounts,countsDEGMethods,priorCounts, dataFormat, selectedComparisons=NULL, sampleInfo = NULL,modelFactors=NULL, blockFactor = NULL){
	library(limma,verbose=FALSE) # Differential expression
	library(statmod,verbose=FALSE)
	
	# many different situations: 1. just use sample names 2. just one factor  3. two factors no interaction
	# 4. two factors with interaction   5. block factor 
	
	
	topGenes = list();  limmaTrend = FALSE
	if( dataFormat == 2) {   # if normalized data
		eset = new("ExpressionSet", exprs=as.matrix(x)) } else { # counts data
			if (countsDEGMethods == 1 ) { # limma-trend method selected for counts data
				#dge <- DGEList(counts=rawCounts);
				#dge <- calcNormFactors(dge, method = "TMM")
				#eset <- cpm(dge, log=TRUE, prior.count=priorCounts)
				eset = new("ExpressionSet", exprs=as.matrix(x))  # use transformed data for limma 
				limmaTrend = TRUE
			}
	}

	groups = colnames(x)
	groups = detectGroups( groups)
	g =  unique(groups)  
	
	# check for replicates, removes samples without replicates
	reps = as.matrix(table(groups)) # number of replicates per biological sample
	if ( sum( reps[,1] >= 2) <2    ) # if less than 2 samples with replicates
	return( list(results= NULL, comparisons = NULL, Exp.type="Failed to parse sample names to define groups. 
		Cannot perform DEGs and pathway analysis. Please double check column names! Use WT_Rep1, WT_Rep2 etc. ", topGenes=NULL)) 
	# remove samples without replicates
	g <- rownames(reps)[which(reps[,1] >1)]
	ix <- which( groups %in% g)  
	groups <- groups[ix]   
	x<- x[,ix]; rawCounts <- rawCounts[,ix] 
	
	if(length(g) ==2 ) { 
		g= unique(groups)
		comparisons <-  paste(g[2],"-",g[1],sep="")  # "Mutant-WT"
		
		# no sample file, but user selected comparisons using column names
		if( is.null(modelFactors) & length( selectedComparisons) >0  ) 	
			comparisons = selectedComparisons
		
		design <- model.matrix(~0+groups)
		colnames(design) <- g
		
		if( !is.null(rawCounts) && countsDEGMethods == 2) {  # voom
			dge <- DGEList(counts=rawCounts);
			dge <- calcNormFactors(dge, method = "TMM")  # normalization
			v <- voom(dge, design); fit <- lmFit(v, design) } else 
			fit <- lmFit(eset, design)      # regular limma
			
		cont.matrix <- makeContrasts(contrasts=comparisons, levels=design)
		fit2 <- contrasts.fit(fit, cont.matrix)
		fit2 <- eBayes(fit2, trend=limmaTrend)

		# calls differential gene expression 1 for up, -1 for down
		results <- decideTests(fit2, p.value=maxP_limma, lfc=log2(minFC_limma) )
		#vennDiagram(results,circle.col=rainbow(5))
		topGenes1 =topTable(fit2, number = 1e12,sort.by="M" )
		if (dim(topGenes1)[1] != 0) {
		topGenes1 = topGenes1[,c('logFC','adj.P.Val')] 
		# topGenes1[,1] <-  -1* topGenes1[,1] # reverse direction
		topGenes[[1]] <- topGenes1 }
		# log fold change is actually substract of means. So if the data is natral log transformed, it shoudl be natral log.
		Exp.type = "2 sample groups."
	}
	
	if(length(g) > 2 ) { # more than two sample groups
	
		design <- model.matrix(~ 0+factor(groups))
		colnames(design) <- gsub(".*)","",colnames(design))
		
		if( !is.null(rawCounts) && countsDEGMethods == 2) {  # voom
			v <- voom(rawCounts, design); 
			fit <- lmFit(v, design) 
		} else 
			fit <- lmFit(eset, design)
		
		fit <- eBayes(fit, trend=limmaTrend)
		
		comparisons = ""
		for( i in 1:(length(g)-1) )
			for (j in (i+1):length(g)) 
			comparisons = c(comparisons,paste(g[j],"-",g[i],sep="" ) )
		comparisons <- comparisons[-1]

		# no sample file, but user selected comparisons using column names
		if( is.null(modelFactors) & length( selectedComparisons) >0  ) 	
			comparisons = selectedComparisons
		
		contrast1 <- makeContrasts(contrasts=comparisons[1], levels=design)
		if(length(comparisons)>1 )
			for( kk in 2:length(comparisons) )
				contrast1<-  cbind(contrast1,makeContrasts(contrasts=comparisons[kk], levels=design)   )
		Exp.type = paste(length(g)," sample groups detected.")	 
		
		# if factorial design 2x2, 2x3, 3x5 etc.
			# all samples must be something like WT_control_rep1
		if ( sum(sapply(strsplit(g,"_"),length) == 2 ) == length(g) ) {
		
			#comparisons
			comparisons = ""
			for( i in 1:(length(g)-1) )
				for (j in (i+1):length(g)) 
				if( strsplit(g[i],"_")[[1]][1] == strsplit(g[j],"_")[[1]][1]| strsplit(g[i],"_")[[1]][2] == strsplit(g[j],"_")[[1]][2]) # only compare WT_control vs. WT_treatment
					comparisons = c(comparisons,paste(g[j],"-",g[i],sep="" ) )
			comparisons <- comparisons[-1]

			#factors genotype treatment levels
			extract_treatment <- function (x) paste( gsub( ".*_","",unlist( strsplit(x,"-")) ), collapse="-")
			extract_genotype <- function (x) gsub( "_.*","",unlist( strsplit(x,"-")) )[1]
			extract_treatment_counting <- unique( gsub( ".*_","",unlist( strsplit(g,"-")) ))
			treatments = sapply(comparisons, extract_treatment)
			genotypes = sapply(comparisons, extract_genotype)
			Exp.type = paste( Exp.type, "\nFactorial design:",length(unique(genotypes)),"X", length( extract_treatment_counting ), sep="" )

			# pairwise contrasts
			contrast1 <- makeContrasts(contrasts=comparisons[1], levels=design)
			for( kk in 2:length(comparisons) )
				contrast1<-  cbind(contrast1,makeContrasts(contrasts=comparisons[kk], levels=design)   )
			contrast.names = colnames(contrast1)

			# interaction contrasts
			for ( kk in 1:(length(comparisons)-1) ) {
			for( kp in (kk+1):length(comparisons)) 
				if( treatments[kp]== treatments[kk] ) 
				{  
					contrast1 = cbind(contrast1, contrast1[,kp]- contrast1[,kk] )
					contrast.names = c(contrast.names, paste("I:",  genotypes[kp], "-", genotypes[kk],"(",gsub("-",".vs.",treatments[kp]),")",sep="" ) )
				}   
			}
			colnames(contrast1)=contrast.names
			comparisons = contrast.names

		} # if interaction terms	
		

		# if sample information is uploaded and user selected factors and comparisons
		if( !is.null(modelFactors) & length( selectedComparisons) >0  ) {
			Exp.type = paste("Model: ~", paste(modelFactors,collapse=" + ") )
			interactionTerm = FALSE # default value to be re-write if needed
			
			# model factors that does not contain interaction terms
			# modelFactors "genotype", "condition", "genotype:condition"
			keyModelFactors = modelFactors[ !grepl(":",modelFactors) ]
			
			# "genotype: control vs. mutant"
			factorsVector= gsub(":.*","",selectedComparisons) # corresponding factors for each comparison
			# remove factors not used in comparison, these are batch effects/pairs/blocks, 
			# keyModelFactors = keyModelFactors[ !is.na(match(keyModelFactors, factorsVector)) ]	
			
			# if a factor is selected both in block and main factors, then use it as block factor
			keyModelFactors = keyModelFactors[ is.na(match(keyModelFactors, blockFactor)) ]	
		
            #------Design matrix 			
			sampleInfo2 = sampleInfo[,keyModelFactors,drop=F] # remove factors not used.
			groups = apply(sampleInfo2,1, function(x) paste(x,collapse="_")  )
			g =  unique(groups)  
			
			design <- model.matrix(~ 0+factor(groups))
			colnames(design) <- gsub(".*)","",colnames(design))
		
			if( !is.null(rawCounts) && countsDEGMethods == 2) {  # voom
				v <- voom(rawCounts, design); 
				fit <- lmFit(v, design) 
			} else 
				fit <- lmFit(eset, design)
		
			fit <- eBayes(fit, trend=limmaTrend)	
			
			#-----------Making comaprisons
			if( length(keyModelFactors) != 2 | length(blockFactor) >1 )  { # if only one factor, or more than two then use all pairwise comparisons
				comparisons = gsub(".*: ","",selectedComparisons)
				comparisons = gsub(" vs\\. ","-",comparisons)
			} else if( length(keyModelFactors) == 2 ){ # two key factors

				if( sum(grepl(":",modelFactors)	>0) ) {  # interaction? 
					interactionTerm =TRUE
					# all pairwise comparisons
					comparisons = ""
					for( i in 1:(length(g)-1) )
						for (j in (i+1):length(g)) 
						if( strsplit(g[i],"_")[[1]][1] == strsplit(g[j],"_")[[1]][1]| strsplit(g[i],"_")[[1]][2] == strsplit(g[j],"_")[[1]][2]) # only compare WT_control vs. WT_treatment
							comparisons = c(comparisons,paste(g[j],"-",g[i],sep="" ) )
					comparisons <- comparisons[-1]
					
					# pairwise contrasts
					contrast1 <- makeContrasts(contrasts=comparisons[1], levels=design)
					if(length(comparisons)>1 )
					for( kk in 2:length(comparisons) )
						contrast1<-  cbind(contrast1,makeContrasts(contrasts=comparisons[kk], levels=design)   )
					contrast.names = colnames(contrast1)		
				
					# all possible interactions
					# interaction contrasts
					
					contrast2 = NULL
					contrast.names =""
					for ( kk in 1:(dim(contrast1)[2]-1) ) {
					for( kp in (kk+1):dim(contrast1)[2]) 
						#if( treatments[kp]== treatments[kk] ) 
						{  if(is.null(contrast2))  
							contrast2 = contrast1[,kp]- contrast1[,kk]  else					
							contrast2 = cbind(contrast2, contrast1[,kp]- contrast1[,kk] )
							
							contrast.names = c(contrast.names, paste0("I:",  colnames(contrast1)[kp], ".vs.", colnames(contrast1)[kk] ) )
						}   
					}
					colnames(contrast2)=contrast.names[-1]
					
					# remove nonsense contrasts from interactions
					 contrast2 = contrast2[,which(apply(abs(contrast2),2,max)==1),drop=F]
					 contrast2 = contrast2[,which(apply(abs(contrast2),2,sum)==4),drop=F]	
					 contrast2 = t( unique(t(contrast2)) ) # remove duplicate columns		
					
					# remove unwanted contrasts involving more than three levels in either factor
					keep= c()
					for( i in 1:dim(contrast2)[2]) {
						tem = rownames(contrast2)[ contrast2[ ,i ] != 0   ]
						tem1 = unique (  unlist(gsub("_.*","", tem) ) )
						tem2 = unique (  unlist(gsub(".*_","", tem) ) )
						if( length(tem1) == 2 & length(tem2) ==2 )
						keep = c(keep, colnames(contrast2) [i] )
					}
					contrast2 = contrast2[,keep,drop=F]
					comparisons2 = colnames(contrast2) 				 
				}

				# "stage: MN vs. EN"  -->  c("MN_AB-EN_AB", "EN_Nodule-EN_AB") 
				#  comparisons in all levels of the other factor 
				transformComparisons <- function (comparison1){
					tem = gsub(".*: ","",comparison1)
					tem = unlist(strsplit(tem, " vs\\. ") ) # control  mutant							
					factor1= gsub(":.*","",comparison1)

					ix = match(factor1, keyModelFactors) # 1: first factor, 2: 2nd factor
					otherFactor = keyModelFactors[3-ix]   # 3-1 = 2; 3-1=1
					otherFactorLevels = unique( sampleInfo2[,otherFactor] )				
					comparisons = c( )
					
					for (factorLevels in otherFactorLevels) {
						if( ix == 1){
							comparisons = c( comparisons, paste(paste0( tem, "_",factorLevels),collapse="-") )
						} else {
							comparisons = c( comparisons,  paste(paste0(factorLevels, "_", tem),collapse="-") )
						}
					}
					return(comparisons)		
				}	
				
				comparisons = unlist( sapply(selectedComparisons, transformComparisons ))
				comparisons = as.vector(comparisons)
			
			} # two factors
			
			# make contrasts
			contrast1 <- makeContrasts(contrasts=comparisons[1], levels=design)
			if(length(comparisons) >1 )
			for( kk in 2:length(comparisons) )
				contrast1<-  cbind(contrast1,makeContrasts(contrasts=comparisons[kk], levels=design)   )

			if( interactionTerm ) {  # if interaction terms
				contrast1 = cbind(contrast1,contrast2)
				contrast.names = c(colnames(contrast1), colnames(contrast2) )
				comparisons = c(comparisons,comparisons2)
			}
			
			# block design to remove batch effect or paired samples
			# corfit <- duplicateCorrelation(eset,design,block=targets$Subject)
			# corfit$consensus
			#Then this inter-subject correlation is input into the linear model fit:
			# fit <- lmFit(eset,design,block=targets$Subject,correlation=corfit$consensus)

			if(length(blockFactor) >= 1 ) { # if a factor is selected as block
				if(length(blockFactor) >= 1 ) 
					blockFactor = blockFactor[1] # if multiple use the first one

				block = sampleInfo[, blockFactor]  # the column not used
			
				if( !is.null(rawCounts) && countsDEGMethods == 2) {  # voom
					v <- voom(rawCounts, design);
					corfit <- duplicateCorrelation(v,design,block= block)			
					fit <- lmFit(v, design,block=block,correlation=corfit$consensus) 
				} else {
					corfit <- duplicateCorrelation(eset,design,block= block)
					fit <- lmFit(eset, design,block=block,correlation=corfit$consensus)
				}
				fit <- eBayes(fit, trend=limmaTrend)			
				
			} # block factors


		} # use selected factors

		
		fit2 <- contrasts.fit(fit, contrast1)
		fit2 <- eBayes(fit2, trend=limmaTrend)
		#topTable(fit2, coef=1, adjust="BH")
		results <- decideTests(fit2, p.value=maxP_limma, lfc= log2(minFC_limma ))
		#vennDiagram(results[,1:5],circle.col=rainbow(5))

		#colnames(results) = unlist(sapply( colnames(results), changeNames  ) )
		#comparisons3 <-  unlist(sapply( comparisons, changeNames  ) ) 

		# extract fold change for each comparison
		# there is issues with direction of foldchange. Sometimes opposite
		top <- function (comp) {
			tem <- topTable(fit2, number = 1e12,coef=comp,sort.by="M" ) 
			if(dim(tem)[1] == 0) { return (1) 
			} else	{ 			
				# compute fold change for the first gene (ranked by absolute value)
				tem2 = as.numeric( x[ which(rownames(x)== rownames(tem)[1]) , ] )
				names(tem2) = colnames(x) 
					
				return( tem[,c(1,5)]) 
			}  
													
		}  # no significant gene returns 1, otherwise a data frame
		
		
		topGenes <- lapply(comparisons, top)
		topGenes <- setNames(topGenes, comparisons )

		ix <- which( unlist( lapply(topGenes, class) ) == "numeric")
		if( length(ix)>0) topGenes <- topGenes[ - ix ]
		# if (length(topGenes) == 0) topGenes = NULL;
	}
	
	#cat("\n", names(topGenes) )
	#cat("\n", colnames(results))
	#cat("\n", comparisons3)
	#comparisons <- comparisons3
	# it does not make any sense! comparisons can not be changed!
	#comparisons =comparisons3	
	#cat("\n", comparisons)		
		
	return( list(results= results, comparisons=comparisons, Exp.type=Exp.type, topGenes=topGenes)) 
}

# Differential expression using DESeq2
DEG.DESeq2 <- function (  rawCounts,maxP_limma=.05, minFC_limma=2, selectedComparisons=NULL, sampleInfo = NULL,modelFactors=NULL, blockFactor = NULL, referenceLevels=NULL){
	library(DESeq2,verbose=FALSE) # count data analysis
	groups = as.character ( detectGroups( colnames( rawCounts ) ) )
	g = unique(groups)# order is reversed
	
	
	# check for replicates, removes samples without replicates
	reps = as.matrix(table(groups)) # number of replicates per biological sample
	if ( sum( reps[,1] >= 2) <2 ) # if less than 2 samples with replicates
	return( list(results= NULL, comparisons = NULL, Exp.type="Failed to parse sample names to define groups. 
		Cannot perform DEGs and pathway analysis. Please double check column names! Use WT_Rep1, WT_Rep2 etc. ", topGenes=NULL)) 
	# remove samples without replicates
	g <- rownames(reps)[which(reps[,1] >1)]
	ix <- which( groups %in% g)  
	groups <- groups[ix]   
	rawCounts <- rawCounts[,ix] 
	
	
	Exp.type = paste(length(g)," sample groups detected.")
	comparisons = ""
	for( i in 1:(length(g)-1) )
		for (j in (i+1):length(g)) 
		comparisons = c(comparisons,paste(g[j],"-",g[i],sep="" ) )
	comparisons <- comparisons[-1]

	colData = cbind(colnames(rawCounts), groups )

	# no sample file, but user selected comparisons using column names
	if( is.null(modelFactors) & length( selectedComparisons) >0  ) 	
		comparisons = selectedComparisons
		
	comparisons2 = comparisons	 # this is for showing comparison names, which might be different from internally	
	# Set up the DESeqDataSet Object and run the DESeq pipeline
	dds = DESeqDataSetFromMatrix(countData=rawCounts,
								colData=colData,
								design=~groups)								
	
	if( is.null(modelFactors)  ) 
		dds = DESeq(dds)  	else  
	{    # using selected factors and comparisons
		# build model
		modelFactors = c(modelFactors,blockFactor) # block factor is just added in. 

		factors = modelFactors   # selected factors and interactions: c( "strain", "treatment",  "strain:treatment")
		factors = factors[ !grepl(":",factors )]   # non-interaction terms
		# interaction terms like strain:treatment
		Interactions = modelFactors[ grepl(":",modelFactors )]
		
		colData = sampleInfo  
		factorsCoded = toupper(letters )[1: dim(colData)[2] ]   # Factors are encoded as "A", "B", "C"; this avoid illigal letters
		names(factorsCoded) =  colnames(colData)  # this is for look up; each column of sampleInfo  
		colnames(colData) = factorsCoded # all columns named A B C D 

		colData = as.data.frame(colData)
		
		# set reference levels for factors
		if(! is.null( referenceLevels) ) {   # c("genotype:wt", "treatment:control" )
			# first factor
			for ( refs in referenceLevels)
				if(! is.null( refs) ) {
					ix = match(gsub(":.*","",refs), colnames(sampleInfo) ) # corresponding column id for factor
					colData[,ix] = as.factor( colData[,ix] )
					colData[,ix] = relevel(colData[,ix],gsub(".*:","",refs)  )
				}
		}
				
		# base model
        DESeq2.Object= paste("dds = DESeqDataSetFromMatrix(countData=rawCounts, colData=colData, design=~ ", 
							 paste( factorsCoded[factors],collapse="+")) # only use selected factors		
		Exp.type = paste("Model: ~", paste(modelFactors,collapse=" + ") )

		
		# create model
		if( length(Interactions)>0 ) { # if there is interaction
			for( interactionTerms in Interactions) {
				interactingFactors = unlist(strsplit(interactionTerms,":" ) )  # split strain:treatment as "strain" and "mutant"
				tem = paste(factorsCoded [ interactingFactors ],collapse=":")   # convert "strain:mutant" to "A:B"
				DESeq2.Object = paste(DESeq2.Object, " + ",tem)
			}			
		}
		DESeq2.Object= paste( DESeq2.Object, ")") # ends the model

		eval(parse(text = DESeq2.Object) )
		dds = DESeq(dds)  # main function		
		
		# comparisons 
		# "group: control vs. mutant"
		comparisons = gsub(".*: ","",selectedComparisons)
		comparisons = gsub(" vs\\. ","-",comparisons)
		factorsVector= gsub(":.*","",selectedComparisons) # corresponding factors for each comparison
		
		# comparison2 holds names for display with real factor names
		# comparison  is used in calculation it is A, B, C for factors
		comparisons2 = comparisons    
		#comparisons2 = gsub(" vs\\. ","-",selectedComparisons) 
		#comparisons2 = gsub(":","_",comparisons2)		
		# Note that with interaction terms, not all meaningful comparisons is listed for selection. 
		# this is complex. Only under reference level.
		
		# comparisons due to interaction terms
		if( length(Interactions)>0 ) { # if there is interaction
			interactionComparisons = resultsNames(dds)
			interactionComparisons = interactionComparisons[ grepl("\\.",interactionComparisons )   ]
	
			comparisons = c(comparisons,interactionComparisons )
			
			# translate comparisons generated in interaction terms back to real factor names
			interactionComparisons2 = interactionComparisons
			for ( i in 1:length(interactionComparisons2 ) ) {
				tem = unlist(strsplit(interactionComparisons2[i],"\\." ) )
				tem_factors = substr(tem,1,1) 
				
				tem_factors[1] = names(factorsCoded)[factorsCoded == tem_factors[1]]  # get the first letter and translate into real factor names
				tem_factors[2] = names(factorsCoded)[factorsCoded == tem_factors[2]]  # get the 2nd letters and translate into real factor names

				interactionComparisons2[i] <- paste0( "I:",tem_factors[1], "_",substr(tem[1],2,nchar(tem[1]) ),".",
													          tem_factors[2], "_",substr(tem[2],2,nchar(tem[2]) ) 
													)				
			}
			comparisons2 = c(comparisons2,interactionComparisons2 )
		}
	} # if selected factors	
	
	# extract contrasts according to comprisons defined above
	result1 = NULL; allCalls = NULL;
	topGenes = list(); pk = 1 # counter
	pp=0 # first results?
	for( kk in 1:length(comparisons) ) {
		tem = unlist( strsplit(comparisons[kk],"-") )
		
		if(is.null(modelFactors)) # if just group comparison using sample names
			selected = results(dds, contrast=c("groups", tem[1], tem[2]) )   else {
			if(!grepl("\\.", comparisons[kk] ) )    # if not interaction term: they contain .  interaction term
				selected = results(dds, contrast=c( factorsCoded[ factorsVector[kk] ],tem[1], tem[2]) ) else # either A, B, C ...
				selected = results(dds, name=comparisons[kk] ) # interaction term
			}

		selected$calls =0   
		selected$calls [which( selected$log2FoldChange > log2(minFC_limma) & selected$padj < maxP_limma ) ]  <-  1
		selected$calls [ which( selected$log2FoldChange <  -log2(minFC_limma) & selected$padj < maxP_limma ) ] <-  -1
		colnames(selected)= paste( as.character(comparisons2[kk]), "___",colnames(selected),sep="" )
		selected = as.data.frame(selected)
		if (pp==0){  # if first one with significant genes, collect gene list and Pval+ fold
		result1 = selected; pp = 1; 
		# selected[,2] <- -1 * selected[,2] # reverse fold change direction
		topGenes[[1]] = selected[,c(2,6)]; 
		names(topGenes)[1] = comparisons2[kk]; } else 
			{ result1 = merge(result1,selected,by="row.names"); 
				rownames(result1) = result1[,1]; 
				result1 <- result1[,-1]
				pk= pk+1; 
				# selected[,2] <- -1 * selected[,2] # reverse fold change direction
				topGenes[[pk]] = selected[,c(2,6)]; 
				names(topGenes)[pk] = comparisons2[kk];  # assign name to comprison
			}
	}

	Interactions = c()
	if( !is.null(modelFactors) )
		Interactions = modelFactors[ grepl(":",modelFactors )]
		
#---  add comprisons for non-reference levels. It adds to the results1 object.	
	if( length(Interactions)>0 ) { # if there is interaction
		factorLookup=c() # a factor whose values are factors and names are factor and level combination conditionTreated, genotypeWT
		levelLookup = c()
		
		for( i in 1:dim(sampleInfo)[2]) {
			sampleInfo2 = unique(sampleInfo)
			tem = rep(toupper(letters)[i],dim(sampleInfo2)[1]  )
			names(tem) = paste0(toupper(letters)[i],sampleInfo2[,i])
			factorLookup = c(factorLookup,tem)  
			
			tem = as.character( sampleInfo2[,i] )
			names(tem) = paste0(toupper(letters)[i],sampleInfo2[,i])
			levelLookup = c(levelLookup, tem)
		}
		
		# split  genotypeI.conditionTrt --> c("genotype","I","conditoin","Trt")
		splitInteractionTerms <- function (term) {
			if(!grepl("\\.",term) ) return(NULL)
			terms2 = unlist(strsplit(term,"\\.") )
					 # factor1, level1, factor2, level2
			return(c(factorLookup[terms2[1]], levelLookup[terms2[1]],factorLookup[terms2[2]], levelLookup[terms2[2]]   ) )
		}
		# none interaction terms 
		NoneInterTerms = resultsNames(dds)[ !grepl( "\\.", resultsNames(dds)) ]
		NoneInterTerms=NoneInterTerms[-1]
		allInteractionTerms = resultsNames(dds)[ grepl( "\\.", resultsNames(dds)) ]


		for( kk in 1:length(NoneInterTerms) ) { # for each none interaction term
			if(!is.null(modelFactors) ) {# if not just group comparison using sample names
					#current factor
					cFactor = gsub("_.*","",NoneInterTerms[kk] )
				
					for(interactionTerm in allInteractionTerms ) {
					
						splited = splitInteractionTerms (interactionTerm)  # 4 components
						if (cFactor != splited[1] & cFactor != splited[3]  ) 
							next;						
						
						selected = results(dds, list(c( NoneInterTerms[kk],interactionTerm ) ) ) 
						comparisonName = paste0( NoneInterTerms[kk],"__", gsub("\\.","",interactionTerm) )
						
						if( cFactor == splited[1] )
							otherLevel = splited[4] else otherLevel = splited[2]
							
						comparisonName = paste0(#names(factorsCoded)[which(factorsCoded==cFactor)], # real factor name
												gsub("_vs_","-", substr(NoneInterTerms[kk], 3, nchar(NoneInterTerms[kk]  )  )), # the comparison
												"_for_",otherLevel)
						comparisons2 = c(comparisons2, comparisonName)
						selected$calls =0   
						selected$calls [which( selected$log2FoldChange > log2(minFC_limma) & selected$padj < maxP_limma ) ]  <-  1
						selected$calls [ which( selected$log2FoldChange <  -log2(minFC_limma) & selected$padj < maxP_limma ) ] <-  -1
						colnames(selected)= paste( comparisonName, "___",colnames(selected),sep="" )
						selected = as.data.frame(selected)
						if (pp==0){  # if first one with significant genes, collect gene list and Pval+ fold
							result1 = selected; pp = 1; 
							# selected[,2] <- -1 * selected[,2] # reverse fold change direction
							topGenes[[1]] = selected[,c(2,6)]; 
							names(topGenes)[1] = comparisonName; } else 
							{ result1 = merge(result1,selected,by="row.names"); 
								rownames(result1) = result1[,1]; 
								result1 <- result1[,-1]
								pk= pk+1; 
								# selected[,2] <- -1 * selected[,2] # reverse fold change direction
								topGenes[[pk]] = selected[,c(2,6)]; 
								names(topGenes)[pk] = comparisonName;  # assign name to comprison
							}
					} #for	
						
			} #if
		} #for
	
	
	} #if




#---
	#if( length(comparisons) == 1) topGenes <- topGenes[[1]] # if only one comparison, topGenes is not a list, just a data frame itself.
	if(! is.null(result1)) { 
	# note that when you only select 1 column from a data frame it automatically converts to a vector. drop =FALSE prevents that.
	allCalls = as.matrix( result1[,grep("calls",colnames(result1)), drop = FALSE  ] )
	colnames(allCalls)= gsub("___.*","", colnames(allCalls))
	colnames(allCalls)= gsub("\\.","-", colnames(allCalls)) # note that samples names should have no "."
	colnames(allCalls)= gsub("^I-","I:", colnames(allCalls))
	}
	return( list(results= allCalls, comparisons = comparisons2, Exp.type=Exp.type, topGenes=topGenes)) 
}

# main function
limma <- function() {  
	if(input_dataFileFormat == 1 ) {  # if count data
		 if(input_CountsDEGMethod == 3 ) {    # if DESeq2 method
				# rawCounts = read.csv("exampleData/airway_GSE52778.csv", row.names=1)
				# res =DEG.DESeq2(rawCounts, .05, 2) 
				# res1 =DEG.limma(rawCounts, .1, 1.5,rawCounts, 2,3) 
					
			return(   DEG.DESeq2( convertedCounts.out,input_limmaPval, input_limmaFC,
									input_selectModelComprions, readSampleInfo.out,
									c(input_selectFactorsModel,input_selectInteractions), 
									input_selectBlockFactorsModel, factorReferenceLevels.out )  )
			}
			if(input_CountsDEGMethod < 3 )    # voom or limma-trend
				return( DEG.limma(convertedData.out, input_limmaPval, input_limmaFC,
									convertedCounts.out, input_CountsDEGMethod,
									priorCounts=input_countsLogStart,input_dataFileFormat,
									input_selectModelComprions, readSampleInfo.out,
									c(input_selectFactorsModel,input_selectInteractions),
									input_selectBlockFactorsModel) )
	} else if (input_dataFileFormat == 2 ){ # normalized data
	 return( DEG.limma(convertedData.out, input_limmaPval, input_limmaFC,
						convertedCounts.out, input_CountsDEGMethod,
						priorCounts=input_countsLogStart,input_dataFileFormat,
						input_selectModelComprions, readSampleInfo.out,
						c(input_selectFactorsModel,input_selectInteractions),
						input_selectBlockFactorsModel) )
	} else {   # dataFileFormat == 3 user just uploaded fold change matrix
	
		x = convertedData.out
		
		pvals = convertedPvals.out
		if(!is.null(pvals) ) {
		  ix = match(rownames(x), rownames(pvals))
		  pvals = pvals[ix,]
		}


		# looks like ratio data, take log2
		if( sum(round(apply(x,2, median) + .2) == 1 ) == dim(x)[2] & min(x) > 0) 
			x = log2(x)
		
		Exp.type = "None standard data without replicates."
		all.Calls = x # fake calls
		for( i in 1: dim(all.Calls)[2]) { 
			tem <- all.Calls[,i]
			all.Calls[which( tem <= log2(input_limmaFC) & tem >=  -log2(input_limmaFC) ) ,i] = 0			
			all.Calls[which( tem >  log2(input_limmaFC) ) ,i] = 1
			all.Calls[which( tem < -log2(input_limmaFC) ) ,i] = -1		
			if(!is.null(pvals) ) 
				all.Calls[ which( pvals[,i] > input_limmaPval),i] = 0
		}
		comparisons = colnames(all.Calls)
		extractColumn <- function (i) {
			topGenes = as.data.frame( convertedData.out[,i,drop=FALSE])
			if(is.null(pvals) ) topGenes$FDR = 0 else 
				topGenes$FDR = pvals[,i]# fake fdr
				
			colnames(topGenes) = c("Fold","FDR")
			return(topGenes)	
		} 
		topGenes = lapply( 1:dim( x )[2], extractColumn )
		topGenes <- setNames(topGenes, colnames(x ) )
		
		return( list(results= all.Calls, comparisons = colnames(x ), Exp.type=Exp.type, topGenes=topGenes) )
	}
		
}

	
DEG.data <- function() {

		  genes = limma.out$results
		  genes = as.data.frame( genes[which( rowSums(genes) != 0 ),] )
		  colnames(genes) = colnames( limma.out$results )
		  genes = merge(genes,convertedData.out, by='row.names')
		  colnames(genes)[1] = "1: upregulation, -1: downregulation"
			# add gene symbol
		ix = match( genes[,1], allGeneInfo.out[,1])
		genes <- cbind(as.character( allGeneInfo.out$symbol)[ix],genes) 
		colnames(genes)[1] = "Symbol"
		genes <- genes[,c(2,1,3:dim(genes)[2]) ]
		return(genes)
		}
  

vennPlot <- function() {
	
			results = limma.out$results

			# split by up or down regulation
			if(input_UpDownRegulated) { 	
			    resultUp = results; 
				resultUp[resultUp < 0 ] <- 0;
				colnames(resultUp) = paste0("Up_", colnames(resultUp))
			    resultDown = results; 
				resultDown[resultDown > 0] <- 0;
				colnames(resultDown) = paste0("Down_", colnames(resultDown))				
				results <- cbind(resultUp, resultDown)
			}			
			
			ixa = c()
			
			
			for (comps in  input_selectComparisonsVenn) { 
				 if(!grepl("^I:|^I-|^Up_I:|^Up_I-|^Down_I:|^Down_I-", comps) ) {  # if not interaction term
					ix = match(comps, colnames(results)) 
				  } else {
						#mismatch in comparison names for interaction terms for DESeq2
						#I:water_Wet.genetic_Hy 	 in the selected Contrast
						#Diff-water_Wet-genetic_Hy   in column names
						tem = gsub("^I-","I:" ,colnames(results))
						tem = gsub("-","\\.",tem)
						ix = match(comps, tem) 

						if(is.na(ix) ) # this is for limma package
							ix = match(comps, colnames(results)) 						
					  }
					ixa = c(ixa,ix)
				  }
					
			results = results[,ixa,drop=FALSE] # only use selected comparisons
			if(dim(results)[2] >5) results <- results[,1:5]
			colnames(results) = gsub("^I-","I:" ,colnames(results))	
			
			vennDiagram(results,circle.col=rainbow(5), cex=c(1.,1, 0.7) ) # part of limma package


  }


sigGeneStats <- function( ){
		
		results = limma.out$results

		 library(reshape2)
		 Up =  apply(results, 2, function(x) sum(x == 1) )
		 Down = apply(results, 2, function(x) sum(x == -1) ) 
		 stats = rbind(Up, Down)
				 
		 gg <- melt(stats)

		 colnames(gg) = c("Regulation","Comparisons","Genes")
		 
 
		 p= ggplot(gg, aes(x=Comparisons, y=Genes, fill=  Regulation )  )+
			 geom_bar(position="dodge", stat="identity") + coord_flip() +
			 theme(legend.position = "top") + 
			 scale_fill_manual(values=c("red", "blue")) +
			 ylab("Number of differntially expressed genes") +
			 theme(axis.title.y=element_blank(),
				axis.text=element_text(size=14)) 
			
		p
			

}


sigGeneStatsTable <- function( ) {

		results = limma.out$results

		 Up =  apply(results, 2, function(x) sum(x == 1) )
		 Down = apply(results, 2, function(x) sum(x == -1) ) 
		 stats = rbind(Up, Down)
		 stats = t(stats)
		 stats=cbind(rownames(stats), stats)
		 colnames(stats)[1]="Comparisons"

		 return(as.data.frame(stats))

}


selectedHeatmap.data <- function() {
 		  genes <- limma.out$results
		  if( is.null(genes) ) return(NULL)
		  if(!grepl("I:", input_selectContrast) ) {  # if not interaction term
			ix = match(input_selectContrast, colnames(genes)) 
		  } else {
			#mismatch in comparison names for interaction terms for DESeq2
			#I:water_Wet.genetic_Hy 	 in the selected Contrast
			#Diff-water_Wet-genetic_Hy   in column names
			tem = gsub("I-","I:" ,colnames(genes))
			tem = gsub("-","\\.",tem)
			ix = match(input_selectContrast, tem) 
			
			if(is.na(ix) ) # this is for limma package
				ix = match(input_selectContrast, colnames(genes)) 			
			
		  }
	  
		  if(is.null(ix)) return(NULL)
		  if(is.na(ix)) return(NULL)
		  if( sum(abs(genes[,ix] )  ) <= 1 ) return(NULL) # no significant genes for this comparison
		  if(dim(genes)[2] < ix ) return(NULL)
		  query = rownames(genes)[which(genes[,ix] != 0)]
		  if(length(query) == 0) return(NULL)
		  iy = match(query, rownames(convertedData.out  ) )
		  

		 iz = findContrastSamples(	input_selectContrast, 
									colnames(convertedData.out),
									readSampleInfo.out,
									input_selectFactorsModel,
									input_selectModelComprions, 
									factorReferenceLevels.out,
									input_CountsDEGMethod,
									input_dataFileFormat
								)
	
		# color bar
		 bar = genes[,ix]
		 bar = bar[bar!=0]

		 # retreive related data		 
		 genes = convertedData.out[iy,iz,drop=FALSE]
		 
		 genes = genes[order(bar),,drop=FALSE] # needs to be sorted because myheatmap2 does not reorder genes
		 bar = sort(bar)


		 return(list(genes=genes, bar=bar ))

	
}
# find sample index for selected comparisons
findContrastSamples <- function(selectContrast, allSampleNames,sampleInfo=NULL, selectFactorsModel=NULL,selectModelComprions =NULL , referenceLevels=NULL, countsDEGMethod=NULL, dataFileFormat=NULL ){
	iz= match( detectGroups(allSampleNames), unlist(strsplit( selectContrast, "-"))	  )
	iz = which(!is.na(iz))		 
	if ( !is.null(sampleInfo) & !is.null(selectFactorsModel) & length(selectModelComprions)>0 ) {

		comparisons = gsub(".*: ","",selectModelComprions)   # strings like: "groups: mutant vs. control"
		comparisons = gsub(" vs\\. ","-",comparisons)		
		factorsVector= gsub(":.*","",selectModelComprions) # corresponding factors

		  # if read counts data and DESeq2
		if(dataFileFormat==1 & countsDEGMethod == 3) { # if DESeq2
			contrast = gsub("_for_.*","",selectContrast) # could be "wt-mu"   or "wt-mu_for_conditionB"
			ik = match( contrast, comparisons )   # selected contrast lookes like: "mutant-control"

			otherFactorLevel = gsub(".*_for_","",selectContrast)
			# find the corresponding factor for the other factor
			otherFactor=" "
			if(nchar( otherFactorLevel ) >0){
				for( eachFactor in colnames(sampleInfo) )
					if ( otherFactorLevel %in%  sampleInfo[,eachFactor ] )
						otherFactor = eachFactor		
			}
			
			if (is.na(ik)) iz=1:(length(allSampleNames))  else {  # interaction term, use all samples		
				selectedfactor= factorsVector[ ik ] # corresponding factors

				iz = which(sampleInfo[,selectedfactor] %in%  unlist(strsplit( contrast, "-")) )

				#filter by other factors: reference level
				if(! is.null( referenceLevels) ) {   # c("genotype:wt", "treatment:control" )
					for ( refs in referenceLevels)
						if(! is.null( refs) & gsub(":.*","",refs) != selectedfactor ) {
							currentFactor = gsub(":.*","",refs)
							if(nchar( otherFactorLevel ) >0 & currentFactor == otherFactor ) { # if not reference level
								iz = intersect( iz, which(sampleInfo[,currentFactor] == otherFactorLevel  ) )
							} else
								iz = intersect( iz, which(sampleInfo[,currentFactor] == gsub(".*:","",refs)  ) )
							
						}
				}			
				iz = iz[which(!is.na(iz))]
				
			# switching from limma to DESeq2 causes problem, as reference level is not defined.

			} 
			
		} else {  # not DESeq2
			
					# given level find corresponding sample ids
				    findIDsFromLevel <- function (aLevel){
						# find factor
						currentFactor=""
						for( eachFactor in colnames(sampleInfo) )
							if ( aLevel %in%  sampleInfo[,eachFactor ] )
								currentFactor = eachFactor			
						if(nchar(currentFactor) >0 ) 
							return( which(sampleInfo[,currentFactor ] %in% aLevel   )  ) else return(NULL)
					}		
					
					if( !grepl(".*_.*-.*_.*",selectContrast )) iz = c.out
					levels4 = unlist( strsplit( unlist( strsplit(selectContrast,"-") ), "_") ) #double split!
					if(length(levels4)!=4) { 
						iz = c.out 
					} else {
						iz = intersect( findIDsFromLevel(levels4[1]),  findIDsFromLevel(levels4[2])  ) # first sample
						iz = c(iz, intersect( findIDsFromLevel(levels4[3]),  findIDsFromLevel(levels4[4])  )   ) # 2nd sample
						}
				} #else
			
			
			
		}	
	 
	 if (grepl("I:",selectContrast)) iz=1:length(allSampleNames) # if it is factor design use all samples
	 if( is.na(iz)[1] | length(iz)<=1 )    iz=1:length(allSampleNames)

	return(iz)
}



selectedHeatmap <- function() {

		 bar = selectedHeatmap.data.out$bar +2;
		 bar[bar==3] =2

	  	 myheatmap2( selectedHeatmap.data.out$genes,bar,200,mycolor=input_heatColors1,c("Down","Up") )
	 
}



selectedHeatmap.data <- function(){

		  genes <- limma.out$results
		  if( is.null(genes) ) return(NULL)
		  if(!grepl("I:", input_selectContrast) ) {  # if not interaction term
			ix = match(input_selectContrast, colnames(genes)) 
		  } else {
			#mismatch in comparison names for interaction terms for DESeq2
			#I:water_Wet.genetic_Hy 	 in the selected Contrast
			#Diff-water_Wet-genetic_Hy   in column names
			tem = gsub("I-","I:" ,colnames(genes))
			tem = gsub("-","\\.",tem)
			ix = match(input_selectContrast, tem) 
			
			if(is.na(ix) ) # this is for limma package
				ix = match(input_selectContrast, colnames(genes)) 			
			
		  }
	  
		  if(is.null(ix)) return(NULL)
		  if(is.na(ix)) return(NULL)
		  if( sum(abs(genes[,ix] )  ) <= 1 ) return(NULL) # no significant genes for this comparison
		  if(dim(genes)[2] < ix ) return(NULL)
		  query = rownames(genes)[which(genes[,ix] != 0)]
		  if(length(query) == 0) return(NULL)
		  iy = match(query, rownames(convertedData.out  ) )
		  

		 iz = findContrastSamples(	input_selectContrast, 
									colnames(convertedData.out),
									readSampleInfo.out,
									input_selectFactorsModel,
									input_selectModelComprions, 
									factorReferenceLevels.out,
									input_CountsDEGMethod,
									input_dataFileFormat
								)
	
		# color bar
		 bar = genes[,ix]
		 bar = bar[bar!=0]

		 # retreive related data		 
		 genes = convertedData()[iy,iz,drop=FALSE]
		 
		 genes = genes[order(bar),,drop=FALSE] # needs to be sorted because myheatmap2 does not reorder genes
		 bar = sort(bar)


		 return(list(genes=genes, bar=bar ))

}



DEG.data <- function(){

	genes = limma.out$results
	genes = as.data.frame( genes[which( rowSums(genes) != 0 ),] )
	colnames(genes) = colnames( limma.out$results )
	genes = merge(genes,convertedData.out, by='row.names')
	colnames(genes)[1] = "1: upregulation, -1: downregulation"
	# add gene symbol
	ix = match( genes[,1], allGeneInfo.out[,1])
	genes <- cbind(as.character( allGeneInfo.out$symbol)[ix],genes) 
	colnames(genes)[1] = "Symbol"
	genes <- genes[,c(2,1,3:dim(genes)[2]) ]
	return(genes)
}


AllGeneListsGMT <- function() {

		
			results = limma.out$results

			results2 = cbind(results, results)
			colnames(results2)= c( paste0("UP_", colnames(results)),paste0("Down_", colnames(results)) )
			for(i in 1:dim(results)[2] ) {
				results2[,i*2-1] = results[,i]
				results2[ which(results2[,i*2-1] < 0 ) , i*2-1] = 0
				results2[,i*2] = results[,i]	
				results2[ which(results2[,i*2] > 0 ) , i*2] = 0	
			}

			geneList1 <- function (i) {
				ix = which(results2[,i] !=0 )
				return(paste0(colnames(results2)[i],"\t", length(ix),"\t",
						paste(rownames(results2 )[ix], collapse="\t" ) ) )			
			}
			tem = sapply(1:dim(results2)[2],geneList1 )
				
			return( paste(tem, collapse="\n") )
}



geneListData <- function( ) {
		
		noSig = as.data.frame("No significant genes find!")
		if( is.null(input_selectContrast) ) return(NULL)
		if( is.null( limma.out$comparisons ) ) return(NULL) # if no significant genes found
		if( length(limma.out$topGenes) == 0 ) return(noSig)
		if(length( limma.out$comparisons)  ==1 )  
		{ top1=limma.out$topGenes[[1]]  
		} else {
		  top = limma.out$topGenes
		  ix = match(input_selectContrast, names(top))
		  if( is.na(ix)) return (noSig)
		  top1 <- top[[ix]]; 
		  }
		  if(dim(top1)[1] == 0 ) return (noSig)
		  colnames(top1)= c("Fold","FDR")
		  #top1 = merge(top1,convertedData(), by='row.names')
		  #colnames(top1)[1] = "Genes"
		  top1 = top1[order(-abs(top1$Fold)) ,]
		  if ( length( which( top1$FDR <=  input_limmaPval  &  abs(top1$Fold)  >= log2(input_limmaFC) ) ) == 0 )
			return( noSig)
		  top1 <- top1[which(top1$FDR <=  input_limmaPval ) ,]
		  top1 <- top1[which(abs(top1$Fold)  >= log2( input_limmaFC)) ,]
		  top1$Top_Genes <- rownames(top1)
		  top1 <- top1[,c(3,1,2)]
		  
		  # if new species
		  if( input_selectGO2 == "ID not recognized!" | input_selectOrg == "NEW") return (top1); 
		  
		  #convertedID = convertID(top1[,1],input_selectOrg, "GOBP" );#"gmax_eg_gene"
		  # tem <- geneInfo(convertedID,input_selectOrg) #input_selectOrg ) ;
		 #  tem <- geneInfo(converted(),input_selectOrg)
		  top1 <- merge(top1, allGeneInfo.out, by.x ="Top_Genes", by.y="ensembl_gene_id",all.x=T )
		  
		  if ( sum( is.na(top1$band)) == dim(top1)[1] ) top1$chr = top1$chromosome_name else
			top1$chr = paste( top1$chromosome_name, top1$band,sep="")
		  
		  top1 <- top1[,c('Top_Genes','Fold','FDR','symbol','chr','gene_biotype')]
		  
		 
		#  ix = match(top1[,1], tem$ensembl_gene_id)
		 # if( sum(is.na( tem$Symbol[ix]) ) != length(ix) ) 
		  # { top1 <- cbind(top1, tem$Symbol[ix]); colnames(top1)[4]= "Symbol" }
		  top1 = top1[order(-abs(as.numeric( top1$Fold))) ,]
		  top1$FDR <- sprintf("%-3.2e",top1$FDR )
		  colnames(top1) <- c("Ensembl ID", "log2 Fold Change", "Adj.Pval", "Symbol","Chr","Type")
		  if ( sum( is.na(top1$Symbol)) == dim(top1)[1] ) top1 <- top1[,-4] 

		  return(top1)
	
  }
  

  
volcanoPlot <- function( ) {
	if(length( limma.out$comparisons)  ==1 )  
    { top1=limma.out$topGenes[[1]]  
	} else {
	  top = limma.out$topGenes
	  ix = match(input_selectContrast, names(top))
	  if( is.na(ix)) return (NULL)
	  top1 <- top[[ix]]; 
	  }

	  if(dim(top1)[1] == 0 ) return (NULL)
	  colnames(top1)= c("Fold","FDR")
	 top1 <- as.data.frame(top1) # convert to data frame
     top1 <- top1[which(!(is.na(top1$Fold)|is.na(top1$FDR)    )),] # remove NA's 
	 top1$upOrDown <- 1
	 #write.csv(top1,"tem.csv")
	 top1$upOrDown[ which(top1$FDR <=  input_limmaPval& top1$Fold  >= log2( input_limmaFC)) ]  <- 2
	 top1$upOrDown[ which(top1$FDR <=  input_limmaPval & top1$Fold  <= -log2( input_limmaFC)) ]  <- 3
	 par(mar=c(5,5,1,1))
	 plot(top1$Fold,-log10(top1$FDR),col = c("grey30", "red","blue")[top1$upOrDown],
	 pch =16, cex = .3, xlab= "log2 fold change", ylab = "- log10 (FDR)",
	 cex.lab=2, cex.axis=2, cex.main=2, cex.sub=2	)    
     legend("bottomright",c("Upregulated","Downregulated"),fill = c("red","blue"),cex=1.1 )
	 
}

scatterPlot <- function( ){
 
	if(length( limma.out$comparisons)  ==1 )  
    { top1=limma.out$topGenes[[1]]  
	} else {
	  top = limma.out$topGenes
	  ix = match(input_selectContrast, names(top))
	  if( is.na(ix)) return (NULL)
	  top1 <- top[[ix]]; 
	  }
	  if(dim(top1)[1] == 0 ) return (NULL)
	  colnames(top1)= c("Fold","FDR")
	 top1 <- as.data.frame(top1) # convert to data frame
     top1 <- top1[which(!(is.na(top1$Fold)|is.na(top1$FDR)    )),] # remove NA's 
	 top1$upOrDown <- 1
	 #write.csv(top1,"tem.csv")
	 top1$upOrDown[ which(top1$FDR <=  input_limmaPval& top1$Fold  >= log2( input_limmaFC)) ]  <- 2
	 top1$upOrDown[ which(top1$FDR <=  input_limmaPval & top1$Fold  <= -log2( input_limmaFC)) ]  <- 3


	 
		 iz = findContrastSamples(	input_selectContrast, 
									colnames(convertedData.out),
									readSampleInfo.out,
									input_selectFactorsModel,
									input_selectModelComprions, 
									factorReferenceLevels.out,
									input_CountsDEGMethod,
									input_dataFileFormat
								)
	 
	 genes <- convertedData.out[,iz]
	 
	 g = detectGroups(colnames(genes))
	 
	 if(length(unique(g))  > 2) { plot.new(); text(0.5,0.5, "Not available.") } else{
		average1 <- apply( genes[, which( g == unique(g)[1] ) ],1,mean)

		average2 <- apply(  genes[, which( g == unique(g)[2] ) ],1,mean)

		genes2 <- cbind(average1,average2)
		rownames(genes2) = rownames(genes)
		genes2 <-  merge(genes2,top1,by="row.names")

		
		par(mar=c(5,5,1,1))
		plot(genes2$average2,genes2$average1,col = c("grey45","red","blue")[genes2$upOrDown],
		pch =16, cex = .3, xlab= paste("Average expression in", unique(g)[2] ), 
		ylab = paste("Average expression in", unique(g)[1] ),
		cex.lab=2, cex.axis=2, cex.main=2, cex.sub=2	)    
		legend("bottomright",c("Upregulated","Downregulated"),fill = c("red","blue"),cex=1.3 )


	 
	 }
	 
}


MAplot <- function ( ) {
 
	if(length( limma.out$comparisons)  ==1 )  
    { top1=limma.out$topGenes[[1]]  
	} else {
	  top = limma.out$topGenes
	  ix = match(input_selectContrast, names(top))
	  if( is.na(ix)) return (NULL)
	  top1 <- top[[ix]]; 
	  }
	  if(dim(top1)[1] == 0 ) return (NULL)
	  colnames(top1)= c("Fold","FDR")
	 top1 <- as.data.frame(top1) # convert to data frame
     top1 <- top1[which(!(is.na(top1$Fold)|is.na(top1$FDR)    )),] # remove NA's 
	 top1$upOrDown <- 1
	 #write.csv(top1,"tem.csv")
	 top1$upOrDown[ which(top1$FDR <=  input_limmaPval& top1$Fold  >= log2( input_limmaFC)) ]  <- 2
	 top1$upOrDown[ which(top1$FDR <=  input_limmaPval & top1$Fold  <= -log2( input_limmaFC)) ]  <- 3
	 
		 iz = findContrastSamples(	input_selectContrast, 
									colnames(convertedData.out),
									readSampleInfo.out,
									input_selectFactorsModel,
									input_selectModelComprions, 
									factorReferenceLevels.out,
									input_CountsDEGMethod,
									input_dataFileFormat
								)
	 


		average1 <- as.data.frame( apply( convertedData()[,iz],1,mean) )
		colnames(average1) = "Average"
		rownames(average1) = rownames(convertedData.out)
		
		genes2 <-  merge(average1,top1,by="row.names")

		par(mar=c(5,5,1,1))
		plot(genes2$Average,genes2$Fold,col = c("grey45","red","blue")[genes2$upOrDown],
		pch =16, cex = .3, xlab= "Average expression", 
		ylab = "Log2 fold change",
		cex.lab=2, cex.axis=2, cex.main=2, cex.sub=2	)   
			abline(h=0)
		legend("bottomright",c("Upregulated","Downregulated"),fill = c("red","blue"),cex=1.3 )
 
}


geneListGOTable <- function() {		
		NoSig=NULL
		# using expression data
		genes <- selectedHeatmap.data.out$genes
		if(is.null(genes) ) return(NULL) 
		if(dim(genes)[1] <= minGenesEnrichment ) return(NoSig) # if has only few genes
		
		fc = selectedHeatmap.data.out$bar		
		# GO
		results1 <- NULL; result <- NULL
		pp <- 0
		for( i in c(1,-1) ) {

			
			if( length(which(fc*i<0)) <= minGenesEnrichment) next; 
			query = rownames(genes)[which(fc*i<0)]
			if( length(query) <= minGenesEnrichment) next; 	
			

			result <- findOverlapGMT( query, GeneSets.out,1) 


			if( dim(result)[2] ==1) next;   # result could be NULL
			if(i == -1) result$direction = "Up regulated"  else result$direction = "Down regulated" # changed
			if (pp==0 ) { results1 <- result; pp = 1;} else  results1 = rbind(results1,result)
		}

		if ( pp == 0 ) return (NoSig)
		if ( is.null( results1) ) return (NoSig)
		if( dim(results1)[2] == 1 ) return(NoSig)  # Returns a data frame: "No significant results found!"
		
		results1= results1[,c(6,1,2,4,5)] # changed
		colnames(results1)= c("List","FDR","nGenes","GO terms or pathways","Genes") # changed
		minFDR = 0.01
		if(min(results1$FDR) > minFDR ) results1 = as.data.frame("No signficant enrichment found.") else
		results1 = results1[which(results1$FDR < minFDR),]
		
		if(dim(results1)[2] != 5) return(NoSig)  # changed
		colnames(results1)= c("Direction","adj.Pval","nGenes","Pathways", "Genes") # changed
		
		results1
	
  }


geneListGO <- function() {	
  results1 = geneListGOTable.out
  tem = input_removeRedudantSets
  if(dim(results1)[2] ==1) return(results1) else { 
	results1$adj.Pval <- sprintf("%-2.1e",as.numeric(results1$adj.Pval) )
	results1[,1] <- as.character(results1[,1])
	results1[ duplicated (results1[,1] ),1 ] <- ""  
	
	return( results1[,-5] )
	}	
  }


# a program for ploting enrichment results by highlighting the similarities among terms
# must have columns: Direction, adj.Pval   Pathways Genes
#  Direction	adj.Pval	nGenes	Pathways		Genes
#Down regulated	3.58E-59	131	Ribonucleoprotein complex biogenesis	36	Nsun5 Nhp2 Rrp15 
#Down regulated	2.55E-57	135	NcRNA metabolic process	23	Nsun5 Nhp2 Rrp15 Emg1 Ddx56 Rsl1d1 enrichmentPlot <- function( enrichedTerms){
# Up or down regulation is color-coded
# gene set size if represented by the size of marker
enrichmentPlot <- function( enrichedTerms, rightMargin=33) {
  if(class(enrichedTerms) != "data.frame") return(NULL)
  library(dendextend) # customizing tree
  
  geneLists = lapply(enrichedTerms$Genes, function(x) unlist( strsplit(as.character(x)," " )   ) )
  names(geneLists)= enrichedTerms$Pathways

  # compute overlaps percentage--------------------

  n = length(geneLists)
  w <- matrix(NA, nrow = n, ncol = n)
# compute overlaps among all gene lists
    for (i in 1:n) {
        for (j in i:n) {
            u <- unlist(geneLists[i])
            v <- unlist(geneLists[j])
            w[i, j] = length(intersect(u, v))/length(unique(c(u,v)))
        }
    }
# the lower half of the matrix filled in based on symmetry
    for (i in 1:n) 
        for (j in 1:(i-1)) 
            w[i, j] = w[j,i] 

  Terms = paste( sprintf("%-1.0e",as.numeric(enrichedTerms$adj.Pval)), 
				names(geneLists))
  rownames(w) = Terms
  colnames(w) = Terms
  par(mar=c(0,0,1,rightMargin)) # a large margin for showing 

  dend <- as.dist(1-w) %>%
	hclust (method="average") 
  ix = dend$order # permutated order of leaves

  leafType= as.factor( gsub(" .*","", enrichedTerms$Direction[ix] ) )
  if(length(unique(enrichedTerms$Direction)  ) ==2 )
	leafColors = c("green","red")  else  # mycolors
	leafColors = mycolors 
  #leafSize = unlist( lapply(geneLists,length) ) # leaf size represent number of genes
  #leafSize = sqrt( leafSize[ix] )  
  leafSize = -log10(as.numeric( enrichedTerms$adj.Pval[ix] ) ) # leaf size represent P values
  leafSize = 1.5*leafSize/max( leafSize ) + .2
  
	dend %>% 
	as.dendrogram(hang=-1) %>%
	set("leaves_pch", 19) %>%   # type of marker
	set("leaves_cex", leafSize) %>% #Size
	set("leaves_col", leafColors[leafType]) %>% # up or down genes
	plot(horiz=TRUE)
	
  #legend("top",pch=19, col=leafColors[1:2],legend=levels(leafType),bty = "n",horiz =T  )
  # add legend using a second layer
  	par(lend = 1)           # square line ends for the color legend
	add_legend("top",pch=19, col=leafColors[1:2],legend=levels(leafType),bty = "n",horiz =T 

	)
  
}


# numChar=100 maximum number of characters
# n=200  maximum number of nodes
# degree.cutoff = 0    Remove node if less connected
#from PPInfer
enrich.net2 <-  function (x, gene.set, node.id, node.name = node.id, pvalue, 
    n = 50, numChar = NULL, pvalue.cutoff = 0.05, edge.cutoff = 0.05, 
    degree.cutoff = 0, edge.width = function(x) {
        5 * x^2
    }, node.size = function(x) {
        2.5 * log10(x)
    }, group = FALSE, group.color = c("green","red" ), group.shape = c("circle", 
        "square"), legend.parameter = list("topright"), show.legend = TRUE, plotting=TRUE,
    ...) 
{
	library(igraph)
	
    x <- data.frame(x, group)
    colnames(x)[length(colnames(x))] <- "Group"
    x <- x[as.numeric( x[, pvalue]) < pvalue.cutoff, ]
    x <- x[order(x[, pvalue]), ]
    n <- min(nrow(x), n)
    if (n == 0) {
        stop("no enriched term found...")
    }
    x <- x[1:n, ]
    index <- match(x[, node.id], names(gene.set))
    geneSets <- list()
    for (i in 1:n) {
        geneSets[[i]] <- gene.set[[index[i]]]
    }
    names(geneSets) <- x[, node.name]
    if (is.null(numChar)) {
        numChar <- max(nchar(as.character(x[, node.name])))
    }
    else {
        if (length(unique(substr(x[, node.name], 1, numChar))) < 
            nrow(x)) {
            numChar <- max(nchar(as.character(x[, node.name])))
            message("Note : numChar is too small.", "\n")
        }
    }
    x[, node.name] <- paste(substr(x[, node.name], 1, numChar), 
        ifelse(nchar(as.character(x[, node.name])) > numChar, 
            "...", ""), sep = "")
    w <- matrix(NA, nrow = n, ncol = n)

    for (i in 1:n) {
        for (j in i:n) {
            u <- unlist(geneSets[i])
            v <- unlist(geneSets[j])
            w[i, j] = length(intersect(u, v))/length(unique(c(u, 
                v)))
        }
    }
    list.edges <- stack(data.frame(w))
    list.edges <- cbind(list.edges[, 1], rep(x[, node.name], 
        n), rep(x[, node.name], each = n))
    list.edges <- list.edges[list.edges[, 2] != list.edges[,3], ]
    list.edges <- list.edges[!is.na(list.edges[, 1]), ]
    g <- graph.data.frame(list.edges[, -1], directed = FALSE)
    E(g)$width = edge.width(as.numeric(list.edges[, 1]))
    V(g)$size <- node.size(lengths(geneSets))
    g <- delete.edges(g, E(g)[as.numeric(list.edges[, 1]) < edge.cutoff])
    index.deg <- igraph::degree(g) >= degree.cutoff
    g <- delete.vertices(g, V(g)[!index.deg])
    x <- x[index.deg, ]
    index <- index[index.deg]
    if (length(V(g)) == 0) {
        stop("no categories greater than degree.cutoff...")
    }
    n <- min(nrow(x), n)
    x <- x[1:n, ]
    group.level <- sort(unique(group))
    pvalues <- x[, pvalue]
    for (i in 1:length(group.level)) {
        index <- x[, "Group"] == group.level[i]
        V(g)$shape[index] <- group.shape[i]
        group.pvalues <- pvalues[index]
        if (length(group.pvalues) > 0) {
            if (max(group.pvalues) == min(group.pvalues)) {
                V(g)$color[index] <- adjustcolor(group.color[i], 
                  alpha.f = 0.5)
            }
            else {
                V(g)$color[index] <- sapply(1 - (group.pvalues - 
                  min(group.pvalues))/(max(group.pvalues) - min(group.pvalues)), 
                  function(x) {
                    adjustcolor(group.color[i], alpha.f = x)
                  })
            }
        }
    }
	if(plotting) { 
		plot(g,, vertex.label.dist=1, ...)
		if (show.legend) {
			legend.parameter$legend <- group.level
			legend.parameter$text.col <- group.color
			legend.parameter$bty <- "n"	
			do.call(legend, legend.parameter)
		}}
    return(g)
}


enrichmentNetwork <- function(enrichedTerms){
	geneLists = lapply(enrichedTerms$Genes, function(x) unlist( strsplit(as.character(x)," " )   ) )
	names(geneLists)= enrichedTerms$Pathways
	enrichedTerms$Direction = gsub(" .*","",enrichedTerms$Direction )

	g <- enrich.net2(enrichedTerms, geneLists, node.id = "Pathways", numChar = 100, 
	   pvalue = "adj.Pval", edge.cutoff = 0.2, pvalue.cutoff = 1, degree.cutoff = 0,
	   n = 200, group = enrichedTerms$Direction, vertex.label.cex = 1, vertex.label.color = "black")

}
  
enrichmentNetworkPlotly <- function(enrichedTerms){
	geneLists = lapply(enrichedTerms$Genes, function(x) unlist( strsplit(as.character(x)," " )   ) )
	names(geneLists)= enrichedTerms$Pathways

	g <- enrich.net2(enrichedTerms, geneLists, node.id = "Pathways", numChar = 100, 
	   pvalue = "adj.Pval", edge.cutoff = 0.2, pvalue.cutoff = 1, degree.cutoff = 0,
	   n = 200, group = enrichedTerms$Direction, vertex.label.cex = 0.8, vertex.label.color = "black"
	   ,plotting=TRUE)

	vs <- V(g)
	es <- as.data.frame(get.edgelist(g))
	Nv <- length(vs)
	Ne <- length(es[1]$V1)
	# create nodes
	L <- layout.kamada.kawai(g)
	Xn <- L[,1]
	Yn <- L[,2]
	inc = (max(Yn)-min(Yn))/50  # for shifting
	#group <- ifelse(V(g)$shape == "circle", "GO", "KEGG")
	 group <- as.character(enrichedTerms$Direction)
	network <- plot_ly(x = ~Xn, y = ~Yn, type = "scatter", mode = "markers",
	   marker = list(color = V(g)$color, size = V(g)$size*2,
		  symbol= ~V(g)$shape, line = list(color = "gray", width = 2)),
	   hoverinfo = "text", text = ~paste("</br>", group, "</br>", names(vs))) %>%
	   add_annotations( x = ~Xn, y = ~Yn+inc, text = names(vs), showarrow = FALSE,
		  font = list(color = "#030303", size = 12))
	# create edges
	edge_shapes <- list()
	for(i in 1:Ne)
	{
	   v0 <- es[i,]$V1
	   v1 <- es[i,]$V2
	   index0 <- match(v0, names(V(g)))
	   index1 <- match(v1, names(V(g)))
	   edge_shape <- list(type = "line", line = list(color = "gray" ,
		  width = E(g)$width[i]/2), x0 = Xn[index0], y0 = Yn[index0],
		  x1 = Xn[index1], y1 = Yn[index1])
	   edge_shapes[[i]] <- edge_shape
	}
	# create network
	axis <- list(title = "", showgrid = FALSE, showticklabels = FALSE, zeroline = FALSE)
	h <- layout(network, title = "Enrichment Network", shapes = edge_shapes,
	   xaxis = axis, yaxis = axis)
	config(h, showLink = TRUE)
	   
	   
}
  
 #Homo sapies --> hsapiens
shortSpeciesNames <- function(tem){
	 tem2 = strsplit(as.character(tem)," " ) 	   
	 return( tolower( paste0(substr(tem2[[1]][1],1,1), tem2[[1]][2]  ) ) )
}

findTaxonomyID <- function( speciesName = "mmusculus") {
	
    if(!is.null(input_speciesName) ) { # if species name is entered
	   ix = match(input_speciesName, STRING10_species$official_name)
	   } else if( input_selectGO2 != "ID not recognized!" )
	   { # if no species is entered, try to resolve species using existing info 	
			codedNames = sapply(STRING10_species$compact_name,shortSpeciesNames )
			ix = match(speciesName , codedNames)
			if(input_selectOrg != speciesChoice[[1]]) {  # if species is entered
				selectedSpecies = findSpeciesById(input_selectOrg)[1,1]
				ix = match( gsub("_.*","", selectedSpecies ), codedNames)				
			}

		} else return(NULL) 
 
    if(length(ix) == 0 | is.na(ix) ) return(NULL) 
    return(STRING10_species$species_id[ix])
}

STRINGdb_geneList <- function() {
	library(STRINGdb,verbose=FALSE)
	taxonomyID = findTaxonomyID.out

		if(is.null( taxonomyID ) ) return(NULL)
		#Intialization
		string_db <- STRINGdb$new( version="10", species=taxonomyID,
							   score_threshold=0, input_directory="" )
				
		# using expression data
		genes <- geneListData.out
		colnames(genes)[1:2]=c("gene","lfc")
		mapped <- string_db$map(genes,"gene", removeUnmappedRows = TRUE )

		up= subset(mapped, lfc>0, select="STRING_id", drop=TRUE )

		down= subset(mapped, lfc<0, select="STRING_id", drop=TRUE )		
		
		mappingRatio = nrow(mapped)/ nrow(genes)
		if(nrow(mapped) == 0) return(NULL) else
		 return( list(up=up, down=down, ratio=mappingRatio, geneTable=mapped ) )
}


STRINGDB_mapping_stat <- function( ) {

		if( is.null(STRINGdb_geneList.out ) ) return("No genes mapped by STRINGdb. Please enter or double-check species name above.")
		if(! is.null(STRINGdb_geneList.out ) ) { 
			tem=paste0( 100*round(STRINGdb_geneList()$ratio,3), "% genes mapped by STRING web server.")
			if(STRINGdb_geneList()$ratio <0.3 ) tem = paste(tem, "Warning!!! Very few gene mapped. Double check if the correct species is selected.")
			return( tem  )
		}
}


stringDB_GO_enrichmentData <- function() {
		library(STRINGdb,verbose=FALSE)
						   
		tem = input_STRINGdbGO
		taxonomyID = findTaxonomyID.out
		if(is.null( taxonomyID ) ) return(NULL)		
		NoSig = as.data.frame("No significant enrichment found.")
		####################################
		#Intialization
		string_db <- STRINGdb$new( version="10", species=taxonomyID,
							   score_threshold=0, input_directory="" )
				
		# using expression data
		genes <- selectedHeatmap.data.out$genes
		if(is.null(genes) ) return(NULL) 
		if(dim(genes)[1] <= minGenesEnrichment ) return(NoSig) # if has only few genes
		
		fc = selectedHeatmap.data.out$bar		
		# GO
		results1 <- NULL; result <- NULL
		pp <- 0
		for( i in c(1:2) ) {
			
			if( length(which(fc*i<0)) <= minGenesEnrichment) next; 
			query = rownames(genes)[which(fc*i<0)]
			
			if( length(query) <= minGenesEnrichment) next; 	
			
			ids = STRINGdb_geneList.out[[i]]
			
			result <- string_db$get_enrichment( ids, category = input_STRINGdbGO, methodMT = "fdr", iea = TRUE )
			if(nrow(result) == 0 ) next; 
			if(nrow(result) > 15)  result <- result[1:15,]

			if( dim(result)[2] ==1) next;   # result could be NULL
			if(i == 1) result$direction = "Up regulated"  else result$direction = "Down regulated"
			if (pp==0 ) { results1 <- result; pp = 1;} else  results1 = rbind(results1,result)
		}

		if ( pp == 0 ) return (NoSig)
		if ( is.null( results1) ) return (NoSig)
		if( dim(results1)[2] == 1 ) return(NoSig)  # Returns a data frame: "No significant results found!"
		
		results1= results1[,c(7,5,3, 6)]
		colnames(results1)= c("List","FDR","nGenes","GO terms or pathways")
		minFDR = 0.01
		if(min(results1$FDR) > minFDR ) results1 = as.data.frame("No signficant enrichment found.") else
		results1 = results1[which(results1$FDR < minFDR),]
		
		if(dim(results1)[2] != 4) return(NoSig)
		colnames(results1)= c("Direction","adj.Pval","nGenes","Pathways")
		results1$adj.Pval <- sprintf("%-2.1e",as.numeric(results1$adj.Pval) )	
		rownames(results1)=1:nrow(results1)
		results1[ duplicated (results1[,1] ),1 ] <- ""  
		
		return( results1 )
}


stringDB_network1 <- function( geneLists = 1 ) {  # geneLists =2 for both up and down
	library(STRINGdb)
						   
		tem = input_nGenesPPI
		taxonomyID = findTaxonomyID.out
		if(is.null( taxonomyID ) ) return(NULL)		
		####################################
		NoSig = as.data.frame("No significant enrichment found.")
		if(is.null(STRINGdb_geneList.out ) ) return(NULL)
		

		#Intialization
		string_db <- STRINGdb$new( version="10", species=taxonomyID,
							   score_threshold=0, input_directory="" )
		# only up regulated is ploted		
		for( i in c(1:geneLists) ) {
			ids = STRINGdb_geneList.out[[i]]
			if(length(ids)> input_nGenesPPI )  # n of genes cannot be more than 400
				ids <- ids[1:input_nGenesPPI]

			string_db$plot_network( ids,add_link=FALSE)

		}
}



# generates a html file containing results and also links to STRING-db
stringDB_network_link <- function(){
		library(STRINGdb,verbose=FALSE)
		taxonomyID = findTaxonomyID.out
		if(is.null( taxonomyID ) ) return(NULL)		
		
		NoSig = as.data.frame("No significant enrichment found.")
		if(is.null(STRINGdb_geneList.out ) ) return(NULL)

		#Intialization
		string_db <- STRINGdb$new( version="10", species=taxonomyID,
							   score_threshold=0, input_directory="" )
			# upregulated
		   ids = STRINGdb_geneList.out[[1]]
			if(length(ids)> input_nGenesPPI )  # n of genes cannot be more than 400
				ids <- ids[1:input_nGenesPPI]

			link1 = string_db$get_link( ids)
			Pval1 = string_db$get_ppi_enrichment( ids)
			tem = "<h5> Interactive and annotated PPI networks among DEGs: <br/>  "
			tem = paste(tem, "<a href=\"", link1, "\" target=\"_blank\"> Up-regulated; </a>"  )

			# downregulated
			ids = STRINGdb_geneList()[[2]]
			if(length(ids)> input_nGenesPPI )
				ids <- ids[1:input_nGenesPPI]

			link2 = string_db$get_link( ids)
			Pval2 = string_db$get_ppi_enrichment( ids)				
			tem = paste(tem, " &nbsp  <a href=\"", link2, "\"target=\"_blank\"> Down-regulated; </a>"  )
			
			# both up and down with color code

			geneTable = STRINGdb_geneList.out$geneTable
			if(nrow(geneTable)> input_nGenesPPI ) 
				geneTable <- geneTable[1:input_nGenesPPI,]
			geneTable =  string_db$add_diff_exp_color( geneTable, logFcColStr="lfc" ) 
			payload_id <- string_db$post_payload( geneTable$STRING_id,colors=geneTable$color )
			link3 = string_db$get_link(geneTable$STRING_id, payload_id = payload_id)			
			tem = paste(tem, " &nbsp  <a href=\"", link3, "\"target=\"_blank\"> Both with fold-changes color coded.</a></h5>"  )

            tem2 = paste("<h5> PPI enrichment P values: ")  
			tem2 = paste0(tem2,"Up-regulated: ", sprintf("%-3.2e",Pval1[1]), " &nbsp Down-regulated: ", sprintf("%-3.2e",Pval2[1]),".")
			tem2 = paste(tem2, " Small P values indicate more PPIs among DEGs than background. </h5>" )
			tem = paste(tem2,tem )
			return(tem)	
}


################################################################
# Pathway analysis
################################################################

gagePathwayData <- function( ){
	library(gage,verbose=FALSE) # pathway analysis	



	####################################
	
		myrange = c(input_minSetSize, input_maxSetSize)
		noSig = as.data.frame("No significant pathway found.")
		if( length(limma.out$topGenes) == 0 ) return(noSig)
		if(length( limma.out$comparisons)  ==1 )  
		{ top1=limma.out$topGenes[[1]]  
		} else {
		  top = limma.out$topGenes
		  ix = match(input_selectContrast1, names(top))
		  if( is.na(ix)) return (noSig)
		  top1 <- top[[ix]]; 
		  }
		  if(dim(top1)[1] == 0 ) return (noSig)
		  colnames(top1)= c("Fold","FDR")

		  top1 = top1[which(top1$FDR <input_GenePvalCutoff) , ]
 

		  gmt = GeneSets.out 
		  if(length( GeneSets.out )  == 0)  { return(as.data.frame("No gene set found!"))}
		  #converted = convertID(rownames(top1),input_selectOrg)
		  #
		   #gmt = readGeneSets(converted, top1, input_selectGO, input_selectOrg, myrange )
		 # cat("Sets",length(gmt))

		 fold = top1[,1]; names(fold) <- rownames(top1)
		 
		 if(input_absoluteFold) fold <- abs(fold)
		 paths <- gage(fold, gsets = gmt, ref = NULL, samp = NULL)

		  paths <-  rbind(paths$greater,paths$less)
		 # write.csv(paths,"tem.csv")	  
		 # cat( dim(paths) )
		  if(dim(paths)[1] < 1 | dim(paths)[2]< 6 ) return( noSig )
		  top1 <- paths[,c('stat.mean','set.size','q.val')]
		  colnames(top1)= c("statistic","Genes","adj.Pval")
		  top1 <- top1[order(top1[,3]) ,]  
		  if ( length( which( top1[,3] <=  input_pathwayPvalCutoff   ) ) == 0 )
			return( noSig)
		  top1 <- top1[which(top1[,3] <=  input_pathwayPvalCutoff ) ,,drop=FALSE]
		  if(dim(top1)[1] > input_nPathwayShow ) 
			 top1 <- top1[1:input_nPathwayShow, ,drop=FALSE]
			 
			top1 <- as.data.frame(top1)
			top1 <- cbind(rep( input_selectContrast1, dim(top1)[1]),row.names(top1), top1); 
			top1$statistic <- as.character( round(as.numeric(top1$statistic),4)); 
			top1$adj.Pval <- sprintf("%-2.1e",as.numeric(top1$adj.Pval) )
			top1[,2] <- as.character(top1[,2]);top1[,1] <- as.character(top1[,1])
			colnames(top1)[1] <- "Direction"
			p.m <- "GAGE"

			colnames(top1)[2] <- paste(p.m," analysis:", gsub("-"," vs ",input_selectContrast1 ) )
			top1[ which( top1[,3] >0),1 ] <- "Up" #gsub("-"," > ",input_selectContrast1 )
			top1[ which( top1[,3] <0),1 ] <- "Down" # gsub("-"," < ",input_selectContrast1 )
			top1 <- top1[order( top1[,1], -abs(as.numeric( top1[,3]) ) ) ,]
			top1[ duplicated (top1[,1] ),1 ] <- ""

		  return( top1)
}

fgseaPathwayData <- function() {
	library(fgsea,verbose=FALSE) # fast GSEA

	noSig = as.data.frame("No significant pathway found.")
	if( length(limma.out$topGenes) == 0 ) return(noSig)
	if(length( limma.out$comparisons)  ==1 )  
    { top1=limma.out$topGenes[[1]]  
	} else {
	  top = limma.out$topGenes
	  ix = match(input_selectContrast1, names(top))
	  if( is.na(ix)) return (noSig)
	  top1 <- top[[ix]]; 
	  }
	  if(dim(top1)[1] == 0 ) return (noSig)
	  colnames(top1)= c("Fold","FDR")
	  
	  # remove some genes
	  top1 = top1[which(top1$FDR <input_GenePvalCutoff) , ]
	  

	  gmt = GeneSets.out 
     if(length( GeneSets.out )  == 0)  { return(as.data.frame("No gene set found!"))}

	  #converted = convertID(rownames(top1),input_selectOrg)
	  #
	   #gmt = readGeneSets(converted, top1, input_selectGO, input_selectOrg, myrange )
     # cat("Sets",length(gmt))

	 fold = top1[,1]; names(fold) <- rownames(top1)
	 if(input_absoluteFold) fold <- abs(fold) # use absolute value of fold change, disregard direction
	 paths <- fgsea(pathways = gmt, 
                  stats = fold,
                  minSize=input_minSetSize,
                  maxSize=input_maxSetSize,
                  nperm=10000)
	 # paths <-  rbind(paths$greater,paths$less)
	  if(dim(paths)[1] < 1  ) return( noSig )
	       paths <- as.data.frame(paths)
       paths <- paths[order(-abs( paths[,5])) ,]  # sort by NES
	  # paths <- paths[order( paths[,3]) ,]  # sort by FDR
	  top1 <- paths[,c(1,5,7,3)]
	  # rownames(top1) <- paths[,1] #paste(1:dim(paths)[1],": ",paths[,1],sep="" )
	  colnames(top1)= c("Pathway", "NES","Genes","adj.Pval")
	  
	  if ( length( which( top1[,4] <=  input_pathwayPvalCutoff   ) ) == 0 )
	    return( noSig)
	  top1 <- top1[which(top1[,4] <=  input_pathwayPvalCutoff ) , ,drop=FALSE]
	  if(dim(top1)[1] > input_nPathwayShow ) 
	     top1 <- top1[1:input_nPathwayShow,,drop=FALSE]
	 #top1 <- cbind(row.names(top1), top1); colnames(top1)[1] <-input_selectContrast1 	
		top1 <- as.data.frame(top1)
		top1 <- cbind(rep( input_selectContrast1, dim(top1)[1]), top1); 
		top1[,4] = as.character( round(as.numeric(top1[,4]),4)); 
		top1$adj.Pval <- sprintf("%-2.1e",as.numeric(top1$adj.Pval) )
		top1[,1] <- as.character(top1[,1])
		colnames(top1)[1] <- "Direction"

		p.m <- "GSEA"

		colnames(top1)[2] <- paste(p.m," analysis:", gsub("-"," vs ",input_selectContrast1 ) )
		top1[ which( as.numeric( top1[,3]) >0),1 ] <- "Up" #gsub("-"," > ",input_selectContrast1 )
		top1[ which( as.numeric( top1[,3]) <0),1 ] <- "Down" #gsub("-"," < ",input_selectContrast1 )
		top1 <- top1[order( top1[,1], -abs(as.numeric( top1[,3]) ) ) ,]
		top1[ duplicated (top1[,1] ),1 ] <- ""	 
	    top1[,3] = as.character( round(as.numeric(top1[,3]),4));

	 return( top1)
}

extract1 <- function (x) {
  words <- unlist ( strsplit(x,"_"))
  if(length( words )  <=4 ) return(gsub("_"," ",x)) else {
  words <- words[-c(1:4)]
  return( proper(paste(words,collapse = " ") ) )}
}

PGSEApathway <- function (converted,convertedData, selectOrg,GO,gmt, myrange,Pval_pathway,top){
  	subtype = detectGroups(colnames(convertedData))
	library(PGSEA,verbose=FALSE)
	Pvalue = 0.01  # cut off to report in PGSEA. Otherwise NA
	#Pval_pathway = 0.2   # cut off for P value of ANOVA test  to writ to file 
	# top = 30   # number of pathways to show
	if(length(gmt) ==0 ) return( list(pg3 = NULL, best = 1 ) )
    # centering by mean
	#pg = myPGSEA (convertedData - rowMeans(convertedData),
	 #            cl=gmt,range=myrange,p.value=TRUE, weighted=FALSE,nPermutation=100)

	#if( class(convertedData) != "data.frame" | class(convertedData) != "matarix") return( list(pg3 = NULL, best = 1 ) )
	#if( dim(convertedData)[2] <2 ) return( list(pg3 = NULL, best = 1 ) )
	
	 pg = PGSEA (convertedData - rowMeans(convertedData),cl=gmt,range=myrange,p.value=TRUE, weighted=FALSE)
	
	pg2 = pg$results;
	pg2 = pg2[rowSums(is.na(pg2))<ncol(pg2) ,]  # remove se/wrts with all missing(non-signficant)
	if (dim(pg2)[1] < 2 ) return.out
	best = max(abs(pg2))
	
	if(length(subtype) < 4 || length(unique(subtype)) <2 ||length(unique(subtype)) == dim(convertedData)[2] ) { 
	 pg2 = pg2[order(-apply(pg2,1,sd)     )   ,]
	 return( list(pg3 = pg2[1:top,], best = best ) )
	} 
    
	cat("\nComputing P values using ANOVA\n");
	pathPvalue <- function ( k){
	 return( summary(aov(pg2[k,]~subtype) )[[1]][["Pr(>F)"]][1] )
	}
	Pvalues = sapply(1:dim(pg2)[1], pathPvalue)
	Pvalues = p.adjust(Pvalues, "fdr")
	
	#if(min(Pvalues) > Pval_pathway ) return( list(pg3 = NULL, best = best ) )  else {  
    if(sort(Pvalues)[2] > Pval_pathway ) return( list(pg3 = NULL, best = best ) )  else {  

	NsigT = rowSums(pg$p.results<Pvalue)
	
	result=cbind( as.matrix(Pvalues),NsigT,pg2); 
	result = result[ order(result[,1])   ,]	
	
	result = result[which(result[,1] < Pval_pathway),,drop=F]
	#result = result[which(result[,2] >2)    ,]
	pg2 = result[,-2]

	# when there is only 1 left in the matrix pg2 becomes a vector
	if(sum( Pvalues<Pval_pathway) == 1) { pg3 = t( as.matrix(pg2));pg3 = rbind(pg3,pg3);} else
	{ if(dim(pg2)[1] > top ) {  pg3 = pg2[1:top,]; } else {  pg3 = pg2;  } }

	rownames(pg3) = sapply(rownames(pg3) , extract1)
	a=sprintf("%-3.2e",pg3[,1])
	rownames(pg3) = paste(a,rownames(pg3),sep=" ")
	pg3 =pg3[,-1]
	
	pg3 <- pg3[order( -apply(pg3,1,sd)    ),] # sort by SD
 
    return( list(pg3 = pg3, best = best ) )
    }
 }

PGSEAplot <- function(){
  
	library(PGSEA,verbose=FALSE)

	if(input_selectGO == "ID not recognized!" ) return( NULL)

	myrange = c(input_minSetSize, input_maxSetSize)
	genes = convertedData.out
if (is.null(input_selectContrast1 ) ) return(NULL)

	gmt = GeneSets.out


	# find related samples
	iz = findContrastSamples(input_selectContrast1, colnames(convertedData.out),readSampleInfo.out,
										input_selectFactorsModel,input_selectModelComprions, 
										factorReferenceLevels.out,input_CountsDEGMethod,
										input_dataFileFormat  )

	
	
	genes = genes[,iz]	
	
	subtype = detectGroups(colnames(genes )) 
    if(length( GeneSets.out )  == 0)  { plot.new(); text(0.5,0.5, "No gene sets!")} else {
	result = PGSEApathway(converted.out,genes, input_selectOrg,input_selectGO,
	             GeneSets.out,  myrange, input_pathwayPvalCutoff, input_nPathwayShow 	)
					 
	if( is.null(result$pg3) ) { plot.new(); text(0.5,1, "No significant pathway found!")} else 
	smcPlot(result$pg3,factor(subtype),scale = c(-max(result$pg3), max(result$pg3)), 
	show.grid = T, margins = c(3,1, 13,28), col = .rwb,cex.lab=0.5, main="Pathway Analysis:PGSEA")
    }
	
}

convertEnsembl2Entrez <- function (query,Species) { 
	querySet <- cleanGeneSet( unlist( strsplit( toupper(names( query)),'\t| |\n|\\,' )  ) )
	speciesID <- orgInfo$id[ which(orgInfo$ensembl_dataset == Species)]  # note uses species Identifying
	# idType 6 for entrez gene ID
	result <- dbGetQuery( convert,
						paste( " select  id,ens,species from mapping where ens IN ('", paste(querySet,collapse="', '"),
								"') AND  idType ='",idType_Entrez,"'",sep="") )	# slow
							
	if( dim(result)[1] == 0  ) return(NULL)
	result <- subset(result, species==speciesID, select = -species)

	ix = match(result$ens,names(query)  )

	tem <- query[ix];  names(tem) = result$id
	return(tem)
  
}

ReactomePAPathwayData <- function( ){
	library(ReactomePA,verbose=FALSE) # pathway analysis
 
	ensemblSpecies <- c("hsapiens_gene_ensembl","rnorvegicus_gene_ensembl", "mmusculus_gene_ensembl",
		               "celegans_gene_ensembl","scerevisiae_gene_ensembl", "drerio_gene_ensembl", "dmelanogaster_gene_ensembl")
    ReactomePASpecies= c("human", "rat", "mouse", "celegans", "yeast", "zebrafish", "fly" )
    # cbind(ensemblSpecies,ReactomePASpecies)  # double check mapping
    
	
	myrange = c(input_minSetSize, input_maxSetSize)
	noSig = as.data.frame("No significant pathway found.")
	if( length(limma.out$topGenes) == 0 ) return(noSig)
	if(length( limma.out$comparisons)  ==1 )  
    { top1=limma.out$topGenes[[1]]  
	} else {
	  top = limma.out$topGenes
	  ix = match(input_selectContrast1, names(top))
	  if( is.na(ix)) return (noSig)
	  top1 <- top[[ix]]; 
	  }
	  if(dim(top1)[1] == 0 ) return (noSig)
	  colnames(top1)= c("Fold","FDR")

	  # remove some genes
	  top1 = top1[which(top1$FDR <input_GenePvalCutoff) , ]
	  
	 
	 fold = top1[,1]; names(fold) <- rownames(top1)
	 if(input_absoluteFold) fold <- abs(fold) # use absolute value of fold change, disregard direction

       Species <- converted.out$species
      ix <- match( Species,ensemblSpecies )	
	  if(is.na(ix) ) return(as.data.frame("Species not coverted by ReactomePA package!"))
	  
	  fold <- convertEnsembl2Entrez (fold, Species)  
		 fold <- sort(fold,decreasing =T)
	  paths <- gsePathway(fold, nPerm=5000, organism = ReactomePASpecies[ix],
                minGSSize= input_minSetSize, 
				maxGSSize= input_maxSetSize,
				pvalueCutoff=0.5,
                pAdjustMethod="BH", verbose=FALSE)
      paths <- as.data.frame(paths)
	  
	  if(is.null(paths) ) return( noSig)
	  if(dim(paths)[1] ==0 ) return( noSig)

	 # paths <-  rbind(paths$greater,paths$less)
	  if(dim(paths)[1] < 1  ) return( noSig )
	       paths <- as.data.frame(paths)
       paths <- paths[order(-abs( paths[,5])) ,]  # sort by NES
	  # paths <- paths[order( paths[,3]) ,]  # sort by FDR
	  top1 <- paths[,c(2,5,3,7)]
	  # rownames(top1) <- paths[,1] #paste(1:dim(paths)[1],": ",paths[,1],sep="" )
	  colnames(top1)= c("Pathway", "NES","Genes","adj.Pval")
	  
	  if ( length( which( top1[,4] <=  input_pathwayPvalCutoff   ) ) == 0 )
	    return( noSig)
	  top1 <- top1[which(top1[,4] <=  input_pathwayPvalCutoff ) ,,drop=FALSE]
	  if(dim(top1)[1] > input_nPathwayShow ) 
	     top1 <- top1[1:input_nPathwayShow,,drop=FALSE]
	 #top1 <- cbind(row.names(top1), top1); colnames(top1)[1] <-input_selectContrast1 	
		top1 <- as.data.frame(top1)
		top1 <- cbind(rep( input_selectContrast1, dim(top1)[1]), top1); 
		top1[,4] = as.character( round(as.numeric(top1[,4]),4)); 
		top1$adj.Pval <- sprintf("%-2.1e",as.numeric(top1$adj.Pval) )
		top1[,1] <- as.character(top1[,1])
		colnames(top1)[1] <- "Direction"
		if(input_pathwayMethod == 1 ) p.m <- "GAGE"
		else if(input_pathwayMethod == 2 ) p.m <- "PGSEA"
		else if(input_pathwayMethod == 3 ) p.m <- "GSEA"
		else if(input_pathwayMethod == 4 ) p.m <- "PGSEA_All"
		else if(input_pathwayMethod == 5 ) p.m <- "ReactomePA"
		colnames(top1)[2] <- paste(p.m," analysis:", gsub("-"," vs ",input_selectContrast1 ) )
		top1[ which( as.numeric( top1[,3]) >0),1 ] <- "Up" #gsub("-"," > ",input_selectContrast1 )
		top1[ which( as.numeric( top1[,3]) <0),1 ] <- "Down" #gsub("-"," < ",input_selectContrast1 )
		top1 <- top1[order( top1[,1], -abs(as.numeric( top1[,3]) ) ) ,]
		top1[ duplicated (top1[,1] ),1 ] <- ""	 
	  top1[,3] = as.character( round(as.numeric(top1[,3]),4));
	 return( top1)
}

selectedPathwayData <- function( ){
 
    if(input_sigPathways == "All") return (NULL) 
	ix <- which(names(GeneSets.out ) == input_sigPathways   ) # find the gene set
	if(length(ix) == 0 ) return(NULL)
    genes <- GeneSets.out[[ix]] # retrieve genes

	
	# find related samples	
	iz = findContrastSamples(input_selectContrast1, colnames(convertedData.out),readSampleInfo.out,
										input_selectFactorsModel,input_selectModelComprions, 
										factorReferenceLevels.out,input_CountsDEGMethod ,
										input_dataFileFormat )
	x <-  convertedData.out[which(rownames(convertedData.out) %in% genes), iz ]
	if( input_selectOrg != "NEW") {
		ix = match( rownames(x), allGeneInfo.out[,1])
		if( sum( is.na(allGeneInfo.out$symbol )) != dim(allGeneInfo.out )[1] )  # symbol really exists? 
		   rownames(x) <- paste(rownames(x),":", as.character( allGeneInfo.out$symbol)[ix])
   }
	
	return( x )
}

selectedPathwayHeatmap <- function() {

	x = selectedPathwayData.out
	if(dim(x)[1]<=2 | dim(x)[2]<=2 ) return(NULL)
	groups = detectGroups(colnames(x))

	# this will cutoff very large values, which could skew the color 
	x=as.matrix(x)-apply(as.matrix(x),1,mean)
	cutoff = median(unlist(x)) + 3*sd (unlist(x)) 
	x[x>cutoff] <- cutoff
	cutoff = median(unlist(x)) - 3*sd (unlist(x)) 
	x[x< cutoff] <- cutoff
	
	# sometimes, a gene can be all zero in selected samples.
	x <- x[which(apply(x,1,sd)>0) ,]
	
	lmat = rbind(c(5,4),c(0,1),c(3,2))
	lwid = c(1.5,6)
	lhei = c(.5,.2,8)
	
	
	if( dim(x)[1]>200) 
	heatmap.2(x, distfun = dist2,hclustfun=hclust2,
	 col=heatColors[as.integer(input_heatColors1),], density.info="none", trace="none", scale="none", keysize=.5
	,key=T, symkey=F
	,dendrogram = "row"
	,ColSideColors=mycolors[ groups]
	,labRow=""
	,margins=c(10,15)
	,srtCol=45
	,lmat = lmat, lwid = lwid, lhei = lhei
	#,main ="Title"
	)

	if( dim(x)[1]<=200) 
	heatmap.2(x, distfun = dist2,hclustfun=hclust2,
	 col=heatColors[as.integer(input_heatColors1),], density.info="none", trace="none", scale="none", keysize=.5
	,key=T, symkey=F, 
	dendrogram = "row",
	,labRow=gsub(".*:","",rownames(x))
	,ColSideColors=mycolors[ groups]
	,margins=c(10,15)
	,cexRow=1.2
	,srtCol=45
	,lmat = lmat, lwid = lwid, lhei = lhei
	#,main ="Title"
	)
}

convertEnsembl2KEGG <- function (query,Species) {  # not working
	querySet <- cleanGeneSet( unlist( strsplit( toupper(names( query)),'\t| |\n|\\,' )  ) )
	speciesID <- orgInfo$id[ which(orgInfo$ensembl_dataset == Species)]  # note uses species Identifying
	# idType 6 for entrez gene ID
	result <- dbGetQuery( convert,
						paste( " select  id,ens,species from mapping where ens IN ('", paste(querySet,collapse="', '"),
								"') AND  idType ='",idType_KEGG,"'",sep="") )	# slow
							
	if( dim(result)[1] == 0  ) return(NULL)
	result <- subset(result, species==speciesID, select = -species)

	ix = match(result$ens,names(query)  )

	tem <- query[ix];  names(tem) = result$id
	return(tem)  
}

keggPathwayID <- function (pathwayDescription, Species, GO,selectOrg) {
	ix = grep(Species,gmtFiles)

	if (length(ix) == 0 ) {return(NULL)}
	
	# If selected species is not the default "bestMatch", use that species directly
	if(selectOrg != speciesChoice[[1]]) {  
		ix = grep(findSpeciesById(selectOrg)[1,1], gmtFiles )
		if (length(ix) == 0 ) {return(NULL )}
		totalGenes <- orgInfo[which(orgInfo$id == as.numeric(selectOrg)),7]
	}
	pathway <- dbConnect(sqlite,gmtFiles[ix],flags=SQLITE_RO)
	
	# change Parkinson's disease to Parkinson\'s disease    otherwise SQL 
	pathwayDescription <- gsub("\'","\'\'",pathwayDescription)
							
	pathwayInfo <- dbGetQuery( pathway, paste( " select * from pathwayInfo where description =  '", 
							pathwayDescription,   "' AND name LIKE '",GO,"%'",sep="") )		
	pathwayInfo <- dbGetQuery( pathway, paste( " select * from pathwayInfo where description =  '", 
							pathwayDescription,   "'",sep="") )	
							
	dbDisconnect(pathway);
	if(dim(pathwayInfo)[1] != 1 ) {return(NULL) }
	tem = gsub(".*:","",pathwayInfo[1,2])  
	return( gsub("_.*","",tem) )
}

KeggImage <- function(){

   # First generate a blank image. Otherwise return(NULL) gives us errors.
    outfile <- tempfile(fileext='.png')
    png(outfile, width=400, height=300)

    frame()
	dev.off()
    blank <- list(src = outfile,
         contentType = 'image/png',
         width = 400,
         height = 300,
         alt = " ")	

	if(is.null( input_selectGO ) ) return(outfile)
	if(input_selectGO != "KEGG") return(outfile)
	if(is.null(gagePathwayData.out ) ) return(outfile)
	if(is.null( input_sigPathways))  return (outfile) 
	# if( is.null(selectedPathwayData()) ) return(blank)

	library(pathview)


	if (is.null(input_selectContrast1 ) ) return(outfile)
	
	if(input_sigPathways == "All") return (outfile) 
	
	if( length(limma.out$topGenes) == 0 ) return(outfile)

	# get fold change
	if(length( limma.out$comparisons)  ==1 )  
    { top1=limma.out$topGenes[[1]]  
	} else {
	  top = limma.out$topGenes
	  ix = match(input_selectContrast1, names(top))
	  if( is.na(ix)) return (outfile)
	  top1 <- top[[ix]]; 
	  }
	  if(dim(top1)[1] == 0 ) return (outfile)
	 # cat("here5")
	  colnames(top1)= c("Fold","FDR")
      Species <- converted.out$species
	  
	 fold = top1[,1]; names(fold) <- rownames(top1)
	 fold <- convertEnsembl2Entrez(fold,Species)
	 
     keggSpecies <- as.character( keggSpeciesID[which(keggSpeciesID[,1] == Species),3] )
	 
     if(nchar( keggSpecies) <=2 ) return(outfile) # not in KEGG

	 # kegg pathway id

	pathID = keggPathwayID(input_sigPathways, Species, "KEGG",input_selectOrg)
	
	#cat("\nhere5  ",keggSpecies, " ",Species," ",input_sigPathways, "pathID:",pathID,"End", fold[1:5],names(fold)[1:5],"\n")
	#cat("\npathway:",is.na(input_sigPathways))
	#cat("\n",fold[1:5],"\n",keggSpecies,"\n",pathID)
    if(is.null(pathID) ) return(blank) # kegg pathway id not found.
	if(nchar(pathID)<3 ) return(blank)
	randomString <- gsub(".*file","",tempfile()) 

	outfile <- paste(pathID,".",randomString,".png",sep="")
	
		try( 
		 pv.out <- pathview(gene.data = fold, pathway.id = pathID, out.suffix = randomString, species = keggSpecies, kegg.native=TRUE)
		)
		
	
	return( outfile )
}

# list of pathways with details
pathwayListData  <- function(){
    
	pathways = NULL
	if( input_pathwayMethod == 1)  
		if(!is.null(gagePathwayData.out)) 
			if(dim(gagePathwayData.out)[2] >1) { 
				pathways <- gagePathwayData.out
				colnames(pathways)[2] ="Pathways"; 	
				colnames(pathways)[4] ="nGenes"; 
			}
	if( input_pathwayMethod == 3) 
		if(!is.null(fgseaPathwayData.out)) 
			if(dim(fgseaPathwayData.out)[2] >1) {
				pathways <- fgseaPathwayData.out
				colnames(pathways)[2] ="Pathways"; 	
				colnames(pathways)[4] ="nGenes"; 
			}
	
	if( input_pathwayMethod == 2) 
		if(!is.null(PGSEAplot.data.out)) 
			if(dim(PGSEAplot.data.out)[2] >1) {
				pathways <- as.data.frame( PGSEAplot.data.out)
				pathways$Pathways = substr(rownames(pathways),10, nchar( rownames(pathways)) )
				pathways$adj.Pval = gsub(" .*","", rownames(pathways))
				pathways$Direction ="Diff"
				
				}
	if( input_pathwayMethod == 4) 
		if(!is.null(PGSEAplotAllSamples.data.out)) 
			if(dim(PGSEAplotAllSamples.data.out)[2] >1) {
				pathways <- as.data.frame( PGSEAplotAllSamples.data.out)
				pathways$Pathways = substr(rownames(pathways),10, nchar( rownames(pathways)) )
				pathways$adj.Pval = gsub(" .*","", rownames(pathways))
				pathways$Direction ="Diff"
				
				}			
	if( is.null( pathways) ) return(NULL)
	
	# if no gene set data, return pathway list
	if(is.null(GeneSets.out ) ) return(pathways) 
	

	pathways$adj.Pval = as.numeric(pathways$adj.Pval)
	
	if(nrow(pathways)>1)
	for( i in 2:nrow(pathways) )
		if(nchar(pathways$Direction[i]) <=1)
			pathways$Direction[i] = pathways$Direction[i-1]
			
	# gene symbol matching symbols 
	probeToGene = NULL
	if( input_selectGO != "ID not recognized!" & input_selectOrg != "NEW")
	if(sum(is.na( allGeneInfo.out$symbol ) )/ dim( allGeneInfo.out )[1] <.5 ) { # if more than 50% genes has symbol
		probeToGene = allGeneInfo.out[,c("ensembl_gene_id","symbol")]
		probeToGene$symbol = gsub(" ","",probeToGene$symbol)

		ix = which( is.na(probeToGene$symbol) |
					nchar(probeToGene$symbol)<2 | 
					toupper(probeToGene$symbol)=="NA" |  
					toupper(probeToGene$symbol)=="0"  ) 			
		probeToGene[ix,2] = probeToGene[ix,1]  # use gene ID

	}		

	
	pathways$Genes =""
	# looking up genes for each pathway
	for(i in 1:nrow(pathways) ){ 
		ix <- which(names(GeneSets.out ) == pathways$Pathways[i]   ) # find the gene set
		if(length(ix) != 0 ) { 
			genes <- GeneSets.out[[ix]] # retrieve genes
			
			if(!is.null(probeToGene) ) { 
				iy = match(genes,probeToGene[,1])
				genes = probeToGene[iy,2]
			}
			
			pathways$Genes[i] = paste(genes, collapse=" ")
		}
	}
	return(pathways)


}

################################################################
# Genome-wide view
################################################################
 
genomePlotly <- function( ){
		# default plot
		fake = data.frame(a=1:3,b=1:3)
		p <- ggplot(fake, aes(x = a, y = b)) +
							 geom_blank() + ggtitle("No genes with position info.") +
							 theme(axis.title.x=element_blank(),axis.title.y=element_blank())
							 
		if(length( limma.out$comparisons)  ==1 )  {
			top1=limma.out$topGenes[[1]]  
		} else {
		  top = limma.out$topGenes
		  ix = match(input_selectContrast2, names(top))
		  if( is.na(ix)) return (ggplotly(p))
		  top1 <- top[[ix]]; 
		}
		  if(dim(top1)[1] == 0 ) return (ggplotly(p))
		  colnames(top1)= c("Fold","FDR")
		  
		 # write.csv(merge(top1,allGeneInfo(), by.x="row.names",by.y="ensembl_gene_id"  ),"tem.csv"  )
		 x <- merge(top1,allGeneInfo.out, by.x="row.names",by.y="ensembl_gene_id"  )

		 # if no chromosomes found. For example if user do not convert gene IDs.
		 if( dim(x)[1] >5  ) { 
		
			 x <- x[order(x$chromosome_name,x$start_position),]
	 
			 tem = sort( table( x$chromosome_name), decreasing=T)

			 chromosomes <- names( tem[tem >= 5 ] )  # chromosomes with less than 100 genes are excluded
			 if(length(chromosomes) > 50) chromosomes <- chromosomes[1:50]  # at most 50 chromosomes
			 chromosomes <- chromosomes[ nchar(chromosomes)<=12] # chr. name less than 10 characters
			 chromosomes = chromosomes[order(as.numeric(chromosomes) ) ]
			 # chromosomes = chromosomes[!is.na(as.numeric(chromosomes) ) ]
			 chromosomesNumbers = as.numeric(chromosomes)

			 # convert chr.x to numbers		 
			  j = max( chromosomesNumbers,na.rm=T) 
			  for( i in 1:length( chromosomes)) {
			   if ( is.na(chromosomesNumbers[i]) ) 
			   { chromosomesNumbers[i] <- j+1; j <- j+1; }
			 }
			  
			 x <- x[which(x$chromosome_name %in% chromosomes   ),]
			 x <- droplevels(x)
			 
			# find the number coding for chromosome 
			 getChrNumber <- function (chrName){
			 return( chromosomesNumbers[ which( chromosomes == chrName)] )
			 }
			  x$chrNum = 1 # numeric coding
			  x$chrNum <- unlist( lapply( x$chromosome_name, getChrNumber) )
			 
			 x$Row.names <- as.character( x$Row.names)
		 
			 # if symbol is missing use Ensembl id
			 x$symbol = as.character(x$symbol)	 
			 ix = which(is.na(x$symbol))
			 ix2 = which(nchar(as.character(x$symbol))<= 2 )
			 ix3 = which( duplicated(x$symbol))
			 ix = unique( c(ix,ix2,ix3))
			 x$symbol[ix] <- x$Row.names[ix] 
						 
		 
			 ##################################
			 # plotting
			# x = read.csv("tem_genome.csv")
			x = x[!is.na(x$chromosome_name),]
			x = x[!is.na(x$start_position),]	
			# only keep significant genes
			#x = x[which(x$FDR<input_limmaPval),]
			# x = x[which(abs(x$Fold) > log2( input_limmaFC)),]
			
			ix = which( (x$FDR< as.numeric(input_limmaPvalViz)) & (abs(x$Fold) > as.numeric(input_limmaFCViz) ) )
			
			if (length(ix) > 5) { 

				x = x[ix,]		
				
				x$start_position = x$start_position/1000000 # Mbp
				chrD = 20 # distance between chrs.
				foldCutoff = 3   # max log2 fold 
				
				x$Fold = x$Fold / sd(x$Fold)  # standardize fold change
				
				x$Fold[which(x$Fold > foldCutoff )] = foldCutoff   # log2fold within -5 to 5
				x$Fold[which(x$Fold <   -1*foldCutoff )] = -1*foldCutoff 
				x$Fold = 4* x$Fold
				

				
				x$y = x$chrNum*chrD + x$Fold
				chrLengthTable = aggregate(start_position~chrNum, data=x,max )
				chrTotal = dim(chrLengthTable)[1]	
				x$R = as.factor(sign(x$Fold))
				
				colnames(x)[ which(colnames(x) == "start_position")] = "x"

				p= ggplot(x, aes(x = x, y = y, colour = R, text = symbol ) ) + geom_point(shape = 20, size = .2)
				
					#label y with chr names
				p <- p +  scale_y_continuous(labels = paste("chr",chromosomes[chrLengthTable$chrNum],sep=""), breaks = chrD* (1:chrTotal), limits = c(0, 
					chrD*chrTotal + 5) )
				# draw horizontal lines for each chr.
				for( i in 1:dim(chrLengthTable)[1] )
					p = p+ annotate( "segment",x = 0, xend = chrLengthTable$start_position[i],
						y = chrLengthTable$chrNum[i]*chrD, yend = chrLengthTable$chrNum[i]*chrD)
				# change legend		http://ggplot2.tidyverse.org/reference/scale_manual.html
				p=p+scale_colour_manual(name="",   # customize legend text
					values=c("blue", "red"),
					breaks=c("1","-1"),
					labels=c("Up", "Dn"))	
				p = p + xlab("Position on chrs. (Mbp)") +	 theme(axis.title.y=element_blank())					 
				p= p + theme(legend.position="none")
				# p <- ggplot(mtcars, aes(x=hp, y=mpg)) + geom_point(shape=20, size=17)
			   # p=p+ geom_smooth(method = "lm",se=FALSE)
				#p+ geom_line(aes(y=rollmean(y,7, na.pad=TRUE)))
			  
			   # Customize hover text https://cran.r-project.org/web/packages/plotly/plotly.pdf
			   # style(ggplotly(p),hoverinfo="text")  # not working
			  }  # if no genes else
			
		}
		ggplotly(p)
}

# results from PREDA
genomePlotData <- function( ){
 	if (is.null(input_selectContrast2 ) ) return(NULL)
	
	if( length(limma.out$topGenes) == 0 ) return(NULL)
	
	if(length( limma.out$comparisons)  ==1 )  
    { top1=limma.out$topGenes[[1]]  
	} else {
	  top = limma.out$topGenes
	  ix = match(input_selectContrast2, names(top))
	  if( is.na(ix)) return (NULL)
	  top1 <- top[[ix]]; 
	  }
	  if(dim(top1)[1] == 0 ) return (NULL)
	  colnames(top1)= c("Fold","FDR")
	  
	 # write.csv(merge(top1,allGeneInfo(), by.x="row.names",by.y="ensembl_gene_id"  ),"tem.csv"  )
	 x <- merge(top1,allGeneInfo.out, by.x="row.names",by.y="ensembl_gene_id"  )
	 
	 x <- x[order(x$chromosome_name,x$start_position),]
	 tem = sort( table( x$chromosome_name), decreasing=T) 
	 chromosomes <- names( tem[tem > 100 ] )  # chromosomes with less than 100 genes are excluded
	 if(length(chromosomes) > 50) chromosomes <- chromosomes[1:50]  # at most 50 chromosomes

	 chromosomes = chromosomes[order(as.numeric(chromosomes) ) ]
	 # chromosomes = chromosomes[!is.na(as.numeric(chromosomes) ) ]
	 chromosomesNumbers = as.numeric(chromosomes)
	 # convert chr.x to numbers
	  j = max( chromosomesNumbers,na.rm=T) 
	  for( i in 1:length( chromosomes)) {
	   if ( is.na(chromosomesNumbers[i]) ) 
	   { chromosomesNumbers[i] <- j+1; j <- j+1; }
	 }
	  
	 x <- x[which(x$chromosome_name %in% chromosomes   ),]
	 x <- droplevels(x)
	 
	# find the number coding for chromosome 
	 getChrNumber <- function (chrName){
	 return( chromosomesNumbers[ which( chromosomes == chrName)] )
	 }
	  x$chrNum = 1 # numeric coding
	  x$chrNum <- unlist( lapply( x$chromosome_name, getChrNumber) )
	 
	 x$Row.names <- as.character( x$Row.names)
	 fold = x$Fold; names(fold) = x$Row.names
	 
	# write.csv(x,"tem.csv")
	 x <- x[!duplicated(x$Row.names),]
	 #rownames(x) = x$Row.names
	 

	GEanalysisResults <- genomePlotDataPre.out; 
	
	genomic_regions_UP<-PREDAResults2GenomicRegions(GEanalysisResults, qval.threshold=input_RegionsPvalCutoff, smoothStatistic.tail="upper", smoothStatistic.threshold= input_StatisticCutoff)
	genomic_regions_DOWN<-PREDAResults2GenomicRegions(GEanalysisResults, qval.threshold=input_RegionsPvalCutoff, smoothStatistic.tail="lower", smoothStatistic.threshold= -1 *input_StatisticCutoff)

	if( is.null(genomic_regions_UP$Test) &&  is.null(genomic_regions_UP$Test)  ) return(-1) # no significant regions
	
	regions <- 0 
	if( !is.null(genomic_regions_UP$Test)) { 
	dataframe_UPregions<-GenomicRegions2dataframe(genomic_regions_UP[[1]])
	dataframe_UPregions$Regulation <- "Up"
	regions = dataframe_UPregions
	}

	if( !is.null(genomic_regions_DOWN$Test) ){ 
	dataframe_DOWNregions<-GenomicRegions2dataframe(genomic_regions_DOWN[[1]])
	dataframe_DOWNregions$Regulation <- "Down"
	if ( class(regions) != "data.frame" ) regions <- dataframe_DOWNregions else # if UP regions is NULL
	 regions = rbind(regions,dataframe_DOWNregions  )
	}

	if( class(regions) != "data.frame") return(-1)

	Regions <- regions[,c(4,1:3)] 
	Regions <- Regions[which(Regions$end != Regions$start),]
	Regions$size = round((Regions$end - Regions$start )/1000000,3)
	Regions$chr <- chromosomes[ Regions$chr ] # convert from chr. number to names

	# find the gene indices in the up or down regulated regions
	regulatedGenes <- function (i) {
	ix =  which( Regions$chr == x$chromosome_name[i] &
		 Regions$start < x$start_position[i] #&(Regions$end > x$start_position[i] + x$genomeSpan[i])
		 &(Regions$end > x$start_position[i] )) # if the start position is within the region
	if( length(ix) == 0 | length(ix) >1  ) return(NA) else return( ix )
	}

	regionID = unlist( lapply(1:dim(x)[1], regulatedGenes) )
	x1 = x[which(!is.na(regionID)),]
	regionID = regionID[!is.na(regionID)]
	x1 = cbind(regionID,Regions[regionID, ], x1[,c('symbol', 'Row.names', 'Fold','FDR', 'band','start_position')])
	x1 = x1[order(x1$regionID,x1$start_position ), ]	
    colnames(x1)[8] = "Ensembl"; colnames(x1)[4] = "Region.Start";colnames(x1)[5] = "Region.End"; colnames(x1)[12] = "Gene.Start"

	# number of genes
	tem = table(x1$regionID)
	Regions$Ngenes = 0
	Regions$Ngenes[as.integer(names(tem) ) ] <- tem

	# cytoband per region
	tem = unique(x1[,c('regionID','band')])
	tem$band = gsub("\\..*","",tem$band)
	tem = unique(tem)
	Regions$band =""
	Regions$ID <- 1:dim(Regions)[1]
	for ( i in 1:dim(Regions)[1] )
	  Regions$band[i] <- paste( tem[ which( tem[,1] == Regions$ID[i]),2],  collapse=";" )

	# Genes
	Regions$Genes <- ""
	for ( i in 1:dim(Regions)[1] )
	  Regions$Genes[i] <- paste( x1$symbol[ which( x1[,1] == Regions$ID[i])],  collapse=" " )
	return( list(mainResult = GEanalysisResults, Regions = Regions, Genes = x1,legend.x=max(x$start_position)*.6, legend.y=max(chromosomesNumbers)-3  ) )
}

genomePlot <- function(){
	library(PREDA,verbose=FALSE)  # showing expression on genome
	library(PREDAsampledata,verbose=FALSE) 
	library(hgu133plus2.db,verbose=FALSE)
	
  if( class(genomePlotData.out) != "list" ) { plot.new(); text(0.2,1, "No significant regions found!")} else {
   GEanalysisResults <- genomePlotData.out$mainResult;
     if( is.null( GEanalysisResults ) ) return(NULL)	
	genomic_regions_UP<-PREDAResults2GenomicRegions(GEanalysisResults, qval.threshold=input_RegionsPvalCutoff, smoothStatistic.tail="upper", smoothStatistic.threshold= -1 *input_StatisticCutoff)
	genomic_regions_DOWN<-PREDAResults2GenomicRegions(GEanalysisResults, qval.threshold=input_RegionsPvalCutoff, smoothStatistic.tail="lower", smoothStatistic.threshold= -1 *input_StatisticCutoff)
	 
	checkplot<-genomePlot(GEanalysisResults, genomicRegions=c(genomic_regions_UP, genomic_regions_DOWN), grouping=c(1, 1), scale.positions="Mb", region.colors=c("red","blue"))
	legend(x=genomePlotData.out$legend.x, y=genomePlotData.out$legend.y , legend=c("UP", "DOWN"), fill=c("red","blue"))
	} #if  else
}

# pre-calculating PREDA, so that changing FDR cutoffs does not trigger entire calculation
genomePlotDataPre <- function( ){
	library(PREDA,verbose=FALSE)  # showing expression on genome
	library(PREDAsampledata,verbose=FALSE) 
	library(hgu133plus2.db,verbose=FALSE)
	if (is.null(input_selectContrast2 ) ) return(NULL)
	
	if( length(limma.out$topGenes) == 0 ) return(NULL)
	
	if(length( limma.out$comparisons)  ==1 )  
    { top1=limma.out$topGenes[[1]]  
	} else {
	  top = limma.out$topGenes
	  ix = match(input_selectContrast2, names(top))
	  if( is.na(ix)) return (NULL)
	  top1 <- top[[ix]]; 
	  }
	  if(dim(top1)[1] == 0 ) return (NULL)
	  colnames(top1)= c("Fold","FDR")
	  
	 # write.csv(merge(top1,allGeneInfo(), by.x="row.names",by.y="ensembl_gene_id"  ),"tem.csv"  )
	 x <- merge(top1,allGeneInfo.out, by.x="row.names",by.y="ensembl_gene_id"  )
	 
	 #hist(top1[,1])
	 
	 ########## PREDA
	infofile <- system.file("sampledata", "GeneExpression", "sampleinfoGE_PREDA.txt", package = "PREDAsampledata")
	sampleinfo<-read.table(infofile, sep="\t", header=TRUE)
	head(sampleinfo)

	data(ExpressionSetRCC)
	GEstatisticsForPREDA<-statisticsForPREDAfromEset(ExpressionSetRCC, statisticType="tstatistic", referenceGroupLabel="normal", classVector=sampleinfo[,"Class"])
	analysesNames(GEstatisticsForPREDA)
	
	GEGenomicAnnotations<-eset2GenomicAnnotations(ExpressionSetRCC, retain.chrs=1:22) # needs hgu133plus2.db package
	GEGenomicAnnotationsForPREDA<-GenomicAnnotations2GenomicAnnotationsForPREDA(GEGenomicAnnotations, reference_position_type="median")
	GEDataForPREDA<-MergeStatisticAnnotations2DataForPREDA(GEstatisticsForPREDA, GEGenomicAnnotationsForPREDA, sortAndCleanNA=TRUE)

	#x = read.csv("PREDA_test.csv")

	#x <- x[,-1] # remove the row index, not needed in Shiny

	 x <- x[order(x$chromosome_name,x$start_position),]
	 tem = sort( table( x$chromosome_name), decreasing=T) 
	 chromosomes <- names( tem[tem > 100 ] )  # chromosomes with less than 100 genes are excluded
	 if(length(chromosomes) > 50) chromosomes <- chromosomes[1:50]  # at most 50 chromosomes

	 chromosomes = chromosomes[order(as.numeric(chromosomes) ) ]
	 # chromosomes = chromosomes[!is.na(as.numeric(chromosomes) ) ]
	 chromosomesNumbers = as.numeric(chromosomes)
	 # convert chr.x to numbers
	  j = max( chromosomesNumbers,na.rm=T) 
	  for( i in 1:length( chromosomes)) {
	   if ( is.na(chromosomesNumbers[i]) ) 
	   { chromosomesNumbers[i] <- j+1; j <- j+1; }
	 }
	  
	 x <- x[which(x$chromosome_name %in% chromosomes   ),]
	 x <- droplevels(x)
	 
	# find the number coding for chromosome 
	 getChrNumber <- function (chrName){
	 return( chromosomesNumbers[ which( chromosomes == chrName)] )
	 }
	  x$chrNum = 1 # numeric coding
	  x$chrNum <- unlist( lapply( x$chromosome_name, getChrNumber) )
	 
	 x$Row.names <- as.character( x$Row.names)
	 fold = x$Fold; names(fold) = x$Row.names
	 
	# write.csv(x,"tem.csv")
	 x <- x[!duplicated(x$Row.names),]
	 #rownames(x) = x$Row.names
	 
	 myData <- GEDataForPREDA
	  
	 myData@position <- as.integer( x$start_position+ x$genomeSpan/2 )
	 myData@ids <- as.character( x$Row.names)
	 myData@chr <- as.integer( x$chrNum )
	 myData@start <- x$start_position
	 myData@end <- x$start_position + x$genomeSpan
	 myData@strand <- rep(1,dim(x)[1])
	 myData@chromosomesLabels <- chromosomes
	 myData@chromosomesNumbers <- as.integer( chromosomesNumbers )
	 myData@statistic <- as.matrix( x[,2,drop=FALSE] )
	 myData@analysesNames <- "Test"
	 myData@testedTail <- "both"
	 myData@optionalAnnotations <- as.matrix( x[,1:2,drop=FALSE] )
	 myData@optionalAnnotationsHeaders <- c("a","b")
	 
	set.seed(2)
	GEanalysisResults<-PREDA_main(myData,nperms = PREDA_Permutations)

	return( GEanalysisResults )
}

################################################################
# Biclustering
################################################################
 
biclustering <- function( ){
			library(biclust,verbose=FALSE)

			if(input_biclustMethod == "BCQU()" )
				library(QUBIC,verbose=FALSE) # have trouble installing on Linux
			if(input_biclustMethod == "BCUnibic()" )
				library(runibic,verbose=FALSE) # 
			
		
			x <- convertedData.out
			n=input_nGenesBiclust	
			if(n>dim(x)[1]) n = dim(x)[1] # max	as data
			if(n<10) n = 10 
			if(n> 2000 ) n = 2000 			
			x=as.matrix(x[1:n,])-apply(x[1:n,],1,mean)
			
			if( input_biclustMethod == "BCXmotifs()"  )
				x<-discretize(x)

			#res <- biclust::biclust(as.matrix( x), method = BCQU()) 
			runR = paste( "res <- biclust::biclust(as.matrix( x), method =", input_biclustMethod ,")" )
			eval(parse(text = runR ) )
			

			return(list( x=x, res=res)	)	 
}


biclustHeatmap <- function ( ){
			res = biclustering()$res
			if( res@Number == 0 ) { plot.new(); text(0.5,0.5, "No cluster found!")} else {
		
				x = biclust::bicluster(biclustering.out$x, res, as.numeric( input_selectBicluster)  )[[1]]

					par(mar = c(5, 4, 1.4, 0.2))

				if( dim(x)[1] <=30 )
					heatmap.2(x,  Rowv =T,Colv=F, dendrogram ="none",
					col=heatColors[as.integer(input_heatColors1),], density.info="none", trace="none", scale="none", keysize=.2
					,key=F, #labRow = T,
					,margins = c(8, 24)
					,cexRow=1
					#,srtCol=45
					,cexCol=1.  # size of font for sample names
					) else 		
					heatmap.2(x,  Rowv =T,Colv=F, dendrogram ="none",
					col=heatColors[as.integer(input_heatColors1),], density.info="none", trace="none", scale="none", keysize=.2
					,key=F, labRow = F,
					,margins = c(8, 4)
					,cexRow=1
					#,srtCol=45
					,cexCol=1.  # size of font for sample names

					)				
			}
}


geneListBclustGO <- function( ){		
		
			res = biclustering.out$res
			if( res@Number == 0 ) return(as.data.frame("No clusters found!") ) 
			x = biclust::bicluster(biclustering.out$x, res, as.numeric( input_selectBicluster)  )[[1]]
			
			if( dim(x)[1]  <= minGenesEnrichment) return(NoSig) 
			query = rownames(x)
			

			result <- findOverlapGMT( query, GeneSets.out,1) 
			
				
			result$Genes = "Up regulated"
			

			results1 = result
			if ( is.null( results1) ) return (NoSig)
			if( dim(results1)[2] <5 ) return(NoSig)  # Returns a data frame: "No significant results found!"

			results1= results1[,c(5,1,2,4)]
			colnames(results1)= c("List","FDR","Genes","GO terms or pathways")
			minFDR = 0.01
			if(min(results1$FDR) > minFDR ) results1 = as.data.frame("No signficant enrichment found.") else
			results1 = results1[which(results1$FDR < minFDR),]

			
			if(dim(results1)[2] != 4) return(NoSig)
			colnames(results1)= c("Direction","adj.Pval","Genes","Pathways")
			
			results1$adj.Pval <- sprintf("%-2.1e",as.numeric(results1$adj.Pval) )
			results1[,1] <- as.character(results1[,1])
			tem <- results1[,1]

			results1[ duplicated (results1[,1] ),1 ] <- ""
			
			results1[,-1]			

}

################################################################
# Co-expression network
################################################################

wgcna <- function ( ){
			#http://pklab.med.harvard.edu/scw2014/WGCNA.html
			library(WGCNA)

			x <- convertedData.out
			n=input_nGenesNetwork	
			if(n>dim(x)[1]) n = dim(x)[1] # max	as data
			if(n<50) return(NULL)
			if(dim(x)[2] <4) return(NULL)
			if(n> maxGeneWGCNA ) n = maxGeneWGCNA 			
			#x=as.matrix(x[1:n,])-apply(x[1:n,],1,mean)
			
			datExpr=t(x[1:n,])

			subGeneNames=colnames(datExpr)
			
			#Choosing a soft-threshold to fit a scale-free topology to the network
			powers = c(c(1:10), seq(from = 12, to=20, by=2));
			sft=pickSoftThreshold(datExpr,dataIsExpr = TRUE,powerVector = powers,corFnc = cor,corOptions = list(use = 'p'),networkType = "unsigned")


			softPower = input_mySoftPower;

			#calclute the adjacency matrix
			adj= adjacency(datExpr,type = "unsigned", power = softPower);

			#turn adjacency matrix into topological overlap to minimize the effects of noise and spurious associations
			TOM=TOMsimilarityFromExpr(datExpr,networkType = "unsigned", TOMType = "unsigned", power = softPower);
			colnames(TOM) =subGeneNames
			rownames(TOM) =subGeneNames
			
			##########################################
			# module detection

			library(flashClust,verbose=FALSE)

			geneTree = flashClust(as.dist(1-TOM),method="average");

			# Set the minimum module size
			minModuleSize = input_minModuleSize;

			# Module identification using dynamic tree cut
			dynamicMods = cutreeDynamic(dendro = geneTree,  method="tree", minClusterSize = minModuleSize);
			#dynamicMods = cutreeDynamic(dendro = geneTree, distM = dissTOM, method="hybrid", deepSplit = 2, pamRespectsDendro = FALSE, minClusterSize = minModuleSize);
			# table(dynamicMods)
			
			dynamicColors = labels2colors(dynamicMods)
			#table(dynamicColors)

			#discard the unassigned genes, and focus on the rest  # this causes problems
			#restGenes= (dynamicColors != "grey")
			#diss1=1-TOMsimilarityFromExpr(datExpr[,restGenes], power = softPower)
			#colnames(diss1) =rownames(diss1) =subGeneNames[restGenes]
			
			#colorCode = as.character(dynamicColors[restGenes])

			moduleInfo = cbind( subGeneNames, dynamicColors, dynamicMods)
			moduleInfo = moduleInfo[which(moduleInfo[,2] != "grey") ,] # remove genes not in any modules
			moduleInfo = moduleInfo[order(moduleInfo[,3]),] # sort
			
			n.modules = length(unique(dynamicColors) ) -1 ; nGenes = dim(moduleInfo)[1]	
			
			return(list(x = t(datExpr),powers=powers,sft=sft, TOM = TOM, dynamicColors = dynamicColors, moduleInfo = moduleInfo,n.modules=n.modules, nGenes =nGenes) )
}


softPower <- function(){

		####################################  
		
		sft = wgcna.out$sft;
		powers=wgcna.out$powers;
		# Plot the results
		#sizeGrWindow(9, 5)
		par(mfrow = c(1,2));
		cex1 = 0.9;

		# Scale-free topology fit index as a function of the soft-thresholding power
		plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit, signed R^2",type="n", main = paste("Scale independence"));
		text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],labels=powers,cex=cex1,col="red");

		# Red line corresponds to using an R^2 cut-off
		abline(h=0.80,col="red")

		# Mean connectivity as a function of the soft-thresholding power
		plot(sft$fitIndices[,1], sft$fitIndices[,5],xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",main = paste("Mean connectivity"))
		text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
}


modulePlot <- function(){
	
		diss1 = 1-wgcna.out$TOM;
		dynamicColors = wgcna.out$dynamicColors
		
		hier1=flashClust(as.dist(diss1), method="average" )
		#set the diagonal of the dissimilarity to NA 
		diag(diss1) = NA;
		plotDendroAndColors(hier1, dynamicColors, "Dynamic Tree Cut", dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05, main = "Gene dendrogram and module colors")

		#Visualize the Tom plot. Raise the dissimilarity matrix to the power of 4 to bring out the module structure
		#TOMplot(diss1, hier1, colorCode)			
}


listWGCNA.Modules <- function(){

		if (is.null(wgcna.out ) ){ # if sample info is uploaded and correctly parsed.
		   return(NULL)	   
		} else { 
			if( dim(wgcna.out$moduleInfo)[1] == 0  ) {  # if no module
				return(NULL) 
			}	else  { 
					modules = unique(wgcna.out$moduleInfo[, c("dynamicMods","dynamicColors")] )
					moduleList = apply(modules,1,paste,collapse=". ")
					moduleList  = paste0( moduleList, " (", table(wgcna.out$moduleInfo[,"dynamicMods"] )," genes)"  )
					moduleList = c(moduleList,"Entire network")
					return(moduleList)


				}

		}			
}


moduleNetwork <- function(){

		outfile <- tempfile(fileext='.txt')
	
		#module = gsub(".* ","",input_selectWGCNA.Module)
		module = unlist(strsplit(input_selectWGCNA.Module," " ) )[2]
		
		moduleColors = wgcna.out$dynamicColors 
		inModule = (moduleColors==module);
		
		if( input_selectWGCNA.Module == "Entire network")
		  inModule = rep(TRUE, length(inModule))
		
		datExpr = t(wgcna.out$x )
		probes = colnames(datExpr)
		modProbes = probes[inModule];
		
		modTOM = wgcna.out$TOM[inModule, inModule];
		dimnames(modTOM) = list(modProbes, modProbes)

		nTop = input_topGenesNetwork;
		
		if( nTop > 1000) nTop = 1000; 
		
		IMConn = softConnectivity(datExpr[, modProbes]);
		top = (rank(-IMConn) <= nTop)
		
		# adding symbols 
		probeToGene = NULL
		if( input_selectGO5 != "ID not recognized!" & input_selectOrg != "NEW")
	    if(sum(is.na( allGeneInfo.out$symbol ) )/ dim( allGeneInfo.out )[1] <.5 ) { # if more than 50% genes has symbol
			probeToGene = allGeneInfo.out[,c("ensembl_gene_id","symbol")]
			probeToGene$symbol = gsub(" ","",probeToGene$symbol)

			ix = which( is.na(probeToGene$symbol) |
						nchar(probeToGene$symbol)<2 | 
						toupper(probeToGene$symbol)=="NA" |  
						toupper(probeToGene$symbol)=="0"  ) 			
			probeToGene[ix,2] = probeToGene[ix,1]  # use gene ID

		}
		
		net <- modTOM[top,top] >input_edgeThreshold	

		
		for( i in 1:dim(net)[1])  # remove self connection
			net[i,i] = FALSE
		if(!is.null(probeToGene) & !input_noIDConversion) { # if gene symbol exist
			ix = match( colnames(net), probeToGene[,1])		
			colnames(net) = probeToGene[ix,2]
			ix = match( rownames(net), probeToGene[,1])		
			rownames(net) = probeToGene[ix,2]		
		}
		library(igraph,verbose=FALSE)
		#plot(graph_from_data_frame(d=data.frame(1:10,ncol=2)  ,directed=F) )
		# http://www.kateto.net/wp-content/uploads/2016/01/NetSciX_2016_Workshop.pdf
		plot( graph_from_adjacency_matrix( net, mod ="undirected" ), 
				vertex.label.color="black", vertex.label.dist=3,vertex.size=7)
		
}


networkModuleGO <- function(){		

	
			#module = gsub(".* ","",input_selectWGCNA.Module)
			module = unlist(strsplit(input_selectWGCNA.Module," " ) )[2]	
			moduleColors = wgcna.out$dynamicColors 
			inModule = (moduleColors==module);
			
			if( input_selectWGCNA.Module == "Entire network")
			  inModule = rep(TRUE, length(inModule))

			probes = rownames(wgcna.out$x   )
			query  = probes[inModule];
		
			if( length(query)  <= minGenesEnrichment) return(NoSig) 

			

			result <- findOverlapGMT( query, GeneSets.out,1) 
		
				
			result$Genes = "Up regulated"
			

			results1 = result
			if ( is.null( results1) ) return (NoSig)
			if( dim(results1)[2] <5 ) return(NoSig)  # Returns a data frame: "No significant results found!"

			results1= results1[,c(5,1,2,4)]
			colnames(results1)= c("List","FDR","Genes","GO terms or pathways")
			minFDR = 0.01
			if(min(results1$FDR) > minFDR ) results1 = as.data.frame("No signficant enrichment found.") else
			results1 = results1[which(results1$FDR < minFDR),]
			
 
			
			if(dim(results1)[2] != 4) return(NoSig)
			colnames(results1)= c("Direction","adj.Pval","Genes","Pathways")
			
			results1$adj.Pval <- sprintf("%-2.1e",as.numeric(results1$adj.Pval) )
			results1[,1] <- as.character(results1[,1])
			tem <- results1[,1]

			results1[ duplicated (results1[,1] ),1 ] <- ""
			
			results1[,-1]			
}