#----Preliminaries----

# READ ME
#Format your data in excel similar to the supplied Sample Data file (.xlsx) with samples as rows and measured variables as columns
#Column A: "Sample" in A1; the rest of your A cells should have your sample labels; label background samples "Background" in the end rows
#Column B: your variable of interest to be regressed against, the response variable; B1 should have the name of the response variable (i.e. Iba1); 
  #  the rest of the B cells have measured values of the variable
#Column C and onward: the first row should have the label, the following rows should have measured values of predictor variables (i.e. cytokines or phospho-proteins)

rm(list=ls()) #Remove all variables from current environment



#User Input   (Change this so it fits for your data)
dataFileName="MyData.xlsx"  #the name of your data file, which should be saved in the same folder as this R file 
SwapAxes=FALSE  #If you want to swap LV1 and LV2, make this TRUE, otherwise FALSE; must capitalize


#Set working directory and load needed libraries
setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) #Set working directory to the folder where the code file resides
if(!"pacman" %in% installed.packages()){install.packages("pacman")}
pacman::p_load(tidyverse,rio,gplots,matrixStats,readxl,heatmap3,ggplot2,plsRglm,RColorBrewer,stats)  #loads and installs all needed packages

#Set heatmap3 color bar parameters
breakBarColors=c(-200,seq(-1.5, 1.5, 0.01),200) #Outside numbers clip outliers. This is for zscoring.
barColors = colorpanel(length(breakBarColors)-1, "blue", "white", "red2")


#Define function for making color bars (taken from http://www.colbyimaging.com/wiki/statistics/color-bars)
color.bar = function(lut, min, max=-min, nticks=11, ticks=seq(min, max, len=nticks), title='') {
  scale = (length(lut)-1)/(max-min)
  
  plot(c(0,10), c(min,max), type='n', bty='n', xaxt='n', xlab='', yaxt='n', ylab='', main=title)
  axis(2, ticks, las=1)
  for (i in 1:(length(lut)-1)) {
    y = (i-1)/scale + min
    rect(0,y,10,y+1/scale, col=lut[i], border=NA)
  }
}



#----Read Data and Subtract Background----
df <- import(dataFileName)

dataBackground <- df %>%
  filter(Sample=="Background") %>%
  select(3:ncol(df)) %>%
  as.matrix()
 
dataPredictorVariablesIn <- df %>%
  filter(Sample!="Background") %>%
  select(3:ncol(df)) %>%
  as.matrix()

dataResponseVariable <- df %>%
  filter(Sample!="Background") %>%
  select(2) %>%
  as.matrix()

#subtract background from predictor variables
dataPredictorVariables=dataPredictorVariablesIn
for (i in 1:ncol(dataPredictorVariables))
{
  dataPredictorVariables[,i]=dataPredictorVariables[,i]-mean((dataBackground[,i]))  #subtract each column of data (each cytokine) by the mean of that cytokine's background values
} 
dataPredictorVariables[dataPredictorVariables<0]=0; #if value is below background, set to zero
rownames(dataPredictorVariables)=df$Sample[which(df$Sample!="Background")]

dataPredictorVariablesZ=apply(dataPredictorVariables,2,scale); #z-score the data
dataResponseVariableZ=apply(dataResponseVariable,2,scale); #z-score response variable (for use in heatmap)
rownames(dataPredictorVariablesZ)=rownames(dataPredictorVariables)



#----PLS----
#Remove constant columns and then run the PLS
indConstantColumns=colSds(dataPredictorVariablesZ)==0 
plsOut=plsRglm(dataY=dataResponseVariable,dataX=dataPredictorVariablesZ[,!indConstantColumns],2,modele="pls", scaleX = TRUE)

P=varimax(plsOut$pp) #Use varimax to rotate the LVs

#Return the loadings with missing values set to 0
P1=matrix(0,ncol = 1, nrow=dim(dataPredictorVariablesZ)[2])
P2=matrix(0,ncol = 1, nrow=dim(dataPredictorVariablesZ)[2])
P1[!indConstantColumns]=P$loadings[,1]
P2[!indConstantColumns]=P$loadings[,2]
rownames(P1)=colnames(dataPredictorVariablesZ)
rownames(P2)=colnames(dataPredictorVariablesZ)

rotSave=P$rotmat
scores = plsOut$tt %*% rotSave
T1=scores[,1]
T2=scores[,2]

if(SwapAxes==TRUE){
  P1[!indConstantColumns]=P$loadings[,2]
  P2[!indConstantColumns]=P$loadings[,1]
  T1=scores[,2]
  T2=scores[,1]
}


#Scores plot
myPalette = colorRampPalette(rev(brewer.pal(11, "Spectral")))
sc = scale_colour_gradientn(colours = barColors, limits=c(min(dataResponseVariable), max(dataResponseVariable)))
ScoresPlot=data.frame(T1,T2,(dataResponseVariable))
colnames(ScoresPlot)=c("T1","T2","ResponseVariable")
pdf("ScoresPlot.pdf", width=5,height=4,useDingbats=FALSE, pointsize = 18)
ggplot(ScoresPlot, aes(x=T1, y=T2, color=ResponseVariable)) + 
  geom_vline(xintercept=0)+
  geom_hline(yintercept=0)+
  geom_point( size=6)+
  sc+
  xlab("Scores on LV1")+
  ylab("Scores on LV2")+
  ggtitle("PLSR Scores")+
  xlim(-1.1*max(abs(T1)),1.1*max(abs(T1)))+
  ylim(-1.1*max(abs(T2)),1.1*max(abs(T2)))+
  theme(panel.background = element_rect(fill = 'white',colour='black'), 
        text = element_text(size=20),
        panel.border=element_blank(),
        plot.title = element_text(hjust = 0.5),
        axis.text = element_text(color = "black"),
        axis.ticks=element_line(color="black")
        )+
  labs(color = "")
dev.off()
     
#LV1 versus Response Variable graph to determine acceptability of regression
LV1Regression=data.frame(T1,(dataResponseVariable))
colnames(LV1Regression)=c("T1","ResponseVariable")
linmodel=lm(ResponseVariable~T1,LV1Regression)
RSQ=summary(linmodel)$r.squared

#Bar plot the loadings
P1Sort=sort(t(P1))/max(abs(P1))
indP1=sort(P1, index.return=TRUE)$ix
pdf("LV1vsResponseVariable.pdf", width=4,height=4,useDingbats=FALSE, pointsize = 18)
ggplot(LV1Regression, aes(x=T1, y=ResponseVariable)) + 
  geom_smooth(method='lm',se=FALSE,color='red',linetype="longdash")+
  geom_point(size=4)+
  sc+
  xlab("Scores on LV1")+
  ylab("ResponseVariable")+
  ggtitle("PLSR")+
  xlim(1.03*min(T1),1.03*max(T1))+
  ylim(0.97*min(dataResponseVariable),1.03*max(dataResponseVariable))+
  annotate("text",size=5, x=0.8*max(T1)+0.2*min(T1), y=min(dataResponseVariable), label=bquote({R^2}[PLS]==.(round(RSQ,digits=2))))+
  theme(panel.background = element_rect(fill = 'white'), 
        plot.title=element_text(hjust=0.5),
        text = element_text(size=20),
        panel.border=element_blank(),
        axis.line=element_line(color='black'),
        axis.text.x = element_text(face = "bold", color = "black", 
                                   size = 18),
        axis.text.y = element_text(face = "bold", color = "black", 
                                   size = 16))+
  labs(color = "")
dev.off()

pdf("LV1_Loadings.pdf", width=10,height=5,pointsize = 18)
barplot(P1Sort,
        main = "Signals in LV1",
        xlab = "",
        ylab = "",
        names.arg = rownames(P1)[indP1],
        col = "lightblue",
        horiz = FALSE,
        las=2,
        ylim=c(-1,1))
dev.off()

P2Sort=sort(t(P2))/max(abs(P2))
indP2=sort(P2, index.return=TRUE)$ix
pdf("LV2_Loadings.pdf", width=10,height=5,pointsize = 18)
barplot(P2Sort,
        main = "Signals in LV2",
        xlab = "",
        ylab = "",
        names.arg = rownames(P2)[indP2],
        col = "lightblue",
        horiz = FALSE,
        las=2,
        ylim=c(-1,1))
dev.off()
   

#Sort data based on Response Variable values (used in displaying heatmap)
ResponseVariableSort=sort(dataResponseVariableZ, decreasing=FALSE, index.return=TRUE)
indSort=ResponseVariableSort$ix
dataPredictorVariablesZ_sort=dataPredictorVariablesZ[indSort,]
dataResponseVariableZ_sort=dataResponseVariableZ[indSort]

#Heatmap with rows sorted by response variable and columns sorted by LV1
dataHeatmap_P1sort=data.frame(dataPredictorVariablesZ_sort[,indP1],dataResponseVariableZ_sort)
ColLabels=colnames(dataHeatmap_P1sort)
pdf("Heatmap_LV1sort.pdf", width=7,height=5,pointsize = 10)
heatmap3(dataHeatmap_P1sort, 
         col=barColors, breaks=breakBarColors,legendfun=function()showLegend(legend=c(NA),col=c(NA),cex=2.5), 
         Rowv=NA, Colv=NA,  scale="none", cexCol=1,cexRow=1.1, margins=c(20,2),
         highlightCell=data.frame(rep(1:dim(dataPredictorVariables)[1],each=(dim(dataPredictorVariables)[2]+1)),rep(1:(dim(dataPredictorVariables)[2]+1),times=dim(dataPredictorVariables)[1]),'black'),
         labCol=ColLabels
)
dev.off()


#----LOOCV----

LOOCV_runs=200
randomSample=sample(1:dim(dataPredictorVariables)[1],LOOCV_runs,replace=TRUE) #creates randomized array of sample indices

#Create blank matrices to record scores/loadings from each iteration
Load_LV1_LOOCV=matrix(0, nrow=LOOCV_runs, ncol=dim(dataPredictorVariables)[2])
Load_LV2_LOOCV=matrix(0, nrow=LOOCV_runs, ncol=dim(dataPredictorVariables)[2])

#iterations of PLSR
for (i in 1:LOOCV_runs)
{
  leftOut=randomSample[i] #index of sample to leave out
  dataPredictorVariablesLOOCVZ=dataPredictorVariablesZ[-leftOut,]
  dataResponseVariableLOOCV=dataResponseVariable[-leftOut]

  #Conduct PLSR, store loadings
  indConstantColumns=colSds(dataPredictorVariablesLOOCVZ)<1e-10  #Remove constant columns and then run the PLS
  plsOut_LOOCV=plsRglm(dataY=dataResponseVariableLOOCV,dataX=dataPredictorVariablesLOOCVZ[,!indConstantColumns],nt=2,modele="pls", scaleX = TRUE,EstimXNA=TRUE)
  
  P$loadings=plsOut_LOOCV$pp%*% rotSave
  
  #Return the loadings with missing values set to 0
  Load_LV1_LOOCV[i,!indConstantColumns]=P$loadings[,1]
  Load_LV2_LOOCV[i,!indConstantColumns]=P$loadings[,2]
  if(SwapAxes==TRUE){
    Load_LV1_LOOCV[i,!indConstantColumns]=P$loadings[,2]
    Load_LV2_LOOCV[i,!indConstantColumns]=P$loadings[,1]
  }
}


#compute mean and standard deviation for stored loadings over all iterations
P1_LOOCV=colMeans(Load_LV1_LOOCV)
P2_LOOCV=colMeans(Load_LV2_LOOCV)
stdevLV1_load=colSds(Load_LV1_LOOCV)/max(abs(P1_LOOCV))
stdevLV2_load=colSds(Load_LV2_LOOCV)/max(abs(P2_LOOCV))



#Bar plot the loadings with LOOCV error bars
P1Sort_LOOCV=sort(t(P1_LOOCV))/max(abs(P1_LOOCV))
indP1_LOOCV=sort(P1_LOOCV, index.return=TRUE)$ix
ColLabels=colnames(dataPredictorVariables)[indP1_LOOCV]
pdf("LV1_Loadings_SD.pdf", width=10,height=5,pointsize = 18)
barCenters <- barplot(P1Sort_LOOCV,
        main = "Signals in LV1",
        xlab = "",
        ylab = "",
        names.arg = ColLabels,
        col = "lightblue",
        horiz = FALSE,
        las=2,
        ylim=c(-1.49,1.49))
segments(barCenters,P1Sort_LOOCV-stdevLV1_load[indP1_LOOCV],barCenters,P1Sort_LOOCV+stdevLV1_load[indP1_LOOCV])
arrows(barCenters,P1Sort_LOOCV-stdevLV1_load[indP1_LOOCV],barCenters,P1Sort_LOOCV+stdevLV1_load[indP1_LOOCV],code=3,angle=90,length=0.05)
dev.off()

P2Sort_LOOCV=sort(t(P2_LOOCV))/max(abs(P2_LOOCV))
indP2_LOOCV=sort(P2_LOOCV, index.return=TRUE)$ix
ColLabels=colnames(dataPredictorVariables)[indP2_LOOCV]
pdf("LV2_Loadings_SD.pdf", width=10,height=5,pointsize = 18)
barCenters <- barplot(P2Sort_LOOCV,
        main = "Signals in LV2",
        xlab = "",
        ylab = "",
        names.arg = ColLabels,
        col = "lightblue",
        horiz = FALSE,
        las=2,
        ylim=c(-1.49,1.49))
segments(barCenters,P2Sort_LOOCV-stdevLV2_load[indP2_LOOCV],barCenters,P2Sort_LOOCV+stdevLV2_load[indP2_LOOCV])
arrows(barCenters,P2Sort_LOOCV-stdevLV2_load[indP2_LOOCV],barCenters,P2Sort_LOOCV+stdevLV2_load[indP2_LOOCV],code=3,angle=90,length=0.05)
dev.off()

#Heatmap with rows sorted by response variable and columns sorted by LV1
dataHeatmap_P1sort_LOOCV=data.frame(dataPredictorVariablesZ_sort[,indP1_LOOCV],dataResponseVariableZ_sort)
ColLabels=colnames(dataHeatmap_P1sort_LOOCV)
pdf("Heatmap_LV1sort_LOOCV.pdf", width=7,height=5,pointsize = 10)
heatmap3(dataHeatmap_P1sort_LOOCV, 
         col=barColors, breaks=breakBarColors,legendfun=function()showLegend(legend=c(NA),col=c(NA),cex=2.5), 
         Rowv=NA, Colv=NA,  scale="none", cexCol=1,cexRow=1.1, margins=c(20,2),
         highlightCell=data.frame(rep(1:dim(dataPredictorVariables)[1],each=(dim(dataPredictorVariables)[2]+1)),rep(1:(dim(dataPredictorVariables)[2]+1),times=dim(dataPredictorVariables)[1]),'black'),
         labCol=ColLabels
)
dev.off()


#Make the color-axis scale bar
pdfPath="Scalebar_cytokines.pdf"
pdf(pdfPath, width=2,height=8,pointsize = 14)
color.bar(barColors,-1.5)
dev.off()