################################################################################################
#                                                                                              #
#     Fitting to data - code for "Epithelial stratification shapes infection dynamics"         #
#                                                                                              #
#     by Carmen Lia Murall, Plos CB submission 2018                                            #
#                                                                                              #
################################################################################################


#libraries:
library(stats4)
library(bbmle)
library(deSolve)
library(ggplot2)
library(magrittr)
#pomp prelims:
library(pomp)
stopifnot(packageVersion("pomp")>"1.4.9")
library(plyr)
library(reshape2)
options(stringsAsFactors=FALSE)
theme_set(theme_bw())
set.seed(11734391)


# #-------------------------------- full dataset, FOV 1n3 ----------------------------------------------
# #-----------------------------------------------------------------------------------------------------

#FOV1&3 dataset
fileNIKS_FOV1n3full<-read.csv("S1_data.csv")#, sep=';')

#get summary the data (mean,SD, SE)
library(lattice)
library(Rmisc)
filefull<-na.omit(fileNIKS_FOV1n3full)
sumryFOV13full <-summarySE(data=filefull, measurevar="DAPI.counts",groupvars=c("Week","Cell.type"))
sumryFOV13full #gives mean, sd, se and ci

sumryBrdU <-summarySE(data=filefull, measurevar="BrdU.counts",groupvars=c("Week","Cell.type"))
sumryBrdU #gives mean, sd, se and ci

sumryprop_week <-summarySE(data=filefull, measurevar="proptn.dividing",groupvars=c("Week","Cell.type"))
sumryprop_week #gives mean, sd, se and ci

sumryprop_celltype <-summarySE(data=filefull, measurevar="proptn.dividing",groupvars=c("Cell.type"))
sumryprop_celltype #gives mean, sd, se and ci


#reorganize, new dataframe
sumryBf <-subset(sumryFOV13full, Cell.type== "Basal")
sumrySf <-subset(sumryFOV13full, Cell.type== "Supra")
sumryKf <-subset(sumryFOV13full, Cell.type== "Keratinized")
datasetfull<-data.frame("Week"=sumryBf$Week, "abundB"=sumryBf$DAPI.counts, "abundS"=sumrySf$DAPI.counts, "abundK"=sumryKf$DAPI.counts)
# gives three columns (week, abdB, abdS)


#-------------------------------- Reduced dataset, FOV 1n3 ---------------------
#-------------------------------------------------------------------------------
#FOV1&3 dataset
fileNIKS_FOV1n3<-read.csv("S2_data.csv")#, sep=';')

#get summary the data (mean,SD, SE)
library(lattice)
library(Rmisc)
file<-na.omit(fileNIKS_FOV1n3)
sumryFOV13 <-summarySE(data=file, measurevar="DAPI.counts",groupvars=c("Week","Cell.type"))
sumryFOV13 #gives mean, sd, se and ci

#reorganize, new dataframe
sumryB <-subset(sumryFOV13, Cell.type== "Basal")
sumryS <-subset(sumryFOV13, Cell.type== "Supra")
sumryK <-subset(sumryFOV13, Cell.type== "Keratinized")
dataset<-data.frame("Week"=sumryB$Week, "abundB"=sumryB$DAPI.counts, "abundS"=sumryS$DAPI.counts, "abundK"=sumryK$DAPI.counts)
# gives three columns (week, abdB, abdS)

ggplot()+
  geom_point(data=file,mapping=aes(x=file$Week,y=file$DAPI.counts,color=Cell.type))+
  geom_line(data=dataset,aes(x=dataset$Week,y=dataset$abundB),col='red')+
  geom_line(data=dataset,aes(x=dataset$Week,y=dataset$abundS),col='blue')+
  geom_line(data=dataset,aes(x=dataset$Week,y=dataset$abundK),col='green')+
  scale_x_continuous(breaks=c(0,1,2,3))+
  labs(title="FOV1&3 - data with lines connecting means",x="Week",y="DAPI counts")



#------------------------- Fit d, m, rb, p, q -------------------------------------------------
#----------------------------------------------------------------------------------------------

#FOV1&3 dataset
file_reps<-read.csv("S3_data.csv") #18 reps per layer

#make a pomp object that encodes the model and the data
pomp(
  data=file_reps, #call All DATA
  times="Days",t0=0,  #name the times 'week' and set initial time to 0
  skeleton=vectorfield( #define the model with incidence object:  
    #Note: to fit a positvie p, changed the signs of the eqns
    #note: different S eqn
    Csnippet("
             double incidence;      
             DB = rb*B*p*(1-B/Nb),
             DP = rb*B*(1+p) - (rb*(0.02/0.15)*(P/P+D))*q*P,
             DD = (rb*(0.02/0.15)*(P/P+D))*P*(1+q) - d*D,
             DK = d*D - m*K;")),
  initializer=Csnippet("       
                       #define initial conditions:
                       B = B_0;
                       P = P_0;
                       D = D_0;
                       K = K_0;"),
  statenames=c("B","P","D","K"),  #variable names
  paramnames=c("rb","d","p","q","Nb","m","B_0","P_0","D_0","K_0")) -> dataReps  #NAME POMP 'data'


loglik.pois <- function (params) {
  x <- trajectory(dataReps,params=params) #runs model
  trajB<-c(trajectory(dataReps,params)["B",,][[1]],trajectory(dataReps,params)["B",,][[7]],trajectory(dataReps,params)["B",,][[14]],trajectory(dataReps,params)["B",,][[21]])
  trajS<-c(trajectory(dataReps,params)["P",,][[1]]+trajectory(dataReps,params)["D",,][[1]],trajectory(dataReps,params)["P",,][[7]]+trajectory(dataReps,params)["D",,][[7]],
           trajectory(dataReps,params)["P",,][[14]]+trajectory(dataReps,params)["D",,][[14]],trajectory(dataReps,params)["P",,][[21]]+trajectory(dataReps,params)["D",,][[21]])
  trajK<-c(trajectory(dataReps,params)["K",,][[1]],trajectory(dataReps,params)["K",,][[7]],trajectory(dataReps,params)["K",,][[14]],trajectory(dataReps,params)["K",,][[21]])
  sum(#fit B to basal data
    dpois(x=t(obs(dataReps)[1:18,c(1,7,14,21)]), #18 reps per layer           
          lambda=params["bp"]*trajB,log = TRUE)+ 
      #fit P+D to suprabasal data
      dpois(x=t(obs(dataReps)[19:36,c(1,7,14,21)]),               
            lambda=params["bp"]*trajS,log = TRUE)+ 
      #fit K to keratinzed data
      dpois(x= t(obs(dataReps)[37:54,c(1,7,14,21)]),
            lambda=params["bp"]*trajK,log = TRUE))
}

logit <- function (g) log(g/(1-g))    # the logit transform   --> For 0 < param <1
expit <- function (f) 1/(1+exp(-f))   # inverse logit   ---> For parm > 0

#fixed paramters
Nb_0val <-max(obs(dataReps)[1:18,21]) #max of the last timepoint <-initial guess
B_0val<-median(obs(dataReps)[1:18,1])#sumryB$DAPI.counts[[1]] #use mean of 1st timepoint as initial condition
P_0val<-1e-20
D_0val<-0
K_0val<-sumryK$DAPI.counts[[1]]
mval  <-0  #assume mu = 0 because cells aren't washed way by mucus in experimental system

f3 <- function (par) {
  params <- c(p=expit(par[1]),q=expit(par[2]),d=expit(par[3]),rb=expit(par[4]),bp=expit(par[5]), Nb=exp(par[6]),
              m=mval, B_0= B_0val, P_0= P_0val, D_0=0, K_0= K_0val)
  -loglik.pois(params)}

guess <- c(logit(0.012),logit(0.012),logit(0.4),logit(0.06),logit(0.001),log(Nb_0val))
fit3 <- optim(f3, par=guess,control=list(maxit=1000))
fit3


mle5 <- c(p=expit(fit3$par[1]),q=expit(fit3$par[2]),d=expit(fit3$par[3]),
          rb=expit(fit3$par[4]),bp=expit(fit3$par[5]),Nb=exp(fit3$par[6]))
signif(mle5,6)

#now compute the model's trajectory with the infered value of r, and then plot it:
coef(dataReps) <- c(mle5, m=mval,
                    B_0= B_0val, P_0= P_0val, D_0=0, K_0= K_0val) #new vector, now with estimated beta
x <- trajectory(dataReps,as.data.frame=TRUE) #run model

model.predB <- trajectory(dataReps)["B",,]
model.predS <- trajectory(dataReps)["P",,]+trajectory(dataReps)["D",,]  
model.predK <- trajectory(dataReps)["K",,]

library(plyr)
raply(2000,rpois(n=length(model.predB),
                 lambda=coef(dataReps,"bp")*model.predB))-> simdatB
aaply(simdatB,2,quantile,probs=c(0.025,0.5,0.975))->quantilesB 

raply(2000,rpois(n=length(model.predS),
                 lambda=coef(dataReps,"bp")*model.predS))-> simdatS
aaply(simdatS,2,quantile,probs=c(0.025,0.5,0.975))->quantilesS

raply(2000,rpois(n=length(model.predK),
                 lambda=coef(dataReps,"bp")*model.predK))-> simdatK
aaply(simdatK,2,quantile,probs=c(0.025,0.5,0.975))->quantilesK

typB<-sample(nrow(simdatB),1)
typS<-sample(nrow(simdatS),1)
typK<-sample(nrow(simdatK),1)

gB1<- cbind(as.data.frame(dataReps)[seq(1,21),c(1,2:19)],quantilesB,typical=simdatB[typB,])
gS1<- cbind(as.data.frame(dataReps)[seq(1,21),c(1,20:37)],quantilesS,typical=simdatS[typS,])
gK1<- cbind(as.data.frame(dataReps)[seq(1,21),c(1,38:55)],quantilesK,typical=simdatK[typK,])
gB<-na.omit(gB1)
gS<-na.omit(gS1)
gK<-na.omit(gK1)

#--------------
#plot model prediction with means
ggplot(data=join(as.data.frame(dataReps),x,by='time'), #plot the model and the data together
       mapping=aes(x=time))+
  geom_line(aes(y=B),color="blue")+
  geom_line(aes(y=(P+D)),color='purple')+ 
  geom_line(aes(y=K),color='red')+
  geom_point(data=dataset,aes(x=c(1,7,14,21),y=dataset$abundB),color='blue',size=4)+
  geom_point(data=dataset,aes(x=c(1,7,14,21),y=dataset$abundS),color='purple',size=4)+
  geom_point(data=dataset,aes(x=c(1,7,14,21),y=dataset$abundK),color='red',size=4)+
  geom_linerange(data=dataset,aes(x=c(1,7,14,21),ymin=dataset$abundB-sumryB$sd, ymax=dataset$abundB+sumryB$sd),color='blue') +
  geom_linerange(data=dataset,aes(x=c(1,7,14,21),ymin=dataset$abundS-sumryS$sd, ymax=dataset$abundS+sumryS$sd),color='purple') +
  geom_linerange(data=dataset,aes(x=c(1,7,14,21),ymin=dataset$abundK-sumryK$sd, ymax=dataset$abundK+sumryK$sd),color='red') +
  geom_ribbon(data=gB,aes(x=time,ymin=gB$`2.5%`,ymax=gB$`97.5%`),fill="cadetblue2",alpha=0.2,size=1.5)+
  geom_ribbon(data=gS,aes(x=time,ymin=gS$`2.5%`,ymax=gS$`97.5%`),fill="mediumpurple",alpha=0.2,size=1.5)+
  geom_ribbon(data=gK,aes(x=time,ymin=gK$`2.5%`,ymax=gK$`97.5%`),fill="indianred2",alpha=0.2,size=1.5)+
  theme_classic()+
  scale_x_continuous(breaks=c(1,7,14,21))+
  theme(axis.text=element_text(size=20),
        axis.title=element_text(size=20))+ 
  labs(y="DAPI counts",x="time, d")

#with data and ribbons
times<-seq(1,21)
ggplot()+
  geom_point(file,mapping=aes(x=Days,y=DAPI.counts,color=Cell.type), size=2)+
  scale_color_manual(values=c("blue","red","purple"))+
  geom_line(aes(x=times,y=model.predB),color="blue")+
  geom_line(aes(x=times,y=model.predS),color='purple')+ 
  geom_line(aes(x=times,y=model.predK),color='red')+
  geom_ribbon(data=gB,aes(x=time,ymin=gB$`2.5%`,ymax=gB$`97.5%`),fill="cadetblue2",alpha=0.2)+
  geom_ribbon(data=gS,aes(x=time,ymin=gS$`2.5%`,ymax=gS$`97.5%`),fill="mediumpurple",alpha=0.2)+
  geom_ribbon(data=gK,aes(x=time,ymin=gK$`2.5%`,ymax=gK$`97.5%`),fill="indianred2",alpha=0.2)+
  theme_classic()+
  scale_x_continuous(breaks=c(1,7,14,21))+
  labs(y="DAPI counts",x="time, d")

coef(dataReps) #print params
rpval<-mle5[[4]]*(0.02/0.15);rpval


#---------------seprate plots--------------------------------------------------------
#with data and ribbons
times<-seq(1,21)

bdat<- file[with(file, Cell.type=="Basal"),]
sdat<- file[with(file, Cell.type=="Supra"),]
kdat<- file[with(file, Cell.type=="Keratinized"),]
sztik<-30
szaxi<-31
sztit<-31

#basal
plotb<- ggplot()+
  geom_point(bdat, mapping=aes(x=Days,y=DAPI.counts), color='blue', size=4)+ 
  geom_line(aes(x=times,y=model.predB),color="blue",size=1.2)+
  geom_ribbon(data=gB,aes(x=time,ymin=gB$`2.5%`,ymax=gB$`97.5%`),fill="cadetblue2",alpha=0.2)+
  theme_classic()+
  scale_x_continuous(breaks=c(1,7,14,21))+
  theme(title=element_text(size=sztit),plot.title = element_text(hjust = 0.5),
        axis.text=element_text(size=szaxi),axis.title=element_text(size=sztik))+
  ylim(0,65)+
  labs(title="Basal",y="DAPI counts",x="time, d");plotb

#supra
plots<- ggplot()+
  geom_point(sdat, mapping=aes(x=Days,y=DAPI.counts), color='purple', size=4)+ 
  geom_line(aes(x=times,y=model.predS),color="purple",size=1.2)+
  geom_ribbon(data=gS,aes(x=time,ymin=gS$`2.5%`,ymax=gS$`97.5%`),fill="mediumpurple",alpha=0.2)+
  theme_classic()+
  scale_x_continuous(breaks=c(1,7,14,21))+
  theme(title=element_text(size=sztit),plot.title = element_text(hjust = 0.5),
        axis.text=element_text(size=szaxi),axis.title=element_text(size=sztik))+ 
  ylim(0,65)+
  labs(title="Suprabasal",y="DAPI counts",x="time, d");plots

#keratinized
plotk<- ggplot()+
  geom_point(kdat, mapping=aes(x=Days,y=DAPI.counts), color='red', size=4)+ 
  geom_line(aes(x=times,y=model.predK),color="red",size=1.2)+
  geom_ribbon(data=gK,aes(x=time,ymin=gK$`2.5%`,ymax=gK$`97.5%`),fill="indianred2",alpha=0.2)+
  theme_classic()+
  scale_x_continuous(breaks=c(1,7,14,21))+
  theme(title=element_text(size=sztit),plot.title = element_text(hjust = 0.5),
        axis.text=element_text(size=szaxi),axis.title=element_text(size=sztik))+ 
  ylim(0,65)+
  labs(title="Keratinized",y="DAPI counts",x="time, d");plotk





# ##-------------------------------- likelihood plots ----------------------------------------------
# 
# #p
# fp <- function (p) {
#   params <- c(p=p,q=mle5[[2]],d=mle5[[3]], rb=mle5[[4]], bp=mle5[[5]],Nb = mle5[[6]],
#               m=mval,  B_0= B_0val, P_0= P_0val, D_0=0, K_0= K_0val) 
#   -loglik.pois(params)}
# prof.p <- expand.grid(p=seq(0,0.05,length=100))
# prof.p$loglik.pois <- -sapply(prof.p$p,fp)
# maxloglik <- -fit3$value
# plot(loglik.pois~p,data=prof.p,type="l")#,ylim=c(-835,-845),type="l")
# cutoff <- maxloglik-qchisq(p=0.95,df=1)/2
# abline(h=c(0,cutoff))
# abline(v=range(subset(prof.p,loglik.pois>cutoff)$p),lty=2)
# pCI <-range(subset(prof.p,loglik.pois>cutoff)$p)
# 
# #q
# fq <- function (q) {
#   params <- c(p=mle5[[1]], q=q, d=mle5[[3]], rb=mle5[[4]], bp=mle5[[5]],Nb = mle5[[6]],
#               m=mval, B_0= B_0val, P_0= P_0val, D_0=0, K_0= K_0val) 
#   -loglik.pois(params)}
# prof.q <- expand.grid(q=seq(0.0,1,length=100))
# prof.q$loglik.pois <- -sapply(prof.q$q,fq)
# maxloglik <- -fit3$value
# plot(loglik.pois~q,data=prof.q,type="l")#,ylim=c(-835,-845),type="l")
# cutoff <- maxloglik-qchisq(p=0.95,df=1)/2
# abline(h=c(0,cutoff))
# abline(v=range(subset(prof.q,loglik.pois>cutoff)$q),lty=2)
# qCI <-range(subset(prof.q,loglik.pois>cutoff)$q)
# 
# #d
# fd <- function (d) {
#   params <- c(p=mle5[[1]],q=mle5[[2]],d=d, rb=mle5[[4]], bp=mle5[[5]],Nb = mle5[[6]],
#               m=mval,  B_0= B_0val, P_0= P_0val, D_0=0, K_0= K_0val) 
#   -loglik.pois(params)}
# prof.d <- expand.grid(d=seq(0.01,0.5,length=100))
# prof.d$loglik.pois <- -sapply(prof.d$d,fd)
# maxloglik <- -fit3$value
# plot(loglik.pois~d,data=prof.d,type="l")
# cutoff <- maxloglik-qchisq(p=0.95,df=1)/2
# abline(h=c(0,cutoff))
# abline(v=range(subset(prof.d,loglik.pois>cutoff)$d),lty=2)
# dCI <-range(subset(prof.d,loglik.pois>cutoff)$d)
# 
# #rb
# frb <- function (rb) {
#   params <- c(p=mle5[[1]],q=mle5[[2]],d=mle5[[3]], rb=rb, bp=mle5[[5]],Nb = mle5[[6]],
#               m=mval, B_0= B_0val, P_0= P_0val, D_0=0, K_0= K_0val) 
#   -loglik.pois(params)}
# prof.rb <- expand.grid(rb=seq(0,0.1,length=100))
# prof.rb$loglik.pois <- -sapply(prof.rb$rb,frb)
# maxloglik <- -fit3$value
# plot(loglik.pois~rb,data=prof.rb,type="l")
# cutoff <- maxloglik-qchisq(p=0.95,df=1)/2
# abline(h=c(0,cutoff))
# abline(v=range(subset(prof.rb,loglik.pois>cutoff)$rb),lty=2)
# rbCI <-range(subset(prof.rb,loglik.pois>cutoff)$rb)
# 
# #bp
# fbp <- function (bp) {
#   params <- c(p=mle5[[1]],q=mle5[[2]],d=mle5[[3]], rb=mle5[[4]], bp=bp, Nb = mle5[[6]],
#               m=mval, B_0= B_0val, P_0= P_0val, D_0=0, K_0= K_0val) 
#   -loglik.pois(params)}
# prof.bp <- expand.grid(bp=seq(0.1,1.0,length=100))
# prof.bp$loglik.pois <- -sapply(prof.bp$bp,fbp)
# maxloglik <- -fit3$value
# plot(loglik.pois~bp,data=prof.bp,type="l")
# cutoff <- maxloglik-qchisq(p=0.95,df=1)/2
# abline(h=c(0,cutoff))
# abline(v=range(subset(prof.bp,loglik.pois>cutoff)$bp),lty=2)
# bpCI <-range(subset(prof.bp,loglik.pois>cutoff)$bp)
# 
# #Nb
# fNb <- function (Nb) {
#   params <- c(p=mle5[[1]],q=mle5[[2]],d=mle5[[3]], rb=mle5[[4]], bp=mle5[[5]], Nb=Nb,
#               m=mval, B_0= B_0val, P_0= P_0val, D_0=0, K_0= K_0val) 
#   -loglik.pois(params)}
# prof.Nb <- expand.grid(Nb=seq(0.1,1.0,length=100))
# prof.Nb$loglik.pois <- -sapply(prof.Nb$Nb,fNb)
# maxloglik <- -fit3$value
# plot(loglik.pois~Nb,data=prof.Nb,type="l")
# cutoff <- maxloglik-qchisq(p=0.95,df=1)/2
# abline(h=c(0,cutoff))
# abline(v=range(subset(prof.Nb,loglik.pois>cutoff)$Nb),lty=2)
# NbCI <-range(subset(prof.Nb,loglik.pois>cutoff)$Nb)
# 
# tableCIs <-data.frame("p CIs"=pCI, "q CIs"=qCI, "d CIs"=dCI, "rb CIs"=rbCI, "bp CIs"=bpCI, "Nb CIs"=NbCI); tableCIs