################################################################################################ # # # Fitting to data - code for "Epithelial stratification shapes infection dynamics" # # # # by Carmen Lia Murall, Plos CB submission 2018 # # # ################################################################################################ #libraries: library(stats4) library(bbmle) library(deSolve) library(ggplot2) library(magrittr) #pomp prelims: library(pomp) stopifnot(packageVersion("pomp")>"1.4.9") library(plyr) library(reshape2) options(stringsAsFactors=FALSE) theme_set(theme_bw()) set.seed(11734391) # #-------------------------------- full dataset, FOV 1n3 ---------------------------------------------- # #----------------------------------------------------------------------------------------------------- #FOV1&3 dataset fileNIKS_FOV1n3full<-read.csv("S1_data.csv")#, sep=';') #get summary the data (mean,SD, SE) library(lattice) library(Rmisc) filefull<-na.omit(fileNIKS_FOV1n3full) sumryFOV13full <-summarySE(data=filefull, measurevar="DAPI.counts",groupvars=c("Week","Cell.type")) sumryFOV13full #gives mean, sd, se and ci sumryBrdU <-summarySE(data=filefull, measurevar="BrdU.counts",groupvars=c("Week","Cell.type")) sumryBrdU #gives mean, sd, se and ci sumryprop_week <-summarySE(data=filefull, measurevar="proptn.dividing",groupvars=c("Week","Cell.type")) sumryprop_week #gives mean, sd, se and ci sumryprop_celltype <-summarySE(data=filefull, measurevar="proptn.dividing",groupvars=c("Cell.type")) sumryprop_celltype #gives mean, sd, se and ci #reorganize, new dataframe sumryBf <-subset(sumryFOV13full, Cell.type== "Basal") sumrySf <-subset(sumryFOV13full, Cell.type== "Supra") sumryKf <-subset(sumryFOV13full, Cell.type== "Keratinized") datasetfull<-data.frame("Week"=sumryBf$Week, "abundB"=sumryBf$DAPI.counts, "abundS"=sumrySf$DAPI.counts, "abundK"=sumryKf$DAPI.counts) # gives three columns (week, abdB, abdS) #-------------------------------- Reduced dataset, FOV 1n3 --------------------- #------------------------------------------------------------------------------- #FOV1&3 dataset fileNIKS_FOV1n3<-read.csv("S2_data.csv")#, sep=';') #get summary the data (mean,SD, SE) library(lattice) library(Rmisc) file<-na.omit(fileNIKS_FOV1n3) sumryFOV13 <-summarySE(data=file, measurevar="DAPI.counts",groupvars=c("Week","Cell.type")) sumryFOV13 #gives mean, sd, se and ci #reorganize, new dataframe sumryB <-subset(sumryFOV13, Cell.type== "Basal") sumryS <-subset(sumryFOV13, Cell.type== "Supra") sumryK <-subset(sumryFOV13, Cell.type== "Keratinized") dataset<-data.frame("Week"=sumryB$Week, "abundB"=sumryB$DAPI.counts, "abundS"=sumryS$DAPI.counts, "abundK"=sumryK$DAPI.counts) # gives three columns (week, abdB, abdS) ggplot()+ geom_point(data=file,mapping=aes(x=file$Week,y=file$DAPI.counts,color=Cell.type))+ geom_line(data=dataset,aes(x=dataset$Week,y=dataset$abundB),col='red')+ geom_line(data=dataset,aes(x=dataset$Week,y=dataset$abundS),col='blue')+ geom_line(data=dataset,aes(x=dataset$Week,y=dataset$abundK),col='green')+ scale_x_continuous(breaks=c(0,1,2,3))+ labs(title="FOV1&3 - data with lines connecting means",x="Week",y="DAPI counts") #------------------------- Fit d, m, rb, p, q ------------------------------------------------- #---------------------------------------------------------------------------------------------- #FOV1&3 dataset file_reps<-read.csv("S3_data.csv") #18 reps per layer #make a pomp object that encodes the model and the data pomp( data=file_reps, #call All DATA times="Days",t0=0, #name the times 'week' and set initial time to 0 skeleton=vectorfield( #define the model with incidence object: #Note: to fit a positvie p, changed the signs of the eqns #note: different S eqn Csnippet(" double incidence; DB = rb*B*p*(1-B/Nb), DP = rb*B*(1+p) - (rb*(0.02/0.15)*(P/P+D))*q*P, DD = (rb*(0.02/0.15)*(P/P+D))*P*(1+q) - d*D, DK = d*D - m*K;")), initializer=Csnippet(" #define initial conditions: B = B_0; P = P_0; D = D_0; K = K_0;"), statenames=c("B","P","D","K"), #variable names paramnames=c("rb","d","p","q","Nb","m","B_0","P_0","D_0","K_0")) -> dataReps #NAME POMP 'data' loglik.pois <- function (params) { x <- trajectory(dataReps,params=params) #runs model trajB<-c(trajectory(dataReps,params)["B",,][[1]],trajectory(dataReps,params)["B",,][[7]],trajectory(dataReps,params)["B",,][[14]],trajectory(dataReps,params)["B",,][[21]]) trajS<-c(trajectory(dataReps,params)["P",,][[1]]+trajectory(dataReps,params)["D",,][[1]],trajectory(dataReps,params)["P",,][[7]]+trajectory(dataReps,params)["D",,][[7]], trajectory(dataReps,params)["P",,][[14]]+trajectory(dataReps,params)["D",,][[14]],trajectory(dataReps,params)["P",,][[21]]+trajectory(dataReps,params)["D",,][[21]]) trajK<-c(trajectory(dataReps,params)["K",,][[1]],trajectory(dataReps,params)["K",,][[7]],trajectory(dataReps,params)["K",,][[14]],trajectory(dataReps,params)["K",,][[21]]) sum(#fit B to basal data dpois(x=t(obs(dataReps)[1:18,c(1,7,14,21)]), #18 reps per layer lambda=params["bp"]*trajB,log = TRUE)+ #fit P+D to suprabasal data dpois(x=t(obs(dataReps)[19:36,c(1,7,14,21)]), lambda=params["bp"]*trajS,log = TRUE)+ #fit K to keratinzed data dpois(x= t(obs(dataReps)[37:54,c(1,7,14,21)]), lambda=params["bp"]*trajK,log = TRUE)) } logit <- function (g) log(g/(1-g)) # the logit transform --> For 0 < param <1 expit <- function (f) 1/(1+exp(-f)) # inverse logit ---> For parm > 0 #fixed paramters Nb_0val <-max(obs(dataReps)[1:18,21]) #max of the last timepoint <-initial guess B_0val<-median(obs(dataReps)[1:18,1])#sumryB$DAPI.counts[[1]] #use mean of 1st timepoint as initial condition P_0val<-1e-20 D_0val<-0 K_0val<-sumryK$DAPI.counts[[1]] mval <-0 #assume mu = 0 because cells aren't washed way by mucus in experimental system f3 <- function (par) { params <- c(p=expit(par[1]),q=expit(par[2]),d=expit(par[3]),rb=expit(par[4]),bp=expit(par[5]), Nb=exp(par[6]), m=mval, B_0= B_0val, P_0= P_0val, D_0=0, K_0= K_0val) -loglik.pois(params)} guess <- c(logit(0.012),logit(0.012),logit(0.4),logit(0.06),logit(0.001),log(Nb_0val)) fit3 <- optim(f3, par=guess,control=list(maxit=1000)) fit3 mle5 <- c(p=expit(fit3$par[1]),q=expit(fit3$par[2]),d=expit(fit3$par[3]), rb=expit(fit3$par[4]),bp=expit(fit3$par[5]),Nb=exp(fit3$par[6])) signif(mle5,6) #now compute the model's trajectory with the infered value of r, and then plot it: coef(dataReps) <- c(mle5, m=mval, B_0= B_0val, P_0= P_0val, D_0=0, K_0= K_0val) #new vector, now with estimated beta x <- trajectory(dataReps,as.data.frame=TRUE) #run model model.predB <- trajectory(dataReps)["B",,] model.predS <- trajectory(dataReps)["P",,]+trajectory(dataReps)["D",,] model.predK <- trajectory(dataReps)["K",,] library(plyr) raply(2000,rpois(n=length(model.predB), lambda=coef(dataReps,"bp")*model.predB))-> simdatB aaply(simdatB,2,quantile,probs=c(0.025,0.5,0.975))->quantilesB raply(2000,rpois(n=length(model.predS), lambda=coef(dataReps,"bp")*model.predS))-> simdatS aaply(simdatS,2,quantile,probs=c(0.025,0.5,0.975))->quantilesS raply(2000,rpois(n=length(model.predK), lambda=coef(dataReps,"bp")*model.predK))-> simdatK aaply(simdatK,2,quantile,probs=c(0.025,0.5,0.975))->quantilesK typB<-sample(nrow(simdatB),1) typS<-sample(nrow(simdatS),1) typK<-sample(nrow(simdatK),1) gB1<- cbind(as.data.frame(dataReps)[seq(1,21),c(1,2:19)],quantilesB,typical=simdatB[typB,]) gS1<- cbind(as.data.frame(dataReps)[seq(1,21),c(1,20:37)],quantilesS,typical=simdatS[typS,]) gK1<- cbind(as.data.frame(dataReps)[seq(1,21),c(1,38:55)],quantilesK,typical=simdatK[typK,]) gB<-na.omit(gB1) gS<-na.omit(gS1) gK<-na.omit(gK1) #-------------- #plot model prediction with means ggplot(data=join(as.data.frame(dataReps),x,by='time'), #plot the model and the data together mapping=aes(x=time))+ geom_line(aes(y=B),color="blue")+ geom_line(aes(y=(P+D)),color='purple')+ geom_line(aes(y=K),color='red')+ geom_point(data=dataset,aes(x=c(1,7,14,21),y=dataset$abundB),color='blue',size=4)+ geom_point(data=dataset,aes(x=c(1,7,14,21),y=dataset$abundS),color='purple',size=4)+ geom_point(data=dataset,aes(x=c(1,7,14,21),y=dataset$abundK),color='red',size=4)+ geom_linerange(data=dataset,aes(x=c(1,7,14,21),ymin=dataset$abundB-sumryB$sd, ymax=dataset$abundB+sumryB$sd),color='blue') + geom_linerange(data=dataset,aes(x=c(1,7,14,21),ymin=dataset$abundS-sumryS$sd, ymax=dataset$abundS+sumryS$sd),color='purple') + geom_linerange(data=dataset,aes(x=c(1,7,14,21),ymin=dataset$abundK-sumryK$sd, ymax=dataset$abundK+sumryK$sd),color='red') + geom_ribbon(data=gB,aes(x=time,ymin=gB$`2.5%`,ymax=gB$`97.5%`),fill="cadetblue2",alpha=0.2,size=1.5)+ geom_ribbon(data=gS,aes(x=time,ymin=gS$`2.5%`,ymax=gS$`97.5%`),fill="mediumpurple",alpha=0.2,size=1.5)+ geom_ribbon(data=gK,aes(x=time,ymin=gK$`2.5%`,ymax=gK$`97.5%`),fill="indianred2",alpha=0.2,size=1.5)+ theme_classic()+ scale_x_continuous(breaks=c(1,7,14,21))+ theme(axis.text=element_text(size=20), axis.title=element_text(size=20))+ labs(y="DAPI counts",x="time, d") #with data and ribbons times<-seq(1,21) ggplot()+ geom_point(file,mapping=aes(x=Days,y=DAPI.counts,color=Cell.type), size=2)+ scale_color_manual(values=c("blue","red","purple"))+ geom_line(aes(x=times,y=model.predB),color="blue")+ geom_line(aes(x=times,y=model.predS),color='purple')+ geom_line(aes(x=times,y=model.predK),color='red')+ geom_ribbon(data=gB,aes(x=time,ymin=gB$`2.5%`,ymax=gB$`97.5%`),fill="cadetblue2",alpha=0.2)+ geom_ribbon(data=gS,aes(x=time,ymin=gS$`2.5%`,ymax=gS$`97.5%`),fill="mediumpurple",alpha=0.2)+ geom_ribbon(data=gK,aes(x=time,ymin=gK$`2.5%`,ymax=gK$`97.5%`),fill="indianred2",alpha=0.2)+ theme_classic()+ scale_x_continuous(breaks=c(1,7,14,21))+ labs(y="DAPI counts",x="time, d") coef(dataReps) #print params rpval<-mle5[[4]]*(0.02/0.15);rpval #---------------seprate plots-------------------------------------------------------- #with data and ribbons times<-seq(1,21) bdat<- file[with(file, Cell.type=="Basal"),] sdat<- file[with(file, Cell.type=="Supra"),] kdat<- file[with(file, Cell.type=="Keratinized"),] sztik<-30 szaxi<-31 sztit<-31 #basal plotb<- ggplot()+ geom_point(bdat, mapping=aes(x=Days,y=DAPI.counts), color='blue', size=4)+ geom_line(aes(x=times,y=model.predB),color="blue",size=1.2)+ geom_ribbon(data=gB,aes(x=time,ymin=gB$`2.5%`,ymax=gB$`97.5%`),fill="cadetblue2",alpha=0.2)+ theme_classic()+ scale_x_continuous(breaks=c(1,7,14,21))+ theme(title=element_text(size=sztit),plot.title = element_text(hjust = 0.5), axis.text=element_text(size=szaxi),axis.title=element_text(size=sztik))+ ylim(0,65)+ labs(title="Basal",y="DAPI counts",x="time, d");plotb #supra plots<- ggplot()+ geom_point(sdat, mapping=aes(x=Days,y=DAPI.counts), color='purple', size=4)+ geom_line(aes(x=times,y=model.predS),color="purple",size=1.2)+ geom_ribbon(data=gS,aes(x=time,ymin=gS$`2.5%`,ymax=gS$`97.5%`),fill="mediumpurple",alpha=0.2)+ theme_classic()+ scale_x_continuous(breaks=c(1,7,14,21))+ theme(title=element_text(size=sztit),plot.title = element_text(hjust = 0.5), axis.text=element_text(size=szaxi),axis.title=element_text(size=sztik))+ ylim(0,65)+ labs(title="Suprabasal",y="DAPI counts",x="time, d");plots #keratinized plotk<- ggplot()+ geom_point(kdat, mapping=aes(x=Days,y=DAPI.counts), color='red', size=4)+ geom_line(aes(x=times,y=model.predK),color="red",size=1.2)+ geom_ribbon(data=gK,aes(x=time,ymin=gK$`2.5%`,ymax=gK$`97.5%`),fill="indianred2",alpha=0.2)+ theme_classic()+ scale_x_continuous(breaks=c(1,7,14,21))+ theme(title=element_text(size=sztit),plot.title = element_text(hjust = 0.5), axis.text=element_text(size=szaxi),axis.title=element_text(size=sztik))+ ylim(0,65)+ labs(title="Keratinized",y="DAPI counts",x="time, d");plotk # ##-------------------------------- likelihood plots ---------------------------------------------- # # #p # fp <- function (p) { # params <- c(p=p,q=mle5[[2]],d=mle5[[3]], rb=mle5[[4]], bp=mle5[[5]],Nb = mle5[[6]], # m=mval, B_0= B_0val, P_0= P_0val, D_0=0, K_0= K_0val) # -loglik.pois(params)} # prof.p <- expand.grid(p=seq(0,0.05,length=100)) # prof.p$loglik.pois <- -sapply(prof.p$p,fp) # maxloglik <- -fit3$value # plot(loglik.pois~p,data=prof.p,type="l")#,ylim=c(-835,-845),type="l") # cutoff <- maxloglik-qchisq(p=0.95,df=1)/2 # abline(h=c(0,cutoff)) # abline(v=range(subset(prof.p,loglik.pois>cutoff)$p),lty=2) # pCI <-range(subset(prof.p,loglik.pois>cutoff)$p) # # #q # fq <- function (q) { # params <- c(p=mle5[[1]], q=q, d=mle5[[3]], rb=mle5[[4]], bp=mle5[[5]],Nb = mle5[[6]], # m=mval, B_0= B_0val, P_0= P_0val, D_0=0, K_0= K_0val) # -loglik.pois(params)} # prof.q <- expand.grid(q=seq(0.0,1,length=100)) # prof.q$loglik.pois <- -sapply(prof.q$q,fq) # maxloglik <- -fit3$value # plot(loglik.pois~q,data=prof.q,type="l")#,ylim=c(-835,-845),type="l") # cutoff <- maxloglik-qchisq(p=0.95,df=1)/2 # abline(h=c(0,cutoff)) # abline(v=range(subset(prof.q,loglik.pois>cutoff)$q),lty=2) # qCI <-range(subset(prof.q,loglik.pois>cutoff)$q) # # #d # fd <- function (d) { # params <- c(p=mle5[[1]],q=mle5[[2]],d=d, rb=mle5[[4]], bp=mle5[[5]],Nb = mle5[[6]], # m=mval, B_0= B_0val, P_0= P_0val, D_0=0, K_0= K_0val) # -loglik.pois(params)} # prof.d <- expand.grid(d=seq(0.01,0.5,length=100)) # prof.d$loglik.pois <- -sapply(prof.d$d,fd) # maxloglik <- -fit3$value # plot(loglik.pois~d,data=prof.d,type="l") # cutoff <- maxloglik-qchisq(p=0.95,df=1)/2 # abline(h=c(0,cutoff)) # abline(v=range(subset(prof.d,loglik.pois>cutoff)$d),lty=2) # dCI <-range(subset(prof.d,loglik.pois>cutoff)$d) # # #rb # frb <- function (rb) { # params <- c(p=mle5[[1]],q=mle5[[2]],d=mle5[[3]], rb=rb, bp=mle5[[5]],Nb = mle5[[6]], # m=mval, B_0= B_0val, P_0= P_0val, D_0=0, K_0= K_0val) # -loglik.pois(params)} # prof.rb <- expand.grid(rb=seq(0,0.1,length=100)) # prof.rb$loglik.pois <- -sapply(prof.rb$rb,frb) # maxloglik <- -fit3$value # plot(loglik.pois~rb,data=prof.rb,type="l") # cutoff <- maxloglik-qchisq(p=0.95,df=1)/2 # abline(h=c(0,cutoff)) # abline(v=range(subset(prof.rb,loglik.pois>cutoff)$rb),lty=2) # rbCI <-range(subset(prof.rb,loglik.pois>cutoff)$rb) # # #bp # fbp <- function (bp) { # params <- c(p=mle5[[1]],q=mle5[[2]],d=mle5[[3]], rb=mle5[[4]], bp=bp, Nb = mle5[[6]], # m=mval, B_0= B_0val, P_0= P_0val, D_0=0, K_0= K_0val) # -loglik.pois(params)} # prof.bp <- expand.grid(bp=seq(0.1,1.0,length=100)) # prof.bp$loglik.pois <- -sapply(prof.bp$bp,fbp) # maxloglik <- -fit3$value # plot(loglik.pois~bp,data=prof.bp,type="l") # cutoff <- maxloglik-qchisq(p=0.95,df=1)/2 # abline(h=c(0,cutoff)) # abline(v=range(subset(prof.bp,loglik.pois>cutoff)$bp),lty=2) # bpCI <-range(subset(prof.bp,loglik.pois>cutoff)$bp) # # #Nb # fNb <- function (Nb) { # params <- c(p=mle5[[1]],q=mle5[[2]],d=mle5[[3]], rb=mle5[[4]], bp=mle5[[5]], Nb=Nb, # m=mval, B_0= B_0val, P_0= P_0val, D_0=0, K_0= K_0val) # -loglik.pois(params)} # prof.Nb <- expand.grid(Nb=seq(0.1,1.0,length=100)) # prof.Nb$loglik.pois <- -sapply(prof.Nb$Nb,fNb) # maxloglik <- -fit3$value # plot(loglik.pois~Nb,data=prof.Nb,type="l") # cutoff <- maxloglik-qchisq(p=0.95,df=1)/2 # abline(h=c(0,cutoff)) # abline(v=range(subset(prof.Nb,loglik.pois>cutoff)$Nb),lty=2) # NbCI <-range(subset(prof.Nb,loglik.pois>cutoff)$Nb) # # tableCIs <-data.frame("p CIs"=pCI, "q CIs"=qCI, "d CIs"=dCI, "rb CIs"=rbCI, "bp CIs"=bpCI, "Nb CIs"=NbCI); tableCIs