---
title: "Nonlinear Regression Fitting of Rate Model to Experiment"
author: "Tim Abbott"
date: "October 27, 2017"
output: pdf_document
---

```{r setup, include=FALSE}
# Default chunk option: echo the source code of every chunk in the knitted output.
knitr::opts_chunk$set(echo = TRUE)
```

We used the open-source software RStudio (version 1.0.136) to generate the plots in Fig. 1c and
Supplementary Fig. 1 and to determine the goodness of fit of the rate model equations to
experimental data.

The purpose of the following R code is to load, interpolate and plot experimental data on GFP activation
by hM3D-CRISPR ChaCha (Fig. 1c, left and Supplementary Data 1 and 2). Nonlinear regression
fitting is also performed in this code to fit Equation (11) in Box 1 to experimental data and determine
parameter values and the goodness of fit.

```{r}
# Install only the packages that are not already available, so that knitting
# the document does not re-download and reinstall everything on each run.
# RColorBrewer is included because a later chunk loads it with library().
required_pkgs <- c("readxl", "akima", "minpack.lm", "ggplot2", "RColorBrewer")
missing_pkgs <- setdiff(required_pkgs, rownames(installed.packages()))

if (length(missing_pkgs) > 0) {
  # install.packages() cannot prompt for a mirror in a non-interactive knit;
  # fall back to the CRAN cloud mirror when no mirror has been configured.
  pkg_repos <- getOption("repos")
  if (is.null(pkg_repos) || isTRUE(unname(pkg_repos["CRAN"]) == "@CRAN@")) {
    pkg_repos <- c(CRAN = "https://cloud.r-project.org")
  }
  install.packages(missing_pkgs, repos = pkg_repos)
}
```


```{r}
library(readxl)
library(akima)
library(minpack.lm)
# ---- Read data ----
# The workbooks are read relative to the current working directory (the folder
# of this document when knitting) rather than through setwd() to a
# user-specific absolute path, so the analysis is not tied to one machine.
# NOTE(review): knitr restores the working directory after each chunk, so a
# setwd() here would not have carried over to later chunks when knitting.
mCherry <- read_xlsx("Supplementary Data 1.xlsx")
GFP <- read_xlsx("Supplementary Data 2.xlsx")
mCherry_m <- data.matrix(mCherry)
GFP_m <- data.matrix(GFP)

# Long-format table with one row per measurement: log10 of the first column
# of the mCherry sheet (L, [CNO]), log10 of the mCherry readings (A) and the
# GFP readings (Gstar). The first row of each sheet is dropped.
# The ligand column is repeated once per measurement column so that all three
# vectors have the same length (derived from the sheet instead of hard-coded).
n_measure_cols <- ncol(mCherry_m) - 1
d <- data.frame(
  L = log10(rep(mCherry_m[-1, 1], times = n_measure_cols)),
  A = log10(c(mCherry_m[-1, -1])),
  Gstar = c(GFP_m[-1, -1])
)

# ---- Interpolate onto a regular 30 x 25 grid in (log10 L, log10 A) ----
# NOTE(review): the akima documentation states that extrapolation is not
# possible for linear interpolation (the default) - confirm extrap = TRUE
# behaves as intended here.
a <- interp(
  d$L, d$A, d$Gstar,
  xo = seq(min(d$L), max(d$L), length = 30),
  yo = seq(min(d$A), max(d$A), length = 25),
  extrap = TRUE, duplicate = "mean"
)

# interp() returns its grid as components x, y and z; keep data-frame copies
# for inspection, with NA cells of the interpolated surface set to 0.
az <- as.data.frame(a$z)
az[is.na(az)] <- 0
ax <- as.data.frame(a$x)
ay <- as.data.frame(a$y)

# ---- Export plot as Fig 1c ----
pdf("Fig1c.pdf")
filled.contour(
  a$x, 10^a$y, a$z,
  xlim = range(a$x, finite = TRUE),
  ylim = c(1000, 110000), zlim = c(0, 6500),
  color.palette = heat.colors,
  plot.title = title(
    main = "EGFP Activation Heatmap at day 3",
    xlab = "log([CNO] in M)", ylab = "ARRB2-dCas9-VPR expression (a.u.)"
  ),
  # key.title must be a plotting statement; a bare string is evaluated and
  # discarded, leaving the colour key untitled. Shrunk to fit the narrow key.
  key.title = title(main = "EGFP fluorescence (a.u.)", cex.main = 0.6)
)
dev.off()

# ---- Non-linear regression fitting of model to experimental data ----
# Rescale inputs back to linear units for the model.
d$L <- 10^(d$L)
d$A <- 10^(d$A)

# Equation 11 in Box 1. The lower bounds are given in the same order as the
# start list: Go, Gmax, Kl, Kd, n.
m <- nlsLM(
  Gstar ~ Go + Gmax * L / (Kl + L) * (A^n / (Kd + A^n)),
  data = d,
  start = list(Go = 6600, Gmax = 1e3, Kl = 1e-9, Kd = 3.e10, n = 2),
  lower = c(1e3, 1e3, 1e-11, 2.5e10, 2),
  control = nls.control(maxiter = 100)
)
# Correlation between the observed GFP signal and the fitted values
# (only Gstar is compared, not every column of d).
cor(d$Gstar, predict(m))
#Results: Go = 1290, Gmax = 6326; Kl = 6.680e-9; Kd = 2.577e10; n = 2.33; R^2 = (0.95)^2

# What if we just change n_amp to n = 1? e.g. Tango design.
# The lower and upper bounds coincide for Go, Gmax and Kl, pinning them to the
# values reported for the first fit; only Kd may vary, within
# [2.5e10, 2.577e10].
m <- nlsLM(
  Gstar ~ Go + Gmax * L / (Kl + L) * (A / (Kd + A)),
  data = d,
  start = list(Go = 6600, Gmax = 1e3, Kl = 1e-9, Kd = 2.5e10),
  lower = c(1290, 6326, 6.68e-9, 2.5e10),
  upper = c(1290, 6326, 6.68e-9, 2.577423795898897e10),
  control = nls.control(maxiter = 100)
)
# Correlation between the observed GFP signal and the fitted values.
cor(d$Gstar, predict(m))
#Results: R^2 = (0.91)^2
```


The purpose of the following R code is to plot Equation (17) in the Supplementary Information with the fitted parameter values
(Fig. 1c, right).


```{r}
# Plot Model Equation (11) in Box 1 for GFP: "Gss" or Gstar here.

# Evaluation grid: ligand concentration L (three linear ranges of increasing
# step size) and ARRB2-dCas9-VPR level A.
L <- c(seq(1e-10, 1e-8, 1e-9), seq(2e-8, 1e-6, 1e-8), seq(1.1e-6, 1e-5, 1e-7))
A <- seq(0, 110000, 2000)

# Parameters for the ChaCha case (n = 2.333).
Go <- 1290
Gmax <- 6326
Kl <- 6.68e-9
Kd <- 2.5e10
n <- 2.333

# Evaluate the model on the whole grid at once: rows follow L, columns follow
# A. outer() replaces the element-by-element double loop.
Gstar <- outer(L, A, function(l, a) {
  Go + Gmax * l / (Kl + l) * (a^n / (Kd + a^n))
})

pdf("Fig 1c model.pdf")
filled.contour(
  log10(L), A, Gstar,
  xlim = range(log10(L), finite = TRUE),
  ylim = c(1000, 110000), zlim = c(0, 6500),
  color.palette = heat.colors,
  plot.title = title(
    main = "EGFP Activation Heatmap at day 3",
    xlab = "log([CNO] in M)", ylab = "ARRB2-dCas9-VPR expression (a.u.)"
  ),
  # key.title must be a plotting statement; a bare string is evaluated and
  # discarded, leaving the colour key untitled. Shrunk to fit the narrow key.
  key.title = title(main = "EGFP fluorescence (a.u.)", cex.main = 0.6)
)
dev.off()

```


The purpose of the following R code is to load, interpolate and plot experimental data on GFP activation
by dox-inducible dCas9-VPR (Fig. S2c, left, and Supplementary Data 3 and 4). Nonlinear regression
fitting is also performed in this code to fit Equation (6) in the Supplementary Information to the data and
determine parameter values and the goodness of fit.


```{r}
# library() stops with an error when a package is missing, whereas require()
# only returns FALSE and lets the chunk fail later with a less clear message.
library(akima)
library(minpack.lm)
library(readxl)
library(ggplot2)
library(RColorBrewer) #perform each time you start up RStudio

# ---- Read data ----
mCherry <- read_xlsx("Supplementary Data 3.xlsx")
GFP <- read_xlsx("Supplementary Data 4.xlsx")
mCherry_m <- data.matrix(mCherry)
GFP_m <- data.matrix(GFP)

# Long-format table with one row per measurement: log10 of the first column
# of the mCherry sheet (L), log10 of the mCherry readings (D) and the GFP
# readings (Gstar). The first row of each sheet is dropped.
# The first column is repeated once per measurement column so that all three
# vectors have the same length (derived from the sheet instead of hard-coded).
n_measure_cols <- ncol(mCherry_m) - 1
d <- data.frame(
  L = log10(rep(mCherry_m[-1, 1], times = n_measure_cols)),
  D = log10(c(mCherry_m[-1, -1])),
  Gstar = c(GFP_m[-1, -1])
)

# ---- Interpolate onto a regular 30 x 30 grid in (log10 L, log10 D) ----
a <- interp(
  d$L, d$D, d$Gstar,
  xo = seq(min(d$L), max(d$L), length = 30),
  yo = seq(min(d$D), max(d$D), length = 30),
  extrap = FALSE, duplicate = "mean"
)

# Data-frame copies of the interpolated grid for inspection, with NA cells of
# the surface set to 0.
az <- as.data.frame(a$z)
az[is.na(az)] <- 0
ax <- as.data.frame(a$x)
ay <- as.data.frame(a$y)

# ---- Export plot as Fig S2c ----
pdf("Fig S2c.pdf")

filled.contour(
  a$x, 10^a$y, a$z,
  xlim = range(a$x, finite = TRUE),
  ylim = c(0, 30000), zlim = c(1000, 55000),
  color.palette = heat.colors,
  plot.title = title(
    main = "EGFP Activation Heatmap at day 3",
    xlab = "log([CNO] in M)", ylab = "dCas9-VPR expression (a.u.)"
  ),
  # key.title must be a plotting statement; a bare string is evaluated and
  # discarded, leaving the colour key untitled. Shrunk to fit the narrow key.
  key.title = title(main = "EGFP fluorescence (a.u.)", cex.main = 0.6)
)
dev.off()

# ---- Non-linear regression fitting of model to experimental data ----
# Rescale D back to linear units.
d$D <- 10^(d$D)

# Parameters.
# NOTE(review): none of these values enters the fit below - kcsg, G, C and
# kdgs do not appear in the formula, and Kd and n are estimated from the
# start list rather than read from these variables. Kept for reference.
kcsg <- 5e-3
G <- 100
C <- 1
n <- 1.6
kdgs <- 8e-6
Kd <- 1

# Gstar as a function of DOX. The lower bounds are given in the same order as
# the start list: Go, ko, Kd, n.
m <- nlsLM(
  Gstar ~ Go + ko * D^n / (Kd + D^n),
  data = d,
  start = list(Go = 1e4, ko = 1e4, Kd = 6e3, n = 1.5),
  lower = c(7e3, 1e3, 1e3, 0.2),
  control = nls.control(maxiter = 100)
)

# Correlation between the observed GFP signal and the fitted values
# (only Gstar is compared, not every column of d).
cor(d$Gstar, predict(m))
#Results: Go = 7000, ko = 26.636, n = 0.708, R^2 = 0.97^2
```


The purpose of the following R code is to plot Equation (6) in Supplementary Note 2 with the fitted
parameter values (Fig. S2c, right).

```{r}
# Plotting Model Equation (6) in Supplementary Note 2 for GFP: "Gss" or Gstar here.

# Evaluation grid: log10 ligand concentration L and dCas9-VPR level D.
L <- log10(c(seq(1e-10, 1e-8, 1e-9), seq(2e-8, 1e-6, 1e-8), seq(1.1e-6, 1e-5, 1e-7)))
D <- seq(0, 3e4, 1000)

# If dCas9-VPR-induced GFP is power-law dependent.
# The expression depends on D only, so it is computed once per D value and
# repeated down the rows (one row per L) instead of being recomputed in a
# double loop.
Gstar <- matrix(
  7000 + 26.636 * D^.708,
  nrow = length(L), ncol = length(D), byrow = TRUE
)

pdf("Fig S2c model.pdf")

filled.contour(
  L, D, Gstar,
  xlim = range(L, finite = TRUE),
  ylim = c(0, 30000), zlim = c(1000, 55000),
  color.palette = heat.colors,
  plot.title = title(
    main = "EGFP Activation Heatmap at day 3",
    xlab = "log([CNO] in M)", ylab = "dCas9-VPR expression (a.u.)"
  ),
  # key.title must be a plotting statement; a bare string is evaluated and
  # discarded, leaving the colour key untitled. Shrunk to fit the narrow key.
  key.title = title(main = "EGFP fluorescence (a.u.)", cex.main = 0.6)
)
dev.off()
```