---
title: "Nonlinear Regression Fitting of Rate Model to Experiment"
author: "Tim Abbott"
date: "October 27, 2017"
output: pdf_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

We used the open source software, RStudio (version 1.0.136), to generate plots
in Fig. 1c and Supplementary Fig. 1 and to determine the goodness of fit of the
rate model equations to experimental data. The purpose of the following R code
is to load, interpolate and plot experimental data on GFP activation by
hM3D-CRISPR ChaCha (Fig. 1c, left and Supplementary Data 1 and 2). Nonlinear
regression fitting is also performed in this code to fit Equation (11) in Box 1
to experimental data and determine parameter values and the goodness of fit.

```{r}
# Install only the packages that are not available yet, so that knitting the
# document does not reinstall everything on each run. RColorBrewer is listed
# because a later chunk loads it with library().
required_pkgs <- c("readxl", "akima", "minpack.lm", "ggplot2", "RColorBrewer")
is_installed <- vapply(
  required_pkgs,
  function(pkg) nzchar(system.file(package = pkg)),
  logical(1)
)
missing_pkgs <- required_pkgs[!is_installed]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
```

```{r}
library(readxl)
library(akima)
library(minpack.lm)

# read data
# Machine-specific location of the spreadsheets. When the directory does not
# exist the files are looked up relative to the current working directory
# instead of stopping the whole document at setwd().
data_dir <- "C:\\Users\\timmah91873\\Google Drive\\Tim Abbott\\05_ChaCha\\ChaCha Kinetic Model"
if (dir.exists(data_dir)) {
  setwd(data_dir)
}
mCherry <- read_xlsx("Supplementary Data 1.xlsx")
GFP <- read_xlsx("Supplementary Data 2.xlsx")
mCherry_m <- data.matrix(mCherry)
GFP_m <- data.matrix(GFP)

# three column matrix of [CNO], mCherry, and GFP per column
# (first row and first column of each sheet are dropped; the first column of
# the mCherry sheet is repeated once per remaining column, 13 times)
d <- data.frame(
  L = log10(rep(mCherry_m[-1, 1], times = 13)),
  A = log10(c(mCherry_m[-1, -1])),
  Gstar = c(GFP_m[-1, -1])
)

# Interpolate Gstar onto a regular 30 x 25 grid in (log10 L, log10 A).
a <- interp(
  d$L, d$A, d$Gstar,
  xo = seq(min(d$L), max(d$L), length.out = 30),
  yo = seq(min(d$A), max(d$A), length.out = 25),
  extrap = TRUE,
  duplicate = "mean"
)

# The interpolation result is read through its x / y / z components, the same
# way the plotting call below and the dox-inducible chunk do.
az <- as.data.frame(a$z)
az[is.na(az)] <- 0
ax <- as.data.frame(a$x)
ay <- as.data.frame(a$y)

# Export plot as Fig 1c
pdf("Fig1c.pdf")
filled.contour(
  a$x, 10^a$y, a$z,
  xlim = range(a$x, finite = TRUE),
  ylim = c(1000, 110000),
  zlim = c(0, 6500),
  color.palette = heat.colors,
  plot.title = title(
    main = "EGFP Activation Heatmap at day 3",
    xlab = "log([CNO] in M)",
    ylab = "ARRB2-dCas9-VPR expression (a.u.)"
  ),
  # NOTE(review): key.title expects plotting statements such as title(...);
  # a bare string presumably draws no key label - confirm before changing,
  # since it would alter the exported figure.
  key.title = "EGFP fluorescence (a.u.)"
)
dev.off()

# Non-linear regression fitting of model to experimental data
# rescale inputs to the model (undo the log10 applied for interpolation)
d$L <- 10^(d$L)
d$A <- 10^(d$A)

# Equation 11 in Box 1
m <- nlsLM(
  Gstar ~ Go + Gmax * L / (Kl + L) * (A^n / (Kd + A^n)),
  data = d,
  start = list(Go = 6600, Gmax = 1e3, Kl = 1e-9, Kd = 3e10, n = 2),
  lower = c(1e3, 1e3, 1e-11, 2.5e10, 2),
  control = nls.control(maxiter = 100)
)

# Get correlation of the fit: observed Gstar against the fitted values only
# (correlating the whole data frame also reports L and A, which is not the
# goodness-of-fit figure quoted below).
cor(d$Gstar, predict(m))
# Results: Go = 1290, Gmax = 6326; Kl = 6.680e-9; Kd = 2.577e10; n = 2.33;
# R^2 = (0.95)^2

# What if we just change n_amp to n = 1? e.g. Tango design.
# Go, Gmax and Kl are pinned by identical lower and upper bounds; only Kd is
# left free within [2.5e10, 2.577e10].
m <- nlsLM(
  Gstar ~ Go + Gmax * L / (Kl + L) * (A / (Kd + A)),
  data = d,
  start = list(Go = 6600, Gmax = 1e3, Kl = 1e-9, Kd = 2.5e10),
  lower = c(1290, 6326, 6.68e-9, 2.5e10),
  upper = c(1290, 6326, 6.68e-9, 2.577423795898897e10),
  control = nls.control(maxiter = 100)
)

# get correlation of the fit
cor(d$Gstar, predict(m))
# Results: R^2 = (0.91)^2
```

The purpose of the following R code is to plot Equation (17) in the
Supplementary Information with the fitted parameter values (Fig. 1c, right).

```{r}
# Plot Model Equation (11) in Box 1 for GFP: "Gss" or Gstar here
# initialize
L <- c(
  seq(1e-10, 1e-8, 1e-9),
  seq(2e-8, 1e-6, 1e-8),
  seq(1.1e-6, 1e-5, 1e-7)
)
A <- seq(0, 110000, 2000)

# parameters for ChaCha case
Go <- 1290
Gmax <- 6326
Kl <- 6.68e-9
Kd <- 2.5e10
n <- 2.333

# Evaluate the model on the full L x A grid in one vectorized call; rows follow
# L and columns follow A.
Gstar <- outer(
  L, A,
  function(l, a) Go + Gmax * l / (Kl + l) * (a^n / (Kd + a^n))
)

pdf("Fig 1c model.pdf")
filled.contour(
  log10(L), A, Gstar,
  xlim = range(log10(L), finite = TRUE),
  ylim = c(1000, 110000),
  zlim = c(0, 6500),
  color.palette = heat.colors,
  plot.title = title(
    main = "EGFP Activation Heatmap at day 3",
    xlab = "log([CNO] in M)",
    ylab = "ARRB2-dCas9-VPR expression (a.u.)"
  ),
  # NOTE(review): key.title expects plotting statements such as title(...);
  # a bare string presumably draws no key label - confirm before changing.
  key.title = "EGFP fluorescence (a.u.)"
)
dev.off()
```

The purpose of the following R code is to load, interpolate and plot
experimental data on GFP activation by dox-inducible dCas9-VPR (Fig. S2c, left,
and Supplementary Data 3 and 4).
Nonlinear regression fitting is also performed in this code to fit Equation (6)
in the Supplementary Information to the data and determine parameter values and
the goodness of fit.

```{r}
# perform each time you start up RStudio
# library() is used throughout so that a missing package stops the chunk here
# rather than failing later at the first call into it.
library(akima)
library(minpack.lm)
library(readxl)
library(ggplot2)
library(RColorBrewer)

mCherry <- read_xlsx("Supplementary Data 3.xlsx")
GFP <- read_xlsx("Supplementary Data 4.xlsx")
mCherry_m <- data.matrix(mCherry)
GFP_m <- data.matrix(GFP)

# three column matrix of CNO, mCherry, and GFP per column
# (first row and first column of each sheet are dropped; the first column of
# the mCherry sheet is repeated once per remaining column, 14 times)
d <- data.frame(
  L = log10(rep(mCherry_m[-1, 1], times = 14)),
  D = log10(c(mCherry_m[-1, -1])),
  Gstar = c(GFP_m[-1, -1])
)

# Interpolate Gstar onto a regular 30 x 30 grid in (log10 L, log10 D).
a <- interp(
  d$L, d$D, d$Gstar,
  xo = seq(min(d$L), max(d$L), length.out = 30),
  yo = seq(min(d$D), max(d$D), length.out = 30),
  extrap = FALSE,
  duplicate = "mean"
)
az <- as.data.frame(a$z)
az[is.na(az)] <- 0
ax <- as.data.frame(a$x)
ay <- as.data.frame(a$y)

# Export plot as Fig S2c
pdf("Fig S2c.pdf")
filled.contour(
  a$x, 10^a$y, a$z,
  xlim = range(a$x, finite = TRUE),
  ylim = c(0, 30000),
  zlim = c(1000, 55000),
  color.palette = heat.colors,
  plot.title = title(
    main = "EGFP Activation Heatmap at day 3",
    xlab = "log([CNO] in M)",
    ylab = "dCas9-VPR expression (a.u.)"
  ),
  # NOTE(review): key.title expects plotting statements such as title(...);
  # a bare string presumably draws no key label - confirm before changing,
  # since it would alter the exported figure.
  key.title = "EGFP fluorescence (a.u.)"
)
dev.off()

# Non-linear regression fitting of model to experimental data
# rescale D (undo the log10 applied for interpolation)
d$D <- 10^(d$D)

# parameters
# None of these values enter the fit below: Kd and n in the formula are fit
# parameters taken from `start`, and the remaining names do not appear in it.
kcsg <- 5e-3
G <- 100
C <- 1
n <- 1.6
kdgs <- 8e-6
Kd <- 1

# Gstar as a function of DOX
m <- nlsLM(
  Gstar ~ Go + ko * D^n / (Kd + D^n),
  data = d,
  start = list(Go = 1e4, ko = 1e4, Kd = 6e3, n = 1.5),
  lower = c(7e3, 1e3, 1e3, 0.2),
  control = nls.control(maxiter = 100)
)

# get some good estimates of the fit: observed Gstar against the fitted values
# only (correlating the whole data frame also reports L and D).
cor(d$Gstar, predict(m))
# Results: Go = 7000, ko = 26.636, n = 0.708, R^2 = 0.97^2
# NOTE(review): the reported ko = 26.636 lies below the lower bound of 1e3 set
# for ko above, so these results presumably come from a different model form
# or different bounds - confirm.
```

The purpose of the following R code is to plot Equation (6) in Supplementary
Note 2 with the fitted parameter values (Fig. S2c, right).

```{r}
# Plotting Model Equation (6) in Supplementary Note 2 for GFP: "Gss" or Gstar
# here
# initialize
L <- log10(c(
  seq(1e-10, 1e-8, 1e-9),
  seq(2e-8, 1e-6, 1e-8),
  seq(1.1e-6, 1e-5, 1e-7)
))
D <- seq(0, 3e4, 1000)

# If dCas9-VPR-induced GFP is power-law dependent
# The expression depends on D only, so the curve is computed once and repeated
# on every row; rows follow L and columns follow D.
Gstar <- matrix(
  7000 + 26.636 * D^.708,
  nrow = length(L),
  ncol = length(D),
  byrow = TRUE
)

pdf("Fig S2c model.pdf")
filled.contour(
  L, D, Gstar,
  xlim = range(L, finite = TRUE),
  ylim = c(0, 30000),
  zlim = c(1000, 55000),
  color.palette = heat.colors,
  plot.title = title(
    main = "EGFP Activation Heatmap at day 3",
    xlab = "log([CNO] in M)",
    ylab = "dCas9-VPR expression (a.u.)"
  ),
  # NOTE(review): key.title expects plotting statements such as title(...);
  # a bare string presumably draws no key label - confirm before changing.
  key.title = "EGFP fluorescence (a.u.)"
)
dev.off()
```