####################################################################################################
#### Supplementary File 3, Fitness.R
####
#### Fit linear regression on log2ratio of barcode abundance between generations 6 and 20
####
#### Wei Zhao and Celia Payen
#### 2012
####################################################################################################

library(lmtest)  # lrtest(): likelihood ratio test on the fitted models

#### Wrapper function for per-gene linear regression
##
## Reads a table of barcode counts (first column = gene identifier, remaining
## columns = one column per time point, named "G<generation>", e.g. G0, G6),
## converts the counts to log2 ratios of relative abundance versus G0, and
## fits log2ratio ~ generation for every gene over the window
## [start.tp, end.tp].
##
## Arguments:
##   inputdir   directory of the input file; pasted directly in front of
##              `inputfile`, so it must end with a path separator
##   inputfile  name of the input CSV file
##   outputdir  directory for the output files (must already exist; pasted
##              directly in front of `output`)
##   output     prefix for the output file names
##   constant   the constant added to each gene number; default is 10
##   start.tp   the starting time point; default is G6
##   end.tp     the ending time point; default is G20
##
## Side effects: writes two files,
##   <outputdir><output>-early-slopes.csv  slope and p-value per gene
##   <outputdir><output>-log2ratio         log2 ratios per gene for the time
##                                         points inside the window
##
## Value: (invisibly) a data frame with columns genes, slope and p.value.
early.linear <- function(inputdir, inputfile, outputdir, output,
                         constant = 10, start.tp = 6, end.tp = 20) {
  ##
  #### Read file and prepare for analysis data
  ##
  expdata <- read.csv(paste0(inputdir, inputfile))
  if (!"G0" %in% colnames(expdata)) {
    stop("Input file ", inputfile, " has no 'G0' column", call. = FALSE)
  }

  ## column names "G<number>" -> numeric generation
  timepoints <- as.numeric(substr(colnames(expdata)[-1], 2, 5))
  ## positions of the time points inside the window; which() also drops
  ## columns whose name could not be parsed as a number
  early.idx <- which(timepoints >= start.tp & timepoints <= end.tp)
  timepoints.early <- timepoints[early.idx]
  if (length(timepoints.early) < 2) {
    stop("Fewer than two time points between ", start.tp, " and ", end.tp,
         " in ", inputfile, call. = FALSE)
  }

  ## remove genes that have 0 copy number at G0,
  ## and genes whose average copy number across time points is below 1
  print(paste("Number of genes in", inputfile, ":", nrow(expdata)))
  avg.nums <- rowMeans(expdata[, -1, drop = FALSE], na.rm = TRUE)
  ## which() discards genes where the test is NA instead of producing
  ## all-NA rows, which plain logical indexing would do
  keep <- which(expdata$G0 != 0 & avg.nums >= 1)
  expdata <- expdata[keep, , drop = FALSE]
  print(paste("Number of genes in", inputfile,
              "after exclusion of unwanted genes:", nrow(expdata)))

  ##
  #### Linear regression for each gene
  ##
  ## plus a constant to all numbers
  anal.data <- as.matrix(expdata[, -1, drop = FALSE]) + constant
  point.total <- colSums(anal.data, na.rm = TRUE)  # total per time point

  ## relative abundance per time point, then log2 ratio against G0;
  ## dividing a matrix by a vector of length nrow divides every row by its
  ## own G0 value
  rel.abund <- sweep(anal.data, 2, point.total, "/")
  log2ratio.all <- log2(rel.abund / rel.abund[, "G0"])
  log2ratio.early <- log2ratio.all[, early.idx, drop = FALSE]

  ## linear regression
  n.genes <- nrow(expdata)
  slope <- p.value <- rep(NA_real_, n.genes)
  for (i in seq_len(n.genes)) {
    log2ratio <- log2ratio.early[i, ]
    fit <- lm(log2ratio ~ timepoints.early)
    slope[i] <- coef(fit)[2]
    ## p-value from likelihood ratio test; row 2 of the lrtest() table is
    ## the comparison against the second model
    p.value[i] <- lrtest(fit)[["Pr(>Chisq)"]][2]
  }

  ## output to file
  results <- data.frame(genes = expdata[, 1], slope = slope, p.value = p.value)
  ## one row per gene, one column per time point inside the window
  results1 <- data.frame(genes = expdata[, 1], log2ratio.early)
  write.csv(results, paste0(outputdir, output, "-early-slopes.csv"))
  ## NOTE(review): this file name has no ".csv" extension; kept as is so
  ## that existing downstream paths stay valid
  write.csv(results1, paste0(outputdir, output, "-log2ratio"))

  invisible(results)
}

## Write output (create output folder before running script)
InputDir <- "/home/Barseq/"
OutputDir <- "/home/Barseq/Fitness/"

## CEN-MoBY
cen.glu.1 <- early.linear(inputdir = InputDir, inputfile = "F8r1.csv",
                          outputdir = OutputDir, output = "CEN-GLU-1")
cen.glu.2 <- early.linear(inputdir = InputDir, inputfile = "F8r2.csv",
                          outputdir = OutputDir, output = "CEN-GLU-2")
cen.pho.1 <- early.linear(inputdir = InputDir, inputfile = "F9r1.csv",
                          outputdir = OutputDir, output = "CEN-PHO-1")
cen.pho.2 <- early.linear(inputdir = InputDir, inputfile = "F9r2.csv",
                          outputdir = OutputDir, output = "CEN-PHO-2")
## NOTE(review): variable names (.2/.3) and output labels (-1/-2) do not
## match for the two CEN sulfate runs -- confirm which numbering is intended
cen.sul.2 <- early.linear(inputdir = InputDir, inputfile = "F7r3.csv",
                          outputdir = OutputDir, output = "CEN-SUL-1")
cen.sul.3 <- early.linear(inputdir = InputDir, inputfile = "F8r3.csv",
                          outputdir = OutputDir, output = "CEN-SUL-2")

## 2micron-MoBY
micron.glu.1 <- early.linear(inputdir = InputDir, inputfile = "2uF11.csv",
                             outputdir = OutputDir, output = "2micron-GLU-1")
micron.glu.2 <- early.linear(inputdir = InputDir, inputfile = "2uF7.csv",
                             outputdir = OutputDir, output = "2micron-GLU-2")
micron.pho.1 <- early.linear(inputdir = InputDir, inputfile = "2uF8.csv",
                             outputdir = OutputDir, output = "2micron-PHO-1")
micron.pho.2 <- early.linear(inputdir = InputDir, inputfile = "2uF9.csv",
                             outputdir = OutputDir, output = "2micron-PHO-2")
micron.sul.1 <- early.linear(inputdir = InputDir, inputfile = "2uF6.csv",
                             outputdir = OutputDir, output = "2micron-SUL-1")
micron.sul.2 <- early.linear(inputdir = InputDir, inputfile = "2uF10.csv",
                             outputdir = OutputDir, output = "2micron-SUL-2")

## MM1N
MM1N.glu.1 <- early.linear(inputdir = InputDir, inputfile = "MM1NF4.csv",
                           outputdir = OutputDir, output = "MM1N-GLU-1")
MM1N.glu.2 <- early.linear(inputdir = InputDir, inputfile = "MM1NF5.csv",
                           outputdir = OutputDir, output = "MM1N-GLU-2")
MM1N.pho.1 <- early.linear(inputdir = InputDir, inputfile = "MM1NF8.csv",
                           outputdir = OutputDir, output = "MM1N-PHO-1")
MM1N.pho.2 <- early.linear(inputdir = InputDir, inputfile = "MM1NF9.csv",
                           outputdir = OutputDir, output = "MM1N-PHO-2")
MM1N.sul.1 <- early.linear(inputdir = InputDir, inputfile = "MM1NF1.csv",
                           outputdir = OutputDir, output = "MM1N-SUL-1")
MM1N.sul.2 <- early.linear(inputdir = InputDir, inputfile = "MM1NF2.csv",
                           outputdir = OutputDir, output = "MM1N-SUL-2")

## MM2N
MM2N.glu.1 <- early.linear(inputdir = InputDir, inputfile = "MM2NF11.csv",
                           outputdir = OutputDir, output = "MM2N-GLU-1")
MM2N.glu.2 <- early.linear(inputdir = InputDir, inputfile = "MM2NF7.csv",
                           outputdir = OutputDir, output = "MM2N-GLU-2")
MM2N.pho.1 <- early.linear(inputdir = InputDir, inputfile = "MM2NF8.csv",
                           outputdir = OutputDir, output = "MM2N-PHO-1")
MM2N.pho.2 <- early.linear(inputdir = InputDir, inputfile = "MM2NF9.csv",
                           outputdir = OutputDir, output = "MM2N-PHO-2")
MM2N.sul.1 <- early.linear(inputdir = InputDir, inputfile = "MM2NF6.csv",
                           outputdir = OutputDir, output = "MM2N-SUL-1")
MM2N.sul.2 <- early.linear(inputdir = InputDir, inputfile = "MM2NF10.csv",
                           outputdir = OutputDir, output = "MM2N-SUL-2")