# these functions are work in porogress # they are provided only as a record of the procedures used # the author makes no claim as to their accuracy or suitability for any other work # the license allows you to make any use vyou like of them, but at your own risk # you can access them bu pasting this into the R console # the required libraries should be installed first... library(NormqPCR) library(nlme) library(multcomp) raw2log2 <-function(rawdata,slopes,exclude=NULL) # License: LGPL-3 # Author: John Young, Pirbright Institute, UK # Convert raw Ct data to log2 scale level using standard curve slopes # rawdata = data frame with row for each sample; columns are exprimental factors # then replicate labels, then Cts for each gene (col names=gene names) # slopes = is a data frame with gene names in rownames and a column "slopes" that # has the slopes of Ct vs log2(rna) regression. It can be the return # value of plot.slopes() # # Returns df in same format as rawdata with Ct substituted by Ct/slope for each gene # # note this will fail for any "double" covariate unless exclude is used # if rawdata has columns a,b,... that are double covariates # use x.aslog2<-raw2aslog2(x.raw,x.slopes,c(a,b,...)) { excl<-character(0) if(!is.null(exclude)) { if(!is.character(exclude)) { for(n in exclude)excl { anexcl<-which(colnames(rawdata)==n) excl<-c(excl,colnames(rawdata)[anexcl]) } } else excl<-exclude } aslog2<-rawdata for (cn in colnames(rawdata)) { if(typeof(rawdata[[cn]])=="double") { if(!(cn %in% excl)) { islope<-grep(cn,rownames(slopes)) if(length(islope)>1)stop(paste("Ambiguous gene name",cn)) if(length(islope)<1)stop(paste("Missing gene name",cn)) aslope=slopes[islope,1] if(is.na(aslope))stop("Missing slope") aslog2[[cn]]<-aslog2[[cn]]/aslope } } } attr(aslog2,"date")<-date() attr(aslog2,"call")<-sys.call() aslog2 } df2qpcr <-function(df,genes,facts) # License: LGPL-3 # Author: John Young, Pirbright Institute, UK # requires package NormqPCR # used by df2hkgn and df2hknf # df = data frame with columns for factors and Cts or aslog2 for each gene # Ct columns should be gebe names; raw2aslog2 output is suitable # genes = integer or character vector indexing genes to include # facts = integer or character vector indexing factors to use # must include replicate labels as factors for each sample must be unique # output is a qPCRBase for use in genorm { if(is.character(genes)) # convert to integer index { cix<-NULL for(acol in genes) { ix<-grep(acol,colnames(df)) if(length(ix)!=1)stop(paste("gene",acol,"not found")) cix<-c(cix,ix) } } else cix<-genes if(is.character(facts)) # convert to integer index { xix<-NULL for(afac in facts) { ix<-grep(afac,colnames(df)) if(length(ix)!=1)stop(paste("factor",afac,"not found")) xix<-c(xix,ix) } } else xix<-facts for(ix in cix) # check ct column types { if(!is.numeric(df[[ix]]))stop(paste(colnames(df)[ix],"is not numeric")) } if(length(xix)>1) # paste factors to get distinct sample names { fax<-rep("",nrow(df)) for(r in 1:nrow(df)) { afx<-as.character(df[r,xix[1]]) for(c in 2:length(xix))afx<-paste(afx,as.character(df[r,xix[c]]),sep=".") fax[r]<-afx } } else fax<-as.character(df[[xix]]) # use single factor if(length(fax)>length(unique(fax)))stop("Duplicate sample names") res<-new("qPCRBatch") dat<-t(as.matrix(df[,cix])) colnames(dat)<-fax exprs(res)<-dat res } df2hkgn <-function(adf,genes,facts,bycols) # License: LGPL-3 # Author: John Young, Pirbright Institute, UK # License: LGPL-3 # Author: John Young, Pirbright Institute, UK # requires df2qpcr and package NormqPCR # wrapper for NormqPCR:::selectHKs with genorm method # # adf = data frame with columns for factors and Cts or aslog2 for each gene # Ct columns should be gebe names; raw2aslog2 output is suitable # genes = integer or character vector indexing genes to include # facts = integer or character vector indexing factors to use # must include replicate labels as factors for each sample must be unique # # Returns list ... # $ranking: selectHKs ranking of genes by stability # $variation: bars for the stdev(delta NF plot) from selectHKs # $MeanM: stability measures for increasing numbers of genes added # $genes: nammes of input genes # $date: execution timestamp # $call: function call used { sym<-colnames(adf)[genes] apcr<-df2qpcr(adf,genes,facts) res<-selectHKs(apcr,method="geNorm",Symbols=sym,minNrHKs=2,trace=F) res$genes<-sym res$date<-date() res$call<-sys.call() res } df2hknf <-function(adf,genes,facts,bycols) # License: LGPL-3 # Author: John Young, Pirbright Institute, UK # requires df2qpcr and package NormqPCR # wrapper for NormqPCR:::selectHKs with normfinder method # also adds data for plotting deltaSD(NF) # # adf = data frame with columns for factors and Cts or aslog2 for each gene # Ct columns should be gebe names; raw2aslog2 output is suitable # genes = integer or character vector indexing genes to include # facts = integer or character vector indexing factors to use # must include replicate labels as factors for each sample must be unique # # Returns list ... # $ranking: selectHKs ranking of genes by stability # $rho: rho stability measures from selectHKs # $sd.delta.nf: bars for the stdev(delta NF plot) # $genes: nammes of input genes # $date: execution timestamp # $call: function call used { sym<-colnames(adf)[genes] grp<-character(nrow(adf)) for(acol in bycols)grp<-paste(grp,adf[,acol],sep=".") grp<-as.factor(grp) apcr<-df2qpcr(adf,genes,facts) mhk<-length(genes)-1 res<-selectHKs(apcr,group=grp,method="NormFinder",Symbols=sym,minNrHKs=mhk,trace=F) sdnf<-numeric(0) gix<-grep(res$ranking[1],colnames(adf)) oldnf<-apply(as.matrix(adf[gix]),1,mean,na.rm=T) for(i in 2:length(res$ranking)){ gix<-c(gix,grep(res$ranking[i],colnames(adf))) newnf<-apply(as.matrix(adf[gix]),1,mean,na.rm=T) sdnf<-c(sdnf,sd(newnf-oldnf)) oldnf<-newnf } res$sd.delta.nf<-sdnf res$genes<-sym res$date<-date() res$call<-sys.call() res } gnplot <-function(gn) # License: LGPL-3 # Author: John Young, Pirbright Institute, UK # plot the graphs from genorm papers # gn is output of df2hkgn { # first the stabilty vs step graph par(mar=c(5,5,1,1),mex=0.9) ng<-length(gn$ranking) yf<-floor(10*min(gn$meanM))/10 yc<-ceiling(10*max(gn$meanM))/10 plot(ng:2,gn$meanM[(ng-1):1],ylim=c(yf,yc),type="b",xaxt="n", las=2,ylab="Stability",xlab="next discard",pch=19) yx<-yf-c(2.0,1,1.75)*(yc-yf)*0.04 for(i in 2:(ng-1))text(i,yx[1],gn$ranking[ng+2-i],srt=90,cex=1,adj=1,xpd=NA) text(ng,yx[1],paste(gn$ranking[2:1],collapse=":"),srt=90,cex=1,adj=1,xpd=NA) for(i in 2:11)lines(rep(i,2),yx[2:3],xpd=NA) # now the deltaSD(NF) graph barlim<-max(0.2,1.2*max(gn$variation)) barplot(gn$variation[(ng-2):1],ylim=c(0,barlim),las=2, axisnames=F,xlab="added transcript",ylab="SD(deltaNF)") abline(h=0.15,lty=2) text(0.2,-0.08*barlim,gn$ranking[1],cex=0.95,pos=2,xpd=NA) text(0.2,-0.15*barlim,paste0("+",gn$ranking[2]),cex=0.95,pos=2,xpd=NA) for(i in 3:ng)text((i-2.5)*1.2,-0.02*barlim,paste0("+",gn$ranking[i]),srt=90,cex=1,adj=1,xpd=NA) } nfplot <-function(nf) # License: LGPL-3 # Author: John Young, Pirbright Institute, UK # plot graphs like genorm paper (a bit) but for normfinder # nf is output df2hkgn { # first the stabilty vs step graph par(mar=c(5,5,1,1),mex=1) ng<-length(nf$ranking) yf<-floor(20*min(nf$rho))/20 yc<-ceiling(20*max(nf$rho))/20 plot(1:ng,nf$rho[1:ng],ylim=c(yf,yc),type="b",xaxt="n", las=2,ylab="rho",xlab="",pch=19) yx<-yf-c(2.0,1,1.75)*(yc-yf)*0.04 text(1,yx[1],nf$ranking[1],srt=90,cex=1,adj=1,xpd=NA) for(i in 2:ng)text(i,yx[1],paste0("+",nf$ranking[i]),srt=90,cex=1,adj=1,xpd=NA) for(i in 1:ng)lines(rep(i,2),yx[2:3],xpd=NA) # now the deltaSD(NF) graph barlim<-max(0.2,1.2*max(nf$sd.delta.nf)) barplot(nf$sd.delta.nf[1:(ng-1)],ylim=c(0,barlim),las=2, axisnames=F,xlab="added transcript",ylab="SD(deltaNF)") abline(h=0.15,lty=2) text(0.2,-0.1*barlim,nf$ranking[1],cex=0.95,pos=2,xpd=NA) for(i in 2:ng)text((i-1.45)*1.2,-0.02*barlim,paste0("+",nf$ranking[i]),srt=90,cex=1,adj=1,xpd=NA) } df2stats <-function(adf,genes,facts,refg,adj=NULL,vgrp=c("n","b","g","f")) # License: LGPL-3 # Author: John Young, Pirbright Institute, UK # Reuires: packages nlme and multcomp # Testing version, in development # Output results of simple model with gls (simple version) # adf = data frame such as from raw2aslog2, with factors for experimental # variables and replicate labels, then columns for level of each gene # genes = column indices of genes to analyse (may include ref genes) # facts = column indices of factors for grouping (ie not including repl labels) # these will be concatenated into a single factor (for now) # there should be one more factor column for repl labels so that # factor combinations are unique to a row # refg = column indices of genes to use for normalisation (or zero) # adj = method for mutiple testing correction of p values # vgrp = variance model: n=all the same, g=per gene, f=per factor (concatenated), # b=both - ie per gene+factor combination (very slow) # # For now, gls formula will not include interactions, multiple factors get merged # If there are technical replicates, they MUST be averaged out before input (for now) # Normalisation is done before gls, because no tech reps means no df are left for # NF error - so that is subsumed into biol rep error. # Now uses gls only, lme option later? # gls result is used for glht with contrasts comparing all experimental groups # i.e. all possible combinations of factors in facts. # # Output conatains a list ... # $asterisks = conventional asterisk categories fror inter-group differences # *:p<0.05, **:p<0.01, ***:p<0.001; group x group character array. # $coef = estimated inter-group differences; group x gene matrix. # $stderr = standard errors for coeff as per vgrp variance model; grp x gene matrix. # $ci95hi = upper bound of 95% confidence interval for coef; grp x gene matrix. # $ci95hi = upper bound of 95% confidence interval for coef; grp x gene matrix. # $resid = residuals; rep x grp x gene array. # $refg = column names of reference gene columns in adf; character vector. # $call = call generating output # $date = execution timestamp { #checks etc vgrp<-match.arg(vgrp) if(is.null(adj))adj="bonferroni" # most conservative possible if(typeof(genes)=="character"){ gx<-grep(paste0(genes,collapse="|"),colnames(adf)) if(length(gx)<1)stop("Genes not found") } else gx<-genes for(g in gx)if(!is.numeric(adf[,g]))stop(paste(colnames(adf)[g],"is not numeric data")) if(typeof(facts)=="character"){ fx<-grep(paste0(facts,collapse="|"),colnames(adf)) if(length(fx)<1)stop("Factors not found") } else fx<-facts for(f in fx)if(!is.factor(adf[,f]))stop(paste(colnames(adf)[f],"is not a factor")) if(typeof(refg)=="character"){ rx<-grep(paste0(refg,collapse="|"),colnames(adf)) } # none = no normalisation else rx<-refg for(r in rx)if(!is.numeric(adf[,r]))stop(paste(colnames(adf)[r],"is not numeric data")) # problem with single ref gene giving all zeros after normalisation! if(length(rx)==1)if(rx[1] %in% gx)stop("single ref gene cannot be included in model") allgx<-union(gx,rx) ng<-length(allgx) # have to do normalisation here if(length(rx)>0){ #otherwise no normalisation normf<-apply(as.matrix(adf[,rx]),1,mean,na.rm=T) for(i in allgx) adf[,i]<-adf[,i]-normf } gall<-NULL ctall<-NULL nr<-nrow(adf) # for now don't need ref genes in model unless they are also in gx # but maybe including them will include their effect on overall error? ???? for(g in gx)gall<-c(gall,rep(colnames(adf)[g],nr)) abigdf<-data.frame(gene=factor(gall,levels=unique(gall))) # factor/levels/uniqe keeps order # below was version with interactions - harder to parse for contrasts # for(f in fx) abigdf<-cbind(abigdf,factor(rep(adf[,f],ng),levels=unique(as.character(adf[,f])))) # if(length(fx)>1)fterms<-c(fterms,paste("gene",apply(combn(colnames(adf)[fx],2),2,paste,collapse=":"),sep=":")) # for(f in fx) colnames(abigdf)[f+1]<-colnames(adf)[f] # version with factors concatenated; as matrix so it works on single factor fconc<-apply(as.matrix(adf[,fx]),1,paste,collapse=".") abigdf<-cbind(abigdf,factor(fconc,levels=unique(fconc))) colnames(abigdf)[2]<-paste(colnames(adf)[fx],collapse=".") # end of version changes; now the data for(g in gx)ctall<-c(ctall,adf[,g]) abigdf<-cbind(abigdf,ct=ctall) # models and glht fterms<-"ct~gene" fterms<-c(fterms,paste("gene",colnames(abigdf)[2],sep=":")) aform<-as.formula(paste0(fterms,collapse="+")) if(vgrp=="b")fw<-as.formula(paste0("~1|gene*",paste0(colnames(abigdf)[2],collapse="*"))) # both if(vgrp=="g")fw<-as.formula("~1|gene") # gene if(vgrp=="f")fw<-as.formula(paste0("~1|",paste0(colnames(abigdf)[2],collapse="*"))) # factor if(vgrp=="n")fw<-as.formula("~1") # all the same amodl<-gls(aform,abigdf,weights=varIdent(form=fw),na.action=na.exclude) coefs<-amodl$coef # prepare contrast matrix gnams<-colnames(adf)[gx] ngens<-length(gnams) fnams<-as.vector(unique(as.matrix(abigdf[,2]))) nfacs<-length(fnams) rnams<-colnames(adf)[rx] nrefs<-length(rnams) krows<- ngens*nfacs*(nfacs-1)/2 # ngenes x factor pairs count kcols<- ngens*nfacs # expected num coeffs from model if(kcols!=length(coefs))stop("contrast matrix dimension wrong") kk<-rep(0,krows*kcols) dim(kk)<-c(krows,kcols) rownames(kk)<-rep("",krows) kctr<-1 asep<-paste0(":",colnames(abigdf)[2]) # :facname for(ig in 1:ngens){ g<-gnams[ig] for(i in 1:(nfacs-1)){ iname<-fnams[i] ixi<-grep(paste(g,iname,sep=asep),names(coefs)) # gene:facnamefaci for(j in (i+1):nfacs){ jname<-fnams[j] rowname<-paste(g,paste(jname,iname,sep="-"),sep=":") # gene:facj-faci arow<-rep(0,kcols) ixj<-grep(paste(g,jname,sep=asep),names(coefs)) # gene:facnamefacj if(length(ixi)>0) arow[ixi]<- -1 # -faci if(length(ixj)>0) arow[ixj]<- 1 # +facj # this part dropped. It could be used for tech reps in hk genes, but not yet # it can't be used for biological reps because NF is per sample, so that does # not acheieve the intended effect, and it leaves no df #for(ixr in 1:nrefs){ #rixi<-grep(paste(rnams[ixr],iname,sep=asep),names(coefs)) #rixj<-grep(paste(rnams[ixr],jname,sep=asep),names(coefs)) #if(length(rixi)>0) arow[rixi]<- arow[rixi]+1/nrefs #if(length(rixj)>0) arow[rixj]<- arow[rixj]-1/nrefs #} kk[kctr,]<-arow rownames(kk)[kctr]<-rowname kctr<-kctr+1 } } } # debug only colnames(kk)<-names(coefs) atest<-glht(amodl,linfct=kk) asumm<-summary(atest,test=adjusted(adj)) # now make all outputs: asterisks; coeffs+se+ci, residuals, for plotting # asterisks first... asterisks<-rep("",nfacs*nfacs*ngens) dim(asterisks)<-c(nfacs,nfacs,ngens) dimnames(asterisks)<-list(fnams,fnams,gene=gnams) ix<-gnams %in% rnams dimnames(asterisks)$gene[ix]<-paste0(dimnames(asterisks)$gene[ix],"*") for(i in 1:length(asumm$test$coef)){ ixa<-strsplit(names(asumm$test$coef)[i],"[-:]") ixi<-c(grep(ixa[[1]][1],gnams),grep(ixa[[1]][2],fnams),grep(ixa[[1]][3],fnams)) pval<-asumm$test$pvalues[i] ast<-ifelse(pval<0.001,"***",ifelse(pval<0.01,"**",ifelse(pval<0.05,"*",""))) asterisks[ixi[2],ixi[3],ixi[1]]<-ast asterisks[ixi[3],ixi[2],ixi[1]]<-ast } res<-list() res$asterisks<-asterisks # now tables for plots # need to remodel using glht with simple contrasts, each gene cdata on its own # basically, just remove intercept structure xcont<-rep(0,ngens*ngens*nfacs*nfacs) dim(xcont)<-c(ngens*nfacs,ngens*nfacs) for(ag in 1:ngens){ for(fac in 1:nfacs){ xcont[ngens*(fac-1)+ag,ngens*(1:(nfacs-1))+ag]<--1/nfacs if(fac>1)xcont[ngens*(fac-1)+ag,ngens*(fac-1)+ag]<-(nfacs-1)/nfacs } } xtst<-glht(amodl,linfct=xcont) mcoef<-coef(xtst) dim(mcoef)<-c(ngens,nfacs) mcoef<-t(mcoef) # to match dim order of input colnames(mcoef)<-gnams rownames(mcoef)<-fnams names(dimnames(mcoef))<-colnames(abigdf)[2:1] res$coef<-mcoef mse<-sqrt(diag(vcov(xtst))) # from multcomp:::pqglht dim(mse)<-c(ngens,nfacs) mse<-t(mse) colnames(mse)<-gnams rownames(mse)<-fnams names(dimnames(mse))<-colnames(abigdf)[2:1] res$stderr<-mse ci95<-confint(xtst)$confint ci95lo<-ci95[,2] dim(ci95lo)<-c(ngens,nfacs) ci95lo<-t(ci95lo) colnames(ci95lo)<-gnams rownames(ci95lo)<-fnams names(dimnames(ci95lo))<-colnames(abigdf)[2:1] res$ci95lo<-ci95lo ci95hi<-ci95[,3] dim(ci95hi)<-c(ngens,nfacs) ci95hi<-t(ci95hi) colnames(ci95hi)<-gnams rownames(ci95hi)<-fnams names(dimnames(ci95hi))<-colnames(abigdf)[2:1] res$ci95hi<-ci95hi # have to deal with ragged input for residuals! aresid<-amodl$residuals maxreps<-0 for(gn in 1:ngens){ for(fn in 1:nfacs){ nreps<-sum(abigdf[,1]==gnams[gn] & abigdf[,2]==fnams[fn]) if(nreps>maxreps)maxreps<-nreps } } mresid<-rep(NA,maxreps*ngens*nfacs) dim(mresid)<-c(maxreps,nfacs,ngens) dimnames(mresid)<-list(rep=1:maxreps,fact=dimnames(mse)[[1]],gene=dimnames(mse)[[2]]) names(dimnames(mresid))[2]<-names(dimnames(mse))[1] for(gn in 1:ngens){ for(fn in 1:nfacs){ reps<-aresid[abigdf[,1]==gnams[gn] & abigdf[,2]==fnams[fn]] if(length(reps)>0)mresid[1:length(reps),fn,gn]<-reps } } res$resid<-mresid # remove these when debugged # res$mdl<-amodl # res$x<-asumm # finally res$refg<-colnames(adf)[rx] res$date<-date() res$call<-sys.call() return(res) } plbars <-function(src, genes=NULL, facts=NULL, ebars=NULL, basel=NULL, lbl="", blbls=F, ylm=NULL, head=F, gap=0, bclr="white", clip=F) # License: LGPL-3 # Author: John Young, Pirbright Institute, UK # Plot barplot from from df2stats output # src = return value from df2stats # genes = column indices of genes to plot; char or int, null means all # facts = column indices of groups to plot; char or int, null means all # ebars = what kind of error bars; no:none, ci:95% conf interval from src, # se:stderr from src, sd:sd calc from src, se2/sd2:stderr/sd from actual points. # all ecept se2/sd2 will depend on which variance model was used in src. # basel = default to mean of genes, otherwise "f"= relative to first; character # lbl = label for whole set of plots; not used now # llbls = label for bars, overrides factor column names # ylm = c(lower, upper) overrides range of plots; log2 uniots # head = errrm; doesn't seem to be used; probably supposed to control lbl # gap = overrides gap between plots; log2 units # bclr = bar colours; MUST match number of bars, otherwise just first is used # clip = whether to clip each plot; otherwise (error) bars can overlap # # Output to graphics device is a bar plot for each gene on vertical log2 scale { if(is.null(genes)) genes<-1:ncol(src$coef) if(is.null(facts)) facts<-1:nrow(src$coef) if(is.null(ebars)) ebars<-"ci" if (typeof(genes) == "character") { gix <- grep(paste0(genes, collapse = "|"), colnames(src$coef)) } else gix <- genes if (typeof(facts) == "character") { fix <- grep(paste0(facts, collapse = "|"), rownames(src$coef)) } else fix <- facts midl<-src$coef[fix,gix,drop=F] if(ebars=="ci"){ ebarhi<-src$ci95hi[fix,gix,drop=F] ebarlo<-src$ci95lo[fix,gix,drop=F] } if(ebars=="se" || ebars=="sd"){ ebs<-src$stderr[fix,gix,drop=F] if(ebars=="sd")ebs<-ebs*sqrt(dim(src$resid)[1]) # depends on no NAs ebarhi<-midl+ebs ebarlo<-midl-ebs } if(ebars=="se2" || ebars=="sd2"){ ebs<-apply(src$resid[,fix,gix,drop=F],2:3,sd) if(ebars=="se2")ebs<-ebs/sqrt(dim(src$resid)[1]) # depends on no NAs ebarhi<-midl+ebs ebarlo<-midl-ebs } yoff<-apply(midl,2,mean) #NA? if(is.character(basel)) if(basel=="f")yoff<-midl[1,] if(is.numeric(basel)) yoff=basel if(length(yoff)==1) yoff<-rep(yoff,ncol(midl)) if(length(yoff)!=ncol(midl)) stop("wrong number of baselines") for(i in 1:nrow(midl)){ midl[i,]<-midl[i,]-yoff ebarhi[i,]<-ebarhi[i,]-yoff ebarlo[i,]<-ebarlo[i,]-yoff } if(length(bclr)!=nrow(midl))bclr<-rep(bclr[1],nrow(midl)) if(is.null(ylm))ylm<-c(min(c(0,ebarlo)),max(c(0,ebarhi))) #NA? ystep<-ylm[2]-ylm[1]+gap plot.new() plot.window(xlim=c(0,nrow(midl)),ylim=c(0,ystep*ncol(midl)),bty="n",xaxt="n",yaxt="n") for(g in 1:ncol(midl)){ yoff<-(ncol(midl)-g)*ystep-ylm[1] #zero pt if(clip) clip(0,nrow(midl),yoff+ylm[1],yoff+ylm[2]) lyp<-(round(ylm[1]):round(ylm[2]))+yoff abline(h=lyp,lty=3,col="grey70") ly<-c(as.vector(t(cbind(lyp,lyp,rep(NA,length(lyp))))),min(lyp),max(lyp)) lx<-c(rep(c(-0.5,-0.75,NA),length(lyp)),rep(-0.75,2)) lines(lx,ly,xpd=NA) if(clip) clip(0,nrow(midl),yoff+ylm[1]-gap/3,yoff+ylm[2]+gap/3) for(f in 1:nrow(midl)){ rect(f-1,min(0,midl[f,g])+yoff,f,max(0,midl[f,g])+yoff,col=bclr[f]) ebx<-c(f-0.75,f-0.25,NA,f-0.75,f-0.25,NA,f-0.5,f-0.5) ebyl<-ebarlo[f,g]+yoff ebyh<-ebarhi[f,g]+yoff eby<-c(ebyl,ebyl,NA,ebyh,ebyh,NA,ebyh,ebyl) if(clip)lines(ebx,eby) else lines(ebx,eby,xpd=NA) if(blbls && g==1) text(f-0.5,ystep*ncol(midl)-gap/2,rownames(midl)[f],adj=c(0,0.5),srt=90,cex=1,xpd=NA) } gnm<-colnames(midl)[g] if(gnm %in% src$refg) gnm<-paste0(gnm,"*") mtext(gnm,side=2,line=0.5,at=mean(lyp),las=0,xpd=NA,cex=0.8) } # mtext(-0.6,mean(ylm),lbl,srt=90,xpd=NA) } plres <-function(src,offset=F,boxt=c("no","ci","se","sd","se2","sd2"),ylm=NULL,gap=1,pcex=1,subg=NULL,subf=NULL) # License: LGPL-3 # Author: John Young, Pirbright Institute, UK # Plot residuals from df2stats output # src = return value from df2stats # offset = whether to add the group coefs as offset to residuals # boxt = is what box to draw around points; no:none, ci:95% conf interval from src, # se:stderr from src, sd:sd calc from src, se2/sd2:stderr/sd from actual points. # all ecept se2/sd2 will depend on which variance model was used in src. # ylm = c(lower, upper), override range of value plots, units of log2 # gap = gap gap between plot for each gene, units of log2 # pcex = symbol size multiplier for plotted residues # subg = subgroup of genes to plot, numeric indices only # subf = subgroup of factors to plot, numeric indices only # # Output to graphics device is a plot for each gene of the residuals, plus offset if specified # Each sample is plotted and a box as specified is added to each group. Vertical scale is log2 units # Note - pcex needs lowering for dev.copy2pdf results. # Note - Use useDingbats=F if you want to edit pdf in Illustrator, to avoid crazy font substitution. { if(length(boxt)>1)boxt<-boxt[1] if(is.null(subg))subg<-1:ncol(src$coef) if(is.null(subf))subf<-1:nrow(src$coef) cxx<-src$resid[,subf,subg] ng<-dim(cxx)[3] nf<-dim(cxx)[2] nr<-dim(cxx)[1] if(offset)for(i in 1:nr)cxx[i,,]<-cxx[i,,]+src$coef[subf,subg] if(is.null(ylm))ylm<-c(round(min(cxx))-1,round(max(cxx))+1) plot.new() ypan<-(ylm[2]-ylm[1]+gap) plot.window(xlim=c(0,nf*(nr+1)),ylim=c(0,ypan*ng),xaxs="i") for(f in 1:nf)text((f-0.5)*(nr+1),ypan*ng-gap,dimnames(cxx)[[2]][f],pos=3,cex=0.7) for(g in 1:ng){ gnm<-dimnames(cxx)[[3]][g] if(gnm %in% src$refg) gnm<-paste0(gnm,"*") mtext(gnm,side=2,at=ypan*(ng-g+0.5),line=0.5,cex=0.8) } for(g in 1:ng){ yoff<-(ylm[2]-ylm[1]+gap)*(ng-g)-ylm[1] ytck<-yoff+(round(ylm[1]):round(ylm[2])) clip(0,nf*(nr+1),min(ytck),max(ytck)) abline(h=ytck,lty=3,col="grey70") abline(h=c(min(ytck),max(ytck),yoff)) for(i in 1:(nf-1))lines(rep(i*(nr+1),2),c(min(ytck),max(ytck)),lty=3,col="grey70") for(yt in ytck)lines(c(-1,0),rep(yt,2),xpd=NA) lines(rep(-1,2),c(min(ytck),max(ytck)),xpd=NA) for(yt in ytck)lines(c(0,1)+nf*(nr+1),rep(yt,2),xpd=NA) lines(rep(nf*(nr+1)+1,2),c(min(ytck),max(ytck)),xpd=NA) if(boxt!="no"){ if(boxt=="ci"){ boxy=cbind(src$ci95lo[subf,subg[g]],src$ci95hi[subf,subg[g]]) if(!offset)for(i in 1:2)boxy[,i]<-boxy[,i]-src$coef[subf,subg[g]] } else if(boxt=="se"||boxt=="sd"){ boxy<-cbind(src$stderr[subf,subg[g]],-src$stderr[subf,subg[g]]) if(boxt=="sd")boxy<-boxy*sqrt(nr) if(offset)for(i in 1:2)boxy[,i]<-boxy[,i]+src$coef[subf,subg[g]] } else if(boxt=="se2"||boxt=="sd2"){ boxy<-apply(cxx[,,g],2,sd) boxy<-cbind(boxy,-boxy) if(boxt=="se2")boxy<-boxy/sqrt(nr) if(offset)for(i in 1:2)boxy[,i]<-boxy[,i]+src$coef[subf,subg[g]] } clip(0,nf*(nr+1),min(ytck),max(ytck)) for(f in 1:nf){ xoff<-(f-1)*(nr+1)+0.5 rect(xoff,yoff+boxy[f,2],xoff+nr,yoff+boxy[f,1],border="grey50") } } ptsx<-rep(0,nr*nf) for(f in 0:(nf-1))ptsx[(1:nr)+nr*f]<-(1:nr)+(nr+1)*f clip(0,nf*(nr+1),min(ytck),max(ytck)) points(ptsx,yoff+as.vector(cxx[,,g]),pch=19,cex=pcex) } } plotast <-function(ast, genes=1:dim(ast$asterisks)[[3]], facts=1:dim(ast$asterisks)[[2]], lablentop=20, lablenlft=20, cexast=1) # License: LGPL-3 # Author: John Young, Pirbright Institute, UK # Plot tables asterisks from df2stats output $asterisks # # ast = return value from df2stats # genes = indices of genes to plot; char or int # facts = indices of groups to plot; char or int # lablentop = max characters for truncating group names for column labels # lablenlft = max characters for truncating group names for row labels # cexast = size multiplier for asterisks # # Output to graphics device is a set of group x group grids with asterisks # using conventional asterisk categories fror inter-group differences # *:p<0.05, **:p<0.01, ***:p<0.001; { if (typeof(genes) == "character") { gix <- grep(paste0(genes, collapse = "|"), dimnames(ast$asterisks)[[3]]) } else gix <- genes if(length(gix)<1)stop("no genes selected") if (typeof(facts) == "character") { fix <- grep(paste0(facts, collapse = "|"), dimnames(ast$asterisks)[[2]]) } else fix <- facts if(length(fix)<1)stop("no factors selected") aster<-ast$asterisks[fix,fix,gix,drop=F] ght <- dim(aster)[1] gwd <- dim(aster)[2] for (g in 1:length(gix)) { plot(-1, -1, xlim = c(0, gwd), ylim = c(0, ght), xaxt = "n", yaxt = "n", xlab = "", ylab = "", xaxs = "i", yaxs = "i", bty = "n") for (i in 1:ght) for (j in 1:gwd){ text(i - 0.5, gwd - j + 0.5, aster[i, j, g], cex=cexast) } abline(h = (1:(ght - 1)), col = "grey90") abline(v = (1:(gwd - 1)), col = "grey90") rect(0, 0, gwd, ght, xpd = NA) for (i in 1:gwd) text(i - 0.5, ght, substr(dimnames(aster)[[2]][i],1,lablentop), xpd = NA, pos = 3,offset=0.2) for (i in 1:ght) text(0, ght - i + 0.5, substr(dimnames(aster)[[1]][i],1,lablenlft), xpd = NA, pos = 2,offset=0.2) text(gwd/2, 0, dimnames(aster)[[3]][g], xpd = NA,pos=1,offset=0.2) } } plot.stds<-function(stds,gix=2:ncol(stds),dix=1:nrow(stds),doci=F,plci=F,maxct=40,minct=5) # License: LGPL-3 # Author: John Young, Pirbright Institute, UK # stds: data frame with all columns type "double", first "conc" # gix: which genes # dix: which rows; matrix, or vector repeated for all genes # doci: put 95% conf int on plot # plci: plot 95% conf int of prediction # maxct: omit any higher ct # minct: omit any lower ct { par(mfrow=c(4,3),mar=c(3,3,1,1),mex=0.5) res<-NULL if(colnames(stds)[1]!="conc")stop("First column must be \"conc\"") if(is.null(dim(dix))) for(j in 2:length(gix)) dix<-cbind(dix,dix) for(i in 1:length(gix)) { x<-log2(stds$conc[dix[,i]]) ct<-stds[dix[,i],gix[i]] ctflt<-(!is.na(ct))&(ct<=maxct)&(ct>=minct) ct<-ct[ctflt] x<-x[ctflt] mct<-max(ct,na.rm=T) plot(x,ct,xlab=colnames(stds)[gix[i]],ylab="Cq") alm<-lm(ct~x,data.frame(ct=ct,x=x)) isl<-alm$coeff abline(isl) slp<-isl[2] eff<-2^(-1/slp) se<-summary(alm)$coef[2,2] conf<-confint(alm,level=0.95) gene<-colnames(stds)[gix[i]] text(max(x),mct,sprintf("%s: s=%4.2f: e=%4.2f",gene,slp,eff),pos=2) text(max(x),mct-1,sprintf("se=%5.3f",se),pos=2) if(doci)text(max(x),mct-2,sprintf("95%%ci=%4.2f,%4.2f",conf[2,1],conf[2,2]),pos=2) res<-rbind(res,c(slp,eff,se,conf[2,])) if(plci) { q<-unique(x) pred<-predict(alm,newdata=data.frame(x=q),interval="confidence",level=0.95) lines(q,pred[,2],lty=2,col="blue2") lines(q,pred[,3],lty=2,col="red") } } rownames(res)<-colnames(stds)[c(gix)] colnames(res)<-c("slope","effic","stderr","cilo","cihi") res }