# these functions are work in porogress 
# they are provided only as a record of the procedures used
# the author makes no claim as to their accuracy or suitability for any other work
# the license allows you to make any use vyou like of them, but at your own risk
# you can access them bu pasting this into the R console
# the required libraries should be installed first...
 
library(NormqPCR)
library(nlme)
library(multcomp)

raw2log2 <-function(rawdata,slopes,exclude=NULL)
# License: LGPL-3
# Author: John Young, Pirbright Institute, UK
# Convert raw Ct data to log2 scale level using standard curve slopes
# rawdata = data frame with row for each sample; columns are exprimental factors
#             then replicate labels, then Cts for each gene (col names=gene names)
# slopes  = is a data frame with gene names in rownames and a column "slopes" that 
#             has the slopes of Ct vs log2(rna) regression. It can be the return 
#             value of plot.slopes()
#
# Returns df in same format as rawdata with Ct substituted by Ct/slope for each gene
# 
# note this will fail for any "double" covariate unless exclude is used
# if rawdata has columns a,b,... that are double covariates
# use x.aslog2<-raw2aslog2(x.raw,x.slopes,c(a,b,...))
{
  excl<-character(0)
  if(!is.null(exclude))
  {
    if(!is.character(exclude))
    {
      for(n in exclude)excl
      {
        anexcl<-which(colnames(rawdata)==n)
        excl<-c(excl,colnames(rawdata)[anexcl])
      }
    }
    else excl<-exclude
  }
  aslog2<-rawdata
  for (cn in colnames(rawdata))
  {
    if(typeof(rawdata[[cn]])=="double")
    {
      if(!(cn %in% excl))
      {
        islope<-grep(cn,rownames(slopes))
        if(length(islope)>1)stop(paste("Ambiguous gene name",cn))
        if(length(islope)<1)stop(paste("Missing gene name",cn))
        aslope=slopes[islope,1]
        if(is.na(aslope))stop("Missing slope")
        aslog2[[cn]]<-aslog2[[cn]]/aslope
      }
    }
  }
  attr(aslog2,"date")<-date()
  attr(aslog2,"call")<-sys.call()
  aslog2
}

df2qpcr <-function(df,genes,facts)
# License: LGPL-3
# Author: John Young, Pirbright Institute, UK
# requires package NormqPCR
# used by df2hkgn and df2hknf
# df    = data frame with columns for factors and Cts or aslog2 for each gene
#           Ct columns should be gebe names; raw2aslog2 output is suitable
# genes = integer or character vector indexing genes to include
# facts = integer or character vector indexing factors to use
#           must include replicate labels as factors for each sample must be unique
# output is a qPCRBase for use in genorm
{
  if(is.character(genes)) # convert to integer index
  {
    cix<-NULL
    for(acol in genes)
    {
      ix<-grep(acol,colnames(df))
      if(length(ix)!=1)stop(paste("gene",acol,"not found"))
      cix<-c(cix,ix)
    }
  }
  else cix<-genes
  if(is.character(facts)) # convert to integer index
  {
    xix<-NULL
    for(afac in facts)
    {
      ix<-grep(afac,colnames(df))
      if(length(ix)!=1)stop(paste("factor",afac,"not found"))
      xix<-c(xix,ix)
    }
  }
  else xix<-facts
  for(ix in cix) # check ct column types
  {
    if(!is.numeric(df[[ix]]))stop(paste(colnames(df)[ix],"is not numeric"))
  }
  if(length(xix)>1) # paste factors to get distinct sample names
  {
    fax<-rep("",nrow(df))
    for(r in 1:nrow(df))
    {
      afx<-as.character(df[r,xix[1]])
      for(c in 2:length(xix))afx<-paste(afx,as.character(df[r,xix[c]]),sep=".")
      fax[r]<-afx
    }
  }
  else fax<-as.character(df[[xix]]) # use single factor
  if(length(fax)>length(unique(fax)))stop("Duplicate sample names")
  res<-new("qPCRBatch")
  dat<-t(as.matrix(df[,cix])) 
  colnames(dat)<-fax
  exprs(res)<-dat
  res
}

df2hkgn <-function(adf,genes,facts,bycols)
# License: LGPL-3
# Author: John Young, Pirbright Institute, UK
# License: LGPL-3
# Author: John Young, Pirbright Institute, UK
# requires df2qpcr and package NormqPCR
# wrapper for NormqPCR:::selectHKs with genorm method
#
# adf   = data frame with columns for factors and Cts or aslog2 for each gene
#           Ct columns should be gebe names; raw2aslog2 output is suitable
# genes = integer or character vector indexing genes to include
# facts = integer or character vector indexing factors to use
#           must include replicate labels as factors for each sample must be unique
#
# Returns list ...
#   $ranking: selectHKs ranking of genes by stability
#   $variation: bars for the stdev(delta NF plot) from selectHKs
#   $MeanM: stability measures for increasing numbers of genes added
#   $genes: nammes of input genes
#   $date: execution timestamp
#   $call: function call used
{
sym<-colnames(adf)[genes]
apcr<-df2qpcr(adf,genes,facts)
res<-selectHKs(apcr,method="geNorm",Symbols=sym,minNrHKs=2,trace=F)
res$genes<-sym
res$date<-date()
res$call<-sys.call()
res
}

df2hknf <-function(adf,genes,facts,bycols)
# License: LGPL-3
# Author: John Young, Pirbright Institute, UK
# requires df2qpcr and package NormqPCR
# wrapper for NormqPCR:::selectHKs with normfinder method
# also adds data for plotting deltaSD(NF)
#
# adf   = data frame with columns for factors and Cts or aslog2 for each gene
#           Ct columns should be gebe names; raw2aslog2 output is suitable
# genes = integer or character vector indexing genes to include
# facts = integer or character vector indexing factors to use
#           must include replicate labels as factors for each sample must be unique
#
# Returns list ...
#   $ranking: selectHKs ranking of genes by stability
#   $rho: rho stability measures from selectHKs
#   $sd.delta.nf: bars for the stdev(delta NF plot)
#   $genes: nammes of input genes
#   $date: execution timestamp
#   $call: function call used
{
  sym<-colnames(adf)[genes]
  grp<-character(nrow(adf))
  for(acol in bycols)grp<-paste(grp,adf[,acol],sep=".")
  grp<-as.factor(grp)
  apcr<-df2qpcr(adf,genes,facts)
  mhk<-length(genes)-1
  res<-selectHKs(apcr,group=grp,method="NormFinder",Symbols=sym,minNrHKs=mhk,trace=F)
  sdnf<-numeric(0)
  gix<-grep(res$ranking[1],colnames(adf))
  oldnf<-apply(as.matrix(adf[gix]),1,mean,na.rm=T)
  for(i in 2:length(res$ranking)){
    gix<-c(gix,grep(res$ranking[i],colnames(adf)))
    newnf<-apply(as.matrix(adf[gix]),1,mean,na.rm=T)
    sdnf<-c(sdnf,sd(newnf-oldnf))
    oldnf<-newnf
  }
  res$sd.delta.nf<-sdnf
  res$genes<-sym
  res$date<-date()
  res$call<-sys.call()
  res
}

gnplot <-function(gn)
# License: LGPL-3
# Author: John Young, Pirbright Institute, UK
# plot the graphs from genorm papers
# gn is output of df2hkgn
{
# first the stabilty vs step graph
  par(mar=c(5,5,1,1),mex=0.9)
  ng<-length(gn$ranking)
  yf<-floor(10*min(gn$meanM))/10
  yc<-ceiling(10*max(gn$meanM))/10
  plot(ng:2,gn$meanM[(ng-1):1],ylim=c(yf,yc),type="b",xaxt="n",
       las=2,ylab="Stability",xlab="next discard",pch=19)
  yx<-yf-c(2.0,1,1.75)*(yc-yf)*0.04
  for(i in 2:(ng-1))text(i,yx[1],gn$ranking[ng+2-i],srt=90,cex=1,adj=1,xpd=NA)
  text(ng,yx[1],paste(gn$ranking[2:1],collapse=":"),srt=90,cex=1,adj=1,xpd=NA)
  for(i in 2:11)lines(rep(i,2),yx[2:3],xpd=NA)
# now the deltaSD(NF) graph
  barlim<-max(0.2,1.2*max(gn$variation))
  barplot(gn$variation[(ng-2):1],ylim=c(0,barlim),las=2,
          axisnames=F,xlab="added transcript",ylab="SD(deltaNF)")
  abline(h=0.15,lty=2)
  text(0.2,-0.08*barlim,gn$ranking[1],cex=0.95,pos=2,xpd=NA)
  text(0.2,-0.15*barlim,paste0("+",gn$ranking[2]),cex=0.95,pos=2,xpd=NA)
  for(i in 3:ng)text((i-2.5)*1.2,-0.02*barlim,paste0("+",gn$ranking[i]),srt=90,cex=1,adj=1,xpd=NA)
}

nfplot <-function(nf)
# License: LGPL-3
# Author: John Young, Pirbright Institute, UK
# plot graphs like genorm paper (a bit) but for normfinder
# nf is output df2hkgn
{
# first the stabilty vs step graph
  par(mar=c(5,5,1,1),mex=1)
  ng<-length(nf$ranking)
  yf<-floor(20*min(nf$rho))/20
  yc<-ceiling(20*max(nf$rho))/20
  plot(1:ng,nf$rho[1:ng],ylim=c(yf,yc),type="b",xaxt="n",
       las=2,ylab="rho",xlab="",pch=19)
  yx<-yf-c(2.0,1,1.75)*(yc-yf)*0.04
  text(1,yx[1],nf$ranking[1],srt=90,cex=1,adj=1,xpd=NA)
  for(i in 2:ng)text(i,yx[1],paste0("+",nf$ranking[i]),srt=90,cex=1,adj=1,xpd=NA)
  for(i in 1:ng)lines(rep(i,2),yx[2:3],xpd=NA)
# now the deltaSD(NF) graph
  barlim<-max(0.2,1.2*max(nf$sd.delta.nf))
  barplot(nf$sd.delta.nf[1:(ng-1)],ylim=c(0,barlim),las=2,
          axisnames=F,xlab="added transcript",ylab="SD(deltaNF)")
  abline(h=0.15,lty=2)
  text(0.2,-0.1*barlim,nf$ranking[1],cex=0.95,pos=2,xpd=NA)
  for(i in 2:ng)text((i-1.45)*1.2,-0.02*barlim,paste0("+",nf$ranking[i]),srt=90,cex=1,adj=1,xpd=NA)
}

df2stats <-function(adf,genes,facts,refg,adj=NULL,vgrp=c("n","b","g","f"))
# License: LGPL-3
# Author: John Young, Pirbright Institute, UK
# Reuires: packages nlme and multcomp
# Testing version, in development
# Output results of simple model with gls (simple version)
# adf   = data frame such as from raw2aslog2, with factors for experimental
#           variables and replicate labels, then columns for level of each gene
# genes = column indices of genes to analyse (may include ref genes)
# facts = column indices of factors for grouping (ie not including repl labels)
#           these will be concatenated into a single factor (for now)
#           there should be one more factor column for repl labels so that 
#           factor combinations are unique to a row
# refg  = column indices of genes to use for normalisation (or zero) 
# adj   = method for mutiple testing correction of p values
# vgrp  = variance model: n=all the same, g=per gene, f=per factor (concatenated),
#           b=both - ie per gene+factor combination (very slow)
# 
# For now, gls formula will not include interactions, multiple factors get merged
# If there are technical replicates, they MUST be averaged out before input (for now)
# Normalisation is done before gls, because no tech reps means no df are left for 
# NF error - so that is subsumed into biol rep error.
# Now uses gls only, lme option later?
# gls result is used for glht with contrasts comparing all experimental groups 
# i.e. all possible combinations of factors in facts.
#
# Output conatains a list ...
#   $asterisks = conventional asterisk categories fror inter-group differences
#                  *:p<0.05, **:p<0.01, ***:p<0.001; group x group character array.
#   $coef   = estimated inter-group differences; group x gene matrix.
#   $stderr = standard errors for coeff as per vgrp variance model; grp x gene matrix.
#   $ci95hi = upper bound of 95% confidence interval for coef; grp x gene matrix.
#   $ci95hi = upper bound of 95% confidence interval for coef; grp x gene matrix.
#   $resid  = residuals; rep x grp x gene array.
#   $refg   = column names of reference gene columns in adf; character vector.
#   $call   = call generating output
#   $date   = execution timestamp
{
  #checks etc
  vgrp<-match.arg(vgrp)
  if(is.null(adj))adj="bonferroni" # most conservative possible
  if(typeof(genes)=="character"){
    gx<-grep(paste0(genes,collapse="|"),colnames(adf))
    if(length(gx)<1)stop("Genes not found")
  }
  else gx<-genes
  for(g in gx)if(!is.numeric(adf[,g]))stop(paste(colnames(adf)[g],"is not numeric data"))

  if(typeof(facts)=="character"){
    fx<-grep(paste0(facts,collapse="|"),colnames(adf))
    if(length(fx)<1)stop("Factors not found")
  }
  else fx<-facts
  for(f in fx)if(!is.factor(adf[,f]))stop(paste(colnames(adf)[f],"is not a factor"))

  if(typeof(refg)=="character"){
    rx<-grep(paste0(refg,collapse="|"),colnames(adf))
  } # none = no normalisation
  else rx<-refg
  for(r in rx)if(!is.numeric(adf[,r]))stop(paste(colnames(adf)[r],"is not numeric data"))
    
  # problem with single ref gene giving all zeros after normalisation!
  if(length(rx)==1)if(rx[1] %in% gx)stop("single ref gene cannot be included in model")

  allgx<-union(gx,rx)
  ng<-length(allgx)

  # have to do normalisation here
  if(length(rx)>0){ #otherwise no normalisation
    normf<-apply(as.matrix(adf[,rx]),1,mean,na.rm=T)
	for(i in allgx) adf[,i]<-adf[,i]-normf
  }
  
  gall<-NULL
  ctall<-NULL
  nr<-nrow(adf)
  
  # for now don't need ref genes in model unless they are also in gx
  # but maybe including them will include their effect on overall error? ????
  for(g in gx)gall<-c(gall,rep(colnames(adf)[g],nr))
  abigdf<-data.frame(gene=factor(gall,levels=unique(gall))) # factor/levels/uniqe keeps order

  # below was version with interactions - harder to parse for contrasts
  # for(f in fx) abigdf<-cbind(abigdf,factor(rep(adf[,f],ng),levels=unique(as.character(adf[,f]))))
  # if(length(fx)>1)fterms<-c(fterms,paste("gene",apply(combn(colnames(adf)[fx],2),2,paste,collapse=":"),sep=":"))
  # for(f in fx) colnames(abigdf)[f+1]<-colnames(adf)[f]

  # version with factors concatenated; as matrix so it works on single factor
  fconc<-apply(as.matrix(adf[,fx]),1,paste,collapse=".")
  abigdf<-cbind(abigdf,factor(fconc,levels=unique(fconc)))
  colnames(abigdf)[2]<-paste(colnames(adf)[fx],collapse=".")

  # end of version changes; now the data
  for(g in gx)ctall<-c(ctall,adf[,g])
  abigdf<-cbind(abigdf,ct=ctall)

  # models and glht
  fterms<-"ct~gene"
  fterms<-c(fterms,paste("gene",colnames(abigdf)[2],sep=":"))
  aform<-as.formula(paste0(fterms,collapse="+"))
  if(vgrp=="b")fw<-as.formula(paste0("~1|gene*",paste0(colnames(abigdf)[2],collapse="*"))) # both
  if(vgrp=="g")fw<-as.formula("~1|gene") # gene
  if(vgrp=="f")fw<-as.formula(paste0("~1|",paste0(colnames(abigdf)[2],collapse="*"))) # factor
  if(vgrp=="n")fw<-as.formula("~1") # all the same
  amodl<-gls(aform,abigdf,weights=varIdent(form=fw),na.action=na.exclude)
  coefs<-amodl$coef

  # prepare contrast matrix

  gnams<-colnames(adf)[gx]
  ngens<-length(gnams)
  fnams<-as.vector(unique(as.matrix(abigdf[,2])))
  nfacs<-length(fnams)
  rnams<-colnames(adf)[rx]
  nrefs<-length(rnams)

  krows<- ngens*nfacs*(nfacs-1)/2 # ngenes x factor pairs count
  kcols<- ngens*nfacs # expected num coeffs from model
  if(kcols!=length(coefs))stop("contrast matrix dimension wrong")
  kk<-rep(0,krows*kcols)
  dim(kk)<-c(krows,kcols)
  rownames(kk)<-rep("",krows)
  kctr<-1
  asep<-paste0(":",colnames(abigdf)[2]) # :facname

  for(ig in 1:ngens){
    g<-gnams[ig] 
    for(i in 1:(nfacs-1)){
      iname<-fnams[i]
      ixi<-grep(paste(g,iname,sep=asep),names(coefs)) # gene:facnamefaci
      for(j in (i+1):nfacs){
        jname<-fnams[j]
        rowname<-paste(g,paste(jname,iname,sep="-"),sep=":") # gene:facj-faci
        arow<-rep(0,kcols)
        ixj<-grep(paste(g,jname,sep=asep),names(coefs)) # gene:facnamefacj
        if(length(ixi)>0) arow[ixi]<- -1  # -faci
        if(length(ixj)>0) arow[ixj]<-  1  # +facj
 # this part dropped. It could be used for tech reps in hk genes, but not yet
 # it can't be used for biological reps because NF is per sample, so that does
 # not acheieve the intended effect, and it leaves no df
          #for(ixr in 1:nrefs){
          #rixi<-grep(paste(rnams[ixr],iname,sep=asep),names(coefs))
          #rixj<-grep(paste(rnams[ixr],jname,sep=asep),names(coefs))
          #if(length(rixi)>0) arow[rixi]<- arow[rixi]+1/nrefs
          #if(length(rixj)>0) arow[rixj]<- arow[rixj]-1/nrefs
        #}
        kk[kctr,]<-arow
        rownames(kk)[kctr]<-rowname
        kctr<-kctr+1
      }
    }
  }
  # debug only colnames(kk)<-names(coefs)

  atest<-glht(amodl,linfct=kk) 
  asumm<-summary(atest,test=adjusted(adj)) 

  # now make all outputs: asterisks; coeffs+se+ci, residuals, for plotting
  # asterisks first...
  asterisks<-rep("",nfacs*nfacs*ngens)
  dim(asterisks)<-c(nfacs,nfacs,ngens)
  dimnames(asterisks)<-list(fnams,fnams,gene=gnams)
  ix<-gnams %in% rnams
  dimnames(asterisks)$gene[ix]<-paste0(dimnames(asterisks)$gene[ix],"*")
  for(i in 1:length(asumm$test$coef)){
    ixa<-strsplit(names(asumm$test$coef)[i],"[-:]")
    ixi<-c(grep(ixa[[1]][1],gnams),grep(ixa[[1]][2],fnams),grep(ixa[[1]][3],fnams))
    pval<-asumm$test$pvalues[i]
    ast<-ifelse(pval<0.001,"***",ifelse(pval<0.01,"**",ifelse(pval<0.05,"*","")))
    asterisks[ixi[2],ixi[3],ixi[1]]<-ast
    asterisks[ixi[3],ixi[2],ixi[1]]<-ast
  }
  res<-list()
  res$asterisks<-asterisks
  # now tables for plots
  # need to remodel using glht with simple contrasts, each gene cdata on its own
  # basically, just remove intercept structure

  xcont<-rep(0,ngens*ngens*nfacs*nfacs)
  dim(xcont)<-c(ngens*nfacs,ngens*nfacs)
  for(ag in 1:ngens){
    for(fac in 1:nfacs){
      xcont[ngens*(fac-1)+ag,ngens*(1:(nfacs-1))+ag]<--1/nfacs
      if(fac>1)xcont[ngens*(fac-1)+ag,ngens*(fac-1)+ag]<-(nfacs-1)/nfacs
    }
  }
  xtst<-glht(amodl,linfct=xcont)
  
  mcoef<-coef(xtst)
  dim(mcoef)<-c(ngens,nfacs)
  mcoef<-t(mcoef)  # to match dim order of input
  colnames(mcoef)<-gnams
  rownames(mcoef)<-fnams
  names(dimnames(mcoef))<-colnames(abigdf)[2:1] 
  res$coef<-mcoef
  
  mse<-sqrt(diag(vcov(xtst))) # from multcomp:::pqglht
  dim(mse)<-c(ngens,nfacs)
  mse<-t(mse)
  colnames(mse)<-gnams
  rownames(mse)<-fnams
  names(dimnames(mse))<-colnames(abigdf)[2:1] 
  res$stderr<-mse
  
  ci95<-confint(xtst)$confint
  ci95lo<-ci95[,2]
  dim(ci95lo)<-c(ngens,nfacs)
  ci95lo<-t(ci95lo)
  colnames(ci95lo)<-gnams
  rownames(ci95lo)<-fnams
  names(dimnames(ci95lo))<-colnames(abigdf)[2:1] 
  res$ci95lo<-ci95lo
  ci95hi<-ci95[,3]
  dim(ci95hi)<-c(ngens,nfacs)
  ci95hi<-t(ci95hi)
  colnames(ci95hi)<-gnams
  rownames(ci95hi)<-fnams
  names(dimnames(ci95hi))<-colnames(abigdf)[2:1] 
  res$ci95hi<-ci95hi
  
  # have to deal with ragged input for residuals!
  aresid<-amodl$residuals
  maxreps<-0
  for(gn in 1:ngens){
    for(fn in 1:nfacs){
	  nreps<-sum(abigdf[,1]==gnams[gn] & abigdf[,2]==fnams[fn])
	  if(nreps>maxreps)maxreps<-nreps
	}
  }
  mresid<-rep(NA,maxreps*ngens*nfacs)
  dim(mresid)<-c(maxreps,nfacs,ngens)  
  dimnames(mresid)<-list(rep=1:maxreps,fact=dimnames(mse)[[1]],gene=dimnames(mse)[[2]])
  names(dimnames(mresid))[2]<-names(dimnames(mse))[1]
  for(gn in 1:ngens){
    for(fn in 1:nfacs){
	  reps<-aresid[abigdf[,1]==gnams[gn] & abigdf[,2]==fnams[fn]]
	  if(length(reps)>0)mresid[1:length(reps),fn,gn]<-reps
	}
  }
  res$resid<-mresid

  # remove these when debugged
  # res$mdl<-amodl
  # res$x<-asumm
  
  # finally
  res$refg<-colnames(adf)[rx]
  res$date<-date()
  res$call<-sys.call()
  return(res)
}

plbars <-function(src, genes=NULL, facts=NULL, ebars=NULL, basel=NULL, lbl="", blbls=F,
         ylm=NULL, head=F, gap=0, bclr="white", clip=F)
# License: LGPL-3
# Author: John Young, Pirbright Institute, UK
# Plot barplot from from df2stats output
# src    = return value from df2stats
# genes  = column indices of genes to plot; char or int, null means all
# facts  = column indices of groups to plot; char or int, null means all
# ebars  = what kind of error bars; no:none, ci:95% conf interval from src,
#           se:stderr from src, sd:sd calc from src, se2/sd2:stderr/sd from actual points.
#           all ecept se2/sd2 will depend on which variance model was used in src.
# basel  = default to mean of genes, otherwise "f"= relative to first; character
# lbl    = label for whole set of plots; not used now
# llbls  = label for bars, overrides factor column names
# ylm    = c(lower, upper) overrides range of plots; log2 uniots
# head   = errrm; doesn't seem to be used; probably supposed to control lbl
# gap    = overrides gap between plots; log2 units
# bclr   = bar colours; MUST match number of bars, otherwise just first is used
# clip   = whether to clip each plot; otherwise (error) bars can overlap
#
# Output to graphics device is a bar plot for each gene on vertical log2 scale
{
  if(is.null(genes)) genes<-1:ncol(src$coef)
  if(is.null(facts)) facts<-1:nrow(src$coef)
  if(is.null(ebars)) ebars<-"ci"
  if (typeof(genes) == "character") {
    gix <- grep(paste0(genes, collapse = "|"), colnames(src$coef))
  }
  else gix <- genes
  if (typeof(facts) == "character") {
    fix <- grep(paste0(facts, collapse = "|"), rownames(src$coef))
  }
  else fix <- facts
  midl<-src$coef[fix,gix,drop=F]
  
  if(ebars=="ci"){
    ebarhi<-src$ci95hi[fix,gix,drop=F]
    ebarlo<-src$ci95lo[fix,gix,drop=F]
  }
  if(ebars=="se" || ebars=="sd"){
    ebs<-src$stderr[fix,gix,drop=F]
    if(ebars=="sd")ebs<-ebs*sqrt(dim(src$resid)[1]) # depends on no NAs
    ebarhi<-midl+ebs
    ebarlo<-midl-ebs
  }
  if(ebars=="se2" || ebars=="sd2"){
    ebs<-apply(src$resid[,fix,gix,drop=F],2:3,sd)
    if(ebars=="se2")ebs<-ebs/sqrt(dim(src$resid)[1]) # depends on no NAs
    ebarhi<-midl+ebs
    ebarlo<-midl-ebs
  }

  yoff<-apply(midl,2,mean) #NA?
  if(is.character(basel)) if(basel=="f")yoff<-midl[1,]
  if(is.numeric(basel)) yoff=basel
  if(length(yoff)==1) yoff<-rep(yoff,ncol(midl))
  if(length(yoff)!=ncol(midl)) stop("wrong number of baselines")
  for(i in 1:nrow(midl)){
    midl[i,]<-midl[i,]-yoff
    ebarhi[i,]<-ebarhi[i,]-yoff
    ebarlo[i,]<-ebarlo[i,]-yoff
  }
  
  if(length(bclr)!=nrow(midl))bclr<-rep(bclr[1],nrow(midl))
  if(is.null(ylm))ylm<-c(min(c(0,ebarlo)),max(c(0,ebarhi))) #NA?
  ystep<-ylm[2]-ylm[1]+gap
  plot.new()
  plot.window(xlim=c(0,nrow(midl)),ylim=c(0,ystep*ncol(midl)),bty="n",xaxt="n",yaxt="n")
  for(g in 1:ncol(midl)){
    yoff<-(ncol(midl)-g)*ystep-ylm[1] #zero pt
    if(clip) clip(0,nrow(midl),yoff+ylm[1],yoff+ylm[2])
    lyp<-(round(ylm[1]):round(ylm[2]))+yoff
    abline(h=lyp,lty=3,col="grey70")
    ly<-c(as.vector(t(cbind(lyp,lyp,rep(NA,length(lyp))))),min(lyp),max(lyp))
    lx<-c(rep(c(-0.5,-0.75,NA),length(lyp)),rep(-0.75,2))
    lines(lx,ly,xpd=NA)
    if(clip) clip(0,nrow(midl),yoff+ylm[1]-gap/3,yoff+ylm[2]+gap/3)
    for(f in 1:nrow(midl)){
      rect(f-1,min(0,midl[f,g])+yoff,f,max(0,midl[f,g])+yoff,col=bclr[f])
      ebx<-c(f-0.75,f-0.25,NA,f-0.75,f-0.25,NA,f-0.5,f-0.5)
      ebyl<-ebarlo[f,g]+yoff
      ebyh<-ebarhi[f,g]+yoff
      eby<-c(ebyl,ebyl,NA,ebyh,ebyh,NA,ebyh,ebyl)
      if(clip)lines(ebx,eby)
      else lines(ebx,eby,xpd=NA)
      if(blbls && g==1) text(f-0.5,ystep*ncol(midl)-gap/2,rownames(midl)[f],adj=c(0,0.5),srt=90,cex=1,xpd=NA)
    }
    gnm<-colnames(midl)[g]
    if(gnm %in% src$refg) gnm<-paste0(gnm,"*")
    mtext(gnm,side=2,line=0.5,at=mean(lyp),las=0,xpd=NA,cex=0.8)
  }
#  mtext(-0.6,mean(ylm),lbl,srt=90,xpd=NA)
}

plres <-function(src,offset=F,boxt=c("no","ci","se","sd","se2","sd2"),ylm=NULL,gap=1,pcex=1,subg=NULL,subf=NULL)
# License: LGPL-3
# Author: John Young, Pirbright Institute, UK
# Plot residuals from df2stats output
# src    = return value from df2stats
# offset = whether to add the group coefs as offset to residuals
# boxt   = is what box to draw around points; no:none, ci:95% conf interval from src,
#           se:stderr from src, sd:sd calc from src, se2/sd2:stderr/sd from actual points.
#           all ecept se2/sd2 will depend on which variance model was used in src.
# ylm    = c(lower, upper), override range of value plots, units of log2
# gap    = gap gap between plot for each gene, units of log2
# pcex   = symbol size multiplier for plotted residues
# subg   = subgroup of genes to plot, numeric indices only
# subf   = subgroup of factors to plot, numeric indices only
#
# Output to graphics device is a plot for each gene of the residuals, plus offset if specified
# Each sample is plotted and a box as specified is added to each group. Vertical scale is log2 units
# Note - pcex needs lowering for dev.copy2pdf results. 
# Note - Use useDingbats=F if you want to edit pdf in Illustrator, to avoid crazy font substitution.
{
  if(length(boxt)>1)boxt<-boxt[1]
  if(is.null(subg))subg<-1:ncol(src$coef)
  if(is.null(subf))subf<-1:nrow(src$coef)
  cxx<-src$resid[,subf,subg]
  ng<-dim(cxx)[3]
  nf<-dim(cxx)[2]
  nr<-dim(cxx)[1]
  if(offset)for(i in 1:nr)cxx[i,,]<-cxx[i,,]+src$coef[subf,subg]
  if(is.null(ylm))ylm<-c(round(min(cxx))-1,round(max(cxx))+1)
  plot.new()
  ypan<-(ylm[2]-ylm[1]+gap)
  plot.window(xlim=c(0,nf*(nr+1)),ylim=c(0,ypan*ng),xaxs="i")
  for(f in 1:nf)text((f-0.5)*(nr+1),ypan*ng-gap,dimnames(cxx)[[2]][f],pos=3,cex=0.7)
  for(g in 1:ng){
    gnm<-dimnames(cxx)[[3]][g]
    if(gnm %in% src$refg) gnm<-paste0(gnm,"*")
    mtext(gnm,side=2,at=ypan*(ng-g+0.5),line=0.5,cex=0.8)
  }
  for(g in 1:ng){
    yoff<-(ylm[2]-ylm[1]+gap)*(ng-g)-ylm[1]
    ytck<-yoff+(round(ylm[1]):round(ylm[2]))
    clip(0,nf*(nr+1),min(ytck),max(ytck))
    abline(h=ytck,lty=3,col="grey70")
    abline(h=c(min(ytck),max(ytck),yoff))
    for(i in 1:(nf-1))lines(rep(i*(nr+1),2),c(min(ytck),max(ytck)),lty=3,col="grey70")
    for(yt in ytck)lines(c(-1,0),rep(yt,2),xpd=NA)
    lines(rep(-1,2),c(min(ytck),max(ytck)),xpd=NA)
    for(yt in ytck)lines(c(0,1)+nf*(nr+1),rep(yt,2),xpd=NA)
    lines(rep(nf*(nr+1)+1,2),c(min(ytck),max(ytck)),xpd=NA)
    if(boxt!="no"){
      if(boxt=="ci"){
        boxy=cbind(src$ci95lo[subf,subg[g]],src$ci95hi[subf,subg[g]])
        if(!offset)for(i in 1:2)boxy[,i]<-boxy[,i]-src$coef[subf,subg[g]]
      }
      else if(boxt=="se"||boxt=="sd"){
        boxy<-cbind(src$stderr[subf,subg[g]],-src$stderr[subf,subg[g]])
        if(boxt=="sd")boxy<-boxy*sqrt(nr)
        if(offset)for(i in 1:2)boxy[,i]<-boxy[,i]+src$coef[subf,subg[g]]
      }
      else if(boxt=="se2"||boxt=="sd2"){
        boxy<-apply(cxx[,,g],2,sd)
        boxy<-cbind(boxy,-boxy)
        if(boxt=="se2")boxy<-boxy/sqrt(nr)
        if(offset)for(i in 1:2)boxy[,i]<-boxy[,i]+src$coef[subf,subg[g]]
      }
      clip(0,nf*(nr+1),min(ytck),max(ytck))
      for(f in 1:nf){
        xoff<-(f-1)*(nr+1)+0.5
        rect(xoff,yoff+boxy[f,2],xoff+nr,yoff+boxy[f,1],border="grey50")
      }
    }
    ptsx<-rep(0,nr*nf)
    for(f in 0:(nf-1))ptsx[(1:nr)+nr*f]<-(1:nr)+(nr+1)*f
    clip(0,nf*(nr+1),min(ytck),max(ytck))
    points(ptsx,yoff+as.vector(cxx[,,g]),pch=19,cex=pcex)
  }
}

plotast <-function(ast, genes=1:dim(ast$asterisks)[[3]], facts=1:dim(ast$asterisks)[[2]],
           lablentop=20, lablenlft=20, cexast=1)
# License: LGPL-3
# Author: John Young, Pirbright Institute, UK
# Plot tables asterisks from df2stats output $asterisks
#
# ast       = return value from df2stats
# genes     = indices of genes to plot; char or int
# facts     = indices of groups to plot; char or int
# lablentop = max characters for truncating group names for column labels
# lablenlft = max characters for truncating group names for row labels
# cexast    = size multiplier for asterisks
#
# Output to graphics device is a set of group x group grids with asterisks
# using conventional asterisk categories fror inter-group differences
#                  *:p<0.05, **:p<0.01, ***:p<0.001; 
{
    if (typeof(genes) == "character") {
        gix <- grep(paste0(genes, collapse = "|"), dimnames(ast$asterisks)[[3]])
    }
    else gix <- genes
    if(length(gix)<1)stop("no genes selected")
    if (typeof(facts) == "character") {
        fix <- grep(paste0(facts, collapse = "|"), dimnames(ast$asterisks)[[2]])
    }
    else fix <- facts
    if(length(fix)<1)stop("no factors selected")
    aster<-ast$asterisks[fix,fix,gix,drop=F]
    ght <- dim(aster)[1]
    gwd <- dim(aster)[2]
    for (g in 1:length(gix)) {
        plot(-1, -1, xlim = c(0, gwd), ylim = c(0, ght), xaxt = "n", 
            yaxt = "n", xlab = "", ylab = "", xaxs = "i", yaxs = "i", 
            bty = "n")
        for (i in 1:ght) for (j in 1:gwd){
            text(i - 0.5, gwd - j + 0.5, aster[i, j, g], cex=cexast)
        }
        abline(h = (1:(ght - 1)), col = "grey90")
        abline(v = (1:(gwd - 1)), col = "grey90")
        rect(0, 0, gwd, ght, xpd = NA)
        for (i in 1:gwd) text(i - 0.5, ght, substr(dimnames(aster)[[2]][i],1,lablentop),
                                 xpd = NA, pos = 3,offset=0.2)
        for (i in 1:ght) text(0, ght - i + 0.5, substr(dimnames(aster)[[1]][i],1,lablenlft), 
                                 xpd = NA, pos = 2,offset=0.2)
        text(gwd/2, 0, dimnames(aster)[[3]][g], xpd = NA,pos=1,offset=0.2)
    }
}

plot.stds<-function(stds,gix=2:ncol(stds),dix=1:nrow(stds),doci=F,plci=F,maxct=40,minct=5)
    # License: LGPL-3
    # Author: John Young, Pirbright Institute, UK
    # stds: data frame with all columns type "double", first "conc"
    # gix: which genes
    # dix: which rows; matrix, or vector repeated for all genes
    # doci: put 95% conf int on plot
    # plci: plot 95% conf int of prediction
    # maxct: omit any higher ct
    # minct: omit any lower ct
    {
      par(mfrow=c(4,3),mar=c(3,3,1,1),mex=0.5)
      res<-NULL
      if(colnames(stds)[1]!="conc")stop("First column must be \"conc\"")
      if(is.null(dim(dix))) for(j in 2:length(gix)) dix<-cbind(dix,dix)
      for(i in 1:length(gix))
      {
        x<-log2(stds$conc[dix[,i]])
        ct<-stds[dix[,i],gix[i]]
        ctflt<-(!is.na(ct))&(ct<=maxct)&(ct>=minct)
        ct<-ct[ctflt]
        x<-x[ctflt]
        mct<-max(ct,na.rm=T)
        plot(x,ct,xlab=colnames(stds)[gix[i]],ylab="Cq")
        alm<-lm(ct~x,data.frame(ct=ct,x=x))
        isl<-alm$coeff
        abline(isl)
        slp<-isl[2]
        eff<-2^(-1/slp)
        se<-summary(alm)$coef[2,2]
        conf<-confint(alm,level=0.95)
        gene<-colnames(stds)[gix[i]]
        text(max(x),mct,sprintf("%s: s=%4.2f: e=%4.2f",gene,slp,eff),pos=2)
        text(max(x),mct-1,sprintf("se=%5.3f",se),pos=2)
        if(doci)text(max(x),mct-2,sprintf("95%%ci=%4.2f,%4.2f",conf[2,1],conf[2,2]),pos=2)
        res<-rbind(res,c(slp,eff,se,conf[2,]))
        if(plci)
        {
          q<-unique(x)
          pred<-predict(alm,newdata=data.frame(x=q),interval="confidence",level=0.95)
          lines(q,pred[,2],lty=2,col="blue2")
          lines(q,pred[,3],lty=2,col="red")
        }
      }
      rownames(res)<-colnames(stds)[c(gix)]
      colnames(res)<-c("slope","effic","stderr","cilo","cihi")
      res
    }