###########################################################################
### THIS SCRIPT FILE CONTAINS THE R COMMANDS USED TO PRODUCE ANALYSIS FOR
### VENEZIA, MERCHANT et al (2004). TO USE THIS FILE YOU SHOULD HAVE DOWNLOADED THE
### CEL FILE (AND OR GCRMA FILE) DATA THAT ACCOMPANIES THIS ARTICLE
### YOU SHOULD ALSO HAVE INSTALLED R.AND THE AFFY, GCRMA,
### and MGU74AV2 R PACKAGES
###
###
### TO OBTAIN R VISIT HTTP://WWW.R-CRAN.ORG. THE BIOCONDUCTOR LINKS
### TO OBTAIN PACKAGES ARE ALSO AVAILABLE FROM THE MAIN R SITE.
###
###
### ALL COMMENTS IN THIS FILE ARE ON SEPARATE LINES
### DELIMITED BY THE POUND SIGN. ALL R COMMANDS -- INTENDED
### TO BE ENTERED AT THE R-PROMPT ARE ON SEPARATE LINES.
#Step 1. Start R and set up the analysis environment.
#Note that you must have the affy and
#gcrma packages installed.
library(affy)
library(gcrma)
#these two vectors are useful for analysis
#later in the script
times<-c(0,0,1,1,2,2,3,3,6,6,10,10,30,30)
treat<-c(1,1,2,2,7,7,3,3,4,4,8,8,5,5,6,6)
affyids<-ls(mgu74av2GENENAME)
cntrls<-substr(affyids,1,4)=='AFFX'
#LOAD IN THE DATA. THE COMMANDS SHOWN SUPPOSE
#THAT YOU HAVE DOWNLOADED ALL CEL FILES TO A SUBDIRECTORY
#CALLED "CEL"
filenames<-system('ls CEL/*',T)
affybatch<-ReadAffy(filenames=FILENAMES[order(treat)])
present.absent<-t(PresentMAS5(affybatch))
#NORMALIZE THE DATA AND OBTAIN EXPRESSION
#SUMMARIES. NOTE THAT OTHER METHODS ARE POSSIBLE.
allchips.gcrma<-gcrma(affybatch)
alldata<-exprs(allchips.gcrma)
goodell<-alldata
#EXTRACT THE TIME COURSE COMPONENT --LEAVE OUT FL ARRAYS.
timecourse<-alldata[,3:16]
# FORM THE POLYNOMIAL BASIS FOR DOING LEAST SQUARES REGRESSION
# OF THE GENE VALUES
polybasis<-poly(c(0,0,1,1,2,2,3,3,6,6,10,10,30,30),5)
#FIT THE REGRESSION MODEL FOR EACH GENE (AFFYID) AND CALCULATE
#THE ANOVA P-VALUE AND THE R2.
mynova.full.4<-t(apply(timecourse,1,function(x,covs=polybasis){
ans<-aov(as.numeric(x)~covs)
ansc<-ans$coef
r2<-summary(ans)[[1]][["Sum Sq"]]
r2<-r2[1]/sum(r2)
pval<-round(summary(ans)[[1]][5][[1]][1],3)
c(ansc,r2,pval)
}))
#CALCULATE THE FITTED VALUES (aka predicted values) ACCORDING TO THE
#REGRESSION MODEL FITS ---THESE ARE THE VALUES VISUALIZED IN THE QUERY
#TOOL.
preds.4<- mynova.full.4[,1:6] %*% t(cbind(rep(1,14),polybasis))[,seq(from=1,to=14,by=2)]
#OBTAIN A LOGICAL VECTOR OF GENES FOR WHICH A CALL OF PRESENT
#WAS OBTAINED ON AT LEAST 1 ARRAY LOOKING ACROSS ALL TIMES. EXCLUDE
#THE AFFYMETRIX CONTROLS.
atleast1<-apply(present.absent[,3:16],1,function(x){sum(x<.04,na.rm=T)>=1}) & !cntrls
#OBTAIN THE TIME OF MAXIMUM EXPRESSION FOR EACH GENE
#ACCORDING TO THE POLYNOMIAL REGRESSION FIT.
tom<-apply(preds.4,1,function(x){which(x==max(x))})
### THE GENE LIST SECTION WHICH FOLLOWS GENERATES
#LOGICAL VECTORS FOR EACH OF THE GENE LISTS DETERMINED IN
#OUR ANALYSIS.
#GENES WHICH HAVE A TIME PATTERN ACCORDING TO OUR REGRESSION MODEL
#AND WHICH ARE CALLED PRESENT IN AT LEAST 1 ARRAY
changeintimecourse<-atleast1 & mynova.full.4[,8]<.05
#WE DEFINE THE TOM PROLIFERATION GROUP AS THOSE GENES
#WITH A TIME OF MAXIMUM EXPRESSION AT DAY 2,3,6 AND WHICH
#WERE FOUND TO HAVE A SIGNIFICANT TIME PATTERN BY REGRESSION.
tom.prolif<-rep(F,12488)
tom.prolif<-tom==3 | tom==4 | tom==5 & !cntrls
tom.prolif<-tom.prolif & changeintimecourse
#WE DEFINE THE TOM PROLIFERATION GROUP AS THOSE GENES
#WITH A TIME OF MAXIMUM EXPRESSION AT DAY 2,3,6 AND WHICH
#WERE FOUND TO HAVE A SIGNIFICANT TIME PATTERN BY REGRESSION.
tom.quiet<-rep(F,12488)
tom.quiet<-tom==1 | tom==2 | tom==6 | tom==7 & !cntrls
tom.quiet<-tom.quiet & changeintimecourse
##NOW CONSIDER ANALYSIS OF FL DATA
pafl<-present.absent[,1]<.05 | present.absent[,2]<.05 & !cntrls
spvsfl<-data.frame(ZandT(alldata[,1:2],alldata[,3:4],ok=pafl|atleast1,myalpha=0.05))
#ESTABLISH THE FOLD CHANGE BASED CUTOFF GENE LISTS
upinfl<-abs(spvsfl[["MeanDif"]])>=1 & spvsfl[["MeanDif"]]>0 & !cntrls
upinsp<-abs(spvsfl[["MeanDif"]])>=1 & spvsfl[["MeanDif"]]<0 & !cntrls
#We calculate our TOM groups according to the criterion:
# time of max =j AND F-statistic Pvalue <0.05 AND not control probeset.
tom0<-tom==1 & goods05 & !cntrls
tom1<-tom==2 & goods05 & !cntrls
tom2<-tom==3 & goods05 & !cntrls
tom3<-tom==4 & goods05 & !cntrls
tom6<-tom==5 & goods05 & !cntlrs
tom10<-tom==6 & goods05 &!cntlrs
tom30<-tom==7 & goods05 & !cntlrs
# our analysis included an addidiontal data set obtained by
#personal correspondence. This data set, which we refer to as
#the akashi data set was obtained via coorespondence
#with Lin Heng Li (lil@stowers-institute.org)
#the akashidata is rendered to a data frame 12488 x 4.
###
akashi<-data.frame(ZandT(akashidata[,1:2],akashidata[,3:4],ok=atleast1.akashi,myalpha=0.05))
upinst<-akashi[["Reject.Z"]] & akashi[["MeanDif"]]>0 & !cntrls
upinlt<-akashi[["Reject.Z"]] & akashi[["MeanDif"]]<0 & !cntrls
upinstT<-akashi[["Reject.T"]] & akashi[["MeanDif"]]>1 & !cntrls
upinltT<-akashi[["Reject.T"]] & akashi[["MeanDif"]]< -1 & !cntrls
upinst1<-abs(akashi[["MeanDif"]])>=1 & akashi[["MeanDif"]]>0 & !cntrls
upinlt1<-abs(akashi[["MeanDif"]])>=1 & akashi[["MeanDif"]]<0 & !cntrls
PresentMAS5<-
function(affybatch,noms=affyids,tau=0.015)
{
library(ctest)
ans<-NULL
ans<-rbind(ans,sapply(noms,function(x)
{
pms<-pm(affybatch,x)
mms<-mm(affybatch,x)
Rvals<-sapply(seq(1:ncol(pms)),function(x)
{
Rs<-(pms[,x]-mms[,x])/(pms[,x]+mms[,x])
Rs<-Rs-tau#hardcoded Tau
#print(Rs)
huh<-wilcox.test(Rs,alternative='greater')
huh$p.value
})
ans<-cbind(ans,Rvals)
ans
}))
ans
}
ZandT<-function(data1,data2,ok=rep(T,12488),myalpha=0.05)
{
data1[!ok,]<-NA
data2[!ok,]<-NA
n1<-apply(data1,1,function(x){sum(!is.na(x))})
n2<-apply(data2,1,function(x){sum(!is.na(x))})
m1<-apply(data1,1,mean,na.rm=T)
m2<-apply(data2,1,mean,na.rm=T)
ss1<-apply((data1-m1)^2,1,sum,na.rm=T)
ss2<-apply((data2-m2)^2,1,sum,na.rm=T)
dfs<-apply(cbind(n1,n2),1,function(x){
if(x[1] >1 & x[2]>1) return (sum(x)-2)
if(x[1]>1) return(x[1]-1)
if(x[2]>1) return(x[2]-1)
else return(NA)
})
sp<-(ss1+ss2)/(dfs)
sigsq<-mean(sp,na.rm=T)
T<-(m1-m2)/(sp*(1/n1+1/n2)^.5)
Z<-(m1-m2)/(sqrt(sigsq)*(1/n1+1/n2)^.5)
pz<-2*(1-pnorm(abs(Z)))
pts<-2*(1-pt(abs(T),dfs))
rej.z<-LinearStepUp(pz,al=myalpha,exclude=is.na(pz))
rej.t<-LinearStepUp(pts,al=myalpha,exclude=is.na(pts))
rej.z[is.na(rej.z)]<-FALSE
rej.t[is.na(rej.t)]<-FALSE
ans<-cbind(m1-m2,Z,pz,rej.z,T,dfs,pts,rej.t)
ans[,1][is.na(ans[,1])]<-0
dimnames(ans)[[2]]<-c('MeanDif','Z','P-Z','Reject-Z','T','df','P-T','Reject-T')
ans
}
BuildRefs<-function(id,substr='',mname,sep='')
}
else{
part1<-paste(substr,mlocus,sep='')
mname<-as.character(get(id,env=mgu74av2SYMBOL))
part2<-paste('\' >',mname,sep='')
}
paste(part1,part2,sep='')
}
outputHtml<-function (analysis, outfile, title='', useGeneNames = F)
{
cat("", "", "",
title, "
", "
",x," | ",sep='')})),collapse=''),'|
", paste(as.matrix(analysis[i, ]), collapse = " | "), " |