########################################################################### ### THIS SCRIPT FILE CONTAINS THE R COMMANDS USED TO PRODUCE ANALYSIS FOR ### VENEZIA, MERCHANT et al (2004). TO USE THIS FILE YOU SHOULD HAVE DOWNLOADED THE ### CEL FILE (AND OR GCRMA FILE) DATA THAT ACCOMPANIES THIS ARTICLE ### YOU SHOULD ALSO HAVE INSTALLED R.AND THE AFFY, GCRMA, ### and MGU74AV2 R PACKAGES ### ### ### TO OBTAIN R VISIT HTTP://WWW.R-CRAN.ORG. THE BIOCONDUCTOR LINKS ### TO OBTAIN PACKAGES ARE ALSO AVAILABLE FROM THE MAIN R SITE. ### ### ### ALL COMMENTS IN THIS FILE ARE ON SEPARATE LINES ### DELIMITED BY THE POUND SIGN. ALL R COMMANDS -- INTENDED ### TO BE ENTERED AT THE R-PROMPT ARE ON SEPARATE LINES. #Step 1. Start R and set up the analysis environment. #Note that you must have the affy and #gcrma packages installed. library(affy) library(gcrma) #these two vectors are useful for analysis #later in the script times<-c(0,0,1,1,2,2,3,3,6,6,10,10,30,30) treat<-c(1,1,2,2,7,7,3,3,4,4,8,8,5,5,6,6) affyids<-ls(mgu74av2GENENAME) cntrls<-substr(affyids,1,4)=='AFFX' #LOAD IN THE DATA. THE COMMANDS SHOWN SUPPOSE #THAT YOU HAVE DOWNLOADED ALL CEL FILES TO A SUBDIRECTORY #CALLED "CEL" filenames<-system('ls CEL/*',T) affybatch<-ReadAffy(filenames=FILENAMES[order(treat)]) present.absent<-t(PresentMAS5(affybatch)) #NORMALIZE THE DATA AND OBTAIN EXPRESSION #SUMMARIES. NOTE THAT OTHER METHODS ARE POSSIBLE. allchips.gcrma<-gcrma(affybatch) alldata<-exprs(allchips.gcrma) goodell<-alldata #EXTRACT THE TIME COURSE COMPONENT --LEAVE OUT FL ARRAYS. timecourse<-alldata[,3:16] # FORM THE POLYNOMIAL BASIS FOR DOING LEAST SQUARES REGRESSION # OF THE GENE VALUES polybasis<-poly(c(0,0,1,1,2,2,3,3,6,6,10,10,30,30),5) #FIT THE REGRESSION MODEL FOR EACH GENE (AFFYID) AND CALCULATE #THE ANOVA P-VALUE AND THE R2. mynova.full.4<-t(apply(timecourse,1,function(x,covs=polybasis){ ans<-aov(as.numeric(x)~covs) ansc<-ans$coef r2<-summary(ans)[[1]][["Sum Sq"]] r2<-r2[1]/sum(r2) pval<-round(summary(ans)[[1]][5][[1]][1],3) c(ansc,r2,pval) })) #CALCULATE THE FITTED VALUES (aka predicted values) ACCORDING TO THE #REGRESSION MODEL FITS ---THESE ARE THE VALUES VISUALIZED IN THE QUERY #TOOL. preds.4<- mynova.full.4[,1:6] %*% t(cbind(rep(1,14),polybasis))[,seq(from=1,to=14,by=2)] #OBTAIN A LOGICAL VECTOR OF GENES FOR WHICH A CALL OF PRESENT #WAS OBTAINED ON AT LEAST 1 ARRAY LOOKING ACROSS ALL TIMES. EXCLUDE #THE AFFYMETRIX CONTROLS. atleast1<-apply(present.absent[,3:16],1,function(x){sum(x<.04,na.rm=T)>=1}) & !cntrls #OBTAIN THE TIME OF MAXIMUM EXPRESSION FOR EACH GENE #ACCORDING TO THE POLYNOMIAL REGRESSION FIT. tom<-apply(preds.4,1,function(x){which(x==max(x))}) ### THE GENE LIST SECTION WHICH FOLLOWS GENERATES #LOGICAL VECTORS FOR EACH OF THE GENE LISTS DETERMINED IN #OUR ANALYSIS. #GENES WHICH HAVE A TIME PATTERN ACCORDING TO OUR REGRESSION MODEL #AND WHICH ARE CALLED PRESENT IN AT LEAST 1 ARRAY changeintimecourse<-atleast1 & mynova.full.4[,8]<.05 #WE DEFINE THE TOM PROLIFERATION GROUP AS THOSE GENES #WITH A TIME OF MAXIMUM EXPRESSION AT DAY 2,3,6 AND WHICH #WERE FOUND TO HAVE A SIGNIFICANT TIME PATTERN BY REGRESSION. tom.prolif<-rep(F,12488) tom.prolif<-tom==3 | tom==4 | tom==5 & !cntrls tom.prolif<-tom.prolif & changeintimecourse #WE DEFINE THE TOM PROLIFERATION GROUP AS THOSE GENES #WITH A TIME OF MAXIMUM EXPRESSION AT DAY 2,3,6 AND WHICH #WERE FOUND TO HAVE A SIGNIFICANT TIME PATTERN BY REGRESSION. tom.quiet<-rep(F,12488) tom.quiet<-tom==1 | tom==2 | tom==6 | tom==7 & !cntrls tom.quiet<-tom.quiet & changeintimecourse ##NOW CONSIDER ANALYSIS OF FL DATA pafl<-present.absent[,1]<.05 | present.absent[,2]<.05 & !cntrls spvsfl<-data.frame(ZandT(alldata[,1:2],alldata[,3:4],ok=pafl|atleast1,myalpha=0.05)) #ESTABLISH THE FOLD CHANGE BASED CUTOFF GENE LISTS upinfl<-abs(spvsfl[["MeanDif"]])>=1 & spvsfl[["MeanDif"]]>0 & !cntrls upinsp<-abs(spvsfl[["MeanDif"]])>=1 & spvsfl[["MeanDif"]]<0 & !cntrls #We calculate our TOM groups according to the criterion: # time of max =j AND F-statistic Pvalue <0.05 AND not control probeset. tom0<-tom==1 & goods05 & !cntrls tom1<-tom==2 & goods05 & !cntrls tom2<-tom==3 & goods05 & !cntrls tom3<-tom==4 & goods05 & !cntrls tom6<-tom==5 & goods05 & !cntlrs tom10<-tom==6 & goods05 &!cntlrs tom30<-tom==7 & goods05 & !cntlrs # our analysis included an addidiontal data set obtained by #personal correspondence. This data set, which we refer to as #the akashi data set was obtained via coorespondence #with Lin Heng Li (lil@stowers-institute.org) #the akashidata is rendered to a data frame 12488 x 4. ### akashi<-data.frame(ZandT(akashidata[,1:2],akashidata[,3:4],ok=atleast1.akashi,myalpha=0.05)) upinst<-akashi[["Reject.Z"]] & akashi[["MeanDif"]]>0 & !cntrls upinlt<-akashi[["Reject.Z"]] & akashi[["MeanDif"]]<0 & !cntrls upinstT<-akashi[["Reject.T"]] & akashi[["MeanDif"]]>1 & !cntrls upinltT<-akashi[["Reject.T"]] & akashi[["MeanDif"]]< -1 & !cntrls upinst1<-abs(akashi[["MeanDif"]])>=1 & akashi[["MeanDif"]]>0 & !cntrls upinlt1<-abs(akashi[["MeanDif"]])>=1 & akashi[["MeanDif"]]<0 & !cntrls PresentMAS5<- function(affybatch,noms=affyids,tau=0.015) { library(ctest) ans<-NULL ans<-rbind(ans,sapply(noms,function(x) { pms<-pm(affybatch,x) mms<-mm(affybatch,x) Rvals<-sapply(seq(1:ncol(pms)),function(x) { Rs<-(pms[,x]-mms[,x])/(pms[,x]+mms[,x]) Rs<-Rs-tau#hardcoded Tau #print(Rs) huh<-wilcox.test(Rs,alternative='greater') huh$p.value }) ans<-cbind(ans,Rvals) ans })) ans } ZandT<-function(data1,data2,ok=rep(T,12488),myalpha=0.05) { data1[!ok,]<-NA data2[!ok,]<-NA n1<-apply(data1,1,function(x){sum(!is.na(x))}) n2<-apply(data2,1,function(x){sum(!is.na(x))}) m1<-apply(data1,1,mean,na.rm=T) m2<-apply(data2,1,mean,na.rm=T) ss1<-apply((data1-m1)^2,1,sum,na.rm=T) ss2<-apply((data2-m2)^2,1,sum,na.rm=T) dfs<-apply(cbind(n1,n2),1,function(x){ if(x[1] >1 & x[2]>1) return (sum(x)-2) if(x[1]>1) return(x[1]-1) if(x[2]>1) return(x[2]-1) else return(NA) }) sp<-(ss1+ss2)/(dfs) sigsq<-mean(sp,na.rm=T) T<-(m1-m2)/(sp*(1/n1+1/n2)^.5) Z<-(m1-m2)/(sqrt(sigsq)*(1/n1+1/n2)^.5) pz<-2*(1-pnorm(abs(Z))) pts<-2*(1-pt(abs(T),dfs)) rej.z<-LinearStepUp(pz,al=myalpha,exclude=is.na(pz)) rej.t<-LinearStepUp(pts,al=myalpha,exclude=is.na(pts)) rej.z[is.na(rej.z)]<-FALSE rej.t[is.na(rej.t)]<-FALSE ans<-cbind(m1-m2,Z,pz,rej.z,T,dfs,pts,rej.t) ans[,1][is.na(ans[,1])]<-0 dimnames(ans)[[2]]<-c('MeanDif','Z','P-Z','Reject-Z','T','df','P-T','Reject-T') ans } BuildRefs<-function(id,substr='',mname,sep='') } else{ part1<-paste(substr,mlocus,sep='') mname<-as.character(get(id,env=mgu74av2SYMBOL)) part2<-paste('\' >',mname,sep='') } paste(part1,part2,sep='') } outputHtml<-function (analysis, outfile, title='', useGeneNames = F) { cat("", "", "", title, "", "", "", "

", title, "

", "", file = outfile) noms<-dimnames(analysis)[[2]] aline<-paste('',paste(unlist(sapply(noms,function(x){paste("",sep='')})),collapse=''),'') cat(aline,file = outfile, append = T) for (i in 1:nrow(analysis)) { cat("\n \n", file = outfile, sep = "", append = T) } cat("
",x,"
", paste(as.matrix(analysis[i, ]), collapse = " "), "
\n
\n \n ", file = outfile, append = T) }