#
# this calculates the pooled SD.
# SDp(theData$Glucan,paste(theData$Analyst,theData$batch))
#
# theColumn: numeric vector of measurements (NAs are ignored)
# theFactor: grouping vector, same length as theColumn
# returns:   sqrt(sum of squared within-group deviations / pooled df)
#
SDp <- function(theColumn, theFactor) {
  groups <- split(theColumn, theFactor)
  # deviation of every observation from its own group mean
  diffs <- unlist(lapply(groups, function(x) x - mean(x, na.rm = TRUE)))
  # degrees of freedom must count only the non-missing observations,
  # otherwise NAs inflate the denominator and bias the pooled SD low;
  # an all-NA (or empty) group contributes 0 rather than -1
  df <- vapply(
    groups,
    function(x) max(sum(!is.na(x)) - 1, 0),
    numeric(1)
  )
  sqrt(sum(diffs^2, na.rm = TRUE) / sum(df))
}

#
# this calculates the mean SD by sample
# (e.g., the 1st sample in each batch)
# it searches theFactor for the strings "-1" thru "-12" (anchored at the end
# of the string), and then gets the SD for each group
#
# SS(theData$Glucan,theData$SampleID)
#
# theData:   numeric vector of measurements
# theFactor: character vector of sample IDs ending in "-<sample number>"
# nSamples:  highest sample number to look for (default 12)
# returns:   mean of the per-sample-number SDs; sample numbers with no
#            usable SD (no matches, or a single value) are skipped
#
SS <- function(theData, theFactor, nSamples = 12) {
  Sb <- vapply(
    seq_len(nSamples),
    function(j) {
      thePattern <- paste0("-", j, "$")
      sd(theData[grep(thePattern, theFactor)], na.rm = TRUE)
    },
    numeric(1)
  )
  mean(Sb, na.rm = TRUE)
}

# this makes a nice histogram
# nH(theData$Glucan,"label")
#
# theVector: numeric vector to plot (NAs are ignored)
# theXLab:   x-axis label
# draws a frequency histogram with a fitted normal curve on top and
# invisibly returns the histogram object
#
nH <- function(theVector, theXLab) {
  h <- hist(theVector, main = "", col = "wheat2", xlab = theXLab, freq = TRUE)
  xx <- seq(
    min(theVector, na.rm = TRUE),
    max(theVector, na.rm = TRUE),
    length.out = 100
  )
  # the y-axis shows counts, so the density has to be scaled by
  # (number of observations * bin width) to line up with the bars
  theScale <- sum(h$counts) * diff(h$breaks)[1]
  theDensity <- dnorm(
    xx,
    mean = mean(theVector, na.rm = TRUE),
    sd = sd(theVector, na.rm = TRUE)
  )
  lines(xx, theDensity * theScale)
  invisible(h)
}

#
# this makes a nice control chart
# thePlot(theData$Glucan,"title")
#
# x:        numeric vector, plotted in index order
# theMain:  plot title
# theXlab:  x-axis label
# ...:      accepted for compatibility; currently not used
# draws the points, a green line at the mean and dashed red lines at
# mean +/- 3 SD, and labels the chart with the mean and SD
#
thePlot <- function(x, theMain, theXlab = "Sample", ...) {
  theFactor <- 1.2 # extra headroom beyond the control limits, in SDs
  theRange <- 3.0 # control limits at +/- this many SDs
  theMean <- mean(x, na.rm = TRUE)
  theSD <- sd(x, na.rm = TRUE)
  theMax <- theMean + theRange * theSD
  theMin <- theMean - theRange * theSD
  plot(
    x,
    type = "p",
    ylim = c(theMin - theSD * theFactor, theMax + theSD * theFactor),
    ylab = "",
    main = theMain,
    xlab = theXlab,
    pch = 20
  )
  abline(h = theMean, col = "green")
  abline(h = theMax, col = "red", lty = "dashed")
  abline(h = theMin, col = "red", lty = "dashed")
  text(
    length(x) * .75,
    theMax,
    labels = paste(
      "mean=", format(theMean, digits = 3),
      " SD=", format(theSD, digits = 2)
    ),
    pos = 3
  )
}

#
# this takes a data vector and returns a copy of it
# with the value of any Tukey outlier replaced with NA
# it doesn't modify the function argument directly (i.e., it is call-by-value)
# theData$Glucan<-tukey(theData$Glucan)
#
# an outlier is any value more than 1.5 * IQR below the lower hinge
# or above the upper hinge
#
tukey <- function(theDataCol) {
  j <- fivenum(theDataCol, na.rm = TRUE) # j[2], j[4] are the hinges
  i <- IQR(theDataCol, na.rm = TRUE)
  x <- which(
    (theDataCol < j[2] - 1.5 * i) | (theDataCol > j[4] + 1.5 * i)
  )
  is.na(theDataCol) <- x
  theDataCol
}

#
# this function creates a boxplot by factor for a given constituent,
# then adds a grey-shaded area behind it that is the IQR (mid-50) of the
# overall constituent
# msdBox(theData$Glucan,theData$Analyst,"the title")
#
# theDataCol: numeric vector of measurements
# theFactor:  grouping vector (one box per group)
# theTitle:   plot title
# theMarked:  group numbers whose axis label gets a trailing "*"
#             (default c(1, 5); numbers beyond the last group are ignored)
#
msdBox <- function(theDataCol, theFactor, theTitle = "asdf",
                   theMarked = c(1, 5)) {
  theM <- median(theDataCol, na.rm = TRUE)
  theS <- fivenum(theDataCol, na.rm = TRUE)
  # first pass only sets up the axes and tells us how many groups there are
  j <- boxplot(
    theDataCol ~ theFactor,
    main = theTitle, show.names = FALSE, col = "gray95",
    whisklty = "blank", staplelty = "blank", outpch = NA
  )
  nGroups <- length(j$names)
  # grey band spanning the overall lower hinge to upper hinge
  x <- c(0, nGroups + 1, nGroups + 1, 0)
  y <- c(theS[2], theS[2], theS[4], theS[4])
  polygon(x, y, density = NA, col = "gray")
  abline(h = theM, lwd = 3)
  theNames <- as.character(seq_len(nGroups))
  #
  # analysts 1 & 5 did multiple sets, so let's mark them specially...
  # (only mark groups that exist, so the label vector never grows)
  toMark <- intersect(theMarked, seq_len(nGroups))
  theNames[toMark] <- paste0(toMark, "*")
  # second pass redraws the boxes on top of the band, with the labels
  boxplot(
    theDataCol ~ theFactor,
    main = theTitle, show.names = TRUE, names = theNames, col = "gray95",
    add = TRUE, whisklty = "blank", staplelty = "blank", outpch = NA
  ) # ,ylim=theR)
}

####################################
#
# scratch / example code, kept for reference:
#
#theRobot<-read.csv(file="EFRobot.csv",sep=",",header=TRUE)
#bartlett.test(theRobot$Glucan,theRobot$Source)
#pairwise.t.test(theRobot$Glucan,theRobot$Source)
#j<-lm(Glucan~Source,data=theRobot)
#summary(j)
#aov(j)
#boxplot(theRobot$Glucan~theRobot$Source)
#par(mfrow=c(1,2))
#asdf<-good$WholeAsh
#hist(asdf,freq=1)
#xx<-seq(min(asdf),max(asdf),length=100)
#lines(xx,(dnorm(xx, mean=mean(asdf), sd=sd(asdf))))
#hist(asdf,freq=0)
#lines(xx,(dnorm(xx, mean=mean(asdf), sd=sd(asdf))))