#
# this calculates the pooled SD.
# SDp(theData$Glucan,paste(theData$Analyst,theData$batch))
#
SDp <- function(theColumn, theFactor) {
  # Pooled standard deviation of theColumn across the groups defined by
  # theFactor:
  #   sqrt( sum of within-group squared deviations / sum of within-group df )
  #
  # theColumn: numeric vector of observations (may contain NA).
  # theFactor: grouping vector of the same length, passed to split().
  # Returns a single number; NaN when no group contributes any degrees of
  # freedom (0/0).
  groups <- split(theColumn, theFactor)

  # Within-group sum of squared deviations from the group mean, NAs dropped.
  sumSq <- vapply(
    groups,
    function(x) sum((x - mean(x, na.rm = TRUE))^2, na.rm = TRUE),
    numeric(1)
  )

  # Degrees of freedom must count only the observations that actually entered
  # the sum of squares; NAs are excluded there, so they are excluded here too.
  # Empty or all-NA groups contribute 0 rather than -1.
  degFree <- vapply(
    groups,
    function(x) max(sum(!is.na(x)) - 1, 0),
    numeric(1)
  )

  sqrt(sum(sumSq) / sum(degFree))
}




#
# this calculates the mean SD by sample # (e.g., the 1st sample in each batch)
# it searches theFactor for the strings "-1" thru "-12", and then gets the SD for each group
#
# SS(theData$Glucan,theData$SampleID)
#
SS <- function(theData, theFactor, nSamples = 12) {
  # Mean of the per-position standard deviations.
  #
  # theData:   vector of values.
  # theFactor: vector of IDs, parallel to theData; an element belongs to
  #            position j when its ID ends in "-j" (regex "-j$").
  # nSamples:  number of positions to look for (positions 1..nSamples).
  #            Defaults to 12.
  #
  # Returns the plain mean of the nSamples standard deviations. sd() yields NA
  # for a position with fewer than two matching values, and that NA is
  # deliberately left to propagate into the result.
  Sb <- vapply(
    seq_len(nSamples),
    function(j) {
      thePattern <- paste0("-", j, "$")
      sd(theData[grep(thePattern, theFactor)], na.rm = TRUE)
    },
    numeric(1)
  )
  mean(Sb)
}


# this makes a nice histogram
# nH(theData$Glucan,"label")
#
nH <- function(theVector, theXLab) {
  # Draws a frequency (count) histogram of theVector and overlays the normal
  # curve that has the sample mean and SD.
  #
  # theVector: numeric vector to plot (NAs are ignored for mean/SD/range).
  # theXLab:   x-axis label.
  # Called for its plotting side effect.
  theHist <- hist(theVector, main = "", col = "wheat2", xlab = theXLab,
                  freq = TRUE)
  xx <- seq(min(theVector, na.rm = TRUE), max(theVector, na.rm = TRUE),
            length.out = 100)

  # dnorm() is a density (area 1) but the y-axis shows counts, so the curve is
  # scaled by (number of binned values * bin width) to put it on the same
  # scale as the bars. The default breaks are equally spaced, so the first
  # bin width is used for all bins.
  theScale <- sum(theHist$counts) * diff(theHist$breaks)[1]
  lines(xx, theScale * dnorm(xx,
                             mean = mean(theVector, na.rm = TRUE),
                             sd = sd(theVector, na.rm = TRUE)))
}
 

#
# this makes a nice control chart
# thePlot(theData$Glucan,"title")
#
thePlot <- function(x, theMain, theXlab = "Sample", ...) {
  # Control chart: plots x against its index with a green line at the mean
  # and dashed red lines at mean +/- 3 SD, plus a text label giving the mean
  # and SD.
  #
  # x:        numeric vector of measurements (NAs ignored for mean/SD).
  # theMain:  plot title.
  # theXlab:  x-axis label.
  # ...:      further arguments passed on to plot(); a value supplied here
  #           replaces the corresponding default below (e.g. pch, ylim, col).
  theFactor <- 1.2   # extra head-room beyond the limits, in SD units
  theRange <- 3.0    # control limits, in SD units
  theMean <- mean(x, na.rm = TRUE)
  theSD <- sd(x, na.rm = TRUE)
  theMax <- theMean + theRange * theSD
  theMin <- theMean - theRange * theSD

  # x is passed as a symbol so that do.call() evaluates it in this function's
  # frame rather than embedding the whole vector in the call.
  plotArgs <- modifyList(
    list(
      x = quote(x),
      type = "p",
      ylim = c(theMin - theSD * theFactor, theMax + theSD * theFactor),
      ylab = "",
      main = theMain,
      xlab = theXlab,
      pch = 20
    ),
    list(...)
  )
  do.call(plot, plotArgs)

  abline(h = theMean, col = "green")
  abline(h = theMax, col = "red", lty = "dashed")
  abline(h = theMin, col = "red", lty = "dashed")
  # Label sits just above the upper limit, three quarters of the way along.
  text(length(x) * .75, theMax,
       labels = paste("mean=", format(theMean, digits = 3),
                      " SD=", format(theSD, digits = 2)),
       pos = 3)
}

#
# this takes a data vector and returns a copy of it
# with the value of any Tukey outlier replaced with NA
# it doesn't modify the function argument directly (i.e., it is call-by-value)
# theData$Glucan<-tukey(theData$Glucan)
#
tukey <- function(theDataCol) {
  # Returns a copy of theDataCol in which every value outside the fences
  #   [lower hinge - 1.5 * IQR, upper hinge + 1.5 * IQR]
  # has been replaced by NA. The hinges come from fivenum() and the width from
  # IQR(); existing NAs are left as they are.
  # The result is returned invisibly, so assign it to use it.
  hinges <- fivenum(theDataCol, na.rm = TRUE)
  spread <- IQR(theDataCol, na.rm = TRUE)
  lowerFence <- hinges[2] - 1.5 * spread
  upperFence <- hinges[4] + 1.5 * spread

  # which() drops NA comparisons, so only real out-of-fence values are hit.
  outliers <- which((theDataCol < lowerFence) | (theDataCol > upperFence))
  is.na(theDataCol) <- outliers
  invisible(theDataCol)
}


#
# this function creates a boxplot by factor for a given constituent,
# then adds a grey-shaded area behind it that is the IQR (mid-50) of the overall constituent
# msdBox(theData$Glucan,theData$Analyst,"the title")
#
msdBox <- function(theDataCol, theFactor, theTitle = "asdf",
                   theMarked = c(1, 5)) {
  # Boxplot of theDataCol by theFactor, drawn on top of a grey band that
  # spans the lower to upper hinge (fivenum) of the whole column, with a
  # thick line at the overall median.
  #
  # theDataCol: numeric vector of values.
  # theFactor:  grouping vector, parallel to theDataCol.
  # theTitle:   plot title.
  # theMarked:  positions (1-based, in boxplot order) of the groups whose
  #             axis label gets a trailing "*". Positions that do not exist
  #             in the plot are ignored. Defaults to groups 1 and 5.
  #
  # Boxes are labelled by position ("1", "2", ...), not by the factor values.
  # Returns, invisibly, the value of the final boxplot() call.
  theM <- median(theDataCol, na.rm = TRUE)
  theS <- fivenum(theDataCol, na.rm = TRUE)

  # First pass only sets up the coordinate system and tells us how many
  # groups there are; it is painted over by the band and redrawn below.
  j <- boxplot(theDataCol ~ theFactor, main = theTitle, show.names = FALSE,
               col = "gray95", whisklty = "blank", staplelty = "blank",
               outpch = NA)
  nGroups <- length(j$names)

  # Grey band from lower hinge to upper hinge across the full plot width.
  x <- c(0, nGroups + 1, nGroups + 1, 0)
  y <- c(theS[2], theS[2], theS[4], theS[4])
  polygon(x, y, density = NA, col = "gray")
  abline(h = theM, lwd = 3)

  theNames <- as.character(seq_len(nGroups))
  # Only star positions that are actually present, so the label vector keeps
  # exactly one entry per box.
  marked <- theMarked[theMarked %in% seq_len(nGroups)]
  if (length(marked) > 0) {
    theNames[marked] <- paste0(marked, "*")
  }

  # Second pass draws the boxes over the band, now with axis labels.
  boxplot(theDataCol ~ theFactor, main = theTitle, show.names = TRUE,
          names = theNames, col = "gray95", add = TRUE, whisklty = "blank",
          staplelty = "blank", outpch = NA)
}


####################################
#
#theRobot<-read.csv(file="EFRobot.csv",sep=",",header=TRUE)
#bartlett.test(theRobot$Glucan,theRobot$Source)
#pairwise.t.test(theRobot$Glucan,theRobot$Source)
#j<-lm(Glucan~Source,data=theRobot)
#summary(j)
#aov(j)
#boxplot(theRobot$Glucan~theRobot$Source)

#par(mfrow=c(1,2))
#asdf<-good$WholeAsh
#hist(asdf,freq=1)
#xx<-seq(min(asdf),max(asdf),length=100)
#lines(xx,(dnorm(xx, mean=mean(asdf), sd=sd(asdf))))
#hist(asdf,freq=0)
#lines(xx,(dnorm(xx, mean=mean(asdf), sd=sd(asdf))))