# Functions described in
# "Bias Correction and Bayesian Analysis of Aggregate Counts in SAGE Libraries"
# by Zaretzki, Gilchrist, Briggs and Armagan
# Copyright 2009

rdirichlet <- function(n, alpha){
  #Random Dirichlet generator, as in the MCMCpack library in R.
  l <- length(alpha)
  x <- matrix(rgamma(l * n, alpha), ncol = l, byrow = TRUE)
  sm <- x %*% rep(1, l)
  return(x / as.vector(sm))
}

dpb <- function(Y, simsize = 1500, subsize = 20, burnin = 500, type = "flat",
                Nprior.shape = 100, Nprior.scale = 200){
  #DPB Gibbs sampling approach.
  #We assume the observed counts T_i are Binomial(g_i, phi_i), the g_i are
  #Poisson with mean N*m_i, the m_i follow a Dirichlet distribution, and N
  #follows a Gamma distribution and is rounded since it should be a whole number.
  #Data are contained in the data frame Y, which should have two columns:
  #Y$Tags, the aggregated tag counts, and Y$Phi, the estimated tag-formation
  #probabilities.
  #simsize is the total number of samples stored.  subsize = 20 means that only
  #every 20th sample is kept, so subsize*simsize iterations are actually run.
  #burnin is the number of burn-in samples; it is included in the total, so here
  #500 of the 1500 stored samples are treated as burn-in and dropped from the
  #returned summaries.  type describes the Dirichlet prior alpha: "flat" or "tub".
  #Nprior.shape and Nprior.scale are described in Zaretzki et al. (2009) and set
  #the approximate average population size, E[N] ~ Nprior.shape*Nprior.scale.

  sizeY <- dim(Y)   #Y$Tags are the observed counts.
  #Full storage
  Nsamp <- matrix(0, simsize, 1)
  msamp <- matrix(0, sizeY[1], simsize)
  gsamp <- matrix(0, sizeY[1], simsize)
  #Sub storage
  Nvals <- matrix(0, subsize, 1)
  mvals <- matrix(0, sizeY[1], subsize)
  gvals <- matrix(0, sizeY[1], subsize)
  #Initial values
  alpha <- rep(1, sizeY[1])
  if(type != "flat") alpha <- (1/sizeY[1]) * alpha
  Nvals[1]  <- round(sum(Y$Tags/Y$Phi))
  mvals[,1] <- alpha/sizeY[1]
  gvals[,1] <- round(Y$Tags/Y$Phi)
  #Gamma(Nprior.shape, Nprior.scale) prior for N; defaults are set in the
  #argument list.
  #Nprior.shape<-100
  #Nprior.scale<-200
  for(i in 1:simsize){
    for(k in 2:subsize){
      lam <- Nvals[(k-1)] * mvals[,(k-1)] * (1 - Y$Phi)
      gvals[,k] <- Y$Tags + rpois(sizeY[1], lam)                        #g_k conditional
      Nshape <- sum(gvals[,k]) + Nprior.shape + 1
      Nvals.rate <- (1 + 1/Nprior.scale)
      Nvals[k] <- round(rgamma(1, shape = Nshape, rate = Nvals.rate))   #N conditional
      mvals[,k] <- rgamma(sizeY[1], shape = (gvals[,k] + alpha), rate = Nvals[k])   #m conditional
    }
    gvals[,1] <- gsamp[,i] <- gvals[,subsize]
    Nvals[1]  <- Nsamp[i]  <- Nvals[subsize]
    mvals[,1] <- msamp[,i] <- mvals[,subsize]
    # if( i%%200 == 0 ) print(i)
  }
  #Summaries: one row per tag, columns give the posterior mean and the
  #2.5/50/97.5 percentiles of m_i.
  mean.samp <- apply(msamp[, (burnin+1):simsize], 1, mean)
  cuts <- c(.025, .5, .975)
  m.pctiles <- apply(msamp[, (burnin+1):simsize], 1, quantile, probs = cuts)
  answer <- data.frame(t(rbind(mean.samp, m.pctiles)))
  return(answer)
}
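
#----------------------------------------------------------------------
#Illustrative sketch, not part of the original script: a hypothetical
#helper showing the input format dpb() expects.  The data frame built
#below is simulated (an assumption, for illustration only) -- Y$Tags holds
#aggregated tag counts and Y$Phi the estimated tag-formation
#probabilities -- and the run settings simply echo the defaults.
dpb.example <- function(seed = 1){
  set.seed(seed)
  n.tags <- 50
  #Simulated stand-in for real SAGE data.
  Y.sim <- data.frame(Tags = rpois(n.tags, lambda = 40),
                      Phi  = runif(n.tags, 0.6, 0.95))
  fit <- dpb(Y.sim, simsize = 1500, subsize = 20, burnin = 500, type = "flat")
  #One row per tag: posterior mean and the 2.5/50/97.5 percentiles of m_i.
  head(fit)
}
#----------------------------------------------------------------------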
dmb <- function(Y, simsize = 1500, subsize = 20, burnin = 500, type = "flat"){
  #DMB Gibbs sampling approach.
  #We assume the observed counts T_i are Binomial(g_i, phi_i), the g_i are
  #Multinomial(N, m), the m_i follow a Dirichlet distribution, and N follows a
  #Gamma distribution and is rounded since it should be a whole number.
  #Data are contained in the data frame Y, which should have two columns:
  #Y$Tags, the aggregated tag counts, and Y$Phi, the estimated tag-formation
  #probabilities.
  #simsize is the total number of samples stored.  subsize = 20 means that only
  #every 20th sample is kept, so subsize*simsize iterations are actually run.
  #burnin is the number of burn-in samples; it is included in the total, so here
  #500 of the 1500 stored samples are treated as burn-in and dropped from the
  #returned summaries.  type describes the Dirichlet prior alpha: "flat" or "tub".

  sizeY <- dim(Y)   #Y$Tags are the observed counts.
  T.tot <- sum(Y$Tags)
  #Full storage
  Nsamp <- matrix(0, simsize, 1)
  msamp <- matrix(0, sizeY[1], simsize)
  gsamp <- matrix(0, sizeY[1], simsize)
  #Sub storage
  Nvals <- matrix(0, subsize, 1)
  mvals <- matrix(0, sizeY[1], subsize)
  gvals <- matrix(0, sizeY[1], subsize)
  #Initial values
  alpha <- rep(1, sizeY[1])
  if(type != "flat") alpha <- (1/sizeY[1]) * alpha
  N.parm <- round(sum(Y$Tags/Y$Phi))
  Nvals  <- rpois(subsize, N.parm)
  mvals[,1] <- alpha/sizeY[1]
  gvals[,1] <- round(Y$Tags/Y$Phi)
  #library(MCMCpack)
  for(i in 1:simsize){
    for(k in 2:subsize){
      gvals[,k] <- Y$Tags + rmultinom(1, (Nvals[k-1] - T.tot), ((1 - Y$Phi) * mvals[,(k-1)]))   #g_k conditional
      mvals[,k] <- rdirichlet(1, (gvals[,k] + alpha))                                           #m conditional
      #Nvals[k]<-rpois(1,N.parm)   #N conditional (the pre-drawn Nvals are used instead)
    }
    gvals[,1] <- gsamp[,i] <- gvals[,subsize]
    Nsamp[i]  <- Nvals[subsize]
    Nvals     <- rpois(subsize, N.parm)   #refresh the N draws for the next sub-chain
    mvals[,1] <- msamp[,i] <- mvals[,subsize]
    #if( i%%200 == 0 ) print(i)
  }
  #Summaries: one row per tag, columns give the posterior mean and the
  #2.5/50/97.5 percentiles of m_i.
  mean.samp <- apply(msamp[, (burnin+1):simsize], 1, mean)
  cuts <- c(.025, .5, .975)
  m.pctiles <- apply(msamp[, (burnin+1):simsize], 1, quantile, probs = cuts)
  answer <- data.frame(t(rbind(mean.samp, m.pctiles)))
  return(answer)
}

md <- function(Y, simsize = 1500, subsize = 20, burnin = 500, type = "flat",
               lambda = 20000, Phi.c){
  #Uses data augmentation by assuming that the SAGE process produces "r" unformed
  #tags, where r follows a Poisson distribution.  Given "r", we can use a
  #Dirichlet-multinomial posterior distribution to do inference for the
  #proportions.
  #Data are contained in the data frame Y, which should have two columns:
  #Y$Tags, the aggregated tag counts, and Y$Phi, the estimated tag-formation
  #probabilities.
  #simsize is the total number of samples stored.  subsize = 20 means that only
  #every 20th sample is kept, so subsize*simsize iterations are actually run.
  #burnin is the number of burn-in samples; it is included in the total, so here
  #500 of the 1500 stored samples are treated as burn-in and dropped from the
  #returned summaries.  type describes the Dirichlet prior alpha: "flat" or "tub".
  #lambda scales the Poisson mean for the number of unformed tags r.
  #Phi.c is a single overall tag-formation probability; it has no usable default
  #and must be supplied (the original argument list read Phi.c=Phi.c, which
  #errors when the argument is omitted).

  sizeY <- dim(Y)   #Y$Tags are the observed counts.
  #Full storage
  rsamp <- matrix(0, simsize, 1)
  msamp <- matrix(0, sizeY[1], simsize)
  #gsamp<-matrix(0,sizeY[1],simsize)
  #Sub storage
  rvals <- matrix(0, subsize, 1)
  mvals <- matrix(0, sizeY[1], subsize)
  #gvals<-matrix(0,sizeY[1],subsize)
  #Initial values
  alpha <- rep(1, sizeY[1])
  if(type != "flat") alpha <- (1/sizeY[1]) * alpha
  mvals[,1] <- alpha/sizeY[1]
  rvals[1]  <- 10000
  mu <- sum(Y$Tags) * ((1 - Phi.c)/Phi.c)   #initial value; overwritten inside the loop
  for(i in 1:simsize){
    for(k in 2:subsize){
      postvec <- c(Y$Tags + alpha, (rvals[(k-1)] + 1))
      theta <- rdirichlet(1, postvec)                 #m*phi conditional
      mtemp <- theta[1:sizeY[1]]/Y$Phi
      mvals[,k] <- mtemp/sum(mtemp)
      mu <- (lambda * (1 - sum(theta[1:sizeY[1]])))
      rvals[k] <- rpois(1, mu)                        #r conditional
    }
    rvals[1]  <- rsamp[i]  <- rvals[subsize]
    mvals[,1] <- msamp[,i] <- mvals[,subsize]
    # if( i%%100 == 0 ) print(rsamp[i])
  }
  #Summaries: one row per tag, columns give the posterior mean and the
  #2.5/50/97.5 percentiles of m_i.
  mean.samp <- apply(msamp[, (burnin+1):simsize], 1, mean)
  cuts <- c(.025, .5, .975)
  m.pctiles <- apply(msamp[, (burnin+1):simsize], 1, quantile, probs = cuts)
  answer <- data.frame(t(rbind(mean.samp, m.pctiles)))
  return(answer)
}
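
#----------------------------------------------------------------------
#Illustrative sketch, not part of the original script: a hypothetical
#helper showing how dmb() and md() might be called on the same kind of
#simulated data frame used in dpb.example().  Passing Phi.c as the mean
#of Y$Phi is an assumption made only for this example; use whatever
#overall tag-formation probability is appropriate for the library.
dmb.md.example <- function(seed = 1){
  set.seed(seed)
  n.tags <- 50
  Y.sim <- data.frame(Tags = rpois(n.tags, lambda = 40),
                      Phi  = runif(n.tags, 0.6, 0.95))
  fit.dmb <- dmb(Y.sim, simsize = 1500, subsize = 20, burnin = 500, type = "flat")
  fit.md  <- md(Y.sim, simsize = 1500, subsize = 20, burnin = 500,
                type = "flat", lambda = 20000, Phi.c = mean(Y.sim$Phi))
  #Compare posterior means of the proportions from the two samplers.
  data.frame(dmb.mean = fit.dmb$mean.samp, md.mean = fit.md$mean.samp)
}
#----------------------------------------------------------------------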