########################################################################################################
#Computes NMDS, CA, DCA, and PCO
#then ccomputes reduced major axis regression of depth with Axis 1 scores
#Outputs scores.cvs to be used for correlations
#Then runs RMA for each type of ordination and generates DepthCorr.csv 
#Also does bootsrap simlations of R^2, calculates 95% Confidence Intervals,and
#calculates R^2 after itterative species/sample removal
#Written by C.Tyler and M.Kowalewski, 12 March, 2014
########################################################################################################

setwd("~/R/Name")#Specify pathway to where R files can be located on your computer 

library(vegan)
library(MASS)

data<-read.csv('All.Taxa.csv', head=TRUE)# Input Filenames:All.Taxa, Preservable, RobustMoll, Ocean, or Transect
rownames(data)<-data[,1]
new<-data[,-1]

#NMDS
m<-metaMDS(new)

loc.scores<-m$points
sp.scores<-m$species

#Sspecify pathway to where R files should be saved on your computer (e.g., C:/Documents/R)
write.csv(sp.scores,'C:/Documents/R/Species_scores_NMDS.csv')
write.csv(loc.scores,'C:/Documents/R/Locality_scores_NMDS.csv')

scores<-read.csv('Locality_scores_NMDS.csv')
refs<-read.csv('Refs_local.csv')

depth<-array(length(scores[,1]))
loc.scores<-cbind(loc.scores,depth)
for(i in 1:length(scores[,1])){
  for(j in 1:length(refs[,1])){
    if(scores[i,1]==refs[j,1]){
      scores[i,4]<-refs[j,2]
    }
  }
}

write.csv(scores,'C:/Documents/R/RMA_Locality_NMDS.csv')

scores<-read.csv('Species_scores_NMDS.csv')
taxa<-read.csv('Refs_Taxa.csv')

scores[,1]<-as.character(scores[,1])
taxa[,1]<-as.character(taxa[,1])

depth<-array(length(scores[,1]))
scores<-cbind(scores,depth)
for(i in 1:length(scores[,1])){
  for(j in 1:length(taxa[,1])){
    if(scores[i,1]==taxa[j,1]){
      scores[i,4]<-taxa[j,2]
    }
  }
}
PP<-array(length(scores[,1]))
scores<-cbind(scores,PP)
for(i in 1:length(scores[,1])){
  for(j in 1:length(taxa[,1])){
    if(scores[i,1]==taxa[j,1]){
      scores[i,5]<-taxa[j,3]
    }
  }
}

write.csv(scores,'C:/Documents/R/RMA_Species_NMDS.csv')

#CA
ca<-corresp(new, nf=2)

loc.scores<-ca$rscore
sp.scores<-ca$cscore

write.csv(sp.scores,'C:/Documents/R/Species_scores_CA.csv')
write.csv(loc.scores,'C:/Documents/R/Locality_scores_CA.csv')

scores<-read.csv('Locality_scores_CA.csv')

depth<-array(length(scores[,1]))
loc.scores<-cbind(loc.scores,depth)
for(i in 1:length(scores[,1])){
  for(j in 1:length(refs[,1])){
    if(scores[i,1]==refs[j,1]){
      scores[i,4]<-refs[j,2]
    }
  }
}

write.csv(scores,'C:/Documents/R/RMA_Locality_CA.csv')

scores<-read.csv('Species_scores_CA.csv')
scores[,1]<-as.character(scores[,1])

depth<-array(length(scores[,1]))
scores<-cbind(scores,depth)
for(i in 1:length(scores[,1])){
  for(j in 1:length(taxa[,1])){
    if(scores[i,1]==taxa[j,1]){
      scores[i,4]<-taxa[j,2]
    }
  }
}
PP<-array(length(scores[,1]))
scores<-cbind(scores,PP)
for(i in 1:length(scores[,1])){
  for(j in 1:length(taxa[,1])){
    if(scores[i,1]==taxa[j,1]){
      scores[i,5]<-taxa[j,3]
    }
  }
}

write.csv(scores,'C:/Documents/R/RMA_Species_CA.csv')

#PCO
dist<-vegdist(new, method="bray")
mds.loc<-cmdscale(dist, k = 2, eig = FALSE, add = FALSE, x.ret = FALSE)
sp<-t(new)
dist<-vegdist(sp, method="bray")
mds.sp<-cmdscale(dist, k = 2, eig = FALSE, add = FALSE, x.ret = FALSE)

write.csv(mds.sp,'C:/Documents/R/Species_scores_PCO.csv')
write.csv(mds.loc,'C:/Documents/R/Locality_scores_PCO.csv')

scores<-read.csv('Locality_scores_PCO.csv')

depth<-array(length(scores[,1]))
scores<-cbind(scores,depth)
for(i in 1:length(scores[,1])){
  for(j in 1:length(refs[,1])){
    if(scores[i,1]==refs[j,1]){
      scores[i,4]<-refs[j,2]
    }
  }
}

write.csv(scores,'C:/Documents/R/RMA_Locality_PCO.csv')

scores<-read.csv('Species_scores_PCO.csv')
scores[,1]<-as.character(scores[,1])

depth<-array(length(scores[,1]))
scores<-cbind(scores,depth)
for(i in 1:length(scores[,1])){
  for(j in 1:length(taxa[,1])){
    if(scores[i,1]==taxa[j,1]){
      scores[i,4]<-taxa[j,2]
    }
  }
}
PP<-array(length(scores[,1]))
scores<-cbind(scores,PP)
for(i in 1:length(scores[,1])){
  for(j in 1:length(taxa[,1])){
    if(scores[i,1]==taxa[j,1]){
      scores[i,5]<-taxa[j,3]
    }
  }
}

write.csv(scores,'C:/Documents/R/RMA_Species_PCO.csv')

#DCA
data.ra<-decorana(new, ira=0)
data.ra.taxonscores <- scores(data.ra,display=c("species"), choices=c(1,2))
data.ra.samplescores <- scores(data.ra,display=c("sites"), choices=c(1,2))

data.ra

write.csv(data.ra.taxonscores,'C:/Documents/R/Species_scores_DCA.csv')
write.csv(data.ra.samplescores,'C:/Documents/R/Locality_scores_DCA.csv')

scores<-read.csv('Locality_scores_DCA.csv')

depth<-array(length(scores[,1]))
scores<-cbind(scores,depth)
for(i in 1:length(scores[,1])){
  for(j in 1:length(refs[,1])){
    if(scores[i,1]==refs[j,1]){
      scores[i,4]<-refs[j,2]
    }
  }
}

write.csv(scores,'C:/Documents/R/RMA_Locality_DCA.csv')

scores<-read.csv('Species_scores_DCA.csv')
scores[,1]<-as.character(scores[,1])

depth<-array(length(scores[,1]))
scores<-cbind(scores,depth)
for(i in 1:length(scores[,1])){
  for(j in 1:length(taxa[,1])){
    if(scores[i,1]==taxa[j,1]){
      scores[i,4]<-taxa[j,2]
    }
  }
}
PP<-array(length(scores[,1]))
scores<-cbind(scores,PP)
for(i in 1:length(scores[,1])){
  for(j in 1:length(taxa[,1])){
    if(scores[i,1]==taxa[j,1]){
      scores[i,5]<-taxa[j,3]
    }
  }
}

write.csv(scores,'C:/Documents/R/RMA_Species_DCA.csv')

################
#RMA
data<-read.csv('RMA_Species_NMDS.csv')

x<-data[,3]
y<-data[,4]
d<-data[,5]

slope<-(slopeRMA=sign(cov(x,d))*sd(d)/sd(x))
int<-(interceptRMA=mean(d)-slopeRMA*mean(x))

lm.r=lm(d~x)
sp.r<-summary(lm.r)
sp.r

data<-read.csv('RMA_Locality_NMDS.csv')

x<-data[,3]
y<-data[,4]
d<-data[,5]

slope<-(slopeRMA=sign(cov(x,d))*sd(d)/sd(x))
int<-(interceptRMA=mean(d)-slopeRMA*mean(x))

lm.r=lm(d~x)
loc.r<-summary(lm.r)
loc.r

############################
#RESAMPLING

rm(list=ls(all=TRUE))
data<-data.matrix(read.csv("Resampling.csv", head=FALSE))#All Taxa without headings
dl<-data.matrix(read.csv("Depth_Loc.csv", head=FALSE))
dt<-data.matrix(read.csv("Depth_Tax.csv",head=FALSE))
library(vegan)      
library(MASS)
library(lattice)

n<- sum(data)     # total specimens
t<- ncol(data)    # total taxa
s<- nrow(data)    # total samples
n; t; s           # check original data

#Dimensions listed are for Open Ocean
n1<- 7120         # number of specimens to subsample
t1<- 52           # number of taxa to subsample
s1<- 42           # number of samples to subsample
times=100000      # number of iterations (start with a small number)

ar2sp<- 0.50      # enter the actual r-square value for species in the limited dataset of interest (e.g., robust mollusks)
ar2loc<- 0.54     # enter the actual r-square value for localities in the limited dataset of interest (e.g., robust mollusks)

###########
output<- NULL
for (i in 1:times) {
  tax<- sample(1:t,t1,replace=F)                  # select t1 taxa
  datat<- data[,tax]                              # select t1 taxa
  fdt<- dt[tax]                                   # select taxon depth values
  
  if (sum(datat)>n1) {
    if (nrow(datat)>=s1) {
      sams<- sample(1:nrow(datat),s1,replace=F)
      datas<- datat[sams,]                       # select s1 samples
      checkrows<- which((apply(datas,1,sum))==0)
      checkcols<- which((apply(datas,2,sum))==0)
      if (sum(checkrows)==0 & sum(checkcols)==0) {
        if (sum(datas)>n1) {
          fdl2<- dl[sams]                            # select locality depth values
          n2<- sum(datas)
          if (n2>n1) {
            for (i in 1:(n2-n1)) {    
              select<- sample(x=1:length(datas),prob=datas/sum(datas),size=1,replace=F)
              datas[select]<- datas[select]-1
            }
            checkrows2<- which((apply(datas,1,sum))==0)
            checkcols2<- which((apply(datas,2,sum))==0)
            if (sum(checkrows2)==0 & sum(checkcols2)==0) {
              
              m<-metaMDS(datas)
              loc.scores<-m$points#sample scores
              sp.scores<-m$species#Species scores
              stress<-m$stress
              
              if(stress<0.2){
                loc.scores<-as.matrix(m$points[,1])
                sp.scores<-as.matrix(m$species[,1])
                
                rl<- cor(loc.scores,fdl2)^2  # compute r-square for locality depths 
                rt<- cor(sp.scores,fdt)^2    # compute r-square for taxon depths
                output<- rbind(output,c(rl,rt,stress,sum(datas),ncol(datas),nrow(datas)))
              }
            }
          }       
        }
      }
    }
  }
}

dim(output)
new<-output


b<- kde2d(new[,1],new[,2])
meanx<-mean(new[,1])
meany<-mean(new[,2])


write.csv(new, 'C:/Documents/R/Ocean_R2.csv')

###############################
#Filled Countour Density Plot for Resampling

aloc<- 0.66   # Observed R2 value for all localities
asp<- 0.81    # Observed R2 value for all species


oc<-read.csv("Ocean_R2.csv", head=TRUE)
ob<- kde2d(oc[,2],oc[,3])
omeanx<-mean(oc[,2])
omeany<-mean(oc[,3])
osp<- 0.50   # Observed R2 value for subset 'Ocean' localities     
oloc<- 0.54  # Observed R2 value for subset 'Ocean' species

numlevels<-6
jet.colors <- colorRampPalette(c("#00007F", "blue", "#007FFF", "cyan","#7FFF7F", "yellow", "#FF7F00", "red", "#7F0000"),space=("rgb"))
op<- par(mai=c(1,1,0.75,0.75))

###OCEAN
filled.contour(ob,color=jet.colors,nlevels=20,
               xlab=expression("r"^2* " for localities"),ylab=expression("r"^2* " for species"),
               plot.axes = {axis(1); axis(2); points(omeanx,omeany,pch=21,bg="white",cex=0.75);
                            axis(1); axis(2); points(oloc,osp,pch=21,bg="red",cex=1);
                            points(aloc,asp,pch=21,bg='black',cex=1)})


##############################
#PARAMETRIC AND BOOTSTRAP CONFIDENCE INTERVALS FOR Pearson's R

data<-read.csv('RMA_Species_NMDS.csv', head=TRUE)
x<-data[,3]#dim1
y<-data[,5]#depth

options("scipen"=100)
x1<- c(x)
y1<- c(y)

param<- cor.test(x1,y1)
out<- NULL
times<- 10000
for (i in 1:times) {
  b1<- sample(length(x1),replace=TRUE)
  br<- cor(x1[b1],y1[b1])
  out<- rbind(out,br)
}
if (param$estimate>0) pboot<- (length(which(out<=0)))/times
if (param$estimate<0) pboot<- (length(which(out>=0)))/times
report<- round(cbind(length(x1),param$estimate,param$p.value,pboot,param$conf.int[1],                
                     param$conf.int[2],quantile(out,probs=c(0.025,0.975))[1],quantile(out,probs=c(0.025,0.975))[2]),4)
colnames(report)<- c("n", "r","p(param)","p(boot)","L95CL(param)","U95CL(param)","L95CL(boot)","U95CL(boot)")
rownames(report)<- c("dataset1")
report

##########################################
#SAMPLE SIZE FILTERING

new<-read.csv('All.Taxa.csv', head=TRUE)
data<-new[,-1]
refs<-read.csv('Refs_local.csv')
taxa<-read.csv('Refs_Taxa.csv')

library(vegan)
library(MASS)

### LOCLITY FILTERING
out2<-NULL
out3<-NULL
out4<-NULL
for (i in seq(0, 500, 10))              #10-bin size 
{
  goodS<- which(apply(data,1,sum)>=i)   # find samples for a series of cutoff values given by i (0,5,10,...200)
  DSX<- data[goodS,]							      # restrict data to sample rows that fulfill this criterion
  DSX=DSX[,apply(DSX,2,sum)!=0]
  TO<- DSX								              # copy to a new dataset
  TO[TO > 1] <- 1						           	# convert to occurrences
  occtaxa<- sum(TO)						          # count number of taxon occurrences for restricted data
  TN=(apply(DSX,2,sum)!=0)					  	# compute column sums (number of specimens per taxon)
  TN[TN > 1] <- 1							          # convert sums to 1s
  numtaxa<- sum(TN)					          	# count number of taxa
  numspec<- sum(DSX)					        	# count number of specimens
  out<- c(i,nrow(DSX),numtaxa, occtaxa, numspec) 	# assemble results for a given iteration
  if(i==0) {out2=out} else {out2=rbind(out2,out)}
  
  
  ###NMDS
  m<-metaMDS(DSX)  
  loc.scores<-m$points
  
  #Must be performed separately for each ordination type 
  #activate the following indidually, and deactivate NMDS
  
  ###CA
  #ca<-corresp(DSX, nf=2)
  #loc.scores<-ca$rscore
  
  #PCO
  #dist<-vegdist(DSX, method="bray")
  #loc.scores<-cmdscale(dist, k = 2, eig = FALSE, add = FALSE, x.ret = FALSE)
  
  #DCA
  #data.ra<-decorana(DSX, ira=0)
  #loc.scores<- scores(data.ra,display=c("sites"), choices=c(1,2))
  
  a<-as.numeric(row.names(loc.scores))
  score<-cbind(a,loc.scores)
  
  depth<-array(length(score[,1]))
  score<-cbind(score,depth)
  
  for(i in 1:length(score[,1])){
    for(j in 1:length(refs[,1])){
      if(score[i,1]==refs[j,1]){
        score[i,4]<-refs[j,2]
      }
    }
  }
  
  r<-summary(lm(score[,4]~score[,2]))$adj.r.squared
  out3<-rbind(out3,r)
  out4=cbind(out2,out3)	
}
cols<- c("minN", "samples", "taxa", "taxaoccs", "num_spec","r.sq")
rows<- 1:nrow(out3)
colnames(out4) <- cols
rownames(out4) <- rows
result<- as.matrix(out4)
result

plot(x=out4[,1], y=out4[,6], xlab=cols[1], ylab=cols[6], pch=16)

##########
### SPECIES ABUNDANCE FILTERING
new2<-NULL
new3<-NULL
new4<-NULL
for (i in seq(0, 250, 10))              #10-bin size 
{
  goodsT<- which(apply(data,2,sum)>=i)  # find samples for a series of cutoff values given by i (0,5,10,...200)
  DSX<- data[,goodsT]							      # restrict data to species columns that fulfill this criterion
  DSX=DSX[apply(DSX,1,sum)!=0,]
  TO<- DSX								              # copy to a new dataset
  TO[TO > 1] <- 1							          # convert to occurrences
  occtaxa<- sum(TO)						          # count number of taxon occurrences for restricted data
  TN=(apply(DSX,2,sum))					      	# compute column sums (number of specimens per taxon)
  TN[TN > 1] <- 1						           	# convert sums to 1s
  numtaxa<- sum(TN)					          	# count number of taxa
  numspec<- sum(DSX)					         	# count number of specimens
  new1<- c(i,nrow(DSX),numtaxa, occtaxa, numspec) 	# assemble results for a given iteration
  if(i==0) {new2=new1} else {new2=rbind(new2,new1)}
  
  ##NMDS
  m<-metaMDS(DSX)  
  sp.scores<-m$species
  
  #Must be performed separately for each ordination type 
  #activate the following indidually, and deactivate NMDS
  
  #CA
  #ca<-corresp(DSX, nf=2)
  #sp.scores<-ca$cscore
  
  #PCO
  #sp<-t(DSX)
  #dist<-vegdist(sp, method="bray")# by Species
  #sp.scores<-cmdscale(dist, k = 2, eig = FALSE, add = FALSE, x.ret = FALSE)
  
  #DCA
  #data.ra<-decorana(DSX, ira=0)
  #sp.scores<- scores(data.ra,display=c("species"), choices=c(1,2))
  
  
  b<-row.names(sp.scores)
  scores<-data.frame(cbind(b,sp.scores))
  
  scores[,1]<-as.character(scores[,1])
  taxa[,1]<-as.character(taxa[,1])
  
  depth<-array(length(scores[,1]))
  scores<-cbind(scores,depth)
  for(i in 1:length(scores[,1])){
    for(j in 1:length(taxa[,1])){
      if(scores[i,1]==taxa[j,1]){
        scores[i,4]<-taxa[j,2]
      }
    }
  }
  dim1<-as.numeric(scores[,2])
  depth<-as.numeric(scores[,4])
  r<-summary(lm(depth~dim1))$adj.r.squared
  new3<-rbind(new3,r)
  new4=cbind(new2,new3)	
  
}
cols<- c("minN", "samples", "taxa", "taxaoccs", "num_spec","r.sq")
rows<- 1:nrow(new4)
colnames(new4) <- cols
rownames(new4) <- rows
result<- as.matrix(new4)
result

plot(x=new4[,1], y=new4[,6], xlab=cols[1], ylab=cols[6], pch=16, col="red")

###################################################################################################