######################################################################
#######   Functions-Automatic identification of free-ranging   ####### 
#######                   green turtle behaviours              #######
#######                 Royal Society Open Science             #######
#######                     Jeantet et al. (2020)              #######
######################################################################



###############################               Part   I                  ##################################

h5read=function(h5file,fileBehaviour,freq){
  
  #Opening the file containing the raw data
  f=h5file(fileh5,"r")
  
  dt=h5attr(f["/"], "datestart")
  datestart<<-as.POSIXlt(dt, tz="GMT")
  dset=openDataSet(f,"/data")
  set<-readDataSet(dset)
  size=dset@dim
  nbrow<<-size[1]
  h5close(f)
  
  colnames(set)<-c("AccX","AccY","AccZ","GyrX","GyrY","GyrZ","Depth")
  
  #addition of the time 
  sdeb_data=3600*datestart$hou+60*datestart$min+datestart$sec
  Time_s<-round(seq(from=sdeb_data, by=1/freq, length.out=nbrow),2)
  
  set<-cbind(set,Time_s)
  
  
  #Opening behavioural file
  Behaviour<-read.csv(fileBehaviour,header=T,sep=";")[,1:10]
  Behaviour$Behavior<-as.character(Behaviour$Behavior)
  
  #Synchronisation of the behaviours
  behav<-data.frame(matrix(0, nrow =nbrow, ncol = 1))
  colnames(behav)<-"Behaviour"
  
  for (i in 1:nrow(Behaviour)){
    a<-freq*round(Behaviour$Start..s.[i],1)+1
    b<-freq*round(Behaviour$Stop..s.[i],1)+1
    behav[a:b,1]<-as.character(Behaviour$Behavior[i])
  }
  
  return(list(set,behav))
}

correction=function(set,time){
  norme<-sqrt(set[,"AccX"]^2+set[,"AccY"]^2+set[,"AccZ"]^2)
  set[,"AccX"]=set[,"AccX"]/norme
  set[,"AccY"]=set[,"AccY"]/norme
  set[,"AccZ"]=set[,"AccZ"]/norme
  
  #Tortle laying flat at "time"
  ax<-set[,"AccX"][time]
  ay<-set[,"AccY"][time]
  az<-set[,"AccZ"][time]
  
  sin_phi<--ax
  cos_phi=(ay^2+az^2)^0.5
  cos_lambda<-1
  sin_lambda<-0
  
  
  AccX_cor = cos_phi*set[,"AccX"]-sin_phi*set[,"AccZ"]
  AccY_cor=cos_lambda*set[,"AccY"]
  AccZ_cor=sin_phi*set[,"AccX"]+cos_phi*set[,"AccZ"]
  
  GyrX_cor=cos_phi*set[,"GyrX"]-sin_phi*set[,"GyrZ"]
  GyrY_cor=set[,"GyrY"]
  GyrZ_cor=sin_phi*set[,"GyrX"]+cos_phi*set[,"GyrZ"]
  
  set[,"AccX"]<-AccX_cor
  set[,"AccY"]<-AccY_cor
  set[,"AccZ"]<-AccZ_cor
  set[,"GyrX"]<-GyrX_cor
  set[,"GyrY"]<-GyrY_cor
  set[,"GyrZ"]<-GyrZ_cor
  
  return(set)
}

#Calculates the derived-parameters (DBA and RA) from the raw acceleration data. Takes into input parameters the value table (X) and the temporal window (mean) of the running mean.  
Data_computation=function(X,mean,freq){
  ##Associates the accelerometer axes to the animal axes
  X<-data.matrix(X)
  AccX<--X[,'AccX']
  AccY<--X[,'AccY']
  AccZ<-X[,'AccZ']
  
  ## Calculation of the Static acceleration (St) by computing a running mean with a temporal window of "mean" 
  n<-mean*freq
  StX<-runmean(AccX, n, endrule="mean", alg="C")
  StY<-runmean(AccY, n, endrule="mean", alg="C")
  StZ<-runmean(AccZ, n, endrule="mean", alg="C") 
  
  ## Calculation of the Dynamic acceleration (D)
  DX<-AccX-StX
  DY<-AccY-StY
  DZ<-AccZ-StZ
  
  #Calculation of the Dynamic Body Acceleration (DBA) and Rotational Activity (RA)
  dynamique<-DX^2+DY^2+DZ^2
  DBA<-sqrt(dynamique) 
  
  RA<-sqrt(X[,'GyrX']^2+X[,'GyrY']^2+X[,'GyrZ']^2)
  
  X<-cbind(DBA,RA)
  return(X)
  
}

cut_behav_first=function(Tortue,freq, cut){
  
  Tortue[Tortue$Behaviour=="Time mark",which(colnames(Tortue)=="Behaviour")]<-"0"
  behav<-as.numeric(as.factor(Tortue$Behaviour))
  b<-which(diff(behav)!=0)[1]-cut*freq
  Tortue<-Tortue[b:length(Tortue[,1]),]
}

cut_behav_fin=function(Tortue,freq, cut){
  
  Tortue[Tortue$Behaviour=="Time mark",which(colnames(Tortue)=="Behaviour")]<-"0"
  behav<-as.numeric(as.factor(Tortue$Behaviour))
  b<-which(diff(behav)!=0)[length(which(diff(behav)!=0))]
  if ((b+cut*freq)<length(Tortue[,1])){Tortue<-Tortue[1:(b+cut*freq),]}else{Tortue<-Tortue[1:b,]}
}

Selection=function(X,ncol){
  
  X$Behaviour<-as.character(X$Behaviour)
  
 
  X[X$Behaviour=="0",ncol]<-"Other"
  X[X$Behaviour=="Breathing",ncol]<-"Other"
  X[X$Behaviour=="Stay in surface",ncol]<-"Other"
  
  X[X$Behaviour=="Catching",ncol]<-"Feeding"
  X[X$Behaviour=="Catching jellyfish",ncol]<-"Feeding"
  X[X$Behaviour=="Chewing jellyfish",ncol]<-"Feeding"
  X[X$Behaviour=="Chewing on movement",ncol]<-"Feeding"
  X[X$Behaviour=="Chewing stationary",ncol]<-"Feeding"
  X[X$Behaviour=="Grabbing on movement",ncol]<-"Feeding"
  X[X$Behaviour=="Grabbing stationary",ncol]<-"Feeding"
  X[X$Behaviour=="Grabbing the wall",ncol]<-"Feeding"

  X[X$Behaviour=="Gliding ascent",ncol]<-"Gliding"
  X[X$Behaviour=="Gliding descent",ncol]<-"Gliding"
  
  X[X$Behaviour=="Resting ",ncol]<-"Resting"
  X[X$Behaviour=="Resting active",ncol]<-"Resting"
  X[X$Behaviour=="Resting in flow",ncol]<-"Resting"
  X[X$Behaviour=="Resting watching",ncol]<-"Resting"
  
  X[X$Behaviour=="Scratching camera",ncol]<-"Scratching"
  X[X$Behaviour=="Scratching head",ncol]<-"Scratching"
  
  X[X$Behaviour=="Stepping back",ncol]<-"Swimming"
  X[X$Behaviour=="Swimming 1 ascent",ncol]<-"Swimming"
  X[X$Behaviour=="Swimming 1 descent",ncol]<-"Swimming"
  X[X$Behaviour=="Swimming 1 horizontally",ncol]<-"Swimming"
  X[X$Behaviour=="Swimming ascent",ncol]<-"Swimming"
  X[X$Behaviour=="Swimming descent",ncol]<-"Swimming"
  X[X$Behaviour=="Swimming horizontally",ncol]<-"Swimming"
  X[X$Behaviour=="Swimming fast ascent",ncol]<-"Swimming"
  X[X$Behaviour=="Swimming fast descent",ncol]<-"Swimming"
  X[X$Behaviour=="Swimming fast horizontally",ncol]<-"Swimming"
  X[X$Behaviour=="Swimming in place",ncol]<-"Swimming"
  X[X$Behaviour=="Swimming on the bottom",ncol]<-"Swimming"
  X[X$Behaviour=="Prospection",ncol]<-"Swimming"
  X[X$Behaviour=="Watching",ncol]<-"Swimming"
  X[X$Behaviour=="Left U-turn",ncol]<-"Swimming"
  X[X$Behaviour=="Right U-turn",ncol]<-"Swimming"
  
  X[X$Behaviour=="Escape",ncol]<-"Other"
  X[X$Behaviour=="Flipper beat",ncol]<-"Other"
  X[X$Behaviour=="Foraging",ncol]<-"Other"
  X[X$Behaviour=="Interaction",ncol]<-"Other"
  X[X$Behaviour=="Landing",ncol]<-"Other"
  X[X$Behaviour=="Obstacle",ncol]<-"Other"
  X[X$Behaviour=="Pursuit",ncol]<-"Other"
  X[X$Behaviour=="Regurgitating",ncol]<-"Other"
  X[X$Behaviour=="Sand",ncol]<-"Other"
  X[X$Behaviour=="Shaking",ncol]<-"Other"
  X[X$Behaviour=="Shaking head",ncol]<-"Other"
  X[X$Behaviour=="Hunting jellyfish",ncol]<-"Other"
  X[X$Behaviour=="New video",ncol]<-"Other"
  
  X<-na.omit(X)
  return(X)
  
}


#######Functions used in Predictos_computation function 

#Segments the dive 
seg.2algo<-function(Dive, freq){
  
  #First step : PELT segmentation according to the depth 
  depth<-Dive[seq(1,length(Dive[,1]),by=freq),'Depth']
  m.pelt<-cpt.mean(diff(depth,3),method="PELT",penalty="Manual", pen.value=5)
  #plot(m.pelt)
  begin<-c(1,(m.pelt@cpts[-length(m.pelt@cpts)])*freq+1)
  end<-c((m.pelt@cpts[-length(m.pelt@cpts)])*freq,length(Dive[,1]))
  df<-cbind(begin,end)
  
  segment.p<-lapply(seq(1,length(df[,1])), function(a) Dive[df[a,1]:df[a,2],])
  
  #2nd step: segmentation accroding the vertical speed
  mean.depth<-m.pelt@param.est[[1]]
  data.segmented<-list()
  m.pelt<-list()
  w<-1 #counter
  
  for (g in 1:length(segment.p)){
    
    segment<-segment.p[[g]]
    #If ascendant/descendant phase : PELT segmentation according to DBA
    if (abs(mean.depth[g])>0.001){
      m.pelt[[w]]=cpt.meanvar(segment[,'DBA'],method="PELT",penalty="Manual",pen.value=50)
      #plot(m.pelt[[w]])
      data.segmented[[w]]<-segment
      w<-w+1}
    #If horizontal phase : PELT segmentation according to pitch speed
    else{
      m.pelt[[w]]=cpt.var(segment[,'GyrY'],method="PELT",penalty="Manual",pen.value=20)
      #plot(m.pelt[[w]])
      data.segmented[[w]]<-segment
      w<-w+1
    }  
  } 
  
  return(list(m.pelt,data.segmented))
}

#high-filteres with a running window of 1 s
#computes the squared differences (gy -mean(gy))2
#gets back the max and the mean
fun.noise=function(y){
  
  y_smooth=sgolayfilt(y, p = 2, n = 19, m = 0, ts = 1) # p= order of approximation, and n= window size 
  high_freq=y-y_smooth

  HF_mean=mean((high_freq-mean(high_freq))^2)
  HF_max=max((high_freq-mean(high_freq))^2)
  
  return(c(HF_mean,HF_max))
}

#Selects the most expressed behaviours
behav_select<-function(y, behavior_list,n.behav){
  
  x<-as.numeric(names(sort(table(y),decreasing=TRUE)))
  if (length(x)==1){Behavior<-x}
  else if (max(table(y))>trunc(0.6*length(y))){Behavior<-names(which(table(y)==max(tabulate(y))))}
  
  else{Behavior<-n.behav+1}
  
  Behavior=as.numeric(Behavior)
  return(Behavior)
}

###Segments the behavioraul sequences and calculates the descriptive statistics of the obtained segments. 
Predictors_computation<-function(X,n.dive,freq,behaviour_list,n.behav){
  
  vec.names<-c("AccX","AccY","AccZ","GyrX","GyrY","GyrZ","DBA","RA") #vector containing all the variables that we want to described statically 
  Predictors<-c() #to store the predictors
  
  ##for each dive
  for (i in 1:n.dive){
    Dive<-X[X$num==i,] 
    
    ###we look at the dive duration
    #If the dive lasts more than 5 sec, we calculate the predictors
    if (length(Dive[,1])>(5*freq)){
      #plot(-Dive[,7],type='l')      
      
      
      segment<-seg.2algo(Dive,freq) #segmentation of the dive 
      
      #Retrieves the segments 
      for (k in 1:length(segment[[2]])){
        seg<-list()   
        m.pelt<-segment[[1]][[k]]
        dt.segment<-segment[[2]][[k]]
        
        #Deletes the segments shorter than 2 sec
        if (length(dt.segment[,1])>2*freq){
          #plot(m.pelt)
          seg.ind<-m.pelt@cpts 
          ind=1
          lg<-length(seg.ind)
          w<-1
          
          while(w<lg){
            while (ind!=0 & w<lg){
              if(seg.ind[w+1]-seg.ind[w]<2*freq){seg.ind<-seg.ind[-(w+1)]
              ind=1
              lg<-length(seg.ind)
              }else{ind=0}
            }
            lg<-length(seg.ind)
            w<-w+1
            ind=1
          }
          if (length(seg.ind)>2){
            seg.ind<-seg.ind[-c(1,length(seg.ind))]
            debut<-c(1,seg.ind+1)
            fin<-c(seg.ind,length(dt.segment[,1]))
            df<-cbind(debut,fin)
            seg<-lapply(seq(1,length(df[,1])), function(a) dt.segment[df[a,1]:df[a,2],])
          }else{seg[[1]]=dt.segment}
          
        }else {seg[[1]]=dt.segment}
        
        ##Calculates the descriptive variables for each segment
        for (g in 1:length(seg)){
          
          #Calculates mean, variance, minimum and 
          data<-seg[[g]][,c(1:8)]
          predictor=lapply(list(mean=mean, var=var, max=max, min=min), function(f) apply(data,2,f))
          predictor<-do.call(rbind,predictor)
          rownames(predictor)<-c("mean","variance","minimum","maximum")
          
          high_frequency<-lapply(vec.names[c(1:2,4:5)], function(x) fun.noise(data[,x]))
          high_frequency<-do.call(cbind,high_frequency)
          colnames(high_frequency)<-c("AccX","AccY","GyrX","GyrY")   
          rownames(high_frequency)<-c("HF_mean","HF_max")
          
          high_freq<-c()
          for (z in colnames(high_frequency)){
            t.high_freq<-t(high_frequency[,z])
            colnames(t.high_freq)<-paste(z,rownames(high_frequency),sep="_")
            high_freq<-cbind(high_freq,t.high_freq)
            
          }
          
          predictors<-c()              
          
          for (z in vec.names){
            pred<-t(predictor[,z])
            colnames(pred)<-paste(z,colnames(pred),sep="_")
            predictors<-cbind(predictors,pred)                    
          }
          
          Behaviour<-behav_select(seg[[g]][,"Behaviour"],behaviour_list,n.behav)
          Depth_difference<-seg[[g]][length(seg[[g]][,1]),"Depth"]-seg[[g]][1,"Depth"]
          
          predictors<-cbind(Dive[1,"num"],seg[[g]][1,"Time_s"],Behaviour,length(data[,1]),Depth_difference,predictors,high_freq)                              
          colnames(predictors)[1:4]<-c("num","time","Behaviour","last")                          
          
          Predictors<-rbind(Predictors,predictors)
          rm(predictors,pred,predictor)
     
        }
      }
      
    }else{X[X$num==i,"num"]<-0} #plongée trop courte passe en surface
  } 
  return(list(Predictors,X))
}


###############################               Part   II                 ##################################

#Calculates the number of segments associated with each behavior
Freq=function(X){
  x<-as.factor(X)
  a<-tabulate(x,nbin=length(levels(x)))
  a<-as.matrix(a)
  rownames(a)<-levels(x)
  return(a)
}


#Each function takes the training and test datasets as input parameters (additional parameters can be required)  
#Returns the confusion matrix, the predicted behaviors and the model 

fExtreme_Gradient_Boosting<-function(train,test,nclass){
  
  labels<-as.numeric(train$Behaviour)-1
  test_label<-as.numeric(test$Behaviour)-1
  new_tr=model.matrix(~.+0,data=train[,-1])
  new_ts=model.matrix(~.+0, data=test[,-1])  
  dtrain<-xgb.DMatrix(data=new_tr, label=labels)
  dtest<-xgb.DMatrix(data=new_ts, label=test_label)  
  
  params<-list(booster ="gbtree", objectif="multi:softprob", num_class=nclass, eta=0.3, max_depth=3, subsample=0.5, colsample_bytree=0.5, seed=1) 
  xgbcv<-xgb.cv(params=params, data=dtrain, label=labels, nround=500, nfold=5, showsd = T, stratified = F, print_every_n =50, early_stopping_rounds= 20, maximize = F) 
  y<-xgbcv$best_iteration
  xgb<-xgb.train(params=params, data=dtrain, nround=y,print_every_n = 30, early_stop_round = 10, maximize = F , eval_metric = "merror") 
  
  pred<-predict(xgb, dtest)
  
  #Confusion matrice 
  cf_gb<-table(pred, test_label)
  rownames(cf_gb)<-levels(train$Behaviour)[c(as.numeric(rownames(cf_gb))+1)]
  colnames(cf_gb)<-levels(test$Behaviour)

  return(list(cf_gb,pred,xgb))
}

fRandom_forest<-function(train, test,n.mtry){
  
  #To compute only one time to determine "n.mtry"
  #model.res<-tuneRF(train, train$Behavior, stepFactor=1.5)
  
  rf<-randomForest(Behaviour~.,data=train,importance=TRUE,do.trace=FALSE , proximity=TRUE, ntree=300, mtry=n.mtry)
  pred<-predict(rf,test, type="class")
  cf_RF<-table(pred,test$Behaviour)
  return(list(cf_RF,pred,rf))
}

fCART<-function(train, test){
  
  tree<-rpart(train$Behaviour~.,data=train, method="class")
  bestcp <- tree$cptable[which.min(tree$cptable[,"xerror"]),"CP"]
  tree<-prune(tree, cp=bestcp)
  
  #Plots the decision tree 
  #par(xpd = TRUE)
  #plot(tree, compress=TRUE)
  #text(tree)
  #prp(tree, yesno=T, faclen=0,type=0,extra=0,fallen.leaves=T,varlen=0, cex=0.8 ,split.font=1,border.col=0,font=2)
  
  pred<-predict(tree,test,type="class")
  cf_cart<-table(pred, test$Behaviour )
  
  return(list(cf_cart,pred,tree))
  
}

fSVM<-function(train,test){
  
  svm<-svm(Behaviour~., data=train, kernel="radial",cost=5,gamma=0.001)
  
  pred<-predict(svm,test,type="class")
  cf_svm<-table(pred, test$Behaviour)
  
  
  return(list(cf_svm,pred,svm))
}

fLDA<-function(train,test){
  
  lda<-lda(Behaviour~., data=train)
  pred<-predict(lda, test,type="class")
  cf_lda<-table(pred$class, test$Behaviour)
  return(list(cf_lda,pred$class))
}


#Evaluation the efficiency of the algorithm by computing the number of the well-identified behaviors (true positive, TP, and true negative, TN) and of those considered misclassified (false negative, FN and false positive, FP) 
#from the confusion matrix X. 
#Computes three indicators specific to each behavior: Precision, Sensitivity, Specificity
#Returns the accuracy of the algorithm and a table containing all the indidcators
Accuracy<-function(X,nclass,name.behav){
  
  nb<-length(colnames(X))
  vec_col<-colnames(X)
  vec_row<-rownames(X)
  if (length(vec_col)!=length(vec_row)){
    P=1*outer(vec_col,vec_row,function (a,b) a==b)
    sum_row<-apply(P,1,sum)
    sum_col<-apply(P,2,sum)
    Y<-data.frame(matrix(NA, ncol=nclass, nrow=nclass))
    if (length(which(sum_row==0))==0 & length(which(sum_col==0))>0){
      Y[,which(sum_col==0)]<-0    
      Y[,-which(sum_col==0)]<-X}
    else if (length(which(sum_row==0))>0 & length(which(sum_col==0))==0){
      Y[which(sum_row==0),]<-0
      Y[-which(sum_row==0),]<-X
    }else {
      Y[which(sum_row==0),]<-0
      Y[,which(sum_col==0)]<-0
      Y[-which(sum_row==0),-which(sum_col)==0]<-X
    }
    X<-Y
  }
  
  validation<-data.frame(matrix(0, ncol=8, nrow=nclass))
  colnames(validation)<-c("TP","FN","FP","TN","Accuracy", "Recall","Precision","Specificity")
  rownames(validation)<-name.behav
  
  for (i in 1:(nb)){
    TP=X[i,i]  #TP
    FP=sum(X[i,])-X[i,i]  #FP
    FN=sum(X[,i])-X[i,i]  #FN
    TN=sum(X)-TP-FP-FN  #TN
    
    Accuracy=(TP+TN)/(TP+TN+FP+FN)
    Recall=TP/(TP+FN)
    Precision =TP/(TP+FP)
    Specificity =TN/(TN+FP)
    
    validation[i,1]<-TP
    validation[i,2]<-FN
    validation[i,3]<-FP
    validation[i,4]<-TN
    validation[i,5]<-Accuracy
    validation[i,6]<-Recall
    validation[i,7]<-Precision
    validation[i,8]<-Specificity
  }
  
  global_accuracy=sum(validation$TP+validation$TN)/sum(validation$TP+validation$TN+validation$FP+validation$FN)
  
  return(list("Validation_Matrix"=validation, 'global_accuracy'=  global_accuracy))
  
}


###############################               Part   III                ##################################

#Applies a fonction fun to all the time budget obtained for the individual i 
Budget_fun<-function(X,i,fun){
  Y<-X[[i]]
  Y<-Y[-which(Y[,5]==0),]
  Y_mean<-apply(Y,2,fun)
  return(Y_mean)
}

#Identifies the behaviours expressed at the surface according to the time spent in surface
Prediction_surface=function(X,freq){
  X$num<- ceiling(cumsum(abs(c(0, diff(X$Time_s)>1)))) #Numbering of the surface periods
  n.surf<-max(X$num)
  
  #Creation of a data frame to store the identified behaviours  
  Behaviour<-as.data.frame(matrix(0, ncol=4))
  colnames(Behaviour)<-c("num","time","last","Pred")
  
  
  #for each surface period   
  for (i in 1:n.surf){
    
    Surf<-X[X$num==i,]
    Surf<-as.data.frame(Surf)
    lg<-length(Surf[,1])
    
    #if the surface period lasts less than 6 s
    #the sequence is associated with "Breathing " 
    #the duration of the surface period is calculated
    
    #if the surface period lasts more than 6s :
    ##the sequence is associated with "Stay at surface" 
    #the duration of the surface period is calculated
    
    if (lg<(6*freq)){
      Behaviour[i,1]<-i
      Behaviour[i,2]<-Surf[1,8]
      Behaviour[i,4]<-"Breathing"
      Behaviour[i,3]<-lg/freq
    } else { 
      Behaviour[i,1]<-i
      Behaviour[i,2]<-Surf[1,8]
      Behaviour[i,4]<-"Staying at the surface"
      Behaviour[i,3]<-lg/freq
    }
    
  }

  #Returns a table with the number of each surface period, the starting time, the associated behavior and the duration.
  return (Behaviour)
  
}