########################################################################################################
#######################    Behavioural inference from signal processing     #######################
###################             animal-borne multi-sensor loggers             ###################
#############                       Jeantet et al. 2020                       #############

#Loading of the required R-packages
library(h5)           #package to open the dataset
library(caTools)      #package to apply the runmean function
library(changepoint)  #package to apply the PELT algorithm
library(signal)       #package to apply a smoothing filter
library(MASS)         #package of LDA
library(rpart)        #package of CART
library(rpart.plot)   #package to plot the decision tree of the CART algorithm
library(xgboost)      #package of Extreme Gradient Boosting
library(randomForest) #package of Random Forest
library(e1071)        #package of SVM

#### Data downloading
#The data have to be downloaded from the DOI: https://doi.org/10.5061/dryad.hhmgqnkd9
#It is recommended to put all the behaviour files in the same folder named "Behaviour"
#and the raw data files in h5 format in the same folder named "Acceleration_h5".

#personal working directory containing the downloaded data
setwd("~/Martinique/Analyse")
#names of the folders containing the Behaviour and h5 files
fileBehaviours<-dir("Behaviour", full.names = T)
fileh5s<-dir("Acceleration_h5", full.names = T)

########################### Visualisation #################################
##installation of the rblt package
install.packages("devtools")
devtools::install_github("sebastien45/rblt")
library(rblt)

#To visualise the raw acceleration and gyroscope signals WITHOUT correction for one individual
i=1 #choose the individual
fileBehaviour<-fileBehaviours[i]
fileh5<-fileh5s[i]
#Visualisation
ll=LoggerList$new()
ll$add(LoggerData$new(fileh5, filebehavior=fileBehaviour,besep=";",besaturation=0.7))
ui=LoggerUI$new(ll)
ui$gui()

########################### Part I ############################
########### Calculation of descriptive statistics from the raw data #################

Model_dive<-list()  #creates a list to contain the descriptive statistics of the diving behaviours
Model_surf<-list()  #creates a list to contain the data of surface behaviours
Dict_behav<-list()  #creates a list to contain the number associated with each behaviour
freq<-20            #sampling frequency of the data (Hz)
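## NOTE: in addition to the packages loaded above, this script calls helper functions that are not
## exported by any of them (h5read as used below, correction, Data_computation, cut_behav_first,
## cut_behav_fin, Selection, Predictors_computation, Freq, Accuracy, the f* classifier wrappers,
## Budget_fun, Prediction_surface). They must be defined or sourced before running the script.
## As an illustration only, the sketch below shows one common way of deriving a Dynamic Body
## Acceleration (DBA) and a Rotational Activity (RA) proxy from tri-axial accelerometer and
## gyroscope channels with a running mean (caTools::runmean); the column names Ax:Az and Gx:Gz are
## assumptions, and the actual Data_computation() used in Part I may differ.
sketch_DBA_RA <- function(data, window_s = 2, freq = 20) {
  acc <- as.matrix(data[, c("Ax", "Ay", "Az")])     #tri-axial acceleration (assumed column names)
  gyr <- as.matrix(data[, c("Gx", "Gy", "Gz")])     #tri-axial angular speed (assumed column names)
  k   <- window_s * freq                            #smoothing window in samples
  static <- apply(acc, 2, caTools::runmean, k = k)  #static (gravitational) component per axis
  DBA <- rowSums(abs(acc - static))                 #ODBA-style dynamic body acceleration
  RA  <- sqrt(rowSums(gyr^2))                       #norm of the angular speed vector
  cbind(DBA = DBA, RA = RA)
}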
#For each individual, we separate the surface behaviours from the diving ones and calculate the descriptive statistics of the diving behaviours
for(i in 1:16){
  fileh5<-fileh5s[i]
  fileBehaviour<-fileBehaviours[i]
  print(c(fileh5,i))
  Tortue<-h5read(fileh5,fileBehaviour,freq)             #opens the file containing the raw data and synchronises the behaviours
  Tortue<-as.data.frame(cbind(Tortue[[1]],Tortue[[2]])) #combines the raw data and the synchronised behaviours

  ###Correction of the acceleration and angular speed tilt: the correction is based on raw data recorded when the animal is inactive, lying flat
  if (i==1){Tortue<-correction(Tortue,1)}
  if (i==2){Tortue<-correction(Tortue,1)}
  if (i==3){Tortue<-correction(Tortue,1)}
  if (i==4){Tortue<-correction(Tortue,1)}
  if (i==5){Tortue<-correction(Tortue,20950)}
  if (i==6){Tortue<-correction(Tortue,34580)}
  if (i==7){Tortue<-correction(Tortue,4440)}
  if (i==11){Tortue<-correction(Tortue,19060)}
  if (i==12){Tortue<-correction(Tortue,34440)}
  if (i==13){Tortue<-correction(Tortue,1)}
  if (i==14){Tortue<-correction(Tortue,16050)}
  if (i==15){Tortue<-correction(Tortue,13400)}
  if (i==16){Tortue<-correction(Tortue,48550)}

  ###Computation of the Dynamic Body Acceleration (DBA) and Rotational Activity (RA)
  X<-Data_computation(Tortue[,1:6],2,freq)
  Tortue<-cbind(Tortue[,1:6],X,Tortue[,7:9])
  rm(X)

  #Keeping only the labeled data
  Tortue<-cut_behav_first(Tortue,freq,10) #Removes the unlabeled raw data 10 seconds before the first behavioural label
  Tortue<-cut_behav_fin(Tortue,freq,30)   #Removes the unlabeled raw data 30 seconds after the last behavioural label

  #Separation of the surface behaviours from the diving behaviours and numbering of the dives
  Tortue$Depth=-Tortue$Depth
  Tortue$num<- ceiling(cumsum(abs(c(0, diff(Tortue$Depth>0.3))))/2)
  Tortue$num[Tortue$Depth<0.3]<-0
  n.dive<-max(Tortue$num)
  print(n.dive)

  #Clustering the behaviours by category
  Tortue<-Selection(Tortue,11)

  #Replacing the behaviours with numbers to facilitate the processing
  #and recording the number associated with each behaviour
  Dict_behav[[i]]<-levels(as.factor(Tortue$Behaviour))
  Tortue$Behaviour<-as.numeric(as.factor(Tortue$Behaviour))
  n.behav<-length(Dict_behav[[i]])

  #Segmentation and calculation of the descriptive statistics
  Tortue_p<-Predictors_computation(Tortue,n.dive,freq,Dict_behav[[i]],n.behav)

  #Storage of the predictors and indication of the corresponding individual's number (as several files can correspond to one individual)
  if(i<3){j=i}
  if(i==3 | i==4){j=3}
  if(i>4 & i<8){j=i-1}
  if(i==8 | i==9 | i==10){j=7}
  if(i>10){j=i-3}
  Model_dive[[i]]<-cbind(j,Tortue_p[[1]])                        #stores the predictors of the diving behaviours with the associated individual's number
  Model_surf[[i]]<-cbind(j,Tortue_p[[2]][Tortue_p[[2]]$num==0,]) #stores the surface periods with the associated individual's number (depth<0.3m or dive_duration<5s)
}
rm(Tortue,Tortue_p,nbrow,n.dive,i,j,freq,fileh5,fileh5s,fileBehaviour,fileBehaviours,datestart)

#Replace the number by the associated behaviour
for (i in 1:length(Model_dive)){
  Predictors<-as.data.frame(Model_dive[[i]])
  for (j in 1:length(Dict_behav[[i]])){
    Predictors[Predictors$Behaviour==j,4]=Dict_behav[[i]][j]
  }
  Predictors[Predictors$Behaviour==(length(Dict_behav[[i]])+1),4]<-"Transition"
  Model_dive[[i]]<-Predictors
  rm(Predictors)
}

#Combining Model_dive by individual
Model_dive[[3]]<-rbind(Model_dive[[3]],Model_dive[[4]])
Model_dive[[4]]<-Model_dive[[5]]
Model_dive[[5]]<-Model_dive[[6]]
Model_dive[[6]]<-Model_dive[[7]]
Model_dive[[7]]<-rbind(Model_dive[[8]],Model_dive[[9]],Model_dive[[10]])
for (j in 8:13){
  Model_dive[[j]]<-Model_dive[[j+3]]
}
Model_dive<-Model_dive[1:13]

#Combining Model_surf by individual
Model_surf[[3]]<-rbind(Model_surf[[3]],Model_surf[[4]])
Model_surf[[4]]<-Model_surf[[5]]
Model_surf[[5]]<-Model_surf[[6]]
Model_surf[[6]]<-Model_surf[[7]]
Model_surf[[7]]<-rbind(Model_surf[[8]],Model_surf[[9]],Model_surf[[10]])
for (j in 8:13){
  Model_surf[[j]]<-Model_surf[[j+3]]
}
Model_surf<-Model_surf[1:13]
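## The Accuracy() helper used throughout Part II is supplied with the authors' companion functions.
## As a hedged illustration of the indicators it is expected to report (overall accuracy plus
## per-behaviour Accuracy, Sensitivity, Precision and Specificity computed from a confusion matrix
## with predictions in rows and observations in columns), a minimal sketch could look like the
## function below; the authors' implementation and return format may differ.
Accuracy_sketch <- function(cf, name.behav) {
  #assumes a square confusion matrix with the same behaviour ordering in rows and columns,
  #and name.behav giving the behaviour labels in that order
  cf <- as.matrix(cf)
  TP <- diag(cf)                       #correctly predicted segments per behaviour
  FP <- rowSums(cf) - TP               #predicted as the behaviour but observed as another
  FN <- colSums(cf) - TP               #observed as the behaviour but predicted as another
  TN <- sum(cf) - TP - FP - FN
  indicators <- data.frame(Behaviour   = name.behav,
                           Accuracy    = (TP + TN) / sum(cf),
                           Sensitivity = TP / (TP + FN),
                           Precision   = TP / (TP + FP),
                           Specificity = TN / (TN + FP))
  list(indicators, sum(TP) / sum(cf))  #per-behaviour indicators and overall accuracy
}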
############################### Part II ##################################
############# Prediction of diving behaviours using supervised machine learning algorithms #################

###Selection of the combinations in which "Feeding" and "Scratching" were not under-represented in the training dataset,
#i.e. in which the training set retains more than 60% of their respective total number of segments
size<-choose(13,4)                #number of possible combinations of 4 test individuals out of the 13 individuals
train<-do.call(rbind, Model_dive) #combines all the segments
total_feeding<-Freq(train[,"Behaviour"])[1]    #retrieves the total number of "Feeding" segments
total_scratching<-Freq(train[,"Behaviour"])[5] #retrieves the total number of "Scratching" segments
name.behav<-unique(train[,"Behaviour"])
index<-c() #to store all the combinations in which "Feeding" and "Scratching" were not under-represented

#For each combination, we check whether the training set retains more than 60% of the total number of "Feeding" and "Scratching" segments
for (k in 1:size){
  test_set<-combn(13,4)[,k]
  training_set=seq(13)[-test_set]
  train<-do.call(rbind, Model_dive[training_set])
  test<- do.call(rbind, Model_dive[test_set])
  test_ind<-test[,c(1:3,5:6)]
  train<-train[,-c(1:3)]
  test<-test[,-c(1:3)]
  X<-Freq(train[,"Behaviour"])
  if (X[1]>(0.60*total_feeding) & X[5]>(0.60*total_scratching)){index<-c(index,k)}
}

###Creation of the data frames to store the results
n<-length(index)
vec_names<-c("CART","SVM","LDA","EGB","RF","VE","WS")
vec_index<-c("Accuracy","Sensitivity","Precision","Specificity")

#Data frame to store the accuracy of each classifier
table_accuracy=data.frame(matrix(0, ncol = 7))
colnames(table_accuracy)=vec_names

#List to store the indicators of accuracy of each classifier for each behaviour
Behaviour<-list()
temp=matrix(1:28, ncol=7)
for (i in 1:7){
  Behaviour[[i]]<-data.frame(matrix(0, nrow = n, ncol = 28))
  for (j in 1:7){
    colnames(Behaviour[[i]])[temp[,j]]<-paste(vec_names[j], vec_index, sep="_")
  }
}

#List of data frames to store the time budgets (one per individual)
budget_WS<-list()
for(i in 1:13){
  budget_WS[[i]]<-data.frame(matrix(0,nrow=n, ncol=7))
  colnames(budget_WS[[i]])<-c("Feeding","Gliding","Resting","Scratching","Swimming","Transition","Other")
}

w<-1 #counter over the selected combinations
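## The classifier wrappers called in the loop below (fCART, fSVM, fLDA, fExtreme_Gradient_Boosting,
## fRandom_forest) are supplied with the authors' companion functions. To indicate the kind of
## object they are expected to return (a confusion matrix and the test-set predictions), here is a
## hypothetical minimal wrapper around randomForest; the third argument is interpreted here as
## mtry, but the authors' wrapper may use it differently.
fRandom_forest_sketch <- function(train, test, mtry) {
  #assumes the class label is in the "Behaviour" column of both data frames
  model <- randomForest::randomForest(Behaviour ~ ., data = train, ntree = 500, mtry = mtry)
  pred  <- predict(model, newdata = test)  #predicted behaviours of the test segments
  cf    <- table(pred, test$Behaviour)     #predictions in rows, observations in columns
  list(cf, pred)
}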
#####Let's go
##For each selected combination, we train and test the seven classifiers and record the time budgets of the tested individuals
for (k in index){
  print(w)

  #Retrieving the indices of the individuals used for the training set and of those kept for the test set
  test_set<-combn(13,4)[,k]
  training_set=seq(13)[-test_set]

  #Combining the data of the individuals into one training set and one test set according to the k-th combination
  train<-do.call(rbind, Model_dive[training_set])
  test<- do.call(rbind, Model_dive[test_set])
  test_ind<-test[,c(1:3,5)]
  train<-train[,-c(1:3)]
  test<-test[,-c(1:3)]
  train$Behaviour<-as.factor(train$Behaviour)
  test$Behaviour<-as.factor(test$Behaviour)

  #Calculation of the number of segments associated with each behaviour (not essential)
  Freq(train$Behaviour)
  Freq(test$Behaviour)
  nclass<-length(unique(train$Behaviour))
  name.behav<-levels(train$Behaviour)

  ###Balancing the training dataset
  #we randomly keep 1000 segments for the over-represented categories
  n.col=length(train)
  transition<-data.frame()
  g<-data.frame(matrix(ncol=(n.col)+1))
  train$num<-as.numeric(train$Behaviour)
  colnames(g)<-colnames(train)
  for (i in 1:length(unique(train$num))){
    transition<-train[train$num==i,]
    n<-length(transition$num)
    if (n>1000){
      alea<-runif(n)
      rang<-rank(alea)
      rang<-rang[1:1000]        #1000 row indices drawn at random without replacement
      transition<-transition[rang,]
    }
    g<-rbind(g,transition)
  }
  g<-na.omit(g)
  train<-g[,-44]                #removes the temporary "num" column
  rm(g,transition)
  train$Behaviour<-as.factor(train$Behaviour)
  test$Behaviour<-as.factor(test$Behaviour)

  ###Prediction of the diving behaviours by the supervised machine learning algorithms
  #For each algorithm:
  #the associated function builds a model from "train" and predicts the behaviours of "test";
  #the predictions of the test dataset are returned and stored in "pred_" for the ensemble classifiers;
  #from the confusion matrix, the function Accuracy calculates the accuracy of the algorithm as well as the indicators Precision, Sensitivity and Specificity associated with each behaviour.

  #Gradient Boosting
  cf_EGB<-fExtreme_Gradient_Boosting(train,test,nclass)
  pred_EGB<-(cf_EGB[[2]])+1     #xgboost class predictions are 0-based, hence the +1 to match the 1-based behaviour numbers
  Gradient<-Accuracy(cf_EGB[[1]],nclass,name.behav)

  #Random Forest
  cf_RF<-fRandom_forest(train,test,14)
  pred_RF<-cf_RF[[2]]
  Random<-Accuracy(cf_RF[[1]],nclass,name.behav)

  #CART
  cf_CART<-fCART(train,test)
  pred_CART<-cf_CART[[2]]
  CART<-Accuracy(cf_CART[[1]],nclass,name.behav)

  #Support Vector Machine
  cf_SVM<-fSVM(train,test)
  pred_SVM<-cf_SVM[[2]]
  SVM<-Accuracy(cf_SVM[[1]],nclass,name.behav)

  #Linear Discriminant Analysis
  cf_LDA<-fLDA(train,test)
  pred_LDA<-cf_LDA[[2]]
  LDA<-Accuracy(cf_LDA[[1]],nclass,name.behav)

  #Storage and comparison of the accuracies of each algorithm
  accuracy<-c(CART[[2]],SVM[[2]],LDA[[2]],Gradient[[2]],Random[[2]])
  print(accuracy)

  ###Creation of the ensemble classifiers: Voting Ensemble and Weighted Sum
  #Combines the predictions (in a numeric format) of the 5 algorithms with the observed behaviours
  Prediction<-as.numeric(as.factor(test$Behaviour))
  Prediction<-cbind(Prediction,pred_CART,pred_SVM,pred_LDA,pred_EGB,pred_RF)
  Prediction<-as.data.frame(Prediction)
  n<-length(Prediction[,1])

  ##The Weighted Sum
  #Combines the Precision values of each behaviour of each algorithm into a matrix
  Precision<-rbind(CART[[1]][,7],SVM[[1]][,7],LDA[[1]][,7],Gradient[[1]][,7],Random[[1]][,7])
  Precision[is.na(Precision)]<- 0
  Precision<-as.matrix(Precision)
  pred_WS<-c() #vector containing the predictions of the Weighted Sum ensemble method

  #for each segment (corresponding to each row)
  for (i in 1:n){
    pred<-as.numeric(Prediction[i,-1]) #the predictions of the five algorithms for the i-th segment
    #Detects which behaviour is predicted by each algorithm
    P=1*outer(1:5,1:8,function (a,b) pred[a]==b)
    P<-t(P)
    P<-as.matrix(P)
    #Calculates the weight of each behaviour according to the Precision indicator of the associated algorithm
    Pred<-P%*%Precision
    a<-diag(Pred)
    #Calculates how many times each behaviour is predicted by the five algorithms for the i-th segment
    sum<-apply(P,1,sum)
    #If several behaviours obtain the same weight, we fall back on how many times each has been predicted by the five algorithms
    if (length(which(a==max(a)))<2){pred_WS<-c(pred_WS,which(a==max(a)))
    }else if (length(which(sum==max(sum) & (a==max(a))))>0){pred_WS<-c(pred_WS,which(sum==max(sum) & (a==max(a))))
    }else{pred_WS<-c(pred_WS,which(sum==max(sum)))}
    #print(c(i,length(pred_WS)))
  }
  Prediction<-cbind(Prediction,pred_WS) #Adds the predictions of the WS method

  ##The Voting Ensemble
  #for each segment
  for (i in 1:n){
    pred<-as.numeric(Prediction[i,c(2:6)]) #the predictions of the five algorithms for the i-th segment
    Prediction[i,8]<-as.numeric(names(sort(table(pred),decreasing=TRUE)[1])) #selects the behaviour most often predicted by the five algorithms
  }
  colnames(Prediction)[8]<-"pred_VE"

  ##Renaming the predictions of the 7 classifiers and the observed behaviours (numbers back to behaviour labels)
  #The observed behaviours
  n<-length(levels(test$Behaviour))
  for (i in 1:n){Prediction[Prediction$Prediction==i,1]<-levels(test$Behaviour)[i]}
  #The predicted behaviours
  n<-length(levels(pred_RF))
  for (i in 1:n){
    Prediction[Prediction$pred_CART==i,2]<-levels(pred_CART)[i]
    Prediction[Prediction$pred_SVM==i,3]<-levels(pred_SVM)[i]
    Prediction[Prediction$pred_LDA==i,4]<-levels(pred_LDA)[i]
    Prediction[Prediction$pred_EGB==i,5]<-levels(pred_RF)[i]
    Prediction[Prediction$pred_RF==i,6]<-levels(pred_RF)[i]
    Prediction[Prediction$pred_WS==i,7]<-levels(pred_RF)[i]
    Prediction[Prediction$pred_VE==i,8]<-levels(pred_RF)[i]
  }

  #Creation of the confusion matrices of the ensemble learners
  #From the confusion matrix, the function Accuracy calculates the accuracy of the ensemble learners as well as the indicators Precision, Sensitivity and Specificity associated with each behaviour.
  cf_VE<-table(Prediction$pred_VE,test$Behaviour)
  cf_WS<-table(Prediction$pred_WS,test$Behaviour)
  Voting<-Accuracy(cf_VE,nclass,name.behav)
  Weighted<-Accuracy(cf_WS,nclass,name.behav)

  #Comparison of the accuracies of each method
  accuracy_full<-c(accuracy,Voting[[2]],Weighted[[2]])
  print(accuracy_full)

  #Storage of the accuracies associated with the k-th combination
  table_accuracy[w,]=accuracy_full

  #Storage of the four indicators (Accuracy, Sensitivity, Precision, Specificity) of each behaviour of each classifier associated with the k-th combination
  for (g in 1:7){
    Behaviour[[g]][w,1:4]<-CART[[1]][g,5:8]
    Behaviour[[g]][w,5:8]<-SVM[[1]][g,5:8]
    Behaviour[[g]][w,9:12]<-LDA[[1]][g,5:8]
    Behaviour[[g]][w,13:16]<-Gradient[[1]][g,5:8]
    Behaviour[[g]][w,17:20]<-Random[[1]][g,5:8]
    Behaviour[[g]][w,21:24]<-Voting[[1]][g,5:8]
    Behaviour[[g]][w,25:28]<-Weighted[[1]][g,5:8]
  }

  #Storage of the time budgets of the tested individuals
  for (l in test_set){
    individu<-test_ind[,1]==l
    prediction<-cbind(test_ind[individu,],pred_WS[individu])
    colnames(prediction)[5]<-"Pred_WS"
    for (i in 1:7){prediction[prediction$Pred_WS==i,5]<-levels(test$Behaviour)[i]}
    budget_WS[[l]][w,"Feeding"]<-sum(prediction[prediction$Pred_WS=="Feeding",4])
    budget_WS[[l]][w,"Gliding"]<-sum(prediction[prediction$Pred_WS=="Gliding",4])
    budget_WS[[l]][w,"Resting"]<-sum(prediction[prediction$Pred_WS=="Resting",4])
    budget_WS[[l]][w,"Scratching"]<-sum(prediction[prediction$Pred_WS=="Scratching",4])
    budget_WS[[l]][w,"Swimming"]<-sum(prediction[prediction$Pred_WS=="Swimming",4])
    budget_WS[[l]][w,"Transition"]<-sum(prediction[prediction$Pred_WS=="Transition",4])
    budget_WS[[l]][w,"Other"]<-sum(prediction[prediction$Pred_WS=="Other",4])
  }

  w<-w+1
}
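## Optional summary (not part of the original workflow): once the loop has finished, the overall
## accuracy of each classifier across the selected combinations can be compared with, for example:
round(rbind(mean = colMeans(table_accuracy),
            sd   = apply(table_accuracy, 2, sd)), 3)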
############################### Part III ##################################
############# Identification of the surface behaviours and calculation of the time budget by individual #################

#Calculation of the mean time budget of the 13 individuals from the selected combinations
freq=20
Budget_WS<-list()
for (i in 1:13){Budget_WS[[i]]<-Budget_fun(budget_WS,i,mean)}
Budget_WS<-do.call(rbind,Budget_WS)
Budget=as.data.frame(matrix(nrow=13,ncol=9))
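## Prediction_surface() (companion functions) assigns "Breathing" or "Staying at the surface" to
## each surface period from its duration. As an illustration of that duration rule only, a
## hypothetical labelling function is sketched below; the 10 s threshold is a placeholder and the
## actual threshold and input/output format are defined in the authors' implementation.
label_surface_sketch <- function(duration_s, threshold_s = 10) {
  ifelse(duration_s < threshold_s, "Breathing", "Staying at the surface")
}
#e.g. label_surface_sketch(c(3, 45)) returns "Breathing" "Staying at the surface"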
#Identification of the surface behaviours of each individual
for (i in 1:13){
  Surface<-Model_surf[[i]] #gets back the surface periods of individual i
  #Calculates the duration of the surface periods and associates the behaviour "Breathing" or
  #"Staying at the surface" with each surface period according to its duration
  surface_behaviours<-Prediction_surface(Surface,freq)
  #Calculates the total time allocated to both behaviours
  budget_breath<-sum(surface_behaviours[surface_behaviours$Pred=="Breathing",3])
  budget_stay<-sum(surface_behaviours[surface_behaviours$Pred=="Staying at the surface",3])
  #Combines the diving and surface time budgets
  Budget[i,1:7]<-Budget_WS[i,]
  Budget[i,8]<-budget_breath
  Budget[i,9]<-budget_stay
}
colnames(Budget)[1:7]<-colnames(Budget_WS)
colnames(Budget)[8:9]<-c("Breathing","Staying at the surface")
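## Optional (not part of the original workflow): assuming all nine columns of Budget are durations
## expressed in the same unit, each individual's time budget can be converted to percentages of its
## total labelled time for easier comparison between individuals:
Budget_percent <- round(100 * Budget / rowSums(Budget), 1)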