###################################################################### ####### Functions-Automatic identification of free-ranging ####### ####### green turtle behaviours ####### ####### Royal Society Open Science ####### ####### Jeantet et al. (2020) ####### ###################################################################### ############################### Part I ################################## h5read=function(h5file,fileBehaviour,freq){ #Opening the file containing the raw data f=h5file(fileh5,"r") dt=h5attr(f["/"], "datestart") datestart<<-as.POSIXlt(dt, tz="GMT") dset=openDataSet(f,"/data") set<-readDataSet(dset) size=dset@dim nbrow<<-size[1] h5close(f) colnames(set)<-c("AccX","AccY","AccZ","GyrX","GyrY","GyrZ","Depth") #addition of the time sdeb_data=3600*datestart$hou+60*datestart$min+datestart$sec Time_s<-round(seq(from=sdeb_data, by=1/freq, length.out=nbrow),2) set<-cbind(set,Time_s) #Opening behavioural file Behaviour<-read.csv(fileBehaviour,header=T,sep=";")[,1:10] Behaviour$Behavior<-as.character(Behaviour$Behavior) #Synchronisation of the behaviours behav<-data.frame(matrix(0, nrow =nbrow, ncol = 1)) colnames(behav)<-"Behaviour" for (i in 1:nrow(Behaviour)){ a<-freq*round(Behaviour$Start..s.[i],1)+1 b<-freq*round(Behaviour$Stop..s.[i],1)+1 behav[a:b,1]<-as.character(Behaviour$Behavior[i]) } return(list(set,behav)) } correction=function(set,time){ norme<-sqrt(set[,"AccX"]^2+set[,"AccY"]^2+set[,"AccZ"]^2) set[,"AccX"]=set[,"AccX"]/norme set[,"AccY"]=set[,"AccY"]/norme set[,"AccZ"]=set[,"AccZ"]/norme #Tortle laying flat at "time" ax<-set[,"AccX"][time] ay<-set[,"AccY"][time] az<-set[,"AccZ"][time] sin_phi<--ax cos_phi=(ay^2+az^2)^0.5 cos_lambda<-1 sin_lambda<-0 AccX_cor = cos_phi*set[,"AccX"]-sin_phi*set[,"AccZ"] AccY_cor=cos_lambda*set[,"AccY"] AccZ_cor=sin_phi*set[,"AccX"]+cos_phi*set[,"AccZ"] GyrX_cor=cos_phi*set[,"GyrX"]-sin_phi*set[,"GyrZ"] GyrY_cor=set[,"GyrY"] GyrZ_cor=sin_phi*set[,"GyrX"]+cos_phi*set[,"GyrZ"] set[,"AccX"]<-AccX_cor set[,"AccY"]<-AccY_cor set[,"AccZ"]<-AccZ_cor set[,"GyrX"]<-GyrX_cor set[,"GyrY"]<-GyrY_cor set[,"GyrZ"]<-GyrZ_cor return(set) } #Calculates the derived-parameters (DBA and RA) from the raw acceleration data. Takes into input parameters the value table (X) and the temporal window (mean) of the running mean. Data_computation=function(X,mean,freq){ ##Associates the accelerometer axes to the animal axes X<-data.matrix(X) AccX<--X[,'AccX'] AccY<--X[,'AccY'] AccZ<-X[,'AccZ'] ## Calculation of the Static acceleration (St) by computing a running mean with a temporal window of "mean" n<-mean*freq StX<-runmean(AccX, n, endrule="mean", alg="C") StY<-runmean(AccY, n, endrule="mean", alg="C") StZ<-runmean(AccZ, n, endrule="mean", alg="C") ## Calculation of the Dynamic acceleration (D) DX<-AccX-StX DY<-AccY-StY DZ<-AccZ-StZ #Calculation of the Dynamic Body Acceleration (DBA) and Rotational Activity (RA) dynamique<-DX^2+DY^2+DZ^2 DBA<-sqrt(dynamique) RA<-sqrt(X[,'GyrX']^2+X[,'GyrY']^2+X[,'GyrZ']^2) X<-cbind(DBA,RA) return(X) } cut_behav_first=function(Tortue,freq, cut){ Tortue[Tortue$Behaviour=="Time mark",which(colnames(Tortue)=="Behaviour")]<-"0" behav<-as.numeric(as.factor(Tortue$Behaviour)) b<-which(diff(behav)!=0)[1]-cut*freq Tortue<-Tortue[b:length(Tortue[,1]),] } cut_behav_fin=function(Tortue,freq, cut){ Tortue[Tortue$Behaviour=="Time mark",which(colnames(Tortue)=="Behaviour")]<-"0" behav<-as.numeric(as.factor(Tortue$Behaviour)) b<-which(diff(behav)!=0)[length(which(diff(behav)!=0))] if ((b+cut*freq)0.001){ m.pelt[[w]]=cpt.meanvar(segment[,'DBA'],method="PELT",penalty="Manual",pen.value=50) #plot(m.pelt[[w]]) data.segmented[[w]]<-segment w<-w+1} #If horizontal phase : PELT segmentation according to pitch speed else{ m.pelt[[w]]=cpt.var(segment[,'GyrY'],method="PELT",penalty="Manual",pen.value=20) #plot(m.pelt[[w]]) data.segmented[[w]]<-segment w<-w+1 } } return(list(m.pelt,data.segmented)) } #high-filteres with a running window of 1 s #computes the squared differences (gy -mean(gy))2 #gets back the max and the mean fun.noise=function(y){ y_smooth=sgolayfilt(y, p = 2, n = 19, m = 0, ts = 1) # p= order of approximation, and n= window size high_freq=y-y_smooth HF_mean=mean((high_freq-mean(high_freq))^2) HF_max=max((high_freq-mean(high_freq))^2) return(c(HF_mean,HF_max)) } #Selects the most expressed behaviours behav_select<-function(y, behavior_list,n.behav){ x<-as.numeric(names(sort(table(y),decreasing=TRUE))) if (length(x)==1){Behavior<-x} else if (max(table(y))>trunc(0.6*length(y))){Behavior<-names(which(table(y)==max(tabulate(y))))} else{Behavior<-n.behav+1} Behavior=as.numeric(Behavior) return(Behavior) } ###Segments the behavioraul sequences and calculates the descriptive statistics of the obtained segments. Predictors_computation<-function(X,n.dive,freq,behaviour_list,n.behav){ vec.names<-c("AccX","AccY","AccZ","GyrX","GyrY","GyrZ","DBA","RA") #vector containing all the variables that we want to described statically Predictors<-c() #to store the predictors ##for each dive for (i in 1:n.dive){ Dive<-X[X$num==i,] ###we look at the dive duration #If the dive lasts more than 5 sec, we calculate the predictors if (length(Dive[,1])>(5*freq)){ #plot(-Dive[,7],type='l') segment<-seg.2algo(Dive,freq) #segmentation of the dive #Retrieves the segments for (k in 1:length(segment[[2]])){ seg<-list() m.pelt<-segment[[1]][[k]] dt.segment<-segment[[2]][[k]] #Deletes the segments shorter than 2 sec if (length(dt.segment[,1])>2*freq){ #plot(m.pelt) seg.ind<-m.pelt@cpts ind=1 lg<-length(seg.ind) w<-1 while(w2){ seg.ind<-seg.ind[-c(1,length(seg.ind))] debut<-c(1,seg.ind+1) fin<-c(seg.ind,length(dt.segment[,1])) df<-cbind(debut,fin) seg<-lapply(seq(1,length(df[,1])), function(a) dt.segment[df[a,1]:df[a,2],]) }else{seg[[1]]=dt.segment} }else {seg[[1]]=dt.segment} ##Calculates the descriptive variables for each segment for (g in 1:length(seg)){ #Calculates mean, variance, minimum and data<-seg[[g]][,c(1:8)] predictor=lapply(list(mean=mean, var=var, max=max, min=min), function(f) apply(data,2,f)) predictor<-do.call(rbind,predictor) rownames(predictor)<-c("mean","variance","minimum","maximum") high_frequency<-lapply(vec.names[c(1:2,4:5)], function(x) fun.noise(data[,x])) high_frequency<-do.call(cbind,high_frequency) colnames(high_frequency)<-c("AccX","AccY","GyrX","GyrY") rownames(high_frequency)<-c("HF_mean","HF_max") high_freq<-c() for (z in colnames(high_frequency)){ t.high_freq<-t(high_frequency[,z]) colnames(t.high_freq)<-paste(z,rownames(high_frequency),sep="_") high_freq<-cbind(high_freq,t.high_freq) } predictors<-c() for (z in vec.names){ pred<-t(predictor[,z]) colnames(pred)<-paste(z,colnames(pred),sep="_") predictors<-cbind(predictors,pred) } Behaviour<-behav_select(seg[[g]][,"Behaviour"],behaviour_list,n.behav) Depth_difference<-seg[[g]][length(seg[[g]][,1]),"Depth"]-seg[[g]][1,"Depth"] predictors<-cbind(Dive[1,"num"],seg[[g]][1,"Time_s"],Behaviour,length(data[,1]),Depth_difference,predictors,high_freq) colnames(predictors)[1:4]<-c("num","time","Behaviour","last") Predictors<-rbind(Predictors,predictors) rm(predictors,pred,predictor) } } }else{X[X$num==i,"num"]<-0} #plongée trop courte passe en surface } return(list(Predictors,X)) } ############################### Part II ################################## #Calculates the number of segments associated with each behavior Freq=function(X){ x<-as.factor(X) a<-tabulate(x,nbin=length(levels(x))) a<-as.matrix(a) rownames(a)<-levels(x) return(a) } #Each function takes the training and test datasets as input parameters (additional parameters can be required) #Returns the confusion matrix, the predicted behaviors and the model fExtreme_Gradient_Boosting<-function(train,test,nclass){ labels<-as.numeric(train$Behaviour)-1 test_label<-as.numeric(test$Behaviour)-1 new_tr=model.matrix(~.+0,data=train[,-1]) new_ts=model.matrix(~.+0, data=test[,-1]) dtrain<-xgb.DMatrix(data=new_tr, label=labels) dtest<-xgb.DMatrix(data=new_ts, label=test_label) params<-list(booster ="gbtree", objectif="multi:softprob", num_class=nclass, eta=0.3, max_depth=3, subsample=0.5, colsample_bytree=0.5, seed=1) xgbcv<-xgb.cv(params=params, data=dtrain, label=labels, nround=500, nfold=5, showsd = T, stratified = F, print_every_n =50, early_stopping_rounds= 20, maximize = F) y<-xgbcv$best_iteration xgb<-xgb.train(params=params, data=dtrain, nround=y,print_every_n = 30, early_stop_round = 10, maximize = F , eval_metric = "merror") pred<-predict(xgb, dtest) #Confusion matrice cf_gb<-table(pred, test_label) rownames(cf_gb)<-levels(train$Behaviour)[c(as.numeric(rownames(cf_gb))+1)] colnames(cf_gb)<-levels(test$Behaviour) return(list(cf_gb,pred,xgb)) } fRandom_forest<-function(train, test,n.mtry){ #To compute only one time to determine "n.mtry" #model.res<-tuneRF(train, train$Behavior, stepFactor=1.5) rf<-randomForest(Behaviour~.,data=train,importance=TRUE,do.trace=FALSE , proximity=TRUE, ntree=300, mtry=n.mtry) pred<-predict(rf,test, type="class") cf_RF<-table(pred,test$Behaviour) return(list(cf_RF,pred,rf)) } fCART<-function(train, test){ tree<-rpart(train$Behaviour~.,data=train, method="class") bestcp <- tree$cptable[which.min(tree$cptable[,"xerror"]),"CP"] tree<-prune(tree, cp=bestcp) #Plots the decision tree #par(xpd = TRUE) #plot(tree, compress=TRUE) #text(tree) #prp(tree, yesno=T, faclen=0,type=0,extra=0,fallen.leaves=T,varlen=0, cex=0.8 ,split.font=1,border.col=0,font=2) pred<-predict(tree,test,type="class") cf_cart<-table(pred, test$Behaviour ) return(list(cf_cart,pred,tree)) } fSVM<-function(train,test){ svm<-svm(Behaviour~., data=train, kernel="radial",cost=5,gamma=0.001) pred<-predict(svm,test,type="class") cf_svm<-table(pred, test$Behaviour) return(list(cf_svm,pred,svm)) } fLDA<-function(train,test){ lda<-lda(Behaviour~., data=train) pred<-predict(lda, test,type="class") cf_lda<-table(pred$class, test$Behaviour) return(list(cf_lda,pred$class)) } #Evaluation the efficiency of the algorithm by computing the number of the well-identified behaviors (true positive, TP, and true negative, TN) and of those considered misclassified (false negative, FN and false positive, FP) #from the confusion matrix X. #Computes three indicators specific to each behavior: Precision, Sensitivity, Specificity #Returns the accuracy of the algorithm and a table containing all the indidcators Accuracy<-function(X,nclass,name.behav){ nb<-length(colnames(X)) vec_col<-colnames(X) vec_row<-rownames(X) if (length(vec_col)!=length(vec_row)){ P=1*outer(vec_col,vec_row,function (a,b) a==b) sum_row<-apply(P,1,sum) sum_col<-apply(P,2,sum) Y<-data.frame(matrix(NA, ncol=nclass, nrow=nclass)) if (length(which(sum_row==0))==0 & length(which(sum_col==0))>0){ Y[,which(sum_col==0)]<-0 Y[,-which(sum_col==0)]<-X} else if (length(which(sum_row==0))>0 & length(which(sum_col==0))==0){ Y[which(sum_row==0),]<-0 Y[-which(sum_row==0),]<-X }else { Y[which(sum_row==0),]<-0 Y[,which(sum_col==0)]<-0 Y[-which(sum_row==0),-which(sum_col)==0]<-X } X<-Y } validation<-data.frame(matrix(0, ncol=8, nrow=nclass)) colnames(validation)<-c("TP","FN","FP","TN","Accuracy", "Recall","Precision","Specificity") rownames(validation)<-name.behav for (i in 1:(nb)){ TP=X[i,i] #TP FP=sum(X[i,])-X[i,i] #FP FN=sum(X[,i])-X[i,i] #FN TN=sum(X)-TP-FP-FN #TN Accuracy=(TP+TN)/(TP+TN+FP+FN) Recall=TP/(TP+FN) Precision =TP/(TP+FP) Specificity =TN/(TN+FP) validation[i,1]<-TP validation[i,2]<-FN validation[i,3]<-FP validation[i,4]<-TN validation[i,5]<-Accuracy validation[i,6]<-Recall validation[i,7]<-Precision validation[i,8]<-Specificity } global_accuracy=sum(validation$TP+validation$TN)/sum(validation$TP+validation$TN+validation$FP+validation$FN) return(list("Validation_Matrix"=validation, 'global_accuracy'= global_accuracy)) } ############################### Part III ################################## #Applies a fonction fun to all the time budget obtained for the individual i Budget_fun<-function(X,i,fun){ Y<-X[[i]] Y<-Y[-which(Y[,5]==0),] Y_mean<-apply(Y,2,fun) return(Y_mean) } #Identifies the behaviours expressed at the surface according to the time spent in surface Prediction_surface=function(X,freq){ X$num<- ceiling(cumsum(abs(c(0, diff(X$Time_s)>1)))) #Numbering of the surface periods n.surf<-max(X$num) #Creation of a data frame to store the identified behaviours Behaviour<-as.data.frame(matrix(0, ncol=4)) colnames(Behaviour)<-c("num","time","last","Pred") #for each surface period for (i in 1:n.surf){ Surf<-X[X$num==i,] Surf<-as.data.frame(Surf) lg<-length(Surf[,1]) #if the surface period lasts less than 6 s #the sequence is associated with "Breathing " #the duration of the surface period is calculated #if the surface period lasts more than 6s : ##the sequence is associated with "Stay at surface" #the duration of the surface period is calculated if (lg<(6*freq)){ Behaviour[i,1]<-i Behaviour[i,2]<-Surf[1,8] Behaviour[i,4]<-"Breathing" Behaviour[i,3]<-lg/freq } else { Behaviour[i,1]<-i Behaviour[i,2]<-Surf[1,8] Behaviour[i,4]<-"Staying at the surface" Behaviour[i,3]<-lg/freq } } #Returns a table with the number of each surface period, the starting time, the associated behavior and the duration. return (Behaviour) }