% Supporting Information Text File S3. Matlab code for HSMM Viterbi algorithm.
function [path,p] = hsmm_path(prior, transmat, obslik, dur, Dur)
% HSMM_PATH Constrained forward-backward Viterbi algorithm.
%
% Most probable forward-backward Viterbi path for a hidden semi-Markov
% model, when we assume that we know that the sequence starts at a given
% state eta and ends at the same state eta. In our case study, we know a
% foraging trip always starts at state 'cruising' and ends at state
% 'cruising'.
%
% The forward-backward algorithm has been implemented in Fortran by Guedon
% (2003). This Matlab version is based on Guedon (2003) and on Murphy's
% Matlab code (1998) for HMM Viterbi, with the start/end constraints added.
%
% Inputs:
%   prior(i)      = Pr(S_0 = i)
%                   initial probabilities. The state with the largest prior
%                   is taken as eta; if several states tie, the first one
%                   (lowest index) is used.
%   transmat(i,j) = Pr(S_{t+1}=j | S_t=i)
%                   transition probabilities. Exact zeros are replaced by
%                   eps(0) so that their logarithm is finite.
%   obslik(i,t)   = Pr(X_t | S_t=i)
%                   observation probabilities
%   dur(j,u,t)    = Pr(S_{t+u+1}\neq j, S_{t+u-v}=j, v=0,...,u-2 |
%                      S_{t+1}=j, S_t\neq j)
%                   duration probabilities
%   Dur(j,u)      = \sum_{v>=u}(dur(j,v,t))
%                   survivor function for the duration distribution
%
% Outputs:
%   path(t) = s_t, where s_1 ... s_T is the argmax of:
%             Pr(S_0=eta, S_1^{t-1}=s_1^{t-1}, S_t=s_t, S_{t+1}\neq j,
%                S_{t+1}^{T-1}=s_{t+1}^{T-1}, S_T=eta, X_0^T=x_0^T)
%   p       = the log probability of the sequence (maximum over states of
%             alpha(:,T) + beta(:,T))
%
% Internal conventions:
%   * All scores are log-probabilities.
%   * A value of exactly 0 in alpha/beta/beta3 is treated as "not set".
%   * psi(j,1:2,t) = [previous state, sojourn length] for the forward pass;
%     these are the only back-pointers read during backtracking.
%     psi(j,3:4,t) holds the analogous pointers of the backward pass.

T = size(obslik, 2);
prior = prior(:);
S = length(prior);

% Index of the constrained start/end state. The second argument of find
% keeps eta scalar even when the maximum prior is not unique.
eta = find(prior == max(prior), 1);

alpha = zeros(S,T);
beta = zeros(S,T);
beta2 = zeros(S,T);
beta3 = zeros(S,T,S);
psi = zeros(S,4,T);
path = zeros(1,T);

% Avoid log(0) = -Inf for forbidden transitions.
transmat(transmat==0) = eps(0);

% ------------------------------------------------------------------
% forward initialization
% ------------------------------------------------------------------
t = 1;
alpha(:,t) = log(prior.*dur(:,1,t).*obslik(:,t));

t = 2;
for j = 1:S
    alpha(j,t) = log(obslik(j,t));
    if j == eta
        % Still in the initial sojourn of eta (length 2 so far).
        alpha(j,t) = alpha(j,t) + log(obslik(j,t-1)) + log(dur(j,2,t)) + log(prior(j));
    else
        % Entered j at t=2 coming from eta at t=1.
        alpha(j,t) = alpha(j,t) + log(dur(j,1,t)) + log(transmat(eta,j)) + alpha(eta,t-1);
    end
end
psi(:,1:2,t) = repmat([eta,1],S,1);

% ------------------------------------------------------------------
% forward recursion
% ------------------------------------------------------------------
for t = 3:T-1
    for j = 1:S
        % Longest sojourn in j that is preceded by a transition into j.
        if j == eta
            limu = t-2;
        else
            limu = t-1;
        end
        us = zeros(1,limu);
        maxi = zeros(1,limu);
        for u = 1:limu
            % Best predecessor state for a sojourn of length u ending at t.
            [us0,maxi(u)] = max(log(transmat(:,j))+alpha(:,t-u));
            if us0 == 0
                maxi(u) = 0;
            end
            us(u) = sum(log(obslik(j,t-u+1:t-1))) + log(dur(j,u,t)) + us0;
        end
        [us2,usa2] = max(us);
        if j == eta
            % Second candidate: the chain has been in eta since the start.
            [alpha(j,t),orden] = max([us2, ...
                sum(log(obslik(j,1:t-1))) + log(dur(j,t,t)) + log(prior(j))]);
        else
            alpha(j,t) = us2;
            orden = 1;
        end
        if alpha(j,t) == 0
            orden = 0;
        else
            alpha(j,t) = log(obslik(j,t))+alpha(j,t);
        end
        if orden == 1
            psi(j,1:2,t) = [maxi(usa2),usa2];
        elseif orden == 2
            psi(j,1:2,t) = [j,t];
        end
    end
end

% Last time step: same recursion, but the final sojourn is right-censored,
% so the survivor function Dur replaces the duration probability dur.
t = T;
for j = 1:S
    if j == eta
        limu = t-2;
    else
        limu = t-1;
    end
    us = zeros(1,limu);
    maxi = zeros(1,limu);
    for u = 1:limu
        [us0,maxi(u)] = max(log(transmat(:,j))+alpha(:,t-u));
        if us0 == 0
            maxi(u) = 0;
        end
        us(u) = sum(log(obslik(j,t-u+1:t-1)))+log(Dur(j,u))+us0;
    end
    [us2,usa2] = max(us);
    if j == eta
        [alpha(j,t),orden] = max([us2, ...
            sum(log(obslik(j,1:t-1)))+log(Dur(j,t))+log(prior(j))]);
    else
        alpha(j,t) = us2;
        orden = 1;
    end
    if alpha(j,t) == 0
        orden = 0;
    else
        alpha(j,t) = log(obslik(j,t))+alpha(j,t);
    end
    if orden == 1
        psi(j,1:2,t) = [maxi(usa2),usa2];
    elseif orden == 2
        psi(j,1:2,t) = [j,t];
    end
end

% ------------------------------------------------------------------
% backward initialization
% ------------------------------------------------------------------
% Here t == T. The sequence must end in eta, so every other state gets a
% very large negative score at the final time step. The index must be eta
% itself, not the loop variable j, which equals S after the loop above.
ind = find(1:S ~= eta);
beta(ind,t) = repmat(log(eps(0)),length(ind),1);

t = T-1;
beta(:,t) = repmat(log(obslik(eta,t+1)) + log(Dur(eta,1)),S,1) + log(transmat(:,eta));
psi(:,3:4,t) = repmat([eta,T-t],S,1);

% ------------------------------------------------------------------
% backward recursion
% ------------------------------------------------------------------
for t = T-2:-1:1
    for j = 1:S
        vs = zeros(T-1-t,S);
        vs2 = zeros(1,S);
        vsa2 = zeros(1,S);
        for k = 1:S
            if k ~= j
                % Score of leaving j at t and staying v steps in k.
                for v = 1:T-t-1
                    vs(v,k) = sum(log(obslik(k,t+1:t+v))) + log(dur(k,v,t)) + beta(k,t+v);
                end
                [vs2(k),vsa2(k)] = max(vs(:,k));
                beta3(j,t,k) = vs2(k) + log(transmat(j,k));
            end
        end
        % Only successor states that received a score are considered.
        ind = find(beta3(j,t,:) ~= 0);
        [beta2(j,t),orden2] = max(beta3(j,t,ind));
        orden2 = ind(orden2);
        if j == eta
            beta(j,t) = beta2(j,t);
            orden = 1;
        else
            % Second candidate: jump to eta and stay there until T.
            [beta(j,t),orden] = max([beta2(j,t), ...
                sum(log(obslik(eta,t+1:T))) + log(Dur(eta,T-t+1)) + log(transmat(j,eta))]);
        end
        if beta(j,t) == 0
            orden = 0;
        end
        if orden == 1
            psi(j,3:4,t) = [orden2,vsa2(orden2)];
        elseif orden == 2
            psi(j,3:4,t) = [eta,T-t];
        end
    end
end

gamma = alpha + beta;

% ------------------------------------------------------------------
% termination
% ------------------------------------------------------------------
[p,path0] = max(gamma(:,T));

% ------------------------------------------------------------------
% path backtracking
% ------------------------------------------------------------------
% Walk backwards one sojourn at a time using the forward back-pointers.
t = T;
while t >= 1
    repet = psi(path0,2,t);
    if t == 1 && repet == 0
        % No pointer is stored at t = 1; the first sample is a sojourn of 1.
        repet = 1;
    end
    path(t-repet+1:t) = repmat(path0,1,repet);
    path0 = psi(path0,1,t);
    t = t-repet;
end
end

% Guedon, Y. (2003). Estimating Hidden Semi-Markov Chains From Discrete Sequences. Journal of Computational and Graphical Statistics, 12(3), 604-639. doi:10.1198/1061860032030