Skip to main content
. 2023 Jan 20;23(3):1198. doi: 10.3390/s23031198
Algorithm 2 Generation algorithm for the safe decision controller
Input: $MDP=(S,A,T,R)$, $\tau_r=\langle r,\varphi,\varphi_r,risk,risk_r\rangle$, $done$, $update$
Output: Optimal safety policy π
  1: Init (π)
  2: $s_t \leftarrow s_0$
  3: $a \leftarrow NOP$
  4: while $s_t \neq done$ and $episode < episodes$ do
  5:    // Iterative selection of optimal value
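  6:     $A' \leftarrow A$
  7:    $a \leftarrow choose(A')$ // Select action
  8:    $env \leftarrow$ Environmental sampling
  9:    while $monitor(s_t,a,env)$ do // monitor
10:        $A' \leftarrow A' \setminus \{a\}$
11:        if $A' \neq \emptyset$ then
12:           $a \leftarrow choose(A')$
13:        else
14:           $a \leftarrow choose(a_{conservative} \in A)$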
15:           break
16:        end if
17:    end while
18:    $update(s_t,a,\pi)$ // Update parameters
19:    Go to the next state $s_{t+1}$
20: end while