|
Input: stochastic continuous-time dynamics (Eq. (1)); ct: local cost functions for 0 ≤ t ≤ l; Δ: time step duration; l: number of time steps |
|
Data: x̂: smoothed states; π: control policy; π̄: inverse control policy; vt: cost-to-go function; v̄t: cost-to-come function |
1
|
$\pi_t = 0$, $S_t = 0$, $\mathbf{s}_t = \mathbf{0}$, $s_t = 0$ |
2
|
repeat
|
3
|
$\bar{S}_0 := 0$, $\bar{\mathbf{s}}_0 := \mathbf{0}$, $\bar{s}_0 := 0$ |
4
|
for
t := 0; t < l; t := t + 1 do
|
5
|
$\hat{x}_t = -(S_t + \bar{S}_t)^{-1}(\mathbf{s}_t + \bar{\mathbf{s}}_t)$ (smoothed states) |
6
|
$\hat{u}_t = \pi_t(\hat{x}_t)$, $\hat{x}_{t+1} = g(\hat{x}_t, \hat{u}_t)$ |
7
|
Linearize inverse discrete dynamics around (x̂t+1, ût) (Eq. (16)) |
8
|
Quadratize ct around (x̂t, ût) (Eq. (12)) |
9
|
Compute $\bar{S}_{t+1}$, $\bar{\mathbf{s}}_{t+1}$, $\bar{s}_{t+1}$, $\bar{v}_{t+1}$, $\bar{\pi}_t$ (forward value iteration in Sec. IV-C) |
10
|
end
|
11
|
Quadratize $c_l$ around $\hat{x}_l$ in the form of Eq. (12) to compute $Q_l$, $\mathbf{q}_l$, and $q_l$
|
12
|
$S_l := Q_l$, $\mathbf{s}_l := \mathbf{q}_l$, and $s_l := q_l$. |
13
|
for
t := l – 1; t ≥ 0; t := t – 1 do
|
14
|
$\hat{x}_{t+1} = -(S_{t+1} + \bar{S}_{t+1})^{-1}(\mathbf{s}_{t+1} + \bar{\mathbf{s}}_{t+1})$ (smoothed states) |
15
|
$\hat{u}_t = \bar{\pi}_t(\hat{x}_{t+1})$, $\hat{x}_t = \bar{g}(\hat{x}_{t+1}, \hat{u}_t)$ |
16
|
Linearize stochastic discrete dynamics around (x̂t, ût) (Eq. (11)) |
17
|
Quadratize ct around (x̂t, ût) (Eq. (12)) |
18
|
Compute $S_t$, $\mathbf{s}_t$, $s_t$, $v_t$, $\pi_t$ (backward value iteration in Sec. IV-B) |
19
|
end
|
20
|
until
Converged (e.g., $v_0$ stops changing significantly); |
21
|
return πt for 0 ≤ t ≤ l
|