|
Algorithm 2 CGRL algorithm. |
|
|
Input: $\mathcal{F}_n = \{(x^l, u^l, r^l, y^l)\}_{l=1}^{n}$, $L_f$, $L_\rho$, $x_0$, $T$
|
| Initialization: |
|
D ← n × (T − 1) matrix initialized to zero; |
|
A ← n–dimensional vector initialized to zero; |
|
B ← n–dimensional vector initialized to zero; |
|
Computation of the Lipschitz constants $\{L_{Q_N}\}_{N=1}^{T}$: |
|
$L_{Q_1} \leftarrow L_\rho$; |
|
for $k = 2 \ldots T$ do
|
|
$L_{Q_k} \leftarrow L_\rho + L_f L_{Q_{k-1}}$; |
| end for |
|
t ← T − 2; |
|
while $t > -1$ do
|
| for $i = 1 \ldots n$ do
|
|
$j_0 \leftarrow \arg\max_{j \in \{1, \ldots, n\}} \; r^j - L_{Q_{T-t-1}} \| y^i - x^j \| + B(j)$; |
|
$m_0 \leftarrow \max_{j \in \{1, \ldots, n\}} \; r^j - L_{Q_{T-t-1}} \| y^i - x^j \| + B(j)$; |
| A(i) ← m0; |
| D(i, t + 1) ← j0; \\ best tuple at t + 1 if in tuple i at time t
|
| end for
|
| B ← A; |
| t ← t − 1; |
| end while |
| Conclusion: |
|
S ← (T + 1)–length vector of actions initialized to zero; |
|
$l \leftarrow \arg\max_{j \in \{1, \ldots, n\}} \; r^j - L_{Q_T} \| x_0 - x^j \| + B(j)$; |
|
$S(T + 1) \leftarrow \max_{j \in \{1, \ldots, n\}} \; r^j - L_{Q_T} \| x_0 - x^j \| + B(j)$; \\ best lower bound
|
|
S(1) ← $u^{l}$; \\ CGRL action for t = 0.
|
for t = 0 … T − 2 do
|
| l′ ← D(l, t + 1); |
| S(t + 2,:) ← $u^{l'}$; \\ other CGRL actions
|
| l ← l′; |
| end for |
|
Return: S |
|