Skip to main content
Sensors (Basel). 2021 Sep 11;21(18):6092. doi: 10.3390/s21186092
Algorithm 1: QMAC-2ND
1: Initialize Q(st, at) = 0, st = 0, at = 0, CWt = 0, Action[7] = {3, 7, 15, 31, 63, 127, 255},
    Reward = [1, 0.95, 0.9, 0.85, 0.8, 0.75, 0.7], at t = 0;
2: repeat for each episode
3:       procedure Update_Environment()
4:           Get the number of two-hop neighbors NNei2;
5:           Update st based on NNei2;
6:           if TTrained(s) < TSet and ε > 0.05
7:               Update α, ε according to Equation (3);
8:           else
9:               α = ε = 0.05;
10:         end if
11:     end procedure
12:     Choose_Action() at according to ε-greedy;
13:     Choose_ReplyNode() Nx according to AckFactor(x);
14:     Send packets using CWt = Action[at];
15:     procedure Wait_ACK()
16:         if get ACK message
17:             rt = Reward[at];
18:         else
19:             rt = −1; AckFactor(x) = 0;
20:         end if
21:     end procedure
22:     Update Q-table according to Equation (1);
23:     Next episode;
24: until s is terminal;