Skip to main content
. 2021 Oct 12;23(10):1331. doi: 10.3390/e23101331
Algorithm 1: MCTS(time_limit, iteration_limit, explorationRate)
// explorationRate defines the degree of exploration
root = treeNode(initialState, None)
while (time < time_limit and count < iteration_limit) do
def randomPolicy(state):
    """Roll out ``state`` with uniformly random actions and return the reward.

    Starting from ``state``, repeatedly pick one of
    ``state.getPossibleActions()`` at random and apply it with
    ``state.takeAction`` until ``state.isTerminal()`` is true.

    Args:
        state: Object providing ``isTerminal()``, ``getPossibleActions()``,
            ``takeAction(action)`` and ``getReward()``.

    Returns:
        Whatever ``getReward()`` returns for the terminal state reached.

    Raises:
        Exception: If a non-terminal state offers no possible actions.
    """
    while not state.isTerminal():
        try:
            action = random.choice(state.getPossibleActions())
        except IndexError:
            # random.choice raises IndexError on an empty sequence; report
            # it as an inconsistent state instead of a bare indexing error.
            raise Exception("Non-terminal state has no possible actions: " + str(state))
        # takeAction returns the successor state; continue the rollout there.
        state = state.takeAction(action)
    return state.getReward()
  def selectNode(self, node):
   while not node.isTerminal:
     if node.isFullyExpanded:
      node = self.getBestChild(node, self.explorationConstant)
     else:
      return self.expand(node)
   return node
def expand(self, node):
    """Create one new child of ``node`` for an action not yet expanded.

    Iterates over the actions reported by the node's state, skips those
    that already have an entry in ``node.children``, and builds a child
    ``treeNode`` for the first remaining action. When the number of
    children then equals the number of actions, ``node.isFullyExpanded``
    is set to ``True``.

    Args:
        node: Tree node to expand; must expose ``state`` and a ``children``
            mapping keyed by action.

    Returns:
        The newly created child node.

    Raises:
        Exception: If every possible action already has a child (or there
            are no actions), i.e. there is nothing left to expand.
    """
    # NOTE(review): ``node`` is passed to getPossibleActions here, while
    # randomPolicy calls it without arguments -- confirm the state API.
    actions = node.state.getPossibleActions(node)
    for action in actions:
        # Without this check the first action's child would be rebuilt and
        # overwritten on every call and the node could never become fully
        # expanded when it has more than one action.
        if action in node.children:
            continue
        newNode = treeNode(node.state.takeAction(action), node)
        node.children[action] = newNode
        if len(actions) == len(node.children):
            node.isFullyExpanded = True
        return newNode
    raise Exception("Should never reach here")
  def backpropogate(self, node, reward):
   while node is not None:
    node.numVisits += 1
    node.totalReward += reward
    node = node. Parent