|
Algorithm 1: MCTS(time_limit, iteration_limit, explorationRate) // explorationRate defines the degree of exploration |
| root = treeNode(initialState, None) While (time<time_limit & count<iteration_limit) do randomPolicy(state): while not state.isTerminal(): try: action = random.choice(state.getPossibleActions()) except IndexError: raise Exception("Non-terminal state has no possible actions: " + str(state)) state = state.takeAction(action) return state.getReward() def selectNode(self, node): while not node.isTerminal: if node.isFullyExpanded: node = self.getBestChild(node, self.explorationConstant) else: return self.expand(node) return node def expand(self, node): actions = node.state.getPossibleActions(node) for action in actions: newNode = treeNode(node.state.takeAction(action), node) node.children[action] = newNode if len(actions) == len(node.children): node.isFullyExpanded = True return newNode def backpropogate(self, node, reward): while node is not None: node.numVisits += 1 node.totalReward += reward node = node.parent |