diff --git a/README.md b/README.md index b4ddc92..619a92c 100644 --- a/README.md +++ b/README.md @@ -34,10 +34,12 @@ Voici la liste des options : blancs * `-bd` | `--black-depth-exploration`: niveau d'eploration de l'arbre de jeu pour le joueur au pions noirs, valable pour les moteurs `minmax` et - `alphabeta` + `alphabeta`. Utilisé aussi pour définir la profondeur de départ pour + l'*iterative deepening* * `-wd` | `--white-depth-exploration`: niveau d'eploration de l'arbre de jeu pour le joueur au pions noirs, valable pour les moteurs `minmax` et - `alphabeta` + `alphabeta`. Utilisé aussi pour définir la profondeur de départ pour + l'*iterative deepening* * `-bh` | `--black-heuristic-engine`: moteur heuristique utilisé pour l'exploration de l'arbre de jeu du joueur noir (valable pour les moteur de jeu `minmax` et `alphabeta`) @@ -53,7 +55,8 @@ affichée avec l'option `-d`. ## Choix d'implémentation J'ai avant tout privilégié la personnalisation des différentes paramètres des -différents moteurs composant le jeu. +différents moteurs composant le jeu. Il est ainsi plus aisé de tester le +fonctionnement des différents moteurs. ### Classes PlayerEngine @@ -85,15 +88,19 @@ Quatre moteur "joueurs" sont implémentés : maximale définie; * `AphaBeta` utilise *AlphaBeta* pour déterminer le coup à jouer avec une profondeur maximale définie; - * `IterativeDeepeningMinmax` utilise Minmax avec un temps maximum autorisé + * `MinmaxDeepeningMinmax` utilise Minmax avec un temps maximum autorisé; + * `AlphaBetaDeepening` utilise AlphaBeta avec un temps maximum autorisé -Le choix de ces moteur se fait en ligne de commande avec +Le choix de ces moteurs se fait en ligne de commande avec les options évoquées +plus haut. ### Classes HeuristicsEngine Plusieurs classes impémentent plusieurs méthodes pour le calcul de -l'heuristique. Comme nous l'avons vu, les moteurs peuvent être choisis en ligne -de commande et de façon indépendante pour les joueurs blanc et noir. +l'heuristique. 
Toutes les implémentations se trouvent dans le fichier +`./src/classes/Heuristic.py`. Comme nous l'avons vu, les moteurs peuvent être +choisis en ligne de commande et de façon indépendante pour les joueurs blanc et +noir. Trois implementation sond disponibles: @@ -112,9 +119,44 @@ ordre d'importance : 1. Les coins représentent les parties les plus importantes; 2. Ensuite vient les bords; - 3. Et enfin les coins. + 3. Et enfin le centre. + +Cependant certaines parties du plateau de jeu sont à éviter : + + * Les cases autour des coins, car elles laisseraient la possibilité au joueur + adverse de placer un de ses pions dans le coin. La case en diagonale du coin + est particulièrement sensible. + * Les lignes juste avant les bords, placer un pion à cet endroit permettrait à + l'adversaire de placer un pion sur le bord. Ce pion sera alors plus + difficilement *"capturable"*. Les poids affectés sont personnalisable via l'options `--weight`, par défaut -nous avons 2, 10 et 25. +nous avons -5, 2, 10 et 25. + +Voici le tableau des poids par défaut, il peut être affiché avec l'option +`--show-weights-table`: + +```text +./game.py --show-weights-table +Starting PyReverso... + + | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | +-------------------------------------------- + 0 |25 |-5 |10 | 8 | 8 | 8 | 8 |10 |-5 |25 | + 1 |-5 |-7 |-3 |-3 |-3 |-3 |-3 |-3 |-7 |-5 | + 2 |10 |-3 | 0 | 0 | 0 | 0 | 0 | 0 |-3 |10 | + 3 | 8 |-3 | 0 | 2 | 2 | 2 | 2 | 0 |-3 | 8 | + 4 | 8 |-3 | 0 | 2 | 2 | 2 | 2 | 0 |-3 | 8 | + 5 | 8 |-3 | 0 | 2 | 2 | 2 | 2 | 0 |-3 | 8 | + 6 | 8 |-3 | 0 | 2 | 2 | 2 | 2 | 0 |-3 | 8 | + 7 |10 |-3 | 0 | 0 | 0 | 0 | 0 | 0 |-3 |10 | + 8 |-5 |-7 |-3 |-3 |-3 |-3 |-3 |-3 |-7 |-5 | + 9 |25 |-5 |10 | 8 | 8 | 8 | 8 |10 |-5 |25 | +``` + + +### À savoir: + +Les poids utilisés pour les heuristiques sont importants. 
[reversi]:https://www.coolmathgames.com/blog/how-to-play-reversi-basics-and-best-strategies diff --git a/src/classes/CustomFormater.py b/src/classes/CustomFormater.py index 02ad065..ac2926f 100644 --- a/src/classes/CustomFormater.py +++ b/src/classes/CustomFormater.py @@ -8,7 +8,7 @@ class CustomFormatter(logging.Formatter): red = "\x1b[31;20m" bold_red = "\x1b[31;1m" reset = "\x1b[0m" - format = "%(levelname)s: %(message)s (%(filename)s:%(lineno)d)" + format = "%(levelname)s: %(message)s" FORMATS = { logging.DEBUG: blue + format + reset, diff --git a/src/classes/Engines.py b/src/classes/Engines.py index 4ad9d13..6be23b4 100644 --- a/src/classes/Engines.py +++ b/src/classes/Engines.py @@ -1,4 +1,4 @@ -import random, math, time +import random, math, time, signal class PlayerEngine: def __init__(self, player, logger, heuristic, options: dict = {}): @@ -7,6 +7,7 @@ class PlayerEngine: self.logger = logger self.heuristic = heuristic self.options = options + self.interrupt_search = False self.logger.info("Init engine {}, options:{}".format( self.__class__.__name__, self.options @@ -17,10 +18,16 @@ class PlayerEngine: self.__class__.__name__, self.get_player_name(self.player) )) + + def get_player_moves(self, board): + moves = board.legal_moves() + if self.options['randomize_moves'] is True: + random.shuffle(moves) + return moves @staticmethod def get_player_name(player): - return 'White (O)' if player is 2 else 'Black (X)' + return 'White (O)' if player == 2 else 'Black (X)' class RandomPlayerEngine(PlayerEngine): def get_move(self, board): @@ -60,19 +67,25 @@ class HumanPlayerEngine(PlayerEngine): return [board._nextPlayer, x, y] class MinmaxPlayerEngine(PlayerEngine): + def get_move(self, board): super().get_move(board) + move, score = self._call(board, self.options['depth']) + return move + + def _call(self, board, depth): value = -math.inf nodes = 1 leafs = 0 - move = '' - for m in board.legal_moves(): + move = [] + moves = self.get_player_moves(board) + for m in 
moves: board.push(m) - v, n, l = self.checkMinMax(board, False, self.options['depth'] - 1) + v, n, l = self.checkMinMax(board, False, depth - 1) if v > value: value = v move = m - self.logger.debug("found a better move: {} (heuristic:{})".format( + self.logger.debug("\tfound a better move: {} (heuristic:{})".format( move, value )) @@ -80,23 +93,25 @@ class MinmaxPlayerEngine(PlayerEngine): leafs += l board.pop() - self.logger.debug("Tree statistics:\n\tnodes:{}\n\tleafs:{}".format( + self.logger.info("Tree statistics:\n\tnodes:{}\n\tleafs:{}\n\theuristic:{}".format( nodes, - leafs - )) - return move + leafs, + value + )) + return move, value def checkMinMax(self, board, friend_move:bool, depth :int = 2): nodes = 1 leafs = 0 - move = '' - if depth == 0: + move = [] + if depth == 0 or board.is_game_over() or self.interrupt_search: leafs +=1 return self.heuristic.get(board, self.player), nodes, leafs if friend_move: value = -math.inf - for m in board.legal_moves(): + moves = self.get_player_moves(board) + for m in moves: board.push(m) v, n, l = self.checkMinMax(board, False, depth - 1) if v > value: @@ -107,7 +122,8 @@ class MinmaxPlayerEngine(PlayerEngine): else: value = math.inf - for m in board.legal_moves(): + moves = self.get_player_moves(board) + for m in moves: board.push(m) v, n, l = self.checkMinMax(board, True, depth - 1) if v < value: @@ -121,126 +137,148 @@ class AlphabetaPlayerEngine(PlayerEngine): def get_move(self, board): super().get_move(board) + move, heuristic = self._call(board, self.options['depth']) + return move + + def _call(self, board, depth): + self.logger.debug("Enter AlphaBeta function") alpha = -math.inf beta = math.inf nodes = 1 leafs = 0 move = [] - value = -math.inf - for m in board.legal_moves(): + moves = self.get_player_moves(board) + for m in moves: board.push(m) - v, n, l = self.checkAlphaBeta(board, False, self.options['depth'] - 1, alpha, beta) + value, n, l = self.checkAlphaBeta(board, False, depth - 1, alpha, beta) board.pop() 
- alpha = max(alpha,v) nodes += n leafs += l - if alpha >= value: - value = alpha + if value >= alpha: + alpha = value move = m - self.logger.debug("found a better move: {} (heuristic:{})".format( + self.logger.debug("\t-> found a better move: {} | heuristic:{})".format( move, alpha )) - self.logger.debug("Tree statistics:\n\tnodes:{}\n\tleafs:{}".format( + self.logger.info("Tree statistics:\n\tnodes:{}\n\tleafs:{}\n\theuristic:{}".format( nodes, - leafs + leafs, + alpha )) - return move + return move, alpha def checkAlphaBeta(self, board, friend_move : bool, depth, alpha, beta): nodes = 1 leafs = 0 - if depth == 0 : + if depth == 0 or board.is_game_over() or self.interrupt_search: leafs +=1 return self.heuristic.get(board, self.player), nodes, leafs if friend_move: - value = -math.inf - for m in board.legal_moves(): + + moves = self.get_player_moves(board) + for m in moves: board.push(m) v, n, l = self.checkAlphaBeta(board, False, depth - 1, alpha, beta) board.pop() - alpha = max(value,v) + alpha = max(alpha,v) nodes += n leafs += l if alpha >= beta: - self.logger.debug("Alpha pruning - alpha:{} / beta:{}".format( - alpha, - beta - )) return beta, nodes, leafs return alpha, nodes, leafs else: - value = math.inf - for m in board.legal_moves(): + moves = self.get_player_moves(board) + for m in moves: board.push(m) v, n, l = self.checkAlphaBeta(board, True, depth - 1, alpha, beta) board.pop(); - beta = min(beta,v) + beta = min(beta, v) nodes += n leafs += l if alpha >= beta: - self.logger.debug("Beta pruning - alpha:{} / beta:{}".format( - alpha, - beta - )) return alpha, nodes, leafs return beta, nodes, leafs -class MinmaxDeepeningPlayerEngine(PlayerEngine): - + +class MinmaxDeepeningPlayerEngine(MinmaxPlayerEngine): + def get_move(self, board): super().get_move(board) - value = -math.inf - nodes = 1 - leafs = 0 - move = '' - start_time = time.time() - for m in board.legal_moves(): - board.push(m) - v, n, l = self.checkMinMax(board, False, start_time) - if v > 
value: - value = v - move = m - self.logger.debug("found a better move: {} (heuristic:{})".format( - move, - value - )) - nodes += n - leafs += l - board.pop() + self.interrupt_search = False + + # Get an alarm signal to stop iterations + signal.signal(signal.SIGALRM, self.alarm_handler) + signal.alarm(self.options['time_limit']) + depth = self.options['depth'] + heuristic = -math.inf + move = None + + # We can go deeper than blank place in our board, then we must get + # numbers of avaible place + max_depth = (board.get_board_size()**2) - ( + board.get_nb_pieces()[0] + board.get_nb_pieces()[1]) + + # Iterate depth while our alarm does not trigger and there is enougth + # avaiable move to play + while not self.interrupt_search and depth <= max_depth: + current_move, current_heuristic = self._call(board, depth) + # return the current move onli if heuristic is better than previous + # iteration + if current_heuristic > heuristic: + move = current_move + + depth = depth + 1 + self.logger.debug("id_minmax - depth reached: {} | max depth : {}".format( + depth - 1, + max_depth + )) return move - def checkMinMax(self, board, friend_move:bool, start_time): - nodes = 1 - leafs = 0 - move = '' - if time.time() >= start_time + self.options['time_limit'] or board.is_game_over(): - leafs +=1 - return self.heuristic.get(board, self.player), nodes, leafs - - if friend_move: - value = -math.inf - for m in board.legal_moves(): - board.push(m) - v, n, l = self.checkMinMax(board, False, start_time) - if v > value: - value = v - nodes += n - leafs += l - board.pop() + def alarm_handler(self, signal, frame): + self.logger.debug("Raise SIGALMR Signal") + self.interrupt_search = True - else: - value = math.inf - for m in board.legal_moves(): - board.push(m) - v, n, l = self.checkMinMax(board, True, start_time) - if v < value: - value = v - board.pop(); - nodes += n - leafs += l - return value, nodes, leafs + + +class AlphaBetaDeepeningPlayerEngine(AlphabetaPlayerEngine): + + + def 
get_move(self, board): + self.interrupt_search = False + + # Get an alarm signal to stop iterations + signal.signal(signal.SIGALRM, self.alarm_handler) + signal.alarm(self.options['time_limit']) + depth = self.options['depth'] + heuristic = -math.inf + move = None + + # We can go deeper than blank place in our board, then we must get + # numbers of avaible place + max_depth = (board.get_board_size()**2) - ( + board.get_nb_pieces()[0] + board.get_nb_pieces()[1]) + + # Iterate depth while our alarm does not trigger and there is enougth + # avaiable move to play + while not self.interrupt_search and depth <= max_depth: + current_move, current_heuristic = self._call(board, depth) + # return the current move only if heuristic is better than previous + # iteration can be possible id iteration is stopped by timer + if current_heuristic > heuristic: + move = current_move + depth = depth + 1 + + self.logger.debug("id_minmax - depth reached: {} | max depth : {}".format( + depth - 1, + max_depth + )) + return move + + def alarm_handler(self, signal, frame): + self.logger.debug("Raise SIGALMR Signal") + self.interrupt_search = True diff --git a/src/classes/Heuristic.py b/src/classes/Heuristic.py index d7fa3ea..16a4a8f 100644 --- a/src/classes/Heuristic.py +++ b/src/classes/Heuristic.py @@ -17,42 +17,90 @@ class ScoreHeuristicEngine(HeuristicEngine): class WeightHeuristicEngine(HeuristicEngine): + def __init__(self, logger, options): + super().__init__(logger, options) + self.weights = self._get_weight_array() + self.logger.debug("{}".format(self.show_weights())) + def get(self, board, player): score = self.get_weight(board, player) return score def get_weight(self, board, player): score = 0 - size = board.get_board_size() - weights = self._get_weight_array(size) - for pos_x in range(size): - for pos_y in range(size): - if board._board[pos_x][pos_y] == player: - score += weights[pos_x][pos_y] - else: - score -= weights[pos_x][pos_y] + size = self.options['size'] + w = [[ 0 for 
_ in range(size)] for _ in range(size)] + for pos_x in range(self.options['size']): + for pos_y in range(self.options['size']): + p = board._board[pos_x][pos_y] + if p == player: + score += self.weights[pos_x][pos_y] + w[pos_x][pos_y] = self.weights[pos_x][pos_y] + + elif p != player and p != board._EMPTY: + score -= self.weights[pos_x][pos_y] + w[pos_x][pos_y] = -self.weights[pos_x][pos_y] return score - def _get_weight_array(self, size): + def _get_weight_array(self): + size = self.options['size'] w = [[ 0 for _ in range(size)] for _ in range(size)] padding = size // 5 center = size // 2 - for pos_y in range(size): - for pos_x in range(size): + full_range = range(self.options['size']) + center_range = range(center - padding, center + padding) + + for pos_y in full_range: + for pos_x in full_range: # Elements in the corner if pos_x in [0, size -1] and pos_y in [0,size - 1]: - w[pos_x][pos_y] = self.options['weight'][2] + w[pos_x][pos_y] = self.options['weight'][3] + # corners are a bad place! + elif (pos_x in [0, size - 1] and pos_y in [size - 2, 1]) or \ + (pos_x in [1, size -2] and pos_y in [0, size - 1]): + w[pos_x][pos_y] = self.options['weight'][0] + + # in diagonale of the corner too + elif pos_x in [size - 2, 1] and pos_y in [size - 2, 1]: + w[pos_x][pos_y] = int(self.options['weight'][0] * 1.5) + + elif pos_x in [1,size - 2] and pos_y in range(2, size - 2) or \ + pos_y in [1,size - 2] and pos_x in range(2, size - 2) : + w[pos_x][pos_y] = int(self.options['weight'][0] * 0.75) + + # center border : cool but not so... 
+ elif (pos_x in center_range and pos_y in [0, size-1]) or \ + pos_y in center_range and pos_x in [0, size-1]: + w[pos_x][pos_y] = int(self.options['weight'][2] // 1.25) + # Elements on the border elif pos_x in [0, size -1] or pos_y in [0, size -1]: - w[pos_x][pos_y] = self.options['weight'][1] + w[pos_x][pos_y] = self.options['weight'][2] # Element the center - elif pos_x in range( center - padding, center + padding) and pos_y in range(center - padding, center + padding): - w[pos_x][pos_y] = self.options['weight'][0] + elif pos_x in center_range and pos_y in center_range: + w[pos_x][pos_y] = self.options['weight'][1] + return w + + def show_weights(self): + display = "\n |" + sep = "\n----" + for x in range(self.options['size']): + display += "{:^3}|".format(x) + sep += '----' + display += sep + "\n" + for x in range(self.options['size']): + display += "{:^3}|".format(str(x)) + for y in range(self.options['size']): + display += "{:^3}|".format(self.weights[x][y]) + display += "\n" + return display + + class FullHeuristicEngine(WeightHeuristicEngine): def get(self, board, player): diff --git a/src/game.py b/src/game.py index 6f60705..fcb72b6 100755 --- a/src/game.py +++ b/src/game.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 from classes.Reversi import Board -from classes.Engines import RandomPlayerEngine, HumanPlayerEngine, MinmaxPlayerEngine, AlphabetaPlayerEngine, MinmaxDeepeningPlayerEngine +from classes.Engines import RandomPlayerEngine, HumanPlayerEngine, MinmaxPlayerEngine, AlphabetaPlayerEngine, MinmaxDeepeningPlayerEngine, AlphaBetaDeepeningPlayerEngine from classes.Heuristic import ScoreHeuristicEngine, WeightHeuristicEngine, FullHeuristicEngine import logging as log import argparse as arg @@ -12,7 +12,7 @@ from classes.CustomFormater import CustomFormatter Function to parse command line arguments """ def parse_aguments(): - engines_choices=['random', 'human', 'minmax', 'alphabeta', 'id_minmax'] + engines_choices=['random', 'human', 'minmax', 'alphabeta', 
'id_minmax', 'id_alphabeta'] heuristic_choices=['score', 'weight', 'full'] parser = arg.ArgumentParser('Playing Reversi with (virtual) friend') @@ -52,11 +52,26 @@ def parse_aguments(): default='score', ) + parser.add_argument('-br', '--black-randomize-moves', + help='Apply a random function on moves list before explore the game tree - black player', + type=bool, + default=True, + ) + + parser.add_argument('-wr', '--white-randomize-moves', + help='Apply a random function on moves list before explore the game tree - white player', + type=bool, + default=True, + ) parser.add_argument('--weight', help='Weight table for weight based heuristic engines', type=int, - nargs=3, - default=[2,10,25] + nargs=4, + default=[-5, 2, 10,25] + ) + parser.add_argument('--show-weights-table', + help='Display weight table used in \'weight\' and \'full\' heuristic calculation and exit', + action='store_true', ) debug_group = parser.add_mutually_exclusive_group() @@ -79,13 +94,14 @@ if __name__ == '__main__': "minmax": MinmaxPlayerEngine, "alphabeta": AlphabetaPlayerEngine, "id_minmax": MinmaxDeepeningPlayerEngine, + "id_alphabeta": AlphaBetaDeepeningPlayerEngine } heuristic_engine = { "score": ScoreHeuristicEngine, "weight": WeightHeuristicEngine, "full": FullHeuristicEngine, } - print("Stating PyReverso...") + print("Starting PyReverso...") args = parse_aguments() logger = log.getLogger() # Create handler for streaming to tty (stderr / stdout) @@ -93,6 +109,16 @@ if __name__ == '__main__': tty_handler.setFormatter(CustomFormatter()) logger.addHandler(tty_handler) + # IT shoud be better implemented but no time to make it clean + if args.show_weights_table: + print("{}".format( + heuristic_engine['weight'](logger,{ + 'weight': args.weight, + 'size': 10 + }).show_weights() + )) + exit(0) + # Activate verbose or debug mode if args.verbose is True: logger.setLevel(log.INFO) @@ -101,7 +127,6 @@ if __name__ == '__main__': if args.debug is True: logger.setLevel(log.DEBUG) logger.debug('DEBUG 
mode activated') - game = Board(10) logger.debug("Init players engines - black:{} / white:{}".format( args.black_engine, @@ -113,12 +138,14 @@ if __name__ == '__main__': logger, heuristic_engine[args.white_heuristic_engine]( logger, { - 'weight': args.weight + 'weight': args.weight, + 'size': game.get_board_size() } ), { 'depth': args.white_depth_exploration, - 'time_limit': 10 + 'time_limit': 10, + 'randomize_moves': args.white_randomize_moves } ) bplayer = player_engines[args.black_engine]( @@ -126,12 +153,14 @@ if __name__ == '__main__': logger, heuristic_engine[args.black_heuristic_engine]( logger, { - 'weight': args.weight + 'weight': args.weight, + 'size': game.get_board_size() } ), { 'depth': args.black_depth_exploration, - 'time_limit': 10 + 'time_limit': 10, + 'randomize_moves': args.black_randomize_moves } ) while ( not game.is_game_over()):