"""Tic-tac-toe against a table-based self-learning opponent (turtle GUI).

Code snapshot from 2020-01-07. In this version the self-learning opponent is
implemented with a table: every possible board gets one entry in
``state_values_for_AI`` and the computer picks the move whose resulting board
has the highest value.

Controls: click a cell to place a mark, press "o" to let the computer start a
new game, press "x" to start a new game yourself.
"""

import turtle as t
import numpy as np
from math import inf as infinity
import itertools
import random

EMPTY = ' '
CELL_SIZE = 10  # side length of one board cell in world coordinates

# All eight winning lines as (row, column) triples: rows, columns, diagonals.
# The order matters for boards that contain more than one complete line.
_WINNING_LINES = (
    ((0, 0), (0, 1), (0, 2)),
    ((1, 0), (1, 1), (1, 2)),
    ((2, 0), (2, 1), (2, 2)),
    ((0, 0), (1, 0), (2, 0)),
    ((0, 1), (1, 1), (2, 1)),
    ((0, 2), (1, 2), (2, 2)),
    ((0, 0), (1, 1), (2, 2)),
    ((2, 0), (1, 1), (0, 2)),
)


def _cell_position(block_num):
    """Return ``(row, column)`` for a 1-based, row-major block number (1..9)."""
    return divmod(block_num - 1, 3)


def _axis_index(coord):
    """Map a world coordinate to a cell index 0..2, or None if off the board.

    A cell with index ``k`` covers the half-open interval
    ``(k * CELL_SIZE, (k + 1) * CELL_SIZE]``.
    """
    for idx in range(3):
        if idx * CELL_SIZE < coord <= (idx + 1) * CELL_SIZE:
            return idx
    return None


def play_oldmove(state, player, block_num):
    """Place ``player``'s mark on ``state`` without drawing anything.

    ``block_num`` is the 1-based cell number (1..9, row-major). An occupied
    cell is left untouched. The board is modified in place.
    """
    row, col = _cell_position(block_num)
    if state[row][col] == EMPTY:
        state[row][col] = player


def play_move(state, player, block_num):
    """Place ``player``'s mark on ``state`` and draw it on the screen.

    ``block_num`` is the 1-based cell number (1..9, row-major). An occupied
    cell is left untouched and nothing is drawn.
    """
    row, col = _cell_position(block_num)
    if state[row][col] == EMPTY:
        state[row][col] = player
        if player == 'X':
            printX(row, col)
        else:
            print0(row, col)


def copy_game_state(state):
    """Return an independent copy of the board.

    Lets the computer try out moves without changing the real game state.
    """
    return [list(row) for row in state]


def check_current_state(game_state):
    """Evaluate a board.

    Returns a ``(winner, status)`` tuple:
    ``(mark, "Done")`` if a row, column or diagonal is filled with one mark,
    ``(None, "Draw")`` if the board is full without a winner, and
    ``(None, "Not Done")`` otherwise.
    """
    for (r1, c1), (r2, c2), (r3, c3) in _WINNING_LINES:
        mark = game_state[r1][c1]
        if mark != EMPTY and mark == game_state[r2][c2] == game_state[r3][c3]:
            return mark, "Done"

    board_is_full = all(cell != EMPTY for row in game_state for cell in row)
    if board_is_full:
        return None, "Draw"
    return None, "Not Done"


def comstarts():
    """Key handler for "o": start a new game in which the computer opens."""
    global playerChoice
    newGame('O')
    # newGame() makes the computer's opening move with players[0], so the
    # human has to play players[1]; otherwise both sides would place the
    # same mark.
    playerChoice = 1


def playerstarts():
    """Key handler for "x": start a new game in which the human opens."""
    global playerChoice
    newGame('X')
    playerChoice = 1


def newGame(player_choice):
    """Reset the board and redraw the score.

    Only takes effect when the previous game is finished or when nothing has
    been scored yet. If ``player_choice`` is ``'O'`` the computer makes the
    opening move right away.
    """
    global game_state
    global current_state
    nothing_scored = NoWin == 0 and WinsPlayer == 0 and WinsCom == 0
    if current_state == "Done" or nothing_scored:
        # Refresh the three score numbers next to the board.
        start.up()
        start.clear()
        start.sety(0)
        start.setx(-2)
        start.hideturtle()
        start.write(WinsPlayer)
        start.sety(start.ycor() + 2)
        start.write(WinsCom)
        start.sety(start.ycor() + 2)
        start.write(NoWin)

        # Wipe the marks and the result text drawn by the default turtle.
        t.clear()
        game_state = [[EMPTY, EMPTY, EMPTY],
                      [EMPTY, EMPTY, EMPTY],
                      [EMPTY, EMPTY, EMPTY]]
        current_state = "Not Done"
        if player_choice == 'O':
            block_choice = getBestMove(game_state, players[0])
            play_move(game_state, players[0], block_choice)


def wheretogo(x, y):
    """Click handler: translate a click into a cell and play it.

    The click position is mapped to a cell number between 1 and 9. Clicks
    outside the board, on an occupied cell, or after the game has ended are
    ignored.
    """
    if current_state != "Not Done":
        # The game is over; further moves would count the result again.
        return
    spalte = _axis_index(x)
    zeile = _axis_index(y)
    if spalte is None or zeile is None:
        return
    if game_state[zeile][spalte] != EMPTY:
        return
    block_choice = 3 * zeile + spalte + 1
    t.up()
    t.setx(spalte * CELL_SIZE + 5)
    t.sety(zeile * CELL_SIZE + 5)
    game(block_choice)


def printX(zeile, spalte):
    """Draw a cross in the cell at row ``zeile``, column ``spalte`` (0-based)."""
    t.up()
    t.setx(spalte * CELL_SIZE + 5)
    t.sety(zeile * CELL_SIZE + 5)
    t.down()
    # First stroke through the cell centre.
    t.left(45)
    t.forward(5)
    t.backward(10)
    t.forward(5)
    # Second stroke, perpendicular to the first.
    t.left(90)
    t.forward(5)
    t.backward(10)
    t.forward(5)
    # Restore the original heading.
    t.right(135)
    t.up()


def print0(zeile, spalte):
    """Draw a circle in the cell at row ``zeile``, column ``spalte`` (0-based)."""
    t.up()
    t.setx(spalte * CELL_SIZE + 5)
    t.sety(zeile * CELL_SIZE + 5)
    # Step sideways by the radius so the circle ends up centred in the cell.
    t.right(90)
    t.forward(4)
    t.left(90)
    t.down()
    t.circle(4)
    t.up()


def tttfeld():
    """Draw the 3x3 grid and the accompanying labels."""
    t.setworldcoordinates(0, 30, 30, 0)
    spalten = t.Turtle()
    spalten.left(90)
    zeilen = t.Turtle()
    spalten.speed(0)
    zeilen.speed(0)
    spalten.hideturtle()
    zeilen.hideturtle()
    for i in range(4):
        spalten.setx(i * CELL_SIZE)
        zeilen.sety(i * CELL_SIZE)
        spalten.forward(30)
        zeilen.forward(30)
        spalten.sety(0)
        zeilen.setx(0)
    zeilen.goto(0, 0)
    spalten.goto(0, 0)

    # Score labels to the left of the board.
    zeilen.up()
    zeilen.setx(-3)
    zeilen.write("Siege vom Spieler:", align="right")
    zeilen.sety(zeilen.ycor() + 2)
    zeilen.write("Siege vom Computer:", align="right")
    zeilen.sety(zeilen.ycor() + 2)
    zeilen.write("Unentschieden:", align="right")

    # Prompt to the right of the board.
    zeilen.goto(0, 0)
    zeilen.goto(31, 15)
    zeilen.write("Wer soll anfangen? Du(X) oder der Computer(O)?",
                 align="left", font=("Arial", 8, "normal"))
    zeilen.down()


def update_state_value(curr_state_idx, next_state_idx, learning_rate):
    """Move the value of one state towards the value of its successor.

    Applies ``V(s) <- V(s) + learning_rate * (V(s') - V(s))`` in place on
    ``state_values_for_AI``.
    """
    current_value = state_values_for_AI[curr_state_idx]
    next_value = state_values_for_AI[next_state_idx]
    state_values_for_AI[curr_state_idx] = (
        current_value + learning_rate * (next_value - current_value)
    )


def _state_index(state):
    """Return the index of ``state`` in ``states_dict``.

    Raises ``KeyError`` if the board contains anything other than the marks
    listed in ``player``.
    """
    return _index_by_cells[tuple(cell for row in state for cell in row)]


def getBestMove(state, player):
    """Choose the move with the highest learned state value.

    Every free cell is tried on a copy of the board; the value of the
    resulting board is looked up in ``state_values_for_AI`` and the cell
    leading to the highest value is returned as a 1-based block number.
    Ties go to the lowest block number. The board must have a free cell.
    """
    moves = [
        row * 3 + col + 1
        for row in range(3)
        for col in range(3)
        if state[row][col] == EMPTY
    ]
    candidate_values = []
    for move in moves:
        new_state = copy_game_state(state)
        play_oldmove(new_state, player, move)
        candidate_values.append(state_values_for_AI[_state_index(new_state)])
    return moves[np.argmax(candidate_values)]


def _finish_if_over(winner, human_mark):
    """Show the result and update the score if the game has ended.

    Returns True when the game is over (win or draw), False otherwise.
    """
    global NoWin
    global WinsPlayer
    global WinsCom
    global current_state
    if winner is not None:
        t.up()
        t.goto(15, 15)
        if str(winner) == human_mark:
            WinsPlayer = WinsPlayer + 1
            t.write("Du hast gewonnen", align="center",
                    font=("Arial", 32, "normal"))
        else:
            WinsCom = WinsCom + 1
            t.write("Du hast verloren", align="center",
                    font=("Arial", 32, "normal"))
        t.goto(40, 15)
        return True
    if current_state == "Draw":
        NoWin = NoWin + 1
        t.up()
        t.goto(15, 15)
        # newGame() only restarts from "Done", so a draw is stored as "Done".
        current_state = "Done"
        t.write("Unentschieden", align="center",
                font=("Arial", 30, "normal"))
        t.goto(40, 15)
        return True
    return False


def game(block_choice):
    """Play one round: the human's move followed by the computer's reply.

    ``block_choice`` is the 1-based cell number the human clicked. The human
    plays ``players[playerChoice]`` and the computer plays the other mark.
    """
    global current_state
    t.speed(0)
    t.hideturtle()
    human_mark = players[playerChoice]
    computer_mark = players[1 - playerChoice]

    play_move(game_state, human_mark, block_choice)
    winner, current_state = check_current_state(game_state)
    if _finish_if_over(winner, human_mark):
        return

    # The human's move did not end the game, so the computer replies.
    reply = getBestMove(game_state, computer_mark)
    play_move(game_state, computer_mark, reply)
    winner, current_state = check_current_state(game_state)
    _finish_if_over(winner, human_mark)


# Initialize state values
player = ['X', 'O', ' ']
states_dict = {}
all_possible_states = [
    [list(i[0:3]), list(i[3:6]), list(i[6:9])]
    for i in itertools.product(player, repeat=9)
]
n_states = len(all_possible_states)  # 3 possible marks on 9 cells
n_actions = 9  # 9 cells
state_values_for_AI = np.full((n_states), 0.0)
_index_by_cells = {}  # flattened board (tuple of 9 marks) -> state index

for i in range(n_states):
    states_dict[i] = all_possible_states[i]
    _index_by_cells[
        tuple(cell for row in all_possible_states[i] for cell in row)
    ] = i
    winner, _ = check_current_state(states_dict[i])
    if winner == 'O':  # AI won
        state_values_for_AI[i] = 1
    elif winner == 'X':  # AI lost
        state_values_for_AI[i] = -1

# Game state shared by the event handlers.
start = None   # turtle that writes the score numbers; created in main()
screen = None  # turtle screen; created in main()
WinsPlayer = 0
WinsCom = 0
NoWin = 0
playerChoice = 0
game_state = [[' ', ' ', ' '],
              [' ', ' ', ' '],
              [' ', ' ', ' ']]
players = ['X', 'O']
current_state = "Not Done"


def main():
    """Open the window, load the trained state values and run the game."""
    global start
    global screen
    global state_values_for_AI
    start = t.Turtle()
    start.speed(0)
    t.speed(0)
    t.hideturtle()
    screen = t.Screen()
    tttfeld()

    # Load trained state values
    state_values_for_AI = np.loadtxt('trained_state_values_X.txt',
                                     dtype=np.float64)

    screen.onclick(wheretogo)
    screen.onkeypress(comstarts, "o")
    screen.onkeypress(playerstarts, "x")
    screen.listen()
    # mainloop() returns only after the window has been closed, so no further
    # turtle call may follow it.
    t.mainloop()


if __name__ == "__main__":
    main()