=== Code vom 07.01.2020: === Bei dem folgenden Code ist der selbstlernende Gegner mithilfe einer Tabelle umgesetzt:


import turtle as t
import numpy as np
from math import inf as infinity
import itertools
import random

def play_oldmove(state, player, block_num):
    """Place ``player`` on a cell of ``state`` without drawing anything.

    Args:
        state: 3x3 list of lists holding 'X', 'O' or ' '; mutated in place.
        player: Symbol to write into the cell.
        block_num: Cell number from 1 to 9, counted row by row.

    A cell that is already occupied is left unchanged.
    """
    row, col = divmod(block_num - 1, 3)
    # Compare by value: identity checks against a str literal only work by
    # accident of interning and raise a SyntaxWarning on Python 3.8+.
    if state[row][col] == ' ':
        state[row][col] = player
        
def play_move(state, player, block_num):
    """Place ``player`` on a cell of ``state`` and draw the matching symbol.

    Args:
        state: 3x3 list of lists holding 'X', 'O' or ' '; mutated in place.
        player: Symbol to place; 'X' draws a cross, anything else a circle.
        block_num: Cell number from 1 to 9, counted row by row.

    A cell that is already occupied is left unchanged and nothing is drawn.
    """
    row, col = divmod(block_num - 1, 3)
    # Compare by value: identity checks against a str literal only work by
    # accident of interning and raise a SyntaxWarning on Python 3.8+.
    if state[row][col] != ' ':
        return
    state[row][col] = player
    if player == 'X':
        printX(row, col)
    else:
        print0(row, col)

def copy_game_state(state):
    """Return an independent copy of the 3x3 board ``state``.

    The copy lets the computer try out moves without touching the live board.
    """
    return [[state[row][col] for col in range(3)] for row in range(3)]

def check_current_state(game_state):
    """Evaluate a 3x3 board.

    Args:
        game_state: 3x3 list of lists holding 'X', 'O' or ' '.

    Returns:
        A ``(winner, status)`` tuple:
        ``(symbol, "Done")`` if a row, column or diagonal is filled with one
        symbol, ``(None, "Draw")`` if the board is full without such a line,
        and ``(None, "Not Done")`` otherwise.
    """
    # Checked in this order: rows, columns, main diagonal, anti-diagonal.
    # The first completed line decides the winner.
    win_lines = (
        ((0, 0), (0, 1), (0, 2)),
        ((1, 0), (1, 1), (1, 2)),
        ((2, 0), (2, 1), (2, 2)),
        ((0, 0), (1, 0), (2, 0)),
        ((0, 1), (1, 1), (2, 1)),
        ((0, 2), (1, 2), (2, 2)),
        ((0, 0), (1, 1), (2, 2)),
        ((2, 0), (1, 1), (0, 2)),
    )
    for first, second, third in win_lines:
        mark = game_state[first[0]][first[1]]
        # Value comparison instead of ``is``: identity on literals is unreliable.
        if (mark != ' '
                and mark == game_state[second[0]][second[1]]
                and mark == game_state[third[0]][third[1]]):
            return mark, "Done"

    board_full = all(
        game_state[row][col] != ' ' for row in range(3) for col in range(3)
    )
    if board_full:
        return None, "Draw"

    return None, "Not Done"

def comstarts():
    """Key handler: request a new round in which the computer moves first.

    Calls ``newGame('O')`` and afterwards sets the global ``playerChoice`` to 0.
    ``playerChoice`` is changed even when ``newGame`` decides not to reset.
    """
    # NOTE(review): newGame('O') places players[0] as the computer's opening
    # move, and game() uses players[playerChoice] for the human, i.e. also
    # players[0] -- confirm that this symbol assignment is intended.
    global playerChoice
    newGame('O')
    playerChoice = 0
    
def playerstarts():
    """Key handler: request a new round in which the human moves first.

    Calls ``newGame('X')`` and afterwards sets the global ``playerChoice`` to 1,
    so game() uses players[1] for the human and players[0] for the computer.
    ``playerChoice`` is changed even when ``newGame`` decides not to reset.
    """
    global playerChoice
    newGame('X')
    playerChoice = 1
    
def newGame(player_choice):
    """Reset the board and redraw the score counters for a new round.

    The reset only happens when the previous round is marked "Done" or when
    no round has been counted yet (all three counters are zero); otherwise
    the call does nothing.

    Args:
        player_choice: 'O' makes the computer place its opening move right
            after the reset; any other value leaves the board empty.
    """
    global NoWin
    global WinsPlayer
    global WinsCom
    global game_state
    global current_state

    nothing_counted_yet = NoWin == 0 and WinsPlayer == 0 and WinsCom == 0
    if not (current_state == "Done" or nothing_counted_yet):
        return

    # Rewrite the three counters, one below the other.
    start.up()
    start.clear()
    start.sety(0)
    start.setx(-2)
    start.hideturtle()
    for position, tally in enumerate((WinsPlayer, WinsCom, NoWin)):
        if position:
            start.sety(start.ycor() + 2)
        start.write(tally)

    # Wipe the drawn symbols and start from an empty board.
    t.clear()
    game_state = [[' '] * 3 for _ in range(3)]
    current_state = "Not Done"

    if player_choice == 'O':
        opening_move = getBestMove(game_state, players[0])
        play_move(game_state, players[0], opening_move)
            

def _grid_index(coord):
    """Return 0, 1 or 2 for a coordinate in (0, 30], or None outside it."""
    for index in range(3):
        if index * 10 < coord <= (index + 1) * 10:
            return index
    return None


def wheretogo(x, y):
    """Click handler: map a click position to a board cell and play it.

    Args:
        x: Horizontal click position in world coordinates.
        y: Vertical click position in world coordinates.

    Clicks outside the 30x30 board and clicks on occupied cells are ignored.
    For a free cell the turtle is moved to the cell centre and ``game`` is
    called with the cell number (1-9, counted row by row).
    """
    spalte = _grid_index(x)
    zeile = _grid_index(y)
    # Clicks beside the board (e.g. on the captions) used to leave these
    # unassigned and raise UnboundLocalError; ignore them instead.
    if spalte is None or zeile is None:
        return
    if game_state[zeile][spalte] == ' ':
        t.up()
        t.setx(spalte * 10 + 5)
        t.sety(zeile * 10 + 5)
        game(3 * zeile + spalte + 1)
    

def printX(zeile, spalte):
    """Draw a cross centred in the cell at row ``zeile``, column ``spalte``."""
    centre_x = spalte * 10 + 5
    centre_y = zeile * 10 + 5
    t.up()
    t.setx(centre_x)
    t.sety(centre_y)
    t.down()
    # Two strokes through the centre; the turns add up to 135 degrees.
    for turn in (45, 90):
        t.left(turn)
        t.forward(5)
        t.backward(10)
        t.forward(5)
    # Undo both turns so the heading is the same as on entry.
    t.right(135)
    t.up()
    
def print0(zeile, spalte):
    """Draw a circle of radius 4 in the cell at row ``zeile``, column ``spalte``.

    The turtle is lifted to the cell centre, moved 4 units at a right angle
    to its heading, and then draws the circle from there.
    """
    t.up()
    t.setx(spalte * 10 + 5)
    t.sety(zeile * 10 + 5)
    # The pen stays up here: the former down()/up() pair at this point did
    # not move the turtle and therefore drew nothing.
    t.right(90)
    t.forward(4)
    t.left(90)
    t.down()
    t.circle(4)
    t.up()

def tttfeld():
    """Draw the 3x3 board together with its captions.

    Sets the world coordinates to a 30x30 area, draws four vertical and four
    horizontal grid lines with two helper turtles, then writes the score
    captions and the start prompt.
    """
    t.setworldcoordinates(0,30,30,0)
    col_pen = t.Turtle()
    col_pen.left(90)
    row_pen = t.Turtle()
    for pen in (col_pen, row_pen):
        pen.speed(0)
    for pen in (col_pen, row_pen):
        pen.hideturtle()

    # Grid: one vertical and one horizontal line per step.
    for offset in range(0, 40, 10):
        col_pen.setx(offset)
        row_pen.sety(offset)
        col_pen.forward(30)
        row_pen.forward(30)
        col_pen.sety(0)
        row_pen.setx(0)
    row_pen.goto(0,0)
    col_pen.goto(0,0)

    # Score captions next to the board, one below the other.
    row_pen.up()
    row_pen.setx(-3)
    captions = ("Siege vom Spieler:", "Siege vom Computer:", "Unentschieden:")
    for position, caption in enumerate(captions):
        if position:
            row_pen.sety(row_pen.ycor()+2)
        row_pen.write(caption, align="right")

    # Start prompt beside the board.
    row_pen.goto(0,0)
    row_pen.goto(31,15)
    row_pen.write("Wer soll anfangen? Du(X) oder der Computer(O)?", align="left", font=("Arial", 8, "normal"))
    row_pen.down()
    
    
    
def update_state_value(curr_state_idx, next_state_idx, learning_rate):
    """Move the value of one state towards the value of its successor.

    Args:
        curr_state_idx: Index of the state whose value is updated.
        next_state_idx: Index of the state whose value is the target.
        learning_rate: Fraction of the difference that is applied.

    The result is stored in the global ``state_values_for_AI``.
    """
    current = state_values_for_AI[curr_state_idx]
    target = state_values_for_AI[next_state_idx]
    state_values_for_AI[curr_state_idx] = current + learning_rate*(target - current)

def getBestMove(state, player):
    """Choose the move whose resulting board has the highest stored value.

    Every free cell is tried on a copy of ``state``; the resulting board is
    looked up in ``states_dict`` and rated with ``state_values_for_AI``.

    Args:
        state: 3x3 list of lists holding 'X', 'O' or ' '; not modified.
        player: Symbol that is placed for the trial moves.

    Returns:
        The cell number (1-9, counted row by row) of the best-rated move.
        On equal ratings the lowest cell number wins.

    Raises:
        ValueError: If ``state`` has no free cell, or if a resulting board
            is not contained in ``states_dict``.
    """
    empty_cells = [
        row * 3 + col + 1
        for row in range(3)
        for col in range(3)
        if state[row][col] == ' '  # value comparison, not identity
    ]

    # Snapshot the table once instead of rebuilding both lists per candidate.
    state_ids = list(states_dict.keys())
    boards = list(states_dict.values())

    candidate_values = []
    for cell in empty_cells:
        new_state = copy_game_state(state)
        play_oldmove(new_state, player, cell)
        next_state_idx = state_ids[boards.index(new_state)]
        candidate_values.append(state_values_for_AI[next_state_idx])

    return empty_cells[np.argmax(candidate_values)]

def _finish_round(winner, human_symbol):
    """Count and announce the result of a finished round, then mark it "Done"."""
    global NoWin
    global WinsPlayer
    global WinsCom
    global current_state
    t.up()
    t.goto(15,15)
    if winner is None:
        NoWin = NoWin + 1
        t.write("Unentschieden", align="center", font=("Arial", 30, "normal"))
    elif str(winner) == human_symbol:
        WinsPlayer = WinsPlayer + 1
        t.write("Du hast gewonnen", align="center", font=("Arial", 32, "normal"))
    else:
        WinsCom = WinsCom + 1
        t.write("Du hast verloren", align="center", font=("Arial", 32, "normal"))
    # newGame() only resets when the state is "Done", so a draw must be
    # marked "Done" as well.
    current_state = "Done"
    t.goto(40,15)


def game(block_choice):
    """Play one turn: the human's move followed by the computer's reply.

    Args:
        block_choice: Cell number (1-9, counted row by row) chosen by the human.

    The human plays ``players[playerChoice]`` and the computer the other
    symbol. The board is evaluated after each of the two moves; when a round
    ends, the matching counter is increased and the result is written on the
    board. Calls are ignored while no round is running, so clicking after the
    end of a round cannot count a result twice.
    """
    global current_state
    if current_state != "Not Done":
        return

    t.speed(0)
    t.hideturtle()
    human_idx = playerChoice
    human_symbol = 'X' if human_idx == 0 else 'O'

    # Human move.
    play_move(game_state, players[human_idx], block_choice)
    winner, current_state = check_current_state(game_state)
    if current_state != "Not Done":
        _finish_round(winner, human_symbol)
        return

    # The human did not end the round, so the computer replies.
    computer_idx = 1 if human_idx == 0 else 0
    reply = getBestMove(game_state, players[computer_idx])
    play_move(game_state, players[computer_idx], reply)
    # Re-evaluate for both symbol assignments; previously this only happened
    # in one branch, which delayed detection of a computer win.
    winner, current_state = check_current_state(game_state)
    if current_state != "Not Done":
        _finish_round(winner, human_symbol)
# Playing

# Build the table of all 3x3 boards and seed a value for each of them.
player = ['X', 'O', ' ']
all_possible_states = [
    [list(cells[:3]), list(cells[3:6]), list(cells[6:])]
    for cells in itertools.product(player, repeat=9)
]
n_states = len(all_possible_states)  # every cell is 'X', 'O' or empty
n_actions = 9  # one action per cell
state_values_for_AI = np.zeros(n_states)

# Index every board; decided boards start at +1 ('O' wins) or -1 ('X' wins).
states_dict = {}
for i, board in enumerate(all_possible_states):
    states_dict[i] = board
    winner, _ = check_current_state(board)
    if winner == 'O':
        state_values_for_AI[i] = 1
    elif winner == 'X':
        state_values_for_AI[i] = -1



 

# Turtle used by newGame() to write the three score counters.
start = t.Turtle() 
start.speed(0)
# The default turtle draws the crosses, circles and result messages.
t.speed(0)
t.hideturtle()
screen=t.Screen()
# Score counters shown next to the board.
WinsPlayer = 0
WinsCom = 0
NoWin = 0
# Index into `players` of the symbol that game() places for the human.
playerChoice = 0
# Live board; ' ' marks a free cell.
game_state = [[' ',' ',' '],
              [' ',' ',' '],
              [' ',' ',' ']]
players = ['X','O']

# "Not Done" while a round is running; newGame() resets only on "Done".
current_state = "Not Done"
tttfeld()


# Load the trained state values from file. This replaces the values that
# were assigned to state_values_for_AI further up in this file.
state_values_for_AI = np.loadtxt('trained_state_values_X.txt', dtype=np.float64)



# Mouse clicks play a move; the keys "o" and "x" request a new round.
screen.onclick(wheretogo)
screen.onkeypress(comstarts, "o")
screen.onkeypress(playerstarts, "x")
screen.listen()
t.mainloop()

# NOTE(review): this line is only reached after mainloop() has returned --
# confirm that it is still needed.
t.exitonclick()