===Protocol===

Today we ported the complete algorithm one-to-one to our Connect Four game, so the function that checks whether someone has won now works for Connect Four as well. In doing so, however, we ran into a simple problem that we should have thought about beforehand: the state space of Connect Four is much larger than that of Tic Tac Toe. There are more than 10^20 possible states (if one does not distinguish between legal and unreachable positions), so we can no longer simply use a table that lists every possible state together with its value, as we did before. In other words, we have written a program that is theoretically finished but practically infeasible, and we now have to come up with a different way of letting our AI play Connect Four against us.
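To make the size of the problem concrete, here is a quick back-of-the-envelope check (a minimal sketch, not part of our program; the 3^42 count corresponds to the ''itertools.product(player, repeat = 42)'' enumeration in the code below and ignores gravity and reachability):

<code python>
# Rough size of the naive Connect Four state table:
# every one of the 42 cells is either empty (0), Blue (1) or Red (-1).
n_states = 3 ** 42
print(f"raw board fillings: {n_states:.3e}")      # about 1.094e+20

# Even at a single byte per stored state value, the table would need
# on the order of 10^8 terabytes of memory:
print(f"approx. table size: {n_states / 1e12:,.0f} TB")
</code>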
At the moment, our training program for the AI looks like this:

<code python>
# Training: AI vs AI
import numpy as np
import itertools
import random

game_state = np.zeros((6, 7), dtype=int)  # Connect Four board filled with zeros
players = ['Blue', 'Red']                 # Blue starts / Blue is 1 and Red is -1


def play_move(state, player, block_num):
    # Cells are numbered 1..42 row by row: block_num = row*7 + column + 1
    row = (block_num - 1) // 7
    col = (block_num - 1) % 7
    value = 1 if player == 'Blue' else -1
    if state[row][col] == 0:
        state[row][col] = value
    else:
        print("Error")  # should never happen during training


def copy_game_state(state):
    new_state = np.zeros((6, 7), dtype=int)
    for i in range(6):
        for j in range(7):
            new_state[i][j] = state[i][j]
    return new_state


def check_array(a):
    # Returns 4 or -4 if four consecutive entries belong to the same player
    for i in range(len(a) - 3):
        b = sum(a[i:i+4])
        if b == 4 or b == -4:
            return b


def check_current_state(game_state):
    # Check whether someone has won or the game ended in a draw
    # Columns
    for i in range(7):
        a = check_array(game_state[:6, i:i+1].reshape(6,))
        if a == 4: return "Win"
        if a == -4: return "Lose"
    # Diagonals (offsets -2..3 are the only ones long enough for four in a row)
    for i in range(3):
        a = check_array(np.diagonal(game_state, i+1))
        if a == 4: return "Win"
        if a == -4: return "Lose"
        a = check_array(np.diagonal(game_state, -i))
        if a == 4: return "Win"
        if a == -4: return "Lose"
        a = check_array(np.fliplr(game_state).diagonal(i+1))
        if a == 4: return "Win"
        if a == -4: return "Lose"
        a = check_array(np.fliplr(game_state).diagonal(-i))
        if a == 4: return "Win"
        if a == -4: return "Lose"
    # Rows
    for i in range(6):
        a = check_array(game_state[i:i+1].reshape(7,))
        if a == 4: return "Win"
        if a == -4: return "Lose"
    # Check if draw (top row completely filled)
    for i in range(7):
        if game_state[0, i] == 0:
            return None
    return "Draw"


# Initialize state values
player = [1, -1, 0]
states_dict = {}
# 3^42 combinations - this enumeration is exactly what makes the program
# practically infeasible
all_possible_states = [[list(i[0:7]), list(i[7:14]), list(i[14:21]),
                        list(i[21:28]), list(i[28:35]), list(i[35:42])]
                       for i in itertools.product(player, repeat=42)]
n_states = len(all_possible_states)  # 2 players, 42 cells
n_actions = 42                       # 42 cells
state_values_for_AI_Blue = np.full((n_states), 0.0)
state_values_for_AI_Red = np.full((n_states), 0.0)
#print("n_states = %i \nn_actions = %i"%(n_states, n_actions))

# State values for AI 'Red' (does not start)
for i in range(n_states):
    states_dict[i] = all_possible_states[i]
    winner = check_current_state(np.array(states_dict[i]))
    if winner == 'Lose':    # AI won
        state_values_for_AI_Red[i] = 1
    elif winner == 'Win':   # AI lost
        state_values_for_AI_Red[i] = -1

# State values for AI 'Blue' (starts)
for i in range(n_states):
    winner = check_current_state(np.array(states_dict[i]))
    if winner == 'Lose':    # AI lost
        state_values_for_AI_Blue[i] = -1
    elif winner == 'Win':   # AI won
        state_values_for_AI_Blue[i] = 1


def update_state_value_Red(curr_state_idx, next_state_idx, learning_rate):
    new_value = state_values_for_AI_Red[curr_state_idx] + learning_rate * (
        state_values_for_AI_Red[next_state_idx] - state_values_for_AI_Red[curr_state_idx])
    state_values_for_AI_Red[curr_state_idx] = new_value


def update_state_value_Blue(curr_state_idx, next_state_idx, learning_rate):
    new_value = state_values_for_AI_Blue[curr_state_idx] + learning_rate * (
        state_values_for_AI_Blue[next_state_idx] - state_values_for_AI_Blue[curr_state_idx])
    state_values_for_AI_Blue[curr_state_idx] = new_value


def getBestMove(state, player, epsilon):
    ''' Reinforcement Learning Algorithm '''
    moves = []
    curr_state_values = []
    empty_cells = []
    for i in range(7):
        if state[0, i] == 0:
            a = 5
            while state[a, i] != 0:   # find the lowest free row in column i
                a = a - 1
            empty_cells.append(a*7 + (i+1))
    for empty_cell in empty_cells:
        moves.append(empty_cell)
        new_state = copy_game_state(state)
        play_move(new_state, player, empty_cell)
        next_state_idx = list(states_dict.keys())[list(states_dict.values()).index(new_state.tolist())]
        if player == 'Blue':
            curr_state_values.append(state_values_for_AI_Blue[next_state_idx])
        else:
            curr_state_values.append(state_values_for_AI_Red[next_state_idx])
    best_move_idx = np.argmax(curr_state_values)
    if np.random.uniform(0, 1) <= epsilon:
        # Exploration
        best_move = random.choice(empty_cells)
        epsilon *= 0.99   # only affects the local copy of epsilon
    else:
        # Exploitation
        best_move = moves[best_move_idx]
    return best_move


# Load previously trained state values (the files must exist from an earlier run)
state_values_for_AI_Blue = np.loadtxt('trained_state_values_Blue.txt', dtype=np.float64)
state_values_for_AI_Red = np.loadtxt('trained_state_values_Red.txt', dtype=np.float64)

learning_rate = 0.2
epsilon = 0.2
num_iterations = 1

for iteration in range(num_iterations):
    game_state = np.zeros((6, 7), dtype=int)  # empty Connect Four board
    winner = None
    current_player_idx = random.choice([0, 1])
    while winner == None:
        curr_state_idx = list(states_dict.keys())[list(states_dict.values()).index(game_state.tolist())]
        if current_player_idx == 0:   # AI_Blue's turn
            block_choice = getBestMove(game_state, players[current_player_idx], epsilon)
            play_move(game_state, players[current_player_idx], block_choice)
            new_state_idx = list(states_dict.keys())[list(states_dict.values()).index(game_state.tolist())]
        else:                         # AI_Red's turn
            block_choice = getBestMove(game_state, players[current_player_idx], epsilon)
            play_move(game_state, players[current_player_idx], block_choice)
            new_state_idx = list(states_dict.keys())[list(states_dict.values()).index(game_state.tolist())]
        update_state_value_Red(curr_state_idx, new_state_idx, learning_rate)
        update_state_value_Blue(curr_state_idx, new_state_idx, learning_rate)
        winner = check_current_state(game_state)
        if winner == None:
            current_player_idx = (current_player_idx + 1) % 2

# Save state values for future use
np.savetxt('trained_state_values_Blue.txt', state_values_for_AI_Blue, fmt='%.6f')
np.savetxt('trained_state_values_Red.txt', state_values_for_AI_Red, fmt='%.6f')
print('Training Complete!')
</code>

Previous meeting: [[some:23.01.2020]]\\
Next meeting: [[some:Blocktermin]]