This page shows the differences between two versions of the page.
ws1819:pacman_code [2019/03/20 18:57] rhotert
ws1819:pacman_code [2019/03/31 15:30] (current) rhotert

Line 1:
**Pacman Code**

ZIP of the code: {{:ws1819:pacman_uni.rar|}}
If you want to try the program yourself, you unfortunately need all the packages from [[Requirements]], and even then it is far from guaranteed to work. Once a Windows update comes along, chances are nothing will work anymore.
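As a rough sanity check before trying it (the authoritative package list is on [[Requirements]]; that the network runs on Keras is only an assumption based on the code below), the following should import and run without errors:

  import gym          # OpenAI Gym including the Atari environments (MsPacman-v0)
  import numpy as np  # array handling for states and rewards
  import keras        # assumed deep-learning backend for the DQNAgent model

  print("gym", gym.__version__, "| numpy", np.__version__, "| keras", keras.__version__)
  env = gym.make('MsPacman-v0')                          # fails here if the Atari extras are missing
  print(env.observation_space.shape, env.action_space)   # (210, 160, 3) and typically Discrete(9)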
Line 102 (old revision) / Line 104 (new revision):
    def save(self, name):
        self.model.save_weights(name)
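# --- Training (the part added in this revision) ---
# Plays EPISODES games of MsPacman-v0: agent.act() picks an action for the current
# frame, every transition is stored with agent.remember(), and once more than
# batch_size transitions are in memory the network is trained via agent.replay().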
EPISODES = 22

env = gym.make('MsPacman-v0')
state_size = env.observation_space.shape
action_size = env.action_space.n
agent = DQNAgent(state_size, action_size)
done = False
batch_size = 32

for e in range(EPISODES):
    state = env.reset()
    state = np.reshape(state, (1,) + state_size)
    cum_reward = 0
    for time in range(500):
        env.render()
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        #additional_reward = -(state[0,0] + state[0,0]*state[0,2]-state[0,1]*state[0,3])  ## try out the factors
        reward = reward  #+ additional_reward if not done else 10 #
        cum_reward += reward
        next_state = np.reshape(next_state, (1,) + state_size)
        agent.remember(state, action, reward, next_state, done, reward, 1)
        state = next_state
        if done:
            print("episode: {}/{}, score: {}, e: {:.2}"
                  .format(e, EPISODES, time, agent.epsilon))
            break
        if len(agent.memory) > batch_size:
            loss = agent.replay(batch_size)
            # Logging training loss and actual reward every 10 timesteps
            if time % 10 == 0:
                print("episode: {}/{}, time: {}, cumulative reward: {:.4f}, loss: {:.4f}".format(e, EPISODES, time, cum_reward, loss))

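    # After each episode, walk backwards through the transitions stored for it.
    # Interpretation (the DQNAgent class itself is not part of this diff): judging
    # from the remember(state, action, reward, next_state, done, reward, 1) call,
    # entry[-2] appears to hold a running reward total and entry[-1] an importance
    # weight. The first loop adds rewards from later steps onto the totals of
    # earlier entries; the second loop overwrites entry[-1] with
    # max(total reward - predicted Q(s, a), 0), i.e. a TD-error-like priority.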
    for i in range(time):
        pos = -i - 1
        agent.memory[-i-2][-2] += reward
        for j in range(-time, pos):
            new_total = agent.memory[j][-2] + agent.memory[pos][2]
            mem = agent.memory[j]
            agent.memory[j][-1] = new_total

    for i in range(time):
        pos = -i - 1
        imp = max(agent.memory[pos][-2] - agent.model.predict(agent.memory[pos][0])[0, agent.memory[pos][1]], 0)
        mem = agent.memory[pos]
        agent.memory[pos][-1] = imp

agent.save("qlearning_Acrobot_3versuche")   # save the trained network weights

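# --- Evaluation ---
# A fresh environment and agent are created (trained weights could be restored via
# the commented-out agent.load call below) and the agent plays 100 games greedily:
# epsilon is forced to 0 every step, so agent.act() should always exploit.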
import gym
env = gym.make('MsPacman-v0')
state_size = env.observation_space.shape
action_size = env.action_space.n
agent = DQNAgent(state_size, action_size)
done = False
batch_size = 32
zähler = 0          # zähler ("counter"): number of episodes that actually finished

#agent.load("qlearning_Acrobot_3versuche")

import time as ti
for e in range(100):
    state = env.reset()
    #state[0] = state[0] + np.random.randn()*0.1
    #state[1] = state[1] + np.random.randn()*0.1
    #state[2] = state[2] + np.random.randn()*0.1
    #state[3] = state[3] + np.random.randn()*0.1
    #env.env.state = state
    state = np.reshape(state, (1,) + state_size)    # reshape with the shape tuple, as in the training loop
    for time in range(2000):

        env.render()
        agent.epsilon = 0
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        next_state = np.reshape(next_state, (1,) + state_size)
        state = next_state
        if done:
            zähler += 1
            print(zähler, "Duration: ", time)
            break

    else:
        print("Volle Zeit")    # for-else: all 2000 steps ran without the episode ending ("Volle Zeit" = "full time")
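Both scripts use the DQNAgent class that is defined earlier on this page (before line 102, so it is not part of this diff). Purely as a reading aid, the sketch below summarises the interface the scripts assume; it is reconstructed from the calls above, not taken from the actual class, so do not paste it into the code:

  class DQNAgent:
      # Hypothetical interface summary -- NOT the real implementation from the wiki page.
      def __init__(self, state_size, action_size):
          self.epsilon = 1.0   # exploration rate (initial value is a guess); the evaluation script sets it to 0
          self.memory = []     # replay memory; the loops above index entries like entry[-2] and entry[-1]
          self.model = None    # Keras Q-network; model.predict(state) returns the Q-values per action

      def act(self, state): ...             # pick an action for the given (reshaped) state
      def remember(self, state, action, reward, next_state, done, total_reward, importance): ...
      def replay(self, batch_size): ...     # train on a minibatch and return the loss
      def save(self, name): ...             # wraps self.model.save_weights(name)
      def load(self, name): ...             # presumably wraps self.model.load_weights(name)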