diff --git a/environment_wrapper.py b/environment_wrapper.py
index fb800698a1c5f4290bfa6537b9be146ffa611cf3..e55058bcf7c9c6468471fdaf1ac4be40fbb5d931 100644
--- a/environment_wrapper.py
+++ b/environment_wrapper.py
@@ -77,24 +77,32 @@ def one_episode(environment, agent, render, learn, conf=None, max_steps=1000):
 def learn_offline(agent, conf):
     """ Train the agent with its memories. """
     print('Learning with ', len(agent.memory.history), ' memories.')
+    agent.epsilon = agent.epsilon_min
+
+    score_history = []
+    avg_score_history = []
+    desc_train = ''
 
     pbar = trange(conf.offline_epochs, desc='Loss: x')
     for i in pbar:
         loss = agent.learn(offline=True, epochs=conf.learn_iterations)
-        desc = ('Loss: %05.4f' %(loss))
+        desc = ('Loss: %05.4f' %(loss)) + desc_train
         pbar.set_description(desc)
         pbar.refresh()
-        if i % conf.offline_validate_every_x_iteration == 0 and conf.offline_validate_every_x_iteration is not -1:
-            score, avg = run(conf.env, conf.agent, 1, render=conf.render, learn=False, conf=conf)
-            conf.name += '1'
-            process_logs(avg, score, conf)
-            if avg[-1] > IS_SOLVED:
+        if i % conf.offline_validate_every_x_iteration == 1 and conf.offline_validate_every_x_iteration is not -1:
+            score = one_episode(conf.env, agent, conf.render, False, conf=conf)
+            score_history.append(score)
+            is_solved = np.mean(score_history[-25:])
+            desc_train = (', Avg: %05.1f' %(is_solved))
+            avg_score_history.append(is_solved)
+            if is_solved > IS_SOLVED:
                 break
+    process_logs(avg_score_history, score_history, conf)
 
 
 def run(environment, agent, episodes, render=True, learn=True, conf=None):
     """ Run an agent """
-
+    conf.name += 'on'
     # Set the exploring rate to its minimum.
     # (epsilon *greedy*)
     if not learn:
diff --git a/networks.py b/networks.py
index 7e16615400a0456b362f61f00f0ad74c520ae908..32b9461c52ddaadadd19e52ccc89026015717283 100644
--- a/networks.py
+++ b/networks.py
@@ -5,7 +5,7 @@ from keras.layers import Dense
 from keras.optimizers import Adam
 from keras.activations import relu, linear
 from keras.regularizers import l2
-
+from keras.callbacks import EarlyStopping
 
 class QNet:
     learn_rate = 0.0005
@@ -34,7 +34,8 @@ class QNet:
             self, states):
         return self.net.predict_on_batch(states)
     def fit(self, X, Y, epochs=1, verbose=0):
-        history = self.net.fit(X, Y, epochs=epochs, verbose=verbose)
+        callback = EarlyStopping(monitor='loss', patience=3)
+        history = self.net.fit(X, Y, epochs=epochs, verbose=verbose, callbacks=[callback])
         return history.history['loss'][-1]
 
     def save(self, path):
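
The networks.py hunk above wires a Keras EarlyStopping callback into QNet.fit so that an offline fitting pass stops once the training loss stops improving. As a minimal sketch of that pattern only (the Sequential model, layer sizes, and random data below are illustrative placeholders, not the QNet definition from this repository):

# Minimal sketch of the EarlyStopping usage from the networks.py hunk.
# The model architecture and the random training data are placeholders, not QNet.
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import EarlyStopping

model = Sequential([Dense(32, activation='relu', input_shape=(8,)),
                    Dense(4, activation='linear')])
model.compile(optimizer='adam', loss='mse')

states = np.random.rand(256, 8)    # placeholder input batch
targets = np.random.rand(256, 4)   # placeholder Q-value targets

# Stop fitting once the training loss has not improved for 3 consecutive epochs,
# mirroring EarlyStopping(monitor='loss', patience=3) in the diff.
callback = EarlyStopping(monitor='loss', patience=3)
history = model.fit(states, targets, epochs=50, verbose=0, callbacks=[callback])
print(history.history['loss'][-1])  # last recorded loss, which QNet.fit returns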