diff --git a/environment_wrapper.py b/environment_wrapper.py
index 2c8317b0e5a5787e8f7b7bd3173ba358e9712867..c366b1e2bfffe9edb2984d14ae687770d4ac5382 100644
--- a/environment_wrapper.py
+++ b/environment_wrapper.py
@@ -77,7 +77,6 @@ def one_episode(environment, agent, render, learn, conf=None, max_steps=1000):
 def learn_offline(agent, conf):
     """ Train the agent with its memories. """
     print('Learning with ', len(agent.memory.history), ' memories.')
-    agent.epsilon = agent.epsilon_min
     score_history = []
     avg_score_history = []
 
@@ -89,6 +88,7 @@ def learn_offline(agent, conf):
         pbar.set_description(desc)
         pbar.refresh()
         if i % conf.offline_validate_every_x_iteration == 1 and conf.offline_validate_every_x_iteration is not -1:
+            agent.epsilon = agent.epsilon_min
             score = one_episode(conf.env, agent, conf.render, False, conf=conf)
             score_history.append(score)
             is_solved = np.mean(score_history[-25:])
@@ -96,7 +96,8 @@ def learn_offline(agent, conf):
         avg_score_history.append(is_solved)
         if is_solved > IS_SOLVED:
             break
-    process_logs(avg_score_history, score_history, conf)
+    if conf.offline_validate_every_x_iteration is not -1:
+        process_logs(avg_score_history, score_history, conf)
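
This patch does two things: it moves `agent.epsilon = agent.epsilon_min` from the top of `learn_offline` into the validation branch, so epsilon is clamped to its minimum only for the greedy validation episodes instead of for the entire offline run, and it guards `process_logs` so logs are only written when validation was actually enabled (otherwise both histories are empty). Below is a minimal sketch of how `learn_offline` reads after the patch, reconstructed from the visible hunks; the epoch loop, the `agent.learn(offline=True)` call, and the `IS_SOLVED` value are hypothetical stand-ins for code outside the diff, while `one_episode` and `process_logs` are the module's own functions.

```python
# Sketch of learn_offline() after this patch (assumptions marked below).
import numpy as np
from tqdm import trange

IS_SOLVED = 195  # assumed solve threshold; the real constant lives elsewhere

def learn_offline(agent, conf):
    """ Train the agent with its memories. """
    print('Learning with ', len(agent.memory.history), ' memories.')
    score_history = []
    avg_score_history = []

    pbar = trange(conf.offline_epochs)       # assumed iteration source
    for i in pbar:
        loss = agent.learn(offline=True)     # assumed replay-training step
        pbar.set_description(f'loss: {loss}')
        pbar.refresh()
        if i % conf.offline_validate_every_x_iteration == 1 \
                and conf.offline_validate_every_x_iteration != -1:
            # Epsilon is now minimized only for validation, so the offline
            # run itself is no longer permanently clamped to greedy behavior.
            agent.epsilon = agent.epsilon_min
            score = one_episode(conf.env, agent, conf.render, False, conf=conf)
            score_history.append(score)
            is_solved = np.mean(score_history[-25:])
            avg_score_history.append(is_solved)
            if is_solved > IS_SOLVED:
                break
    # Only process logs when validation produced any scores.
    if conf.offline_validate_every_x_iteration != -1:
        process_logs(avg_score_history, score_history, conf)
```

One note on the guard itself: the patch spells the check as `conf.offline_validate_every_x_iteration is not -1`, matching the pre-existing condition in the loop, but `is` tests object identity rather than equality and raises a `SyntaxWarning` against literals on recent Python versions; the sketch uses `!= -1`, which is the reliable comparison.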