diff --git a/environment_wrapper.py b/environment_wrapper.py
index 2c8317b0e5a5787e8f7b7bd3173ba358e9712867..c366b1e2bfffe9edb2984d14ae687770d4ac5382 100644
--- a/environment_wrapper.py
+++ b/environment_wrapper.py
@@ -77,7 +77,6 @@ def one_episode(environment, agent, render, learn, conf=None, max_steps=1000):
 def learn_offline(agent, conf):
     """ Train the agent with its memories. """
     print('Learning with ', len(agent.memory.history), ' memories.')
-    agent.epsilon = agent.epsilon_min
 
     score_history = []
     avg_score_history = []
@@ -89,6 +88,8 @@
         pbar.set_description(desc)
         pbar.refresh()
-        if i % conf.offline_validate_every_x_iteration == 1 and conf.offline_validate_every_x_iteration is not -1:
+        if conf.offline_validate_every_x_iteration != -1 and i % conf.offline_validate_every_x_iteration == 1:
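+            # run a single greedy validation episode: epsilon at its minimum, learning disabled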
+            agent.epsilon = agent.epsilon_min
             score = one_episode(conf.env, agent, conf.render, False, conf=conf)
             score_history.append(score)
             is_solved = np.mean(score_history[-25:])
@@ -96,7 +96,9 @@
             avg_score_history.append(is_solved)
             if is_solved > IS_SOLVED:
                 break
-    process_logs(avg_score_history, score_history, conf)
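+    # scores are only collected when validation runs, so skip logging otherwise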
+    if conf.offline_validate_every_x_iteration != -1:
+        process_logs(avg_score_history, score_history, conf)