diff --git a/.gitignore b/.gitignore
index 43f67a2b2f59be6440652d7544d6fca3c545a65b..42640483c31d4089cf5c8a964709196beb5d7ec9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,3 @@
 __pycache__
 saved_agents
-*.png
\ No newline at end of file
+*.png
diff --git a/agents.py b/agents.py
index edc276c6cf11a3003ec494f9db6870af456794dc..ede5a6d42aedadb185958ad5df6acb89ba233f85 100644
--- a/agents.py
+++ b/agents.py
@@ -78,7 +78,7 @@ class DQAgent(QAgent):
         return np.argmax(action_values[0])

     def learn(self, offline=False):
-        for _ in range(2):
+        for _ in range(3):
             if np.random.rand() < 0.5:
                 temp = self.q
                 self.q = self.q2
@@ -87,8 +87,8 @@ class DQAgent(QAgent):
         epochs = 1
         if offline:
             batch_size = 4096
-        if len(self.memory.history) < self.online_batch_size:
-            return 0.0
+        if len(self.memory.history) < batch_size:
+            return
         states, actions, rewards, following_states, dones = self.memory.get_batch(batch_size)
         q_max_hat = rewards + self.gamma * (np.amax(self.q2.predict_on_batch(following_states), axis=1)) * (1-dones)
         y = self.q.predict_on_batch(states)
diff --git a/environment_wrapper.py b/environment_wrapper.py
index a9e19205890dc2be581d5e92cc19e1ba9a94a930..f624a2132529e8b87ba554a00376ba9a77fb6285 100644
--- a/environment_wrapper.py
+++ b/environment_wrapper.py
@@ -76,11 +76,11 @@ def run(environment, agent, episodes, render=True, learn=True):

     return score_history, avg_score_history

-def process_logs(avg_score_history, loss, title="Title"):
+def process_logs(avg_score_history, loss, title="Title", render=False):
     """ Plot the log history """
     plt.plot([i+1 for i in range(0, len(loss), 2)], loss[::2])
     plt.plot([i+1 for i in range(0, len(avg_score_history), 2)], avg_score_history[::2], '--')
     plt.title(title)
-    plt.show()
     plt.savefig(title + '.png', format="png")
-
+    if render:
+        plt.show()
diff --git a/main.py b/main.py
index daea0e605118df5365fd8b42e1d4b775cd8c115a..254f2b6bbde25c54cf3ca32e7c1cbb035374e2e1 100644
--- a/main.py
+++ b/main.py
@@ -6,7 +6,7 @@ import os
 import atexit

 import gym
-from agents import QAgent
+from agents import DQAgent as QAgent
 import environment_wrapper as ew

 # Allow GPU usage or force tensorflow to use the CPU.
@@ -21,12 +21,14 @@ if __name__ == '__main__':
     env = gym.make('LunarLander-v2')

     # 2. Create a learning agent
-    marvin = QAgent(env.action_space.n, env.observation_space.shape[0], 'from_scratch')
+    marvin = QAgent(env.action_space.n, env.observation_space.shape[0], 'FromScratchDouble')

     # (2.5) *optional* Load agent memory and/or net from disk.
-    LOAD_MEMORIES = False
+    agnt = 'agent'
     LOAD_ANN = False
-    marvin.load('saved_agents/agent/agent', net=LOAD_ANN, memory=LOAD_MEMORIES)
+    LOAD_MEMORIES = False
+    if LOAD_ANN or LOAD_MEMORIES:
+        marvin.load('saved_agents/' + agnt + '/' + agnt, net=LOAD_ANN, memory=LOAD_MEMORIES)

     # 3. Set your configurations for the run.
     RENDER = False
@@ -60,5 +62,4 @@ if __name__ == '__main__':
     marvin.save(SAVE_PATH)

     # Show the result of the run.
-    if RENDER:
-        ew.process_logs(avg_score, loss, title=marvin.name)
+    ew.process_logs(avg_score, loss, title=marvin.name, render=RENDER)
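
Note on the agents.py hunks: DQAgent.learn() flips a coin, swaps the roles of self.q and self.q2, and bootstraps the target (q_max_hat) from the network that is not being trained in this pass. Below is a minimal tabular NumPy sketch of that double-Q update pattern; the toy state/action sizes, learning rate, and batch layout are illustrative assumptions, not the repo's actual API.

import numpy as np

# Sketch only: two value estimators, a coin flip deciding which one
# is updated, and the *other* one providing the bootstrap target.
N_STATES, N_ACTIONS = 8, 4   # toy sizes (assumed, not from the repo)
GAMMA = 0.99
LR = 0.1

rng = np.random.default_rng(0)
q = np.zeros((N_STATES, N_ACTIONS))
q2 = np.zeros((N_STATES, N_ACTIONS))

def double_q_update(batch):
    """Apply one double-Q update to a batch of transitions."""
    global q, q2
    # Coin flip: swap the estimators' roles, mirroring the
    # `if np.random.rand() < 0.5` swap in DQAgent.learn().
    if rng.random() < 0.5:
        q, q2 = q2, q
    states, actions, rewards, next_states, dones = batch
    # Bootstrap target from the *other* estimator, like `q_max_hat`
    # in the diff: r + gamma * max_a q2(s', a) * (1 - done).
    q_max_hat = rewards + GAMMA * q2[next_states].max(axis=1) * (1 - dones)
    # Move q toward the target for the taken actions only.
    q[states, actions] += LR * (q_max_hat - q[states, actions])

# Fake batch of 5 transitions in the (s, a, r, s', done) layout that
# memory.get_batch() appears to return (an assumption here).
batch = (
    rng.integers(0, N_STATES, 5),         # states
    rng.integers(0, N_ACTIONS, 5),        # actions
    rng.normal(size=5),                   # rewards
    rng.integers(0, N_STATES, 5),         # following states
    rng.integers(0, 2, 5).astype(float),  # dones
)
double_q_update(batch)

Updating one estimator with the other's value estimates is what damps the maximization bias of single-estimator Q-learning, and the coin flip keeps both estimators trained equally often on average.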