From e09a4dd03f942448d92851c0d2f5578b7a8194cc Mon Sep 17 00:00:00 2001
From: Armin <armin.co@hs-bochum.de>
Date: Mon, 4 Jan 2021 11:44:06 +0100
Subject: [PATCH] Refactoring: tqdm progress bar, end-of-episode offline learning, updated run config

---
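Note on the progress-bar change below: run() now builds the bar with trange()
instead of wrapping range() in tqdm(). If environment_wrapper.py so far only
imports tqdm itself (as the removed tqdm(range(...)) call suggests), the import
has to be extended for this hunk to run; a minimal sketch, assuming the
standard tqdm package:

    # assumed addition at the top of environment_wrapper.py (not part of this diff)
    from tqdm import trange
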
 .gitignore             |  3 ++-
 environment_wrapper.py | 19 +++++++++++--------
 main.py                | 20 +++++++++++---------
 3 files changed, 24 insertions(+), 18 deletions(-)

diff --git a/.gitignore b/.gitignore
index b9518e5..43f67a2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 __pycache__
-saved_agents
\ No newline at end of file
+saved_agents
+*.png
\ No newline at end of file
diff --git a/environment_wrapper.py b/environment_wrapper.py
index a3945f1..a9e1920 100644
--- a/environment_wrapper.py
+++ b/environment_wrapper.py
@@ -24,18 +24,18 @@ def one_episode(environment, agent, render, learn, max_steps=3000):
     score = 0
     state = reset(environment)
     for _ in range(max_steps):
-        action = agent.get_action(state)
         if render:
             environment.render()
+        action = agent.get_action(state)
         following_state, reward, done, _ = step(environment, action)
         agent.remember(state, action, reward, following_state, done)
         score += reward
         state = following_state
         if learn:
-            loss = agent.learn()
+            agent.learn()
         if done:
-            if learn: 
-                print(loss)
+            if learn:
+                agent.learn(offline=True)
             break
     return score
 
@@ -60,16 +60,19 @@ def run(environment, agent, episodes, render=True, learn=True):
     score_history = []
     avg_score_history = []
     
-    for _ in tqdm(range(episodes), desc="Training", unit="Episode"):
+    pbar = trange(episodes, desc='Score [actual, average]: [0, 0]', unit="Episodes")
+    for _ in pbar:
         score = one_episode(environment, agent, render, learn)
         score_history.append(score)
 
-        is_solved = np.mean(score_history[-100:])
+        is_solved = np.mean(score_history[-50:])
         avg_score_history.append(is_solved)
 
-        if is_solved > 235 and learn:
+        if is_solved > 200 and learn:
             break
-        print("Score [actual, average]: [{0:.2f}, {1:.2f}]\n".format(score, is_solved))
+        desc = "Score [actual, average]: [{0:.2f}, {1:.2f}]".format(score, is_solved)
+        pbar.set_description(desc)
+        pbar.refresh()
     return score_history, avg_score_history
 
 
diff --git a/main.py b/main.py
index 704a05a..daea0e6 100644
--- a/main.py
+++ b/main.py
@@ -6,7 +6,7 @@ import os
 import atexit
 import gym
 
-from agents import DQAgent
+from agents import QAgent
 import environment_wrapper as ew
 
 # Allow GPU usage or force tensorflow to use the CPU.
@@ -21,18 +21,20 @@ if __name__ == '__main__':
     env = gym.make('LunarLander-v2')
 
     # 2. Create a learning agent
-    marvin = DQAgent(env.action_space.n, env.observation_space.shape[0], 'double_test')
+    marvin = QAgent(env.action_space.n, env.observation_space.shape[0], 'from_scratch')
 
     # (2.5) *optional* Load agent memory and/or net from disk.
-    marvin.load('saved_agents/large_memory/large_memory', net=False)
+    LOAD_MEMORIES = False
+    LOAD_ANN = False
+    marvin.load('saved_agents/agent/agent', net=LOAD_ANN, memory=LOAD_MEMORIES)
 
     # 3. Set your configurations for the run.
-    RENDER = True
+    RENDER = False
     LEARNING = True
-    LEARN_ONLINE = False
-    LEARN_OFFLINE = True
-    RUN_EPISODES = 200
-    LEARN_OFFLINE_EPOCHS = 100
+    LEARN_ONLINE = True
+    LEARN_OFFLINE = False
+    RUN_EPISODES = 500
+    LEARN_OFFLINE_EPOCHS = 500
     SAVE_PATH = "./saved_agents"
 
     # Register an *atexit* callback,
@@ -59,4 +61,4 @@ if __name__ == '__main__':
 
     # Show the result of the run.
     if RENDER:
-        ew.process_logs(avg_score, loss)
+        ew.process_logs(avg_score, loss, title=marvin.name)
-- 
GitLab