From 0fe0f9b8c13f84546317f907e0c56d0186c3cc02 Mon Sep 17 00:00:00 2001
From: Armin <armin.co@hs-bochum.de>
Date: Tue, 16 Mar 2021 16:48:20 +0100
Subject: [PATCH] Working Environment

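Require 35 batches of stored transitions before training starts, decay
epsilon once per learn() call in DQAgent, and stop offline fits early
via EarlyStopping (patience=2, min_delta=0.1, restore_best_weights).
Relax the parking yaw bonus to |yaw_dif| < 2, cap Carla episodes at
300 steps, abort offline learning on diverging loss, update the
cartpole and manual_carla run configurations, and run the simulator at
10 FPS.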
---
 .gitignore                  |   1 +
 agents.py                   |  12 +++--
 carla_environment.py        |   2 +-
 environment_wrapper.py      |   6 +++
 memory.py                   |   2 +-
 networks.py                 |   2 +-
 run_scripts/cartpole.py     |  11 ++--
 run_scripts/manual_carla.py | 103 +++++++++---------------------------
 run_scripts/start_carla.sh  |   2 +-
 9 files changed, 49 insertions(+), 92 deletions(-)

diff --git a/.gitignore b/.gitignore
index e624cfb..068c0b7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,7 @@ benchmarks
 baselines
 simple
 final_wo_o
+orientation
 workspace.code-workspace
 test
 tech_demo.py
diff --git a/agents.py b/agents.py
index 2e7cdbe..adf7204 100644
--- a/agents.py
+++ b/agents.py
@@ -3,6 +3,7 @@ import numpy as np
 from memory import Memory
 from networks import QNet
 from steering_wheel import Controller
+from keras.callbacks import EarlyStopping
 
 class QAgent:
     gamma = 0.99
@@ -38,7 +39,7 @@ class QAgent:
         if offline:
             batch_size = self.OFFLINE_BATCHSIZE
 
-        if len(self.memory.history) < batch_size:
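+        # require at least 35 batches of stored transitions before training begins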
+        if len(self.memory.history) < batch_size * 35:
             return
 
         states, actions, rewards, following_states, dones = self.memory.get_batch(
@@ -85,6 +86,8 @@ class DQAgent(QAgent):
         return np.argmax(action_values[0])
 
     def learn(self, offline=False, epochs=1):
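+        # decay the exploration rate once per learn() call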
+        if self.epsilon > self.epsilon_min:
+            self.epsilon *= self.epsilon_decay
         for _ in range(2):
             if np.random.rand() < 0.5:
                 temp = self.q
@@ -93,7 +96,7 @@ class DQAgent(QAgent):
             batch_size = self.online_batch_size
             if offline:
                 batch_size = self.OFFLINE_BATCHSIZE
-            if len(self.memory.history) < batch_size:
+            if len(self.memory.history) < batch_size * 35:
                 return
             states, actions, rewards, following_states, dones = self.memory.get_batch(
                 batch_size)
@@ -103,12 +106,11 @@ class DQAgent(QAgent):
             idx = np.array([i for i in range(batch_size)])
             y[[idx], [actions]] = qMax
             if offline:
-                history = self.q.net.fit(states, y, epochs=epochs, verbose=0)
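+                # stop the offline fit early once the loss fails to improve by 0.1 for two epochs, keeping the best weights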
+                callback = EarlyStopping(monitor='loss', patience=2, min_delta=0.1, restore_best_weights=True)
+                history = self.q.net.fit(states, y, epochs=epochs, verbose=0, callbacks=[callback])
                 loss = history.history['loss'][-1]
             else:
                 loss = self.q.fit(states, y, epochs)
-            if self.epsilon > self.epsilon_min:
-                self.epsilon *= self.epsilon_decay
         return loss
 
     def load(self, path, net=True, memory=True):
diff --git a/carla_environment.py b/carla_environment.py
index 5f00231..fc0ad1c 100644
--- a/carla_environment.py
+++ b/carla_environment.py
@@ -266,7 +266,7 @@ class World:
         if pos_diff < 1.1 and v < 0.01:
             done = True
             r += 150
-            if yaw_dif < 0.01:
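+            # extra reward when the final heading is close to the target orientation (|yaw_dif| below 2)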
+            if abs(yaw_dif) < 2:
                 r+= 50
             
         if self.collision_sensor.collision is not None:
diff --git a/environment_wrapper.py b/environment_wrapper.py
index 61242e3..2c61cc0 100644
--- a/environment_wrapper.py
+++ b/environment_wrapper.py
@@ -57,6 +57,8 @@ def reset(environment):
 def one_episode(environment, agent, render, learn, conf=None, max_steps=1000):
     """ Perform one episode of the agent in the environment. """
     score = 0
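+    # cap Carla episodes at 300 steps; conf defaults to None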
+    if conf is not None and conf.env_type == 'Carla':
+        max_steps = 300
     state = reset(environment)
     for _ in range(max_steps):
         if render:
@@ -88,6 +90,9 @@ def learn_offline(agent, conf):
         desc = ('Loss: %05.4f' %(loss)) + desc_train
         pbar.set_description(desc)
         pbar.refresh()
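+        # abort offline training if the loss diverges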
+        if loss > 1000:
+            print("Loss exceeded 1000!!")
+            exit()
         if i % conf.offline_validate_every_x_iteration == 1 and conf.offline_validate_every_x_iteration is not -1:
             agent.epsilon = agent.epsilon_min
             score = one_episode(conf.env, agent, conf.render, False, conf=conf)
@@ -97,6 +102,7 @@ def learn_offline(agent, conf):
             avg_score_history.append(is_solved)
             if is_solved > IS_SOLVED:
                 break
+
     if conf.offline_validate_every_x_iteration is not -1:
         process_logs(avg_score_history, score_history, conf)
 
diff --git a/memory.py b/memory.py
index d6393a7..b8f2995 100644
--- a/memory.py
+++ b/memory.py
@@ -34,4 +34,4 @@ class Memory:
 
     def load(self, path):
         self.history = pickle.load(open(path, 'rb'))
-        print('Loaded '+ str(len(self.history)) + ' memories.')
+        print('Loaded '+ str(len(self.history)) + ' memories.')
\ No newline at end of file
diff --git a/networks.py b/networks.py
index 32b9461..cf4e5b9 100644
--- a/networks.py
+++ b/networks.py
@@ -34,7 +34,7 @@ class QNet:
         self, states): return self.net.predict_on_batch(states)
 
     def fit(self, X, Y, epochs=1, verbose=0):
-        callback = EarlyStopping(monitor='loss', patience=3)
+        callback = EarlyStopping(monitor='loss', patience=2, min_delta=0.1, restore_best_weights=True)
         history = self.net.fit(X, Y, epochs=epochs, verbose=verbose, callbacks=[callback])
         return history.history['loss'][-1]
 
diff --git a/run_scripts/cartpole.py b/run_scripts/cartpole.py
index ad2bb1d..7951918 100644
--- a/run_scripts/cartpole.py
+++ b/run_scripts/cartpole.py
@@ -5,16 +5,15 @@ from agents import DQAgent, QAgent
 
 c = ew.Config()
 
-c.name = 'DoubleCartPole'
-c.render = False
+c.name = '01'
+c.render = True
 c.env = gym.make('CartPole-v0')
 c.env_type = 'CartPole'
-c.net_layout = [128, 64, 32]
+c.net_layout = [128, 64]
 c.eps_decay = 0.9991
 c.learn_rate= 0.001
 c.run_episodes = 300
-c.save_to = 'benchmarks/'
+c.save_to = 'Screencast/'
 c.conf_to_name()
-c.agent = QAgent(c)
-
+c.agent = DQAgent(c)
 main.run(c)
diff --git a/run_scripts/manual_carla.py b/run_scripts/manual_carla.py
index d09a7b1..4572ae7 100644
--- a/run_scripts/manual_carla.py
+++ b/run_scripts/manual_carla.py
@@ -2,7 +2,7 @@ import main
 import environment_wrapper as ew
 import gym
 import copy
-from agents import QAgent, DQAgent
+from agents import CarlaManual, QAgent, DQAgent
 from carla_environment import CarlaEnvironment
 
 c = ew.Config()
@@ -19,101 +19,50 @@ c.load_from = 'Carla_2_256__128__128_0.9995_0.001_1DBL'
 c.load_mem = True
 c.load_ann = True
 
-# o = copy.deepcopy(c)
-# o.name = 'JTAP_4'
-# o.force_cpu = True
-# o.render = True
-# o.learn = True
-# o.env_type = 'Carla'
-# o.net_layout = [256, 128, 128]
-# o.save_to = 'simple/'
-# o.load_from = 'Carla_JTAP_0_256__128__32_0.9995_0.001_1DBL'
-# o.load_mem = True
-# o.load_ann = False
-# o.learn_offline = True
-# o.offline_epochs = 75000
-# o.offline_batchsize = 256
-# o.learn_iterations = 1
-# o.offline_validate_every_x_iteration = -1
-# o.learn_online = False
-# o.eps_decay = 0.9995
-# o.learn_rate= 0.001
-# o.run_episodes = 15
-
 o = copy.deepcopy(c)
-o.name = 'JTAP_0'
+o.name = 'Final_wo_Obstacles_052O_2M'
 o.force_cpu = True
-o.render = True
+o.render = False
 o.learn = True
 o.env_type = 'Carla'
-o.net_layout = [256, 128]
-o.save_to = 'orientation/'
-o.load_from = ''
-o.load_mem = False
+o.net_layout = [1024, 1024, 256, 128]
+o.save_to = 'final_wo_o/'
+o.load_from = 'Carla_Final_wo_Obstacles_02_2M_256__256__128_0.99974_0.001_1DBL'
+o.load_mem = True
 o.load_ann = False
 o.learn_online = True
-o.eps_decay = 0.9995
-o.learn_rate= 0.001
-o.run_episodes = 750
+o.eps_decay = 0.9915
+o.learn_rate= 0.01
+o.run_episodes = 550
+o.learn_offline = True
+o.offline_epochs = 100000
+o.offline_batchsize = 64
+o.offline_validate_every_x_iteration = 500
+o.learn_iterations = 1
 
 validate = copy.deepcopy(c)
-validate.name = 'JTAP_Validate'
+validate.name = 'Validate0'
 validate.render = True
-validate.learn = True
+validate.learn = False
 validate.env_type = 'Carla'
 validate.net_layout = [256, 128]
 validate.save_to = 'simple/'
-validate.load_from = 'Carla_JTAP_0_256__128__32_0.9995_0.001_1DBL'
+validate.load_from = 'Carla_JTAP_1_256__128__128_0.9995_0.001_1'
 validate.load_mem = False
 validate.load_ann = True
 validate.learn_offline = False
-validate.offline_epochs = 1500
+validate.offline_batchsize = 64000
+validate.offline_epochs = 20
 validate.learn_iterations = 1
-validate.offline_validate_every_x_iteration = -1
+validate.offline_validate_every_x_iteration = 1
 validate.learn_online = True
-validate.eps_decay = 0.9995
-validate.learn_rate= 0.001
+validate.eps_decay = 0.95
+validate.learn_rate= 0.0000005
 validate.run_episodes = 10
 
-# t = copy.deepcopy(c)
-# t.render = True
-# t.net_layout = [1024, 1024, 256, 32]
-# t.eps_decay = 0.9993
-# t.learn_rate = 0.0005
-# t.force_cpu = False
-# t.load_mem = True
-# t.load_ann = False
-# t.save_to = 'test/'
-# t.load_from = 'Carla_CarlaOffline_1024__1024__256__32_0.9993_0.0005_50'
-# t.name = 'Offline'
-# t.learn_offline = True
-# t.learn_online = True
-# t.run_episodes = 500
-# t.offline_epochs = 100
-# t.learn_iterations = 100
-# t.offline_validate_every_x_iteration = -1
-
-configuration = o
+configuration = validate
 configuration.env = CarlaEnvironment(render=configuration.render, manual=False)
 configuration.conf_to_name()
-configuration.agent = QAgent(configuration)
+configuration.agent = DQAgent(configuration)
+# configuration.agent = CarlaManual(configuration)
 main.run(configuration)
-
-# o = copy.deepcopy(c)
-# o.name = '5D'
-# o.render = True
-# o.learn = False
-# o.env_type = 'Carla'
-# o.net_layout = [1024, 512, 256]
-# o.save_to = 'test/'
-# o.load_from = 'Carla_01D_1024__512__512_0.9991_0.00025_1DBLoffline'
-# o.load_mem = True
-# o.load_ann = True
-# o.learn_offline = False
-# o.offline_epochs = 1000
-# o.learn_iterations = 1
-# o.offline_validate_every_x_iteration = -1
-# o.learn_online = True
-# o.eps_decay = 0.9991
-# o.learn_rate= 0.0005
-# o.run_episodes = 20
\ No newline at end of file
diff --git a/run_scripts/start_carla.sh b/run_scripts/start_carla.sh
index bc6da0a..5302c74 100755
--- a/run_scripts/start_carla.sh
+++ b/run_scripts/start_carla.sh
@@ -5,4 +5,4 @@
 # apt install carla-simulator
 #
 
-DISPLAY= /opt/carla-simulator/CarlaUE4.sh -benchmark -fps=15 -quality-level=Low -opengl
\ No newline at end of file
+DISPLAY= /opt/carla-simulator/CarlaUE4.sh -benchmark -fps=10 -quality-level=Low -opengl
\ No newline at end of file
-- 
GitLab