Commit 0fe0f9b8 authored by Armin Co

Working Environment

parent c2fed4b4
Branch: change_modified_reward_v0
@@ -5,6 +5,7 @@ benchmarks
 baselines
 simple
 final_wo_o
+orientation
 workspace.code-workspace
 test
 tech_demo.py
......
@@ -3,6 +3,7 @@ import numpy as np
 from memory import Memory
 from networks import QNet
 from steering_wheel import Controller
+from keras.callbacks import EarlyStopping
 class QAgent:
     gamma = 0.99
@@ -38,7 +39,7 @@ class QAgent:
         if offline:
             batch_size = self.OFFLINE_BATCHSIZE
-        if len(self.memory.history) < batch_size:
+        if len(self.memory.history) < batch_size * 35:
             return
         states, actions, rewards, following_states, dones = self.memory.get_batch(
@@ -85,6 +86,8 @@ class DQAgent(QAgent):
         return np.argmax(action_values[0])
     def learn(self, offline=False, epochs=1):
+        if self.epsilon > self.epsilon_min:
+            self.epsilon *= self.epsilon_decay
         for _ in range(2):
             if np.random.rand() < 0.5:
                 temp = self.q
@@ -93,7 +96,7 @@ class DQAgent(QAgent):
         batch_size = self.online_batch_size
         if offline:
             batch_size = self.OFFLINE_BATCHSIZE
-        if len(self.memory.history) < batch_size:
+        if len(self.memory.history) < batch_size * 35:
             return
         states, actions, rewards, following_states, dones = self.memory.get_batch(
             batch_size)
@@ -103,12 +106,11 @@ class DQAgent(QAgent):
         idx = np.array([i for i in range(batch_size)])
         y[[idx], [actions]] = qMax
         if offline:
-            history = self.q.net.fit(states, y, epochs=epochs, verbose=0)
+            callback = EarlyStopping(monitor='loss', patience=2, min_delta=0.1, restore_best_weights=True)
+            history = self.q.net.fit(states, y, epochs=epochs, verbose=0, callbacks=[callback])
             loss = history.history['loss'][-1]
         else:
             loss = self.q.fit(states, y, epochs)
-        if self.epsilon > self.epsilon_min:
-            self.epsilon *= self.epsilon_decay
         return loss
     def load(self, path, net=True, memory=True):
......
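For orientation, here is a small, self-contained sketch (not the repository code) of the two ideas the agent hunks above touch: the epsilon decay that now runs at the top of DQAgent.learn(), and the random swap between the two value networks used for the double-Q update. The ToyDoubleAgent class, the q/q2 attribute names and the placeholder values are assumptions for illustration; only temp = self.q and the surrounding control flow are visible in the diff.

import numpy as np

# Stand-in agent, not the project's DQAgent: q and q2 are placeholders
# for the two value networks implied by the swap in the diff above.
class ToyDoubleAgent:
    def __init__(self):
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.9995
        self.q, self.q2 = "net_A", "net_B"

    def learn_step(self):
        # Exploration now decays once per learn() call, even when the
        # replay buffer is still too small and the method returns early.
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
        # Double-Q style: with roughly 50% probability (tried twice),
        # swap which network is updated against the other's estimates.
        for _ in range(2):
            if np.random.rand() < 0.5:
                self.q, self.q2 = self.q2, self.q

agent = ToyDoubleAgent()
agent.learn_step()
print(agent.epsilon, agent.q, agent.q2)

Moving the decay to the top of learn() means epsilon shrinks on every call, not only on calls that reach the fit; the new buffer gate of batch_size * 35 above is what makes that distinction matter.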
@@ -266,7 +266,7 @@ class World:
         if pos_diff < 1.1 and v < 0.01:
             done = True
             r += 150
-            if yaw_dif < 0.01:
+            if abs(yaw_dif) < 2:
                 r+= 50
         if self.collision_sensor.collision is not None:
......
@@ -57,6 +57,8 @@ def reset(environment):
 def one_episode(environment, agent, render, learn, conf=None, max_steps=1000):
     """ Perform one episode of the agent in the environment. """
     score = 0
+    if conf.env_type == 'Carla':
+        max_steps = 300
     state = reset(environment)
     for _ in range(max_steps):
         if render:
@@ -88,6 +90,9 @@ def learn_offline(agent, conf):
         desc = ('Loss: %05.4f' %(loss)) + desc_train
         pbar.set_description(desc)
         pbar.refresh()
+        if loss > 1000:
+            print("Loss exceeded 1000!!")
+            exit()
         if i % conf.offline_validate_every_x_iteration == 1 and conf.offline_validate_every_x_iteration is not -1:
             agent.epsilon = agent.epsilon_min
             score = one_episode(conf.env, agent, conf.render, False, conf=conf)
@@ -97,6 +102,7 @@ def learn_offline(agent, conf):
             avg_score_history.append(is_solved)
             if is_solved > IS_SOLVED:
                 break
+    if conf.offline_validate_every_x_iteration is not -1:
        process_logs(avg_score_history, score_history, conf)
......
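The learn_offline() hunks above add a divergence guard and make the periodic greedy validation conditional on offline_validate_every_x_iteration. A toy, self-contained version of that control flow (every name below is a stand-in, not the project's code):

# Toy stand-ins; the real loop uses conf, agent, pbar and one_episode().
class ToyAgent:
    epsilon = 1.0
    epsilon_min = 0.01
    def learn(self, offline=True):
        return 0.5  # pretend training loss

def run_validation_episode(agent):
    return 100.0    # pretend score of one greedy episode

validate_every = 500          # -1 disables validation entirely
agent = ToyAgent()
for i in range(2000):
    loss = agent.learn(offline=True)
    if loss > 1000:           # new guard: abort a diverging offline fit
        print("Loss exceeded 1000!!")
        break
    if validate_every != -1 and i % validate_every == 1:
        agent.epsilon = agent.epsilon_min     # act greedily while validating
        score = run_validation_episode(agent)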
@@ -34,7 +34,7 @@ class QNet:
            self, states): return self.net.predict_on_batch(states)
     def fit(self, X, Y, epochs=1, verbose=0):
-        callback = EarlyStopping(monitor='loss', patience=3)
+        callback = EarlyStopping(monitor='loss', patience=2, min_delta=0.1, restore_best_weights=True)
         history = self.net.fit(X, Y, epochs=epochs, verbose=verbose, callbacks=[callback])
         return history.history['loss'][-1]
......
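Both agents.py-style offline fits and QNet.fit now use the same EarlyStopping settings. A standalone toy (dummy model and data, not the project's QNet) showing what patience=2, min_delta=0.1 and restore_best_weights=True do to a fit:

import numpy as np
from keras.callbacks import EarlyStopping
from keras.models import Sequential
from keras.layers import Dense

# Dummy network and data standing in for QNet.net and the TD targets.
model = Sequential([Dense(32, activation='relu', input_shape=(4,)), Dense(2)])
model.compile(optimizer='adam', loss='mse')
X = np.random.rand(256, 4)   # states
Y = np.random.rand(256, 2)   # action-value targets

# Stop once the loss fails to improve by at least 0.1 for two epochs,
# then roll the weights back to the best epoch seen.
callback = EarlyStopping(monitor='loss', patience=2, min_delta=0.1,
                         restore_best_weights=True)
history = model.fit(X, Y, epochs=50, verbose=0, callbacks=[callback])
print('stopped after', len(history.history['loss']), 'epochs')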
@@ -5,16 +5,15 @@ from agents import DQAgent, QAgent
 c = ew.Config()
-c.name = 'DoubleCartPole'
-c.render = False
+c.name = '01'
+c.render = True
 c.env = gym.make('CartPole-v0')
 c.env_type = 'CartPole'
-c.net_layout = [128, 64, 32]
+c.net_layout = [128, 64]
 c.eps_decay = 0.9991
 c.learn_rate= 0.001
 c.run_episodes = 300
-c.save_to = 'benchmarks/'
+c.save_to = 'Screencast/'
 c.conf_to_name()
-c.agent = QAgent(c)
+c.agent = DQAgent(c)
 main.run(c)
@@ -2,7 +2,7 @@ import main
 import environment_wrapper as ew
 import gym
 import copy
-from agents import QAgent, DQAgent
+from agents import CarlaManual, QAgent, DQAgent
 from carla_environment import CarlaEnvironment
 c = ew.Config()
@@ -19,101 +19,50 @@ c.load_from = 'Carla_2_256__128__128_0.9995_0.001_1DBL'
 c.load_mem = True
 c.load_ann = True
-# o = copy.deepcopy(c)
-# o.name = 'JTAP_4'
-# o.force_cpu = True
-# o.render = True
-# o.learn = True
-# o.env_type = 'Carla'
-# o.net_layout = [256, 128, 128]
-# o.save_to = 'simple/'
-# o.load_from = 'Carla_JTAP_0_256__128__32_0.9995_0.001_1DBL'
-# o.load_mem = True
-# o.load_ann = False
-# o.learn_offline = True
-# o.offline_epochs = 75000
-# o.offline_batchsize = 256
-# o.learn_iterations = 1
-# o.offline_validate_every_x_iteration = -1
-# o.learn_online = False
-# o.eps_decay = 0.9995
-# o.learn_rate= 0.001
-# o.run_episodes = 15
 o = copy.deepcopy(c)
-o.name = 'JTAP_0'
+o.name = 'Final_wo_Obstacles_052O_2M'
 o.force_cpu = True
-o.render = True
+o.render = False
 o.learn = True
 o.env_type = 'Carla'
-o.net_layout = [256, 128]
-o.save_to = 'orientation/'
-o.load_from = ''
-o.load_mem = False
+o.net_layout = [1024, 1024, 256, 128]
+o.save_to = 'final_wo_o/'
+o.load_from = 'Carla_Final_wo_Obstacles_02_2M_256__256__128_0.99974_0.001_1DBL'
+o.load_mem = True
 o.load_ann = False
 o.learn_online = True
-o.eps_decay = 0.9995
-o.learn_rate= 0.001
-o.run_episodes = 750
+o.eps_decay = 0.9915
+o.learn_rate= 0.01
+o.run_episodes = 550
+o.learn_offline = True
+o.offline_epochs = 100000
+o.offline_batchsize = 64
+o.offline_validate_every_x_iteration = 500
 o.learn_iterations = 1
 validate = copy.deepcopy(c)
-validate.name = 'JTAP_Validate'
+validate.name = 'Validate0'
 validate.render = True
-validate.learn = True
+validate.learn = False
 validate.env_type = 'Carla'
 validate.net_layout = [256, 128]
 validate.save_to = 'simple/'
-validate.load_from = 'Carla_JTAP_0_256__128__32_0.9995_0.001_1DBL'
+validate.load_from = 'Carla_JTAP_1_256__128__128_0.9995_0.001_1'
 validate.load_mem = False
 validate.load_ann = True
 validate.learn_offline = False
-validate.offline_epochs = 1500
-validate.offline_batchsize = 64000
+validate.offline_epochs = 20
 validate.learn_iterations = 1
-validate.offline_validate_every_x_iteration = -1
+validate.offline_validate_every_x_iteration = 1
 validate.learn_online = True
-validate.eps_decay = 0.9995
-validate.learn_rate= 0.001
+validate.eps_decay = 0.95
+validate.learn_rate= 0.0000005
 validate.run_episodes = 10
-# t = copy.deepcopy(c)
-# t.render = True
-# t.net_layout = [1024, 1024, 256, 32]
-# t.eps_decay = 0.9993
-# t.learn_rate = 0.0005
-# t.force_cpu = False
-# t.load_mem = True
-# t.load_ann = False
-# t.save_to = 'test/'
-# t.load_from = 'Carla_CarlaOffline_1024__1024__256__32_0.9993_0.0005_50'
-# t.name = 'Offline'
-# t.learn_offline = True
-# t.learn_online = True
-# t.run_episodes = 500
-# t.offline_epochs = 100
-# t.learn_iterations = 100
-# t.offline_validate_every_x_iteration = -1
-configuration = o
+configuration = validate
 configuration.env = CarlaEnvironment(render=configuration.render, manual=False)
 configuration.conf_to_name()
-configuration.agent = QAgent(configuration)
+configuration.agent = DQAgent(configuration)
+# configuration.agent = CarlaManual(configuration)
 main.run(configuration)
-# o = copy.deepcopy(c)
-# o.name = '5D'
-# o.render = True
-# o.learn = False
-# o.env_type = 'Carla'
-# o.net_layout = [1024, 512, 256]
-# o.save_to = 'test/'
-# o.load_from = 'Carla_01D_1024__512__512_0.9991_0.00025_1DBLoffline'
-# o.load_mem = True
-# o.load_ann = True
-# o.learn_offline = False
-# o.offline_epochs = 1000
-# o.learn_iterations = 1
-# o.offline_validate_every_x_iteration = -1
-# o.learn_online = True
-# o.eps_decay = 0.9991
-# o.learn_rate= 0.0005
-# o.run_episodes = 20
\ No newline at end of file
@@ -5,4 +5,4 @@
 # apt install carla-simulator
 #
-DISPLAY= /opt/carla-simulator/CarlaUE4.sh -benchmark -fps=15 -quality-level=Low -opengl
\ No newline at end of file
+DISPLAY= /opt/carla-simulator/CarlaUE4.sh -benchmark -fps=10 -quality-level=Low -opengl
\ No newline at end of file