Commit 1b465405 authored by Armin Co

Added new config options and launch configurations

- learn_iterations
- offline_validate_every_x_iteration
parent 353aaf19
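For orientation, a minimal sketch (not code from this commit) of how the two new options are consumed, based on the changes below. Config, learn_iterations, offline_validate_every_x_iteration and offline_epochs come from the diff; the validate callback and the name learn_offline_sketch are placeholders for the validation run that environment_wrapper performs.

class Config:
    learn_iterations = 1                      # epochs handed on to agent.learn()
    offline_validate_every_x_iteration = 10   # -1 disables validation during offline training
    offline_epochs = 1000

def learn_offline_sketch(agent, conf, validate):
    """Illustrative offline loop: train each epoch, validate periodically."""
    for i in range(conf.offline_epochs):
        loss = agent.learn(offline=True, epochs=conf.learn_iterations)
        if (conf.offline_validate_every_x_iteration != -1
                and i % conf.offline_validate_every_x_iteration == 0):
            validate(agent)  # e.g. one non-learning episode in the environment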
@@ -4,5 +4,6 @@ saved_agents
 benchmarks
 baselines
 workspace.code-workspace
+test
 tech_demo.py
 *.png
@@ -2,6 +2,7 @@ import random
 import numpy as np
 from memory import Memory
 from networks import QNet
+from steering_wheel import Controller

 class QAgent:
     gamma = 0.99
@@ -29,10 +30,9 @@ class QAgent:
     def remember(self, state, action, reward, following_state, done):
         self.memory.add(state, action, reward, following_state, done)

-    def learn(self, offline=False):
+    def learn(self, offline=False, epochs=1):
         """ Learn the Q-Function. """
         batch_size = self.online_batch_size
-        epochs = 1

         if offline:
             batch_size = self.OFFLINE_BATCHSIZE
@@ -49,7 +49,7 @@ class QAgent:
         y[[idx], [actions]] = qMax
         if offline:
-            history = self.q.net.fit(states, y, epochs=2, verbose=0)
+            history = self.q.net.fit(states, y, epochs=epochs, verbose=0)
             loss = history.history['loss'][-1]
         else:
             loss = self.q.fit(states, y, epochs)
@@ -64,8 +64,9 @@ class QAgent:
         self.memory.save(path+'/' + self.name + '.mem')

     def load(self, path, net=True, memory=True):
-        print(path)
+        print('Load: ' + path)
         if net:
+            print('Network')
             self.q.load(path+'.net')
         if memory:
             self.memory.load(path+'.mem')
@@ -81,14 +82,13 @@ class DQAgent(QAgent):
         action_values = (self.q.predict(state) + self.q2.predict(state)) / 2
         return np.argmax(action_values[0])

-    def learn(self, offline=False):
+    def learn(self, offline=False, epochs=1):
         for _ in range(2):
             if np.random.rand() < 0.5:
                 temp = self.q
                 self.q = self.q2
                 self.q2 = temp
         batch_size = self.online_batch_size
-        epochs = 1
         if offline:
             batch_size = self.OFFLINE_BATCHSIZE
         if len(self.memory.history) < batch_size:
@@ -101,10 +101,22 @@ class DQAgent(QAgent):
         idx = np.array([i for i in range(batch_size)])
         y[[idx], [actions]] = qMax
         if offline:
-            history = self.q.net.fit(states, y, epochs=2, verbose=0)
+            history = self.q.net.fit(states, y, epochs=epochs, verbose=0)
             loss = history.history['loss'][-1]
         else:
             loss = self.q.fit(states, y, epochs)
         if self.epsilon > self.epsilon_min:
             self.epsilon *= self.epsilon_decay
         return loss
+
+
+class CarlaManual(QAgent):
+    control = None
+
+    def __init__(self, conf):
+        super().__init__(conf)
+        self.control = Controller()
+
+    def get_action(self, state):
+        self.control.on_update()
+        return self.control.get_action()
@@ -97,8 +97,9 @@ class ObstacleSensor:
         self.parent = parent
         bp = world.get_blueprint_library().find('sensor.other.obstacle')
         bp.set_attribute('distance', '10')
-        bp.set_attribute('hit_radius', '3')
-        print(bp.get_attribute('hit_radius'))
+        bp.set_attribute('hit_radius', '5')
+        # print(bp.get_attribute('hit_radius'))
+        position = carla.Transform(carla.Location(x=-2, y=0, z=0.0), carla.Rotation(pitch=0.0, yaw=0, roll=0.0))
         self.sensor = world.spawn_actor(bp, carla.Transform(), attach_to=parent, attachment_type=carla.AttachmentType.Rigid)
         weak_self = weakref.ref(self)
         self.sensor.listen(lambda event: ObstacleSensor._on_event(weak_self, event))
@@ -108,12 +109,12 @@ class ObstacleSensor:
         if not self:
             return
         self.collision = event
-        if not event.other_actor.type_id == 'static.road':
-            print(self.parent.get_location())
+        if not event.other_actor.type_id == 'static.road' and not event.other_actor.type_id == 'static.roadline' and not event.other_actor.type_id == 'static.sidewalk' and False:
+            print(self.parent.get_transform())
+            print(event.transform)
             print(self.parent.get_velocity())
             print(event.other_actor.type_id)
-            print(event.other_actor.transform)
-            print(event.distance)
+            print(str(event.distance))


 class LidarSensor:
@@ -333,7 +334,7 @@ if __name__ == "__main__":
         clock.tick(5)
         ctrl.on_update()
         obs, reward, done, _ = env.step(ctrl.get_action(), render=True)
-        print(str(reward) + ' ' + str(done))
+        # print(str(reward) + ' ' + str(done))
         cumulated_reward += reward
         if done:
             break
...
@@ -5,6 +5,8 @@ from tqdm import trange
 import pandas as pd
 import matplotlib.pyplot as plt

+IS_SOLVED = 195
+
 class Config:
     render = False
     force_cpu = True
@@ -18,8 +20,10 @@ class Config:
     net_layout= [256, 128]
     eps_decay = 0.9996
     learn_rate= 0.001
+    learn_iterations = 1
     run_episodes = 20
     offline_epochs = 1000
+    offline_validate_every_x_iteration = 10
     load_ann = False
     load_mem = False
     load_from = 'agnt'
@@ -35,7 +39,8 @@ class Config:
         for layer in self.net_layout:
             self.name += '_' + str(layer) + '_'
         self.name += str(self.eps_decay) + '_'
-        self.name += str(self.learn_rate)
+        self.name += str(self.learn_rate) + '_'
+        self.name += str(self.learn_iterations)
@@ -52,7 +57,7 @@ def reset(environment):
     return state

-def one_episode(environment, agent, render, learn, max_steps=1000):
+def one_episode(environment, agent, render, learn, conf=None, max_steps=1000):
     """ Perform one episode of the agent in the environment. """
     score = 0
     state = reset(environment)
@@ -65,22 +70,33 @@ def one_episode(environment, agent, render, learn, max_steps=1000):
         score += reward
         state = following_state
         if learn:
-            agent.learn()
+            if conf is not None:
+                agent.learn(epochs=conf.learn_iterations)
+            else:
+                agent.learn()
         if done:
             break
     return score

-def learn_offline(agent, epochs=1):
+def learn_offline(agent, conf):
     """ Train the agent with its memories. """
     print('Learning with ', len(agent.memory.history), ' memories.')
-    pbar = trange(epochs, desc='Loss: x')
-    for _ in pbar:
-        loss = agent.learn(offline=True)
+    pbar = trange(conf.offline_epochs, desc='Loss: x')
+    for i in pbar:
+        loss = agent.learn(offline=True, epochs=conf.learn_iterations)
         desc = ('Loss: %05.4f' %(loss))
         pbar.set_description(desc)
         pbar.refresh()
+        if i % conf.offline_validate_every_x_iteration == 0 and conf.offline_validate_every_x_iteration is not -1:
+            score, avg = run(conf.env, conf.agent, 1, render=conf.render, learn=False, conf=conf)
+            conf.name += '1'
+            process_logs(avg, score, conf)
+            if avg[-1] > IS_SOLVED:
+                break

-def run(environment, agent, episodes, render=True, learn=True):
+def run(environment, agent, episodes, render=True, learn=True, conf=None):
     """ Run an agent """
     # Set the exploring rate to its minimum.
@@ -93,13 +109,13 @@ def run(environment, agent, episodes, render=True, learn=True):
     pbar = trange(episodes, desc=agent.name + ' [act, avg]: [0, 0]', unit="Episodes")
     for _ in pbar:
-        score = one_episode(environment, agent, render, learn)
+        score = one_episode(environment, agent, render, learn, conf=conf)
         score_history.append(score)
         is_solved = np.mean(score_history[-100:])
         avg_score_history.append(is_solved)
-        if is_solved > 195 and learn:
+        if is_solved > IS_SOLVED and learn:
             break
         desc = (agent.name + " [act, avg]: [{0:.2f}, {1:.2f}]".format(score, is_solved))
         pbar.set_description(desc)
@@ -109,6 +125,10 @@ def run(environment, agent, episodes, render=True, learn=True):
 def process_logs(avg_score_history, loss, conf):
     df = pd.DataFrame(list(zip(loss, avg_score_history)), columns=['Score', 'Average'])
+    try:
+        os.makedirs(conf.save_to + conf.name)
+    except:
+        pass
     df.to_csv(conf.save_to + conf.name + '/' + conf.name + '.csv')

     """ Plot the log history """
@@ -119,6 +139,7 @@ def process_logs(avg_score_history, loss, conf):
     plt.savefig(conf.save_to + conf.name + '/' + conf.name + '.png', format="png")
     if conf.render:
         plt.show()
+    df.to_csv(conf.save_to + conf.name + '/' + conf.name + '.csv')

 def load_logs(file):
     df = pd.read_csv(file)
...
@@ -28,14 +28,14 @@ def run(conf):
     # Offline training of the agent with
     # previous collected and saved memories.
    if conf.learn_offline and conf.learn:
-        ew.learn_offline(marvin, epochs=conf.offline_epochs)
+        ew.learn_offline(marvin, conf)

     # Run the agent in the environment for the
     # number of specified epochs. Either to
     # verify the performance of the agent or
     # to train the agent.
     _LEARN = conf.learn_online and conf.learn
-    loss, avg_score = ew.run(conf.env, marvin, conf.run_episodes, render=conf.render, learn=_LEARN)
+    loss, avg_score = ew.run(conf.env, marvin, conf.run_episodes, render=conf.render, learn=_LEARN, conf=conf)

     # Save the final training result of the agent.
     if conf.learn:
...
@@ -17,7 +17,7 @@ c_32.env_type = 'CartPole'
 c_32.net_layout = [32, 32]
 c_32.eps_decay = 0.9995
 c_32.learn_rate= 0.00075
-c_32.run_episodes = 350
+c_32.run_episodes = 1000
 c_32.save_to = 'baselines/'
@@ -36,22 +36,54 @@ c_512.net_layout = [512, 32]
 c_1024 = copy.deepcopy(c_32)
 c_1024.net_layout = [1024, 32]

+c_2048 = copy.deepcopy(c_32)
+c_2048.net_layout = [2048, 32]
+
 cd_3 = copy.deepcopy(c_32)
-cd_3.net_layout[128, 64, 32]
+cd_3.net_layout = [128, 64, 32]

 cd_4 = copy.deepcopy(c_32)
-cd_4.net_layout[128, 64, 32, 32]
+cd_4.net_layout = [128, 64, 32, 32]

+cd_5 = copy.deepcopy(c_32)
+cd_5.net_layout = [256, 128, 128, 64]
+cd_5.learn_iterations = 5
+cd_5.learn_rate = 0.001
+
+cd_6 = copy.deepcopy(c_32)
+cd_6.net_layout = [512, 256, 128, 64]
+cd_6.learn_iterations = 10
+cd_6.learn_rate = 0.00075
+
 cd_128 = copy.deepcopy(c_32)
-cd_128.net_layout[128, 128, 128]
+cd_128.net_layout = [128, 128, 128]

 cd_256 = copy.deepcopy(c_32)
-cd_256.net_layout[256, 256, 256]
+cd_256.net_layout = [256, 256, 256]

 cd_512 = copy.deepcopy(c_32)
-cd_512.net_layout[512, 512, 512]
+cd_512.net_layout = [512, 512, 512]
+cd_512.learn_iterations = 10
+cd_512.learn_rate = 0.001

-conf = c_32
+offline = copy.deepcopy(c_32)
+offline.force_cpu = False
+offline.load_from = 'Offline_Config_Test'
+offline.load_mem = True
+offline.load_ann = False
+offline.learn_offline = False
+offline.learn_online = True
+offline.run_episodes = 100
+offline.net_layout = [1024, 1024, 1024, 256]
+offline.learn_rate = 0.0005
+offline.learn_iterations = 1
+offline.offline_validate_every_x_iteration = 1
+offline.offline_epochs = 100
+offline.name = 'OnlineValidation'
+offline.render = False
+offline.save_to = 'test/'
+
+conf = offline

 conf.conf_to_name()
 conf.agent = QAgent(conf)
...
@@ -12,7 +12,7 @@ c.render = False
 c.env = gym.make('LunarLander-v2')
 c.env_type = 'Lunar'
 c.net_layout = [256, 128]
-c.eps_decay = 0.9996
+c.eps_decay = 0.9995
 c.learn_rate= 0.001
 c.run_episodes = 300
 c.save_to = 'benchmarks/'
@@ -21,7 +21,12 @@ smallNet = copy.deepcopy(c)
 smallNet.name = 'SmallNet'
 smallNet.net_layout = [128, 32]
 smallNet.conf_to_name()
 # smallNet.agent = QAgent(smallNet)
+
+smallNetSlow = copy.deepcopy(c)
+smallNetSlow.name = 'SmallNetSlow'
+smallNetSlow.net_layout = [128, 32]
+smallNetSlow.learn_rate = 0.0005
+smallNetSlow.conf_to_name()

 smallNetDeep = copy.deepcopy(c)
 smallNetDeep.name = 'SmallNetDeep'
@@ -95,8 +100,9 @@ lun.conf_to_name()
 # lun.agent = QAgent(lun)

 # configuration = smallNet
+configuration = smallNetSlow
 # configuration = smallNetDeep
-configuration = normalNet
+# configuration = normalNet
 # configuration = normalSlowDecay
 # configuration = normalSlowLearn
 # configuration = largeNet
...
import pandas as pd
import matplotlib.pyplot as plt
from os import listdir
from os.path import isfile, join
from pathlib import Path
BASE_PATH = '/home/armin/Master/semester_3/carla/'
DIR='baselines/hyrican'
path = BASE_PATH + DIR
def plot_csv(file_path, show=False):
df = pd.read_csv(file_path)
act_score = df['Score']
avg_score = df['Average']
plt.figure()
plt.plot(act_score, label='Episode Score')
plt.plot(avg_score, label='Average Score')
plt.xlabel('Episode')
plt.ylabel('Score')
plt.legend()
plt.title(file_path)
plt.savefig(file_path + '.png')
if show:
plt.show()
for dir in Path(path).iterdir():
for file in listdir(dir):
file_path = join(dir, file)
if isfile(file_path):
if file_path.endswith('.csv'):
plot_csv(file_path, show=True)
import main
import environment_wrapper as ew
import gym
import copy
from agents import QAgent as QAgent
from carla_environment import CarlaEnvironment
c = ew.Config()
c.name = 'Base'
c.render = True
c.env_type = 'Carla'
c.net_layout = [256, 128]
c.eps_decay = 0.9995
c.learn_rate= 0.001
c.run_episodes = 20
c.save_to = 'test/'
t = copy.deepcopy(c)
t.render = True
t.net_layout = [1024, 1024, 256, 32]
t.eps_decay = 0.9993
t.learn_rate = 0.0005
t.force_cpu = False
t.load_mem = True
t.load_ann = False
t.save_to = 'test/'
t.load_from = 'Carla_CarlaOffline_1024__1024__256__32_0.9993_0.0005_50'
t.name = 'Offline'
t.learn_offline = True
t.learn_online = True
t.run_episodes = 500
t.offline_epochs = 100
t.learn_iterations = 100
t.offline_validate_every_x_iteration = -1
configuration = t
configuration.env = CarlaEnvironment(render=configuration.render)
configuration.conf_to_name()
configuration.agent = QAgent(configuration)
main.run(configuration)