From 4764b70a146a1bdbc06b901287abe2f96a8b4323 Mon Sep 17 00:00:00 2001
From: Armin <armin.co@hs-bochum.de>
Date: Mon, 15 Feb 2021 19:51:40 +0100
Subject: [PATCH] Scriptable Learning

---
 agents.py              | 13 +++----
 benchmarks.py          | 58 ++++++++++++++++++++++++++++++
 carla_environment.py   | 81 +++++++++++++++++++++++++++++++++---------
 environment_wrapper.py | 40 ++++++++++++++++++---
 main.py                | 66 +++++++++++++++-------------------
 networks.py            | 20 ++++++-----
 steering_wheel.py      | 23 ++++++++----
 7 files changed, 221 insertions(+), 80 deletions(-)
 create mode 100644 benchmarks.py

diff --git a/agents.py b/agents.py
index ede5a6d..cc99e0f 100644
--- a/agents.py
+++ b/agents.py
@@ -7,14 +7,15 @@ class QAgent:
     gamma = 0.99
     epsilon = 1.0
     epsilon_min = 0.01
-    epsilon_decay = 0.9996
+    epsilon_decay = 0.9999
     online_batch_size = 64
 
-    def __init__(self, action_space, state_space, name):
-        self.q = QNet(action_space, state_space)
+    def __init__(self, conf):#self, action_space, state_space, name):
+        self.q = QNet(conf)#conf.env.action_space.n, conf.env.observation_space.shape[0])
         self.memory = Memory()
-        self.action_space = action_space
-        self.name = name
+        self.action_space = conf.env.action_space.n
+        self.name = conf.name
+        self.epsilon_decay = conf.eps_decay
 
     def get_action(self, state):
         if np.random.rand() <= self.epsilon:
@@ -31,7 +32,7 @@ class QAgent:
         epochs = 1
 
         if offline:
-            batch_size = 4096
+            batch_size = 2048
 
         if len(self.memory.history) < batch_size:
             return
diff --git a/benchmarks.py b/benchmarks.py
new file mode 100644
index 0000000..49c348a
--- /dev/null
+++ b/benchmarks.py
@@ -0,0 +1,58 @@
+import main
+import environment_wrapper as ew
+import gym
+from carla_environment import CarlaEnvironment
+import copy
+from tqdm import trange
+
+c = ew.Config()
+
+c.name = 'Base'
+c.render = False
+c.env = gym.make('LunarLander-v2')
+c.env_type = 'Lunar'
+c.net_layout = [256, 128]
+c.eps_decay = 0.9996
+c.learn_rate= 0.001
+c.run_episodes = 300
+c.save_to = 'benchmarks/'
+
+smallNet = copy.deepcopy(c)
+smallNet.name = 'SmallNet'
+smallNet.net_layout = [128, 32]
+smallNet.conf_to_name()
+
+normalNet = copy.deepcopy(c)
+normalNet.name = 'NormalNet'
+normalNet.net_layout = [256, 128]
+normalNet.conf_to_name()
+
+normalSlowDecay = copy.deepcopy(c)
+normalSlowDecay.name = 'NormalSlowDecay'
+normalSlowDecay.net_layout = [256, 128]
+normalSlowDecay.eps_decay = 0.99995
+normalSlowDecay.conf_to_name()
+
+normalSlowLearn = copy.deepcopy(c)
+normalSlowLearn.name = 'NormalSlowLearn'
+normalSlowLearn.net_layout = [256, 128]
+normalSlowLearn.learn_rate = 0.0005
+normalSlowLearn.conf_to_name()
+
+largeNet = copy.deepcopy(c)
+largeNet.name = 'LargeNet'
+largeNet.net_layout = [512, 256]
+largeNet.conf_to_name()
+
+deepNet = copy.deepcopy(c)
+deepNet.name = 'DeepNet'
+deepNet.net_layout = [256, 128, 128]
+deepNet.conf_to_name()
+
+# configurations = [smallNet, normalNet ]
+# configurations = [normalSlowDecay, normalSlowLearn]
+configurations = [largeNet, deepNet]
+
+configs = trange(len(configurations), desc='Benchmarking configs', unit="Configuration")
+for i in configs:
+    main.run(configurations[i])
diff --git a/carla_environment.py b/carla_environment.py
index 10e2d83..e41164c 100644
--- a/carla_environment.py
+++ b/carla_environment.py
@@ -10,7 +10,7 @@ import numpy as np
 import pygame
 import time
 
-from steering_wheel import ACTION_SPACE, Controller
+from steering_wheel import ACTION_SPACE, Controller, IDLE
 
 # find carla module
 try:
@@ -87,12 +87,36 @@ class CollisionSensor:
         intensity = math.sqrt(impulse.x**2 + impulse.y**2
             + impulse.z**2)
         # print(intensity)
+class LidarSensor:
+    sensor = None
+    data = None
+    def __init__(self, world, parent):
+        bp = world.get_blueprint_library().find('sensor.lidar.ray_cast')
+        bp.set_attribute('rotation_frequency', '10')
+        bp.set_attribute('channels', '1')
+        bp.set_attribute('points_per_second', '100')
+        bp.set_attribute('range', '7')
+        bp.set_attribute('sensor_tick', '0.1')
+        bp.set_attribute('upper_fov', '0')
+        bp.set_attribute('lower_fov', '0')
+        weak_self = weakref.ref(self)
+        self.sensor = world.spawn_actor(bp, carla.Transform(), attach_to=parent)
+        self.sensor.listen(lambda event: LidarSensor._on_data(weak_self, event))
+
+    def _on_data(weak_self, event):
+        self = weak_self()
+        if not self:
+            return
+        for m in event: print(m.point)
+        if len(event) > 0:
+            print(event.horizontal_angle)
 
 
 class World:
     """ Wrapper for the carla environment, incl. player/vehicle """
     player = None
     collision_sensor = None
+    lidar_sensor = None
     world = None
     blueprint_library = None
     spawn_points = None
@@ -101,6 +125,10 @@ class World:
 
     def __init__(self, world):
         self.world = world
+        settings = self.world.get_settings()
+        settings.fixed_delta_seconds = 0.1
+        settings.synchronous_mode = True
+        self.world.apply_settings(settings)
         self.blueprint_library = self.world.get_blueprint_library()
         self.spawn_points = self.world.get_map().get_spawn_points()
         self.reset()
@@ -115,7 +143,8 @@ class World:
     def observation(self):
         if self.player is not None:
             pos = self.player.get_transform()
-            return [pos.location.x, pos.location.y, pos.rotation.yaw]
+            vel = self.player.get_velocity()
+            return [pos.location.x, pos.location.y, vel.x, vel.y, pos.rotation.yaw]
         else:
             return [0,0,0]
 
@@ -128,6 +157,7 @@ class World:
         start_location = self.player.get_location()
         print(str(start_location))
         self.collision_sensor = CollisionSensor(self.world, self.player)
+        # self.lidar_sensor = LidarSensor(self.world, self.player)
         # start_location.x = 288.0
         # start_location.y = 55.0
         # self.player.set_location(start_location)
@@ -154,6 +184,9 @@ class World:
         for actor in self.actors:
             actor.destroy()
         self.actors = []
+        if self.collision_sensor is not None:
+            self.collision_sensor.sensor.destroy()
+            self.collision_sensor = None
 
 
     def step(self, action):
@@ -161,22 +194,24 @@
         controls = Controller.action_to_controls(action)
         c = carla.VehicleControl(throttle=controls[0], steer=controls[1], brake=controls[2], reverse=controls[3])
         self.player.apply_control(c)
-        reward = self.reward()
+        self.world.tick()
+        reward = self.reward(action)
         self.collision_sensor.collision = None
         return reward
 
-    def reward(self):
+    def reward(self, action):
         target = carla.Transform(carla.Location(x=-25.534311, y=54.460903, z=0.112781), carla.Rotation(pitch=0.000000, yaw=-175.922913, roll=-6.221135))
         pos = self.player.get_transform()
-        pos_diff = (target.location.x - pos.location.x)**2 + (target.location.y - pos.location.y)**2 + 1
-        rot_diff = (target.rotation.yaw - pos.rotation.yaw)**2 + 1
-        r = ((1/pos_diff) - 0.5)
+        pos_diff = math.sqrt((target.location.x - pos.location.x)**2 + (target.location.y - pos.location.y)**2)
+        rot_diff = (target.rotation.yaw - pos.rotation.yaw)
+        r = (-pos_diff / 10) + 1
         at_final_position = False
-        if pos_diff < 1.2:
+        if pos_diff < 1.8:
             if self.was_big_reward == False:
                 r += 100
                 self.was_big_reward = True
-            if rot_diff < 5:
+            if abs(rot_diff) < 5:
+                r += 100
                 at_final_position = True
         else:
             if self.was_big_reward == True:
@@ -184,20 +219,28 @@ class World:
                 r -= 50
 
         velocity = self.player.get_velocity()
-        v = (velocity.x)**2 + \
-            (velocity.y)**2
+        v = math.sqrt((velocity.x)**2 + (velocity.y)**2)
+        if v < 0.01:
+            r -= 1
+        if v > 15.0:
+            r -= 10
+
         done = False
         if at_final_position and v < 0.01:
+            print("Success")
             done = True
         if self.collision_sensor.collision is not None:
-            r -= 100
+            r -= 250
             done = True
+        if action == IDLE:
+            r -= 1
         return self.observation(), r, done, None
 
 class ActionSpace:
     n = ACTION_SPACE
 class ObservationSpace:
-    shape = [3]
+    shape = [5]
+
 class CarlaEnvironment:
     action_space = ActionSpace
     observation_space = ObservationSpace
@@ -230,7 +273,13 @@ class CarlaEnvironment:
         if render:
             self.render()
         return self.world.step(action)
-    
+    
+    def __del__(self):
+        if self.camera is not None:
+            self.camera.sensor.destroy()
+        if self.world is not None:
+            self.world.destroy()
+    
 if __name__ == "__main__":
     LOCALHOST="127.0.0.1"
     SERVER="192.168.188.20"
@@ -241,11 +290,11 @@
     while ctrl.is_running():
         clock.tick(20)
         ctrl.on_update()
-        obs, reward, done = env.step(ctrl.get_action(), render=True)
-        if done:
-            break
+        obs, reward, done, _ = env.step(ctrl.get_action(), render=True)
         print(str(reward) + ' ' + str(done))
         cumulated_reward += reward
+        if done:
+            break
     env.world.destroy()
     pygame.quit()
     print(cumulated_reward)
diff --git a/environment_wrapper.py b/environment_wrapper.py
index fe29d01..5f707e4 100644
--- a/environment_wrapper.py
+++ b/environment_wrapper.py
@@ -1,10 +1,37 @@
 """ Wrapper to abstract different learning environments for an agent. """
 import numpy as np
-from tqdm import tqdm
 from tqdm import trange
+import pandas as pd
 import matplotlib.pyplot as plt
 
 
+class Config:
+    render = False
+    force_cpu = True
+    env = None
+    env_type = 'Lunar'
+    name = 'ConfigTest'
+    learn = True
+    learn_online = True
+    learn_offline = False
+    net_layout= [256, 128]
+    eps_decay = 0.9996
+    learn_rate= 0.001
+    run_episodes = 20
+    offline_epochs = 1000
+    load_ann = False
+    load_mem = False
+    load_from = 'agnt'
+    save_to = 'saved_agents/'
+
+    def conf_to_name(self):
+        self.name += self.env_type
+        for layer in self.net_layout:
+            self.name += '_' + str(layer) + '_'
+        self.name += str(self.eps_decay) + '_'
+        self.name += str(self.learn_rate)
+
+
 def step(environment, action):
     """ Perform one iteratino in the environment. """
 
@@ -74,11 +101,14 @@ def run(environment, agent, episodes, render=True, learn=True):
     return score_history, avg_score_history
 
 
-def process_logs(avg_score_history, loss, title="Title", render=False):
+def process_logs(avg_score_history, loss, conf):
+    df = pd.DataFrame(list(zip(loss, avg_score_history)), columns=['Score', 'Average'])
+    df.to_csv(conf.save_to + conf.name + '/' + conf.name + '.csv')
+
     """ Plot the log history """
     plt.plot([i+1 for i in range(0, len(loss), 2)], loss[::2])
     plt.plot([i+1 for i in range(0, len(avg_score_history), 2)], avg_score_history[::2], '--')
-    plt.title(title)
-    plt.savefig('saved_agents/' + title + '.png', format="png")
-    if render:
+    plt.title(conf.name)
+    plt.savefig(conf.save_to + conf.name + '/' + conf.name + '.png', format="png")
+    if conf.render:
         plt.show()
diff --git a/main.py b/main.py
index 8539786..3b9cf51 100644
--- a/main.py
+++ b/main.py
@@ -4,65 +4,55 @@ Run your desired environment and agent configuration.
 
 import os
 import atexit
 
-from carla_environment import CarlaEnvironment
-CARLA=True
-# import gym
+import gym
 from agents import QAgent as QAgent
 import environment_wrapper as ew
 
-# Allow GPU usage or force tensorflow to use the CPU. 
-FORCE_CPU=True
-if FORCE_CPU:
-    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
-    os.environ["CUDA_VISIBLE_DEVICES"] = ""
-
-RENDER = False
-if __name__ == '__main__':
-    # 1. Create an environment
-    env = CarlaEnvironment(render=RENDER)
+def run(conf):
+    # 0. Allow GPU usage or force tensorflow to use the CPU.
+    if conf.force_cpu:
+        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
+        os.environ["CUDA_VISIBLE_DEVICES"] = ""
 
     # 2. Create a learning agent
-    marvin = QAgent(env.action_space.n, env.observation_space.shape[0], 'Carla')
+    marvin = QAgent(conf) #conf.env.action_space.n, conf.env.observation_space.shape[0], conf.name)
 
     # (2.5) *optional* Load agent memory and/or net from disk.
-    agnt = 'Carla'
-    LOAD_ANN = False
-    LOAD_MEMORIES = True
-    if LOAD_ANN or LOAD_MEMORIES:
-        marvin.load('saved_agents/' + agnt + '/' + agnt, net=LOAD_ANN, memory=LOAD_MEMORIES)
+    if conf.load_ann or conf.load_mem:
+        marvin.load(conf.save_to + conf.load_from + '/' + conf.load_from, net=conf.load_ann, memory=conf.load_mem)
 
     # 3. Set your configurations for the run.
-    LEARNING = True
-    LEARN_ONLINE = True
-    LEARN_OFFLINE = True
-    RUN_EPISODES = 100
-    LEARN_OFFLINE_EPOCHS = 1000
-    SAVE_PATH = "./saved_agents"
-
    # Register an *atexit* callback,
     # to store the corrent result of the agent
     # if the program is interrupted.
-    if LEARNING:
-        atexit.register(marvin.save, SAVE_PATH)
+    if conf.learn:
+        atexit.register(marvin.save, conf.save_to)
 
     # Offline training of the agent with
     # previous collected and saved memories.
-    if LEARN_OFFLINE and LEARNING:
-        ew.learn_offline(marvin, epochs=LEARN_OFFLINE_EPOCHS)
+    if conf.learn_offline and conf.learn:
+        ew.learn_offline(marvin, epochs=conf.offline_epochs)
 
     # Run the agent in the environment for the
     # number of specified epochs. Either to
     # verify the performance of the agent or
     # to train the agent.
-    _LEARN = LEARN_ONLINE and LEARNING
-    loss, avg_score = ew.run(env, marvin, RUN_EPISODES, render=RENDER, learn=_LEARN)
+    _LEARN = conf.learn_online and conf.learn
+    loss, avg_score = ew.run(conf.env, marvin, conf.run_episodes, render=conf.render, learn=_LEARN)
 
     # Save the final training result of the agent.
-    if LEARNING:
-        marvin.save(SAVE_PATH)
+    if conf.learn:
+        marvin.save(conf.save_to)
+
+    ew.process_logs(avg_score, loss, conf)
 
-    if CARLA:
-        env.world.destroy()
-    # Show the result of the runl. 
-    ew.process_logs(avg_score, loss, title=marvin.name, render=RENDER)
+    if conf.env_type == 'Carla':
+        conf.env.world.destroy()
+
+if __name__ == '__main__':
+    conf = ew.Config()
+    conf.env = gym.make('LunarLander-v2')
+    conf.env_type = 'Lunar'
+    conf.conf_to_name()
+    run(conf)
\ No newline at end of file
diff --git a/networks.py b/networks.py
index 846e6fa..0492454 100644
--- a/networks.py
+++ b/networks.py
@@ -6,17 +6,21 @@ from keras.activations import relu, linear
 from keras.regularizers import l2
 
 class QNet:
-    net = Sequential()
-    learn_rate = 0.001
+
+    learn_rate = 0.0005
 
-    def __init__(self, action_space, state_space):
-        self.compile_net(action_space, state_space)
+    def __init__(self, conf):
+        self.net = None
+        self.net = Sequential()
+        self.learn_rate = conf.learn_rate  # set before compile_net() so Adam uses the configured rate
+        self.compile_net(conf)
         self.net.summary()
 
-    def compile_net(self, action_space, state_space):
-        self.net.add(Dense(256, input_dim=state_space, activation=relu))
-        self.net.add(Dense(128, activation=relu))
-        self.net.add(Dense(action_space, activation=linear))
+    def compile_net(self, conf):
+        self.net.add(Dense(conf.net_layout[0], input_dim=conf.env.observation_space.shape[0], activation=relu))
+        for layer in range(1, len(conf.net_layout)):
+            self.net.add(Dense(conf.net_layout[layer], activation=relu))
+        self.net.add(Dense(conf.env.action_space.n, activation=linear))
         self.net.compile(loss='mse', optimizer=Adam(lr=self.learn_rate))
 
     def predict(self, state):
diff --git a/steering_wheel.py b/steering_wheel.py
index 2828f7b..9f968f4 100644
--- a/steering_wheel.py
+++ b/steering_wheel.py
@@ -67,24 +67,33 @@ class ManualSteeringWheel:
     def action_to_controls(action):
         if action == IDLE:
             return [0.0, 0.0, 0.0, False]
+
         if action == FORWARD:
-            return [0.8, 0.0, 0.0, False]
+            return [0.6, 0.0, 0.0, False]
+
         if action == REVERSE:
-            return [0.8, 0.0, 0.0, True]
+            return [0.5, 0.0, 0.0, True]
+
         if action == FORWARD_LEFT:
-            return [0.8, -0.4, 0.0, False]
+            return [0.4, -0.4, 0.0, False]
+
         if action == FORWARD_RIGHT:
-            return [0.8, 0.4, 0.0, False]
+            return [0.4, 0.4, 0.0, False]
+
         if action == LEFT:
             return [0.0, -0.4, 0.0, False]
+
         if action == RIGHT:
-            return [0.0, 0.5, 0.0, False]
+            return [0.0, 0.4, 0.0, False]
+
         if action == BRAKE:
             return [0.0, 0.0, 0.5, False]
+
        if action == REVERSE_LEFT:
-            return [0.5, -0.4, 0.0, True]
+            return [0.4, -0.4, 0.0, True]
+
         if action == REVERSE_RIGHT:
-            return [0.5, 0.4, 0.0, True]
+            return [0.4, 0.4, 0.0, True]
 
     def get_action(self):
         action = IDLE
-- 
GitLab
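
Usage sketch (not part of the patch): the Config class added to environment_wrapper.py is what makes a run scriptable, and benchmarks.py shows the intended pattern — build a Config, call conf_to_name(), then hand it to main.run(). A minimal sketch of that usage, assuming the patched modules are importable; the name 'TinyNet', the [64, 32] layout and the 10-episode count below are illustrative values, not part of the patch:

    import gym
    import main
    import environment_wrapper as ew

    conf = ew.Config()                     # defaults from the patch: [256, 128] layers, eps_decay 0.9996, learn_rate 0.001
    conf.name = 'TinyNet'                  # hypothetical name for a quick smoke test
    conf.env = gym.make('LunarLander-v2')  # same environment benchmarks.py uses
    conf.env_type = 'Lunar'
    conf.net_layout = [64, 32]             # hypothetical small layout
    conf.run_episodes = 10                 # hypothetical short run
    conf.save_to = 'benchmarks/'
    conf.conf_to_name()                    # appends env type, layout, eps_decay and learn_rate to the name
    main.run(conf)                         # trains, saves the agent, and writes the score CSV/plot under save_to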