Commit 4764b70a authored by Armin Co

Scriptable Learning

parent 747e4d42
...
@@ -7,14 +7,15 @@ class QAgent:
     gamma = 0.99
     epsilon = 1.0
     epsilon_min = 0.01
-    epsilon_decay = 0.9996
+    epsilon_decay = 0.9999
     online_batch_size = 64

-    def __init__(self, action_space, state_space, name):
-        self.q = QNet(action_space, state_space)
+    def __init__(self, conf):
+        self.q = QNet(conf)
         self.memory = Memory()
-        self.action_space = action_space
-        self.name = name
+        self.action_space = conf.env.action_space.n
+        self.name = conf.name
+        self.epsilon_decay = conf.eps_decay

     def get_action(self, state):
         if np.random.rand() <= self.epsilon:
...
@@ -31,7 +32,7 @@ class QAgent:
         epochs = 1
         if offline:
-            batch_size = 4096
+            batch_size = 2048
         if len(self.memory.history) < batch_size:
             return
...
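For context: this hunk changes the default epsilon_decay from 0.9996 to 0.9999 and makes it overridable per run via conf.eps_decay. Assuming epsilon is multiplied by epsilon_decay once per step until it reaches epsilon_min (the actual update is outside this hunk), a rough sketch of what the two settings mean for exploration:

# Illustration only, not part of the commit: number of decay steps until
# epsilon falls from 1.0 to epsilon_min for the two decay settings.
def steps_until_min(eps_decay, epsilon=1.0, epsilon_min=0.01):
    steps = 0
    while epsilon > epsilon_min:
        epsilon *= eps_decay
        steps += 1
    return steps

print(steps_until_min(0.9996))  # ~11500 steps of exploration
print(steps_until_min(0.9999))  # ~46000 steps of exploration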
import main
import environment_wrapper as ew
import gym
from carla_environment import CarlaEnvironment
import copy
from tqdm import trange

c = ew.Config()
c.name = 'Base'
c.render = False
c.env = gym.make('LunarLander-v2')
c.env_type = 'Lunar'
c.net_layout = [256, 128]
c.eps_decay = 0.9996
c.learn_rate = 0.001
c.run_episodes = 300
c.save_to = 'benchmarks/'

smallNet = copy.deepcopy(c)
smallNet.name = 'SmallNet'
smallNet.net_layout = [128, 32]
smallNet.conf_to_name()

normalNet = copy.deepcopy(c)
normalNet.name = 'NormalNet'
normalNet.net_layout = [256, 128]
normalNet.conf_to_name()

normalSlowDecay = copy.deepcopy(c)
normalSlowDecay.name = 'NormalSlowDecay'
normalSlowDecay.net_layout = [256, 128]
normalSlowDecay.eps_decay = 0.99995
normalSlowDecay.conf_to_name()

normalSlowLearn = copy.deepcopy(c)
normalSlowLearn.name = 'NormalSlowLearn'
normalSlowLearn.net_layout = [256, 128]
normalSlowLearn.learn_rate = 0.0005
normalSlowLearn.conf_to_name()

largeNet = copy.deepcopy(c)
largeNet.name = 'LargeNet'
largeNet.net_layout = [512, 256]
largeNet.conf_to_name()

deepNet = copy.deepcopy(c)
deepNet.name = 'DeepNet'
deepNet.net_layout = [256, 128, 128]
deepNet.conf_to_name()

# configurations = [smallNet, normalNet]
# configurations = [normalSlowDecay, normalSlowLearn]
configurations = [largeNet, deepNet]

configs = trange(len(configurations), desc='Benchmarking configs', unit="Configuration")
for i in configs:
    main.run(configurations[i])
...
@@ -10,7 +10,7 @@ import numpy as np
 import pygame
 import time
-from steering_wheel import ACTION_SPACE, Controller
+from steering_wheel import ACTION_SPACE, Controller, IDLE

 # find carla module
 try:
...
@@ -87,12 +87,36 @@ class CollisionSensor:
         intensity = math.sqrt(impulse.x**2 + impulse.y**2 + impulse.z**2)
         # print(intensity)
+class LidarSensor:
+    sensor = None
+    data = None
+
+    def __init__(self, world, parent):
+        bp = world.get_blueprint_library().find('sensor.lidar.ray_cast')
+        bp.set_attribute('rotation_frequency', '10')
+        bp.set_attribute('channels', '1')
+        bp.set_attribute('points_per_second', '100')
+        bp.set_attribute('range', '7')
+        bp.set_attribute('sensor_tick', '0.1')
+        bp.set_attribute('upper_fov', '0')
+        bp.set_attribute('lower_fov', '0')
+        weak_self = weakref.ref(self)
+        self.sensor = world.spawn_actor(bp, carla.Transform(), attach_to=parent)
+        self.sensor.listen(lambda event: LidarSensor._on_data(weak_self, event))
+
+    @staticmethod
+    def _on_data(weak_self, event):
+        self = weak_self()
+        if not self:
+            return
+        for m in event:
+            print(m.point)
+        if len(event) > 0:
+            print(event.horizontal_angle)

 class World:
     """ Wrapper for the carla environment, incl. player/vehicle """
     player = None
     collision_sensor = None
+    lidar_sensor = None
     world = None
     blueprint_library = None
     spawn_points = None
...
@@ -101,6 +125,10 @@ class World:
     def __init__(self, world):
         self.world = world
+        settings = self.world.get_settings()
+        settings.fixed_delta_seconds = 0.1
+        settings.synchronous_mode = True
+        self.world.apply_settings(settings)
         self.blueprint_library = self.world.get_blueprint_library()
         self.spawn_points = self.world.get_map().get_spawn_points()
         self.reset()
...
@@ -115,7 +143,8 @@ class World:
     def observation(self):
         if self.player is not None:
             pos = self.player.get_transform()
-            return [pos.location.x, pos.location.y, pos.rotation.yaw]
+            vel = self.player.get_velocity()
+            return [pos.location.x, pos.location.y, vel.x, vel.y, pos.rotation.yaw]
         else:
             return [0,0,0]
...
@@ -128,6 +157,7 @@ class World:
         start_location = self.player.get_location()
         print(str(start_location))
         self.collision_sensor = CollisionSensor(self.world, self.player)
+        # self.lidar_sensor = LidarSensor(self.world, self.player)
         # start_location.x = 288.0
         # start_location.y = 55.0
         # self.player.set_location(start_location)
...
@@ -154,6 +184,9 @@ class World:
         for actor in self.actors:
             actor.destroy()
         self.actors = []
+        if self.collision_sensor is not None:
+            self.collision_sensor.sensor.destroy()
+            self.collision_sensor = None

     def step(self, action):
...
@@ -161,22 +194,24 @@ class World:
         controls = Controller.action_to_controls(action)
         c = carla.VehicleControl(throttle=controls[0], steer=controls[1], brake=controls[2], reverse=controls[3])
         self.player.apply_control(c)
-        reward = self.reward()
+        self.world.tick()
+        reward = self.reward(action)
         self.collision_sensor.collision = None
         return reward

-    def reward(self):
+    def reward(self, action):
         target = carla.Transform(carla.Location(x=-25.534311, y=54.460903, z=0.112781), carla.Rotation(pitch=0.000000, yaw=-175.922913, roll=-6.221135))
         pos = self.player.get_transform()
-        pos_diff = (target.location.x - pos.location.x)**2 + (target.location.y - pos.location.y)**2 + 1
-        rot_diff = (target.rotation.yaw - pos.rotation.yaw)**2 + 1
-        r = ((1/pos_diff) - 0.5)
+        pos_diff = math.sqrt((target.location.x - pos.location.x)**2 + (target.location.y - pos.location.y)**2)
+        rot_diff = (target.rotation.yaw - pos.rotation.yaw)
+        r = (-pos_diff / 10) + 1
         at_final_position = False
-        if pos_diff < 1.2:
+        if pos_diff < 1.8:
             if self.was_big_reward == False:
                 r += 100
                 self.was_big_reward = True
-            if rot_diff < 5:
+            if abs(rot_diff) < 5:
+                r += 100
                 at_final_position = True
         else:
             if self.was_big_reward == True:
...
@@ -184,20 +219,28 @@ class World:
                 r -= 50
         velocity = self.player.get_velocity()
-        v = (velocity.x)**2 + (velocity.y)**2
+        v = math.sqrt((velocity.x)**2 + (velocity.y)**2)
+        if v < 0.01:
+            r -= 1
+        if v > 15.0:
+            r -= 10
         done = False
         if at_final_position and v < 0.01:
+            print("Success")
             done = True
         if self.collision_sensor.collision is not None:
-            r -= 100
+            r -= 250
             done = True
+        if action == IDLE:
+            r -= 1
         return self.observation(), r, done, None
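To make the reworked shaping concrete: the distance term switches from an inverse-square bonus (1/(d^2+1) - 0.5) to a linear falloff, r = 1 - d/10, so it contributes +1 at the target, 0 at 10 m and -1 at 20 m, on top of the bonuses and penalties above. A standalone sketch of just that term (illustration only, not the commit's code):

import math

# Distance term of the new reward, re-implemented in isolation for illustration.
# The default target is the parking spot hard-coded in World.reward above.
def distance_reward(player_xy, target_xy=(-25.534311, 54.460903)):
    d = math.sqrt((target_xy[0] - player_xy[0]) ** 2 + (target_xy[1] - player_xy[1]) ** 2)
    return (-d / 10) + 1

print(distance_reward((-25.534311, 54.460903)))  # 1.0 at the target
print(distance_reward((-15.534311, 54.460903)))  # 0.0 at 10 m distance
print(distance_reward((-5.534311, 54.460903)))   # -1.0 at 20 m distance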

 class ActionSpace:
     n = ACTION_SPACE

 class ObservationSpace:
-    shape = [3]
+    shape = [5]

 class CarlaEnvironment:
     action_space = ActionSpace
     observation_space = ObservationSpace
...
@@ -231,6 +274,12 @@ class CarlaEnvironment:
         self.render()
         return self.world.step(action)

+    def __del__(self):
+        if self.camera is not None:
+            self.camera.sensor.destroy()
+        if self.world is not None:
+            self.world.destroy()

 if __name__ == "__main__":
     LOCALHOST="127.0.0.1"
     SERVER="192.168.188.20"
...
@@ -241,11 +290,11 @@ if __name__ == "__main__":
     while ctrl.is_running():
         clock.tick(20)
         ctrl.on_update()
-        obs, reward, done = env.step(ctrl.get_action(), render=True)
+        obs, reward, done, _ = env.step(ctrl.get_action(), render=True)
+        if done:
+            break
         print(str(reward) + ' ' + str(done))
         cumulated_reward += reward
-        if done:
-            break
     env.world.destroy()
     pygame.quit()
     print(cumulated_reward)
""" Wrapper to abstract different learning environments for an agent. """ """ Wrapper to abstract different learning environments for an agent. """
import numpy as np import numpy as np
from tqdm import tqdm
from tqdm import trange from tqdm import trange
import pandas as pd
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
class Config:
render = False
force_cpu = True
env = None
env_type = 'Lunar'
name = 'ConfigTest'
learn = True
learn_online = True
learn_offline = False
net_layout= [256, 128]
eps_decay = 0.9996
learn_rate= 0.001
run_episodes = 20
offline_epochs = 1000
load_ann = False
load_mem = False
load_from = 'agnt'
save_to = 'saved_agents/'
def conf_to_name(self):
self.name += self.env_type
for layer in self.net_layout:
self.name += '_' + str(layer) + '_'
self.name += str(self.eps_decay) + '_'
self.name += str(self.learn_rate)
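The new Config class carries all run parameters, and conf_to_name derives the run name (and thus the folder and file names used by process_logs) by concatenating environment type, layer sizes, epsilon decay and learning rate. A standalone re-implementation for illustration, applied to the 'SmallNet' benchmark config from the script at the top of this commit:

# Illustration only: the naming scheme of Config.conf_to_name as a free function.
def conf_to_name(name, env_type, net_layout, eps_decay, learn_rate):
    name += env_type
    for layer in net_layout:
        name += '_' + str(layer) + '_'
    name += str(eps_decay) + '_'
    name += str(learn_rate)
    return name

print(conf_to_name('SmallNet', 'Lunar', [128, 32], 0.9996, 0.001))
# -> 'SmallNetLunar_128__32_0.9996_0.001'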

 def step(environment, action):
     """ Perform one iteration in the environment. """
...
@@ -74,11 +101,14 @@ def run(environment, agent, episodes, render=True, learn=True):
     return score_history, avg_score_history

-def process_logs(avg_score_history, loss, title="Title", render=False):
+def process_logs(avg_score_history, loss, conf):
     """ Plot the log history """
+    df = pd.DataFrame(list(zip(loss, avg_score_history)), columns=['Score', 'Average'])
+    df.to_csv(conf.save_to + conf.name + '/' + conf.name + '.csv')
     plt.plot([i+1 for i in range(0, len(loss), 2)], loss[::2])
     plt.plot([i+1 for i in range(0, len(avg_score_history), 2)], avg_score_history[::2], '--')
-    plt.title(title)
-    plt.savefig('saved_agents/' + title + '.png', format="png")
-    if render:
+    plt.title(conf.name)
+    plt.savefig(conf.save_to + conf.name + '/' + conf.name + '.png', format="png")
+    if conf.render:
         plt.show()
...
@@ -4,65 +4,55 @@ Run your desired environment and agent configuration.
 import os
 import atexit
-from carla_environment import CarlaEnvironment
-CARLA=True
-# import gym
+import gym
 from agents import QAgent as QAgent
 import environment_wrapper as ew

-# Allow GPU usage or force tensorflow to use the CPU.
-FORCE_CPU=True
-if FORCE_CPU:
+def run(conf):
+    # 0. Allow GPU usage or force tensorflow to use the CPU.
+    if conf.force_cpu:
         os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
         os.environ["CUDA_VISIBLE_DEVICES"] = ""

-RENDER = False
-if __name__ == '__main__':
-    # 1. Create an environment
-    env = CarlaEnvironment(render=RENDER)
     # 2. Create a learning agent
-    marvin = QAgent(env.action_space.n, env.observation_space.shape[0], 'Carla')
+    marvin = QAgent(conf)

     # (2.5) *optional* Load agent memory and/or net from disk.
-    agnt = 'Carla'
-    LOAD_ANN = False
-    LOAD_MEMORIES = True
-    if LOAD_ANN or LOAD_MEMORIES:
-        marvin.load('saved_agents/' + agnt + '/' + agnt, net=LOAD_ANN, memory=LOAD_MEMORIES)
+    if conf.load_ann or conf.load_mem:
+        marvin.load(conf.save_to + conf.load_from + '/' + conf.load_from, net=conf.load_ann, memory=conf.load_mem)

     # 3. Set your configurations for the run.
-    LEARNING = True
-    LEARN_ONLINE = True
-    LEARN_OFFLINE = True
-    RUN_EPISODES = 100
-    LEARN_OFFLINE_EPOCHS = 1000
-    SAVE_PATH = "./saved_agents"

     # Register an *atexit* callback,
     # to store the current result of the agent
     # if the program is interrupted.
-    if LEARNING:
-        atexit.register(marvin.save, SAVE_PATH)
+    if conf.learn:
+        atexit.register(marvin.save, conf.save_to)

     # Offline training of the agent with
     # previous collected and saved memories.
-    if LEARN_OFFLINE and LEARNING:
-        ew.learn_offline(marvin, epochs=LEARN_OFFLINE_EPOCHS)
+    if conf.learn_offline and conf.learn:
+        ew.learn_offline(marvin, epochs=conf.offline_epochs)

     # Run the agent in the environment for the
     # number of specified epochs. Either to
     # verify the performance of the agent or
     # to train the agent.
-    _LEARN = LEARN_ONLINE and LEARNING
-    loss, avg_score = ew.run(env, marvin, RUN_EPISODES, render=RENDER, learn=_LEARN)
+    _LEARN = conf.learn_online and conf.learn
+    loss, avg_score = ew.run(conf.env, marvin, conf.run_episodes, render=conf.render, learn=_LEARN)

     # Save the final training result of the agent.
-    if LEARNING:
-        marvin.save(SAVE_PATH)
-    if CARLA:
-        env.world.destroy()
-    # Show the result of the run.
-    ew.process_logs(avg_score, loss, title=marvin.name, render=RENDER)
+    if conf.learn:
+        marvin.save(conf.save_to)
+
+    ew.process_logs(avg_score, loss, conf)
+
+    if conf.env_type == 'Carla':
+        conf.env.world.destroy()
+
+if __name__ == '__main__':
+    conf = ew.Config()
+    conf.env = gym.make('LunarLander-v2')
+    conf.env_type = 'Lunar'
+    conf.conf_to_name()
+    run(conf)
\ No newline at end of file
...
@@ -6,17 +6,21 @@ from keras.activations import relu, linear
 from keras.regularizers import l2

 class QNet:
-    net = Sequential()
-    learn_rate = 0.001
+    learn_rate = 0.0005

-    def __init__(self, action_space, state_space):
-        self.compile_net(action_space, state_space)
+    def __init__(self, conf):
+        self.net = Sequential()
+        self.learn_rate = conf.learn_rate
+        self.compile_net(conf)
         self.net.summary()

-    def compile_net(self, action_space, state_space):
-        self.net.add(Dense(256, input_dim=state_space, activation=relu))
-        self.net.add(Dense(128, activation=relu))
-        self.net.add(Dense(action_space, activation=linear))
+    def compile_net(self, conf):
+        self.net.add(Dense(conf.net_layout[0], input_dim=conf.env.observation_space.shape[0], activation=relu))
+        for layer in range(1, len(conf.net_layout)):
+            self.net.add(Dense(conf.net_layout[layer], activation=relu))
+        self.net.add(Dense(conf.env.action_space.n, activation=linear))
         self.net.compile(loss='mse', optimizer=Adam(lr=self.learn_rate))

     def predict(self, state):
...
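The rewritten compile_net builds the hidden layers from conf.net_layout instead of hard-coding 256/128, and takes the input and output sizes from the configured environment. Assuming net_layout = [256, 128] and LunarLander-v2 (8-dimensional observation, 4 actions), the loop produces the equivalent of this hand-written model (sketch only, using the same old-style Keras API as the file):

from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from keras.activations import relu, linear

# Equivalent network for net_layout = [256, 128] on LunarLander-v2; illustration only.
net = Sequential()
net.add(Dense(256, input_dim=8, activation=relu))  # net_layout[0], fed with the 8-dim observation
net.add(Dense(128, activation=relu))               # net_layout[1]
net.add(Dense(4, activation=linear))               # one linear Q-value output per action
net.compile(loss='mse', optimizer=Adam(lr=0.001))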
...
@@ -67,24 +67,33 @@ class ManualSteeringWheel:
     def action_to_controls(action):
         if action == IDLE:
             return [0.0, 0.0, 0.0, False]
         if action == FORWARD:
-            return [0.8, 0.0, 0.0, False]
+            return [0.6, 0.0, 0.0, False]
         if action == REVERSE:
-            return [0.8, 0.0, 0.0, True]
+            return [0.5, 0.0, 0.0, True]
         if action == FORWARD_LEFT:
-            return [0.8, -0.4, 0.0, False]
+            return [0.4, -0.4, 0.0, False]
         if action == FORWARD_RIGHT:
-            return [0.8, 0.4, 0.0, False]
+            return [0.4, 0.4, 0.0, False]
         if action == LEFT:
             return [0.0, -0.4, 0.0, False]
         if action == RIGHT:
-            return [0.0, 0.5, 0.0, False]
+            return [0.0, 0.4, 0.0, False]
         if action == BRAKE:
             return [0.0, 0.0, 0.5, False]
         if action == REVERSE_LEFT:
-            return [0.5, -0.4, 0.0, True]
+            return [0.4, -0.4, 0.0, True]
         if action == REVERSE_RIGHT:
-            return [0.5, 0.4, 0.0, True]
+            return [0.4, 0.4, 0.0, True]

     def get_action(self):
         action = IDLE
...