From 61a8051966139b9b3cdfd3923059105f37dd08c5 Mon Sep 17 00:00:00 2001
From: Armin <armin.co@hs-bochum.de>
Date: Sun, 4 Apr 2021 21:01:46 +0200
Subject: [PATCH] refactoring

---
 .gitignore                         |  1 +
 agents.py                          | 12 +-----
 carla_environment.py               | 38 +++++++-----------
 config.py                          | 31 +++++++++++++++
 environment_wrapper.py             | 63 ++++++++----------------------
 main.py                            | 10 ++---
 memory.py                          | 12 ++----
 networks.py                        | 27 +++++--------
 run_scripts/baselines.py           | 10 ++---
 run_scripts/csv_history_to_plot.py |  8 ++--
 run_scripts/manual_carla.py        |  8 ++--
 run_scripts/offline.py             |  4 +-
 run_scripts/report.py              |  4 +-
 13 files changed, 98 insertions(+), 130 deletions(-)
 create mode 100644 config.py

diff --git a/.gitignore b/.gitignore
index 068c0b7..24d5ee1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,4 +9,5 @@ orientation
 workspace.code-workspace
 test
 tech_demo.py
+Screencast
 *.png
diff --git a/agents.py b/agents.py
index adf7204..38606ff 100644
--- a/agents.py
+++ b/agents.py
@@ -79,12 +79,6 @@ class DQAgent(QAgent):
         self.q2 = QNet(conf)
         self.name = str(self.name) + 'DBL'
 
-    def get_action(self, state):
-        if np.random.rand() <= self.epsilon:
-            return random.randrange(self.action_space)
-        action_values = self.q.predict(state)
-        return np.argmax(action_values[0])
-
     def learn(self, offline=False, epochs=1):
         if self.epsilon > self.epsilon_min:
                 self.epsilon *= self.epsilon_decay
@@ -114,13 +108,9 @@ class DQAgent(QAgent):
         return loss
 
     def load(self, path, net=True, memory=True):
-        print('Load:  ' + path)
+        super().load(path, net=net, memory=memory)
         if net:
-            print('Network')
-            self.q.load(path+'.net')
             self.q2.load(path+'.net')
-        if memory:
-            self.memory.load(path+'.mem')
 
 class CarlaManual(QAgent):
     control = None
diff --git a/carla_environment.py b/carla_environment.py
index fc0ad1c..e8bd989 100644
--- a/carla_environment.py
+++ b/carla_environment.py
@@ -12,7 +12,6 @@ import time
 
 from steering_wheel import ACTION_SPACE, Controller, IDLE
 
-# find carla module
 try:
     CARLA_PATH='/media/armin/Games/carla/PythonAPI/carla/dist/carla-*%d.%d-%s.egg'
     CARLA_PATH='/opt/carla-simulator/PythonAPI/carla/dist/carla-*%d.%d-%s.egg'
@@ -23,8 +22,11 @@ try:
 except IndexError:
     pass
 
-import carla
-
+try:
+    import carla
+except ImportError:
+    print('Could not import carla, you will not be able to create a client!')
+    print('Continuing...')
 
 class Camera:
     """ Add camera sensor to the carla world """
@@ -71,7 +73,7 @@ class Camera:
 class CollisionSensor:
     sensor = None
     collision = None
-
+    intensity = 0
     def __init__(self, world, parent):
         bp = world.get_blueprint_library().find('sensor.other.collision')
         self.sensor = world.spawn_actor(bp, carla.Transform(), attach_to=parent)
@@ -82,16 +84,13 @@ class CollisionSensor:
         self = weak_self()
         if not self:
             return
-        # print(event.other_actor)
         self.collision = event
         impulse = event.normal_impulse
-        intensity = math.sqrt(impulse.x**2 + impulse.y**2 + impulse.z**2)
-        # print(intensity)
+        self.intensity = math.sqrt(impulse.x**2 + impulse.y**2 + impulse.z**2)
 
 class ObstacleSensor:
     sensor = None
     parent = None
-    ZONES = 4
     obstacle = []
 
     def __init__(self, world, parent):
@@ -99,8 +98,6 @@ class ObstacleSensor:
         bp = world.get_blueprint_library().find('sensor.other.obstacle')
         bp.set_attribute('distance', '10')
         bp.set_attribute('hit_radius', '5')
-        # print(bp.get_attribute('hit_radius'))
-        position = carla.Transform(carla.Location(x=-2, y=0, z=0.0), carla.Rotation(pitch=0.0, yaw=0, roll=0.0))
         self.sensor = world.spawn_actor(bp, carla.Transform(), attach_to=parent, attachment_type=carla.AttachmentType.Rigid)
         weak_self = weakref.ref(self)
         self.sensor.listen(lambda event: ObstacleSensor._on_event(weak_self, event))
@@ -110,7 +107,8 @@ class ObstacleSensor:
         if not self:
             return
         self.collision = event
-        if not event.other_actor.type_id == 'static.road' and not event.other_actor.type_id == 'static.roadline' and not event.other_actor.type_id == 'static.sidewalk' and False:
+        actor_id = event.other_actor.type_id
+        if actor_id not in ('static.road', 'static.roadline', 'static.sidewalk') and False:
             print(self.parent.get_transform())
             print(event.transform)
             print(self.parent.get_velocity())
@@ -139,12 +137,12 @@ class LidarSensor:
         self = weak_self()
         if not self:
             return
-        for m in event: print(m.point)
+        for m in event:
+            if False: print(m.point)
         if len(event) > 0:
-            print(event.horizontal_angle)
+            if False: print(event.horizontal_angle)
 
 class World:
-    """ Wrapper for the carla environment, incl. player/vehicle """
     player = None
     collision_sensor = None
     obstacle_sensor = None
@@ -166,7 +164,6 @@ class World:
         self.reset()
     
     def reset(self):
-        """ Remove and create new player/vehicle. """
         self.destroy()
         time.sleep(0.5)
         self.spawn_on_sidewalk()
@@ -180,12 +177,10 @@ class World:
             return [0,0,0,0,0]
 
     def spawn_player(self, transform):
-        """ Add a vehicle to the world. """
         while self.player is None:
             blueprint = random.choice(self.blueprint_library.filter('model3'))
             position = transform
             self.player = self.world.try_spawn_actor(blueprint, position)
-            start_location = self.player.get_location()
             self.collision_sensor = CollisionSensor(self.world, self.player)
             self.obstacle_sensor = ObstacleSensor(self.world, self.player)
     
@@ -213,7 +208,6 @@ class World:
                 self.actors.append(actor)
 
     def destroy(self):
-        """ Remove vehicle from the world. """
         if self.player is not None:
             self.player.destroy()
             self.player = None
@@ -230,16 +224,15 @@ class World:
 
 
     def step(self, action):
-        """ Apply controls to vehicle. """
         controls = Controller.action_to_controls(action)
         c = carla.VehicleControl(throttle=controls[0], steer=controls[1], brake=controls[2], reverse=controls[3])
         self.player.apply_control(c)
         self.world.tick()
-        reward = self.reward(action)
+        reward = self.reward()
         self.collision_sensor.collision = None
         return reward
 
-    def reward(self, action):
+    def reward(self):
         x, y, vx, vy, yaw = self.observation()
         
         target = carla.Transform( \
@@ -280,10 +273,8 @@ class World:
 
 class ActionSpace:
     n = ACTION_SPACE
-
 class ObservationSpace:
     shape = [5]
-    
 class CarlaEnvironment:
     action_space = ActionSpace
     observation_space = ObservationSpace
@@ -340,7 +331,6 @@ if __name__ == "__main__":
         clock.tick(5)
         ctrl.on_update()
         obs, reward, done, _ = env.step(ctrl.get_action(), render=True)
-        # print(str(reward) + ' ' + str(done))
         cumulated_reward += reward
         if done:
             break
diff --git a/config.py b/config.py
new file mode 100644
index 0000000..550a001
--- /dev/null
+++ b/config.py
@@ -0,0 +1,31 @@
+class Config:
+    render = False
+    force_cpu = True
+    env = None
+    agent = None
+    env_type = 'Lunar'
+    name = '00'
+    learn = True
+    learn_online = True
+    learn_offline = False
+    net_layout = [256, 128]
+    eps_decay = 0.9996
+    learn_rate = 0.001
+    learn_epochs = 1
+    run_episodes = 20
+    offline_batchsize = 64
+    offline_episodes = 100
+    offline_validate_every_x_iteration = -1
+    load_ann = False
+    load_mem = False
+    load_from = 'agent_name'
+    save_to = 'dir/'
+
+    def conf_to_name(self):
+        self.name = str(self.env_type) + '_' + str(self.name)
+        for layer in self.net_layout:
+            self.name += '_' + str(layer) + '_'
+        self.name += str(self.eps_decay) + '_'
+        self.name += str(self.learn_rate) + '_'
+        self.name += str(self.learn_epochs)
+    
\ No newline at end of file
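
For orientation, a minimal usage sketch for the new Config class; the values are just the class defaults shown above, and the resulting string follows directly from conf_to_name():

    from config import Config

    conf = Config()
    conf.env_type = 'Lunar'
    conf.name = '00'
    conf.conf_to_name()
    # With net_layout=[256, 128], eps_decay=0.9996, learn_rate=0.001 and
    # learn_epochs=1 this yields 'Lunar_00_256__128_0.9996_0.001_1'
    # (the double underscore comes from the layer loop above).
    print(conf.name)
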
diff --git a/environment_wrapper.py b/environment_wrapper.py
index 2c61cc0..c13585c 100644
--- a/environment_wrapper.py
+++ b/environment_wrapper.py
@@ -5,40 +5,7 @@ from tqdm import trange
 import pandas as pd
 import matplotlib.pyplot as plt
 
-IS_SOLVED = 195
 
-class Config:
-    render = False
-    force_cpu = True
-    env = None
-    agent = None
-    env_type = 'Lunar'
-    name = 'ConfigTest'
-    learn = True
-    learn_online = True
-    learn_offline = False 
-    net_layout= [256, 128]
-    eps_decay = 0.9996
-    learn_rate= 0.001
-    learn_iterations = 1
-    run_episodes = 20
-    offline_batchsize = 2048
-    offline_epochs = 1000
-    offline_validate_every_x_iteration = 10
-    load_ann = False
-    load_mem = False
-    load_from = 'agnt'
-    save_to = 'saved_agents/'
-
-
-    def conf_to_name(self):
-        self.name = str(self.env_type) + '_' + str(self.name)
-        for layer in self.net_layout:
-            self.name += '_' + str(layer) + '_'
-        self.name += str(self.eps_decay) + '_'
-        self.name += str(self.learn_rate) + '_'
-        self.name += str(self.learn_iterations)
-    
 
 
 def step(environment, action):
@@ -70,13 +37,14 @@ def one_episode(environment, agent, render, learn, conf=None, max_steps=1000):
         state = following_state
         if learn:
             if conf is not None:
-                agent.learn(epochs=conf.learn_iterations)
+                agent.learn(epochs=conf.learn_epochs)
             else:
                 agent.learn()
         if done:
             break
     return score
 
+IS_SOLVED = 195
 def learn_offline(agent, conf):
     """ Train the agent with its memories. """
     print('Learning with ', len(agent.memory.history), ' memories.')
@@ -84,9 +52,9 @@ def learn_offline(agent, conf):
     score_history = []
     avg_score_history = []
     desc_train = ''
-    pbar = trange(conf.offline_epochs, desc='Loss: x')
+    pbar = trange(conf.offline_episodes, desc='Loss: x')
     for i in pbar:
-        loss = agent.learn(offline=True, epochs=conf.learn_iterations)
+        loss = agent.learn(offline=True, epochs=conf.learn_epochs)
         desc = ('Loss: %05.4f' %(loss)) + desc_train
         pbar.set_description(desc)
         pbar.refresh()
@@ -108,20 +76,21 @@ def learn_offline(agent, conf):
 
 
 
-def run(environment, agent, episodes, render=True, learn=True, conf=None):
+def run(conf):
     """ Run an agent """
     conf.name = str(conf.name) + 'on'
     # Set the exploring rate to its minimum.
     # (epsilon *greedy*)
+    learn = conf.learn and conf.learn_online
     if not learn:
-        agent.epsilon = agent.epsilon_min
+        conf.agent.epsilon = conf.agent.epsilon_min
 
     score_history = []
     avg_score_history = []
     
-    pbar = trange(episodes, desc=agent.name + ' [act, avg]: [0, 0]', unit="Episodes")
+    pbar = trange(conf.run_episodes, desc=conf.agent.name + ' [act, avg]: [0, 0]', unit="Episodes")
     for _ in pbar:
-        score = one_episode(environment, agent, render, learn, conf=conf)
+        score = one_episode(conf.env, conf.agent, conf.render, learn, conf=conf)
         score_history.append(score)
 
         is_solved = np.mean(score_history[-100:])
@@ -129,7 +98,7 @@ def run(environment, agent, episodes, render=True, learn=True, conf=None):
 
         if is_solved > IS_SOLVED and learn:
             break
-        desc = (agent.name + " [act, avg]: [{0:.2f}, {1:.2f}]".format(score, is_solved))
+        desc = (conf.agent.name + " [act, avg]: [{0:.2f}, {1:.2f}]".format(score, is_solved))
         pbar.set_description(desc)
         pbar.refresh()
     return score_history, avg_score_history
@@ -143,14 +112,16 @@ def process_logs(avg_score_history, loss, conf):
         pass
     df.to_csv(conf.save_to + conf.name + '/' + conf.name + '.csv')
 
-    """ Plot the log history """
+    act_score = df['Score']
+    avg_score = df['Average']
     plt.figure()
-    plt.plot([i+1 for i in range(0, len(loss), 2)], loss[::2])
-    plt.plot([i+1 for i in range(0, len(avg_score_history), 2)], avg_score_history[::2], '--')
+    plt.plot(act_score, label='Episode Score')
+    plt.plot(avg_score, '--', label='Average Score')
+    plt.xlabel('Episode')
+    plt.ylabel('Score')
+    plt.legend()
     plt.title(conf.name)
     plt.savefig(conf.save_to + conf.name + '/' + conf.name + '.png', format="png")
-    if conf.render:
-        plt.show()
     df.to_csv(conf.save_to + conf.name + '/' + conf.name + '.csv')
 
 def load_logs(file):
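
A minimal sketch of how the reworked run(conf) entry point is driven; the gym environment id and the QAgent constructor call are assumptions made only for illustration (main.py below shows the actual call site):

    import gym
    import environment_wrapper as ew
    from agents import QAgent
    from config import Config

    conf = Config()
    conf.env = gym.make('LunarLander-v2')   # assumed environment
    conf.agent = QAgent(conf)               # assumed constructor signature
    conf.conf_to_name()
    score_history, avg_score_history = ew.run(conf)
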
diff --git a/main.py b/main.py
index a362e28..860462a 100644
--- a/main.py
+++ b/main.py
@@ -10,15 +10,13 @@ from agents import QAgent
 import environment_wrapper as ew
 
 def run(conf):
-
-    # 1. Create a learning agent
+    # Set agent
     marvin = conf.agent
 
-    # (2.) *optional* Load agent memory and/or net from disk.
+    # (optional) Load agent memory and/or net from disk.
     if conf.load_ann or conf.load_mem:
         marvin.load(conf.save_to + conf.load_from + '/' + conf.load_from, net=conf.load_ann, memory=conf.load_mem)
 
-    # 3. Set your configurations for the run.
     # Register an *atexit* callback,
     # to store the current result of the agent
     # if the program is interrupted.
@@ -33,8 +31,8 @@ def run(conf):
     # number of specified epochs. Either to
     # verify the performance of the agent or
     # to train the agent.
-    _LEARN = conf.learn_online and conf.learn
-    loss, avg_score = ew.run(conf.env, marvin, conf.run_episodes, render=conf.render, learn=_LEARN, conf=conf)
+    # loss, avg_score = ew.run(conf.env, marvin, conf.run_episodes, render=conf.render, learn=(conf.learn_online and conf.learn), conf=conf)
+    loss, avg_score = ew.run(conf)
 
     # Save the final training result of the agent.
     marvin.save(conf.save_to)
diff --git a/memory.py b/memory.py
index b8f2995..af47713 100644
--- a/memory.py
+++ b/memory.py
@@ -10,28 +10,24 @@ NEXT_STATE = 3
 DONE = 4
 
 class Memory:
-    """ Class to store memories of an agent. """
-    
     history = deque(maxlen=1000000)
 
     def add(self, state, action, reward, nextstate, done):
         self.history.append((state, action, reward, nextstate, done))
 
     def get_batch(self, batch_size):
-        """ Get a random batch of samples of "batch_size" """
         batch = random.sample(self.history, batch_size)
-        states = np.array([i[STATE] for i in batch])
-        states = np.squeeze(states)
+        states = np.squeeze(np.array([i[STATE] for i in batch]))
         actions = np.array([i[ACTION] for i in batch])
         rewards = np.array([i[REWARD] for i in batch])
-        nextstates = np.array([i[NEXT_STATE] for i in batch])
-        nextstates = np.squeeze(nextstates)
+        nextstates = np.squeeze(np.array([i[NEXT_STATE] for i in batch]))
         dones = np.array([i[DONE] for i in batch])
         return states, actions, rewards, nextstates, dones
 
     def save(self, path):
         pickle.dump(self.history, open(path, 'wb'))
+        print('Saved ' + str(len(self.history)) + ' memories.')
 
     def load(self, path):
         self.history = pickle.load(open(path, 'rb'))
-        print('Loaded '+ str(len(self.history)) + ' memories.')
\ No newline at end of file
+        print('Loaded ' + str(len(self.history)) + ' memories.')
\ No newline at end of file
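
A self-contained sketch of the Memory round trip; the observation shape (1, 5) is only illustrative:

    import numpy as np
    from memory import Memory

    mem = Memory()
    for _ in range(64):
        state = np.random.rand(1, 5)
        next_state = np.random.rand(1, 5)
        mem.add(state, 0, 1.0, next_state, False)

    # np.squeeze in get_batch collapses the singleton batch axis, so the
    # returned states and nextstates have shape (32, 5).
    states, actions, rewards, nextstates, dones = mem.get_batch(32)
    mem.save('demo.mem')   # prints 'Saved 64 memories.'
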
diff --git a/networks.py b/networks.py
index cf4e5b9..f66dda6 100644
--- a/networks.py
+++ b/networks.py
@@ -4,34 +4,35 @@ from keras.models import load_model
 from keras.layers import Dense
 from keras.optimizers import Adam
 from keras.activations import relu, linear
-from keras.regularizers import l2
 from keras.callbacks import EarlyStopping
-class QNet:
-    
+
+
+class QNet:
     learn_rate = 0.0005
 
     def __init__(self, conf):
         if conf.force_cpu:
             os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
             os.environ["CUDA_VISIBLE_DEVICES"] = ""
-        self.net = None    
         self.net = Sequential()
         self.compile_net(conf)
         self.net.summary()
         self.learn_rate = conf.learn_rate
 
     def compile_net(self, conf):
-        self.net.add(Dense(conf.net_layout[0], input_dim=conf.env.observation_space.shape[0], activation=relu))
+        input_dimension = conf.env.observation_space.shape[0]
+        first_layer = conf.net_layout[0]
+        self.net.add(Dense(first_layer, input_dim=input_dimension, activation=relu))
         for layer in range(1, len(conf.net_layout)):
             self.net.add(Dense(conf.net_layout[layer], activation=relu))
         self.net.add(Dense(conf.env.action_space.n, activation=linear))
-        self.net.compile(loss='mse', optimizer=Adam(lr=self.learn_rate))
+        self.net.compile(loss='mse', optimizer=Adam(learning_rate=self.learn_rate))
 
     def predict(self, state): 
         return self.net.predict(state)
 
-    def predict_on_batch(
-        self, states): return self.net.predict_on_batch(states)
+    def predict_on_batch(self, states):
+        return self.net.predict_on_batch(states)
 
     def fit(self, X, Y, epochs=1, verbose=0):
         callback = EarlyStopping(monitor='loss', patience=2, min_delta=0.1, restore_best_weights=True)
@@ -44,13 +45,3 @@ class QNet:
     def load(self, path):
         self.net = load_model(path)
         self.net.summary()
-
-class QL2Net(QNet):
-    def __init__(self, action_space, state_space):
-        super().__init__(action_space, state_space)
-    
-    def compile_net(self, action_space, state_space):
-        self.net.add(Dense(192, input_dim=state_space, activation=relu, kernel_regularizer=l2(0.01)))
-        self.net.add(Dense(64, activation=relu))
-        self.net.add(Dense(action_space, activation=linear))
-        self.net.compile(loss='mse', optimizer=Adam(lr=self.learn_rate))
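
For reference, a sketch of the network QNet.compile_net assembles from a config; the stub environment below is hypothetical and only provides the two attributes the method reads:

    from config import Config
    from networks import QNet

    class StubSpace:
        def __init__(self, shape=None, n=None):
            self.shape, self.n = shape, n

    class StubEnv:
        observation_space = StubSpace(shape=[5])
        action_space = StubSpace(n=4)

    conf = Config()
    conf.env = StubEnv()
    conf.net_layout = [256, 128]
    qnet = QNet(conf)
    # summary() shows Dense(256, relu) -> Dense(128, relu) -> Dense(4, linear),
    # compiled with mse loss and the Adam optimizer.
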
diff --git a/run_scripts/baselines.py b/run_scripts/baselines.py
index bc98308..fc5674f 100644
--- a/run_scripts/baselines.py
+++ b/run_scripts/baselines.py
@@ -47,12 +47,12 @@ cd_4.net_layout = [128, 64, 32, 32]
 
 cd_5 = copy.deepcopy(c_32)
 cd_5.net_layout = [256, 128, 128, 64]
-cd_5.learn_iterations = 5
+cd_5.learn_epochs = 5
 cd_5.learn_rate = 0.001
 
 cd_6 = copy.deepcopy(c_32)
 cd_6.net_layout = [512, 256, 128, 64]
-cd_6.learn_iterations = 10
+cd_6.learn_epochs = 10
 cd_6.learn_rate = 0.00075
 
 cd_128 = copy.deepcopy(c_32)
@@ -63,7 +63,7 @@ cd_256.net_layout = [256, 256, 256]
 
 cd_512 = copy.deepcopy(c_32)
 cd_512.net_layout = [512, 512, 512]
-cd_512.learn_iterations = 10
+cd_512.learn_epochs = 10
 cd_512.learn_rate = 0.001
 
 offline = copy.deepcopy(c_32)
@@ -76,9 +76,9 @@ offline.learn_online = True
 offline.run_episodes = 100
 offline.net_layout = [1024, 1024, 1024, 256]
 offline.learn_rate = 0.0005
-offline.learn_iterations = 1
+offline.learn_epochs = 1
 offline.offline_validate_every_x_iteration = 1
-offline.offline_epochs = 100
+offline.offline_episodes = 100
 offline.name = 'OnlineValidation'
 offline.render = False
 offline.save_to = 'test/'
diff --git a/run_scripts/csv_history_to_plot.py b/run_scripts/csv_history_to_plot.py
index e20d257..ddda636 100644
--- a/run_scripts/csv_history_to_plot.py
+++ b/run_scripts/csv_history_to_plot.py
@@ -4,8 +4,8 @@ from os import listdir
 from os.path import isfile, join
 from pathlib import Path
 
-BASE_PATH = '/home/armin/Master/semester_3/carla/'
-DIR='baselines/hyrican'
+BASE_PATH = '/home/armin/Master/semester_3/angewandte_ki/BerichtCarla/Daten'
+DIR=''
 path = BASE_PATH + DIR
 
 def plot_csv(file_path, show=False):
@@ -19,7 +19,7 @@ def plot_csv(file_path, show=False):
     plt.xlabel('Episode')
     plt.ylabel('Score')
     plt.legend()
-    plt.title(file_path)
+    # plt.title(file_path)
     plt.savefig(file_path + '.png')
     if show:
         plt.show()
@@ -29,4 +29,4 @@ for dir in Path(path).iterdir():
         file_path = join(dir, file)
         if isfile(file_path):
             if file_path.endswith('.csv'):
-                plot_csv(file_path, show=True)
+                plot_csv(file_path, show=False)
diff --git a/run_scripts/manual_carla.py b/run_scripts/manual_carla.py
index 4572ae7..d018f01 100644
--- a/run_scripts/manual_carla.py
+++ b/run_scripts/manual_carla.py
@@ -35,10 +35,10 @@ o.eps_decay = 0.9915
 o.learn_rate= 0.01
 o.run_episodes = 550
 o.learn_offline = True
-o.offline_epochs = 100000
+o.offline_episodes = 100000
 o.offline_batchsize = 64
 o.offline_validate_every_x_iteration = 500
-o.learn_iterations = 1
+o.learn_epochs = 1
 
 validate = copy.deepcopy(c)
 validate.name = 'Validate0'
@@ -52,8 +52,8 @@ validate.load_mem = False
 validate.load_ann = True
 validate.learn_offline = False
 validate.offline_batchsize = 64000
-validate.offline_epochs = 20
-validate.learn_iterations = 1
+validate.offline_episodes = 20
+validate.learn_epochs = 1
 validate.offline_validate_every_x_iteration = 1
 validate.learn_online = True
 validate.eps_decay = 0.95
diff --git a/run_scripts/offline.py b/run_scripts/offline.py
index 4307d41..fd789b8 100644
--- a/run_scripts/offline.py
+++ b/run_scripts/offline.py
@@ -18,9 +18,9 @@ standard_conf.learn_offline = True
 standard_conf.net_layout= [256, 128]
 standard_conf.eps_decay = 0.6
 standard_conf.learn_rate= 0.0005
-standard_conf.learn_iterations = 4
+standard_conf.learn_epochs = 4
 standard_conf.run_episodes = 100
-standard_conf.offline_epochs = 2000
+standard_conf.offline_episodes = 2000
 standard_conf.offline_validate_every_x_iteration = -1
 standard_conf.load_ann = False
 standard_conf.load_mem = True
diff --git a/run_scripts/report.py b/run_scripts/report.py
index 7bf76df..e2d78ad 100644
--- a/run_scripts/report.py
+++ b/run_scripts/report.py
@@ -18,9 +18,9 @@ standard_conf.learn_offline = False
 standard_conf.net_layout= [256, 128]
 standard_conf.eps_decay = 0.9996
 standard_conf.learn_rate= 0.001
-standard_conf.learn_iterations = 1
+standard_conf.learn_epochs = 1
 standard_conf.run_episodes = 20
-standard_conf.offline_epochs = 1000
+standard_conf.offline_episodes = 1000
 standard_conf.offline_validate_every_x_iteration = 10
 standard_conf.load_ann = False
 standard_conf.load_mem = False
-- 
GitLab