Commit 61a80519 authored by Armin Co

refactoring

parent 0fe0f9b8
......@@ -9,4 +9,5 @@ orientation
workspace.code-workspace
test
tech_demo.py
Screencast
*.png
......@@ -79,12 +79,6 @@ class DQAgent(QAgent):
self.q2 = QNet(conf)
self.name = str(self.name) + 'DBL'
def get_action(self, state):
if np.random.rand() <= self.epsilon:
return random.randrange(self.action_space)
action_values = self.q.predict(state)
return np.argmax(action_values[0])
def learn(self, offline=False, epochs=1):
if self.epsilon > self.epsilon_min:
self.epsilon *= self.epsilon_decay
......@@ -114,13 +108,9 @@ class DQAgent(QAgent):
return loss
def load(self, path, net=True, memory=True):
print('Load: ' + path)
super().load(path, net=net, memory=memory)
if net:
print('Network')
self.q.load(path+'.net')
self.q2.load(path+'.net')
if memory:
self.memory.load(path+'.mem')
class CarlaManual(QAgent):
control = None
......
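The get_action override removed above implemented epsilon-greedy action selection, which presumably now lives only in the QAgent base class. A minimal, self-contained sketch of that pattern (the epsilon_greedy name and its arguments are illustrative, not the repository's API):
import numpy as np
# Epsilon-greedy selection over the Q-values predicted for one state.
def epsilon_greedy(q_values, epsilon, action_space, rng=np.random.default_rng()):
    if rng.random() <= epsilon:
        # Explore: pick a random action index.
        return int(rng.integers(action_space))
    # Exploit: pick the action with the highest predicted Q-value.
    return int(np.argmax(q_values))
# With epsilon=0 the greedy action (index 2) is always chosen.
print(epsilon_greedy(np.array([0.1, -0.3, 0.8, 0.0]), epsilon=0.0, action_space=4))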
......@@ -12,7 +12,6 @@ import time
from steering_wheel import ACTION_SPACE, Controller, IDLE
# find carla module
try:
CARLA_PATH='/media/armin/Games/carla/PythonAPI/carla/dist/carla-*%d.%d-%s.egg'
CARLA_PATH='/opt/carla-simulator/PythonAPI/carla/dist/carla-*%d.%d-%s.egg'
......@@ -23,8 +22,11 @@ try:
except IndexError:
pass
try:
import carla
except:
print('Could not import carla, you will not be able to create a client!')
print('Continuing...')
class Camera:
""" Add camera sensor to the carla world """
......@@ -71,7 +73,7 @@ class Camera:
class CollisionSensor:
sensor = None
collision = None
intensity = 0
def __init__(self, world, parent):
bp = world.get_blueprint_library().find('sensor.other.collision')
self.sensor = world.spawn_actor(bp, carla.Transform(), attach_to=parent)
......@@ -82,16 +84,13 @@ class CollisionSensor:
self = weak_self()
if not self:
return
# print(event.other_actor)
self.collision = event
impulse = event.normal_impulse
intensity = math.sqrt(impulse.x**2 + impulse.y**2 + impulse.z**2)
# print(intensity)
self.intensity = math.sqrt(impulse.x**2 + impulse.y**2 + impulse.z**2)
class ObstacleSensor:
sensor = None
parent = None
ZONES = 4
obstacle = []
def __init__(self, world, parent):
......@@ -99,8 +98,6 @@ class ObstacleSensor:
bp = world.get_blueprint_library().find('sensor.other.obstacle')
bp.set_attribute('distance', '10')
bp.set_attribute('hit_radius', '5')
# print(bp.get_attribute('hit_radius'))
position = carla.Transform(carla.Location(x=-2, y=0, z=0.0), carla.Rotation(pitch=0.0, yaw=0, roll=0.0))
self.sensor = world.spawn_actor(bp, carla.Transform(), attach_to=parent, attachment_type=carla.AttachmentType.Rigid)
weak_self = weakref.ref(self)
self.sensor.listen(lambda event: ObstacleSensor._on_event(weak_self, event))
......@@ -110,7 +107,8 @@ class ObstacleSensor:
if not self:
return
self.collision = event
if not event.other_actor.type_id == 'static.road' and not event.other_actor.type_id == 'static.roadline' and not event.other_actor.type_id == 'static.sidewalk' and False:
actor_id = event.other_actor.type_id
if not actor_id == 'static.road' and not actor_id == 'static.roadline' and not actor_id == 'static.sidewalk' and False:
print(self.parent.get_transform())
print(event.transform)
print(self.parent.get_velocity())
......@@ -139,12 +137,12 @@ class LidarSensor:
self = weak_self()
if not self:
return
for m in event: print(m.point)
for m in event:
if False: print(m.point)
if len(event) > 0:
print(event.horizontal_angle)
if False: print(event.horizontal_angle)
class World:
""" Wrapper for the carla environment, incl. player/vehicle """
player = None
collision_sensor = None
obstacle_sensor = None
......@@ -166,7 +164,6 @@ class World:
self.reset()
def reset(self):
""" Remove and create new player/vehicle. """
self.destroy()
time.sleep(0.5)
self.spawn_on_sidewalk()
......@@ -180,12 +177,10 @@ class World:
return [0,0,0,0,0]
def spawn_player(self, transform):
""" Add a vehicle to the world. """
while self.player is None:
blueprint = random.choice(self.blueprint_library.filter('model3'))
position = transform
self.player = self.world.try_spawn_actor(blueprint, position)
start_location = self.player.get_location()
self.collision_sensor = CollisionSensor(self.world, self.player)
self.obstacle_sensor = ObstacleSensor(self.world, self.player)
......@@ -213,7 +208,6 @@ class World:
self.actors.append(actor)
def destroy(self):
""" Remove vehicle from the world. """
if self.player is not None:
self.player.destroy()
self.player = None
......@@ -230,16 +224,15 @@ class World:
def step(self, action):
""" Apply controls to vehicle. """
controls = Controller.action_to_controls(action)
c = carla.VehicleControl(throttle=controls[0], steer=controls[1], brake=controls[2], reverse=controls[3])
self.player.apply_control(c)
self.world.tick()
reward = self.reward(action)
reward = self.reward()
self.collision_sensor.collision = None
return reward
def reward(self, action):
def reward(self):
x, y, vx, vy, yaw = self.observation()
target = carla.Transform( \
......@@ -280,10 +273,8 @@ class World:
class ActionSpace:
n = ACTION_SPACE
class ObservationSpace:
shape = [5]
class CarlaEnvironment:
action_space = ActionSpace
observation_space = ObservationSpace
......@@ -340,7 +331,6 @@ if __name__ == "__main__":
clock.tick(5)
ctrl.on_update()
obs, reward, done, _ = env.step(ctrl.get_action(), render=True)
# print(str(reward) + ' ' + str(done))
cumulated_reward += reward
if done:
break
......
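The sensor classes above all hand the CARLA listen callback a weakref.ref(self) together with a static _on_event, so the callback held by the simulator does not keep the Python wrapper alive. A self-contained sketch of that pattern without the carla dependency (EventSource is a stand-in for a carla sensor):
import weakref
class EventSource:
    """ Stand-in for a carla sensor: stores a callback and can fire events. """
    def __init__(self):
        self._callback = None
    def listen(self, callback):
        self._callback = callback
    def fire(self, event):
        if self._callback:
            self._callback(event)
class SensorWrapper:
    def __init__(self, source):
        self.last_event = None
        weak_self = weakref.ref(self)  # avoid a reference cycle through the callback
        source.listen(lambda event: SensorWrapper._on_event(weak_self, event))
    @staticmethod
    def _on_event(weak_self, event):
        self = weak_self()
        if not self:  # wrapper was already garbage collected
            return
        self.last_event = event
source = EventSource()
sensor = SensorWrapper(source)
source.fire('collision')
print(sensor.last_event)  # -> collision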
class Config:
render = False
force_cpu = True
env = None
agent = None
env_type = 'Lunar'
name = '00'
learn = True
learn_online = True
learn_offline = False
net_layout= [256, 128]
eps_decay = 0.9996
learn_rate= 0.001
learn_epochs = 1
run_episodes = 20
offline_batchsize = 64
offline_episodes = 100
offline_validate_every_x_iteration = -1
load_ann = False
load_mem = False
load_from = 'agent_name'
save_to = 'dir/'
def conf_to_name(self):
self.name = str(self.env_type) + '_' + str(self.name)
for layer in self.net_layout:
self.name += '_' + str(layer) + '_'
self.name += str(self.eps_decay) + '_'
self.name += str(self.learn_rate) + '_'
self.name += str(self.learn_epochs)
\ No newline at end of file
......@@ -5,39 +5,6 @@ from tqdm import trange
import pandas as pd
import matplotlib.pyplot as plt
IS_SOLVED = 195
class Config:
render = False
force_cpu = True
env = None
agent = None
env_type = 'Lunar'
name = 'ConfigTest'
learn = True
learn_online = True
learn_offline = False
net_layout= [256, 128]
eps_decay = 0.9996
learn_rate= 0.001
learn_iterations = 1
run_episodes = 20
offline_batchsize = 2048
offline_epochs = 1000
offline_validate_every_x_iteration = 10
load_ann = False
load_mem = False
load_from = 'agnt'
save_to = 'saved_agents/'
def conf_to_name(self):
self.name = str(self.env_type) + '_' + str(self.name)
for layer in self.net_layout:
self.name += '_' + str(layer) + '_'
self.name += str(self.eps_decay) + '_'
self.name += str(self.learn_rate) + '_'
self.name += str(self.learn_iterations)
......@@ -70,13 +37,14 @@ def one_episode(environment, agent, render, learn, conf=None, max_steps=1000):
state = following_state
if learn:
if conf is not None:
agent.learn(epochs=conf.learn_iterations)
agent.learn(epochs=conf.learn_epochs)
else:
agent.learn()
if done:
break
return score
IS_SOLVED = 195
def learn_offline(agent, conf):
""" Train the agent with its memories. """
print('Learning with ', len(agent.memory.history), ' memories.')
......@@ -84,9 +52,9 @@ def learn_offline(agent, conf):
score_history = []
avg_score_history = []
desc_train = ''
pbar = trange(conf.offline_epochs, desc='Loss: x')
pbar = trange(conf.offline_episodes, desc='Loss: x')
for i in pbar:
loss = agent.learn(offline=True, epochs=conf.learn_iterations)
loss = agent.learn(offline=True, epochs=conf.learn_epochs)
desc = ('Loss: %05.4f' %(loss)) + desc_train
pbar.set_description(desc)
pbar.refresh()
......@@ -108,20 +76,21 @@ def learn_offline(agent, conf):
def run(environment, agent, episodes, render=True, learn=True, conf=None):
def run(conf):
""" Run an agent """
conf.name = str(conf.name) + 'on'
# Set the exploring rate to its minimum.
# (epsilon *greedy*)
learn = conf.learn and conf.learn_online
if not learn:
agent.epsilon = agent.epsilon_min
conf.agent.epsilon = conf.agent.epsilon_min
score_history = []
avg_score_history = []
pbar = trange(episodes, desc=agent.name + ' [act, avg]: [0, 0]', unit="Episodes")
pbar = trange(conf.run_episodes, desc=conf.agent.name + ' [act, avg]: [0, 0]', unit="Episodes")
for _ in pbar:
score = one_episode(environment, agent, render, learn, conf=conf)
score = one_episode(conf.env, conf.agent, conf.render, learn, conf=conf)
score_history.append(score)
is_solved = np.mean(score_history[-100:])
......@@ -129,7 +98,7 @@ def run(environment, agent, episodes, render=True, learn=True, conf=None):
if is_solved > IS_SOLVED and learn:
break
desc = (agent.name + " [act, avg]: [{0:.2f}, {1:.2f}]".format(score, is_solved))
desc = (conf.agent.name + " [act, avg]: [{0:.2f}, {1:.2f}]".format(score, is_solved))
pbar.set_description(desc)
pbar.refresh()
return score_history, avg_score_history
......@@ -143,14 +112,16 @@ def process_logs(avg_score_history, loss, conf):
pass
df.to_csv(conf.save_to + conf.name + '/' + conf.name + '.csv')
""" Plot the log history """
act_score = df['Score']
avg_score = df['Average']
plt.figure()
plt.plot([i+1 for i in range(0, len(loss), 2)], loss[::2])
plt.plot([i+1 for i in range(0, len(avg_score_history), 2)], avg_score_history[::2], '--')
plt.plot(act_score, label='Episode Score')
plt.plot(avg_score, '--', label='Average Score')
plt.xlabel('Episode')
plt.ylabel('Score')
plt.legend()
plt.title(conf.name)
plt.savefig(conf.save_to + conf.name + '/' + conf.name + '.png', format="png")
if conf.render:
plt.show()
df.to_csv(conf.save_to + conf.name + '/' + conf.name + '.csv')
def load_logs(file):
......
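The reworked process_logs now plots the Score and Average columns of the DataFrame instead of the loss history. A small, self-contained version of that plotting logic with made-up scores (the values and file name are illustrative):
import pandas as pd
import matplotlib.pyplot as plt
scores = [10, 40, 25, 60, 80, 75, 90]
df = pd.DataFrame({'Score': scores,
                   'Average': pd.Series(scores).expanding().mean()})
plt.figure()
plt.plot(df['Score'], label='Episode Score')
plt.plot(df['Average'], '--', label='Average Score')
plt.xlabel('Episode')
plt.ylabel('Score')
plt.legend()
plt.title('example_run')
plt.savefig('example_run.png', format='png')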
......@@ -10,15 +10,13 @@ from agents import QAgent
import environment_wrapper as ew
def run(conf):
# 1. Create a learning agent
# Set agent
marvin = conf.agent
# (2.) *optional* Load agent memory and/or net from disk.
# (optional) Load agent memory and/or net from disk.
if conf.load_ann or conf.load_mem:
marvin.load(conf.save_to + conf.load_from + '/' + conf.load_from, net=conf.load_ann, memory=conf.load_mem)
# 3. Set your configurations for the run.
# Register an *atexit* callback,
# to store the current result of the agent
# if the program is interrupted.
......@@ -33,8 +31,8 @@ def run(conf):
# number of specified epochs. Either to
# verify the performance of the agent or
# to train the agent.
_LEARN = conf.learn_online and conf.learn
loss, avg_score = ew.run(conf.env, marvin, conf.run_episodes, render=conf.render, learn=_LEARN, conf=conf)
# loss, avg_score = ew.run(conf.env, marvin, conf.run_episodes, render=conf.render, learn=(conf.learn_online and conf.learn), conf=conf)
loss, avg_score = ew.run(conf)
# Save the final training result of the agent.
marvin.save(conf.save_to)
......
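The run script above mentions registering an atexit callback so the current result of the agent is stored if the program is interrupted. A minimal sketch of that pattern (DummyAgent and the save path are illustrative stand-ins, not the project's classes):
import atexit
class DummyAgent:
    """ Stand-in for the project's agent; only a save() method is needed here. """
    def save(self, path):
        print('Saving agent to ' + path)
agent = DummyAgent()
# Runs when the interpreter shuts down normally, e.g. after a KeyboardInterrupt
# has propagated out of the training loop.
atexit.register(lambda: agent.save('saved_agents/interrupted'))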
......@@ -10,27 +10,23 @@ NEXT_STATE = 3
DONE = 4
class Memory:
""" Class to store memories of an agent. """
history = deque(maxlen=1000000)
def add(self, state, action, reward, nextstate, done):
self.history.append((state, action, reward, nextstate, done))
def get_batch(self, batch_size):
""" Get a random batch of samples of "batch_size" """
batch = random.sample(self.history, batch_size)
states = np.array([i[STATE] for i in batch])
states = np.squeeze(states)
states = np.squeeze(np.array([i[STATE] for i in batch]))
actions = np.array([i[ACTION] for i in batch])
rewards = np.array([i[REWARD] for i in batch])
nextstates = np.array([i[NEXT_STATE] for i in batch])
nextstates = np.squeeze(nextstates)
nextstates = np.squeeze(np.array([i[NEXT_STATE] for i in batch]))
dones = np.array([i[DONE] for i in batch])
return states, actions, rewards, nextstates, dones
def save(self, path):
pickle.dump(self.history, open(path, 'wb'))
print('Saved ' + str(len(self.history)) + ' memories.')
def load(self, path):
self.history = pickle.load(open(path, 'rb'))
......
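get_batch stacks each field of the sampled transitions into an array and squeezes the extra batch dimension carried by states stored with shape (1, n). A self-contained illustration of that sampling pattern with dummy transitions:
import random
import numpy as np
# 100 dummy transitions; states are stored with shape (1, 4).
history = [(np.ones((1, 4)) * i,        # state
            i % 2,                      # action
            float(i),                   # reward
            np.ones((1, 4)) * (i + 1),  # next state
            False)                      # done flag
           for i in range(100)]
batch = random.sample(history, 8)
states = np.squeeze(np.array([t[0] for t in batch]))      # shape (8, 4)
actions = np.array([t[1] for t in batch])
rewards = np.array([t[2] for t in batch])
nextstates = np.squeeze(np.array([t[3] for t in batch]))  # shape (8, 4)
dones = np.array([t[4] for t in batch])
print(states.shape, nextstates.shape)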
......@@ -4,34 +4,35 @@ from keras.models import load_model
from keras.layers import Dense
from keras.optimizers import Adam
from keras.activations import relu, linear
from keras.regularizers import l2
from keras.callbacks import EarlyStopping
class QNet:
from numpy.lib.function_base import _parse_input_dimensions
class QNet:
learn_rate = 0.0005
def __init__(self, conf):
if conf.force_cpu:
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = ""
self.net = None
self.net = Sequential()
self.compile_net(conf)
self.net.summary()
self.learn_rate = conf.learn_rate
def compile_net(self, conf):
self.net.add(Dense(conf.net_layout[0], input_dim=conf.env.observation_space.shape[0], activation=relu))
input_dimension = conf.env.observation_space.shape[0]
first_layer = conf.net_layout[0]
self.net.add(Dense(first_layer, input_dim=input_dimension, activation=relu))
for layer in range(1, len(conf.net_layout)):
self.net.add(Dense(conf.net_layout[layer], activation=relu))
self.net.add(Dense(conf.env.action_space.n, activation=linear))
self.net.compile(loss='mse', optimizer=Adam(lr=self.learn_rate))
self.net.compile(loss='mse', optimizer=Adam(learning_rate=self.learn_rate))
def predict(self, state):
return self.net.predict(state)
def predict_on_batch(
self, states): return self.net.predict_on_batch(states)
def predict_on_batch(self, states):
return self.net.predict_on_batch(states)
def fit(self, X, Y, epochs=1, verbose=0):
callback = EarlyStopping(monitor='loss', patience=2, min_delta=0.1, restore_best_weights=True)
......@@ -44,13 +45,3 @@ class QNet:
def load(self, path):
self.net = load_model(path)
self.net.summary()
class QL2Net(QNet):
def __init__(self, action_space, state_space):
super().__init__(action_space, state_space)
def compile_net(self, action_space, state_space):
self.net.add(Dense(192, input_dim=state_space, activation=relu, kernel_regularizer=l2(0.01)))
self.net.add(Dense(64, activation=relu))
self.net.add(Dense(action_space, activation=linear))
self.net.compile(loss='mse', optimizer=Adam(lr=self.learn_rate))
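compile_net above builds a plain MLP with relu hidden layers, a linear output with one unit per action, and an MSE loss; the diff also switches Adam from the deprecated lr argument to learning_rate. A minimal, self-contained sketch of the same kind of network (layer sizes and dimensions here are illustrative, not the project's configuration):
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
def build_q_net(observation_dim, action_count, layout=(256, 128), learn_rate=0.001):
    net = Sequential()
    net.add(Dense(layout[0], input_dim=observation_dim, activation='relu'))
    for units in layout[1:]:
        net.add(Dense(units, activation='relu'))
    net.add(Dense(action_count, activation='linear'))  # one Q-value per action
    # Newer Keras versions expect learning_rate instead of the deprecated lr.
    net.compile(loss='mse', optimizer=Adam(learning_rate=learn_rate))
    return net
# Example: a net for an 8-dimensional observation and 4 discrete actions.
build_q_net(8, 4).summary()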
......@@ -47,12 +47,12 @@ cd_4.net_layout = [128, 64, 32, 32]
cd_5 = copy.deepcopy(c_32)
cd_5.net_layout = [256, 128, 128, 64]
cd_5.learn_iterations = 5
cd_5.learn_epochs = 5
cd_5.learn_rate = 0.001
cd_6 = copy.deepcopy(c_32)
cd_6.net_layout = [512, 256, 128, 64]
cd_6.learn_iterations = 10
cd_6.learn_epochs = 10
cd_6.learn_rate = 0.00075
cd_128 = copy.deepcopy(c_32)
......@@ -63,7 +63,7 @@ cd_256.net_layout = [256, 256, 256]
cd_512 = copy.deepcopy(c_32)
cd_512.net_layout = [512, 512, 512]
cd_512.learn_iterations = 10
cd_512.learn_epochs = 10
cd_512.learn_rate = 0.001
offline = copy.deepcopy(c_32)
......@@ -76,9 +76,9 @@ offline.learn_online = True
offline.run_episodes = 100
offline.net_layout = [1024, 1024, 1024, 256]
offline.learn_rate = 0.0005
offline.learn_iterations = 1
offline.learn_epochs = 1
offline.offline_validate_every_x_iteration = 1
offline.offline_epochs = 100
offline.offline_episodes = 100
offline.name = 'OnlineValidation'
offline.render = False
offline.save_to = 'test/'
......
......@@ -4,8 +4,8 @@ from os import listdir
from os.path import isfile, join
from pathlib import Path
BASE_PATH = '/home/armin/Master/semester_3/carla/'
DIR='baselines/hyrican'
BASE_PATH = '/home/armin/Master/semester_3/angewandte_ki/BerichtCarla/Daten'
DIR=''
path = BASE_PATH + DIR
def plot_csv(file_path, show=False):
......@@ -19,7 +19,7 @@ def plot_csv(file_path, show=False):
plt.xlabel('Episode')
plt.ylabel('Score')
plt.legend()
plt.title(file_path)
# plt.title(file_path)
plt.savefig(file_path + '.png')
if show:
plt.show()
......@@ -29,4 +29,4 @@ for dir in Path(path).iterdir():
file_path = join(dir, file)
if isfile(file_path):
if file_path.endswith('.csv'):
plot_csv(file_path, show=True)
plot_csv(file_path, show=False)
......@@ -35,10 +35,10 @@ o.eps_decay = 0.9915
o.learn_rate= 0.01
o.run_episodes = 550
o.learn_offline = True
o.offline_epochs = 100000
o.offline_episodes = 100000
o.offline_batchsize = 64
o.offline_validate_every_x_iteration = 500
o.learn_iterations = 1
o.learn_epochs = 1
validate = copy.deepcopy(c)
validate.name = 'Validate0'
......@@ -52,8 +52,8 @@ validate.load_mem = False
validate.load_ann = True
validate.learn_offline = False
validate.offline_batchsize = 64000
validate.offline_epochs = 20
validate.learn_iterations = 1
validate.offline_episodes = 20
validate.learn_epochs = 1
validate.offline_validate_every_x_iteration = 1
validate.learn_online = True
validate.eps_decay = 0.95
......
......@@ -18,9 +18,9 @@ standard_conf.learn_offline = True
standard_conf.net_layout= [256, 128]
standard_conf.eps_decay = 0.6
standard_conf.learn_rate= 0.0005
standard_conf.learn_iterations = 4
standard_conf.learn_epochs = 4
standard_conf.run_episodes = 100
standard_conf.offline_epochs = 2000
standard_conf.offline_episodes = 2000
standard_conf.offline_validate_every_x_iteration = -1
standard_conf.load_ann = False
standard_conf.load_mem = True
......
......@@ -18,9 +18,9 @@ standard_conf.learn_offline = False
standard_conf.net_layout= [256, 128]
standard_conf.eps_decay = 0.9996
standard_conf.learn_rate= 0.001
standard_conf.learn_iterations = 1
standard_conf.learn_epochs = 1
standard_conf.run_episodes = 20
standard_conf.offline_epochs = 1000
standard_conf.offline_episodes = 1000
standard_conf.offline_validate_every_x_iteration = 10
standard_conf.load_ann = False
standard_conf.load_mem = False
......