From 4764b70a146a1bdbc06b901287abe2f96a8b4323 Mon Sep 17 00:00:00 2001
From: Armin <armin.co@hs-bochum.de>
Date: Mon, 15 Feb 2021 19:51:40 +0100
Subject: [PATCH] Scriptable Learning

---
 agents.py              | 13 +++----
 benchmarks.py          | 58 ++++++++++++++++++++++++++++++
 carla_environment.py   | 81 +++++++++++++++++++++++++++++++++---------
 environment_wrapper.py | 40 ++++++++++++++++++---
 main.py                | 66 +++++++++++++++-------------------
 networks.py            | 20 ++++++-----
 steering_wheel.py      | 23 ++++++++----
 7 files changed, 221 insertions(+), 80 deletions(-)
 create mode 100644 benchmarks.py

diff --git a/agents.py b/agents.py
index ede5a6d..cc99e0f 100644
--- a/agents.py
+++ b/agents.py
@@ -7,14 +7,15 @@ class QAgent:
     gamma = 0.99
     epsilon = 1.0
     epsilon_min = 0.01
-    epsilon_decay = 0.9996
+    epsilon_decay = 0.9999
     online_batch_size = 64
 
-    def __init__(self, action_space, state_space, name):
-        self.q = QNet(action_space, state_space)
+    def __init__(self, conf):#self, action_space, state_space, name):
+        self.q = QNet(conf)#conf.env.action_space.n, conf.env.observation_space.shape[0])
         self.memory = Memory()
-        self.action_space = action_space
-        self.name = name
+        self.action_space = conf.env.action_space.n
+        self.name = conf.name
+        self.epsilon_decay = conf.eps_decay
 
     def get_action(self, state):
         if np.random.rand() <= self.epsilon:
@@ -31,7 +32,7 @@ class QAgent:
         epochs = 1
 
         if offline:
-            batch_size = 4096
+            batch_size = 2048
 
         if len(self.memory.history) < batch_size:
             return
diff --git a/benchmarks.py b/benchmarks.py
new file mode 100644
index 0000000..49c348a
--- /dev/null
+++ b/benchmarks.py
@@ -0,0 +1,58 @@
+import main
+import environment_wrapper as ew
+import gym
+from carla_environment import CarlaEnvironment
+import copy
+from tqdm import trange
+
+c = ew.Config()
+
+c.name = 'Base'
+c.render = False
+c.env = gym.make('LunarLander-v2')
+c.env_type = 'Lunar'
+c.net_layout = [256, 128]
+c.eps_decay = 0.9996
+c.learn_rate= 0.001
+c.run_episodes = 300
+c.save_to = 'benchmarks/'
+
+smallNet = copy.deepcopy(c)
+smallNet.name = 'SmallNet'
+smallNet.net_layout = [128, 32]
+smallNet.conf_to_name()
+
+normalNet = copy.deepcopy(c)
+normalNet.name = 'NormalNet'
+normalNet.net_layout = [256, 128]
+normalNet.conf_to_name()
+
+normalSlowDecay = copy.deepcopy(c)
+normalSlowDecay.name = 'NormalSlowDecay'
+normalSlowDecay.net_layout = [256, 128]
+normalSlowDecay.eps_decay = 0.99995
+normalSlowDecay.conf_to_name()
+
+normalSlowLearn = copy.deepcopy(c)
+normalSlowLearn.name = 'NormalSlowLearn'
+normalSlowLearn.net_layout = [256, 128]
+normalSlowLearn.learn_rate = 0.0005
+normalSlowLearn.conf_to_name()
+
+largeNet = copy.deepcopy(c)
+largeNet.name = 'LargeNet'
+largeNet.net_layout = [512, 256]
+largeNet.conf_to_name()
+
+deepNet = copy.deepcopy(c)
+deepNet.name = 'DeepNet'
+deepNet.net_layout = [256, 128, 128]
+deepNet.conf_to_name()
+
+# configurations = [smallNet, normalNet ]
+# configurations = [normalSlowDecay, normalSlowLearn]
+configurations = [largeNet, deepNet]
+
+configs = trange(len(configurations), desc='Benchmarking configs', unit="Configuration")
+for i in configs:
+    main.run(configurations[i])
diff --git a/carla_environment.py b/carla_environment.py
index 10e2d83..e41164c 100644
--- a/carla_environment.py
+++ b/carla_environment.py
@@ -10,7 +10,7 @@ import numpy as np
 import pygame
 import time
 
-from steering_wheel import ACTION_SPACE, Controller
+from steering_wheel import ACTION_SPACE, Controller, IDLE
 
 # find carla module
 try:
@@ -87,12 +87,36 @@ class CollisionSensor:
         intensity = math.sqrt(impulse.x**2 + impulse.y**2
             + impulse.z**2)
         # print(intensity)
+class LidarSensor:
+    sensor = None
+    data = None
+    def __init__(self, world, parent):
+        bp = world.get_blueprint_library().find('sensor.lidar.ray_cast')
+        bp.set_attribute('rotation_frequency', '10')
+        bp.set_attribute('channels', '1')
+        bp.set_attribute('points_per_second', '100')
+        bp.set_attribute('range', '7')
+        bp.set_attribute('sensor_tick', '0.1')
+        bp.set_attribute('upper_fov', '0')
+        bp.set_attribute('lower_fov', '0')
+        weak_self = weakref.ref(self)
+        self.sensor = world.spawn_actor(bp, carla.Transform(), attach_to=parent)
+        self.sensor.listen(lambda event: LidarSensor._on_data(weak_self, event))
+
+    def _on_data(weak_self, event):
+        self = weak_self()
+        if not self:
+            return
+        for m in event: print(m.point)
+        if len(event) > 0:
+            print(event.horizontal_angle)
 
 
 class World:
     """ Wrapper for the carla environment, incl. player/vehicle """
     player = None
     collision_sensor = None
+    lidar_sensor = None
     world = None
     blueprint_library = None
     spawn_points = None
@@ -101,6 +125,10 @@ class World:
 
     def __init__(self, world):
         self.world = world
+        settings = self.world.get_settings()
+        settings.fixed_delta_seconds = 0.1
+        settings.synchronous_mode = True
+        self.world.apply_settings(settings)
         self.blueprint_library = self.world.get_blueprint_library()
         self.spawn_points = self.world.get_map().get_spawn_points()
         self.reset()
@@ -115,7 +143,8 @@ class World:
     def observation(self):
         if self.player is not None:
             pos = self.player.get_transform()
-            return [pos.location.x, pos.location.y, pos.rotation.yaw]
+            vel = self.player.get_velocity()
+            return [pos.location.x, pos.location.y, vel.x, vel.y, pos.rotation.yaw]
         else:
             return [0,0,0]
 
@@ -128,6 +157,7 @@ class World:
         start_location = self.player.get_location()
         print(str(start_location))
         self.collision_sensor = CollisionSensor(self.world, self.player)
+        # self.lidar_sensor = LidarSensor(self.world, self.player)
         # start_location.x = 288.0
         # start_location.y = 55.0
         # self.player.set_location(start_location)
@@ -154,6 +184,9 @@ class World:
         for actor in self.actors:
             actor.destroy()
         self.actors = []
+        if self.collision_sensor is not None:
+            self.collision_sensor.sensor.destroy()
+            self.collision_sensor = None
 
 
     def step(self, action):
@@ -161,22 +194,24 @@
         controls = Controller.action_to_controls(action)
         c = carla.VehicleControl(throttle=controls[0], steer=controls[1], brake=controls[2], reverse=controls[3])
         self.player.apply_control(c)
-        reward = self.reward()
+        self.world.tick()
+        reward = self.reward(action)
         self.collision_sensor.collision = None
         return reward
 
-    def reward(self):
+    def reward(self, action):
         target = carla.Transform(carla.Location(x=-25.534311, y=54.460903, z=0.112781), carla.Rotation(pitch=0.000000, yaw=-175.922913, roll=-6.221135))
         pos = self.player.get_transform()
-        pos_diff = (target.location.x - pos.location.x)**2 + (target.location.y - pos.location.y)**2 + 1
-        rot_diff = (target.rotation.yaw - pos.rotation.yaw)**2 + 1
-        r = ((1/pos_diff) - 0.5)
+        pos_diff = math.sqrt((target.location.x - pos.location.x)**2 + (target.location.y - pos.location.y)**2)
+        rot_diff = (target.rotation.yaw - pos.rotation.yaw)
+        r = (-pos_diff / 10) + 1
         at_final_position = False
-        if pos_diff < 1.2:
+        if pos_diff < 1.8:
             if self.was_big_reward == False:
                 r += 100
                 self.was_big_reward = True
-            if rot_diff < 5:
+            if abs(rot_diff) < 5:
+                r += 100
                 at_final_position = True
         else:
             if self.was_big_reward == True:
@@ -184,20 +219,28 @@ class World:
                 r -= 50
 
         velocity = self.player.get_velocity()
-        v = (velocity.x)**2 + \
-            (velocity.y)**2
+        v = math.sqrt((velocity.x)**2 + (velocity.y)**2)
+        if v < 0.01:
+            r -= 1
+        if v > 15.0:
+            r -= 10
+
         done = False
         if at_final_position and v < 0.01:
+            print("Success")
             done = True
         if self.collision_sensor.collision is not None:
-            r -= 100
+            r -= 250
             done = True
+        if action == IDLE:
+            r -= 1
         return self.observation(), r, done, None
 
 class ActionSpace:
     n = ACTION_SPACE
 class ObservationSpace:
-    shape = [3]
+    shape = [5]
+
 class CarlaEnvironment:
     action_space = ActionSpace
     observation_space = ObservationSpace
@@ -230,7 +273,13 @@ class CarlaEnvironment:
         if render:
             self.render()
         return self.world.step(action)
-    
+    
+    def __del__(self):
+        if self.camera is not None:
+            self.camera.sensor.destroy()
+        if self.world is not None:
+            self.world.destroy()
+    
 if __name__ == "__main__":
     LOCALHOST="127.0.0.1"
     SERVER="192.168.188.20"
@@ -241,11 +290,11 @@
     while ctrl.is_running():
         clock.tick(20)
         ctrl.on_update()
-        obs, reward, done = env.step(ctrl.get_action(), render=True)
-        if done:
-            break
+        obs, reward, done, _ = env.step(ctrl.get_action(), render=True)
         print(str(reward) + ' ' + str(done))
         cumulated_reward += reward
+        if done:
+            break
     env.world.destroy()
     pygame.quit()
     print(cumulated_reward)
diff --git a/environment_wrapper.py b/environment_wrapper.py
index fe29d01..5f707e4 100644
--- a/environment_wrapper.py
+++ b/environment_wrapper.py
@@ -1,10 +1,37 @@
 """ Wrapper to abstract different learning environments for an agent. """
 import numpy as np
-from tqdm import tqdm
 from tqdm import trange
+import pandas as pd
 import matplotlib.pyplot as plt
 
 
+class Config:
+    render = False
+    force_cpu = True
+    env = None
+    env_type = 'Lunar'
+    name = 'ConfigTest'
+    learn = True
+    learn_online = True
+    learn_offline = False
+    net_layout= [256, 128]
+    eps_decay = 0.9996
+    learn_rate= 0.001
+    run_episodes = 20
+    offline_epochs = 1000
+    load_ann = False
+    load_mem = False
+    load_from = 'agnt'
+    save_to = 'saved_agents/'
+
+    def conf_to_name(self):
+        self.name += self.env_type
+        for layer in self.net_layout:
+            self.name += '_' + str(layer) + '_'
+        self.name += str(self.eps_decay) + '_'
+        self.name += str(self.learn_rate)
+
+
 def step(environment, action):
     """ Perform one iteratino in the environment. """
 
@@ -74,11 +101,14 @@ def run(environment, agent, episodes, render=True, learn=True):
     return score_history, avg_score_history
 
 
-def process_logs(avg_score_history, loss, title="Title", render=False):
+def process_logs(avg_score_history, loss, conf):
+    df = pd.DataFrame(list(zip(loss, avg_score_history)), columns=['Score', 'Average'])
+    df.to_csv(conf.save_to + conf.name + '/' + conf.name + '.csv')
+
     """ Plot the log history """
     plt.plot([i+1 for i in range(0, len(loss), 2)], loss[::2])
     plt.plot([i+1 for i in range(0, len(avg_score_history), 2)], avg_score_history[::2], '--')
-    plt.title(title)
-    plt.savefig('saved_agents/' + title + '.png', format="png")
-    if render:
+    plt.title(conf.name)
+    plt.savefig(conf.save_to + conf.name + '/' + conf.name + '.png', format="png")
+    if conf.render:
         plt.show()
diff --git a/main.py b/main.py
index 8539786..3b9cf51 100644
--- a/main.py
+++ b/main.py
@@ -4,65 +4,55 @@ Run your desired environment and agent configuration.
 
 import os
 import atexit
 
-from carla_environment import CarlaEnvironment
-CARLA=True
-# import gym
+import gym
 from agents import QAgent as QAgent
 import environment_wrapper as ew
 
-# Allow GPU usage or force tensorflow to use the CPU. 
-FORCE_CPU=True
-if FORCE_CPU:
-    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
-    os.environ["CUDA_VISIBLE_DEVICES"] = ""
-
-RENDER = False
-if __name__ == '__main__':
-    # 1. Create an environment
-    env = CarlaEnvironment(render=RENDER)
+def run(conf):
+    # 0. Allow GPU usage or force tensorflow to use the CPU.
+    if conf.force_cpu:
+        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
+        os.environ["CUDA_VISIBLE_DEVICES"] = ""
 
     # 2. Create a learning agent
-    marvin = QAgent(env.action_space.n, env.observation_space.shape[0], 'Carla')
+    marvin = QAgent(conf) #conf.env.action_space.n, conf.env.observation_space.shape[0], conf.name)
 
     # (2.5) *optional* Load agent memory and/or net from disk.
-    agnt = 'Carla'
-    LOAD_ANN = False
-    LOAD_MEMORIES = True
-    if LOAD_ANN or LOAD_MEMORIES:
-        marvin.load('saved_agents/' + agnt + '/' + agnt, net=LOAD_ANN, memory=LOAD_MEMORIES)
+    if conf.load_ann or conf.load_mem:
+        marvin.load(conf.save_to + conf.load_from + '/' + conf.load_from, net=conf.load_ann, memory=conf.load_mem)
 
     # 3. Set your configurations for the run.
-    LEARNING = True
-    LEARN_ONLINE = True
-    LEARN_OFFLINE = True
-    RUN_EPISODES = 100
-    LEARN_OFFLINE_EPOCHS = 1000
-    SAVE_PATH = "./saved_agents"
-
    # Register an *atexit* callback,
     # to store the corrent result of the agent
     # if the program is interrupted.
-    if LEARNING:
-        atexit.register(marvin.save, SAVE_PATH)
+    if conf.learn:
+        atexit.register(marvin.save, conf.save_to)
 
     # Offline training of the agent with
     # previous collected and saved memories.
-    if LEARN_OFFLINE and LEARNING:
-        ew.learn_offline(marvin, epochs=LEARN_OFFLINE_EPOCHS)
+    if conf.learn_offline and conf.learn:
+        ew.learn_offline(marvin, epochs=conf.offline_epochs)
 
     # Run the agent in the environment for the
     # number of specified epochs. Either to
     # verify the performance of the agent or
     # to train the agent.
-    _LEARN = LEARN_ONLINE and LEARNING
-    loss, avg_score = ew.run(env, marvin, RUN_EPISODES, render=RENDER, learn=_LEARN)
+    _LEARN = conf.learn_online and conf.learn
+    loss, avg_score = ew.run(conf.env, marvin, conf.run_episodes, render=conf.render, learn=_LEARN)
 
     # Save the final training result of the agent.
-    if LEARNING:
-        marvin.save(SAVE_PATH)
+    if conf.learn:
+        marvin.save(conf.save_to)
+
+    ew.process_logs(avg_score, loss, conf)
 
-    if CARLA:
-        env.world.destroy()
-    # Show the result of the runl. 
-    ew.process_logs(avg_score, loss, title=marvin.name, render=RENDER)
+    if conf.env_type == 'Carla':
+        conf.env.world.destroy()
+
+if __name__ == '__main__':
+    conf = ew.Config()
+    conf.env = gym.make('LunarLander-v2')
+    conf.env_type = 'Lunar'
+    conf.conf_to_name()
+    run(conf)
\ No newline at end of file
diff --git a/networks.py b/networks.py
index 846e6fa..0492454 100644
--- a/networks.py
+++ b/networks.py
@@ -6,17 +6,21 @@ from keras.activations import relu, linear
 from keras.regularizers import l2
 
 class QNet:
-    net = Sequential()
-    learn_rate = 0.001
+
+    learn_rate = 0.0005
 
-    def __init__(self, action_space, state_space):
-        self.compile_net(action_space, state_space)
+    def __init__(self, conf):
+        self.net = None
+        self.net = Sequential()
+        self.learn_rate = conf.learn_rate  # set before compile_net() so Adam uses the configured rate
+        self.compile_net(conf)
         self.net.summary()
 
-    def compile_net(self, action_space, state_space):
-        self.net.add(Dense(256, input_dim=state_space, activation=relu))
-        self.net.add(Dense(128, activation=relu))
-        self.net.add(Dense(action_space, activation=linear))
+    def compile_net(self, conf):
+        self.net.add(Dense(conf.net_layout[0], input_dim=conf.env.observation_space.shape[0], activation=relu))
+        for layer in range(1, len(conf.net_layout)):
+            self.net.add(Dense(conf.net_layout[layer], activation=relu))
+        self.net.add(Dense(conf.env.action_space.n, activation=linear))
         self.net.compile(loss='mse', optimizer=Adam(lr=self.learn_rate))
 
     def predict(self, state):
diff --git a/steering_wheel.py b/steering_wheel.py
index 2828f7b..9f968f4 100644
--- a/steering_wheel.py
+++ b/steering_wheel.py
@@ -67,24 +67,33 @@ class ManualSteeringWheel:
     def action_to_controls(action):
         if action == IDLE:
             return [0.0, 0.0, 0.0, False]
+
         if action == FORWARD:
-            return [0.8, 0.0, 0.0, False]
+            return [0.6, 0.0, 0.0, False]
+
         if action == REVERSE:
-            return [0.8, 0.0, 0.0, True]
+            return [0.5, 0.0, 0.0, True]
+
         if action == FORWARD_LEFT:
-            return [0.8, -0.4, 0.0, False]
+            return [0.4, -0.4, 0.0, False]
+
         if action == FORWARD_RIGHT:
-            return [0.8, 0.4, 0.0, False]
+            return [0.4, 0.4, 0.0, False]
+
         if action == LEFT:
             return [0.0, -0.4, 0.0, False]
+
         if action == RIGHT:
-            return [0.0, 0.5, 0.0, False]
+            return [0.0, 0.4, 0.0, False]
+
         if action == BRAKE:
             return [0.0, 0.0, 0.5, False]
+
        if action == REVERSE_LEFT:
-            return [0.5, -0.4, 0.0, True]
+            return [0.4, -0.4, 0.0, True]
+
         if action == REVERSE_RIGHT:
-            return [0.5, 0.4, 0.0, True]
+            return [0.4, 0.4, 0.0, True]
 
     def get_action(self):
         action = IDLE
-- 
GitLab
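
Usage sketch (not part of the patch): the Config class added to environment_wrapper.py is what makes a run scriptable, and benchmarks.py shows the intended pattern — build a Config, call conf_to_name(), then hand it to main.run(). A minimal sketch of that usage, assuming the patched modules are importable; the name 'TinyNet', the [64, 32] layout and the 10-episode count below are illustrative values, not part of the patch:

    import gym
    import main
    import environment_wrapper as ew

    conf = ew.Config()                     # defaults from the patch: [256, 128] layers, eps_decay 0.9996, learn_rate 0.001
    conf.name = 'TinyNet'                  # hypothetical name for a quick smoke test
    conf.env = gym.make('LunarLander-v2')  # same environment benchmarks.py uses
    conf.env_type = 'Lunar'
    conf.net_layout = [64, 32]             # hypothetical small layout
    conf.run_episodes = 10                 # hypothetical short run
    conf.save_to = 'benchmarks/'
    conf.conf_to_name()                    # appends env type, layout, eps_decay and learn_rate to the name
    main.run(conf)                         # trains, saves the agent, and writes the score CSV/plot under save_to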