From 61a8051966139b9b3cdfd3923059105f37dd08c5 Mon Sep 17 00:00:00 2001
From: Armin <armin.co@hs-bochum.de>
Date: Sun, 4 Apr 2021 21:01:46 +0200
Subject: [PATCH] refactoring

---
 .gitignore                         |  1 +
 agents.py                          | 12 +-----
 carla_environment.py               | 38 +++++++-----------
 config.py                          | 31 +++++++++++++++
 environment_wrapper.py             | 63 ++++++++----------------
 main.py                            | 10 ++---
 memory.py                          | 12 ++----
 networks.py                        | 27 +++++--------
 run_scripts/baselines.py           | 10 ++---
 run_scripts/csv_history_to_plot.py |  8 ++--
 run_scripts/manual_carla.py        |  8 ++--
 run_scripts/offline.py             |  4 +-
 run_scripts/report.py              |  4 +-
 13 files changed, 98 insertions(+), 130 deletions(-)
 create mode 100644 config.py

diff --git a/.gitignore b/.gitignore
index 068c0b7..24d5ee1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,4 +9,5 @@ orientation
 workspace.code-workspace
 test
 tech_demo.py
+Screencast
 *.png
diff --git a/agents.py b/agents.py
index adf7204..38606ff 100644
--- a/agents.py
+++ b/agents.py
@@ -79,12 +79,6 @@ class DQAgent(QAgent):
         self.q2 = QNet(conf)
         self.name = str(self.name) + 'DBL'
 
-    def get_action(self, state):
-        if np.random.rand() <= self.epsilon:
-            return random.randrange(self.action_space)
-        action_values = self.q.predict(state)
-        return np.argmax(action_values[0])
-
     def learn(self, offline=False, epochs=1):
         if self.epsilon > self.epsilon_min:
             self.epsilon *= self.epsilon_decay
@@ -114,13 +108,9 @@ class DQAgent(QAgent):
         return loss
 
     def load(self, path, net=True, memory=True):
-        print('Load: ' + path)
+        super().load(path, net=net, memory=memory)
         if net:
-            print('Network')
-            self.q.load(path+'.net')
             self.q2.load(path+'.net')
-        if memory:
-            self.memory.load(path+'.mem')
 
 class CarlaManual(QAgent):
     control = None
diff --git a/carla_environment.py b/carla_environment.py
index fc0ad1c..e8bd989 100644
--- a/carla_environment.py
+++ b/carla_environment.py
@@ -12,7 +12,6 @@ import time
 
 from steering_wheel import ACTION_SPACE, Controller, IDLE
 
-# find carla module
 try:
     CARLA_PATH='/media/armin/Games/carla/PythonAPI/carla/dist/carla-*%d.%d-%s.egg'
     CARLA_PATH='/opt/carla-simulator/PythonAPI/carla/dist/carla-*%d.%d-%s.egg'
@@ -23,8 +22,11 @@ try:
 except IndexError:
     pass
 
-import carla
-
+try:
+    import carla
+except ImportError:
+    print('Could not import carla, you will not be able to create a client!')
+    print('Continuing...')
 
 class Camera:
     """ Add camera sensor to the carla world """
@@ -71,7 +73,7 @@ class Camera:
 class CollisionSensor:
     sensor = None
     collision = None
-
+    intensity = 0
     def __init__(self, world, parent):
         bp = world.get_blueprint_library().find('sensor.other.collision')
         self.sensor = world.spawn_actor(bp, carla.Transform(), attach_to=parent)
@@ -82,16 +84,13 @@ class CollisionSensor:
         self = weak_self()
         if not self:
             return
-        # print(event.other_actor)
         self.collision = event
         impulse = event.normal_impulse
-        intensity = math.sqrt(impulse.x**2 + impulse.y**2 + impulse.z**2)
-        # print(intensity)
+        self.intensity = math.sqrt(impulse.x**2 + impulse.y**2 + impulse.z**2)
 
 class ObstacleSensor:
     sensor = None
     parent = None
-    ZONES = 4
     obstacle = []
 
     def __init__(self, world, parent):
         self.parent = parent
         bp = world.get_blueprint_library().find('sensor.other.obstacle')
         bp.set_attribute('distance', '10')
         bp.set_attribute('hit_radius', '5')
-        # print(bp.get_attribute('hit_radius'))
-        position = carla.Transform(carla.Location(x=-2, y=0, z=0.0), carla.Rotation(pitch=0.0, yaw=0, roll=0.0))
         self.sensor = world.spawn_actor(bp, carla.Transform(), attach_to=parent, attachment_type=carla.AttachmentType.Rigid)
         weak_self = weakref.ref(self)
         self.sensor.listen(lambda event: ObstacleSensor._on_event(weak_self, event))
@@ -110,7 +107,8 @@ class ObstacleSensor:
         if not self:
             return
         self.collision = event
-        if not event.other_actor.type_id == 'static.road' and not event.other_actor.type_id == 'static.roadline' and not event.other_actor.type_id == 'static.sidewalk' and False:
+        actor_id = event.other_actor.type_id
+        if not actor_id == 'static.road' and not actor_id == 'static.roadline' and not actor_id == 'static.sidewalk' and False:
             print(self.parent.get_transform())
             print(event.transform)
             print(self.parent.get_velocity())
@@ -139,12 +137,12 @@ class LidarSensor:
         self = weak_self()
         if not self:
             return
-        for m in event: print(m.point)
+        for m in event:
+            if False: print(m.point)
         if len(event) > 0:
-            print(event.horizontal_angle)
+            if False: print(event.horizontal_angle)
 
 class World:
-    """ Wrapper for the carla environment, incl. player/vehicle """
     player = None
     collision_sensor = None
     obstacle_sensor = None
@@ -166,7 +164,6 @@ class World:
         self.reset()
 
     def reset(self):
-        """ Remove and create new player/vehicle. """
         self.destroy()
         time.sleep(0.5)
         self.spawn_on_sidewalk()
@@ -180,12 +177,10 @@ class World:
         return [0,0,0,0,0]
 
     def spawn_player(self, transform):
-        """ Add a vehicle to the world. """
         while self.player is None:
             blueprint = random.choice(self.blueprint_library.filter('model3'))
             position = transform
             self.player = self.world.try_spawn_actor(blueprint, position)
-        start_location = self.player.get_location()
         self.collision_sensor = CollisionSensor(self.world, self.player)
         self.obstacle_sensor = ObstacleSensor(self.world, self.player)
@@ -213,7 +208,6 @@ class World:
         self.actors.append(actor)
 
     def destroy(self):
-        """ Remove vehicle from the world. """
         if self.player is not None:
             self.player.destroy()
             self.player = None
@@ -230,16 +224,15 @@
 
 
     def step(self, action):
-        """ Apply controls to vehicle. """
""" controls = Controller.action_to_controls(action) c = carla.VehicleControl(throttle=controls[0], steer=controls[1], brake=controls[2], reverse=controls[3]) self.player.apply_control(c) self.world.tick() - reward = self.reward(action) + reward = self.reward() self.collision_sensor.collision = None return reward - def reward(self, action): + def reward(self): x, y, vx, vy, yaw = self.observation() target = carla.Transform( \ @@ -280,10 +273,8 @@ class World: class ActionSpace: n = ACTION_SPACE - class ObservationSpace: shape = [5] - class CarlaEnvironment: action_space = ActionSpace observation_space = ObservationSpace @@ -340,7 +331,6 @@ if __name__ == "__main__": clock.tick(5) ctrl.on_update() obs, reward, done, _ = env.step(ctrl.get_action(), render=True) - # print(str(reward) + ' ' + str(done)) cumulated_reward += reward if done: break diff --git a/config.py b/config.py new file mode 100644 index 0000000..550a001 --- /dev/null +++ b/config.py @@ -0,0 +1,31 @@ +class Config: + render = False + force_cpu = True + env = None + agent = None + env_type = 'Lunar' + name = '00' + learn = True + learn_online = True + learn_offline = False + net_layout= [256, 128] + eps_decay = 0.9996 + learn_rate= 0.001 + learn_epochs = 1 + run_episodes = 20 + offline_batchsize = 64 + offline_episodes = 100 + offline_validate_every_x_iteration = -1 + load_ann = False + load_mem = False + load_from = 'agent_name' + save_to = 'dir/' + + def conf_to_name(self): + self.name = str(self.env_type) + '_' + str(self.name) + for layer in self.net_layout: + self.name += '_' + str(layer) + '_' + self.name += str(self.eps_decay) + '_' + self.name += str(self.learn_rate) + '_' + self.name += str(self.learn_epochs) + \ No newline at end of file diff --git a/environment_wrapper.py b/environment_wrapper.py index 2c61cc0..c13585c 100644 --- a/environment_wrapper.py +++ b/environment_wrapper.py @@ -5,40 +5,7 @@ from tqdm import trange import pandas as pd import matplotlib.pyplot as plt -IS_SOLVED = 195 -class Config: - render = False - force_cpu = True - env = None - agent = None - env_type = 'Lunar' - name = 'ConfigTest' - learn = True - learn_online = True - learn_offline = False - net_layout= [256, 128] - eps_decay = 0.9996 - learn_rate= 0.001 - learn_iterations = 1 - run_episodes = 20 - offline_batchsize = 2048 - offline_epochs = 1000 - offline_validate_every_x_iteration = 10 - load_ann = False - load_mem = False - load_from = 'agnt' - save_to = 'saved_agents/' - - - def conf_to_name(self): - self.name = str(self.env_type) + '_' + str(self.name) - for layer in self.net_layout: - self.name += '_' + str(layer) + '_' - self.name += str(self.eps_decay) + '_' - self.name += str(self.learn_rate) + '_' - self.name += str(self.learn_iterations) - def step(environment, action): @@ -70,13 +37,14 @@ def one_episode(environment, agent, render, learn, conf=None, max_steps=1000): state = following_state if learn: if conf is not None: - agent.learn(epochs=conf.learn_iterations) + agent.learn(epochs=conf.learn_epochs) else: agent.learn() if done: break return score +IS_SOLVED = 195 def learn_offline(agent, conf): """ Train the agent with its memories. 
""" print('Learning with ', len(agent.memory.history), ' memories.') @@ -84,9 +52,9 @@ def learn_offline(agent, conf): score_history = [] avg_score_history = [] desc_train = '' - pbar = trange(conf.offline_epochs, desc='Loss: x') + pbar = trange(conf.offline_episodes, desc='Loss: x') for i in pbar: - loss = agent.learn(offline=True, epochs=conf.learn_iterations) + loss = agent.learn(offline=True, epochs=conf.learn_epochs) desc = ('Loss: %05.4f' %(loss)) + desc_train pbar.set_description(desc) pbar.refresh() @@ -108,20 +76,21 @@ def learn_offline(agent, conf): -def run(environment, agent, episodes, render=True, learn=True, conf=None): +def run(conf): """ Run an agent """ conf.name = str(conf.name) + 'on' # Set the exploring rate to its minimum. # (epsilon *greedy*) + learn = conf.learn and conf.learn_online if not learn: - agent.epsilon = agent.epsilon_min + conf.agent.epsilon = conf.agent.epsilon_min score_history = [] avg_score_history = [] - pbar = trange(episodes, desc=agent.name + ' [act, avg]: [0, 0]', unit="Episodes") + pbar = trange(conf.run_episodes, desc=conf.agent.name + ' [act, avg]: [0, 0]', unit="Episodes") for _ in pbar: - score = one_episode(environment, agent, render, learn, conf=conf) + score = one_episode(conf.env, conf.agent, conf.render, learn, conf=conf) score_history.append(score) is_solved = np.mean(score_history[-100:]) @@ -129,7 +98,7 @@ def run(environment, agent, episodes, render=True, learn=True, conf=None): if is_solved > IS_SOLVED and learn: break - desc = (agent.name + " [act, avg]: [{0:.2f}, {1:.2f}]".format(score, is_solved)) + desc = (conf.agent.name + " [act, avg]: [{0:.2f}, {1:.2f}]".format(score, is_solved)) pbar.set_description(desc) pbar.refresh() return score_history, avg_score_history @@ -143,14 +112,16 @@ def process_logs(avg_score_history, loss, conf): pass df.to_csv(conf.save_to + conf.name + '/' + conf.name + '.csv') - """ Plot the log history """ + act_score = df['Score'] + avg_score = df['Average'] plt.figure() - plt.plot([i+1 for i in range(0, len(loss), 2)], loss[::2]) - plt.plot([i+1 for i in range(0, len(avg_score_history), 2)], avg_score_history[::2], '--') + plt.plot(act_score, label='Episode Score') + plt.plot(avg_score, '--', label='Average Score') + plt.xlabel('Episode') + plt.ylabel('Score') + plt.legend() plt.title(conf.name) plt.savefig(conf.save_to + conf.name + '/' + conf.name + '.png', format="png") - if conf.render: - plt.show() df.to_csv(conf.save_to + conf.name + '/' + conf.name + '.csv') def load_logs(file): diff --git a/main.py b/main.py index a362e28..860462a 100644 --- a/main.py +++ b/main.py @@ -10,15 +10,13 @@ from agents import QAgent import environment_wrapper as ew def run(conf): - - # 1. Create a learning agent + # Set agent marvin = conf.agent - # (2.) *optional* Load agent memory and/or net from disk. + # (optional) Load agent memory and/or net from disk. if conf.load_ann or conf.load_mem: marvin.load(conf.save_to + conf.load_from + '/' + conf.load_from, net=conf.load_ann, memory=conf.load_mem) - # 3. Set your configurations for the run. # Register an *atexit* callback, # to store the corrent result of the agent # if the program is interrupted. @@ -33,8 +31,8 @@ def run(conf): # number of specified epochs. Either to # verify the performance of the agent or # to train the agent. 
-    _LEARN = conf.learn_online and conf.learn
-    loss, avg_score = ew.run(conf.env, marvin, conf.run_episodes, render=conf.render, learn=_LEARN, conf=conf)
+    # loss, avg_score = ew.run(conf.env, marvin, conf.run_episodes, render=conf.render, learn=(conf.learn_online and conf.learn), conf=conf)
+    loss, avg_score = ew.run(conf)
 
     # Save the final training result of the agent.
     marvin.save(conf.save_to)
diff --git a/memory.py b/memory.py
index b8f2995..af47713 100644
--- a/memory.py
+++ b/memory.py
@@ -10,28 +10,24 @@ NEXT_STATE = 3
 DONE = 4
 
 class Memory:
-    """ Class to store memories of an agent. """
-
     history = deque(maxlen=1000000)
 
     def add(self, state, action, reward, nextstate, done):
         self.history.append((state, action, reward, nextstate, done))
 
     def get_batch(self, batch_size):
-        """ Get a random batch of samples of "batch_size" """
         batch = random.sample(self.history, batch_size)
-        states = np.array([i[STATE] for i in batch])
-        states = np.squeeze(states)
+        states = np.squeeze(np.array([i[STATE] for i in batch]))
         actions = np.array([i[ACTION] for i in batch])
         rewards = np.array([i[REWARD] for i in batch])
-        nextstates = np.array([i[NEXT_STATE] for i in batch])
-        nextstates = np.squeeze(nextstates)
+        nextstates = np.squeeze(np.array([i[NEXT_STATE] for i in batch]))
        dones = np.array([i[DONE] for i in batch])
         return states, actions, rewards, nextstates, dones
 
     def save(self, path):
         pickle.dump(self.history, open(path, 'wb'))
+        print('Saved ' + str(len(self.history)) + ' memories.')
 
     def load(self, path):
         self.history = pickle.load(open(path, 'rb'))
-        print('Loaded '+ str(len(self.history)) + ' memories.')
\ No newline at end of file
+        print('Loaded ' + str(len(self.history)) + ' memories.')
\ No newline at end of file
diff --git a/networks.py b/networks.py
index cf4e5b9..f66dda6 100644
--- a/networks.py
+++ b/networks.py
@@ -4,34 +4,35 @@ from keras.models import load_model
 from keras.layers import Dense
 from keras.optimizers import Adam
 from keras.activations import relu, linear
-from keras.regularizers import l2
 from keras.callbacks import EarlyStopping
 
-class QNet:
-
+from numpy.lib.function_base import _parse_input_dimensions
+
+class QNet:
     learn_rate = 0.0005
 
     def __init__(self, conf):
         if conf.force_cpu:
             os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
             os.environ["CUDA_VISIBLE_DEVICES"] = ""
-        self.net = None
         self.net = Sequential()
         self.compile_net(conf)
         self.net.summary()
         self.learn_rate = conf.learn_rate
 
     def compile_net(self, conf):
-        self.net.add(Dense(conf.net_layout[0], input_dim=conf.env.observation_space.shape[0], activation=relu))
+        input_dimension = conf.env.observation_space.shape[0]
+        first_layer = conf.net_layout[0]
+        self.net.add(Dense(first_layer, input_dim=input_dimension, activation=relu))
         for layer in range(1, len(conf.net_layout)):
             self.net.add(Dense(conf.net_layout[layer], activation=relu))
         self.net.add(Dense(conf.env.action_space.n, activation=linear))
-        self.net.compile(loss='mse', optimizer=Adam(lr=self.learn_rate))
+        self.net.compile(loss='mse', optimizer=Adam(learning_rate=self.learn_rate))
 
     def predict(self, state):
         return self.net.predict(state)
-    def predict_on_batch(
-        self, states): return self.net.predict_on_batch(states)
+    def predict_on_batch(self, states):
+        return self.net.predict_on_batch(states)
 
     def fit(self, X, Y, epochs=1, verbose=0):
         callback = EarlyStopping(monitor='loss', patience=2, min_delta=0.1, restore_best_weights=True)
@@ -44,13 +45,3 @@ class QNet:
     def load(self, path):
         self.net = load_model(path)
         self.net.summary()
-
-class QL2Net(QNet):
-    def __init__(self, action_space, state_space):
-        super().__init__(action_space, state_space)
-
-    def compile_net(self, action_space, state_space):
-        self.net.add(Dense(192, input_dim=state_space, activation=relu, kernel_regularizer=l2(0.01)))
-        self.net.add(Dense(64, activation=relu))
-        self.net.add(Dense(action_space, activation=linear))
-        self.net.compile(loss='mse', optimizer=Adam(lr=self.learn_rate))
diff --git a/run_scripts/baselines.py b/run_scripts/baselines.py
index bc98308..fc5674f 100644
--- a/run_scripts/baselines.py
+++ b/run_scripts/baselines.py
@@ -47,12 +47,12 @@ cd_4.net_layout = [128, 64, 32, 32]
 
 cd_5 = copy.deepcopy(c_32)
 cd_5.net_layout = [256, 128, 128, 64]
-cd_5.learn_iterations = 5
+cd_5.learn_epochs = 5
 cd_5.learn_rate = 0.001
 
 cd_6 = copy.deepcopy(c_32)
 cd_6.net_layout = [512, 256, 128, 64]
-cd_6.learn_iterations = 10
+cd_6.learn_epochs = 10
 cd_6.learn_rate = 0.00075
 
 cd_128 = copy.deepcopy(c_32)
@@ -63,7 +63,7 @@ cd_256.net_layout = [256, 256, 256]
 
 cd_512 = copy.deepcopy(c_32)
 cd_512.net_layout = [512, 512, 512]
-cd_512.learn_iterations = 10
+cd_512.learn_epochs = 10
 cd_512.learn_rate = 0.001
 
 offline = copy.deepcopy(c_32)
@@ -76,9 +76,9 @@ offline.learn_online = True
 offline.run_episodes = 100
 offline.net_layout = [1024, 1024, 1024, 256]
 offline.learn_rate = 0.0005
-offline.learn_iterations = 1
+offline.learn_epochs = 1
 offline.offline_validate_every_x_iteration = 1
-offline.offline_epochs = 100
+offline.offline_episodes = 100
 offline.name = 'OnlineValidation'
 offline.render = False
 offline.save_to = 'test/'
diff --git a/run_scripts/csv_history_to_plot.py b/run_scripts/csv_history_to_plot.py
index e20d257..ddda636 100644
--- a/run_scripts/csv_history_to_plot.py
+++ b/run_scripts/csv_history_to_plot.py
@@ -4,8 +4,8 @@ from os import listdir
 from os.path import isfile, join
 from pathlib import Path
 
-BASE_PATH = '/home/armin/Master/semester_3/carla/'
-DIR='baselines/hyrican'
+BASE_PATH = '/home/armin/Master/semester_3/angewandte_ki/BerichtCarla/Daten'
+DIR=''
 path = BASE_PATH + DIR
 
 def plot_csv(file_path, show=False):
@@ -19,7 +19,7 @@ def plot_csv(file_path, show=False):
     plt.xlabel('Episode')
     plt.ylabel('Score')
     plt.legend()
-    plt.title(file_path)
+    # plt.title(file_path)
     plt.savefig(file_path + '.png')
     if show:
         plt.show()
@@ -29,4 +29,4 @@ for dir in Path(path).iterdir():
         file_path = join(dir, file)
         if isfile(file_path):
             if file_path.endswith('.csv'):
-                plot_csv(file_path, show=True)
+                plot_csv(file_path, show=False)
diff --git a/run_scripts/manual_carla.py b/run_scripts/manual_carla.py
index 4572ae7..d018f01 100644
--- a/run_scripts/manual_carla.py
+++ b/run_scripts/manual_carla.py
@@ -35,10 +35,10 @@ o.eps_decay = 0.9915
 o.learn_rate= 0.01
 o.run_episodes = 550
 o.learn_offline = True
-o.offline_epochs = 100000
+o.offline_episodes = 100000
 o.offline_batchsize = 64
 o.offline_validate_every_x_iteration = 500
-o.learn_iterations = 1
+o.learn_epochs = 1
 
 validate = copy.deepcopy(c)
 validate.name = 'Validate0'
@@ -52,8 +52,8 @@ validate.load_mem = False
 validate.load_ann = True
 validate.learn_offline = False
 validate.offline_batchsize = 64000
-validate.offline_epochs = 20
-validate.learn_iterations = 1
+validate.offline_episodes = 20
+validate.learn_epochs = 1
 validate.offline_validate_every_x_iteration = 1
 validate.learn_online = True
 validate.eps_decay = 0.95
diff --git a/run_scripts/offline.py b/run_scripts/offline.py
index 4307d41..fd789b8 100644
--- a/run_scripts/offline.py
+++ b/run_scripts/offline.py
@@ -18,9 +18,9 @@ standard_conf.learn_offline = True
 standard_conf.net_layout= [256, 128]
 standard_conf.eps_decay = 0.6
 standard_conf.learn_rate= 0.0005
-standard_conf.learn_iterations = 4
+standard_conf.learn_epochs = 4
 standard_conf.run_episodes = 100
-standard_conf.offline_epochs = 2000
+standard_conf.offline_episodes = 2000
 standard_conf.offline_validate_every_x_iteration = -1
 standard_conf.load_ann = False
 standard_conf.load_mem = True
diff --git a/run_scripts/report.py b/run_scripts/report.py
index 7bf76df..e2d78ad 100644
--- a/run_scripts/report.py
+++ b/run_scripts/report.py
@@ -18,9 +18,9 @@ standard_conf.learn_offline = False
 standard_conf.net_layout= [256, 128]
 standard_conf.eps_decay = 0.9996
 standard_conf.learn_rate= 0.001
-standard_conf.learn_iterations = 1
+standard_conf.learn_epochs = 1
 standard_conf.run_episodes = 20
-standard_conf.offline_epochs = 1000
+standard_conf.offline_episodes = 1000
 standard_conf.offline_validate_every_x_iteration = 10
 standard_conf.load_ann = False
 standard_conf.load_mem = False
-- 
GitLab
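
Note on usage (illustration, not part of the patch): after this refactoring every run is driven by a single Config object from the new config.py, and environment_wrapper.run() takes that object in place of the old (environment, agent, episodes, render, learn) arguments. The sketch below shows how a run script could wire a Config together under the new interface; the gym.make('LunarLander-v2') environment and the QAgent(conf) constructor call are assumptions based on the surrounding repository (env_type defaults to 'Lunar' and DQAgent builds its QNet from the same Config object), not something this patch itself defines.

    # Minimal sketch, assuming gym is installed and agents.QAgent accepts a Config.
    import gym

    from config import Config
    from agents import QAgent
    import environment_wrapper as ew

    conf = Config()
    conf.env_type = 'Lunar'
    conf.name = 'example_run'
    conf.net_layout = [256, 128]
    conf.eps_decay = 0.9996
    conf.learn_rate = 0.001
    conf.run_episodes = 20
    conf.save_to = 'saved_agents/'
    conf.conf_to_name()                      # builds the run name from env_type, layers, decay, rate, epochs

    conf.env = gym.make('LunarLander-v2')    # assumed environment; run() only reads it through conf.env
    conf.agent = QAgent(conf)                # assumed constructor; run() only reads the agent through conf.agent

    score_history, avg_score_history = ew.run(conf)   # episodes, render and learn flags now come from conf

The same Config instance can then be handed to main.run(conf) instead, which additionally handles loading and saving the agent around the call to ew.run(conf).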