From 0fe0f9b8c13f84546317f907e0c56d0186c3cc02 Mon Sep 17 00:00:00 2001
From: Armin <armin.co@hs-bochum.de>
Date: Tue, 16 Mar 2021 16:48:20 +0100
Subject: [PATCH] Working Environment

---
 .gitignore                  |   1 +
 agents.py                   |  12 +++--
 carla_environment.py        |   2 +-
 environment_wrapper.py      |   6 +++
 memory.py                   |   2 +-
 networks.py                 |   2 +-
 run_scripts/cartpole.py     |  11 ++--
 run_scripts/manual_carla.py | 103 +++++++++---------------------------
 run_scripts/start_carla.sh  |   2 +-
 9 files changed, 49 insertions(+), 92 deletions(-)

diff --git a/.gitignore b/.gitignore
index e624cfb..068c0b7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,7 @@ benchmarks
 baselines
 simple
 final_wo_o
+orientation
 workspace.code-workspace
 test
 tech_demo.py
diff --git a/agents.py b/agents.py
index 2e7cdbe..adf7204 100644
--- a/agents.py
+++ b/agents.py
@@ -3,6 +3,7 @@ import numpy as np
 from memory import Memory
 from networks import QNet
 from steering_wheel import Controller
+from keras.callbacks import EarlyStopping
 
 class QAgent:
     gamma = 0.99
@@ -38,7 +39,7 @@ class QAgent:
         if offline:
             batch_size = self.OFFLINE_BATCHSIZE
 
-        if len(self.memory.history) < batch_size:
+        if len(self.memory.history) < batch_size * 35:
             return
 
         states, actions, rewards, following_states, dones = self.memory.get_batch(
@@ -85,6 +86,8 @@ class DQAgent(QAgent):
         return np.argmax(action_values[0])
 
     def learn(self, offline=False, epochs=1):
+        if self.epsilon > self.epsilon_min:
+            self.epsilon *= self.epsilon_decay
         for _ in range(2):
             if np.random.rand() < 0.5:
                 temp = self.q
@@ -93,7 +96,7 @@
         batch_size = self.online_batch_size
         if offline:
             batch_size = self.OFFLINE_BATCHSIZE
-        if len(self.memory.history) < batch_size:
+        if len(self.memory.history) < batch_size * 35:
             return
         states, actions, rewards, following_states, dones = self.memory.get_batch(
             batch_size)
@@ -103,12 +106,11 @@
         idx = np.array([i for i in range(batch_size)])
         y[[idx], [actions]] = qMax
         if offline:
-            history = self.q.net.fit(states, y, epochs=epochs, verbose=0)
+            callback = EarlyStopping(monitor='loss', patience=2, min_delta=0.1, restore_best_weights=True)
+            history = self.q.net.fit(states, y, epochs=epochs, verbose=0, callbacks=[callback])
             loss = history.history['loss'][-1]
         else:
             loss = self.q.fit(states, y, epochs)
-        if self.epsilon > self.epsilon_min:
-            self.epsilon *= self.epsilon_decay
         return loss
 
     def load(self, path, net=True, memory=True):
diff --git a/carla_environment.py b/carla_environment.py
index 5f00231..fc0ad1c 100644
--- a/carla_environment.py
+++ b/carla_environment.py
@@ -266,7 +266,7 @@ class World:
         if pos_diff < 1.1 and v < 0.01:
             done = True
             r += 150
-            if yaw_dif < 0.01:
+            if abs(yaw_dif) < 2:
                 r+= 50
 
         if self.collision_sensor.collision is not None:
diff --git a/environment_wrapper.py b/environment_wrapper.py
index 61242e3..2c61cc0 100644
--- a/environment_wrapper.py
+++ b/environment_wrapper.py
@@ -57,6 +57,8 @@ def reset(environment):
 def one_episode(environment, agent, render, learn, conf=None, max_steps=1000):
     """ Perform one episode of the agent in the environment. """
     score = 0
+    if conf.env_type == 'Carla':
+        max_steps = 300
     state = reset(environment)
     for _ in range(max_steps):
         if render:
@@ -88,6 +90,9 @@
         desc = ('Loss: %05.4f' %(loss)) + desc_train
         pbar.set_description(desc)
         pbar.refresh()
+        if loss > 1000:
+            print("Loss exceeded 1000!!")
+            exit()
         if i % conf.offline_validate_every_x_iteration == 1 and conf.offline_validate_every_x_iteration is not -1:
             agent.epsilon = agent.epsilon_min
             score = one_episode(conf.env, agent, conf.render, False, conf=conf)
@@ -97,6 +102,7 @@
         avg_score_history.append(is_solved)
         if is_solved > IS_SOLVED:
             break
+
     if conf.offline_validate_every_x_iteration is not -1:
         process_logs(avg_score_history, score_history, conf)
diff --git a/memory.py b/memory.py
index d6393a7..b8f2995 100644
--- a/memory.py
+++ b/memory.py
@@ -34,4 +34,4 @@ class Memory:
 
     def load(self, path):
         self.history = pickle.load(open(path, 'rb'))
-        print('Loaded '+ str(len(self.history)) + ' memories.')
+        print('Loaded '+ str(len(self.history)) + ' memories.')
\ No newline at end of file
diff --git a/networks.py b/networks.py
index 32b9461..cf4e5b9 100644
--- a/networks.py
+++ b/networks.py
@@ -34,7 +34,7 @@ class QNet:
             self, states):
         return self.net.predict_on_batch(states)
 
     def fit(self, X, Y, epochs=1, verbose=0):
-        callback = EarlyStopping(monitor='loss', patience=3)
+        callback = EarlyStopping(monitor='loss', patience=2, min_delta=0.1, restore_best_weights=True)
         history = self.net.fit(X, Y, epochs=epochs, verbose=verbose, callbacks=[callback])
         return history.history['loss'][-1]
diff --git a/run_scripts/cartpole.py b/run_scripts/cartpole.py
index ad2bb1d..7951918 100644
--- a/run_scripts/cartpole.py
+++ b/run_scripts/cartpole.py
@@ -5,16 +5,15 @@ from agents import DQAgent, QAgent
 
 c = ew.Config()
 
-c.name = 'DoubleCartPole'
-c.render = False
+c.name = '01'
+c.render = True
 c.env = gym.make('CartPole-v0')
 c.env_type = 'CartPole'
-c.net_layout = [128, 64, 32]
+c.net_layout = [128, 64]
 c.eps_decay = 0.9991
 c.learn_rate= 0.001
 c.run_episodes = 300
-c.save_to = 'benchmarks/'
+c.save_to = 'Screencast/'
 c.conf_to_name()
-c.agent = QAgent(c)
-
+c.agent = DQAgent(c)
 main.run(c)
diff --git a/run_scripts/manual_carla.py b/run_scripts/manual_carla.py
index d09a7b1..4572ae7 100644
--- a/run_scripts/manual_carla.py
+++ b/run_scripts/manual_carla.py
@@ -2,7 +2,7 @@ import main
 import environment_wrapper as ew
 import gym
 import copy
-from agents import QAgent, DQAgent
+from agents import CarlaManual, QAgent, DQAgent
 from carla_environment import CarlaEnvironment
 
 c = ew.Config()
@@ -19,101 +19,50 @@ c.load_from = 'Carla_2_256__128__128_0.9995_0.001_1DBL'
 c.load_mem = True
 c.load_ann = True
 
-# o = copy.deepcopy(c)
-# o.name = 'JTAP_4'
-# o.force_cpu = True
-# o.render = True
-# o.learn = True
-# o.env_type = 'Carla'
-# o.net_layout = [256, 128, 128]
-# o.save_to = 'simple/'
-# o.load_from = 'Carla_JTAP_0_256__128__32_0.9995_0.001_1DBL'
-# o.load_mem = True
-# o.load_ann = False
-# o.learn_offline = True
-# o.offline_epochs = 75000
-# o.offline_batchsize = 256
-# o.learn_iterations = 1
-# o.offline_validate_every_x_iteration = -1
-# o.learn_online = False
-# o.eps_decay = 0.9995
-# o.learn_rate= 0.001
-# o.run_episodes = 15
-
 o = copy.deepcopy(c)
-o.name = 'JTAP_0'
+o.name = 'Final_wo_Obstacles_052O_2M'
 o.force_cpu = True
-o.render = True
+o.render = False
 o.learn = True
 o.env_type = 'Carla'
-o.net_layout = [256, 128]
-o.save_to = 'orientation/'
-o.load_from = ''
-o.load_mem = False
+o.net_layout = [1024, 1024, 256, 128]
+o.save_to = 'final_wo_o/'
+o.load_from = 'Carla_Final_wo_Obstacles_02_2M_256__256__128_0.99974_0.001_1DBL'
+o.load_mem = True
 o.load_ann = False
 o.learn_online = True
-o.eps_decay = 0.9995
-o.learn_rate= 0.001
-o.run_episodes = 750
+o.eps_decay = 0.9915
+o.learn_rate= 0.01
+o.run_episodes = 550
+o.learn_offline = True
+o.offline_epochs = 100000
+o.offline_batchsize = 64
+o.offline_validate_every_x_iteration = 500
+o.learn_iterations = 1
 
 validate = copy.deepcopy(c)
-validate.name = 'JTAP_Validate'
+validate.name = 'Validate0'
 validate.render = True
-validate.learn = True
+validate.learn = False
 validate.env_type = 'Carla'
 validate.net_layout = [256, 128]
 validate.save_to = 'simple/'
-validate.load_from = 'Carla_JTAP_0_256__128__32_0.9995_0.001_1DBL'
+validate.load_from = 'Carla_JTAP_1_256__128__128_0.9995_0.001_1'
 validate.load_mem = False
 validate.load_ann = True
 validate.learn_offline = False
-validate.offline_epochs = 1500
+validate.offline_batchsize = 64000
+validate.offline_epochs = 20
 validate.learn_iterations = 1
-validate.offline_validate_every_x_iteration = -1
+validate.offline_validate_every_x_iteration = 1
 validate.learn_online = True
-validate.eps_decay = 0.9995
-validate.learn_rate= 0.001
+validate.eps_decay = 0.95
+validate.learn_rate= 0.0000005
 validate.run_episodes = 10
 
-# t = copy.deepcopy(c)
-# t.render = True
-# t.net_layout = [1024, 1024, 256, 32]
-# t.eps_decay = 0.9993
-# t.learn_rate = 0.0005
-# t.force_cpu = False
-# t.load_mem = True
-# t.load_ann = False
-# t.save_to = 'test/'
-# t.load_from = 'Carla_CarlaOffline_1024__1024__256__32_0.9993_0.0005_50'
-# t.name = 'Offline'
-# t.learn_offline = True
-# t.learn_online = True
-# t.run_episodes = 500
-# t.offline_epochs = 100
-# t.learn_iterations = 100
-# t.offline_validate_every_x_iteration = -1
-
-configuration = o
+configuration = validate
 configuration.env = CarlaEnvironment(render=configuration.render, manual=False)
 configuration.conf_to_name()
-configuration.agent = QAgent(configuration)
+configuration.agent = DQAgent(configuration)
+# configuration.agent = CarlaManual(configuration)
 main.run(configuration)
-
-# o = copy.deepcopy(c)
-# o.name = '5D'
-# o.render = True
-# o.learn = False
-# o.env_type = 'Carla'
-# o.net_layout = [1024, 512, 256]
-# o.save_to = 'test/'
-# o.load_from = 'Carla_01D_1024__512__512_0.9991_0.00025_1DBLoffline'
-# o.load_mem = True
-# o.load_ann = True
-# o.learn_offline = False
-# o.offline_epochs = 1000
-# o.learn_iterations = 1
-# o.offline_validate_every_x_iteration = -1
-# o.learn_online = True
-# o.eps_decay = 0.9991
-# o.learn_rate= 0.0005
-# o.run_episodes = 20
\ No newline at end of file
diff --git a/run_scripts/start_carla.sh b/run_scripts/start_carla.sh
index bc6da0a..5302c74 100755
--- a/run_scripts/start_carla.sh
+++ b/run_scripts/start_carla.sh
@@ -5,4 +5,4 @@
 # apt install carla-simulator
 #
 
-DISPLAY= /opt/carla-simulator/CarlaUE4.sh -benchmark -fps=15 -quality-level=Low -opengl
\ No newline at end of file
+DISPLAY= /opt/carla-simulator/CarlaUE4.sh -benchmark -fps=10 -quality-level=Low -opengl
\ No newline at end of file
-- 
GitLab