diff --git a/.gitignore b/.gitignore
index a206662474aac73cc18f10d9447e97adc498e338..31adcd1eea6e6b910b2362ee6452e6fc672ee573 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,5 +4,6 @@ saved_agents
 benchmarks
 baselines
 workspace.code-workspace
+test
 tech_demo.py
 *.png
diff --git a/agents.py b/agents.py
index 8672ba5d3b39524e58d2478d77035d6e64f2fa9f..300d84ba3181e88a1760e503cbf4e60f5703163f 100644
--- a/agents.py
+++ b/agents.py
@@ -2,6 +2,7 @@ import random
 import numpy as np
 from memory import Memory
 from networks import QNet
+from steering_wheel import Controller
 
 class QAgent:
     gamma = 0.99
@@ -29,10 +30,9 @@ class QAgent:
     def remember(self, state, action, reward, following_state, done):
         self.memory.add(state, action, reward, following_state, done)
 
-    def learn(self, offline=False):
+    def learn(self, offline=False, epochs=1):
         """ Learn the Q-Function. """
         batch_size = self.online_batch_size
-        epochs = 1
         if offline:
             batch_size = self.OFFLINE_BATCHSIZE
 
@@ -49,7 +49,7 @@ class QAgent:
 
         y[[idx], [actions]] = qMax
         if offline:
-            history = self.q.net.fit(states, y, epochs=2, verbose=0)
+            history = self.q.net.fit(states, y, epochs=epochs, verbose=0)
             loss = history.history['loss'][-1]
         else:
             loss = self.q.fit(states, y, epochs)
@@ -64,8 +64,9 @@ class QAgent:
         self.memory.save(path+'/' + self.name + '.mem')
 
     def load(self, path, net=True, memory=True):
-        print(path)
+        print('Load: ' + path)
         if net:
+            print('Network')
             self.q.load(path+'.net')
         if memory:
             self.memory.load(path+'.mem')
@@ -81,14 +82,13 @@ class DQAgent(QAgent):
         action_values = (self.q.predict(state) + self.q2.predict(state)) / 2
         return np.argmax(action_values[0])
 
-    def learn(self, offline=False):
+    def learn(self, offline=False, epochs=1):
         for _ in range(2):
             if np.random.rand() < 0.5:
                 temp = self.q
                 self.q = self.q2
                 self.q2 = temp
         batch_size = self.online_batch_size
-        epochs = 1
         if offline:
             batch_size = self.OFFLINE_BATCHSIZE
         if len(self.memory.history) < batch_size:
@@ -101,10 +101,22 @@ class DQAgent(QAgent):
         idx = np.array([i for i in range(batch_size)])
         y[[idx], [actions]] = qMax
         if offline:
-            history = self.q.net.fit(states, y, epochs=2, verbose=0)
+            history = self.q.net.fit(states, y, epochs=epochs, verbose=0)
             loss = history.history['loss'][-1]
         else:
             loss = self.q.fit(states, y, epochs)
         if self.epsilon > self.epsilon_min:
             self.epsilon *= self.epsilon_decay
-        return loss
\ No newline at end of file
+        return loss
+
+class CarlaManual(QAgent):
+    control = None
+
+    def __init__(self, conf):
+        super().__init__(conf)
+        self.control = Controller()
+
+    def get_action(self, state):
+        self.control.on_update()
+        return self.control.get_action()
+
diff --git a/carla_environment.py b/carla_environment.py
index 7edd56d3f9085fbdc1c300676edfcbd55a0a41be..8854f528063b962f4cef66f53e63d0a85272691a 100644
--- a/carla_environment.py
+++ b/carla_environment.py
@@ -97,8 +97,9 @@ class ObstacleSensor:
         self.parent = parent
         bp = world.get_blueprint_library().find('sensor.other.obstacle')
         bp.set_attribute('distance', '10')
-        bp.set_attribute('hit_radius', '3')
-        print(bp.get_attribute('hit_radius'))
+        bp.set_attribute('hit_radius', '5')
+        # print(bp.get_attribute('hit_radius'))
+        position = carla.Transform(carla.Location(x=-2, y=0, z=0.0), carla.Rotation(pitch=0.0, yaw=0, roll=0.0))
         self.sensor = world.spawn_actor(bp, carla.Transform(), attach_to=parent, attachment_type=carla.AttachmentType.Rigid)
         weak_self = weakref.ref(self)
         self.sensor.listen(lambda event: ObstacleSensor._on_event(weak_self, event))
@@ -108,12 +109,12 @@ class ObstacleSensor:
         if not self:
             return
         self.collision = event
-        if not event.other_actor.type_id == 'static.road':
-            print(self.parent.get_location())
+        if not event.other_actor.type_id == 'static.road' and not event.other_actor.type_id == 'static.roadline' and not event.other_actor.type_id == 'static.sidewalk' and False:
+            print(self.parent.get_transform())
+            print(event.transform)
             print(self.parent.get_velocity())
             print(event.other_actor.type_id)
-            print(event.other_actor.transform)
-            print(event.distance)
+            print(str(event.distance))
 
 
 class LidarSensor:
@@ -333,7 +334,7 @@ if __name__ == "__main__":
         clock.tick(5)
         ctrl.on_update()
         obs, reward, done, _ = env.step(ctrl.get_action(), render=True)
-        print(str(reward) + ' ' + str(done))
+        # print(str(reward) + ' ' + str(done))
         cumulated_reward += reward
         if done:
             break
diff --git a/environment_wrapper.py b/environment_wrapper.py
index 5b4f5fbb7c102fc766d93aa074cb8019944a7e17..cf39665bc9605f7738116bed1ff7bc18706d7efd 100644
--- a/environment_wrapper.py
+++ b/environment_wrapper.py
@@ -5,6 +5,8 @@ from tqdm import trange
 import pandas as pd
 import matplotlib.pyplot as plt
 
+IS_SOLVED = 195
+
 class Config:
     render = False
     force_cpu = True
@@ -18,8 +20,10 @@ class Config:
     net_layout= [256, 128]
     eps_decay = 0.9996
     learn_rate= 0.001
+    learn_iterations = 1
     run_episodes = 20
     offline_epochs = 1000
+    offline_validate_every_x_iteration = 10
     load_ann = False
     load_mem = False
     load_from = 'agnt'
@@ -35,7 +39,8 @@ class Config:
         for layer in self.net_layout:
             self.name += '_' + str(layer) + '_'
         self.name += str(self.eps_decay) + '_'
-        self.name += str(self.learn_rate)
+        self.name += str(self.learn_rate) + '_'
+        self.name += str(self.learn_iterations)
 
 
@@ -52,7 +57,7 @@ def reset(environment):
     return state
 
 
-def one_episode(environment, agent, render, learn, max_steps=1000):
+def one_episode(environment, agent, render, learn, conf=None, max_steps=1000):
     """ Perform one episode of the agent in the environment. """
     score = 0
     state = reset(environment)
@@ -65,22 +70,33 @@
         score += reward
         state = following_state
         if learn:
-            agent.learn()
+            if conf is not None:
+                agent.learn(epochs=conf.learn_iterations)
+            else:
+                agent.learn()
         if done:
             break
     return score
 
 
-def learn_offline(agent, epochs=1):
+def learn_offline(agent, conf):
     """ Train the agent with its memories. """
     print('Learning with ', len(agent.memory.history), ' memories.')
-    pbar = trange(epochs, desc='Loss: x')
-    for _ in pbar:
-        loss = agent.learn(offline=True)
+    pbar = trange(conf.offline_epochs, desc='Loss: x')
+    for i in pbar:
+        loss = agent.learn(offline=True, epochs=conf.learn_iterations)
         desc = ('Loss: %05.4f' %(loss))
         pbar.set_description(desc)
         pbar.refresh()
+        if i % conf.offline_validate_every_x_iteration == 0 and conf.offline_validate_every_x_iteration != -1:
+            score, avg = run(conf.env, conf.agent, 1, render=conf.render, learn=False, conf=conf)
+            conf.name += '1'
+            process_logs(avg, score, conf)
+            if avg[-1] > IS_SOLVED:
+                break
+
-def run(environment, agent, episodes, render=True, learn=True):
+
+def run(environment, agent, episodes, render=True, learn=True, conf=None):
     """ Run an agent """
     # Set the exploring rate to its minimum.
@@ -93,13 +109,13 @@
     pbar = trange(episodes, desc=agent.name + ' [act, avg]: [0, 0]', unit="Episodes")
     for _ in pbar:
-        score = one_episode(environment, agent, render, learn)
+        score = one_episode(environment, agent, render, learn, conf=conf)
         score_history.append(score)
 
         is_solved = np.mean(score_history[-100:])
         avg_score_history.append(is_solved)
-        if is_solved > 195 and learn:
+        if is_solved > IS_SOLVED and learn:
             break
         desc = (agent.name + " [act, avg]: [{0:.2f}, {1:.2f}]".format(score, is_solved))
         pbar.set_description(desc)
@@ -109,6 +125,10 @@ def process_logs(avg_score_history, loss, conf):
     df = pd.DataFrame(list(zip(loss, avg_score_history)), columns=['Score', 'Average'])
+    try:
+        os.makedirs(conf.save_to + conf.name)
+    except:
+        pass
     df.to_csv(conf.save_to + conf.name + '/' + conf.name + '.csv')
 
     """ Plot the log history """
@@ -119,6 +139,7 @@
     plt.savefig(conf.save_to + conf.name + '/' + conf.name + '.png', format="png")
     if conf.render:
         plt.show()
+    df.to_csv(conf.save_to + conf.name + '/' + conf.name + '.csv')
 
 def load_logs(file):
     df = pd.read_csv(file)
diff --git a/main.py b/main.py
index 886cddb1afecf3ef9ab677758c892799910e8a18..5214a52f8a7b88144febb528c280935b08fc8e66 100644
--- a/main.py
+++ b/main.py
@@ -28,14 +28,14 @@ def run(conf):
     # Offline training of the agent with
     # previous collected and saved memories.
     if conf.learn_offline and conf.learn:
-        ew.learn_offline(marvin, epochs=conf.offline_epochs)
+        ew.learn_offline(marvin, conf)
 
     # Run the agent in the environment for the
     # number of specified epochs. Either to
     # verify the performance of the agent or
     # to train the agent.
     _LEARN = conf.learn_online and conf.learn
-    loss, avg_score = ew.run(conf.env, marvin, conf.run_episodes, render=conf.render, learn=_LEARN)
+    loss, avg_score = ew.run(conf.env, marvin, conf.run_episodes, render=conf.render, learn=_LEARN, conf=conf)
 
     # Save the final training result of the agent.
     if conf.learn:
diff --git a/run_scripts/baselines.py b/run_scripts/baselines.py
index 9afa1c14b4c35505c1dec5f3582332c50a829f84..bc98308122cd3f20662f45aabfeb81b563341243 100644
--- a/run_scripts/baselines.py
+++ b/run_scripts/baselines.py
@@ -17,7 +17,7 @@ c_32.env_type = 'CartPole'
 c_32.net_layout = [32, 32]
 c_32.eps_decay = 0.9995
 c_32.learn_rate= 0.00075
-c_32.run_episodes = 350
+c_32.run_episodes = 1000
 c_32.save_to = 'baselines/'
 
 
@@ -36,22 +36,54 @@ c_512.net_layout = [512, 32]
 c_1024 = copy.deepcopy(c_32)
 c_1024.net_layout = [1024, 32]
 
+c_2048 = copy.deepcopy(c_32)
+c_2048.net_layout = [2048, 32]
+
 cd_3 = copy.deepcopy(c_32)
-cd_3.net_layout[128, 64, 32]
+cd_3.net_layout = [128, 64, 32]
 
 cd_4 = copy.deepcopy(c_32)
-cd_4.net_layout[128, 64, 32, 32]
+cd_4.net_layout = [128, 64, 32, 32]
+
+cd_5 = copy.deepcopy(c_32)
+cd_5.net_layout = [256, 128, 128, 64]
+cd_5.learn_iterations = 5
+cd_5.learn_rate = 0.001
+
+cd_6 = copy.deepcopy(c_32)
+cd_6.net_layout = [512, 256, 128, 64]
+cd_6.learn_iterations = 10
+cd_6.learn_rate = 0.00075
 
 cd_128 = copy.deepcopy(c_32)
-cd_128.net_layout[128, 128, 128]
+cd_128.net_layout = [128, 128, 128]
 
 cd_256 = copy.deepcopy(c_32)
-cd_256.net_layout[256, 256, 256]
+cd_256.net_layout = [256, 256, 256]
 
 cd_512 = copy.deepcopy(c_32)
-cd_512.net_layout[512, 512, 512]
-
-conf = c_32
+cd_512.net_layout = [512, 512, 512]
+cd_512.learn_iterations = 10
+cd_512.learn_rate = 0.001
+
+offline = copy.deepcopy(c_32)
+offline.force_cpu = False
+offline.load_from = 'Offline_Config_Test'
+offline.load_mem = True
+offline.load_ann = False
+offline.learn_offline = False
+offline.learn_online = True
+offline.run_episodes = 100
+offline.net_layout = [1024, 1024, 1024, 256]
+offline.learn_rate = 0.0005
+offline.learn_iterations = 1
+offline.offline_validate_every_x_iteration = 1
+offline.offline_epochs = 100
+offline.name = 'OnlineValidation'
+offline.render = False
+offline.save_to = 'test/'
+
+conf = offline
 conf.conf_to_name()
 conf.agent = QAgent(conf)
diff --git a/run_scripts/benchmarks.py b/run_scripts/benchmarks.py
index 77c63913409ba74f26b5cff7471eeceba250b54a..a18117c53aef1a4ebe21d219e604da2b83633fd5 100644
--- a/run_scripts/benchmarks.py
+++ b/run_scripts/benchmarks.py
@@ -12,7 +12,7 @@ c.render = False
 c.env = gym.make('LunarLander-v2')
 c.env_type = 'Lunar'
 c.net_layout = [256, 128]
-c.eps_decay = 0.9996
+c.eps_decay = 0.9995
 c.learn_rate= 0.001
 c.run_episodes = 300
 c.save_to = 'benchmarks/'
@@ -21,7 +21,12 @@ smallNet = copy.deepcopy(c)
 smallNet.name = 'SmallNet'
 smallNet.net_layout = [128, 32]
 smallNet.conf_to_name()
-# smallNet.agent = QAgent(smallNet)
+
+smallNetSlow = copy.deepcopy(c)
+smallNetSlow.name = 'SmallNetSlow'
+smallNetSlow.net_layout = [128, 32]
+smallNetSlow.learn_rate = 0.0005
+smallNetSlow.conf_to_name()
 
 smallNetDeep = copy.deepcopy(c)
 smallNetDeep.name = 'SmallNetDeep'
@@ -95,8 +100,9 @@ lun.conf_to_name()
 # lun.agent = QAgent(lun)
 
 # configuration = smallNet
+configuration = smallNetSlow
 # configuration = smallNetDeep
-configuration = normalNet
+# configuration = normalNet
 # configuration = normalSlowDecay
 # configuration = normalSlowLearn
 # configuration = largeNet
diff --git a/run_scripts/csv_history_to_plot.py b/run_scripts/csv_history_to_plot.py
new file mode 100644
index 0000000000000000000000000000000000000000..e20d257ba9c27f2dd849aa9da4e3475e65bf8bae
--- /dev/null
+++ b/run_scripts/csv_history_to_plot.py
@@ -0,0 +1,32 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+from os import listdir
+from os.path import isfile, join
+from pathlib import Path
+
+BASE_PATH = '/home/armin/Master/semester_3/carla/'
+DIR='baselines/hyrican'
+path = BASE_PATH + DIR
+
+def plot_csv(file_path, show=False):
+    df = pd.read_csv(file_path)
+    act_score = df['Score']
+    avg_score = df['Average']
+
+    plt.figure()
+    plt.plot(act_score, label='Episode Score')
+    plt.plot(avg_score, label='Average Score')
+    plt.xlabel('Episode')
+    plt.ylabel('Score')
+    plt.legend()
+    plt.title(file_path)
+    plt.savefig(file_path + '.png')
+    if show:
+        plt.show()
+
+for dir in Path(path).iterdir():
+    for file in listdir(dir):
+        file_path = join(dir, file)
+        if isfile(file_path):
+            if file_path.endswith('.csv'):
+                plot_csv(file_path, show=True)
diff --git a/run_scripts/manual_carla.py b/run_scripts/manual_carla.py
new file mode 100644
index 0000000000000000000000000000000000000000..7b77e09ef1ae912c4b5025fb1aae2f45a0a0ca31
--- /dev/null
+++ b/run_scripts/manual_carla.py
@@ -0,0 +1,42 @@
+import main
+import environment_wrapper as ew
+import gym
+import copy
+from agents import QAgent as QAgent
+from carla_environment import CarlaEnvironment
+
+c = ew.Config()
+
+c.name = 'Base'
+c.render = True
+c.env_type = 'Carla'
+c.net_layout = [256, 128]
+c.eps_decay = 0.9995
+c.learn_rate= 0.001
+c.run_episodes = 20
+c.save_to = 'test/'
+
+
+t = copy.deepcopy(c)
+t.render = True
+t.net_layout = [1024, 1024, 256, 32]
+t.eps_decay = 0.9993
+t.learn_rate = 0.0005
+t.force_cpu = False
+t.load_mem = True
+t.load_ann = False
+t.save_to = 'test/'
+t.load_from = 'Carla_CarlaOffline_1024__1024__256__32_0.9993_0.0005_50'
+t.name = 'Offline'
+t.learn_offline = True
+t.learn_online = True
+t.run_episodes = 500
+t.offline_epochs = 100
+t.learn_iterations = 100
+t.offline_validate_every_x_iteration = -1
+
+configuration = t
+configuration.env = CarlaEnvironment(render=configuration.render)
+configuration.conf_to_name()
+configuration.agent = QAgent(configuration)
+main.run(configuration)