Commit 1b465405 authored by Armin Co

Added new config options and launch configurations

- learn_iterations
- offline_validate_every_x_iteration
parent 353aaf19
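For orientation, a minimal sketch (not code from this commit) of how the two new options are consumed, based on the changes below. Config, learn_iterations, offline_validate_every_x_iteration and offline_epochs come from the diff; the validate callback and the name learn_offline_sketch are placeholders for the validation run that environment_wrapper performs.

class Config:
    learn_iterations = 1                      # epochs handed on to agent.learn()
    offline_validate_every_x_iteration = 10   # -1 disables validation during offline training
    offline_epochs = 1000

def learn_offline_sketch(agent, conf, validate):
    """Illustrative offline loop: train each epoch, validate periodically."""
    for i in range(conf.offline_epochs):
        loss = agent.learn(offline=True, epochs=conf.learn_iterations)
        if (conf.offline_validate_every_x_iteration != -1
                and i % conf.offline_validate_every_x_iteration == 0):
            validate(agent)  # e.g. one non-learning episode in the environment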
@@ -4,5 +4,6 @@ saved_agents
 benchmarks
 baselines
 workspace.code-workspace
+test
 tech_demo.py
 *.png
@@ -2,6 +2,7 @@ import random
 import numpy as np
 from memory import Memory
 from networks import QNet
+from steering_wheel import Controller

 class QAgent:
     gamma = 0.99
@@ -29,10 +30,9 @@ class QAgent:
     def remember(self, state, action, reward, following_state, done):
         self.memory.add(state, action, reward, following_state, done)

-    def learn(self, offline=False):
+    def learn(self, offline=False, epochs=1):
         """ Learn the Q-Function. """
         batch_size = self.online_batch_size
-        epochs = 1

         if offline:
             batch_size = self.OFFLINE_BATCHSIZE
@@ -49,7 +49,7 @@ class QAgent:
         y[[idx], [actions]] = qMax
         if offline:
-            history = self.q.net.fit(states, y, epochs=2, verbose=0)
+            history = self.q.net.fit(states, y, epochs=epochs, verbose=0)
             loss = history.history['loss'][-1]
         else:
             loss = self.q.fit(states, y, epochs)
@@ -64,8 +64,9 @@ class QAgent:
         self.memory.save(path+'/' + self.name + '.mem')

     def load(self, path, net=True, memory=True):
-        print(path)
+        print('Load: ' + path)
         if net:
+            print('Network')
             self.q.load(path+'.net')
         if memory:
             self.memory.load(path+'.mem')
@@ -81,14 +82,13 @@ class DQAgent(QAgent):
         action_values = (self.q.predict(state) + self.q2.predict(state)) / 2
         return np.argmax(action_values[0])

-    def learn(self, offline=False):
+    def learn(self, offline=False, epochs=1):
         for _ in range(2):
             if np.random.rand() < 0.5:
                 temp = self.q
                 self.q = self.q2
                 self.q2 = temp
         batch_size = self.online_batch_size
-        epochs = 1
         if offline:
             batch_size = self.OFFLINE_BATCHSIZE
         if len(self.memory.history) < batch_size:
@@ -101,10 +101,22 @@ class DQAgent(QAgent):
         idx = np.array([i for i in range(batch_size)])
         y[[idx], [actions]] = qMax
         if offline:
-            history = self.q.net.fit(states, y, epochs=2, verbose=0)
+            history = self.q.net.fit(states, y, epochs=epochs, verbose=0)
             loss = history.history['loss'][-1]
         else:
             loss = self.q.fit(states, y, epochs)
         if self.epsilon > self.epsilon_min:
             self.epsilon *= self.epsilon_decay
         return loss
+
+
+class CarlaManual(QAgent):
+    control = None
+
+    def __init__(self, conf):
+        super().__init__(conf)
+        self.control = Controller()
+
+    def get_action(self, state):
+        self.control.on_update()
+        return self.control.get_action()
@@ -97,8 +97,9 @@ class ObstacleSensor:
         self.parent = parent
         bp = world.get_blueprint_library().find('sensor.other.obstacle')
         bp.set_attribute('distance', '10')
-        bp.set_attribute('hit_radius', '3')
-        print(bp.get_attribute('hit_radius'))
+        bp.set_attribute('hit_radius', '5')
+        # print(bp.get_attribute('hit_radius'))
+        position = carla.Transform(carla.Location(x=-2, y=0, z=0.0), carla.Rotation(pitch=0.0, yaw=0, roll=0.0))
         self.sensor = world.spawn_actor(bp, carla.Transform(), attach_to=parent, attachment_type=carla.AttachmentType.Rigid)
         weak_self = weakref.ref(self)
         self.sensor.listen(lambda event: ObstacleSensor._on_event(weak_self, event))
@@ -108,12 +109,12 @@ class ObstacleSensor:
         if not self:
             return
         self.collision = event
-        if not event.other_actor.type_id == 'static.road':
-            print(self.parent.get_location())
+        if not event.other_actor.type_id == 'static.road' and not event.other_actor.type_id == 'static.roadline' and not event.other_actor.type_id == 'static.sidewalk' and False:
+            print(self.parent.get_transform())
+            print(event.transform)
             print(self.parent.get_velocity())
             print(event.other_actor.type_id)
-            print(event.other_actor.transform)
-            print(event.distance)
+            print(str(event.distance))


 class LidarSensor:
@@ -333,7 +334,7 @@ if __name__ == "__main__":
         clock.tick(5)
         ctrl.on_update()
         obs, reward, done, _ = env.step(ctrl.get_action(), render=True)
-        print(str(reward) + ' ' + str(done))
+        # print(str(reward) + ' ' + str(done))
         cumulated_reward += reward
         if done:
             break
...
@@ -5,6 +5,8 @@ from tqdm import trange
 import pandas as pd
 import matplotlib.pyplot as plt

+IS_SOLVED = 195
+
 class Config:
     render = False
     force_cpu = True
@@ -18,8 +20,10 @@ class Config:
     net_layout= [256, 128]
     eps_decay = 0.9996
     learn_rate= 0.001
+    learn_iterations = 1
     run_episodes = 20
     offline_epochs = 1000
+    offline_validate_every_x_iteration = 10
     load_ann = False
     load_mem = False
     load_from = 'agnt'
@@ -35,7 +39,8 @@ class Config:
         for layer in self.net_layout:
             self.name += '_' + str(layer) + '_'
         self.name += str(self.eps_decay) + '_'
-        self.name += str(self.learn_rate)
+        self.name += str(self.learn_rate) + '_'
+        self.name += str(self.learn_iterations)
@@ -52,7 +57,7 @@ def reset(environment):
     return state

-def one_episode(environment, agent, render, learn, max_steps=1000):
+def one_episode(environment, agent, render, learn, conf=None, max_steps=1000):
     """ Perform one episode of the agent in the environment. """
     score = 0
     state = reset(environment)
@@ -65,22 +70,33 @@ def one_episode(environment, agent, render, learn, max_steps=1000):
         score += reward
         state = following_state
         if learn:
-            agent.learn()
+            if conf is not None:
+                agent.learn(epochs=conf.learn_iterations)
+            else:
+                agent.learn()
         if done:
             break
     return score

-def learn_offline(agent, epochs=1):
+def learn_offline(agent, conf):
     """ Train the agent with its memories. """
     print('Learning with ', len(agent.memory.history), ' memories.')
-    pbar = trange(epochs, desc='Loss: x')
-    for _ in pbar:
-        loss = agent.learn(offline=True)
+    pbar = trange(conf.offline_epochs, desc='Loss: x')
+    for i in pbar:
+        loss = agent.learn(offline=True, epochs=conf.learn_iterations)
         desc = ('Loss: %05.4f' %(loss))
         pbar.set_description(desc)
         pbar.refresh()
+        if i % conf.offline_validate_every_x_iteration == 0 and conf.offline_validate_every_x_iteration is not -1:
+            score, avg = run(conf.env, conf.agent, 1, render=conf.render, learn=False, conf=conf)
+            conf.name += '1'
+            process_logs(avg, score, conf)
+            if avg[-1] > IS_SOLVED:
+                break

-def run(environment, agent, episodes, render=True, learn=True):
+def run(environment, agent, episodes, render=True, learn=True, conf=None):
     """ Run an agent """
     # Set the exploring rate to its minimum.
@@ -93,13 +109,13 @@ def run(environment, agent, episodes, render=True, learn=True):
     pbar = trange(episodes, desc=agent.name + ' [act, avg]: [0, 0]', unit="Episodes")
     for _ in pbar:
-        score = one_episode(environment, agent, render, learn)
+        score = one_episode(environment, agent, render, learn, conf=conf)
         score_history.append(score)
         is_solved = np.mean(score_history[-100:])
         avg_score_history.append(is_solved)
-        if is_solved > 195 and learn:
+        if is_solved > IS_SOLVED and learn:
             break
         desc = (agent.name + " [act, avg]: [{0:.2f}, {1:.2f}]".format(score, is_solved))
         pbar.set_description(desc)
@@ -109,6 +125,10 @@ def run(environment, agent, episodes, render=True, learn=True):
 def process_logs(avg_score_history, loss, conf):
     df = pd.DataFrame(list(zip(loss, avg_score_history)), columns=['Score', 'Average'])
+    try:
+        os.makedirs(conf.save_to + conf.name)
+    except:
+        pass
     df.to_csv(conf.save_to + conf.name + '/' + conf.name + '.csv')

     """ Plot the log history """
@@ -119,6 +139,7 @@ def process_logs(avg_score_history, loss, conf):
     plt.savefig(conf.save_to + conf.name + '/' + conf.name + '.png', format="png")
     if conf.render:
         plt.show()
+    df.to_csv(conf.save_to + conf.name + '/' + conf.name + '.csv')

 def load_logs(file):
     df = pd.read_csv(file)
...
@@ -28,14 +28,14 @@ def run(conf):
     # Offline training of the agent with
     # previous collected and saved memories.
    if conf.learn_offline and conf.learn:
-        ew.learn_offline(marvin, epochs=conf.offline_epochs)
+        ew.learn_offline(marvin, conf)

     # Run the agent in the environment for the
     # number of specified epochs. Either to
     # verify the performance of the agent or
     # to train the agent.
     _LEARN = conf.learn_online and conf.learn
-    loss, avg_score = ew.run(conf.env, marvin, conf.run_episodes, render=conf.render, learn=_LEARN)
+    loss, avg_score = ew.run(conf.env, marvin, conf.run_episodes, render=conf.render, learn=_LEARN, conf=conf)

     # Save the final training result of the agent.
     if conf.learn:
...
@@ -17,7 +17,7 @@ c_32.env_type = 'CartPole'
 c_32.net_layout = [32, 32]
 c_32.eps_decay = 0.9995
 c_32.learn_rate= 0.00075
-c_32.run_episodes = 350
+c_32.run_episodes = 1000
 c_32.save_to = 'baselines/'
@@ -36,22 +36,54 @@ c_512.net_layout = [512, 32]
 c_1024 = copy.deepcopy(c_32)
 c_1024.net_layout = [1024, 32]

+c_2048 = copy.deepcopy(c_32)
+c_2048.net_layout = [2048, 32]
+
 cd_3 = copy.deepcopy(c_32)
-cd_3.net_layout[128, 64, 32]
+cd_3.net_layout = [128, 64, 32]

 cd_4 = copy.deepcopy(c_32)
-cd_4.net_layout[128, 64, 32, 32]
+cd_4.net_layout = [128, 64, 32, 32]

+cd_5 = copy.deepcopy(c_32)
+cd_5.net_layout = [256, 128, 128, 64]
+cd_5.learn_iterations = 5
+cd_5.learn_rate = 0.001
+
+cd_6 = copy.deepcopy(c_32)
+cd_6.net_layout = [512, 256, 128, 64]
+cd_6.learn_iterations = 10
+cd_6.learn_rate = 0.00075
+
 cd_128 = copy.deepcopy(c_32)
-cd_128.net_layout[128, 128, 128]
+cd_128.net_layout = [128, 128, 128]

 cd_256 = copy.deepcopy(c_32)
-cd_256.net_layout[256, 256, 256]
+cd_256.net_layout = [256, 256, 256]

 cd_512 = copy.deepcopy(c_32)
-cd_512.net_layout[512, 512, 512]
+cd_512.net_layout = [512, 512, 512]
+cd_512.learn_iterations = 10
+cd_512.learn_rate = 0.001

-conf = c_32
+offline = copy.deepcopy(c_32)
+offline.force_cpu = False
+offline.load_from = 'Offline_Config_Test'
+offline.load_mem = True
+offline.load_ann = False
+offline.learn_offline = False
+offline.learn_online = True
+offline.run_episodes = 100
+offline.net_layout = [1024, 1024, 1024, 256]
+offline.learn_rate = 0.0005
+offline.learn_iterations = 1
+offline.offline_validate_every_x_iteration = 1
+offline.offline_epochs = 100
+offline.name = 'OnlineValidation'
+offline.render = False
+offline.save_to = 'test/'
+
+conf = offline

 conf.conf_to_name()
 conf.agent = QAgent(conf)
...
@@ -12,7 +12,7 @@ c.render = False
 c.env = gym.make('LunarLander-v2')
 c.env_type = 'Lunar'
 c.net_layout = [256, 128]
-c.eps_decay = 0.9996
+c.eps_decay = 0.9995
 c.learn_rate= 0.001
 c.run_episodes = 300
 c.save_to = 'benchmarks/'
@@ -21,7 +21,12 @@ smallNet = copy.deepcopy(c)
 smallNet.name = 'SmallNet'
 smallNet.net_layout = [128, 32]
 smallNet.conf_to_name()
 # smallNet.agent = QAgent(smallNet)
+
+smallNetSlow = copy.deepcopy(c)
+smallNetSlow.name = 'SmallNetSlow'
+smallNetSlow.net_layout = [128, 32]
+smallNetSlow.learn_rate = 0.0005
+smallNetSlow.conf_to_name()

 smallNetDeep = copy.deepcopy(c)
 smallNetDeep.name = 'SmallNetDeep'
@@ -95,8 +100,9 @@ lun.conf_to_name()
 # lun.agent = QAgent(lun)

 # configuration = smallNet
+configuration = smallNetSlow
 # configuration = smallNetDeep
-configuration = normalNet
+# configuration = normalNet
 # configuration = normalSlowDecay
 # configuration = normalSlowLearn
 # configuration = largeNet
...
import pandas as pd
import matplotlib.pyplot as plt
from os import listdir
from os.path import isfile, join
from pathlib import Path
BASE_PATH = '/home/armin/Master/semester_3/carla/'
DIR='baselines/hyrican'
path = BASE_PATH + DIR
def plot_csv(file_path, show=False):
df = pd.read_csv(file_path)
act_score = df['Score']
avg_score = df['Average']
plt.figure()
plt.plot(act_score, label='Episode Score')
plt.plot(avg_score, label='Average Score')
plt.xlabel('Episode')
plt.ylabel('Score')
plt.legend()
plt.title(file_path)
plt.savefig(file_path + '.png')
if show:
plt.show()
for dir in Path(path).iterdir():
for file in listdir(dir):
file_path = join(dir, file)
if isfile(file_path):
if file_path.endswith('.csv'):
plot_csv(file_path, show=True)
import main
import environment_wrapper as ew
import gym
import copy
from agents import QAgent as QAgent
from carla_environment import CarlaEnvironment
c = ew.Config()
c.name = 'Base'
c.render = True
c.env_type = 'Carla'
c.net_layout = [256, 128]
c.eps_decay = 0.9995
c.learn_rate= 0.001
c.run_episodes = 20
c.save_to = 'test/'
t = copy.deepcopy(c)
t.render = True
t.net_layout = [1024, 1024, 256, 32]
t.eps_decay = 0.9993
t.learn_rate = 0.0005
t.force_cpu = False
t.load_mem = True
t.load_ann = False
t.save_to = 'test/'
t.load_from = 'Carla_CarlaOffline_1024__1024__256__32_0.9993_0.0005_50'
t.name = 'Offline'
t.learn_offline = True
t.learn_online = True
t.run_episodes = 500
t.offline_epochs = 100
t.learn_iterations = 100
t.offline_validate_every_x_iteration = -1
configuration = t
configuration.env = CarlaEnvironment(render=configuration.render)
configuration.conf_to_name()
configuration.agent = QAgent(configuration)
main.run(configuration)