Commit 61a80519 authored by Armin Co

refactoring

parent 0fe0f9b8
......@@ -9,4 +9,5 @@ orientation
workspace.code-workspace
test
tech_demo.py
Screencast
*.png
......@@ -79,12 +79,6 @@ class DQAgent(QAgent):
self.q2 = QNet(conf)
self.name = str(self.name) + 'DBL'
def get_action(self, state):
if np.random.rand() <= self.epsilon:
return random.randrange(self.action_space)
action_values = self.q.predict(state)
return np.argmax(action_values[0])
def learn(self, offline=False, epochs=1):
if self.epsilon > self.epsilon_min:
self.epsilon *= self.epsilon_decay
......@@ -114,13 +108,9 @@ class DQAgent(QAgent):
return loss
def load(self, path, net=True, memory=True):
print('Load: ' + path)
super().load(path, net=net, memory=memory)
if net:
print('Network')
self.q.load(path+'.net')
self.q2.load(path+'.net')
if memory:
self.memory.load(path+'.mem')
class CarlaManual(QAgent):
control = None
......
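The get_action override removed above implemented epsilon-greedy action selection, which presumably now lives only in the QAgent base class. A minimal, self-contained sketch of that pattern (the epsilon_greedy name and its arguments are illustrative, not the repository's API):
import numpy as np
# Epsilon-greedy selection over the Q-values predicted for one state.
def epsilon_greedy(q_values, epsilon, action_space, rng=np.random.default_rng()):
    if rng.random() <= epsilon:
        # Explore: pick a random action index.
        return int(rng.integers(action_space))
    # Exploit: pick the action with the highest predicted Q-value.
    return int(np.argmax(q_values))
# With epsilon=0 the greedy action (index 2) is always chosen.
print(epsilon_greedy(np.array([0.1, -0.3, 0.8, 0.0]), epsilon=0.0, action_space=4))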
......@@ -12,7 +12,6 @@ import time
from steering_wheel import ACTION_SPACE, Controller, IDLE
# find carla module
try:
CARLA_PATH='/media/armin/Games/carla/PythonAPI/carla/dist/carla-*%d.%d-%s.egg'
CARLA_PATH='/opt/carla-simulator/PythonAPI/carla/dist/carla-*%d.%d-%s.egg'
......@@ -23,8 +22,11 @@ try:
except IndexError:
pass
try:
import carla
except:
print('Could not import carla, you will not be able to create a client!')
print('Continuing...')
class Camera:
""" Add camera sensor to the carla world """
......@@ -71,7 +73,7 @@ class Camera:
class CollisionSensor:
sensor = None
collision = None
intensity = 0
def __init__(self, world, parent):
bp = world.get_blueprint_library().find('sensor.other.collision')
self.sensor = world.spawn_actor(bp, carla.Transform(), attach_to=parent)
......@@ -82,16 +84,13 @@ class CollisionSensor:
self = weak_self()
if not self:
return
# print(event.other_actor)
self.collision = event
impulse = event.normal_impulse
intensity = math.sqrt(impulse.x**2 + impulse.y**2 + impulse.z**2)
# print(intensity)
self.intensity = math.sqrt(impulse.x**2 + impulse.y**2 + impulse.z**2)
class ObstacleSensor:
sensor = None
parent = None
ZONES = 4
obstacle = []
def __init__(self, world, parent):
......@@ -99,8 +98,6 @@ class ObstacleSensor:
bp = world.get_blueprint_library().find('sensor.other.obstacle')
bp.set_attribute('distance', '10')
bp.set_attribute('hit_radius', '5')
# print(bp.get_attribute('hit_radius'))
position = carla.Transform(carla.Location(x=-2, y=0, z=0.0), carla.Rotation(pitch=0.0, yaw=0, roll=0.0))
self.sensor = world.spawn_actor(bp, carla.Transform(), attach_to=parent, attachment_type=carla.AttachmentType.Rigid)
weak_self = weakref.ref(self)
self.sensor.listen(lambda event: ObstacleSensor._on_event(weak_self, event))
......@@ -110,7 +107,8 @@ class ObstacleSensor:
if not self:
return
self.collision = event
if not event.other_actor.type_id == 'static.road' and not event.other_actor.type_id == 'static.roadline' and not event.other_actor.type_id == 'static.sidewalk' and False:
actor_id = event.other_actor.type_id
if not actor_id == 'static.road' and not actor_id == 'static.roadline' and not actor_id == 'static.sidewalk' and False:
print(self.parent.get_transform())
print(event.transform)
print(self.parent.get_velocity())
......@@ -139,12 +137,12 @@ class LidarSensor:
self = weak_self()
if not self:
return
for m in event: print(m.point)
for m in event:
if False: print(m.point)
if len(event) > 0:
print(event.horizontal_angle)
if False: print(event.horizontal_angle)
class World:
""" Wrapper for the carla environment, incl. player/vehicle """
player = None
collision_sensor = None
obstacle_sensor = None
......@@ -166,7 +164,6 @@ class World:
self.reset()
def reset(self):
""" Remove and create new player/vehicle. """
self.destroy()
time.sleep(0.5)
self.spawn_on_sidewalk()
......@@ -180,12 +177,10 @@ class World:
return [0,0,0,0,0]
def spawn_player(self, transform):
""" Add a vehicle to the world. """
while self.player is None:
blueprint = random.choice(self.blueprint_library.filter('model3'))
position = transform
self.player = self.world.try_spawn_actor(blueprint, position)
start_location = self.player.get_location()
self.collision_sensor = CollisionSensor(self.world, self.player)
self.obstacle_sensor = ObstacleSensor(self.world, self.player)
......@@ -213,7 +208,6 @@ class World:
self.actors.append(actor)
def destroy(self):
""" Remove vehicle from the world. """
if self.player is not None:
self.player.destroy()
self.player = None
......@@ -230,16 +224,15 @@ class World:
def step(self, action):
""" Apply controls to vehicle. """
controls = Controller.action_to_controls(action)
c = carla.VehicleControl(throttle=controls[0], steer=controls[1], brake=controls[2], reverse=controls[3])
self.player.apply_control(c)
self.world.tick()
reward = self.reward(action)
reward = self.reward()
self.collision_sensor.collision = None
return reward
def reward(self, action):
def reward(self):
x, y, vx, vy, yaw = self.observation()
target = carla.Transform( \
......@@ -280,10 +273,8 @@ class World:
class ActionSpace:
n = ACTION_SPACE
class ObservationSpace:
shape = [5]
class CarlaEnvironment:
action_space = ActionSpace
observation_space = ObservationSpace
......@@ -340,7 +331,6 @@ if __name__ == "__main__":
clock.tick(5)
ctrl.on_update()
obs, reward, done, _ = env.step(ctrl.get_action(), render=True)
# print(str(reward) + ' ' + str(done))
cumulated_reward += reward
if done:
break
......
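The sensor classes above all hand the CARLA listen callback a weakref.ref(self) together with a static _on_event, so the callback held by the simulator does not keep the Python wrapper alive. A self-contained sketch of that pattern without the carla dependency (EventSource is a stand-in for a carla sensor):
import weakref
class EventSource:
    """ Stand-in for a carla sensor: stores a callback and can fire events. """
    def __init__(self):
        self._callback = None
    def listen(self, callback):
        self._callback = callback
    def fire(self, event):
        if self._callback:
            self._callback(event)
class SensorWrapper:
    def __init__(self, source):
        self.last_event = None
        weak_self = weakref.ref(self)  # avoid a reference cycle through the callback
        source.listen(lambda event: SensorWrapper._on_event(weak_self, event))
    @staticmethod
    def _on_event(weak_self, event):
        self = weak_self()
        if not self:  # wrapper was already garbage collected
            return
        self.last_event = event
source = EventSource()
sensor = SensorWrapper(source)
source.fire('collision')
print(sensor.last_event)  # -> collision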
class Config:
render = False
force_cpu = True
env = None
agent = None
env_type = 'Lunar'
name = '00'
learn = True
learn_online = True
learn_offline = False
net_layout= [256, 128]
eps_decay = 0.9996
learn_rate= 0.001
learn_epochs = 1
run_episodes = 20
offline_batchsize = 64
offline_episodes = 100
offline_validate_every_x_iteration = -1
load_ann = False
load_mem = False
load_from = 'agent_name'
save_to = 'dir/'
def conf_to_name(self):
self.name = str(self.env_type) + '_' + str(self.name)
for layer in self.net_layout:
self.name += '_' + str(layer) + '_'
self.name += str(self.eps_decay) + '_'
self.name += str(self.learn_rate) + '_'
self.name += str(self.learn_epochs)
\ No newline at end of file
......@@ -5,39 +5,6 @@ from tqdm import trange
import pandas as pd
import matplotlib.pyplot as plt
IS_SOLVED = 195
class Config:
render = False
force_cpu = True
env = None
agent = None
env_type = 'Lunar'
name = 'ConfigTest'
learn = True
learn_online = True
learn_offline = False
net_layout= [256, 128]
eps_decay = 0.9996
learn_rate= 0.001
learn_iterations = 1
run_episodes = 20
offline_batchsize = 2048
offline_epochs = 1000
offline_validate_every_x_iteration = 10
load_ann = False
load_mem = False
load_from = 'agnt'
save_to = 'saved_agents/'
def conf_to_name(self):
self.name = str(self.env_type) + '_' + str(self.name)
for layer in self.net_layout:
self.name += '_' + str(layer) + '_'
self.name += str(self.eps_decay) + '_'
self.name += str(self.learn_rate) + '_'
self.name += str(self.learn_iterations)
......@@ -70,13 +37,14 @@ def one_episode(environment, agent, render, learn, conf=None, max_steps=1000):
state = following_state
if learn:
if conf is not None:
agent.learn(epochs=conf.learn_iterations)
agent.learn(epochs=conf.learn_epochs)
else:
agent.learn()
if done:
break
return score
IS_SOLVED = 195
def learn_offline(agent, conf):
""" Train the agent with its memories. """
print('Learning with ', len(agent.memory.history), ' memories.')
......@@ -84,9 +52,9 @@ def learn_offline(agent, conf):
score_history = []
avg_score_history = []
desc_train = ''
pbar = trange(conf.offline_epochs, desc='Loss: x')
pbar = trange(conf.offline_episodes, desc='Loss: x')
for i in pbar:
loss = agent.learn(offline=True, epochs=conf.learn_iterations)
loss = agent.learn(offline=True, epochs=conf.learn_epochs)
desc = ('Loss: %05.4f' %(loss)) + desc_train
pbar.set_description(desc)
pbar.refresh()
......@@ -108,20 +76,21 @@ def learn_offline(agent, conf):
def run(environment, agent, episodes, render=True, learn=True, conf=None):
def run(conf):
""" Run an agent """
conf.name = str(conf.name) + 'on'
# Set the exploring rate to its minimum.
# (epsilon *greedy*)
learn = conf.learn and conf.learn_online
if not learn:
agent.epsilon = agent.epsilon_min
conf.agent.epsilon = conf.agent.epsilon_min
score_history = []
avg_score_history = []
pbar = trange(episodes, desc=agent.name + ' [act, avg]: [0, 0]', unit="Episodes")
pbar = trange(conf.run_episodes, desc=conf.agent.name + ' [act, avg]: [0, 0]', unit="Episodes")
for _ in pbar:
score = one_episode(environment, agent, render, learn, conf=conf)
score = one_episode(conf.env, conf.agent, conf.render, learn, conf=conf)
score_history.append(score)
is_solved = np.mean(score_history[-100:])
......@@ -129,7 +98,7 @@ def run(environment, agent, episodes, render=True, learn=True, conf=None):
if is_solved > IS_SOLVED and learn:
break
desc = (agent.name + " [act, avg]: [{0:.2f}, {1:.2f}]".format(score, is_solved))
desc = (conf.agent.name + " [act, avg]: [{0:.2f}, {1:.2f}]".format(score, is_solved))
pbar.set_description(desc)
pbar.refresh()
return score_history, avg_score_history
......@@ -143,14 +112,16 @@ def process_logs(avg_score_history, loss, conf):
pass
df.to_csv(conf.save_to + conf.name + '/' + conf.name + '.csv')
""" Plot the log history """
act_score = df['Score']
avg_score = df['Average']
plt.figure()
plt.plot([i+1 for i in range(0, len(loss), 2)], loss[::2])
plt.plot([i+1 for i in range(0, len(avg_score_history), 2)], avg_score_history[::2], '--')
plt.plot(act_score, label='Episode Score')
plt.plot(avg_score, '--', label='Average Score')
plt.xlabel('Episode')
plt.ylabel('Score')
plt.legend()
plt.title(conf.name)
plt.savefig(conf.save_to + conf.name + '/' + conf.name + '.png', format="png")
if conf.render:
plt.show()
df.to_csv(conf.save_to + conf.name + '/' + conf.name + '.csv')
def load_logs(file):
......
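The reworked process_logs now plots the Score and Average columns of the DataFrame instead of the loss history. A small, self-contained version of that plotting logic with made-up scores (the values and file name are illustrative):
import pandas as pd
import matplotlib.pyplot as plt
scores = [10, 40, 25, 60, 80, 75, 90]
df = pd.DataFrame({'Score': scores,
                   'Average': pd.Series(scores).expanding().mean()})
plt.figure()
plt.plot(df['Score'], label='Episode Score')
plt.plot(df['Average'], '--', label='Average Score')
plt.xlabel('Episode')
plt.ylabel('Score')
plt.legend()
plt.title('example_run')
plt.savefig('example_run.png', format='png')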
......@@ -10,15 +10,13 @@ from agents import QAgent
import environment_wrapper as ew
def run(conf):
# 1. Create a learning agent
# Set agent
marvin = conf.agent
# (2.) *optional* Load agent memory and/or net from disk.
# (optional) Load agent memory and/or net from disk.
if conf.load_ann or conf.load_mem:
marvin.load(conf.save_to + conf.load_from + '/' + conf.load_from, net=conf.load_ann, memory=conf.load_mem)
# 3. Set your configurations for the run.
# Register an *atexit* callback,
# to store the current result of the agent
# if the program is interrupted.
......@@ -33,8 +31,8 @@ def run(conf):
# number of specified epochs. Either to
# verify the performance of the agent or
# to train the agent.
_LEARN = conf.learn_online and conf.learn
loss, avg_score = ew.run(conf.env, marvin, conf.run_episodes, render=conf.render, learn=_LEARN, conf=conf)
# loss, avg_score = ew.run(conf.env, marvin, conf.run_episodes, render=conf.render, learn=(conf.learn_online and conf.learn), conf=conf)
loss, avg_score = ew.run(conf)
# Save the final training result of the agent.
marvin.save(conf.save_to)
......
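The run script above mentions registering an atexit callback so the current result of the agent is stored if the program is interrupted. A minimal sketch of that pattern (DummyAgent and the save path are illustrative stand-ins, not the project's classes):
import atexit
class DummyAgent:
    """ Stand-in for the project's agent; only a save() method is needed here. """
    def save(self, path):
        print('Saving agent to ' + path)
agent = DummyAgent()
# Runs when the interpreter shuts down normally, e.g. after a KeyboardInterrupt
# has propagated out of the training loop.
atexit.register(lambda: agent.save('saved_agents/interrupted'))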
......@@ -10,27 +10,23 @@ NEXT_STATE = 3
DONE = 4
class Memory:
""" Class to store memories of an agent. """
history = deque(maxlen=1000000)
def add(self, state, action, reward, nextstate, done):
self.history.append((state, action, reward, nextstate, done))
def get_batch(self, batch_size):
""" Get a random batch of samples of "batch_size" """
batch = random.sample(self.history, batch_size)
states = np.array([i[STATE] for i in batch])
states = np.squeeze(states)
states = np.squeeze(np.array([i[STATE] for i in batch]))
actions = np.array([i[ACTION] for i in batch])
rewards = np.array([i[REWARD] for i in batch])
nextstates = np.array([i[NEXT_STATE] for i in batch])
nextstates = np.squeeze(nextstates)
nextstates = np.squeeze(np.array([i[NEXT_STATE] for i in batch]))
dones = np.array([i[DONE] for i in batch])
return states, actions, rewards, nextstates, dones
def save(self, path):
pickle.dump(self.history, open(path, 'wb'))
print('Saved ' + str(len(self.history)) + ' memories.')
def load(self, path):
self.history = pickle.load(open(path, 'rb'))
......
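get_batch stacks each field of the sampled transitions into an array and squeezes the extra batch dimension carried by states stored with shape (1, n). A self-contained illustration of that sampling pattern with dummy transitions:
import random
import numpy as np
# 100 dummy transitions; states are stored with shape (1, 4).
history = [(np.ones((1, 4)) * i,        # state
            i % 2,                      # action
            float(i),                   # reward
            np.ones((1, 4)) * (i + 1),  # next state
            False)                      # done flag
           for i in range(100)]
batch = random.sample(history, 8)
states = np.squeeze(np.array([t[0] for t in batch]))      # shape (8, 4)
actions = np.array([t[1] for t in batch])
rewards = np.array([t[2] for t in batch])
nextstates = np.squeeze(np.array([t[3] for t in batch]))  # shape (8, 4)
dones = np.array([t[4] for t in batch])
print(states.shape, nextstates.shape)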
......@@ -4,34 +4,35 @@ from keras.models import load_model
from keras.layers import Dense
from keras.optimizers import Adam
from keras.activations import relu, linear
from keras.regularizers import l2
from keras.callbacks import EarlyStopping
class QNet:
from numpy.lib.function_base import _parse_input_dimensions
class QNet:
learn_rate = 0.0005
def __init__(self, conf):
if conf.force_cpu:
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = ""
self.net = None
self.net = Sequential()
self.compile_net(conf)
self.net.summary()
self.learn_rate = conf.learn_rate
def compile_net(self, conf):
self.net.add(Dense(conf.net_layout[0], input_dim=conf.env.observation_space.shape[0], activation=relu))
input_dimension = conf.env.observation_space.shape[0]
first_layer = conf.net_layout[0]
self.net.add(Dense(first_layer, input_dim=input_dimension, activation=relu))
for layer in range(1, len(conf.net_layout)):
self.net.add(Dense(conf.net_layout[layer], activation=relu))
self.net.add(Dense(conf.env.action_space.n, activation=linear))
self.net.compile(loss='mse', optimizer=Adam(lr=self.learn_rate))
self.net.compile(loss='mse', optimizer=Adam(learning_rate=self.learn_rate))
def predict(self, state):
return self.net.predict(state)
def predict_on_batch(
self, states): return self.net.predict_on_batch(states)
def predict_on_batch(self, states):
return self.net.predict_on_batch(states)
def fit(self, X, Y, epochs=1, verbose=0):
callback = EarlyStopping(monitor='loss', patience=2, min_delta=0.1, restore_best_weights=True)
......@@ -44,13 +45,3 @@ class QNet:
def load(self, path):
self.net = load_model(path)
self.net.summary()
class QL2Net(QNet):
def __init__(self, action_space, state_space):
super().__init__(action_space, state_space)
def compile_net(self, action_space, state_space):
self.net.add(Dense(192, input_dim=state_space, activation=relu, kernel_regularizer=l2(0.01)))
self.net.add(Dense(64, activation=relu))
self.net.add(Dense(action_space, activation=linear))
self.net.compile(loss='mse', optimizer=Adam(lr=self.learn_rate))
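compile_net above builds a plain MLP with relu hidden layers, a linear output with one unit per action, and an MSE loss; the diff also switches Adam from the deprecated lr argument to learning_rate. A minimal, self-contained sketch of the same kind of network (layer sizes and dimensions here are illustrative, not the project's configuration):
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
def build_q_net(observation_dim, action_count, layout=(256, 128), learn_rate=0.001):
    net = Sequential()
    net.add(Dense(layout[0], input_dim=observation_dim, activation='relu'))
    for units in layout[1:]:
        net.add(Dense(units, activation='relu'))
    net.add(Dense(action_count, activation='linear'))  # one Q-value per action
    # Newer Keras versions expect learning_rate instead of the deprecated lr.
    net.compile(loss='mse', optimizer=Adam(learning_rate=learn_rate))
    return net
# Example: a net for an 8-dimensional observation and 4 discrete actions.
build_q_net(8, 4).summary()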
......@@ -47,12 +47,12 @@ cd_4.net_layout = [128, 64, 32, 32]
cd_5 = copy.deepcopy(c_32)
cd_5.net_layout = [256, 128, 128, 64]
cd_5.learn_iterations = 5
cd_5.learn_epochs = 5
cd_5.learn_rate = 0.001
cd_6 = copy.deepcopy(c_32)
cd_6.net_layout = [512, 256, 128, 64]
cd_6.learn_iterations = 10
cd_6.learn_epochs = 10
cd_6.learn_rate = 0.00075
cd_128 = copy.deepcopy(c_32)
......@@ -63,7 +63,7 @@ cd_256.net_layout = [256, 256, 256]
cd_512 = copy.deepcopy(c_32)
cd_512.net_layout = [512, 512, 512]
cd_512.learn_iterations = 10
cd_512.learn_epochs = 10
cd_512.learn_rate = 0.001
offline = copy.deepcopy(c_32)
......@@ -76,9 +76,9 @@ offline.learn_online = True
offline.run_episodes = 100
offline.net_layout = [1024, 1024, 1024, 256]
offline.learn_rate = 0.0005
offline.learn_iterations = 1
offline.learn_epochs = 1
offline.offline_validate_every_x_iteration = 1
offline.offline_epochs = 100
offline.offline_episodes = 100
offline.name = 'OnlineValidation'
offline.render = False
offline.save_to = 'test/'
......
......@@ -4,8 +4,8 @@ from os import listdir
from os.path import isfile, join
from pathlib import Path
BASE_PATH = '/home/armin/Master/semester_3/carla/'
DIR='baselines/hyrican'
BASE_PATH = '/home/armin/Master/semester_3/angewandte_ki/BerichtCarla/Daten'
DIR=''
path = BASE_PATH + DIR
def plot_csv(file_path, show=False):
......@@ -19,7 +19,7 @@ def plot_csv(file_path, show=False):
plt.xlabel('Episode')
plt.ylabel('Score')
plt.legend()
plt.title(file_path)
# plt.title(file_path)
plt.savefig(file_path + '.png')
if show:
plt.show()
......@@ -29,4 +29,4 @@ for dir in Path(path).iterdir():
file_path = join(dir, file)
if isfile(file_path):
if file_path.endswith('.csv'):
plot_csv(file_path, show=True)
plot_csv(file_path, show=False)
......@@ -35,10 +35,10 @@ o.eps_decay = 0.9915
o.learn_rate= 0.01
o.run_episodes = 550
o.learn_offline = True
o.offline_epochs = 100000
o.offline_episodes = 100000
o.offline_batchsize = 64
o.offline_validate_every_x_iteration = 500
o.learn_iterations = 1
o.learn_epochs = 1
validate = copy.deepcopy(c)
validate.name = 'Validate0'
......@@ -52,8 +52,8 @@ validate.load_mem = False
validate.load_ann = True
validate.learn_offline = False
validate.offline_batchsize = 64000
validate.offline_epochs = 20
validate.learn_iterations = 1
validate.offline_episodes = 20
validate.learn_epochs = 1
validate.offline_validate_every_x_iteration = 1
validate.learn_online = True
validate.eps_decay = 0.95
......
......@@ -18,9 +18,9 @@ standard_conf.learn_offline = True
standard_conf.net_layout= [256, 128]
standard_conf.eps_decay = 0.6
standard_conf.learn_rate= 0.0005
standard_conf.learn_iterations = 4
standard_conf.learn_epochs = 4
standard_conf.run_episodes = 100
standard_conf.offline_epochs = 2000
standard_conf.offline_episodes = 2000
standard_conf.offline_validate_every_x_iteration = -1
standard_conf.load_ann = False
standard_conf.load_mem = True
......
......@@ -18,9 +18,9 @@ standard_conf.learn_offline = False
standard_conf.net_layout= [256, 128]
standard_conf.eps_decay = 0.9996
standard_conf.learn_rate= 0.001
standard_conf.learn_iterations = 1
standard_conf.learn_epochs = 1
standard_conf.run_episodes = 20
standard_conf.offline_epochs = 1000
standard_conf.offline_episodes = 1000
standard_conf.offline_validate_every_x_iteration = 10
standard_conf.load_ann = False
standard_conf.load_mem = False
......