Commit 4764b70a authored by Armin Co

Scriptable Learning

parent 747e4d42
...
@@ -7,14 +7,15 @@ class QAgent:
     gamma = 0.99
     epsilon = 1.0
     epsilon_min = 0.01
-    epsilon_decay = 0.9996
+    epsilon_decay = 0.9999
     online_batch_size = 64

-    def __init__(self, action_space, state_space, name):
-        self.q = QNet(action_space, state_space)
+    def __init__(self, conf):
+        self.q = QNet(conf)
         self.memory = Memory()
-        self.action_space = action_space
-        self.name = name
+        self.action_space = conf.env.action_space.n
+        self.name = conf.name
+        self.epsilon_decay = conf.eps_decay

     def get_action(self, state):
         if np.random.rand() <= self.epsilon:
...
@@ -31,7 +32,7 @@ class QAgent:
         epochs = 1
         if offline:
-            batch_size = 4096
+            batch_size = 2048
         if len(self.memory.history) < batch_size:
             return
...
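For context: this hunk changes the default epsilon_decay from 0.9996 to 0.9999 and makes it overridable per run via conf.eps_decay. Assuming epsilon is multiplied by epsilon_decay once per step until it reaches epsilon_min (the actual update is outside this hunk), a rough sketch of what the two settings mean for exploration:

# Illustration only, not part of the commit: number of decay steps until
# epsilon falls from 1.0 to epsilon_min for the two decay settings.
def steps_until_min(eps_decay, epsilon=1.0, epsilon_min=0.01):
    steps = 0
    while epsilon > epsilon_min:
        epsilon *= eps_decay
        steps += 1
    return steps

print(steps_until_min(0.9996))  # ~11500 steps of exploration
print(steps_until_min(0.9999))  # ~46000 steps of exploration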
import main
import environment_wrapper as ew
import gym
from carla_environment import CarlaEnvironment
import copy
from tqdm import trange

c = ew.Config()
c.name = 'Base'
c.render = False
c.env = gym.make('LunarLander-v2')
c.env_type = 'Lunar'
c.net_layout = [256, 128]
c.eps_decay = 0.9996
c.learn_rate = 0.001
c.run_episodes = 300
c.save_to = 'benchmarks/'

smallNet = copy.deepcopy(c)
smallNet.name = 'SmallNet'
smallNet.net_layout = [128, 32]
smallNet.conf_to_name()

normalNet = copy.deepcopy(c)
normalNet.name = 'NormalNet'
normalNet.net_layout = [256, 128]
normalNet.conf_to_name()

normalSlowDecay = copy.deepcopy(c)
normalSlowDecay.name = 'NormalSlowDecay'
normalSlowDecay.net_layout = [256, 128]
normalSlowDecay.eps_decay = 0.99995
normalSlowDecay.conf_to_name()

normalSlowLearn = copy.deepcopy(c)
normalSlowLearn.name = 'NormalSlowLearn'
normalSlowLearn.net_layout = [256, 128]
normalSlowLearn.learn_rate = 0.0005
normalSlowLearn.conf_to_name()

largeNet = copy.deepcopy(c)
largeNet.name = 'LargeNet'
largeNet.net_layout = [512, 256]
largeNet.conf_to_name()

deepNet = copy.deepcopy(c)
deepNet.name = 'DeepNet'
deepNet.net_layout = [256, 128, 128]
deepNet.conf_to_name()

# configurations = [smallNet, normalNet]
# configurations = [normalSlowDecay, normalSlowLearn]
configurations = [largeNet, deepNet]

configs = trange(len(configurations), desc='Benchmarking configs', unit="Configuration")
for i in configs:
    main.run(configurations[i])
...
@@ -10,7 +10,7 @@ import numpy as np
 import pygame
 import time
-from steering_wheel import ACTION_SPACE, Controller
+from steering_wheel import ACTION_SPACE, Controller, IDLE

 # find carla module
 try:
...
@@ -87,12 +87,36 @@ class CollisionSensor:
         intensity = math.sqrt(impulse.x**2 + impulse.y**2 + impulse.z**2)
         # print(intensity)
+class LidarSensor:
+    sensor = None
+    data = None
+
+    def __init__(self, world, parent):
+        bp = world.get_blueprint_library().find('sensor.lidar.ray_cast')
+        bp.set_attribute('rotation_frequency', '10')
+        bp.set_attribute('channels', '1')
+        bp.set_attribute('points_per_second', '100')
+        bp.set_attribute('range', '7')
+        bp.set_attribute('sensor_tick', '0.1')
+        bp.set_attribute('upper_fov', '0')
+        bp.set_attribute('lower_fov', '0')
+        weak_self = weakref.ref(self)
+        self.sensor = world.spawn_actor(bp, carla.Transform(), attach_to=parent)
+        self.sensor.listen(lambda event: LidarSensor._on_data(weak_self, event))
+
+    @staticmethod
+    def _on_data(weak_self, event):
+        self = weak_self()
+        if not self:
+            return
+        for m in event:
+            print(m.point)
+        if len(event) > 0:
+            print(event.horizontal_angle)

 class World:
     """ Wrapper for the carla environment, incl. player/vehicle """
     player = None
     collision_sensor = None
+    lidar_sensor = None
     world = None
     blueprint_library = None
     spawn_points = None
...
@@ -101,6 +125,10 @@ class World:
     def __init__(self, world):
         self.world = world
+        settings = self.world.get_settings()
+        settings.fixed_delta_seconds = 0.1
+        settings.synchronous_mode = True
+        self.world.apply_settings(settings)
         self.blueprint_library = self.world.get_blueprint_library()
         self.spawn_points = self.world.get_map().get_spawn_points()
         self.reset()
...
@@ -115,7 +143,8 @@ class World:
     def observation(self):
         if self.player is not None:
             pos = self.player.get_transform()
-            return [pos.location.x, pos.location.y, pos.rotation.yaw]
+            vel = self.player.get_velocity()
+            return [pos.location.x, pos.location.y, vel.x, vel.y, pos.rotation.yaw]
         else:
             return [0,0,0]
...
@@ -128,6 +157,7 @@ class World:
         start_location = self.player.get_location()
         print(str(start_location))
         self.collision_sensor = CollisionSensor(self.world, self.player)
+        # self.lidar_sensor = LidarSensor(self.world, self.player)
         # start_location.x = 288.0
         # start_location.y = 55.0
         # self.player.set_location(start_location)
...
@@ -154,6 +184,9 @@ class World:
         for actor in self.actors:
             actor.destroy()
         self.actors = []
+        if self.collision_sensor is not None:
+            self.collision_sensor.sensor.destroy()
+            self.collision_sensor = None

     def step(self, action):
...
@@ -161,22 +194,24 @@ class World:
         controls = Controller.action_to_controls(action)
         c = carla.VehicleControl(throttle=controls[0], steer=controls[1], brake=controls[2], reverse=controls[3])
         self.player.apply_control(c)
-        reward = self.reward()
+        self.world.tick()
+        reward = self.reward(action)
         self.collision_sensor.collision = None
         return reward

-    def reward(self):
+    def reward(self, action):
         target = carla.Transform(carla.Location(x=-25.534311, y=54.460903, z=0.112781), carla.Rotation(pitch=0.000000, yaw=-175.922913, roll=-6.221135))
         pos = self.player.get_transform()
-        pos_diff = (target.location.x - pos.location.x)**2 + (target.location.y - pos.location.y)**2 + 1
-        rot_diff = (target.rotation.yaw - pos.rotation.yaw)**2 + 1
-        r = ((1/pos_diff) - 0.5)
+        pos_diff = math.sqrt((target.location.x - pos.location.x)**2 + (target.location.y - pos.location.y)**2)
+        rot_diff = (target.rotation.yaw - pos.rotation.yaw)
+        r = (-pos_diff / 10) + 1
         at_final_position = False
-        if pos_diff < 1.2:
+        if pos_diff < 1.8:
             if self.was_big_reward == False:
                 r += 100
                 self.was_big_reward = True
-            if rot_diff < 5:
+            if abs(rot_diff) < 5:
+                r += 100
                 at_final_position = True
         else:
             if self.was_big_reward == True:
...
@@ -184,20 +219,28 @@ class World:
                 r -= 50
         velocity = self.player.get_velocity()
-        v = (velocity.x)**2 + (velocity.y)**2
+        v = math.sqrt((velocity.x)**2 + (velocity.y)**2)
+        if v < 0.01:
+            r -= 1
+        if v > 15.0:
+            r -= 10
         done = False
         if at_final_position and v < 0.01:
+            print("Success")
             done = True
         if self.collision_sensor.collision is not None:
-            r -= 100
+            r -= 250
             done = True
+        if action == IDLE:
+            r -= 1
         return self.observation(), r, done, None
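To make the reworked shaping concrete: the distance term switches from an inverse-square bonus (1/(d^2+1) - 0.5) to a linear falloff, r = 1 - d/10, so it contributes +1 at the target, 0 at 10 m and -1 at 20 m, on top of the bonuses and penalties above. A standalone sketch of just that term (illustration only, not the commit's code):

import math

# Distance term of the new reward, re-implemented in isolation for illustration.
# The default target is the parking spot hard-coded in World.reward above.
def distance_reward(player_xy, target_xy=(-25.534311, 54.460903)):
    d = math.sqrt((target_xy[0] - player_xy[0]) ** 2 + (target_xy[1] - player_xy[1]) ** 2)
    return (-d / 10) + 1

print(distance_reward((-25.534311, 54.460903)))  # 1.0 at the target
print(distance_reward((-15.534311, 54.460903)))  # 0.0 at 10 m distance
print(distance_reward((-5.534311, 54.460903)))   # -1.0 at 20 m distance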

 class ActionSpace:
     n = ACTION_SPACE

 class ObservationSpace:
-    shape = [3]
+    shape = [5]

 class CarlaEnvironment:
     action_space = ActionSpace
     observation_space = ObservationSpace
...
@@ -231,6 +274,12 @@ class CarlaEnvironment:
         self.render()
         return self.world.step(action)

+    def __del__(self):
+        if self.camera is not None:
+            self.camera.sensor.destroy()
+        if self.world is not None:
+            self.world.destroy()

 if __name__ == "__main__":
     LOCALHOST="127.0.0.1"
     SERVER="192.168.188.20"
...
@@ -241,11 +290,11 @@ if __name__ == "__main__":
     while ctrl.is_running():
         clock.tick(20)
         ctrl.on_update()
-        obs, reward, done = env.step(ctrl.get_action(), render=True)
+        obs, reward, done, _ = env.step(ctrl.get_action(), render=True)
+        if done:
+            break
         print(str(reward) + ' ' + str(done))
         cumulated_reward += reward
-        if done:
-            break
     env.world.destroy()
     pygame.quit()
     print(cumulated_reward)
""" Wrapper to abstract different learning environments for an agent. """ """ Wrapper to abstract different learning environments for an agent. """
import numpy as np import numpy as np
from tqdm import tqdm
from tqdm import trange from tqdm import trange
import pandas as pd
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
class Config:
render = False
force_cpu = True
env = None
env_type = 'Lunar'
name = 'ConfigTest'
learn = True
learn_online = True
learn_offline = False
net_layout= [256, 128]
eps_decay = 0.9996
learn_rate= 0.001
run_episodes = 20
offline_epochs = 1000
load_ann = False
load_mem = False
load_from = 'agnt'
save_to = 'saved_agents/'
def conf_to_name(self):
self.name += self.env_type
for layer in self.net_layout:
self.name += '_' + str(layer) + '_'
self.name += str(self.eps_decay) + '_'
self.name += str(self.learn_rate)
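The new Config class carries all run parameters, and conf_to_name derives the run name (and thus the folder and file names used by process_logs) by concatenating environment type, layer sizes, epsilon decay and learning rate. A standalone re-implementation for illustration, applied to the 'SmallNet' benchmark config from the script at the top of this commit:

# Illustration only: the naming scheme of Config.conf_to_name as a free function.
def conf_to_name(name, env_type, net_layout, eps_decay, learn_rate):
    name += env_type
    for layer in net_layout:
        name += '_' + str(layer) + '_'
    name += str(eps_decay) + '_'
    name += str(learn_rate)
    return name

print(conf_to_name('SmallNet', 'Lunar', [128, 32], 0.9996, 0.001))
# -> 'SmallNetLunar_128__32_0.9996_0.001'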

 def step(environment, action):
     """ Perform one iteration in the environment. """
...
@@ -74,11 +101,14 @@ def run(environment, agent, episodes, render=True, learn=True):
     return score_history, avg_score_history

-def process_logs(avg_score_history, loss, title="Title", render=False):
+def process_logs(avg_score_history, loss, conf):
     """ Plot the log history """
+    df = pd.DataFrame(list(zip(loss, avg_score_history)), columns=['Score', 'Average'])
+    df.to_csv(conf.save_to + conf.name + '/' + conf.name + '.csv')
     plt.plot([i+1 for i in range(0, len(loss), 2)], loss[::2])
     plt.plot([i+1 for i in range(0, len(avg_score_history), 2)], avg_score_history[::2], '--')
-    plt.title(title)
-    plt.savefig('saved_agents/' + title + '.png', format="png")
-    if render:
+    plt.title(conf.name)
+    plt.savefig(conf.save_to + conf.name + '/' + conf.name + '.png', format="png")
+    if conf.render:
         plt.show()
...
@@ -4,65 +4,55 @@ Run your desired environment and agent configuration.
 import os
 import atexit
-from carla_environment import CarlaEnvironment
-CARLA=True
-# import gym
+import gym
 from agents import QAgent as QAgent
 import environment_wrapper as ew

-# Allow GPU usage or force tensorflow to use the CPU.
-FORCE_CPU=True
-if FORCE_CPU:
+def run(conf):
+    # 0. Allow GPU usage or force tensorflow to use the CPU.
+    if conf.force_cpu:
         os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
         os.environ["CUDA_VISIBLE_DEVICES"] = ""

-RENDER = False
-if __name__ == '__main__':
-    # 1. Create an environment
-    env = CarlaEnvironment(render=RENDER)
     # 2. Create a learning agent
-    marvin = QAgent(env.action_space.n, env.observation_space.shape[0], 'Carla')
+    marvin = QAgent(conf)

     # (2.5) *optional* Load agent memory and/or net from disk.
-    agnt = 'Carla'
-    LOAD_ANN = False
-    LOAD_MEMORIES = True
-    if LOAD_ANN or LOAD_MEMORIES:
-        marvin.load('saved_agents/' + agnt + '/' + agnt, net=LOAD_ANN, memory=LOAD_MEMORIES)
+    if conf.load_ann or conf.load_mem:
+        marvin.load(conf.save_to + conf.load_from + '/' + conf.load_from, net=conf.load_ann, memory=conf.load_mem)

     # 3. Set your configurations for the run.
-    LEARNING = True
-    LEARN_ONLINE = True
-    LEARN_OFFLINE = True
-    RUN_EPISODES = 100
-    LEARN_OFFLINE_EPOCHS = 1000
-    SAVE_PATH = "./saved_agents"

     # Register an *atexit* callback,
     # to store the current result of the agent
     # if the program is interrupted.
-    if LEARNING:
-        atexit.register(marvin.save, SAVE_PATH)
+    if conf.learn:
+        atexit.register(marvin.save, conf.save_to)

     # Offline training of the agent with
     # previous collected and saved memories.
-    if LEARN_OFFLINE and LEARNING:
-        ew.learn_offline(marvin, epochs=LEARN_OFFLINE_EPOCHS)
+    if conf.learn_offline and conf.learn:
+        ew.learn_offline(marvin, epochs=conf.offline_epochs)

     # Run the agent in the environment for the
     # number of specified epochs. Either to
     # verify the performance of the agent or
     # to train the agent.
-    _LEARN = LEARN_ONLINE and LEARNING
-    loss, avg_score = ew.run(env, marvin, RUN_EPISODES, render=RENDER, learn=_LEARN)
+    _LEARN = conf.learn_online and conf.learn
+    loss, avg_score = ew.run(conf.env, marvin, conf.run_episodes, render=conf.render, learn=_LEARN)

     # Save the final training result of the agent.
-    if LEARNING:
-        marvin.save(SAVE_PATH)
-    if CARLA:
-        env.world.destroy()
-    # Show the result of the run.
-    ew.process_logs(avg_score, loss, title=marvin.name, render=RENDER)
+    if conf.learn:
+        marvin.save(conf.save_to)
+
+    ew.process_logs(avg_score, loss, conf)
+
+    if conf.env_type == 'Carla':
+        conf.env.world.destroy()
+
+if __name__ == '__main__':
+    conf = ew.Config()
+    conf.env = gym.make('LunarLander-v2')
+    conf.env_type = 'Lunar'
+    conf.conf_to_name()
+    run(conf)
\ No newline at end of file
...
@@ -6,17 +6,21 @@ from keras.activations import relu, linear
 from keras.regularizers import l2

 class QNet:
-    net = Sequential()
-    learn_rate = 0.001
+    learn_rate = 0.0005

-    def __init__(self, action_space, state_space):
-        self.compile_net(action_space, state_space)
+    def __init__(self, conf):
+        self.net = Sequential()
+        self.learn_rate = conf.learn_rate
+        self.compile_net(conf)
         self.net.summary()

-    def compile_net(self, action_space, state_space):
-        self.net.add(Dense(256, input_dim=state_space, activation=relu))
-        self.net.add(Dense(128, activation=relu))
-        self.net.add(Dense(action_space, activation=linear))
+    def compile_net(self, conf):
+        self.net.add(Dense(conf.net_layout[0], input_dim=conf.env.observation_space.shape[0], activation=relu))
+        for layer in range(1, len(conf.net_layout)):
+            self.net.add(Dense(conf.net_layout[layer], activation=relu))
+        self.net.add(Dense(conf.env.action_space.n, activation=linear))
         self.net.compile(loss='mse', optimizer=Adam(lr=self.learn_rate))

     def predict(self, state):
...
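The rewritten compile_net builds the hidden layers from conf.net_layout instead of hard-coding 256/128, and takes the input and output sizes from the configured environment. Assuming net_layout = [256, 128] and LunarLander-v2 (8-dimensional observation, 4 actions), the loop produces the equivalent of this hand-written model (sketch only, using the same old-style Keras API as the file):

from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from keras.activations import relu, linear

# Equivalent network for net_layout = [256, 128] on LunarLander-v2; illustration only.
net = Sequential()
net.add(Dense(256, input_dim=8, activation=relu))  # net_layout[0], fed with the 8-dim observation
net.add(Dense(128, activation=relu))               # net_layout[1]
net.add(Dense(4, activation=linear))               # one linear Q-value output per action
net.compile(loss='mse', optimizer=Adam(lr=0.001))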
...
@@ -67,24 +67,33 @@ class ManualSteeringWheel:
     def action_to_controls(action):
         if action == IDLE:
             return [0.0, 0.0, 0.0, False]
         if action == FORWARD:
-            return [0.8, 0.0, 0.0, False]
+            return [0.6, 0.0, 0.0, False]
         if action == REVERSE:
-            return [0.8, 0.0, 0.0, True]
+            return [0.5, 0.0, 0.0, True]
         if action == FORWARD_LEFT:
-            return [0.8, -0.4, 0.0, False]
+            return [0.4, -0.4, 0.0, False]
         if action == FORWARD_RIGHT:
-            return [0.8, 0.4, 0.0, False]
+            return [0.4, 0.4, 0.0, False]
         if action == LEFT:
             return [0.0, -0.4, 0.0, False]
         if action == RIGHT:
-            return [0.0, 0.5, 0.0, False]
+            return [0.0, 0.4, 0.0, False]
         if action == BRAKE:
             return [0.0, 0.0, 0.5, False]
         if action == REVERSE_LEFT:
-            return [0.5, -0.4, 0.0, True]
+            return [0.4, -0.4, 0.0, True]
         if action == REVERSE_RIGHT:
-            return [0.5, 0.4, 0.0, True]
+            return [0.4, 0.4, 0.0, True]

     def get_action(self):
         action = IDLE
...