Commit 4764b70a authored by Armin Co

Scriptable Learning

parent 747e4d42
@@ -7,14 +7,15 @@ class QAgent:
gamma = 0.99
epsilon = 1.0
epsilon_min = 0.01
epsilon_decay = 0.9996
epsilon_decay = 0.9999
online_batch_size = 64
def __init__(self, action_space, state_space, name):
self.q = QNet(action_space, state_space)
def __init__(self, conf):
self.q = QNet(conf)
self.memory = Memory()
self.action_space = action_space
self.name = name
self.action_space = conf.env.action_space.n
self.name = conf.name
self.epsilon_decay = conf.eps_decay
def get_action(self, state):
if np.random.rand() <= self.epsilon:
@@ -31,7 +32,7 @@ class QAgent:
epochs = 1
if offline:
batch_size = 4096
batch_size = 2048
if len(self.memory.history) < batch_size:
return
......
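For reference, a minimal sketch (not part of the commit) of what the slower decay means in practice, assuming epsilon starts at 1.0 and is multiplied by epsilon_decay once per training call until it reaches epsilon_min = 0.01:

import math

# Number of decay steps n until epsilon_min is reached: 0.01 = 1.0 * decay**n
for decay in (0.9996, 0.9999):
    n = math.log(0.01) / math.log(decay)
    print(decay, round(n))  # ~11511 steps for 0.9996, ~46049 for 0.9999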
import main
import environment_wrapper as ew
import gym
from carla_environment import CarlaEnvironment
import copy
from tqdm import trange
c = ew.Config()
c.name = 'Base'
c.render = False
c.env = gym.make('LunarLander-v2')
c.env_type = 'Lunar'
c.net_layout = [256, 128]
c.eps_decay = 0.9996
c.learn_rate= 0.001
c.run_episodes = 300
c.save_to = 'benchmarks/'
smallNet = copy.deepcopy(c)
smallNet.name = 'SmallNet'
smallNet.net_layout = [128, 32]
smallNet.conf_to_name()
normalNet = copy.deepcopy(c)
normalNet.name = 'NormalNet'
normalNet.net_layout = [256, 128]
normalNet.conf_to_name()
normalSlowDecay = copy.deepcopy(c)
normalSlowDecay.name = 'NormalSlowDecay'
normalSlowDecay.net_layout = [256, 128]
normalSlowDecay.eps_decay = 0.99995
normalSlowDecay.conf_to_name()
normalSlowLearn = copy.deepcopy(c)
normalSlowLearn.name = 'NormalSlowLearn'
normalSlowLearn.net_layout = [256, 128]
normalSlowLearn.learn_rate = 0.0005
normalSlowLearn.conf_to_name()
largeNet = copy.deepcopy(c)
largeNet.name = 'LargeNet'
largeNet.net_layout = [512, 256]
largeNet.conf_to_name()
deepNet = copy.deepcopy(c)
deepNet.name = 'DeepNet'
deepNet.net_layout = [256, 128, 128]
deepNet.conf_to_name()
# configurations = [smallNet, normalNet ]
# configurations = [normalSlowDecay, normalSlowLearn]
configurations = [largeNet, deepNet]
configs = trange(len(configurations), desc='Benchmarking configs', unit="Configuration")
for i in configs:
main.run(configurations[i])
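The same pattern extends to further benchmark variants; a sketch (the 'FastLearn' configuration is hypothetical and only illustrates the workflow):

fastLearn = copy.deepcopy(c)
fastLearn.name = 'FastLearn'       # hypothetical variant, not part of the commit
fastLearn.learn_rate = 0.002
fastLearn.conf_to_name()
# configurations = [fastLearn]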
@@ -10,7 +10,7 @@ import numpy as np
import pygame
import time
from steering_wheel import ACTION_SPACE, Controller
from steering_wheel import ACTION_SPACE, Controller, IDLE
# find carla module
try:
@@ -87,12 +87,36 @@ class CollisionSensor:
intensity = math.sqrt(impulse.x**2 + impulse.y**2 + impulse.z**2)
# print(intensity)
class LidarSensor:
sensor = None
data = None
def __init__(self, world, parent):
bp = world.get_blueprint_library().find('sensor.lidar.ray_cast')
bp.set_attribute('rotation_frequency', '10')
bp.set_attribute('channels', '1')
bp.set_attribute('points_per_second', '100')
bp.set_attribute('range', '7')
bp.set_attribute('sensor_tick', '0.1')
bp.set_attribute('upper_fov', '0')
bp.set_attribute('lower_fov', '0')
weak_self = weakref.ref(self)
self.sensor = world.spawn_actor(bp, carla.Transform(), attach_to=parent)
self.sensor.listen(lambda event: LidarSensor._on_data(weak_self, event))
def _on_data(weak_self, event):
self = weak_self()
if not self:
return
for m in event: print(m.point)
if len(event) > 0:
print(event.horizontal_angle)
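A quieter variant of the callback could buffer the latest sweep in the data attribute instead of printing every point; a sketch, assuming the same CARLA lidar API used above (each measurement exposes .point):

def _on_data(weak_self, event):
    # Resolve the weak reference; the sensor can outlive the wrapper.
    self = weak_self()
    if not self:
        return
    # Keep only the most recent sweep for later processing.
    self.data = [m.point for m in event]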
class World:
""" Wrapper for the carla environment, incl. player/vehicle """
player = None
collision_sensor = None
lidar_sensor = None
world = None
blueprint_library = None
spawn_points = None
@@ -101,6 +125,10 @@ class World:
def __init__(self, world):
self.world = world
settings = self.world.get_settings()
settings.fixed_delta_seconds = 0.1
settings.synchronous_mode = True
self.world.apply_settings(settings)
self.blueprint_library = self.world.get_blueprint_library()
self.spawn_points = self.world.get_map().get_spawn_points()
self.reset()
@@ -115,7 +143,8 @@ class World:
def observation(self):
if self.player is not None:
pos = self.player.get_transform()
return [pos.location.x, pos.location.y, pos.rotation.yaw]
vel = self.player.get_velocity()
return [pos.location.x, pos.location.y, vel.x, vel.y, pos.rotation.yaw]
else:
return [0, 0, 0, 0, 0]  # match the new 5-value observation
@@ -128,6 +157,7 @@ class World:
start_location = self.player.get_location()
print(str(start_location))
self.collision_sensor = CollisionSensor(self.world, self.player)
# self.lidar_sensor = LidarSensor(self.world, self.player)
# start_location.x = 288.0
# start_location.y = 55.0
# self.player.set_location(start_location)
@@ -154,6 +184,9 @@ class World:
for actor in self.actors:
actor.destroy()
self.actors = []
if self.collision_sensor is not None:
self.collision_sensor.sensor.destroy()
self.collision_sensor = None
def step(self, action):
@@ -161,22 +194,24 @@ class World:
controls = Controller.action_to_controls(action)
c = carla.VehicleControl(throttle=controls[0], steer=controls[1], brake=controls[2], reverse=controls[3])
self.player.apply_control(c)
reward = self.reward()
self.world.tick()
reward = self.reward(action)
self.collision_sensor.collision = None
return reward
def reward(self):
def reward(self, action):
target = carla.Transform(carla.Location(x=-25.534311, y=54.460903, z=0.112781), carla.Rotation(pitch=0.000000, yaw=-175.922913, roll=-6.221135))
pos = self.player.get_transform()
pos_diff = (target.location.x - pos.location.x)**2 + (target.location.y - pos.location.y)**2 + 1
rot_diff = (target.rotation.yaw - pos.rotation.yaw)**2 + 1
r = ((1/pos_diff) - 0.5)
pos_diff = math.sqrt((target.location.x - pos.location.x)**2 + (target.location.y - pos.location.y)**2)
rot_diff = (target.rotation.yaw - pos.rotation.yaw)
r = (-pos_diff / 10) + 1
at_final_position = False
if pos_diff < 1.2:
if pos_diff < 1.8:
if self.was_big_reward == False:
r += 100
self.was_big_reward = True
if rot_diff < 5:
if abs(rot_diff) < 5:
r += 100
at_final_position = True
else:
if self.was_big_reward == True:
@@ -184,20 +219,28 @@ class World:
r -= 50
velocity = self.player.get_velocity()
v = (velocity.x)**2 + (velocity.y)**2
v = math.sqrt((velocity.x)**2 + (velocity.y)**2)
if v < 0.01:
r -= 1
if v > 15.0:
r -= 10
done = False
if at_final_position and v < 0.01:
print("Success")
done = True
if self.collision_sensor.collision is not None:
r -= 100
r -= 250
done = True
if action == IDLE:
r -= 1
return self.observation(), r, done, None
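Isolated for clarity, a sketch of just the positional shaping term of the new reward (arrival bonus, velocity, idle and collision terms omitted): +1 at the target, falling off by 0.1 per metre of Euclidean distance.

import math

def distance_reward(pos_xy, target_xy):
    # Euclidean distance in the ground plane.
    d = math.sqrt((target_xy[0] - pos_xy[0])**2 + (target_xy[1] - pos_xy[1])**2)
    return (-d / 10.0) + 1.0

print(distance_reward((0.0, 0.0), (3.0, 4.0)))  # 5 m from the target -> 0.5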
class ActionSpace:
n = ACTION_SPACE
class ObservationSpace:
shape = [3]
shape = [5]
class CarlaEnvironment:
action_space = ActionSpace
observation_space = ObservationSpace
@@ -231,6 +274,12 @@ class CarlaEnvironment:
self.render()
return self.world.step(action)
def __del__(self):
if self.camera is not None:
self.camera.sensor.destroy()
if self.world is not None:
self.world.destroy()
if __name__ == "__main__":
LOCALHOST="127.0.0.1"
SERVER="192.168.188.20"
@@ -241,11 +290,11 @@ if __name__ == "__main__":
while ctrl.is_running():
clock.tick(20)
ctrl.on_update()
obs, reward, done = env.step(ctrl.get_action(), render=True)
if done:
break
obs, reward, done, _ = env.step(ctrl.get_action(), render=True)
print(str(reward) + ' ' + str(done))
cumulated_reward += reward
if done:
break
env.world.destroy()
pygame.quit()
print(cumulated_reward)
""" Wrapper to abstract different learning environments for an agent. """
import numpy as np
from tqdm import tqdm
from tqdm import trange
import pandas as pd
import matplotlib.pyplot as plt
class Config:
render = False
force_cpu = True
env = None
env_type = 'Lunar'
name = 'ConfigTest'
learn = True
learn_online = True
learn_offline = False
net_layout= [256, 128]
eps_decay = 0.9996
learn_rate= 0.001
run_episodes = 20
offline_epochs = 1000
load_ann = False
load_mem = False
load_from = 'agnt'
save_to = 'saved_agents/'
def conf_to_name(self):
self.name += self.env_type
for layer in self.net_layout:
self.name += '_' + str(layer) + '_'
self.name += str(self.eps_decay) + '_'
self.name += str(self.learn_rate)
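Not part of the commit, but for illustration: with the defaults above, conf_to_name builds an identifier that encodes the key hyperparameters, e.g. for the 'NormalNet' benchmark configuration:

c = Config()
c.name = 'NormalNet'
c.net_layout = [256, 128]
c.conf_to_name()
print(c.name)  # -> 'NormalNetLunar_256__128_0.9996_0.001'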
def step(environment, action):
""" Perform one iteratino in the environment. """
@@ -74,11 +101,14 @@ def run(environment, agent, episodes, render=True, learn=True):
return score_history, avg_score_history
def process_logs(avg_score_history, loss, title="Title", render=False):
def process_logs(avg_score_history, loss, conf):
df = pd.DataFrame(list(zip(loss, avg_score_history)), columns=['Score', 'Average'])
df.to_csv(conf.save_to + conf.name + '/' + conf.name + '.csv')
""" Plot the log history """
plt.plot([i+1 for i in range(0, len(loss), 2)], loss[::2])
plt.plot([i+1 for i in range(0, len(avg_score_history), 2)], avg_score_history[::2], '--')
plt.title(title)
plt.savefig('saved_agents/' + title + '.png', format="png")
if render:
plt.title(conf.name)
plt.savefig(conf.save_to + conf.name + '/' + conf.name + '.png', format="png")
if conf.render:
plt.show()
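process_logs writes the CSV and the PNG into conf.save_to + conf.name + '/', so that folder has to exist; if nothing else in the pipeline creates it, a small helper like this sketch could be called first:

import os

def ensure_output_dir(conf):
    # Create the per-run output folder used by process_logs if it is missing.
    os.makedirs(conf.save_to + conf.name, exist_ok=True)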
@@ -4,65 +4,55 @@ Run your desired environment and agent configuration.
import os
import atexit
from carla_environment import CarlaEnvironment
CARLA=True
# import gym
import gym
from agents import QAgent as QAgent
import environment_wrapper as ew
# Allow GPU usage or force tensorflow to use the CPU.
FORCE_CPU=True
if FORCE_CPU:
def run(conf):
# 0. Allow GPU usage or force tensorflow to use the CPU.
if conf.force_cpu:
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = ""
RENDER = False
if __name__ == '__main__':
# 1. Create an environment
env = CarlaEnvironment(render=RENDER)
# 2. Create a learning agent
marvin = QAgent(env.action_space.n, env.observation_space.shape[0], 'Carla')
marvin = QAgent(conf)
# (2.5) *optional* Load agent memory and/or net from disk.
agnt = 'Carla'
LOAD_ANN = False
LOAD_MEMORIES = True
if LOAD_ANN or LOAD_MEMORIES:
marvin.load('saved_agents/' + agnt + '/' + agnt, net=LOAD_ANN, memory=LOAD_MEMORIES)
if conf.load_ann or conf.load_mem:
marvin.load(conf.save_to + conf.load_from + '/' + conf.load_from, net=conf.load_ann, memory=conf.load_mem)
# 3. Set your configurations for the run.
LEARNING = True
LEARN_ONLINE = True
LEARN_OFFLINE = True
RUN_EPISODES = 100
LEARN_OFFLINE_EPOCHS = 1000
SAVE_PATH = "./saved_agents"
# Register an *atexit* callback,
# to store the current result of the agent
# if the program is interrupted.
if LEARNING:
atexit.register(marvin.save, SAVE_PATH)
if conf.learn:
atexit.register(marvin.save, conf.save_to)
# Offline training of the agent with
# previous collected and saved memories.
if LEARN_OFFLINE and LEARNING:
ew.learn_offline(marvin, epochs=LEARN_OFFLINE_EPOCHS)
if conf.learn_offline and conf.learn:
ew.learn_offline(marvin, epochs=conf.offline_epochs)
# Run the agent in the environment for the
# number of specified epochs. Either to
# verify the performance of the agent or
# to train the agent.
_LEARN = LEARN_ONLINE and LEARNING
loss, avg_score = ew.run(env, marvin, RUN_EPISODES, render=RENDER, learn=_LEARN)
_LEARN = conf.learn_online and conf.learn
loss, avg_score = ew.run(conf.env, marvin, conf.run_episodes, render=conf.render, learn=_LEARN)
# Save the final training result of the agent.
if LEARNING:
marvin.save(SAVE_PATH)
if conf.learn:
marvin.save(conf.save_to)
if CARLA:
env.world.destroy()
# Show the result of the run.
ew.process_logs(avg_score, loss, title=marvin.name, render=RENDER)
ew.process_logs(avg_score, loss, conf)
if conf.env_type == 'Carla':
conf.env.world.destroy()
if __name__ == '__main__':
conf = ew.Config()
conf.env = gym.make('LunarLander-v2')
conf.env_type = 'Lunar'
conf.conf_to_name()
run(conf)
\ No newline at end of file
@@ -6,17 +6,21 @@ from keras.activations import relu, linear
from keras.regularizers import l2
class QNet:
net = Sequential()
learn_rate = 0.001
def __init__(self, action_space, state_space):
self.compile_net(action_space, state_space)
learn_rate = 0.0005
def __init__(self, conf):
self.net = Sequential()
self.learn_rate = conf.learn_rate  # assign before compile_net so the optimizer uses the configured rate
self.compile_net(conf)
self.net.summary()
def compile_net(self, action_space, state_space):
self.net.add(Dense(256, input_dim=state_space, activation=relu))
self.net.add(Dense(128, activation=relu))
self.net.add(Dense(action_space, activation=linear))
def compile_net(self, conf):
self.net.add(Dense(conf.net_layout[0], input_dim=conf.env.observation_space.shape[0], activation=relu))
for layer in range(1, len(conf.net_layout)):
self.net.add(Dense(conf.net_layout[layer], activation=relu))
self.net.add(Dense(conf.env.action_space.n, activation=linear))
self.net.compile(loss='mse', optimizer=Adam(lr=self.learn_rate))
def predict(self, state):
......
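For comparison, a sketch of the model that compile_net now builds for the 'LargeNet' benchmark configuration on LunarLander-v2 (assuming its 8-dimensional observation and 4 discrete actions), written out by hand:

from keras.models import Sequential
from keras.layers import Dense
from keras.activations import relu, linear
from keras.optimizers import Adam

net = Sequential()
net.add(Dense(512, input_dim=8, activation=relu))  # net_layout[0]
net.add(Dense(256, activation=relu))               # net_layout[1]
net.add(Dense(4, activation=linear))               # one Q-value per action
net.compile(loss='mse', optimizer=Adam(lr=0.001))  # learn_rate from the config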
@@ -67,24 +67,33 @@ class ManualSteeringWheel:
def action_to_controls(action):
if action == IDLE:
return [0.0, 0.0, 0.0, False]
if action == FORWARD:
return [0.8, 0.0, 0.0, False]
return [0.6, 0.0, 0.0, False]
if action == REVERSE:
return [0.8, 0.0, 0.0, True]
return [0.5, 0.0, 0.0, True]
if action == FORWARD_LEFT:
return [0.8, -0.4, 0.0, False]
return [0.4, -0.4, 0.0, False]
if action == FORWARD_RIGHT:
return [0.8, 0.4, 0.0, False]
return [0.4, 0.4, 0.0, False]
if action == LEFT:
return [0.0, -0.4, 0.0, False]
if action == RIGHT:
return [0.0, 0.5, 0.0, False]
return [0.0, 0.4, 0.0, False]
if action == BRAKE:
return [0.0, 0.0, 0.5, False]
if action == REVERSE_LEFT:
return [0.5, -0.4, 0.0, True]
return [0.4, -0.4, 0.0, True]
if action == REVERSE_RIGHT:
return [0.5, 0.4, 0.0, True]
return [0.4, 0.4, 0.0, True]
def get_action(self):
action = IDLE
......
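A sketch of how one of these control tuples is consumed (mirroring World.step above); assumes the carla package is available and that the action constants can be imported from steering_wheel:

import carla
from steering_wheel import Controller, FORWARD_LEFT

controls = Controller.action_to_controls(FORWARD_LEFT)  # [0.4, -0.4, 0.0, False]
control = carla.VehicleControl(throttle=controls[0], steer=controls[1],
                               brake=controls[2], reverse=controls[3])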