Commit 0fe0f9b8 authored by Armin Co

Working Environment

parent c2fed4b4
Branch: change_modified_reward_v0
@@ -5,6 +5,7 @@ benchmarks
 baselines
 simple
 final_wo_o
+orientation
 workspace.code-workspace
 test
 tech_demo.py
...
@@ -3,6 +3,7 @@ import numpy as np
 from memory import Memory
 from networks import QNet
 from steering_wheel import Controller
+from keras.callbacks import EarlyStopping
 class QAgent:
     gamma = 0.99
@@ -38,7 +39,7 @@ class QAgent:
         if offline:
             batch_size = self.OFFLINE_BATCHSIZE
-        if len(self.memory.history) < batch_size:
+        if len(self.memory.history) < batch_size * 35:
             return
         states, actions, rewards, following_states, dones = self.memory.get_batch(
@@ -85,6 +86,8 @@ class DQAgent(QAgent):
         return np.argmax(action_values[0])
     def learn(self, offline=False, epochs=1):
+        if self.epsilon > self.epsilon_min:
+            self.epsilon *= self.epsilon_decay
         for _ in range(2):
             if np.random.rand() < 0.5:
                 temp = self.q
@@ -93,7 +96,7 @@ class DQAgent(QAgent):
         batch_size = self.online_batch_size
         if offline:
             batch_size = self.OFFLINE_BATCHSIZE
-        if len(self.memory.history) < batch_size:
+        if len(self.memory.history) < batch_size * 35:
             return
         states, actions, rewards, following_states, dones = self.memory.get_batch(
             batch_size)
@@ -103,12 +106,11 @@ class DQAgent(QAgent):
         idx = np.array([i for i in range(batch_size)])
         y[[idx], [actions]] = qMax
         if offline:
-            history = self.q.net.fit(states, y, epochs=epochs, verbose=0)
+            callback = EarlyStopping(monitor='loss', patience=2, min_delta=0.1, restore_best_weights=True)
+            history = self.q.net.fit(states, y, epochs=epochs, verbose=0, callbacks=[callback])
             loss = history.history['loss'][-1]
         else:
             loss = self.q.fit(states, y, epochs)
-        if self.epsilon > self.epsilon_min:
-            self.epsilon *= self.epsilon_decay
         return loss
     def load(self, path, net=True, memory=True):
...
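Two behavioural changes stand out in DQAgent.learn: the epsilon decay now runs at the top of the method, so exploration keeps shrinking even while training is skipped, and training is skipped until the replay memory holds 35 batches worth of transitions. A minimal, self-contained sketch of that gating order, assuming illustrative constants (the real values live in this repo's Config objects):

import numpy as np

# Illustrative constants; not taken from this commit.
epsilon, epsilon_min, epsilon_decay = 1.0, 0.01, 0.9915
batch_size = 64
memory = []  # stands in for self.memory.history

def learn_step(new_samples):
    """Mimic the gating order in DQAgent.learn after this commit."""
    global epsilon
    memory.extend(new_samples)
    # 1) decay exploration first, before any early return
    if epsilon > epsilon_min:
        epsilon *= epsilon_decay
    # 2) train only once the memory holds 35 batches worth of transitions
    if len(memory) < batch_size * 35:
        return 'skipped (warm-up)'
    return 'trained on a batch of %d samples' % batch_size

for _ in range(3):
    print(learn_step(np.zeros(1000)), 'epsilon=%.4f' % epsilon)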
@@ -266,7 +266,7 @@ class World:
         if pos_diff < 1.1 and v < 0.01:
             done = True
             r += 150
-            if yaw_dif < 0.01:
+            if abs(yaw_dif) < 2:
                 r += 50
         if self.collision_sensor.collision is not None:
...
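The orientation bonus is relaxed here: previously the heading error had to be below 0.01 (and only in the positive direction), now any error with magnitude below 2 earns the extra reward, which suggests yaw_dif is measured in degrees after this change; that unit is an assumption, not stated in the diff. A minimal sketch of the terminal reward as it reads after the commit (variable names and constants from the diff, function wrapper hypothetical):

def parking_reward(pos_diff, v, yaw_dif, r=0.0):
    """Terminal bonus as in World: +150 for stopping close to the goal,
    +50 extra if the final heading error is within 2 (degrees assumed)."""
    done = False
    if pos_diff < 1.1 and v < 0.01:
        done = True
        r += 150
        if abs(yaw_dif) < 2:
            r += 50
    return r, done

print(parking_reward(0.8, 0.0, -1.5))   # (200.0, True)  well aligned
print(parking_reward(0.8, 0.0, 10.0))   # (150.0, True)  parked but skewed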
@@ -57,6 +57,8 @@ def reset(environment):
 def one_episode(environment, agent, render, learn, conf=None, max_steps=1000):
     """ Perform one episode of the agent in the environment. """
     score = 0
+    if conf.env_type == 'Carla':
+        max_steps = 300
     state = reset(environment)
     for _ in range(max_steps):
         if render:
@@ -88,6 +90,9 @@ def learn_offline(agent, conf):
         desc = ('Loss: %05.4f' %(loss)) + desc_train
         pbar.set_description(desc)
         pbar.refresh()
+        if loss > 1000:
+            print("Loss exceeded 1000!!")
+            exit()
         if i % conf.offline_validate_every_x_iteration == 1 and conf.offline_validate_every_x_iteration is not -1:
             agent.epsilon = agent.epsilon_min
             score = one_episode(conf.env, agent, conf.render, False, conf=conf)
@@ -97,6 +102,7 @@ def learn_offline(agent, conf):
             avg_score_history.append(is_solved)
             if is_solved > IS_SOLVED:
                 break
     if conf.offline_validate_every_x_iteration is not -1:
         process_logs(avg_score_history, score_history, conf)
...
@@ -34,7 +34,7 @@ class QNet:
         self, states): return self.net.predict_on_batch(states)
     def fit(self, X, Y, epochs=1, verbose=0):
-        callback = EarlyStopping(monitor='loss', patience=3)
+        callback = EarlyStopping(monitor='loss', patience=2, min_delta=0.1, restore_best_weights=True)
         history = self.net.fit(X, Y, epochs=epochs, verbose=verbose, callbacks=[callback])
        return history.history['loss'][-1]
...
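The early stopping in QNet.fit becomes stricter: patience drops from 3 to 2, an epoch must improve the monitored training loss by at least min_delta=0.1 to count as progress, and restore_best_weights=True rolls the network back to the best epoch instead of keeping the last one. A minimal sketch of the same callback configuration on a throwaway model (standard Keras API; the model and data below are placeholders, not from this repo):

import numpy as np
from keras.callbacks import EarlyStopping
from keras.models import Sequential
from keras.layers import Dense

# Placeholder model and data, purely to exercise the callback settings.
model = Sequential([Dense(32, activation='relu', input_shape=(4,)), Dense(2)])
model.compile(optimizer='adam', loss='mse')

callback = EarlyStopping(monitor='loss', patience=2, min_delta=0.1,
                         restore_best_weights=True)
X, Y = np.random.rand(256, 4), np.random.rand(256, 2)
history = model.fit(X, Y, epochs=50, verbose=0, callbacks=[callback])
print('stopped after', len(history.history['loss']), 'epochs')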
@@ -5,16 +5,15 @@ from agents import DQAgent, QAgent
 c = ew.Config()
-c.name = 'DoubleCartPole'
-c.render = False
+c.name = '01'
+c.render = True
 c.env = gym.make('CartPole-v0')
 c.env_type = 'CartPole'
-c.net_layout = [128, 64, 32]
+c.net_layout = [128, 64]
 c.eps_decay = 0.9991
 c.learn_rate= 0.001
 c.run_episodes = 300
-c.save_to = 'benchmarks/'
+c.save_to = 'Screencast/'
 c.conf_to_name()
-c.agent = QAgent(c)
+c.agent = DQAgent(c)
 main.run(c)
@@ -2,7 +2,7 @@ import main
 import environment_wrapper as ew
 import gym
 import copy
-from agents import QAgent, DQAgent
+from agents import CarlaManual, QAgent, DQAgent
 from carla_environment import CarlaEnvironment
 c = ew.Config()
@@ -19,101 +19,50 @@ c.load_from = 'Carla_2_256__128__128_0.9995_0.001_1DBL'
 c.load_mem = True
 c.load_ann = True
-# o = copy.deepcopy(c)
-# o.name = 'JTAP_4'
-# o.force_cpu = True
-# o.render = True
-# o.learn = True
-# o.env_type = 'Carla'
-# o.net_layout = [256, 128, 128]
-# o.save_to = 'simple/'
-# o.load_from = 'Carla_JTAP_0_256__128__32_0.9995_0.001_1DBL'
-# o.load_mem = True
-# o.load_ann = False
-# o.learn_offline = True
-# o.offline_epochs = 75000
-# o.offline_batchsize = 256
-# o.learn_iterations = 1
-# o.offline_validate_every_x_iteration = -1
-# o.learn_online = False
-# o.eps_decay = 0.9995
-# o.learn_rate= 0.001
-# o.run_episodes = 15
 o = copy.deepcopy(c)
-o.name = 'JTAP_0'
+o.name = 'Final_wo_Obstacles_052O_2M'
 o.force_cpu = True
-o.render = True
+o.render = False
 o.learn = True
 o.env_type = 'Carla'
-o.net_layout = [256, 128]
-o.save_to = 'orientation/'
-o.load_from = ''
-o.load_mem = False
+o.net_layout = [1024, 1024, 256, 128]
+o.save_to = 'final_wo_o/'
+o.load_from = 'Carla_Final_wo_Obstacles_02_2M_256__256__128_0.99974_0.001_1DBL'
+o.load_mem = True
 o.load_ann = False
 o.learn_online = True
-o.eps_decay = 0.9995
-o.learn_rate= 0.001
-o.run_episodes = 750
+o.eps_decay = 0.9915
+o.learn_rate= 0.01
+o.run_episodes = 550
-o.learn_offline = True
-o.offline_epochs = 100000
-o.offline_batchsize = 64
-o.offline_validate_every_x_iteration = 500
-o.learn_iterations = 1
 validate = copy.deepcopy(c)
-validate.name = 'JTAP_Validate'
+validate.name = 'Validate0'
 validate.render = True
-validate.learn = True
+validate.learn = False
 validate.env_type = 'Carla'
 validate.net_layout = [256, 128]
 validate.save_to = 'simple/'
-validate.load_from = 'Carla_JTAP_0_256__128__32_0.9995_0.001_1DBL'
+validate.load_from = 'Carla_JTAP_1_256__128__128_0.9995_0.001_1'
 validate.load_mem = False
 validate.load_ann = True
 validate.learn_offline = False
-validate.offline_epochs = 1500
+validate.offline_batchsize = 64000
+validate.offline_epochs = 20
 validate.learn_iterations = 1
-validate.offline_validate_every_x_iteration = -1
+validate.offline_validate_every_x_iteration = 1
 validate.learn_online = True
-validate.eps_decay = 0.9995
+validate.eps_decay = 0.95
-validate.learn_rate= 0.001
+validate.learn_rate= 0.0000005
 validate.run_episodes = 10
-# t = copy.deepcopy(c)
-# t.render = True
-# t.net_layout = [1024, 1024, 256, 32]
-# t.eps_decay = 0.9993
-# t.learn_rate = 0.0005
-# t.force_cpu = False
-# t.load_mem = True
-# t.load_ann = False
-# t.save_to = 'test/'
-# t.load_from = 'Carla_CarlaOffline_1024__1024__256__32_0.9993_0.0005_50'
-# t.name = 'Offline'
-# t.learn_offline = True
-# t.learn_online = True
-# t.run_episodes = 500
-# t.offline_epochs = 100
-# t.learn_iterations = 100
-# t.offline_validate_every_x_iteration = -1
-configuration = o
+configuration = validate
 configuration.env = CarlaEnvironment(render=configuration.render, manual=False)
 configuration.conf_to_name()
-configuration.agent = QAgent(configuration)
+configuration.agent = DQAgent(configuration)
+# configuration.agent = CarlaManual(configuration)
 main.run(configuration)
-# o = copy.deepcopy(c)
-# o.name = '5D'
-# o.render = True
-# o.learn = False
-# o.env_type = 'Carla'
-# o.net_layout = [1024, 512, 256]
-# o.save_to = 'test/'
-# o.load_from = 'Carla_01D_1024__512__512_0.9991_0.00025_1DBLoffline'
-# o.load_mem = True
-# o.load_ann = True
-# o.learn_offline = False
-# o.offline_epochs = 1000
-# o.learn_iterations = 1
-# o.offline_validate_every_x_iteration = -1
-# o.learn_online = True
-# o.eps_decay = 0.9991
-# o.learn_rate= 0.0005
-# o.run_episodes = 20
\ No newline at end of file
@@ -5,4 +5,4 @@
 # apt install carla-simulator
 #
-DISPLAY= /opt/carla-simulator/CarlaUE4.sh -benchmark -fps=15 -quality-level=Low -opengl
+DISPLAY= /opt/carla-simulator/CarlaUE4.sh -benchmark -fps=10 -quality-level=Low -opengl
\ No newline at end of file