Commit 0fe0f9b8 authored by Armin Co

Working Environment

parent c2fed4b4
Branch: change_modified_reward_v0
@@ -5,6 +5,7 @@ benchmarks
 baselines
 simple
 final_wo_o
+orientation
 workspace.code-workspace
 test
 tech_demo.py
......
@@ -3,6 +3,7 @@ import numpy as np
 from memory import Memory
 from networks import QNet
 from steering_wheel import Controller
+from keras.callbacks import EarlyStopping
 class QAgent:
     gamma = 0.99
@@ -38,7 +39,7 @@ class QAgent:
         if offline:
             batch_size = self.OFFLINE_BATCHSIZE
-        if len(self.memory.history) < batch_size:
+        if len(self.memory.history) < batch_size * 35:
             return
         states, actions, rewards, following_states, dones = self.memory.get_batch(
@@ -85,6 +86,8 @@ class DQAgent(QAgent):
         return np.argmax(action_values[0])
     def learn(self, offline=False, epochs=1):
+        if self.epsilon > self.epsilon_min:
+            self.epsilon *= self.epsilon_decay
         for _ in range(2):
             if np.random.rand() < 0.5:
                 temp = self.q
@@ -93,7 +96,7 @@ class DQAgent(QAgent):
         batch_size = self.online_batch_size
         if offline:
             batch_size = self.OFFLINE_BATCHSIZE
-        if len(self.memory.history) < batch_size:
+        if len(self.memory.history) < batch_size * 35:
             return
         states, actions, rewards, following_states, dones = self.memory.get_batch(
             batch_size)
@@ -103,12 +106,11 @@ class DQAgent(QAgent):
         idx = np.array([i for i in range(batch_size)])
         y[[idx], [actions]] = qMax
         if offline:
-            history = self.q.net.fit(states, y, epochs=epochs, verbose=0)
+            callback = EarlyStopping(monitor='loss', patience=2, min_delta=0.1, restore_best_weights=True)
+            history = self.q.net.fit(states, y, epochs=epochs, verbose=0, callbacks=[callback])
             loss = history.history['loss'][-1]
         else:
             loss = self.q.fit(states, y, epochs)
-        if self.epsilon > self.epsilon_min:
-            self.epsilon *= self.epsilon_decay
         return loss
     def load(self, path, net=True, memory=True):
......
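For orientation, here is a small, self-contained sketch (not the repository code) of the two ideas the agent hunks above touch: the epsilon decay that now runs at the top of DQAgent.learn(), and the random swap between the two value networks used for the double-Q update. The ToyDoubleAgent class, the q/q2 attribute names and the placeholder values are assumptions for illustration; only temp = self.q and the surrounding control flow are visible in the diff.

import numpy as np

# Stand-in agent, not the project's DQAgent: q and q2 are placeholders
# for the two value networks implied by the swap in the diff above.
class ToyDoubleAgent:
    def __init__(self):
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.9995
        self.q, self.q2 = "net_A", "net_B"

    def learn_step(self):
        # Exploration now decays once per learn() call, even when the
        # replay buffer is still too small and the method returns early.
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
        # Double-Q style: with roughly 50% probability (tried twice),
        # swap which network is updated against the other's estimates.
        for _ in range(2):
            if np.random.rand() < 0.5:
                self.q, self.q2 = self.q2, self.q

agent = ToyDoubleAgent()
agent.learn_step()
print(agent.epsilon, agent.q, agent.q2)

Moving the decay to the top of learn() means epsilon shrinks on every call, not only on calls that reach the fit; the new buffer gate of batch_size * 35 above is what makes that distinction matter.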
@@ -266,7 +266,7 @@ class World:
         if pos_diff < 1.1 and v < 0.01:
             done = True
             r += 150
-            if yaw_dif < 0.01:
+            if abs(yaw_dif) < 2:
                 r+= 50
         if self.collision_sensor.collision is not None:
......
@@ -57,6 +57,8 @@ def reset(environment):
 def one_episode(environment, agent, render, learn, conf=None, max_steps=1000):
     """ Perform one episode of the agent in the environment. """
     score = 0
+    if conf.env_type == 'Carla':
+        max_steps = 300
     state = reset(environment)
     for _ in range(max_steps):
         if render:
@@ -88,6 +90,9 @@ def learn_offline(agent, conf):
         desc = ('Loss: %05.4f' %(loss)) + desc_train
         pbar.set_description(desc)
         pbar.refresh()
+        if loss > 1000:
+            print("Loss exceeded 1000!!")
+            exit()
         if i % conf.offline_validate_every_x_iteration == 1 and conf.offline_validate_every_x_iteration is not -1:
             agent.epsilon = agent.epsilon_min
             score = one_episode(conf.env, agent, conf.render, False, conf=conf)
@@ -97,6 +102,7 @@ def learn_offline(agent, conf):
             avg_score_history.append(is_solved)
             if is_solved > IS_SOLVED:
                 break
+    if conf.offline_validate_every_x_iteration is not -1:
        process_logs(avg_score_history, score_history, conf)
......
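The learn_offline() hunks above add a divergence guard and make the periodic greedy validation conditional on offline_validate_every_x_iteration. A toy, self-contained version of that control flow (every name below is a stand-in, not the project's code):

# Toy stand-ins; the real loop uses conf, agent, pbar and one_episode().
class ToyAgent:
    epsilon = 1.0
    epsilon_min = 0.01
    def learn(self, offline=True):
        return 0.5  # pretend training loss

def run_validation_episode(agent):
    return 100.0    # pretend score of one greedy episode

validate_every = 500          # -1 disables validation entirely
agent = ToyAgent()
for i in range(2000):
    loss = agent.learn(offline=True)
    if loss > 1000:           # new guard: abort a diverging offline fit
        print("Loss exceeded 1000!!")
        break
    if validate_every != -1 and i % validate_every == 1:
        agent.epsilon = agent.epsilon_min     # act greedily while validating
        score = run_validation_episode(agent)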
@@ -34,7 +34,7 @@ class QNet:
            self, states): return self.net.predict_on_batch(states)
     def fit(self, X, Y, epochs=1, verbose=0):
-        callback = EarlyStopping(monitor='loss', patience=3)
+        callback = EarlyStopping(monitor='loss', patience=2, min_delta=0.1, restore_best_weights=True)
         history = self.net.fit(X, Y, epochs=epochs, verbose=verbose, callbacks=[callback])
         return history.history['loss'][-1]
......
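Both agents.py-style offline fits and QNet.fit now use the same EarlyStopping settings. A standalone toy (dummy model and data, not the project's QNet) showing what patience=2, min_delta=0.1 and restore_best_weights=True do to a fit:

import numpy as np
from keras.callbacks import EarlyStopping
from keras.models import Sequential
from keras.layers import Dense

# Dummy network and data standing in for QNet.net and the TD targets.
model = Sequential([Dense(32, activation='relu', input_shape=(4,)), Dense(2)])
model.compile(optimizer='adam', loss='mse')
X = np.random.rand(256, 4)   # states
Y = np.random.rand(256, 2)   # action-value targets

# Stop once the loss fails to improve by at least 0.1 for two epochs,
# then roll the weights back to the best epoch seen.
callback = EarlyStopping(monitor='loss', patience=2, min_delta=0.1,
                         restore_best_weights=True)
history = model.fit(X, Y, epochs=50, verbose=0, callbacks=[callback])
print('stopped after', len(history.history['loss']), 'epochs')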
@@ -5,16 +5,15 @@ from agents import DQAgent, QAgent
 c = ew.Config()
-c.name = 'DoubleCartPole'
-c.render = False
+c.name = '01'
+c.render = True
 c.env = gym.make('CartPole-v0')
 c.env_type = 'CartPole'
-c.net_layout = [128, 64, 32]
+c.net_layout = [128, 64]
 c.eps_decay = 0.9991
 c.learn_rate= 0.001
 c.run_episodes = 300
-c.save_to = 'benchmarks/'
+c.save_to = 'Screencast/'
 c.conf_to_name()
-c.agent = QAgent(c)
+c.agent = DQAgent(c)
 main.run(c)
@@ -2,7 +2,7 @@ import main
 import environment_wrapper as ew
 import gym
 import copy
-from agents import QAgent, DQAgent
+from agents import CarlaManual, QAgent, DQAgent
 from carla_environment import CarlaEnvironment
 c = ew.Config()
@@ -19,101 +19,50 @@ c.load_from = 'Carla_2_256__128__128_0.9995_0.001_1DBL'
 c.load_mem = True
 c.load_ann = True
-# o = copy.deepcopy(c)
-# o.name = 'JTAP_4'
-# o.force_cpu = True
-# o.render = True
-# o.learn = True
-# o.env_type = 'Carla'
-# o.net_layout = [256, 128, 128]
-# o.save_to = 'simple/'
-# o.load_from = 'Carla_JTAP_0_256__128__32_0.9995_0.001_1DBL'
-# o.load_mem = True
-# o.load_ann = False
-# o.learn_offline = True
-# o.offline_epochs = 75000
-# o.offline_batchsize = 256
-# o.learn_iterations = 1
-# o.offline_validate_every_x_iteration = -1
-# o.learn_online = False
-# o.eps_decay = 0.9995
-# o.learn_rate= 0.001
-# o.run_episodes = 15
 o = copy.deepcopy(c)
-o.name = 'JTAP_0'
+o.name = 'Final_wo_Obstacles_052O_2M'
 o.force_cpu = True
-o.render = True
+o.render = False
 o.learn = True
 o.env_type = 'Carla'
-o.net_layout = [256, 128]
-o.save_to = 'orientation/'
-o.load_from = ''
-o.load_mem = False
+o.net_layout = [1024, 1024, 256, 128]
+o.save_to = 'final_wo_o/'
+o.load_from = 'Carla_Final_wo_Obstacles_02_2M_256__256__128_0.99974_0.001_1DBL'
+o.load_mem = True
 o.load_ann = False
 o.learn_online = True
-o.eps_decay = 0.9995
-o.learn_rate= 0.001
-o.run_episodes = 750
+o.eps_decay = 0.9915
+o.learn_rate= 0.01
+o.run_episodes = 550
+o.learn_offline = True
+o.offline_epochs = 100000
+o.offline_batchsize = 64
+o.offline_validate_every_x_iteration = 500
 o.learn_iterations = 1
 validate = copy.deepcopy(c)
-validate.name = 'JTAP_Validate'
+validate.name = 'Validate0'
 validate.render = True
-validate.learn = True
+validate.learn = False
 validate.env_type = 'Carla'
 validate.net_layout = [256, 128]
 validate.save_to = 'simple/'
-validate.load_from = 'Carla_JTAP_0_256__128__32_0.9995_0.001_1DBL'
+validate.load_from = 'Carla_JTAP_1_256__128__128_0.9995_0.001_1'
 validate.load_mem = False
 validate.load_ann = True
 validate.learn_offline = False
-validate.offline_epochs = 1500
-validate.offline_batchsize = 64000
+validate.offline_epochs = 20
 validate.learn_iterations = 1
-validate.offline_validate_every_x_iteration = -1
+validate.offline_validate_every_x_iteration = 1
 validate.learn_online = True
-validate.eps_decay = 0.9995
-validate.learn_rate= 0.001
+validate.eps_decay = 0.95
+validate.learn_rate= 0.0000005
 validate.run_episodes = 10
-# t = copy.deepcopy(c)
-# t.render = True
-# t.net_layout = [1024, 1024, 256, 32]
-# t.eps_decay = 0.9993
-# t.learn_rate = 0.0005
-# t.force_cpu = False
-# t.load_mem = True
-# t.load_ann = False
-# t.save_to = 'test/'
-# t.load_from = 'Carla_CarlaOffline_1024__1024__256__32_0.9993_0.0005_50'
-# t.name = 'Offline'
-# t.learn_offline = True
-# t.learn_online = True
-# t.run_episodes = 500
-# t.offline_epochs = 100
-# t.learn_iterations = 100
-# t.offline_validate_every_x_iteration = -1
-configuration = o
+configuration = validate
 configuration.env = CarlaEnvironment(render=configuration.render, manual=False)
 configuration.conf_to_name()
-configuration.agent = QAgent(configuration)
+configuration.agent = DQAgent(configuration)
+# configuration.agent = CarlaManual(configuration)
 main.run(configuration)
-# o = copy.deepcopy(c)
-# o.name = '5D'
-# o.render = True
-# o.learn = False
-# o.env_type = 'Carla'
-# o.net_layout = [1024, 512, 256]
-# o.save_to = 'test/'
-# o.load_from = 'Carla_01D_1024__512__512_0.9991_0.00025_1DBLoffline'
-# o.load_mem = True
-# o.load_ann = True
-# o.learn_offline = False
-# o.offline_epochs = 1000
-# o.learn_iterations = 1
-# o.offline_validate_every_x_iteration = -1
-# o.learn_online = True
-# o.eps_decay = 0.9991
-# o.learn_rate= 0.0005
-# o.run_episodes = 20
\ No newline at end of file
@@ -5,4 +5,4 @@
 # apt install carla-simulator
 #
-DISPLAY= /opt/carla-simulator/CarlaUE4.sh -benchmark -fps=15 -quality-level=Low -opengl
\ No newline at end of file
+DISPLAY= /opt/carla-simulator/CarlaUE4.sh -benchmark -fps=10 -quality-level=Low -opengl
\ No newline at end of file