From de2747b719d7ada9a67210efa358e96f2df9f69f Mon Sep 17 00:00:00 2001
From: Armin <armin.co@hs-bochum.de>
Date: Sat, 20 Feb 2021 19:56:05 +0100
Subject: [PATCH] Fixed learning on double QAgent.

---
 .gitignore    |  1 +
 agents.py     | 51 +++++++++++++++------------
 benchmarks.py | 94 ---------------------------------------------------
 3 files changed, 29 insertions(+), 117 deletions(-)
 delete mode 100644 benchmarks.py

diff --git a/.gitignore b/.gitignore
index d6c49c6..53380c2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,7 @@
 __pycache__
 .vscode
 saved_agents
+benchmarks
 workspace.code-workspace
 tech_demo.py
 *.png
diff --git a/agents.py b/agents.py
index cc99e0f..8672ba5 100644
--- a/agents.py
+++ b/agents.py
@@ -9,6 +9,9 @@ class QAgent:
     epsilon_min = 0.01
     epsilon_decay = 0.9999
     online_batch_size = 64
+    action_space = 1
+    name = "Q"
+    OFFLINE_BATCHSIZE = 2048
 
     def __init__(self, conf):#self, action_space, state_space, name):
         self.q = QNet(conf)#conf.env.action_space.n, conf.env.observation_space.shape[0])
@@ -32,7 +35,7 @@
 
         epochs = 1
         if offline:
-            batch_size = 2048
+            batch_size = self.OFFLINE_BATCHSIZE
         if len(self.memory.history) < batch_size:
             return
 
@@ -68,9 +71,9 @@
         self.memory.load(path+'.mem')
 
 class DQAgent(QAgent):
-    def __init__(self, action_space, state_space, name):
-        super().__init__(action_space, state_space, name)
-        self.q2 = QNet(action_space, state_space)
+    def __init__(self, conf):
+        super().__init__(conf)
+        self.q2 = QNet(conf)
 
     def get_action(self, state):
         if np.random.rand() <= self.epsilon:
@@ -79,27 +82,29 @@ class DQAgent(QAgent):
         return np.argmax(action_values[0])
 
     def learn(self, offline=False):
-        for _ in range(3):
+        for _ in range(2):
             if np.random.rand() < 0.5:
                 temp = self.q
                 self.q = self.q2
                 self.q2 = temp
-            batch_size = self.online_batch_size
-            epochs = 1
-            if offline:
-                batch_size = 4096
-            if len(self.memory.history) < batch_size:
-                return
-            states, actions, rewards, following_states, dones = self.memory.get_batch(batch_size)
-            q_max_hat = rewards + self.gamma * (np.amax(self.q2.predict_on_batch(following_states), axis=1)) * (1-dones)
-            y = self.q.predict_on_batch(states)
-            idx = np.array([i for i in range(batch_size)])
-            y[[idx], [actions]] = q_max_hat
-            if offline:
-                history = self.q.net.fit(states, y, epochs=2, verbose=0)
-                loss = history.history['loss'][-1]
-            else:
-                loss = self.q.fit(states, y, epochs)
-            if self.epsilon > self.epsilon_min:
-                self.epsilon *= self.epsilon_decay
+        batch_size = self.online_batch_size
+        epochs = 1
+        if offline:
+            batch_size = self.OFFLINE_BATCHSIZE
+        if len(self.memory.history) < batch_size:
+            return
+        states, actions, rewards, following_states, dones = self.memory.get_batch(
+            batch_size)
+        qMax = rewards + self.gamma * \
+            (np.amax(self.q2.predict_on_batch(following_states), axis=1)) * (1-dones)
+        y = self.q.predict_on_batch(states)
+        idx = np.array([i for i in range(batch_size)])
+        y[[idx], [actions]] = qMax
+        if offline:
+            history = self.q.net.fit(states, y, epochs=2, verbose=0)
+            loss = history.history['loss'][-1]
+        else:
+            loss = self.q.fit(states, y, epochs)
+        if self.epsilon > self.epsilon_min:
+            self.epsilon *= self.epsilon_decay
         return loss
\ No newline at end of file
diff --git a/benchmarks.py b/benchmarks.py
deleted file mode 100644
index bd25b92..0000000
--- a/benchmarks.py
+++ /dev/null
@@ -1,94 +0,0 @@
-import main
-import environment_wrapper as ew
-import gym
-from carla_environment import CarlaEnvironment
-import copy
-import threading
-
-c = ew.Config()
-
-c.name = 'Base'
-c.render = False
-c.env = gym.make('LunarLander-v2')
-c.env_type = 'Lunar'
-c.net_layout = [256, 128]
-c.eps_decay = 0.9996
-c.learn_rate= 0.001
-c.run_episodes = 300
-c.save_to = 'benchmarks/'
-
-smallNet = copy.deepcopy(c)
-smallNet.name = 'SmallNet'
-smallNet.net_layout = [128, 32]
-smallNet.conf_to_name()
-
-smallNetDeep = copy.deepcopy(c)
-smallNetDeep.name = 'SmallNetDepp'
-smallNetDeep.net_layout = [128, 32, 32]
-smallNetDeep.conf_to_name()
-
-normalNet = copy.deepcopy(c)
-normalNet.name = 'NormalNet'
-normalNet.net_layout = [256, 128]
-normalNet.conf_to_name()
-
-normalSlowDecay = copy.deepcopy(c)
-normalSlowDecay.name = 'NormalSlowDecay'
-normalSlowDecay.net_layout = [256, 128]
-normalSlowDecay.eps_decay = 0.99995
-normalSlowDecay.conf_to_name()
-
-normalSlowLearn = copy.deepcopy(c)
-normalSlowLearn.name = 'NormalSlowLearn'
-normalSlowLearn.net_layout = [256, 128]
-normalSlowLearn.learn_rate = 0.0005
-normalSlowLearn.conf_to_name()
-
-largeNet = copy.deepcopy(c)
-largeNet.name = 'LargeNet'
-largeNet.net_layout = [512, 256]
-largeNet.conf_to_name()
-
-deepNet = copy.deepcopy(c)
-deepNet.name = 'DeppNet'
-deepNet.net_layout = [256, 128, 128]
-deepNet.conf_to_name()
-
-littleNet = copy.deepcopy(c)
-littleNet.name = 'LittleNet'
-littleNet.net_layout = [64, 64]
-littleNet.conf_to_name()
-
-verryLittleNet = copy.deepcopy(c)
-verryLittleNet.name = 'VerryLittleNet'
-verryLittleNet.net_layout = [64, 32]
-verryLittleNet.conf_to_name()
-
-verryLittleNetDeep = copy.deepcopy(c)
-verryLittleNetDeep.name = 'VerryLittleNetDeep'
-verryLittleNetDeep.net_layout = [64, 32, 32]
-verryLittleNetDeep.conf_to_name()
-
-# configuration = smallNet
-# configuration = smallNetDeep
-# configuration = normalNet
-# configuration = normalSlowDecay
-# configuration = normalSlowLearn
-# configuration = largeNet
-# configuration = deepNet
-# configuration = verryLittleNet
-# configuration = littleNet
-# configuration = verryLittleNetDeep
-# main.run(configuration)
-
-configurations = [smallNet, smallNetDeep, normalNet, normalSlowDecay, normalSlowLearn, largeNet, deepNet, verryLittleNet, littleNet, verryLittleNetDeep]
-
-threads = []
-for conf in configurations:
-    threads.append(threading.Thread(target=main.run, args=conf))
-
-for thread in threads:
-    thread.start()
-
-for thread in threads:
-    thread.join()
\ No newline at end of file
--
GitLab
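
Note on the DQAgent.learn change: in the previous revision the whole training body was indented inside the "for _ in range(3):" swap loop, so the batch was fetched, the targets rebuilt, and the network refitted on every iteration; the patch dedents that body so the loop only swaps self.q and self.q2 and training runs once per call. Below is a minimal sketch of the resulting double-Q target computation, with plain NumPy arrays standing in for the QNet batch predictions; all shapes, the rng setup, and the variable names q_next/q_max are illustrative stand-ins, not taken from the repository.

    import numpy as np

    rng = np.random.default_rng(0)
    batch_size, n_actions = 64, 4
    gamma = 0.99

    # Stand-ins for a replay batch and the two networks' outputs:
    # q_next plays the role of q2.predict_on_batch(following_states),
    # y the role of q.predict_on_batch(states).
    rewards = rng.normal(size=batch_size)
    dones = rng.integers(0, 2, size=batch_size).astype(float)
    actions = rng.integers(0, n_actions, size=batch_size)
    q_next = rng.normal(size=(batch_size, n_actions))
    y = rng.normal(size=(batch_size, n_actions))

    # Bootstrap from the *other* network (q2), zero out terminal
    # transitions, and overwrite only the taken action's entry in the
    # training targets, mirroring the patched learn().
    q_max = rewards + gamma * np.amax(q_next, axis=1) * (1.0 - dones)
    idx = np.arange(batch_size)
    y[idx, actions] = q_max

Because the loop swaps self.q and self.q2 with probability 0.5 per iteration, each network alternates between being trained and supplying the bootstrap values, which is what makes the pair a double-Q scheme. np.arange(batch_size) above is an equivalent, vectorized form of the patch's np.array([i for i in range(batch_size)]).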