From 89853126b9028b20d05617d1e3cc45aa81e81612 Mon Sep 17 00:00:00 2001 From: Armin <armin.co@hs-bochum.de> Date: Mon, 8 Mar 2021 09:53:24 +0100 Subject: [PATCH] New run scripts --- agents.py | 3 ++- run_scripts/benchmarks.py | 11 +++++----- run_scripts/offline.py | 42 +++++++++++++++++++++++++++++++++++++++ run_scripts/report.py | 28 ++++++++++++++++++++++++++ 4 files changed, 78 insertions(+), 6 deletions(-) create mode 100644 run_scripts/offline.py create mode 100644 run_scripts/report.py diff --git a/agents.py b/agents.py index 300d84b..49d0c69 100644 --- a/agents.py +++ b/agents.py @@ -7,7 +7,7 @@ from steering_wheel import Controller class QAgent: gamma = 0.99 epsilon = 1.0 - epsilon_min = 0.01 + epsilon_min = 0.005 epsilon_decay = 0.9999 online_batch_size = 64 action_space = 1 @@ -75,6 +75,7 @@ class DQAgent(QAgent): def __init__(self, conf): super().__init__(conf) self.q2 = QNet(conf) + self.name = 'D_' + self.name def get_action(self, state): if np.random.rand() <= self.epsilon: diff --git a/run_scripts/benchmarks.py b/run_scripts/benchmarks.py index 28382e6..83fa817 100644 --- a/run_scripts/benchmarks.py +++ b/run_scripts/benchmarks.py @@ -14,7 +14,7 @@ c.env_type = 'Lunar' c.net_layout = [256, 128] c.eps_decay = 0.9995 c.learn_rate= 0.001 -c.run_episodes = 300 +c.run_episodes = 500 c.save_to = 'benchmarks/' smallNet = copy.deepcopy(c) @@ -103,10 +103,11 @@ lun.run_episodes = 500 lun.name = 'NormalLunarDoubleNotSoMoreLearn' lun.net_layout = [256, 128] lun.conf_to_name() -# lun.agent = QAgent(lun) + +configuration = c # configuration = smallNet -configuration = smallNetSlow +# configuration = smallNetSlow # configuration = smallNetDeep # configuration = normalNet # configuration = normalSlowDecay @@ -120,10 +121,10 @@ configuration = smallNetSlow # configuration = deepNetSlowLearn # configuration = smallNetDeepSlowLearn # configuration = lun -configuration = ThisHasToBeTooBig +# configuration = ThisHasToBeTooBig print(configuration.name) -configuration.agent = QAgent(configuration) +configuration.agent = DQAgent(configuration) main.run(configuration) # configurations = [smallNet, smallNetDeep, normalNet, normalSlowDecay, normalSlowLearn, largeNet, deepNet, verryLittleNet, littleNet, verryLittleNetDeep, smallNetDeepSlowLearn, deepNetSlowLearn] diff --git a/run_scripts/offline.py b/run_scripts/offline.py new file mode 100644 index 0000000..4307d41 --- /dev/null +++ b/run_scripts/offline.py @@ -0,0 +1,42 @@ +import main +import environment_wrapper as ew +import gym +import copy +import threading +from agents import QAgent, DQAgent + +standard_conf = ew.Config() +standard_conf.render = False +standard_conf.force_cpu = True +standard_conf.env = gym.make('LunarLander-v2') +standard_conf.agent = None +standard_conf.env_type = 'Lunar' +standard_conf.name = 'StandardConf' +standard_conf.learn = True +standard_conf.learn_online = False +standard_conf.learn_offline = True +standard_conf.net_layout= [256, 128] +standard_conf.eps_decay = 0.6 +standard_conf.learn_rate= 0.0005 +standard_conf.learn_iterations = 4 +standard_conf.run_episodes = 100 +standard_conf.offline_epochs = 2000 +standard_conf.offline_validate_every_x_iteration = -1 +standard_conf.load_ann = False +standard_conf.load_mem = True +standard_conf.load_from = 'agnt' +standard_conf.save_to = 'saved_agents/' + + +test = copy.deepcopy(standard_conf) +test.name = 'T2000' +test.net_layout = [256, 128, 128] +test.save_to = 'benchmarks/offline/' +test.load_from = 'NormalDouble' + + +configuration = test +configuration.conf_to_name() + +configuration.agent = QAgent(configuration) +main.run(configuration) \ No newline at end of file diff --git a/run_scripts/report.py b/run_scripts/report.py new file mode 100644 index 0000000..7bf76df --- /dev/null +++ b/run_scripts/report.py @@ -0,0 +1,28 @@ +import main +import environment_wrapper as ew +import gym +import copy +import threading +from agents import QAgent, DQAgent + +standard_conf = ew.Config() +standard_conf.render = False +standard_conf.force_cpu = True +standard_conf.env = None +standard_conf.agent = None +standard_conf.env_type = 'Lunar' +standard_conf.name = 'ConfigTest' +standard_conf.learn = True +standard_conf.learn_online = True +standard_conf.learn_offline = False +standard_conf.net_layout= [256, 128] +standard_conf.eps_decay = 0.9996 +standard_conf.learn_rate= 0.001 +standard_conf.learn_iterations = 1 +standard_conf.run_episodes = 20 +standard_conf.offline_epochs = 1000 +standard_conf.offline_validate_every_x_iteration = 10 +standard_conf.load_ann = False +standard_conf.load_mem = False +standard_conf.load_from = 'agnt' +standard_conf.save_to = 'saved_agents/' \ No newline at end of file -- GitLab