diff --git a/agents.py b/agents.py
index 300d84ba3181e88a1760e503cbf4e60f5703163f..49d0c695bf41f231c9bfa6227cc361000540898b 100644
--- a/agents.py
+++ b/agents.py
@@ -7,7 +7,7 @@ from steering_wheel import Controller
 class QAgent:
     gamma = 0.99
     epsilon = 1.0
-    epsilon_min = 0.01
+    epsilon_min = 0.005
     epsilon_decay = 0.9999
     online_batch_size = 64
     action_space = 1
@@ -75,6 +75,7 @@ class DQAgent(QAgent):
     def __init__(self, conf):
         super().__init__(conf)
         self.q2 = QNet(conf)
+        self.name = 'D_' + self.name
 
     def get_action(self, state):
         if np.random.rand() <= self.epsilon:
diff --git a/run_scripts/benchmarks.py b/run_scripts/benchmarks.py
index 28382e658f52b16ebc231f7e3e9c09cdc63a80f8..83fa817dfa314c0d0b09df1bd061edee6be00acd 100644
--- a/run_scripts/benchmarks.py
+++ b/run_scripts/benchmarks.py
@@ -14,7 +14,7 @@ c.env_type = 'Lunar'
 c.net_layout = [256, 128]
 c.eps_decay = 0.9995
 c.learn_rate= 0.001
-c.run_episodes = 300
+c.run_episodes = 500
 c.save_to = 'benchmarks/'
 
 smallNet = copy.deepcopy(c)
@@ -103,10 +103,11 @@ lun.run_episodes = 500
 lun.name = 'NormalLunarDoubleNotSoMoreLearn'
 lun.net_layout = [256, 128]
 lun.conf_to_name()
-# lun.agent = QAgent(lun)
+
+configuration = c
 
 # configuration = smallNet
-configuration = smallNetSlow
+# configuration = smallNetSlow
 # configuration = smallNetDeep
 # configuration = normalNet
 # configuration = normalSlowDecay
@@ -120,10 +121,10 @@ configuration = smallNetSlow
 # configuration = deepNetSlowLearn
 # configuration = smallNetDeepSlowLearn
 # configuration = lun
-configuration = ThisHasToBeTooBig
+# configuration = ThisHasToBeTooBig
 
 print(configuration.name)
-configuration.agent = QAgent(configuration)
+configuration.agent = DQAgent(configuration)
 main.run(configuration)
 
 # configurations = [smallNet, smallNetDeep, normalNet, normalSlowDecay, normalSlowLearn, largeNet, deepNet, verryLittleNet, littleNet, verryLittleNetDeep, smallNetDeepSlowLearn, deepNetSlowLearn]
diff --git a/run_scripts/offline.py b/run_scripts/offline.py
new file mode 100644
index 0000000000000000000000000000000000000000..4307d4142849b5e4a56f2d00f939c74bfaa43c29
--- /dev/null
+++ b/run_scripts/offline.py
@@ -0,0 +1,42 @@
+import main
+import environment_wrapper as ew
+import gym
+import copy
+import threading
+from agents import QAgent, DQAgent
+
+standard_conf = ew.Config()
+standard_conf.render = False
+standard_conf.force_cpu = True
+standard_conf.env = gym.make('LunarLander-v2')
+standard_conf.agent = None
+standard_conf.env_type = 'Lunar'
+standard_conf.name = 'StandardConf'
+standard_conf.learn = True
+standard_conf.learn_online = False
+standard_conf.learn_offline = True
+standard_conf.net_layout= [256, 128]
+standard_conf.eps_decay = 0.6
+standard_conf.learn_rate= 0.0005
+standard_conf.learn_iterations = 4
+standard_conf.run_episodes = 100
+standard_conf.offline_epochs = 2000
+standard_conf.offline_validate_every_x_iteration = -1
+standard_conf.load_ann = False
+standard_conf.load_mem = True
+standard_conf.load_from = 'agnt'
+standard_conf.save_to = 'saved_agents/'
+
+
+test = copy.deepcopy(standard_conf)
+test.name = 'T2000'
+test.net_layout = [256, 128, 128]
+test.save_to = 'benchmarks/offline/'
+test.load_from = 'NormalDouble'
+
+
+configuration = test
+configuration.conf_to_name()
+
+configuration.agent = QAgent(configuration)
+main.run(configuration)
\ No newline at end of file
diff --git a/run_scripts/report.py b/run_scripts/report.py
new file mode 100644
index 0000000000000000000000000000000000000000..7bf76df007572789be32cf6d371f9e2fd9c2737f
--- /dev/null
+++ b/run_scripts/report.py
@@ -0,0 +1,28 @@
+import main
+import environment_wrapper as ew
+import gym
+import copy
+import threading
+from agents import QAgent, DQAgent
+
+standard_conf = ew.Config()
+standard_conf.render = False
+standard_conf.force_cpu = True
+standard_conf.env = None
+standard_conf.agent = None
+standard_conf.env_type = 'Lunar'
+standard_conf.name = 'ConfigTest'
+standard_conf.learn = True
+standard_conf.learn_online = True
+standard_conf.learn_offline = False
+standard_conf.net_layout= [256, 128]
+standard_conf.eps_decay = 0.9996
+standard_conf.learn_rate= 0.001
+standard_conf.learn_iterations = 1
+standard_conf.run_episodes = 20
+standard_conf.offline_epochs = 1000
+standard_conf.offline_validate_every_x_iteration = 10
+standard_conf.load_ann = False
+standard_conf.load_mem = False
+standard_conf.load_from = 'agnt'
+standard_conf.save_to = 'saved_agents/'
\ No newline at end of file
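
Note on the DQAgent change above: the agent holds a second network (self.q2) next to the one inherited from QAgent, which is the usual double Q-learning layout where one network selects the greedy next action and the other evaluates it. Below is a minimal sketch of that target computation, assuming each network exposes a Keras-style predict(states) -> (batch, n_actions) array; the names q1, q2, and double_q_targets are illustrative assumptions, not this repo's confirmed API.

import numpy as np

# Hypothetical sketch, not the repo's actual update code.
def double_q_targets(q1, q2, rewards, next_states, dones, gamma=0.99):
    # q1 selects the greedy action in each next state ...
    next_actions = np.argmax(q1.predict(next_states), axis=1)
    # ... while q2 evaluates that action, decoupling selection from
    # evaluation to reduce the overestimation bias of plain Q-learning
    next_q = q2.predict(next_states)[np.arange(len(next_actions)), next_actions]
    # TD(0) target; terminal transitions (dones == 1) do not bootstrap
    return rewards + gamma * next_q * (1.0 - dones)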