Skip to content
Snippets Groups Projects
Commit 89853126 authored by Armin Co's avatar Armin Co
Browse files

New run scripts

parent 99dec5eb
Branches
No related tags found
No related merge requests found
......@@ -7,7 +7,7 @@ from steering_wheel import Controller
class QAgent:
gamma = 0.99
epsilon = 1.0
epsilon_min = 0.01
epsilon_min = 0.005
epsilon_decay = 0.9999
online_batch_size = 64
action_space = 1
......@@ -75,6 +75,7 @@ class DQAgent(QAgent):
def __init__(self, conf):
super().__init__(conf)
self.q2 = QNet(conf)
self.name = 'D_' + self.name
def get_action(self, state):
if np.random.rand() <= self.epsilon:
......
......@@ -14,7 +14,7 @@ c.env_type = 'Lunar'
c.net_layout = [256, 128]
c.eps_decay = 0.9995
c.learn_rate= 0.001
c.run_episodes = 300
c.run_episodes = 500
c.save_to = 'benchmarks/'
smallNet = copy.deepcopy(c)
......@@ -103,10 +103,11 @@ lun.run_episodes = 500
lun.name = 'NormalLunarDoubleNotSoMoreLearn'
lun.net_layout = [256, 128]
lun.conf_to_name()
# lun.agent = QAgent(lun)
configuration = c
# configuration = smallNet
configuration = smallNetSlow
# configuration = smallNetSlow
# configuration = smallNetDeep
# configuration = normalNet
# configuration = normalSlowDecay
......@@ -120,10 +121,10 @@ configuration = smallNetSlow
# configuration = deepNetSlowLearn
# configuration = smallNetDeepSlowLearn
# configuration = lun
configuration = ThisHasToBeTooBig
# configuration = ThisHasToBeTooBig
print(configuration.name)
configuration.agent = QAgent(configuration)
configuration.agent = DQAgent(configuration)
main.run(configuration)
# configurations = [smallNet, smallNetDeep, normalNet, normalSlowDecay, normalSlowLearn, largeNet, deepNet, verryLittleNet, littleNet, verryLittleNetDeep, smallNetDeepSlowLearn, deepNetSlowLearn]
......
"""Run script: offline training of the 'T2000' benchmark agent.

Builds a shared base configuration, derives the T2000 experiment from it
(deeper net, replay memories loaded from a previous run) and starts it
via main.run().
"""
import main
import environment_wrapper as ew
import gym
import copy
import threading
from agents import QAgent, DQAgent

# Settings shared by every experiment derived from the base config.
_BASE_SETTINGS = {
    'render': False,
    'force_cpu': True,
    'env': gym.make('LunarLander-v2'),
    'agent': None,
    'env_type': 'Lunar',
    'name': 'StandardConf',
    'learn': True,
    'learn_online': False,
    'learn_offline': True,
    'net_layout': [256, 128],
    'eps_decay': 0.6,
    'learn_rate': 0.0005,
    'learn_iterations': 4,
    'run_episodes': 100,
    'offline_epochs': 2000,
    'offline_validate_every_x_iteration': -1,  # -1: never validate during offline epochs
    'load_ann': False,
    'load_mem': True,       # replay memory is loaded, weights are not
    'load_from': 'agnt',
    'save_to': 'saved_agents/',
}

standard_conf = ew.Config()
for _attr, _value in _BASE_SETTINGS.items():
    setattr(standard_conf, _attr, _value)

# T2000 experiment: one extra hidden layer, memories taken from the
# 'NormalDouble' run, results written to the offline benchmark folder.
test = copy.deepcopy(standard_conf)
test.name = 'T2000'
test.net_layout = [256, 128, 128]
test.save_to = 'benchmarks/offline/'
test.load_from = 'NormalDouble'

configuration = test
configuration.conf_to_name()
configuration.agent = QAgent(configuration)
main.run(configuration)
\ No newline at end of file
"""Run script: online-learning smoke-test configuration ('ConfigTest').

Only constructs the configuration object; no environment is created here
(env is None) and nothing is started from this module.
"""
import main
import environment_wrapper as ew
import gym
import copy
import threading
from agents import QAgent, DQAgent


def _configure(conf, **settings):
    """Copy every keyword setting onto *conf* as an attribute and return it."""
    for name, value in settings.items():
        setattr(conf, name, value)
    return conf


standard_conf = _configure(
    ew.Config(),
    render=False,
    force_cpu=True,
    env=None,                  # environment is supplied elsewhere
    agent=None,
    env_type='Lunar',
    name='ConfigTest',
    learn=True,
    learn_online=True,         # learn while playing episodes
    learn_offline=False,
    net_layout=[256, 128],
    eps_decay=0.9996,
    learn_rate=0.001,
    learn_iterations=1,
    run_episodes=20,
    offline_epochs=1000,
    offline_validate_every_x_iteration=10,
    load_ann=False,
    load_mem=False,
    load_from='agnt',
    save_to='saved_agents/',
)
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment