diff --git a/agents.py b/agents.py
index 49d0c695bf41f231c9bfa6227cc361000540898b..ce4d367cc71efe744992a34d1ea37c158abfd44c 100644
--- a/agents.py
+++ b/agents.py
@@ -75,7 +75,7 @@ class DQAgent(QAgent):
     def __init__(self, conf):
         super().__init__(conf)
         self.q2 = QNet(conf)
-        self.name = 'D_' + self.name
+        self.name = 'D_' + str(self.name)

     def get_action(self, state):
         if np.random.rand() <= self.epsilon:
diff --git a/environment_wrapper.py b/environment_wrapper.py
index e55058bcf7c9c6468471fdaf1ac4be40fbb5d931..2c8317b0e5a5787e8f7b7bd3173ba358e9712867 100644
--- a/environment_wrapper.py
+++ b/environment_wrapper.py
@@ -31,7 +31,7 @@ class Config:

     def conf_to_name(self):
-        self.name = self.env_type + '_' + self.name
+        self.name = str(self.env_type) + '_' + str(self.name)
         for layer in self.net_layout:
             self.name += '_' + str(layer) + '_'
         self.name += str(self.eps_decay) + '_'
@@ -102,7 +102,7 @@ def learn_offline(agent, conf):

 def run(environment, agent, episodes, render=True, learn=True, conf=None):
     """ Run an agent """
-    conf.name += 'on'
+    conf.name = str(conf.name) + 'on'
     # Set the exploring rate to its minimum.
     # (epsilon *greedy*)
     if not learn:
diff --git a/run_scripts/benchmarks.py b/run_scripts/benchmarks.py
index 83fa817dfa314c0d0b09df1bd061edee6be00acd..a374ffdcdf4e177df0c07cce18049fd922325c41 100644
--- a/run_scripts/benchmarks.py
+++ b/run_scripts/benchmarks.py
@@ -123,8 +123,8 @@
 configuration = c
 # configuration = lun
 # configuration = ThisHasToBeTooBig
-print(configuration.name)
-configuration.agent = DQAgent(configuration)
+configuration.conf_to_name()
+configuration.agent = QAgent(configuration)
 main.run(configuration)

 # configurations = [smallNet, smallNetDeep, normalNet, normalSlowDecay, normalSlowLearn, largeNet, deepNet, verryLittleNet, littleNet, verryLittleNetDeep, smallNetDeepSlowLearn, deepNetSlowLearn]
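For context, a minimal sketch (not part of the diff) of why the explicit str() coercions matter: if a config's `name` starts out as a non-string value (None, an int, ...), plain `+` concatenation raises a TypeError, while `str(...)` always yields a usable label. `FakeConf` below is a hypothetical stand-in for the repo's `Config`, used only for illustration.

```python
class FakeConf:
    def __init__(self, name):
        self.name = name  # may be None, an int, etc. before conf_to_name() runs

conf = FakeConf(None)

# Old behaviour: concatenating a str with a non-str raises TypeError.
try:
    label = 'D_' + conf.name
except TypeError as err:
    print('old style fails:', err)

# New behaviour: coercing with str() always produces a string label.
label = 'D_' + str(conf.name)
print('new style:', label)  # -> D_None
```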