diff --git a/environment_wrapper.py b/environment_wrapper.py
index 89413dd6bc1a3ee826c4afd7535d06d761cf9c33..5b4f5fbb7c102fc766d93aa074cb8019944a7e17 100644
--- a/environment_wrapper.py
+++ b/environment_wrapper.py
@@ -31,7 +31,7 @@ class Config:
         if self.force_cpu:
            os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
            os.environ["CUDA_VISIBLE_DEVICES"] = ""
-        self.name += self.env_type
+        self.name = self.env_type + '_' + self.name
         for layer in self.net_layout:
             self.name += '_' + str(layer) + '_'
         self.name += str(self.eps_decay) + '_'
@@ -91,7 +91,7 @@ def run(environment, agent, episodes, render=True, learn=True):
     score_history = []
     avg_score_history = []

-    pbar = trange(episodes, desc=agent.name + ' [actual, average]: [0, 0]', unit="Episodes")
+    pbar = trange(episodes, desc=agent.name + ' [act, avg]: [0, 0]', unit="Episodes")
     for _ in pbar:
         score = one_episode(environment, agent, render, learn)
         score_history.append(score)
@@ -101,7 +101,7 @@ def run(environment, agent, episodes, render=True, learn=True):
         if is_solved > 195 and learn:
             break

-        desc = ("Score [actual, average]: [{0:.2f}, {1:.2f}]".format(score, is_solved))
+        desc = (agent.name + " [act, avg]: [{0:.2f}, {1:.2f}]".format(score, is_solved))
         pbar.set_description(desc)
         pbar.refresh()
     return score_history, avg_score_history
diff --git a/main.py b/main.py
index 1ccf5c97e570d629c1f898046d56f81ba3fe47cb..886cddb1afecf3ef9ab677758c892799910e8a18 100644
--- a/main.py
+++ b/main.py
@@ -5,6 +5,7 @@ Run your desired environment and agent configuration.
 import os
 import atexit
 import gym
+from agents import QAgent

 import environment_wrapper as ew

@@ -47,7 +48,9 @@ def run(conf):

 if __name__ == '__main__':
     conf = ew.Config()
+    conf.render = True
     conf.env = gym.make('LunarLander-v2')
     conf.env_type = 'Lunar'
     conf.conf_to_name()
+    conf.agent = QAgent(conf)
     run(conf)
\ No newline at end of file