"""
Run your desired environment and agent configuration.
"""
import os
import atexit
import gym
from agents import DQAgent as QAgent
import environment_wrapper as ew
# Allow GPU usage or force TensorFlow to use the CPU.
FORCE_CPU = True
if FORCE_CPU:
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = ""
if __name__ == '__main__':
    # 1. Create an environment
    env = gym.make('LunarLander-v2')
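    # LunarLander-v2 provides an 8-dimensional continuous observation and
    # four discrete actions (do nothing, fire left/main/right engine).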
    # 2. Create a learning agent
    marvin = QAgent(env.action_space.n, env.observation_space.shape[0], 'FromScratchDouble')
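    # The agent is constructed from the number of discrete actions, the
    # size of the observation vector and a run name (presumably used for
    # saving and as the plot title below).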
    # (2.5) *optional* Load agent memory and/or net from disk.
    agnt = 'agent'
    LOAD_ANN = False
    LOAD_MEMORIES = False
    if LOAD_ANN or LOAD_MEMORIES:
        marvin.load('saved_agents/' + agnt + '/' + agnt, net=LOAD_ANN, memory=LOAD_MEMORIES)
    # 3. Set your configurations for the run.
    RENDER = False
    LEARNING = True
    LEARN_ONLINE = True
    LEARN_OFFLINE = False
    RUN_EPISODES = 500
    LEARN_OFFLINE_EPOCHS = 500
    SAVE_PATH = "./saved_agents"
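    # LEARNING gates all training and saving; LEARN_ONLINE updates the
    # agent while it interacts with the environment, while LEARN_OFFLINE
    # first replays previously saved memories for LEARN_OFFLINE_EPOCHS
    # epochs.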
    # Register an *atexit* callback to store the current result of the
    # agent if the program is interrupted.
    if LEARNING:
        atexit.register(marvin.save, SAVE_PATH)
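    # atexit.register(func, *args) arranges for func(*args) to run at
    # interpreter shutdown, so the agent is persisted even if the run is
    # stopped early (e.g. with Ctrl+C).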
    # Offline training of the agent with
    # previously collected and saved memories.
    if LEARN_OFFLINE and LEARNING:
        ew.learn_offline(marvin, epochs=LEARN_OFFLINE_EPOCHS)
    # Run the agent in the environment for the
    # specified number of episodes, either to
    # verify the performance of the agent or
    # to train it.
    _LEARN = LEARN_ONLINE and LEARNING
    loss, avg_score = ew.run(env, marvin, RUN_EPISODES, render=RENDER, learn=_LEARN)
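    # ew.run appears to return the per-episode loss and average-score
    # histories, which are plotted by process_logs below.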
    # Save the final training result of the agent.
    if LEARNING:
        marvin.save(SAVE_PATH)
    # Show the result of the run.
    ew.process_logs(avg_score, loss, title=marvin.name, render=RENDER)