
main.py

    """
    Run your desired environment and agent configuration.
    """
    
    import os
    import atexit
    
    # Allow GPU usage or force tensorflow to use the CPU.
    # The CUDA environment variables must be set before tensorflow is
    # first imported (the agents module pulls it in), otherwise they
    # have no effect on device placement.
    FORCE_CPU = True
    if FORCE_CPU:
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ["CUDA_VISIBLE_DEVICES"] = ""
    
    import gym
    
    from agents import DQAgent as QAgent
    import environment_wrapper as ew
    
    
    if __name__ == '__main__':
        # 1. Create an environment
        env = gym.make('LunarLander-v2')
    
        # 2. Create a learning agent
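        # (For LunarLander-v2: 4 discrete actions, 8-dimensional observations.)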
        marvin = QAgent(env.action_space.n, env.observation_space.shape[0], 'FromScratchDouble')
    
        # (2.5) *optional* Load agent memory and/or net from disk.
        agnt = 'agent'
        LOAD_ANN = False
        LOAD_MEMORIES = False
        if LOAD_ANN or LOAD_MEMORIES:
            marvin.load('saved_agents/' + agnt + '/' + agnt, net=LOAD_ANN, memory=LOAD_MEMORIES)
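        # The load path follows the saved_agents/<name>/<name> layout,
        # presumably written by an earlier marvin.save(SAVE_PATH) run.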
    
        # 3. Set the configuration for the run.
        RENDER = False
        LEARNING = True
        LEARN_ONLINE = True
        LEARN_OFFLINE = False
        RUN_EPISODES = 500
        LEARN_OFFLINE_EPOCHS = 500
        SAVE_PATH = "./saved_agents"
    
        # Register an *atexit* callback to store the current
        # state of the agent in case the program is interrupted.
        if LEARNING:
            atexit.register(marvin.save, SAVE_PATH)
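        # (atexit handlers run on normal interpreter shutdown, including
        # after an unhandled KeyboardInterrupt, but not on a hard kill.)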
    
        # Offline training of the agent with
        # previously collected and saved memories.
        if LEARN_OFFLINE and LEARNING:
            ew.learn_offline(marvin, epochs=LEARN_OFFLINE_EPOCHS)
    
        # Run the agent in the environment for the
        # specified number of episodes, either to
        # verify the performance of the agent or
        # to train it.
        _LEARN = LEARN_ONLINE and LEARNING
        loss, avg_score = ew.run(env, marvin, RUN_EPISODES, render=RENDER, learn=_LEARN)
    
        # Save the final training result of the agent.
        if LEARNING:
            marvin.save(SAVE_PATH)
    
        # Show the result of the run.
        ew.process_logs(avg_score, loss, title=marvin.name, render=RENDER)
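
For reference, a plain gym episode loop of the kind environment_wrapper.run
presumably wraps is sketched below, assuming the classic gym reset/step API
that LunarLander-v2 uses. Since the environment_wrapper module is not shown
on this page, the function name and the agent methods used (act, remember,
learn) are assumptions for illustration, not the project's actual API.

    def run_sketch(env, agent, episodes, render=False, learn=True):
        """Minimal gym episode loop; returns the per-episode scores."""
        scores = []
        for _ in range(episodes):
            state = env.reset()
            done = False
            score = 0.0
            while not done:
                if render:
                    env.render()
                action = agent.act(state)                # pick an action
                next_state, reward, done, _ = env.step(action)
                if learn:
                    # store the transition and do one replay/update step
                    agent.remember(state, action, reward, next_state, done)
                    agent.learn()
                state = next_state
                score += reward
            scores.append(score)
        return scores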