Skip to content
Snippets Groups Projects
Select Git revision
  • d48edcd327479e7a230e1a34684f251a5cd47337
  • master default protected
  • 2018ws
  • 2017ws
  • 2016ws
5 results

output-3.c

Blame
  • Forked from Peter Gerwinski / hp
    Source project has a limited visibility.
    show_agent.py 1.18 KiB
    import gym
    import matplotlib.pyplot as plt
    import numpy as np
    
    from dqnAgent import dqnAgent
    
    # Create the BipedalWalker environment and draw one initial observation;
    # the observation length and the action-space shape size the agent.
    env = gym.make('BipedalWalker-v3')
    observation = env.reset()
    nmb_of_actions = env.action_space.shape[0]

    # vareps is 0.0 here -- presumably the starting exploration rate, so the
    # agent acts greedily during playback (confirm against dqnAgent).
    marvin = dqnAgent(
        gamma=0.99,
        vareps=0.0,
        lr=0.001,
        observations=len(observation),
        actions=nmb_of_actions,
        memSize=25000,
        epsMin=0.02,
        bSize=32,
        epsDec=0.001,
    )
    # NOTE(review): presumably restores the trained networks from disk --
    # verify in dqnAgent.loadCNNs.
    marvin.loadCNNs()
    
    # Load the saved per-run histories from the working directory.
    # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects;
    # only use it on files produced by a trusted run.
    total_rewards, eps, avg_rewards = (
        np.load(history_file, allow_pickle=True)
        for history_file in ("total_rewards.npy", "eps.npy", "avg_rewards.npy")
    )

    # Figure 1: raw total rewards.
    plt.figure()
    plt.plot(total_rewards, c='k')
    plt.title('Total Rewards')

    # Figure 2: average rewards plus a least-squares straight-line trend.
    plt.figure()
    plt.plot(avg_rewards, c='k')
    episodes = np.arange(len(avg_rewards))
    slope, intercept = np.polyfit(episodes, avg_rewards, 1)
    plt.plot(episodes, slope * episodes + intercept)
    plt.title('Average Rewards')
    plt.xlabel('Episode')
    plt.ylabel('Reward')

    # Figure 3: the recorded epsilon values.
    plt.figure()
    plt.plot(eps, c='k')
    plt.title('Epsilon')

    # Display all three figures; the script continues once plt.show() returns.
    plt.show()
    
    # Play ten rendered episodes with the loaded agent.
    # NOTE(review): this uses the classic gym API (reset() returns only the
    # observation, step() returns a 4-tuple) -- confirm the installed version.
    for _ in range(10):
        done = False
        # The agent is given observations as a single-row 2-D array.
        observation = env.reset().reshape(1, -1)
        while not done:
            action = marvin.getAction(observation)
            obs, _reward, done, _info = env.step(action)
            # Feed the new state back to the agent. Previously the step result
            # was stored in a separate variable and never used, so every
            # action was chosen from the stale reset observation.
            observation = obs.reshape(1, -1)
            env.render()
    # Release the environment and its render window once playback is done.
    env.close()