Skip to content
Snippets Groups Projects
Select Git revision
  • cf0951ec6dcff494841d9b3566cc788e29f0c30e
  • master default protected
2 results

client_V7.py

Blame
  • show_agent.py 1.18 KiB
    import gym
    import matplotlib.pyplot as plt
    import numpy as np
    
    from dqnAgent import dqnAgent
    
    # Create the BipedalWalker environment and take one initial observation
    # so the agent can be sized from the environment itself.
    env = gym.make('BipedalWalker-v3')
    observation = env.reset()
    nmb_of_actions = env.action_space.shape[0]

    # Constructor arguments for the agent, gathered in one place.
    # NOTE(review): vareps=0.0 presumably disables exploration so the agent
    # acts greedily during playback -- confirm against dqnAgent.
    agent_settings = {
        'gamma': 0.99,
        'vareps': 0.0,
        'lr': 0.001,
        'observations': len(observation),
        'actions': nmb_of_actions,
        'memSize': 25000,
        'epsMin': 0.02,
        'bSize': 32,
        'epsDec': 0.001,
    }
    marvin = dqnAgent(**agent_settings)
    # NOTE(review): presumably restores previously trained network weights --
    # confirm against dqnAgent.loadCNNs.
    marvin.loadCNNs()
    
    # Load the three saved arrays from the working directory.
    # NOTE(review): presumably written by the training script -- confirm.
    total_rewards = np.load("total_rewards.npy", allow_pickle=True)
    eps = np.load("eps.npy", allow_pickle=True)
    avg_rewards = np.load("avg_rewards.npy", allow_pickle=True)

    # Figure 1: total rewards.
    plt.figure()
    plt.plot(total_rewards, c='k')
    plt.title('Total Rewards')

    # Figure 2: average rewards, overlaid with a degree-1 least-squares fit
    # to show the overall trend.
    plt.figure()
    plt.plot(avg_rewards, c='k')
    episodes = np.arange(len(avg_rewards))
    slope, intercept = np.polyfit(episodes, avg_rewards, 1)
    plt.plot(episodes, slope * episodes + intercept)
    plt.title('Average Rewards')
    plt.xlabel('Episode')
    plt.ylabel('Reward')

    # Figure 3: epsilon values.
    plt.figure()
    plt.plot(eps, c='k')
    plt.title('Epsilon')

    # Display all figures; the script continues once plt.show() returns.
    plt.show()
    
    # Play ten episodes with the loaded agent, rendering every step.
    for _ in range(10):
        done = False
        # Reshape the flat observation into a single-row batch before it is
        # handed to the agent.
        observation = env.reset().reshape(1, -1)
        while not done:
            action = marvin.getAction(observation)
            obs, _reward, done, _info = env.step(action)
            # Feed the new state back to the agent. Previously the result of
            # env.step() was stored in `obs` but never used, so every action
            # was chosen from the episode's initial observation.
            observation = obs.reshape(1, -1)
            env.render()