# show_agent.py

import gym
import matplotlib.pyplot as plt
import numpy as np

from dqnAgent import dqnAgent

# Create the walker environment and take one initial observation so the
# agent can be sized from the environment itself.
env = gym.make('BipedalWalker-v3')
observation = env.reset()
nmb_of_actions = env.action_space.shape[0]

# Constructor arguments for the agent. `vareps` is presumably the starting
# exploration rate (0.0 here) -- confirm against dqnAgent.
agent_config = dict(
    gamma=0.99,
    vareps=0.0,
    lr=0.001,
    observations=len(observation),
    actions=nmb_of_actions,
    memSize=25000,
    epsMin=0.02,
    bSize=32,
    epsDec=0.001,
)
marvin = dqnAgent(**agent_config)
# Presumably restores previously saved networks -- see dqnAgent.loadCNNs.
marvin.loadCNNs()

# Load the saved history arrays in a fixed order: total reward, epsilon and
# average reward.
history_files = ("total_rewards.npy", "eps.npy", "avg_rewards.npy")
total_rewards, eps, avg_rewards = [
    np.load(path, allow_pickle=True) for path in history_files
]

# Figure 1: the total_rewards history.
_, total_ax = plt.subplots()
total_ax.set_title('Total Rewards')
total_ax.plot(total_rewards, c='k')

# Figure 2: the avg_rewards history with a least-squares straight-line fit
# drawn over it.
_, avg_ax = plt.subplots()
avg_ax.set_title('Average Rewards')
avg_ax.set_xlabel('Episode')
avg_ax.set_ylabel('Reward')
avg_ax.plot(avg_rewards, c='k')
episodes = np.arange(len(avg_rewards))
slope, intercept = np.polyfit(episodes, avg_rewards, 1)
avg_ax.plot(episodes, slope * episodes + intercept)

# Figure 3: the eps history.
_, eps_ax = plt.subplots()
eps_ax.set_title('Epsilon')
eps_ax.plot(eps, c='k')

# Display all three figures before the rollout loop starts.
plt.show()

# Roll out ten rendered episodes with the loaded agent.
for _ in range(10):
    done = False
    # The agent is fed observations reshaped to a single row.
    observation = env.reset().reshape(1, -1)
    while not done:
        action = marvin.getAction(observation)
        # Feed the NEW observation into the next getAction call. Previously
        # it was stored in an unused variable, so the agent kept choosing
        # actions from the initial reset observation for the whole episode.
        next_observation, _reward, done, _info = env.step(action)
        observation = next_observation.reshape(1, -1)
        env.render()

# Close the environment once all episodes have been shown.
env.close()