# environment_wrapper.py
# Author: Armin Co
""" Wrapper to abstract different learning environments for an agent. """
import os
import numpy as np
from tqdm import trange
import pandas as pd
import matplotlib.pyplot as plt
IS_SOLVED = 195
class Config:
    """Hyper-parameters and runtime switches for one agent/environment run.

    All fields are class-level defaults; callers typically instantiate and
    overwrite individual attributes before starting a run.
    """
    # NOTE(review): these are shared class attributes — in particular
    # net_layout is a mutable list, so in-place edits leak across instances.
    render = False          # draw the environment every step
    force_cpu = True        # disable GPU usage for the agent's network
    env = None              # gym-style environment instance
    agent = None            # learning agent instance
    env_type = 'Lunar'      # short tag for the environment family
    name = 'ConfigTest'     # base run name; expanded by conf_to_name()
    learn = True
    learn_online = True
    learn_offline = False
    net_layout = [256, 128]  # hidden-layer widths of the agent's network
    eps_decay = 0.9996       # epsilon-greedy decay factor
    learn_rate = 0.001
    learn_iterations = 1     # epochs per learn() call
    run_episodes = 20
    offline_epochs = 1000
    offline_validate_every_x_iteration = 10  # -1 disables validation episodes
    load_ann = False
    load_mem = False
    load_from = 'agnt'
    save_to = 'saved_agents/'

    def conf_to_name(self):
        """Expand self.name into a unique run identifier built from the
        environment type, layer widths, and learning hyper-parameters."""
        pieces = [str(self.env_type), '_', str(self.name)]
        for width in self.net_layout:
            pieces.append('_' + str(width) + '_')
        pieces.append(str(self.eps_decay) + '_')
        pieces.append(str(self.learn_rate) + '_')
        pieces.append(str(self.learn_iterations))
        self.name = ''.join(pieces)
def step(environment, action):
    """Perform one iteration in the environment.

    Applies *action*, reshapes the observation to a (1, obs_dim) row vector
    (the shape the agent's network expects), and passes reward/done/info
    through unchanged.
    """
    next_state, reward, done, info = environment.step(action)
    obs_dim = environment.observation_space.shape[0]
    next_state = np.reshape(next_state, (1, obs_dim))
    return next_state, reward, done, info
def reset(environment):
    """Reset the environment and return the initial state as a (1, obs_dim) array."""
    obs_dim = environment.observation_space.shape[0]
    return np.reshape(environment.reset(), (1, obs_dim))
def one_episode(environment, agent, render, learn, conf=None, max_steps=1000):
    """Perform one episode of the agent in the environment.

    Runs at most *max_steps* interactions, storing each transition in the
    agent's memory and (optionally) learning after every step. Returns the
    total undiscounted reward collected during the episode.
    """
    total_reward = 0
    state = reset(environment)
    for _ in range(max_steps):
        if render:
            environment.render()
        action = agent.get_action(state)
        next_state, reward, done, _ = step(environment, action)
        agent.remember(state, action, reward, next_state, done)
        total_reward += reward
        state = next_state
        if learn:
            # conf supplies how many epochs each online learn() call runs.
            if conf is None:
                agent.learn()
            else:
                agent.learn(epochs=conf.learn_iterations)
        if done:
            break
    return total_reward
def learn_offline(agent, conf):
    """ Train the agent with its memories.

    Performs conf.offline_epochs replay-learning iterations with exploration
    pinned to epsilon_min, periodically validating with a greedy episode and
    stopping early once the rolling average score clears IS_SOLVED.
    """
    print('Learning with ', len(agent.memory.history), ' memories.')
    agent.epsilon = agent.epsilon_min
    score_history = []
    avg_score_history = []
    desc_train = ''
    pbar = trange(conf.offline_epochs, desc='Loss: x')
    for i in pbar:
        loss = agent.learn(offline=True, epochs=conf.learn_iterations)
        desc = ('Loss: %05.4f' % (loss)) + desc_train
        pbar.set_description(desc)
        pbar.refresh()
        # BUG FIX: the original compared with `is not -1` — identity, not
        # equality, against an int literal (a SyntaxWarning on CPython >= 3.8).
        # Check the -1 sentinel first so we never take `i %` of it.
        if (conf.offline_validate_every_x_iteration != -1
                and i % conf.offline_validate_every_x_iteration == 1):
            score = one_episode(conf.env, agent, conf.render, False, conf=conf)
            score_history.append(score)
            is_solved = np.mean(score_history[-25:])
            desc_train = (', Avg: %05.1f' % (is_solved))
            avg_score_history.append(is_solved)
            if is_solved > IS_SOLVED:
                break
    process_logs(avg_score_history, score_history, conf)
def run(environment, agent, episodes, render=True, learn=True, conf=None):
    """ Run an agent for *episodes* episodes.

    Returns (score_history, avg_score_history); training stops early once the
    100-episode rolling average exceeds IS_SOLVED.
    """
    # BUG FIX: conf defaults to None but was dereferenced unconditionally,
    # crashing any call that relied on the default. Tag the run name only
    # when a config is actually supplied.
    if conf is not None:
        conf.name = str(conf.name) + 'on'
    # Set the exploring rate to its minimum when only evaluating.
    # (epsilon *greedy*)
    if not learn:
        agent.epsilon = agent.epsilon_min
    score_history = []
    avg_score_history = []
    pbar = trange(episodes, desc=agent.name + ' [act, avg]: [0, 0]', unit="Episodes")
    for _ in pbar:
        score = one_episode(environment, agent, render, learn, conf=conf)
        score_history.append(score)
        is_solved = np.mean(score_history[-100:])
        avg_score_history.append(is_solved)
        if is_solved > IS_SOLVED and learn:
            break
        desc = (agent.name + " [act, avg]: [{0:.2f}, {1:.2f}]".format(score, is_solved))
        pbar.set_description(desc)
        pbar.refresh()
    return score_history, avg_score_history
def process_logs(avg_score_history, loss, conf):
    """Persist run results: write a CSV of scores/averages and save a plot.

    *loss* holds per-episode scores and *avg_score_history* their rolling
    average; both land in <save_to>/<name>/<name>.csv and .png.
    """
    df = pd.DataFrame(list(zip(loss, avg_score_history)), columns=['Score', 'Average'])
    # exist_ok replaces the original bare `except: pass`, which would also
    # have swallowed genuine OS errors (e.g. permission denied).
    os.makedirs(conf.save_to + conf.name, exist_ok=True)
    df.to_csv(conf.save_to + conf.name + '/' + conf.name + '.csv')
    # Plot the log history (every second sample to keep the figure light).
    plt.figure()
    plt.plot([i + 1 for i in range(0, len(loss), 2)], loss[::2])
    plt.plot([i + 1 for i in range(0, len(avg_score_history), 2)], avg_score_history[::2], '--')
    plt.title(conf.name)
    plt.savefig(conf.save_to + conf.name + '/' + conf.name + '.png', format="png")
    if conf.render:
        plt.show()
    # NOTE: the original wrote the identical CSV a second time here; the
    # redundant write was removed.
def load_logs(file):
    """Read a run CSV produced by process_logs and return its
    (Score, Average) columns as two pandas Series."""
    frame = pd.read_csv(file)
    return frame["Score"], frame["Average"]