Commit a68bbf2e authored by Tobias Döring

Added plotting and minor fixes

parent c570abf0
1 merge request: !2 Evo neuro
@@ -2,40 +2,63 @@ from population import Population
import time
import matplotlib.pyplot as plt
import pickle
import sys
INCREASE_BY = 5
H1 = 12
HIDDEN_LAYER = 12
POP_SIZE = 50
MUTATION_FACTOR = 0.1 # 0 <= x <= 1
LEARNING_RATE = 0.03
GAME_CANCELLED = False
LEARNING_RATE = 0.03 # 0 <= x <= 1
GENS = 1000
MAX_STEPS = 300 # after 1600 steps the Environment gives us a done anyway.
VERSION = 1
TEST_WALKER = True
LOAD_BRAIN = False
RENDER_BEST = False
VERSION = 2
TEST_WALKER = False
if TEST_WALKER:
LOAD_BRAIN = True
if __name__ == '__main__':
population = Population(POP_SIZE, H1 ,MUTATION_FACTOR, LOAD_BRAIN, VERSION, LEARNING_RATE, RENDER_BEST)
def plot_reward(rewards):
plt.title(f'{HIDDEN_LAYER}, {VERSION}, {POP_SIZE}, {LEARNING_RATE}')
plt.xlabel('Episodes/10')
plt.ylabel('Rewards')
plt.plot(rewards)
plt.savefig(f'./models/{HIDDEN_LAYER}_{VERSION}_{POP_SIZE}_{LEARNING_RATE}.png')
plt.show()
if __name__ == '__main__':
avg_rewards = []
try:
population = Population(POP_SIZE, HIDDEN_LAYER ,MUTATION_FACTOR, MAX_STEPS, LOAD_BRAIN, VERSION, LEARNING_RATE, RENDER_BEST)
if TEST_WALKER:
while True:
population.test_walker()
rewards = []
for i in range(10):
rewards.append(population.walker.get_reward(10000, True))
print("Reward: ", rewards[-1])
print("Average Reward: ", sum(rewards) / len(rewards))
plot_reward(rewards)
sys.exit(0)
while GAME_CANCELLED is False: # this is our game
for gen in range(GENS): # this is our game
start_time = time.time()
print(f'Gen: {population.gen}')
print(f'Max Steps: {population.max_steps}')
print(f'Gen: {gen}')
#print(f'Max Steps: {population.max_steps}')
population.mutate()
population.play_episode()
population.evolve()
#population.increase_moves(INCREASE_BY)
print("Time for Gen: ", time.time() - start_time)
if population.gen % 10 == 0:
if gen % 10 == 0:
population.walker.save()
avg_rewards.append(population.get_walker_stats())
with open('./models/model-pedal%d-rewards.p' % VERSION, 'wb') as fp:
with open(f'./models/{HIDDEN_LAYER}_{VERSION}_{POP_SIZE}_{LEARNING_RATE}_AvgRewards', 'wb') as fp:
pickle.dump(avg_rewards, fp)
plot_reward(avg_rewards)
except KeyboardInterrupt:
if not TEST_WALKER:
plot_reward(avg_rewards)
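For context: besides the plot_reward helper, the new training loop pickles the running list of average rewards every 10 generations. A minimal sketch of how that pickle could be reloaded and re-plotted in a separate script follows; the script itself is not part of this commit, and the hyperparameter values simply mirror the constants above.

# Sketch (not part of the commit): reload the pickled average-reward history.
# Assumes the file written by the loop above, i.e. a plain pickled list of floats.
import pickle
import matplotlib.pyplot as plt

HIDDEN_LAYER, VERSION, POP_SIZE, LEARNING_RATE = 12, 2, 50, 0.03  # must match the training run

with open(f'./models/{HIDDEN_LAYER}_{VERSION}_{POP_SIZE}_{LEARNING_RATE}_AvgRewards', 'rb') as fp:
    avg_rewards = pickle.load(fp)  # one entry per 10 generations

plt.title(f'{HIDDEN_LAYER}, {VERSION}, {POP_SIZE}, {LEARNING_RATE}')
plt.xlabel('Episodes/10')
plt.ylabel('Rewards')
plt.plot(avg_rewards)
plt.show()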
@@ -7,31 +7,28 @@ import gym
np.random.seed(42)
MAX_STEPS = 300 # after 1600 steps the Environment gives us a done anyway.
class Population:
def __init__(self, size, h1, mutation_factor, load_brain, version, lr, render_best):
def __init__(self, size, hidden_layer, mutation_factor, max_steps, load_brain, version, lr, render_best):
self.size = size
self.mutation_factor = mutation_factor
#self.fitness_sum = 0.0
self.gen = 1
self.version = version
self.max_steps = MAX_STEPS
self.max_steps = max_steps
self.render_best = render_best
self.env = gym.make('BipedalWalker-v3')
self.walker = Walker(h1, version, load_brain, self.env, self.max_steps)
self.walker = Walker(hidden_layer, version, load_brain, self.env)
self.mutated_weights = dict()
self.mutants = []
self.envs = []
self.rewards = None
self.average_reward = None
self.lr = lr
walker_weights = self.walker.get_weights()
for i in range(self.size):
self.mutants.append(Walker(h1, version, load_brain, self.env, self.max_steps))
self.mutants.append(Walker(hidden_layer, version, False, self.env))
if load_brain:
self.mutate()
self.mutants[-1].set_weights(walker_weights)
# def calculate_fitness_sum(self):
# self.fitness_sum = 0
@@ -49,24 +46,10 @@ class Population:
return action
def get_reward(self, weights, render = False):
observation = self.env.reset()
total_reward = 0
for t in range(self.max_steps):
if render:
self.env.render()
action = self.get_action(observation, weights)
observation, reward, done, info = self.env.step(action)
total_reward += reward
if done:
break
return total_reward
def play_episode(self):
self.rewards = np.zeros(self.size)
for i in range(self.size):
self.rewards[i] = self.mutants[i].get_reward()
self.rewards[i] = self.mutants[i].get_reward(self.max_steps)
def evolve(self):
A = (self.rewards - np.mean(self.rewards)) / np.std(self.rewards)
@@ -80,22 +63,14 @@
mutant.set_weights(weights)
self.gen += 1
self.test_walker(self.render_best)
def test_walker(self, render):
reward = self.walker.get_reward(render=render)
if self.average_reward is None:
self.average_reward = reward
else:
self.average_reward = 0.9 * self.average_reward + 0.1 * reward
print("Current Reward: ", reward)
print("Average Reward: ", self.average_reward)
print("Reward: ", self.walker.get_reward(self.max_steps, self.render_best))
def get_walker_stats(self):
avg_reward = self.walker.get_reward(steps=10000)
for i in range(9):
reward = self.walker.get_reward(steps=10000)
avg_reward = 0.9 * avg_reward + 0.1 * reward
avg_reward = []
for i in range(10):
avg_reward.append(self.walker.get_reward(10000))
avg_reward = sum(avg_reward) / len(avg_reward)
print("Average reward: ", avg_reward)
return avg_reward
def mutate(self): # mutates all the weights of the mutants
......
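For context: the visible part of evolve() standardizes the episode rewards, A = (rewards - mean) / std, which is the fitness normalization used in OpenAI-style evolution strategies; the actual weight update is collapsed in this diff. Below is a minimal sketch of that kind of update, where each mutant's perturbation is weighted by its standardized reward and scaled by the learning rate. The function and variable names (es_update, base_weights, noises) are illustrative assumptions, not the repository's evolve() implementation.

import numpy as np

def es_update(base_weights, noises, rewards, lr, sigma):
    # Sketch of an OpenAI-ES style step: move the base weights in the
    # direction of reward-weighted mutation noise.
    #   base_weights: dict of weight matrices (e.g. 'W1', 'W2')
    #   noises:       list of per-mutant perturbation dicts actually applied
    #   rewards:      np.array of episode rewards, one per mutant
    A = (rewards - np.mean(rewards)) / np.std(rewards)  # same normalization as in evolve()
    for key in base_weights:
        grad = sum(A[i] * noises[i][key] for i in range(len(noises))) / len(noises)
        base_weights[key] = base_weights[key] + lr / sigma * grad  # sigma ~ mutation_factor
    return base_weights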
@@ -8,15 +8,16 @@ np.random.seed(42)
class Walker:
def __init__(self, h1, version, load_brain, env, steps):
def __init__(self, hidden_layer, version, load_brain, env):
self.version = version
self.h1 = h1
self.weights = {}
self.weights['W1'] = np.random.randn(24, h1) / np.sqrt(24)
self.weights['W2'] = np.random.randn(h1, 4) / np.sqrt(h1)
self.hidden_layer = hidden_layer
if load_brain:
self.load()
self.steps = steps
else:
self.weights = {}
self.weights['W1'] = np.random.randn(24, hidden_layer) / np.sqrt(24)
self.weights['W2'] = np.random.randn(hidden_layer, 4) / np.sqrt(hidden_layer)
self.env = env
#self.render_best = render_best
@@ -28,11 +29,9 @@ class Walker:
return action
def get_reward(self, steps = None, render = False):
def get_reward(self, steps, render = False):
observation = self.env.reset()
total_reward = 0
if steps is None:
steps = self.steps
for t in range(steps):
if render:
self.env.render()
......
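For context: the Walker now stores W1 with shape (24, hidden_layer) and W2 with shape (hidden_layer, 4), matching BipedalWalker-v3's 24-dimensional observations and 4-dimensional actions. The body of get_action is collapsed in this diff; a plausible minimal forward pass for such a two-layer policy is sketched below. The activation choice (tanh, which keeps actions in [-1, 1]) is an assumption, not necessarily the repository's code.

import numpy as np

def get_action_sketch(observation, weights):
    # Two-layer policy matching the weight shapes above:
    # observation (24,) -> hidden (hidden_layer,) -> action (4,)
    hidden = np.tanh(np.dot(observation, weights['W1']))  # (24,) @ (24, h) -> (h,)
    action = np.tanh(np.dot(hidden, weights['W2']))       # (h,) @ (h, 4) -> (4,)
    return action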