Commit 1841113a authored by Philip Maas

Merge branch 'evo-neuro' into 'evaluations'

# Conflicts:
#   main.py
#   population.py
#   walker.py
parents 1f761db3 8c5bdea8
Related merge requests: !2 Evo neuro, !1 Evaluations
.gitignore
models/
__pycache__/
\ No newline at end of file
main.py
from population import Population
import time
import matplotlib.pyplot as plt
import pickle
import sys
INCREASE_BY = 5
BRAIN_SIZE = 50
HIDDEN_LAYER = 12
POP_SIZE = 50
MUTATION_FACTOR = 0.1 # 0 <= x <= 1
GAME_CANCELLED = False
LOAD_BRAIN = True  # if this is True, BRAIN_SIZE doesn't matter
LEARNING_RATE = 0.03 # 0 <= x <= 1
GENS = 1000
MAX_STEPS = 300  # the environment returns done after 1600 steps anyway
VERSION = 1
TEST_WALKER = True
LOAD_BRAIN = False
RENDER_BEST = False
if TEST_WALKER:
LOAD_BRAIN = True
def plot_reward(rewards):
plt.title(f'{HIDDEN_LAYER}, {VERSION}, {POP_SIZE}, {LEARNING_RATE}')
plt.xlabel('Episodes/10')
plt.ylabel('Rewards')
plt.plot(rewards)
plt.savefig(f'./models/{HIDDEN_LAYER}_{VERSION}_{POP_SIZE}_{LEARNING_RATE}.png')
plt.show()
if __name__ == '__main__':
population = Population(POP_SIZE, BRAIN_SIZE, MUTATION_FACTOR, LOAD_BRAIN, RENDER_BEST)
while GAME_CANCELLED is False:  # main game loop
if population.all_players_finished():  # once every walker of this generation is dead or done, run the genetic algorithm step
population.natural_selection()
population.mutate_babies()
population.increase_moves(INCREASE_BY)
population.reset_environments()
print(f'Gen: {population.gen}')
print(f'Best Index: {population.best_walker_index}')
print(f'Best Fitness: {population.fitnesses[population.best_walker_index]}')
print(f'Max Steps: {population.max_steps}')
else:
population.update()
# time.sleep(0.1)
avg_rewards = []
try:
population = Population(POP_SIZE, HIDDEN_LAYER, MUTATION_FACTOR, MAX_STEPS, LOAD_BRAIN, VERSION, LEARNING_RATE, RENDER_BEST)
if TEST_WALKER:
rewards = []
population.walker.plot_input_weights()
for i in range(10):
rewards.append(population.walker.get_reward(10000, True))
print("Reward: ", rewards[-1])
print("Average Reward: ", sum(rewards) / len(rewards))
plot_reward(rewards)
sys.exit(0)
for gen in range(GENS):  # main training loop
start_time = time.time()
print(f'Gen: {gen}')
#print(f'Max Steps: {population.max_steps}')
population.mutate()
population.play_episode()
population.evolve()
#population.increase_moves(INCREASE_BY)
print("Time for Gen: ", time.time() - start_time)
if gen % 10 == 0:
population.walker.save()
avg_rewards.append(population.get_walker_stats())
with open(f'./models/{HIDDEN_LAYER}_{VERSION}_{POP_SIZE}_{LEARNING_RATE}_AvgRewards', 'wb') as fp:
pickle.dump(avg_rewards, fp)
if gen == 1000:  # note: range(GENS) ends at GENS - 1, so with GENS == 1000 this branch never runs
population.lr = 0.01
plot_reward(avg_rewards)
except KeyboardInterrupt:
if not TEST_WALKER:
plot_reward(avg_rewards)
population.py
@@ -3,107 +3,82 @@
import random
import logging
import copy
from walker import Walker
import gym
MAX_STEPS = 1599  # the environment returns done after 1600 steps anyway
np.random.seed(42)
class Population:
def __init__(self, size, brain_size, mutation_factor, load_brain, render_best):
def __init__(self, size, hidden_layer, mutation_factor, max_steps, load_brain, version, lr, render_best):
self.size = size
self.brain_size = brain_size
self.mutation_factor = mutation_factor
self.fitness_sum = 0.0
#self.fitness_sum = 0.0
self.gen = 1
self.best_walker_index = 0 # index of the best player in self.players
self.best_walker_fitness = 0.0
self.max_steps = MAX_STEPS
self.walkers = []
self.version = version
self.max_steps = max_steps
self.render_best = render_best
self.env = gym.make('BipedalWalker-v3')
self.walker = Walker(hidden_layer, version, load_brain, self.env)
self.mutated_weights = dict()
self.mutants = []
self.envs = []
self.fitnesses = None
self.rewards = None
self.lr = lr
walker_weights = self.walker.get_weights()
for i in range(self.size):
self.walkers.append(Walker(self.brain_size, load_brain, render_best))
self.reset_environments()
self.mutants.append(Walker(hidden_layer, version, False, self.env))
if load_brain:
self.mutate_babies()
self.mutants[-1].set_weights(walker_weights)
def reset_environments(self):
for walker in self.walkers:
walker.reset_environment()
# def calculate_fitness_sum(self):
# self.fitness_sum = 0
# self.rewards = np.zeros(self.size)
# for i in range(self.size):
# self.rewards[i] = self.mutants[i].fitness
# self.rewards -= np.min(self.rewards) # maybe offset: +1
# self.fitness_sum = np.sum(self.rewards)
def update(self):
for walker in self.walkers:
# if the walker has taken more steps than the best walker needed to reach the goal, it is marked dead
if walker.brain.step >= self.max_steps:
walker.dead = True
else:
walker.update()
def get_action(self, observation, weights):
hl = np.matmul(observation, weights['W1'])
hl = np.tanh(hl)
action = np.matmul(hl, weights['W2'])
action = np.tanh(action)
"""def calculate_fitness(self): # calculate the fitness of all players
for walker in self.walkers:
walker.calculate_fitness()"""
return action
def calculate_fitness_sum(self):
self.fitness_sum = 0
self.fitnesses = np.zeros(self.size)
def play_episode(self):
self.rewards = np.zeros(self.size)
for i in range(self.size):
self.fitnesses[i] = self.walkers[i].fitness
self.fitnesses -= np.min(self.fitnesses) # maybe offset: +1
self.fitness_sum = np.sum(self.fitnesses)
def all_players_finished(self): # returns whether all the players are either dead or have reached the goal
for walker in self.walkers:
if walker.dead is False and walker.reached_goal is False:
return False
return True
self.rewards[i] = self.mutants[i].get_reward(self.max_steps)
def natural_selection(self): # gets the next generation of players
#for i in range(self.size):
#new_walkers.append(Walker(self.envs[i], self.brain_size))
self.calculate_fitness_sum()
self.set_best_walker()
self.walkers[self.best_walker_index].brain.save()
# the champion lives on
new_walkers = [self.walkers[self.best_walker_index].get_baby()]
new_walkers[0].is_best = True
for i in range(1, self.size):
parent = self.select_parent() # select parent based on fitness
new_walkers.append(parent.get_baby()) # get baby from them
self.walkers = copy.copy(new_walkers)
def evolve(self):
A = (self.rewards - np.mean(self.rewards)) / np.std(self.rewards)
weights = self.walker.get_weights()
for i in range(self.size):
for k in weights:
weights_change = np.dot(self.mutants[i].weights[k].T, A[i]).T
weights[k] = weights[k] + self.lr/(self.size*self.lr) * weights_change  # note: lr/(size*lr) simplifies to 1/size, so the learning rate cancels out (a standalone sketch follows this method)
self.walker.set_weights(weights)
for mutant in self.mutants:
mutant.set_weights(weights)
self.gen += 1
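For reference, here is a standalone sketch of the weight update that evolve() performs. This is an editorial illustration rather than part of the commit; the helper name es_update and the plain-dict weight format are assumptions. Because A[i] is a scalar, np.dot(self.mutants[i].weights[k].T, A[i]).T is simply A[i] * self.mutants[i].weights[k], and lr / (size * lr) reduces to 1 / size, so the update adds a reward-weighted average of the mutants' weights onto the current weights:

import numpy as np

def es_update(parent_weights, mutant_weights_list, rewards):
    # parent_weights: dict of NumPy arrays, e.g. {'W1': ..., 'W2': ...}
    # mutant_weights_list: one such dict per mutant
    # rewards: 1-D array with one episode reward per mutant
    A = (rewards - np.mean(rewards)) / np.std(rewards)  # standardize the rewards
    pop_size = len(mutant_weights_list)
    new_weights = {k: v.copy() for k, v in parent_weights.items()}
    for i in range(pop_size):
        for k in new_weights:
            # lr / (pop_size * lr) == 1 / pop_size, so the learning rate has no effect here
            new_weights[k] += (1.0 / pop_size) * A[i] * mutant_weights_list[i][k]
    return new_weights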
# Chooses a walker from the population at random, weighted by fitness (roulette-wheel selection).
# A value ('arrow') is drawn uniformly between 0 and the fitness sum; the fitnesses are then added
# to a running sum, and the first walker whose running sum exceeds the arrow is returned.
# Walkers with higher fitness add more to the running sum, so they are more likely to be chosen.
# (A vectorized equivalent is sketched right after this method.)
def select_parent(self):
arrow = random.uniform(0, self.fitness_sum)
running_sum = 0.0  # running total of the fitnesses (the 'bars' laid end to end)
for i in range(self.size):
running_sum += self.fitnesses[i]
if running_sum > arrow:
return self.walkers[i]
# should never get to this point
logging.error("Critical Error in select_parent")
return None
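As a side note, the same fitness-proportionate draw can be written in a few vectorized lines. This is an editorial sketch, not part of the commit; the helper name select_parent_index is made up for illustration, and fitnesses is assumed to be the non-negative array built in calculate_fitness_sum:

import numpy as np

def select_parent_index(fitnesses):
    # Cumulative fitnesses are the 'bars' laid end to end; searchsorted finds the bar the arrow lands in.
    cumulative = np.cumsum(np.asarray(fitnesses, dtype=float))
    arrow = np.random.uniform(0, cumulative[-1])
    return int(np.searchsorted(cumulative, arrow, side='right'))

Equivalently, np.random.choice(len(fitnesses), p=fitnesses / fitnesses.sum()) performs the same weighted draw in one call, provided the fitness sum is positive.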
print("Reward: ", self.walker.get_reward(self.max_steps, self.render_best))
def mutate_babies(self): # mutates all the brains of the babies
for i in range(1, len(self.walkers)): # we don't want to mutate the champion's brain
self.walkers[i].brain.mutate(self.mutation_factor)
def get_walker_stats(self):
avg_reward = []
for i in range(10):
avg_reward.append(self.walker.get_reward(10000))
avg_reward = sum(avg_reward) / len(avg_reward)
print("Average reward: ", avg_reward)
return avg_reward
def set_best_walker(self): # finds the player with the highest fitness and sets it as the best one
max_index = np.argmax(self.fitnesses)
self.best_walker_index = max_index
self.best_walker_fitness = self.walkers[max_index].fitness
def mutate(self): # mutates all the weights of the mutants
for i in range(len(self.mutants)):
self.mutants[i].mutate(self.mutation_factor)
# if this walker reached the goal, lower the step limit to the number of steps it needed
if self.walkers[max_index].reached_goal:
self.max_steps = self.walkers[max_index].brain.step
logging.info('Found goal?!')
logging.info("step:", self.max_steps)
def increase_moves(self, size): # increase the number of directions for the brain
if len(self.walkers[0].brain.directions) < self.max_steps:
for walker in self.walkers:
walker.brain.increase_moves(size)
# def increase_moves(self, size): # increase the number of directions for the brain
# if len(self.mutants[0].brain.directions) < self.max_steps:
# for walker in self.mutants:
# walker.brain.increase_moves(size)
walker.py
from brain import Brain
import gym
import numpy as np
import pickle
import copy
import os
import matplotlib.pyplot as plt
np.random.seed(42)
class Walker:
def __init__(self, brain_size, load_brain, render_best):
self.brain = Brain(brain_size, load_brain) # new brain with X instructions
self.dead = False
self.reached_goal = False
self.is_best = False # true if this dot is the best dot from the previous generation
self.fitness = 0.0
self.env = gym.make('BipedalWalker-v3')
self.render_best = render_best
# self.pos = copy.copy(self.map.startpoint)
def update(self):  # moves the walker according to the brain's directions
if (self.dead is True) or (self.reached_goal is True):
return
if self.brain.step >= len(self.brain.directions):
self.dead = True
return
observation, reward, done, info = self.env.step(self.brain.get_move())
self.fitness += reward
if reward == -100:
self.dead = True
elif done is True:
self.reached_goal = True
self.fitness += 10000000
if self.is_best and self.render_best:
def __init__(self, hidden_layer, version, load_brain, env):
self.version = version
self.hidden_layer = hidden_layer
if load_brain:
self.load()
else:
self.weights = {}
self.weights['W1'] = np.random.randn(24, hidden_layer) / np.sqrt(24)  # input layer: 24 observation values, scaled by sqrt(fan-in)
self.weights['W2'] = np.random.randn(hidden_layer, 4) / np.sqrt(hidden_layer)  # output layer: 4 continuous joint actions
self.env = env
#self.render_best = render_best
def get_action(self, observation):
hl = np.matmul(observation, self.weights['W1'])
hl = np.tanh(hl)
action = np.matmul(hl, self.weights['W2'])
action = np.tanh(action)
return action
def get_reward(self, steps, render = False):
observation = self.env.reset()
total_reward = 0
for t in range(steps):
if render:
self.env.render()
action = self.get_action(observation)
observation, reward, done, info = self.env.step(action)
total_reward += reward
if done:
break
return total_reward
def mutate(self, mutation_rate):
for k, v in self.weights.items():
self.weights[k] = v + mutation_rate * np.random.randn(v.shape[0], v.shape[1])
def get_weights(self):
return copy.deepcopy(self.weights)
def set_weights(self, weights):
self.weights = copy.deepcopy(weights)
def plot_input_weights(self):
weights = []
names = [
"hull_angle",
"hull_angularVelocity",
"vel_x",
"vel_y",
"hip_joint_1_angle",
"hip_joint_2_angle",
"knee_joint_1_angle",
"knee_joint_2_angle",
"leg_1_ground_contact_flag",
"hip_joint_2_angle",
"hip_joint_2_speed",
"knee_joint_2_angle",
"knee_joint_2_speed",
"leg_2_ground_contact_flag",
"lidar reading 1",
"lidar reading 2",
"lidar reading 3",
"lidar reading 4",
"lidar reading 5",
"lidar reading 6",
"lidar reading 7",
"lidar reading 8",
"lidar reading 9",
"lidar reading 10"
]
for i in range(24):
weights.append(sum(self.weights['W1'][i]))  # total outgoing weight from observation feature i
plt.bar(names, weights)
plt.xticks(rotation = 45, ha = "right")
plt.show()
def save(self):
if not os.path.isdir('./models'):
os.mkdir('./models')
with open('./models/model-pedal%d.p' % self.version, 'wb') as fp:
pickle.dump(self.weights, fp)
""" def get_fitness(self):
if self.reached_goal:
# if the dot reached the goal then the fitness is based on the amount of steps it took to get there
self.fitness = 1 / 16 + 10000.0 / (self.brain.step ** 2)
else: # if the dot didn't reach the goal then the fitness is based on how close it is to the goal
self.fitness = 1 / (self.map.get_closest_distance(self.pos[X], self.pos[Y]) ** 2)
return self.fitness"""
def reset_environment(self):
self.env.reset()
def get_baby(self):
baby = Walker(0, False, self.render_best)
baby.brain = self.brain.clone() # babies have the same brain as their parents
self.env.close()
return baby
def load(self):
with open('./models/model-pedal%d.p' % self.version, 'rb') as fp:
self.weights = pickle.load(fp)
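Finally, a minimal usage sketch for the Walker class above (an editorial illustration, not part of the commit). It assumes gym with BipedalWalker-v3 is installed and relies on the classic gym API that the file itself uses (env.reset() returns only the observation). The policy maps the 24-dimensional observation through one tanh hidden layer to a 4-dimensional action whose components all lie in [-1, 1]:

import gym
from walker import Walker

env = gym.make('BipedalWalker-v3')
walker = Walker(hidden_layer=12, version=1, load_brain=False, env=env)
observation = env.reset()
action = walker.get_action(observation)      # NumPy array of shape (4,), values in [-1, 1]
total_reward = walker.get_reward(steps=300)  # play one episode of at most 300 steps
print(action, total_reward)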