# population.py
import numpy as np
from walker import Walker
import gym
np.random.seed(42)
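
# Population of Walker policies trained with an evolution-strategies style loop:
# mutate() perturbs every mutant's weights, play_episode() scores each mutant on
# BipedalWalker-v3, and evolve() folds the reward-weighted mutant weights back
# into the base walker's weights.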
class Population:
    def __init__(self, size, hidden_layer, mutation_factor, max_steps, load_brain, version, lr, render_best):
        self.size = size
        self.mutation_factor = mutation_factor
        # self.fitness_sum = 0.0
        self.gen = 1
        self.version = version
        self.max_steps = max_steps
        self.render_best = render_best
        self.env = gym.make('BipedalWalker-v3')
        # base walker whose weights the population evolves around
        self.walker = Walker(hidden_layer, version, load_brain, self.env)
        self.mutated_weights = dict()
        self.mutants = []
        self.envs = []
        self.rewards = None
        self.lr = lr
        # create the population of mutants, optionally seeded with the loaded weights
        walker_weights = self.walker.get_weights()
        for i in range(self.size):
            self.mutants.append(Walker(hidden_layer, version, False, self.env))
            if load_brain:
                self.mutants[-1].set_weights(walker_weights)

    # def calculate_fitness_sum(self):
    #     self.fitness_sum = 0
    #     self.rewards = np.zeros(self.size)
    #     for i in range(self.size):
    #         self.rewards[i] = self.mutants[i].fitness
    #     self.rewards -= np.min(self.rewards)  # maybe offset: +1
    #     self.fitness_sum = np.sum(self.rewards)
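
    # Forward pass of the two-layer tanh policy: the observation is mapped through
    # hidden weights 'W1' and output weights 'W2', giving an action in [-1, 1]
    # for each actuator.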
    def get_action(self, observation, weights):
        hl = np.matmul(observation, weights['W1'])
        hl = np.tanh(hl)
        action = np.matmul(hl, weights['W2'])
        action = np.tanh(action)
        return action

    def play_episode(self):
        # run one episode per mutant and store the episode rewards
        self.rewards = np.zeros(self.size)
        for i in range(self.size):
            self.rewards[i] = self.mutants[i].get_reward(self.max_steps)
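
    # Evolution step: rewards are standardised into advantages A and the base
    # walker's weights are nudged along the advantage-weighted average of the
    # mutant weights; the result is copied back to every mutant for the next
    # generation.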
    def evolve(self):
        # standardise rewards into advantages so the update is scale-invariant
        A = (self.rewards - np.mean(self.rewards)) / np.std(self.rewards)
        weights = self.walker.get_weights()
        for i in range(self.size):
            for k in weights:
                weights_change = np.dot(self.mutants[i].weights[k].T, A[i]).T
                # step scaled by the learning rate, averaged over the population and
                # divided by the mutation strength, as in an evolution-strategies update
                weights[k] = weights[k] + self.lr / (self.size * self.mutation_factor) * weights_change
        self.walker.set_weights(weights)
        for mutant in self.mutants:
            mutant.set_weights(weights)
        self.gen += 1
        print("Reward: ", self.walker.get_reward(self.max_steps, self.render_best))

    def get_walker_stats(self):
        # average reward of the current base walker over 10 evaluation episodes
        avg_reward = []
        for i in range(10):
            avg_reward.append(self.walker.get_reward(10000))
        avg_reward = sum(avg_reward) / len(avg_reward)
        print("Average reward: ", avg_reward)
        return avg_reward

    def mutate(self):  # mutates all the weights of the mutants
        for i in range(len(self.mutants)):
            self.mutants[i].mutate(self.mutation_factor)

    # def increase_moves(self, size):  # increase the number of directions for the brain
    #     if len(self.mutants[0].brain.directions) < self.max_steps:
    #         for walker in self.mutants:
    #             walker.brain.increase_moves(size)
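

# A minimal sketch of how this class is presumably driven (the actual training
# entry point lives in another file of the project); the constructor arguments
# below are illustrative guesses, not values from the original repository.
if __name__ == '__main__':
    population = Population(size=50, hidden_layer=12, mutation_factor=0.1,
                            max_steps=1600, load_brain=False, version=1,
                            lr=0.03, render_best=False)
    for generation in range(100):
        population.mutate()        # perturb every mutant's weights
        population.play_episode()  # score the perturbed mutants
        population.evolve()        # fold the scores back into the base walker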