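"""Evolution-strategies style population for training a Walker on
gym's BipedalWalker-v3.

A central walker is copied into a population of mutants; each generation
the mutants are perturbed, evaluated for one episode each, and their
standardized rewards are used to update the central walker's weights.
"""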
import gym
import numpy as np

from walker import Walker

np.random.seed(42)  # fix the RNG seed so mutations are reproducible
class Population:
    def __init__(self, size, hidden_layer, bias, mutation_factor, max_steps,
                 load_brain, version, lr, render_best):
        self.size = size
        self.mutation_factor = mutation_factor
        self.gen = 1
        self.version = version
        self.max_steps = max_steps
        self.render_best = render_best
        # Other environments that work with this setup:
        # MountainCarContinuous-v0, LunarLanderContinuous-v2, Pendulum-v1, CarRacing-v0
        self.env = gym.make('BipedalWalker-v3')
        # The central walker whose weights the population is trained to improve.
        self.walker = Walker(hidden_layer, bias, version, load_brain, self.env)
        self.mutated_weights = dict()
        self.mutants = []
        self.envs = []
        self.rewards = None
        self.lr = lr

        # Build the population; when resuming from a saved brain, every
        # mutant starts from the central walker's weights.
        walker_weights = self.walker.get_weights()
        for _ in range(self.size):
            self.mutants.append(Walker(hidden_layer, bias, version, False, self.env))
            if load_brain:
                self.mutants[-1].set_weights(walker_weights)
    def play_episode(self):
        # Run one episode per mutant and record its total reward.
        self.rewards = np.zeros(self.size)
        for i in range(self.size):
            self.rewards[i] = self.mutants[i].get_reward(self.max_steps)
    def evolve(self):
        # Standardize the rewards so each mutant's score becomes an
        # advantage estimate; the epsilon guards against zero variance.
        A = (self.rewards - np.mean(self.rewards)) / (np.std(self.rewards) + 1e-8)
        weights = self.walker.get_weights()
        # ES-style update: move the central weights toward mutants that
        # scored above average and away from those below it.
        for i in range(self.size):
            for k in weights:
                # A[i] is a scalar, so this just scales the mutant's
                # weights by its advantage.
                weights_change = self.mutants[i].weights[k] * A[i]
                weights[k] = weights[k] + self.lr / (self.size * self.mutation_factor) * weights_change
        self.walker.set_weights(weights)
        # Reset every mutant to the new central weights for the next generation.
        for mutant in self.mutants:
            mutant.set_weights(weights)
        self.gen += 1
        print("Reward: ", self.walker.get_reward(self.max_steps, self.render_best))
    def get_walker_stats(self):
        # Average the central walker's reward over 10 evaluation episodes.
        avg_reward = []
        for _ in range(10):
            avg_reward.append(self.walker.get_reward(10000))
        avg_reward = sum(avg_reward) / len(avg_reward)
        print("Average reward: ", avg_reward)
        return avg_reward
    def mutate(self):
        # Mutates all the weights of the mutants.
        for mutant in self.mutants:
            mutant.mutate(self.mutation_factor)
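
# Example training loop (a minimal sketch; the hyperparameter values below
# are illustrative assumptions, not tuned settings from this project):
if __name__ == "__main__":
    population = Population(size=50, hidden_layer=16, bias=True,
                            mutation_factor=0.1, max_steps=1600,
                            load_brain=False, version=1, lr=0.03,
                            render_best=False)
    for _ in range(100):
        population.mutate()        # perturb each mutant's weights
        population.play_episode()  # collect one episode of reward per mutant
        population.evolve()        # update the central walker from the rewards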