Commit 295ebae7 authored by Tobias Döring

[WIP] increasing performance

parent 73e1da69
1 merge request: !2 Evo neuro
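
The diff below replaces the one-Walker-object-per-mutant design with a single weights dictionary plus a batch of Gaussian noise tensors, which is the standard evolution-strategies (ES) loop: sample noise, evaluate each perturbed copy of the weights, then nudge the weights by the reward-weighted sum of the noise. A minimal sketch of that pattern, assuming a population of `size` perturbations with scale `sigma` and a caller-supplied `evaluate` rollout function (the names `es_step`, `evaluate`, `sigma` are illustrative, not the repository's exact code):

import numpy as np

def es_step(weights, evaluate, size=50, sigma=0.1, lr=0.03):
    # One Gaussian noise tensor per weight matrix, stacked over the population.
    noise = {k: np.random.randn(size, *v.shape) for k, v in weights.items()}
    rewards = np.zeros(size)
    for i in range(size):
        # Evaluate the i-th perturbed copy of the weights (one environment rollout).
        perturbed = {k: v + sigma * noise[k][i] for k, v in weights.items()}
        rewards[i] = evaluate(perturbed)
    # Standardize rewards, then step toward noise that scored well.
    A = (rewards - rewards.mean()) / rewards.std()
    for k in weights:
        weights[k] = weights[k] + lr / (size * sigma) * np.tensordot(A, noise[k], axes=1)
    return weights

Keeping one live environment and one weights dict avoids constructing `size` Walker objects per generation, which appears to be the performance motivation behind this commit.
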
@@ -9,12 +9,19 @@ MUTATION_FACTOR = 0.1 # 0 <= x <= 1
 LEARNING_RATE = 0.03
 GAME_CANCELLED = False
 LOAD_BRAIN = False
-RENDER_BEST = True
-VERSION = 1
+RENDER_BEST = False
+VERSION = 2
+TEST_WALKER = False
+
+if TEST_WALKER:
+    LOAD_BRAIN = True
 
 if __name__ == '__main__':
     population = Population(POP_SIZE, H1, MUTATION_FACTOR, LOAD_BRAIN, VERSION, LEARNING_RATE, RENDER_BEST)
+    if TEST_WALKER:
+        while True:
+            population.test_walker()
     while GAME_CANCELLED is False:  # this is our game
         population.mutate()
         population.play_episode()
@@ -25,5 +32,5 @@ if __name__ == '__main__':
         #print(f'Best Fitness: {population.fitnesses[population.best_walker_index]}')
         print(f'Max Steps: {population.max_steps}')
         # time.sleep(0.1)
-        if population.gen % 10 == 0:
-            population.walker.save()
+        #if population.gen % 10 == 0:
+        #    population.walker.save()
@@ -5,6 +5,8 @@ import copy
 from walker import Walker
 import gym
 
+np.random.seed(42)
+
 MAX_STEPS = 1599  # after 1600 steps the Environment gives us a done anyway.
@@ -17,48 +19,94 @@ class Population:
         self.gen = 1
         self.version = version
         self.max_steps = MAX_STEPS
         self.render_best = render_best
         self.env = gym.make('BipedalWalker-v3')
-        self.walker = Walker(h1, version, load_brain, self.env, self.max_steps)
-        self.mutants = []
+        self.weights = {}
+        self.weights['W1'] = np.random.randn(24, h1) / np.sqrt(24)
+        self.weights['W2'] = np.random.randn(h1, 4) / np.sqrt(h1)
+        #self.walker = Walker(h1, version, load_brain, self.env, self.max_steps)
+        # maybe only have one mutant that is only used to test all the modifications/mutations
+        self.mutant = Walker(h1, version, load_brain, self.env, self.max_steps)
+        self.mutated_weights = dict()
+        #self.mutants = []
         self.envs = []
         self.fitnesses = None
+        self.rewards = None
         self.lr = lr
-        for i in range(self.size):
-            self.mutants.append(Walker(h1, version, load_brain, self.env, self.max_steps))
+        #for i in range(self.size):
+        #    self.mutants.append(Walker(h1, version, load_brain, self.env, self.max_steps))
         if load_brain:
             self.mutate()
 
     # def calculate_fitness_sum(self):
     #     self.fitness_sum = 0
-    #     self.fitnesses = np.zeros(self.size)
+    #     self.rewards = np.zeros(self.size)
     #     for i in range(self.size):
-    #         self.fitnesses[i] = self.mutants[i].fitness
-    #     self.fitnesses -= np.min(self.fitnesses)  # maybe offset: +1
-    #     self.fitness_sum = np.sum(self.fitnesses)
+    #         self.rewards[i] = self.mutants[i].fitness
+    #     self.rewards -= np.min(self.rewards)  # maybe offset: +1
+    #     self.fitness_sum = np.sum(self.rewards)
 
+    def get_action(self, observation, weights):
+        hl = np.matmul(observation, weights['W1'])
+        hl = np.tanh(hl)
+        action = np.matmul(hl, weights['W2'])
+        action = np.tanh(action)
+        return action
+
+    def get_reward(self, weights, render=False):
+        observation = self.env.reset()
+        total_reward = 0
+        for t in range(self.max_steps):
+            if render:
+                self.env.render()
+            action = self.get_action(observation, weights)
+            observation, reward, done, info = self.env.step(action)
+            total_reward += reward
+            if done:
+                break
+        return total_reward
+
     def play_episode(self):
-        self.fitnesses = np.zeros(self.size)
+        self.rewards = np.zeros(self.size)
+        mutated_weights = dict()
         for i in range(self.size):
-            self.fitnesses[i] = self.mutants[i].get_reward()
+            for k, v in self.weights.items():
+                mutated_weights[k] = v + self.mutation_factor * self.mutated_weights[k][i]
+            self.rewards[i] = self.get_reward(mutated_weights)
+            #self.mutant.set_weights(mutated_weights)
+            #self.rewards[i] = self.mutant.get_reward()
 
     def evolve(self):
-        R = self.fitnesses
-        A = (R - np.mean(R)) / np.std(R)
-        weights = self.walker.get_weights()
-        for i in range(self.size):
-            for k in weights:
-                weights_change = np.dot(self.mutants[i].weights[k].T, A[i]).T
-                weights[k] += self.lr / (self.size * self.mutation_factor) * weights_change
-        self.walker.set_weights(weights)
-        for mutant in self.mutants:
-            mutant.set_weights(weights)
+        #R = self.rewards
+        A = (self.rewards - np.mean(self.rewards)) / np.std(self.rewards)
+        #weights = self.walker.get_weights()
+        #weights = self.weights
+        for k in self.weights:
+            weights_change = np.dot(self.mutated_weights[k].transpose(1, 2, 0), A)
+            self.weights[k] = self.weights[k] + self.lr / (self.size * self.mutation_factor) * weights_change
+        #self.walker.set_weights(weights)
         self.gen += 1
-
-    def mutate(self):  # mutates all the brains of the babies
-        for mutant in self.mutants:  # we don't want to mutate the champion's brain
-            mutant.mutate(self.mutation_factor)
         if self.render_best:
             self.test_walker()
 
     def test_walker(self):
+        #reward = self.walker.get_reward(render=True)
+        #print(reward)
+        return
 
+    def mutate(self):  # mutates all the weights of the mutants
+        #for i in range(len(self.mutants)):
+        #    self.mutants[i].mutate(self.mutation_factor)
+        #weights = self.walker.get_weights()
+        self.mutated_weights = {}
+        #weights = self.weights
+        for k, v in self.weights.items():
+            self.mutated_weights[k] = np.random.randn(self.size, v.shape[0], v.shape[1])
 
-    def increase_moves(self, size):  # increase the number of directions for the brain
-        if len(self.mutants[0].brain.directions) < self.max_steps:
-            for walker in self.mutants:
-                walker.brain.increase_moves(size)
+    # def increase_moves(self, size):  # increase the number of directions for the brain
+    #     if len(self.mutants[0].brain.directions) < self.max_steps:
+    #         for walker in self.mutants:
+    #             walker.brain.increase_moves(size)
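
For readers checking the new evolve(): np.dot(self.mutated_weights[k].transpose(1, 2, 0), A) contracts the population axis, i.e. it computes the reward-weighted sum of the per-mutant noise matrices in a single call. A small standalone check (shapes chosen arbitrarily for illustration, not repository code):

import numpy as np

size, n, m = 5, 24, 16                 # population size, weight-matrix shape
eps = np.random.randn(size, n, m)      # one noise matrix per mutant
A = np.random.randn(size)              # standardized rewards

# Vectorized form used in evolve(): (n, m, size) . (size,) -> (n, m)
vectorized = np.dot(eps.transpose(1, 2, 0), A)

# Equivalent explicit loop over the population: sum_i A[i] * eps[i]
looped = sum(A[i] * eps[i] for i in range(size))

assert np.allclose(vectorized, looped)
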
@@ -28,10 +28,12 @@ class Walker:
         return action
 
-    def get_reward(self):
+    def get_reward(self, render=False):
         observation = self.env.reset()
         total_reward = 0
         for t in range(self.steps):
+            if render:
+                self.env.render()
             action = self.get_action(observation)
             observation, reward, done, info = self.env.step(action)
             total_reward += reward
@@ -45,10 +47,12 @@ class Walker:
             self.weights[k] = v + mutation_rate * np.random.randn(v.shape[0], v.shape[1])
 
     def get_weights(self):
-        return copy.deepcopy(self.weights)
+        #return copy.deepcopy(self.weights)
+        return self.weights
 
     def set_weights(self, weights):
-        self.weights = copy.deepcopy(weights)
+        #self.weights = copy.deepcopy(weights)
+        self.weights = weights
 
     def save(self):
         if not os.path.isdir('./models'):
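
A caveat on the get_weights/set_weights change in Walker: dropping copy.deepcopy saves an allocation per call, but callers now share the live dictionary, so in-place edits propagate back to the walker. A tiny illustration of the aliasing (hypothetical, not repository code):

import numpy as np

weights = {'W1': np.zeros((2, 2))}

alias = weights             # what get_weights() now returns: the same object
alias['W1'] += 1.0          # an in-place edit through the alias...
print(weights['W1'][0, 0])  # ...is visible in the original: prints 1.0
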