Skip to content
Snippets Groups Projects
Commit 73e1da69 authored by Tobias Döring
Browse files

initial neuronal network evolution strategy

parents
No related branches found
No related tags found
1 merge request: !2 "Evo neuro"
models/
__pycache__/
\ No newline at end of file
main.py 0 → 100644
from population import Population
import time
import matplotlib.pyplot as plt
# --- Run configuration -------------------------------------------------------
INCREASE_BY = 5          # only used by the disabled increase_moves() call
H1 = 12                  # forwarded to Population as its h1 argument
POP_SIZE = 50            # number of mutants evaluated per generation
MUTATION_FACTOR = 0.1    # 0 <= x <= 1
LEARNING_RATE = 0.03
GAME_CANCELLED = False   # loop guard; nothing in this script ever sets it to True
LOAD_BRAIN = False
RENDER_BEST = True
VERSION = 1

if __name__ == '__main__':
    population = Population(
        POP_SIZE,
        H1,
        MUTATION_FACTOR,
        LOAD_BRAIN,
        VERSION,
        LEARNING_RATE,
        RENDER_BEST,
    )

    # One iteration == one generation: perturb, evaluate, update.
    while not GAME_CANCELLED:  # this is our game
        population.mutate()
        population.play_episode()
        population.evolve()

        print(f'Gen: {population.gen}')
        print(f'Max Steps: {population.max_steps}')

        # Checkpoint the reference walker every tenth generation.
        if population.gen % 10 == 0:
            population.walker.save()
import numpy as np
import random
import logging
import copy
from walker import Walker
import gym
MAX_STEPS = 1599  # after 1600 steps the Environment gives us a done anyway.


class Population:
    """A reference walker plus a set of mutants trained by an evolution strategy.

    Each generation the caller is expected to call ``mutate()``,
    ``play_episode()`` and ``evolve()`` in that order: the mutants are
    perturbed, each one is scored, and the scores are used to move the
    reference walker's weights.
    """

    def __init__(self, size, h1, mutation_factor, load_brain, version, lr, render_best):
        """Create the shared environment, the reference walker and the mutants.

        Args:
            size: Number of mutants.
            h1: Forwarded to every ``Walker`` as its ``h1`` argument.
            mutation_factor: Scale handed to ``Walker.mutate``; also used as a
                divisor in ``evolve``, so it must not be 0 there.
            load_brain: Forwarded to every ``Walker``; when true the mutants
                are additionally mutated once right here.
            version: Forwarded to every ``Walker``.
            lr: Learning rate used in ``evolve``.
            render_best: Accepted for interface compatibility; not used here.
        """
        self.size = size
        self.mutation_factor = mutation_factor
        self.gen = 1
        self.version = version
        self.max_steps = MAX_STEPS
        # A single environment instance is shared by the walker and all mutants.
        self.env = gym.make('BipedalWalker-v3')
        self.walker = Walker(h1, version, load_brain, self.env, self.max_steps)
        self.mutants = []
        self.envs = []  # never filled in this class; kept for compatibility
        self.fitnesses = None
        self.lr = lr
        for _ in range(self.size):
            self.mutants.append(Walker(h1, version, load_brain, self.env, self.max_steps))
        if load_brain:
            self.mutate()

    def play_episode(self):
        """Score every mutant and store the results in ``self.fitnesses``."""
        self.fitnesses = np.zeros(self.size)
        for index in range(self.size):
            self.fitnesses[index] = self.mutants[index].get_reward()

    def evolve(self):
        """Move the reference weights using the standardised mutant fitnesses.

        Every mutant's weights are scaled by its standardised fitness and
        added, times ``lr / (size * mutation_factor)``, to the reference
        walker's weights. Afterwards all mutants are reset to the new
        reference weights and the generation counter is incremented.

        If the fitnesses have (near-)zero or non-finite spread there is no
        ranking signal; the weight update is then skipped instead of dividing
        by zero, which would turn every weight into NaN.

        Raises:
            RuntimeError: If called before ``play_episode`` has produced
                fitness values.
        """
        if self.fitnesses is None:
            raise RuntimeError('evolve() called before play_episode()')

        rewards = np.asarray(self.fitnesses, dtype=float)
        spread = np.std(rewards)
        if not np.isfinite(spread) or np.isclose(spread, 0.0):
            # Degenerate generation: nothing to learn from, keep the weights.
            advantages = np.zeros_like(rewards)
        else:
            advantages = (rewards - np.mean(rewards)) / spread

        weights = self.walker.get_weights()
        step = self.lr / (self.size * self.mutation_factor)
        for mutant, advantage in zip(self.mutants, advantages):
            for key in weights:
                weights[key] += step * (mutant.weights[key] * advantage)

        self.walker.set_weights(weights)
        for mutant in self.mutants:
            mutant.set_weights(weights)
        self.gen += 1

    def mutate(self):  # mutates all the brains of the babies
        """Perturb every mutant; the reference walker is left untouched."""
        for mutant in self.mutants:  # we don't want to mutate the champion's brain
            mutant.mutate(self.mutation_factor)

    def increase_moves(self, size):  # increase the number of directions for the brain
        """Ask every mutant's ``brain`` to grow by ``size`` moves.

        NOTE(review): this relies on each mutant exposing ``brain.directions``
        and ``brain.increase_moves``; confirm the walker type provides them
        before re-enabling the call, otherwise this raises AttributeError.
        """
        if len(self.mutants[0].brain.directions) < self.max_steps:
            for walker in self.mutants:
                walker.brain.increase_moves(size)
import gym
import numpy as np
import pickle
import copy
import os
np.random.seed(42)  # global NumPy seed: weight init and mutation noise below draw from it


class Walker:
    """A small two-layer tanh policy together with the environment it acts in.

    ``weights`` is a dict with two matrices: ``'W1'`` of shape ``(24, h1)``
    and ``'W2'`` of shape ``(h1, 4)``.
    """

    def __init__(self, h1, version, load_brain, env, steps):
        """Initialise random weights, optionally replacing them from disk.

        Args:
            h1: Width of the hidden layer.
            version: Integer used in the model file name for save/load.
            load_brain: When true, weights are loaded via ``load()``.
            env: Environment object offering ``reset()`` and ``step(action)``.
            steps: Maximum number of environment steps per ``get_reward`` call.
        """
        self.version = version
        self.h1 = h1
        self.weights = {}
        # Random draws scaled by 1/sqrt(fan-in). They are made even when the
        # weights are loaded afterwards, which keeps the RNG stream unchanged.
        self.weights['W1'] = np.random.randn(24, h1) / np.sqrt(24)
        self.weights['W2'] = np.random.randn(h1, 4) / np.sqrt(h1)
        if load_brain:
            self.load()
        self.steps = steps
        self.env = env

    def get_action(self, observation):
        """Return ``tanh(tanh(observation @ W1) @ W2)``."""
        hidden = np.tanh(np.matmul(observation, self.weights['W1']))
        return np.tanh(np.matmul(hidden, self.weights['W2']))

    def get_reward(self):
        """Run one episode of at most ``self.steps`` steps and return the summed reward.

        Works with both environment conventions: ``reset()`` returning either
        the observation or an ``(observation, info)`` tuple, and ``step()``
        returning either ``(obs, reward, done, info)`` or
        ``(obs, reward, terminated, truncated, info)``.
        """
        reset_result = self.env.reset()
        # Newer Gym/Gymnasium versions return (observation, info) from reset().
        observation = reset_result[0] if isinstance(reset_result, tuple) else reset_result

        total_reward = 0
        for _ in range(self.steps):
            action = self.get_action(observation)
            outcome = self.env.step(action)
            if len(outcome) == 5:
                observation, reward, terminated, truncated, _info = outcome
                done = bool(terminated or truncated)
            else:
                observation, reward, done, _info = outcome
            total_reward += reward
            if done:
                break
        return total_reward

    def mutate(self, mutation_rate):
        """Add Gaussian noise scaled by ``mutation_rate`` to every weight matrix."""
        # Only existing keys are reassigned, so iterating items() here is safe.
        for key, value in self.weights.items():
            self.weights[key] = value + mutation_rate * np.random.randn(*value.shape)

    def get_weights(self):
        """Return a deep copy of the weight dict."""
        return copy.deepcopy(self.weights)

    def set_weights(self, weights):
        """Replace the weights with a deep copy of ``weights``."""
        self.weights = copy.deepcopy(weights)

    def save(self):
        """Pickle the weights to ``./models/model-pedal<version>.p``."""
        # exist_ok avoids the check-then-create race of isdir() + mkdir().
        os.makedirs('./models', exist_ok=True)
        with open('./models/model-pedal%d.p' % self.version, 'wb') as fp:
            pickle.dump(self.weights, fp)

    def load(self):
        """Replace the weights with those pickled in ``./models/model-pedal<version>.p``.

        SECURITY: ``pickle.load`` can execute arbitrary code; only load model
        files that this program wrote itself or that come from a trusted source.
        """
        with open('./models/model-pedal%d.p' % self.version, 'rb') as fp:
            self.weights = pickle.load(fp)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment