Skip to content
Snippets Groups Projects
Commit 73e1da69 authored by Tobias Döring
Browse files

initial neuronal network evolution strategy

parents
Branches
Tags
1 merge request: !2 "Evo neuro"
models/
__pycache__/
\ No newline at end of file
main.py 0 → 100644
from population import Population
import time
import matplotlib.pyplot as plt
# ---------------------------------------------------------------------------
# Training configuration
# ---------------------------------------------------------------------------
INCREASE_BY = 5  # only used by the disabled increase_moves() call below
H1 = 12  # handed to Population as its `h1` argument
POP_SIZE = 50  # number of mutants evaluated per generation
MUTATION_FACTOR = 0.1  # 0 <= x <= 1
LEARNING_RATE = 0.03
GAME_CANCELLED = False  # loop guard; nothing in this script ever flips it
LOAD_BRAIN = False  # forwarded to Population as `load_brain`
RENDER_BEST = True  # forwarded to Population as `render_best`
VERSION = 1  # forwarded to Population as `version`

if __name__ == '__main__':
    population = Population(
        POP_SIZE,
        H1,
        MUTATION_FACTOR,
        LOAD_BRAIN,
        VERSION,
        LEARNING_RATE,
        RENDER_BEST,
    )

    # this is our game: one pass through the loop is one generation
    while not GAME_CANCELLED:
        # perturb -> evaluate -> update, in exactly this order
        population.mutate()
        population.play_episode()
        population.evolve()
        #population.increase_moves(INCREASE_BY)

        print(f'Gen: {population.gen}')
        #print(f'Best Index: {population.best_walker_index}')
        #print(f'Best Fitness: {population.fitnesses[population.best_walker_index]}')
        print(f'Max Steps: {population.max_steps}')
        # time.sleep(0.1)

        # checkpoint the reference walker every tenth generation
        if population.gen % 10 == 0:
            population.walker.save()
import numpy as np
import random
import logging
import copy
from walker import Walker
import gym
MAX_STEPS = 1599  # after 1600 steps the Environment gives us a done anyway.


class Population:
    """Evolution-strategy population of walkers sharing one gym environment.

    ``walker`` holds the current solution; ``mutants`` are the perturbed
    candidates that are evaluated each generation and whose rewards drive
    the weight update performed in :meth:`evolve`.
    """

    def __init__(self, size, h1, mutation_factor, load_brain, version, lr, render_best):
        """Create the environment, the reference walker and ``size`` mutants.

        Args:
            size: number of mutants evaluated per generation.
            h1: forwarded to every ``Walker`` as its ``h1`` argument.
            mutation_factor: scale passed to ``Walker.mutate`` and used as a
                divisor of the step size in :meth:`evolve`.
            load_brain: forwarded to every ``Walker``; when truthy the
                mutants are additionally mutated once after construction.
            version: forwarded to every ``Walker``.
            lr: learning rate used in :meth:`evolve`.
            render_best: accepted for interface compatibility; this class
                does not use it.
        """
        self.size = size
        self.mutation_factor = mutation_factor
        self.gen = 1
        self.version = version
        self.max_steps = MAX_STEPS
        self.env = gym.make('BipedalWalker-v3')
        # The reference walker is built before the mutants so the order in
        # which random initial weights are drawn stays unchanged.
        self.walker = Walker(h1, version, load_brain, self.env, self.max_steps)
        self.mutants = []
        self.envs = []  # not used by any method of this class
        self.fitnesses = None
        self.lr = lr
        for _ in range(self.size):
            self.mutants.append(Walker(h1, version, load_brain, self.env, self.max_steps))
        if load_brain:
            self.mutate()

    def play_episode(self):
        """Run one episode per mutant and store the rewards in ``fitnesses``."""
        self.fitnesses = np.zeros(self.size)
        for i in range(self.size):
            self.fitnesses[i] = self.mutants[i].get_reward()

    def evolve(self):
        """Update the reference weights from the mutants' rewards.

        Rewards are standardised to zero mean and unit variance; every
        mutant's weight matrices are then added to the reference weights,
        scaled by that mutant's standardised reward and by
        ``lr / (size * mutation_factor)``. Afterwards all mutants are reset
        to the new reference weights and the generation counter advances.

        If every mutant earned exactly the same reward the standard
        deviation is zero; the weight update is skipped in that case rather
        than dividing by zero and filling the weights with NaN.
        """
        rewards = self.fitnesses
        spread = np.std(rewards)
        weights = self.walker.get_weights()

        if spread > 0:
            advantages = (rewards - np.mean(rewards)) / spread
            # Loop-invariant step size, computed once.
            step = self.lr / (self.size * self.mutation_factor)
            for i in range(self.size):
                mutant_weights = self.mutants[i].weights
                for k in weights:
                    # advantages[i] is a scalar, so this is an element-wise
                    # scaling of the mutant's weight matrix.
                    weights[k] += step * (mutant_weights[k] * advantages[i])

        self.walker.set_weights(weights)
        for mutant in self.mutants:
            mutant.set_weights(weights)
        self.gen += 1

    def mutate(self):  # mutates all the brains of the babies
        """Perturb every mutant's weights; the reference walker is untouched."""
        for mutant in self.mutants:  # we don't want to mutate the champion's brain
            mutant.mutate(self.mutation_factor)

    def increase_moves(self, size):  # increase the number of directions for the brain
        """Ask every mutant's ``brain`` to grow by ``size`` moves.

        NOTE(review): this relies on each mutant exposing a ``brain`` object
        with ``directions`` and ``increase_moves``. Nothing in this class
        sets such an attribute, so calling this method is expected to raise
        ``AttributeError`` unless the walker type provides it -- confirm
        before re-enabling.
        """
        if len(self.mutants[0].brain.directions) < self.max_steps:
            for walker in self.mutants:
                walker.brain.increase_moves(size)
import gym
import numpy as np
import pickle
import copy
import os
np.random.seed(42)


class Walker:
    """Two-layer tanh policy network that can be rolled out, mutated and persisted.

    ``weights`` is a dict with the matrices ``'W1'`` (24 x h1) and ``'W2'``
    (h1 x 4).
    """

    def __init__(self, h1, version, load_brain, env, steps):
        """Initialise random weights, optionally replacing them from disk.

        Args:
            h1: number of hidden units.
            version: integer that selects the model file name used by
                :meth:`save` and :meth:`load`.
            load_brain: when truthy, call :meth:`load` after the random
                initialisation (the random draw still happens first).
            env: environment object providing ``reset()`` and ``step()``.
            steps: maximum number of steps per episode in :meth:`get_reward`.
        """
        self.version = version
        self.h1 = h1
        self.weights = {}
        self.weights['W1'] = np.random.randn(24, h1) / np.sqrt(24)
        self.weights['W2'] = np.random.randn(h1, 4) / np.sqrt(h1)
        if load_brain:
            self.load()
        self.steps = steps
        self.env = env

    def get_action(self, observation):
        """Return ``tanh(tanh(observation @ W1) @ W2)``."""
        hidden = np.tanh(np.matmul(observation, self.weights['W1']))
        return np.tanh(np.matmul(hidden, self.weights['W2']))

    def get_reward(self):
        """Play one episode and return the summed reward.

        The episode ends after ``self.steps`` steps or as soon as the
        environment reports ``done``. ``env.reset()`` is expected to return
        the observation and ``env.step()`` a 4-tuple
        ``(observation, reward, done, info)``.
        """
        observation = self.env.reset()
        total_reward = 0
        for _ in range(self.steps):
            action = self.get_action(observation)
            observation, reward, done, _info = self.env.step(action)
            total_reward += reward
            if done:
                break
        return total_reward

    def mutate(self, mutation_rate):
        """Add Gaussian noise scaled by ``mutation_rate`` to every weight array.

        Works for arrays of any rank; for 2-D arrays the random draw is
        the same as ``np.random.randn(rows, cols)``.
        """
        for k, v in self.weights.items():
            self.weights[k] = v + mutation_rate * np.random.randn(*v.shape)

    def get_weights(self):
        """Return a deep copy of the weight dict."""
        return copy.deepcopy(self.weights)

    def set_weights(self, weights):
        """Replace the weights with a deep copy of ``weights``."""
        self.weights = copy.deepcopy(weights)

    def save(self):
        """Pickle the weights to ``./models/model-pedal<version>.p``."""
        # exist_ok avoids the check-then-create race of isdir() + mkdir().
        os.makedirs('./models', exist_ok=True)
        with open('./models/model-pedal%d.p' % self.version, 'wb') as fp:
            pickle.dump(self.weights, fp)

    def load(self):
        """Load the weights from ``./models/model-pedal<version>.p``.

        Raises:
            FileNotFoundError: if no model file exists for this version.
        """
        # SECURITY: pickle.load can execute arbitrary code; only load model
        # files that this program wrote itself.
        with open('./models/model-pedal%d.p' % self.version, 'rb') as fp:
            self.weights = pickle.load(fp)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment