Commit e12a37ee authored by Philip Maas

Merge branch 'evo-neuro' into 'main'

Evo neuro

See merge request !2
parents 3bceaab7 226f67ea
Showing 229 additions and 0 deletions
models/
__pycache__/
\ No newline at end of file
File added: EvolutionStrategies/Experiments/12 1 50 0.1 0.01 300/12_1_50_0.1_0.01_300.png (28.9 KiB)
File added: EvolutionStrategies/Experiments/12 1 50 0.1 0.03 300/12_1_50_0.1_0.03_300.png (38.8 KiB)
File added: EvolutionStrategies/Experiments/12 1 50 0.1 0.1 300/12_1_50_0.1_0.1_300.png (50.3 KiB)
File added: EvolutionStrategies/Experiments/12 1 50 0.1 decaying 300/12_1_50_0.1_decaying_300.png (40.3 KiB)
File added: EvolutionStrategies/Experiments/12 1 50 0.1 decaying 300/12_2_50_0.1_decaying_300.png (36.7 KiB)
File added: EvolutionStrategies/Experiments/12 1 50 0.3 0.03 300/12_1_50_0.3_0.03_300.png (32.3 KiB)
Several further binary files were added whose names are not shown in this view (no text diff).
from population import Population
import time
import matplotlib.pyplot as plt
import pickle
import sys

# Hyperparameters for the evolution strategy.
HIDDEN_LAYER = 2
BIAS = True
POP_SIZE = 50
MUTATION_FACTOR = 0.1  # 0 <= x <= 1
LEARNING_RATE = 0.03   # 0 <= x <= 1
GENS = 7000
MAX_STEPS = 200  # after 1600 steps the environment returns done anyway
VERSION = 1
TEST_WALKER = True   # evaluate a saved walker instead of training
LOAD_BRAIN = False   # start from previously saved weights
RENDER_BEST = False

if TEST_WALKER:
    LOAD_BRAIN = True


def plot_reward(rewards):
    plt.title(f'{HIDDEN_LAYER}, {VERSION}, {POP_SIZE}, {LEARNING_RATE}')
    plt.xlabel('Episodes/10')
    plt.ylabel('Rewards')
    plt.plot(rewards)
    plt.savefig(f'./models/{HIDDEN_LAYER}_{VERSION}_{POP_SIZE}_{LEARNING_RATE}.png')
    plt.show()
    plt.cla()


if __name__ == '__main__':
    avg_rewards = []
    try:
        population = Population(POP_SIZE, HIDDEN_LAYER, BIAS, MUTATION_FACTOR, MAX_STEPS,
                                LOAD_BRAIN, VERSION, LEARNING_RATE, RENDER_BEST)

        if TEST_WALKER:
            # Evaluation only: run the saved walker for 10 episodes and plot the rewards.
            rewards = []
            # population.walker.plot_input_weights()
            for i in range(10):
                rewards.append(population.walker.get_reward(10000, True))
                print("Reward: ", rewards[-1])
            print("Average Reward: ", sum(rewards) / len(rewards))
            plot_reward(rewards)
            sys.exit(0)

        for gen in range(GENS):  # training loop: mutate, evaluate, update
            start_time = time.time()
            print(f'Gen: {gen}')
            population.mutate()
            population.play_episode()
            population.evolve()
            print("Time for Gen: ", time.time() - start_time)

            if gen % 10 == 0:
                # Periodically save the current weights, log evaluation stats
                # and dump the running average rewards to disk.
                population.walker.save()
                avg_rewards.append(population.get_walker_stats())
                population.walker.save_mlp_weights(gen)
                with open(f'./models/{HIDDEN_LAYER}_{VERSION}_{POP_SIZE}_{LEARNING_RATE}_AvgRewards', 'wb') as fp:
                    pickle.dump(avg_rewards, fp)
            if gen == 1000:
                population.lr = 0.01  # decay the learning rate once
            # if gen == 5000:
            #     population.lr = 0.005

        plot_reward(avg_rewards)
    except KeyboardInterrupt:
        if not TEST_WALKER:
            plot_reward(avg_rewards)
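walker.py itself is not part of this diff. The script above and population.py below only assume a Walker with the interface sketched here; this is a hypothetical stub whose method names and signatures are inferred from the call sites, with placeholder bodies rather than the repository's actual implementation:

import numpy as np

class Walker:
    def __init__(self, hidden_layer, bias, version, load_brain, env):
        self.env = env
        self.version = version
        n_in = env.observation_space.shape[0]
        n_out = env.action_space.shape[0]
        # One hidden layer of size `hidden_layer`, stored as a dict of matrices
        # (keys 'W1'/'W2' are illustrative, not taken from the repository).
        self.weights = {'W1': np.random.randn(n_in, hidden_layer),
                        'W2': np.random.randn(hidden_layer, n_out)}
        if load_brain:
            pass  # would restore previously saved weights from ./models/

    def get_weights(self):
        return {k: v.copy() for k, v in self.weights.items()}

    def set_weights(self, weights):
        self.weights = {k: v.copy() for k, v in weights.items()}

    def mutate(self, mutation_factor):
        # Add Gaussian noise scaled by the mutation factor to every weight matrix.
        for k in self.weights:
            self.weights[k] += mutation_factor * np.random.randn(*self.weights[k].shape)

    def get_reward(self, max_steps, render=False):
        # Roll out one episode with the current policy; return the summed reward.
        ...

    def save(self): ...
    def save_mlp_weights(self, gen): ...
    def plot_input_weights(self): ...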
from numpy import cos, sin, arctan
from matplotlib import pyplot

# Layout constants for the network drawing.
vertical_distance_between_layers = 40
horizontal_distance_between_neurons = 4
neuron_radius = 1
default_line_width = 1


class Neuron:
    def __init__(self, x, y):
        self.x = x
        self.y = y

    def draw(self):
        circle = pyplot.Circle((self.x, self.y), radius=neuron_radius, fill=False)
        pyplot.gca().add_patch(circle)


class Layer:
    def __init__(self, network, number_of_neurons, weights):
        self.previous_layer = self.get_previous_layer(network)
        self.y = self.calculate_layer_y_position()
        self.neurons = self.init_neurons(number_of_neurons)
        self.weights = weights

    def init_neurons(self, number_of_neurons):
        neurons = []
        x = self.calc_left_margin(number_of_neurons)
        for iteration in range(number_of_neurons):
            neuron = Neuron(x, self.y)
            neurons.append(neuron)
            x += horizontal_distance_between_neurons
        return neurons

    def calc_left_margin(self, number_of_neurons):  # so the layer is horizontally centered
        return -horizontal_distance_between_neurons * number_of_neurons / 2

    def calculate_layer_y_position(self):
        if self.previous_layer:
            return self.previous_layer.y + vertical_distance_between_layers
        else:
            return 0

    def get_previous_layer(self, network):
        if len(network.layers) > 0:
            return network.layers[-1]
        else:
            return None

    def line(self, neuron1, neuron2, weight):
        # Connection between two neurons: blue for positive weights, red for
        # negative, line width proportional to the weight's magnitude.
        angle = arctan((neuron2.x - neuron1.x) / float(neuron2.y - neuron1.y))
        x_adjustment = neuron_radius * sin(angle)
        y_adjustment = neuron_radius * cos(angle)
        color = 'blue'
        if weight < 0:
            color = 'red'
        line = pyplot.Line2D((neuron1.x - x_adjustment, neuron2.x + x_adjustment),
                             (neuron1.y - y_adjustment, neuron2.y + y_adjustment),
                             linewidth=default_line_width * abs(weight), color=color)
        pyplot.gca().add_line(line)

    def draw(self):
        y = 0
        for neuron in self.neurons:
            if self.previous_layer:
                x = 0
                for previous_layer_neuron in self.previous_layer.neurons:
                    self.line(neuron, previous_layer_neuron, self.weights[x][y])
                    x += 1
            y += 1
            neuron.draw()


class NeuralNetwork:
    def __init__(self, architecture, weights):
        # architecture: list of layer sizes; weights[i-1] connects layer i-1 to layer i.
        self.layers = []
        for i in range(len(architecture)):
            if i > 0:
                self.layers.append(Layer(self, architecture[i], weights[i - 1]))
            else:
                self.layers.append(Layer(self, architecture[i], None))

    def add_layer(self, number_of_neurons, weights=None):
        layer = Layer(self, number_of_neurons, weights)
        self.layers.append(layer)

    def draw(self, gen):
        for layer in self.layers:
            layer.draw()
        pyplot.axis('scaled')
        pyplot.savefig(f'./models/mlp_{gen}.png', dpi=300)
        pyplot.cla()
        # pyplot.show()


if __name__ == "__main__":
    # Standalone demo: draw a 24-12-4 network with random weights.
    import numpy as np
    architecture = [24, 12, 4]
    weights = [np.random.uniform(-1, 1, (architecture[i], architecture[i + 1]))
               for i in range(len(architecture) - 1)]
    network = NeuralNetwork(architecture, weights)
    network.draw(0)
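The training loop calls walker.save_mlp_weights(gen) every 10 generations; a plausible hookup to this drawing module looks like the sketch below. It is an assumption: walker.py and the module's file name are not shown in this diff, the weight keys are illustrative, and bias terms are ignored.

from mlp_visualizer import NeuralNetwork  # module name assumed; not shown in this diff

def save_mlp_weights(self, gen):
    # Assumes the walker keeps its weight matrices in a dict, ordered input -> output.
    weight_list = [self.weights['W1'], self.weights['W2']]               # assumed keys
    architecture = [weight_list[0].shape[0]] + [w.shape[1] for w in weight_list]
    NeuralNetwork(architecture, weight_list).draw(gen)                   # writes ./models/mlp_{gen}.png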
import numpy as np
from walker import Walker
import gym

np.random.seed(42)


class Population:
    def __init__(self, size, hidden_layer, bias, mutation_factor, max_steps, load_brain, version, lr, render_best):
        self.size = size
        self.mutation_factor = mutation_factor
        self.gen = 1
        self.version = version
        self.max_steps = max_steps
        self.render_best = render_best
        self.env = gym.make('Pendulum-v1')  # MountainCarContinuous-v0 LunarLanderContinuous-v2 Pendulum-v1 CarRacing-v0
        self.walker = Walker(hidden_layer, bias, version, load_brain, self.env)
        self.mutated_weights = dict()
        self.mutants = []
        self.envs = []
        self.rewards = None
        self.lr = lr

        # All mutants share one environment; optionally start them from the loaded weights.
        walker_weights = self.walker.get_weights()
        for i in range(self.size):
            self.mutants.append(Walker(hidden_layer, bias, version, False, self.env))
            if load_brain:
                self.mutants[-1].set_weights(walker_weights)

    def play_episode(self):
        # Evaluate every mutant for one episode and collect its reward.
        self.rewards = np.zeros(self.size)
        for i in range(self.size):
            self.rewards[i] = self.mutants[i].get_reward(self.max_steps)

    def evolve(self):
        # Evolution-strategies update: normalize the rewards, then move the
        # central weights along a reward-weighted sum of the mutants' weights.
        A = (self.rewards - np.mean(self.rewards)) / np.std(self.rewards)
        weights = self.walker.get_weights()
        for i in range(self.size):
            for k in weights:
                weights_change = np.dot(self.mutants[i].weights[k].T, A[i]).T
                # Note: lr / (size * lr) reduces to 1 / size, so this step size
                # is independent of the learning rate.
                weights[k] = weights[k] + self.lr / (self.size * self.lr) * weights_change
        self.walker.set_weights(weights)
        for mutant in self.mutants:
            mutant.set_weights(weights)
        self.gen += 1
        print("Reward: ", self.walker.get_reward(self.max_steps, self.render_best))

    def get_walker_stats(self):
        # Average reward of the central walker over 10 evaluation episodes.
        avg_reward = []
        for i in range(10):
            avg_reward.append(self.walker.get_reward(10000))
        avg_reward = sum(avg_reward) / len(avg_reward)
        print("Average reward: ", avg_reward)
        return avg_reward

    def mutate(self):  # mutates all the weights of the mutants
        for i in range(len(self.mutants)):
            self.mutants[i].mutate(self.mutation_factor)
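For reference, evolve() follows the standard evolution-strategies gradient estimate: rewards are normalized, and the central weights move along a reward-weighted sum of the mutants' parameters. Because the normalized rewards sum to zero, weighting the mutants' full weight matrices is equivalent to weighting only their noise offsets from the central weights. A minimal, self-contained numpy sketch of that estimator on a toy objective (names and dimensions are illustrative, not from the repository):

import numpy as np

# Toy illustration of the reward-weighted update behind Population.evolve().
np.random.seed(0)
pop_size, dim = 50, 8
theta = np.zeros(dim)                            # central parameters
sigma, lr = 0.1, 0.03                            # mutation scale and learning rate

noise = np.random.randn(pop_size, dim)           # one perturbation per mutant
candidates = theta + sigma * noise               # mutated parameter vectors
rewards = -np.sum(candidates ** 2, axis=1)       # stand-in for episode rewards

A = (rewards - rewards.mean()) / rewards.std()   # normalized rewards, as in evolve()
theta = theta + lr / (pop_size * sigma) * noise.T @ A   # canonical ES step
# evolve() instead scales the sum by 1 / pop_size and uses the mutants' full weights,
# which (with zero-mean A) weights the same noise term.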