Skip to content
Snippets Groups Projects
Commit 95f5bdb2 authored by Philip Maas
Browse files

The Algorithm can now tackle various continuous problems like the LunarLander

parent 77b00e89
No related branches found
No related tags found
1 merge request: !2 "Evo neuro"
...@@ -4,13 +4,13 @@ import matplotlib.pyplot as plt ...@@ -4,13 +4,13 @@ import matplotlib.pyplot as plt
import pickle import pickle
import sys import sys
HIDDEN_LAYER = 8 HIDDEN_LAYER = 2
BIAS = True BIAS = True
POP_SIZE = 50 POP_SIZE = 50
MUTATION_FACTOR = 0.1 # 0 <= x <= 1 MUTATION_FACTOR = 0.1 # 0 <= x <= 1
LEARNING_RATE = 0.03 # 0 <= x <= 1 LEARNING_RATE = 0.03 # 0 <= x <= 1
GENS = 7000 GENS = 7000
MAX_STEPS = 300 # after 1600 steps the Environment gives us a done anyway. MAX_STEPS = 200 # after 1600 steps the Environment gives us a done anyway.
VERSION = 1 VERSION = 1
...@@ -37,7 +37,7 @@ if __name__ == '__main__': ...@@ -37,7 +37,7 @@ if __name__ == '__main__':
if TEST_WALKER: if TEST_WALKER:
rewards = [] rewards = []
population.walker.plot_input_weights() #population.walker.plot_input_weights()
for i in range(10): for i in range(10):
rewards.append(population.walker.get_reward(10000, True)) rewards.append(population.walker.get_reward(10000, True))
print("Reward: ", rewards[-1]) print("Reward: ", rewards[-1])
...@@ -48,11 +48,9 @@ if __name__ == '__main__': ...@@ -48,11 +48,9 @@ if __name__ == '__main__':
for gen in range(GENS): # this is our game for gen in range(GENS): # this is our game
start_time = time.time() start_time = time.time()
print(f'Gen: {gen}') print(f'Gen: {gen}')
#print(f'Max Steps: {population.max_steps}')
population.mutate() population.mutate()
population.play_episode() population.play_episode()
population.evolve() population.evolve()
#population.increase_moves(INCREASE_BY)
print("Time for Gen: ", time.time() - start_time) print("Time for Gen: ", time.time() - start_time)
if gen % 10 == 0: if gen % 10 == 0:
population.walker.save() population.walker.save()
......
mlp.png

346 KiB | W: | H:

mlp.png

668 KiB | W: | H:

mlp.png
mlp.png
mlp.png
mlp.png
  • 2-up
  • Swipe
  • Onion skin
...@@ -4,17 +4,17 @@ import gym ...@@ -4,17 +4,17 @@ import gym
np.random.seed(42) np.random.seed(42)
class Population: class Population:
def __init__(self, size, hidden_layer, bias, mutation_factor, max_steps, load_brain, version, lr, render_best): def __init__(self, size, hidden_layer, bias, mutation_factor, max_steps, load_brain, version, lr, render_best):
self.size = size self.size = size
self.mutation_factor = mutation_factor self.mutation_factor = mutation_factor
#self.fitness_sum = 0.0
self.gen = 1 self.gen = 1
self.version = version self.version = version
self.max_steps = max_steps self.max_steps = max_steps
self.render_best = render_best self.render_best = render_best
self.env = gym.make('BipedalWalker-v3') self.env = gym.make('Pendulum-v1') # MountainCarContinuous-v0 LunarLanderContinuous-v2 Pendulum-v1 CarRacing-v0
self.walker = Walker(hidden_layer, bias, version, load_brain, self.env) self.walker = Walker(hidden_layer, bias, version, load_brain, self.env)
self.mutated_weights = dict() self.mutated_weights = dict()
self.mutants = [] self.mutants = []
...@@ -27,22 +27,6 @@ class Population: ...@@ -27,22 +27,6 @@ class Population:
if load_brain: if load_brain:
self.mutants[-1].set_weights(walker_weights) self.mutants[-1].set_weights(walker_weights)
# def calculate_fitness_sum(self):
# self.fitness_sum = 0
# self.rewards = np.zeros(self.size)
# for i in range(self.size):
# self.rewards[i] = self.mutants[i].fitness
# self.rewards -= np.min(self.rewards) # maybe offset: +1
# self.fitness_sum = np.sum(self.rewards)
def get_action(self, observation, weights):
hl = np.matmul(observation, weights['W1'])
hl = np.tanh(hl)
action = np.matmul(hl, weights['W2'])
action = np.tanh(action)
return action
def play_episode(self): def play_episode(self):
self.rewards = np.zeros(self.size) self.rewards = np.zeros(self.size)
for i in range(self.size): for i in range(self.size):
...@@ -73,9 +57,3 @@ class Population: ...@@ -73,9 +57,3 @@ class Population:
def mutate(self): # mutates all the weights of the mutants def mutate(self): # mutates all the weights of the mutants
for i in range(len(self.mutants)): for i in range(len(self.mutants)):
self.mutants[i].mutate(self.mutation_factor) self.mutants[i].mutate(self.mutation_factor)
# def increase_moves(self, size): # increase the number of directions for the brain
# if len(self.mutants[0].brain.directions) < self.max_steps:
# for walker in self.mutants:
# walker.brain.increase_moves(size)
...@@ -8,21 +8,23 @@ import mlp_visualizer ...@@ -8,21 +8,23 @@ import mlp_visualizer
np.random.seed(42) np.random.seed(42)
class Walker: class Walker:
def __init__(self, hidden_layer, bias, version, load_brain, env): def __init__(self, hidden_layer, bias, version, load_brain, env):
self.env = env
self.version = version self.version = version
self.hidden_layer = hidden_layer self.hidden_layer = hidden_layer
if load_brain: if load_brain:
self.load() self.load()
else: else:
self.weights = {} self.weights = {}
self.weights['W1'] = np.random.randn(24 + int(bias), hidden_layer) / np.sqrt(24 + int(bias)) self.weights['W1'] = np.random.randn(self.env.observation_space.shape[0] + int(bias), hidden_layer) \
self.weights['W2'] = np.random.randn(hidden_layer, 4) / np.sqrt(hidden_layer) / np.sqrt(self.env.observation_space.shape[0] + int(bias))
self.weights['W2'] = np.random.randn(hidden_layer, self.env.action_space.shape[0]) / np.sqrt(hidden_layer)
self.bias = bias self.bias = bias
self.env = env
#self.render_best = render_best
def get_action(self, observation): def get_action(self, observation):
if self.bias: if self.bias:
...@@ -88,20 +90,19 @@ class Walker: ...@@ -88,20 +90,19 @@ class Walker:
] ]
if self.bias: if self.bias:
names.append('bias') names.append('bias')
for i in range(24 + int(self.bias)): for i in range(self.env.observation_space.shape[0] + int(self.bias)):
weights.append(sum(self.weights['W1'][i])) weights.append(sum(self.weights['W1'][i]))
plt.bar(names, weights) plt.bar(names, weights)
plt.xticks(rotation=45, ha="right") plt.xticks(rotation=45, ha="right")
plt.show() plt.show()
def save_mlp_weights(self, gen): def save_mlp_weights(self, gen):
mlp_visualizer.vertical_distance_between_layers = 40 mlp_visualizer.vertical_distance_between_layers = 40
mlp_visualizer.horizontal_distance_between_neurons = 4 mlp_visualizer.horizontal_distance_between_neurons = 4
mlp_visualizer.neuron_radius = 1 mlp_visualizer.neuron_radius = 1
mlp_visualizer.default_line_width = 1 mlp_visualizer.default_line_width = 1
network = mlp_visualizer.NeuralNetwork([24 + self.bias, self.hidden_layer, 4], network = mlp_visualizer.NeuralNetwork([self.env.observation_space.shape[0] + self.bias, self.hidden_layer,
[self.weights['W1'], self.weights['W2']]) self.env.action_space.shape[0]], [self.weights['W1'], self.weights['W2']])
network.draw(gen) network.draw(gen)
def save(self): def save(self):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment