Commit 95f5bdb2 authored by Philip Maas

The algorithm can now tackle various continuous-control problems such as LunarLander

parent 77b00e89
1 merge request: !2 Evo neuro
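The substance of this commit is that the policy network's layer sizes are now read from the environment's observation and action spaces instead of being hardcoded for BipedalWalker-v3 (24 inputs, 4 outputs). A minimal sketch of that idea, assuming Gym environments with flat Box spaces; the helper name make_weights is illustrative, not from the repository:

import gym
import numpy as np

def make_weights(env, hidden_layer, bias=True):
    # Layer sizes come from the env, so any continuous-control task with a
    # flat observation vector works (CarRacing-v0's image observations would not).
    n_in = env.observation_space.shape[0] + int(bias)
    n_out = env.action_space.shape[0]
    return {'W1': np.random.randn(n_in, hidden_layer) / np.sqrt(n_in),
            'W2': np.random.randn(hidden_layer, n_out) / np.sqrt(hidden_layer)}

# The same code now sizes itself for any of the environments listed in the diff.
for name in ['Pendulum-v1', 'LunarLanderContinuous-v2', 'BipedalWalker-v3']:
    env = gym.make(name)
    w = make_weights(env, hidden_layer=2)
    print(name, w['W1'].shape, w['W2'].shape)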
@@ -4,13 +4,13 @@ import matplotlib.pyplot as plt
 import pickle
 import sys
-HIDDEN_LAYER = 8
+HIDDEN_LAYER = 2
 BIAS = True
 POP_SIZE = 50
 MUTATION_FACTOR = 0.1  # 0 <= x <= 1
 LEARNING_RATE = 0.03  # 0 <= x <= 1
 GENS = 7000
-MAX_STEPS = 300  # after 1600 steps the Environment gives us a done anyway.
+MAX_STEPS = 200  # after 1600 steps the Environment gives us a done anyway.
 VERSION = 1
@@ -37,7 +37,7 @@ if __name__ == '__main__':
     if TEST_WALKER:
         rewards = []
-        population.walker.plot_input_weights()
+        # population.walker.plot_input_weights()
         for i in range(10):
             rewards.append(population.walker.get_reward(10000, True))
             print("Reward: ", rewards[-1])
@@ -48,11 +48,9 @@ if __name__ == '__main__':
     for gen in range(GENS):  # this is our game
         start_time = time.time()
         print(f'Gen: {gen}')
-        # print(f'Max Steps: {population.max_steps}')
         population.mutate()
         population.play_episode()
         population.evolve()
-        # population.increase_moves(INCREASE_BY)
         print("Time for Gen: ", time.time() - start_time)
         if gen % 10 == 0:
             population.walker.save()
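Neither evolve() nor mutate() appears in this diff, but the constants above (POP_SIZE, MUTATION_FACTOR, LEARNING_RATE) are the usual ingredients of an evolution-strategies update. Purely for context, a generic OpenAI-ES-style recombination step, not the repository's implementation:

import numpy as np

def es_update(theta, noises, rewards, lr=0.03, sigma=0.1):
    # theta: parent weight array; noises: one perturbation array per mutant;
    # rewards: np.array of per-mutant episode rewards.
    # Move the parent weights along the reward-weighted average of the
    # Gaussian perturbations that produced each mutant's episode reward.
    advantages = (rewards - rewards.mean()) / (rewards.std() + 1e-8)
    step = np.mean([a * n for a, n in zip(advantages, noises)], axis=0)
    return theta + lr / sigma * step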
......
mlp.png: binary image replaced (346 KiB → 668 KiB)
@@ -4,17 +4,17 @@ import gym
 np.random.seed(42)

 class Population:
     def __init__(self, size, hidden_layer, bias, mutation_factor, max_steps, load_brain, version, lr, render_best):
         self.size = size
         self.mutation_factor = mutation_factor
         # self.fitness_sum = 0.0
         self.gen = 1
         self.version = version
         self.max_steps = max_steps
         self.render_best = render_best
-        self.env = gym.make('BipedalWalker-v3')
+        self.env = gym.make('Pendulum-v1')  # MountainCarContinuous-v0 LunarLanderContinuous-v2 Pendulum-v1 CarRacing-v0
         self.walker = Walker(hidden_layer, bias, version, load_brain, self.env)
         self.mutated_weights = dict()
         self.mutants = []
@@ -27,22 +27,6 @@ class Population:
         if load_brain:
             self.mutants[-1].set_weights(walker_weights)

-    # def calculate_fitness_sum(self):
-    #     self.fitness_sum = 0
-    #     self.rewards = np.zeros(self.size)
-    #     for i in range(self.size):
-    #         self.rewards[i] = self.mutants[i].fitness
-    #     self.rewards -= np.min(self.rewards)  # maybe offset: +1
-    #     self.fitness_sum = np.sum(self.rewards)
-
-    def get_action(self, observation, weights):
-        hl = np.matmul(observation, weights['W1'])
-        hl = np.tanh(hl)
-        action = np.matmul(hl, weights['W2'])
-        action = np.tanh(action)
-        return action
-
     def play_episode(self):
         self.rewards = np.zeros(self.size)
         for i in range(self.size):
@@ -73,9 +57,3 @@ class Population:
     def mutate(self):  # mutates all the weights of the mutants
         for i in range(len(self.mutants)):
             self.mutants[i].mutate(self.mutation_factor)
-
-    # def increase_moves(self, size):  # increase the number of directions for the brain
-    #     if len(self.mutants[0].brain.directions) < self.max_steps:
-    #         for walker in self.mutants:
-    #             walker.brain.increase_moves(size)
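The get_action removed here duplicated the forward pass that lives on Walker (see the walker diff below): one tanh hidden layer, then tanh on the output. A self-contained sketch, with the bias handling assumed to mirror Walker.get_action's `if self.bias:` branch:

import numpy as np

def get_action(observation, weights, bias=True):
    if bias:
        observation = np.append(observation, 1.0)  # assumed bias input
    hidden = np.tanh(np.matmul(observation, weights['W1']))
    # The final tanh bounds each action component to [-1, 1], which matches
    # LunarLanderContinuous-v2 and BipedalWalker-v3 exactly; Pendulum-v1
    # accepts torques in [-2, 2], so the policy just uses a subrange there.
    return np.tanh(np.matmul(hidden, weights['W2']))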
@@ -8,21 +8,23 @@ import mlp_visualizer
 np.random.seed(42)

 class Walker:
     def __init__(self, hidden_layer, bias, version, load_brain, env):
+        self.env = env
         self.version = version
         self.hidden_layer = hidden_layer
         if load_brain:
             self.load()
         else:
             self.weights = {}
-            self.weights['W1'] = np.random.randn(24 + int(bias), hidden_layer) / np.sqrt(24 + int(bias))
-            self.weights['W2'] = np.random.randn(hidden_layer, 4) / np.sqrt(hidden_layer)
+            self.weights['W1'] = np.random.randn(self.env.observation_space.shape[0] + int(bias), hidden_layer) \
+                / np.sqrt(self.env.observation_space.shape[0] + int(bias))
+            self.weights['W2'] = np.random.randn(hidden_layer, self.env.action_space.shape[0]) / np.sqrt(hidden_layer)
         self.bias = bias
         self.env = env
         # self.render_best = render_best

     def get_action(self, observation):
         if self.bias:
@@ -88,20 +90,19 @@ class Walker:
         ]
         if self.bias:
             names.append('bias')
-        for i in range(24 + int(self.bias)):
+        for i in range(self.env.observation_space.shape[0] + int(self.bias)):
             weights.append(sum(self.weights['W1'][i]))
         plt.bar(names, weights)
         plt.xticks(rotation=45, ha="right")
         plt.show()

     def save_mlp_weights(self, gen):
         mlp_visualizer.vertical_distance_between_layers = 40
         mlp_visualizer.horizontal_distance_between_neurons = 4
         mlp_visualizer.neuron_radius = 1
         mlp_visualizer.default_line_width = 1
-        network = mlp_visualizer.NeuralNetwork([24 + self.bias, self.hidden_layer, 4],
-                                               [self.weights['W1'], self.weights['W2']])
+        network = mlp_visualizer.NeuralNetwork([self.env.observation_space.shape[0] + self.bias, self.hidden_layer,
+                                                self.env.action_space.shape[0]], [self.weights['W1'], self.weights['W2']])
         network.draw(gen)

     def save(self):
......
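Put together, a rough rollout using the two sketches above (assuming the classic Gym step API that returns a 4-tuple, as these -v2/-v3 environment IDs imply):

import gym

env = gym.make('LunarLanderContinuous-v2')
weights = make_weights(env, hidden_layer=2)  # sketch from the top of the page

obs = env.reset()
total_reward = 0.0
for step in range(200):  # MAX_STEPS in this commit
    action = get_action(obs, weights)  # tanh policy sketched earlier
    obs, reward, done, info = env.step(action)
    total_reward += reward
    if done:
        break
print('Episode reward:', total_reward)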