diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..4c195866e50935ce0d787acd9089fdafb67de059
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+models/
+__pycache__/
\ No newline at end of file
diff --git a/EvolutionStrategies/Experiments/100 1 50 0.1 0.03 300/avg190_hl100.p b/EvolutionStrategies/Experiments/100 1 50 0.1 0.03 300/avg190_hl100.p
new file mode 100644
index 0000000000000000000000000000000000000000..a5d30b58299bde2aae6544750923df1d590f71ab
Binary files /dev/null and b/EvolutionStrategies/Experiments/100 1 50 0.1 0.03 300/avg190_hl100.p differ
diff --git a/EvolutionStrategies/Experiments/12 1 50 0.1 0.01 300/12_1_50_0.1_0.01_300.png b/EvolutionStrategies/Experiments/12 1 50 0.1 0.01 300/12_1_50_0.1_0.01_300.png
new file mode 100644
index 0000000000000000000000000000000000000000..2515e18af215c554424c8106abcf712fb1391b6c
Binary files /dev/null and b/EvolutionStrategies/Experiments/12 1 50 0.1 0.01 300/12_1_50_0.1_0.01_300.png differ
diff --git a/EvolutionStrategies/Experiments/12 1 50 0.1 0.01 300/293.p b/EvolutionStrategies/Experiments/12 1 50 0.1 0.01 300/293.p
new file mode 100644
index 0000000000000000000000000000000000000000..12cd2744432fbd5e562571e488b9f74069bcf673
Binary files /dev/null and b/EvolutionStrategies/Experiments/12 1 50 0.1 0.01 300/293.p differ
diff --git a/EvolutionStrategies/Experiments/12 1 50 0.1 0.03 300/12_1_50_0.1_0.03_300.png b/EvolutionStrategies/Experiments/12 1 50 0.1 0.03 300/12_1_50_0.1_0.03_300.png
new file mode 100644
index 0000000000000000000000000000000000000000..35a61097e80b7e8f07f6ec8144893cc5a2c788f2
Binary files /dev/null and b/EvolutionStrategies/Experiments/12 1 50 0.1 0.03 300/12_1_50_0.1_0.03_300.png differ
diff --git a/EvolutionStrategies/Experiments/12 1 50 0.1 0.03 300/avg260_hl12.p b/EvolutionStrategies/Experiments/12 1 50 0.1 0.03 300/avg260_hl12.p
new file mode 100644
index 0000000000000000000000000000000000000000..c8c3d8fff3f9ae5d7473eafca736c9a67cfef391
Binary files /dev/null and b/EvolutionStrategies/Experiments/12 1 50 0.1 0.03 300/avg260_hl12.p differ
diff --git a/EvolutionStrategies/Experiments/12 1 50 0.1 0.03 300/avg270_hl12.p b/EvolutionStrategies/Experiments/12 1 50 0.1 0.03 300/avg270_hl12.p
new file mode 100644
index 0000000000000000000000000000000000000000..141bd7de73bf4f7b223df37fc51da8922287c52a
Binary files /dev/null and b/EvolutionStrategies/Experiments/12 1 50 0.1 0.03 300/avg270_hl12.p differ
diff --git a/EvolutionStrategies/Experiments/12 1 50 0.1 0.03 300/avg278_hl12.p b/EvolutionStrategies/Experiments/12 1 50 0.1 0.03 300/avg278_hl12.p
new file mode 100644
index 0000000000000000000000000000000000000000..47e73e91851250bee7a00aea52f4030f84a528b2
Binary files /dev/null and b/EvolutionStrategies/Experiments/12 1 50 0.1 0.03 300/avg278_hl12.p differ
diff --git a/EvolutionStrategies/Experiments/12 1 50 0.1 0.03 300/gt200_hl12.p b/EvolutionStrategies/Experiments/12 1 50 0.1 0.03 300/gt200_hl12.p
new file mode 100644
index 0000000000000000000000000000000000000000..b33479e2c2e0549085f0b52ef90346d5bd75d8ac
Binary files /dev/null and b/EvolutionStrategies/Experiments/12 1 50 0.1 0.03 300/gt200_hl12.p differ
diff --git a/EvolutionStrategies/Experiments/12 1 50 0.1 0.03 300/gt250_hl12.p b/EvolutionStrategies/Experiments/12 1 50 0.1 0.03 300/gt250_hl12.p
new file mode 100644
index 0000000000000000000000000000000000000000..c8306c0417750308dd44dcbbe56ca74468b48078
Binary files /dev/null and b/EvolutionStrategies/Experiments/12 1 50 0.1 0.03 300/gt250_hl12.p differ
diff --git a/EvolutionStrategies/Experiments/12 1 50 0.1 0.1 300/12_1_50_0.1_0.1_300.png b/EvolutionStrategies/Experiments/12 1 50 0.1 0.1 300/12_1_50_0.1_0.1_300.png
new file mode 100644
index 0000000000000000000000000000000000000000..dab40b431c08d1bc5813022d931b99ef1b9b2857
Binary files /dev/null and b/EvolutionStrategies/Experiments/12 1 50 0.1 0.1 300/12_1_50_0.1_0.1_300.png differ
diff --git a/EvolutionStrategies/Experiments/12 1 50 0.1 0.1 300/model-pedal1.p b/EvolutionStrategies/Experiments/12 1 50 0.1 0.1 300/model-pedal1.p
new file mode 100644
index 0000000000000000000000000000000000000000..3d9384dbc6423e71a86f92343c2103d0d1f847ba
Binary files /dev/null and b/EvolutionStrategies/Experiments/12 1 50 0.1 0.1 300/model-pedal1.p differ
diff --git a/EvolutionStrategies/Experiments/12 1 50 0.1 decaying 300/12_1_50_0.1_decaying_300.png b/EvolutionStrategies/Experiments/12 1 50 0.1 decaying 300/12_1_50_0.1_decaying_300.png
new file mode 100644
index 0000000000000000000000000000000000000000..4be293beaeb88fe015069af2ce4d6940349c2bb0
Binary files /dev/null and b/EvolutionStrategies/Experiments/12 1 50 0.1 decaying 300/12_1_50_0.1_decaying_300.png differ
diff --git a/EvolutionStrategies/Experiments/12 1 50 0.1 decaying 300/12_2_50_0.1_decaying_300.png b/EvolutionStrategies/Experiments/12 1 50 0.1 decaying 300/12_2_50_0.1_decaying_300.png
new file mode 100644
index 0000000000000000000000000000000000000000..e78794ebd91bf62921974116d80009716c2ea0d7
Binary files /dev/null and b/EvolutionStrategies/Experiments/12 1 50 0.1 decaying 300/12_2_50_0.1_decaying_300.png differ
diff --git a/EvolutionStrategies/Experiments/12 1 50 0.1 decaying 300/model-pedal1.p b/EvolutionStrategies/Experiments/12 1 50 0.1 decaying 300/model-pedal1.p
new file mode 100644
index 0000000000000000000000000000000000000000..0ddf0c87ba62ca0e8ed361ac14c6f9ecd017be4b
Binary files /dev/null and b/EvolutionStrategies/Experiments/12 1 50 0.1 decaying 300/model-pedal1.p differ
diff --git a/EvolutionStrategies/Experiments/12 1 50 0.1 decaying 300/model-pedal2.p b/EvolutionStrategies/Experiments/12 1 50 0.1 decaying 300/model-pedal2.p
new file mode 100644
index 0000000000000000000000000000000000000000..bef8fad01968c07c06ebd179a7dd10f60136c966
Binary files /dev/null and b/EvolutionStrategies/Experiments/12 1 50 0.1 decaying 300/model-pedal2.p differ
diff --git a/EvolutionStrategies/Experiments/12 1 50 0.3 0.03 300/12_1_50_0.3_0.03_300.png b/EvolutionStrategies/Experiments/12 1 50 0.3 0.03 300/12_1_50_0.3_0.03_300.png
new file mode 100644
index 0000000000000000000000000000000000000000..139b436d9c39bde8d07a9053c646342993a6ed2e
Binary files /dev/null and b/EvolutionStrategies/Experiments/12 1 50 0.3 0.03 300/12_1_50_0.3_0.03_300.png differ
diff --git a/EvolutionStrategies/main.py b/EvolutionStrategies/main.py
new file mode 100644
index 0000000000000000000000000000000000000000..aa9e09f05c574d8846700164912f585d8f5c5031
--- /dev/null
+++ b/EvolutionStrategies/main.py
@@ -0,0 +1,70 @@
+from population import Population
+import time
+import matplotlib.pyplot as plt
+import pickle
+import sys
+
+HIDDEN_LAYER = 2
+BIAS = True
+POP_SIZE = 50
+MUTATION_FACTOR = 0.1  # 0 <= x <= 1
+LEARNING_RATE = 0.03  # 0 <= x <= 1
+GENS = 7000
+MAX_STEPS = 200  # the environment returns done after 1600 steps anyway
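+# Summary of the loop below (see population.py for details): each generation,
+# every mutant adds Gaussian noise scaled by MUTATION_FACTOR to the parent's
+# weights and plays one episode; evolve() then moves the parent toward the
+# reward-weighted average of the mutants (see evolve() for the exact step size).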
+
+
+VERSION = 1
+TEST_WALKER = True
+LOAD_BRAIN = False
+RENDER_BEST = False
+if TEST_WALKER:
+    LOAD_BRAIN = True
+
+
+def plot_reward(rewards):
+    plt.title(f'{HIDDEN_LAYER}, {VERSION}, {POP_SIZE}, {LEARNING_RATE}')
+    plt.xlabel('Episodes/10')
+    plt.ylabel('Rewards')
+    plt.plot(rewards)
+    plt.savefig(f'./models/{HIDDEN_LAYER}_{VERSION}_{POP_SIZE}_{LEARNING_RATE}.png')
+    plt.show()
+    plt.cla()
+
+
+if __name__ == '__main__':
+    avg_rewards = []
+
+    try:
+        population = Population(POP_SIZE, HIDDEN_LAYER, BIAS, MUTATION_FACTOR, MAX_STEPS, LOAD_BRAIN, VERSION, LEARNING_RATE, RENDER_BEST)
+
+        if TEST_WALKER:
+            rewards = []
+            # population.walker.plot_input_weights()
+            for i in range(10):
+                rewards.append(population.walker.get_reward(10000, True))
+                print("Reward: ", rewards[-1])
+            print("Average Reward: ", sum(rewards) / len(rewards))
+            plot_reward(rewards)
+            sys.exit(0)
+
+        for gen in range(GENS):  # main training loop
+            start_time = time.time()
+            print(f'Gen: {gen}')
+            population.mutate()
+            population.play_episode()
+            population.evolve()
+            print("Time for Gen: ", time.time() - start_time)
+            if gen % 10 == 0:
+                population.walker.save()
+                avg_rewards.append(population.get_walker_stats())
+                population.walker.save_mlp_weights(gen)
+                with open(f'./models/{HIDDEN_LAYER}_{VERSION}_{POP_SIZE}_{LEARNING_RATE}_AvgRewards', 'wb') as fp:
+                    pickle.dump(avg_rewards, fp)
+            if gen == 1000:
+                population.lr = 0.01
+            # if gen == 5000:
+            #     population.lr = 0.005
+
+        plot_reward(avg_rewards)
+    except KeyboardInterrupt:
+        if not TEST_WALKER:
+            plot_reward(avg_rewards)
+
diff --git a/EvolutionStrategies/mlp_visualizer.py b/EvolutionStrategies/mlp_visualizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..ff933fc3407cdecf841a1d569b95920b550395db
--- /dev/null
+++ b/EvolutionStrategies/mlp_visualizer.py
@@ -0,0 +1,98 @@
+from numpy import cos, sin, arctan
+from matplotlib import pyplot
+
+vertical_distance_between_layers = 40
+horizontal_distance_between_neurons = 4
+neuron_radius = 1
+default_line_width = 1
+
+class Neuron:
+    def __init__(self, x, y):
+        self.x = x
+        self.y = y
+
+    def draw(self):
+        circle = pyplot.Circle((self.x, self.y), radius=neuron_radius, fill=False)
+        pyplot.gca().add_patch(circle)
+
+
+class Layer:
+    def __init__(self, network, number_of_neurons, weights):
+        self.previous_layer = self.get_previous_layer(network)
+        self.y = self.calculate_layer_y_position()
+        self.neurons = self.init_neurons(number_of_neurons)
+        self.weights = weights
+
+    def init_neurons(self, number_of_neurons):
+        neurons = []
+        x = self.calc_left_margin(number_of_neurons)
+        for iteration in range(number_of_neurons):
+            neuron = Neuron(x, self.y)
+            neurons.append(neuron)
+            x += horizontal_distance_between_neurons
+        return neurons
+
+    def calc_left_margin(self, number_of_neurons):  # so the layer is centered around x = 0
+        return -horizontal_distance_between_neurons * number_of_neurons / 2
+
+    def calculate_layer_y_position(self):
+        if self.previous_layer:
+            return self.previous_layer.y + vertical_distance_between_layers
+        else:
+            return 0
+
+    def get_previous_layer(self, network):
+        if len(network.layers) > 0:
+            return network.layers[-1]
+        else:
+            return None
+
+    def line(self, neuron1, neuron2, weight):
+        angle = arctan((neuron2.x - neuron1.x) / float(neuron2.y - neuron1.y))
+        x_adjustment = neuron_radius * sin(angle)
+        y_adjustment = neuron_radius * cos(angle)
+        color = 'blue'
+        if weight < 0:
+            color = 'red'
+        line = pyplot.Line2D((neuron1.x - x_adjustment, neuron2.x + x_adjustment),
+                             (neuron1.y - y_adjustment, neuron2.y + y_adjustment),
+                             linewidth=default_line_width * abs(weight), color=color)  # width encodes magnitude, color encodes sign
+        pyplot.gca().add_line(line)
+
+    def draw(self):
+        y = 0
+        for neuron in self.neurons:
+            if self.previous_layer:
+                x = 0
+                for previous_layer_neuron in self.previous_layer.neurons:
+                    self.line(neuron, previous_layer_neuron, self.weights[x][y])
+                    x += 1
+            y += 1
+            neuron.draw()
+
+
+class NeuralNetwork:
+    def __init__(self, architecture, weights):
+        self.layers = []
+        for i in range(len(architecture)):
+            if i > 0:
+                self.layers.append(Layer(self, architecture[i], weights[i - 1]))
+            else:
+                self.layers.append(Layer(self, architecture[i], None))
+
+    def add_layer(self, number_of_neurons, weights=None):
+        layer = Layer(self, number_of_neurons, weights)
+        self.layers.append(layer)
+
+    def draw(self, gen):
+        for layer in self.layers:
+            layer.draw()
+        pyplot.axis('scaled')
+        pyplot.savefig(f'./models/mlp_{gen}.png', dpi=300)
+        pyplot.cla()
+        # pyplot.show()
+
+
+if __name__ == "__main__":
+    # quick demo with random weights for a 24-12-4 net
+    import numpy as np
+    demo_weights = [np.random.randn(24, 12), np.random.randn(12, 4)]
+    network = NeuralNetwork([24, 12, 4], demo_weights)
+    network.draw(0)
diff --git a/EvolutionStrategies/population.py b/EvolutionStrategies/population.py
new file mode 100644
index 0000000000000000000000000000000000000000..0c4824773c5f8e5fe5b1b17c9e1015cdee4b225d
--- /dev/null
+++ b/EvolutionStrategies/population.py
@@ -0,0 +1,59 @@
+import numpy as np
+from walker import Walker
+import gym
+
+np.random.seed(42)
+
+
+class Population:
+
+    def __init__(self, size, hidden_layer, bias, mutation_factor, max_steps, load_brain, version, lr, render_best):
+        self.size = size
+        self.mutation_factor = mutation_factor
+        self.gen = 1
+        self.version = version
+        self.max_steps = max_steps
+        self.render_best = render_best
+        self.env = gym.make('Pendulum-v1')  # alternatives: MountainCarContinuous-v0, LunarLanderContinuous-v2, CarRacing-v0
+        self.walker = Walker(hidden_layer, bias, version, load_brain, self.env)
+        self.mutated_weights = dict()
+        self.mutants = []
+        self.envs = []
+        self.rewards = None
+        self.lr = lr
+        walker_weights = self.walker.get_weights()
+        for i in range(self.size):
+            self.mutants.append(Walker(hidden_layer, bias, version, False, self.env))
+            if load_brain:
+                self.mutants[-1].set_weights(walker_weights)
+
+    def play_episode(self):
+        self.rewards = np.zeros(self.size)
+        for i in range(self.size):
+            self.rewards[i] = self.mutants[i].get_reward(self.max_steps)
+
+    def evolve(self):
+        # standardize rewards so the update is invariant to reward scale
+        A = (self.rewards - np.mean(self.rewards)) / np.std(self.rewards)
+        weights = self.walker.get_weights()
+        for i in range(self.size):
+            for k in weights:
+                weights_change = np.dot(self.mutants[i].weights[k].T, A[i]).T
+                # note: self.lr / (self.size * self.lr) reduces to 1 / self.size,
+                # so later changes to self.lr have no effect on the step size;
+                # the canonical ES step would be lr / (size * noise_std)
+                weights[k] = weights[k] + self.lr/(self.size*self.lr) * weights_change
+        self.walker.set_weights(weights)
+        for mutant in self.mutants:
+            mutant.set_weights(weights)
+        self.gen += 1
+
+        print("Reward: ", self.walker.get_reward(self.max_steps, self.render_best))
+
+    def get_walker_stats(self):
+        avg_reward = []
+        for i in range(10):
+            avg_reward.append(self.walker.get_reward(10000))
+        avg_reward = sum(avg_reward) / len(avg_reward)
+        print("Average reward: ", avg_reward)
+        return avg_reward
+
+    def mutate(self):  # mutates all the weights of the mutants
+        for i in range(len(self.mutants)):
+            self.mutants[i].mutate(self.mutation_factor)
diff --git a/EvolutionStrategies/walker.py b/EvolutionStrategies/walker.py
new file mode 100644
index 0000000000000000000000000000000000000000..bda567fce6c9161efe21baa077c44340b6150a9c
--- /dev/null
+++ b/EvolutionStrategies/walker.py
@@ -0,0 +1,116 @@
+import numpy as np
+import pickle
+import copy
+import os
+import matplotlib.pyplot as plt
+
+import mlp_visualizer
+
+np.random.seed(42)
+
+
+class Walker:
+
+    def __init__(self, hidden_layer, bias, version, load_brain, env):
+        self.env = env
+        self.version = version
+        self.hidden_layer = hidden_layer
+
+        if load_brain:
+            self.load()
+        else:
+            self.weights = {}
+            self.weights['W1'] = np.random.randn(self.env.observation_space.shape[0] + int(bias), hidden_layer) \
+                / np.sqrt(self.env.observation_space.shape[0] + int(bias))
+            self.weights['W2'] = np.random.randn(hidden_layer, self.env.action_space.shape[0]) / np.sqrt(hidden_layer)
+
+        self.bias = bias
+
+    def get_action(self, observation):
+        if self.bias:
+            observation = np.append(observation, 1)
+        hl = np.matmul(observation, self.weights['W1'])
+        hl = np.tanh(hl)
+        action = np.matmul(hl, self.weights['W2'])
+        action = np.tanh(action)
+
+        return action
+
+    def get_reward(self, steps, render=False):
+        observation = self.env.reset()
+        total_reward = 0
+        for t in range(steps):
+            if render:
+                self.env.render()
+            action = self.get_action(observation)
+            observation, reward, done, info = self.env.step(action)
+            total_reward += reward
+
+            if done:
+                break
+        return total_reward
+
+    def mutate(self, mutation_rate):
+        for k, v in self.weights.items():
+            self.weights[k] = v + mutation_rate * np.random.randn(v.shape[0], v.shape[1])
+
+    def get_weights(self):
+        return copy.deepcopy(self.weights)
+
+    def set_weights(self, weights):
+        self.weights = copy.deepcopy(weights)
+
+    def plot_input_weights(self):
+        weights = []
+        # BipedalWalker-v2 observation names, per the gym wiki
+        names = [
+            "hull_angle",
+            "hull_angularVelocity",
+            "vel_x",
+            "vel_y",
+            "hip_joint_1_angle",
+            "hip_joint_1_speed",
+            "knee_joint_1_angle",
+            "knee_joint_1_speed",
+            "leg_1_ground_contact_flag",
+            "hip_joint_2_angle",
+            "hip_joint_2_speed",
+            "knee_joint_2_angle",
+            "knee_joint_2_speed",
+            "leg_2_ground_contact_flag",
+            "lidar reading 1",
+            "lidar reading 2",
+            "lidar reading 3",
+            "lidar reading 4",
+            "lidar reading 5",
+            "lidar reading 6",
+            "lidar reading 7",
+            "lidar reading 8",
+            "lidar reading 9",
+            "lidar reading 10"
+        ]
+        if self.bias:
+            names.append('bias')
+        for i in range(self.env.observation_space.shape[0] + int(self.bias)):
+            weights.append(sum(self.weights['W1'][i]))  # total outgoing weight per input
+        plt.bar(names, weights)
+        plt.xticks(rotation=45, ha="right")
+        plt.show()
+
+    def save_mlp_weights(self, gen):
+        mlp_visualizer.vertical_distance_between_layers = 40
+        mlp_visualizer.horizontal_distance_between_neurons = 4
+        mlp_visualizer.neuron_radius = 1
+        mlp_visualizer.default_line_width = 1
+        network = mlp_visualizer.NeuralNetwork([self.env.observation_space.shape[0] + int(self.bias), self.hidden_layer,
+                                                self.env.action_space.shape[0]], [self.weights['W1'], self.weights['W2']])
+        network.draw(gen)
+
+    def save(self):
+        if not os.path.isdir('./models'):
+            os.mkdir('./models')
+        with open('./models/model-pedal%d.p' % self.version, 'wb') as fp:
+            pickle.dump(self.weights, fp)
+
+    def load(self):
+        with open('./models/model-pedal%d.p' % self.version, 'rb') as fp:
+            self.weights = pickle.load(fp)
diff --git a/brain.py b/MutateActions/brain.py
similarity index 100%
rename from brain.py
rename to MutateActions/brain.py
diff --git a/main.py b/MutateActions/main.py
similarity index 100%
rename from main.py
rename to MutateActions/main.py
diff --git a/population.py b/MutateActions/population.py
similarity index 100%
rename from population.py
rename to MutateActions/population.py
diff --git a/walker.py b/MutateActions/walker.py
similarity index 100%
rename from walker.py
rename to MutateActions/walker.py
diff --git a/README.md b/README.md
index 43c3ffff5fbab235aa6a01506d8b8f4fe90856d8..762ee9c9fbf2f15fd0dffd86eabe3f9d9c56b75a 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,38 @@
 # Bipedal Walker Evo
-Trying to solve the bipedal walker with an evolution algorithm
\ No newline at end of file
+This project tries to solve OpenAI's BipedalWalker environment with an evolution strategy.\
+After 1000 episodes (about one hour of training) it reaches a reward of ~250.\
+Best score so far: 292/300
+
+## How it works
+1. Generate a randomly weighted neural net
+2. Create a population of neural nets with mutated weights
+3. Let every net finish an episode and reward it accordingly
+4. Update the net with the reward-weighted average of the population's weights, so better mutants contribute more to the next generation
+
+## Hyperparameters
+| Parameter         | Description                                                               | Interval  |
+|-------------------|---------------------------------------------------------------------------|-----------|
+| `HIDDEN_LAYER`    | Size of the hidden layer.                                                 | [1;∞[     |
+| `BIAS`            | Add a bias neuron to the input layer.                                     | {0,1}     |
+| `POP_SIZE`        | Size of the population.                                                   | [1;∞[     |
+| `MUTATION_FACTOR` | Standard deviation of the Gaussian noise added to each mutant's weights.  | [0;1]     |
+| `LEARNING_RATE`   | Step size of the weight update after each generation.                     | [0;1]     |
+| `GENS`            | Number of generations.                                                    | [1;∞[     |
+| `MAX_STEPS`       | Number of steps that are played in one episode.                           | [0;1600]  |
+
+
+## Installation
+We use Windows, Anaconda and Python 3.7 \
+`conda create -n evo_neuro python=3.7` \
+`conda activate evo_neuro`\
+`conda install swig`\
+`pip install -r requirements.txt`
+
+
+
+## Sources
+Environment: https://github.com/openai/gym/wiki/BipedalWalker-v2 \
+Table of all Environments: https://github.com/openai/gym/wiki/Table-of-environments \
+OpenAI Website: https://gym.openai.com/envs/BipedalWalker-v2/ \
+More on evolution strategies: https://openai.com/blog/evolution-strategies/
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0c85215b839de1a44c1b47e1fc37c70c6b9032b7
Binary files /dev/null and b/requirements.txt differ
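
For readers following the update rule in `EvolutionStrategies/population.py`, here is a minimal, self-contained sketch of the same evolution-strategies step on a toy objective. It is not part of the diff above: `es_step`, `reward_fn`, and the toy objective are illustrative assumptions, and the loop mirrors mutate → play_episode → evolve under the assumption of Gaussian noise with standard deviation `sigma`.

```python
import numpy as np

rng = np.random.default_rng(42)

def es_step(weights, reward_fn, pop_size=50, sigma=0.1, lr=0.03):
    """One ES generation: sample noise, score mutants, step along
    the reward-weighted average of the noise."""
    # one Gaussian perturbation per mutant (the "mutation")
    noise = [rng.standard_normal(weights.shape) for _ in range(pop_size)]
    # each mutant "plays one episode": score the perturbed weights
    rewards = np.array([reward_fn(weights + sigma * eps) for eps in noise])
    # standardize rewards so the step is invariant to reward scale
    a = (rewards - rewards.mean()) / (rewards.std() + 1e-8)
    # reward-weighted average of the noise approximates the reward gradient
    grad = sum(eps * a_i for eps, a_i in zip(noise, a)) / (pop_size * sigma)
    return weights + lr * grad

# toy usage: maximize -||w - 3||^2, optimum at w = [3, 3, 3, 3]
w = np.zeros(4)
for _ in range(300):
    w = es_step(w, lambda v: -np.sum((v - 3) ** 2))
print(w)  # converges close to [3, 3, 3, 3]
```

Because the rewards are standardized, the weighting coefficients sum to zero; this is also why `evolve()` in `population.py` can weight the mutants' full weight matrices rather than just the noise — the parent's own weights cancel out of the sum once the mutants are parent-plus-noise.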