Merge branch 'evo-neuro' into 'main'

Evo neuro See merge request !2

Merge branch 'evo-neuro' into 'main'
e12a37ee · Philip Maas · 3bceaab7 · 226f67ea · e12a37ee · e12a37ee
Commit e12a37ee authored Feb 28, 2022 by Philip Maas
--- a/EvolutionStrategies/walker.py
+++ b/EvolutionStrategies/walker.py
+import numpy as np
+import pickle
+import copy
+import os
+import matplotlib.pyplot as plt
+
+import mlp_visualizer
+
+np.random.seed(42)  # Seed NumPy's global RNG so weight initialisation and mutation noise are reproducible.
+
+
+class Walker:
+    """Two-layer tanh policy network evaluated against a gym-style environment.
+
+    An observation (optionally extended by a constant bias input) is mapped
+    through one hidden layer to an action vector; both layers apply ``tanh``.
+    The weights live in ``self.weights`` under the keys ``'W1'`` and ``'W2'``.
+    """
+
+    # Labels for the 24 BipedalWalker observation components, in observation order.
+    _INPUT_NAMES = (
+        "hull_angle",
+        "hull_angularVelocity",
+        "vel_x",
+        "vel_y",
+        "hip_joint_1_angle",
+        "hip_joint_1_speed",
+        "knee_joint_1_angle",
+        "knee_joint_1_speed",
+        "leg_1_ground_contact_flag",
+        "hip_joint_2_angle",
+        "hip_joint_2_speed",
+        "knee_joint_2_angle",
+        "knee_joint_2_speed",
+        "leg_2_ground_contact_flag",
+        "lidar reading 1",
+        "lidar reading 2",
+        "lidar reading 3",
+        "lidar reading 4",
+        "lidar reading 5",
+        "lidar reading 6",
+        "lidar reading 7",
+        "lidar reading 8",
+        "lidar reading 9",
+        "lidar reading 10",
+    )
+
+    def __init__(self, hidden_layer, bias, version, load_brain, env):
+        """Create a walker with random weights or with weights loaded from disk.
+
+        :param hidden_layer: number of units in the hidden layer.
+        :param bias: truthy to append a constant ``1`` to every observation.
+        :param version: integer used to build the model file name.
+        :param load_brain: if truthy, read the weights with ``load()`` instead
+            of initialising them randomly.
+        :param env: environment providing ``observation_space.shape``,
+            ``action_space.shape``, ``reset()``, ``step()`` and ``render()``.
+        """
+        self.env = env
+        self.version = version
+        self.hidden_layer = hidden_layer
+        self.bias = bias
+
+        if load_brain:
+            self.load()
+        else:
+            n_inputs = self.env.observation_space.shape[0] + int(bias)
+            n_outputs = self.env.action_space.shape[0]
+            # Each layer is scaled by 1/sqrt(number of incoming connections).
+            self.weights = {
+                'W1': np.random.randn(n_inputs, hidden_layer) / np.sqrt(n_inputs),
+                'W2': np.random.randn(hidden_layer, n_outputs) / np.sqrt(hidden_layer),
+            }
+
+    def _model_path(self):
+        """Return the pickle file path for this walker's ``version``."""
+        return './models/model-pedal%d.p' % self.version
+
+    def get_action(self, observation):
+        """Run a forward pass and return the action vector for ``observation``.
+
+        When ``self.bias`` is truthy a constant ``1`` is appended to the
+        observation before it is multiplied with ``W1``.
+        """
+        if self.bias:
+            observation = np.append(observation, 1)
+        hidden = np.tanh(np.matmul(observation, self.weights['W1']))
+        return np.tanh(np.matmul(hidden, self.weights['W2']))
+
+    def get_reward(self, steps, render=False):
+        """Play one episode of at most ``steps`` steps and return the summed reward.
+
+        ``env.reset()`` is expected to return the first observation and
+        ``env.step()`` a 4-tuple ``(observation, reward, done, info)``.
+
+        :param steps: maximum number of environment steps.
+        :param render: if truthy, call ``env.render()`` before every step.
+        :return: sum of the rewards collected until ``done`` or ``steps``.
+        """
+        observation = self.env.reset()
+        total_reward = 0
+        for _ in range(steps):
+            if render:
+                self.env.render()
+            action = self.get_action(observation)
+            observation, reward, done, _info = self.env.step(action)
+            total_reward += reward
+
+            if done:
+                break
+        return total_reward
+
+    def mutate(self, mutation_rate):
+        """Add Gaussian noise scaled by ``mutation_rate`` to every weight array."""
+        for key, matrix in self.weights.items():
+            # Unpacking the shape keeps this correct for arrays of any rank.
+            self.weights[key] = matrix + mutation_rate * np.random.randn(*matrix.shape)
+
+    def get_weights(self):
+        """Return a deep copy of the weight dictionary."""
+        return copy.deepcopy(self.weights)
+
+    def set_weights(self, weights):
+        """Replace the weights with a deep copy of ``weights``."""
+        self.weights = copy.deepcopy(weights)
+
+    def plot_input_weights(self):
+        """Show a bar chart of the summed outgoing ``W1`` weights per input."""
+        names = list(self._INPUT_NAMES)
+        if self.bias:
+            names.append('bias')
+        n_inputs = self.env.observation_space.shape[0] + int(self.bias)
+        # One value per input: the sum over that input's row of W1.
+        weights = [sum(self.weights['W1'][i]) for i in range(n_inputs)]
+        plt.bar(names, weights)
+        plt.xticks(rotation=45, ha="right")
+        plt.show()
+
+    def save_mlp_weights(self, gen):
+        """Draw the network with ``mlp_visualizer`` and pass ``gen`` to ``draw``."""
+        # These are module-level settings of mlp_visualizer, set before drawing.
+        mlp_visualizer.vertical_distance_between_layers = 40
+        mlp_visualizer.horizontal_distance_between_neurons = 4
+        mlp_visualizer.neuron_radius = 1
+        mlp_visualizer.default_line_width = 1
+        layer_sizes = [
+            self.env.observation_space.shape[0] + int(self.bias),
+            self.hidden_layer,
+            self.env.action_space.shape[0],
+        ]
+        network = mlp_visualizer.NeuralNetwork(layer_sizes, [self.weights['W1'], self.weights['W2']])
+        network.draw(gen)
+
+    def save(self):
+        """Pickle the weights to ``./models``, creating the directory if needed."""
+        # exist_ok avoids the check-then-create race of isdir() + mkdir().
+        os.makedirs('./models', exist_ok=True)
+        with open(self._model_path(), 'wb') as fp:
+            pickle.dump(self.weights, fp)
+
+    def load(self):
+        """Load the weights from the pickle file matching ``self.version``."""
+        # NOTE: pickle.load can execute arbitrary code; only load model files you trust.
+        with open(self._model_path(), 'rb') as fp:
+            self.weights = pickle.load(fp)
--- a/brain.py
+++ b/brain.py
--- a/main.py
+++ b/main.py
--- a/population.py
+++ b/population.py
--- a/walker.py
+++ b/walker.py
--- a/README.md
+++ b/README.md
 # Bipedal Walker Evo

-Trying to solve the bipedal walker with an evolution algorithm
\ No newline at end of file
+This project tries to solve OpenAI's bipedal walker with an evolutionary strategy.\
+After 1000 episodes, which is about 1h of learning, it will reach ~250 reward.\
+Best score until now: 292/300
+
+## How it works
+1. Generate a randomly weighted neural net
+2. Create a population of neural nets with mutated weights
+3. Let every net finish an episode and reward it accordingly
+4. The better the reward, the higher the chance of passing its weights on to the next generation
+
+## Hyperparameters
+| Parameter         | Description                                                 | Interval  |
+|-------------------|-------------------------------------------------------------|-----------|
+| `HIDDEN_LAYER`    | Size of hidden layer.                                       | [1;∞[     |
+| `BIAS`            | Add a bias neuron to the input layer.                       | {0,1}     |
+| `POP_SIZE`        | Size of population.                                         | [0;∞[     |
+| `MUTATION_FACTOR` | Percentage of weights that will be mutated for each mutant. | [0;1]     |
+| `LEARNING_RATE`   | This is the rate of learning.                               | [0;1]     |
+| `GENS`            | Number of generations.                                      | [0;∞[     |
+| `MAX_STEPS`       | Number of steps that are played in one episode.             | [0; 1600] |
+
+
+## Installation
+We use Windows, Anaconda and Python 3.7 \
+`conda create -n evo_neuro python=3.7` \
+`conda activate evo_neuro`\
+`conda install swig`\
+`pip install -r requirements.txt`
+
+
+
+## Sources
+Environment: https://github.com/openai/gym/wiki/BipedalWalker-v2 \
+Table of all Environments: https://github.com/openai/gym/wiki/Table-of-environments \
+OpenAI Website: https://gym.openai.com/envs/BipedalWalker-v2/ \
+More on evolution strategies: https://openai.com/blog/evolution-strategies/
--- a/requirements.txt
+++ b/requirements.txt