Skip to content
Snippets Groups Projects
Commit e12a37ee authored by Philip Maas's avatar Philip Maas
Browse files

Merge branch 'evo-neuro' into 'main'

Evo neuro

See merge request !2
parents 3bceaab7 226f67ea
No related branches found
No related tags found
1 merge request!2Evo neuro
import numpy as np
import pickle
import copy
import os
import matplotlib.pyplot as plt
import mlp_visualizer
# Seed NumPy's global RNG at import time so the np.random.randn draws used for
# weight initialisation and mutation noise below are reproducible across runs.
np.random.seed(42)
class Walker:
    """Single-hidden-layer tanh network that drives a gym-style environment.

    The network maps an observation vector to an action vector through two
    weight matrices stored in ``self.weights``:

    * ``'W1'`` -- shape ``(n_observations + int(bias), hidden_layer)``
    * ``'W2'`` -- shape ``(hidden_layer, n_actions)``

    The sizes are read from ``env.observation_space.shape[0]`` and
    ``env.action_space.shape[0]``.
    """

    # Labels for the 24 observation components plotted by
    # ``plot_input_weights``, in observation order.  The layout follows the
    # BipedalWalker environment description (see the project's README
    # sources): 4 hull values, 5 values per leg, 10 lidar readings.
    INPUT_NAMES = (
        "hull_angle",
        "hull_angularVelocity",
        "vel_x",
        "vel_y",
        "hip_joint_1_angle",
        "hip_joint_1_speed",
        "knee_joint_1_angle",
        "knee_joint_1_speed",
        "leg_1_ground_contact_flag",
        "hip_joint_2_angle",
        "hip_joint_2_speed",
        "knee_joint_2_angle",
        "knee_joint_2_speed",
        "leg_2_ground_contact_flag",
        "lidar reading 1",
        "lidar reading 2",
        "lidar reading 3",
        "lidar reading 4",
        "lidar reading 5",
        "lidar reading 6",
        "lidar reading 7",
        "lidar reading 8",
        "lidar reading 9",
        "lidar reading 10",
    )

    def __init__(self, hidden_layer, bias, version, load_brain, env):
        """Create a walker with random weights or weights loaded from disk.

        Args:
            hidden_layer: Number of neurons in the hidden layer.
            bias: Truthy to append a constant ``1`` input (bias neuron) to
                every observation.
            version: Integer used in the model file name
                (``./models/model-pedal<version>.p``).
            load_brain: If truthy, load the weights via ``load()`` instead of
                initialising them randomly.
            env: Environment exposing ``observation_space``, ``action_space``,
                ``reset()``, ``step()`` and ``render()``.
        """
        self.env = env
        self.version = version
        self.hidden_layer = hidden_layer
        # Set before branching so the attribute exists on both code paths.
        self.bias = bias
        if load_brain:
            self.load()
        else:
            n_inputs = self.env.observation_space.shape[0] + int(bias)
            n_outputs = self.env.action_space.shape[0]
            # Standard-normal weights scaled by 1/sqrt(fan_in).
            self.weights = {
                'W1': np.random.randn(n_inputs, hidden_layer) / np.sqrt(n_inputs),
                'W2': np.random.randn(hidden_layer, n_outputs) / np.sqrt(hidden_layer),
            }

    def get_action(self, observation):
        """Return the network output for ``observation``.

        Both layers use ``tanh``, so every action component lies in [-1, 1].
        """
        if self.bias:
            # The bias neuron is modelled as an extra, constant input of 1.
            observation = np.append(observation, 1)
        hidden = np.tanh(np.matmul(observation, self.weights['W1']))
        return np.tanh(np.matmul(hidden, self.weights['W2']))

    def get_reward(self, steps, render=False):
        """Run one episode of at most ``steps`` steps and return its summed reward.

        Args:
            steps: Maximum number of environment steps to play.
            render: If true, call ``env.render()`` before every step.

        Returns:
            The sum of the rewards returned by ``env.step`` until the episode
            reported ``done`` or ``steps`` steps were played.
        """
        observation = self.env.reset()
        total_reward = 0
        for _ in range(steps):
            if render:
                self.env.render()
            action = self.get_action(observation)
            # Expects the 4-tuple step API (observation, reward, done, info).
            observation, reward, done, _info = self.env.step(action)
            total_reward += reward
            if done:
                break
        return total_reward

    def mutate(self, mutation_rate):
        """Add Gaussian noise scaled by ``mutation_rate`` to every weight."""
        for key, value in self.weights.items():
            # randn(*shape) works for any number of dimensions and draws the
            # same numbers as randn(rows, cols) for the 2-D matrices used here.
            self.weights[key] = value + mutation_rate * np.random.randn(*value.shape)

    def get_weights(self):
        """Return a deep copy of the weight dictionary."""
        return copy.deepcopy(self.weights)

    def set_weights(self, weights):
        """Replace the weights with a deep copy of ``weights``."""
        self.weights = copy.deepcopy(weights)

    def plot_input_weights(self):
        """Show a bar chart of the summed outgoing ``W1`` weights per input.

        One bar is drawn per input neuron (plus one labelled ``'bias'`` when
        the bias neuron is enabled).  The labels come from ``INPUT_NAMES``,
        so the environment is expected to provide 24 observation values.
        """
        names = list(self.INPUT_NAMES)
        if self.bias:
            names.append('bias')
        n_inputs = self.env.observation_space.shape[0] + int(self.bias)
        # Row i of W1 holds the weights leaving input neuron i.
        weights = self.weights['W1'][:n_inputs].sum(axis=1)
        plt.bar(names, weights)
        plt.xticks(rotation=45, ha="right")
        plt.show()

    def save_mlp_weights(self, gen):
        """Draw the network for generation ``gen`` using ``mlp_visualizer``.

        Sets the visualizer's module-level layout options, builds a
        ``NeuralNetwork`` from the layer sizes and both weight matrices, and
        calls its ``draw(gen)``.
        """
        mlp_visualizer.vertical_distance_between_layers = 40
        mlp_visualizer.horizontal_distance_between_neurons = 4
        mlp_visualizer.neuron_radius = 1
        mlp_visualizer.default_line_width = 1
        layer_sizes = [
            # int() keeps the layer size an integer even when bias is a bool,
            # matching how the input size is computed everywhere else.
            self.env.observation_space.shape[0] + int(self.bias),
            self.hidden_layer,
            self.env.action_space.shape[0],
        ]
        network = mlp_visualizer.NeuralNetwork(
            layer_sizes, [self.weights['W1'], self.weights['W2']])
        network.draw(gen)

    def save(self):
        """Pickle the weights to ``./models/model-pedal<version>.p``."""
        # exist_ok avoids the check-then-create race of isdir() + mkdir().
        os.makedirs('./models', exist_ok=True)
        with open('./models/model-pedal%d.p' % self.version, 'wb') as fp:
            pickle.dump(self.weights, fp)

    def load(self):
        """Load the weights from ``./models/model-pedal<version>.p``.

        Raises:
            FileNotFoundError: If no model file exists for ``self.version``.
        """
        # NOTE(security): pickle.load can execute arbitrary code; only load
        # model files that were produced by save() from a trusted source.
        with open('./models/model-pedal%d.p' % self.version, 'rb') as fp:
            self.weights = pickle.load(fp)
File moved
File moved
File moved
File moved
# Bipedal Walker Evo
Trying to solve the bipedal walker with an evolution algorithm
\ No newline at end of file
This project tries to solve OpenAI's BipedalWalker environment with an evolution strategy.\
After 1000 episodes (about 1 h of training), it reaches a reward of ~250.\
Best score so far: 292/300
## How it works
1. Generate a randomly weighted neural net
2. Create a population of neural nets with mutated weights
3. Let every net finish an episode and reward it accordingly
4. The better the reward, the higher the chance to pass weights to next gen
## Hyperparameters
| Parameter | Description | Interval |
|-------------------|-------------------------------------------------------------|-----------|
| `HIDDEN_LAYER` | Size of hidden layer. | [1;∞[ |
| `BIAS` | Add a bias neuron to the input layer. | {0,1} |
| `POP_SIZE` | Size of population. | [0;∞[ |
| `MUTATION_FACTOR` | Percentage of weights that will be mutated for each mutant. | [0;1] |
| `LEARNING_RATE` | This is the rate of learning. | [0;1] |
| `GENS` | Number of generations. | [0;∞[ |
| `MAX_STEPS` | Number of steps that are played in one episode. | [0; 1600] |
## Installation
We use Windows, Anaconda and Python 3.7 \
`conda create -n evo_neuro python=3.7` \
`conda activate evo_neuro`\
`conda install swig`\
`pip install -r requirements.txt`
## Sources
Environment: https://github.com/openai/gym/wiki/BipedalWalker-v2 \
Table of all Environments: https://github.com/openai/gym/wiki/Table-of-environments \
OpenAI Website: https://gym.openai.com/envs/BipedalWalker-v2/ \
More on evolution strategies: https://openai.com/blog/evolution-strategies/
File suppressed by a .gitattributes entry, the file's encoding is unsupported, or the file size exceeds the limit.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment