diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..4c195866e50935ce0d787acd9089fdafb67de059
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+models/
+__pycache__/
\ No newline at end of file
diff --git a/EvolutionStrategies/Experiments/100 1 50 0.1 0.03 300/avg190_hl100.p b/EvolutionStrategies/Experiments/100 1 50 0.1 0.03 300/avg190_hl100.p
new file mode 100644
index 0000000000000000000000000000000000000000..a5d30b58299bde2aae6544750923df1d590f71ab
Binary files /dev/null and b/EvolutionStrategies/Experiments/100 1 50 0.1 0.03 300/avg190_hl100.p differ
diff --git a/EvolutionStrategies/Experiments/12 1 50 0.1 0.01 300/12_1_50_0.1_0.01_300.png b/EvolutionStrategies/Experiments/12 1 50 0.1 0.01 300/12_1_50_0.1_0.01_300.png
new file mode 100644
index 0000000000000000000000000000000000000000..2515e18af215c554424c8106abcf712fb1391b6c
Binary files /dev/null and b/EvolutionStrategies/Experiments/12 1 50 0.1 0.01 300/12_1_50_0.1_0.01_300.png differ
diff --git a/EvolutionStrategies/Experiments/12 1 50 0.1 0.01 300/293.p b/EvolutionStrategies/Experiments/12 1 50 0.1 0.01 300/293.p
new file mode 100644
index 0000000000000000000000000000000000000000..12cd2744432fbd5e562571e488b9f74069bcf673
Binary files /dev/null and b/EvolutionStrategies/Experiments/12 1 50 0.1 0.01 300/293.p differ
diff --git a/EvolutionStrategies/Experiments/12 1 50 0.1 0.03 300/12_1_50_0.1_0.03_300.png b/EvolutionStrategies/Experiments/12 1 50 0.1 0.03 300/12_1_50_0.1_0.03_300.png
new file mode 100644
index 0000000000000000000000000000000000000000..35a61097e80b7e8f07f6ec8144893cc5a2c788f2
Binary files /dev/null and b/EvolutionStrategies/Experiments/12 1 50 0.1 0.03 300/12_1_50_0.1_0.03_300.png differ
diff --git a/EvolutionStrategies/Experiments/12 1 50 0.1 0.03 300/avg260_hl12.p b/EvolutionStrategies/Experiments/12 1 50 0.1 0.03 300/avg260_hl12.p
new file mode 100644
index 0000000000000000000000000000000000000000..c8c3d8fff3f9ae5d7473eafca736c9a67cfef391
Binary files /dev/null and b/EvolutionStrategies/Experiments/12 1 50 0.1 0.03 300/avg260_hl12.p differ
diff --git a/EvolutionStrategies/Experiments/12 1 50 0.1 0.03 300/avg270_hl12.p b/EvolutionStrategies/Experiments/12 1 50 0.1 0.03 300/avg270_hl12.p
new file mode 100644
index 0000000000000000000000000000000000000000..141bd7de73bf4f7b223df37fc51da8922287c52a
Binary files /dev/null and b/EvolutionStrategies/Experiments/12 1 50 0.1 0.03 300/avg270_hl12.p differ
diff --git a/EvolutionStrategies/Experiments/12 1 50 0.1 0.03 300/avg278_hl12.p b/EvolutionStrategies/Experiments/12 1 50 0.1 0.03 300/avg278_hl12.p
new file mode 100644
index 0000000000000000000000000000000000000000..47e73e91851250bee7a00aea52f4030f84a528b2
Binary files /dev/null and b/EvolutionStrategies/Experiments/12 1 50 0.1 0.03 300/avg278_hl12.p differ
diff --git a/EvolutionStrategies/Experiments/12 1 50 0.1 0.03 300/gt200_hl12.p b/EvolutionStrategies/Experiments/12 1 50 0.1 0.03 300/gt200_hl12.p
new file mode 100644
index 0000000000000000000000000000000000000000..b33479e2c2e0549085f0b52ef90346d5bd75d8ac
Binary files /dev/null and b/EvolutionStrategies/Experiments/12 1 50 0.1 0.03 300/gt200_hl12.p differ
diff --git a/EvolutionStrategies/Experiments/12 1 50 0.1 0.03 300/gt250_hl12.p b/EvolutionStrategies/Experiments/12 1 50 0.1 0.03 300/gt250_hl12.p
new file mode 100644
index 0000000000000000000000000000000000000000..c8306c0417750308dd44dcbbe56ca74468b48078
Binary files /dev/null and b/EvolutionStrategies/Experiments/12 1 50 0.1 0.03 300/gt250_hl12.p differ
diff --git a/EvolutionStrategies/Experiments/12 1 50 0.1 0.1 300/12_1_50_0.1_0.1_300.png b/EvolutionStrategies/Experiments/12 1 50 0.1 0.1 300/12_1_50_0.1_0.1_300.png
new file mode 100644
index 0000000000000000000000000000000000000000..dab40b431c08d1bc5813022d931b99ef1b9b2857
Binary files /dev/null and b/EvolutionStrategies/Experiments/12 1 50 0.1 0.1 300/12_1_50_0.1_0.1_300.png differ
diff --git a/EvolutionStrategies/Experiments/12 1 50 0.1 0.1 300/model-pedal1.p b/EvolutionStrategies/Experiments/12 1 50 0.1 0.1 300/model-pedal1.p
new file mode 100644
index 0000000000000000000000000000000000000000..3d9384dbc6423e71a86f92343c2103d0d1f847ba
Binary files /dev/null and b/EvolutionStrategies/Experiments/12 1 50 0.1 0.1 300/model-pedal1.p differ
diff --git a/EvolutionStrategies/Experiments/12 1 50 0.1 decaying 300/12_1_50_0.1_decaying_300.png b/EvolutionStrategies/Experiments/12 1 50 0.1 decaying 300/12_1_50_0.1_decaying_300.png
new file mode 100644
index 0000000000000000000000000000000000000000..4be293beaeb88fe015069af2ce4d6940349c2bb0
Binary files /dev/null and b/EvolutionStrategies/Experiments/12 1 50 0.1 decaying 300/12_1_50_0.1_decaying_300.png differ
diff --git a/EvolutionStrategies/Experiments/12 1 50 0.1 decaying 300/12_2_50_0.1_decaying_300.png b/EvolutionStrategies/Experiments/12 1 50 0.1 decaying 300/12_2_50_0.1_decaying_300.png
new file mode 100644
index 0000000000000000000000000000000000000000..e78794ebd91bf62921974116d80009716c2ea0d7
Binary files /dev/null and b/EvolutionStrategies/Experiments/12 1 50 0.1 decaying 300/12_2_50_0.1_decaying_300.png differ
diff --git a/EvolutionStrategies/Experiments/12 1 50 0.1 decaying 300/model-pedal1.p b/EvolutionStrategies/Experiments/12 1 50 0.1 decaying 300/model-pedal1.p
new file mode 100644
index 0000000000000000000000000000000000000000..0ddf0c87ba62ca0e8ed361ac14c6f9ecd017be4b
Binary files /dev/null and b/EvolutionStrategies/Experiments/12 1 50 0.1 decaying 300/model-pedal1.p differ
diff --git a/EvolutionStrategies/Experiments/12 1 50 0.1 decaying 300/model-pedal2.p b/EvolutionStrategies/Experiments/12 1 50 0.1 decaying 300/model-pedal2.p
new file mode 100644
index 0000000000000000000000000000000000000000..bef8fad01968c07c06ebd179a7dd10f60136c966
Binary files /dev/null and b/EvolutionStrategies/Experiments/12 1 50 0.1 decaying 300/model-pedal2.p differ
diff --git a/EvolutionStrategies/Experiments/12 1 50 0.3 0.03 300/12_1_50_0.3_0.03_300.png b/EvolutionStrategies/Experiments/12 1 50 0.3 0.03 300/12_1_50_0.3_0.03_300.png
new file mode 100644
index 0000000000000000000000000000000000000000..139b436d9c39bde8d07a9053c646342993a6ed2e
Binary files /dev/null and b/EvolutionStrategies/Experiments/12 1 50 0.3 0.03 300/12_1_50_0.3_0.03_300.png differ
diff --git a/EvolutionStrategies/main.py b/EvolutionStrategies/main.py
new file mode 100644
index 0000000000000000000000000000000000000000..aa9e09f05c574d8846700164912f585d8f5c5031
--- /dev/null
+++ b/EvolutionStrategies/main.py
@@ -0,0 +1,70 @@
+from population import Population
+import time
+import matplotlib.pyplot as plt
+import pickle
+import sys
+
+HIDDEN_LAYER = 2
+BIAS = True
+POP_SIZE = 50
+MUTATION_FACTOR = 0.1  # 0 <= x <= 1
+LEARNING_RATE = 0.03  # 0 <= x <= 1
+GENS = 7000
+MAX_STEPS = 200  # the environment returns done after 1600 steps anyway
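+# Summary of the loop below (see population.py for details): each generation,
+# every mutant adds Gaussian noise scaled by MUTATION_FACTOR to the parent's
+# weights and plays one episode; evolve() then moves the parent toward the
+# reward-weighted average of the mutants (see evolve() for the exact step size).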
+
+
+VERSION = 1
+TEST_WALKER = True
+LOAD_BRAIN = False
+RENDER_BEST = False
+if TEST_WALKER:
+    LOAD_BRAIN = True
+
+
+def plot_reward(rewards):
+    plt.title(f'{HIDDEN_LAYER}, {VERSION}, {POP_SIZE}, {LEARNING_RATE}')
+    plt.xlabel('Episodes/10')
+    plt.ylabel('Rewards')
+    plt.plot(rewards)
+    plt.savefig(f'./models/{HIDDEN_LAYER}_{VERSION}_{POP_SIZE}_{LEARNING_RATE}.png')
+    plt.show()
+    plt.cla()
+
+
+if __name__ == '__main__':
+    avg_rewards = []
+
+    try:
+        population = Population(POP_SIZE, HIDDEN_LAYER, BIAS, MUTATION_FACTOR, MAX_STEPS, LOAD_BRAIN, VERSION, LEARNING_RATE, RENDER_BEST)
+
+        if TEST_WALKER:
+            rewards = []
+            # population.walker.plot_input_weights()
+            for i in range(10):
+                rewards.append(population.walker.get_reward(10000, True))
+                print("Reward: ", rewards[-1])
+            print("Average Reward: ", sum(rewards) / len(rewards))
+            plot_reward(rewards)
+            sys.exit(0)
+
+        for gen in range(GENS):  # main training loop
+            start_time = time.time()
+            print(f'Gen: {gen}')
+            population.mutate()
+            population.play_episode()
+            population.evolve()
+            print("Time for Gen: ", time.time() - start_time)
+            if gen % 10 == 0:
+                population.walker.save()
+                avg_rewards.append(population.get_walker_stats())
+                population.walker.save_mlp_weights(gen)
+                with open(f'./models/{HIDDEN_LAYER}_{VERSION}_{POP_SIZE}_{LEARNING_RATE}_AvgRewards', 'wb') as fp:
+                    pickle.dump(avg_rewards, fp)
+            if gen == 1000:
+                population.lr = 0.01
+            # if gen == 5000:
+            #     population.lr = 0.005
+
+        plot_reward(avg_rewards)
+    except KeyboardInterrupt:
+        if not TEST_WALKER:
+            plot_reward(avg_rewards)
+
diff --git a/EvolutionStrategies/mlp_visualizer.py b/EvolutionStrategies/mlp_visualizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..ff933fc3407cdecf841a1d569b95920b550395db
--- /dev/null
+++ b/EvolutionStrategies/mlp_visualizer.py
@@ -0,0 +1,98 @@
+from numpy import cos, sin, arctan
+from matplotlib import pyplot
+
+vertical_distance_between_layers = 40
+horizontal_distance_between_neurons = 4
+neuron_radius = 1
+default_line_width = 1
+
+class Neuron:
+    def __init__(self, x, y):
+        self.x = x
+        self.y = y
+
+    def draw(self):
+        circle = pyplot.Circle((self.x, self.y), radius=neuron_radius, fill=False)
+        pyplot.gca().add_patch(circle)
+
+
+class Layer:
+    def __init__(self, network, number_of_neurons, weights):
+        self.previous_layer = self.get_previous_layer(network)
+        self.y = self.calculate_layer_y_position()
+        self.neurons = self.init_neurons(number_of_neurons)
+        self.weights = weights
+
+    def init_neurons(self, number_of_neurons):
+        neurons = []
+        x = self.calc_left_margin(number_of_neurons)
+        for iteration in range(number_of_neurons):
+            neuron = Neuron(x, self.y)
+            neurons.append(neuron)
+            x += horizontal_distance_between_neurons
+        return neurons
+
+    def calc_left_margin(self, number_of_neurons):  # so the layer is centered around x = 0
+        return -horizontal_distance_between_neurons * number_of_neurons / 2
+
+    def calculate_layer_y_position(self):
+        if self.previous_layer:
+            return self.previous_layer.y + vertical_distance_between_layers
+        else:
+            return 0
+
+    def get_previous_layer(self, network):
+        if len(network.layers) > 0:
+            return network.layers[-1]
+        else:
+            return None
+
+    def line(self, neuron1, neuron2, weight):
+        angle = arctan((neuron2.x - neuron1.x) / float(neuron2.y - neuron1.y))
+        x_adjustment = neuron_radius * sin(angle)
+        y_adjustment = neuron_radius * cos(angle)
+        color = 'blue'
+        if weight < 0:
+            color = 'red'
+        line = pyplot.Line2D((neuron1.x - x_adjustment, neuron2.x + x_adjustment),
+                             (neuron1.y - y_adjustment, neuron2.y + y_adjustment),
+                             linewidth=default_line_width * abs(weight), color=color)  # width encodes magnitude, color encodes sign
+        pyplot.gca().add_line(line)
+
+    def draw(self):
+        y = 0
+        for neuron in self.neurons:
+            if self.previous_layer:
+                x = 0
+                for previous_layer_neuron in self.previous_layer.neurons:
+                    self.line(neuron, previous_layer_neuron, self.weights[x][y])
+                    x += 1
+            y += 1
+            neuron.draw()
+
+
+class NeuralNetwork:
+    def __init__(self, architecture, weights):
+        self.layers = []
+        for i in range(len(architecture)):
+            if i > 0:
+                self.layers.append(Layer(self, architecture[i], weights[i - 1]))
+            else:
+                self.layers.append(Layer(self, architecture[i], None))
+
+    def add_layer(self, number_of_neurons, weights=None):
+        layer = Layer(self, number_of_neurons, weights)
+        self.layers.append(layer)
+
+    def draw(self, gen):
+        for layer in self.layers:
+            layer.draw()
+        pyplot.axis('scaled')
+        pyplot.savefig(f'./models/mlp_{gen}.png', dpi=300)
+        pyplot.cla()
+        # pyplot.show()
+
+
+if __name__ == "__main__":
+    # quick demo with random weights for a 24-12-4 net
+    import numpy as np
+    demo_weights = [np.random.randn(24, 12), np.random.randn(12, 4)]
+    network = NeuralNetwork([24, 12, 4], demo_weights)
+    network.draw(0)
diff --git a/EvolutionStrategies/population.py b/EvolutionStrategies/population.py
new file mode 100644
index 0000000000000000000000000000000000000000..0c4824773c5f8e5fe5b1b17c9e1015cdee4b225d
--- /dev/null
+++ b/EvolutionStrategies/population.py
@@ -0,0 +1,59 @@
+import numpy as np
+from walker import Walker
+import gym
+
+np.random.seed(42)
+
+
+class Population:
+
+    def __init__(self, size, hidden_layer, bias, mutation_factor, max_steps, load_brain, version, lr, render_best):
+        self.size = size
+        self.mutation_factor = mutation_factor
+        self.gen = 1
+        self.version = version
+        self.max_steps = max_steps
+        self.render_best = render_best
+        self.env = gym.make('Pendulum-v1')  # alternatives: MountainCarContinuous-v0, LunarLanderContinuous-v2, CarRacing-v0
+        self.walker = Walker(hidden_layer, bias, version, load_brain, self.env)
+        self.mutated_weights = dict()
+        self.mutants = []
+        self.envs = []
+        self.rewards = None
+        self.lr = lr
+        walker_weights = self.walker.get_weights()
+        for i in range(self.size):
+            self.mutants.append(Walker(hidden_layer, bias, version, False, self.env))
+            if load_brain:
+                self.mutants[-1].set_weights(walker_weights)
+
+    def play_episode(self):
+        self.rewards = np.zeros(self.size)
+        for i in range(self.size):
+            self.rewards[i] = self.mutants[i].get_reward(self.max_steps)
+
+    def evolve(self):
+        # standardize rewards so the update is invariant to reward scale
+        A = (self.rewards - np.mean(self.rewards)) / np.std(self.rewards)
+        weights = self.walker.get_weights()
+        for i in range(self.size):
+            for k in weights:
+                weights_change = np.dot(self.mutants[i].weights[k].T, A[i]).T
+                # note: self.lr / (self.size * self.lr) reduces to 1 / self.size,
+                # so later changes to self.lr have no effect on the step size;
+                # the canonical ES step would be lr / (size * noise_std)
+                weights[k] = weights[k] + self.lr/(self.size*self.lr) * weights_change
+        self.walker.set_weights(weights)
+        for mutant in self.mutants:
+            mutant.set_weights(weights)
+        self.gen += 1
+
+        print("Reward: ", self.walker.get_reward(self.max_steps, self.render_best))
+
+    def get_walker_stats(self):
+        avg_reward = []
+        for i in range(10):
+            avg_reward.append(self.walker.get_reward(10000))
+        avg_reward = sum(avg_reward) / len(avg_reward)
+        print("Average reward: ", avg_reward)
+        return avg_reward
+
+    def mutate(self):  # mutates all the weights of the mutants
+        for i in range(len(self.mutants)):
+            self.mutants[i].mutate(self.mutation_factor)
diff --git a/EvolutionStrategies/walker.py b/EvolutionStrategies/walker.py
new file mode 100644
index 0000000000000000000000000000000000000000..bda567fce6c9161efe21baa077c44340b6150a9c
--- /dev/null
+++ b/EvolutionStrategies/walker.py
@@ -0,0 +1,116 @@
+import numpy as np
+import pickle
+import copy
+import os
+import matplotlib.pyplot as plt
+
+import mlp_visualizer
+
+np.random.seed(42)
+
+
+class Walker:
+
+    def __init__(self, hidden_layer, bias, version, load_brain, env):
+        self.env = env
+        self.version = version
+        self.hidden_layer = hidden_layer
+
+        if load_brain:
+            self.load()
+        else:
+            self.weights = {}
+            self.weights['W1'] = np.random.randn(self.env.observation_space.shape[0] + int(bias), hidden_layer) \
+                / np.sqrt(self.env.observation_space.shape[0] + int(bias))
+            self.weights['W2'] = np.random.randn(hidden_layer, self.env.action_space.shape[0]) / np.sqrt(hidden_layer)
+
+        self.bias = bias
+
+    def get_action(self, observation):
+        if self.bias:
+            observation = np.append(observation, 1)
+        hl = np.matmul(observation, self.weights['W1'])
+        hl = np.tanh(hl)
+        action = np.matmul(hl, self.weights['W2'])
+        action = np.tanh(action)
+
+        return action
+
+    def get_reward(self, steps, render=False):
+        observation = self.env.reset()
+        total_reward = 0
+        for t in range(steps):
+            if render:
+                self.env.render()
+            action = self.get_action(observation)
+            observation, reward, done, info = self.env.step(action)
+            total_reward += reward
+
+            if done:
+                break
+        return total_reward
+
+    def mutate(self, mutation_rate):
+        for k, v in self.weights.items():
+            self.weights[k] = v + mutation_rate * np.random.randn(v.shape[0], v.shape[1])
+
+    def get_weights(self):
+        return copy.deepcopy(self.weights)
+
+    def set_weights(self, weights):
+        self.weights = copy.deepcopy(weights)
+
+    def plot_input_weights(self):
+        weights = []
+        # BipedalWalker-v2 observation names, per the gym wiki
+        names = [
+            "hull_angle",
+            "hull_angularVelocity",
+            "vel_x",
+            "vel_y",
+            "hip_joint_1_angle",
+            "hip_joint_1_speed",
+            "knee_joint_1_angle",
+            "knee_joint_1_speed",
+            "leg_1_ground_contact_flag",
+            "hip_joint_2_angle",
+            "hip_joint_2_speed",
+            "knee_joint_2_angle",
+            "knee_joint_2_speed",
+            "leg_2_ground_contact_flag",
+            "lidar reading 1",
+            "lidar reading 2",
+            "lidar reading 3",
+            "lidar reading 4",
+            "lidar reading 5",
+            "lidar reading 6",
+            "lidar reading 7",
+            "lidar reading 8",
+            "lidar reading 9",
+            "lidar reading 10"
+        ]
+        if self.bias:
+            names.append('bias')
+        for i in range(self.env.observation_space.shape[0] + int(self.bias)):
+            weights.append(sum(self.weights['W1'][i]))  # total outgoing weight per input
+        plt.bar(names, weights)
+        plt.xticks(rotation=45, ha="right")
+        plt.show()
+
+    def save_mlp_weights(self, gen):
+        mlp_visualizer.vertical_distance_between_layers = 40
+        mlp_visualizer.horizontal_distance_between_neurons = 4
+        mlp_visualizer.neuron_radius = 1
+        mlp_visualizer.default_line_width = 1
+        network = mlp_visualizer.NeuralNetwork([self.env.observation_space.shape[0] + int(self.bias), self.hidden_layer,
+                                                self.env.action_space.shape[0]], [self.weights['W1'], self.weights['W2']])
+        network.draw(gen)
+
+    def save(self):
+        if not os.path.isdir('./models'):
+            os.mkdir('./models')
+        with open('./models/model-pedal%d.p' % self.version, 'wb') as fp:
+            pickle.dump(self.weights, fp)
+
+    def load(self):
+        with open('./models/model-pedal%d.p' % self.version, 'rb') as fp:
+            self.weights = pickle.load(fp)
diff --git a/brain.py b/MutateActions/brain.py
similarity index 100%
rename from brain.py
rename to MutateActions/brain.py
diff --git a/main.py b/MutateActions/main.py
similarity index 100%
rename from main.py
rename to MutateActions/main.py
diff --git a/population.py b/MutateActions/population.py
similarity index 100%
rename from population.py
rename to MutateActions/population.py
diff --git a/walker.py b/MutateActions/walker.py
similarity index 100%
rename from walker.py
rename to MutateActions/walker.py
diff --git a/README.md b/README.md
index 43c3ffff5fbab235aa6a01506d8b8f4fe90856d8..762ee9c9fbf2f15fd0dffd86eabe3f9d9c56b75a 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,38 @@
 # Bipedal Walker Evo
-Trying to solve the bipedal walker with an evolution algorithm
\ No newline at end of file
+This project tries to solve OpenAI's BipedalWalker environment with an evolution strategy.\
+After 1000 episodes (about one hour of training) it reaches a reward of ~250.\
+Best score so far: 292/300
+
+## How it works
+1. Generate a randomly weighted neural net
+2. Create a population of neural nets with mutated weights
+3. Let every net finish an episode and reward it accordingly
+4. Update the net with the reward-weighted average of the population's weights, so better mutants contribute more to the next generation
+
+## Hyperparameters
+| Parameter         | Description                                                               | Interval  |
+|-------------------|---------------------------------------------------------------------------|-----------|
+| `HIDDEN_LAYER`    | Size of the hidden layer.                                                 | [1;∞[     |
+| `BIAS`            | Add a bias neuron to the input layer.                                     | {0,1}     |
+| `POP_SIZE`        | Size of the population.                                                   | [1;∞[     |
+| `MUTATION_FACTOR` | Standard deviation of the Gaussian noise added to each mutant's weights.  | [0;1]     |
+| `LEARNING_RATE`   | Step size of the weight update after each generation.                     | [0;1]     |
+| `GENS`            | Number of generations.                                                    | [1;∞[     |
+| `MAX_STEPS`       | Number of steps that are played in one episode.                           | [0;1600]  |
+
+
+## Installation
+We use Windows, Anaconda and Python 3.7 \
+`conda create -n evo_neuro python=3.7` \
+`conda activate evo_neuro`\
+`conda install swig`\
+`pip install -r requirements.txt`
+
+
+
+## Sources
+Environment: https://github.com/openai/gym/wiki/BipedalWalker-v2 \
+Table of all Environments: https://github.com/openai/gym/wiki/Table-of-environments \
+OpenAI Website: https://gym.openai.com/envs/BipedalWalker-v2/ \
+More on evolution strategies: https://openai.com/blog/evolution-strategies/
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0c85215b839de1a44c1b47e1fc37c70c6b9032b7
Binary files /dev/null and b/requirements.txt differ
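
For readers following the update rule in `EvolutionStrategies/population.py`, here is a minimal, self-contained sketch of the same evolution-strategies step on a toy objective. It is not part of the diff above: `es_step`, `reward_fn`, and the toy objective are illustrative assumptions, and the loop mirrors mutate → play_episode → evolve under the assumption of Gaussian noise with standard deviation `sigma`.

```python
import numpy as np

rng = np.random.default_rng(42)

def es_step(weights, reward_fn, pop_size=50, sigma=0.1, lr=0.03):
    """One ES generation: sample noise, score mutants, step along
    the reward-weighted average of the noise."""
    # one Gaussian perturbation per mutant (the "mutation")
    noise = [rng.standard_normal(weights.shape) for _ in range(pop_size)]
    # each mutant "plays one episode": score the perturbed weights
    rewards = np.array([reward_fn(weights + sigma * eps) for eps in noise])
    # standardize rewards so the step is invariant to reward scale
    a = (rewards - rewards.mean()) / (rewards.std() + 1e-8)
    # reward-weighted average of the noise approximates the reward gradient
    grad = sum(eps * a_i for eps, a_i in zip(noise, a)) / (pop_size * sigma)
    return weights + lr * grad

# toy usage: maximize -||w - 3||^2, optimum at w = [3, 3, 3, 3]
w = np.zeros(4)
for _ in range(300):
    w = es_step(w, lambda v: -np.sum((v - 3) ** 2))
print(w)  # converges close to [3, 3, 3, 3]
```

Because the rewards are standardized, the weighting coefficients sum to zero; this is also why `evolve()` in `population.py` can weight the mutants' full weight matrices rather than just the noise — the parent's own weights cancel out of the sum once the mutants are parent-plus-noise.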