diff --git a/README.md b/README.md
index 43c3ffff5fbab235aa6a01506d8b8f4fe90856d8..a476550e86cc19681b53dd43b87f179b6d1bc254 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,36 @@
 # Bipedal Walker Evo
 
-Trying to solve the bipedal walker with an evolution algorithm
\ No newline at end of file
+This project tries to solve OpenAI's BipedalWalker environment with an evolution strategy.\
+After 1000 episodes, which takes about one hour of training, it reaches a reward of roughly 250.\
+Best score so far: 292/300
+
+## How it works
+1. Generate a randomly weighted neural net
+2. Create a population of neural nets with mutated copies of its weights
+3. Let every net play one episode and record the reward it earns
+4. The higher the reward, the higher the chance that a net passes its weights on to the next generation
+
+## Hyperparameters
+| Parameter         | Description                                                    | Interval |
+|-------------------|----------------------------------------------------------------|----------|
+| `HIDDEN_LAYER`    | Size of the hidden layer.                                      | [1;∞[    |
+| `POP_SIZE`        | Size of the population.                                        | [1;∞[    |
+| `MUTATION_FACTOR` | Fraction of weights that is mutated for each mutant.           | [0;1]    |
+| `LEARNING_RATE`   | Scales how strongly each generation shifts the parent weights. | [0;1]    |
+| `GENS`            | Number of generations.                                         | [1;∞[    |
+| `MAX_STEPS`       | Number of steps that are played in one episode.                | [1;1600] |
+
+
+## Installation
+We use Windows, Anaconda, and Python 3.7: \
+`conda create -n evo_neuro python=3.7` \
+`conda activate evo_neuro` \
+`conda install swig` \
+`pip install -r requirements.txt`
+
+
+
+## Sources
+Environment: https://github.com/openai/gym/wiki/BipedalWalker-v2 \
+OpenAI website: https://gym.openai.com/envs/BipedalWalker-v2/ \
+More on evolution strategies: https://openai.com/blog/evolution-strategies/
\ No newline at end of file
diff --git a/main.py b/main.py
index cdcf7a007b533baec70f0a455e8ab7836c1ef66e..ac391d1d2491eb7e0bb4387591ddadad8f7b93b6 100644
--- a/main.py
+++ b/main.py
@@ -8,12 +8,12 @@ HIDDEN_LAYER = 12
 POP_SIZE = 50
 MUTATION_FACTOR = 0.1 # 0 <= x <= 1
 LEARNING_RATE = 0.03 # 0 <= x <= 1
-GENS = 1000
-MAX_STEPS = 300 # after 1600 steps the Environment gives us a done anyway.
+GENS = 3000
+MAX_STEPS = 1200 # after 1600 steps the Environment gives us a done anyway.
 
 VERSION = 1
-TEST_WALKER = True
+TEST_WALKER = False
 LOAD_BRAIN = False
 RENDER_BEST = False
 
 if TEST_WALKER:
diff --git a/population.py b/population.py
index 66cc3b06dc5aef2482c3ef91f24bd2c846d04c4a..460e7d0929b053b7a1de3d3a3cd3b1218ff8f6cc 100644
--- a/population.py
+++ b/population.py
@@ -1,7 +1,4 @@
 import numpy as np
-import random
-import logging
-import copy
 from walker import Walker
 import gym
 
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0c85215b839de1a44c1b47e1fc37c70c6b9032b7
Binary files /dev/null and b/requirements.txt differ
diff --git a/walker.py b/walker.py
index b3fad9c24adc195c5d0ffbc63ec726123809c15f..2269af2b94e9bfa4e598641d6ffa2e9e2939030b 100644
--- a/walker.py
+++ b/walker.py
@@ -1,4 +1,3 @@
-import gym
 import numpy as np
 import pickle
 import copy
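
The four "How it works" steps added to the README above describe a simple evolution strategy with reward-weighted recombination. As a rough illustration only, here is a minimal, self-contained sketch of that loop; the network shape, helper names, and the toy fitness function are hypothetical and not taken from this repository, where a net's fitness would be the reward it collects in one BipedalWalker episode.

```python
# Minimal sketch of the four "How it works" steps, assuming a simple
# evolution strategy with reward-weighted recombination. All names, sizes,
# and the toy fitness function are hypothetical; in the actual project the
# fitness of a net is the reward it earns in one BipedalWalker episode.
import numpy as np

OBS_SIZE, HIDDEN_LAYER, ACT_SIZE = 24, 12, 4  # BipedalWalker: 24 observations, 4 actions
POP_SIZE, MUTATION_FACTOR, LEARNING_RATE, GENS = 50, 0.1, 0.03, 100

rng = np.random.default_rng(0)

def random_net():
    # Step 1: a randomly weighted two-layer net, stored as a list of weight matrices.
    return [rng.standard_normal((OBS_SIZE, HIDDEN_LAYER)),
            rng.standard_normal((HIDDEN_LAYER, ACT_SIZE))]

def mutate(net):
    # Step 2: copy the parent and perturb roughly MUTATION_FACTOR of its weights.
    child = []
    for w in net:
        mask = rng.random(w.shape) < MUTATION_FACTOR
        child.append(w + mask * rng.standard_normal(w.shape))
    return child

def fitness(net):
    # Step 3: stand-in for "let the net play one episode and collect the reward".
    return -sum(np.abs(w - 0.5).mean() for w in net)

parent = random_net()
for gen in range(GENS):
    population = [mutate(parent) for _ in range(POP_SIZE)]
    rewards = np.array([fitness(net) for net in population])
    # Step 4: mutants with higher reward pull the next generation's weights harder.
    scores = (rewards - rewards.mean()) / (rewards.std() + 1e-8)
    for i, w in enumerate(parent):
        step = sum(s * (population[j][i] - w) for j, s in enumerate(scores)) / POP_SIZE
        parent[i] = w + LEARNING_RATE * step

print("final toy fitness:", fitness(parent))
```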