The algorithm can now tackle various continuous-control problems, such as LunarLander.

95f5bdb2 · Philip Maas · 77b00e89 · 95f5bdb2 · 77b00e89 · 95f5bdb2
Commit 95f5bdb2 authored Feb 20, 2022 by Philip Maas
--- a/main.py
+++ b/main.py
@@ -4,13 +4,13 @@ import matplotlib.pyplot as plt
 import pickle
 import sys
-HIDDEN_LAYER = 8
+HIDDEN_LAYER = 2
 BIAS = True
 POP_SIZE = 50
 MUTATION_FACTOR = 0.1  # 0 <= x <= 1
 LEARNING_RATE = 0.03   # 0 <= x <= 1
 GENS = 7000
-MAX_STEPS = 300  # after 1600 steps the Environment gives us a done anyway.
+MAX_STEPS = 200  # BipedalWalker reports done after 1600 steps anyway; other environments may use a different limit.
 VERSION = 1
@@ -37,7 +37,7 @@ if __name__ == '__main__':
        if TEST_WALKER:
            rewards = []
-            population.walker.plot_input_weights()
+            #population.walker.plot_input_weights()
            for i in range(10):
                rewards.append(population.walker.get_reward(10000, True))
                print("Reward: ", rewards[-1])
@@ -48,11 +48,9 @@ if __name__ == '__main__':
        for gen in range(GENS):  # this is our game
            start_time = time.time()
            print(f'Gen: {gen}')
-            #print(f'Max Steps: {population.max_steps}')
            population.mutate()
            population.play_episode()
            population.evolve()
-            #population.increase_moves(INCREASE_BY)
            print("Time for Gen: ", time.time() - start_time)
            if gen % 10 == 0:
                population.walker.save()

--- a/mlp.png
+++ b/mlp.png
--- a/population.py
+++ b/population.py
@@ -4,17 +4,17 @@ import gym
 np.random.seed(42)
 class Population:
    def __init__(self, size, hidden_layer, bias, mutation_factor, max_steps, load_brain, version, lr, render_best):
        self.size = size
        self.mutation_factor = mutation_factor
-        #self.fitness_sum = 0.0
        self.gen = 1
        self.version = version
        self.max_steps = max_steps
        self.render_best = render_best
-        self.env = gym.make('BipedalWalker-v3')
+        self.env = gym.make('Pendulum-v1')  # alternative env ids: MountainCarContinuous-v0, LunarLanderContinuous-v2, Pendulum-v1, CarRacing-v0
        self.walker = Walker(hidden_layer, bias, version, load_brain, self.env)
        self.mutated_weights = dict()
        self.mutants = []
@@ -27,22 +27,6 @@ class Population:
            if load_brain:
                self.mutants[-1].set_weights(walker_weights)
-    # def calculate_fitness_sum(self):
-    #     self.fitness_sum = 0
-    #     self.rewards = np.zeros(self.size)
-    #     for i in range(self.size):
-    #         self.rewards[i] = self.mutants[i].fitness
-    #     self.rewards -= np.min(self.rewards)  # maybe offset: +1
-    #     self.fitness_sum = np.sum(self.rewards)
-    def get_action(self, observation, weights):
-        hl = np.matmul(observation, weights['W1'])
-        hl = np.tanh(hl)
-        action = np.matmul(hl, weights['W2'])
-        action = np.tanh(action)
-        return action
    def play_episode(self):
        self.rewards = np.zeros(self.size)
        for i in range(self.size):
@@ -73,9 +57,3 @@ class Population:
    def mutate(self):  # mutates all the weights of the mutants
        for i in range(len(self.mutants)):
            self.mutants[i].mutate(self.mutation_factor)
-    # def increase_moves(self, size):  # increase the number of directions for the brain
-    #     if len(self.mutants[0].brain.directions) < self.max_steps:
-    #         for walker in self.mutants:
-    #             walker.brain.increase_moves(size)
--- a/walker.py
+++ b/walker.py
@@ -8,21 +8,23 @@ import mlp_visualizer
 np.random.seed(42)
 class Walker:
    def __init__(self, hidden_layer, bias, version, load_brain, env):
+        self.env = env
        self.version = version
        self.hidden_layer = hidden_layer
        if load_brain:
            self.load()
        else:
            self.weights = {}
-            self.weights['W1'] = np.random.randn(24 + int(bias), hidden_layer) / np.sqrt(24 + int(bias))
+            self.weights['W1'] = np.random.randn(self.env.observation_space.shape[0] + int(bias), hidden_layer) \
-            self.weights['W2'] = np.random.randn(hidden_layer, 4) / np.sqrt(hidden_layer)
+                                 / np.sqrt(self.env.observation_space.shape[0] + int(bias))
+            self.weights['W2'] = np.random.randn(hidden_layer, self.env.action_space.shape[0]) / np.sqrt(hidden_layer)
        self.bias = bias
-        self.env = env
-        #self.render_best = render_best
    def get_action(self, observation):
        if self.bias:
@@ -88,20 +90,19 @@ class Walker:
        ]
        if self.bias:
            names.append('bias')
-        for i in range(24 + int(self.bias)):
+        for i in range(self.env.observation_space.shape[0] + int(self.bias)):
            weights.append(sum(self.weights['W1'][i]))
        plt.bar(names, weights)
        plt.xticks(rotation=45, ha="right")
        plt.show()
    def save_mlp_weights(self, gen):
        mlp_visualizer.vertical_distance_between_layers = 40
        mlp_visualizer.horizontal_distance_between_neurons = 4
        mlp_visualizer.neuron_radius = 1
        mlp_visualizer.default_line_width = 1
-        network = mlp_visualizer.NeuralNetwork([24 + self.bias, self.hidden_layer, 4], 
+        network = mlp_visualizer.NeuralNetwork([self.env.observation_space.shape[0] + self.bias, self.hidden_layer,
-                                               [self.weights['W1'], self.weights['W2']])
+                                                self.env.action_space.shape[0]], [self.weights['W1'], self.weights['W2']])
        network.draw(gen)
    def save(self):