Commit 522edfe3 authored by Philip Maas

Various small QoL changes

parent 1e690422
@@ -4,14 +4,14 @@ import matplotlib.pyplot as plt
 import pickle
 import sys
-HIDDEN_LAYER = 2
-BIAS = True
+HIDDEN_LAYER = 12
+BIAS = False
 POP_SIZE = 50
 MUTATION_FACTOR = 0.1  # 0 <= x <= 1
 LEARNING_RATE = 0.03  # 0 <= x <= 1
 GENS = 7000
-MAX_STEPS = 200  # after 1600 steps the Environment gives us a done anyway.
+MAX_STEPS = 300  # after 1600 steps the Environment gives us a done anyway.
+DECAY_ALPHA = True
 VERSION = 1
 TEST_WALKER = True
@@ -58,10 +58,12 @@ if __name__ == '__main__':
                 population.walker.save_mlp_weights(gen)
                 with open(f'./models/{HIDDEN_LAYER}_{VERSION}_{POP_SIZE}_{LEARNING_RATE}_AvgRewards', 'wb') as fp:
                     pickle.dump(avg_rewards, fp)
-            if gen == 1000:
+            if gen == 1000 and DECAY_ALPHA:
                 population.lr = 0.01
-            #if gen == 5000:
-                #population.lr = 0.005
+                population.mutation_factor = 0.05
+            if gen == 5000 and DECAY_ALPHA:
+                population.lr = 0.005
+                population.mutation_factor = 0.01
         plot_reward(avg_rewards)
     except KeyboardInterrupt:
...
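Note on the hunk above: the new DECAY_ALPHA flag gates a two-step annealing schedule that shrinks the learning rate and mutation factor at generations 1000 and 5000. A minimal sketch of the same schedule as a pure function (the function name is illustrative, not from the repository; it matches the `gen ==` checks above because the loop assigns persistent attributes once per milestone):

DECAY_ALPHA = True

def decayed_hyperparams(gen, lr=0.03, mutation_factor=0.1):
    # Stepwise annealing: same milestones and values as the training loop above.
    if DECAY_ALPHA and gen >= 5000:
        return 0.005, 0.01
    if DECAY_ALPHA and gen >= 1000:
        return 0.01, 0.05
    return lr, mutation_factor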
@@ -14,7 +14,7 @@ class Population:
         self.version = version
         self.max_steps = max_steps
         self.render_best = render_best
-        self.env = gym.make('Pendulum-v1')  # MountainCarContinuous-v0 LunarLanderContinuous-v2 Pendulum-v1 CarRacing-v0
+        self.env = gym.make('BipedalWalker-v3')  # MountainCarContinuous-v0 LunarLanderContinuous-v2 Pendulum-v1 CarRacing-v0
         self.walker = Walker(hidden_layer, bias, version, load_brain, self.env)
         self.mutated_weights = dict()
         self.mutants = []
...
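Note on the environment switch: BipedalWalker-v3 has a 24-dimensional observation and a 4-dimensional continuous action in [-1, 1], so the MLP sized by HIDDEN_LAYER and BIAS has to match those shapes. A quick sanity-check rollout, assuming the pre-0.26 gym step API (4-tuple) that this code appears to use:

import gym

env = gym.make('BipedalWalker-v3')
obs = env.reset()                                 # 24-dim observation
done, total_reward = False, 0.0
while not done:
    action = env.action_space.sample()            # random 4-dim action in [-1, 1]
    obs, reward, done, info = env.step(action)    # gym >= 0.26 returns 5 values instead
    total_reward += reward
print(total_reward)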
MutateActions/5_50_50_0.2.png (new image, 22.3 KiB)
 from population import Population
-import time
 import matplotlib.pyplot as plt
 import pickle
+import sys
 INCREASE_BY = 5
 BRAIN_SIZE = 50
@@ -17,24 +17,33 @@ if TEST_WALKER:
 if __name__ == '__main__':
-    population = Population(POP_SIZE, BRAIN_SIZE, MUTATION_FACTOR, LOAD_BRAIN, RENDER_BEST)
-    rewards = []
     if TEST_WALKER:
         rewards = []
+        steps = []
         with open('rewards.p', 'rb') as fp:
             rewards = pickle.load(fp)
+        with open('steps.p', 'rb') as fp:
+            steps = pickle.load(fp)
         plt.title(f'{POP_SIZE}, {MUTATION_FACTOR}')
         plt.xlabel('Episodes')
         plt.ylabel('Rewards')
         plt.plot(rewards)
+        plt.plot(steps)
         plt.savefig(f'./models/{POP_SIZE}, {MUTATION_FACTOR}.png')
         plt.show()
+        sys.exit(0)
+    population = Population(POP_SIZE, BRAIN_SIZE, MUTATION_FACTOR, LOAD_BRAIN, RENDER_BEST)
+    rewards = []
+    steps = []
-    while GAME_CANCELLED is False:  # this is our game
+    while population.gen < 2000:  # this is our game
         if population.all_players_finished():  # this is our genetic algorithm after one generation of players
+            rewards.append(population.walkers[0].fitness)
+            steps.append(len(population.walkers[0].brain.directions))
+            print(f'Best Fitness: {population.walkers[0].fitness}')
             population.natural_selection()
             population.mutate_babies()
             population.increase_moves(INCREASE_BY)
@@ -45,11 +54,12 @@ if __name__ == '__main__':
             print(f'Best Fitness: {population.fitnesses[population.best_walker_index]}')
             print(f'Max Steps: {population.max_steps}')
-            rewards.append(population.fitnesses[population.best_walker_index])
             if population.gen % 10 == 0:
                 with open("rewards.p", 'wb') as fp:
                     pickle.dump(rewards, fp)
+                with open("steps.p", 'wb') as fp:
+                    pickle.dump(steps, fp)
         else:
             population.update()
             # time.sleep(0.1)
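Note on the bookkeeping changes above: the loop now records the best walker's fitness and its brain size (number of stored actions) each generation and pickles both lists every 10 generations, which is exactly what the TEST_WALKER branch reads back. A sketch of that round trip with labelled curves (the labels are illustrative; the script itself plots both series unlabelled):

import pickle
import matplotlib.pyplot as plt

with open('rewards.p', 'rb') as fp:
    rewards = pickle.load(fp)          # best fitness per generation
with open('steps.p', 'rb') as fp:
    steps = pickle.load(fp)            # number of stored actions per generation

plt.plot(rewards, label='best fitness')
plt.plot(steps, label='brain size (moves)')
plt.xlabel('Generation')
plt.legend()
plt.show()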
@@ -39,16 +39,12 @@ class Population:
         else:
             walker.update()
-    """def calculate_fitness(self):  # calculate the fitness of all players
-        for walker in self.walkers:
-            walker.calculate_fitness()"""
     def calculate_fitness_sum(self):
         self.fitness_sum = 0
         self.fitnesses = np.zeros(self.size)
         for i in range(self.size):
             self.fitnesses[i] = self.walkers[i].fitness
-        self.fitnesses -= np.min(self.fitnesses)  # maybe offset: +1
+        self.fitnesses -= np.min(self.fitnesses)
         self.fitness_sum = np.sum(self.fitnesses)
     def all_players_finished(self):  # returns whether all the players are either dead or have reached the goal
@@ -58,8 +54,6 @@ class Population:
         return True
     def natural_selection(self):  # gets the next generation of players
-        #for i in range(self.size):
-            #new_walkers.append(Walker(self.envs[i], self.brain_size))
         self.calculate_fitness_sum()
         self.set_best_walker()
         self.walkers[self.best_walker_index].brain.save()
...
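Note on calculate_fitness_sum: shifting by the minimum makes all fitnesses non-negative so they can serve as roulette-wheel weights; the deleted "# maybe offset: +1" remark pointed at the degenerate case where every shifted fitness is zero. A sketch of the selection step this normalization feeds (select_parent is illustrative; the repository's own selection code is outside this hunk):

import numpy as np

def select_parent(fitnesses, rng=None):
    # Roulette-wheel selection over min-shifted fitnesses, as normalized above.
    if rng is None:
        rng = np.random.default_rng()
    shifted = fitnesses - np.min(fitnesses)
    total = np.sum(shifted)
    if total == 0:                     # all walkers equally fit: pick uniformly
        return int(rng.integers(len(fitnesses)))
    pick = rng.uniform(0, total)
    running = 0.0
    for i, f in enumerate(shifted):
        running += f
        if running >= pick:
            return i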