From 2fd89306c3b8d363364997725170ff7d0cf8fcbe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20D=C3=B6ring?= <tobias.doering@stud.hs-bochum.de>
Date: Wed, 9 Mar 2022 11:11:37 +0100
Subject: [PATCH] added increasing steps

---
 EvolutionStrategies/main.py       | 13 +++++++++----
 EvolutionStrategies/population.py |  2 +-
 EvolutionStrategies/walker.py     | 14 ++++++++++----
 3 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/EvolutionStrategies/main.py b/EvolutionStrategies/main.py
index 402634f..2e9889c 100644
--- a/EvolutionStrategies/main.py
+++ b/EvolutionStrategies/main.py
@@ -10,11 +10,11 @@ POP_SIZE = 50
 MUTATION_FACTOR = 0.1 # 0 <= x <= 1
 LEARNING_RATE = 0.03 # 0 <= x <= 1
 GENS = 10000
-MAX_STEPS = 300 # after 1600 steps the Environment gives us a done anyway.
+MAX_STEPS = 100 # after 1600 steps the Environment gives us a done anyway.
 DECAY_ALPHA = True
 
-VERSION = 100
-TEST_WALKER = True
+VERSION = 101
+TEST_WALKER = False
 LOAD_BRAIN = False
 RENDER_BEST = False
 if TEST_WALKER:
@@ -49,14 +49,17 @@ if __name__ == '__main__':
         for gen in range(GENS): # this is our game
             start_time = time.time()
             print(f'Gen: {gen}')
+            print(f'Steps: {population.max_steps}')
             population.mutate()
             population.play_episode()
             population.evolve()
             print("Time for Gen: ", time.time() - start_time)
             if gen % 10 == 0:
                 avg_reward = population.get_walker_stats()
+                population.walker.save()
+                population.walker.save_evo(gen)
                 if avg_reward > best_avg_reward:
-                    population.walker.save()
+                    population.walker.save('best')
                     best_avg_reward = avg_reward
                     print("New best walker found")
                 avg_rewards.append(avg_reward)
@@ -69,6 +72,8 @@ if __name__ == '__main__':
             if gen == 5000 and DECAY_ALPHA:
                 population.lr = 0.005
                 population.mutation_factor = 0.01
+            # increase the number of steps the agent can take
+            population.max_steps += 2
 
         plot_reward(avg_rewards)
     except KeyboardInterrupt:
diff --git a/EvolutionStrategies/population.py b/EvolutionStrategies/population.py
index 4966214..66e9acc 100644
--- a/EvolutionStrategies/population.py
+++ b/EvolutionStrategies/population.py
@@ -38,7 +38,7 @@ class Population:
         for i in range(self.size):
             for k in weights:
                 weights_change = np.dot(self.mutants[i].weights[k].T, A[i]).T
-                weights[k] = weights[k] + self.lr/(self.size*self.lr) * weights_change
+                weights[k] = weights[k] + self.lr/(self.size*self.mutation_factor) * weights_change
         self.walker.set_weights(weights)
         for mutant in self.mutants:
             mutant.set_weights(weights)
diff --git a/EvolutionStrategies/walker.py b/EvolutionStrategies/walker.py
index bda567f..327b123 100644
--- a/EvolutionStrategies/walker.py
+++ b/EvolutionStrategies/walker.py
@@ -105,12 +105,18 @@ class Walker:
                            self.env.action_space.shape[0]], [self.weights['W1'], self.weights['W2']])
         network.draw(gen)
 
-    def save(self):
+    def save_evo(self, gen):
+        if not os.path.isdir(f'./models/weights_evo{self.version}'):
+            os.mkdir(f'./models/weights_evo{self.version}')
+        with open(f'./models/weights_evo{self.version}/model-pedal{gen}.p', 'wb') as fp:
+            pickle.dump(self.weights, fp)
+
+    def save(self, name='current'):
         if not os.path.isdir('./models'):
             os.mkdir('./models')
-        with open('./models/model-pedal%d.p' % self.version, 'wb') as fp:
+        with open(f'./models/model-pedal{self.version}-{name}.p', 'wb') as fp:
             pickle.dump(self.weights, fp)
 
-    def load(self):
-        with open('./models/model-pedal%d.p' % self.version, 'rb') as fp:
+    def load(self, name='current'):
+        with open(f'./models/model-pedal{self.version}-{name}.p', 'rb') as fp:
         self.weights = pickle.load(fp)
--
GitLab
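
Note on the patch, for readers outside the repository: the population.py hunk replaces a normalization in which self.lr cancelled out (lr / (size * lr) is just 1/size) with lr / (size * mutation_factor). That matches the canonical evolution-strategies update theta += alpha / (n * sigma) * sum_i A_i * eps_i, with MUTATION_FACTOR playing the role of the noise scale sigma. The sketch below illustrates the update under that reading; it is not the repository's Population class, and perturbations and rewards are hypothetical stand-ins for the per-mutant noise and fitness values that population.py computes elsewhere.

    import numpy as np

    LEARNING_RATE = 0.03    # alpha, as in main.py
    MUTATION_FACTOR = 0.1   # sigma: std-dev of the weight perturbations
    POP_SIZE = 50           # n: mutants evaluated per generation

    def es_update(weights, perturbations, rewards):
        # One ES step: theta += alpha / (n * sigma) * sum_i A_i * eps_i,
        # where A is the reward vector normalized to zero mean and unit std
        # so the step size does not depend on the reward scale.
        rewards = np.asarray(rewards, dtype=np.float64)
        A = (rewards - rewards.mean()) / (rewards.std() + 1e-8)
        scale = LEARNING_RATE / (POP_SIZE * MUTATION_FACTOR)
        for k in weights:
            grad = sum(A[i] * perturbations[i][k] for i in range(POP_SIZE))
            weights[k] = weights[k] + scale * grad
        return weights

The main.py hunks add a simple episode-length curriculum on top: episodes start at MAX_STEPS = 100 and population.max_steps grows by 2 every generation, so early generations are cheap to evaluate while later ones get longer rollouts. Since the environment (presumably BipedalWalker, given the walker naming and the 1600-step limit mentioned in the comment) ends episodes on its own at 1600 steps, the growing budget stops having an effect after about (1600 - 100) / 2 = 750 generations.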