Commit 2fd89306 authored by Tobias Döring

added increasing steps

parent c3e0f688
@@ -10,11 +10,11 @@ POP_SIZE = 50
 MUTATION_FACTOR = 0.1 # 0 <= x <= 1
 LEARNING_RATE = 0.03 # 0 <= x <= 1
 GENS = 10000
-MAX_STEPS = 300 # after 1600 steps the Environment gives us a done anyway.
+MAX_STEPS = 100 # after 1600 steps the Environment gives us a done anyway.
 DECAY_ALPHA = True
-VERSION = 100
-TEST_WALKER = True
+VERSION = 101
+TEST_WALKER = False
 LOAD_BRAIN = False
 RENDER_BEST = False
 if TEST_WALKER:
@@ -49,14 +49,17 @@ if __name__ == '__main__':
         for gen in range(GENS): # this is our game
             start_time = time.time()
             print(f'Gen: {gen}')
+            print(f'Steps: {population.max_steps}')
             population.mutate()
             population.play_episode()
             population.evolve()
             print("Time for Gen: ", time.time() - start_time)
             if gen % 10 == 0:
                 avg_reward = population.get_walker_stats()
-                if avg_reward > best_avg_reward:
-                    population.walker.save()
+                population.walker.save()
+                population.walker.save_evo(gen)
+                if avg_reward > best_avg_reward:
+                    population.walker.save('best')
                     best_avg_reward = avg_reward
                     print("New best walker found")
                 avg_rewards.append(avg_reward)
@@ -69,6 +72,8 @@ if __name__ == '__main__':
             if gen == 5000 and DECAY_ALPHA:
                 population.lr = 0.005
                 population.mutation_factor = 0.01
+            # increase the amount of steps the agent can do
+            population.max_steps += 2
         plot_reward(avg_rewards)
     except KeyboardInterrupt:
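
The two added lines are the "increasing steps" idea from the commit message: every generation the per-episode step budget grows by 2, starting from the lowered MAX_STEPS = 100, so early generations are scored on short, cheap rollouts and later ones on progressively longer episodes. A minimal sketch of the resulting schedule, assuming max_steps is initialised from MAX_STEPS and that BipedalWalker's own ~1600-step cutoff is the practical ceiling (ENV_STEP_LIMIT is an illustrative name, not a constant from the repo):

    # Step budget implied by "population.max_steps += 2" once per generation.
    MAX_STEPS = 100        # starting budget, as set in the config above
    ENV_STEP_LIMIT = 1600  # the environment ends episodes itself around here

    def step_budget(gen):
        return min(MAX_STEPS + 2 * gen, ENV_STEP_LIMIT)

    # gen 0 -> 100 steps, gen 100 -> 300 steps, gen 750 -> 1600 steps (env limit)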
@@ -38,7 +38,7 @@ class Population:
         for i in range(self.size):
             for k in weights:
                 weights_change = np.dot(self.mutants[i].weights[k].T, A[i]).T
-                weights[k] = weights[k] + self.lr/(self.size*self.lr) * weights_change
+                weights[k] = weights[k] + self.lr/(self.size*self.mutation_factor) * weights_change
         self.walker.set_weights(weights)
         for mutant in self.mutants:
             mutant.set_weights(weights)
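
This one-line change is more than cosmetic: in the old expression self.lr/(self.size*self.lr) the learning rate cancels out to 1/self.size, so LEARNING_RATE had no effect on the update at all. The new denominator matches the usual evolution-strategies update, in which the perturbation scale (here mutation_factor) plays the role of sigma. A self-contained sketch of that rule, with illustrative names rather than the repo's (it also assumes A holds per-mutant normalized rewards, which this hunk does not show):

    import numpy as np

    def es_update(theta, epsilons, rewards, lr, sigma):
        # theta <- theta + lr / (n * sigma) * sum_i A_i * eps_i
        A = (rewards - rewards.mean()) / (rewards.std() + 1e-8)  # normalized rewards
        n = len(rewards)                                         # population size
        return theta + lr / (n * sigma) * (epsilons.T @ A)       # epsilons: (n, d)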
@@ -105,12 +105,18 @@ class Walker:
                              self.env.action_space.shape[0]], [self.weights['W1'], self.weights['W2']])
         network.draw(gen)
 
-    def save(self):
+    def save_evo(self, gen):
+        if not os.path.isdir(f'./models/weights_evo{self.version}'):
+            os.mkdir(f'./models/weights_evo{self.version}')
+        with open(f'./models/weights_evo{self.version}/model-pedal{gen}.p', 'wb') as fp:
+            pickle.dump(self.weights, fp)
+
+    def save(self, name = 'current'):
         if not os.path.isdir('./models'):
             os.mkdir('./models')
-        with open('./models/model-pedal%d.p' % self.version, 'wb') as fp:
+        with open(f'./models/model-pedal{self.version}-{name}.p', 'wb') as fp:
             pickle.dump(self.weights, fp)
 
-    def load(self):
-        with open('./models/model-pedal%d.p' % self.version, 'rb') as fp:
+    def load(self, name = 'current'):
+        with open(f'./models/model-pedal{self.version}-{name}.p', 'rb') as fp:
             self.weights = pickle.load(fp)
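
With the reworked save/load API, checkpoints end up under three naming schemes: a rolling "current" file, a "best" file, and per-generation snapshots in a weights_evo<version> directory. For VERSION = 101 the paths follow directly from the f-strings above (the <gen> placeholder stands for the checkpointed generation number):

    ./models/model-pedal101-current.p            # overwritten every 10 generations
    ./models/model-pedal101-best.p               # written whenever avg_reward improves
    ./models/weights_evo101/model-pedal<gen>.p   # one snapshot per checkpointed generation

A previously saved walker can then be restored with walker.load() or walker.load('best'). Note that files written before this commit follow the old model-pedal<version>.p pattern and would need to be renamed to the new '-{name}' suffix before load() can find them.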