Skip to content
Snippets Groups Projects
Commit bb88a4fb authored by Tobias Döring's avatar Tobias Döring
Browse files

updated doc

parent c3e0f688
No related branches found
No related tags found
No related merge requests found
No preview for this file type
...@@ -8,13 +8,13 @@ HIDDEN_LAYER = 12 ...@@ -8,13 +8,13 @@ HIDDEN_LAYER = 12
BIAS = False BIAS = False
POP_SIZE = 50 POP_SIZE = 50
MUTATION_FACTOR = 0.1 # 0 <= x <= 1 MUTATION_FACTOR = 0.1 # 0 <= x <= 1
LEARNING_RATE = 0.03 # 0 <= x <= 1 LEARNING_RATE = 0.1 # 0 <= x <= 1
GENS = 10000 GENS = 2000
MAX_STEPS = 300 # after 1600 steps the Environment gives us a done anyway. MAX_STEPS = 300 # after 1600 steps the Environment gives us a done anyway.
DECAY_ALPHA = True DECAY_ALPHA = True
VERSION = 100 VERSION = 200
TEST_WALKER = True TEST_WALKER = False
LOAD_BRAIN = False LOAD_BRAIN = False
RENDER_BEST = False RENDER_BEST = False
if TEST_WALKER: if TEST_WALKER:
...@@ -37,6 +37,10 @@ if __name__ == '__main__': ...@@ -37,6 +37,10 @@ if __name__ == '__main__':
population = Population(POP_SIZE, HIDDEN_LAYER, BIAS, MUTATION_FACTOR, MAX_STEPS, LOAD_BRAIN, VERSION, LEARNING_RATE, RENDER_BEST) population = Population(POP_SIZE, HIDDEN_LAYER, BIAS, MUTATION_FACTOR, MAX_STEPS, LOAD_BRAIN, VERSION, LEARNING_RATE, RENDER_BEST)
if TEST_WALKER: if TEST_WALKER:
rewards = []
with open(f'./models/{HIDDEN_LAYER}_{VERSION}_{POP_SIZE}_{LEARNING_RATE}_AvgRewards', 'rb') as fp:
rewards = pickle.load(fp)
plot_reward(rewards)
rewards = [] rewards = []
#population.walker.plot_input_weights() #population.walker.plot_input_weights()
for i in range(10): for i in range(10):
...@@ -64,10 +68,10 @@ if __name__ == '__main__': ...@@ -64,10 +68,10 @@ if __name__ == '__main__':
with open(f'./models/{HIDDEN_LAYER}_{VERSION}_{POP_SIZE}_{LEARNING_RATE}_AvgRewards', 'wb') as fp: with open(f'./models/{HIDDEN_LAYER}_{VERSION}_{POP_SIZE}_{LEARNING_RATE}_AvgRewards', 'wb') as fp:
pickle.dump(avg_rewards, fp) pickle.dump(avg_rewards, fp)
if gen == 1000 and DECAY_ALPHA: if gen == 1000 and DECAY_ALPHA:
population.lr = 0.01 population.lr = 0.05
population.mutation_factor = 0.05 population.mutation_factor = 0.05
if gen == 5000 and DECAY_ALPHA: if gen == 5000 and DECAY_ALPHA:
population.lr = 0.005 population.lr = 0.01
population.mutation_factor = 0.01 population.mutation_factor = 0.01
plot_reward(avg_rewards) plot_reward(avg_rewards)
......
...@@ -38,7 +38,7 @@ class Population: ...@@ -38,7 +38,7 @@ class Population:
for i in range(self.size): for i in range(self.size):
for k in weights: for k in weights:
weights_change = np.dot(self.mutants[i].weights[k].T, A[i]).T weights_change = np.dot(self.mutants[i].weights[k].T, A[i]).T
weights[k] = weights[k] + self.lr/(self.size*self.lr) * weights_change weights[k] = weights[k] + self.lr/(self.size*self.mutation_factor) * weights_change
self.walker.set_weights(weights) self.walker.set_weights(weights)
for mutant in self.mutants: for mutant in self.mutants:
mutant.set_weights(weights) mutant.set_weights(weights)
......
Loading…
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment