diff --git a/Bipedal_Walker_Evo.pdf b/Bipedal_Walker_Evo.pdf index 7a8a6095e280657ff1b445c3dcad0deea6359b48..b8bfc8a94a034f6a9d20acdc378b6426a69e6687 100644 Binary files a/Bipedal_Walker_Evo.pdf and b/Bipedal_Walker_Evo.pdf differ diff --git a/EvolutionStrategies/main.py b/EvolutionStrategies/main.py index 402634f1e644c8803a14eeab3a2c6af8f1cc12d6..e56d33ce2dd932fb7e3ee8d64b716bb52b68497c 100644 --- a/EvolutionStrategies/main.py +++ b/EvolutionStrategies/main.py @@ -8,13 +8,13 @@ HIDDEN_LAYER = 12 BIAS = False POP_SIZE = 50 MUTATION_FACTOR = 0.1 # 0 <= x <= 1 -LEARNING_RATE = 0.03 # 0 <= x <= 1 -GENS = 10000 +LEARNING_RATE = 0.1 # 0 <= x <= 1 +GENS = 2000 MAX_STEPS = 300 # after 1600 steps the Environment gives us a done anyway. DECAY_ALPHA = True -VERSION = 100 -TEST_WALKER = True +VERSION = 200 +TEST_WALKER = False LOAD_BRAIN = False RENDER_BEST = False if TEST_WALKER: @@ -37,6 +37,10 @@ if __name__ == '__main__': population = Population(POP_SIZE, HIDDEN_LAYER, BIAS, MUTATION_FACTOR, MAX_STEPS, LOAD_BRAIN, VERSION, LEARNING_RATE, RENDER_BEST) if TEST_WALKER: + rewards = [] + with open(f'./models/{HIDDEN_LAYER}_{VERSION}_{POP_SIZE}_{LEARNING_RATE}_AvgRewards', 'rb') as fp: + rewards = pickle.load(fp) + plot_reward(rewards) rewards = [] #population.walker.plot_input_weights() for i in range(10): @@ -64,10 +68,10 @@ if __name__ == '__main__': with open(f'./models/{HIDDEN_LAYER}_{VERSION}_{POP_SIZE}_{LEARNING_RATE}_AvgRewards', 'wb') as fp: pickle.dump(avg_rewards, fp) if gen == 1000 and DECAY_ALPHA: - population.lr = 0.01 + population.lr = 0.05 population.mutation_factor = 0.05 if gen == 5000 and DECAY_ALPHA: - population.lr = 0.005 + population.lr = 0.01 population.mutation_factor = 0.01 plot_reward(avg_rewards) diff --git a/EvolutionStrategies/population.py b/EvolutionStrategies/population.py index 49662149501f63cc80b4a914aab3a4520f2d5723..66e9acc68ecad2b7c93553e25767255b44c2f80c 100644 --- a/EvolutionStrategies/population.py +++ b/EvolutionStrategies/population.py @@ -38,7 +38,7 @@ class Population: for i in range(self.size): for k in weights: weights_change = np.dot(self.mutants[i].weights[k].T, A[i]).T - weights[k] = weights[k] + self.lr/(self.size*self.lr) * weights_change + weights[k] = weights[k] + self.lr/(self.size*self.mutation_factor) * weights_change self.walker.set_weights(weights) for mutant in self.mutants: mutant.set_weights(weights)