From 5e0b34b60794f9048ffda856f68154a8fc0bf948 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20D=C3=B6ring?= <tobias.doering@stud.hs-bochum.de>
Date: Tue, 1 Mar 2022 09:37:27 +0100
Subject: [PATCH] adapting mutation rate

---
 EvolutionStrategies/main.py       | 8 ++++++--
 EvolutionStrategies/population.py | 8 +++++++-
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/EvolutionStrategies/main.py b/EvolutionStrategies/main.py
index aa9e09f..bbf193d 100644
--- a/EvolutionStrategies/main.py
+++ b/EvolutionStrategies/main.py
@@ -4,11 +4,11 @@ import matplotlib.pyplot as plt
 import pickle
 import sys
 
-HIDDEN_LAYER = 2
+HIDDEN_LAYER = 12
 BIAS = True
 POP_SIZE = 50
 MUTATION_FACTOR = 0.1  # 0 <= x <= 1
-LEARNING_RATE = 0.03  # 0 <= x <= 1
+LEARNING_RATE = 0.02  # 0 <= x <= 1
 GENS = 7000
 MAX_STEPS = 200  # after 1600 steps the Environment gives us a done anyway.
 
@@ -31,6 +31,7 @@ def plot_reward(rewards):
 
 if __name__ == '__main__':
     avg_rewards = []
+    mutation_factors = []
 
     try:
         population = Population(POP_SIZE, HIDDEN_LAYER, BIAS, MUTATION_FACTOR, MAX_STEPS, LOAD_BRAIN, VERSION, LEARNING_RATE, RENDER_BEST)
@@ -55,9 +56,12 @@ if __name__ == '__main__':
             if gen % 10 == 0:
                 population.walker.save()
                 avg_rewards.append(population.get_walker_stats())
+                mutation_factors.append(population.mutation_factor)
                 population.walker.save_mlp_weights(gen)
                 with open(f'./models/{HIDDEN_LAYER}_{VERSION}_{POP_SIZE}_{LEARNING_RATE}_AvgRewards', 'wb') as fp:
                     pickle.dump(avg_rewards, fp)
+                with open(f'./models/{HIDDEN_LAYER}_{VERSION}_{POP_SIZE}_{LEARNING_RATE}_MutationFactors', 'wb') as fp:
+                    pickle.dump(mutation_factors, fp)
             if gen == 1000:
                 population.lr = 0.01
             #if gen == 5000:
diff --git a/EvolutionStrategies/population.py b/EvolutionStrategies/population.py
index 0c48247..33e2bb7 100644
--- a/EvolutionStrategies/population.py
+++ b/EvolutionStrategies/population.py
@@ -14,7 +14,7 @@ class Population:
         self.version = version
         self.max_steps = max_steps
         self.render_best = render_best
-        self.env = gym.make('Pendulum-v1')  # MountainCarContinuous-v0 LunarLanderContinuous-v2 Pendulum-v1 CarRacing-v0
+        self.env = gym.make('BipedalWalker-v3')  # MountainCarContinuous-v0 LunarLanderContinuous-v2 Pendulum-v1 CarRacing-v0
         self.walker = Walker(hidden_layer, bias, version, load_brain, self.env)
         self.mutated_weights = dict()
         self.mutants = []
@@ -33,6 +33,12 @@ class Population:
             self.rewards[i] = self.mutants[i].get_reward(self.max_steps)
 
     def evolve(self):
+        main_reward = self.walker.get_reward(self.max_steps)
+        good_mutations = np.where(self.rewards > main_reward)
+        if len(good_mutations)/len(self.rewards) > 0.2:
+            self.mutation_factor += self.mutation_factor/10
+        elif len(good_mutations)/len(self.rewards) < 0.2:
+            self.mutation_factor -= self.mutation_factor/10
         A = (self.rewards - np.mean(self.rewards)) / np.std(self.rewards)
         weights = self.walker.get_weights()
         for i in range(self.size):
-- 
GitLab
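
For reference, a minimal standalone sketch of the adaptation rule this patch introduces (a 1/5-success-rule-style update: grow the mutation factor when more than 20% of mutants beat the current walker, shrink it otherwise). The function and variable names below are illustrative and not taken from the repository; since np.where returns a tuple of index arrays, the sketch counts successful mutants via the boolean mask instead.

import numpy as np

TARGET_SUCCESS_RATE = 0.2  # the 0.2 threshold used in the patch
ADAPTATION_STEP = 0.1      # adjust the factor by +/- 10% of its value, as in the patch


def adapt_mutation_factor(mutation_factor, rewards, main_reward):
    """Return the adapted mutation factor given the mutants' rewards and the
    reward of the unmutated (parent) walker."""
    # Summing the boolean mask counts how many mutants beat the parent.
    success_rate = np.sum(rewards > main_reward) / len(rewards)
    if success_rate > TARGET_SUCCESS_RATE:
        mutation_factor += mutation_factor * ADAPTATION_STEP
    elif success_rate < TARGET_SUCCESS_RATE:
        mutation_factor -= mutation_factor * ADAPTATION_STEP
    return mutation_factor


if __name__ == '__main__':
    rng = np.random.default_rng(0)
    rewards = rng.normal(size=50)   # stand-in for Population.rewards
    main_reward = 0.5               # stand-in for the parent walker's reward
    print(adapt_mutation_factor(0.1, rewards, main_reward))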