diff --git a/EvolutionStrategies/main.py b/EvolutionStrategies/main.py
index aa9e09f05c574d8846700164912f585d8f5c5031..bbf193d0252d2c192e858ea46a892e34596a2d8e 100644
--- a/EvolutionStrategies/main.py
+++ b/EvolutionStrategies/main.py
@@ -4,11 +4,11 @@
 import matplotlib.pyplot as plt
 import pickle
 import sys
 
-HIDDEN_LAYER = 2
+HIDDEN_LAYER = 12
 BIAS = True
 POP_SIZE = 50
 MUTATION_FACTOR = 0.1 # 0 <= x <= 1
-LEARNING_RATE = 0.03 # 0 <= x <= 1
+LEARNING_RATE = 0.02 # 0 <= x <= 1
 GENS = 7000
 MAX_STEPS = 200 # after 1600 steps the Environment gives us a done anyway.
@@ -31,6 +31,7 @@ def plot_reward(rewards):
 
 if __name__ == '__main__':
     avg_rewards = []
+    mutation_factors = []
 
     try:
         population = Population(POP_SIZE, HIDDEN_LAYER, BIAS, MUTATION_FACTOR, MAX_STEPS, LOAD_BRAIN, VERSION, LEARNING_RATE, RENDER_BEST)
@@ -55,9 +56,12 @@ if __name__ == '__main__':
         if gen % 10 == 0:
             population.walker.save()
             avg_rewards.append(population.get_walker_stats())
+            mutation_factors.append(population.mutation_factor)
             population.walker.save_mlp_weights(gen)
             with open(f'./models/{HIDDEN_LAYER}_{VERSION}_{POP_SIZE}_{LEARNING_RATE}_AvgRewards', 'wb') as fp:
                 pickle.dump(avg_rewards, fp)
+            with open(f'./models/{HIDDEN_LAYER}_{VERSION}_{POP_SIZE}_{LEARNING_RATE}_MutationFactors', 'wb') as fp:
+                pickle.dump(mutation_factors, fp)
         if gen == 1000:
             population.lr = 0.01
         #if gen == 5000:
diff --git a/EvolutionStrategies/population.py b/EvolutionStrategies/population.py
index 0c4824773c5f8e5fe5b1b17c9e1015cdee4b225d..33e2bb780d3b5cfad1b31c00a18d252efaf68f23 100644
--- a/EvolutionStrategies/population.py
+++ b/EvolutionStrategies/population.py
@@ -14,7 +14,7 @@ class Population:
         self.version = version
         self.max_steps = max_steps
         self.render_best = render_best
-        self.env = gym.make('Pendulum-v1') # MountainCarContinuous-v0 LunarLanderContinuous-v2 Pendulum-v1 CarRacing-v0
+        self.env = gym.make('BipedalWalker-v3') # MountainCarContinuous-v0 LunarLanderContinuous-v2 Pendulum-v1 CarRacing-v0
         self.walker = Walker(hidden_layer, bias, version, load_brain, self.env)
         self.mutated_weights = dict()
         self.mutants = []
@@ -33,6 +33,14 @@ class Population:
             self.rewards[i] = self.mutants[i].get_reward(self.max_steps)
 
     def evolve(self):
+        # 1/5 success rule: adapt the mutation strength to the fraction of
+        # mutants that outperform the current (unmutated) walker.
+        main_reward = self.walker.get_reward(self.max_steps)
+        good_mutations = np.sum(self.rewards > main_reward)
+        if good_mutations / len(self.rewards) > 0.2:
+            self.mutation_factor += self.mutation_factor / 10
+        elif good_mutations / len(self.rewards) < 0.2:
+            self.mutation_factor -= self.mutation_factor / 10
         A = (self.rewards - np.mean(self.rewards)) / np.std(self.rewards)
         weights = self.walker.get_weights()
         for i in range(self.size):
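
The new block at the top of `evolve()` is a variant of Rechenberg's 1/5 success rule: if more than 20% of the mutants beat the unmutated walker, mutation strength grows; if fewer do, it shrinks. Below is a minimal, self-contained sketch of that adaptation logic; the function name `adapt_mutation_factor` and the `target_rate`/`step` parameters are illustrative stand-ins for the hard-coded 0.2 and 1/10 in the diff, not part of the repo.

```python
import numpy as np

def adapt_mutation_factor(mutation_factor, mutant_rewards, parent_reward,
                          target_rate=0.2, step=0.1):
    # Fraction of mutants that beat the unmutated parent this generation.
    success_rate = np.sum(np.asarray(mutant_rewards) > parent_reward) / len(mutant_rewards)
    if success_rate > target_rate:
        mutation_factor += mutation_factor * step  # exploring pays off: mutate harder
    elif success_rate < target_rate:
        mutation_factor -= mutation_factor * step  # too few successes: mutate gentler
    return mutation_factor

# Example: 5 of 50 mutants beat the parent -> success rate 0.1 < 0.2,
# so the factor shrinks by 10% (0.1 -> 0.09).
rewards = np.concatenate([np.full(45, -100.0), np.full(5, 50.0)])
print(adapt_mutation_factor(0.1, rewards, 0.0))
```

Keeping the update multiplicative means the factor decays geometrically toward zero rather than ever going negative, which is the usual choice for step-size adaptation.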
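
The context lines that follow, `A = (self.rewards - np.mean(self.rewards)) / np.std(self.rewards)` and the loop over the population, are the standard reward normalization from OpenAI-style evolution strategies. The rest of the update is not visible in this hunk, so the sketch below is an assumption about its shape rather than the repo's actual code: `es_update`, `noises`, and `sigma` are hypothetical names for the flat parameter vector, the per-mutant Gaussian perturbations, and the perturbation scale.

```python
import numpy as np

def es_update(weights, noises, rewards, lr, sigma):
    # Normalize returns to zero mean / unit variance, as in evolve() above.
    A = (rewards - np.mean(rewards)) / np.std(rewards)
    # The reward-weighted sum of perturbations approximates the gradient
    # of expected reward with respect to the parameters.
    grad = np.sum([a * n for a, n in zip(A, noises)], axis=0)
    return weights + lr / (len(noises) * sigma) * grad

# Toy usage: 50 mutants of a 4-parameter vector on a quadratic objective.
rng = np.random.default_rng(0)
w = np.zeros(4)
noises = [rng.standard_normal(4) for _ in range(50)]
rewards = np.array([-np.sum((w + 0.1 * n - 1.0) ** 2) for n in noises])
w = es_update(w, noises, rewards, lr=0.02, sigma=0.1)
```

Normalizing the returns makes the step size invariant to the environment's reward scale, which matters here since the diff switches from Pendulum-v1 (per-step rewards roughly in [-16, 0]) to BipedalWalker-v3 (episode returns roughly in [-100, 300]).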