From 2fd89306c3b8d363364997725170ff7d0cf8fcbe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20D=C3=B6ring?= <tobias.doering@stud.hs-bochum.de>
Date: Wed, 9 Mar 2022 11:11:37 +0100
Subject: [PATCH] added increasing steps

---
 EvolutionStrategies/main.py       | 13 +++++++++----
 EvolutionStrategies/population.py |  2 +-
 EvolutionStrategies/walker.py     | 14 ++++++++++----
 3 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/EvolutionStrategies/main.py b/EvolutionStrategies/main.py
index 402634f..2e9889c 100644
--- a/EvolutionStrategies/main.py
+++ b/EvolutionStrategies/main.py
@@ -10,11 +10,11 @@ POP_SIZE = 50
 MUTATION_FACTOR = 0.1 # 0 <= x <= 1
 LEARNING_RATE = 0.03 # 0 <= x <= 1
 GENS = 10000
-MAX_STEPS = 300 # after 1600 steps the Environment gives us a done anyway.
+MAX_STEPS = 100 # after 1600 steps the Environment gives us a done anyway.
 DECAY_ALPHA = True
 
-VERSION = 100
-TEST_WALKER = True
+VERSION = 101
+TEST_WALKER = False
 LOAD_BRAIN = False
 RENDER_BEST = False
 if TEST_WALKER:
@@ -49,14 +49,17 @@ if __name__ == '__main__':
         for gen in range(GENS): # this is our game
             start_time = time.time()
             print(f'Gen: {gen}')
+            print(f'Steps: {population.max_steps}')
             population.mutate()
             population.play_episode()
             population.evolve()
             print("Time for Gen: ", time.time() - start_time)
             if gen % 10 == 0:
                 avg_reward = population.get_walker_stats()
+                population.walker.save()
+                population.walker.save_evo(gen)
                 if avg_reward > best_avg_reward:
-                    population.walker.save()
+                    population.walker.save('best')
                     best_avg_reward = avg_reward
                     print("New best walker found")
                 avg_rewards.append(avg_reward)
@@ -69,6 +72,8 @@ if __name__ == '__main__':
             if gen == 5000 and DECAY_ALPHA:
                 population.lr = 0.005
                 population.mutation_factor = 0.01
+            # increase the number of steps the agent can take
+            population.max_steps += 2
 
         plot_reward(avg_rewards)
     except KeyboardInterrupt:
diff --git a/EvolutionStrategies/population.py b/EvolutionStrategies/population.py
index 4966214..66e9acc 100644
--- a/EvolutionStrategies/population.py
+++ b/EvolutionStrategies/population.py
@@ -38,7 +38,7 @@ class Population:
         for i in range(self.size):
             for k in weights:
                 weights_change = np.dot(self.mutants[i].weights[k].T, A[i]).T
-                weights[k] = weights[k] + self.lr/(self.size*self.lr) * weights_change
+                weights[k] = weights[k] + self.lr/(self.size*self.mutation_factor) * weights_change
         self.walker.set_weights(weights)
         for mutant in self.mutants:
             mutant.set_weights(weights)
diff --git a/EvolutionStrategies/walker.py b/EvolutionStrategies/walker.py
index bda567f..327b123 100644
--- a/EvolutionStrategies/walker.py
+++ b/EvolutionStrategies/walker.py
@@ -105,12 +105,18 @@ class Walker:
                            self.env.action_space.shape[0]], [self.weights['W1'], self.weights['W2']])
         network.draw(gen)
 
-    def save(self):
+    def save_evo(self, gen):
+        if not os.path.isdir(f'./models/weights_evo{self.version}'):
+            os.mkdir(f'./models/weights_evo{self.version}')
+        with open(f'./models/weights_evo{self.version}/model-pedal{gen}.p', 'wb') as fp:
+            pickle.dump(self.weights, fp)
+
+    def save(self, name='current'):
         if not os.path.isdir('./models'):
             os.mkdir('./models')
-        with open('./models/model-pedal%d.p' % self.version, 'wb') as fp:
+        with open(f'./models/model-pedal{self.version}-{name}.p', 'wb') as fp:
             pickle.dump(self.weights, fp)
 
-    def load(self):
-        with open('./models/model-pedal%d.p' % self.version, 'rb') as fp:
+    def load(self, name='current'):
+        with open(f'./models/model-pedal{self.version}-{name}.p', 'rb') as fp:
         self.weights = pickle.load(fp)
--
GitLab
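
Note on the patch, for readers outside the repository: the population.py hunk replaces a normalization in which self.lr cancelled out (lr / (size * lr) is just 1/size) with lr / (size * mutation_factor). That matches the canonical evolution-strategies update theta += alpha / (n * sigma) * sum_i A_i * eps_i, with MUTATION_FACTOR playing the role of the noise scale sigma. The sketch below illustrates the update under that reading; it is not the repository's Population class, and perturbations and rewards are hypothetical stand-ins for the per-mutant noise and fitness values that population.py computes elsewhere.

    import numpy as np

    LEARNING_RATE = 0.03    # alpha, as in main.py
    MUTATION_FACTOR = 0.1   # sigma: std-dev of the weight perturbations
    POP_SIZE = 50           # n: mutants evaluated per generation

    def es_update(weights, perturbations, rewards):
        # One ES step: theta += alpha / (n * sigma) * sum_i A_i * eps_i,
        # where A is the reward vector normalized to zero mean and unit std
        # so the step size does not depend on the reward scale.
        rewards = np.asarray(rewards, dtype=np.float64)
        A = (rewards - rewards.mean()) / (rewards.std() + 1e-8)
        scale = LEARNING_RATE / (POP_SIZE * MUTATION_FACTOR)
        for k in weights:
            grad = sum(A[i] * perturbations[i][k] for i in range(POP_SIZE))
            weights[k] = weights[k] + scale * grad
        return weights

The main.py hunks add a simple episode-length curriculum on top: episodes start at MAX_STEPS = 100 and population.max_steps grows by 2 every generation, so early generations are cheap to evaluate while later ones get longer rollouts. Since the environment (presumably BipedalWalker, given the walker naming and the 1600-step limit mentioned in the comment) ends episodes on its own at 1600 steps, the growing budget stops having an effect after about (1600 - 100) / 2 = 750 generations.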