diff --git a/EvolutionStrategies/main.py b/EvolutionStrategies/main.py
index 402634f1e644c8803a14eeab3a2c6af8f1cc12d6..2e9889ccf08102ea648399a7db8b9ea07ead3140 100644
--- a/EvolutionStrategies/main.py
+++ b/EvolutionStrategies/main.py
@@ -10,11 +10,11 @@ POP_SIZE = 50
 MUTATION_FACTOR = 0.1 # 0 <= x <= 1
 LEARNING_RATE = 0.03 # 0 <= x <= 1
 GENS = 10000
-MAX_STEPS = 300 # after 1600 steps the Environment gives us a done anyway.
+MAX_STEPS = 100 # after 1600 steps the Environment gives us a done anyway.
 DECAY_ALPHA = True
 
-VERSION = 100
-TEST_WALKER = True
+VERSION = 101
+TEST_WALKER = False
 LOAD_BRAIN = False
 RENDER_BEST = False
 if TEST_WALKER:
@@ -49,14 +49,17 @@ if __name__ == '__main__':
         for gen in range(GENS): # this is our game
             start_time = time.time()
             print(f'Gen: {gen}')
+            print(f'Steps: {population.max_steps}')
             population.mutate()
             population.play_episode()
             population.evolve()
             print("Time for Gen: ", time.time() - start_time)
             if gen % 10 == 0:
                 avg_reward = population.get_walker_stats()
+                population.walker.save()
+                population.walker.save_evo(gen)
                 if avg_reward > best_avg_reward:
-                    population.walker.save()
+                    population.walker.save('best')
                     best_avg_reward = avg_reward
                     print("New best walker found")
                 avg_rewards.append(avg_reward)
@@ -69,6 +72,8 @@ if __name__ == '__main__':
             if gen == 5000 and DECAY_ALPHA:
                 population.lr = 0.005
                 population.mutation_factor = 0.01
+            # increase the amount of steps the agent can do
+            population.max_steps += 2
 
         plot_reward(avg_rewards)
     except KeyboardInterrupt:
diff --git a/EvolutionStrategies/population.py b/EvolutionStrategies/population.py
index 49662149501f63cc80b4a914aab3a4520f2d5723..66e9acc68ecad2b7c93553e25767255b44c2f80c 100644
--- a/EvolutionStrategies/population.py
+++ b/EvolutionStrategies/population.py
@@ -38,7 +38,7 @@ class Population:
         for i in range(self.size):
             for k in weights:
                 weights_change = np.dot(self.mutants[i].weights[k].T, A[i]).T
-                weights[k] = weights[k] + self.lr/(self.size*self.lr) * weights_change
+                weights[k] = weights[k] + self.lr/(self.size*self.mutation_factor) * weights_change
         self.walker.set_weights(weights)
         for mutant in self.mutants:
             mutant.set_weights(weights)
diff --git a/EvolutionStrategies/walker.py b/EvolutionStrategies/walker.py
index bda567fce6c9161efe21baa077c44340b6150a9c..327b1236bb53078d05214003151a35e0377245a4 100644
--- a/EvolutionStrategies/walker.py
+++ b/EvolutionStrategies/walker.py
@@ -105,12 +105,18 @@ class Walker:
                                        self.env.action_space.shape[0]],
                                       [self.weights['W1'], self.weights['W2']])
         network.draw(gen)
-    def save(self):
+    def save_evo(self, gen):
+        if not os.path.isdir(f'./models/weights_evo{self.version}'):
+            os.mkdir(f'./models/weights_evo{self.version}')
+        with open(f'./models/weights_evo{self.version}/model-pedal{gen}.p', 'wb') as fp:
+            pickle.dump(self.weights, fp)
+
+    def save(self, name = 'current'):
         if not os.path.isdir('./models'):
             os.mkdir('./models')
-        with open('./models/model-pedal%d.p' % self.version, 'wb') as fp:
+        with open(f'./models/model-pedal{self.version}-{name}.p', 'wb') as fp:
             pickle.dump(self.weights, fp)
 
-    def load(self):
-        with open('./models/model-pedal%d.p' % self.version, 'rb') as fp:
+    def load(self, name = 'current'):
+        with open(f'./models/model-pedal{self.version}-{name}.p', 'rb') as fp:
             self.weights = pickle.load(fp)
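
Note on the population.py hunk: the old expression self.lr/(self.size*self.lr) reduces to 1/self.size, so the learning rate cancelled out of the step and the noise scale never entered it. Dividing by self.size*self.mutation_factor instead matches the usual evolution-strategies estimator theta <- theta + alpha/(n*sigma) * sum_i A_i * eps_i, where sigma is the mutation standard deviation. The snippet below is a minimal standalone sketch of that update, assuming standardized episode returns; the names (theta, noise, returns) are illustrative and not taken from the repository.

import numpy as np

n = 50        # population size (POP_SIZE)
sigma = 0.1   # mutation std (MUTATION_FACTOR)
alpha = 0.03  # learning rate (LEARNING_RATE)

theta = np.zeros(24 * 4)                 # flattened parameters of one layer
noise = np.random.randn(n, theta.size)   # per-mutant perturbations eps_i
returns = np.random.randn(n)             # episode returns of the mutants

# Standardize returns so the reward scale does not dominate the step size.
A = (returns - returns.mean()) / (returns.std() + 1e-8)

# Canonical ES update: theta <- theta + alpha / (n * sigma) * sum_i A_i * eps_i.
# The previous code divided by (n * lr), which both dropped sigma and made the
# step independent of the learning rate.
theta = theta + alpha / (n * sigma) * (noise.T @ A)

As a side note on the main.py hunk: if population.max_steps is raised by 2 once per generation, the per-episode cap grows from the new MAX_STEPS = 100 to the environment's 1600-step limit after 750 generations (100 + 2 * 750 = 1600).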