diff --git a/EvolutionStrategies/main.py b/EvolutionStrategies/main.py index aa9e09f05c574d8846700164912f585d8f5c5031..d8c8a50521a4b13ec9edfb04cab71985bf6d436b 100644 --- a/EvolutionStrategies/main.py +++ b/EvolutionStrategies/main.py @@ -4,14 +4,14 @@ import matplotlib.pyplot as plt import pickle import sys -HIDDEN_LAYER = 2 -BIAS = True +HIDDEN_LAYER = 12 +BIAS = False POP_SIZE = 50 MUTATION_FACTOR = 0.1 # 0 <= x <= 1 LEARNING_RATE = 0.03 # 0 <= x <= 1 GENS = 7000 -MAX_STEPS = 200 # after 1600 steps the Environment gives us a done anyway. - +MAX_STEPS = 300 # after 1600 steps the Environment gives us a done anyway. +DECAY_ALPHA = True VERSION = 1 TEST_WALKER = True @@ -58,10 +58,12 @@ if __name__ == '__main__': population.walker.save_mlp_weights(gen) with open(f'./models/{HIDDEN_LAYER}_{VERSION}_{POP_SIZE}_{LEARNING_RATE}_AvgRewards', 'wb') as fp: pickle.dump(avg_rewards, fp) - if gen == 1000: + if gen == 1000 and DECAY_ALPHA: population.lr = 0.01 - #if gen == 5000: - #population.lr = 0.005 + population.mutation_factor = 0.05 + if gen == 5000 and DECAY_ALPHA: + population.lr = 0.005 + population.mutation_factor = 0.01 plot_reward(avg_rewards) except KeyboardInterrupt: diff --git a/EvolutionStrategies/population.py b/EvolutionStrategies/population.py index 0c4824773c5f8e5fe5b1b17c9e1015cdee4b225d..49662149501f63cc80b4a914aab3a4520f2d5723 100644 --- a/EvolutionStrategies/population.py +++ b/EvolutionStrategies/population.py @@ -14,7 +14,7 @@ class Population: self.version = version self.max_steps = max_steps self.render_best = render_best - self.env = gym.make('Pendulum-v1') # MountainCarContinuous-v0 LunarLanderContinuous-v2 Pendulum-v1 CarRacing-v0 + self.env = gym.make('BipedalWalker-v3') # MountainCarContinuous-v0 LunarLanderContinuous-v2 Pendulum-v1 CarRacing-v0 self.walker = Walker(hidden_layer, bias, version, load_brain, self.env) self.mutated_weights = dict() self.mutants = [] diff --git a/MutateActions/5_50_50_0.2.png b/MutateActions/5_50_50_0.2.png new file mode 100644 index 0000000000000000000000000000000000000000..3494bfc4d920e956b26d8388edc6fc4b7a98dee5 Binary files /dev/null and b/MutateActions/5_50_50_0.2.png differ diff --git a/MutateActions/main.py b/MutateActions/main.py index b81ae78277734b51a6bea91463d93f6c8a6139f1..a8d4fe7545d19b013a9bc01b16fa04b411f9bdee 100644 --- a/MutateActions/main.py +++ b/MutateActions/main.py @@ -1,7 +1,7 @@ from population import Population -import time import matplotlib.pyplot as plt import pickle +import sys INCREASE_BY = 5 BRAIN_SIZE = 50 @@ -17,24 +17,33 @@ if TEST_WALKER: if __name__ == '__main__': - population = Population(POP_SIZE, BRAIN_SIZE,MUTATION_FACTOR, LOAD_BRAIN, RENDER_BEST) - - rewards = [] - if TEST_WALKER: rewards = [] + steps = [] with open('rewards.p', 'rb') as fp: rewards = pickle.load(fp) + with open('steps.p', 'rb') as fp: + steps = pickle.load(fp) plt.title(f'{POP_SIZE}, {MUTATION_FACTOR}') plt.xlabel('Episodes') plt.ylabel('Rewards') plt.plot(rewards) + plt.plot(steps) plt.savefig(f'./models/{POP_SIZE}, {MUTATION_FACTOR}.png') plt.show() + sys.exit(0) + + population = Population(POP_SIZE, BRAIN_SIZE,MUTATION_FACTOR, LOAD_BRAIN, RENDER_BEST) + + rewards = [] + steps = [] - while GAME_CANCELLED is False: # this is our game + while population.gen < 2000: # this is our game if population.all_players_finished(): # this is our genetic algorithm after one generation of players + rewards.append(population.walkers[0].fitness) + steps.append(len(population.walkers[0].brain.directions)) + print(f'Best Fitness: {population.walkers[0].fitness}') population.natural_selection() population.mutate_babies() population.increase_moves(INCREASE_BY) @@ -45,11 +54,12 @@ if __name__ == '__main__': print(f'Best Fitness: {population.fitnesses[population.best_walker_index]}') print(f'Max Steps: {population.max_steps}') - rewards.append(population.fitnesses[population.best_walker_index]) if population.gen % 10 == 0: with open("rewards.p", 'wb') as fp: pickle.dump(rewards, fp) + with open("steps.p", 'wb') as fp: + pickle.dump(steps, fp) else: population.update() # time.sleep(0.1) diff --git a/MutateActions/population.py b/MutateActions/population.py index f51dcff9a048a12e11fa06d2967e8644d804f196..4f62c2c6118f925b8a849ca6194ee8e5c9b13f35 100644 --- a/MutateActions/population.py +++ b/MutateActions/population.py @@ -39,16 +39,12 @@ class Population: else: walker.update() - """def calculate_fitness(self): # calculate the fitness of all players - for walker in self.walkers: - walker.calculate_fitness()""" - def calculate_fitness_sum(self): self.fitness_sum = 0 self.fitnesses = np.zeros(self.size) for i in range(self.size): self.fitnesses[i] = self.walkers[i].fitness - self.fitnesses -= np.min(self.fitnesses) # maybe offset: +1 + self.fitnesses -= np.min(self.fitnesses) self.fitness_sum = np.sum(self.fitnesses) def all_players_finished(self): # returns whether all the players are either dead or have reached the goal @@ -58,8 +54,6 @@ class Population: return True def natural_selection(self): # gets the next generation of players - #for i in range(self.size): - #new_walkers.append(Walker(self.envs[i], self.brain_size)) self.calculate_fitness_sum() self.set_best_walker() self.walkers[self.best_walker_index].brain.save()