Commit 522edfe3 authored by Philip Maas

Various small QoL changes

parent 1e690422
@@ -4,14 +4,14 @@ import matplotlib.pyplot as plt
 import pickle
 import sys
-HIDDEN_LAYER = 2
-BIAS = True
+HIDDEN_LAYER = 12
+BIAS = False
 POP_SIZE = 50
 MUTATION_FACTOR = 0.1  # 0 <= x <= 1
 LEARNING_RATE = 0.03  # 0 <= x <= 1
 GENS = 7000
-MAX_STEPS = 200  # after 1600 steps the Environment gives us a done anyway.
+MAX_STEPS = 300  # after 1600 steps the Environment gives us a done anyway.
+DECAY_ALPHA = True
 VERSION = 1
 TEST_WALKER = True
@@ -58,10 +58,12 @@ if __name__ == '__main__':
                 population.walker.save_mlp_weights(gen)
                 with open(f'./models/{HIDDEN_LAYER}_{VERSION}_{POP_SIZE}_{LEARNING_RATE}_AvgRewards', 'wb') as fp:
                     pickle.dump(avg_rewards, fp)
-            if gen == 1000:
+            if gen == 1000 and DECAY_ALPHA:
                 population.lr = 0.01
-            #if gen == 5000:
-                #population.lr = 0.005
+                population.mutation_factor = 0.05
+            if gen == 5000 and DECAY_ALPHA:
+                population.lr = 0.005
+                population.mutation_factor = 0.01
         plot_reward(avg_rewards)
     except KeyboardInterrupt:
...
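Note on the hunk above: the new DECAY_ALPHA flag gates a two-step annealing schedule that shrinks the learning rate and mutation factor at generations 1000 and 5000. A minimal sketch of the same schedule as a pure function (the function name is illustrative, not from the repository; it matches the `gen ==` checks above because the loop assigns persistent attributes once per milestone):

DECAY_ALPHA = True

def decayed_hyperparams(gen, lr=0.03, mutation_factor=0.1):
    # Stepwise annealing: same milestones and values as the training loop above.
    if DECAY_ALPHA and gen >= 5000:
        return 0.005, 0.01
    if DECAY_ALPHA and gen >= 1000:
        return 0.01, 0.05
    return lr, mutation_factor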
@@ -14,7 +14,7 @@ class Population:
         self.version = version
         self.max_steps = max_steps
         self.render_best = render_best
-        self.env = gym.make('Pendulum-v1')  # MountainCarContinuous-v0 LunarLanderContinuous-v2 Pendulum-v1 CarRacing-v0
+        self.env = gym.make('BipedalWalker-v3')  # MountainCarContinuous-v0 LunarLanderContinuous-v2 Pendulum-v1 CarRacing-v0
         self.walker = Walker(hidden_layer, bias, version, load_brain, self.env)
         self.mutated_weights = dict()
         self.mutants = []
...
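Note on the environment switch: BipedalWalker-v3 has a 24-dimensional observation and a 4-dimensional continuous action in [-1, 1], so the MLP sized by HIDDEN_LAYER and BIAS has to match those shapes. A quick sanity-check rollout, assuming the pre-0.26 gym step API (4-tuple) that this code appears to use:

import gym

env = gym.make('BipedalWalker-v3')
obs = env.reset()                                 # 24-dim observation
done, total_reward = False, 0.0
while not done:
    action = env.action_space.sample()            # random 4-dim action in [-1, 1]
    obs, reward, done, info = env.step(action)    # gym >= 0.26 returns 5 values instead
    total_reward += reward
print(total_reward)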
MutateActions/5_50_50_0.2.png (new image, 22.3 KiB)
 from population import Population
-import time
 import matplotlib.pyplot as plt
 import pickle
+import sys
 INCREASE_BY = 5
 BRAIN_SIZE = 50
@@ -17,24 +17,33 @@ if TEST_WALKER:
 if __name__ == '__main__':
-    population = Population(POP_SIZE, BRAIN_SIZE, MUTATION_FACTOR, LOAD_BRAIN, RENDER_BEST)
-    rewards = []
     if TEST_WALKER:
         rewards = []
+        steps = []
         with open('rewards.p', 'rb') as fp:
             rewards = pickle.load(fp)
+        with open('steps.p', 'rb') as fp:
+            steps = pickle.load(fp)
         plt.title(f'{POP_SIZE}, {MUTATION_FACTOR}')
         plt.xlabel('Episodes')
         plt.ylabel('Rewards')
         plt.plot(rewards)
+        plt.plot(steps)
         plt.savefig(f'./models/{POP_SIZE}, {MUTATION_FACTOR}.png')
         plt.show()
+        sys.exit(0)
+    population = Population(POP_SIZE, BRAIN_SIZE, MUTATION_FACTOR, LOAD_BRAIN, RENDER_BEST)
+    rewards = []
+    steps = []
-    while GAME_CANCELLED is False:  # this is our game
+    while population.gen < 2000:  # this is our game
         if population.all_players_finished():  # this is our genetic algorithm after one generation of players
+            rewards.append(population.walkers[0].fitness)
+            steps.append(len(population.walkers[0].brain.directions))
+            print(f'Best Fitness: {population.walkers[0].fitness}')
             population.natural_selection()
             population.mutate_babies()
             population.increase_moves(INCREASE_BY)
@@ -45,11 +54,12 @@ if __name__ == '__main__':
             print(f'Best Fitness: {population.fitnesses[population.best_walker_index]}')
             print(f'Max Steps: {population.max_steps}')
-            rewards.append(population.fitnesses[population.best_walker_index])
             if population.gen % 10 == 0:
                 with open("rewards.p", 'wb') as fp:
                     pickle.dump(rewards, fp)
+                with open("steps.p", 'wb') as fp:
+                    pickle.dump(steps, fp)
         else:
             population.update()
             # time.sleep(0.1)
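Note on the bookkeeping changes above: the loop now records the best walker's fitness and its brain size (number of stored actions) each generation and pickles both lists every 10 generations, which is exactly what the TEST_WALKER branch reads back. A sketch of that round trip with labelled curves (the labels are illustrative; the script itself plots both series unlabelled):

import pickle
import matplotlib.pyplot as plt

with open('rewards.p', 'rb') as fp:
    rewards = pickle.load(fp)          # best fitness per generation
with open('steps.p', 'rb') as fp:
    steps = pickle.load(fp)            # number of stored actions per generation

plt.plot(rewards, label='best fitness')
plt.plot(steps, label='brain size (moves)')
plt.xlabel('Generation')
plt.legend()
plt.show()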
@@ -39,16 +39,12 @@ class Population:
         else:
             walker.update()
-    """def calculate_fitness(self):  # calculate the fitness of all players
-        for walker in self.walkers:
-            walker.calculate_fitness()"""
     def calculate_fitness_sum(self):
         self.fitness_sum = 0
         self.fitnesses = np.zeros(self.size)
         for i in range(self.size):
             self.fitnesses[i] = self.walkers[i].fitness
-        self.fitnesses -= np.min(self.fitnesses)  # maybe offset: +1
+        self.fitnesses -= np.min(self.fitnesses)
         self.fitness_sum = np.sum(self.fitnesses)
     def all_players_finished(self):  # returns whether all the players are either dead or have reached the goal
@@ -58,8 +54,6 @@ class Population:
         return True
     def natural_selection(self):  # gets the next generation of players
-        #for i in range(self.size):
-            #new_walkers.append(Walker(self.envs[i], self.brain_size))
         self.calculate_fitness_sum()
         self.set_best_walker()
         self.walkers[self.best_walker_index].brain.save()
...
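Note on calculate_fitness_sum: shifting by the minimum makes all fitnesses non-negative so they can serve as roulette-wheel weights; the deleted "# maybe offset: +1" remark pointed at the degenerate case where every shifted fitness is zero. A sketch of the selection step this normalization feeds (select_parent is illustrative; the repository's own selection code is outside this hunk):

import numpy as np

def select_parent(fitnesses, rng=None):
    # Roulette-wheel selection over min-shifted fitnesses, as normalized above.
    if rng is None:
        rng = np.random.default_rng()
    shifted = fitnesses - np.min(fitnesses)
    total = np.sum(shifted)
    if total == 0:                     # all walkers equally fit: pick uniformly
        return int(rng.integers(len(fitnesses)))
    pick = rng.uniform(0, total)
    running = 0.0
    for i, f in enumerate(shifted):
        running += f
        if running >= pick:
            return i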