Skip to content
Snippets Groups Projects
Commit 5e0b34b6 authored by Tobias Döring's avatar Tobias Döring
Browse files

adapting mutation rate

parent 7ef0794a
No related branches found
No related tags found
No related merge requests found
...@@ -4,11 +4,11 @@ import matplotlib.pyplot as plt ...@@ -4,11 +4,11 @@ import matplotlib.pyplot as plt
import pickle import pickle
import sys import sys
HIDDEN_LAYER = 2 HIDDEN_LAYER = 12
BIAS = True BIAS = True
POP_SIZE = 50 POP_SIZE = 50
MUTATION_FACTOR = 0.1 # 0 <= x <= 1 MUTATION_FACTOR = 0.1 # 0 <= x <= 1
LEARNING_RATE = 0.03 # 0 <= x <= 1 LEARNING_RATE = 0.02 # 0 <= x <= 1
GENS = 7000 GENS = 7000
MAX_STEPS = 200 # after 1600 steps the Environment gives us a done anyway. MAX_STEPS = 200 # after 1600 steps the Environment gives us a done anyway.
...@@ -31,6 +31,7 @@ def plot_reward(rewards): ...@@ -31,6 +31,7 @@ def plot_reward(rewards):
if __name__ == '__main__': if __name__ == '__main__':
avg_rewards = [] avg_rewards = []
mutation_factors = []
try: try:
population = Population(POP_SIZE, HIDDEN_LAYER, BIAS, MUTATION_FACTOR, MAX_STEPS, LOAD_BRAIN, VERSION, LEARNING_RATE, RENDER_BEST) population = Population(POP_SIZE, HIDDEN_LAYER, BIAS, MUTATION_FACTOR, MAX_STEPS, LOAD_BRAIN, VERSION, LEARNING_RATE, RENDER_BEST)
...@@ -55,9 +56,12 @@ if __name__ == '__main__': ...@@ -55,9 +56,12 @@ if __name__ == '__main__':
if gen % 10 == 0: if gen % 10 == 0:
population.walker.save() population.walker.save()
avg_rewards.append(population.get_walker_stats()) avg_rewards.append(population.get_walker_stats())
mutation_factors.append(population.mutation_factor)
population.walker.save_mlp_weights(gen) population.walker.save_mlp_weights(gen)
with open(f'./models/{HIDDEN_LAYER}_{VERSION}_{POP_SIZE}_{LEARNING_RATE}_AvgRewards', 'wb') as fp: with open(f'./models/{HIDDEN_LAYER}_{VERSION}_{POP_SIZE}_{LEARNING_RATE}_AvgRewards', 'wb') as fp:
pickle.dump(avg_rewards, fp) pickle.dump(avg_rewards, fp)
with open(f'./models/{HIDDEN_LAYER}_{VERSION}_{POP_SIZE}_{LEARNING_RATE}_MutationFactors', 'wb') as fp:
pickle.dump(mutation_factors, fp)
if gen == 1000: if gen == 1000:
population.lr = 0.01 population.lr = 0.01
#if gen == 5000: #if gen == 5000:
......
...@@ -14,7 +14,7 @@ class Population: ...@@ -14,7 +14,7 @@ class Population:
self.version = version self.version = version
self.max_steps = max_steps self.max_steps = max_steps
self.render_best = render_best self.render_best = render_best
self.env = gym.make('Pendulum-v1') # MountainCarContinuous-v0 LunarLanderContinuous-v2 Pendulum-v1 CarRacing-v0 self.env = gym.make('BipedalWalker-v3') # MountainCarContinuous-v0 LunarLanderContinuous-v2 Pendulum-v1 CarRacing-v0
self.walker = Walker(hidden_layer, bias, version, load_brain, self.env) self.walker = Walker(hidden_layer, bias, version, load_brain, self.env)
self.mutated_weights = dict() self.mutated_weights = dict()
self.mutants = [] self.mutants = []
...@@ -33,6 +33,12 @@ class Population: ...@@ -33,6 +33,12 @@ class Population:
self.rewards[i] = self.mutants[i].get_reward(self.max_steps) self.rewards[i] = self.mutants[i].get_reward(self.max_steps)
def evolve(self): def evolve(self):
main_reward = self.walker.get_reward(self.max_steps)
good_mutations = np.where(self.rewards > main_reward)
if len(good_mutations)/len(self.rewards) > 0.2:
self.mutation_factor += self.mutation_factor/10
elif len(good_mutations)/len(self.rewards) < 0.2:
self.mutation_factor -= self.mutation_factor/10
A = (self.rewards - np.mean(self.rewards)) / np.std(self.rewards) A = (self.rewards - np.mean(self.rewards)) / np.std(self.rewards)
weights = self.walker.get_weights() weights = self.walker.get_weights()
for i in range(self.size): for i in range(self.size):
......
0% — Loading, or an error occurred.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment