Commit 5e0b34b6 authored by Tobias Döring

adapting mutation rate

parent 7ef0794a
Branch: auto_mutation
@@ -4,11 +4,11 @@ import matplotlib.pyplot as plt
 import pickle
 import sys
-HIDDEN_LAYER = 2
+HIDDEN_LAYER = 12
 BIAS = True
 POP_SIZE = 50
 MUTATION_FACTOR = 0.1 # 0 <= x <= 1
-LEARNING_RATE = 0.03 # 0 <= x <= 1
+LEARNING_RATE = 0.02 # 0 <= x <= 1
 GENS = 7000
 MAX_STEPS = 200 # after 1600 steps the Environment gives us a done anyway.
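HIDDEN_LAYER sizes the walker's single hidden layer, raised here from 2 to 12. For the BipedalWalker-v3 environment this commit switches to (see the Population hunk below), the policy maps 24 observations to 4 continuous actions, so the new shape would be roughly 24 -> 12 -> 4. A minimal sketch of a forward pass through such an MLP; the actual Walker class is not part of this diff, so the weight layout and tanh activations here are assumptions for illustration only:

import numpy as np

HIDDEN_LAYER = 12
BIAS = True
OBS_DIM, ACT_DIM = 24, 4  # BipedalWalker-v3 observation/action sizes

rng = np.random.default_rng(42)
# Hypothetical weight matrices; an extra column holds the bias weights.
w1 = rng.normal(0, 0.5, (HIDDEN_LAYER, OBS_DIM + (1 if BIAS else 0)))
w2 = rng.normal(0, 0.5, (ACT_DIM, HIDDEN_LAYER + (1 if BIAS else 0)))

def forward(obs):
    x = np.append(obs, 1.0) if BIAS else obs
    h = np.tanh(w1 @ x)
    h = np.append(h, 1.0) if BIAS else h
    return np.tanh(w2 @ h)  # actions in [-1, 1], matching the env's bounds

print(forward(np.zeros(OBS_DIM)))  # -> 4 action values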
@@ -31,6 +31,7 @@ def plot_reward(rewards):
 if __name__ == '__main__':
     avg_rewards = []
+    mutation_factors = []
     try:
         population = Population(POP_SIZE, HIDDEN_LAYER, BIAS, MUTATION_FACTOR, MAX_STEPS, LOAD_BRAIN, VERSION, LEARNING_RATE, RENDER_BEST)
@@ -55,9 +56,12 @@ if __name__ == '__main__':
         if gen % 10 == 0:
             population.walker.save()
             avg_rewards.append(population.get_walker_stats())
+            mutation_factors.append(population.mutation_factor)
             population.walker.save_mlp_weights(gen)
             with open(f'./models/{HIDDEN_LAYER}_{VERSION}_{POP_SIZE}_{LEARNING_RATE}_AvgRewards', 'wb') as fp:
                 pickle.dump(avg_rewards, fp)
+            with open(f'./models/{HIDDEN_LAYER}_{VERSION}_{POP_SIZE}_{LEARNING_RATE}_MutationFactors', 'wb') as fp:
+                pickle.dump(mutation_factors, fp)
         if gen == 1000:
             population.lr = 0.01
         #if gen == 5000:
...
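Since the training loop now pickles a MutationFactors list alongside AvgRewards every 10 generations, the two histories can be inspected side by side after a run. A minimal sketch, assuming the same naming scheme as the script above and a placeholder VERSION of 1 (adjust to whatever the run was configured with):

import pickle
import matplotlib.pyplot as plt

# Hypothetical values for illustration; match them to your training config.
HIDDEN_LAYER, VERSION, POP_SIZE, LEARNING_RATE = 12, 1, 50, 0.02
prefix = f'./models/{HIDDEN_LAYER}_{VERSION}_{POP_SIZE}_{LEARNING_RATE}'

with open(f'{prefix}_AvgRewards', 'rb') as fp:
    avg_rewards = pickle.load(fp)
with open(f'{prefix}_MutationFactors', 'rb') as fp:
    mutation_factors = pickle.load(fp)

fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
ax1.plot(avg_rewards)
ax1.set_ylabel('avg reward')
ax2.plot(mutation_factors)
ax2.set_ylabel('mutation factor')
ax2.set_xlabel('checkpoint (every 10 generations)')
plt.show()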
@@ -14,7 +14,7 @@ class Population:
         self.version = version
         self.max_steps = max_steps
         self.render_best = render_best
-        self.env = gym.make('Pendulum-v1') # MountainCarContinuous-v0 LunarLanderContinuous-v2 Pendulum-v1 CarRacing-v0
+        self.env = gym.make('BipedalWalker-v3') # MountainCarContinuous-v0 LunarLanderContinuous-v2 Pendulum-v1 CarRacing-v0
         self.walker = Walker(hidden_layer, bias, version, load_brain, self.env)
         self.mutated_weights = dict()
         self.mutants = []
@@ -33,6 +33,12 @@ class Population:
             self.rewards[i] = self.mutants[i].get_reward(self.max_steps)
     def evolve(self):
         main_reward = self.walker.get_reward(self.max_steps)
+        # np.where returns a tuple of index arrays; take [0] before counting
+        good_mutations = np.where(self.rewards > main_reward)[0]
+        if len(good_mutations) / len(self.rewards) > 0.2:
+            self.mutation_factor += self.mutation_factor / 10
+        elif len(good_mutations) / len(self.rewards) < 0.2:
+            self.mutation_factor -= self.mutation_factor / 10
         A = (self.rewards - np.mean(self.rewards)) / np.std(self.rewards)
         weights = self.walker.get_weights()
         for i in range(self.size):
...
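The adaptation added to evolve() works in the spirit of Rechenberg's 1/5 success rule: when more than 20% of the mutants beat the unmutated walker, the mutation factor grows by 10%; when fewer do, it shrinks by 10%. A self-contained sketch of just that rule, with randomly generated reward arrays standing in for the population (names like success_ratio are illustrative, not from the repo):

import numpy as np

def adapt_mutation_factor(mutation_factor, mutant_rewards, main_reward, target=0.2):
    """One adaptation step: grow the factor when mutations succeed often,
    shrink it when they rarely beat the unmutated parent."""
    # np.where returns a tuple of index arrays; take [0] before counting.
    good = np.where(mutant_rewards > main_reward)[0]
    success_ratio = len(good) / len(mutant_rewards)
    if success_ratio > target:
        mutation_factor += mutation_factor / 10  # explore more aggressively
    elif success_ratio < target:
        mutation_factor -= mutation_factor / 10  # anneal toward finer steps
    return mutation_factor

# Illustrative run: 50 hypothetical mutants against a parent reward of 100.
rng = np.random.default_rng(0)
mf = 0.1
for _ in range(5):
    rewards = rng.normal(100, 5, size=50)
    mf = adapt_mutation_factor(mf, rewards, main_reward=100)
    print(round(mf, 4))

Multiplicative updates like this keep the factor positive and scale steps relative to the current value, so it can shrink toward fine-grained search late in training without ever hitting zero.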