Skip to content
Snippets Groups Projects
Commit 5e0b34b6 authored by Tobias Döring
Browse files

adapting mutation rate

parent 7ef0794a
No related branches found
No related tags found
No related merge requests found
......@@ -4,11 +4,11 @@ import matplotlib.pyplot as plt
import pickle
import sys
HIDDEN_LAYER = 2
HIDDEN_LAYER = 12
BIAS = True
POP_SIZE = 50
MUTATION_FACTOR = 0.1 # 0 <= x <= 1
LEARNING_RATE = 0.03 # 0 <= x <= 1
LEARNING_RATE = 0.02 # 0 <= x <= 1
GENS = 7000
MAX_STEPS = 200 # after 1600 steps the Environment gives us a done anyway.
......@@ -31,6 +31,7 @@ def plot_reward(rewards):
if __name__ == '__main__':
avg_rewards = []
mutation_factors = []
try:
population = Population(POP_SIZE, HIDDEN_LAYER, BIAS, MUTATION_FACTOR, MAX_STEPS, LOAD_BRAIN, VERSION, LEARNING_RATE, RENDER_BEST)
......@@ -55,9 +56,12 @@ if __name__ == '__main__':
if gen % 10 == 0:
population.walker.save()
avg_rewards.append(population.get_walker_stats())
mutation_factors.append(population.mutation_factor)
population.walker.save_mlp_weights(gen)
with open(f'./models/{HIDDEN_LAYER}_{VERSION}_{POP_SIZE}_{LEARNING_RATE}_AvgRewards', 'wb') as fp:
pickle.dump(avg_rewards, fp)
with open(f'./models/{HIDDEN_LAYER}_{VERSION}_{POP_SIZE}_{LEARNING_RATE}_MutationFactors', 'wb') as fp:
pickle.dump(mutation_factors, fp)
if gen == 1000:
population.lr = 0.01
#if gen == 5000:
......
......@@ -14,7 +14,7 @@ class Population:
self.version = version
self.max_steps = max_steps
self.render_best = render_best
self.env = gym.make('Pendulum-v1') # MountainCarContinuous-v0 LunarLanderContinuous-v2 Pendulum-v1 CarRacing-v0
self.env = gym.make('BipedalWalker-v3') # MountainCarContinuous-v0 LunarLanderContinuous-v2 Pendulum-v1 CarRacing-v0
self.walker = Walker(hidden_layer, bias, version, load_brain, self.env)
self.mutated_weights = dict()
self.mutants = []
......@@ -33,6 +33,12 @@ class Population:
self.rewards[i] = self.mutants[i].get_reward(self.max_steps)
def evolve(self):
main_reward = self.walker.get_reward(self.max_steps)
good_mutations = np.where(self.rewards > main_reward)
if len(good_mutations)/len(self.rewards) > 0.2:
self.mutation_factor += self.mutation_factor/10
elif len(good_mutations)/len(self.rewards) < 0.2:
self.mutation_factor -= self.mutation_factor/10
A = (self.rewards - np.mean(self.rewards)) / np.std(self.rewards)
weights = self.walker.get_weights()
for i in range(self.size):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment