Commit cff99083 authored by Tobias Döring

Performance and code structure fixed

parent 295ebae7
1 merge request: !2 Evo neuro
@@ -18,19 +18,21 @@ if TEST_WALKER:
if __name__ == '__main__':
population = Population(POP_SIZE, H1, MUTATION_FACTOR, LOAD_BRAIN, VERSION, LEARNING_RATE, RENDER_BEST)
avg_rewards = []
if TEST_WALKER:
while True:
population.test_walker()
while GAME_CANCELLED is False: # this is our game
start_time = time.time()
print(f'Gen: {population.gen}')
print(f'Max Steps: {population.max_steps}')
population.mutate()
population.play_episode()
population.evolve()
#population.increase_moves(INCREASE_BY)
print(f'Gen: {population.gen}')
#print(f'Best Index: {population.best_walker_index}')
#print(f'Best Fitness: {population.fitnesses[population.best_walker_index]}')
print(f'Max Steps: {population.max_steps}')
# time.sleep(0.1)
#if population.gen % 10 == 0:
# population.walker.save()
print("Time for Gen: ", time.time() - start_time)
if population.gen % 10 == 0:
population.walker.save()
avg_rewards.append(population.get_walker_stats())
@@ -7,7 +7,7 @@ import gym
np.random.seed(42)
MAX_STEPS = 1599 # after 1600 steps the Environment gives us a done anyway.
MAX_STEPS = 300 # after 1600 steps the Environment gives us a done anyway.
class Population:
@@ -21,19 +21,15 @@ class Population:
self.max_steps = MAX_STEPS
self.render_best = render_best
self.env = gym.make('BipedalWalker-v3')
self.weights = {}
self.weights['W1'] = np.random.randn(24, h1) / np.sqrt(24)
self.weights['W2'] = np.random.randn(h1, 4) / np.sqrt(h1)
#self.walker = Walker(h1, version, load_brain, self.env, self.max_steps)
# maybe only have one mutant that is only used to test all the modifications/mutations
self.mutant = Walker(h1, version, load_brain, self.env, self.max_steps)
self.walker = Walker(h1, version, load_brain, self.env, self.max_steps)
self.mutated_weights = dict()
#self.mutants = []
self.mutants = []
self.envs = []
self.rewards = None
self.average_reward = None
self.lr = lr
#for i in range(self.size):
# self.mutants.append(Walker(h1, version, load_brain, self.env, self.max_steps))
for i in range(self.size):
self.mutants.append(Walker(h1, version, load_brain, self.env, self.max_steps))
if load_brain:
self.mutate()
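
Note: the weight dictionary initialized above parameterizes a small two-layer policy, mapping the 24-dimensional BipedalWalker observation through a hidden layer of h1 units to the 4 continuous actions, with each matrix scaled by 1/sqrt(fan-in). The Walker's forward pass is not part of this diff, so the following is only a sketch of what such a policy typically looks like; the ReLU hidden layer and tanh output are assumptions, not taken from this commit.

import numpy as np

def policy_forward(weights, observation):
    # Hypothetical forward pass for the {'W1': (24, h1), 'W2': (h1, 4)} weight dict.
    # Activation choices (ReLU hidden, tanh output) are assumed; the real
    # Walker.get_action is not shown in this diff and may differ.
    hidden = np.maximum(0.0, observation @ weights['W1'])  # (24,) -> (h1,)
    action = np.tanh(hidden @ weights['W2'])                # (h1,) -> (4,), in [-1, 1]
    return action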
@@ -69,41 +65,42 @@ class Population:
def play_episode(self):
self.rewards = np.zeros(self.size)
mutated_weights = dict()
for i in range(self.size):
for k, v in self.weights.items():
mutated_weights[k] = v + self.mutation_factor * self.mutated_weights[k][i]
self.rewards[i] = self.get_reward(mutated_weights)
#self.mutant.set_weights(mutated_weights)
#self.rewards[i] = self.mutant.get_reward()
self.rewards[i] = self.mutants[i].get_reward()
def evolve(self):
#R = self.rewards
A = (self.rewards - np.mean(self.rewards)) / np.std(self.rewards)
#weights = self.walker.get_weights()
#weights = self.weights
for k in self.weights:
weights_change = np.dot(self.mutated_weights[k].transpose(1, 2, 0), A)
self.weights[k] = self.weights[k] + self.lr/(self.size*self.mutation_factor) * weights_change
#self.walker.set_weights(weights)
weights = self.walker.get_weights()
for i in range(self.size):
for k in weights:
weights_change = np.dot(self.mutants[i].weights[k].T, A[i]).T
weights[k] = weights[k] + self.lr/(self.size*self.mutation_factor) * weights_change
self.walker.set_weights(weights)
for mutant in self.mutants:
mutant.set_weights(weights)
self.gen += 1
if self.render_best:
self.test_walker()
self.test_walker(self.render_best)
def test_walker(self, render):
reward = self.walker.get_reward(render=render)
if self.average_reward is None:
self.average_reward = reward
else:
self.average_reward = 0.9 * self.average_reward + 0.1 * reward
print("Current Reward: ", reward)
print("Average Reward: ", self.average_reward)
def test_walker(self):
#reward = self.walker.get_reward(render=True)
#print(reward)
return
def get_walker_stats(self):
avg_reward = self.walker.get_reward(steps=10000)
for i in range(9):
reward = self.walker.get_reward(steps=10000)
avg_reward = 0.9 * avg_reward + 0.1 * reward
return avg_reward
def mutate(self): # mutates all the weights of the mutants
#for i in range(len(self.mutants)):
# self.mutants[i].mutate(self.mutation_factor)
#weights = self.walker.get_weights()
self.mutated_weights = {}
#weights = self.weights
for k, v in self.weights.items():
self.mutated_weights[k] = np.random.randn(self.size, v.shape[0], v.shape[1])
for i in range(len(self.mutants)):
self.mutants[i].mutate(self.mutation_factor)
# def increase_moves(self, size): # increase the number of directions for the brain
......
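
Note: evolve() follows the usual evolution-strategies recipe: standardize the episode rewards into advantages, then nudge the base weights along the advantage-weighted sum of the perturbations, scaled by lr / (population size * mutation factor). The removed vectorized code stated this directly; the new per-mutant loop sums A_i times each mutant's full weights instead, which points in the same direction because the standardized advantages sum to zero (the shared base weights cancel out of the sum), although the effective step size then differs by a factor of the mutation factor. Below is a minimal sketch of the vectorized form, using hypothetical names.

import numpy as np

def es_update(weights, noises, rewards, lr, sigma):
    # Sketch of the evolution-strategies step performed by evolve().
    # weights: dict of base parameters theta
    # noises:  dict with, per key, an array of shape (size, rows, cols) holding
    #          the perturbations epsilon_i used to build the mutants
    # rewards: per-mutant episode rewards, shape (size,)
    # lr, sigma: learning rate and mutation factor
    size = rewards.shape[0]
    advantages = (rewards - rewards.mean()) / rewards.std()  # zero mean, unit std
    for k, eps in noises.items():
        # advantage-weighted sum of perturbations: sum_i A_i * eps_i
        change = np.tensordot(advantages, eps, axes=(0, 0))
        weights[k] = weights[k] + lr / (size * sigma) * change
    return weights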
@@ -28,10 +28,12 @@ class Walker:
return action
def get_reward(self, render = False):
def get_reward(self, steps = None, render = False):
observation = self.env.reset()
total_reward = 0
for t in range(self.steps):
if steps is None:
steps = self.steps
for t in range(steps):
if render:
self.env.render()
action = self.get_action(observation)
@@ -47,12 +49,10 @@
self.weights[k] = v + mutation_rate * np.random.randn(v.shape[0], v.shape[1])
def get_weights(self):
#return copy.deepcopy(self.weights)
return self.weights
return copy.deepcopy(self.weights)
def set_weights(self, weights):
#self.weights = copy.deepcopy(weights)
self.weights = weights
self.weights = copy.deepcopy(weights)
def save(self):
if not os.path.isdir('./models'):
......
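
Note: returning and storing copy.deepcopy of the weight dict looks like the key structural fix here. Previously get_weights/set_weights handed out references to the same dict and arrays, so the walker and all mutants could end up sharing one weight object, and a single mutate() would silently change everyone. A small illustration of that aliasing problem and how deepcopy avoids it (names are hypothetical):

import copy
import numpy as np

def fresh_weights():
    return {'W1': np.zeros((2, 3)), 'W2': np.zeros((3, 1))}

# Without copies, two "mutants" hold references to the same dict:
base = fresh_weights()
mutant_a, mutant_b = base, base        # roughly what the old set_weights amounted to
mutant_a['W1'] = mutant_a['W1'] + 1.0  # perturbing one ...
print(np.all(mutant_b['W1'] == 1.0))   # True: ... changes the other as well

# With copy.deepcopy, every mutant owns independent arrays:
base = fresh_weights()
mutant_a = copy.deepcopy(base)
mutant_b = copy.deepcopy(base)
mutant_a['W1'] = mutant_a['W1'] + 1.0
print(np.all(mutant_b['W1'] == 0.0))   # True: unaffected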