Skip to content
Snippets Groups Projects
Select Git revision
  • c255da4dd56398b3a641abfdf659d7da19bd5373
  • 2024ws default
  • 2023ws
  • 2022ws
  • 2021ws
  • 2020ws
  • 2018ws
  • 2019ws
  • 2017ws
  • 2016ws
10 results

if-3.c

Blame
  • population.py 2.19 KiB
    import numpy as np
    from walker import Walker
    import gym
    
    # Seed NumPy's global random generator with a fixed value at import time
    # (presumably so that weight initialisation and mutation are repeatable
    # between runs -- confirm that Walker draws from np.random).
    np.random.seed(42)
    
    
    class Population:
        """A central ``Walker`` plus ``size`` mutant walkers that are evolved together.

        Intended cycle per generation (as the methods below are written):
        ``mutate()`` perturbs the mutants, ``play_episode()`` scores them, and
        ``evolve()`` folds the scores back into the central walker's weights and
        resets every mutant to those new weights.
        """

        def __init__(self, size, hidden_layer, bias, mutation_factor, max_steps, load_brain, version, lr, render_best):
            """Create the environment, the central walker and the mutants.

            Args:
                size: Number of mutant walkers.
                hidden_layer: Forwarded unchanged to every ``Walker``.
                bias: Forwarded unchanged to every ``Walker``.
                mutation_factor: Passed to ``Walker.mutate`` and used as a divisor
                    of the update step in ``evolve``.
                max_steps: Step budget passed to ``Walker.get_reward`` when
                    scoring mutants and the central walker.
                load_brain: Forwarded to the central ``Walker``; when truthy,
                    every mutant is additionally given the central walker's
                    weights.
                version: Forwarded unchanged to every ``Walker``.
                lr: Learning rate used in ``evolve``.
                render_best: Passed as second argument to the central walker's
                    ``get_reward`` at the end of ``evolve`` (presumably toggles
                    rendering -- confirm in ``Walker``).
            """
            self.size = size
            self.mutation_factor = mutation_factor
            self.gen = 1
            self.version = version
            self.max_steps = max_steps
            self.render_best = render_best
            # One environment instance is created here and handed to every walker.
            self.env = gym.make('BipedalWalker-v3')  # MountainCarContinuous-v0  LunarLanderContinuous-v2  Pendulum-v1  CarRacing-v0
            self.walker = Walker(hidden_layer, bias, version, load_brain, self.env)
            # mutated_weights and envs are not used by any method of this class;
            # they are kept because code outside this class may read them.
            self.mutated_weights = dict()
            self.mutants = []
            self.envs = []
            self.rewards = None  # filled by play_episode()
            self.lr = lr
            walker_weights = self.walker.get_weights()
            for _ in range(self.size):
                mutant = Walker(hidden_layer, bias, version, False, self.env)
                if load_brain:
                    mutant.set_weights(walker_weights)
                self.mutants.append(mutant)

        def play_episode(self):
            """Score every mutant once and store the results in ``self.rewards``."""
            self.rewards = np.zeros(self.size)
            for index, mutant in enumerate(self.mutants[:self.size]):
                self.rewards[index] = mutant.get_reward(self.max_steps)

        @staticmethod
        def _normalized_advantages(rewards):
            """Return the z-scored rewards, or zeros when the rewards carry no ranking.

            Without the guard, identical rewards produce 0/0 = NaN (or, after
            rounding in the mean, a spurious +-1 for every mutant), which would
            then be written into all weights.
            """
            if rewards.size == 0:
                return np.zeros_like(rewards)
            spread = np.std(rewards)
            if spread == 0 or np.max(rewards) == np.min(rewards):
                return np.zeros_like(rewards)
            return (rewards - np.mean(rewards)) / spread

        def evolve(self):
            """Update the central weights from the mutants' rewards and sync all walkers.

            Every weight entry ``k`` of the central walker receives
            ``lr / (size * mutation_factor) * sum_i A[i] * mutants[i].weights[k]``
            where ``A`` are the normalized rewards. When all rewards are equal the
            advantages are zero and the weights stay unchanged. Afterwards the
            central walker and every mutant are set to the new weights, ``gen`` is
            incremented, and the reward of one run of the central walker is
            printed.

            Raises:
                RuntimeError: If ``play_episode`` has not been called yet.
            """
            if self.rewards is None:
                raise RuntimeError("play_episode() must be called before evolve()")
            advantages = self._normalized_advantages(np.asarray(self.rewards, dtype=float))

            weights = self.walker.get_weights()
            for mutant, advantage in zip(self.mutants, advantages):
                for k in weights:
                    # Scalar advantage times the mutant's weights (element-wise).
                    weights_change = np.multiply(mutant.weights[k], advantage)
                    weights[k] = weights[k] + self.lr/(self.size*self.mutation_factor) * weights_change
            self.walker.set_weights(weights)
            for mutant in self.mutants:
                mutant.set_weights(weights)
            self.gen += 1

            print("Reward: ", self.walker.get_reward(self.max_steps, self.render_best))

        def get_walker_stats(self, episodes=10, max_steps=10000):
            """Print and return the central walker's mean reward over several runs.

            Args:
                episodes: Number of runs to average (default 10).
                max_steps: Step budget passed to ``Walker.get_reward`` for each
                    run (default 10000).

            Returns:
                The arithmetic mean of the collected rewards.

            Raises:
                ValueError: If ``episodes`` is smaller than 1.
            """
            if episodes < 1:
                raise ValueError("episodes must be at least 1")
            rewards = [self.walker.get_reward(max_steps) for _ in range(episodes)]
            avg_reward = sum(rewards) / len(rewards)
            print("Average reward: ", avg_reward)
            return avg_reward

        def mutate(self):
            """Call ``mutate(self.mutation_factor)`` on every mutant."""
            for mutant in self.mutants:
                mutant.mutate(self.mutation_factor)