population.py
    import numpy as np
    from walker import Walker
    import gym
    
    np.random.seed(42)
    
    class Population:
        """Evolution-strategies-style population of Walker agents for BipedalWalker-v3."""

        def __init__(self, size, hidden_layer, mutation_factor, max_steps, load_brain, version, lr, render_best):
            self.size = size
            self.mutation_factor = mutation_factor
            #self.fitness_sum = 0.0
            self.gen = 1
            self.version = version
            self.max_steps = max_steps
            self.render_best = render_best
            self.env = gym.make('BipedalWalker-v3')
            self.walker = Walker(hidden_layer, version, load_brain, self.env)
            self.mutated_weights = dict()
            self.mutants = []
            self.envs = []
            self.rewards = None
            self.lr = lr
            # Build the mutant population; when a saved brain is loaded, every
            # mutant starts from the central walker's weights.
            walker_weights = self.walker.get_weights()
            for i in range(self.size):
                self.mutants.append(Walker(hidden_layer, version, False, self.env))
                if load_brain:
                    self.mutants[-1].set_weights(walker_weights)
    
        # def calculate_fitness_sum(self):
        #     self.fitness_sum = 0
        #     self.rewards = np.zeros(self.size)
        #     for i in range(self.size):
        #         self.rewards[i] = self.mutants[i].fitness
        #     self.rewards -= np.min(self.rewards)  # maybe offset: +1
        #     self.fitness_sum = np.sum(self.rewards)
    
        def get_action(self, observation, weights):
            # Two-layer tanh policy: observation -> hidden layer -> action in [-1, 1].
            hl = np.matmul(observation, weights['W1'])
            hl = np.tanh(hl)
            action = np.matmul(hl, weights['W2'])
            action = np.tanh(action)

            return action
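        # Note: weights['W1'] is assumed to be (observation_dim, hidden_layer) and
        # weights['W2'] (hidden_layer, action_dim); BipedalWalker-v3 observations
        # are 24-dimensional and actions 4-dimensional.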
    
        def play_episode(self):
            # Run one episode per mutant and record the reward each one earns.
            self.rewards = np.zeros(self.size)
            for i in range(self.size):
                self.rewards[i] = self.mutants[i].get_reward(self.max_steps)
    
        def evolve(self):
            # Normalise rewards into advantages with zero mean and unit variance.
            A = (self.rewards - np.mean(self.rewards)) / np.std(self.rewards)
            weights = self.walker.get_weights()
            for i in range(self.size):
                for k in weights:
                    # Weight each mutant's parameters by its advantage and scale the
                    # update by lr / (population size * mutation scale), as in
                    # OpenAI-style evolution strategies (this treats mutation_factor
                    # as the scale of the weight perturbations).
                    weights_change = self.mutants[i].weights[k] * A[i]
                    weights[k] = weights[k] + self.lr / (self.size * self.mutation_factor) * weights_change
            self.walker.set_weights(weights)
            for mutant in self.mutants:
                mutant.set_weights(weights)
            self.gen += 1

            print("Reward: ", self.walker.get_reward(self.max_steps, self.render_best))
    
        def get_walker_stats(self):
            # Average the central walker's reward over 10 evaluation episodes.
            avg_reward = []
            for i in range(10):
                avg_reward.append(self.walker.get_reward(10000))
            avg_reward = sum(avg_reward) / len(avg_reward)
            print("Average reward: ", avg_reward)
            return avg_reward
    
        def mutate(self):  # mutates all the weights of the mutants
            for mutant in self.mutants:
                mutant.mutate(self.mutation_factor)
    
        # def increase_moves(self, size):  # increase the number of directions for the brain
        #     if len(self.mutants[0].brain.directions) < self.max_steps:
        #         for walker in self.mutants:
        #             walker.brain.increase_moves(size)
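
    # --- Hypothetical usage sketch (not part of the original file) ---
    # Shows how the Population API above might be driven from a training script.
    # The parameter values, the generation count, and this __main__ entry point
    # are illustrative assumptions, not the project's actual configuration.
    if __name__ == '__main__':
        population = Population(size=50, hidden_layer=12, mutation_factor=0.1,
                                max_steps=1600, load_brain=False, version=1,
                                lr=0.03, render_best=False)
        for generation in range(100):
            population.mutate()        # perturb every mutant's weights
            population.play_episode()  # one episode per mutant -> self.rewards
            population.evolve()        # ES-style update of the central walker
        population.get_walker_stats()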