Commit 1841113a authored by Philip Maas

Merge branch 'evo-neuro' into 'evaluations'

# Conflicts:
#   main.py
#   population.py
#   walker.py
parents 1f761db3 8c5bdea8
Related merge requests: !2 Evo neuro, !1 Evaluations
.gitignore
models/
__pycache__/
\ No newline at end of file
main.py
from population import Population
import time
import matplotlib.pyplot as plt
import pickle
import sys
INCREASE_BY = 5
BRAIN_SIZE = 50
HIDDEN_LAYER = 12
POP_SIZE = 50
MUTATION_FACTOR = 0.1 # 0 <= x <= 1
GAME_CANCELLED = False
LOAD_BRAIN = True  # if this is True, BRAIN_SIZE doesn't matter
LEARNING_RATE = 0.03 # 0 <= x <= 1
GENS = 1000
MAX_STEPS = 300  # the environment returns done after 1600 steps anyway
VERSION = 1
TEST_WALKER = True
LOAD_BRAIN = False
RENDER_BEST = False
if TEST_WALKER:
LOAD_BRAIN = True
def plot_reward(rewards):
plt.title(f'{HIDDEN_LAYER}, {VERSION}, {POP_SIZE}, {LEARNING_RATE}')
plt.xlabel('Episodes/10')
plt.ylabel('Rewards')
plt.plot(rewards)
plt.savefig(f'./models/{HIDDEN_LAYER}_{VERSION}_{POP_SIZE}_{LEARNING_RATE}.png')
plt.show()
if __name__ == '__main__':
population = Population(POP_SIZE, BRAIN_SIZE, MUTATION_FACTOR, LOAD_BRAIN, RENDER_BEST)
while GAME_CANCELLED is False:  # main game loop
if population.all_players_finished():  # once every walker of this generation is dead or done, run the genetic algorithm step
population.natural_selection()
population.mutate_babies()
population.increase_moves(INCREASE_BY)
population.reset_environments()
print(f'Gen: {population.gen}')
print(f'Best Index: {population.best_walker_index}')
print(f'Best Fitness: {population.fitnesses[population.best_walker_index]}')
print(f'Max Steps: {population.max_steps}')
else:
population.update()
# time.sleep(0.1)
avg_rewards = []
try:
population = Population(POP_SIZE, HIDDEN_LAYER, MUTATION_FACTOR, MAX_STEPS, LOAD_BRAIN, VERSION, LEARNING_RATE, RENDER_BEST)
if TEST_WALKER:
rewards = []
population.walker.plot_input_weights()
for i in range(10):
rewards.append(population.walker.get_reward(10000, True))
print("Reward: ", rewards[-1])
print("Average Reward: ", sum(rewards) / len(rewards))
plot_reward(rewards)
sys.exit(0)
for gen in range(GENS):  # main training loop
start_time = time.time()
print(f'Gen: {gen}')
#print(f'Max Steps: {population.max_steps}')
population.mutate()
population.play_episode()
population.evolve()
#population.increase_moves(INCREASE_BY)
print("Time for Gen: ", time.time() - start_time)
if gen % 10 == 0:
population.walker.save()
avg_rewards.append(population.get_walker_stats())
with open(f'./models/{HIDDEN_LAYER}_{VERSION}_{POP_SIZE}_{LEARNING_RATE}_AvgRewards', 'wb') as fp:
pickle.dump(avg_rewards, fp)
if gen == 1000:  # note: range(GENS) ends at GENS - 1, so with GENS == 1000 this branch never runs
population.lr = 0.01
plot_reward(avg_rewards)
except KeyboardInterrupt:
if not TEST_WALKER:
plot_reward(avg_rewards)
population.py
@@ -3,107 +3,82 @@
import random
import logging
import copy
from walker import Walker
import gym
MAX_STEPS = 1599  # the environment returns done after 1600 steps anyway
np.random.seed(42)
class Population:
def __init__(self, size, brain_size, mutation_factor, load_brain, render_best):
def __init__(self, size, hidden_layer, mutation_factor, max_steps, load_brain, version, lr, render_best):
self.size = size
self.brain_size = brain_size
self.mutation_factor = mutation_factor
self.fitness_sum = 0.0
#self.fitness_sum = 0.0
self.gen = 1
self.best_walker_index = 0 # index of the best player in self.players
self.best_walker_fitness = 0.0
self.max_steps = MAX_STEPS
self.walkers = []
self.version = version
self.max_steps = max_steps
self.render_best = render_best
self.env = gym.make('BipedalWalker-v3')
self.walker = Walker(hidden_layer, version, load_brain, self.env)
self.mutated_weights = dict()
self.mutants = []
self.envs = []
self.fitnesses = None
self.rewards = None
self.lr = lr
walker_weights = self.walker.get_weights()
for i in range(self.size):
self.walkers.append(Walker(self.brain_size, load_brain, render_best))
self.reset_environments()
self.mutants.append(Walker(hidden_layer, version, False, self.env))
if load_brain:
self.mutate_babies()
self.mutants[-1].set_weights(walker_weights)
def reset_environments(self):
for walker in self.walkers:
walker.reset_environment()
# def calculate_fitness_sum(self):
# self.fitness_sum = 0
# self.rewards = np.zeros(self.size)
# for i in range(self.size):
# self.rewards[i] = self.mutants[i].fitness
# self.rewards -= np.min(self.rewards) # maybe offset: +1
# self.fitness_sum = np.sum(self.rewards)
def update(self):
for walker in self.walkers:
# if the walker has taken more steps than the best walker needed to reach the goal, it is marked dead
if walker.brain.step >= self.max_steps:
walker.dead = True
else:
walker.update()
def get_action(self, observation, weights):
hl = np.matmul(observation, weights['W1'])
hl = np.tanh(hl)
action = np.matmul(hl, weights['W2'])
action = np.tanh(action)
"""def calculate_fitness(self): # calculate the fitness of all players
for walker in self.walkers:
walker.calculate_fitness()"""
return action
def calculate_fitness_sum(self):
self.fitness_sum = 0
self.fitnesses = np.zeros(self.size)
def play_episode(self):
self.rewards = np.zeros(self.size)
for i in range(self.size):
self.fitnesses[i] = self.walkers[i].fitness
self.fitnesses -= np.min(self.fitnesses) # maybe offset: +1
self.fitness_sum = np.sum(self.fitnesses)
def all_players_finished(self): # returns whether all the players are either dead or have reached the goal
for walker in self.walkers:
if walker.dead is False and walker.reached_goal is False:
return False
return True
self.rewards[i] = self.mutants[i].get_reward(self.max_steps)
def natural_selection(self): # gets the next generation of players
#for i in range(self.size):
#new_walkers.append(Walker(self.envs[i], self.brain_size))
self.calculate_fitness_sum()
self.set_best_walker()
self.walkers[self.best_walker_index].brain.save()
# the champion lives on
new_walkers = [self.walkers[self.best_walker_index].get_baby()]
new_walkers[0].is_best = True
for i in range(1, self.size):
parent = self.select_parent() # select parent based on fitness
new_walkers.append(parent.get_baby()) # get baby from them
self.walkers = copy.copy(new_walkers)
def evolve(self):
A = (self.rewards - np.mean(self.rewards)) / np.std(self.rewards)
weights = self.walker.get_weights()
for i in range(self.size):
for k in weights:
weights_change = np.dot(self.mutants[i].weights[k].T, A[i]).T
weights[k] = weights[k] + self.lr/(self.size*self.lr) * weights_change  # note: lr/(size*lr) simplifies to 1/size, so the learning rate cancels out (a standalone sketch follows this method)
self.walker.set_weights(weights)
for mutant in self.mutants:
mutant.set_weights(weights)
self.gen += 1
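For reference, here is a standalone sketch of the weight update that evolve() performs. This is an editorial illustration rather than part of the commit; the helper name es_update and the plain-dict weight format are assumptions. Because A[i] is a scalar, np.dot(self.mutants[i].weights[k].T, A[i]).T is simply A[i] * self.mutants[i].weights[k], and lr / (size * lr) reduces to 1 / size, so the update adds a reward-weighted average of the mutants' weights onto the current weights:

import numpy as np

def es_update(parent_weights, mutant_weights_list, rewards):
    # parent_weights: dict of NumPy arrays, e.g. {'W1': ..., 'W2': ...}
    # mutant_weights_list: one such dict per mutant
    # rewards: 1-D array with one episode reward per mutant
    A = (rewards - np.mean(rewards)) / np.std(rewards)  # standardize the rewards
    pop_size = len(mutant_weights_list)
    new_weights = {k: v.copy() for k, v in parent_weights.items()}
    for i in range(pop_size):
        for k in new_weights:
            # lr / (pop_size * lr) == 1 / pop_size, so the learning rate has no effect here
            new_weights[k] += (1.0 / pop_size) * A[i] * mutant_weights_list[i][k]
    return new_weights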
# Chooses a walker from the population at random, weighted by fitness (roulette-wheel selection).
# A value ('arrow') is drawn uniformly between 0 and the fitness sum; the fitnesses are then added
# to a running sum, and the first walker whose running sum exceeds the arrow is returned.
# Walkers with higher fitness add more to the running sum, so they are more likely to be chosen.
# (A vectorized equivalent is sketched right after this method.)
def select_parent(self):
arrow = random.uniform(0, self.fitness_sum)
running_sum = 0.0  # running total of the fitnesses (the 'bars' laid end to end)
for i in range(self.size):
running_sum += self.fitnesses[i]
if running_sum > arrow:
return self.walkers[i]
# should never get to this point
logging.error("Critical Error in select_parent")
return None
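As a side note, the same fitness-proportionate draw can be written in a few vectorized lines. This is an editorial sketch, not part of the commit; the helper name select_parent_index is made up for illustration, and fitnesses is assumed to be the non-negative array built in calculate_fitness_sum:

import numpy as np

def select_parent_index(fitnesses):
    # Cumulative fitnesses are the 'bars' laid end to end; searchsorted finds the bar the arrow lands in.
    cumulative = np.cumsum(np.asarray(fitnesses, dtype=float))
    arrow = np.random.uniform(0, cumulative[-1])
    return int(np.searchsorted(cumulative, arrow, side='right'))

Equivalently, np.random.choice(len(fitnesses), p=fitnesses / fitnesses.sum()) performs the same weighted draw in one call, provided the fitness sum is positive.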
print("Reward: ", self.walker.get_reward(self.max_steps, self.render_best))
def mutate_babies(self): # mutates all the brains of the babies
for i in range(1, len(self.walkers)): # we don't want to mutate the champion's brain
self.walkers[i].brain.mutate(self.mutation_factor)
def get_walker_stats(self):
avg_reward = []
for i in range(10):
avg_reward.append(self.walker.get_reward(10000))
avg_reward = sum(avg_reward) / len(avg_reward)
print("Average reward: ", avg_reward)
return avg_reward
def set_best_walker(self): # finds the player with the highest fitness and sets it as the best one
max_index = np.argmax(self.fitnesses)
self.best_walker_index = max_index
self.best_walker_fitness = self.walkers[max_index].fitness
def mutate(self): # mutates all the weights of the mutants
for i in range(len(self.mutants)):
self.mutants[i].mutate(self.mutation_factor)
# if this walker reached the goal, lower the step limit to the number of steps it needed
if self.walkers[max_index].reached_goal:
self.max_steps = self.walkers[max_index].brain.step
logging.info('Found goal?!')
logging.info("step:", self.max_steps)
def increase_moves(self, size): # increase the number of directions for the brain
if len(self.walkers[0].brain.directions) < self.max_steps:
for walker in self.walkers:
walker.brain.increase_moves(size)
# def increase_moves(self, size): # increase the number of directions for the brain
# if len(self.mutants[0].brain.directions) < self.max_steps:
# for walker in self.mutants:
# walker.brain.increase_moves(size)
walker.py
from brain import Brain
import gym
import numpy as np
import pickle
import copy
import os
import matplotlib.pyplot as plt
np.random.seed(42)
class Walker:
def __init__(self, brain_size, load_brain, render_best):
self.brain = Brain(brain_size, load_brain) # new brain with X instructions
self.dead = False
self.reached_goal = False
self.is_best = False # true if this dot is the best dot from the previous generation
self.fitness = 0.0
self.env = gym.make('BipedalWalker-v3')
self.render_best = render_best
# self.pos = copy.copy(self.map.startpoint)
def update(self):  # moves the walker according to the brain's directions
if (self.dead is True) or (self.reached_goal is True):
return
if self.brain.step >= len(self.brain.directions):
self.dead = True
return
observation, reward, done, info = self.env.step(self.brain.get_move())
self.fitness += reward
if reward == -100:
self.dead = True
elif done is True:
self.reached_goal = True
self.fitness += 10000000
if self.is_best and self.render_best:
def __init__(self, hidden_layer, version, load_brain, env):
self.version = version
self.hidden_layer = hidden_layer
if load_brain:
self.load()
else:
self.weights = {}
self.weights['W1'] = np.random.randn(24, hidden_layer) / np.sqrt(24)  # input layer: 24 observation values, scaled by sqrt(fan-in)
self.weights['W2'] = np.random.randn(hidden_layer, 4) / np.sqrt(hidden_layer)  # output layer: 4 continuous joint actions
self.env = env
#self.render_best = render_best
def get_action(self, observation):
hl = np.matmul(observation, self.weights['W1'])
hl = np.tanh(hl)
action = np.matmul(hl, self.weights['W2'])
action = np.tanh(action)
return action
def get_reward(self, steps, render = False):
observation = self.env.reset()
total_reward = 0
for t in range(steps):
if render:
self.env.render()
action = self.get_action(observation)
observation, reward, done, info = self.env.step(action)
total_reward += reward
if done:
break
return total_reward
def mutate(self, mutation_rate):
for k, v in self.weights.items():
self.weights[k] = v + mutation_rate * np.random.randn(v.shape[0], v.shape[1])
def get_weights(self):
return copy.deepcopy(self.weights)
def set_weights(self, weights):
self.weights = copy.deepcopy(weights)
def plot_input_weights(self):
weights = []
names = [
"hull_angle",
"hull_angularVelocity",
"vel_x",
"vel_y",
"hip_joint_1_angle",
"hip_joint_2_angle",
"knee_joint_1_angle",
"knee_joint_2_angle",
"leg_1_ground_contact_flag",
"hip_joint_2_angle",
"hip_joint_2_speed",
"knee_joint_2_angle",
"knee_joint_2_speed",
"leg_2_ground_contact_flag",
"lidar reading 1",
"lidar reading 2",
"lidar reading 3",
"lidar reading 4",
"lidar reading 5",
"lidar reading 6",
"lidar reading 7",
"lidar reading 8",
"lidar reading 9",
"lidar reading 10"
]
for i in range(24):
weights.append(sum(self.weights['W1'][i]))  # total outgoing weight from observation feature i
plt.bar(names, weights)
plt.xticks(rotation = 45, ha = "right")
plt.show()
def save(self):
if not os.path.isdir('./models'):
os.mkdir('./models')
with open('./models/model-pedal%d.p' % self.version, 'wb') as fp:
pickle.dump(self.weights, fp)
""" def get_fitness(self):
if self.reached_goal:
# if the dot reached the goal then the fitness is based on the amount of steps it took to get there
self.fitness = 1 / 16 + 10000.0 / (self.brain.step ** 2)
else: # if the dot didn't reach the goal then the fitness is based on how close it is to the goal
self.fitness = 1 / (self.map.get_closest_distance(self.pos[X], self.pos[Y]) ** 2)
return self.fitness"""
def reset_environment(self):
self.env.reset()
def get_baby(self):
baby = Walker(0, False, self.render_best)
baby.brain = self.brain.clone() # babies have the same brain as their parents
self.env.close()
return baby
def load(self):
with open('./models/model-pedal%d.p' % self.version, 'rb') as fp:
self.weights = pickle.load(fp)
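Finally, a minimal usage sketch for the Walker class above (an editorial illustration, not part of the commit). It assumes gym with BipedalWalker-v3 is installed and relies on the classic gym API that the file itself uses (env.reset() returns only the observation). The policy maps the 24-dimensional observation through one tanh hidden layer to a 4-dimensional action whose components all lie in [-1, 1]:

import gym
from walker import Walker

env = gym.make('BipedalWalker-v3')
walker = Walker(hidden_layer=12, version=1, load_brain=False, env=env)
observation = env.reset()
action = walker.get_action(observation)      # NumPy array of shape (4,), values in [-1, 1]
total_reward = walker.get_reward(steps=300)  # play one episode of at most 300 steps
print(action, total_reward)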