Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
Bipedal Walker Evo
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Philip Maas
Bipedal Walker Evo
Commits
caf3014d
Commit
caf3014d
authored
Jan 17, 2022
by
Philip Maas
Browse files
Options
Downloads
Patches
Plain Diff
First Upload
parent
5826010f
No related branches found
No related tags found
1 merge request
!1
Evaluations
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
brain.py
+40
-0
40 additions, 0 deletions
brain.py
main.py
+26
-0
26 additions, 0 deletions
main.py
population.py
+109
-0
109 additions, 0 deletions
population.py
walker.py
+42
-0
42 additions, 0 deletions
walker.py
with
217 additions
and
0 deletions
brain.py
0 → 100644
+
40
−
0
View file @
caf3014d
import
numpy
as
np
import
random
import
copy
class Brain:
    """A fixed sequence of random actions (4-dim vectors in [-1, 1]) that is
    replayed one step at a time by a Walker."""

    # default fraction of moves replaced by mutate()
    MUTATION_RATE = 0.1

    def __init__(self, size):
        """Create a brain with `size` random moves.

        :param size: initial number of moves in the sequence
        """
        self.directions = []  # list of np.ndarray of shape (4,), values in [-1, 1)
        self.step = 0  # index of the next move to replay
        self.increase_moves(size)

    def get_move(self):
        """Return the next move and advance the step counter.

        Raises IndexError once the sequence is exhausted; callers are expected
        to bound-check first (see Walker.update).
        """
        move = self.directions[self.step]
        self.step += 1
        return move

    def increase_moves(self, size):
        """Append `size` fresh uniformly random moves — we want different and
        random movements."""
        for _ in range(size):
            self.directions.append(np.random.uniform(-1, 1, 4))

    def clone(self):
        """Return a copy of this brain with the same directions and step reset to 0."""
        # Start from an empty brain instead of Brain(len(self.directions)):
        # the original generated a full set of random moves only to overwrite
        # every one of them immediately.
        clone = Brain(0)
        clone.directions = [copy.copy(direction) for direction in self.directions]
        return clone

    def mutate(self, rate=MUTATION_RATE):
        """Mutate the brain by replacing roughly `rate` of the directions
        (default: roughly 10% of the movements) with random movements.

        :param rate: per-move probability of replacement in [0, 1]
        """
        for i in range(len(self.directions)):
            if random.random() < rate:
                self.directions[i] = np.random.uniform(-1, 1, 4)
if __name__ == '__main__':  # for debugging: inspect a freshly generated brain
    debug_brain = Brain(100)
    print(debug_brain.directions)
    print(len(debug_brain.directions))
This diff is collapsed.
Click to expand it.
main.py
0 → 100644
+
26
−
0
View file @
caf3014d
import
gym
from
population
import
Population
import
time
import
numpy
as
np
import
matplotlib.pyplot
as
plt
INCREASE_BY = 5  # moves appended to every brain after each generation
GAME_CANCELLED = False  # loop flag; nothing in this file ever sets it True — stop with Ctrl+C

# NOTE(review): this module-level env appears unused — Population creates one
# env per walker itself; verify against the rest of the project before removing.
env = gym.make('BipedalWalker-v3')
if __name__ == '__main__':
    population = Population(50)
    # this is our game
    while not GAME_CANCELLED:
        if not population.all_players_finished():
            # generation still running: advance every walker one step
            population.update()
            # time.sleep(0.1)
            continue
        # this is our genetic algorithm after one generation of players
        population.natural_selection()
        population.mutate_babies()
        population.increase_moves(INCREASE_BY)
        population.reset_environments()
        print(f'Best Index: {population.best_walker_index}')
        print(f'Best Fitness: {population.fitnesses[population.best_walker_index]}')
        print(f'Max Steps: {population.max_steps}')
This diff is collapsed.
Click to expand it.
population.py
0 → 100644
+
109
−
0
View file @
caf3014d
import
numpy
as
np
from
walker
import
Walker
import
gym
import
random
import
logging
import
copy
MAX_STEPS = 10000  # initial per-walker step budget; Population lowers it once a walker reaches the goal
class Population:
    """A generation of Walkers evolved with a simple genetic algorithm."""

    def __init__(self, size):
        """Create `size` walkers, each with its own BipedalWalker environment.

        :param size: number of walkers per generation
        """
        self.size = size
        self.fitness_sum = 0.0  # sum of min-shifted fitnesses, set by calculate_fitness_sum()
        self.gen = 1  # generation counter
        self.best_walker_index = 0  # index of the best player in self.walkers
        self.best_walker_fitness = 0.0
        self.max_steps = MAX_STEPS  # step budget; tightened once a walker reaches the goal
        self.walkers = []
        self.envs = []
        self.fitnesses = None  # np.ndarray after calculate_fitness_sum()
        for i in range(self.size):
            self.envs.append(gym.make('BipedalWalker-v3'))
            self.walkers.append(Walker(self.envs[i]))
        self.reset_environments()

    def reset_environments(self):
        """Reset every gym environment to its start state."""
        for env in self.envs:
            env.reset()

    def update(self):
        """Advance every walker by one step, killing walkers over the budget."""
        for walker in self.walkers:
            # if the player has taken more steps than the best player needed
            # to reach the goal, he's dead
            if walker.brain.step > self.max_steps:
                walker.dead = True
            else:
                walker.update()

    def calculate_fitness_sum(self):
        """Collect all fitnesses, shift them so the minimum is 0, cache the sum."""
        self.fitnesses = np.array([walker.fitness for walker in self.walkers],
                                  dtype=float)
        self.fitnesses -= np.min(self.fitnesses)  # maybe offset: +1
        self.fitness_sum = np.sum(self.fitnesses)

    def all_players_finished(self):
        """Return True when every walker is either dead or has reached the goal."""
        for walker in self.walkers:
            if walker.dead is False and walker.reached_goal is False:
                return False
        return True

    def natural_selection(self):
        """Breed the next generation: the champion survives unchanged, everyone
        else is the baby of a fitness-proportionally selected parent."""
        new_walkers = [Walker(self.envs[i]) for i in range(self.size)]
        self.calculate_fitness_sum()
        self.set_best_walker()
        # the champion lives on
        new_walkers[0] = self.walkers[self.best_walker_index].get_baby()
        new_walkers[0].is_best = True
        for i in range(1, len(new_walkers)):
            parent = self.select_parent()  # select parent based on fitness
            new_walkers[i] = parent.get_baby()  # get baby from them
        self.walkers = copy.copy(new_walkers)
        self.gen += 1

    def select_parent(self):
        """Roulette-wheel selection: pick a walker with probability
        proportional to its (min-shifted) fitness.

        Draw an 'arrow' in [0, fitness_sum], then add the fitness bars
        together until the running sum passes the arrow; fitter walkers cover
        more of the range and are chosen more often.
        """
        # Degenerate case: when all walkers have equal fitness, the min-shift
        # zeroes every bar and the wheel has no area. The original code logged
        # an error and returned None here, which crashed the caller on
        # parent.get_baby() — fall back to a uniform random pick instead.
        if self.fitness_sum <= 0:
            return random.choice(self.walkers)
        arrow = random.uniform(0, self.fitness_sum)
        running_sum = 0.0  # those are the bars we add together
        for i in range(self.size):
            running_sum += self.fitnesses[i]
            if running_sum > arrow:
                return self.walkers[i]
        # float rounding can leave running_sum a hair below the arrow;
        # the last walker is the mathematically correct pick in that case
        return self.walkers[-1]

    def mutate_babies(self):
        """Mutate the brains of all babies.

        Starts at index 1 — we don't want to mutate the champion's brain.
        """
        for i in range(1, len(self.walkers)):
            self.walkers[i].brain.mutate()

    def set_best_walker(self):
        """Find the fittest walker; if it reached the goal, tighten max_steps."""
        max_index = np.argmax(self.fitnesses)
        self.best_walker_index = max_index
        self.best_walker_fitness = self.walkers[max_index].fitness
        # if this dot reached the goal then reset the minimum number of steps
        # it takes to get to the goal
        if self.walkers[max_index].reached_goal:
            self.max_steps = self.walkers[max_index].brain.step
            logging.info('Found goal?!')
            # the original passed max_steps as a stray logging argument with no
            # placeholder, which makes logging raise a formatting error
            logging.info('step: %s', self.max_steps)

    def increase_moves(self, size):
        """Grow every brain by `size` moves, but never beyond the step budget."""
        if len(self.walkers[0].brain.directions) < self.max_steps:
            for walker in self.walkers:
                walker.brain.increase_moves(size)
This diff is collapsed.
Click to expand it.
walker.py
0 → 100644
+
42
−
0
View file @
caf3014d
from
brain
import
Brain
class Walker:
    """One agent: a gym environment plus a Brain that replays scripted moves."""

    def __init__(self, env):
        # new brain with X instructions
        self.brain = Brain(50)
        self.dead = False
        self.reached_goal = False
        # true if this dot is the best dot from the previous generation
        self.is_best = False
        self.fitness = 0.0
        self.env = env
        # self.pos = copy.copy(self.map.startpoint)

    def update(self):
        """Move the dot according to the brain's directions."""
        if self.dead is True or self.reached_goal is True:
            return  # finished walkers are inert
        brain = self.brain
        if brain.step >= len(brain.directions):
            # ran out of scripted moves
            self.dead = True
            return
        observation, reward, done, info = self.env.step(brain.get_move())
        self.fitness += reward
        if reward == -100:
            self.dead = True
        elif done is True:
            self.reached_goal = True
            self.fitness += 10000000
        if self.is_best:
            self.env.render()

    """
    def get_fitness(self):
        if self.reached_goal:
            # if the dot reached the goal then the fitness is based on the amount of steps it took to get there
            self.fitness = 1 / 16 + 10000.0 / (self.brain.step ** 2)
        else:  # if the dot didn't reach the goal then the fitness is based on how close it is to the goal
            self.fitness = 1 / (self.map.get_closest_distance(self.pos[X], self.pos[Y]) ** 2)
        return self.fitness
    """

    def get_baby(self):
        """Return a new Walker in the same env carrying a clone of this brain."""
        baby = Walker(self.env)
        # babies have the same brain as their parents
        baby.brain = self.brain.clone()
        return baby
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment