initial neuronal network evolution strategy

73e1da69 · Tobias Döring · 73e1da69 · 73e1da69 · 73e1da69 · 73e1da69
Commit 73e1da69 authored Jan 24, 2022 by Tobias Döring
--- a/.gitignore
+++ b/.gitignore
+models/
+__pycache__/
\ No newline at end of file
--- a/main.py
+++ b/main.py
+from population import Population
+import time
+import matplotlib.pyplot as plt
+# Run configuration. Every value except INCREASE_BY and GAME_CANCELLED is
+# handed to the Population constructor below.
+INCREASE_BY = 5  # only referenced by the disabled increase_moves() call in the loop
+H1 = 12  # passed as h1: hidden-layer width of every Walker
+POP_SIZE = 50  # passed as size: number of mutants per generation
+MUTATION_FACTOR = 0.1  # 0 <= x <= 1
+LEARNING_RATE = 0.03  # passed as lr: step size used by Population.evolve()
+GAME_CANCELLED = False  # loop guard; nothing in this script ever sets it to True
+LOAD_BRAIN = False  # passed as load_brain: start from the pickled weights
+RENDER_BEST = True  # passed as render_best
+VERSION = 1  # passed as version: numeric suffix of the model file name
+if __name__ == '__main__':
+    population = Population(
+        POP_SIZE,
+        H1,
+        MUTATION_FACTOR,
+        LOAD_BRAIN,
+        VERSION,
+        LEARNING_RATE,
+        RENDER_BEST,
+    )
+    # One pass of this loop is one generation: perturb the mutants, let each
+    # of them play an episode, then fold the rewards into the central walker.
+    while not GAME_CANCELLED:
+        population.mutate()
+        population.play_episode()
+        population.evolve()
+        # population.increase_moves(INCREASE_BY)  # disabled
+        generation = population.gen
+        print(f'Gen: {generation}')
+        print(f'Max Steps: {population.max_steps}')
+        # Checkpoint the central walker's weights every tenth generation.
+        if generation % 10 == 0:
+            population.walker.save()
--- a/population.py
+++ b/population.py
+import numpy as np
+import random
+import logging
+import copy
+from walker import Walker
+import gym
+MAX_STEPS = 1599  # after 1600 steps the Environment gives us a done anyway.
+
+
+class Population:
+    """Evolution-strategy population trained on the BipedalWalker-v3 environment.
+
+    A central ``walker`` holds the current weights. Each generation the
+    ``size`` mutants are perturbed (``mutate``), evaluated (``play_episode``)
+    and their rewards are used to move the central weights (``evolve``).
+    All walkers share the single environment stored in ``env``.
+    """
+
+    def __init__(self, size, h1, mutation_factor, load_brain, version, lr, render_best):
+        """Create the environment, the central walker and ``size`` mutants.
+
+        Args:
+            size: number of mutants evaluated per generation.
+            h1: forwarded to every ``Walker``.
+            mutation_factor: scale handed to ``Walker.mutate``. ``evolve``
+                divides by it, so it must be non-zero.
+            load_brain: forwarded to every ``Walker``; when true the mutants
+                are additionally mutated once right after construction.
+            version: forwarded to every ``Walker``.
+            lr: learning rate used by ``evolve``.
+            render_best: accepted but not used by this class.
+        """
+        self.size = size
+        self.mutation_factor = mutation_factor
+        self.gen = 1
+        self.version = version
+        self.max_steps = MAX_STEPS
+        self.env = gym.make('BipedalWalker-v3')
+        self.walker = Walker(h1, version, load_brain, self.env, self.max_steps)
+        self.envs = []
+        self.fitnesses = None  # filled by play_episode()
+        self.lr = lr
+        self.mutants = [
+            Walker(h1, version, load_brain, self.env, self.max_steps)
+            for _ in range(self.size)
+        ]
+        if load_brain:
+            self.mutate()
+
+    def play_episode(self):
+        """Run one episode per mutant and store the rewards in ``fitnesses``."""
+        self.fitnesses = np.array(
+            [mutant.get_reward() for mutant in self.mutants], dtype=float
+        )
+
+    def evolve(self):
+        """Move the central weights along the reward-weighted mutant weights.
+
+        Rewards are standardised to zero mean and unit variance; each mutant's
+        weights are then added to the central weights, scaled by its
+        standardised reward and ``lr / (size * mutation_factor)``. Afterwards
+        every mutant is reset to the new central weights and ``gen`` is
+        incremented.
+
+        When all rewards are identical (standard deviation 0) the rewards
+        carry no ranking information and standardising them would divide by
+        zero, writing NaN into every weight; the weight update is skipped for
+        that generation instead.
+        """
+        rewards = self.fitnesses
+        spread = np.std(rewards)
+        weights = self.walker.get_weights()
+        if spread > 0:
+            advantages = (rewards - np.mean(rewards)) / spread
+            step = self.lr / (self.size * self.mutation_factor)
+            for mutant, advantage in zip(self.mutants, advantages):
+                for key in weights:
+                    weights[key] += step * (mutant.weights[key] * advantage)
+        self.walker.set_weights(weights)
+        for mutant in self.mutants:
+            mutant.set_weights(weights)
+        self.gen += 1
+
+    def mutate(self):
+        """Perturb every mutant's weights; the central walker is left untouched."""
+        for mutant in self.mutants:
+            mutant.mutate(self.mutation_factor)
+
+    def increase_moves(self, size):  # increase the number of directions for the brain
+        """Forward ``size`` to each mutant's ``brain.increase_moves``.
+
+        NOTE(review): the Walker class in walker.py defines no ``brain``
+        attribute, so calling this raises AttributeError as the code stands.
+        The only call site in main.py is commented out.
+        """
+        if len(self.mutants[0].brain.directions) < self.max_steps:
+            for walker in self.mutants:
+                walker.brain.increase_moves(size)
--- a/walker.py
+++ b/walker.py
+import gym
+import numpy as np
+import pickle
+import copy
+import os
+# Seed NumPy's global RNG at import time so that weight initialisation and
+# mutation noise drawn through np.random are repeatable between runs.
+np.random.seed(42)
+
+
+class Walker:
+    """Two-layer tanh network that maps an observation to an action.
+
+    ``weights['W1']`` has shape ``(24, h1)`` and ``weights['W2']`` has shape
+    ``(h1, 4)``, so observations must have 24 entries and actions have 4.
+    """
+
+    def __init__(self, h1, version, load_brain, env, steps):
+        """Initialise random weights, optionally replacing them from disk.
+
+        Args:
+            h1: width of the hidden layer.
+            version: numeric suffix of the model file used by save()/load().
+            load_brain: when true, overwrite the random weights via load().
+            env: environment object used by get_reward(); it must provide
+                ``reset()`` and ``step(action)``.
+            steps: maximum number of environment steps per episode.
+        """
+        self.version = version
+        self.h1 = h1
+        # Scale each layer by 1/sqrt(fan_in) to keep the initial activations small.
+        self.weights = {
+            'W1': np.random.randn(24, h1) / np.sqrt(24),
+            'W2': np.random.randn(h1, 4) / np.sqrt(h1),
+        }
+        if load_brain:
+            self.load()
+        self.steps = steps
+        self.env = env
+
+    def get_action(self, observation):
+        """Return ``tanh(tanh(observation @ W1) @ W2)``."""
+        hidden = np.tanh(np.matmul(observation, self.weights['W1']))
+        return np.tanh(np.matmul(hidden, self.weights['W2']))
+
+    def get_reward(self):
+        """Play one episode of at most ``steps`` steps and return the summed reward."""
+        observation = self.env.reset()
+        total_reward = 0
+        for _ in range(self.steps):
+            action = self.get_action(observation)
+            observation, reward, done, _info = self.env.step(action)
+            total_reward += reward
+            if done:
+                break
+        return total_reward
+
+    def mutate(self, mutation_rate):
+        """Add Gaussian noise scaled by ``mutation_rate`` to every weight array."""
+        for key, value in self.weights.items():
+            # *value.shape keeps this correct for weight arrays of any rank.
+            noise = np.random.randn(*value.shape)
+            self.weights[key] = value + mutation_rate * noise
+
+    def get_weights(self):
+        """Return a deep copy of the weight dictionary."""
+        return copy.deepcopy(self.weights)
+
+    def set_weights(self, weights):
+        """Replace the weights with a deep copy of ``weights``."""
+        self.weights = copy.deepcopy(weights)
+
+    def save(self):
+        """Pickle the weights to ``./models/model-pedal<version>.p``."""
+        # exist_ok avoids the check-then-create race of isdir() + mkdir().
+        os.makedirs('./models', exist_ok=True)
+        with open('./models/model-pedal%d.p' % self.version, 'wb') as fp:
+            pickle.dump(self.weights, fp)
+
+    def load(self):
+        """Replace the weights with those pickled in ``./models/model-pedal<version>.p``.
+
+        Raises:
+            FileNotFoundError: if no model file exists for this version.
+        """
+        # NOTE: unpickling can execute arbitrary code; only load model files
+        # that were written by save() on a trusted machine.
+        with open('./models/model-pedal%d.p' % self.version, 'rb') as fp:
+            self.weights = pickle.load(fp)