Skip to content
Snippets Groups Projects
Select Git revision
  • d48edcd327479e7a230e1a34684f251a5cd47337
  • master default protected
  • 2018ws
  • 2017ws
  • 2016ws
5 results

answer.h

Blame
  • Forked from Peter Gerwinski / hp
    Source project has a limited visibility.
    walker.py 3.71 KiB
    import numpy as np
    import pickle
    import copy
    import os
    import matplotlib.pyplot as plt
    
    import mlp_visualizer
    
    # Seed NumPy's global RNG so the random weight initialisation and the
    # mutations drawn via np.random.randn below are reproducible between runs.
    np.random.seed(42)
    
    
    class Walker:
        """Small feed-forward policy (input -> tanh hidden layer -> tanh output).

        The network weights live in ``self.weights`` as a dict with the keys
        ``'W1'`` (inputs x hidden) and ``'W2'`` (hidden x actions).  The class
        offers helpers to roll the policy out in ``env``, perturb the weights
        with Gaussian noise, visualise them, and pickle them to ``./models``.
        """

        # Directory the pickled weights are written to / read from.
        _MODEL_DIR = './models'

        # Human-readable names of the observation components, in index order.
        # NOTE(review): this matches the 24-dimensional BipedalWalker state
        # layout (4 hull values, 5 values per leg, 10 lidar readings) --
        # confirm that this is the environment the class is used with.
        _OBSERVATION_NAMES = (
            "hull_angle",
            "hull_angularVelocity",
            "vel_x",
            "vel_y",
            "hip_joint_1_angle",
            "hip_joint_1_speed",
            "knee_joint_1_angle",
            "knee_joint_1_speed",
            "leg_1_ground_contact_flag",
            "hip_joint_2_angle",
            "hip_joint_2_speed",
            "knee_joint_2_angle",
            "knee_joint_2_speed",
            "leg_2_ground_contact_flag",
            "lidar reading 1",
            "lidar reading 2",
            "lidar reading 3",
            "lidar reading 4",
            "lidar reading 5",
            "lidar reading 6",
            "lidar reading 7",
            "lidar reading 8",
            "lidar reading 9",
            "lidar reading 10",
        )

        def __init__(self, hidden_layer, bias, version, load_brain, env):
            """Create a walker, either with fresh random weights or a saved brain.

            Args:
                hidden_layer: Number of units in the hidden layer.
                bias: If truthy, a constant ``1`` is appended to every
                    observation, adding one extra input row to ``W1``.
                version: Integer used in the model file name (see ``save``).
                load_brain: If truthy, weights are read from disk via ``load``
                    instead of being randomly initialised.
                env: Environment exposing ``observation_space.shape`` and
                    ``action_space.shape`` plus ``reset``/``step``/``render``.
            """
            self.env = env
            self.version = version
            self.hidden_layer = hidden_layer
            self.bias = bias

            if load_brain:
                self.load()
            else:
                n_inputs = self.env.observation_space.shape[0] + int(bias)
                n_actions = self.env.action_space.shape[0]
                # Standard-normal weights scaled by 1/sqrt(fan_in).  W1 is drawn
                # before W2 so the seeded random stream is consumed in a fixed
                # order.
                self.weights = {}
                self.weights['W1'] = np.random.randn(n_inputs, hidden_layer) / np.sqrt(n_inputs)
                self.weights['W2'] = np.random.randn(hidden_layer, n_actions) / np.sqrt(hidden_layer)

        def get_action(self, observation):
            """Return the network output for ``observation``.

            Both layers use ``tanh``, so every action component lies in
            ``[-1, 1]``.
            """
            if self.bias:
                # The bias is modelled as an additional constant input of 1.
                observation = np.append(observation, 1)
            hidden = np.tanh(np.matmul(observation, self.weights['W1']))
            return np.tanh(np.matmul(hidden, self.weights['W2']))

        def get_reward(self, steps, render=False):
            """Run one episode of at most ``steps`` steps and return the summed reward.

            Works with environments whose ``step`` returns the 4-tuple
            ``(obs, reward, done, info)`` as well as with the newer 5-tuple
            ``(obs, reward, terminated, truncated, info)`` form, where ``reset``
            returns ``(obs, info)``.

            Args:
                steps: Maximum number of environment steps.
                render: If true, ``env.render()`` is called before every step.
            """
            observation = self.env.reset()
            # Newer Gym/Gymnasium versions return (observation, info) from reset().
            if (isinstance(observation, tuple) and len(observation) == 2
                    and isinstance(observation[1], dict)):
                observation = observation[0]

            total_reward = 0
            for _ in range(steps):
                if render:
                    self.env.render()
                result = self.env.step(self.get_action(observation))
                if len(result) == 5:
                    observation, reward, terminated, truncated, _info = result
                    done = terminated or truncated
                else:
                    observation, reward, done, _info = result
                total_reward += reward

                if done:
                    break
            return total_reward

        def mutate(self, mutation_rate):
            """Add Gaussian noise scaled by ``mutation_rate`` to every weight matrix."""
            # Only existing keys are reassigned, so iterating the dict is safe.
            for key, matrix in self.weights.items():
                self.weights[key] = matrix + mutation_rate * np.random.randn(*matrix.shape)

        def get_weights(self):
            """Return a deep copy of the weight dict (safe for the caller to modify)."""
            return copy.deepcopy(self.weights)

        def set_weights(self, weights):
            """Replace the weights with a deep copy of ``weights``."""
            self.weights = copy.deepcopy(weights)

        def _input_labels(self):
            """Return one unique label per input row of ``W1``.

            The named observation labels are used when the observation size
            matches the known layout; otherwise generic ``input_<i>`` labels
            are generated so plotting still works.  ``'bias'`` is appended when
            the bias input is enabled.
            """
            n_obs = self.env.observation_space.shape[0]
            if n_obs == len(self._OBSERVATION_NAMES):
                labels = list(self._OBSERVATION_NAMES)
            else:
                labels = ['input_%d' % i for i in range(n_obs)]
            if self.bias:
                labels.append('bias')
            return labels

        def plot_input_weights(self):
            """Show a bar chart of the summed outgoing ``W1`` weights per input."""
            n_inputs = self.env.observation_space.shape[0] + int(self.bias)
            # Sum over the hidden units: one value per input row.
            row_sums = self.weights['W1'][:n_inputs].sum(axis=1)
            # Labels must be unique: matplotlib treats strings as categories and
            # would draw bars with identical labels on top of each other.
            plt.bar(self._input_labels(), row_sums)
            plt.xticks(rotation=45, ha="right")
            plt.show()

        def save_mlp_weights(self, gen):
            """Draw the network with ``mlp_visualizer`` for generation ``gen``.

            Sets the visualizer's module-level layout parameters before building
            the drawing.  Presumably ``draw`` writes an image labelled with
            ``gen`` -- confirm against ``mlp_visualizer``.
            """
            mlp_visualizer.vertical_distance_between_layers = 40
            mlp_visualizer.horizontal_distance_between_neurons = 4
            mlp_visualizer.neuron_radius = 1
            mlp_visualizer.default_line_width = 1
            layer_sizes = [
                # int() keeps the layer size an integer even when bias is a bool.
                self.env.observation_space.shape[0] + int(self.bias),
                self.hidden_layer,
                self.env.action_space.shape[0],
            ]
            network = mlp_visualizer.NeuralNetwork(layer_sizes, [self.weights['W1'], self.weights['W2']])
            network.draw(gen)

        def _model_path(self):
            """Return the pickle file path for this walker's ``version``."""
            return '%s/model-pedal%d.p' % (self._MODEL_DIR, self.version)

        def save(self):
            """Pickle the weights to ``./models/model-pedal<version>.p``."""
            # exist_ok avoids the check-then-create race of isdir() + mkdir().
            os.makedirs(self._MODEL_DIR, exist_ok=True)
            with open(self._model_path(), 'wb') as fp:
                pickle.dump(self.weights, fp)

        def load(self):
            """Load the weights from ``./models/model-pedal<version>.p``.

            Raises:
                FileNotFoundError: If no model was saved for this version.
            """
            # SECURITY: pickle.load can execute arbitrary code -- only load
            # model files that come from a trusted source.
            with open(self._model_path(), 'rb') as fp:
                self.weights = pickle.load(fp)