# Select Git revision
# Forked from
# Peter Gerwinski / bs
# Source project has a limited visibility.
# walker.py 3.71 KiB
import numpy as np
import pickle
import copy
import os
import matplotlib.pyplot as plt
import mlp_visualizer
# Seed NumPy's global RNG so the random weight initialisation and the random
# mutations performed by Walker are repeatable from run to run.
np.random.seed(42)
class Walker:
    """Two-layer tanh policy network bound to a Gym-style environment.

    The network maps an observation (optionally extended by a constant bias
    input of 1) through one hidden tanh layer to a tanh-squashed action
    vector. The weights live in the dict ``self.weights`` under the keys
    ``'W1'`` (inputs x hidden) and ``'W2'`` (hidden x actions).
    """

    # Labels for the first 24 observation components, in observation order.
    # NOTE(review): this presumably follows the BipedalWalker observation
    # layout (hull state, two legs, ten lidar readings) -- confirm against the
    # environment actually used.
    _OBSERVATION_NAMES = (
        "hull_angle",
        "hull_angularVelocity",
        "vel_x",
        "vel_y",
        "hip_joint_1_angle",
        "hip_joint_1_speed",
        "knee_joint_1_angle",
        "knee_joint_1_speed",
        "leg_1_ground_contact_flag",
        "hip_joint_2_angle",
        "hip_joint_2_speed",
        "knee_joint_2_angle",
        "knee_joint_2_speed",
        "leg_2_ground_contact_flag",
        "lidar reading 1",
        "lidar reading 2",
        "lidar reading 3",
        "lidar reading 4",
        "lidar reading 5",
        "lidar reading 6",
        "lidar reading 7",
        "lidar reading 8",
        "lidar reading 9",
        "lidar reading 10",
    )

    def __init__(self, hidden_layer, bias, version, load_brain, env):
        """Create a walker, either with fresh random weights or a saved model.

        Args:
            hidden_layer: Number of units in the hidden layer.
            bias: Truthy to append a constant 1 to every observation.
            version: Integer used in the model file name (see ``save``/``load``).
            load_brain: If truthy, read the weights via ``load()`` instead of
                initialising them randomly.
            env: Environment exposing ``observation_space.shape``,
                ``action_space.shape``, ``reset()``, ``step()`` and ``render()``.
        """
        self.env = env
        self.version = version
        self.hidden_layer = hidden_layer
        # Set before the branch below so the attribute exists no matter which
        # path initialises the weights.
        self.bias = bias
        if load_brain:
            self.load()
        else:
            n_inputs = self.env.observation_space.shape[0] + int(bias)
            n_outputs = self.env.action_space.shape[0]
            # Gaussian weights scaled by 1/sqrt(fan-in); W1 is drawn before W2.
            self.weights = {
                'W1': np.random.randn(n_inputs, hidden_layer) / np.sqrt(n_inputs),
                'W2': np.random.randn(hidden_layer, n_outputs) / np.sqrt(hidden_layer),
            }

    def get_action(self, observation):
        """Run a forward pass and return the action vector (each entry in [-1, 1])."""
        if self.bias:
            # The bias is modelled as an extra input that is always 1.
            observation = np.append(observation, 1)
        hidden = np.tanh(np.matmul(observation, self.weights['W1']))
        return np.tanh(np.matmul(hidden, self.weights['W2']))

    def get_reward(self, steps, render=False):
        """Play one episode of at most ``steps`` steps and return the summed reward.

        Both environment API flavours are accepted: the legacy one, where
        ``reset()`` returns the observation and ``step()`` a 4-tuple, and the
        newer one, where ``reset()`` returns ``(observation, info)`` and
        ``step()`` returns ``(obs, reward, terminated, truncated, info)``.
        """
        reset_result = self.env.reset()
        # Newer API: reset() returns an (observation, info) tuple.
        if isinstance(reset_result, tuple):
            observation = reset_result[0]
        else:
            observation = reset_result

        total_reward = 0
        for _ in range(steps):
            if render:
                self.env.render()
            step_result = self.env.step(self.get_action(observation))
            if len(step_result) == 5:
                observation, reward, terminated, truncated, _info = step_result
                done = terminated or truncated
            else:
                observation, reward, done, _info = step_result
            total_reward += reward
            if done:
                break
        return total_reward

    def mutate(self, mutation_rate):
        """Add Gaussian noise scaled by ``mutation_rate`` to every weight matrix."""
        for key, value in self.weights.items():
            # Only existing keys are reassigned, so iterating the dict is safe.
            self.weights[key] = value + mutation_rate * np.random.randn(*value.shape)

    def get_weights(self):
        """Return a deep copy of the weight dict."""
        return copy.deepcopy(self.weights)

    def set_weights(self, weights):
        """Replace the weights with a deep copy of ``weights``."""
        self.weights = copy.deepcopy(weights)

    def plot_input_weights(self):
        """Show a bar chart of the summed outgoing W1 weights per input."""
        names = list(self._OBSERVATION_NAMES)
        if self.bias:
            names.append('bias')
        n_inputs = self.env.observation_space.shape[0] + int(self.bias)
        # One value per input: the sum over that input's row of W1.
        weights = self.weights['W1'][:n_inputs].sum(axis=1)
        plt.bar(names, weights)
        plt.xticks(rotation=45, ha="right")
        plt.show()

    def save_mlp_weights(self, gen):
        """Draw the network with ``mlp_visualizer``; ``gen`` is passed to ``draw``.

        Note that this overwrites four module-level layout settings on
        ``mlp_visualizer``.
        """
        mlp_visualizer.vertical_distance_between_layers = 40
        mlp_visualizer.horizontal_distance_between_neurons = 4
        mlp_visualizer.neuron_radius = 1
        mlp_visualizer.default_line_width = 1
        layer_sizes = [
            self.env.observation_space.shape[0] + int(self.bias),
            self.hidden_layer,
            self.env.action_space.shape[0],
        ]
        network = mlp_visualizer.NeuralNetwork(
            layer_sizes, [self.weights['W1'], self.weights['W2']])
        network.draw(gen)

    def save(self):
        """Pickle the weights to ``./models/model-pedal<version>.p``."""
        # exist_ok avoids the check-then-create race of isdir() + mkdir().
        os.makedirs('./models', exist_ok=True)
        with open('./models/model-pedal%d.p' % self.version, 'wb') as fp:
            pickle.dump(self.weights, fp)

    def load(self):
        """Load the weights from ``./models/model-pedal<version>.p``."""
        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file -- only load model files that come from a trusted source.
        with open('./models/model-pedal%d.p' % self.version, 'rb') as fp:
            self.weights = pickle.load(fp)