Commit 2b9dda23 authored by Philip Maas

Added averageRewards.png and reformatted code

parent 9e09122b
import matplotlib.pyplot as plt
import numpy as np


class agentMemory(object):
    # Replay memory holding (state, action, reward, next state, done) transitions.
    def __init__(self, memSize, input_shape, action_shape):
......
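The body of agentMemory is elided by the diff. For orientation, a minimal ring-buffer replay memory with the same constructor and the addMemory(state, action, reward, next_state, done) call used later by the training script could look like the sketch below; this is an assumption, not the committed implementation, and sampleBatch is an invented name.

import numpy as np

class agentMemory(object):
    def __init__(self, memSize, input_shape, action_shape):
        # input_shape assumed to be the observation length (24 for BipedalWalker)
        self.memSize = memSize
        self.counter = 0  # total transitions written so far
        self.states = np.zeros((memSize, input_shape), dtype=np.float32)
        self.nextStates = np.zeros((memSize, input_shape), dtype=np.float32)
        self.actions = np.zeros(memSize, dtype=np.int32)  # discrete action indices
        self.rewards = np.zeros(memSize, dtype=np.float32)
        self.dones = np.zeros(memSize, dtype=np.int32)

    def addMemory(self, state, action, reward, next_state, done):
        i = self.counter % self.memSize  # overwrite the oldest entry once full
        self.states[i] = state
        self.actions[i] = action
        self.rewards[i] = reward
        self.nextStates[i] = next_state
        self.dones[i] = done
        self.counter += 1

    def sampleBatch(self, bSize):  # assumed name
        # Sample uniformly from the filled part of the buffer (requires n >= bSize).
        n = min(self.counter, self.memSize)
        idx = np.random.choice(n, bSize, replace=False)
        return (self.states[idx], self.actions[idx], self.rewards[idx],
                self.nextStates[idx], self.dones[idx])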
DeepQLearning/averageRewards.png (new image, 160 KiB)

import time

import numpy as np
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.optimizers import Adam

from agentMemoryDQN import agentMemory


def qFunctionNN(lr, outputs, inputs):
    QFunction = Sequential()
    QFunction.add(Dense(24, activation='relu', input_dim=inputs))
@@ -13,6 +16,7 @@ def qFunctionNN(lr, outputs, inputs):
    QFunction.compile(optimizer=Adam(lr=lr), loss='mean_squared_error')
    return QFunction
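The hunk hides the network's middle layers. Judging from the visible first layer and the compile call, the elided part presumably stacks another small Dense layer and a linear head of size outputs (one Q-value per discrete action). A plausible reconstruction, under that assumption only:

def qFunctionNN(lr, outputs, inputs):
    QFunction = Sequential()
    QFunction.add(Dense(24, activation='relu', input_dim=inputs))
    QFunction.add(Dense(24, activation='relu'))          # hidden layer (assumed)
    QFunction.add(Dense(outputs, activation='linear'))   # one Q-value per action (assumed)
    QFunction.compile(optimizer=Adam(lr=lr), loss='mean_squared_error')
    return QFunction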
class dqnAgent(object):
    def __init__(self, lr, gamma, actions, vareps, bSize, observations,
                 epsDec=0.0, epsMin=0.01, memSize=10000, name='Alan', bins=7):
@@ -55,7 +59,7 @@ class dqnAgent(object):
            # Blend reward and bootstrapped value, then fit on the single sample.
            target = (1.0 - 0.1) * r[i] + 0.1 * self.gamma * next_action
            y = self.Q.predict(state[i])
            y[0] = target
            self.Q.fit(x=state[i], y=y, verbose=0, epochs=1)
        self.steps += 1
        print("learn time: ", time.time() - start)
......
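Note that the target in the visible hunk weights the reward by 0.9 and the discounted bootstrapped value by 0.1. For comparison, the textbook Q-learning target is the plain Bellman backup; a sketch in the fragment's own variable style, where next_state and done are assumed names:

# Standard DQN target (for comparison; not the committed code):
# target = r_i                                     if done_i
#        = r_i + gamma * max_a Q(next_state_i, a)  otherwise
next_q = self.Q.predict(next_state[i])
target = r[i] + self.gamma * (1 - done[i]) * np.max(next_q)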
import time

import gym
import numpy as np
from tqdm import tqdm

from config import *
from dqnAgent import dqnAgent
env = gym.make('BipedalWalker-v3')
nmb_of_actions = env.action_space.shape[0]
@@ -13,7 +13,8 @@ marvin = dqnAgent(gamma=0.99, vareps=1.0, lr=0.001,
                  observations=nmb_of_obs, actions=nmb_of_actions, memSize=25000,
                  epsMin=0.05, bSize=16, epsDec=0.999, bins=7)
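BipedalWalker-v3 has a continuous Box(4,) action space, so a DQN with bins=7 implies each of the four action dimensions is discretized into 7 values. How the repository maps a discrete index back to a continuous action is not visible in the diff; a hypothetical mapping (index_to_action is an invented name):

def index_to_action(index, n_dims=4, bins=7, low=-1.0, high=1.0):
    # Treat the flat index as a base-`bins` number, one digit per action dimension.
    values = np.linspace(low, high, bins)
    action = np.empty(n_dims, dtype=np.float32)
    for d in range(n_dims):
        action[d] = values[index % bins]
        index //= bins
    return action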
rewards = []
epsHistory = []
avg_rewards = []
steps = 0
verbose = False
@@ -80,6 +81,3 @@ while not done:
    # marvin.addMemory(observation, action_ind, reward, obs, int(done))
    env.render()
    observation = obs
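The diff elides the main training loop between the setup above and this final render loop. Based on the agent API visible in the fragments (addMemory, the epsilon parameters, the bookkeeping lists), it plausibly follows the usual pattern; in the sketch below, chooseAction, learn, and n_episodes are assumed names, and index_to_action is the hypothetical mapping sketched earlier.

for episode in tqdm(range(n_episodes)):  # n_episodes assumed to come from config
    observation = env.reset()
    done = False
    total_reward = 0
    while not done:
        action_ind = marvin.chooseAction(observation)        # assumed method name
        obs, reward, done, info = env.step(index_to_action(action_ind))
        marvin.addMemory(observation, action_ind, reward, obs, int(done))
        marvin.learn()                                       # assumed method name
        observation = obs
        total_reward += reward
    rewards.append(total_reward)
    epsHistory.append(marvin.vareps)
    avg_rewards.append(np.mean(rewards[-100:]))              # window size assumed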
import gym
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm

from dqnAgent import dqnAgent
env = gym.make('BipedalWalker-v3')
nmb_of_actions = env.action_space.shape[0]
observation = env.reset()
marvin = dqnAgent(gamma=0.99, vareps=0.0, lr=0.001,
                  observations=len(observation), actions=nmb_of_actions, memSize=25000,
                  epsMin=0.02, bSize=32, epsDec=0.001)
marvin.loadCNNs()
total_rewards = np.load("total_rewards.npy", allow_pickle=True)
eps = np.load("eps.npy", allow_pickle=True)
avg_rewards = np.load("avg_rewards.npy", allow_pickle=True)
......
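The rest of the evaluation script is elided. Given the matplotlib import and the commit message ("Added averageRewards.png"), it presumably plots the loaded arrays; a minimal sketch under that assumption, with the output filename taken from the commit message:

plt.plot(total_rewards, label='episode reward')
plt.plot(avg_rewards, label='average reward')
plt.xlabel('episode')
plt.ylabel('reward')
plt.legend()
plt.savefig('averageRewards.png')  # filename assumed from the commit message
plt.show()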
File suppressed by a .gitattributes entry or the file's encoding is unsupported.