diff --git a/DeepQLearning/main.py b/DeepQLearning/main.py index 73595036d219e438ce4fe7de2977d410e2e140a8..f3f92486415cb14b54065d427b2336eda89f2f54 100644 --- a/DeepQLearning/main.py +++ b/DeepQLearning/main.py @@ -13,7 +13,7 @@ marvin = dqnAgent(gamma=0.99, vareps=1.0, lr=0.001, observations=nmb_of_obs, actions=nmb_of_actions, memSize=25000, epsMin=0.05, bSize=16, epsDec=0.999, bins=7) -rewards = []; +rewards = [] epsHistory = [] avg_rewards = [] steps = 0 @@ -21,7 +21,7 @@ verbose = False best_total_reward = -1000 progress = tqdm(range(10000), desc='Training', unit=' episode') -for epoche in progress: +for epochs in progress: done = False observation = env.reset() observation = observation.reshape(1, -1) @@ -61,7 +61,7 @@ for epoche in progress: msg += ' vareps=' + str(round(marvin.vareps, ndigits=2)) msg += ' avg=' + str(movingAvr) progress.set_description(msg) - if epoche % 10 == 0: + if epochs % 10 == 0: np.save("eps.npy", np.array(epsHistory)) np.save("total_rewards.npy", np.array(rewards)) np.save("avg_rewards.npy", np.array(avg_rewards))