Commit 686da13e authored by Philip Maas

reformatted code

parent 881a9c34
@@ -13,7 +13,7 @@ marvin = dqnAgent(gamma=0.99, vareps=1.0, lr=0.001,
                   observations=nmb_of_obs, actions=nmb_of_actions, memSize=25000,
                   epsMin=0.05, bSize=16, epsDec=0.999, bins=7)
-rewards = [];
+rewards = []
 epsHistory = []
 avg_rewards = []
 steps = 0
@@ -21,7 +21,7 @@ verbose = False
 best_total_reward = -1000
 progress = tqdm(range(10000), desc='Training', unit=' episode')
-for epoche in progress:
+for epochs in progress:
     done = False
     observation = env.reset()
     observation = observation.reshape(1, -1)
@@ -61,7 +61,7 @@ for epoche in progress:
     msg += ' vareps=' + str(round(marvin.vareps, ndigits=2))
     msg += ' avg=' + str(movingAvr)
     progress.set_description(msg)
-    if epoche % 10 == 0:
+    if epochs % 10 == 0:
         np.save("eps.npy", np.array(epsHistory))
         np.save("total_rewards.npy", np.array(rewards))
         np.save("avg_rewards.npy", np.array(avg_rewards))
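For context, the hunks above touch the bookkeeping around a DQN training loop: epsilon decay, a moving-average reward for the progress bar, and periodic np.save checkpoints every 10 episodes. The sketch below is a minimal, hypothetical reconstruction of that pattern only; run_episode is a stand-in for the agent/environment interaction (marvin and env in the real script), which this diff does not show, and the episode count and reward distribution are placeholders.

import numpy as np
from tqdm import tqdm

# Hypothetical stand-in for one training episode; the real project uses a
# dqnAgent ("marvin") interacting with a Gym-style env, not shown in the diff.
def run_episode(rng):
    return float(rng.normal(loc=-200.0, scale=50.0))  # fake total reward

rng = np.random.default_rng(0)
rewards, eps_history, avg_rewards = [], [], []
vareps, eps_dec, eps_min = 1.0, 0.999, 0.05  # epsilon schedule values taken from the diff

progress = tqdm(range(100), desc='Training', unit=' episode')
for episode in progress:
    total_reward = run_episode(rng)
    vareps = max(vareps * eps_dec, eps_min)  # decay exploration rate toward epsMin

    rewards.append(total_reward)
    eps_history.append(vareps)
    moving_avg = round(float(np.mean(rewards[-100:])), 2)  # moving average over the last 100 episodes
    avg_rewards.append(moving_avg)

    progress.set_description(
        'Training vareps=' + str(round(vareps, ndigits=2)) + ' avg=' + str(moving_avg))

    if episode % 10 == 0:  # periodic checkpoint of the learning curves
        np.save("eps.npy", np.array(eps_history))
        np.save("total_rewards.npy", np.array(rewards))
        np.save("avg_rewards.npy", np.array(avg_rewards))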