diff --git a/DeepQLearning/agentMemoryDQN.py b/DeepQLearning/agentMemoryDQN.py
index efa250f02ada9a322dc012cb2222365928cffb51..3a3eb864c0cc4c710095b77161e6ba84a0eb2319 100644
--- a/DeepQLearning/agentMemoryDQN.py
+++ b/DeepQLearning/agentMemoryDQN.py
@@ -1,45 +1,47 @@
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
+
 
 class agentMemory(object):
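+    """Ring-buffer replay memory storing (state, action, reward, nextState, done) transitions."""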
     def __init__(self, memSize, input_shape, action_shape):
         self.memSize = memSize
-        self.mCounter= 0
-        self._stateM      = np.zeros((self.memSize, 1, 24))
-        self._nextstateM  = np.zeros((self.memSize, 1, 24))
-        self._actionM     = np.zeros((self.memSize, *action_shape), dtype=np.int32)
-        self._rewardM     = np.zeros(self.memSize)
-        self._doneM       = np.zeros(self.memSize, dtype=np.uint8)
+        self.mCounter = 0
+        self._stateM = np.zeros((self.memSize, *input_shape))
+        self._nextstateM = np.zeros((self.memSize, *input_shape))
+        self._actionM = np.zeros((self.memSize, *action_shape), dtype=np.int32)
+        self._rewardM = np.zeros(self.memSize)
+        self._doneM = np.zeros(self.memSize, dtype=np.uint8)
 
     def addMemory(self, state, action, reward, nextState, done):
-        idx = self.mCounter% self.memSize
-        self.mCounter+= 1
-        self._stateM[idx]      = state
-        self._nextstateM [idx] = nextState
-        self._actionM [idx]    = action
-        self._rewardM[idx]     = reward
-        self._doneM[idx]       = done
-        
+        idx = self.mCounter % self.memSize  # ring-buffer index: overwrite the oldest entry once the memory is full
+        self.mCounter += 1
+        self._stateM[idx] = state
+        self._nextstateM[idx] = nextState
+        self._actionM[idx] = action
+        self._rewardM[idx] = reward
+        self._doneM[idx] = done
+
     def getBatch(self, bSize):
-        maxMem     = min(self.mCounter, self.memSize)
-        batchIdx   = np.random.choice(maxMem, bSize, replace=False)
-        states     = self._stateM[batchIdx]
-        actions    = self._actionM [batchIdx]
-        rewards    = self._rewardM[batchIdx]
-        nextStates = self._nextstateM [batchIdx]
-        done       = self._doneM[batchIdx]
+        maxMem = min(self.mCounter, self.memSize)  # number of transitions stored so far
+        batchIdx = np.random.choice(maxMem, bSize, replace=False)  # sample bSize distinct transitions
+        states = self._stateM[batchIdx]
+        actions = self._actionM[batchIdx]
+        rewards = self._rewardM[batchIdx]
+        nextStates = self._nextstateM[batchIdx]
+        done = self._doneM[batchIdx]
         return states, actions, rewards, nextStates, done
-    
-    def showMemory(self,no):
-        print('Memory No.',no,' with memory counter',self.mCounter )
-        print('Reward:',self._rewardM[no])
+
+    def showMemory(self, no):
+        print('Memory No.', no, 'with memory counter', self.mCounter)
+        print('Reward:', self._rewardM[no])
         print('Action', self._actionM[no])
         print('Done', self._doneM[no])
         fig = plt.figure()
         for i in range(4):
-            ax = fig.add_subplot(1,4,i+1)
-            ax.imshow(self._stateM[no,:,:,i])
+            ax = fig.add_subplot(1, 4, i + 1)
+            ax.imshow(self._stateM[no, :, :, i])  # NOTE: assumes 4 stacked image frames, not the (1, 24) vector states allocated above
         fig = plt.figure()
         for i in range(4):
-            ax = fig.add_subplot(1,4,i+1)
-            ax.imshow(self._nextstateM[no,:,:,i])
+            ax = fig.add_subplot(1, 4, i + 1)
+            ax.imshow(self._nextstateM[no, :, :, i])
diff --git a/DeepQLearning/averageRewards.png b/DeepQLearning/averageRewards.png
new file mode 100644
index 0000000000000000000000000000000000000000..21aeb85a6be3319c245a734a509e3e4e46366312
Binary files /dev/null and b/DeepQLearning/averageRewards.png differ
diff --git a/DeepQLearning/config.py b/DeepQLearning/config.py
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/DeepQLearning/dqnAgent.py b/DeepQLearning/dqnAgent.py
index c28c90a143566c83c355070b8e807d660fef54ab..b6e3ea4b6250883f3c9054111271860084efa770 100644
--- a/DeepQLearning/dqnAgent.py
+++ b/DeepQLearning/dqnAgent.py
@@ -1,32 +1,36 @@
 import time
+
 import numpy as np
-from agentMemoryDQN import agentMemory
-from tensorflow.keras.layers import Dense, Activation, Conv2D, Flatten
+from tensorflow.keras.layers import Dense
 from tensorflow.keras.models import Sequential, load_model
 from tensorflow.keras.optimizers import Adam
 
+from agentMemoryDQN import agentMemory
+
+
 def qFunctionNN(lr, outputs, inputs):
     QFunction = Sequential()
     QFunction.add(Dense(24, activation='relu', input_dim=inputs))
-    QFunction.add(Dense(24, activation='relu') )
+    QFunction.add(Dense(24, activation='relu'))
     QFunction.add(Dense(outputs, activation='linear'))
     QFunction.compile(optimizer=Adam(lr=lr), loss='mean_squared_error')
     return QFunction
 
+
 class dqnAgent(object):
     def __init__(self, lr, gamma, actions, vareps, bSize, observations,
-                 epsDec=0.0,  epsMin=0.01, memSize=10000, name='Alan', bins = 7):
+                 epsDec=0.0, epsMin=0.01, memSize=10000, name='Alan', bins=7):
         self.actions = actions
-        self.gamma   = gamma
-        self.vareps  = vareps
-        self.epsDec  = epsDec
-        self.epsMin  = epsMin
-        self.bSize   = bSize
+        self.gamma = gamma
+        self.vareps = vareps
+        self.epsDec = epsDec
+        self.epsMin = epsMin
+        self.bSize = bSize
         self.bins = bins
-        self.memory  = agentMemory(memSize, [1,24], [actions])
-        self.Q       = qFunctionNN(lr, actions, observations)
-        self.name    = name 
-        self.steps   = 0
+        self.memory = agentMemory(memSize, [1, observations], [actions])
+        self.Q = qFunctionNN(lr, actions, observations)
+        self.name = name
+        self.steps = 0
 
     def addMemory(self, state, action, reward, nextState, done):
         self.memory.addMemory(state, action, reward, nextState, done)
@@ -41,7 +45,7 @@ class dqnAgent(object):
 
     def round_bins(self, x, bins):
         round_fact = (bins - 1) / 2
-        return np.around(x*round_fact)/round_fact
+        return np.around(x * round_fact) / round_fact
 
     def learn(self):
         start = time.time()
@@ -52,18 +56,18 @@ class dqnAgent(object):
                 next_action = np.amax(self.Q.predict(nextState[i]))
                 target = r[i]
                 if not done[i]:
-                    target = (1.0 - 0.1) * r[i] + 0.1 * self.gamma*next_action
+                    target = (1.0 - 0.1) * r[i] + 0.1 * self.gamma * next_action  # blended target: 0.9 * reward + 0.1 * discounted max next-state Q
                 y = self.Q.predict(state[i])
                 y[0] = target
-                history = self.Q.fit(x=state[i], y=y, verbose=0, epochs=1)
+                self.Q.fit(x=state[i], y=y, verbose=0, epochs=1)
 
             self.steps += 1
         print("learn time: ", time.time() - start)
 
     def saveCNNs(self):
-        fname = self.name+'.h5'
-        self.Q.save('Q'+fname)
+        fname = self.name + '.h5'
+        self.Q.save('Q' + fname)
 
     def loadCNNs(self):
-        fname = self.name+'.h5'
-        self.Q = load_model('Q'+fname)
\ No newline at end of file
+        fname = self.name + '.h5'
+        self.Q = load_model('Q' + fname)
diff --git a/DeepQLearning/main.py b/DeepQLearning/main.py
index 370e2ba990e05be46c65dad8fa1c18a9e83b3bed..73595036d219e438ce4fe7de2977d410e2e140a8 100644
--- a/DeepQLearning/main.py
+++ b/DeepQLearning/main.py
@@ -1,10 +1,10 @@
+import time
+
+import gym
 import numpy as np
 from tqdm import tqdm
-from dqnAgent import dqnAgent
-import gym
-import time
 
-from config import *
+from dqnAgent import dqnAgent
 
 env = gym.make('BipedalWalker-v3')
 nmb_of_actions = env.action_space.shape[0]
@@ -13,13 +13,14 @@ marvin = dqnAgent(gamma=0.99, vareps=1.0, lr=0.001,
                   observations=nmb_of_obs, actions=nmb_of_actions, memSize=25000,
                   epsMin=0.05, bSize=16, epsDec=0.999, bins=7)
 
-rewards = []; epsHistory = []
+rewards = []
+epsHistory = []
 avg_rewards = []
 steps = 0
 verbose = False
 best_total_reward = -1000
 
-progress = tqdm(range(10000),desc='Training',unit=' episode')
+progress = tqdm(range(10000), desc='Training', unit=' episode')
 for epoche in progress:
     done = False
     observation = env.reset()
@@ -34,7 +35,7 @@ for epoche in progress:
         obs = obs.reshape(1, -1)
         totalReward += reward
         marvin.addMemory(observation, action, reward, obs, int(done))
-        if verbose : env.render()    
+        if verbose: env.render()
         observation = obs
 
         ep_rewards.append(reward)
@@ -48,7 +49,7 @@ for epoche in progress:
 
     marvin.learn()
 
-    marvin.vareps *= marvin.epsDec 
+    marvin.vareps *= marvin.epsDec
     if marvin.vareps < marvin.epsMin:
         marvin.vareps = marvin.epsMin
 
@@ -56,15 +57,15 @@ for epoche in progress:
     epsHistory.append(marvin.vareps)
     movingAvr = np.mean(rewards[-20:])
     avg_rewards.append(movingAvr)
-    msg  =' Training r='+str(totalReward)
-    msg +=' vareps='+ str(round(marvin.vareps,ndigits=2))
-    msg += ' avg='+str(movingAvr)    
+    msg = ' Training r=' + str(totalReward)
+    msg += ' vareps=' + str(round(marvin.vareps, ndigits=2))
+    msg += ' avg=' + str(movingAvr)
     progress.set_description(msg)
-    if epoche % 10 == 0: 
+    if epoche % 10 == 0:
         np.save("eps.npy", np.array(epsHistory))
         np.save("total_rewards.npy", np.array(rewards))
         np.save("avg_rewards.npy", np.array(avg_rewards))
-    if movingAvr>300: break # solve condition
+    if movingAvr > 300: break  # stop early once the 20-episode average reaches BipedalWalker's solve threshold of 300
 
 marvin.vareps = 0
 done = False
@@ -77,9 +78,6 @@ while not done:
     obs, reward, done, info = env.step(action)
     obs = obs.reshape(1, -1)
     totalReward += reward
-    #marvin.addMemory(observation, action_ind, reward, obs, int(done))
-    env.render()    
+    # marvin.addMemory(observation, action_ind, reward, obs, int(done))
+    env.render()
     observation = obs
-
-
-  
\ No newline at end of file
diff --git a/DeepQLearning/show_agent.py b/DeepQLearning/show_agent.py
index 5496929f22803295979307d9f260af31b7be21d1..3c99443d0716101189fe47a52eec7771f902712e 100644
--- a/DeepQLearning/show_agent.py
+++ b/DeepQLearning/show_agent.py
@@ -1,18 +1,17 @@
+import gym
+import matplotlib.pyplot as plt
 import numpy as np
-from tqdm import tqdm
+
 from dqnAgent import dqnAgent
-import matplotlib.pyplot as plt
-import gym
 
 env = gym.make('BipedalWalker-v3')
 nmb_of_actions = env.action_space.shape[0]
 observation = env.reset()
 marvin = dqnAgent(gamma=0.99, vareps=0.0, lr=0.001,
                   observations=len(observation), actions=nmb_of_actions, memSize=25000,
-                  epsMin=0.02, bSize=32, replace=1000, epsDec=0.001)
+                  epsMin=0.02, bSize=32, epsDec=0.001)
 marvin.loadCNNs()
 
-
 total_rewards = np.load("total_rewards.npy", allow_pickle=True)
 eps = np.load("eps.npy", allow_pickle=True)
 avg_rewards = np.load("avg_rewards.npy", allow_pickle=True)
@@ -27,7 +26,7 @@ plt.ylabel('Reward')
 plt.plot(avg_rewards, c='k')
 x = np.arange(len(avg_rewards))
 m, b = np.polyfit(x, avg_rewards, 1)
-plt.plot(x, m*x + b)
+plt.plot(x, m * x + b)
 plt.figure()
 plt.title('Epsilon')
 plt.plot(eps, c='k')
@@ -41,4 +40,4 @@ for i in range(10):
         action = marvin.getAction(observation)
         obs, reward, done, info = env.step(action)
         obs = obs.reshape(1, -1)
-        env.render()
\ No newline at end of file
+        env.render()
diff --git a/requirements.txt b/requirements.txt
index 0c85215b839de1a44c1b47e1fc37c70c6b9032b7..f1eb1c0700b456e7528501523d68d32fb9a21185 100644
Binary files a/requirements.txt and b/requirements.txt differ