diff --git a/.gitignore b/.gitignore
index a206662474aac73cc18f10d9447e97adc498e338..31adcd1eea6e6b910b2362ee6452e6fc672ee573 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,5 +4,6 @@ saved_agents
 benchmarks
 baselines
 workspace.code-workspace
+test
 tech_demo.py
 *.png
diff --git a/agents.py b/agents.py
index 8672ba5d3b39524e58d2478d77035d6e64f2fa9f..300d84ba3181e88a1760e503cbf4e60f5703163f 100644
--- a/agents.py
+++ b/agents.py
@@ -2,6 +2,7 @@ import random
 import numpy as np
 from memory import Memory
 from networks import QNet
+from steering_wheel import Controller
 
 class QAgent:
     gamma = 0.99
@@ -29,10 +30,9 @@ class QAgent:
     def remember(self, state, action, reward, following_state, done):
         self.memory.add(state, action, reward, following_state, done)
 
-    def learn(self, offline=False):
+    def learn(self, offline=False, epochs=1):
         """ Learn the Q-Function. """
         batch_size = self.online_batch_size
-        epochs = 1
         
         if offline:
             batch_size = self.OFFLINE_BATCHSIZE
@@ -49,7 +49,7 @@ class QAgent:
         y[[idx], [actions]] = qMax
 
         if offline:
-            history = self.q.net.fit(states, y, epochs=2, verbose=0)
+            history = self.q.net.fit(states, y, epochs=epochs, verbose=0)
             loss = history.history['loss'][-1]
         else:
             loss = self.q.fit(states, y, epochs)
@@ -64,8 +64,9 @@ class QAgent:
         self.memory.save(path+'/' + self.name + '.mem')
 
     def load(self, path, net=True, memory=True):
-        print(path)
+        print('Load:  ' + path)
         if net:
+            print('Network')
             self.q.load(path+'.net')
         if memory:
             self.memory.load(path+'.mem')
@@ -81,14 +82,13 @@ class DQAgent(QAgent):
         action_values = (self.q.predict(state) + self.q2.predict(state)) / 2
         return np.argmax(action_values[0])
 
-    def learn(self, offline=False):
+    def learn(self, offline=False, epochs=1):
         for _ in range(2):
             if np.random.rand() < 0.5:
                 temp = self.q
                 self.q = self.q2
                 self.q2 = temp
             batch_size = self.online_batch_size
-            epochs = 1
             if offline:
                 batch_size = self.OFFLINE_BATCHSIZE
             if len(self.memory.history) < batch_size:
@@ -101,10 +101,23 @@ class DQAgent(QAgent):
             idx = np.array([i for i in range(batch_size)])
             y[[idx], [actions]] = qMax
             if offline:
-                history = self.q.net.fit(states, y, epochs=2, verbose=0)
+                history = self.q.net.fit(states, y, epochs=epochs, verbose=0)
                 loss = history.history['loss'][-1]
             else:
                 loss = self.q.fit(states, y, epochs)
             if self.epsilon > self.epsilon_min:
                 self.epsilon *= self.epsilon_decay
-        return loss
\ No newline at end of file
+        return loss
+
+class CarlaManual(QAgent):
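+    """ Manual-control agent: actions come from the steering_wheel Controller instead of the learned Q-network. """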
+    control = None
+
+    def __init__(self, conf):
+        super().__init__(conf)
+        self.control = Controller()
+
+    def get_action(self, state):
+        self.control.on_update()
+        return self.control.get_action()
+
diff --git a/carla_environment.py b/carla_environment.py
index 7edd56d3f9085fbdc1c300676edfcbd55a0a41be..8854f528063b962f4cef66f53e63d0a85272691a 100644
--- a/carla_environment.py
+++ b/carla_environment.py
@@ -97,8 +97,9 @@ class ObstacleSensor:
         self.parent = parent
         bp = world.get_blueprint_library().find('sensor.other.obstacle')
         bp.set_attribute('distance', '10')
-        bp.set_attribute('hit_radius', '3')
-        print(bp.get_attribute('hit_radius'))
+        bp.set_attribute('hit_radius', '5')
+        # print(bp.get_attribute('hit_radius'))
+        position = carla.Transform(carla.Location(x=-2, y=0, z=0.0), carla.Rotation(pitch=0.0, yaw=0, roll=0.0))  # sensor mounting offset (currently not passed to spawn_actor below)
         self.sensor = world.spawn_actor(bp, carla.Transform(), attach_to=parent, attachment_type=carla.AttachmentType.Rigid)
         weak_self = weakref.ref(self)
         self.sensor.listen(lambda event: ObstacleSensor._on_event(weak_self, event))
@@ -108,12 +109,12 @@ class ObstacleSensor:
         if not self:
             return
         self.collision = event
-        if not event.other_actor.type_id == 'static.road':
-            print(self.parent.get_location())
+        if event.other_actor.type_id not in ('static.road', 'static.roadline', 'static.sidewalk') and False:  # "and False" keeps this debug output disabled
+            print(self.parent.get_transform())
+            print(event.transform)
             print(self.parent.get_velocity())
             print(event.other_actor.type_id)
-            print(event.other_actor.transform)
-            print(event.distance)
+            print(str(event.distance))
 
 
 class LidarSensor:
@@ -333,7 +334,7 @@ if __name__ == "__main__":
         clock.tick(5)
         ctrl.on_update()
         obs, reward, done, _ = env.step(ctrl.get_action(), render=True)
-        print(str(reward) + ' ' + str(done))
+        # print(str(reward) + ' ' + str(done))
         cumulated_reward += reward
         if done:
             break
diff --git a/environment_wrapper.py b/environment_wrapper.py
index 5b4f5fbb7c102fc766d93aa074cb8019944a7e17..cf39665bc9605f7738116bed1ff7bc18706d7efd 100644
--- a/environment_wrapper.py
+++ b/environment_wrapper.py
@@ -5,6 +5,8 @@ from tqdm import trange
 import pandas as pd
 import matplotlib.pyplot as plt
 
+IS_SOLVED = 195  # average score over the last 100 episodes at which training stops early
+
 class Config:
     render = False
     force_cpu = True
@@ -18,8 +20,10 @@ class Config:
     net_layout= [256, 128]
     eps_decay = 0.9996
     learn_rate= 0.001
+    learn_iterations = 1
     run_episodes = 20
     offline_epochs = 1000
+    offline_validate_every_x_iteration = 10
     load_ann = False
     load_mem = False
     load_from = 'agnt'
@@ -35,7 +39,8 @@ class Config:
         for layer in self.net_layout:
             self.name += '_' + str(layer) + '_'
         self.name += str(self.eps_decay) + '_'
-        self.name += str(self.learn_rate)
+        self.name += str(self.learn_rate) + '_'
+        self.name += str(self.learn_iterations)
     
 
 
@@ -52,7 +57,7 @@ def reset(environment):
     return state
 
 
-def one_episode(environment, agent, render, learn, max_steps=1000):
+def one_episode(environment, agent, render, learn, conf=None, max_steps=1000):
     """ Perform one episode of the agent in the environment. """
     score = 0
     state = reset(environment)
@@ -65,22 +70,33 @@ def one_episode(environment, agent, render, learn, max_steps=1000):
         score += reward
         state = following_state
         if learn:
-            agent.learn()
+            if conf is not None:
+                agent.learn(epochs=conf.learn_iterations)
+            else:
+                agent.learn()
         if done:
             break
     return score
 
-def learn_offline(agent, epochs=1):
+def learn_offline(agent, conf):
     """ Train the agent with its memories. """
     print('Learning with ', len(agent.memory.history), ' memories.')
-    pbar = trange(epochs, desc='Loss: x')
-    for _ in pbar:
-        loss = agent.learn(offline=True)
+    pbar = trange(conf.offline_epochs, desc='Loss: x')
+    for i in pbar:
+        loss = agent.learn(offline=True, epochs=conf.learn_iterations)
         desc = ('Loss: %05.4f' %(loss))
         pbar.set_description(desc)
         pbar.refresh()
+        if conf.offline_validate_every_x_iteration != -1 and i % conf.offline_validate_every_x_iteration == 0:
+            score, avg = run(conf.env, conf.agent, 1, render=conf.render, learn=False, conf=conf)
+            conf.name += '1'
+            process_logs(avg, score, conf)
+            if avg[-1] > IS_SOLVED:
+                break
+
 
-def run(environment, agent, episodes, render=True, learn=True):
+
+def run(environment, agent, episodes, render=True, learn=True, conf=None):
     """ Run an agent """
 
     # Set the exploring rate to its minimum.
@@ -93,13 +109,13 @@ def run(environment, agent, episodes, render=True, learn=True):
     
     pbar = trange(episodes, desc=agent.name + ' [act, avg]: [0, 0]', unit="Episodes")
     for _ in pbar:
-        score = one_episode(environment, agent, render, learn)
+        score = one_episode(environment, agent, render, learn, conf=conf)
         score_history.append(score)
 
         is_solved = np.mean(score_history[-100:])
         avg_score_history.append(is_solved)
 
-        if is_solved > 195 and learn:
+        if is_solved > IS_SOLVED and learn:
             break
         desc = (agent.name + " [act, avg]: [{0:.2f}, {1:.2f}]".format(score, is_solved))
         pbar.set_description(desc)
@@ -109,6 +125,10 @@ def run(environment, agent, episodes, render=True, learn=True):
 
 def process_logs(avg_score_history, loss, conf):
     df = pd.DataFrame(list(zip(loss, avg_score_history)), columns=['Score', 'Average'])
+    try:
+        os.makedirs(conf.save_to + conf.name)
+    except FileExistsError:
+        pass
     df.to_csv(conf.save_to + conf.name + '/' + conf.name + '.csv')
 
     """ Plot the log history """
@@ -119,6 +139,7 @@ def process_logs(avg_score_history, loss, conf):
     plt.savefig(conf.save_to + conf.name + '/' + conf.name + '.png', format="png")
     if conf.render:
         plt.show()
+    df.to_csv(conf.save_to + conf.name + '/' + conf.name + '.csv')
 
 def load_logs(file):
     df = pd.read_csv(file)
diff --git a/main.py b/main.py
index 886cddb1afecf3ef9ab677758c892799910e8a18..5214a52f8a7b88144febb528c280935b08fc8e66 100644
--- a/main.py
+++ b/main.py
@@ -28,14 +28,14 @@ def run(conf):
     # Offline training of the agent with
     # previous collected and saved memories.
     if conf.learn_offline and conf.learn:
-        ew.learn_offline(marvin, epochs=conf.offline_epochs)
+        ew.learn_offline(marvin, conf)
 
     # Run the agent in the environment for the
     # number of specified epochs. Either to
     # verify the performance of the agent or
     # to train the agent.
     _LEARN = conf.learn_online and conf.learn
-    loss, avg_score = ew.run(conf.env, marvin, conf.run_episodes, render=conf.render, learn=_LEARN)
+    loss, avg_score = ew.run(conf.env, marvin, conf.run_episodes, render=conf.render, learn=_LEARN, conf=conf)
 
     # Save the final training result of the agent.
     if conf.learn:
diff --git a/run_scripts/baselines.py b/run_scripts/baselines.py
index 9afa1c14b4c35505c1dec5f3582332c50a829f84..bc98308122cd3f20662f45aabfeb81b563341243 100644
--- a/run_scripts/baselines.py
+++ b/run_scripts/baselines.py
@@ -17,7 +17,7 @@ c_32.env_type = 'CartPole'
 c_32.net_layout = [32, 32]
 c_32.eps_decay = 0.9995
 c_32.learn_rate= 0.00075
-c_32.run_episodes = 350
+c_32.run_episodes = 1000
 c_32.save_to = 'baselines/'
 
 
@@ -36,22 +36,54 @@ c_512.net_layout = [512, 32]
 c_1024 = copy.deepcopy(c_32)
 c_1024.net_layout = [1024, 32]
 
+c_2048 = copy.deepcopy(c_32)
+c_2048.net_layout = [2048, 32]
+
 cd_3 = copy.deepcopy(c_32)
-cd_3.net_layout[128, 64, 32]
+cd_3.net_layout = [128, 64, 32]
 
 cd_4 = copy.deepcopy(c_32)
-cd_4.net_layout[128, 64, 32, 32]
+cd_4.net_layout = [128, 64, 32, 32]
+
+cd_5 = copy.deepcopy(c_32)
+cd_5.net_layout = [256, 128, 128, 64]
+cd_5.learn_iterations = 5
+cd_5.learn_rate = 0.001
+
+cd_6 = copy.deepcopy(c_32)
+cd_6.net_layout = [512, 256, 128, 64]
+cd_6.learn_iterations = 10
+cd_6.learn_rate = 0.00075
 
 cd_128 = copy.deepcopy(c_32)
-cd_128.net_layout[128, 128, 128]
+cd_128.net_layout = [128, 128, 128]
 
 cd_256 = copy.deepcopy(c_32)
-cd_256.net_layout[256, 256, 256]
+cd_256.net_layout = [256, 256, 256]
 
 cd_512 = copy.deepcopy(c_32)
-cd_512.net_layout[512, 512, 512]
-
-conf = c_32
+cd_512.net_layout = [512, 512, 512]
+cd_512.learn_iterations = 10
+cd_512.learn_rate = 0.001
+
+offline = copy.deepcopy(c_32)
+offline.force_cpu = False
+offline.load_from = 'Offline_Config_Test'
+offline.load_mem = True
+offline.load_ann = False
+offline.learn_offline = False
+offline.learn_online = True
+offline.run_episodes = 100
+offline.net_layout = [1024, 1024, 1024, 256]
+offline.learn_rate = 0.0005
+offline.learn_iterations = 1
+offline.offline_validate_every_x_iteration = 1
+offline.offline_epochs = 100
+offline.name = 'OnlineValidation'
+offline.render = False
+offline.save_to = 'test/'
+
+conf = offline
 
 conf.conf_to_name()
 conf.agent = QAgent(conf)
diff --git a/run_scripts/benchmarks.py b/run_scripts/benchmarks.py
index 77c63913409ba74f26b5cff7471eeceba250b54a..a18117c53aef1a4ebe21d219e604da2b83633fd5 100644
--- a/run_scripts/benchmarks.py
+++ b/run_scripts/benchmarks.py
@@ -12,7 +12,7 @@ c.render = False
 c.env = gym.make('LunarLander-v2')
 c.env_type = 'Lunar'
 c.net_layout = [256, 128]
-c.eps_decay = 0.9996
+c.eps_decay = 0.9995
 c.learn_rate= 0.001
 c.run_episodes = 300
 c.save_to = 'benchmarks/'
@@ -21,7 +21,12 @@ smallNet = copy.deepcopy(c)
 smallNet.name = 'SmallNet'
 smallNet.net_layout = [128, 32]
 smallNet.conf_to_name()
-# smallNet.agent = QAgent(smallNet)
+
+smallNetSlow = copy.deepcopy(c)
+smallNetSlow.name = 'SmallNetSlow'
+smallNetSlow.net_layout = [128, 32]
+smallNetSlow.learn_rate = 0.0005
+smallNetSlow.conf_to_name()
 
 smallNetDeep = copy.deepcopy(c)
 smallNetDeep.name = 'SmallNetDeep'
@@ -95,8 +100,9 @@ lun.conf_to_name()
 # lun.agent = QAgent(lun)
 
 # configuration = smallNet
+configuration = smallNetSlow
 # configuration = smallNetDeep
-configuration = normalNet
+# configuration = normalNet
 # configuration = normalSlowDecay
 # configuration = normalSlowLearn
 # configuration = largeNet
diff --git a/run_scripts/csv_history_to_plot.py b/run_scripts/csv_history_to_plot.py
new file mode 100644
index 0000000000000000000000000000000000000000..e20d257ba9c27f2dd849aa9da4e3475e65bf8bae
--- /dev/null
+++ b/run_scripts/csv_history_to_plot.py
@@ -0,0 +1,34 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+from os import listdir
+from os.path import isfile, join
+from pathlib import Path
+
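+# Turn the CSV training logs written by environment_wrapper.process_logs back into PNG plots.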
+BASE_PATH = '/home/armin/Master/semester_3/carla/'
+DIR='baselines/hyrican'
+path = BASE_PATH + DIR
+
+def plot_csv(file_path, show=False):
+    df = pd.read_csv(file_path)
+    act_score = df['Score']
+    avg_score = df['Average']
+
+    plt.figure()
+    plt.plot(act_score, label='Episode Score')
+    plt.plot(avg_score, label='Average Score')
+    plt.xlabel('Episode')
+    plt.ylabel('Score')
+    plt.legend()
+    plt.title(file_path)
+    plt.savefig(file_path + '.png')
+    if show:
+        plt.show()
+
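+# Walk every run directory below the configured path and plot each CSV log it contains.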
+for run_dir in (d for d in Path(path).iterdir() if d.is_dir()):
+    for file in listdir(run_dir):
+        file_path = join(run_dir, file)
+        if isfile(file_path):
+            if file_path.endswith('.csv'):
+                plot_csv(file_path, show=True)
diff --git a/run_scripts/manual_carla.py b/run_scripts/manual_carla.py
new file mode 100644
index 0000000000000000000000000000000000000000..7b77e09ef1ae912c4b5025fb1aae2f45a0a0ca31
--- /dev/null
+++ b/run_scripts/manual_carla.py
@@ -0,0 +1,44 @@
+import main
+import environment_wrapper as ew
+import gym
+import copy
+from agents import QAgent as QAgent
+from carla_environment import CarlaEnvironment
+
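+# Base Carla configuration; the variant below overrides most of its fields.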
+c = ew.Config()
+
+c.name = 'Base'
+c.render = True
+c.env_type = 'Carla'
+c.net_layout = [256, 128]
+c.eps_decay = 0.9995
+c.learn_rate= 0.001
+c.run_episodes = 20
+c.save_to = 'test/'
+
+
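+# Load previously recorded memories, pre-train offline, then continue learning online in Carla.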
+t = copy.deepcopy(c)
+t.render = True
+t.net_layout = [1024, 1024, 256, 32]
+t.eps_decay = 0.9993
+t.learn_rate = 0.0005
+t.force_cpu = False
+t.load_mem = True
+t.load_ann = False
+t.save_to = 'test/'
+t.load_from = 'Carla_CarlaOffline_1024__1024__256__32_0.9993_0.0005_50'
+t.name = 'Offline'
+t.learn_offline = True
+t.learn_online = True
+t.run_episodes = 500
+t.offline_epochs = 100
+t.learn_iterations = 100
+t.offline_validate_every_x_iteration = -1
+
+configuration = t
+configuration.env = CarlaEnvironment(render=configuration.render)
+configuration.conf_to_name()
+configuration.agent = QAgent(configuration)
+main.run(configuration)