environment_wrapper.py

    """ Wrapper to abstract different learning environments for an agent. """
    import os
    import numpy as np
    from tqdm import trange
    import pandas as pd
    import matplotlib.pyplot as plt
    
    IS_SOLVED = 195  # Average score above which the environment counts as solved.
    
    class Config:
        """ Hyper-parameter and run settings for an agent/environment combination. """
        render = False
        force_cpu = True
        env = None
        agent = None
        env_type = 'Lunar'
        name = 'ConfigTest'
        learn = True
        learn_online = True
        learn_offline = False
        net_layout = [256, 128]
        eps_decay = 0.9996
        learn_rate = 0.001
        learn_iterations = 1
        run_episodes = 20
        offline_epochs = 1000
        offline_validate_every_x_iteration = 10
        load_ann = False
        load_mem = False
        load_from = 'agnt'
        save_to = 'saved_agents/'
    
    
        def conf_to_name(self):
            """ Derive a descriptive run name from the configured hyper-parameters. """
            self.name = str(self.env_type) + '_' + str(self.name)
            for layer in self.net_layout:
                self.name += '_' + str(layer) + '_'
            self.name += str(self.eps_decay) + '_'
            self.name += str(self.learn_rate) + '_'
            self.name += str(self.learn_iterations)
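
    # A hedged example of how this configuration is typically specialised; the
    # concrete values below are illustrative assumptions, not defaults shipped
    # with this module.
    #
    #     conf = Config()
    #     conf.name = 'Baseline'
    #     conf.net_layout = [128, 64]
    #     conf.eps_decay = 0.999
    #     conf.conf_to_name()
    #     # conf.name is now 'Lunar_Baseline_128__64__0.999_0.001_1'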
        
    
    
    def step(environment, action):
        """ Perform one iteratino in the environment. """
        following_state, reward, done, _ = environment.step(action)
        following_state = np.reshape(following_state, (1, environment.observation_space.shape[0]))
        return following_state, reward, done, _
    
    def reset(environment):
        """ Reset the environment, and return the new state. """
        state = environment.reset()
        state = np.reshape(state, (1, environment.observation_space.shape[0]))
        return state
    
    
    def one_episode(environment, agent, render, learn, conf=None, max_steps=1000):
        """ Perform one episode of the agent in the environment. """
        score = 0
        state = reset(environment)
        for _ in range(max_steps):
            if render:
                environment.render()
            action = agent.get_action(state)
            following_state, reward, done, _ = step(environment, action)
            agent.remember(state, action, reward, following_state, done)
            score += reward
            state = following_state
            if learn:
                if conf is not None:
                    agent.learn(epochs=conf.learn_iterations)
                else:
                    agent.learn()
            if done:
                break
        return score
    
    def learn_offline(agent, conf):
        """ Train the agent with its memories. """
        print('Learning with', len(agent.memory.history), 'memories.')
        agent.epsilon = agent.epsilon_min
    
        score_history = []
        avg_score_history = []
        desc_train = ''
        pbar = trange(conf.offline_epochs, desc='Loss: x')
        for i in pbar:
            loss = agent.learn(offline=True, epochs=conf.learn_iterations)
            desc = ('Loss: %05.4f' %(loss)) + desc_train
            pbar.set_description(desc)
            pbar.refresh()
            if conf.offline_validate_every_x_iteration != -1 and i % conf.offline_validate_every_x_iteration == 1:
                score = one_episode(conf.env, agent, conf.render, False, conf=conf)
                score_history.append(score)
                is_solved = np.mean(score_history[-25:])
                desc_train = (', Avg: %05.1f' %(is_solved))
                avg_score_history.append(is_solved)
                if is_solved > IS_SOLVED:
                    break
        process_logs(avg_score_history, score_history, conf)
    
    
    
    def run(environment, agent, episodes, render=True, learn=True, conf=None):
        """ Run an agent """
        conf.name = str(conf.name) + 'on'
        # Set the exploring rate to its minimum.
        # (epsilon *greedy*)
        if not learn:
            agent.epsilon = agent.epsilon_min
    
        score_history = []
        avg_score_history = []
        
        pbar = trange(episodes, desc=agent.name + ' [act, avg]: [0, 0]', unit="Episodes")
        for _ in pbar:
            score = one_episode(environment, agent, render, learn, conf=conf)
            score_history.append(score)
    
            is_solved = np.mean(score_history[-100:])
            avg_score_history.append(is_solved)
    
            if is_solved > IS_SOLVED and learn:
                break
            desc = (agent.name + " [act, avg]: [{0:.2f}, {1:.2f}]".format(score, is_solved))
            pbar.set_description(desc)
            pbar.refresh()
        return score_history, avg_score_history
    
    
    def process_logs(avg_score_history, score_history, conf):
        """ Persist the score history as CSV and save a plot of it. """
        df = pd.DataFrame(list(zip(score_history, avg_score_history)), columns=['Score', 'Average'])
        os.makedirs(conf.save_to + conf.name, exist_ok=True)
        df.to_csv(conf.save_to + conf.name + '/' + conf.name + '.csv')

        # Plot the log history.
        plt.figure()
        plt.plot([i+1 for i in range(0, len(score_history), 2)], score_history[::2])
        plt.plot([i+1 for i in range(0, len(avg_score_history), 2)], avg_score_history[::2], '--')
        plt.title(conf.name)
        plt.savefig(conf.save_to + conf.name + '/' + conf.name + '.png', format="png")
        if conf.render:
            plt.show()
    
    def load_logs(file):
        """ Load the score and average columns from a saved CSV log. """
        df = pd.read_csv(file)
        return df["Score"], df["Average"]