Skip to content
Snippets Groups Projects
Select Git revision
  • c7d1f3d74b090e1bb91b7b647243fbf710f1dd26
  • master default protected
2 results

Notizen0326.txt

Blame
  • random_forrest.py 2.34 KiB
    from sklearn.model_selection import train_test_split
    import numpy as np
    import pandas as pd
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.metrics import mean_absolute_error,mean_absolute_percentage_error
    from pprint import pprint
    import matplotlib.pyplot as plt
    
    # ------------------------------------------------------------------
    # Data loading
    # ------------------------------------------------------------------
    # Read the CSV as a numeric matrix; the header row is skipped.
    dataset = np.genfromtxt('Random Forest/data.csv', delimiter=',', skip_header=1)

    # Column 0 is the regression target (reported below in "Sekunden").
    # Columns 2..25 are the 24 model inputs. Column 1 (reported below in
    # "Meter") is deliberately not part of the inputs.
    feature_columns = np.arange(2, 26)
    y = dataset[:, 0]
    x = dataset[:, feature_columns]

    # Fixed seed so the 75/25 split is reproducible between runs.
    x_train, x_test, y_train, y_test = train_test_split(
        x,
        y,
        test_size=0.25,
        random_state=42,
    )

    # ------------------------------------------------------------------
    # Model
    # ------------------------------------------------------------------
    # Default hyper-parameters. Alternative configuration kept for reference:
    #   RandomForestRegressor(random_state=42, n_estimators=750, max_depth=50)
    # The active parameters can be inspected with pprint(model.get_params()).
    model = RandomForestRegressor(random_state=42).fit(x_train, y_train)

    # ------------------------------------------------------------------
    # Accuracy in percent, defined here as (1 - MAPE) * 100
    # ------------------------------------------------------------------
    y_predict = model.predict(x_test)
    mape = mean_absolute_percentage_error(y_test, y_predict)
    accuracy = (1 - mape) * 100
    print("Accuracy: ", accuracy, '%', sep='')

    # ------------------------------------------------------------------
    # Feature importance (inputs without the distance column)
    # ------------------------------------------------------------------
    raw_importances = model.feature_importances_
    pprint(raw_importances)

    feature_list = ["Jahr","Monat","Tag","Stunde","Minute","Wochentag", "ZielortID"]

    # The first five importances are reported one-to-one. Indices 5..11 are
    # summed into a single "Wochentag" value and everything from index 12 on
    # into a single "ZielortID" value.
    wochentag_importance = raw_importances[5:12].sum()
    zielort_importance = raw_importances[12:].sum()
    importances = np.concatenate((
        raw_importances[:5],
        [wochentag_importance],
        [zielort_importance],
    ))

    # Variant with the distance column as the first input: prepend
    # "Entfernung" to feature_list and shift every offset above by one
    # ([0:6], [6:13], [13:]).

    # Express as percentages with two decimals and print (name, value) pairs.
    importances = np.round(importances * 100, decimals=2)
    zipped = list(zip(feature_list, importances))
    pprint(zipped)

    # ------------------------------------------------------------------
    # Feature importance plot
    # ------------------------------------------------------------------
    x_values = list(range(len(importances)))
    plt.bar(x_values, importances, orientation='vertical')
    plt.xticks(x_values, feature_list, rotation='vertical')
    plt.title('Feature Analyse')
    plt.xlabel('Feature')
    plt.ylabel('Importance')

    # ------------------------------------------------------------------
    # Summary numbers
    # ------------------------------------------------------------------
    # Mean absolute error on the held-out set (printed without a label).
    print(mean_absolute_error(y_test, y_predict))

    # Mean of the held-out targets.
    print("Average Time: ", np.average(y_test), " Sekunden", sep='')

    # Mean of column 1 over the whole dataset.
    print("Average Distance: ", np.average(dataset[:, 1]), " Meter", sep='')

    # Showing the figure is left disabled:
    # plt.show()