Select Git revision
loesung-1d-jawohl.c
random_forrest.py 2.34 KiB
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error,mean_absolute_percentage_error
from pprint import pprint
import matplotlib.pyplot as plt
# Read the CSV as a numeric matrix; the first line of the file is skipped.
dataset = np.genfromtxt(
    'Random Forest/data.csv',
    delimiter=',',
    skip_header=1,
)

# Column 0 is the regression target; columns 2..25 (24 columns) are the
# model inputs. Column 1 is not part of the feature matrix.
feature_columns = np.arange(2, 26)
y = dataset[:, 0]
x = dataset[:, feature_columns]

# Keep 25 % of the rows aside for evaluation; the fixed seed makes the
# split reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=42,
)

# Random forest with the library's default hyperparameters.
# Alternative configuration (disabled):
#model = RandomForestRegressor(random_state=42, n_estimators=750, max_depth=50)
model = RandomForestRegressor(random_state=42)
model.fit(x_train, y_train)
# Debug aid (disabled): dump the effective hyperparameters.
#pprint(model.get_params())
# Accuracy in percent, defined here as 100 * (1 - MAPE) of the predictions
# for x_test against y_test.
y_predict = model.predict(x_test)
mape = mean_absolute_percentage_error(y_test, y_predict)
accuracy = (1 - mape) * 100
print(f"Accuracy: {accuracy!s}%")
# Feature importance without distance
raw_importances = model.feature_importances_
pprint(raw_importances)

feature_list = ["Jahr","Monat","Tag","Stunde","Minute","Wochentag", "ZielortID"]

# The first five inputs are reported individually. Inputs 5..11 are summed
# into one "Wochentag" score and every remaining input into one "ZielortID"
# score (presumably one-hot encoded groups -- confirm against data.csv).
wochentag_importance = raw_importances[5:12].sum()
zielort_importance = raw_importances[12:].sum()
importances = np.concatenate(
    (raw_importances[0:5], [wochentag_importance, zielort_importance])
)

# Disabled variant with the distance ("Entfernung") as an additional first
# input; every slice boundary moves up by one:
#feature_list = ["Entfernung","Jahr","Monat","Tag","Stunde","Minute","Wochentag", "ZielortID"]
#wochentag_importance = raw_importances[6:13].sum()
#zielort_importance = raw_importances[13:].sum()
#importances = np.concatenate((raw_importances[0:6], [wochentag_importance, zielort_importance]))

# Convert the fractions to percentages, rounded to two decimals.
importances = np.round(importances * 100, decimals=2)
# Debug aid (disabled): ungrouped importances in percent.
#pprint(np.round(model.feature_importances_ * 100,decimals=2))

# Pair each label with its score and print the pairs.
zipped = list(zip(feature_list, importances))
pprint(zipped)
# Feature importance plot: one vertical bar per entry of `importances`,
# labelled with the matching entry of `feature_list`.
x_values = [*range(len(importances))]
plt.bar(x_values, importances, orientation='vertical')
# Tick labels are rotated to vertical.
plt.xticks(x_values, feature_list, rotation='vertical')
plt.title('Feature Analyse')
plt.xlabel('Feature')
plt.ylabel('Importance')
# Mean absolute error of the predictions against y_test (printed without a label).
absolute_error = mean_absolute_error(y_test, y_predict)
print(absolute_error)

# Average of the target values in y_test, labelled as seconds in the output.
average_time = np.average(y_test)
print(f"Average Time: {average_time!s} Sekunden")

# Average of column 1 over the complete dataset (not only the test rows),
# labelled as metres in the output.
average_distance = np.average(dataset[:, 1])
print(f"Average Distance: {average_distance!s} Meter")

# Displaying the figure is currently disabled.
#plt.show()