Skip to content
Snippets Groups Projects
Commit a0588b25 authored by Christoph Olberding's avatar Christoph Olberding
Browse files

Upload New File

parent 6ee34afa
Branches
No related tags found
No related merge requests found
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 1 10:06:55 2021
@author: Christoph
"""
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
# Seed NumPy's global random generator once, before any splitting happens.
np.random.seed(42)

# Load the raw table and discard its first column
# (presumably a saved index column -- TODO confirm against the CSV).
data = pd.read_csv('data_unfilled.csv')
data = data.iloc[:, 1:]

# Input columns shared by every feature matrix built in this section.
feature_cols = ['Streckenvorhersage.ZielortID', 'Streckenvorhersage.StartortID', 'time']
duration = data['Streckenvorhersage.Dauer']

# Complete rows (duration is not 0): used for training and evaluation.
data_cmpl = data[duration != 0]
X_cmpl = data_cmpl[feature_cols]
Y_cmpl = data_cmpl['Streckenvorhersage.Dauer']
X_cmpl_train, X_cmpl_test, y_cmpl_train, y_cmpl_test = train_test_split(
    X_cmpl, Y_cmpl, test_size=0.2
)

# Incomplete rows (duration stored as 0): the entries that are filled in later.
data_incmpl = data[duration == 0]
X_incmpl = data_incmpl[feature_cols]
Y_incmpl = data_incmpl['Streckenvorhersage.Dauer']
# Baseline: fit a small fully connected regression network on the complete
# rows only (80 -> 50 -> 30 -> 1 units, ReLU hidden layers, linear output).
myANN = Sequential([
    Dense(80, activation='relu', input_dim=X_cmpl.shape[1]),
    Dense(50, activation='relu'),
    Dense(30, activation='relu'),
    Dense(1, activation='linear'),
])
myANN.compile(optimizer='adam', loss='mean_squared_error')
myANN.fit(X_cmpl_train, y_cmpl_train, epochs=100, shuffle=True, verbose=False)

# Mean absolute deviation between predictions and targets on the held-out
# part of the complete rows.
yp = myANN.predict(X_cmpl_test).squeeze()
yDiff = yp - y_cmpl_test
mean_abs_dev = np.abs(yDiff).mean()
print('Mittlere Abweichung auf fehlende Daten: %e ' % mean_abs_dev)
# Impute the missing durations from the complete rows: every incomplete row
# receives the mean duration of all complete rows that share its 'time' value.
#
# The means are computed once per distinct 'time' and looked up with map(),
# instead of re-filtering data_cmpl for every single incomplete row.
mean_duration_by_time = data_cmpl.groupby('time')['Streckenvorhersage.Dauer'].mean()

# Work on an explicit copy so the assignment below never writes into a
# filtered view of `data`.
data_incmpl = data_incmpl.copy()
data_incmpl['Streckenvorhersage.Dauer'] = data_incmpl['time'].map(mean_duration_by_time)

# Rows whose 'time' has no complete counterpart come back as NaN from map();
# they cannot be imputed this way and are dropped.
data_incmpl = data_incmpl[data_incmpl['Streckenvorhersage.Dauer'].notna()]

X_incmpl = data_incmpl[['Streckenvorhersage.ZielortID', 'Streckenvorhersage.StartortID', 'time']]
Y_incmpl = data_incmpl['Streckenvorhersage.Dauer']
# Fresh split of the complete rows; 35 % are held out for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X_cmpl, Y_cmpl, test_size=0.35)

# Represent both target vectors as single-column frames so they can be
# stacked on top of each other.
y_train = y_train.to_frame(name='Streckenvorhersage.Dauer')
Y_incmpl = Y_incmpl.to_frame(name='Streckenvorhersage.Dauer')

# Extend the training set with the mean-imputed rows. pd.concat is used
# because DataFrame.append was removed in pandas 2.0. Features and targets
# are stacked in the same order, so their rows stay aligned.
X_train = pd.concat([X_train, X_incmpl])
y_train = pd.concat([y_train, Y_incmpl])
# Fit a network of the same architecture (80 -> 50 -> 30 -> 1) on the
# training set that was extended with the mean-imputed rows.
hidden_and_output = [
    Dense(80, activation='relu', input_dim=X_cmpl.shape[1]),
    Dense(50, activation='relu'),
    Dense(30, activation='relu'),
    Dense(1, activation='linear'),
]
myANN = Sequential(hidden_and_output)
myANN.compile(optimizer='adam', loss='mean_squared_error')
myANN.fit(X_train, y_train, epochs=100, shuffle=True, verbose=False)

# Mean absolute deviation on the held-out split of the complete rows.
yp = myANN.predict(X_test).squeeze()
yDiff = yp - y_test
mean_abs_dev = np.abs(yDiff).mean()
print('Mittlere Abweichung mit aufgefüllten Daten(mean): %e ' % mean_abs_dev)
# Reassemble the full table (training rows including the imputed ones,
# followed by the test rows) and write it to disk.
y_test = y_test.to_frame(name='Streckenvorhersage.Dauer')

# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# Features and targets are stacked in the same train-then-test order.
X_all = pd.concat([X_train, X_test])
y_all = pd.concat([y_train, y_test])

data = X_all
# Attach the targets by position: the single-column frame becomes an (n, 1)
# array, which is flattened to 1-D for the column assignment.
y_all = np.asarray(y_all)
data['Streckenvorhersage.Dauer'] = y_all.ravel()
data.to_csv('data_filled(mean).csv')
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment