Skip to content
Snippets Groups Projects
Commit 17f2da77 authored by Christoph Olberding
Browse files

Update Imputer/compute_missing.py, Imputer/data.csv,...

Update Imputer/compute_missing.py, Imputer/data.csv, Imputer/data_unfilled.csv, Imputer/fill_missing.py, Imputer/impute_mean.py, Imputer/sort_csv.py, Imputer/data_filled(ANN).csv, Imputer/data_filled(mean).csv files
Deleted Imputer/data_filled_ANN_.csv, Imputer/data_filled_mean_.csv files
parent 3c23165a
No related branches found
No related tags found
No related merge requests found
......@@ -17,14 +17,14 @@ data= data.iloc[:,1:]
#komplette Daten fürs Training:
data_cmpl = data.loc[data['Streckenvorhersage.Dauer']!= 0]
X_cmpl = data_cmpl[['Streckenvorhersage.ZielortID','Streckenvorhersage.StartortID','time']]
X_cmpl = data_cmpl[['Streckenvorhersage.ZielortID','Streckenvorhersage.StartortID','time','weekday']]
Y_cmpl = data_cmpl['Streckenvorhersage.Dauer']
X_cmpl_train, X_cmpl_test, y_cmpl_train, y_cmpl_test = train_test_split(X_cmpl, Y_cmpl, test_size=0.2)
# fehlende Daten für Test:
data_incmpl = data.loc[data['Streckenvorhersage.Dauer']== 0]
X_incmpl = data_incmpl[['Streckenvorhersage.ZielortID','Streckenvorhersage.StartortID','time']]
X_incmpl = data_incmpl[['Streckenvorhersage.ZielortID','Streckenvorhersage.StartortID','time','weekday']]
Y_incmpl = data_incmpl['Streckenvorhersage.Dauer']
......
This diff is collapsed.
This diff is collapsed.
......@@ -46,7 +46,7 @@ for e in data['day_index'].unique(): # für alle verschiedenen tages_indexe
if(h==l):
exists=True # wenn die Zeit existiert
if (exists==False):
new_row={'Streckenvorhersage.ZielortID':k,'Streckenvorhersage.StartortID': 1,'Streckenvorhersage.Dauer':0,'time':h,'day_index':e}
new_row={'Streckenvorhersage.ZielortID':k,'Streckenvorhersage.StartortID': 1,'Streckenvorhersage.Dauer':0,'time':h,'day_index':e ,'weekday':(data.loc[data['day_index']==e]).loc[(data.loc[data['day_index']==e]).index[0],'weekday']}
data = data.append(new_row, ignore_index=True)
data.to_csv('data_unfilled.csv')
\ No newline at end of file
......@@ -17,14 +17,14 @@ data= data.iloc[:,1:]
#komplette Daten fürs Training:
data_cmpl = data.loc[data['Streckenvorhersage.Dauer']!= 0]
X_cmpl = data_cmpl[['Streckenvorhersage.ZielortID','Streckenvorhersage.StartortID','time']]
X_cmpl = data_cmpl[['Streckenvorhersage.ZielortID','Streckenvorhersage.StartortID','time','weekday']]
Y_cmpl = data_cmpl['Streckenvorhersage.Dauer']
X_cmpl_train, X_cmpl_test, y_cmpl_train, y_cmpl_test = train_test_split(X_cmpl, Y_cmpl, test_size=0.2)
# fehlende Daten für Test:
data_incmpl = data.loc[data['Streckenvorhersage.Dauer']== 0]
X_incmpl = data_incmpl[['Streckenvorhersage.ZielortID','Streckenvorhersage.StartortID','time']]
X_incmpl = data_incmpl[['Streckenvorhersage.ZielortID','Streckenvorhersage.StartortID','time','weekday']]
Y_incmpl = data_incmpl['Streckenvorhersage.Dauer']
......@@ -54,7 +54,7 @@ for index,row in data_incmpl.iterrows():
data_incmpl = data_incmpl[~np.isnan(data_incmpl['Streckenvorhersage.Dauer'])] # testing here
X_incmpl = data_incmpl[['Streckenvorhersage.ZielortID','Streckenvorhersage.StartortID','time']]
X_incmpl = data_incmpl[['Streckenvorhersage.ZielortID','Streckenvorhersage.StartortID','time','weekday']]
Y_incmpl = data_incmpl['Streckenvorhersage.Dauer']
......
......@@ -18,6 +18,7 @@ helpSeries= pd.DataFrame(np.zeros(relevantData.shape[0]))
#relevantData['year']=helpSeries
relevantData['time']=helpSeries
relevantData['day_index']=helpSeries
relevantData['weekday']=helpSeries
dates= data['Streckenvorhersage.Datum']
for d in range(dates.shape[0]):
......@@ -34,5 +35,7 @@ for d in range(dates.shape[0]):
#relevantData.at[d,'year']=year
relevantData.at[d,'time']=seconds
relevantData.at[d,'day_index']=day_index
relevantData.at[d,'weekday']=pd.to_datetime(year+'-'+month+'-'+day)
relevantData.at[d,'weekday']=relevantData.loc[d,'weekday'].dayofweek
relevantData.to_csv('data.csv')
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment