Commit 54698cb5 authored by Silas Dohm

restore point

parent 63bb3dba
@@ -180,6 +180,7 @@ confusion_matrix(Y_test,y_pred,normalize='true')
 \end{lstlisting}
 \wip{Tabelle auswerten}
 \begin{table}[ht]
+\def\arraystretch{1.3}
 \begin{center}
 \begin{tabular}{*{4}{R}}
 - &Negativ & Neutral & Positiv\\
@@ -195,6 +196,7 @@ von $85.7\%$, betrachtet man jedoch die Konfusionsmatrix in Tabelle \ref{tab:con
 sieht man das dort bloß $27\%$ der neutralen Rezensionen richtig klassifiziert wurden.
 \wip{Tabellenreferenz ???}
 \begin{table}[ht]
+\def\arraystretch{1.3}
 \begin{center}
 \begin{tabular}{*{4}{R}}
 - &Negativ & Neutral & Positiv\\
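The hunk headers above quote the scikit-learn call that produces these tables. As context, here is a minimal self-contained sketch (toy values, with Negativ/Neutral/Positiv assumed to be encoded as 0/1/2) of why the thesis reports a row-normalized confusion matrix next to accuracy: overall accuracy can be high while one class is almost never recovered.

    import numpy as np
    from sklearn.metrics import accuracy_score, confusion_matrix

    # toy labels: 0 = negative, 1 = neutral, 2 = positive (assumed encoding)
    Y_test = np.array([2, 2, 2, 2, 1, 0, 2, 1, 0, 2])
    y_pred = np.array([2, 2, 2, 2, 2, 0, 2, 0, 0, 2])

    print(accuracy_score(Y_test, y_pred))                     # 0.8 overall accuracy
    print(confusion_matrix(Y_test, y_pred, normalize='true'))
    # With normalize='true' every row (true class) sums to 1, so the diagonal
    # holds per-class recall; here the neutral row is all zeros despite 80 % accuracy.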
No preview for this file type (a further file changed in this commit has no text diff).
@@ -74,7 +74,7 @@
 }
 \newcolumntype{R}{>{\collectcell\ApplyGradient}c<{\endcollectcell}}
-\renewcommand{\arraystretch}{1.5}
+%\renewcommand{\arraystretch}{1.5}
 \setlength{\fboxsep}{3mm} % box size
 \setlength{\tabcolsep}{5pt}
 %-----------------------
@@ -23,8 +23,8 @@ modelNN.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=
 #%%
 from hdf5 import hdf5Generator
 path = "G:\\ml\\"
-num_rows = 340000
-batchSize = 512
+num_rows = 8000000
+batchSize = 2048
 steps = num_rows/batchSize
 #early stop
 earlystop = keras.callbacks.EarlyStopping(monitor='accuracy',patience=10,verbose=False,restore_best_weights=True)
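For orientation, a hedged sketch of how these constants are typically wired into training. hdf5Generator is a project-local module whose exact signature and data file are not shown in this commit, so the call below is an assumption rather than the author's code; note also that num_rows/batchSize is a float, while Keras expects an integer steps_per_epoch, and that the EarlyStopping above monitors training accuracy because no validation data is configured.

    # Assumed usage of the project-local hdf5Generator (signature and file name are guesses).
    steps = num_rows // batchSize   # integer division so fit() gets an int steps_per_epoch
    hist = modelNN.fit(
        hdf5Generator(path + "train.hdf5", batchSize),  # assumed to yield (X, y) batches endlessly
        steps_per_epoch=steps,
        epochs=50,                                      # illustrative value
        callbacks=[earlystop],
    )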
@@ -8,6 +8,7 @@ import pandas as pd
 import json
 from tensorflow.python.keras.constraints import MaxNorm
+from tensorflow.python.keras.layers.core import Dropout
 model_path = "D:\\ml\\full_yelp_w2v_model"
 #data_path ="C:\\Users\\sls21\\Documents\\Uni\\word2vec\\sample.json"
@@ -28,15 +29,15 @@ def getSentenceVectorCNN(sentence):
             i += 1
         except:
             pass
-    #if wordVecs == np.zeros((72,100)): #maybe dont alllow sentences with less than n wordvecotrs
-    #    raise Exception('words not found in w2v model')
+    if np.all(wordVecs[5:]==0):
+        raise Exception('not enough words found in w2v model')
     return wordVecs
 # %% data
 import numpy as np
 try:
-    X = np.load("./Data/X_cnn.npy")
-    Y = np.load("./Data/Y_cnn.npy")
+    X = np.load("D:/ml/data/X_cnn.npy")
+    Y = np.load("D:/ml/data/Y_cnn.npy")
 except:
     X = []
     Y = []
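The new guard rejects near-empty samples instead of silently returning an all-zero matrix. The full function is not visible in this commit, so the following is only a hedged reconstruction of what it appears to do, assuming a gensim word2vec model and the 72x100 matrix mentioned in the old comment.

    import numpy as np

    def get_sentence_vector_cnn(sentence, w2v_model, max_words=72, dim=100):
        # Map each known word to its word2vec vector, row by row; unknown words
        # leave a zero row (shape taken from the old "(72,100)" comment above).
        wordVecs = np.zeros((max_words, dim))
        i = 0
        for word in sentence.split():
            if i >= max_words:
                break
            try:
                wordVecs[i] = w2v_model.wv[word]  # KeyError for out-of-vocabulary words
                i += 1
            except KeyError:
                pass
        # Check added in this commit: if rows 5.. are still all zero, at most five
        # word vectors were filled in, so the sample is rejected rather than zero-padded.
        if np.all(wordVecs[5:] == 0):
            raise Exception('not enough words found in w2v model')
        return wordVecs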
@@ -61,8 +62,8 @@ except:
         #Y.append(1 if y>2 else 0)
     X = np.array(X)
     Y = np.array(Y)
-    #np.save("./Data/X_cnn",X)
-    #np.save("./Data/Y_cnn",Y)
+    np.save("D:/ml/data/X_cnn.npy",X)
+    np.save("D:/ml/data/Y_cnn.npy",Y)
 # %% CNN
 import tensorflow as tf
 import numpy as np
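Together with the try/except above, this turns the script into a load-or-rebuild cache for the vectorised data. A compact sketch of the same pattern (build_dataset is a hypothetical stand-in for the vectorisation loop, which the diff only shows in part):

    import os
    import numpy as np

    X_PATH, Y_PATH = "D:/ml/data/X_cnn.npy", "D:/ml/data/Y_cnn.npy"

    if os.path.exists(X_PATH) and os.path.exists(Y_PATH):
        X, Y = np.load(X_PATH), np.load(Y_PATH)  # reuse the cached arrays
    else:
        X, Y = build_dataset()                   # hypothetical: run the word2vec vectorisation once
        np.save(X_PATH, X)                       # persist so later runs skip the slow step
        np.save(Y_PATH, Y)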
@@ -87,35 +88,29 @@ import tensorflow as tf
 import numpy as np
 from tensorflow.keras.models import Sequential
 from tensorflow.keras.layers import Dense, Flatten
-from tensorflow.keras.layers import Conv1D,MaxPooling1D,GlobalMaxPooling1D
+from tensorflow.keras.layers import Conv1D,MaxPooling1D,GlobalMaxPooling1D,Dropout
 from tensorflow import keras
 modelNN = Sequential()
 modelNN.add(Conv1D(32, 7, activation='relu',input_shape=(X[0].shape)))
 modelNN.add(Conv1D(32, 7, activation='relu'))
-#modelNN.add(GlobalMaxPooling1D())
+modelNN.add(GlobalMaxPooling1D())
 modelNN.add(Flatten())
 modelNN.add(Dense(512,activation='relu'))
+modelNN.add(Dropout(0.5))
 modelNN.add(Dense(128,activation='relu'))
-#modelNN.add(Dense(50,activation='relu',input_dim=X[0].size))
+modelNN.add(Dropout(0.25))
 modelNN.add(Dense(10,activation='relu'))
+modelNN.add(Dropout(0.1))
 modelNN.add(Dense(3,activation='softmax'))
 modelNN.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=["accuracy"])
 # %% fit
 #early stop
-earlystop = keras.callbacks.EarlyStopping(monitor='val_accuracy',patience=10,verbose=False,restore_best_weights=True)
+earlystop = keras.callbacks.EarlyStopping(monitor='val_accuracy',patience=5,verbose=False,restore_best_weights=True)
 cbList = [earlystop]
-hist = modelNN.fit(X,Y,epochs=1000,validation_split=0.2,batch_size=128,callbacks=cbList)
+hist = modelNN.fit(X,Y,epochs=50,validation_split=0.2,batch_size=128,callbacks=cbList)
 #%%
\ No newline at end of file
-## Notes
-# np.unique(Y,return_counts=True)
-# (array([0, 1]), array([ 77051, 272891], dtype=int64))
-# /Y.size
-# -> array([0.2201822, 0.7798178])
-# -> Unbalanced dataset
-#current val_acc ~ 91.5 %
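The notes deleted in this hunk recorded how the label distribution was checked; the figures they quote (77051 vs. 272891, roughly a 22 % / 78 % split, with a val_accuracy around 91.5 %) appear to belong to the earlier binary labelling, not to the current three-class Y. The check itself is just:

    import numpy as np

    labels, counts = np.unique(Y, return_counts=True)  # per-class sample counts
    print(labels, counts)
    print(counts / Y.size)  # class shares; a skew like 0.22/0.78 indicates an unbalanced dataset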
@@ -24,8 +24,8 @@ def getSentenceVectorCNN(sentence):
             i += 1
         except:
             pass
-    #if wordVecs == np.zeros((72,100)): #maybe dont alllow sentences with less than n wordvecotrs
-    #    raise Exception('words not found in w2v model')
+    if np.all(wordVecs[5:]==0):
+        raise Exception('not enough words found in w2v model')
     return wordVecs
 #%% Data Generator
@@ -85,7 +85,7 @@ modelNN.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=
 earlystop = keras.callbacks.EarlyStopping(monitor='val_accuracy',patience=25,verbose=False,restore_best_weights=True)
 cbList = [earlystop]
 num_rows = 350000
-batchSize = 512
+batchSize = 2048
 #hist = modelNN.fit(generate_arrays_from_file('./sample.json',128),epochs=1000,validation_split=0.2,batch_size=128,callbacks=cbList)
 train = generate_arrays_from_file('D:\\ml\\data\\train.json',batchSize)
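generate_arrays_from_file itself lies outside the shown hunks. Below is a hedged sketch of such a JSON-lines batch generator, assuming each line of train.json is one Yelp review object with 'text' and 'stars' fields and a three-class star mapping (both assumptions, not confirmed by this diff):

    import json
    import numpy as np

    def generate_arrays_from_file(path, batch_size):
        # Loop over the file forever: Keras pulls steps_per_epoch batches per epoch.
        while True:
            with open(path, encoding='utf-8') as f:
                X_batch, Y_batch = [], []
                for line in f:
                    review = json.loads(line)
                    try:
                        x = getSentenceVectorCNN(review['text'])  # assumed field name
                    except Exception:
                        continue                                  # skip reviews with too few known words
                    # assumed mapping: <3 stars negative, 3 neutral, >3 positive
                    stars = review['stars']
                    y = 0 if stars < 3 else (1 if stars == 3 else 2)
                    X_batch.append(x)
                    Y_batch.append(y)
                    if len(X_batch) == batch_size:
                        yield np.array(X_batch), np.array(Y_batch)
                        X_batch, Y_batch = [], []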