Commit 8490e6fa authored by Silas Dohm

CNN model almost done

parent a9df72c2
#%% CNN
import os
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
import tensorflow as tf
import numpy as np
from tensorflow import keras                    # (elided in the diff hunk; needed for the callbacks below)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Dense, Flatten, GlobalMaxPooling1D  # (elided in the diff hunk)

modelNN = Sequential()
# two 1D convolutions over the 72x100 input (72 tokens, 100-dim word vectors)
modelNN.add(Conv1D(32, 7, activation='relu', input_shape=(72, 100)))
modelNN.add(Conv1D(32, 7, activation='relu'))
modelNN.add(GlobalMaxPooling1D())
modelNN.add(Flatten())
modelNN.add(Dense(512, activation='relu'))
modelNN.add(Dense(128, activation='relu'))
modelNN.add(Dense(10, activation='relu'))
modelNN.add(Dense(3, activation='softmax'))  # 3 sentiment classes
modelNN.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
#%%
from hdf5 import hdf5Generator
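# The hdf5 helper module is not part of this commit. Below is a minimal sketch
# of what hdf5Generator plausibly looks like, assuming hypothetical dataset
# names "<split>X"/"<split>Y" holding the 72x100 sentence matrices and integer
# labels; the real implementation may differ.
import h5py

def hdf5GeneratorSketch(filePath, batchSize, dataSet, loop=True):
    with h5py.File(filePath, 'r') as f:
        X, Y = f[dataSet + "X"], f[dataSet + "Y"]
        n = X.shape[0]
        while True:
            # yield contiguous full batches straight from disk
            for i in range(0, n - batchSize + 1, batchSize):
                yield (X[i:i + batchSize], Y[i:i + batchSize])
            if not loop:
                break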
path = "G:\\ml\\" path = "G:\\ml\\"
num_rows = 8000000 num_rows = 4.8E6
batchSize = 2048 batchSize = 2048
steps = num_rows/batchSize steps = num_rows/batchSize
#early stop #early stop
earlystop = keras.callbacks.EarlyStopping(monitor='accuracy',patience=10,verbose=False,restore_best_weights=True) earlystop = keras.callbacks.EarlyStopping(monitor='sparse_categorical_accuracy',patience=10,verbose=False,restore_best_weights=True)
cbList = [earlystop] cbList = [earlystop]
trainData = hdf5Generator(path + "w2vCNN.hdf5", batchSize, "Train") trainData = hdf5Generator(path + "w2vCNN.hdf5", batchSize, "Train")
valData = hdf5Generator(path + "w2vCNN.hdf5", batchSize, "Val") valData = hdf5Generator(path + "w2vCNN.hdf5", batchSize, "Val")
#%%
# class weights to counter the label imbalance between the three classes
cW = {0: 4.18, 1: 9.53, 2: 1.52}
hist = modelNN.fit(trainData, validation_data=valData, epochs=100, class_weight=cW, steps_per_epoch=steps, validation_steps=steps, callbacks=cbList)
modelNN.save("D:\\ml\\CNN-Classfication")
#modelNN.fit(train,epochs=12,validation_data=val,batch_size=batchSize,steps_per_epoch=num_rows/batchSize,callbacks=cbList,validation_steps=num_rows/batchSize)
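# The values in cW are presumably derived from the inverse class distribution;
# a hedged sketch of one common recipe (hypothetical helper, not in the commit):
def inverseFreqWeights(labels):
    counts = np.bincount(labels)                     # samples per class
    weights = counts.sum() / (len(counts) * counts)  # n / (k * n_c)
    return dict(enumerate(weights))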
# %% eval
testData = hdf5Generator(path + "w2vCNN.hdf5", batchSize, "Test", loop=False)
modelNN.evaluate(testData)
#%%
tD = hdf5Generator(path + "w2vCNN.hdf5", batchSize, "Test", loop=False)
y_pred = np.argmax(modelNN.predict(tD), axis=-1)
#%%
y_test = []
for (x, y) in hdf5Generator(path + "w2vCNN.hdf5", batchSize, "Test", loop=False):
    y_test.append(y)
y_test = np.concatenate(y_test)  # also handles a smaller final batch
#%% confusion matrix
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test, y_pred, normalize='true')
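# With normalize='true' each row of the matrix sums to 1, so the diagonal
# entries are the per-class recall values.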
#%%
# ---- second file in this commit ----
import os
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
from math import nan
from gensim.test.utils import datapath
from gensim import utils
from w2v_yelp_model import getWordVecModel
import pandas as pd
import json

model_path = "D:\\ml\\full_yelp_w2v_model"
def getSentenceVectorCNN(sentence):
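    # The diff elides this function's body; what follows is a hedged sketch
    # reconstructed from usage, not the author's code. Assumptions: `model` is
    # the gensim word2vec model loaded elsewhere via getWordVecModel(model_path),
    # its vectors are 100-dimensional, and the CNN input is 72 tokens long.
    words = utils.simple_preprocess(sentence)
    vecs = [model.wv[w] for w in words[:72] if w in model.wv]
    if not vecs:
        return nan  # matches the `from math import nan` import above
    out = np.zeros((72, 100))
    out[:len(vecs)] = vecs  # zero-pad short sentences to a fixed 72x100 matrix
    return out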
#%% Data Generator
import numpy as np
import json

def generate_arrays_from_file(path, batchsize, loop=True):
    inputs = []
    targets = []
    batchcount = 0
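    # The diff elides the read loop here; what follows is a hedged sketch of
    # the missing lines, assuming each line of the file is one JSON record with
    # hypothetical fields "text" and "label" (the real field names are not
    # shown in this commit):
    while True:
        with open(path) as f:
            for line in f:
                try:
                    review = json.loads(line)
                    vec = getSentenceVectorCNN(review["text"])
                    if vec is nan:
                        continue  # no known words in this review
                    inputs.append(vec)
                    targets.append(review["label"])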
                    batchcount += 1
                except:
                    continue  # skip malformed lines
                if batchcount >= batchsize:
                    X = np.array(inputs)
                    y = np.array(targets)
                    yield (X, y)
                    inputs = []
                    targets = []
                    batchcount = 0
        if not loop: break
#%% CNN
import tensorflow as tf
from tensorflow import keras  # (elided in the diff; needed for the callbacks below)
# (model definition elided in the diff: the same Conv1D stack as in the first file)
modelNN.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=["sparse_categorical_accuracy"])
# early stop
# (monitor matches the compiled metric name; plain 'val_accuracy' would never be found)
earlystop = keras.callbacks.EarlyStopping(monitor='val_sparse_categorical_accuracy', patience=25, verbose=False, restore_best_weights=True)
cbList = [earlystop]
#num_rows = 4.8E6
num_rows = 410000
batchSize = 2048
#hist = modelNN.fit(generate_arrays_from_file('./sample.json',128),epochs=1000,validation_split=0.2,batch_size=128,callbacks=cbList)
train = generate_arrays_from_file('D:\\ml\\data\\train.json', batchSize)
val = generate_arrays_from_file('D:\\ml\\data\\val.json', batchSize)
#%%
modelNN.fit(train, epochs=1, validation_data=val, steps_per_epoch=num_rows // batchSize, callbacks=cbList, validation_steps=num_rows // batchSize)
# %%
modelNN.evaluate(generate_arrays_from_file('D:\\ml\\data\\val.json', 16000, False))
# %%
y_pred = np.argmax(modelNN.predict(generate_arrays_from_file('D:\\ml\\data\\val.json', 16000, False)), axis=-1)
# %%
y_t = []
for a in generate_arrays_from_file('D:\\ml\\data\\val.json', batchSize, False):
    y_t.append(a[1])
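# (the commit stops here; flattening the collected label batches, as the first
# file does, would be the natural next step)
y_t = np.concatenate(y_t)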
# %%