diff --git a/python/w2v_cnn_gen_hdf5.py b/python/w2v_cnn_gen_hdf5.py
index 10f321d231f752f99e5c18e159e46a0007285d19..78ee36eadce83735e26d88866bf8d580dc1ff0a9 100644
--- a/python/w2v_cnn_gen_hdf5.py
+++ b/python/w2v_cnn_gen_hdf5.py
@@ -1,5 +1,7 @@
 #%% CNN
+import os
+os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
 import tensorflow as tf
 import numpy as np
 from tensorflow.keras.models import Sequential
@@ -11,11 +13,10 @@
 modelNN = Sequential()
 modelNN.add(Conv1D(32, 7, activation='relu',input_shape=((72, 100))))
 modelNN.add(Conv1D(32, 7, activation='relu'))
-#modelNN.add(GlobalMaxPooling1D())
+modelNN.add(GlobalMaxPooling1D())
 modelNN.add(Flatten())
 modelNN.add(Dense(512,activation='relu'))
 modelNN.add(Dense(128,activation='relu'))
-#modelNN.add(Dense(50,activation='relu',input_dim=X[0].size))
 modelNN.add(Dense(10,activation='relu'))
 modelNN.add(Dense(3,activation='softmax'))
 modelNN.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=["sparse_categorical_accuracy"])
@@ -23,19 +24,33 @@ modelNN.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=
 #%%
 from hdf5 import hdf5Generator
 path = "G:\\ml\\"
-num_rows = 8000000
+num_rows = 4.8E6
 batchSize = 2048
 steps = num_rows/batchSize
 
 #early stop
-earlystop = keras.callbacks.EarlyStopping(monitor='accuracy',patience=10,verbose=False,restore_best_weights=True)
+earlystop = keras.callbacks.EarlyStopping(monitor='sparse_categorical_accuracy',patience=10,verbose=False,restore_best_weights=True)
 cbList = [earlystop]
 
 trainData = hdf5Generator(path + "w2vCNN.hdf5", batchSize, "Train")
 valData = hdf5Generator(path + "w2vCNN.hdf5", batchSize, "Val")
-hist = modelNN.fit(trainData, validation_data=valData, epochs=12, steps_per_epoch=steps, validation_steps=steps)
-
-#hist = modelNN.fit(hdf5Generator("vectors.hdf5", batchSize),epochs=15, steps_per_epoch=steps)
-#hist = modelNN.fit(hdf5Generator("vectors.hdf5", batchSize),epochs=15,batch_size=batchSize,callbacks=cbList)
+#%%
+cW = {0:4.18,1:9.53,2:1.52}
+hist = modelNN.fit(trainData, validation_data=valData, epochs=100,class_weight=cW, steps_per_epoch=steps, validation_steps=steps,callbacks=cbList)
+modelNN.save("D:\\ml\\CNN-Classfication")
 #modelNN.fit(train,epochs=12,validation_data=val,batch_size=batchSize,steps_per_epoch= num_rows/batchSize,callbacks=cbList,validation_steps=num_rows/batchSize)
+# %%eval
+testData = hdf5Generator(path + "w2vCNN.hdf5", batchSize, "Test",loop=False)
+modelNN.evaluate(testData)
+#%%
+tD = hdf5Generator(path + "w2vCNN.hdf5", batchSize, "Test",loop=False)
+y_pred = np.argmax(modelNN.predict(tD),axis=-1)
+#%%
+y_test=[]
+for (x,y) in hdf5Generator(path + "w2vCNN.hdf5", batchSize, "Test",loop=False):
+    y_test.append(y)
+y_test = np.array(y_test).flatten()
+#%% confusion matrix
+from sklearn.metrics import confusion_matrix
+confusion_matrix(y_test,y_pred,normalize='true')
 # %%
diff --git a/python/w2v_sentence_cnn_gen.py b/python/w2v_sentence_cnn_gen.py
index 045ecb28855cf20b556f23c6efee7faedc60e7c7..d7ca4c915902963b599e4bbb4181910a85aaa790 100644
--- a/python/w2v_sentence_cnn_gen.py
+++ b/python/w2v_sentence_cnn_gen.py
@@ -1,11 +1,8 @@
 #%%
 import os
-#os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
-from math import nan
-from gensim.test.utils import datapath
+os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
 from gensim import utils
 from w2v_yelp_model import getWordVecModel
-import pandas as pd
 import json
 
 model_path = "D:\\ml\\full_yelp_w2v_model"
@@ -31,7 +28,7 @@ def getSentenceVectorCNN(sentence):
 #%% Data Generator
 import numpy as np
 import json
-def generate_arrays_from_file(path, batchsize):
+def generate_arrays_from_file(path, batchsize,loop=True):
     inputs = []
     targets = []
     batchcount = 0
@@ -51,13 +48,14 @@ def generate_arrays_from_file(path, batchsize):
                     batchcount += 1
                 except:
                     continue
-                if batchcount > batchsize:
+                if batchcount >= batchsize:
                     X = np.array(inputs)
                     y = np.array(targets)
                     yield (X, y)
                     inputs = []
                     targets = []
                     batchcount = 0
+        if not loop: break
 
 #%% CNN
 import tensorflow as tf
@@ -84,12 +82,25 @@ modelNN.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=
 #early stop
 earlystop = keras.callbacks.EarlyStopping(monitor='val_accuracy',patience=25,verbose=False,restore_best_weights=True)
 cbList = [earlystop]
-num_rows = 350000
+#num_rows = 4.8E6
+num_rows = 410000
 batchSize = 2048
 
 #hist = modelNN.fit(generate_arrays_from_file('./sample.json',128),epochs=1000,validation_split=0.2,batch_size=128,callbacks=cbList)
 train = generate_arrays_from_file('D:\\ml\\data\\train.json',batchSize)
 val = generate_arrays_from_file('D:\\ml\\data\\val.json',batchSize)
-modelNN.fit(train,epochs=12,validation_data=val,batch_size=batchSize,steps_per_epoch= num_rows/batchSize,callbacks=cbList,validation_steps=num_rows/batchSize)
+#%%
+modelNN.fit(train,epochs=1,validation_data=val,steps_per_epoch= num_rows/batchSize,callbacks=cbList,validation_steps=num_rows/batchSize)
+
+# %%
+modelNN.evaluate(generate_arrays_from_file('D:\\ml\\data\\val.json',16000,False))
+# %%
+y_pred = np.argmax(modelNN.predict(generate_arrays_from_file('D:\\ml\\data\\val.json',16000,False)),axis=-1)
+# %%
+y_t = []
+for a in generate_arrays_from_file('D:\\ml\\data\\val.json',batchSize,False):
+    y_t.append(a[1])
+
+
 # %%
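
Both scripts depend on hdf5Generator from the repo's hdf5 module, which the diff imports and calls (with a new loop=False flag for the evaluation cells) but never shows. As a reading aid, here is a minimal sketch of the interface the new cells appear to assume; the dataset names ('TrainX'/'TrainY', etc.), the (N, 72, 100) layout, and the exact loop semantics are assumptions for illustration, not the module's confirmed implementation:

    import h5py

    def hdf5Generator(path, batchSize, group, loop=True):
        # Sketch only: yields (X, y) batches from one split of the HDF5 file.
        # Assumed layout: '<group>X' holds (N, 72, 100) word-vector windows and
        # '<group>Y' holds the N integer labels, e.g. 'TrainX'/'TrainY'.
        with h5py.File(path, "r") as f:
            X, y = f[group + "X"], f[group + "Y"]
            n = X.shape[0]
            while True:
                # h5py dataset slices come back as numpy arrays; full batches
                # only, so every yield is exactly batchSize rows
                for i in range(0, n - batchSize + 1, batchSize):
                    yield (X[i:i + batchSize], y[i:i + batchSize])
                if not loop:
                    break  # single pass when loop=False, as the eval cells expect

Under those assumptions every yielded label batch has the same length, which is why the y_test cell in w2v_cnn_gen_hdf5.py can stack the collected batches with np.array(y_test).flatten() and line them up with y_pred for the confusion matrix.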