From 5153a47f989ad4c5645b51442a571373f918d344 Mon Sep 17 00:00:00 2001 From: Silas Dohm <silas@sdohm.xyz> Date: Mon, 2 Aug 2021 03:34:19 +0200 Subject: [PATCH] fixed hdf5; not clean) --- python/hdf5.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/python/hdf5.py b/python/hdf5.py index 1368101..89c4081 100644 --- a/python/hdf5.py +++ b/python/hdf5.py @@ -13,6 +13,7 @@ if __name__ == '__main__': pathSilas = "G:\\ml\\" path = pathSilas data_path ="E:\\downloads\\yelp_dataset\\yelp_dataset\\yelp_academic_dataset_review.json" + #data_path = "D:\\ml\\data\\sample1.json" def getSentenceVectorCNN(sentence): split = utils.simple_preprocess(sentence) @@ -34,7 +35,7 @@ if __name__ == '__main__': import json i = 0 with h5py.File(path + "w2vCNN.hdf5", "w") as hf: - chunkSize = 10**4 + chunkSize = 10E3 trainChunk = int(chunkSize * 0.6) valTestChunk = int(chunkSize * 0.2) xTrain = [] @@ -122,23 +123,23 @@ if __name__ == '__main__': i += 1 index +=1 - XTrain.resize(XTrain.shape[0]+trainChunk, axis=0) - XTrain[-trainChunk:] = xTrain + #XTrain.resize(XTrain.shape[0]+trainChunk, axis=0) + #XTrain[-trainChunk:] = xTrain - YTrain.resize(YTrain.shape[0]+trainChunk, axis=0) - YTrain[-trainChunk:] = yTrain + #YTrain.resize(YTrain.shape[0]+trainChunk, axis=0) + #YTrain[-trainChunk:] = yTrain - XVal.resize(XVal.shape[0]+valTestChunk, axis=0) - XVal[-valTestChunk:] = xVal + #XVal.resize(XVal.shape[0]+valTestChunk, axis=0) + #XVal[-valTestChunk:] = xVal - YVal.resize(YVal.shape[0]+valTestChunk, axis=0) - YVal[-valTestChunk:] = yVal + #YVal.resize(YVal.shape[0]+valTestChunk, axis=0) + #YVal[-valTestChunk:] = yVal - XTest.resize(XTest.shape[0]+valTestChunk, axis=0) - XTest[-valTestChunk:] = xTest + #XTest.resize(XTest.shape[0]+valTestChunk, axis=0) + #XTest[-valTestChunk:] = xTest - YTest.resize(YTest.shape[0]+valTestChunk, axis=0) - YTest[-valTestChunk:] = yTest + #YTest.resize(YTest.shape[0]+valTestChunk, axis=0) + #YTest[-valTestChunk:] = yTest #%% import h5py -- GitLab