diff --git a/python/hdf5.py b/python/hdf5.py index 13681011d82fa99e7a21f4e533dbc75592aa4521..89c4081c63a1c8d252fc00ed46830d6c888129b4 100644 --- a/python/hdf5.py +++ b/python/hdf5.py @@ -13,6 +13,7 @@ if __name__ == '__main__': pathSilas = "G:\\ml\\" path = pathSilas data_path ="E:\\downloads\\yelp_dataset\\yelp_dataset\\yelp_academic_dataset_review.json" + #data_path = "D:\\ml\\data\\sample1.json" def getSentenceVectorCNN(sentence): split = utils.simple_preprocess(sentence) @@ -34,7 +35,7 @@ if __name__ == '__main__': import json i = 0 with h5py.File(path + "w2vCNN.hdf5", "w") as hf: - chunkSize = 10**4 + chunkSize = 10E3 trainChunk = int(chunkSize * 0.6) valTestChunk = int(chunkSize * 0.2) xTrain = [] @@ -122,23 +123,23 @@ if __name__ == '__main__': i += 1 index +=1 - XTrain.resize(XTrain.shape[0]+trainChunk, axis=0) - XTrain[-trainChunk:] = xTrain + #XTrain.resize(XTrain.shape[0]+trainChunk, axis=0) + #XTrain[-trainChunk:] = xTrain - YTrain.resize(YTrain.shape[0]+trainChunk, axis=0) - YTrain[-trainChunk:] = yTrain + #YTrain.resize(YTrain.shape[0]+trainChunk, axis=0) + #YTrain[-trainChunk:] = yTrain - XVal.resize(XVal.shape[0]+valTestChunk, axis=0) - XVal[-valTestChunk:] = xVal + #XVal.resize(XVal.shape[0]+valTestChunk, axis=0) + #XVal[-valTestChunk:] = xVal - YVal.resize(YVal.shape[0]+valTestChunk, axis=0) - YVal[-valTestChunk:] = yVal + #YVal.resize(YVal.shape[0]+valTestChunk, axis=0) + #YVal[-valTestChunk:] = yVal - XTest.resize(XTest.shape[0]+valTestChunk, axis=0) - XTest[-valTestChunk:] = xTest + #XTest.resize(XTest.shape[0]+valTestChunk, axis=0) + #XTest[-valTestChunk:] = xTest - YTest.resize(YTest.shape[0]+valTestChunk, axis=0) - YTest[-valTestChunk:] = yTest + #YTest.resize(YTest.shape[0]+valTestChunk, axis=0) + #YTest[-valTestChunk:] = yTest #%% import h5py