Skip to content
Snippets Groups Projects
Commit 5153a47f authored by Silas Dohm
Browse files

fixed hdf5 (not clean)

parent 8490e6fa
No related branches found
No related tags found
No related merge requests found
...@@ -13,6 +13,7 @@ if __name__ == '__main__': ...@@ -13,6 +13,7 @@ if __name__ == '__main__':
pathSilas = "G:\\ml\\" pathSilas = "G:\\ml\\"
path = pathSilas path = pathSilas
data_path ="E:\\downloads\\yelp_dataset\\yelp_dataset\\yelp_academic_dataset_review.json" data_path ="E:\\downloads\\yelp_dataset\\yelp_dataset\\yelp_academic_dataset_review.json"
#data_path = "D:\\ml\\data\\sample1.json"
def getSentenceVectorCNN(sentence): def getSentenceVectorCNN(sentence):
split = utils.simple_preprocess(sentence) split = utils.simple_preprocess(sentence)
...@@ -34,7 +35,7 @@ if __name__ == '__main__': ...@@ -34,7 +35,7 @@ if __name__ == '__main__':
import json import json
i = 0 i = 0
with h5py.File(path + "w2vCNN.hdf5", "w") as hf: with h5py.File(path + "w2vCNN.hdf5", "w") as hf:
chunkSize = 10**4 chunkSize = 10E3
trainChunk = int(chunkSize * 0.6) trainChunk = int(chunkSize * 0.6)
valTestChunk = int(chunkSize * 0.2) valTestChunk = int(chunkSize * 0.2)
xTrain = [] xTrain = []
...@@ -122,23 +123,23 @@ if __name__ == '__main__': ...@@ -122,23 +123,23 @@ if __name__ == '__main__':
i += 1 i += 1
index +=1 index +=1
XTrain.resize(XTrain.shape[0]+trainChunk, axis=0) #XTrain.resize(XTrain.shape[0]+trainChunk, axis=0)
XTrain[-trainChunk:] = xTrain #XTrain[-trainChunk:] = xTrain
YTrain.resize(YTrain.shape[0]+trainChunk, axis=0) #YTrain.resize(YTrain.shape[0]+trainChunk, axis=0)
YTrain[-trainChunk:] = yTrain #YTrain[-trainChunk:] = yTrain
XVal.resize(XVal.shape[0]+valTestChunk, axis=0) #XVal.resize(XVal.shape[0]+valTestChunk, axis=0)
XVal[-valTestChunk:] = xVal #XVal[-valTestChunk:] = xVal
YVal.resize(YVal.shape[0]+valTestChunk, axis=0) #YVal.resize(YVal.shape[0]+valTestChunk, axis=0)
YVal[-valTestChunk:] = yVal #YVal[-valTestChunk:] = yVal
XTest.resize(XTest.shape[0]+valTestChunk, axis=0) #XTest.resize(XTest.shape[0]+valTestChunk, axis=0)
XTest[-valTestChunk:] = xTest #XTest[-valTestChunk:] = xTest
YTest.resize(YTest.shape[0]+valTestChunk, axis=0) #YTest.resize(YTest.shape[0]+valTestChunk, axis=0)
YTest[-valTestChunk:] = yTest #YTest[-valTestChunk:] = yTest
#%% #%%
import h5py import h5py
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment