From 5153a47f989ad4c5645b51442a571373f918d344 Mon Sep 17 00:00:00 2001
From: Silas Dohm <silas@sdohm.xyz>
Date: Mon, 2 Aug 2021 03:34:19 +0200
Subject: [PATCH] fixed hdf5 (not clean)

---
 python/hdf5.py | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/python/hdf5.py b/python/hdf5.py
index 1368101..89c4081 100644
--- a/python/hdf5.py
+++ b/python/hdf5.py
@@ -13,6 +13,7 @@ if __name__ == '__main__':
     pathSilas = "G:\\ml\\"
     path = pathSilas
     data_path ="E:\\downloads\\yelp_dataset\\yelp_dataset\\yelp_academic_dataset_review.json"
+    #data_path = "D:\\ml\\data\\sample1.json"
 
     def getSentenceVectorCNN(sentence):
         split = utils.simple_preprocess(sentence)
@@ -34,7 +35,7 @@ if __name__ == '__main__':
     import json
     i = 0
     with h5py.File(path + "w2vCNN.hdf5", "w") as hf:
-        chunkSize = 10**4
+        chunkSize = 10E3
         trainChunk = int(chunkSize * 0.6)
         valTestChunk = int(chunkSize * 0.2)
         xTrain = []
@@ -122,23 +123,23 @@ if __name__ == '__main__':
             i += 1
             index +=1
 
-        XTrain.resize(XTrain.shape[0]+trainChunk, axis=0)
-        XTrain[-trainChunk:] = xTrain
+        #XTrain.resize(XTrain.shape[0]+trainChunk, axis=0)
+        #XTrain[-trainChunk:] = xTrain
 
-        YTrain.resize(YTrain.shape[0]+trainChunk, axis=0)
-        YTrain[-trainChunk:] = yTrain
+        #YTrain.resize(YTrain.shape[0]+trainChunk, axis=0)
+        #YTrain[-trainChunk:] = yTrain
 
-        XVal.resize(XVal.shape[0]+valTestChunk, axis=0)
-        XVal[-valTestChunk:] = xVal
+        #XVal.resize(XVal.shape[0]+valTestChunk, axis=0)
+        #XVal[-valTestChunk:] = xVal
 
-        YVal.resize(YVal.shape[0]+valTestChunk, axis=0)
-        YVal[-valTestChunk:] = yVal        
+        #YVal.resize(YVal.shape[0]+valTestChunk, axis=0)
+        #YVal[-valTestChunk:] = yVal        
 
-        XTest.resize(XTest.shape[0]+valTestChunk, axis=0)
-        XTest[-valTestChunk:] = xTest
+        #XTest.resize(XTest.shape[0]+valTestChunk, axis=0)
+        #XTest[-valTestChunk:] = xTest
 
-        YTest.resize(YTest.shape[0]+valTestChunk, axis=0)
-        YTest[-valTestChunk:] = yTest
+        #YTest.resize(YTest.shape[0]+valTestChunk, axis=0)
+        #YTest[-valTestChunk:] = yTest
 
 #%%
 import h5py
-- 
GitLab