From 8fbacdab8880683ec9a6c38b44b2ca52ebe6096e Mon Sep 17 00:00:00 2001
From: Nathan Zhao <nz11@tyler.cs.illinois.edu>
Date: Mon, 1 Feb 2021 02:21:32 -0600
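Subject: [PATCH] Fix ImageNet Keras benchmarks and remove cv2 dependency

Load the preprocessed test inputs and labels for the AlexNet, ResNet50 and
VGG16 ImageNet benchmarks from test_input.bin / test_labels.bin under
MODEL_PARAMS_DIR instead of decoding and preprocessing raw ImageNet images
with OpenCV at run time. X_train and y_train are now None for these
benchmarks. Drop the load_image helpers, the cv2 imports and the
opencv-python entry in keras_environment.yml, update the expected
accuracies in the README, and print reload_dir in lenet.py.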

---
 hpvm/projects/keras/README.md                |  6 +-
 hpvm/projects/keras/keras_environment.yml    |  1 -
 hpvm/projects/keras/src/alexnet_imagenet.py  | 73 ++-----------------
 hpvm/projects/keras/src/lenet.py             |  2 +
 hpvm/projects/keras/src/resnet50_imagenet.py | 69 ++----------------
 hpvm/projects/keras/src/vgg16_imagenet.py    | 74 ++------------------
 6 files changed, 25 insertions(+), 200 deletions(-)

diff --git a/hpvm/projects/keras/README.md b/hpvm/projects/keras/README.md
index 4d4ea4c1c8..70828896b0 100644
--- a/hpvm/projects/keras/README.md
+++ b/hpvm/projects/keras/README.md
@@ -48,14 +48,14 @@ List of benchmarks and the expected accuracies:
 | ----------- | ----------- |
 | AlexNet-CIFAR10      | 79.16       |
 | AlexNet2-CIFAR10   | 85.10        |
-| AlexNet-ImageNet | 56.23 | todo: fix broken
+| AlexNet-ImageNet | 56.30 |
 | LeNet-MNIST | 99.11 | todo: fix broken
 | MobileNet-CIFAR10 | 82.40 |
 | ResNet18-CIFAR10 | 89.52 |
-| ResNet50-ImageNet | 74.50 |
+| ResNet50-ImageNet | 75.10 |
 | VGG16-CIFAR10 | 89.42 |
 | VGG16-CIFAR100 | 66.20 |
-| VGG16-ImageNet | 72.50 | todo: fix broken
+| VGG16-ImageNet | 69.46 |
 
 Activate conda environment (above) before running benchmarks 
 
diff --git a/hpvm/projects/keras/keras_environment.yml b/hpvm/projects/keras/keras_environment.yml
index 89b088bc90..1f56f758be 100644
--- a/hpvm/projects/keras/keras_environment.yml
+++ b/hpvm/projects/keras/keras_environment.yml
@@ -32,5 +32,4 @@ dependencies:
     - msgpack==0.5.6
     - tables==3.4.4
     - torch==0.4.1
-    - opencv-python==4.5.1.48
 
diff --git a/hpvm/projects/keras/src/alexnet_imagenet.py b/hpvm/projects/keras/src/alexnet_imagenet.py
index c05f757a7c..5fceb31b31 100644
--- a/hpvm/projects/keras/src/alexnet_imagenet.py
+++ b/hpvm/projects/keras/src/alexnet_imagenet.py
@@ -4,7 +4,6 @@ import glob
 
 import numpy as np
 import tensorflow as tf
-import cv2
 import scipy
 import scipy.io
 import keras
@@ -29,75 +28,15 @@ IMAGES_PER_CLASS = 50
 
 
 class AlexNet(Benchmark):
-    
-    def load_image(self, x):
-
-        image = cv2.imread(x)
-
-        height, width, _ = image.shape
-        new_height = height * 256 // min(image.shape[:2])
-        new_width = width * 256 // min(image.shape[:2])
-        image = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_CUBIC)
-
-        # center crop 224 x 224
-        height, width, _ = image.shape
-        startx = width // 2 - (224 // 2)
-        starty = height // 2 - (224 // 2)
-        image = image[starty:starty + 224, startx:startx + 224]
-
-        image = image / 255 # normalize [0, 1]
-        image = image[:, :, ::-1] # BGR -> RGB
-        image = np.transpose(image, (2, 0, 1)) # (H, W, C) -> (C, H, W)
-
-        image[0, :, :] = (image[0, :, :] - 0.485) / 0.229
-        image[1, :, :] = (image[1, :, :] - 0.456) / 0.224
-        image[2, :, :] = (image[2, :, :] - 0.406) / 0.225
-
-        return image.astype(np.float32)
-
 
     def data_preprocess(self):
 
-        self.synset_to_keras_idx = {}
-
-        f = open(IMAGENET_DIR + 'ILSVRC2012_devkit_t12/data/synset_words.txt', 'r')
-        c = 0
-        for line in f:
-            parts = line.split(' ')
-            self.synset_to_keras_idx[parts[0]] = c
-            c += 1
-        f.close()
-
-
-        X_train, X_val = [], []
-        y_train, y_val = [], []
-
-        classes = glob.glob(IMAGENET_DIR + 'val/*')
-
-        for c in np.random.permutation(len(classes))[:NUM_TUNE_CLASSES]:
-            x = glob.glob(classes[c] + '/*')
-            x = np.array(x)
-
-            idx = np.random.permutation(len(x))
-            idx = idx[:min(len(idx), IMAGES_PER_CLASS)]
-
-            synset = classes[c].split('/')[-1]
-            images = list(map(lambda x : self.load_image(x), x[idx]))
-            labels = [self.synset_to_keras_idx[synset]] * len(x[idx])
-
-
-            split = int(len(idx) * 0.5)
-            X_val += images[:split]
-            y_val += labels[:split]
-
-            X_train += images[split:]
-            y_train += labels[split:]
-
-
-        X_train = np.array(X_train)
-        y_train = np.array(y_train)
-        X_val = np.array(X_val)
-        y_val = np.array(y_val)
+        X_val = np.fromfile(MODEL_PARAMS_DIR + '/alexnet_imagenet/test_input.bin', dtype=np.float32)
+        y_val = np.fromfile(MODEL_PARAMS_DIR + '/alexnet_imagenet/test_labels.bin', dtype=np.uint32)
+        
+        X_val = X_val.reshape((-1, 3, 224, 224)) 
+        X_train, y_train = None, None
+        
             
         X_test = X_val[0:5000]
         y_test = y_val[0:5000]
diff --git a/hpvm/projects/keras/src/lenet.py b/hpvm/projects/keras/src/lenet.py
index 0210baee26..83f4d3cf52 100644
--- a/hpvm/projects/keras/src/lenet.py
+++ b/hpvm/projects/keras/src/lenet.py
@@ -110,6 +110,8 @@ if __name__ == '__main__':
     src_dir = 'data/lenet_mnist_src/'
     num_classes = 10
     batch_size = 500
+    
+    print (reload_dir)
 
     model = LeNet_MNIST('LeNet_MNIST', reload_dir, keras_model_file, data_dir, src_dir, num_classes, batch_size)
     
diff --git a/hpvm/projects/keras/src/resnet50_imagenet.py b/hpvm/projects/keras/src/resnet50_imagenet.py
index 68cd96f0ca..bca4799b75 100644
--- a/hpvm/projects/keras/src/resnet50_imagenet.py
+++ b/hpvm/projects/keras/src/resnet50_imagenet.py
@@ -4,7 +4,6 @@ import glob
 
 import numpy as np
 import tensorflow as tf
-import cv2
 import scipy
 import scipy.io
 import keras
@@ -30,27 +29,6 @@ IMAGES_PER_CLASS = 50
 
 
 class ResNet50(Benchmark):
-
-    def load_image(self, x):
-
-        image = cv2.imread(x)
-
-        height, width, _ = image.shape
-        new_height = height * 256 // min(image.shape[:2])
-        new_width = width * 256 // min(image.shape[:2])
-        image = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_CUBIC)
-
-        height, width, _ = image.shape
-        startx = width // 2 - (224 // 2)
-        starty = height // 2 - (224 // 2)
-        image = image[starty:starty + 224, startx:startx + 224]
-
-        image = image[:, :, ::-1]
-        image = np.transpose(image, (2, 0, 1))
-        image = preprocess_input(image.astype(np.float32), data_format="channels_first")
-
-        return image.astype(np.float32)
-    
     
     def buildModel(self):
         
@@ -143,51 +121,18 @@ class ResNet50(Benchmark):
     
     def data_preprocess(self):
 
-        self.synset_to_keras_idx = {}
-
-        f = open(IMAGENET_DIR + 'ILSVRC2012_devkit_t12/data/synset_words.txt', 'r')
-        c = 0
-        for line in f:
-            parts = line.split(' ')
-            self.synset_to_keras_idx[parts[0]] = c
-            c += 1
-        f.close()
-
-
-        X_train, X_val = [], []
-        y_train, y_val = [], []
-
-        classes = glob.glob(IMAGENET_DIR + 'val/*')
-
-        for c in np.random.permutation(len(classes))[:NUM_TUNE_CLASSES]:
-            x = glob.glob(classes[c] + '/*')
-            x = np.array(x)
-
-            idx = np.random.permutation(len(x))
-            idx = idx[:min(len(idx), IMAGES_PER_CLASS)]
-
-            synset = classes[c].split('/')[-1]
-            images = list(map(lambda x : self.load_image(x), x[idx]))
-            labels = [self.synset_to_keras_idx[synset]] * len(x[idx])
-
-
-            split = int(len(idx) * 0.5)
-            X_val += images[:split]
-            y_val += labels[:split]
-
-            X_train += images[split:]
-            y_train += labels[split:]
-
-        X_train = np.array(X_train)
-        y_train = np.array(y_train)
-        X_val = np.array(X_val)
-        y_val = np.array(y_val)
+        X_val = np.fromfile(MODEL_PARAMS_DIR + '/resnet50_imagenet/test_input.bin', dtype=np.float32)
+        y_val = np.fromfile(MODEL_PARAMS_DIR + '/resnet50_imagenet/test_labels.bin', dtype=np.uint32)
+        
+        X_val = X_val.reshape((-1, 3, 224, 224)) 
+        X_train, y_train = None, None
+        
             
         X_test = X_val[0:5000]
         y_test = y_val[0:5000]
         X_tuner = X_val[5000:]
         y_tuner = y_val[5000:]
-
+        
         return X_train, y_train, X_test, y_test, X_tuner, y_tuner
     
 
diff --git a/hpvm/projects/keras/src/vgg16_imagenet.py b/hpvm/projects/keras/src/vgg16_imagenet.py
index 065150a434..5e2bef9c34 100644
--- a/hpvm/projects/keras/src/vgg16_imagenet.py
+++ b/hpvm/projects/keras/src/vgg16_imagenet.py
@@ -4,7 +4,6 @@ import glob
 
 import numpy as np
 import tensorflow as tf
-import cv2
 import scipy
 import scipy.io
 import keras
@@ -29,33 +28,7 @@ IMAGES_PER_CLASS = 50
 
 
 class VGG16(Benchmark):
-    
-    def load_image(self, x):
-
-        image = cv2.imread(x)
-
-        height, width, _ = image.shape
-        new_height = height * 256 // min(image.shape[:2])
-        new_width = width * 256 // min(image.shape[:2])
-        image = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_CUBIC)
-
-        # center crop 224 x 224
-        height, width, _ = image.shape
-        startx = width // 2 - (224 // 2)
-        starty = height // 2 - (224 // 2)
-        image = image[starty:starty + 224, startx:startx + 224]
-
-        image = image / 255 # normalize [0, 1]
-        image = image[:, :, ::-1] # BGR -> RGB
-        image = np.transpose(image, (2, 0, 1)) # (H, W, C) -> (C, H, W)
 
-        image[0, :, :] = (image[0, :, :] - 0.485) / 0.229
-        image[1, :, :] = (image[1, :, :] - 0.456) / 0.224
-        image[2, :, :] = (image[2, :, :] - 0.406) / 0.225
-
-        return image.astype(np.float32)
-
-    
     def buildModel(self):
         img_input = Input(shape=(3, 224, 224))
 
@@ -132,51 +105,18 @@ class VGG16(Benchmark):
 
     def data_preprocess(self):
 
-        self.synset_to_keras_idx = {}
-
-        f = open(IMAGENET_DIR + 'ILSVRC2012_devkit_t12/data/synset_words.txt', 'r')
-        c = 0
-        for line in f:
-            parts = line.split(' ')
-            self.synset_to_keras_idx[parts[0]] = c
-            c += 1
-        f.close()
-
-
-        X_train, X_val = [], []
-        y_train, y_val = [], []
-
-        classes = glob.glob(IMAGENET_DIR + 'val/*')
-
-        for c in np.random.permutation(len(classes))[:NUM_TUNE_CLASSES]:
-            x = glob.glob(classes[c] + '/*')
-            x = np.array(x)
-
-            idx = np.random.permutation(len(x))
-            idx = idx[:min(len(idx), IMAGES_PER_CLASS)]
-
-            synset = classes[c].split('/')[-1]
-            images = list(map(lambda x : self.load_image(x), x[idx]))
-            labels = [self.synset_to_keras_idx[synset]] * len(x[idx])
-
-
-            split = int(len(idx) * 0.5)
-            X_val += images[:split]
-            y_val += labels[:split]
-
-            X_train += images[split:]
-            y_train += labels[split:]
-
-        X_train = np.array(X_train)
-        y_train = np.array(y_train)
-        X_val = np.array(X_val)
-        y_val = np.array(y_val)
+        X_val = np.fromfile(MODEL_PARAMS_DIR + '/vgg16_imagenet/test_input.bin', dtype=np.float32)
+        y_val = np.fromfile(MODEL_PARAMS_DIR + '/vgg16_imagenet/test_labels.bin', dtype=np.uint32)
+        
+        X_val = X_val.reshape((-1, 3, 224, 224)) 
+        X_train, y_train = None, None
+        
             
         X_test = X_val[0:5000]
         y_test = y_val[0:5000]
         X_tuner = X_val[5000:]
         y_tuner = y_val[5000:]
-
+        
         return X_train, y_train, X_test, y_test, X_tuner, y_tuner
     
     
-- 
GitLab