diff --git a/hpvm/test/dnn_benchmarks/keras/Benchmark.py b/hpvm/test/dnn_benchmarks/keras/Benchmark.py
new file mode 100644
index 0000000000000000000000000000000000000000..b0df9d98d606c6db3c0eef4af6acb4797f604651
--- /dev/null
+++ b/hpvm/test/dnn_benchmarks/keras/Benchmark.py
@@ -0,0 +1,158 @@
+
+
+import sys
+import os
+import shutil
+import subprocess
+from keras.utils.np_utils import to_categorical
+from keras.models import load_model
+from keras_frontend.approxhpvm_translator import translate_to_approxhpvm
+from keras_frontend.weight_utils import dumpCalibrationData
+from keras_frontend.weight_utils import reloadHPVMWeights
+
+
+# Every CNN Benchmark must inherit from Benchmark class
+# Defines common interfaces and virtual methods to be overridden by child classes
+class Benchmark:
+    """Common driver shared by every CNN benchmark script.
+
+    Subclasses override buildModel, data_preprocess and trainModel; run()
+    reloads or trains the model, evaluates it on the test set and can then
+    invoke the Keras frontend and the HPVM compile script.
+    """
+
+    def __init__(self, name, reload_dir, keras_model_file, data_dir, src_dir, num_classes, batch_size=500):
+        """Store the benchmark configuration; nothing is built or loaded here."""
+        self.name = name
+        self.reload_dir = reload_dir
+        self.keras_model_file = keras_model_file
+        self.data_dir = data_dir
+        self.src_dir = src_dir
+        self.num_classes = num_classes
+        self.batch_size = batch_size
+
+    def buildModel(self):
+        """Virtual: return the untrained Keras model (the base returns None)."""
+        return
+
+    def data_preprocess(self):
+        """Virtual: return X_train, y_train, X_test, y_test, X_tuner, y_tuner."""
+        return
+
+    def trainModel(self, model, X_train, y_train, X_test, y_test):
+        """Virtual: train `model` and return it; takes the arguments run() passes."""
+        return
+
+    def inference(self):
+        """Virtual hook; run() does not call it."""
+        return
+
+    def compileSource(self, working_dir, src_name, binary_name):
+        """Compile a frontend-generated source file with approxhpvm.py.
+
+        working_dir is resolved against the current directory and, after a
+        successful build, recorded in ./working_dir.txt. If approxhpvm.py
+        cannot be run or the build fails, an error is printed and the
+        process exits with status 1.
+        """
+        src_file = os.path.join(os.getcwd(), working_dir, src_name)
+        target_binary = os.path.join(os.getcwd(), working_dir, binary_name)
+        approx_conf_file = "tuner_confs.txt"
+
+        # Probe for the compile script first so that a PATH problem is reported
+        # separately from a genuine compilation failure.
+        try:
+            subprocess.run(["approxhpvm.py", "-h"], check=True, stdout=subprocess.DEVNULL)
+        except (OSError, subprocess.CalledProcessError):
+            print ("\n\n ERROR: Could not find approxhpvm.py (HPVM compile script)!! \n\n")
+            print ("To Compile, Must set PATH to include approxhpvm.py script. Do the following: ")
+            print ("**** export PATH=${PATH_TO_YOUR_HPVM_INSTALLATION}/build/bin/:$PATH *****")
+            sys.exit(1)
+
+        try:
+            subprocess.run([
+                "approxhpvm.py", src_file, target_binary,
+                "-t", "tensor", "--conf-file", approx_conf_file
+            ], check=True)
+        except (OSError, subprocess.CalledProcessError):
+            print ("\n\n ERROR: HPVM Compilation Failed!! \n\n")
+            sys.exit(1)
+
+        with open("working_dir.txt", "w+") as f:
+            f.write(working_dir)
+
+    def printUsage(self):
+        """Print the command-line synopsis and exit with status 0."""
+        print ("Usage: python ${benchmark.py} [hpvm_reload|keras_reload|train] [frontend|keras_dump] [compile] [compile_tuner]")
+        sys.exit(0)
+
+    def run(self, argv):
+        """Run one benchmark as selected by the command line `argv`.
+
+        argv[1] picks the weight source: hpvm_reload, keras_reload or train.
+        argv[2] is optional: `frontend` runs the translator, `keras_dump`
+        saves the Keras weights. After `frontend`, argv[3] may be `compile`
+        and argv[4] may be `compile_tuner`. The test accuracy, as a
+        percentage, is written to ./final_accuracy.
+        """
+        if len(argv) < 2:
+            self.printUsage()
+
+        print ("Build Model ...")
+        # Virtual method call implemented by each CNN
+        model = self.buildModel()
+
+        print ("Data Preprocess... \n")
+        # Virtual method call to preprocess test and train data
+        X_train, y_train, X_test, y_test, X_tuner, y_tuner = self.data_preprocess()
+
+        hpvm_reload = argv[1] == "hpvm_reload"
+        if hpvm_reload:
+            print ("loading weights .....\n\n")
+            model = reloadHPVMWeights(model, self.reload_dir, self.keras_model_file)
+        elif argv[1] == "keras_reload":
+            model.load_weights(self.keras_model_file)
+            model.compile(loss='categorical_crossentropy',
+                          optimizer='adam',
+                          metrics=['accuracy'])
+        elif argv[1] == "train":
+            print ("Train Model ...")
+            model = self.trainModel(model, X_train, y_train, X_test, y_test)
+        else:
+            self.printUsage()
+
+        score = model.evaluate(X_test, to_categorical(y_test, self.num_classes), verbose=0)
+        print('Test accuracy2:', score[1])
+
+        with open("final_accuracy", "w+") as f:
+            f.write(str(score[1] * 100))
+
+        if len(argv) > 2 and argv[2] == "frontend":
+            if hpvm_reload:
+                # The generated HPVM-C source loads weights from the reload directory.
+                self.data_dir = self.reload_dir
+
+            # Main call to ApproxHPVM-Keras Frontend
+            working_dir = translate_to_approxhpvm(model,
+                                                  self.data_dir, self.src_dir,
+                                                  X_test, y_test,
+                                                  X_tuner, y_tuner,
+                                                  self.batch_size,
+                                                  self.num_classes,
+                                                  hpvm_reload)  # Do not redump HPVM weights if `hpvm_reload` used
+
+            # Optional stages: show the usage text only for an argument that is
+            # present but unrecognised, never after a stage that succeeded.
+            if len(argv) > 3:
+                if argv[3] == "compile":
+                    self.compileSource(working_dir, "approxhpvm_src.cc", "HPVM_binary")
+                else:
+                    self.printUsage()
+            if len(argv) > 4:
+                if argv[4] == "compile_tuner":
+                    self.compileSource(working_dir, "approxhpvm_tuner_src.cc", "HPVM_tuner_binary")
+                else:
+                    self.printUsage()
+        elif len(argv) > 2 and argv[2] == "keras_dump":
+            model.save_weights(self.keras_model_file)
+
diff --git a/hpvm/test/dnn_benchmarks/keras/Config.py b/hpvm/test/dnn_benchmarks/keras/Config.py
new file mode 100644
index 0000000000000000000000000000000000000000..99e696d632c50db4ae8098a2f4836ca994b672aa
--- /dev/null
+++ b/hpvm/test/dnn_benchmarks/keras/Config.py
@@ -0,0 +1,13 @@
+
+import pathlib
+
+
+# Absolute directory holding this file; MODEL_PARAMS_DIR is expressed
+# relative to it (four levels up, then down into the model parameters).
+abs_path = pathlib.Path(__file__).parent.absolute()
+_PARAMS_SUFFIX = "/../../../../hpvm/test/dnn_benchmarks/model_params/"
+MODEL_PARAMS_DIR = "".join((str(abs_path), _PARAMS_SUFFIX))
+
+if __name__ == "__main__":
+    # Script mode: print the directory the path above is anchored to.
+    print (pathlib.Path(__file__).parent.absolute())
diff --git a/hpvm/test/dnn_benchmarks/keras/alexnet.py b/hpvm/test/dnn_benchmarks/keras/alexnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..0eefe1b3d3dfa28cd009d74806a9bff41f6d597b
--- /dev/null
+++ b/hpvm/test/dnn_benchmarks/keras/alexnet.py
@@ -0,0 +1,159 @@
+import os
+import sys
+import glob
+
+import numpy as np
+import tensorflow as tf
+import scipy
+import scipy.io
+import keras
+from keras.models import Model, Sequential
+from keras.layers import *
+from keras.optimizers import Adam
+from keras import regularizers
+from keras import backend as K
+from keras.utils import to_categorical
+from keras.preprocessing.image import ImageDataGenerator
+from keras.callbacks import LearningRateScheduler
+
+from keras.datasets import cifar10
+from Benchmark import Benchmark
+from Config import MODEL_PARAMS_DIR
+
+
+# Inherits from Benchmark class defined in src/Benchmark.py
+class AlexNet_CIFAR10(Benchmark):
+    """AlexNet-style CIFAR-10 benchmark using channels-first (3, 32, 32) inputs."""
+
+    # buildModel overrides the buildModel declared in src/Benchmark.py
+    # Goal: Build a Keras Sequential Model (other model types not supported) and return the (uninitialized/untrained) Model
+    def buildModel(self):
+        """Return the untrained network: five tanh convolutions and a softmax classifier."""
+        activation_type = 'tanh'
+        weight_decay = 1e-4
+
+        model = Sequential()
+
+        model.add(Conv2D(64, (11, 11), padding='same', activation=activation_type,
+                         kernel_regularizer=regularizers.l2(weight_decay), input_shape=(3, 32, 32)))
+        model.add(MaxPooling2D(2, 2))
+        model.add(Dropout(0.2))
+
+        model.add(Conv2D(192, (5, 5), padding='same', activation=activation_type,
+                         kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(MaxPooling2D(2, 2))
+        model.add(Dropout(0.3))
+
+        model.add(Conv2D(384, (3, 3), padding='same', activation=activation_type,
+                         kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(Conv2D(256, (3, 3), padding='same', activation=activation_type,
+                         kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(Conv2D(256, (3, 3), padding='same', activation=activation_type,
+                         kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(MaxPooling2D(2, 2))
+        model.add(Dropout(0.4))
+
+        model.add(Flatten())
+        model.add(Dense(self.num_classes))
+        model.add(Activation('softmax'))
+
+        return model
+
+    # This routine is called from the common `run` method in src/Benchmark.py
+    # Goal: Return Training and Testing data after preprocessing/normalization
+    def data_preprocess(self):
+        """Return X_train, y_train, X_test, y_test, X_tuner, y_tuner.
+
+        Training images come from keras.datasets.cifar10, are divided by 255
+        and standardised with their global mean and std. The test and tuner
+        sets are read from MODEL_PARAMS_DIR as float32 inputs (reshaped to
+        (-1, 3, 32, 32)) and uint32 labels, with no normalisation applied here.
+        """
+        # Only the training split of the Keras dataset is used.
+        (X_train, y_train), _ = cifar10.load_data()
+
+        X_train = X_train / 255.0
+        mean = np.mean(X_train)
+        std = np.std(X_train)
+        X_train = (X_train - mean) / (std + 1e-7)  # epsilon avoids division by a zero std
+
+        params_dir = MODEL_PARAMS_DIR + '/alexnet_cifar10/'
+        X_test = np.fromfile(params_dir + 'test_input.bin', dtype=np.float32)
+        y_test = np.fromfile(params_dir + 'test_labels.bin', dtype=np.uint32)
+        X_test = X_test.reshape((-1,3,32,32))
+
+        X_tuner = np.fromfile(params_dir + 'tune_input.bin', dtype=np.float32)
+        y_tuner = np.fromfile(params_dir + 'tune_labels.bin', dtype=np.uint32)
+        X_tuner = X_tuner.reshape((-1,3,32,32))
+
+        return X_train, y_train, X_test, y_test, X_tuner, y_tuner
+
+    # Goal: Given a Keras Sequential Model - setup the training parameters, train, and return the trained Model
+    def trainModel(self, model, X_train, y_train, X_test, y_test):
+        """Compile `model` with Adam, train it for 100 epochs and return it.
+
+        Labels are one-hot encoded here; (X_test, y_test) is passed to fit()
+        as validation data and lr_schedule supplies the per-epoch learning rate.
+        """
+        y_train = to_categorical(y_train, self.num_classes)
+        y_test = to_categorical(y_test, self.num_classes)
+
+        model.compile(
+            loss='categorical_crossentropy',
+            optimizer=Adam(lr=0.0001, decay=1e-6),
+            metrics=['accuracy']
+        )
+
+        # No data augmentation: fit() below receives the arrays directly, so no
+        # ImageDataGenerator is constructed (it would only cost time and memory).
+        def lr_schedule(epoch):
+            lrate = 0.001
+            if epoch > 20:
+                lrate = 0.0005
+            if epoch > 40:
+                lrate = 0.0003
+            if epoch > 60:
+                lrate = 0.0001
+            if epoch > 80:
+                lrate = 0.00005
+            return lrate
+
+        model.fit(
+            X_train,
+            y_train,
+            batch_size=128,
+            shuffle=True,
+            epochs=100,
+            validation_data=(X_test, y_test),
+            callbacks=[LearningRateScheduler(lr_schedule)]
+        )
+
+        return model
+
+
+    
+if __name__ == '__main__':
+    # Select GPU 0; change the ID to run on a different device.
+    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+    # HPVM currently supports only NCHW, so switch Keras to channels-first.
+    K.set_image_data_format('channels_first')
+
+    # ---- Parameters specific to this benchmark ----
+    num_classes = 10  # CIFAR10 has 10 output classes
+    batch_size = 500  # adjust to fit your GPU memory
+    src_dir = 'data/alexnet_cifar10_src/'  # directory for the HPVM sources
+    data_dir = ''  # can stay empty when reloading weights (the value is ignored)
+    # Weights are loaded either from the HPVM files in reload_dir or from
+    # the Keras model file.
+    reload_dir = MODEL_PARAMS_DIR + '/alexnet_cifar10/'
+    keras_model_file = MODEL_PARAMS_DIR + '/alexnet_cifar10/weights.h5'
+
+    # All benchmark classes share the constructor inherited from Benchmark.
+    model = AlexNet_CIFAR10(
+        name='AlexNet_CIFAR10', reload_dir=reload_dir, keras_model_file=keras_model_file,
+        data_dir=data_dir, src_dir=src_dir, num_classes=num_classes, batch_size=batch_size)
+
+    # Enter the common run() driver with the command-line arguments.
+    model.run(sys.argv)
+
+    
diff --git a/hpvm/test/dnn_benchmarks/keras/alexnet2.py b/hpvm/test/dnn_benchmarks/keras/alexnet2.py
new file mode 100644
index 0000000000000000000000000000000000000000..d2c7d566bb2793a848bdb88c19e2905e6030d588
--- /dev/null
+++ b/hpvm/test/dnn_benchmarks/keras/alexnet2.py
@@ -0,0 +1,147 @@
+import os
+import sys
+import glob
+
+import numpy as np
+import tensorflow as tf
+import scipy
+import scipy.io
+import keras
+from keras.models import Model, Sequential
+from keras.layers import *
+from keras.optimizers import Adam
+from keras import regularizers
+from keras import backend as K
+from keras.utils import to_categorical
+from keras.preprocessing.image import ImageDataGenerator
+from keras.callbacks import LearningRateScheduler
+
+from keras.datasets import cifar10
+from Benchmark import Benchmark
+from Config import MODEL_PARAMS_DIR
+
+
+
+class AlexNet2_CIFAR10(Benchmark):
+    """Six-convolution CIFAR-10 benchmark using channels-first (3, 32, 32) inputs."""
+
+    def buildModel(self):
+        """Return the untrained Sequential model ending in a softmax over self.num_classes."""
+        weight_decay = 1e-4
+        activation_type = 'tanh'
+        model = Sequential()
+        # Stage 1: two 32-filter convolutions, pooling, dropout
+        model.add(Conv2D(32, (3, 3), padding='same', kernel_regularizer=regularizers.l2(weight_decay), input_shape=(3, 32, 32)))
+        model.add(Activation(activation_type))
+        model.add(Conv2D(32, (3, 3), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(Activation(activation_type))
+        model.add(MaxPooling2D(pool_size=(2, 2)))
+        model.add(Dropout(0.2))
+
+        # Stage 2: two 64-filter convolutions, pooling, dropout
+        model.add(Conv2D(64, (3, 3), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(Activation(activation_type))
+        model.add(Conv2D(64, (3, 3), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(Activation(activation_type))
+        model.add(MaxPooling2D(pool_size=(2, 2)))
+        model.add(Dropout(0.3))
+
+        # Stage 3: two 128-filter convolutions, pooling, dropout
+        model.add(Conv2D(128, (3, 3), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(Activation(activation_type))
+        model.add(Conv2D(128, (3, 3), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(Activation(activation_type))
+        model.add(MaxPooling2D(pool_size=(2, 2)))
+        model.add(Dropout(0.4))
+
+        model.add(Flatten())
+        model.add(Dense(self.num_classes))
+        model.add(Activation('softmax'))
+
+        return model
+
+    def data_preprocess(self):
+        """Return X_train, y_train, X_test, y_test, X_tuner, y_tuner.
+
+        Training images come from keras.datasets.cifar10, are divided by 255
+        and standardised with their global mean and std. The test and tuner
+        sets are read from MODEL_PARAMS_DIR as float32 inputs (reshaped to
+        (-1, 3, 32, 32)) and uint32 labels, with no normalisation applied here.
+        """
+        (X_train, y_train), _ = cifar10.load_data()
+
+        X_train = X_train / 255.0
+        mean = np.mean(X_train)
+        std = np.std(X_train)
+        X_train = (X_train - mean) / (std + 1e-7)  # epsilon avoids division by a zero std
+
+        params_dir = MODEL_PARAMS_DIR + '/alexnet2_cifar10/'
+        X_test = np.fromfile(params_dir + 'test_input.bin', dtype=np.float32)
+        y_test = np.fromfile(params_dir + 'test_labels.bin', dtype=np.uint32)
+        X_test = X_test.reshape((-1,3,32,32))
+
+        X_tuner = np.fromfile(params_dir + 'tune_input.bin', dtype=np.float32)
+        y_tuner = np.fromfile(params_dir + 'tune_labels.bin', dtype=np.uint32)
+        X_tuner = X_tuner.reshape((-1,3,32,32))
+
+        return X_train, y_train, X_test, y_test, X_tuner, y_tuner
+
+    def trainModel(self, model, X_train, y_train, X_test, y_test):
+        """Compile `model` with Adam, train it for 100 epochs and return it.
+
+        Labels are one-hot encoded here; (X_test, y_test) is passed to fit()
+        as validation data and lr_schedule supplies the per-epoch learning rate.
+        """
+        y_train = to_categorical(y_train, self.num_classes)
+        y_test = to_categorical(y_test, self.num_classes)
+
+        model.compile(
+            loss='categorical_crossentropy',
+            optimizer=Adam(lr=0.0001),
+            metrics=['accuracy']
+        )
+
+        # No data augmentation: fit() below receives the arrays directly.
+        def lr_schedule(epoch):
+            lrate = 0.001
+            if epoch > 20:
+                lrate = 0.0005
+            if epoch > 40:
+                lrate = 0.0003
+            if epoch > 60:
+                lrate = 0.0001
+            return lrate
+
+        model.fit(
+            X_train,
+            y_train,
+            batch_size=128,
+            shuffle=True,
+            epochs=100,
+            validation_data=(X_test, y_test),
+            callbacks=[LearningRateScheduler(lr_schedule)]
+        )
+
+        return model
+
+
+    
+if __name__ == '__main__':
+    # Expose only GPU 0 to this process.
+    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+    # Switch Keras to the NCHW (channels-first) layout.
+    K.set_image_data_format('channels_first')
+
+    # Parameters specific to this benchmark
+    num_classes = 10
+    batch_size = 500
+    src_dir = 'data/alexnet2_cifar10_src/'
+    data_dir = ''
+    reload_dir = MODEL_PARAMS_DIR + '/alexnet2_cifar10/'
+    keras_model_file = MODEL_PARAMS_DIR + '/alexnet2_cifar10/weights.h5'
+
+    model = AlexNet2_CIFAR10(
+        name='AlexNet2_CIFAR10', reload_dir=reload_dir, keras_model_file=keras_model_file,
+        data_dir=data_dir, src_dir=src_dir, num_classes=num_classes, batch_size=batch_size)
+
+    model.run(sys.argv)
diff --git a/hpvm/test/dnn_benchmarks/keras/alexnet_imagenet.py b/hpvm/test/dnn_benchmarks/keras/alexnet_imagenet.py
new file mode 100644
index 0000000000000000000000000000000000000000..1cfe7a79c2a1350689d09d07fdc50f3ce998d8af
--- /dev/null
+++ b/hpvm/test/dnn_benchmarks/keras/alexnet_imagenet.py
@@ -0,0 +1,107 @@
+import os
+import sys
+import glob
+
+import numpy as np
+import tensorflow as tf
+import scipy
+import scipy.io
+import keras
+from keras.models import Model, Sequential
+from keras.layers import *
+from keras.optimizers import Adam
+from keras import regularizers
+from keras import backend as K
+from keras.utils import to_categorical
+from keras.preprocessing.image import ImageDataGenerator
+from keras.callbacks import LearningRateScheduler
+
+from Benchmark import Benchmark
+from Config import MODEL_PARAMS_DIR
+
+
+
+class AlexNet(Benchmark):
+    """AlexNet ImageNet benchmark on channels-first (3, 224, 224) inputs; no training."""
+
+    def data_preprocess(self):
+        """Return X_train, y_train, X_test, y_test, X_tuner, y_tuner.
+
+        No training data is loaded (both training entries are None); the test and
+        tuner sets are float32 inputs and uint32 labels read from MODEL_PARAMS_DIR.
+        """
+        X_train, y_train = None, None
+
+        params_dir = MODEL_PARAMS_DIR + '/alexnet_imagenet/'
+        X_test = np.fromfile(params_dir + 'test_input.bin', dtype=np.float32)
+        X_test = X_test.reshape((-1, 3, 224, 224))
+        y_test = np.fromfile(params_dir + 'test_labels.bin', dtype=np.uint32)
+
+        X_tuner = np.fromfile(params_dir + 'tune_input.bin', dtype=np.float32)
+        X_tuner = X_tuner.reshape((-1, 3, 224, 224))
+        y_tuner = np.fromfile(params_dir + 'tune_labels.bin', dtype=np.uint32)
+
+        return X_train, y_train, X_test, y_test, X_tuner, y_tuner
+
+    def buildModel(self):
+        """Return the untrained functional Model: five ReLU convolutions, three dense layers."""
+        input_layer = Input((3, 224, 224))
+
+        x = ZeroPadding2D((2, 2))(input_layer)
+        x = Conv2D(64, (11, 11), strides=4, padding='valid')(x)
+        x = Activation('relu')(x)
+        x = MaxPooling2D(3, 2)(x)
+
+        x = ZeroPadding2D((2, 2))(x)
+        x = Conv2D(192, (5, 5), padding='valid')(x)
+        x = Activation('relu')(x)
+        x = MaxPooling2D(3, 2)(x)
+
+        x = Conv2D(384, (3, 3), padding='same')(x)
+        x = Activation('relu')(x)
+        x = Conv2D(256, (3, 3), padding='same')(x)
+        x = Activation('relu')(x)
+        x = Conv2D(256, (3, 3), padding='same')(x)
+        x = Activation('relu')(x)
+        x = MaxPooling2D(3, 2)(x)
+
+        x = Flatten()(x)
+        x = Dropout(0.5)(x)
+        x = Dense(4096)(x)
+        x = Activation('relu')(x)
+        x = Dropout(0.5)(x)
+        x = Dense(4096)(x)
+        x = Activation('relu')(x)
+        x = Dense(self.num_classes)(x)
+        x = Activation('softmax')(x)
+
+        return Model(input_layer, x)
+
+    def trainModel(self, model, X_train, y_train, X_test, y_test):
+        """Always raise AssertionError: ImageNet training is unsupported (also under -O)."""
+        raise AssertionError("ImageNet training not supported - use Pretrained weights")
+
+
+    
+if __name__ == '__main__':
+    # Expose only GPU 0 to this process.
+    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+    # Switch Keras to the NCHW (channels-first) layout.
+    K.set_image_data_format('channels_first')
+
+    # Parameters specific to this benchmark
+    num_classes = 1000
+    batch_size = 50
+    src_dir = 'data/alexnet_imagenet_src/'
+    data_dir = ''
+    reload_dir = MODEL_PARAMS_DIR + '/alexnet_imagenet/'
+    keras_model_file = MODEL_PARAMS_DIR + '/alexnet_imagenet/weights.h5'
+
+    model = AlexNet(
+        name='AlexNet_Imagenet', reload_dir=reload_dir, keras_model_file=keras_model_file,
+        data_dir=data_dir, src_dir=src_dir, num_classes=num_classes, batch_size=batch_size)
+
+    model.run(sys.argv)
+
+
+    
diff --git a/hpvm/test/dnn_benchmarks/keras/lenet.py b/hpvm/test/dnn_benchmarks/keras/lenet.py
new file mode 100644
index 0000000000000000000000000000000000000000..70dd73a66ad49cee83a0f061d1240522332c469c
--- /dev/null
+++ b/hpvm/test/dnn_benchmarks/keras/lenet.py
@@ -0,0 +1,115 @@
+import os
+import sys
+import glob
+
+import numpy as np
+import tensorflow as tf
+import scipy
+import scipy.io
+import keras
+from keras.models import Model, Sequential
+from keras.layers import *
+from keras.optimizers import Adam
+from keras import regularizers
+from keras import backend as K
+from keras.utils import to_categorical
+from keras.preprocessing.image import ImageDataGenerator
+from keras.callbacks import LearningRateScheduler
+
+from keras.datasets import mnist
+from Benchmark import Benchmark
+from Config import MODEL_PARAMS_DIR
+
+
+
+class LeNet_MNIST(Benchmark):
+    """LeNet-style MNIST benchmark on channels-first (1, 28, 28) inputs."""
+
+    def buildModel(self):
+        """Return the untrained network: two conv/pool stages, two dense layers, softmax."""
+        # Network Composition: 2 Conv Layers, 2 Dense Layers
+        model = Sequential()
+        stack = [
+            # ConvLayer1
+            Conv2D(32, kernel_size=(5, 5), padding='same', activation='tanh', input_shape=(1, 28, 28)),
+            MaxPooling2D(pool_size=(2, 2)),
+            # ConvLayer2
+            Conv2D(64, (5, 5), activation='tanh', padding='same'),
+            MaxPooling2D(pool_size=(2, 2)),
+            Flatten(),
+            # DenseLayer1
+            Dense(1024, activation='tanh'),
+            # DenseLayer2
+            Dense(self.num_classes, activation='tanh'),
+            # Softmax Layer
+            Activation('softmax'),
+        ]
+        for layer in stack:
+            model.add(layer)
+        return model
+
+    def data_preprocess(self):
+        """Return X_train, y_train, X_test, y_test, X_tuner, y_tuner.
+
+        Training images are the Keras MNIST training split reshaped to
+        (N, 1, 28, 28), cast to float32 and divided by 255; the test and tuner
+        sets are float32 inputs and uint32 labels read from MODEL_PARAMS_DIR.
+        """
+        (X_train, y_train), _ = mnist.load_data()
+        X_train = X_train.reshape(X_train.shape[0], 1, 28, 28).astype('float32') / 255
+
+        def load(input_file, label_file):
+            # Read one (inputs, labels) pair stored as flat binary files.
+            images = np.fromfile(MODEL_PARAMS_DIR + input_file, dtype=np.float32)
+            labels = np.fromfile(MODEL_PARAMS_DIR + label_file, dtype=np.uint32)
+            return images.reshape((-1, 1, 28, 28)), labels
+
+        X_test, y_test = load('/lenet_mnist/test_input.bin', '/lenet_mnist/test_labels.bin')
+        X_tuner, y_tuner = load('/lenet_mnist/tune_input.bin', '/lenet_mnist/tune_labels.bin')
+
+        return X_train, y_train, X_test, y_test, X_tuner, y_tuner
+
+    def trainModel(self, model, X_train, y_train, X_test, y_test):
+        """Compile `model` with Adadelta, fit it for 10 epochs and return it.
+
+        Labels are one-hot encoded first; (X_test, y_test) is the validation data.
+        """
+        onehot_train = to_categorical(y_train, self.num_classes)
+        onehot_test = to_categorical(y_test, self.num_classes)
+
+        model.compile(
+            optimizer=keras.optimizers.Adadelta(),
+            loss='categorical_crossentropy',
+            metrics=['accuracy'],
+        )
+
+        model.fit(
+            X_train, onehot_train,
+            validation_data=(X_test, onehot_test),
+            batch_size=128, epochs=10, verbose=1,
+        )
+
+        return model
+  
+
+    
+if __name__ == '__main__':
+    # Expose only GPU 0 to this process.
+    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+    # Switch Keras to the NCHW (channels-first) layout.
+    K.set_image_data_format('channels_first')
+
+    # Parameters specific to this benchmark
+    num_classes = 10
+    batch_size = 500
+    src_dir = 'data/lenet_mnist_src/'
+    data_dir = ''
+    reload_dir = MODEL_PARAMS_DIR + '/lenet_mnist/'
+    keras_model_file = MODEL_PARAMS_DIR + '/lenet_mnist/weights.h5'
+
+    print (reload_dir)
+
+    model = LeNet_MNIST(
+        name='LeNet_MNIST', reload_dir=reload_dir, keras_model_file=keras_model_file,
+        data_dir=data_dir, src_dir=src_dir, num_classes=num_classes, batch_size=batch_size)
+    model.run(sys.argv)
diff --git a/hpvm/test/dnn_benchmarks/keras/mobilenet_cifar10.py b/hpvm/test/dnn_benchmarks/keras/mobilenet_cifar10.py
new file mode 100644
index 0000000000000000000000000000000000000000..34335b0f1a7e3e414f7915a5eb9305086b7344d8
--- /dev/null
+++ b/hpvm/test/dnn_benchmarks/keras/mobilenet_cifar10.py
@@ -0,0 +1,194 @@
+import os
+import sys
+import glob
+
+import numpy as np
+import tensorflow as tf
+import scipy
+import scipy.io
+import keras
+from keras.models import Model, Sequential
+from keras.layers import *
+from keras.optimizers import Adam
+from keras import regularizers
+from keras import backend as K
+from keras.utils import to_categorical
+from keras.preprocessing.image import ImageDataGenerator
+from keras.callbacks import LearningRateScheduler
+
+from keras.datasets import cifar10
+from Benchmark import Benchmark
+from Config import MODEL_PARAMS_DIR
+
+
+
+class MobileNet_CIFAR10(Benchmark):
+    """MobileNet-style CIFAR-10 benchmark built from depthwise-separable blocks."""
+
+    def buildModel(self):
+        """Return the untrained Sequential model for channels-first (3, 32, 32) inputs.
+
+        One standard convolution block is followed by thirteen depthwise
+        blocks, average pooling and a softmax classifier over
+        self.num_classes outputs.
+        """
+        channel_axis = 1  # BatchNormalization normalises axis 1 (channels in NCHW)
+
+        model = Sequential()
+
+        def _conv_block(filters, kernel=(3, 3), strides=(1, 1)):
+            # Convolution -> batch norm -> ReLU; this first block declares the input shape.
+            model.add(Conv2D(filters, kernel,
+                             padding='same',
+                             use_bias=False,
+                             strides=strides,
+                             input_shape=(3, 32, 32)))
+            model.add(BatchNormalization(axis=channel_axis))
+            model.add(Activation('relu'))
+
+        def _depthwise_conv_block(pointwise_conv_filters, strides=(1, 1)):
+            # Zero-padded 3x3 depthwise convolution, then a 1x1 pointwise
+            # convolution; each is followed by batch norm and ReLU.
+            model.add(ZeroPadding2D(padding=((1,1), (1,1))))
+            model.add(DepthwiseConv2D((3, 3),
+                                      padding='valid',
+                                      strides=strides,
+                                      use_bias=False))
+            model.add(BatchNormalization(axis=channel_axis))
+            model.add(Activation('relu'))
+
+            model.add(Conv2D(pointwise_conv_filters, (1, 1),
+                             padding='same',
+                             use_bias=False,
+                             strides=(1, 1)))
+            model.add(BatchNormalization(axis=channel_axis))
+            model.add(Activation('relu'))
+
+        _conv_block(32, strides=(1, 1))
+
+        _depthwise_conv_block(64)
+
+        # 128-filter stage (first block uses stride 2)
+        _depthwise_conv_block(128, strides=(2, 2))
+        _depthwise_conv_block(128)
+        model.add(Dropout(rate=0.5))
+
+        # 256-filter stage
+        _depthwise_conv_block(256, strides=(2, 2))
+        _depthwise_conv_block(256)
+        model.add(Dropout(rate=0.5))
+
+        # 512-filter stage entered with stride 2 ...
+        _depthwise_conv_block(512, strides=(2, 2))
+        _depthwise_conv_block(512)
+        _depthwise_conv_block(512)
+        model.add(Dropout(rate=0.5))
+
+        # ... followed by three more 512-filter blocks with stride 1
+        _depthwise_conv_block(512)
+        _depthwise_conv_block(512)
+        _depthwise_conv_block(512)
+        model.add(Dropout(rate=0.5))
+
+        # 1024-filter stage
+        _depthwise_conv_block(1024, strides=(2, 2))
+        _depthwise_conv_block(1024)
+        model.add(Dropout(rate=0.5))
+
+        model.add(AveragePooling2D(pool_size=2))
+        model.add(Flatten())
+        model.add(Dense(self.num_classes))
+        model.add(Activation('softmax'))
+
+        return model
+
+    def data_preprocess(self):
+        """Return X_train, y_train, X_test, y_test, X_tuner, y_tuner.
+
+        Training images come from keras.datasets.cifar10, are divided by 255
+        and standardised with their global mean and std. The test and tuner
+        sets are read from MODEL_PARAMS_DIR as float32 inputs (reshaped to
+        (-1, 3, 32, 32)) and uint32 labels, with no normalisation applied here.
+        """
+        (X_train, y_train), _ = cifar10.load_data()
+
+        X_train = X_train / 255.0
+        mean = np.mean(X_train)
+        std = np.std(X_train)
+        X_train = (X_train - mean) / (std + 1e-7)  # epsilon avoids division by a zero std
+
+        params_dir = MODEL_PARAMS_DIR + '/mobilenet_cifar10/'
+        X_test = np.fromfile(params_dir + 'test_input.bin', dtype=np.float32)
+        y_test = np.fromfile(params_dir + 'test_labels.bin', dtype=np.uint32)
+        X_test = X_test.reshape((-1,3,32,32))
+
+        X_tuner = np.fromfile(params_dir + 'tune_input.bin', dtype=np.float32)
+        y_tuner = np.fromfile(params_dir + 'tune_labels.bin', dtype=np.uint32)
+        X_tuner = X_tuner.reshape((-1,3,32,32))
+
+        return X_train, y_train, X_test, y_test, X_tuner, y_tuner
+
+    def trainModel(self, model, X_train, y_train, X_test, y_test):
+        """Compile `model` with SGD, train it with flip augmentation and return it.
+
+        The learning rate follows a fixed per-epoch table. The epoch count is
+        the length of that table and the steps per epoch are derived from
+        len(X_train), so neither can drift from the data or the schedule.
+        """
+        y_train = to_categorical(y_train, self.num_classes)
+        y_test = to_categorical(y_test, self.num_classes)
+
+        # data augmentation, horizontal flips only
+        datagen = ImageDataGenerator(
+                featurewise_center=False,
+                featurewise_std_normalization=False,
+                rotation_range=0.0,
+                width_shift_range=0.0,
+                height_shift_range=0.0,
+                vertical_flip=False,
+                horizontal_flip=True)
+        datagen.fit(X_train)
+
+        # Per-epoch learning rates: 50 epochs at 1e-2, then 25 each at 1e-3, 1e-4, 1e-5.
+        learning_rates = [0.01] * 50 + [0.001] * 25 + [0.0001] * 25 + [0.00001] * 25
+        callbacks = [
+            LearningRateScheduler(lambda epoch: float(learning_rates[epoch]))
+        ]
+
+        model.compile(optimizer=keras.optimizers.SGD(lr=learning_rates[0], momentum=0.9, decay=0.0),
+                      loss='categorical_crossentropy',
+                      metrics=['accuracy'])
+
+        train_batch = 128
+        model.fit_generator(
+            datagen.flow(X_train, y_train, batch_size=train_batch),
+            # One pass over the training set per epoch.
+            steps_per_epoch=int(np.ceil(len(X_train) / float(train_batch))),
+            validation_data=(X_test, y_test),
+            epochs=len(learning_rates),  # 125; keeps the schedule lookup in range
+            callbacks=callbacks
+        )
+
+        return model
+
+  
+    
+if __name__ == '__main__':
+    # Expose only GPU 0 to this process.
+    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+    # Switch Keras to the NCHW (channels-first) layout.
+    K.set_image_data_format('channels_first')
+
+    # Parameters specific to this benchmark
+    num_classes = 10
+    batch_size = 500
+    src_dir = 'data/mobilenet_cifar10_src/'
+    data_dir = ''
+    reload_dir = MODEL_PARAMS_DIR + '/mobilenet_cifar10/'
+    keras_model_file = MODEL_PARAMS_DIR + '/mobilenet_cifar10/weights.h5'
+
+    model = MobileNet_CIFAR10(
+        name='MobileNet_CIFAR10', reload_dir=reload_dir, keras_model_file=keras_model_file,
+        data_dir=data_dir, src_dir=src_dir, num_classes=num_classes, batch_size=batch_size)
+    model.run(sys.argv)
+
diff --git a/hpvm/test/dnn_benchmarks/keras/resnet18_cifar10.py b/hpvm/test/dnn_benchmarks/keras/resnet18_cifar10.py
new file mode 100644
index 0000000000000000000000000000000000000000..02753f9eac83a252e5b128f29981b39c14f35d2c
--- /dev/null
+++ b/hpvm/test/dnn_benchmarks/keras/resnet18_cifar10.py
@@ -0,0 +1,566 @@
+"""
+#Trains a ResNet on the CIFAR10 dataset.
+
+ResNet v1:
+[Deep Residual Learning for Image Recognition
+](https://arxiv.org/pdf/1512.03385.pdf)
+
+ResNet v2:
+[Identity Mappings in Deep Residual Networks
+](https://arxiv.org/pdf/1603.05027.pdf)
+
+
+Model|n|200-epoch accuracy|Original paper accuracy |sec/epoch GTX1080Ti
+:------------|--:|-------:|-----------------------:|---:
+ResNet20   v1|  3| 92.16 %|                 91.25 %|35
+ResNet32   v1|  5| 92.46 %|                 92.49 %|50
+ResNet44   v1|  7| 92.50 %|                 92.83 %|70
+ResNet56   v1|  9| 92.71 %|                 93.03 %|90
+ResNet110  v1| 18| 92.65 %|            93.39+-.16 %|165
+ResNet164  v1| 27|     - %|                 94.07 %|  -
+ResNet1001 v1|N/A|     - %|                 92.39 %|  -
+
+&nbsp;
+
+Model|n|200-epoch accuracy|Original paper accuracy |sec/epoch GTX1080Ti
+:------------|--:|-------:|-----------------------:|---:
+ResNet20   v2|  2|     - %|                     - %|---
+ResNet32   v2|N/A| NA    %|            NA         %| NA
+ResNet44   v2|N/A| NA    %|            NA         %| NA
+ResNet56   v2|  6| 93.01 %|            NA         %|100
+ResNet110  v2| 12| 93.15 %|            93.63      %|180
+ResNet164  v2| 18|     - %|            94.54      %|  -
+ResNet1001 v2|111|     - %|            95.08+-.14 %|  -
+"""
+
+import os
+import sys
+import glob
+
+import numpy as np
+import tensorflow as tf
+import scipy
+import scipy.io
+import keras
+from keras.models import Model, Sequential
+from keras.layers import *
+from keras.optimizers import Adam
+from keras import regularizers
+from keras import backend as K
+from keras.utils import to_categorical
+from keras.preprocessing.image import ImageDataGenerator
+from keras.callbacks import LearningRateScheduler
+
+from keras.datasets import cifar10
+from Benchmark import Benchmark
+from Config import MODEL_PARAMS_DIR
+
+
+
+# Training parameters (module-level; read as globals by ResNet18_CIFAR10.trainModel)
+batch_size = 32  # orig paper trained all networks with batch_size=128
+epochs = 200
+
+# NOTE: the __main__ block at the bottom of this file rebinds batch_size to 500.
+# Model parameter
+# ----------------------------------------------------------------------------
+#           |      | 200-epoch | Orig Paper| 200-epoch | Orig Paper| sec/epoch
+# Model     |  n   | ResNet v1 | ResNet v1 | ResNet v2 | ResNet v2 | GTX1080Ti
+#           |v1(v2)| %Accuracy | %Accuracy | %Accuracy | %Accuracy | v1 (v2)
+# ----------------------------------------------------------------------------
+# ResNet20  | 3 (2)| 92.16     | 91.25     | -----     | -----     | 35 (---)
+# ResNet32  | 5(NA)| 92.46     | 92.49     | NA        | NA        | 50 ( NA)
+# ResNet44  | 7(NA)| 92.50     | 92.83     | NA        | NA        | 70 ( NA)
+# ResNet56  | 9 (6)| 92.71     | 93.03     | 93.01     | NA        | 90 (100)
+# ResNet110 |18(12)| 92.65     | 93.39+-.16| 93.15     | 93.63     | 165(180)
+# ResNet164 |27(18)| -----     | 94.07     | -----     | 94.54     | ---(---)
+# ResNet1001| (111)| -----     | 92.39     | -----     | 95.08+-.14| ---(---)
+# ---------------------------------------------------------------------------
+n = 3
+
+# Model version (read by ResNet18_CIFAR10.buildModel)
+# Orig paper: version = 1 (ResNet v1), Improved ResNet: version = 2 (ResNet v2)
+version = 1
+
+# Computed depth from supplied model parameter n (stays undefined if version is neither 1 nor 2)
+if version == 1:
+    depth = n * 6 + 2
+elif version == 2:
+    depth = n * 9 + 2
+# NOTE: buildModel sets its own local depth = 20 instead of reading this value.
+# Model name, depth and version
+model_type = 'ResNet%dv%d' % (depth, version)
+
+
+    
+def resnet_layer(inputs,
+                 num_filters=16,
+                 kernel_size=3,
+                 strides=1,
+                 activation='relu',
+                 batch_normalization=True,
+                 conv_first=True):
+    """Build one ZeroPadding2D-Conv2D-(BatchNorm)-Activation stack.
+
+    # Arguments
+        inputs (tensor): input tensor from input image or previous layer
+        num_filters (int): Conv2D number of filters
+        kernel_size (int): Conv2D square kernel dimensions
+        strides (int): Conv2D square stride dimensions
+        activation (string): activation name, or None to skip the activation
+        batch_normalization (bool): currently ignored; forced to False below
+        conv_first (bool): pad-conv-bn-activation (True) or
+            pad-bn-activation-conv (False)
+
+    # Returns
+        x (tensor): tensor as input to the next layer
+    """
+    conv = Conv2D(num_filters,
+                  kernel_size=kernel_size,
+                  strides=strides,
+                  padding='valid', # NOTE: using valid convs with explicit pad operation
+                  kernel_initializer='he_normal',
+                  kernel_regularizer=regularizers.l2(1e-4))
+    # Pad (kernel_size - 1) / 2 zeros on each side of both spatial dimensions.
+    padding_value = int((kernel_size - 1) / 2)
+    zero_padding = ZeroPadding2D(padding = (padding_value, padding_value))
+
+    # FIXME: Temporarily disabled batch normalization
+    batch_normalization = False
+    # The zero padding is applied first in both orderings.
+    x = inputs
+    x = zero_padding(x)
+    if conv_first:
+        x = conv(x)
+        if batch_normalization:
+            x = BatchNormalization()(x)
+        if activation is not None:
+            x = Activation(activation)(x)
+    else:
+        if batch_normalization:
+            x = BatchNormalization()(x)
+        if activation is not None:
+            x = Activation(activation)(x)
+        x = conv(x)
+    return x
+
+
+class ResNet18_CIFAR10(Benchmark):
+
+    def lr_schedule(self, epoch):
+        """Return the learning rate to use for a given epoch.
+
+        The base rate of 1e-3 is scaled down once the epoch index exceeds
+        80, 120, 160 and 180. Passed to LearningRateScheduler in trainModel.
+
+        # Arguments
+            epoch (int): epoch index supplied by the callback
+
+        # Returns
+            lr (float): learning rate for that epoch
+        """
+        base_rate = 1e-3
+        decay_steps = (  # (threshold, scale) pairs, latest stage first
+            (180, 0.5e-3),
+            (160, 1e-3),
+            (120, 1e-2),
+            (80, 1e-1),
+        )
+        for threshold, scale in decay_steps:
+            if epoch > threshold:
+                return base_rate * scale
+        return base_rate
+    
+
+    def resnet_v0(self, input_shape, depth, num_classes=10):
+        """ResNet v1 variant [a] with a constant filter count and a dense head.
+
+        Stacks of 2 x (3 x 3) Conv2D-BN-ReLU
+        Last ReLU is after the shortcut connection.
+        At the beginning of stages 1 and 2, the first conv layer and the 1 x 1
+        shortcut projection use strides=2 (downsampling).
+        Unlike resnet_v1_1, the number of filters is NOT doubled between stages
+        (see num_filters *= 1 below), so every stage uses 16 filters.
+        The classifier is Flatten -> Dense(64) -> Dense(num_classes, softmax);
+        the average pooling of the reference model is commented out.
+        NOTE: BN is currently skipped because resnet_layer forces
+        batch_normalization to False.
+        NOTE: buildModel never selects this builder.
+        NOTE(review): the parameter counts quoted from [a] presumably do not
+        apply to this variant - confirm.
+
+        # Arguments
+            input_shape (tensor): shape of input image tensor
+            depth (int): number of core convolutional layers
+            num_classes (int): number of classes (CIFAR10 has 10)
+
+        # Returns
+            model (Model): Keras model instance
+
+        # Raises
+            ValueError: if depth is not of the form 6n+2
+        """
+        if (depth - 2) % 6 != 0:
+            raise ValueError('depth should be 6n+2 (eg 20, 32, 44 in [a])')
+        # Start model definition.
+        num_filters = 16
+        num_res_blocks = int((depth - 2) / 6)
+
+        inputs = Input(shape=input_shape)
+        x = resnet_layer(inputs=inputs)
+        # Instantiate the stack of residual units
+        for stack in range(3):
+            for res_block in range(num_res_blocks):
+                strides = 1
+                if stack > 0 and res_block == 0:  # first layer but not first stack
+                    strides = 2  # downsample
+                y = resnet_layer(inputs=x,
+                                 num_filters=num_filters,
+                                 strides=strides)
+                y = resnet_layer(inputs=y,
+                                 num_filters=num_filters,
+                                 activation=None)
+                if stack > 0 and res_block == 0:  # first layer but not first stack
+                    # linear projection residual shortcut connection to match
+                    # changed dims
+                    x = resnet_layer(inputs=x,
+                                     num_filters=num_filters,
+                                     kernel_size=1,
+                                     strides=strides,
+                                     activation=None,
+                                     batch_normalization=False)
+                x = keras.layers.add([x, y])
+                x = Activation('relu')(x)
+            num_filters *= 1  # no-op: the filter count stays at 16 for every stage
+
+        # Add classifier on top.
+        # v1 does not use BN after last shortcut connection-ReLU
+        #-- x = AveragePooling2D(pool_size=8)(x)
+        y = Flatten()(x)
+        x = Dense(64)(y)
+        outputs = Dense(num_classes,
+                        activation='softmax',
+                        kernel_initializer='he_normal')(x)
+
+        # Instantiate model.
+        model = Model(inputs=inputs, outputs=outputs)
+        return model
+
+
+    def resnet_v1_1(self, input_shape, depth, num_classes=10):
+        """ResNet Version 1 Model builder [a]
+
+        Stacks of 2 x (3 x 3) Conv2D-BN-ReLU
+        Last ReLU is after the shortcut connection.
+        At the beginning of stages 1 and 2, the feature map size is halved
+        (downsampled) by a convolutional layer with strides=2; the number of
+        filters is doubled after each stage. Within each stage, the layers have
+        the same number of filters and the same feature map sizes.
+        Features maps sizes:
+        stage 0: 32x32, 16
+        stage 1: 16x16, 32
+        stage 2:  8x8,  64
+        The Number of parameters is approx the same as Table 6 of [a]:
+        ResNet20 0.27M, ResNet32 0.46M, ResNet44 0.66M, ResNet56 0.85M,
+        ResNet110 1.7M
+        NOTE: resnet_layer currently forces batch_normalization off, so the BN
+        steps named above are not created.
+        Softmax is added as a separate Activation layer after the final Dense.
+
+        # Arguments
+            input_shape (tensor): shape of input image tensor
+            depth (int): number of core convolutional layers
+            num_classes (int): number of classes (CIFAR10 has 10)
+
+        # Returns
+            model (Model): Keras model instance
+        """
+        if (depth - 2) % 6 != 0:
+            raise ValueError('depth should be 6n+2 (eg 20, 32, 44 in [a])')
+        # Start model definition.
+        num_filters = 16
+        num_res_blocks = int((depth - 2) / 6)
+
+        inputs = Input(shape=input_shape)
+        x = resnet_layer(inputs=inputs)
+        # Instantiate the stack of residual units
+        for stack in range(3):
+            for res_block in range(num_res_blocks):
+                strides = 1
+                if stack > 0 and res_block == 0:  # first layer but not first stack
+                    strides = 2  # downsample
+                y = resnet_layer(inputs=x,
+                                 num_filters=num_filters,
+                                 strides=strides)
+                y = resnet_layer(inputs=y,
+                                 num_filters=num_filters,
+                                 activation=None)
+                if stack > 0 and res_block == 0:  # first layer but not first stack
+                    # linear projection residual shortcut connection to match
+                    # changed dims
+                    x = resnet_layer(inputs=x,
+                                     num_filters=num_filters,
+                                     kernel_size=1,
+                                     strides=strides,
+                                     activation=None,
+                                     batch_normalization=False)
+                x = keras.layers.add([x, y])
+                x = Activation('relu')(x)
+            num_filters *= 2
+
+        # Add classifier on top: average pooling with pool_size=8, then a linear Dense layer.
+        x = AveragePooling2D(pool_size=8)(x)
+        y = Flatten()(x)
+        outputs = Dense(num_classes,
+                        # softmax is applied by the separate Activation layer below
+                        kernel_initializer='he_normal')(y)
+
+        outputs = Activation('softmax')(outputs)
+
+
+        # Instantiate model.
+        model = Model(inputs=inputs, outputs=outputs)
+        return model
+
+
+
+    def resnet_v2(self, input_shape, depth, num_classes=10):
+        """ResNet Version 2 Model builder [b]
+
+        Stacks of (1 x 1)-(3 x 3)-(1 x 1) BN-ReLU-Conv2D, also known as bottleneck layers.
+        The first shortcut per stage is a 1 x 1 Conv2D; later shortcuts are identity.
+        NOTE: resnet_layer currently forces batch_normalization off, so only the
+        final BatchNormalization before pooling is actually created.
+        At the beginning of stages 1 and 2, the feature map size is halved (downsampled)
+        by a convolutional layer with strides=2, while the number of filter maps is
+        doubled. Within each stage, the layers have the same number of filters and the
+        same filter map sizes.
+        Features maps sizes:
+        conv1  : 32x32,  16
+        stage 0: 32x32,  64
+        stage 1: 16x16, 128
+        stage 2:  8x8,  256
+
+        # Arguments
+            input_shape (tensor): shape of input image tensor
+            depth (int): number of core convolutional layers
+            num_classes (int): number of classes (CIFAR10 has 10)
+
+        # Returns
+            model (Model): Keras model instance
+        """
+        if (depth - 2) % 9 != 0:
+            raise ValueError('depth should be 9n+2 (eg 56 or 110 in [b])')
+        # Start model definition.
+        num_filters_in = 16
+        num_res_blocks = int((depth - 2) / 9)
+
+        inputs = Input(shape=input_shape)
+        # v2 performs Conv2D with BN-ReLU on input before splitting into 2 paths
+        x = resnet_layer(inputs=inputs,
+                         num_filters=num_filters_in,
+                         conv_first=True)
+
+        # Instantiate the stack of residual units
+        for stage in range(3):
+            for res_block in range(num_res_blocks):
+                activation = 'relu'
+                batch_normalization = True
+                strides = 1
+                if stage == 0:
+                    num_filters_out = num_filters_in * 4
+                    if res_block == 0:  # first layer and first stage
+                        activation = None
+                        batch_normalization = False
+                else:
+                    num_filters_out = num_filters_in * 2
+                    if res_block == 0:  # first layer but not first stage
+                        strides = 2    # downsample
+
+                # bottleneck residual unit
+                y = resnet_layer(inputs=x,
+                                 num_filters=num_filters_in,
+                                 kernel_size=1,
+                                 strides=strides,
+                                 activation=activation,
+                                 batch_normalization=batch_normalization,
+                                 conv_first=False)
+                y = resnet_layer(inputs=y,
+                                 num_filters=num_filters_in,
+                                 conv_first=False)
+                y = resnet_layer(inputs=y,
+                                 num_filters=num_filters_out,
+                                 kernel_size=1,
+                                 conv_first=False)
+                if res_block == 0:
+                    # linear projection residual shortcut connection to match
+                    # changed dims
+                    x = resnet_layer(inputs=x,
+                                     num_filters=num_filters_out,
+                                     kernel_size=1,
+                                     strides=strides,
+                                     activation=None,
+                                     batch_normalization=False)
+                x = keras.layers.add([x, y])
+
+            num_filters_in = num_filters_out
+
+        # Add classifier on top.
+        # v2 has BN-ReLU before Pooling
+        x = BatchNormalization()(x)  # NOTE(review): default BN axis, but buildModel passes a channels-first shape - confirm axis
+        x = Activation('relu')(x)
+        x = AveragePooling2D(pool_size=8)(x)
+        y = Flatten()(x)
+        outputs = Dense(num_classes,
+                        activation='softmax',
+                        kernel_initializer='he_normal')(y)
+
+        # Instantiate model.
+        model = Model(inputs=inputs, outputs=outputs)
+        return model
+
+    
+    def buildModel(self):
+        """Build the CIFAR-10 ResNet chosen by the module-level `version` flag.
+
+        Returns the Keras model produced by resnet_v2 when version == 2 and
+        by resnet_v1_1 for any other value.
+        """
+        # 20 satisfies both builders' depth checks: 18 is divisible by 6 and 9.
+        builder = self.resnet_v2 if version == 2 else self.resnet_v1_1
+        return builder(input_shape=(3, 32, 32),  # channels-first image shape
+                       depth=20)
+
+    
+    
+    def data_preprocess(self):
+        """Load CIFAR-10 training data plus the pre-dumped test and tuning sets.
+
+        Training images are divided by 255 and mean-centered; no division by
+        the standard deviation is applied.
+
+        # Returns
+            X_train, y_train, X_test, y_test, X_tuner, y_tuner
+        """
+        # The Keras test split is discarded; test data comes from .bin files.
+        (X_train, y_train), _ = cifar10.load_data()
+        X_train = X_train / 255.0
+        X_train = X_train - np.mean(X_train)
+
+        X_test = np.fromfile(MODEL_PARAMS_DIR + '/resnet18_cifar10/test_input.bin', dtype=np.float32)
+        y_test = np.fromfile(MODEL_PARAMS_DIR + '/resnet18_cifar10/test_labels.bin', dtype=np.uint32)
+        X_tuner = np.fromfile(MODEL_PARAMS_DIR + '/resnet18_cifar10/tune_input.bin', dtype=np.float32)
+        y_tuner = np.fromfile(MODEL_PARAMS_DIR + '/resnet18_cifar10/tune_labels.bin', dtype=np.uint32)
+
+        # The dumps are read as flat float32 buffers; reshape them to (N, 3, 32, 32).
+        return X_train, y_train, X_test.reshape((-1, 3, 32, 32)), y_test, X_tuner.reshape((-1, 3, 32, 32)), y_tuner
+
+
+    def trainModel(self, model, X_train, y_train, X_test, y_test):
+        """Compile `model` with Adam and train it, optionally with augmentation.
+
+        Reads the module-level `batch_size` and `epochs`. Real-time data
+        augmentation is used unless a module-level `data_augmentation` exists
+        and is falsy; the flag is not defined in this file, so it is looked up
+        with a default instead of being read directly.
+        NOTE: when this file runs as a script, the __main__ block rebinds the
+        module-level batch_size to 500 before run() is invoked.
+
+        # Arguments
+            model (Model): Keras model to compile and train
+            X_train, y_train: training images and integer class labels
+            X_test, y_test: validation images and integer class labels
+
+        # Returns
+            model (Model): the same model instance, after training
+        """
+        y_train = to_categorical(y_train, self.num_classes)
+        y_test = to_categorical(y_test, self.num_classes)
+
+        model.compile(
+            loss='categorical_crossentropy',
+            optimizer=Adam(lr=self.lr_schedule(0)),
+            metrics=['accuracy']
+        )
+
+        lr_scheduler = LearningRateScheduler(self.lr_schedule)
+
+        # ReduceLROnPlateau is not imported by name at the top of this file,
+        # so it is reached through the keras package.
+        lr_reducer = keras.callbacks.ReduceLROnPlateau(factor=np.sqrt(0.1),
+                                                       cooldown=0,
+                                                       patience=5,
+                                                       min_lr=0.5e-6)
+
+        callbacks = [lr_reducer, lr_scheduler]
+
+        # Default to augmentation when the optional module-level flag is absent.
+        use_augmentation = globals().get('data_augmentation', True)
+
+        # Run training, with or without data augmentation.
+        if not use_augmentation:
+            print('Not using data augmentation.')
+            model.fit(X_train, y_train,
+                      batch_size=batch_size,
+                      epochs=epochs,
+                      validation_data=(X_test, y_test),
+                      shuffle=True,
+                      callbacks=callbacks)
+        else:
+            print('Using real-time data augmentation.')
+            # This will do preprocessing and realtime data augmentation:
+            datagen = ImageDataGenerator(
+                featurewise_center=False,  # set input mean to 0 over the dataset
+                samplewise_center=False,  # set each sample mean to 0
+                featurewise_std_normalization=False,  # divide inputs by std of dataset
+                samplewise_std_normalization=False,  # divide each input by its std
+                zca_whitening=False,  # apply ZCA whitening
+                zca_epsilon=1e-06,  # epsilon for ZCA whitening
+                rotation_range=0,  # randomly rotate images in the range (deg 0 to 180)
+                width_shift_range=0.1,  # randomly shift images horizontally
+                height_shift_range=0.1,  # randomly shift images vertically
+                shear_range=0.,  # set range for random shear
+                zoom_range=0.,  # set range for random zoom
+                channel_shift_range=0.,  # set range for random channel shifts
+                fill_mode='nearest',  # mode for filling points outside the input boundaries
+                cval=0.,  # value used for fill_mode = "constant"
+                horizontal_flip=True,  # randomly flip images horizontally
+                vertical_flip=False,  # randomly flip images vertically
+                rescale=None,  # rescaling factor (applied before any other transformation)
+                preprocessing_function=None,  # function applied on each input
+                data_format="channels_first",  # either "channels_first" or "channels_last"
+                validation_split=0.0)  # fraction of images reserved for validation
+
+            # Compute quantities required for featurewise normalization
+            # (std, mean, and principal components if ZCA whitening is applied).
+            datagen.fit(X_train)
+
+            # Fit the model on the batches generated by datagen.flow().
+            model.fit_generator(datagen.flow(X_train, y_train, batch_size=batch_size),
+                                validation_data=(X_test, y_test),
+                                epochs=epochs, verbose=1, workers=4,
+                                callbacks=callbacks)
+
+        return model
+
+  
+    
+if __name__ == '__main__':
+    # Script entry point: configure the backend, then build and run the ResNet18 CIFAR-10 benchmark.
+    os.environ['CUDA_VISIBLE_DEVICES'] = '0'  # make only CUDA device 0 visible to this process
+    # Changing to NCHW format
+    K.set_image_data_format('channels_first')
+
+
+    ### Parameters specific to each benchmark
+    reload_dir = MODEL_PARAMS_DIR + '/resnet18_cifar10/'
+    keras_model_file = MODEL_PARAMS_DIR + '/resnet18_cifar10/weights.h5'
+    data_dir = ''  # left empty for this benchmark
+    src_dir = 'data/resnet18_cifar10_src/'
+    num_classes = 10
+    batch_size = 500  # NOTE: this rebinds the module-level batch_size (32) that trainModel reads
+
+    model = ResNet18_CIFAR10('ResNet18_CIFAR10', reload_dir, keras_model_file, data_dir, src_dir, num_classes, batch_size)
+    # Forward the raw command-line arguments to the benchmark's run().
+    model.run(sys.argv)
+    
diff --git a/hpvm/test/dnn_benchmarks/keras/resnet50_imagenet.py b/hpvm/test/dnn_benchmarks/keras/resnet50_imagenet.py
new file mode 100644
index 0000000000000000000000000000000000000000..de42ae48d834b6f55e7827138f60baeefe8fb897
--- /dev/null
+++ b/hpvm/test/dnn_benchmarks/keras/resnet50_imagenet.py
@@ -0,0 +1,155 @@
+import os
+import sys
+import glob
+
+import numpy as np
+import tensorflow as tf
+import scipy
+import scipy.io
+import keras
+from keras.models import Model, Sequential
+from keras.layers import *
+from keras.optimizers import Adam
+from keras import regularizers
+from keras import backend as K
+from keras.utils import to_categorical
+from keras.preprocessing.image import ImageDataGenerator
+from keras.callbacks import LearningRateScheduler
+
+from Benchmark import Benchmark
+from Config import MODEL_PARAMS_DIR
+
+
+
+class ResNet50(Benchmark):
+    
+    def buildModel(self):
+        """Build and return the ResNet-50 Keras model for 3x224x224 channels-first inputs and 1000 outputs."""
+        def identity_block(input_tensor, kernel_size, filters, stage, block):  # stage and block are unused
+            filters1, filters2, filters3 = filters
+            bn_axis = 1
+
+            x = Conv2D(filters1, (1, 1))(input_tensor)
+            x = BatchNormalization(axis=bn_axis)(x)
+            x = Activation('relu')(x)
+
+            x = Conv2D(filters2, kernel_size,
+                              padding='same')(x)
+            x = BatchNormalization(axis=bn_axis)(x)
+            x = Activation('relu')(x)
+
+            x = Conv2D(filters3, (1, 1))(x)
+            x = BatchNormalization(axis=bn_axis)(x)
+            # Identity shortcut: the block input is added back unchanged.
+            x = add([x, input_tensor])
+            x = Activation('relu')(x)
+            return x
+        # Bottleneck block whose shortcut is a strided 1x1 conv followed by BN; stage and block are unused.
+        def conv_block(input_tensor,
+                       kernel_size,
+                       filters,
+                       stage,
+                       block,
+                       strides=(2, 2)):
+            filters1, filters2, filters3 = filters
+            bn_axis = 1
+            x = Conv2D(filters1, (1, 1), strides=strides)(input_tensor)
+            x = BatchNormalization(axis=bn_axis)(x)
+            x = Activation('relu')(x)
+
+            x = Conv2D(filters2, kernel_size, padding='same')(x)
+            x = BatchNormalization(axis=bn_axis)(x)
+            x = Activation('relu')(x)
+
+            x = Conv2D(filters3, (1, 1))(x)
+            x = BatchNormalization(axis=bn_axis)(x)
+
+            shortcut = Conv2D(filters3, (1, 1), strides=strides)(input_tensor)
+            shortcut = BatchNormalization(
+                axis=bn_axis)(shortcut)
+
+            x = add([x, shortcut])
+            x = Activation('relu')(x)
+            return x
+
+        img_input = Input(shape=(3, 224, 224))
+        bn_axis = 1
+        # Stem: explicit 3-pixel zero padding, 7x7 stride-2 conv, ReLU, 3x3 stride-2 max pooling.
+        x = ZeroPadding2D((3, 3))(img_input)
+        x = Conv2D(64, (7, 7), strides=(2, 2))(x)
+        # NOTE: BN is applied after the max pooling below rather than directly after the first conv.
+        x = Activation('relu')(x)
+        x = MaxPooling2D((3, 3), strides=(2, 2))(x)
+        x = BatchNormalization(axis=bn_axis)(x)
+        # Stage 2 keeps strides=(1, 1); stages 3-5 use the conv_block default of (2, 2).
+        x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
+        x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
+        x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')
+
+        x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
+        x = identity_block(x, 3, [128, 128, 512], stage=3, block='b')
+        x = identity_block(x, 3, [128, 128, 512], stage=3, block='c')
+        x = identity_block(x, 3, [128, 128, 512], stage=3, block='d')
+
+        x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
+        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b')
+        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c')
+        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d')
+        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e')
+        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f')
+
+        x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
+        x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
+        x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c')
+        # Classifier head: 7x7 average pooling, flatten, 1000-way dense layer, softmax.
+        x = AveragePooling2D((7, 7))(x)
+        x = Flatten()(x)
+        x = Dense(1000)(x)
+        x = Activation('softmax')(x)
+
+        model = Model(img_input, x)
+
+        return model
+
+    
+    def data_preprocess(self):
+        """Load the pre-dumped ImageNet test and tuning sets; no training data is returned."""
+        def load_split(input_file, labels_file):
+            # Inputs are read as float32 and reshaped to (N, 3, 224, 224); labels are read as uint32.
+            images = np.fromfile(MODEL_PARAMS_DIR + input_file, dtype=np.float32)
+            images = images.reshape((-1, 3, 224, 224))
+            return images, np.fromfile(MODEL_PARAMS_DIR + labels_file, dtype=np.uint32)
+
+        X_test, y_test = load_split('/resnet50_imagenet/test_input.bin', '/resnet50_imagenet/test_labels.bin')
+        X_tuner, y_tuner = load_split('/resnet50_imagenet/tune_input.bin', '/resnet50_imagenet/tune_labels.bin')
+        # The training slots stay None: this benchmark's trainModel rejects training.
+        return None, None, X_test, y_test, X_tuner, y_tuner
+    
+
+    def trainModel(self, model, X_train=None, y_train=None, X_test=None, y_test=None):
+        """Always raise AssertionError: ImageNet training is unsupported; the extra optional args mirror the sibling benchmarks."""
+        raise AssertionError("ImageNet training not supported - use Pretrained weights")
+
+
+    
+if __name__ == '__main__':
+    # Script entry point: configure the backend, then build and run the ResNet50 ImageNet benchmark.
+    os.environ['CUDA_VISIBLE_DEVICES'] = '0'  # make only CUDA device 0 visible to this process
+    # Changing to NCHW format
+    K.set_image_data_format('channels_first')
+
+
+    ### Parameters specific to each benchmark
+    reload_dir = MODEL_PARAMS_DIR + '/resnet50_imagenet/'
+    keras_model_file = MODEL_PARAMS_DIR + '/resnet50_imagenet/weights.h5'
+    data_dir = ''  # left empty for this benchmark
+    src_dir = 'data/resnet50_imagenet_src/'
+    num_classes = 1000
+    batch_size = 50
+
+    model = ResNet50('ResNet50_imagenet', reload_dir, keras_model_file, data_dir, src_dir, num_classes, batch_size)
+    # Forward the raw command-line arguments to the benchmark's run().
+    model.run(sys.argv)
+
+
+    
diff --git a/hpvm/test/dnn_benchmarks/keras/vgg16_cifar10.py b/hpvm/test/dnn_benchmarks/keras/vgg16_cifar10.py
new file mode 100644
index 0000000000000000000000000000000000000000..9a5071ee94a54e4832eade954f779d64ebd3416e
--- /dev/null
+++ b/hpvm/test/dnn_benchmarks/keras/vgg16_cifar10.py
@@ -0,0 +1,197 @@
+import os
+import sys
+import glob
+
+import numpy as np
+import tensorflow as tf
+import scipy
+import scipy.io
+import keras
+from keras.models import Model, Sequential
+from keras.layers import *
+from keras.optimizers import Adam
+from keras import regularizers
+from keras import backend as K
+from keras.utils import to_categorical
+from keras.preprocessing.image import ImageDataGenerator
+from keras.callbacks import LearningRateScheduler
+
+from keras.datasets import cifar10
+from Benchmark import Benchmark
+from Config import MODEL_PARAMS_DIR
+
+
+
+class VGG16_CIFAR10(Benchmark):
+        
+    def buildModel(self):
+        """Build the VGG16-style Sequential network with dropout and L2 weight decay; returns the uncompiled model."""
+
+        self.weight_decay = 0.0005
+        self.x_shape = [3, 32, 32]
+
+        model = Sequential()
+        weight_decay = self.weight_decay
+
+        model.add(Conv2D(64, (3, 3), padding='same',
+                         input_shape=self.x_shape,kernel_regularizer=regularizers.l2(weight_decay)))
+        # Pattern below: conv + ReLU, with dropout between convs and max pooling closing each group.
+        model.add(Activation('relu'))
+        model.add(Dropout(0.3))
+
+        model.add(Conv2D(64, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(Activation('relu'))
+        model.add(MaxPooling2D(pool_size=(2, 2)))
+
+        model.add(Conv2D(128, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(Activation('relu'))
+        model.add(Dropout(0.4))
+
+        model.add(Conv2D(128, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(Activation('relu'))
+        model.add(MaxPooling2D(pool_size=(2, 2)))
+
+        model.add(Conv2D(256, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(Activation('relu'))
+        model.add(Dropout(0.4))
+
+        model.add(Conv2D(256, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(Activation('relu'))
+        model.add(Dropout(0.4))
+
+        model.add(Conv2D(256, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(Activation('relu'))
+        model.add(MaxPooling2D(pool_size=(2, 2)))
+
+        model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(Activation('relu'))
+        model.add(Dropout(0.4))
+
+        model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(Activation('relu'))
+        model.add(Dropout(0.4))
+
+        model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(Activation('relu'))
+        model.add(MaxPooling2D(pool_size=(2, 2)))
+
+        model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(Activation('relu'))
+        model.add(Dropout(0.4))
+
+        model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(Activation('relu'))
+        model.add(Dropout(0.4))
+
+        model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(Activation('relu'))
+        model.add(MaxPooling2D(pool_size=(2, 2)))
+        model.add(Dropout(0.5))
+        # Classifier head: flatten, one 512-unit dense layer, then a softmax over self.num_classes.
+        model.add(Flatten())
+        model.add(Dense(512,kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(Activation('relu'))
+
+        model.add(Dropout(0.5))
+        model.add(Dense(self.num_classes))
+        model.add(Activation('softmax'))
+        return model
+
+    
+    def data_preprocess(self):
+        """Load standardized CIFAR-10 training data plus the pre-dumped test and tuning sets."""
+        (X_train, y_train), (X_val, y_val) = cifar10.load_data()
+
+        X_train = X_train / 255.0
+        # X_val / y_val are not used; test data is read from the .bin files below.
+        # Standardize with the global (all pixels, all channels) mean and std.
+        mean = np.mean(X_train)
+        std = np.std(X_train)
+        X_train = (X_train - mean) / (std + 1e-7)
+        # The 1e-7 term keeps the division above away from a zero denominator.
+
+        X_test= np.fromfile(MODEL_PARAMS_DIR + '/vgg16_cifar10/test_input.bin', dtype=np.float32)
+        y_test = np.fromfile(MODEL_PARAMS_DIR + '/vgg16_cifar10/test_labels.bin', dtype=np.uint32)
+        # The dump is read as a flat float32 buffer; reshape it to (N, 3, 32, 32).
+        X_test = X_test.reshape((-1,3,32,32))
+
+        X_tuner= np.fromfile(MODEL_PARAMS_DIR + '/vgg16_cifar10/tune_input.bin', dtype=np.float32)
+        y_tuner = np.fromfile(MODEL_PARAMS_DIR + '/vgg16_cifar10/tune_labels.bin', dtype=np.uint32)
+
+        X_tuner = X_tuner.reshape((-1,3,32,32))
+
+
+        return X_train, y_train, X_test, y_test, X_tuner, y_tuner
+
+
+    def trainModel(self, model, X_train, y_train, X_test, y_test):
+        """Compile `model` with SGD and train it for 250 epochs on augmented data; returns the same model."""
+        y_train = to_categorical(y_train, self.num_classes)
+        y_test = to_categorical(y_test, self.num_classes)
+
+        batch_size = 128
+        learning_rate = 0.01
+        lr_drop = 20
+
+        # Step schedule: the rate is halved every lr_drop epochs.
+        def lr_scheduler(epoch):
+            return learning_rate * (0.5 ** (epoch // lr_drop))
+
+        reduce_lr = keras.callbacks.LearningRateScheduler(lr_scheduler)
+
+        # Data augmentation: random rotations, shifts and horizontal flips.
+        datagen = ImageDataGenerator(
+            featurewise_center=False,  # set input mean to 0 over the dataset
+            samplewise_center=False,  # set each sample mean to 0
+            featurewise_std_normalization=False,  # divide inputs by std of the dataset
+            samplewise_std_normalization=False,  # divide each input by its std
+            zca_whitening=False,  # apply ZCA whitening
+            rotation_range=15,  # randomly rotate images in the range (degrees, 0 to 180)
+            width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
+            height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
+            horizontal_flip=True,  # randomly flip images horizontally
+            vertical_flip=False)  # randomly flip images vertically
+        # fit() computes std, mean, and principal components (used by the featurewise/ZCA options, all off above).
+        datagen.fit(X_train)
+
+
+        model.compile(
+            loss='categorical_crossentropy', 
+            optimizer=keras.optimizers.SGD(lr=learning_rate, decay=1e-6, momentum=0.9, nesterov=True),
+            metrics=['accuracy']
+        )
+
+        # Single fit_generator call; the reduce_lr callback halves the learning rate every 20 epochs.
+
+        model.fit_generator(
+            datagen.flow(X_train, y_train, batch_size=batch_size),
+            steps_per_epoch=X_train.shape[0] // batch_size,
+            epochs=250,
+            validation_data=(X_test, y_test),
+            callbacks=[reduce_lr]
+        )
+
+        return model
+
+
+    
+if __name__ == '__main__':
+    # Script entry point: configure the backend, then build and run the VGG16 CIFAR-10 benchmark.
+    os.environ['CUDA_VISIBLE_DEVICES'] = '0'  # make only CUDA device 0 visible to this process
+    # Changing to NCHW format
+    K.set_image_data_format('channels_first')
+
+
+    ### Parameters specific to each benchmark
+    reload_dir = MODEL_PARAMS_DIR + '/vgg16_cifar10/'
+    keras_model_file = MODEL_PARAMS_DIR + '/vgg16_cifar10/weights.h5'
+    data_dir = ''  # left empty for this benchmark
+    src_dir = 'data/vgg16_cifar10_src/'
+    num_classes = 10
+    batch_size = 500
+
+    model = VGG16_CIFAR10('VGG16_CIFAR10', reload_dir, keras_model_file, data_dir, src_dir, num_classes, batch_size)
+    # Forward the raw command-line arguments to the benchmark's run().
+    model.run(sys.argv)
+
+    
diff --git a/hpvm/test/dnn_benchmarks/keras/vgg16_cifar100.py b/hpvm/test/dnn_benchmarks/keras/vgg16_cifar100.py
new file mode 100644
index 0000000000000000000000000000000000000000..0fd51ebe03c56ecd622cfab970c51f3096a7d2f4
--- /dev/null
+++ b/hpvm/test/dnn_benchmarks/keras/vgg16_cifar100.py
@@ -0,0 +1,211 @@
+import os
+import sys
+import glob
+
+import numpy as np
+import tensorflow as tf
+import scipy
+import scipy.io
+import keras
+from keras.models import Model, Sequential
+from keras.layers import *
+from keras.optimizers import Adam
+from keras import regularizers
+from keras import backend as K
+from keras.utils import to_categorical
+from keras.preprocessing.image import ImageDataGenerator
+from keras.callbacks import LearningRateScheduler
+
+from keras.datasets import cifar100
+from Benchmark import Benchmark
+from Config import MODEL_PARAMS_DIR
+
+
+
+class VGG16_CIFAR100(Benchmark):
+    """VGG16-style CIFAR-100 benchmark with channels-first 3x32x32 inputs.
+
+    Overrides the buildModel, data_preprocess and trainModel hooks of
+    Benchmark. The number of output classes is taken from self.num_classes.
+    """
+
+    def buildModel(self):
+        """Build the uncompiled Sequential network.
+
+        Thirteen 3x3 'same'-padded convolutions with L2 weight decay, each
+        followed by ReLU and then either dropout or 2x2 max pooling, feed a
+        512-unit dense layer and a softmax over self.num_classes outputs.
+
+        Returns:
+            The keras Sequential model.
+        """
+        self.weight_decay = 0.0005
+        self.x_shape = [3, 32, 32]
+        weight_decay = self.weight_decay
+
+        # (filters, dropout rate) for each convolution, in order. A rate of
+        # None means the convolution is followed by 2x2 max pooling instead
+        # of dropout.
+        conv_plan = [
+            (64, 0.3),
+            (64, None),
+            (128, 0.4),
+            (128, None),
+            (256, 0.4),
+            (256, 0.4),
+            (256, None),
+            (512, 0.4),
+            (512, 0.4),
+            (512, None),
+            (512, 0.4),
+            (512, 0.4),
+            (512, None),
+        ]
+
+        model = Sequential()
+        for index, (filters, drop_rate) in enumerate(conv_plan):
+            # Only the first layer declares the input shape.
+            extra = {'input_shape': self.x_shape} if index == 0 else {}
+            model.add(Conv2D(filters, (3, 3), padding='same',
+                             kernel_regularizer=regularizers.l2(weight_decay),
+                             **extra))
+            model.add(Activation('relu'))
+            if drop_rate is None:
+                model.add(MaxPooling2D(pool_size=(2, 2)))
+            else:
+                model.add(Dropout(drop_rate))
+        # Extra dropout after the last pooling stage.
+        model.add(Dropout(0.5))
+
+        # Classifier head.
+        model.add(Flatten())
+        model.add(Dense(512, kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(Activation('relu'))
+        model.add(Dropout(0.5))
+        model.add(Dense(self.num_classes))
+        model.add(Activation('softmax'))
+        return model
+
+
+    def data_preprocess(self):
+        """Load the training data and the stored test/tuning sets.
+
+        Training images come from keras.datasets.cifar100; they are divided
+        by 255 and standardized with their global mean and standard
+        deviation. Test and tuning inputs and labels are read from binary
+        files under MODEL_PARAMS_DIR/vgg16_cifar100; the inputs are reshaped
+        to (-1, 3, 32, 32) and otherwise returned as stored.
+
+        Returns:
+            Tuple (X_train, y_train, X_test, y_test, X_tuner, y_tuner).
+        """
+        # The dataset's own test split is discarded; evaluation data comes
+        # from the .bin files below.
+        (X_train, y_train), _ = cifar100.load_data()
+
+        X_train = X_train / 255.0
+        mean = np.mean(X_train)
+        std = np.std(X_train)
+        # The small epsilon keeps the division safe if std is zero.
+        X_train = (X_train - mean) / (std + 1e-7)
+
+        data_root = MODEL_PARAMS_DIR + '/vgg16_cifar100/'
+
+        X_test = np.fromfile(data_root + 'test_input.bin', dtype=np.float32)
+        y_test = np.fromfile(data_root + 'test_labels.bin', dtype=np.uint32)
+        X_test = X_test.reshape((-1, 3, 32, 32))
+
+        X_tuner = np.fromfile(data_root + 'tune_input.bin', dtype=np.float32)
+        y_tuner = np.fromfile(data_root + 'tune_labels.bin', dtype=np.uint32)
+        X_tuner = X_tuner.reshape((-1, 3, 32, 32))
+
+        return X_train, y_train, X_test, y_test, X_tuner, y_tuner
+
+
+    def trainModel(self, model, X_train, y_train, X_test, y_test):
+        """Compile the model and train it on augmented data.
+
+        Uses Adam with a base learning rate of 0.1 that a scheduler halves
+        every 30 epochs, a batch size of 128 and 250 epochs.
+
+        Args:
+            model: Keras model to train.
+            X_train, y_train: Training inputs and integer class labels.
+            X_test, y_test: Inputs and integer class labels handed to Keras
+                as validation data.
+
+        Returns:
+            The same model object after training.
+        """
+        # Labels become one-hot vectors for the categorical cross-entropy loss.
+        y_train = to_categorical(y_train, self.num_classes)
+        y_test = to_categorical(y_test, self.num_classes)
+
+        # Training hyperparameters.
+        batch_size = 128
+        learning_rate = 0.1
+        lr_drop = 30
+
+        def lr_scheduler(epoch):
+            # Halve the base learning rate every lr_drop epochs.
+            return learning_rate * (0.5 ** (epoch // lr_drop))
+
+        reduce_lr = keras.callbacks.LearningRateScheduler(lr_scheduler)
+
+        # Data augmentation: rotations, shifts and horizontal flips only.
+        datagen = ImageDataGenerator(
+            featurewise_center=False,  # set input mean to 0 over the dataset
+            samplewise_center=False,  # set each sample mean to 0
+            featurewise_std_normalization=False,  # divide inputs by dataset std
+            samplewise_std_normalization=False,  # divide each input by its std
+            zca_whitening=False,  # apply ZCA whitening
+            rotation_range=15,  # random rotation range in degrees
+            width_shift_range=0.1,  # horizontal shift (fraction of width)
+            height_shift_range=0.1,  # vertical shift (fraction of height)
+            horizontal_flip=True,  # randomly flip images horizontally
+            vertical_flip=False,  # no vertical flips
+        )
+        # Compute the statistics required by the feature-wise options (std,
+        # mean, and principal components if ZCA whitening is applied).
+        datagen.fit(X_train)
+
+        # `optimizers` is never imported explicitly in this file; use the
+        # Adam class that is imported from keras.optimizers.
+        model.compile(
+            loss='categorical_crossentropy',
+            optimizer=Adam(lr=learning_rate),
+            metrics=['accuracy'],
+        )
+
+        # Train on augmented batches; reduce_lr applies the schedule above.
+        model.fit_generator(
+            datagen.flow(X_train, y_train, batch_size=batch_size),
+            steps_per_epoch=X_train.shape[0] // batch_size,
+            epochs=250,
+            validation_data=(X_test, y_test),
+            callbacks=[reduce_lr],
+        )
+
+        return model
+
+
+    
+if __name__ == '__main__':
+    # Make only GPU 0 visible to CUDA.
+    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+    # Switch Keras to the NCHW (channels-first) layout.
+    K.set_image_data_format('channels_first')
+
+    # Benchmark-specific parameters; weights.h5 sits in the reload directory.
+    reload_dir = MODEL_PARAMS_DIR + '/vgg16_cifar100/'
+    keras_model_file = reload_dir + 'weights.h5'
+    data_dir = ''
+    src_dir = 'data/vgg16_cifar100_src/'
+    num_classes = 100
+    batch_size = 100
+
+    model = VGG16_CIFAR100(
+        'VGG16_CIFAR100', reload_dir, keras_model_file,
+        data_dir, src_dir, num_classes, batch_size)
+    model.run(sys.argv)
+    
diff --git a/hpvm/test/dnn_benchmarks/keras/vgg16_imagenet.py b/hpvm/test/dnn_benchmarks/keras/vgg16_imagenet.py
new file mode 100644
index 0000000000000000000000000000000000000000..6b9458b5378c421f5ef8f8811e4721056fd19643
--- /dev/null
+++ b/hpvm/test/dnn_benchmarks/keras/vgg16_imagenet.py
@@ -0,0 +1,140 @@
+import os
+import sys
+import glob
+
+import numpy as np
+import tensorflow as tf
+import scipy
+import scipy.io
+import keras
+from keras.models import Model, Sequential
+from keras.layers import *
+from keras.optimizers import Adam
+from keras import regularizers
+from keras import backend as K
+from keras.utils import to_categorical
+from keras.preprocessing.image import ImageDataGenerator
+from keras.callbacks import LearningRateScheduler
+
+from Benchmark import Benchmark
+from Config import MODEL_PARAMS_DIR
+
+
+
+class VGG16(Benchmark):
+    """VGG16 ImageNet benchmark with channels-first 3x224x224 inputs.
+
+    Only pretrained weights are supported: data_preprocess returns no
+    training data and trainModel always raises.
+    """
+
+    def buildModel(self):
+        """Build the VGG16 network with the Keras functional API.
+
+        Five blocks of zero-padded 3x3 'valid' convolutions with ReLU, each
+        ending in 2x2 max pooling with stride 2, are followed by two
+        4096-unit dense layers (ReLU, dropout 0.5) and a 1000-way softmax.
+
+        Returns:
+            The keras Model mapping the image input to the softmax output.
+        """
+        img_input = Input(shape=(3, 224, 224))
+
+        # (filters, number of convolutions) for blocks 1 to 5.
+        block_plan = [
+            (64, 2),
+            (128, 2),
+            (256, 3),
+            (512, 3),
+            (512, 3),
+        ]
+
+        x = img_input
+        for filters, num_convs in block_plan:
+            for _ in range(num_convs):
+                # One-pixel zero padding, then a 'valid' 3x3 convolution.
+                x = ZeroPadding2D(padding=(1, 1))(x)
+                x = Conv2D(filters, (3, 3), padding='valid')(x)
+                x = Activation('relu')(x)
+            x = MaxPooling2D((2, 2), strides=(2, 2))(x)
+
+        x = Flatten()(x)
+
+        # Two fully connected layers, each with ReLU and dropout.
+        for _ in range(2):
+            x = Dense(4096)(x)
+            x = Activation('relu')(x)
+            x = Dropout(0.5)(x)
+
+        # The output width is fixed at 1000 here rather than read from
+        # self.num_classes.
+        x = Dense(1000)(x)
+        x = Activation('softmax')(x)
+
+        return Model(img_input, x)
+
+
+    def data_preprocess(self):
+        """Read the stored test and tuning sets; no training data is loaded.
+
+        Inputs are read as float32 and reshaped to (-1, 3, 224, 224); labels
+        are read as uint32. All files live in MODEL_PARAMS_DIR/vgg16_imagenet.
+
+        Returns:
+            Tuple (X_train, y_train, X_test, y_test, X_tuner, y_tuner) in
+            which X_train and y_train are None.
+        """
+        X_train, y_train = None, None
+        data_root = MODEL_PARAMS_DIR + '/vgg16_imagenet/'
+
+        X_test = np.fromfile(data_root + 'test_input.bin', dtype=np.float32)
+        X_test = X_test.reshape((-1, 3, 224, 224))
+        y_test = np.fromfile(data_root + 'test_labels.bin', dtype=np.uint32)
+
+        X_tuner = np.fromfile(data_root + 'tune_input.bin', dtype=np.float32)
+        X_tuner = X_tuner.reshape((-1, 3, 224, 224))
+        y_tuner = np.fromfile(data_root + 'tune_labels.bin', dtype=np.uint32)
+
+        return X_train, y_train, X_test, y_test, X_tuner, y_tuner
+
+
+    def trainModel(self, model, X_train=None, y_train=None,
+                   X_test=None, y_test=None):
+        """Reject training: only pretrained ImageNet weights are supported.
+
+        The data arguments are optional and ignored. They are accepted so
+        that a call in the five-argument form used by other benchmarks,
+        trainModel(model, X_train, y_train, X_test, y_test), reaches the
+        error below instead of failing with a TypeError.
+
+        Raises:
+            AssertionError: Always.
+        """
+        # Raised explicitly rather than via `assert False`, which is
+        # stripped when Python runs with -O.
+        raise AssertionError(
+            "ImageNet training not supported - use Pretrained weights")
+
+
+    
+if __name__ == '__main__':
+    # Make only GPU 0 visible to CUDA.
+    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+    # Switch Keras to the NCHW (channels-first) layout.
+    K.set_image_data_format('channels_first')
+
+    # Benchmark-specific parameters; weights.h5 sits in the reload directory.
+    reload_dir = MODEL_PARAMS_DIR + '/vgg16_imagenet/'
+    keras_model_file = reload_dir + 'weights.h5'
+    data_dir = ''
+    src_dir = 'data/vgg16_imagenet_src/'
+    num_classes = 1000
+    batch_size = 25
+
+    benchmark = VGG16(
+        'VGG16_imagenet', reload_dir, keras_model_file,
+        data_dir, src_dir, num_classes, batch_size)
+    benchmark.run(sys.argv)
+
+
+