diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/hpvm-rt-controller.cpp b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/hpvm-rt-controller.cpp
index 24ba749cb57953cfec2985ef47c37282bf6f0f93..e1634be3a84b27de82eacc437473c9024cf878c7 100644
--- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/hpvm-rt-controller.cpp
+++ b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/hpvm-rt-controller.cpp
@@ -1422,13 +1422,14 @@ hpvm_rt_readLabelsBatch_cached(const char *labels_file, int start, int end) {
       ERROR("Data file %s is not found. Aborting...\n", labels_file);
       abort();
     }
+    
     // Get number of labels
     fseek(file, 0, SEEK_END);
     long size = ftell(file);
     fseek(file, 0, SEEK_SET); // return file pointer to beginning
 
     // Allocate memory for labels
-    labels_from_file = (uint32_t *)malloc(size);
+    labels_from_file = (uint32_t *) malloc(size);
     if (labels_from_file == NULL) {
       ERROR("Memory allocation for labels unsucessfull. Aborting...\n");
       abort();
@@ -1506,9 +1507,9 @@ float hpvm_rt_computeAccuracy3(uint32_t *labels, void *result_ptr) {
 }
 
 
-//#define llvm_hpvm_invokeRtControl_BASE llvm_hpvm_invokeRtControl
+#define llvm_hpvm_invokeRtControl_BASE llvm_hpvm_invokeRtControl
 //#define llvm_hpvm_invokeRtControl_ADJUST_PR llvm_hpvm_invokeRtControl
-#define llvm_hpvm_invokeRtControl_ITERATE llvm_hpvm_invokeRtControl
+//#define llvm_hpvm_invokeRtControl_ITERATE llvm_hpvm_invokeRtControl
 
 extern "C" void llvm_hpvm_invokeRtControl_BASE(
     void *result, const char *str, int start, int end) {
diff --git a/hpvm/projects/keras/README.md b/hpvm/projects/keras/README.md
index 1f790fd46da13a489e12974328471e017e24743b..70828896b00ddb2c452b74a2275370b71ec7b5c4 100644
--- a/hpvm/projects/keras/README.md
+++ b/hpvm/projects/keras/README.md
@@ -1,12 +1,101 @@
+# Keras Frontend 
 
-## Importing Conda Environment:
+## Installing Dependencies
 
-conda env create -f keras_environment.yml
+### Updating pip
+The pip version required in this subproject must be >= `19.3`.
 
-## Building and Installing Frontend:
+To upgrade pip:
 
+```
+pip install --upgrade pip
+```
+
+To check installed pip version:
+
+```
+pip -V
+```
+
+### Importing Conda Environment:
+
+```
+conda env create -f keras_environment.yml --name ${KERAS_ENV_NAME}
+```
+Note: pip version MUST be >= 19.3
+
+### Activating Conda Environment:
+
+```
+conda activate ${KERAS_ENV_NAME}
+```
+
+### Building and Installing Frontend:
+
+```
 python setup.py build
 
 python setup.py install
+```
+
+## Running Benchmarks
+
+Benchmarks are located under `./src/`.
+
+List of benchmarks and the expected accuracies:
+
+| Benchmark       | Accuracy    |
+| ----------- | ----------- |
+| AlexNet-CIFAR10      | 79.16       |
+| AlexNet2-CIFAR10   | 85.10        |
+| AlexNet-ImageNet | 56.30 |
+| LeNet-MNIST (TODO: fix, currently broken) | 99.11 |
+| MobileNet-CIFAR10 | 82.40 |
+| ResNet18-CIFAR10 | 89.52 |
+| ResNet50-ImageNet | 75.10 |
+| VGG16-CIFAR10 | 89.42 |
+| VGG16-CIFAR100 | 66.20 |
+| VGG16-ImageNet | 69.46 |
+
+Activate conda environment (above) before running benchmarks 
+
+### Synopsis
+
+```
+python src/${BENCH_NAME}.py  [hpvm_reload|keras_reload]  [frontend|keras_dump] 
+
+```
+
+**Parameters:**
+
+`hpvm_reload` : Reloads HPVM weights (format used in `model_params` found here: [ADD link to Google Drive]) from directory specified in Benchmark constructor. 
+
+`keras_reload`: Reloads weights in Keras `.h5` file format 
+
+`frontend`: Invokes the HPVM frontend and dumps weights in directory specified in constructor
+
+`keras_dump`: Dumps keras .h5 format model weights in directory specified in constructor
+
+
+
+### Building New Benchmarks 
+
+All benchmarks inherit from the common parent `Benchmark` class. 
+Each benchmark overrides virtual functions for building the model, training, inference, 
+and data preprocessing. 
+        
+    
+`def buildModel(self)`:
+returns a keras model
+
+`def data_preprocess(self)`:
+returns X_train, y_train, X_test, y_test, X_tuner, and y_tuner data — in that order; this data will be directly used later for training and inference
+
+`def trainModel(self, model, X_train, y_train, X_test, y_test)`:
+returns a trained keras model
+
+
+
+
 
 
diff --git a/hpvm/projects/keras/cmake_template/CMakeLists.txt b/hpvm/projects/keras/cmake_template/CMakeLists.txt
deleted file mode 100644
index 0cdee697ce2d663775f0283e96f35c45fd467986..0000000000000000000000000000000000000000
--- a/hpvm/projects/keras/cmake_template/CMakeLists.txt
+++ /dev/null
@@ -1,59 +0,0 @@
-cmake_minimum_required (VERSION 3.17)
-project (hpvm-tensor-rt)
-find_package(CUDA 6.5 REQUIRED)
-set(CUDA_SEPARABLE_COMPILATION ON CACHE BOOL "")
-set(CUDA_PROPAGATE_HOST_FLAGS OFF)
-
-# Addresses a bug where code is not compiled as C++11 in non-CUDA code and older g++ versions
-# Edit: using c++14 now
-set(CMAKE_CXX_STANDARD 14)
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -I/")
-set(
-  CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};
-  -gencode;arch=compute_60,code=sm_60;
-  -gencode;arch=compute_60,code=compute_60;
-  -std=c++14 --expt-relaxed-constexpr -maxrregcount 32 # These are for image ops
-)
-if(CMAKE_BUILD_TYPE STREQUAL "Debug")
-  message("Debug mode")
-  set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-g;-lineinfo;-Xcompiler;-ggdb)
-else()
-  set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DNDEBUG;-Xcompiler;-DNDEBUG)
-endif()
-
-
-# Default include/link directories
-include_directories(${CUDA_TOOLKIT_ROOT_DIR}   ${CUDA_TOOLKIT_ROOT_DIR}/include)
-include_directories($ENV{CUDNN_PATH} $ENV{CUDNN_PATH}/include)
-include_directories($ENV{LLVM_SRC_ROOT}/projects/hpvm-tensor-rt/tensor_runtime/include)
-include_directories($ENV{LLVM_SRC_ROOT}/projects/hpvm-tensor-rt/dnn_sources/include)
-include_directories($ENV{LLVM_SRC_ROOT}/projects/gpu_profiler/include)
-include_directories($ENV{LLVM_SRC_ROOT}/projects/soc_simulator/include)
-link_directories(${CUDA_TOOLKIT_ROOT_DIR}/lib64 $ENV{CUDNN_PATH} $ENV{CUDNN_PATH}/lib $ENV{CUDNN_PATH}/lib64)
-
-
-
-# Default link libraries
-find_library(GPU_PROFILER_LIB
-  NAMES libgpu_profiler.a
-  HINTS $ENV{LLVM_SRC_ROOT}/projects/gpu_profiler/lib
-  )
-
-find_library(SOC_SIMULATOR_LIB
-  NAMES libpromise_profiler.a
-  HINTS $ENV{LLVM_SRC_ROOT}/projects/soc_simulator/lib
-  )
-
-find_library(TENSOR_LIB
-  NAMES libtensor_runtime.a
-  HINTS $ENV{LLVM_SRC_ROOT}/projects/hpvm-tensor-rt/lib
-  )
-
-
-set(LINK_LIBS cudart cudnn cufft cublas stdc++fs curand -pthread)
-
-#### Image Processing Benchmarks
-
-add_executable(DNN_binary  src.cc)
-target_link_libraries(DNN_binary  ${TENSOR_LIB} ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB} ${LINK_LIBS})
-
diff --git a/hpvm/projects/keras/frontend/approxhpvm_translator.py b/hpvm/projects/keras/frontend/approxhpvm_translator.py
index e60d6adb994ee4da56765a1fa365b14a792863d2..59bc0d17c0605672153a542e850d98cf9d868d77 100644
--- a/hpvm/projects/keras/frontend/approxhpvm_translator.py
+++ b/hpvm/projects/keras/frontend/approxhpvm_translator.py
@@ -5,6 +5,7 @@ from frontend.promise_translator import PromiseRtTranslator
 from frontend.hpvm_dfg_translator import HPVMTranslator
 from frontend.weight_utils import dumpLabels, dumpData, dumpConvWeights, dumpFcWeights, dumpFcBias
 from frontend.utils import *
+from frontend.knobs import *
 import keras
 import os
 
@@ -205,7 +206,75 @@ class TensorRtTranslator:
     self.input_str = ""
     self.filter_names = {}
 
+    # Used for Json gen
+    self.json_str = ""
+    self.knobs_str = ""
+    self.cur_height = 32    
+    self.cur_width = 32     
+    self.op_count = 1       
+    
+    
+
+
+  def setInputHeightWidth(self, data):
+    # Seed the running spatial dims from axes 2 and 3 of `data` (assumes an (N, C, H, W) layout, matching the names used in genInputReadCall - TODO confirm)
+    self.cur_height = data.shape[2]
+    self.cur_width = data.shape[3]
+    DEBUG ("cur_height = ", self.cur_height, "  cur_width = ", self.cur_width, ", \n")
+
+    
+  def addConvOverheads(self, weights, padding, strides):
+    # Records an estimated cost and the conv knob list for one convolution, then shrinks the tracked dims by the strides.
+    K_d = weights.shape[0] * weights.shape[1] * weights.shape[2] * weights.shape[3]
+    # Floor division keeps the spatial dims (and the cost) integral; `/` is true division on Python 3 and would yield floats.
+    H_d = self.cur_height // strides[0]
+    W_d = self.cur_width // strides[1]
+    # NOTE(review): `padding` is accepted but does not enter the estimate.
+    flops = H_d * W_d * K_d
+    DEBUG ("conv_flops =  ", flops)
+
+    self.json_str += "\"convolution_" + str(self.op_count) + "\" : " + str(flops) + ", \n"
+    self.knobs_str += "\"convolution_" + str(self.op_count) + "\" : ["  + conv_knobs + "], \n"
+    self.op_count += 1
+    
+    self.cur_height = self.cur_height // strides[0]
+    self.cur_width = self.cur_width // strides[1]
+
+    DEBUG ("cur_height = ", self.cur_height, "  cur_width = ", self.cur_width, "\n")
+
+    
+  def addDenseOverheads(self, weights):
+    # Records the cost (product of the two weight dims) and the baseline knob list for one dense layer.
+    flops = weights.shape[0] * weights.shape[1]
+    DEBUG ("dense_flops =  ", flops)
+
+    self.json_str += "\"linear_" + str(self.op_count) + "\" : " + str(flops) + ", \n"
+    self.knobs_str += "\"linear_" + str(self.op_count) + "\" : ["  + baseline_knobs + "], \n"
+    self.op_count += 1
+    # After a dense layer the tracked dims become 1 x (second weight dim).
+    self.cur_height = 1
+    self.cur_width = weights.shape[1] 
+    
+    DEBUG ("cur_height = ", self.cur_height, "  cur_width = ", self.cur_width, "\n")
+
+    
+  def adjustPoolDims(self, strides):
+    # Shrink the tracked dims by the pooling strides; `//` keeps them integral (`/` yields floats on Python 3).
+    self.cur_height = self.cur_height // strides[0]
+    self.cur_width = self.cur_width // strides[1]
+    
+    DEBUG ("cur_height = ", self.cur_height, "  cur_width = ", self.cur_width, "\n")
+
+
+  def addBaselineKnob(self, op_name):  # logs op "<op_name>_<op_count>" with cost 0 and the baseline knob list, then bumps op_count
 
+    self.json_str += "\"" + op_name + "_" + str(self.op_count) + "\" : 0, \n"
+    self.knobs_str += "\"" + op_name + "_" + str(self.op_count) + "\" : ["  + baseline_knobs + "], \n"
+    self.op_count += 1
+
+    
+    
+    
   def getWeightStr(self):
     return self.weight_str
 
@@ -393,7 +462,16 @@ class TensorRtTranslator:
         print ("Use: ZeroPadding2D(padding=(" + str(padding) + "," + str(padding) + "));\n");
         sys.exit(0)
 
-      
+      # NOTE: For Json (tuning config) file generation
+      if layer_type == "Conv2D":
+        self.addConvOverheads(weights, padding, strides)
+  
+      elif layer_type == "DepthwiseConv2D":
+        #self.json_str += "depthwise_convolution_" + str(self.op_count) + " : 0, \n"
+        #self.op_count += 1
+        self.addBaselineKnob("depthwise_convolution")
+
+    
 
     if layer_type == "Dense":
       input_var_name = self.getSingleInputName(cur_node)
@@ -406,6 +484,9 @@ class TensorRtTranslator:
 
       self.program_str += inst_str
 
+      # Add Cost for Dense Layer (Json file)
+      self.addDenseOverheads(weights)
+        
       
     if self.hasBiasAdd(cur_node):
       out_var_name2 = self.getVariableName(cur_node)    
@@ -420,13 +501,21 @@ class TensorRtTranslator:
       # NOTE: Changing output variable
       out_var_name1 = out_var_name2
 
+      #self.json_str += "add_" + str(self.op_count) + " : 0, \n"
+      # self.op_count += 1
+      self.addBaselineKnob("add")
+      
 
     if layer_type == "Activation":
       input_var_name = self.getSingleInputName(cur_node)
       
       inst_str = genActivationCallStr(input_var_name, out_var_name1, cur_node.activation_type)
       self.program_str += inst_str
-      
+
+      #self.json_str += cur_node.activation_type + "_" + str(self.op_count) + " : 0, \n"
+      #self.op_count += 1
+      self.addBaselineKnob(cur_node.activation_type)
+
     
     if self.hasActivation(cur_node) and layer_type != "Activation":
       activation_type = cur_node.activation_type
@@ -438,7 +527,11 @@ class TensorRtTranslator:
       if activation_type == "softmax":
         print ("Softmax canNOT be part of Dense/Conv Op. Insert: Activation('softmax');")
         sys.exit(0)
-        
+
+      #self.json_str += activation_type + "_" + str(self.op_count) + " : 0, \n"
+      #self.op_count += 1
+      self.addBaselineKnob(activation_type)
+      
 
     if layer_type == "BatchNormalization":
       input_var_name = self.getSingleInputName(cur_node)
@@ -453,6 +546,11 @@ class TensorRtTranslator:
       inst_str += "); \n"
       
       self.program_str += inst_str
+
+      #self.json_str += "batchnorm_" + str(self.op_count) + " : 0, \n"
+      #self.op_count += 1
+      self.addBaselineKnob("batchnorm")
+
       
       
     if layer_type == "Add":  
@@ -462,6 +560,10 @@ class TensorRtTranslator:
       inst_str += "tensorAdd(" + input_vars[0] + ", " + input_vars[1] + "); \n"
       self.program_str += inst_str
 
+      #self.json_str += "add_" + str(self.op_count) + " : 0, \n"
+      #self.op_count += 1
+      self.addBaselineKnob("add")
+
       
     if layer_type == "MaxPooling2D" or layer_type == "AveragePooling2D":  
       input_var_name = self.getSingleInputName(cur_node)
@@ -473,8 +575,16 @@ class TensorRtTranslator:
       pool_type = 0
       if layer_type == "MaxPooling2D":
         pool_type = "0"
+        #self.json_str += "maxpool_" + str(self.op_count) + " : 0, \n"
+        #self.op_count += 1
+        self.addBaselineKnob("maxpool")
+
       if layer_type == "AveragePooling2D":
-        pool_type = "1"     
+        pool_type = "1"
+        #self.json_str += "avgpool_" + str(self.op_count) + " : 0, \n"
+        #self.op_count += 1
+        self.addBaselineKnob("avgpool")
+
       
       # tensorPooling(input, pool_type, pool_h, pool_w, v_pad, h_pad, v_stride, h_stride)
       inst_str = "void* " + out_var_name1 + " = "
@@ -483,6 +593,7 @@ class TensorRtTranslator:
       inst_str += "); \n"
       self.program_str += inst_str
 
+      self.adjustPoolDims(strides)
       
             
           
@@ -518,7 +629,7 @@ class TensorRtTranslator:
 
     
     
-  def dump_weights(self, model, prefix):
+  def dump_weights(self, model, prefix, reload_weights):
 
     layer_count = 0
     for i in range(len(model.layers)):
@@ -539,7 +650,7 @@ class TensorRtTranslator:
         W = weights.shape[0]
 
         unique_file_name = w_name + ".bin"
-        dumpConvWeights(prefix + unique_file_name, weights, N, C, H, W)
+        dumpConvWeights(prefix + unique_file_name, weights, N, C, H, W, reload_weights)
 
         file_path = w_name + "_path" 
         file_path_str = "std::string " + file_path + " = " + " dir_prefix + std::string(\""
@@ -566,7 +677,7 @@ class TensorRtTranslator:
           DEBUG (bias_weights.shape, b_name)
 
           unique_file_name = b_name + ".bin"
-          dumpFcBias(prefix + unique_file_name, bias_weights, bias_weights.shape[0])
+          dumpFcBias(prefix + unique_file_name, bias_weights, bias_weights.shape[0], reload_weights)
 
           file_path = b_name + "_path" 
           file_path_str =  "std::string " + file_path + " = " + " dir_prefix + std::string(\""
@@ -590,7 +701,7 @@ class TensorRtTranslator:
         W = weights.shape[1]
 
         unique_file_name = w_name + ".bin"
-        dumpFcWeights(prefix + unique_file_name, weights, H, W)
+        dumpFcWeights(prefix + unique_file_name, weights, H, W, reload_weights)
 
         file_path = w_name + "_path" 
         file_path_str = "std::string " + file_path + " = " + " dir_prefix + std::string(\""
@@ -609,7 +720,7 @@ class TensorRtTranslator:
           DEBUG (bias_weights.shape, b_name)
 
           unique_file_name = b_name + ".bin"
-          dumpFcBias(prefix + unique_file_name, bias_weights, bias_weights.shape[0])
+          dumpFcBias(prefix + unique_file_name, bias_weights, bias_weights.shape[0], reload_weights)
 
           file_path = b_name + "_path" 
           file_path_str =  "std::string " + file_path + " = " + " dir_prefix + std::string(\"" 
@@ -629,7 +740,7 @@ class TensorRtTranslator:
         gamma_id = layer_name + "_gamma"
         gamma_file_name = gamma_id + ".bin"
         self.filter_names[gamma_id] = 1
-        dumpFcBias(prefix + gamma_file_name, gamma_w, gamma_w.shape[0])
+        dumpFcBias(prefix + gamma_file_name, gamma_w, gamma_w.shape[0], reload_weights)
 
         file_path = gamma_id + "_path" 
         file_path_str =  "std::string " + file_path + " = " + " dir_prefix + std::string(\"" 
@@ -644,7 +755,7 @@ class TensorRtTranslator:
         beta_id = layer_name + "_beta"
         beta_file_name = beta_id + ".bin"
         self.filter_names[beta_id] = 1
-        dumpFcBias(prefix + beta_file_name, beta_w, beta_w.shape[0])
+        dumpFcBias(prefix + beta_file_name, beta_w, beta_w.shape[0], reload_weights)
 
         file_path = beta_id + "_path" 
         file_path_str =  "std::string " + file_path + " = " + " dir_prefix + std::string(\"" 
@@ -658,7 +769,7 @@ class TensorRtTranslator:
         mean_id = layer_name + "_mean"
         mean_file_name = mean_id + ".bin"
         self.filter_names[mean_id] = 1
-        dumpFcBias(prefix + mean_file_name, mean_w, mean_w.shape[0])
+        dumpFcBias(prefix + mean_file_name, mean_w, mean_w.shape[0], reload_weights)
         
         file_path = mean_id + "_path" 
         file_path_str =  "std::string " + file_path + " = " + " dir_prefix + std::string(\"" 
@@ -673,7 +784,7 @@ class TensorRtTranslator:
         variance_id = layer_name + "_variance"
         variance_file_name = variance_id + ".bin"
         self.filter_names[variance_id] = 1
-        dumpFcBias(prefix + variance_file_name, variance_w, variance_w.shape[0])
+        dumpFcBias(prefix + variance_file_name, variance_w, variance_w.shape[0], reload_weights)
 
         file_path = variance_id + "_path" 
         file_path_str =  "std::string " + file_path + " = " + " dir_prefix + std::string(\"" 
@@ -728,39 +839,55 @@ class TensorRtTranslator:
     
 
 
-  def genInputCalls(self, test_data, test_labels, weights_dir):
-
-    dumpData(weights_dir + "input.bin", test_data)
+  def genInputReadCall(self, input_data, input_name):
 
-    N = test_data.shape[0]
-    C = test_data.shape[1]
-    H = test_data.shape[2]
-    W = test_data.shape[3]
-
-    file_path = "input_path" 
+    file_path =  input_name + "_path" 
     file_path_str = "std::string " + file_path + " = " + " dir_prefix + std::string(\""
-    file_path_str += "input.bin\"); \n"
+    file_path_str += input_name + ".bin\"); \n"
     self.weight_str += file_path_str
-     
-    self.input_str += "void* input = readTrainedWeights("
+    
+    N = input_data.shape[0]
+    C = input_data.shape[1]
+    H = input_data.shape[2]
+    W = input_data.shape[3]
+
+    self.input_str += "void* " + input_name +  " = readTrainedWeights("
     self.input_str += file_path + ".c_str(), 0," + str(N) + "," + str(C) + ","
     self.input_str += str(H) + "," + str(W) + "); \n"
 
+
+
+  def genLabelReadCall(self, labels, labels_name):
+    # Appends generated C++ text: a std::string path for "<labels_name>.bin" (to weight_str) and a readLabels3 call (to input_str).
+    file_path = labels_name + "_path" 
+    file_path_str = "std::string " + file_path + " = " + " dir_prefix + std::string(\""
+    file_path_str +=  labels_name + ".bin\"); \n"
+    self.weight_str += file_path_str
+    # The label count passed to readLabels3 is labels.shape[0].
+    self.input_str += "uint32_t* " + labels_name + " = readLabels3("
+    self.input_str += file_path + ".c_str()," + str(labels.shape[0]) + "); \n"
+
+
     
+
+  def genInputCalls(self, test_data, test_labels, tuner_data, tuner_labels, weights_dir, reload_weights):
+    # Passes the test and tuner inputs/labels to the dump helpers and emits the matching read calls into the generated source.
+    dumpData(weights_dir + "test_input.bin", test_data, reload_weights)
+    self.genInputReadCall(test_data, "test_input")
     # Adding input to the filter map
     self.filter_names["input"] = 1
-    
-    dumpLabels(weights_dir + "labels.bin", test_labels)
+    dumpLabels(weights_dir + "test_labels.bin", test_labels, reload_weights)
+    self.genLabelReadCall(test_labels, "test_labels")
+    # The tuner read calls must carry the tuner arrays' own shapes, not the test set's.
+    dumpData(weights_dir + "tune_input.bin", tuner_data, reload_weights)
+    self.genInputReadCall(tuner_data, "tune_input")
+ 
+    dumpLabels(weights_dir + "tune_labels.bin", tuner_labels, reload_weights)
+    self.genLabelReadCall(tuner_labels, "tune_labels")
 
-    file_path = "labels_path" 
-    file_path_str = "std::string " + file_path + " = " + " dir_prefix + std::string(\""
-    file_path_str += "labels.bin\"); \n"
-    self.weight_str += file_path_str
 
-    #self.input_str += "uint8_t* labels = readLabels("
-    self.input_str += "uint32_t* labels = readLabels2("
-    self.input_str += file_path + ".c_str()," + str(test_labels.shape[0]) + "); \n"
 
+    
 
     
   def genBatchLoop(self, x_test):
@@ -797,12 +924,9 @@ class TensorRtTranslator:
 
     last_node = self.dfg.last_node
     output_var = self.output_map[last_node.layer_name]
-    #accuracy_call = "\nfloat accuracy = computeAccuracy2(labels, batch_size, " + output_var + "); \n"
-    #accuracy_call = "\nfloat accuracy = computeAccuracy3(labels, batch_size, " + output_var + "); \n"
     accuracy_call = "\nfloat accuracy = computeAccuracy3(labels, " + output_var + "); \n"
     end_loop_str += accuracy_call
  
-    #end_loop_str += "float accuracy = computeAccuracy2(labels, batch_size, var_60); "
     end_loop_str += "final_accuracy += accuracy; \n"
     end_loop_str += "freeBatchMemory(); \n "
     end_loop_str += "\n}\n\n"
@@ -821,20 +945,46 @@ class TensorRtTranslator:
     f.write(self.program_str)
     f.close()
 
+
+  def dumpJsonFile(self, dir_prefix):
+    # Writes <dir_prefix>/tuner.json with three sections: "op_cost", "knob_speedup" and "op_knobs".
+    # Each accumulated entry ends in ", \n" (3 chars), so [:-3] drops the final separator.
+    op_cost_str = " \"op_cost\" : { \n"
+    op_cost_str += self.json_str[:-3]
+    op_cost_str += "\n }, \n\n"
+
+    knobs_speedup_str = "\n \"knob_speedup\" : { \n"
+    for key in knobs_speedups:
+      knobs_speedup_str += "\"" + str(key) + "\" : " + str(knobs_speedups[key]) + ", \n"
+    # NOTE(review): an empty knobs_speedups would make [:-3] eat the opening brace; assumed non-empty.
+    knobs_speedup_str = knobs_speedup_str[:-3] + "\n}, \n\n"
+
+    layer_knobs_str = " \"op_knobs\" : { \n"
+    layer_knobs_str += self.knobs_str[:-3]
+    layer_knobs_str += " \n\n } \n\n"
+
+    # `with` guarantees the file is closed even if a write raises.
+    with open(dir_prefix + "/tuner.json", "w+") as f:
+      f.write("{ \n\n")
+      f.write(op_cost_str)
+      f.write(knobs_speedup_str)
+      f.write(layer_knobs_str)
+      f.write("\n\n}")
+
+
     
   
-  def translate(self, model, weights_dir, test_data, test_labels):
+  def translate(self, model, weights_dir, src_dir, test_data, test_labels, tuner_data, tuner_labels, weights_reload):
 
     self.add_header()
     
-    #dir_path = "std::string dir_prefix = std::string(\"" + weights_dir + "\"); \n"
-    dir_path = "std::string dir_prefix = std::string(\"../\"); \n"
+    dir_path = "std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + std::string(\"" + weights_dir +  "\"); \n"
     self.weight_str += dir_path
 
     if test_data is not None:
-      self.genInputCalls(test_data, test_labels, weights_dir)
+      self.genInputCalls(test_data, test_labels, tuner_data, tuner_labels, weights_dir, weights_reload)
 
-    self.dump_weights(model, weights_dir)
+    self.dump_weights(model, weights_dir, weights_reload)
     self.program_str += "\n" + self.weight_str + "\n\n"
 
     self.genBatchLoop(test_data)
@@ -845,9 +995,10 @@ class TensorRtTranslator:
 
     self.add_footer(test_data);
 
-    self.generateSourceProgram(weights_dir)
+    self.generateSourceProgram(src_dir)
+    
+    self.dumpJsonFile(src_dir)
     
-  
 
 
 
@@ -921,21 +1072,27 @@ def getUniquePath(weights_dir):
   
   return weights_dir
   
-    
+
+
+
 #***** Top level External Function ******* 
-def translate_to_approxhpvm(model, weights_dir, test_data=None, test_labels=None,
-                            num_classes=10, reload_dir=None):
+def translate_to_approxhpvm(model,
+                            weights_dir, src_dir,
+                            test_data, test_labels,
+                            tuner_data, tuner_labels,
+                            batch_size, num_classes=10,
+                            enable_weights_reload = False):
 
 
-  weights_dir = getUniquePath(weights_dir)
-  os.mkdir(weights_dir)   
+  reload_weights = enable_weights_reload   # If set to True, does not dump any weight/input/label files
 
-  
-  if reload_dir is not None:
-    y_test = keras.utils.to_categorical(test_labels, num_classes)
-    reloadModelParams(model, reload_dir, test_data, y_test)
+  if not reload_weights:
+    weights_dir = getUniquePath(weights_dir)
+    os.mkdir(weights_dir)   
 
-  
+  src_dir = getUniquePath(src_dir)
+  os.mkdir(src_dir)   
+    
   dfg = DFG()    
   for i in range(len(model.layers)):
     layer = model.layers[i]
@@ -949,22 +1106,25 @@ def translate_to_approxhpvm(model, weights_dir, test_data=None, test_labels=None
   DEBUG ("test_data.shape = ", test_data.shape, "\n")
   DEBUG ("test_labels.shape = ", test_labels.shape, "\n")
 
-  tensorRtTranslator = TensorRtTranslator(dfg)    
-  tensorRtTranslator.translate(model, weights_dir, test_data, test_labels)
+  tensorRtTranslator = TensorRtTranslator(dfg)
+  tensorRtTranslator.setInputHeightWidth(test_data)
+  tensorRtTranslator.translate(model, weights_dir, src_dir, test_data, test_labels, tuner_data, tuner_labels, reload_weights)
   weight_str = tensorRtTranslator.getWeightStr()
   input_str = tensorRtTranslator.getInputStr()
 
 
-  #promiseRtTranslator = PromiseRtTranslator(dfg, weight_str)    
-  #promiseRtTranslator.translate(model, weights_dir, test_data)
-
   filter_names = tensorRtTranslator.getFilterNames()
   hpvmTranslator = HPVMTranslator(dfg, weight_str, input_str, filter_names)    
-  hpvmTranslator.translate(model, weights_dir, test_data)
+  hpvmTranslator.translate(model, src_dir, test_data, tuner_data, batch_size)
 
+  if reload_weights:
+    print ("NOTE: Using existing pretrained weights \n")
+  else:
+    print ("NOTE: dumping new set of weights \n")
+    
   print ("-- Weight Files Under : ", weights_dir)
-  print ("-- TensorRT src : ", weights_dir + "/src.cc")
-  print ("-- ApproxHPVM src  : ", weights_dir + "approxhpvm_src.cc")
+  print ("-- TensorRT src : ", src_dir + "/src.cc")
+  print ("-- ApproxHPVM src  : ", src_dir + "approxhpvm_src.cc")
 
   
   return weights_dir
diff --git a/hpvm/projects/keras/frontend/config.py b/hpvm/projects/keras/frontend/config.py
new file mode 100644
index 0000000000000000000000000000000000000000..72f18e342be19c9e56ecdc7373e8c037d0131f87
--- /dev/null
+++ b/hpvm/projects/keras/frontend/config.py
@@ -0,0 +1,3 @@
+
+# Path Relative to Model Params Directory
+MODEL_PARAMS_DIR = "../hpvm-tensor-rt/model_params/"
diff --git a/hpvm/projects/keras/frontend/hpvm_dfg_translator.py b/hpvm/projects/keras/frontend/hpvm_dfg_translator.py
index 65574a98881f010bf7cd67df344517803de8c67c..2c229a0dd2b100cf83882f5640c1d2707c78398d 100644
--- a/hpvm/projects/keras/frontend/hpvm_dfg_translator.py
+++ b/hpvm/projects/keras/frontend/hpvm_dfg_translator.py
@@ -1,7 +1,8 @@
 
 import sys
 from frontend.utils import *
-        
+from frontend.hpvm_intrinsics import *
+
 
 class HPVMTranslator:
 
@@ -19,7 +20,6 @@ class HPVMTranslator:
     self.file_header_str = ""
     self.hpvm_node_names = {}
     
-    
 
    
     
@@ -140,15 +140,15 @@ class HPVMTranslator:
     header_str = self.genNodeHeader(output_var, 1)
     inst_str = header_str 
 
-    func_name = "__visc__tensor_"
+    func_name = ""
     if activation_type == "tanh":
-      func_name += "tanh"
+      func_name += HPVM_tensor_tanh
 
     if activation_type == "relu":
-      func_name += "relu"
+      func_name += HPVM_tensor_relu
 
     if activation_type == "softmax":
-      func_name += "softmax"
+      func_name += HPVM_tensor_softmax
 
     inst_str += "  void* r = " + func_name + "(t1); \n"
     footer_str = self.genNodeFooter(2)
@@ -166,20 +166,26 @@ class HPVMTranslator:
         node_header_str += ", "
         
     node_header_str += ") { \n" 
-    node_header_str += "  __visc__hint(visc::PROMISE_TARGET); \n"
-    node_header_str += "  __visc__attributes(" + str(num_params) + ", "
 
+    node_header_str += " " + HPVM_hint + "(" + HPVM_layer_hint  + "); \n"
+    node_header_str += " " + HPVM_attributes + "(" + str(num_params) + ", "
+    
     for i in range(num_params):
       node_header_str += "t" + str(i + 1) 
       if i < num_params - 1:
         node_header_str += ", "
           
-    node_header_str += ", 0); \n\n" 
+    node_header_str += ", 0); \n"
+
+    # Adding node.id calls to assign IDs that are used with the runtime (for correct config ordering)
+    node_header_str += " " + HPVM_node_id + "(" + str(self.counter) + "); \n\n"
+    
     return node_header_str
 
     
   def genNodeFooter(self, num_params):
-    node_footer_str = "  __visc__return("
+
+    node_footer_str = " " + HPVM_return + "("
     node_footer_str += str(num_params) + ", "
     node_footer_str += "r, "
     node_footer_str += "(size_t) 0); \n"
@@ -188,61 +194,60 @@ class HPVMTranslator:
     return node_footer_str
 
 
-  # NOTE: genHpvmNodeEdges is replaced by genHpvmEdges 
   def genHpvmNodeEdges2(self, hpvm_node_id, input_vars):
 
     hpvm_edge_str = "\n  void* " + hpvm_node_id + " = "
-    hpvm_edge_str += "__visc__createNodeND(0, " + hpvm_node_id + "_node); \n\n"
-
+    hpvm_edge_str += HPVM_createNodeND + "(0, " + hpvm_node_id + "_node); \n\n"
+    
     it = 0
     for input_var_name in input_vars:
       if input_var_name in self.filter_names:
         input_index = self.filter_names[input_var_name]
         index1 = input_index * 2
         index2 = index1 + 1      
-        hpvm_edge_str += "  __visc__bindIn(" + hpvm_node_id + ", " + str(index1) + ", " + str(it*2) + ", 0); \n"
-        hpvm_edge_str += "  __visc__bindIn(" + hpvm_node_id + ", " + str(index2) + ", " + str(it*2+1) + ", 0); \n"
+        hpvm_edge_str += " " + HPVM_bindIn + "(" + hpvm_node_id + ", " + str(index1) + ", " + str(it*2) + ", 0); \n"
+        hpvm_edge_str += " " + HPVM_bindIn + "(" + hpvm_node_id + ", " + str(index2) + ", " + str(it*2+1) + ", 0); \n"
 
       elif input_var_name in self.hpvm_node_names:
-        hpvm_edge_str += "  __visc__edge(" + input_var_name + ", " + hpvm_node_id + ", 1, 0, " + str(it*2) + ", 0); \n"
-        hpvm_edge_str += "  __visc__edge(" + input_var_name + ", " + hpvm_node_id + ", 1, 1, " + str(it*2+1) + ", 0); \n"
-
+        hpvm_edge_str += "  " + HPVM_edge + "(" + input_var_name + ", " + hpvm_node_id + ", 1, 0, " + str(it*2) + ", 0); \n"
+        hpvm_edge_str += "  " + HPVM_edge + "(" + input_var_name + ", " + hpvm_node_id + ", 1, 1, " + str(it*2+1) + ", 0); \n"        
+        
       it += 1
       
     return hpvm_edge_str
 
+
   
-  # Fix: replace deprecated  genHpvmNodeEdges with  genHpvmEdges
   def genHpvmNodeEdges(self, out_var_name, input_var_name, input_var_name2):
 
     DEBUG ("input_var_name2 = ", input_var_name2)
     DEBUG ("input_var_name = ", input_var_name)
     
     hpvm_edge_str = "\n  void* " + out_var_name + " = "
-    hpvm_edge_str += "__visc__createNodeND(0, " + out_var_name + "_node); \n\n"
+    hpvm_edge_str += HPVM_createNodeND + "(0, " + out_var_name + "_node); \n\n"
 
     if input_var_name in self.filter_names:
       input_index = self.filter_names[input_var_name]
       index1 = input_index * 2
       index2 = index1 + 1      
-      hpvm_edge_str += "  __visc__bindIn(" + out_var_name + ", " + str(index1) + ", 0, 0); \n"
-      hpvm_edge_str += "  __visc__bindIn(" + out_var_name + ", " + str(index2) + ", 1, 0); \n"
+      hpvm_edge_str += " " + HPVM_bindIn + "(" + out_var_name + ", " + str(index1) + ", 0, 0); \n"
+      hpvm_edge_str += " " + HPVM_bindIn + "(" + out_var_name + ", " + str(index2) + ", 1, 0); \n"
 
     elif input_var_name in self.hpvm_node_names:
-      hpvm_edge_str += "  __visc__edge(" + input_var_name + ", " + out_var_name + ", 1, 0, 0, 0); \n"
-      hpvm_edge_str += "  __visc__edge(" + input_var_name + ", " + out_var_name + ", 1, 1, 1, 0); \n"
+      hpvm_edge_str += " " + HPVM_edge + "(" + input_var_name + ", " + out_var_name + ", 1, 0, 0, 0); \n"
+      hpvm_edge_str += " " + HPVM_edge + "(" + input_var_name + ", " + out_var_name + ", 1, 1, 1, 0); \n"
 
 
     if input_var_name2 in self.filter_names:
       input_index = self.filter_names[input_var_name2]
       index1 = input_index * 2
       index2 = index1 + 1
-      hpvm_edge_str += "  __visc__bindIn(" + out_var_name + ", " + str(index1) + ", 2, 0); \n"
-      hpvm_edge_str += "  __visc__bindIn(" + out_var_name + ", " + str(index2) + ", 3, 0); \n"
+      hpvm_edge_str += " " + HPVM_bindIn + "(" + out_var_name + ", " + str(index1) + ", 2, 0); \n"
+      hpvm_edge_str += " " + HPVM_bindIn + "(" + out_var_name + ", " + str(index2) + ", 3, 0); \n"
 
     elif input_var_name2 in self.hpvm_node_names:
-      hpvm_edge_str += "  __visc__edge(" + input_var_name2 + ", " + out_var_name + ", 1, 0, 2, 0); \n"
-      hpvm_edge_str += "  __visc__edge(" + input_var_name2 + ", " + out_var_name + ", 1, 1, 3, 0); \n"
+      hpvm_edge_str += " " + HPVM_edge + "(" + input_var_name2 + ", " + out_var_name + ", 1, 0, 2, 0); \n"
+      hpvm_edge_str += " " + HPVM_edge + "(" + input_var_name2 + ", " + out_var_name + ", 1, 1, 3, 0); \n"
 
       
     return hpvm_edge_str
@@ -254,10 +259,9 @@ class HPVMTranslator:
 
     header_str = self.genNodeHeader(out_var_name, 2)
     inst_str = header_str 
-    inst_str += "  void *r = __visc__tensor_mul(t1, t2); \n"
+    inst_str += "  void *r = " + HPVM_tensor_mul + "(t1, t2); \n"
     footer_str = self.genNodeFooter(2)
     inst_str += footer_str
-
     
     input_var_name = self.getSingleInputName(cur_node)
     weight_name = cur_node.layer_name + "_w"
@@ -270,7 +274,7 @@ class HPVMTranslator:
   
 
   def genConvNode(self, cur_node):
-    #input_var_name = self.getSingleInputName(cur_node)
+
     out_var_name = self.getVariableName(cur_node)
     
     header_str = self.genNodeHeader(out_var_name, 2)
@@ -291,7 +295,7 @@ class HPVMTranslator:
       # FIXME: currently only supporting symmetric padding
       padding = prev_padding[0][0]        
       
-    inst_str += "  void *r = __visc__tensor_convolution(t1, t2, "
+    inst_str += "  void *r = " + HPVM_tensor_convolution + "(t1, t2, "
     inst_str += str(padding) + ", "
     inst_str += str(padding) + ", "
     inst_str += str(strides[0]) + ", "
@@ -310,7 +314,7 @@ class HPVMTranslator:
 
 
   def genDepthwiseConvNode(self, cur_node):
-    #input_var_name = self.getSingleInputName(cur_node)
+
     out_var_name = self.getVariableName(cur_node)
     
     header_str = self.genNodeHeader(out_var_name, 2)
@@ -331,7 +335,7 @@ class HPVMTranslator:
       # FIXME: currently only supporting symmetric padding
       padding = prev_padding[0][0]        
       
-    inst_str += "  void *r = __visc__tensor_group_convolution(t1, t2, "
+    inst_str += "  void *r = " + HPVM_tensor_group_convolution + "(t1, t2, "
     inst_str += str(padding) + ", "
     inst_str += str(padding) + ", "
     inst_str += str(strides[0]) + ", "
@@ -355,13 +359,13 @@ class HPVMTranslator:
 
 
   def genBatchNormNode(self, cur_node):
-    #input_var_name = self.getSingleInputName(cur_node)
+
     out_var_name = self.getVariableName(cur_node)
     
     header_str = self.genNodeHeader(out_var_name, 5)
     inst_str = header_str 
 
-    inst_str += "  void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, "
+    inst_str += "  void *r = " + HPVM_tensor_batchnorm + "(t1, t2, t3, t4, t5, "
     inst_str += str(cur_node.epsilon) + "); \n"
 
     footer_str = self.genNodeFooter(2)
@@ -394,7 +398,7 @@ class HPVMTranslator:
 
     header_str = self.genNodeHeader(out_var_name, 2)
     inst_str = header_str 
-    inst_str += "  void *r = __visc__tensor_add(t1, t2); \n"
+    inst_str += "  void *r = " + HPVM_tensor_add + "(t1, t2); \n"
     footer_str = self.genNodeFooter(2)
     inst_str += footer_str
         
@@ -434,7 +438,7 @@ class HPVMTranslator:
       
     header_str = self.genNodeHeader(out_var_name, 2)
     inst_str = header_str 
-    inst_str += "  void *r = __visc__tensor_add(t1, t2); \n"
+    inst_str += "  void *r = " + HPVM_tensor_add + "(t1, t2); \n"
     footer_str = self.genNodeFooter(2)
     inst_str += footer_str
 
@@ -460,9 +464,9 @@ class HPVMTranslator:
 
     layer_type = cur_node.layer_type
     if layer_type == "MaxPooling2D":
-      func_name = "__visc__tensor_pool_max"     
+      func_name = HPVM_tensor_pool_max
     if layer_type == "AveragePooling2D":
-      func_name = "__visc__tensor_pool_mean"
+      func_name = HPVM_tensor_pool_mean
       
     inst_str += "  void* r = " + func_name + "(t1, "
     inst_str += str(pool_size[0]) + ", " + str(pool_size[1]) + ", "
@@ -549,7 +553,10 @@ class HPVMTranslator:
     headers += "#include <sys/stat.h> \n"
     headers += "#include <cstring> \n"
     
-    headers += "#include <visc.h> \n"
+    headers += "#include <" + HPVM_header +  "> \n"
+    if LLVM_9_BRANCH:
+       headers += "#include \"config.h\" \n"
+    
     headers += "#include <tensorTypes.h> \n"
     headers += "#include <tensorUtils.h> \n\n"
 
@@ -571,9 +578,10 @@ class HPVMTranslator:
       index += 1
 
     root_signature += "){ \n\n"
-    root_signature += "\n  __visc__hint(visc::CPU_TARGET); \n"
-    root_signature += "  __visc__attributes(" + str(len(self.filter_names)) + ", "
 
+    root_signature += "\n  " + HPVM_hint +  "(" + HPVM_cpu_hint + "); \n"
+    root_signature += " " + HPVM_attributes + "(" + str(len(self.filter_names)) + ", "
+    
     index = 0
     for f_name in self.filter_names:
       root_signature += f_name 
@@ -591,8 +599,8 @@ class HPVMTranslator:
     output_var = self.output_map[last_node.layer_name]
 
     # Binding output of last DFG node to the Root Node output
-    root_footer_str = "\n  __visc__bindOut(" + output_var + ", 0, 0, 0); \n"
-    root_footer_str += "  __visc__bindOut(" + output_var + ", 1, 1, 0); \n"
+    root_footer_str = "\n  " + HPVM_bindOut + "(" + output_var + ", 0, 0, 0); \n"
+    root_footer_str += "  " + HPVM_bindOut + "(" + output_var + ", 1, 1, 0); \n"
     root_footer_str += "\n}\n\n"
     
     self.root_str += root_footer_str
@@ -618,29 +626,98 @@ class HPVMTranslator:
 
 
 
-  def genMainFunction(self, test_data):
-    main_func_str = "int main(){ \n\n"
-    main_func_str += self.weight_str
-    main_func_str += self.input_str
-    main_func_str += "\n__visc__init(); \n"
-    main_func_str += "RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn))); \n\n"
+  def genBatchLoop(self, test_data, batch_size):
 
-    for f_name in self.filter_names:    
-      main_func_str += "args->" + f_name + " = " + f_name + "; \n"
-      main_func_str += "args->" + f_name + "_bytes = 0; \n"
+    # Returns the C++ text that opens the two batch loops and reads one
+    # input batch per iteration. dims[0] is emitted as test_input_size;
+    # dims[1], dims[2], dims[3] are passed to readInputBatch in that order.
+    dims = test_data.shape
 
-    main_func_str += "\nvoid* dfg = __visc__launch(0, root, (void*) args); \n\n"
-    main_func_str += "__visc__wait(dfg); \n\n"
-    
-    main_func_str += "void *result = static_cast<RootIn*>(args)->input; \n"
-    main_func_str += "hpvm_request_tensor(result, 0); \n\n"
-    main_func_str += "__visc__cleanup(); \n "
+    loop_lines = [
+      "unsigned int batch_size = " + str(batch_size) + "; \n",
+      "unsigned int test_input_size = " +  str(dims[0]) +  "; \n",
+      "unsigned int batch_count = test_input_size / batch_size; \n\n",
+      "startMemTracking(); \n",
+      "startProfiling(); \n\n",
+      "for(unsigned int j = 0; j < 1; j++){ \n",
+      "for(unsigned int i = 0; i < batch_count; i++){  \n\n",
+      "unsigned int start = i * batch_size; \n",
+      "unsigned int end = (i + 1) * batch_size;  \n",
+      "void* input = readInputBatch(input_path.c_str(), 0, start, end,"
+        + str(dims[1]) + "," + str(dims[2]) + "," + str(dims[3]) +  ");  \n\n",
+      "args->input = input;  \n",
+      "args->input_bytes = 0; \n\n"]
    
-    main_func_str += "computeAccuracy3(labels, result); \n"    
-    main_func_str += "return 0; \n\n"
-    main_func_str += "} \n"    
+    # Both loops are left open here; endBatchLoop emits the closing braces.
+    func_str = "".join(loop_lines)
+
+    return func_str
+ 
+
+   
+  def endBatchLoop(self):
+    # Returns the C++ text that ends a batch iteration: frees the batch
+    # memory, closes the two loops opened by genBatchLoop, stops profiling.
+    closing_lines = ["freeBatchMemory(); \n",
+                     "} \n",
+                     "} \n\n",
+                     "stopProfiling();  \n"]
+    return "".join(closing_lines)
+
+  # FIXIT
+  def handleTuneTestData(self):
+    # Returns C++ text that initialises input/input_path/labels_path from the
+    # test_* variables and switches them to tune_* when argv[1] is "tune".
+    defaults = ("void* input = test_input; \n"
+                "std::string input_path = test_input_path; \n"
+                "std::string labels_path = test_labels_path; \n\n")
+
+    tune_override = ("if (argc >= 2 &&  std::string(argv[1]) ==  \"tune\"){ \n"
+                     "  input = tune_input; \n"
+                     "  input_path = tune_input_path; \n"
+                     "  labels_path = tune_labels_path; \n\n"
+                     "} \n\n")
+    return defaults + tune_override
+
+  
     
-    self.main_func_str += main_func_str
+  def genMainFunction(self, test_data, batch_size):
+    # Assembles the C++ main() of the generated program and appends it to
+    # self.main_func_str. The launch/wait/result statements are emitted
+    # between the text of genBatchLoop and endBatchLoop.
+    parts = ["int main(int argc, char* argv[]){ \n\n",
+             self.weight_str,
+             self.input_str,
+             "\n" + HPVM_init + "(); \n",
+             "RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn))); \n\n",
+             self.handleTuneTestData()]
+
+    # Store every name in filter_names into the RootIn argument struct
+    for arg_name in self.filter_names:
+      parts.append("args->" + arg_name + " = " + arg_name + "; \n")
+      parts.append("args->" + arg_name + "_bytes = 0; \n")
+
+    parts.append(self.genBatchLoop(test_data, batch_size))
+    parts.append("void* dfg = " + HPVM_launch + "(0, root, (void*) args); \n\n")
+    parts.append(HPVM_wait + "(dfg); \n\n")
+
+    # The field holding the result tensor differs between the branches
+    if LLVM_4_BRANCH:
+      parts.append("void *result = static_cast<RootIn*>(args)->input; \n")
+    elif LLVM_9_BRANCH:
+      parts.append("void *result = static_cast<RootIn *>(args)->r.tensor; \n")
+
+    parts.append("hpvm_request_tensor(result, 0); \n\n")
+    parts.append("llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end); \n")
+    parts.append(self.endBatchLoop())
+    parts.append(HPVM_cleanup + "(); \n ")
+
+    # Finish main()
+    parts.append("return 0; \n\n")
+    parts.append("} \n")
+
+    self.main_func_str += "".join(parts)
+
 
   
     
@@ -658,17 +735,17 @@ class HPVMTranslator:
 
     
   
-  def translate(self, model, weights_dir, test_data):
+  def translate(self, model, src_dir, test_data, tuner_data, batch_size):
 
     self.genFileHeader()
     self.genRootNodeHeader()
     self.genRootStructure()
     self.codegen(self.dfg)
     self.genRootNodeFooter()
-    self.genMainFunction(test_data)
+    self.genMainFunction(test_data, batch_size)
 
     # dump generated program string to source file
-    self.generateSourceProgram(weights_dir)
+    self.generateSourceProgram(src_dir)
     
   
 
diff --git a/hpvm/projects/keras/frontend/hpvm_intrinsics.py b/hpvm/projects/keras/frontend/hpvm_intrinsics.py
new file mode 100644
index 0000000000000000000000000000000000000000..1060d965234bab61e9a97559a367d7902baa02d5
--- /dev/null
+++ b/hpvm/projects/keras/frontend/hpvm_intrinsics.py
@@ -0,0 +1,70 @@
+
+# String names of the HPVM header, intrinsics and target hints that the
+# Keras frontend splices into the C++ source it generates. Two spellings
+# exist, built on the stems "visc" and "hpvm"; the branch flags below
+# decide which one is used.
+
+# LLVM_9_BRANCH is derived from LLVM_4_BRANCH, so exactly one flag is True.
+LLVM_4_BRANCH = False
+LLVM_9_BRANCH = not LLVM_4_BRANCH
+
+# Branch-specific pieces: the stem shared by the header name, the
+# intrinsic prefix and the target namespace, and the target hint that
+# HPVM_layer_hint resolves to.
+if LLVM_4_BRANCH:
+  _stem = "visc"
+  _layer_target = "PROMISE_TARGET"
+elif LLVM_9_BRANCH:
+  _stem = "hpvm"
+  _layer_target = "TENSOR_TARGET"
+
+_intrinsic = "__" + _stem + "__"
+_target_ns = _stem + "::"
+
+# Header included by the generated source
+HPVM_header = _stem + ".h"
+
+# Node annotations and target hints
+HPVM_hint = _intrinsic + "hint"
+HPVM_attributes = _intrinsic + "attributes"
+HPVM_node_id = _intrinsic + "node_id"
+HPVM_layer_hint = _target_ns + _layer_target
+HPVM_cpu_hint = _target_ns + "CPU_TARGET"
+
+# Runtime setup, launch and teardown
+HPVM_init = _intrinsic + "init"
+HPVM_cleanup = _intrinsic + "cleanup"
+HPVM_launch = _intrinsic + "launch"
+HPVM_wait = _intrinsic + "wait"
+
+# Tensor intrinsics: convolutions
+HPVM_tensor_convolution = _intrinsic + "tensor_convolution"
+HPVM_tensor_group_convolution = _intrinsic + "tensor_group_convolution"
+
+# Tensor intrinsics: arithmetic and normalization
+HPVM_tensor_add = _intrinsic + "tensor_add"
+HPVM_tensor_mul = _intrinsic + "tensor_mul"
+HPVM_tensor_batchnorm = _intrinsic + "tensor_batchnorm"
+
+# Tensor intrinsics: pooling
+HPVM_tensor_pool_max = _intrinsic + "tensor_pool_max"
+HPVM_tensor_pool_mean = _intrinsic + "tensor_pool_mean"
+
+# Tensor intrinsics: activations
+HPVM_tensor_tanh = _intrinsic + "tensor_tanh"
+HPVM_tensor_relu = _intrinsic + "tensor_relu"
+HPVM_tensor_softmax = _intrinsic + "tensor_softmax"
+
+# Dataflow graph construction
+HPVM_createNodeND = _intrinsic + "createNodeND"
+HPVM_bindIn = _intrinsic + "bindIn"
+HPVM_bindOut = _intrinsic + "bindOut"
+HPVM_edge = _intrinsic + "edge"
+HPVM_return = _intrinsic + "return"
+
+# The underscore-prefixed helpers are only needed while this module is
+# being evaluated; delete them so that just the branch flags and the
+# HPVM_* constants remain as module attributes.
+del _stem, _layer_target, _intrinsic, _target_ns
+
+
diff --git a/hpvm/projects/keras/frontend/knobs.py b/hpvm/projects/keras/frontend/knobs.py
new file mode 100644
index 0000000000000000000000000000000000000000..291221acb544dbcdf88c810b9401356d2da91be7
--- /dev/null
+++ b/hpvm/projects/keras/frontend/knobs.py
@@ -0,0 +1,38 @@
+# Speedup value stored for each integer knob ID.
+knobs_speedups = {
+    11: 1,
+    12: 1.5,
+    151: 3,
+    152: 3,
+    153: 3,
+    154: 3,
+    155: 2.25,
+    156: 2.25,
+    157: 2.25,
+    158: 2.25,
+    159: 2.25,
+    160: 2.25,
+    161: 2,
+    162: 2,
+    163: 2,
+    164: 2,
+    165: 2,
+    166: 2,
+    167: 2,
+    168: 2,
+    261: 3,
+    262: 3,
+    263: 2.25,
+    264: 2.25,
+    265: 2.25,
+    266: 2,
+    267: 2,
+    268: 2,
+    269: 2,
+}
+
+conv_knobs = "12, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 261, 262, 263, 264, 265, 266, 267, 268, 269"
+
+baseline_knobs = "12"
+
+
diff --git a/hpvm/projects/keras/frontend/weight_utils.py b/hpvm/projects/keras/frontend/weight_utils.py
index dd22765386e2172572ad0feec201c7dec407a909..83a13c81351bf621a5f7be41ebe8f67bfdf6c3d4 100644
--- a/hpvm/projects/keras/frontend/weight_utils.py
+++ b/hpvm/projects/keras/frontend/weight_utils.py
@@ -1,12 +1,17 @@
 
+import sys
 import numpy as np
 import struct
 import random
 from keras.optimizers import Adam
 
 
-def dumpLabels(file_name, Y_test):
 
+def dumpLabels(file_name, Y_test, reload_weights = False):
+    
+    if reload_weights:
+        return
+    
     print ("Dumping Labels File = ", file_name)
     
     f = open(file_name, "wb")    
@@ -27,38 +32,16 @@ def dumpLabels(file_name, Y_test):
     f.close()
     
 
-"""    
-def dumpData(file_name, X_test):
-
-    N = X_test.shape[0]
-    C = X_test.shape[1]
-    H = X_test.shape[2]
-    W = X_test.shape[3]
- 
-    print ("*DumpData")
-    #print("-min_val = ", np.amin(X_test))
-    #print("-max_val = ", np.amax(X_test))
-    
-    f = open(file_name, "wb")
-    for i in range(N):
-        for j in range(C):
-            for k in range(H):
-                for l in range(W):
-                    val = struct.unpack("f", struct.pack("f", X_test[i][j][k][l]))
-                    f.write(np.float32(val[0]))
-
-    f.close()
-
-"""
 
 
 
-def dumpData(file_name, X_test):
+def dumpData(file_name, X_test, reload_weights = False):
 
+    if reload_weights:
+        return
+   
     print ("*Dumping Input File = ", file_name)
-    #print("-min_val = ", np.amin(X_test))
-    #print("-max_val = ", np.amax(X_test))
-
+   
     f = open(file_name, "wb")
 
     X_test = X_test.flatten()
@@ -69,34 +52,14 @@ def dumpData(file_name, X_test):
 
 
   
-"""    
-def dumpConvWeights(file_name, weights, N, C, H, W):
-
-    print (weights.shape)
-    print ("*DumpConvWeights")
-
-    #print("-min_val = ", np.amin(weights))
-    #print("-max_val = ", np.amax(weights))
-
-    
-    f = open(file_name, "wb")
-    for i in range(N):
-        for j in range(C):
-            for k in range(H):
-                for l in range(W):
-                    f.write(weights[k][l][j][i])
 
-    f.close()
-
-"""
-
-
-def dumpConvWeights(file_name, X_test, N, C, H, W):
+def dumpConvWeights(file_name, X_test, N, C, H, W, reload_weights = False):
 
+    if reload_weights:
+        return
+   
     print ("*Dumping Conv Weights to file = ", file_name)
-    #print("-min_val = ", np.amin(X_test))
-    #print("-max_val = ", np.amax(X_test))
-
+   
     f = open(file_name, "wb")
 
     X_test = np.transpose(X_test, (3, 2, 0, 1))
@@ -109,13 +72,13 @@ def dumpConvWeights(file_name, X_test, N, C, H, W):
 
     
     
-def dumpFcWeights(file_name, weights, H, W):
+def dumpFcWeights(file_name, weights, H, W, reload_weights = False):
 
+    if reload_weights:
+        return
+   
     print ("*Dumping FC weights to = ", file_name)
     
-    #print("-min_val = ", np.amin(weights))
-    #print("-max_val = ", np.amax(weights))
-
     f = open(file_name, "wb")
     for i in range(H):
         for j in range(W):
@@ -125,8 +88,11 @@ def dumpFcWeights(file_name, weights, H, W):
 
 
     
-def dumpFcBias(file_name, bias, W):
+def dumpFcBias(file_name, bias, W, reload_weights = False):
 
+    if reload_weights:
+        return
+   
     print ("*Dump Bias Weights = ", file_name)
 
     f = open(file_name, "wb")
@@ -171,7 +137,7 @@ def dumpCalibrationData2(file_name, test_data, labels_fname, test_labels):
 
 
 # Loads Existing HPVM FP32 weights
-def reloadHPVMWeights(model, reload_dir, output_model, X_test, Y_test):
+def reloadHPVMWeights(model, reload_dir, output_model):
 
   print ("***** Reloading pre-trained HPVM weights ****")
   
@@ -179,37 +145,51 @@ def reloadHPVMWeights(model, reload_dir, output_model, X_test, Y_test):
     layer = model.layers[i]
     layer_name = layer.name
     #-- print ("*layer_name = ", layer_name)
-    if "conv" not in layer_name and "dense" not in layer_name:
-      continue
+    if "conv" in layer_name or "dense" in layer_name:
     
-    w_path = reload_dir + layer_name + "_w.bin"
-    #-- print ("** w_path = ", w_path)    
-    w_arr = np.fromfile(w_path, dtype='float32')
-    
-    b_path = reload_dir + layer_name + "_b.bin"
-    b_arr = np.fromfile(b_path, dtype='float32')
+        w_path = reload_dir + layer_name + "_w.bin"
+        #-- print ("** w_path = ", w_path)    
+        w_arr = np.fromfile(w_path, dtype='float32')
 
-    w_shape = layer.get_weights()[0].shape    
-    if "conv" in layer_name:      
-      w_nchw_shape = (w_shape[3], w_shape[2], w_shape[0], w_shape[1])      
-      w_arr = np.reshape(w_arr, w_nchw_shape)
-      w_arr = np.transpose(w_arr, (2,3,1,0))
+        if layer.use_bias:
+            b_path = reload_dir + layer_name + "_b.bin"
+            b_arr = np.fromfile(b_path, dtype='float32')
 
-    if "dense" in layer_name:      
-      w_arr = np.reshape(w_arr, w_shape)
+        w_shape = layer.get_weights()[0].shape    
+        if "conv" in layer_name:      
+          w_nchw_shape = (w_shape[3], w_shape[2], w_shape[0], w_shape[1])      
+          w_arr = np.reshape(w_arr, w_nchw_shape)
+          w_arr = np.transpose(w_arr, (2,3,1,0))
 
-    weights = []
-    weights.append(w_arr)
-    weights.append(b_arr)
-    
-    # Overriding model weights
-    layer.set_weights(weights)
+        if "dense" in layer_name:      
+          w_arr = np.reshape(w_arr, w_shape)
+
+        if layer.use_bias:
+            weights = [w_arr, b_arr]
+        else:
+            weights = [w_arr]
+        
+        layer.set_weights(weights)
+        
+    elif "batch_normalization" in layer_name:
+        beta_path = reload_dir + layer_name + "_beta.bin"
+        gamma_path = reload_dir + layer_name + "_gamma.bin"
+        mean_path = reload_dir + layer_name + "_mean.bin"
+        variance_path = reload_dir + layer_name + "_variance.bin"
+        
+        beta = np.fromfile(beta_path, dtype='float32')
+        gamma = np.fromfile(gamma_path, dtype='float32')
+        mean = np.fromfile(mean_path, dtype='float32')
+        variance = np.fromfile(variance_path, dtype='float32')
+
+        weights = [gamma, beta, mean, variance]
+        
+        layer.set_weights(weights)
+            
 
   # Model recompilation needed after resetting weights
   model.compile(loss='categorical_crossentropy',
                 optimizer=Adam(lr=0.0001, decay=1e-6),
                 metrics=['accuracy'])    
 
-  model.save(output_model)
-
   return model
diff --git a/hpvm/projects/keras/legacy/generated/vgg16/approxhpvm_src.cc b/hpvm/projects/keras/legacy/generated/vgg16/approxhpvm_src.cc
deleted file mode 100644
index 2bf9bad6b967698e34af99cd128fee8fa206bd5e..0000000000000000000000000000000000000000
--- a/hpvm/projects/keras/legacy/generated/vgg16/approxhpvm_src.cc
+++ /dev/null
@@ -1,982 +0,0 @@
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/stat.h> 
-#include <cstring> 
-#include <visc.h> 
-#include <tensorTypes.h> 
-#include <tensorUtils.h> 
-
-void var_0_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
-
-  void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_1_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
-
-  void *r = __visc__tensor_add(t1, t2); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_2_node(void* t1, size_t bytes_t1) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(1, t1, 0); 
-
-  void* r = __visc__tensor_relu(t1); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_3_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
-
-  void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_4_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
-
-  void *r = __visc__tensor_add(t1, t2); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_5_node(void* t1, size_t bytes_t1) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(1, t1, 0); 
-
-  void* r = __visc__tensor_relu(t1); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_6_node(void* t1, size_t bytes_t1) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(1, t1, 0); 
-
-  void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_7_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
-
-  void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_8_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
-
-  void *r = __visc__tensor_add(t1, t2); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_9_node(void* t1, size_t bytes_t1) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(1, t1, 0); 
-
-  void* r = __visc__tensor_relu(t1); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_10_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
-
-  void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_11_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
-
-  void *r = __visc__tensor_add(t1, t2); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_12_node(void* t1, size_t bytes_t1) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(1, t1, 0); 
-
-  void* r = __visc__tensor_relu(t1); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_13_node(void* t1, size_t bytes_t1) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(1, t1, 0); 
-
-  void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_14_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
-
-  void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_15_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
-
-  void *r = __visc__tensor_add(t1, t2); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_16_node(void* t1, size_t bytes_t1) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(1, t1, 0); 
-
-  void* r = __visc__tensor_relu(t1); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_17_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
-
-  void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_18_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
-
-  void *r = __visc__tensor_add(t1, t2); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_19_node(void* t1, size_t bytes_t1) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(1, t1, 0); 
-
-  void* r = __visc__tensor_relu(t1); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_20_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
-
-  void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_21_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
-
-  void *r = __visc__tensor_add(t1, t2); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_22_node(void* t1, size_t bytes_t1) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(1, t1, 0); 
-
-  void* r = __visc__tensor_relu(t1); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_23_node(void* t1, size_t bytes_t1) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(1, t1, 0); 
-
-  void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_24_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
-
-  void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_25_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
-
-  void *r = __visc__tensor_add(t1, t2); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_26_node(void* t1, size_t bytes_t1) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(1, t1, 0); 
-
-  void* r = __visc__tensor_relu(t1); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_27_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
-
-  void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_28_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
-
-  void *r = __visc__tensor_add(t1, t2); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_29_node(void* t1, size_t bytes_t1) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(1, t1, 0); 
-
-  void* r = __visc__tensor_relu(t1); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_30_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
-
-  void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_31_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
-
-  void *r = __visc__tensor_add(t1, t2); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_32_node(void* t1, size_t bytes_t1) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(1, t1, 0); 
-
-  void* r = __visc__tensor_relu(t1); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_33_node(void* t1, size_t bytes_t1) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(1, t1, 0); 
-
-  void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_34_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
-
-  void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_35_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
-
-  void *r = __visc__tensor_add(t1, t2); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_36_node(void* t1, size_t bytes_t1) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(1, t1, 0); 
-
-  void* r = __visc__tensor_relu(t1); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_37_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
-
-  void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_38_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
-
-  void *r = __visc__tensor_add(t1, t2); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_39_node(void* t1, size_t bytes_t1) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(1, t1, 0); 
-
-  void* r = __visc__tensor_relu(t1); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_40_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
-
-  void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_41_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
-
-  void *r = __visc__tensor_add(t1, t2); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_42_node(void* t1, size_t bytes_t1) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(1, t1, 0); 
-
-  void* r = __visc__tensor_relu(t1); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_43_node(void* t1, size_t bytes_t1) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(1, t1, 0); 
-
-  void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_44_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
-
-  void *r = __visc__tensor_mul(t1, t2); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_45_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
-
-  void *r = __visc__tensor_add(t1, t2); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_46_node(void* t1, size_t bytes_t1) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(1, t1, 0); 
-
-  void* r = __visc__tensor_relu(t1); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_47_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
-
-  void *r = __visc__tensor_mul(t1, t2); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_48_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
-
-  void *r = __visc__tensor_add(t1, t2); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void var_49_node(void* t1, size_t bytes_t1) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(1, t1, 0); 
-
-  void* r = __visc__tensor_softmax(t1); 
-  __visc__return(2, r, (size_t) 0); 
-}
-
-void root(void* input, size_t input_bytes, 
-	  void* conv2d_1_w, size_t conv2d_1_w_bytes, 
-	  void* conv2d_1_b, size_t conv2d_1_b_bytes, 
-	  void* conv2d_2_w, size_t conv2d_2_w_bytes, 
-	  void* conv2d_2_b, size_t conv2d_2_b_bytes, 
-	  void* conv2d_3_w, size_t conv2d_3_w_bytes, 
-	  void* conv2d_3_b, size_t conv2d_3_b_bytes, 
-	  void* conv2d_4_w, size_t conv2d_4_w_bytes, 
-	  void* conv2d_4_b, size_t conv2d_4_b_bytes, 
-	  void* conv2d_5_w, size_t conv2d_5_w_bytes, 
-	  void* conv2d_5_b, size_t conv2d_5_b_bytes, 
-	  void* conv2d_6_w, size_t conv2d_6_w_bytes, 
-	  void* conv2d_6_b, size_t conv2d_6_b_bytes, 
-	  void* conv2d_7_w, size_t conv2d_7_w_bytes, 
-	  void* conv2d_7_b, size_t conv2d_7_b_bytes, 
-	  void* conv2d_8_w, size_t conv2d_8_w_bytes, 
-	  void* conv2d_8_b, size_t conv2d_8_b_bytes, 
-	  void* conv2d_9_w, size_t conv2d_9_w_bytes, 
-	  void* conv2d_9_b, size_t conv2d_9_b_bytes, 
-	  void* conv2d_10_w, size_t conv2d_10_w_bytes, 
-	  void* conv2d_10_b, size_t conv2d_10_b_bytes, 
-	  void* conv2d_11_w, size_t conv2d_11_w_bytes, 
-	  void* conv2d_11_b, size_t conv2d_11_b_bytes, 
-	  void* conv2d_12_w, size_t conv2d_12_w_bytes, 
-	  void* conv2d_12_b, size_t conv2d_12_b_bytes, 
-	  void* conv2d_13_w, size_t conv2d_13_w_bytes, 
-	  void* conv2d_13_b, size_t conv2d_13_b_bytes, 
-	  void* dense_1_w, size_t dense_1_w_bytes, 
-	  void* dense_1_b, size_t dense_1_b_bytes, 
-	  void* dense_2_w, size_t dense_2_w_bytes, 
-	  void* dense_2_b, size_t dense_2_b_bytes){ 
-
-
-  __visc__hint(visc::CPU_TARGET); 
-  __visc__attributes(31, input, conv2d_1_w, conv2d_1_b, conv2d_2_w, conv2d_2_b, conv2d_3_w, conv2d_3_b, conv2d_4_w, conv2d_4_b, conv2d_5_w, conv2d_5_b, conv2d_6_w, conv2d_6_b, conv2d_7_w, conv2d_7_b, conv2d_8_w, conv2d_8_b, conv2d_9_w, conv2d_9_b, conv2d_10_w, conv2d_10_b, conv2d_11_w, conv2d_11_b, conv2d_12_w, conv2d_12_b, conv2d_13_w, conv2d_13_b, dense_1_w, dense_1_b, dense_2_w, dense_2_b, 0); 
-
-
-  void* var_0 = __visc__createNodeND(0, var_0_node); 
-
-  __visc__bindIn(var_0, 0, 0, 0); 
-  __visc__bindIn(var_0, 1, 1, 0); 
-  __visc__bindIn(var_0, 2, 2, 0); 
-  __visc__bindIn(var_0, 3, 3, 0); 
-
-  void* var_1 = __visc__createNodeND(0, var_1_node); 
-
-  __visc__edge(var_0, var_1, 1, 0, 0, 0); 
-  __visc__edge(var_0, var_1, 1, 1, 1, 0); 
-  __visc__bindIn(var_1, 4, 2, 0); 
-  __visc__bindIn(var_1, 5, 3, 0); 
-
-  void* var_2 = __visc__createNodeND(0, var_2_node); 
-
-  __visc__edge(var_1, var_2, 1, 0, 0, 0); 
-  __visc__edge(var_1, var_2, 1, 1, 1, 0); 
-
-  void* var_3 = __visc__createNodeND(0, var_3_node); 
-
-  __visc__edge(var_2, var_3, 1, 0, 0, 0); 
-  __visc__edge(var_2, var_3, 1, 1, 1, 0); 
-  __visc__bindIn(var_3, 6, 2, 0); 
-  __visc__bindIn(var_3, 7, 3, 0); 
-
-  void* var_4 = __visc__createNodeND(0, var_4_node); 
-
-  __visc__edge(var_3, var_4, 1, 0, 0, 0); 
-  __visc__edge(var_3, var_4, 1, 1, 1, 0); 
-  __visc__bindIn(var_4, 8, 2, 0); 
-  __visc__bindIn(var_4, 9, 3, 0); 
-
-  void* var_5 = __visc__createNodeND(0, var_5_node); 
-
-  __visc__edge(var_4, var_5, 1, 0, 0, 0); 
-  __visc__edge(var_4, var_5, 1, 1, 1, 0); 
-
-  void* var_6 = __visc__createNodeND(0, var_6_node); 
-
-  __visc__edge(var_5, var_6, 1, 0, 0, 0); 
-  __visc__edge(var_5, var_6, 1, 1, 1, 0); 
-
-  void* var_7 = __visc__createNodeND(0, var_7_node); 
-
-  __visc__edge(var_6, var_7, 1, 0, 0, 0); 
-  __visc__edge(var_6, var_7, 1, 1, 1, 0); 
-  __visc__bindIn(var_7, 10, 2, 0); 
-  __visc__bindIn(var_7, 11, 3, 0); 
-
-  void* var_8 = __visc__createNodeND(0, var_8_node); 
-
-  __visc__edge(var_7, var_8, 1, 0, 0, 0); 
-  __visc__edge(var_7, var_8, 1, 1, 1, 0); 
-  __visc__bindIn(var_8, 12, 2, 0); 
-  __visc__bindIn(var_8, 13, 3, 0); 
-
-  void* var_9 = __visc__createNodeND(0, var_9_node); 
-
-  __visc__edge(var_8, var_9, 1, 0, 0, 0); 
-  __visc__edge(var_8, var_9, 1, 1, 1, 0); 
-
-  void* var_10 = __visc__createNodeND(0, var_10_node); 
-
-  __visc__edge(var_9, var_10, 1, 0, 0, 0); 
-  __visc__edge(var_9, var_10, 1, 1, 1, 0); 
-  __visc__bindIn(var_10, 14, 2, 0); 
-  __visc__bindIn(var_10, 15, 3, 0); 
-
-  void* var_11 = __visc__createNodeND(0, var_11_node); 
-
-  __visc__edge(var_10, var_11, 1, 0, 0, 0); 
-  __visc__edge(var_10, var_11, 1, 1, 1, 0); 
-  __visc__bindIn(var_11, 16, 2, 0); 
-  __visc__bindIn(var_11, 17, 3, 0); 
-
-  void* var_12 = __visc__createNodeND(0, var_12_node); 
-
-  __visc__edge(var_11, var_12, 1, 0, 0, 0); 
-  __visc__edge(var_11, var_12, 1, 1, 1, 0); 
-
-  void* var_13 = __visc__createNodeND(0, var_13_node); 
-
-  __visc__edge(var_12, var_13, 1, 0, 0, 0); 
-  __visc__edge(var_12, var_13, 1, 1, 1, 0); 
-
-  void* var_14 = __visc__createNodeND(0, var_14_node); 
-
-  __visc__edge(var_13, var_14, 1, 0, 0, 0); 
-  __visc__edge(var_13, var_14, 1, 1, 1, 0); 
-  __visc__bindIn(var_14, 18, 2, 0); 
-  __visc__bindIn(var_14, 19, 3, 0); 
-
-  void* var_15 = __visc__createNodeND(0, var_15_node); 
-
-  __visc__edge(var_14, var_15, 1, 0, 0, 0); 
-  __visc__edge(var_14, var_15, 1, 1, 1, 0); 
-  __visc__bindIn(var_15, 20, 2, 0); 
-  __visc__bindIn(var_15, 21, 3, 0); 
-
-  void* var_16 = __visc__createNodeND(0, var_16_node); 
-
-  __visc__edge(var_15, var_16, 1, 0, 0, 0); 
-  __visc__edge(var_15, var_16, 1, 1, 1, 0); 
-
-  void* var_17 = __visc__createNodeND(0, var_17_node); 
-
-  __visc__edge(var_16, var_17, 1, 0, 0, 0); 
-  __visc__edge(var_16, var_17, 1, 1, 1, 0); 
-  __visc__bindIn(var_17, 22, 2, 0); 
-  __visc__bindIn(var_17, 23, 3, 0); 
-
-  void* var_18 = __visc__createNodeND(0, var_18_node); 
-
-  __visc__edge(var_17, var_18, 1, 0, 0, 0); 
-  __visc__edge(var_17, var_18, 1, 1, 1, 0); 
-  __visc__bindIn(var_18, 24, 2, 0); 
-  __visc__bindIn(var_18, 25, 3, 0); 
-
-  void* var_19 = __visc__createNodeND(0, var_19_node); 
-
-  __visc__edge(var_18, var_19, 1, 0, 0, 0); 
-  __visc__edge(var_18, var_19, 1, 1, 1, 0); 
-
-  void* var_20 = __visc__createNodeND(0, var_20_node); 
-
-  __visc__edge(var_19, var_20, 1, 0, 0, 0); 
-  __visc__edge(var_19, var_20, 1, 1, 1, 0); 
-  __visc__bindIn(var_20, 26, 2, 0); 
-  __visc__bindIn(var_20, 27, 3, 0); 
-
-  void* var_21 = __visc__createNodeND(0, var_21_node); 
-
-  __visc__edge(var_20, var_21, 1, 0, 0, 0); 
-  __visc__edge(var_20, var_21, 1, 1, 1, 0); 
-  __visc__bindIn(var_21, 28, 2, 0); 
-  __visc__bindIn(var_21, 29, 3, 0); 
-
-  void* var_22 = __visc__createNodeND(0, var_22_node); 
-
-  __visc__edge(var_21, var_22, 1, 0, 0, 0); 
-  __visc__edge(var_21, var_22, 1, 1, 1, 0); 
-
-  void* var_23 = __visc__createNodeND(0, var_23_node); 
-
-  __visc__edge(var_22, var_23, 1, 0, 0, 0); 
-  __visc__edge(var_22, var_23, 1, 1, 1, 0); 
-
-  void* var_24 = __visc__createNodeND(0, var_24_node); 
-
-  __visc__edge(var_23, var_24, 1, 0, 0, 0); 
-  __visc__edge(var_23, var_24, 1, 1, 1, 0); 
-  __visc__bindIn(var_24, 30, 2, 0); 
-  __visc__bindIn(var_24, 31, 3, 0); 
-
-  void* var_25 = __visc__createNodeND(0, var_25_node); 
-
-  __visc__edge(var_24, var_25, 1, 0, 0, 0); 
-  __visc__edge(var_24, var_25, 1, 1, 1, 0); 
-  __visc__bindIn(var_25, 32, 2, 0); 
-  __visc__bindIn(var_25, 33, 3, 0); 
-
-  void* var_26 = __visc__createNodeND(0, var_26_node); 
-
-  __visc__edge(var_25, var_26, 1, 0, 0, 0); 
-  __visc__edge(var_25, var_26, 1, 1, 1, 0); 
-
-  void* var_27 = __visc__createNodeND(0, var_27_node); 
-
-  __visc__edge(var_26, var_27, 1, 0, 0, 0); 
-  __visc__edge(var_26, var_27, 1, 1, 1, 0); 
-  __visc__bindIn(var_27, 34, 2, 0); 
-  __visc__bindIn(var_27, 35, 3, 0); 
-
-  void* var_28 = __visc__createNodeND(0, var_28_node); 
-
-  __visc__edge(var_27, var_28, 1, 0, 0, 0); 
-  __visc__edge(var_27, var_28, 1, 1, 1, 0); 
-  __visc__bindIn(var_28, 36, 2, 0); 
-  __visc__bindIn(var_28, 37, 3, 0); 
-
-  void* var_29 = __visc__createNodeND(0, var_29_node); 
-
-  __visc__edge(var_28, var_29, 1, 0, 0, 0); 
-  __visc__edge(var_28, var_29, 1, 1, 1, 0); 
-
-  void* var_30 = __visc__createNodeND(0, var_30_node); 
-
-  __visc__edge(var_29, var_30, 1, 0, 0, 0); 
-  __visc__edge(var_29, var_30, 1, 1, 1, 0); 
-  __visc__bindIn(var_30, 38, 2, 0); 
-  __visc__bindIn(var_30, 39, 3, 0); 
-
-  void* var_31 = __visc__createNodeND(0, var_31_node); 
-
-  __visc__edge(var_30, var_31, 1, 0, 0, 0); 
-  __visc__edge(var_30, var_31, 1, 1, 1, 0); 
-  __visc__bindIn(var_31, 40, 2, 0); 
-  __visc__bindIn(var_31, 41, 3, 0); 
-
-  void* var_32 = __visc__createNodeND(0, var_32_node); 
-
-  __visc__edge(var_31, var_32, 1, 0, 0, 0); 
-  __visc__edge(var_31, var_32, 1, 1, 1, 0); 
-
-  void* var_33 = __visc__createNodeND(0, var_33_node); 
-
-  __visc__edge(var_32, var_33, 1, 0, 0, 0); 
-  __visc__edge(var_32, var_33, 1, 1, 1, 0); 
-
-  void* var_34 = __visc__createNodeND(0, var_34_node); 
-
-  __visc__edge(var_33, var_34, 1, 0, 0, 0); 
-  __visc__edge(var_33, var_34, 1, 1, 1, 0); 
-  __visc__bindIn(var_34, 42, 2, 0); 
-  __visc__bindIn(var_34, 43, 3, 0); 
-
-  void* var_35 = __visc__createNodeND(0, var_35_node); 
-
-  __visc__edge(var_34, var_35, 1, 0, 0, 0); 
-  __visc__edge(var_34, var_35, 1, 1, 1, 0); 
-  __visc__bindIn(var_35, 44, 2, 0); 
-  __visc__bindIn(var_35, 45, 3, 0); 
-
-  void* var_36 = __visc__createNodeND(0, var_36_node); 
-
-  __visc__edge(var_35, var_36, 1, 0, 0, 0); 
-  __visc__edge(var_35, var_36, 1, 1, 1, 0); 
-
-  void* var_37 = __visc__createNodeND(0, var_37_node); 
-
-  __visc__edge(var_36, var_37, 1, 0, 0, 0); 
-  __visc__edge(var_36, var_37, 1, 1, 1, 0); 
-  __visc__bindIn(var_37, 46, 2, 0); 
-  __visc__bindIn(var_37, 47, 3, 0); 
-
-  void* var_38 = __visc__createNodeND(0, var_38_node); 
-
-  __visc__edge(var_37, var_38, 1, 0, 0, 0); 
-  __visc__edge(var_37, var_38, 1, 1, 1, 0); 
-  __visc__bindIn(var_38, 48, 2, 0); 
-  __visc__bindIn(var_38, 49, 3, 0); 
-
-  void* var_39 = __visc__createNodeND(0, var_39_node); 
-
-  __visc__edge(var_38, var_39, 1, 0, 0, 0); 
-  __visc__edge(var_38, var_39, 1, 1, 1, 0); 
-
-  void* var_40 = __visc__createNodeND(0, var_40_node); 
-
-  __visc__edge(var_39, var_40, 1, 0, 0, 0); 
-  __visc__edge(var_39, var_40, 1, 1, 1, 0); 
-  __visc__bindIn(var_40, 50, 2, 0); 
-  __visc__bindIn(var_40, 51, 3, 0); 
-
-  void* var_41 = __visc__createNodeND(0, var_41_node); 
-
-  __visc__edge(var_40, var_41, 1, 0, 0, 0); 
-  __visc__edge(var_40, var_41, 1, 1, 1, 0); 
-  __visc__bindIn(var_41, 52, 2, 0); 
-  __visc__bindIn(var_41, 53, 3, 0); 
-
-  void* var_42 = __visc__createNodeND(0, var_42_node); 
-
-  __visc__edge(var_41, var_42, 1, 0, 0, 0); 
-  __visc__edge(var_41, var_42, 1, 1, 1, 0); 
-
-  void* var_43 = __visc__createNodeND(0, var_43_node); 
-
-  __visc__edge(var_42, var_43, 1, 0, 0, 0); 
-  __visc__edge(var_42, var_43, 1, 1, 1, 0); 
-
-  void* var_44 = __visc__createNodeND(0, var_44_node); 
-
-  __visc__edge(var_43, var_44, 1, 0, 0, 0); 
-  __visc__edge(var_43, var_44, 1, 1, 1, 0); 
-  __visc__bindIn(var_44, 54, 2, 0); 
-  __visc__bindIn(var_44, 55, 3, 0); 
-
-  void* var_45 = __visc__createNodeND(0, var_45_node); 
-
-  __visc__edge(var_44, var_45, 1, 0, 0, 0); 
-  __visc__edge(var_44, var_45, 1, 1, 1, 0); 
-  __visc__bindIn(var_45, 56, 2, 0); 
-  __visc__bindIn(var_45, 57, 3, 0); 
-
-  void* var_46 = __visc__createNodeND(0, var_46_node); 
-
-  __visc__edge(var_45, var_46, 1, 0, 0, 0); 
-  __visc__edge(var_45, var_46, 1, 1, 1, 0); 
-
-  void* var_47 = __visc__createNodeND(0, var_47_node); 
-
-  __visc__edge(var_46, var_47, 1, 0, 0, 0); 
-  __visc__edge(var_46, var_47, 1, 1, 1, 0); 
-  __visc__bindIn(var_47, 58, 2, 0); 
-  __visc__bindIn(var_47, 59, 3, 0); 
-
-  void* var_48 = __visc__createNodeND(0, var_48_node); 
-
-  __visc__edge(var_47, var_48, 1, 0, 0, 0); 
-  __visc__edge(var_47, var_48, 1, 1, 1, 0); 
-  __visc__bindIn(var_48, 60, 2, 0); 
-  __visc__bindIn(var_48, 61, 3, 0); 
-
-  void* var_49 = __visc__createNodeND(0, var_49_node); 
-
-  __visc__edge(var_48, var_49, 1, 0, 0, 0); 
-  __visc__edge(var_48, var_49, 1, 1, 1, 0); 
-
-  __visc__bindOut(var_49, 0, 0, 0); 
-  __visc__bindOut(var_49, 1, 1, 0); 
-
-}
-
-struct ret_t {
-  void* tensor; 
-  size_t bytes; 
-}; 
-
-typedef struct __attribute__((__packed__)) {
-  void* input; 
-  size_t input_bytes; 
-  void* conv2d_1_w; 
-  size_t conv2d_1_w_bytes; 
-  void* conv2d_1_b; 
-  size_t conv2d_1_b_bytes; 
-  void* conv2d_2_w; 
-  size_t conv2d_2_w_bytes; 
-  void* conv2d_2_b; 
-  size_t conv2d_2_b_bytes; 
-  void* conv2d_3_w; 
-  size_t conv2d_3_w_bytes; 
-  void* conv2d_3_b; 
-  size_t conv2d_3_b_bytes; 
-  void* conv2d_4_w; 
-  size_t conv2d_4_w_bytes; 
-  void* conv2d_4_b; 
-  size_t conv2d_4_b_bytes; 
-  void* conv2d_5_w; 
-  size_t conv2d_5_w_bytes; 
-  void* conv2d_5_b; 
-  size_t conv2d_5_b_bytes; 
-  void* conv2d_6_w; 
-  size_t conv2d_6_w_bytes; 
-  void* conv2d_6_b; 
-  size_t conv2d_6_b_bytes; 
-  void* conv2d_7_w; 
-  size_t conv2d_7_w_bytes; 
-  void* conv2d_7_b; 
-  size_t conv2d_7_b_bytes; 
-  void* conv2d_8_w; 
-  size_t conv2d_8_w_bytes; 
-  void* conv2d_8_b; 
-  size_t conv2d_8_b_bytes; 
-  void* conv2d_9_w; 
-  size_t conv2d_9_w_bytes; 
-  void* conv2d_9_b; 
-  size_t conv2d_9_b_bytes; 
-  void* conv2d_10_w; 
-  size_t conv2d_10_w_bytes; 
-  void* conv2d_10_b; 
-  size_t conv2d_10_b_bytes; 
-  void* conv2d_11_w; 
-  size_t conv2d_11_w_bytes; 
-  void* conv2d_11_b; 
-  size_t conv2d_11_b_bytes; 
-  void* conv2d_12_w; 
-  size_t conv2d_12_w_bytes; 
-  void* conv2d_12_b; 
-  size_t conv2d_12_b_bytes; 
-  void* conv2d_13_w; 
-  size_t conv2d_13_w_bytes; 
-  void* conv2d_13_b; 
-  size_t conv2d_13_b_bytes; 
-  void* dense_1_w; 
-  size_t dense_1_w_bytes; 
-  void* dense_1_b; 
-  size_t dense_1_b_bytes; 
-  void* dense_2_w; 
-  size_t dense_2_w_bytes; 
-  void* dense_2_b; 
-  size_t dense_2_b_bytes; 
-
-  struct ret_t r; 
-}
-RootIn;
-
-int main(){ 
-
-std::string dir_prefix = std::string("data/vgg16_cifar10/"); 
-std::string input_path =  dir_prefix + std::string("input.bin"); 
-std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,3,3); 
-std::string conv2d_1_b_path =  dir_prefix + std::string("conv2d_1_b.bin"); 
-void* conv2d_1_b =  readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); 
-std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,3,3); 
-std::string conv2d_2_b_path =  dir_prefix + std::string("conv2d_2_b.bin"); 
-void* conv2d_2_b =  readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); 
-std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,3,3); 
-std::string conv2d_3_b_path =  dir_prefix + std::string("conv2d_3_b.bin"); 
-void* conv2d_3_b =  readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,128,1,1); 
-std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,3,3); 
-std::string conv2d_4_b_path =  dir_prefix + std::string("conv2d_4_b.bin"); 
-void* conv2d_4_b =  readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,128,1,1); 
-std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,3,3); 
-std::string conv2d_5_b_path =  dir_prefix + std::string("conv2d_5_b.bin"); 
-void* conv2d_5_b =  readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); 
-std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,3,3); 
-std::string conv2d_6_b_path =  dir_prefix + std::string("conv2d_6_b.bin"); 
-void* conv2d_6_b =  readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,256,1,1); 
-std::string conv2d_7_w_path =  dir_prefix + std::string("conv2d_7_w.bin"); 
-void* conv2d_7_w =  readTrainedWeights(conv2d_7_w_path.c_str(), 0,256,256,3,3); 
-std::string conv2d_7_b_path =  dir_prefix + std::string("conv2d_7_b.bin"); 
-void* conv2d_7_b =  readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,256,1,1); 
-std::string conv2d_8_w_path =  dir_prefix + std::string("conv2d_8_w.bin"); 
-void* conv2d_8_w =  readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,256,3,3); 
-std::string conv2d_8_b_path =  dir_prefix + std::string("conv2d_8_b.bin"); 
-void* conv2d_8_b =  readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,512,1,1); 
-std::string conv2d_9_w_path =  dir_prefix + std::string("conv2d_9_w.bin"); 
-void* conv2d_9_w =  readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,3,3); 
-std::string conv2d_9_b_path =  dir_prefix + std::string("conv2d_9_b.bin"); 
-void* conv2d_9_b =  readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,512,1,1); 
-std::string conv2d_10_w_path =  dir_prefix + std::string("conv2d_10_w.bin"); 
-void* conv2d_10_w =  readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,3,3); 
-std::string conv2d_10_b_path =  dir_prefix + std::string("conv2d_10_b.bin"); 
-void* conv2d_10_b =  readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,512,1,1); 
-std::string conv2d_11_w_path =  dir_prefix + std::string("conv2d_11_w.bin"); 
-void* conv2d_11_w =  readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,3,3); 
-std::string conv2d_11_b_path =  dir_prefix + std::string("conv2d_11_b.bin"); 
-void* conv2d_11_b =  readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,512,1,1); 
-std::string conv2d_12_w_path =  dir_prefix + std::string("conv2d_12_w.bin"); 
-void* conv2d_12_w =  readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,3,3); 
-std::string conv2d_12_b_path =  dir_prefix + std::string("conv2d_12_b.bin"); 
-void* conv2d_12_b =  readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,512,1,1); 
-std::string conv2d_13_w_path =  dir_prefix + std::string("conv2d_13_w.bin"); 
-void* conv2d_13_w =  readTrainedWeights(conv2d_13_w_path.c_str(), 0,512,512,3,3); 
-std::string conv2d_13_b_path =  dir_prefix + std::string("conv2d_13_b.bin"); 
-void* conv2d_13_b =  readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,512,1,1); 
-std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,512,512); 
-std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,512,1,1); 
-std::string dense_2_w_path =  dir_prefix + std::string("dense_2_w.bin"); 
-void* dense_2_w =  readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,512,10); 
-std::string dense_2_b_path =  dir_prefix + std::string("dense_2_b.bin"); 
-void* dense_2_b =  readTrainedWeights(dense_2_b_path.c_str(), 0,1,10,1,1); 
-void* input = readTrainedWeights(input_path.c_str(), 0,10000,3,32,32); 
-uint8_t* labels = readLabels(labels_path.c_str(),10000); 
-
-__visc__init(); 
-RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn))); 
-
-args->input = input; 
-args->input_bytes = 0; 
-args->conv2d_1_w = conv2d_1_w; 
-args->conv2d_1_w_bytes = 0; 
-args->conv2d_1_b = conv2d_1_b; 
-args->conv2d_1_b_bytes = 0; 
-args->conv2d_2_w = conv2d_2_w; 
-args->conv2d_2_w_bytes = 0; 
-args->conv2d_2_b = conv2d_2_b; 
-args->conv2d_2_b_bytes = 0; 
-args->conv2d_3_w = conv2d_3_w; 
-args->conv2d_3_w_bytes = 0; 
-args->conv2d_3_b = conv2d_3_b; 
-args->conv2d_3_b_bytes = 0; 
-args->conv2d_4_w = conv2d_4_w; 
-args->conv2d_4_w_bytes = 0; 
-args->conv2d_4_b = conv2d_4_b; 
-args->conv2d_4_b_bytes = 0; 
-args->conv2d_5_w = conv2d_5_w; 
-args->conv2d_5_w_bytes = 0; 
-args->conv2d_5_b = conv2d_5_b; 
-args->conv2d_5_b_bytes = 0; 
-args->conv2d_6_w = conv2d_6_w; 
-args->conv2d_6_w_bytes = 0; 
-args->conv2d_6_b = conv2d_6_b; 
-args->conv2d_6_b_bytes = 0; 
-args->conv2d_7_w = conv2d_7_w; 
-args->conv2d_7_w_bytes = 0; 
-args->conv2d_7_b = conv2d_7_b; 
-args->conv2d_7_b_bytes = 0; 
-args->conv2d_8_w = conv2d_8_w; 
-args->conv2d_8_w_bytes = 0; 
-args->conv2d_8_b = conv2d_8_b; 
-args->conv2d_8_b_bytes = 0; 
-args->conv2d_9_w = conv2d_9_w; 
-args->conv2d_9_w_bytes = 0; 
-args->conv2d_9_b = conv2d_9_b; 
-args->conv2d_9_b_bytes = 0; 
-args->conv2d_10_w = conv2d_10_w; 
-args->conv2d_10_w_bytes = 0; 
-args->conv2d_10_b = conv2d_10_b; 
-args->conv2d_10_b_bytes = 0; 
-args->conv2d_11_w = conv2d_11_w; 
-args->conv2d_11_w_bytes = 0; 
-args->conv2d_11_b = conv2d_11_b; 
-args->conv2d_11_b_bytes = 0; 
-args->conv2d_12_w = conv2d_12_w; 
-args->conv2d_12_w_bytes = 0; 
-args->conv2d_12_b = conv2d_12_b; 
-args->conv2d_12_b_bytes = 0; 
-args->conv2d_13_w = conv2d_13_w; 
-args->conv2d_13_w_bytes = 0; 
-args->conv2d_13_b = conv2d_13_b; 
-args->conv2d_13_b_bytes = 0; 
-args->dense_1_w = dense_1_w; 
-args->dense_1_w_bytes = 0; 
-args->dense_1_b = dense_1_b; 
-args->dense_1_b_bytes = 0; 
-args->dense_2_w = dense_2_w; 
-args->dense_2_w_bytes = 0; 
-args->dense_2_b = dense_2_b; 
-args->dense_2_b_bytes = 0; 
-
-void* dfg = __visc__launch(0, root, (void*) args); 
-
-__visc__wait(dfg); 
-
-void *result = static_cast<RootIn*>(args)->input; 
-hpvm_request_tensor(result, 0); 
-
-__visc__cleanup(); 
- computeAccuracy2(labels, 10000, result); 
-return 0; 
-
-} 
diff --git a/hpvm/projects/keras/legacy/generated/vgg16/src.cc b/hpvm/projects/keras/legacy/generated/vgg16/src.cc
deleted file mode 100644
index 9303866d0d29d1990c858f84ccaced7f0fc0dcc7..0000000000000000000000000000000000000000
--- a/hpvm/projects/keras/legacy/generated/vgg16/src.cc
+++ /dev/null
@@ -1,164 +0,0 @@
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../tensor_runtime/include/tensor_runtime.h" 
-#include "../include/utils.h" 
-
-int main(){ 
-
-llvm_hpvm_initTensorRt(0); 
-
-
-std::string dir_prefix = std::string("data/vgg16_cifar10/"); 
-std::string input_path =  dir_prefix + std::string("input.bin"); 
-std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,3,3); 
-std::string conv2d_1_b_path =  dir_prefix + std::string("conv2d_1_b.bin"); 
-void* conv2d_1_b =  readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); 
-std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,3,3); 
-std::string conv2d_2_b_path =  dir_prefix + std::string("conv2d_2_b.bin"); 
-void* conv2d_2_b =  readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); 
-std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,3,3); 
-std::string conv2d_3_b_path =  dir_prefix + std::string("conv2d_3_b.bin"); 
-void* conv2d_3_b =  readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,128,1,1); 
-std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,3,3); 
-std::string conv2d_4_b_path =  dir_prefix + std::string("conv2d_4_b.bin"); 
-void* conv2d_4_b =  readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,128,1,1); 
-std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,3,3); 
-std::string conv2d_5_b_path =  dir_prefix + std::string("conv2d_5_b.bin"); 
-void* conv2d_5_b =  readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); 
-std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,3,3); 
-std::string conv2d_6_b_path =  dir_prefix + std::string("conv2d_6_b.bin"); 
-void* conv2d_6_b =  readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,256,1,1); 
-std::string conv2d_7_w_path =  dir_prefix + std::string("conv2d_7_w.bin"); 
-void* conv2d_7_w =  readTrainedWeights(conv2d_7_w_path.c_str(), 0,256,256,3,3); 
-std::string conv2d_7_b_path =  dir_prefix + std::string("conv2d_7_b.bin"); 
-void* conv2d_7_b =  readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,256,1,1); 
-std::string conv2d_8_w_path =  dir_prefix + std::string("conv2d_8_w.bin"); 
-void* conv2d_8_w =  readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,256,3,3); 
-std::string conv2d_8_b_path =  dir_prefix + std::string("conv2d_8_b.bin"); 
-void* conv2d_8_b =  readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,512,1,1); 
-std::string conv2d_9_w_path =  dir_prefix + std::string("conv2d_9_w.bin"); 
-void* conv2d_9_w =  readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,3,3); 
-std::string conv2d_9_b_path =  dir_prefix + std::string("conv2d_9_b.bin"); 
-void* conv2d_9_b =  readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,512,1,1); 
-std::string conv2d_10_w_path =  dir_prefix + std::string("conv2d_10_w.bin"); 
-void* conv2d_10_w =  readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,3,3); 
-std::string conv2d_10_b_path =  dir_prefix + std::string("conv2d_10_b.bin"); 
-void* conv2d_10_b =  readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,512,1,1); 
-std::string conv2d_11_w_path =  dir_prefix + std::string("conv2d_11_w.bin"); 
-void* conv2d_11_w =  readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,3,3); 
-std::string conv2d_11_b_path =  dir_prefix + std::string("conv2d_11_b.bin"); 
-void* conv2d_11_b =  readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,512,1,1); 
-std::string conv2d_12_w_path =  dir_prefix + std::string("conv2d_12_w.bin"); 
-void* conv2d_12_w =  readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,3,3); 
-std::string conv2d_12_b_path =  dir_prefix + std::string("conv2d_12_b.bin"); 
-void* conv2d_12_b =  readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,512,1,1); 
-std::string conv2d_13_w_path =  dir_prefix + std::string("conv2d_13_w.bin"); 
-void* conv2d_13_w =  readTrainedWeights(conv2d_13_w_path.c_str(), 0,512,512,3,3); 
-std::string conv2d_13_b_path =  dir_prefix + std::string("conv2d_13_b.bin"); 
-void* conv2d_13_b =  readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,512,1,1); 
-std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,512,512); 
-std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,512,1,1); 
-std::string dense_2_w_path =  dir_prefix + std::string("dense_2_w.bin"); 
-void* dense_2_w =  readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,512,10); 
-std::string dense_2_b_path =  dir_prefix + std::string("dense_2_b.bin"); 
-void* dense_2_b =  readTrainedWeights(dense_2_b_path.c_str(), 0,1,10,1,1); 
-
-
-
-startMemTracking(); 
-
-int test_input_size = 10000; 
-int batch_size = 10000; 
-int batch_count = test_input_size / batch_size; 
-float final_accuracy = 0.0; 
-
-for(int i = 0; i < batch_count; i++){ 
-
-int start = i * batch_size; 
-int end = (i + 1) * batch_size; 
-
-void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); 
-
-void* var_0 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 0); 
-void* var_1 = tensorAdd(var_0, conv2d_1_b); 
-void* var_2 = tensorRelu(var_1); 
-void* var_4 = tensorConvolution(var_2, conv2d_2_w, 1, 1, 1, 1, 1, 0); 
-void* var_5 = tensorAdd(var_4, conv2d_2_b); 
-void* var_6 = tensorRelu(var_5); 
-void* var_7 = tensorPooling(var_6,0,2,2,0,0,2,2); 
-void* var_8 = tensorConvolution(var_7, conv2d_3_w, 1, 1, 1, 1, 1, 0); 
-void* var_9 = tensorAdd(var_8, conv2d_3_b); 
-void* var_10 = tensorRelu(var_9); 
-void* var_12 = tensorConvolution(var_10, conv2d_4_w, 1, 1, 1, 1, 1, 0); 
-void* var_13 = tensorAdd(var_12, conv2d_4_b); 
-void* var_14 = tensorRelu(var_13); 
-void* var_15 = tensorPooling(var_14,0,2,2,0,0,2,2); 
-void* var_16 = tensorConvolution(var_15, conv2d_5_w, 1, 1, 1, 1, 1, 0); 
-void* var_17 = tensorAdd(var_16, conv2d_5_b); 
-void* var_18 = tensorRelu(var_17); 
-void* var_20 = tensorConvolution(var_18, conv2d_6_w, 1, 1, 1, 1, 1, 0); 
-void* var_21 = tensorAdd(var_20, conv2d_6_b); 
-void* var_22 = tensorRelu(var_21); 
-void* var_24 = tensorConvolution(var_22, conv2d_7_w, 1, 1, 1, 1, 1, 0); 
-void* var_25 = tensorAdd(var_24, conv2d_7_b); 
-void* var_26 = tensorRelu(var_25); 
-void* var_27 = tensorPooling(var_26,0,2,2,0,0,2,2); 
-void* var_28 = tensorConvolution(var_27, conv2d_8_w, 1, 1, 1, 1, 1, 0); 
-void* var_29 = tensorAdd(var_28, conv2d_8_b); 
-void* var_30 = tensorRelu(var_29); 
-void* var_32 = tensorConvolution(var_30, conv2d_9_w, 1, 1, 1, 1, 1, 0); 
-void* var_33 = tensorAdd(var_32, conv2d_9_b); 
-void* var_34 = tensorRelu(var_33); 
-void* var_36 = tensorConvolution(var_34, conv2d_10_w, 1, 1, 1, 1, 1, 0); 
-void* var_37 = tensorAdd(var_36, conv2d_10_b); 
-void* var_38 = tensorRelu(var_37); 
-void* var_39 = tensorPooling(var_38,0,2,2,0,0,2,2); 
-void* var_40 = tensorConvolution(var_39, conv2d_11_w, 1, 1, 1, 1, 1, 0); 
-void* var_41 = tensorAdd(var_40, conv2d_11_b); 
-void* var_42 = tensorRelu(var_41); 
-void* var_44 = tensorConvolution(var_42, conv2d_12_w, 1, 1, 1, 1, 1, 0); 
-void* var_45 = tensorAdd(var_44, conv2d_12_b); 
-void* var_46 = tensorRelu(var_45); 
-void* var_48 = tensorConvolution(var_46, conv2d_13_w, 1, 1, 1, 1, 1, 0); 
-void* var_49 = tensorAdd(var_48, conv2d_13_b); 
-void* var_50 = tensorRelu(var_49); 
-void* var_51 = tensorPooling(var_50,0,2,2,0,0,2,2); 
-void* var_54 = tensorGemmGPU(var_51, dense_1_w); 
-void* var_55 = tensorAdd(var_54, dense_1_b); 
-void* var_56 = tensorRelu(var_55); 
-void* var_58 = tensorGemmGPU(var_56, dense_2_w); 
-void* var_59 = tensorAdd(var_58, dense_2_b); 
-void* var_60 = tensorSoftmax(var_59); 
-
-uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); 
-
-float accuracy = computeAccuracy2(labels, batch_size, var_60); 
-final_accuracy += accuracy; 
-freeBatchMemory(); 
- 
-}
-
-final_accuracy = final_accuracy / batch_count; 
-dumpFinalAccuracy(final_accuracy); 
-
-
-llvm_hpvm_cleanupTensorRt(); 
-
-return 0; 
-
-}
diff --git a/hpvm/projects/keras/legacy/keras_environment.yml b/hpvm/projects/keras/legacy/keras_environment.yml
deleted file mode 100644
index caa3a773dfd8c7a82571a923bb1941997ba59ca9..0000000000000000000000000000000000000000
--- a/hpvm/projects/keras/legacy/keras_environment.yml
+++ /dev/null
@@ -1,321 +0,0 @@
-name: approxhpvm_keras
-channels:
-  - pytorch
-  - conda-forge
-  - defaults
-dependencies:
-  - absl-py=0.6.1=py36_0
-  - anaconda-project=0.8.2=py36_0
-  - asn1crypto=0.24.0=py36_0
-  - automat=0.7.0=py36_0
-  - babel=2.6.0=py36_0
-  - backports=1.0=py36_1
-  - backports.os=0.1.1=py36_0
-  - beautifulsoup4=4.6.3=py36_0
-  - bkcharts=0.2=py36_0
-  - blaze=0.11.3=py36_0
-  - conda=4.5.11=py36_0
-  - conda-env=2.6.0=1
-  - contextlib2=0.5.5=py36_0
-  - cycler=0.10.0=py36_0
-  - dill=0.2.8.2=py36_0
-  - docutils=0.14=py36_0
-  - entrypoints=0.2.3=py36_2
-  - et_xmlfile=1.0.1=py36_0
-  - idna=2.7=py36_0
-  - imageio=2.4.1=py36_0
-  - importlib_metadata=0.6=py36_0
-  - ipython_genutils=0.2.0=py36_0
-  - isort=4.3.4=py36_0
-  - jdcal=1.4=py36_0
-  - jedi=0.13.1=py36_0
-  - jinja2=2.10=py36_0
-  - jmespath=0.9.3=py36_0
-  - jsonschema=2.6.0=py36_0
-  - keyring=16.0.0=py36_0
-  - libgcc=7.2.0=h69d50b8_2
-  - libgfortran=3.0.0=1
-  - locket=0.2.0=py36_1
-  - more-itertools=4.3.0=py36_0
-  - nbconvert=5.3.1=py36_0
-  - nbformat=4.4.0=py36_0
-  - nose=1.3.7=py36_2
-  - notebook=5.7.0=py36_0
-  - numpydoc=0.8.0=py36_0
-  - odo=0.5.1=py36_0
-  - pathlib2=2.3.2=py36_0
-  - pexpect=4.6.0=py36_0
-  - pickleshare=0.7.5=py36_0
-  - ply=3.11=py36_0
-  - ptyprocess=0.6.0=py36_0
-  - pycodestyle=2.4.0=py36_0
-  - pygments=2.2.0=py36_0
-  - pylint=2.1.1=py36_0
-  - pyopenssl=18.0.0=py36_0
-  - qtconsole=4.4.2=py36_0
-  - requests=2.19.1=py36_0
-  - s3transfer=0.1.13=py36_0
-  - secretstorage=3.1.0=py36_0
-  - setuptools=40.5.0=py36_0
-  - singledispatch=3.4.0.3=py36_0
-  - six=1.11.0=py36_1
-  - snowballstemmer=1.2.1=py36_0
-  - sortedcollections=1.0.1=py36_0
-  - sphinx=1.8.1=py36_0
-  - spyder=3.3.1=py36_1
-  - sympy=1.3=py36_0
-  - tblib=1.3.2=py36_0
-  - termcolor=1.1.0=py36_1
-  - terminado=0.8.1=py36_1
-  - testpath=0.4.2=py36_0
-  - torchvision=0.2.1=py36_0
-  - traitlets=4.3.2=py36_0
-  - typing=3.6.4=py36_0
-  - unicodecsv=0.14.1=py36_0
-  - urllib3=1.23=py36_0
-  - wcwidth=0.1.7=py36_0
-  - wheel=0.32.2=py36_0
-  - widgetsnbextension=3.4.2=py36_0
-  - xlwt=1.3.0=py36_0
-  - _tflow_select=2.1.0=gpu
-  - alabaster=0.7.12=py36_0
-  - anaconda-client=1.7.2=py36_0
-  - anaconda=custom=py36hbbc8b67_0
-  - anaconda-navigator=1.9.2=py36_0
-  - appdirs=1.4.3=py36h28b3542_0
-  - astor=0.7.1=py36_0
-  - astroid=2.0.4=py36_0
-  - astropy=3.0.5=py36h7b6447c_0
-  - atomicwrites=1.2.1=py36_0
-  - attrs=18.2.0=py36h28b3542_0
-  - backcall=0.1.0=py36_0
-  - backports.shutil_get_terminal_size=1.0.0=py36_2
-  - bitarray=0.8.3=py36h14c3975_0
-  - blas=1.0=mkl
-  - bleach=3.0.2=py36_0
-  - blosc=1.14.4=hdbcaa40_0
-  - bokeh=1.0.1=py36_0
-  - boto=2.49.0=py36_0
-  - boto3=1.9.35=py36_0
-  - botocore=1.12.35=py36_0
-  - bottleneck=1.2.1=py36h035aef0_1
-  - bz2file=0.98=py36_1
-  - bzip2=1.0.6=h14c3975_5
-  - ca-certificates=2018.03.07=0
-  - cairo=1.14.12=h8948797_3
-  - certifi=2018.10.15=py36_0
-  - cffi=1.11.5=py36he75722e_1
-  - chardet=3.0.4=py36_1
-  - chest=0.2.3=py36_1
-  - click=7.0=py36_0
-  - cloudpickle=0.6.1=py36_0
-  - clyent=1.2.2=py36_1
-  - colorama=0.4.0=py36_0
-  - configobj=5.0.6=py36_1
-  - constantly=15.1.0=py36h28b3542_0
-  - cryptography=2.3.1=py36hc365091_0
-  - cudatoolkit=9.0=h13b8566_0
-  - cudnn=7.1.2=cuda9.0_0
-  - cupti=9.0.176=0
-  - curl=7.61.0=h84994c4_0
-  - cython=0.29=py36he6710b0_0
-  - cytoolz=0.9.0.1=py36h14c3975_1
-  - dask=0.20.0=py36_0
-  - dask-core=0.20.0=py36_0
-  - datashape=0.5.4=py36_1
-  - dbus=1.13.2=h714fa37_1
-  - decorator=4.3.0=py36_0
-  - defusedxml=0.5.0=py36_1
-  - distributed=1.24.0=py36_0
-  - expat=2.2.6=he6710b0_0
-  - fastcache=1.0.2=py36h14c3975_2
-  - filelock=3.0.10=py36_0
-  - flask=1.0.2=py36_1
-  - flask-cors=3.0.6=py36_0
-  - fontconfig=2.13.0=h9420a91_0
-  - freetype=2.9.1=h8a8886c_1
-  - fribidi=1.0.5=h7b6447c_0
-  - gast=0.2.0=py36_0
-  - gensim=3.4.0=py36h14c3975_0
-  - get_terminal_size=1.0.0=haa9412d_0
-  - gevent=1.3.7=py36h7b6447c_1
-  - glib=2.56.2=hd408876_0
-  - glob2=0.6=py36_1
-  - gmp=6.1.2=h6c8ec71_1
-  - gmpy2=2.0.8=py36h10f8cd9_2
-  - graphite2=1.3.12=h23475e2_2
-  - greenlet=0.4.15=py36h7b6447c_0
-  - grpcio=1.12.1=py36hdbcaa40_0
-  - gst-plugins-base=1.14.0=hbbd80ab_1
-  - gstreamer=1.14.0=hb453b48_1
-  - h5py=2.8.0=py36h989c5e5_3
-  - harfbuzz=1.8.8=hffaf4a1_0
-  - hdf5=1.10.2=hba1933b_1
-  - heapdict=1.0.0=py36_2
-  - html5lib=1.0.1=py36_0
-  - hyperlink=18.0.0=py36_0
-  - icu=58.2=h9c2bf20_1
-  - imagesize=1.1.0=py36_0
-  - incremental=17.5.0=py36_0
-  - ipykernel=5.1.0=py36h39e3cac_0
-  - ipython=7.1.1=py36h39e3cac_0
-  - ipywidgets=7.4.2=py36_0
-  - itsdangerous=1.1.0=py36_0
-  - jbig=2.1=hdba287a_0
-  - jeepney=0.4=py36_0
-  - jpeg=9b=h024ee3a_2
-  - keras=2.1.6=py36_0
-  - keras-applications=1.0.6=py36_0
-  - keras-preprocessing=1.0.5=py36_0
-  - kiwisolver=1.0.1=py36hf484d3e_0
-  - lazy-object-proxy=1.3.1=py36h14c3975_2
-  - libcurl=7.61.0=h1ad7b7a_0
-  - libedit=3.1.20170329=h6b74fdf_2
-  - libffi=3.2.1=hd88cf55_4
-  - libgcc-ng=8.2.0=hdf63c60_1
-  - libgfortran-ng=7.3.0=hdf63c60_0
-  - libiconv=1.15=h63c8f33_5
-  - libpng=1.6.35=hbc83047_0
-  - libprotobuf=3.6.1=hd408876_0
-  - libsodium=1.0.16=h1bed415_0
-  - libssh2=1.8.0=h9cfc8f7_4
-  - libstdcxx-ng=8.2.0=hdf63c60_1
-  - libtiff=4.0.9=he85c1e1_2
-  - libtool=2.4.6=h7b6447c_5
-  - libuuid=1.0.3=h1bed415_2
-  - libxcb=1.13=h1bed415_1
-  - libxml2=2.9.8=h26e45fe_1
-  - libxslt=1.1.32=h1312cb7_0
-  - llvmlite=0.25.0=py36hd408876_0
-  - lxml=4.2.5=py36hefd8a0e_0
-  - lzo=2.10=h49e0be7_2
-  - markdown=3.0.1=py36_0
-  - markupsafe=1.0=py36h14c3975_1
-  - matplotlib=3.0.1=py36h5429711_0
-  - mccabe=0.6.1=py36_1
-  - mistune=0.8.4=py36h7b6447c_0
-  - mkl=2018.0.3=1
-  - mkl-service=1.1.2=py36h90e4bf4_5
-  - mkl_fft=1.0.6=py36h7dd41cf_0
-  - mkl_random=1.0.1=py36h4414c95_1
-  - mpc=1.1.0=h10f8cd9_1
-  - mpfr=4.0.1=hdf1c602_3
-  - mpmath=1.0.0=py36_2
-  - msgpack-python=0.5.6=py36h6bb024c_1
-  - multipledispatch=0.6.0=py36_0
-  - navigator-updater=0.2.1=py36_0
-  - nccl=1.3.5=cuda9.0_0
-  - ncurses=6.1=hf484d3e_0
-  - networkx=2.2=py36_1
-  - ninja=1.8.2=py36h6bb024c_1
-  - nltk=3.3.0=py36_0
-  - numba=0.40.0=py36h962f231_0
-  - numexpr=2.6.8=py36hd89afb7_0
-  - numpy=1.15.3=py36h1d66e8a_0
-  - numpy-base=1.15.3=py36h81de0dd_0
-  - olefile=0.46=py36_0
-  - openpyxl=2.5.9=py36_0
-  - openssl=1.0.2p=h14c3975_0
-  - packaging=18.0=py36_0
-  - pandas=0.23.4=py36h04863e7_0
-  - pandoc=2.2.3.2=0
-  - pandocfilters=1.4.2=py36_1
-  - pango=1.42.4=h049681c_0
-  - parso=0.3.1=py36_0
-  - partd=0.3.9=py36_0
-  - patchelf=0.9=he6710b0_3
-  - path.py=11.5.0=py36_0
-  - patsy=0.5.1=py36_0
-  - pcre=8.42=h439df22_0
-  - pep8=1.7.1=py36_0
-  - pillow=5.3.0=py36h34e0f95_0
-  - pip=18.1=py36_0
-  - pixman=0.34.0=hceecf20_3
-  - pkginfo=1.4.2=py36_1
-  - pluggy=0.8.0=py36_0
-  - prometheus_client=0.4.2=py36_0
-  - prompt_toolkit=2.0.7=py36_0
-  - protobuf=3.6.1=py36he6710b0_0
-  - psutil=5.4.8=py36h7b6447c_0
-  - py=1.7.0=py36_0
-  - pyasn1=0.4.4=py36h28b3542_0
-  - pyasn1-modules=0.2.2=py36_0
-  - pycosat=0.6.3=py36h14c3975_0
-  - pycparser=2.19=py36_0
-  - pycrypto=2.6.1=py36h14c3975_9
-  - pycurl=7.43.0.2=py36hb7f436b_0
-  - pyflakes=2.0.0=py36_0
-  - pyhamcrest=1.9.0=py36_2
-  - pyodbc=4.0.24=py36he6710b0_0
-  - pyparsing=2.2.2=py36_0
-  - pyqt=5.9.2=py36h05f1152_2
-  - pysocks=1.6.8=py36_0
-  - pytables=3.4.4=py36ha205bf6_0
-  - pytest=3.9.3=py36_0
-  - pytest-arraydiff=0.2=py36h39e3cac_0
-  - pytest-astropy=0.4.0=py36_0
-  - pytest-doctestplus=0.1.3=py36_0
-  - pytest-openfiles=0.3.0=py36_0
-  - pytest-remotedata=0.3.1=py36_0
-  - python=3.6.6=h6e4f718_2
-  - python-dateutil=2.7.5=py36_0
-  - pytorch=0.4.1=py36ha74772b_0
-  - pytz=2018.7=py36_0
-  - pywavelets=1.0.1=py36hdd07704_0
-  - pyyaml=3.13=py36h14c3975_0
-  - pyzmq=17.1.2=py36h14c3975_0
-  - qt=5.9.6=h8703b6f_2
-  - qtawesome=0.5.2=py36_0
-  - qtpy=1.5.2=py36_0
-  - readline=7.0=h7b6447c_5
-  - redis=5.0.0=h7b6447c_0
-  - redis-py=2.10.6=py36_0
-  - rope=0.11.0=py36_0
-  - ruamel_yaml=0.15.46=py36h14c3975_0
-  - scikit-image=0.14.0=py36hf484d3e_1
-  - scikit-learn=0.20.0=py36h4989274_1
-  - scipy=1.1.0=py36hfa4b5c9_1
-  - seaborn=0.9.0=py36_0
-  - send2trash=1.5.0=py36_0
-  - service_identity=17.0.0=py36h28b3542_0
-  - simplegeneric=0.8.1=py36_2
-  - sip=4.19.8=py36hf484d3e_0
-  - smart_open=1.7.1=py36_0
-  - snappy=1.1.7=hbae5bb6_3
-  - sockjs-tornado=1.0.6=py36_0
-  - sortedcontainers=2.0.5=py36_0
-  - sphinxcontrib=1.0=py36_1
-  - sphinxcontrib-websupport=1.1.0=py36_1
-  - spyder-kernels=0.2.6=py36_0
-  - sqlalchemy=1.2.12=py36h7b6447c_0
-  - sqlite=3.25.2=h7b6447c_0
-  - statsmodels=0.9.0=py36h035aef0_0
-  - tensorboard=1.11.0=py36hf484d3e_0
-  - tensorflow=1.11.0=gpu_py36h4459f94_0
-  - tensorflow-base=1.11.0=gpu_py36h8e0ae2d_0
-  - tensorflow-gpu=1.11.0=h0d30ee6_0
-  - tk=8.6.8=hbc83047_0
-  - toolz=0.9.0=py36_0
-  - tornado=5.1.1=py36h7b6447c_0
-  - tqdm=4.28.1=py36h28b3542_0
-  - twisted=18.9.0=py36h7b6447c_0
-  - typed-ast=1.1.0=py36h14c3975_0
-  - unixodbc=2.3.7=h14c3975_0
-  - webencodings=0.5.1=py36_1
-  - werkzeug=0.14.1=py36_0
-  - wrapt=1.10.11=py36h14c3975_2
-  - xlrd=1.1.0=py36_1
-  - xlsxwriter=1.1.2=py36_0
-  - xz=5.2.4=h14c3975_4
-  - yaml=0.1.7=had09818_2
-  - zeromq=4.2.5=hf484d3e_1
-  - zict=0.1.3=py36_0
-  - zlib=1.2.11=ha838bed_2
-  - zope=1.0=py36_1
-  - zope.interface=4.6.0=py36h7b6447c_0
-  - cuda91=1.0=h4c16780_0
-  - pip:
-    - msgpack==0.5.6
-    - tables==3.4.4
-    - torch==0.4.1
-
diff --git a/hpvm/projects/keras/legacy/keras_environment_deps.yml b/hpvm/projects/keras/legacy/keras_environment_deps.yml
deleted file mode 100644
index 13876b26150ccbe01fb29cb8efc74a22bfbc3784..0000000000000000000000000000000000000000
--- a/hpvm/projects/keras/legacy/keras_environment_deps.yml
+++ /dev/null
@@ -1,51 +0,0 @@
-name: approxhpvm_keras
-channels:
-  - pytorch
-  - conda-forge
-  - defaults
-dependencies:
-  - conda=4.5.11=py36_0
-  - conda-env=2.6.0=1
-  - libgcc=7.2.0=h69d50b8_2
-  - libgfortran=3.0.0=1
-  - torchvision=0.2.1=py36_0
-  - _tflow_select=2.1.0=gpu
-  - anaconda-client=1.7.2=py36_0
-  - anaconda=custom=py36hbbc8b67_0
-  - anaconda-navigator=1.9.2=py36_0
-  - cudatoolkit=9.0=h13b8566_0
-  - cudnn=7.1.2=cuda9.0_0
-  - cupti=9.0.176=0
-  - curl=7.61.0=h84994c4_0
-  - glib=2.56.2=hd408876_0
-  - keras=2.1.6=py36_0
-  - keras-applications=1.0.6=py36_0
-  - keras-preprocessing=1.0.5=py36_0
-  - mkl=2018.0.3=1
-  - mkl-service=1.1.2=py36h90e4bf4_5
-  - mkl_fft=1.0.6=py36h7dd41cf_0
-  - mkl_random=1.0.1=py36h4414c95_1
-  - ncurses=6.1=hf484d3e_0
-  - numpy=1.15.3=py36h1d66e8a_0
-  - numpy-base=1.15.3=py36h81de0dd_0
-  - pandas=0.23.4=py36h04863e7_0
-  - pandoc=2.2.3.2=0
-  - pandocfilters=1.4.2=py36_1
-  - pip=18.1=py36_0
-  - psutil=5.4.8=py36h7b6447c_0
-  - py=1.7.0=py36_0
-  - pycurl=7.43.0.2=py36hb7f436b_0
-  - python=3.6.6=h6e4f718_2
-  - pytorch=0.4.1=py36ha74772b_0
-  - scipy=1.1.0=py36hfa4b5c9_1
-  - tensorboard=1.11.0=py36hf484d3e_0
-  - tensorflow=1.11.0=gpu_py36h4459f94_0
-  - tensorflow-base=1.11.0=gpu_py36h8e0ae2d_0
-  - tensorflow-gpu=1.11.0=h0d30ee6_0
-  - zlib=1.2.11=ha838bed_2
-  - cuda91=1.0=h4c16780_0
-  - pip:
-    - msgpack==0.5.6
-    - tables==3.4.4
-    - torch==0.4.1
-
diff --git a/hpvm/projects/keras/src/Benchmark.py b/hpvm/projects/keras/src/Benchmark.py
index e83a78f4ea5d776a21ed6a6b47ccdb840f42c129..3610b2e9a5ad10c2b3d90795eb20b3d6839b730f 100644
--- a/hpvm/projects/keras/src/Benchmark.py
+++ b/hpvm/projects/keras/src/Benchmark.py
@@ -14,12 +14,14 @@ from frontend.weight_utils import reloadHPVMWeights
 # Defines common interfaces and virtual methods to be overridden by child classes
 class Benchmark:
 
-    def __init__(self, name, reload_dir, keras_model_file, hpvm_dir, num_classes):
+    def __init__(self, name, reload_dir, keras_model_file, data_dir, src_dir, num_classes, batch_size=500):
         self.name = name
         self.reload_dir = reload_dir
         self.keras_model_file = keras_model_file
-        self.hpvm_dir = hpvm_dir
+        self.data_dir = data_dir
+        self.src_dir = src_dir
         self.num_classes = num_classes
+        self.batch_size = batch_size
         
         
     def buildModel(self):
@@ -28,7 +30,7 @@ class Benchmark:
     def data_preprocess(self):
         return
     
-    def trainModel(self):
+    def trainModel(self, X_train, y_train, X_test, y_test):
         return
 
     def inference(self):
@@ -51,7 +53,7 @@ class Benchmark:
 
     def printUsage(self):
 
-        print ("Usage: python ${benchmark.py} [hpvm_reload|keras_reload|train] [frontend] [compile]")
+        print ("Usage: python ${benchmark.py} [hpvm_reload|train] [frontend] [compile]")
         sys.exit(0)
 
         
@@ -60,46 +62,61 @@ class Benchmark:
       if len(argv) < 2:
           self.printUsage()
           
+      print ("Build Model ...")
       # Virtual method call implemented by each CNN
       model = self.buildModel()
 
+      print ("Data Preprocess... \n")
       # Virtual method call to preprocess test and train data 
-      X_train, Y_train, X_test, Y_test = self.data_preprocess()   
+      X_train, y_train, X_test, y_test, X_tuner, y_tuner = self.data_preprocess()   
 
       if argv[1] == "hpvm_reload":
         print ("loading weights .....\n\n")  
-        model = reloadHPVMWeights(model, self.reload_dir, self.keras_model_file, X_test, Y_test)
+        model = reloadHPVMWeights(model, self.reload_dir, self.keras_model_file)
 
       elif argv[1] == "keras_reload":
-        model = load_model(self.keras_model_file)
+        model.load_weights(self.keras_model_file)
+        model.compile(loss='categorical_crossentropy',
+                    optimizer='adam',
+                    metrics=['accuracy'])   
 
       elif argv[1] == "train":
-        model = self.trainModel(model)
-
+        print ("Train Model ...")
+        model = self.trainModel(model, X_train, y_train, X_test, y_test)
       else:
           self.printUsage()
 
           
-      score = model.evaluate(X_test, to_categorical(Y_test, self.num_classes), verbose=0)
+      score = model.evaluate(X_test, to_categorical(y_test, self.num_classes), verbose=0)
       print('Test accuracy2:', score[1])
 
 
-      if len(argv) > 2 and argv[2] == "frontend":
-        if argv[1] == "keras_reload":
-          print("ERROR: Must load HPVM model to invoke frontend - use 'hpvm_reload'")
-          sys.exit(1)
+      if len(argv) > 2:
+        if argv[2] == "frontend":
+          
+          # Main call to ApproxHPVM-Keras Frontend
+          working_dir = translate_to_approxhpvm(model,
+                                                self.data_dir, self.src_dir,  ##  "data/test_src/", 
+                                                X_test, y_test,
+                                                X_tuner, y_tuner,
+                                                self.batch_size, # FIXIT
+                                                self.num_classes,
+                                                (argv[1] == "hpvm_reload")) # Do not redump HPVM weights if `hpvm_reload` used
 
-        # Main call to ApproxHPVM-Keras Frontend
-        working_dir = translate_to_approxhpvm(model, self.hpvm_dir, X_test, Y_test, self.num_classes)
-        #-- print ("*** working_dir = ", working_dir)
-        
-        if len(argv) > 3 and argv[3] == "compile":
-          self.compileSource(working_dir)
+          if len(argv) > 3 and argv[3] == "compile":
+            self.compileSource(working_dir)
+          else:
+            self.printUsage()
 
-        else:
-          self.printUsage()
 
+        if argv[2] == "keras_dump":
+          model.save_weights(self.keras_model_file)
+
+          
       elif len(argv) > 2:
         self.printUsage()
             
 
+    
+
+        
diff --git a/hpvm/projects/keras/src/Config.py b/hpvm/projects/keras/src/Config.py
new file mode 100644
index 0000000000000000000000000000000000000000..2edc5c1add5542edabdd052097ccb4b45d608472
--- /dev/null
+++ b/hpvm/projects/keras/src/Config.py
@@ -0,0 +1,3 @@
+
+# Path Relative to Model Params Directory
+MODEL_PARAMS_DIR = "../../../hpvm/test/dnn_benchmarks/model_params/"
diff --git a/hpvm/projects/keras/src/__init__.py b/hpvm/projects/keras/src/__init__.py
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/hpvm/projects/keras/src/alexnet.py b/hpvm/projects/keras/src/alexnet.py
index 9bfe80a156ae21e9befea3a6872b63758e37e2a3..4b23fd995ffcc5a4f3234566a8a76dac8c12c6aa 100644
--- a/hpvm/projects/keras/src/alexnet.py
+++ b/hpvm/projects/keras/src/alexnet.py
@@ -1,179 +1,147 @@
+import os
+import sys
+import glob
 
 import numpy as np
-
-from keras.datasets import cifar10
-from keras.models import Sequential
-from keras.models import load_model
-from keras.layers.core import Dense, Dropout, Flatten, Activation
-from keras.layers.convolutional import Conv2D
+import tensorflow as tf
+import scipy
+import scipy.io
+import keras
+from keras.models import Model, Sequential
+from keras.layers import *
 from keras.optimizers import Adam
-from keras.layers.pooling import MaxPooling2D
-from keras.utils.np_utils import to_categorical
-from keras.preprocessing.image import ImageDataGenerator
-from keras import backend as K
 from keras import regularizers
+from keras import backend as K
+from keras.utils import to_categorical
+from keras.preprocessing.image import ImageDataGenerator
 from keras.callbacks import LearningRateScheduler
-import sys
-import struct
-import keras
-import numpy as np
-import os
-from Benchmark import Benchmark
-
 
+from keras.datasets import cifar10
+from Benchmark import Benchmark
+from Config import MODEL_PARAMS_DIR
 
 
-class AlexNet(Benchmark):
-
-      
-      
-  def lr_schedule(self, epoch):
-    lrate = 0.001
-    if epoch > 20:
-      lrate = 0.0005
-    if epoch > 40:
-      lrate = 0.0003
-    if epoch > 60:
-      lrate = 0.0001
-    if epoch > 80:
-      lrate = 0.00005  
-
-    return lrate
 
+class AlexNet_CIFAR10(Benchmark):
 
+    def buildModel(self):
 
-  def buildModel(self):
+        activation_type = 'tanh'
+        weight_decay = 1e-4
 
-      print ("BuildModel ...")
-      
-      activation_type = "tanh"
-      weight_decay = 1e-4
+        model = Sequential()
+        
+        model.add(Conv2D(64, (11, 11), padding='same', activation=activation_type,
+                         kernel_regularizer=regularizers.l2(weight_decay), input_shape=(3, 32, 32)))
+        model.add(MaxPooling2D(2, 2))
+        model.add(Dropout(0.2))
+        
+        model.add(Conv2D(192, (5, 5), padding='same', activation=activation_type,
+                         kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(MaxPooling2D(2, 2))
+        model.add(Dropout(0.3))
 
-      model = Sequential()
-      model.add(Conv2D(64, kernel_size=(11, 11), activation=activation_type,
-                       input_shape=(3, 32, 32), padding = 'same',
-                       kernel_regularizer=regularizers.l2(weight_decay) ))
-      model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2) ))
-      model.add(Dropout(0.2))
-      model.add(Conv2D(192, kernel_size=(5, 5), activation=activation_type, padding = 'same',
+        model.add(Conv2D(384, (3, 3), padding='same', activation=activation_type, 
+                       kernel_regularizer=regularizers.l2(weight_decay)))   
+        model.add(Conv2D(256, (3, 3), padding='same', activation=activation_type, 
                        kernel_regularizer=regularizers.l2(weight_decay)))
-      model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2) ))
-      model.add(Dropout(0.3))
-
-      model.add(Conv2D(384, kernel_size=(3, 3), activation=activation_type, padding = 'same',
-                       kernel_regularizer=regularizers.l2(weight_decay) ))   
-      model.add(Conv2D(256, kernel_size=(3, 3), activation=activation_type, padding = 'same',
-                       kernel_regularizer=regularizers.l2(weight_decay) ))
-      model.add(Conv2D(256, kernel_size=(3, 3), activation=activation_type, padding = 'same',
-                       kernel_regularizer=regularizers.l2(weight_decay) ))
-      model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2) ))
-      model.add(Dropout(0.4))
-
-      model.add(Flatten())
-      #model.add(Flatten())
-      #model.add(Dense(256))
-      model.add(Dense(10))
-      model.add(Activation('softmax'))
-
-      return model
-
-
-
-
-  def trainModel(self, model):
-
-      (X_train, Y_train), (X_test, Y_test) = cifar10.load_data()
-      test_labels = Y_test
-      train_labels = Y_train
-
-      #X_train = X_train.astype('float32')
-      #X_test = X_test.astype('float32')
-      X_train = X_train / 255.0
-      X_test = X_test / 255.0
-
-      mean = np.mean(X_train,axis=(0,1,2,3))
-      std = np.std(X_train,axis=(0,1,2,3))   
-      X_train = (X_train-mean)/(std+1e-7)
-      X_test = (X_test-mean)/(std+1e-7)
-
-      dir_prefix = "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/model_params/alexnet_cifar10/"
-
-      #opt_rms = keras.optimizers.rmsprop(lr=0.001,decay=1e-6)
-      # Compile the model
-      model.compile(loss='categorical_crossentropy',
-                    optimizer=Adam(lr=0.0001, decay=1e-6),
-                    #optimizer = opt_rms,
-                    metrics=['accuracy'])
-
-      #print to_categorical(Y_train, 10)
-      print (to_categorical(Y_train))
-
-
-      datagen = ImageDataGenerator(
-        rotation_range=15,
-        width_shift_range=0.1,
-        height_shift_range=0.1,
-        horizontal_flip=True,
-      )
-      datagen.fit(X_train)
-
-
-      model.fit(X_train, to_categorical(Y_train, 10),
-                batch_size=128,
-                shuffle=True,
-                epochs = 1,
-                #epochs=100,
-                validation_data=(X_test, to_categorical(Y_test, 10)), callbacks=[LearningRateScheduler(self.lr_schedule)])
+        model.add(Conv2D(256, (3, 3), padding='same', activation=activation_type, 
+                       kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(MaxPooling2D(2, 2))
+        model.add(Dropout(0.4))
 
-      # Evaluate the model
-      scores = model.evaluate(X_test, to_categorical(Y_test, 10))
+        model.add(Flatten())
+        #model.add(Flatten())
+        #model.add(Dense(256))
+        model.add(Dense(self.num_classes))
+        model.add(Activation('softmax'))
+        
+        return model
 
-      print('Loss: %.3f' % scores[0])
-      print('Accuracy: %.3f' % scores[1])
-      print ("*** TRAINED MODEL ****\n")
+    
+    def data_preprocess(self):
 
-      return model
+        (X_train, y_train), (X_val, y_val) = cifar10.load_data()
 
+        X_train = X_train / 255.0
+        X_val = X_val / 255.0
 
+        mean = np.mean(X_train)
+        std = np.std(X_train)
+        X_train = (X_train - mean) / (std + 1e-7)
+        X_val = (X_val - mean) / (std + 1e-7)  
 
-  def data_preprocess(self):
+        X_test = X_val[0:5000]
+        y_test = y_val[0:5000]
+        X_tuner = X_val[5000:]
+        y_tuner = y_val[5000:]
 
-    print ("Data Preprocess... \n")
+        return X_train, y_train, X_test, y_test, X_tuner, y_tuner
     
-    (X_train, Y_train), (X_test, Y_test) = cifar10.load_data()
 
-    print ("Data Loaded... \n")    
-    
-    X_train = X_train / 255.0
-    X_test = X_test / 255.0
-     
-    mean = np.mean(X_train,axis=(0,1,2,3))
-    std = np.std(X_train,axis=(0,1,2,3))   
-    X_train = (X_train-mean)/(std+1e-7)
-    X_test = (X_test-mean)/(std+1e-7)  
-    
-    return X_train, Y_train, X_test, Y_test
+    def trainModel(self, model, X_train, y_train, X_test, y_test):
+        
+        y_train = to_categorical(y_train, self.num_classes)
+        y_test = to_categorical(y_test, self.num_classes)
+
+        model.compile(
+            loss='categorical_crossentropy',
+            optimizer=Adam(lr=0.0001, decay=1e-6),
+            metrics=['accuracy']
+        )
+
+        datagen = ImageDataGenerator(
+            rotation_range=15,
+            width_shift_range=0.1,
+            height_shift_range=0.1,
+            horizontal_flip=True,
+        )
+        datagen.fit(X_train)
+
+
+        def lr_schedule(epoch):
+            lrate = 0.001
+            if epoch > 20:
+                lrate = 0.0005
+            if epoch > 40:
+                lrate = 0.0003
+            if epoch > 60:
+                lrate = 0.0001
+            if epoch > 80:
+                lrate = 0.00005  
+            return lrate
+
+        model.fit(
+            X_train,
+            y_train,
+            batch_size=128,
+            shuffle=True,
+            epochs=100,
+            validation_data=(X_test, y_test), 
+            callbacks=[LearningRateScheduler(lr_schedule)]
+        )
+
+        return model
 
 
-  
-
     
-if __name__ == "__main__":
-
-      
-    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
+if __name__ == '__main__':
+  
+    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
     # Changing to NCHW format
     K.set_image_data_format('channels_first')
 
 
     ### Parameters specific to each benchmark
-    reload_dir = "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/model_params/alexnet_cifar10/"
-    keras_model_file = "alexnet.h5"
-    hpvm_dir = "data/alexnet_cifar10/" 
+    reload_dir = MODEL_PARAMS_DIR + '/alexnet_cifar10/'
+    keras_model_file = MODEL_PARAMS_DIR + '/alexnet_cifar10/weights.h5'
+    data_dir = '/alexnet_cifar10/' 
+    src_dir = 'data/alexnet_cifar10_src/'
     num_classes = 10
-
-    alexnet = AlexNet("AlexNet", reload_dir, keras_model_file, hpvm_dir, num_classes)
+    batch_size = 500
+        
+    model = AlexNet_CIFAR10('AlexNet_CIFAR10', reload_dir, keras_model_file, data_dir, src_dir, num_classes, batch_size)
     
-    alexnet.run(sys.argv)
+    model.run(sys.argv)
 
     
diff --git a/hpvm/projects/keras/src/alexnet2.py b/hpvm/projects/keras/src/alexnet2.py
index e29917b26f4c59472148c6c1cbb3babf785d5b5d..de69d8c12972df7a1fa51338b30676ffafc65f4e 100644
--- a/hpvm/projects/keras/src/alexnet2.py
+++ b/hpvm/projects/keras/src/alexnet2.py
@@ -1,154 +1,143 @@
-
+import os
 import sys
+import glob
+
+import numpy as np
+import tensorflow as tf
+import scipy
+import scipy.io
 import keras
-from keras.models import Sequential
-from keras.utils import np_utils
-from keras.preprocessing.image import ImageDataGenerator
-from keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization
-from keras.layers import Conv2D, MaxPooling2D
-from keras.datasets import cifar10
+from keras.models import Model, Sequential
+from keras.layers import *
+from keras.optimizers import Adam
 from keras import regularizers
-from keras.callbacks import LearningRateScheduler
-import numpy as np
-import os
-import struct
-from Benchmark import Benchmark
 from keras import backend as K
-from frontend.approxhpvm_translator import translate_to_approxhpvm
-
-
-
-class AlexNet2(Benchmark):
-
-
-
-  def lr_schedule2(self, epoch):
-    lrate = 0.0005
-    if epoch > 100:
-      lrate = 0.0003
-    if epoch > 200:
-      lrate = 0.0002
-    if epoch > 250:
-      lrate = 0.0001
-    if epoch > 300:
-      lrate = 0.00003
-
-    return lrate
+from keras.utils import to_categorical
+from keras.preprocessing.image import ImageDataGenerator
+from keras.callbacks import LearningRateScheduler
 
-    
+from keras.datasets import cifar10
+from Benchmark import Benchmark
+from Config import MODEL_PARAMS_DIR
 
-  def buildModel(self):
 
-      weight_decay = 1e-4  
-      activation_type = 'tanh'
 
-      model = Sequential()
-      model.add(Conv2D(32, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay), input_shape=(3, 32, 32) ))
-      model.add(Activation(activation_type))
-      model.add(Conv2D(32, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
-      model.add(Activation(activation_type))
-      model.add(MaxPooling2D(pool_size=(2,2)))
-      model.add(Dropout(0.2))
+class AlexNet2_CIFAR10(Benchmark):
 
-      model.add(Conv2D(64, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
-      model.add(Activation(activation_type))
-      model.add(Conv2D(64, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
-      model.add(Activation(activation_type))
-      model.add(MaxPooling2D(pool_size=(2,2)))
-      model.add(Dropout(0.3))
+    def buildModel(self):
 
-      model.add(Conv2D(128, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
-      model.add(Activation(activation_type))
-      model.add(Conv2D(128, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
-      model.add(Activation(activation_type))
-      model.add(MaxPooling2D(pool_size=(2,2)))
-      model.add(Dropout(0.4))
+        weight_decay = 1e-4  
+        activation_type = 'tanh'
 
-      model.add(Flatten())
-      model.add(Dense(self.num_classes))
-      model.add(Activation('softmax'))
-      model.summary()
+        model = Sequential()
+        model.add(Conv2D(32, (3, 3), padding='same', kernel_regularizer=regularizers.l2(weight_decay), input_shape=(3, 32, 32)))
+        model.add(Activation(activation_type))
+        model.add(Conv2D(32, (3, 3), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(Activation(activation_type))
+        model.add(MaxPooling2D(pool_size=(2, 2)))
+        model.add(Dropout(0.2))
 
-      return model
+        model.add(Conv2D(64, (3, 3), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(Activation(activation_type))
+        model.add(Conv2D(64, (3, 3), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(Activation(activation_type))
+        model.add(MaxPooling2D(pool_size=(2, 2)))
+        model.add(Dropout(0.3))
 
+        model.add(Conv2D(128, (3, 3), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(Activation(activation_type))
+        model.add(Conv2D(128, (3, 3), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(Activation(activation_type))
+        model.add(MaxPooling2D(pool_size=(2, 2)))
+        model.add(Dropout(0.4))
 
+        model.add(Flatten())
+        model.add(Dense(self.num_classes))
+        model.add(Activation('softmax'))
 
-  def trainModel(self, model):
+        return model
 
-        (x_train, y_train), (x_test, y_test) = cifar10.load_data()
+    
+    def data_preprocess(self):
 
-        test_labels = y_test
-        x_train = x_train.astype('float32')
-        x_test = x_test.astype('float32')
+        (X_train, y_train), (X_val, y_val) = cifar10.load_data()
 
-        #z-score
-        mean = np.mean(x_train,axis=(0,1,2,3))
-        std = np.std(x_train,axis=(0,1,2,3))
-        x_train = (x_train-mean)/(std+1e-7)
-        x_test = (x_test-mean)/(std+1e-7)
+        X_train = X_train / 255.0
+        X_val = X_val / 255.0
 
-        y_train = np_utils.to_categorical(y_train, self.num_classes)
-        y_test = np_utils.to_categorical(y_test, self.num_classes)
+        mean = np.mean(X_train)
+        std = np.std(X_train)
+        X_train = (X_train - mean) / (std + 1e-7)
+        X_val = (X_val - mean) / (std + 1e-7)  
 
-        #data augmentation
-        datagen = ImageDataGenerator(
-          rotation_range=15,
-          width_shift_range=0.1,
-          height_shift_range=0.1,
-          horizontal_flip=True,
-          )
+        X_test = X_val[0:5000]
+        y_test = y_val[0:5000]
+        X_tuner = X_val[5000:]
+        y_tuner = y_val[5000:]
 
-        datagen.fit(x_train)
+        return X_train, y_train, X_test, y_test, X_tuner, y_tuner
+    
 
-        #training
-        batch_size = 64        
-        opt_rms = keras.optimizers.rmsprop(lr=0.001,decay=1e-6)
+    def trainModel(self, model, X_train, y_train, X_test, y_test):
+                
+        y_train = to_categorical(y_train, self.num_classes)
+        y_test = to_categorical(y_test, self.num_classes)
         
-        model.compile(loss='categorical_crossentropy', optimizer=opt_rms, metrics=['accuracy'])
+        model.compile(
+            loss='categorical_crossentropy',
+            optimizer=Adam(lr=0.0001),
+            metrics=['accuracy']
+        )
 
-        model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),\
-                      steps_per_epoch=x_train.shape[0] // batch_size, #epochs=350,\
-                      epochs=3,
-                      verbose=1,validation_data=(x_test,y_test), \
-                            callbacks=[LearningRateScheduler(self.lr_schedule2)])
+        datagen = ImageDataGenerator(
+            rotation_range=15,
+            width_shift_range=0.1,
+            height_shift_range=0.1,
+            horizontal_flip=True,
+        )
+        datagen.fit(X_train)
+
+
+        def lr_schedule(epoch):
+            lrate = 0.001
+            if epoch > 20:
+                lrate = 0.0005
+            if epoch > 40:
+                lrate = 0.0003
+            if epoch > 60:
+                lrate = 0.0001
+            return lrate
+
+        model.fit(
+            X_train,
+            y_train,
+            batch_size=128,
+            shuffle=True,
+            epochs=100,
+            validation_data=(X_test, y_test), 
+            callbacks=[LearningRateScheduler(lr_schedule)]
+        )
 
         return model
 
 
-
     
-  def data_preprocess(self):
-
-      (x_train, y_train), (x_test, y_test) = cifar10.load_data()
-      
-      x_train = x_train.astype('float32')
-      x_test = x_test.astype('float32')
-
-      #z-score
-      mean = np.mean(x_train,axis=(0,1,2,3))
-      std = np.std(x_train,axis=(0,1,2,3))
-      x_train = (x_train-mean)/(std+1e-7)
-      x_test = (x_test-mean)/(std+1e-7)
-
-      return x_train, y_train, x_test, y_test
-
-  
-
-
-if __name__ == "__main__":
+if __name__ == '__main__':
 
       
-    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
+    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
     # Changing to NCHW format
     K.set_image_data_format('channels_first')
 
 
     ### Parameters specific to each benchmark
-    reload_dir = "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/model_params/alexnet2_cifar10/"
-    keras_model_file = "alexnet2.h5"
-    hpvm_dir = "data/alexnet2_cifar10/" 
+    reload_dir = MODEL_PARAMS_DIR + '/alexnet2_cifar10/'
+    keras_model_file = MODEL_PARAMS_DIR + '/alexnet2_cifar10/weights.h5'
+    data_dir = '/alexnet2_cifar10/' 
+    src_dir = 'data/alexnet2_cifar10_src/'
     num_classes = 10
+    batch_size = 500
 
-    alexnet2 = AlexNet2("AlexNet2", reload_dir, keras_model_file, hpvm_dir, num_classes)
+    model = AlexNet2_CIFAR10('AlexNet2_CIFAR10', reload_dir, keras_model_file, data_dir, src_dir, num_classes, batch_size)
     
-    alexnet2.run(sys.argv)
+    model.run(sys.argv)
diff --git a/hpvm/projects/keras/src/alexnet_imagenet.py b/hpvm/projects/keras/src/alexnet_imagenet.py
index 41cf2d837cf8ba764b614512f40a0f6c0522ab3d..e3ab937e9bb355fde74a63664c8657c76d6343f5 100644
--- a/hpvm/projects/keras/src/alexnet_imagenet.py
+++ b/hpvm/projects/keras/src/alexnet_imagenet.py
@@ -1,259 +1,107 @@
 import os
+import sys
 import glob
-import random
 
-import scipy
-import scipy.io
-import cv2
 import numpy as np
-
 import tensorflow as tf
+import scipy
+import scipy.io
 import keras
-from keras.models import Sequential, Model
+from keras.models import Model, Sequential
 from keras.layers import *
-from keras.utils import to_categorical
+from keras.optimizers import Adam
+from keras import regularizers
 from keras import backend as K
-import torchvision.models as models
-
-from frontend.approxhpvm_translator import translate_to_approxhpvm
-from frontend.weight_utils import dumpCalibrationData2
-
-
-np.random.seed(2020)
-
-os.environ["CUDA_VISIBLE_DEVICES"] = "1"
-
-K.set_image_data_format('channels_first')
-
-data_format = 'channels_first'
-
-
-IMAGENET_DIR = '/home/nz11/ILSVRC2012/'
-OUTPUT_DIR = 'data/alexnet_imagenet_tune/'
-WEIGHTS_PATH = 'data/alexnet_imagenet_tune/weights.h5'
-
-NUM_CLASSES = 200
-IMAGES_PER_CLASS = 50
-# VAL_SIZE = 100
-
-
-
-def get_alexnet_nchw_keras():
-
-    input_layer = Input((3, 224, 224))
-    
-    x = ZeroPadding2D((2, 2))(input_layer)
-    x = Conv2D(64, (11, 11), strides=4, padding='valid')(x)
-    x = Activation('relu')(x)
-    x = MaxPooling2D(3, 2)(x)
-    
-    x = ZeroPadding2D((2, 2))(x)
-    x = Conv2D(192, (5, 5), padding='valid')(x)
-    x = Activation('relu')(x)
-    x = MaxPooling2D(3, 2)(x)
-    
-    x = Conv2D(384, (3, 3), padding='same')(x)
-    x = Activation('relu')(x)
-    
-    x = Conv2D(256, (3, 3), padding='same')(x)
-    x = Activation('relu')(x)
-    
-    x = Conv2D(256, (3, 3), padding='same')(x)
-    x = Activation('relu')(x)
-        
-    x = MaxPooling2D(3, 2)(x)
-    
-    x = Flatten()(x)
-    x = Dropout(0.5)(x)
-    x = Dense(4096)(x)
-    x = Activation('relu')(x)
-    x = Dropout(0.5)(x)
-    x = Dense(4096)(x) 
-    x = Activation('relu')(x)
-    x = Dense(1000)(x) 
-    x = Activation('softmax')(x)
-    
-    model_nchw = Model(input_layer, x)
-    
-    
-    torch_model = models.alexnet(pretrained=True)
-    
-    j = 0
-    torch_weights = list(torch_model.parameters())
-    for i in range(len(model_nchw.layers)):
-        if (2 * j >= len(torch_weights)):
-            break
+from keras.utils import to_categorical
+from keras.preprocessing.image import ImageDataGenerator
+from keras.callbacks import LearningRateScheduler
 
-        w = torch_weights[2 * j].detach().numpy()
-        b = torch_weights[2 * j + 1].detach().numpy()
+from Benchmark import Benchmark
+from Config import MODEL_PARAMS_DIR
 
-        if (len(w.shape) == 4):
-            w = np.transpose(w, (2, 3, 1, 0))
-        else:
-            w = w.transpose()
 
-        try:
-            model_nchw.layers[i].set_weights([w, b])
-            j += 1
-            print ([w.shape, b.shape], 'loaded')
-        except:
-            pass
-                   
-    return model_nchw
 
+class AlexNet(Benchmark):
 
-def load_image(x):
-    
-    image = cv2.imread(x)
+    def data_preprocess(self):
+        X_train, y_train = None, None
         
-    height, width, _ = image.shape
-    new_height = height * 256 // min(image.shape[:2])
-    new_width = width * 256 // min(image.shape[:2])
-    image = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_CUBIC)
-    
-    height, width, _ = image.shape
-    startx = width // 2 - (224 // 2)
-    starty = height // 2 - (224 // 2)
-    image = image[starty:starty + 224, startx:startx + 224]
-    
-    image = image[:, :, ::-1]
-    image = np.transpose(image, (2, 0, 1))
-
-
-    image[:, :, 0] = (image[:, :, 0] - 0.485) / 0.229
-    image[:, :, 1] = (image[:, :, 1] - 0.456) / 0.224
-    image[:, :, 2] = (image[:, :, 2] - 0.406) / 0.225
-
+        X_test = np.fromfile(MODEL_PARAMS_DIR + '/alexnet_imagenet/test_input.bin', dtype=np.float32)
+        X_test = X_test.reshape((-1, 3, 224, 224)) 
+        y_test = np.fromfile(MODEL_PARAMS_DIR + '/alexnet_imagenet/test_labels.bin', dtype=np.uint32)
         
-    return image.astype(np.float32)
-
-
-meta = scipy.io.loadmat(IMAGENET_DIR + 'ILSVRC2012_devkit_t12/data/meta.mat')
-original_idx_to_synset = {}
-synset_to_name = {}
-
-for i in range(1000):
-    ilsvrc2012_id = int(meta['synsets'][i,0][0][0][0])
-    synset = meta['synsets'][i,0][1][0]
-    name = meta['synsets'][i,0][2][0]
-    original_idx_to_synset[ilsvrc2012_id] = synset
-    synset_to_name[synset] = name
-
-synset_to_keras_idx = {}
-keras_idx_to_name = {}
-f = open(IMAGENET_DIR + 'ILSVRC2012_devkit_t12/data/synset_words.txt', 'r')
-c = 0
-for line in f:
-    parts = line.split(' ')
-    synset_to_keras_idx[parts[0]] = c
-    keras_idx_to_name[c] = ' '.join(parts[1:])
-    c += 1
-f.close()
-
-
-
-
-model = get_alexnet_nchw_keras()
+        X_tuner = np.fromfile(MODEL_PARAMS_DIR + '/alexnet_imagenet/tune_input.bin', dtype=np.float32)
+        X_tuner = X_tuner.reshape((-1, 3, 224, 224)) 
+        y_tuner = np.fromfile(MODEL_PARAMS_DIR + '/alexnet_imagenet/tune_labels.bin', dtype=np.uint32)
+ 
+        return X_train, y_train, X_test, y_test, X_tuner, y_tuner
     
-X_tune, X_test = [], []
-y_tune, y_true = [], []
     
-classes = glob.glob(IMAGENET_DIR + 'val/*')
+    def buildModel(self):
 
-for c in np.random.permutation(len(classes))[:NUM_CLASSES]:
-    x = glob.glob(classes[c] + '/*')
-    x = np.array(x)
-            
-    idx = np.random.permutation(len(x))
-    idx = idx[:max(len(idx), IMAGES_PER_CLASS)]
-    
-    synset = classes[c].split('/')[-1]
-    images = list(map(lambda x : load_image(x), x[idx]))
-    labels = [synset_to_keras_idx[synset]] * len(x[idx])
-    
-    X_test += images[:IMAGES_PER_CLASS // 2]
-    y_true += labels[:IMAGES_PER_CLASS // 2]
-    
-    X_tune += images[IMAGES_PER_CLASS // 2:]
-    y_tune += labels[IMAGES_PER_CLASS // 2:]
-    
-    
-X_test = np.array(X_test)
-y_true = np.array(y_true)
-X_tune = np.array(X_tune)
-y_tune = np.array(y_tune)
+        input_layer = Input((3, 224, 224))
 
-print ('tune size', len(X_tune))
-print ('test size', len(X_test))
+        x = ZeroPadding2D((2, 2))(input_layer)
+        x = Conv2D(64, (11, 11), strides=4, padding='valid')(x)
+        x = Activation('relu')(x)
+        x = MaxPooling2D(3, 2)(x)
 
+        x = ZeroPadding2D((2, 2))(x)
+        x = Conv2D(192, (5, 5), padding='valid')(x)
+        x = Activation('relu')(x)
+        x = MaxPooling2D(3, 2)(x)
 
+        x = Conv2D(384, (3, 3), padding='same')(x)
+        x = Activation('relu')(x)
 
+        x = Conv2D(256, (3, 3), padding='same')(x)
+        x = Activation('relu')(x)
 
+        x = Conv2D(256, (3, 3), padding='same')(x)
+        x = Activation('relu')(x)
 
+        x = MaxPooling2D(3, 2)(x)
 
-def train_helper(x):
-    
-    try:
-        x = x.decode('utf-8')
-    except:
-        pass
-    
-    image = load_image(x)
-    
-    y = np.zeros(1000, dtype=np.uint8)
-        
-    y[synset_to_keras_idx[x.split('/')[-2]]] = 1
+        x = Flatten()(x)
+        x = Dropout(0.5)(x)
+        x = Dense(4096)(x)
+        x = Activation('relu')(x)
+        x = Dropout(0.5)(x)
+        x = Dense(4096)(x) 
+        x = Activation('relu')(x)
+        x = Dense(self.num_classes)(x)
+        x = Activation('softmax')(x)
         
-    return image, y
-
-
+        model = Model(input_layer, x)
 
-train_images = glob.glob(IMAGENET_DIR + 'train/*/*')
-random.shuffle(train_images)
+        return model
 
-dataset = tf.data.Dataset().from_tensor_slices(train_images)
-dataset = dataset.map(
-    lambda x : tf.py_func(train_helper, [x], [tf.float32, tf.uint8]), 
-    num_parallel_calls=16
-)
 
-dataset = dataset.shuffle(buffer_size=1000)
-dataset = dataset.batch(64)
-dataset = dataset.repeat()
+    def trainModel(self, model, X_train, y_train, X_test, y_test):
 
-next_element = dataset.make_one_shot_iterator().get_next()
+        raise NotImplementedError("ImageNet training not supported - use Pretrained weights")  # raise, not assert: still fires under `python -O`
 
-sess = tf.Session()
 
-def generate():
-    while True:
-        yield sess.run(next_element)
     
+if __name__ == '__main__':
 
+      
+    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+    # Changing to NCHW format
+    K.set_image_data_format('channels_first')
 
-model.compile(optimizer=keras.optimizers.Adam(lr=0.00001), loss='categorical_crossentropy', metrics=['acc'])
-
-if os.path.exists(WEIGHTS_PATH):
-    model.load_weights(WEIGHTS_PATH)
-else:
-    pass
-#     model.fit_generator(generate(), steps_per_epoch=1000, validation_data=(X_test, to_categorical(y_true, num_classes=1000)), epochs=3)
-#     K.set_value(model.optimizer.lr, 0.000001)
-#     model.fit_generator(generate(), steps_per_epoch=1000, validation_data=(X_test, to_categorical(y_true, num_classes=1000)), epochs=3)
-
-translate_to_approxhpvm(model, OUTPUT_DIR, X_tune, y_tune, 1000)
-
-# # dumpCalibrationData2(OUTPUT_DIR + 'test_input_10K.bin', X_test, OUTPUT_DIR + 'test_labels_10K.bin', y_true)
-# dumpCalibrationData2(OUTPUT_DIR + 'tune_input.bin', X_tune, OUTPUT_DIR + 'tune_labels.bin', y_tune)
-# dumpCalibrationData2(OUTPUT_DIR + 'test_input.bin', X_test, OUTPUT_DIR + 'test_labels.bin', y_true)
 
+    ### Parameters specific to each benchmark
+    reload_dir = MODEL_PARAMS_DIR + '/alexnet_imagenet/'
+    keras_model_file = reload_dir + 'weights.h5'  # same path as before, derived from reload_dir
+    data_dir = '/alexnet_imagenet/'  # NOTE(review): leading '/' presumably joined onto a base dir by Benchmark - confirm
+    src_dir = 'data/alexnet_imagenet_src/'
+    num_classes = 1000
+    batch_size = 50
 
-pred = np.argmax(model.predict(X_test), axis=1)
-print ('val accuracy', np.sum(pred == y_true.ravel()) / len(X_test))
+    model = AlexNet('AlexNet_Imagenet', reload_dir, keras_model_file, data_dir, src_dir, num_classes, batch_size)
     
-pred = np.argmax(model.predict(X_tune), axis=1)
-print ('val accuracy', np.sum(pred == y_tune.ravel()) / len(X_tune))
+    model.run(sys.argv)
 
-model.save_weights(OUTPUT_DIR + '/weights.h5')
 
     
\ No newline at end of file
diff --git a/hpvm/projects/keras/src/legacy/lenet_conv_test.py b/hpvm/projects/keras/src/legacy/lenet_conv_test.py
deleted file mode 100644
index c9588eef6c393457617b7fdda03c7b8222af5357..0000000000000000000000000000000000000000
--- a/hpvm/projects/keras/src/legacy/lenet_conv_test.py
+++ /dev/null
@@ -1,97 +0,0 @@
-
-import sys
-import keras
-from keras.datasets import mnist
-from keras.models import Sequential
-from keras.layers import Dense, Dropout, Flatten, Activation
-from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D
-from keras import backend as K
-from frontend.approxhpvm_translator import translate_to_approxhpvm
-
-
-batch_size = 128
-num_classes = 10
-
-
-# input image dimensions
-img_rows, img_cols = 28, 28  
-
-
-if __name__ == "__main__":    
-
-    # Changing Keras data format to NCHW - NHWC is default
-    # NOTE: ApproxHPVM requires NCHW format
-    K.set_image_data_format('channels_first')
-
-    # Loads Mnist dataset
-    (x_train, y_train), (x_test, y_test) = mnist.load_data()
-    test_labels = y_test
-
-    # Reshaping data to be NCHW format  
-    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
-    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
-    input_shape = (1, img_rows, img_cols)
-
-
-    # Data Normalization 
-    x_train = x_train.astype('float32')
-    x_test = x_test.astype('float32')
-    x_train /= 255
-    x_test /= 255
-
-    
-    # convert class vectors to binary class matrices - required by Keras
-    y_train = keras.utils.to_categorical(y_train, num_classes)
-    y_test = keras.utils.to_categorical(y_test, num_classes)
-
-
-
-    # Network Compostion: 3 Conv Layers, 2 Dense Layers
-    model = Sequential()
-
-    # ConvLayer1
-    model.add(Conv2D(32, kernel_size=(5, 5),
-                     activation='relu',
-                     padding = 'same',
-                     input_shape=input_shape))
-    model.add(MaxPooling2D(pool_size=(2, 2)))
-
-    # ConvLayer2
-    model.add(Conv2D(64, (5, 5), activation='relu', padding = 'same'))
-
-    # ConvLayer3
-    # NOTE: ZeroPading needed for ConvLayer with strides > 1
-    model.add(ZeroPadding2D(padding = (1,1)))
-    model.add(Conv2D(64, (3, 3), strides = (2,2), activation='relu', padding = 'valid') )
-    
-    model.add(Flatten())
-    # DenseLayer1
-    model.add(Dense(1024, activation='relu'))
-    # DenseLayer2
-    model.add(Dense(num_classes, activation='relu'))
-    # Softmax Layer
-    model.add(Activation('softmax'))
-
-
-    # Configures model for training    
-    model.compile(loss=keras.losses.categorical_crossentropy,
-                                optimizer=keras.optimizers.Adadelta(),
-                                metrics=['accuracy'])
-
-    # Training
-    model.fit(x_train, y_train,
-                        batch_size=batch_size,
-                        epochs=5,
-                        verbose=1,
-                        validation_data=(x_test, y_test))
-
-
-    # Inference
-    score = model.evaluate(x_test, y_test, verbose=0)
-    print('Test loss:', score[0])
-    print('Test accuracy:', score[1])
-    
-
-    # NOTE: Call to ApproxHPVM Translator - Dumps weights and ApproxHPVM C src
-    translate_to_approxhpvm(model, "data/lenet_hpvm_batch/", x_test, test_labels, 10)
-
diff --git a/hpvm/projects/keras/src/legacy/mobilenet_imagenet.py b/hpvm/projects/keras/src/legacy/mobilenet_imagenet.py
deleted file mode 100644
index c5cef5193b203633015a5dc8f4be065991bf5608..0000000000000000000000000000000000000000
--- a/hpvm/projects/keras/src/legacy/mobilenet_imagenet.py
+++ /dev/null
@@ -1,263 +0,0 @@
-import os
-import glob
-import random
-
-import scipy
-import scipy.io
-import cv2
-import numpy as np
-
-import tensorflow as tf
-import keras
-from keras.models import Sequential, Model
-from keras.layers import *
-from keras.applications.mobilenet import MobileNet, preprocess_input
-from keras.utils import to_categorical
-from keras import backend as K
-
-from frontend.approxhpvm_translator import translate_to_approxhpvm
-from frontend.weight_utils import dumpCalibrationData
-
-
-np.random.seed(2020)
-
-os.environ["CUDA_VISIBLE_DEVICES"] = "1"
-
-K.set_image_data_format('channels_first')
-
-data_format = 'channels_first'
-
-
-IMAGENET_DIR = '/home/nz11/ILSVRC2012/'
-OUTPUT_DIR = 'data/mobilenet_imagenet/'
-
-NUM_CLASSES = 100
-IMAGES_PER_CLASS = 200
-VAL_SIZE = 100
-
-
-
-def _conv_block(inputs, filters, alpha, kernel=(3, 3), strides=(1, 1)):
-    channel_axis = 1
-    filters = int(filters * alpha)
-        
-    x = ZeroPadding2D(padding=((0, 1), (0, 1)))(inputs)
-    x = Conv2D(filters, kernel,
-                      padding='valid',
-                      use_bias=False,
-                      strides=strides)(x)
-    x = BatchNormalization(axis=channel_axis)(x)
-    return Activation('relu')(x)
-    
-    
-def _depthwise_conv_block(inputs, pointwise_conv_filters, alpha,
-                          depth_multiplier=1, strides=(1, 1), block_id=1):
-    channel_axis = 1
-    pointwise_conv_filters = int(pointwise_conv_filters * alpha)
-
-    if strides != (1, 1):
-        x = ZeroPadding2D(padding=((0, 1), (0, 1)))(inputs)
-    else:
-        x = inputs
-    
-    x = DepthwiseConv2D((3, 3),
-                               padding='same' if strides == (1, 1) else 'valid',
-                               depth_multiplier=depth_multiplier,
-                               strides=strides,
-                               use_bias=False)(x)
-    x = BatchNormalization(axis=channel_axis)(x)
-    x = Activation('relu')(x)
-
-    x = Conv2D(pointwise_conv_filters, (1, 1),
-                      padding='same',
-                      use_bias=False,
-                      strides=(1, 1))(x)
-    x = BatchNormalization(axis=channel_axis)(x)
-    return Activation('relu')(x)
-    
-    
-    
-def get_mobilenet_nchw_keras():
-
-    alpha=1.0
-    dropout=1e-3
-    depth_multiplier=1
-    
-    img_input = Input(shape=(3, 224, 224))
-
-
-    x = _conv_block(img_input, 32, alpha, strides=(2, 2))
-    x = _depthwise_conv_block(x, 64, alpha, depth_multiplier, block_id=1)
-
-    x = _depthwise_conv_block(x, 128, alpha, depth_multiplier,
-                              strides=(2, 2), block_id=2)
-    x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, block_id=3)
-
-    x = _depthwise_conv_block(x, 256, alpha, depth_multiplier,
-                              strides=(2, 2), block_id=4)
-    x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, block_id=5)
-
-    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier,
-                              strides=(2, 2), block_id=6)
-    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=7)
-    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=8)
-    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=9)
-    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=10)
-    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=11)
-
-    x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier,
-                              strides=(2, 2), block_id=12)
-    x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, block_id=13)
-
-
-    x = AveragePooling2D((7, 7))(x)    
-    x = Conv2D(1000, (1, 1),
-                      padding='same')(x)
-    x = Flatten()(x)
-    x = Activation('softmax')(x)
-
-
-    model = Model(img_input, x)
-    
-    
-    K.set_image_data_format('channels_last')
-    original_model = MobileNet()
-    K.set_image_data_format('channels_first')
-    
-    j = 0
-    for i in range(0, len(original_model.layers)):
-        try:
-            model.layers[j].set_weights(original_model.layers[i].get_weights())
-            print (j, 'loaded')
-#             model.layers[j].trainable = False
-            j += 1
-        except:
-            print (j, 'skipped', model.layers[j])
-
-    return model
-
-
-
-def load_image(x):
-    image = cv2.imread(x)
-        
-    height, width, _ = image.shape
-    new_height = height * 256 // min(image.shape[:2])
-    new_width = width * 256 // min(image.shape[:2])
-    image = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_CUBIC)
-    
-    height, width, _ = image.shape
-    startx = width // 2 - (224 // 2)
-    starty = height // 2 - (224 // 2)
-    image = image[starty:starty + 224, startx:startx + 224]
-    
-    image = image[:, :, ::-1]
-    image = np.transpose(image, (2, 0, 1))
-    image = preprocess_input(image.astype(np.float32))
-        
-    return image.astype(np.float32)
-
-
-meta = scipy.io.loadmat(IMAGENET_DIR + 'ILSVRC2012_devkit_t12/data/meta.mat')
-original_idx_to_synset = {}
-synset_to_name = {}
-
-for i in range(1000):
-    ilsvrc2012_id = int(meta['synsets'][i,0][0][0][0])
-    synset = meta['synsets'][i,0][1][0]
-    name = meta['synsets'][i,0][2][0]
-    original_idx_to_synset[ilsvrc2012_id] = synset
-    synset_to_name[synset] = name
-
-synset_to_keras_idx = {}
-keras_idx_to_name = {}
-f = open(IMAGENET_DIR + 'ILSVRC2012_devkit_t12/data/synset_words.txt', 'r')
-c = 0
-for line in f:
-    parts = line.split(' ')
-    synset_to_keras_idx[parts[0]] = c
-    keras_idx_to_name[c] = ' '.join(parts[1:])
-    c += 1
-f.close()
-
-
-
-model = get_mobilenet_nchw_keras()
-    
-X_test = []
-y_true = []
-    
-classes = glob.glob(IMAGENET_DIR + 'val/*')
-
-for c in np.random.permutation(len(classes))[:NUM_CLASSES]:
-    x = glob.glob(classes[c] + '/*')
-    x = np.array(x)
-            
-    idx = np.random.permutation(len(x))
-    idx = idx[:max(len(idx), IMAGES_PER_CLASS)]
-    
-    X_test += list(map(lambda x : load_image(x), x[idx]))
-    
-    synset = classes[c].split('/')[-1]
-    y_true += [synset_to_keras_idx[synset]] * len(x[idx])
-    
-X_test = np.array(X_test)
-y_true = np.array(y_true)
-
-
-
-
-# def train_helper(x):
-    
-#     try:
-#         x = x.decode('utf-8')
-#     except:
-#         pass
-    
-#     image = load_image(x)
-    
-#     y = np.zeros(1000, dtype=np.uint8)
-        
-#     y[synset_to_keras_idx[x.split('/')[-2]]]= 1
-        
-#     return image, y
-
-
-# train_images = glob.glob(IMAGENET_DIR + 'train/*/*')
-# random.shuffle(train_images)
-
-# dataset = tf.data.Dataset().from_tensor_slices(train_images)
-# dataset = dataset.map(
-#     lambda x : tf.py_func(train_helper, [x], [tf.float32, tf.uint8]), 
-#     num_parallel_calls=16
-# )
-
-# dataset = dataset.shuffle(buffer_size=1000)
-# dataset = dataset.batch(32)
-# dataset = dataset.repeat()
-
-# next_element = dataset.make_one_shot_iterator().get_next()
-
-# sess = tf.Session()
-
-# def generate():
-#     while True:
-#         yield sess.run(next_element)
-    
-
-
-# model.compile(optimizer=keras.optimizers.Adam(lr=0.00001), loss='categorical_crossentropy', metrics=['acc'])
-
-
-# model.fit_generator(generate(), steps_per_epoch=1000, validation_data=(X_test, to_categorical(y_true, num_classes=1000)), epochs=7)
-
-
-translate_to_approxhpvm(model, OUTPUT_DIR, X_test[:VAL_SIZE], y_true[:VAL_SIZE], 1000)
-
-dumpCalibrationData(OUTPUT_DIR + 'test_input.bin', X_test, OUTPUT_DIR + 'test_labels.bin', y_true)
-
-
-# pred = np.argmax(model.predict(X_test), axis=1)
-# print ('val accuracy', np.sum(pred == y_true.ravel()) / len(X_test))    
-
-    
\ No newline at end of file
diff --git a/hpvm/projects/keras/src/legacy/mobilenet_shallow.py b/hpvm/projects/keras/src/legacy/mobilenet_shallow.py
deleted file mode 100644
index 64df7f98174f22a59f3382ed4337d23e29900051..0000000000000000000000000000000000000000
--- a/hpvm/projects/keras/src/legacy/mobilenet_shallow.py
+++ /dev/null
@@ -1,158 +0,0 @@
-import sys
-import os
-os.environ['CUDA_VISIBLE_DEVICES'] = '0'
-
-from keras.models import Sequential
-from keras.layers import *
-from keras.datasets import cifar10
-from keras.utils import to_categorical
-from keras.callbacks import *
-from keras.preprocessing.image import ImageDataGenerator
-from keras.models import Model
-from keras import optimizers
-import keras.backend as K
-from frontend.approxhpvm_translator import translate_to_approxhpvm
-
-
-K.set_image_data_format('channels_first')
-
-(X_train, y_train), (X_test, y_test) = cifar10.load_data()
-test_labels = y_test
-
-print ("X_train.shape = ", X_train.shape)
-print ("X_test.shape = ", X_test.shape)
-
-
-X_train = X_train.astype('float32')
-X_test = X_test.astype('float32')
-
-
-mean = np.mean(X_train, axis=(0, 2, 3), keepdims=True)
-std = np.std(X_train, axis=(0, 2, 3), keepdims=True)
-
-X_train = (X_train - mean) / (std + 1e-9)
-X_test = (X_test - mean) / (std + 1e-9)
-
-y_train = to_categorical(y_train, num_classes=10)
-y_test = to_categorical(y_test, num_classes=10)
-
-
-def get_mobilenet(alpha=1, depth_multiplier=1):
-    model = Sequential()
-    
-    def _conv_block(filters, alpha, kernel=(3, 3), strides=(1, 1)):
-        channel_axis = 1
-        filters = int(filters * alpha)
-        model.add(Conv2D(filters, kernel,
-                          padding='same',
-                          use_bias=False,
-                          strides=strides, 
-                        input_shape=(3, 32, 32)))
-        model.add(BatchNormalization(axis=channel_axis))
-        model.add(Activation('relu'))
-    
-    def _depthwise_conv_block(pointwise_conv_filters, alpha, depth_multiplier=1, strides=(1, 1)):
-        channel_axis = 1
-        pointwise_conv_filters = int(pointwise_conv_filters * alpha)
-
-        model.add(DepthwiseConv2D((3, 3),
-                                   padding='same',
-                                   depth_multiplier=depth_multiplier,
-                                   strides=strides,
-                                   use_bias=False))    
-        model.add(BatchNormalization(axis=channel_axis))
-        model.add(Activation('relu'))
-        model.add(Conv2D(pointwise_conv_filters, (1, 1),
-                          padding='same',
-                          use_bias=False,
-                          strides=(1, 1)))
-        model.add(BatchNormalization(axis=channel_axis))
-        model.add(Activation('relu'))
-        
-
-    _conv_block(32, alpha, strides=(1, 1))
-    
-    _depthwise_conv_block(64, alpha, depth_multiplier)
-    
-    _depthwise_conv_block(128, alpha, depth_multiplier,
-                              strides=(2, 2))
-    _depthwise_conv_block(128, alpha, depth_multiplier)
-    model.add(Dropout(rate=0.5))
-
-    _depthwise_conv_block(256, alpha, depth_multiplier, 
-                      strides=(2, 2))
-    _depthwise_conv_block(256, alpha, depth_multiplier)
-    model.add(Dropout(rate=0.5))
-
-    _depthwise_conv_block(512, alpha, depth_multiplier,
-                      strides=(2, 2))
-#     _depthwise_conv_block(512, alpha, depth_multiplier)
-#     _depthwise_conv_block(512, alpha, depth_multiplier)
-#     model.add(Dropout(rate=0.5))
-    
-#     _depthwise_conv_block(512, alpha, depth_multiplier)
-#     _depthwise_conv_block(512, alpha, depth_multiplier)
-#     _depthwise_conv_block(512, alpha, depth_multiplier)
-#     model.add(Dropout(rate=0.5))
-    
-#     _depthwise_conv_block(1024, alpha, depth_multiplier,
-#                          strides=(2, 2))
-#     _depthwise_conv_block(1024, alpha, depth_multiplier)
-#     model.add(Dropout(rate=0.5))
-
-    model.add(AveragePooling2D(pool_size=2))
-    model.add(Flatten())
-    model.add(Dense(10, activation='softmax'))
-
-    return model
-    
-    
-# data augmentation, horizontal flips only
-datagen = ImageDataGenerator(
-        featurewise_center=False,
-        featurewise_std_normalization=False,
-        rotation_range=0.0,
-        width_shift_range=0.2,
-        height_shift_range=0.2,
-        vertical_flip=False,
-        horizontal_flip=True)
-datagen.fit(X_train)
-
-
-model = get_mobilenet()
-
-learning_rates=[]
-for i in range(5):
-    learning_rates.append(5e-2)
-for i in range(50-5):
-    learning_rates.append(2e-2)
-for i in range(100-50):
-    learning_rates.append(8e-3)
-for i in range(150-100):
-    learning_rates.append(4e-3)
-for i in range(200-150):
-    learning_rates.append(2e-3)
-for i in range(250-200):
-    learning_rates.append(1e-3)
-
-callbacks = [
-    LearningRateScheduler(lambda epoch: float(learning_rates[epoch]))
-]
-
-model.compile(optimizer=optimizers.SGD(lr=learning_rates[0], momentum=0.9, decay=0.0, nesterov=False), 
-                       loss='categorical_crossentropy', 
-                       metrics=['accuracy'])
-
-model.fit_generator(
-    datagen.flow(X_train, y_train, batch_size=128),
-    steps_per_epoch=int(np.ceil(50000 / 128)),
-    validation_data=(X_test, y_test),
-    #epochs=300,
-    epochs=250,
-    callbacks=callbacks
-)
-
-model.summary()
-
-translate_to_approxhpvm(model, "data/mobilenet_shallow/", X_test, test_labels, 10)
-
diff --git a/hpvm/projects/keras/src/legacy/mobilenetv2_cifar10.py b/hpvm/projects/keras/src/legacy/mobilenetv2_cifar10.py
deleted file mode 100644
index 2fbed4623d0e57d7a0dd948fa0894127fea72324..0000000000000000000000000000000000000000
--- a/hpvm/projects/keras/src/legacy/mobilenetv2_cifar10.py
+++ /dev/null
@@ -1,176 +0,0 @@
-import sys
-import os
-os.environ['CUDA_VISIBLE_DEVICES'] = '1'
-
-from keras.models import Sequential
-from keras.layers import *
-from keras.datasets import cifar10
-from keras.utils import to_categorical
-from keras.callbacks import *
-from keras.preprocessing.image import ImageDataGenerator
-from keras.models import Model
-from keras import optimizers
-import keras.backend as K
-
-
-K.set_image_data_format('channels_first')
-
-(X_train, y_train), (X_test, y_test) = cifar10.load_data()
-test_labels = y_test
-
-print ("X_train.shape = ", X_train.shape)
-print ("X_test.shape = ", X_test.shape)
-
-
-X_train = X_train.astype('float32')
-X_test = X_test.astype('float32')
-
-mean = np.mean(X_train, axis=(0, 1, 2), keepdims=True)
-std = np.std(X_train, axis=(0, 1, 2), keepdims=True)
-X_train = (X_train - mean) / (std + 1e-9)
-X_test = (X_test - mean) / (std + 1e-9)
-
-y_train = to_categorical(y_train, num_classes=10)
-y_test = to_categorical(y_test, num_classes=10)
-
-
-def _make_divisible(v, divisor, min_value=None):
-    if min_value is None:
-        min_value = divisor
-    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
-    # Make sure that round down does not go down by more than 10%.
-    if new_v < 0.9 * v:
-        new_v += divisor
-    return new_v
-
-def _inverted_res_block(inputs, expansion, stride, alpha, filters, block_id):
-    channel_axis = 1
-    
-    in_channels = inputs.shape[1]
-    pointwise_conv_filters = int(filters * alpha)
-    pointwise_filters = _make_divisible(pointwise_conv_filters, 8)
-    x = inputs
-
-    if block_id:
-        x = Conv2D(int(expansion * in_channels), kernel_size=1, strides=1, padding='valid', use_bias=False)(x)
-        x = BatchNormalization(axis=channel_axis)(x)
-        x = Activation('relu')(x)
-
-    if stride == 2:
-        x = ZeroPadding2D(padding=(1, 1))(x)
-    else:
-        x = ZeroPadding2D(padding=(1, 1))(x)
-        
-    x = DepthwiseConv2D(kernel_size=3, strides=stride, use_bias=False, padding='valid')(x)
-    x = BatchNormalization(axis=channel_axis)(x)
-    x = Activation('relu')(x)
-
-    x = Conv2D(pointwise_filters, kernel_size=1, strides=1, padding='valid', use_bias=False)(x)
-    x = BatchNormalization(axis=channel_axis)(x)
-
-
-    if in_channels == pointwise_filters and stride == 1:
-        return Add()([inputs, x])
-    return x
-
-def get_mobilenetv2(alpha=1.0, depth_multiplier=1):
-
-    channel_axis = 1
-    
-    first_block_filters = _make_divisible(32 * alpha, 8)
-    img_input = Input(shape=(3, 32, 32))
-
-    x = ZeroPadding2D(padding=(1, 1))(img_input)
-    x = Conv2D(first_block_filters, kernel_size=3, strides=1, padding='valid', use_bias=False)(x)
-    #x = BatchNormalization(axis=channel_axis)(x)
-    #x = Activation('relu')(x)
-
-    x = _inverted_res_block(x, filters=16,  alpha=alpha, stride=1, expansion=1, block_id=0 )
-
-    x = _inverted_res_block(x, filters=24,  alpha=alpha, stride=1, expansion=6, block_id=1 )
-    x = _inverted_res_block(x, filters=24,  alpha=alpha, stride=1, expansion=6, block_id=2 )
-
-    x = _inverted_res_block(x, filters=32,  alpha=alpha, stride=2, expansion=6, block_id=3 )
-    x = _inverted_res_block(x, filters=32,  alpha=alpha, stride=1, expansion=6, block_id=4 )
-    x = _inverted_res_block(x, filters=32,  alpha=alpha, stride=1, expansion=6, block_id=5 )
-
-    x = _inverted_res_block(x, filters=64,  alpha=alpha, stride=2, expansion=6, block_id=6 )
-    x = _inverted_res_block(x, filters=64,  alpha=alpha, stride=1, expansion=6, block_id=7 )
-    x = _inverted_res_block(x, filters=64,  alpha=alpha, stride=1, expansion=6, block_id=8 )
-    x = _inverted_res_block(x, filters=64,  alpha=alpha, stride=1, expansion=6, block_id=9 )
-    x = Dropout(rate=0.25)(x)
-
-    x = _inverted_res_block(x, filters=96,  alpha=alpha, stride=1, expansion=6, block_id=10)
-    x = _inverted_res_block(x, filters=96,  alpha=alpha, stride=1, expansion=6, block_id=11)
-    x = _inverted_res_block(x, filters=96,  alpha=alpha, stride=1, expansion=6, block_id=12)
-    x = Dropout(rate=0.25)(x)
-
-    x = _inverted_res_block(x, filters=160, alpha=alpha, stride=2, expansion=6, block_id=13)
-    x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, expansion=6, block_id=14)
-    x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, expansion=6, block_id=15)
-    x = Dropout(rate=0.25)(x)
-
-    x = _inverted_res_block(x, filters=320, alpha=alpha, stride=1, expansion=6, block_id=16)
-    x = Dropout(rate=0.25)(x)
-
-    if alpha > 1.0:
-        last_block_filters = _make_divisible(1280 * alpha, 8)
-    else:
-        last_block_filters = 1280
-
-    x = Conv2D(last_block_filters, kernel_size=1, use_bias=False)(x)
-    x = BatchNormalization(axis=channel_axis)(x)
-    x = Activation('relu')(x)
-    
-    x = AveragePooling2D()(x)
-    x = Flatten()(x)
-    x = Dense(10, activation='softmax')(x)
-        
-    model = Model(inputs=img_input, outputs=x)
-    return model
-
-    
-# data augmentation, horizontal flips only
-datagen = ImageDataGenerator(
-        featurewise_center=False,
-        featurewise_std_normalization=False,
-        rotation_range=0.0,
-        width_shift_range=0.0,
-        height_shift_range=0.0,
-        vertical_flip=False,
-        horizontal_flip=True)
-datagen.fit(X_train)
-
-
-model = get_mobilenetv2()
-
-learning_rates=[]
-for i in range(5):
-    learning_rates.append(2e-2)
-for i in range(50-5):
-    learning_rates.append(1e-2)
-for i in range(100-50):
-    learning_rates.append(8e-3)
-for i in range(150-100):
-    learning_rates.append(4e-3)
-for i in range(200-150):
-    learning_rates.append(2e-3)
-for i in range(300-200):
-    learning_rates.append(1e-3)
-
-callbacks = [
-    LearningRateScheduler(lambda epoch: float(learning_rates[epoch]))
-]
-
-model.compile(optimizer=optimizers.SGD(lr=learning_rates[0], momentum=0.9, decay=0.0, nesterov=False), 
-                       loss='categorical_crossentropy', 
-                       metrics=['accuracy'])
-
-model.fit_generator(
-    datagen.flow(X_train, y_train, batch_size=128),
-    steps_per_epoch=int(np.ceil(50000 / 128)),
-    validation_data=(X_test, y_test),
-    epochs=300,
-    callbacks=callbacks
-)
-
diff --git a/hpvm/projects/keras/src/lenet.py b/hpvm/projects/keras/src/lenet.py
index 4cfee4f12a961a0885af1b2e07563e21d097623b..01c84719e6b90d317f7e0dce012577b08b33fcbf 100644
--- a/hpvm/projects/keras/src/lenet.py
+++ b/hpvm/projects/keras/src/lenet.py
@@ -1,95 +1,115 @@
-
+import os
 import sys
+import glob
+
+import numpy as np
+import tensorflow as tf
+import scipy
+import scipy.io
 import keras
-from keras.datasets import mnist
-from keras.models import Sequential
-from keras.layers import Dense, Dropout, Flatten, Activation
-from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D, GlobalMaxPooling2D
+from keras.models import Model, Sequential
+from keras.layers import *
+from keras.optimizers import Adam
+from keras import regularizers
 from keras import backend as K
-from frontend.approxhpvm_translator import translate_to_approxhpvm
+from keras.utils import to_categorical
+from keras.preprocessing.image import ImageDataGenerator
+from keras.callbacks import LearningRateScheduler
 
+from keras.datasets import mnist
+from Benchmark import Benchmark
+from Config import MODEL_PARAMS_DIR
 
-batch_size = 128
-num_classes = 10
 
 
-# input image dimensions
-img_rows, img_cols = 28, 28  
+class LeNet_MNIST(Benchmark):
 
+    def buildModel(self):
 
-if __name__ == "__main__":    
+        # Network Composition: 2 Conv Layers, 2 Dense Layers
+        model = Sequential()
 
-    # Changing Keras data format to NCHW - NHWC is default
-    # NOTE: ApproxHPVM requires NCHW format
-    K.set_image_data_format('channels_first')
+        # ConvLayer1
+        model.add(Conv2D(32, kernel_size=(5, 5), padding='same', activation='tanh', input_shape=(1, 28, 28)))
+        model.add(MaxPooling2D(pool_size=(2, 2)))
 
-    # Loads Mnist dataset
-    (x_train, y_train), (x_test, y_test) = mnist.load_data()
-    test_labels = y_test
+        # ConvLayer2
+        model.add(Conv2D(64, (5, 5), activation='tanh', padding='same'))
+        model.add(MaxPooling2D(pool_size=(2, 2)))
 
-    # Reshaping data to be NCHW format  
-    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
-    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
-    input_shape = (1, img_rows, img_cols)
+        model.add(Flatten())
+        
+        # DenseLayer1
+        model.add(Dense(1024, activation='tanh'))
+        # DenseLayer2
+        
+        model.add(Dense(self.num_classes, activation='tanh'))
+        # Softmax Layer
+        model.add(Activation('softmax'))
 
+        return model
 
-    # Data Normalization 
-    x_train = x_train.astype('float32')
-    x_test = x_test.astype('float32')
-    x_train /= 255
-    x_test /= 255
 
-    
-    # convert class vectors to binary class matrices - required by Keras
-    y_train = keras.utils.to_categorical(y_train, num_classes)
-    y_test = keras.utils.to_categorical(y_test, num_classes)
+    def data_preprocess(self):
+        (X_train, y_train), (X_val, y_val) = mnist.load_data()
+        test_labels = y_val
 
+        X_train = X_train.reshape(X_train.shape[0], 1, 28, 28)
+        X_train = X_train.astype('float32')
+        X_train /= 255
 
+        X_test = np.fromfile(MODEL_PARAMS_DIR + '/lenet_mnist/test_input.bin', dtype=np.float32)
+        X_test = X_test.reshape((-1, 1, 28, 28)) 
+        y_test = np.fromfile(MODEL_PARAMS_DIR + '/lenet_mnist/test_labels.bin', dtype=np.uint32)
+        
+        X_tuner = np.fromfile(MODEL_PARAMS_DIR + '/lenet_mnist/tune_input.bin', dtype=np.float32)
+        X_tuner = X_tuner.reshape((-1, 1, 28, 28)) 
+        y_tuner = np.fromfile(MODEL_PARAMS_DIR + '/lenet_mnist/tune_labels.bin', dtype=np.uint32)
 
-    # Network Compostion: 3 Conv Layers, 2 Dense Layers
-    model = Sequential()
+        return X_train, y_train, X_test, y_test, X_tuner, y_tuner
+    
 
-    # ConvLayer1
-    model.add(Conv2D(32, kernel_size=(5, 5),
-                     activation='tanh',
-                     padding = 'same',
-                     input_shape=input_shape))
-    model.add(MaxPooling2D(pool_size=(2, 2)))
+    def trainModel(self, model, X_train, y_train, X_test, y_test):
+
+        y_train = to_categorical(y_train, self.num_classes)
+        y_test = to_categorical(y_test, self.num_classes)
+        
+        model.compile(
+            loss='categorical_crossentropy',
+            optimizer=keras.optimizers.Adadelta(),
+            metrics=['accuracy']
+        )
+
+        model.fit(
+            X_train, 
+            y_train,
+            batch_size=128,
+            epochs=10,
+            verbose=1,
+            validation_data=(X_test, y_test)
+        )
+        
+        return model
   
 
-    # ConvLayer2
-    model.add(Conv2D(64, (5, 5), activation='tanh', padding = 'same'))
-    model.add(MaxPooling2D(pool_size=(2, 2)))
-
-    
-    model.add(Flatten())
-    # DenseLayer1
-    model.add(Dense(1024, activation='relu'))
-    # DenseLayer2
-    model.add(Dense(num_classes, activation='relu'))
-    # Softmax Layer
-    model.add(Activation('softmax'))
-
-
-    # Configures model for training    
-    model.compile(loss=keras.losses.categorical_crossentropy,
-                                optimizer=keras.optimizers.Adadelta(),
-                                metrics=['accuracy'])
-
-    # Training
-    model.fit(x_train, y_train,
-                        batch_size=batch_size,
-                        epochs=5,
-                        verbose=1,
-                        validation_data=(x_test, y_test))
-
-
-    # Inference
-    score = model.evaluate(x_test, y_test, verbose=0)
-    print('Test loss:', score[0])
-    print('Test accuracy:', score[1])
     
+if __name__ == '__main__':
+      
+    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+    # Changing to NCHW format
+    K.set_image_data_format('channels_first')
 
-    # NOTE: Call to ApproxHPVM Translator - Dumps weights and ApproxHPVM C src
-    translate_to_approxhpvm(model, "data/lenet_standard/", x_test, test_labels, 10)
 
+    ### Parameters specific to each benchmark
+    reload_dir = MODEL_PARAMS_DIR + '/lenet_mnist/'
+    keras_model_file = MODEL_PARAMS_DIR + '/lenet_mnist/weights.h5'
+    data_dir = '/lenet_mnist/' 
+    src_dir = 'data/lenet_mnist_src/'
+    num_classes = 10
+    batch_size = 500
+    
+    print (reload_dir)
+
+    model = LeNet_MNIST('LeNet_MNIST', reload_dir, keras_model_file, data_dir, src_dir, num_classes, batch_size)
+    
+    model.run(sys.argv)
diff --git a/hpvm/projects/keras/src/mobilenet_cifar10.py b/hpvm/projects/keras/src/mobilenet_cifar10.py
index b739ed819634f30f4b33173443ac41f848f9c8f1..367a4dfc6244228b7b1336d1a63044273cebd2fb 100644
--- a/hpvm/projects/keras/src/mobilenet_cifar10.py
+++ b/hpvm/projects/keras/src/mobilenet_cifar10.py
@@ -1,161 +1,188 @@
-
-import sys
 import os
-os.environ['CUDA_VISIBLE_DEVICES'] = '1'
-
-from keras.models import Sequential
+import sys
+import glob
+
+import numpy as np
+import tensorflow as tf
+import scipy
+import scipy.io
+import keras
+from keras.models import Model, Sequential
 from keras.layers import *
-from keras.datasets import cifar10
+from keras.optimizers import Adam
+from keras import regularizers
+from keras import backend as K
 from keras.utils import to_categorical
-from keras.callbacks import *
 from keras.preprocessing.image import ImageDataGenerator
-from keras.models import Model
-from keras import optimizers
-import keras.backend as K
-from frontend.approxhpvm_translator import translate_to_approxhpvm
+from keras.callbacks import LearningRateScheduler
 
+from keras.datasets import cifar10
+from Benchmark import Benchmark
+from Config import MODEL_PARAMS_DIR
 
-K.set_image_data_format('channels_first')
 
-(X_train, y_train), (X_test, y_test) = cifar10.load_data()
-test_labels = y_test
 
-print ("X_train.shape = ", X_train.shape)
-print ("X_test.shape = ", X_test.shape)
+class MobileNet_CIFAR10(Benchmark):
 
+    def buildModel(self):
+        alpha=1
+        depth_multiplier=1
 
-X_train = X_train.astype('float32')
-X_test = X_test.astype('float32')
+        model = Sequential()
 
+        def _conv_block(filters, alpha, kernel=(3, 3), strides=(1, 1)):
+            channel_axis = 1
 
-mean = np.mean(X_train, axis=(0, 2, 3), keepdims=True)
-std = np.std(X_train, axis=(0, 2, 3), keepdims=True)
+            model.add(Conv2D(filters, kernel,
+                              padding='same',
+                              use_bias=False,
+                              strides=strides, 
+                              input_shape=(3, 32, 32)))
+            model.add(BatchNormalization(axis=channel_axis))
+            model.add(Activation('relu'))
 
-X_train = (X_train - mean) / (std + 1e-9)
-X_test = (X_test - mean) / (std + 1e-9)
+        def _depthwise_conv_block(pointwise_conv_filters, alpha, depth_multiplier=1, strides=(1, 1)):
+            channel_axis = 1 
 
-y_train = to_categorical(y_train, num_classes=10)
-y_test = to_categorical(y_test, num_classes=10)
+            model.add(ZeroPadding2D(padding=((1,1), (1,1))))
 
+            model.add(DepthwiseConv2D((3, 3),
+                                       padding='valid',
+                                       #depth_multiplier=depth_multiplier,
+                                       strides=strides,
+                                       use_bias=False))    
+            model.add(BatchNormalization(axis=channel_axis))
 
-def get_mobilenet(alpha=1, depth_multiplier=1):
-    model = Sequential()
-    
-    def _conv_block(filters, alpha, kernel=(3, 3), strides=(1, 1)):
-        channel_axis = 1
-        
-        model.add(Conv2D(filters, kernel,
-                          padding='same',
-                          use_bias=False,
-                          strides=strides, 
-                          input_shape=(3, 32, 32)))
-        model.add(BatchNormalization(axis=channel_axis))
-        model.add(Activation('relu'))
-    
-    def _depthwise_conv_block(pointwise_conv_filters, alpha, depth_multiplier=1, strides=(1, 1)):
-        channel_axis = 1 
+            model.add(Activation('relu'))
+            model.add(Conv2D(pointwise_conv_filters, (1, 1),
+                              padding='same',
+                              use_bias=False,
+                              strides=(1, 1)))
+            model.add(BatchNormalization(axis=channel_axis))
+            model.add(Activation('relu'))
 
-        model.add(ZeroPadding2D(padding = ((1,1), (1,1) )))
 
-        model.add(DepthwiseConv2D((3, 3),
-                                   padding='valid',
-                                   #depth_multiplier=depth_multiplier,
-                                   strides=strides,
-                                   use_bias=False))    
-        model.add(BatchNormalization(axis=channel_axis))
-        
-        model.add(Activation('relu'))
-        model.add(Conv2D(pointwise_conv_filters, (1, 1),
-                          padding='same',
-                          use_bias=False,
-                          strides=(1, 1)))
-        model.add(BatchNormalization(axis=channel_axis))
-        model.add(Activation('relu'))
+        _conv_block(32, alpha, strides=(1, 1))
 
+        _depthwise_conv_block(64, alpha, depth_multiplier)
+
+        _depthwise_conv_block(128, alpha, depth_multiplier,
+                                  strides=(2, 2))
+        _depthwise_conv_block(128, alpha, depth_multiplier)
+        model.add(Dropout(rate=0.5))
+
+        _depthwise_conv_block(256, alpha, depth_multiplier, 
+                          strides=(2, 2))
+        _depthwise_conv_block(256, alpha, depth_multiplier)
+        model.add(Dropout(rate=0.5))
+
+        _depthwise_conv_block(512, alpha, depth_multiplier,
+                          strides=(2, 2))
+        _depthwise_conv_block(512, alpha, depth_multiplier)
+        _depthwise_conv_block(512, alpha, depth_multiplier)
+        model.add(Dropout(rate=0.5))
+
+        _depthwise_conv_block(512, alpha, depth_multiplier)
+        _depthwise_conv_block(512, alpha, depth_multiplier)
+        _depthwise_conv_block(512, alpha, depth_multiplier)
+        model.add(Dropout(rate=0.5))
+
+        _depthwise_conv_block(1024, alpha, depth_multiplier,
+                             strides=(2, 2))
+        _depthwise_conv_block(1024, alpha, depth_multiplier)
+        model.add(Dropout(rate=0.5))
+
+        model.add(AveragePooling2D(pool_size=2))
+        model.add(Flatten())
+        model.add(Dense(self.num_classes))    
+        model.add(Activation('softmax'))
+
+        return model
 
-    _conv_block(32, alpha, strides=(1, 1))
-    
-    _depthwise_conv_block(64, alpha, depth_multiplier)
-    
-    _depthwise_conv_block(128, alpha, depth_multiplier,
-                              strides=(2, 2))
-    _depthwise_conv_block(128, alpha, depth_multiplier)
-    model.add(Dropout(rate=0.5))
-
-    _depthwise_conv_block(256, alpha, depth_multiplier, 
-                      strides=(2, 2))
-    _depthwise_conv_block(256, alpha, depth_multiplier)
-    model.add(Dropout(rate=0.5))
-
-    _depthwise_conv_block(512, alpha, depth_multiplier,
-                      strides=(2, 2))
-    _depthwise_conv_block(512, alpha, depth_multiplier)
-    _depthwise_conv_block(512, alpha, depth_multiplier)
-    model.add(Dropout(rate=0.5))
     
-    _depthwise_conv_block(512, alpha, depth_multiplier)
-    _depthwise_conv_block(512, alpha, depth_multiplier)
-    _depthwise_conv_block(512, alpha, depth_multiplier)
-    model.add(Dropout(rate=0.5))
+    def data_preprocess(self):
+
+        (X_train, y_train), (X_val, y_val) = cifar10.load_data()
+
+        X_train = X_train / 255.0
+        X_val = X_val / 255.0
+
+        mean = np.mean(X_train)
+        std = np.std(X_train)
+        X_train = (X_train - mean) / (std + 1e-7)
+        X_val = (X_val - mean) / (std + 1e-7)  
+
+        X_test = X_val[0:5000]
+        y_test = y_val[0:5000]
+        X_tuner = X_val[5000:]
+        y_tuner = y_val[5000:]
+
+        return X_train, y_train, X_test, y_test, X_tuner, y_tuner
     
-    _depthwise_conv_block(1024, alpha, depth_multiplier,
-                         strides=(2, 2))
-    _depthwise_conv_block(1024, alpha, depth_multiplier)
-    model.add(Dropout(rate=0.5))
 
-    model.add(AveragePooling2D(pool_size=2))
-    model.add(Flatten())
-    model.add(Dense(10, activation='softmax'))
+    def trainModel(self, model, X_train, y_train, X_test, y_test):
 
-    return model
+        y_train = to_categorical(y_train, self.num_classes)
+        y_test = to_categorical(y_test, self.num_classes)
+        
+        # data augmentation, horizontal flips only
+        datagen = ImageDataGenerator(
+                featurewise_center=False,
+                featurewise_std_normalization=False,
+                rotation_range=0.0,
+                width_shift_range=0.0,
+                height_shift_range=0.0,
+                vertical_flip=False,
+                horizontal_flip=True)
+        datagen.fit(X_train)
+
+
+        learning_rates=[]
+        for i in range(50):
+            learning_rates.append(0.01)
+        for i in range(75-50):
+            learning_rates.append(0.001)
+        for i in range(100-75):
+            learning_rates.append(0.0001)
+        for i in range(125-100):
+            learning_rates.append(0.00001)
+            
+        callbacks = [
+            LearningRateScheduler(lambda epoch: float(learning_rates[epoch]))
+        ]
+
+        model.compile(optimizer=keras.optimizers.SGD(lr=learning_rates[0], momentum=0.9, decay=0.0), 
+                               loss='categorical_crossentropy', 
+                               metrics=['accuracy'])
+
+        model.fit_generator(
+            datagen.flow(X_train, y_train, batch_size=128),
+            steps_per_epoch=int(np.ceil(50000 / 128)),
+            validation_data=(X_test, y_test),
+            epochs=125,
+            callbacks=callbacks
+        )
+
+        return model
+
+  
     
+if __name__ == '__main__':
+      
+    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+    # Changing to NCHW format
+    K.set_image_data_format('channels_first')
+
+
+    ### Parameters specific to each benchmark
+    reload_dir = MODEL_PARAMS_DIR + '/mobilenet_cifar10/'
+    keras_model_file = MODEL_PARAMS_DIR + '/mobilenet_cifar10/weights.h5'
+    data_dir = '/mobilenet_cifar10/' 
+    src_dir = 'data/mobilenet_cifar10_src/'
+    num_classes = 10
+    batch_size = 500
+
+    model = MobileNet_CIFAR10('MobileNet_CIFAR10', reload_dir, keras_model_file, data_dir, src_dir, num_classes, batch_size)
     
-# data augmentation, horizontal flips only
-datagen = ImageDataGenerator(
-        featurewise_center=False,
-        featurewise_std_normalization=False,
-        rotation_range=0.0,
-        width_shift_range=0.0,
-        height_shift_range=0.0,
-        vertical_flip=False,
-        horizontal_flip=True)
-datagen.fit(X_train)
-
-
-model = get_mobilenet()
-
-learning_rates=[]
-for i in range(5):
-    learning_rates.append(2e-2)
-for i in range(50-5):
-    learning_rates.append(1e-2)
-for i in range(100-50):
-    learning_rates.append(8e-3)
-for i in range(150-100):
-    learning_rates.append(4e-3)
-for i in range(200-150):
-    learning_rates.append(2e-3)
-for i in range(300-200):
-    learning_rates.append(1e-3)
-
-callbacks = [
-    LearningRateScheduler(lambda epoch: float(learning_rates[epoch]))
-]
-
-model.compile(optimizer=optimizers.SGD(lr=learning_rates[0], momentum=0.9, decay=0.0, nesterov=False), 
-                       loss='categorical_crossentropy', 
-                       metrics=['accuracy'])
-
-model.fit_generator(
-    datagen.flow(X_train, y_train, batch_size=128),
-    steps_per_epoch=int(np.ceil(50000 / 128)),
-    validation_data=(X_test, y_test),
-    #epochs=300,
-    epochs=50,
-    callbacks=callbacks
-)
-
-model.summary()
-
-translate_to_approxhpvm(model, "data/mobilenet_hpvm/", X_test, test_labels, 10)
+    model.run(sys.argv)
 
diff --git a/hpvm/projects/keras/src/resnet18_cifar10.py b/hpvm/projects/keras/src/resnet18_cifar10.py
index 6afa1c50fa470d038577ff8c3c4b5df43d9bab6b..74abc7ad9f860963c770aaa6bea27b7c16d59738 100644
--- a/hpvm/projects/keras/src/resnet18_cifar10.py
+++ b/hpvm/projects/keras/src/resnet18_cifar10.py
@@ -33,42 +33,34 @@ ResNet164  v2| 18|     - %|            94.54      %|  -
 ResNet1001 v2|111|     - %|            95.08+-.14 %|  -
 """
 
-from __future__ import print_function
-import keras
-from keras.layers import Dense, Conv2D, BatchNormalization, Activation
-from keras.layers import AveragePooling2D, Input, Flatten, ZeroPadding2D
-from keras.optimizers import Adam
-from keras.callbacks import ModelCheckpoint, LearningRateScheduler
-from keras.callbacks import ReduceLROnPlateau
-from keras.preprocessing.image import ImageDataGenerator
-from keras.regularizers import l2
-from keras import backend as K
-from keras.models import Model
-from keras.datasets import cifar10
-from keras import backend as K
-import numpy as np
 import os
 import sys
-from approxhpvm_translator import translate_to_approxhpvm
-from weight_utils import dumpCalibrationData
-
+import glob
 
+import numpy as np
+import tensorflow as tf
+import scipy
+import scipy.io
+import keras
+from keras.models import Model, Sequential
+from keras.layers import *
+from keras.optimizers import Adam
+from keras import regularizers
+from keras import backend as K
+from keras.utils import to_categorical
+from keras.preprocessing.image import ImageDataGenerator
+from keras.callbacks import LearningRateScheduler
 
-os.environ["CUDA_VISIBLE_DEVICES"] = "0"
-
+from keras.datasets import cifar10
+from Benchmark import Benchmark
+from Config import MODEL_PARAMS_DIR
 
-K.set_image_data_format('channels_first')
 
 
 # Training parameters
 batch_size = 32  # orig paper trained all networks with batch_size=128
-#---- epochs = 200
-epochs = 2
-data_augmentation = True
-num_classes = 10
+epochs = 200
 
-# Subtracting pixel mean improves accuracy
-subtract_pixel_mean = True
 
 # Model parameter
 # ----------------------------------------------------------------------------
@@ -99,63 +91,8 @@ elif version == 2:
 # Model name, depth and version
 model_type = 'ResNet%dv%d' % (depth, version)
 
-# Load the CIFAR10 data.
-(x_train, y_train), (x_test, y_test) = cifar10.load_data()
-test_labels = y_test
-train_labels = y_train
-
-# Input image dimensions.
-input_shape = x_train.shape[1:]
-
-# Normalize data.
-x_train = x_train.astype('float32') / 255
-x_test = x_test.astype('float32') / 255
-
-# If subtract pixel mean is enabled
-if subtract_pixel_mean:
-    x_train_mean = np.mean(x_train, axis=0)
-    x_train -= x_train_mean
-    x_test -= x_train_mean
-
-print('x_train shape:', x_train.shape)
-print(x_train.shape[0], 'train samples')
-print(x_test.shape[0], 'test samples')
-print('y_train shape:', y_train.shape)
-
-# Convert class vectors to binary class matrices.
-y_train = keras.utils.to_categorical(y_train, num_classes)
-y_test = keras.utils.to_categorical(y_test, num_classes)
-
-
-
-
-  
-
-def lr_schedule(epoch):
-    """Learning Rate Schedule
-
-    Learning rate is scheduled to be reduced after 80, 120, 160, 180 epochs.
-    Called automatically every epoch as part of callbacks during training.
-
-    # Arguments
-        epoch (int): The number of epochs
-
-    # Returns
-        lr (float32): learning rate
-    """
-    lr = 1e-3
-    if epoch > 180:
-        lr *= 0.5e-3
-    elif epoch > 160:
-        lr *= 1e-3
-    elif epoch > 120:
-        lr *= 1e-2
-    elif epoch > 80:
-        lr *= 1e-1
-    print('Learning rate: ', lr)
-    return lr
-
 
+    
 def resnet_layer(inputs,
                  num_filters=16,
                  kernel_size=3,
@@ -183,14 +120,14 @@ def resnet_layer(inputs,
                   strides=strides,
                   padding='valid', # NOTE: using valid convs with explicit pad operation
                   kernel_initializer='he_normal',
-                  kernel_regularizer=l2(1e-4))
+                  kernel_regularizer=regularizers.l2(1e-4))
 
     padding_value = int((kernel_size - 1) / 2)
     zero_padding = ZeroPadding2D(padding = (padding_value, padding_value))
 
     # FIXME: Temporarily disabled batch normalization
     batch_normalization = False
-    
+
     x = inputs
     x = zero_padding(x)
     if conv_first:
@@ -208,364 +145,436 @@ def resnet_layer(inputs,
     return x
 
 
+class ResNet18_CIFAR10(Benchmark):
 
-def resnet_v0(input_shape, depth, num_classes=10):
-    """ResNet Version 1 Model builder [a]
-
-    Stacks of 2 x (3 x 3) Conv2D-BN-ReLU
-    Last ReLU is after the shortcut connection.
-    At the beginning of each stage, the feature map size is halved (downsampled)
-    by a convolutional layer with strides=2, while the number of filters is
-    doubled. Within each stage, the layers have the same number filters and the
-    same number of filters.
-    Features maps sizes:
-    stage 0: 32x32, 16
-    stage 1: 16x16, 32
-    stage 2:  8x8,  64
-    The Number of parameters is approx the same as Table 6 of [a]:
-    ResNet20 0.27M
-    ResNet32 0.46M
-    ResNet44 0.66M
-    ResNet56 0.85M
-    ResNet110 1.7M
+    def lr_schedule(self, epoch):
+        """Learning Rate Schedule
 
-    # Arguments
-        input_shape (tensor): shape of input image tensor
-        depth (int): number of core convolutional layers
-        num_classes (int): number of classes (CIFAR10 has 10)
+        Learning rate is scheduled to be reduced after 80, 120, 160, 180 epochs.
+        Called automatically every epoch as part of callbacks during training.
 
-    # Returns
-        model (Model): Keras model instance
-    """
-    if (depth - 2) % 6 != 0:
-        raise ValueError('depth should be 6n+2 (eg 20, 32, 44 in [a])')
-    # Start model definition.
-    num_filters = 16
-    num_res_blocks = int((depth - 2) / 6)
-
-    inputs = Input(shape=input_shape)
-    x = resnet_layer(inputs=inputs)
-    # Instantiate the stack of residual units
-    for stack in range(3):
-        for res_block in range(num_res_blocks):
-            strides = 1
-            if stack > 0 and res_block == 0:  # first layer but not first stack
-                strides = 2  # downsample
-            y = resnet_layer(inputs=x,
-                             num_filters=num_filters,
-                             strides=strides)
-            y = resnet_layer(inputs=y,
-                             num_filters=num_filters,
-                             activation=None)
-            if stack > 0 and res_block == 0:  # first layer but not first stack
-                # linear projection residual shortcut connection to match
-                # changed dims
-                x = resnet_layer(inputs=x,
-                                 num_filters=num_filters,
-                                 kernel_size=1,
-                                 strides=strides,
-                                 activation=None,
-                                 batch_normalization=False)
-            x = keras.layers.add([x, y])
-            x = Activation('relu')(x)
-        num_filters *= 1
-
-    # Add classifier on top.
-    # v1 does not use BN after last shortcut connection-ReLU
-    #-- x = AveragePooling2D(pool_size=8)(x)
-    y = Flatten()(x)
-    x = Dense(64)(y)
-    outputs = Dense(num_classes,
-                    activation='softmax',
-                    kernel_initializer='he_normal')(x)
-
-    # Instantiate model.
-    model = Model(inputs=inputs, outputs=outputs)
-    return model
-
-
-def resnet_v1_1(input_shape, depth, num_classes=10):
-    """ResNet Version 1 Model builder [a]
-
-    Stacks of 2 x (3 x 3) Conv2D-BN-ReLU
-    Last ReLU is after the shortcut connection.
-    At the beginning of each stage, the feature map size is halved (downsampled)
-    by a convolutional layer with strides=2, while the number of filters is
-    doubled. Within each stage, the layers have the same number filters and the
-    same number of filters.
-    Features maps sizes:
-    stage 0: 32x32, 16
-    stage 1: 16x16, 32
-    stage 2:  8x8,  64
-    The Number of parameters is approx the same as Table 6 of [a]:
-    ResNet20 0.27M
-    ResNet32 0.46M
-    ResNet44 0.66M
-    ResNet56 0.85M
-    ResNet110 1.7M
+        # Arguments
+            epoch (int): The number of epochs
 
-    # Arguments
-        input_shape (tensor): shape of input image tensor
-        depth (int): number of core convolutional layers
-        num_classes (int): number of classes (CIFAR10 has 10)
+        # Returns
+            lr (float32): learning rate
+        """
+        lr = 1e-3
+        if epoch > 180:
+            lr *= 0.5e-3
+        elif epoch > 160:
+            lr *= 1e-3
+        elif epoch > 120:
+            lr *= 1e-2
+        elif epoch > 80:
+            lr *= 1e-1
+            
+        return lr
+    
 
-    # Returns
-        model (Model): Keras model instance
-    """
-    if (depth - 2) % 6 != 0:
-        raise ValueError('depth should be 6n+2 (eg 20, 32, 44 in [a])')
-    # Start model definition.
-    num_filters = 16
-    num_res_blocks = int((depth - 2) / 6)
-
-    inputs = Input(shape=input_shape)
-    x = resnet_layer(inputs=inputs)
-    # Instantiate the stack of residual units
-    for stack in range(3):
-        for res_block in range(num_res_blocks):
-            strides = 1
-            if stack > 0 and res_block == 0:  # first layer but not first stack
-                strides = 2  # downsample
-            y = resnet_layer(inputs=x,
-                             num_filters=num_filters,
-                             strides=strides)
-            y = resnet_layer(inputs=y,
-                             num_filters=num_filters,
-                             activation=None)
-            if stack > 0 and res_block == 0:  # first layer but not first stack
-                # linear projection residual shortcut connection to match
-                # changed dims
-                x = resnet_layer(inputs=x,
+    def resnet_v0(self, input_shape, depth, num_classes=10):
+        """ResNet Version 1 Model builder [a]
+
+        Stacks of 2 x (3 x 3) Conv2D-BN-ReLU
+        Last ReLU is after the shortcut connection.
+        At the beginning of each stage, the feature map size is halved (downsampled)
+        by a convolutional layer with strides=2, while the number of filters is
+        doubled. Within each stage, the layers have the same number of filters and
+        the same feature map size.
+        Feature map sizes:
+        stage 0: 32x32, 16
+        stage 1: 16x16, 32
+        stage 2:  8x8,  64
+        The Number of parameters is approx the same as Table 6 of [a]:
+        ResNet20 0.27M
+        ResNet32 0.46M
+        ResNet44 0.66M
+        ResNet56 0.85M
+        ResNet110 1.7M
+
+        # Arguments
+            input_shape (tensor): shape of input image tensor
+            depth (int): number of core convolutional layers
+            num_classes (int): number of classes (CIFAR10 has 10)
+
+        # Returns
+            model (Model): Keras model instance
+        """
+        if (depth - 2) % 6 != 0:
+            raise ValueError('depth should be 6n+2 (eg 20, 32, 44 in [a])')
+        # Start model definition.
+        num_filters = 16
+        num_res_blocks = int((depth - 2) / 6)
+
+        inputs = Input(shape=input_shape)
+        x = resnet_layer(inputs=inputs)
+        # Instantiate the stack of residual units
+        for stack in range(3):
+            for res_block in range(num_res_blocks):
+                strides = 1
+                if stack > 0 and res_block == 0:  # first layer but not first stack
+                    strides = 2  # downsample
+                y = resnet_layer(inputs=x,
                                  num_filters=num_filters,
+                                 strides=strides)
+                y = resnet_layer(inputs=y,
+                                 num_filters=num_filters,
+                                 activation=None)
+                if stack > 0 and res_block == 0:  # first layer but not first stack
+                    # linear projection residual shortcut connection to match
+                    # changed dims
+                    x = resnet_layer(inputs=x,
+                                     num_filters=num_filters,
+                                     kernel_size=1,
+                                     strides=strides,
+                                     activation=None,
+                                     batch_normalization=False)
+                x = keras.layers.add([x, y])
+                x = Activation('relu')(x)
+            num_filters *= 1
+
+        # Add classifier on top.
+        # v1 does not use BN after last shortcut connection-ReLU
+        #-- x = AveragePooling2D(pool_size=8)(x)
+        y = Flatten()(x)
+        x = Dense(64)(y)
+        outputs = Dense(num_classes,
+                        activation='softmax',
+                        kernel_initializer='he_normal')(x)
+
+        # Instantiate model.
+        model = Model(inputs=inputs, outputs=outputs)
+        return model
+
+
+    def resnet_v1_1(self, input_shape, depth, num_classes=10):
+        """ResNet Version 1 Model builder [a]
+
+        Stacks of 2 x (3 x 3) Conv2D-BN-ReLU
+        Last ReLU is after the shortcut connection.
+        At the beginning of each stage, the feature map size is halved (downsampled)
+        by a convolutional layer with strides=2, while the number of filters is
+        doubled. Within each stage, the layers have the same number of filters and
+        the same feature map size.
+        Feature map sizes:
+        stage 0: 32x32, 16
+        stage 1: 16x16, 32
+        stage 2:  8x8,  64
+        The Number of parameters is approx the same as Table 6 of [a]:
+        ResNet20 0.27M
+        ResNet32 0.46M
+        ResNet44 0.66M
+        ResNet56 0.85M
+        ResNet110 1.7M
+
+        # Arguments
+            input_shape (tensor): shape of input image tensor
+            depth (int): number of core convolutional layers
+            num_classes (int): number of classes (CIFAR10 has 10)
+
+        # Returns
+            model (Model): Keras model instance
+        """
+        if (depth - 2) % 6 != 0:
+            raise ValueError('depth should be 6n+2 (eg 20, 32, 44 in [a])')
+        # Start model definition.
+        num_filters = 16
+        num_res_blocks = int((depth - 2) / 6)
+
+        inputs = Input(shape=input_shape)
+        x = resnet_layer(inputs=inputs)
+        # Instantiate the stack of residual units
+        for stack in range(3):
+            for res_block in range(num_res_blocks):
+                strides = 1
+                if stack > 0 and res_block == 0:  # first layer but not first stack
+                    strides = 2  # downsample
+                y = resnet_layer(inputs=x,
+                                 num_filters=num_filters,
+                                 strides=strides)
+                y = resnet_layer(inputs=y,
+                                 num_filters=num_filters,
+                                 activation=None)
+                if stack > 0 and res_block == 0:  # first layer but not first stack
+                    # linear projection residual shortcut connection to match
+                    # changed dims
+                    x = resnet_layer(inputs=x,
+                                     num_filters=num_filters,
+                                     kernel_size=1,
+                                     strides=strides,
+                                     activation=None,
+                                     batch_normalization=False)
+                x = keras.layers.add([x, y])
+                x = Activation('relu')(x)
+            num_filters *= 2
+
+
+        x = AveragePooling2D(pool_size=8)(x)
+        y = Flatten()(x)
+        outputs = Dense(num_classes,
+                        #activation='softmax',
+                        kernel_initializer='he_normal')(y)
+
+        outputs = Activation('softmax')(outputs)
+
+
+        # Instantiate model.
+        model = Model(inputs=inputs, outputs=outputs)
+        return model
+
+
+
+    def resnet_v2(self, input_shape, depth, num_classes=10):
+        """ResNet Version 2 Model builder [b]
+
+        Stacks of (1 x 1)-(3 x 3)-(1 x 1) BN-ReLU-Conv2D or also known as
+        bottleneck layer
+        First shortcut connection per layer is 1 x 1 Conv2D.
+        Second and onwards shortcut connection is identity.
+        At the beginning of each stage, the feature map size is halved (downsampled)
+        by a convolutional layer with strides=2, while the number of filter maps is
+        doubled. Within each stage, the layers have the same number filters and the
+        same filter map sizes.
+        Features maps sizes:
+        conv1  : 32x32,  16
+        stage 0: 32x32,  64
+        stage 1: 16x16, 128
+        stage 2:  8x8,  256
+
+        This method does not read any attribute of `self`; all layers are
+        created through the `resnet_layer` helper and Keras layer classes.
+
+        # Arguments
+            input_shape (tensor): shape of input image tensor
+            depth (int): number of core convolutional layers
+            num_classes (int): number of classes (CIFAR10 has 10)
+
+        # Returns
+            model (Model): Keras model instance
+
+        # Raises
+            ValueError: if depth is not of the form 9n+2
+        """
+        if (depth - 2) % 9 != 0:
+            raise ValueError('depth should be 9n+2 (eg 56 or 110 in [b])')
+        # Start model definition.
+        num_filters_in = 16
+        num_res_blocks = int((depth - 2) / 9)
+
+        inputs = Input(shape=input_shape)
+        # v2 performs Conv2D with BN-ReLU on input before splitting into 2 paths
+        x = resnet_layer(inputs=inputs,
+                         num_filters=num_filters_in,
+                         conv_first=True)
+
+        # Instantiate the stack of residual units
+        for stage in range(3):
+            for res_block in range(num_res_blocks):
+                # Defaults for a regular unit; the first unit of a stage
+                # overrides some of them below.
+                activation = 'relu'
+                batch_normalization = True
+                strides = 1
+                if stage == 0:
+                    # Stage 0 widens the output to 4x the bottleneck width.
+                    num_filters_out = num_filters_in * 4
+                    if res_block == 0:  # first layer and first stage
+                        activation = None
+                        batch_normalization = False
+                else:
+                    # Later stages double the width carried over from the
+                    # previous stage.
+                    num_filters_out = num_filters_in * 2
+                    if res_block == 0:  # first layer but not first stage
+                        strides = 2    # downsample
+
+                # bottleneck residual unit
+                y = resnet_layer(inputs=x,
+                                 num_filters=num_filters_in,
                                  kernel_size=1,
                                  strides=strides,
-                                 activation=None,
-                                 batch_normalization=False)
-            x = keras.layers.add([x, y])
-            x = Activation('relu')(x)
-        num_filters *= 2
+                                 activation=activation,
+                                 batch_normalization=batch_normalization,
+                                 conv_first=False)
+                y = resnet_layer(inputs=y,
+                                 num_filters=num_filters_in,
+                                 conv_first=False)
+                y = resnet_layer(inputs=y,
+                                 num_filters=num_filters_out,
+                                 kernel_size=1,
+                                 conv_first=False)
+                if res_block == 0:
+                    # linear projection residual shortcut connection to match
+                    # changed dims
+                    x = resnet_layer(inputs=x,
+                                     num_filters=num_filters_out,
+                                     kernel_size=1,
+                                     strides=strides,
+                                     activation=None,
+                                     batch_normalization=False)
+                # No activation is applied after the addition here; a single
+                # BN-ReLU follows the last stage instead.
+                x = keras.layers.add([x, y])
+
+            # The output width of this stage is the input width of the next.
+            num_filters_in = num_filters_out
+
+        # Add classifier on top.
+        # v2 has BN-ReLU before Pooling
+        x = BatchNormalization()(x)
+        x = Activation('relu')(x)
+        x = AveragePooling2D(pool_size=8)(x)
+        y = Flatten()(x)
+        outputs = Dense(num_classes,
+                        activation='softmax',
+                        kernel_initializer='he_normal')(y)
+
+        # Instantiate model.
+        model = Model(inputs=inputs, outputs=outputs)
+        return model
 
+    
+    def buildModel(self):
+        """Create the depth-20 ResNet for (3, 32, 32) inputs.
+
+        The builder is chosen by the free variable `version`, which is not
+        defined in this method: 2 selects `resnet_v2`, anything else
+        selects `resnet_v1_1`.
+
+        # Returns
+            model (Model): Keras model instance returned by the builder
+        """
+        depth = 20
+        input_shape = (3, 32, 32)
         
-    x = AveragePooling2D(pool_size=8)(x)
-    y = Flatten()(x)
-    outputs = Dense(num_classes,
-                    #activation='softmax',
-                    kernel_initializer='he_normal')(y)
+        builder = self.resnet_v2 if version == 2 else self.resnet_v1_1
 
-    outputs = Activation('softmax')(outputs)
+        return builder(input_shape=input_shape, depth=depth)
+    
+    
+    def data_preprocess(self):
+        """Load the training, test and tuning data for this benchmark.
+
+        Training data comes from `cifar10.load_data()`; it is divided by
+        255.0 and shifted by the scalar mean of the scaled training array.
+        The test and tuner sets do NOT come from the Keras validation split
+        (`X_val` / `y_val` are computed but never returned); they are read
+        from the `.bin` files under MODEL_PARAMS_DIR + '/resnet18_cifar10/'
+        and cut to their first 5000 entries.
+
+        # Returns
+            tuple: (X_train, y_train, X_test, y_test, X_tuner, y_tuner)
+        """
 
+        (X_train, y_train), (X_val, y_val) = cifar10.load_data()
 
-    # Instantiate model.
-    model = Model(inputs=inputs, outputs=outputs)
-    return model
+        X_train = X_train / 255.0
+        X_val = X_val / 255.0
 
+        # np.mean / np.std without an axis: one scalar over the whole array.
+        # `std` is only referenced by the commented-out normalisation below,
+        # so the active code is mean-centering only.
+        mean = np.mean(X_train)
+        std = np.std(X_train)
+#         X_train = (X_train - mean) / (std + 1e-7)
+#         X_val = (X_val - mean) / (std + 1e-7)
+        X_train = (X_train - mean)
+        X_val = (X_val - mean)
 
 
-def resnet_v2(input_shape, depth, num_classes=10):
-    """ResNet Version 2 Model builder [b]
+        # Test inputs are read as raw float32 and labels as raw uint32.
+        # NOTE(review): presumably these files were dumped with the same
+        # preprocessing as the training data - confirm.
+        X_test_val = np.fromfile(MODEL_PARAMS_DIR + '/resnet18_cifar10/test_input.bin', dtype=np.float32)
+        Y_test_val = np.fromfile(MODEL_PARAMS_DIR + '/resnet18_cifar10/test_labels.bin', dtype=np.uint32)
 
-    Stacks of (1 x 1)-(3 x 3)-(1 x 1) BN-ReLU-Conv2D or also known as
-    bottleneck layer
-    First shortcut connection per layer is 1 x 1 Conv2D.
-    Second and onwards shortcut connection is identity.
-    At the beginning of each stage, the feature map size is halved (downsampled)
-    by a convolutional layer with strides=2, while the number of filter maps is
-    doubled. Within each stage, the layers have the same number filters and the
-    same filter map sizes.
-    Features maps sizes:
-    conv1  : 32x32,  16
-    stage 0: 32x32,  64
-    stage 1: 16x16, 128
-    stage 2:  8x8,  256
+        # The flat buffer is viewed as a batch of 3 x 32 x 32 arrays.
+        X_test_val = X_test_val.reshape((-1,3,32,32))
 
-    # Arguments
-        input_shape (tensor): shape of input image tensor
-        depth (int): number of core convolutional layers
-        num_classes (int): number of classes (CIFAR10 has 10)
 
-    # Returns
-        model (Model): Keras model instance
-    """
-    if (depth - 2) % 9 != 0:
-        raise ValueError('depth should be 9n+2 (eg 56 or 110 in [b])')
-    # Start model definition.
-    num_filters_in = 16
-    num_res_blocks = int((depth - 2) / 9)
-
-    inputs = Input(shape=input_shape)
-    # v2 performs Conv2D with BN-ReLU on input before splitting into 2 paths
-    x = resnet_layer(inputs=inputs,
-                     num_filters=num_filters_in,
-                     conv_first=True)
-
-    # Instantiate the stack of residual units
-    for stage in range(3):
-        for res_block in range(num_res_blocks):
-            activation = 'relu'
-            batch_normalization = True
-            strides = 1
-            if stage == 0:
-                num_filters_out = num_filters_in * 4
-                if res_block == 0:  # first layer and first stage
-                    activation = None
-                    batch_normalization = False
-            else:
-                num_filters_out = num_filters_in * 2
-                if res_block == 0:  # first layer but not first stage
-                    strides = 2    # downsample
-
-            # bottleneck residual unit
-            y = resnet_layer(inputs=x,
-                             num_filters=num_filters_in,
-                             kernel_size=1,
-                             strides=strides,
-                             activation=activation,
-                             batch_normalization=batch_normalization,
-                             conv_first=False)
-            y = resnet_layer(inputs=y,
-                             num_filters=num_filters_in,
-                             conv_first=False)
-            y = resnet_layer(inputs=y,
-                             num_filters=num_filters_out,
-                             kernel_size=1,
-                             conv_first=False)
-            if res_block == 0:
-                # linear projection residual shortcut connection to match
-                # changed dims
-                x = resnet_layer(inputs=x,
-                                 num_filters=num_filters_out,
-                                 kernel_size=1,
-                                 strides=strides,
-                                 activation=None,
-                                 batch_normalization=False)
-            x = keras.layers.add([x, y])
+        # Tuning set: same file layout as the test set.
+        X_tune_val = np.fromfile(MODEL_PARAMS_DIR + '/resnet18_cifar10/tune_input.bin', dtype=np.float32)
+        Y_tune_val = np.fromfile(MODEL_PARAMS_DIR + '/resnet18_cifar10/tune_labels.bin', dtype=np.uint32)
+
+        X_tune_val = X_tune_val.reshape((-1,3,32,32))
+
+
+        # Keep at most the first 5000 entries of each set.
+        X_test = X_test_val[:5000]
+        y_test= Y_test_val[:5000]
 
-        num_filters_in = num_filters_out
+        X_tuner = X_tune_val[:5000]
+        y_tuner = Y_tune_val[:5000]
 
-    # Add classifier on top.
-    # v2 has BN-ReLU before Pooling
-    x = BatchNormalization()(x)
-    x = Activation('relu')(x)
-    x = AveragePooling2D(pool_size=8)(x)
-    y = Flatten()(x)
-    outputs = Dense(num_classes,
-                    activation='softmax',
-                    kernel_initializer='he_normal')(y)
 
-    # Instantiate model.
-    model = Model(inputs=inputs, outputs=outputs)
-    return model
+        return X_train, y_train, X_test, y_test, X_tuner, y_tuner
 
-depth = 20
 
-if version == 2:
-    model = resnet_v2(input_shape=input_shape, depth=depth)
-else:
-    model = resnet_v1_1(input_shape=input_shape, depth=depth)
+    def trainModel(self, model, X_train, y_train, X_test, y_test):
+        """Compile `model`, fit it on the training data and return it.
+
+        Labels are one-hot encoded with `self.num_classes`. The optimizer
+        is Adam with initial learning rate `self.lr_schedule(0)`; training
+        uses a LearningRateScheduler and a ReduceLROnPlateau callback, with
+        (X_test, y_test) as validation data.
+
+        `data_augmentation`, `batch_size` and `epochs` are not parameters
+        or locals of this method; they are read from the enclosing scope.
+
+        # Arguments
+            model (Model): Keras model to train
+            X_train, y_train: training inputs and integer labels
+            X_test, y_test: validation inputs and integer labels
+
+        # Returns
+            model (Model): the same model object after fitting
+        """
 
+        train_targets = to_categorical(y_train, self.num_classes)
+        test_targets = to_categorical(y_test, self.num_classes)
+
+        model.compile(loss='categorical_crossentropy',
+                      optimizer=Adam(lr=self.lr_schedule(0)),
+                      metrics=['accuracy'])
+
+        # Callbacks for learning rate adjustment.
+        lr_scheduler = LearningRateScheduler(self.lr_schedule)
+        lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1),
+                                       cooldown=0,
+                                       patience=5,
+                                       min_lr=0.5e-6)
+        callbacks = [lr_reducer, lr_scheduler]
+
+        if data_augmentation:
+            print('Using real-time data augmentation.')
+            # Augmentation settings: small horizontal/vertical shifts and
+            # horizontal flips; every normalisation option is switched off.
+            augment_options = dict(
+                featurewise_center=False,
+                samplewise_center=False,
+                featurewise_std_normalization=False,
+                samplewise_std_normalization=False,
+                zca_whitening=False,
+                zca_epsilon=1e-06,
+                rotation_range=0,
+                width_shift_range=0.1,
+                height_shift_range=0.1,
+                shear_range=0.,
+                zoom_range=0.,
+                channel_shift_range=0.,
+                fill_mode='nearest',
+                cval=0.,
+                horizontal_flip=True,
+                vertical_flip=False,
+                rescale=None,
+                preprocessing_function=None,
+                data_format="channels_first",
+                validation_split=0.0,
+            )
+            datagen = ImageDataGenerator(**augment_options)
+
+            # Computes the statistics the generator would need for
+            # featurewise normalisation / ZCA whitening.
+            datagen.fit(X_train)
+
+            # Train on the batches produced by the generator.
+            batches = datagen.flow(X_train, train_targets, batch_size=batch_size)
+            model.fit_generator(batches,
+                                validation_data=(X_test, test_targets),
+                                epochs=epochs, verbose=1, workers=4,
+                                callbacks=callbacks)
+        else:
+            print('Not using data augmentation.')
+            model.fit(X_train, train_targets,
+                      batch_size=batch_size,
+                      epochs=epochs,
+                      validation_data=(X_test, test_targets),
+                      shuffle=True,
+                      callbacks=callbacks)
+
+        return model
+
+  
     
-model.compile(loss='categorical_crossentropy',
-              optimizer=Adam(lr=lr_schedule(0)),
-              metrics=['accuracy'])
-model.summary()
-print(model_type)
-
-# Prepare model model saving directory.
-save_dir = os.path.join(os.getcwd(), 'saved_models')
-model_name = 'cifar10_%s_model.{epoch:03d}.h5' % model_type
-if not os.path.isdir(save_dir):
-    os.makedirs(save_dir)
-filepath = os.path.join(save_dir, model_name)
-
-# Prepare callbacks for model saving and for learning rate adjustment.
-checkpoint = ModelCheckpoint(filepath=filepath,
-                             monitor='val_acc',
-                             verbose=1,
-                             save_best_only=True)
-
-lr_scheduler = LearningRateScheduler(lr_schedule)
-
-lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1),
-                               cooldown=0,
-                               patience=5,
-                               min_lr=0.5e-6)
-
-callbacks = [checkpoint, lr_reducer, lr_scheduler]
-
-# Run training, with or without data augmentation.
-if not data_augmentation:
-    print('Not using data augmentation.')
-    model.fit(x_train, y_train,
-              batch_size=batch_size,
-              epochs=epochs,
-              validation_data=(x_test, y_test),
-              shuffle=True,
-              callbacks=callbacks)
-else:
-    print('Using real-time data augmentation.')
-    # This will do preprocessing and realtime data augmentation:
-    datagen = ImageDataGenerator(
-        # set input mean to 0 over the dataset
-        featurewise_center=False,
-        # set each sample mean to 0
-        samplewise_center=False,
-        # divide inputs by std of dataset
-        featurewise_std_normalization=False,
-        # divide each input by its std
-        samplewise_std_normalization=False,
-        # apply ZCA whitening
-        zca_whitening=False,
-        # epsilon for ZCA whitening
-        zca_epsilon=1e-06,
-        # randomly rotate images in the range (deg 0 to 180)
-        rotation_range=0,
-        # randomly shift images horizontally
-        width_shift_range=0.1,
-        # randomly shift images vertically
-        height_shift_range=0.1,
-        # set range for random shear
-        shear_range=0.,
-        # set range for random zoom
-        zoom_range=0.,
-        # set range for random channel shifts
-        channel_shift_range=0.,
-        # set mode for filling points outside the input boundaries
-        fill_mode='nearest',
-        # value used for fill_mode = "constant"
-        cval=0.,
-        # randomly flip images
-        horizontal_flip=True,
-        # randomly flip images
-        vertical_flip=False,
-        # set rescaling factor (applied before any other transformation)
-        rescale=None,
-        # set function that will be applied on each input
-        preprocessing_function=None,
-        # image data format, either "channels_first" or "channels_last"
-        data_format="channels_first",
-        # fraction of images reserved for validation (strictly between 0 and 1)
-        validation_split=0.0)
-
-    # Compute quantities required for featurewise normalization
-    # (std, mean, and principal components if ZCA whitening is applied).
-    datagen.fit(x_train)
-
-    # Fit the model on the batches generated by datagen.flow().
-    model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
-                        validation_data=(x_test, y_test),
-                        epochs=epochs, verbose=1, workers=4,
-                        callbacks=callbacks)
-
-# Score trained model.
-scores = model.evaluate(x_test, y_test, verbose=1)
-print('Test loss:', scores[0])
-print('Test accuracy:', scores[1])
-
-
-dumpCalibrationData("calibration_data/resnet18_calib.bin", x_train,
-                        "calibration_data/resnet18_train_labels.bin", train_labels)
-sys.exit(0)
+# Script entry point: configure the backend, describe the ResNet18/CIFAR-10
+# benchmark and hand control to its run() method.
+if __name__ == '__main__':
+
+    # Only device '0' is made visible to CUDA.
+    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+    # Changing to NCHW format
+    K.set_image_data_format('channels_first')
+
+
+    ### Parameters specific to each benchmark
+    # reload_dir and keras_model_file both live under
+    # MODEL_PARAMS_DIR + '/resnet18_cifar10/'.
+    reload_dir = MODEL_PARAMS_DIR + '/resnet18_cifar10/'
+    keras_model_file = MODEL_PARAMS_DIR + '/resnet18_cifar10/weights.h5'
+    data_dir = '/resnet18_cifar10/' 
+    src_dir = 'data/resnet18_cifar10_src/'
+    num_classes = 10
+    # NOTE(review): trainModel references a bare `batch_size`; when this file
+    # runs as a script this module-level assignment is visible to it -
+    # confirm no other definition is intended.
+    batch_size = 500
+
+    model = ResNet18_CIFAR10('ResNet18_CIFAR10', reload_dir, keras_model_file, data_dir, src_dir, num_classes, batch_size)
     
-#translate_to_approxhpvm(model, "resnet18_cifar10_hpvm/", x_test, test_labels)
+    # The command-line arguments are passed through unchanged.
+    model.run(sys.argv)
     
-translate_to_approxhpvm(model, "resnet_test/", x_test, test_labels, 'resnet18_cifar10_promise/', y_test)
diff --git a/hpvm/projects/keras/src/resnet50_imagenet.py b/hpvm/projects/keras/src/resnet50_imagenet.py
index 55d0918b7a526ba1a9866d2d8e3b0e2e8608cc25..0c3006213d7880f6133e1f8030256d50d25ea35d 100644
--- a/hpvm/projects/keras/src/resnet50_imagenet.py
+++ b/hpvm/projects/keras/src/resnet50_imagenet.py
@@ -1,289 +1,155 @@
 import os
+import sys
 import glob
-import random
 
-import scipy
-import scipy.io
-import cv2
 import numpy as np
-
 import tensorflow as tf
+import scipy
+import scipy.io
 import keras
-from keras.models import Sequential, Model
+from keras.models import Model, Sequential
 from keras.layers import *
-from keras.utils import to_categorical
-from keras.applications.resnet50 import ResNet50, preprocess_input
+from keras.optimizers import Adam
+from keras import regularizers
 from keras import backend as K
+from keras.utils import to_categorical
+from keras.preprocessing.image import ImageDataGenerator
+from keras.callbacks import LearningRateScheduler
 
-from frontend.approxhpvm_translator import translate_to_approxhpvm
-from frontend.weight_utils import dumpCalibrationData2
-
-
-np.random.seed(2020)
-
-os.environ["CUDA_VISIBLE_DEVICES"] = "1"
-
-K.set_image_data_format('channels_first')
-
-data_format = 'channels_first'
-
-
-IMAGENET_DIR = '/home/nz11/ILSVRC2012/'
-OUTPUT_DIR = 'data/resnet50_imagenet_tune_regenerate/'
-WEIGHTS_PATH = 'data/resnet50_imagenet/weights.h5'
-
-NUM_CLASSES = 200
-IMAGES_PER_CLASS = 40
-# VAL_SIZE = 100
-
-
-
-def identity_block(input_tensor, kernel_size, filters, stage, block):
-    filters1, filters2, filters3 = filters
-    bn_axis = 1
-
-    x = Conv2D(filters1, (1, 1))(input_tensor)
-    x = BatchNormalization(axis=bn_axis)(x)
-    x = Activation('relu')(x)
-
-    x = Conv2D(filters2, kernel_size,
-                      padding='same')(x)
-    x = BatchNormalization(axis=bn_axis)(x)
-    x = Activation('relu')(x)
-
-    x = Conv2D(filters3, (1, 1))(x)
-    x = BatchNormalization(axis=bn_axis)(x)
-
-    x = add([x, input_tensor])
-    x = Activation('relu')(x)
-    return x
-
-
-def conv_block(input_tensor,
-               kernel_size,
-               filters,
-               stage,
-               block,
-               strides=(2, 2)):
-    filters1, filters2, filters3 = filters
-    bn_axis = 1
-    x = Conv2D(filters1, (1, 1), strides=strides)(input_tensor)
-    x = BatchNormalization(axis=bn_axis)(x)
-    x = Activation('relu')(x)
-
-    x = Conv2D(filters2, kernel_size, padding='same')(x)
-    x = BatchNormalization(axis=bn_axis)(x)
-    x = Activation('relu')(x)
-
-    x = Conv2D(filters3, (1, 1))(x)
-    x = BatchNormalization(axis=bn_axis)(x)
-
-    shortcut = Conv2D(filters3, (1, 1), strides=strides)(input_tensor)
-    shortcut = BatchNormalization(
-        axis=bn_axis)(shortcut)
-
-    x = add([x, shortcut])
-    x = Activation('relu')(x)
-    return x
-
-
-def get_resnet50_nchw_keras():
-
-    img_input = Input(shape=(3, 224, 224))
-    bn_axis = 1
-
-    x = ZeroPadding2D((3, 3))(img_input)
-    x = Conv2D(64, (7, 7), strides=(2, 2))(x)
-#     x = BatchNormalization(axis=bn_axis)(x)
-    x = Activation('relu')(x)
-    x = MaxPooling2D((3, 3), strides=(2, 2))(x)
-    x = BatchNormalization(axis=bn_axis)(x)
-
-    x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
-    x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
-    x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')
-
-    x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
-    x = identity_block(x, 3, [128, 128, 512], stage=3, block='b')
-    x = identity_block(x, 3, [128, 128, 512], stage=3, block='c')
-    x = identity_block(x, 3, [128, 128, 512], stage=3, block='d')
+from Benchmark import Benchmark
+from Config import MODEL_PARAMS_DIR
 
-    x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
-    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b')
-    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c')
-    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d')
-    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e')
-    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f')
 
-    x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
-    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
-    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c')
 
-    x = AveragePooling2D((7, 7))(x)
-    x = Flatten()(x)
-    x = Dense(1000)(x)
-    x = Activation('softmax')(x)
-        
-    model = Model(img_input, x)
-    
-    
-    original_model = ResNet50()
-    
-    for i in range(len(original_model.layers)):
-        try:
-            model.layers[i].set_weights(original_model.layers[i].get_weights())
-#             model.layers[i].trainable = False
-        except:
-            print (i, 'skipped')
-    model.layers[5].set_weights(original_model.layers[3].get_weights())
+class ResNet50(Benchmark):
     
-            
-    return model
-
-
-def load_image(x):
-    
-    image = cv2.imread(x)
+    def buildModel(self):
+        """Build the ResNet-50 graph for (3, 224, 224) inputs.
+
+        The network is a 7 x 7 stem followed by four groups of bottleneck
+        blocks (3, 4, 6 and 3 blocks) and a 1000-way softmax classifier.
+        Batch normalisation uses axis 1 throughout, matching the
+        channel-first input shape declared below.
+
+        # Returns
+            model (Model): Keras model instance (not compiled here)
+        """
         
-    height, width, _ = image.shape
-    new_height = height * 256 // min(image.shape[:2])
-    new_width = width * 256 // min(image.shape[:2])
-    image = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_CUBIC)
-    
-    height, width, _ = image.shape
-    startx = width // 2 - (224 // 2)
-    starty = height // 2 - (224 // 2)
-    image = image[starty:starty + 224, startx:startx + 224]
-    
-    image = image[:, :, ::-1]
-    image = np.transpose(image, (2, 0, 1))
-    image = preprocess_input(image.astype(np.float32), data_format=data_format)
+        def identity_block(input_tensor, kernel_size, filters, stage, block):
+            """Bottleneck block whose shortcut is the unmodified input.
+
+            `filters` is unpacked into the widths of the three convolutions.
+            `stage` and `block` are accepted but not used in the body.
+            """
+            filters1, filters2, filters3 = filters
+            bn_axis = 1
+
+            x = Conv2D(filters1, (1, 1))(input_tensor)
+            x = BatchNormalization(axis=bn_axis)(x)
+            x = Activation('relu')(x)
+
+            x = Conv2D(filters2, kernel_size,
+                              padding='same')(x)
+            x = BatchNormalization(axis=bn_axis)(x)
+            x = Activation('relu')(x)
+
+            x = Conv2D(filters3, (1, 1))(x)
+            x = BatchNormalization(axis=bn_axis)(x)
+
+            # Shortcut: add the block input directly, then apply the ReLU.
+            x = add([x, input_tensor])
+            x = Activation('relu')(x)
+            return x
+
+        def conv_block(input_tensor,
+                       kernel_size,
+                       filters,
+                       stage,
+                       block,
+                       strides=(2, 2)):
+            """Bottleneck block with a 1 x 1 Conv2D + BN on the shortcut.
+
+            `strides` is applied to the first main-path convolution and to
+            the shortcut convolution. `stage` and `block` are accepted but
+            not used in the body.
+            """
+            filters1, filters2, filters3 = filters
+            bn_axis = 1
+            x = Conv2D(filters1, (1, 1), strides=strides)(input_tensor)
+            x = BatchNormalization(axis=bn_axis)(x)
+            x = Activation('relu')(x)
+
+            x = Conv2D(filters2, kernel_size, padding='same')(x)
+            x = BatchNormalization(axis=bn_axis)(x)
+            x = Activation('relu')(x)
+
+            x = Conv2D(filters3, (1, 1))(x)
+            x = BatchNormalization(axis=bn_axis)(x)
+
+            # Projected shortcut so its shape matches the main path.
+            shortcut = Conv2D(filters3, (1, 1), strides=strides)(input_tensor)
+            shortcut = BatchNormalization(
+                axis=bn_axis)(shortcut)
+
+            x = add([x, shortcut])
+            x = Activation('relu')(x)
+            return x
+
+        img_input = Input(shape=(3, 224, 224))
+        bn_axis = 1
+
+        # Stem. The BatchNormalization directly after the 7 x 7 convolution
+        # is commented out; the first BN is applied after the max-pooling.
+        x = ZeroPadding2D((3, 3))(img_input)
+        x = Conv2D(64, (7, 7), strides=(2, 2))(x)
+    #     x = BatchNormalization(axis=bn_axis)(x)
+        x = Activation('relu')(x)
+        x = MaxPooling2D((3, 3), strides=(2, 2))(x)
+        x = BatchNormalization(axis=bn_axis)(x)
+
+        # Stage 2 keeps the resolution (strides=(1, 1)); the later stages
+        # use conv_block's default strides of (2, 2).
+        x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
+        x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
+        x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')
+
+        x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
+        x = identity_block(x, 3, [128, 128, 512], stage=3, block='b')
+        x = identity_block(x, 3, [128, 128, 512], stage=3, block='c')
+        x = identity_block(x, 3, [128, 128, 512], stage=3, block='d')
+
+        x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
+        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b')
+        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c')
+        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d')
+        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e')
+        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f')
+
+        x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
+        x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
+        x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c')
+
+        # Classifier head: the output width is fixed at 1000 here.
+        x = AveragePooling2D((7, 7))(x)
+        x = Flatten()(x)
+        x = Dense(1000)(x)
+        x = Activation('softmax')(x)
+
+        model = Model(img_input, x)
         
-    return image.astype(np.float32)
-
-
-meta = scipy.io.loadmat(IMAGENET_DIR + 'ILSVRC2012_devkit_t12/data/meta.mat')
-original_idx_to_synset = {}
-synset_to_name = {}
-
-for i in range(1000):
-    ilsvrc2012_id = int(meta['synsets'][i,0][0][0][0])
-    synset = meta['synsets'][i,0][1][0]
-    name = meta['synsets'][i,0][2][0]
-    original_idx_to_synset[ilsvrc2012_id] = synset
-    synset_to_name[synset] = name
-
-synset_to_keras_idx = {}
-keras_idx_to_name = {}
-f = open(IMAGENET_DIR + 'ILSVRC2012_devkit_t12/data/synset_words.txt', 'r')
-c = 0
-for line in f:
-    parts = line.split(' ')
-    synset_to_keras_idx[parts[0]] = c
-    keras_idx_to_name[c] = ' '.join(parts[1:])
-    c += 1
-f.close()
-
+        return model
 
-
-
-model = get_resnet50_nchw_keras()
-    
-X_tune, X_test = [], []
-y_tune, y_true = [], []
-    
-classes = glob.glob(IMAGENET_DIR + 'val/*')
-
-for c in np.random.permutation(len(classes))[:NUM_CLASSES]:
-    x = glob.glob(classes[c] + '/*')
-    x = np.array(x)
-            
-    idx = np.random.permutation(len(x))
-    idx = idx[:max(len(idx), IMAGES_PER_CLASS)]
-    
-    synset = classes[c].split('/')[-1]
-    images = list(map(lambda x : load_image(x), x[idx]))
-    labels = [synset_to_keras_idx[synset]] * len(x[idx])
     
-    X_test += images[:IMAGES_PER_CLASS // 2]
-    y_true += labels[:IMAGES_PER_CLASS // 2]
-    
-    X_tune += images[IMAGES_PER_CLASS // 2:]
-    y_tune += labels[IMAGES_PER_CLASS // 2:]
-    
-    
-X_test = np.array(X_test)
-y_true = np.array(y_true)
-X_tune = np.array(X_tune)
-y_tune = np.array(y_tune)
-
-print ('tune size', len(X_tune))
-print ('test size', len(X_test))
-
-
-
-
-
-
-def train_helper(x):
-    
-    try:
-        x = x.decode('utf-8')
-    except:
-        pass
-    
-    image = load_image(x)
-    
-    y = np.zeros(1000, dtype=np.uint8)
+    def data_preprocess(self):
+        """Load the pre-dumped test and tuning sets for this benchmark.
+
+        Inputs are read as raw float32 and viewed as batches of
+        3 x 224 x 224 arrays; labels are read as raw uint32. No training
+        data is loaded, so the first two entries of the result are None.
+
+        # Returns
+            tuple: (X_train, y_train, X_test, y_test, X_tuner, y_tuner)
+        """
         
-    y[synset_to_keras_idx[x.split('/')[-2]]]= 1
+        X_test = np.fromfile(MODEL_PARAMS_DIR + '/resnet50_imagenet/test_input.bin',
+                             dtype=np.float32).reshape((-1, 3, 224, 224))
+        y_test = np.fromfile(MODEL_PARAMS_DIR + '/resnet50_imagenet/test_labels.bin',
+                             dtype=np.uint32)
         
-    return image, y
-
-
-
-train_images = glob.glob(IMAGENET_DIR + 'train/*/*')
-random.shuffle(train_images)
-
-dataset = tf.data.Dataset().from_tensor_slices(train_images)
-dataset = dataset.map(
-    lambda x : tf.py_func(train_helper, [x], [tf.float32, tf.uint8]), 
-    num_parallel_calls=16
-)
+        X_tuner = np.fromfile(MODEL_PARAMS_DIR + '/resnet50_imagenet/tune_input.bin',
+                              dtype=np.float32).reshape((-1, 3, 224, 224))
+        y_tuner = np.fromfile(MODEL_PARAMS_DIR + '/resnet50_imagenet/tune_labels.bin',
+                              dtype=np.uint32)
+
+        # Placeholders for the training inputs and labels.
+        return None, None, X_test, y_test, X_tuner, y_tuner
+    
 
-dataset = dataset.shuffle(buffer_size=1000)
-dataset = dataset.batch(64)
-dataset = dataset.repeat()
+    def trainModel(self, model, X_train=None, y_train=None, X_test=None, y_test=None):
+        """Reject training: this benchmark only runs with pretrained weights.
+
+        The optional data parameters mirror the five-argument `trainModel`
+        used by the CIFAR-10 benchmarks, so a caller that passes the data
+        sets gets the message below rather than a TypeError about the
+        argument count. They are ignored.
+
+        # Raises
+            AssertionError: always. It is raised explicitly rather than via
+                `assert False`, which is removed when Python runs with -O.
+        """
 
-next_element = dataset.make_one_shot_iterator().get_next()
+        raise AssertionError("ImageNet training not supported - use Pretrained weights")
 
-sess = tf.Session()
 
-def generate():
-    while True:
-        yield sess.run(next_element)
     
+# Script entry point: configure the backend, describe the ResNet50/ImageNet
+# benchmark and hand control to its run() method.
+if __name__ == '__main__':
+      
+    # Only device '0' is made visible to CUDA.
+    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+    # Changing to NCHW format
+    K.set_image_data_format('channels_first')
 
 
-model.compile(optimizer=keras.optimizers.Adam(lr=0.00001), loss='categorical_crossentropy', metrics=['acc'])
+    ### Parameters specific to each benchmark
+    # reload_dir and keras_model_file both live under
+    # MODEL_PARAMS_DIR + '/resnet50_imagenet/'.
+    reload_dir = MODEL_PARAMS_DIR + '/resnet50_imagenet/'
+    keras_model_file = MODEL_PARAMS_DIR + '/resnet50_imagenet/weights.h5'
+    data_dir = '/resnet50_imagenet/' 
+    src_dir = 'data/resnet50_imagenet_src/'
+    num_classes = 1000
+    batch_size = 50
 
-if os.path.exists(WEIGHTS_PATH):
-    model.load_weights(WEIGHTS_PATH)
-else:
-    pass
-#     model.fit_generator(generate(), steps_per_epoch=1000, validation_data=(X_test, to_categorical(y_true, num_classes=1000)), epochs=6)
-#     model.save_weights(OUTPUT_DIR + 'weights.h5')
-
-translate_to_approxhpvm(model, OUTPUT_DIR, X_tune, y_tune, 1000)
-
-# # dumpCalibrationData2(OUTPUT_DIR + 'test_input_10K.bin', X_test, OUTPUT_DIR + 'test_labels_10K.bin', y_true)
-# dumpCalibrationData2(OUTPUT_DIR + 'tune_input.bin', X_tune, OUTPUT_DIR + 'tune_labels.bin', y_tune)
-# dumpCalibrationData2(OUTPUT_DIR + 'test_input.bin', X_test, OUTPUT_DIR + 'test_labels.bin', y_true)
+    model = ResNet50('ResNet50_imagenet', reload_dir, keras_model_file, data_dir, src_dir, num_classes, batch_size)
+    
+    # The command-line arguments are passed through unchanged.
+    model.run(sys.argv)
 
 
-pred = np.argmax(model.predict(X_test), axis=1)
-print ('val accuracy', np.sum(pred == y_true.ravel()) / len(X_test))
-    
-# pred = np.argmax(model.predict(X_tune), axis=1)
-# print ('val accuracy', np.sum(pred == y_tune.ravel()) / len(X_tune))
     
\ No newline at end of file
diff --git a/hpvm/projects/keras/src/vgg16_cifar10.py b/hpvm/projects/keras/src/vgg16_cifar10.py
index df1bcc6b0f414a8ba7cba8911e0d87ff0fbcd951..873e23b766ffbd58c1d5db89141da60fee88126e 100644
--- a/hpvm/projects/keras/src/vgg16_cifar10.py
+++ b/hpvm/projects/keras/src/vgg16_cifar10.py
@@ -1,34 +1,34 @@
+import os
+import sys
+import glob
 
-
-from __future__ import print_function
-import keras
-from keras.datasets import cifar10
-from keras.preprocessing.image import ImageDataGenerator
-from keras.models import Sequential
-from keras.layers import Dense, Dropout, Activation, Flatten
-from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
-from keras import optimizers
 import numpy as np
-from keras.layers.core import Lambda
-from keras import backend as K
+import tensorflow as tf
+import scipy
+import scipy.io
+import keras
+from keras.models import Model, Sequential
+from keras.layers import *
+from keras.optimizers import Adam
 from keras import regularizers
-import os
-import sys
+from keras import backend as K
+from keras.utils import to_categorical
+from keras.preprocessing.image import ImageDataGenerator
+from keras.callbacks import LearningRateScheduler
+
+from keras.datasets import cifar10
 from Benchmark import Benchmark
-from frontend.approxhpvm_translator import translate_to_approxhpvm
-from frontend.weight_utils import dumpCalibrationData
+from Config import MODEL_PARAMS_DIR
 
 
 
 class VGG16_CIFAR10(Benchmark):
-    
-    
         
     def buildModel(self):
         # Build the network of vgg for 10 classes with massive dropout and weight decay as described in the paper.
 
         self.weight_decay = 0.0005
-        self.x_shape = [3,32,32]
+        self.x_shape = [3, 32, 32]
 
         model = Sequential()
         weight_decay = self.weight_decay
@@ -97,55 +97,40 @@ class VGG16_CIFAR10(Benchmark):
         model.add(Activation('softmax'))
         return model
 
-
-    def normalize(self,X_train,X_test):
-        #this function normalize inputs for zero mean and unit variance
-        # it is used when training a model.
-        # Input: training set and test set
-        # Output: normalized training set and test set according to the trianing set statistics.
-        mean = np.mean(X_train,axis=(0,1,2,3))
-        std = np.std(X_train, axis=(0, 1, 2, 3))
-        X_train = (X_train-mean)/(std+1e-7)
-        X_test = (X_test-mean)/(std+1e-7)
-        return X_train, X_test
-
-
+    
     def data_preprocess(self):
 
-        (X_train, Y_train), (X_test, Y_test) = cifar10.load_data()
-        #X_train = X_train / 255.0
-        #X_test = X_test / 255.0
+        (X_train, y_train), (X_val, y_val) = cifar10.load_data()
 
-        mean = np.mean(X_train,axis=(0,1,2,3))
-        std = np.std(X_train,axis=(0,1,2,3))   
-        X_train = (X_train-mean)/(std+1e-7)
-        X_test = (X_test-mean)/(std+1e-7)  
+        X_train = X_train / 255.0
+        X_val = X_val / 255.0
 
-        return X_train, Y_train, X_test, Y_test
+        mean = np.mean(X_train)
+        std = np.std(X_train)
+        X_train = (X_train - mean) / (std + 1e-7)
+        X_val = (X_val - mean) / (std + 1e-7)  
 
+        X_test = X_val[0:5000]
+        y_test = y_val[0:5000]
+        X_tuner = X_val[5000:]
+        y_tuner = y_val[5000:]
+
+        return X_train, y_train, X_test, y_test, X_tuner, y_tuner
 
-        
     
-    def trainModel(self, model):
+    def trainModel(self, model, X_train, y_train, X_test, y_test):
 
-        #training parameters
+        y_train = to_categorical(y_train, self.num_classes)
+        y_test = to_categorical(y_test, self.num_classes)
+        
         batch_size = 128
-        #maxepoches = 250
-        maxepoches = 30
         learning_rate = 0.01
-        lr_decay = 1e-6
         lr_drop = 20
-        # The data, shuffled and split between train and test sets:
-        (x_train, y_train), (x_test, y_test) = cifar10.load_data()
-        x_train = x_train.astype('float32')
-        x_test = x_test.astype('float32')
-        x_train, y_train, x_test, y_test = self.data_preprocess()
-
-        y_train = keras.utils.to_categorical(y_train, self.num_classes)
-        y_test = keras.utils.to_categorical(y_test, self.num_classes)
 
+        
         def lr_scheduler(epoch):
             return learning_rate * (0.5 ** (epoch // lr_drop))
+        
         reduce_lr = keras.callbacks.LearningRateScheduler(lr_scheduler)
 
         #data augmentation
@@ -161,46 +146,46 @@ class VGG16_CIFAR10(Benchmark):
             horizontal_flip=True,  # randomly flip images
             vertical_flip=False)  # randomly flip images
         # (std, mean, and principal components if ZCA whitening is applied).
-        datagen.fit(x_train)
+        datagen.fit(X_train)
 
 
-        #optimization details
-        sgd = optimizers.SGD(lr=learning_rate, decay=lr_decay, momentum=0.9, nesterov=True)
-        model.compile(loss='categorical_crossentropy', optimizer=sgd,metrics=['accuracy'])
+        model.compile(
+            loss='categorical_crossentropy', 
+            optimizer=keras.optimizers.SGD(lr=learning_rate, decay=1e-6, momentum=0.9, nesterov=True),
+            metrics=['accuracy']
+        )
 
-
-        # training process in a for loop with learning rate drop every 25 epoches.
-
-        historytemp = model.fit_generator(datagen.flow(x_train, y_train,
-                                          batch_size=batch_size),
-                                          steps_per_epoch=x_train.shape[0] // batch_size,
-                                          epochs=maxepoches,
-                                          validation_data=(x_test, y_test),callbacks=[reduce_lr],verbose=2)
+        # Train with fit_generator; lr_scheduler halves the learning rate every lr_drop (20) epochs.
+        
+        model.fit_generator(
+            datagen.flow(X_train, y_train, batch_size=batch_size),
+            steps_per_epoch=X_train.shape[0] // batch_size,
+            epochs=250,
+            validation_data=(X_test, y_test),
+            callbacks=[reduce_lr]
+        )
         
         return model
 
 
-
     
-
-
-    
-if __name__ == "__main__":
-
+if __name__ == '__main__':
       
-    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
+    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
     # Changing to NCHW format
     K.set_image_data_format('channels_first')
 
 
     ### Parameters specific to each benchmark
-    reload_dir = "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/"
-    keras_model_file = "vgg16_cifar10.h5"
-    hpvm_dir = "data/vgg16_cifar10/" 
+    reload_dir = MODEL_PARAMS_DIR + '/vgg16_cifar10/'
+    keras_model_file = MODEL_PARAMS_DIR + '/vgg16_cifar10/weights.h5'
+    data_dir = '/vgg16_cifar10/' 
+    src_dir = 'data/vgg16_cifar10_src/'
     num_classes = 10
+    batch_size = 500
 
-    vgg16_cifar10 = VGG16_CIFAR10("vgg16_cifar10", reload_dir, keras_model_file, hpvm_dir, num_classes)
+    model = VGG16_CIFAR10('VGG16_CIFAR10', reload_dir, keras_model_file, data_dir, src_dir, num_classes, batch_size)
     
-    vgg16_cifar10.run(sys.argv)
+    model.run(sys.argv)
 
     
diff --git a/hpvm/projects/keras/src/vgg16_cifar100.py b/hpvm/projects/keras/src/vgg16_cifar100.py
index 41d0816ecd7c18a22448a78b9e0a4b1a573d40a7..03bb852e00bb61a7b17836f5c4df5bbf56c4b466 100644
--- a/hpvm/projects/keras/src/vgg16_cifar100.py
+++ b/hpvm/projects/keras/src/vgg16_cifar100.py
@@ -1,32 +1,34 @@
-
-from __future__ import print_function
 import os
 import sys
-import keras
-from keras.datasets import cifar100
-from keras.preprocessing.image import ImageDataGenerator
-from keras.models import Sequential
-from keras.layers import Dense, Dropout, Activation, Flatten
-from keras.layers import Conv2D, MaxPooling2D
-from keras import optimizers
+import glob
+
 import numpy as np
-from keras.layers.core import Lambda
-from keras import backend as K
+import tensorflow as tf
+import scipy
+import scipy.io
+import keras
+from keras.models import Model, Sequential
+from keras.layers import *
+from keras.optimizers import Adam
 from keras import regularizers
+from keras import backend as K
+from keras.utils import to_categorical
+from keras.preprocessing.image import ImageDataGenerator
+from keras.callbacks import LearningRateScheduler
+
+from keras.datasets import cifar100
 from Benchmark import Benchmark
-from frontend.weight_utils import dumpCalibrationData
-from frontend.approxhpvm_translator import translate_to_approxhpvm
+from Config import MODEL_PARAMS_DIR
 
 
 
 class VGG16_CIFAR100(Benchmark):
 
-
     def buildModel(self):
 
         # Build the network of vgg for 100 classes 
         self.weight_decay = 0.0005
-        self.x_shape = [3,32,32]
+        self.x_shape = [3, 32, 32]
 
         model = Sequential()
         weight_decay = self.weight_decay
@@ -112,40 +114,35 @@ class VGG16_CIFAR100(Benchmark):
         return model
 
 
-
     def data_preprocess(self):
 
-        (X_train, Y_train), (X_test, Y_test) = cifar100.load_data()
+        (X_train, y_train), (X_val, y_val) = cifar100.load_data()
+
+        X_train = X_train / 255.0
+        X_val = X_val / 255.0
 
-        mean = np.mean(X_train,axis=(0,1,2,3))
-        std = np.std(X_train,axis=(0,1,2,3))   
-        X_train = (X_train-mean)/(std+1e-7)
-        X_test = (X_test-mean)/(std+1e-7)  
+        mean = np.mean(X_train)
+        std = np.std(X_train)
+        X_train = (X_train - mean) / (std + 1e-7)
+        X_val = (X_val - mean) / (std + 1e-7)  
 
-        return X_train, Y_train, X_test, Y_test
+        X_test = X_val[0:5000]
+        y_test = y_val[0:5000]
+        X_tuner = X_val[5000:]
+        y_tuner = y_val[5000:]
 
+        return X_train, y_train, X_test, y_test, X_tuner, y_tuner
     
     
-    def trainModel(self,model):
+    def trainModel(self,model, X_train, y_train, X_test, y_test):
 
-        #training parameters
+        y_train = to_categorical(y_train, self.num_classes)
+        y_test = to_categorical(y_test, self.num_classes)
+        
         batch_size = 128
-        #maxepoches = 250
-        #maxepoches = 400
-        maxepoches = 4
-        learning_rate = 0.05
-        lr_decay = 1e-6
-        lr_drop = 20
-
-        # The data, shuffled and split between train and test sets:
-        (x_train, y_train), (x_test, y_test) = cifar100.load_data()
-        x_train = x_train.astype('float32')
-        x_test = x_test.astype('float32')
-        x_train, x_test = self.normalize(x_train, x_test)
-
-        y_train = keras.utils.to_categorical(y_train, self.num_classes)
-        y_test = keras.utils.to_categorical(y_test, self.num_classes)
-
+        learning_rate = 0.1
+        lr_drop = 30
+        
  
         def lr_scheduler(epoch):
             return learning_rate * (0.5 ** (epoch // lr_drop))
@@ -165,46 +162,45 @@ class VGG16_CIFAR100(Benchmark):
             horizontal_flip=True,  # randomly flip images
             vertical_flip=False)  # randomly flip images
         # (std, mean, and principal components if ZCA whitening is applied).
-        datagen.fit(x_train)
-
+        datagen.fit(X_train)
 
-        #optimization details
-        sgd = optimizers.SGD(lr=learning_rate, decay=lr_decay, momentum=0.9, nesterov=True)
-        model.compile(loss='categorical_crossentropy', optimizer=sgd,metrics=['accuracy'])
 
+        model.compile(
+            loss='categorical_crossentropy',
+            optimizer=Adam(lr=learning_rate),  # Adam class imported from keras.optimizers
+            metrics=['accuracy']
+        )
         
         # training process in a for loop with learning rate drop every 25 epoches.
-        historytemp = model.fit_generator(datagen.flow(x_train, y_train,
-                                         batch_size=batch_size),
-                            steps_per_epoch=x_train.shape[0] // batch_size,
-                            epochs=maxepoches,
-                            validation_data=(x_test, y_test),callbacks=[reduce_lr],verbose=2)
+        
+        model.fit_generator(
+            datagen.flow(X_train, y_train, batch_size=batch_size),
+            steps_per_epoch=X_train.shape[0] // batch_size,
+            epochs=250,
+            validation_data=(X_test, y_test),
+            callbacks=[reduce_lr]
+        )
 
-        ##### model.save_weights('cifar100vgg.h5')
         return model
 
 
-
-
-
-
     
-if __name__ == "__main__":
+if __name__ == '__main__':
 
-      
-    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
+    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
     # Changing to NCHW format
     K.set_image_data_format('channels_first')
 
 
     ### Parameters specific to each benchmark
-    reload_dir = "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/"
-    keras_model_file = "vgg16_cifar100.h5"
-    hpvm_dir = "data/vgg16_cifar100/" 
+    reload_dir = MODEL_PARAMS_DIR + '/vgg16_cifar100/'
+    keras_model_file = MODEL_PARAMS_DIR + '/vgg16_cifar100/weights.h5'
+    data_dir = '/vgg16_cifar100/' 
+    src_dir = 'data/vgg16_cifar100_src/'
     num_classes = 100
+    batch_size = 100
 
-    vgg16_cifar100 = VGG16_CIFAR100("vgg16_cifar100", reload_dir, keras_model_file, hpvm_dir, num_classes)
+    model = VGG16_CIFAR100('VGG16_CIFAR100', reload_dir, keras_model_file, data_dir, src_dir, num_classes, batch_size)
     
-    vgg16_cifar100.run(sys.argv)
-
+    model.run(sys.argv)
     
diff --git a/hpvm/projects/keras/src/vgg16_imagenet.py b/hpvm/projects/keras/src/vgg16_imagenet.py
index b41df8b83a966495d6a1c8281745e91181a66c41..35ab92479e545ba44cf2175cb7b8abcec84c4fed 100644
--- a/hpvm/projects/keras/src/vgg16_imagenet.py
+++ b/hpvm/projects/keras/src/vgg16_imagenet.py
@@ -1,233 +1,140 @@
 import os
+import sys
 import glob
-import random
 
-import scipy
-import scipy.io
-import cv2
 import numpy as np
-
 import tensorflow as tf
+import scipy
+import scipy.io
 import keras
-from keras.models import Sequential, Model
+from keras.models import Model, Sequential
 from keras.layers import *
-from keras.utils import to_categorical
-from keras.applications.vgg16 import VGG16, preprocess_input
+from keras.optimizers import Adam
+from keras import regularizers
 from keras import backend as K
-
-from frontend.approxhpvm_translator import translate_to_approxhpvm
-from frontend.weight_utils import dumpCalibrationData2
-
-
-np.random.seed(2020)
-
-os.environ["CUDA_VISIBLE_DEVICES"] = "1"
-
-K.set_image_data_format('channels_first')
-
-data_format = 'channels_first'
-
-
-IMAGENET_DIR = '/home/nz11/ILSVRC2012/'
-OUTPUT_DIR = 'data/vgg16_imagenet_tune/'
-
-NUM_CLASSES = 200
-IMAGES_PER_CLASS = 50
-# VAL_SIZE = 100
-
-
-
-def get_vgg16_nchw_keras():
-    img_input = Input(shape=(3, 224, 224))
-    
-    # Block 1
-    x = Conv2D(64, (3, 3),
-                      padding='same',
-                      data_format=data_format)(img_input)
-    x = Activation('relu')(x)
-    x = Conv2D(64, (3, 3),
-                      padding='same',
-                      data_format=data_format)(x)
-    x = Activation('relu')(x)
-    x = MaxPooling2D((2, 2), strides=(2, 2), data_format=data_format)(x)
-
-    # Block 2
-    x = Conv2D(128, (3, 3),
-                      padding='same',
-                      data_format=data_format)(x)
-    x = Activation('relu')(x)
-    x = Conv2D(128, (3, 3),
-                      padding='same',
-                      data_format=data_format)(x)
-    x = Activation('relu')(x)
-    x = MaxPooling2D((2, 2), strides=(2, 2), data_format=data_format)(x)
-
-    # Block 3
-    x = Conv2D(256, (3, 3),
-                      padding='same',
-                      data_format=data_format)(x)
-    x = Activation('relu')(x)
-    x = Conv2D(256, (3, 3),
-                      padding='same',
-                      data_format=data_format)(x)
-    x = Activation('relu')(x)
-    x = Conv2D(256, (3, 3),
-                      padding='same',
-                      data_format=data_format)(x)
-    x = Activation('relu')(x)
-    x = MaxPooling2D((2, 2), strides=(2, 2), data_format=data_format)(x)
-
-    # Block 4
-    x = Conv2D(512, (3, 3),
-                      padding='same',
-                      data_format=data_format)(x)
-    x = Activation('relu')(x)
-    x = Conv2D(512, (3, 3),
-                      padding='same',
-                      data_format=data_format)(x)
-    x = Activation('relu')(x)
-    x = Conv2D(512, (3, 3),
-                      padding='same',
-                      data_format=data_format)(x)
-    x = Activation('relu')(x)
-    x = MaxPooling2D((2, 2), strides=(2, 2), data_format=data_format)(x)
-
-    # Block 5
-    x = Conv2D(512, (3, 3),
-                      padding='same',
-                      data_format=data_format)(x)
-    x = Activation('relu')(x)
-    x = Conv2D(512, (3, 3),
-                      padding='same',
-                      data_format=data_format)(x)
-    x = Activation('relu')(x)
-    x = Conv2D(512, (3, 3),
-                      padding='same',
-                      data_format=data_format)(x)
-    x = Activation('relu')(x)
-    x = MaxPooling2D((2, 2), strides=(2, 2), data_format=data_format)(x)
-
-    x = Flatten(data_format='channels_last')(x)
-    
-    x = Dense(4096)(x)
-    x = Activation('relu')(x)
-    x = Dense(4096)(x)   
-    x = Activation('relu')(x)
-    x = Dense(1000)(x)
-    x = Activation('softmax')(x)
-
-    model_nchw = Model(img_input, x)
-    
-    
-    model = VGG16()
-
-    j = 0
-    for i in range(len(model_nchw.layers)):
-        if 'padding' in model_nchw.layers[i].name or 'activation' in model_nchw.layers[i].name:
-            continue
-        try:
-            model_nchw.layers[i].set_weights(model.layers[j].get_weights())
-        except:
-            print (i, model_nchw.layers[i], 'skipped')
-        j += 1
-
-    
-    return model_nchw
-
+from keras.utils import to_categorical
+from keras.preprocessing.image import ImageDataGenerator
+from keras.callbacks import LearningRateScheduler
+
+from Benchmark import Benchmark
+from Config import MODEL_PARAMS_DIR
+
+
+
+class VGG16(Benchmark):
+
+    def buildModel(self):
+        img_input = Input(shape=(3, 224, 224))
+
+        # Block 1
+        x = ZeroPadding2D(padding=(1, 1))(img_input)
+        x = Conv2D(64, (3, 3), padding='valid')(x)
+        x = Activation('relu')(x)
+        x = ZeroPadding2D(padding=(1, 1))(x)
+        x = Conv2D(64, (3, 3), padding='valid')(x)
+        x = Activation('relu')(x)
+        x = MaxPooling2D((2, 2), strides=(2, 2))(x)
+
+        # Block 2
+        x = ZeroPadding2D(padding=(1, 1))(x)
+        x = Conv2D(128, (3, 3), padding='valid')(x)
+        x = Activation('relu')(x)
+        x = ZeroPadding2D(padding=(1, 1))(x)
+        x = Conv2D(128, (3, 3), padding='valid')(x)
+        x = Activation('relu')(x)
+        x = MaxPooling2D((2, 2), strides=(2, 2))(x)
+
+        # Block 3
+        x = ZeroPadding2D(padding=(1, 1))(x)
+        x = Conv2D(256, (3, 3), padding='valid')(x)
+        x = Activation('relu')(x)
+        x = ZeroPadding2D(padding=(1, 1))(x)
+        x = Conv2D(256, (3, 3), padding='valid')(x)
+        x = Activation('relu')(x)
+        x = ZeroPadding2D(padding=(1, 1))(x)
+        x = Conv2D(256, (3, 3), padding='valid')(x)
+        x = Activation('relu')(x)
+        x = MaxPooling2D((2, 2), strides=(2, 2))(x)
+
+        # Block 4
+        x = ZeroPadding2D(padding=(1, 1))(x)
+        x = Conv2D(512, (3, 3), padding='valid')(x)
+        x = Activation('relu')(x)
+        x = ZeroPadding2D(padding=(1, 1))(x)
+        x = Conv2D(512, (3, 3), padding='valid')(x)
+        x = Activation('relu')(x)
+        x = ZeroPadding2D(padding=(1, 1))(x)
+        x = Conv2D(512, (3, 3), padding='valid')(x)
+        x = Activation('relu')(x)
+        x = MaxPooling2D((2, 2), strides=(2, 2))(x)
+
+        # Block 5
+        x = ZeroPadding2D(padding=(1, 1))(x)
+        x = Conv2D(512, (3, 3), padding='valid')(x)
+        x = Activation('relu')(x)
+        x = ZeroPadding2D(padding=(1, 1))(x)
+        x = Conv2D(512, (3, 3), padding='valid')(x)
+        x = Activation('relu')(x)
+        x = ZeroPadding2D(padding=(1, 1))(x)
+        x = Conv2D(512, (3, 3), padding='valid')(x)
+        x = Activation('relu')(x)
+        x = MaxPooling2D((2, 2), strides=(2, 2))(x)
+
+    #     x = Flatten(data_format='channels_first')(x)
+        x = Flatten()(x)
+
+        x = Dense(4096)(x)
+        x = Activation('relu')(x)
+        x = Dropout(0.5)(x)
+        x = Dense(4096)(x)   
+        x = Activation('relu')(x)
+        x = Dropout(0.5)(x)
+        x = Dense(1000)(x)
+        x = Activation('softmax')(x)
+
+        model = Model(img_input, x)
+            
+        return model
 
 
-def load_image(x):
-    
-    image = cv2.imread(x)
+    def data_preprocess(self):
+        X_train, y_train = None, None
         
-    height, width, _ = image.shape
-    new_height = height * 256 // min(image.shape[:2])
-    new_width = width * 256 // min(image.shape[:2])
-    image = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_CUBIC)
-    
-    height, width, _ = image.shape
-    startx = width // 2 - (224 // 2)
-    starty = height // 2 - (224 // 2)
-    image = image[starty:starty + 224, startx:startx + 224]
-    
-    image = image[:, :, ::-1]
-    image = np.transpose(image, (2, 0, 1))
-    image = preprocess_input(image.astype(np.float32), data_format=data_format)
+        X_test = np.fromfile(MODEL_PARAMS_DIR + '/vgg16_imagenet/test_input.bin', dtype=np.float32)
+        X_test = X_test.reshape((-1, 3, 224, 224)) 
+        y_test = np.fromfile(MODEL_PARAMS_DIR + '/vgg16_imagenet/test_labels.bin', dtype=np.uint32)
         
-    return image.astype(np.float32)
-
-
-meta = scipy.io.loadmat(IMAGENET_DIR + 'ILSVRC2012_devkit_t12/data/meta.mat')
-original_idx_to_synset = {}
-synset_to_name = {}
-
-for i in range(1000):
-    ilsvrc2012_id = int(meta['synsets'][i,0][0][0][0])
-    synset = meta['synsets'][i,0][1][0]
-    name = meta['synsets'][i,0][2][0]
-    original_idx_to_synset[ilsvrc2012_id] = synset
-    synset_to_name[synset] = name
-
-synset_to_keras_idx = {}
-keras_idx_to_name = {}
-f = open(IMAGENET_DIR + 'ILSVRC2012_devkit_t12/data/synset_words.txt', 'r')
-c = 0
-for line in f:
-    parts = line.split(' ')
-    synset_to_keras_idx[parts[0]] = c
-    keras_idx_to_name[c] = ' '.join(parts[1:])
-    c += 1
-f.close()
-
-
-
-
-model = get_vgg16_nchw_keras()
-    
-X_tune, X_test = [], []
-y_tune, y_true = [], []
-    
-classes = glob.glob(IMAGENET_DIR + 'val/*')
-
-for c in np.random.permutation(len(classes))[:NUM_CLASSES]:
-    x = glob.glob(classes[c] + '/*')
-    x = np.array(x)
-            
-    idx = np.random.permutation(len(x))
-    idx = idx[:max(len(idx), IMAGES_PER_CLASS)]
+        X_tuner = np.fromfile(MODEL_PARAMS_DIR + '/vgg16_imagenet/tune_input.bin', dtype=np.float32)
+        X_tuner = X_tuner.reshape((-1, 3, 224, 224)) 
+        y_tuner = np.fromfile(MODEL_PARAMS_DIR + '/vgg16_imagenet/tune_labels.bin', dtype=np.uint32)
+ 
+        return X_train, y_train, X_test, y_test, X_tuner, y_tuner
     
-    synset = classes[c].split('/')[-1]
-    images = list(map(lambda x : load_image(x), x[idx]))
-    labels = [synset_to_keras_idx[synset]] * len(x[idx])
     
-    X_test += images[:IMAGES_PER_CLASS // 2]
-    y_true += labels[:IMAGES_PER_CLASS // 2]
-    
-    X_tune += images[IMAGES_PER_CLASS // 2:]
-    y_tune += labels[IMAGES_PER_CLASS // 2:]
-    
-    
-X_test = np.array(X_test)
-y_true = np.array(y_true)
-X_tune = np.array(X_tune)
-y_tune = np.array(y_tune)
+    def trainModel(self, model, X_train=None, y_train=None, X_test=None, y_test=None):  # data args mirror the other benchmarks; unused here
 
-print ('tune size', len(X_tune))
-print ('test size', len(X_test))
+        assert False, "ImageNet training not supported - use Pretrained weights"
 
 
+    
+if __name__ == '__main__':
 
-translate_to_approxhpvm(model, OUTPUT_DIR, X_tune, y_tune, 1000)
-
-# # dumpCalibrationData2(OUTPUT_DIR + 'test_input_10K.bin', X_test, OUTPUT_DIR + 'test_labels_10K.bin', y_true)
-# dumpCalibrationData2(OUTPUT_DIR + 'tune_input.bin', X_tune, OUTPUT_DIR + 'tune_labels.bin', y_tune)
-# dumpCalibrationData2(OUTPUT_DIR + 'test_input.bin', X_test, OUTPUT_DIR + 'test_labels.bin', y_true)
+    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+    # Changing to NCHW format
+    K.set_image_data_format('channels_first')
 
 
+    ### Parameters specific to each benchmark
+    reload_dir = MODEL_PARAMS_DIR + '/vgg16_imagenet/'
+    keras_model_file = MODEL_PARAMS_DIR + '/vgg16_imagenet/weights.h5'
+    data_dir = '/vgg16_imagenet/' 
+    src_dir = 'data/vgg16_imagenet_src/'
+    num_classes = 1000
+    batch_size = 50
 
-pred = np.argmax(model.predict(X_test), axis=1)
-print ('val accuracy', np.sum(pred == y_true.ravel()) / len(X_test))
+    alexnet = VGG16('VGG16_imagenet', reload_dir, keras_model_file, data_dir, src_dir, num_classes, batch_size)
     
-# pred = np.argmax(model.predict(X_tune), axis=1)
-# print ('val accuracy', np.sum(pred == y_tune.ravel()) / len(X_tune))
+    alexnet.run(sys.argv)
+
 
+    
\ No newline at end of file
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10.cpp
index 41f4334c420f77e1a62829798e06bca2899b8e80..dd689d202a91755ecad116a3d1277f59c740d0b1 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10.cpp
@@ -415,7 +415,7 @@ int main() {
 
   std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/alexnet2_cifar10/";
 
-  std::string labels_path = dir_prefix + std::string("tune_labels.bin");
+  std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
   void *conv2d_1_w =
       readTrainedWeights(conv2d_1_w_path.c_str(), 0, 32, 3, 3, 3);
@@ -497,7 +497,7 @@ int main() {
   int test_input_size = 5000;
   int batch_count = test_input_size / batch_size;
 
-  std::string input_path = dir_prefix + std::string("tune_input.bin");
+  std::string input_path = dir_prefix + std::string("test_input.bin");
   // void* input = create4DTensor(0,nchw,batch_size,3,32,32);
 
   startMemTracking();
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10.cpp
index e5edc8a5890cdbf51bba1ed0effdf64b2297d29a..ae5f31b7dcca3ec59920e0dcc0ba34ca5ea28cbc 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10.cpp
@@ -366,9 +366,9 @@ int main() {
 
   std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/alexnet_cifar10/";
 
-  std::string input_path = dir_prefix + std::string("tune_input.bin");
+  std::string input_path = dir_prefix + std::string("test_input.bin");
   // void* input = readTrainedWeights(input_path.c_str(), 0,5000,3,32,32);
-  std::string labels_path = dir_prefix + std::string("tune_labels.bin");
+  std::string labels_path = dir_prefix + std::string("test_labels.bin");
   uint8_t *labels = readLabels(labels_path.c_str(), 5000);
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
   void *conv2d_1_w =
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp
index 87aff828551b2c778098379728b79ca2cb440918..d49c0d2d06b1ea04ad78ee72dc2776bd000dacfd 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp
@@ -465,8 +465,8 @@ int main() {
   std::string dir_prefix =
       std::string(MODEL_PARAMS_DIR) + "/alexnet_imagenet/";
 
-  std::string input_path = dir_prefix + std::string("tune_input.bin");
-  std::string labels_path = dir_prefix + std::string("tune_labels.bin");
+  std::string input_path = dir_prefix + std::string("test_input.bin");
+  std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
   void *conv2d_1_w =
       readTrainedWeights(conv2d_1_w_path.c_str(), 0, 64, 3, 11, 11);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/data/tuner_confs.txt b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/data/tuner_confs.txt
index b4e51dff426f4d3c5cb7b9572e6aa5940212acbd..32a9642d38ab816246b9e5cca01c6efcec3a2d8d 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/data/tuner_confs.txt
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/data/tuner_confs.txt
@@ -1,8 +1,8 @@
 282.5141369999999
 +++++
 conf1 1 1 98.7 0.0
-1 gpu conv fp32 11 add fp32 1 pool_max fp32 1 tanh fp32 1
-2 gpu conv fp32 11 add fp32 1 pool_max fp32 1 tanh fp32 1
+1 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1
+2 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1
 3 gpu mul fp32 11 add fp32 1 tanh fp32 1
 4 gpu mul fp32 11 add fp32 1 tanh fp32 1
 5 gpu softmax fp32 1
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist.cpp
index a20315cb9c36610aac2d0d43059182302674b83b..b67d585d01b4809d4107d95ab4476e741f13dd7c 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist.cpp
@@ -268,8 +268,8 @@ int main() {
 
   std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/lenet_mnist/";
 
-  std::string input_path = dir_prefix + std::string("tune_input.bin");
-  std::string labels_path = dir_prefix + std::string("tune_labels.bin");
+  std::string input_path = dir_prefix + std::string("test_input.bin");
+  std::string labels_path = dir_prefix + std::string("test_labels.bin");
 
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
   void *conv2d_1_w =
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10.cpp
index 5ea5c298bf7b5858af024aff7a4ee81c4b8a6ed2..a4de2826216d9bf6b3843e466097abae35ca8b72 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10.cpp
@@ -1968,8 +1968,8 @@ typedef struct __attribute__((__packed__)) {
 int main() {
   std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/mobilenet_cifar10/";
 
-  std::string input_path = dir_prefix + std::string("tune_input.bin");
-  std::string labels_path = dir_prefix + std::string("tune_labels.bin");
+  std::string input_path = dir_prefix + std::string("test_input.bin");
+  std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
   void *conv2d_1_w =
       readTrainedWeights(conv2d_1_w_path.c_str(), 0, 32, 3, 3, 3);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10.cpp
index c6fa02c784f90f8c03a81991763e533d864b9ed0..66ab37cd33e502df35f73ca2b3addb1c4be53808 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10.cpp
@@ -1303,9 +1303,9 @@ typedef struct __attribute__((__packed__)) {
 int main() {
   std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/resnet18_cifar10/";
 
-  std::string input_path = dir_prefix + std::string("tune_input.bin");
+  std::string input_path = dir_prefix + std::string("test_input.bin");
   // void* input = readTrainedWeights(input_path.c_str(), 0,5000,3,32,32);
-  std::string labels_path = dir_prefix + std::string("tune_labels.bin");
+  std::string labels_path = dir_prefix + std::string("test_labels.bin");
   // uint32_t* labels = readLabels3(labels_path.c_str(),5000);
 
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp
index b41e0bc96df83a91f5656e7094e914e8d86e6df5..db6b64daa0d214017ebcf968067fe44f40aa9c06 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp
@@ -5136,8 +5136,8 @@ int main() {
 
   std::string dir_prefix =
       std::string(MODEL_PARAMS_DIR) + "/resnet50_imagenet/";
-  std::string input_path = dir_prefix + std::string("tune_input.bin");
-  std::string labels_path = dir_prefix + std::string("tune_labels.bin");
+  std::string input_path = dir_prefix + std::string("test_input.bin");
+  std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
   void *conv2d_1_w =
       readTrainedWeights(conv2d_1_w_path.c_str(), 0, 64, 3, 7, 7);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp
index 13d150e7a946296e8ce5c7fb9e128a91dedbe534..39c2ffc8769c8b8f13b359e56f4e138dff0fed98 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp
@@ -830,8 +830,8 @@ typedef struct __attribute__((__packed__)) {
 int main() {
 
   std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/vgg16_cifar10/";
-  std::string input_path = dir_prefix + std::string("tune_input.bin");
-  std::string labels_path = dir_prefix + std::string("tune_labels.bin");
+  std::string input_path = dir_prefix + std::string("test_input.bin");
+  std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
   void *conv2d_1_w =
       readTrainedWeights(conv2d_1_w_path.c_str(), 0, 64, 3, 3, 3);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp
index 6e26f89b755db90853ce90180ab179b6df421827..ce899cd0a24776bd5a7c8b51f13e0dac698b3495 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp
@@ -831,8 +831,8 @@ int main() {
 
   std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/vgg16_cifar100/";
 
-  std::string input_path = dir_prefix + std::string("tune_input.bin");
-  std::string labels_path = dir_prefix + std::string("tune_labels.bin");
+  std::string input_path = dir_prefix + std::string("test_input.bin");
+  std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
   void *conv2d_1_w =
       readTrainedWeights(conv2d_1_w_path.c_str(), 0, 64, 3, 3, 3);
@@ -998,7 +998,7 @@ int main() {
   startMemTracking();
   startProfiling();
 
-  for (int j = 0; j < 14; j++) {
+  for (int j = 0; j < 1; j++) {
     for (int i = 0; i < batch_count; i++) {
 
       int start = i * batch_size;
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp
index 4fad931efc4988cebdf317dc0761c9146cebab0f..91af01fe8eb7deacb47cc42f3fe6cbb620adc000 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp
@@ -879,8 +879,8 @@ int main() {
 
   std::string dir_prefix =
       std::string(MODEL_PARAMS_DIR) + "/vgg16_imagenet/";
-  std::string input_path = dir_prefix + std::string("tune_input.bin");
-  std::string labels_path = dir_prefix + std::string("tune_labels.bin");
+  std::string input_path = dir_prefix + std::string("test_input.bin");
+  std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
   void *conv2d_1_w =
       readTrainedWeights(conv2d_1_w_path.c_str(), 0, 64, 3, 3, 3);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/include/tensorUtils.h b/hpvm/test/dnn_benchmarks/hpvm-c/include/tensorUtils.h
index a627f83e6b2aa9f38b09d82ee94ce35da1a6bafe..71e1c268726e1fb77b0713599928262b95bd64f5 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/include/tensorUtils.h
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/include/tensorUtils.h
@@ -318,13 +318,13 @@ struct Tensor* readInputBatch(const char* file_name, int data_type,
 			      int start, int end,
 			      int dim2_size, int dim3_size, int dim4_size){
 
-  int dim1_size = end - start;
+  long int dim1_size = end - start;
   // FIXIT: Don't assume floating point types
-  int type_size = 4; // NOTE: Assuming floating point tensors
+  long int type_size = 4; // NOTE: Assuming floating point tensors
   long int num_elems = dim1_size * dim2_size * dim3_size * dim4_size;
   long int size_in_bytes = type_size * dim1_size * dim2_size * dim3_size * dim4_size;
   float* tensor_data = (float*) malloc(sizeof(float) * num_elems);
-  int file_header_size = type_size * start * dim2_size * dim3_size * dim4_size;
+  long int file_header_size = type_size * start * dim2_size * dim3_size * dim4_size;
   
   FILE* file = fopen(file_name, "rb");
   if(file == NULL){
@@ -335,9 +335,9 @@ struct Tensor* readInputBatch(const char* file_name, int data_type,
   fseek(file, file_header_size, SEEK_SET); // Skipping the file header
   size_t bytes_read = fread(tensor_data, 1, size_in_bytes, file);
 
-
   fclose(file);
-  
+
+  //printf ("FIXED input BATCH read \n");
   
   struct Tensor* weights = (struct Tensor*) create4DTensor(data_type, nchw, dim1_size, dim2_size,
 					                   dim3_size, dim4_size);