diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/src/alexnet_imagenet.cpp b/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/src/alexnet_imagenet.cpp
index 5aae61e5f0673d260c32ad923bb1038f91a39a3b..2b411f75ba6645f227a746f718737c398e8b55ab 100644
--- a/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/src/alexnet_imagenet.cpp
+++ b/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/src/alexnet_imagenet.cpp
@@ -1,562 +1,569 @@
 
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/stat.h> 
-#include <cstring> 
-#include <visc.h> 
-#include <tensorTypes.h> 
-#include <tensorUtils.h> 
-
-
-void var_0_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
-
-  void *r = __visc__tensor_convolution(t1, t2, 2, 2, 4, 4); 
-  __visc__return(2, r, (size_t) 0); 
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <cstring>
+#include <hpvm.h>
+#include <tensorTypes.h>
+#include <tensorUtils.h>
+
+void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(2, t1, t2, 0);
+
+  void *r = __visc__tensor_convolution(t1, t2, 2, 2, 4, 4);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_1_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
+void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(2, t1, t2, 0);
 
-  void *r = __visc__tensor_add(t1, t2); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_add(t1, t2);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_2_node(void* t1, size_t bytes_t1) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(1, t1, 0); 
+void var_2_node(void *t1, size_t bytes_t1) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(1, t1, 0);
 
-  void* r = __visc__tensor_relu(t1); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_relu(t1);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_3_node(void* t1, size_t bytes_t1) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(1, t1, 0); 
+void var_3_node(void *t1, size_t bytes_t1) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(1, t1, 0);
 
-  void* r = __visc__tensor_pool_max(t1, 3, 3, 0, 0, 2, 2); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_pool_max(t1, 3, 3, 0, 0, 2, 2);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_4_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
+void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(2, t1, t2, 0);
 
-  void *r = __visc__tensor_convolution(t1, t2, 2, 2, 1, 1); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_convolution(t1, t2, 2, 2, 1, 1);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_5_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
+void var_5_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(2, t1, t2, 0);
 
-  void *r = __visc__tensor_add(t1, t2); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_add(t1, t2);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_6_node(void* t1, size_t bytes_t1) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(1, t1, 0); 
+void var_6_node(void *t1, size_t bytes_t1) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(1, t1, 0);
 
-  void* r = __visc__tensor_relu(t1); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_relu(t1);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_7_node(void* t1, size_t bytes_t1) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(1, t1, 0); 
+void var_7_node(void *t1, size_t bytes_t1) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(1, t1, 0);
 
-  void* r = __visc__tensor_pool_max(t1, 3, 3, 0, 0, 2, 2); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_pool_max(t1, 3, 3, 0, 0, 2, 2);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_8_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
+void var_8_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(2, t1, t2, 0);
 
-  void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_9_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
+void var_9_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(2, t1, t2, 0);
 
-  void *r = __visc__tensor_add(t1, t2); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_add(t1, t2);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_10_node(void* t1, size_t bytes_t1) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(1, t1, 0); 
+void var_10_node(void *t1, size_t bytes_t1) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(1, t1, 0);
 
-  void* r = __visc__tensor_relu(t1); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_relu(t1);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_11_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
+void var_11_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(2, t1, t2, 0);
 
-  void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_12_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
+void var_12_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(2, t1, t2, 0);
 
-  void *r = __visc__tensor_add(t1, t2); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_add(t1, t2);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_13_node(void* t1, size_t bytes_t1) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(1, t1, 0); 
+void var_13_node(void *t1, size_t bytes_t1) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(1, t1, 0);
 
-  void* r = __visc__tensor_relu(t1); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_relu(t1);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_14_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
+void var_14_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(2, t1, t2, 0);
 
-  void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_15_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
+void var_15_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(2, t1, t2, 0);
 
-  void *r = __visc__tensor_add(t1, t2); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_add(t1, t2);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_16_node(void* t1, size_t bytes_t1) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(1, t1, 0); 
+void var_16_node(void *t1, size_t bytes_t1) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(1, t1, 0);
 
-  void* r = __visc__tensor_relu(t1); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_relu(t1);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_17_node(void* t1, size_t bytes_t1) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(1, t1, 0); 
+void var_17_node(void *t1, size_t bytes_t1) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(1, t1, 0);
 
-  void* r = __visc__tensor_pool_max(t1, 3, 3, 0, 0, 2, 2); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_pool_max(t1, 3, 3, 0, 0, 2, 2);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_18_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
+void var_18_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(2, t1, t2, 0);
 
-  void *r = __visc__tensor_mul(t1, t2); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_mul(t1, t2);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_19_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
+void var_19_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(2, t1, t2, 0);
 
-  void *r = __visc__tensor_add(t1, t2); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_add(t1, t2);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_20_node(void* t1, size_t bytes_t1) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(1, t1, 0); 
+void var_20_node(void *t1, size_t bytes_t1) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(1, t1, 0);
 
-  void* r = __visc__tensor_relu(t1); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_relu(t1);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_21_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
+void var_21_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(2, t1, t2, 0);
 
-  void *r = __visc__tensor_mul(t1, t2); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_mul(t1, t2);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_22_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
+void var_22_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(2, t1, t2, 0);
 
-  void *r = __visc__tensor_add(t1, t2); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_add(t1, t2);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_23_node(void* t1, size_t bytes_t1) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(1, t1, 0); 
+void var_23_node(void *t1, size_t bytes_t1) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(1, t1, 0);
 
-  void* r = __visc__tensor_relu(t1); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_relu(t1);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_24_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
+void var_24_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(2, t1, t2, 0);
 
-  void *r = __visc__tensor_mul(t1, t2); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_mul(t1, t2);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_25_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
+void var_25_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(2, t1, t2, 0);
 
-  void *r = __visc__tensor_add(t1, t2); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_add(t1, t2);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_26_node(void* t1, size_t bytes_t1) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(1, t1, 0); 
+void var_26_node(void *t1, size_t bytes_t1) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(1, t1, 0);
 
-  void* r = __visc__tensor_softmax(t1); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_softmax(t1);
+  __visc__return(2, r, (size_t)0);
 }
 
-void root(void* input, size_t input_bytes, 
-	  void* conv2d_1_w, size_t conv2d_1_w_bytes, 
-	  void* conv2d_1_b, size_t conv2d_1_b_bytes, 
-	  void* conv2d_2_w, size_t conv2d_2_w_bytes, 
-	  void* conv2d_2_b, size_t conv2d_2_b_bytes, 
-	  void* conv2d_3_w, size_t conv2d_3_w_bytes, 
-	  void* conv2d_3_b, size_t conv2d_3_b_bytes, 
-	  void* conv2d_4_w, size_t conv2d_4_w_bytes, 
-	  void* conv2d_4_b, size_t conv2d_4_b_bytes, 
-	  void* conv2d_5_w, size_t conv2d_5_w_bytes, 
-	  void* conv2d_5_b, size_t conv2d_5_b_bytes, 
-	  void* dense_1_w, size_t dense_1_w_bytes, 
-	  void* dense_1_b, size_t dense_1_b_bytes, 
-	  void* dense_2_w, size_t dense_2_w_bytes, 
-	  void* dense_2_b, size_t dense_2_b_bytes, 
-	  void* dense_3_w, size_t dense_3_w_bytes, 
-	  void* dense_3_b, size_t dense_3_b_bytes){ 
+void root(void *input, size_t input_bytes, void *conv2d_1_w,
+          size_t conv2d_1_w_bytes, void *conv2d_1_b, size_t conv2d_1_b_bytes,
+          void *conv2d_2_w, size_t conv2d_2_w_bytes, void *conv2d_2_b,
+          size_t conv2d_2_b_bytes, void *conv2d_3_w, size_t conv2d_3_w_bytes,
+          void *conv2d_3_b, size_t conv2d_3_b_bytes, void *conv2d_4_w,
+          size_t conv2d_4_w_bytes, void *conv2d_4_b, size_t conv2d_4_b_bytes,
+          void *conv2d_5_w, size_t conv2d_5_w_bytes, void *conv2d_5_b,
+          size_t conv2d_5_b_bytes, void *dense_1_w, size_t dense_1_w_bytes,
+          void *dense_1_b, size_t dense_1_b_bytes, void *dense_2_w,
+          size_t dense_2_w_bytes, void *dense_2_b, size_t dense_2_b_bytes,
+          void *dense_3_w, size_t dense_3_w_bytes, void *dense_3_b,
+          size_t dense_3_b_bytes) {
 
+  __visc__hint(visc::CPU_TARGET);
+  __visc__attributes(17, input, conv2d_1_w, conv2d_1_b, conv2d_2_w, conv2d_2_b,
+                     conv2d_3_w, conv2d_3_b, conv2d_4_w, conv2d_4_b, conv2d_5_w,
+                     conv2d_5_b, dense_1_w, dense_1_b, dense_2_w, dense_2_b,
+                     dense_3_w, dense_3_b, 0);
 
-  __visc__hint(visc::CPU_TARGET); 
-  __visc__attributes(17, input, conv2d_1_w, conv2d_1_b, conv2d_2_w, conv2d_2_b, conv2d_3_w, conv2d_3_b, conv2d_4_w, conv2d_4_b, conv2d_5_w, conv2d_5_b, dense_1_w, dense_1_b, dense_2_w, dense_2_b, dense_3_w, dense_3_b, 0); 
+  void *var_0 = __visc__createNodeND(0, var_0_node);
 
+  __visc__bindIn(var_0, 0, 0, 0);
+  __visc__bindIn(var_0, 1, 1, 0);
+  __visc__bindIn(var_0, 2, 2, 0);
+  __visc__bindIn(var_0, 3, 3, 0);
 
-  void* var_0 = __visc__createNodeND(0, var_0_node); 
+  void *var_1 = __visc__createNodeND(0, var_1_node);
 
-  __visc__bindIn(var_0, 0, 0, 0); 
-  __visc__bindIn(var_0, 1, 1, 0); 
-  __visc__bindIn(var_0, 2, 2, 0); 
-  __visc__bindIn(var_0, 3, 3, 0); 
+  __visc__edge(var_0, var_1, 1, 0, 0, 0);
+  __visc__edge(var_0, var_1, 1, 1, 1, 0);
+  __visc__bindIn(var_1, 4, 2, 0);
+  __visc__bindIn(var_1, 5, 3, 0);
 
-  void* var_1 = __visc__createNodeND(0, var_1_node); 
+  void *var_2 = __visc__createNodeND(0, var_2_node);
 
-  __visc__edge(var_0, var_1, 1, 0, 0, 0); 
-  __visc__edge(var_0, var_1, 1, 1, 1, 0); 
-  __visc__bindIn(var_1, 4, 2, 0); 
-  __visc__bindIn(var_1, 5, 3, 0); 
+  __visc__edge(var_1, var_2, 1, 0, 0, 0);
+  __visc__edge(var_1, var_2, 1, 1, 1, 0);
 
-  void* var_2 = __visc__createNodeND(0, var_2_node); 
+  void *var_3 = __visc__createNodeND(0, var_3_node);
 
-  __visc__edge(var_1, var_2, 1, 0, 0, 0); 
-  __visc__edge(var_1, var_2, 1, 1, 1, 0); 
+  __visc__edge(var_2, var_3, 1, 0, 0, 0);
+  __visc__edge(var_2, var_3, 1, 1, 1, 0);
 
-  void* var_3 = __visc__createNodeND(0, var_3_node); 
+  void *var_4 = __visc__createNodeND(0, var_4_node);
 
-  __visc__edge(var_2, var_3, 1, 0, 0, 0); 
-  __visc__edge(var_2, var_3, 1, 1, 1, 0); 
+  __visc__edge(var_3, var_4, 1, 0, 0, 0);
+  __visc__edge(var_3, var_4, 1, 1, 1, 0);
+  __visc__bindIn(var_4, 6, 2, 0);
+  __visc__bindIn(var_4, 7, 3, 0);
 
-  void* var_4 = __visc__createNodeND(0, var_4_node); 
+  void *var_5 = __visc__createNodeND(0, var_5_node);
 
-  __visc__edge(var_3, var_4, 1, 0, 0, 0); 
-  __visc__edge(var_3, var_4, 1, 1, 1, 0); 
-  __visc__bindIn(var_4, 6, 2, 0); 
-  __visc__bindIn(var_4, 7, 3, 0); 
+  __visc__edge(var_4, var_5, 1, 0, 0, 0);
+  __visc__edge(var_4, var_5, 1, 1, 1, 0);
+  __visc__bindIn(var_5, 8, 2, 0);
+  __visc__bindIn(var_5, 9, 3, 0);
 
-  void* var_5 = __visc__createNodeND(0, var_5_node); 
+  void *var_6 = __visc__createNodeND(0, var_6_node);
 
-  __visc__edge(var_4, var_5, 1, 0, 0, 0); 
-  __visc__edge(var_4, var_5, 1, 1, 1, 0); 
-  __visc__bindIn(var_5, 8, 2, 0); 
-  __visc__bindIn(var_5, 9, 3, 0); 
+  __visc__edge(var_5, var_6, 1, 0, 0, 0);
+  __visc__edge(var_5, var_6, 1, 1, 1, 0);
 
-  void* var_6 = __visc__createNodeND(0, var_6_node); 
+  void *var_7 = __visc__createNodeND(0, var_7_node);
 
-  __visc__edge(var_5, var_6, 1, 0, 0, 0); 
-  __visc__edge(var_5, var_6, 1, 1, 1, 0); 
+  __visc__edge(var_6, var_7, 1, 0, 0, 0);
+  __visc__edge(var_6, var_7, 1, 1, 1, 0);
 
-  void* var_7 = __visc__createNodeND(0, var_7_node); 
+  void *var_8 = __visc__createNodeND(0, var_8_node);
 
-  __visc__edge(var_6, var_7, 1, 0, 0, 0); 
-  __visc__edge(var_6, var_7, 1, 1, 1, 0); 
+  __visc__edge(var_7, var_8, 1, 0, 0, 0);
+  __visc__edge(var_7, var_8, 1, 1, 1, 0);
+  __visc__bindIn(var_8, 10, 2, 0);
+  __visc__bindIn(var_8, 11, 3, 0);
 
-  void* var_8 = __visc__createNodeND(0, var_8_node); 
+  void *var_9 = __visc__createNodeND(0, var_9_node);
 
-  __visc__edge(var_7, var_8, 1, 0, 0, 0); 
-  __visc__edge(var_7, var_8, 1, 1, 1, 0); 
-  __visc__bindIn(var_8, 10, 2, 0); 
-  __visc__bindIn(var_8, 11, 3, 0); 
+  __visc__edge(var_8, var_9, 1, 0, 0, 0);
+  __visc__edge(var_8, var_9, 1, 1, 1, 0);
+  __visc__bindIn(var_9, 12, 2, 0);
+  __visc__bindIn(var_9, 13, 3, 0);
 
-  void* var_9 = __visc__createNodeND(0, var_9_node); 
+  void *var_10 = __visc__createNodeND(0, var_10_node);
 
-  __visc__edge(var_8, var_9, 1, 0, 0, 0); 
-  __visc__edge(var_8, var_9, 1, 1, 1, 0); 
-  __visc__bindIn(var_9, 12, 2, 0); 
-  __visc__bindIn(var_9, 13, 3, 0); 
+  __visc__edge(var_9, var_10, 1, 0, 0, 0);
+  __visc__edge(var_9, var_10, 1, 1, 1, 0);
 
-  void* var_10 = __visc__createNodeND(0, var_10_node); 
+  void *var_11 = __visc__createNodeND(0, var_11_node);
 
-  __visc__edge(var_9, var_10, 1, 0, 0, 0); 
-  __visc__edge(var_9, var_10, 1, 1, 1, 0); 
+  __visc__edge(var_10, var_11, 1, 0, 0, 0);
+  __visc__edge(var_10, var_11, 1, 1, 1, 0);
+  __visc__bindIn(var_11, 14, 2, 0);
+  __visc__bindIn(var_11, 15, 3, 0);
 
-  void* var_11 = __visc__createNodeND(0, var_11_node); 
+  void *var_12 = __visc__createNodeND(0, var_12_node);
 
-  __visc__edge(var_10, var_11, 1, 0, 0, 0); 
-  __visc__edge(var_10, var_11, 1, 1, 1, 0); 
-  __visc__bindIn(var_11, 14, 2, 0); 
-  __visc__bindIn(var_11, 15, 3, 0); 
+  __visc__edge(var_11, var_12, 1, 0, 0, 0);
+  __visc__edge(var_11, var_12, 1, 1, 1, 0);
+  __visc__bindIn(var_12, 16, 2, 0);
+  __visc__bindIn(var_12, 17, 3, 0);
 
-  void* var_12 = __visc__createNodeND(0, var_12_node); 
+  void *var_13 = __visc__createNodeND(0, var_13_node);
 
-  __visc__edge(var_11, var_12, 1, 0, 0, 0); 
-  __visc__edge(var_11, var_12, 1, 1, 1, 0); 
-  __visc__bindIn(var_12, 16, 2, 0); 
-  __visc__bindIn(var_12, 17, 3, 0); 
+  __visc__edge(var_12, var_13, 1, 0, 0, 0);
+  __visc__edge(var_12, var_13, 1, 1, 1, 0);
 
-  void* var_13 = __visc__createNodeND(0, var_13_node); 
+  void *var_14 = __visc__createNodeND(0, var_14_node);
 
-  __visc__edge(var_12, var_13, 1, 0, 0, 0); 
-  __visc__edge(var_12, var_13, 1, 1, 1, 0); 
+  __visc__edge(var_13, var_14, 1, 0, 0, 0);
+  __visc__edge(var_13, var_14, 1, 1, 1, 0);
+  __visc__bindIn(var_14, 18, 2, 0);
+  __visc__bindIn(var_14, 19, 3, 0);
 
-  void* var_14 = __visc__createNodeND(0, var_14_node); 
+  void *var_15 = __visc__createNodeND(0, var_15_node);
 
-  __visc__edge(var_13, var_14, 1, 0, 0, 0); 
-  __visc__edge(var_13, var_14, 1, 1, 1, 0); 
-  __visc__bindIn(var_14, 18, 2, 0); 
-  __visc__bindIn(var_14, 19, 3, 0); 
+  __visc__edge(var_14, var_15, 1, 0, 0, 0);
+  __visc__edge(var_14, var_15, 1, 1, 1, 0);
+  __visc__bindIn(var_15, 20, 2, 0);
+  __visc__bindIn(var_15, 21, 3, 0);
 
-  void* var_15 = __visc__createNodeND(0, var_15_node); 
+  void *var_16 = __visc__createNodeND(0, var_16_node);
 
-  __visc__edge(var_14, var_15, 1, 0, 0, 0); 
-  __visc__edge(var_14, var_15, 1, 1, 1, 0); 
-  __visc__bindIn(var_15, 20, 2, 0); 
-  __visc__bindIn(var_15, 21, 3, 0); 
+  __visc__edge(var_15, var_16, 1, 0, 0, 0);
+  __visc__edge(var_15, var_16, 1, 1, 1, 0);
 
-  void* var_16 = __visc__createNodeND(0, var_16_node); 
+  void *var_17 = __visc__createNodeND(0, var_17_node);
 
-  __visc__edge(var_15, var_16, 1, 0, 0, 0); 
-  __visc__edge(var_15, var_16, 1, 1, 1, 0); 
+  __visc__edge(var_16, var_17, 1, 0, 0, 0);
+  __visc__edge(var_16, var_17, 1, 1, 1, 0);
 
-  void* var_17 = __visc__createNodeND(0, var_17_node); 
+  void *var_18 = __visc__createNodeND(0, var_18_node);
 
-  __visc__edge(var_16, var_17, 1, 0, 0, 0); 
-  __visc__edge(var_16, var_17, 1, 1, 1, 0); 
+  __visc__edge(var_17, var_18, 1, 0, 0, 0);
+  __visc__edge(var_17, var_18, 1, 1, 1, 0);
+  __visc__bindIn(var_18, 22, 2, 0);
+  __visc__bindIn(var_18, 23, 3, 0);
 
-  void* var_18 = __visc__createNodeND(0, var_18_node); 
+  void *var_19 = __visc__createNodeND(0, var_19_node);
 
-  __visc__edge(var_17, var_18, 1, 0, 0, 0); 
-  __visc__edge(var_17, var_18, 1, 1, 1, 0); 
-  __visc__bindIn(var_18, 22, 2, 0); 
-  __visc__bindIn(var_18, 23, 3, 0); 
+  __visc__edge(var_18, var_19, 1, 0, 0, 0);
+  __visc__edge(var_18, var_19, 1, 1, 1, 0);
+  __visc__bindIn(var_19, 24, 2, 0);
+  __visc__bindIn(var_19, 25, 3, 0);
 
-  void* var_19 = __visc__createNodeND(0, var_19_node); 
+  void *var_20 = __visc__createNodeND(0, var_20_node);
 
-  __visc__edge(var_18, var_19, 1, 0, 0, 0); 
-  __visc__edge(var_18, var_19, 1, 1, 1, 0); 
-  __visc__bindIn(var_19, 24, 2, 0); 
-  __visc__bindIn(var_19, 25, 3, 0); 
+  __visc__edge(var_19, var_20, 1, 0, 0, 0);
+  __visc__edge(var_19, var_20, 1, 1, 1, 0);
 
-  void* var_20 = __visc__createNodeND(0, var_20_node); 
+  void *var_21 = __visc__createNodeND(0, var_21_node);
 
-  __visc__edge(var_19, var_20, 1, 0, 0, 0); 
-  __visc__edge(var_19, var_20, 1, 1, 1, 0); 
+  __visc__edge(var_20, var_21, 1, 0, 0, 0);
+  __visc__edge(var_20, var_21, 1, 1, 1, 0);
+  __visc__bindIn(var_21, 26, 2, 0);
+  __visc__bindIn(var_21, 27, 3, 0);
 
-  void* var_21 = __visc__createNodeND(0, var_21_node); 
+  void *var_22 = __visc__createNodeND(0, var_22_node);
 
-  __visc__edge(var_20, var_21, 1, 0, 0, 0); 
-  __visc__edge(var_20, var_21, 1, 1, 1, 0); 
-  __visc__bindIn(var_21, 26, 2, 0); 
-  __visc__bindIn(var_21, 27, 3, 0); 
+  __visc__edge(var_21, var_22, 1, 0, 0, 0);
+  __visc__edge(var_21, var_22, 1, 1, 1, 0);
+  __visc__bindIn(var_22, 28, 2, 0);
+  __visc__bindIn(var_22, 29, 3, 0);
 
-  void* var_22 = __visc__createNodeND(0, var_22_node); 
+  void *var_23 = __visc__createNodeND(0, var_23_node);
 
-  __visc__edge(var_21, var_22, 1, 0, 0, 0); 
-  __visc__edge(var_21, var_22, 1, 1, 1, 0); 
-  __visc__bindIn(var_22, 28, 2, 0); 
-  __visc__bindIn(var_22, 29, 3, 0); 
+  __visc__edge(var_22, var_23, 1, 0, 0, 0);
+  __visc__edge(var_22, var_23, 1, 1, 1, 0);
 
-  void* var_23 = __visc__createNodeND(0, var_23_node); 
+  void *var_24 = __visc__createNodeND(0, var_24_node);
 
-  __visc__edge(var_22, var_23, 1, 0, 0, 0); 
-  __visc__edge(var_22, var_23, 1, 1, 1, 0); 
+  __visc__edge(var_23, var_24, 1, 0, 0, 0);
+  __visc__edge(var_23, var_24, 1, 1, 1, 0);
+  __visc__bindIn(var_24, 30, 2, 0);
+  __visc__bindIn(var_24, 31, 3, 0);
 
-  void* var_24 = __visc__createNodeND(0, var_24_node); 
+  void *var_25 = __visc__createNodeND(0, var_25_node);
 
-  __visc__edge(var_23, var_24, 1, 0, 0, 0); 
-  __visc__edge(var_23, var_24, 1, 1, 1, 0); 
-  __visc__bindIn(var_24, 30, 2, 0); 
-  __visc__bindIn(var_24, 31, 3, 0); 
+  __visc__edge(var_24, var_25, 1, 0, 0, 0);
+  __visc__edge(var_24, var_25, 1, 1, 1, 0);
+  __visc__bindIn(var_25, 32, 2, 0);
+  __visc__bindIn(var_25, 33, 3, 0);
 
-  void* var_25 = __visc__createNodeND(0, var_25_node); 
+  void *var_26 = __visc__createNodeND(0, var_26_node);
 
-  __visc__edge(var_24, var_25, 1, 0, 0, 0); 
-  __visc__edge(var_24, var_25, 1, 1, 1, 0); 
-  __visc__bindIn(var_25, 32, 2, 0); 
-  __visc__bindIn(var_25, 33, 3, 0); 
-
-  void* var_26 = __visc__createNodeND(0, var_26_node); 
-
-  __visc__edge(var_25, var_26, 1, 0, 0, 0); 
-  __visc__edge(var_25, var_26, 1, 1, 1, 0); 
-
-  __visc__bindOut(var_26, 0, 0, 0); 
-  __visc__bindOut(var_26, 1, 1, 0); 
+  __visc__edge(var_25, var_26, 1, 0, 0, 0);
+  __visc__edge(var_25, var_26, 1, 1, 1, 0);
 
+  __visc__bindOut(var_26, 0, 0, 0);
+  __visc__bindOut(var_26, 1, 1, 0);
 }
 
 struct ret_t {
-  void* tensor; 
-  size_t bytes; 
-}; 
+  void *tensor;
+  size_t bytes;
+};
 
 typedef struct __attribute__((__packed__)) {
-  void* input; 
-  size_t input_bytes; 
-  void* conv2d_1_w; 
-  size_t conv2d_1_w_bytes; 
-  void* conv2d_1_b; 
-  size_t conv2d_1_b_bytes; 
-  void* conv2d_2_w; 
-  size_t conv2d_2_w_bytes; 
-  void* conv2d_2_b; 
-  size_t conv2d_2_b_bytes; 
-  void* conv2d_3_w; 
-  size_t conv2d_3_w_bytes; 
-  void* conv2d_3_b; 
-  size_t conv2d_3_b_bytes; 
-  void* conv2d_4_w; 
-  size_t conv2d_4_w_bytes; 
-  void* conv2d_4_b; 
-  size_t conv2d_4_b_bytes; 
-  void* conv2d_5_w; 
-  size_t conv2d_5_w_bytes; 
-  void* conv2d_5_b; 
-  size_t conv2d_5_b_bytes; 
-  void* dense_1_w; 
-  size_t dense_1_w_bytes; 
-  void* dense_1_b; 
-  size_t dense_1_b_bytes; 
-  void* dense_2_w; 
-  size_t dense_2_w_bytes; 
-  void* dense_2_b; 
-  size_t dense_2_b_bytes; 
-  void* dense_3_w; 
-  size_t dense_3_w_bytes; 
-  void* dense_3_b; 
-  size_t dense_3_b_bytes; 
-
-  struct ret_t r; 
+  void *input;
+  size_t input_bytes;
+  void *conv2d_1_w;
+  size_t conv2d_1_w_bytes;
+  void *conv2d_1_b;
+  size_t conv2d_1_b_bytes;
+  void *conv2d_2_w;
+  size_t conv2d_2_w_bytes;
+  void *conv2d_2_b;
+  size_t conv2d_2_b_bytes;
+  void *conv2d_3_w;
+  size_t conv2d_3_w_bytes;
+  void *conv2d_3_b;
+  size_t conv2d_3_b_bytes;
+  void *conv2d_4_w;
+  size_t conv2d_4_w_bytes;
+  void *conv2d_4_b;
+  size_t conv2d_4_b_bytes;
+  void *conv2d_5_w;
+  size_t conv2d_5_w_bytes;
+  void *conv2d_5_b;
+  size_t conv2d_5_b_bytes;
+  void *dense_1_w;
+  size_t dense_1_w_bytes;
+  void *dense_1_b;
+  size_t dense_1_b_bytes;
+  void *dense_2_w;
+  size_t dense_2_w_bytes;
+  void *dense_2_b;
+  size_t dense_2_b_bytes;
+  void *dense_3_w;
+  size_t dense_3_w_bytes;
+  void *dense_3_b;
+  size_t dense_3_b_bytes;
+
+  struct ret_t r;
+} RootIn;
+
+int main() {
+
+  std::string dir_prefix =
+      std::string("/shared/hsharif3/alexnet_imagenet_tune/");
+  std::string input_path = dir_prefix + std::string("test_input.bin");
+  std::string labels_path = dir_prefix + std::string("test_labels.bin");
+  std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
+  void *conv2d_1_w =
+      readTrainedWeights(conv2d_1_w_path.c_str(), 0, 64, 3, 11, 11);
+  std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin");
+  void *conv2d_1_b =
+      readTrainedWeights(conv2d_1_b_path.c_str(), 0, 1, 64, 1, 1);
+  std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin");
+  void *conv2d_2_w =
+      readTrainedWeights(conv2d_2_w_path.c_str(), 0, 192, 64, 5, 5);
+  std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin");
+  void *conv2d_2_b =
+      readTrainedWeights(conv2d_2_b_path.c_str(), 0, 1, 192, 1, 1);
+  std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin");
+  void *conv2d_3_w =
+      readTrainedWeights(conv2d_3_w_path.c_str(), 0, 384, 192, 3, 3);
+  std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin");
+  void *conv2d_3_b =
+      readTrainedWeights(conv2d_3_b_path.c_str(), 0, 1, 384, 1, 1);
+  std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin");
+  void *conv2d_4_w =
+      readTrainedWeights(conv2d_4_w_path.c_str(), 0, 256, 384, 3, 3);
+  std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin");
+  void *conv2d_4_b =
+      readTrainedWeights(conv2d_4_b_path.c_str(), 0, 1, 256, 1, 1);
+  std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin");
+  void *conv2d_5_w =
+      readTrainedWeights(conv2d_5_w_path.c_str(), 0, 256, 256, 3, 3);
+  std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin");
+  void *conv2d_5_b =
+      readTrainedWeights(conv2d_5_b_path.c_str(), 0, 1, 256, 1, 1);
+  std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin");
+  void *dense_1_w =
+      readTrainedWeights(dense_1_w_path.c_str(), 0, 1, 1, 9216, 4096);
+  std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin");
+  void *dense_1_b =
+      readTrainedWeights(dense_1_b_path.c_str(), 0, 1, 4096, 1, 1);
+  std::string dense_2_w_path = dir_prefix + std::string("dense_2_w.bin");
+  void *dense_2_w =
+      readTrainedWeights(dense_2_w_path.c_str(), 0, 1, 1, 4096, 4096);
+  std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin");
+  void *dense_2_b =
+      readTrainedWeights(dense_2_b_path.c_str(), 0, 1, 4096, 1, 1);
+  std::string dense_3_w_path = dir_prefix + std::string("dense_3_w.bin");
+  void *dense_3_w =
+      readTrainedWeights(dense_3_w_path.c_str(), 0, 1, 1, 4096, 1000);
+  std::string dense_3_b_path = dir_prefix + std::string("dense_3_b.bin");
+  void *dense_3_b =
+      readTrainedWeights(dense_3_b_path.c_str(), 0, 1, 1000, 1, 1);
+  void *input = readTrainedWeights(input_path.c_str(), 0, 1000, 3, 224, 224);
+  // uint32_t* labels = readLabels2(labels_path.c_str(),6000);
+
+  uint32_t *labels = readLabels3(labels_path.c_str(), 1000);
+
+  __visc__init();
+  RootIn *args = static_cast<RootIn *>(malloc(sizeof(RootIn)));
+
+  args->input = input;
+  args->input_bytes = 0;
+  args->conv2d_1_w = conv2d_1_w;
+  args->conv2d_1_w_bytes = 0;
+  args->conv2d_1_b = conv2d_1_b;
+  args->conv2d_1_b_bytes = 0;
+  args->conv2d_2_w = conv2d_2_w;
+  args->conv2d_2_w_bytes = 0;
+  args->conv2d_2_b = conv2d_2_b;
+  args->conv2d_2_b_bytes = 0;
+  args->conv2d_3_w = conv2d_3_w;
+  args->conv2d_3_w_bytes = 0;
+  args->conv2d_3_b = conv2d_3_b;
+  args->conv2d_3_b_bytes = 0;
+  args->conv2d_4_w = conv2d_4_w;
+  args->conv2d_4_w_bytes = 0;
+  args->conv2d_4_b = conv2d_4_b;
+  args->conv2d_4_b_bytes = 0;
+  args->conv2d_5_w = conv2d_5_w;
+  args->conv2d_5_w_bytes = 0;
+  args->conv2d_5_b = conv2d_5_b;
+  args->conv2d_5_b_bytes = 0;
+  args->dense_1_w = dense_1_w;
+  args->dense_1_w_bytes = 0;
+  args->dense_1_b = dense_1_b;
+  args->dense_1_b_bytes = 0;
+  args->dense_2_w = dense_2_w;
+  args->dense_2_w_bytes = 0;
+  args->dense_2_b = dense_2_b;
+  args->dense_2_b_bytes = 0;
+  args->dense_3_w = dense_3_w;
+  args->dense_3_w_bytes = 0;
+  args->dense_3_b = dense_3_b;
+  args->dense_3_b_bytes = 0;
+
+  void *dfg = __visc__launch(0, root, (void *)args);
+
+  __visc__wait(dfg);
+
+  void *result = static_cast<RootIn *>(args)->input;
+  hpvm_request_tensor(result, 0);
+
+  __visc__cleanup();
+  computeAccuracy3(labels, result);
+  return 0;
 }
-RootIn;
-
-
-int main(){ 
-
-  std::string dir_prefix = std::string("/shared/hsharif3/alexnet_imagenet_tune/"); 
-  std::string input_path =  dir_prefix + std::string("test_input.bin"); 
-  std::string labels_path =  dir_prefix + std::string("test_labels.bin"); 
-  std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,11,11); 
-  std::string conv2d_1_b_path =  dir_prefix + std::string("conv2d_1_b.bin"); 
-  void* conv2d_1_b =  readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-  void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,192,64,5,5); 
-  std::string conv2d_2_b_path =  dir_prefix + std::string("conv2d_2_b.bin"); 
-  void* conv2d_2_b =  readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,192,1,1); 
-  std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-  void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,384,192,3,3); 
-  std::string conv2d_3_b_path =  dir_prefix + std::string("conv2d_3_b.bin"); 
-  void* conv2d_3_b =  readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,384,1,1); 
-  std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-  void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,256,384,3,3); 
-  std::string conv2d_4_b_path =  dir_prefix + std::string("conv2d_4_b.bin"); 
-  void* conv2d_4_b =  readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-  void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,256,3,3); 
-  std::string conv2d_5_b_path =  dir_prefix + std::string("conv2d_5_b.bin"); 
-  void* conv2d_5_b =  readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); 
-  std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,9216,4096); 
-  std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,4096,1,1); 
-  std::string dense_2_w_path =  dir_prefix + std::string("dense_2_w.bin"); 
-  void* dense_2_w =  readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,4096,4096); 
-  std::string dense_2_b_path =  dir_prefix + std::string("dense_2_b.bin"); 
-  void* dense_2_b =  readTrainedWeights(dense_2_b_path.c_str(), 0,1,4096,1,1); 
-  std::string dense_3_w_path =  dir_prefix + std::string("dense_3_w.bin"); 
-  void* dense_3_w =  readTrainedWeights(dense_3_w_path.c_str(), 0,1,1,4096,1000); 
-  std::string dense_3_b_path =  dir_prefix + std::string("dense_3_b.bin"); 
-  void* dense_3_b =  readTrainedWeights(dense_3_b_path.c_str(), 0,1,1000,1,1); 
-  void* input = readTrainedWeights(input_path.c_str(), 0, 1000,3,224,224); 
-  //uint32_t* labels = readLabels2(labels_path.c_str(),6000); 
-
-  uint32_t* labels = readLabels3(labels_path.c_str(), 1000); 
-
-    
-  __visc__init(); 
-  RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn))); 
-
-  args->input = input; 
-  args->input_bytes = 0; 
-  args->conv2d_1_w = conv2d_1_w; 
-  args->conv2d_1_w_bytes = 0; 
-  args->conv2d_1_b = conv2d_1_b; 
-  args->conv2d_1_b_bytes = 0; 
-  args->conv2d_2_w = conv2d_2_w; 
-  args->conv2d_2_w_bytes = 0; 
-  args->conv2d_2_b = conv2d_2_b; 
-  args->conv2d_2_b_bytes = 0; 
-  args->conv2d_3_w = conv2d_3_w; 
-  args->conv2d_3_w_bytes = 0; 
-  args->conv2d_3_b = conv2d_3_b; 
-  args->conv2d_3_b_bytes = 0; 
-  args->conv2d_4_w = conv2d_4_w; 
-  args->conv2d_4_w_bytes = 0; 
-  args->conv2d_4_b = conv2d_4_b; 
-  args->conv2d_4_b_bytes = 0; 
-  args->conv2d_5_w = conv2d_5_w; 
-  args->conv2d_5_w_bytes = 0; 
-  args->conv2d_5_b = conv2d_5_b; 
-  args->conv2d_5_b_bytes = 0; 
-  args->dense_1_w = dense_1_w; 
-  args->dense_1_w_bytes = 0; 
-  args->dense_1_b = dense_1_b; 
-  args->dense_1_b_bytes = 0; 
-  args->dense_2_w = dense_2_w; 
-  args->dense_2_w_bytes = 0; 
-  args->dense_2_b = dense_2_b; 
-  args->dense_2_b_bytes = 0; 
-  args->dense_3_w = dense_3_w; 
-  args->dense_3_w_bytes = 0; 
-  args->dense_3_b = dense_3_b; 
-  args->dense_3_b_bytes = 0; 
-
-  void* dfg = __visc__launch(0, root, (void*) args); 
-
-  __visc__wait(dfg); 
-
-  void *result = static_cast<RootIn*>(args)->input; 
-  hpvm_request_tensor(result, 0); 
-
-  __visc__cleanup(); 
-  computeAccuracy3(labels, result); 
-  return 0; 
-
-} 
diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/src/alexnet_imagenet_loop.cpp b/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/src/alexnet_imagenet_loop.cpp
index e9e3e6e0fd344f205784dc7d1fbd0e159f50d72a..4371ddbbbf66cfead4af55c47509f20c0d48462d 100644
--- a/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/src/alexnet_imagenet_loop.cpp
+++ b/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/src/alexnet_imagenet_loop.cpp
@@ -1,552 +1,560 @@
 
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/stat.h> 
-#include <cstring> 
-#include <visc.h> 
-#include <tensorTypes.h> 
-#include <tensorUtils.h> 
-
-
-void var_0_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::PROMISE_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
-
-  void *r = __visc__tensor_convolution(t1, t2, 2, 2, 4, 4); 
-  __visc__return(2, r, (size_t) 0); 
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <cstring>
+#include <hpvm.h>
+#include <tensorTypes.h>
+#include <tensorUtils.h>
+
+void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
+  __visc__hint(visc::PROMISE_TARGET);
+  __visc__attributes(2, t1, t2, 0);
+
+  void *r = __visc__tensor_convolution(t1, t2, 2, 2, 4, 4);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_1_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::PROMISE_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
+void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
+  __visc__hint(visc::PROMISE_TARGET);
+  __visc__attributes(2, t1, t2, 0);
 
-  void *r = __visc__tensor_add(t1, t2); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_add(t1, t2);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_2_node(void* t1, size_t bytes_t1) { 
-  __visc__hint(visc::PROMISE_TARGET); 
-  __visc__attributes(1, t1, 0); 
+void var_2_node(void *t1, size_t bytes_t1) {
+  __visc__hint(visc::PROMISE_TARGET);
+  __visc__attributes(1, t1, 0);
 
-  void* r = __visc__tensor_relu(t1); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_relu(t1);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_3_node(void* t1, size_t bytes_t1) { 
-  __visc__hint(visc::PROMISE_TARGET); 
-  __visc__attributes(1, t1, 0); 
+void var_3_node(void *t1, size_t bytes_t1) {
+  __visc__hint(visc::PROMISE_TARGET);
+  __visc__attributes(1, t1, 0);
 
-  void* r = __visc__tensor_pool_max(t1, 3, 3, 0, 0, 2, 2); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_pool_max(t1, 3, 3, 0, 0, 2, 2);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_4_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::PROMISE_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
+void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
+  __visc__hint(visc::PROMISE_TARGET);
+  __visc__attributes(2, t1, t2, 0);
 
-  void *r = __visc__tensor_convolution(t1, t2, 2, 2, 1, 1); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_convolution(t1, t2, 2, 2, 1, 1);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_5_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::PROMISE_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
+void var_5_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
+  __visc__hint(visc::PROMISE_TARGET);
+  __visc__attributes(2, t1, t2, 0);
 
-  void *r = __visc__tensor_add(t1, t2); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_add(t1, t2);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_6_node(void* t1, size_t bytes_t1) { 
-  __visc__hint(visc::PROMISE_TARGET); 
-  __visc__attributes(1, t1, 0); 
+void var_6_node(void *t1, size_t bytes_t1) {
+  __visc__hint(visc::PROMISE_TARGET);
+  __visc__attributes(1, t1, 0);
 
-  void* r = __visc__tensor_relu(t1); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_relu(t1);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_7_node(void* t1, size_t bytes_t1) { 
-  __visc__hint(visc::PROMISE_TARGET); 
-  __visc__attributes(1, t1, 0); 
+void var_7_node(void *t1, size_t bytes_t1) {
+  __visc__hint(visc::PROMISE_TARGET);
+  __visc__attributes(1, t1, 0);
 
-  void* r = __visc__tensor_pool_max(t1, 3, 3, 0, 0, 2, 2); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_pool_max(t1, 3, 3, 0, 0, 2, 2);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_8_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::PROMISE_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
+void var_8_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
+  __visc__hint(visc::PROMISE_TARGET);
+  __visc__attributes(2, t1, t2, 0);
 
-  void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_9_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::PROMISE_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
+void var_9_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
+  __visc__hint(visc::PROMISE_TARGET);
+  __visc__attributes(2, t1, t2, 0);
 
-  void *r = __visc__tensor_add(t1, t2); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_add(t1, t2);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_10_node(void* t1, size_t bytes_t1) { 
-  __visc__hint(visc::PROMISE_TARGET); 
-  __visc__attributes(1, t1, 0); 
+void var_10_node(void *t1, size_t bytes_t1) {
+  __visc__hint(visc::PROMISE_TARGET);
+  __visc__attributes(1, t1, 0);
 
-  void* r = __visc__tensor_relu(t1); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_relu(t1);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_11_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::PROMISE_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
+void var_11_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
+  __visc__hint(visc::PROMISE_TARGET);
+  __visc__attributes(2, t1, t2, 0);
 
-  void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_12_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::PROMISE_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
+void var_12_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
+  __visc__hint(visc::PROMISE_TARGET);
+  __visc__attributes(2, t1, t2, 0);
 
-  void *r = __visc__tensor_add(t1, t2); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_add(t1, t2);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_13_node(void* t1, size_t bytes_t1) { 
-  __visc__hint(visc::PROMISE_TARGET); 
-  __visc__attributes(1, t1, 0); 
+void var_13_node(void *t1, size_t bytes_t1) {
+  __visc__hint(visc::PROMISE_TARGET);
+  __visc__attributes(1, t1, 0);
 
-  void* r = __visc__tensor_relu(t1); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_relu(t1);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_14_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::PROMISE_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
+void var_14_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
+  __visc__hint(visc::PROMISE_TARGET);
+  __visc__attributes(2, t1, t2, 0);
 
-  void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_15_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::PROMISE_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
+void var_15_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
+  __visc__hint(visc::PROMISE_TARGET);
+  __visc__attributes(2, t1, t2, 0);
 
-  void *r = __visc__tensor_add(t1, t2); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_add(t1, t2);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_16_node(void* t1, size_t bytes_t1) { 
-  __visc__hint(visc::PROMISE_TARGET); 
-  __visc__attributes(1, t1, 0); 
+void var_16_node(void *t1, size_t bytes_t1) {
+  __visc__hint(visc::PROMISE_TARGET);
+  __visc__attributes(1, t1, 0);
 
-  void* r = __visc__tensor_relu(t1); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_relu(t1);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_17_node(void* t1, size_t bytes_t1) { 
-  __visc__hint(visc::PROMISE_TARGET); 
-  __visc__attributes(1, t1, 0); 
+void var_17_node(void *t1, size_t bytes_t1) {
+  __visc__hint(visc::PROMISE_TARGET);
+  __visc__attributes(1, t1, 0);
 
-  void* r = __visc__tensor_pool_max(t1, 3, 3, 0, 0, 2, 2); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_pool_max(t1, 3, 3, 0, 0, 2, 2);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_18_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::PROMISE_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
+void var_18_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
+  __visc__hint(visc::PROMISE_TARGET);
+  __visc__attributes(2, t1, t2, 0);
 
-  void *r = __visc__tensor_mul(t1, t2); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_mul(t1, t2);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_19_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::PROMISE_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
+void var_19_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
+  __visc__hint(visc::PROMISE_TARGET);
+  __visc__attributes(2, t1, t2, 0);
 
-  void *r = __visc__tensor_add(t1, t2); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_add(t1, t2);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_20_node(void* t1, size_t bytes_t1) { 
-  __visc__hint(visc::PROMISE_TARGET); 
-  __visc__attributes(1, t1, 0); 
+void var_20_node(void *t1, size_t bytes_t1) {
+  __visc__hint(visc::PROMISE_TARGET);
+  __visc__attributes(1, t1, 0);
 
-  void* r = __visc__tensor_relu(t1); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_relu(t1);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_21_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::PROMISE_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
+void var_21_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
+  __visc__hint(visc::PROMISE_TARGET);
+  __visc__attributes(2, t1, t2, 0);
 
-  void *r = __visc__tensor_mul(t1, t2); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_mul(t1, t2);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_22_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::PROMISE_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
+void var_22_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
+  __visc__hint(visc::PROMISE_TARGET);
+  __visc__attributes(2, t1, t2, 0);
 
-  void *r = __visc__tensor_add(t1, t2); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_add(t1, t2);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_23_node(void* t1, size_t bytes_t1) { 
-  __visc__hint(visc::PROMISE_TARGET); 
-  __visc__attributes(1, t1, 0); 
+void var_23_node(void *t1, size_t bytes_t1) {
+  __visc__hint(visc::PROMISE_TARGET);
+  __visc__attributes(1, t1, 0);
 
-  void* r = __visc__tensor_relu(t1); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_relu(t1);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_24_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::PROMISE_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
+void var_24_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
+  __visc__hint(visc::PROMISE_TARGET);
+  __visc__attributes(2, t1, t2, 0);
 
-  void *r = __visc__tensor_mul(t1, t2); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_mul(t1, t2);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_25_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { 
-  __visc__hint(visc::PROMISE_TARGET); 
-  __visc__attributes(2, t1, t2, 0); 
+void var_25_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
+  __visc__hint(visc::PROMISE_TARGET);
+  __visc__attributes(2, t1, t2, 0);
 
-  void *r = __visc__tensor_add(t1, t2); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_add(t1, t2);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_26_node(void* t1, size_t bytes_t1) { 
-  __visc__hint(visc::CUDNN_TARGET); 
-  __visc__attributes(1, t1, 0); 
+void var_26_node(void *t1, size_t bytes_t1) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(1, t1, 0);
 
-  void* r = __visc__tensor_softmax(t1); 
-  __visc__return(2, r, (size_t) 0); 
+  void *r = __visc__tensor_softmax(t1);
+  __visc__return(2, r, (size_t)0);
 }
 
-void root(void* input, size_t input_bytes, 
-	  void* conv2d_1_w, size_t conv2d_1_w_bytes, 
-	  void* conv2d_1_b, size_t conv2d_1_b_bytes, 
-	  void* conv2d_2_w, size_t conv2d_2_w_bytes, 
-	  void* conv2d_2_b, size_t conv2d_2_b_bytes, 
-	  void* conv2d_3_w, size_t conv2d_3_w_bytes, 
-	  void* conv2d_3_b, size_t conv2d_3_b_bytes, 
-	  void* conv2d_4_w, size_t conv2d_4_w_bytes, 
-	  void* conv2d_4_b, size_t conv2d_4_b_bytes, 
-	  void* conv2d_5_w, size_t conv2d_5_w_bytes, 
-	  void* conv2d_5_b, size_t conv2d_5_b_bytes, 
-	  void* dense_1_w, size_t dense_1_w_bytes, 
-	  void* dense_1_b, size_t dense_1_b_bytes, 
-	  void* dense_2_w, size_t dense_2_w_bytes, 
-	  void* dense_2_b, size_t dense_2_b_bytes, 
-	  void* dense_3_w, size_t dense_3_w_bytes, 
-	  void* dense_3_b, size_t dense_3_b_bytes){ 
+void root(void *input, size_t input_bytes, void *conv2d_1_w,
+          size_t conv2d_1_w_bytes, void *conv2d_1_b, size_t conv2d_1_b_bytes,
+          void *conv2d_2_w, size_t conv2d_2_w_bytes, void *conv2d_2_b,
+          size_t conv2d_2_b_bytes, void *conv2d_3_w, size_t conv2d_3_w_bytes,
+          void *conv2d_3_b, size_t conv2d_3_b_bytes, void *conv2d_4_w,
+          size_t conv2d_4_w_bytes, void *conv2d_4_b, size_t conv2d_4_b_bytes,
+          void *conv2d_5_w, size_t conv2d_5_w_bytes, void *conv2d_5_b,
+          size_t conv2d_5_b_bytes, void *dense_1_w, size_t dense_1_w_bytes,
+          void *dense_1_b, size_t dense_1_b_bytes, void *dense_2_w,
+          size_t dense_2_w_bytes, void *dense_2_b, size_t dense_2_b_bytes,
+          void *dense_3_w, size_t dense_3_w_bytes, void *dense_3_b,
+          size_t dense_3_b_bytes) {
 
+  __visc__hint(visc::CPU_TARGET);
+  __visc__attributes(17, input, conv2d_1_w, conv2d_1_b, conv2d_2_w, conv2d_2_b,
+                     conv2d_3_w, conv2d_3_b, conv2d_4_w, conv2d_4_b, conv2d_5_w,
+                     conv2d_5_b, dense_1_w, dense_1_b, dense_2_w, dense_2_b,
+                     dense_3_w, dense_3_b, 0);
 
-  __visc__hint(visc::CPU_TARGET); 
-  __visc__attributes(17, input, conv2d_1_w, conv2d_1_b, conv2d_2_w, conv2d_2_b, conv2d_3_w, conv2d_3_b, conv2d_4_w, conv2d_4_b, conv2d_5_w, conv2d_5_b, dense_1_w, dense_1_b, dense_2_w, dense_2_b, dense_3_w, dense_3_b, 0); 
+  void *var_0 = __visc__createNodeND(0, var_0_node);
 
+  __visc__bindIn(var_0, 0, 0, 0);
+  __visc__bindIn(var_0, 1, 1, 0);
+  __visc__bindIn(var_0, 2, 2, 0);
+  __visc__bindIn(var_0, 3, 3, 0);
 
-  void* var_0 = __visc__createNodeND(0, var_0_node); 
+  void *var_1 = __visc__createNodeND(0, var_1_node);
 
-  __visc__bindIn(var_0, 0, 0, 0); 
-  __visc__bindIn(var_0, 1, 1, 0); 
-  __visc__bindIn(var_0, 2, 2, 0); 
-  __visc__bindIn(var_0, 3, 3, 0); 
+  __visc__edge(var_0, var_1, 1, 0, 0, 0);
+  __visc__edge(var_0, var_1, 1, 1, 1, 0);
+  __visc__bindIn(var_1, 4, 2, 0);
+  __visc__bindIn(var_1, 5, 3, 0);
 
-  void* var_1 = __visc__createNodeND(0, var_1_node); 
+  void *var_2 = __visc__createNodeND(0, var_2_node);
 
-  __visc__edge(var_0, var_1, 1, 0, 0, 0); 
-  __visc__edge(var_0, var_1, 1, 1, 1, 0); 
-  __visc__bindIn(var_1, 4, 2, 0); 
-  __visc__bindIn(var_1, 5, 3, 0); 
+  __visc__edge(var_1, var_2, 1, 0, 0, 0);
+  __visc__edge(var_1, var_2, 1, 1, 1, 0);
 
-  void* var_2 = __visc__createNodeND(0, var_2_node); 
+  void *var_3 = __visc__createNodeND(0, var_3_node);
 
-  __visc__edge(var_1, var_2, 1, 0, 0, 0); 
-  __visc__edge(var_1, var_2, 1, 1, 1, 0); 
+  __visc__edge(var_2, var_3, 1, 0, 0, 0);
+  __visc__edge(var_2, var_3, 1, 1, 1, 0);
 
-  void* var_3 = __visc__createNodeND(0, var_3_node); 
+  void *var_4 = __visc__createNodeND(0, var_4_node);
 
-  __visc__edge(var_2, var_3, 1, 0, 0, 0); 
-  __visc__edge(var_2, var_3, 1, 1, 1, 0); 
+  __visc__edge(var_3, var_4, 1, 0, 0, 0);
+  __visc__edge(var_3, var_4, 1, 1, 1, 0);
+  __visc__bindIn(var_4, 6, 2, 0);
+  __visc__bindIn(var_4, 7, 3, 0);
 
-  void* var_4 = __visc__createNodeND(0, var_4_node); 
+  void *var_5 = __visc__createNodeND(0, var_5_node);
 
-  __visc__edge(var_3, var_4, 1, 0, 0, 0); 
-  __visc__edge(var_3, var_4, 1, 1, 1, 0); 
-  __visc__bindIn(var_4, 6, 2, 0); 
-  __visc__bindIn(var_4, 7, 3, 0); 
+  __visc__edge(var_4, var_5, 1, 0, 0, 0);
+  __visc__edge(var_4, var_5, 1, 1, 1, 0);
+  __visc__bindIn(var_5, 8, 2, 0);
+  __visc__bindIn(var_5, 9, 3, 0);
 
-  void* var_5 = __visc__createNodeND(0, var_5_node); 
+  void *var_6 = __visc__createNodeND(0, var_6_node);
 
-  __visc__edge(var_4, var_5, 1, 0, 0, 0); 
-  __visc__edge(var_4, var_5, 1, 1, 1, 0); 
-  __visc__bindIn(var_5, 8, 2, 0); 
-  __visc__bindIn(var_5, 9, 3, 0); 
+  __visc__edge(var_5, var_6, 1, 0, 0, 0);
+  __visc__edge(var_5, var_6, 1, 1, 1, 0);
 
-  void* var_6 = __visc__createNodeND(0, var_6_node); 
+  void *var_7 = __visc__createNodeND(0, var_7_node);
 
-  __visc__edge(var_5, var_6, 1, 0, 0, 0); 
-  __visc__edge(var_5, var_6, 1, 1, 1, 0); 
+  __visc__edge(var_6, var_7, 1, 0, 0, 0);
+  __visc__edge(var_6, var_7, 1, 1, 1, 0);
 
-  void* var_7 = __visc__createNodeND(0, var_7_node); 
+  void *var_8 = __visc__createNodeND(0, var_8_node);
 
-  __visc__edge(var_6, var_7, 1, 0, 0, 0); 
-  __visc__edge(var_6, var_7, 1, 1, 1, 0); 
+  __visc__edge(var_7, var_8, 1, 0, 0, 0);
+  __visc__edge(var_7, var_8, 1, 1, 1, 0);
+  __visc__bindIn(var_8, 10, 2, 0);
+  __visc__bindIn(var_8, 11, 3, 0);
 
-  void* var_8 = __visc__createNodeND(0, var_8_node); 
+  void *var_9 = __visc__createNodeND(0, var_9_node);
 
-  __visc__edge(var_7, var_8, 1, 0, 0, 0); 
-  __visc__edge(var_7, var_8, 1, 1, 1, 0); 
-  __visc__bindIn(var_8, 10, 2, 0); 
-  __visc__bindIn(var_8, 11, 3, 0); 
+  __visc__edge(var_8, var_9, 1, 0, 0, 0);
+  __visc__edge(var_8, var_9, 1, 1, 1, 0);
+  __visc__bindIn(var_9, 12, 2, 0);
+  __visc__bindIn(var_9, 13, 3, 0);
 
-  void* var_9 = __visc__createNodeND(0, var_9_node); 
+  void *var_10 = __visc__createNodeND(0, var_10_node);
 
-  __visc__edge(var_8, var_9, 1, 0, 0, 0); 
-  __visc__edge(var_8, var_9, 1, 1, 1, 0); 
-  __visc__bindIn(var_9, 12, 2, 0); 
-  __visc__bindIn(var_9, 13, 3, 0); 
+  __visc__edge(var_9, var_10, 1, 0, 0, 0);
+  __visc__edge(var_9, var_10, 1, 1, 1, 0);
 
-  void* var_10 = __visc__createNodeND(0, var_10_node); 
+  void *var_11 = __visc__createNodeND(0, var_11_node);
 
-  __visc__edge(var_9, var_10, 1, 0, 0, 0); 
-  __visc__edge(var_9, var_10, 1, 1, 1, 0); 
+  __visc__edge(var_10, var_11, 1, 0, 0, 0);
+  __visc__edge(var_10, var_11, 1, 1, 1, 0);
+  __visc__bindIn(var_11, 14, 2, 0);
+  __visc__bindIn(var_11, 15, 3, 0);
 
-  void* var_11 = __visc__createNodeND(0, var_11_node); 
+  void *var_12 = __visc__createNodeND(0, var_12_node);
 
-  __visc__edge(var_10, var_11, 1, 0, 0, 0); 
-  __visc__edge(var_10, var_11, 1, 1, 1, 0); 
-  __visc__bindIn(var_11, 14, 2, 0); 
-  __visc__bindIn(var_11, 15, 3, 0); 
+  __visc__edge(var_11, var_12, 1, 0, 0, 0);
+  __visc__edge(var_11, var_12, 1, 1, 1, 0);
+  __visc__bindIn(var_12, 16, 2, 0);
+  __visc__bindIn(var_12, 17, 3, 0);
 
-  void* var_12 = __visc__createNodeND(0, var_12_node); 
+  void *var_13 = __visc__createNodeND(0, var_13_node);
 
-  __visc__edge(var_11, var_12, 1, 0, 0, 0); 
-  __visc__edge(var_11, var_12, 1, 1, 1, 0); 
-  __visc__bindIn(var_12, 16, 2, 0); 
-  __visc__bindIn(var_12, 17, 3, 0); 
+  __visc__edge(var_12, var_13, 1, 0, 0, 0);
+  __visc__edge(var_12, var_13, 1, 1, 1, 0);
 
-  void* var_13 = __visc__createNodeND(0, var_13_node); 
+  void *var_14 = __visc__createNodeND(0, var_14_node);
 
-  __visc__edge(var_12, var_13, 1, 0, 0, 0); 
-  __visc__edge(var_12, var_13, 1, 1, 1, 0); 
+  __visc__edge(var_13, var_14, 1, 0, 0, 0);
+  __visc__edge(var_13, var_14, 1, 1, 1, 0);
+  __visc__bindIn(var_14, 18, 2, 0);
+  __visc__bindIn(var_14, 19, 3, 0);
 
-  void* var_14 = __visc__createNodeND(0, var_14_node); 
+  void *var_15 = __visc__createNodeND(0, var_15_node);
 
-  __visc__edge(var_13, var_14, 1, 0, 0, 0); 
-  __visc__edge(var_13, var_14, 1, 1, 1, 0); 
-  __visc__bindIn(var_14, 18, 2, 0); 
-  __visc__bindIn(var_14, 19, 3, 0); 
+  __visc__edge(var_14, var_15, 1, 0, 0, 0);
+  __visc__edge(var_14, var_15, 1, 1, 1, 0);
+  __visc__bindIn(var_15, 20, 2, 0);
+  __visc__bindIn(var_15, 21, 3, 0);
 
-  void* var_15 = __visc__createNodeND(0, var_15_node); 
+  void *var_16 = __visc__createNodeND(0, var_16_node);
 
-  __visc__edge(var_14, var_15, 1, 0, 0, 0); 
-  __visc__edge(var_14, var_15, 1, 1, 1, 0); 
-  __visc__bindIn(var_15, 20, 2, 0); 
-  __visc__bindIn(var_15, 21, 3, 0); 
+  __visc__edge(var_15, var_16, 1, 0, 0, 0);
+  __visc__edge(var_15, var_16, 1, 1, 1, 0);
 
-  void* var_16 = __visc__createNodeND(0, var_16_node); 
+  void *var_17 = __visc__createNodeND(0, var_17_node);
 
-  __visc__edge(var_15, var_16, 1, 0, 0, 0); 
-  __visc__edge(var_15, var_16, 1, 1, 1, 0); 
+  __visc__edge(var_16, var_17, 1, 0, 0, 0);
+  __visc__edge(var_16, var_17, 1, 1, 1, 0);
 
-  void* var_17 = __visc__createNodeND(0, var_17_node); 
+  void *var_18 = __visc__createNodeND(0, var_18_node);
 
-  __visc__edge(var_16, var_17, 1, 0, 0, 0); 
-  __visc__edge(var_16, var_17, 1, 1, 1, 0); 
+  __visc__edge(var_17, var_18, 1, 0, 0, 0);
+  __visc__edge(var_17, var_18, 1, 1, 1, 0);
+  __visc__bindIn(var_18, 22, 2, 0);
+  __visc__bindIn(var_18, 23, 3, 0);
 
-  void* var_18 = __visc__createNodeND(0, var_18_node); 
+  void *var_19 = __visc__createNodeND(0, var_19_node);
 
-  __visc__edge(var_17, var_18, 1, 0, 0, 0); 
-  __visc__edge(var_17, var_18, 1, 1, 1, 0); 
-  __visc__bindIn(var_18, 22, 2, 0); 
-  __visc__bindIn(var_18, 23, 3, 0); 
+  __visc__edge(var_18, var_19, 1, 0, 0, 0);
+  __visc__edge(var_18, var_19, 1, 1, 1, 0);
+  __visc__bindIn(var_19, 24, 2, 0);
+  __visc__bindIn(var_19, 25, 3, 0);
 
-  void* var_19 = __visc__createNodeND(0, var_19_node); 
+  void *var_20 = __visc__createNodeND(0, var_20_node);
 
-  __visc__edge(var_18, var_19, 1, 0, 0, 0); 
-  __visc__edge(var_18, var_19, 1, 1, 1, 0); 
-  __visc__bindIn(var_19, 24, 2, 0); 
-  __visc__bindIn(var_19, 25, 3, 0); 
+  __visc__edge(var_19, var_20, 1, 0, 0, 0);
+  __visc__edge(var_19, var_20, 1, 1, 1, 0);
 
-  void* var_20 = __visc__createNodeND(0, var_20_node); 
+  void *var_21 = __visc__createNodeND(0, var_21_node);
 
-  __visc__edge(var_19, var_20, 1, 0, 0, 0); 
-  __visc__edge(var_19, var_20, 1, 1, 1, 0); 
+  __visc__edge(var_20, var_21, 1, 0, 0, 0);
+  __visc__edge(var_20, var_21, 1, 1, 1, 0);
+  __visc__bindIn(var_21, 26, 2, 0);
+  __visc__bindIn(var_21, 27, 3, 0);
 
-  void* var_21 = __visc__createNodeND(0, var_21_node); 
+  void *var_22 = __visc__createNodeND(0, var_22_node);
 
-  __visc__edge(var_20, var_21, 1, 0, 0, 0); 
-  __visc__edge(var_20, var_21, 1, 1, 1, 0); 
-  __visc__bindIn(var_21, 26, 2, 0); 
-  __visc__bindIn(var_21, 27, 3, 0); 
+  __visc__edge(var_21, var_22, 1, 0, 0, 0);
+  __visc__edge(var_21, var_22, 1, 1, 1, 0);
+  __visc__bindIn(var_22, 28, 2, 0);
+  __visc__bindIn(var_22, 29, 3, 0);
 
-  void* var_22 = __visc__createNodeND(0, var_22_node); 
+  void *var_23 = __visc__createNodeND(0, var_23_node);
 
-  __visc__edge(var_21, var_22, 1, 0, 0, 0); 
-  __visc__edge(var_21, var_22, 1, 1, 1, 0); 
-  __visc__bindIn(var_22, 28, 2, 0); 
-  __visc__bindIn(var_22, 29, 3, 0); 
+  __visc__edge(var_22, var_23, 1, 0, 0, 0);
+  __visc__edge(var_22, var_23, 1, 1, 1, 0);
 
-  void* var_23 = __visc__createNodeND(0, var_23_node); 
+  void *var_24 = __visc__createNodeND(0, var_24_node);
 
-  __visc__edge(var_22, var_23, 1, 0, 0, 0); 
-  __visc__edge(var_22, var_23, 1, 1, 1, 0); 
+  __visc__edge(var_23, var_24, 1, 0, 0, 0);
+  __visc__edge(var_23, var_24, 1, 1, 1, 0);
+  __visc__bindIn(var_24, 30, 2, 0);
+  __visc__bindIn(var_24, 31, 3, 0);
 
-  void* var_24 = __visc__createNodeND(0, var_24_node); 
+  void *var_25 = __visc__createNodeND(0, var_25_node);
 
-  __visc__edge(var_23, var_24, 1, 0, 0, 0); 
-  __visc__edge(var_23, var_24, 1, 1, 1, 0); 
-  __visc__bindIn(var_24, 30, 2, 0); 
-  __visc__bindIn(var_24, 31, 3, 0); 
+  __visc__edge(var_24, var_25, 1, 0, 0, 0);
+  __visc__edge(var_24, var_25, 1, 1, 1, 0);
+  __visc__bindIn(var_25, 32, 2, 0);
+  __visc__bindIn(var_25, 33, 3, 0);
 
-  void* var_25 = __visc__createNodeND(0, var_25_node); 
+  void *var_26 = __visc__createNodeND(0, var_26_node);
 
-  __visc__edge(var_24, var_25, 1, 0, 0, 0); 
-  __visc__edge(var_24, var_25, 1, 1, 1, 0); 
-  __visc__bindIn(var_25, 32, 2, 0); 
-  __visc__bindIn(var_25, 33, 3, 0); 
-
-  void* var_26 = __visc__createNodeND(0, var_26_node); 
-
-  __visc__edge(var_25, var_26, 1, 0, 0, 0); 
-  __visc__edge(var_25, var_26, 1, 1, 1, 0); 
-
-  __visc__bindOut(var_26, 0, 0, 0); 
-  __visc__bindOut(var_26, 1, 1, 0); 
+  __visc__edge(var_25, var_26, 1, 0, 0, 0);
+  __visc__edge(var_25, var_26, 1, 1, 1, 0);
 
+  __visc__bindOut(var_26, 0, 0, 0);
+  __visc__bindOut(var_26, 1, 1, 0);
 }
 
 struct ret_t {
-  void* tensor; 
-  size_t bytes; 
-}; 
+  void *tensor;
+  size_t bytes;
+};
 
 typedef struct __attribute__((__packed__)) {
-  void* input; 
-  size_t input_bytes; 
-  void* conv2d_1_w; 
-  size_t conv2d_1_w_bytes; 
-  void* conv2d_1_b; 
-  size_t conv2d_1_b_bytes; 
-  void* conv2d_2_w; 
-  size_t conv2d_2_w_bytes; 
-  void* conv2d_2_b; 
-  size_t conv2d_2_b_bytes; 
-  void* conv2d_3_w; 
-  size_t conv2d_3_w_bytes; 
-  void* conv2d_3_b; 
-  size_t conv2d_3_b_bytes; 
-  void* conv2d_4_w; 
-  size_t conv2d_4_w_bytes; 
-  void* conv2d_4_b; 
-  size_t conv2d_4_b_bytes; 
-  void* conv2d_5_w; 
-  size_t conv2d_5_w_bytes; 
-  void* conv2d_5_b; 
-  size_t conv2d_5_b_bytes; 
-  void* dense_1_w; 
-  size_t dense_1_w_bytes; 
-  void* dense_1_b; 
-  size_t dense_1_b_bytes; 
-  void* dense_2_w; 
-  size_t dense_2_w_bytes; 
-  void* dense_2_b; 
-  size_t dense_2_b_bytes; 
-  void* dense_3_w; 
-  size_t dense_3_w_bytes; 
-  void* dense_3_b; 
-  size_t dense_3_b_bytes; 
-
-  struct ret_t r; 
-}
-RootIn;
-
-
-int main(){ 
-
-  std::string dir_prefix = std::string("/shared/hsharif3/alexnet_imagenet_tune/"); 
-  std::string input_path =  dir_prefix + std::string("test_input.bin"); 
-  std::string labels_path =  dir_prefix + std::string("test_labels.bin"); 
-  std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,11,11); 
-  std::string conv2d_1_b_path =  dir_prefix + std::string("conv2d_1_b.bin"); 
-  void* conv2d_1_b =  readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-  void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,192,64,5,5); 
-  std::string conv2d_2_b_path =  dir_prefix + std::string("conv2d_2_b.bin"); 
-  void* conv2d_2_b =  readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,192,1,1); 
-  std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-  void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,384,192,3,3); 
-  std::string conv2d_3_b_path =  dir_prefix + std::string("conv2d_3_b.bin"); 
-  void* conv2d_3_b =  readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,384,1,1); 
-  std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-  void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,256,384,3,3); 
-  std::string conv2d_4_b_path =  dir_prefix + std::string("conv2d_4_b.bin"); 
-  void* conv2d_4_b =  readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-  void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,256,3,3); 
-  std::string conv2d_5_b_path =  dir_prefix + std::string("conv2d_5_b.bin"); 
-  void* conv2d_5_b =  readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); 
-  std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,9216,4096); 
-  std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,4096,1,1); 
-  std::string dense_2_w_path =  dir_prefix + std::string("dense_2_w.bin"); 
-  void* dense_2_w =  readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,4096,4096); 
-  std::string dense_2_b_path =  dir_prefix + std::string("dense_2_b.bin"); 
-  void* dense_2_b =  readTrainedWeights(dense_2_b_path.c_str(), 0,1,4096,1,1); 
-  std::string dense_3_w_path =  dir_prefix + std::string("dense_3_w.bin"); 
-  void* dense_3_w =  readTrainedWeights(dense_3_w_path.c_str(), 0,1,1,4096,1000); 
-  std::string dense_3_b_path =  dir_prefix + std::string("dense_3_b.bin"); 
-  void* dense_3_b =  readTrainedWeights(dense_3_b_path.c_str(), 0,1,1000,1,1); 
-  //void* input = readTrainedWeights(input_path.c_str(), 0, 1000,3,224,224); 
-  //uint32_t* labels = readLabels2(labels_path.c_str(),6000); 
-
-  //uint32_t* labels = readLabels3(labels_path.c_str(), 1000); 
-
-    
-  __visc__init(); 
-  RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn))); 
-
-  //args->input = input; 
-  //args->input_bytes = 0; 
-  args->conv2d_1_w = conv2d_1_w; 
-  args->conv2d_1_w_bytes = 0; 
-  args->conv2d_1_b = conv2d_1_b; 
-  args->conv2d_1_b_bytes = 0; 
-  args->conv2d_2_w = conv2d_2_w; 
-  args->conv2d_2_w_bytes = 0; 
-  args->conv2d_2_b = conv2d_2_b; 
-  args->conv2d_2_b_bytes = 0; 
-  args->conv2d_3_w = conv2d_3_w; 
-  args->conv2d_3_w_bytes = 0; 
-  args->conv2d_3_b = conv2d_3_b; 
-  args->conv2d_3_b_bytes = 0; 
-  args->conv2d_4_w = conv2d_4_w; 
-  args->conv2d_4_w_bytes = 0; 
-  args->conv2d_4_b = conv2d_4_b; 
-  args->conv2d_4_b_bytes = 0; 
-  args->conv2d_5_w = conv2d_5_w; 
-  args->conv2d_5_w_bytes = 0; 
-  args->conv2d_5_b = conv2d_5_b; 
-  args->conv2d_5_b_bytes = 0; 
-  args->dense_1_w = dense_1_w; 
-  args->dense_1_w_bytes = 0; 
-  args->dense_1_b = dense_1_b; 
-  args->dense_1_b_bytes = 0; 
-  args->dense_2_w = dense_2_w; 
-  args->dense_2_w_bytes = 0; 
-  args->dense_2_b = dense_2_b; 
-  args->dense_2_b_bytes = 0; 
-  args->dense_3_w = dense_3_w; 
-  args->dense_3_w_bytes = 0; 
-  args->dense_3_b = dense_3_b; 
-  args->dense_3_b_bytes = 0; 
+  void *input; // reassigned for every batch inside main()'s loop
+  size_t input_bytes; // main() stores 0 here
+  void *conv2d_1_w; // each <layer>_w / <layer>_b pointer is loaded in main() from "<layer>_w.bin" / "<layer>_b.bin"
+  size_t conv2d_1_w_bytes; // every *_bytes companion is set to 0 in main()
+  void *conv2d_1_b;
+  size_t conv2d_1_b_bytes;
+  void *conv2d_2_w;
+  size_t conv2d_2_w_bytes;
+  void *conv2d_2_b;
+  size_t conv2d_2_b_bytes;
+  void *conv2d_3_w;
+  size_t conv2d_3_w_bytes;
+  void *conv2d_3_b;
+  size_t conv2d_3_b_bytes;
+  void *conv2d_4_w;
+  size_t conv2d_4_w_bytes;
+  void *conv2d_4_b;
+  size_t conv2d_4_b_bytes;
+  void *conv2d_5_w;
+  size_t conv2d_5_w_bytes;
+  void *conv2d_5_b;
+  size_t conv2d_5_b_bytes;
+  void *dense_1_w;
+  size_t dense_1_w_bytes;
+  void *dense_1_b;
+  size_t dense_1_b_bytes;
+  void *dense_2_w;
+  size_t dense_2_w_bytes;
+  void *dense_2_b;
+  size_t dense_2_b_bytes;
+  void *dense_3_w;
+  size_t dense_3_w_bytes;
+  void *dense_3_b;
+  size_t dense_3_b_bytes;
+
+  struct ret_t r; // not written by the visible parts of main(); presumably receives root's bound outputs — TODO confirm
+} RootIn; // passed as the argument block to __visc__launch; field order presumably must match root's parameters — TODO confirm
+
+int main() { // loads the AlexNet weights once, then launches `root` once per input batch
+
+  std::string dir_prefix =
+      std::string("/shared/hsharif3/alexnet_imagenet_tune/"); // hard-coded absolute path; every file below is read from here
+  std::string input_path = dir_prefix + std::string("test_input.bin");
+  std::string labels_path = dir_prefix + std::string("test_labels.bin");
+  std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
+  void *conv2d_1_w =
+      readTrainedWeights(conv2d_1_w_path.c_str(), 0, 64, 3, 11, 11); // ints are presumably a type code then four tensor dims — TODO confirm against tensorUtils.h
+  std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin");
+  void *conv2d_1_b =
+      readTrainedWeights(conv2d_1_b_path.c_str(), 0, 1, 64, 1, 1);
+  std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin");
+  void *conv2d_2_w =
+      readTrainedWeights(conv2d_2_w_path.c_str(), 0, 192, 64, 5, 5);
+  std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin");
+  void *conv2d_2_b =
+      readTrainedWeights(conv2d_2_b_path.c_str(), 0, 1, 192, 1, 1);
+  std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin");
+  void *conv2d_3_w =
+      readTrainedWeights(conv2d_3_w_path.c_str(), 0, 384, 192, 3, 3);
+  std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin");
+  void *conv2d_3_b =
+      readTrainedWeights(conv2d_3_b_path.c_str(), 0, 1, 384, 1, 1);
+  std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin");
+  void *conv2d_4_w =
+      readTrainedWeights(conv2d_4_w_path.c_str(), 0, 256, 384, 3, 3);
+  std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin");
+  void *conv2d_4_b =
+      readTrainedWeights(conv2d_4_b_path.c_str(), 0, 1, 256, 1, 1);
+  std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin");
+  void *conv2d_5_w =
+      readTrainedWeights(conv2d_5_w_path.c_str(), 0, 256, 256, 3, 3);
+  std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin");
+  void *conv2d_5_b =
+      readTrainedWeights(conv2d_5_b_path.c_str(), 0, 1, 256, 1, 1);
+  std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin");
+  void *dense_1_w =
+      readTrainedWeights(dense_1_w_path.c_str(), 0, 1, 1, 9216, 4096);
+  std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin");
+  void *dense_1_b =
+      readTrainedWeights(dense_1_b_path.c_str(), 0, 1, 4096, 1, 1);
+  std::string dense_2_w_path = dir_prefix + std::string("dense_2_w.bin");
+  void *dense_2_w =
+      readTrainedWeights(dense_2_w_path.c_str(), 0, 1, 1, 4096, 4096);
+  std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin");
+  void *dense_2_b =
+      readTrainedWeights(dense_2_b_path.c_str(), 0, 1, 4096, 1, 1);
+  std::string dense_3_w_path = dir_prefix + std::string("dense_3_w.bin");
+  void *dense_3_w =
+      readTrainedWeights(dense_3_w_path.c_str(), 0, 1, 1, 4096, 1000);
+  std::string dense_3_b_path = dir_prefix + std::string("dense_3_b.bin");
+  void *dense_3_b =
+      readTrainedWeights(dense_3_b_path.c_str(), 0, 1, 1000, 1, 1);
+  // void* input = readTrainedWeights(input_path.c_str(), 0, 1000,3,224,224);
+  // uint32_t* labels = readLabels2(labels_path.c_str(),6000);
+
+  // uint32_t* labels = readLabels3(labels_path.c_str(), 1000);
+
+  __visc__init(); // paired with __visc__cleanup() at the end of main()
+  RootIn *args = static_cast<RootIn *>(malloc(sizeof(RootIn))); // NOTE(review): result is not null-checked and no free(args) is visible in this diff
+
+  // args->input = input;
+  // args->input_bytes = 0;
+  args->conv2d_1_w = conv2d_1_w;
+  args->conv2d_1_w_bytes = 0; // every *_bytes field is set to 0; presumably ignored for tensor handles — TODO confirm
+  args->conv2d_1_b = conv2d_1_b;
+  args->conv2d_1_b_bytes = 0;
+  args->conv2d_2_w = conv2d_2_w;
+  args->conv2d_2_w_bytes = 0;
+  args->conv2d_2_b = conv2d_2_b;
+  args->conv2d_2_b_bytes = 0;
+  args->conv2d_3_w = conv2d_3_w;
+  args->conv2d_3_w_bytes = 0;
+  args->conv2d_3_b = conv2d_3_b;
+  args->conv2d_3_b_bytes = 0;
+  args->conv2d_4_w = conv2d_4_w;
+  args->conv2d_4_w_bytes = 0;
+  args->conv2d_4_b = conv2d_4_b;
+  args->conv2d_4_b_bytes = 0;
+  args->conv2d_5_w = conv2d_5_w;
+  args->conv2d_5_w_bytes = 0;
+  args->conv2d_5_b = conv2d_5_b;
+  args->conv2d_5_b_bytes = 0;
+  args->dense_1_w = dense_1_w;
+  args->dense_1_w_bytes = 0;
+  args->dense_1_b = dense_1_b;
+  args->dense_1_b_bytes = 0;
+  args->dense_2_w = dense_2_w;
+  args->dense_2_w_bytes = 0;
+  args->dense_2_b = dense_2_b;
+  args->dense_2_b_bytes = 0;
+  args->dense_3_w = dense_3_w;
+  args->dense_3_w_bytes = 0;
+  args->dense_3_b = dense_3_b;
+  args->dense_3_b_bytes = 0;
 
   int batch_size = 100;
   int test_input_size = 4000;
@@ -555,25 +563,24 @@ int main(){
   startMemTracking();
   startProfiling();
 
-  for (int j = 0; j < 1; j++){
-    for (int i = 0; i < batch_count; i++){
+  for (int j = 0; j < 1; j++) { // outer loop runs exactly once
+    for (int i = 0; i < batch_count; i++) { // one graph launch per batch
 
       int start = i * batch_size;
       int end = (i + 1) * batch_size;
 
-      void* input = readInputBatch(input_path.c_str(), 0,
-                                   start, end, 
-                                   3, 224, 224);
-     
+      void *input =
+          readInputBatch(input_path.c_str(), 0, start, end, 3, 224, 224); // presumably loads samples [start, end) — TODO confirm
+
       args->input = input;
       args->input_bytes = 0;
 
-      void* dfg = __visc__launch(0, root, (void*) args); 
-    
-      __visc__wait(dfg); 
+      void *dfg = __visc__launch(0, root, (void *)args); // returns the handle that __visc__wait consumes below
+
+      __visc__wait(dfg); // presumably blocks until the launched graph finishes — TODO confirm
 
-      void *result = static_cast<RootIn*>(args)->input; 
-      hpvm_request_tensor(result, 0); 
+      void *result = static_cast<RootIn *>(args)->input; // args is already RootIn*, so the cast is a no-op; NOTE(review): result is read from ->input, not ->r.tensor — confirm intended
+      hpvm_request_tensor(result, 0);
 
       llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end);
 
@@ -582,8 +589,7 @@ int main(){
   }
 
   stopProfiling();
-  __visc__cleanup(); 
+  __visc__cleanup(); // counterpart of the __visc__init() call above
 
-  return 0; 
-
-} 
+  return 0;
+}