Commit 7183547b authored by Hashim Sharif
Porting AlexNet to the ApproxIR representation - model matching

parent c6ef1bc6
DNN_BENCHMARK_ROOT = $(LLVM_SRC_ROOT)/test/VISC/DNN_Benchmarks
# NOTE: can configure build directory
HPVM_BUILD_DIR = $(LLVM_SRC_ROOT)/../build_new/
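# NOTE: LLVM_SRC_ROOT and CUDA_INCLUDE_PATH are not defined in this Makefile; they are
# presumably set in the environment or passed on the make command line.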
CC = $(HPVM_BUILD_DIR)/bin/clang++
OPT = $(HPVM_BUILD_DIR)/bin/opt
LLVM_DIS = $(HPVM_BUILD_DIR)/bin/llvm-dis
LLVM_LINK = $(HPVM_BUILD_DIR)/bin/llvm-link
LLVM_INCLUDE_DIR = $(LLVM_SRC_ROOT)/include
SRC_DIR = src
BUILD_DIR = build
APP = alexnet
TENSOR_INCLUDE_DIR = $(DNN_BENCHMARK_ROOT)/common/include
TENSOR_RT_INCLUDE_DIR = $(LLVM_SRC_ROOT)/projects/hpvm-tensor-rt/tensor_runtime/include
TENSOR_LIB_DIR = $(LLVM_SRC_ROOT)/projects/hpvm-tensor-rt/lib/libtensor_runtime.a
TENSOR_AUTOTUNER_DIR = $(LLVM_SRC_ROOT)/projects/hpvm-tensor-rt/lib/libtensor_autotuner.a
CC_FLAGS = -I $(LLVM_INCLUDE_DIR) -I $(TENSOR_INCLUDE_DIR) -I $(TENSOR_RT_INCLUDE_DIR) -I $(CUDA_INCLUDE_PATH) -fno-exceptions -ffast-math -std=c++11 -O3
CC_FLAGS += -DDEVICE=CUDNN_TARGET
LINKER_FLAGS = -lpthread -lcudart -lcurand -lcudnn -lcublas -lOpenCL
HPVM_LIB_DIR = $(HPVM_BUILD_DIR)/lib
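# Rough summary of the HPVM pass pipeline loaded below: BuildDFG reconstructs the dataflow
# graph from the VISC intrinsics, -inplace runs the in-place DFG analysis, -dfg2llvm-cudnn
# lowers tensor nodes to tensor-runtime (cuDNN) calls, -dfg2llvm-x86 generates the host-side
# code, and -clearDFG removes the remaining DFG intrinsics.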
VISC_OPTFLAGS = -load $(HPVM_LIB_DIR)/LLVMBuildDFG.so -load $(HPVM_LIB_DIR)/LLVMInPlaceDFGAnalysis.so -load $(HPVM_LIB_DIR)/LLVMDFG2LLVM_CUDNN.so -load $(HPVM_LIB_DIR)/LLVMDFG2LLVM_X86.so -load $(HPVM_LIB_DIR)/LLVMClearDFG.so -inplace -dfg2llvm-cudnn -dfg2llvm-x86 -clearDFG
TARGET = $(BUILD_DIR)/$(APP).opt.bc
SOURCES = $(SRC_DIR)/$(APP).cpp
VISC_RT_PATH = $(LLVM_SRC_ROOT)/../build/projects/visc-rt/visc-rt.ll
#OBJS = $(BUILD_DIR)/$(wildcard *.ll)
.PRECIOUS: $(BUILD_DIR)/$(APP).ll $(BUILD_DIR)/$(APP).visc.ll
default: $(BUILD_DIR) $(TARGET)
$(BUILD_DIR)/%.ll: $(SRC_DIR)/%.cpp
	$(CC) $(CC_FLAGS) -emit-llvm -S -o $@ $<
#-visc-timers-gen
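# The genvisc pass (LLVMGenVISC.so) converts the __visc__* calls in the clang-generated
# LLVM IR into VISC intrinsics; -globaldce then removes the now-dead declarations.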
$(BUILD_DIR)/%.visc.ll: $(BUILD_DIR)/%.ll
	$(OPT) -load LLVMGenVISC.so -genvisc -globaldce $< -S -o $@
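# The rule below lowers the DFG, links the result against the VISC runtime, and produces two
# executables: $(APP)_linked (against the tensor runtime) and $(APP)_tune (against the autotuner).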
$(BUILD_DIR)/%.opt.bc: $(BUILD_DIR)/%.visc.ll
	$(OPT) $(VISC_OPTFLAGS) $< -o $@
	$(LLVM_LINK) $@ $(VISC_RT_PATH) -o $(BUILD_DIR)/$(APP)_linked.bc
	$(CC) $(BUILD_DIR)/$(APP)_linked.bc $(TENSOR_LIB_DIR) -o $(BUILD_DIR)/$(APP)_linked $(LINKER_FLAGS)
	$(CC) $(BUILD_DIR)/$(APP)_linked.bc $(TENSOR_AUTOTUNER_DIR) -o $(BUILD_DIR)/$(APP)_tune $(LINKER_FLAGS)
$(BUILD_DIR):
	mkdir -p $@
clean:
	rm -rf $(BUILD_DIR)
#include <iostream>
#include <cstdio>
#include <cstring>
#include <cstdlib>
#include <cinttypes>
#include <visc.h>
#include <tensorTypes.h>
#include <tensorUtils.h>
using namespace std;
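// Each leaf node below follows the same convention: it receives (tensor pointer,
// size-in-bytes) pairs, declares a target hint (CUDNN_TARGET) and its read-only inputs via
// __visc__attributes, performs a single tensor operation, and returns a (tensor, bytes) pair
// through __visc__return(2, r, bytes).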
/*** DNN Layer 1 ***/
void tensorConvNode1(void *t1, size_t bytes1, void *t2, size_t bytes2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_convolution(t1, t2, 5, 5, 1, 1);
__visc__return(2, r, (size_t) 0);
}
void tensorAddNode1(void *t1, size_t bytest1, void *t2, size_t bytest2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
void* r = __visc__tensor_add(t1, t2);
__visc__return(2, r, (size_t) 0);
}
void tensorPoolNode1(void *t1, size_t bytest1) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
__visc__return(2, r, (size_t) 0);
}
void tensorTanhNode1(void *t1, size_t bytest1) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_tanh(t1);
__visc__return(2, r, (size_t) 0);
}
/** End of Layer 1 **/
/*** DNN Layer 2 ***/
void tensorConvNode2(void *t1, size_t bytes1, void *t2, size_t bytes2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
// X * W = t2 * t1
void *r = __visc__tensor_convolution(t1, t2, 2, 2, 1, 1);
__visc__return(2, r, (size_t) 0);
}
void tensorAddNode2(void *t1, size_t bytest1, void *t2, size_t bytest2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
void* r = __visc__tensor_add(t1, t2);
__visc__return(2, r, (size_t) 0);
}
void tensorPoolNode2(void *t1, size_t bytest1) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
__visc__return(2, r, (size_t) 0);
}
void tensorTanhNode2(void *t1, size_t bytest1) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_tanh(t1);
__visc__return(2, r, (size_t) 0);
}
/** End of Layer 2 **/
/*** DNN Layer 3 ***/
void tensorConvNode3(void *t1, size_t bytes1, void *t2, size_t bytes2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
// X * W = t2 * t1
void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1);
__visc__return(2, r, (size_t) 0);
}
// FIXIT: Consider reusing the add, tanh, and pool node functions across layers.
void tensorAddNode3(void *t1, size_t bytest1, void *t2, size_t bytest2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
void* r = __visc__tensor_add(t1, t2);
__visc__return(2, r, (size_t) 0);
}
void tensorTanhNode3(void *t1, size_t bytest1) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_tanh(t1);
__visc__return(2, r, (size_t) 0);
}
/** End of Layer 3 **/
/*** DNN Layer 4 ***/
void tensorConvNode4(void *t1, size_t bytes1, void *t2, size_t bytes2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
// X * W = t2 * t1
void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1);
__visc__return(2, r, (size_t) 0);
}
// FIXIT: Consider reusing the add, tanh, and pool node functions across layers.
void tensorAddNode4(void *t1, size_t bytest1, void *t2, size_t bytest2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
void* r = __visc__tensor_add(t1, t2);
__visc__return(2, r, (size_t) 0);
}
void tensorTanhNode4(void *t1, size_t bytest1) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_tanh(t1);
__visc__return(2, r, (size_t) 0);
}
/** End of Layer 4 **/
/*** DNN Layer 5 ***/
void tensorConvNode5(void *t1, size_t bytes1, void *t2, size_t bytes2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1);
__visc__return(2, r, (size_t) 0);
}
void tensorAddNode5(void *t1, size_t bytest1, void *t2, size_t bytest2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
void* r = __visc__tensor_add(t1, t2);
__visc__return(2, r, (size_t) 0);
}
void tensorPoolNode5(void *t1, size_t bytest1) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
__visc__return(2, r, (size_t) 0);
}
void tensorTanhNode5(void *t1, size_t bytest1) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_tanh(t1);
__visc__return(2, r, (size_t) 0);
}
/** End of Layer 5 **/
/*** DNN Layer 6 - FC layer ***/
void tensorMulNode6(void *t1, size_t bytes1, void *t2, size_t bytes2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
// X * W = t2 * t1
void *r = __visc__tensor_mul(t1, t2);
__visc__return(2, r, (size_t) 0);
}
void tensorAddNode6(void *t1, size_t bytest1, void *t2, size_t bytest2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
void* r = __visc__tensor_add(t1, t2);
__visc__return(2, r, (size_t) 0);
}
/** End of Layer 6 **/
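// root() builds the dataflow graph for the whole network: it creates one node per tensor
// operation, binds the root arguments (input and weights) to node inputs with __visc__bindIn,
// and chains node outputs to node inputs with __visc__edge.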
void root(void *x, size_t x_bytes,
void *conv1_w, size_t conv1_w_bytes,
void *conv1_b, size_t conv1_b_bytes,
void *conv2_w, size_t conv2_w_bytes,
void *conv2_b, size_t conv2_b_bytes,
void *conv3_w, size_t conv3_w_bytes,
void *conv3_b, size_t conv3_b_bytes,
void *conv4_w, size_t conv4_w_bytes,
void *conv4_b, size_t conv4_b_bytes,
void *conv5_w, size_t conv5_w_bytes,
void *conv5_b, size_t conv5_b_bytes,
void *fc1_w, size_t fc1_w_bytes,
void *fc1_b, size_t fc1_b_bytes){
__visc__hint(visc::CPU_TARGET);
__visc__attributes(13, x, conv1_w, conv1_b, conv2_w, conv2_b, conv3_w, conv3_b,
conv4_w, conv4_b, conv5_w, conv5_b, fc1_w, fc1_b, 0);
// Conv1 Nodes
void *nodeConv1 = __visc__createNodeND(0, tensorConvNode1);
void *nodeAdd1 = __visc__createNodeND(0, tensorAddNode1);
void *nodeTanh1 = __visc__createNodeND(0, tensorTanhNode1);
void *nodePool1 = __visc__createNodeND(0, tensorPoolNode1);
// Conv2 Nodes
void *nodeConv2 = __visc__createNodeND(0, tensorConvNode2);
void *nodeAdd2 = __visc__createNodeND(0, tensorAddNode2);
void *nodeTanh2 = __visc__createNodeND(0, tensorTanhNode2);
void *nodePool2 = __visc__createNodeND(0, tensorPoolNode2);
// Conv3 Nodes
void *nodeConv3 = __visc__createNodeND(0, tensorConvNode3);
void *nodeAdd3 = __visc__createNodeND(0, tensorAddNode3);
void *nodeTanh3 = __visc__createNodeND(0, tensorTanhNode3);
// Conv4 Nodes
void *nodeConv4 = __visc__createNodeND(0, tensorConvNode4);
void *nodeAdd4 = __visc__createNodeND(0, tensorAddNode4);
void *nodeTanh4 = __visc__createNodeND(0, tensorTanhNode4);
// Conv5 Nodes
void *nodeConv5 = __visc__createNodeND(0, tensorConvNode5);
void *nodeAdd5 = __visc__createNodeND(0, tensorAddNode5);
void *nodeTanh5 = __visc__createNodeND(0, tensorTanhNode5);
void *nodePool5 = __visc__createNodeND(0, tensorPoolNode5);
// FC1 Nodes
void *nodeMul6 = __visc__createNodeND(0, tensorMulNode6);
void *nodeAdd6 = __visc__createNodeND(0, tensorAddNode6);
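// Index convention (as read from the calls below): every (pointer, bytes) parameter pair of
// root() occupies two consecutive argument slots, e.g. conv1_w/conv1_w_bytes are slots 2/3
// and conv2_w/conv2_w_bytes are slots 6/7. __visc__bindIn(node, src, dst, stream) forwards
// root argument 'src' to node input 'dst'; __visc__edge(from, to, 1, out, in, stream)
// forwards output 'out' of node 'from' to input 'in' of node 'to'.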
/**** Conv Layer 1 ****/
// node, src, dst, stream
__visc__bindIn(nodeConv1, 0, 0, 0);
__visc__bindIn(nodeConv1, 1, 1, 0);
__visc__bindIn(nodeConv1, 2, 2, 0);
__visc__bindIn(nodeConv1, 3, 3, 0);
// node, node, type, src, dst, stream
__visc__edge(nodeConv1, nodeAdd1, 1, 0, 0, 0);
__visc__edge(nodeConv1, nodeAdd1, 1, 1, 1, 0);
// parent_index, dest_index, bind_type
__visc__bindIn(nodeAdd1, 4, 2, 0);
__visc__bindIn(nodeAdd1, 5, 3, 0);
// node, node, type, src, dst, stream
__visc__edge(nodeAdd1, nodeTanh1, 1, 0, 0, 0);
__visc__edge(nodeAdd1, nodeTanh1, 1, 1, 1, 0);
// node, node, type, src, dst, stream
__visc__edge(nodeTanh1, nodePool1, 1, 0, 0, 0);
__visc__edge(nodeTanh1, nodePool1, 1, 1, 1, 0);
/**** Conv Layer 2 ****/
// ConvOp2
__visc__bindIn(nodeConv2, 6, 2, 0);
__visc__bindIn(nodeConv2, 7, 3, 0);
__visc__edge(nodePool1, nodeConv2, 1, 0, 0, 0);
__visc__edge(nodePool1, nodeConv2, 1, 1, 1, 0);
// AddOp2
__visc__bindIn(nodeAdd2, 8, 2, 0);
__visc__bindIn(nodeAdd2, 9, 3, 0);
__visc__edge(nodeConv2, nodeAdd2, 1, 0, 0, 0);
__visc__edge(nodeConv2, nodeAdd2, 1, 1, 1, 0);
// TanhOp2
__visc__edge(nodeAdd2, nodeTanh2, 1, 0, 0, 0);
__visc__edge(nodeAdd2, nodeTanh2, 1, 1, 1, 0);
// PoolOp2
__visc__edge(nodeTanh2, nodePool2, 1, 0, 0, 0);
__visc__edge(nodeTanh2, nodePool2, 1, 1, 1, 0);
/**** Conv Layer 3 ****/
// ConvOp3
__visc__bindIn(nodeConv3, 10, 2, 0);
__visc__bindIn(nodeConv3, 11, 3, 0);
__visc__edge(nodePool2, nodeConv3, 1, 0, 0, 0);
__visc__edge(nodePool2, nodeConv3, 1, 1, 1, 0);
// AddOp3
__visc__bindIn(nodeAdd3, 12, 2, 0);
__visc__bindIn(nodeAdd3, 13, 3, 0);
__visc__edge(nodeConv3, nodeAdd3, 1, 0, 0, 0);
__visc__edge(nodeConv3, nodeAdd3, 1, 1, 1, 0);
// TanhOp3
__visc__edge(nodeAdd3, nodeTanh3, 1, 0, 0, 0);
__visc__edge(nodeAdd3, nodeTanh3, 1, 1, 1, 0);
/**** Conv Layer 4 ****/
// ConvOp4
__visc__bindIn(nodeConv4, 14, 2, 0);
__visc__bindIn(nodeConv4, 15, 3, 0);
__visc__edge(nodeTanh3, nodeConv4, 1, 0, 0, 0);
__visc__edge(nodeTanh3, nodeConv4, 1, 1, 1, 0);
// AddOp4
__visc__bindIn(nodeAdd4, 16, 2, 0);
__visc__bindIn(nodeAdd4, 17, 3, 0);
__visc__edge(nodeConv4, nodeAdd4, 1, 0, 0, 0);
__visc__edge(nodeConv4, nodeAdd4, 1, 1, 1, 0);
// TanhOp4
__visc__edge(nodeAdd4, nodeTanh4, 1, 0, 0, 0);
__visc__edge(nodeAdd4, nodeTanh4, 1, 1, 1, 0);
/**** Conv Layer 5 ****/
// ConvOp5
__visc__bindIn(nodeConv5, 18, 2, 0);
__visc__bindIn(nodeConv5, 19, 3, 0);
__visc__edge(nodeTanh4, nodeConv5, 1, 0, 0, 0);
__visc__edge(nodeTanh4, nodeConv5, 1, 1, 1, 0);
// AddOp5
__visc__bindIn(nodeAdd5, 20, 2, 0);
__visc__bindIn(nodeAdd5, 21, 3, 0);
__visc__edge(nodeConv5, nodeAdd5, 1, 0, 0, 0);
__visc__edge(nodeConv5, nodeAdd5, 1, 1, 1, 0);
// TanhOp5
__visc__edge(nodeAdd5, nodeTanh5, 1, 0, 0, 0);
__visc__edge(nodeAdd5, nodeTanh5, 1, 1, 1, 0);
// PoolOp5
__visc__edge(nodeTanh5, nodePool5, 1, 0, 0, 0);
__visc__edge(nodeTanh5, nodePool5, 1, 1, 1, 0);
/**** FC Layer 1 ****/
// MulOp6
__visc__bindIn(nodeMul6, 22, 2, 0);
__visc__bindIn(nodeMul6, 23, 3, 0);
__visc__edge(nodePool5, nodeMul6, 1, 0, 0, 0);
__visc__edge(nodePool5, nodeMul6, 1, 1, 1, 0);
// AddOp6
__visc__bindIn(nodeAdd6, 24, 2, 0);
__visc__bindIn(nodeAdd6, 25, 3, 0);
__visc__edge(nodeMul6, nodeAdd6, 1, 0, 0, 0);
__visc__edge(nodeMul6, nodeAdd6, 1, 1, 1, 0);
/**** Output Binding ****/
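// __visc__bindOut(node, out_index, root_out_index, stream): the two outputs of nodeAdd6
// (result tensor and its size) become the outputs of the root node.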
__visc__bindOut(nodeAdd6, 0, 0, 0);
__visc__bindOut(nodeAdd6, 1, 1, 0);
}
// Return type for the nodes
struct ret_t {
void *tensor;
size_t bytes;
};
typedef struct __attribute__((__packed__)) {
void *x;
size_t x_bytes;
// 1st Layer parameters
void* conv1_w;
size_t conv1_w_bytes;
void* conv1_b;
size_t conv1_b_bytes;
// 2nd Layer parameters
void* conv2_w;
size_t conv2_w_bytes;
void* conv2_b;
size_t conv2_b_bytes;
// 3rd Layer parameters
void* conv3_w;
size_t conv3_w_bytes;
void* conv3_b;
size_t conv3_b_bytes;
// 4th Layer parameters
void* conv4_w;
size_t conv4_w_bytes;
void* conv4_b;
size_t conv4_b_bytes;
// 5th Layer parameters
void* conv5_w;
size_t conv5_w_bytes;
void* conv5_b;
size_t conv5_b_bytes;
// 6th Layer parameters
void* fc1_w;
size_t fc1_w_bytes;
void* fc1_b;
size_t fc1_b_bytes;
struct ret_t r;
} RootIn;
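// NOTE: the field order of RootIn mirrors the parameter order of root(); the launch call
// in main() passes this packed struct as the root node's argument block.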
int main() {
int test_batch_size = 5000;
std::string prefix = "../../../../../../projects/hpvm-tensor-rt/model_params";
std::string input_data_path = prefix + std::string("/alexnet_cifar10/norm_cifar_input.bin");
std::string labels_path = prefix + std::string("/alexnet_cifar10/test_labels.bin");
std::string conv1_w_path = prefix + std::string("/alexnet_cifar10/conv1.bin");
std::string conv1_b_path = prefix + std::string("/alexnet_cifar10/conv1_bias.bin");
std::string conv2_w_path = prefix + std::string("/alexnet_cifar10/conv2.bin");
std::string conv2_b_path = prefix + std::string("/alexnet_cifar10/conv2_bias.bin");
std::string conv3_w_path = prefix + std::string("/alexnet_cifar10/conv3.bin");
std::string conv3_b_path = prefix + std::string("/alexnet_cifar10/conv3_bias.bin");
std::string conv4_w_path = prefix + std::string("/alexnet_cifar10/conv4.bin");
std::string conv4_b_path = prefix + std::string("/alexnet_cifar10/conv4_bias.bin");
std::string conv5_w_path = prefix + std::string("/alexnet_cifar10/conv5.bin");
std::string conv5_b_path = prefix + std::string("/alexnet_cifar10/conv5_bias.bin");
std::string fc1_w_path = prefix + std::string("/alexnet_cifar10/fc1.bin");
std::string fc1_b_path = prefix + std::string("/alexnet_cifar10/fc1_bias.bin");
printf("Reading Input Data from = %s \n", input_data_path.c_str());
uint8_t* labels = readLabels(labels_path.c_str(), test_batch_size);
void* x = readTrainedWeights(input_data_path.c_str(), float_type,
test_batch_size, 3, 32, 32);
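// readTrainedWeights is assumed to take (path, element type, then the four tensor
// dimensions in NCHW order) and to return a handle to the loaded tensor.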
// Loading convolution weights
void* conv1_w = readTrainedWeights(conv1_w_path.c_str(), float_type, 64, 3, 11, 11);
void* conv1_b = readTrainedWeights(conv1_b_path.c_str(), float_type, 1, 64, 1, 1);
void* conv2_w = readTrainedWeights(conv2_w_path.c_str(), float_type, 192, 64, 5, 5);
void* conv2_b = readTrainedWeights(conv2_b_path.c_str(), float_type, 1, 192, 1, 1);
void* conv3_w = readTrainedWeights(conv3_w_path.c_str(), float_type, 384, 192, 3, 3);
void* conv3_b = readTrainedWeights(conv3_b_path.c_str(), float_type, 1, 384, 1, 1);
void* conv4_w = readTrainedWeights(conv4_w_path.c_str(), float_type, 256, 384, 3, 3);
void* conv4_b = readTrainedWeights(conv4_b_path.c_str(), float_type, 1, 256, 1, 1);
void* conv5_w = readTrainedWeights(conv5_w_path.c_str(), float_type, 256, 256, 3, 3);
void* conv5_b = readTrainedWeights(conv5_b_path.c_str(), float_type, 1, 256, 1, 1);
// Loading FC layer weights
void* fc1_w = readTrainedWeights(fc1_w_path.c_str(), float_type, 1, 1, 4096, 10);
void* fc1_b = readTrainedWeights(fc1_b_path.c_str(), float_type, 1, 10, 1, 1);
__visc__init();
RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn)));
args->x = x;
args->x_bytes = 0;
// Conv Layers params
args->conv1_w = conv1_w;
args->conv1_w_bytes = 0;
args->conv1_b = conv1_b;
args->conv1_b_bytes = 0;
args->conv2_w = conv2_w;
args->conv2_w_bytes = 0;
args->conv2_b = conv2_b;
args->conv2_b_bytes = 0;
args->conv3_w = conv3_w;
args->conv3_w_bytes = 0;
args->conv3_b = conv3_b;
args->conv3_b_bytes = 0;
args->conv4_w = conv4_w;
args->conv4_w_bytes = 0;
args->conv4_b = conv4_b;
args->conv4_b_bytes = 0;
args->conv5_w = conv5_w;
args->conv5_w_bytes = 0;
args->conv5_b = conv5_b;
args->conv5_b_bytes = 0;
// FC Layers params
args->fc1_w = fc1_w;
args->fc1_w_bytes = 0;
args->fc1_b = fc1_b;
args->fc1_b_bytes = 0;
void *dfg = __visc__launch(0, root, (void *)args);
__visc__wait(dfg);
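// __visc__launch(0, root, args) launches the dataflow graph with 'args' as the packed
// argument struct (the leading 0 presumably selects a non-streaming launch);
// __visc__wait(dfg) blocks until the graph has finished executing.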
// FIXME: Value returned in the wrong index!!
//void *r = static_cast<RootIn*>(args)->r.tensor;
void *result = static_cast<RootIn*>(args)->x;
hpvm_request_tensor(result, 0);
__visc__cleanup();
computeAccuracy2(labels, test_batch_size, result);
return 0;
}