Commit 10a57c79 authored by shingjan

change code gen and graph ir for tensor runtime

parent 6491cfe3
Showing with 368 additions and 1632 deletions
@@ -5,7 +5,7 @@ class GraphCodeGen:
        self._root = ""
        self._root_struct = ""
        self._main_func = ""

    def emitHeaders(self):
        headers = "\n#include <stdio.h> \n"
        headers += "#include <stdlib.h> \n"
@@ -17,7 +17,7 @@ class GraphCodeGen:
        headers += "#include <tensorTypes.h> \n"
        headers += "#include <tensorUtils.h> \n\n"
        self._headers = headers

    def emitRoot(self):
        def emitRootNodeHeader():
            root_signature = "void root("
@@ -27,29 +27,31 @@ class GraphCodeGen:
                root_signature += "\t "
                self.filter_names[f_name] = index
                root_signature += "void* " + f_name + ", "
                root_signature += "size_t " + f_name + "_bytes"
                if index < len(self.filter_names) - 1:
                    root_signature += ", \n"
                index += 1
            root_signature += "){ \n\n"
            root_signature += "\n __visc__hint(visc::CPU_TARGET); \n"
            root_signature += " __visc__attributes(" + \
                str(len(self.filter_names)) + ", "
            index = 0
            for f_name in self.filter_names:
                root_signature += f_name
                if index < len(self.filter_names) - 1:
                    root_signature += ", "
                index += 1
            root_signature += ", 0); \n\n"
            return root_signature
        def emitRootNodeFooter(self):
            last_node = self.dfg.last_node
            output_var = self.output_map[last_node.layer_name]
            # Binding output of last DFG node to the Root Node output
            root_footer_str = "\n __visc__bindOut(" + \
                output_var + ", 0, 0, 0); \n"
            root_footer_str += " __visc__bindOut(" + \
                output_var + ", 1, 1, 0); \n"
            root_footer_str += "\n}\n\n"
            return root_footer_str
@@ -71,14 +73,14 @@ class GraphCodeGen:
        self._root_struct += emitRootStructure()
        self.codegen(self.dfg)
        self._root += emitRootNodeFooter()

    def emitMainFunc(self, test_data):
        main_func_str = "int main(){ \n\n"
        main_func_str += self.weight_str
        main_func_str += self.input_str
        main_func_str += "\n__visc__init(); \n"
        main_func_str += "RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn))); \n\n"
        for f_name in self.filter_names:
            main_func_str += "args->" + f_name + " = " + f_name + "; \n"
            main_func_str += "args->" + f_name + "_bytes = 0; \n"
        main_func_str += "\nvoid* dfg = __visc__launch(0, root, (void*) args); \n\n"
@@ -86,23 +88,22 @@ class GraphCodeGen:
        main_func_str += "void *result = static_cast<RootIn*>(args)->input; \n"
        main_func_str += "hpvm_request_tensor(result, 0); \n\n"
        main_func_str += "__visc__cleanup(); \n "
        main_func_str += "computeAccuracy3(labels, result); \n"
        main_func_str += "return 0; \n\n"
        main_func_str += "} \n"
        self._main_func += main_func_str

    def emitSource(self, dir_prefix):
        source = self._headers + self._nodes + self._root
        source += self._root_struct + self._main_func
        print(source)
        f = open(dir_prefix + "/approxhpvm_src.cc", "w+")
        f.write(source)
        f.close()

    def codegen(self, model, weights_dir, test_data):
        self.emitHeaders()
        self.emitRoot()
        self.emitMainFunc(test_data)
        # dump generated program string to source file
        self.emitSource(weights_dir)
\ No newline at end of file
import sys
import os
from onnx import numpy_helper


class Tensor(object):
    def __init__(self, proto):
        if not proto.name.strip():
            raise ValueError("Tensor's name is required.")
        self.name = proto.name

    def __str__(self):
        return "Tensor: {}\n".format(self.name)
    __repr__ = __str__


# Can be either input or weight tensor
class WeightTensor(Tensor):
    def __init__(self, input_proto):
        Tensor.__init__(self, input_proto)
        self.input_data = numpy_helper.to_array(input_proto)  # .reshape(tuple(input_proto.dims))


class InputTensor(Tensor):
    def __init__(self, inter_name):
        self.name = inter_name
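

# Illustrative sketch (not part of this commit): how these wrappers are meant
# to be populated from an ONNX model, mirroring what graph_builder does below.
# The model path is hypothetical.
def _example_tensor_table(model_path="../models/keras/lenet.onnx"):
    import onnx
    model = onnx.load(model_path)
    tensors = {}
    for init in model.graph.initializer:    # trained weights
        tensors[init.name] = WeightTensor(init)
    for inp in model.graph.input:           # remaining inputs are activations
        if inp.name not in tensors:
            tensors[inp.name] = InputTensor(inp.name)
    return tensors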
import sys
from onnx import numpy_helper
from graph_ir import Node
from common import InputTensor, WeightTensor

support_onnx_ops = {"DepthwiseConv2D": None,
                    "Conv": [2],  # only 2d supported here
                    "MatMul": None,
                    "MaxPool": [2],  # only 2d supported here
                    "Activation": None,
                    "BatchNormalization": None,
                    "Flatten": None,
                    "Add": None,
                    "Relu": None,
                    "Softmax": None,
                    "Identity": None}
class DFG(object):
    root_set = False

    def __init__(self):
        self.node_map = {}
@@ -15,12 +33,10 @@ class DFG(object):
            "Flatten"}
        self.multiInputLayers = {"Add"}

    def hasSingleInput(self, layer):
        layer_name = layer.__class__.__name__
        return layer_name in self.singleInputLayers

    def hasMultipleInputs(self, layer):
        layer_name = layer.__class__.__name__
        return layer_name in self.multiInputLayers
@@ -30,36 +46,37 @@ class DFG(object):
        inbound_node_name = inbound_node_name.split("/")[0]
        if inbound_node_name in self.node_map:
            inbound_node = self.node_map[inbound_node_name]
            print(inbound_node_name, " found!")
            inbound_node.add_output(dfg_node)
            dfg_node.add_input(inbound_node)
        else:
            print("--inbound node NOT FOUND!")

    def add_to_graph(self, layer):
        dfg_node = DFGNode(layer)
        if not self.root_set:
            self.root_node = dfg_node
            self.root_set = True  # DFG root node is now set
        if self.hasMultipleInputs(layer):
            for j in range(len(layer.input)):
                print(type(layer.input[j]))
                print(layer.input[j].op.name)
                self.add_dfg_edge(layer.input[j].op.name, dfg_node)
        else:
            print(layer.input.name)
            self.add_dfg_edge(layer.input.name, dfg_node)
        # Adding DFG node to name mapping
        self.node_map[layer.name] = dfg_node
    # Check if all predecessor nodes have been visited thus far - reverse
    # postorder traversal
    def predVisited(self, cur_node, visited_nodes):
        for input_node in cur_node.inputs:
            if input_node.layer_name not in visited_nodes:
                return False
        # All predecessors are visited
        return True
    def traverseNode(self, cur_node, visited_nodes):
@@ -76,19 +93,20 @@ class DFG(object):
        for output_node in cur_node.outputs:
            self.traverseNode(output_node, visited_nodes)
        # NOTE: Assuming that no outbound edges implies the last node in
        # the graph
        if len(cur_node.outputs) == 0:
            self.last_node = cur_node
    # Build and Print the DFG in reverse postorder
    def buildDFG(self):
        print("\n\n ****** Traversing and Printing DFG ******* \n\n")
        visited_nodes = {}
        # Starting traversal at the DFG root node
        self.traverseNode(self.root_node, visited_nodes)
    # This should be the place where partial evaluation happens
    def emitNode(self, layer):
        if layer.op_type == "Conv":
            return Conv2DNode()
@@ -108,22 +126,21 @@ class DFG(object):
            pass
        else:
            raise ValueError("Unsupported operator type!")
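

# Illustrative sketch (not part of this commit): the predVisited gate above
# makes traverseNode emit nodes in reverse postorder -- a node is emitted only
# after all of its predecessors. A minimal standalone model of that rule:
def _toy_reverse_postorder():
    deps = {"a": [], "b": ["a"], "c": ["a"], "d": ["b", "c"]}
    order, visited = [], set()

    def visit(n):
        # same check as DFG.predVisited: skip until every input is visited
        if n in visited or any(p not in visited for p in deps[n]):
            return
        visited.add(n)
        order.append(n)
        for m in deps:
            if n in deps[m]:    # m is a successor of n
                visit(m)

    visit("a")
    return order                # ['a', 'b', 'c', 'd'] -- 'd' always last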
class GraphBuilder(object):
    def __init__(self, model, shape, dtype, opset, weight_dir):
        self._check_model(model)
        self.model = model
        self.dtype = dtype
        self.graph = model.graph
        self.opset = opset
        self.weight_dir = weight_dir
        self.shape = shape if shape else self._build_shape()
        self.dfg = DFG()
        self.tensors = dict()
    ################################################
    # Aux functions for graph building
    ################################################
@@ -140,18 +157,19 @@ class GraphBuilder(object):
            import warnings
            warnings.warn(str(e))
        except ImportError as e:
            raise ImportError(
                "Unable to import onnx.checker which is required {}".format(e))
    def _build_shape(self):
        shape = {}
        for input in self.graph.input:
            # get type of input tensor
            tensor_type = input.type.tensor_type
            # check if it has a shape:
            if (tensor_type.HasField("shape")):
                shape[input.name] = tensor_type.shape
        return shape
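
    # Illustrative note (not part of this commit): the value stored above is a
    # TensorShapeProto; concrete integers come from its repeated `dim` field,
    # e.g. [d.dim_value for d in shape[name].dim] -> [1, 1, 28, 28] for a
    # LeNet-style NCHW input (the numbers here are hypothetical).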
    def _parse_array(self, tensor_proto):
        try:
            from onnx.numpy_helper import to_array
@@ -160,15 +178,15 @@ class GraphBuilder(object):
                "Unable to import onnx which is required {}".format(e))
        np_array = to_array(tensor_proto).reshape(tuple(tensor_proto.dims))
        return np_array
    def _parse_value_proto(self, value_proto):
        """Parse ValueProto or raw str."""
        try:
            name = value_proto.name
        except AttributeError:
            name = value_proto
        return name

    def _parse_dtype(self, value_proto, dtype):
        """Parse dtype."""
        try:
@@ -177,55 +195,54 @@ class GraphBuilder(object):
        except AttributeError:
            return dtype
    def _support_check(self, node):
        op_name = node.op_type
        if op_name not in support_onnx_ops:
            return False
        else:
            if support_onnx_ops[op_name] is None:
                return True
            else:
                #print(type(node.attribute))
                for attr in node.attribute:
                    # partially evaluate the kernel shape
                    if attr.name == 'kernel_shape':
                        return len(attr.ints) in support_onnx_ops[op_name]
                return False

    def _dump_weight(self, weight_tensor):
        print("weights dump here")
    ################################################
    # Top level Graph Building functions
    # return the compilation-ready graph
    ################################################
    def build_graph(self):
        for weight_tensor in self.graph.initializer:
            self.tensors[weight_tensor.name] = WeightTensor(weight_tensor)
        for i in self.graph.input:
            if i.name not in self.tensors:
                self.tensors[i.name] = InputTensor(i.name)
        for node in self.graph.node:
            op_name = node.op_type
            #print("###############################")
            if not self._support_check(node):
                raise ValueError(
                    "Operator not currently supported: `{0}`!".format(op_name))
            #print("attribute: " + str(node.attribute))
            #print("input: " + str(node.input))
            #print("output: " + str(node.output))
            #print("###############################")
            for i in node.input:
                if i not in self.tensors:
                    raise ValueError(
                        "Compilation Interrupted for missing input!`{0}`.".format(i))
            for i in node.output:
                if i not in self.tensors:
                    self.tensors[i] = InputTensor(i)
        for tensor in self.tensors.values():
            if isinstance(tensor, WeightTensor):
                print(tensor.name)
        return self.dfg
\ No newline at end of file
@@ -4,7 +4,8 @@ import os
from graph_builder import *
from graph_ir import *


class GraphCodeGen(object):
    def __init__(self, DFG):
        self.program_str = ""
@@ -16,7 +17,7 @@ class GraphCodeGen(object):
    def dump_weights(self):
        for init_tensor in self._graph.initializer:
            # print(init_tensor)
            pass

    def traverse_graph(self, cur_node, visited):
@@ -33,7 +34,7 @@ class GraphCodeGen(object):
        self.build_graph()
        visited_nodes = {}
        self.traverse_graph(self.dfg.root, visited)

    def emit_header(self):
        headers = "\n#include <stdio.h> \n"
        headers += "#include <stdlib.h> \n"
@@ -53,9 +54,9 @@ class GraphCodeGen(object):
    def emit_footer(self, test_data):
        if test_data is not None and self.dfg.last_node is not None:
            last_node = self.dfg.last_node
            output_var = self.output_map[last_node.layer_name]
        destructors = "\nllvm_hpvm_cleanupTensorRt(); \n"
        end_main = "\nreturn 0; \n\n}\n"
        self.program_str += destructors
@@ -66,10 +67,10 @@ class GraphCodeGen(object):
        C = x_test.shape[1]
        H = x_test.shape[2]
        W = x_test.shape[3]

        loop_str = ""
        loop_str += "\nstartMemTracking(); \n\n"
        loop_str += "int test_input_size = " + str(N) + "; \n"
        loop_str += "int batch_size = " + str(N) + "; \n"
        loop_str += "int batch_count = test_input_size / batch_size; \n"
@@ -79,7 +80,7 @@ class GraphCodeGen(object):
        loop_str += "int start = i * batch_size; \n"
        loop_str += "int end = (i + 1) * batch_size; \n"
        loop_str += "\nvoid* input = readInputBatch(input_path.c_str(),0,start,end,"
        loop_str += str(C) + "," + str(H) + "," + str(W) + "); \n\n"
        self.program_str += loop_str
@@ -89,7 +90,8 @@ class GraphCodeGen(object):
        end_loop_str += "\nuint32_t* labels = readLabelsBatch3(labels_path.c_str(),start,end); \n"
        last_node = self.dfg.last_node
        output_var = self.output_map[last_node.layer_name]
        accuracy_call = "\nfloat accuracy = computeAccuracy3(labels, " + \
            output_var + "); \n"
        end_loop_str += accuracy_call
        #end_loop_str += "float accuracy = computeAccuracy2(labels, batch_size, var_60); "
        end_loop_str += "final_accuracy += accuracy; \n"
@@ -114,8 +116,8 @@ class GraphCodeGen(object):
    def codegen(self, weights_dir, test_data, test_labels):
        if os.path.exists(weights_dir):
            raise ValueError("Weight dir already exists. Compilation interrupted!")
        os.mkdir(weights_dir)
        self.emit_header()
        self.emit_weights(weights_dir)
        self.emit_batch_loop(test_data)
        self.emit_graph()
...
################################################
# Top Level DFGNode interface
################################################


class DFGNode(object):
    def add_output(self, output_node):
        self.outputs.append(output_node)

    def add_input(self, input_node):
        self.inputs.append(input_node)

    def __init__(self, layer):
        self.inputs = []
        self.outputs = []
        self.name = layer.name
        self.op_type = layer.op_type
'''
Element-wise operators that serve as activation functions,
e.g. HardSigmoid, LeakyRelu, PRelu, Pow, Reciprocal,
Relu, Selu, Sigmoid, Softplus, Sqrt, ThresholdedRelu,
Abs, Ceil, Elu, Floor, Neg
'''


class ActivationNode(DFGNode):
    pass


'''
Element-wise operators that are not activation functions;
in other words, they are logical comparison operators,
e.g. And, Equal, Greater, GreaterOrEqual, Less, LessOrEqual,
Or, Xor
'''


class LogicalOpNode(DFGNode):
    pass
class Node(object):
    def __init__(self, name, shape, dtype):
        self._name = name
        self._shape = shape if shape else {}
        self._dtype = dtype

    def __str__(self):
        return "Node: " + self._name + " with shape: " + \
            str(self._shape) + " and data type " + str(self._dtype)
    __repr__ = __str__
################################################
# Actual Implementation of Operators
################################################


class AddNode(DFGNode):
    def __init__(self, layer):
        DFGNode.__init__(self, layer)

    def codegen(self):
        input_vars = self.getMultipleInputNames(cur_node)
        inst_str = "void* " + out_var_name1 + " = "
        inst_str += "tensorAdd(" + \
            input_vars[0] + ", " + input_vars[1] + "); \n"
        return inst_str
class MatMulNode(DFGNode):
    def __init__(self, layer):
        DFGNode.__init__(self, layer)

    def codegen(self):
        pass


class SoftMaxNode(DFGNode):
    def __init__(self, layer):
        DFGNode.__init__(self, layer)

    def codegen(self):
        pass


class Conv2DNode(DFGNode):
    def __init__(self, layer):
        DFGNode.__init__(self, layer)
@@ -73,6 +95,7 @@ class Conv2DNode(DFGNode):
        self.strides = layer.strides
        print("\t", self.strides)
        print("\tPadding = ", self.padding)

    def codegen(self):
        input_var_name = self.getSingleInputName(cur_node)
        weights = cur_node.weights
@@ -81,12 +104,12 @@ class Conv2DNode(DFGNode):
        if cur_node.padding.strip() == "valid":
            padding = 0
        else:
            padding = cur_node.padding
            padding = int((weights.shape[0] - 1) / 2)
        prev_padding = self.getPrevLayerPadding(cur_node)
        if prev_padding is not None:
            # FIXME: currently only supporting symmetric padding
            padding = prev_padding[0][0]
        inst_str = "void* " + out_var_name1 + " = "
        inst_str += "tensorConvolution(" + input_var_name + ", "
@@ -102,11 +125,13 @@ class Conv2DNode(DFGNode):
        else:
            inst_str += "1); \n"
        if strides[0] > 1 and cur_node.padding.strip() == "same":
            print("!ERROR: Same Padding not supported for Conv with Stride > 1")
            print("Use: ZeroPadding2D(padding=(" +
                  str(padding) + "," + str(padding) + "));\n")
            sys.exit(0)
        return inst_str
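

# Illustrative note (not part of this commit): for a stride-1 3x3 convolution
# this codegen is meant to emit a tensor-runtime call of the general form
# (variable names hypothetical, trailing args are padding/stride values):
#   void* var_0 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, ...);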
class DepthwiseConv2DNode(DFGNode):
    def __init__(self, layer):
        DFGNode.__init__(self, layer)
@@ -117,6 +142,7 @@ class DepthwiseConv2DNode(DFGNode):
        self.strides = layer.strides
        print("\t", self.strides)
        print("\tPadding = ", self.padding)

    def codegen(self):
        input_var_name = self.getSingleInputName(cur_node)
        weights = cur_node.weights
@@ -125,12 +151,12 @@ class DepthwiseConv2DNode(DFGNode):
        if cur_node.padding.strip() == "valid":
            padding = 0
        else:
            padding = cur_node.padding
            padding = int((weights.shape[0] - 1) / 2)
        prev_padding = self.getPrevLayerPadding(cur_node)
        if prev_padding is not None:
            # FIXME: currently only supporting symmetric padding
            padding = prev_padding[0][0]
        inst_str = "void* " + out_var_name1 + " = "
        inst_str += "tensorConvolution(" + input_var_name + ", "
@@ -146,17 +172,20 @@ class DepthwiseConv2DNode(DFGNode):
        else:
            inst_str += "1); \n"
        if strides[0] > 1 and cur_node.padding.strip() == "same":
            print("!ERROR: Same Padding not supported for Conv with Stride > 1")
            print("Use: ZeroPadding2D(padding=(" +
                  str(padding) + "," + str(padding) + "));\n")
            sys.exit(0)
        return inst_str
class DenseNode(DFGNode):
    def __init__(self, layer):
        DFGNode.__init__(self, layer)
        self.weights = layer.get_weights()[0]
        print("\t", self.weights.shape)
        self.use_bias = layer.use_bias

    def codegen(self):
        input_var_name = self.getSingleInputName(cur_node)
        weights = cur_node.weights
@@ -166,6 +195,7 @@ class DenseNode(DFGNode):
        inst_str += "); \n"
        return inst_str
class MaxPool2DNode(DFGNode):
    def __init__(self, layer):
        DFGNode.__init__(self, layer)
@@ -173,12 +203,16 @@ class MaxPool2DNode(DFGNode):
        self.strides = layer.strides
        print("\t pool_size = ", self.pool_size)
        print("\t strides = ", self.strides)

    def codegen(self):
        inst_str = "void* " + out_var_name1 + " = "
        inst_str += "tensorPooling(" + input_var_name + "," + \
            pool_type + "," + str(pool_size[0]) + "," + str(pool_size[1])
        inst_str += "," + str(padding) + "," + str(padding) + \
            "," + str(strides[0]) + "," + str(strides[1])
        inst_str += "); \n"
        return inst_str
class AveragePooling2DNode(DFGNode):
    def __init__(self, layer):
        DFGNode.__init__(self, layer)
@@ -186,22 +220,28 @@ class AveragePooling2DNode(DFGNode):
        self.strides = layer.strides
        print("\t pool_size = ", self.pool_size)
        print("\t strides = ", self.strides)

    def codegen(self):
        inst_str = "void* " + out_var_name1 + " = "
        inst_str += "tensorPooling(" + input_var_name + "," + \
            pool_type + "," + str(pool_size[0]) + "," + str(pool_size[1])
        inst_str += "," + str(padding) + "," + str(padding) + \
            "," + str(strides[0]) + "," + str(strides[1])
        inst_str += "); \n"
        return inst_str
class ZeroPadding2DNode(DFGNode):
    def __init__(self, layer):
        DFGNode.__init__(self, layer)
        print("***ZeroPadding \n")
        self.padding = layer.padding
        print("padding = ", self.padding)

    def codegen(self):
        pass
class BatchNormalizationNode(DFGNode):
    def __init__(self, layer):
        DFGNode.__init__(self, layer)
@@ -210,6 +250,7 @@ class BatchNormalizationNode(DFGNode):
        self.gamma = layer.gamma
        self.moving_mean = layer.moving_mean
        self.moving_variance = layer.moving_variance

    def codegen(self):
        input_var_name = self.getSingleInputName(cur_node)
        inst_str = "void* " + out_var_name1 + " = "
...
@@ -4,42 +4,42 @@ import numpy as np
import onnx
import glob
from onnx import version_converter
#from onnxruntime.backend.backend import OnnxRuntimeBackend as backend

# onnx2hpvm modules
from graph_builder import GraphBuilder
from graph_codegen import GraphCodeGen
# from approx_codegen import GraphCodeGen
def convert_version(model, new_version):
    print('The model before conversion:\n{}'.format(model))
    # A full list of supported adapters can be found here:
    # https://github.com/onnx/onnx/blob/master/onnx/version_converter.py#L21
    # Apply the version conversion on the original model
    converted_model = version_converter.convert_version(model, new_version)
    print('The model after conversion:\n{}'.format(converted_model))
    return converted_model
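

# Illustrative usage sketch (not part of this commit); the model path and
# target opset are hypothetical:
#   model_v12 = convert_version(onnx.load('../models/keras/lenet.onnx'), 12)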
def main():
    model = onnx.load('../models/keras/lenet.onnx')
    weights_dir = './test_src'
    # test_data_dir = '../models/mnist/test_data_set_0'
    # converted_model = convert_version(model)
    try:
        opset = model.opset_import[0].version if model.opset_import else 1
    except AttributeError:
        opset = 1  # default opset version set to 1 if not specified
    print("opset version: ", opset)
    gBuilder = GraphBuilder(model, None, "float32", opset, weights_dir)
    gBuilder.build_graph()
    #gCodegen = GraphCodeGen(gBuilder.build_graph())
    # gCodegen.codegen(weights_dir, test_data)#, test_labels)


if __name__ == "__main__":
...
import numpy as np
import struct
import random


def dumpLabels(file_name, Y_test):
    f = open(file_name, "wb")
    labels_map = {}
    for label in Y_test:
        label_val = 0
        if len(Y_test.shape) > 1:
            #label_val = np.int8(label[0])
            label_val = np.int32(label[0])
        else:
            #label_val = np.int8(label)
            label_val = np.int32(label)
        if label_val not in labels_map:
            labels_map[label_val] = 0
        labels_map[label_val] += 1
        f.write(label_val)
    f.close()


"""def dumpData(file_name, X_test):
    N = X_test.shape[0]
    C = X_test.shape[1]
    H = X_test.shape[2]
    W = X_test.shape[3]
    print ("*DumpData")
    print("-min_val = ", np.amin(X_test))
    print("-max_val = ", np.amax(X_test))
    f = open(file_name, "wb")
    for i in range(N):
        for j in range(C):
            for k in range(H):
                for l in range(W):
                    val = struct.unpack("f", struct.pack("f", X_test[i][j][k][l]))
                    f.write(np.float32(val[0]))
    f.close()
"""


def dumpData(file_name, X_test):
    N = X_test.shape[0]
    C = X_test.shape[1]
    H = X_test.shape[2]
    W = X_test.shape[3]
    print("*DumpData")
    print("-min_val = ", np.amin(X_test))
    print("-max_val = ", np.amax(X_test))
    f = open(file_name, "wb")
    X_test.tofile(f)
    f.close()
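

# Illustrative round-trip sketch (not part of this commit): dumpData writes
# raw native-endian values in NCHW order via tofile(), so -- assuming X_test
# is float32, as in the struct-based variant above -- it can be read back:
def loadData(file_name, N, C, H, W):
    arr = np.fromfile(file_name, dtype=np.float32)
    return arr.reshape(N, C, H, W)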
def dumpConvWeights(file_name, weights, N, C, H, W):
    print(weights.shape)
    print("*DumpConvWeights")
    print("-min_val = ", np.amin(weights))
    print("-max_val = ", np.amax(weights))
    f = open(file_name, "wb")
    for i in range(N):
        for j in range(C):
            for k in range(H):
                for l in range(W):
                    f.write(weights[k][l][j][i])
    f.close()


def dumpFcWeights(file_name, weights, H, W):
    print(weights.shape)
    print("*DumpFcWeights")
    print("-min_val = ", np.amin(weights))
    print("-max_val = ", np.amax(weights))
    f = open(file_name, "wb")
    for i in range(H):
        for j in range(W):
            f.write(weights[i][j])
    f.close()


def dumpFcBias(file_name, bias, W):
    print(bias.shape)
    print("*DumpFcBias")
    print("-min_val = ", np.amin(bias))
    print("-max_val = ", np.amax(bias))
    f = open(file_name, "wb")
    for i in range(W):
        f.write(bias[i])
    f.close()


def dumpCalibrationData(file_name, X_train, labels_fname, train_labels):
    combined_list = []
    for i in range(len(X_train)):
        tup = (X_train[i], train_labels[i])
        combined_list.append(tup)
    np.random.shuffle(combined_list)
    #X_calibration = X_train[0:5000]
    data_list = []
    labels_list = []
    for i in range(5000):
        tup = combined_list[i]
        data_list.append(tup[0])
        labels_list.append(tup[1])
    data_list = np.array(data_list)
    labels_list = np.array(labels_list)
    dumpData(file_name, data_list)
    dumpLabels(labels_fname, labels_list)
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <cstring>
#include <visc.h>
#include <tensorTypes.h>
#include <tensorUtils.h>
void var_0_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1);
__visc__return(2, r, (size_t) 0);
}
void var_1_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_add(t1, t2);
__visc__return(2, r, (size_t) 0);
}
void var_2_node(void* t1, size_t bytes_t1) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_relu(t1);
__visc__return(2, r, (size_t) 0);
}
void var_3_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1);
__visc__return(2, r, (size_t) 0);
}
void var_4_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_add(t1, t2);
__visc__return(2, r, (size_t) 0);
}
void var_5_node(void* t1, size_t bytes_t1) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_relu(t1);
__visc__return(2, r, (size_t) 0);
}
void var_6_node(void* t1, size_t bytes_t1) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
__visc__return(2, r, (size_t) 0);
}
void var_7_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1);
__visc__return(2, r, (size_t) 0);
}
void var_8_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_add(t1, t2);
__visc__return(2, r, (size_t) 0);
}
void var_9_node(void* t1, size_t bytes_t1) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_relu(t1);
__visc__return(2, r, (size_t) 0);
}
void var_10_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1);
__visc__return(2, r, (size_t) 0);
}
void var_11_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_add(t1, t2);
__visc__return(2, r, (size_t) 0);
}
void var_12_node(void* t1, size_t bytes_t1) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_relu(t1);
__visc__return(2, r, (size_t) 0);
}
void var_13_node(void* t1, size_t bytes_t1) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
__visc__return(2, r, (size_t) 0);
}
void var_14_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1);
__visc__return(2, r, (size_t) 0);
}
void var_15_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_add(t1, t2);
__visc__return(2, r, (size_t) 0);
}
void var_16_node(void* t1, size_t bytes_t1) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_relu(t1);
__visc__return(2, r, (size_t) 0);
}
void var_17_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1);
__visc__return(2, r, (size_t) 0);
}
void var_18_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_add(t1, t2);
__visc__return(2, r, (size_t) 0);
}
void var_19_node(void* t1, size_t bytes_t1) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_relu(t1);
__visc__return(2, r, (size_t) 0);
}
void var_20_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1);
__visc__return(2, r, (size_t) 0);
}
void var_21_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_add(t1, t2);
__visc__return(2, r, (size_t) 0);
}
void var_22_node(void* t1, size_t bytes_t1) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_relu(t1);
__visc__return(2, r, (size_t) 0);
}
void var_23_node(void* t1, size_t bytes_t1) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
__visc__return(2, r, (size_t) 0);
}
void var_24_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1);
__visc__return(2, r, (size_t) 0);
}
void var_25_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_add(t1, t2);
__visc__return(2, r, (size_t) 0);
}
void var_26_node(void* t1, size_t bytes_t1) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_relu(t1);
__visc__return(2, r, (size_t) 0);
}
void var_27_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1);
__visc__return(2, r, (size_t) 0);
}
void var_28_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_add(t1, t2);
__visc__return(2, r, (size_t) 0);
}
void var_29_node(void* t1, size_t bytes_t1) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_relu(t1);
__visc__return(2, r, (size_t) 0);
}
void var_30_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1);
__visc__return(2, r, (size_t) 0);
}
void var_31_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_add(t1, t2);
__visc__return(2, r, (size_t) 0);
}
void var_32_node(void* t1, size_t bytes_t1) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_relu(t1);
__visc__return(2, r, (size_t) 0);
}
void var_33_node(void* t1, size_t bytes_t1) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
__visc__return(2, r, (size_t) 0);
}
void var_34_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1);
__visc__return(2, r, (size_t) 0);
}
void var_35_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_add(t1, t2);
__visc__return(2, r, (size_t) 0);
}
void var_36_node(void* t1, size_t bytes_t1) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_relu(t1);
__visc__return(2, r, (size_t) 0);
}
void var_37_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1);
__visc__return(2, r, (size_t) 0);
}
void var_38_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_add(t1, t2);
__visc__return(2, r, (size_t) 0);
}
void var_39_node(void* t1, size_t bytes_t1) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_relu(t1);
__visc__return(2, r, (size_t) 0);
}
void var_40_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1);
__visc__return(2, r, (size_t) 0);
}
void var_41_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_add(t1, t2);
__visc__return(2, r, (size_t) 0);
}
void var_42_node(void* t1, size_t bytes_t1) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_relu(t1);
__visc__return(2, r, (size_t) 0);
}
void var_43_node(void* t1, size_t bytes_t1) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
__visc__return(2, r, (size_t) 0);
}
void var_44_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_mul(t1, t2);
__visc__return(2, r, (size_t) 0);
}
void var_45_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_add(t1, t2);
__visc__return(2, r, (size_t) 0);
}
void var_46_node(void* t1, size_t bytes_t1) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_relu(t1);
__visc__return(2, r, (size_t) 0);
}
void var_47_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_mul(t1, t2);
__visc__return(2, r, (size_t) 0);
}
void var_48_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_add(t1, t2);
__visc__return(2, r, (size_t) 0);
}
void var_49_node(void* t1, size_t bytes_t1) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_softmax(t1);
__visc__return(2, r, (size_t) 0);
}
void root(void* input, size_t input_bytes,
void* conv2d_1_w, size_t conv2d_1_w_bytes,
void* conv2d_1_b, size_t conv2d_1_b_bytes,
void* conv2d_2_w, size_t conv2d_2_w_bytes,
void* conv2d_2_b, size_t conv2d_2_b_bytes,
void* conv2d_3_w, size_t conv2d_3_w_bytes,
void* conv2d_3_b, size_t conv2d_3_b_bytes,
void* conv2d_4_w, size_t conv2d_4_w_bytes,
void* conv2d_4_b, size_t conv2d_4_b_bytes,
void* conv2d_5_w, size_t conv2d_5_w_bytes,
void* conv2d_5_b, size_t conv2d_5_b_bytes,
void* conv2d_6_w, size_t conv2d_6_w_bytes,
void* conv2d_6_b, size_t conv2d_6_b_bytes,
void* conv2d_7_w, size_t conv2d_7_w_bytes,
void* conv2d_7_b, size_t conv2d_7_b_bytes,
void* conv2d_8_w, size_t conv2d_8_w_bytes,
void* conv2d_8_b, size_t conv2d_8_b_bytes,
void* conv2d_9_w, size_t conv2d_9_w_bytes,
void* conv2d_9_b, size_t conv2d_9_b_bytes,
void* conv2d_10_w, size_t conv2d_10_w_bytes,
void* conv2d_10_b, size_t conv2d_10_b_bytes,
void* conv2d_11_w, size_t conv2d_11_w_bytes,
void* conv2d_11_b, size_t conv2d_11_b_bytes,
void* conv2d_12_w, size_t conv2d_12_w_bytes,
void* conv2d_12_b, size_t conv2d_12_b_bytes,
void* conv2d_13_w, size_t conv2d_13_w_bytes,
void* conv2d_13_b, size_t conv2d_13_b_bytes,
void* dense_1_w, size_t dense_1_w_bytes,
void* dense_1_b, size_t dense_1_b_bytes,
void* dense_2_w, size_t dense_2_w_bytes,
void* dense_2_b, size_t dense_2_b_bytes){
__visc__hint(visc::CPU_TARGET);
__visc__attributes(31, input, conv2d_1_w, conv2d_1_b, conv2d_2_w, conv2d_2_b, conv2d_3_w, conv2d_3_b, conv2d_4_w, conv2d_4_b, conv2d_5_w, conv2d_5_b, conv2d_6_w, conv2d_6_b, conv2d_7_w, conv2d_7_b, conv2d_8_w, conv2d_8_b, conv2d_9_w, conv2d_9_b, conv2d_10_w, conv2d_10_b, conv2d_11_w, conv2d_11_b, conv2d_12_w, conv2d_12_b, conv2d_13_w, conv2d_13_b, dense_1_w, dense_1_b, dense_2_w, dense_2_b, 0);
void* var_0 = __visc__createNodeND(0, var_0_node);
__visc__bindIn(var_0, 0, 0, 0);
__visc__bindIn(var_0, 1, 1, 0);
__visc__bindIn(var_0, 2, 2, 0);
__visc__bindIn(var_0, 3, 3, 0);
void* var_1 = __visc__createNodeND(0, var_1_node);
__visc__edge(var_0, var_1, 1, 0, 0, 0);
__visc__edge(var_0, var_1, 1, 1, 1, 0);
__visc__bindIn(var_1, 4, 2, 0);
__visc__bindIn(var_1, 5, 3, 0);
void* var_2 = __visc__createNodeND(0, var_2_node);
__visc__edge(var_1, var_2, 1, 0, 0, 0);
__visc__edge(var_1, var_2, 1, 1, 1, 0);
void* var_3 = __visc__createNodeND(0, var_3_node);
__visc__edge(var_2, var_3, 1, 0, 0, 0);
__visc__edge(var_2, var_3, 1, 1, 1, 0);
__visc__bindIn(var_3, 6, 2, 0);
__visc__bindIn(var_3, 7, 3, 0);
void* var_4 = __visc__createNodeND(0, var_4_node);
__visc__edge(var_3, var_4, 1, 0, 0, 0);
__visc__edge(var_3, var_4, 1, 1, 1, 0);
__visc__bindIn(var_4, 8, 2, 0);
__visc__bindIn(var_4, 9, 3, 0);
void* var_5 = __visc__createNodeND(0, var_5_node);
__visc__edge(var_4, var_5, 1, 0, 0, 0);
__visc__edge(var_4, var_5, 1, 1, 1, 0);
void* var_6 = __visc__createNodeND(0, var_6_node);
__visc__edge(var_5, var_6, 1, 0, 0, 0);
__visc__edge(var_5, var_6, 1, 1, 1, 0);
void* var_7 = __visc__createNodeND(0, var_7_node);
__visc__edge(var_6, var_7, 1, 0, 0, 0);
__visc__edge(var_6, var_7, 1, 1, 1, 0);
__visc__bindIn(var_7, 10, 2, 0);
__visc__bindIn(var_7, 11, 3, 0);
void* var_8 = __visc__createNodeND(0, var_8_node);
__visc__edge(var_7, var_8, 1, 0, 0, 0);
__visc__edge(var_7, var_8, 1, 1, 1, 0);
__visc__bindIn(var_8, 12, 2, 0);
__visc__bindIn(var_8, 13, 3, 0);
void* var_9 = __visc__createNodeND(0, var_9_node);
__visc__edge(var_8, var_9, 1, 0, 0, 0);
__visc__edge(var_8, var_9, 1, 1, 1, 0);
void* var_10 = __visc__createNodeND(0, var_10_node);
__visc__edge(var_9, var_10, 1, 0, 0, 0);
__visc__edge(var_9, var_10, 1, 1, 1, 0);
__visc__bindIn(var_10, 14, 2, 0);
__visc__bindIn(var_10, 15, 3, 0);
void* var_11 = __visc__createNodeND(0, var_11_node);
__visc__edge(var_10, var_11, 1, 0, 0, 0);
__visc__edge(var_10, var_11, 1, 1, 1, 0);
__visc__bindIn(var_11, 16, 2, 0);
__visc__bindIn(var_11, 17, 3, 0);
void* var_12 = __visc__createNodeND(0, var_12_node);
__visc__edge(var_11, var_12, 1, 0, 0, 0);
__visc__edge(var_11, var_12, 1, 1, 1, 0);
void* var_13 = __visc__createNodeND(0, var_13_node);
__visc__edge(var_12, var_13, 1, 0, 0, 0);
__visc__edge(var_12, var_13, 1, 1, 1, 0);
void* var_14 = __visc__createNodeND(0, var_14_node);
__visc__edge(var_13, var_14, 1, 0, 0, 0);
__visc__edge(var_13, var_14, 1, 1, 1, 0);
__visc__bindIn(var_14, 18, 2, 0);
__visc__bindIn(var_14, 19, 3, 0);
void* var_15 = __visc__createNodeND(0, var_15_node);
__visc__edge(var_14, var_15, 1, 0, 0, 0);
__visc__edge(var_14, var_15, 1, 1, 1, 0);
__visc__bindIn(var_15, 20, 2, 0);
__visc__bindIn(var_15, 21, 3, 0);
void* var_16 = __visc__createNodeND(0, var_16_node);
__visc__edge(var_15, var_16, 1, 0, 0, 0);
__visc__edge(var_15, var_16, 1, 1, 1, 0);
void* var_17 = __visc__createNodeND(0, var_17_node);
__visc__edge(var_16, var_17, 1, 0, 0, 0);
__visc__edge(var_16, var_17, 1, 1, 1, 0);
__visc__bindIn(var_17, 22, 2, 0);
__visc__bindIn(var_17, 23, 3, 0);
void* var_18 = __visc__createNodeND(0, var_18_node);
__visc__edge(var_17, var_18, 1, 0, 0, 0);
__visc__edge(var_17, var_18, 1, 1, 1, 0);
__visc__bindIn(var_18, 24, 2, 0);
__visc__bindIn(var_18, 25, 3, 0);
void* var_19 = __visc__createNodeND(0, var_19_node);
__visc__edge(var_18, var_19, 1, 0, 0, 0);
__visc__edge(var_18, var_19, 1, 1, 1, 0);
void* var_20 = __visc__createNodeND(0, var_20_node);
__visc__edge(var_19, var_20, 1, 0, 0, 0);
__visc__edge(var_19, var_20, 1, 1, 1, 0);
__visc__bindIn(var_20, 26, 2, 0);
__visc__bindIn(var_20, 27, 3, 0);
void* var_21 = __visc__createNodeND(0, var_21_node);
__visc__edge(var_20, var_21, 1, 0, 0, 0);
__visc__edge(var_20, var_21, 1, 1, 1, 0);
__visc__bindIn(var_21, 28, 2, 0);
__visc__bindIn(var_21, 29, 3, 0);
void* var_22 = __visc__createNodeND(0, var_22_node);
__visc__edge(var_21, var_22, 1, 0, 0, 0);
__visc__edge(var_21, var_22, 1, 1, 1, 0);
void* var_23 = __visc__createNodeND(0, var_23_node);
__visc__edge(var_22, var_23, 1, 0, 0, 0);
__visc__edge(var_22, var_23, 1, 1, 1, 0);
void* var_24 = __visc__createNodeND(0, var_24_node);
__visc__edge(var_23, var_24, 1, 0, 0, 0);
__visc__edge(var_23, var_24, 1, 1, 1, 0);
__visc__bindIn(var_24, 30, 2, 0);
__visc__bindIn(var_24, 31, 3, 0);
void* var_25 = __visc__createNodeND(0, var_25_node);
__visc__edge(var_24, var_25, 1, 0, 0, 0);
__visc__edge(var_24, var_25, 1, 1, 1, 0);
__visc__bindIn(var_25, 32, 2, 0);
__visc__bindIn(var_25, 33, 3, 0);
void* var_26 = __visc__createNodeND(0, var_26_node);
__visc__edge(var_25, var_26, 1, 0, 0, 0);
__visc__edge(var_25, var_26, 1, 1, 1, 0);
void* var_27 = __visc__createNodeND(0, var_27_node);
__visc__edge(var_26, var_27, 1, 0, 0, 0);
__visc__edge(var_26, var_27, 1, 1, 1, 0);
__visc__bindIn(var_27, 34, 2, 0);
__visc__bindIn(var_27, 35, 3, 0);
void* var_28 = __visc__createNodeND(0, var_28_node);
__visc__edge(var_27, var_28, 1, 0, 0, 0);
__visc__edge(var_27, var_28, 1, 1, 1, 0);
__visc__bindIn(var_28, 36, 2, 0);
__visc__bindIn(var_28, 37, 3, 0);
void* var_29 = __visc__createNodeND(0, var_29_node);
__visc__edge(var_28, var_29, 1, 0, 0, 0);
__visc__edge(var_28, var_29, 1, 1, 1, 0);
void* var_30 = __visc__createNodeND(0, var_30_node);
__visc__edge(var_29, var_30, 1, 0, 0, 0);
__visc__edge(var_29, var_30, 1, 1, 1, 0);
__visc__bindIn(var_30, 38, 2, 0);
__visc__bindIn(var_30, 39, 3, 0);
void* var_31 = __visc__createNodeND(0, var_31_node);
__visc__edge(var_30, var_31, 1, 0, 0, 0);
__visc__edge(var_30, var_31, 1, 1, 1, 0);
__visc__bindIn(var_31, 40, 2, 0);
__visc__bindIn(var_31, 41, 3, 0);
void* var_32 = __visc__createNodeND(0, var_32_node);
__visc__edge(var_31, var_32, 1, 0, 0, 0);
__visc__edge(var_31, var_32, 1, 1, 1, 0);
void* var_33 = __visc__createNodeND(0, var_33_node);
__visc__edge(var_32, var_33, 1, 0, 0, 0);
__visc__edge(var_32, var_33, 1, 1, 1, 0);
void* var_34 = __visc__createNodeND(0, var_34_node);
__visc__edge(var_33, var_34, 1, 0, 0, 0);
__visc__edge(var_33, var_34, 1, 1, 1, 0);
__visc__bindIn(var_34, 42, 2, 0);
__visc__bindIn(var_34, 43, 3, 0);
void* var_35 = __visc__createNodeND(0, var_35_node);
__visc__edge(var_34, var_35, 1, 0, 0, 0);
__visc__edge(var_34, var_35, 1, 1, 1, 0);
__visc__bindIn(var_35, 44, 2, 0);
__visc__bindIn(var_35, 45, 3, 0);
void* var_36 = __visc__createNodeND(0, var_36_node);
__visc__edge(var_35, var_36, 1, 0, 0, 0);
__visc__edge(var_35, var_36, 1, 1, 1, 0);
void* var_37 = __visc__createNodeND(0, var_37_node);
__visc__edge(var_36, var_37, 1, 0, 0, 0);
__visc__edge(var_36, var_37, 1, 1, 1, 0);
__visc__bindIn(var_37, 46, 2, 0);
__visc__bindIn(var_37, 47, 3, 0);
void* var_38 = __visc__createNodeND(0, var_38_node);
__visc__edge(var_37, var_38, 1, 0, 0, 0);
__visc__edge(var_37, var_38, 1, 1, 1, 0);
__visc__bindIn(var_38, 48, 2, 0);
__visc__bindIn(var_38, 49, 3, 0);
void* var_39 = __visc__createNodeND(0, var_39_node);
__visc__edge(var_38, var_39, 1, 0, 0, 0);
__visc__edge(var_38, var_39, 1, 1, 1, 0);
void* var_40 = __visc__createNodeND(0, var_40_node);
__visc__edge(var_39, var_40, 1, 0, 0, 0);
__visc__edge(var_39, var_40, 1, 1, 1, 0);
__visc__bindIn(var_40, 50, 2, 0);
__visc__bindIn(var_40, 51, 3, 0);
void* var_41 = __visc__createNodeND(0, var_41_node);
__visc__edge(var_40, var_41, 1, 0, 0, 0);
__visc__edge(var_40, var_41, 1, 1, 1, 0);
__visc__bindIn(var_41, 52, 2, 0);
__visc__bindIn(var_41, 53, 3, 0);
void* var_42 = __visc__createNodeND(0, var_42_node);
__visc__edge(var_41, var_42, 1, 0, 0, 0);
__visc__edge(var_41, var_42, 1, 1, 1, 0);
void* var_43 = __visc__createNodeND(0, var_43_node);
__visc__edge(var_42, var_43, 1, 0, 0, 0);
__visc__edge(var_42, var_43, 1, 1, 1, 0);
void* var_44 = __visc__createNodeND(0, var_44_node);
__visc__edge(var_43, var_44, 1, 0, 0, 0);
__visc__edge(var_43, var_44, 1, 1, 1, 0);
__visc__bindIn(var_44, 54, 2, 0);
__visc__bindIn(var_44, 55, 3, 0);
void* var_45 = __visc__createNodeND(0, var_45_node);
__visc__edge(var_44, var_45, 1, 0, 0, 0);
__visc__edge(var_44, var_45, 1, 1, 1, 0);
__visc__bindIn(var_45, 56, 2, 0);
__visc__bindIn(var_45, 57, 3, 0);
void* var_46 = __visc__createNodeND(0, var_46_node);
__visc__edge(var_45, var_46, 1, 0, 0, 0);
__visc__edge(var_45, var_46, 1, 1, 1, 0);
void* var_47 = __visc__createNodeND(0, var_47_node);
__visc__edge(var_46, var_47, 1, 0, 0, 0);
__visc__edge(var_46, var_47, 1, 1, 1, 0);
__visc__bindIn(var_47, 58, 2, 0);
__visc__bindIn(var_47, 59, 3, 0);
void* var_48 = __visc__createNodeND(0, var_48_node);
__visc__edge(var_47, var_48, 1, 0, 0, 0);
__visc__edge(var_47, var_48, 1, 1, 1, 0);
__visc__bindIn(var_48, 60, 2, 0);
__visc__bindIn(var_48, 61, 3, 0);
void* var_49 = __visc__createNodeND(0, var_49_node);
__visc__edge(var_48, var_49, 1, 0, 0, 0);
__visc__edge(var_48, var_49, 1, 1, 1, 0);
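// Bind the last node's tensor pointer and size to the root node's two outputs.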
__visc__bindOut(var_49, 0, 0, 0);
__visc__bindOut(var_49, 1, 1, 0);
}
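// Return type of the root node: the output tensor pointer plus its size in bytes.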
struct ret_t {
void* tensor;
size_t bytes;
};
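// Packed argument struct handed to __visc__launch; the field order must match
// the bindIn indices used inside root() above.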
typedef struct __attribute__((__packed__)) {
void* input;
size_t input_bytes;
void* conv2d_1_w;
size_t conv2d_1_w_bytes;
void* conv2d_1_b;
size_t conv2d_1_b_bytes;
void* conv2d_2_w;
size_t conv2d_2_w_bytes;
void* conv2d_2_b;
size_t conv2d_2_b_bytes;
void* conv2d_3_w;
size_t conv2d_3_w_bytes;
void* conv2d_3_b;
size_t conv2d_3_b_bytes;
void* conv2d_4_w;
size_t conv2d_4_w_bytes;
void* conv2d_4_b;
size_t conv2d_4_b_bytes;
void* conv2d_5_w;
size_t conv2d_5_w_bytes;
void* conv2d_5_b;
size_t conv2d_5_b_bytes;
void* conv2d_6_w;
size_t conv2d_6_w_bytes;
void* conv2d_6_b;
size_t conv2d_6_b_bytes;
void* conv2d_7_w;
size_t conv2d_7_w_bytes;
void* conv2d_7_b;
size_t conv2d_7_b_bytes;
void* conv2d_8_w;
size_t conv2d_8_w_bytes;
void* conv2d_8_b;
size_t conv2d_8_b_bytes;
void* conv2d_9_w;
size_t conv2d_9_w_bytes;
void* conv2d_9_b;
size_t conv2d_9_b_bytes;
void* conv2d_10_w;
size_t conv2d_10_w_bytes;
void* conv2d_10_b;
size_t conv2d_10_b_bytes;
void* conv2d_11_w;
size_t conv2d_11_w_bytes;
void* conv2d_11_b;
size_t conv2d_11_b_bytes;
void* conv2d_12_w;
size_t conv2d_12_w_bytes;
void* conv2d_12_b;
size_t conv2d_12_b_bytes;
void* conv2d_13_w;
size_t conv2d_13_w_bytes;
void* conv2d_13_b;
size_t conv2d_13_b_bytes;
void* dense_1_w;
size_t dense_1_w_bytes;
void* dense_1_b;
size_t dense_1_b_bytes;
void* dense_2_w;
size_t dense_2_w_bytes;
void* dense_2_b;
size_t dense_2_b_bytes;
struct ret_t r;
} RootIn;
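// Host driver for the HPVM build: load the trained weights, populate the
// root-argument struct, then launch and wait on the dataflow graph.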
int main(){
std::string dir_prefix = std::string("data/vgg16_cifar10/");
std::string input_path = dir_prefix + std::string("input.bin");
std::string labels_path = dir_prefix + std::string("labels.bin");
std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,3,3);
std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin");
void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1);
std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin");
void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,3,3);
std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin");
void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1);
std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin");
void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,3,3);
std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin");
void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,128,1,1);
std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin");
void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,3,3);
std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin");
void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,128,1,1);
std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin");
void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,3,3);
std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin");
void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1);
std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin");
void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,3,3);
std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin");
void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,256,1,1);
std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin");
void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,256,256,3,3);
std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin");
void* conv2d_7_b = readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,256,1,1);
std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin");
void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,256,3,3);
std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin");
void* conv2d_8_b = readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,512,1,1);
std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin");
void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,3,3);
std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin");
void* conv2d_9_b = readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,512,1,1);
std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin");
void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,3,3);
std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin");
void* conv2d_10_b = readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,512,1,1);
std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin");
void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,3,3);
std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin");
void* conv2d_11_b = readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,512,1,1);
std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin");
void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,3,3);
std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin");
void* conv2d_12_b = readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,512,1,1);
std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin");
void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,512,512,3,3);
std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin");
void* conv2d_13_b = readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,512,1,1);
std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin");
void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,512,512);
std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin");
void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,512,1,1);
std::string dense_2_w_path = dir_prefix + std::string("dense_2_w.bin");
void* dense_2_w = readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,512,10);
std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin");
void* dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0,1,10,1,1);
void* input = readTrainedWeights(input_path.c_str(), 0,10000,3,32,32);
uint8_t* labels = readLabels(labels_path.c_str(),10000);
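// Initialize the HPVM runtime and marshal every tensor into the packed
// root-argument struct (the *_bytes fields are all set to 0 here).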
__visc__init();
RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn)));
args->input = input;
args->input_bytes = 0;
args->conv2d_1_w = conv2d_1_w;
args->conv2d_1_w_bytes = 0;
args->conv2d_1_b = conv2d_1_b;
args->conv2d_1_b_bytes = 0;
args->conv2d_2_w = conv2d_2_w;
args->conv2d_2_w_bytes = 0;
args->conv2d_2_b = conv2d_2_b;
args->conv2d_2_b_bytes = 0;
args->conv2d_3_w = conv2d_3_w;
args->conv2d_3_w_bytes = 0;
args->conv2d_3_b = conv2d_3_b;
args->conv2d_3_b_bytes = 0;
args->conv2d_4_w = conv2d_4_w;
args->conv2d_4_w_bytes = 0;
args->conv2d_4_b = conv2d_4_b;
args->conv2d_4_b_bytes = 0;
args->conv2d_5_w = conv2d_5_w;
args->conv2d_5_w_bytes = 0;
args->conv2d_5_b = conv2d_5_b;
args->conv2d_5_b_bytes = 0;
args->conv2d_6_w = conv2d_6_w;
args->conv2d_6_w_bytes = 0;
args->conv2d_6_b = conv2d_6_b;
args->conv2d_6_b_bytes = 0;
args->conv2d_7_w = conv2d_7_w;
args->conv2d_7_w_bytes = 0;
args->conv2d_7_b = conv2d_7_b;
args->conv2d_7_b_bytes = 0;
args->conv2d_8_w = conv2d_8_w;
args->conv2d_8_w_bytes = 0;
args->conv2d_8_b = conv2d_8_b;
args->conv2d_8_b_bytes = 0;
args->conv2d_9_w = conv2d_9_w;
args->conv2d_9_w_bytes = 0;
args->conv2d_9_b = conv2d_9_b;
args->conv2d_9_b_bytes = 0;
args->conv2d_10_w = conv2d_10_w;
args->conv2d_10_w_bytes = 0;
args->conv2d_10_b = conv2d_10_b;
args->conv2d_10_b_bytes = 0;
args->conv2d_11_w = conv2d_11_w;
args->conv2d_11_w_bytes = 0;
args->conv2d_11_b = conv2d_11_b;
args->conv2d_11_b_bytes = 0;
args->conv2d_12_w = conv2d_12_w;
args->conv2d_12_w_bytes = 0;
args->conv2d_12_b = conv2d_12_b;
args->conv2d_12_b_bytes = 0;
args->conv2d_13_w = conv2d_13_w;
args->conv2d_13_w_bytes = 0;
args->conv2d_13_b = conv2d_13_b;
args->conv2d_13_b_bytes = 0;
args->dense_1_w = dense_1_w;
args->dense_1_w_bytes = 0;
args->dense_1_b = dense_1_b;
args->dense_1_b_bytes = 0;
args->dense_2_w = dense_2_w;
args->dense_2_w_bytes = 0;
args->dense_2_b = dense_2_b;
args->dense_2_b_bytes = 0;
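// Launch the root DFG and block until it finishes; the result tensor is then
// requested back on the host for accuracy computation.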
void* dfg = __visc__launch(0, root, (void*) args);
__visc__wait(dfg);
void *result = static_cast<RootIn*>(args)->input;
hpvm_request_tensor(result, 0);
__visc__cleanup();
computeAccuracy2(labels, 10000, result);
return 0;
}
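// --- Tensor-runtime variant: the same VGG16/CIFAR-10 network executed
// directly through tensor_runtime calls, with no HPVM dataflow graph. ---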
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <string.h>
#include "../../tensor_runtime/include/tensor_runtime.h"
#include "../include/utils.h"
int main(){
llvm_hpvm_initTensorRt(0);
std::string dir_prefix = std::string("data/vgg16_cifar10/");
std::string input_path = dir_prefix + std::string("input.bin");
std::string labels_path = dir_prefix + std::string("labels.bin");
std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,3,3);
std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin");
void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1);
std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin");
void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,3,3);
std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin");
void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1);
std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin");
void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,3,3);
std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin");
void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,128,1,1);
std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin");
void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,3,3);
std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin");
void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,128,1,1);
std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin");
void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,3,3);
std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin");
void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1);
std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin");
void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,3,3);
std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin");
void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,256,1,1);
std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin");
void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,256,256,3,3);
std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin");
void* conv2d_7_b = readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,256,1,1);
std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin");
void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,256,3,3);
std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin");
void* conv2d_8_b = readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,512,1,1);
std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin");
void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,3,3);
std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin");
void* conv2d_9_b = readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,512,1,1);
std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin");
void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,3,3);
std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin");
void* conv2d_10_b = readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,512,1,1);
std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin");
void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,3,3);
std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin");
void* conv2d_11_b = readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,512,1,1);
std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin");
void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,3,3);
std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin");
void* conv2d_12_b = readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,512,1,1);
std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin");
void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,512,512,3,3);
std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin");
void* conv2d_13_b = readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,512,1,1);
std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin");
void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,512,512);
std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin");
void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,512,1,1);
std::string dense_2_w_path = dir_prefix + std::string("dense_2_w.bin");
void* dense_2_w = readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,512,10);
std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin");
void* dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0,1,10,1,1);
startMemTracking();
int test_input_size = 10000;
int batch_size = 10000;
int batch_count = test_input_size / batch_size;
float final_accuracy = 0.0;
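// Batched inference: batch_size equals test_input_size here, so this loop
// executes exactly once over all 10000 test images.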
for(int i = 0; i < batch_count; i++){
int start = i * batch_size;
int end = (i + 1) * batch_size;
void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32);
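// VGG16 convolutional backbone: 13 convolution + bias-add + ReLU layers,
// with 2x2 pooling closing each of the five blocks.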
void* var_0 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 0);
void* var_1 = tensorAdd(var_0, conv2d_1_b);
void* var_2 = tensorRelu(var_1);
void* var_4 = tensorConvolution(var_2, conv2d_2_w, 1, 1, 1, 1, 1, 0);
void* var_5 = tensorAdd(var_4, conv2d_2_b);
void* var_6 = tensorRelu(var_5);
void* var_7 = tensorPooling(var_6,0,2,2,0,0,2,2);
void* var_8 = tensorConvolution(var_7, conv2d_3_w, 1, 1, 1, 1, 1, 0);
void* var_9 = tensorAdd(var_8, conv2d_3_b);
void* var_10 = tensorRelu(var_9);
void* var_12 = tensorConvolution(var_10, conv2d_4_w, 1, 1, 1, 1, 1, 0);
void* var_13 = tensorAdd(var_12, conv2d_4_b);
void* var_14 = tensorRelu(var_13);
void* var_15 = tensorPooling(var_14,0,2,2,0,0,2,2);
void* var_16 = tensorConvolution(var_15, conv2d_5_w, 1, 1, 1, 1, 1, 0);
void* var_17 = tensorAdd(var_16, conv2d_5_b);
void* var_18 = tensorRelu(var_17);
void* var_20 = tensorConvolution(var_18, conv2d_6_w, 1, 1, 1, 1, 1, 0);
void* var_21 = tensorAdd(var_20, conv2d_6_b);
void* var_22 = tensorRelu(var_21);
void* var_24 = tensorConvolution(var_22, conv2d_7_w, 1, 1, 1, 1, 1, 0);
void* var_25 = tensorAdd(var_24, conv2d_7_b);
void* var_26 = tensorRelu(var_25);
void* var_27 = tensorPooling(var_26,0,2,2,0,0,2,2);
void* var_28 = tensorConvolution(var_27, conv2d_8_w, 1, 1, 1, 1, 1, 0);
void* var_29 = tensorAdd(var_28, conv2d_8_b);
void* var_30 = tensorRelu(var_29);
void* var_32 = tensorConvolution(var_30, conv2d_9_w, 1, 1, 1, 1, 1, 0);
void* var_33 = tensorAdd(var_32, conv2d_9_b);
void* var_34 = tensorRelu(var_33);
void* var_36 = tensorConvolution(var_34, conv2d_10_w, 1, 1, 1, 1, 1, 0);
void* var_37 = tensorAdd(var_36, conv2d_10_b);
void* var_38 = tensorRelu(var_37);
void* var_39 = tensorPooling(var_38,0,2,2,0,0,2,2);
void* var_40 = tensorConvolution(var_39, conv2d_11_w, 1, 1, 1, 1, 1, 0);
void* var_41 = tensorAdd(var_40, conv2d_11_b);
void* var_42 = tensorRelu(var_41);
void* var_44 = tensorConvolution(var_42, conv2d_12_w, 1, 1, 1, 1, 1, 0);
void* var_45 = tensorAdd(var_44, conv2d_12_b);
void* var_46 = tensorRelu(var_45);
void* var_48 = tensorConvolution(var_46, conv2d_13_w, 1, 1, 1, 1, 1, 0);
void* var_49 = tensorAdd(var_48, conv2d_13_b);
void* var_50 = tensorRelu(var_49);
void* var_51 = tensorPooling(var_50,0,2,2,0,0,2,2);
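// Classifier head: two fully connected (GEMM) layers, then softmax.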
void* var_54 = tensorGemmGPU(var_51, dense_1_w);
void* var_55 = tensorAdd(var_54, dense_1_b);
void* var_56 = tensorRelu(var_55);
void* var_58 = tensorGemmGPU(var_56, dense_2_w);
void* var_59 = tensorAdd(var_58, dense_2_b);
void* var_60 = tensorSoftmax(var_59);
uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end);
float accuracy = computeAccuracy2(labels, batch_size, var_60);
final_accuracy += accuracy;
freeBatchMemory();
}
final_accuracy = final_accuracy / batch_count;
dumpFinalAccuracy(final_accuracy);
llvm_hpvm_cleanupTensorRt();
return 0;
}
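# Conda environment spec for the Keras frontend (environment name: approxhpvm_keras).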
name: approxhpvm_keras
channels:
- pytorch
- conda-forge
- defaults
dependencies:
- absl-py=0.6.1=py36_0
- anaconda-project=0.8.2=py36_0
- asn1crypto=0.24.0=py36_0
- automat=0.7.0=py36_0
- babel=2.6.0=py36_0
- backports=1.0=py36_1
- backports.os=0.1.1=py36_0
- beautifulsoup4=4.6.3=py36_0
- bkcharts=0.2=py36_0
- blaze=0.11.3=py36_0
- conda=4.5.11=py36_0
- conda-env=2.6.0=1
- contextlib2=0.5.5=py36_0
- cycler=0.10.0=py36_0
- dill=0.2.8.2=py36_0
- docutils=0.14=py36_0
- entrypoints=0.2.3=py36_2
- et_xmlfile=1.0.1=py36_0
- idna=2.7=py36_0
- imageio=2.4.1=py36_0
- importlib_metadata=0.6=py36_0
- ipython_genutils=0.2.0=py36_0
- isort=4.3.4=py36_0
- jdcal=1.4=py36_0
- jedi=0.13.1=py36_0
- jinja2=2.10=py36_0
- jmespath=0.9.3=py36_0
- jsonschema=2.6.0=py36_0
- keyring=16.0.0=py36_0
- libgcc=7.2.0=h69d50b8_2
- libgfortran=3.0.0=1
- locket=0.2.0=py36_1
- more-itertools=4.3.0=py36_0
- nbconvert=5.3.1=py36_0
- nbformat=4.4.0=py36_0
- nose=1.3.7=py36_2
- notebook=5.7.0=py36_0
- numpydoc=0.8.0=py36_0
- odo=0.5.1=py36_0
- pathlib2=2.3.2=py36_0
- pexpect=4.6.0=py36_0
- pickleshare=0.7.5=py36_0
- ply=3.11=py36_0
- ptyprocess=0.6.0=py36_0
- pycodestyle=2.4.0=py36_0
- pygments=2.2.0=py36_0
- pylint=2.1.1=py36_0
- pyopenssl=18.0.0=py36_0
- qtconsole=4.4.2=py36_0
- requests=2.19.1=py36_0
- s3transfer=0.1.13=py36_0
- secretstorage=3.1.0=py36_0
- setuptools=40.5.0=py36_0
- singledispatch=3.4.0.3=py36_0
- six=1.11.0=py36_1
- snowballstemmer=1.2.1=py36_0
- sortedcollections=1.0.1=py36_0
- sphinx=1.8.1=py36_0
- spyder=3.3.1=py36_1
- sympy=1.3=py36_0
- tblib=1.3.2=py36_0
- termcolor=1.1.0=py36_1
- terminado=0.8.1=py36_1
- testpath=0.4.2=py36_0
- torchvision=0.2.1=py36_0
- traitlets=4.3.2=py36_0
- typing=3.6.4=py36_0
- unicodecsv=0.14.1=py36_0
- urllib3=1.23=py36_0
- wcwidth=0.1.7=py36_0
- wheel=0.32.2=py36_0
- widgetsnbextension=3.4.2=py36_0
- xlwt=1.3.0=py36_0
- _license=1.1=py36_1
- _tflow_select=2.1.0=gpu
- alabaster=0.7.12=py36_0
- anaconda-client=1.7.2=py36_0
- anaconda=custom=py36hbbc8b67_0
- anaconda-navigator=1.9.2=py36_0
- appdirs=1.4.3=py36h28b3542_0
- astor=0.7.1=py36_0
- astroid=2.0.4=py36_0
- astropy=3.0.5=py36h7b6447c_0
- atomicwrites=1.2.1=py36_0
- attrs=18.2.0=py36h28b3542_0
- backcall=0.1.0=py36_0
- backports.shutil_get_terminal_size=1.0.0=py36_2
- bitarray=0.8.3=py36h14c3975_0
- blas=1.0=mkl
- bleach=3.0.2=py36_0
- blosc=1.14.4=hdbcaa40_0
- bokeh=1.0.1=py36_0
- boto=2.49.0=py36_0
- boto3=1.9.35=py36_0
- botocore=1.12.35=py36_0
- bottleneck=1.2.1=py36h035aef0_1
- bz2file=0.98=py36_1
- bzip2=1.0.6=h14c3975_5
- ca-certificates=2018.03.07=0
- cairo=1.14.12=h8948797_3
- certifi=2018.10.15=py36_0
- cffi=1.11.5=py36he75722e_1
- chardet=3.0.4=py36_1
- chest=0.2.3=py36_1
- click=7.0=py36_0
- cloudpickle=0.6.1=py36_0
- clyent=1.2.2=py36_1
- colorama=0.4.0=py36_0
- configobj=5.0.6=py36_1
- constantly=15.1.0=py36h28b3542_0
- cryptography=2.3.1=py36hc365091_0
- cudatoolkit=9.0=h13b8566_0
- cudnn=7.1.2=cuda9.0_0
- cupti=9.0.176=0
- curl=7.61.0=h84994c4_0
- cython=0.29=py36he6710b0_0
- cytoolz=0.9.0.1=py36h14c3975_1
- dask=0.20.0=py36_0
- dask-core=0.20.0=py36_0
- datashape=0.5.4=py36_1
- dbus=1.13.2=h714fa37_1
- decorator=4.3.0=py36_0
- defusedxml=0.5.0=py36_1
- distributed=1.24.0=py36_0
- expat=2.2.6=he6710b0_0
- fastcache=1.0.2=py36h14c3975_2
- filelock=3.0.10=py36_0
- flask=1.0.2=py36_1
- flask-cors=3.0.6=py36_0
- fontconfig=2.13.0=h9420a91_0
- freetype=2.9.1=h8a8886c_1
- fribidi=1.0.5=h7b6447c_0
- gast=0.2.0=py36_0
- gensim=3.4.0=py36h14c3975_0
- get_terminal_size=1.0.0=haa9412d_0
- gevent=1.3.7=py36h7b6447c_1
- glib=2.56.2=hd408876_0
- glob2=0.6=py36_1
- gmp=6.1.2=h6c8ec71_1
- gmpy2=2.0.8=py36h10f8cd9_2
- graphite2=1.3.12=h23475e2_2
- greenlet=0.4.15=py36h7b6447c_0
- grpcio=1.12.1=py36hdbcaa40_0
- gst-plugins-base=1.14.0=hbbd80ab_1
- gstreamer=1.14.0=hb453b48_1
- h5py=2.8.0=py36h989c5e5_3
- harfbuzz=1.8.8=hffaf4a1_0
- hdf5=1.10.2=hba1933b_1
- heapdict=1.0.0=py36_2
- html5lib=1.0.1=py36_0
- hyperlink=18.0.0=py36_0
- icu=58.2=h9c2bf20_1
- imagesize=1.1.0=py36_0
- incremental=17.5.0=py36_0
- ipykernel=5.1.0=py36h39e3cac_0
- ipython=7.1.1=py36h39e3cac_0
- ipywidgets=7.4.2=py36_0
- itsdangerous=1.1.0=py36_0
- jbig=2.1=hdba287a_0
- jeepney=0.4=py36_0
- jpeg=9b=h024ee3a_2
- keras=2.1.6=py36_0
- keras-applications=1.0.6=py36_0
- keras-preprocessing=1.0.5=py36_0
- kiwisolver=1.0.1=py36hf484d3e_0
- lazy-object-proxy=1.3.1=py36h14c3975_2
- libcurl=7.61.0=h1ad7b7a_0
- libedit=3.1.20170329=h6b74fdf_2
- libffi=3.2.1=hd88cf55_4
- libgcc-ng=8.2.0=hdf63c60_1
- libgfortran-ng=7.3.0=hdf63c60_0
- libiconv=1.15=h63c8f33_5
- libpng=1.6.35=hbc83047_0
- libprotobuf=3.6.1=hd408876_0
- libsodium=1.0.16=h1bed415_0
- libssh2=1.8.0=h9cfc8f7_4
- libstdcxx-ng=8.2.0=hdf63c60_1
- libtiff=4.0.9=he85c1e1_2
- libtool=2.4.6=h7b6447c_5
- libuuid=1.0.3=h1bed415_2
- libxcb=1.13=h1bed415_1
- libxml2=2.9.8=h26e45fe_1
- libxslt=1.1.32=h1312cb7_0
- llvmlite=0.25.0=py36hd408876_0
- lxml=4.2.5=py36hefd8a0e_0
- lzo=2.10=h49e0be7_2
- markdown=3.0.1=py36_0
- markupsafe=1.0=py36h14c3975_1
- matplotlib=3.0.1=py36h5429711_0
- mccabe=0.6.1=py36_1
- mistune=0.8.4=py36h7b6447c_0
- mkl=2018.0.3=1
- mkl-service=1.1.2=py36h90e4bf4_5
- mkl_fft=1.0.6=py36h7dd41cf_0
- mkl_random=1.0.1=py36h4414c95_1
- mpc=1.1.0=h10f8cd9_1
- mpfr=4.0.1=hdf1c602_3
- mpmath=1.0.0=py36_2
- msgpack-python=0.5.6=py36h6bb024c_1
- multipledispatch=0.6.0=py36_0
- navigator-updater=0.2.1=py36_0
- nccl=1.3.5=cuda9.0_0
- ncurses=6.1=hf484d3e_0
- networkx=2.2=py36_1
- ninja=1.8.2=py36h6bb024c_1
- nltk=3.3.0=py36_0
- numba=0.40.0=py36h962f231_0
- numexpr=2.6.8=py36hd89afb7_0
- numpy=1.15.3=py36h1d66e8a_0
- numpy-base=1.15.3=py36h81de0dd_0
- olefile=0.46=py36_0
- openpyxl=2.5.9=py36_0
- openssl=1.0.2p=h14c3975_0
- packaging=18.0=py36_0
- pandas=0.23.4=py36h04863e7_0
- pandoc=2.2.3.2=0
- pandocfilters=1.4.2=py36_1
- pango=1.42.4=h049681c_0
- parso=0.3.1=py36_0
- partd=0.3.9=py36_0
- patchelf=0.9=he6710b0_3
- path.py=11.5.0=py36_0
- patsy=0.5.1=py36_0
- pcre=8.42=h439df22_0
- pep8=1.7.1=py36_0
- pillow=5.3.0=py36h34e0f95_0
- pip=18.1=py36_0
- pixman=0.34.0=hceecf20_3
- pkginfo=1.4.2=py36_1
- pluggy=0.8.0=py36_0
- prometheus_client=0.4.2=py36_0
- prompt_toolkit=2.0.7=py36_0
- protobuf=3.6.1=py36he6710b0_0
- psutil=5.4.8=py36h7b6447c_0
- py=1.7.0=py36_0
- pyasn1=0.4.4=py36h28b3542_0
- pyasn1-modules=0.2.2=py36_0
- pycosat=0.6.3=py36h14c3975_0
- pycparser=2.19=py36_0
- pycrypto=2.6.1=py36h14c3975_9
- pycurl=7.43.0.2=py36hb7f436b_0
- pyflakes=2.0.0=py36_0
- pyhamcrest=1.9.0=py36_2
- pyodbc=4.0.24=py36he6710b0_0
- pyparsing=2.2.2=py36_0
- pyqt=5.9.2=py36h05f1152_2
- pysocks=1.6.8=py36_0
- pytables=3.4.4=py36ha205bf6_0
- pytest=3.9.3=py36_0
- pytest-arraydiff=0.2=py36h39e3cac_0
- pytest-astropy=0.4.0=py36_0
- pytest-doctestplus=0.1.3=py36_0
- pytest-openfiles=0.3.0=py36_0
- pytest-remotedata=0.3.1=py36_0
- python=3.6.6=h6e4f718_2
- python-dateutil=2.7.5=py36_0
- pytorch=0.4.1=py36ha74772b_0
- pytz=2018.7=py36_0
- pywavelets=1.0.1=py36hdd07704_0
- pyyaml=3.13=py36h14c3975_0
- pyzmq=17.1.2=py36h14c3975_0
- qt=5.9.6=h8703b6f_2
- qtawesome=0.5.2=py36_0
- qtpy=1.5.2=py36_0
- readline=7.0=h7b6447c_5
- redis=5.0.0=h7b6447c_0
- redis-py=2.10.6=py36_0
- rope=0.11.0=py36_0
- ruamel_yaml=0.15.46=py36h14c3975_0
- scikit-image=0.14.0=py36hf484d3e_1
- scikit-learn=0.20.0=py36h4989274_1
- scipy=1.1.0=py36hfa4b5c9_1
- seaborn=0.9.0=py36_0
- send2trash=1.5.0=py36_0
- service_identity=17.0.0=py36h28b3542_0
- simplegeneric=0.8.1=py36_2
- sip=4.19.8=py36hf484d3e_0
- smart_open=1.7.1=py36_0
- snappy=1.1.7=hbae5bb6_3
- sockjs-tornado=1.0.6=py36_0
- sortedcontainers=2.0.5=py36_0
- sphinxcontrib=1.0=py36_1
- sphinxcontrib-websupport=1.1.0=py36_1
- spyder-kernels=0.2.6=py36_0
- sqlalchemy=1.2.12=py36h7b6447c_0
- sqlite=3.25.2=h7b6447c_0
- statsmodels=0.9.0=py36h035aef0_0
- tensorboard=1.11.0=py36hf484d3e_0
- tensorflow=1.11.0=gpu_py36h4459f94_0
- tensorflow-base=1.11.0=gpu_py36h8e0ae2d_0
- tensorflow-gpu=1.11.0=h0d30ee6_0
- tk=8.6.8=hbc83047_0
- toolz=0.9.0=py36_0
- tornado=5.1.1=py36h7b6447c_0
- tqdm=4.28.1=py36h28b3542_0
- twisted=18.9.0=py36h7b6447c_0
- typed-ast=1.1.0=py36h14c3975_0
- unixodbc=2.3.7=h14c3975_0
- webencodings=0.5.1=py36_1
- werkzeug=0.14.1=py36_0
- wrapt=1.10.11=py36h14c3975_2
- xlrd=1.1.0=py36_1
- xlsxwriter=1.1.2=py36_0
- xz=5.2.4=h14c3975_4
- yaml=0.1.7=had09818_2
- zeromq=4.2.5=hf484d3e_1
- zict=0.1.3=py36_0
- zlib=1.2.11=ha838bed_2
- zope=1.0=py36_1
- zope.interface=4.6.0=py36h7b6447c_0
- cuda91=1.0=h4c16780_0
- pip:
- msgpack==0.5.6
- tables==3.4.4
- torch==0.4.1
from setuptools import setup
setup(
name='frontend',
version='1.0',
description='ApproxHPVM frontend modules',
author='Hashim',
author_email='hsharif3@illinois.edu',
packages=['frontend'],
install_requires=[],
)
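# Hypothetical usage note: installing in editable mode with `pip install -e .`
# makes the `frontend` package importable during development.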