From 6056da6322dbf5f55ee01b486bf78a4bab5386f0 Mon Sep 17 00:00:00 2001
From: shingjan <yjshi03@gmail.com>
Date: Thu, 26 Mar 2020 02:19:54 -0500
Subject: [PATCH] emit structure almost finished

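Add a standalone GraphBuilder under frontend/ that walks the ONNX graph
and emits a tensor-runtime C program (header, batch loop, per-node code,
footer), fold node emission into the DFG class, start filling in codegen
for the Add/Conv/Pooling operators, and rename mnist.py to main.py.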
---
 hpvm/projects/onnx/frontend/graph_builder.py  | 221 ++++++++++++++++++
 hpvm/projects/onnx/frontend/ir.py             |  14 +-
 .../onnx/frontend/{mnist.py => main.py}       |   2 +-
 .../frontend/onnx_frontend/graph_builder.py   |  10 +-
 .../onnx/frontend/onnx_frontend/util.py       |   5 +-
 hpvm/projects/onnx/frontend/operators.py      |  66 +++++-
 hpvm/projects/onnx/frontend/util.py           |  62 +++++
 7 files changed, 361 insertions(+), 19 deletions(-)
 create mode 100644 hpvm/projects/onnx/frontend/graph_builder.py
 rename hpvm/projects/onnx/frontend/{mnist.py => main.py} (93%)
 create mode 100644 hpvm/projects/onnx/frontend/util.py

diff --git a/hpvm/projects/onnx/frontend/graph_builder.py b/hpvm/projects/onnx/frontend/graph_builder.py
new file mode 100644
index 0000000000..5ee60b034d
--- /dev/null
+++ b/hpvm/projects/onnx/frontend/graph_builder.py
@@ -0,0 +1,221 @@
+import sys
+import numpy as np
+import os
+
+from operators import *
+from ir import *
+
+class Node(object):
+    def __init__(self, name, shape, dtype):
+        self._name = name
+        self._shape = shape if shape else {}
+        self._dtype = dtype
+    def __str__(self):
+        return "Node: " + self._name + " with shape: " + str(self._shape) + " and data type " + str(self._dtype)
+    __repr__ = __str__
+        
+class GraphBuilder(object):
+    def __init__(self, model, shape, dtype, opset):
+        self._nodes = {}
+        self._params = {}
+        self._renames = {}
+        self._num_input = 0
+        self._num_param = 0
+        self._dtype = dtype
+        self._graph = model.graph
+        self._opset = opset
+        self._shape = shape if shape else self._build_shape()
+        self.program_str = ""
+        self.output_map = {}  # layer_name -> emitted C variable name
+        self.dfg = DFG()
+    
+    ################################################
+    # Aux functions for graph building
+    ################################################
+    def _build_shape(self):
+        shape = {}
+        for input in self._graph.input:
+            # get type of input tensor
+            tensor_type = input.type.tensor_type
+            # check if it has a shape:
+            if (tensor_type.HasField("shape")):
+                shape[input.name] = tensor_type.shape
+        return shape
+   
+    def _parse_array(self, tensor_proto):
+        try:
+            from onnx.numpy_helper import to_array
+        except ImportError as e:
+            raise ImportError(
+                "Unable to import onnx.numpy_helper, which is required: {}".format(e))
+        np_array = to_array(tensor_proto).reshape(tuple(tensor_proto.dims))
+        return np_array
+    
+    def _parse_value_proto(self, value_proto):
+        """Parse ValueProto or raw str."""
+        try:
+            name = value_proto.name
+        except AttributeError:
+            name = value_proto
+        return name 
+    
+    def _parse_dtype(self, value_proto, dtype):
+        """Parse dtype."""
+        try:
+            from onnx.mapping import TENSOR_TYPE_TO_NP_TYPE
+            return TENSOR_TYPE_TO_NP_TYPE[value_proto.type.tensor_type.elem_type].name
+        except AttributeError:
+            return dtype
+
+    ################################################
+    # Emit functions for code generation
+    ################################################
+    def dump_weights(self):
+        for init_tensor in self._graph.initializer:
+            print(init_tensor)
+    
+    def build_graph(self):
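+        """Populate the node/param tables from the ONNX graph, then walk its compute nodes."""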
+        # parse init tensors
+        for init_tensor in self._graph.initializer:
+            if not init_tensor.name.strip():
+                raise ValueError("Tensor's name is required.")
+            self._params[init_tensor.name] = self._parse_array(init_tensor)
+            self._nodes[init_tensor.name] = Node(init_tensor.name,
+                                                 self._params[init_tensor.name].shape,
+                                                 self._params[init_tensor.name].dtype)
+        
+        # parse inputs
+        # from onnx v0.2, GraphProto.input has type ValueInfoProto,
+        # and the name is 'i.name'
+        for i in self._graph.input:
+            i_name = self._parse_value_proto(i)
+            d_type = self._parse_dtype(i, 'float32')
+            if i_name in self._params:
+                # i is a param instead of input
+                self._num_param += 1
+                self._params[i_name] = self._params.pop(i_name)
+                self._nodes[i_name] = Node(i_name,
+                                           self._params[i_name].shape,
+                                           self._params[i_name].dtype)
+            else:
+                self._num_input += 1
+                if i_name in self._shape:
+                    tshape = self._shape[i_name]
+                else:
+                    raise ValueError("Must provide an input shape for `{0}`.".format(i_name))
+                if isinstance(self._dtype, dict):
+                    dtype = self._dtype[i_name] if i_name in self._dtype else d_type
+                else:
+                    dtype = d_type
+                self._nodes[i_name] = Node(i_name,
+                                           tshape, 
+                                           dtype)
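+        # Walk the compute nodes; DFG construction and per-node emission are still WIP.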
+        for node in self._graph.node:
+            op_name = node.op_type
+            print("###############################")
+            print(op_name)
+            #print("attribute: " + str(node.attribute))
+            #print("input: " + str(node.input))
+            #print("output: " + str(node.output))
+        #print(self._nodes)
+
+    def traverse_graph(self, cur_node, visited):
+        # Emit a node once all of its predecessors have been emitted,
+        # then recurse into its successor nodes.
+        if cur_node.layer_name in visited:
+            return
+        if self.dfg.predVisited(cur_node, visited):
+            visited[cur_node.layer_name] = True
+            self.program_str += cur_node.codegen()
+            for output_node in cur_node.outputs:
+                self.traverse_graph(output_node, visited)
+
+    def emit_graph(self):
+        self.build_graph()
+        visited_nodes = {}
+        self.traverse_graph(self.dfg.root_node, visited_nodes)
+
+    def emit_header(self):
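+        # Emit the C includes, main() opening, and tensor-runtime initialization.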
+        headers = "\n#include <stdio.h> \n"
+        headers += "#include <stdlib.h> \n"
+        headers += "#include <unistd.h> \n"
+        headers += "#include <fcntl.h> \n"
+        headers += "#include <sys/types.h> \n"
+        headers += "#include <sys/stat.h> \n"
+        headers += "#include <string.h> \n"
+        headers += "#include \"../../tensor_runtime/include/tensor_runtime.h\" \n"
+        headers += "#include \"../include/utils.h\" \n\n"
+
+        main_func = "int main(){ \n\n"
+        initialization = "llvm_hpvm_initTensorRt(0); \n\n"
+        self.program_str += headers
+        self.program_str += main_func
+        self.program_str += initialization
+
+    def emit_footer(self, test_data):
+        if test_data is not None and self.dfg.last_node is not None:
+            last_node = self.dfg.last_node
+            output_var = self.output_map[last_node.layer_name]
+
+        destructors = "\nllvm_hpvm_cleanupTensorRt(); \n"
+        end_main = "\nreturn 0; \n\n}\n"
+        self.program_str += destructors
+        self.program_str += end_main
+
+    def emit_batch_loop(self, x_test):
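+        # Emit the C batch loop: read one batch of inputs per iteration and
+        # accumulate accuracy across batches (N/C/H/W come from the test data shape).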
+        N = x_test.shape[0]
+        C = x_test.shape[1]
+        H = x_test.shape[2]
+        W = x_test.shape[3]
+        
+        loop_str = ""
+        loop_str += "\nstartMemTracking(); \n\n"
+        
+        loop_str += "int test_input_size = " + str(N) + "; \n"
+        loop_str += "int batch_size = " + str(N) + "; \n"
+        loop_str += "int batch_count = test_input_size / batch_size; \n"
+        loop_str += "float final_accuracy = 0.0; \n\n"
+
+        loop_str += "for(int i = 0; i < batch_count; i++){ \n\n"
+        loop_str += "int start = i * batch_size; \n"
+        loop_str += "int end = (i + 1) * batch_size; \n"
+
+        loop_str += "\nvoid* input = readInputBatch(input_path.c_str(),0,start,end," 
+        loop_str += str(C) + "," + str(H) + "," + str(W) + "); \n\n"
+
+        self.program_str += loop_str
+
+    def emit_batch_loop_end(self):
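+        # Close the batch loop: read this batch's labels, accumulate accuracy,
+        # free per-batch memory, and dump the final accuracy after the loop.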
+        end_loop_str = ""
+        end_loop_str += "\nuint32_t* labels = readLabelsBatch3(labels_path.c_str(),start,end); \n"
+        last_node = self.dfg.last_node
+        output_var = self.output_map[last_node.layer_name]
+        accuracy_call = "\nfloat accuracy = computeAccuracy3(labels, " + output_var + "); \n"
+        end_loop_str += accuracy_call
+        #end_loop_str += "float accuracy = computeAccuracy2(labels, batch_size, var_60); "
+        end_loop_str += "final_accuracy += accuracy; \n"
+        end_loop_str += "freeBatchMemory(); \n "
+        end_loop_str += "\n}\n\n"
+
+        end_loop_str += "final_accuracy = final_accuracy / batch_count; \n"
+        end_loop_str += "dumpFinalAccuracy(final_accuracy); \n\n"
+
+        self.program_str += end_loop_str
+
+    def emit_program(self, src_dir):
+        with open(src_dir + "/src.cc", "w") as f:
+            f.write(self.program_str)
+
+    def codegen(self, weights_dir, test_data, test_labels):
+        '''
+        Top-level entry point: compiles an ONNX model into a C/C++ program
+        with HPVM tensor-runtime intrinsics.
+        '''
+        if os.path.exists(weights_dir):
+            raise ValueError("Weights directory already exists. Compilation interrupted!")
+        os.mkdir(weights_dir) 
+        self.emit_header()  
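+        # NOTE: emit_weights is referenced here but not yet defined in this file.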
+        self.emit_weights(weights_dir)
+        self.emit_batch_loop(test_data)
+        self.emit_graph()
+        self.emit_batch_loop_end()
+        self.emit_footer(test_data)
+        self.emit_program(weights_dir)
diff --git a/hpvm/projects/onnx/frontend/ir.py b/hpvm/projects/onnx/frontend/ir.py
index 2a3bbe4a6b..6ebf1f17c6 100644
--- a/hpvm/projects/onnx/frontend/ir.py
+++ b/hpvm/projects/onnx/frontend/ir.py
@@ -1,4 +1,4 @@
-class DFG:
+class DFG(object):
     
     root_set = False;
 
@@ -87,10 +87,9 @@ class DFG:
         visited_nodes = {}
         # Starting traversal at the DFG root node
         self.traverseNode(self.root_node, visited_nodes)
-
-## This should be the place where partial evaluation happens
-class NodeFactory:
-    def build(self, layer):
+    
+    ## This should be the place where partial evaluation happens
+    def emitNode(self, layer):
         if layer.op_type == "Conv":
             pass
         elif layer.op_type == "Tanh":
@@ -107,10 +106,11 @@ class NodeFactory:
             pass
         elif layer.op_type == "Identity":
             pass
-        else
+        else:
             raise ValueError("Unsupported operator type!")
 
-class DFGNode:
+
+class DFGNode(object):
     def add_output(self, output_node):
         self.outputs.append(output_node)
     def add_input(self, input_node):
diff --git a/hpvm/projects/onnx/frontend/mnist.py b/hpvm/projects/onnx/frontend/main.py
similarity index 93%
rename from hpvm/projects/onnx/frontend/mnist.py
rename to hpvm/projects/onnx/frontend/main.py
index 8a4f3a85d7..bd8d34f2e1 100644
--- a/hpvm/projects/onnx/frontend/mnist.py
+++ b/hpvm/projects/onnx/frontend/main.py
@@ -11,7 +11,7 @@ from onnx import numpy_helper, version_converter
 from onnx_frontend.util import convert_to_hpvm
 
 #model = onnx.load('../models/mnist/mnist.onnx')
-model = onnx.load('../models/alexnet/alexnet.onnx')
+model = onnx.load('../models/keras/alexnet.onnx')
 test_data_dir = '../models/mnist/test_data_set_0'
 # print('The model before conversion:\n{}'.format(model))
 
diff --git a/hpvm/projects/onnx/frontend/onnx_frontend/graph_builder.py b/hpvm/projects/onnx/frontend/onnx_frontend/graph_builder.py
index 5cf8644cbf..6ad0d59b6e 100644
--- a/hpvm/projects/onnx/frontend/onnx_frontend/graph_builder.py
+++ b/hpvm/projects/onnx/frontend/onnx_frontend/graph_builder.py
@@ -2,6 +2,8 @@ import sys
 import numpy as np
 import os
 
+from operators import *
+
 class Node(object):
     def __init__(self, name, shape, dtype):
         self._name = name
@@ -12,7 +14,6 @@ class Node(object):
     __repr__ = __str__
         
 class GraphBuilder(object):
-    
     def __init__(self, model, shape, dtype, opset):
         self._nodes = {}
         self._params = {}
@@ -23,8 +24,11 @@ class GraphBuilder(object):
         self._graph = model.graph
         self._opset = opset
         self._shape = shape if shape else self._build_shape()
+
+    def dump_weights(self):
+        print(self._graph.initializer)
     
-    def build_cfg(self):
+    def build_graph(self):
         # parse init tensors
         for init_tensor in self._graph.initializer:
             if not init_tensor.name.strip():
@@ -110,4 +114,4 @@ class GraphBuilder(object):
         if os.path.exists(weights_dir):
             raise ValueError("Weight dir existed. Compilation interrupted!")
         os.mkdir(weights_dir)   
-        self.build_cfg()
\ No newline at end of file
+        self.build_graph()
\ No newline at end of file
diff --git a/hpvm/projects/onnx/frontend/onnx_frontend/util.py b/hpvm/projects/onnx/frontend/onnx_frontend/util.py
index 2900f2faf3..78091e7c32 100644
--- a/hpvm/projects/onnx/frontend/onnx_frontend/util.py
+++ b/hpvm/projects/onnx/frontend/onnx_frontend/util.py
@@ -1,7 +1,7 @@
 import sys
 import numpy as np
 import os
-from .graph_builder import GraphBuilder
+from graph_builder import GraphBuilder
 
 def check_model(onnx_model):
     try:
@@ -59,4 +59,5 @@ def convert_to_hpvm(model,
             opset = 1 # default opset version set to 1 if not specified
     print("opset version: ", opset)
     gb = GraphBuilder(model, shape, dtype, opset)
-    gb.build_cfg()
\ No newline at end of file
+    gb.dump_weights()
+    gb.build_graph()
\ No newline at end of file
diff --git a/hpvm/projects/onnx/frontend/operators.py b/hpvm/projects/onnx/frontend/operators.py
index f5851e8955..3a52237307 100644
--- a/hpvm/projects/onnx/frontend/operators.py
+++ b/hpvm/projects/onnx/frontend/operators.py
@@ -1,14 +1,26 @@
-from .ir import DFGNode, ActivationNode, LogicalOpNode
+import sys
+
+from ir import DFGNode, ActivationNode, LogicalOpNode
 
 class AddNode(DFGNode):
+    def __init__(self, layer):
+        DFGNode.__init__(self, layer)
     def codegen(self):
-        pass
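+        # NOTE: out_var_name1 and the get*InputName helpers are assumed to be
+        # provided elsewhere; they are not defined in this hunk.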
+        input_vars = self.getMultipleInputNames(cur_node)
+        inst_str = "void* " + out_var_name1 + " = "
+        inst_str += "tensorAdd(" + input_vars[0] + ", " + input_vars[1] + "); \n"
+        return inst_str
+
 class MatMulNode(DFGNode):
+    def __init__(self, layer):
+        DFGNode.__init__(self, layer)
     def codegen(self):
         pass
+
 class SoftMaxNode(DFGNode):
+    def __init__(self, layer):
+        DFGNode.__init__(self, layer)
     def codegen(self):
         pass
+
 class Conv2DNode(DFGNode):
     def __init__(self, layer):
         DFGNode.__init__(self, layer)
@@ -20,7 +32,39 @@ class Conv2DNode(DFGNode):
         print("\t", self.strides)
         print("\tPadding = ", self.padding)
     def codegen(self):
-        pass
+        input_var_name = self.getSingleInputName(cur_node)
+        weights = cur_node.weights
+        strides = cur_node.strides
+        padding = 0
+        if cur_node.padding.strip() == "valid":
+            padding = 0
+        else:
+            # non-"valid" padding: approximate a symmetric pad from the weight shape
+            padding = int((weights.shape[0] - 1) / 2)
+        prev_padding = self.getPrevLayerPadding(cur_node)
+        if prev_padding is not None:
+            # FIXME: currently only supporting symmetric padding
+            padding = prev_padding[0][0]        
+
+        inst_str = "void* " + out_var_name1 + " = "
+        inst_str += "tensorConvolution(" + input_var_name + ", "
+        inst_str += cur_node.layer_name + "_w, "
+        inst_str += str(padding) + ", "
+        inst_str += str(padding) + ", "
+        inst_str += str(strides[0]) + ", "
+        inst_str += str(strides[1]) + ", "
+        inst_str += "1, "
+        # Conv2DNode always emits conv_groups = 1; depthwise convolutions are
+        # handled separately by DepthwiseConv2DNode.
+        inst_str += "1); \n"
+        if strides[0] > 1 and cur_node.padding.strip() == "same":
+            print ("!ERROR: Same Padding not supported for Conv with Stride > 1")
+            print ("Use: ZeroPadding2D(padding=(" + str(padding) + "," + str(padding) + "));\n");
+            sys.exit(0)
+        return inst_str
+
 class DepthwiseConv2DNode(DFGNode):
     def __init__(self, layer):
         DFGNode.__init__(self, layer)
@@ -63,7 +107,7 @@ class DepthwiseConv2DNode(DFGNode):
             print ("!ERROR: Same Padding not supported for Conv with Stride > 1")
             print ("Use: ZeroPadding2D(padding=(" + str(padding) + "," + str(padding) + "));\n");
             sys.exit(0)
-        return += inst_str
+        return inst_str
 
 class DenseNode(DFGNode):
     def __init__(self, layer):
@@ -88,7 +132,11 @@ class MaxPool2DNode(DFGNode):
         print("\t pool_size = ", self.pool_size)
         print("\t strides = ", self.strides)
     def codegen(self):
-        pass
+        inst_str = "void* " + out_var_name1 + " = "
+        inst_str += "tensorPooling(" + input_var_name + "," + pool_type + "," + str(pool_size[0]) + "," + str(pool_size[1]) 
+        inst_str +=  "," + str(padding) + "," + str(padding) + "," + str(strides[0]) + "," + str(strides[1])
+        inst_str += "); \n"
+
 class AveragePooling2DNode(DFGNode):
     def __init__(self, layer):
         DFGNode.__init__(self, layer)
@@ -97,7 +145,12 @@ class AveragePooling2DNode(DFGNode):
         print("\t pool_size = ", self.pool_size)
         print("\t strides = ", self.strides)
     def codegen(self):
-        pass
+        inst_str = "void* " + out_var_name1 + " = "
+        inst_str += "tensorPooling(" + input_var_name + "," + pool_type + "," + str(pool_size[0]) + "," + str(pool_size[1]) 
+        inst_str +=  "," + str(padding) + "," + str(padding) + "," + str(strides[0]) + "," + str(strides[1])
+        inst_str += "); \n"
+        return inst_str
+
 class ZeroPadding2DNode(DFGNode):
     def __init__(self, layer):
         DFGNode.__init__(self, layer)
@@ -106,6 +159,7 @@ class ZeroPadding2DNode(DFGNode):
         print ("padding = ", self.padding)
     def codegen(self):
         pass
+
 class BatchNormalizationNode(DFGNode):
     def __init__(self, layer):
         DFGNode.__init__(self, layer)
diff --git a/hpvm/projects/onnx/frontend/util.py b/hpvm/projects/onnx/frontend/util.py
new file mode 100644
index 0000000000..2900f2faf3
--- /dev/null
+++ b/hpvm/projects/onnx/frontend/util.py
@@ -0,0 +1,62 @@
+import sys
+import numpy as np
+import os
+from graph_builder import GraphBuilder
+
+def check_model(onnx_model):
+    try:
+        from onnx import checker, onnx_cpp2py_export
+        if hasattr(checker, 'check_model'):
+            # try use onnx's own model checker before converting any model
+            try:
+                checker.check_model(onnx_model)
+                print("onnx model is checked valid.")
+            except onnx_cpp2py_export.checker.ValidationError as e:
+                import warnings
+                # the checker is quite strict about errors, so just emit a warning here
+                warnings.warn(str(e))
+    except ImportError as e:
+        raise ImportError("Unable to import onnx.checker which is required {}".format(e))
+        
+def convert_to_hpvm(model,
+                      shape=None,
+                      dtype="float32",
+                      opset=None):
+    """Converting an onnx model to equivalent HPVM IR
+    
+    ONNX graphs are represented as Python Protobuf objects.
+    The companion parameters will be handled automatically.
+    However, the input names from onnx graph is vague, mixing inputs and
+    network weights/bias such as "1", "2"...
+    For convenience, we rename the `real` input names to "input_0",
+    "input_1"... And renaming parameters to "param_0", "param_1"...
+    
+    Parameters
+    ----------
+    model : protobuf object
+        ONNX ModelProto after ONNX v1.1.0
+    
+    shape : dict of str to tuple, optional
+        The input shape to the graph
+    
+    dtype : str or dict of str to str
+        The input types to the graph
+    
+    opset : int, optional
+        Override to autodetected opset.
+        This can be helpful for some testing.
+    
+    Returns
+    -------
+
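+    Examples
+    --------
+    >>> import onnx
+    >>> model = onnx.load("../models/keras/alexnet.onnx")
+    >>> # the input name and shape below are illustrative
+    >>> convert_to_hpvm(model, shape={"input": (1, 3, 224, 224)}, dtype="float32")
+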
+    """
+    check_model(model)
+    graph = model.graph
+    if opset is None:
+        try:
+            opset = model.opset_import[0].version if model.opset_import else 1
+        except AttributeError:
+            opset = 1 # default opset version set to 1 if not specified
+    print("opset version: ", opset)
+    gb = GraphBuilder(model, shape, dtype, opset)
+    gb.build_graph()
\ No newline at end of file
-- 
GitLab