Commit 02bcd130 authored by Yifan Zhao

HPVM codegen now uses template

parent a0f4d21c
from os import PathLike

import jinja2

from graph_builder import DFG
from tensor import WeightTensor

TEMPLATE_FILE = "hpvm_template.cpp"
loader = jinja2.FileSystemLoader(searchpath="./")
template_env = jinja2.Environment(loader=loader, trim_blocks=True)
template = template_env.get_template(TEMPLATE_FILE)

class HpvmCodeGen:
    def __init__(self, DFG: DFG, output_dir: PathLike):
        self.dfg = DFG
        self.tensors = DFG.tensors
        self.var_count = 0
        self.output_dir = output_dir
        # self.variables is an "ONNX name to our name" map.
        # Each value is (varname, bool); the bool indicates whether the
        # variable is an input of the root node.
        self.variables = self._get_root_args(DFG.inputs, DFG.tensors)

    ################################################
    # Aux functions
    ################################################
    @staticmethod
    def _get_root_args(input_nodes, tensors):
        # Root arguments are the inputs to the graph plus all weight
        # tensors. Sometimes these two kinds overlap (due to ONNX
        # optimizations), so we dedup the list as well.
        root_args = []
        for i in input_nodes:
            root_args.append(i.name)
        for tensor in tensors.values():
            if isinstance(tensor, WeightTensor):
                root_args.append(tensor.name)
        root_args = sorted(list(set(root_args)))
        return {f_name: (index, True) for index, f_name in enumerate(root_args)}

    def _allocate_varname(self):
        varname = f"var_{self.var_count}"
        self.var_count += 1
        return varname

    def get_varname_of(self, onnx_var_name):
        # Returns (is_root_input, varname); varname is an argument index
        # for root-node inputs and a local variable name otherwise.
        if onnx_var_name not in self.variables:
            raise KeyError(onnx_var_name)
        varname, is_root_input = self.variables[onnx_var_name]
        return is_root_input, varname

    @staticmethod
    def transform_name(name: str):
        # Makes an ONNX name usable as a C identifier: "." is illegal
        # in an identifier, and an identifier cannot start with a digit.
        name = name.replace(".", "_")
        if name[0].isnumeric():
            name = "_" + name
        return name
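    # For illustration (toy tensor names assumed, not from a real model),
    # the root-argument map built by _get_root_args looks like:
    #     _get_root_args(inputs, tensors)
    #     # -> {"conv1_w": (0, True), "input": (1, True)}
    # i.e. graph inputs and weights, deduplicated and sorted by name, each
    # mapped to (argument index in the root signature, is_root_input=True).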
    ################################################
    # CodeGen functions
    ################################################

    def _emit_hpvm_node_edges(self, input_vars):
        ret = []
        it = 0
        for onnx_var_name in input_vars:
            hpvm_var_name, is_root_input = self.variables[onnx_var_name]
            if is_root_input:
                # Root-node inputs are bound in by argument index.
                assert isinstance(hpvm_var_name, int)
                ret.append(
                    {"is_bindin": True, "input_idx": hpvm_var_name, "edge_idx": it}
                )
            else:
                # Outputs of other DFG nodes are connected by edges.
                ret.append(
                    {"is_bindin": False, "input_node": hpvm_var_name, "edge_idx": it}
                )
            it += 1
        return ret

    def emit_hpvm_node_structures(self):
        node_envs = []
        for node in self.dfg.traverse_order:
            generated_code = node.hpvm_codegen(self.tensors)
            if generated_code == "":
                # Such a node must have a single input; we equate the output
                # to the input and skip code generation.
                assert len(node.input) == 1 and len(node.output) == 1
                self.variables[node.output[0]] = self.variables[node.input[0]]
                continue
            varname = self._allocate_varname()
            self.variables[node.output[0]] = varname, False  # not a root-node arg
            node_envs.append(
                {
                    "name": varname,
                    "input_size": len(node.input),
                    "edges": self._emit_hpvm_node_edges(node.input),
                    "code": generated_code,
                }
            )
        return node_envs
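    # Sketch of one node_env entry returned above (values assumed, not from
    # a real run); the Jinja template below consumes exactly these fields:
    #     {"name": "var_0", "input_size": 2,
    #      "edges": [{"is_bindin": True, "input_idx": 1, "edge_idx": 0},
    #                {"is_bindin": True, "input_idx": 0, "edge_idx": 1}],
    #      "code": "void *r = __visc__tensor_add(t1, t2);"}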
    def emit_root_io(self):
        input_args = [
            self.transform_name(name)
            for name, (_, is_root) in self.variables.items()
            if is_root
        ]
        output_arg = self.variables[self.dfg.output.name][0]
        return input_args, output_arg
    def emit_weights(self):
        ret = []
        for name, tensor in self.tensors.items():
            if not isinstance(tensor, WeightTensor):
                continue
            name = self.transform_name(name)
            file_path = f"{tensor.get_mapped_name()}_path.bin"
            ret.append({"name": name, "shape": tensor.shape, "filename": file_path})
        return ret
    def compile(self):
        nodes = self.emit_hpvm_node_structures()
        inputs, output = self.emit_root_io()
        weights = self.emit_weights()
        # Render the Jinja template into the generated HPVM source file.
        with open(self.output_dir / "hpvm_src.cc", "w") as f:
            f.write(
                template.render(
                    nodes=nodes,
                    inputs=inputs,
                    output=output,
                    weights=weights,
                    output_dir=self.output_dir,
                )
            )
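
# A hypothetical driver, shown for illustration only: the DFG constructor
# signature and the file paths are assumptions, not confirmed by this commit.
if __name__ == "__main__":
    from pathlib import Path

    import onnx

    model = onnx.load("model.onnx")               # path assumed
    dfg = DFG(model.graph)                        # constructor assumed
    HpvmCodeGen(dfg, Path("./output")).compile()  # writes ./output/hpvm_src.cc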
hpvm_template.cpp

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <cstring>
#include <visc.h>
#include <tensorTypes.h>
#include <tensorUtils.h>
{% for node in nodes %}
void {{node.name}}_node(
{%- for n in range(1, node.input_size + 1) -%}
void *t{{n}}, size_t bytes_t{{n}}{{", " if not loop.last}}
{%- endfor %}) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes({{node.input_size}}, {% for n in range(1, node.input_size + 1) -%}
t{{n}}{{", " if not loop.last}}
{%- endfor %}, 0);
{{node.code}}
__visc__return(2, r, (size_t) 0);
}
{% endfor -%}
void root({%- for n in inputs -%}
void *{{n}}, size_t {{n}}_bytes{{", " if not loop.last}}
{%- endfor %}) {
__visc__hint(visc::CPU_TARGET);
__visc__attributes({{inputs|length}}, {% for n in inputs -%}
{{n}}{{", " if not loop.last}}
{%- endfor %}, 0);
{% for node in nodes %}
void* {{node.name}} = __visc__createNodeND(0, {{node.name}}_node);
{% for edge in node.edges %}
{% if edge.is_bindin %}
__visc__bindIn({{node.name}}, {{edge.input_idx * 2}}, {{edge.edge_idx * 2}}, 0);
__visc__bindIn({{node.name}}, {{edge.input_idx * 2 + 1}}, {{edge.edge_idx * 2 + 1}}, 0);
{% else %}
__visc__edge({{edge.input_node}}, {{node.name}}, 1, 0, {{edge.edge_idx * 2}}, 0);
__visc__edge({{edge.input_node}}, {{node.name}}, 1, 1, {{edge.edge_idx * 2 + 1}}, 0);
{% endif %}
{% endfor %}
{% endfor %}
__visc__bindOut({{output}}, 0, 0, 0);
__visc__bindOut({{output}}, 1, 1, 0);
}
struct ret_t {
void* tensor;
size_t bytes;
};
typedef struct __attribute__((__packed__)) {
{% for n in inputs %}
void *{{n}};
size_t {{n}}_bytes;
{% endfor %}
struct ret_t r;
} RootIn;
int main(){
std::string dir_prefix = "{{output_dir}}";
std::string input_path = dir_prefix + "input.bin";
std::string labels_path = dir_prefix + "labels.bin";
/* Assumed helper from tensorUtils.h; the label count is model-specific. */
uint32_t* labels = readLabels3(labels_path.c_str(), 10000);
{% for w in weights %}
std::string {{w.name}}_path = dir_prefix + std::string("{{w.filename}}");
void* {{w.name}} = readTrainedWeights({{w.name}}_path.c_str(), 0, {{w.shape|join(', ')}});
{% endfor %}
RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn)));
{% for n in inputs %}
args->{{n}} = {{n}};
args->{{n}}_bytes = 0;
{% endfor %}
__visc__init();
void* dfg = __visc__launch(0, root, (void*) args);
__visc__wait(dfg);
void *result = static_cast<RootIn*>(args)->input;
hpvm_request_tensor(result, 0);
__visc__cleanup();
computeAccuracy3(labels, result);
return 0;
}
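
For reference, a minimal sketch (toy context values assumed) of rendering this template directly with jinja2, mirroring the context that HpvmCodeGen.compile() supplies:

import jinja2

env = jinja2.Environment(
    loader=jinja2.FileSystemLoader(searchpath="./"), trim_blocks=True
)
print(env.get_template("hpvm_template.cpp").render(
    nodes=[{
        "name": "var_0",
        "input_size": 2,
        "edges": [
            {"is_bindin": True, "input_idx": 1, "edge_idx": 0},
            {"is_bindin": True, "input_idx": 0, "edge_idx": 1},
        ],
        "code": "void *r = __visc__tensor_add(t1, t2);",  # toy node body
    }],
    inputs=["input", "conv1_w"],
    output="var_0",
    weights=[{"name": "conv1_w", "shape": [32, 3, 3, 3],
              "filename": "conv1_w_path.bin"}],
    output_dir="./output/",
))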