Commit ee8ea8af authored by Yifan Zhao

Added batch support

parent 37464d36
from os import PathLike
from pathlib import Path
from typing import Dict, List, Tuple, Union
from typing import Dict, List, Optional, Tuple, Union
import jinja2
@@ -14,11 +14,25 @@ template = template_env.get_template(TEMPLATE_FILE)
class HpvmCodeGen:
def __init__(self, dfg: DFG, output_dir: PathLike):
def __init__(
self,
dfg: DFG,
output_dir: PathLike,
input_size: int,
batch_size: int = None,
prefix: str = None,
):
self.dfg = dfg
self.tensors = dfg.tensors
self.var_count = 0
self.output_dir = Path(output_dir)
self.prefix = prefix
# Derive input tensor name and shape from the DFG
input_arg, input_tensor = self.dfg.discover_input_var()
self.input_name = input_arg
self.input_shape = input_tensor.shape[1:]
self.input_size = input_size
self.batch_size = batch_size or input_size
# self.variables is an "ONNX name to our name" map
# Each value is (varname, bool) and the bool indicates
# "is root node input" or not.
@@ -94,14 +108,19 @@ class HpvmCodeGen:
nodes = self.emit_hpvm_node_structures()
inputs, output = self.emit_root_io()
weights = emit_weights(self.tensors)
prefix = self.prefix or self.output_dir
with open(self.output_dir / "hpvm_src.cc", "w") as f:
f.write(
template.render(
nodes=nodes,
inputs=inputs,
output=output,
input_name=self.input_name,
input_size=self.input_size,
batch_size=self.batch_size,
input_shape=self.input_shape,
root_inputs=inputs,
root_output=output,
weights=weights,
output_dir=self.output_dir,
prefix=prefix,
)
)
......
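The new HpvmCodeGen constructor can be exercised on its own roughly as follows (a minimal sketch, assuming graph_builder and codegen_hpvm are importable as in the driver below; the model path, sizes, and output directory are illustrative):

    import os
    import onnx
    from graph_builder import GraphBuilder
    from codegen_hpvm import HpvmCodeGen

    model = onnx.load("model.onnx")          # illustrative model file
    dfg = GraphBuilder(model).dfg            # build the dataflow graph
    os.makedirs("out_dir", exist_ok=True)
    # batch_size is optional; when omitted it falls back to input_size (one big batch)
    codegen = HpvmCodeGen(dfg, "out_dir", input_size=5000, batch_size=100, prefix="out_dir")
    codegen.compile()                        # writes out_dir/hpvm_src.cc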
@@ -24,21 +24,24 @@ def check_version(model, new_version):
def compile(
model,
input_size: Optional[List[int]],
onnx_file: Path,
output_dir: Path,
opset_version: Optional[int],
input_size: int,
prefix: Optional[str],
batch_size: Optional[int],
opset: Optional[int],
hpvmc: bool,
):
from graph_builder import GraphBuilder
from codegen_tensor import TensorCodeGen
from codegen_hpvm import HpvmCodeGen
if opset_version is not None:
model = check_version(model, opset_version)
model = onnx.load(onnx_file)
if opset is not None:
model = check_version(model, opset)
graphBuilder = GraphBuilder(model)
if hpvmc:
hpvmCodeGen = HpvmCodeGen(graphBuilder.dfg, output_dir)
hpvmCodeGen = HpvmCodeGen(graphBuilder.dfg, output_dir, input_size, batch_size, prefix)
hpvmCodeGen.compile()
else:
TensorCodeGen = TensorCodeGen(graphBuilder.dfg, output_dir, input_size)
@@ -51,20 +54,28 @@ def parse_args():
parser = argparse.ArgumentParser(description="ONNX to HPVM-C")
parser.add_argument("onnx_file", type=Path, help="Path to input ONNX file")
parser.add_argument(
"-s",
"--input-size",
type=int,
nargs="+",
help="""Size of input tensor to the model.
Usually 4 dim, including batch size.
For example: -s 1 3 32 32""",
)
parser.add_argument(
"output_dir",
type=Path,
help="Output folder where source file and weight files are generated",
)
parser.add_argument(
"input_size", type=int, help="Size of input dataset",
)
parser.add_argument(
"-p",
"--prefix",
type=str,
help="Prefix in generated code; will be attached before name of weight/input files."
"Defaults to output_dir.",
)
parser.add_argument(
"-b",
"--batch-size",
type=int,
help="Batch size to be used in the generated code. "
"Defaults to input size (i.e., not using batch).",
)
parser.add_argument("--opset", type=int, help="ONNX opset version (enforced)")
parser.add_argument(
"-c",
@@ -79,6 +90,7 @@ hpvmc: HPVM C Interface. Default value is hpvmc.""",
args = parser.parse_args()
args.hpvmc = args.compile_mode == "hpvmc"
delattr(args, 'compile_mode')
return args
@@ -87,13 +99,7 @@ def main():
args = parse_args()
os.makedirs(args.output_dir, exist_ok=True)
compile(
onnx.load(args.onnx_file),
args.input_size,
args.output_dir,
args.opset,
args.hpvmc,
)
compile(**vars(args))
if __name__ == "__main__":
......
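On the command line, the reworked arguments are used roughly like this (a sketch; the driver script name and file paths are placeholders, not taken from the commit):

    python compile_onnx.py model.onnx out_dir 5000 -b 100 -p out_dir --opset 10

The positional arguments are the ONNX model, the output directory, and the dataset size in samples; -b sets the batch size used by the generated loop (defaulting to the whole dataset as one batch), and -p sets the file-name prefix baked into the generated source (defaulting to output_dir).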
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <cstring>
#include <string>
#include <visc.h>
#include <tensorTypes.h>
#include <tensorUtils.h>
@@ -25,11 +20,11 @@ t{{n}}{{", " if not loop.last}}
{% endfor -%}
void root({%- for n in inputs -%}
void root({%- for n in root_inputs -%}
void *{{n}}, size_t {{n}}_bytes{{", " if not loop.last}}
{%- endfor %}) {
__visc__hint(visc::CPU_TARGET);
__visc__attributes({{inputs|length}}, {% for n in inputs -%}
__visc__attributes({{root_inputs|length}}, {% for n in root_inputs -%}
{{n}}{{", " if not loop.last}}
{%- endfor %}, 0);
@@ -46,8 +41,8 @@ void *{{n}}, size_t {{n}}_bytes{{", " if not loop.last}}
{% endfor %}
{% endfor %}
__visc__bindOut({{output}}, 0, 0, 0);
__visc__bindOut({{output}}, 1, 1, 0);
__visc__bindOut({{root_output}}, 0, 0, 0);
__visc__bindOut({{root_output}}, 1, 1, 0);
}
struct ret_t {
@@ -56,7 +51,7 @@ struct ret_t {
};
typedef struct __attribute__((__packed__)) {
{% for n in inputs %}
{% for n in root_inputs %}
void *{{n}};
size_t {{n}}_bytes;
{% endfor %}
@@ -64,28 +59,39 @@ typedef struct __attribute__((__packed__)) {
} RootIn;
const int batch_size = {{batch_size}}, input_size = {{input_size}}, batch_count = input_size / batch_size;
int main(){
std::string dir_prefix = "{{output_dir}}";
std::string dir_prefix = "{{prefix}}/";
std::string input_path = dir_prefix + "input.bin";
std::string labels_path = dir_prefix + "labels.bin";
{% for w in weights %}
std::string {{w.name}}_path = dir_prefix + std::string("{{w.filename}}");
std::string {{w.name}}_path = dir_prefix + "{{w.filename}}";
void* {{w.name}} = readTrainedWeights({{w.name}}_path.c_str(), 0, {{w.shape|join(', ')}});
{% endfor %}
RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn)));
{% for n in inputs %}
void* {{input_name}} = create4DTensor(0, nchw, batch_size, {{input_shape|join(', ')}});
{% for n in root_inputs %}
args->{{n}} = {{n}};
args->{{n}}_bytes = 0;
{% endfor %}
__visc__init();
void* dfg = __visc__launch(0, root, (void*) args);
__visc__wait(dfg);
void *result = static_cast<RootIn*>(args)->input;
visc_request_tensor(result, 0);
__visc__cleanup();
startMemTracking();
for (int i = 0; i < batch_count; i++){
int start = i * batch_size, end = start + batch_size;
copyInputBatch(input_path.c_str(), start, end, {{input_shape|join(', ')}}, {{input_name}});
computeAccuracy3(labels, result);
void* dfg = __visc__launch(0, root, (void*) args);
__visc__wait(dfg);
void *result = static_cast<RootIn*>(args)->{{input_name}};
hpvm_request_tensor(result, 0);
uint32_t* labels = readLabelsBatch3(labels_path.c_str(), start, end);
computeAccuracy3(labels, result);
freeBatchMemory();
}
__visc__cleanup();
return 0;
}
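For concreteness (values are illustrative, not from the commit): with input_size = 5000 and batch_size = 100 the generated loop runs batch_count = 5000 / 100 = 50 iterations, each copying samples [i*100, (i+1)*100) into the pre-allocated input tensor with copyInputBatch, reading the matching label slice with readLabelsBatch3, and scoring it with computeAccuracy3 before freeBatchMemory releases the per-iteration allocations. Note that batch_count uses truncating integer division, so an input_size that is not a multiple of batch_size leaves the trailing remainder of samples unprocessed.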