Commit ee8ea8af authored by Yifan Zhao

Added batch support

parent 37464d36
 from os import PathLike
 from pathlib import Path
-from typing import Dict, List, Tuple, Union
+from typing import Dict, List, Optional, Tuple, Union
 import jinja2
@@ -14,11 +14,25 @@ template = template_env.get_template(TEMPLATE_FILE)
 class HpvmCodeGen:
-    def __init__(self, dfg: DFG, output_dir: PathLike):
+    def __init__(
+        self,
+        dfg: DFG,
+        output_dir: PathLike,
+        input_size: int,
+        batch_size: int = None,
+        prefix: str = None,
+    ):
         self.dfg = dfg
         self.tensors = dfg.tensors
         self.var_count = 0
         self.output_dir = Path(output_dir)
+        self.prefix = prefix
+        # Some reasoning of input information
+        input_arg, input_tensor = self.dfg.discover_input_var()
+        self.input_name = input_arg
+        self.input_shape = input_tensor.shape[1:]
+        self.input_size = input_size
+        self.batch_size = batch_size or input_size
         # self.variables is a "onnx name to our name" map
         # Each value is (varname, bool) and the bool indicates
         # "is root node input" or not.
@@ -94,14 +108,19 @@ class HpvmCodeGen:
         nodes = self.emit_hpvm_node_structures()
         inputs, output = self.emit_root_io()
         weights = emit_weights(self.tensors)
+        prefix = self.prefix or self.output_dir
         with open(self.output_dir / "hpvm_src.cc", "w") as f:
             f.write(
                 template.render(
                     nodes=nodes,
-                    inputs=inputs,
-                    output=output,
+                    input_name=self.input_name,
+                    input_size=self.input_size,
+                    batch_size=self.batch_size,
+                    input_shape=self.input_shape,
+                    root_inputs=inputs,
+                    root_output=output,
                     weights=weights,
-                    output_dir=self.output_dir,
+                    prefix=prefix,
                 )
             )
...
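For reference, a minimal sketch of how the updated HpvmCodeGen is meant to be driven, assuming the repository's graph_builder and codegen_hpvm modules are importable; the model path, sizes, and prefix below are illustrative placeholders, not values from this commit:

import onnx
from graph_builder import GraphBuilder
from codegen_hpvm import HpvmCodeGen

model = onnx.load("model.onnx")          # placeholder ONNX model path
dfg = GraphBuilder(model).dfg            # DFG consumed by the code generator
# batch_size and prefix are optional: batch_size falls back to input_size
# (a single batch over the whole dataset), prefix falls back to output_dir.
codegen = HpvmCodeGen(dfg, "out_dir", input_size=5000, batch_size=100, prefix="out_dir")
codegen.compile()                        # renders hpvm_src.cc into out_dir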
@@ -24,21 +24,24 @@ def check_version(model, new_version):
 def compile(
-    model,
+    onnx_file: Path,
-    input_size: Optional[List[int]],
     output_dir: Path,
-    opset_version: Optional[int],
+    input_size: int,
+    prefix: Optional[str],
+    batch_size: Optional[int],
+    opset: Optional[int],
     hpvmc: bool,
 ):
     from graph_builder import GraphBuilder
     from codegen_tensor import TensorCodeGen
     from codegen_hpvm import HpvmCodeGen
-    if opset_version is not None:
-        model = check_version(model, opset_version)
+    model = onnx.load(onnx_file)
+    if opset is not None:
+        model = check_version(model, opset)
     graphBuilder = GraphBuilder(model)
     if hpvmc:
-        hpvmCodeGen = HpvmCodeGen(graphBuilder.dfg, output_dir)
+        hpvmCodeGen = HpvmCodeGen(graphBuilder.dfg, output_dir, input_size, batch_size, prefix)
         hpvmCodeGen.compile()
     else:
         TensorCodeGen = TensorCodeGen(graphBuilder.dfg, output_dir, input_size)
@@ -51,20 +54,28 @@ def parse_args():
     parser = argparse.ArgumentParser(description="ONNX to HPVM-C")
     parser.add_argument("onnx_file", type=Path, help="Path to input ONNX file")
-    parser.add_argument(
-        "-s",
-        "--input-size",
-        type=int,
-        nargs="+",
-        help="""Size of input tensor to the model.
-Usually 4 dim, including batch size.
-For example: -s 1 3 32 32""",
-    )
     parser.add_argument(
         "output_dir",
         type=Path,
         help="Output folder where source file and weight files are generated",
     )
+    parser.add_argument(
+        "input_size", type=int, help="Size of input dataset",
+    )
+    parser.add_argument(
+        "-p",
+        "--prefix",
+        type=str,
+        help="Prefix in generated code; will be attached before name of weight/input files. "
+        "Defaults to output_dir.",
+    )
+    parser.add_argument(
+        "-b",
+        "--batch-size",
+        type=int,
+        help="Batch size to be used in the generated code. "
+        "Defaults to input size (i.e., not using batch).",
+    )
     parser.add_argument("--opset", type=int, help="ONNX opset version (enforced)")
     parser.add_argument(
         "-c",
@@ -79,6 +90,7 @@ hpvmc: HPVM C Interface. Default value is hpvmc.""",
     args = parser.parse_args()
     args.hpvmc = args.compile_mode == "hpvmc"
+    delattr(args, 'compile_mode')
     return args
@@ -87,13 +99,7 @@ def main():
     args = parse_args()
     os.makedirs(args.output_dir, exist_ok=True)
-    compile(
-        onnx.load(args.onnx_file),
-        args.input_size,
-        args.output_dir,
-        args.opset,
-        args.hpvmc,
-    )
+    compile(**vars(args))
 if __name__ == "__main__":
...
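Because compile_mode is deleted from args, compile(**vars(args)) lines up exactly with the new signature. A hedged illustration of what that call expands to for a hypothetical invocation that passes model.onnx, an out directory, an input size of 5000, -b 100, and -p data/model/ (all concrete values are assumptions, not from this commit):

from pathlib import Path

compile(
    onnx_file=Path("model.onnx"),   # positional: path to the ONNX model
    output_dir=Path("out"),         # positional: where sources and weights are emitted
    input_size=5000,                # positional: number of samples in input.bin
    prefix="data/model/",           # -p/--prefix; falls back to output_dir when omitted
    batch_size=100,                 # -b/--batch-size; falls back to input_size when omitted
    opset=None,                     # --opset not given, so no version conversion
    hpvmc=True,                     # from -c/--compile-mode, which defaults to "hpvmc"
)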
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/stat.h>
-#include <cstring>
+#include <string>
 #include <visc.h>
 #include <tensorTypes.h>
 #include <tensorUtils.h>
@@ -25,11 +20,11 @@ t{{n}}{{", " if not loop.last}}
 {% endfor -%}
-void root({%- for n in inputs -%}
+void root({%- for n in root_inputs -%}
 void *{{n}}, size_t {{n}}_bytes{{", " if not loop.last}}
 {%- endfor %}) {
   __visc__hint(visc::CPU_TARGET);
-  __visc__attributes({{inputs|length}}, {% for n in inputs -%}
+  __visc__attributes({{root_inputs|length}}, {% for n in root_inputs -%}
 {{n}}{{", " if not loop.last}}
 {%- endfor %}, 0);
@@ -46,8 +41,8 @@ void *{{n}}, size_t {{n}}_bytes{{", " if not loop.last}}
 {% endfor %}
 {% endfor %}
-  __visc__bindOut({{output}}, 0, 0, 0);
-  __visc__bindOut({{output}}, 1, 1, 0);
+  __visc__bindOut({{root_output}}, 0, 0, 0);
+  __visc__bindOut({{root_output}}, 1, 1, 0);
 }
 struct ret_t {
@@ -56,7 +51,7 @@ struct ret_t {
 };
 typedef struct __attribute__((__packed__)) {
-{% for n in inputs %}
+{% for n in root_inputs %}
   void *{{n}};
   size_t {{n}}_bytes;
 {% endfor %}
@@ -64,28 +59,39 @@ typedef struct __attribute__((__packed__)) {
 } RootIn;
+const int batch_size = {{batch_size}}, input_size = {{input_size}}, batch_count = input_size / batch_size;
 int main(){
-  std::string dir_prefix = "{{output_dir}}";
+  std::string dir_prefix = "{{prefix}}/";
   std::string input_path = dir_prefix + "input.bin";
   std::string labels_path = dir_prefix + "labels.bin";
 {% for w in weights %}
-  std::string {{w.name}}_path = dir_prefix + std::string("{{w.filename}}");
+  std::string {{w.name}}_path = dir_prefix + "{{w.filename}}";
   void* {{w.name}} = readTrainedWeights({{w.name}}_path.c_str(), 0, {{w.shape|join(', ')}});
 {% endfor %}
   RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn)));
-{% for n in inputs %}
+  void* {{input_name}} = create4DTensor(0, nchw, batch_size, {{input_shape|join(', ')}});
+{% for n in root_inputs %}
   args->{{n}} = {{n}};
   args->{{n}}_bytes = 0;
 {% endfor %}
   __visc__init();
-  void* dfg = __visc__launch(0, root, (void*) args);
-  __visc__wait(dfg);
-  void *result = static_cast<RootIn*>(args)->input;
-  visc_request_tensor(result, 0);
-  __visc__cleanup();
-  computeAccuracy3(labels, result);
+  startMemTracking();
+  for (int i = 0; i < batch_count; i++){
+    int start = i * batch_size, end = start + batch_size;
+    copyInputBatch(input_path.c_str(), start, end, {{input_shape|join(', ')}}, {{input_name}});
+    void* dfg = __visc__launch(0, root, (void*) args);
+    __visc__wait(dfg);
+    void *result = static_cast<RootIn*>(args)->{{input_name}};
+    hpvm_request_tensor(result, 0);
+    uint32_t* labels = readLabelsBatch3(labels_path.c_str(), start, end);
+    computeAccuracy3(labels, result);
+    freeBatchMemory();
+  }
+  __visc__cleanup();
   return 0;
 }
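The generated main() now walks the dataset in batch_count = input_size / batch_size chunks and copies samples [start, end) into the reused input tensor on every iteration. A quick check of that arithmetic in Python (the concrete sizes are examples only, not from the commit):

input_size, batch_size = 5000, 100                  # example values for the template constants
batch_count = input_size // batch_size              # mirrors the C integer division: 50 batches
batches = [(i * batch_size, (i + 1) * batch_size) for i in range(batch_count)]
assert batches[0] == (0, 100) and batches[-1] == (4900, 5000)
# If batch_size does not evenly divide input_size, the trailing
# input_size % batch_size samples are never visited by the loop.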