Commit ee8ea8af authored by Yifan Zhao

Added batch support

parent 37464d36
from os import PathLike
from pathlib import Path
from typing import Dict, List, Tuple, Union
from typing import Dict, List, Optional, Tuple, Union
import jinja2
@@ -14,11 +14,25 @@ template = template_env.get_template(TEMPLATE_FILE)
class HpvmCodeGen:
def __init__(self, dfg: DFG, output_dir: PathLike):
def __init__(
self,
dfg: DFG,
output_dir: PathLike,
input_size: int,
batch_size: int = None,
prefix: str = None,
):
self.dfg = dfg
self.tensors = dfg.tensors
self.var_count = 0
self.output_dir = Path(output_dir)
self.prefix = prefix
# Derive input tensor name and shape from the DFG
input_arg, input_tensor = self.dfg.discover_input_var()
self.input_name = input_arg
self.input_shape = input_tensor.shape[1:]
self.input_size = input_size
self.batch_size = batch_size or input_size
# self.variables is an "ONNX name to our name" map
# Each value is (varname, bool) and the bool indicates
# "is root node input" or not.
@@ -94,14 +108,19 @@ class HpvmCodeGen:
nodes = self.emit_hpvm_node_structures()
inputs, output = self.emit_root_io()
weights = emit_weights(self.tensors)
prefix = self.prefix or self.output_dir
with open(self.output_dir / "hpvm_src.cc", "w") as f:
f.write(
template.render(
nodes=nodes,
inputs=inputs,
output=output,
input_name=self.input_name,
input_size=self.input_size,
batch_size=self.batch_size,
input_shape=self.input_shape,
root_inputs=inputs,
root_output=output,
weights=weights,
output_dir=self.output_dir,
prefix=prefix,
)
)
......
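The new HpvmCodeGen constructor can be exercised on its own roughly as follows (a minimal sketch, assuming graph_builder and codegen_hpvm are importable as in the driver below; the model path, sizes, and output directory are illustrative):

    import os
    import onnx
    from graph_builder import GraphBuilder
    from codegen_hpvm import HpvmCodeGen

    model = onnx.load("model.onnx")          # illustrative model file
    dfg = GraphBuilder(model).dfg            # build the dataflow graph
    os.makedirs("out_dir", exist_ok=True)
    # batch_size is optional; when omitted it falls back to input_size (one big batch)
    codegen = HpvmCodeGen(dfg, "out_dir", input_size=5000, batch_size=100, prefix="out_dir")
    codegen.compile()                        # writes out_dir/hpvm_src.cc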
@@ -24,21 +24,24 @@ def check_version(model, new_version):
def compile(
model,
input_size: Optional[List[int]],
onnx_file: Path,
output_dir: Path,
opset_version: Optional[int],
input_size: int,
prefix: Optional[str],
batch_size: Optional[int],
opset: Optional[int],
hpvmc: bool,
):
from graph_builder import GraphBuilder
from codegen_tensor import TensorCodeGen
from codegen_hpvm import HpvmCodeGen
if opset_version is not None:
model = check_version(model, opset_version)
model = onnx.load(onnx_file)
if opset is not None:
model = check_version(model, opset)
graphBuilder = GraphBuilder(model)
if hpvmc:
hpvmCodeGen = HpvmCodeGen(graphBuilder.dfg, output_dir)
hpvmCodeGen = HpvmCodeGen(graphBuilder.dfg, output_dir, input_size, batch_size, prefix)
hpvmCodeGen.compile()
else:
TensorCodeGen = TensorCodeGen(graphBuilder.dfg, output_dir, input_size)
@@ -51,20 +54,28 @@ def parse_args():
parser = argparse.ArgumentParser(description="ONNX to HPVM-C")
parser.add_argument("onnx_file", type=Path, help="Path to input ONNX file")
parser.add_argument(
"-s",
"--input-size",
type=int,
nargs="+",
help="""Size of input tensor to the model.
Usually 4 dim, including batch size.
For example: -s 1 3 32 32""",
)
parser.add_argument(
"output_dir",
type=Path,
help="Output folder where source file and weight files are generated",
)
parser.add_argument(
"input_size", type=int, help="Size of input dataset",
)
parser.add_argument(
"-p",
"--prefix",
type=str,
help="Prefix in generated code; will be attached before name of weight/input files."
"Defaults to output_dir.",
)
parser.add_argument(
"-b",
"--batch-size",
type=int,
help="Batch size to be used in the generated code. "
"Defaults to input size (i.e., not using batch).",
)
parser.add_argument("--opset", type=int, help="ONNX opset version (enforced)")
parser.add_argument(
"-c",
@@ -79,6 +90,7 @@ hpvmc: HPVM C Interface. Default value is hpvmc.""",
args = parser.parse_args()
args.hpvmc = args.compile_mode == "hpvmc"
delattr(args, 'compile_mode')
return args
@@ -87,13 +99,7 @@ def main():
args = parse_args()
os.makedirs(args.output_dir, exist_ok=True)
compile(
onnx.load(args.onnx_file),
args.input_size,
args.output_dir,
args.opset,
args.hpvmc,
)
compile(**vars(args))
if __name__ == "__main__":
......
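On the command line, the reworked arguments are used roughly like this (a sketch; the driver script name and file paths are placeholders, not taken from the commit):

    python compile_onnx.py model.onnx out_dir 5000 -b 100 -p out_dir --opset 10

The positional arguments are the ONNX model, the output directory, and the dataset size in samples; -b sets the batch size used by the generated loop (defaulting to the whole dataset as one batch), and -p sets the file-name prefix baked into the generated source (defaulting to output_dir).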
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <cstring>
#include <string>
#include <visc.h>
#include <tensorTypes.h>
#include <tensorUtils.h>
@@ -25,11 +20,11 @@ t{{n}}{{", " if not loop.last}}
{% endfor -%}
void root({%- for n in inputs -%}
void root({%- for n in root_inputs -%}
void *{{n}}, size_t {{n}}_bytes{{", " if not loop.last}}
{%- endfor %}) {
__visc__hint(visc::CPU_TARGET);
__visc__attributes({{inputs|length}}, {% for n in inputs -%}
__visc__attributes({{root_inputs|length}}, {% for n in root_inputs -%}
{{n}}{{", " if not loop.last}}
{%- endfor %}, 0);
@@ -46,8 +41,8 @@ void *{{n}}, size_t {{n}}_bytes{{", " if not loop.last}}
{% endfor %}
{% endfor %}
__visc__bindOut({{output}}, 0, 0, 0);
__visc__bindOut({{output}}, 1, 1, 0);
__visc__bindOut({{root_output}}, 0, 0, 0);
__visc__bindOut({{root_output}}, 1, 1, 0);
}
struct ret_t {
@@ -56,7 +51,7 @@ struct ret_t {
};
typedef struct __attribute__((__packed__)) {
{% for n in inputs %}
{% for n in root_inputs %}
void *{{n}};
size_t {{n}}_bytes;
{% endfor %}
@@ -64,28 +59,39 @@ typedef struct __attribute__((__packed__)) {
} RootIn;
const int batch_size = {{batch_size}}, input_size = {{input_size}}, batch_count = input_size / batch_size;
int main(){
std::string dir_prefix = "{{output_dir}}";
std::string dir_prefix = "{{prefix}}/";
std::string input_path = dir_prefix + "input.bin";
std::string labels_path = dir_prefix + "labels.bin";
{% for w in weights %}
std::string {{w.name}}_path = dir_prefix + std::string("{{w.filename}}");
std::string {{w.name}}_path = dir_prefix + "{{w.filename}}";
void* {{w.name}} = readTrainedWeights({{w.name}}_path.c_str(), 0, {{w.shape|join(', ')}});
{% endfor %}
RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn)));
{% for n in inputs %}
void* {{input_name}} = create4DTensor(0, nchw, batch_size, {{input_shape|join(', ')}});
{% for n in root_inputs %}
args->{{n}} = {{n}};
args->{{n}}_bytes = 0;
{% endfor %}
__visc__init();
void* dfg = __visc__launch(0, root, (void*) args);
__visc__wait(dfg);
void *result = static_cast<RootIn*>(args)->input;
visc_request_tensor(result, 0);
__visc__cleanup();
startMemTracking();
for (int i = 0; i < batch_count; i++){
int start = i * batch_size, end = start + batch_size;
copyInputBatch(input_path.c_str(), start, end, {{input_shape|join(', ')}}, {{input_name}});
computeAccuracy3(labels, result);
void* dfg = __visc__launch(0, root, (void*) args);
__visc__wait(dfg);
void *result = static_cast<RootIn*>(args)->{{input_name}};
hpvm_request_tensor(result, 0);
uint32_t* labels = readLabelsBatch3(labels_path.c_str(), start, end);
computeAccuracy3(labels, result);
freeBatchMemory();
}
__visc__cleanup();
return 0;
}
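For concreteness (values are illustrative, not from the commit): with input_size = 5000 and batch_size = 100 the generated loop runs batch_count = 5000 / 100 = 50 iterations, each copying samples [i*100, (i+1)*100) into the pre-allocated input tensor with copyInputBatch, reading the matching label slice with readLabelsBatch3, and scoring it with computeAccuracy3 before freeBatchMemory releases the per-iteration allocations. Note that batch_count uses truncating integer division, so an input_size that is not a multiple of batch_size leaves the trailing remainder of samples unprocessed.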