Commit 02bcd130 authored by Yifan Zhao

HPVM codegen now uses template

parent a0f4d21c
from os import PathLike

import jinja2

from graph_builder import DFG
from tensor import WeightTensor

TEMPLATE_FILE = "hpvm_template.cpp"
loader = jinja2.FileSystemLoader(searchpath="./")
template_env = jinja2.Environment(loader=loader, trim_blocks=True)
template = template_env.get_template(TEMPLATE_FILE)
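# Note on the environment above: trim_blocks=True makes Jinja swallow the
# newline that follows a {% ... %} tag, so the loop/branch scaffolding in
# hpvm_template.cpp leaves no stray blank lines in the emitted C++.
# A toy illustration (hypothetical template string, not the real one):
#
#   >>> env = jinja2.Environment(trim_blocks=True)
#   >>> env.from_string("{% for w in ws %}\nload({{ w }});\n{% endfor %}").render(ws=["a", "b"])
#   'load(a);\nload(b);\n'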
class HpvmCodeGen:
    def __init__(self, DFG: DFG, output_dir: PathLike):
        self.dfg = DFG
        self.tensors = DFG.tensors
        self.var_count = 0
        self.output_dir = output_dir
        # self.variables is an "ONNX name to our name" map.
        # Each value is (varname, bool), and the bool indicates
        # "is root node input" or not.
        self.variables = self._get_root_args(DFG.inputs, DFG.tensors)
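    # For illustration (hypothetical model with one graph input and two
    # weight tensors), self.variables starts out with root args only, each
    # mapped to (index-in-sorted-order, True):
    #     {"conv1_b": (0, True), "conv1_w": (1, True), "input": (2, True)}
    # emit_hpvm_node_structures() later adds local entries such as
    #     {"conv1_out": ("var_0", False)}
    # where the string is a generated variable name and False marks an
    # internal (non-root) value.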
    ################################################
    # Aux functions
    ################################################

    @staticmethod
    def _get_root_args(input_nodes, tensors):
        # Input to the graph + all weight tensors.
        # Sometimes these 2 kinds can overlap (due to ONNX optim),
        # so we dedup this array as well.
        root_args = []
        for i in input_nodes:
            root_args.append(i.name)
        for tensor in tensors.values():
            if isinstance(tensor, WeightTensor):
                root_args.append(tensor.name)
        root_args = sorted(set(root_args))
        return {f_name: (index, True) for index, f_name in enumerate(root_args)}
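    # The dedup matters because an ONNX optimization pass can list the same
    # tensor both as a graph input and as an initializer. Toy illustration
    # (names hypothetical):
    #
    #   >>> names = ["input", "conv1_w", "input"]
    #   >>> {n: (i, True) for i, n in enumerate(sorted(set(names)))}
    #   {'conv1_w': (0, True), 'input': (1, True)}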
    def _allocate_varname(self):
        varname = f"var_{self.var_count}"
        self.var_count += 1
        return varname

    def get_varname_of(self, onnx_var_name):
        # Returns (is_root_input, name): an int index into the root
        # arguments for root inputs, or a generated local variable name.
        if onnx_var_name in self.variables:
            name, is_root_input = self.variables[onnx_var_name]
            return is_root_input, name
        raise KeyError(onnx_var_name)

    @staticmethod
    def transform_name(name: str):
        name = name.replace(".", "_")
        if name[0].isnumeric():
            name = "_" + name
        return name
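    # transform_name makes ONNX names valid C identifiers: "." is illegal
    # in an identifier, and so is a leading digit. Examples (hypothetical
    # ONNX names):
    #
    #   >>> HpvmCodeGen.transform_name("conv1.weight")
    #   'conv1_weight'
    #   >>> HpvmCodeGen.transform_name("0_out")
    #   '_0_out'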
    ################################################
    # CodeGen functions
    ################################################

    def _emit_hpvm_node_edges(self, input_vars):
        ret = []
        it = 0
        for onnx_var_name in input_vars:
            hpvm_var_name, is_root_input = self.variables[onnx_var_name]
            if is_root_input:
                assert isinstance(hpvm_var_name, int)
                ret.append(
                    {"is_bindin": True, "input_idx": hpvm_var_name, "edge_idx": it}
                )
            else:
                ret.append(
                    {"is_bindin": False, "input_node": hpvm_var_name, "edge_idx": it}
                )
            it += 1
        return ret
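    # A sketch of the return value for a node whose inputs are the root
    # argument at index 2 and a locally computed "var_0" (values hypothetical):
    #
    #     [{"is_bindin": True, "input_idx": 2, "edge_idx": 0},
    #      {"is_bindin": False, "input_node": "var_0", "edge_idx": 1}]
    #
    # The template doubles every index because each tensor travels as a
    # (void*, size_t) pair: the root arg above becomes bindIn calls mapping
    # root ports 4->0 and 5->1, and the internal input becomes two
    # __visc__edge calls for the pointer and the byte count.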
    def emit_hpvm_node_structures(self):
        node_envs = []
        for node in self.dfg.traverse_order:
            generated_code = node.hpvm_codegen(self.tensors)
            if generated_code == "":
                # Node must have a single input; we equate the output to
                # the input and skip code generation.
                assert len(node.input) == 1 and len(node.output) == 1
                self.variables[node.output[0]] = self.variables[node.input[0]]
                continue
            varname = self._allocate_varname()
            self.variables[node.output[0]] = varname, False  # not a root-node arg
            node_envs.append(
                {
                    "name": varname,
                    "input_size": len(node.input),
                    "edges": self._emit_hpvm_node_edges(node.input),
                    "code": generated_code,
                }
            )
        return node_envs
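    # When hpvm_codegen returns "" (a layer this backend folds away), the
    # node's output simply aliases its input's entry, so downstream nodes
    # wire straight through it. Sketch with hypothetical names:
    #
    #   >>> variables = {"pool2_out": ("var_3", False)}
    #   >>> variables["flatten_out"] = variables["pool2_out"]
    #   >>> variables["flatten_out"]
    #   ('var_3', False)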
    def emit_root_io(self):
        input_args = [
            self.transform_name(name)
            for name, (_, is_root) in self.variables.items()
            if is_root
        ]
        output_arg = self.variables[self.dfg.output.name][0]
        return input_args, output_arg
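    # For the hypothetical model above, emit_root_io() would return
    #     (["conv1_b", "conv1_w", "input"], "var_7")
    # i.e. the transformed root-argument names in sorted order, plus the
    # generated variable holding the DFG output ("var_7" assumed).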
    def emit_weights(self):
        ret = []
        for name, tensor in self.tensors.items():
            if not isinstance(tensor, WeightTensor):
                continue
            name = self.transform_name(name)
            file_path = f"{tensor.get_mapped_name()}_path.bin"
            ret.append({"name": name, "shape": tensor.shape, "filename": file_path})
        return ret
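    # Each returned entry feeds one readTrainedWeights(...) line in the
    # template. A hypothetical entry (name and shape invented):
    #     {"name": "conv1_w", "shape": [32, 1, 5, 5],
    #      "filename": "conv1_w_path.bin"}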
    def compile(self):
        nodes = self.emit_hpvm_node_structures()
        inputs, output = self.emit_root_io()
        weights = self.emit_weights()
        with open(self.output_dir / "hpvm_src.cc", "w") as f:
            f.write(
                template.render(
                    nodes=nodes,
                    inputs=inputs,
                    output=output,
                    weights=weights,
                    output_dir=self.output_dir,
                )
            )
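# A minimal sketch of driving the generator end to end. The DFG constructor
# call and the ONNX file name below are assumptions for illustration; only
# HpvmCodeGen(dfg, output_dir).compile() is taken from this file.
if __name__ == "__main__":
    from pathlib import Path

    import onnx

    model = onnx.load("lenet.onnx")            # hypothetical model file
    dfg = DFG(model.graph)                     # constructor signature assumed
    HpvmCodeGen(dfg, Path("./lenet_hpvm/")).compile()
    # Writes ./lenet_hpvm/hpvm_src.cc, rendered from hpvm_template.cpp.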
hpvm_template.cpp:

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <cstring>
#include <visc.h>
#include <tensorTypes.h>
#include <tensorUtils.h>
{% for node in nodes %}
void {{node.name}}_node(
{%- for n in range(1, node.input_size + 1) -%}
void *t{{n}}, size_t bytes_t{{n}}{{", " if not loop.last}}
{%- endfor %}) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes({{node.input_size}}, {% for n in range(1, node.input_size + 1) -%}
t{{n}}{{", " if not loop.last}}
{%- endfor %}, 0);
{{node.code}}
__visc__return(2, r, (size_t) 0);
}
{% endfor -%}
void root({%- for n in inputs -%}
void *{{n}}, size_t {{n}}_bytes{{", " if not loop.last}}
{%- endfor %}) {
__visc__hint(visc::CPU_TARGET);
__visc__attributes({{inputs|length}}, {% for n in inputs -%}
{{n}}{{", " if not loop.last}}
{%- endfor %}, 0);
{% for node in nodes %}
void* {{node.name}} = __visc__createNodeND(0, {{node.name}}_node);
{% for edge in node.edges %}
{% if edge.is_bindin %}
__visc__bindIn({{node.name}}, {{edge.input_idx * 2}}, {{edge.edge_idx * 2}}, 0);
__visc__bindIn({{node.name}}, {{edge.input_idx * 2 + 1}}, {{edge.edge_idx * 2 + 1}}, 0);
{% else %}
__visc__edge({{edge.input_node}}, {{node.name}}, 1, 0, {{edge.edge_idx * 2}}, 0);
__visc__edge({{edge.input_node}}, {{node.name}}, 1, 1, {{edge.edge_idx * 2 + 1}}, 0);
{% endif %}
{% endfor %}
{% endfor %}
__visc__bindOut({{output}}, 0, 0, 0);
__visc__bindOut({{output}}, 1, 1, 0);
}
struct ret_t {
void* tensor;
size_t bytes;
};
typedef struct __attribute__((__packed__)) {
{% for n in inputs %}
void *{{n}};
size_t {{n}}_bytes;
{% endfor %}
struct ret_t r;
} RootIn;
int main(){
std::string dir_prefix = "{{output_dir}}";
std::string input_path = dir_prefix + "input.bin";
std::string labels_path = dir_prefix + "labels.bin";
{% for w in weights %}
std::string {{w.name}}_path = dir_prefix + std::string("{{w.filename}}");
void* {{w.name}} = readTrainedWeights({{w.name}}_path.c_str(), 0, {{w.shape|join(', ')}});
{% endfor %}
RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn)));
{% for n in inputs %}
args->{{n}} = {{n}};
args->{{n}}_bytes = 0;
{% endfor %}
__visc__init();
void* dfg = __visc__launch(0, root, (void*) args);
__visc__wait(dfg);
void *result = static_cast<RootIn*>(args)->input;
hpvm_request_tensor(result, 0);
__visc__cleanup();
computeAccuracy3(labels, result);
return 0;
}