Commit 02bcd130 authored by Yifan Zhao

HPVM codegen now uses template

parent a0f4d21c
from os import PathLike

import jinja2

from graph_builder import DFG
from tensor import WeightTensor

TEMPLATE_FILE = "hpvm_template.cpp"
loader = jinja2.FileSystemLoader(searchpath="./")
template_env = jinja2.Environment(loader=loader, trim_blocks=True)
template = template_env.get_template(TEMPLATE_FILE)

class HpvmCodeGen:
    def __init__(self, DFG: DFG, output_dir: PathLike):
        self.dfg = DFG
        self.tensors = DFG.tensors
        self.var_count = 0
        self.output_dir = output_dir
        # self.variables is an "ONNX name to our name" map.
        # Each value is (varname, bool); the bool indicates whether the
        # variable is an input of the root node.
        self.variables = self._get_root_args(DFG.inputs, DFG.tensors)

    ################################################
    # Aux functions
    ################################################
    @staticmethod
    def _get_root_args(input_nodes, tensors):
        # Root arguments are the inputs to the graph plus all weight
        # tensors. Sometimes these two kinds overlap (due to ONNX
        # optimizations), so we dedup the list as well.
        root_args = []
        for i in input_nodes:
            root_args.append(i.name)
        for tensor in tensors.values():
            if isinstance(tensor, WeightTensor):
                root_args.append(tensor.name)
        root_args = sorted(list(set(root_args)))
        return {f_name: (index, True) for index, f_name in enumerate(root_args)}

    def _allocate_varname(self):
        varname = f"var_{self.var_count}"
        self.var_count += 1
        return varname

    def get_varname_of(self, onnx_var_name):
        # Returns (is_root_input, varname); varname is an argument index
        # for root-node inputs and a local variable name otherwise.
        if onnx_var_name not in self.variables:
            raise KeyError(onnx_var_name)
        varname, is_root_input = self.variables[onnx_var_name]
        return is_root_input, varname

    @staticmethod
    def transform_name(name: str):
        # Makes an ONNX name usable as a C identifier: "." is illegal
        # in an identifier, and an identifier cannot start with a digit.
        name = name.replace(".", "_")
        if name[0].isnumeric():
            name = "_" + name
        return name
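    # For illustration (toy tensor names assumed, not from a real model),
    # the root-argument map built by _get_root_args looks like:
    #     _get_root_args(inputs, tensors)
    #     # -> {"conv1_w": (0, True), "input": (1, True)}
    # i.e. graph inputs and weights, deduplicated and sorted by name, each
    # mapped to (argument index in the root signature, is_root_input=True).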
    ################################################
    # CodeGen functions
    ################################################

    def _emit_hpvm_node_edges(self, input_vars):
        ret = []
        it = 0
        for onnx_var_name in input_vars:
            hpvm_var_name, is_root_input = self.variables[onnx_var_name]
            if is_root_input:
                # Root-node inputs are bound in by argument index.
                assert isinstance(hpvm_var_name, int)
                ret.append(
                    {"is_bindin": True, "input_idx": hpvm_var_name, "edge_idx": it}
                )
            else:
                # Outputs of other DFG nodes are connected by edges.
                ret.append(
                    {"is_bindin": False, "input_node": hpvm_var_name, "edge_idx": it}
                )
            it += 1
        return ret

    def emit_hpvm_node_structures(self):
        node_envs = []
        for node in self.dfg.traverse_order:
            generated_code = node.hpvm_codegen(self.tensors)
            if generated_code == "":
                # Such a node must have a single input; we equate the output
                # to the input and skip code generation.
                assert len(node.input) == 1 and len(node.output) == 1
                self.variables[node.output[0]] = self.variables[node.input[0]]
                continue
            varname = self._allocate_varname()
            self.variables[node.output[0]] = varname, False  # not a root-node arg
            node_envs.append(
                {
                    "name": varname,
                    "input_size": len(node.input),
                    "edges": self._emit_hpvm_node_edges(node.input),
                    "code": generated_code,
                }
            )
        return node_envs
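    # Sketch of one node_env entry returned above (values assumed, not from
    # a real run); the Jinja template below consumes exactly these fields:
    #     {"name": "var_0", "input_size": 2,
    #      "edges": [{"is_bindin": True, "input_idx": 1, "edge_idx": 0},
    #                {"is_bindin": True, "input_idx": 0, "edge_idx": 1}],
    #      "code": "void *r = __visc__tensor_add(t1, t2);"}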
    def emit_root_io(self):
        input_args = [
            self.transform_name(name)
            for name, (_, is_root) in self.variables.items()
            if is_root
        ]
        output_arg = self.variables[self.dfg.output.name][0]
        return input_args, output_arg
    def emit_weights(self):
        ret = []
        for name, tensor in self.tensors.items():
            if not isinstance(tensor, WeightTensor):
                continue
            name = self.transform_name(name)
            file_path = f"{tensor.get_mapped_name()}_path.bin"
            ret.append({"name": name, "shape": tensor.shape, "filename": file_path})
        return ret
    def compile(self):
        nodes = self.emit_hpvm_node_structures()
        inputs, output = self.emit_root_io()
        weights = self.emit_weights()
        # Render the Jinja template into the generated HPVM source file.
        with open(self.output_dir / "hpvm_src.cc", "w") as f:
            f.write(
                template.render(
                    nodes=nodes,
                    inputs=inputs,
                    output=output,
                    weights=weights,
                    output_dir=self.output_dir,
                )
            )
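
# A hypothetical driver, shown for illustration only: the DFG constructor
# signature and the file paths are assumptions, not confirmed by this commit.
if __name__ == "__main__":
    from pathlib import Path

    import onnx

    model = onnx.load("model.onnx")               # path assumed
    dfg = DFG(model.graph)                        # constructor assumed
    HpvmCodeGen(dfg, Path("./output")).compile()  # writes ./output/hpvm_src.cc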
hpvm_template.cpp

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <cstring>
#include <visc.h>
#include <tensorTypes.h>
#include <tensorUtils.h>
{% for node in nodes %}
void {{node.name}}_node(
{%- for n in range(1, node.input_size + 1) -%}
void *t{{n}}, size_t bytes_t{{n}}{{", " if not loop.last}}
{%- endfor %}) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes({{node.input_size}}, {% for n in range(1, node.input_size + 1) -%}
t{{n}}{{", " if not loop.last}}
{%- endfor %}, 0);
{{node.code}}
__visc__return(2, r, (size_t) 0);
}
{% endfor -%}
void root({%- for n in inputs -%}
void *{{n}}, size_t {{n}}_bytes{{", " if not loop.last}}
{%- endfor %}) {
__visc__hint(visc::CPU_TARGET);
__visc__attributes({{inputs|length}}, {% for n in inputs -%}
{{n}}{{", " if not loop.last}}
{%- endfor %}, 0);
{% for node in nodes %}
void* {{node.name}} = __visc__createNodeND(0, {{node.name}}_node);
{% for edge in node.edges %}
{% if edge.is_bindin %}
__visc__bindIn({{node.name}}, {{edge.input_idx * 2}}, {{edge.edge_idx * 2}}, 0);
__visc__bindIn({{node.name}}, {{edge.input_idx * 2 + 1}}, {{edge.edge_idx * 2 + 1}}, 0);
{% else %}
__visc__edge({{edge.input_node}}, {{node.name}}, 1, 0, {{edge.edge_idx * 2}}, 0);
__visc__edge({{edge.input_node}}, {{node.name}}, 1, 1, {{edge.edge_idx * 2 + 1}}, 0);
{% endif %}
{% endfor %}
{% endfor %}
__visc__bindOut({{output}}, 0, 0, 0);
__visc__bindOut({{output}}, 1, 1, 0);
}
struct ret_t {
void* tensor;
size_t bytes;
};
typedef struct __attribute__((__packed__)) {
{% for n in inputs %}
void *{{n}};
size_t {{n}}_bytes;
{% endfor %}
struct ret_t r;
} RootIn;
int main(){
std::string dir_prefix = "{{output_dir}}";
std::string input_path = dir_prefix + "input.bin";
std::string labels_path = dir_prefix + "labels.bin";
/* Assumed helper from tensorUtils.h; the label count is model-specific. */
uint32_t* labels = readLabels3(labels_path.c_str(), 10000);
{% for w in weights %}
std::string {{w.name}}_path = dir_prefix + std::string("{{w.filename}}");
void* {{w.name}} = readTrainedWeights({{w.name}}_path.c_str(), 0, {{w.shape|join(', ')}});
{% endfor %}
RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn)));
{% for n in inputs %}
args->{{n}} = {{n}};
args->{{n}}_bytes = 0;
{% endfor %}
__visc__init();
void* dfg = __visc__launch(0, root, (void*) args);
__visc__wait(dfg);
void *result = static_cast<RootIn*>(args)->input;
hpvm_request_tensor(result, 0);
__visc__cleanup();
computeAccuracy3(labels, result);
return 0;
}
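
For reference, a minimal sketch (toy context values assumed) of rendering this template directly with jinja2, mirroring the context that HpvmCodeGen.compile() supplies:

import jinja2

env = jinja2.Environment(
    loader=jinja2.FileSystemLoader(searchpath="./"), trim_blocks=True
)
print(env.get_template("hpvm_template.cpp").render(
    nodes=[{
        "name": "var_0",
        "input_size": 2,
        "edges": [
            {"is_bindin": True, "input_idx": 1, "edge_idx": 0},
            {"is_bindin": True, "input_idx": 0, "edge_idx": 1},
        ],
        "code": "void *r = __visc__tensor_add(t1, t2);",  # toy node body
    }],
    inputs=["input", "conv1_w"],
    output="var_0",
    weights=[{"name": "conv1_w", "shape": [32, 3, 3, 3],
              "filename": "conv1_w_path.bin"}],
    output_dir="./output/",
))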