diff --git a/hpvm/projects/torch2hpvm/torch2hpvm/approxknobs.json b/hpvm/projects/torch2hpvm/torch2hpvm/approxknobs.json new file mode 100644 index 0000000000000000000000000000000000000000..2462a512f2c97f4faed1ee565ade086a21f07e0f --- /dev/null +++ b/hpvm/projects/torch2hpvm/torch2hpvm/approxknobs.json @@ -0,0 +1,207 @@ +[ + { + "name": "11", + "speedup": 1.0, + "applies_to": [ + "Conv2D", + "MatMul" + ] + }, + { + "name": "12", + "speedup": 1.5, + "applies_to": [ + "Conv2D", + "MatMul" + ] + }, + { + "name": "151", + "speedup": 3.0, + "applies_to": [ + "Conv2D" + ] + }, + { + "name": "152", + "speedup": 3.0, + "applies_to": [ + "Conv2D" + ] + }, + { + "name": "153", + "speedup": 3.0, + "applies_to": [ + "Conv2D" + ] + }, + { + "name": "154", + "speedup": 3.0, + "applies_to": [ + "Conv2D" + ] + }, + { + "name": "155", + "speedup": 2.25, + "applies_to": [ + "Conv2D" + ] + }, + { + "name": "156", + "speedup": 2.25, + "applies_to": [ + "Conv2D" + ] + }, + { + "name": "157", + "speedup": 2.25, + "applies_to": [ + "Conv2D" + ] + }, + { + "name": "158", + "speedup": 2.25, + "applies_to": [ + "Conv2D" + ] + }, + { + "name": "159", + "speedup": 2.25, + "applies_to": [ + "Conv2D" + ] + }, + { + "name": "160", + "speedup": 2.25, + "applies_to": [ + "Conv2D" + ] + }, + { + "name": "161", + "speedup": 2.0, + "applies_to": [ + "Conv2D" + ] + }, + { + "name": "162", + "speedup": 2.0, + "applies_to": [ + "Conv2D" + ] + }, + { + "name": "163", + "speedup": 2.0, + "applies_to": [ + "Conv2D" + ] + }, + { + "name": "164", + "speedup": 2.0, + "applies_to": [ + "Conv2D" + ] + }, + { + "name": "165", + "speedup": 2.0, + "applies_to": [ + "Conv2D" + ] + }, + { + "name": "166", + "speedup": 2.0, + "applies_to": [ + "Conv2D" + ] + }, + { + "name": "167", + "speedup": 2.0, + "applies_to": [ + "Conv2D" + ] + }, + { + "name": "168", + "speedup": 2.0, + "applies_to": [ + "Conv2D" + ] + }, + { + "name": "261", + "speedup": 3.0, + "applies_to": [ + "Conv2D" + ] + }, + { + "name": "262", + 
"speedup": 3.0, + "applies_to": [ + "Conv2D" + ] + }, + { + "name": "263", + "speedup": 2.25, + "applies_to": [ + "Conv2D" + ] + }, + { + "name": "264", + "speedup": 2.25, + "applies_to": [ + "Conv2D" + ] + }, + { + "name": "265", + "speedup": 2.25, + "applies_to": [ + "Conv2D" + ] + }, + { + "name": "266", + "speedup": 2.0, + "applies_to": [ + "Conv2D" + ] + }, + { + "name": "267", + "speedup": 2.0, + "applies_to": [ + "Conv2D" + ] + }, + { + "name": "268", + "speedup": 2.0, + "applies_to": [ + "Conv2D" + ] + }, + { + "name": "269", + "speedup": 2.0, + "applies_to": [ + "Conv2D" + ] + } +] diff --git a/hpvm/projects/torch2hpvm/torch2hpvm/compile.py b/hpvm/projects/torch2hpvm/torch2hpvm/compile.py index 9cf1f1a1670d89ceb9ba3d7db6592898b0312688..5b8cb3e1f6006a74de8f41a4012cc7d240ddc46f 100644 --- a/hpvm/projects/torch2hpvm/torch2hpvm/compile.py +++ b/hpvm/projects/torch2hpvm/torch2hpvm/compile.py @@ -25,6 +25,8 @@ ModelTy = Union[PathLike, onnx.ModelProto, Module] # Path to a pair of HPVM bin format file (input, labels), or PyTorch Dataset DatasetTy = Union[BinDataset, Dataset] +def_approx_knobs_file = Path(__file__).parent / "approxknobs.json" + class ModelExporter: tuneset_name = "tune_input.bin", "tune_labels.bin" @@ -74,9 +76,30 @@ class ModelExporter: input_, labels = self.testset_name self._dump_dataset(self.test_dataset, self.weight_dir / input_, self.weight_dir / labels) + def export_metadata( + self, + output: PathLike = None, approx_knobs_file: PathLike = def_approx_knobs_file + ): + import json + from collections import defaultdict + + with Path(approx_knobs_file).open() as f: + knobs = json.load(f) + ty_knobs = defaultdict(list) + for k in knobs: + for ty in k.pop("applies_to"): + ty_knobs[ty].append((k["name"], k["speedup"])) + knobs_used = set() + for node in self.dfg.traverse_order: + knobs = ty_knobs.get(node.op_type, []) + flops = node.get_flops() + knobs_used.update(knobs) + print(f"{node.name} ({node.op_type}) -> {knobs}, {flops}") + def 
def get_full_typename(o: object) -> str:
    """Return ``"<module>.<qualname>"`` for the class of *o*.

    Both parts are read from the class rather than from the instance:
    instances of built-in types (``int``, ``str``, ``list``, ...) have no
    ``__module__`` attribute of their own, and objects such as functions
    carry a ``__module__`` that names where *they* were defined, not where
    their class lives.

    >>> get_full_typename(1)
    'builtins.int'
    """
    cls = o.__class__
    return f"{cls.__module__}.{cls.__qualname__}"
- if sin: - print(f"{name}: {sin} -> {sout})") - else: - print(f"{name}: {sout}") + self.attrs = kwargs def codegen(self) -> Tuple[str, list]: return "", [] @@ -37,8 +30,16 @@ class DFGNode(abc.ABC): def hpvm_codegen(self) -> Tuple[str, list]: return "", [] + def get_flops(self) -> int: + return np.prod(self.output_shape) if self.output_shape else 0 + def __repr__(self) -> str: - return f"{self.op_type}({self.name})" + sin = " x ".join(str(sh) if sh else "??" for sh in self.input_shapes) + sout = self.output_shape if self.output_shape else "??" + if sin: + return f"{self.name}({self.op_type}): {sin} -> {sout} ({self.get_flops()})" + else: + return f"{self.name}({self.op_type}): {sout}" class TensorNode(DFGNode, abc.ABC): @@ -148,6 +149,14 @@ class Conv2DNode(DFGNode): [self.pads, self.pads, self.sh, self.sw], ) + def get_flops(self) -> int: + _, kshape = self.input_shapes + if not self.output_shape or not kshape: + return 0 + _, _, h, w = self.output_shape + c1, c2, kh, kw = kshape + return int(c1 * c2 * h * w * kh * kw / (self.sh * self.sw)) + class _Pool2DNode(DFGNode, abc.ABC): """Common super class of Average pooling and Max pooling.""" @@ -188,6 +197,10 @@ class _Pool2DNode(DFGNode, abc.ABC): [*self.kernel_shape, *self.pads, *self.strides], ) + def get_flops(self) -> int: + input0 = self.input_shapes[0] + return np.prod(input0) if input0 else 0 + class MaxPool2DNode(_Pool2DNode): pool_type = "0" @@ -218,27 +231,36 @@ class MatMulNode(DFGNode): def hpvm_codegen(self): return "__hpvm__tensor_mul", [] - @staticmethod - def gemm_transpose(onnx_gemm_node, predec): + def gemm_transpose(self, predec): """Find and transpose weights of the onnx gemm node. This way we transpose the constant weight instead of exporting a transpose node (which doesn't yet exist in HPVM). 
""" - def _transpose(weight): + def _transpose(idx: int): + weight = predec[idx] if not isinstance(weight, WeightTensor): raise ValueError( f"Cannot transpose non-const {weight} (transpose op needed)" ) weight.transpose_() + self.input_shapes[idx] = weight.output_shape # Some tensors may need transposing - attrs = node_attr_to_dict(onnx_gemm_node) - if attrs.get("transA", False): - _transpose(predec[0]) - if attrs.get("transB", False): - _transpose(predec[1]) + if self.attrs.get("transA", False): + _transpose(0) + if self.attrs.get("transB", False): + _transpose(1) + + def get_flops(self) -> int: + ishape, wshape = self.input_shapes + if not ishape or not wshape: + return 0 + input_len = np.prod(ishape) + _, _, len_, k = wshape + assert input_len == len_ + return input_len * k class SoftMaxNode(DFGNode):