From 85f8bd00ebd1dc94308c23777dd2d0ecd3131a69 Mon Sep 17 00:00:00 2001
From: Yifan Zhao <yifanz16@illinois.edu>
Date: Wed, 7 Jul 2021 21:27:08 -0500
Subject: [PATCH] Updated pytorch frontend and hpvm-clang to handle NVDLA
 correctly

---
 .../projects/torch2hpvm/torch2hpvm/compile.py | 257 +++++-------------
 .../torch2hpvm/torch2hpvm/graph_builder.py    |   4 +-
 .../torch2hpvm/torch2hpvm/graph_ir.py         |   5 +-
 .../torch2hpvm/template_hpvm.cpp.in           |  42 +--
 hpvm/tools/hpvm-clang/main.py.in              |   9 +-
 5 files changed, 83 insertions(+), 234 deletions(-)

diff --git a/hpvm/projects/torch2hpvm/torch2hpvm/compile.py b/hpvm/projects/torch2hpvm/torch2hpvm/compile.py
index 12d1c544d0..bb13d34747 100644
--- a/hpvm/projects/torch2hpvm/torch2hpvm/compile.py
+++ b/hpvm/projects/torch2hpvm/torch2hpvm/compile.py
@@ -1,8 +1,9 @@
 import os
 from pathlib import Path
 from tempfile import NamedTemporaryFile
-from typing import Dict, IO, List, NamedTuple, Optional, Sequence, Tuple, Union
+from typing import IO, Optional, Sequence, Union
 
+import numpy as np
 import onnx
 import torch
 from onnx import version_converter
@@ -10,153 +11,65 @@ from torch.nn import Module
 from torch.utils.data import Dataset
 
 from .codegen_hpvm import HpvmCodeGen, PathLike
-from .codegen_tensor import TensorCodeGen
 from .graph_builder import DFG
 
-
-class BinDataset(NamedTuple):
-    input_file: PathLike
-    labels_file: PathLike
-    shape: Sequence[int]
-
-
 # Path to ONNX model, loaded ONNX model, or PyTorch Module
 ModelTy = Union[PathLike, onnx.ModelProto, Module]
-# Path to a pair of HPVM bin format file (input, labels), or PyTorch Dataset
-DatasetTy = Union[BinDataset, Dataset]
-
-def_approx_knobs_file = Path(__file__).parent / "approxknobs.json"
-
 
 class ModelExporter:
-    tuneset_name = "tune_input.bin", "tune_labels.bin"
-    testset_name = "test_input.bin", "test_labels.bin"
+    dataset_dir_name = "images"
+    label_name = "labels.bin"
     weight_dir_name = "weights"
     source_file_name = "hpvm_c.cpp"
-    metadata_file_name = "ops.json"
-    config_file_name = "tuner_confs.txt"
-    fifo_file_name_r = "hpvm_fifo_r"
-    fifo_file_name_w = "hpvm_fifo_w"
 
     def __init__(
         self,
         model: ModelTy,
-        tune_dataset: DatasetTy,
-        test_dataset: DatasetTy,
+        dataset: Dataset,
         output_dir: PathLike,
-        target: str = "hpvm_tensor",
+        config_file: PathLike,
+        target: str = "hpvm_nvdla",
         opset: Optional[int] = None,
-        config_file: PathLike = None,
     ):
-        self.tune_dataset, self.test_dataset = tune_dataset, test_dataset
-        self.dataset_shape = self._check_datasets(tune_dataset, test_dataset)
+        self.dataset = dataset
+        self.dataset_shape = self._check_dataset_get_shape(dataset)
         self.dataset_size = self.dataset_shape[0]
         onnx_model = self._load_model(model, self.dataset_shape, opset)
         self.dfg = DFG(onnx_model.graph)
 
-        output_dir = Path(output_dir).absolute()
+        self.output_dir = output_dir = Path(output_dir).absolute()
         os.makedirs(output_dir, exist_ok=False)  # Will throw if already exists
         self.weight_dir = output_dir / self.weight_dir_name
         self.weight_dir.mkdir(exist_ok=True)
         self.codefile = output_dir / self.source_file_name
-        self.metafile = output_dir / self.metadata_file_name
+        self.dataset_dir = output_dir / self.dataset_dir_name
+        self.dataset_dir.mkdir(exist_ok=True)
 
-        args3 = self.dfg, self.weight_dir, self.dataset_size
-        self.compile_args = None
         self.path_params = {}
-        if target == "hpvm_tensor":
-            if config_file is None:
-                raise ValueError(
-                    f"Config file must be given and exist under hpvm_tensor mode"
-                )
-            self.path_params = {"config_file": str(config_file)}
-            self.compile_args = ["-t", "tensor", "--conf-file", str(config_file)]
-            self.codegen = HpvmCodeGen(*args3, "tensor", None)
-        elif target == "hpvm_tensor_inspect":
-            if config_file is None:
-                config_file = output_dir / self.config_file_name
-            else:
-                config_file = Path(config_file).absolute()
-            self.path_params = {
-                "tune_labels_path": (self.weight_dir / self.tuneset_name[1]).as_posix(),
-                "conf_path": config_file.as_posix(),
-                "fifo_path_r": (output_dir / self.fifo_file_name_r).as_posix(),
-                "fifo_path_w": (output_dir / self.fifo_file_name_w).as_posix(),
-            }
-            self.compile_args = ["-t", "tensor", "--conf-file", str(config_file)]
-            self.codegen = HpvmCodeGen(*args3, "tensor", self.path_params)
-        elif target == "hpvm_cudnn":
-            self.compile_target = "cudnn"
-            self.compile_args = ["-t", "cudnn"]
-            self.codegen = HpvmCodeGen(*args3, "cudnn", None)
-        elif target == "tensor":
-            self.codegen = TensorCodeGen(*args3)
-        else:
-            raise ValueError(f"Target {target} not recognized")
+        assert target == "hpvm_nvdla", f"Target {target} not recognized"
+        self.compile_args = ["-t", "nvdla", "--conf-file", str(config_file)]
+        # NVDLA uses CUDNN_TARGET in HPVM-C
+        self.codegen = HpvmCodeGen(self.dfg, self.weight_dir, self.dataset_size, "cudnn", None)
 
     def export_source_code(self, output: PathLike, batch_size: Optional[int] = None):
        self.codegen.compile(output, batch_size)
        return self
 
     def export_weights(self):
-        self.dfg.dump_weights(self.weight_dir)
+        self.dfg.dump_weights(self.weight_dir, to_fp16=True)
         return self
 
     def export_datasets(self):
-        input_, labels = self.tuneset_name
-        self._dump_dataset(
-            self.tune_dataset, self.weight_dir / input_, self.weight_dir / labels
-        )
-        input_, labels = self.testset_name
-        self._dump_dataset(
-            self.test_dataset, self.weight_dir / input_, self.weight_dir / labels
-        )
-        return self
-
-    def export_metadata(
-        self, output: PathLike, approx_knobs_file: PathLike = def_approx_knobs_file
-    ):
-        import json
-        from collections import defaultdict
-
-        with Path(approx_knobs_file).open() as f:
-            knobs = json.load(f)  # knob name to knob attrs dict
-        # Organize knobs into defaults and the ones for certain types
-        ty_knobs: Dict[str, str] = defaultdict(list)
-        default_knobs: List[str] = []
-        for name, attrs in knobs.items():
-            applies_to = attrs.pop("applies_to")
-            if applies_to is None:
-                default_knobs.append(name)
-                continue
-            for ty in applies_to:
-                ty_knobs[ty].append(name)
-
-        # Enumerate operators and find knobs for each
-        idx = 0
-        op_cost: Dict[str, int] = {}
-        op_knobs: Dict[str, List[str]] = {}
-        for node in self.dfg.traverse_order:
-            if not node.hpvm_op_type:
-                continue
-            hpvm_op_name = f"{node.hpvm_op_type}_{idx}"
-            type_knobs = ty_knobs.get(node.hpvm_op_type, [])
-            op_knobs[hpvm_op_name] = type_knobs + default_knobs
-            op_cost[hpvm_op_name] = int(node.get_flops())  # May get np.int64
-            idx += 1
-
-        # Write out
-        with Path(output).open("w") as f:
-            json.dump(
-                {
-                    "op_cost": op_cost,
-                    "op_knobs": op_knobs,
-                    "knobs": knobs,
-                    **self.path_params,
-                },
-                f,
-                indent=2,
-            )
+        from PIL import Image
+
+        labels = []
+        for i in range(self.dataset_size):
+            image, label = self.dataset[i]
+            image = (image - image.min()) / (image.max() - image.min()) * 255
+            # CHW -> HWC; np.asarray also accepts torch.Tensor samples
+            image = np.asarray(image).transpose((1, 2, 0)).astype(np.uint8)
+            Image.fromarray(image).save(self.dataset_dir / f"{i}.jpg")
+            labels.append(label)
+        # int32 to match the label format used elsewhere in the frontend
+        np.array(labels, dtype=np.int32).tofile(self.output_dir / self.label_name)
         return self
 
     def compile(self, output_binary: PathLike, working_dir: Optional[PathLike] = None):
@@ -181,86 +94,24 @@ class ModelExporter:
             self.codefile if output_code_file is None else Path(output_code_file)
         )
         self.export_source_code(self.codefile, batch_size)
-        self.export_metadata(self.metafile)
         self.export_weights()
         self.export_datasets()
         return self
 
     @staticmethod
-    def _dump_dataset(dataset: DatasetTy, input_filename: Path, labels_filename: Path):
-        import numpy as np
-        from torch.utils.data import DataLoader
-
-        def link_from_to(from_: PathLike, to: PathLike):
-            from_, to = Path(from_), Path(to)
-            if from_.exists():
-                from_.unlink()
-            from_.symlink_to(to.absolute())
-
-        if isinstance(dataset, BinDataset):
-            link_from_to(input_filename, dataset.input_file)
-            link_from_to(labels_filename, dataset.labels_file)
-            return
-        inputs, labels = zip(*iter(DataLoader(dataset)))
-        inputs = np.stack(inputs, axis=0)
-        labels = np.stack(labels, axis=0)
-        inputs.tofile(input_filename)
-        inputs.tofile(labels_filename)
-
-    @classmethod
-    def _check_datasets(
-        cls, tune_dataset: DatasetTy, test_dataset: DatasetTy
-    ) -> Tuple[int, int, int, int]:
-        tune_shape = cls._check_dataset_get_shape(tune_dataset)
-        test_shape = cls._check_dataset_get_shape(test_dataset)
-        if tune_shape != test_shape:
+    def _check_dataset_get_shape(dataset: Dataset) -> Sequence[int]:
+        if not isinstance(dataset, Dataset):
+            raise TypeError("Only PyTorch Dataset is supported")
+        size = len(dataset)
+        sample = dataset[0][0]
+        if (
+            not isinstance(sample, (np.ndarray, torch.Tensor))
+            or len(sample.shape) != 3
+        ):
             raise ValueError(
-                f"Size of tune and test dataset must match (got {tune_shape} and {test_shape})"
+                "Dataset must return a 3D tensor (image) due to backend limitations"
             )
-        return tuple(tune_shape)
-
-    @staticmethod
-    def _check_dataset_get_shape(dataset: DatasetTy) -> Sequence[int]:
-        import numpy as np
-
-        if isinstance(dataset, Dataset):
-            size = len(dataset)
-            sample = dataset[0]
-            if (
-                not isinstance(sample, (np.ndarray, torch.Tensor))
-                or len(sample.shape) != 4
-            ):
-                raise ValueError(
-                    "Dataset must be a 4D tensor due to backend limitation"
-                )
-            return [size, *sample.shape]
-        if not isinstance(dataset, BinDataset):
-            raise TypeError("Only BinDataset or PyTorch Dataset are supported")
-        input_file = Path(dataset.input_file)
-        labels_file = Path(dataset.labels_file)
-        if not input_file.is_file():
-            raise FileNotFoundError(f"Input file {input_file}")
-        if not labels_file.is_file():
-            raise FileNotFoundError(f"Labels file {input_file}")
-        if len(dataset.shape) != 4:
-            raise ValueError("Dataset must be a 4D tensor due to backend limitation")
-        float_size = np.dtype(np.float32).itemsize
-        expected_input_size = np.array(dataset.shape).prod() * float_size
-        int32_size = np.dtype(np.int32).itemsize
-        expected_labels_size = dataset.shape[0] * int32_size
-        input_size = input_file.stat().st_size
-        labels_size = labels_file.stat().st_size
-        if input_size != expected_input_size:
-            raise RuntimeError(
-                f"Input file {input_file} should have size {expected_input_size} "
-                f"(shape {dataset.shape}), but got {input_size}"
-            )
-        if labels_size != expected_labels_size:
-            raise RuntimeError(
-                f"Labels file {labels_file} should have size {expected_labels_size} "
-                f"(dataset length {dataset.shape[0]}), but got {labels_size}"
-            )
-        return dataset.shape
+        return [size, *sample.shape]
 
     @staticmethod
     def _load_model(
@@ -326,6 +177,38 @@ def torch_to_onnx(
         output_names=["output"],  # the model's output names
     )
 
+
+class BinDataset(Dataset):
+    def __init__(self, input_file: PathLike, labels_file: PathLike, shape: Sequence[int]):
+        self.input_file = input_file
+        self.labels_file = labels_file
+        self.shape = shape
+        self.input = read_nparray_from_file(input_file, *self.shape)
+        self.labels = read_nparray_from_file(labels_file, self.shape[0], read_ty=np.int32)
+
+    def __len__(self) -> int:
+        return len(self.input)
+
+    def __getitem__(self, idx):
+        return self.input[idx], self.labels[idx]
+
+    def __iter__(self):
+        for i in range(len(self)):
+            yield self[i]
 
 def get_full_typename(o: object) -> str:
     return o.__module__ + "." + o.__class__.__qualname__
+
+
+def read_nparray_from_file(
+    filename: Union[str, Path],
+    *shape: int,
+    read_ty=np.float32,
+    cast_ty=None,
+    count: int = -1,
+    offset: int = 0,
+) -> np.ndarray:
+    offset = offset * read_ty().itemsize
+    mmap = np.memmap(filename, dtype=read_ty, mode="r", offset=offset)
+    n_entries = min(mmap.shape[0], count) if count != -1 else mmap.shape[0]
+    return mmap[:n_entries].reshape(shape).astype(cast_ty or read_ty)
diff --git a/hpvm/projects/torch2hpvm/torch2hpvm/graph_builder.py b/hpvm/projects/torch2hpvm/torch2hpvm/graph_builder.py
index 7739a64f53..f64cd1952e 100644
--- a/hpvm/projects/torch2hpvm/torch2hpvm/graph_builder.py
+++ b/hpvm/projects/torch2hpvm/torch2hpvm/graph_builder.py
@@ -74,13 +74,13 @@ class DFG(object):
         sorted_edges = sorted(self.graph.in_edges(node, "index"), key=lambda p: p[2])
         return [e[0] for e in sorted_edges]
 
-    def dump_weights(self, output_dir: PathLike) -> None:
+    def dump_weights(self, output_dir: PathLike, to_fp16: bool = False) -> None:
         """Dump `WeightTensor`s into output_dir."""
         output_dir = Path(output_dir)
         for node in self.graph.nodes:
             if not isinstance(node, g.WeightTensor):
                 continue
-            node.dump_weight(output_dir / (node.new_name + "_path.bin"))
+            node.dump_weight(output_dir / (node.new_name + "_path.bin"), to_fp16)
 
     ################ Internal methods (high-level):
 
diff --git a/hpvm/projects/torch2hpvm/torch2hpvm/graph_ir.py b/hpvm/projects/torch2hpvm/torch2hpvm/graph_ir.py
index abf7b60ff3..3ddcb18685 100644
--- a/hpvm/projects/torch2hpvm/torch2hpvm/graph_ir.py
+++ b/hpvm/projects/torch2hpvm/torch2hpvm/graph_ir.py
@@ -103,8 +103,9 @@ class WeightTensor(TensorNode):
             shape = [1] * 4
         super().__init__(weight_proto, shape, new_name)
 
-    def dump_weight(self, file_name: PathLike):
-        self.input_data.tofile(file_name)
+    def dump_weight(self, file_name: PathLike, to_fp16: bool = False):
+        data = self.input_data.astype(np.float16) if to_fp16 else self.input_data
+        data.tofile(file_name)
 
     def transpose_(self):
         if len(self.input_data.shape) != 2:
diff --git a/hpvm/projects/torch2hpvm/torch2hpvm/template_hpvm.cpp.in b/hpvm/projects/torch2hpvm/torch2hpvm/template_hpvm.cpp.in
index 208cdfe616..e3889c71b2 100644
--- a/hpvm/projects/torch2hpvm/torch2hpvm/template_hpvm.cpp.in
+++ b/hpvm/projects/torch2hpvm/torch2hpvm/template_hpvm.cpp.in
@@ -58,41 +58,13 @@ typedef struct __attribute__((__packed__)) {
   struct ret_t r;
 } RootIn;
 
-void printUsage(const std::string &bin_name) {
-  std::cerr << "Usage: " << bin_name << "[-d {test|tune}] [-c CONF_FILE]\n";
-}
-
 const int batch_size = {{batch_size}}, input_size = {{input_size}},
           batch_count = input_size / batch_size;
 
 int main(int argc, char *argv[]) {
-  std::string config_path = "", runtype = "test";
-  int flag;
-  while ((flag = getopt(argc, argv, "hc:")) != -1) {
-    switch (flag) {
-    case 'd':
-      runtype = std::string(optarg);
-      if (runtype != "test" && runtype != "tune")
-        printUsage(argv[0]);
-      return 1;
-      break;
-    case 'c':
-      config_path = std::string(optarg);
-      break;
-    case 'h':
-      printUsage(argv[0]);
-      return 0;
-    default:
-      printUsage(argv[0]);
-      return 1;
-    }
-  }
-
-  std::string dir_prefix = "{{prefix}}/";
-  std::string input_path = dir_prefix + "test_input.bin";
-  std::string labels_path = dir_prefix + "test_labels.bin";
+  auto input_path = "{{prefix}}/test_input.bin";
+  auto labels_path = "{{prefix}}/test_labels.bin";
 {% for w in weights %}
-  std::string {{w.name}}_path = dir_prefix + "{{w.filename}}";
-  void* {{w.name}} = readTrainedWeights({{w.name}}_path.c_str(), 0, {{w.shape|join(', ')}});
+  void* {{w.name}} = readTrainedWeights("{{prefix}}/{{w.filename}}", 0, {{w.shape|join(', ')}});
 {% endfor %}
 
   RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn)));
@@ -103,15 +75,11 @@ int main(int argc, char *argv[]) {
 {% endfor %}
 
   __hpvm__init();
-  if (config_path != "") {
-    llvm_hpvm_initializeRuntimeController(config_path.c_str());
-  }
-
   startMemTracking();
 #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++){
     int start = i * batch_size, end = start + batch_size;
-    void *{{input_name}} = readInputBatch(input_path.c_str(), 0, start, end, {{input_shape|join(', ')}});
+    void *{{input_name}} = readInputBatch(input_path, 0, start, end, {{input_shape|join(', ')}});
     args->{{input_name}} = {{input_name}};
     args->{{input_name}}_bytes = 0;
 
@@ -120,7 +88,7 @@ int main(int argc, char *argv[]) {
     void *result = static_cast<RootIn*>(args)->r.tensor;
     hpvm_request_tensor(result, 0);
 
-    llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end);
+    llvm_hpvm_invokeRtControl(result, labels_path, start, end);
     freeBatchMemory();
   }
   __hpvm__cleanup();
diff --git a/hpvm/tools/hpvm-clang/main.py.in b/hpvm/tools/hpvm-clang/main.py.in
index 7d4d8644e6..9d327ff74d 100644
--- a/hpvm/tools/hpvm-clang/main.py.in
+++ b/hpvm/tools/hpvm-clang/main.py.in
@@ -52,19 +52,16 @@ def compile_hpvm_c(
             "inplace", "hpvm-fuse", "dfg2llvm-wrapperapi",
             f"configuration-inputs-filename={conf_file}"
         ]
-
     elif tensor_target == "nvdla":
         if conf_file is None:
-            raise ValueError("conf_file must be defined when tensor_target=='tensor'.")
-
+            raise ValueError("Argument conf_file (the calibration table) must be given when using NVDLA.")
         passes += ["LLVMHPVM2NVDLAPass"]
         pass_flags += [
             "hpvm-nvdla",
-            #"cprecision=fp16",
             "cprecision=int8",
-            "calib-table=calib.txt"
+            f"calib-table={conf_file}",
         ]
-
+
     elif tensor_target == "cudnn":
         passes += ["LLVMInPlaceDFGAnalysis", "LLVMDFG2LLVM_CUDNN"]
         pass_flags += ["inplace", "dfg2llvm-cudnn"]
-- 
GitLab