From a2b3e99af2f98d83d378ab77ebf972fb485c548f Mon Sep 17 00:00:00 2001
From: Yifan Zhao <yifanz16@illinois.edu>
Date: Mon, 22 Mar 2021 11:36:26 -0500
Subject: [PATCH] Updated frontend readme and fixed a bug

---
 hpvm/projects/torch2hpvm/README.md            | 114 +++++++++++++++---
 .../projects/torch2hpvm/torch2hpvm/compile.py |   2 +-
 2 files changed, 100 insertions(+), 16 deletions(-)

diff --git a/hpvm/projects/torch2hpvm/README.md b/hpvm/projects/torch2hpvm/README.md
index 719abc5e3c..6da48e98ec 100644
--- a/hpvm/projects/torch2hpvm/README.md
+++ b/hpvm/projects/torch2hpvm/README.md
@@ -1,25 +1,109 @@
-## Importing Conda Environment:
+# PyTorch Frontend for HPVM
 
-conda env create -f onnx\_environment.yml
+`torch2hpvm` is a PyTorch frontend for HPVM. It provides a set of APIs that
 
-## Activate/deactivate Conda Environment
+- generate HPVM-C code from a PyTorch `Module`;
+- export a PyTorch dataset to the ApproxHPVM dataset format;
+- compile the generated code into a binary by invoking HPVM automatically.
 
-conda activate onnx\_frontend
+## Installation
 
-## Building and Installing Frontend for ONNX:
+`pip` is the recommended package manager (also available within `conda`).
+Using `pip`:
 
-python setup.py build
+```bash
+pip install -e ./
+```
+
+## Getting Started
+
+Let's look at an example that uses DNNs and weights pre-shipped with HPVM.
+It can be found at `hpvm/test/dnn_benchmarks/pytorch/test_frontend.py`.
+*Note* that below we'll be working in the directory `hpvm/test/dnn_benchmarks/pytorch`.
 
-python setup.py install
+We'll compile ResNet-18 into an HPVM binary.
+First, prepare two datasets, one for autotuning and one for testing.
 
-### How to Run
+```python
+from torch2hpvm import BinDataset
+from pathlib import Path
+
+data_dir = Path(__file__).parent / "model_params/resnet18_cifar10"
+dataset_shape = 5000, 3, 32, 32
+tuneset = BinDataset(data_dir / "tune_input.bin", data_dir / "tune_labels.bin", dataset_shape)
+testset = BinDataset(data_dir / "test_input.bin", data_dir / "test_labels.bin", dataset_shape)
 ```
-python main.py
+
+`BinDataset` is a dataset backed by files in the ApproxHPVM dataset format.
+Any instance of `torch.utils.data.Dataset` can be used here.
+
+*Note* that each `module` is bound to two datasets: a "tune" set and a "test" set.
+The generated binary accepts an argument that is either the string "tune" or "test",
+and performs inference over the corresponding dataset.
+This is because a dataset can contain arbitrary Python code, which cannot yet be exported into HPVM-C;
+instead, the frontend has to export some predefined datasets for the model to use.
+See TODOs (1).
+
+Create a DNN `module` and load the checkpoint:
+
+```python
+import torch
+from torch.nn import Module
+
+import dnn  # Defined in `hpvm/test/dnn_benchmarks/pytorch`
+
+model: Module = dnn.ResNet18()
+checkpoint = Path(__file__).parent / "model_params/resnet18_cifar10.pth.tar"
+model.load_state_dict(torch.load(checkpoint))
 ```
-Set all your config, e.g. onnx model location, input size and emit directory for generated source code, in **config.py**.
 
-### Resources
-1. [ONNX overview](https://github.com/onnx/onnx/blob/master/docs/IR.md)
-2. [ONNX operator specs](https://github.com/onnx/onnx/blob/master/docs/Operators.md)
-3. [Conversion between models - available adapters](https://github.com/onnx/onnx/blob/master/onnx/version_converter.py#L21)
-4.
\ No newline at end of file
+Any `torch.nn.Module` can be used in the same way,
+as long as it only contains the tensor operators supported in HPVM
+(see "Supported Operators" and TODOs (2)).
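+
+To check ahead of time that a model only uses supported operators, you can inspect its ONNX export directly.
+The helper below is a minimal sketch, not part of the `torch2hpvm` API; it assumes the `onnx` package is
+installed and that the model takes a single input tensor:
+
+```python
+import io
+
+import onnx
+import torch
+
+
+def onnx_op_types(model, input_shape):
+    """Export `model` to ONNX in memory and return the set of ONNX operator names it uses."""
+    buffer = io.BytesIO()
+    torch.onnx.export(model, torch.zeros(*input_shape), buffer)
+    onnx_model = onnx.load_from_string(buffer.getvalue())
+    return {node.op_type for node in onnx_model.graph.node}
+
+
+# For the ResNet-18 above, every name printed here should appear in the "Supported Operators" table.
+print(onnx_op_types(model, (1, 3, 32, 32)))
+```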
+
+Now we are ready to export the model. The main entry point of `torch2hpvm` is the `ModelExporter` class:
+
+```python
+from torch2hpvm import ModelExporter
+
+output_dir = Path("./resnet18_hpvm")
+build_dir = output_dir / "build"
+target_binary = build_dir / "resnet18"
+batch_size = 500
+conf_file = ""  # TODO: point this to your configuration file.
+exporter = ModelExporter(model, tuneset, testset, output_dir, config_file=conf_file)
+exporter.generate(batch_size=batch_size).compile(target_binary, build_dir)
+```
+
+`output_dir`, `build_dir`, and `target_binary` are the code generation folder, the build folder,
+and the path to the compiled binary, respectively.
+`batch_size` is the batch size the binary uses during inference.
+
+*Note* that `conf_file` is the path to an HPVM approximation configuration file.
+This file decides which approximations the binary will use during inference.
+The path is hardcoded into the binary and is only read when the binary starts,
+so it's fine for `conf_file` to point to a path that doesn't exist yet.
+An example can be found at `test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/data/tuner_confs.txt`.
+
+## Supported Operators
+
+Any builtin or custom PyTorch `Module` is supported,
+*as long as* the ONNX model obtained by exporting the `Module` consists of only the following operators:
+
+| Convolution | Linear | Pooling           | Pointwise          | Other    |
+|-------------|--------|-------------------|--------------------|----------|
+| Conv        | MatMul | GlobalAveragePool | BatchNormalization | Flatten  |
+|             | Gemm   | AveragePool       | Relu               | Softmax  |
+|             |        | MaxPool           | Tanh               | Identity |
+|             |        |                   |                    | Pad      |
+|             |        |                   |                    | Add      |
+
+This choice of operators is largely constrained by what the backend (`tensor_runtime`) supports.
+
+## TODOs
+
+1. Optionally insert a Python-C interface in the generated binary to
+   call back into a Dataset class and read the data.
+   - Needs pybind11, hardcoding of the Python environment, and some fiddling with the import mechanism.
+1. Expand the list of operators supported in the frontend.
+   - Ideally, create a high-level description of operators that ties
+     the HPVM-C intrinsics and the frontend's operator list together.
\ No newline at end of file
diff --git a/hpvm/projects/torch2hpvm/torch2hpvm/compile.py b/hpvm/projects/torch2hpvm/torch2hpvm/compile.py
index 57566db5aa..bb4ce581f8 100644
--- a/hpvm/projects/torch2hpvm/torch2hpvm/compile.py
+++ b/hpvm/projects/torch2hpvm/torch2hpvm/compile.py
@@ -69,7 +69,7 @@ class ModelExporter:
                 raise ValueError(
                     f"Config file must be given and exist under hpvm_tensor mode"
                 )
-            self.path_params = {"config_file": Path(config_file)}
+            self.path_params = {"config_file": str(config_file)}
             self.compile_args = ["-t", "tensor", "--conf-file", str(config_file)]
             self.codegen = HpvmCodeGen(*args3, "tensor", None)
         elif target == "hpvm_tensor_inspect":
-- 
GitLab