From 85f8bd00ebd1dc94308c23777dd2d0ecd3131a69 Mon Sep 17 00:00:00 2001
From: Yifan Zhao <yifanz16@illinois.edu>
Date: Wed, 7 Jul 2021 21:27:08 -0500
Subject: [PATCH] Updated PyTorch frontend and hpvm-clang to handle NVDLA
 correctly

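The frontend now takes a single PyTorch Dataset and a calibration table,
dumps fp16 weights and per-image JPEGs, and emits HPVM-C that hpvm-clang
lowers through the NVDLA backend (int8, with the config file used as the
calibration table).

A minimal usage sketch of the updated API; the file names, dataset shape,
and batch size below are illustrative only:

    from torch2hpvm import BinDataset, ModelExporter

    # BinDataset wraps raw binary files (float32 inputs, int32 labels) as a
    # torch Dataset; shape is the full (N, C, H, W) input shape.
    dataset = BinDataset("test_input.bin", "test_labels.bin", (5000, 3, 32, 32))
    exporter = ModelExporter(
        model="model.onnx",       # ONNX path, onnx.ModelProto, or torch Module
        dataset=dataset,
        output_dir="nvdla_out",   # must not already exist
        config_file="calib.txt",  # int8 calibration table handed to hpvm-clang
        target="hpvm_nvdla",
    )
    exporter.generate(batch_size=100).compile("model_nvdla.bin")
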
---
 .../projects/torch2hpvm/torch2hpvm/compile.py | 257 +++++-------------
 .../torch2hpvm/torch2hpvm/graph_builder.py    |   4 +-
 .../torch2hpvm/torch2hpvm/graph_ir.py         |   5 +-
 .../torch2hpvm/template_hpvm.cpp.in           |  42 +--
 hpvm/tools/hpvm-clang/main.py.in              |   9 +-
 5 files changed, 83 insertions(+), 234 deletions(-)

diff --git a/hpvm/projects/torch2hpvm/torch2hpvm/compile.py b/hpvm/projects/torch2hpvm/torch2hpvm/compile.py
index 12d1c544d0..bb13d34747 100644
--- a/hpvm/projects/torch2hpvm/torch2hpvm/compile.py
+++ b/hpvm/projects/torch2hpvm/torch2hpvm/compile.py
@@ -1,8 +1,9 @@
 import os
 from pathlib import Path
 from tempfile import NamedTemporaryFile
-from typing import Dict, IO, List, NamedTuple, Optional, Sequence, Tuple, Union
+from typing import IO, Optional, Sequence, Union
 
+import numpy as np
 import onnx
 import torch
 from onnx import version_converter
@@ -10,153 +11,65 @@ from torch.nn import Module
 from torch.utils.data import Dataset
 
 from .codegen_hpvm import HpvmCodeGen, PathLike
-from .codegen_tensor import TensorCodeGen
 from .graph_builder import DFG
 
-
-class BinDataset(NamedTuple):
-    input_file: PathLike
-    labels_file: PathLike
-    shape: Sequence[int]
-
-
 # Path to ONNX model, loaded ONNX model, or PyTorch Module
 ModelTy = Union[PathLike, onnx.ModelProto, Module]
-# Path to a pair of HPVM bin format file (input, labels), or PyTorch Dataset
-DatasetTy = Union[BinDataset, Dataset]
-
-def_approx_knobs_file = Path(__file__).parent / "approxknobs.json"
-
 
 class ModelExporter:
-    tuneset_name = "tune_input.bin", "tune_labels.bin"
-    testset_name = "test_input.bin", "test_labels.bin"
+    dataset_dir_name = "images"
+    label_name = "labels.bin"
     weight_dir_name = "weights"
     source_file_name = "hpvm_c.cpp"
-    metadata_file_name = "ops.json"
-    config_file_name = "tuner_confs.txt"
-    fifo_file_name_r = "hpvm_fifo_r"
-    fifo_file_name_w = "hpvm_fifo_w"
 
     def __init__(
         self,
         model: ModelTy,
-        tune_dataset: DatasetTy,
-        test_dataset: DatasetTy,
+        dataset: Dataset,
         output_dir: PathLike,
-        target: str = "hpvm_tensor",
+        config_file: PathLike,
+        target: str = "hpvm_nvdla",
         opset: Optional[int] = None,
-        config_file: PathLike = None,
     ):
-        self.tune_dataset, self.test_dataset = tune_dataset, test_dataset
-        self.dataset_shape = self._check_datasets(tune_dataset, test_dataset)
+        self.dataset = dataset
+        self.dataset_shape = self._check_dataset_get_shape(dataset)
         self.dataset_size = self.dataset_shape[0]
         onnx_model = self._load_model(model, self.dataset_shape, opset)
         self.dfg = DFG(onnx_model.graph)
 
-        output_dir = Path(output_dir).absolute()
+        self.output_dir = output_dir = Path(output_dir).absolute()
         os.makedirs(output_dir, exist_ok=False)  # Will throw if already existss
         self.weight_dir = output_dir / self.weight_dir_name
         self.weight_dir.mkdir(exist_ok=True)
         self.codefile = output_dir / self.source_file_name
-        self.metafile = output_dir / self.metadata_file_name
+        self.dataset_dir = output_dir / self.dataset_dir_name
+        self.dataset_dir.mkdir(exist_ok=True)
 
-        args3 = self.dfg, self.weight_dir, self.dataset_size
-        self.compile_args = None
         self.path_params = {}
-        if target == "hpvm_tensor":
-            if config_file is None:
-                raise ValueError(
-                    f"Config file must be given and exist under hpvm_tensor mode"
-                )
-            self.path_params = {"config_file": str(config_file)}
-            self.compile_args = ["-t", "tensor", "--conf-file", str(config_file)]
-            self.codegen = HpvmCodeGen(*args3, "tensor", None)
-        elif target == "hpvm_tensor_inspect":
-            if config_file is None:
-                config_file = output_dir / self.config_file_name
-            else:
-                config_file = Path(config_file).absolute()
-            self.path_params = {
-                "tune_labels_path": (self.weight_dir / self.tuneset_name[1]).as_posix(),
-                "conf_path": config_file.as_posix(),
-                "fifo_path_r": (output_dir / self.fifo_file_name_r).as_posix(),
-                "fifo_path_w": (output_dir / self.fifo_file_name_w).as_posix(),
-            }
-            self.compile_args = ["-t", "tensor", "--conf-file", str(config_file)]
-            self.codegen = HpvmCodeGen(*args3, "tensor", self.path_params)
-        elif target == "hpvm_cudnn":
-            self.compile_target = "cudnn"
-            self.compile_args = ["-t", "cudnn"]
-            self.codegen = HpvmCodeGen(*args3, "cudnn", None)
-        elif target == "tensor":
-            self.codegen = TensorCodeGen(*args3)
-        else:
-            raise ValueError(f"Target {target} not recognized")
+        assert target == "hpvm_nvdla", f"Target {target} not supported; only 'hpvm_nvdla' is handled"
+        self.compile_args = ["-t", "nvdla", "--conf-file", str(config_file)]
+        # The NVDLA backend reuses CUDNN_TARGET in the generated HPVM-C code
+        self.codegen = HpvmCodeGen(self.dfg, self.weight_dir, self.dataset_size, "cudnn", None)
 
     def export_source_code(self, output: PathLike, batch_size: Optional[int] = None):
         self.codegen.compile(output, batch_size)
         return self
 
     def export_weights(self):
-        self.dfg.dump_weights(self.weight_dir)
+        self.dfg.dump_weights(self.weight_dir, to_fp16=True)
         return self
 
     def export_datasets(self):
-        input_, labels = self.tuneset_name
-        self._dump_dataset(
-            self.tune_dataset, self.weight_dir / input_, self.weight_dir / labels
-        )
-        input_, labels = self.testset_name
-        self._dump_dataset(
-            self.test_dataset, self.weight_dir / input_, self.weight_dir / labels
-        )
-        return self
-
-    def export_metadata(
-        self, output: PathLike, approx_knobs_file: PathLike = def_approx_knobs_file
-    ):
-        import json
-        from collections import defaultdict
-
-        with Path(approx_knobs_file).open() as f:
-            knobs = json.load(f)  # knob name to knob attrs dict
-        # Organize knobs into defaults and the ones for certain types
-        ty_knobs: Dict[str, str] = defaultdict(list)
-        default_knobs: List[str] = []
-        for name, attrs in knobs.items():
-            applies_to = attrs.pop("applies_to")
-            if applies_to is None:
-                default_knobs.append(name)
-                continue
-            for ty in applies_to:
-                ty_knobs[ty].append(name)
-
-        # Enumerate operators and find knobs for each
-        idx = 0
-        op_cost: Dict[str, int] = {}
-        op_knobs: Dict[str, List[str]] = {}
-        for node in self.dfg.traverse_order:
-            if not node.hpvm_op_type:
-                continue
-            hpvm_op_name = f"{node.hpvm_op_type}_{idx}"
-            type_knobs = ty_knobs.get(node.hpvm_op_type, [])
-            op_knobs[hpvm_op_name] = type_knobs + default_knobs
-            op_cost[hpvm_op_name] = int(node.get_flops())  # May get np.int64
-            idx += 1
-
-        # Write out
-        with Path(output).open("w") as f:
-            json.dump(
-                {
-                    "op_cost": op_cost,
-                    "op_knobs": op_knobs,
-                    "knobs": knobs,
-                    **self.path_params,
-                },
-                f,
-                indent=2,
-            )
+        from PIL import Image
+
+        labels = []
+        for i in range(self.dataset_size):
+            image, label = self.dataset[i]
+            image = np.asarray(image, dtype=np.float32)  # accept np.ndarray or torch.Tensor samples
+            image = ((image - image.min()) / (image.max() - image.min()) * 255).transpose((1, 2, 0)).astype(np.uint8)
+            Image.fromarray(image).save(self.dataset_dir / f"{i}.jpg")
+            labels.append(label)
+        np.array(labels, dtype=np.int32).tofile(self.output_dir / self.label_name)
         return self
 
     def compile(self, output_binary: PathLike, working_dir: Optional[PathLike] = None):
@@ -181,86 +94,24 @@ class ModelExporter:
             self.codefile if output_code_file is None else Path(output_code_file)
         )
         self.export_source_code(self.codefile, batch_size)
-        self.export_metadata(self.metafile)
         self.export_weights()
         self.export_datasets()
         return self
 
     @staticmethod
-    def _dump_dataset(dataset: DatasetTy, input_filename: Path, labels_filename: Path):
-        import numpy as np
-        from torch.utils.data import DataLoader
-
-        def link_from_to(from_: PathLike, to: PathLike):
-            from_, to = Path(from_), Path(to)
-            if from_.exists():
-                from_.unlink()
-            from_.symlink_to(to.absolute())
-
-        if isinstance(dataset, BinDataset):
-            link_from_to(input_filename, dataset.input_file)
-            link_from_to(labels_filename, dataset.labels_file)
-            return
-        inputs, labels = zip(*iter(DataLoader(dataset)))
-        inputs = np.stack(inputs, axis=0)
-        labels = np.stack(labels, axis=0)
-        inputs.tofile(input_filename)
-        inputs.tofile(labels_filename)
-
-    @classmethod
-    def _check_datasets(
-        cls, tune_dataset: DatasetTy, test_dataset: DatasetTy
-    ) -> Tuple[int, int, int, int]:
-        tune_shape = cls._check_dataset_get_shape(tune_dataset)
-        test_shape = cls._check_dataset_get_shape(test_dataset)
-        if tune_shape != test_shape:
+    def _check_dataset_get_shape(dataset: Dataset) -> Sequence[int]:
+        if not isinstance(dataset, Dataset):
+            raise TypeError("Only PyTorch Dataset is supported")
+        size = len(dataset)
+        sample = dataset[0][0]
+        if (
+            not isinstance(sample, (np.ndarray, torch.Tensor))
+            or len(sample.shape) != 3
+        ):
             raise ValueError(
-                f"Size of tune and test dataset must match (got {tune_shape} and {test_shape})"
+                "Dataset must be return 3d tensor (image) due to backend limitation"
             )
-        return tuple(tune_shape)
-
-    @staticmethod
-    def _check_dataset_get_shape(dataset: DatasetTy) -> Sequence[int]:
-        import numpy as np
-
-        if isinstance(dataset, Dataset):
-            size = len(dataset)
-            sample = dataset[0]
-            if (
-                not isinstance(sample, (np.ndarray, torch.Tensor))
-                or len(sample.shape) != 4
-            ):
-                raise ValueError(
-                    "Dataset must be a 4D tensor due to backend limitation"
-                )
-            return [size, *sample.shape]
-        if not isinstance(dataset, BinDataset):
-            raise TypeError("Only BinDataset or PyTorch Dataset are supported")
-        input_file = Path(dataset.input_file)
-        labels_file = Path(dataset.labels_file)
-        if not input_file.is_file():
-            raise FileNotFoundError(f"Input file {input_file}")
-        if not labels_file.is_file():
-            raise FileNotFoundError(f"Labels file {input_file}")
-        if len(dataset.shape) != 4:
-            raise ValueError("Dataset must be a 4D tensor due to backend limitation")
-        float_size = np.dtype(np.float32).itemsize
-        expected_input_size = np.array(dataset.shape).prod() * float_size
-        int32_size = np.dtype(np.int32).itemsize
-        expected_labels_size = dataset.shape[0] * int32_size
-        input_size = input_file.stat().st_size
-        labels_size = labels_file.stat().st_size
-        if input_size != expected_input_size:
-            raise RuntimeError(
-                f"Input file {input_file} should have size {expected_input_size} "
-                f"(shape {dataset.shape}), but got {input_size}"
-            )
-        if labels_size != expected_labels_size:
-            raise RuntimeError(
-                f"Labels file {labels_file} should have size {expected_labels_size} "
-                f"(dataset length {dataset.shape[0]}), but got {labels_size}"
-            )
-        return dataset.shape
+        return [size, *sample.shape]
 
     @staticmethod
     def _load_model(
@@ -326,6 +177,38 @@ def torch_to_onnx(
         output_names=["output"],  # the model's output names
     )
 
+class BinDataset(Dataset):
+    def __init__(self, input_file: PathLike, labels_file: PathLike, shape: Sequence[int]):
+        self.input_file = input_file
+        self.labels_file = labels_file
+        self.shape = shape
+        self.input = read_nparray_from_file(input_file, *self.shape)
+        self.labels = read_nparray_from_file(labels_file, self.shape[0], read_ty=np.int32)
+
+    def __len__(self) -> int:
+        return len(self.input)
+
+    def __getitem__(self, idx):
+        return self.input[idx], self.labels[idx]
+
+    def __iter__(self):
+        for i in range(len(self)):
+            yield self[i]
+
 
 def get_full_typename(o: object) -> str:
     return o.__module__ + "." + o.__class__.__qualname__
+
+
+def read_nparray_from_file(
+    filename: Union[str, Path],
+    *shape: int,
+    read_ty=np.float32,
+    cast_ty=None,
+    count: int = -1,
+    offset: int = 0,
+) -> np.ndarray:
+    offset = offset * read_ty().itemsize
+    mmap = np.memmap(filename, dtype=read_ty, mode="r", offset=offset)
+    n_entries = min(mmap.shape[0], count) if count != -1 else mmap.shape[0]
+    return mmap[:n_entries].reshape(shape).astype(cast_ty or read_ty)
diff --git a/hpvm/projects/torch2hpvm/torch2hpvm/graph_builder.py b/hpvm/projects/torch2hpvm/torch2hpvm/graph_builder.py
index 7739a64f53..f64cd1952e 100644
--- a/hpvm/projects/torch2hpvm/torch2hpvm/graph_builder.py
+++ b/hpvm/projects/torch2hpvm/torch2hpvm/graph_builder.py
@@ -74,13 +74,13 @@ class DFG(object):
         sorted_edges = sorted(self.graph.in_edges(node, "index"), key=lambda p: p[2])
         return [e[0] for e in sorted_edges]
 
-    def dump_weights(self, output_dir: PathLike) -> None:
+    def dump_weights(self, output_dir: PathLike, to_fp16: bool = False) -> None:
         """Dump `WeightTensor`s into output_dir."""
         output_dir = Path(output_dir)
         for node in self.graph.nodes:
             if not isinstance(node, g.WeightTensor):
                 continue
-            node.dump_weight(output_dir / (node.new_name + "_path.bin"))
+            node.dump_weight(output_dir / (node.new_name + "_path.bin"), to_fp16)
 
     ################ Internal methods (high-level):
 
diff --git a/hpvm/projects/torch2hpvm/torch2hpvm/graph_ir.py b/hpvm/projects/torch2hpvm/torch2hpvm/graph_ir.py
index abf7b60ff3..3ddcb18685 100644
--- a/hpvm/projects/torch2hpvm/torch2hpvm/graph_ir.py
+++ b/hpvm/projects/torch2hpvm/torch2hpvm/graph_ir.py
@@ -103,8 +103,9 @@ class WeightTensor(TensorNode):
             shape = [1] * 4
         super().__init__(weight_proto, shape, new_name)
 
-    def dump_weight(self, file_name: PathLike):
-        self.input_data.tofile(file_name)
+    def dump_weight(self, file_name: PathLike, to_fp16: bool = False):
+        data = self.input_data.astype(np.float16) if to_fp16 else self.input_data
+        data.tofile(file_name)
 
     def transpose_(self):
         if len(self.input_data.shape) != 2:
diff --git a/hpvm/projects/torch2hpvm/torch2hpvm/template_hpvm.cpp.in b/hpvm/projects/torch2hpvm/torch2hpvm/template_hpvm.cpp.in
index 208cdfe616..e3889c71b2 100644
--- a/hpvm/projects/torch2hpvm/torch2hpvm/template_hpvm.cpp.in
+++ b/hpvm/projects/torch2hpvm/torch2hpvm/template_hpvm.cpp.in
@@ -58,41 +58,13 @@ typedef struct __attribute__((__packed__)) {
   struct ret_t r;
 } RootIn;
 
-void printUsage(const std::string &bin_name) {
-  std::cerr << "Usage: " << bin_name << "[-d {test|tune}] [-c CONF_FILE]\n";
-}
-
 const int batch_size = {{batch_size}}, input_size = {{input_size}}, batch_count = input_size / batch_size;
 
 int main(int argc, char *argv[]) {
-  std::string config_path = "", runtype = "test";
-  int flag;
-  while ((flag = getopt(argc, argv, "hc:")) != -1) {
-    switch (flag) {
-    case 'd':
-      runtype = std::string(optarg);
-      if (runtype != "test" && runtype != "tune")
-        printUsage(argv[0]);
-        return 1;
-      break;
-    case 'c':
-      config_path = std::string(optarg);
-      break;
-    case 'h':
-      printUsage(argv[0]);
-      return 0;
-    default:
-      printUsage(argv[0]);
-      return 1;
-    }
-  }
-
-  std::string dir_prefix = "{{prefix}}/";
-  std::string input_path = dir_prefix + "test_input.bin";
-  std::string labels_path = dir_prefix + "test_labels.bin";
+  auto input_path = "{{prefix}}/test_input.bin";
+  auto labels_path = "{{prefix}}/test_labels.bin";
 {% for w in weights %}
-  std::string {{w.name}}_path = dir_prefix + "{{w.filename}}";
-  void* {{w.name}} = readTrainedWeights({{w.name}}_path.c_str(), 0, {{w.shape|join(', ')}});
+  void* {{w.name}} = readTrainedWeights("{{prefix}}/{{w.filename}}", 0, {{w.shape|join(', ')}});
 {% endfor %}
 
   RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn)));
@@ -103,15 +75,11 @@ int main(int argc, char *argv[]) {
 {% endfor %}
 
   __hpvm__init();
-  if (config_path != "") {
-    llvm_hpvm_initializeRuntimeController(config_path.c_str());
-  }
-
   startMemTracking();
 #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++){
     int start = i * batch_size, end = start + batch_size;
-    void *{{input_name}} = readInputBatch(input_path.c_str(), 0, start, end, {{input_shape|join(', ')}});
+    void *{{input_name}} = readInputBatch(input_path, 0, start, end, {{input_shape|join(', ')}});
     args->{{input_name}} = {{input_name}};
     args->{{input_name}}_bytes = 0;
 
@@ -120,7 +88,7 @@ int main(int argc, char *argv[]) {
     void *result = static_cast<RootIn*>(args)->r.tensor;
     hpvm_request_tensor(result, 0);
 
-    llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end);
+    llvm_hpvm_invokeRtControl(result, labels_path, start, end);
     freeBatchMemory();
   }
   __hpvm__cleanup();
diff --git a/hpvm/tools/hpvm-clang/main.py.in b/hpvm/tools/hpvm-clang/main.py.in
index 7d4d8644e6..9d327ff74d 100644
--- a/hpvm/tools/hpvm-clang/main.py.in
+++ b/hpvm/tools/hpvm-clang/main.py.in
@@ -52,19 +52,16 @@ def compile_hpvm_c(
             "inplace", "hpvm-fuse", "dfg2llvm-wrapperapi",
             f"configuration-inputs-filename={conf_file}"
         ]
-      
     elif tensor_target == "nvdla":
         if conf_file is None:
-            raise ValueError("conf_file must be defined when tensor_target=='tensor'.")
-
+            raise ValueError("Argument conf_file (the calibration table) must be given when using NVDLA.")
         passes += ["LLVMHPVM2NVDLAPass"]
         pass_flags += [
             "hpvm-nvdla",
-            #"cprecision=fp16",
             "cprecision=int8",
-            "calib-table=calib.txt"
+            f"calib-table={conf_file}",
         ]
-        
+
     elif tensor_target == "cudnn":
         passes += ["LLVMInPlaceDFGAnalysis", "LLVMDFG2LLVM_CUDNN"]
         pass_flags += ["inplace", "dfg2llvm-cudnn"]
-- 
GitLab