Added routine for quantizing miniera model:

added distiller as a submodule updated env.yaml auto-install distiller in installer

Added routine for quantizing miniera model:
ade58e30 · Yifan Zhao · 5e96a401 · ade58e30 · ade58e30 · 30c50f36
Commit ade58e30 authored 4 years ago by Yifan Zhao
--- a/.gitmodules
+++ b/.gitmodules
@@ -5,3 +5,6 @@
 [submodule "hpvm/projects/sw"]
 	path = hpvm/projects/sw
 	url = https://github.com/nvdla/sw.git
+[submodule "hpvm/projects/distiller"]
+	path = hpvm/projects/distiller
+	url = git@gitlab.engr.illinois.edu:llvm/distiller.git
--- a/hpvm/env.yaml
+++ b/hpvm/env.yaml
 name: hpvm
 channels:
  - pytorch
-  - defaults
+  - conda-forge
 dependencies:
+  - Cython
  - jinja2=2.11
-  - jsonpickle=2
-  - keras==2.1.6
  - matplotlib=3.3
  - networkx=2.5
+  - onnx==1.8.0
  - pandas=1.1
-  - python==3.6.13
  - pip
-  - pytorch==1.6.0
-  - torchvision=0.7
-  - tqdm=4.59
-  - scipy==1.1.0
-  - h5py==2.10.0
+  - pydot=1.4
+  - python=3.7
+  - pytorch==1.5.0
+  - PyYAML
+  - scikit-learn=0.21
+  - scipy=1.3
+  - tabulate=0.8
+  - tensorflow==1.14.0
+  - torchvision==0.6
+  - tqdm=4.33
+  - xlsxwriter=1.2
  - pip:
    - argparse==1.4
-    - onnx==1.8
+    - graphviz==0.10
    - onnx-simplifier==0.3
-    - opentuner==0.8.3
-    - sqlalchemy==1.3.0
-    - tensorflow==1.14.0
-    - tensorflow-gpu==1.14.0
+    - torchnet==0.0.4
--- a/distiller @ 30c50f36
+++ b/distiller @ 30c50f36
+Subproject commit 30c50f368d219efa87d927c0ad0a563bcfa29f0a
--- a/hpvm/scripts/hpvm_installer.py
+++ b/hpvm/scripts/hpvm_installer.py
@@ -31,7 +31,7 @@ MAKE_TARGETS = ["hpvm-clang"]
 MAKE_TEST_TARGETS = ["check-hpvm-dnn", "check-hpvm-pass"]

 # Relative to project root which is __file__.parent.parent
-PY_PACKAGES = ["projects/torch2hpvm"]
+PY_PACKAGES = ["projects/torch2hpvm", "projects/distiller"]

 PYTHON_REQ = (3, 6)  # This means >= 3.6


--- a/hpvm/test/epoch_dnn/torch_dnn/__init__.py
+++ b/hpvm/test/epoch_dnn/torch_dnn/__init__.py
 from .datasets import CIFAR
 from .miniera import MiniERA
+from .quantizer import quantize
--- a/hpvm/test/epoch_dnn/torch_dnn/quantizer.py
+++ b/hpvm/test/epoch_dnn/torch_dnn/quantizer.py
+import os
+from copy import deepcopy
+from pathlib import Path
+from typing import Union
+
+import distiller
+import torch
+import yaml
+from distiller.data_loggers import collect_quant_stats
+from distiller.quantization import PostTrainLinearQuantizer
+from torch import nn
+from torch.utils.data import DataLoader
+
+from .datasets import CIFAR
+from .miniera import MiniERA
+
+# Type alias for arguments that may be given as a string or a pathlib.Path.
+PathLike = Union[str, Path]
+# Activation-statistics file; quantize() looks for it in the current working
+# directory and only collects new statistics when it is missing.
+STATS_FILENAME = "acts_quantization_stats.yaml"
+# Per-layer quantization parameter file read by generate_calib_file() from the
+# current working directory.
+QUANT_FILENAME = "layer_quant_params.yaml"
+# Layer type -> name prefix written to the calibration file.
+# nn.Parameter is not a layer; it keys the extra "add" entry that
+# generate_calib_file() emits after every Conv2d / Linear layer.
+LAYER_HPVM_NAME = {
+    nn.ReLU: "relu",
+    nn.Linear: "gemm",
+    nn.Conv2d: "conv",
+    nn.MaxPool2d: "pool",
+    nn.Softmax: "softmax",
+    nn.Parameter: "add",
+}
+# Layer type -> name prefix used to build "<prefix>.<idx>.output_scale" keys
+# into the quantization parameter file. Only the layer types listed here can be
+# looked up; presumably these match MiniERA's submodule attribute names -- TODO confirm.
+LAYER_DISTILLER_NAME = {
+    nn.Linear: "fcs",
+    nn.Conv2d: "convs",
+    nn.Softmax: "softmax",
+}
+
+
+def quantize(
+    dataset_path: PathLike,
+    model_chkpt: PathLike,
+    strat: str = "NONE",
+    output: PathLike = "calib.txt",
+):
+    """Run post-training quantization on MiniERA and write a calibration file.
+
+    Args:
+        dataset_path: Directory containing ``input.bin`` and ``labels.bin``.
+        model_chkpt: Checkpoint holding a MiniERA ``state_dict``.
+        strat: Activation clipping strategy forwarded to the quantizer as
+            ``clip_acts``; one of 'NONE', 'AVG', 'N_STD', 'GAUSS', 'LAPLACE'.
+        output: Path of the calibration file to generate.
+
+    Side effects:
+        Uses ``STATS_FILENAME`` in the current working directory as a cache:
+        activation statistics are only collected when that file does not
+        exist, so a stale file from a different model/dataset is reused
+        silently. The quantizer's per-layer parameters are saved as well
+        (presumably to ``QUANT_FILENAME`` in the working directory, which
+        ``generate_calib_file`` then reads -- TODO confirm against distiller).
+    """
+    # possible quant strats ['NONE', 'AVG', 'N_STD', 'GAUSS', 'LAPLACE']
+    print("Quantizing...")
+    dataset_path = Path(dataset_path)
+    dataset = CIFAR(dataset_path / "input.bin", dataset_path / "labels.bin")
+    dataloader = DataLoader(dataset, batch_size=1)
+
+    # Load Model
+    model = MiniERA()
+    # The model and the dummy input below live on the CPU, so map the
+    # checkpoint there too; otherwise a checkpoint saved from a GPU cannot be
+    # deserialized on a CPU-only machine.
+    model.load_state_dict(torch.load(model_chkpt, map_location="cpu"))
+
+    # Collect Pre Quantization Stats
+    distiller.utils.assign_layer_fq_names(model)
+
+    if not os.path.isfile(STATS_FILENAME):
+        # generates STATS_FILENAME
+        collect_quant_stats(
+            model, lambda model: evaluate(model, dataloader), save_dir="."
+        )
+
+    # Generate Quantized Scales
+    # The quantizer works on a copy so that `model` itself stays unmodified
+    # for generate_calib_file() below.
+    quantizer = PostTrainLinearQuantizer(
+        deepcopy(model),
+        model_activation_stats=STATS_FILENAME,
+        mode="SYMMETRIC",
+        bits_activations=8,
+        bits_accum=32,
+        clip_acts=strat,
+    )
+    dummy_input = torch.rand(1, 3, 32, 32)
+    quantizer.prepare_model(dummy_input)
+    quantizer.save_per_layer_parameters()
+
+    print("Quantization process finished.")
+    # converts .yaml file stats to hpvm standard
+    generate_calib_file(model, output)
+
+
+def generate_calib_file(model: MiniERA, output: PathLike):
+    """Translate the saved quantization parameters into a calibration file.
+
+    Reads ``QUANT_FILENAME`` from the current working directory and writes one
+    ``<name>:\t<scale>`` line per entry to ``output``: first the network
+    input, then every layer of ``model.convs``, ``model.fcs`` and
+    ``model.softmax`` in that order. Each Conv2d / Linear layer is followed
+    by an extra "add" entry that reuses the same scale.
+    """
+    print("Generating calibration file...")
+    with open(QUANT_FILENAME, "r") as yaml_file:
+        scale_data = yaml.safe_load(yaml_file)
+
+    # The input scale comes from the recorded input range of the first conv
+    # layer, mapped symmetrically onto 127 steps.
+    first_conv_stats = scale_data["convs.0"]["model_activation_stats"]["convs.0"]
+    input_range = first_conv_stats["inputs"][0]
+    largest_abs = max(abs(input_range["min"]), abs(input_range["max"]))
+    out_lines = [f"input:\t{largest_abs / 127}\n"]
+
+    # Running per-type counters; they provide both the index used in the
+    # parameter-file key (0-based) and the suffix in the output name (1-based).
+    seen = dict.fromkeys(LAYER_HPVM_NAME, 0)
+    add_prefix = LAYER_HPVM_NAME[nn.Parameter]
+    quant_params = scale_data["linear_quant_params"]
+
+    # Layer order follows the definition of MiniERA.
+    for module in (*model.convs, *model.fcs, model.softmax):
+        kind = type(module)
+        out_prefix = LAYER_HPVM_NAME[kind]
+        key_prefix = LAYER_DISTILLER_NAME[kind]
+        idx = seen[kind]
+        seen[kind] = idx + 1
+
+        scale = 1 / quant_params[f"{key_prefix}.{idx}.output_scale"]
+        out_lines.append(f"{out_prefix}{idx + 1}:\t{scale}\n")
+
+        if not isinstance(module, (nn.Conv2d, nn.Linear)):
+            continue
+        # Conv / gemm layers are followed by an 'add' entry with the same scale.
+        add_idx = seen[nn.Parameter]
+        seen[nn.Parameter] = add_idx + 1
+        out_lines.append(f"{add_prefix}{add_idx + 1}:\t{scale}\n")
+
+    with open(output, "w+") as calib_file:
+        calib_file.writelines(out_lines)
+    print(f"Calibration file generated to {output}")
+
+
+@torch.no_grad()
+def evaluate(model: MiniERA, dataloader: DataLoader):
+    """Return the mean per-sample cross-entropy loss of ``model`` on ``dataloader``.
+
+    Args:
+        model: Network to evaluate; it is switched to evaluation mode.
+        dataloader: Iterable yielding ``(data, targets)`` batches.
+
+    Returns:
+        The summed per-sample loss divided by the number of samples seen.
+
+    Raises:
+        ZeroDivisionError: If ``dataloader`` yields no samples.
+    """
+    from torch.nn import functional as F
+
+    # Turn on evaluation mode which disables dropout.
+    model.eval()
+    total_loss = 0
+    n_samples = 0
+    for data, targets in dataloader:
+        output = model(data)
+        # cross_entropy averages over the batch by default, so scale it back
+        # up by the batch size to accumulate a per-sample sum.
+        batch_size = len(data)
+        total_loss += batch_size * F.cross_entropy(output, targets)
+        n_samples += batch_size
+    # Normalize by the sample count: len(dataloader) is the number of
+    # batches, which only equals the sample count when batch_size == 1.
+    return total_loss / n_samples