Commit 4cf48924 authored by Yifan Zhao's avatar Yifan Zhao

Fixed a few issues with quantization

quantize() now takes the model object and a working directory instead of loading MiniERA from a checkpoint path itself, writes the distiller activation stats and per-layer quant-param YAML files into that directory instead of the current one, and returns the path of the generated calibration file. generate_calib_file() now looks up each scale by fully-qualified module name via model.named_modules() instead of reconstructing distiller key names from per-type counters. The driver script is updated to the new API and stages the NVDLA module, input images, and labels for copying to the device.

parent ade58e30
 import site
 from pathlib import Path
+from os import makedirs
 import torch
 from torch2hpvm import BinDataset, ModelExporter

 self_folder = Path(__file__).parent.absolute()
-site.addsitedir(self_folder)
-from torch_dnn import MiniERA
+site.addsitedir(self_folder.as_posix())
+from torch_dnn import MiniERA, quantize

-asset_dir = self_folder / "assets/miniera"
-output_dir = Path("/tmp/miniera")
-bin_dataset = BinDataset(asset_dir / "input.bin", asset_dir / "labels.bin", (5000, 3, 32, 32))
+SCP_TARGET = ""
+ASSET_DIR = self_folder / "assets/miniera"
+QUANT_STRAT = "NONE"  # Quantization method
+WORKING_DIR = Path("/tmp/miniera")
+makedirs(WORKING_DIR, exist_ok=True)
+
+# Calculate quantization scales
+ckpt = (ASSET_DIR / "miniera.pth").as_posix()
 model = MiniERA()
-model.load_state_dict(torch.load(asset_dir / "miniera.pth"))
-exporter = ModelExporter(model, bin_dataset, output_dir, asset_dir / "calib.txt")
-exporter.generate().compile(output_dir / "miniera", output_dir)
+model.load_state_dict(torch.load(ckpt))
+scale_output = quantize(model, ASSET_DIR, QUANT_STRAT, WORKING_DIR)
+
+# Code generation (into /tmp/miniera/hpvm-mod.nvdla)
+bin_dataset = BinDataset(
+    ASSET_DIR / "input.bin", ASSET_DIR / "labels.bin", (5000, 3, 32, 32)
+)
+exporter = ModelExporter(model, bin_dataset, WORKING_DIR, scale_output)
+exporter.generate().compile(WORKING_DIR / "miniera", WORKING_DIR)
+
+# SCP essential files to remote device
+nvdla_buffer = WORKING_DIR / "hpvm-mod.nvdla"
+input_images = exporter.dataset_dir
+labels = WORKING_DIR / exporter.label_name
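The copy step itself is elided in this commit: SCP_TARGET is left empty and no scp invocation appears in the diff. A minimal sketch of how the three staged paths might be pushed once a user@host:path destination is configured, assuming a plain subprocess-based scp call; this continuation is hypothetical, not part of the commit:

    import subprocess

    # Hypothetical continuation: copy the NVDLA module, the input images,
    # and the labels file to the device named by SCP_TARGET.
    for path in [nvdla_buffer, input_images, labels]:
        subprocess.run(["scp", "-r", str(path), SCP_TARGET], check=True)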
@@ -2,6 +2,7 @@ import os
 from copy import deepcopy
 from pathlib import Path
 from typing import Union
+from shutil import move
 import distiller
 import torch
@@ -17,6 +18,7 @@ from .miniera import MiniERA
 PathLike = Union[str, Path]
 STATS_FILENAME = "acts_quantization_stats.yaml"
 QUANT_FILENAME = "layer_quant_params.yaml"
+QUANT_AFTER_FILENAME = "quant_stats_after_prepare_model.yaml"
 LAYER_HPVM_NAME = {
     nn.ReLU: "relu",
     nn.Linear: "gemm",
@@ -26,58 +28,64 @@ LAYER_HPVM_NAME = {
     nn.Parameter: "add",
 }
 LAYER_DISTILLER_NAME = {
     nn.ReLU: "softmax",  # All point-wise layers use softmax's scale!
     nn.Linear: "fcs",
     nn.Conv2d: "convs",
     nn.MaxPool2d: "softmax",
     nn.Softmax: "softmax",
 }


 def quantize(
+    model: nn.Module,
     dataset_path: PathLike,
-    model_chkpt: PathLike,
     strat: str = "NONE",
-    output: PathLike = "calib.txt",
+    working_dir: PathLike = ".",
+    output_name: str = "calib.txt",
 ):
     # possible quant strats ['NONE', 'AVG', 'N_STD', 'GAUSS', 'LAPLACE']
     print("Quantizing...")
     dataset_path = Path(dataset_path)
-    dataset = CIFAR(dataset_path / "input.bin", dataset_path / "labels.bin")
+    dataset = CIFAR.from_file(dataset_path / "input.bin", dataset_path / "labels.bin")
     dataloader = DataLoader(dataset, batch_size=1)
-    # Load Model
-    model = MiniERA()
-    model.load_state_dict(torch.load(model_chkpt))

     # Collect Pre Quantization Stats
     distiller.utils.assign_layer_fq_names(model)
-    if not os.path.isfile(STATS_FILENAME):
-        # generates STATS_FILENAME
+    working_dir = Path(working_dir)
+    stats_file = (working_dir / STATS_FILENAME).as_posix()
+    if not os.path.isfile(stats_file):
+        # generates `stats_file`
         collect_quant_stats(
-            model, lambda model: evaluate(model, dataloader), save_dir="."
+            model, lambda model: evaluate(model, dataloader), save_dir=working_dir
         )

     # Generate Quantized Scales
     quantizer = PostTrainLinearQuantizer(
         deepcopy(model),
-        model_activation_stats=STATS_FILENAME,
+        model_activation_stats=stats_file,
         mode="SYMMETRIC",
         bits_activations=8,
         bits_accum=32,
         clip_acts=strat,
     )
     dummy_input = torch.rand(1, 3, 32, 32)
+    # generates QUANT_FILENAME and QUANT_AFTER_FILENAME in current dir
     quantizer.prepare_model(dummy_input)
     quantizer.save_per_layer_parameters()
+    # Let's move it to our working dir
+    move(QUANT_FILENAME, working_dir / QUANT_FILENAME)
+    # We don't need QUANT_AFTER_FILENAME, remove it
+    Path(QUANT_AFTER_FILENAME).unlink()
     print("Quantization process finished.")
     # converts .yaml file stats to hpvm standard
-    generate_calib_file(model, output)
+    generate_calib_file(model, working_dir, working_dir / output_name)
+    return working_dir / output_name


-def generate_calib_file(model: MiniERA, output: PathLike):
+def generate_calib_file(model: nn.Module, working_dir: Path, output_file: Path):
     print("Generating calibration file...")
-    with open(QUANT_FILENAME, "r") as stream:
+    with open(working_dir / QUANT_FILENAME, "r") as stream:
         scale_data = yaml.safe_load(stream)
     lines = []
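For reference, layer_quant_params.yaml (written into the current directory by save_per_layer_parameters() and then moved into working_dir) keeps the scales under a linear_quant_params mapping keyed by fully-qualified module name. A hypothetical excerpt, assuming MiniERA's convs/fcs submodule names; the values are placeholders:

    linear_quant_params:
      convs.0.output_scale: 127.52
      convs.3.output_scale: 63.88
      fcs.0.output_scale: 31.76

The rewritten loop below simply inverts every <name>.output_scale entry it can match to a module.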
@@ -89,7 +97,6 @@ def generate_calib_file(model: MiniERA, output: PathLike):
     lines.append(f"input:\t{input_scale}\n")
-    # because of definition of miniera
-    layers = [*model.convs, *model.fcs, model.softmax]
+
     layer_count = {
         nn.ReLU: 0,
         nn.Linear: 0,
@@ -99,14 +106,16 @@
         nn.Parameter: 0,
     }
     # add scales for layers
-    for layer in layers:
+    quant_params = scale_data["linear_quant_params"]
+    for name, layer in model.named_modules():
+        scale_key = f"{name}.output_scale"
+        if scale_key not in quant_params:
+            continue
+        layer_scale = 1 / quant_params[scale_key]
         hpvm_name = LAYER_HPVM_NAME[type(layer)]
-        distiller_typename = LAYER_DISTILLER_NAME[type(layer)]
         layer_idx = layer_count[type(layer)]
         layer_count[type(layer)] += 1
-        scale_key = f"{distiller_typename}.{layer_idx}.output_scale"
-        layer_scale = 1 / scale_data["linear_quant_params"][scale_key]
         lines.append(f"{hpvm_name}{layer_idx + 1}:\t{layer_scale}\n")

         if isinstance(layer, nn.Conv2d) or isinstance(layer, nn.Linear):
@@ -116,9 +125,9 @@ def generate_calib_file(model: MiniERA, output: PathLike):
             layer_count[nn.Parameter] += 1
             lines.append(f"{add_hpvm_name}{add_idx + 1}:\t{layer_scale}\n")

-    with open(output, "w+") as f:
+    with open(output_file, "w") as f:
         f.writelines(lines)
-    print(f"Calibration file generated to {output}")
+    print(f"Calibration file generated to {output_file}")


 @torch.no_grad()
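Pieced together from the f-strings above, calib.txt comes out as tab-separated name/scale pairs: an input line, then one line per layer named via LAYER_HPVM_NAME with a 1-based per-type index, plus an add line after each Conv2d/Linear that reuses that layer's scale for its bias. A hypothetical, abridged file (layer names beyond relu/gemm/add are assumptions, and all values are placeholders):

    input:	0.0078
    conv1:	0.0312
    add1:	0.0312
    conv2:	0.0441
    add2:	0.0441
    gemm1:	0.0145
    add3:	0.0145
    softmax1:	1.0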