import os
from copy import deepcopy
from pathlib import Path
from typing import Union
import distiller
import torch
import yaml
from distiller.data_loggers import collect_quant_stats
from distiller.quantization import PostTrainLinearQuantizer
from torch import nn
from torch.utils.data import DataLoader
from .datasets import CIFAR
from .miniera import MiniERA

PathLike = Union[str, Path]

STATS_FILENAME = "acts_quantization_stats.yaml"
QUANT_FILENAME = "layer_quant_params.yaml"
LAYER_HPVM_NAME = {
    nn.ReLU: "relu",
    nn.Linear: "gemm",
    nn.Conv2d: "conv",
    nn.MaxPool2d: "pool",
    nn.Softmax: "softmax",
    nn.Parameter: "add",
}
LAYER_DISTILLER_NAME = {
    nn.Linear: "fcs",
    nn.Conv2d: "convs",
    nn.Softmax: "softmax",
}
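# Example of how these tables compose (see generate_calib_file below): the
# second nn.Conv2d in the model is emitted as "conv2" in the calibration file,
# and its scale is looked up under the distiller key "convs.1.output_scale".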


def quantize(
    dataset_path: PathLike,
    model_chkpt: PathLike,
    strat: str = "NONE",
    output: PathLike = "calib.txt",
):
    # Possible quantization strategies: 'NONE', 'AVG', 'N_STD', 'GAUSS', 'LAPLACE'
    print("Quantizing...")
    dataset_path = Path(dataset_path)
    dataset = CIFAR(dataset_path / "input.bin", dataset_path / "labels.bin")
    dataloader = DataLoader(dataset, batch_size=1)

    # Load the pretrained model
    model = MiniERA()
    model.load_state_dict(torch.load(model_chkpt))

    # Collect pre-quantization activation stats
    distiller.utils.assign_layer_fq_names(model)
    if not os.path.isfile(STATS_FILENAME):
        # Generates STATS_FILENAME by running the model over the dataset
        collect_quant_stats(
            model, lambda model: evaluate(model, dataloader), save_dir="."
        )

    # Generate quantized scales
    quantizer = PostTrainLinearQuantizer(
        deepcopy(model),
        model_activation_stats=STATS_FILENAME,
        mode="SYMMETRIC",
        bits_activations=8,
        bits_accum=32,
        clip_acts=strat,
    )
    dummy_input = torch.rand(1, 3, 32, 32)
    quantizer.prepare_model(dummy_input)
    quantizer.save_per_layer_parameters()  # writes QUANT_FILENAME
    print("Quantization process finished.")

    # Convert the .yaml stats to the HPVM calibration format
    generate_calib_file(model, output)
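

# Example invocation — a hypothetical sketch, not part of the original module;
# the paths are placeholders for a CIFAR-format dataset dump (input.bin +
# labels.bin) and a MiniERA checkpoint saved via torch.save(model.state_dict(), ...):
#
#   quantize("data/cifar", "miniera.pth", strat="NONE", output="calib.txt")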


def generate_calib_file(model: MiniERA, output: PathLike):
    print("Generating calibration file...")
    with open(QUANT_FILENAME, "r") as stream:
        scale_data = yaml.safe_load(stream)
    lines = []

    # Add the scale for the input; symmetric 8-bit scale = max(|min|, |max|) / 127
    # fmt: off
    input_min_max = scale_data["convs.0"]["model_activation_stats"]["convs.0"]["inputs"][0]
    # fmt: on
    input_scale = max(abs(input_min_max["min"]), abs(input_min_max["max"])) / 127
    lines.append(f"input:\t{input_scale}\n")

    # Layer order follows the definition of MiniERA
    layers = [*model.convs, *model.fcs, model.softmax]
    layer_count = {
        nn.ReLU: 0,
        nn.Linear: 0,
        nn.Conv2d: 0,
        nn.MaxPool2d: 0,
        nn.Softmax: 0,
        nn.Parameter: 0,
    }

    # Add scales for layers
    for layer in layers:
        hpvm_name = LAYER_HPVM_NAME[type(layer)]
        distiller_typename = LAYER_DISTILLER_NAME[type(layer)]
        layer_idx = layer_count[type(layer)]
        layer_count[type(layer)] += 1
        scale_key = f"{distiller_typename}.{layer_idx}.output_scale"
        layer_scale = 1 / scale_data["linear_quant_params"][scale_key]
        lines.append(f"{hpvm_name}{layer_idx + 1}:\t{layer_scale}\n")
        if isinstance(layer, (nn.Conv2d, nn.Linear)):
            # The bias 'add' reuses the output scale of its conv/gemm layer
            add_hpvm_name = LAYER_HPVM_NAME[nn.Parameter]
            add_idx = layer_count[nn.Parameter]
            layer_count[nn.Parameter] += 1
            lines.append(f"{add_hpvm_name}{add_idx + 1}:\t{layer_scale}\n")

    with open(output, "w") as f:
        f.writelines(lines)
    print(f"Calibration file written to {output}")


@torch.no_grad()
def evaluate(model: MiniERA, dataloader: DataLoader):
    from torch.nn import functional as F

    # Turn on evaluation mode, which disables dropout.
    model.eval()
    total_loss = 0
    for batch in dataloader:
        data, targets = batch
        output = model(data)
        total_loss += len(data) * F.cross_entropy(output, targets).item()
    # Mean loss per batch; with batch_size=1 this is also the per-sample mean.
    return total_loss / len(dataloader)
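

# Minimal CLI entry point — an illustrative sketch, not part of the original
# module; flag names are assumptions. Because this file uses relative imports,
# it must be run as a module (python -m <package>.<this_module>).
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Post-training quantization for MiniERA")
    parser.add_argument("dataset_path", help="directory containing input.bin and labels.bin")
    parser.add_argument("model_chkpt", help="path to a MiniERA state_dict checkpoint")
    parser.add_argument(
        "--strat",
        default="NONE",
        choices=["NONE", "AVG", "N_STD", "GAUSS", "LAPLACE"],
        help="activation clipping strategy",
    )
    parser.add_argument("--output", default="calib.txt", help="calibration file to write")
    args = parser.parse_args()
    quantize(args.dataset_path, args.model_chkpt, strat=args.strat, output=args.output)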