"""Post-training quantization of the MiniERA network with Distiller, and
generation of an HPVM calibration file from the resulting per-layer scales."""

import os
from copy import deepcopy
from pathlib import Path
from typing import Union

import distiller
import torch
import yaml
from distiller.data_loggers import collect_quant_stats
from distiller.quantization import PostTrainLinearQuantizer
from torch import nn
from torch.utils.data import DataLoader

from .datasets import CIFAR
from .miniera import MiniERA

PathLike = Union[str, Path]
STATS_FILENAME = "acts_quantization_stats.yaml"
QUANT_FILENAME = "layer_quant_params.yaml"

# Maps a torch layer type to its HPVM operator name.
LAYER_HPVM_NAME = {
    nn.ReLU: "relu",
    nn.Linear: "gemm",
    nn.Conv2d: "conv",
    nn.MaxPool2d: "pool",
    nn.Softmax: "softmax",
    nn.Parameter: "add",
}
# Maps a torch layer type to the MiniERA submodule name used by Distiller
# when keying per-layer quantization parameters.
LAYER_DISTILLER_NAME = {
    nn.Linear: "fcs",
    nn.Conv2d: "convs",
    nn.Softmax: "softmax",
}


def quantize(
    dataset_path: PathLike,
    model_chkpt: PathLike,
    strat: str = "NONE",
    output: PathLike = "calib.txt",
):
    # Possible clipping strategies: 'NONE', 'AVG', 'N_STD', 'GAUSS', 'LAPLACE'.
    print("Quantizing...")
    dataset_path = Path(dataset_path)
    dataset = CIFAR(dataset_path / "input.bin", dataset_path / "labels.bin")
    dataloader = DataLoader(dataset, batch_size=1)

    # Load the trained model.
    model = MiniERA()
    model.load_state_dict(torch.load(model_chkpt))

    # Collect pre-quantization activation statistics.
    distiller.utils.assign_layer_fq_names(model)
    if not os.path.isfile(STATS_FILENAME):
        # Generates STATS_FILENAME in the current directory.
        collect_quant_stats(
            model, lambda model: evaluate(model, dataloader), save_dir="."
        )

    # Generate quantized scales.
    quantizer = PostTrainLinearQuantizer(
        deepcopy(model),
        model_activation_stats=STATS_FILENAME,
        mode="SYMMETRIC",
        bits_activations=8,
        bits_accum=32,
        clip_acts=strat,
    )
    dummy_input = torch.rand(1, 3, 32, 32)
    quantizer.prepare_model(dummy_input)
    quantizer.save_per_layer_parameters()
    print("Quantization process finished.")

    # Convert the per-layer .yaml stats into the HPVM calibration format.
    generate_calib_file(model, output)


def generate_calib_file(model: MiniERA, output: PathLike):
    print("Generating calibration file...")
    with open(QUANT_FILENAME, "r") as stream:
        scale_data = yaml.safe_load(stream)

    lines = []
    # Add the scale for the network input.
    # fmt: off
    input_min_max = scale_data["convs.0"]["model_activation_stats"]["convs.0"]["inputs"][0]
    # fmt: on
    input_scale = max(abs(input_min_max["min"]), abs(input_min_max["max"])) / 127
    lines.append(f"input:\t{input_scale}\n")

    # Layer order follows the definition of MiniERA.
    layers = [*model.convs, *model.fcs, model.softmax]
    layer_count = {
        nn.ReLU: 0,
        nn.Linear: 0,
        nn.Conv2d: 0,
        nn.MaxPool2d: 0,
        nn.Softmax: 0,
        nn.Parameter: 0,
    }
    # Add the scale for each layer.
    for layer in layers:
        hpvm_name = LAYER_HPVM_NAME[type(layer)]
        distiller_typename = LAYER_DISTILLER_NAME[type(layer)]
        layer_idx = layer_count[type(layer)]
        layer_count[type(layer)] += 1

        scale_key = f"{distiller_typename}.{layer_idx}.output_scale"
        layer_scale = 1 / scale_data["linear_quant_params"][scale_key]
        lines.append(f"{hpvm_name}{layer_idx + 1}:\t{layer_scale}\n")

        if isinstance(layer, (nn.Conv2d, nn.Linear)):
            # Conv and linear layers also emit an 'add' scale, reusing the
            # layer's output scale.
            add_hpvm_name = LAYER_HPVM_NAME[nn.Parameter]
            add_idx = layer_count[nn.Parameter]
            layer_count[nn.Parameter] += 1
            lines.append(f"{add_hpvm_name}{add_idx + 1}:\t{layer_scale}\n")

    with open(output, "w+") as f:
        f.writelines(lines)
    print(f"Calibration file written to {output}")


@torch.no_grad()
def evaluate(model: MiniERA, dataloader: DataLoader):
    from torch.nn import functional as F

    # Turn on evaluation mode, which disables dropout.
    model.eval()
    total_loss = 0
    for batch in dataloader:
        data, targets = batch
        output = model(data)
        total_loss += len(data) * F.cross_entropy(output, targets)
    # Average loss per sample.
    return total_loss / len(dataloader.dataset)
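

# --- Usage sketch (not part of the original module; names and paths below are
# hypothetical). Because this file uses relative imports, it is meant to be
# imported from its package rather than run as a script. A call driving the
# whole flow could look like:
#
#     quantize(
#         dataset_path="path/to/cifar_bins",  # directory holding input.bin and labels.bin
#         model_chkpt="path/to/miniera_checkpoint.pth",
#         strat="NONE",  # or 'AVG', 'N_STD', 'GAUSS', 'LAPLACE'
#         output="calib.txt",
#     )
#
# This collects activation stats (if not cached), runs Distiller's post-training
# linear quantizer, and writes the HPVM calibration file to `output`.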