"""Quantize the MiniERA DNN, compile it through HPVM, and stage NVDLA artifacts.

End-to-end driver script: loads the pretrained MiniERA checkpoint, computes
per-layer quantization scales, exports and compiles the model with torch2hpvm
into WORKING_DIR (producing /tmp/miniera/hpvm-mod.nvdla), and names the files
that must be shipped to the remote NVDLA device.
"""
import site
from pathlib import Path

import torch
from torch2hpvm import BinDataset, ModelExporter

self_folder = Path(__file__).parent.absolute()
# Make the sibling `torch_dnn` package importable when this file runs as a script.
site.addsitedir(self_folder.as_posix())

from torch_dnn import MiniERA, quantize

SCP_TARGET = ""  # TODO(review): fill in user@host:path of the remote device
ASSET_DIR = self_folder / "assets/miniera"
QUANT_STRAT = "NONE"  # Quantization method: one of NONE / AVG / N_STD / GAUSS / LAPLACE
WORKING_DIR = Path("/tmp/miniera")
# pathlib idiom replaces `from os import makedirs`; the file already uses Path everywhere.
WORKING_DIR.mkdir(parents=True, exist_ok=True)

# Calculate quantization scales from the pretrained checkpoint.
# `quantize` returns the path of the generated calibration file.
ckpt = (ASSET_DIR / "miniera.pth").as_posix()
model = MiniERA()
model.load_state_dict(torch.load(ckpt))
scale_output = quantize(model, ASSET_DIR, QUANT_STRAT, WORKING_DIR)

# Code generation (into /tmp/miniera/hpvm-mod.nvdla)
bin_dataset = BinDataset(
    ASSET_DIR / "input.bin", ASSET_DIR / "labels.bin", (5000, 3, 32, 32)
)
exporter = ModelExporter(model, bin_dataset, WORKING_DIR, scale_output)
exporter.generate().compile(WORKING_DIR / "miniera", WORKING_DIR)

# Essential files to SCP to the remote device.
# NOTE(review): the scp step itself is not implemented — SCP_TARGET is empty and
# the three paths below are computed but unused until a transfer command is added.
nvdla_buffer = WORKING_DIR / "hpvm-mod.nvdla"
input_images = exporter.dataset_dir
labels = WORKING_DIR / exporter.label_name
100644 --- a/hpvm/test/epoch_dnn/torch_dnn/quantizer.py +++ b/hpvm/test/epoch_dnn/torch_dnn/quantizer.py @@ -2,6 +2,7 @@ import os from copy import deepcopy from pathlib import Path from typing import Union +from shutil import move import distiller import torch @@ -17,6 +18,7 @@ from .miniera import MiniERA PathLike = Union[str, Path] STATS_FILENAME = "acts_quantization_stats.yaml" QUANT_FILENAME = "layer_quant_params.yaml" +QUANT_AFTER_FILENAME = "quant_stats_after_prepare_model.yaml" LAYER_HPVM_NAME = { nn.ReLU: "relu", nn.Linear: "gemm", @@ -26,58 +28,64 @@ LAYER_HPVM_NAME = { nn.Parameter: "add", } LAYER_DISTILLER_NAME = { + nn.ReLU: "softmax", # All point-wise layers use softmax's scale! nn.Linear: "fcs", nn.Conv2d: "convs", + nn.MaxPool2d: "softmax", nn.Softmax: "softmax", } def quantize( + model: nn.Module, dataset_path: PathLike, - model_chkpt: PathLike, strat: str = "NONE", - output: PathLike = "calib.txt", + working_dir: PathLike = ".", + output_name: str = "calib.txt", ): # possible quant strats ['NONE', 'AVG', 'N_STD', 'GAUSS', 'LAPLACE'] print("Quantizing...") dataset_path = Path(dataset_path) - dataset = CIFAR(dataset_path / "input.bin", dataset_path / "labels.bin") + dataset = CIFAR.from_file(dataset_path / "input.bin", dataset_path / "labels.bin") dataloader = DataLoader(dataset, batch_size=1) - # Load Model - model = MiniERA() - model.load_state_dict(torch.load(model_chkpt)) - # Collect Pre Quantization Stats distiller.utils.assign_layer_fq_names(model) - if not os.path.isfile(STATS_FILENAME): - # generates STATS_FILENAME + working_dir = Path(working_dir) + stats_file = (working_dir / STATS_FILENAME).as_posix() + if not os.path.isfile(stats_file): + # generates `stats_file` collect_quant_stats( - model, lambda model: evaluate(model, dataloader), save_dir="." 
+ model, lambda model: evaluate(model, dataloader), save_dir=working_dir ) # Generate Quantized Scales quantizer = PostTrainLinearQuantizer( deepcopy(model), - model_activation_stats=STATS_FILENAME, + model_activation_stats=stats_file, mode="SYMMETRIC", bits_activations=8, bits_accum=32, clip_acts=strat, ) dummy_input = torch.rand(1, 3, 32, 32) + # generates QUANT_FILENAME and QUANT_AFTER_FILENAME in current dir quantizer.prepare_model(dummy_input) - quantizer.save_per_layer_parameters() + # Let's move it to our working dir + move(QUANT_FILENAME, working_dir / QUANT_FILENAME) + # We don't need QUANT_AFTER_FILENAME, remove it + Path(QUANT_AFTER_FILENAME).unlink() print("Quantization process finished.") # converts .yaml file stats to hpvm standard - generate_calib_file(model, output) + generate_calib_file(model, working_dir, working_dir / output_name) + return working_dir / output_name -def generate_calib_file(model: MiniERA, output: PathLike): +def generate_calib_file(model: nn.Module, working_dir: Path, output_file: Path): print("Generating calibration file...") - with open(QUANT_FILENAME, "r") as stream: + with open(working_dir / QUANT_FILENAME, "r") as stream: scale_data = yaml.safe_load(stream) lines = [] @@ -89,7 +97,6 @@ def generate_calib_file(model: MiniERA, output: PathLike): lines.append(f"input:\t{input_scale}\n") # because of definition of miniera - layers = [*model.convs, *model.fcs, model.softmax] layer_count = { nn.ReLU: 0, nn.Linear: 0, @@ -99,14 +106,16 @@ def generate_calib_file(model: MiniERA, output: PathLike): nn.Parameter: 0, } # add scales for layers - for layer in layers: + quant_params = scale_data["linear_quant_params"] + for name, layer in model.named_modules(): + scale_key = f"{name}.output_scale" + if scale_key not in quant_params: + continue + layer_scale = 1 / quant_params[scale_key] + hpvm_name = LAYER_HPVM_NAME[type(layer)] - distiller_typename = LAYER_DISTILLER_NAME[type(layer)] layer_idx = layer_count[type(layer)] 
layer_count[type(layer)] += 1 - - scale_key = f"{distiller_typename}.{layer_idx}.output_scale" - layer_scale = 1 / scale_data["linear_quant_params"][scale_key] lines.append(f"{hpvm_name}{layer_idx + 1}:\t{layer_scale}\n") if isinstance(layer, nn.Conv2d) or isinstance(layer, nn.Linear): @@ -116,9 +125,9 @@ def generate_calib_file(model: MiniERA, output: PathLike): layer_count[nn.Parameter] += 1 lines.append(f"{add_hpvm_name}{add_idx + 1}:\t{layer_scale}\n") - with open(output, "w+") as f: + with open(output_file, "w") as f: f.writelines(lines) - print(f"Calibration file generated to {output}") + print(f"Calibration file generated to {output_file}") @torch.no_grad()