From cdcf7083752e0da95a08768a65c42cfd8b34d975 Mon Sep 17 00:00:00 2001
From: Yifan Zhao <yifanz16@illinois.edu>
Date: Tue, 2 Feb 2021 01:16:43 -0600
Subject: [PATCH] Added new predtuner as submodule

---
 .gitmodules                                   |   3 +
 hpvm/projects/pred_tuner/.gitignore           |  28 --
 hpvm/projects/pred_tuner/LICENSE              |  21 -
 hpvm/projects/pred_tuner/README.md            |  93 ----
 hpvm/projects/pred_tuner/bin/benchmark.py     | 111 -----
 hpvm/projects/pred_tuner/bin/discrepancy.py   |  53 ---
 .../projects/pred_tuner/bin/filter_configs.py |  54 ---
 hpvm/projects/pred_tuner/bin/inferences.py    |   9 -
 .../projects/pred_tuner/bin/mock_autotuner.py | 230 ---------
 .../projects/pred_tuner/bin/print_approxes.py |  35 --
 .../projects/pred_tuner/bin/progress_graph.py |  61 ---
 hpvm/projects/pred_tuner/bin/train_model.py   | 186 --------
 hpvm/projects/pred_tuner/exp.py               | 438 -----------------
 hpvm/projects/pred_tuner/model_params         |   1 -
 hpvm/projects/pred_tuner/models/__init__.py   |   3 -
 .../pred_tuner/models/datasets/__init__.py    |   2 -
 .../pred_tuner/models/datasets/hpvm.py        | 163 -------
 .../pred_tuner/models/datasets/torch.py       |  37 --
 .../pred_tuner/models/domains/__init__.py     |   1 -
 .../pred_tuner/models/domains/qoses.py        | 317 -------------
 .../pred_tuner/models/hpvm/__init__.py        |   7 -
 .../pred_tuner/models/hpvm/alexnet.py         |  49 --
 .../pred_tuner/models/hpvm/alexnet_canny.py   |  48 --
 .../projects/pred_tuner/models/hpvm/layers.py | 223 ---------
 hpvm/projects/pred_tuner/models/hpvm/lenet.py |  16 -
 .../pred_tuner/models/hpvm/mobilenet.py       |  45 --
 .../projects/pred_tuner/models/hpvm/resnet.py |  96 ----
 hpvm/projects/pred_tuner/models/hpvm/vgg16.py |  44 --
 hpvm/projects/pred_tuner/models/inference.py  |  99 ----
 hpvm/projects/pred_tuner/models/networks.py   |  54 ---
 .../pred_tuner/models/torch/__init__.py       |  15 -
 .../pred_tuner/models/torch/densenet.py       | 107 -----
 hpvm/projects/pred_tuner/models/torch/dpn.py  |  98 ----
 .../pred_tuner/models/torch/efficientnet.py   |  99 ----
 .../pred_tuner/models/torch/googlenet.py      | 106 -----
 .../projects/pred_tuner/models/torch/lenet.py |  23 -
 .../pred_tuner/models/torch/mobilenet.py      |  61 ---
 .../pred_tuner/models/torch/mobilenetv2.py    |  86 ----
 .../pred_tuner/models/torch/pnasnet.py        | 125 -----
 .../pred_tuner/models/torch/preact_resnet.py  | 118 -----
 .../pred_tuner/models/torch/resnet.py         | 122 -----
 .../pred_tuner/models/torch/resnext.py        |  95 ----
 .../projects/pred_tuner/models/torch/senet.py | 121 -----
 .../pred_tuner/models/torch/shufflenet.py     | 109 -----
 .../pred_tuner/models/torch/shufflenetv2.py   | 162 -------
 hpvm/projects/pred_tuner/models/torch/vgg.py  |  39 --
 hpvm/projects/pred_tuner/run_tuner.py         | 305 ------------
 .../pred_tuner/tests/data/1_1_output.json     |  98 ----
 .../pred_tuner/tests/data/3_3_output.json     | 146 ------
 .../pred_tuner/tests/data/promise.json        | 121 -----
 .../pred_tuner/tests/data/quantization.json   |  58 ---
 hpvm/projects/pred_tuner/tests/promise.py     |  87 ----
 hpvm/projects/pred_tuner/tests/resnet50.py    |  33 --
 hpvm/projects/pred_tuner/tests/sampling.py    |  90 ----
 hpvm/projects/pred_tuner/toolkit/__init__.py  |   4 -
 hpvm/projects/pred_tuner/toolkit/approxdnn.py | 442 ------------------
 .../projects/pred_tuner/toolkit/estimators.py | 383 ---------------
 hpvm/projects/pred_tuner/toolkit/indexing.py  |  55 ---
 hpvm/projects/pred_tuner/toolkit/transform.py | 186 --------
 hpvm/projects/pred_tuner/utils/__init__.py    |   3 -
 .../projects/pred_tuner/utils/benchmarks.json | 100 ----
 hpvm/projects/pred_tuner/utils/config.py      | 318 -------------
 hpvm/projects/pred_tuner/utils/logging.py     |  87 ----
 hpvm/projects/pred_tuner/utils/utils.py      |  26 --
 hpvm/projects/predtuner                      |   1 +
 65 files changed, 4 insertions(+), 6552 deletions(-)
 create mode 100644 .gitmodules
 delete mode 100644 hpvm/projects/pred_tuner/.gitignore
 delete mode 100644 hpvm/projects/pred_tuner/LICENSE
 delete mode 100644 hpvm/projects/pred_tuner/README.md
 delete mode 100644 hpvm/projects/pred_tuner/bin/benchmark.py
 delete mode 100644 hpvm/projects/pred_tuner/bin/discrepancy.py
 delete mode 100644 hpvm/projects/pred_tuner/bin/filter_configs.py
 delete mode 100644 hpvm/projects/pred_tuner/bin/inferences.py
 delete mode 100644 hpvm/projects/pred_tuner/bin/mock_autotuner.py
 delete mode 100644 hpvm/projects/pred_tuner/bin/print_approxes.py
 delete mode 100644 hpvm/projects/pred_tuner/bin/progress_graph.py
 delete mode 100644 hpvm/projects/pred_tuner/bin/train_model.py
 delete mode 100644 hpvm/projects/pred_tuner/exp.py
 delete mode 120000 hpvm/projects/pred_tuner/model_params
 delete mode 100644 hpvm/projects/pred_tuner/models/__init__.py
 delete mode 100644 hpvm/projects/pred_tuner/models/datasets/__init__.py
 delete mode 100644 hpvm/projects/pred_tuner/models/datasets/hpvm.py
 delete mode 100644 hpvm/projects/pred_tuner/models/datasets/torch.py
 delete mode 100644 hpvm/projects/pred_tuner/models/domains/__init__.py
 delete mode 100644 hpvm/projects/pred_tuner/models/domains/qoses.py
 delete mode 100644 hpvm/projects/pred_tuner/models/hpvm/__init__.py
 delete mode 100644 hpvm/projects/pred_tuner/models/hpvm/alexnet.py
 delete mode 100644 hpvm/projects/pred_tuner/models/hpvm/alexnet_canny.py
 delete mode 100644 hpvm/projects/pred_tuner/models/hpvm/layers.py
 delete mode 100644 hpvm/projects/pred_tuner/models/hpvm/lenet.py
 delete mode 100644 hpvm/projects/pred_tuner/models/hpvm/mobilenet.py
 delete mode 100644 hpvm/projects/pred_tuner/models/hpvm/resnet.py
 delete mode 100644 hpvm/projects/pred_tuner/models/hpvm/vgg16.py
 delete mode 100644 hpvm/projects/pred_tuner/models/inference.py
 delete mode 100644 hpvm/projects/pred_tuner/models/networks.py
 delete mode 100644 hpvm/projects/pred_tuner/models/torch/__init__.py
 delete mode 100644 hpvm/projects/pred_tuner/models/torch/densenet.py
 delete mode 100644 hpvm/projects/pred_tuner/models/torch/dpn.py
 delete mode 100644 hpvm/projects/pred_tuner/models/torch/efficientnet.py
 delete mode 100644 hpvm/projects/pred_tuner/models/torch/googlenet.py
 delete mode 100644 hpvm/projects/pred_tuner/models/torch/lenet.py
 delete mode 100644 hpvm/projects/pred_tuner/models/torch/mobilenet.py
 delete mode 100644 hpvm/projects/pred_tuner/models/torch/mobilenetv2.py
 delete mode 100644 hpvm/projects/pred_tuner/models/torch/pnasnet.py
 delete mode 100644 hpvm/projects/pred_tuner/models/torch/preact_resnet.py
 delete mode 100644 hpvm/projects/pred_tuner/models/torch/resnet.py
 delete mode 100644 hpvm/projects/pred_tuner/models/torch/resnext.py
 delete mode 100644 hpvm/projects/pred_tuner/models/torch/senet.py
 delete mode 100644 hpvm/projects/pred_tuner/models/torch/shufflenet.py
 delete mode 100644 hpvm/projects/pred_tuner/models/torch/shufflenetv2.py
 delete mode 100644 hpvm/projects/pred_tuner/models/torch/vgg.py
 delete mode 100644 hpvm/projects/pred_tuner/run_tuner.py
 delete mode 100644 hpvm/projects/pred_tuner/tests/data/1_1_output.json
 delete mode 100644 hpvm/projects/pred_tuner/tests/data/3_3_output.json
 delete mode 100644 hpvm/projects/pred_tuner/tests/data/promise.json
 delete mode 100644 hpvm/projects/pred_tuner/tests/data/quantization.json
 delete mode 100644 hpvm/projects/pred_tuner/tests/promise.py
 delete mode 100644 hpvm/projects/pred_tuner/tests/resnet50.py
 delete mode 100644 hpvm/projects/pred_tuner/tests/sampling.py
 delete mode 100644 hpvm/projects/pred_tuner/toolkit/__init__.py
 delete mode 100644 hpvm/projects/pred_tuner/toolkit/approxdnn.py
 delete mode 100644 hpvm/projects/pred_tuner/toolkit/estimators.py
 delete mode 100644 hpvm/projects/pred_tuner/toolkit/indexing.py
 delete mode 100644 hpvm/projects/pred_tuner/toolkit/transform.py
 delete mode 100644 hpvm/projects/pred_tuner/utils/__init__.py
 delete mode 100644 hpvm/projects/pred_tuner/utils/benchmarks.json
 delete mode 100644 hpvm/projects/pred_tuner/utils/config.py
 delete mode 100644 hpvm/projects/pred_tuner/utils/logging.py
 delete mode 100644 hpvm/projects/pred_tuner/utils/utils.py
 create mode 160000 hpvm/projects/predtuner

diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000000..aeaea73f16
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "hpvm/projects/predtuner"]
+	path = hpvm/projects/predtuner
+	url = git@gitlab.engr.illinois.edu:yifanz16/predtuner.git
diff --git a/hpvm/projects/pred_tuner/.gitignore b/hpvm/projects/pred_tuner/.gitignore
deleted file mode 100644
index 23e6d25801..0000000000
--- a/hpvm/projects/pred_tuner/.gitignore
+++ /dev/null
@@ -1,28 +0,0 @@
-# Byte-compiled / optimized / DLL files
-__pycache__/
-*.py[cod]
-*$py.class
-
-# Jupyter Notebook
-.ipynb_checkpoints
-
-# mypy
-.mypy_cache/
-.dmypy.json
-dmypy.json
-
-# Opentuner
-opentuner.db/
-opentuner.log
-
-# Custom
-.idea/
-.vscode/
-/data/
-results/
-tuner_results
-tuner_results/
-*.sh
-*.ipynb
-logistics/
-autotuner/
diff --git a/hpvm/projects/pred_tuner/LICENSE b/hpvm/projects/pred_tuner/LICENSE
deleted file mode 100644
index 2e229faa39..0000000000
--- a/hpvm/projects/pred_tuner/LICENSE
+++ /dev/null
@@ -1,21 +0,0 @@
-MIT License
-
-Copyright (c) 2017 liukuang
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
diff --git a/hpvm/projects/pred_tuner/README.md b/hpvm/projects/pred_tuner/README.md
deleted file mode 100644
index 8d7a6db2bd..0000000000
--- a/hpvm/projects/pred_tuner/README.md
+++ /dev/null
@@ -1,93 +0,0 @@
-# Autotuning with Error-predictive Proxy
-
-Performs autotuning on program approximation knobs using an error-predictive proxy in place of the original
-program, to greatly speed up autotuning while getting results comparable in quality.
-
-Work in progress.
-
-## Getting Started
-
-After finishing this readme, go to [./proxy_tuner.py](./proxy_tuner.py) to try tuning one
-model. Use this set of arguments for a start:
-
-```bash
-python proxy_tuner.py --test-limit 1000 --accuracy-drop 1.5 --accuracy-slack 2.1 \
--o tuner_output alexnet2 autotuner/data/alexnet2
-```
-
-## Supported Programs & Approximations
-
-### Programs
-
-Currently DNN only. Support for several image processing benchmarks is in progress.
-
-Supported DNNs:
-
-- `LeNet @ MNIST`
-
-- `AlexNet @ CIFAR-10`
-
-- `AlexNet2 @ CIFAR-10`
-
-- `VGG16 @ CIFAR-10`
-
-- `ResNet18 @ CIFAR-10`
-
-- `MobileNet @ CIFAR-10`
-
-- `VGG16 @ CIFAR-100`
-
-- `VGG16 @ ImageNet`
-
-- `ResNet50 @ ImageNet`
-
-### Approximations
-
-Currently _hardware-independent_ approximations only. Hardware-reliant approximations are in progress.
-
-Approximations: (output) perforation for convolution, kernel sampling for convolution.
-
-## Proxy Model
-
-TODO: add working principle of proxy modeling.
-
-## Autotuner
-
-We use [opentuner](http://opentuner.org/) for autotuning tasks.
-
-## Project Structure
-
-### Library
-
-- `models`: PyTorch definitions for DNN models
-
-  - `models/dataset`: Dataset loaders for both HPVM and PyTorch-standard DNN models
-
-  - `models/hpvm`: Definitions for HPVM-ported models, with customized convolution layers
-
-- `toolkit`: core code of the project, including DNN indexing / transformations / approximations. See
-  the code for details.
-
-### Entry Point
-
-- `./proxy_tuner.py`: performs autotuning for a given model, accuracy threshold, and number of iterations,
-  using a proxy model that predicts the accuracy of the approximated DNN (instead of running an inference,
-  which can be slow).
-
-- `./run_proxy_tuner.py`: runs autotuning for all models defined in `utils/tuner_postprocess/benchmarks.py` on
-  a set of 3 accuracy thresholds, and performs postprocessing such as computing the Pareto curve.
-
-  This is the right end-to-end script to use for obtaining a comprehensive set of autotuner results.
-
-### Other Code
-
-- `tests`: runnable scripts that can be used as tests (and other actual functionality)
-
-- `utils`: helper functions for the library and autotuner that are generally standalone, except
-
-  - `utils/utils.py` contains some convenient wrappers for model training, etc. that depend on the library.
-
-### Data
-
-- `autotuner/data`: descriptions of each DNN model, such as the listing of layers, tunable
-  knobs, etc.
diff --git a/hpvm/projects/pred_tuner/bin/benchmark.py b/hpvm/projects/pred_tuner/bin/benchmark.py deleted file mode 100644 index 92c8b2de52..0000000000 --- a/hpvm/projects/pred_tuner/bin/benchmark.py +++ /dev/null @@ -1,111 +0,0 @@ -import gc -from time import time -from typing import Dict, Iterator, List - -import numpy -from tqdm import tqdm - -from exp import Benchmark, bench_tuner_data -from toolkit import ConfigT, LinearCombEstimator, LinearEstimator, LinearQoSEstimator, ModuleIndexer, \ - NetApproxSelector -from utils import gpu_mem_mb, init_by_name, nn_to_output, tensor_to_accuracy - - -def generate_random_configs(layer_approxes: Dict[int, List[int]], n_configs: int) -> Iterator[ConfigT]: - from numpy.random import choice - from random import randrange - all_layers = [k for k, ns in layer_approxes.items() if ns] - for _ in range(n_configs): - config = {} - n_approx_layers_ = randrange(len(all_layers) + 1) - approx_layers = choice(all_layers, n_approx_layers_, replace=False) - for layer_idx in approx_layers: - config[layer_idx] = choice(layer_approxes[layer_idx], 1)[0] - yield config - - -def time_action(action): - tt0 = time() - action() - tt1 = time() - return tt1 - tt0 - - -def mean_std_str(np_array): - return f"{np_array.mean():.7f} +- {np_array.std():.7f}" - - -def main_loop(bench, baseline_dag, testloader): - _t_baseline_inf = time() - baseline_output = nn_to_output(baseline_dag.module, testloader) - baseline_acc = tensor_to_accuracy(baseline_output, testloader) - print(f"Model accuracy: {baseline_acc}; test set size: {baseline_output.size(0)}") - t_baseline_inf = time() - _t_baseline_inf - nas = NetApproxSelector(baseline_dag) - - def acc_crit(inputs_): - return tensor_to_accuracy(inputs_, testloader) - - def threshold_eval(inputs_): - import numpy as np - accs = np.array([acc_crit(x) for x in inputs_]) - return baseline_acc - accs.mean() < 3.0 - - def run_model(net): - return nn_to_output(net, testloader) - - _t_profile = time() - pickle_path = bench.result_dir / 'proxy.pkl' - f1 = LinearCombEstimator( - nas, run_model, acc_crit, threshold_eval, 0.95, independent_init=False - ) - f2 = LinearQoSEstimator( - nas, run_model, acc_crit, threshold_eval, 0.95, independent_init=False - ) - LinearEstimator.coinit_estimators(nas, run_model, threshold_eval, f1, f2, storage=pickle_path) - t_profile = time() - _t_profile - print( - f"Baseline inference time: {t_baseline_inf:.3f} sec, predictor init time: {t_profile:.3f} sec; " - f"Predictor init time is {t_profile / t_baseline_inf:.3f} times of inference time" - ) - configs = generate_random_configs(nas.net_approxes, 30) - pbar = tqdm(configs) - times = [] - for config in pbar: - pbar.set_postfix(mem=gpu_mem_mb()) - approx = nas.apply_approx_by_config(config).module - t_inf = time_action(lambda: nn_to_output(approx, testloader)) - t_f1 = time_action(lambda: f1.estimate(config)) - t_f2 = time_action(lambda: f2.estimate(config)) - pbar.write( - f"Inference time: {t_inf:.3f} sec, predictors time: {t_f1:.3f} | {t_f2:.3f} sec" - ) - times.append([t_inf, t_f1, t_f2]) - gc.collect() - times = numpy.array(times) - s_inf, s0, s1 = numpy.apply_along_axis(mean_std_str, 0, times) - print(f"Result: inference time {s_inf}, predictor time: {s0} | {s1}") - print("Timing raw data:", times) - - -def main(): - for network in ( - 'alexnet_hpvm', 'alexnet2_hpvm', - 'vgg16_cifar10_hpvm', 'vgg16_cifar100_hpvm', - 'mobilenet_hpvm', - 'resnet18_hpvm', - 'lenet_hpvm', - 'vgg16_imagenet_hpvm', - 'alexnet_imagenet_hpvm', - # 'resnet50_imagenet_hpvm', - ): - bench: 
Benchmark = bench_tuner_data[network] - print(f"{network}: ") - baseline, testloader, _, shapes = init_by_name(network) - baseline_dag = ModuleIndexer(baseline) - main_loop(bench, baseline_dag, testloader) - gc.collect() - - -if __name__ == '__main__': - main() diff --git a/hpvm/projects/pred_tuner/bin/discrepancy.py b/hpvm/projects/pred_tuner/bin/discrepancy.py deleted file mode 100644 index 8be92df66a..0000000000 --- a/hpvm/projects/pred_tuner/bin/discrepancy.py +++ /dev/null @@ -1,53 +0,0 @@ -import os -from pathlib import Path -from typing import Optional - -import matplotlib.pyplot as plt -import seaborn -import torch -from tqdm import tqdm - -from toolkit import ModuleIndexer, NetApproxSelector, StateCapturer -from utils import device, init_by_name - - -def run_concat_output_at(net_index: ModuleIndexer, testloader, layer: int) -> Optional[torch.Tensor]: - snet = StateCapturer(net_index, lambda i, x: x.clone().detach() if i == layer else None) - for inputs, targets in testloader: - inputs, targets = inputs.to(device), targets.to(device) - snet(inputs) - outputs = snet.net_state[layer] - return torch.cat(outputs) if outputs else None - - -def get_discrepancy_for(baseline, approxed, testloader, changed_layer): - baseline_output = run_concat_output_at(baseline, testloader, changed_layer) - approxed_output = run_concat_output_at(approxed, testloader, changed_layer) - assert baseline_output.shape == approxed_output.shape - tqdm.write(f"{baseline_output.size()}") - diff = baseline_output - approxed_output - diff_rel = torch.abs(diff / baseline_output).cpu() - diff_rel[torch.isnan(diff_rel)] = 0 - diff_rel[diff_rel > 10] = 10 - return diff_rel - - -def main(): - prefix = Path('results/discrepancy/resnet50_imagenet_hpvm') - os.makedirs(prefix, exist_ok=True) - baseline, testloader, _, shapes = init_by_name('resnet50_imagenet_hpvm') - net_index = ModuleIndexer(baseline) - nas = NetApproxSelector(net_index) - total = sum(len(ns) for ns in nas.net_approxes.values()) - for layer, approx, approxed_net_dag in tqdm(nas.apply_indep_approx(), total=total): - if approx == 11: - continue - diff_rel = get_discrepancy_for(net_index, approxed_net_dag, testloader, layer) - fig, ax = plt.subplots() - seaborn.heatmap(diff_rel.mean(0).mean(0).numpy(), ax=ax) - fig.savefig((prefix / f'{layer}_{approx}.png').open('wb'), dpi=200) - plt.close(fig) - - -if __name__ == '__main__': - main() diff --git a/hpvm/projects/pred_tuner/bin/filter_configs.py b/hpvm/projects/pred_tuner/bin/filter_configs.py deleted file mode 100644 index bf23668b81..0000000000 --- a/hpvm/projects/pred_tuner/bin/filter_configs.py +++ /dev/null @@ -1,54 +0,0 @@ -from typing import List, Tuple - -from exp import Benchmark, ExpState, bench_tuner_data -from utils.config import Config - - -def filter_configs( - validation: List[Config], test: List[Config], - vali_threshold: float, test_threshold: float = 3.0 -) -> Tuple[List[Config], List[Config]]: - # Filter validation and test set by their respective thresholds - filtered_validation = [ - c for c in validation if c.avg_loss <= vali_threshold - ] - filtered_test = [ - c for c in test if c.avg_loss <= test_threshold - ] - # Test configs also need to be a subset of validation configs. 
- name_to_filtered = {x.fname: x for x in filtered_test} - intersect_names = set(list(name_to_filtered.keys())).intersection( - set((x.fname for x in filtered_validation)) - ) - filtered_test_ = [name_to_filtered[fname] for fname in intersect_names] - assert set([id(x) for x in filtered_test_]).issubset(set([id(x) for x in filtered_test])) - return filtered_validation, filtered_test_ - - -def process_configs(bench: Benchmark, calib_slack: float, states: ExpState): - validated_configs = states.validated_configs.configs - tested_configs = states.tested_configs.configs - old_len = len(validated_configs) - valid_configs, test_configs = filter_configs( - validated_configs, tested_configs, calib_slack - ) - states.valid_configs.finalize_dump(valid_configs) - states.test_configs.finalize_dump(test_configs) - print(f"{bench.model_name}: {old_len} -> {len(validated_configs)}, {len(tested_configs)}") - # Finalize data input and plot everything. - states.finalize_plot() - - -def main(): - for bench in bench_tuner_data.values(): - bench: Benchmark - try: - states = ExpState(bench) - except ValueError: - print(f"Model {bench.model_name} has incomplete experiment data; skipping") - continue - process_configs(bench, 2.1, states) - - -if __name__ == '__main__': - main() diff --git a/hpvm/projects/pred_tuner/bin/inferences.py b/hpvm/projects/pred_tuner/bin/inferences.py deleted file mode 100644 index 065abfd223..0000000000 --- a/hpvm/projects/pred_tuner/bin/inferences.py +++ /dev/null @@ -1,9 +0,0 @@ -from tqdm import tqdm - -from models import BaselineInfo, networks -from utils import device - -if __name__ == '__main__': - for net_name in networks: - baseline_info = BaselineInfo.init_by_name(net_name, device) - tqdm.write(f"{net_name}: {baseline_info.val_qos} (validation) {baseline_info.test_qos} (test") diff --git a/hpvm/projects/pred_tuner/bin/mock_autotuner.py b/hpvm/projects/pred_tuner/bin/mock_autotuner.py deleted file mode 100644 index ec12e1643a..0000000000 --- a/hpvm/projects/pred_tuner/bin/mock_autotuner.py +++ /dev/null @@ -1,230 +0,0 @@ -import gc -import json -import os -from pathlib import Path -from sys import argv -from typing import Dict, Iterable, Iterator, List, Optional, Tuple - -import matplotlib.pyplot as plt -import numpy as np -from tqdm import tqdm, trange - -from exp import Benchmark, bench_tuner_data -from toolkit import ConfigT, LinearCombEstimator, LinearEstimator, \ - LinearQoSEstimator, ModuleIndexer, NetApproxSelector, WeightedLinearCombEstimator -from toolkit.estimators import WeightedLinearQoSEstimator -from utils import config_pylogger, gpu_mem_mb, init_by_name, nn_to_accuracy, nn_to_output, qos_stats, tensor_to_accuracy - -msg_logger = config_pylogger(output_dir=Path('tuner_results/logs'), verbose=True) - - -class Evaluator: - def __init__( - self, nas: NetApproxSelector, n_approx_layers: Optional[int], - n_configs: int, testloader, threshold: Optional[float] - ): - self.nas = nas - self.layer_approxes = nas.net_approxes - self.n_approx_layers = n_approx_layers - self.n_configs = n_configs - self.testloader = testloader - self.threshold = threshold - self.config_accs = None - - def generate_random_configs(self) -> Iterator[ConfigT]: - from numpy.random import choice - from random import randrange - all_layers = [k for k, ns in self.layer_approxes.items() if ns] - for _ in range(self.n_configs): - config = {} - if self.n_approx_layers is None: - n_approx_layers_ = randrange(len(all_layers) + 1) - else: - n_approx_layers_ = min(self.n_approx_layers, len(all_layers)) - 
approx_layers = choice(all_layers, n_approx_layers_, replace=False) - for layer_idx in approx_layers: - config[layer_idx] = choice(self.layer_approxes[layer_idx], 1)[0] - yield config - - def evaluate_config(self, config: ConfigT) -> Tuple[float, float]: - deterministic = self.nas.is_deterministic(config) - n_runs = 1 if deterministic else 30 - approxed = self.nas.apply_approx_by_config(config).module - accs = [] - for _ in trange(n_runs, leave=None): - acc = nn_to_accuracy(approxed, self.testloader) - accs.append(acc) - mean, confident_acc, _ = qos_stats(accs, 0.95) - return mean, confident_acc - - def sort_configs_by_mean_acc(self): - sorted_ = sorted(self.config_accs, key=lambda p: p[1], reverse=True) - from itertools import takewhile - if self.threshold is not None: - sorted_ = list(takewhile(lambda p: p[1] > self.threshold, sorted_)) - self.config_accs = np.array(sorted_) - - @staticmethod - def calculate_perm_dist(pred_order): - n = len(pred_order) - actual_order = np.arange(n) - return np.linalg.norm(actual_order - pred_order, ord=1) / ((n ** 2 - 1) / 3) - - def use_predictors(self, predictors: Iterable[LinearEstimator]) -> \ - Optional[List[Tuple[np.ndarray, np.ndarray]]]: - self.sort_configs_by_mean_acc() - if len(self.config_accs) == 0: - return None - configs = self.config_accs[:, 0] - raw_prediction = [] - for predictor in predictors: - # N * 2 array: avg acc, 95% confidence acc - pred_accs = np.array([ - predictor.estimate(config) for config in configs - ]) - pred_order = (-pred_accs[:, 0]).argsort(kind='stable') - raw_prediction.append((pred_accs, pred_order)) - return raw_prediction - - def run_configs(self): - configs = self.generate_random_configs() - pbar = tqdm(configs) - config_accs = [] - for config in pbar: - pbar.set_postfix(mem=gpu_mem_mb()) - mean_acc, confident_acc = self.evaluate_config(config) - config_accs.append([config, mean_acc, confident_acc]) - gc.collect() - self.config_accs = np.array(config_accs) - - -class NumpyEncoder(json.JSONEncoder): - def default(self, obj): - if isinstance(obj, np.ndarray): - return obj.tolist() - return json.JSONEncoder.default(self, obj) - - -class DataPlotStorage: - def __init__(self, save_to_prefix: Path): - self.save_to = save_to_prefix - os.makedirs(self.save_to.parent, exist_ok=True) - self.args = [] - self.fig, self.axes = plt.subplots() - - def plot(self, *args, **kwargs): - self.args.append({'args': args, 'kwargs': kwargs}) - self.axes.plot(*args, **kwargs) - - def errorbar(self, *args, **kwargs): - self.args.append({'args': args, 'kwargs': kwargs}) - self.axes.errorbar(*args, **kwargs) - - def save_and_close(self): - self.fig.savefig(self.save_to.with_suffix('.png'), dpi=200) - with self.save_to.with_suffix('.json').open('w') as f: - json.dump(self.args, f, cls=NumpyEncoder) - plt.close(self.fig) - - -def compare_estimators( - eva: Evaluator, predictors: Dict[str, LinearEstimator], n_runs: int, st: DataPlotStorage -): - all_dists = [] - for _ in trange(n_runs): - eva.run_configs() - raw_predictions = eva.use_predictors(predictors.values()) - dists = [eva.calculate_perm_dist(order) for _, order in raw_predictions] - all_dists.append(dists) - dists_t = zip(*all_dists) - for vs, label in zip(dists_t, predictors.keys()): - st.plot(sorted(vs), label=label) - st.axes.set_ylim(bottom=0) - st.fig.legend() - st.save_and_close() - - -def plot_acc_estm_discrepancy( - eva: Evaluator, predictors: Dict[str, LinearEstimator], st: DataPlotStorage -): - eva.run_configs() - raw_predictions = eva.use_predictors(predictors.values()) - if 
not raw_predictions: - return - measured_mean_accs = eva.config_accs[:, 1] - yerr = measured_mean_accs - eva.config_accs[:, 2] - st.errorbar( - measured_mean_accs, measured_mean_accs, fmt='.', yerr=yerr, uplims=True, label='baseline' - ) - for (pred_accs, _), label in zip(raw_predictions, predictors.keys()): - pred_accs = pred_accs - yerr = pred_accs[:, 0] - pred_accs[:, 1] - st.errorbar( - measured_mean_accs, pred_accs[:, 0], - fmt='.', yerr=yerr, uplims=True, label=label - ) - min_x, max_x = np.min(measured_mean_accs), np.max(measured_mean_accs) - diag_x = np.linspace(min_x, max_x, 500) - st.errorbar(diag_x, diag_x, linewidth=1) - st.axes.set_xlabel('Measured accuracy (%)') - st.axes.set_ylabel('Predicted accuracy (%)') - st.fig.legend() - st.save_and_close() - - -def train_predictors(eva: Evaluator, *predictors: LinearEstimator): - for conf in eva.generate_random_configs(): - for p in predictors: - p.estimate(conf) - - -def main(): - base_path = Path(argv[1]) if len(argv) > 1 else Path('results/mock_autotuner') - - for network in ( - 'alexnet2_hpvm', 'vgg16_cifar10_hpvm', 'vgg16_cifar100_hpvm', - 'mobilenet_hpvm', - 'resnet18_hpvm', - 'vgg16_imagenet_hpvm', 'resnet50_imagenet_hpvm' - ): - bench: Benchmark = bench_tuner_data[network] - print(f"{bench.model_name}: ") - baseline, testloader, _, shapes = init_by_name(bench.model_name) - baseline_dag = ModuleIndexer(baseline) - baseline_acc = nn_to_accuracy(baseline_dag.module, testloader) - nas = NetApproxSelector(baseline_dag) - - def acc_crit(inputs_): - return tensor_to_accuracy(inputs_, testloader) - - def threshold_eval(inputs_): - accs = np.array([acc_crit(x) for x in inputs_]) - return baseline_acc - accs.mean() < 3.0 - - def run_model(net): - return nn_to_output(net, testloader) - - f1 = LinearCombEstimator(nas, run_model, acc_crit, threshold_eval, 0.95, False) - f2 = LinearQoSEstimator(nas, run_model, acc_crit, threshold_eval, 0.95, False) - f3 = WeightedLinearCombEstimator(nas, run_model, acc_crit, threshold_eval, 0.95, False) - f4 = WeightedLinearQoSEstimator(nas, run_model, acc_crit, threshold_eval, 0.95, False) - LinearEstimator.coinit_estimators( - nas, run_model, threshold_eval, f1, f2, f3, f4, - storage=Path('model_params/pickles') / Path(bench.base_dir).name / 'proxy_dev.pkl' - ) - train_predictors(Evaluator(nas, None, 700, testloader, baseline_acc), f3, f4) - st = DataPlotStorage(base_path / "cmp_acc_diff" / f"{bench.model_name}") - plot_acc_estm_discrepancy( - Evaluator(nas, None, 200, testloader, baseline_acc - 10), - {'f1': f1, 'f2': f2, 'f3': f3, 'f4': f4}, st - ) - st = DataPlotStorage(base_path / 'cmp_ordering' / f"{bench.model_name}" / "n_none") - compare_estimators( - Evaluator(nas, None, 20, testloader, None), - {'f1': f1, 'f2': f2, 'f3': f3, 'f4': f4}, 10, st - ) - gc.collect() - - -if __name__ == '__main__': - main() diff --git a/hpvm/projects/pred_tuner/bin/print_approxes.py b/hpvm/projects/pred_tuner/bin/print_approxes.py deleted file mode 100644 index c95d080326..0000000000 --- a/hpvm/projects/pred_tuner/bin/print_approxes.py +++ /dev/null @@ -1,35 +0,0 @@ -from collections import defaultdict - -import matplotlib.pyplot as plt -import pandas as pd -import seaborn -from tqdm import tqdm - -from models.domains import Accuracy -from models import BaselineInfo -from toolkit import NetApproxSelector -from utils import device - - -def main(): - baseline_info = BaselineInfo.init_by_name('mobilenet_hpvm', device) - nas = NetApproxSelector(baseline_info.baseline_net, dev_time_only=True, ignore_fp32=False) - table = 
defaultdict(dict) - pbar = tqdm(nas.list_single_approxes()) - for layer, approx, _ in pbar: - pbar.set_postfix(k=layer, i=approx) - approxed_net = nas.apply_approx_by_config({layer: approx}).module - acc: Accuracy = baseline_info.get_qos(approxed_net, baseline_info.val_loader) - table[layer][approx] = acc.to_scalar() - df = pd.DataFrame( - [pd.Series(list(d.values()), index=d.keys()) for d in table.values()], - index=list(table.keys()) - ) - with open('accuracy.json', 'w') as f: - df.to_json(f) - seaborn.heatmap(df.to_numpy()) - plt.savefig('accuracy.png', dpi=200) - - -if __name__ == '__main__': - main() diff --git a/hpvm/projects/pred_tuner/bin/progress_graph.py b/hpvm/projects/pred_tuner/bin/progress_graph.py deleted file mode 100644 index 0d7d0d5526..0000000000 --- a/hpvm/projects/pred_tuner/bin/progress_graph.py +++ /dev/null @@ -1,61 +0,0 @@ -from itertools import groupby -from operator import itemgetter -from pathlib import Path -from typing import Tuple - -import matplotlib.pyplot as plt - -from exp import Benchmark, ExpState, batch_id, bench_tuner_data -from utils import Config - - -def finalize_figs(filename, ax, fig): - ax.legend() - ax.set_ylim(bottom=1.0) - fig.savefig(filename, dpi=200) - plt.close(fig) - - -def process_configs(bench: Benchmark, states: ExpState, shared_ax): - def get_features(c: Config) -> Tuple[int, int, float]: - *_, run_s, iter_s = c.fname.split('_') - return int(run_s), int(iter_s), c.speedup - - def get_max_speedup(group): - group = sorted(list(group), key=itemgetter(1)) - iter_max_speedup = [] - max_speedup = 0 - for _, i, speedup in group: - max_speedup = max(max_speedup, speedup) - iter_max_speedup.append((i, max_speedup)) - return iter_max_speedup - - run_iter_speedup = sorted( - [get_features(c) for c in states.all_configs.configs], key=itemgetter(0) - ) - run_groups = groupby(run_iter_speedup, key=itemgetter(0)) - fig, ax = plt.subplots() - for run, run_group in run_groups: - iter_max_speedup = get_max_speedup(run_group) - iters, max_speedups = zip(*iter_max_speedup) - ax.plot(iters, max_speedups, label=f"loss={run + 1}%") - if run + 1 == 3: - shared_ax.plot(iters, max_speedups, label=f"{bench.model_name.replace('_hpvm', '')}") - finalize_figs(bench.result_dir / f"tuner_progress.png", ax, fig) - - -def main(): - fig, ax = plt.subplots() - for bench in bench_tuner_data.values(): - bench: Benchmark - try: - states = ExpState(bench) - except ValueError: - print(f"Model {bench.model_name} has incomplete experiment data; skipping") - continue - process_configs(bench, states, ax) - finalize_figs(Path("results") / f"{batch_id}_tuner_progress.png", ax, fig) - - -if __name__ == '__main__': - main() diff --git a/hpvm/projects/pred_tuner/bin/train_model.py b/hpvm/projects/pred_tuner/bin/train_model.py deleted file mode 100644 index d3d0d80725..0000000000 --- a/hpvm/projects/pred_tuner/bin/train_model.py +++ /dev/null @@ -1,186 +0,0 @@ -"""Train CIFAR10 with PyTorch.""" -import argparse -import os -from typing import List - -import numpy as np -import torch -from torch import optim -from torch.nn import CrossEntropyLoss, Module -from torch.optim.lr_scheduler import ReduceLROnPlateau -from tqdm import tqdm - -from models.torch import ResNet18 -from models.datasets import get_cifar10_train_dataloader, get_cifar10_test_dataloader -from utils import device - - -class RunningStats: - def __init__(self, criterion): - self.criterion = criterion - self.all_outputs = None - self.all_targets = np.zeros([0]) - self.avg_loss, self.correct, self.total = 0, 0, 0 - 
self.conf_mat = None - self.n_batches = 0 - - @property - def n_classes(self): - if self.all_outputs is None: - raise RuntimeError("Num of classes is unknown before seeing first input") - return self.all_outputs.shape[1] - - def setup_for_first_output(self, outputs): - n_classes = outputs.shape[1] - self.all_outputs = np.zeros([0, n_classes]) - self.conf_mat = np.zeros([n_classes, n_classes]) - - def add_output(self, outputs, targets): - if self.all_outputs is None: - self.setup_for_first_output(outputs) - loss = self.criterion(outputs, targets) - _, predicted = outputs.max(1) - self.avg_loss = (self.avg_loss * self.n_batches + loss.item()) / (self.n_batches + 1) - self.total += targets.size(0) - self.correct += predicted.eq(targets).sum().item() - for t, p in zip(targets, predicted): - self.conf_mat[int(t), p] += 1 - self.n_batches += 1 - outputs = outputs.clone().cpu().detach() - targets = targets.clone().cpu().detach() - self.all_outputs = np.vstack([self.all_outputs, outputs]) - self.all_targets = np.hstack([self.all_targets, targets]) - return loss - - def classwise_outputs(self) -> List[np.ndarray]: - class_outputs = [np.zeros([0, self.n_classes]) for _ in range(self.n_classes)] - for output, label_class in zip(self.all_outputs, self.all_targets): - co = class_outputs[int(label_class)] - class_outputs[int(label_class)] = np.vstack([co, output]) - return class_outputs - - @property - def acc(self): - return 100. * self.correct / self.total - - @property - def classwise_acc(self) -> List[float]: - return [self.conf_mat[i, i] / self.conf_mat[i].sum() for i in range(self.n_classes)] - - -def test(net, testloader, criterion): - net.eval() - rs = RunningStats(criterion) - with torch.no_grad(): - pbar = tqdm(enumerate(testloader), total=len(testloader)) - for batch_idx, (inputs, targets) in pbar: - inputs, targets = inputs.to(device), targets.to(device) - outputs = net(inputs) - rs.add_output(outputs, targets) - pbar.set_postfix_str( - f"Loss: {rs.avg_loss:.3f} | Acc: {rs.acc:.3f}% ({rs.correct}/{rs.total})" - ) - return rs - - -def load_torch_checkpoint(net: Module, chpt_path: str): - print('==> Loading checkpoint..') - checkpoint = torch.load(chpt_path) - net.load_state_dict(checkpoint['net']) - start_epoch = checkpoint['epoch'] - return start_epoch - - -def get_optimizer(net, lr): - return optim.SGD(net.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4) - - -class EarlyStopping: - """Early stops the training if validation loss doesn't improve after a given patience.""" - - def __init__(self, path, patience=7, delta=0): - """ - Args: - patience (int): How long to wait after last time validation loss improved. - Default: 7 - delta (float): Minimum change in the monitored quantity to qualify as an improvement. - Default: 0 - path (str): Path for the checkpoint to be saved to. 
- Default: 'checkpoint.pt' - """ - self.patience = patience - self.counter = 0 - self.min_loss = None - self.delta = delta - self.path = path - - def __call__(self, val_loss, model, epoch): - if self.min_loss is None or val_loss < self.min_loss - self.delta: - # Improved - self.min_loss = val_loss - self.save_checkpoint(model, epoch) - self.counter = 0 - else: - self.counter += 1 - if self.counter >= self.patience: - return True - return False - - def save_checkpoint(self, model, epoch): - tqdm.write('Saving..') - state = { - 'net': model.state_dict(), - 'epoch': epoch, - } - if not os.path.isdir(os.path.dirname(self.path)): - os.makedirs(os.path.dirname(self.path)) - torch.save(state, self.path) - - -def train_one_epoch(net, trainloader, optimizer, criterion): - net.train() - rs = RunningStats(criterion) - pbar = tqdm(trainloader) - for inputs, targets in pbar: - optimizer.zero_grad() - inputs, targets = inputs.to(device), targets.to(device) - outputs = net(inputs) - loss = rs.add_output(outputs, targets) - loss.backward() - optimizer.step() - pbar.set_postfix_str( - f"Loss: {rs.avg_loss:.3f} | Acc: {rs.acc:.3f}% ({rs.correct}/{rs.total})" - ) - - -def train(net, checkpoint, output, lr): - start_epoch = load_torch_checkpoint(net, checkpoint) if checkpoint else 0 - trainloader = get_cifar10_train_dataloader('./data', 128) - testloader = get_cifar10_test_dataloader('./data', 100) - criterion = CrossEntropyLoss() - optimizer = get_optimizer(net, lr) - es = EarlyStopping(output, patience=5) - reduce_lr = ReduceLROnPlateau(optimizer, factor=0.2, patience=3, verbose=True) - for epoch in range(start_epoch + 1, start_epoch + 200): - print('\nEpoch: %d' % epoch) - train_one_epoch(net, trainloader, optimizer, criterion) - rs = test(net, testloader, criterion) - if es(rs.avg_loss, net, epoch): - print(f"Early stopped at {epoch}") - break - reduce_lr.step(rs.avg_loss) - - -def main(): - parser = argparse.ArgumentParser(description='PyTorch CIFAR10 Training') - parser.add_argument('--lr', default=0.1, type=float, help='learning rate') - parser.add_argument('--resume', '-r', type=str, help='resume from checkpoint') - parser.add_argument( - '--output', '-o', type=str, required=True, help='path to save checkpoint to' - ) - args = parser.parse_args() - train(ResNet18().to(device), args.resume, args.output, args.lr) - - -if __name__ == '__main__': - main() diff --git a/hpvm/projects/pred_tuner/exp.py b/hpvm/projects/pred_tuner/exp.py deleted file mode 100644 index e7457d5b47..0000000000 --- a/hpvm/projects/pred_tuner/exp.py +++ /dev/null @@ -1,438 +0,0 @@ -import abc -import json -import os -from pathlib import Path -from typing import Dict, Iterable, List, Optional, Tuple, Type - -from torch.nn import Linear, Module -from torch.utils.data import DataLoader - -from models.domains import QoS, qos_stats -from models.hpvm import HPVMConvBundle -from models import BaselineInfo -from toolkit import LinearEstimator, NetApproxSelector -from utils import config_pylogger, get_knob_config_file, get_tensorrt_dir, device -from utils.config import Config, dump_rt_format_to, load_configs_from_dir, plot_configs - -batch_id = "batch405" -is_dev_time = False -ConfigT = Dict[int, int] -msg_logger = config_pylogger(output_dir=Path('tuner_results/logs'), verbose=True) - - -def get_layer_desc(path: Path) -> List[List[str]]: - with path.open() as f: - return [x.split() for x in f] - - -def get_layer_desc_in_pytorch(layer_desc: List[List[str]]) -> \ - Tuple[List[Optional[Module]], Dict[int, int]]: - desc = [] - remapping = {} - 
for ext_i, vals in enumerate(layer_desc): - if vals and 'conv' == vals[0]: - remapping[ext_i] = len(remapping) - desc.append(HPVMConvBundle) - elif vals and 'dense' == vals[0]: - remapping[ext_i] = len(remapping) - desc.append(Linear) - else: - desc.append(None) - return desc, remapping - - -def read_cost_file(layer_desc: List[List[str]], path: Path) -> List[float]: - with path.open() as f: - raw_costs = [float(x.strip()) for x in f] - costs = [] - raw_cost_it = 0 - for layer in layer_desc: - if 'conv' in layer or 'dense' in layer: - costs.append(raw_costs[raw_cost_it]) - raw_cost_it += 1 - else: - costs.append(0) - assert len(layer_desc) == len(costs) - return costs - - -def read_global_knobs_speedup(path: Path): - knobs_speedup = {} - with path.open() as f: - for x in f: - toks = x.split("\t") - ID = int(toks[0].split(",")[1]) - speedup = float(toks[2]) - knobs_speedup[ID] = speedup - return knobs_speedup - - -class Benchmark: - def __init__(self, json_data: dict): - self.json_data = json_data - self.model_name: str = self.model_name # RHS from json data - # Use baseline configuration as seed to aid the autotuner - # TODO: put this as a field in benchmarks.json - self.use_seed = self.model_name == 'resnet50_imagenet_hpvm' - tensorrt = get_tensorrt_dir() - self.cost_file = tensorrt / self.cost_file - self.layer_file = tensorrt / self.layer_file - self.knobs_config_file = tensorrt / "autotuner/data/global_knobs.txt" - self.batch_dir = tensorrt / self.base_dir / "loss_123" / batch_id - self.result_dir = self.batch_dir / ("dev_tuner" if is_dev_time else "inst_tuner") - - self.layer_desc = get_layer_desc(self.layer_file) - self.pytorch_layer_desc, self.layer_remap = get_layer_desc_in_pytorch(self.layer_desc) - msg_logger.debug(f"HPVM order to neutral order remapping, model {self.model_name}: {self.layer_remap}") - self.layer_costs = read_cost_file(self.layer_desc, self.cost_file) - self.knobs_speedup = read_global_knobs_speedup(get_knob_config_file()) - - def set_batch_id(self, batch_id_: str = batch_id, is_dev_time_: bool = is_dev_time): - tensorrt = get_tensorrt_dir() - self.batch_dir = tensorrt / self.base_dir / "loss_123" / batch_id_ - self.result_dir = self.batch_dir / ("dev_tuner" if is_dev_time_ else "inst_tuner") - - def __getattr__(self, item: str): - return self.json_data[item] - - def translate_config(self, autotuner: ConfigT) -> ConfigT: - ret = {} - for x, v in autotuner.items(): - if x not in self.layer_remap: - assert v == 11 - continue - ret[self.layer_remap[x]] = v - return ret - - def get_baseline_config(self, is_fp16: bool) -> ConfigT: - conf = {} - for layer_id, layer in enumerate(self.pytorch_layer_desc): - knob = 12 if layer is not None and is_fp16 else 11 - conf[layer_id] = knob - return conf - - def pattern_match_layer_knobs(self, module_to_knobs: Dict[Module, List[int]]) -> Dict[int, List[int]]: - conv_knobs = [knobs for m, knobs in module_to_knobs.items() if isinstance(m, HPVMConvBundle)] - linear_knobs = [knobs for m, knobs in module_to_knobs.items() if isinstance(m, Linear)] - assert len(conv_knobs) + len(linear_knobs) == len(module_to_knobs) - conv_knobs_idx, linear_knobs_idx = 0, 0 - ret = {} - for layer_id, module_ty in enumerate(self.pytorch_layer_desc): - if module_ty is HPVMConvBundle: - # PROMISE does not apply to first layer of LeNet. 
- if self.model_name == "lenet_hpvm" and layer_id == 0: - this_conv_knobs = [x for x in conv_knobs[conv_knobs_idx] if x >= 11] - else: - this_conv_knobs = conv_knobs[conv_knobs_idx] - ret[layer_id] = this_conv_knobs + [11] - conv_knobs_idx += 1 - elif module_ty is Linear: - ret[layer_id] = linear_knobs[linear_knobs_idx] + [11] - linear_knobs_idx += 1 - else: - ret[layer_id] = [11] - assert conv_knobs_idx == len(conv_knobs) - return ret - - def compute_config_cost(self, cfg: ConfigT) -> Tuple[float, float]: - orig_cost = 0.0 - total_cost = 0.0 - for layer, knob in cfg.items(): - op_cost = self.layer_costs[layer] - speedup = self.knobs_speedup[knob] - total_cost += (op_cost * 1.0 / speedup * 1.0) - orig_cost += op_cost - speedup = (orig_cost * 1.0) / (total_cost * 1.0) - return total_cost, speedup - - def get_n_layers(self) -> int: - return len(self.layer_desc) - - -class ConfigMeasurer(BaselineInfo): - def __init__( - self, net: Module, val_loader: DataLoader, test_loader: DataLoader, - non_tensor_output: bool, qos_class: Type[QoS], - nas: NetApproxSelector, bench: Benchmark - ): - super().__init__(net, val_loader, test_loader, non_tensor_output, qos_class) - self.nas = nas - self.bench_translate_config = bench.translate_config - self.layer_remap = {k: v for k, v in enumerate(list(self.nas.net_approxes.keys()))} - msg_logger.debug(f"Neutral order to module scanning order remapping: {self.layer_remap}") - self.bench = bench - msg_logger.info( - f"Model {bench.model_name} baseline accuracy = " - f"{self.val_qos} ({self.test_qos} test)" - ) - - def translate_config(self, autotuner_cfg: ConfigT): - autotuner_cfg = self.bench_translate_config(autotuner_cfg) - # Translate layer index from autotuner format (0, 1, 2...) - # to proxy format (actual layer index) - cfg = {self.layer_remap[k]: v for k, v in autotuner_cfg.items() if v != 11} - return cfg - - @classmethod - def init_from_bench(cls, bench: Benchmark) -> 'ConfigMeasurer': - bi = BaselineInfo.init_by_name(bench.model_name, device) - nas = NetApproxSelector(bi.baseline_net, dev_time_only=is_dev_time, ignore_fp32=not is_dev_time) - return cls( - bi.baseline_net, bi.val_loader, bi.test_loader, - bi.non_tensor_output, bi.qos_class, nas, bench - ) - - def proxy_estimate(self, cfg: ConfigT, proxy: LinearEstimator) -> Tuple[QoS, QoS]: - cfg = self.translate_config(cfg) - mean_acc, confident_acc = proxy.estimate(cfg) - return mean_acc, confident_acc - - def actual_measure( - self, cfg: ConfigT, n_runs: int, is_test_set: bool, threshold: QoS = None - ) -> Tuple[QoS, Optional[float]]: - cfg = self.translate_config(cfg) - approx = self.nas.apply_approx_by_config(cfg).module - dataloader = self.test_loader if is_test_set else self.val_loader - from tqdm import trange - qoses = [] - for _ in trange(n_runs, leave=None): - qoses.append(self.get_qos(approx, dataloader)) - mean, _, confidence = qos_stats(qoses, threshold=threshold) - return mean, confidence - - def get_knobs(self): - # Delaying computing knobs because nas can be modified externally (knobs filtered) - ext_layer_to_knobs = self.bench.pattern_match_layer_knobs(self.nas.get_layer_approxes()) - msg_logger.debug(f"Getting knobs:") - for layer, knobs in ext_layer_to_knobs.items(): - msg_logger.debug(f" {layer}: {knobs}") - return ext_layer_to_knobs - - -class PersistentState(abc.ABC): - def __init__(self): - self._substates: Dict[str, PersistentState] = {} - - def __setattr__(self, name, value): - if isinstance(value, PersistentState): - self._substates[name] = value - super().__setattr__(name, 
value) - - def dump(self): - self._dump_self() - for v in self._substates.values(): - v.dump() - - def load(self): - if self.filled(): - return - try: - self._load_self() - except (ValueError, RuntimeError, FileNotFoundError) as e: - msg_logger.info(f"Exception {e} when loading state") - for k, v in self._substates.items(): - v.load() - - def filled(self): - return self._self_is_initialized() and all((v.filled() for v in self._substates.values())) - - @abc.abstractmethod - def _dump_self(self): - pass - - @abc.abstractmethod - def _load_self(self): - pass - - @abc.abstractmethod - def _self_is_initialized(self) -> bool: - pass - - -class PersistentConfigs(PersistentState): - def __init__(self, bench: Benchmark, prefix: str, baseline_acc: QoS, rt_cpu: bool, rt_gpu: bool): - super().__init__() - self._data = [] - self._filled = False - self.bench = bench - self.prefix = prefix - self.baseline_qos = baseline_acc - self.rt_cpu_path = self.bench.result_dir / f"{prefix}_cpu.txt" if rt_cpu else None - self.rt_gpu_path = self.bench.result_dir / f"{prefix}_fp16.txt" if rt_gpu else None - - @property - def config_folder(self) -> Path: - return self.bench.result_dir / self.prefix - - @property - def configs(self) -> List[Config]: - return self._data - - def _load_self(self): - # Try reading autotuner configs and hpvm-rt configs - self._data = load_configs_from_dir(self.config_folder, self.baseline_qos) - # If hpvm-rt is not present, dump it. - # TODO: check rt format integrity - if ( - (self.rt_cpu_path and not self.rt_cpu_path.is_file()) or - (self.rt_cpu_path and not self.rt_cpu_path.is_file()) - ): - self.finalize_dump() - self._filled = True - - def _dump_self(self): - for conf in self._data: - self._dump_one(conf) - self.finalize_dump() - - def _self_is_initialized(self) -> bool: - return self._filled - - def _dump_one(self, config: Config): - if not self.config_folder.is_dir(): - os.mkdir(self.config_folder.as_posix()) - config_path = self.config_folder / config.fname - with config_path.open('w') as f: - f.write(config.to_tuner_format()) - - def append(self, config: Config): - self._data.append(config) - self._dump_one(config) - - def extend(self, configs: Iterable[Config]): - confs = [] - for conf in configs: - self._dump_one(conf) - confs.append(conf) - self._data.extend(confs) - - def finalize_dump(self, with_configs: Iterable[Config] = None): - if with_configs is not None: - self.extend(with_configs) - self._filled = True - dump_rt_format_to( - self.bench.layer_desc, self._data, self.baseline_qos, - self.rt_cpu_path, self.rt_gpu_path - ) - - -class TuningTime(PersistentState): - def __init__(self, path: Path): - super().__init__() - self.timers = {} - self.path = path - - def _load_self(self): - import re - with self.path.open() as f: - lines = f.readlines() - for line in lines: - line = line.strip() - if not line: - continue - match = re.match(r'Timer ([^=]+) = ([0-9.]+) hours', line) - if not match: - raise RuntimeError(f"File {self.path} malformed") - self.timers[match.group(1)] = float(match.group(2)) - - def _dump_self(self): - for k, v in self.timers.items(): - self._dump_one(k, v) - - def _self_is_initialized(self) -> bool: - return bool(self.timers) - - def _dump_one(self, key: str, value: float): - time_hrs = value / (60 * 60) - msg_logger.info(f"Timer {key} = {time_hrs:.3f} hours") - with self.path.open('a') as f: - f.write(f"Timer {key} = {time_hrs} hours\n") - - def add_timer(self, key: str, value: float): - self.timers[key] = value - self._dump_one(key, value) - - -class 
AccPair(PersistentState): - def __init__(self, path: Path, qos_class: Type[QoS]): - super().__init__() - self.path = path - self.qos_class = qos_class - self._data = None - - @property - def accs(self) -> Tuple[QoS, QoS]: - if self._data is None: - raise AttributeError("Accuracy not init'ed yet") - return self._data - - @accs.setter - def accs(self, value: Tuple[QoS, QoS]): - self._data = value - self._dump_self() - - def _load_self(self): - with self.path.open() as f: - acc_val, acc_test = [self.qos_class.parse(s) for s in f.read().split('\n')] - self._data = acc_val, acc_test - - def _dump_self(self): - with self.path.open('w') as f: - f.write(f"{self._data[0]}\n{self._data[1]}") - - def _self_is_initialized(self) -> bool: - return self._data is not None - - -class ExpState(PersistentState): - def __init__(self, bench: Benchmark, qos_class: Type[QoS], accs: Tuple[QoS, QoS] = None): - super().__init__() - self.bench = bench - self.baseline_accs = AccPair(bench.result_dir / 'baseline_acc.txt', qos_class) - self.baseline_accs.load() - if not self.baseline_accs.filled(): - if accs is None: - raise ValueError("Provide model baseline accuracy") - self.baseline_accs.accs = accs - acc_val, acc_test = self.baseline_accs.accs - self.all_configs = PersistentConfigs(bench, 'all', acc_val, False, False) - self.filtered_configs = PersistentConfigs(bench, 'filtered', acc_val, False, False) - self.validated_configs = PersistentConfigs(bench, 'validated', acc_val, False, False) - self.tested_configs = PersistentConfigs(bench, 'tested', acc_test, False, False) - self.valid_configs = PersistentConfigs(bench, 'valid', acc_val, True, True) - self.test_configs = PersistentConfigs(bench, 'test', acc_test, True, True) - self.timers = TuningTime(bench.result_dir / 'tuning_time.txt') - super().load() - - def _load_self(self): - pass - - def _dump_self(self): - pass - - def _self_is_initialized(self) -> bool: - return True - - def finalize_plot(self): - if not self.filled(): - raise RuntimeError("Cannot finalize before data slots are all filled") - plot_configs( - self.bench.result_dir / "all_plot.png", - all=self.all_configs.configs - ) - plot_configs( - self.bench.result_dir / "validated_tested_plot.png", - filtered=self.filtered_configs.configs, - validated=self.validated_configs.configs, - tested=self.tested_configs.configs - ) - plot_configs( - self.bench.result_dir / "filtered_plot.png", - valid=self.valid_configs.configs, - test=self.test_configs.configs - ) - - -with (Path(__file__).parent / 'utils/benchmarks.json').open() as f_: - benchmark_data = json.load(f_) -bench_tuner_data = {k: Benchmark(v) for k, v in benchmark_data.items()} diff --git a/hpvm/projects/pred_tuner/model_params b/hpvm/projects/pred_tuner/model_params deleted file mode 120000 index 90aaa403fd..0000000000 --- a/hpvm/projects/pred_tuner/model_params +++ /dev/null @@ -1 +0,0 @@ -../hpvm-tensor-rt/model_params \ No newline at end of file diff --git a/hpvm/projects/pred_tuner/models/__init__.py b/hpvm/projects/pred_tuner/models/__init__.py deleted file mode 100644 index 192f4b5bea..0000000000 --- a/hpvm/projects/pred_tuner/models/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .networks import networks -from .inference import get_all_output, move_to_device_recursively, BaselineInfo -from .domains import QoS diff --git a/hpvm/projects/pred_tuner/models/datasets/__init__.py b/hpvm/projects/pred_tuner/models/datasets/__init__.py deleted file mode 100644 index 1a1e35fcea..0000000000 --- a/hpvm/projects/pred_tuner/models/datasets/__init__.py 
+++ /dev/null @@ -1,2 +0,0 @@ -from .hpvm import CIFAR, CIFARImage, HPVMDataset, ImageNet, MNIST -from .torch import get_cifar10_test_dataset, get_cifar10_test_dataloader, get_cifar10_train_dataloader diff --git a/hpvm/projects/pred_tuner/models/datasets/hpvm.py b/hpvm/projects/pred_tuner/models/datasets/hpvm.py deleted file mode 100644 index aa871d89d8..0000000000 --- a/hpvm/projects/pred_tuner/models/datasets/hpvm.py +++ /dev/null @@ -1,163 +0,0 @@ -import logging -from pathlib import Path -from typing import Iterator, List, Tuple, TypeVar - -import numpy as np -import torch -from torch.utils.data.dataset import IterableDataset - -from models.hpvm import read_tensor_from_file - -RetT = Tuple[torch.Tensor, torch.Tensor] -T = TypeVar('T', bound='HPVMDataset') -msg_logger = logging.getLogger() - - -class HPVMDataset(IterableDataset): - def __init__(self, inputs: torch.Tensor, outputs: torch.Tensor): - self.inputs, self.outputs = inputs, outputs - - @classmethod - def from_file(cls, *args, **kwargs): - pass - - @property - def sample_input(self): - inputs, outputs = next(iter(self)) - return inputs - - def __len__(self) -> int: - return len(self.inputs) - - def __getitem__(self, idx) -> RetT: - if idx >= len(self): - raise IndexError("Dataset index out of range") - return self.inputs[idx], self.outputs[idx] - - def __iter__(self) -> Iterator[RetT]: - for i in range(len(self)): - yield self[i] - - -class HPVMDNNDataset(HPVMDataset): - @classmethod - def _from_file( - cls, input_file: Path, labels_file: Path, is_uint8_label: bool, - count: int, offset: int, *item_shapes: int - ): - # NOTE: assuming (N, *) ordering of inputs (such as NCHW, NHWC) - channel_size = np.prod(np.array(item_shapes)) - if count != -1: - count *= channel_size - offset *= channel_size - inputs = read_tensor_from_file( - input_file, -1, *item_shapes, count=count, offset=offset, - use_progress_bar=True - ) - label_read_ty = np.int8 if is_uint8_label else np.int32 - labels = read_tensor_from_file( - labels_file, -1, read_ty=label_read_ty, cast_ty=np.long, - count=count, offset=offset - ) - if inputs.size(0) != labels.size(0): - raise ValueError("Input and output have different number of data points") - msg_logger.info(f"{inputs.shape[0]} entries loaded from dataset.") - return cls(inputs, labels) - - @classmethod - def from_default_file(cls, prefix: str): - prefix = Path(prefix) - return cls.from_file( - Path(prefix) / 'input.bin', Path(prefix) / 'labels.bin' - ) - - -class MNIST(HPVMDNNDataset): - @classmethod - def from_file( - cls, input_file: Path, labels_file: Path, count: int = -1, offset: int = 0 - ): - return cls._from_file( - input_file, labels_file, True, count, offset, 1, 28, 28 - ) - - -class CIFAR(HPVMDNNDataset): - @classmethod - def from_file( - cls, input_file: Path, labels_file: Path, count: int = -1, offset: int = 0 - ): - return cls._from_file( - input_file, labels_file, True, count, offset, 3, 32, 32 - ) - - -class ImageNet(HPVMDNNDataset): - @classmethod - def from_file( - cls, input_file: Path, labels_file: Path, count: int = -1, offset: int = 0 - ): - return cls._from_file( - input_file, labels_file, False, count, offset, 3, 224, 224 - ) - - -class HPVMImageDataset(HPVMDataset): - @classmethod - def _from_file( - cls, input_file: Path, output_file: Path, - count: int, offset: int, input_shape: List[int], output_shape: List[int] - ): - # NOTE: assuming (N, *) ordering of inputs (such as NCHW, NHWC) - channel_size = np.prod(np.array(input_shape)) - if count != -1: - count *= channel_size - offset *= 
channel_size - inputs = read_tensor_from_file( - input_file, -1, *input_shape, count=count, offset=offset, - use_progress_bar=True - ) - outputs = read_tensor_from_file( - output_file, -1, *output_shape, count=count, offset=offset, - use_progress_bar=True - ) - msg_logger.info(f"(input={inputs.shape[0]}, output={outputs.shape[0]}) entries loaded from dataset.") - return cls(inputs, outputs) - - @classmethod - def from_default_file(cls, prefix: str): - prefix = Path(prefix) - return cls.from_file( - Path(prefix) / 'input.bin', Path(prefix) / 'canny_input.bin', - Path(prefix) / 'labels.bin', Path(prefix) / 'output.bin' - ) - - -class CIFARImage(HPVMImageDataset): - def __init__( - self, inputs: torch.Tensor, outputs: torch.Tensor, cifar: CIFAR - ): - super().__init__(inputs, outputs) - self.cifar = cifar - - @classmethod - def from_file( - cls, dnn_input_file: Path, image_input_file: Path, - labels_file: Path, output_file: Path, - batch_size: int = 100, count: int = -1, offset: int = 0 - ): - classifier = CIFAR.from_file(dnn_input_file, labels_file) - dataset = HPVMImageDataset._from_file( - image_input_file, output_file, count, offset, - [3, 128, 128], [1, 128, 128] - ) - return cls(dataset.inputs, dataset.outputs, classifier) - - def sample(self: 'CIFARImage', ratio: float) -> 'CIFARImage': - raise NotImplementedError() - - def __getitem__(self, idx): - if idx >= len(self): - raise IndexError("Dataset index out of range") - cifar_in, cifar_out = self.cifar[idx] - return (cifar_in, self.inputs[idx]), (cifar_out, self.outputs[idx]) diff --git a/hpvm/projects/pred_tuner/models/datasets/torch.py b/hpvm/projects/pred_tuner/models/datasets/torch.py deleted file mode 100644 index 1b07bd17c7..0000000000 --- a/hpvm/projects/pred_tuner/models/datasets/torch.py +++ /dev/null @@ -1,37 +0,0 @@ -import logging - -from torch.utils.data import DataLoader -from torchvision.datasets import CIFAR10 -from torchvision.transforms import transforms - -msg_logger = logging.getLogger() - - -def get_cifar10_train_dataloader(root: str, batchsize: int) -> DataLoader: - transform_train = transforms.Compose([ - transforms.RandomCrop(32, padding=4), - transforms.RandomHorizontalFlip(), - transforms.ToTensor(), - transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), - ]) - dl = DataLoader( - CIFAR10(root=root, train=True, download=True, transform=transform_train), - batch_size=batchsize, shuffle=True - ) - msg_logger.info(f"{len(dl.dataset)} entries loaded from training dataset.") - return dl - - -def get_cifar10_test_dataset(root: str) -> CIFAR10: - transform_test = transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), - ]) - dataset = CIFAR10(root=root, train=False, download=True, transform=transform_test) - msg_logger.info(f"{len(dataset)} entries loaded from test dataset.") - return dataset - - -def get_cifar10_test_dataloader(root: str, batchsize: int) -> DataLoader: - dl = DataLoader(get_cifar10_test_dataset(root), batch_size=batchsize) - return dl diff --git a/hpvm/projects/pred_tuner/models/domains/__init__.py b/hpvm/projects/pred_tuner/models/domains/__init__.py deleted file mode 100644 index abe6c13a37..0000000000 --- a/hpvm/projects/pred_tuner/models/domains/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .qoses import QoS, Accuracy, qos_stats diff --git a/hpvm/projects/pred_tuner/models/domains/qoses.py b/hpvm/projects/pred_tuner/models/domains/qoses.py deleted file mode 100644 index 0a1e7f2eb1..0000000000 ---
a/hpvm/projects/pred_tuner/models/domains/qoses.py +++ /dev/null @@ -1,317 +0,0 @@ -import abc -from typing import Iterable, List, Optional, Tuple - -import numpy as np -import torch -from torch.utils.data import DataLoader - - -class QoS(abc.ABC): - @abc.abstractmethod - def __sub__(self, other: 'QoS') -> 'QoS': - pass - - @abc.abstractmethod - def __add__(self, other: 'QoS') -> 'QoS': - pass - - @abc.abstractmethod - def __truediv__(self, other: float) -> 'QoS': - pass - - @abc.abstractmethod - def __lt__(self, other: 'QoS') -> bool: - pass - - @abc.abstractmethod - def __eq__(self, other: 'QoS') -> bool: - pass - - def __gt__(self, other: 'QoS') -> bool: - return not self <= other - - def __le__(self, other: 'QoS') -> bool: - return self < other or self == other - - def __ge__(self, other: 'QoS') -> bool: - return not self < other - - @abc.abstractmethod - def __hash__(self): - pass - - @abc.abstractmethod - def __repr__(self) -> str: - pass - - @abc.abstractmethod - def to_scalar(self, relative_to=None) -> float: - pass - - @abc.abstractmethod - def numpy(self) -> np.ndarray: - pass - - @abc.abstractmethod - def null(self) -> 'QoS': - pass - - @staticmethod - @abc.abstractmethod - def parse(string: str) -> 'QoS': - pass - - @abc.abstractmethod - def min_positive_loss(self) -> 'QoS': - pass - - @staticmethod - @abc.abstractmethod - def suggested_tuner_thresholds(baseline: 'QoS') -> List['QoS']: - pass - - @staticmethod - @abc.abstractmethod - def suggested_val_threshold(baseline: 'QoS') -> 'QoS': - pass - - @staticmethod - @abc.abstractmethod - def suggested_test_threshold(baseline: 'QoS') -> 'QoS': - pass - - @staticmethod - @abc.abstractmethod - def from_output(output, ground_truth) -> 'QoS': - pass - - @classmethod - def combine_qoses(cls, qoses: Iterable['QoS']) -> 'QoS': - qoses = np.array(qoses) - return qoses.mean() - - @classmethod - def from_all_output(cls, outputs: List, dataloader: DataLoader) -> 'QoS': - if not outputs: - raise ValueError("Empty output has no QoS value") # Probably can result cls.null() - qoses = [] - for (_, gt_output), output in zip(dataloader, outputs): - qoses.append(cls.from_output(output, gt_output)) - return cls.combine_qoses(qoses) - - -class ScalarQoS(QoS, abc.ABC): - def __init__(self, value: float): - self.value = value - - def __sub__(self, other: 'ScalarQoS') -> 'ScalarQoS': - return self.__class__(self.value - other.value) - - def __add__(self, other: 'ScalarQoS') -> 'ScalarQoS': - return self.__class__(self.value + other.value) - - def __truediv__(self, other: float): - return self.__class__(self.value / other) - - def __lt__(self, other: 'ScalarQoS') -> bool: - return self.value < other.value - - def __eq__(self, other: 'ScalarQoS') -> bool: - return self.value == other.value - - def __hash__(self): - return hash(self.value) - - def __repr__(self) -> str: - return repr(self.value) - - def null(self) -> 'ScalarQoS': - return self.__class__(0.0) - - def to_scalar(self, relative_to=None) -> float: - return self.value - - def numpy(self) -> np.ndarray: - return np.array([self.value]) - - @classmethod - def parse(cls, string: str) -> 'ScalarQoS': - return cls(float(string)) - - -class Accuracy(ScalarQoS): - def __init__(self, accuracy: float): - super().__init__(accuracy) - - def min_positive_loss(self) -> 'Accuracy': - return Accuracy(0.05) if self.value < 0 else self - - @staticmethod - def suggested_tuner_thresholds(baseline: 'Accuracy') -> List['Accuracy']: - return [baseline - Accuracy(0.8), baseline - Accuracy(1.5), baseline - Accuracy(2.1)] 
- - @staticmethod - def suggested_val_threshold(baseline: 'Accuracy') -> 'Accuracy': - return baseline - Accuracy(2.1) - - @staticmethod - def suggested_test_threshold(baseline: 'Accuracy') -> 'Accuracy': - return baseline - Accuracy(3.0) - - @staticmethod - def from_output(output: torch.Tensor, ground_truth: torch.Tensor) -> 'Accuracy': - ground_truth = ground_truth.to(output.device) - correct = output.argmax(dim=1).eq(ground_truth).sum().item() - acc = correct / ground_truth.shape[0] - return Accuracy(acc * 100) - - -class PSNR(ScalarQoS): - artificial_max = 100 - - def __init__(self, psnr: float): - super().__init__(psnr) - - def min_positive_loss(self) -> 'PSNR': - return PSNR(1) if self.value < 0 else self - - @staticmethod - def suggested_tuner_thresholds(baseline: 'PSNR') -> List['PSNR']: - return [PSNR(30), PSNR(25), PSNR(20)] - - @staticmethod - def suggested_val_threshold(baseline: 'PSNR') -> 'PSNR': - return PSNR(20) - - @staticmethod - def suggested_test_threshold(baseline: 'PSNR') -> 'PSNR': - return PSNR(20) - - @staticmethod - def from_output(output: torch.Tensor, ground_truth: torch.Tensor) -> 'PSNR': - ground_truth = ground_truth.to(output.device) - if ground_truth.shape[0] != 0: - max_i = ground_truth.max() - mse = torch.sum((output - ground_truth) ** 2) / output.nelement() - psnr = (20 * torch.log10(max_i) - 10 * torch.log10(mse)).item() - else: - psnr = PSNR.artificial_max - return PSNR(psnr) - - -class MultiQoS(QoS, abc.ABC): - def __init__(self, *qoses: ScalarQoS): - self.qoses = qoses - - def __sub__(self, other: 'MultiQoS') -> 'MultiQoS': - assert type(self) == type(other) - return self.__class__(*(x - y for x, y in zip(self.qoses, other.qoses))) - - def __add__(self, other: 'MultiQoS') -> 'MultiQoS': - assert type(self) == type(other) - return self.__class__(*(x + y for x, y in zip(self.qoses, other.qoses))) - - def __truediv__(self, other: int): - return self.__class__(*(x / other for x in self.qoses)) - - def __lt__(self, other: 'MultiQoS') -> bool: - assert type(self) == type(other) - return all((x < y for x, y in zip(self.qoses, other.qoses))) - - def __eq__(self, other: 'MultiQoS') -> bool: - assert type(self) == type(other) - return all((x == y for x, y in zip(self.qoses, other.qoses))) - - def __hash__(self): - return hash(self.qoses) - - def __repr__(self) -> str: - return ','.join(repr(q) for q in self.qoses) - - def null(self) -> 'MultiQoS': - return MultiQoS(*(q.null() for q in self.qoses)) - - def numpy(self) -> np.ndarray: - return np.array([q.to_scalar() for q in self.qoses]) - - def min_positive_loss(self) -> 'MultiQoS': - return self.__class__(*(q.min_positive_loss() for q in self.qoses)) - - -PairT = Tuple[torch.Tensor, torch.Tensor] -TripleT = Tuple[torch.Tensor, torch.Tensor, torch.Tensor] - - -class AccuracyPSNR(MultiQoS): - def __init__(self, acc: Accuracy, psnr: PSNR): - super().__init__(acc, psnr) - - def to_scalar(self, relative_to: 'AccuracyPSNR' = None) -> float: - acc, psnr = self.qoses - if relative_to is not None: - thres_acc, thres_psnr = relative_to.qoses - punishment = (-1 if acc < thres_acc else 0) + (-1 if psnr < thres_psnr else 0) - else: - punishment = 0 - max_psnr = PSNR.artificial_max - normed_psnr = min(psnr.value, max_psnr) / max_psnr # [0, 1], higher better - acc = acc.value / 100 # [0, 1], higher better - combined = (acc + normed_psnr) / 2 # [0, 1], higher better - assert 0 <= combined <= 1 - return combined + punishment - - @staticmethod - def parse(string: str) -> 'AccuracyPSNR': - acc, psnr = string.split(',') - return 
AccuracyPSNR(Accuracy.parse(acc), PSNR.parse(psnr)) - - # noinspection PyTypeChecker - @staticmethod - def suggested_tuner_thresholds(baseline: 'AccuracyPSNR') -> List['AccuracyPSNR']: - ret = [] - for acc in Accuracy.suggested_tuner_thresholds(baseline.qoses[0]): - for psnr in PSNR.suggested_tuner_thresholds(baseline.qoses[1]): - ret.append(AccuracyPSNR(acc, psnr)) - return ret - - # noinspection PyTypeChecker - @staticmethod - def suggested_val_threshold(baseline: 'AccuracyPSNR') -> 'AccuracyPSNR': - return AccuracyPSNR( - Accuracy.suggested_val_threshold(baseline.qoses[0]), - PSNR.suggested_val_threshold(baseline.qoses[1]) - ) - - # noinspection PyTypeChecker - @staticmethod - def suggested_test_threshold(baseline: 'AccuracyPSNR') -> 'AccuracyPSNR': - return AccuracyPSNR( - Accuracy.suggested_test_threshold(baseline.qoses[0]), - PSNR.suggested_test_threshold(baseline.qoses[1]) - ) - - @staticmethod - def from_output(output: TripleT, ground_truth: PairT) -> 'AccuracyPSNR': - gt_labels, gt_images = ground_truth - labels, image_selection, images = output - gt_labels = gt_labels.to(labels.device) - gt_images = gt_images.to(images.device) - acc = Accuracy.from_output(labels, gt_labels) - gt_images = gt_images[image_selection] - psnr = PSNR.from_output(images, gt_images) - return AccuracyPSNR(acc, psnr) - - -def qos_stats(qoses: List[QoS], confidence: float = None, threshold: QoS = None) -> \ - Tuple[QoS, Optional[QoS], Optional[float]]: - qoses = np.array(qoses) - n_runs = len(qoses) - confidence_at_thres = np.count_nonzero(qoses > threshold) / n_runs if threshold else None - if confidence is None: - qos_at_confidence = None - else: - index = int((1 - confidence) * n_runs) - # Otherwise it's np.float64 and causes trouble with opentuner - qos_at_confidence = qoses[index] - mean_acc = qoses.mean() - return mean_acc, qos_at_confidence, confidence_at_thres diff --git a/hpvm/projects/pred_tuner/models/hpvm/__init__.py b/hpvm/projects/pred_tuner/models/hpvm/__init__.py deleted file mode 100644 index 337738c0bf..0000000000 --- a/hpvm/projects/pred_tuner/models/hpvm/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -from .alexnet import AlexNet, AlexNet2, AlexNetImageNet -from .alexnet_canny import AlexNet2Canny -from .layers import HPVMConvBundle, HPVMDNN, HPVMDefaultModule, read_tensor_from_file -from .lenet import LeNet -from .mobilenet import MobileNet -from .resnet import ResNet18, ResNet50 -from .vgg16 import VGG16Cifar10, VGG16Cifar100, VGG16ImageNet diff --git a/hpvm/projects/pred_tuner/models/hpvm/alexnet.py b/hpvm/projects/pred_tuner/models/hpvm/alexnet.py deleted file mode 100644 index b7c9b6c3ca..0000000000 --- a/hpvm/projects/pred_tuner/models/hpvm/alexnet.py +++ /dev/null @@ -1,49 +0,0 @@ -from torch.nn import Linear, ReLU, Sequential, Tanh - -from .layers import HPVMConvBundle, HPVMDNN - - -class AlexNet(HPVMDNN): - def __init__(self): - convs = Sequential( - HPVMConvBundle(3, 64, 11, Tanh, pool_size=2, padding=5), - HPVMConvBundle(64, 192, 5, Tanh, pool_size=2, padding=2), - HPVMConvBundle(192, 384, 3, Tanh, padding=1), - HPVMConvBundle(384, 256, 3, Tanh, padding=1), - HPVMConvBundle(256, 256, 3, Tanh, pool_size=2, padding=1) - ) - linears = Sequential(Linear(4096, 10)) - super().__init__(convs, linears) - - -class AlexNet2(HPVMDNN): - def __init__(self): - convs = Sequential( - HPVMConvBundle(3, 32, 3, Tanh, padding=1), - HPVMConvBundle(32, 32, 3, Tanh, pool_size=2, padding=1), - HPVMConvBundle(32, 64, 3, Tanh, padding=1), - HPVMConvBundle(64, 64, 3, Tanh, pool_size=2, padding=1), - 
HPVMConvBundle(64, 128, 3, Tanh, padding=1), - HPVMConvBundle(128, 128, 3, Tanh, pool_size=2, padding=1) - ) - linears = Sequential(Linear(2048, 10)) - super().__init__(convs, linears) - - -class AlexNetImageNet(HPVMDNN): - def __init__(self): - convs = Sequential( - HPVMConvBundle(3, 64, 11, ReLU, padding=2, stride=4, pool_size=3, pool_stride=2), - HPVMConvBundle(64, 192, 5, ReLU, padding=2, pool_size=3, pool_stride=2), - HPVMConvBundle(192, 384, 3, ReLU, padding=1), - HPVMConvBundle(384, 256, 3, ReLU, padding=1), - HPVMConvBundle(256, 256, 3, ReLU, padding=1, pool_size=3, pool_stride=2) - ) - linears = Sequential( - Linear(9216, 4096), - ReLU(), - Linear(4096, 4096), - ReLU(), - Linear(4096, 1000), - ) - super().__init__(convs, linears) diff --git a/hpvm/projects/pred_tuner/models/hpvm/alexnet_canny.py b/hpvm/projects/pred_tuner/models/hpvm/alexnet_canny.py deleted file mode 100644 index 5e61027912..0000000000 --- a/hpvm/projects/pred_tuner/models/hpvm/alexnet_canny.py +++ /dev/null @@ -1,48 +0,0 @@ -from typing import Iterable, Tuple - -import torch -from torch.nn import Softmax - -from .alexnet import AlexNet2 -from .layers import HPVMConvBundle, HPVMDefaultModule, ReduceKind, TensorReduce - - -class AlexNet2Canny(HPVMDefaultModule): - def __init__(self, on_classes: Iterable[int]): - super().__init__() - prototype = AlexNet2() - self.on_classes = list(on_classes) - self.convs = prototype.convs - self.linears = prototype.linears - self.softmax = Softmax(1) - self.reduce_1 = TensorReduce(1, ReduceKind.sum) - self.gaussian = HPVMConvBundle(1, 1, 5, padding=2, bias=False) - self.sobel_x = HPVMConvBundle(1, 1, 3, padding=1, bias=False) - self.sobel_y = HPVMConvBundle(1, 1, 3, padding=1, bias=False) - self.reduce_2 = TensorReduce(2, ReduceKind.max) - self.reduce_3 = TensorReduce(2, ReduceKind.max) - - def canny(self, images: torch.Tensor) -> torch.Tensor: - assert len(images.shape) == 4 # Assuming NCHW - grayscale = self.reduce_1(images) - grayscale = grayscale.unsqueeze(1) - denoised = self.gaussian(grayscale) - grad_x = self.sobel_x(denoised) - grad_y = self.sobel_y(denoised) - grad_mag = torch.sqrt(grad_x ** 2 + grad_y ** 2) - grad_max_1D = self.reduce_2(grad_mag) - grad_max = self.reduce_3(grad_max_1D) - grad_max = grad_max.unsqueeze(2).unsqueeze(3) - grad_mag_norm = grad_mag / grad_max - return grad_mag_norm - - def forward(self, inputs) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - from functools import reduce - from operator import ior - dnn_input, canny_input = inputs - conv_outputs = self.convs(dnn_input) - dnn_outputs = self.softmax(self.linears(conv_outputs.view(conv_outputs.shape[0], -1))) - classes = dnn_outputs.argmax(dim=1) - selection = reduce(ior, (classes == i for i in self.on_classes)) - selected_inputs = canny_input[selection] - return dnn_outputs, selection, self.canny(selected_inputs) diff --git a/hpvm/projects/pred_tuner/models/hpvm/layers.py b/hpvm/projects/pred_tuner/models/hpvm/layers.py deleted file mode 100644 index fed66e7b15..0000000000 --- a/hpvm/projects/pred_tuner/models/hpvm/layers.py +++ /dev/null @@ -1,223 +0,0 @@ -from enum import Enum -from pathlib import Path -from typing import Callable, Dict, List, Optional, Tuple, Union - -import numpy as np -import torch -from torch.nn import AvgPool2d, BatchNorm2d, Conv2d, Linear, MaxPool2d, Module, Parameter, ReLU, Sequential, Softmax, \ - Tanh - - -def rsetattr(obj, attr, val): - pre, _, post = attr.rpartition('.') - return setattr(rgetattr(obj, pre) if pre else obj, post, val) - - -def rgetattr(obj, 
attr, *args): - def _getattr(obj_, attr_): - return getattr(obj_, attr_, *args) - - import functools - return functools.reduce(_getattr, attr.split('.'), obj) - - -def read_tensor_from_file( - filename: Union[str, Path], *shape: int, - read_ty=np.float32, cast_ty=np.float32, - count: int = -1, offset: int = 0, - use_progress_bar: bool = False -) -> torch.Tensor: - from tqdm import trange - block_size = 102400 - offset = offset * read_ty().itemsize - mmap = np.memmap(filename, dtype=read_ty, mode='r', offset=offset) - raw = np.empty_like(mmap) - n_entries = min(mmap.shape[0], count) if count != -1 else mmap.shape[0] - n_blocks = int(np.ceil(n_entries / block_size)) - iterable = trange(n_blocks) if use_progress_bar else range(n_blocks) - for block in iterable: - l, r = block * block_size, min(n_entries, (block + 1) * block_size) - raw[l:r] = mmap[l:r] - del mmap - if cast_ty != read_ty: - raw = raw.astype(cast_ty) - loaded_np = raw.reshape(shape) - return torch.from_numpy(loaded_np) - - -ActivT = Optional[Callable[[], Module]] -ArgsT = Union[List, Dict] -RangeT = Tuple[float, float] -RangeOT = Optional[RangeT] - - -class HPVMConvBundle(Module): - def __init__( - self, in_channels: int, out_channels: int, kernel_size: int, - activation: ActivT = None, - pool_size: Optional[int] = None, pool_stride: Optional[int] = None, - **conv_kwargs - ): - super().__init__() - self.conv = Conv2d(in_channels, out_channels, kernel_size, **conv_kwargs) - if pool_size is None: - self.pooling = Sequential() - else: - pool_stride = pool_stride or pool_size - self.pooling = MaxPool2d(pool_size, stride=pool_stride) - self.activation = Sequential() if activation is None else activation() - self.conv_ranges_ = None - - def forward(self, input_: torch.Tensor) -> torch.Tensor: - return self.activation(self.pooling(self.conv(input_))) - - def input_to_conv(self, input_: torch.Tensor) -> torch.Tensor: - bias = self.conv.bias - self.conv.bias = None - conv_out = self.conv(input_) - self.conv.bias = bias - return conv_out - - def conv_to_output(self, conv_output: torch.Tensor) -> torch.Tensor: - if self.conv.bias is not None: - broadcast_bias = self.conv.bias.reshape(1, -1, 1, 1) - return self.activation(self.pooling(conv_output + broadcast_bias)) - else: - return self.activation(self.pooling(conv_output)) - - def __getattr__(self, item): - if item in ('weight', 'bias'): - return getattr(self.conv, item) - return super(HPVMConvBundle, self).__getattr__(item) - - def __setattr__(self, key, value): - if key in ('weight', 'bias'): - setattr(self.conv, key, value) - else: - super(HPVMConvBundle, self).__setattr__(key, value) - - -class ReduceKind(Enum): - sum = 1 - max = 2 - - -class TensorReduce(Module): - def __init__(self, dim: int, kind: ReduceKind, skip_ratio: float = 0.0): - super().__init__() - self.dim, self.kind = dim, kind - self.skip_ratio = skip_ratio - if kind == ReduceKind.sum: - self.reducer = lambda x: x.sum(dim=0) # Because we transpose the input - self.normalizer = lambda x: x / (1 - self.skip_ratio) - elif kind == ReduceKind.max: - self.reducer = lambda x: x.max(dim=0)[0] - self.normalizer = lambda x: x - - def forward(self, inputs: torch.Tensor) -> torch.Tensor: - from math import ceil - inputs_t = inputs.transpose(0, self.dim) - if len(inputs) == 0: - dim_reduced = torch.zeros_like(inputs_t)[0] - else: - reduce_dim_size = inputs_t.size(0) - approxed_dim_size = int(ceil((1 - self.skip_ratio) * reduce_dim_size)) - # Take a contiguous chunk and reduce over it, ignore the rest - dim_reduced: torch.Tensor =
self.normalizer(self.reducer(inputs_t[:approxed_dim_size])) - return dim_reduced.unsqueeze(0).transpose(0, self.dim).squeeze(self.dim) - - def change_skip_ratio(self, skip_ratio: float) -> 'TensorReduce': - return TensorReduce(self.dim, self.kind, skip_ratio) - - -def read_quant_ranges(prefix: Path): - range_file = prefix / 'quant_ranges.txt' - if not range_file.is_file(): - return None - with range_file.open() as f: - return [[float(field) for field in line.strip().split()] for line in f.readlines()] - - -class HPVMDefaultModule(Module): - @staticmethod - def load_into_layer( - layer: Module, attr_name: str, filename: str, prefix: Path, - is_linear_weight: bool = False - ): - tensor = rgetattr(layer, attr_name) - if is_linear_weight: - n_out, n_in = tensor.shape - loaded = read_tensor_from_file(prefix / filename, n_in, n_out).T - else: - loaded = read_tensor_from_file(prefix / filename, *tensor.shape) - if type(tensor) is Parameter: - loaded = Parameter(loaded, requires_grad=True) - rsetattr(layer, attr_name, loaded) - - @staticmethod - def install_quant_range(module: Module, values: List[float]): - in_min, in_max, w_min, w_max, b_min, b_max, out_min, out_max = values - module.conv_ranges = (in_min, in_max), (w_min, w_max), (b_min, b_max), (out_min, out_max) - - def default_load_hpvm_weights(self, prefix: str): - # TODO: this is probably better done with help of ModuleDAG - prefix = Path(prefix) - convs, group_convs, linears, bns = [], [], [], [] - weightless_types = AvgPool2d, MaxPool2d, ReLU, Tanh, Softmax, TensorReduce - container_types = (Sequential,) - for module in self.modules(): - if isinstance(module, HPVMConvBundle): - convs.append(module) - elif isinstance(module, Conv2d): - if module.groups != 1: - group_convs.append(module) - elif isinstance(module, Linear): - linears.append(module) - elif isinstance(module, BatchNorm2d): - bns.append(module) - elif type(module) in weightless_types: - pass - elif type(module) in container_types or len(list(module.children())) != 0: - continue - else: - raise RuntimeError(f"Layer type {type(module)} not understood") - load = self.load_into_layer - quant_ranges = read_quant_ranges(prefix) - quant_ranges_idx = 0 - for i, conv in enumerate(convs): - conv: HPVMConvBundle - load(conv, 'weight', f"conv2d_{i + 1}_w.bin", prefix) - if conv.bias is not None: - load(conv, 'bias', f"conv2d_{i + 1}_b.bin", prefix) - if quant_ranges is not None: - self.install_quant_range(conv, quant_ranges[quant_ranges_idx]) - quant_ranges_idx += 1 - for i, gconv in enumerate(group_convs): - load(gconv, 'weight', f"depthwise_conv2d_{i + 1}_w.bin", prefix) - if gconv.bias is not None: - load(gconv, 'bias', f"depthwise_conv2d_{i + 1}_b.bin", prefix) - for i, bn in enumerate(bns): - bn: BatchNorm2d - load(bn, 'weight', f"batch_normalization_{i + 1}_gamma.bin", prefix) - load(bn, 'bias', f"batch_normalization_{i + 1}_beta.bin", prefix) - load(bn, 'running_mean', f"batch_normalization_{i + 1}_mean.bin", prefix) - load(bn, 'running_var', f"batch_normalization_{i + 1}_variance.bin", prefix) - for i, linear in enumerate(linears): - load(linear, 'weight', f"dense_{i + 1}_w.bin", prefix, True) - load(linear, 'bias', f"dense_{i + 1}_b.bin", prefix) - if quant_ranges is not None: - self.install_quant_range(linear, quant_ranges[quant_ranges_idx]) - quant_ranges_idx += 1 - assert quant_ranges is None or len(quant_ranges) == quant_ranges_idx - - -class HPVMDNN(HPVMDefaultModule): - def __init__(self, convs: Sequential, linears: Sequential): - super().__init__() - self.convs = convs - 
self.linears = linears - self.softmax = Softmax(1) - - def forward(self, inputs: torch.Tensor) -> torch.Tensor: - outputs = self.convs(inputs) - return self.softmax(self.linears(outputs.view(outputs.shape[0], -1))) diff --git a/hpvm/projects/pred_tuner/models/hpvm/lenet.py b/hpvm/projects/pred_tuner/models/hpvm/lenet.py deleted file mode 100644 index 0802b5f78d..0000000000 --- a/hpvm/projects/pred_tuner/models/hpvm/lenet.py +++ /dev/null @@ -1,16 +0,0 @@ -from torch.nn import Linear, Sequential, Tanh - -from .layers import HPVMConvBundle, HPVMDNN - - -class LeNet(HPVMDNN): - def __init__(self): - convs = Sequential( - HPVMConvBundle(1, 32, 5, Tanh, 2, padding=2), - HPVMConvBundle(32, 64, 5, Tanh, 2, padding=2) - ) - linears = Sequential( - Linear(7 * 7 * 64, 1024), Tanh(), - Linear(1024, 10), Tanh() - ) - super().__init__(convs, linears) diff --git a/hpvm/projects/pred_tuner/models/hpvm/mobilenet.py b/hpvm/projects/pred_tuner/models/hpvm/mobilenet.py deleted file mode 100644 index f48a214fc9..0000000000 --- a/hpvm/projects/pred_tuner/models/hpvm/mobilenet.py +++ /dev/null @@ -1,45 +0,0 @@ -from torch.nn import AvgPool2d, BatchNorm2d, Conv2d, Linear, ReLU, Sequential - -from .layers import HPVMDNN, HPVMConvBundle - - -def _make_seq(in_channels, out_channels, c_kernel_size, gc_stride, gc_kernel_size=3): - return Sequential( - HPVMConvBundle( - in_channels, out_channels, c_kernel_size, - bias=False, padding=(c_kernel_size - 1) // 2 - ), - BatchNorm2d(out_channels, eps=0.001), - ReLU(), - Conv2d( - out_channels, out_channels, gc_kernel_size, - bias=False, stride=gc_stride, padding=(gc_kernel_size - 1) // 2, groups=out_channels - ), - BatchNorm2d(out_channels, eps=0.001), - ReLU() - ) - - -class MobileNet(HPVMDNN): - def __init__(self): - convs = Sequential( - _make_seq(3, 32, 3, 1), - _make_seq(32, 64, 1, 2), - _make_seq(64, 128, 1, 1), - _make_seq(128, 128, 1, 2), - _make_seq(128, 256, 1, 1), - _make_seq(256, 256, 1, 2), - _make_seq(256, 512, 1, 1), - _make_seq(512, 512, 1, 1), - _make_seq(512, 512, 1, 1), - _make_seq(512, 512, 1, 1), - _make_seq(512, 512, 1, 1), - _make_seq(512, 512, 1, 2), - _make_seq(512, 1024, 1, 1), - HPVMConvBundle(1024, 1024, 1, padding=0, bias=False), - BatchNorm2d(1024, eps=0.001), - ReLU(), - AvgPool2d(2) - ) - linears = Sequential(Linear(1024, 10)) - super().__init__(convs, linears) diff --git a/hpvm/projects/pred_tuner/models/hpvm/resnet.py b/hpvm/projects/pred_tuner/models/hpvm/resnet.py deleted file mode 100644 index fc42a00001..0000000000 --- a/hpvm/projects/pred_tuner/models/hpvm/resnet.py +++ /dev/null @@ -1,96 +0,0 @@ -from torch.nn import AvgPool2d, BatchNorm2d, Linear, Module, ReLU, Sequential - -from .layers import HPVMConvBundle, HPVMDNN - - -class BasicBlock(Module): - def __init__(self, ins, outs, shortcut=False): - super().__init__() - stride = 2 if shortcut else 1 - self.mainline = Sequential( - HPVMConvBundle(ins, outs, 3, ReLU, padding=1, stride=stride), - HPVMConvBundle(outs, outs, 3, padding=1) - ) - self.relu1 = ReLU() - self.shortcut = HPVMConvBundle(ins, outs, 1, stride=stride) \ - if shortcut else Sequential() - - def forward(self, input_): - return self.relu1(self.mainline(input_) + self.shortcut(input_)) - - -class ResNet18(HPVMDNN): - def __init__(self): - convs = Sequential( - HPVMConvBundle(3, 16, 3, ReLU, padding=1), - BasicBlock(16, 16), - BasicBlock(16, 16), - BasicBlock(16, 16), - BasicBlock(16, 32, True), - BasicBlock(32, 32), - BasicBlock(32, 32), - BasicBlock(32, 64, True), - BasicBlock(64, 64), - BasicBlock(64, 64), - 
AvgPool2d(8) - ) - linears = Sequential(Linear(64, 10)) - super().__init__(convs, linears) - - -class Bottleneck(Module): - expansion = 4 - - def __init__(self, in_planes, planes, stride=1): - super(Bottleneck, self).__init__() - self.mainline = Sequential( - HPVMConvBundle(in_planes, planes, 1, stride=stride), - BatchNorm2d(planes, eps=0.001), - ReLU(), - HPVMConvBundle(planes, planes, 3, padding=1), - BatchNorm2d(planes, eps=0.001), - ReLU(), - HPVMConvBundle(planes, self.expansion * planes, 1), - BatchNorm2d(self.expansion * planes, eps=0.001) - ) - self.relu1 = ReLU() - if stride != 1 or in_planes != self.expansion * planes: - self.shortcut = Sequential( - HPVMConvBundle(in_planes, self.expansion * planes, 1, stride=stride), - BatchNorm2d(self.expansion * planes, eps=0.001) - ) - else: - self.shortcut = Sequential() - - def forward(self, input_): - return self.relu1(self.mainline(input_) + self.shortcut(input_)) - - -class ResNet50(HPVMDNN): - def __init__(self): - convs = Sequential( - HPVMConvBundle(3, 64, 7, ReLU, pool_size=3, pool_stride=2, padding=3, stride=2), - BatchNorm2d(64, eps=0.001), - Bottleneck(64, 64), - Bottleneck(256, 64), - Bottleneck(256, 64), - - Bottleneck(256, 128, stride=2), - Bottleneck(512, 128), - Bottleneck(512, 128), - Bottleneck(512, 128), - - Bottleneck(512, 256, stride=2), - Bottleneck(1024, 256), - Bottleneck(1024, 256), - Bottleneck(1024, 256), - Bottleneck(1024, 256), - Bottleneck(1024, 256), - - Bottleneck(1024, 512, stride=2), - Bottleneck(2048, 512), - Bottleneck(2048, 512), - AvgPool2d(7) - ) - linears = Sequential(Linear(2048, 1000)) - super().__init__(convs, linears) diff --git a/hpvm/projects/pred_tuner/models/hpvm/vgg16.py b/hpvm/projects/pred_tuner/models/hpvm/vgg16.py deleted file mode 100644 index b31c0d47ca..0000000000 --- a/hpvm/projects/pred_tuner/models/hpvm/vgg16.py +++ /dev/null @@ -1,44 +0,0 @@ -from typing import Iterable - -from torch.nn import Linear, ReLU, Sequential - -from .layers import HPVMConvBundle, HPVMDNN - - -class _VGG16(HPVMDNN): - def __init__(self, linear_inouts: Iterable[int]): - convs = Sequential( - HPVMConvBundle(3, 64, 3, ReLU, padding=1), - HPVMConvBundle(64, 64, 3, ReLU, 2, padding=1), - HPVMConvBundle(64, 128, 3, ReLU, padding=1), - HPVMConvBundle(128, 128, 3, ReLU, 2, padding=1), - HPVMConvBundle(128, 256, 3, ReLU, padding=1), - HPVMConvBundle(256, 256, 3, ReLU, padding=1), - HPVMConvBundle(256, 256, 3, ReLU, 2, padding=1), - HPVMConvBundle(256, 512, 3, ReLU, padding=1), - HPVMConvBundle(512, 512, 3, ReLU, padding=1), - HPVMConvBundle(512, 512, 3, ReLU, 2, padding=1), - HPVMConvBundle(512, 512, 3, ReLU, padding=1), - HPVMConvBundle(512, 512, 3, ReLU, padding=1), - HPVMConvBundle(512, 512, 3, ReLU, 2, padding=1) - ) - linear_layers = [Linear(in_, out) for in_, out in zip(linear_inouts, linear_inouts[1:])] - linear_relus = [ReLU() for _ in range(2 * len(linear_layers) - 1)] - linear_relus[::2] = linear_layers - linears = Sequential(*linear_relus) - super().__init__(convs, linears) - - -class VGG16Cifar10(_VGG16): - def __init__(self): - super().__init__([512, 512, 10]) - - -class VGG16Cifar100(_VGG16): - def __init__(self): - super().__init__([512, 512, 100]) - - -class VGG16ImageNet(_VGG16): - def __init__(self): - super().__init__([25088, 4096, 4096, 1000]) diff --git a/hpvm/projects/pred_tuner/models/inference.py b/hpvm/projects/pred_tuner/models/inference.py deleted file mode 100644 index d797e9e605..0000000000 --- a/hpvm/projects/pred_tuner/models/inference.py +++ /dev/null @@ -1,99 +0,0 @@ -import logging 
-from typing import Type, Union - -import torch -from torch.nn import Module -from torch.utils.data import DataLoader, IterableDataset, Subset - -from .domains import QoS -from .hpvm import HPVMDNN, HPVMDefaultModule -from .networks import networks - -msg_logger = logging.getLogger(__name__) - - -def move_to_device_recursively(data: object, device_: Union[torch.device, str]): - if isinstance(data, torch.Tensor): - return data.to(device_) - if not hasattr(data, '__dict__'): - if isinstance(data, list): - return [move_to_device_recursively(x, device_) for x in data] - elif isinstance(data, tuple): - return tuple([move_to_device_recursively(x, device_) for x in data]) - else: - raise RuntimeError(f"Don't know how to manipulate {type(data)}") - for key, value in data.__dict__.items(): - data.__dict__[key] = move_to_device_recursively(value, device_) - return data - - -def _infer_net_device(net: Module): - return next(iter(net.parameters())).device - - -def get_all_output(net: Module, dataloader: DataLoader): - outputs = [] - device = _infer_net_device(net) - with torch.no_grad(): - for inputs, targets in dataloader: - inputs = move_to_device_recursively(inputs, device) - outputs.append(net(inputs)) - return outputs - - -def load_torch_checkpoint(net: Module, chpt_path: str): - msg_logger.info('==> Loading checkpoint..') - checkpoint = torch.load(chpt_path) - net.load_state_dict(checkpoint.pop('net')) - return checkpoint - - -class BaselineInfo: - def __init__( - self, net: Module, val_loader: DataLoader, test_loader: DataLoader, - non_tensor_output: bool, qos_class: Type[QoS] - ): - self.baseline_net = net - self.val_loader = val_loader - self.test_loader = test_loader - self.non_tensor_output = non_tensor_output - self.qos_class = qos_class - self.val_qos = self.get_qos(net, val_loader) - self.test_qos = self.get_qos(net, test_loader) - - def get_qos(self, net: Module, dataloader: DataLoader): - return self.qos_class.from_all_output(get_all_output(net, dataloader), dataloader) - - @staticmethod - def _split_dataset(dataset: IterableDataset, split_at: int): - return Subset(dataset, torch.arange(0, split_at)), \ - Subset(dataset, torch.arange(split_at, len(dataset))) - - @classmethod - def init_by_name(cls, model_name: str, device) -> 'BaselineInfo': - msg_logger.info('==> Building model..') - network_factory, dataset_factory, batchsize, prefix, qos_class = networks[model_name] - net = network_factory() - # 1. Load network weights - msg_logger.info('==> Loading checkpoint..') - if isinstance(net, HPVMDefaultModule): - net.default_load_hpvm_weights(prefix) - else: - load_torch_checkpoint(net, prefix) - net = net.eval().to(device) - # 2. Load dataset - msg_logger.info('==> Loading dataset...') - if isinstance(net, HPVMDNN): - dataset = dataset_factory(prefix) - non_tensor_output = False - elif isinstance(net, HPVMDefaultModule): # Is image benchmark - dataset = dataset_factory(prefix) - non_tensor_output = True - else: - dataset = dataset_factory('./data') - non_tensor_output = False - # 3. Split dataset - test_set, val_set = cls._split_dataset(dataset, 5000) - test_loader = DataLoader(test_set, batch_size=batchsize) - val_loader = DataLoader(val_set, batch_size=batchsize) - return cls(net, val_loader, test_loader, non_tensor_output, qos_class) diff --git a/hpvm/projects/pred_tuner/models/networks.py b/hpvm/projects/pred_tuner/models/networks.py deleted file mode 100644 index a5611bcb3e..0000000000 --- a/hpvm/projects/pred_tuner/models/networks.py +++ /dev/null @@ -1,54 +0,0 @@ -from . 
import hpvm -from .datasets import CIFAR, CIFARImage, ImageNet, MNIST, get_cifar10_test_dataset -from .domains import Accuracy -from .domains.qoses import AccuracyPSNR -from .torch import ResNet18, VGG - - -networks = { - 'lenet_hpvm': ( - hpvm.LeNet, MNIST.from_default_file, 5000, - 'model_params/lenet_mnist', Accuracy - ), - 'alexnet_hpvm': ( - hpvm.AlexNet, CIFAR.from_default_file, 2000, - 'model_params/alexnet_cifar10', Accuracy - ), - 'alexnet2_hpvm': ( - hpvm.AlexNet2, CIFAR.from_default_file, 2000, - 'model_params/alexnet2_cifar10', Accuracy - ), - 'vgg16_cifar10_hpvm': ( - hpvm.VGG16Cifar10, CIFAR.from_default_file, 500, - 'model_params/vgg16_cifar10', Accuracy - ), - 'vgg16_cifar100_hpvm': ( - hpvm.VGG16Cifar100, CIFAR.from_default_file, 500, - 'model_params/vgg16_cifar100', Accuracy - ), - 'mobilenet_hpvm': ( - hpvm.MobileNet, CIFAR.from_default_file, 1000, - 'model_params/mobilenet', Accuracy - ), - 'resnet18_hpvm': ( - hpvm.ResNet18, CIFAR.from_default_file, 1000, - 'model_params/resnet18_cifar10', Accuracy - ), - 'alexnet_imagenet_hpvm': ( - hpvm.AlexNetImageNet, ImageNet.from_default_file, 100, - 'model_params/alexnet_imagenet', Accuracy - ), - 'vgg16_imagenet_hpvm': ( - hpvm.VGG16ImageNet, ImageNet.from_default_file, 50, - 'model_params/vgg16_imagenet', Accuracy - ), - 'resnet50_imagenet_hpvm': ( - hpvm.ResNet50, ImageNet.from_default_file, 25, - 'model_params/resnet50_imagenet', Accuracy - ), - 'alexnet2_canny_hpvm': ( - lambda: hpvm.AlexNet2Canny(on_classes=[1, 2, 3, 4, 5]), - CIFARImage.from_default_file, 50, - 'model_params/alexnet2_canny', AccuracyPSNR - ) -} diff --git a/hpvm/projects/pred_tuner/models/torch/__init__.py b/hpvm/projects/pred_tuner/models/torch/__init__.py deleted file mode 100644 index aff98ce114..0000000000 --- a/hpvm/projects/pred_tuner/models/torch/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -from .vgg import * -from .dpn import * -from .lenet import * -from .senet import * -from .pnasnet import * -from .densenet import * -from .googlenet import * -from .shufflenet import * -from .shufflenetv2 import * -from .resnet import * -from .resnext import * -from .preact_resnet import * -from .mobilenet import * -from .mobilenetv2 import * -from .efficientnet import * diff --git a/hpvm/projects/pred_tuner/models/torch/densenet.py b/hpvm/projects/pred_tuner/models/torch/densenet.py deleted file mode 100644 index 47ebbbe08e..0000000000 --- a/hpvm/projects/pred_tuner/models/torch/densenet.py +++ /dev/null @@ -1,107 +0,0 @@ -'''DenseNet in PyTorch.''' -import math - -import torch -import torch.nn as nn -import torch.nn.functional as F - - -class Bottleneck(nn.Module): - def __init__(self, in_planes, growth_rate): - super(Bottleneck, self).__init__() - self.bn1 = nn.BatchNorm2d(in_planes) - self.conv1 = nn.Conv2d(in_planes, 4*growth_rate, kernel_size=1, bias=False) - self.bn2 = nn.BatchNorm2d(4*growth_rate) - self.conv2 = nn.Conv2d(4*growth_rate, growth_rate, kernel_size=3, padding=1, bias=False) - - def forward(self, x): - out = self.conv1(F.relu(self.bn1(x))) - out = self.conv2(F.relu(self.bn2(out))) - out = torch.cat([out,x], 1) - return out - - -class Transition(nn.Module): - def __init__(self, in_planes, out_planes): - super(Transition, self).__init__() - self.bn = nn.BatchNorm2d(in_planes) - self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=1, bias=False) - - def forward(self, x): - out = self.conv(F.relu(self.bn(x))) - out = F.avg_pool2d(out, 2) - return out - - -class DenseNet(nn.Module): - def __init__(self, block, nblocks, growth_rate=12, reduction=0.5,
num_classes=10): - super(DenseNet, self).__init__() - self.growth_rate = growth_rate - - num_planes = 2*growth_rate - self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, padding=1, bias=False) - - self.dense1 = self._make_dense_layers(block, num_planes, nblocks[0]) - num_planes += nblocks[0]*growth_rate - out_planes = int(math.floor(num_planes*reduction)) - self.trans1 = Transition(num_planes, out_planes) - num_planes = out_planes - - self.dense2 = self._make_dense_layers(block, num_planes, nblocks[1]) - num_planes += nblocks[1]*growth_rate - out_planes = int(math.floor(num_planes*reduction)) - self.trans2 = Transition(num_planes, out_planes) - num_planes = out_planes - - self.dense3 = self._make_dense_layers(block, num_planes, nblocks[2]) - num_planes += nblocks[2]*growth_rate - out_planes = int(math.floor(num_planes*reduction)) - self.trans3 = Transition(num_planes, out_planes) - num_planes = out_planes - - self.dense4 = self._make_dense_layers(block, num_planes, nblocks[3]) - num_planes += nblocks[3]*growth_rate - - self.bn = nn.BatchNorm2d(num_planes) - self.linear = nn.Linear(num_planes, num_classes) - - def _make_dense_layers(self, block, in_planes, nblock): - layers = [] - for i in range(nblock): - layers.append(block(in_planes, self.growth_rate)) - in_planes += self.growth_rate - return nn.Sequential(*layers) - - def forward(self, x): - out = self.conv1(x) - out = self.trans1(self.dense1(out)) - out = self.trans2(self.dense2(out)) - out = self.trans3(self.dense3(out)) - out = self.dense4(out) - out = F.avg_pool2d(F.relu(self.bn(out)), 4) - out = out.view(out.size(0), -1) - out = self.linear(out) - return out - -def DenseNet121(): - return DenseNet(Bottleneck, [6,12,24,16], growth_rate=32) - -def DenseNet169(): - return DenseNet(Bottleneck, [6,12,32,32], growth_rate=32) - -def DenseNet201(): - return DenseNet(Bottleneck, [6,12,48,32], growth_rate=32) - -def DenseNet161(): - return DenseNet(Bottleneck, [6,12,36,24], growth_rate=48) - -def densenet_cifar(): - return DenseNet(Bottleneck, [6,12,24,16], growth_rate=12) - -def test(): - net = densenet_cifar() - x = torch.randn(1,3,32,32) - y = net(x) - print(y) - -# test() diff --git a/hpvm/projects/pred_tuner/models/torch/dpn.py b/hpvm/projects/pred_tuner/models/torch/dpn.py deleted file mode 100644 index d334367fcc..0000000000 --- a/hpvm/projects/pred_tuner/models/torch/dpn.py +++ /dev/null @@ -1,98 +0,0 @@ -'''Dual Path Networks in PyTorch.''' -import torch -import torch.nn as nn -import torch.nn.functional as F - - -class Bottleneck(nn.Module): - def __init__(self, last_planes, in_planes, out_planes, dense_depth, stride, first_layer): - super(Bottleneck, self).__init__() - self.out_planes = out_planes - self.dense_depth = dense_depth - - self.conv1 = nn.Conv2d(last_planes, in_planes, kernel_size=1, bias=False) - self.bn1 = nn.BatchNorm2d(in_planes) - self.conv2 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=stride, padding=1, groups=32, bias=False) - self.bn2 = nn.BatchNorm2d(in_planes) - self.conv3 = nn.Conv2d(in_planes, out_planes+dense_depth, kernel_size=1, bias=False) - self.bn3 = nn.BatchNorm2d(out_planes+dense_depth) - - self.shortcut = nn.Sequential() - if first_layer: - self.shortcut = nn.Sequential( - nn.Conv2d(last_planes, out_planes+dense_depth, kernel_size=1, stride=stride, bias=False), - nn.BatchNorm2d(out_planes+dense_depth) - ) - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = F.relu(self.bn2(self.conv2(out))) - out = self.bn3(self.conv3(out)) - x = self.shortcut(x) - d = 
self.out_planes - out = torch.cat([x[:,:d,:,:]+out[:,:d,:,:], x[:,d:,:,:], out[:,d:,:,:]], 1) - out = F.relu(out) - return out - - -class DPN(nn.Module): - def __init__(self, cfg): - super(DPN, self).__init__() - in_planes, out_planes = cfg['in_planes'], cfg['out_planes'] - num_blocks, dense_depth = cfg['num_blocks'], cfg['dense_depth'] - - self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) - self.bn1 = nn.BatchNorm2d(64) - self.last_planes = 64 - self.layer1 = self._make_layer(in_planes[0], out_planes[0], num_blocks[0], dense_depth[0], stride=1) - self.layer2 = self._make_layer(in_planes[1], out_planes[1], num_blocks[1], dense_depth[1], stride=2) - self.layer3 = self._make_layer(in_planes[2], out_planes[2], num_blocks[2], dense_depth[2], stride=2) - self.layer4 = self._make_layer(in_planes[3], out_planes[3], num_blocks[3], dense_depth[3], stride=2) - self.linear = nn.Linear(out_planes[3]+(num_blocks[3]+1)*dense_depth[3], 10) - - def _make_layer(self, in_planes, out_planes, num_blocks, dense_depth, stride): - strides = [stride] + [1]*(num_blocks-1) - layers = [] - for i,stride in enumerate(strides): - layers.append(Bottleneck(self.last_planes, in_planes, out_planes, dense_depth, stride, i==0)) - self.last_planes = out_planes + (i+2) * dense_depth - return nn.Sequential(*layers) - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = self.layer1(out) - out = self.layer2(out) - out = self.layer3(out) - out = self.layer4(out) - out = F.avg_pool2d(out, 4) - out = out.view(out.size(0), -1) - out = self.linear(out) - return out - - -def DPN26(): - cfg = { - 'in_planes': (96,192,384,768), - 'out_planes': (256,512,1024,2048), - 'num_blocks': (2,2,2,2), - 'dense_depth': (16,32,24,128) - } - return DPN(cfg) - -def DPN92(): - cfg = { - 'in_planes': (96,192,384,768), - 'out_planes': (256,512,1024,2048), - 'num_blocks': (3,4,20,3), - 'dense_depth': (16,32,24,128) - } - return DPN(cfg) - - -def test(): - net = DPN92() - x = torch.randn(1,3,32,32) - y = net(x) - print(y) - -# test() diff --git a/hpvm/projects/pred_tuner/models/torch/efficientnet.py b/hpvm/projects/pred_tuner/models/torch/efficientnet.py deleted file mode 100644 index 6a10a97468..0000000000 --- a/hpvm/projects/pred_tuner/models/torch/efficientnet.py +++ /dev/null @@ -1,99 +0,0 @@ -'''EfficientNet in PyTorch. - -Paper: "EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks". 
-''' -import torch -import torch.nn as nn -import torch.nn.functional as F - - -class Block(nn.Module): - '''expand + depthwise + pointwise + squeeze-excitation''' - - def __init__(self, in_planes, out_planes, expansion, stride): - super(Block, self).__init__() - self.stride = stride - - planes = expansion * in_planes - self.conv1 = nn.Conv2d( - in_planes, planes, kernel_size=1, stride=1, padding=0, bias=False) - self.bn1 = nn.BatchNorm2d(planes) - self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, - stride=stride, padding=1, groups=planes, bias=False) - self.bn2 = nn.BatchNorm2d(planes) - self.conv3 = nn.Conv2d( - planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) - self.bn3 = nn.BatchNorm2d(out_planes) - - self.shortcut = nn.Sequential() - if stride == 1 and in_planes != out_planes: - self.shortcut = nn.Sequential( - nn.Conv2d(in_planes, out_planes, kernel_size=1, - stride=1, padding=0, bias=False), - nn.BatchNorm2d(out_planes), - ) - - # SE layers - self.fc1 = nn.Conv2d(out_planes, out_planes//16, kernel_size=1) - self.fc2 = nn.Conv2d(out_planes//16, out_planes, kernel_size=1) - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = F.relu(self.bn2(self.conv2(out))) - out = self.bn3(self.conv3(out)) - shortcut = self.shortcut(x) if self.stride == 1 else out - # Squeeze-Excitation - w = F.avg_pool2d(out, out.size(2)) - w = F.relu(self.fc1(w)) - w = self.fc2(w).sigmoid() - out = out * w + shortcut - return out - - -class EfficientNet(nn.Module): - def __init__(self, cfg, num_classes=10): - super(EfficientNet, self).__init__() - self.cfg = cfg - self.conv1 = nn.Conv2d(3, 32, kernel_size=3, - stride=1, padding=1, bias=False) - self.bn1 = nn.BatchNorm2d(32) - self.layers = self._make_layers(in_planes=32) - self.linear = nn.Linear(cfg[-1][1], num_classes) - - def _make_layers(self, in_planes): - layers = [] - for expansion, out_planes, num_blocks, stride in self.cfg: - strides = [stride] + [1]*(num_blocks-1) - for stride in strides: - layers.append(Block(in_planes, out_planes, expansion, stride)) - in_planes = out_planes - return nn.Sequential(*layers) - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = self.layers(out) - out = out.view(out.size(0), -1) - out = self.linear(out) - return out - - -def EfficientNetB0(): - # (expansion, out_planes, num_blocks, stride) - cfg = [(1, 16, 1, 2), - (6, 24, 2, 1), - (6, 40, 2, 2), - (6, 80, 3, 2), - (6, 112, 3, 1), - (6, 192, 4, 2), - (6, 320, 1, 2)] - return EfficientNet(cfg) - - -def test(): - net = EfficientNetB0() - x = torch.randn(2, 3, 32, 32) - y = net(x) - print(y.shape) - - -# test() diff --git a/hpvm/projects/pred_tuner/models/torch/googlenet.py b/hpvm/projects/pred_tuner/models/torch/googlenet.py deleted file mode 100644 index 8ed8f6eb23..0000000000 --- a/hpvm/projects/pred_tuner/models/torch/googlenet.py +++ /dev/null @@ -1,106 +0,0 @@ -"""GoogLeNet with PyTorch.""" -import torch -import torch.nn as nn - - -class Inception(nn.Module): - def __init__(self, in_planes, n1x1, n3x3red, n3x3, n5x5red, n5x5, pool_planes): - super(Inception, self).__init__() - # 1x1 conv branch - self.b1 = nn.Sequential( - nn.Conv2d(in_planes, n1x1, kernel_size=1), - nn.BatchNorm2d(n1x1), - nn.ReLU(True), - ) - - # 1x1 conv -> 3x3 conv branch - self.b2 = nn.Sequential( - nn.Conv2d(in_planes, n3x3red, kernel_size=1), - nn.BatchNorm2d(n3x3red), - nn.ReLU(True), - nn.Conv2d(n3x3red, n3x3, kernel_size=3, padding=1), - nn.BatchNorm2d(n3x3), - nn.ReLU(True), - ) - - # 1x1 conv -> 5x5 conv branch - self.b3 = 
nn.Sequential( - nn.Conv2d(in_planes, n5x5red, kernel_size=1), - nn.BatchNorm2d(n5x5red), - nn.ReLU(True), - nn.Conv2d(n5x5red, n5x5, kernel_size=3, padding=1), - nn.BatchNorm2d(n5x5), - nn.ReLU(True), - nn.Conv2d(n5x5, n5x5, kernel_size=3, padding=1), - nn.BatchNorm2d(n5x5), - nn.ReLU(True), - ) - - # 3x3 pool -> 1x1 conv branch - self.b4 = nn.Sequential( - nn.MaxPool2d(3, stride=1, padding=1), - nn.Conv2d(in_planes, pool_planes, kernel_size=1), - nn.BatchNorm2d(pool_planes), - nn.ReLU(True), - ) - - def forward(self, x): - y1 = self.b1(x) - y2 = self.b2(x) - y3 = self.b3(x) - y4 = self.b4(x) - return torch.cat([y1, y2, y3, y4], 1) - - -class GoogLeNet(nn.Module): - def __init__(self): - super(GoogLeNet, self).__init__() - self.pre_layers = nn.Sequential( - nn.Conv2d(3, 192, kernel_size=3, padding=1), - nn.BatchNorm2d(192), - nn.ReLU(True), - ) - - self.a3 = Inception(192, 64, 96, 128, 16, 32, 32) - self.b3 = Inception(256, 128, 128, 192, 32, 96, 64) - - self.maxpool = nn.MaxPool2d(3, stride=2, padding=1) - - self.a4 = Inception(480, 192, 96, 208, 16, 48, 64) - self.b4 = Inception(512, 160, 112, 224, 24, 64, 64) - self.c4 = Inception(512, 128, 128, 256, 24, 64, 64) - self.d4 = Inception(512, 112, 144, 288, 32, 64, 64) - self.e4 = Inception(528, 256, 160, 320, 32, 128, 128) - - self.a5 = Inception(832, 256, 160, 320, 32, 128, 128) - self.b5 = Inception(832, 384, 192, 384, 48, 128, 128) - - self.avgpool = nn.AvgPool2d(8, stride=1) - self.linear = nn.Linear(1024, 10) - - def forward(self, x): - out = self.pre_layers(x) - out = self.a3(out) - out = self.b3(out) - out = self.maxpool(out) - out = self.a4(out) - out = self.b4(out) - out = self.c4(out) - out = self.d4(out) - out = self.e4(out) - out = self.maxpool(out) - out = self.a5(out) - out = self.b5(out) - out = self.avgpool(out) - out = out.view(out.size(0), -1) - out = self.linear(out) - return out - - -def test(): - net = GoogLeNet() - x = torch.randn(1, 3, 32, 32) - y = net(x) - print(y.size()) - -# test() diff --git a/hpvm/projects/pred_tuner/models/torch/lenet.py b/hpvm/projects/pred_tuner/models/torch/lenet.py deleted file mode 100644 index d657b7482a..0000000000 --- a/hpvm/projects/pred_tuner/models/torch/lenet.py +++ /dev/null @@ -1,23 +0,0 @@ -'''LeNet in PyTorch.''' -import torch.nn as nn -import torch.nn.functional as F - -class LeNet(nn.Module): - def __init__(self): - super(LeNet, self).__init__() - self.conv1 = nn.Conv2d(3, 6, 5) - self.conv2 = nn.Conv2d(6, 16, 5) - self.fc1 = nn.Linear(16*5*5, 120) - self.fc2 = nn.Linear(120, 84) - self.fc3 = nn.Linear(84, 10) - - def forward(self, x): - out = F.relu(self.conv1(x)) - out = F.max_pool2d(out, 2) - out = F.relu(self.conv2(out)) - out = F.max_pool2d(out, 2) - out = out.view(out.size(0), -1) - out = F.relu(self.fc1(out)) - out = F.relu(self.fc2(out)) - out = self.fc3(out) - return out diff --git a/hpvm/projects/pred_tuner/models/torch/mobilenet.py b/hpvm/projects/pred_tuner/models/torch/mobilenet.py deleted file mode 100644 index 497ef1e867..0000000000 --- a/hpvm/projects/pred_tuner/models/torch/mobilenet.py +++ /dev/null @@ -1,61 +0,0 @@ -'''MobileNet in PyTorch. - -See the paper "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications" -for more details. 
-''' -import torch -import torch.nn as nn -import torch.nn.functional as F - - -class Block(nn.Module): - '''Depthwise conv + Pointwise conv''' - def __init__(self, in_planes, out_planes, stride=1): - super(Block, self).__init__() - self.conv1 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=stride, padding=1, groups=in_planes, bias=False) - self.bn1 = nn.BatchNorm2d(in_planes) - self.conv2 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) - self.bn2 = nn.BatchNorm2d(out_planes) - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = F.relu(self.bn2(self.conv2(out))) - return out - - -class MobileNet(nn.Module): - # (128,2) means conv planes=128, conv stride=2, by default conv stride=1 - cfg = [64, (128,2), 128, (256,2), 256, (512,2), 512, 512, 512, 512, 512, (1024,2), 1024] - - def __init__(self, num_classes=10): - super(MobileNet, self).__init__() - self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False) - self.bn1 = nn.BatchNorm2d(32) - self.layers = self._make_layers(in_planes=32) - self.linear = nn.Linear(1024, num_classes) - - def _make_layers(self, in_planes): - layers = [] - for x in self.cfg: - out_planes = x if isinstance(x, int) else x[0] - stride = 1 if isinstance(x, int) else x[1] - layers.append(Block(in_planes, out_planes, stride)) - in_planes = out_planes - return nn.Sequential(*layers) - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = self.layers(out) - out = F.avg_pool2d(out, 2) - out = out.view(out.size(0), -1) - out = self.linear(out) - return out - - -def test(): - net = MobileNet() - x = torch.randn(1,3,32,32) - y = net(x) - print(y.size()) - -# test() diff --git a/hpvm/projects/pred_tuner/models/torch/mobilenetv2.py b/hpvm/projects/pred_tuner/models/torch/mobilenetv2.py deleted file mode 100644 index 17e5823ef4..0000000000 --- a/hpvm/projects/pred_tuner/models/torch/mobilenetv2.py +++ /dev/null @@ -1,86 +0,0 @@ -'''MobileNetV2 in PyTorch. - -See the paper "Inverted Residuals and Linear Bottlenecks: -Mobile Networks for Classification, Detection and Segmentation" for more details. 
-''' -import torch -import torch.nn as nn -import torch.nn.functional as F - - -class Block(nn.Module): - '''expand + depthwise + pointwise''' - def __init__(self, in_planes, out_planes, expansion, stride): - super(Block, self).__init__() - self.stride = stride - - planes = expansion * in_planes - self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, stride=1, padding=0, bias=False) - self.bn1 = nn.BatchNorm2d(planes) - self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, groups=planes, bias=False) - self.bn2 = nn.BatchNorm2d(planes) - self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) - self.bn3 = nn.BatchNorm2d(out_planes) - - self.shortcut = nn.Sequential() - if stride == 1 and in_planes != out_planes: - self.shortcut = nn.Sequential( - nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False), - nn.BatchNorm2d(out_planes), - ) - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = F.relu(self.bn2(self.conv2(out))) - out = self.bn3(self.conv3(out)) - out = out + self.shortcut(x) if self.stride==1 else out - return out - - -class MobileNetV2(nn.Module): - # (expansion, out_planes, num_blocks, stride) - cfg = [(1, 16, 1, 1), - (6, 24, 2, 1), # NOTE: change stride 2 -> 1 for CIFAR10 - (6, 32, 3, 2), - (6, 64, 4, 2), - (6, 96, 3, 1), - (6, 160, 3, 2), - (6, 320, 1, 1)] - - def __init__(self, num_classes=10): - super(MobileNetV2, self).__init__() - # NOTE: change conv1 stride 2 -> 1 for CIFAR10 - self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False) - self.bn1 = nn.BatchNorm2d(32) - self.layers = self._make_layers(in_planes=32) - self.conv2 = nn.Conv2d(320, 1280, kernel_size=1, stride=1, padding=0, bias=False) - self.bn2 = nn.BatchNorm2d(1280) - self.linear = nn.Linear(1280, num_classes) - - def _make_layers(self, in_planes): - layers = [] - for expansion, out_planes, num_blocks, stride in self.cfg: - strides = [stride] + [1]*(num_blocks-1) - for stride in strides: - layers.append(Block(in_planes, out_planes, expansion, stride)) - in_planes = out_planes - return nn.Sequential(*layers) - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = self.layers(out) - out = F.relu(self.bn2(self.conv2(out))) - # NOTE: change pooling kernel_size 7 -> 4 for CIFAR10 - out = F.avg_pool2d(out, 4) - out = out.view(out.size(0), -1) - out = self.linear(out) - return out - - -def test(): - net = MobileNetV2() - x = torch.randn(2,3,32,32) - y = net(x) - print(y.size()) - -# test() diff --git a/hpvm/projects/pred_tuner/models/torch/pnasnet.py b/hpvm/projects/pred_tuner/models/torch/pnasnet.py deleted file mode 100644 index de8c4d51f2..0000000000 --- a/hpvm/projects/pred_tuner/models/torch/pnasnet.py +++ /dev/null @@ -1,125 +0,0 @@ -'''PNASNet in PyTorch. 
- -Paper: Progressive Neural Architecture Search -''' -import torch -import torch.nn as nn -import torch.nn.functional as F - - -class SepConv(nn.Module): - '''Separable Convolution.''' - def __init__(self, in_planes, out_planes, kernel_size, stride): - super(SepConv, self).__init__() - self.conv1 = nn.Conv2d(in_planes, out_planes, - kernel_size, stride, - padding=(kernel_size-1)//2, - bias=False, groups=in_planes) - self.bn1 = nn.BatchNorm2d(out_planes) - - def forward(self, x): - return self.bn1(self.conv1(x)) - - -class CellA(nn.Module): - def __init__(self, in_planes, out_planes, stride=1): - super(CellA, self).__init__() - self.stride = stride - self.sep_conv1 = SepConv(in_planes, out_planes, kernel_size=7, stride=stride) - if stride==2: - self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) - self.bn1 = nn.BatchNorm2d(out_planes) - - def forward(self, x): - y1 = self.sep_conv1(x) - y2 = F.max_pool2d(x, kernel_size=3, stride=self.stride, padding=1) - if self.stride==2: - y2 = self.bn1(self.conv1(y2)) - return F.relu(y1+y2) - -class CellB(nn.Module): - def __init__(self, in_planes, out_planes, stride=1): - super(CellB, self).__init__() - self.stride = stride - # Left branch - self.sep_conv1 = SepConv(in_planes, out_planes, kernel_size=7, stride=stride) - self.sep_conv2 = SepConv(in_planes, out_planes, kernel_size=3, stride=stride) - # Right branch - self.sep_conv3 = SepConv(in_planes, out_planes, kernel_size=5, stride=stride) - if stride==2: - self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) - self.bn1 = nn.BatchNorm2d(out_planes) - # Reduce channels - self.conv2 = nn.Conv2d(2*out_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) - self.bn2 = nn.BatchNorm2d(out_planes) - - def forward(self, x): - # Left branch - y1 = self.sep_conv1(x) - y2 = self.sep_conv2(x) - # Right branch - y3 = F.max_pool2d(x, kernel_size=3, stride=self.stride, padding=1) - if self.stride==2: - y3 = self.bn1(self.conv1(y3)) - y4 = self.sep_conv3(x) - # Concat & reduce channels - b1 = F.relu(y1+y2) - b2 = F.relu(y3+y4) - y = torch.cat([b1,b2], 1) - return F.relu(self.bn2(self.conv2(y))) - -class PNASNet(nn.Module): - def __init__(self, cell_type, num_cells, num_planes): - super(PNASNet, self).__init__() - self.in_planes = num_planes - self.cell_type = cell_type - - self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, stride=1, padding=1, bias=False) - self.bn1 = nn.BatchNorm2d(num_planes) - - self.layer1 = self._make_layer(num_planes, num_cells=6) - self.layer2 = self._downsample(num_planes*2) - self.layer3 = self._make_layer(num_planes*2, num_cells=6) - self.layer4 = self._downsample(num_planes*4) - self.layer5 = self._make_layer(num_planes*4, num_cells=6) - - self.linear = nn.Linear(num_planes*4, 10) - - def _make_layer(self, planes, num_cells): - layers = [] - for _ in range(num_cells): - layers.append(self.cell_type(self.in_planes, planes, stride=1)) - self.in_planes = planes - return nn.Sequential(*layers) - - def _downsample(self, planes): - layer = self.cell_type(self.in_planes, planes, stride=2) - self.in_planes = planes - return layer - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = self.layer1(out) - out = self.layer2(out) - out = self.layer3(out) - out = self.layer4(out) - out = self.layer5(out) - out = F.avg_pool2d(out, 8) - out = self.linear(out.view(out.size(0), -1)) - return out - - -def PNASNetA(): - return PNASNet(CellA, num_cells=6, num_planes=44) - -def PNASNetB(): - 
return PNASNet(CellB, num_cells=6, num_planes=32) - - -def test(): - net = PNASNetB() - x = torch.randn(1,3,32,32) - y = net(x) - print(y) - -# test() diff --git a/hpvm/projects/pred_tuner/models/torch/preact_resnet.py b/hpvm/projects/pred_tuner/models/torch/preact_resnet.py deleted file mode 100644 index abb1bc313c..0000000000 --- a/hpvm/projects/pred_tuner/models/torch/preact_resnet.py +++ /dev/null @@ -1,118 +0,0 @@ -'''Pre-activation ResNet in PyTorch. - -Reference: -[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun - Identity Mappings in Deep Residual Networks. arXiv:1603.05027 -''' -import torch -import torch.nn as nn -import torch.nn.functional as F - - -class PreActBlock(nn.Module): - '''Pre-activation version of the BasicBlock.''' - expansion = 1 - - def __init__(self, in_planes, planes, stride=1): - super(PreActBlock, self).__init__() - self.bn1 = nn.BatchNorm2d(in_planes) - self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) - self.bn2 = nn.BatchNorm2d(planes) - self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) - - if stride != 1 or in_planes != self.expansion*planes: - self.shortcut = nn.Sequential( - nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False) - ) - - def forward(self, x): - out = F.relu(self.bn1(x)) - shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x - out = self.conv1(out) - out = self.conv2(F.relu(self.bn2(out))) - out += shortcut - return out - - -class PreActBottleneck(nn.Module): - '''Pre-activation version of the original Bottleneck module.''' - expansion = 4 - - def __init__(self, in_planes, planes, stride=1): - super(PreActBottleneck, self).__init__() - self.bn1 = nn.BatchNorm2d(in_planes) - self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) - self.bn2 = nn.BatchNorm2d(planes) - self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) - self.bn3 = nn.BatchNorm2d(planes) - self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False) - - if stride != 1 or in_planes != self.expansion*planes: - self.shortcut = nn.Sequential( - nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False) - ) - - def forward(self, x): - out = F.relu(self.bn1(x)) - shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x - out = self.conv1(out) - out = self.conv2(F.relu(self.bn2(out))) - out = self.conv3(F.relu(self.bn3(out))) - out += shortcut - return out - - -class PreActResNet(nn.Module): - def __init__(self, block, num_blocks, num_classes=10): - super(PreActResNet, self).__init__() - self.in_planes = 64 - - self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) - self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) - self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) - self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) - self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) - self.linear = nn.Linear(512*block.expansion, num_classes) - - def _make_layer(self, block, planes, num_blocks, stride): - strides = [stride] + [1]*(num_blocks-1) - layers = [] - for stride in strides: - layers.append(block(self.in_planes, planes, stride)) - self.in_planes = planes * block.expansion - return nn.Sequential(*layers) - - def forward(self, x): - out = self.conv1(x) - out = self.layer1(out) - out = self.layer2(out) - out = self.layer3(out) - out = self.layer4(out) - out = 
F.avg_pool2d(out, 4) - out = out.view(out.size(0), -1) - out = self.linear(out) - return out - - -def PreActResNet18(): - return PreActResNet(PreActBlock, [2,2,2,2]) - -def PreActResNet34(): - return PreActResNet(PreActBlock, [3,4,6,3]) - -def PreActResNet50(): - return PreActResNet(PreActBottleneck, [3,4,6,3]) - -def PreActResNet101(): - return PreActResNet(PreActBottleneck, [3,4,23,3]) - -def PreActResNet152(): - return PreActResNet(PreActBottleneck, [3,8,36,3]) - - -def test(): - net = PreActResNet18() - y = net((torch.randn(1,3,32,32))) - print(y.size()) - -# test() diff --git a/hpvm/projects/pred_tuner/models/torch/resnet.py b/hpvm/projects/pred_tuner/models/torch/resnet.py deleted file mode 100644 index d7c03ed134..0000000000 --- a/hpvm/projects/pred_tuner/models/torch/resnet.py +++ /dev/null @@ -1,122 +0,0 @@ -"""ResNet in PyTorch. - -For Pre-activation ResNet, see 'preact_resnet.py'. - -Reference: -[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun - Deep Residual Learning for Image Recognition. arXiv:1512.03385 -""" -import torch.nn as nn -import torch.nn.functional as F - -from models.hpvm import HPVMConvBundle - - -class BasicBlock(nn.Module): - expansion = 1 - - def __init__(self, in_planes, planes, stride=1): - super(BasicBlock, self).__init__() - self.conv1 = HPVMConvBundle(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) - self.bn1 = nn.BatchNorm2d(planes) - self.relu1 = nn.ReLU() - self.conv2 = HPVMConvBundle(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) - self.bn2 = nn.BatchNorm2d(planes) - - self.shortcut = nn.Sequential() - if stride != 1 or in_planes != self.expansion * planes: - self.shortcut = nn.Sequential( - HPVMConvBundle(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False), - nn.BatchNorm2d(self.expansion * planes) - ) - self.relu2 = nn.ReLU() - - def forward(self, x): - out = self.relu1(self.bn1(self.conv1(x))) - out = self.bn2(self.conv2(out)) - out += self.shortcut(x) - out = self.relu2(out) - return out - - -class Bottleneck(nn.Module): - expansion = 4 - - def __init__(self, in_planes, planes, stride=1): - super(Bottleneck, self).__init__() - self.conv1 = HPVMConvBundle(in_planes, planes, kernel_size=1, bias=False) - self.bn1 = nn.BatchNorm2d(planes) - self.conv2 = HPVMConvBundle(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) - self.bn2 = nn.BatchNorm2d(planes) - self.conv3 = HPVMConvBundle(planes, self.expansion * planes, kernel_size=1, bias=False) - self.bn3 = nn.BatchNorm2d(self.expansion * planes) - - self.shortcut = nn.Sequential() - if stride != 1 or in_planes != self.expansion * planes: - self.shortcut = nn.Sequential( - HPVMConvBundle(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False), - nn.BatchNorm2d(self.expansion * planes) - ) - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = F.relu(self.bn2(self.conv2(out))) - out = self.bn3(self.conv3(out)) - out += self.shortcut(x) - out = F.relu(out) - return out - - -class ResNet(nn.Module): - def __init__(self, block, num_blocks, num_classes=10): - super(ResNet, self).__init__() - self.in_planes = 64 - - self.conv1 = HPVMConvBundle(3, 64, kernel_size=3, stride=1, padding=1, bias=False) - self.bn1 = nn.BatchNorm2d(64) - self.relu = nn.ReLU() - self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) - self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) - self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) - self.layer4 = 
self._make_layer(block, 512, num_blocks[3], stride=2) - self.avg_pool2d = nn.AvgPool2d(4) - self.linear = nn.Linear(512 * block.expansion, num_classes) - - def _make_layer(self, block, planes, num_blocks, stride): - strides = [stride] + [1] * (num_blocks - 1) - layers = [] - for stride in strides: - layers.append(block(self.in_planes, planes, stride)) - self.in_planes = planes * block.expansion - return nn.Sequential(*layers) - - def forward(self, x): - out = self.relu(self.bn1(self.conv1(x))) - out = self.layer1(out) - out = self.layer2(out) - out = self.layer3(out) - out = self.layer4(out) - out = self.avg_pool2d(out) - out = out.view(out.size(0), -1) - out = self.linear(out) - return out - - -def ResNet18(): - return ResNet(BasicBlock, [2, 2, 2, 2]) - - -def ResNet34(): - return ResNet(BasicBlock, [3, 4, 6, 3]) - - -def ResNet50(): - return ResNet(Bottleneck, [3, 4, 6, 3]) - - -def ResNet101(): - return ResNet(Bottleneck, [3, 4, 23, 3]) - - -def ResNet152(): - return ResNet(Bottleneck, [3, 8, 36, 3]) diff --git a/hpvm/projects/pred_tuner/models/torch/resnext.py b/hpvm/projects/pred_tuner/models/torch/resnext.py deleted file mode 100644 index 7a08f3e7d9..0000000000 --- a/hpvm/projects/pred_tuner/models/torch/resnext.py +++ /dev/null @@ -1,95 +0,0 @@ -'''ResNeXt in PyTorch. - -See the paper "Aggregated Residual Transformations for Deep Neural Networks" for more details. -''' -import torch -import torch.nn as nn -import torch.nn.functional as F - - -class Block(nn.Module): - '''Grouped convolution block.''' - expansion = 2 - - def __init__(self, in_planes, cardinality=32, bottleneck_width=4, stride=1): - super(Block, self).__init__() - group_width = cardinality * bottleneck_width - self.conv1 = nn.Conv2d(in_planes, group_width, kernel_size=1, bias=False) - self.bn1 = nn.BatchNorm2d(group_width) - self.conv2 = nn.Conv2d(group_width, group_width, kernel_size=3, stride=stride, padding=1, groups=cardinality, bias=False) - self.bn2 = nn.BatchNorm2d(group_width) - self.conv3 = nn.Conv2d(group_width, self.expansion*group_width, kernel_size=1, bias=False) - self.bn3 = nn.BatchNorm2d(self.expansion*group_width) - - self.shortcut = nn.Sequential() - if stride != 1 or in_planes != self.expansion*group_width: - self.shortcut = nn.Sequential( - nn.Conv2d(in_planes, self.expansion*group_width, kernel_size=1, stride=stride, bias=False), - nn.BatchNorm2d(self.expansion*group_width) - ) - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = F.relu(self.bn2(self.conv2(out))) - out = self.bn3(self.conv3(out)) - out += self.shortcut(x) - out = F.relu(out) - return out - - -class ResNeXt(nn.Module): - def __init__(self, num_blocks, cardinality, bottleneck_width, num_classes=10): - super(ResNeXt, self).__init__() - self.cardinality = cardinality - self.bottleneck_width = bottleneck_width - self.in_planes = 64 - - self.conv1 = nn.Conv2d(3, 64, kernel_size=1, bias=False) - self.bn1 = nn.BatchNorm2d(64) - self.layer1 = self._make_layer(num_blocks[0], 1) - self.layer2 = self._make_layer(num_blocks[1], 2) - self.layer3 = self._make_layer(num_blocks[2], 2) - # self.layer4 = self._make_layer(num_blocks[3], 2) - self.linear = nn.Linear(cardinality*bottleneck_width*8, num_classes) - - def _make_layer(self, num_blocks, stride): - strides = [stride] + [1]*(num_blocks-1) - layers = [] - for stride in strides: - layers.append(Block(self.in_planes, self.cardinality, self.bottleneck_width, stride)) - self.in_planes = Block.expansion * self.cardinality * self.bottleneck_width - # Increase bottleneck_width by 
a factor of 2 after each stage. - self.bottleneck_width *= 2 - return nn.Sequential(*layers) - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = self.layer1(out) - out = self.layer2(out) - out = self.layer3(out) - # out = self.layer4(out) - out = F.avg_pool2d(out, 8) - out = out.view(out.size(0), -1) - out = self.linear(out) - return out - - -def ResNeXt29_2x64d(): - return ResNeXt(num_blocks=[3,3,3], cardinality=2, bottleneck_width=64) - -def ResNeXt29_4x64d(): - return ResNeXt(num_blocks=[3,3,3], cardinality=4, bottleneck_width=64) - -def ResNeXt29_8x64d(): - return ResNeXt(num_blocks=[3,3,3], cardinality=8, bottleneck_width=64) - -def ResNeXt29_32x4d(): - return ResNeXt(num_blocks=[3,3,3], cardinality=32, bottleneck_width=4) - -def test_resnext(): - net = ResNeXt29_2x64d() - x = torch.randn(1,3,32,32) - y = net(x) - print(y.size()) - -# test_resnext() diff --git a/hpvm/projects/pred_tuner/models/torch/senet.py b/hpvm/projects/pred_tuner/models/torch/senet.py deleted file mode 100644 index 98bfa0ca51..0000000000 --- a/hpvm/projects/pred_tuner/models/torch/senet.py +++ /dev/null @@ -1,121 +0,0 @@ -'''SENet in PyTorch. - -SENet is the winner of ImageNet-2017. See the paper "Squeeze-and-Excitation Networks" (arXiv:1709.01507) for more details. -''' -import torch -import torch.nn as nn -import torch.nn.functional as F - - -class BasicBlock(nn.Module): - def __init__(self, in_planes, planes, stride=1): - super(BasicBlock, self).__init__() - self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) - self.bn1 = nn.BatchNorm2d(planes) - self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) - self.bn2 = nn.BatchNorm2d(planes) - - self.shortcut = nn.Sequential() - if stride != 1 or in_planes != planes: - self.shortcut = nn.Sequential( - nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False), - nn.BatchNorm2d(planes) - ) - - # SE layers - self.fc1 = nn.Conv2d(planes, planes//16, kernel_size=1) # Use nn.Conv2d instead of nn.Linear - self.fc2 = nn.Conv2d(planes//16, planes, kernel_size=1) - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = self.bn2(self.conv2(out)) - - # Squeeze - w = F.avg_pool2d(out, out.size(2)) - w = F.relu(self.fc1(w)) - w = F.sigmoid(self.fc2(w)) - # Excitation - out = out * w # New broadcasting feature from v0.2!
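- # (Note on the squeeze-and-excitation step above: avg_pool2d squeezes "out" from [N, C, H, W] down to [N, C, 1, 1]; fc1/fc2 form a C -> C//16 -> C bottleneck, and the sigmoid yields per-channel weights in (0, 1) that broadcast back over H and W in the multiply.)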
- - out += self.shortcut(x) - out = F.relu(out) - return out - - -class PreActBlock(nn.Module): - def __init__(self, in_planes, planes, stride=1): - super(PreActBlock, self).__init__() - self.bn1 = nn.BatchNorm2d(in_planes) - self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) - self.bn2 = nn.BatchNorm2d(planes) - self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) - - if stride != 1 or in_planes != planes: - self.shortcut = nn.Sequential( - nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False) - ) - - # SE layers - self.fc1 = nn.Conv2d(planes, planes//16, kernel_size=1) - self.fc2 = nn.Conv2d(planes//16, planes, kernel_size=1) - - def forward(self, x): - out = F.relu(self.bn1(x)) - shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x - out = self.conv1(out) - out = self.conv2(F.relu(self.bn2(out))) - - # Squeeze - w = F.avg_pool2d(out, out.size(2)) - w = F.relu(self.fc1(w)) - w = F.sigmoid(self.fc2(w)) - # Excitation - out = out * w - - out += shortcut - return out - - -class SENet(nn.Module): - def __init__(self, block, num_blocks, num_classes=10): - super(SENet, self).__init__() - self.in_planes = 64 - - self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) - self.bn1 = nn.BatchNorm2d(64) - self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) - self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) - self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) - self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) - self.linear = nn.Linear(512, num_classes) - - def _make_layer(self, block, planes, num_blocks, stride): - strides = [stride] + [1]*(num_blocks-1) - layers = [] - for stride in strides: - layers.append(block(self.in_planes, planes, stride)) - self.in_planes = planes - return nn.Sequential(*layers) - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = self.layer1(out) - out = self.layer2(out) - out = self.layer3(out) - out = self.layer4(out) - out = F.avg_pool2d(out, 4) - out = out.view(out.size(0), -1) - out = self.linear(out) - return out - - -def SENet18(): - return SENet(PreActBlock, [2,2,2,2]) - - -def test(): - net = SENet18() - y = net(torch.randn(1,3,32,32)) - print(y.size()) - -# test() diff --git a/hpvm/projects/pred_tuner/models/torch/shufflenet.py b/hpvm/projects/pred_tuner/models/torch/shufflenet.py deleted file mode 100644 index acff6f7826..0000000000 --- a/hpvm/projects/pred_tuner/models/torch/shufflenet.py +++ /dev/null @@ -1,109 +0,0 @@ -'''ShuffleNet in PyTorch. - -See the paper "ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices" for more details. 
-''' -import torch -import torch.nn as nn -import torch.nn.functional as F - - -class ShuffleBlock(nn.Module): - def __init__(self, groups): - super(ShuffleBlock, self).__init__() - self.groups = groups - - def forward(self, x): - '''Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,W] -> [N,C,H,W]''' - # e.g. C=6, g=2: channels [0,1,2,3,4,5] -> [0,3,1,4,2,5], interleaving the two groups - N,C,H,W = x.size() - g = self.groups - return x.view(N,g,C//g,H,W).permute(0,2,1,3,4).reshape(N,C,H,W) - - -class Bottleneck(nn.Module): - def __init__(self, in_planes, out_planes, stride, groups): - super(Bottleneck, self).__init__() - self.stride = stride - - # integer division: nn.Conv2d requires int channel counts - mid_planes = out_planes//4 - g = 1 if in_planes==24 else groups - self.conv1 = nn.Conv2d(in_planes, mid_planes, kernel_size=1, groups=g, bias=False) - self.bn1 = nn.BatchNorm2d(mid_planes) - self.shuffle1 = ShuffleBlock(groups=g) - self.conv2 = nn.Conv2d(mid_planes, mid_planes, kernel_size=3, stride=stride, padding=1, groups=mid_planes, bias=False) - self.bn2 = nn.BatchNorm2d(mid_planes) - self.conv3 = nn.Conv2d(mid_planes, out_planes, kernel_size=1, groups=groups, bias=False) - self.bn3 = nn.BatchNorm2d(out_planes) - - self.shortcut = nn.Sequential() - if stride == 2: - self.shortcut = nn.Sequential(nn.AvgPool2d(3, stride=2, padding=1)) - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = self.shuffle1(out) - out = F.relu(self.bn2(self.conv2(out))) - out = self.bn3(self.conv3(out)) - res = self.shortcut(x) - out = F.relu(torch.cat([out,res], 1)) if self.stride==2 else F.relu(out+res) - return out - - -class ShuffleNet(nn.Module): - def __init__(self, cfg): - super(ShuffleNet, self).__init__() - out_planes = cfg['out_planes'] - num_blocks = cfg['num_blocks'] - groups = cfg['groups'] - - self.conv1 = nn.Conv2d(3, 24, kernel_size=1, bias=False) - self.bn1 = nn.BatchNorm2d(24) - self.in_planes = 24 - self.layer1 = self._make_layer(out_planes[0], num_blocks[0], groups) - self.layer2 = self._make_layer(out_planes[1], num_blocks[1], groups) - self.layer3 = self._make_layer(out_planes[2], num_blocks[2], groups) - self.linear = nn.Linear(out_planes[2], 10) - - def _make_layer(self, out_planes, num_blocks, groups): - layers = [] - for i in range(num_blocks): - stride = 2 if i == 0 else 1 - cat_planes = self.in_planes if i == 0 else 0 - layers.append(Bottleneck(self.in_planes, out_planes-cat_planes, stride=stride, groups=groups)) - self.in_planes = out_planes - return nn.Sequential(*layers) - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = self.layer1(out) - out = self.layer2(out) - out = self.layer3(out) - out = F.avg_pool2d(out, 4) - out = out.view(out.size(0), -1) - out = self.linear(out) - return out - - -def ShuffleNetG2(): - cfg = { - 'out_planes': [200,400,800], - 'num_blocks': [4,8,4], - 'groups': 2 - } - return ShuffleNet(cfg) - -def ShuffleNetG3(): - cfg = { - 'out_planes': [240,480,960], - 'num_blocks': [4,8,4], - 'groups': 3 - } - return ShuffleNet(cfg) - - -def test(): - net = ShuffleNetG2() - x = torch.randn(1,3,32,32) - y = net(x) - print(y) - -# test() diff --git a/hpvm/projects/pred_tuner/models/torch/shufflenetv2.py b/hpvm/projects/pred_tuner/models/torch/shufflenetv2.py deleted file mode 100644 index eefcda3205..0000000000 --- a/hpvm/projects/pred_tuner/models/torch/shufflenetv2.py +++ /dev/null @@ -1,162 +0,0 @@ -'''ShuffleNetV2 in PyTorch. - -See the paper "ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" for more details.
-''' -import torch -import torch.nn as nn -import torch.nn.functional as F - - -class ShuffleBlock(nn.Module): - def __init__(self, groups=2): - super(ShuffleBlock, self).__init__() - self.groups = groups - - def forward(self, x): - '''Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,W] -> [N,C,H,W]''' - N, C, H, W = x.size() - g = self.groups - return x.view(N, g, C//g, H, W).permute(0, 2, 1, 3, 4).reshape(N, C, H, W) - - -class SplitBlock(nn.Module): - def __init__(self, ratio): - super(SplitBlock, self).__init__() - self.ratio = ratio - - def forward(self, x): - c = int(x.size(1) * self.ratio) - return x[:, :c, :, :], x[:, c:, :, :] - - -class BasicBlock(nn.Module): - def __init__(self, in_channels, split_ratio=0.5): - super(BasicBlock, self).__init__() - self.split = SplitBlock(split_ratio) - in_channels = int(in_channels * split_ratio) - self.conv1 = nn.Conv2d(in_channels, in_channels, - kernel_size=1, bias=False) - self.bn1 = nn.BatchNorm2d(in_channels) - self.conv2 = nn.Conv2d(in_channels, in_channels, - kernel_size=3, stride=1, padding=1, groups=in_channels, bias=False) - self.bn2 = nn.BatchNorm2d(in_channels) - self.conv3 = nn.Conv2d(in_channels, in_channels, - kernel_size=1, bias=False) - self.bn3 = nn.BatchNorm2d(in_channels) - self.shuffle = ShuffleBlock() - - def forward(self, x): - x1, x2 = self.split(x) - out = F.relu(self.bn1(self.conv1(x2))) - out = self.bn2(self.conv2(out)) - out = F.relu(self.bn3(self.conv3(out))) - out = torch.cat([x1, out], 1) - out = self.shuffle(out) - return out - - -class DownBlock(nn.Module): - def __init__(self, in_channels, out_channels): - super(DownBlock, self).__init__() - mid_channels = out_channels // 2 - # left - self.conv1 = nn.Conv2d(in_channels, in_channels, - kernel_size=3, stride=2, padding=1, groups=in_channels, bias=False) - self.bn1 = nn.BatchNorm2d(in_channels) - self.conv2 = nn.Conv2d(in_channels, mid_channels, - kernel_size=1, bias=False) - self.bn2 = nn.BatchNorm2d(mid_channels) - # right - self.conv3 = nn.Conv2d(in_channels, mid_channels, - kernel_size=1, bias=False) - self.bn3 = nn.BatchNorm2d(mid_channels) - self.conv4 = nn.Conv2d(mid_channels, mid_channels, - kernel_size=3, stride=2, padding=1, groups=mid_channels, bias=False) - self.bn4 = nn.BatchNorm2d(mid_channels) - self.conv5 = nn.Conv2d(mid_channels, mid_channels, - kernel_size=1, bias=False) - self.bn5 = nn.BatchNorm2d(mid_channels) - - self.shuffle = ShuffleBlock() - - def forward(self, x): - # left - out1 = self.bn1(self.conv1(x)) - out1 = F.relu(self.bn2(self.conv2(out1))) - # right - out2 = F.relu(self.bn3(self.conv3(x))) - out2 = self.bn4(self.conv4(out2)) - out2 = F.relu(self.bn5(self.conv5(out2))) - # concat - out = torch.cat([out1, out2], 1) - out = self.shuffle(out) - return out - - -class ShuffleNetV2(nn.Module): - def __init__(self, net_size): - super(ShuffleNetV2, self).__init__() - out_channels = configs[net_size]['out_channels'] - num_blocks = configs[net_size]['num_blocks'] - - self.conv1 = nn.Conv2d(3, 24, kernel_size=3, - stride=1, padding=1, bias=False) - self.bn1 = nn.BatchNorm2d(24) - self.in_channels = 24 - self.layer1 = self._make_layer(out_channels[0], num_blocks[0]) - self.layer2 = self._make_layer(out_channels[1], num_blocks[1]) - self.layer3 = self._make_layer(out_channels[2], num_blocks[2]) - self.conv2 = nn.Conv2d(out_channels[2], out_channels[3], - kernel_size=1, stride=1, padding=0, bias=False) - self.bn2 = nn.BatchNorm2d(out_channels[3]) - self.linear = nn.Linear(out_channels[3], 10) - - def _make_layer(self, out_channels,
num_blocks): - layers = [DownBlock(self.in_channels, out_channels)] - for i in range(num_blocks): - layers.append(BasicBlock(out_channels)) - self.in_channels = out_channels - return nn.Sequential(*layers) - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - # out = F.max_pool2d(out, 3, stride=2, padding=1) - out = self.layer1(out) - out = self.layer2(out) - out = self.layer3(out) - out = F.relu(self.bn2(self.conv2(out))) - out = F.avg_pool2d(out, 4) - out = out.view(out.size(0), -1) - out = self.linear(out) - return out - - -configs = { - 0.5: { - 'out_channels': (48, 96, 192, 1024), - 'num_blocks': (3, 7, 3) - }, - - 1: { - 'out_channels': (116, 232, 464, 1024), - 'num_blocks': (3, 7, 3) - }, - 1.5: { - 'out_channels': (176, 352, 704, 1024), - 'num_blocks': (3, 7, 3) - }, - 2: { - 'out_channels': (224, 488, 976, 2048), - 'num_blocks': (3, 7, 3) - } -} - - -def test(): - net = ShuffleNetV2(net_size=0.5) - x = torch.randn(3, 3, 32, 32) - y = net(x) - print(y.shape) - - -# test() diff --git a/hpvm/projects/pred_tuner/models/torch/vgg.py b/hpvm/projects/pred_tuner/models/torch/vgg.py deleted file mode 100644 index 2650d2f485..0000000000 --- a/hpvm/projects/pred_tuner/models/torch/vgg.py +++ /dev/null @@ -1,39 +0,0 @@ -"""VGG11/13/16/19 in Pytorch.""" -import torch.nn as nn -from models.hpvm import HPVMConvBundle - - -cfg = { - 'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], - 'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], - 'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], - 'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'], -} - - -class VGG(nn.Module): - def __init__(self, vgg_name): - super(VGG, self).__init__() - self.features = self._make_layers(cfg[vgg_name]) - self.classifier = nn.Linear(512, 10) - - def forward(self, x): - out = self.features(x) - out = out.view(out.size(0), -1) - out = self.classifier(out) - return out - - @staticmethod - def _make_layers(config): - layers = [] - in_channels = 3 - for x in config: - if x == 'M': - layers += [nn.MaxPool2d(kernel_size=2, stride=2)] - else: - layers += [HPVMConvBundle(in_channels, x, kernel_size=3, padding=1), - nn.BatchNorm2d(x), - nn.ReLU(inplace=True)] - in_channels = x - layers += [nn.AvgPool2d(kernel_size=1, stride=1)] - return nn.Sequential(*layers) diff --git a/hpvm/projects/pred_tuner/run_tuner.py b/hpvm/projects/pred_tuner/run_tuner.py deleted file mode 100644 index 5470763ae0..0000000000 --- a/hpvm/projects/pred_tuner/run_tuner.py +++ /dev/null @@ -1,305 +0,0 @@ -#!/usr/bin/env python -# -# Development-time Tuner with Algorithmic Approximations: -# Approximations: Perforation, Sampling with varying knobs for rate, skip offset -import copy -import logging -import os -import shutil -import time -from pathlib import Path -from typing import List, Tuple - -import numpy as np -import opentuner -from opentuner import ConfigurationManipulator, EnumParameter, MeasurementInterface -from opentuner.measurement.inputmanager import FixedInputManager -from opentuner.search.objective import ThresholdAccuracyMinimizeTime -from opentuner.tuningrunmain import TuningRunMain -from torch.nn import Module -from tqdm import tqdm - -from exp import Benchmark, ConfigMeasurer, ExpState, TuningTime, batch_id, bench_tuner_data, is_dev_time -from models import get_all_output, networks, QoS -from toolkit import ConfigT -from toolkit.estimators import 
WeightedLinearQoSEstimator -from utils import Config, config, reapply_last_config - -msg_logger = logging.getLogger(__name__) -use_proxy = False -n_promise_valid_runs = 30 -confidence_level = 0.95 - - -def init_proxy(ni: ConfigMeasurer, pickle_path: Path): - def acc_crit(inputs_): - return ni.get_qos(inputs_, ni.val_loader) - - def threshold_eval(inputs_): - accs = np.array([acc_crit(x) for x in inputs_]) - return ni.val_qos - accs.mean() < 3.0 - - def run_model(net: Module): - return get_all_output(net, ni.val_loader) - - return WeightedLinearQoSEstimator( - ni.nas, run_model, acc_crit, threshold_eval, confidence_level, storage=pickle_path - ) - - -class Timer: - def __init__(self, timer_state: TuningTime, timer_name: str): - self.timer_state = timer_state - self.name = timer_name - self.start = None - - def __enter__(self): - self.start = time.time() - return self - - def __exit__(self, *args): - end = time.time() - interval = end - self.start - self.timer_state.add_timer(self.name, interval) - - -class TunerDriver: - def __init__(self, bench: Benchmark): - self.bench = bench - msg_logger.info(f"Tuning for model {self.bench.model_name}") - # Initialize folder. - self._init_folder(bench) - # Take a snapshot of current code. - self.take_code_snapshot() - # Initialize network information and qos thresholds - self.net_info = ConfigMeasurer.init_from_bench(self.bench) - qoses = self.net_info.val_qos, self.net_info.test_qos - qos_type = self.net_info.val_qos.__class__ - self.tuner_thres = qos_type.suggested_tuner_thresholds(self.net_info.val_qos) - self.val_thres = qos_type.suggested_val_threshold(self.net_info.val_qos) - self.test_thres = qos_type.suggested_test_threshold(self.net_info.test_qos) - # Tuner states. - self.states = ExpState(bench, qos_type, qoses) - # Current # of iteration. `ProxyTuner` will use this. - self.run_id, self.iter = 0, 0 - # Initialize proxy. - if use_proxy: - self.proxy = init_proxy(self.net_info, self.bench.result_dir / 'proxy.pkl') - else: - self.proxy = None - - @staticmethod - def _init_folder(bench: Benchmark): - def remove_file_or_folder(path: Path): - if path.is_dir(): - shutil.rmtree(path) # recursively remove the directory - elif path.is_file(): - path.unlink() # Removes file despite the surprising name - - pickle_path = bench.result_dir / 'proxy.pkl' - # Remove everything in result folder except pickle file - if bench.result_dir.is_dir(): - msg_logger.warning(f"!Cleaning existing result dir = {bench.result_dir}") - for child in bench.result_dir.glob('*'): - if child == pickle_path: - continue - msg_logger.info(f" !Removing {child}") - remove_file_or_folder(child) - # Create result folder if it doesn't exist - if not bench.result_dir.is_dir(): - msg_logger.info(f"Creating output directory = {bench.result_dir}") - os.makedirs(bench.result_dir) - - def get_default_args(self): - args = opentuner.default_argparser().parse_args() - args.database = f"opentuner.db/{batch_id}.db" - args.test_limit = self.bench.autotuner_runs - parent = Path(args.database).parent - if not parent.is_dir(): - os.makedirs(parent, exist_ok=True) - return args - - def tuner_exec(self): - # Get default opentuner args - args = self.get_default_args() - # Start tuning for each threshold - for i, thres in enumerate(self.tuner_thres): - with Timer(self.states.timers, f"tuning_{i}"): - msg_logger.info( - f"Tuning goal: qos >= {thres}; keeping configs with qos >= {self.val_thres}" - ) - tuner = ProxyTuner(args, self, thres, self.val_thres) - # TuningRunMain.__init__ initializes its own logger, so we'll reapply our settings.
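- # (Each pass of this loop launches one full opentuner search: ProxyTuner.run() is invoked up to args.test_limit times, and every configuration whose confident accuracy clears accept_thres is appended to states.all_configs for post-processing.)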
- tuning_main = TuningRunMain(tuner, args) - reapply_last_config() - # Unleash the tuner! - tuning_main.main() - # Remove tuner progress bar - tuner.pbar.close() - self.run_id += 1 - self.iter = 0 - # Postprocess configs - self.process_configs() - - def calibrate_write_configs(self, configs: List[Config], is_test_set: bool): - write_to = self.states.tested_configs if is_test_set else self.states.validated_configs - gold_acc = self.net_info.test_qos if is_test_set else self.net_info.val_qos - for cfg in tqdm(configs, leave=False): - cfg = copy.deepcopy(cfg) - cfg: Config - flags = {k: v for k, v in enumerate(cfg.flags)} - measured_acc, confidence = self.net_info.actual_measure( - flags, cfg.total_runs, is_test_set, threshold=self.val_thres - ) - prev_acc = cfg.avg_qos - cfg.update_acc(measured_acc, confidence, gold_acc) - new_acc = cfg.avg_qos - msg_logger.debug(f"{prev_acc} (mean) -> {new_acc} (mean)") - write_to.append(cfg) - write_to.finalize_dump() - - @staticmethod - def filter_configs( - validation: List[Config], test: List[Config], - vali_threshold: QoS, test_threshold: QoS - ) -> Tuple[List[Config], List[Config]]: - # Filter validation and test set by their respective thresholds - filtered_validation = [ - c for c in validation if c.avg_loss <= vali_threshold - ] - filtered_test = [ - c for c in test if c.avg_loss <= test_threshold - ] - # Test configs also need to be a subset of validation configs. - name_to_filtered = {x.fname: x for x in filtered_test} - intersect_names = set(list(name_to_filtered.keys())).intersection( - set((x.fname for x in filtered_validation)) - ) - filtered_test_ = [name_to_filtered[fname] for fname in intersect_names] - return filtered_validation, filtered_test_ - - def process_configs(self): - # Finalize all configs because tuning is done. - # (this may not do anything now but will in the future) - self.states.all_configs.finalize_dump() - all_configs = self.states.all_configs.configs - # Pre-filter configs by a wide pareto margin - filtered_configs = config.is_pareto_efficient(all_configs, ratio=0.05, n_min=50, n_max=50) - msg_logger.info(f"Prefilter yields {len(filtered_configs)} configs from {len(all_configs)}") - self.states.filtered_configs.finalize_dump(with_configs=filtered_configs) - # Calibrate prefiltered configs (validation step) - with Timer(self.states.timers, "validate"): - self.calibrate_write_configs(filtered_configs, is_test_set=False) - validated_configs = self.states.validated_configs.configs - # Calibrate prefiltered configs on test set (test step) - with Timer(self.states.timers, "test"): - self.calibrate_write_configs(filtered_configs, is_test_set=True) - tested_configs = self.states.tested_configs.configs - # Filter valid and test set configs by thresholds - valid_configs, test_configs = self.filter_configs( - validated_configs, tested_configs, self.val_thres, self.test_thres - ) - self.states.valid_configs.finalize_dump(valid_configs) - self.states.test_configs.finalize_dump(test_configs) - # Finalize data input and plot everything. - self.states.finalize_plot() - - def take_code_snapshot(self): - import git - msg_logger.info(f"Taking git snapshot") - ref_dir = self.bench.result_dir / "references" - os.mkdir(ref_dir) - # Write current git commit (SHA id) - repo = git.Repo(search_parent_directories=True) - sha = repo.head.object.hexsha - msg_logger.info(f"Current code is at commit {sha}") - with (ref_dir / 'git_commit.txt').open('w') as f: - f.write(sha) - # Also put all outstanding code change in a diff file. 
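- # (repo.git.diff(t) compares the working tree against the HEAD tree, i.e. the equivalent of running "git diff HEAD".)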
- # This way changes in all git-tracked files are captured. - t = repo.head.commit.tree - with (ref_dir / 'diff.txt').open('w') as f: - f.write(repo.git.diff(t)) - - def make_config_name(self) -> str: - return f"{self.bench.model_name}_{self.run_id}_{self.iter}" - - def get_accuracy(self, cfg: ConfigT) -> Tuple[QoS, QoS, int]: - has_promise_flags = set(cfg.values()).intersection(set(range(1, 7 + 1))) - config_validation_runs = n_promise_valid_runs if has_promise_flags else 1 - if use_proxy: - mean_acc, confidence_acc = self.net_info.proxy_estimate(cfg, self.proxy) - assert has_promise_flags or (mean_acc == confidence_acc) - else: - mean_acc, _ = self.net_info.actual_measure(cfg, 1, is_test_set=False) - confidence_acc = mean_acc - return mean_acc, confidence_acc, config_validation_runs - - -class ProxyTuner(MeasurementInterface): - def __init__(self, args, driver: TunerDriver, tuner_thres: QoS, accept_thres: QoS): - self.tuner_driver = driver - self.model_info = driver.net_info - self.bench = driver.bench - self.tuner_thres = tuner_thres - self.all_configs = driver.states.all_configs - self.pbar = tqdm(total=args.test_limit, leave=False) - objective = ThresholdAccuracyMinimizeTime(tuner_thres.to_scalar()) - input_manager = FixedInputManager(size=driver.bench.get_n_layers()) - super(ProxyTuner, self).__init__( - args, program_name=self.bench.model_name, - input_manager=input_manager, objective=objective - ) - self.accept_thres = accept_thres - - def manipulator(self) -> ConfigurationManipulator: - """Define the search space by creating a ConfigurationManipulator.""" - manipulator = ConfigurationManipulator() - for ext_layer_id, knobs in self.model_info.get_knobs().items(): - manipulator.add_parameter(EnumParameter(ext_layer_id, knobs)) - return manipulator - - def seed_configurations(self): - """Provide baseline config as seed if model uses seed.""" - return [self.bench.get_baseline_config(not is_dev_time)] if self.bench.use_seed else [] - - def run(self, desired_result, input_, limit): - """Run a given configuration then return performance and accuracy.""" - cfg: ConfigT = desired_result.configuration.data - # get_accuracy gives estimation of mean accuracy and 95% confident accuracy - mean_acc, confident_acc, n_runs = self.tuner_driver.get_accuracy(cfg) - # getConfigCost returns the cost associated with the selected configuration - total_comps, speedup = self.bench.compute_config_cost(cfg) - Result = opentuner.resultsdb.models.Result() - Result.time = total_comps - # Convert QoS to scalar, because opentuner does not support custom comparable datatype - Result.accuracy = confident_acc.to_scalar(relative_to=self.tuner_thres) - - # If accuracy is acceptable, write this config - if confident_acc > self.accept_thres: - config_name = self.tuner_driver.make_config_name() - cfg_values = [cfg[layer] for layer in sorted(cfg.keys())] - writing_config = Config( - mean_acc, self.model_info.val_qos, config_name, cfg_values, - n_runs, 95.0, total_comps, speedup - ) - self.all_configs.append(writing_config) - msg_logger.debug( - f"Config chosen with accuracy (mean) = {mean_acc}, (95%) = {confident_acc} " - f"and speedup = {speedup}" - ) - self.tuner_driver.iter += 1 - self.pbar.update() - return Result - - def save_final_config(self, configuration): - """Print final configuration.""" - msg_logger.info(f"Final configuration {configuration.data}") - msg_logger.info("Done with Autotuning run") - - -if __name__ == '__main__': - assert set(networks.keys()).issubset(set(bench_tuner_data.keys())) - for network in 
('alexnet2_hpvm',): - bench_: Benchmark = bench_tuner_data[network] - TunerDriver(bench_).tuner_exec() diff --git a/hpvm/projects/pred_tuner/tests/data/1_1_output.json b/hpvm/projects/pred_tuner/tests/data/1_1_output.json deleted file mode 100644 index 3892ae9622..0000000000 --- a/hpvm/projects/pred_tuner/tests/data/1_1_output.json +++ /dev/null @@ -1,98 +0,0 @@ -{ - "('0', '0', '1', '1', '2', '0')": { - "tensorConvolution": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", - "Baseline": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", - "FP16_Baseline": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", - "ConvSampSim": "32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,", - "ConvApprox": "32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,", - "ConvApproxHalf2": "32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000," - }, - "('0', '0', '1', '1', '2', '1')": { - "tensorConvolution": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", - "Baseline": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", - "FP16_Baseline": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", - "ConvSampSim": "40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,", - "ConvApprox": "40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,", - "ConvApproxHalf2": "40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000," - }, - "('0', '0', '1', '1', '3', '0')": { - "tensorConvolution": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", - "Baseline": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", - "FP16_Baseline": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", - "ConvSampSim": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", - "ConvApprox": 
"36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", - "ConvApproxHalf2": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000," - }, - "('0', '0', '1', '1', '3', '1')": { - "tensorConvolution": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", - "Baseline": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", - "FP16_Baseline": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", - "ConvSampSim": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", - "ConvApprox": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", - "ConvApproxHalf2": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000," - }, - "('0', '0', '1', '1', '4', '0')": { - "tensorConvolution": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", - "Baseline": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", - "FP16_Baseline": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", - "ConvSampSim": "32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,", - "ConvApprox": "32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,", - "ConvApproxHalf2": "31.984375,31.984375,31.984375,31.984375,31.984375,31.984375,31.984375,31.984375,31.984375,31.984375,31.984375,31.984375,31.984375,31.984375,31.984375,31.984375," - }, - "('0', '0', '1', '1', '4', '1')": { - "tensorConvolution": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", - "Baseline": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", - "FP16_Baseline": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", - "ConvSampSim": "37.333332,37.333332,37.333332,37.333332,37.333332,37.333332,37.333332,37.333332,37.333332,37.333332,37.333332,37.333332,37.333332,37.333332,37.333332,37.333332,", - "ConvApprox": 
"37.333336,37.333336,37.333336,37.333336,37.333336,37.333336,37.333336,37.333336,37.333336,37.333336,37.333336,37.333336,37.333336,37.333336,37.333336,37.333336,", - "ConvApproxHalf2": "37.312500,37.312500,37.312500,37.312500,37.312500,37.312500,37.312500,37.312500,37.312500,37.312500,37.312500,37.312500,37.312500,37.312500,37.312500,37.312500," - }, - "('1', '1', '1', '1', '2', '0')": { - "tensorConvolution": "0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", - "Baseline": "0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", - "FP16_Baseline": "0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", - "ConvSampSim": "0.000000,0.000000,0.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", - "ConvApprox": "0.000000,0.000000,0.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", - "ConvApproxHalf2": 
"0.000000,0.000000,0.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,0.000000,0.000000,0.000000," - }, - "('1', '1', '1', '1', '2', '1')": { - "tensorConvolution": "0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", - "Baseline": "0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", - "FP16_Baseline": "0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", - "ConvSampSim": "0.000000,0.000000,0.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", - "ConvApprox": 
"0.000000,0.000000,0.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", - "ConvApproxHalf2": "0.000000,0.000000,0.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,0.000000,0.000000,0.000000," - }, - "('1', '1', '1', '1', '3', '0')": { - "tensorConvolution": "0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", - "Baseline": "0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", - "FP16_Baseline": "0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", - "ConvSampSim": 
"0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", - "ConvApprox": "0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", - "ConvApproxHalf2": "0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000," - }, - "('1', '1', '1', '1', '3', '1')": { - "tensorConvolution": "0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", - "Baseline": "0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", - "FP16_Baseline": 
"0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", - "ConvSampSim": "0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", - "ConvApprox": "0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", - "ConvApproxHalf2": "0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000," - }, - "('1', '1', '1', '1', '4', '0')": { - "tensorConvolution": "0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", - "Baseline": 
"0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", - "FP16_Baseline": "0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", - "ConvSampSim": "0.000000,0.000000,0.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", - "ConvApprox": "0.000000,0.000000,0.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", - "ConvApproxHalf2": "0.000000,0.000000,0.000000,0.000000,0.000000,31.984375,31.984375,0.000000,0.000000,31.984375,31.984375,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,31.984375,31.984375,0.000000,0.000000,31.984375,31.984375,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,31.984375,31.984375,0.000000,0.000000,31.984375,31.984375,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,31.984375,31.984375,0.000000,0.000000,31.984375,31.984375,0.000000,0.000000,0.000000,0.000000,0.000000," - }, - "('1', '1', '1', '1', '4', '1')": { - "tensorConvolution": 
"0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", - "Baseline": "0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", - "FP16_Baseline": "0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", - "ConvSampSim": "0.000000,0.000000,0.000000,0.000000,0.000000,37.333332,37.333332,0.000000,0.000000,37.333332,37.333332,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,37.333332,37.333332,0.000000,0.000000,37.333332,37.333332,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,37.333332,37.333332,0.000000,0.000000,37.333332,37.333332,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,37.333332,37.333332,0.000000,0.000000,37.333332,37.333332,0.000000,0.000000,0.000000,0.000000,0.000000,", - "ConvApprox": "0.000000,0.000000,0.000000,0.000000,0.000000,37.333336,37.333336,0.000000,0.000000,37.333336,37.333336,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,37.333336,37.333336,0.000000,0.000000,37.333336,37.333336,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,37.333336,37.333336,0.000000,0.000000,37.333336,37.333336,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,37.333336,37.333336,0.000000,0.000000,37.333336,37.333336,0.000000,0.000000,0.000000,0.000000,0.000000,", - "ConvApproxHalf2": 
"0.000000,0.000000,0.000000,0.000000,0.000000,37.312500,37.312500,0.000000,0.000000,37.312500,37.312500,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,37.312500,37.312500,0.000000,0.000000,37.312500,37.312500,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,37.312500,37.312500,0.000000,0.000000,37.312500,37.312500,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,37.312500,37.312500,0.000000,0.000000,37.312500,37.312500,0.000000,0.000000,0.000000,0.000000,0.000000," - } -} diff --git a/hpvm/projects/pred_tuner/tests/data/3_3_output.json b/hpvm/projects/pred_tuner/tests/data/3_3_output.json deleted file mode 100644 index 2ccb23c01c..0000000000 --- a/hpvm/projects/pred_tuner/tests/data/3_3_output.json +++ /dev/null @@ -1,146 +0,0 @@ -{ - "('0', '0', '1', '1', '2', '0')": { - "tensorConvolution": "41.000000,41.000000,41.000000,41.000000,", - "Baseline": "41.000000,41.000000,41.000000,41.000000,", - "FP16_Baseline": "41.000000,41.000000,41.000000,41.000000,", - "ConvSampSim": "26.000000,26.000000,26.000000,26.000000,", - "ConvApprox": "26.000000,26.000000,26.000000,26.000000,", - "ConvApproxHalf2": "26.000000,26.000000,26.000000,26.000000," - }, - "('0', '0', '1', '1', '2', '1')": { - "tensorConvolution": "41.000000,41.000000,41.000000,41.000000,", - "Baseline": "41.000000,41.000000,41.000000,41.000000,", - "FP16_Baseline": "41.000000,41.000000,41.000000,41.000000,", - "ConvSampSim": "56.000000,56.000000,56.000000,56.000000,", - "ConvApprox": "56.000000,56.000000,56.000000,56.000000,", - "ConvApproxHalf2": "56.000000,56.000000,56.000000,56.000000," - }, - "('0', '0', '1', '1', '3', '0')": { - "tensorConvolution": "41.000000,41.000000,41.000000,41.000000,", - "Baseline": "41.000000,41.000000,41.000000,41.000000,", - "FP16_Baseline": "41.000000,41.000000,41.000000,41.000000,", - "ConvSampSim": "39.000000,39.000000,39.000000,39.000000,", - "ConvApprox": "39.000000,39.000000,39.000000,39.000000,", - "ConvApproxHalf2": "39.000000,39.000000,39.000000,39.000000," - }, - "('0', '0', '1', '1', '3', '1')": { - "tensorConvolution": "41.000000,41.000000,41.000000,41.000000,", - "Baseline": "41.000000,41.000000,41.000000,41.000000,", - "FP16_Baseline": "41.000000,41.000000,41.000000,41.000000,", - "ConvSampSim": "42.000000,42.000000,42.000000,42.000000,", - "ConvApprox": "42.000000,42.000000,42.000000,42.000000,", - "ConvApproxHalf2": "42.000000,42.000000,42.000000,42.000000," - }, - "('0', '0', '1', '1', '4', '0')": { - "tensorConvolution": "41.000000,41.000000,41.000000,41.000000,", - "Baseline": "41.000000,41.000000,41.000000,41.000000,", - "FP16_Baseline": "41.000000,41.000000,41.000000,41.000000,", - "ConvSampSim": "36.000000,36.000000,36.000000,36.000000,", - "ConvApprox": "36.000000,36.000000,36.000000,36.000000,", - "ConvApproxHalf2": "35.968750,35.968750,35.968750,35.968750," - }, - "('0', '0', '1', '1', '4', '1')": { - "tensorConvolution": "41.000000,41.000000,41.000000,41.000000,", - "Baseline": "41.000000,41.000000,41.000000,41.000000,", - "FP16_Baseline": "41.000000,41.000000,41.000000,41.000000,", - "ConvSampSim": "45.333336,45.333336,45.333336,45.333336,", - "ConvApprox": "45.333336,45.333336,45.333336,45.333336,", - "ConvApproxHalf2": "45.312500,45.312500,45.312500,45.312500," - }, - "('1', '1', '1', '1', '2', '0')": { - "tensorConvolution": 
"18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,", - "Baseline": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,", - "FP16_Baseline": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,", - "ConvSampSim": "12.000000,18.000000,18.000000,12.000000,18.000000,26.000000,26.000000,18.000000,18.000000,26.000000,26.000000,18.000000,12.000000,18.000000,18.000000,12.000000,", - "ConvApprox": "12.000000,18.000000,18.000000,12.000000,18.000000,26.000000,26.000000,18.000000,18.000000,26.000000,26.000000,18.000000,12.000000,18.000000,18.000000,12.000000,", - "ConvApproxHalf2": "12.000000,18.000000,18.000000,12.000000,18.000000,26.000000,26.000000,18.000000,18.000000,26.000000,26.000000,18.000000,12.000000,18.000000,18.000000,12.000000," - }, - "('1', '1', '1', '1', '2', '1')": { - "tensorConvolution": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,", - "Baseline": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,", - "FP16_Baseline": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,", - "ConvSampSim": "24.000000,36.000000,36.000000,24.000000,36.000000,56.000000,56.000000,36.000000,36.000000,56.000000,56.000000,36.000000,24.000000,36.000000,36.000000,24.000000,", - "ConvApprox": "24.000000,36.000000,36.000000,24.000000,36.000000,56.000000,56.000000,36.000000,36.000000,56.000000,56.000000,36.000000,24.000000,36.000000,36.000000,24.000000,", - "ConvApproxHalf2": "24.000000,36.000000,36.000000,24.000000,36.000000,56.000000,56.000000,36.000000,36.000000,56.000000,56.000000,36.000000,24.000000,36.000000,36.000000,24.000000," - }, - "('1', '1', '1', '1', '3', '0')": { - "tensorConvolution": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,", - "Baseline": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,", - "FP16_Baseline": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,", - "ConvSampSim": "18.000000,27.000000,27.000000,18.000000,25.500000,39.000000,39.000000,25.500000,25.500000,39.000000,39.000000,25.500000,18.000000,27.000000,27.000000,18.000000,", - "ConvApprox": "18.000000,27.000000,27.000000,18.000000,25.500000,39.000000,39.000000,25.500000,25.500000,39.000000,39.000000,25.500000,18.000000,27.000000,27.000000,18.000000,", - "ConvApproxHalf2": "18.000000,27.000000,27.000000,18.000000,25.500000,39.000000,39.000000,25.500000,25.500000,39.000000,39.000000,25.500000,18.000000,27.000000,27.000000,18.000000," - }, - "('1', '1', '1', '1', '3', '1')": { - "tensorConvolution": 
"18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,", - "Baseline": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,", - "FP16_Baseline": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,", - "ConvSampSim": "18.000000,27.000000,27.000000,18.000000,28.500000,42.000000,42.000000,27.000000,28.500000,42.000000,42.000000,27.000000,18.000000,27.000000,27.000000,18.000000,", - "ConvApprox": "18.000000,27.000000,27.000000,18.000000,28.500000,42.000000,42.000000,27.000000,28.500000,42.000000,42.000000,27.000000,18.000000,27.000000,27.000000,18.000000,", - "ConvApproxHalf2": "18.000000,27.000000,27.000000,18.000000,28.500000,42.000000,42.000000,27.000000,28.500000,42.000000,42.000000,27.000000,18.000000,27.000000,27.000000,18.000000," - }, - "('1', '1', '1', '1', '4', '0')": { - "tensorConvolution": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,", - "Baseline": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,", - "FP16_Baseline": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,", - "ConvSampSim": "16.000000,22.666666,22.666666,13.333333,25.333334,36.000000,36.000000,22.666668,25.333334,36.000000,36.000000,22.666668,18.666666,25.333334,25.333334,16.000000,", - "ConvApprox": "16.000000,22.666666,22.666666,13.333333,25.333334,36.000000,36.000000,22.666668,25.333334,36.000000,36.000000,22.666668,18.666666,25.333334,25.333334,16.000000,", - "ConvApproxHalf2": "16.000000,22.671875,22.671875,13.328125,25.328125,35.968750,35.968750,22.656250,25.328125,35.968750,35.968750,22.656250,18.671875,25.328125,25.328125,16.000000," - }, - "('1', '1', '1', '1', '4', '1')": { - "tensorConvolution": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,", - "Baseline": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,", - "FP16_Baseline": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,", - "ConvSampSim": "18.666668,29.333332,29.333332,20.000000,29.333332,45.333336,45.333336,29.333332,29.333332,45.333336,45.333336,29.333332,20.000000,29.333332,29.333332,18.666668,", - "ConvApprox": "18.666668,29.333332,29.333332,20.000000,29.333332,45.333336,45.333336,29.333332,29.333332,45.333336,45.333336,29.333332,20.000000,29.333332,29.333332,18.666668,", - "ConvApproxHalf2": "18.656250,29.343750,29.343750,20.000000,29.328125,45.312500,45.312500,29.343750,29.328125,45.312500,45.312500,29.343750,20.000000,29.328125,29.328125,18.656250," - }, - "('1', '1', '2', '2', '2', '0')": { - "tensorConvolution": "18.000000,27.000000,27.000000,41.000000,", - "Baseline": "18.000000,27.000000,27.000000,41.000000,", - "FP16_Baseline": 
"18.000000,27.000000,27.000000,41.000000,", - "ConvSampSim": "12.000000,18.000000,18.000000,26.000000,", - "ConvApprox": "12.000000,18.000000,18.000000,26.000000,", - "ConvApproxHalf2": "12.000000,18.000000,18.000000,26.000000," - }, - "('1', '1', '2', '2', '2', '1')": { - "tensorConvolution": "18.000000,27.000000,27.000000,41.000000,", - "Baseline": "18.000000,27.000000,27.000000,41.000000,", - "FP16_Baseline": "18.000000,27.000000,27.000000,41.000000,", - "ConvSampSim": "24.000000,36.000000,36.000000,56.000000,", - "ConvApprox": "24.000000,36.000000,36.000000,56.000000,", - "ConvApproxHalf2": "24.000000,36.000000,36.000000,56.000000," - }, - "('1', '1', '2', '2', '3', '0')": { - "tensorConvolution": "18.000000,27.000000,27.000000,41.000000,", - "Baseline": "18.000000,27.000000,27.000000,41.000000,", - "FP16_Baseline": "18.000000,27.000000,27.000000,41.000000,", - "ConvSampSim": "18.000000,27.000000,25.500000,39.000000,", - "ConvApprox": "18.000000,27.000000,25.500000,39.000000,", - "ConvApproxHalf2": "18.000000,27.000000,25.500000,39.000000," - }, - "('1', '1', '2', '2', '3', '1')": { - "tensorConvolution": "18.000000,27.000000,27.000000,41.000000,", - "Baseline": "18.000000,27.000000,27.000000,41.000000,", - "FP16_Baseline": "18.000000,27.000000,27.000000,41.000000,", - "ConvSampSim": "18.000000,27.000000,28.500000,42.000000,", - "ConvApprox": "18.000000,27.000000,28.500000,42.000000,", - "ConvApproxHalf2": "18.000000,27.000000,28.500000,42.000000," - }, - "('1', '1', '2', '2', '4', '0')": { - "tensorConvolution": "18.000000,27.000000,27.000000,41.000000,", - "Baseline": "18.000000,27.000000,27.000000,41.000000,", - "FP16_Baseline": "18.000000,27.000000,27.000000,41.000000,", - "ConvSampSim": "16.000000,22.666666,25.333334,36.000000,", - "ConvApprox": "16.000000,22.666666,25.333334,36.000000,", - "ConvApproxHalf2": "16.000000,22.671875,25.328125,35.968750," - }, - "('1', '1', '2', '2', '4', '1')": { - "tensorConvolution": "18.000000,27.000000,27.000000,41.000000,", - "Baseline": "18.000000,27.000000,27.000000,41.000000,", - "FP16_Baseline": "18.000000,27.000000,27.000000,41.000000,", - "ConvSampSim": "18.666668,29.333332,29.333332,45.333336,", - "ConvApprox": "18.666668,29.333332,29.333332,45.333336,", - "ConvApproxHalf2": "18.656250,29.343750,29.328125,45.312500," - } -} \ No newline at end of file diff --git a/hpvm/projects/pred_tuner/tests/data/promise.json b/hpvm/projects/pred_tuner/tests/data/promise.json deleted file mode 100644 index 331ff8527a..0000000000 --- a/hpvm/projects/pred_tuner/tests/data/promise.json +++ /dev/null @@ -1,121 +0,0 @@ -{ - "1": [ - [ - -0.980938, - -1.976522, - -2.999873, - -4.095768, - -5.115182, - 0.0, - 5.075658, - 3.972848, - 2.912783, - 2.051733, - 1.004169, - 1.002379 - ], - 45.213196 - ], - "2": [ - [ - -1.017428, - -2.01491, - -2.951011, - -4.042611, - -4.954911, - 0.0, - 5.05412, - 3.951638, - 2.94989, - 1.99723, - 1.001167, - 0.98796 - ], - 12.535809 - ], - "3": [ - [ - -1.003108, - -2.006269, - -3.00263, - -3.97216, - -4.969401, - 0.0, - 5.012199, - 4.028375, - 2.950729, - 2.004691, - 1.004823, - 0.991805 - ], - 4.886813 - ], - "4": [ - [ - -1.006497, - -1.975768, - -3.031142, - -4.02248, - -5.061712, - 0.0, - 5.017349, - 3.992676, - 2.998843, - 2.002693, - 0.997514, - 1.00649 - ], - 3.129643 - ], - "5": [ - [ - -1.001629, - -1.976943, - -2.982565, - -3.964559, - -4.99636, - 0.0, - 4.992359, - 3.984341, - 2.990126, - 2.005831, - 1.000539, - 1.003548 - ], - 2.181237 - ], - "6": [ - [ - -1.003159, - -1.985892, - -3.005964, - -4.008651, - 
-4.992874, - 0.0, - 4.996098, - 4.012099, - 3.001986, - 2.001431, - 0.996138, - 0.997394 - ], - 1.362949 - ], - "7": [ - [ - -1.003133, - -1.99733, - -3.00755, - -4.007799, - -5.003314, - 0.0, - 5.000926, - 3.993208, - 2.988745, - 2.00329, - 0.99986, - 0.995669 - ], - 0.6926 - ] -} \ No newline at end of file diff --git a/hpvm/projects/pred_tuner/tests/data/quantization.json b/hpvm/projects/pred_tuner/tests/data/quantization.json deleted file mode 100644 index 723eaa2b55..0000000000 --- a/hpvm/projects/pred_tuner/tests/data/quantization.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "(-4, 6)": [ - -0.132812, - -4.0, - 0.179688, - -0.40625, - 1.664062, - -2.90625, - 0.6875, - 0.960938, - 6.0, - 6.0, - 2.484375, - 2.992188 - ], - "(-2, 2)": [ - -0.109375, - -2.0, - 0.1875, - -0.40625, - 1.6875, - -2.0, - 0.6875, - 0.984375, - 2.0, - 2.0, - 2.0, - 2.0 - ], - "(-25, 8)": [ - -0.121094, - -25.0, - 0.136719, - -0.507812, - 1.683594, - -2.957031, - 0.652344, - 0.910156, - 6.96875, - 7.097656, - 2.457031, - 2.972656 - ], - "(-10, 10)": [ - -0.15625, - -10.0, - 0.15625, - -0.46875, - 1.640625, - -2.96875, - 0.625, - 0.9375, - 6.953125, - 7.1875, - 2.5, - 2.96875 - ] -} \ No newline at end of file diff --git a/hpvm/projects/pred_tuner/tests/promise.py b/hpvm/projects/pred_tuner/tests/promise.py deleted file mode 100644 index 59506d9425..0000000000 --- a/hpvm/projects/pred_tuner/tests/promise.py +++ /dev/null @@ -1,87 +0,0 @@ -import json -from pathlib import Path - -import torch - -from toolkit import ModuleIndexer, NetApproxSelector -from toolkit.approxdnn import PromiseSim, quantize_256 -from utils import compute_accuracy, init_by_name, run_concat_output - -eps = 1e-5 -delta = 0.05 # Allow for some variance in promise testing - - -def gt_eps(tensor: torch.Tensor) -> bool: - return torch.any(tensor.abs() > eps).item() - - -def compare_quant(groundtruth: dict): - input_tensor = torch.tensor([-0.1, -25, 0.2, -0.4, 1.7, -2.9, 0.7, 0.99, 7, 7.2, 2.5, 3]) - for k, v in groundtruth.items(): - from ast import literal_eval as make_tuple - gt = torch.tensor(v) - ours = quantize_256(input_tensor, *make_tuple(k)) - if gt_eps(gt - ours): - print( - f"Quantization results differ by more than eps = {eps};\n" - f"parameters = {k}\ngroundtruth = {gt}\nours = {ours}" - ) - raise RuntimeError - - -def compare_promise(groundtruth: dict): - input_tensor = torch.tensor([-1, -2, -3, -4, -5, 0, 5, 4, 3, 2, 1, 1], dtype=torch.float) - N = 1000 - for k, (gt_avg, gt_error) in groundtruth.items(): - gt_avg = torch.tensor(gt_avg) - sum_, our_error = torch.zeros_like(input_tensor, dtype=torch.float), 0 - for _ in range(N): - out = PromiseSim.add_promise_noise(input_tensor, int(k)) - sum_ += out - our_error += torch.sum((out - input_tensor) ** 2).item() - our_avg = sum_ / N - our_error = our_error / N - print(gt_avg, our_avg) - if abs(our_error - gt_error) > delta * max(our_error, gt_error): - print( - f"Promise results differ by more than delta = {delta * 100:.1f}%;\n" - f"swing = {k}, groundtruth error = {gt_error}\nours = {our_error}" - ) - raise RuntimeError - - -def is_in_range(mean1: float, std1: float, mean2: float) -> bool: - return mean1 - 3.0 * std1 < mean2 < mean1 + 3.0 * std1 - - -def compare_accuracy(): - baseline, testloader, _, shapes = init_by_name('lenet_hpvm') - baseline_dag = ModuleIndexer(baseline) - nas = NetApproxSelector(baseline_dag, dev_time_only=False) - # {0: 1} -> 98.4808 0.1195 - approx1 = nas.apply_approx_by_config({3: 1}) - acc1 = compute_accuracy(run_concat_output(approx1.module, testloader), 
testloader) - assert is_in_range(0.984808, 0.001195, acc1) - # {0: 2} -> 99.5933 0.0519 - approx2 = nas.apply_approx_by_config({3: 2}) - acc2 = compute_accuracy(run_concat_output(approx2.module, testloader), testloader) - assert is_in_range(0.995933, 0.000519, acc2) - # {0: 3} -> 99.6723 0.0347 - approx3 = nas.apply_approx_by_config({3: 3}) - acc3 = compute_accuracy(run_concat_output(approx3.module, testloader), testloader) - assert is_in_range(0.996723, 0.000347, acc3) - print("Accuracy test passed.") - - -def main(): - data_folder = Path(__file__).parent / 'data' - with open(data_folder / 'quantization.json') as f: - compare_quant(json.load(f)) - with open(data_folder / 'promise.json') as f: - compare_promise(json.load(f)) - compare_accuracy() - print("Tests passed.") - - -if __name__ == '__main__': - main() diff --git a/hpvm/projects/pred_tuner/tests/resnet50.py b/hpvm/projects/pred_tuner/tests/resnet50.py deleted file mode 100644 index 71711fbfd0..0000000000 --- a/hpvm/projects/pred_tuner/tests/resnet50.py +++ /dev/null @@ -1,33 +0,0 @@ -from toolkit import ModuleIndexer, NetApproxSelector -from utils import compute_accuracy, init_by_name, run_concat_output - - -def float_eq(f1, f2): - return abs(f1 - f2) < 1e-5 - - -def main(): - baseline, testloader, _, shapes = init_by_name('resnet50_imagenet_hpvm') - baseline_dag = ModuleIndexer(baseline) - nas = NetApproxSelector(baseline_dag) - # baseline - baseline_output = run_concat_output(baseline_dag.module, testloader) - baseline_acc = compute_accuracy(baseline_output, testloader) - assert float_eq(baseline_acc, 0.773) - # {13: 242} -> 75.5 - approx1 = nas.apply_approx_by_config({82: 242}) - acc1 = compute_accuracy(run_concat_output(approx1.module, testloader), testloader) - assert float_eq(acc1, 0.755) - # {13: 242, 17: 247} -> 74.6 - approx2 = nas.apply_approx_by_config({82: 242, 108: 247}) - acc2 = compute_accuracy(run_concat_output(approx2.module, testloader), testloader) - assert float_eq(acc2, 0.746) - # {9: 237, 13: 242, 17: 247} -> 74.1 - approx3 = nas.apply_approx_by_config({55: 237, 82: 242, 108: 247}) - acc3 = compute_accuracy(run_concat_output(approx3.module, testloader), testloader) - assert float_eq(acc3, 0.741) - print("Accuracy test passed.") - - -if __name__ == '__main__': - main() diff --git a/hpvm/projects/pred_tuner/tests/sampling.py b/hpvm/projects/pred_tuner/tests/sampling.py deleted file mode 100644 index 707506ef7b..0000000000 --- a/hpvm/projects/pred_tuner/tests/sampling.py +++ /dev/null @@ -1,90 +0,0 @@ -import json -from copy import deepcopy -from pathlib import Path -from typing import Tuple - -import torch - -from models.hpvm import HPVMConvBundle -from toolkit import Conv2dSampling, Conv2dSamplingFP16, FP16Approx - -eps = 1e-5, 0.05 - - -def sampling_3_3_consts() -> Tuple[torch.Tensor, torch.Tensor]: - input_tensor = torch.ones(1, 3, 4, 4) - # Filter has value [2, 1, 2, 1, 2, 1...] 
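# A hand check of the ground truth this test compares against (see the JSON
# data above; keys there are (pad_h, pad_w, stride_h, stride_w, skip_every,
# offset) tuples, parsed in compare_to_groundtruth below): each 3x3x3 kernel
# holds fourteen 2s and thirteen 1s, so every entry of the exact, unpadded
# convolution over an all-ones input is 14 * 2 + 13 * 1 = 41, the
# "tensorConvolution" value. Sampling with skip_every=2, offset=0 zeroes the
# even-indexed weights (the 2s) and rescales by 1 + 1/(2 - 1) = 2, giving
# 13 * 2 = 26; offset=1 zeroes the 1s instead, giving 28 * 2 = 56. These are
# the "ConvSampSim"/"ConvApprox" values for the ('0', '0', '1', '1', '2', *)
# keys in 3_3_output.json.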
- filter_tensor = torch.ones(1, 3, 3, 3) - filter_tensor.view(-1)[::2] = 2 - return input_tensor, filter_tensor - - -def sampling_1_1_consts() -> Tuple[torch.Tensor, torch.Tensor]: - input_tensor = torch.ones(1, 9, 2, 2) * 2 - filter_tensor = torch.ones(4, 9, 1, 1) * 2 - return input_tensor, filter_tensor - - -def parse_tensor_str(string: str) -> torch.Tensor: - # String has an extra ',' at the end, so skipping an empty string after split - entries = [float(s) for s in string.split(',')[:-1]] - return torch.tensor(entries).cuda() - - -def compare_to_groundtruth(groundtruth: dict, const_func): - input_tensor, filter_tensor = const_func() - input_tensor = input_tensor.cuda() - o_ch, i_ch, h, w = filter_tensor.size() - assert h == w - for k, v in groundtruth.items(): - def compare(groundtruth_t: torch.Tensor, ours_t: torch.Tensor, is_fp16: bool): - diff = groundtruth_t - ours_t - eps_ = eps[1] if is_fp16 else eps[0] - is_diff = torch.any(diff.abs() > eps_).item() - if is_diff: - print( - f"Results differ by more than eps = {eps};\n" - f"parameters = {k}\n" - f"groundtruth = {groundtruth_t}\n" - f"ours = {ours_t}" - ) - raise RuntimeError - - from ast import literal_eval as make_tuple - pad_h, pad_w, stride_h, stride_w, skip_every, offset = [int(s) for s in make_tuple(k)] - conv_layer = HPVMConvBundle( - i_ch, o_ch, h, stride=(stride_h, stride_w), padding=(pad_h, pad_w) - ) - conv_layer.weight.data = filter_tensor - conv_layer.bias.data = torch.zeros_like(conv_layer.bias.data) - conv_layer = conv_layer.cuda() - our_baseline = conv_layer(input_tensor).flatten() - fp16 = FP16Approx(deepcopy(conv_layer)) - our_fp16 = fp16(input_tensor).flatten() - sampling = Conv2dSampling(skip_every, offset, 1.0, deepcopy(conv_layer)) - our_sampled = sampling(input_tensor).flatten() - sampling_fp16 = Conv2dSamplingFP16(skip_every, offset, 1.0, deepcopy(conv_layer)) - our_sampled_fp16 = sampling_fp16(input_tensor).float().flatten() - groundtruth_baseline = parse_tensor_str(v['Baseline']) - compare(groundtruth_baseline, our_baseline, False) - groundtruth_sampled1 = parse_tensor_str(v['ConvApprox']) - compare(groundtruth_sampled1, our_sampled, False) - groundtruth_sampled2 = parse_tensor_str(v['ConvSampSim']) - compare(groundtruth_sampled2, our_sampled, False) - groundtruth_baseline_fp16 = parse_tensor_str(v['FP16_Baseline']) - compare(groundtruth_baseline_fp16, our_fp16, True) - groundtruth_sampled_fp16 = parse_tensor_str(v['ConvApproxHalf2']) - compare(groundtruth_sampled_fp16, our_sampled_fp16, True) - - -def main(): - data_folder = Path(__file__).parent / 'data' - with open(data_folder / '1_1_output.json') as f: - compare_to_groundtruth(json.load(f), sampling_1_1_consts) - with open(data_folder / '3_3_output.json') as f: - compare_to_groundtruth(json.load(f), sampling_3_3_consts) - print("Tests passed.") - - -if __name__ == '__main__': - main() diff --git a/hpvm/projects/pred_tuner/toolkit/__init__.py b/hpvm/projects/pred_tuner/toolkit/__init__.py deleted file mode 100644 index 892b8c1542..0000000000 --- a/hpvm/projects/pred_tuner/toolkit/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .approxdnn import Approximation, AvailableApproximations, Conv2dSampling, FP16Approx, \ - PerforateConv2dStride, PromiseSim -from .estimators import LinearCombEstimator, LinearEstimator, LinearQoSEstimator, WeightedLinearCombEstimator -from .transform import ConfigT, NetApproxSelector, StateCapturer diff --git a/hpvm/projects/pred_tuner/toolkit/approxdnn.py b/hpvm/projects/pred_tuner/toolkit/approxdnn.py deleted file mode 100644 
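The __init__.py above is the toolkit's entire tuner-facing surface. A minimal usage sketch, mirroring the call pattern of tests/promise.py (init_by_name, run_concat_output and compute_accuracy are helpers from this repo's utils package; the config {3: 1}, meaning knob 1 applied to the layer indexed 3, is illustrative only):

    from toolkit import ModuleIndexer, NetApproxSelector
    from utils import compute_accuracy, init_by_name, run_concat_output

    baseline, testloader, _, shapes = init_by_name('lenet_hpvm')
    nas = NetApproxSelector(ModuleIndexer(baseline), dev_time_only=False)
    approx = nas.apply_approx_by_config({3: 1})  # returns a ModuleIndexer
    acc = compute_accuracy(run_concat_output(approx.module, testloader), testloader)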
index 06abca85d5..0000000000 --- a/hpvm/projects/pred_tuner/toolkit/approxdnn.py +++ /dev/null @@ -1,442 +0,0 @@ -"""All approximation techniques for torch.nn layers.""" -import abc -from typing import Dict, Iterable, List, Optional, Type - -import torch -from torch.nn import Linear, Module - -from models.hpvm import HPVMConvBundle -from utils import get_tensorrt_dir - - -def interpolate_first_dim(tensor: torch.Tensor, interp_indices: Iterable[int]): - def tensor_at(idx_: int): - if idx_ in interp_indices: - raise IndexError - if idx_ < 0 or idx_ >= tensor.size()[0]: - return torch.zeros_like(tensor[0]) - return tensor[idx_] - - for idx in interp_indices: - if idx < 0 or idx >= tensor.size()[0]: - raise IndexError - elif idx == 0: # First row - tensor[idx] = tensor_at(1) - elif idx == tensor.size()[0] - 1: # Last row - tensor[idx] = tensor_at(idx - 1) - else: # Middle rows - tensor[idx] = (tensor_at(idx - 1) + tensor_at(idx + 1)) / 2.0 - return tensor - - -class Approximation(abc.ABC): - @property - @abc.abstractmethod - def deterministic(self) -> bool: - pass - - @property - @abc.abstractmethod - def devtime(self) -> bool: - pass - - @property - @abc.abstractmethod - def fp32(self) -> bool: - pass - - @abc.abstractmethod - def apply(self, module: Module) -> Module: - pass - - @abc.abstractmethod - def is_less_approx(self, other: 'Approximation') -> Optional[bool]: - pass - - def __repr__(self): - return f"{self.__class__}({self.__dict__})" - - -class PerforateConv2dStride(Approximation): - r"""Simulation of strided perforated convolution for `torch.nn.Conv2d`. - - Perforated convolution skips computing some entries in the output and instead interpolates - these values, to reduce the number of float-ops needed to complete a convolution op. - In this implementation, selected rows or columns of the output are discarded and replaced - with linearly interpolated values from the neighboring rows or columns. Each channel is - considered independently. - This implementation gives the same output as actual perforated convolution but without the - performance benefit. - - Parameters - ---------- - direction_is_row : bool - If True, discard and interpolate rows, otherwise columns. - stride : int :math:`\in [2, +\infty)` - Skip 1 row/column of the convolution output per `stride` rows/columns. - offset : int :math:`\in [0, stride)` - The first skipped row/column is at index `offset`. - - Attributes - ---------- - interp_axis : int :math:`\in \{2, 3\}` - The axis that will be perforated over. As the input is an NCHW tensor, if - `direction_is_row` then `interp_axis = 2`, otherwise `interp_axis = 3`. - stride : int :math:`\in [2, +\infty)` - Equal to parameter `stride`. - offset : int :math:`\in [0, stride)` - Equal to parameter `offset`.
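Examples
--------
A hand-checkable sketch of the interpolation this class relies on, using the
`interpolate_first_dim` helper defined above (the values are arbitrary): with
stride=2 and offset=1 on a 4-row map, rows 1 and 3 are dropped and refilled
from their surviving neighbors:

    import torch
    t = torch.tensor([[1.], [9.], [3.], [9.]])
    interpolate_first_dim(t, [1, 3])
    # row 1 -> (row 0 + row 2) / 2 = 2.0; row 3 is last, so it copies row 2
    # t is now [[1.], [2.], [3.], [3.]]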
- """ - - def __init__(self, direction_is_row: bool, stride: int, offset: int, use_fp16: bool): - assert stride >= 2 - assert 0 <= offset < stride - self.interp_axis = 2 if direction_is_row else 3 - self.stride = stride - self.offset = offset - self.fp16 = use_fp16 - - @property - def deterministic(self) -> bool: - return True - - @property - def devtime(self) -> bool: - return not self.fp16 - - @property - def fp32(self) -> bool: - return not self.fp16 - - def is_less_approx(self, other: Approximation) -> Optional[bool]: - return None - - class PerforateConv2dStrideModule(Module): - def __init__(self, conv: HPVMConvBundle, approx: 'PerforateConv2dStride'): - super().__init__() - self.conv = conv - self.approx = approx - if self.approx.fp16: - self.conv = self.conv.half() - - def forward(self, x: torch.Tensor): - if self.approx.fp16: - x = x.half() - x = self.conv.input_to_conv(x) - assert x.dim() == 4 - # Put self.approx.interp_axis to first axis temporarily - x = x.transpose(0, self.approx.interp_axis) - interp_indices = torch.tensor(range(self.approx.offset, x.size(0), self.approx.stride)) - x = interpolate_first_dim(x, interp_indices) - # Putting axes back - x = x.transpose(0, self.approx.interp_axis) - x = self.conv.conv_to_output(x) - if self.approx.fp16: - assert x.dtype == torch.float16 - return x.float() - - def apply(self, module: HPVMConvBundle) -> PerforateConv2dStrideModule: - return self.PerforateConv2dStrideModule(module, self) - - -class Conv2dSampling(Approximation): - r"""Simulation of sampled convolution for `torch.nn.Conv2d`. - - Skips some elements of the convolution kernel in a uniform, strided manner, - to reduce the amount of float-ops needed to compute each output entry. - This implementation gives the same output as actual sampled convolution but without the - performance benefit. - - Parameters - ---------- - skip_every: int - Skip 1 element in the convolution kernel per `skip_every` elements. - skip_offset : int :math:`\in [0, +\infty)` - Index of first element to be skipped. - For example, if `skip_every = 3` and `skip_offset = 1`, then indices skipped - will be [1, 4, 7, ...] - interp_rate : float - The weight will be compensated ("interpolated") with a ratio after skipping elements, - which is naturally equal to :math:`1 + (1 / (skip_every - 1)`. - `interp_rate` modifies this rate to :math:`1 + (1 / (skip_every - 1) \times interp_rate`. - use_fp16 : bool - Whether to use fp16 weight/input or not. - """ - - def __init__( - self, skip_every: int, skip_offset: int, interp_rate: float, use_fp16: bool - ): - assert skip_every >= 2 and skip_offset >= 0 - self.skip_every = skip_every - self.skip_offset = skip_offset - self.interp_rate = interp_rate - self.fp16 = use_fp16 - - @property - def deterministic(self) -> bool: - return True - - @property - def devtime(self) -> bool: - return not self.fp16 - - @property - def fp32(self) -> bool: - return not self.fp16 - - def is_less_approx(self, other: Approximation) -> Optional[bool]: - return None - - @staticmethod - def sample_conv_weight( - interp_rate: float, skip_every: int, skip_offset: int, weight: torch.Tensor - ): - r"""Samples (skips & interpolates) convolution kernel according to parameters. - - For a given `weight` tensor of shape `(C1, C2, H, W)`, sample each output channel - (on axis 0) independently. - Flatten each output channel tensor into 1 dim. - In normal cases, set elements at indices ``range(skip_offset, C_2 * H * W, skip_every)`` - to 0. 
- However, if `skip_every` == `h` == `w` == 3, we may end up skipping the same whole rows for - each input channel, which is undesirable. - Instead, increment the offset by 1 for each input channel. - Last, multiply the kernel by the reciprocal of the fraction of elements kept, to compensate - for the dropped elements ("interpolation"). - """ - if len(weight.shape) != 4: - raise ValueError("Conv2d weight should be 4-dimensional") - c1, c2, h, w = weight.shape - if skip_every == h == w == 3: - # Indices (0..h*w) to skip for each input channel - per_chan_skip_indices = [ - range((i_chan + skip_offset) % skip_every, h * w, skip_every) - for i_chan in range(c2) - ] - # Indices (0..c2*h*w) for each output channel, created by adding i*h*w for the ith channel. - skip_indices = torch.tensor([ - x + i * h * w for i, per_chan in enumerate(per_chan_skip_indices) - for x in per_chan - ]) - else: - # Indices (0..c2*h*w) to skip for each output channel - skip_indices = torch.arange(skip_offset, c2 * h * w, skip_every) - flat_weight = weight.reshape(c1, -1) - flat_weight[:, skip_indices] = 0 - interp_rate = 1 + (1 / (skip_every - 1) * interp_rate) - flat_weight *= interp_rate - return flat_weight.reshape_as(weight) - - def apply(self, module: HPVMConvBundle) -> HPVMConvBundle: - # Not copying the weight tensor leads to a memory leak. - cloned_conv_w = module.weight.clone().detach() - module.weight.data = self.sample_conv_weight( - self.interp_rate, self.skip_every, self.skip_offset, cloned_conv_w - ) - return module - - -def quantize_256(tensor: torch.Tensor, range_min: float, range_max: float) -> torch.Tensor: - """Quantize a tensor so that it contains only 256 unique float values.""" - quantize_range = 256 - input_range = range_max - range_min - mul = input_range / quantize_range - # Map the tensor into the [0, 256] range. - affined = (tensor - range_min) / mul - # Convert the tensor to int and back to float so it holds - # 256 (actually 257, following the HPVM implementation) unique float values in [0, 256]. - # Then map it back to the original range with the reverse affine transform. - quanted = torch.floor(affined).to(torch.int).to(torch.float) - quanted_float = quanted * mul + range_min - # Clip the tensor to the quantization range. - return torch.clamp(quanted_float, range_min, range_max) - - -class PromiseSim(Approximation): - scaling_values = [0.75, 0.64, 0.336, 0.21, 0.168, 0.14, 0.11, 0.0784, 0.005] - - def __init__(self, noise_level: int): - super().__init__() - self.noise_level = noise_level - - @property - def deterministic(self) -> bool: - return False - - @property - def devtime(self) -> bool: - return False - - @property - def fp32(self) -> bool: - return False - - def is_less_approx(self, other: Approximation) -> Optional[bool]: - if isinstance(other, PromiseSim): - return self.noise_level > other.noise_level - return None - - def add_promise_noise(self, tensor: torch.Tensor): - scale = self.scaling_values[self.noise_level] - noise = torch.normal( - mean=0.0, std=scale, size=tensor.size(), device=tensor.device - ) - return noise * tensor + tensor - - class PromiseSimModule(Module): - def __init__(self, module: HPVMConvBundle, approx: 'PromiseSim'): - super().__init__() - self.input_r, weight_r, bias_r, self.output_r = module.conv_ranges - module.weight.data = quantize_256(module.weight, *weight_r) - if module.bias is not None: - module.bias.data = quantize_256(module.bias, *bias_r) - self.module = module - self.approx = approx - - def forward(self, input_: torch.Tensor) -> torch.Tensor: - # Quantize input, weight, bias (see __init__), and add noise to input.
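# A worked instance of quantize_256, cross-checkable against
# tests/data/quantization.json above: for the range (-2, 2) the step is
# (2 - (-2)) / 256 = 0.015625, so the input value 1.7 maps to
# floor((1.7 + 2) / 0.015625) = 236, which dequantizes to
# 236 * 0.015625 - 2 = 1.6875, exactly the stored groundtruth value.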
- input_ = quantize_256(input_, *self.input_r) - input_ = self.approx.add_promise_noise(input_) - output = self.module(input_) - # Finally, quantize the output as well. - return quantize_256(output, *self.output_r) - - def apply(self, module: HPVMConvBundle) -> PromiseSimModule: - return self.PromiseSimModule(module, self) - - -class FP16Approx(Approximation): - def __init__(self): - super().__init__() - - @property - def deterministic(self) -> bool: - return True - - @property - def devtime(self) -> bool: - return False - - @property - def fp32(self) -> bool: - return False - - def is_less_approx(self, other: Approximation) -> Optional[bool]: - return None - - class FP16ApproxModule(Module): - def __init__(self, module: Module): - super().__init__() - self.module = module.half() - - def forward(self, x: torch.Tensor) -> torch.Tensor: - x: torch.Tensor = self.module(x.half()) - assert x.dtype == torch.float16 - return x.float() - - def apply(self, module: Module) -> FP16ApproxModule: - return self.FP16ApproxModule(module) - - -AllApproxesT = Dict[int, Approximation] -TypeApproxesT = Dict[Type[Module], List[int]] - - -class AvailableApproximations: - r"""Holds a list of all available "approximation info": approximation + properties. - - For properties see `Approximation`. - - Parameters - ---------- - all_knobs: Dict[int, Approximation] - A dict from knob index to `Approximation` instance. - Also see the class method `from_global_knobs_file`. - - Attributes - ---------- - all_knobs : Dict[int, Approximation] - A mapping from approximation index to the corresponding `Approximation` instance. - type_to_knobs : Dict[Type[Module], List[int]] - A mapping from network layer type (subtype of `torch.nn.Module`) to a list of indexes of - applicable approximations. Values of `type_to_knobs` are always valid keys in `all_knobs`. - """ - - def __init__(self, all_knobs: Dict[int, Approximation], type_to_knobs: TypeApproxesT): - self.all_knobs = all_knobs - self.type_to_knobs = type_to_knobs - - @classmethod - def from_global_knobs_file(cls) -> 'AvailableApproximations': - """Read and parse global_knobs.txt to provide all knobs supported and their indexes. - - Returns an `AvailableApproximations` instance built from two tables: - * a dict from knob index to `Approximation` instance; - * a dict from each supported `torch.nn.Module` subtype to the list of approximation indexes that can be applied to this - type of layer.
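The on-disk format of global_knobs.txt is assumed here from the parser below (the file itself is not part of this diff): seven whitespace-separated fields per line, of which only the first two are read. A made-up "samp" entry would parse as:

    line = "samp,232 2,1,1.0 x x x x x"   # hypothetical line
    desc, knobs, _, _, _, _, _ = line.split()
    category, index = desc.split(',')      # "samp", "232"
    stride, offset, interp_rate = knobs.split(',')
    # after int/int/float conversion, this yields
    # Conv2dSampling(2, 1, 1.0, use_fp16=False) under knob index 232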
- """ - with (get_tensorrt_dir() / 'autotuner/data/global_knobs.txt').open() as f: - lines = f.readlines() - all_knobs = {} - promise_and_fp16 = [] - for line in lines: - desc, knobs, _, _, _, _, _ = line.rstrip().split() - category, index = desc.split(',') - index = int(index) - if category in ('perf', 'perf_fp16'): - row, col, offset = [int(s) for s in knobs.split(',')] - if row > 1 and col > 1: - raise ValueError("Perforation on both row and column is not supported") - if col == 1: - direction_is_row, stride = True, row - else: - direction_is_row, stride = False, col - all_knobs[index] = PerforateConv2dStride( - direction_is_row, stride, offset, 'fp16' in category - ) - elif category in ('samp', 'samp_fp16'): - stride, offset, interp_rate = knobs.split(',') - stride, offset, interp_rate = int(stride), int(offset), float(interp_rate) - all_knobs[index] = Conv2dSampling( - stride, offset, interp_rate, 'fp16' in category - ) - elif category == 'swing_level': - all_knobs[index] = PromiseSim(index) - promise_and_fp16.append(index) - elif category == 'fp16': - all_knobs[index] = FP16Approx() - promise_and_fp16.append(index) - type_to_knobs = { - HPVMConvBundle: list(all_knobs.keys()), - Linear: promise_and_fp16 - } - return cls(all_knobs, type_to_knobs) - - def items(self, dev_time: bool, ignore_fp32: bool) -> Dict[Type[Module], List[int]]: - """Give a list of applicable approximations for each layer type. - - If dev_time is True, returns only devtime approximations, otherwise all approximations. - """ - - def remove_non_dev(type_to_knobs: TypeApproxesT) -> TypeApproxesT: - return { - k: [v for v in vs if self.all_knobs[v].devtime] - for k, vs in type_to_knobs.items() - } - - def remove_fp32(type_to_knobs: TypeApproxesT) -> TypeApproxesT: - return { - k: [v for v in vs if not self.all_knobs[v].fp32] - for k, vs in type_to_knobs.items() - } - - type_to_knobs_ = self.type_to_knobs - if dev_time: - type_to_knobs_ = remove_non_dev(type_to_knobs_) - if ignore_fp32: - type_to_knobs_ = remove_fp32(type_to_knobs_) - return type_to_knobs_ - - def __getitem__(self, item: int) -> Approximation: - """Returns the approximation info for given approximation index.""" - return self.all_knobs[item] diff --git a/hpvm/projects/pred_tuner/toolkit/estimators.py b/hpvm/projects/pred_tuner/toolkit/estimators.py deleted file mode 100644 index acd3533169..0000000000 --- a/hpvm/projects/pred_tuner/toolkit/estimators.py +++ /dev/null @@ -1,383 +0,0 @@ -import abc -import gc -import logging -import pickle -from math import sqrt -from pathlib import Path -from typing import Callable, Dict, Iterable, Iterator, List, Optional, Tuple, TypeVar - -import numpy as np -import torch -from torch.nn import Module -from tqdm import tqdm, trange - -from models.domains import QoS, qos_stats -from .transform import ConfigT, NetApproxSelector - -ProfT = TypeVar('ProfT') -NetOutputT = TypeVar('NetOutputT') -QoST = Callable[[NetOutputT], QoS] -ThresholdEvalT = Callable[[NetOutputT], bool] -ExeT = Callable[[Module], NetOutputT] -KeyT = Tuple[int, int] -KVT = Tuple[KeyT, NetOutputT] -EstmT = Tuple[QoS, QoS] - -msg_logger = logging.getLogger(__name__) - - -class LinearEstimator(abc.ABC): - """Estimate QoS of a config by linearly adding "something" from each approximation of config, and - then applying QoS metric. - - That "something" could be QoS itself (see `LinearQoSEstimator`), or the direct tensor output from - the model (see `LinearTensorEstimator`). 
- In initialization phase, run the model for each 1-approximation config and store the quantity to - be linearly summed in a table. - - Parameters - ---------- - nas: NetApproxSelector - `NetApproxSelector` instance is used to select all 1-approximation configs and evaluate them. - qos: Callable[[torch.Tensor], float] - Quality of Service measure (such as accuracy). Takes model output tensor and returns QoS value. - independent_init: bool - If False, don't initialize self.profile_table, and wait for `coinit_estimators` to fill in - the profile. `coinit_estimators` must be manually called if `init_profile` is False. - - Attributes - ---------- - qos : Callable[[torch.Tensor], float] - Same as parameter `qos`. - baseline_profile : T - Profile value of the baseline model. - profile_table : Dict[KeyT, T] - A mapping from (`layer_idx`, `approx_idx`) to the profile value, with only this approximation - applied (in other words, with configuration ``{layer_idx: approx_idx}`` applied). - """ - - n_nondeterm_runs = 10 - - def __init__( - self, nas: NetApproxSelector, executor: ExeT, qos: QoST, - threshold_eval: ThresholdEvalT, confidence_level: float, - independent_init: bool = True, storage: Path = None - ): - self.nas = nas - self.qos = qos - self.executor = executor - self.storage = storage - self.baseline_profile: ProfT = self.get_baseline_profile() - self.profile_table: Dict[KeyT, ProfT] = {} - self.confidence_level = confidence_level - if independent_init: - for (k, i), output in self._get_all_outputs(nas, self.executor, threshold_eval, storage): - self.profile_table[k, i] = self.handle_output(output) - - @staticmethod - def _load_from_pickle(storage: Path) -> Iterator[KVT]: - if not storage.is_file(): - return - msg_logger.info(f"Found pickle at {storage}") - with storage.open('rb') as f: - while True: - try: - key, tensor = pickle.load(f) - yield key, tensor - except EOFError: - return - - @classmethod - def run_model(cls, nas: NetApproxSelector, config: ConfigT, executor: ExeT) -> torch.Tensor: - is_deterministic = nas.is_deterministic(config) - model = nas.apply_approx_by_config(config).module - if is_deterministic: - ret = executor(model).unsqueeze(0).cpu() - else: - assert cls.n_nondeterm_runs > 0 - ret = torch.stack([ - executor(model) - for _ in trange(cls.n_nondeterm_runs, leave=False) - ]).cpu() - gc.collect() - return ret - - @classmethod - def _get_all_outputs( - cls, nas: NetApproxSelector, executor: ExeT, - threshold_eval: ThresholdEvalT, storage: Path = None - ) -> Iterator[KVT]: - preloaded_acceptable = {} - if storage is not None: - bar = tqdm(cls._load_from_pickle(storage)) - for key, tensor in bar: - bar.set_postfix(key=key) - preloaded_acceptable[key] = threshold_eval(tensor) - yield key, tensor - - def evaluate(k: int, i: int) -> Tuple[bool, Optional[KVT]]: - if (k, i) in preloaded_acceptable: - msg_logger.debug(f"Key {(k, i)} is preloaded.") - return preloaded_acceptable[(k, i)], None - outputs = cls.run_model(nas, {k: i}, executor) - if storage is not None: - with storage.open('ab') as f: - pickle.dump(((k, i), outputs), f) - return threshold_eval(outputs), ((k, i), outputs) - - for key_outputs in nas.filter_approxes(evaluate): - # key_outputs is None means corresponding key has been preloaded (we can't see the key) - if key_outputs is None: - continue - yield key_outputs - - @classmethod - def coinit_estimators( - cls, nas: NetApproxSelector, executor: ExeT, threshold_eval: ThresholdEvalT, - *estm_insts: 'LinearEstimator', storage: Path = None - ): - for (k, i), output 
in cls._get_all_outputs(nas, executor, threshold_eval, storage): - for inst in estm_insts: - inst.profile_table[(k, i)] = inst.handle_output(output) - - @abc.abstractmethod - def get_baseline_profile(self) -> ProfT: - pass - - @abc.abstractmethod - def handle_output(self, outputs: torch.Tensor) -> ProfT: - pass - - @abc.abstractmethod - def estimate(self, config: ConfigT) -> EstmT: - pass - - -class LinearQoSEstimator(LinearEstimator): - """Estimate QoS of a config by linearly adding QoS value. See `LinearEstimator`. - - ProfT = Tuple[QoS(mean), QoS(std)] - NetOutputT = torch.Tensor - """ - - def estimate(self, config: ConfigT) -> EstmT: - baseline_mean: QoS = self.baseline_profile[0] - if not config: - return baseline_mean, baseline_mean - # N * 2 array - profiles = np.array([self.profile_table[kv] for kv in config.items()]) - profiles[:, 0] -= baseline_mean - estm_qos = profiles[:, 0].sum() + baseline_mean - estm_std = sqrt(np.sum(profiles[:, 1] ** 2)) - # We're hardcoding 95% confidence interval here. - assert self.confidence_level == 0.95 - normal_dist_95 = 1.644854 - r1, r2 = estm_qos, estm_qos - normal_dist_95 * estm_std - return float(r1), float(r2) - - def handle_output(self, outputs: torch.Tensor) -> Tuple[QoS, QoS]: - qoses = np.array([self.qos(o) for o in outputs]) - msg_logger.debug(f"Handled {qoses.mean(), qoses.std()}") - return qoses.mean(), qoses.std() - - def get_baseline_profile(self) -> Tuple[QoS, QoS]: - mean_qos = self.qos(self.run_model(self.nas, {}, self.executor)[0]) - return mean_qos, mean_qos.null() - - -class LinearCombEstimator(LinearEstimator): - """Estimate QoS of a config by linearly adding tensor output from network. See `LinearEstimator`. - - On estimation, sums over the delta in tensor output (compared to baseline output) for each - approximation, and then the baseline tensor output is added back. - This works as an estimation of tensor output for this configuration, which is then sent to QoS - metric to get the final QoS. 
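In symbols: with baseline output O and single-knob outputs O_k, the estimated output for a config C is O + the sum over k in C of (O_k - O), and qos() of that tensor is the estimate. The sum runs over both rows of each stored profile, one row for the mean output and one for the confidence-percentile output.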
- - QoST = float - ProfT = torch.Tensor (2 * n_inputs * n_classes) - NetOutputT = torch.Tensor (n_inputs * n_classes) - """ - - def estimate(self, config) -> EstmT: - if not config: - baseline_qos = self.qos(self.baseline_profile) - return baseline_qos, baseline_qos - # 4D tensor: n_approx * 2 * n_inputs * n_classes - profiles = torch.stack([self.profile_table[kv] for kv in config.items()]) - profiles -= self.baseline_profile - mean_tensor, confidence_tensor = profiles.sum(dim=0) + self.baseline_profile - estm_mean_qos = self.qos(mean_tensor) - estm_confidence_qos = self.qos(confidence_tensor) - return estm_mean_qos, estm_confidence_qos - - def handle_output(self, outputs: torch.Tensor) -> torch.Tensor: - if len(outputs) == 1: - return torch.stack((outputs[0], outputs[0])) - qoses = np.array([self.qos(o) for o in outputs]) - percentile_pos = int(self.n_nondeterm_runs * (1 - self.confidence_level)) - assert 0 <= percentile_pos < self.n_nondeterm_runs - mean_pos = np.searchsorted(qoses, qoses.mean(), 'right') - assert 0 <= mean_pos <= self.n_nondeterm_runs - if mean_pos == self.n_nondeterm_runs: - mean_pos = self.n_nondeterm_runs - 1 - return torch.stack((outputs[mean_pos], outputs[percentile_pos])) - - def get_baseline_profile(self) -> torch.Tensor: - return self.run_model(self.nas, {}, self.executor)[0] - - -class TrainableEstimator(LinearEstimator, abc.ABC): - """ - QoST = float - ProfT = ProfT - NetOutputT = torch.Tensor (n_inputs * n_classes) - """ - n_train_confs = 50 - weight_range = 0.8, 1.2, 20 - n_cold_start = 500 - accept_threshold = 5 - penalize_overestm = 1.0 - - def __init__( - self, nas: NetApproxSelector, executor: ExeT, qos: QoST, - threshold_eval: ThresholdEvalT, confidence_level: float, - independent_init: bool = True, storage: Path = None - ): - super().__init__(nas, executor, qos, threshold_eval, confidence_level, independent_init, storage) - self.r_cands = np.linspace(*self.weight_range) - self.r_error = np.zeros((len(self.r_cands), self.n_train_confs)) - self.r = self.weight_range[1] - self.trained_iters = 0 - self.cold_start = 0 - - def update_r(self): - mean_error = np.mean(self.r_error, axis=1) - best_idx = np.argmin(mean_error) - self.r = self.r_cands[best_idx] - if best_idx == len(mean_error) - 1 or best_idx == 0: - msg_logger.warning(f"Parameter value r = {self.r} has reached the boundary. 
Consider a larger range.") - - def get_qos_for_config(self, config: ConfigT) -> EstmT: - is_deterministic = self.nas.is_deterministic(config) - net = self.nas.apply_approx_by_config(config).module - n_runs = 1 if is_deterministic else self.n_nondeterm_runs - qoses = [self.qos(self.executor(net)) for _ in trange(n_runs, leave=False)] - mean_qos, qos_at_confidence, _ = qos_stats(qoses, confidence=self.confidence_level) - return mean_qos, qos_at_confidence - - @abc.abstractmethod - def real_estimate(self, config, rs: Iterable[float] = None) -> List[EstmT]: - pass - - def estimate(self, config) -> EstmT: - estm = self.real_estimate(config)[0] - if self.cold_start < self.n_cold_start: - self.cold_start += 1 - if self.cold_start % 50 == 0: - msg_logger.info(f"WeightedLinearCombEstimator cold start {self.cold_start} / {self.n_cold_start}") - return estm - if self.trained_iters >= self.n_train_confs: - return estm - log_info_freq = 10 - log_level = logging.INFO if self.trained_iters % log_info_freq == 0 else logging.DEBUG - msg_logger.log( - log_level, - f"{self.__class__} train iter {self.trained_iters} / {self.n_train_confs}" - ) - mean_qos, qos_at_confidence = self.get_qos_for_config(config) - estm_conf_qoses = np.array(self.real_estimate(config, rs=self.r_cands))[:, 1] - diff_conf_qoses = qos_at_confidence - estm_conf_qoses - old_r = self.r - self.r_error[:, self.trained_iters] = np.where( - diff_conf_qoses > 0, diff_conf_qoses * self.penalize_overestm, - -diff_conf_qoses - ) - self.trained_iters += 1 - self.update_r() - msg_logger.debug( - f"{self.__class__} real mean qos = {mean_qos}, real conf qos = {qos_at_confidence}, " - f"estm conf qos = {estm[1]}, r: {old_r} -> {self.r}" - ) - return mean_qos, qos_at_confidence - - -class WeightedLinearCombEstimator(TrainableEstimator, LinearCombEstimator): - """ - QoST = float - ProfT = torch.Tensor - NetOutputT = torch.Tensor (n_inputs * n_classes), logged - """ - - def __init__( - self, nas: NetApproxSelector, executor: ExeT, qos: QoST, - threshold_eval: ThresholdEvalT, confidence_level: float, - independent_init: bool = True, storage: Path = None - ): - log_qos = lambda x: qos(torch.exp(x)) - super().__init__(nas, executor, log_qos, threshold_eval, confidence_level, independent_init, storage) - - @staticmethod - def tensor_log(tensor: torch.Tensor) -> torch.Tensor: - # TODO: don't take log if there's no SoftMax layer. 
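# Why log space: the profiled outputs appear to be post-softmax
# probabilities (hence the TODO above), so summing deltas of log-outputs
# composes per-knob effects multiplicatively in probability space rather
# than additively; the epsilon floor below keeps log() finite on zeros.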
- eps = torch.ones_like(tensor) * 1e-10 - return torch.log(torch.max(tensor, eps)) - - def real_estimate(self, config, rs: Iterable[float] = None) -> List[EstmT]: - # 3D tensor: 2 * n_inputs * n_classes - if config: - estm_delta_output = torch.sum( - torch.stack([self.profile_table[kv] for kv in config.items()]) - self.baseline_profile, - dim=0 - ) - else: - n_in, n_out = self.baseline_profile.shape - estm_delta_output = torch.zeros(2, n_in, n_out) - rets = [] - rs = rs if rs is not None else [self.r] - for r in rs: - mean_tensor, confidence_tensor = estm_delta_output * r + self.baseline_profile - rets.append((self.qos(mean_tensor), self.qos(confidence_tensor))) - return rets - - def handle_output(self, outputs: torch.Tensor) -> torch.Tensor: - return LinearCombEstimator.handle_output(self, self.tensor_log(outputs)) - - def get_baseline_profile(self) -> torch.Tensor: - return self.tensor_log(LinearCombEstimator.get_baseline_profile(self)) - - -class WeightedLinearQoSEstimator(TrainableEstimator, LinearQoSEstimator): - """ - QoST = float - ProfT = torch.Tensor - NetOutputT = torch.Tensor (n_inputs * n_classes), logged - """ - - weight_range = 0.5, 5, 50 - - def estimate(self, config) -> EstmT: - ret = super().estimate(config) - msg_logger.debug(f"Config {config} -> estimation {ret}") - return ret - - def real_estimate(self, config, rs: Iterable[float] = None) -> List[EstmT]: - baseline_mean_qos = self.baseline_profile[0] - if config: - # N * 2 array - profiles = np.array([self.profile_table[kv] for kv in config.items()]) - profiles[:, 0] -= baseline_mean_qos - profiles[:, 0][profiles[:, 0] > 0] = 0 - estm_mean_qos_delta = profiles[:, 0].sum() - estm_std = sqrt(np.sum(profiles[:, 1] ** 2)) - else: - estm_mean_qos_delta = estm_std = 0.0 - rets = [] - rs = rs if rs is not None else [self.r] - for r in rs: - estm_mean_qos = float(estm_mean_qos_delta * r + baseline_mean_qos) - # We're hardcoding 95% confidence interval here. 
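# 1.644854 approximates the one-sided 95% standard-normal quantile, i.e.
# P(Z < 1.644854) = 0.95 for Z ~ N(0, 1). The returned pair is therefore
# (estimated mean QoS, mean - 1.644854 * std): a QoS level the config
# should meet or exceed about 95% of the time if QoS is normally
# distributed.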
- assert self.confidence_level == 0.95 - normal_dist_95 = 1.644854 - estm_conf_qos = estm_mean_qos - normal_dist_95 * estm_std - rets.append((estm_mean_qos, estm_conf_qos)) - return rets diff --git a/hpvm/projects/pred_tuner/toolkit/indexing.py b/hpvm/projects/pred_tuner/toolkit/indexing.py deleted file mode 100644 index 27500c152a..0000000000 --- a/hpvm/projects/pred_tuner/toolkit/indexing.py +++ /dev/null @@ -1,55 +0,0 @@ -from typing import Callable, Iterator, Optional, Set - -import torch -from torch.nn import Module, Sequential - -UnaryForwardT = Callable[[torch.Tensor], torch.Tensor] -ReplacedForwardT = Callable[[Module, UnaryForwardT, torch.Tensor], torch.Tensor] - - -class ModuleIndexer: - def __init__(self, module: Module, ignore_module: Callable[[Module], bool]): - self.module_to_index = {} - for i, submodule in enumerate(module.modules()): - if ignore_module(submodule): - continue - self.module_to_index[submodule] = i - self.index_to_module = {i: m for m, i in self.module_to_index.items()} - self.module = module - self.layer_parents = self.find_layers_parent_info(module, set(self.all_modules)) - - @staticmethod - def find_layers_parent_info(net: Module, layers: Set[Module]): - ret = {} - for name, submodule in net.named_children(): - if submodule in layers: - ret[submodule] = net, name - ret = {**ret, **ModuleIndexer.find_layers_parent_info(submodule, layers)} - return ret - - @property - def all_modules(self) -> Iterator[Module]: - return iter(self.module_to_index.keys()) - - def find(self, module: Module) -> Optional[int]: - return self.module_to_index.get(module, None) - - def __getitem__(self, item: int) -> Module: - return self.index_to_module[item] - - def __setitem__(self, key: int, value: Module): - old = self.index_to_module[key] - if value != old: - self.index_to_module[key] = value - self.module_to_index[value] = self.module_to_index[old] - self.module_to_index.pop(old) - parent, name = self.layer_parents[old] - self.layer_parents[value] = parent, name - self.layer_parents.pop(old) - parent.__setattr__(name, value) - - def __iter__(self) -> Iterator[Module]: - return self.all_modules - - def __len__(self): - return len(self.module_to_index) diff --git a/hpvm/projects/pred_tuner/toolkit/transform.py b/hpvm/projects/pred_tuner/toolkit/transform.py deleted file mode 100644 index f19554181a..0000000000 --- a/hpvm/projects/pred_tuner/toolkit/transform.py +++ /dev/null @@ -1,186 +0,0 @@ -import copy -import logging -from collections import defaultdict -from typing import Callable, Dict, Generic, Iterator, List, Tuple, TypeVar - -from torch.nn import Module - -from .approxdnn import Approximation, AvailableApproximations -from .indexing import ModuleIndexer - -msg_logger = logging.getLogger(__name__) - - -T1 = TypeVar('T1') -T2 = TypeVar('T2') -TransformerCT = Callable[[int, T1], T2] - - -class StateCapturer(Module, Generic[T2]): - @staticmethod - def _id(_, x): - return x.clone().cpu().detach() - - def __init__(self, net_index: ModuleIndexer, state_transformer: TransformerCT = None): - super().__init__() - self.net_state: Dict[int, List[T2]] = defaultdict(list) - self.state_transformer = state_transformer or self._id - self.net_index = net_index - for submodule in net_index.module.modules(): - submodule.register_forward_hook(self.forward_hook) - self._output = None - - @property - def module(self): - return self.net_index.module - - @property - def output(self): - if self._output is None: - raise RuntimeError("Cannot get output before inference happens") - return 
self._output - - def forward_hook(self, module: Module, _, outputs): - module_idx = self.net_index.find(module) - if module_idx is None: - raise RuntimeError("Cannot find module; module may have changed externally") - self.net_state[module_idx].append(self.state_transformer(module_idx, outputs)) - - def forward(self, *args, **kwargs): - return self.module.forward(*args, **kwargs) - - def get_output_state(self) -> List[T2]: - return self.net_state[self.injected.output_loc()] - - -T = TypeVar('T') -ConfigT = Dict[int, int] -EvaluatorT = Callable[[int, int], Tuple[bool, T]] - - -class NetApproxSelector: - r"""List all 1-approximation configurations, and apply configurations to a `ModuleDAG` network. - - Computes a list of available approximations for each layer of the network, given info on available - approximations in the system (in the form of an `AvailableApproximations` instance). - Capable of listing all single-approximation configurations, and apply a given configuration to the network. - A configuration is a dict from layer indices to approximation for these layers, one for each. - See `ConfigT`. - - Parameters - ---------- - net : Module - The network to be approximated. - dev_time_only : bool - If True, use only devtime approximations; otherwise use all available approximations. - aa : AvailableApproximations - A container with information of available approximations, and the type of layer each approximation - applies to, etc. - - Attributes - ---------- - net : Module - The network to be approximated (parameter `net`). - net_approxes: Dict[int, List[int]] - A list of available approximation indexes per layer index. - available_approx: AvailableApproximations - Available approximations (parameter `aa`). - """ - - class ApproximationGraph: - """Naive O(n^2) sort for a list of partially-ordered approximations.""" - - def __init__(self, approx_indices: List[int], aa: AvailableApproximations): - import networkx as nx - self.dep_graph = nx.DiGraph() - self.dep_graph.add_nodes_from(approx_indices) - for i, x in enumerate(approx_indices): - for y in approx_indices[i + 1:]: - approx_x, approx_y = aa[x], aa[y] - cmp = approx_x.is_less_approx(approx_y) - if cmp is None: # Not comparable - continue - if cmp: - self.dep_graph.add_edge(x, y) - else: - self.dep_graph.add_edge(y, x) - self.sorted_indices = list(nx.algorithms.topological_sort(self.dep_graph)) - - def __len__(self) -> int: - return len(self.sorted_indices) - - def __iter__(self) -> Iterator[Tuple[int, bool]]: - return iter(self.sorted_indices) - - def __init__( - self, net: Module, dev_time_only: bool = True, ignore_fp32: bool = False, - aa: AvailableApproximations = None - ): - self.available_approx = aa or AvailableApproximations.from_global_knobs_file() - self.type_approxes = self.available_approx.items(dev_time=dev_time_only, ignore_fp32=ignore_fp32) - approximable_types = tuple(self.type_approxes.keys()) - self.net_index = ModuleIndexer(net, lambda m: not isinstance(m, approximable_types)) - self.dev_time_only = dev_time_only - self.net_approxes: Dict[int, List[int]] = defaultdict(list) - for i, layer in self.net_index.index_to_module.items(): - for t, approxes in self.type_approxes.items(): - if isinstance(layer, t): - self.net_approxes[i].extend(approxes) - - def apply_approx_by_config(self, config: ConfigT) -> ModuleIndexer: - """Applies given `config` to network.""" - new_dag = copy.deepcopy(self.net_index) - for layer_idx, config_idx in config.items(): - layer = new_dag[layer_idx] - new_dag[layer_idx] = 
self.available_approx[config_idx].apply(layer) - return new_dag - - def list_single_approxes(self) -> Iterator[Tuple[int, int, Approximation]]: - for k, vs in self.net_approxes.items(): - for v in vs: - yield k, v, self.available_approx[v] - - def filter_approxes(self, evaluator: EvaluatorT) -> Iterator[T]: - """Enumerate through and apply each single-approximation configuration.""" - net_approxes_graph: Dict[int, NetApproxSelector.ApproximationGraph] = { - k: self.ApproximationGraph(vs, self.available_approx) for k, vs in self.net_approxes.items() - } - from tqdm import tqdm - from utils import gpu_mem_mb - bar1 = tqdm(net_approxes_graph.items(), total=len(net_approxes_graph)) - for k, graph in bar1: - bar1.set_postfix(layer=k) - bar2 = tqdm(graph, leave=None) - unacceptable_approx = None - filtered_layer_approxes = [] - for approx_id in bar2: - approx = self.available_approx[approx_id] - if unacceptable_approx is not None: - cmp = unacceptable_approx.is_less_approx(approx) - if cmp: - msg_logger.debug(f"{approx} is worse than unacceptable approx {unacceptable_approx}") - continue - else: - unacceptable_approx = None - bar2.set_postfix(approx_id=approx_id, mem=gpu_mem_mb()) - acceptable, ret_val = evaluator(k, approx_id) - if not acceptable: - unacceptable_approx = approx - msg_logger.debug(f"{approx} is unacceptable") - continue - filtered_layer_approxes.append(approx_id) - yield ret_val - self.net_approxes[k] = filtered_layer_approxes - - def get_baseline(self) -> Module: - return self.net_index.module - - def get_layer_approxes(self) -> Dict[Module, List[int]]: - """Expose available knobs for autotuner usage.""" - return { - self.net_index[layer_k]: approxes - for layer_k, approxes in self.net_approxes.items() - } - - def is_deterministic(self, config: ConfigT): - return all(self.available_approx[knob_id].deterministic for knob_id in config.values()) diff --git a/hpvm/projects/pred_tuner/utils/__init__.py b/hpvm/projects/pred_tuner/utils/__init__.py deleted file mode 100644 index 1f06b4ae22..0000000000 --- a/hpvm/projects/pred_tuner/utils/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .config import Config -from .logging import config_pylogger, reapply_last_config -from .utils import device, get_knob_config_file, get_tensorrt_dir, gpu_mem_mb diff --git a/hpvm/projects/pred_tuner/utils/benchmarks.json b/hpvm/projects/pred_tuner/utils/benchmarks.json deleted file mode 100644 index 57184872a0..0000000000 --- a/hpvm/projects/pred_tuner/utils/benchmarks.json +++ /dev/null @@ -1,100 +0,0 @@ -{ - "lenet_hpvm": { - "model_name": "lenet_hpvm", - "autotuner_runs": 10000, - "base_dir": "tuner_results/lenet_keras/", - "layer_file": "autotuner/data/lenet/lenet_layers.txt", - "cost_file": "autotuner/data/lenet/op_cost.txt" - }, - "alexnet_hpvm": { - "model_name": "alexnet_hpvm", - "autotuner_runs": 10000, - "base_dir": "tuner_results/alexnet_cifar10/", - "layer_file": "autotuner/data/alexnet/alexnet_layers.txt", - "cost_file": "autotuner/data/alexnet/op_cost.txt" - }, - "alexnet2_hpvm": { - "model_name": "alexnet2_hpvm", - "autotuner_runs": 10000, - "base_dir": "tuner_results/alexnet2_cifar10/", - "layer_file": "autotuner/data/alexnet2/alexnet2_layers.txt", - "cost_file": "autotuner/data/alexnet2/op_cost.txt" - }, - "vgg16_cifar10_hpvm": { - "model_name": "vgg16_cifar10_hpvm", - "autotuner_runs": 10000, - "base_dir": "tuner_results/vgg16_cifar10/", - "layer_file": "autotuner/data/vgg16_cifar10/vgg16_layers.txt", - "cost_file": "autotuner/data/vgg16_cifar10/op_cost.txt" - }, - "vgg16_cifar100_hpvm": 
{ - "model_name": "vgg16_cifar100_hpvm", - "autotuner_runs": 10000, - "base_dir": "tuner_results/vgg16_cifar100/", - "layer_file": "autotuner/data/vgg16_cifar100/vgg16_layers.txt", - "cost_file": "autotuner/data/vgg16_cifar100/op_cost.txt" - }, - "vgg16_imagenet_hpvm": { - "model_name": "vgg16_imagenet_hpvm", - "autotuner_runs": 20000, - "base_dir": "tuner_results/vgg16_imagenet/", - "layer_file": "autotuner/data/vgg16_imagenet/vgg16_layers.txt", - "cost_file": "autotuner/data/vgg16_imagenet/op_cost.txt" - }, - "resnet18_hpvm": { - "model_name": "resnet18_hpvm", - "autotuner_runs": 10000, - "base_dir": "tuner_results/resnet18_cifar10/", - "layer_file": "autotuner/data/resnet/resnet_layers.txt", - "cost_file": "autotuner/data/resnet/op_cost.txt" - }, - "resnet50_imagenet_hpvm": { - "model_name": "resnet50_imagenet_hpvm", - "autotuner_runs": 30000, - "base_dir": "tuner_results/resnet50_imagenet/", - "layer_file": "autotuner/data/resnet50_imagenet/resnet50_layers.txt", - "cost_file": "autotuner/data/resnet50_imagenet/op_cost.txt" - }, - "mobilenet_hpvm": { - "model_name": "mobilenet_hpvm", - "autotuner_runs": 20000, - "base_dir": "tuner_results/mobilenet/", - "layer_file": "autotuner/data/mobilenet/mobilenet_layer_comp.txt", - "cost_file": "autotuner/data/mobilenet/op_cost.txt" - }, - "__unused_mobilenet_shallow": { - "model_name": "mobilenet_shallow_hpvm", - "autotuner_runs": 10000, - "base_dir": "tuner_results/mobilenet_shallow/", - "layer_file": "autotuner/data/mobilenet_shallow/mobilenet_shallow_layer_comp.txt", - "cost_file": "autotuner/data/mobilenet_shallow/op_cost.txt" - }, - "alexnet_imagenet_hpvm": { - "model_name": "alexnet_imagenet_hpvm", - "autotuner_runs": 10000, - "base_dir": "tuner_results/alexnet_imagenet/", - "layer_file": "autotuner/data/alexnet_imagenet/layer_composition.txt", - "cost_file": "autotuner/data/alexnet_imagenet/op_cost.txt" - }, - "alexnet2_canny_hpvm": { - "model_name": "alexnet2_canny_hpvm", - "autotuner_runs": 10000, - "base_dir": "tuner_results/alexnet2_canny_hpvm/", - "layer_file": "autotuner/data/alexnet2_canny_hpvm/layers.txt", - "cost_file": "autotuner/data/alexnet2_canny_hpvm/op_cost.txt" - }, - "resnet18_torch": { - "model_name": "resnet18_torch", - "autotuner_runs": 10000, - "base_dir": "tuner_results/resnet18_cifar10_torch/", - "layer_file": "autotuner/data/resnet18_torch/resnet_layers.txt", - "cost_file": "autotuner/data/resnet18_torch/op_cost.txt" - }, - "vgg16_torch": { - "model_name": "vgg16_torch", - "autotuner_runs": 10000, - "base_dir": "tuner_results/resnet18_cifar10_torch/", - "layer_file": "autotuner/data/resnet/resnet_layers.txt", - "cost_file": "autotuner/data/resnet/op_cost.txt" - } -} \ No newline at end of file diff --git a/hpvm/projects/pred_tuner/utils/config.py b/hpvm/projects/pred_tuner/utils/config.py deleted file mode 100644 index fced1a4d46..0000000000 --- a/hpvm/projects/pred_tuner/utils/config.py +++ /dev/null @@ -1,318 +0,0 @@ -from pathlib import Path -from typing import Dict, Iterable, List, Union - -import matplotlib.pyplot as plt -import numpy as np - -from models.domains import QoS -from models.domains.qoses import Accuracy, AccuracyPSNR -from .utils import get_knob_config_file - -op_mapping = { - "conv": "conv", "depthwise_conv": "group_conv", "dense": "mul", "batchnorm": "batchnorm", - "pool": "pool_max", "pool_mean": "pool_mean", "activation": "relu", "tanh": "tanh", "add": "add", - "reduce": "red_samp" -} - -approx_map = {} -PathLike = Union[str, Path] - - -def initializeApproxMap(knobs_file_path): - f = 
open(knobs_file_path, "r") - - for x in f: - toks = x.split("\t") - approx_type = toks[0].split(",")[0] - knob_id = toks[0].split(",")[1] - approx_str = approx_type + " " + knob_id - approx_map[knob_id] = approx_str - - -initializeApproxMap(get_knob_config_file()) - -# TODO: fix hardcoding -fp32_to_fp16 = { - **{k: k + 30 for k in range(121, 138 + 1)}, - **{k: k + 30 for k in range(231, 248 + 1)}, - 11: 12 -} -fp16_to_fp32 = {v: k for k, v in fp32_to_fp16.items()} - - -class Config: - def __init__( - self, avg_accuracy: QoS, baseline_accuracy: QoS, fname: str, flags: List[int], - total_runs: int, confidence: float, config_cost: float, speedup: float - ): - self.total_runs = total_runs - self.confidence = confidence - self.config_cost = config_cost - self.speedup = speedup - self.avg_qos = avg_accuracy - self.baseline_qos = baseline_accuracy - self.fname = fname - self.flags = flags - self.avg_loss = self.avg_loss.min_positive_loss() - - @property - def avg_loss(self): - return self.baseline_qos - self.avg_qos - - @avg_loss.setter - def avg_loss(self, value: QoS): - self.avg_qos = self.baseline_qos - value - - def __repr__(self): - return repr((self.fname, self.speedup, self.avg_qos, self.avg_loss, self.flags)) - - @staticmethod - def qos_speedup_points(configs: Iterable['Config']) -> np.ndarray: - return np.array([[*conf.avg_qos.numpy(), conf.speedup] for conf in configs]) - - def update_acc(self, acc: QoS, confidence: float, baseline_acc: QoS = None): - if baseline_acc: - self.baseline_qos = baseline_acc - self.avg_qos = acc - self.avg_loss = self.avg_loss.min_positive_loss() - self.confidence = confidence - - def to_fp16(self) -> 'Config': - import copy - fp16_conf = copy.copy(self) - fp16_conf.flags = [fp32_to_fp16.get(x, x) for x in self.flags] - return fp16_conf - - def to_fp32(self) -> 'Config': - import copy - fp32_conf = copy.copy(self) - fp32_conf.flags = [fp16_to_fp32.get(x, x) for x in self.flags] - return fp32_conf - - def to_rt_format(self, idx: int, bench_layer_composition, hardware_target: str): - config_str = build_config_str(self.flags, bench_layer_composition, hardware_target) - return ( - "+++++\n" - f"conf{idx} {self.speedup} 0 {self.avg_qos} {self.avg_loss}\n" - f"{config_str}" - "-----\n" - ) - - def to_tuner_format(self): - topline = ( - f"total_runs={self.total_runs}\tconfidence={self.confidence}\t" - f"avg_accuracy={self.avg_qos}\tconfig_cost={self.config_cost}\tspeedup={self.speedup}" - ) - flags_lines = [str(x) for x in self.flags] - return '\n'.join([topline] + flags_lines) - - @classmethod - def from_tuner_format(cls, lines: List[str], fname: str, baseline_accuracy: QoS): - def parseTopLine(x: str) -> Dict[str, str]: - toks = x.split() - fields = {} - for tok in toks: - field, value = tok.split('=') - fields[field] = value - return fields - - top_line = parseTopLine(lines[0]) - total_runs = int(top_line['total_runs']) - confidence = float(top_line['confidence']) - avg_accuracy = baseline_accuracy.parse(top_line['avg_accuracy']) - config_cost = float(top_line['config_cost']) - speedup = float(top_line['speedup']) - flags = [int(line.strip()) for line in lines[1:] if line.strip()] - return cls(avg_accuracy, baseline_accuracy, fname, flags, total_runs, confidence, config_cost, speedup) - - -def genScatterPlotFromConfigs(configs, file_path): - speedups, accuracy_losses = [c.speedup for c in configs], [c.avg_loss for c in configs] - plt.scatter(accuracy_losses, speedups) - plt.xlabel("accuracy_loss") - plt.ylabel("speedup") - plt.xlim(left=-0.05) - 
plt.ylim(bottom=1) - plt.savefig(file_path) - plt.close() - - -def _find_distance_to(points: np.ndarray, ref_points: np.ndarray) -> np.ndarray: - n_ref = len(ref_points) - if n_ref == 0: - return np.zeros(0) - if n_ref == 1: - return np.linalg.norm(points - ref_points, axis=1) - ref_points = np.array(sorted(ref_points, key=lambda p: p[0])) - px = points.T[0] - rx = ref_points.T[0] - local_unit_vecs = ref_points[1:] - ref_points[:-1] - dists = [] - bins = np.digitize(px, rx) - 1 - for point, left_ref_p in zip(points, bins): - if left_ref_p == -1: - left_ref_p = 0 - to_left_ref = ref_points[left_ref_p] - point - local_unit_vec = local_unit_vecs[-1] if left_ref_p >= n_ref - 1 else local_unit_vecs[left_ref_p] - projection = np.dot(local_unit_vec, to_left_ref) / np.linalg.norm(local_unit_vec) - dist = np.sqrt(np.linalg.norm(to_left_ref) ** 2 - projection ** 2) - dists.append(dist) - return np.array(dists) - - -def is_pareto_efficient( - configs: List[Config], margin: float = None, - ratio: float = None, n_min: int = None, n_max: int = None -) -> List[Config]: - configs = np.array(configs) - acc_speedup = Config.qos_speedup_points(configs) - is_efficient = np.ones(acc_speedup.shape[0], dtype=bool) - for idx, c in enumerate(acc_speedup): - if is_efficient[idx]: - # Keep any point with a higher value - is_efficient[is_efficient] = np.any(acc_speedup[is_efficient] > c, axis=1) - is_efficient[idx] = True # And keep self - pareto_acc_speedup = acc_speedup[is_efficient] - pareto_configs = configs[is_efficient] - non_pareto_acc_speedup = acc_speedup[np.logical_not(is_efficient)] - non_pareto_configs = configs[np.logical_not(is_efficient)] - dist_to_pareto = _find_distance_to(non_pareto_acc_speedup, pareto_acc_speedup) - if margin is not None: - marginal_accepted = non_pareto_configs[dist_to_pareto < margin] - elif ratio is not None: - dist_order = np.argsort(dist_to_pareto) - take_n = int(len(dist_to_pareto) * ratio) - if n_min is not None: - take_n = max(take_n, n_min) - if n_max is not None: - take_n = min(take_n, n_max) - take_n -= len(pareto_configs) - marginal_accepted = non_pareto_configs[dist_order[:take_n]] - else: - raise ValueError("Must provide margin or ratio") - return pareto_configs.tolist() + marginal_accepted.tolist() - - -def print_layer_info(flag: int, hardware_target: str, layer_comp): - approx_tech = approx_map[str(flag)] - if flag <= 7: - # If is PROMISE - return f"promise {approx_tech}" - # If is GPU / CPU - op0 = op_mapping[layer_comp[0]] - config_str = f"{hardware_target} {op0} {approx_tech} " - for op in layer_comp[1:]: - op_name = op_mapping[op] - fp = "fp32" if is_fp32(flag) else "fp16" - config_str += f"{op_name} {fp} 1 " - return config_str - - -def build_config_str(flags: List[int], layer_desc: List[List[str]], hardware_target: str): - lines = [] - assert len(flags) == len(layer_desc) - for index, (flag, layer_comp) in enumerate(zip(flags, layer_desc), start=1): - layer_str = print_layer_info(flag, hardware_target, layer_comp) - config_str = f"{index} {layer_str}" - lines.append(config_str) - lines.append(f"{len(layer_desc) + 1} {hardware_target} softmax fp32 1\n") - return '\n'.join(lines) - - -def is_fp32(flag: int): - return flag in fp32_to_fp16 - - -def dump_configs_to_rt( - layer_desc, configs: List[Config], - config_out_path: PathLike, baseline_acc: QoS, hardware_target: str -): - baseline_flag = 11 - baseline_config = Config( - baseline_acc, baseline_acc, '', [baseline_flag for _ in layer_desc], - 1, 100.0, 0.0, 1.0 - ) - baseline_str = 
baseline_config.to_rt_format(1, layer_desc, hardware_target) - with config_out_path.open("w") as f: - f.write(baseline_str) - for it, config in enumerate(configs, start=2): - f.write(config.to_rt_format(it, layer_desc, hardware_target)) - - -# Public Interfaces -def dump_rt_format_to( - layer_desc, configs: List[Config], gold_acc: QoS, - rt_cpu_path: PathLike = None, rt_gpu_path: PathLike = None -): - if configs: - assert len(set([conf.baseline_qos for conf in configs])) == 1 - # Sort configs - sorted_configs = sorted(configs, key=lambda conf: (conf.avg_loss, conf.speedup, conf.flags)) - if rt_gpu_path is not None: - # Remap to fp16 for gpu. - fp16_configs = [conf.to_fp16() for conf in sorted_configs] - dump_configs_to_rt( - layer_desc, fp16_configs, rt_gpu_path, gold_acc, 'gpu' - ) - if rt_cpu_path is not None: - # Remap to fp32 for cpu. - fp32_configs = [conf.to_fp32() for conf in sorted_configs] - dump_configs_to_rt( - layer_desc, fp32_configs, rt_cpu_path, gold_acc, 'cpu' - ) - - -def plot_configs(file_path: Path, **kw_configs: List[Config]): - from mpl_toolkits.mplot3d import Axes3D - # Decide 2D or 3D plot: - qos_type = None - for label, confs in kw_configs.items(): - if not confs: - continue - if not qos_type: - qos_type = type(confs[0].avg_qos) - else: - assert qos_type == type(confs[0].avg_qos) - if qos_type is None: - return - if qos_type is AccuracyPSNR: - fig: plt.Figure = plt.figure() - ax: Axes3D = fig.add_subplot(111, projection='3d') - for label, confs in kw_configs.items(): - data = np.array([ - [c.avg_loss.qoses[0].to_scalar(), c.avg_qos.qoses[1].to_scalar(), c.speedup] - for c in confs] - ) - x, y, z = data.T - ax.scatter(x, y, z, label=label) - ax.set_xlabel("accuracy_loss") - ax.set_ylabel("psnr") - ax.set_zlabel("speedup") - ax.set_xlim(left=-0.05) - ax.set_zlim(bottom=1) - elif qos_type is Accuracy: - fig, ax = plt.subplots() - fig: plt.Figure - ax: plt.Axes - for label, confs in kw_configs.items(): - data = np.array([[c.avg_loss.to_scalar(), c.speedup] for c in confs]) - x, y = data.T - ax.scatter(x, y, label=label) - ax.set_xlabel("accuracy_loss") - ax.set_ylabel("speedup") - ax.set_xlim(left=-0.05) - ax.set_ylim(bottom=1) - else: - raise ValueError(f"QoS type {qos_type} unsupported in plotting.") - ax.legend() - fig.savefig(file_path) - plt.close(fig) - - -def load_configs_from_dir(result_dir: PathLike, baseline_accuracy: QoS): - config_arr = [] - for path in Path(result_dir).glob('*'): - with path.open() as f: - lines = f.readlines() - config_arr.append(Config.from_tuner_format(lines, path.name, baseline_accuracy)) - return config_arr diff --git a/hpvm/projects/pred_tuner/utils/logging.py b/hpvm/projects/pred_tuner/utils/logging.py deleted file mode 100644 index 6b6904bd2e..0000000000 --- a/hpvm/projects/pred_tuner/utils/logging.py +++ /dev/null @@ -1,87 +0,0 @@ -import logging -from logging import config -import os -from pathlib import Path - -import tqdm - - -class TqdmStreamHandler(logging.Handler): - """tqdm-friendly logging handler. Uses tqdm.write instead of print for logging.""" - - def __init__(self, level=logging.NOTSET): - super().__init__(level) - - def emit(self, record): - try: - msg = self.format(record) - tqdm.tqdm.write(msg) - self.flush() - except (KeyboardInterrupt, SystemExit, RecursionError): - raise - except: - self.handleError(record) - - -_last_applied_config = None - - -def config_pylogger(filename: str = None, output_dir: Path = None, verbose: bool = False) -> logging.Logger: - """Configure the Python logger. 
- - For each execution of the application, we'd like to create a unique log file. - By default this file is named using the date and time of day, so that it can be sorted by recency. - You can also name your filename or choose the log directory. - """ - import time - timestr = time.strftime("%Y.%m.%d-%H%M%S") - filename = filename or timestr - output_dir = output_dir or Path('.') - if not os.path.exists(output_dir): - os.makedirs(output_dir) - file_path = output_dir / filename - - global _last_applied_config - _last_applied_config = d = { - 'version': 1, - 'disable_existing_loggers': False, - 'formatters': { - 'simple': { - 'format': '%(levelname)s %(name)s: ' - '%(message)s' - }, - 'detailed': { - 'format': '[%(asctime)-15s] ' - '%(levelname)7s %(name)s: ' - '%(message)s ' - '@%(filename)s:%(lineno)d' - } - }, - 'handlers': { - 'console': { - '()': TqdmStreamHandler, - 'level': 'INFO', - 'formatter': 'simple' - }, - 'file': { - 'class': 'logging.FileHandler', - 'filename': file_path.as_posix(), - 'mode': 'a', # Because we may apply this config again, want to keep existing content - 'formatter': 'detailed', - }, - }, - 'root': { - 'level': 'DEBUG' if verbose else 'INFO', - 'handlers': ['console', 'file'] - }, - } - config.dictConfig(d) - - msglogger = logging.getLogger() - msglogger.info(f"Log file for this run: {file_path}") - return msglogger - - -def reapply_last_config(): - if _last_applied_config is not None: - config.dictConfig(_last_applied_config) diff --git a/hpvm/projects/pred_tuner/utils/utils.py b/hpvm/projects/pred_tuner/utils/utils.py deleted file mode 100644 index 1616557466..0000000000 --- a/hpvm/projects/pred_tuner/utils/utils.py +++ /dev/null @@ -1,26 +0,0 @@ -import logging -import os -from pathlib import Path - -import torch - -device = f'cuda:{torch.cuda.device_count() - 1}' if torch.cuda.is_available() else 'cpu' -n_cpu_threads = 12 if device == 'cuda:0' else 35 -torch.set_num_threads(n_cpu_threads) - -msg_logger = logging.getLogger(__name__) - - -def gpu_mem_mb(): - # noinspection PyTypeChecker - return torch.cuda.memory_allocated(device) / 1024 ** 2 - - -def get_tensorrt_dir() -> Path: - if 'LLVM_SRC_ROOT' not in os.environ: - return Path('.') - return Path(os.environ['LLVM_SRC_ROOT']) / "projects/hpvm-tensor-rt" - - -def get_knob_config_file() -> Path: - return get_tensorrt_dir() / "autotuner/data/global_knobs.txt" diff --git a/hpvm/projects/predtuner b/hpvm/projects/predtuner new file mode 160000 index 0000000000..65165fafe9 --- /dev/null +++ b/hpvm/projects/predtuner @@ -0,0 +1 @@ +Subproject commit 65165fafe9ea011bd172d869ca424d7a4d648a48 -- GitLab
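
A few self-contained Python sketches of techniques from the files deleted above follow; each is an illustrative reconstruction under the stated assumptions, not code recovered from the repository. First, tensor_log in the removed WeightedLinearCombEstimator clamps probabilities away from zero before taking the log, so softmax outputs containing exact zeros never introduce -inf into the logged profiles (the QoS callback then re-exponentiates). A minimal sketch:

import torch

probs = torch.tensor([0.7, 0.3, 0.0])    # exact zeros do appear in saturated softmax outputs
eps = torch.ones_like(probs) * 1e-10
safe = torch.log(torch.max(probs, eps))  # -inf is replaced by log(1e-10), about -23.03
print(safe)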
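
The removed WeightedLinearQoSEstimator models a configuration's QoS as the baseline plus a fitted weight r times the sum of per-knob QoS deltas, combining per-knob standard deviations as if the errors were independent; 1.644854 is the one-sided 95% standard-normal quantile, matching the hardcoded confidence_level == 0.95 assertion. A numpy sketch of that arithmetic with invented profile numbers (the r value is likewise made up; in the deleted code it is fitted by the cold-start loop in TrainableEstimator.estimate, which penalizes overestimation via penalize_overestm):

from math import sqrt
import numpy as np

baseline_qos = 84.5                                # e.g. fp32 accuracy in percent
knob_profiles = np.array([                         # per-knob (mean QoS delta, std),
    [-0.4, 0.3],                                   # each measured with the knob applied alone
    [-0.9, 0.5],
    [0.2, 0.1],
])
deltas = np.minimum(knob_profiles[:, 0], 0.0)      # optimistic (positive) deltas are zeroed
r = 1.2                                            # correction weight fitted during training
estm_mean = baseline_qos + r * deltas.sum()
estm_std = sqrt(np.sum(knob_profiles[:, 1] ** 2))  # combine stds assuming independent errors
z95 = 1.644854                                     # one-sided 95% standard-normal quantile
estm_conf = estm_mean - z95 * estm_std             # 95%-confidence lower bound on QoS
print(estm_mean, estm_conf)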
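
The removed ModuleIndexer.__setitem__ swaps a layer by looking up the (parent, attribute name) pair recorded by find_layers_parent_info and re-assigning the attribute on the parent. A condensed sketch that searches for the parent on demand rather than precomputing the table; the original's index bookkeeping is omitted here:

from torch import nn

def replace_submodule(root: nn.Module, old: nn.Module, new: nn.Module) -> None:
    """Re-wire `old` to `new` wherever it hangs off a parent module."""
    for parent in root.modules():
        for name, child in parent.named_children():
            if child is old:
                setattr(parent, name, new)   # the parent now forwards through `new`
                return
    raise ValueError("old module not found in root")

net = nn.Sequential(nn.Conv2d(3, 8, 3), nn.ReLU())
replace_submodule(net, net[0], nn.Conv2d(3, 8, 3, bias=False))
print(net[0])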
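
StateCapturer in the removed transform.py records every submodule's output through register_forward_hook, keyed by the module's index. (As deleted, its get_output_state references self.injected, which is never assigned anywhere in the class shown.) A self-contained sketch of the capture pattern, using a default argument to bind the loop index into each hook:

from collections import defaultdict
import torch
from torch import nn

net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 2))
states = defaultdict(list)                        # layer index -> captured outputs

for idx, module in enumerate(net.modules()):
    module.register_forward_hook(
        lambda mod, inp, out, idx=idx: states[idx].append(out.clone().cpu().detach())
    )

net(torch.randn(1, 4))
print({idx: outs[0].shape for idx, outs in states.items()})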
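
ApproximationGraph in the removed NetApproxSelector orders each layer's knobs with networkx: when is_less_approx reports that one knob is strictly milder than another, a directed edge is added, and a topological sort yields a mild-to-aggressive traversal so filter_approxes can skip knobs at least as aggressive as one that already failed. A sketch with hypothetical knob names and an invented partial order:

import networkx as nx

knobs = ["fp16", "perf_2", "perf_4"]                     # hypothetical knob names
milder_than = [("fp16", "perf_2"), ("perf_2", "perf_4"), ("fp16", "perf_4")]

g = nx.DiGraph()
g.add_nodes_from(knobs)
g.add_edges_from(milder_than)                # edge x -> y: x is the milder approximation
print(list(nx.topological_sort(g)))          # ['fp16', 'perf_2', 'perf_4']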
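
initializeApproxMap in the removed config.py parses global_knobs.txt lines whose first tab-separated field is "approx_type,knob_id", but it never closes its file handle. An equivalent sketch with a context manager; the file layout is inferred from the parsing code, not from a format spec:

from pathlib import Path

def load_approx_map(knobs_file: Path) -> dict:
    approx_map = {}
    with knobs_file.open() as f:              # close the handle when done
        for line in f:
            head = line.split("\t")[0]        # "approx_type,knob_id"
            approx_type, knob_id = head.split(",")[:2]
            approx_map[knob_id] = f"{approx_type} {knob_id}"
    return approx_map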
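
The fp32-to-fp16 knob remapping in the removed config.py (flagged there with "TODO: fix hardcoding") is a plain invertible dict, applied with .get(flag, flag) so knobs without an fp16 variant pass through unchanged. A sketch reusing the deleted table's ranges; flag 7 below stands in for a PROMISE knob (flags <= 7 are PROMISE per print_layer_info), which has no fp16 counterpart:

fp32_to_fp16 = {
    **{k: k + 30 for k in range(121, 138 + 1)},   # ranges copied from the deleted table
    **{k: k + 30 for k in range(231, 248 + 1)},
    11: 12,                                       # 11 is the baseline flag in the deleted code
}
fp16_to_fp32 = {v: k for k, v in fp32_to_fp16.items()}  # the mapping is invertible

flags = [11, 125, 7]
print([fp32_to_fp16.get(f, f) for f in flags])          # [12, 155, 7]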
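
is_pareto_efficient in the removed config.py applies a standard numpy dominance filter over (QoS, speedup) points, then re-admits near-frontier configs within a margin or ratio. The core filter, with invented points; both columns are maximized:

import numpy as np

points = np.array([
    [84.0, 1.0],
    [83.5, 1.6],
    [83.9, 1.2],
    [83.5, 1.4],
])
efficient = np.ones(len(points), dtype=bool)
for i, p in enumerate(points):
    if efficient[i]:
        # keep any still-efficient point that beats `p` in at least one dimension...
        efficient[efficient] = np.any(points[efficient] > p, axis=1)
        efficient[i] = True  # ...and keep `p` itself
print(points[efficient])     # drops [83.5, 1.4], dominated by [83.5, 1.6]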
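
Finally, the removed logging.py routes log records through tqdm.write so messages do not mangle an active progress bar. A condensed version of that handler (the original also re-raised RecursionError and flushed after each write), plus a usage loop:

import logging
import tqdm

class TqdmStreamHandler(logging.Handler):
    """Print log records via tqdm.write so progress bars stay intact."""
    def emit(self, record):
        try:
            tqdm.tqdm.write(self.format(record))
        except (KeyboardInterrupt, SystemExit):
            raise
        except Exception:
            self.handleError(record)

logger = logging.getLogger("demo")
logger.addHandler(TqdmStreamHandler())
logger.setLevel(logging.INFO)
for _ in tqdm.trange(3):
    logger.info("logged without breaking the bar")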