Skip to content
Snippets Groups Projects
Commit 4be5f914 authored by Hashim Sharif
Browse files

merging

parents 2a3e6579 0ffea7f3
No related branches found
No related tags found
No related merge requests found
Showing
with 2826 additions and 0 deletions
'''ShuffleNet in PyTorch.
See the paper "ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices" for more details.
'''
import torch
import torch.nn as nn
import torch.nn.functional as F
class ShuffleBlock(nn.Module):
    """Channel-shuffle layer that interleaves the channels of `groups` groups."""

    def __init__(self, groups):
        super().__init__()
        self.groups = groups

    def forward(self, x):
        """Shuffle channels: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,W] -> [N,C,H,W]."""
        batch, channels, height, width = x.size()
        per_group = channels // self.groups
        grouped = x.view(batch, self.groups, per_group, height, width)
        # Swapping the group axis with the within-group axis interleaves the groups.
        interleaved = grouped.transpose(1, 2)
        return interleaved.reshape(batch, channels, height, width)
class Bottleneck(nn.Module):
    """ShuffleNet unit: grouped 1x1 conv, channel shuffle, depthwise 3x3 conv, grouped 1x1 conv.

    Args:
        in_planes: number of input channels.
        out_planes: number of channels produced by the convolutional branch.
        stride: stride of the depthwise 3x3 convolution. With stride 2 the
            shortcut is a 3x3 average pool and is concatenated to the branch
            output (so the block emits ``out_planes + in_planes`` channels);
            otherwise the shortcut is the identity and is added to it.
        groups: group count of the 1x1 convolutions. The first 1x1 convolution
            (and the shuffle) fall back to a single group when ``in_planes`` is 24.
    """

    def __init__(self, in_planes, out_planes, stride, groups):
        super(Bottleneck, self).__init__()
        self.stride = stride

        # Integer division is required: `/` yields a float in Python 3 and
        # nn.Conv2d / nn.BatchNorm2d only accept integer channel counts.
        mid_planes = out_planes // 4
        g = 1 if in_planes == 24 else groups

        self.conv1 = nn.Conv2d(in_planes, mid_planes, kernel_size=1, groups=g, bias=False)
        self.bn1 = nn.BatchNorm2d(mid_planes)
        self.shuffle1 = ShuffleBlock(groups=g)
        # groups == channels makes this a depthwise convolution.
        self.conv2 = nn.Conv2d(mid_planes, mid_planes, kernel_size=3, stride=stride,
                               padding=1, groups=mid_planes, bias=False)
        self.bn2 = nn.BatchNorm2d(mid_planes)
        self.conv3 = nn.Conv2d(mid_planes, out_planes, kernel_size=1, groups=groups, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        # Identity shortcut by default; down-sampling blocks pool the input instead.
        self.shortcut = nn.Sequential()
        if stride == 2:
            self.shortcut = nn.Sequential(nn.AvgPool2d(3, stride=2, padding=1))

    def forward(self, x):
        """Apply the unit to `x` and merge the result with the shortcut path."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.shuffle1(out)
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        res = self.shortcut(x)
        # Stride-2 blocks concatenate along channels; stride-1 blocks add residually.
        out = F.relu(torch.cat([out, res], 1)) if self.stride == 2 else F.relu(out + res)
        return out
class ShuffleNet(nn.Module):
    """ShuffleNet classifier with a 1x1 stem, three Bottleneck stages and a 10-way linear head.

    `cfg` is a dict with the keys 'out_planes' (per-stage output widths),
    'num_blocks' (per-stage block counts) and 'groups' (group count of the
    grouped convolutions).
    """

    def __init__(self, cfg):
        super().__init__()
        stage_widths = cfg['out_planes']
        stage_depths = cfg['num_blocks']
        groups = cfg['groups']

        # Stem: 3 input channels -> 24 channels.
        self.conv1 = nn.Conv2d(3, 24, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(24)
        # Running channel count consumed and updated by _make_layer.
        self.in_planes = 24

        self.layer1 = self._make_layer(stage_widths[0], stage_depths[0], groups)
        self.layer2 = self._make_layer(stage_widths[1], stage_depths[1], groups)
        self.layer3 = self._make_layer(stage_widths[2], stage_depths[2], groups)
        self.linear = nn.Linear(stage_widths[2], 10)

    def _make_layer(self, out_planes, num_blocks, groups):
        """Build one stage: a stride-2 concatenating block followed by stride-1 blocks."""
        blocks = []
        for index in range(num_blocks):
            is_first = index == 0
            stride = 2 if is_first else 1
            # The first block concatenates its input, so its branch only has to
            # produce the remaining channels.
            concat_planes = self.in_planes if is_first else 0
            blocks.append(
                Bottleneck(self.in_planes, out_planes - concat_planes,
                           stride=stride, groups=groups)
            )
            self.in_planes = out_planes
        return nn.Sequential(*blocks)

    def forward(self, x):
        """Return the class scores for the image batch `x`."""
        feats = F.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3):
            feats = stage(feats)
        pooled = F.avg_pool2d(feats, 4)
        flat = pooled.view(pooled.size(0), -1)
        return self.linear(flat)
def ShuffleNetG2():
    """Build the ShuffleNet variant that uses 2 convolution groups."""
    return ShuffleNet(dict(
        out_planes=[200, 400, 800],
        num_blocks=[4, 8, 4],
        groups=2,
    ))
def ShuffleNetG3():
    """Build the ShuffleNet variant that uses 3 convolution groups."""
    return ShuffleNet(dict(
        out_planes=[240, 480, 960],
        num_blocks=[4, 8, 4],
        groups=3,
    ))
def test():
    """Smoke test: push one random 32x32 RGB image through ShuffleNetG2 and print the output."""
    model = ShuffleNetG2()
    sample = torch.randn(1, 3, 32, 32)
    print(model(sample))
# test()
'''ShuffleNetV2 in PyTorch.
See the paper "ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" for more details.
'''
import torch
import torch.nn as nn
import torch.nn.functional as F
class ShuffleBlock(nn.Module):
    """Channel-shuffle layer that interleaves the channels of `groups` groups (2 by default)."""

    def __init__(self, groups=2):
        super().__init__()
        self.groups = groups

    def forward(self, x):
        """Shuffle channels: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,W] -> [N,C,H,W]."""
        batch, channels, height, width = x.size()
        per_group = channels // self.groups
        grouped = x.view(batch, self.groups, per_group, height, width)
        # Swapping the group axis with the within-group axis interleaves the groups.
        interleaved = grouped.transpose(1, 2)
        return interleaved.reshape(batch, channels, height, width)
class SplitBlock(nn.Module):
    """Split a [N,C,H,W] tensor along the channel axis at `int(C * ratio)`."""

    def __init__(self, ratio):
        super().__init__()
        self.ratio = ratio

    def forward(self, x):
        """Return the pair (leading channels, remaining channels) of `x`."""
        cut = int(x.size(1) * self.ratio)
        head = x[:, :cut, :, :]
        tail = x[:, cut:, :, :]
        return head, tail
class BasicBlock(nn.Module):
    """ShuffleNetV2 stride-1 unit: split channels, transform one part, concatenate, shuffle.

    Args:
        in_channels: number of channels of the block input (also its output).
        split_ratio: fraction of the channels that bypass the convolutions;
            the first ``int(in_channels * split_ratio)`` channels are passed
            through untouched and the remaining ones are convolved.
    """

    def __init__(self, in_channels, split_ratio=0.5):
        super(BasicBlock, self).__init__()
        self.split = SplitBlock(split_ratio)
        # The convolutional branch runs on the *second* part of the split,
        # which holds the channels left over after the first part is taken.
        # Sizing it as int(in_channels * split_ratio) only matches when the two
        # parts happen to be equal (ratio 0.5, even channel count).
        branch_channels = in_channels - int(in_channels * split_ratio)
        self.conv1 = nn.Conv2d(branch_channels, branch_channels,
                               kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(branch_channels)
        # groups == channels makes this a depthwise convolution.
        self.conv2 = nn.Conv2d(branch_channels, branch_channels,
                               kernel_size=3, stride=1, padding=1,
                               groups=branch_channels, bias=False)
        self.bn2 = nn.BatchNorm2d(branch_channels)
        self.conv3 = nn.Conv2d(branch_channels, branch_channels,
                               kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(branch_channels)
        self.shuffle = ShuffleBlock()

    def forward(self, x):
        """Transform the second channel part of `x`, re-join it with the first and shuffle."""
        x1, x2 = self.split(x)
        out = F.relu(self.bn1(self.conv1(x2)))
        # No activation after the depthwise convolution.
        out = self.bn2(self.conv2(out))
        out = F.relu(self.bn3(self.conv3(out)))
        out = torch.cat([x1, out], 1)
        out = self.shuffle(out)
        return out
class DownBlock(nn.Module):
    """ShuffleNetV2 down-sampling unit made of two stride-2 branches.

    Both branches see the full input; each emits ``out_channels // 2``
    channels, and the two results are concatenated and channel-shuffled.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        half = out_channels // 2

        # Left branch: depthwise 3x3 (stride 2) followed by a 1x1 projection.
        self.conv1 = nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=2,
                               padding=1, groups=in_channels, bias=False)
        self.bn1 = nn.BatchNorm2d(in_channels)
        self.conv2 = nn.Conv2d(in_channels, half, kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(half)

        # Right branch: 1x1, depthwise 3x3 (stride 2), 1x1.
        self.conv3 = nn.Conv2d(in_channels, half, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(half)
        self.conv4 = nn.Conv2d(half, half, kernel_size=3, stride=2,
                               padding=1, groups=half, bias=False)
        self.bn4 = nn.BatchNorm2d(half)
        self.conv5 = nn.Conv2d(half, half, kernel_size=1, bias=False)
        self.bn5 = nn.BatchNorm2d(half)

        self.shuffle = ShuffleBlock()

    def forward(self, x):
        """Run both branches on `x`, concatenate along channels and shuffle."""
        left = self.bn1(self.conv1(x))
        left = F.relu(self.bn2(self.conv2(left)))

        right = F.relu(self.bn3(self.conv3(x)))
        right = self.bn4(self.conv4(right))
        right = F.relu(self.bn5(self.conv5(right)))

        merged = torch.cat([left, right], 1)
        return self.shuffle(merged)
class ShuffleNetV2(nn.Module):
    """ShuffleNetV2 classifier with a 10-way linear head.

    `net_size` is a key of the module-level `configs` table, which supplies the
    per-stage output widths and block counts.
    """

    def __init__(self, net_size):
        super().__init__()
        widths = configs[net_size]['out_channels']
        depths = configs[net_size]['num_blocks']

        # Stem: 3x3 convolution, 3 -> 24 channels, resolution preserved.
        self.conv1 = nn.Conv2d(3, 24, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(24)
        # Running channel count consumed and updated by _make_layer.
        self.in_channels = 24

        self.layer1 = self._make_layer(widths[0], depths[0])
        self.layer2 = self._make_layer(widths[1], depths[1])
        self.layer3 = self._make_layer(widths[2], depths[2])

        # Final 1x1 expansion before pooling and classification.
        self.conv2 = nn.Conv2d(widths[2], widths[3],
                               kernel_size=1, stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(widths[3])
        self.linear = nn.Linear(widths[3], 10)

    def _make_layer(self, out_channels, num_blocks):
        """Build one stage: a DownBlock followed by `num_blocks` BasicBlocks."""
        stage = [DownBlock(self.in_channels, out_channels)]
        for _ in range(num_blocks):
            stage.append(BasicBlock(out_channels))
            self.in_channels = out_channels
        return nn.Sequential(*stage)

    def forward(self, x):
        """Return the class scores for the image batch `x`."""
        feats = F.relu(self.bn1(self.conv1(x)))
        # The stem max-pool (F.max_pool2d(out, 3, stride=2, padding=1)) is disabled here.
        for stage in (self.layer1, self.layer2, self.layer3):
            feats = stage(feats)
        feats = F.relu(self.bn2(self.conv2(feats)))
        pooled = F.avg_pool2d(feats, 4)
        flat = pooled.view(pooled.size(0), -1)
        return self.linear(flat)
# Every network size uses the same number of BasicBlocks per stage.
_V2_STAGE_DEPTHS = (3, 7, 3)

# Width multiplier -> (stage 1, stage 2, stage 3, final 1x1 conv) output channels.
# NOTE(review): the ShuffleNet V2 paper appears to list 244 (not 224) for the
# first stage of the 2x model; the value is kept as found -- confirm before changing.
_V2_STAGE_WIDTHS = {
    0.5: (48, 96, 192, 1024),
    1: (116, 232, 464, 1024),
    1.5: (176, 352, 704, 1024),
    2: (224, 488, 976, 2048),
}

# Lookup table consumed by ShuffleNetV2, keyed by network size.
configs = {
    size: {'out_channels': widths, 'num_blocks': _V2_STAGE_DEPTHS}
    for size, widths in _V2_STAGE_WIDTHS.items()
}
def test():
    """Smoke test: run three random 32x32 RGB images through the 0.5x ShuffleNetV2 and print the output shape."""
    model = ShuffleNetV2(net_size=0.5)
    batch = torch.randn(3, 3, 32, 32)
    scores = model(batch)
    print(scores.shape)
# test()
"""VGG11/13/16/19 in Pytorch."""
import torch.nn as nn
from models.hpvm import HPVMConvBundle
# Channel width of the convolutions in each of the five VGG stages.
_VGG_STAGE_WIDTHS = (64, 128, 256, 512, 512)

# Number of convolutions in each stage, per VGG variant.
_VGG_CONVS_PER_STAGE = {
    'VGG11': (1, 1, 2, 2, 2),
    'VGG13': (2, 2, 2, 2, 2),
    'VGG16': (2, 2, 3, 3, 3),
    'VGG19': (2, 2, 4, 4, 4),
}

# Layer specification per variant: an int is a convolution output width and
# 'M' marks a max-pool; every stage ends with one 'M'.
cfg = {
    name: [
        entry
        for width, repeats in zip(_VGG_STAGE_WIDTHS, depths)
        for entry in [width] * repeats + ['M']
    ]
    for name, depths in _VGG_CONVS_PER_STAGE.items()
}
class VGG(nn.Module):
    """VGG-style network built from the layer specification `cfg[vgg_name]`, with a 512 -> 10 linear head."""

    def __init__(self, vgg_name):
        super().__init__()
        self.features = self._make_layers(cfg[vgg_name])
        self.classifier = nn.Linear(512, 10)

    def forward(self, x):
        """Extract features from `x`, flatten them per sample and classify."""
        feats = self.features(x)
        flat = feats.view(feats.size(0), -1)
        return self.classifier(flat)

    @staticmethod
    def _make_layers(config):
        """Translate a layer specification into an nn.Sequential.

        'M' entries become 2x2 max-pools; any other entry is the output width
        of an HPVMConvBundle (3x3, padding 1) followed by batch-norm and ReLU.
        A 1x1 average pool is appended at the end.
        """
        modules = []
        channels = 3  # RGB input
        for spec in config:
            if spec == 'M':
                modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
                continue
            modules.extend([
                HPVMConvBundle(channels, spec, kernel_size=3, padding=1),
                nn.BatchNorm2d(spec),
                nn.ReLU(inplace=True),
            ])
            channels = spec
        modules.append(nn.AvgPool2d(kernel_size=1, stride=1))
        return nn.Sequential(*modules)
#!/usr/bin/env python
#
# Development-time Tuner with Algorithmic Approximations:
# Approximations: Perforation, Sampling with varying knobs for rate, skip offset
import copy
import logging
import os
import shutil
import time
from pathlib import Path
from typing import List, Tuple
import numpy as np
import opentuner
from opentuner import ConfigurationManipulator, EnumParameter, MeasurementInterface
from opentuner.measurement.inputmanager import FixedInputManager
from opentuner.search.objective import ThresholdAccuracyMinimizeTime
from opentuner.tuningrunmain import TuningRunMain
from torch.nn import Module
from tqdm import tqdm
from exp import Benchmark, ConfigMeasurer, ExpState, TuningTime, batch_id, bench_tuner_data, is_dev_time
from models import get_all_output, networks, QoS
from toolkit import ConfigT
from toolkit.estimators import WeightedLinearQoSEstimator
from utils import Config, config, reapply_last_config
# Module-level logger used for all tuner messages in this file.
msg_logger = logging.getLogger(__name__)
# When True, TunerDriver builds a QoS proxy estimator (see init_proxy) and
# get_accuracy uses it instead of measuring the configuration directly.
use_proxy = False
# Run count recorded for a config that contains any knob value in 1..7;
# configs without such a value record a single run (see TunerDriver.get_accuracy).
n_promise_valid_runs = 30
# Confidence level handed to WeightedLinearQoSEstimator by init_proxy.
confidence_level = 0.95
def init_proxy(ni: ConfigMeasurer, pickle_path: Path):
    """Create the WeightedLinearQoSEstimator used as a QoS proxy.

    The estimator receives three callbacks bound to `ni` (run a network on the
    validation loader, score outputs against the validation loader, decide
    whether a batch of outputs stays within the QoS budget), the module-level
    `confidence_level`, and `pickle_path` as its storage location.
    """

    def measure_qos(outputs):
        return ni.get_qos(outputs, ni.val_loader)

    def within_budget(output_batch):
        measured = np.array([measure_qos(outputs) for outputs in output_batch])
        # Accept while the mean QoS drop from the validation baseline is below 3.0.
        return ni.val_qos - measured.mean() < 3.0

    def collect_outputs(net: Module):
        return get_all_output(net, ni.val_loader)

    return WeightedLinearQoSEstimator(
        ni.nas, collect_outputs, measure_qos, within_budget, confidence_level,
        storage=pickle_path,
    )
class Timer:
    """Context manager that reports the wall-clock duration of its body.

    On exit the elapsed seconds are handed to `timer_state.add_timer` under
    `timer_name`; this happens whether or not the body raised.
    """

    def __init__(self, timer_state: TuningTime, timer_name: str):
        self.timer_state = timer_state
        self.name = timer_name
        self.start = None  # set when the context is entered

    def __enter__(self):
        self.start = time.time()
        return self

    def __exit__(self, *args):
        elapsed = time.time() - self.start
        self.timer_state.add_timer(self.name, elapsed)
class TunerDriver:
    """Drive the OpenTuner-based approximation tuning flow for one benchmark.

    Construction prepares the result directory, snapshots the current git
    state, loads the network information through
    `ConfigMeasurer.init_from_bench` and derives the tuner / validation / test
    QoS thresholds. `tuner_exec` then runs one tuning pass per tuner threshold
    and post-processes the collected configurations.
    """

    def __init__(self, bench: Benchmark):
        self.bench = bench
        msg_logger.info(f"Tuning for model {self.bench.model_name}")
        # Initialize folder.
        self._init_folder(bench)
        # Take a snapshot of current code.
        self.take_code_snapshot()
        # Initialize network information and qos thresholds
        self.net_info = ConfigMeasurer.init_from_bench(self.bench)
        qoses = self.net_info.val_qos, self.net_info.test_qos
        qos_type = self.net_info.val_qos.__class__
        self.tuner_thres = qos_type.suggested_tuner_thresholds(self.net_info.val_qos)
        self.val_thres = qos_type.suggested_val_threshold(self.net_info.val_qos)
        self.test_thres = qos_type.suggested_test_threshold(self.net_info.test_qos)
        # Tuner states.
        self.states = ExpState(bench, qos_type, qoses)
        # Current # of iteration. `ProxyTuner` will use this.
        self.run_id, self.iter = 0, 0
        # Initialize proxy.
        if use_proxy:
            self.proxy = init_proxy(self.net_info, self.bench.result_dir / 'proxy.pkl')
        else:
            self.proxy = None

    @staticmethod
    def _init_folder(bench: Benchmark):
        """Empty `bench.result_dir` (keeping 'proxy.pkl') or create it if missing."""

        def remove_file_or_folder(path: Path):
            if path.is_dir():
                # Operate on the argument itself rather than on the caller's
                # loop variable, which the helper used to reach via closure.
                shutil.rmtree(path)
            elif path.is_file():
                path.unlink()  # Removes file despite the surprising name

        pickle_path = bench.result_dir / 'proxy.pkl'
        # Remove everything in result folder except pickle file
        if bench.result_dir.is_dir():
            msg_logger.warning(f"!Cleaning existing result dir = {bench.result_dir}")
            for child in bench.result_dir.glob('*'):
                if child == pickle_path:
                    continue
                msg_logger.info(f"  !Removing {child}")
                remove_file_or_folder(child)
        # Create result folder if it doesn't exist
        if not bench.result_dir.is_dir():
            msg_logger.info(f"Creating output directory = {bench.result_dir}")
            os.makedirs(bench.result_dir)

    def get_default_args(self):
        """Return OpenTuner arguments with the database path and test limit overridden.

        The database lives at ``opentuner.db/<batch_id>.db``; its parent
        directory is created when absent.
        """
        args = opentuner.default_argparser().parse_args()
        args.database = f"opentuner.db/{batch_id}.db"
        args.test_limit = self.bench.autotuner_runs
        parent = Path(args.database).parent
        if not parent.is_dir():
            os.makedirs(parent, exist_ok=True)
        return args

    def tuner_exec(self):
        """Run one timed tuning pass per tuner threshold, then post-process the configs."""
        # Get default opentuner args
        args = self.get_default_args()
        # Start tuning for each threshold
        for i, thres in enumerate(self.tuner_thres):
            with Timer(self.states.timers, f"tuning_{i}"):
                msg_logger.info(
                    f"Tuning goal: qos >= {thres}; keeping configs with qos >= {self.val_thres}"
                )
                tuner = ProxyTuner(args, self, thres, self.val_thres)
                # TuningRunMain.__init__ initializes its own logger, so we'll reapply our settings.
                tuning_main = TuningRunMain(tuner, args)
                reapply_last_config()
                # Unleash the tuner!
                tuning_main.main()
                # Remove tuner progress bar
                tuner.pbar.close()
                self.run_id += 1
                self.iter = 0
        # Postprocess configs
        self.process_configs()

    def calibrate_write_configs(self, configs: List[Config], is_test_set: bool):
        """Re-measure each config and append the updated copy to the matching config store.

        Args:
            configs: configurations to re-measure; they are deep-copied, so the
                caller's objects are left untouched.
            is_test_set: selects the test store and test baseline QoS when
                True, the validation ones otherwise.
        """
        write_to = self.states.tested_configs if is_test_set else self.states.validated_configs
        gold_acc = self.net_info.test_qos if is_test_set else self.net_info.val_qos
        for cfg in tqdm(configs, leave=False):
            cfg: Config = copy.deepcopy(cfg)
            # Measurement expects a mapping from position to knob value.
            flags = dict(enumerate(cfg.flags))
            measured_acc, confidence = self.net_info.actual_measure(
                flags, cfg.total_runs, is_test_set, threshold=self.val_thres
            )
            prev_acc = cfg.avg_qos
            cfg.update_acc(measured_acc, confidence, gold_acc)
            new_acc = cfg.avg_qos
            msg_logger.debug(f"{prev_acc} (mean) -> {new_acc} (mean)")
            write_to.append(cfg)
        write_to.finalize_dump()

    @staticmethod
    def filter_configs(
            validation: List[Config], test: List[Config],
            vali_threshold: QoS, test_threshold: QoS
    ) -> Tuple[List[Config], List[Config]]:
        """Keep configs whose `avg_loss` is within the given thresholds.

        Returns:
            The filtered validation configs, and the filtered test configs
            restricted to those whose `fname` also survived validation
            filtering.
        """
        # Filter validation and test set by their respective thresholds
        filtered_validation = [
            c for c in validation if c.avg_loss <= vali_threshold
        ]
        filtered_test = [
            c for c in test if c.avg_loss <= test_threshold
        ]
        # Test configs also need to be a subset of validation configs.
        name_to_filtered = {x.fname: x for x in filtered_test}
        intersect_names = set(list(name_to_filtered.keys())).intersection(
            set((x.fname for x in filtered_validation))
        )
        filtered_test_ = [name_to_filtered[fname] for fname in intersect_names]
        return filtered_validation, filtered_test_

    def process_configs(self):
        """Prefilter, validate, test and threshold-filter the tuned configs, then plot."""
        # Finalize all configs because tuning is done.
        # (this may not do anything now but will in the future)
        self.states.all_configs.finalize_dump()
        all_configs = self.states.all_configs.configs
        # Pre-filter configs by a wide pareto margin
        filtered_configs = config.is_pareto_efficient(all_configs, ratio=0.05, n_min=50, n_max=50)
        msg_logger.info(f"Prefilter yields {len(filtered_configs)} configs from {len(all_configs)}")
        self.states.filtered_configs.finalize_dump(with_configs=filtered_configs)
        # Calibrate prefiltered configs (validation step)
        with Timer(self.states.timers, "validate"):
            self.calibrate_write_configs(filtered_configs, is_test_set=False)
            validated_configs = self.states.validated_configs.configs
        # Calibrate prefiltered configs on test set (test step)
        with Timer(self.states.timers, "test"):
            self.calibrate_write_configs(filtered_configs, is_test_set=True)
            tested_configs = self.states.tested_configs.configs
        # Filter valid and test set configs by thresholds
        valid_configs, test_configs = self.filter_configs(
            validated_configs, tested_configs, self.val_thres, self.test_thres
        )
        self.states.valid_configs.finalize_dump(valid_configs)
        self.states.test_configs.finalize_dump(test_configs)
        # Finalize data input and plot everything.
        self.states.finalize_plot()

    def take_code_snapshot(self):
        """Record the current git commit and uncommitted diff under ``<result_dir>/references``."""
        import git
        msg_logger.info(f"Taking git snapshot")
        ref_dir = self.bench.result_dir / "references"
        os.mkdir(ref_dir)
        # Write current git commit (SHA id)
        repo = git.Repo(search_parent_directories=True)
        sha = repo.head.object.hexsha
        msg_logger.info(f"Current code is at commit {sha}")
        with (ref_dir / 'git_commit.txt').open('w') as f:
            f.write(sha)
        # Also put all outstanding code change in a diff file.
        # This way changes in all git-tracked files are captured.
        t = repo.head.commit.tree
        with (ref_dir / 'diff.txt').open('w') as f:
            f.write(repo.git.diff(t))

    def make_config_name(self) -> str:
        """Return a config name built from the model name, tuning run id and iteration."""
        return f"{self.bench.model_name}_{self.run_id}_{self.iter}"

    def get_accuracy(self, cfg: ConfigT) -> Tuple[QoS, QoS, int]:
        """Return (mean QoS, confidence QoS, run count) for the configuration `cfg`.

        With the proxy enabled both QoS values come from the proxy estimate;
        otherwise the configuration is measured once on the validation set and
        the same value is returned for both.
        """
        # Knob values 1..7 select the higher run count.
        has_promise_flags = set(cfg.values()).intersection(set(range(1, 7 + 1)))
        config_validation_runs = n_promise_valid_runs if has_promise_flags else 1
        if use_proxy:
            mean_acc, confidence_acc = self.net_info.proxy_estimate(cfg, self.proxy)
            assert has_promise_flags or (mean_acc == confidence_acc)
        else:
            mean_acc, _ = self.net_info.actual_measure(cfg, 1, is_test_set=False)
            confidence_acc = mean_acc
        return mean_acc, confidence_acc, config_validation_runs
class ProxyTuner(MeasurementInterface):
    """OpenTuner measurement interface that scores configurations through a TunerDriver.

    Configurations whose confidence QoS exceeds `accept_thres` are appended to
    the driver's collection of all configs.
    """

    def __init__(self, args, driver: TunerDriver, tuner_thres: QoS, accept_thres: QoS):
        self.tuner_driver = driver
        self.model_info = driver.net_info
        self.bench = driver.bench
        self.tuner_thres = tuner_thres
        self.all_configs = driver.states.all_configs
        # Progress bar sized to the number of configurations the tuner will try.
        self.pbar = tqdm(total=args.test_limit, leave=False)
        search_objective = ThresholdAccuracyMinimizeTime(tuner_thres.to_scalar())
        fixed_input = FixedInputManager(size=driver.bench.get_n_layers())
        super().__init__(
            args,
            program_name=self.bench.model_name,
            input_manager=fixed_input,
            objective=search_objective,
        )
        self.accept_thres = accept_thres

    def manipulator(self) -> ConfigurationManipulator:
        """Describe the search space: one enum parameter per layer, ranging over its knobs."""
        space = ConfigurationManipulator()
        layer_knobs = self.model_info.get_knobs()
        for ext_layer_id, knobs in layer_knobs.items():
            space.add_parameter(EnumParameter(ext_layer_id, knobs))
        return space

    def seed_configurations(self):
        """Return the baseline config as the only seed when the benchmark asks for one."""
        if not self.bench.use_seed:
            return []
        return [self.bench.get_baseline_config(not is_dev_time)]

    def run(self, desired_result, input_, limit):
        """Evaluate one configuration and report its cost and accuracy to OpenTuner."""
        cfg: ConfigT = desired_result.configuration.data
        # Estimated mean accuracy and 95%-confidence accuracy of this configuration.
        mean_acc, confident_acc, n_runs = self.tuner_driver.get_accuracy(cfg)
        # Cost associated with the selected configuration.
        total_comps, speedup = self.bench.compute_config_cost(cfg)

        result = opentuner.resultsdb.models.Result()
        result.time = total_comps
        # opentuner cannot compare custom datatypes, so the QoS is reduced to a scalar.
        result.accuracy = confident_acc.to_scalar(relative_to=self.tuner_thres)

        # Record the configuration when its accuracy is acceptable.
        if confident_acc > self.accept_thres:
            name = self.tuner_driver.make_config_name()
            knob_values = [cfg[layer] for layer in sorted(cfg.keys())]
            accepted = Config(
                mean_acc, self.model_info.val_qos, name, knob_values,
                n_runs, 95.0, total_comps, speedup
            )
            self.all_configs.append(accepted)
            msg_logger.debug(
                f"Config chosen with accuracy (mean) = {mean_acc}, (95%) = {confident_acc} "
                f"and speedup = {speedup}"
            )
            self.tuner_driver.iter += 1

        self.pbar.update()
        return result

    def save_final_config(self, configuration):
        """Log the final configuration chosen by the tuner."""
        msg_logger.info(f"Final configuration {configuration.data}")
        msg_logger.info("Done with Autotuning run")
if __name__ == '__main__':
    # Every known network must have tuner benchmark data.
    networks_without_data = set(networks.keys()) - set(bench_tuner_data.keys())
    assert not networks_without_data
    # Tune each selected network in turn.
    for network in ('alexnet2_hpvm',):
        bench_: Benchmark = bench_tuner_data[network]
        driver_ = TunerDriver(bench_)
        driver_.tuner_exec()
This diff is collapsed.
{
"('0', '0', '1', '1', '2', '0')": {
"tensorConvolution": "41.000000,41.000000,41.000000,41.000000,",
"Baseline": "41.000000,41.000000,41.000000,41.000000,",
"FP16_Baseline": "41.000000,41.000000,41.000000,41.000000,",
"ConvSampSim": "26.000000,26.000000,26.000000,26.000000,",
"ConvApprox": "26.000000,26.000000,26.000000,26.000000,",
"ConvApproxHalf2": "26.000000,26.000000,26.000000,26.000000,"
},
"('0', '0', '1', '1', '2', '1')": {
"tensorConvolution": "41.000000,41.000000,41.000000,41.000000,",
"Baseline": "41.000000,41.000000,41.000000,41.000000,",
"FP16_Baseline": "41.000000,41.000000,41.000000,41.000000,",
"ConvSampSim": "56.000000,56.000000,56.000000,56.000000,",
"ConvApprox": "56.000000,56.000000,56.000000,56.000000,",
"ConvApproxHalf2": "56.000000,56.000000,56.000000,56.000000,"
},
"('0', '0', '1', '1', '3', '0')": {
"tensorConvolution": "41.000000,41.000000,41.000000,41.000000,",
"Baseline": "41.000000,41.000000,41.000000,41.000000,",
"FP16_Baseline": "41.000000,41.000000,41.000000,41.000000,",
"ConvSampSim": "39.000000,39.000000,39.000000,39.000000,",
"ConvApprox": "39.000000,39.000000,39.000000,39.000000,",
"ConvApproxHalf2": "39.000000,39.000000,39.000000,39.000000,"
},
"('0', '0', '1', '1', '3', '1')": {
"tensorConvolution": "41.000000,41.000000,41.000000,41.000000,",
"Baseline": "41.000000,41.000000,41.000000,41.000000,",
"FP16_Baseline": "41.000000,41.000000,41.000000,41.000000,",
"ConvSampSim": "42.000000,42.000000,42.000000,42.000000,",
"ConvApprox": "42.000000,42.000000,42.000000,42.000000,",
"ConvApproxHalf2": "42.000000,42.000000,42.000000,42.000000,"
},
"('0', '0', '1', '1', '4', '0')": {
"tensorConvolution": "41.000000,41.000000,41.000000,41.000000,",
"Baseline": "41.000000,41.000000,41.000000,41.000000,",
"FP16_Baseline": "41.000000,41.000000,41.000000,41.000000,",
"ConvSampSim": "36.000000,36.000000,36.000000,36.000000,",
"ConvApprox": "36.000000,36.000000,36.000000,36.000000,",
"ConvApproxHalf2": "35.968750,35.968750,35.968750,35.968750,"
},
"('0', '0', '1', '1', '4', '1')": {
"tensorConvolution": "41.000000,41.000000,41.000000,41.000000,",
"Baseline": "41.000000,41.000000,41.000000,41.000000,",
"FP16_Baseline": "41.000000,41.000000,41.000000,41.000000,",
"ConvSampSim": "45.333336,45.333336,45.333336,45.333336,",
"ConvApprox": "45.333336,45.333336,45.333336,45.333336,",
"ConvApproxHalf2": "45.312500,45.312500,45.312500,45.312500,"
},
"('1', '1', '1', '1', '2', '0')": {
"tensorConvolution": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,",
"Baseline": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,",
"FP16_Baseline": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,",
"ConvSampSim": "12.000000,18.000000,18.000000,12.000000,18.000000,26.000000,26.000000,18.000000,18.000000,26.000000,26.000000,18.000000,12.000000,18.000000,18.000000,12.000000,",
"ConvApprox": "12.000000,18.000000,18.000000,12.000000,18.000000,26.000000,26.000000,18.000000,18.000000,26.000000,26.000000,18.000000,12.000000,18.000000,18.000000,12.000000,",
"ConvApproxHalf2": "12.000000,18.000000,18.000000,12.000000,18.000000,26.000000,26.000000,18.000000,18.000000,26.000000,26.000000,18.000000,12.000000,18.000000,18.000000,12.000000,"
},
"('1', '1', '1', '1', '2', '1')": {
"tensorConvolution": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,",
"Baseline": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,",
"FP16_Baseline": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,",
"ConvSampSim": "24.000000,36.000000,36.000000,24.000000,36.000000,56.000000,56.000000,36.000000,36.000000,56.000000,56.000000,36.000000,24.000000,36.000000,36.000000,24.000000,",
"ConvApprox": "24.000000,36.000000,36.000000,24.000000,36.000000,56.000000,56.000000,36.000000,36.000000,56.000000,56.000000,36.000000,24.000000,36.000000,36.000000,24.000000,",
"ConvApproxHalf2": "24.000000,36.000000,36.000000,24.000000,36.000000,56.000000,56.000000,36.000000,36.000000,56.000000,56.000000,36.000000,24.000000,36.000000,36.000000,24.000000,"
},
"('1', '1', '1', '1', '3', '0')": {
"tensorConvolution": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,",
"Baseline": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,",
"FP16_Baseline": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,",
"ConvSampSim": "18.000000,27.000000,27.000000,18.000000,25.500000,39.000000,39.000000,25.500000,25.500000,39.000000,39.000000,25.500000,18.000000,27.000000,27.000000,18.000000,",
"ConvApprox": "18.000000,27.000000,27.000000,18.000000,25.500000,39.000000,39.000000,25.500000,25.500000,39.000000,39.000000,25.500000,18.000000,27.000000,27.000000,18.000000,",
"ConvApproxHalf2": "18.000000,27.000000,27.000000,18.000000,25.500000,39.000000,39.000000,25.500000,25.500000,39.000000,39.000000,25.500000,18.000000,27.000000,27.000000,18.000000,"
},
"('1', '1', '1', '1', '3', '1')": {
"tensorConvolution": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,",
"Baseline": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,",
"FP16_Baseline": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,",
"ConvSampSim": "18.000000,27.000000,27.000000,18.000000,28.500000,42.000000,42.000000,27.000000,28.500000,42.000000,42.000000,27.000000,18.000000,27.000000,27.000000,18.000000,",
"ConvApprox": "18.000000,27.000000,27.000000,18.000000,28.500000,42.000000,42.000000,27.000000,28.500000,42.000000,42.000000,27.000000,18.000000,27.000000,27.000000,18.000000,",
"ConvApproxHalf2": "18.000000,27.000000,27.000000,18.000000,28.500000,42.000000,42.000000,27.000000,28.500000,42.000000,42.000000,27.000000,18.000000,27.000000,27.000000,18.000000,"
},
"('1', '1', '1', '1', '4', '0')": {
"tensorConvolution": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,",
"Baseline": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,",
"FP16_Baseline": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,",
"ConvSampSim": "16.000000,22.666666,22.666666,13.333333,25.333334,36.000000,36.000000,22.666668,25.333334,36.000000,36.000000,22.666668,18.666666,25.333334,25.333334,16.000000,",
"ConvApprox": "16.000000,22.666666,22.666666,13.333333,25.333334,36.000000,36.000000,22.666668,25.333334,36.000000,36.000000,22.666668,18.666666,25.333334,25.333334,16.000000,",
"ConvApproxHalf2": "16.000000,22.671875,22.671875,13.328125,25.328125,35.968750,35.968750,22.656250,25.328125,35.968750,35.968750,22.656250,18.671875,25.328125,25.328125,16.000000,"
},
"('1', '1', '1', '1', '4', '1')": {
"tensorConvolution": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,",
"Baseline": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,",
"FP16_Baseline": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,",
"ConvSampSim": "18.666668,29.333332,29.333332,20.000000,29.333332,45.333336,45.333336,29.333332,29.333332,45.333336,45.333336,29.333332,20.000000,29.333332,29.333332,18.666668,",
"ConvApprox": "18.666668,29.333332,29.333332,20.000000,29.333332,45.333336,45.333336,29.333332,29.333332,45.333336,45.333336,29.333332,20.000000,29.333332,29.333332,18.666668,",
"ConvApproxHalf2": "18.656250,29.343750,29.343750,20.000000,29.328125,45.312500,45.312500,29.343750,29.328125,45.312500,45.312500,29.343750,20.000000,29.328125,29.328125,18.656250,"
},
"('1', '1', '2', '2', '2', '0')": {
"tensorConvolution": "18.000000,27.000000,27.000000,41.000000,",
"Baseline": "18.000000,27.000000,27.000000,41.000000,",
"FP16_Baseline": "18.000000,27.000000,27.000000,41.000000,",
"ConvSampSim": "12.000000,18.000000,18.000000,26.000000,",
"ConvApprox": "12.000000,18.000000,18.000000,26.000000,",
"ConvApproxHalf2": "12.000000,18.000000,18.000000,26.000000,"
},
"('1', '1', '2', '2', '2', '1')": {
"tensorConvolution": "18.000000,27.000000,27.000000,41.000000,",
"Baseline": "18.000000,27.000000,27.000000,41.000000,",
"FP16_Baseline": "18.000000,27.000000,27.000000,41.000000,",
"ConvSampSim": "24.000000,36.000000,36.000000,56.000000,",
"ConvApprox": "24.000000,36.000000,36.000000,56.000000,",
"ConvApproxHalf2": "24.000000,36.000000,36.000000,56.000000,"
},
"('1', '1', '2', '2', '3', '0')": {
"tensorConvolution": "18.000000,27.000000,27.000000,41.000000,",
"Baseline": "18.000000,27.000000,27.000000,41.000000,",
"FP16_Baseline": "18.000000,27.000000,27.000000,41.000000,",
"ConvSampSim": "18.000000,27.000000,25.500000,39.000000,",
"ConvApprox": "18.000000,27.000000,25.500000,39.000000,",
"ConvApproxHalf2": "18.000000,27.000000,25.500000,39.000000,"
},
"('1', '1', '2', '2', '3', '1')": {
"tensorConvolution": "18.000000,27.000000,27.000000,41.000000,",
"Baseline": "18.000000,27.000000,27.000000,41.000000,",
"FP16_Baseline": "18.000000,27.000000,27.000000,41.000000,",
"ConvSampSim": "18.000000,27.000000,28.500000,42.000000,",
"ConvApprox": "18.000000,27.000000,28.500000,42.000000,",
"ConvApproxHalf2": "18.000000,27.000000,28.500000,42.000000,"
},
"('1', '1', '2', '2', '4', '0')": {
"tensorConvolution": "18.000000,27.000000,27.000000,41.000000,",
"Baseline": "18.000000,27.000000,27.000000,41.000000,",
"FP16_Baseline": "18.000000,27.000000,27.000000,41.000000,",
"ConvSampSim": "16.000000,22.666666,25.333334,36.000000,",
"ConvApprox": "16.000000,22.666666,25.333334,36.000000,",
"ConvApproxHalf2": "16.000000,22.671875,25.328125,35.968750,"
},
"('1', '1', '2', '2', '4', '1')": {
"tensorConvolution": "18.000000,27.000000,27.000000,41.000000,",
"Baseline": "18.000000,27.000000,27.000000,41.000000,",
"FP16_Baseline": "18.000000,27.000000,27.000000,41.000000,",
"ConvSampSim": "18.666668,29.333332,29.333332,45.333336,",
"ConvApprox": "18.666668,29.333332,29.333332,45.333336,",
"ConvApproxHalf2": "18.656250,29.343750,29.328125,45.312500,"
}
}
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
from toolkit import ModuleIndexer, NetApproxSelector
from utils import compute_accuracy, init_by_name, run_concat_output
def float_eq(f1, f2, tol=1e-5):
    """Return True when `f1` and `f2` differ by strictly less than `tol`.

    Args:
        f1: first value.
        f2: second value.
        tol: absolute tolerance; defaults to 1e-5.
    """
    return abs(f1 - f2) < tol
def main():
    """Check the accuracy of 'resnet50_imagenet_hpvm' with and without approximations.

    The baseline network and three approximation configs are each run on the
    test loader and their accuracy is asserted against a recorded value.
    """
    baseline, testloader, _, _shapes = init_by_name('resnet50_imagenet_hpvm')
    baseline_dag = ModuleIndexer(baseline)
    nas = NetApproxSelector(baseline_dag)

    def accuracy_of(module):
        return compute_accuracy(run_concat_output(module, testloader), testloader)

    # Unapproximated network first.
    assert float_eq(accuracy_of(baseline_dag.module), 0.773)

    # (approximation config, expected accuracy), checked in this order.
    expectations = (
        ({82: 242}, 0.755),
        ({82: 242, 108: 247}, 0.746),
        ({55: 237, 82: 242, 108: 247}, 0.741),
    )
    for approx_config, expected_acc in expectations:
        approximated = nas.apply_approx_by_config(approx_config)
        assert float_eq(accuracy_of(approximated.module), expected_acc)

    print("Accuracy test passed.")


if __name__ == '__main__':
    main()
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
from .config import Config
from .logging import config_pylogger, reapply_last_config
from .utils import device, get_knob_config_file, get_tensorrt_dir, gpu_mem_mb
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment