diff --git a/doc/conf.py b/doc/conf.py
index 12f28f5c0af5b4ba58545c094c660337223233cf..ad8cee166b4df9acec13bca5dd73eccd4db31a9e 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -1,12 +1,14 @@
 from datetime import date
 import sphinx_rtd_theme
+
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 #
 import os
 import sys
-sys.path.insert(0, os.path.abspath('..'))
+
+sys.path.insert(0, os.path.abspath(".."))
 
 # General configuration
 # ---------------------
diff --git a/predtuner/__init__.py b/predtuner/__init__.py
index 92372504d1a9232aee0de40980fa5b91f52cb119..c11d18f205241f54b32405475e6e19d22d85538a 100644
--- a/predtuner/__init__.py
+++ b/predtuner/__init__.py
@@ -1,7 +1,13 @@
 from ._logging import config_pylogger
 from .approxapp import ApproxApp, ApproxKnob, ApproxTuner
 from .approxes import get_knobs_from_file
-from .modeledapp import (IPerfModel, IQoSModel, LinearPerfModel, ModeledApp,
-                         QoSModelP1, QoSModelP2)
+from .modeledapp import (
+    IPerfModel,
+    IQoSModel,
+    LinearPerfModel,
+    ModeledApp,
+    QoSModelP1,
+    QoSModelP2,
+)
 from .torchapp import TorchApp, TorchApproxKnob
 from .torchutil import accuracy
diff --git a/predtuner/_pareto.py b/predtuner/_pareto.py
index 4396ab1cc9d6cd02b3c0d0595a55085d8ea3d064..57ccd3d04d0920f117460c68060e53a0dc9a6ca8 100644
--- a/predtuner/_pareto.py
+++ b/predtuner/_pareto.py
@@ -19,8 +19,14 @@ def _find_distance_to(points: np.ndarray, ref_points: np.ndarray) -> np.ndarray:
         if left_ref_p == -1:
             left_ref_p = 0
         to_left_ref = ref_points[left_ref_p] - point
-        local_unit_vec = local_unit_vecs[-1] if left_ref_p >= n_ref - 1 else local_unit_vecs[left_ref_p]
-        projection = np.dot(local_unit_vec, to_left_ref) / np.linalg.norm(local_unit_vec)
+        local_unit_vec = (
+            local_unit_vecs[-1]
+            if left_ref_p >= n_ref - 1
+            else local_unit_vecs[left_ref_p]
+        )
+        projection = np.dot(local_unit_vec, to_left_ref) / np.linalg.norm(
+            local_unit_vec
+        )
         dist = np.sqrt(np.linalg.norm(to_left_ref) ** 2 - projection ** 2)
         dists.append(dist)
     return np.array(dists)
diff --git a/predtuner/approxapp.py b/predtuner/approxapp.py
index 5cd82a19e770f6c34208fac1bda6d738c5f6ed4c..f82544e7763909a816cd91a7c81805b159a0c7fb 100644
--- a/predtuner/approxapp.py
+++ b/predtuner/approxapp.py
@@ -31,12 +31,13 @@ class ApproxKnob:
 class ApproxApp(abc.ABC):
     """Generic approximable application with operator & knob enumeration,
     and measures its own QoS and performance given a configuration.
-    
+
     Parameters
     ----------
     op_knobs:
         a mapping from each operator (identified by str) to a list of applicable knobs.
     """
+
     def __init__(self, op_knobs: Dict[str, List[ApproxKnob]]) -> None:
         super().__init__()
         self.op_knobs = op_knobs
@@ -71,7 +72,9 @@ class ApproxApp(abc.ABC):
         return list(set.union(*knob_sets))
 
     @staticmethod
-    def _check_get_baseline_knob_(op_knobs: Dict[str, List[ApproxKnob]]) -> "BaselineKnob":
+    def _check_get_baseline_knob_(
+        op_knobs: Dict[str, List[ApproxKnob]]
+    ) -> "BaselineKnob":
         # Modifies op_knobs inplace.
         # Find the baseline knob if the user has one, or get a default one
         knob_sets = [set(knobs) for knobs in op_knobs.values()]
@@ -80,7 +83,7 @@ class ApproxApp(abc.ABC):
         if len(baselines) > 1:
             raise ValueError(f"Found multiple baseline knobs in op_knobs: {baselines}")
         if baselines:
-            baseline_knob, = baselines
+            (baseline_knob,) = baselines
         else:
             baseline_knob = BaselineKnob()
         # Start checking if each layer has the baseline knob
diff --git a/predtuner/approxes/_copy.py b/predtuner/approxes/_copy.py
index acb0f78186cb14108f859e5d8a41c6b95b4b02a3..3828702543a149955d62c04a45901a0b92c10577 100644
--- a/predtuner/approxes/_copy.py
+++ b/predtuner/approxes/_copy.py
@@ -3,7 +3,7 @@ from typing import TypeVar
 
 from torch.nn import Module, Parameter
 
-T = TypeVar('T')
+T = TypeVar("T")
 
 
 def module_only_deepcopy(obj: T, memo=None) -> T:
@@ -19,7 +19,7 @@ def module_only_deepcopy(obj: T, memo=None) -> T:
     # Additionally share all buffers of Module. For example, this accounts for
     # running_{mean|var} in BatchNorm.
     if isinstance(obj_, Module):
-        buffers = obj_.__dict__.get('_buffers')
+        buffers = obj_.__dict__.get("_buffers")
         for buffer in buffers.values():
             memo[id(buffer)] = buffer
     # Share all parameters.
@@ -34,7 +34,7 @@ def module_only_deepcopy(obj: T, memo=None) -> T:
     elif isinstance(obj_, (list, tuple)):
         for x in obj_:
             recursive_scan_parameters(x)
-    elif hasattr(obj_, '__dict__'):
+    elif hasattr(obj_, "__dict__"):
         for x in obj_.__dict__.values():
             recursive_scan_parameters(x)
 
diff --git a/predtuner/approxes/approxes.py b/predtuner/approxes/approxes.py
index d4ab80981273f74af709c769db99c4a721d12680..fb3bca7d13d0e0dd93cc39a3410d2ee9c86c7d18 100644
--- a/predtuner/approxes/approxes.py
+++ b/predtuner/approxes/approxes.py
@@ -33,33 +33,33 @@ def _interpolate_first_dim(tensor: torch.Tensor, interp_indices: Iterable[int]):
 
 class PerforateConv2dStride(TorchApproxKnob):
     r"""Simulation of strided perforated convolution for `torch.nn.Conv2d`.
-        Perforated convolution skips computing some entries in the output and instead interpolates
-        these values, to reduce the number of float-ops needed to complete a convolution op.
-        In this implementation, selected rows or columns of the output are discarded and replaced
-        with linearly interpolated values from the neighboring rows or columns. Each channel is
-        considered independently.
-        This implementation gives the same output as actual perforated convolution but without the
-        performance benefit.
-
-        Parameters
-        ----------
-        direction_is_row : bool
-            If True, discard and interpolate rows, otherwise columns.
-        stride : int \in [2, +\infty)
-            Skip 1 row/column in the convolution kernel per `stride` elements.
-        offset : int \in [0, stride)
-            Skipped first row/column is `offset`.
-
-        Attributes
-        ----------
-        interp_axis : int :math:`\in \{2, 3\}`
-            The axis that will be perforated over. As the input is an NCHW tensor, if
-            `direction_is_row` then `interp_axis = 2`, otherwise `interp_axis = 3`.
-        stride : int :math:`\in [2, +\infty)`
-            Equal to parameter `stride`.
-        offset : int :math:`\in [0, stride)`
-            Equal to parameter `offset`.
-        """
+    Perforated convolution skips computing some entries in the output and instead interpolates
+    these values, to reduce the number of float-ops needed to complete a convolution op.
+    In this implementation, selected rows or columns of the output are discarded and replaced
+    with linearly interpolated values from the neighboring rows or columns. Each channel is
+    considered independently.
+    This implementation gives the same output as actual perforated convolution but without the
+    performance benefit.
+
+    Parameters
+    ----------
+    direction_is_row : bool
+        If True, discard and interpolate rows, otherwise columns.
+    stride : int \in [2, +\infty)
+        Skip 1 row/column in the convolution kernel per `stride` elements.
+    offset : int \in [0, stride)
+        Skipped first row/column is `offset`.
+
+    Attributes
+    ----------
+    interp_axis : int :math:`\in \{2, 3\}`
+        The axis that will be perforated over. As the input is an NCHW tensor, if
+        `direction_is_row` then `interp_axis = 2`, otherwise `interp_axis = 3`.
+    stride : int :math:`\in [2, +\infty)`
+        Equal to parameter `stride`.
+    offset : int :math:`\in [0, stride)`
+        Equal to parameter `offset`.
+    """
 
     def __init__(
         self,
@@ -373,7 +373,13 @@ class FP16Approx(TorchApproxKnob):
 
 default_name_to_class = {
     k.__name__: k
-    for k in [FP16Approx, PromiseSim, PerforateConv2dStride, Conv2dSampling, TorchBaselineKnob]
+    for k in [
+        FP16Approx,
+        PromiseSim,
+        PerforateConv2dStride,
+        Conv2dSampling,
+        TorchBaselineKnob,
+    ]
 }
 
 default_knob_file = Path(__file__).parent / "default_approx_params.json"
diff --git a/predtuner/torchutil/__init__.py b/predtuner/torchutil/__init__.py
index af018f0d83469755f5092684c19a42aa3089184f..f65fc3898dfe8100b91cd8516d2547baefe1cdb2 100644
--- a/predtuner/torchutil/__init__.py
+++ b/predtuner/torchutil/__init__.py
@@ -1,5 +1,9 @@
 from .common_qos import accuracy
 from .indexing import ModuleIndexer
 from .summary import get_summary
-from .utils import (BatchedDataLoader, infer_net_device,
-                    move_to_device_recursively, split_dataset)
+from .utils import (
+    BatchedDataLoader,
+    infer_net_device,
+    move_to_device_recursively,
+    split_dataset,
+)
diff --git a/predtuner/torchutil/indexing.py b/predtuner/torchutil/indexing.py
index a059ad51af69291e5f81b653b25a3d4ed6cec449..375fa1bd0b117947ab33cb28870c9aa11a914955 100644
--- a/predtuner/torchutil/indexing.py
+++ b/predtuner/torchutil/indexing.py
@@ -8,37 +8,39 @@ ModulePredT = Callable[[Module], bool]
 
 class ModuleIndexer:
     r"""Allows indexing into an nn.Module with index (int) to get layers.
-        Supports read and modification, just like a dictionary.
-
-        Parameters
-        ----------
-        module: Module
-            The PyTorch Module to be indexed.
-        include_module: Callable[[Module], bool] = None
-            A predicate that decides which layers to include in the index. For example,
-            `lambda layer: isinstance(layer, Conv2d)` tells `ModuleIndexer` to only include `Conv2d`
-            layers.
-            If not given, by default `ModuleIndexer` will recursively walk down `module` like a tree
-            to include all internal and leaf nodes (layers), except for layers that `expand_module`
-            forbids recursing into.
-        expand_module: Callable[[Module], bool] = None
-            A predicate that decides which layers to recurse down. If `expand_module` returns `False`,
-            layer is kept as a whole and may be included if `include_module` allows.
-
-        Attributes
-        ----------
-        module: Module
-            Equal to parameter `module`.
-        index_to_module: List[Module]
-            Stores the layers in order so that a layer at `index_to_module[i]` has the index `i`.
-        layer_parent: Dict[Module, Tuple[Module, str]]
-            Maps each layer to its parent and its name in the parent layer. Contains the same layers
-            as in `index_to_module` except `module` which has no parent.
-        """
+    Supports read and modification, just like a dictionary.
+
+    Parameters
+    ----------
+    module: Module
+        The PyTorch Module to be indexed.
+    include_module: Callable[[Module], bool] = None
+        A predicate that decides which layers to include in the index. For example,
+        `lambda layer: isinstance(layer, Conv2d)` tells `ModuleIndexer` to only include `Conv2d`
+        layers.
+        If not given, by default `ModuleIndexer` will recursively walk down `module` like a tree
+        to include all internal and leaf nodes (layers), except for layers that `expand_module`
+        forbids recursing into.
+    expand_module: Callable[[Module], bool] = None
+        A predicate that decides which layers to recurse down. If `expand_module` returns `False`,
+        layer is kept as a whole and may be included if `include_module` allows.
+
+    Attributes
+    ----------
+    module: Module
+        Equal to parameter `module`.
+    index_to_module: List[Module]
+        Stores the layers in order so that a layer at `index_to_module[i]` has the index `i`.
+    layer_parent: Dict[Module, Tuple[Module, str]]
+        Maps each layer to its parent and its name in the parent layer. Contains the same layers
+        as in `index_to_module` except `module` which has no parent.
+    """
 
     def __init__(
-        self, module: Module, include_module: Optional[ModulePredT] = None,
-        expand_module: Optional[ModulePredT] = None
+        self,
+        module: Module,
+        include_module: Optional[ModulePredT] = None,
+        expand_module: Optional[ModulePredT] = None,
     ):
         self.module = module
         self.index_to_module = []
@@ -46,16 +48,21 @@ class ModuleIndexer:
         self.name_to_index = {}
         # By default, don't include container layer, and don't include (empty) Sequential
        has_children = lambda m: bool(list(m.children()))
-        default_inclusion = lambda m: not has_children(m) and not isinstance(m, Sequential)
+        default_inclusion = lambda m: not has_children(m) and not isinstance(
+            m, Sequential
+        )
         # No need for "default expansion" because whatever is not included will be walked into.
         self._rec_expand_module(
-            module, '', include_module or default_inclusion, expand_module
+            module, "", include_module or default_inclusion, expand_module
         )
         self.layer_parent = self._find_layers_parent_info(module, set(self.all_modules))
 
     def _rec_expand_module(
-        self, module: Module, name_prefix: str,
-        include_module: ModulePredT, expand_module: Optional[ModulePredT]
+        self,
+        module: Module,
+        name_prefix: str,
+        include_module: ModulePredT,
+        expand_module: Optional[ModulePredT],
     ):
         """Recursively expands into module and builds the index."""
         for name, submodule in module.named_children():
@@ -68,7 +75,9 @@ class ModuleIndexer:
             required_expansion = expand_module and expand_module(submodule)
             default_expansion = not included
             if default_expansion or required_expansion:
-                self._rec_expand_module(submodule, full_name, include_module, expand_module)
+                self._rec_expand_module(
+                    submodule, full_name, include_module, expand_module
+                )
 
     @staticmethod
     def _find_layers_parent_info(net: Module, layers: Set[Module]):
@@ -86,7 +95,10 @@ class ModuleIndexer:
 
     @property
     def name_to_module(self) -> Dict[str, Module]:
-        return {name: self.index_to_module[index] for name, index in self.name_to_index.items()}
+        return {
+            name: self.index_to_module[index]
+            for name, index in self.name_to_index.items()
+        }
 
     def find_by_module(self, module: Module) -> Optional[Tuple[str, int]]:
         """Get name and index from module."""
diff --git a/predtuner/torchutil/summary.py b/predtuner/torchutil/summary.py
index beb4cb1ba860a8339df83c3b7153dc4fc38f2d74..7be4bd48192969297cec06845f42255ad10513db 100644
--- a/predtuner/torchutil/summary.py
+++ b/predtuner/torchutil/summary.py
@@ -53,12 +53,16 @@ def get_flops(module: nn.Module, input_shape, output_shape):
     handler = type_dispatch.get(type(module))
     if not handler:
         if not list(module.children()):
-            _warn_once(f"Module {module} cannot be handled; its FLOPs will be estimated as 0")
+            _warn_once(
+                f"Module {module} cannot be handled; its FLOPs will be estimated as 0"
+            )
         return 0.0
     try:
         return handler()
     except RuntimeError as e:
-        _warn_once(f'Error "{e}" when handling {module}; its FLOPs will be estimated as 0')
+        _warn_once(
+            f'Error "{e}" when handling {module}; its FLOPs will be estimated as 0'
+        )
         return 0.0
 
 
diff --git a/setup.py b/setup.py
index 094a89a948b33b998266de46b5babd0d20db04ce..8d08aa7a80b05d60f47a63b5bacb77a0d105fa61 100644
--- a/setup.py
+++ b/setup.py
@@ -13,4 +13,4 @@ setuptools.setup(
     long_description_content_type="text/markdown",
     url="https://github.com/Evan-Zhao/predictive-tuner",
     packages=["predtuner"],
-)
\ No newline at end of file
+)
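
---
Usage note, outside the patch: the hunks above are pure formatting (Black/isort
style) and change no behavior. For orientation, here is a minimal sketch
exercising the `ModuleIndexer` API documented in its docstring above
(constructor plus `index_to_module` and `name_to_module`). The toy network and
the exact layer names are illustrative assumptions, not code from this
repository.

    # Sketch: index only Conv2d layers, mirroring the docstring's example predicate.
    import torch.nn as nn

    from predtuner.torchutil import ModuleIndexer

    net = nn.Sequential(nn.Conv2d(3, 8, 3), nn.ReLU(), nn.Conv2d(8, 8, 3))
    indexer = ModuleIndexer(net, include_module=lambda m: isinstance(m, nn.Conv2d))
    print(len(indexer.index_to_module))  # 2: only the Conv2d layers are indexed
    print(list(indexer.name_to_module))  # their names within `net`, e.g. ["0", "2"]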