# Building docs
We use Sphinx for generating the API and reference documentation.
## Instructions
Install the following Python packages needed to build the documentation by entering:
pip install sphinx sphinx-autodoc-typehints sphinx-rtd-theme
To build the HTML documentation, enter:
make html
in the ``doc/`` directory. This will generate a ``build/html`` subdirectory
containing the built documentation.
To build the PDF documentation, enter:
make latexpdf
You will need to have LaTeX installed for this.
Getting Started
.. _contents:
PredTuner is a Python library for predictive approximation autotuning.
PredTuner performs autotuning on approximation choices for a program
using an error-predictive proxy instead of executing the program,
to greatly speed up autotuning while getting results of comparable quality.
PredTuner is a contribution of [ApproxTuner]
Short-term Goals
- Measure accuracy impact of approximations
- Obtain a tuned, approximated CNN in <5 lines of code
- Easy to manage multiple approximation configs
- Easy to load and manage prior tuning results
- Flexible retraining support
Possible Long-term Goals
- High-performance implementations of approximate layers
- Allow users to register their own approximations
- Support for other frameworks: TF, ONNX, JAX
.. only:: html
:Release: |version|
:Date: |today|
.. toctree::
:maxdepth: 1
Indices and tables
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`
* :ref:`glossary`
PyTorch Autotuning API
.. autoclass:: predtuner.torchapp.TorchApp
.. autoclass:: predtuner.modeledapp.ApproxModeledTuner
...@@ -44,10 +44,43 @@ _default_device = f"cuda" if torch.cuda.is_available() else "cpu" ...@@ -44,10 +44,43 @@ _default_device = f"cuda" if torch.cuda.is_available() else "cpu"
class TorchApp(ModeledApp, abc.ABC): class TorchApp(ModeledApp, abc.ABC):
"""Approximable PyTorch Modules (tensor output assumed). r"""Adaptor for approximable PyTorch Modules with tensor output.
Automatically derives performance model and QoS models P1&P2.
A TorchApp stores the PyTorch Module, datasets for tuning and calibration,
knobs: User defines a set of all knobs available; we'll dispatch them to each layer (op). set of available TorchApproxKnob each of which may be applied to some layer in the Module,
and the quality of service (QoS) metric of application (e.g., accuracy).
It provides empirical tuning and predictive tuning capability (see `TorchApp.tune()`),
Name of the application, which is used as an identifier in tuning sessions, etc.
The PyTorch module to tune.
A dataset to use as inputs to the module during tuning. (A PyTorch DataLoader is conceptually
an enumerable, batched dataset.)
An input dataset used for QoS testing (see `test_config` parameter of `ApproxModeledTuner.tune`).
A set of knobs to be considered. Each knob has an `is_applicable()` method
which is used to determine which layers it can be applied to.
QoS metric function which computes QoS from the module's output.
A function to combine each batch's QoS into one value.
When QoS is accuracy this will most likely be `mean()` (which is the default).
The device to store the module on and perform inference on. Defaults to "cuda"
if CUDA is available, otherwise "cpu".
A folder to store the serialized QoS models into.
`QoSModelP1` will be serialized into `model_storage_folder / "p1.pkl"`,
and `QoSModelP2` into `model_storage_folder / "p2.json"`.
See `QoSModelP1` and `QoSModelP2` for details.
""" """
def __init__( def __init__(
...@@ -60,7 +93,7 @@ class TorchApp(ModeledApp, abc.ABC): ...@@ -60,7 +93,7 @@ class TorchApp(ModeledApp, abc.ABC):
tensor_to_qos: Callable[[torch.Tensor, Any], float], tensor_to_qos: Callable[[torch.Tensor, Any], float],
combine_qos: Callable[[np.ndarray], float] = np.mean, combine_qos: Callable[[np.ndarray], float] = np.mean,
device: Union[torch.device, str] = _default_device, device: Union[torch.device, str] = _default_device,
model_storage_folder: Optional[PathLike] = None model_storage_folder: Optional[PathLike] = None,
) -> None: ) -> None:
self.app_name = app_name self.app_name = app_name
self.module = module self.module = module
...@@ -70,7 +103,9 @@ class TorchApp(ModeledApp, abc.ABC): ...@@ -70,7 +103,9 @@ class TorchApp(ModeledApp, abc.ABC):
self.tensor_to_qos = tensor_to_qos self.tensor_to_qos = tensor_to_qos
self.combine_qos = combine_qos self.combine_qos = combine_qos
self.device = device self.device = device
self.model_storage = Path(model_storage_folder) if model_storage_folder else None self.model_storage = (
Path(model_storage_folder) if model_storage_folder else None
self.module = self.module =
self.midx = ModuleIndexer(module) self.midx = ModuleIndexer(module)
...@@ -92,13 +127,21 @@ class TorchApp(ModeledApp, abc.ABC): ...@@ -92,13 +127,21 @@ class TorchApp(ModeledApp, abc.ABC):
@property @property
def name(self) -> str: def name(self) -> str:
"""Returns the name of application."""
return self.app_name return self.app_name
@property @property
def op_knobs(self) -> Dict[str, List[ApproxKnob]]: def op_knobs(self) -> Dict[str, List[ApproxKnob]]:
"""Returns a list of applicable knobs for each operator (layer) in module."""
return self._op_knobs return self._op_knobs
def get_models(self) -> List[Union[IPerfModel, IQoSModel]]: def get_models(self) -> List[Union[IPerfModel, IQoSModel]]:
"""Returns a list of predictive tuning models.
TorchApp in particular derives 1 performance model (LinearPerfModel)
and 2 QoS models (QoSModelP1, QoSModelP2) automatically.
def batched_valset_qos(tensor_output: torch.Tensor): def batched_valset_qos(tensor_output: torch.Tensor):
dataset_len = len(self.tune_loader.dataset) dataset_len = len(self.tune_loader.dataset)
assert len(tensor_output) == dataset_len assert len(tensor_output) == dataset_len
...@@ -115,7 +158,9 @@ class TorchApp(ModeledApp, abc.ABC): ...@@ -115,7 +158,9 @@ class TorchApp(ModeledApp, abc.ABC):
p2_storage = self.model_storage / "p2.json" if self.model_storage else None p2_storage = self.model_storage / "p2.json" if self.model_storage else None
return [ return [
LinearPerfModel(self._op_costs, self._knob_speedups), LinearPerfModel(self._op_costs, self._knob_speedups),
QoSModelP1(self, self._get_raw_output_valset, batched_valset_qos, p1_storage), QoSModelP1(
self, self._get_raw_output_valset, batched_valset_qos, p1_storage
QoSModelP2(self, p2_storage), QoSModelP2(self, p2_storage),
] ]
...@@ -123,6 +168,9 @@ class TorchApp(ModeledApp, abc.ABC): ...@@ -123,6 +168,9 @@ class TorchApp(ModeledApp, abc.ABC):
def empirical_measure_qos_perf( def empirical_measure_qos_perf(
self, with_approxes: KnobsT, is_test: bool self, with_approxes: KnobsT, is_test: bool
) -> Tuple[float, float]: ) -> Tuple[float, float]:
"""Measure the QoS and performance of Module with given approximation
empirically (i.e., by running the Module on the dataset)."""
from time import time_ns from time import time_ns
dataloader = self.test_loader if is_test else self.tune_loader dataloader = self.test_loader if is_test else self.tune_loader
