diff --git a/doc/conf.py b/doc/conf.py
index 12f28f5c0af5b4ba58545c094c660337223233cf..ad8cee166b4df9acec13bca5dd73eccd4db31a9e 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -1,12 +1,14 @@
 from datetime import date
 import sphinx_rtd_theme
+
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 #
 import os
 import sys
-sys.path.insert(0, os.path.abspath('..'))
+
+sys.path.insert(0, os.path.abspath(".."))
 
 # General configuration
 # ---------------------
diff --git a/predtuner/__init__.py b/predtuner/__init__.py
index 92372504d1a9232aee0de40980fa5b91f52cb119..c11d18f205241f54b32405475e6e19d22d85538a 100644
--- a/predtuner/__init__.py
+++ b/predtuner/__init__.py
@@ -1,7 +1,13 @@
 from ._logging import config_pylogger
 from .approxapp import ApproxApp, ApproxKnob, ApproxTuner
 from .approxes import get_knobs_from_file
-from .modeledapp import (IPerfModel, IQoSModel, LinearPerfModel, ModeledApp,
-                         QoSModelP1, QoSModelP2)
+from .modeledapp import (
+    IPerfModel,
+    IQoSModel,
+    LinearPerfModel,
+    ModeledApp,
+    QoSModelP1,
+    QoSModelP2,
+)
 from .torchapp import TorchApp, TorchApproxKnob
 from .torchutil import accuracy
diff --git a/predtuner/_pareto.py b/predtuner/_pareto.py
index 4396ab1cc9d6cd02b3c0d0595a55085d8ea3d064..57ccd3d04d0920f117460c68060e53a0dc9a6ca8 100644
--- a/predtuner/_pareto.py
+++ b/predtuner/_pareto.py
@@ -19,8 +19,14 @@ def _find_distance_to(points: np.ndarray, ref_points: np.ndarray) -> np.ndarray:
         if left_ref_p == -1:
             left_ref_p = 0
         to_left_ref = ref_points[left_ref_p] - point
-        local_unit_vec = local_unit_vecs[-1] if left_ref_p >= n_ref - 1 else local_unit_vecs[left_ref_p]
-        projection = np.dot(local_unit_vec, to_left_ref) / np.linalg.norm(local_unit_vec)
+        local_unit_vec = (
+            local_unit_vecs[-1]
+            if left_ref_p >= n_ref - 1
+            else local_unit_vecs[left_ref_p]
+        )
+        projection = np.dot(local_unit_vec, to_left_ref) / np.linalg.norm(
+            local_unit_vec
+        )
         dist = np.sqrt(np.linalg.norm(to_left_ref) ** 2 - projection ** 2)
         dists.append(dist)
     return np.array(dists)
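# A minimal sketch of the point-to-segment distance that the reformatted hunk
# above computes: project `to_left_ref` onto the local segment direction, then
# recover the perpendicular component via the Pythagorean identity. All names
# and values below are illustrative, not taken from the repo.
import numpy as np

point = np.array([1.0, 2.0])
left_ref = np.array([0.0, 0.0])
direction = np.array([3.0, 1.0])  # local (unnormalized) direction of the segment

to_left_ref = left_ref - point
projection = np.dot(direction, to_left_ref) / np.linalg.norm(direction)
dist = np.sqrt(np.linalg.norm(to_left_ref) ** 2 - projection ** 2)

# Cross-check against the closed-form 2D point-to-line distance |v x d| / |d|.
expected = abs(
    to_left_ref[0] * direction[1] - to_left_ref[1] * direction[0]
) / np.linalg.norm(direction)
assert np.isclose(dist, expected)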
diff --git a/predtuner/approxapp.py b/predtuner/approxapp.py
index 5cd82a19e770f6c34208fac1bda6d738c5f6ed4c..f82544e7763909a816cd91a7c81805b159a0c7fb 100644
--- a/predtuner/approxapp.py
+++ b/predtuner/approxapp.py
@@ -31,12 +31,13 @@ class ApproxKnob:
 class ApproxApp(abc.ABC):
     """Generic approximable application with operator & knob enumeration,
     and measures its own QoS and performance given a configuration.
-    
+
     Parameters
     ----------
     op_knobs:
         a mapping from each operator (identified by str) to a list of applicable knobs.
     """
+
     def __init__(self, op_knobs: Dict[str, List[ApproxKnob]]) -> None:
         super().__init__()
         self.op_knobs = op_knobs
@@ -71,7 +72,9 @@ class ApproxApp(abc.ABC):
         return list(set.union(*knob_sets))
 
     @staticmethod
-    def _check_get_baseline_knob_(op_knobs: Dict[str, List[ApproxKnob]]) -> "BaselineKnob":
+    def _check_get_baseline_knob_(
+        op_knobs: Dict[str, List[ApproxKnob]]
+    ) -> "BaselineKnob":
         # Modifies op_knobs in place.
         # Find the baseline knob if the user has one, or get a default one
         knob_sets = [set(knobs) for knobs in op_knobs.values()]
@@ -80,7 +83,7 @@ class ApproxApp(abc.ABC):
         if len(baselines) > 1:
             raise ValueError(f"Found multiple baseline knobs in op_knobs: {baselines}")
         if baselines:
-            baseline_knob, = baselines
+            (baseline_knob,) = baselines
         else:
             baseline_knob = BaselineKnob()
         # Start checking if each layer has the baseline knob
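# The `(baseline_knob,) = baselines` form in the hunk above is single-element
# unpacking: it extracts the only element of a collection and raises ValueError
# if the length is not exactly 1. A standalone illustration:
baselines = {"baseline"}
(baseline_knob,) = baselines
assert baseline_knob == "baseline"

try:
    (knob,) = {"a", "b"}  # two elements
except ValueError as e:
    print(e)  # too many values to unpack (expected 1)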
diff --git a/predtuner/approxes/_copy.py b/predtuner/approxes/_copy.py
index acb0f78186cb14108f859e5d8a41c6b95b4b02a3..3828702543a149955d62c04a45901a0b92c10577 100644
--- a/predtuner/approxes/_copy.py
+++ b/predtuner/approxes/_copy.py
@@ -3,7 +3,7 @@ from typing import TypeVar
 
 from torch.nn import Module, Parameter
 
-T = TypeVar('T')
+T = TypeVar("T")
 
 
 def module_only_deepcopy(obj: T, memo=None) -> T:
@@ -19,7 +19,7 @@ def module_only_deepcopy(obj: T, memo=None) -> T:
         # Additionally share all buffers of Module. For example, this accounts for
         # running_{mean|var} in BatchNorm.
         if isinstance(obj_, Module):
-            buffers = obj_.__dict__.get('_buffers')
+            buffers = obj_.__dict__.get("_buffers")
             for buffer in buffers.values():
                 memo[id(buffer)] = buffer
         # Share all parameters.
@@ -34,7 +34,7 @@ def module_only_deepcopy(obj: T, memo=None) -> T:
         elif isinstance(obj_, (list, tuple)):
             for x in obj_:
                 recursive_scan_parameters(x)
-        elif hasattr(obj_, '__dict__'):
+        elif hasattr(obj_, "__dict__"):
             for x in obj_.__dict__.values():
                 recursive_scan_parameters(x)
 
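# module_only_deepcopy (diffed above) builds on a documented copy.deepcopy
# behavior: pre-seeding the memo dict with {id(x): x} makes deepcopy hand back
# x itself instead of copying it, so parameters and buffers registered this way
# are shared between the original and the copy. A standalone sketch:
import copy

shared = [1, 2, 3]
container = {"shared": shared, "private": [4, 5]}

memo = {id(shared): shared}  # mark `shared` as "already copied" onto itself
clone = copy.deepcopy(container, memo)

assert clone["shared"] is shared  # the shared object survives the deepcopy
assert clone["private"] is not container["private"]  # everything else is copied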
diff --git a/predtuner/approxes/approxes.py b/predtuner/approxes/approxes.py
index d4ab80981273f74af709c769db99c4a721d12680..fb3bca7d13d0e0dd93cc39a3410d2ee9c86c7d18 100644
--- a/predtuner/approxes/approxes.py
+++ b/predtuner/approxes/approxes.py
@@ -33,33 +33,33 @@ def _interpolate_first_dim(tensor: torch.Tensor, interp_indices: Iterable[int]):
 class PerforateConv2dStride(TorchApproxKnob):
     r"""Simulation of strided perforated convolution for `torch.nn.Conv2d`.
 
-        Perforated convolution skips computing some entries in the output and instead interpolates
-        these values, to reduce the number of float-ops needed to complete a convolution op.
-        In this implementation, selected rows or columns of the output are discarded and replaced
-        with linearly interpolated values from the neighboring rows or columns. Each channel is
-        considered independently.
-        This implementation gives the same output as actual perforated convolution but without the
-        performance benefit.
-
-        Parameters
-        ----------
-        direction_is_row : bool
-            If True, discard and interpolate rows, otherwise columns.
-        stride : int \in [2, +\infty)
-            Skip 1 row/column in the convolution kernel per `stride` elements.
-        offset : int \in [0, stride)
-            Skipped first row/column is `offset`.
-
-        Attributes
-        ----------
-        interp_axis : int :math:`\in \{2, 3\}`
-            The axis that will be perforated over. As the input is an NCHW tensor, if
-            `direction_is_row` then `interp_axis = 2`, otherwise `interp_axis = 3`.
-        stride : int :math:`\in [2, +\infty)`
-            Equal to parameter `stride`.
-        offset : int :math:`\in [0, stride)`
-            Equal to parameter `offset`.
-        """
+    Perforated convolution skips computing some entries in the output and instead interpolates
+    these values, to reduce the number of float-ops needed to complete a convolution op.
+    In this implementation, selected rows or columns of the output are discarded and replaced
+    with linearly interpolated values from the neighboring rows or columns. Each channel is
+    considered independently.
+    This implementation gives the same output as actual perforated convolution but without the
+    performance benefit.
+
+    Parameters
+    ----------
+    direction_is_row : bool
+        If True, discard and interpolate rows, otherwise columns.
+    stride : int :math:`\in [2, +\infty)`
+        Skip 1 row/column of the output per `stride` elements.
+    offset : int :math:`\in [0, stride)`
+        The first skipped row/column is at index `offset`.
+
+    Attributes
+    ----------
+    interp_axis : int :math:`\in \{2, 3\}`
+        The axis that will be perforated over. As the input is an NCHW tensor, if
+        `direction_is_row` then `interp_axis = 2`, otherwise `interp_axis = 3`.
+    stride : int :math:`\in [2, +\infty)`
+        Equal to parameter `stride`.
+    offset : int :math:`\in [0, stride)`
+        Equal to parameter `offset`.
+    """
 
     def __init__(
         self,
@@ -373,7 +373,13 @@ class FP16Approx(TorchApproxKnob):
 
 default_name_to_class = {
     k.__name__: k
-    for k in [FP16Approx, PromiseSim, PerforateConv2dStride, Conv2dSampling, TorchBaselineKnob]
+    for k in [
+        FP16Approx,
+        PromiseSim,
+        PerforateConv2dStride,
+        Conv2dSampling,
+        TorchBaselineKnob,
+    ]
 }
 default_knob_file = Path(__file__).parent / "default_approx_params.json"
 
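# A toy illustration (assumed semantics, not the library's implementation) of
# the row perforation described in the PerforateConv2dStride docstring above:
# with stride=2 and offset=1 on a height-4 input, rows 1 and 3 of each channel
# are discarded and rebuilt by linearly interpolating the neighboring kept rows.
import torch

x = torch.arange(16, dtype=torch.float32).view(1, 1, 4, 4)  # NCHW input
stride, offset, height = 2, 1, 4
perforated = x.clone()
for r in range(offset, height, stride):
    above = perforated[:, :, r - 1, :]  # for this offset/stride, always a kept row
    below = perforated[:, :, r + 1, :] if r + 1 < height else above
    perforated[:, :, r, :] = (above + below) / 2

print(perforated[0, 0])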
diff --git a/predtuner/torchutil/__init__.py b/predtuner/torchutil/__init__.py
index af018f0d83469755f5092684c19a42aa3089184f..f65fc3898dfe8100b91cd8516d2547baefe1cdb2 100644
--- a/predtuner/torchutil/__init__.py
+++ b/predtuner/torchutil/__init__.py
@@ -1,5 +1,9 @@
 from .common_qos import accuracy
 from .indexing import ModuleIndexer
 from .summary import get_summary
-from .utils import (BatchedDataLoader, infer_net_device,
-                    move_to_device_recursively, split_dataset)
+from .utils import (
+    BatchedDataLoader,
+    infer_net_device,
+    move_to_device_recursively,
+    split_dataset,
+)
diff --git a/predtuner/torchutil/indexing.py b/predtuner/torchutil/indexing.py
index a059ad51af69291e5f81b653b25a3d4ed6cec449..375fa1bd0b117947ab33cb28870c9aa11a914955 100644
--- a/predtuner/torchutil/indexing.py
+++ b/predtuner/torchutil/indexing.py
@@ -8,37 +8,39 @@ ModulePredT = Callable[[Module], bool]
 
 class ModuleIndexer:
     r"""Allows indexing into an nn.Module with index (int) to get layers.
-        Supports read and modification, just like a dictionary.
-
-        Parameters
-        ----------
-        module: Module
-            The PyTorch Module to be indexed.
-        include_module: Callable[[Module], bool] = None
-            A predicate that decides which layers to include in the index. For example,
-            `lambda layer: isinstance(layer, Conv2d)` tells `ModuleIndexer` to only include `Conv2d`
-            layers.
-            If not given, by default `ModuleIndexer` will recursively walk down `module` like a tree
-            to include all internal and leaf nodes (layers), except for layers that `expand_module`
-            forbids recursing into.
-        expand_module: Callable[[Module], bool] = None
-            A predicate that decides which layers to recurse down. If `expand_module` returns `False`,
-            layer is kept as a whole and may be included if `include_module` allows.
-
-        Attributes
-        ----------
-        module: Module
-            Equal to parameter `module`.
-        index_to_module: List[Module]
-            Stores the layers in order so that a layer at `index_to_module[i]` has the index `i`.
-        layer_parent: Dict[Module, Tuple[Module, str]]
-            Maps each layer to its parent and its name in the parent layer. Contains the same layers
-            as in `index_to_module` except `module` which has no parent.
-        """
+    Supports read and modification, just like a dictionary.
+
+    Parameters
+    ----------
+    module: Module
+        The PyTorch Module to be indexed.
+    include_module: Callable[[Module], bool] = None
+        A predicate that decides which layers to include in the index. For example,
+        `lambda layer: isinstance(layer, Conv2d)` tells `ModuleIndexer` to only include `Conv2d`
+        layers.
+        If not given, by default `ModuleIndexer` will recursively walk down `module` like a tree
+        to include all internal and leaf nodes (layers), except for layers that `expand_module`
+        forbids recursing into.
+    expand_module: Callable[[Module], bool] = None
+        A predicate that decides which layers to recurse into. If `expand_module` returns `False`,
+        the layer is kept as a whole and may be included if `include_module` allows.
+
+    Attributes
+    ----------
+    module: Module
+        Equal to parameter `module`.
+    index_to_module: List[Module]
+        Stores the layers in order, so that the layer at `index_to_module[i]` has index `i`.
+    layer_parent: Dict[Module, Tuple[Module, str]]
+        Maps each layer to its parent and its name within the parent layer. Contains the same
+        layers as in `index_to_module`, except `module` itself, which has no parent.
+    """
 
     def __init__(
-            self, module: Module, include_module: Optional[ModulePredT] = None,
-            expand_module: Optional[ModulePredT] = None
+        self,
+        module: Module,
+        include_module: Optional[ModulePredT] = None,
+        expand_module: Optional[ModulePredT] = None,
     ):
         self.module = module
         self.index_to_module = []
@@ -46,16 +48,21 @@ class ModuleIndexer:
         self.name_to_index = {}
         # By default, don't include container layer, and don't include (empty) Sequential
         has_children = lambda m: bool(list(m.children()))
-        default_inclusion = lambda m: not has_children(m) and not isinstance(m, Sequential)
+        default_inclusion = lambda m: not has_children(m) and not isinstance(
+            m, Sequential
+        )
         # No need for "default expansion" because whatever is not included will be walked into.
         self._rec_expand_module(
-            module, '', include_module or default_inclusion, expand_module
+            module, "", include_module or default_inclusion, expand_module
         )
         self.layer_parent = self._find_layers_parent_info(module, set(self.all_modules))
 
     def _rec_expand_module(
-            self, module: Module, name_prefix: str,
-            include_module: ModulePredT, expand_module: Optional[ModulePredT]
+        self,
+        module: Module,
+        name_prefix: str,
+        include_module: ModulePredT,
+        expand_module: Optional[ModulePredT],
     ):
         """Recursively expands into module and builds the index."""
         for name, submodule in module.named_children():
@@ -68,7 +75,9 @@ class ModuleIndexer:
             required_expansion = expand_module and expand_module(submodule)
             default_expansion = not included
             if default_expansion or required_expansion:
-                self._rec_expand_module(submodule, full_name, include_module, expand_module)
+                self._rec_expand_module(
+                    submodule, full_name, include_module, expand_module
+                )
 
     @staticmethod
     def _find_layers_parent_info(net: Module, layers: Set[Module]):
@@ -86,7 +95,10 @@ class ModuleIndexer:
 
     @property
     def name_to_module(self) -> Dict[str, Module]:
-        return {name: self.index_to_module[index] for name, index in self.name_to_index.items()}
+        return {
+            name: self.index_to_module[index]
+            for name, index in self.name_to_index.items()
+        }
 
     def find_by_module(self, module: Module) -> Optional[Tuple[str, int]]:
         """Get name and index from module."""
diff --git a/predtuner/torchutil/summary.py b/predtuner/torchutil/summary.py
index beb4cb1ba860a8339df83c3b7153dc4fc38f2d74..7be4bd48192969297cec06845f42255ad10513db 100644
--- a/predtuner/torchutil/summary.py
+++ b/predtuner/torchutil/summary.py
@@ -53,12 +53,16 @@ def get_flops(module: nn.Module, input_shape, output_shape):
     handler = type_dispatch.get(type(module))
     if not handler:
         if not list(module.children()):
-            _warn_once(f"Module {module} cannot be handled; its FLOPs will be estimated as 0")
+            _warn_once(
+                f"Module {module} cannot be handled; its FLOPs will be estimated as 0"
+            )
         return 0.0
     try:
         return handler()
     except RuntimeError as e:
-        _warn_once(f'Error "{e}" when handling {module}; its FLOPs will be estimated as 0')
+        _warn_once(
+            f'Error "{e}" when handling {module}; its FLOPs will be estimated as 0'
+        )
         return 0.0
 
 
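# `_warn_once` is internal to this repo; a common way to get the same
# deduplicate-identical-warnings behavior (an assumption, not the repo's code)
# is to memoize on the message:
import functools
import logging

@functools.lru_cache(maxsize=None)
def warn_once(msg: str) -> None:
    logging.getLogger(__name__).warning(msg)

warn_once("Module Foo cannot be handled; its FLOPs will be estimated as 0")
warn_once("Module Foo cannot be handled; its FLOPs will be estimated as 0")  # no-op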
diff --git a/setup.py b/setup.py
index 094a89a948b33b998266de46b5babd0d20db04ce..8d08aa7a80b05d60f47a63b5bacb77a0d105fa61 100644
--- a/setup.py
+++ b/setup.py
@@ -13,4 +13,4 @@ setuptools.setup(
     long_description_content_type="text/markdown",
     url="https://github.com/Evan-Zhao/predictive-tuner",
     packages=["predtuner"],
-)
\ No newline at end of file
+)