diff --git a/predtuner/approxapp.py b/predtuner/approxapp.py
index 7564bb81a61f2c6a31b982c28b9be6eb4c21d426..0775ec80093ce47495ed3f9457a506cc409de671 100644
--- a/predtuner/approxapp.py
+++ b/predtuner/approxapp.py
@@ -45,7 +45,7 @@ class ApproxApp(abc.ABC):
         self.baseline_knob = self._check_get_baseline_knob_(self.op_knobs)
 
     @abc.abstractmethod
-    def measure_qos_perf(
+    def measure_qos_cost(
         self, with_approxes: KnobsT, is_test: bool
     ) -> Tuple[float, float]:
         pass
@@ -106,11 +106,11 @@ class BaselineKnob(ApproxKnob):
 
 class Config:
     def __init__(
-        self, qos: float, perf: float, knobs: KnobsT, test_qos: Optional[float] = None
+        self, qos: float, cost: float, knobs: KnobsT, test_qos: Optional[float] = None
     ) -> None:
         self.qos = qos
-        self.perf = perf
-        self.knobs = knobs
+        self.cost = cost
+        self.knobs = dict(sorted(knobs.items()))
         self.test_qos: Optional[float] = test_qos
 
 
@@ -208,12 +208,12 @@ class ApproxTuner(Generic[T]):
             cfg: T
             if cfg.test_qos is not None:
                 continue
-            cfg.test_qos, _ = self.app.measure_qos_perf(cfg.knobs, True)
+            cfg.test_qos, _ = self.app.measure_qos_cost(cfg.knobs, True)
             msg_logger.debug(f"Calibration: {cfg.qos} (mean) -> {cfg.test_qos} (mean)")
 
     @staticmethod
     def take_best_configs(configs: List[T], n: Optional[int] = None) -> List[T]:
-        points = np.array([[c.perf, c.qos] for c in configs])
+        points = np.array([[c.cost, c.qos] for c in configs])
         taken_idx = is_pareto_efficient(points, take_n=n)
         return [configs[i] for i in taken_idx]
 
@@ -237,12 +237,12 @@ class ApproxTuner(Generic[T]):
                 f"No tuning session has been run; call self.tune() first."
             )
 
-        _, perf = self.app.measure_qos_perf({}, False)
+        _, cost = self.app.measure_qos_cost({}, False)
         fig, ax = plt.subplots()
         confs = self.kept_configs
         if not confs:
             return fig
-        qos_speedup = [(c.qos, perf / c.perf) for c in confs]
+        qos_speedup = [(c.qos, cost / c.cost) for c in confs]
         qoses, speedups = zip(*sorted(qos_speedup, key=lambda p: p[0]))
         ax.plot(qoses, speedups)
         ax.scatter(qoses, speedups)
@@ -262,7 +262,7 @@ class ApproxTuner(Generic[T]):
         # By default, keep_threshold == tuner_threshold
         self.keep_threshold = qos_keep_threshold or qos_tuner_threshold
         if is_threshold_relative:
-            baseline_qos, _ = self.app.measure_qos_perf({}, False)
+            baseline_qos, _ = self.app.measure_qos_cost({}, False)
             qos_tuner_threshold = baseline_qos - qos_tuner_threshold
             self.keep_threshold = baseline_qos - self.keep_threshold
         opentuner_args.test_limit = max_iter
@@ -342,16 +342,16 @@ class TunerInterface(MeasurementInterface):
         from opentuner.resultsdb.models import Result
 
         cfg = desired_result.configuration.data
-        qos, perf = self.app.measure_qos_perf(cfg, False, **self.app_kwargs)
+        qos, cost = self.app.measure_qos_cost(cfg, False, **self.app_kwargs)
         # Print a debug message for each config in tuning and keep threshold
-        self.print_debug_config(qos, perf)
+        self.print_debug_config(qos, cost)
         self.pbar.update(self.progress_getter() - self.pbar.n)
-        return Result(time=perf, accuracy=qos)
+        return Result(time=cost, accuracy=qos)
 
     def save_final_config(self, config):
         self.pbar.close()
 
-    def print_debug_config(self, qos: float, perf: float):
+    def print_debug_config(self, qos: float, cost: float):
         gt_tune, gt_keep = qos > self.tune_thres, qos > self.keep_thres
         if not gt_tune and not gt_keep:
             return
@@ -362,5 +362,5 @@ class TunerInterface(MeasurementInterface):
         else:
             kind = "tuning and keep"
         msg_logger.debug(
-            f"Found config within {kind} threshold: QoS = {qos}, perf = {perf}"
+            f"Found config within {kind} threshold: QoS = {qos}, cost = {cost}"
         )
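
A minimal sketch, not part of this patch, of what an `ApproxApp` subclass overrides after the rename above. It assumes `ApproxApp` and `KnobsT` are importable from `predtuner/approxapp.py`; the subclass name, the `_run_and_score` helper, and the constant cost are hypothetical, and any other abstract members of `ApproxApp` are omitted.

    from typing import Tuple

    from predtuner.approxapp import ApproxApp, KnobsT

    class MyApp(ApproxApp):
        def measure_qos_cost(
            self, with_approxes: KnobsT, is_test: bool
        ) -> Tuple[float, float]:
            # Renamed from measure_qos_perf: still returns a (qos, cost) pair,
            # where "cost" is the quantity the tuner minimizes (formerly "perf").
            qos = self._run_and_score(with_approxes, is_test)  # hypothetical helper
            cost = 1.0  # placeholder; a real app would measure time/energy here
            return qos, cost
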
diff --git a/predtuner/modeledapp.py b/predtuner/modeledapp.py
index a22c06dd6885551994afa3841eeca0b45ee6bdea..adeb53383a81d0d03db9642e0dfde17896462ee9 100644
--- a/predtuner/modeledapp.py
+++ b/predtuner/modeledapp.py
@@ -29,7 +29,7 @@ class ModeledApp(ApproxApp, abc.ABC):
         self._name_to_model = {m.name: m for m in models}
         if len(self._name_to_model) != len(models):
             raise ValueError("Name conflict in models")
-        self._perf_models = {
+        self._cost_models = {
             model.name: model for model in models if isinstance(model, IPerfModel)
         }
         self._qos_models = {
@@ -41,7 +41,7 @@ class ModeledApp(ApproxApp, abc.ABC):
         """Get QoS/Performance prediction models for this application."""
         pass
 
-    def empirical_measure_qos_perf(
+    def empirical_measure_qos_cost(
         self, with_approxes: KnobsT, is_test: bool
     ) -> Tuple[float, float]:
         """Measures QoS and performance by running the program with approximation.
@@ -50,27 +50,27 @@ class ModeledApp(ApproxApp, abc.ABC):
         """
         raise NotImplementedError()
 
-    def measure_qos_perf(
+    def measure_qos_cost(
         self,
         with_approxes: KnobsT,
         is_test: bool,
         qos_model: str = "none",
-        perf_model: str = "none",
+        cost_model: str = "none",
     ) -> Tuple[float, float]:
-        """We provide this with the right qos and perf function.
+        """We provide this with the right qos and cost function.
 
-        Empirical measurement will be called once if either `perf_model` or `qos_model`
+        Empirical measurement will be called once if either `cost_model` or `qos_model`
         is "none", otherwise only use model indicated by model name.
         """
         # Testset measurement is always empirical
         if is_test:
-            return self.empirical_measure_qos_perf(with_approxes, is_test)
-        # Run empirical measurement once if either perf or qos needs it
-        qos, perf = None, None
-        if qos_model == "none" or perf_model == "none":
-            qos, perf = self.empirical_measure_qos_perf(with_approxes, is_test)
+            return self.empirical_measure_qos_cost(with_approxes, is_test)
+        # Run empirical measurement once if either cost or qos needs it
+        qos, cost = None, None
+        if qos_model == "none" or cost_model == "none":
+            qos, cost = self.empirical_measure_qos_cost(with_approxes, is_test)
         # If we're asked to use some qos_model, overwrite `qos` value
-        # even if we already get it from empirical measure (i.e., even if perf_model == "none")
+        # even if we already get it from empirical measure (i.e., even if cost_model == "none")
         if qos_model != "none":
             if qos_model not in self._qos_models:
                 raise ValueError(
@@ -79,15 +79,15 @@ class ModeledApp(ApproxApp, abc.ABC):
                 )
             qos = self._qos_models[qos_model].measure_qos(with_approxes)
-        # Same goes for perf
+        # Same goes for cost
-        if perf_model != "none":
-            if perf_model not in self._perf_models:
+        if cost_model != "none":
+            if cost_model not in self._cost_models:
                 raise ValueError(
-                    f'"{perf_model}" is an invalid value for perf_model '
-                    f"(choose from {list(self._perf_models.keys())})"
+                    f'"{cost_model}" is an invalid value for cost_model '
+                    f"(choose from {list(self._cost_models.keys())})"
                 )
-            perf = self._perf_models[perf_model].measure_perf(with_approxes)
-        assert type(qos) is float and type(perf) is float
-        return qos, perf
+            cost = self._cost_models[cost_model].measure_cost(with_approxes)
+        assert type(qos) is float and type(cost) is float
+        return qos, cost
 
     def get_tuner(self) -> "ApproxModeledTuner":
         return ApproxModeledTuner(self)
@@ -109,7 +109,7 @@ class IPerfModel(abc.ABC):
         pass
 
     @abc.abstractmethod
-    def measure_perf(self, with_approxes: KnobsT) -> float:
+    def measure_cost(self, with_approxes: KnobsT) -> float:
         """Predict the performance of application."""
         pass
 
@@ -163,9 +163,9 @@ class LinearPerfModel(IPerfModel):
 
     @property
     def name(self) -> str:
-        return "perf_linear"
+        return "cost_linear"
 
-    def measure_perf(self, with_approxes: KnobsT) -> float:
+    def measure_cost(self, with_approxes: KnobsT) -> float:
         """We implement this using a weighted linear performance model."""
         with_approxes = self.app.add_baseline_to_knobs(with_approxes)
         return float(
@@ -210,11 +210,10 @@ class QoSModelP1(IQoSModel):
         """Implementation of model."""
         assert self.baseline_tensor is not None
         with_approxes = self.app.add_baseline_to_knobs(with_approxes)
-        delta_tensors = np.array(
-            [self.delta_tensors[op][knob] for op, knob in with_approxes.items()],
-            dtype=np.object
+        delta_sum = sum(
+            self.delta_tensors[op][knob] for op, knob in with_approxes.items()
         )
-        ret = delta_tensors.sum() + self.baseline_tensor
+        ret = delta_sum + self.baseline_tensor
         return float(self.qos_metric(ret))
 
     def _init(self):
@@ -276,7 +275,7 @@ class QoSModelP2(IQoSModel):
         and then defines a `measure_qos` that doesn't run the application during tuning
         (to reduce overhead).
         """
-        qos, _ = self.app.empirical_measure_qos_perf(with_approxes, False)
+        qos, _ = self.app.empirical_measure_qos_cost(with_approxes, False)
         return qos
 
     def measure_qos(self, with_approxes: KnobsT) -> float:
@@ -343,12 +342,12 @@ class ValConfig(Config):
     def __init__(
         self,
         qos: float,
-        perf: float,
+        cost: float,
         knobs: KnobsT,
         test_qos: Optional[float] = None,
         validated_qos: Optional[float] = None,
     ) -> None:
-        super().__init__(qos, perf, knobs, test_qos)
+        super().__init__(qos, cost, knobs, test_qos)
         self.validated_qos = validated_qos
 
 
@@ -364,24 +363,24 @@ class ApproxModeledTuner(ApproxTuner):
         take_best_n: Optional[int] = None,
         test_configs: bool = True,
         validate_configs: Optional[bool] = None,
-        perf_model: str = "none",
+        cost_model: str = "none",
         qos_model: str = "none",
     ) -> List[ValConfig]:
         qos_desc = (
             "no model for qos" if qos_model == "none" else f'qos model "{qos_model}"'
         )
-        perf_desc = (
+        cost_desc = (
             "no model for performance"
-            if perf_model == "none"
-            else f'performance model "{perf_model}"'
+            if cost_model == "none"
+            else f'cost model "{cost_model}"'
         )
-        msg_logger.info("Starting tuning with %s and %s", qos_desc, perf_desc)
+        msg_logger.info("Starting tuning with %s and %s", qos_desc, cost_desc)
         if qos_model != "none":
             msg_logger.info("Initializing qos model %s", qos_model)
             self.app.init_model(qos_model)
-        if perf_model != "none":
-            msg_logger.info("Initializing performance model %s", perf_model)
-            self.app.init_model(perf_model)
+        if cost_model != "none":
+            msg_logger.info("Initializing performance model %s", cost_model)
+            self.app.init_model(cost_model)
         ret = super().tune(
             max_iter=max_iter,
             qos_tuner_threshold=qos_tuner_threshold,
@@ -389,7 +388,7 @@ class ApproxModeledTuner(ApproxTuner):
             is_threshold_relative=is_threshold_relative,
             take_best_n=take_best_n,
             test_configs=test_configs,
-            perf_model=perf_model,
+            cost_model=cost_model,
             qos_model=qos_model,
         )
         if validate_configs is None and qos_model != "none":
@@ -409,11 +408,11 @@ class ApproxModeledTuner(ApproxTuner):
             cfg: ValConfig
             if cfg.validated_qos is not None:
                 continue
-            cfg.validated_qos, _ = self.app.measure_qos_perf(cfg.knobs, False)
+            cfg.validated_qos, _ = self.app.measure_qos_cost(cfg.knobs, False)
             msg_logger.debug(f"Validation: {cfg.qos} (mean) -> {cfg.test_qos} (mean)")
 
-    def _get_app_kwargs(self, perf_model: str, qos_model: str):
-        return {"perf_model": perf_model, "qos_model": qos_model}
+    def _get_app_kwargs(self, cost_model: str, qos_model: str):
+        return {"cost_model": cost_model, "qos_model": qos_model}
 
     @classmethod
     def _get_config_class(cls) -> Type[Config]:
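
A usage sketch of the renamed dispatch in `ModeledApp.measure_qos_cost`, not part of this patch. It assumes `app` is a `ModeledApp` instance such as the `TorchApp` built in `test/integrated_tuning.py`; the knob dictionary is illustrative, while the model names come from this patch.

    # With both model arguments left at "none", one empirical run supplies QoS and cost.
    baseline_qos, baseline_cost = app.measure_qos_cost({}, False)

    # With both models named, tuning-time measurement does not run the application.
    knobs = {"some_op": "some_knob"}  # illustrative KnobsT value
    qos, cost = app.measure_qos_cost(
        knobs, False, qos_model="qos_p1", cost_model="cost_linear"
    )

    # Test-set measurement (is_test=True) is always empirical, whatever the model arguments.
    test_qos, test_cost = app.measure_qos_cost(knobs, True)
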
diff --git a/predtuner/torchapp.py b/predtuner/torchapp.py
index 65b8b43163bfb5c4794d70bd498d631cc99e6df8..33ceaad991b005d632b48497230ee28744c9b2df 100644
--- a/predtuner/torchapp.py
+++ b/predtuner/torchapp.py
@@ -166,7 +166,7 @@ class TorchApp(ModeledApp, abc.ABC):
         ]
 
     @torch.no_grad()
-    def empirical_measure_qos_perf(
+    def empirical_measure_qos_cost(
         self, with_approxes: KnobsT, is_test: bool, progress: bool = False
     ) -> Tuple[float, float]:
         """Measure the QoS and performance of Module with given approximation
diff --git a/test/integrated_tuning.py b/test/integrated_tuning.py
index 4ba0d55e1b611c8471f0a2f52160695a7d8bf5b2..0ea354184c4cc12bee759d9d0b64eb11cc52fea2 100644
--- a/test/integrated_tuning.py
+++ b/test/integrated_tuning.py
@@ -32,7 +32,7 @@ app = TorchApp(
     accuracy,
     model_storage_folder="tuner_results/vgg16_cifar10",
 )
-baseline, _ = app.measure_qos_perf({}, False)
+baseline, _ = app.measure_qos_cost({}, False)
 tuner = app.get_tuner()
-tuner.tune(100, 2.1, 3.0, True, 50, perf_model="perf_linear", qos_model="qos_p1")
+tuner.tune(100, 2.1, 3.0, True, 50, cost_model="cost_linear", qos_model="qos_p1")
 tuner.dump_configs("tuner_results/test/configs.json")
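
As a reading aid, not part of this patch: the updated positional call above appears to correspond to the following keyword form, inferred from the `ApproxModeledTuner.tune` parameters visible in `predtuner/modeledapp.py`.

    tuner.tune(
        max_iter=100,
        qos_tuner_threshold=2.1,
        qos_keep_threshold=3.0,
        is_threshold_relative=True,
        take_best_n=50,
        cost_model="cost_linear",  # renamed from perf_model="perf_linear"
        qos_model="qos_p1",
    )
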
diff --git a/test/test_model_zoo_acc.py b/test/test_model_zoo_acc.py
index e4028814ae2b0c510073af01626fd87a19f36d22..9d5aca6788b168d90836fe66d67e670e869ca5ec 100644
--- a/test/test_model_zoo_acc.py
+++ b/test/test_model_zoo_acc.py
@@ -13,11 +13,11 @@ class TestModelZooAcc(unittest.TestCase):
     networks = {
         "lenet_mnist": (net.LeNet, net.MNIST, 2000, 99.65),
         "alexnet_cifar10": (net.AlexNet, net.CIFAR, 500, 78.78),
-        "alexnet2_cifar10": (net.AlexNet2, net.CIFAR, 500, 84.75),
+        "alexnet2_cifar10": (net.AlexNet2, net.CIFAR, 500, 84.76),
         "vgg16_cifar10": (net.VGG16Cifar10, net.CIFAR, 250, 89.22),
         "vgg16_cifar100": (net.VGG16Cifar100, net.CIFAR, 250, 68.42),
-        "resnet18_cifar10": (net.ResNet18, net.CIFAR, 250, 89.41),
-        "mobilenet": (net.MobileNet, net.CIFAR, 250, 84.9),
+        "resnet18_cifar10": (net.ResNet18, net.CIFAR, 250, 89.42),
+        "mobilenet_cifar10": (net.MobileNet, net.CIFAR, 250, 84.9),
         "alexnet_imagenet": (net.AlexNetImageNet, net.ImageNet, 20, 55.86),
         # "resnet50_imagenet": (net.ResNet50, net.ImageNet, 10, 71.72),
         "vgg16_imagenet": (net.VGG16ImageNet, net.ImageNet, 5, 68.82),
@@ -34,5 +34,5 @@ class TestModelZooAcc(unittest.TestCase):
             )
             tune = DataLoader(dataset, batchsize)
             app = TorchApp("", network, tune, tune, get_knobs_from_file(), accuracy)
-            qos, _ = app.empirical_measure_qos_perf({}, False, True)
+            qos, _ = app.empirical_measure_qos_cost({}, False, True)
             self.assertAlmostEqual(qos, target_acc)
diff --git a/test/test_torchapp.py b/test/test_torchapp.py
index 58c68a59d047e6e291939ebb99ea28006f610530..80efcf3cd6b61350c7a2400c23a89e825c2e008f 100644
--- a/test/test_torchapp.py
+++ b/test/test_torchapp.py
@@ -47,11 +47,11 @@ class TestTorchAppTuning(TorchAppSetUp):
         self.assertEqual(self.app.baseline_knob.name, "11")
 
     def test_baseline_qos(self):
-        qos, _ = self.app.measure_qos_perf({}, False)
+        qos, _ = self.app.measure_qos_cost({}, False)
         self.assertAlmostEqual(qos, 93.0)
 
     def test_tuning_relative_thres(self):
-        baseline, _ = self.app.measure_qos_perf({}, False)
+        baseline, _ = self.app.measure_qos_cost({}, False)
         tuner = self.app.get_tuner()
         tuner.tune(100, 3.0, 3.0, True, 10)
         for conf in tuner.kept_configs:
@@ -62,7 +62,7 @@ class TestTorchAppTuning(TorchAppSetUp):
     def test_enum_models(self):
         self.assertSetEqual(
             set(model.name for model in self.app.get_models()),
-            {"perf_linear", "qos_p1", "qos_p2"},
+            {"cost_linear", "qos_p1", "qos_p2"},
         )
 
 
@@ -70,7 +70,7 @@ class TestTorchAppTunerResult(TorchAppSetUp):
     @classmethod
     def setUpClass(cls):
         super().setUpClass()
-        cls.baseline, _ = cls.app.measure_qos_perf({}, False)
+        cls.baseline, _ = cls.app.measure_qos_cost({}, False)
         cls.tuner = cls.app.get_tuner()
         cls.tuner.tune(100, cls.baseline - 3.0)
 
@@ -83,7 +83,7 @@ class TestTorchAppTunerResult(TorchAppSetUp):
         configs = self.tuner.best_configs
         for c1 in configs:
             self.assertFalse(
-                any(c2.qos > c1.qos and c2.perf > c1.perf for c2 in configs)
+                any(c2.qos > c1.qos and c2.cost > c1.cost for c2 in configs)
             )
 
     def test_dummy_testset(self):
@@ -96,7 +96,7 @@ class TestModeledTuning(TorchAppSetUp):
     @classmethod
     def setUpClass(cls):
         super().setUpClass()
-        cls.baseline, _ = cls.app.measure_qos_perf({}, False)
+        cls.baseline, _ = cls.app.measure_qos_cost({}, False)
 
     def test_qos_p1(self):
         tuner = self.app.get_tuner()
@@ -104,7 +104,7 @@ class TestModeledTuning(TorchAppSetUp):
             100,
             3.0,
             is_threshold_relative=True,
-            perf_model="perf_linear",
+            cost_model="cost_linear",
             qos_model="qos_p1",
         )
 
@@ -114,7 +114,7 @@ class TestModeledTuning(TorchAppSetUp):
             100,
             3.0,
             is_threshold_relative=True,
-            perf_model="perf_linear",
+            cost_model="cost_linear",
             qos_model="qos_p2",
         )
 
@@ -123,7 +123,7 @@ class TestModelSaving(TorchAppSetUp):
     @classmethod
     def setUpClass(cls):
         super().setUpClass()
-        cls.baseline, _ = cls.app.measure_qos_perf({}, False)
+        cls.baseline, _ = cls.app.measure_qos_cost({}, False)
         cls.model_path = "/tmp/test_models"
         app = cls.get_app()
         app.init_model("qos_p1")
@@ -147,7 +147,7 @@ class TestModelSaving(TorchAppSetUp):
             100,
             3.0,
             is_threshold_relative=True,
-            perf_model="perf_linear",
+            cost_model="cost_linear",
             qos_model="qos_p1",
         )
 
@@ -156,6 +156,6 @@ class TestModelSaving(TorchAppSetUp):
             100,
             3.0,
             is_threshold_relative=True,
-            perf_model="perf_linear",
+            cost_model="cost_linear",
             qos_model="qos_p2",
         )