diff --git a/predtuner/approxapp.py b/predtuner/approxapp.py
index a0bd0f1080c1f791dce1b4e5d4eb6323305c1ee4..92e6aaaa74482623b36648e7e9053f607a60077f 100644
--- a/predtuner/approxapp.py
+++ b/predtuner/approxapp.py
@@ -76,6 +76,7 @@ class ApproxTuner:
         # By default, keep_threshold == tuner_threshold
         qos_keep_threshold = qos_keep_threshold or qos_tuner_threshold
         opentuner_args = opentuner_default_args()
+        opentuner_args.test_limit = max_iter
         tuner = TunerInterface(
             opentuner_args, self.app, qos_tuner_threshold, qos_keep_threshold, max_iter,
         )
diff --git a/predtuner/torchapp.py b/predtuner/torchapp.py
index 826de35329d4a3b9c5b77aee3d329a5f1acdbc9b..fbd52b4fda9199b853db156a19e82e5b2d9de65b 100644
--- a/predtuner/torchapp.py
+++ b/predtuner/torchapp.py
@@ -110,7 +110,8 @@ class TorchApp(ModeledApp, abc.ABC):
                 end = begin + len(target)
                 qos = self.tensor_to_qos(tensor_output[begin:end], target)
                 qoses.append(qos)
-            return self.combine_qos(np.array(qoses))
+            # Cast the combined QoS from float64 to a builtin Python float
+            return float(self.combine_qos(np.array(qoses)))
 
         return [
             LinearPerfModel(self._op_costs, self._knob_speedups),
@@ -127,10 +128,11 @@ class TorchApp(ModeledApp, abc.ABC):
         qoses = []
         for inputs, targets in dataloader:
             inputs = move_to_device_recursively(inputs, self.device)
+            targets = move_to_device_recursively(targets, self.device)
             outputs = approxed(inputs)
             qoses.append(self.tensor_to_qos(outputs, targets))
         qos = self.combine_qos(np.array(qoses))
-        return 0.0, qos
+        return float(qos), 0.0  # float64->float
 
     def __repr__(self) -> str:
         class_name = self.__class__.__name__
diff --git a/test/test_torchapp.py b/test/test_torchapp.py
index ac23c9870a7eb65d5659aadef878a6d89f49bdf6..a77715ead7301b96ec0d31d50d65758bf28ee619 100644
--- a/test/test_torchapp.py
+++ b/test/test_torchapp.py
@@ -1,5 +1,7 @@
 import unittest
 
+from torch.utils.data.dataset import Subset
+
 from predtuner.approxes import get_knobs_from_file
 from predtuner.torchapp import TorchApp
 from predtuner.torchutil import accuracy
@@ -10,21 +12,29 @@ from torchvision.datasets import CIFAR10
 from torchvision.models.vgg import vgg16
 
 
-class TestTorchAppInit(unittest.TestCase):
+class TestTorchApp(unittest.TestCase):
     def setUp(self):
-        transform = transforms.Compose([transforms.ToTensor()])
-        self.dataset = CIFAR10("/tmp/cifar10", download=True, transform=transform)
+        normalize = transforms.Normalize(  # 3-value mean/std, applied after ToTensor in the Compose below
+            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
+        )
+        transform = transforms.Compose([transforms.ToTensor(), normalize])
+
+        dataset = CIFAR10("/tmp/cifar10", download=True, transform=transform)
+        self.dataset = Subset(dataset, range(100))  # keep only indices 0-99; presumably to keep the tests quick
         self.module = vgg16(pretrained=True)
 
-    def test_init(self):
-        app = TorchApp(
-            "test",
+    def get_app(self):  # helper used by test_init: builds a TorchApp whose two loaders both wrap self.dataset
+        return TorchApp(
+            "TestTorchApp",
             self.module,
             DataLoader(self.dataset),
             DataLoader(self.dataset),
             get_knobs_from_file(),
             accuracy,
         )
+
+    def test_init(self):
+        app = self.get_app()
         n_knobs = {op: len(ks) for op, ks in app.op_knobs.items()}
         for op_name, op in app.midx.name_to_module.items():
             if isinstance(op, Conv2d):
@@ -34,3 +44,19 @@ class TestTorchAppInit(unittest.TestCase):
             else:
                 nknob = 1
             self.assertEqual(n_knobs[op_name], nknob)
+
+    # def test_baseline_qos(self):
+    #     app = self.get_app()
+    #     qos, _ = app.measure_qos_perf({}, False)
+
+    def test_tuning(self):  # smoke test: builds an app, gets its tuner and runs tune(); nothing is asserted
+        app = TorchApp(
+            "test",
+            self.module,
+            DataLoader(self.dataset, batch_size=4),  # unlike get_app(), both loaders here use batch_size=4
+            DataLoader(self.dataset, batch_size=4),
+            get_knobs_from_file(),
+            accuracy,
+        )
+        tuner = app.get_tuner()
+        tuner.tune(10, 3.0)  # NOTE(review): presumably (max_iter, qos_tuner_threshold) - confirm against tune()