diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 8bcc4738d02d7f07a497131d74f9a0ff6f119048..63efc4a8bb13a3feb97fb8dd76951fbfe0dde9a5 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -26,8 +26,9 @@ build-and-test:
     - cd build
     - make -j32 check-hpvm-pass
     - make -j32 check-hpvm-dnn
-    - make -j32 check-hpvm-profiler
-    - make -j32 check-hpvm-torch2hpvm
+    - make -j32 check-hpvm-torch-acc
+    - make -j32 check-hpvm-torch-profiling
+    - make -j32 check-hpvm-torch-tuning
   only:
     - hpvm-release-exp
     - merge_requests
diff --git a/hpvm/test/dnn_benchmarks/profiling/jetson_clocks.sh b/hpvm/projects/hpvm-profiler/hpvm_profiler/jetson_clocks.sh
similarity index 100%
rename from hpvm/test/dnn_benchmarks/profiling/jetson_clocks.sh
rename to hpvm/projects/hpvm-profiler/hpvm_profiler/jetson_clocks.sh
diff --git a/hpvm/projects/predtuner b/hpvm/projects/predtuner
index a149e365170263666db764664ad8ed6b03f258d3..108de0205d1f84082f9722d2e0e7b51dd5f8d997 160000
--- a/hpvm/projects/predtuner
+++ b/hpvm/projects/predtuner
@@ -1 +1 @@
-Subproject commit a149e365170263666db764664ad8ed6b03f258d3
+Subproject commit 108de0205d1f84082f9722d2e0e7b51dd5f8d997
diff --git a/hpvm/projects/torch2hpvm/torch2hpvm/graph_ir.py b/hpvm/projects/torch2hpvm/torch2hpvm/graph_ir.py
index 5c248f829adef15093b853891927f353aca30c4b..abf7b60ff3ff30962d8c960e5c6ffbc33d03f39e 100644
--- a/hpvm/projects/torch2hpvm/torch2hpvm/graph_ir.py
+++ b/hpvm/projects/torch2hpvm/torch2hpvm/graph_ir.py
@@ -115,7 +115,6 @@ class WeightTensor(TensorNode):
 
 class Conv2DNode(DFGNode):
     op_type = "Conv2D"
-    hpvm_op_type = "convolution"
 
     def __init__(
         self,
@@ -135,6 +134,11 @@ class Conv2DNode(DFGNode):
         if list(dilations) != [1, 1]:
             raise ValueError("Dilation > 1 is unsupported")
         self.group = group
+        if group == 1:
+            self.hpvm_op_type = "convolution"
+        else:
+            # HPVM tensor_runtime distinguishes these two. (sigh)
+            self.hpvm_op_type = "depthwise_convolution"
         self.pads = pads[0]
         self.sh, self.sw = strides
 
diff --git a/hpvm/projects/torch2hpvm/torch2hpvm/template_hpvm.cpp.in b/hpvm/projects/torch2hpvm/torch2hpvm/template_hpvm.cpp.in
index fa252a3e0ce063697d56e771afbfbde69d0c5641..208cdfe6169f6baae95522720b8c850aff12a3a0 100644
--- a/hpvm/projects/torch2hpvm/torch2hpvm/template_hpvm.cpp.in
+++ b/hpvm/projects/torch2hpvm/torch2hpvm/template_hpvm.cpp.in
@@ -58,23 +58,40 @@ typedef struct __attribute__((__packed__)) {
   struct ret_t r;
 } RootIn;
 
+void printUsage(const std::string &bin_name) {
+  std::cerr << "Usage: " << bin_name << "[-d {test|tune}] [-c CONF_FILE]\n";
+}
 
 const int batch_size = {{batch_size}}, input_size = {{input_size}}, batch_count = input_size / batch_size;
 
-int main(int argc, char *argv[]){
-  if (argc != 2) {
-    std::cout << "Usage: " << argv[0] << " {tune|test}\n";
-    return 1;
-  }
-  std::string arg1 = argv[1];
-  if (arg1 != "tune" && arg1 != "test") {
-    std::cout << "Usage: " << argv[0] << " {tune|test}\n";
-    return 1;
+int main(int argc, char *argv[]) {
+  std::string config_path = "", runtype = "test";
+  int flag;
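+  // Parse flags: -d selects the run type ("test" or "tune"), -c gives an approximation config file, -h prints usage.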
+  while ((flag = getopt(argc, argv, "hd:c:")) != -1) {
+    switch (flag) {
+    case 'd':
+      runtype = std::string(optarg);
+      if (runtype != "test" && runtype != "tune") {
+        printUsage(argv[0]);
+        return 1;
+      }
+      break;
+    case 'c':
+      config_path = std::string(optarg);
+      break;
+    case 'h':
+      printUsage(argv[0]);
+      return 0;
+    default:
+      printUsage(argv[0]);
+      return 1;
+    }
   }
 
   std::string dir_prefix = "{{prefix}}/";
-  std::string input_path = dir_prefix + arg1 + "_input.bin";
-  std::string labels_path = dir_prefix + arg1 + "_labels.bin";
+  std::string input_path = dir_prefix + "test_input.bin";
+  std::string labels_path = dir_prefix + "test_labels.bin";
 {% for w in weights %}
   std::string {{w.name}}_path = dir_prefix + "{{w.filename}}";
   void* {{w.name}} = readTrainedWeights({{w.name}}_path.c_str(), 0, {{w.shape|join(', ')}});
@@ -88,8 +105,13 @@ int main(int argc, char *argv[]){
 {% endfor %}
 
   __hpvm__init();
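+  // Load the approximation config into the HPVM runtime controller, if one was given.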
+  if (config_path != "") {
+    llvm_hpvm_initializeRuntimeController(config_path.c_str());
+  }
+
   startMemTracking();
-  #pragma clang loop unroll(disable)
+#pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++){
     int start = i * batch_size, end = start + batch_size;
     void *{{input_name}} = readInputBatch(input_path.c_str(), 0, start, end, {{input_shape|join(', ')}});
diff --git a/hpvm/projects/torch2hpvm/torch2hpvm/template_hpvm_inspect.cpp.in b/hpvm/projects/torch2hpvm/torch2hpvm/template_hpvm_inspect.cpp.in
index 8074704ece0988d7897c1e93b41f1ea3c43deb35..7db01d87af8dbfb0341c77b3c471cfba3cc289c4 100644
--- a/hpvm/projects/torch2hpvm/torch2hpvm/template_hpvm_inspect.cpp.in
+++ b/hpvm/projects/torch2hpvm/torch2hpvm/template_hpvm_inspect.cpp.in
@@ -140,6 +140,7 @@ int main(){
     // Keep this open so the other side knows we have more batches to write
     auto* fp = open_fifo("{{fifo_path_w}}", "wb");
     float total_accuracy = 0;
+#pragma clang loop unroll(disable)
     for (int i = 0; i < batch_count; i++){
       int start = i * batch_size, end = start + batch_size;
       void *{{input_name}} = readInputBatch(input_pth, 0, start, end, {{input_shape|join(', ')}});
@@ -158,8 +159,8 @@ int main(){
       fifo_write_batch(fp, result);
       freeBatchMemory();
     }
-    fclose(fp);
     write_accuracy(total_accuracy / input_size);
+    fclose(fp);
     __hpvm__cleanup();
   }
 
diff --git a/hpvm/test/CMakeLists.txt b/hpvm/test/CMakeLists.txt
index 4ff98a5386d91ce50b755d7e507a84e0fbe1c4dd..8f8ca9b4c7c7b99873cc03020402c90ab22e3f58 100644
--- a/hpvm/test/CMakeLists.txt
+++ b/hpvm/test/CMakeLists.txt
@@ -8,6 +8,5 @@ set(CLANG_CXX ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/clang++)
 add_subdirectory(hpvm_pass)  # Passes test suite
 add_subdirectory(benchmarks)
 add_subdirectory(dnn_benchmarks/hpvm-c)  # HPVM-C DNN accuracy test suite
-add_subdirectory(dnn_benchmarks/pytorch)  # Torch frontend test suite
+add_subdirectory(dnn_benchmarks/pytorch)  # Torch frontend test suites (3 of them)
 add_subdirectory(dnn_benchmarks/tensor-rt-src)  # tensor_runtime DNN (build only, no tests)
-add_subdirectory(dnn_benchmarks/profiling)  # hpvm-profiler test suite
diff --git a/hpvm/test/README.rst b/hpvm/test/README.rst
index 66af5c999fd4c90df193f6ca22cc911419d66f40..6125aab2b78e8a94e4544cbf142273e623723a41 100644
--- a/hpvm/test/README.rst
+++ b/hpvm/test/README.rst
@@ -4,20 +4,17 @@ Test and Benchmarks
 Directory Organization
 ----------------------
 
-The `hpvm/test` directory holds all tests and benchmarks in HPVM and is organized as follows:
+The ``hpvm/test`` directory holds all tests and benchmarks in HPVM and is organized as follows:
 
-* 
-  ``hpvm_pass/``: unit and regression tests for HPVM Passes, written in LLVM-bitcode.
+* ``hpvm_pass/``: unit and regression tests for HPVM Passes, written in LLVM-bitcode.
 
-* 
-  ``benchmarks/``: includes a few applications written in HPVM-C, a template, and directions for compiling and running these benchmarks.
+* ``benchmarks/``: includes a few applications written in HPVM-C, a template, and directions for compiling and running these benchmarks.
 
   * ``benchmarks/parboil``: Selected benchmarks from the `Parboil <http://impact.crhc.illinois.edu/parboil/parboil.aspx>`_ benchmark suite.
   * ``benchmarks/pipeline``: An edge detection pipeline benchmark.
   * ``benchmarks/hpvm-cava``: A Camera ISP pipeline, adapted from C code provided from our collaborators at `Harvard <http://vlsiarch.eecs.harvard.edu>`_.
 
-* 
-  ``dnn_benchmarks/``: ten (10) DNN benchmarks in HPVM-C, Keras and PyTorch, supported by ApproxHPVM.
+* ``dnn_benchmarks/``: ten (10) DNN benchmarks in HPVM-C, Keras and PyTorch, supported by ApproxHPVM.
   This tests HPVM as well as the Keras and PyTorch frontends.
 
   * 
@@ -25,18 +22,27 @@ The `hpvm/test` directory holds all tests and benchmarks in HPVM and is organize
     Their organization and usage are similar to the benchmarks under ``benchmarks/``.
 
     Each subfolder contains a DNN with 2 versions (2 ``.cpp`` files):
-    the ``tensor``-targeted version which compiles to ``tensor_runtime``,
+    the ``tensor``-targeted version which compiles to `tensor_runtime`,
     and the ``cudnn``-targeted version which compiles to operators in ``cuDNN``
     (has ``_cudnn`` in name).
 
-  * 
-    ``dnn_benchmarks/keras`` contains these DNNs implemented in Keras,
+  * ``dnn_benchmarks/keras`` contains these DNNs implemented in Keras,
     and code for generating them down to HPVM-C (testing Keras frontend).
 
   * ``dnn_benchmarks/pytorch`` contains these DNNs in PyTorch
     and code for generating them down to HPVM-C (testing PyTorch/ONNX frontend).
 
-  The code generated from Keras and PyTorch frontend should be largely similar and functionally equivalent.
+    * ``./dnn`` is a local package with these 10 DNNs implemented in PyTorch as examples.
+      This package is not installed with HPVM.
+
+    * ``./test_frontend`` contains tests of the inference accuracy of code generated by the PyTorch frontend.
+
+    * ``./test_{profiling|tuning}`` contains tests that run profiling and tuning, respectively,
+      on frontend-generated binaries.
+
+  * ``dnn_benchmarks/tensor-rt-src`` contains these DNNs directly implemented in `tensor_runtime`
+    functions. These are for reference purposes only and are not actively used in the HPVM system or its tests.
+
 
 Running Test Cases and Benchmarks
 ---------------------------------
@@ -45,29 +51,29 @@ The easiest way to run tests is to use ``make`` targets,
 which will also take care of all compilation of test cases and test fixtures.
 The following targets runs these tests respectively:
 
-
 * ``make -j check-hpvm-pass`` runs tests in ``hpvm_pass``: ``hpvm_pass/**/*.ll``.
   These are regression and unit tests for HPVM passes.
-* 
-  ``make -j check-hpvm-dnn`` runs all 20 DNN benchmarks under ``dnn_benchmarks/hpvm-c``
+
+* ``make -j check-hpvm-dnn`` runs all 20 DNN benchmarks under ``dnn_benchmarks/hpvm-c``
   (10 DNNs x 2 versions) and validates their accuracy.
 
   *Note* that this can take quite long due to the size of DNNs and datasets.
   Depending on your hardware capability, this test can take 5-30 minutes.
   Also, this is set to run sequentially out of GPU memory concerns.
 
-* 
-  ``make -j check-hpvm-profiler`` runs ``hpvm-profiler`` on some smaller networks
-  (as it is extremely time-consuming) and presents the tradeoff curve with profiled speedup.
+* ``make -j check-hpvm-torch-acc`` generates all 10 DNNs with the PyTorch frontend,
+  runs them, and checks their accuracy. This tests the PyTorch frontend in isolation.
 
-  *Note* that if you're on an NVIDIA Jetson TX2, you may want to run
-  ``bash dnn_benchmarks/profiling/jetson_clocks.sh``
-  to ensure that the clocks are running at the maximum frequency
+* ``make -j check-hpvm-torch-tuning`` runs `predtuner` on binaries generated by the PyTorch frontend
+  to exercise both empirical and predictive autotuning.
+  This is only done for a few smaller networks and for 5 tuning iterations each,
+  as autotuning is extremely time-consuming.
 
-Underneath, ``llvm-lit`` is used to discover and run the tests.
+* ``make -j check-hpvm-torch-profiling`` runs `hpvm-profiler` on binaries generated by the PyTorch frontend
+  and presents the tradeoff curve with the profiled speedup.
+  This is only done for a few smaller networks.
 
-``benchmarks/`` can only be compiled in-source with ``make``.
-We are working to migrate it into the ``cmake`` system.
+Underneath, ``llvm-lit`` is used to discover and run the tests.
 
 Compiling Benchmarks
 --------------------
@@ -119,4 +125,20 @@ Currently, there are 20 of them. These are:
 
 
 ``_cudnn`` suffix indicates the code is generated onto cuDNN functions.
-Otherwise they are generated to ``tensor_runtime`` DNN functions which are hand-written in CUDA.
+Otherwise they are generated to `tensor_runtime` DNN functions which are hand-written in CUDA.
+
+Other HPVM-C Benchmarks
+^^^^^^^^^^^^^^^^^^^^^^^
+
+There are 6 benchmarks under ``benchmarks/``:
+``hpvm-cava`` and ``pipeline`` are single benchmarks, while ``parboil/`` is a collection of 4 benchmarks.
+
+To build ``hpvm-cava`` or ``pipeline``,
+use ``make -j hpvm_cava_{cpu|gpu}`` or ``make -j pipeline_{cpu|gpu}``.
+The ``cpu`` or ``gpu`` suffix indicates the device on which the kernels in the benchmark run.
+For ``hpvm-cava``, the binary is generated under
+``${build_dir}/tools/hpvm/test/benchmarks/hpvm-cava``,
+while ``pipeline`` binaries are under ``${build_dir}/tools/hpvm/test/benchmarks/pipeline``.
+
+The ``parboil`` benchmarks are currently only available through their own Makefiles.
+We will move them into the CMake build system in the next release.
diff --git a/hpvm/test/dnn_benchmarks/profiling/CMakeLists.txt b/hpvm/test/dnn_benchmarks/profiling/CMakeLists.txt
deleted file mode 100644
index 23e0e9161884ce95152e2feffe19d6b1acfcf381..0000000000000000000000000000000000000000
--- a/hpvm/test/dnn_benchmarks/profiling/CMakeLists.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-# --[ llvm-lit test setup
-# lit.cfg.py looks for tests in CMAKE_CURRENT_BINARY_DIR (see lit.cfg.py)
-# as most of the tests require some kind of compilation / generation
-# which is best done over there.
-configure_lit_site_cfg(
-  ../../lit.site.cfg.py.in
-  ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg.py
-  MAIN_CONFIG
-  ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py
-)
-add_lit_testsuite(check-hpvm-profiler "Run tests for package hpvm-profiler"
-  ${CMAKE_CURRENT_BINARY_DIR}
-  DEPENDS dnn_benchmarks  # Requires all dnn benchmarks
-  ARGS "-j1"  # Run DNN benchmarks sequentially
-)
diff --git a/hpvm/test/dnn_benchmarks/profiling/alexnet2_cifar10.test b/hpvm/test/dnn_benchmarks/profiling/alexnet2_cifar10.test
deleted file mode 100644
index 455a3e75a7aff4ac76123cb62e860701e8397713..0000000000000000000000000000000000000000
--- a/hpvm/test/dnn_benchmarks/profiling/alexnet2_cifar10.test
+++ /dev/null
@@ -1 +0,0 @@
-RUN: test_hpvm_c_profiling.py alexnet2_cifar10
\ No newline at end of file
diff --git a/hpvm/test/dnn_benchmarks/profiling/alexnet_cifar10.test b/hpvm/test/dnn_benchmarks/profiling/alexnet_cifar10.test
deleted file mode 100644
index 62c667a249e514a17f8ea809f364c4e65c3332dd..0000000000000000000000000000000000000000
--- a/hpvm/test/dnn_benchmarks/profiling/alexnet_cifar10.test
+++ /dev/null
@@ -1 +0,0 @@
-RUN: test_hpvm_c_profiling.py alexnet_cifar10
\ No newline at end of file
diff --git a/hpvm/test/dnn_benchmarks/profiling/lenet_mnist.test b/hpvm/test/dnn_benchmarks/profiling/lenet_mnist.test
deleted file mode 100644
index 88856a8913f2c9fb275187d65d443c50aa8bf583..0000000000000000000000000000000000000000
--- a/hpvm/test/dnn_benchmarks/profiling/lenet_mnist.test
+++ /dev/null
@@ -1 +0,0 @@
-RUN: test_hpvm_c_profiling.py lenet_mnist
\ No newline at end of file
diff --git a/hpvm/test/dnn_benchmarks/profiling/mobilenet_cifar10.test b/hpvm/test/dnn_benchmarks/profiling/mobilenet_cifar10.test
deleted file mode 100644
index a40981c9408b52f45ae9a58ab3895e12889bf665..0000000000000000000000000000000000000000
--- a/hpvm/test/dnn_benchmarks/profiling/mobilenet_cifar10.test
+++ /dev/null
@@ -1 +0,0 @@
-RUN: test_hpvm_c_profiling.py mobilenet_cifar10
\ No newline at end of file
diff --git a/hpvm/test/dnn_benchmarks/profiling/resnet18_cifar10.test b/hpvm/test/dnn_benchmarks/profiling/resnet18_cifar10.test
deleted file mode 100644
index 5d09297309e6f2ac48c23e0c529021144d6734e7..0000000000000000000000000000000000000000
--- a/hpvm/test/dnn_benchmarks/profiling/resnet18_cifar10.test
+++ /dev/null
@@ -1 +0,0 @@
-RUN: test_hpvm_c_profiling.py resnet18_cifar10
\ No newline at end of file
diff --git a/hpvm/test/dnn_benchmarks/profiling/test_hpvm_c_profiling.py b/hpvm/test/dnn_benchmarks/profiling/test_hpvm_c_profiling.py
deleted file mode 100755
index 853b0dc3e23a3ea847748ecaeda62650e99ee430..0000000000000000000000000000000000000000
--- a/hpvm/test/dnn_benchmarks/profiling/test_hpvm_c_profiling.py
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/usr/bin/env python3
-from pathlib import Path
-from sys import argv
-
-from hpvm_profiler import profile_configs, read_hpvm_configs, write_hpvm_configs
-
-# relative to cwd()
-benchmarks_bindir = Path("../hpvm-c")
-# relative to location of this file
-benchmarks_srcdir = Path(__file__).parent / "../hpvm-c/benchmarks"
-# We're called in the "current" binary directory.
-# For example (depending on where build dir is),
-# "hpvm/build/tools/hpvm/test/dnn_benchmarks/profiling".
-# So we know where the benchmark binaries are due to source directory structure,
-# and this is not hardcoding.
-dnn = argv[1]
-bench_bin_file = benchmarks_bindir / f"hpvm_{dnn}"
-config_file = benchmarks_srcdir / dnn / "data/tuner_confs.txt"
-out_config_file = f"./{dnn}.txt"
-header, configs = read_hpvm_configs(config_file)
-profile_configs(bench_bin_file, configs[1:6], configs[0], progress_bar=False)
-write_hpvm_configs(header, configs[:6], out_config_file)
diff --git a/hpvm/test/dnn_benchmarks/pytorch/CMakeLists.txt b/hpvm/test/dnn_benchmarks/pytorch/CMakeLists.txt
index 778593a57ddfc3a6abcc4ed045f02614535739f8..9129cab70115cabc14426ccb47ee0531816592b3 100644
--- a/hpvm/test/dnn_benchmarks/pytorch/CMakeLists.txt
+++ b/hpvm/test/dnn_benchmarks/pytorch/CMakeLists.txt
@@ -1,18 +1,41 @@
-# --[ llvm-lit test setup
-# lit.cfg.py looks for tests in CMAKE_CURRENT_BINARY_DIR (see lit.cfg.py)
-# as most of the tests require some kind of compilation / generation
-# which is best done over there.
+# --[ llvm-lit test setup for test_frontend/
 configure_lit_site_cfg(
   ../../lit.site.cfg.py.in
-  ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg.py
+  ${CMAKE_CURRENT_BINARY_DIR}/test_frontend/lit.site.cfg.py
   MAIN_CONFIG
-  ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py
+  ${CMAKE_CURRENT_SOURCE_DIR}/test_frontend/lit.cfg.py
 )
-add_lit_testsuite(check-hpvm-torch2hpvm "Run tests for package torch2hpvm"
-  ${CMAKE_CURRENT_BINARY_DIR}
+add_lit_testsuite(check-hpvm-torch-acc "Run accuracy tests for HPVM PyTorch frontend"
+  ${CMAKE_CURRENT_BINARY_DIR}/test_frontend
   # We depend on check_dnn_acc.py defined in ../hpvm-c/
   # to compare the inference accuracy of our frontend-generated binary
   # to that of the baseline.
   DEPENDS check_dnn_acc
   ARGS "-j1"  # Run frontend generation sequentially
 )
+
+# --[ llvm-lit test setup for test_profiling/
+configure_lit_site_cfg(
+  ../../lit.site.cfg.py.in
+  ${CMAKE_CURRENT_BINARY_DIR}/test_profiling/lit.site.cfg.py
+  MAIN_CONFIG
+  ${CMAKE_CURRENT_SOURCE_DIR}/test_profiling/lit.cfg.py
+)
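+# The profiling and tuning test scripts generate and compile their own binaries
+# through the PyTorch frontend, so these suites need no extra build-time DEPENDS.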
+add_lit_testsuite(check-hpvm-torch-profiling "Run tests for torch frontend + profiling"
+  ${CMAKE_CURRENT_BINARY_DIR}/test_profiling
+  ARGS "-j1"  # Run DNN benchmarks sequentially
+)
+
+# --[ llvm-lit test setup for test_tuning/
+configure_lit_site_cfg(
+  ../../lit.site.cfg.py.in
+  ${CMAKE_CURRENT_BINARY_DIR}/test_tuning/lit.site.cfg.py
+  MAIN_CONFIG
+  ${CMAKE_CURRENT_SOURCE_DIR}/test_tuning/lit.cfg.py
+)
+add_lit_testsuite(check-hpvm-torch-tuning "Run tests for torch frontend + autotuning"
+  ${CMAKE_CURRENT_BINARY_DIR}/test_tuning
+  ARGS "-j1"  # Run tuning tests sequentially
+)
diff --git a/hpvm/test/dnn_benchmarks/pytorch/dnn/__init__.py b/hpvm/test/dnn_benchmarks/pytorch/dnn/__init__.py
index f4a16c6a2c0767efcac372006abf88f9919a75a9..22e7bfadbc9157814195d4b71d913ffa869a0fc2 100644
--- a/hpvm/test/dnn_benchmarks/pytorch/dnn/__init__.py
+++ b/hpvm/test/dnn_benchmarks/pytorch/dnn/__init__.py
@@ -1,6 +1,69 @@
+from pathlib import Path
+from typing import Union
+
+import torch
+from torch2hpvm import BinDataset, ModelExporter
+
 from .alexnet import AlexNet, AlexNet2, AlexNetImageNet
 from .datasets import CIFAR, MNIST, ImageNet
 from .lenet import LeNet
-from .vgg16 import VGG16Cifar10, VGG16Cifar100, VGG16ImageNet
 from .mobilenet import MobileNet
 from .resnet import ResNet18, ResNet50
+from .vgg16 import VGG16Cifar10, VGG16Cifar100, VGG16ImageNet
+
+# DNN name -> (DNN class, input_channel, input_size, suggested_batchsize)
+benchmarks = {
+    "lenet_mnist": (LeNet, 1, 28, 1000),
+    "alexnet_cifar10": (AlexNet, 3, 32, 500),
+    "alexnet2_cifar10": (AlexNet2, 3, 32, 500),
+    "alexnet_imagenet": (AlexNetImageNet, 3, 224, 500),
+    "mobilenet_cifar10": (MobileNet, 3, 32, 500),
+    "resnet18_cifar10": (ResNet18, 3, 32, 500),
+    "resnet50_imagenet": (ResNet50, 3, 224, 25),
+    "vgg16_cifar10": (VGG16Cifar10, 3, 32, 500),
+    "vgg16_cifar100": (VGG16Cifar100, 3, 32, 500),
+    "vgg16_imagenet": (VGG16ImageNet, 3, 224, 10),
+}
+
+
+def export_example_dnn(
+    dnn_name: str, output_dir: Union[Path, str], generate_for_tuning: bool
+):
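+    """Export one of the example DNNs to HPVM-C and compile it into a binary.
+
+    When generate_for_tuning is True, the exporter targets "hpvm_tensor_inspect"
+    so the produced binary can be driven by the autotuner; otherwise the tuner
+    configuration file shipped with the corresponding HPVM-C benchmark is used.
+    Returns the path of the compiled binary and the ModelExporter instance.
+    """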
+    self_folder = Path(__file__).parent.absolute()
+    dnn_bench_dir = self_folder / "../.."
+
+    model_cls, nch, img_size, batch_size = benchmarks[dnn_name]
+    dataset_shape = 5000, nch, img_size, img_size
+    params = dnn_bench_dir / "model_params" / dnn_name
+    bin_tuneset = BinDataset(
+        params / "tune_input.bin", params / "tune_labels.bin", dataset_shape
+    )
+    bin_testset = BinDataset(
+        params / "test_input.bin", params / "test_labels.bin", dataset_shape
+    )
+    model: torch.nn.Module = model_cls()
+    checkpoint = dnn_bench_dir / f"model_params/pytorch/{dnn_name}.pth.tar"
+    model.load_state_dict(torch.load(checkpoint.as_posix()))
+
+    build_dir = Path(output_dir) / "build"  # accept both str and Path for output_dir
+    target_binary = build_dir / dnn_name
+    if generate_for_tuning:
+        exporter = ModelExporter(
+            model, bin_tuneset, bin_testset, output_dir, target="hpvm_tensor_inspect"
+        )
+    else:
+        conf_file = (
+            dnn_bench_dir / "hpvm-c/benchmarks" / dnn_name / "data/tuner_confs.txt"
+        ).absolute()
+        exporter = ModelExporter(
+            model, bin_tuneset, bin_testset, output_dir, config_file=conf_file
+        )
+    exporter.generate(batch_size=batch_size).compile(target_binary, build_dir)
+    return target_binary, exporter
diff --git a/hpvm/test/dnn_benchmarks/pytorch/test_frontend.py b/hpvm/test/dnn_benchmarks/pytorch/test_frontend.py
deleted file mode 100755
index 3c20c6ea5a472a693156b4881b58d4e0f1fc8575..0000000000000000000000000000000000000000
--- a/hpvm/test/dnn_benchmarks/pytorch/test_frontend.py
+++ /dev/null
@@ -1,56 +0,0 @@
-#!/usr/bin/env python3
-import os
-import shutil
-import site
-from pathlib import Path
-from subprocess import run
-from sys import argv
-
-import torch
-from torch2hpvm import BinDataset, ModelExporter
-from torch.nn import Module
-
-site.addsitedir(os.path.dirname(__file__))
-import dnn
-
-benchmarks = {
-    "lenet_mnist": (dnn.LeNet, 1, 28, 1000),
-    "alexnet_cifar10": (dnn.AlexNet, 3, 32, 500),
-    "alexnet2_cifar10": (dnn.AlexNet2, 3, 32, 500),
-    "alexnet_imagenet": (dnn.AlexNetImageNet, 3, 224, 500),
-    "mobilenet_cifar10": (dnn.MobileNet, 3, 32, 500),
-    "resnet18_cifar10": (dnn.ResNet18, 3, 32, 500),
-    "resnet50_imagenet": (dnn.ResNet50, 3, 224, 25),
-    "vgg16_cifar10": (dnn.VGG16Cifar10, 3, 32, 500),
-    "vgg16_cifar100": (dnn.VGG16Cifar100, 3, 32, 500),
-    "vgg16_imagenet": (dnn.VGG16ImageNet, 3, 224, 10),
-}
-self_folder = Path(__file__).parent
-netname = argv[1]
-model_cls, nch, img_size, batch_size = benchmarks[netname]
-codegen_dir = Path(f"./{netname}")
-print(f"Generating {netname} to {codegen_dir}")
-if codegen_dir.exists():
-    shutil.rmtree(codegen_dir)
-
-params = self_folder / "../model_params" / netname
-dataset_shape = 5000, nch, img_size, img_size
-bin_tuneset = BinDataset(
-    params / "tune_input.bin", params / "tune_labels.bin", dataset_shape
-)
-bin_testset = BinDataset(
-    params / "test_input.bin", params / "test_labels.bin", dataset_shape
-)
-model: Module = model_cls()
-checkpoint = self_folder / "../model_params/pytorch" / f"{netname}.pth.tar"
-model.load_state_dict(torch.load(checkpoint.as_posix()))
-print(model)
-
-build_dir = codegen_dir / "build"
-target_binary = build_dir / netname
-conf_file = self_folder / "../hpvm-c/benchmarks" / netname / "data/tuner_confs.txt"
-exporter = ModelExporter(
-    model, bin_tuneset, bin_testset, codegen_dir, config_file=conf_file
-)
-exporter.generate(batch_size=batch_size).compile(target_binary, build_dir)
-run([str(target_binary), "test"], check=True)
diff --git a/hpvm/test/dnn_benchmarks/pytorch/alexnet2_cifar10.test b/hpvm/test/dnn_benchmarks/pytorch/test_frontend/alexnet2_cifar10.test
similarity index 100%
rename from hpvm/test/dnn_benchmarks/pytorch/alexnet2_cifar10.test
rename to hpvm/test/dnn_benchmarks/pytorch/test_frontend/alexnet2_cifar10.test
diff --git a/hpvm/test/dnn_benchmarks/pytorch/alexnet_cifar10.test b/hpvm/test/dnn_benchmarks/pytorch/test_frontend/alexnet_cifar10.test
similarity index 100%
rename from hpvm/test/dnn_benchmarks/pytorch/alexnet_cifar10.test
rename to hpvm/test/dnn_benchmarks/pytorch/test_frontend/alexnet_cifar10.test
diff --git a/hpvm/test/dnn_benchmarks/pytorch/alexnet_imagenet.test b/hpvm/test/dnn_benchmarks/pytorch/test_frontend/alexnet_imagenet.test
similarity index 100%
rename from hpvm/test/dnn_benchmarks/pytorch/alexnet_imagenet.test
rename to hpvm/test/dnn_benchmarks/pytorch/test_frontend/alexnet_imagenet.test
diff --git a/hpvm/test/dnn_benchmarks/pytorch/lenet_mnist.test b/hpvm/test/dnn_benchmarks/pytorch/test_frontend/lenet_mnist.test
similarity index 100%
rename from hpvm/test/dnn_benchmarks/pytorch/lenet_mnist.test
rename to hpvm/test/dnn_benchmarks/pytorch/test_frontend/lenet_mnist.test
diff --git a/hpvm/test/dnn_benchmarks/pytorch/lit.cfg.py b/hpvm/test/dnn_benchmarks/pytorch/test_frontend/lit.cfg.py
similarity index 100%
rename from hpvm/test/dnn_benchmarks/pytorch/lit.cfg.py
rename to hpvm/test/dnn_benchmarks/pytorch/test_frontend/lit.cfg.py
diff --git a/hpvm/test/dnn_benchmarks/pytorch/mobilenet_cifar10.test b/hpvm/test/dnn_benchmarks/pytorch/test_frontend/mobilenet_cifar10.test
similarity index 100%
rename from hpvm/test/dnn_benchmarks/pytorch/mobilenet_cifar10.test
rename to hpvm/test/dnn_benchmarks/pytorch/test_frontend/mobilenet_cifar10.test
diff --git a/hpvm/test/dnn_benchmarks/pytorch/resnet18_cifar10.test b/hpvm/test/dnn_benchmarks/pytorch/test_frontend/resnet18_cifar10.test
similarity index 100%
rename from hpvm/test/dnn_benchmarks/pytorch/resnet18_cifar10.test
rename to hpvm/test/dnn_benchmarks/pytorch/test_frontend/resnet18_cifar10.test
diff --git a/hpvm/test/dnn_benchmarks/pytorch/resnet50_imagenet.test b/hpvm/test/dnn_benchmarks/pytorch/test_frontend/resnet50_imagenet.test
similarity index 100%
rename from hpvm/test/dnn_benchmarks/pytorch/resnet50_imagenet.test
rename to hpvm/test/dnn_benchmarks/pytorch/test_frontend/resnet50_imagenet.test
diff --git a/hpvm/test/dnn_benchmarks/pytorch/test_frontend/test_frontend.py b/hpvm/test/dnn_benchmarks/pytorch/test_frontend/test_frontend.py
new file mode 100755
index 0000000000000000000000000000000000000000..f248b7bc259e9b6ebd0ef304991e8df988836010
--- /dev/null
+++ b/hpvm/test/dnn_benchmarks/pytorch/test_frontend/test_frontend.py
@@ -0,0 +1,20 @@
+#!/usr/bin/env python3
+import shutil
+import site
+from pathlib import Path
+from subprocess import run
+from sys import argv
+
+self_folder = Path(__file__).parent.absolute()
+site.addsitedir(self_folder.parent)
+import dnn
+
+netname = argv[1]
+codegen_dir = Path(f"./{netname}")
+print(f"Generating {netname} to {codegen_dir}")
+if codegen_dir.exists():
+    shutil.rmtree(codegen_dir)
+target_binary, _ = dnn.export_example_dnn(netname, codegen_dir, False)
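+# Run the frontend-generated binary on the test set; the test suite compares its
+# accuracy against the HPVM-C baseline using check_dnn_acc.py (see CMakeLists.txt).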
+run([str(target_binary), "test"], check=True)
diff --git a/hpvm/test/dnn_benchmarks/pytorch/vgg16_cifar10.test b/hpvm/test/dnn_benchmarks/pytorch/test_frontend/vgg16_cifar10.test
similarity index 100%
rename from hpvm/test/dnn_benchmarks/pytorch/vgg16_cifar10.test
rename to hpvm/test/dnn_benchmarks/pytorch/test_frontend/vgg16_cifar10.test
diff --git a/hpvm/test/dnn_benchmarks/pytorch/vgg16_cifar100.test b/hpvm/test/dnn_benchmarks/pytorch/test_frontend/vgg16_cifar100.test
similarity index 100%
rename from hpvm/test/dnn_benchmarks/pytorch/vgg16_cifar100.test
rename to hpvm/test/dnn_benchmarks/pytorch/test_frontend/vgg16_cifar100.test
diff --git a/hpvm/test/dnn_benchmarks/pytorch/vgg16_imagenet.test b/hpvm/test/dnn_benchmarks/pytorch/test_frontend/vgg16_imagenet.test
similarity index 100%
rename from hpvm/test/dnn_benchmarks/pytorch/vgg16_imagenet.test
rename to hpvm/test/dnn_benchmarks/pytorch/test_frontend/vgg16_imagenet.test
diff --git a/hpvm/test/dnn_benchmarks/pytorch/test_profiling/alexnet2_cifar10.test b/hpvm/test/dnn_benchmarks/pytorch/test_profiling/alexnet2_cifar10.test
new file mode 100644
index 0000000000000000000000000000000000000000..f544576dcfb7d8ffc282b92751e5c312c81b9b1f
--- /dev/null
+++ b/hpvm/test/dnn_benchmarks/pytorch/test_profiling/alexnet2_cifar10.test
@@ -0,0 +1 @@
+RUN: test_profiling.py alexnet2_cifar10
\ No newline at end of file
diff --git a/hpvm/test/dnn_benchmarks/pytorch/test_profiling/alexnet_cifar10.test b/hpvm/test/dnn_benchmarks/pytorch/test_profiling/alexnet_cifar10.test
new file mode 100644
index 0000000000000000000000000000000000000000..af40686e9a499c8afd94e409da2fc3d64d691c61
--- /dev/null
+++ b/hpvm/test/dnn_benchmarks/pytorch/test_profiling/alexnet_cifar10.test
@@ -0,0 +1 @@
+RUN: test_profiling.py alexnet_cifar10
\ No newline at end of file
diff --git a/hpvm/test/dnn_benchmarks/pytorch/test_profiling/lenet_mnist.test b/hpvm/test/dnn_benchmarks/pytorch/test_profiling/lenet_mnist.test
new file mode 100644
index 0000000000000000000000000000000000000000..f556f9adb045057244d2a600aea56ed795defae8
--- /dev/null
+++ b/hpvm/test/dnn_benchmarks/pytorch/test_profiling/lenet_mnist.test
@@ -0,0 +1 @@
+RUN: test_profiling.py lenet_mnist
\ No newline at end of file
diff --git a/hpvm/test/dnn_benchmarks/profiling/lit.cfg.py b/hpvm/test/dnn_benchmarks/pytorch/test_profiling/lit.cfg.py
similarity index 91%
rename from hpvm/test/dnn_benchmarks/profiling/lit.cfg.py
rename to hpvm/test/dnn_benchmarks/pytorch/test_profiling/lit.cfg.py
index 5c11f61baf0d3d3ec8464d15828d24d7c54f22c0..95ae7c44bf8c4e7d7d6554d0cf86ec40d305fc88 100644
--- a/hpvm/test/dnn_benchmarks/profiling/lit.cfg.py
+++ b/hpvm/test/dnn_benchmarks/pytorch/test_profiling/lit.cfg.py
@@ -29,4 +29,4 @@ config.test_exec_root = current_binary_dir
 llvm_config.with_environment("PATH", config.llvm_tools_dir, append_path=True)
 
 # Add substitution for our main script in this directory.
-llvm_config.add_tool_substitutions(["test_hpvm_c_profiling.py"], config.test_source_root)
+llvm_config.add_tool_substitutions(["test_profiling.py"], config.test_source_root)
diff --git a/hpvm/test/dnn_benchmarks/pytorch/test_profiling/mobilenet_cifar10.test b/hpvm/test/dnn_benchmarks/pytorch/test_profiling/mobilenet_cifar10.test
new file mode 100644
index 0000000000000000000000000000000000000000..18276e5e7920525c44fe71d0662e5f4186601248
--- /dev/null
+++ b/hpvm/test/dnn_benchmarks/pytorch/test_profiling/mobilenet_cifar10.test
@@ -0,0 +1 @@
+RUN: test_profiling.py mobilenet_cifar10
\ No newline at end of file
diff --git a/hpvm/test/dnn_benchmarks/pytorch/test_profiling/resnet18_cifar10.test b/hpvm/test/dnn_benchmarks/pytorch/test_profiling/resnet18_cifar10.test
new file mode 100644
index 0000000000000000000000000000000000000000..676e80393b204aa5db6f3139b690a576fc5e502e
--- /dev/null
+++ b/hpvm/test/dnn_benchmarks/pytorch/test_profiling/resnet18_cifar10.test
@@ -0,0 +1 @@
+RUN: test_profiling.py resnet18_cifar10
\ No newline at end of file
diff --git a/hpvm/test/dnn_benchmarks/pytorch/test_profiling/test_profiling.py b/hpvm/test/dnn_benchmarks/pytorch/test_profiling/test_profiling.py
new file mode 100755
index 0000000000000000000000000000000000000000..66aa33d7cbb142cd473fbbaa51c4ab2a8b6eeff1
--- /dev/null
+++ b/hpvm/test/dnn_benchmarks/pytorch/test_profiling/test_profiling.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+import shutil
+import site
+from pathlib import Path
+from sys import argv
+
+from hpvm_profiler import profile_configs, read_hpvm_configs, write_hpvm_configs
+
+self_folder = Path(__file__).parent.absolute()
+site.addsitedir(self_folder.parent)
+import dnn
+
+netname = argv[1]
+codegen_dir = Path(f"./{netname}")
+if codegen_dir.exists():
+    shutil.rmtree(codegen_dir)
+binary_file, _ = dnn.export_example_dnn(netname, codegen_dir, False)
+config_file = self_folder / "../../hpvm-c/benchmarks" / netname / "data/tuner_confs.txt"
+out_config_file = f"./{netname}.txt"
+header, configs = read_hpvm_configs(config_file)
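+# Profile approximation configs 1-5 against the baseline config 0 (all taken from
+# the HPVM-C benchmark's tuner_confs.txt), then write the first 6 configs back out.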
+profile_configs(binary_file, configs[1:6], configs[0], progress_bar=False)
+write_hpvm_configs(header, configs[:6], out_config_file)
diff --git a/hpvm/test/dnn_benchmarks/pytorch/test_tuning.py b/hpvm/test/dnn_benchmarks/pytorch/test_tuning.py
deleted file mode 100644
index d0451b70b44325a355345ad95ab9bf85154002c5..0000000000000000000000000000000000000000
--- a/hpvm/test/dnn_benchmarks/pytorch/test_tuning.py
+++ /dev/null
@@ -1,80 +0,0 @@
-import os
-import shutil
-import site
-from pathlib import Path
-
-import torch
-from predtuner import config_pylogger
-from predtuner.pipedbin import PipedBinaryApp
-from torch2hpvm import BinDataset, ModelExporter
-from torch.nn import Module
-
-site.addsitedir(os.path.dirname(__file__))
-import dnn
-
-# Set up logger to put log file in /tmp
-msg_logger = config_pylogger(output_dir="/tmp", verbose=True)
-
-
-benchmarks = [
-    (dnn.LeNet, 1, 28, 500, "lenet_mnist"),
-    (dnn.AlexNet, 3, 32, 500, "alexnet_cifar10"),
-    (dnn.AlexNet2, 3, 32, 500, "alexnet2_cifar10"),
-    (dnn.AlexNetImageNet, 3, 224, 100, "alexnet_imagenet"),
-    (dnn.MobileNet, 3, 32, 500, "mobilenet_cifar10"),
-    (dnn.ResNet18, 3, 32, 500, "resnet18_cifar10"),
-    (dnn.ResNet50, 3, 224, 50, "resnet50_imagenet"),
-    (dnn.VGG16Cifar10, 3, 32, 500, "vgg16_cifar10"),
-    (dnn.VGG16Cifar100, 3, 32, 500, "vgg16_cifar100"),
-    (dnn.VGG16ImageNet, 3, 224, 50, "vgg16_imagenet"),
-]
-model_param = Path(__file__).parent / "../model_params"
-
-
-def generate(model_cls, nch, img_size, batch_size, pathname):
-    codegen_dir = Path(f"/tmp/{pathname}_tune")
-    build_dir = codegen_dir / "build"
-    metadata_file = codegen_dir / "ops.json"
-    binary_file = build_dir / pathname
-    build_dir = codegen_dir / "build"
-    # if binary_file.is_file() and metadata_file.is_file():
-    #     return binary_file, metadata_file
-
-    print(f"Generating {pathname} to {codegen_dir}")
-    if codegen_dir.exists():
-        shutil.rmtree(codegen_dir)
-    params = model_param / pathname
-    dataset_shape = 5000, nch, img_size, img_size
-    bin_tuneset = BinDataset(
-        params / "tune_input.bin", params / "tune_labels.bin", dataset_shape
-    )
-    bin_testset = BinDataset(
-        params / "test_input.bin", params / "test_labels.bin", dataset_shape
-    )
-    model: Module = model_cls()
-    checkpoint = model_param / f"{pathname}.pth.tar"
-    model.load_state_dict(torch.load(checkpoint.as_posix()))
-    exporter = ModelExporter(
-        model, bin_tuneset, bin_testset, codegen_dir, target="hpvm_tensor_inspect"
-    )
-    exporter.generate(batch_size=batch_size).compile(binary_file, build_dir)
-    return binary_file, metadata_file
-
-
-def main():
-    for model_cls, nch, img_size, batch_size, pathname in benchmarks:
-        print(f"Testing {pathname}")
-        binary_file, metadata_file = generate(
-            model_cls, nch, img_size, batch_size, pathname
-        )
-        app = PipedBinaryApp("test", binary_file, metadata_file)
-        tuner = app.get_tuner()
-        tuner.tune(100, 3.0, 3.0, True, 50, cost_model="cost_linear")
-        tuner.dump_configs("configs.json")
-        fig = tuner.plot_configs(show_qos_loss=True)
-        fig.savefig("configs.png", dpi=300)
-        app.dump_hpvm_configs(tuner.best_configs, "hpvm_confs.txt")
-
-
-if __name__ == "__main__":
-    main()
\ No newline at end of file
diff --git a/hpvm/test/dnn_benchmarks/pytorch/test_tuning/alexnet_imagenet.test b/hpvm/test/dnn_benchmarks/pytorch/test_tuning/alexnet_imagenet.test
new file mode 100644
index 0000000000000000000000000000000000000000..be88fde546f5ce863619b73bb00519994def225c
--- /dev/null
+++ b/hpvm/test/dnn_benchmarks/pytorch/test_tuning/alexnet_imagenet.test
@@ -0,0 +1 @@
+RUN: test_tuning.py alexnet_imagenet 0
diff --git a/hpvm/test/dnn_benchmarks/pytorch/test_tuning/lenet_mnist_p1.test b/hpvm/test/dnn_benchmarks/pytorch/test_tuning/lenet_mnist_p1.test
new file mode 100644
index 0000000000000000000000000000000000000000..cea2fbf5e7995eb6fa0244ccc69c3ccf3273af27
--- /dev/null
+++ b/hpvm/test/dnn_benchmarks/pytorch/test_tuning/lenet_mnist_p1.test
@@ -0,0 +1 @@
+RUN: test_tuning.py lenet_mnist 1
diff --git a/hpvm/test/dnn_benchmarks/pytorch/test_tuning/lit.cfg.py b/hpvm/test/dnn_benchmarks/pytorch/test_tuning/lit.cfg.py
new file mode 100644
index 0000000000000000000000000000000000000000..4f026e6c61771bd8483573e93ec001289b500351
--- /dev/null
+++ b/hpvm/test/dnn_benchmarks/pytorch/test_tuning/lit.cfg.py
@@ -0,0 +1,32 @@
+# -*- Python -*-
+
+# Configuration file for the 'lit' test runner.
+
+import os
+
+import lit.formats
+from lit.llvm import llvm_config
+
+# name: The name of this test suite.
+config.name = "HPVM-Predtuner"
+
+# testFormat: The test format to use to interpret tests.
+config.test_format = lit.formats.ShTest(False)
+
+# suffixes: A list of file extensions to treat as test files. This is overridden
+# by individual lit.local.cfg files in the test subdirectories.
+config.suffixes = [".test"]
+
+# test_source_root: The root path where tests are located.
+config.test_source_root = os.path.dirname(__file__)
+
+# test_exec_root: The root path where tests should be run.
+current_source_dir = os.path.dirname(os.path.relpath(__file__, config.llvm_src_root))
+current_binary_dir = os.path.join(config.llvm_obj_root, current_source_dir)
+config.test_exec_root = current_binary_dir
+
+# Tweak the PATH to include the tools dir.
+llvm_config.with_environment("PATH", config.llvm_tools_dir, append_path=True)
+
+# Add substitution for our main script in this directory.
+llvm_config.add_tool_substitutions(["test_tuning.py"], config.test_source_root)
diff --git a/hpvm/test/dnn_benchmarks/pytorch/test_tuning/mobilenet_cifar10.test b/hpvm/test/dnn_benchmarks/pytorch/test_tuning/mobilenet_cifar10.test
new file mode 100644
index 0000000000000000000000000000000000000000..88d5c4a0492a4ffb7b02d8f34e6447006138c558
--- /dev/null
+++ b/hpvm/test/dnn_benchmarks/pytorch/test_tuning/mobilenet_cifar10.test
@@ -0,0 +1 @@
+RUN: test_tuning.py mobilenet_cifar10 0
diff --git a/hpvm/test/dnn_benchmarks/pytorch/test_tuning/resnet18_cifar10.test b/hpvm/test/dnn_benchmarks/pytorch/test_tuning/resnet18_cifar10.test
new file mode 100644
index 0000000000000000000000000000000000000000..4d00d4dae23544aea236c0c9015dca3bdad1252a
--- /dev/null
+++ b/hpvm/test/dnn_benchmarks/pytorch/test_tuning/resnet18_cifar10.test
@@ -0,0 +1 @@
+RUN: test_tuning.py resnet18_cifar10 0
diff --git a/hpvm/test/dnn_benchmarks/pytorch/test_tuning/test_tuning.py b/hpvm/test/dnn_benchmarks/pytorch/test_tuning/test_tuning.py
new file mode 100755
index 0000000000000000000000000000000000000000..c589b108d73277977820407f1874cf33053fcc34
--- /dev/null
+++ b/hpvm/test/dnn_benchmarks/pytorch/test_tuning/test_tuning.py
@@ -0,0 +1,45 @@
+#!/usr/bin/env python3
+import shutil
+import site
+from pathlib import Path
+from sys import argv
+
+from predtuner import PipedBinaryApp, config_pylogger
+
+self_folder = Path(__file__).parent.absolute()
+site.addsitedir(self_folder.parent)
+import dnn
+
+# Set up logger
+msg_logger = config_pylogger(output_dir=".", verbose=True)
+
+
+def main():
+    netname, is_pred = argv[1:]
+    is_pred = int(is_pred)
+    # Generating tunable binary
+    codegen_dir = Path(f"./{netname}")
+    if codegen_dir.exists():
+        shutil.rmtree(codegen_dir)
+    binary_file, exporter = dnn.export_example_dnn(netname, codegen_dir, True)
+    metadata_file = codegen_dir / exporter.metadata_file_name
+    # Tuning
+    app = PipedBinaryApp("test", binary_file, metadata_file)
+    tuner = app.get_tuner()
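+    # Run only 5 tuning iterations to keep test time low; is_pred selects the
+    # predictive "qos_p1" QoS model (1) over purely empirical measurement (0).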
+    tuner.tune(
+        5,
+        3.0,
+        is_threshold_relative=True,
+        cost_model="cost_linear",
+        qos_model="qos_p1" if is_pred else "none",
+    )
+    tuner.dump_configs("configs.json")
+    fig = tuner.plot_configs(show_qos_loss=True)
+    fig.savefig("configs.png", dpi=300)
+    app.dump_hpvm_configs(tuner.best_configs, "hpvm_confs.txt")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/hpvm/test/dnn_benchmarks/pytorch/test_tuning/vgg16_cifar10.test b/hpvm/test/dnn_benchmarks/pytorch/test_tuning/vgg16_cifar10.test
new file mode 100644
index 0000000000000000000000000000000000000000..2084c2bd74dcf09535971c09e228182f5ebf5c61
--- /dev/null
+++ b/hpvm/test/dnn_benchmarks/pytorch/test_tuning/vgg16_cifar10.test
@@ -0,0 +1 @@
+RUN: test_tuning.py vgg16_cifar10 0
diff --git a/hpvm/test/lit.site.cfg.py.in b/hpvm/test/lit.site.cfg.py.in
index 0ed68ccfa0d05e797463dcd2e0a1f9030a20b99a..7f1bd1cd0ef9c007a41bea1f3db41f7abf60f449 100644
--- a/hpvm/test/lit.site.cfg.py.in
+++ b/hpvm/test/lit.site.cfg.py.in
@@ -10,4 +10,5 @@ import lit.llvm
 lit.llvm.initialize(lit_config, config)
 
 # Let the main config do the real work.
-lit_config.load_config(config, "@CMAKE_CURRENT_SOURCE_DIR@/lit.cfg.py")
+# (ARG_MAIN_CONFIG is a variable defined in configure_lit_site_cfg)
+lit_config.load_config(config, "@ARG_MAIN_CONFIG@")