diff --git a/hpvm/projects/torch2hpvm/torch2hpvm/__init__.py b/hpvm/projects/torch2hpvm/torch2hpvm/__init__.py
index dd59cd582e9ab5d23c15a38bf6f68eb258a0253a..efc9cef0a12c46aed545ce7179d869f67b2c74f4 100644
--- a/hpvm/projects/torch2hpvm/torch2hpvm/__init__.py
+++ b/hpvm/projects/torch2hpvm/torch2hpvm/__init__.py
@@ -1,2 +1,2 @@
-from .compile import compile_onnx_model, compile_torch_module
+from .compile import ModelExporter, BinDataset
 from .__main__ import main
diff --git a/hpvm/projects/torch2hpvm/torch2hpvm/codegen_hpvm.py b/hpvm/projects/torch2hpvm/torch2hpvm/codegen_hpvm.py
index a9dc8ce75bfe185bb79f7532b0b670180220fbcd..65e73e9a9fbda1033999b86c975f2f4cbbdf7abb 100644
--- a/hpvm/projects/torch2hpvm/torch2hpvm/codegen_hpvm.py
+++ b/hpvm/projects/torch2hpvm/torch2hpvm/codegen_hpvm.py
@@ -1,6 +1,5 @@
-from os import PathLike
 from pathlib import Path
-from typing import Any, Dict, List, Tuple, Union
+from typing import Any, Dict, List, Optional, Tuple, Union
 
 import jinja2
 
@@ -12,27 +11,20 @@ loader = jinja2.FileSystemLoader(searchpath=Path(__file__).parent)
 template_env = jinja2.Environment(loader=loader, trim_blocks=True)
 template = template_env.get_template(TEMPLATE_FILE)
 
+PathLike = Union[str, Path]
+
 
 class CodeGen:
-    def __init__(
-        self,
-        dfg: DFG,
-        output_dir: PathLike,
-        input_size: int,
-        batch_size: int = None,
-        prefix: str = None,
-    ):
+    def __init__(self, dfg: DFG, prefix: PathLike, input_size: int):
         self.dfg = dfg
         self.var_count = 0
-        self.output_dir = Path(output_dir)
-        self.prefix = prefix
+        self.prefix = Path(prefix)
         # Some reasoning of input information
         assert len(self.dfg.inputs) == 1
         input_tensor = self.dfg.inputs[0]
         self.input_name = input_tensor.name
         self.input_shape = input_tensor.shape[1:]
         self.input_size = input_size
-        self.batch_size = batch_size or input_size
         # self.variables is a "node to our name" map
         # Each value is (varname, bool) and the bool indicates
         # "is root node input" or not.
@@ -127,22 +119,21 @@ class HpvmCodeGen(CodeGen):
         output_var_idx = self.variables[self.dfg.output][0]
         return input_args, output_var_idx
 
-    def compile(self) -> None:
+    def compile(self, output: PathLike, batch_size: Optional[int] = None) -> None:
         nodes = self.emit_hpvm_node_structures()
         inputs, output_var_idx = self.emit_root_io()
         weights = self.emit_weights(self.weights)
-        prefix = self.prefix or self.output_dir
-        with open(self.output_dir / "hpvm_src.cc", "w") as f:
+        with Path(output).open("w") as f:
             f.write(
                 template.render(
                     nodes=nodes,
                     input_name=self.input_name,
                     input_size=self.input_size,
-                    batch_size=self.batch_size,
+                    batch_size=batch_size or self.input_size,
                     input_shape=self.input_shape,
                     root_inputs=inputs,
                     root_output_idx=output_var_idx,
                     weights=weights,
-                    prefix=prefix,
+                    prefix=self.prefix,
                 )
             )
diff --git a/hpvm/projects/torch2hpvm/torch2hpvm/codegen_tensor.py b/hpvm/projects/torch2hpvm/torch2hpvm/codegen_tensor.py
index 3c511803274534f8a88c9d62d049df918ec67010..5faa1a16951f719f8f1d55a92b2c9e4417ea3883 100644
--- a/hpvm/projects/torch2hpvm/torch2hpvm/codegen_tensor.py
+++ b/hpvm/projects/torch2hpvm/torch2hpvm/codegen_tensor.py
@@ -1,9 +1,9 @@
 from pathlib import Path
-from typing import Dict, List
+from typing import Dict, List, Optional
 
 import jinja2
 
-from .codegen_hpvm import CodeGen
+from .codegen_hpvm import CodeGen, PathLike
 from .graph_ir import DFGNode, TensorNode
 
 TEMPLATE_FILE = "template_tensor.cpp.in"
@@ -49,10 +49,10 @@ class TensorCodeGen(CodeGen):
     # program with HPVM Tensor Runtime
     ################################################
 
-    def compile(self):
+    def compile(self, output: PathLike, batch_size: Optional[int] = None):
         graph_code = self.emit_graph()
         output_arg = self.variables[self.dfg.output]
-        with open(self.output_dir / "src.cc", "w") as f:
+        with Path(output).open("w") as f:
             f.write(
                 template.render(
                     input=self.input_name,
@@ -60,6 +60,6 @@ class TensorCodeGen(CodeGen):
                     output=output_arg,
                     graph_code=graph_code,
                     weights=self.emit_weights(self.weights),
-                    output_dir=self.output_dir,
+                    prefix=self.prefix,
                 )
             )
diff --git a/hpvm/projects/torch2hpvm/torch2hpvm/compile.py b/hpvm/projects/torch2hpvm/torch2hpvm/compile.py
index f77e9f63b9b17046cf24b215670891f7bfa67746..6bfe48e72bb55e0b75d7dc75892326a2e93b4081 100644
--- a/hpvm/projects/torch2hpvm/torch2hpvm/compile.py
+++ b/hpvm/projects/torch2hpvm/torch2hpvm/compile.py
@@ -1,18 +1,151 @@
 import os
 from pathlib import Path
 from tempfile import NamedTemporaryFile
-from typing import IO, Optional, Sequence, Union
+from typing import IO, NamedTuple, Optional, Sequence, Tuple, Union
 
 import onnx
 import torch
 from onnx import version_converter
 from torch.nn import Module
+from torch.utils.data import Dataset
 
-from .codegen_hpvm import HpvmCodeGen
+from .codegen_hpvm import HpvmCodeGen, PathLike
 from .codegen_tensor import TensorCodeGen
 from .graph_builder import DFG
 
-PathLike = Union[Path, str]
+
+class BinDataset(NamedTuple):
+    input_file: PathLike
+    labels_file: PathLike
+    shape: Sequence[int]
+
+
+# Path to ONNX model, loaded ONNX model, or PyTorch Module
+ModelTy = Union[PathLike, onnx.ModelProto, Module]
+# Path to a pair of HPVM bin format file (input, labels), or PyTorch Dataset
+DatasetTy = Union[BinDataset, Dataset]
+
+
+class ModelExporter:
+    tuneset_name = "tune_input.bin", "tune_labels.bin"
+    testset_name = "test_input.bin", "test_labels.bin"
+
+    def __init__(
+        self,
+        model: ModelTy,
+        tune_dataset: DatasetTy,
+        test_dataset: DatasetTy,
+        weight_dir: PathLike,
+        hpvmc: bool,
+        opset: Optional[int] = None,
+    ):
+        self.tune_dataset, self.test_dataset = tune_dataset, test_dataset
+        self.dataset_shape = self._check_datasets(tune_dataset, test_dataset)
+        self.dataset_size = self.dataset_shape[0]
+        onnx_model = self._load_model(model, self.dataset_shape)
+        if opset is not None:
+            onnx_model = check_onnx_version(onnx_model, opset)
+        self.onnx_model = onnx.shape_inference.infer_shapes(onnx_model)
+
+        self.dfg = DFG(self.onnx_model.graph)
+        self.weight_dir = Path(weight_dir)
+        os.makedirs(weight_dir, exist_ok=True)
+        flavor = HpvmCodeGen if hpvmc else TensorCodeGen
+        self.codegen = flavor(self.dfg, weight_dir, self.dataset_size)
+
+    def export_source_code(self, output: PathLike, batch_size: Optional[int] = None):
+        self.codegen.compile(output, batch_size)
+
+    def export_weights(self):
+        self.dfg.dump_weights(self.weight_dir)
+
+    def export_datasets(self):
+        input_, labels = self.tuneset_name
+        self._dump_dataset(self.tune_dataset, self.weight_dir / input_, self.weight_dir / labels)
+        input_, labels = self.testset_name
+        self._dump_dataset(self.test_dataset, self.weight_dir / input_, self.weight_dir / labels)
+
+    def export_all(self, output: PathLike, batch_size: Optional[int] = None):
+        self.export_source_code(output, batch_size)
+        self.export_weights()
+        self.export_datasets()
+
+    @staticmethod
+    def _dump_dataset(dataset: DatasetTy, input_filename: Path, label_filename: Path):
+        import numpy as np
+        from torch.utils.data import DataLoader
+
+        if isinstance(dataset, BinDataset):
+            # Link the user-provided bin files into the weight directory.
+            Path(input_filename).symlink_to(Path(dataset.input_file).absolute())
+            Path(label_filename).symlink_to(Path(dataset.labels_file).absolute())
+            return
+        inputs, labels = zip(*iter(DataLoader(dataset)))
+        inputs = np.stack(inputs, axis=0)
+        labels = np.stack(labels, axis=0)
+        inputs.tofile(input_filename)
+        labels.tofile(label_filename)
+
+    @classmethod
+    def _check_datasets(cls, tune_dataset: DatasetTy, test_dataset: DatasetTy) -> Tuple[int, int, int, int]:
+        tune_shape = cls._check_dataset_get_shape(tune_dataset)
+        test_shape = cls._check_dataset_get_shape(test_dataset)
+        if tune_shape != test_shape:
+            raise ValueError(
+                f"Size of tune and test dataset must match (got {tune_shape} and {test_shape})"
+            )
+        return tuple(tune_shape)
+
+    @staticmethod
+    def _check_dataset_get_shape(dataset: DatasetTy) -> Sequence[int]:
+        import numpy as np
+
+        if isinstance(dataset, Dataset):
+            size = len(dataset)
+            sample = dataset[0]
+            if not isinstance(sample, (np.ndarray, torch.Tensor)) or len(sample.shape) != 4:
+                raise ValueError("Dataset must be a 4D tensor due to backend limitation")
+            return size, *sample.shape
+        if not isinstance(dataset, BinDataset):
+            raise TypeError("Only BinDataset or PyTorch Dataset are supported")
+        input_file = Path(dataset.input_file)
+        labels_file = Path(dataset.labels_file)
+        if not input_file.is_file():
+            raise FileNotFoundError(f"Input file {input_file}")
+        if not labels_file.is_file():
+            raise FileNotFoundError(f"Labels file {labels_file}")
+        if len(dataset.shape) != 4:
+            raise ValueError("Dataset must be a 4D tensor due to backend limitation")
+        float_size = np.dtype(np.float32).itemsize
+        expected_input_size = np.array(dataset.shape).prod() * float_size
+        int32_size = np.dtype(np.int32).itemsize
+        expected_labels_size = dataset.shape[0] * int32_size
+        input_size = input_file.stat().st_size
+        labels_size = labels_file.stat().st_size
+        if input_size != expected_input_size:
+            raise RuntimeError(
+                f"Input file {input_file} should have size {expected_input_size} "
+                f"(shape {dataset.shape}), but got {input_size}"
+            )
+        if labels_size != expected_labels_size:
+            raise RuntimeError(
+                f"Labels file {labels_file} should have size {expected_labels_size} "
+                f"(dataset length {dataset.shape[0]}), but got {labels_size}"
+            )
+        return dataset.shape
+
+    @staticmethod
+    def _load_model(model: ModelTy, dataset_shape: Sequence[int]) -> onnx.ModelProto:
+        if isinstance(model, Module):
+            # Export to ONNX and load back.
+            sample_input_shape = 1, *dataset_shape[1:]
+            sample_input = torch.rand(sample_input_shape)
+            with NamedTemporaryFile("w+b") as tmp:
+                torch_to_onnx(model, (sample_input,), tmp)
+                tmp.seek(0)
+                return onnx.load_model(tmp)
+        if isinstance(model, onnx.ModelProto):
+            return model
+        return onnx.load(Path(model).as_posix())
 
 
 def check_onnx_version(model, new_version):
@@ -21,7 +154,6 @@ def check_onnx_version(model, new_version):
     except AttributeError:
         opset = 1  # default opset version set to 1 if not specified
     if opset != new_version:
-        try:
         converted_model = version_converter.convert_version(model, new_version)
         return converted_model
 
@@ -49,57 +181,5 @@ def torch_to_onnx(
         do_constant_folding=True,  # whether to execute constant folding for optimization
         input_names=["input"],  # the model's input names
         output_names=["output"],  # the model's output names
-        dynamic_axes={
-            "input": {0: "batch_size"},  # variable length axes
-            "output": {0: "batch_size"},
-        },
         strip_doc_string=False,
     )
-
-
-def compile_onnx_model(
-    file_or_model: Union[PathLike, onnx.ModelProto],
-    output_dir: PathLike,
-    dataset_size: int,
-    hpvmc: bool,
-    prefix: Optional[str] = None,
-    batch_size: Optional[int] = None,
-    opset: Optional[int] = None,
-):
-    if isinstance(file_or_model, onnx.ModelProto):
-        model = file_or_model
-    else:
-        model = onnx.load(Path(file_or_model).as_posix())
-    if opset is not None:
-        model = check_onnx_version(model, opset)
-    model = onnx.shape_inference.infer_shapes(model)
-    dfg = DFG(model.graph)
-    output_dir = Path(output_dir)
-    os.makedirs(output_dir, exist_ok=True)
-    if hpvmc:
-        hpvm_code_gen = HpvmCodeGen(dfg, output_dir, dataset_size, batch_size, prefix)
-        hpvm_code_gen.compile()
-    else:
-        tensor_code_gen = TensorCodeGen(dfg, output_dir, dataset_size, batch_size, prefix)
-        tensor_code_gen.compile()
-    dfg.dump_weights(output_dir)
-
-
-def compile_torch_module(
-    module: Module,
-    input_shape: Sequence[int],
-    output_dir: PathLike,
-    hpvmc: bool,
-    prefix: Optional[str] = None,
-    batch_size: Optional[int] = None,
-):
-    dataset_size, *single_input_shape = input_shape
-    sample_input_shape = 1, *single_input_shape
-    sample_input = torch.rand(sample_input_shape)
-    with NamedTemporaryFile("w+b") as tmp:
-        torch_to_onnx(module, (sample_input, ), tmp)
-        tmp.seek(0)
-        onnx_model = onnx.load_model(tmp)
-    compile_onnx_model(
-        onnx_model, output_dir, dataset_size, hpvmc, prefix, batch_size
-    )
diff --git a/hpvm/projects/torch2hpvm/torch2hpvm/graph_builder.py b/hpvm/projects/torch2hpvm/torch2hpvm/graph_builder.py
index bd04c15d96b7a9f6a0620e17a08f0cfabd5f3e43..a07c18e72dbab7490e44e5814fa974e3b497c653 100644
--- a/hpvm/projects/torch2hpvm/torch2hpvm/graph_builder.py
+++ b/hpvm/projects/torch2hpvm/torch2hpvm/graph_builder.py
@@ -1,5 +1,4 @@
 from collections import defaultdict
-from os import PathLike
 from pathlib import Path
 from typing import Dict, Iterable, List, Optional, Tuple, Union
 
@@ -9,6 +8,7 @@ import onnx
 from . import graph_ir as g
 from .onnx_attr import node_attr_to_dict, node_to_shape
 
+PathLike = Union[str, Path]
 GraphT = onnx.GraphProto
 NodeT = onnx.NodeProto
 NodeT.__hash__ = lambda self: id(self)
@@ -89,6 +89,7 @@ class DFG(object):
         """Check model validaty and single output (which is our limitation)"""
         import warnings
+
         from onnx import checker, onnx_cpp2py_export
 
         # try use onnx's own model checker before converting any model
@@ -110,7 +111,7 @@ class DFG(object):
         onnx_defs, onnx_uses = def_use(graph.node)
         tensors = extract_tensors_from_graph(graph)
         node_shape = node_to_shape(graph)
-        node_and_attr = [(n, {'shape': shape}) for n, shape in node_shape.items()]
+        node_and_attr = [(n, {"shape": shape}) for n, shape in node_shape.items()]
         ret_graph.add_nodes_from(node_and_attr)
         for onnx_value_name, use_nodes in onnx_uses.items():
             def_node = onnx_defs.get(onnx_value_name)
diff --git a/hpvm/projects/torch2hpvm/torch2hpvm/template_tensor.cpp.in b/hpvm/projects/torch2hpvm/torch2hpvm/template_tensor.cpp.in
index ae3060836451069ed6aee9321d714db1b66c1723..948bba932154705261c67b4769f5a53da03e7f94 100644
--- a/hpvm/projects/torch2hpvm/torch2hpvm/template_tensor.cpp.in
+++ b/hpvm/projects/torch2hpvm/torch2hpvm/template_tensor.cpp.in
@@ -11,7 +11,7 @@
 #include "../include/utils.h"
 
 int main() {
-  std::string dir_prefix = "{{output_dir}}";
+  std::string dir_prefix = "{{prefix}}";
   std::string input_path = dir_prefix + "input.bin";
   std::string labels_path = dir_prefix + "labels.bin";
 {% for w in weights %}
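
For reference, a minimal usage sketch of the ModelExporter API introduced by this patch, assuming a hypothetical ONNX model file lenet_mnist.onnx and tune/test splits already stored in HPVM's bin format (float32 inputs, int32 labels); the file names, the (5000, 1, 28, 28) shape, the weight_dir path, and the batch size are placeholders, not part of the patch:

    from torch2hpvm import BinDataset, ModelExporter

    # Tune and test sets must share the same 4D (N, C, H, W) shape.
    tune_set = BinDataset("tune_input.bin", "tune_labels.bin", (5000, 1, 28, 28))
    test_set = BinDataset("test_input.bin", "test_labels.bin", (5000, 1, 28, 28))

    exporter = ModelExporter(
        "lenet_mnist.onnx",  # path to an ONNX model, an onnx.ModelProto, or a torch.nn.Module
        tune_set,
        test_set,
        weight_dir="./exported",  # weights and dataset files/symlinks are written here
        hpvmc=True,               # True: HPVM-C codegen; False: tensor-runtime codegen
    )
    # Generates source code, dumps weights, and exports both datasets in one call.
    exporter.export_all("./exported/hpvm_src.cc", batch_size=500)

hpvmc selects between the two code generators removed from compile_onnx_model: True uses HpvmCodeGen, False uses TensorCodeGen, mirroring the flavor selection in ModelExporter.__init__.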