diff --git a/hpvm/lib/Transforms/DFG2LLVM_OpenCL/CMakeLists.txt b/hpvm/lib/Transforms/DFG2LLVM_OpenCL/CMakeLists.txt
index 4041df11ce8d79e39d6f72bdf0a1068eae449300..1e34f44f403e373607d2efc10aca7f796c81b938 100644
--- a/hpvm/lib/Transforms/DFG2LLVM_OpenCL/CMakeLists.txt
+++ b/hpvm/lib/Transforms/DFG2LLVM_OpenCL/CMakeLists.txt
@@ -10,6 +10,7 @@ add_llvm_library( LLVMDFG2LLVM_OpenCL
 
   DEPENDS
   intrinsics_gen
+  llvm-cbe  # Called within the pass
   PLUGIN_TOOL
   opt
   )
diff --git a/hpvm/lib/Transforms/DFG2LLVM_OpenCL/DFG2LLVM_OpenCL.cpp b/hpvm/lib/Transforms/DFG2LLVM_OpenCL/DFG2LLVM_OpenCL.cpp
index 5a58f272b3042a4ebfc2e3c7bb3606b5c19e8d84..286bcdebd2ed3322b134581f3e8fdac286ee325b 100644
--- a/hpvm/lib/Transforms/DFG2LLVM_OpenCL/DFG2LLVM_OpenCL.cpp
+++ b/hpvm/lib/Transforms/DFG2LLVM_OpenCL/DFG2LLVM_OpenCL.cpp
@@ -47,6 +47,8 @@
 #include "llvm/Support/ToolOutputFile.h"
 
 #include <sstream>
+#include <unistd.h>  // ".", nullptr() (POSIX-only)
+#include <cstdlib>  // std::system
 
 #ifndef LLVM_BUILD_DIR
 #error LLVM_BUILD_DIR is not defined
@@ -147,7 +149,7 @@ static void getExecuteNodeParams(Module &M, Value *&, Value *&, Value *&,
 static Value *genWorkGroupPtr(Module &M, std::vector<Value *>,
                               ValueToValueMapTy &, Instruction *,
                               const Twine &WGName = "WGSize");
-static std::string getPTXFilename(const Module &);
+static std::string getPTXFilePath(const Module &);
 static void changeDataLayout(Module &);
 static void changeTargetTriple(Module &);
 static void findReturnInst(Function *, std::vector<ReturnInst *> &);
@@ -258,7 +260,7 @@ public:
     DEBUG(errs() << *KernelM);
   }
 
-  void writeKernelsModule();
+  void writeKModCompilePTX();
 };
 
 // Initialize the HPVM runtime API. This makes it easier to insert these calls
@@ -846,7 +848,7 @@ void CGT_OpenCL::codeGen(DFInternalNode *N) {
     // Now the remaining nodes to be visited should be ignored
     KernelLaunchNode = NULL;
     DEBUG(errs() << "Insert Runtime calls\n");
-    insertRuntimeCalls(N, kernel, getPTXFilename(M));
+    insertRuntimeCalls(N, kernel, getPTXFilePath(M));
 
   } else {
     DEBUG(errs() << "Found intermediate node. Getting size parameters.\n");
@@ -1901,7 +1903,7 @@ bool DFG2LLVM_OpenCL::runOnModule(Module &M) {
     CGTVisitor->visit(rootNode);
   }
 
-  CGTVisitor->writeKernelsModule();
+  CGTVisitor->writeKModCompilePTX();
 
   // TODO: Edit module epilogue to remove the HPVM intrinsic declarations
   delete CGTVisitor;
@@ -2010,16 +2012,18 @@ void CGT_OpenCL::addCLMetadata(Function *F) {
   MDN_annotations->addOperand(MDNvvmAnnotationsNode);
 }
 
-void CGT_OpenCL::writeKernelsModule() {
+void CGT_OpenCL::writeKModCompilePTX() {
 
   // In addition to deleting all other functions, we also want to spiff it
   // up a little bit.  Do this now.
   legacy::PassManager Passes;
+  auto kmodName = getKernelsModuleName(M);
+  auto ptxPath = getPTXFilePath(M);
 
   DEBUG(errs() << "Writing to File --- ");
-  DEBUG(errs() << getKernelsModuleName(M).c_str() << "\n");
+  DEBUG(errs() << kmodName << "\n");
   std::error_code EC;
-  ToolOutputFile Out(getKernelsModuleName(M).c_str(), EC, sys::fs::F_None);
+  ToolOutputFile Out(kmodName.c_str(), EC, sys::fs::F_None);
   if (EC) {
     DEBUG(errs() << EC.message() << '\n');
   }
@@ -2030,6 +2034,13 @@ void CGT_OpenCL::writeKernelsModule() {
 
   // Declare success.
   Out.keep();
+
+  // Starts calling llvm-cbe.
+  auto llvmCBE = std::string(LLVM_BUILD_DIR_STR) + "/bin/llvm-cbe";
+  std::string command = llvmCBE + " " + kmodName + " -o " + ptxPath;
+  DEBUG(errs() << "Compiling PTX from kernel module:\n");
+  DEBUG(errs() << command);
+  std::system(command.c_str());
 }
 
 Function *CGT_OpenCL::transformFunctionToVoid(Function *F) {
@@ -2346,10 +2357,13 @@ static Value *genWorkGroupPtr(Module &M, std::vector<Value *> WGSize,
 }
 
 // Get generated PTX binary name
-static std::string getPTXFilename(const Module &M) {
+static std::string getPTXFilePath(const Module &M) {
   std::string moduleID = M.getModuleIdentifier();
-  moduleID.append(".kernels.cl");
-  return moduleID;
+  char *cwd_p = get_current_dir_name();
+  std::string cwd(cwd_p);
+  free(cwd_p);
+  std::string ptxPath = cwd + "/" + moduleID + ".kernels.cl";
+  return ptxPath;
 }
 
 // Changes the data layout of the Module to be compiled with OpenCL backend
diff --git a/hpvm/test/CMakeLists.txt b/hpvm/test/CMakeLists.txt
index cb548b84dc9944e54c7dfdd0a0d74cf0aed9aafd..07024b6febc0c27dcc40f166262ab441e7f5675d 100644
--- a/hpvm/test/CMakeLists.txt
+++ b/hpvm/test/CMakeLists.txt
@@ -1,5 +1,13 @@
 include(../cmake/TestFile.cmake)  # Generation of `.test` files in CMake
+
+# approxhpvm.py, clang and clang++ are used to compile benchmarks
+# in `benchmarks` and `dnn_benchmarks/hpvm-c`.
+set(APPROXHPVM_PY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/approxhpvm.py)
+set(CLANG_C ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/clang)
+set(CLANG_CXX ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/clang++)
+
 add_subdirectory(hpvm_pass)  # Passes test suite
+add_subdirectory(benchmarks)
 add_subdirectory(dnn_benchmarks/hpvm-c)  # HPVM-C DNN accuracy test suite
 add_subdirectory(dnn_benchmarks/tensor-rt-src)  # tensor_runtime DNN (build only, no tests)
 add_subdirectory(dnn_benchmarks/profiling)  # hpvm-profiler test suite
diff --git a/hpvm/test/benchmarks/CMakeLists.txt b/hpvm/test/benchmarks/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2c226aefd58f20c296db232f1a03c35bed0b7195
--- /dev/null
+++ b/hpvm/test/benchmarks/CMakeLists.txt
@@ -0,0 +1,38 @@
+macro(hpvm_compile_util_sources compiler_flags util_srcs language_mode)
+  # "Returns" ${util_bitcodes}, a list of paths to generated bitcode (.ll) files
+  foreach(src ${util_srcs})
+    set(src_path ${CMAKE_CURRENT_SOURCE_DIR}/${src})
+    get_filename_component(src_name ${src_path} NAME)
+    set(output_path ${CMAKE_CURRENT_BINARY_DIR}/${src_name}.ll)
+    add_custom_command(
+        OUTPUT ${output_path}
+        COMMAND ${CLANG_C} -x${language_mode}
+          ${compiler_flags} ${src_path} -emit-llvm -S -o ${output_path}
+        DEPENDS ${src_path}
+    )
+    list(APPEND util_bitcodes ${output_path})
+  endforeach()
+endmacro()
+
+function(add_hpvm_benchmark
+  target_name output_bin_name all_flags language_mode
+  main_src util_bitcodes
+)
+  set(output_bin_path ${CMAKE_CURRENT_BINARY_DIR}/${output_bin_name})
+  set(main_src_path ${CMAKE_CURRENT_SOURCE_DIR}/${main_src})
+  if(util_bitcodes)
+    set(bitcodes_arg -b ${util_bitcodes})
+  else()
+    set(bitcodes_arg "")
+  endif()
+  add_custom_command(
+    OUTPUT ${output_bin_path}
+    COMMAND ${APPROXHPVM_PY} ${all_flags} -x${language_mode}
+      ${bitcodes_arg} -- ${main_src_path} ${output_bin_path}
+    DEPENDS ${main_src_path} ${util_bitcodes} approxhpvm.py
+  )
+  add_custom_target(${target_name} DEPENDS ${output_bin_path})
+endfunction(add_hpvm_benchmark)
+
+add_subdirectory(hpvm-cava)
+add_subdirectory(pipeline)
diff --git a/hpvm/test/benchmarks/hpvm-cava/CMakeLists.txt b/hpvm/test/benchmarks/hpvm-cava/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..57e3bd3f33c580c9244634608402009804dbc38e
--- /dev/null
+++ b/hpvm/test/benchmarks/hpvm-cava/CMakeLists.txt
@@ -0,0 +1,22 @@
+set(other_srcs src/load_cam_model.c src/cam_pipe_utility.c src/utility.c)
+set(
+  compiler_flags
+  # Insanity warning: only -O1 works for this benchmark.
+  -O1 -I ${CMAKE_CURRENT_SOURCE_DIR}/src
+  -DDMA_MODE -DDMA_INTERFACE_V3
+)
+
+# Sets ${util_bitcodes}
+hpvm_compile_util_sources("${compiler_flags}" "${other_srcs}" "c")
+
+set(all_flags_cpu ${compiler_flags} "-DDEVICE=CPU_TARGET" "-lpthread")
+add_hpvm_benchmark(
+  "hpvm_cava_cpu" "hpvm-cava-cpu" "${all_flags_cpu}" "c"
+  src/main.c "${util_bitcodes}"
+)
+
+set(all_flags_gpu ${compiler_flags} "-DDEVICE=GPU_TARGET" "--opencl" "-lpthread")
+add_hpvm_benchmark(
+  "hpvm_cava_gpu" "hpvm-cava-gpu" "${all_flags_gpu}" "c"
+  src/main.c "${util_bitcodes}"
+)
diff --git a/hpvm/test/benchmarks/hpvm-cava/Makefile b/hpvm/test/benchmarks/hpvm-cava/Makefile
deleted file mode 100644
index 58dfa72aacb172252ea1c13ed3331322fb600861..0000000000000000000000000000000000000000
--- a/hpvm/test/benchmarks/hpvm-cava/Makefile
+++ /dev/null
@@ -1,131 +0,0 @@
-# This Makefile compiles the HPVM-CAVA pilot project. 
-# It builds HPVM-related dependencies, then the native camera pipeline ISP code.
-#
-# Paths to some dependencies (e.g., HPVM, LLVM) must exist in Makefile.config,
-# which can be copied from Makefile.config.example for a start.
-
-CONFIG_FILE := ../include/Makefile.config
-
-ifeq ($(wildcard $(CONFIG_FILE)),)
-    $(error $(CONFIG_FILE) not found. See $(CONFIG_FILE).example)
-endif
-include $(CONFIG_FILE)
-
-# Compiler Flags
-
-LFLAGS += -lm -lrt
-
-ifeq ($(TARGET),)
-    TARGET = gpu
-endif
-
-# Build dirs
-SRC_DIR = src/
-CAM_PIPE_SRC_DIR = $(SRC_DIR)
-BUILD_DIR = build/$(TARGET)
-CURRENT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
-
-
-INCLUDES += -I$(SRC_DIR) -I$(CAM_PIPE_SRC_DIR)
-ifneq ($(CONFUSE_ROOT),)
-  INCLUDES += -I$(CONFUSE_ROOT)/include
-  LFLAGS += -L$(CONFUSE_ROOT)/lib
-endif
-
-EXE = cava-hpvm-$(TARGET)
-
-LFLAGS += -pthread
-
-## BEGIN HPVM MAKEFILE
-LANGUAGE=hpvm
-SRCDIR_OBJS=load_cam_model.ll cam_pipe_utility.ll utility.ll
-OBJS_SRC=src/load_cam_model.c src/cam_pipe_utility.c src/utility.c
-HPVM_OBJS=main.hpvm.ll
-APP = $(EXE)
-APP_CUDALDFLAGS=-lm -lstdc++
-APP_CFLAGS= $(INCLUDES) -DDMA_MODE -DDMA_INTERFACE_V3
-APP_CXXFLAGS=-ffast-math -O0 -I/opt/opencv/include
-APP_LDFLAGS=$(LFLAGS)
-
-CFLAGS = -O1 $(APP_CFLAGS) $(PLATFORM_CFLAGS)
-OBJS_CFLAGS = -O1 $(APP_CFLAGS) $(PLATFORM_CFLAGS)
-CXXFLAGS = $(APP_CXXFLAGS) $(PLATFORM_CXXFLAGS)
-LDFLAGS= $(APP_LDFLAGS) $(PLATFORM_LDFLAGS)
-
-HPVM_RT_PATH = $(LLVM_BUILD_DIR)/tools/hpvm/projects/hpvm-rt
-
-HPVM_RT_LIB = $(HPVM_RT_PATH)/hpvm-rt.bc
-
-
-TESTGEN_OPTFLAGS = -load LLVMGenHPVM.so -genhpvm -globaldce
-
-ifeq ($(TARGET),seq)
-  DEVICE = CPU_TARGET
-  HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -dfg2llvm-cpu -clearDFG
-  HPVM_OPTFLAGS += -hpvm-timers-cpu
-else
-  DEVICE = GPU_TARGET
-  HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -localmem -dfg2llvm-opencl -dfg2llvm-cpu -clearDFG
-  HPVM_OPTFLAGS += -hpvm-timers-cpu -hpvm-timers-ptx
-endif
-  TESTGEN_OPTFLAGS += -hpvm-timers-gen
-
-CFLAGS += -DDEVICE=$(DEVICE)
-CXXFLAGS += -DDEVICE=$(DEVICE)
-
-# Add BUILDDIR as a prefix to each element of $1
-INBUILDDIR=$(addprefix $(BUILD_DIR)/,$(1))
-
-.PRECIOUS: $(BUILD_DIR)/%.ll
-
-OBJS = $(call INBUILDDIR,$(SRCDIR_OBJS))
-TEST_OBJS = $(call INBUILDDIR,$(HPVM_OBJS))
-KERNEL = $(TEST_OBJS).kernels.ll
-
-ifeq ($(TARGET),gpu)
-  KERNEL_OCL = $(TEST_OBJS).kernels.cl
-endif
-
-HOST_LINKED = $(BUILD_DIR)/$(APP).linked.ll
-HOST = $(BUILD_DIR)/$(APP).host.ll
-
-ifeq ($(OPENCL_PATH),)
-FAILSAFE=no_opencl
-else 
-FAILSAFE=
-endif
-
-# Targets
-
-default: $(FAILSAFE) $(BUILD_DIR) $(KERNEL_OCL) $(EXE)
-
-clean :
-	if [ -f $(EXE) ]; then rm $(EXE); fi
-	if [ -f DataflowGraph.dot ]; then rm DataflowGraph.dot*; fi
-	if [ -d $(BUILD_DIR) ]; then rm -rf $(BUILD_DIR); fi
-
-$(KERNEL_OCL) : $(KERNEL)
-	$(OCLBE) $< -o $@
-
-$(EXE) : $(HOST_LINKED)
-	$(CXX) -O3 $(LDFLAGS) $< -o $@
-
-$(HOST_LINKED) : $(HOST) $(OBJS) $(HPVM_RT_LIB)
-	$(LLVM_LINK) $^ -S -o $@
-
-$(HOST) $(KERNEL): $(BUILD_DIR)/$(HPVM_OBJS)
-	$(OPT) $(HPVM_OPTFLAGS) -S $< -o $(HOST)
-
-$(BUILD_DIR):
-	mkdir -p $(BUILD_DIR)
-
-$(BUILD_DIR)/%.ll : $(SRC_DIR)/%.c
-	$(CC) $(OBJS_CFLAGS) -emit-llvm -S -o $@ $<
-
-$(BUILD_DIR)/main.ll : $(SRC_DIR)/main.c
-	$(CC) $(CFLAGS) -emit-llvm -S -o $@ $<
-
-$(BUILD_DIR)/main.hpvm.ll : $(BUILD_DIR)/main.ll
-	$(OPT) $(TESTGEN_OPTFLAGS) $< -S -o $@
-
-## END HPVM MAKEFILE
diff --git a/hpvm/test/benchmarks/hpvm-cava/src/main.c b/hpvm/test/benchmarks/hpvm-cava/src/main.c
index 09430239d7c06a0b2fab2a59c7e6310babe617c6..0d8fb37d0103b55234e59e8d2b66742faed207b6 100644
--- a/hpvm/test/benchmarks/hpvm-cava/src/main.c
+++ b/hpvm/test/benchmarks/hpvm-cava/src/main.c
@@ -53,6 +53,7 @@ typedef struct __attribute__((__packed__)) {
 } RootIn;
 
 typedef enum _argnum {
+  CAM_MODEL,
   RAW_IMAGE_BIN,
   OUTPUT_IMAGE_BIN,
   NUM_REQUIRED_ARGS,
@@ -67,7 +68,7 @@ typedef struct _arguments {
 } arguments;
 
 static char prog_doc[] = "\nCamera pipeline on gem5-Aladdin.\n";
-static char args_doc[] = "path/to/raw-image-binary path/to/output-image-binary";
+static char args_doc[] = "path/to/cam-model path/to/raw-image-binary path/to/output-image-binary";
 static struct argp_option options[] = {
     {"num-inputs", 'n', "N", 0, "Number of input images"},
     {0},
@@ -724,14 +725,14 @@ int main(int argc, char *argv[]) {
       *transform_out, *gamut_out;
   float *TsTw, *ctrl_pts, *weights, *coefs, *tone_map, *l2_dist;
 
-  TsTw = get_TsTw("cam_models/NikonD7000/", wb_index);
+  TsTw = get_TsTw(args.args[CAM_MODEL], wb_index);
   float *trans = transpose_mat(TsTw, CHAN_SIZE, CHAN_SIZE);
   free(TsTw);
   TsTw = trans;
-  ctrl_pts = get_ctrl_pts("cam_models/NikonD7000/", num_ctrl_pts);
-  weights = get_weights("cam_models/NikonD7000/", num_ctrl_pts);
-  coefs = get_coefs("cam_models/NikonD7000/", num_ctrl_pts);
-  tone_map = get_tone_map("cam_models/NikonD7000/");
+  ctrl_pts = get_ctrl_pts(args.args[CAM_MODEL], num_ctrl_pts);
+  weights = get_weights(args.args[CAM_MODEL], num_ctrl_pts);
+  coefs = get_coefs(args.args[CAM_MODEL], num_ctrl_pts);
+  tone_map = get_tone_map(args.args[CAM_MODEL]);
 
   input_scaled = (float *)malloc_aligned(bytes_fimage);
   result_scaled = (float *)malloc_aligned(bytes_fimage);
diff --git a/hpvm/test/benchmarks/pipeline/CMakeLists.txt b/hpvm/test/benchmarks/pipeline/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..84cb2d85afb4929b2f26854844808dd2493da1e3
--- /dev/null
+++ b/hpvm/test/benchmarks/pipeline/CMakeLists.txt
@@ -0,0 +1,13 @@
+find_package(OpenCV 2)
+if(${OpenCV_FOUND})
+  set(
+    all_flags
+    -O3 -I${OpenCV_INCLUDE_DIRS}
+    -ffast-math -fno-lax-vector-conversions -fno-vectorize -fno-slp-vectorize
+    -lpthread
+  )
+  add_hpvm_benchmark("pipeline_cpu" "pipeline-cpu" "${all_flags}" "c++" src/main.cc "")
+  add_hpvm_benchmark("pipeline_gpu" "pipeline-gpu" "${all_flags};--opencl" "c++" src/main.cc "")
+else()
+  message(WARNING "opencv-2 not found; not compiling HPVM benchmark 'pipeline'.")
+endif()
diff --git a/hpvm/test/benchmarks/pipeline/Makefile b/hpvm/test/benchmarks/pipeline/Makefile
deleted file mode 100644
index 8a55393f241f30d840cbf85c31488e652c2023a0..0000000000000000000000000000000000000000
--- a/hpvm/test/benchmarks/pipeline/Makefile
+++ /dev/null
@@ -1,118 +0,0 @@
-# This Makefile compiles the HPVM-pipeline test.
-# It builds HPVM-related dependencies, then the native camera pipeline ISP code.
-#
-# Paths to some dependencies (e.g., HPVM, LLVM) must exist in Makefile.config,
-# which can be copied from Makefile.config.example for a start.
-
-CONFIG_FILE := ../include/Makefile.config
-
-ifeq ($(wildcard $(CONFIG_FILE)),)
-    $(error $(CONFIG_FILE) not found. See $(CONFIG_FILE).example)
-endif
-include $(CONFIG_FILE)
-
-# Compiler Flags
-
-LFLAGS += -lm -lrt
-
-ifeq ($(TARGET),)
-    TARGET = gpu
-endif
-
-# Build dirs
-SRC_DIR = src/
-BUILD_DIR = build/$(TARGET)
-CURRENT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
-
-EXE = pipeline-$(TARGET)
-
-INCLUDES += -I$(SRC_DIR)
-
-## BEGIN HPVM MAKEFILE
-SRCDIR_OBJS=
-HPVM_OBJS=main.hpvm.ll
-APP = $(EXE)
-APP_CFLAGS += $(INCLUDES) -ffast-math -O3 -fno-lax-vector-conversions -fno-vectorize -fno-slp-vectorize
-APP_CXXFLAGS += $(INCLUDES) -ffast-math -O3 -fno-lax-vector-conversions -fno-vectorize -fno-slp-vectorize
-APP_LDFLAGS=`pkg-config opencv --libs`
-
-CFLAGS = $(APP_CFLAGS) $(PLATFORM_CFLAGS)
-OBJS_CFLAGS = $(APP_CFLAGS) $(PLATFORM_CFLAGS)
-CXXFLAGS = $(APP_CXXFLAGS) $(PLATFORM_CXXFLAGS)
-LDFLAGS= $(APP_LDFLAGS) $(PLATFORM_LDFLAGS)
-
-HPVM_RT_PATH = $(LLVM_BUILD_DIR)/tools/hpvm/projects/hpvm-rt
-HPVM_RT_LIB = $(HPVM_RT_PATH)/hpvm-rt.bc
-
-TESTGEN_OPTFLAGS = -load LLVMGenHPVM.so -genhpvm -globaldce
-
-ifeq ($(TARGET),seq)
-  DEVICE = CPU_TARGET
-  HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -dfg2llvm-cpu -clearDFG
-  HPVM_OPTFLAGS += -hpvm-timers-cpu
-else
-  DEVICE = GPU_TARGET
-  HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -localmem -dfg2llvm-opencl -dfg2llvm-cpu -clearDFG
-  HPVM_OPTFLAGS += -hpvm-timers-cpu -hpvm-timers-ptx
-endif
-  TESTGEN_OPTFLAGS += -hpvm-timers-gen
-
-CFLAGS += -DDEVICE=$(DEVICE)
-CXXFLAGS += -DDEVICE=$(DEVICE)
-
-# Add BUILDDIR as a prefix to each element of $1
-INBUILDDIR=$(addprefix $(BUILD_DIR)/,$(1))
-
-.PRECIOUS: $(BUILD_DIR)/%.ll
-
-OBJS = $(call INBUILDDIR,$(SRCDIR_OBJS))
-TEST_OBJS = $(call INBUILDDIR,$(HPVM_OBJS))
-KERNEL = $(TEST_OBJS).kernels.ll
-
-ifeq ($(TARGET),seq)
-else
-  KERNEL_OCL = $(TEST_OBJS).kernels.cl
-endif
-
-HOST_LINKED = $(BUILD_DIR)/$(APP).linked.ll
-HOST = $(BUILD_DIR)/$(APP).host.ll
-
-ifeq ($(OPENCL_PATH),)
-FAILSAFE=no_opencl
-else 
-FAILSAFE=
-endif
-
-# Targets
-default: $(FAILSAFE) $(BUILD_DIR) $(KERNEL_OCL) $(EXE)
-
-clean :
-	if [ -f $(EXE) ]; then rm $(EXE); fi
-	if [ -f DataflowGraph.dot ]; then rm DataflowGraph.dot*; fi
-	if [ -d $(BUILD_DIR) ]; then rm -rf $(BUILD_DIR); fi
-
-$(KERNEL_OCL) : $(KERNEL)
-	$(OCLBE) $< -o $@
-
-$(EXE) : $(HOST_LINKED)
-	$(CXX) -O3 $(LDFLAGS) $< -o $@
-
-$(HOST_LINKED) : $(HOST) $(OBJS) $(HPVM_RT_LIB)
-	$(LLVM_LINK) $^ -S -o $@
-
-$(HOST) $(KERNEL): $(BUILD_DIR)/$(HPVM_OBJS)
-	$(OPT) $(HPVM_OPTFLAGS) -S $< -o $(HOST)
-
-$(BUILD_DIR):
-	mkdir -p $(BUILD_DIR)
-
-$(BUILD_DIR)/%.ll : $(SRC_DIR)/%.cc
-	$(CC) $(OBJS_CFLAGS) -emit-llvm -S -o $@ $<
-
-$(BUILD_DIR)/main.ll : $(SRC_DIR)/main.cc
-	$(CC) $(CXXFLAGS) -emit-llvm -S -o $@ $<
-
-$(BUILD_DIR)/main.hpvm.ll : $(BUILD_DIR)/main.ll
-	$(OPT) $(TESTGEN_OPTFLAGS) $< -S -o $@
-
-## END HPVM MAKEFILE
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/CMakeLists.txt b/hpvm/test/dnn_benchmarks/hpvm-c/CMakeLists.txt
index d9cdafa7bc8dcffffb9d6b8bf3fb8d5aa958175f..9f34317d34157d57468c60cb854828b5c54f1cde 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/CMakeLists.txt
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/CMakeLists.txt
@@ -11,7 +11,7 @@ function(compile_hpvm_c target_name src_filepath codegen_target)
     OUTPUT ${output_bin_path}
     DEPENDS ${generated_file_path} hpvm-clang
     COMMAND hpvm-clang
-      ${generated_file_path} ${output_bin_path}
+      ${generated_file_path} ${output_bin_path} -O3 -fno-exceptions
       -t ${codegen_target} -I ${CMAKE_CURRENT_SOURCE_DIR}/include ${ARGN}
   )
   add_custom_target(${target_name} DEPENDS ${output_bin_path})
diff --git a/hpvm/tools/hpvm-clang/CMakeLists.txt b/hpvm/tools/hpvm-clang/CMakeLists.txt
index 53a98d0b2deed5ed0337cce46a75078abfd8c887..217bd39328788a41d7387105b312daf0c6133bb6 100644
--- a/hpvm/tools/hpvm-clang/CMakeLists.txt
+++ b/hpvm/tools/hpvm-clang/CMakeLists.txt
@@ -27,16 +27,20 @@ set(DIRECT_LINK_LIBS ${OpenCL_LIBRARY} "$<TARGET_FILE:tensor_runtime>")
 # and does not export its file location.
 # Keep this in sync with hpvm/projects/hpvm-rt/CMakeLists.txt.
 set(HPVM_RT_PATH ${LLVM_BUILD_DIR}/tools/hpvm/projects/hpvm-rt/hpvm-rt.bc)
+# And this must be manually in sync with `hpvm/lib/Transforms/*`.
+# which is fine for because we don't have many passes for now.
 set(
     AVAILABLE_PASSES
     LLVMBuildDFG
-    LLVMInPlaceDFGAnalysis
+    LLVMClearDFG
     LLVMDFG2LLVM_CPU
     LLVMDFG2LLVM_CUDNN
+    LLVMDFG2LLVM_OpenCL
     LLVMDFG2LLVM_WrapperAPI
     LLVMFuseHPVMTensorNodes
-    LLVMClearDFG
     LLVMGenHPVM
+    LLVMInPlaceDFGAnalysis
+    LLVMLocalMem
 )
 
 # ---[ Create package folder structure
diff --git a/hpvm/tools/hpvm-clang/main.py.in b/hpvm/tools/hpvm-clang/main.py.in
index 5fa256d26ba1312b20acbc86ed2c59befb52da2a..d37f293789787fbe81e5369543d99de97cb989ac 100644
--- a/hpvm/tools/hpvm-clang/main.py.in
+++ b/hpvm/tools/hpvm-clang/main.py.in
@@ -18,40 +18,70 @@ DIRECT_LINK_LIBS = "@DIRECT_LINK_LIBS@".split(";")
 AVAILABLE_PASSES = "@AVAILABLE_PASSES@".split(";")
 HPVM_RT_PATH = "@HPVM_RT_PATH@"
 
-COMPILE_FLAGS = ["fno-exceptions", "std=c++11", "O3"]
-
 
 def compile_hpvm_c(
-    input_file: PathLike,
+    hpvm_src: PathLike,
     output_file: PathLike,
-    codegen_target: str = "tensor",
+    tensor_target: Optional[str],
+    opencl: bool,
+    link_bitcode: List[PathLike] = None,
     include: List[PathLike] = None,
+    macro: List[str] = None,
+    flags: List[str] = None,
+    optim_level: str = "0",  # -O0
+    is_cpp: bool = True,  # otherwise is C
+    std: str = None,  # language std (-std=c++11)
+    link_dirs: List[PathLike] = None,
+    link_libs: List[str] = None,
     working_dir: PathLike = None,
     conf_file: PathLike = None,
 ):
     from subprocess import check_output
 
-    codegen_functions = {
-        "tensor": lambda i, o: opt_codegen_tensor(i, o, conf_file),
-        "cudnn": opt_codegen_cudnn
-    }
-    codegen_f = codegen_functions[codegen_target]
+    passes = ["LLVMBuildDFG"]
+    pass_flags = ["buildDFG"]
+    if tensor_target == "tensor":
+        if conf_file is None:
+            raise ValueError("conf_file must be defined when tensor_target=='tensor'.")
+        passes += ["LLVMInPlaceDFGAnalysis", "LLVMFuseHPVMTensorNodes", "LLVMDFG2LLVM_WrapperAPI"]
+        pass_flags += [
+            "inplace", "hpvm-fuse", "dfg2llvm-wrapperapi",
+            f"configuration-inputs-filename={conf_file}"
+        ]
+    elif tensor_target == "cudnn":
+        passes += ["LLVMInPlaceDFGAnalysis", "LLVMDFG2LLVM_CUDNN"]
+        pass_flags += ["inplace", "dfg2llvm-cudnn"]
+    elif tensor_target is None:
+        passes += ["LLVMLocalMem"]
+        pass_flags += ["localmem"]
+    else:
+        raise ValueError(f"Tensor target {tensor_target} not recognized")
+    if opencl:
+        passes += ["LLVMDFG2LLVM_OpenCL"]
+        pass_flags += ["dfg2llvm-opencl"]
+    passes += ["LLVMDFG2LLVM_CPU", "LLVMClearDFG"]
+    pass_flags += ["dfg2llvm-cpu", "clearDFG"]
+
     working_dir = Path(working_dir or ".")
     if not working_dir.is_dir():
         os.makedirs(working_dir)
-    name_stem = Path(input_file).stem
 
+    # All commands for compiling the main hpvm_c file
+    name_stem = Path(hpvm_src).stem
     ll_file = working_dir / f"{name_stem}.ll"
     hpvm_ll_file = working_dir / f"{name_stem}.hpvm.ll"
     llvm_ll_file = working_dir / f"{name_stem}.llvm.ll"
     hpvm_rt_linked_file = working_dir / f"{name_stem}.linked.bc"
+    link_bitcode_ = [Path(bc) for bc in (link_bitcode or [])]
     commands = [
-        hpvm_c_to_ll(input_file, ll_file, extra_includes=include),
+        hpvm_c_to_ll(hpvm_src, ll_file, include, macro, flags, optim_level, is_cpp, std),
         opt_codegen_hpvm(ll_file, hpvm_ll_file),
-        codegen_f(hpvm_ll_file, llvm_ll_file),
-        link_hpvm_rt(llvm_ll_file, hpvm_rt_linked_file),
-        link_binary(hpvm_rt_linked_file, output_file),
+        _run_opt(hpvm_ll_file, llvm_ll_file, passes, pass_flags),
+        link_hpvm_rt(link_bitcode_ + [llvm_ll_file], hpvm_rt_linked_file),
     ]
+    commands.append(
+        link_binary(hpvm_rt_linked_file, output_file, link_dirs, link_libs)
+    )
     for command in commands:
         print(" ".join(command))
         check_output(command)
@@ -60,14 +90,23 @@ def compile_hpvm_c(
 def hpvm_c_to_ll(
     src_file: PathLike,
     target_file: PathLike,
-    extra_includes: Optional[List[PathLike]] = None,
+    extra_includes: List[PathLike] = None,
+    macros: List[str] = None,
     flags: List[str] = None,
+    optim_level: str = "0",  # -O0
+    is_cpp: bool = True,  # otherwise is C
+    std: str = None,  # --std=c++11
 ) -> List[str]:
-    extra_includes = extra_includes or []
-    includes = [f"-I{path}" for path in TRT_INCLUDE_DIRS + extra_includes]
-    flags = [f"-{flg}" for flg in (flags or []) + COMPILE_FLAGS]
+    includes = (extra_includes or []) + TRT_INCLUDE_DIRS
+    includes_s = [f"-I{path}" for path in includes]
+    macros = [f"-D{macro}" for macro in (macros or [])]
+    flags = [f"-f{flg}" for flg in (flags or [])]
+    if std:
+        flags.append(f"-std={std}")
+    clang = "clang++" if is_cpp else "clang"
     return [
-        str(LLVM_BUILD_BIN / "clang++"), *includes, *flags, "-emit-llvm", "-S",
+        str(LLVM_BUILD_BIN / clang), *includes_s, *flags, *macros,
+        f"-O{optim_level}", "-emit-llvm", "-S",
         str(src_file), "-o", str(target_file)
     ]
 
@@ -76,63 +115,44 @@ def opt_codegen_hpvm(src_file: PathLike, target_file: PathLike) -> List[str]:
     return _run_opt(src_file, target_file, ["LLVMGenHPVM"], ["genhpvm", "globaldce"])
 
 
-def opt_codegen_cudnn(src_file: PathLike, target_file: PathLike) -> List[str]:
-    passes = [
-        "LLVMBuildDFG", "LLVMInPlaceDFGAnalysis",
-        "LLVMDFG2LLVM_CUDNN", "LLVMDFG2LLVM_CPU",
-        "LLVMFuseHPVMTensorNodes", "LLVMClearDFG", "LLVMGenHPVM"
-    ]
-    flags = [
-        "buildDFG", "inplace", "hpvm-fuse",
-        "dfg2llvm-cudnn", "dfg2llvm-cpu", "clearDFG"
-    ]
-    return _run_opt(src_file, target_file, passes, flags)
+def link_hpvm_rt(bitcodes: List[PathLike], target_file: PathLike) -> List[str]:
+    bitcodes_s = [str(bc) for bc in bitcodes]
+    return [str(LLVM_BUILD_BIN / "llvm-link"), *bitcodes_s, HPVM_RT_PATH, "-S", "-o", str(target_file)]
 
 
-def opt_codegen_tensor(
-    src_file: PathLike, target_file: PathLike, conf_file: PathLike
-):
-    passes = [
-        "LLVMBuildDFG", "LLVMInPlaceDFGAnalysis",
-        "LLVMDFG2LLVM_WrapperAPI", "LLVMDFG2LLVM_CPU",
-        "LLVMFuseHPVMTensorNodes", "LLVMClearDFG", "LLVMGenHPVM"
-    ]
-    flags = [
-        "buildDFG", "inplace", "hpvm-fuse",
-        "dfg2llvm-wrapperapi",
-        f"configuration-inputs-filename={conf_file}",
-        "dfg2llvm-cpu", "clearDFG",
+def link_binary(
+    src_file: PathLike,
+    target_file: PathLike,
+    extra_link_dirs: List[PathLike] = None,
+    extra_link_libs: List[str] = None
+) -> List[str]:
+    link_dirs, link_libs = _link_args(extra_link_dirs or [], extra_link_libs or [])
+    linker_dir_flags = []
+    for path in link_dirs:
+        linker_dir_flags.extend([f"-L{path}", f"-Wl,-rpath={path}"])
+    linker_lib_flags = [f"-l{lib}" for lib in link_libs]
+    return [
+        str(LLVM_BUILD_BIN / "clang++"), str(src_file),
+        "-o", str(target_file), *linker_dir_flags, *linker_lib_flags
     ]
-    return _run_opt(src_file, target_file, passes, flags)
 
 
-def link_hpvm_rt(src_file: PathLike, target_file: PathLike) -> List[str]:
-    return [str(LLVM_BUILD_BIN / "llvm-link"), str(src_file), HPVM_RT_PATH, "-o", str(target_file)]
-
-
-def link_binary(src_file: PathLike, target_file: PathLike) -> List[str]:
+def _link_args(extra_link_dirs: List[PathLike], extra_link_libs: List[str]):
     def drop_suffix(libname: str):
         import re
 
         match = re.match(r"lib(.*)\.so", libname)
         return libname if match is None else match.group(1)
 
-    link_dirs, link_libnames = [], []
+    link_dirs, link_libs = extra_link_dirs.copy(), extra_link_libs.copy()
     for lib in DIRECT_LINK_LIBS:
         lib = Path(lib)
         link_dirs.append(lib.parent)
-        link_libnames.append(drop_suffix(lib.name))
+        link_libs.append(lib.name)
     link_dirs += TRT_LINK_DIRS
-    link_libnames += TRT_LINK_LIBS
-
-    linker_dir_flags = []
-    for path in link_dirs:
-        linker_dir_flags.extend([f"-L{path}", f"-Wl,-rpath={path}"])
-    linker_lib_flags = [f"-l{drop_suffix(lib)}" for lib in link_libnames]
-    return [
-        str(LLVM_BUILD_BIN / "clang++"), str(src_file),
-        "-o", str(target_file), *linker_dir_flags, *linker_lib_flags
-    ]
+    link_libs += TRT_LINK_LIBS
+    link_libnames = [drop_suffix(s) for s in link_libs]
+    return link_dirs, link_libnames
 
 
 def _run_opt(
@@ -143,7 +163,7 @@ def _run_opt(
 ) -> List[str]:
     unavailable = set(pass_names) - set(AVAILABLE_PASSES)
     if unavailable:
-        raise ValueError(f"Passes {unavailable} are unavailable from CMake")
+        raise ValueError(f"Passes {unavailable} are unavailable for this compilation.")
     load_passes_strs = [s for pass_ in pass_names for s in ["-load", f"{pass_}.so"]]
     pass_flags_strs = [f"-{flag}" for flag in pass_flags]
     return [
@@ -154,37 +174,98 @@ def _run_opt(
 
 def parse_args():
     parser = argparse.ArgumentParser("hpvm-clang")
-    parser.add_argument("input_file", type=Path, help="HPVM-C code to compile")
+    parser.add_argument(
+        "hpvm_src", type=Path,
+        help="""HPVM-C code to compile.
+HPVM-C code must be single file, but additional bitcode file can be linked together.
+See option -b for that."""
+    )
     parser.add_argument("output_file", type=Path, help="Path to generate binary to")
+    parser.add_argument(
+        "-x", type=str, metavar="language", default="c++",
+        help="Treat input file as having type <language>",
+    )
+    parser.add_argument(
+        "-b",
+        "--link-bitcode",
+        type=Path,
+        nargs="+",
+        help="Additional bitcode (.ll/.bc) files to link to",
+    )
     parser.add_argument(
         "-t",
-        "--codegen-target",
+        "--tensor-target",
         type=str,
-        required=True,
         choices=["tensor", "cudnn"],
-        help="Backend to use",
+        help="Backend to use for tensor operators",
+    )
+    parser.add_argument(
+        "--conf-file", type=Path,
+        help="File to approximation configurations; required for tensor target 'tensor'"
+    )
+    parser.add_argument(
+        "--opencl",
+        action="store_true",
+        help="Use OpenCL support. Requires HPVM built with OpenCL",
     )
     parser.add_argument(
         "-d", "--working-dir", type=Path, help="Directory to generate temp files in"
     )
+
+    # Relaying arguments for clang++ (source -> bitcode stage)
     parser.add_argument(
-        "--conf-file", type=Path,
-        help="File to approximation configurations; required for 'tensor' target"
+        "-I", "--include", type=Path, action="append", metavar="dir",
+        help="[clang emit-llvm] Add directory to include search path"
+    )
+    parser.add_argument(
+        "-D", type=str, action="append", metavar="<macro>=<value>",
+        help="[clang emit-llvm] Define macro"
+    )
+    parser.add_argument(
+        "-f", type=str, action="append", metavar="flag",
+        help="[clang emit-llvm] clang++ flags (such as -ffastmath)"
+    )
+    parser.add_argument(
+        "-O", type=str, default="0", metavar="level",
+        help="[clang emit-llvm] Optimization level"
+    )
+    parser.add_argument(
+        "--std", type=str,
+        help="[clang emit-llvm] Language standard to compile for. Double dashes (--std, not -std)."
+    )
+
+    # Relaying arguments for clang++ (linking stage)
+    parser.add_argument(
+        "-L", type=Path, action="append", metavar="dir",
+        help="[clang linker] Add directory to library search path"
     )
     parser.add_argument(
-        "-I", "--include", type=Path, action="append",
-        help="Additional include directories to use"
+        "-l", type=str, action="append", metavar="name",
+        help="[clang linker] Link library (such as -lpthread)"
     )
 
     args = parser.parse_args()
-    if args.codegen_target == "tensor":
+    if args.tensor_target == "tensor":
         if args.conf_file is None:
-            parser.error('Codegen target "tensor" requires --conf-file argument')
+            parser.error('Tensor target "tensor" requires --conf-file argument')
+    if args.x == "c":
+        args.is_cpp = False
+    elif args.x == "c++":
+        args.is_cpp = True
+    else:
+        parser.error(f"Language mode {args.x} not supported yet -- only c or c++")
     return args
 
 
 def main():
-    compile_hpvm_c(**vars(parse_args()))
+    args = vars(parse_args())
+    args["macro"] = args.pop("D")
+    args["flags"] = args.pop("f")
+    args["optim_level"] = args.pop("O")
+    args["link_dirs"] = args.pop("L")
+    args["link_libs"] = args.pop("l")
+    args.pop("x")
+    compile_hpvm_c(**args)
 
 
 if __name__ == "__main__":