diff --git a/hpvm/CMakeLists.txt b/hpvm/CMakeLists.txt
index fcfaf264a64d52bfe13e0023fe92ad12b7cf2016..71e6de5999ad0127da5155d93fe3403c05cb3c7f 100644
--- a/hpvm/CMakeLists.txt
+++ b/hpvm/CMakeLists.txt
@@ -8,7 +8,13 @@ message(STATUS "CUDA Architecture: ${CMAKE_CUDA_ARCHITECTURES}")
 # find_package will use the auxillary cmake/Find*.cmake we provide
 list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
 find_package(CUDNN 7 EXACT REQUIRED)  # CUDNN_INCLUDE_PATH, CUDNN_LIBRARY_DIR and CUDNN::cudnn
-find_package(OpenCL REQUIRED)  # Defines ${OpenCL_INCLUDE_DIRS} and ${OpenCL_LIBRARY}
+find_package(OpenCL)  # Defines ${OpenCL_INCLUDE_DIRS} and ${OpenCL_LIBRARY}
+if(${OpenCL_FOUND})
+  set(HPVM_USE_OPENCL 1)
+else()
+  message(WARNING "OpenCL not found. --opencl flag of hpvm-clang will be unavailable for this build.")
+  set(HPVM_USE_OPENCL 0)
+endif()
 
 include_directories(./include/)
 
diff --git a/hpvm/docs/install.rst b/hpvm/docs/install.rst
index fa4a3f747b24501537903edc8f7b09da8928571b..9dd8365ae71277f70e72abe6f3c1f6725351eb1f 100644
--- a/hpvm/docs/install.rst
+++ b/hpvm/docs/install.rst
@@ -143,14 +143,14 @@ Some common options that can be used with CMake are:
 * ``-DCMAKE_BUILD_TYPE=type`` --- Valid options for type are Debug, Release, RelWithDebInfo, and MinSizeRel. Default is Debug.
 * ``-DLLVM_ENABLE_ASSERTIONS=On`` --- Compile with assertion checks enabled (default is Yes for Debug builds, No for all other build types).
 
-Now, compile the HPVM Compilation Tool ``approxhpvm.py`` using:
+Now, compile the HPVM Compilation Tool ``hpvm-clang`` using:
 
 .. code-block:: shell
 
-   make -j<number of threads> approxhpvm.py
+   make -j<number of threads> hpvm-clang
 
 With all the aforementioned steps, HPVM should be built, installed, tested and ready to use.
-In particular, ``approxhpvm.py`` should be an executable command from your command line.
+In particular, ``hpvm-clang`` should be an executable command from your command line.
 
 Benchmarks and Tests
 --------------------
@@ -159,7 +159,7 @@ We provide a number of general benchmarks, DNN benchmarks, and test cases, writt
 
 ``make`` targets ``check-hpvm-pass``, ``check-hpvm-dnn``, and ``check-hpvm-profiler``
 tests various components of HPVM and are increasingly time-consuming.
-You can run tests similarly as how ``approxhpvm.py`` is compiled: for example,
+You can run tests similarly as how ``hpvm-clang`` is compiled: for example,
 
 .. code-block:: shell
 
diff --git a/hpvm/lib/Transforms/DFG2LLVM_OpenCL/CMakeLists.txt b/hpvm/lib/Transforms/DFG2LLVM_OpenCL/CMakeLists.txt
index 4041df11ce8d79e39d6f72bdf0a1068eae449300..1e34f44f403e373607d2efc10aca7f796c81b938 100644
--- a/hpvm/lib/Transforms/DFG2LLVM_OpenCL/CMakeLists.txt
+++ b/hpvm/lib/Transforms/DFG2LLVM_OpenCL/CMakeLists.txt
@@ -10,6 +10,7 @@ add_llvm_library( LLVMDFG2LLVM_OpenCL
 
   DEPENDS
   intrinsics_gen
+  llvm-cbe  # Called within the pass
   PLUGIN_TOOL
   opt
   )
diff --git a/hpvm/lib/Transforms/DFG2LLVM_OpenCL/DFG2LLVM_OpenCL.cpp b/hpvm/lib/Transforms/DFG2LLVM_OpenCL/DFG2LLVM_OpenCL.cpp
index 5a58f272b3042a4ebfc2e3c7bb3606b5c19e8d84..286bcdebd2ed3322b134581f3e8fdac286ee325b 100644
--- a/hpvm/lib/Transforms/DFG2LLVM_OpenCL/DFG2LLVM_OpenCL.cpp
+++ b/hpvm/lib/Transforms/DFG2LLVM_OpenCL/DFG2LLVM_OpenCL.cpp
@@ -47,6 +47,8 @@
 #include "llvm/Support/ToolOutputFile.h"
 
 #include <sstream>
+#include <unistd.h>  // ".", nullptr() (POSIX-only)
+#include <cstdlib>  // std::system
 
 #ifndef LLVM_BUILD_DIR
 #error LLVM_BUILD_DIR is not defined
@@ -147,7 +149,7 @@ static void getExecuteNodeParams(Module &M, Value *&, Value *&, Value *&,
 static Value *genWorkGroupPtr(Module &M, std::vector<Value *>,
                               ValueToValueMapTy &, Instruction *,
                               const Twine &WGName = "WGSize");
-static std::string getPTXFilename(const Module &);
+static std::string getPTXFilePath(const Module &);
 static void changeDataLayout(Module &);
 static void changeTargetTriple(Module &);
 static void findReturnInst(Function *, std::vector<ReturnInst *> &);
@@ -258,7 +260,7 @@ public:
     DEBUG(errs() << *KernelM);
   }
 
-  void writeKernelsModule();
+  void writeKModCompilePTX();
 };
 
 // Initialize the HPVM runtime API. This makes it easier to insert these calls
@@ -846,7 +848,7 @@ void CGT_OpenCL::codeGen(DFInternalNode *N) {
     // Now the remaining nodes to be visited should be ignored
     KernelLaunchNode = NULL;
     DEBUG(errs() << "Insert Runtime calls\n");
-    insertRuntimeCalls(N, kernel, getPTXFilename(M));
+    insertRuntimeCalls(N, kernel, getPTXFilePath(M));
 
   } else {
     DEBUG(errs() << "Found intermediate node. Getting size parameters.\n");
@@ -1901,7 +1903,7 @@ bool DFG2LLVM_OpenCL::runOnModule(Module &M) {
     CGTVisitor->visit(rootNode);
   }
 
-  CGTVisitor->writeKernelsModule();
+  CGTVisitor->writeKModCompilePTX();
 
   // TODO: Edit module epilogue to remove the HPVM intrinsic declarations
   delete CGTVisitor;
@@ -2010,16 +2012,18 @@ void CGT_OpenCL::addCLMetadata(Function *F) {
   MDN_annotations->addOperand(MDNvvmAnnotationsNode);
 }
 
-void CGT_OpenCL::writeKernelsModule() {
+void CGT_OpenCL::writeKModCompilePTX() {
 
   // In addition to deleting all other functions, we also want to spiff it
   // up a little bit.  Do this now.
   legacy::PassManager Passes;
+  auto kmodName = getKernelsModuleName(M);
+  auto ptxPath = getPTXFilePath(M);
 
   DEBUG(errs() << "Writing to File --- ");
-  DEBUG(errs() << getKernelsModuleName(M).c_str() << "\n");
+  DEBUG(errs() << kmodName << "\n");
   std::error_code EC;
-  ToolOutputFile Out(getKernelsModuleName(M).c_str(), EC, sys::fs::F_None);
+  ToolOutputFile Out(kmodName.c_str(), EC, sys::fs::F_None);
   if (EC) {
     DEBUG(errs() << EC.message() << '\n');
   }
@@ -2030,6 +2034,13 @@ void CGT_OpenCL::writeKernelsModule() {
 
   // Declare success.
   Out.keep();
+
+  // Starts calling llvm-cbe.
+  auto llvmCBE = std::string(LLVM_BUILD_DIR_STR) + "/bin/llvm-cbe";
+  std::string command = llvmCBE + " " + kmodName + " -o " + ptxPath;
+  DEBUG(errs() << "Compiling PTX from kernel module:\n");
+  DEBUG(errs() << command);
+  std::system(command.c_str());
 }
 
 Function *CGT_OpenCL::transformFunctionToVoid(Function *F) {
@@ -2346,10 +2357,13 @@ static Value *genWorkGroupPtr(Module &M, std::vector<Value *> WGSize,
 }
 
 // Get generated PTX binary name
-static std::string getPTXFilename(const Module &M) {
+static std::string getPTXFilePath(const Module &M) {
   std::string moduleID = M.getModuleIdentifier();
-  moduleID.append(".kernels.cl");
-  return moduleID;
+  char *cwd_p = get_current_dir_name();
+  std::string cwd(cwd_p);
+  free(cwd_p);
+  std::string ptxPath = cwd + "/" + moduleID + ".kernels.cl";
+  return ptxPath;
 }
 
 // Changes the data layout of the Module to be compiled with OpenCL backend
diff --git a/hpvm/projects/hpvm-rt/CMakeLists.txt b/hpvm/projects/hpvm-rt/CMakeLists.txt
index 48cf16282661862a86a254b6e40097a830cc2d43..3147d00e892f8d89b71ebbe7e04aa0fa33210534 100644
--- a/hpvm/projects/hpvm-rt/CMakeLists.txt
+++ b/hpvm/projects/hpvm-rt/CMakeLists.txt
@@ -8,9 +8,13 @@ SET(CMAKE_CXX_STANDARD 11)
 # we want ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/hpvm-rt.dir/hpvm-rt.cpp.o
 # which is a LLVM Bitcode file because of the -flto below.
 add_llvm_library(hpvm-rt hpvm-rt.cpp DEPENDS clang)
-target_compile_options(hpvm-rt PUBLIC -flto)
-target_include_directories(hpvm-rt PRIVATE ${OpenCL_INCLUDE_DIRS})
-target_link_directories(hpvm-rt PUBLIC ${OpenCL_LIBRARY})
+target_compile_options(hpvm-rt PUBLIC -flto -DHPVM_USE_OPENCL=${HPVM_USE_OPENCL})
+if(${HPVM_USE_OPENCL})
+  target_include_directories(hpvm-rt PRIVATE ${OpenCL_INCLUDE_DIRS})
+  target_link_directories(hpvm-rt PUBLIC ${OpenCL_LIBRARY})
+else()
+  message(STATUS "hpvm-rt.bc is not using OpenCL.")
+endif()
 
 # Move and rename hpvm-rt.cpp.o to be an actual bc code
 add_custom_command(
diff --git a/hpvm/projects/hpvm-rt/hpvm-rt.cpp b/hpvm/projects/hpvm-rt/hpvm-rt.cpp
index dff4f0e4a21e8e0eafad36bd7b14805c584cd10c..f0716378fe8c62639555396a3d268aae051534ba 100644
--- a/hpvm/projects/hpvm-rt/hpvm-rt.cpp
+++ b/hpvm/projects/hpvm-rt/hpvm-rt.cpp
@@ -1,4 +1,6 @@
-#include <CL/cl.h>
+
+//#define HPVM_USE_OPENCL 1
+
 #include <algorithm>
 #include <cassert>
 #include <cstdio>
@@ -8,9 +10,14 @@
 #include <map>
 #include <pthread.h>
 #include <string>
-
 #include <unistd.h>
 
+#ifdef HPVM_USE_OPENCL
+
+#include <CL/cl.h>
+
+#endif
+
 #if _POSIX_VERSION >= 200112L
 #include <sys/time.h>
 #endif
@@ -40,6 +47,9 @@ typedef struct {
   std::vector<CircularBuffer<uint64_t> *> *isLastInputBuffers;
 } DFNodeContext_CPU;
 
+
+#ifdef HPVM_USE_OPENCL
+
 typedef struct {
   cl_context clOCLContext;
   cl_command_queue clCommandQue;
@@ -51,6 +61,9 @@ cl_context globalOCLContext;
 cl_device_id *clDevices;
 cl_command_queue globalCommandQue;
 
+#endif
+
+
 MemTracker MTracker;
 vector<DFGDepth> DStack;
 // Mutex to prevent concurrent access by multiple thereads in pipeline
@@ -59,6 +72,8 @@ pthread_mutex_t ocl_mtx;
 #define NUM_TESTS 1
 hpvm_TimerSet kernel_timer;
 
+#ifdef HPVM_USE_OPENCL
+
 static const char *getErrorString(cl_int error) {
   switch (error) {
   // run-time and JIT compiler errors
@@ -209,6 +224,15 @@ static inline void checkErr(cl_int err, cl_int success, const char *name) {
   }
 }
 
+#endif
+
+
+void openCLAbort(){
+ cout <<" ERROR: OpenCL NOT found!. Please Recompile with OpenCL - Make sure to have OpenCL on System \n ";
+ abort();
+}
+
+
 /************************* Depth Stack Routines ***************************/
 
 void llvm_hpvm_cpu_dstack_push(unsigned n, uint64_t limitX, uint64_t iX,
@@ -260,6 +284,9 @@ uint64_t llvm_hpvm_cpu_getDimInstance(unsigned level, unsigned dim) {
 /********************** Memory Tracking Routines **************************/
 
 void llvm_hpvm_track_mem(void *ptr, size_t size) {
+
+#ifdef HPVM_USE_OPENCL 
+  
   DEBUG(cout << "Start tracking memory: " << ptr << flush << "\n");
   MemTrackerEntry *MTE = MTracker.lookup(ptr);
   if (MTE != NULL) {
@@ -269,9 +296,19 @@ void llvm_hpvm_track_mem(void *ptr, size_t size) {
   DEBUG(cout << "Inserting ID " << ptr << " in the MemTracker Table\n");
   MTracker.insert(ptr, size, MemTrackerEntry::HOST, ptr);
   DEBUG(MTracker.print());
+
+#else
+
+  openCLAbort();
+  
+#endif
+  
 }
 
 void llvm_hpvm_untrack_mem(void *ptr) {
+
+#ifdef HPVM_USE_OPENCL 
+
   DEBUG(cout << "Stop tracking memory: " << ptr << flush << "\n");
   MemTrackerEntry *MTE = MTracker.lookup(ptr);
   if (MTE == NULL) {
@@ -284,11 +321,22 @@ void llvm_hpvm_untrack_mem(void *ptr) {
     clReleaseMemObject((cl_mem)MTE->getAddress());
   MTracker.remove(ptr);
   DEBUG(MTracker.print());
+
+#else
+
+  openCLAbort();
+  
+#endif
+  
 }
 
+
+#ifdef HPVM_USE_OPENCL 
+
 static void *llvm_hpvm_ocl_request_mem(void *ptr, size_t size,
                                        DFNodeContext_OCL *Context, bool isInput,
                                        bool isOutput) {
+  
   pthread_mutex_lock(&ocl_mtx);
   DEBUG(cout << "[OCL] Request memory: " << ptr
              << " for context: " << Context->clOCLContext << flush << "\n");
@@ -343,13 +391,20 @@ static void *llvm_hpvm_ocl_request_mem(void *ptr, size_t size,
   DEBUG(MTracker.print());
   pthread_mutex_unlock(&ocl_mtx);
   return d_input;
+  
 }
 
+#endif
+
+
 void *llvm_hpvm_cpu_argument_ptr(void *ptr, size_t size) {
   return llvm_hpvm_request_mem(ptr, size);
 }
 
 void *llvm_hpvm_request_mem(void *ptr, size_t size) {
+
+#ifdef HPVM_USE_OPENCL 
+
   pthread_mutex_lock(&ocl_mtx);
   DEBUG(cout << "[CPU] Request memory: " << ptr << flush << "\n");
   MemTrackerEntry *MTE = MTracker.lookup(ptr);
@@ -386,6 +441,13 @@ void *llvm_hpvm_request_mem(void *ptr, size_t size) {
   DEBUG(MTracker.print());
   pthread_mutex_unlock(&ocl_mtx);
   return ptr;
+
+#else
+
+  openCLAbort();
+
+#endif
+
 }
 
 /*************************** Timer Routines **********************************/
@@ -419,6 +481,9 @@ get_last_async(struct hpvm_TimerSet *timers) {
 }
 
 static void insert_marker(struct hpvm_TimerSet *tset, enum hpvm_TimerID timer) {
+
+#ifdef HPVM_USE_OPENCL
+  
   cl_int ciErrNum = CL_SUCCESS;
   struct hpvm_async_time_marker_list **new_event = &(tset->async_markers);
 
@@ -441,10 +506,20 @@ static void insert_marker(struct hpvm_TimerSet *tset, enum hpvm_TimerID timer) {
   if (ciErrNum != CL_SUCCESS) {
     fprintf(stderr, "Error Enqueueing Marker!\n");
   }
+
+#else
+
+  openCLAbort();
+
+#endif
+
 }
 
 static void insert_submarker(struct hpvm_TimerSet *tset, char *label,
                              enum hpvm_TimerID timer) {
+
+#ifdef HPVM_USE_OPENCL
+  
   cl_int ciErrNum = CL_SUCCESS;
   struct hpvm_async_time_marker_list **new_event = &(tset->async_markers);
 
@@ -467,10 +542,20 @@ static void insert_submarker(struct hpvm_TimerSet *tset, char *label,
   if (ciErrNum != CL_SUCCESS) {
     fprintf(stderr, "Error Enqueueing Marker!\n");
   }
+
+#else
+
+  openCLAbort();
+
+#endif
+  
 }
 
 /* Assumes that all recorded events have completed */
 static hpvm_Timestamp record_async_times(struct hpvm_TimerSet *tset) {
+
+#ifdef HPVM_USE_OPENCL
+  
   struct hpvm_async_time_marker_list *next_interval = NULL;
   struct hpvm_async_time_marker_list *last_marker = get_last_async(tset);
   hpvm_Timestamp total_async_time = 0;
@@ -517,6 +602,13 @@ static hpvm_Timestamp record_async_times(struct hpvm_TimerSet *tset) {
     next_interval->timerID = INVALID_TIMERID;
 
   return total_async_time;
+
+#else
+
+  openCLAbort();
+
+#endif
+  
 }
 
 static void accumulate_time(hpvm_Timestamp *accum, hpvm_Timestamp start,
@@ -733,6 +825,9 @@ void hpvm_AddSubTimer(struct hpvm_TimerSet *timers, char *label,
 }
 
 void hpvm_SwitchToTimer(struct hpvm_TimerSet *timers, enum hpvm_TimerID timer) {
+
+#ifdef HPVM_USE_OPENCL
+  
   // cerr << "Switch to timer: " << timer << flush << "\n";
   /* Stop the currently running timer */
   if (timers->current != hpvm_TimerID_NONE) {
@@ -849,10 +944,21 @@ void hpvm_SwitchToTimer(struct hpvm_TimerSet *timers, enum hpvm_TimerID timer) {
     }
   }
   timers->current = timer;
+
+#else
+
+  openCLAbort();
+
+#endif
+
+
 }
 
 void hpvm_SwitchToSubTimer(struct hpvm_TimerSet *timers, char *label,
                            enum hpvm_TimerID category) {
+
+#ifdef HPVM_USE_OPENCL
+  
   struct hpvm_SubTimerList *subtimerlist =
       timers->sub_timer_list[timers->current];
   struct hpvm_SubTimer *curr =
@@ -1001,6 +1107,13 @@ void hpvm_SwitchToSubTimer(struct hpvm_TimerSet *timers, char *label,
   }
 
   timers->current = category;
+
+#else
+
+  openCLAbort();
+
+#endif
+  
 }
 
 void hpvm_PrintTimerSet(struct hpvm_TimerSet *timers) {
@@ -1069,6 +1182,9 @@ void hpvm_PrintTimerSet(struct hpvm_TimerSet *timers) {
 }
 
 void hpvm_DestroyTimerSet(struct hpvm_TimerSet *timers) {
+
+#ifdef HPVM_USE_OPENCL
+  
   /* clean up all of the async event markers */
   struct hpvm_async_time_marker_list *event = timers->async_markers;
   while (event != NULL) {
@@ -1106,6 +1222,13 @@ void hpvm_DestroyTimerSet(struct hpvm_TimerSet *timers) {
       free(timers->sub_timer_list[i]);
     }
   }
+
+#else
+
+  openCLAbort();
+
+#endif
+
 }
 
 /**************************** Pipeline API ************************************/
@@ -1304,10 +1427,13 @@ void llvm_hpvm_cpu_wait(void *graphID) {
   DEBUG(cout << "\t... pthread Done!\n");
 }
 
+
+#ifdef HPVM_USE_OPENCL
+
 // Returns the platform name.
 std::string getPlatformName(cl_platform_id pid) {
+ 
   cl_int status;
-
   size_t sz;
   status = clGetPlatformInfo(pid, CL_PLATFORM_NAME, 0, NULL, &sz);
   checkErr(status, CL_SUCCESS, "Query for platform name size failed");
@@ -1318,12 +1444,18 @@ std::string getPlatformName(cl_platform_id pid) {
 
   const auto &tmp = std::string(name, name + sz);
   delete[] name;
-  return tmp;
+  return tmp;  
 }
 
+#endif
+
+
+#ifdef HPVM_USE_OPENCL
+
 // Searches all platforms for the first platform whose name
 // contains the search string (case-insensitive).
 cl_platform_id findPlatform(const char *platform_name_search) {
+  
   cl_int status;
 
   std::string search = platform_name_search;
@@ -1360,7 +1492,13 @@ cl_platform_id findPlatform(const char *platform_name_search) {
   assert(false && "No matching platform found!");
 }
 
+#endif
+
+
 void *llvm_hpvm_ocl_initContext(enum hpvm::Target T) {
+
+#ifdef HPVM_USE_OPENCL
+  
   pthread_mutex_lock(&ocl_mtx);
   DEBUG(std::string Target = T == hpvm::GPU_TARGET ? "GPU" : "SPIR");
   DEBUG(cout << "Initializing Context for " << Target << " device\n");
@@ -1450,9 +1588,19 @@ void *llvm_hpvm_ocl_initContext(enum hpvm::Target T) {
 
   pthread_mutex_unlock(&ocl_mtx);
   return globalOCLContext;
+
+#else
+
+  openCLAbort();
+
+#endif
+  
 }
 
 void llvm_hpvm_ocl_clearContext(void *graphID) {
+
+#ifdef HPVM_USE_OPENCL
+  
   pthread_mutex_lock(&ocl_mtx);
   DEBUG(cout << "Clear Context\n");
   DFNodeContext_OCL *Context = (DFNodeContext_OCL *)graphID;
@@ -1464,9 +1612,19 @@ void llvm_hpvm_ocl_clearContext(void *graphID) {
   cout << "Printing HPVM Timer: KernelTimer\n";
   hpvm_PrintTimerSet(&kernel_timer);
   pthread_mutex_unlock(&ocl_mtx);
+
+#else
+
+  openCLAbort();
+
+#endif
+  
 }
 
 void llvm_hpvm_ocl_argument_shared(void *graphID, int arg_index, size_t size) {
+
+#ifdef HPVM_USE_OPENCL
+  
   pthread_mutex_lock(&ocl_mtx);
   DEBUG(cout << "Set Shared Memory Input:");
   DEBUG(cout << "\tArgument Index = " << arg_index << ", Size = " << size
@@ -1477,10 +1635,20 @@ void llvm_hpvm_ocl_argument_shared(void *graphID, int arg_index, size_t size) {
   cl_int errcode = clSetKernelArg(Context->clKernel, arg_index, size, NULL);
   checkErr(errcode, CL_SUCCESS, "Failure to set shared memory argument");
   pthread_mutex_unlock(&ocl_mtx);
+
+#else
+
+  openCLAbort();
+
+#endif
+  
 }
 
 void llvm_hpvm_ocl_argument_scalar(void *graphID, void *input, int arg_index,
                                    size_t size) {
+
+#ifdef HPVM_USE_OPENCL
+  
   pthread_mutex_lock(&ocl_mtx);
   DEBUG(cout << "Set Scalar Input:");
   DEBUG(cout << "\tArgument Index = " << arg_index << ", Size = " << size
@@ -1491,10 +1659,20 @@ void llvm_hpvm_ocl_argument_scalar(void *graphID, void *input, int arg_index,
   cl_int errcode = clSetKernelArg(Context->clKernel, arg_index, size, input);
   checkErr(errcode, CL_SUCCESS, "Failure to set constant input argument");
   pthread_mutex_unlock(&ocl_mtx);
+
+#else
+
+  openCLAbort();
+
+#endif
+
 }
 
 void *llvm_hpvm_ocl_argument_ptr(void *graphID, void *input, int arg_index,
                                  size_t size, bool isInput, bool isOutput) {
+
+#ifdef HPVM_USE_OPENCL
+  
   pthread_mutex_lock(&ocl_mtx);
   DEBUG(cout << "Set Pointer Input:");
   DEBUG(cout << "\tArgument Index = " << arg_index << ", Ptr = " << input
@@ -1518,9 +1696,19 @@ void *llvm_hpvm_ocl_argument_ptr(void *graphID, void *input, int arg_index,
   DEBUG(cout << "\tDevicePtr = " << d_input << flush << "\n");
   pthread_mutex_unlock(&ocl_mtx);
   return d_input;
+
+#else
+
+  openCLAbort();
+
+#endif
+
 }
 
 void *llvm_hpvm_ocl_output_ptr(void *graphID, int arg_index, size_t size) {
+
+#ifdef HPVM_USE_OPENCL
+  
   pthread_mutex_lock(&ocl_mtx);
   DEBUG(cout << "Set device memory for Output Struct:");
   DEBUG(cout << "\tArgument Index = " << arg_index << ", Size = " << size
@@ -1536,12 +1724,22 @@ void *llvm_hpvm_ocl_output_ptr(void *graphID, int arg_index, size_t size) {
   DEBUG(cout << "\tDevicePtr = " << d_output << flush << "\n");
   pthread_mutex_unlock(&ocl_mtx);
   return d_output;
+
+#else
+
+  openCLAbort();
+
+#endif
+
 }
 
 void llvm_hpvm_ocl_free(void *ptr) {}
 
 void *llvm_hpvm_ocl_getOutput(void *graphID, void *h_output, void *d_output,
                               size_t size) {
+
+#ifdef HPVM_USE_OPENCL
+  
   pthread_mutex_lock(&ocl_mtx);
   DEBUG(cout << "Get Output:\n");
   DEBUG(cout << "\tHostPtr = " << h_output << ", DevicePtr = " << d_output
@@ -1555,11 +1753,21 @@ void *llvm_hpvm_ocl_getOutput(void *graphID, void *h_output, void *d_output,
   checkErr(errcode, CL_SUCCESS, "[getOutput] Failure to read output");
   pthread_mutex_unlock(&ocl_mtx);
   return h_output;
+
+#else
+
+  openCLAbort();
+
+#endif
+
 }
 
 void *llvm_hpvm_ocl_executeNode(void *graphID, unsigned workDim,
                                 const size_t *localWorkSize,
                                 const size_t *globalWorkSize) {
+
+#ifdef HPVM_USE_OPENCL 
+ 
   pthread_mutex_lock(&ocl_mtx);
 
   size_t GlobalWG[3];
@@ -1610,6 +1818,13 @@ void *llvm_hpvm_ocl_executeNode(void *graphID, unsigned workDim,
 
   pthread_mutex_unlock(&ocl_mtx);
   return NULL;
+
+#else
+
+  openCLAbort();
+
+#endif
+
 }
 
 //////////////////////////////////////////////////////////////////////////////
@@ -1656,6 +1871,9 @@ static char *LoadProgSource(const char *Filename, size_t *szFinalLength) {
 }
 
 void *llvm_hpvm_ocl_launch(const char *FileName, const char *KernelName) {
+
+#ifdef HPVM_USE_OPENCL
+  
   pthread_mutex_lock(&ocl_mtx);
   DEBUG(cout << "Launch OCL Kernel\n");
   // Initialize OpenCL
@@ -1717,14 +1935,31 @@ void *llvm_hpvm_ocl_launch(const char *FileName, const char *KernelName) {
 
   pthread_mutex_unlock(&ocl_mtx);
   return Context;
+
+#else
+
+  openCLAbort();
+
+#endif
+
 }
 
 void llvm_hpvm_ocl_wait(void *graphID) {
+
+#ifdef HPVM_USE_OPENCL
+  
   pthread_mutex_lock(&ocl_mtx);
   DEBUG(cout << "Wait\n");
   DFNodeContext_OCL *Context = (DFNodeContext_OCL *)graphID;
   clFinish(Context->clCommandQue);
   pthread_mutex_unlock(&ocl_mtx);
+
+#else
+
+  openCLAbort();
+
+#endif
+
 }
 
 void llvm_hpvm_switchToTimer(void **timerSet, enum hpvm_TimerID timer) {
@@ -1749,3 +1984,5 @@ void *llvm_hpvm_initializeTimerSet() {
   pthread_mutex_unlock(&ocl_mtx);
   return TS;
 }
+
+
diff --git a/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt b/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt
index f6fed2ac296f93bc060fe09b3b889b42ee8c4a1a..cbabc8bbe0111a0ec6c99520176a8b37a530a4fb 100644
--- a/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt
+++ b/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt
@@ -53,7 +53,7 @@ endif()
 
 # -- Manually list the directories (TRT_LINK_DIRS) and libraries (TRT_LINK_LIBS)
 # tensor_runtime links to, because CMake doesn't help us do this.
-# This is needed by both approxhpvm.py and the RPATH setting step (below).
+# This is needed by both hpvm-clang and the RPATH setting step (below).
 # First, take a guess at the paths to the libraries that are used here.
 # (CMake, why do you make this so difficult?)
 foreach(interface_lib ${LINK_LIBS})
@@ -120,7 +120,7 @@ function(add_tensor_runtime target_name)
     target_link_options(${target_name} PRIVATE "-Wl,-rpath,${libdir}")
   endforeach()
   # Also slap TRT_LINK_DIRS and TRT_LINK_LIBS on this target
-  # so that approxhpvm.py can read them. (we'll create our own properties.)
+  # so that hpvm-clang can read them. (we'll create our own properties.)
   set_target_properties(
     ${target_name} PROPERTIES
     TRT_LINK_DIRS "${TRT_LINK_DIRS}"
diff --git a/hpvm/projects/torch2hpvm/torch2hpvm/compile.py b/hpvm/projects/torch2hpvm/torch2hpvm/compile.py
index 922b6795ade457ba4c961af4d2e70ce150e22e92..172448a60d4f65fc4aafc09c9a76d9cb492ff7b0 100644
--- a/hpvm/projects/torch2hpvm/torch2hpvm/compile.py
+++ b/hpvm/projects/torch2hpvm/torch2hpvm/compile.py
@@ -172,7 +172,7 @@ class ModelExporter:
         from subprocess import run
 
         args = [
-            "approxhpvm.py",
+            "hpvm-clang",
             str(self.codefile),
             str(output_binary),
             *self.compile_args,
diff --git a/hpvm/scripts/hpvm_installer.py b/hpvm/scripts/hpvm_installer.py
index 255b2864ea3f5df6a0cae9604502c715e5fdc6f1..cce8b3f07928d9ab096df3166bd02f4e6f8e1f5d 100755
--- a/hpvm/scripts/hpvm_installer.py
+++ b/hpvm/scripts/hpvm_installer.py
@@ -1,9 +1,9 @@
 #!/usr/bin/env python3
-from argparse import ArgumentParser, Namespace
+from argparse import ArgumentParser
 from os import chdir, environ, makedirs
 from pathlib import Path
 from subprocess import CalledProcessError, check_call
-from typing import List
+from typing import List, Union
 
 VERSION = "9.0.0"
 URL = "http://releases.llvm.org"
@@ -30,7 +30,7 @@ LINKS = [
     "test",
     "tools",
 ]
-MAKE_TARGETS = ["approxhpvm.py"]
+MAKE_TARGETS = ["hpvm-clang"]
 MAKE_TEST_TARGETS = ["check-hpvm-dnn", "check-hpvm-pass"]
 
 # Relative to project root which is __file__.parent.parent
@@ -79,6 +79,11 @@ def parse_args(args=None):
         "Supported targets: AArch64, AMDGPU, ARM, BPF, Hexagon, Mips, MSP430, NVPTX, PowerPC, "
         "Sparc, SystemZ, X86, XCore.",
     )
+    parser.add_argument(
+        "--ninja",
+        action="store_true",
+        help="Use Ninja to build HPVM. Uses 'make' otherwise.",
+    )
     parser.add_argument(
         "-r", "--run-tests", action="store_true", help="Build and run test cases"
     )
@@ -166,6 +171,8 @@ def print_args(args):
     print("Running with the following options:")
     print(f"  Automated build: {not args.no_build}")
     print(f"  Build directory: {args.build_dir}")
+    build_sys = "ninja" if args.ninja else "make"
+    print(f"  Build system: {build_sys}")
     print(f"  Threads: {args.parallel}")
     print(f"  Targets: {args.targets}")
     print(f"  Download DNN weights: {not args.no_params}")
@@ -256,7 +263,7 @@ def build(
     build_dir: Path,
     nthreads: int,
     targets: str,
-    build_test_targets: bool,
+    use_ninja: bool,
     cmake_additional_args: List[str],
 ):
     print("Now building...")
@@ -272,13 +279,17 @@ def build(
         f"-DLLVM_TARGETS_TO_BUILD={targets}",
         *cmake_additional_args,
     ]
+    if use_ninja:
+        cmake_args.append("-GNinja")
     print(f"CMake: {' '.join(cmake_args)}")
     print(f"=============================")
     check_call(cmake_args)
-    make_args = ["make", f"-j{nthreads}", *MAKE_TARGETS]
-    print(f"Make: {' '.join(make_args)}")
+
+    build_sys = "ninja" if use_ninja else "make"
+    build_args = [build_sys, f"-j{nthreads}", *MAKE_TARGETS]
+    print(f"Build system ({build_sys}): {' '.join(build_args)}")
     print(f"=============================")
-    check_call(make_args)
+    check_call(build_args)
     chdir(ROOT_DIR)
 
 
@@ -292,12 +303,13 @@ def install_py_packages():
         check_call([sys.executable, "-m", "pip", "install", str(package_home)])
 
 
-def run_tests(build_dir: Path, nthreads: int):
+def run_tests(build_dir: Path, use_ninja: bool, nthreads: int):
     chdir(build_dir)
-    make_args = ["make", f"-j{nthreads}", *MAKE_TEST_TARGETS]
-    print(f"Tests: {' '.join(make_args)}")
+    build_sys = "ninja" if use_ninja else "make"
+    build_args = [build_sys, f"-j{nthreads}", *MAKE_TARGETS]
+    print(f"Tests: {' '.join(build_args)}")
     print(f"=============================")
-    check_call(make_args)
+    check_call(build_args)
     chdir(ROOT_DIR)
 
 
@@ -311,8 +323,8 @@ def input_with_check(prompt: str, parse, prompt_when_invalid: str):
     return value
 
 
-def download(link: str, output: Path):
-    check_call(["curl", "-L", link, "-o", output])
+def download(link: str, output: Union[Path, str]):
+    check_call(["curl", "-L", link, "-o", str(output)])
 
 
 def main():
@@ -333,21 +345,15 @@ HPVM not installed.
 To complete installation, follow these instructions:
 - Create and navigate to a folder "./build" 
 - Run "cmake ../llvm [options]". Find potential options in README.md.
-- Run "make -j<number of threads> approxhpvm.py" and then "make install"
+- Run "make -j<number of threads> hpvm-clang" and then "make install"
 For more details refer to README.md.
 """
         )
         return
     else:
-        build(
-            args.build_dir,
-            args.parallel,
-            args.targets,
-            args.run_tests,
-            args.cmake_args,
-        )
+        build(args.build_dir, args.parallel, args.targets, args.ninja, args.cmake_args)
     if args.run_tests:
-        run_tests(args.build_dir, args.parallel)
+        run_tests(args.build_dir, args.ninja, args.parallel)
     else:
         print("Skipping tests.")
 
diff --git a/hpvm/test/CMakeLists.txt b/hpvm/test/CMakeLists.txt
index cb548b84dc9944e54c7dfdd0a0d74cf0aed9aafd..3c4f26472317f511edaab98c5e4a4f8ed7ba2dfb 100644
--- a/hpvm/test/CMakeLists.txt
+++ b/hpvm/test/CMakeLists.txt
@@ -1,5 +1,12 @@
 include(../cmake/TestFile.cmake)  # Generation of `.test` files in CMake
+
+# clang and clang++ are used to compile benchmarks
+# in `benchmarks` and `dnn_benchmarks/hpvm-c`.
+set(CLANG_C ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/clang)
+set(CLANG_CXX ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/clang++)
+
 add_subdirectory(hpvm_pass)  # Passes test suite
+add_subdirectory(benchmarks)
 add_subdirectory(dnn_benchmarks/hpvm-c)  # HPVM-C DNN accuracy test suite
 add_subdirectory(dnn_benchmarks/tensor-rt-src)  # tensor_runtime DNN (build only, no tests)
 add_subdirectory(dnn_benchmarks/profiling)  # hpvm-profiler test suite
diff --git a/hpvm/test/benchmarks/CMakeLists.txt b/hpvm/test/benchmarks/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4d4f0691fc0e70cd9b532947688da264b2ca4e7b
--- /dev/null
+++ b/hpvm/test/benchmarks/CMakeLists.txt
@@ -0,0 +1,38 @@
+macro(hpvm_compile_util_sources compiler_flags util_srcs language_mode)
+  # "Returns" ${util_bitcodes}, a list of paths to generated bitcode (.ll) files
+  foreach(src ${util_srcs})
+    set(src_path ${CMAKE_CURRENT_SOURCE_DIR}/${src})
+    get_filename_component(src_name ${src_path} NAME)
+    set(output_path ${CMAKE_CURRENT_BINARY_DIR}/${src_name}.ll)
+    add_custom_command(
+        OUTPUT ${output_path}
+        COMMAND ${CLANG_C} -x${language_mode}
+          ${compiler_flags} ${src_path} -emit-llvm -S -o ${output_path}
+        DEPENDS ${src_path}
+    )
+    list(APPEND util_bitcodes ${output_path})
+  endforeach()
+endmacro()
+
+function(add_hpvm_benchmark
+  target_name output_bin_name all_flags language_mode
+  main_src util_bitcodes
+)
+  set(output_bin_path ${CMAKE_CURRENT_BINARY_DIR}/${output_bin_name})
+  set(main_src_path ${CMAKE_CURRENT_SOURCE_DIR}/${main_src})
+  if(util_bitcodes)
+    set(bitcodes_arg -b ${util_bitcodes})
+  else()
+    set(bitcodes_arg "")
+  endif()
+  add_custom_command(
+    OUTPUT ${output_bin_path}
+    COMMAND hpvm-clang ${all_flags} -x${language_mode}
+      ${bitcodes_arg} -- ${main_src_path} ${output_bin_path}
+    DEPENDS ${main_src_path} ${util_bitcodes} hpvm-clang
+  )
+  add_custom_target(${target_name} DEPENDS ${output_bin_path})
+endfunction(add_hpvm_benchmark)
+
+add_subdirectory(hpvm-cava)
+add_subdirectory(pipeline)
diff --git a/hpvm/test/benchmarks/hpvm-cava/CMakeLists.txt b/hpvm/test/benchmarks/hpvm-cava/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d89a01a05669a0c49fecf1ccbd25029fce14143b
--- /dev/null
+++ b/hpvm/test/benchmarks/hpvm-cava/CMakeLists.txt
@@ -0,0 +1,24 @@
+set(other_srcs src/load_cam_model.c src/cam_pipe_utility.c src/utility.c)
+set(
+  compiler_flags
+  # Insanity warning: only -O1 works for this benchmark.
+  -O1 -I ${CMAKE_CURRENT_SOURCE_DIR}/src
+  -DDMA_MODE -DDMA_INTERFACE_V3
+)
+
+# Sets ${util_bitcodes}
+hpvm_compile_util_sources("${compiler_flags}" "${other_srcs}" "c")
+
+set(all_flags_cpu ${compiler_flags} "-DDEVICE=CPU_TARGET" "-lpthread")
+add_hpvm_benchmark(
+  "hpvm_cava_cpu" "hpvm-cava-cpu" "${all_flags_cpu}" "c"
+  src/main.c "${util_bitcodes}"
+)
+
+if(HPVM_USE_OPENCL)
+  set(all_flags_gpu ${compiler_flags} "-DDEVICE=GPU_TARGET" "--opencl" "-lpthread")
+  add_hpvm_benchmark(
+    "hpvm_cava_gpu" "hpvm-cava-gpu" "${all_flags_gpu}" "c"
+    src/main.c "${util_bitcodes}"
+  )
+endif()
diff --git a/hpvm/test/benchmarks/hpvm-cava/Makefile b/hpvm/test/benchmarks/hpvm-cava/Makefile
deleted file mode 100644
index 58dfa72aacb172252ea1c13ed3331322fb600861..0000000000000000000000000000000000000000
--- a/hpvm/test/benchmarks/hpvm-cava/Makefile
+++ /dev/null
@@ -1,131 +0,0 @@
-# This Makefile compiles the HPVM-CAVA pilot project. 
-# It builds HPVM-related dependencies, then the native camera pipeline ISP code.
-#
-# Paths to some dependencies (e.g., HPVM, LLVM) must exist in Makefile.config,
-# which can be copied from Makefile.config.example for a start.
-
-CONFIG_FILE := ../include/Makefile.config
-
-ifeq ($(wildcard $(CONFIG_FILE)),)
-    $(error $(CONFIG_FILE) not found. See $(CONFIG_FILE).example)
-endif
-include $(CONFIG_FILE)
-
-# Compiler Flags
-
-LFLAGS += -lm -lrt
-
-ifeq ($(TARGET),)
-    TARGET = gpu
-endif
-
-# Build dirs
-SRC_DIR = src/
-CAM_PIPE_SRC_DIR = $(SRC_DIR)
-BUILD_DIR = build/$(TARGET)
-CURRENT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
-
-
-INCLUDES += -I$(SRC_DIR) -I$(CAM_PIPE_SRC_DIR)
-ifneq ($(CONFUSE_ROOT),)
-  INCLUDES += -I$(CONFUSE_ROOT)/include
-  LFLAGS += -L$(CONFUSE_ROOT)/lib
-endif
-
-EXE = cava-hpvm-$(TARGET)
-
-LFLAGS += -pthread
-
-## BEGIN HPVM MAKEFILE
-LANGUAGE=hpvm
-SRCDIR_OBJS=load_cam_model.ll cam_pipe_utility.ll utility.ll
-OBJS_SRC=src/load_cam_model.c src/cam_pipe_utility.c src/utility.c
-HPVM_OBJS=main.hpvm.ll
-APP = $(EXE)
-APP_CUDALDFLAGS=-lm -lstdc++
-APP_CFLAGS= $(INCLUDES) -DDMA_MODE -DDMA_INTERFACE_V3
-APP_CXXFLAGS=-ffast-math -O0 -I/opt/opencv/include
-APP_LDFLAGS=$(LFLAGS)
-
-CFLAGS = -O1 $(APP_CFLAGS) $(PLATFORM_CFLAGS)
-OBJS_CFLAGS = -O1 $(APP_CFLAGS) $(PLATFORM_CFLAGS)
-CXXFLAGS = $(APP_CXXFLAGS) $(PLATFORM_CXXFLAGS)
-LDFLAGS= $(APP_LDFLAGS) $(PLATFORM_LDFLAGS)
-
-HPVM_RT_PATH = $(LLVM_BUILD_DIR)/tools/hpvm/projects/hpvm-rt
-
-HPVM_RT_LIB = $(HPVM_RT_PATH)/hpvm-rt.bc
-
-
-TESTGEN_OPTFLAGS = -load LLVMGenHPVM.so -genhpvm -globaldce
-
-ifeq ($(TARGET),seq)
-  DEVICE = CPU_TARGET
-  HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -dfg2llvm-cpu -clearDFG
-  HPVM_OPTFLAGS += -hpvm-timers-cpu
-else
-  DEVICE = GPU_TARGET
-  HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -localmem -dfg2llvm-opencl -dfg2llvm-cpu -clearDFG
-  HPVM_OPTFLAGS += -hpvm-timers-cpu -hpvm-timers-ptx
-endif
-  TESTGEN_OPTFLAGS += -hpvm-timers-gen
-
-CFLAGS += -DDEVICE=$(DEVICE)
-CXXFLAGS += -DDEVICE=$(DEVICE)
-
-# Add BUILDDIR as a prefix to each element of $1
-INBUILDDIR=$(addprefix $(BUILD_DIR)/,$(1))
-
-.PRECIOUS: $(BUILD_DIR)/%.ll
-
-OBJS = $(call INBUILDDIR,$(SRCDIR_OBJS))
-TEST_OBJS = $(call INBUILDDIR,$(HPVM_OBJS))
-KERNEL = $(TEST_OBJS).kernels.ll
-
-ifeq ($(TARGET),gpu)
-  KERNEL_OCL = $(TEST_OBJS).kernels.cl
-endif
-
-HOST_LINKED = $(BUILD_DIR)/$(APP).linked.ll
-HOST = $(BUILD_DIR)/$(APP).host.ll
-
-ifeq ($(OPENCL_PATH),)
-FAILSAFE=no_opencl
-else 
-FAILSAFE=
-endif
-
-# Targets
-
-default: $(FAILSAFE) $(BUILD_DIR) $(KERNEL_OCL) $(EXE)
-
-clean :
-	if [ -f $(EXE) ]; then rm $(EXE); fi
-	if [ -f DataflowGraph.dot ]; then rm DataflowGraph.dot*; fi
-	if [ -d $(BUILD_DIR) ]; then rm -rf $(BUILD_DIR); fi
-
-$(KERNEL_OCL) : $(KERNEL)
-	$(OCLBE) $< -o $@
-
-$(EXE) : $(HOST_LINKED)
-	$(CXX) -O3 $(LDFLAGS) $< -o $@
-
-$(HOST_LINKED) : $(HOST) $(OBJS) $(HPVM_RT_LIB)
-	$(LLVM_LINK) $^ -S -o $@
-
-$(HOST) $(KERNEL): $(BUILD_DIR)/$(HPVM_OBJS)
-	$(OPT) $(HPVM_OPTFLAGS) -S $< -o $(HOST)
-
-$(BUILD_DIR):
-	mkdir -p $(BUILD_DIR)
-
-$(BUILD_DIR)/%.ll : $(SRC_DIR)/%.c
-	$(CC) $(OBJS_CFLAGS) -emit-llvm -S -o $@ $<
-
-$(BUILD_DIR)/main.ll : $(SRC_DIR)/main.c
-	$(CC) $(CFLAGS) -emit-llvm -S -o $@ $<
-
-$(BUILD_DIR)/main.hpvm.ll : $(BUILD_DIR)/main.ll
-	$(OPT) $(TESTGEN_OPTFLAGS) $< -S -o $@
-
-## END HPVM MAKEFILE
diff --git a/hpvm/test/benchmarks/hpvm-cava/src/main.c b/hpvm/test/benchmarks/hpvm-cava/src/main.c
index 09430239d7c06a0b2fab2a59c7e6310babe617c6..0d8fb37d0103b55234e59e8d2b66742faed207b6 100644
--- a/hpvm/test/benchmarks/hpvm-cava/src/main.c
+++ b/hpvm/test/benchmarks/hpvm-cava/src/main.c
@@ -53,6 +53,7 @@ typedef struct __attribute__((__packed__)) {
 } RootIn;
 
 typedef enum _argnum {
+  CAM_MODEL,
   RAW_IMAGE_BIN,
   OUTPUT_IMAGE_BIN,
   NUM_REQUIRED_ARGS,
@@ -67,7 +68,7 @@ typedef struct _arguments {
 } arguments;
 
 static char prog_doc[] = "\nCamera pipeline on gem5-Aladdin.\n";
-static char args_doc[] = "path/to/raw-image-binary path/to/output-image-binary";
+static char args_doc[] = "path/to/cam-model path/to/raw-image-binary path/to/output-image-binary";
 static struct argp_option options[] = {
     {"num-inputs", 'n', "N", 0, "Number of input images"},
     {0},
@@ -724,14 +725,14 @@ int main(int argc, char *argv[]) {
       *transform_out, *gamut_out;
   float *TsTw, *ctrl_pts, *weights, *coefs, *tone_map, *l2_dist;
 
-  TsTw = get_TsTw("cam_models/NikonD7000/", wb_index);
+  TsTw = get_TsTw(args.args[CAM_MODEL], wb_index);
   float *trans = transpose_mat(TsTw, CHAN_SIZE, CHAN_SIZE);
   free(TsTw);
   TsTw = trans;
-  ctrl_pts = get_ctrl_pts("cam_models/NikonD7000/", num_ctrl_pts);
-  weights = get_weights("cam_models/NikonD7000/", num_ctrl_pts);
-  coefs = get_coefs("cam_models/NikonD7000/", num_ctrl_pts);
-  tone_map = get_tone_map("cam_models/NikonD7000/");
+  ctrl_pts = get_ctrl_pts(args.args[CAM_MODEL], num_ctrl_pts);
+  weights = get_weights(args.args[CAM_MODEL], num_ctrl_pts);
+  coefs = get_coefs(args.args[CAM_MODEL], num_ctrl_pts);
+  tone_map = get_tone_map(args.args[CAM_MODEL]);
 
   input_scaled = (float *)malloc_aligned(bytes_fimage);
   result_scaled = (float *)malloc_aligned(bytes_fimage);
diff --git a/hpvm/test/benchmarks/pipeline/CMakeLists.txt b/hpvm/test/benchmarks/pipeline/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..db2b8eeab1647f4ac72c05127877ecec1f949c90
--- /dev/null
+++ b/hpvm/test/benchmarks/pipeline/CMakeLists.txt
@@ -0,0 +1,15 @@
+find_package(OpenCV 2)
+if(${OpenCV_FOUND})
+  set(
+    all_flags
+    -O3 -I${OpenCV_INCLUDE_DIRS}
+    -ffast-math -fno-lax-vector-conversions -fno-vectorize -fno-slp-vectorize
+    -lpthread
+  )
+  add_hpvm_benchmark("pipeline_cpu" "pipeline-cpu" "${all_flags}" "c++" src/main.cc "")
+  if(HPVM_USE_OPENCL)
+    add_hpvm_benchmark("pipeline_gpu" "pipeline-gpu" "${all_flags};--opencl" "c++" src/main.cc "")
+  endif()
+else()
+  message(WARNING "opencv-2 not found; not compiling HPVM benchmark 'pipeline'.")
+endif()
diff --git a/hpvm/test/benchmarks/pipeline/Makefile b/hpvm/test/benchmarks/pipeline/Makefile
deleted file mode 100644
index 8a55393f241f30d840cbf85c31488e652c2023a0..0000000000000000000000000000000000000000
--- a/hpvm/test/benchmarks/pipeline/Makefile
+++ /dev/null
@@ -1,118 +0,0 @@
-# This Makefile compiles the HPVM-pipeline test.
-# It builds HPVM-related dependencies, then the native camera pipeline ISP code.
-#
-# Paths to some dependencies (e.g., HPVM, LLVM) must exist in Makefile.config,
-# which can be copied from Makefile.config.example for a start.
-
-CONFIG_FILE := ../include/Makefile.config
-
-ifeq ($(wildcard $(CONFIG_FILE)),)
-    $(error $(CONFIG_FILE) not found. See $(CONFIG_FILE).example)
-endif
-include $(CONFIG_FILE)
-
-# Compiler Flags
-
-LFLAGS += -lm -lrt
-
-ifeq ($(TARGET),)
-    TARGET = gpu
-endif
-
-# Build dirs
-SRC_DIR = src/
-BUILD_DIR = build/$(TARGET)
-CURRENT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
-
-EXE = pipeline-$(TARGET)
-
-INCLUDES += -I$(SRC_DIR)
-
-## BEGIN HPVM MAKEFILE
-SRCDIR_OBJS=
-HPVM_OBJS=main.hpvm.ll
-APP = $(EXE)
-APP_CFLAGS += $(INCLUDES) -ffast-math -O3 -fno-lax-vector-conversions -fno-vectorize -fno-slp-vectorize
-APP_CXXFLAGS += $(INCLUDES) -ffast-math -O3 -fno-lax-vector-conversions -fno-vectorize -fno-slp-vectorize
-APP_LDFLAGS=`pkg-config opencv --libs`
-
-CFLAGS = $(APP_CFLAGS) $(PLATFORM_CFLAGS)
-OBJS_CFLAGS = $(APP_CFLAGS) $(PLATFORM_CFLAGS)
-CXXFLAGS = $(APP_CXXFLAGS) $(PLATFORM_CXXFLAGS)
-LDFLAGS= $(APP_LDFLAGS) $(PLATFORM_LDFLAGS)
-
-HPVM_RT_PATH = $(LLVM_BUILD_DIR)/tools/hpvm/projects/hpvm-rt
-HPVM_RT_LIB = $(HPVM_RT_PATH)/hpvm-rt.bc
-
-TESTGEN_OPTFLAGS = -load LLVMGenHPVM.so -genhpvm -globaldce
-
-ifeq ($(TARGET),seq)
-  DEVICE = CPU_TARGET
-  HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -dfg2llvm-cpu -clearDFG
-  HPVM_OPTFLAGS += -hpvm-timers-cpu
-else
-  DEVICE = GPU_TARGET
-  HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -localmem -dfg2llvm-opencl -dfg2llvm-cpu -clearDFG
-  HPVM_OPTFLAGS += -hpvm-timers-cpu -hpvm-timers-ptx
-endif
-  TESTGEN_OPTFLAGS += -hpvm-timers-gen
-
-CFLAGS += -DDEVICE=$(DEVICE)
-CXXFLAGS += -DDEVICE=$(DEVICE)
-
-# Add BUILDDIR as a prefix to each element of $1
-INBUILDDIR=$(addprefix $(BUILD_DIR)/,$(1))
-
-.PRECIOUS: $(BUILD_DIR)/%.ll
-
-OBJS = $(call INBUILDDIR,$(SRCDIR_OBJS))
-TEST_OBJS = $(call INBUILDDIR,$(HPVM_OBJS))
-KERNEL = $(TEST_OBJS).kernels.ll
-
-ifeq ($(TARGET),seq)
-else
-  KERNEL_OCL = $(TEST_OBJS).kernels.cl
-endif
-
-HOST_LINKED = $(BUILD_DIR)/$(APP).linked.ll
-HOST = $(BUILD_DIR)/$(APP).host.ll
-
-ifeq ($(OPENCL_PATH),)
-FAILSAFE=no_opencl
-else 
-FAILSAFE=
-endif
-
-# Targets
-default: $(FAILSAFE) $(BUILD_DIR) $(KERNEL_OCL) $(EXE)
-
-clean :
-	if [ -f $(EXE) ]; then rm $(EXE); fi
-	if [ -f DataflowGraph.dot ]; then rm DataflowGraph.dot*; fi
-	if [ -d $(BUILD_DIR) ]; then rm -rf $(BUILD_DIR); fi
-
-$(KERNEL_OCL) : $(KERNEL)
-	$(OCLBE) $< -o $@
-
-$(EXE) : $(HOST_LINKED)
-	$(CXX) -O3 $(LDFLAGS) $< -o $@
-
-$(HOST_LINKED) : $(HOST) $(OBJS) $(HPVM_RT_LIB)
-	$(LLVM_LINK) $^ -S -o $@
-
-$(HOST) $(KERNEL): $(BUILD_DIR)/$(HPVM_OBJS)
-	$(OPT) $(HPVM_OPTFLAGS) -S $< -o $(HOST)
-
-$(BUILD_DIR):
-	mkdir -p $(BUILD_DIR)
-
-$(BUILD_DIR)/%.ll : $(SRC_DIR)/%.cc
-	$(CC) $(OBJS_CFLAGS) -emit-llvm -S -o $@ $<
-
-$(BUILD_DIR)/main.ll : $(SRC_DIR)/main.cc
-	$(CC) $(CXXFLAGS) -emit-llvm -S -o $@ $<
-
-$(BUILD_DIR)/main.hpvm.ll : $(BUILD_DIR)/main.ll
-	$(OPT) $(TESTGEN_OPTFLAGS) $< -S -o $@
-
-## END HPVM MAKEFILE
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/CMakeLists.txt b/hpvm/test/dnn_benchmarks/hpvm-c/CMakeLists.txt
index d429bca23cd56cafc70a84fe79a35fed38912bec..9f34317d34157d57468c60cb854828b5c54f1cde 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/CMakeLists.txt
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/CMakeLists.txt
@@ -1,6 +1,3 @@
-# First get approxhpvm.py which we then use to compile benchmarks.
-get_filename_component(APPROXHPVM_PY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/approxhpvm.py REALPATH)
-
 # Each source file contains a @MODEL_PARAMS_DIR@ waiting to be filled in.
 set(MODEL_PARAMS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../model_params/")
 set(test_compile_targets "")
@@ -12,9 +9,9 @@ function(compile_hpvm_c target_name src_filepath codegen_target)
   # the name of output file and custom target to clash.
   add_custom_command(
     OUTPUT ${output_bin_path}
-    DEPENDS ${generated_file_path} approxhpvm.py
-    COMMAND ${APPROXHPVM_PY}
-      ${generated_file_path} ${output_bin_path}
+    DEPENDS ${generated_file_path} hpvm-clang
+    COMMAND hpvm-clang
+      ${generated_file_path} ${output_bin_path} -O3 -fno-exceptions
       -t ${codegen_target} -I ${CMAKE_CURRENT_SOURCE_DIR}/include ${ARGN}
   )
   add_custom_target(${target_name} DEPENDS ${output_bin_path})
diff --git a/hpvm/test/dnn_benchmarks/keras/Benchmark.py b/hpvm/test/dnn_benchmarks/keras/Benchmark.py
index c225ff97661aef8f4fceb123b79190cb7c819dd7..f3d8e9e6b2268618dc835e3d27374a8f7d738a86 100644
--- a/hpvm/test/dnn_benchmarks/keras/Benchmark.py
+++ b/hpvm/test/dnn_benchmarks/keras/Benchmark.py
@@ -52,20 +52,23 @@ class Benchmark:
         
         try:
             subprocess.run([
-                "approxhpvm.py", 
+                "hpvm-clang", 
                 "-h"
             ], check=True, stdout=FNULL)
             
         except:
-            print ("\n\n ERROR: Could not find approxhpvm.py (HPVM compile script)!! \n\n")
-            print ("To Compile, Must set PATH to include approxhpvm.py script. Do the following: ")
-            print ("**** export PATH=${PATH_TO_YOUR_HPVM_INSTALLATION}/build/bin/:$PATH *****")
+            print("""
+
+ERROR: Could not find hpvm-clang (HPVM compile script)!!
+
+hpvm-clang is installed to the python environment used when compiling HPVM.
+Please try rerunning 'make -j hpvm-clang'.""")
             sys.exit(1)
 
 
         try:
             subprocess.run([
-                "approxhpvm.py", src_file, target_binary,
+                "hpvm-clang", src_file, target_binary,
                 "-t", "tensor", "--conf-file", approx_conf_file
             ], check=True)
         except:
diff --git a/hpvm/tools/CMakeLists.txt b/hpvm/tools/CMakeLists.txt
index 495348404f3594d8bca46b87acd8be2d08d2c8a4..4758dde6a3f1564739135225f6f8673b5d34843d 100644
--- a/hpvm/tools/CMakeLists.txt
+++ b/hpvm/tools/CMakeLists.txt
@@ -1,2 +1,2 @@
 add_llvm_tool_subdirectory(hpvm-config)
-add_llvm_tool_subdirectory(py-approxhpvm)
\ No newline at end of file
+add_llvm_tool_subdirectory(hpvm-clang)
\ No newline at end of file
diff --git a/hpvm/tools/hpvm-clang/CMakeLists.txt b/hpvm/tools/hpvm-clang/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..3564b00d0aa480996c152c1f027fa6fc39cd144c
--- /dev/null
+++ b/hpvm/tools/hpvm-clang/CMakeLists.txt
@@ -0,0 +1,87 @@
+# This file is very tightly coupled with main.py.in.
+# Watch out and keep them in sync.
+
+# CMake fills in some variables in main.py.in and generate it into a python package:
+# `hpvmpy`, which is the main entry point and Python API for HPVM.
+
+# ---[ Define variables for main.py.in
+# main.py.in requires the following variables:
+# LLVM_PROJECT_DIR, LLVM_BUILD_DIR
+# TRT_PATH, TRT_INCLUDE_DIRS, TRT_LINK_DIRS, TRT_LINK_LIBS
+# DIRECT_LINK_LIBS, HPVM_USE_OPENCL (defined globally)
+# AVAILABLE_PASSES, HPVM_RT_PATH
+
+set(LLVM_PROJECT_DIR ${CMAKE_SOURCE_DIR})
+set(LLVM_BUILD_DIR ${CMAKE_BINARY_DIR})
+
+get_target_property(TRT_INCLUDE_DIRS tensor_runtime INCLUDE_DIRECTORIES)
+get_target_property(TRT_LINK_DIRS tensor_runtime TRT_LINK_DIRS)
+get_target_property(TRT_LINK_LIBS tensor_runtime TRT_LINK_LIBS)
+
+set(DIRECT_LINK_LIBS "$<TARGET_FILE:tensor_runtime>")
+if(${HPVM_USE_OPENCL})
+  # We need to link to OpenCL libs when hpvm uses opencl
+  # because OpenCL functions may be injected by the OpenCL pass.
+  list(APPEND DIRECT_LINK_LIBS ${OpenCL_LIBRARY})
+else()
+  message(STATUS "hpvm-clang is not using OpenCL (--opencl flag not available).")
+endif()
+
+# The hpvm-rt runtime
+# This has to be explicitly set as hpvm-rt.bc is created in a custom_target
+# and does not export its file location.
+# Keep this in sync with hpvm/projects/hpvm-rt/CMakeLists.txt.
+set(HPVM_RT_PATH ${LLVM_BUILD_DIR}/tools/hpvm/projects/hpvm-rt/hpvm-rt.bc)
+# And this must be manually in sync with `hpvm/lib/Transforms/*`.
+# which is fine for because we don't have many passes for now.
+set(
+    AVAILABLE_PASSES
+    LLVMBuildDFG
+    LLVMClearDFG
+    LLVMDFG2LLVM_CPU
+    LLVMDFG2LLVM_CUDNN
+    LLVMDFG2LLVM_OpenCL
+    LLVMDFG2LLVM_WrapperAPI
+    LLVMFuseHPVMTensorNodes
+    LLVMGenHPVM
+    LLVMInPlaceDFGAnalysis
+    LLVMLocalMem
+)
+
+# ---[ Create package folder structure
+# This sounds crazy but since main.py.in is generated into another file under build/ dir,
+# to make a python package around it, we'll have to generate the whole package structure
+# in build/ as well.
+# Target dir structure:
+# ${CMAKE_CURRENT_BINARY_DIR}
+#   hpvmpy/
+#     __init__.py  <- generated from main.py.in
+#   setup.py       <- copied from setup.py
+file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/hpvmpy)
+file(COPY setup.py DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
+
+# ---[ Generate main.py.in to hpvmpy/__init__.py
+set(init_path ${CMAKE_CURRENT_BINARY_DIR}/hpvmpy/__init__.py)
+# First resolve all `@symbol@` by configuring the file
+configure_file(main.py.in ${CMAKE_CURRENT_BINARY_DIR}/main.py.conf)
+# Then resolve all generator expressions we configured into the previous file
+file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/main.py INPUT ${CMAKE_CURRENT_BINARY_DIR}/main.py.conf)
+# Delibrately create an extra step of moving file
+# which is carried out at build time (as a target)
+# so we can set these dependencies on it.
+set(DEPS tensor_runtime hpvm-rt-bc clang opt llvm-link ${AVAILABLE_PASSES})
+add_custom_command(
+    OUTPUT ${init_path}
+    COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/main.py ${init_path}
+    DEPENDS ${DEPS} ${CMAKE_CURRENT_BINARY_DIR}/main.py
+)
+
+# ---[ Call python3 -m pip to install this package.
+add_custom_target(
+    hpvmpy
+    COMMAND python3 -m pip install ./
+    DEPENDS ${init_path} setup.py
+)
+# hpvm-clang is the name of the compiler binary (see setup.py)
+# We provide this alias for convenience
+add_custom_target(hpvm-clang DEPENDS hpvmpy)
diff --git a/hpvm/tools/hpvm-clang/main.py.in b/hpvm/tools/hpvm-clang/main.py.in
new file mode 100644
index 0000000000000000000000000000000000000000..e2bc5cbafa23bd64094a3198ad8466f682f6bbdc
--- /dev/null
+++ b/hpvm/tools/hpvm-clang/main.py.in
@@ -0,0 +1,275 @@
+#!/usr/bin/env python3
+import argparse
+import os
+from pathlib import Path
+from typing import List, Union, Optional
+
+PathLike = Union[Path, str]
+
+HPVM_PROJECT_DIR = Path("@LLVM_PROJECT_DIR@") / "tools/hpvm"
+LLVM_BUILD_BIN = Path("@LLVM_BUILD_DIR@") / "bin"
+
+# Directories to include
+TRT_INCLUDE_DIRS = "@TRT_INCLUDE_DIRS@".split(";")
+TRT_LINK_DIRS = [Path(s) for s in "@TRT_LINK_DIRS@".split(";")]
+TRT_LINK_LIBS = "@TRT_LINK_LIBS@".split(";")
+DIRECT_LINK_LIBS = "@DIRECT_LINK_LIBS@".split(";")
+HPVM_USE_OPENCL = int("@HPVM_USE_OPENCL@")
+
+AVAILABLE_PASSES = "@AVAILABLE_PASSES@".split(";")
+HPVM_RT_PATH = "@HPVM_RT_PATH@"
+
+
+def compile_hpvm_c(
+    hpvm_src: PathLike,
+    output_file: PathLike,
+    tensor_target: Optional[str],
+    opencl: bool,
+    link_bitcode: List[PathLike] = None,
+    include: List[PathLike] = None,
+    macro: List[str] = None,
+    flags: List[str] = None,
+    optim_level: str = "0",  # -O0
+    is_cpp: bool = True,  # otherwise is C
+    std: str = None,  # language std (-std=c++11)
+    link_dirs: List[PathLike] = None,
+    link_libs: List[str] = None,
+    working_dir: PathLike = None,
+    conf_file: PathLike = None,
+):
+    from subprocess import check_output
+
+    passes = ["LLVMBuildDFG"]
+    pass_flags = ["buildDFG"]
+    if tensor_target == "tensor":
+        if conf_file is None:
+            raise ValueError("conf_file must be defined when tensor_target=='tensor'.")
+        passes += ["LLVMInPlaceDFGAnalysis", "LLVMFuseHPVMTensorNodes", "LLVMDFG2LLVM_WrapperAPI"]
+        pass_flags += [
+            "inplace", "hpvm-fuse", "dfg2llvm-wrapperapi",
+            f"configuration-inputs-filename={conf_file}"
+        ]
+    elif tensor_target == "cudnn":
+        passes += ["LLVMInPlaceDFGAnalysis", "LLVMDFG2LLVM_CUDNN"]
+        pass_flags += ["inplace", "dfg2llvm-cudnn"]
+    elif tensor_target is None:
+        passes += ["LLVMLocalMem"]
+        pass_flags += ["localmem"]
+    else:
+        raise ValueError(f"Tensor target {tensor_target} not recognized")
+    if opencl:
+        passes += ["LLVMDFG2LLVM_OpenCL"]
+        pass_flags += ["dfg2llvm-opencl"]
+    passes += ["LLVMDFG2LLVM_CPU", "LLVMClearDFG"]
+    pass_flags += ["dfg2llvm-cpu", "clearDFG"]
+
+    working_dir = Path(working_dir or ".")
+    if not working_dir.is_dir():
+        os.makedirs(working_dir)
+
+    # All commands for compiling the main hpvm_c file
+    name_stem = Path(hpvm_src).stem
+    ll_file = working_dir / f"{name_stem}.ll"
+    hpvm_ll_file = working_dir / f"{name_stem}.hpvm.ll"
+    llvm_ll_file = working_dir / f"{name_stem}.llvm.ll"
+    hpvm_rt_linked_file = working_dir / f"{name_stem}.linked.bc"
+    link_bitcode_ = [Path(bc) for bc in (link_bitcode or [])]
+    commands = [
+        hpvm_c_to_ll(hpvm_src, ll_file, include, macro, flags, optim_level, is_cpp, std),
+        opt_codegen_hpvm(ll_file, hpvm_ll_file),
+        _run_opt(hpvm_ll_file, llvm_ll_file, passes, pass_flags),
+        link_hpvm_rt(link_bitcode_ + [llvm_ll_file], hpvm_rt_linked_file),
+    ]
+    commands.append(
+        link_binary(hpvm_rt_linked_file, output_file, link_dirs, link_libs)
+    )
+    for command in commands:
+        print(" ".join(command))
+        check_output(command)
+
+
+def hpvm_c_to_ll(
+    src_file: PathLike,
+    target_file: PathLike,
+    extra_includes: List[PathLike] = None,
+    macros: List[str] = None,
+    flags: List[str] = None,
+    optim_level: str = "0",  # -O0
+    is_cpp: bool = True,  # otherwise is C
+    std: str = None,  # --std=c++11
+) -> List[str]:
+    includes = (extra_includes or []) + TRT_INCLUDE_DIRS
+    includes_s = [f"-I{path}" for path in includes]
+    macros = [f"-D{macro}" for macro in (macros or [])]
+    flags = [f"-f{flg}" for flg in (flags or [])]
+    if std:
+        flags.append(f"-std={std}")
+    clang = "clang++" if is_cpp else "clang"
+    return [
+        str(LLVM_BUILD_BIN / clang), *includes_s, *flags, *macros,
+        f"-O{optim_level}", "-emit-llvm", "-S",
+        str(src_file), "-o", str(target_file)
+    ]
+
+
+def opt_codegen_hpvm(src_file: PathLike, target_file: PathLike) -> List[str]:
+    return _run_opt(src_file, target_file, ["LLVMGenHPVM"], ["genhpvm", "globaldce"])
+
+
+def link_hpvm_rt(bitcodes: List[PathLike], target_file: PathLike) -> List[str]:
+    bitcodes_s = [str(bc) for bc in bitcodes]
+    return [str(LLVM_BUILD_BIN / "llvm-link"), *bitcodes_s, HPVM_RT_PATH, "-S", "-o", str(target_file)]
+
+
+def link_binary(
+    src_file: PathLike,
+    target_file: PathLike,
+    extra_link_dirs: List[PathLike] = None,
+    extra_link_libs: List[str] = None
+) -> List[str]:
+    link_dirs, link_libs = _link_args(extra_link_dirs or [], extra_link_libs or [])
+    linker_dir_flags = []
+    for path in link_dirs:
+        linker_dir_flags.extend([f"-L{path}", f"-Wl,-rpath={path}"])
+    linker_lib_flags = [f"-l{lib}" for lib in link_libs]
+    return [
+        str(LLVM_BUILD_BIN / "clang++"), str(src_file),
+        "-o", str(target_file), *linker_dir_flags, *linker_lib_flags
+    ]
+
+
+def _link_args(extra_link_dirs: List[PathLike], extra_link_libs: List[str]):
+    def drop_suffix(libname: str):
+        import re
+
+        match = re.match(r"lib(.*)\.so", libname)
+        return libname if match is None else match.group(1)
+
+    link_dirs, link_libs = extra_link_dirs.copy(), extra_link_libs.copy()
+    for lib in DIRECT_LINK_LIBS:
+        lib = Path(lib)
+        link_dirs.append(lib.parent)
+        link_libs.append(lib.name)
+    link_dirs += TRT_LINK_DIRS
+    link_libs += TRT_LINK_LIBS
+    link_libnames = [drop_suffix(s) for s in link_libs]
+    return link_dirs, link_libnames
+
+
+def _run_opt(
+    src_file: PathLike,
+    target_file: PathLike,
+    pass_names: List[str],
+    pass_flags: List[str],
+) -> List[str]:
+    unavailable = set(pass_names) - set(AVAILABLE_PASSES)
+    if unavailable:
+        raise ValueError(f"Passes {unavailable} are unavailable for this compilation.")
+    load_passes_strs = [s for pass_ in pass_names for s in ["-load", f"{pass_}.so"]]
+    pass_flags_strs = [f"-{flag}" for flag in pass_flags]
+    return [
+        str(LLVM_BUILD_BIN / "opt"), *load_passes_strs, *pass_flags_strs,
+        "-S", str(src_file), "-o", str(target_file)
+    ]
+
+
+def parse_args():
+    parser = argparse.ArgumentParser("hpvm-clang")
+    parser.add_argument(
+        "hpvm_src", type=Path,
+        help="""HPVM-C code to compile.
+HPVM-C code must be single file, but additional bitcode file can be linked together.
+See option -b for that."""
+    )
+    parser.add_argument("output_file", type=Path, help="Path to generate binary to")
+    parser.add_argument(
+        "-x", type=str, metavar="language", default="c++",
+        help="Treat input file as having type <language>",
+    )
+    parser.add_argument(
+        "-b",
+        "--link-bitcode",
+        type=Path,
+        nargs="+",
+        help="Additional bitcode (.ll/.bc) files to link to",
+    )
+    parser.add_argument(
+        "-t",
+        "--tensor-target",
+        type=str,
+        choices=["tensor", "cudnn"],
+        help="Backend to use for tensor operators",
+    )
+    parser.add_argument(
+        "--conf-file", type=Path,
+        help="File to approximation configurations; required for tensor target 'tensor'"
+    )
+    parser.add_argument(
+        "--opencl",
+        action="store_true",
+        help="Use OpenCL support. Requires HPVM built with OpenCL",
+    )
+    parser.add_argument(
+        "-d", "--working-dir", type=Path, help="Directory to generate temp files in"
+    )
+
+    # Relaying arguments for clang++ (source -> bitcode stage)
+    parser.add_argument(
+        "-I", "--include", type=Path, action="append", metavar="dir",
+        help="[clang emit-llvm] Add directory to include search path"
+    )
+    parser.add_argument(
+        "-D", type=str, action="append", metavar="<macro>=<value>",
+        help="[clang emit-llvm] Define macro"
+    )
+    parser.add_argument(
+        "-f", type=str, action="append", metavar="flag",
+        help="[clang emit-llvm] clang++ flags (such as -ffastmath)"
+    )
+    parser.add_argument(
+        "-O", type=str, default="0", metavar="level",
+        help="[clang emit-llvm] Optimization level"
+    )
+    parser.add_argument(
+        "--std", type=str,
+        help="[clang emit-llvm] Language standard to compile for. Double dashes (--std, not -std)."
+    )
+
+    # Relaying arguments for clang++ (linking stage)
+    parser.add_argument(
+        "-L", type=Path, action="append", metavar="dir",
+        help="[clang linker] Add directory to library search path"
+    )
+    parser.add_argument(
+        "-l", type=str, action="append", metavar="name",
+        help="[clang linker] Link library (such as -lpthread)"
+    )
+
+    args = parser.parse_args()
+    if args.tensor_target == "tensor":
+        if args.conf_file is None:
+            parser.error('Tensor target "tensor" requires --conf-file argument')
+    if args.x == "c":
+        args.is_cpp = False
+    elif args.x == "c++":
+        args.is_cpp = True
+    else:
+        parser.error(f"Language mode {args.x} not supported yet -- only c or c++")
+    if not HPVM_USE_OPENCL and args.opencl:
+        parser.error(f"OpenCL is disabled for this build of HPVM.")
+    return args
+
+
+def main():
+    args = vars(parse_args())
+    args["macro"] = args.pop("D")
+    args["flags"] = args.pop("f")
+    args["optim_level"] = args.pop("O")
+    args["link_dirs"] = args.pop("L")
+    args["link_libs"] = args.pop("l")
+    args.pop("x")
+    compile_hpvm_c(**args)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/hpvm/tools/hpvm-clang/setup.py b/hpvm/tools/hpvm-clang/setup.py
new file mode 100644
index 0000000000000000000000000000000000000000..75262a069de32e6f5de4bff89738e0011dd91326
--- /dev/null
+++ b/hpvm/tools/hpvm-clang/setup.py
@@ -0,0 +1,15 @@
+import setuptools
+
+setuptools.setup(
+    name="hpvmpy",
+    version="1.0",
+    author="Yifan Zhao",
+    author_email="yifanz16@illinois.edu",
+    description="HPVM Python API",
+    packages=["hpvmpy"],
+    entry_points={
+        "console_scripts": [
+            "hpvm-clang = hpvmpy:main",
+        ],
+    },
+)
diff --git a/hpvm/tools/py-approxhpvm/CMakeLists.txt b/hpvm/tools/py-approxhpvm/CMakeLists.txt
deleted file mode 100644
index 1e5eeb891d35fc028d1aa85c5e5e679902a4dad7..0000000000000000000000000000000000000000
--- a/hpvm/tools/py-approxhpvm/CMakeLists.txt
+++ /dev/null
@@ -1,54 +0,0 @@
-# This file is very tightly coupled with main.py.in.
-# Watch out and keep them in sync.
-# main.py.in (to become approxhpvm.py) requires the following variables:
-# LLVM_PROJECT_DIR, LLVM_BUILD_DIR
-# TRT_PATH, TRT_INCLUDE_DIRS, TRT_LINK_DIRS, TRT_LINK_LIBS
-# DIRECT_LINK_LIBS
-# AVAILABLE_PASSES, HPVM_RT_PATH
-
-set(LLVM_PROJECT_DIR ${CMAKE_SOURCE_DIR})
-set(LLVM_BUILD_DIR ${CMAKE_BINARY_DIR})
-
-get_target_property(TRT_INCLUDE_DIRS tensor_runtime INCLUDE_DIRECTORIES)
-get_target_property(TRT_LINK_DIRS tensor_runtime TRT_LINK_DIRS)
-get_target_property(TRT_LINK_LIBS tensor_runtime TRT_LINK_LIBS)
-
-# This is defined globally. We need to manually link to this
-# because OpenCL functions are injected by HPVM Passes.
-set(DIRECT_LINK_LIBS ${OpenCL_LIBRARY} "$<TARGET_FILE:tensor_runtime>")
-
-# The hpvm-rt runtime
-# This has to be explicitly set as hpvm-rt.bc is created in a custom_target
-# and does not export its file location.
-# Keep this in sync with hpvm/projects/hpvm-rt/CMakeLists.txt.
-set(HPVM_RT_PATH ${LLVM_BUILD_DIR}/tools/hpvm/projects/hpvm-rt/hpvm-rt.bc)
-set(
-    AVAILABLE_PASSES
-    LLVMBuildDFG
-    LLVMInPlaceDFGAnalysis
-    LLVMDFG2LLVM_CPU
-    LLVMDFG2LLVM_CUDNN
-    LLVMDFG2LLVM_WrapperAPI
-    LLVMFuseHPVMTensorNodes
-    LLVMClearDFG
-    LLVMGenHPVM
-)
-
-# First resolve all `@symbol@` by configuring the file
-configure_file(main.py.in ${CMAKE_CURRENT_BINARY_DIR}/main.py.conf)
-# Then resolve all generator expressions we configured into the previous file
-file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/main.py INPUT ${CMAKE_CURRENT_BINARY_DIR}/main.py.conf)
-# Delibrately create an extra step of moving file
-# which is carried out at build time (as a target)
-# so we can set these dependencies on it
-set(
-    DEPS
-    tensor_runtime hpvm-rt-bc clang opt llvm-link ${AVAILABLE_PASSES}
-)
-add_custom_command(
-    OUTPUT ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/approxhpvm.py
-    COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/main.py ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/approxhpvm.py
-    COMMAND chmod +x ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/approxhpvm.py
-    DEPENDS ${DEPS} ${CMAKE_CURRENT_BINARY_DIR}/main.py
-)
-add_custom_target(approxhpvm.py ALL DEPENDS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/approxhpvm.py)
diff --git a/hpvm/tools/py-approxhpvm/main.py.in b/hpvm/tools/py-approxhpvm/main.py.in
deleted file mode 100644
index 7b211911643c64d8bf2c34ef8a43e3ac98cdd88a..0000000000000000000000000000000000000000
--- a/hpvm/tools/py-approxhpvm/main.py.in
+++ /dev/null
@@ -1,187 +0,0 @@
-#!/usr/bin/env python3
-import argparse
-import os
-from pathlib import Path
-from typing import List, Union, Optional
-
-PathLike = Union[Path, str]
-
-HPVM_PROJECT_DIR = Path("@LLVM_PROJECT_DIR@") / "tools/hpvm"
-LLVM_BUILD_BIN = Path("@LLVM_BUILD_DIR@") / "bin"
-
-# Directories to include
-TRT_INCLUDE_DIRS = "@TRT_INCLUDE_DIRS@".split(";")
-TRT_LINK_DIRS = [Path(s) for s in "@TRT_LINK_DIRS@".split(";")]
-TRT_LINK_LIBS = "@TRT_LINK_LIBS@".split(";")
-DIRECT_LINK_LIBS = "@DIRECT_LINK_LIBS@".split(";")
-
-AVAILABLE_PASSES = "@AVAILABLE_PASSES@".split(";")
-HPVM_RT_PATH = "@HPVM_RT_PATH@"
-
-COMPILE_FLAGS = ["fno-exceptions", "std=c++11", "O3"]
-
-
-def compile_hpvm_c(
-    input_file: PathLike,
-    output_file: PathLike,
-    codegen_target: str = "tensor",
-    include: List[PathLike] = None,
-    working_dir: PathLike = None,
-    conf_file: PathLike = None,
-):
-    from subprocess import check_output
-
-    codegen_functions = {
-        "tensor": lambda i, o: opt_codegen_tensor(i, o, conf_file),
-        "cudnn": opt_codegen_cudnn
-    }
-    codegen_f = codegen_functions[codegen_target]
-    working_dir = Path(working_dir or ".")
-    if not working_dir.is_dir():
-        os.makedirs(working_dir)
-    name_stem = Path(input_file).stem
-
-    ll_file = working_dir / f"{name_stem}.ll"
-    hpvm_ll_file = working_dir / f"{name_stem}.hpvm.ll"
-    llvm_ll_file = working_dir / f"{name_stem}.llvm.ll"
-    hpvm_rt_linked_file = working_dir / f"{name_stem}.linked.bc"
-    commands = [
-        hpvm_c_to_ll(input_file, ll_file, extra_includes=include),
-        opt_codegen_hpvm(ll_file, hpvm_ll_file),
-        codegen_f(hpvm_ll_file, llvm_ll_file),
-        link_hpvm_rt(llvm_ll_file, hpvm_rt_linked_file),
-        link_binary(hpvm_rt_linked_file, output_file),
-    ]
-    for command in commands:
-        print(" ".join(command))
-        check_output(command)
-
-
-def hpvm_c_to_ll(
-    src_file: PathLike,
-    target_file: PathLike,
-    extra_includes: Optional[List[PathLike]] = None,
-    flags: List[str] = None,
-) -> List[str]:
-    extra_includes = extra_includes or []
-    includes = [f"-I{path}" for path in TRT_INCLUDE_DIRS + extra_includes]
-    flags = [f"-{flg}" for flg in (flags or []) + COMPILE_FLAGS]
-    return [
-        str(LLVM_BUILD_BIN / "clang++"), *includes, *flags, "-emit-llvm", "-S",
-        str(src_file), "-o", str(target_file)
-    ]
-
-
-def opt_codegen_hpvm(src_file: PathLike, target_file: PathLike) -> List[str]:
-    return _run_opt(src_file, target_file, ["LLVMGenHPVM"], ["genhpvm", "globaldce"])
-
-
-def opt_codegen_cudnn(src_file: PathLike, target_file: PathLike) -> List[str]:
-    passes = [
-        "LLVMBuildDFG", "LLVMInPlaceDFGAnalysis",
-        "LLVMDFG2LLVM_CUDNN", "LLVMDFG2LLVM_CPU",
-        "LLVMFuseHPVMTensorNodes", "LLVMClearDFG", "LLVMGenHPVM"
-    ]
-    flags = [
-        "buildDFG", "inplace", "hpvm-fuse",
-        "dfg2llvm-cudnn", "dfg2llvm-cpu", "clearDFG"
-    ]
-    return _run_opt(src_file, target_file, passes, flags)
-
-
-def opt_codegen_tensor(
-    src_file: PathLike, target_file: PathLike, conf_file: PathLike
-):
-    passes = [
-        "LLVMBuildDFG", "LLVMInPlaceDFGAnalysis",
-        "LLVMDFG2LLVM_WrapperAPI", "LLVMDFG2LLVM_CPU",
-        "LLVMFuseHPVMTensorNodes", "LLVMClearDFG", "LLVMGenHPVM"
-    ]
-    flags = [
-        "buildDFG", "inplace", "hpvm-fuse",
-        "dfg2llvm-wrapperapi",
-        f"configuration-inputs-filename={conf_file}",
-        "dfg2llvm-cpu", "clearDFG",
-    ]
-    return _run_opt(src_file, target_file, passes, flags)
-
-
-def link_hpvm_rt(src_file: PathLike, target_file: PathLike) -> List[str]:
-    return [str(LLVM_BUILD_BIN / "llvm-link"), str(src_file), HPVM_RT_PATH, "-o", str(target_file)]
-
-
-def link_binary(src_file: PathLike, target_file: PathLike) -> List[str]:
-    def drop_suffix(libname: str):
-        import re
-
-        match = re.match(r"lib(.*)\.so", libname)
-        return libname if match is None else match.group(1)
-
-    link_dirs, link_libnames = [], []
-    for lib in DIRECT_LINK_LIBS:
-        lib = Path(lib)
-        link_dirs.append(lib.parent)
-        link_libnames.append(drop_suffix(lib.name))
-    link_dirs += TRT_LINK_DIRS
-    link_libnames += TRT_LINK_LIBS
-
-    linker_dir_flags = []
-    for path in link_dirs:
-        linker_dir_flags.extend([f"-L{path}", f"-Wl,-rpath={path}"])
-    linker_lib_flags = [f"-l{drop_suffix(lib)}" for lib in link_libnames]
-    return [
-        str(LLVM_BUILD_BIN / "clang++"), str(src_file),
-        "-o", str(target_file), *linker_dir_flags, *linker_lib_flags
-    ]
-
-
-def _run_opt(
-    src_file: PathLike,
-    target_file: PathLike,
-    pass_names: List[str],
-    pass_flags: List[str],
-) -> List[str]:
-    unavailable = set(pass_names) - set(AVAILABLE_PASSES)
-    if unavailable:
-        raise ValueError(f"Passes {unavailable} are unavailable from CMake")
-    load_passes_strs = [s for pass_ in pass_names for s in ["-load", f"{pass_}.so"]]
-    pass_flags_strs = [f"-{flag}" for flag in pass_flags]
-    return [
-        str(LLVM_BUILD_BIN / "opt"), *load_passes_strs, *pass_flags_strs,
-        "-S", str(src_file), "-o", str(target_file)
-    ]
-
-
-def parse_args():
-    parser = argparse.ArgumentParser("approxhpvm")
-    parser.add_argument("input_file", type=Path, help="HPVM-C code to compile")
-    parser.add_argument("output_file", type=Path, help="Path to generate binary to")
-    parser.add_argument(
-        "-t",
-        "--codegen-target",
-        type=str,
-        required=True,
-        choices=["tensor", "cudnn"],
-        help="Backend to use",
-    )
-    parser.add_argument(
-        "-d", "--working-dir", type=Path, help="Directory to generate temp files in"
-    )
-    parser.add_argument(
-        "--conf-file", type=Path,
-        help="File to approximation configurations; required for 'tensor' target"
-    )
-    parser.add_argument(
-        "-I", "--include", type=Path, action="append",
-        help="Additional include directories to use"
-    )
-
-    args = parser.parse_args()
-    if args.codegen_target == "tensor":
-        if args.conf_file is None:
-            parser.error('Codegen target "tensor" requires --conf-file argument')
-    return args
-
-
-if __name__ == "__main__":
-    compile_hpvm_c(**vars(parse_args()))