diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index bd5edbd1a467666f67c66be132b3a9d9bbd2d540..8bcc4738d02d7f07a497131d74f9a0ff6f119048 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -14,15 +14,20 @@ cache:
     - hpvm/llvm/
   when: always
 
-build:
+build-and-test:
   stage: build
   tags:
     - hpvm
   script:
-    - pwd
     - source activate hpvm && cd hpvm
+    - mv /root/cfe-9.0.0.src.tar.xz /root/llvm-9.0.0.src.tar.xz ./
+    - mv /root/model_params ./test/dnn_benchmarks
     - ./install.sh -j32 -t "X86" DCMAKE_BUILD_TYPE=Release
-    - cd ..
+    - cd build
+    - make -j32 check-hpvm-pass
+    - make -j32 check-hpvm-dnn
+    - make -j32 check-hpvm-profiler
+    - make -j32 check-hpvm-torch2hpvm
   only:
     - hpvm-release-exp
     - merge_requests
diff --git a/hpvm/docs/components/hpvm-profiler.rst b/hpvm/docs/components/hpvm-profiler.rst
index 8a0e6603d3b7111d2735a86b5db26d7aa834ebb6..820456799ddf0570be6b92564e35077e31fcd3da 100644
--- a/hpvm/docs/components/hpvm-profiler.rst
+++ b/hpvm/docs/components/hpvm-profiler.rst
@@ -1,6 +1,6 @@
 HPVM Profiler API
 ======================
 
-.. autofunction:: hpvm_profiler.profile_configs
+.. autofunction:: hpvm_profiler.profile_config_file
 
 .. autofunction:: hpvm_profiler.plot_hpvm_configs
diff --git a/hpvm/docs/getting-started.rst b/hpvm/docs/getting-started.rst
index 82a582283e7f7071a77ef55c1e2d9eca5fa9668d..6976fa012112eace8bc842658d5ea28b31ff04b6 100644
--- a/hpvm/docs/getting-started.rst
+++ b/hpvm/docs/getting-started.rst
@@ -207,14 +207,14 @@ we obtained in the tuning step.
 
 .. code-block:: python
 
-   from hpvm_profiler import profile_configs, plot_hpvm_configs
+   from hpvm_profiler import profile_config_file, plot_hpvm_configs
 
    # Set `target_binary` to the path of the plain binary.
    target_binary = "./alexnet2_cifar10/build/alexnet2_cifar10"
    # Set `config_file` to the config file produced in tuning, such as "hpvm_confs.txt".
    config_file = "hpvm_confs.txt"
    out_config_file = "hpvm_confs_profiled.txt"
-   profile_configs(target_binary, config_file, out_config_file)
+   profile_config_file(target_binary, config_file, out_config_file)
    plot_hpvm_configs(out_config_file, "configs_profiled.png")
 
 ``hpvm_confs_profiled.txt`` contains the profiled configurations in HPVM format,
diff --git a/hpvm/include/SupportHPVM/DFG2LLVM.h b/hpvm/include/SupportHPVM/DFG2LLVM.h
index fb1e35033eda0445f10423beb69aab5f07c093f0..c1ade92e9a7201a5c3c80e9302b9bac57c750537 100644
--- a/hpvm/include/SupportHPVM/DFG2LLVM.h
+++ b/hpvm/include/SupportHPVM/DFG2LLVM.h
@@ -291,6 +291,7 @@ Function *CodeGenTraversal::addArgument(Function *F, Type *Ty,
 // Return new function with additional index and limit arguments.
 // The original function is removed from the module and erased.
 Function *CodeGenTraversal::addIdxDimArgs(Function *F) {
+  DEBUG(errs() << "Adding dimension and limit arguments to Function: " << F->getName() << "\n");
   DEBUG(errs() << "Function Type: " << *F->getFunctionType() << "\n");
   // Add Index and Dim arguments
   std::string names[] = {"idx_x", "idx_y", "idx_z", "dim_x", "dim_y", "dim_z"};
diff --git a/hpvm/include/SupportHPVM/HPVMUtils.h b/hpvm/include/SupportHPVM/HPVMUtils.h
index 781306956dad0eacc85eadaaf60be4c8ce0e7b21..2a5116ddb122b16b28ee45022d7c57409cdce566 100644
--- a/hpvm/include/SupportHPVM/HPVMUtils.h
+++ b/hpvm/include/SupportHPVM/HPVMUtils.h
@@ -175,14 +175,15 @@ void replaceNodeFunctionInIR(Module &M, Function *F, Function *G) {
           continue;
 
         // Otherwise, replace F with G
-        DEBUG(errs() << *G->getType() << "\n");
-        DEBUG(errs() << *CI->getArgOperand(1)->getType() << "\n");
+        DEBUG(errs() << "Fixing use: " << *CI << "\n");
+        DEBUG(errs() << "in function: " << Func.getName() << "\n");
         CI->setArgOperand(1, G);
+        DEBUG(errs() << "Fixed use: " << *CI << "\n");
       }
     }
 
     for (auto I : toBeErased) {
-      DEBUG(errs() << "\tErasing " << *I << "\n");
+      DEBUG(errs() << "\tErasing Instruction: " << *I << "\n");
       I->eraseFromParent();
     }
   }
@@ -448,7 +449,7 @@ hpvm::Target getUpdatedTag(hpvm::Target Tag, hpvm::Target T) {
 
 // This functions add the hint as metadata in hpvm code
 void addHint(Function *F, hpvm::Target T) {
-  errs() << "ADD HINT *************************\n";
+  DEBUG(errs() << "ADD HINT *************************\n");
   // Get Module
   Module *M = F->getParent();
   DEBUG(errs() << "Set preferred target for " << F->getName() << ": ");
@@ -474,7 +475,7 @@ void addHint(Function *F, hpvm::Target T) {
       break;
   case hpvm::TENSOR_TARGET:
       DEBUG(errs() << "PROMISE Target\n");
-      errs() << "PROMISE\n";
+      DEBUG(errs() << "PROMISE\n");
       HintNode = M->getOrInsertNamedMetadata("hpvm_hint_promise");
       break;
   default:
diff --git a/hpvm/lib/Transforms/BuildDFG/BuildDFG.cpp b/hpvm/lib/Transforms/BuildDFG/BuildDFG.cpp
index e7293a0640b5d7e45614459ed9687768998142a4..b3b46de48260f965782b1fb13bc049d446f51da2 100644
--- a/hpvm/lib/Transforms/BuildDFG/BuildDFG.cpp
+++ b/hpvm/lib/Transforms/BuildDFG/BuildDFG.cpp
@@ -59,7 +59,7 @@ bool BuildDFG::runOnModule(Module &M) {
         BuildGraph(Root, F);
 
         Root->getChildGraph()->sortChildren();
-        viewDFGraph(Root->getChildGraph());
+        // viewDFGraph(Root->getChildGraph());
       }
     }
   }
diff --git a/hpvm/lib/Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.cpp b/hpvm/lib/Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.cpp
index d5904bd83c0eadcbdd912a79443bd7126acc36c5..10667ddeecc7f072222032e930d27fd1f75e7b2d 100644
--- a/hpvm/lib/Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.cpp
+++ b/hpvm/lib/Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.cpp
@@ -1412,7 +1412,7 @@ void CGT_CPU::codeGen(DFLeafNode *N) {
       break;
     }
     case hpvm::CUDNN_TARGET: {
-      errs() << "CUDNN hint found. Store CUDNN function as CPU funtion.\n";
+      DEBUG(errs() << "CUDNN hint found. Store CUDNN function as CPU function.\n");
       // Make sure there is a generated CPU function for cudnn
       assert(N->getGenFuncForTarget(hpvm::CUDNN_TARGET) && "");
       assert(N->hasCPUGenFuncForTarget(hpvm::CUDNN_TARGET) && "");
@@ -1431,7 +1431,7 @@ void CGT_CPU::codeGen(DFLeafNode *N) {
     }
      case hpvm::TENSOR_TARGET: 
      {
-       errs() << "Promise hint found. Store PROMISE function as CPU funtion.\n";
+       DEBUG(errs() << "Promise hint found. Store PROMISE function as CPU function.\n");
        // Make sure there is a generated x86 function for promise
        assert(N->getGenFuncForTarget(hpvm::TENSOR_TARGET) && "");
        assert(N->hasCPUGenFuncForTarget(hpvm::TENSOR_TARGET) && "");
diff --git a/hpvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp b/hpvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp
index 0559e8136da6bff441e3c1fb0b948bcaaeb954ee..110f8918ef6dbfc748862334b6ed68a9a34d0466 100644
--- a/hpvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp
+++ b/hpvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp
@@ -171,8 +171,8 @@ void CGT_CUDNN::initRuntimeAPI() {
 }
 
 void CGT_CUDNN::codeGen(DFInternalNode *N) {
-  errs() << "Inside node: " << N->getFuncPointer()->getName() << "\n";
-  errs() << "Skipping internal node\n";
+  DEBUG(errs() << "Inside node: " << N->getFuncPointer()->getName() << "\n");
+  DEBUG(errs() << "Skipping internal node\n");
 }
 
 void CGT_CUDNN::codeGen(DFLeafNode *N) {
@@ -191,13 +191,13 @@ void CGT_CUDNN::codeGen(DFLeafNode *N) {
 
   // Generate code only if it has the right hint
   if (!checkPreferredTarget(N, hpvm::CUDNN_TARGET)) {
-    errs() << "Skipping node: " << N->getFuncPointer()->getName() << "\n";
+    DEBUG(errs() << "Skipping node: " << N->getFuncPointer()->getName() << "\n");
     return;
   }
 
   // Get the function associated with the dataflow node
   Function *F = N->getFuncPointer();
-  errs() << "function name = " << F->getName() << "\n";
+  DEBUG(errs() << "function name = " << F->getName() << "\n");
 
   /* Removing HPVM in/out/inout function attributes */
   for (Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end(); ai != ae;
@@ -224,7 +224,7 @@ void CGT_CUDNN::codeGen(DFLeafNode *N) {
   std::string FName(F->getName().data());
   F_cudnn = CloneFunction(F, VMap);
   F_cudnn->setName(FName + "_cudnn");
-  errs() << "Cloned function name2 = " << F_cudnn->getName() << "\n";
+  DEBUG(errs() << "Cloned function name2 = " << F_cudnn->getName() << "\n");
   F_cudnn->removeFromParent();
   M.getFunctionList().push_back(F_cudnn);
 
@@ -496,7 +496,7 @@ void CGT_CUDNN::codeGen(DFLeafNode *N) {
         } else if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_tanh) {
           // Create cudnn runtime function call
           FunctionCallee tensorTanh;
-          errs() << "tensorTanh Call = \n\n";
+          DEBUG(errs() << "tensorTanh Call = \n\n");
           DECLARE(tensorTanh);
           // errs()<<"tensorTanh Call = "<<*tensorTanh<<"\l";
           CallInst::Create(tensorTanh, Args, "", II);
@@ -569,7 +569,7 @@ void CGT_CUDNN::codeGen(DFLeafNode *N) {
                                                       re = IItoRemove.rend();
        ri != re; ++ri) {
     DEBUG(errs() << "Erasing: " << **ri << "\n");
-    errs() << "Erasing: " << **ri << "\n";
+    DEBUG(errs() << "Erasing: " << **ri << "\n");
     (*ri)->eraseFromParent();
   }
 
@@ -577,7 +577,7 @@ void CGT_CUDNN::codeGen(DFLeafNode *N) {
 }
 
 bool DFG2LLVM_CUDNN::runOnModule(Module &M) {
-  errs() << "\nDFG2LLVM_CUDNN PASS\n";
+  DEBUG(errs() << "\nDFG2LLVM_CUDNN PASS\n");
 
   // Get the BuildDFG Analysis Results:
   // - Dataflow graph
@@ -587,7 +587,7 @@ bool DFG2LLVM_CUDNN::runOnModule(Module &M) {
   InPlaceDFGAnalysis::InPlaceDFGParameter IPP =
       (getAnalysis<InPlaceDFGAnalysisWrapper>()).getIPP();
   // Print results
-  printInPlaceDFGParameter(IPP);
+  // printInPlaceDFGParameter(IPP);
 
   std::vector<DFInternalNode *> Roots = DFG.getRoots();
 
diff --git a/hpvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp b/hpvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp
index ec5a84cffb31b67cfcc0f9efc7a7c2cc3f4be4a1..c0dbd3899b0f6f31e0cb3d58a501aef7771b8281 100644
--- a/hpvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp
+++ b/hpvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp
@@ -265,7 +265,7 @@ public:
 
 void InitialState::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) {
   if (II) { // Not end of instruction stream
-    errs() << "INITIAL STATE\n";
+    DEBUG(errs() << "INITIAL STATE\n");
     switch (II->getIntrinsicID()) {
     case Intrinsic::hpvm_tensor_convolution: {
       Mch->addIntrinsicInst(II);
@@ -273,7 +273,7 @@ void InitialState::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) {
       Mch->addArgument(II->getOperand(1)); // conv kernel
 
       Mch->setCurrent(new ConvolutionLayer_1());
-      errs() << "TO CONVOLUTION LAYER 1\n";
+      DEBUG(errs() << "TO CONVOLUTION LAYER 1\n");
     } break;
     case Intrinsic::hpvm_tensor_mul: {
       Mch->addIntrinsicInst(II);
@@ -281,7 +281,7 @@ void InitialState::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) {
       Mch->addArgument(II->getOperand(1)); // 2nd gemm input
 
       Mch->setCurrent(new FullyConnectedLayer_1());
-      errs() << "TO FULLY CONNECTED LAYER 1\n";
+      DEBUG(errs() << "TO FULLY CONNECTED LAYER 1\n");
     } break;
 
     case Intrinsic::hpvm_node_id: {
@@ -304,36 +304,36 @@ void InitialState::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) {
 
       Mch->addIntrinsicToRemove(II);
       Mch->setCurrent(new InitialState());
-      errs() << "TO INIT STATE\n";
+      DEBUG(errs() << "TO INIT STATE\n");
     } break;
 
     default: // Other HPVM intrinsic
     {
       Mch->addIntrinsicInst(II);
       Mch->setCurrent(new SingleTensorOperation());
-      errs() << "TO SINGLE OP\n";
+      DEBUG(errs() << "TO SINGLE OP\n");
     } break;
     }
     delete this;
   } // else {} // No HPVM intrinsic received. Remain at initial
-  errs() << "TO NO CHANGE\n";
+  DEBUG(errs() << "TO NO CHANGE\n");
 }
 
 void SingleTensorOperation::transition(CodeGenStateMachine *Mch,
                                        IntrinsicInst *II) {
   if (II) { // Not end of instruction stream
-    errs() << "SINGLE TENSOR OP\n";
+    DEBUG(errs() << "SINGLE TENSOR OP\n");
     Mch->setCurrent(new NoPattern());
-    errs() << "TO NO PATTERN\n";
+    DEBUG(errs() << "TO NO PATTERN\n");
     delete this;
   }
-  errs() << "NO CHANGE\n";
+  DEBUG(errs() << "NO CHANGE\n");
 }
 
 void FullyConnectedLayer_1::transition(CodeGenStateMachine *Mch,
                                        IntrinsicInst *II) {
   if (II) { // Not end of instruction stream
-    errs() << "FULLY CONNECTED LAYER 1\n";
+    DEBUG(errs() << "FULLY CONNECTED LAYER 1\n");
     switch (II->getIntrinsicID()) {
     case Intrinsic::hpvm_tensor_add: {
       IntrinsicInst *MulII = Mch->getIntrinsicInstAt(0);
@@ -344,16 +344,16 @@ void FullyConnectedLayer_1::transition(CodeGenStateMachine *Mch,
       Mch->addArgument(II->getOperand(1)); // bias
 
       Mch->setCurrent(new FullyConnectedLayer_2());
-      errs() << "TO FULLY CONNECTED LAYER 2\n";
+      DEBUG(errs() << "TO FULLY CONNECTED LAYER 2\n");
     } break;
     default:
       Mch->setCurrent(new NoPattern());
-      errs() << "TO NO PATERN\n";
+      DEBUG(errs() << "TO NO PATTERN\n");
       break;
     }
   } else {
     Mch->setCurrent(new NoPattern());
-    errs() << "TO NO PATERN\n";
+    DEBUG(errs() << "TO NO PATTERN\n");
   }
   delete this;
 }
@@ -361,7 +361,7 @@ void FullyConnectedLayer_1::transition(CodeGenStateMachine *Mch,
 void FullyConnectedLayer_2::transition(CodeGenStateMachine *Mch,
                                        IntrinsicInst *II) {
   if (II) { // Not end of instruction stream
-    errs() << "FULLY CONNECTED LAYER 2\n";
+    DEBUG(errs() << "FULLY CONNECTED LAYER 2\n");
     switch (II->getIntrinsicID()) {
     case Intrinsic::hpvm_tensor_tanh: {
       // Type of activation : TanH
@@ -371,7 +371,7 @@ void FullyConnectedLayer_2::transition(CodeGenStateMachine *Mch,
       Mch->addIntrinsicInst(II);
 
       Mch->setCurrent(new FullyConnectedLayer_3());
-      errs() << "TO FULLY CONNECTED LAYER 3\n";
+      DEBUG(errs() << "TO FULLY CONNECTED LAYER 3\n");
     } break;
     case Intrinsic::hpvm_tensor_relu: {
       // Type of activation : ReLU
@@ -381,7 +381,7 @@ void FullyConnectedLayer_2::transition(CodeGenStateMachine *Mch,
       Mch->addIntrinsicInst(II);
 
       Mch->setCurrent(new FullyConnectedLayer_3());
-      errs() << "TO FULLY CONNECTED LAYER 3\n";
+      DEBUG(errs() << "TO FULLY CONNECTED LAYER 3\n");
     } break;
     case Intrinsic::hpvm_tensor_clipped_relu: {
       // Type of activation : Clipped ReLU
@@ -391,11 +391,11 @@ void FullyConnectedLayer_2::transition(CodeGenStateMachine *Mch,
       Mch->addIntrinsicInst(II);
 
       Mch->setCurrent(new FullyConnectedLayer_3());
-      errs() << "TO FULLY CONNECTED LAYER 3\n";
+      DEBUG(errs() << "TO FULLY CONNECTED LAYER 3\n");
     } break;
     default: // No activation, but HPVM intrinsic
       Mch->setCurrent(new NoPattern());
-      errs() << "TO NO PATTERN\n";
+      DEBUG(errs() << "TO NO PATTERN\n");
       break;
     }
   } else { // End of instruction stream
@@ -404,7 +404,7 @@ void FullyConnectedLayer_2::transition(CodeGenStateMachine *Mch,
         ConstantInt::get(Type::getInt32Ty(Mch->getModule()->getContext()), -1));
 
     Mch->setCurrent(new FullyConnectedLayer());
-    errs() << "TO FULLY CONNECTED LAYER\n";
+    DEBUG(errs() << "TO FULLY CONNECTED LAYER\n");
   }
   delete this;
 }
@@ -412,12 +412,12 @@ void FullyConnectedLayer_2::transition(CodeGenStateMachine *Mch,
 void FullyConnectedLayer_3::transition(CodeGenStateMachine *Mch,
                                        IntrinsicInst *II) {
   if (!II) { // End of instruction stream
-    errs() << "FULLY CONNECTED LAYER 3\n";
+    DEBUG(errs() << "FULLY CONNECTED LAYER 3\n");
     Mch->setCurrent(new FullyConnectedLayer());
-    errs() << "TO FULLY CONNECTED LAYER\n";
+    DEBUG(errs() << "TO FULLY CONNECTED LAYER\n");
   } else {
     Mch->setCurrent(new NoPattern());
-    errs() << "TO NO PATTERN\n";
+    DEBUG(errs() << "TO NO PATTERN\n");
   }
   delete this;
 }
@@ -425,18 +425,18 @@ void FullyConnectedLayer_3::transition(CodeGenStateMachine *Mch,
 void FullyConnectedLayer::transition(CodeGenStateMachine *Mch,
                                      IntrinsicInst *II) {
   if (II) { // Not end of instruction stream
-    errs() << "FULLY CONNECTED LAYER\n";
+    DEBUG(errs() << "FULLY CONNECTED LAYER\n");
     Mch->setCurrent(new NoPattern());
-    errs() << "TO NO PATTERN\n";
+    DEBUG(errs() << "TO NO PATTERN\n");
     delete this;
   }
-  errs() << "TO NO CHANGE\n";
+  DEBUG(errs() << "TO NO CHANGE\n");
 }
 
 void ConvolutionLayer_1::transition(CodeGenStateMachine *Mch,
                                     IntrinsicInst *II) {
   if (II) { // Not end of instruction stream
-    errs() << "CONVOLUTION LAYER 1\n";
+    DEBUG(errs() << "CONVOLUTION LAYER 1\n");
     switch (II->getIntrinsicID()) {
     case Intrinsic::hpvm_tensor_add: {
       IntrinsicInst *ConvII = Mch->getIntrinsicInstAt(0);
@@ -452,11 +452,11 @@ void ConvolutionLayer_1::transition(CodeGenStateMachine *Mch,
       Mch->addArgument(ConvII->getOperand(5)); // 4th numeric arg of conv
 
       Mch->setCurrent(new ConvolutionLayer_2());
-      errs() << "TO CONVOLUTION LAYER 2\n";
+      DEBUG(errs() << "TO CONVOLUTION LAYER 2\n");
     } break;
     default:
       Mch->setCurrent(new NoPattern());
-      errs() << "TO NO PATTERN\n";
+      DEBUG(errs() << "TO NO PATTERN\n");
       break;
     }
   } else {
@@ -497,7 +497,7 @@ void ConvolutionLayer_1::transition(CodeGenStateMachine *Mch,
         ConstantInt::get(Type::getInt32Ty(Mch->getModule()->getContext()), -1));
 
     Mch->setCurrent(new ConvolutionLayer());
-    errs() << "TO CONVOLUTION LAYER\n";
+    DEBUG(errs() << "TO CONVOLUTION LAYER\n");
   }
   delete this;
 }
@@ -505,7 +505,7 @@ void ConvolutionLayer_1::transition(CodeGenStateMachine *Mch,
 void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch,
                                     IntrinsicInst *II) {
   if (II) { // Not end of instruction stream
-    errs() << "CONVOLUTION LAYER 2\n";
+    DEBUG(errs() << "CONVOLUTION LAYER 2\n");
     switch (II->getIntrinsicID()) {
     case Intrinsic::hpvm_tensor_tanh: {
       // Type of activation : TanH
@@ -515,7 +515,7 @@ void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch,
       Mch->addIntrinsicInst(II);
 
       Mch->setCurrent(new ConvolutionLayer_3());
-      errs() << "TO CONVOLUTION LAYER 3\n";
+      DEBUG(errs() << "TO CONVOLUTION LAYER 3\n");
     } break;
     case Intrinsic::hpvm_tensor_relu: {
       // Type of activation : ReLU
@@ -525,7 +525,7 @@ void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch,
       Mch->addIntrinsicInst(II);
 
       Mch->setCurrent(new ConvolutionLayer_3());
-      errs() << "TO CONVOLUTION LAYER 3\n";
+      DEBUG(errs() << "TO CONVOLUTION LAYER 3\n");
     } break;
     case Intrinsic::hpvm_tensor_clipped_relu: {
       // Type of activation : Clipped ReLU
@@ -535,7 +535,7 @@ void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch,
       Mch->addIntrinsicInst(II);
 
       Mch->setCurrent(new ConvolutionLayer_3());
-      errs() << "TO CONVOLUTION LAYER 3\n";
+      DEBUG(errs() << "TO CONVOLUTION LAYER 3\n");
     } break;
     case Intrinsic::hpvm_tensor_pool_max: {
       // pool max
@@ -552,7 +552,7 @@ void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch,
       Mch->addIntrinsicInst(II);
 
       Mch->setCurrent(new ConvolutionLayer_4());
-      errs() << "TO CONVOLUTION LAYER 4\n";
+      DEBUG(errs() << "TO CONVOLUTION LAYER 4\n");
     } break;
     case Intrinsic::hpvm_tensor_pool_min: {
       // pool min FIXME: 2: supported?
@@ -569,7 +569,7 @@ void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch,
       Mch->addIntrinsicInst(II);
 
       Mch->setCurrent(new ConvolutionLayer_4());
-      errs() << "TO CONVOLUTION LAYER 4\n";
+      DEBUG(errs() << "TO CONVOLUTION LAYER 4\n");
     } break;
     case Intrinsic::hpvm_tensor_pool_mean: {
       // pool mean
@@ -586,11 +586,11 @@ void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch,
       Mch->addIntrinsicInst(II);
 
       Mch->setCurrent(new ConvolutionLayer_4());
-      errs() << "TO CONVOLUTION LAYER 4\n";
+      DEBUG(errs() << "TO CONVOLUTION LAYER 4\n");
     } break;
     default: // No activation, No pooling, but HPVM intrinsic
       Mch->setCurrent(new NoPattern());
-      errs() << "TO NO PATTERN\n";
+      DEBUG(errs() << "TO NO PATTERN\n");
       break;
     }
   } else { // End of instruction stream
@@ -607,7 +607,7 @@ void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch,
         ConstantInt::get(Type::getInt32Ty(Mch->getModule()->getContext()), -1));
 
     Mch->setCurrent(new ConvolutionLayer());
-    errs() << "TO CONVOLUTION LAYER\n";
+    DEBUG(errs() << "TO CONVOLUTION LAYER\n");
   }
   delete this;
 }
@@ -615,7 +615,7 @@ void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch,
 void ConvolutionLayer_3::transition(CodeGenStateMachine *Mch,
                                     IntrinsicInst *II) {
   if (II) { // Not end of instruction stream
-    errs() << "CONVOLUTION LAYER 3\n";
+    DEBUG(errs() << "CONVOLUTION LAYER 3\n");
     switch (II->getIntrinsicID()) {
     case Intrinsic::hpvm_tensor_pool_max: {
       // pool max
@@ -644,7 +644,7 @@ void ConvolutionLayer_3::transition(CodeGenStateMachine *Mch,
       }
 
       Mch->setCurrent(new ConvolutionLayer_4());
-      errs() << "TO CONVOLUTION LAYER 4\n";
+      DEBUG(errs() << "TO CONVOLUTION LAYER 4\n");
     } break;
     case Intrinsic::hpvm_tensor_pool_min: {
       // pool min FIXME: 2: supported?
@@ -674,7 +674,7 @@ void ConvolutionLayer_3::transition(CodeGenStateMachine *Mch,
       }
 
       Mch->setCurrent(new ConvolutionLayer_4());
-      errs() << "TO CONVOLUTION LAYER 4\n";
+      DEBUG(errs() << "TO CONVOLUTION LAYER 4\n");
     } break;
     case Intrinsic::hpvm_tensor_pool_mean: {
       // pool max
@@ -703,11 +703,11 @@ void ConvolutionLayer_3::transition(CodeGenStateMachine *Mch,
       }
 
       Mch->setCurrent(new ConvolutionLayer_4());
-      errs() << "TO CONVOLUTION LAYER 4\n";
+      DEBUG(errs() << "TO CONVOLUTION LAYER 4\n");
     } break;
     default: // No pooling, but HPVM intrinsic
       Mch->setCurrent(new NoPattern());
-      errs() << "TO NO PATTERN\n";
+      DEBUG(errs() << "TO NO PATTERN\n");
       break;
     }
   } else { // End of instruction stream
@@ -736,7 +736,7 @@ void ConvolutionLayer_3::transition(CodeGenStateMachine *Mch,
     }
 
     Mch->setCurrent(new ConvolutionLayer());
-    errs() << "TO CONVOLUTION LAYER\n";
+    DEBUG(errs() << "TO CONVOLUTION LAYER\n");
   }
   delete this;
 }
@@ -744,24 +744,24 @@ void ConvolutionLayer_3::transition(CodeGenStateMachine *Mch,
 void ConvolutionLayer_4::transition(CodeGenStateMachine *Mch,
                                     IntrinsicInst *II) {
   if (!II) { // End of instruction stream
-    errs() << "CONVOLUTION LAYER 4\n";
+    DEBUG(errs() << "CONVOLUTION LAYER 4\n");
     Mch->setCurrent(new ConvolutionLayer());
-    errs() << "TO CONVOLUTION LAYER\n";
+    DEBUG(errs() << "TO CONVOLUTION LAYER\n");
   } else {
     Mch->setCurrent(new NoPattern());
-    errs() << "TO NO PATTERN\n";
+    DEBUG(errs() << "TO NO PATTERN\n");
   }
   delete this;
 }
 
 void ConvolutionLayer::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) {
   if (II) { // Not end of instruction stream
-    errs() << "CONVOLUTION LAYER\n";
+    DEBUG(errs() << "CONVOLUTION LAYER\n");
     Mch->setCurrent(new NoPattern());
-    errs() << "TO NO PATTERN\n";
+    DEBUG(errs() << "TO NO PATTERN\n");
     delete this;
   }
-  errs() << "NO CHANGE\n";
+  DEBUG(errs() << "NO CHANGE\n");
 }
 
 void NoPattern::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) {}
@@ -779,8 +779,8 @@ void CodeGenStateMachine::codeGen(
     DFNode *N, Function *F, const StringRef &strRef,
     InPlaceDFGAnalysis::InPlaceDFGParameter &IPP) {
 
-  errs() << "TRANSITIONTED TO: " << std::to_string(current->getStateID())
-         << "\n";
+  DEBUG(errs() << "TRANSITIONED TO: " << std::to_string(current->getStateID())
+               << "\n");
   assert(
       ((current->getStateID() == AbstractState::ID::FULLY_CONNECTED_LAYER) ||
        (current->getStateID() == AbstractState::ID::CONVOLUTION_LAYER) ||
@@ -894,7 +894,7 @@ void CodeGenStateMachine::codeGen(
            "Unexpected arguments found in coge gen state machine.\n");
     IntrinsicInst *TensorII = IIs[0];
 
-    errs() << "TensorII: " << *TensorII << "\n";
+    DEBUG(errs() << "TensorII: " << *TensorII << "\n");
 
     switch (TensorII->getIntrinsicID()) {
     case Intrinsic::
@@ -1330,8 +1330,8 @@ void CGT_WrapperAPI::initRuntimeAPI() {
 }
 
 void CGT_WrapperAPI::codeGen(DFInternalNode *N) {
-  errs() << "Inside node: " << N->getFuncPointer()->getName() << "\n";
-  errs() << "Skipping internal node\n";
+  DEBUG(errs() << "Inside node: " << N->getFuncPointer()->getName() << "\n");
+  DEBUG(errs() << "Skipping internal node\n");
 }
 
 void CGT_WrapperAPI::codeGen(DFLeafNode *N) {
@@ -1350,11 +1350,11 @@ void CGT_WrapperAPI::codeGen(DFLeafNode *N) {
 
   // Increment the node ID, for current node.
   ++nodeID;
-  errs() << "Node ID string: " << StringRef(std::to_string(nodeID)) << "\n";
+  DEBUG(errs() << "Node ID string: " << StringRef(std::to_string(nodeID)) << "\n");
 
   // Get the function associated with the dataflow node
   Function *F = N->getFuncPointer();
-  errs() << "Node Function: " << *F << "\n";
+  DEBUG(errs() << "Node Function: " << *F << "\n");
   // Look up if we have visited this function before. If we have, then just
   // get the cloned function pointer from DFNode. Otherwise, create the cloned
   // function and add it to the DFNode GenFunc.
@@ -1418,10 +1418,10 @@ void CGT_WrapperAPI::codeGen(DFLeafNode *N) {
   for (inst_iterator i = inst_begin(F_wrapper_api), e = inst_end(F_wrapper_api);
        i != e; ++i) {
     Instruction *I = &(*i);
-    errs() << "PRINT INST: " << *I << "\n";
+    DEBUG(errs() << "PRINT INST: " << *I << "\n");
     CGM.transition(dyn_cast<IntrinsicInst>(I));
   }
-  errs() << "CLONED FUNCTION: " << *F_wrapper_api << "\n";
+  DEBUG(errs() << "CLONED FUNCTION: " << *F_wrapper_api << "\n");
   // errs() << "Node ID string: "<< StringRef(std::to_string(nodeID)) << "\n";
   // CGM.codeGen(N, F_wrapper_api, N->getFuncPointer()->getName(), *IPP);
   CGM.codeGen(N, F_wrapper_api, StringRef(std::to_string(nodeID)), *IPP);
@@ -1431,7 +1431,7 @@ void CGT_WrapperAPI::codeGen(DFLeafNode *N) {
 
 bool DFG2LLVM_WrapperAPI::runOnModule(Module &M) {
 
-  errs() << "\nDFG2LLVM_WrapperAPI PASS\n";
+  DEBUG(errs() << "\nDFG2LLVM_WrapperAPI PASS\n");
   // Get the BuildDFG Analysis Results:
   // - Dataflow graph
   BuildDFG &DFG = getAnalysis<BuildDFG>();
diff --git a/hpvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.cpp b/hpvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.cpp
index 5117cc23d30a7392ee53107e63e7c2d13a4f9692..616b8a9b572473ed4acb2f2316d88f41a06e434a 100644
--- a/hpvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.cpp
+++ b/hpvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.cpp
@@ -101,7 +101,7 @@ static IntrinsicInst *isValidHPVMTensorNode(DFNode *N) {
     if (dyn_cast<IntrinsicInst>(&*I)) {
       II = dyn_cast<IntrinsicInst>(&*I);
       if ((II->getCalledFunction()->getName()).startswith("llvm.hpvm.tensor")) {
-        errs() << "** Tensor Intrinsic = " << *II << "\n";
+        DEBUG(errs() << "** Tensor Intrinsic = " << *II << "\n");
       }
     }
   }
@@ -134,7 +134,7 @@ static DFNode *findNextNodeInSequence(DFNode *SrcN) {
     if (!DstN)
       DstN = N;
     if (DstN != N) {
-      errs() << "Found different destination nodes: no node sequence.\n";
+      DEBUG(errs() << "Found different destination nodes: no node sequence.\n");
       return NULL;
     }
   }
@@ -767,7 +767,7 @@ void FuseHPVMTensorNodes::FuseHPVMTensorNodeSequence(
   }
 
   if (IIs.size() < 2) {
-    errs() << "Warning: Attempted to fuse fewer than 2 nodes\n";
+    DEBUG(errs() << "Warning: Attempted to fuse fewer than 2 nodes\n");
     return;
   }
 
@@ -792,17 +792,17 @@ void FuseHPVMTensorNodes::run(Module &M, FusionTargets &FTs) {
 // Print Fusion Targets. The argument vector contains createNode intrinsics
 // of nodes to be fused).
 void FuseHPVMTensorNodes::printFusionTargets(FusionTargets &FTs) {
-  errs() << "Print Fusion Targets\n";
-  errs() << "Found " << FTs.size() << " targets\n";
+  DEBUG(errs() << "Print Fusion Targets\n");
+  DEBUG(errs() << "Found " << FTs.size() << " targets\n");
   for (FuseHPVMTensorNodes::FusionTargets::iterator ii = FTs.begin(),
                                                     ie = FTs.end();
        ii != ie; ++ii) {
-    errs() << "Target:\n";
+    DEBUG(errs() << "Target:\n");
     std::vector<IntrinsicInst *> IIv = *ii;
     for (std::vector<IntrinsicInst *>::iterator pi = IIv.begin(),
                                                 pe = IIv.end();
          pi != pe; ++pi) {
-      errs() << "\t" << *((*pi)->getOperand(0)) << "\n";
+      DEBUG(errs() << "\t" << *((*pi)->getOperand(0)) << "\n");
     }
   }
   return;
@@ -817,19 +817,19 @@ void FindFusionTargetsTraversal::codeGen(DFInternalNode *N) {
 void FindFusionTargetsTraversal::codeGen(DFLeafNode *N) {
   DEBUG(errs() << "Inside leaf node: " << N->getFuncPointer()->getName()
                << "\n");
-  errs() << "FUSE TARGETS AT LEAF NODE\n";
+  DEBUG(errs() << "FUSE TARGETS AT LEAF NODE\n");
   // Skip fusion check if it is a dummy node
   if (N->isDummyNode()) {
     DEBUG(errs() << "Skipping dummy node\n");
     return;
   }
-  errs() << "THIS IS NOT A DUMMY NODE\n";
-  errs() << "INTRINSIC: " << *isValidHPVMTensorNode(N) << "\n";
+  DEBUG(errs() << "THIS IS NOT A DUMMY NODE\n");
+  DEBUG(errs() << "INTRINSIC: " << *isValidHPVMTensorNode(N) << "\n");
   if (!preferredTargetIncludes(N, hpvm::TENSOR_TARGET)) {
     // Only fuse if we plan to target PROMISE/Layers API
     // The CUDNN backend would be able to generate calls for the fused node,
     // but not the other way around
-    errs() << "NO PROMISE HINT. SKIPPING NODE.\n";
+    DEBUG(errs() << "NO PROMISE HINT. SKIPPING NODE.\n");
     DEBUG(errs() << "No PROMISE hint. Skipping node: "
                  << N->getFuncPointer()->getName() << "\n");
     return;
@@ -852,7 +852,7 @@ void FindFusionTargetsTraversal::codeGen(DFLeafNode *N) {
     */
 
   case Intrinsic::hpvm_tensor_convolution: {
-    errs() << "INSTRUCTION: " << *II << "\n";
+    DEBUG(errs() << "INSTRUCTION: " << *II << "\n");
 
     // Found beginning of pattern conv-bias-activation-pooling.
     // Look for the rest
@@ -861,20 +861,20 @@ void FindFusionTargetsTraversal::codeGen(DFLeafNode *N) {
     // Look for bias
     DFNode *SN = findNextNodeInSequence(N);
     if (!SN) {
-      errs() << "DID NOT FIND ADD IN NODE SEQUENCE\n";
+      DEBUG(errs() << "DID NOT FIND ADD IN NODE SEQUENCE\n");
       return; // Did not find a node sequence starting at N. Simpy return.
     }
     if (getPreferredTarget(SN) != StartNodePreferredTarget) {
-      errs() << "NODE IN SEQUENCE HAS DIFFERENT HINT\n";
+      DEBUG(errs() << "NODE IN SEQUENCE HAS DIFFERENT HINT\n");
       return; // Node in sequence has different hint. Simpy return.
     }
     IntrinsicInst *SII = isValidHPVMTensorNode(SN);
     if (SII->getIntrinsicID() != Intrinsic::hpvm_tensor_add) {
-      errs() << "SUCCESSOR IS NOT A BIAS OPERATION\n";
+      DEBUG(errs() << "SUCCESSOR IS NOT A BIAS OPERATION\n");
       // Successor is not the bias operation, thus does not fit the pattern.
       return;
     }
-    errs() << "SUCCESSOR IS A BIAS OPERATION\n";
+    DEBUG(errs() << "SUCCESSOR IS A BIAS OPERATION\n");
     // Otherwise, push this node to the current sequence
     CurrentNodeSequence.push_back(SN->getInstruction());
 
@@ -884,15 +884,15 @@ void FindFusionTargetsTraversal::codeGen(DFLeafNode *N) {
     // tanh)
     SN = findNextNodeInSequence(SN);
     if (!SN) {
-      errs() << "DID NOT FIND POOLING AND ACTIVATION NODE SEQUENCE\n";
+      DEBUG(errs() << "DID NOT FIND POOLING AND ACTIVATION NODE SEQUENCE\n");
       // Did not find a node sequence starting at N.Use current sequence.
       break;
     }
     if (getPreferredTarget(SN) != StartNodePreferredTarget) {
-      errs() << "NODE IN SEQUENCE HAS DIFFERENT HINT\n";
+      DEBUG(errs() << "NODE IN SEQUENCE HAS DIFFERENT HINT\n");
       break; // Node in sequence has different hint. Use current sequence.
     }
-    errs() << "SUCCESSOR IS A ACTIVATION OR POOLING  OPERATION\n";
+    DEBUG(errs() << "SUCCESSOR IS AN ACTIVATION OR POOLING OPERATION\n");
     SII = isValidHPVMTensorNode(SN);
 
     if ((SII->getIntrinsicID() == Intrinsic::hpvm_tensor_clipped_relu) ||
@@ -900,15 +900,15 @@ void FindFusionTargetsTraversal::codeGen(DFLeafNode *N) {
         (SII->getIntrinsicID() == Intrinsic::hpvm_tensor_tanh)) {
       // Successor is activation. Push this node to the current sequence.
       CurrentNodeSequence.push_back(SN->getInstruction());
-      errs() << "SUCCESSOR IS AN ACTIVATION OPERATION\n";
+      DEBUG(errs() << "SUCCESSOR IS AN ACTIVATION OPERATION\n");
       // Will continue, looking for pooling in the next node
       SN = findNextNodeInSequence(SN);
       if (!SN) {
-        errs() << "DID NOT FIND POOLING NODE SEQUENCE\n";
+        DEBUG(errs() << "DID NOT FIND POOLING NODE SEQUENCE\n");
         break; // No node in sequence. Use currently found sequence.
       }
       if (getPreferredTarget(SN) != StartNodePreferredTarget) {
-        errs() << "NODE IN SEQUENCE HAS DIFFERENT HINT\n";
+        DEBUG(errs() << "NODE IN SEQUENCE HAS DIFFERENT HINT\n");
         break; // Node in sequence has different hint. Use current sequence.
       }
       SII = isValidHPVMTensorNode(SN);
@@ -917,7 +917,7 @@ void FindFusionTargetsTraversal::codeGen(DFLeafNode *N) {
     if ((SII->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_max) ||
         (SII->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_min) ||
         (SII->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_mean)) {
-      errs() << "SUCCESSOR IS A POOLING OPERATION\n";
+      DEBUG(errs() << "SUCCESSOR IS A POOLING OPERATION\n");
       // Successor is a pool operation. Use currently found sequence.
       CurrentNodeSequence.push_back(SN->getInstruction());
     }
@@ -928,20 +928,20 @@ void FindFusionTargetsTraversal::codeGen(DFLeafNode *N) {
     // Look for bias
     DFNode *SN = findNextNodeInSequence(N);
     if (!SN) {
-      errs() << "DID NOT FIND ADD IN NODE SEQUENCE\n";
+      DEBUG(errs() << "DID NOT FIND ADD IN NODE SEQUENCE\n");
       return; // Did not find a node sequence starting at N. Simpy return.
     }
     if (getPreferredTarget(SN) != StartNodePreferredTarget) {
-      errs() << "HINT DO NOT MATCH IN NODE SEQUENCE\n";
+      DEBUG(errs() << "HINTS DO NOT MATCH IN NODE SEQUENCE\n");
       return; // Node in sequence has different hint. Simpy return.
     }
     IntrinsicInst *SII = isValidHPVMTensorNode(SN);
     if (SII->getIntrinsicID() != Intrinsic::hpvm_tensor_add) {
-      errs() << "SUCCESSOR IS NOT IS BIAS OPERATION\n";
+      DEBUG(errs() << "SUCCESSOR IS NOT A BIAS OPERATION\n");
       // Successor is not the bias operation, thus does not fit the pattern.
       return;
     }
-    errs() << "SUCCESSOR IS BIAS OPERATION\n";
+    DEBUG(errs() << "SUCCESSOR IS A BIAS OPERATION\n");
     // Otherwise, push this node to the current sequence
     CurrentNodeSequence.push_back(SN->getInstruction());
     // This is a possible fuse target, gemm-add.
@@ -958,7 +958,7 @@ void FindFusionTargetsTraversal::codeGen(DFLeafNode *N) {
         if ((SII->getIntrinsicID() == Intrinsic::hpvm_tensor_clipped_relu) ||
             (SII->getIntrinsicID() == Intrinsic::hpvm_tensor_relu) ||
             (SII->getIntrinsicID() == Intrinsic::hpvm_tensor_tanh)) {
-          errs() << "SUCCESSOR IS ACTIVATION OPERATION\n";
+          DEBUG(errs() << "SUCCESSOR IS ACTIVATION OPERATION\n");
           // We found activation in sequence. Push in vector as well.
           CurrentNodeSequence.push_back(SN->getInstruction());
         }
@@ -980,7 +980,7 @@ void FindFusionTargetsTraversal::codeGen(DFLeafNode *N) {
 
 bool FuseHPVMTensorNodesWrapper::runOnModule(Module &M) {
 
-  errs() << "\nFUSE HPVM TENSOR NODES PASS\n";
+  DEBUG(errs() << "\nFUSE HPVM TENSOR NODES PASS\n");
   // Get the BuildDFG Analysis Results:
   // - Dataflow graph
   BuildDFG &DFG = getAnalysis<BuildDFG>();
@@ -993,7 +993,7 @@ bool FuseHPVMTensorNodesWrapper::runOnModule(Module &M) {
   // Visit each DFG only once
   std::set<Function *> Visited;
 
-  errs() << "Find targets\n";
+  DEBUG(errs() << "Find targets\n");
   // Iterate over all the DFGs and produce code for each one of them
   for (auto rootNode : Roots) {
 
@@ -1007,7 +1007,7 @@ bool FuseHPVMTensorNodesWrapper::runOnModule(Module &M) {
     Visited.insert(rootFunc);
   }
 
-  errs() << "Finished visiting DFGs ...\n";
+  DEBUG(errs() << "Finished visiting DFGs ...\n");
   FuseHPVMTensorNodes::FusionTargets &FTs = FTTVisitor->getFusionTargets();
 
   FuseHPVMTensorNodes Fuse;
diff --git a/hpvm/lib/Transforms/GenHPVM/GenHPVM.cpp b/hpvm/lib/Transforms/GenHPVM/GenHPVM.cpp
index 12f6abc34005f3ec84a2fbc54e9f8e65e8adbf48..eda655e3196450ee94ab44a70d500a1188007a66 100644
--- a/hpvm/lib/Transforms/GenHPVM/GenHPVM.cpp
+++ b/hpvm/lib/Transforms/GenHPVM/GenHPVM.cpp
@@ -382,7 +382,7 @@ bool GenHPVM::runOnModule(Module &M) {
         assert(isa<ConstantInt>(CI->getArgOperand(0)) &&
                "Argument to hint must be constant integer!");
         ConstantInt *hint = cast<ConstantInt>(CI->getArgOperand(0));
-        errs() << "HINT INSTRUCTION: " << *I << "\n";
+        DEBUG(errs() << "HINT INSTRUCTION: " << *I << "\n");
         hpvm::Target t = (hpvm::Target)hint->getZExtValue();
         addHint(CI->getParent()->getParent(), t);
         DEBUG(errs() << "Found hpvm hint call: " << *CI << "\n");
diff --git a/hpvm/lib/Transforms/InPlaceDFG/InPlaceDFGAnalysis.cpp b/hpvm/lib/Transforms/InPlaceDFG/InPlaceDFGAnalysis.cpp
index db5a1f5fe092ee7757ad0750bb089218c009955b..dcef54fb2662e3f2dff3d09b43b0791799df5ebc 100644
--- a/hpvm/lib/Transforms/InPlaceDFG/InPlaceDFGAnalysis.cpp
+++ b/hpvm/lib/Transforms/InPlaceDFG/InPlaceDFGAnalysis.cpp
@@ -152,7 +152,7 @@ bool InPlaceDFGAnalysisWrapper::runOnModule(Module &M) {
 /*** Methods of InPlaceDFGAnalysis ***/
 void InPlaceDFGAnalysis::run(Module &M, BuildDFG &DFG, InPlaceDFGParameter &IPP) {
 
-  errs() << "\nIN PLACE ANALYSIS PASS\n";
+  DEBUG(errs() << "\nIN PLACE ANALYSIS PASS\n");
 
   std::vector<DFInternalNode*> Roots = DFG.getRoots();
 
diff --git a/hpvm/projects/hpvm-profiler/hpvm_profiler/__init__.py b/hpvm/projects/hpvm-profiler/hpvm_profiler/__init__.py
index 4e91fbbe4a4af2c16b7583443360a09d88b0ac61..baaf645cb9f5a1c0f7f71a9d9b01269206a9cf18 100644
--- a/hpvm/projects/hpvm-profiler/hpvm_profiler/__init__.py
+++ b/hpvm/projects/hpvm-profiler/hpvm_profiler/__init__.py
@@ -1,19 +1,18 @@
-from dataclasses import dataclass
 from pathlib import Path
 from subprocess import PIPE, CalledProcessError
 from typing import Iterable, List, Tuple, Union
 
 import matplotlib.pyplot as plt
-from tqdm import trange
 
 PathLike = Union[Path, str]
 conf_opening, conf_closing = "+++++", "-----"
 
 
-def profile_configs(
+def profile_config_file(
     binary_path: PathLike,
     config_path: PathLike,
     output_config_path: PathLike,
+    progress_bar: bool = True,
     profile_filename: str = "profile_info.txt",
     qos_filename: str = "final_accuracy",
 ) -> None:
@@ -33,39 +32,69 @@ def profile_configs(
         It contains a single float number as the QoS of this run.
         This defaults to "final_accuracy" and should not be changed for HPVM binaries.
     """
-
-    from subprocess import check_call
-    from tempfile import NamedTemporaryFile
-
     # Read first line ("the float") and configs in config file
     header, configs = read_hpvm_configs(Path(config_path))
     if not configs:
         raise ValueError("Config file with no configs is unsupported.")
-    temp_file = NamedTemporaryFile("w")
-    baseline_time, baseline_acc = None, None
-    for idx in trange(len(configs), desc="Configs profiled"):
-        config = configs[idx]
-        # Write config to temp config file
-        write_hpvm_config(header, [config], Path(temp_file.name))
-        # Run binary_path binary,
-        # which generates `profile_filename` and `qos_filename` file in cwd.
-        try:
-            check_call([str(binary_path), "-c", str(temp_file.name)])     
-        except CalledProcessError as e:
-            print("Output from the program:")
-            print(e.output)
-            raise e
-        # Read these two files for time and QoS info.
-        time = _read_profile_file(Path(profile_filename))
-        acc = _read_qos_file(Path(qos_filename))
-        if idx == 0:
-            baseline_time, baseline_acc = time, acc
-            continue
-        assert baseline_time is not None and baseline_acc is not None
+    # Modifies configs in place.
+    profile_configs(
+        binary_path,
+        configs[1:],
+        configs[0],
+        progress_bar,
+        profile_filename,
+        qos_filename,
+    )
+    write_hpvm_configs(header, configs, Path(output_config_path))
+
+
+def profile_configs(
+    binary_path: PathLike,
+    configs: Iterable["Config"],
+    baseline_config: "Config",
+    progress_bar: bool = True,
+    profile_filename: str = "profile_info.txt",
+    qos_filename: str = "final_accuracy",
+) -> None:
+    """Profile a sequence of HPVM configs.
+    This function modifies argument `configs` in place."""
+
+    from tqdm import tqdm
+
+    baseline_time, baseline_acc = measure_config(binary_path, baseline_config)
+    iterable = tqdm(configs, desc="Configs profiled") if progress_bar else configs
+    for config in iterable:
+        time, acc = measure_config(binary_path, config, profile_filename, qos_filename)
         speedup = baseline_time / time
         config.update_profile_results(speedup, acc, baseline_acc)
-    write_hpvm_config(header, configs, Path(output_config_path))
+    return configs
+
+
+def measure_config(
+    binary_path: PathLike,
+    config: "Config",
+    profile_filename: str = "profile_info.txt",
+    qos_filename: str = "final_accuracy",
+):
+    from subprocess import check_call
+    from tempfile import NamedTemporaryFile
+    import os
+
+    temp_file = NamedTemporaryFile("w")
+    write_hpvm_configs("0.0", [config], Path(temp_file.name))
+    # Run binary_path binary,
+    # which generates `profile_filename` and `qos_filename` file in cwd.
+    try:
+        with open(os.devnull, "w") as f:
+            check_call([str(binary_path), "-c", str(temp_file.name)], stdout=f)
+    except CalledProcessError as e:
+        print("Output from the program:")
+        print(e.output)
+        raise e
+    time = _read_profile_file(Path(profile_filename))
+    acc = _read_qos_file(Path(qos_filename))
     temp_file.close()
+    return time, acc
 
 
 def plot_hpvm_configs(
@@ -102,19 +131,27 @@ def plot_hpvm_configs(
     return fig
 
 
-@dataclass
 class Config:
-    conf_name: str
-    speedup: float
-    energy: float
-    qos: float
-    qos_loss: float
-    # We don't care about the information in this part, and we don't parse this.
-    config_body: List[str]
+    def __init__(
+        self,
+        conf_name: str,
+        speedup: float,
+        energy: float,
+        qos: float,
+        qos_loss: float,
+        config_body: List[str],
+    ):
+        self.conf_name = conf_name
+        self.speedup = speedup
+        self.energy = energy
+        self.qos = qos
+        self.qos_loss = qos_loss
+        # We don't care about the information in this part, and we don't parse this.
+        self.config_body = config_body
 
     def update_profile_results(self, speedup: float, qos: float, base_qos: float):
         recorded_base_qos = self.qos + self.qos_loss
-        if abs(recorded_base_qos - base_qos) > 0.02:
+        if abs(recorded_base_qos - base_qos) > 0.025:
             raise ValueError(
                 f"Baseline QoS mismatch. Original: {recorded_base_qos}, measured: {base_qos}"
             )
@@ -157,15 +194,13 @@ def read_hpvm_configs(config_file: PathLike) -> Tuple[str, List[Config]]:
     return header, ret_configs
 
 
-def write_hpvm_config(header: str, configs: Iterable[Config], to_file: PathLike):
-    
+def write_hpvm_configs(header: str, configs: Iterable[Config], to_file: PathLike):
     text_segs = [header] + [str(config) for config in configs]
     with open(to_file, "w") as f:
         f.write("\n".join(text_segs))
         f.flush()
 
 
-
 def _read_profile_file(profile_file_path: Path):
     with profile_file_path.open() as f:
         target_lines = [line.strip() for line in f if "Total Time" in line]
diff --git a/hpvm/projects/torch2hpvm/torch2hpvm/compile.py b/hpvm/projects/torch2hpvm/torch2hpvm/compile.py
index 172448a60d4f65fc4aafc09c9a76d9cb492ff7b0..d53776b363595dd10b8f46f792474b941f444f2b 100644
--- a/hpvm/projects/torch2hpvm/torch2hpvm/compile.py
+++ b/hpvm/projects/torch2hpvm/torch2hpvm/compile.py
@@ -173,6 +173,8 @@ class ModelExporter:
 
         args = [
             "hpvm-clang",
+            "-O3",
+            "-fno-exceptions",
             str(self.codefile),
             str(output_binary),
             *self.compile_args,
diff --git a/hpvm/projects/torch2hpvm/torch2hpvm/graph_ir.py b/hpvm/projects/torch2hpvm/torch2hpvm/graph_ir.py
index a088e6eae5c7cd8fb3db62f5046aa5d9ac945726..5c248f829adef15093b853891927f353aca30c4b 100644
--- a/hpvm/projects/torch2hpvm/torch2hpvm/graph_ir.py
+++ b/hpvm/projects/torch2hpvm/torch2hpvm/graph_ir.py
@@ -198,12 +198,6 @@ class _Pool2DNode(DFGNode, abc.ABC):
             [self.pool_type, *self.kernel_shape, *self.pads, *self.strides,],
         )
 
-    def hpvm_codegen(self):
-        return (
-            "__hpvm__tensor_pool_max",
-            [*self.kernel_shape, *self.pads, *self.strides],
-        )
-
     def get_flops(self) -> int:
         input0 = self.input_shapes[0]
         return np.prod(input0) if input0 else 0
@@ -214,12 +208,24 @@ class MaxPool2DNode(_Pool2DNode):
     op_type = "MaxPool2D"
     hpvm_op_type = "maxpool"
 
+    def hpvm_codegen(self):
+        return (
+            "__hpvm__tensor_pool_max",
+            [*self.kernel_shape, *self.pads, *self.strides],
+        )
+
 
 class AveragePool2DNode(_Pool2DNode):
     pool_type = "1"
     op_type = "AveragePool2D"
     hpvm_op_type = "avgpool"
 
+    def hpvm_codegen(self):
+        return (
+            "__hpvm__tensor_pool_mean",
+            [*self.kernel_shape, *self.pads, *self.strides],
+        )
+
 
 class BiasAddNode(DFGNode):
     op_type = "BiasAdd"
diff --git a/hpvm/projects/torch2hpvm/torch2hpvm/template_hpvm.cpp.in b/hpvm/projects/torch2hpvm/torch2hpvm/template_hpvm.cpp.in
index 1f6dd875ffa6b39ab57609d7690c9a9ad3944b44..fa252a3e0ce063697d56e771afbfbde69d0c5641 100644
--- a/hpvm/projects/torch2hpvm/torch2hpvm/template_hpvm.cpp.in
+++ b/hpvm/projects/torch2hpvm/torch2hpvm/template_hpvm.cpp.in
@@ -1,6 +1,5 @@
 #include <string>
 #include <hpvm.h>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 
 {% for node in nodes %}
diff --git a/hpvm/projects/torch2hpvm/torch2hpvm/template_hpvm_inspect.cpp.in b/hpvm/projects/torch2hpvm/torch2hpvm/template_hpvm_inspect.cpp.in
index 94a8e0a534c04b323b4b66f369ab2d624a2a745f..8074704ece0988d7897c1e93b41f1ea3c43deb35 100644
--- a/hpvm/projects/torch2hpvm/torch2hpvm/template_hpvm_inspect.cpp.in
+++ b/hpvm/projects/torch2hpvm/torch2hpvm/template_hpvm_inspect.cpp.in
@@ -2,7 +2,6 @@
 #include <string>
 #include <array>
 #include <hpvm.h>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 
 // For writing binary to file descriptors
diff --git a/hpvm/scripts/hpvm_installer.py b/hpvm/scripts/hpvm_installer.py
index cce8b3f07928d9ab096df3166bd02f4e6f8e1f5d..c9d94523301e1847c7fd8227700d7542a3db1e06 100755
--- a/hpvm/scripts/hpvm_installer.py
+++ b/hpvm/scripts/hpvm_installer.py
@@ -87,6 +87,9 @@ def parse_args(args=None):
     parser.add_argument(
         "-r", "--run-tests", action="store_true", help="Build and run test cases"
     )
+    parser.add_argument(
+        "--no-pypkg", action="store_true", help="Don't build the HPVM Python Packages"
+    )
     parser.add_argument(
         "--no-params", action="store_true", help="Don't download DNN model parameters"
     )
@@ -158,6 +161,9 @@ Arguments: """
     if args.cmake_args.strip() != "":    
       args.cmake_args = [f"-{arg}" for arg in args.cmake_args.split(" ")]
 
+    args.no_pypkg = not input_with_check(
+        "Install HPVM Python Packages (recommended)? [y/n]: ", parse_yn, "Please enter y or n"
+    )
     args.no_params = not input_with_check(
         "Download DNN weights (recommended)? [y/n]: ", parse_yn, "Please enter y or n"
     )
@@ -337,7 +343,8 @@ def main():
     link_and_patch()
     if not args.no_params:
         check_download_model_params()
-    install_py_packages()
+    if not args.no_pypkg:
+        install_py_packages()
     if args.no_build:
         print(
             """
diff --git a/hpvm/test/CMakeLists.txt b/hpvm/test/CMakeLists.txt
index 3c4f26472317f511edaab98c5e4a4f8ed7ba2dfb..4ff98a5386d91ce50b755d7e507a84e0fbe1c4dd 100644
--- a/hpvm/test/CMakeLists.txt
+++ b/hpvm/test/CMakeLists.txt
@@ -8,5 +8,6 @@ set(CLANG_CXX ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/clang++)
 add_subdirectory(hpvm_pass)  # Passes test suite
 add_subdirectory(benchmarks)
 add_subdirectory(dnn_benchmarks/hpvm-c)  # HPVM-C DNN accuracy test suite
+add_subdirectory(dnn_benchmarks/pytorch)  # Torch frontend test suite
 add_subdirectory(dnn_benchmarks/tensor-rt-src)  # tensor_runtime DNN (build only, no tests)
 add_subdirectory(dnn_benchmarks/profiling)  # hpvm-profiler test suite
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/CMakeLists.txt b/hpvm/test/dnn_benchmarks/hpvm-c/CMakeLists.txt
index 9f34317d34157d57468c60cb854828b5c54f1cde..aedf0640025703b62ed5e9a810f5c3d68e800f6f 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/CMakeLists.txt
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/CMakeLists.txt
@@ -1,18 +1,17 @@
-# Each source file contains a @MODEL_PARAMS_DIR@ waiting to be filled in.
+# MODEL_PARAMS_DIR is given as -DMODEL_PARAMS_DIR=<value> to compiler.
 set(MODEL_PARAMS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../model_params/")
 set(test_compile_targets "")
 function(compile_hpvm_c target_name src_filepath codegen_target)
-  set(generated_file_path "${CMAKE_CURRENT_BINARY_DIR}/${target_name}.cpp")
   set(output_bin_path "${CMAKE_CURRENT_BINARY_DIR}/hpvm_${target_name}")
-  configure_file(${src_filepath} ${generated_file_path})
   # Add an "hpvm_" prefix here because Ninja generator doesn't like
   # the name of output file and custom target to clash.
   add_custom_command(
     OUTPUT ${output_bin_path}
-    DEPENDS ${generated_file_path} hpvm-clang
+    DEPENDS ${src_filepath} hpvm-clang
     COMMAND hpvm-clang
-      ${generated_file_path} ${output_bin_path} -O3 -fno-exceptions
-      -t ${codegen_target} -I ${CMAKE_CURRENT_SOURCE_DIR}/include ${ARGN}
+      ${src_filepath} ${output_bin_path} -O3 -fno-exceptions
+      "-DMODEL_PARAMS_DIR=${MODEL_PARAMS_DIR}"
+      -t ${codegen_target} ${ARGN}
   )
   add_custom_target(${target_name} DEPENDS ${output_bin_path})
   set(test_compile_targets ${test_compile_targets} ${target_name} PARENT_SCOPE)
@@ -49,16 +48,17 @@ foreach(dir ${entries})
 endforeach(dir)
 
 # Install an accuracy comparator under build/bin for test suite.
-set(BIN_DIR ${LLVM_BINARY_DIR}/${LLVM_TOOLS_INSTALL_DIR})
+set(BIN_DIR ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
 add_custom_command(
   OUTPUT ${BIN_DIR}/check_dnn_acc.py
   COMMAND cp ${CMAKE_CURRENT_SOURCE_DIR}/check_dnn_acc.py ${BIN_DIR}
   COMMAND chmod +x ${BIN_DIR}/check_dnn_acc.py
   DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/check_dnn_acc.py
 )
+add_custom_target(check_dnn_acc DEPENDS ${BIN_DIR}/check_dnn_acc.py)
 
 message(STATUS "List of HPVM-C DNN benchmarks: ${test_compile_targets}")
-add_custom_target(dnn_benchmarks DEPENDS ${test_compile_targets} ${BIN_DIR}/check_dnn_acc.py)
+add_custom_target(dnn_benchmarks DEPENDS ${test_compile_targets})
 message(STATUS "Target name for compiling all DNN benchmarks: dnn_benchmarks")
 
 # --[ llvm-lit test setup
@@ -73,6 +73,6 @@ configure_lit_site_cfg(
 )
 add_lit_testsuite(check-hpvm-dnn "Running HPVM DNNs"
   ${CMAKE_CURRENT_BINARY_DIR}
-  DEPENDS dnn_benchmarks  # Compile all dnn benchmarks to run them
+  DEPENDS dnn_benchmarks check_dnn_acc # Compile all dnn benchmarks to run them
   ARGS "-j1"  # Run DNN benchmarks sequentially
 )
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10.cpp
index 39f49784d76470c4e0bab213127369806e1e2531..255ec86924066beb82e18cf83e7c0b4500ad7287 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10.cpp
@@ -1,9 +1,16 @@
 #include <string>
 #include <hpvm.h>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 #include <config.h>
 
+#ifndef MODEL_PARAMS_DIR
+#error MODEL_PARAMS_DIR is not defined
+#endif
+
+#define STR_VALUE(X) #X
+#define STRINGIFY(X) STR_VALUE(X)
+#define MODEL_PARAMS_DIR_STR STRINGIFY(MODEL_PARAMS_DIR)
+
 void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
   __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
@@ -189,7 +196,7 @@ void var_22_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_23_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_softmax(t1);
@@ -429,7 +436,7 @@ int main(int argc, char *argv[]) {
     }
   }
 
-  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/alexnet2_cifar10/";
+  std::string dir_prefix = std::string(MODEL_PARAMS_DIR_STR) + "/alexnet2_cifar10/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10_cudnn.cpp
index dafd1a6ae084c4e1bf819ce1ac94e667c696eb24..b0a8fe8102dbba10cfe19fa3b825ec7f2d9ba31c 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10_cudnn.cpp
@@ -1,9 +1,16 @@
 #include <string>
 #include <hpvm.h>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 #include <config.h>
 
+#ifndef MODEL_PARAMS_DIR
+#error MODEL_PARAMS_DIR is not defined
+#endif
+
+#define STR_VALUE(X) #X
+#define STRINGIFY(X) STR_VALUE(X)
+#define MODEL_PARAMS_DIR_STR STRINGIFY(MODEL_PARAMS_DIR)
+
 void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
   __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
@@ -434,7 +441,7 @@ int main(int argc, char *argv[]) {
     }
   }
 
-  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/alexnet2_cifar10/";
+  std::string dir_prefix = std::string(MODEL_PARAMS_DIR_STR) + "/alexnet2_cifar10/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10.cpp
index 64350c590bb181fa4eaab4b2bf5fb37f69e11c09..b37fd71deaeb607545837faf09b133c14b9d8968 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10.cpp
@@ -1,9 +1,16 @@
 #include <string>
 #include <hpvm.h>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 #include <config.h>
 
+#ifndef MODEL_PARAMS_DIR
+#error MODEL_PARAMS_DIR is not defined
+#endif
+
+#define STR_VALUE(X) #X
+#define STRINGIFY(X) STR_VALUE(X)
+#define MODEL_PARAMS_DIR_STR STRINGIFY(MODEL_PARAMS_DIR)
+
 void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
   __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
@@ -165,7 +172,7 @@ void var_19_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_20_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_softmax(t1);
@@ -380,7 +387,7 @@ int main(int argc, char *argv[]) {
     }
   }
 
-  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/alexnet_cifar10/";
+  std::string dir_prefix = std::string(MODEL_PARAMS_DIR_STR) + "/alexnet_cifar10/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   uint8_t *labels = readLabels(labels_path.c_str(), 5000);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10_cudnn.cpp
index 72af2ff4a1b33aabac427d203101c32c4a7403c7..48ce0864174f5401e295cdb3c01011009bfe338b 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10_cudnn.cpp
@@ -1,9 +1,16 @@
 #include <string>
 #include <hpvm.h>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 #include <config.h>
 
+#ifndef MODEL_PARAMS_DIR
+#error MODEL_PARAMS_DIR is not defined
+#endif
+
+#define STR_VALUE(X) #X
+#define STRINGIFY(X) STR_VALUE(X)
+#define MODEL_PARAMS_DIR_STR STRINGIFY(MODEL_PARAMS_DIR)
+
 void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
   __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
@@ -386,7 +393,7 @@ int main(int argc, char *argv[]) {
     }
   }
 
-  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/alexnet_cifar10/";
+  std::string dir_prefix = std::string(MODEL_PARAMS_DIR_STR) + "/alexnet_cifar10/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   uint32_t *labels = readLabels3(labels_path.c_str(), 5000);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp
index 37e7a34a51a14b6903d549f271d3c0c83822fec8..abdf532c7a0417e16a55f4bb7ec57471340837a4 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp
@@ -1,7 +1,14 @@
 #include <config.h>
+
+#ifndef MODEL_PARAMS_DIR
+#error MODEL_PARAMS_DIR is not defined
+#endif
+
+#define STR_VALUE(X) #X
+#define STRINGIFY(X) STR_VALUE(X)
+#define MODEL_PARAMS_DIR_STR STRINGIFY(MODEL_PARAMS_DIR)
 #include <hpvm.h>
 #include <string>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 
 void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
@@ -213,7 +220,7 @@ void var_25_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_26_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_softmax(t1);
@@ -478,7 +485,7 @@ int main(int argc, char *argv[]) {
     }
   }
 
-  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/alexnet_imagenet/";
+  std::string dir_prefix = std::string(MODEL_PARAMS_DIR_STR) + "/alexnet_imagenet/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet_cudnn.cpp
index 1206d7bac4b9dcff2b4cfd7183f4a3e5f65d73d9..239d4ea5793e010562a7d4963f590fff85b932dc 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet_cudnn.cpp
@@ -1,9 +1,16 @@
 #include <string>
 #include <hpvm.h>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 #include <config.h>
 
+#ifndef MODEL_PARAMS_DIR
+#error MODEL_PARAMS_DIR is not defined
+#endif
+
+#define STR_VALUE(X) #X
+#define STRINGIFY(X) STR_VALUE(X)
+#define MODEL_PARAMS_DIR_STR STRINGIFY(MODEL_PARAMS_DIR)
+
 void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
   __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
@@ -483,7 +490,7 @@ int main(int argc, char *argv[]) {
     }
   }
 
-  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/alexnet_imagenet/";
+  std::string dir_prefix = std::string(MODEL_PARAMS_DIR_STR) + "/alexnet_imagenet/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist.cpp
index d7ab4238ebac5598b92c432aced85a602bb5ce89..684c1bfef532c162a7981a12b54b5282c5a1b114 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist.cpp
@@ -1,9 +1,16 @@
 #include <string>
 #include <hpvm.h>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 #include <config.h>
 
+#ifndef MODEL_PARAMS_DIR
+#error MODEL_PARAMS_DIR is not defined
+#endif
+
+#define STR_VALUE(X) #X
+#define STRINGIFY(X) STR_VALUE(X)
+#define MODEL_PARAMS_DIR_STR STRINGIFY(MODEL_PARAMS_DIR)
+
 void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
   __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
@@ -117,7 +124,7 @@ void var_13_node(void *t1, size_t bytes_t1) {
 }
 
 void var_14_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_softmax(t1);
@@ -282,7 +289,7 @@ int main(int argc, char *argv[]) {
     }
   }
 
-  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/lenet_mnist/";
+  std::string dir_prefix = std::string(MODEL_PARAMS_DIR_STR) + "/lenet_mnist/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist_cudnn.cpp
index 26acc65a99287ea9f20e037dd996635315d76e48..b607e9e653063437b72179ab83ea74921b9bd3ef 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist_cudnn.cpp
@@ -1,9 +1,16 @@
 #include <string>
 #include <hpvm.h>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 #include <config.h>
 
+#ifndef MODEL_PARAMS_DIR
+#error MODEL_PARAMS_DIR is not defined
+#endif
+
+#define STR_VALUE(X) #X
+#define STRINGIFY(X) STR_VALUE(X)
+#define MODEL_PARAMS_DIR_STR STRINGIFY(MODEL_PARAMS_DIR)
+
 void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
   __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
@@ -287,7 +294,7 @@ int main(int argc, char *argv[]) {
     }
   }
 
-  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/lenet_mnist/";
+  std::string dir_prefix = std::string(MODEL_PARAMS_DIR_STR) + "/lenet_mnist/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10.cpp
index 5f8c63dbfbfb800dc6f60f9ed9a6108dee0a9a48..881a9bbaa877aad6c0a4b0d7cbae79d7a60c862c 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10.cpp
@@ -1,9 +1,16 @@
 #include <string>
 #include <hpvm.h>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 #include <config.h>
 
+#ifndef MODEL_PARAMS_DIR
+#error MODEL_PARAMS_DIR is not defined
+#endif
+
+#define STR_VALUE(X) #X
+#define STRINGIFY(X) STR_VALUE(X)
+#define MODEL_PARAMS_DIR_STR STRINGIFY(MODEL_PARAMS_DIR)
+
 void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
   __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
@@ -15,7 +22,7 @@ void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                 size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                 size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
@@ -23,7 +30,7 @@ void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
 }
 
 void var_2_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -31,7 +38,7 @@ void var_2_node(void *t1, size_t bytes_t1) {
 }
 
 void var_3_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_group_convolution(t1, t2, 1, 1, 1, 1, 1, 32);
@@ -41,7 +48,7 @@ void var_3_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                 size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                 size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
@@ -49,7 +56,7 @@ void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
 }
 
 void var_5_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -67,7 +74,7 @@ void var_6_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_7_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                 size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                 size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
@@ -75,7 +82,7 @@ void var_7_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
 }
 
 void var_8_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -83,7 +90,7 @@ void var_8_node(void *t1, size_t bytes_t1) {
 }
 
 void var_9_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_group_convolution(t1, t2, 1, 1, 2, 2, 1, 64);
@@ -93,7 +100,7 @@ void var_9_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_10_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
@@ -101,7 +108,7 @@ void var_10_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
 }
 
 void var_11_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -119,7 +126,7 @@ void var_12_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_13_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
@@ -127,7 +134,7 @@ void var_13_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
 }
 
 void var_14_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -135,7 +142,7 @@ void var_14_node(void *t1, size_t bytes_t1) {
 }
 
 void var_15_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_group_convolution(t1, t2, 1, 1, 1, 1, 1, 128);
@@ -145,7 +152,7 @@ void var_15_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_16_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
@@ -153,7 +160,7 @@ void var_16_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
 }
 
 void var_17_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -171,7 +178,7 @@ void var_18_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_19_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
@@ -179,7 +186,7 @@ void var_19_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
 }
 
 void var_20_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -187,7 +194,7 @@ void var_20_node(void *t1, size_t bytes_t1) {
 }
 
 void var_21_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_group_convolution(t1, t2, 1, 1, 2, 2, 1, 128);
@@ -197,7 +204,7 @@ void var_21_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_22_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
@@ -205,7 +212,7 @@ void var_22_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
 }
 
 void var_23_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -223,7 +230,7 @@ void var_24_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_25_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
@@ -231,7 +238,7 @@ void var_25_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
 }
 
 void var_26_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -239,7 +246,7 @@ void var_26_node(void *t1, size_t bytes_t1) {
 }
 
 void var_27_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_group_convolution(t1, t2, 1, 1, 1, 1, 1, 256);
@@ -249,7 +256,7 @@ void var_27_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_28_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
@@ -257,7 +264,7 @@ void var_28_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
 }
 
 void var_29_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -275,7 +282,7 @@ void var_30_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_31_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
@@ -283,7 +290,7 @@ void var_31_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
 }
 
 void var_32_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -291,7 +298,7 @@ void var_32_node(void *t1, size_t bytes_t1) {
 }
 
 void var_33_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_group_convolution(t1, t2, 1, 1, 2, 2, 1, 256);
@@ -301,7 +308,7 @@ void var_33_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_34_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
@@ -309,7 +316,7 @@ void var_34_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
 }
 
 void var_35_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -327,7 +334,7 @@ void var_36_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_37_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
@@ -335,7 +342,7 @@ void var_37_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
 }
 
 void var_38_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -343,7 +350,7 @@ void var_38_node(void *t1, size_t bytes_t1) {
 }
 
 void var_39_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_group_convolution(t1, t2, 1, 1, 1, 1, 1, 512);
@@ -353,7 +360,7 @@ void var_39_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_40_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
@@ -361,7 +368,7 @@ void var_40_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
 }
 
 void var_41_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -379,7 +386,7 @@ void var_42_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_43_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
@@ -387,7 +394,7 @@ void var_43_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
 }
 
 void var_44_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -395,7 +402,7 @@ void var_44_node(void *t1, size_t bytes_t1) {
 }
 
 void var_45_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_group_convolution(t1, t2, 1, 1, 1, 1, 1, 512);
@@ -405,7 +412,7 @@ void var_45_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_46_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
@@ -413,7 +420,7 @@ void var_46_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
 }
 
 void var_47_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -431,7 +438,7 @@ void var_48_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_49_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
@@ -439,7 +446,7 @@ void var_49_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
 }
 
 void var_50_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -447,7 +454,7 @@ void var_50_node(void *t1, size_t bytes_t1) {
 }
 
 void var_51_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_group_convolution(t1, t2, 1, 1, 1, 1, 1, 512);
@@ -457,7 +464,7 @@ void var_51_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_52_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
@@ -465,7 +472,7 @@ void var_52_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
 }
 
 void var_53_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -483,7 +490,7 @@ void var_54_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_55_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
@@ -491,7 +498,7 @@ void var_55_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
 }
 
 void var_56_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -499,7 +506,7 @@ void var_56_node(void *t1, size_t bytes_t1) {
 }
 
 void var_57_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_group_convolution(t1, t2, 1, 1, 1, 1, 1, 512);
@@ -509,7 +516,7 @@ void var_57_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_58_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
@@ -517,7 +524,7 @@ void var_58_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
 }
 
 void var_59_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -535,7 +542,7 @@ void var_60_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_61_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
@@ -543,7 +550,7 @@ void var_61_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
 }
 
 void var_62_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -551,7 +558,7 @@ void var_62_node(void *t1, size_t bytes_t1) {
 }
 
 void var_63_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_group_convolution(t1, t2, 1, 1, 1, 1, 1, 512);
@@ -561,7 +568,7 @@ void var_63_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_64_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
@@ -569,7 +576,7 @@ void var_64_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
 }
 
 void var_65_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -587,7 +594,7 @@ void var_66_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_67_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
@@ -595,7 +602,7 @@ void var_67_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
 }
 
 void var_68_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -603,7 +610,7 @@ void var_68_node(void *t1, size_t bytes_t1) {
 }
 
 void var_69_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_group_convolution(t1, t2, 1, 1, 2, 2, 1, 512);
@@ -613,7 +620,7 @@ void var_69_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_70_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
@@ -621,7 +628,7 @@ void var_70_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
 }
 
 void var_71_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -639,7 +646,7 @@ void var_72_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_73_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
@@ -647,7 +654,7 @@ void var_73_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
 }
 
 void var_74_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -655,7 +662,7 @@ void var_74_node(void *t1, size_t bytes_t1) {
 }
 
 void var_75_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_group_convolution(t1, t2, 1, 1, 1, 1, 1, 1024);
@@ -665,7 +672,7 @@ void var_75_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_76_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
@@ -673,7 +680,7 @@ void var_76_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
 }
 
 void var_77_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -691,7 +698,7 @@ void var_78_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_79_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
@@ -699,7 +706,7 @@ void var_79_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
 }
 
 void var_80_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -707,7 +714,7 @@ void var_80_node(void *t1, size_t bytes_t1) {
 }
 
 void var_81_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_pool_mean(t1, 2, 2, 0, 0, 2, 2);
@@ -731,7 +738,7 @@ void var_83_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_84_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_softmax(t1);
@@ -1984,7 +1991,7 @@ int main(int argc, char *argv[]) {
   }
 
   std::string dir_prefix =
-      std::string("@MODEL_PARAMS_DIR@") + "/mobilenet_cifar10/";
+      std::string(MODEL_PARAMS_DIR_STR) + "/mobilenet_cifar10/";
 
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10_cudnn.cpp
index 2070089053ef0b6e7e0ca33c2c6cc4cea17b8e29..7cfeca00418f2f580227b880cb4d6e63afaaf6f1 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10_cudnn.cpp
@@ -1,9 +1,16 @@
 #include <string>
 #include <hpvm.h>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 #include <config.h>
 
+#ifndef MODEL_PARAMS_DIR
+#error MODEL_PARAMS_DIR is not defined
+#endif
+
+#define STR_VALUE(X) #X
+#define STRINGIFY(X) STR_VALUE(X)
+#define MODEL_PARAMS_DIR_STR STRINGIFY(MODEL_PARAMS_DIR)
+
 void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
   __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
@@ -1989,7 +1996,7 @@ int main(int argc, char *argv[]) {
   }
 
   std::string dir_prefix =
-      std::string("@MODEL_PARAMS_DIR@") + "/mobilenet_cifar10/";
+      std::string(MODEL_PARAMS_DIR_STR) + "/mobilenet_cifar10/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10.cpp
index 5b580f26821e67cc96c8347e485b792f40105176..ce164a16f0c95a6f6dfc141609542a9614e2c994 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10.cpp
@@ -1,10 +1,17 @@
 
 #include <string>
 #include <hpvm.h>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 #include <config.h>
 
+#ifndef MODEL_PARAMS_DIR
+#error MODEL_PARAMS_DIR is not defined
+#endif
+
+#define STR_VALUE(X) #X
+#define STRINGIFY(X) STR_VALUE(X)
+#define MODEL_PARAMS_DIR_STR STRINGIFY(MODEL_PARAMS_DIR)
+
 void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
   __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
@@ -78,7 +85,7 @@ void var_7_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_8_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
   __hpvm__node_id(9);
 
@@ -87,7 +94,7 @@ void var_8_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_9_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
   __hpvm__node_id(10);
 
@@ -141,7 +148,7 @@ void var_14_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_15_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
   __hpvm__node_id(16);
 
@@ -150,7 +157,7 @@ void var_15_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_16_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
   __hpvm__node_id(17);
 
@@ -204,7 +211,7 @@ void var_21_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_22_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
   __hpvm__node_id(23);
 
@@ -213,7 +220,7 @@ void var_22_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_23_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
   __hpvm__node_id(24);
 
@@ -285,7 +292,7 @@ void var_30_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_31_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
   __hpvm__node_id(32);
 
@@ -294,7 +301,7 @@ void var_31_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_32_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
   __hpvm__node_id(33);
 
@@ -348,7 +355,7 @@ void var_37_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_38_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
   __hpvm__node_id(39);
 
@@ -357,7 +364,7 @@ void var_38_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_39_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
   __hpvm__node_id(40);
 
@@ -411,7 +418,7 @@ void var_44_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_45_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
   __hpvm__node_id(46);
 
@@ -420,7 +427,7 @@ void var_45_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_46_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
   __hpvm__node_id(47);
 
@@ -492,7 +499,7 @@ void var_53_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_54_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
   __hpvm__node_id(55);
 
@@ -501,7 +508,7 @@ void var_54_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_55_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
   __hpvm__node_id(56);
 
@@ -555,7 +562,7 @@ void var_60_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_61_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
   __hpvm__node_id(62);
 
@@ -564,7 +571,7 @@ void var_61_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_62_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
   __hpvm__node_id(63);
 
@@ -618,7 +625,7 @@ void var_67_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_68_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
   __hpvm__node_id(69);
 
@@ -627,7 +634,7 @@ void var_68_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_69_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
   __hpvm__node_id(70);
 
@@ -636,7 +643,7 @@ void var_69_node(void *t1, size_t bytes_t1) {
 }
 
 void var_70_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
   __hpvm__node_id(71);
 
@@ -663,7 +670,7 @@ void var_72_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_73_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
   __hpvm__node_id(74);
 
@@ -1318,7 +1325,7 @@ int main(int argc, char *argv[]) {
     }
   }
 
-  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/resnet18_cifar10/";
+  std::string dir_prefix = std::string(MODEL_PARAMS_DIR_STR) + "/resnet18_cifar10/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10_cudnn.cpp
index 735e2c9abab91f00560faa5496e234321027b82c..7eb4acc44ac6c7061c0a500bd8f68e1ea6a1a7d7 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10_cudnn.cpp
@@ -1,10 +1,17 @@
 
 #include <string>
 #include <hpvm.h>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 #include <config.h>
 
+#ifndef MODEL_PARAMS_DIR
+#error MODEL_PARAMS_DIR is not defined
+#endif
+
+#define STR_VALUE(X) #X
+#define STRINGIFY(X) STR_VALUE(X)
+#define MODEL_PARAMS_DIR_STR STRINGIFY(MODEL_PARAMS_DIR)
+
 void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
   __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
@@ -1249,7 +1256,7 @@ int main(int argc, char *argv[]) {
     }
   }
 
-  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/resnet18_cifar10/";
+  std::string dir_prefix = std::string(MODEL_PARAMS_DIR_STR) + "/resnet18_cifar10/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   uint32_t *labels = readLabels3(labels_path.c_str(), 5000);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp
index 160563064cc47effd463c4915b0c7f0d93bff56f..54d008932687a895a1819c5480af2b39b87aadf6 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp
@@ -1,9 +1,16 @@
 #include <string>
 #include <hpvm.h>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 #include <config.h>
 
+#ifndef MODEL_PARAMS_DIR
+#error MODEL_PARAMS_DIR is not defined
+#endif
+
+#define STR_VALUE(X) #X
+#define STRINGIFY(X) STR_VALUE(X)
+#define MODEL_PARAMS_DIR_STR STRINGIFY(MODEL_PARAMS_DIR)
+
 void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
   __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
@@ -5151,7 +5158,7 @@ int main(int argc, char *argv[]) {
   }
 
   std::string dir_prefix =
-      std::string("@MODEL_PARAMS_DIR@") + "/resnet50_imagenet/";
+      std::string(MODEL_PARAMS_DIR_STR) + "/resnet50_imagenet/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet_cudnn.cpp
index c5cf2cb3a0177a5cce9ad0cf460484e63ded0ecd..fa1e616156683131b40d25ee243d5925bab9cf42 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet_cudnn.cpp
@@ -1,9 +1,16 @@
 #include <string>
 #include <hpvm.h>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 #include <config.h>
 
+#ifndef MODEL_PARAMS_DIR
+#error MODEL_PARAMS_DIR is not defined
+#endif
+
+#define STR_VALUE(X) #X
+#define STRINGIFY(X) STR_VALUE(X)
+#define MODEL_PARAMS_DIR_STR STRINGIFY(MODEL_PARAMS_DIR)
+
 void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
   __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
@@ -4927,7 +4934,7 @@ int main(int argc, char *argv[]) {
   }
 
   std::string dir_prefix =
-      std::string("@MODEL_PARAMS_DIR@") + "/resnet50_imagenet/";
+      std::string(MODEL_PARAMS_DIR_STR) + "/resnet50_imagenet/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp
index bec6139c2d089e90d09fa239e1b15c9a835fd4ea..ddd015a63a3284f2c78a57a8173544d233fd2772 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp
@@ -1,9 +1,16 @@
 #include <string>
 #include <hpvm.h>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 #include <config.h>
 
+#ifndef MODEL_PARAMS_DIR
+#error MODEL_PARAMS_DIR is not defined
+#endif
+
+#define STR_VALUE(X) #X
+#define STRINGIFY(X) STR_VALUE(X)
+#define MODEL_PARAMS_DIR_STR STRINGIFY(MODEL_PARAMS_DIR)
+
 void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
   __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
@@ -397,7 +404,7 @@ void var_48_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_49_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_softmax(t1);
@@ -845,7 +852,7 @@ int main(int argc, char *argv[]) {
     }
   }
 
-  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/vgg16_cifar10/";
+  std::string dir_prefix = std::string(MODEL_PARAMS_DIR_STR) + "/vgg16_cifar10/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10_cudnn.cpp
index 4fa7d5c121bacff122821fe983ed443e3c6db249..79970b6395c3d02c9fca1810e06dfd636fd682f8 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10_cudnn.cpp
@@ -1,9 +1,16 @@
 #include <string>
 #include <hpvm.h>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 #include <config.h>
 
+#ifndef MODEL_PARAMS_DIR
+#error MODEL_PARAMS_DIR is not defined
+#endif
+
+#define STR_VALUE(X) #X
+#define STRINGIFY(X) STR_VALUE(X)
+#define MODEL_PARAMS_DIR_STR STRINGIFY(MODEL_PARAMS_DIR)
+
 void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
   __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
@@ -850,7 +857,7 @@ int main(int argc, char *argv[]) {
     }
   }
 
-  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/vgg16_cifar10/";
+  std::string dir_prefix = std::string(MODEL_PARAMS_DIR_STR) + "/vgg16_cifar10/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp
index 8666030fba4390d29d9324f5a5c7d60324325f05..7a8fbbc9b3a5de110996b56e8f5ee06fc761ef41 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp
@@ -1,9 +1,16 @@
 #include <string>
 #include <hpvm.h>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 #include <config.h>
 
+#ifndef MODEL_PARAMS_DIR
+#error MODEL_PARAMS_DIR is not defined
+#endif
+
+#define STR_VALUE(X) #X
+#define STRINGIFY(X) STR_VALUE(X)
+#define MODEL_PARAMS_DIR_STR STRINGIFY(MODEL_PARAMS_DIR)
+
 void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
   __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
@@ -397,7 +404,7 @@ void var_48_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_49_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_softmax(t1);
@@ -845,7 +852,7 @@ int main(int argc, char *argv[]) {
     }
   }
 
-  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/vgg16_cifar100/";
+  std::string dir_prefix = std::string(MODEL_PARAMS_DIR_STR) + "/vgg16_cifar100/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100_cudnn.cpp
index 6d01caa3b7c0875cff4f3e16131ddd09195e92b7..7c6fd3ff4048d6e7a9c1317abd484a0f9c990d9c 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100_cudnn.cpp
@@ -1,9 +1,16 @@
 #include <string>
 #include <hpvm.h>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 #include <config.h>
 
+#ifndef MODEL_PARAMS_DIR
+#error MODEL_PARAMS_DIR is not defined
+#endif
+
+#define STR_VALUE(X) #X
+#define STRINGIFY(X) STR_VALUE(X)
+#define MODEL_PARAMS_DIR_STR STRINGIFY(MODEL_PARAMS_DIR)
+
 void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
   __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
@@ -850,7 +857,7 @@ int main(int argc, char *argv[]) {
     }
   }
 
-  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/vgg16_cifar100/";
+  std::string dir_prefix = std::string(MODEL_PARAMS_DIR_STR) + "/vgg16_cifar100/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp
index b1b2b4f2e312b6372e10a2fce3ef12eab2dddded..2fdf36965da100843f69e2ca6ba975bcae4a13ff 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp
@@ -1,9 +1,16 @@
 #include <string>
 #include <hpvm.h>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 #include <config.h>
 
+#ifndef MODEL_PARAMS_DIR
+#error MODEL_PARAMS_DIR is not defined
+#endif
+
+#define STR_VALUE(X) #X
+#define STRINGIFY(X) STR_VALUE(X)
+#define MODEL_PARAMS_DIR_STR STRINGIFY(MODEL_PARAMS_DIR)
+
 void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
   __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
@@ -421,7 +428,7 @@ void var_51_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_52_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_softmax(t1);
@@ -893,7 +900,7 @@ int main(int argc, char *argv[]) {
     }
   }
 
-  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/vgg16_imagenet/";
+  std::string dir_prefix = std::string(MODEL_PARAMS_DIR_STR) + "/vgg16_imagenet/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet_cudnn.cpp
index eb29e45805671072428318412f27b05d0da90199..e516b9117b816cc0cc29d21527e7873eb04e33ac 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet_cudnn.cpp
@@ -1,9 +1,16 @@
 #include <string>
 #include <hpvm.h>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 #include <config.h>
 
+#ifndef MODEL_PARAMS_DIR
+#error MODEL_PARAMS_DIR is not defined
+#endif
+
+#define STR_VALUE(X) #X
+#define STRINGIFY(X) STR_VALUE(X)
+#define MODEL_PARAMS_DIR_STR STRINGIFY(MODEL_PARAMS_DIR)
+
 void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
   __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
@@ -898,7 +905,7 @@ int main(int argc, char *argv[]) {
     }
   }
 
-  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/vgg16_imagenet/";
+  std::string dir_prefix = std::string(MODEL_PARAMS_DIR_STR) + "/vgg16_imagenet/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/include/tensorTypes.h b/hpvm/test/dnn_benchmarks/hpvm-c/include/tensorTypes.h
deleted file mode 100644
index 726080efe7e1a06363e7fca191f9708219d5baeb..0000000000000000000000000000000000000000
--- a/hpvm/test/dnn_benchmarks/hpvm-c/include/tensorTypes.h
+++ /dev/null
@@ -1,39 +0,0 @@
-
-#ifndef TYPES_HEADER
-#define TYPES_HEADER
-
-
-/*struct Dimension_t{
-  int num_dims;
-  size_t* dim_sizes;
-};
-
-
-struct Tensor_t{
-  int tensor_id; // used for indexing (in the tensor runtime)
-  int data_type; // {float_type, double_type, half_type, int_type}
-  int data_format; // {nchw, nhwc}
-  void* host_data;
-  size_t num_elems; // Total elements
-  size_t size_in_bytes; // Total size in bytes
-  struct Dimension_t dims;
-};
-
-
-enum Tensor_type_t{
-  float_type,
-  double_type,
-  half_type,
-  int_type
-};
-
-
-// NOTE: Currently only NCHW is supported due to limited cuDNN support
-enum Tensor_format_t{
-  nchw,
-  nhwc 
-};
-
-*/
-
-#endif
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/include/tensorUtils.h b/hpvm/test/dnn_benchmarks/hpvm-c/include/tensorUtils.h
deleted file mode 100644
index 1d5ac7d908b0990f21de885c645786997640264c..0000000000000000000000000000000000000000
--- a/hpvm/test/dnn_benchmarks/hpvm-c/include/tensorUtils.h
+++ /dev/null
@@ -1,758 +0,0 @@
-
-// Header guards
-#ifndef UTILS_HEADER
-#define UTILS_HEADER
-
-#include <sstream>
-#include <vector>
-#include <bits/stdc++.h>
-#include <tensor_runtime.h>
-#include <tensor.h>
-#include <cmath>
-
-std::vector<float> run_accuracies;
-
-void printTensorInfo(void *tensor_ptr) {
-
-  struct Tensor *tensor = (struct Tensor *)tensor_ptr;
-
-  if (tensor->gpu_data != NULL) {
-    printf("Successful cudaMalloc \n");
-  }
-
-  printf("tensor dims = %d \n", tensor->dims.num_dims);
-  printf("dim1_size = %lu \n", tensor->dims.dim_sizes[0]);
-  printf("dim2_size = %lu \n", tensor->dims.dim_sizes[1]);
-  printf("num_elems = %lu \n", tensor->num_elems);
-}
-
-// FIXIT: Move this to debug.h and include in all files
-void dumpWeightsToFile(char *file_name, void *weights_ptr) {
-
-  struct Tensor *weights = (Tensor *)weights_ptr;
-  // Move data back to host
-  hpvm_request_tensor(weights, 0);
-
-  FILE *fp = fopen(file_name, "wb");
-  if (fp == NULL) {
-    printf("File %s could not be created. Check if directory exists \n",
-           file_name);
-    abort();
-  }
-
-  // printf("size_in_bytes = %lu \n", weights->size_in_bytes);
-  size_t bytes_written =
-      fwrite(weights->host_data, 1, weights->size_in_bytes, fp);
-  // printf("bytes_written = %lu \n", bytes_written);
-  fclose(fp);
-}
-
-void fillTensorWithOnes(void *tensor_ptr) {
-
-  struct Tensor *tensor = (struct Tensor *)tensor_ptr;
-
-  hpvm_request_tensor(tensor, 0);
-
-  // initialization is specific to the floating point type
-  if (tensor->data_type == CUDNN_DATA_FLOAT) {
-    float *data_arr = (float *)tensor->host_data;
-    for (unsigned int i = 0; i < tensor->num_elems; i++) {
-      data_arr[i] = 1.0;
-    }
-  }
-}
-
-void fillWithOnesAndTwos(void *tensor_ptr) {
-
-  struct Tensor *tensor = (struct Tensor *)tensor_ptr;
-
-  hpvm_request_tensor(tensor, 0);
-
-  // initialization is specific to the floating point type
-  if (tensor->data_type == CUDNN_DATA_FLOAT) {
-    float *data_arr = (float *)tensor->host_data;
-    for (unsigned int i = 0; i < tensor->num_elems / 2; i++) {
-      data_arr[i] = 1.0;
-    }
-
-    for (unsigned int i = tensor->num_elems / 2; i < tensor->num_elems; i++) {
-      data_arr[i] = 2.0;
-    }
-  }
-}
-
-void fillTensorWithVal(void *tensor_ptr, float target_value) {
-
-  struct Tensor *tensor = (struct Tensor *)tensor_ptr;
-
-  hpvm_request_tensor(tensor, 0);
-
-  // initialization is specific to the floating point type
-  if (tensor->data_type == CUDNN_DATA_FLOAT) {
-    float *data_arr = (float *)tensor->host_data;
-    for (unsigned int i = 0; i < tensor->num_elems; i++) {
-      data_arr[i] = target_value;
-    }
-  }
-}
-
-void fillTensorWithNegOnes(void *tensor_ptr) {
-
-  struct Tensor *tensor = (struct Tensor *)tensor_ptr;
-
-  hpvm_request_tensor(tensor, 0);
-
-  // initialization is specific to the floating point type
-  if (tensor->data_type == CUDNN_DATA_FLOAT) {
-    float *data_arr = (float *)tensor->host_data;
-    for (unsigned int i = 0; i < tensor->num_elems; i++) {
-      data_arr[i] = -1.0;
-    }
-  }
-}
-
-void fillTensorVals(void *tensor_ptr) {
-
-  struct Tensor *tensor = (struct Tensor *)tensor_ptr;
-  // initialization is specific to the floating point type
-  if (tensor->data_type == CUDNN_DATA_FLOAT) {
-    float *data_arr = (float *)tensor->host_data;
-    for (unsigned int i = 0; i < tensor->num_elems; i++) {
-      data_arr[i] = i + 1;
-    }
-  }
-}
-
-void printTensorValues(void *tensor_ptr) {
-
-  struct Tensor *tensor = (struct Tensor *)tensor_ptr;
-
-  hpvm_request_tensor(tensor, 0);
-
-  // printing is specific to the floating point type
-  if (tensor->data_type == CUDNN_DATA_FLOAT) {
-    float *data_arr = (float *)tensor->host_data;
-    for (unsigned int i = 0; i < tensor->num_elems; i++) {
-      printf("%f,", data_arr[i]);
-    }
-  }
-
-  printf("\n");
-}
-
-void printTensorDims(void *tensor_ptr) {
-
-  struct Tensor *tensor = (struct Tensor *)tensor_ptr;
-
-  printf("Num_elems = %lu \n", tensor->num_elems);
-  for (int i = 0; i < tensor->dims.num_dims; i++) {
-    printf("dim[%d] = %lu \n", i, tensor->dims.dim_sizes[i]);
-  }
-}
-
-void compareTensors(void *tensor1_ptr, void *tensor2_ptr) {
-
-  struct Tensor *tensor1 = (struct Tensor *)tensor1_ptr;
-  struct Tensor *tensor2 = (struct Tensor *)tensor2_ptr;
-
-  hpvm_request_tensor(tensor1, 0);
-  hpvm_request_tensor(tensor2, 0);
-
-  float *tensor_data1 = (float *)tensor1->host_data;
-  float *tensor_data2 = (float *)tensor2->host_data;
-
-  for (unsigned int i = 0; i < tensor1->num_elems; i++) {
-    if (tensor_data1[i] != tensor_data2[i]) {
-      printf("Tensor data mismatch at index %d \n", i);
-      abort();
-    }
-  }
-}
-
-void compareValues(void *tensor_ptr, float *data, size_t num_elems) {
-
-  struct Tensor *tensor = (struct Tensor *)tensor_ptr;
-
-  hpvm_request_tensor(tensor, 0);
-
-  float *tensor_data = (float *)tensor->host_data;
-  for (unsigned int i = 0; i < num_elems; i++) {
-    if (tensor_data[i] != data[i]) {
-      printf("Tensor data mismatch");
-      abort();
-    }
-  }
-}
-
-void *readInputTensor(const char *file_name, int data_type, int dim1_size,
-                      int dim2_size, int dim3_size, int dim4_size) {
-
-  int type_size = 4; // NOTE: Assuming floating point tensors
-  int num_elems = dim1_size * dim2_size * dim3_size * dim4_size;
-  int size_in_bytes = type_size * dim1_size * dim2_size * dim3_size * dim4_size;
-  uint8_t *file_data = (uint8_t *)malloc(sizeof(char) * num_elems);
-  float *tensor_data = (float *)malloc(sizeof(float) * num_elems);
-  int file_header_size = 16;
-
-  FILE *file = fopen(file_name, "rb");
-  if (file == NULL) {
-    printf("Data file %s is not found. Aborting... \n", file_name);
-    abort();
-  }
-
-  fseek(file, file_header_size, SEEK_CUR); // Skipping the file header
-  size_t bytes_read = fread(file_data, 1, sizeof(uint8_t) * num_elems, file);
-
-  fclose(file);
-
-  for (size_t i = 0; i < num_elems; ++i) {
-    tensor_data[i] = (float)file_data[i] / 255.0f;
-  }
-
-  // NOTE: Using NCHW format
-  struct Tensor *input = (struct Tensor *)create4DTensor(
-      data_type, nchw, dim1_size, dim2_size, dim3_size, dim4_size);
-
-  initTensorData(input, tensor_data, size_in_bytes);
-  //  compareValues(input, tensor_data, num_elems);
-
-  return input;
-}
-
-//*** FIXIT: Move this to CPU-only
-struct Tensor *readTrainedWeightsCPU(const char *file_name, int data_type,
-                                     int dim1_size, int dim2_size,
-                                     int dim3_size, int dim4_size) {
-
-  // FIXIT: Don't assume floating point types
-  int type_size = 4; // NOTE: Assuming floating point tensors
-  long int num_elems = dim1_size * dim2_size * dim3_size * dim4_size;
-  long int size_in_bytes =
-      type_size * dim1_size * dim2_size * dim3_size * dim4_size;
-  float *tensor_data = (float *)malloc(sizeof(float) * num_elems);
-  int file_header_size = 0;
-
-  FILE *file = fopen(file_name, "rb");
-  if (file == NULL) {
-    printf("Data file %s is not found. Aborting... \n", file_name);
-    abort();
-  }
-
-  fseek(file, file_header_size, SEEK_CUR); // Skipping the file header
-  size_t bytes_read = fread(tensor_data, 1, size_in_bytes, file);
-
-  printf("size in bytes = %lu, bytes read = %lu \n", size_in_bytes, bytes_read);
-
-  fclose(file);
-
-  struct Tensor *weights = (struct Tensor *)create4DTensor(
-      data_type, nchw, dim1_size, dim2_size, dim3_size, dim4_size);
-
-  initTensorData(weights, tensor_data, size_in_bytes);
-  // compareValues(weights, tensor_data, num_elems);
-  free(tensor_data);
-
-  return weights;
-}
-
-struct Tensor *readTrainedWeights(const char *file_name, int data_type,
-                                  long int dim1_size, long int dim2_size,
-                                  long int dim3_size, long int dim4_size) {
-
-  // FIXIT: Don't assume floating point types
-  int type_size = 4; // NOTE: Assuming floating point tensors
-  long int num_elems = dim1_size * dim2_size * dim3_size * dim4_size;
-  long int size_in_bytes =
-      type_size * dim1_size * dim2_size * dim3_size * dim4_size;
-  float *tensor_data = (float *)malloc(sizeof(float) * num_elems);
-  printf("size_in_bytes  = %lu \n", size_in_bytes);
-
-  int file_header_size = 0;
-
-  FILE *file = fopen(file_name, "rb");
-  if (file == NULL) {
-    printf("Data file %s is not found. Aborting... \n", file_name);
-    abort();
-  }
-
-  fseek(file, file_header_size, SEEK_CUR); // Skipping the file header
-  size_t bytes_read = fread(tensor_data, 1, size_in_bytes, file);
-
-  // printf("size in bytes = %lu, bytes read = %lu \n", size_in_bytes,
-  // bytes_read);
-
-  fclose(file);
-
-  struct Tensor *weights = (struct Tensor *)create4DTensor(
-      data_type, nchw, dim1_size, dim2_size, dim3_size, dim4_size);
-
-  initTensorData(weights, tensor_data, size_in_bytes);
-  // compareValues(weights, tensor_data, num_elems);
-  free(tensor_data);
-
-  return weights;
-}
-
-struct Tensor *readInputBatch(const char *file_name, long data_type, long start,
-                              long end, long dim2_size, long dim3_size,
-                              long dim4_size) {
-
-  long int dim1_size = end - start;
-  // FIXIT: Don't assume floating point types
-  long int type_size = 4; // NOTE: Assuming floating point tensors
-  long int num_elems = dim1_size * dim2_size * dim3_size * dim4_size;
-  long int size_in_bytes =
-      type_size * dim1_size * dim2_size * dim3_size * dim4_size;
-  float *tensor_data = (float *)malloc(sizeof(float) * num_elems);
-  long int file_header_size =
-      type_size * start * dim2_size * dim3_size * dim4_size;
-
-  FILE *file = fopen(file_name, "rb");
-  if (file == NULL) {
-    printf("Data file %s is not found. Aborting... \n", file_name);
-    abort();
-  }
-
-  fseek(file, file_header_size, SEEK_SET); // Skipping the file header
-  size_t bytes_read = fread(tensor_data, 1, size_in_bytes, file);
-
-  fclose(file);
-
-  // printf ("FIXED input BATCH read \n");
-
-  struct Tensor *weights = (struct Tensor *)create4DTensor(
-      data_type, nchw, dim1_size, dim2_size, dim3_size, dim4_size);
-
-  initTensorData(weights, tensor_data, size_in_bytes);
-  free(tensor_data);
-
-  return weights;
-}
-
-uint8_t *readLabels(const char *labels_file, int num_labels) {
-
-  uint8_t *labels = (uint8_t *)malloc(sizeof(uint8_t) * num_labels);
-  FILE *file = fopen(labels_file, "rb");
-  if (file == NULL) {
-    printf("Data file %s is not found. Aborting...\n", labels_file);
-    abort();
-  }
-
-  size_t bytes_read = fread(labels, 1, sizeof(uint8_t) * num_labels, file);
-
-  fclose(file);
-
-  return labels;
-}
-
-uint32_t *readLabels3(const char *labels_file, int num_labels) {
-
-  uint32_t *labels = (uint32_t *)malloc(sizeof(uint32_t) * num_labels);
-  FILE *file = fopen(labels_file, "rb");
-  if (file == NULL) {
-    printf("Data file %s is not found. Aborting...\n", labels_file);
-    abort();
-  }
-
-  size_t bytes_read = fread(labels, 1, sizeof(uint32_t) * num_labels, file);
-
-  fclose(file);
-
-  return labels;
-}
-
-uint8_t *readLabelsBatch(const char *labels_file, int start, int end) {
-
-  int num_labels = end - start;
-  int file_header_size = sizeof(uint8_t) * start;
-
-  uint8_t *labels = (uint8_t *)malloc(sizeof(uint8_t) * num_labels);
-  FILE *file = fopen(labels_file, "rb");
-  if (file == NULL) {
-    printf("Data file %s is not found. Aborting...\n", labels_file);
-    abort();
-  }
-
-  fseek(file, file_header_size, SEEK_SET); // Skipping the file header
-
-  size_t bytes_read = fread(labels, 1, sizeof(uint8_t) * num_labels, file);
-
-  fclose(file);
-
-  // printf("--labels bytes_read = %lu \n", bytes_read);
-  return labels;
-}
-
-uint32_t *readLabelsBatch3(const char *labels_file, int start, int end) {
-
-  int num_labels = end - start;
-  int file_header_size = sizeof(uint32_t) * start;
-
-  uint32_t *labels = (uint32_t *)malloc(sizeof(uint32_t) * num_labels);
-  FILE *file = fopen(labels_file, "rb");
-  if (file == NULL) {
-    printf("Data file %s is not found. Aborting...\n", labels_file);
-    abort();
-  }
-
-  fseek(file, file_header_size, SEEK_SET); // Skipping the file header
-
-  size_t bytes_read = fread(labels, 1, sizeof(uint32_t) * num_labels, file);
-
-  fclose(file);
-
-  return labels;
-}
-
-void computeAccuracy(const char *labels_file, int num_labels,
-                     void *result_ptr) {
-
-  struct Tensor *result = (struct Tensor *)result_ptr;
-
-  uint8_t *labels = readLabels(labels_file, num_labels);
-  size_t batch_dim = result->dims.dim_sizes[0];
-  size_t channels = result->dims.dim_sizes[1];
-  float *data = (float *)result->host_data;
-  int num_errors = 0;
-
-  for (int i = 0; i < batch_dim; i++) {
-    int chosen = 0;
-    for (int id = 1; id < 10; ++id) {
-      if (data[i * channels + chosen] < data[i * channels + id])
-        chosen = id;
-    }
-
-    // printf("chosen = %d, label = %d \n", chosen, labels[i]);
-    if (chosen != labels[i])
-      num_errors++;
-  }
-
-  float accuracy = ((batch_dim - num_errors) * 1.0 / batch_dim * 1.0) * 100.0;
-  printf("****** Accuracy = %f \n\n", accuracy);
-
-  FILE *fp = fopen("final_accuracy", "w+");
-  if (fp != NULL) {
-
-    std::ostringstream ss;
-    ss << std::fixed << accuracy;
-    std::string print_str = ss.str();
-
-    fwrite(print_str.c_str(), 1, print_str.length(), fp);
-    fclose(fp);
-  }
-}
-
-// NOTE: batch_size and num_classes are Unused arguments
-float computeAccuracy2(uint8_t *labels, int batch_size, void *result_ptr,
-                       size_t num_classes = 10) {
-
-  struct Tensor *result = (struct Tensor *)result_ptr;
-
-  size_t batch_dim = result->dims.dim_sizes[0];
-  num_classes = result->dims.dim_sizes[1];
-  float *data = (float *)result->host_data;
-  int num_errors = 0;
-
-  printf("batch_dim = %lu, channels = %lu \n", batch_dim, num_classes);
-
-  for (unsigned int i = 0; i < batch_dim; i++) {
-
-    int chosen = 0;
-    for (int id = 1; id < num_classes; ++id) {
-      if (data[i * num_classes + chosen] < data[i * num_classes + id])
-        chosen = id;
-    }
-
-    if (chosen != labels[i])
-      num_errors++;
-  }
-
-  float accuracy = ((batch_dim - num_errors) * 1.0 / batch_dim * 1.0) * 100.0;
-  printf("****** Accuracy = %f \n\n", accuracy);
-
-  FILE *fp = fopen("final_accuracy", "w+");
-  if (fp != NULL) {
-
-    std::ostringstream ss;
-    ss << std::fixed << accuracy;
-    std::string print_str = ss.str();
-
-    fwrite(print_str.c_str(), 1, print_str.length(), fp);
-  }
-
-  fclose(fp);
-
-  return accuracy;
-}
-
-float computeAccuracy3(uint32_t *labels, void *result_ptr) {
-
-  struct Tensor *result = (struct Tensor *)result_ptr;
-
-  size_t batch_dim = result->dims.dim_sizes[0];
-  size_t num_classes = result->dims.dim_sizes[1];
-  float *data = (float *)result->host_data;
-  int num_errors = 0;
-
-  printf("batch_dim = %lu, num_classes = %lu \n", batch_dim, num_classes);
-
-  for (int i = 0; i < batch_dim; i++) {
-
-    int chosen = 0;
-    for (int id = 1; id < num_classes; ++id) {
-      if (data[i * num_classes + chosen] < data[i * num_classes + id])
-        chosen = id;
-    }
-
-    if (chosen != labels[i])
-      num_errors++;
-  }
-
-  float accuracy = ((batch_dim - num_errors) * 1.0 / batch_dim * 1.0) * 100.0;
-  printf("****** Accuracy = %f \n\n", accuracy);
-
-  FILE *fp = fopen("final_accuracy", "w+");
-  if (fp != NULL) {
-
-    std::ostringstream ss;
-    ss << std::fixed << accuracy;
-    std::string print_str = ss.str();
-
-    fwrite(print_str.c_str(), 1, print_str.length(), fp);
-  }
-
-  fclose(fp);
-
-  return accuracy;
-}
-
-struct ClassProb {
-  float prob;
-  int index;
-};
-
-bool descendFloatComp(ClassProb obj1, ClassProb obj2) {
-  return obj1.prob > obj2.prob;
-}
-
-float computeTop5Accuracy(uint8_t *labels, int num_labels, void *result_ptr,
-                          unsigned num_classes = 10) {
-
-  struct Tensor *result = (struct Tensor *)result_ptr;
-
-  size_t batch_dim = result->dims.dim_sizes[0];
-  size_t channels = result->dims.dim_sizes[1];
-  float *data = (float *)result->host_data;
-  int num_errors = 0;
-
-  printf("batch_dim = %lu, channels = %lu \n", batch_dim, channels);
-
-  for (int i = 0; i < num_labels; i++) {
-
-    std::vector<ClassProb> elem_probs;
-    for (int id = 0; id < num_classes; ++id) {
-      ClassProb cProb;
-      cProb.prob = data[i * channels + id];
-      cProb.index = id;
-      elem_probs.push_back(cProb);
-    }
-
-  std:
-    sort(elem_probs.begin(), elem_probs.end(), descendFloatComp);
-    // Check if any of top-5 predictions matches
-    bool matched = false;
-    for (int j = 0; j < 5; j++) {
-      ClassProb cProb = elem_probs[j];
-      if (cProb.index == labels[i])
-        matched = true;
-    }
-
-    if (!matched)
-      num_errors += 1;
-  }
-
-  float accuracy = ((batch_dim - num_errors) * 1.0 / batch_dim * 1.0) * 100.0;
-  printf("****** Accuracy = %f \n\n", accuracy);
-
-  FILE *fp = fopen("final_accuracy", "w+");
-  if (fp != NULL) {
-
-    std::ostringstream ss;
-    ss << std::fixed << accuracy;
-    std::string print_str = ss.str();
-
-    fwrite(print_str.c_str(), 1, print_str.length(), fp);
-  }
-
-  fclose(fp);
-
-  return accuracy;
-}
-
-void dumpFinalAccuracy(float accuracy) {
-
-  printf("\n\n **** Final Accuracy = %f \n", accuracy);
-
-  FILE *fp = fopen("final_accuracy", "w+");
-  if (fp != NULL) {
-    std::ostringstream ss;
-    ss << std::fixed << accuracy;
-    std::string print_str = ss.str();
-
-    fwrite(print_str.c_str(), 1, print_str.length(), fp);
-  }
-
-  fclose(fp);
-
-  run_accuracies.push_back(accuracy);
-}
-
-void dumpAvgPSNR(float avg_psnr) {
-
-  FILE *fp = fopen("avg_psnr", "w+");
-  if (fp != NULL) {
-    std::ostringstream ss;
-    ss << std::fixed << avg_psnr;
-    std::string print_str = ss.str();
-    fwrite(print_str.c_str(), 1, print_str.length(), fp);
-  }
-
-  fclose(fp);
-}
-
-void dumpPSNRStd(float psnr_std) {
-
-  FILE *fp = fopen("psnr_std.txt", "w+");
-  if (fp != NULL) {
-    std::ostringstream ss;
-    ss << std::fixed << psnr_std;
-    std::string print_str = ss.str();
-    fwrite(print_str.c_str(), 1, print_str.length(), fp);
-  }
-
-  fclose(fp);
-}
-
-void dumpExecutionAccuracies() {
-
-  FILE *fp = fopen("run_accuracies.txt", "w+");
-  if (fp != NULL) {
-    for (int i = 0; i < run_accuracies.size(); i++) {
-      float accuracy = run_accuracies[i];
-      std::ostringstream ss;
-      ss << std::fixed << accuracy;
-      std::string print_str = ss.str();
-      fwrite(print_str.c_str(), 1, print_str.length(), fp);
-      fwrite("\n", 1, 1, fp);
-    }
-  }
-
-  fclose(fp);
-}
-
-float readPSNRFromFile(const char *file_name) {
-
-  float psnr;
-  FILE *pFile = fopen(file_name, "r");
-  if (pFile == NULL) {
-    printf("ERROR: psnr.txt not found! \n");
-    abort();
-  }
-
-  fscanf(pFile, "%f", &psnr);
-  printf("**** PSNR read = %f \n\n", psnr);
-  return psnr;
-}
-
-float computePSNRViolation(void *gold_ptr, void *approx_ptr,
-                           float PSNR_threshold) {
-
-  PSNR_threshold = readPSNRFromFile("psnr.txt");
-  std::vector<float> psnr_list;
-
-  struct Tensor *gold_tensor = (struct Tensor *)gold_ptr;
-  struct Tensor *approx_tensor = (struct Tensor *)approx_ptr;
-
-  size_t *dim_sizes = gold_tensor->dims.dim_sizes;
-  size_t batch_dim = dim_sizes[0];
-  size_t image_size = dim_sizes[1] * dim_sizes[2] * dim_sizes[3];
-
-  printf("batch_dim = %lu, image_size = %lu \n", batch_dim, image_size);
-
-  float *gold_data = (float *)gold_tensor->host_data;
-  float *approx_data = (float *)approx_tensor->host_data;
-
-  FILE *fp = fopen("img_psnr.txt", "w+");
-
-  float sum_psnr = 0.0;
-  int num_errors = 0;
-  for (size_t i = 0; i < batch_dim; i++) {
-    float mse_sum = 0.0;
-    float max_val = -999999;
-    size_t offset = i * image_size;
-
-    for (size_t j = 0; j < image_size; j++) {
-      float diff = gold_data[offset + j] - approx_data[offset + j];
-      float diff_square = diff * diff;
-      mse_sum += diff_square;
-
-      if (max_val < gold_data[offset + j]) {
-        max_val = gold_data[offset + j];
-      }
-    }
-
-    mse_sum = mse_sum / image_size;
-    float psnr = 20 * log10(255 / sqrt(mse_sum));
-
-    sum_psnr += psnr;
-    if (psnr < PSNR_threshold)
-      num_errors += 1;
-
-    printf("PSNR value = %f \n", psnr);
-    psnr_list.push_back(psnr);
-
-    std::ostringstream ss;
-    ss << std::fixed << psnr;
-    std::string print_str = ss.str();
-    fwrite(print_str.c_str(), 1, print_str.length(), fp);
-    fwrite("\n", 1, 1, fp);
-  }
-
-  float violation_rate = (num_errors * 1.0) / batch_dim * 100.0;
-  printf("*** violation_rate= %f \n\n", violation_rate);
-
-  float avg_psnr = sum_psnr / batch_dim;
-  printf("*** avg_psnr =  %f \n\n", avg_psnr);
-  dumpAvgPSNR(avg_psnr);
-
-  float success_rate = 100.0 - violation_rate;
-  dumpFinalAccuracy(success_rate);
-
-  fclose(fp);
-
-  float var = 0.0;
-  for (size_t i = 0; i < batch_dim; i++) {
-    var = var + (psnr_list[i] - avg_psnr) * (psnr_list[i] - avg_psnr);
-  }
-
-  var /= batch_dim;
-  float std = sqrt(var);
-
-  dumpPSNRStd(std);
-
-  return violation_rate;
-}
-
-void dumpOutput(void *output_ptr, const char *file_name) {
-
-  struct Tensor *out_tensor = (struct Tensor *)output_ptr;
-  size_t size_in_bytes = out_tensor->size_in_bytes;
-  printf("** Output size = %lu \n", size_in_bytes);
-
-  float *host_data = (float *)out_tensor->host_data;
-  FILE *fd = fopen(file_name, "w+");
-  fwrite(host_data, 1, size_in_bytes, fd);
-  fclose(fd);
-}
-
-#endif
diff --git a/hpvm/test/dnn_benchmarks/profiling/test_hpvm_c_profiling.py b/hpvm/test/dnn_benchmarks/profiling/test_hpvm_c_profiling.py
index 230fdf8b73dfd7959cfaa98fe06eafe6a75087b1..853b0dc3e23a3ea847748ecaeda62650e99ee430 100755
--- a/hpvm/test/dnn_benchmarks/profiling/test_hpvm_c_profiling.py
+++ b/hpvm/test/dnn_benchmarks/profiling/test_hpvm_c_profiling.py
@@ -2,7 +2,7 @@
 from pathlib import Path
 from sys import argv
 
-from hpvm_profiler import profile_configs, read_hpvm_configs
+from hpvm_profiler import profile_configs, read_hpvm_configs, write_hpvm_configs
 
 # relative to cwd()
 benchmarks_bindir = Path("../hpvm-c")
@@ -17,4 +17,6 @@ dnn = argv[1]
 bench_bin_file = benchmarks_bindir / f"hpvm_{dnn}"
 config_file = benchmarks_srcdir / dnn / "data/tuner_confs.txt"
 out_config_file = f"./{dnn}.txt"
-profile_configs(bench_bin_file, config_file, out_config_file)
+header, configs = read_hpvm_configs(config_file)
+profile_configs(bench_bin_file, configs[1:6], configs[0], progress_bar=False)
+write_hpvm_configs(header, configs[:6], out_config_file)
diff --git a/hpvm/test/dnn_benchmarks/pytorch/CMakeLists.txt b/hpvm/test/dnn_benchmarks/pytorch/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..778593a57ddfc3a6abcc4ed045f02614535739f8
--- /dev/null
+++ b/hpvm/test/dnn_benchmarks/pytorch/CMakeLists.txt
@@ -0,0 +1,18 @@
+# --[ llvm-lit test setup
+# lit.cfg.py looks for tests in CMAKE_CURRENT_BINARY_DIR (see lit.cfg.py)
+# as most of the tests require some kind of compilation / generation
+# which is best done over there.
+configure_lit_site_cfg(
+  ../../lit.site.cfg.py.in
+  ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg.py
+  MAIN_CONFIG
+  ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py
+)
+add_lit_testsuite(check-hpvm-torch2hpvm "Run tests for package torch2hpvm"
+  ${CMAKE_CURRENT_BINARY_DIR}
+  # We depend on check_dnn_acc.py defined in ../hpvm-c/
+  # to compare the inference accuracy of our frontend-generated binary
+  # to that of the baseline.
+  DEPENDS check_dnn_acc
+  ARGS "-j1"  # Run frontend generation sequentially
+)
diff --git a/hpvm/test/dnn_benchmarks/pytorch/alexnet2_cifar10.test b/hpvm/test/dnn_benchmarks/pytorch/alexnet2_cifar10.test
new file mode 100644
index 0000000000000000000000000000000000000000..4adf30226b19179be066f150b36ef3bd4a010636
--- /dev/null
+++ b/hpvm/test/dnn_benchmarks/pytorch/alexnet2_cifar10.test
@@ -0,0 +1,2 @@
+RUN: test_frontend.py alexnet2_cifar10
+RUN: check_dnn_acc.py final_accuracy alexnet2_cifar10
diff --git a/hpvm/test/dnn_benchmarks/pytorch/alexnet_cifar10.test b/hpvm/test/dnn_benchmarks/pytorch/alexnet_cifar10.test
new file mode 100644
index 0000000000000000000000000000000000000000..cffec91e415cda256a72de5a04abb956336519d7
--- /dev/null
+++ b/hpvm/test/dnn_benchmarks/pytorch/alexnet_cifar10.test
@@ -0,0 +1,2 @@
+RUN: test_frontend.py alexnet_cifar10
+RUN: check_dnn_acc.py final_accuracy alexnet_cifar10
diff --git a/hpvm/test/dnn_benchmarks/pytorch/alexnet_imagenet.test b/hpvm/test/dnn_benchmarks/pytorch/alexnet_imagenet.test
new file mode 100644
index 0000000000000000000000000000000000000000..126de1bfe80106bbd803ace37534cd38ab54a67c
--- /dev/null
+++ b/hpvm/test/dnn_benchmarks/pytorch/alexnet_imagenet.test
@@ -0,0 +1,2 @@
+RUN: test_frontend.py alexnet_imagenet
+RUN: check_dnn_acc.py final_accuracy alexnet_imagenet
diff --git a/hpvm/test/dnn_benchmarks/pytorch/lenet_mnist.test b/hpvm/test/dnn_benchmarks/pytorch/lenet_mnist.test
new file mode 100644
index 0000000000000000000000000000000000000000..b87a976bcd1bfa8d637f1298d5259bccb8781419
--- /dev/null
+++ b/hpvm/test/dnn_benchmarks/pytorch/lenet_mnist.test
@@ -0,0 +1,2 @@
+RUN: test_frontend.py lenet_mnist
+RUN: check_dnn_acc.py final_accuracy lenet_mnist
diff --git a/hpvm/test/dnn_benchmarks/pytorch/lit.cfg.py b/hpvm/test/dnn_benchmarks/pytorch/lit.cfg.py
new file mode 100644
index 0000000000000000000000000000000000000000..34473d24bea3565d0e2865c7026b43538f927ce7
--- /dev/null
+++ b/hpvm/test/dnn_benchmarks/pytorch/lit.cfg.py
@@ -0,0 +1,36 @@
+# -*- Python -*-
+
+# Configuration file for the 'lit' test runner.
+
+import os
+
+import lit.formats
+from lit.llvm import llvm_config
+
+# name: The name of this test suite.
+config.name = "HPVM-Torch2HPVM"
+
+# testFormat: The test format to use to interpret tests.
+config.test_format = lit.formats.ShTest(False)
+
+# suffixes: A list of file extensions to treat as test files. This is overriden
+# by individual lit.local.cfg files in the test subdirectories.
+config.suffixes = [".test"]
+
+# test_source_root: The root path where tests are located.
+config.test_source_root = os.path.dirname(__file__)
+
+# test_exec_root: The root path where tests should be run.
+current_source_dir = os.path.dirname(os.path.relpath(__file__, config.llvm_src_root))
+current_binary_dir = os.path.join(config.llvm_obj_root, current_source_dir)
+config.test_exec_root = current_binary_dir
+
+# Tweak the PATH to include the tools dir.
+llvm_config.with_environment("PATH", config.llvm_tools_dir, append_path=True)
+
+# Add substitution for check_dnn_acc.py which goes under build/bin.
+llvm_config.add_tool_substitutions(
+    ["check_dnn_acc.py"], os.path.join(config.llvm_obj_root, "bin")
+)
+# Add substitution for our main script in this directory.
+llvm_config.add_tool_substitutions(["test_frontend.py"], config.test_source_root)
diff --git a/hpvm/test/dnn_benchmarks/pytorch/mobilenet_cifar10.test b/hpvm/test/dnn_benchmarks/pytorch/mobilenet_cifar10.test
new file mode 100644
index 0000000000000000000000000000000000000000..9964887b420a3896c83eff0114a419ad10740dc1
--- /dev/null
+++ b/hpvm/test/dnn_benchmarks/pytorch/mobilenet_cifar10.test
@@ -0,0 +1,2 @@
+RUN: test_frontend.py mobilenet_cifar10
+RUN: check_dnn_acc.py final_accuracy mobilenet_cifar10
diff --git a/hpvm/test/dnn_benchmarks/pytorch/resnet18_cifar10.test b/hpvm/test/dnn_benchmarks/pytorch/resnet18_cifar10.test
new file mode 100644
index 0000000000000000000000000000000000000000..71e0881a3f6d81a2982ac3fbd2dddd849f23a08b
--- /dev/null
+++ b/hpvm/test/dnn_benchmarks/pytorch/resnet18_cifar10.test
@@ -0,0 +1,2 @@
+RUN: test_frontend.py resnet18_cifar10
+RUN: check_dnn_acc.py final_accuracy resnet18_cifar10
diff --git a/hpvm/test/dnn_benchmarks/pytorch/resnet50_imagenet.test b/hpvm/test/dnn_benchmarks/pytorch/resnet50_imagenet.test
new file mode 100644
index 0000000000000000000000000000000000000000..b1ff2e6a92f506da299c1f94ebec10ddd1958159
--- /dev/null
+++ b/hpvm/test/dnn_benchmarks/pytorch/resnet50_imagenet.test
@@ -0,0 +1,2 @@
+RUN: test_frontend.py resnet50_imagenet
+RUN: check_dnn_acc.py final_accuracy resnet50_imagenet
diff --git a/hpvm/test/dnn_benchmarks/pytorch/test_frontend.py b/hpvm/test/dnn_benchmarks/pytorch/test_frontend.py
old mode 100644
new mode 100755
index 2fb1de17ee226571e6cd6b808640bf35280932db..3c20c6ea5a472a693156b4881b58d4e0f1fc8575
--- a/hpvm/test/dnn_benchmarks/pytorch/test_frontend.py
+++ b/hpvm/test/dnn_benchmarks/pytorch/test_frontend.py
@@ -1,52 +1,56 @@
+#!/usr/bin/env python3
 import os
 import shutil
 import site
 from pathlib import Path
 from subprocess import run
-import torch
+from sys import argv
 
+import torch
 from torch2hpvm import BinDataset, ModelExporter
 from torch.nn import Module
 
 site.addsitedir(os.path.dirname(__file__))
 import dnn
 
-benchmarks = [
-    (dnn.LeNet, 1, 28, 5000, "lenet_mnist"),
-    (dnn.AlexNet, 3, 32, 5000, "alexnet_cifar10"),
-    (dnn.AlexNet2, 3, 32, 5000, "alexnet2_cifar10"),
-    (dnn.AlexNetImageNet, 3, 224, 500, "alexnet_imagenet"),
-    (dnn.MobileNet, 3, 32, 5000, "mobilenet_cifar10"),
-    (dnn.ResNet18, 3, 32, 5000, "resnet18_cifar10"),
-    (dnn.ResNet50, 3, 224, 100, "resnet50_imagenet"),
-    (dnn.VGG16Cifar10, 3, 32, 5000, "vgg16_cifar10"),
-    (dnn.VGG16Cifar100, 3, 32, 5000, "vgg16_cifar100"),
-    (dnn.VGG16ImageNet, 3, 224, 100, "vgg16_imagenet"),
-]
+benchmarks = {
+    "lenet_mnist": (dnn.LeNet, 1, 28, 1000),
+    "alexnet_cifar10": (dnn.AlexNet, 3, 32, 500),
+    "alexnet2_cifar10": (dnn.AlexNet2, 3, 32, 500),
+    "alexnet_imagenet": (dnn.AlexNetImageNet, 3, 224, 500),
+    "mobilenet_cifar10": (dnn.MobileNet, 3, 32, 500),
+    "resnet18_cifar10": (dnn.ResNet18, 3, 32, 500),
+    "resnet50_imagenet": (dnn.ResNet50, 3, 224, 25),
+    "vgg16_cifar10": (dnn.VGG16Cifar10, 3, 32, 500),
+    "vgg16_cifar100": (dnn.VGG16Cifar100, 3, 32, 500),
+    "vgg16_imagenet": (dnn.VGG16ImageNet, 3, 224, 10),
+}
 self_folder = Path(__file__).parent
-for model_cls, nch, img_size, batch_size, pathname in benchmarks:
-    codegen_dir = Path(f"/tmp/{pathname}")
-    print(f"Generating {pathname} to {codegen_dir}")
-    if codegen_dir.exists():
-        shutil.rmtree(codegen_dir)
+netname = argv[1]
+model_cls, nch, img_size, batch_size = benchmarks[netname]
+codegen_dir = Path(f"./{netname}")
+print(f"Generating {netname} to {codegen_dir}")
+if codegen_dir.exists():
+    shutil.rmtree(codegen_dir)
 
-    params = self_folder / "../model_params" / pathname
-    dataset_shape = 5000, nch, img_size, img_size
-    bin_tuneset = BinDataset(
-        params / "tune_input.bin", params / "tune_labels.bin", dataset_shape
-    )
-    bin_testset = BinDataset(
-        params / "test_input.bin", params / "test_labels.bin", dataset_shape
-    )
-    model: Module = model_cls()
-    checkpoint = self_folder / "../model_params/pytorch" / f"{pathname}.pth.tar"
-    model.load_state_dict(torch.load(checkpoint.as_posix()))
+params = self_folder / "../model_params" / netname
+dataset_shape = 5000, nch, img_size, img_size
+bin_tuneset = BinDataset(
+    params / "tune_input.bin", params / "tune_labels.bin", dataset_shape
+)
+bin_testset = BinDataset(
+    params / "test_input.bin", params / "test_labels.bin", dataset_shape
+)
+model: Module = model_cls()
+checkpoint = self_folder / "../model_params/pytorch" / f"{netname}.pth.tar"
+model.load_state_dict(torch.load(checkpoint.as_posix()))
+print(model)
 
-    build_dir = codegen_dir / "build"
-    target_binary = build_dir / pathname
-    conf_file = self_folder / "../hpvm-c/benchmarks" / pathname / "data/tuner_confs.txt"
-    exporter = ModelExporter(
-        model, bin_tuneset, bin_testset, codegen_dir, config_file=conf_file
-    )
-    exporter.generate(batch_size=batch_size).compile(target_binary, build_dir)
-    run([str(target_binary), "test"], check=True)
+build_dir = codegen_dir / "build"
+target_binary = build_dir / netname
+conf_file = self_folder / "../hpvm-c/benchmarks" / netname / "data/tuner_confs.txt"
+exporter = ModelExporter(
+    model, bin_tuneset, bin_testset, codegen_dir, config_file=conf_file
+)
+exporter.generate(batch_size=batch_size).compile(target_binary, build_dir)
+run([str(target_binary), "test"], check=True)
diff --git a/hpvm/test/dnn_benchmarks/pytorch/vgg16_cifar10.test b/hpvm/test/dnn_benchmarks/pytorch/vgg16_cifar10.test
new file mode 100644
index 0000000000000000000000000000000000000000..5544c75d2823fb31da6624e109c81567770d18ad
--- /dev/null
+++ b/hpvm/test/dnn_benchmarks/pytorch/vgg16_cifar10.test
@@ -0,0 +1,2 @@
+RUN: test_frontend.py vgg16_cifar10
+RUN: check_dnn_acc.py final_accuracy vgg16_cifar10
diff --git a/hpvm/test/dnn_benchmarks/pytorch/vgg16_cifar100.test b/hpvm/test/dnn_benchmarks/pytorch/vgg16_cifar100.test
new file mode 100644
index 0000000000000000000000000000000000000000..66bd69ee377b4dd84071e3c63ec631f3c041512a
--- /dev/null
+++ b/hpvm/test/dnn_benchmarks/pytorch/vgg16_cifar100.test
@@ -0,0 +1,2 @@
+RUN: test_frontend.py vgg16_cifar100
+RUN: check_dnn_acc.py final_accuracy vgg16_cifar100
diff --git a/hpvm/test/dnn_benchmarks/pytorch/vgg16_imagenet.test b/hpvm/test/dnn_benchmarks/pytorch/vgg16_imagenet.test
new file mode 100644
index 0000000000000000000000000000000000000000..6529998ec4e4d62d14fc6b99d42474f3161d2eb7
--- /dev/null
+++ b/hpvm/test/dnn_benchmarks/pytorch/vgg16_imagenet.test
@@ -0,0 +1,2 @@
+RUN: test_frontend.py vgg16_imagenet
+RUN: check_dnn_acc.py final_accuracy vgg16_imagenet
diff --git a/hpvm/test/dnn_benchmarks/tensor-rt-src/CMakeLists.txt b/hpvm/test/dnn_benchmarks/tensor-rt-src/CMakeLists.txt
index 6e22eba67471855971005bf9e57ed0aa38dafff8..1cadb68b801186316e90a9ff1a5f8880925b2ac8 100644
--- a/hpvm/test/dnn_benchmarks/tensor-rt-src/CMakeLists.txt
+++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/CMakeLists.txt
@@ -2,13 +2,13 @@
 # Don't put binaries in build/bin. This doesn't affect global setting.
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
 
+# MODEL_PARAMS_DIR is given as -DMODEL_PARAMS_DIR=<value> to compiler.
 set(MODEL_PARAMS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../model_params/")
 set(test_compile_targets "")
 function(add_trt_source target_name filepath)
-  set(generated_file_path "${CMAKE_CURRENT_BINARY_DIR}/${target_name}.cpp")
-  configure_file(${filepath} ${generated_file_path})
-  add_executable(${target_name} ${generated_file_path})
+  add_executable(${target_name} ${filepath})
   target_link_libraries(${target_name} tensor_runtime_online)
+  target_compile_definitions(${target_name} PRIVATE "-DMODEL_PARAMS_DIR=${MODEL_PARAMS_DIR}")
   set(test_compile_targets ${test_compile_targets} ${target_name} PARENT_SCOPE)
 endfunction(add_trt_source)
 
diff --git a/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/alexnet2_cifar10_half.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/alexnet2_cifar10_half.cc
index ab80718fd33d0b9787be4a0f183e3a7a65dc76e7..5bc3ea6428382c93ccf77cd16056f9ed8cbae542 100644
--- a/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/alexnet2_cifar10_half.cc
+++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/alexnet2_cifar10_half.cc
@@ -1,8 +1,13 @@
-
-
 #include "tensor_runtime.h"
 #include "tensorUtils.h"
 
+#ifndef MODEL_PARAMS_DIR
+#error MODEL_PARAMS_DIR is not defined
+#endif
+
+#define STR_VALUE(X) #X
+#define STRINGIFY(X) STR_VALUE(X)
+#define MODEL_PARAMS_DIR_STR STRINGIFY(MODEL_PARAMS_DIR)
 
 /* NOTE: Reference Architecture to use for profiling */
 void testCifarNet() {
@@ -10,7 +15,7 @@ void testCifarNet() {
   printf("********* Alexnet2 CIFAR-10 DNN ********** \n");
 
   std::string dir_prefix =
-      std::string("@MODEL_PARAMS_DIR@") + "/alexnet2_cifar10/";  std::string input_path = dir_prefix + std::string("test_input.bin");
+      std::string(MODEL_PARAMS_DIR_STR) + "/alexnet2_cifar10/";  std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
 
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/alexnet_cifar10_half.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/alexnet_cifar10_half.cc
index b3b69d6b695eca9286b90685f3e071e234887d27..bf01835c2e6a23ca9e0916b6747096905788004c 100644
--- a/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/alexnet_cifar10_half.cc
+++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/alexnet_cifar10_half.cc
@@ -1,14 +1,19 @@
-
-
 #include "tensor_runtime.h"
 #include "tensorUtils.h"
 
+#ifndef MODEL_PARAMS_DIR
+#error MODEL_PARAMS_DIR is not defined
+#endif
+
+#define STR_VALUE(X) #X
+#define STRINGIFY(X) STR_VALUE(X)
+#define MODEL_PARAMS_DIR_STR STRINGIFY(MODEL_PARAMS_DIR)
 
 int main() {
 
   llvm_hpvm_initTensorRt(0);
 
-  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/alexnet_cifar10/";
+  std::string dir_prefix = std::string(MODEL_PARAMS_DIR_STR) + "/alexnet_cifar10/";
 
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
diff --git a/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/lenet_mnist_half.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/lenet_mnist_half.cc
index 44b78b9169707fd6c7b9ff6503a4a9aa8d2ec947..2e80dd98f9406b216af29ef5f843b84655ea1d86 100644
--- a/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/lenet_mnist_half.cc
+++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/lenet_mnist_half.cc
@@ -1,8 +1,13 @@
-
-
 #include "tensor_runtime.h"
 #include "tensorUtils.h"
 
+#ifndef MODEL_PARAMS_DIR
+#error MODEL_PARAMS_DIR is not defined
+#endif
+
+#define STR_VALUE(X) #X
+#define STRINGIFY(X) STR_VALUE(X)
+#define MODEL_PARAMS_DIR_STR STRINGIFY(MODEL_PARAMS_DIR)
 
 /* NOTE: Reference Architecture to use for profiling */
 void testLenetTanh() {
@@ -12,7 +17,7 @@ void testLenetTanh() {
 
   int test_batch_size = 5000;
 
-  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/lenet_mnist/";
+  std::string dir_prefix = std::string(MODEL_PARAMS_DIR_STR) + "/lenet_mnist/";
 
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
diff --git a/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/mobilenet_cifar10_half.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/mobilenet_cifar10_half.cc
index d4423bf4345756e72ad46b140ae8cafc26eae264..5ecb8618f8db55da5e4cc435d07d799cd98beaca 100644
--- a/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/mobilenet_cifar10_half.cc
+++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/mobilenet_cifar10_half.cc
@@ -1,15 +1,20 @@
-
-
 #include "tensor_runtime.h"
 #include "tensorUtils.h"
 
+#ifndef MODEL_PARAMS_DIR
+#error MODEL_PARAMS_DIR is not defined
+#endif
+
+#define STR_VALUE(X) #X
+#define STRINGIFY(X) STR_VALUE(X)
+#define MODEL_PARAMS_DIR_STR STRINGIFY(MODEL_PARAMS_DIR)
 
 int main() {
 
   llvm_hpvm_initTensorRt(0);
 
   std::string dir_prefix =
-      std::string("@MODEL_PARAMS_DIR@") + "/mobilenet_cifar10/";
+      std::string(MODEL_PARAMS_DIR_STR) + "/mobilenet_cifar10/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
 
diff --git a/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/resnet18_cifar10_half.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/resnet18_cifar10_half.cc
index 76dea5ef08713d22fe7086b678bb3274378d0fd9..1e1bc36f79d022cc8c8fa4289de68e2817dda8b3 100644
--- a/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/resnet18_cifar10_half.cc
+++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/resnet18_cifar10_half.cc
@@ -1,15 +1,20 @@
-
-
 #include "tensor_runtime.h"
 #include "tensorUtils.h"
 
+#ifndef MODEL_PARAMS_DIR
+#error MODEL_PARAMS_DIR is not defined
+#endif
+
+#define STR_VALUE(X) #X
+#define STRINGIFY(X) STR_VALUE(X)
+#define MODEL_PARAMS_DIR_STR STRINGIFY(MODEL_PARAMS_DIR)
 
 int main() {
 
   llvm_hpvm_initTensorRt(0);
 
   std::string dir_prefix =
-      std::string("@MODEL_PARAMS_DIR@") + "/resnet18_cifar10/";
+      std::string(MODEL_PARAMS_DIR_STR) + "/resnet18_cifar10/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
 
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
diff --git a/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/vgg16_cifar100_half.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/vgg16_cifar100_half.cc
index 2772fd3da42d50aa2ff5391d1e3c85c610a4960a..73b057c0971102c709e2f4c5fce141e9146c45f7 100644
--- a/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/vgg16_cifar100_half.cc
+++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/vgg16_cifar100_half.cc
@@ -1,14 +1,19 @@
-
-
 #include "tensor_runtime.h"
 #include "tensorUtils.h"
 
+#ifndef MODEL_PARAMS_DIR
+#error MODEL_PARAMS_DIR is not defined
+#endif
+
+#define STR_VALUE(X) #X
+#define STRINGIFY(X) STR_VALUE(X)
+#define MODEL_PARAMS_DIR_STR STRINGIFY(MODEL_PARAMS_DIR)
 
 int main() {
 
   llvm_hpvm_initTensorRt(0);
 
-  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/vgg16_cifar100/";
+  std::string dir_prefix = std::string(MODEL_PARAMS_DIR_STR) + "/vgg16_cifar100/";
 
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
diff --git a/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/vgg16_cifar10_half.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/vgg16_cifar10_half.cc
index 954f6778b899d2cefb2b28d68a32fad33d52f70c..1928398c43ef19c626a80636018d8e50d969e3c7 100644
--- a/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/vgg16_cifar10_half.cc
+++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/vgg16_cifar10_half.cc
@@ -1,14 +1,19 @@
-
-
 #include "tensor_runtime.h"
 #include "tensorUtils.h"
 
+#ifndef MODEL_PARAMS_DIR
+#error MODEL_PARAMS_DIR is not defined
+#endif
+
+#define STR_VALUE(X) #X
+#define STRINGIFY(X) STR_VALUE(X)
+#define MODEL_PARAMS_DIR_STR STRINGIFY(MODEL_PARAMS_DIR)
 
 int main() {
 
   llvm_hpvm_initTensorRt(0);
 
-  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/vgg16_cifar10/";
+  std::string dir_prefix = std::string(MODEL_PARAMS_DIR_STR) + "/vgg16_cifar10/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/alexnet2_cifar10.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/alexnet2_cifar10.cc
index e7431234d705449efa0fc5aafe23238e89be1d30..8f08e80d1f722060e89437e3a0c5e7963b58eb9d 100644
--- a/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/alexnet2_cifar10.cc
+++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/alexnet2_cifar10.cc
@@ -2,7 +2,13 @@
 #include "tensor_runtime.h"
 #include "tensorUtils.h"
 
+#ifndef MODEL_PARAMS_DIR
+#error MODEL_PARAMS_DIR is not defined
+#endif
 
+#define STR_VALUE(X) #X
+#define STRINGIFY(X) STR_VALUE(X)
+#define MODEL_PARAMS_DIR_STR STRINGIFY(MODEL_PARAMS_DIR)
 
 /* NOTE: Reference Architecture to use for profiling */
 void testCifarNet() {
@@ -10,7 +16,7 @@ void testCifarNet() {
   printf("********* Alexnet2 CIFAR-10 DNN ********** \n");
 
   std::string dir_prefix =
-      std::string("@MODEL_PARAMS_DIR@") + "/alexnet2_cifar10/";  std::string input_path = dir_prefix + std::string("test_input.bin");
+      std::string(MODEL_PARAMS_DIR_STR) + "/alexnet2_cifar10/";  std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
 
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/alexnet_cifar10.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/alexnet_cifar10.cc
index 12c304c9b401c586a0da4658b092f2b791268983..9f23cc656678f01bc9eea9611b943264d1b848f2 100644
--- a/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/alexnet_cifar10.cc
+++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/alexnet_cifar10.cc
@@ -1,14 +1,19 @@
-
-
 #include "tensor_runtime.h"
 #include "tensorUtils.h"
 
+#ifndef MODEL_PARAMS_DIR
+#error MODEL_PARAMS_DIR is not defined
+#endif
+
+#define STR_VALUE(X) #X
+#define STRINGIFY(X) STR_VALUE(X)
+#define MODEL_PARAMS_DIR_STR STRINGIFY(MODEL_PARAMS_DIR)
 
 int main() {
 
   llvm_hpvm_initTensorRt(0);
 
-  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/alexnet_cifar10/";
+  std::string dir_prefix = std::string(MODEL_PARAMS_DIR_STR) + "/alexnet_cifar10/";
 
   std::string input_path = dir_prefix + std::string("test_input.bin");
   // std::string labels_path = dir_prefix + std::string("labels.bin");
diff --git a/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/alexnet_imagenet.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/alexnet_imagenet.cc
index b57e60c0fef41b283ad57a7b203759a8f014252d..74de9507e540a620299068624a5a6b6d8efdbe6a 100644
--- a/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/alexnet_imagenet.cc
+++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/alexnet_imagenet.cc
@@ -1,15 +1,20 @@
-
-
 #include "tensor_runtime.h"
 #include "tensorUtils.h"
 
+#ifndef MODEL_PARAMS_DIR
+#error MODEL_PARAMS_DIR is not defined
+#endif
+
+#define STR_VALUE(X) #X
+#define STRINGIFY(X) STR_VALUE(X)
+#define MODEL_PARAMS_DIR_STR STRINGIFY(MODEL_PARAMS_DIR)
 
 int main() {
 
   llvm_hpvm_initTensorRt(0);
 
   std::string dir_prefix =
-      std::string("@MODEL_PARAMS_DIR@") + "/alexnet_imagenet/";
+      std::string(MODEL_PARAMS_DIR_STR) + "/alexnet_imagenet/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/lenet_mnist.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/lenet_mnist.cc
index 9777670722b69c8b23a82a77312d17386f2d5c3f..e973f712c9e06ced1a37a721fcc7d5eb27126350 100644
--- a/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/lenet_mnist.cc
+++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/lenet_mnist.cc
@@ -1,8 +1,13 @@
-
-
 #include "tensor_runtime.h"
 #include "tensorUtils.h"
 
+#ifndef MODEL_PARAMS_DIR
+#error MODEL_PARAMS_DIR is not defined
+#endif
+
+#define STR_VALUE(X) #X
+#define STRINGIFY(X) STR_VALUE(X)
+#define MODEL_PARAMS_DIR_STR STRINGIFY(MODEL_PARAMS_DIR)
 
 int total_runs = 1;
 
@@ -11,7 +16,7 @@ void testLenetTanh() {
 
   int test_batch_size = 5000;
 
-  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/lenet_mnist/";
+  std::string dir_prefix = std::string(MODEL_PARAMS_DIR_STR) + "/lenet_mnist/";
 
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
diff --git a/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/mobilenet_cifar10.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/mobilenet_cifar10.cc
index 3e37bf7feb6641af3afdeb8fb9f3a65fdfcbdce3..36f90e4954d4ad885cb56b1e36cf516e72c65cb2 100644
--- a/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/mobilenet_cifar10.cc
+++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/mobilenet_cifar10.cc
@@ -1,14 +1,20 @@
-
-
 #include "tensor_runtime.h"
 #include "tensorUtils.h"
 
+#ifndef MODEL_PARAMS_DIR
+#error MODEL_PARAMS_DIR is not defined
+#endif
+
+#define STR_VALUE(X) #X
+#define STRINGIFY(X) STR_VALUE(X)
+#define MODEL_PARAMS_DIR_STR STRINGIFY(MODEL_PARAMS_DIR)
+
 int main() {
 
   llvm_hpvm_initTensorRt(0);
 
   std::string dir_prefix =
-      std::string("@MODEL_PARAMS_DIR@") + "/mobilenet_cifar10/";  std::string input_path = dir_prefix + std::string("test_input.bin");
+      std::string(MODEL_PARAMS_DIR_STR) + "/mobilenet_cifar10/";  std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
   void *conv2d_1_w =
diff --git a/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/resnet18_cifar10.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/resnet18_cifar10.cc
index c8a99419a81d19b374642c21c977a511413f9ae2..6cfcfbfbe183d894ed4ebba79e709de8d9523205 100644
--- a/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/resnet18_cifar10.cc
+++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/resnet18_cifar10.cc
@@ -1,15 +1,20 @@
-
-
 #include "tensor_runtime.h"
 #include "tensorUtils.h"
 
+#ifndef MODEL_PARAMS_DIR
+#error MODEL_PARAMS_DIR is not defined
+#endif
+
+#define STR_VALUE(X) #X
+#define STRINGIFY(X) STR_VALUE(X)
+#define MODEL_PARAMS_DIR_STR STRINGIFY(MODEL_PARAMS_DIR)
 
 int main() {
 
   llvm_hpvm_initTensorRt(0);
 
   std::string dir_prefix =
-      std::string("@MODEL_PARAMS_DIR@") + "/resnet18_cifar10/";
+      std::string(MODEL_PARAMS_DIR_STR) + "/resnet18_cifar10/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
 
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
diff --git a/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/resnet50_imagenet.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/resnet50_imagenet.cc
index 3aeabc22736e6955a9ad5ad07144fc38057616ea..56e02cc4aa4e353739637e0ece46f1193a66cd15 100644
--- a/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/resnet50_imagenet.cc
+++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/resnet50_imagenet.cc
@@ -1,15 +1,20 @@
-
-
 #include "tensor_runtime.h"
 #include "tensorUtils.h"
 
+#ifndef MODEL_PARAMS_DIR
+#error MODEL_PARAMS_DIR is not defined
+#endif
+
+#define STR_VALUE(X) #X
+#define STRINGIFY(X) STR_VALUE(X)
+#define MODEL_PARAMS_DIR_STR STRINGIFY(MODEL_PARAMS_DIR)
 
 int main() {
 
   llvm_hpvm_initTensorRt(0);
 
   std::string dir_prefix =
-      std::string("@MODEL_PARAMS_DIR@") + "/resnet50_imagenet/";
+      std::string(MODEL_PARAMS_DIR_STR) + "/resnet50_imagenet/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/vgg16_cifar10.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/vgg16_cifar10.cc
index f7fffadfc36ba0fd248371efb35a1b7dfede68d3..a7b05ee731542e0fb6ccd5c4ae29fb4789890224 100644
--- a/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/vgg16_cifar10.cc
+++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/vgg16_cifar10.cc
@@ -1,13 +1,19 @@
-
-
 #include "tensor_runtime.h"
 #include "tensorUtils.h"
 
+#ifndef MODEL_PARAMS_DIR
+#error MODEL_PARAMS_DIR is not defined
+#endif
+
+#define STR_VALUE(X) #X
+#define STRINGIFY(X) STR_VALUE(X)
+#define MODEL_PARAMS_DIR_STR STRINGIFY(MODEL_PARAMS_DIR)
+
 int main() {
 
   llvm_hpvm_initTensorRt(0);
 
-  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/vgg16_cifar10/";
+  std::string dir_prefix = std::string(MODEL_PARAMS_DIR_STR) + "/vgg16_cifar10/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/vgg16_cifar100.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/vgg16_cifar100.cc
index d3949c7cc568063f3b344d8497551fa1f4f4102c..b908f4201bf4bccb9f947de7fc703be764bdc15d 100644
--- a/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/vgg16_cifar100.cc
+++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/vgg16_cifar100.cc
@@ -1,14 +1,19 @@
-
-
 #include "tensor_runtime.h"
 #include "tensorUtils.h"
 
+#ifndef MODEL_PARAMS_DIR
+#error MODEL_PARAMS_DIR is not defined
+#endif
+
+#define STR_VALUE(X) #X
+#define STRINGIFY(X) STR_VALUE(X)
+#define MODEL_PARAMS_DIR_STR STRINGIFY(MODEL_PARAMS_DIR)
 
 int main() {
 
   llvm_hpvm_initTensorRt(0);
 
-  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/vgg16_cifar100/";
+  std::string dir_prefix = std::string(MODEL_PARAMS_DIR_STR) + "/vgg16_cifar100/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
 
diff --git a/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/vgg16_imagenet.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/vgg16_imagenet.cc
index 2bb1be2821a8d33062bf1cfd83bb978f59884fa9..a881e7905f6b52a77da9a48e7dc0fe7d29af93cb 100644
--- a/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/vgg16_imagenet.cc
+++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/vgg16_imagenet.cc
@@ -1,14 +1,19 @@
-
-
 #include "tensor_runtime.h"
 #include "tensorUtils.h"
 
+#ifndef MODEL_PARAMS_DIR
+#error MODEL_PARAMS_DIR is not defined
+#endif
+
+#define STR_VALUE(X) #X
+#define STRINGIFY(X) STR_VALUE(X)
+#define MODEL_PARAMS_DIR_STR STRINGIFY(MODEL_PARAMS_DIR)
 
 int main() {
 
   llvm_hpvm_initTensorRt(0);
 
-  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/vgg16_imagenet/";
+  std::string dir_prefix = std::string(MODEL_PARAMS_DIR_STR) + "/vgg16_imagenet/";
 
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
diff --git a/hpvm/tools/hpvm-clang/main.py.in b/hpvm/tools/hpvm-clang/main.py.in
index e2bc5cbafa23bd64094a3198ad8466f682f6bbdc..b20af0b80f192fe3c87b004c05a72b034aee098d 100644
--- a/hpvm/tools/hpvm-clang/main.py.in
+++ b/hpvm/tools/hpvm-clang/main.py.in
@@ -36,6 +36,7 @@ def compile_hpvm_c(
     link_libs: List[str] = None,
     working_dir: PathLike = None,
     conf_file: PathLike = None,
+    verbose: bool = False,
 ):
     from subprocess import check_output
 
@@ -84,7 +85,8 @@ def compile_hpvm_c(
         link_binary(hpvm_rt_linked_file, output_file, link_dirs, link_libs)
     )
     for command in commands:
-        print(" ".join(command))
+        if verbose:
+            print(" ".join(command))
         check_output(command)
 
 
@@ -245,6 +247,10 @@ See option -b for that."""
         help="[clang linker] Link library (such as -lpthread)"
     )
 
+    parser.add_argument(
+        "-v", "--verbose", action="store_true", help="Print out all clang/opt/llvm-link commands used"
+    )
+
     args = parser.parse_args()
     if args.tensor_target == "tensor":
         if args.conf_file is None: