diff --git a/hpvm/test/dnn_benchmarks/CMakeLists.txt b/hpvm/test/dnn_benchmarks/CMakeLists.txt
index 92d34ad2a59a2077071f5ce3c7d309efce786ddf..f6c3db36cbb235289a795f205a48417e4b342d9c 100644
--- a/hpvm/test/dnn_benchmarks/CMakeLists.txt
+++ b/hpvm/test/dnn_benchmarks/CMakeLists.txt
@@ -4,13 +4,13 @@
 find_package(CUDA REQUIRED)
 
 get_filename_component(LLVM_BIN_DIR ${PROJECT_BINARY_DIR}/bin REALPATH)
-set(LLVM_CLANG_XX "${LLVM_BIN_DIR}/clang++")
+set(CMAKE_CXX_COMPILER "${LLVM_BIN_DIR}/clang++")
 set(LLVM_OPT "${LLVM_BIN_DIR}/opt")
 set(LLVM_LINK "${LLVM_BIN_DIR}/llvm-link")
 
 # Directories to include
 set(HPVM_PROJECTS ${PROJECT_SOURCE_DIR}/tools/hpvm/projects)
-set(HPVM_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../benchmarks/include)
+set(HPVM_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../dnn_benchmarks/include)
 set(TENSOR_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include)
 set(TENSOR_RT_INCLUDE_DIR ${HPVM_PROJECTS}/hpvm-tensor-rt/tensor_runtime/include)
 set(INCLUDES ${HPVM_INCLUDE_DIR} ${TENSOR_INCLUDE_DIR} ${TENSOR_RT_INCLUDE_DIR} ${CUDA_INCLUDE_DIRS})
@@ -19,7 +19,11 @@ foreach(dir ${INCLUDES})
 endforeach()
 
 # Built-in libraries to link
-list(APPEND LINKER_FLAGS -lpthread -lcudart -lcurand -lcudnn -lcublas -lcufft -lOpenCL -lstdc++fs -lomp)
+list(APPEND LINKER_FLAGS "-L${CUDA_TOOLKIT_ROOT_DIR}/lib64")
+list(
+  APPEND LINKER_FLAGS
+  -lpthread -lcudart -lcurand -lcudnn -lcublas -lcufft -lOpenCL -lstdc++fs -lomp -lm
+)
 
 # The hpvm-rt runtime
 # This has to be explicitly set as hpvm-rt.ll is created in a custom_target
@@ -27,7 +31,10 @@ list(APPEND LINKER_FLAGS -lpthread -lcudart -lcurand -lcudnn -lcublas -lcufft -l
 # Keep this in sync with hpvm/projects/hpvm-rt/CMakeLists.txt.
 set(HPVM_RT_PATH ${PROJECT_BINARY_DIR}/tools/hpvm/projects/hpvm-rt/hpvm-rt.ll)
 
-# Optimization flags
+# Flags given to clang++ when each benchmark source is compiled to LLVM IR
+set(CLANG_FLAGS -fno-exceptions -std=c++11 -O3)
+
+# Pass plugins to load (-load ...) followed by the flags of the passes to run
 set(
   HPVM_OPT_PASSES
   -load LLVMBuildDFG.so
@@ -35,7 +42,7 @@ set(
   -load LLVMDFG2LLVM_CUDNN.so
   -load LLVMDFG2LLVM_CPU.so
   -load LLVMClearDFG.so
-  -inplace -dfg2llvm-cpu -dfg2llvm-cudnn -clearDFG
+  -inplace -dfg2llvm-cudnn -dfg2llvm-cpu -clearDFG
 )
 
 # Manually specify dependencies because we're not using cmake "normally"
@@ -58,13 +65,18 @@ foreach(entry ${entries})
 
       set(WORK_DIR ${CMAKE_CURRENT_BINARY_DIR})
       add_custom_command(
-        OUTPUT "${target}.ll"
-        DEPENDS ${src_file}
-        COMMAND ${LLVM_CLANG_XX} ${INCLUDE_COMPILER_STRINGS} -emit-llvm -S -O1 ${src_file}
+        OUTPUT "${target}.ll" DEPENDS ${src_file}
+        COMMAND ${CMAKE_CXX_COMPILER} ${INCLUDE_COMPILER_STRINGS} ${CLANG_FLAGS} -emit-llvm -S ${src_file}
           -o ${WORK_DIR}/${target}.ll
       )
+      # Run the HPVM passes on the IR, link in hpvm-rt, and build the benchmark binary.
+      # OUTPUT must name the files the COMMANDs really write, or the rule never goes up to date.
       add_custom_command(
-        OUTPUT "${target}.opt.bc"
+        OUTPUT
+          "${WORK_DIR}/${target}.hpvm.ll"
+          "${WORK_DIR}/${target}_cudnn.bc"
+          "${WORK_DIR}/${target}_cudnn_linked.bc"
+          "${WORK_DIR}/${target}"
         DEPENDS "${target}.ll"
         COMMAND ${LLVM_OPT} -load LLVMGenHPVM.so -genhpvm -globaldce -S ${WORK_DIR}/${target}.ll
           -o ${WORK_DIR}/${target}.hpvm.ll
@@ -72,10 +84,12 @@ foreach(entry ${entries})
           -o ${WORK_DIR}/${target}_cudnn.bc
         COMMAND ${LLVM_LINK} ${WORK_DIR}/${target}_cudnn.bc ${HPVM_RT_PATH}
           -o ${WORK_DIR}/${target}_cudnn_linked.bc
-        COMMAND ${LLVM_CLANG_XX} ${WORK_DIR}/${target}_cudnn_linked.bc $<TARGET_FILE:tensor_runtime>
-          -o ${WORK_DIR}/${target}_cudnn_linked ${LINKER_FLAGS}
+        COMMAND ${CMAKE_CXX_COMPILER}
+          ${WORK_DIR}/${target}_cudnn_linked.bc
+          $<TARGET_FILE:tensor_runtime> $<TARGET_FILE:gpu_profiler> $<TARGET_FILE:promise_profiler>
+          -o ${WORK_DIR}/${target} ${LINKER_FLAGS}
       )
-      add_custom_target(${target} DEPENDS "${target}.opt.bc")
+      add_custom_target(${target} DEPENDS "${WORK_DIR}/${target}")
       add_dependencies(${target} ${DEPEND})
     endforeach()
   endif()