diff --git a/hpvm/projects/hpvm-rt/CMakeLists.txt b/hpvm/projects/hpvm-rt/CMakeLists.txt
index b8a1716f2a7c18c7e1dcb8d907c3585707c22386..02ab62fca57f66155ffafff0686634b3efe4f861 100644
--- a/hpvm/projects/hpvm-rt/CMakeLists.txt
+++ b/hpvm/projects/hpvm-rt/CMakeLists.txt
@@ -6,18 +6,20 @@ SET(CMAKE_CXX_STANDARD 11)
 # Defines ${OpenCL_INCLUDE_DIRS} and ${OpenCL_LIBRARY} if found
 find_package(OpenCL REQUIRED)
 
-add_llvm_library(hpvm-rt.ll hpvm-rt.cpp
-  DEPENDS
-  clang
-  llvm-dis
-)
-target_compile_options(hpvm-rt.ll PUBLIC -flto)
-target_include_directories(hpvm-rt.ll PRIVATE ${OpenCL_INCLUDE_DIRS})
-link_directories(${OpenCL_LIBRARY})
-
-add_custom_target(hpvm-rt.cpp.o ALL
-  COMMAND ar -x ${CMAKE_BINARY_DIR}/lib/libhpvm-rt.ll.a
-  COMMAND mv ${CMAKE_BINARY_DIR}/tools/hpvm/projects/hpvm-rt/hpvm-rt.cpp.o ${CMAKE_BINARY_DIR}/tools/hpvm/projects/hpvm-rt/hpvm-rt.bc
-  COMMAND  ${CMAKE_BINARY_DIR}/bin/llvm-dis  ${CMAKE_BINARY_DIR}/tools/hpvm/projects/hpvm-rt/hpvm-rt.bc)
+# add_llvm_library also puts libhpvm-rt.a in lib/, which we don't care about.
+# What we want is ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/hpvm-rt.dir/hpvm-rt.cpp.o,
+# which is an LLVM bitcode file because of the -flto flag below.
+add_llvm_library(hpvm-rt hpvm-rt.cpp DEPENDS clang)
+target_compile_options(hpvm-rt PUBLIC -flto)
+target_include_directories(hpvm-rt PRIVATE ${OpenCL_INCLUDE_DIRS})
+target_link_directories(hpvm-rt PUBLIC ${OpenCL_LIBRARY})
 
-add_dependencies(hpvm-rt.cpp.o   hpvm-rt.ll)
+# Copy the -flto object hpvm-rt.cpp.o (LLVM bitcode despite its .o name) to hpvm-rt.bc
+add_custom_command(
+  OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/hpvm-rt.bc"
+  DEPENDS hpvm-rt
+  COMMAND ${CMAKE_COMMAND} -E copy
+    ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/hpvm-rt.dir/hpvm-rt.cpp.o
+    ${CMAKE_CURRENT_BINARY_DIR}/hpvm-rt.bc
+)
+add_custom_target(hpvm-rt.bc ALL DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/hpvm-rt.bc")
diff --git a/hpvm/scripts/download_weights.sh b/hpvm/scripts/download_weights.sh
new file mode 100755
index 0000000000000000000000000000000000000000..757abbf3b6f442e729fc100dad73605511e0454f
--- /dev/null
+++ b/hpvm/scripts/download_weights.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+set -e # stop at the first failed step instead of unzipping/removing files that were never fetched
+# First get hands on gdown -- google drive downloader
+wget https://raw.githubusercontent.com/circulosmeos/gdown.pl/master/gdown.pl -O gdown.pl
+chmod +x ./gdown.pl
+# Download the zip file from google drive
+./gdown.pl 'https://drive.google.com/file/d/1V_yd9sKcZQ7zhnO5YhRpOsaBPLEEvM9u' model_params.zip
+unzip model_params.zip # should give a "model_params" folder
+# All our benchmarks also know to look for parameters in <build_dir>/model_params.
+rm gdown.pl model_params.zip # cleanup of the downloader and the archive
\ No newline at end of file
diff --git a/hpvm/scripts/llvm_installer.sh b/hpvm/scripts/llvm_installer.sh
index 21ed6ee6d13ef83e0cc62f643d8e674e7c0e5a90..0cbaea8e493de8a08833ca5ab025e2692f609fe5 100755
--- a/hpvm/scripts/llvm_installer.sh
+++ b/hpvm/scripts/llvm_installer.sh
@@ -256,17 +256,7 @@ if [ $DOWNLOAD_WEIGHTS == "y" ]; then
   echo
   echo "Downloading weights for DNN benchmarks..."
   echo
-
-  # First get hands on gdown -- google drive downloader
-  wget https://raw.githubusercontent.com/circulosmeos/gdown.pl/master/gdown.pl -O gdown.pl
-  chmod +x ./gdown.pl
-  # Download the zip file from google drive
-  ./gdown.pl 'https://drive.google.com/file/d/1V_yd9sKcZQ7zhnO5YhRpOsaBPLEEvM9u' model_params.zip
-  unzip model_params.zip # should give a "model_params" folder
-  mv model_params $BUILD_DIR
-  # All our benchmarks also know to look for parameters in <build_dir>/model_params.
-  # Cleanup:
-  rm gdown.pl model_params.zip
+  ../scripts/download_weights.sh
 else
   echo "Skipping weight download"
 fi
diff --git a/hpvm/test/dnn_benchmarks/CMakeLists.txt b/hpvm/test/dnn_benchmarks/CMakeLists.txt
index 887b2d1e6c3003cf886a907bcaf51c830dd0e423..3b78ad26df6ab435978b7d0f171fb654430ca324 100644
--- a/hpvm/test/dnn_benchmarks/CMakeLists.txt
+++ b/hpvm/test/dnn_benchmarks/CMakeLists.txt
@@ -40,10 +40,10 @@ list(
 )
 
 # The hpvm-rt runtime
-# This has to be explicitly set as hpvm-rt.ll is created in a custom_target
+# This has to be explicitly set as hpvm-rt.bc is created in a custom_target
 # and does not export its file location.
 # Keep this in sync with hpvm/projects/hpvm-rt/CMakeLists.txt.
-set(HPVM_RT_PATH ${PROJECT_BINARY_DIR}/tools/hpvm/projects/hpvm-rt/hpvm-rt.ll)
+set(HPVM_RT_PATH ${PROJECT_BINARY_DIR}/tools/hpvm/projects/hpvm-rt/hpvm-rt.bc)
 
 # Compile flags (clang++)
 set(CLANG_FLAGS -fno-exceptions -std=c++11 -O3)
@@ -89,7 +89,7 @@ function(compile_single_benchmark target src_file extra_passes extra_dfg_flags)
   )
   add_custom_command(
     OUTPUT "${WORK_DIR}/${target}.linked.bc"
-    DEPENDS "${WORK_DIR}/${target}.llvm.ll" hpvm-rt.ll llvm-link
+    DEPENDS "${WORK_DIR}/${target}.llvm.ll" hpvm-rt.bc llvm-link
     COMMAND ${LLVM_LINK} ${WORK_DIR}/${target}.llvm.ll ${HPVM_RT_PATH}
       -o ${WORK_DIR}/${target}.linked.bc
   )
@@ -106,12 +106,19 @@ function(compile_single_benchmark target src_file extra_passes extra_dfg_flags)
   set(test_compile_targets ${test_compile_targets} ${target} PARENT_SCOPE)
 endfunction(compile_single_benchmark)
 
+set(test_run_targets "")
+# Define the custom target ${run_target}, which first builds ${benchmark} and
+# then executes ${WORK_DIR}/${benchmark}; the name is appended to test_run_targets.
+function(run_single_benchmark run_target benchmark)
+  add_custom_target(${run_target} COMMAND ${WORK_DIR}/${benchmark})
+  add_dependencies(${run_target} ${benchmark})
+  list(APPEND test_run_targets ${run_target})
+  set(test_run_targets ${test_run_targets} PARENT_SCOPE)
+endfunction()
+
 file(GLOB entries ./benchmarks/*)
 foreach(dir ${entries})
   get_filename_component(dirname "${dir}" NAME)
-  compile_single_benchmark(
-    "test_${dirname}" ${dir}/${dirname}.cpp LLVMDFG2LLVM_CUDNN -dfg2llvm-cudnn
-  )
   set(
     loop_extra_flags
     -dfg2llvm-wrapperapi
@@ -119,9 +126,14 @@ foreach(dir ${entries})
       -configuration-inputs-filename=${dir}/data/tuner_confs.txt
   )
   compile_single_benchmark(
-    "test_${dirname}_loop" ${dir}/${dirname}_loop.cpp
+    ${dirname} ${dir}/${dirname}.cpp
     LLVMDFG2LLVM_WrapperAPI "${loop_extra_flags}"
   )
+  run_single_benchmark(run_${dirname} ${dirname})
+  compile_single_benchmark(
+    ${dirname}_cudnn ${dir}/${dirname}_cudnn.cpp LLVMDFG2LLVM_CUDNN -dfg2llvm-cudnn
+  )
+  run_single_benchmark(run_${dirname}_cudnn ${dirname}_cudnn)
 endforeach(dir)
 message(STATUS "List of test dnn benchmarks: ${test_compile_targets}")
 add_custom_target(dnn_benchmarks DEPENDS ${test_compile_targets})
diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/profile_info_0.txt b/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/profile_info_0.txt
deleted file mode 100644
index 5f4f8f3e013624a1a3ae207ed2a7d5ce891cc97a..0000000000000000000000000000000000000000
--- a/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/profile_info_0.txt
+++ /dev/null
@@ -1,333 +0,0 @@
-Iteration 0
-tensorConvApprox 278.79 1.05196e+06
-tensorAdd 10.2997 67459
-tensorTanh 9.41899 59799
-tensorPooling 8.7877 56466
-tensorConvApprox 474.917 3.4457e+06
-tensorAdd 8.85339 93260
-tensorTanh 6.91859 73647
-tensorPooling 7.04614 73919
-tensorConvApprox 126.982 1.32622e+06
-tensorAdd 7.1022 84171
-tensorTanh 3.69422 45751
-tensorConvApprox 170.758 2.00465e+06
-tensorAdd 4.44843 60019
-tensorTanh 2.37416 34330
-tensorConvApprox 123.886 1.52748e+06
-tensorAdd 4.49425 62747
-tensorTanh 2.37106 35584
-tensorPooling 2.73525 35603
-tensorGemmGPU 1.01091 17832
-tensorAdd 0.436253 8917
-tensorSoftmax 0.725692 8841
-
-Iteration Compute Time   : 1256.05
-Iteration Compute Energy : 1.01743e+07
-Iteration Control Time   : 0.009216
-Iteration Control Energy : 0
-Iteration Config Time   : 0
-Iteration Config Energy : 0
-Iteration End Frequency : 0
-
-
-Iteration 1
-tensorConvApprox 232.83 2.59416e+06
-tensorAdd 9.83777 117351
-tensorTanh 8.74536 109048
-tensorPooling 9.06987 101563
-tensorConvApprox 472.868 4.85619e+06
-tensorAdd 8.75096 100782
-tensorTanh 6.57158 77864
-tensorPooling 6.84105 85240
-tensorConvApprox 119.42 1.41716e+06
-tensorAdd 7.12127 93535
-tensorTanh 3.44853 51083
-tensorConvApprox 169.705 2.17413e+06
-tensorAdd 4.49521 64196
-tensorTanh 2.11762 36496
-tensorConvApprox 127.77 1.65853e+06
-tensorAdd 4.55607 65305
-tensorTanh 2.46591 37340
-tensorPooling 2.78005 37207
-tensorGemmGPU 0.744988 9278
-tensorAdd 0.177951 9278
-tensorSoftmax 0.799515 18556
-
-Iteration Compute Time   : 1201.12
-Iteration Compute Energy : 1.37143e+07
-Iteration Control Time   : 0.010784
-Iteration Control Energy : 0
-Iteration Config Time   : 0
-Iteration Config Energy : 0
-Iteration End Frequency : 0
-
-
-Iteration 2
-tensorConvApprox 242.334 2.77089e+06
-tensorAdd 9.79774 120846
-tensorTanh 8.68626 104654
-tensorPooling 7.72626 88891
-tensorConvApprox 476.464 4.99714e+06
-tensorAdd 8.90216 109726
-tensorTanh 6.82249 77902
-tensorPooling 6.34269 78111
-tensorConvApprox 122.633 1.45989e+06
-tensorAdd 7.01535 93535
-tensorTanh 3.81937 50988
-tensorConvApprox 167.544 2.15054e+06
-tensorAdd 4.53569 64274
-tensorTanh 2.38136 36766
-tensorConvApprox 130.565 1.68899e+06
-tensorAdd 4.86129 74520
-tensorTanh 2.57813 37072
-tensorPooling 2.64114 37036
-tensorGemmGPU 0.72966 9259
-tensorAdd 0.15376 9259
-tensorSoftmax 0.701116 9259
-
-Iteration Compute Time   : 1217.23
-Iteration Compute Energy : 1.40695e+07
-Iteration Control Time   : 0.007136
-Iteration Control Energy : 0
-Iteration Config Time   : 0
-Iteration Config Energy : 0
-Iteration End Frequency : 0
-
-
-Iteration 3
-tensorConvApprox 234.416 2.76961e+06
-tensorAdd 9.80104 121305
-tensorTanh 8.85387 112830
-tensorPooling 7.59564 88929
-tensorConvApprox 455.892 4.89773e+06
-tensorAdd 8.86965 103152
-tensorTanh 6.64733 79607
-tensorPooling 6.8992 87797
-tensorConvApprox 126.115 1.52457e+06
-tensorAdd 7.1263 94903
-tensorTanh 3.41387 43215
-tensorConvApprox 167.072 2.16785e+06
-tensorAdd 4.47527 64768
-tensorTanh 2.09151 27717
-tensorConvApprox 127.179 1.66812e+06
-tensorAdd 4.92365 75573
-tensorTanh 2.33164 37644
-tensorPooling 2.64786 37511
-tensorGemmGPU 0.768604 9392
-tensorAdd 0.164351 9392
-tensorSoftmax 0.380702 9392
-
-Iteration Compute Time   : 1187.66
-Iteration Compute Energy : 1.4031e+07
-Iteration Control Time   : 0.00928
-Iteration Control Energy : 0
-Iteration Config Time   : 0
-Iteration Config Energy : 0
-Iteration End Frequency : 0
-
-
-Iteration 4
-tensorConvApprox 215.966 2.59061e+06
-tensorAdd 9.78472 125715
-tensorTanh 8.80776 116840
-tensorPooling 8.11465 100418
-tensorConvApprox 473.822 5.11946e+06
-tensorAdd 8.93135 110777
-tensorTanh 6.81398 86703
-tensorPooling 6.48502 79023
-tensorConvApprox 126.247 1.51225e+06
-tensorAdd 7.09952 93573
-tensorTanh 3.42331 51064
-tensorConvApprox 168.28 2.17381e+06
-tensorAdd 4.62186 64616
-tensorTanh 2.45317 36648
-tensorConvApprox 128.931 1.68308e+06
-tensorAdd 4.89374 74832
-tensorTanh 2.61026 37454
-tensorPooling 2.80283 37264
-tensorGemmGPU 0.798076 9297
-tensorAdd 0.185535 9297
-tensorSoftmax 0.665405 9297
-
-Iteration Compute Time   : 1191.74
-Iteration Compute Energy : 1.4122e+07
-Iteration Control Time   : 0.009472
-Iteration Control Energy : 0
-Iteration Config Time   : 0
-Iteration Config Energy : 0
-Iteration End Frequency : 0
-
-
-Iteration 5
-tensorConvApprox 230.122 2.72008e+06
-tensorAdd 9.75665 121732
-tensorTanh 8.8012 114122
-tensorPooling 8.51484 97201
-tensorConvApprox 459.856 4.96706e+06
-tensorAdd 8.90559 111123
-tensorTanh 6.6376 79680
-tensorPooling 7.20607 87103
-tensorConvApprox 124.097 1.48424e+06
-tensorAdd 7.13135 94466
-tensorTanh 3.49173 51558
-tensorConvApprox 169.758 2.19389e+06
-tensorAdd 4.57018 64350
-tensorTanh 2.13442 27372
-tensorConvApprox 130.337 1.69389e+06
-tensorAdd 4.57089 65325
-tensorTanh 2.42165 37340
-tensorPooling 2.77173 37112
-tensorGemmGPU 0.75446 9278
-tensorAdd 0.279614 9278
-tensorSoftmax 0.382302 9278
-
-Iteration Compute Time   : 1192.5
-Iteration Compute Energy : 1.40755e+07
-Iteration Control Time   : 0.012256
-Iteration Control Energy : 0
-Iteration Config Time   : 0
-Iteration Config Energy : 0
-Iteration End Frequency : 0
-
-
-Iteration 6
-tensorConvApprox 212.819 2.45406e+06
-tensorAdd 9.80731 121314
-tensorTanh 8.91515 112716
-tensorPooling 9.21384 96839
-tensorConvApprox 473.333 4.94392e+06
-tensorAdd 8.82347 110354
-tensorTanh 6.81225 86665
-tensorPooling 6.22019 79004
-tensorConvApprox 114.721 1.38355e+06
-tensorAdd 7.15427 95488
-tensorTanh 3.46833 51768
-tensorConvApprox 167.818 2.16784e+06
-tensorAdd 4.5323 64844
-tensorTanh 2.42101 36899
-tensorConvApprox 113.709 1.4909e+06
-tensorAdd 4.98154 76868
-tensorTanh 2.65291 38472
-tensorPooling 2.74475 38241
-tensorGemmGPU 0.755004 19101
-tensorAdd 0.40611 9560
-tensorSoftmax 0.308094 9560
-
-Iteration Compute Time   : 1161.62
-Iteration Compute Energy : 1.3488e+07
-Iteration Control Time   : 0.007648
-Iteration Control Energy : 0
-Iteration Config Time   : 0
-Iteration Config Energy : 0
-Iteration End Frequency : 0
-
-
-Iteration 7
-tensorConvApprox 198.427 2.4577e+06
-tensorAdd 9.98907 127907
-tensorTanh 8.7332 110523
-tensorPooling 8.04159 102292
-tensorConvApprox 480.98 5.19136e+06
-tensorAdd 8.6654 103035
-tensorTanh 6.69069 79620
-tensorPooling 7.12681 87278
-tensorConvApprox 116.104 1.41549e+06
-tensorAdd 7.119 95508
-tensorTanh 3.50891 52135
-tensorConvApprox 177.713 2.28228e+06
-tensorAdd 4.6027 64312
-tensorTanh 2.20879 27603
-tensorConvApprox 115.356 1.52029e+06
-tensorAdd 5.00823 76332
-tensorTanh 2.43148 38050
-tensorPooling 3.6187 57056
-tensorGemmGPU 0.785788 18892
-tensorAdd 0.142079 9446
-tensorSoftmax 0.360638 9449
-
-Iteration Compute Time   : 1167.61
-Iteration Compute Energy : 1.39266e+07
-Iteration Control Time   : 0.006592
-Iteration Control Energy : 0
-Iteration Config Time   : 0
-Iteration Config Energy : 0
-Iteration End Frequency : 0
-
-
-Iteration 8
-tensorConvApprox 207.155 2.56109e+06
-tensorAdd 9.87201 126675
-tensorTanh 8.74117 109261
-tensorPooling 7.81673 92718
-tensorConvApprox 472.513 5.11272e+06
-tensorAdd 8.89288 111006
-tensorTanh 6.66803 79562
-tensorPooling 6.28598 79755
-tensorConvApprox 121.24 1.47787e+06
-tensorAdd 7.15468 95017
-tensorTanh 3.49115 51634
-tensorConvApprox 172.267 2.23362e+06
-tensorAdd 4.62106 64358
-tensorTanh 2.23269 27603
-tensorConvApprox 118.481 1.55783e+06
-tensorAdd 5.00305 76086
-tensorTanh 2.42523 37796
-tensorPooling 2.78123 37796
-tensorGemmGPU 0.809467 9449
-tensorAdd 0.178847 9449
-tensorSoftmax 0.420606 9449
-
-Iteration Compute Time   : 1169.05
-Iteration Compute Energy : 1.39607e+07
-Iteration Control Time   : 0.008896
-Iteration Control Energy : 0
-Iteration Config Time   : 0
-Iteration Config Energy : 0
-Iteration End Frequency : 0
-
-
-Iteration 9
-tensorConvApprox 228.894 2.71249e+06
-tensorAdd 8.90542 105278
-tensorTanh 8.00889 89461
-tensorPooling 7.955 89556
-tensorConvApprox 450.969 4.76405e+06
-tensorAdd 8.98159 112455
-tensorTanh 6.57929 80420
-tensorPooling 5.77895 72606
-tensorConvApprox 118.413 1.36516e+06
-tensorAdd 7.12326 96567
-tensorTanh 3.41185 52316
-tensorConvApprox 168.071 2.21097e+06
-tensorAdd 4.46561 65433
-tensorTanh 2.10921 27945
-tensorConvApprox 118.765 1.5872e+06
-tensorAdd 4.87207 76849
-tensorTanh 2.346 38472
-tensorPooling 2.38908 38241
-tensorGemmGPU 0.734172 19101
-tensorAdd 0.114239 9560
-tensorSoftmax 0.313374 9560
-
-Iteration Compute Time   : 1159.2
-Iteration Compute Energy : 1.36237e+07
-Iteration Control Time   : 0.00864
-Iteration Control Energy : 0
-Iteration Config Time   : 0
-Iteration Config Energy : 0
-Iteration End Frequency : 0
-
-
-
-
-Total Compute Time  : 11903.8
-Total Compute Energy: 1.35186e+08
-
-Total Control Time  : 0.08992
-Total Control Energy: 2.56749e-315
-
-Total Config Time  : 2.56749e-315
-Total Config Energy: 2.56749e-315
-
-Total Time  : 11903.9
-Total Energy: 1.35186e+08
diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/alexnet2_loop.cpp b/hpvm/test/dnn_benchmarks/benchmarks/alexnet2_cifar10/alexnet2_cifar10.cpp
similarity index 100%
rename from hpvm/test/dnn_benchmarks/benchmarks/alexnet2/alexnet2_loop.cpp
rename to hpvm/test/dnn_benchmarks/benchmarks/alexnet2_cifar10/alexnet2_cifar10.cpp
diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/alexnet2.cpp b/hpvm/test/dnn_benchmarks/benchmarks/alexnet2_cifar10/alexnet2_cifar10_cudnn.cpp
similarity index 100%
rename from hpvm/test/dnn_benchmarks/benchmarks/alexnet2/alexnet2.cpp
rename to hpvm/test/dnn_benchmarks/benchmarks/alexnet2_cifar10/alexnet2_cifar10_cudnn.cpp
diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/data/quant_ranges_rt.txt b/hpvm/test/dnn_benchmarks/benchmarks/alexnet2_cifar10/data/quant_ranges_rt.txt
similarity index 100%
rename from hpvm/test/dnn_benchmarks/benchmarks/alexnet2/data/quant_ranges_rt.txt
rename to hpvm/test/dnn_benchmarks/benchmarks/alexnet2_cifar10/data/quant_ranges_rt.txt
diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/data/tuner_confs.txt b/hpvm/test/dnn_benchmarks/benchmarks/alexnet2_cifar10/data/tuner_confs.txt
similarity index 100%
rename from hpvm/test/dnn_benchmarks/benchmarks/alexnet2/data/tuner_confs.txt
rename to hpvm/test/dnn_benchmarks/benchmarks/alexnet2_cifar10/data/tuner_confs.txt
diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet/alexnet_loop.cpp b/hpvm/test/dnn_benchmarks/benchmarks/alexnet_cifar10/alexnet_cifar10.cpp
similarity index 100%
rename from hpvm/test/dnn_benchmarks/benchmarks/alexnet/alexnet_loop.cpp
rename to hpvm/test/dnn_benchmarks/benchmarks/alexnet_cifar10/alexnet_cifar10.cpp
diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet/alexnet.cpp b/hpvm/test/dnn_benchmarks/benchmarks/alexnet_cifar10/alexnet_cifar10_cudnn.cpp
similarity index 100%
rename from hpvm/test/dnn_benchmarks/benchmarks/alexnet/alexnet.cpp
rename to hpvm/test/dnn_benchmarks/benchmarks/alexnet_cifar10/alexnet_cifar10_cudnn.cpp
diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/quant_ranges_rt.txt b/hpvm/test/dnn_benchmarks/benchmarks/alexnet_cifar10/data/quant_ranges_rt.txt
similarity index 100%
rename from hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/quant_ranges_rt.txt
rename to hpvm/test/dnn_benchmarks/benchmarks/alexnet_cifar10/data/quant_ranges_rt.txt
diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/tuner_confs.txt b/hpvm/test/dnn_benchmarks/benchmarks/alexnet_cifar10/data/tuner_confs.txt
similarity index 100%
rename from hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/tuner_confs.txt
rename to hpvm/test/dnn_benchmarks/benchmarks/alexnet_cifar10/data/tuner_confs.txt
diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp b/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp
index 466e311577d1e1d46d2e0c6a2a624cc21900be4f..340e0aa1194ac57e96eadd1669a97fa25fdd0c44 100644
--- a/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp
+++ b/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp
@@ -11,7 +11,7 @@
 #include <config.h>
 
 void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 2, 2, 4, 4);
@@ -19,7 +19,7 @@ void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -27,7 +27,7 @@ void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_2_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -35,7 +35,7 @@ void var_2_node(void *t1, size_t bytes_t1) {
 }
 
 void var_3_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_pool_max(t1, 3, 3, 0, 0, 2, 2);
@@ -43,7 +43,7 @@ void var_3_node(void *t1, size_t bytes_t1) {
 }
 
 void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 2, 2, 1, 1);
@@ -51,7 +51,7 @@ void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_5_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -59,7 +59,7 @@ void var_5_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_6_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -67,7 +67,7 @@ void var_6_node(void *t1, size_t bytes_t1) {
 }
 
 void var_7_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_pool_max(t1, 3, 3, 0, 0, 2, 2);
@@ -75,7 +75,7 @@ void var_7_node(void *t1, size_t bytes_t1) {
 }
 
 void var_8_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -83,7 +83,7 @@ void var_8_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_9_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -91,7 +91,7 @@ void var_9_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_10_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -99,7 +99,7 @@ void var_10_node(void *t1, size_t bytes_t1) {
 }
 
 void var_11_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -107,7 +107,7 @@ void var_11_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_12_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -115,7 +115,7 @@ void var_12_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_13_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -123,7 +123,7 @@ void var_13_node(void *t1, size_t bytes_t1) {
 }
 
 void var_14_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -131,7 +131,7 @@ void var_14_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_15_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -139,7 +139,7 @@ void var_15_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_16_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -147,7 +147,7 @@ void var_16_node(void *t1, size_t bytes_t1) {
 }
 
 void var_17_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_pool_max(t1, 3, 3, 0, 0, 2, 2);
@@ -155,7 +155,7 @@ void var_17_node(void *t1, size_t bytes_t1) {
 }
 
 void var_18_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_mul(t1, t2);
@@ -163,7 +163,7 @@ void var_18_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_19_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -171,7 +171,7 @@ void var_19_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_20_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -179,7 +179,7 @@ void var_20_node(void *t1, size_t bytes_t1) {
 }
 
 void var_21_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_mul(t1, t2);
@@ -187,7 +187,7 @@ void var_21_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_22_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -195,7 +195,7 @@ void var_22_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_23_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -203,7 +203,7 @@ void var_23_node(void *t1, size_t bytes_t1) {
 }
 
 void var_24_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_mul(t1, t2);
@@ -211,7 +211,7 @@ void var_24_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_25_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -464,6 +464,7 @@ int main() {
 
   std::string dir_prefix =
       std::string(MODEL_PARAMS_DIR) + "/alexnet_imagenet/";
+
   std::string input_path = dir_prefix + std::string("input.bin");
   std::string labels_path = dir_prefix + std::string("labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
@@ -514,16 +515,16 @@ int main() {
   std::string dense_3_b_path = dir_prefix + std::string("dense_3_b.bin");
   void *dense_3_b =
       readTrainedWeights(dense_3_b_path.c_str(), 0, 1, 1000, 1, 1);
-  void *input = readTrainedWeights(input_path.c_str(), 0, 1000, 3, 224, 224);
+  // void* input = readTrainedWeights(input_path.c_str(), 0, 1000,3,224,224);
   // uint32_t* labels = readLabels2(labels_path.c_str(),6000);
 
-  uint32_t *labels = readLabels3(labels_path.c_str(), 1000);
+  // uint32_t* labels = readLabels3(labels_path.c_str(), 1000);
 
   __hpvm__init();
   RootIn *args = static_cast<RootIn *>(malloc(sizeof(RootIn)));
 
-  args->input = input;
-  args->input_bytes = 0;
+  // args->input = input;
+  // args->input_bytes = 0;
   args->conv2d_1_w = conv2d_1_w;
   args->conv2d_1_w_bytes = 0;
   args->conv2d_1_b = conv2d_1_b;
@@ -557,14 +558,40 @@ int main() {
   args->dense_3_b = dense_3_b;
   args->dense_3_b_bytes = 0;
 
-  void *dfg = __hpvm__launch(0, root, (void *)args);
+  int batch_size = 100;
+  int test_input_size = 4000;
+  int batch_count = test_input_size / batch_size;
 
-  __hpvm__wait(dfg);
+  startMemTracking();
+  startProfiling();
 
-  void *result = static_cast<RootIn *>(args)->r.tensor;
-  hpvm_request_tensor(result, 0);
+  for (int j = 0; j < 1; j++) {
+    for (int i = 0; i < batch_count; i++) {
 
+      int start = i * batch_size;
+      int end = (i + 1) * batch_size;
+
+      void *input =
+          readInputBatch(input_path.c_str(), 0, start, end, 3, 224, 224);
+
+      args->input = input;
+      args->input_bytes = 0;
+
+      void *dfg = __hpvm__launch(0, root, (void *)args);
+
+      __hpvm__wait(dfg);
+
+      void *result = static_cast<RootIn *>(args)->r.tensor;
+      hpvm_request_tensor(result, 0);
+
+      llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end);
+
+      freeBatchMemory();
+    }
+  }
+
+  stopProfiling();
   __hpvm__cleanup();
-  computeAccuracy3(labels, result);
+
   return 0;
 }
diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/alexnet_imagenet_loop.cpp b/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/alexnet_imagenet_cudnn.cpp
similarity index 89%
rename from hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/alexnet_imagenet_loop.cpp
rename to hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/alexnet_imagenet_cudnn.cpp
index 340e0aa1194ac57e96eadd1669a97fa25fdd0c44..466e311577d1e1d46d2e0c6a2a624cc21900be4f 100644
--- a/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/alexnet_imagenet_loop.cpp
+++ b/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/alexnet_imagenet_cudnn.cpp
@@ -11,7 +11,7 @@
 #include <config.h>
 
 void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 2, 2, 4, 4);
@@ -19,7 +19,7 @@ void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -27,7 +27,7 @@ void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_2_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -35,7 +35,7 @@ void var_2_node(void *t1, size_t bytes_t1) {
 }
 
 void var_3_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_pool_max(t1, 3, 3, 0, 0, 2, 2);
@@ -43,7 +43,7 @@ void var_3_node(void *t1, size_t bytes_t1) {
 }
 
 void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 2, 2, 1, 1);
@@ -51,7 +51,7 @@ void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_5_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -59,7 +59,7 @@ void var_5_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_6_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -67,7 +67,7 @@ void var_6_node(void *t1, size_t bytes_t1) {
 }
 
 void var_7_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_pool_max(t1, 3, 3, 0, 0, 2, 2);
@@ -75,7 +75,7 @@ void var_7_node(void *t1, size_t bytes_t1) {
 }
 
 void var_8_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -83,7 +83,7 @@ void var_8_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_9_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -91,7 +91,7 @@ void var_9_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_10_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -99,7 +99,7 @@ void var_10_node(void *t1, size_t bytes_t1) {
 }
 
 void var_11_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -107,7 +107,7 @@ void var_11_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_12_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -115,7 +115,7 @@ void var_12_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_13_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -123,7 +123,7 @@ void var_13_node(void *t1, size_t bytes_t1) {
 }
 
 void var_14_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -131,7 +131,7 @@ void var_14_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_15_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -139,7 +139,7 @@ void var_15_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_16_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -147,7 +147,7 @@ void var_16_node(void *t1, size_t bytes_t1) {
 }
 
 void var_17_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_pool_max(t1, 3, 3, 0, 0, 2, 2);
@@ -155,7 +155,7 @@ void var_17_node(void *t1, size_t bytes_t1) {
 }
 
 void var_18_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_mul(t1, t2);
@@ -163,7 +163,7 @@ void var_18_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_19_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -171,7 +171,7 @@ void var_19_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_20_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -179,7 +179,7 @@ void var_20_node(void *t1, size_t bytes_t1) {
 }
 
 void var_21_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_mul(t1, t2);
@@ -187,7 +187,7 @@ void var_21_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_22_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -195,7 +195,7 @@ void var_22_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_23_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -203,7 +203,7 @@ void var_23_node(void *t1, size_t bytes_t1) {
 }
 
 void var_24_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_mul(t1, t2);
@@ -211,7 +211,7 @@ void var_24_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_25_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -464,7 +464,6 @@ int main() {
 
   std::string dir_prefix =
       std::string(MODEL_PARAMS_DIR) + "/alexnet_imagenet/";
-
   std::string input_path = dir_prefix + std::string("input.bin");
   std::string labels_path = dir_prefix + std::string("labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
@@ -515,16 +514,16 @@ int main() {
   std::string dense_3_b_path = dir_prefix + std::string("dense_3_b.bin");
   void *dense_3_b =
       readTrainedWeights(dense_3_b_path.c_str(), 0, 1, 1000, 1, 1);
-  // void* input = readTrainedWeights(input_path.c_str(), 0, 1000,3,224,224);
+  void *input = readTrainedWeights(input_path.c_str(), 0, 1000, 3, 224, 224);
   // uint32_t* labels = readLabels2(labels_path.c_str(),6000);
 
-  // uint32_t* labels = readLabels3(labels_path.c_str(), 1000);
+  uint32_t *labels = readLabels3(labels_path.c_str(), 1000);
 
   __hpvm__init();
   RootIn *args = static_cast<RootIn *>(malloc(sizeof(RootIn)));
 
-  // args->input = input;
-  // args->input_bytes = 0;
+  args->input = input;
+  args->input_bytes = 0;
   args->conv2d_1_w = conv2d_1_w;
   args->conv2d_1_w_bytes = 0;
   args->conv2d_1_b = conv2d_1_b;
@@ -558,40 +557,14 @@ int main() {
   args->dense_3_b = dense_3_b;
   args->dense_3_b_bytes = 0;
 
-  int batch_size = 100;
-  int test_input_size = 4000;
-  int batch_count = test_input_size / batch_size;
+  void *dfg = __hpvm__launch(0, root, (void *)args);
 
-  startMemTracking();
-  startProfiling();
+  __hpvm__wait(dfg);
 
-  for (int j = 0; j < 1; j++) {
-    for (int i = 0; i < batch_count; i++) {
+  void *result = static_cast<RootIn *>(args)->r.tensor;
+  hpvm_request_tensor(result, 0);
 
-      int start = i * batch_size;
-      int end = (i + 1) * batch_size;
-
-      void *input =
-          readInputBatch(input_path.c_str(), 0, start, end, 3, 224, 224);
-
-      args->input = input;
-      args->input_bytes = 0;
-
-      void *dfg = __hpvm__launch(0, root, (void *)args);
-
-      __hpvm__wait(dfg);
-
-      void *result = static_cast<RootIn *>(args)->r.tensor;
-      hpvm_request_tensor(result, 0);
-
-      llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end);
-
-      freeBatchMemory();
-    }
-  }
-
-  stopProfiling();
   __hpvm__cleanup();
-
+  computeAccuracy3(labels, result);
   return 0;
 }
diff --git a/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/lenet_mnist.cpp b/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/lenet_mnist.cpp
index 3613e9f1325d73e7515a88f3e198bcd32821224c..9a8bfbc68fcaad4b369223c53e98121e9934b27d 100644
--- a/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/lenet_mnist.cpp
+++ b/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/lenet_mnist.cpp
@@ -11,7 +11,7 @@
 #include <config.h>
 
 void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 2, 2, 1, 1);
@@ -19,7 +19,7 @@ void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -27,7 +27,7 @@ void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_2_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_tanh(t1);
@@ -35,7 +35,7 @@ void var_2_node(void *t1, size_t bytes_t1) {
 }
 
 void var_3_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
@@ -43,7 +43,7 @@ void var_3_node(void *t1, size_t bytes_t1) {
 }
 
 void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 2, 2, 1, 1);
@@ -51,7 +51,7 @@ void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_5_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -59,7 +59,7 @@ void var_5_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_6_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_tanh(t1);
@@ -67,7 +67,7 @@ void var_6_node(void *t1, size_t bytes_t1) {
 }
 
 void var_7_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
@@ -75,7 +75,7 @@ void var_7_node(void *t1, size_t bytes_t1) {
 }
 
 void var_8_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_mul(t1, t2);
@@ -83,7 +83,7 @@ void var_8_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_9_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -91,7 +91,7 @@ void var_9_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_10_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_tanh(t1);
@@ -99,7 +99,7 @@ void var_10_node(void *t1, size_t bytes_t1) {
 }
 
 void var_11_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_mul(t1, t2);
@@ -107,7 +107,7 @@ void var_11_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_12_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -115,7 +115,7 @@ void var_12_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_13_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_tanh(t1);
@@ -294,15 +294,15 @@ int main() {
       readTrainedWeights(dense_2_w_path.c_str(), 0, 1, 1, 1024, 10);
   std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin");
   void *dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0, 1, 10, 1, 1);
-  void *input = readTrainedWeights(input_path.c_str(), 0, 5000, 1, 28, 28);
+  //  void* input = readTrainedWeights(input_path.c_str(), 0, 5000,1,28,28);
 
-  uint32_t *labels = readLabels3(labels_path.c_str(), 5000);
+  //  uint32_t* labels = readLabels3(labels_path.c_str(), 5000);
 
   __hpvm__init();
   RootIn *args = static_cast<RootIn *>(malloc(sizeof(RootIn)));
 
-  args->input = input;
-  args->input_bytes = 0;
+  //  args->input = input;
+  //  args->input_bytes = 0;
   args->conv2d_1_w = conv2d_1_w;
   args->conv2d_1_w_bytes = 0;
   args->conv2d_1_b = conv2d_1_b;
@@ -320,15 +320,37 @@ int main() {
   args->dense_2_b = dense_2_b;
   args->dense_2_b_bytes = 0;
 
-  void *dfg = __hpvm__launch(0, root, (void *)args);
+  int batch_size = 500;
+  int test_input_size = 5000;
+  int batch_count = test_input_size / batch_size;
+
+  startMemTracking();
+  startProfiling();
+
+  for (int i = 0; i < batch_count; i++) {
+
+    int start = i * batch_size;
+    int end = (i + 1) * batch_size;
+
+    void *input = readInputBatch(input_path.c_str(), 0, start, end, 1, 28, 28);
+
+    args->input = input;
+    args->input_bytes = 0;
+
+    void *dfg = __hpvm__launch(0, root, (void *)args);
+
+    __hpvm__wait(dfg);
+
+    void *result = static_cast<RootIn *>(args)->r.tensor;
+    hpvm_request_tensor(result, 0);
 
-  __hpvm__wait(dfg);
+    llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end);
 
-  void *result = static_cast<RootIn *>(args)->r.tensor;
-  hpvm_request_tensor(result, 0);
+    freeBatchMemory();
+  }
 
+  stopProfiling();
   __hpvm__cleanup();
-  computeAccuracy3(labels, result);
 
   return 0;
 }
diff --git a/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/lenet_mnist_loop.cpp b/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/lenet_mnist_cudnn.cpp
similarity index 86%
rename from hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/lenet_mnist_loop.cpp
rename to hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/lenet_mnist_cudnn.cpp
index 9a8bfbc68fcaad4b369223c53e98121e9934b27d..3613e9f1325d73e7515a88f3e198bcd32821224c 100644
--- a/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/lenet_mnist_loop.cpp
+++ b/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/lenet_mnist_cudnn.cpp
@@ -11,7 +11,7 @@
 #include <config.h>
 
 void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 2, 2, 1, 1);
@@ -19,7 +19,7 @@ void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -27,7 +27,7 @@ void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_2_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_tanh(t1);
@@ -35,7 +35,7 @@ void var_2_node(void *t1, size_t bytes_t1) {
 }
 
 void var_3_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
@@ -43,7 +43,7 @@ void var_3_node(void *t1, size_t bytes_t1) {
 }
 
 void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 2, 2, 1, 1);
@@ -51,7 +51,7 @@ void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_5_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -59,7 +59,7 @@ void var_5_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_6_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_tanh(t1);
@@ -67,7 +67,7 @@ void var_6_node(void *t1, size_t bytes_t1) {
 }
 
 void var_7_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
@@ -75,7 +75,7 @@ void var_7_node(void *t1, size_t bytes_t1) {
 }
 
 void var_8_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_mul(t1, t2);
@@ -83,7 +83,7 @@ void var_8_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_9_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -91,7 +91,7 @@ void var_9_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_10_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_tanh(t1);
@@ -99,7 +99,7 @@ void var_10_node(void *t1, size_t bytes_t1) {
 }
 
 void var_11_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_mul(t1, t2);
@@ -107,7 +107,7 @@ void var_11_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_12_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -115,7 +115,7 @@ void var_12_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_13_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_tanh(t1);
@@ -294,15 +294,15 @@ int main() {
       readTrainedWeights(dense_2_w_path.c_str(), 0, 1, 1, 1024, 10);
   std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin");
   void *dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0, 1, 10, 1, 1);
-  //  void* input = readTrainedWeights(input_path.c_str(), 0, 5000,1,28,28);
+  void *input = readTrainedWeights(input_path.c_str(), 0, 5000, 1, 28, 28);
 
-  //  uint32_t* labels = readLabels3(labels_path.c_str(), 5000);
+  uint32_t *labels = readLabels3(labels_path.c_str(), 5000);
 
   __hpvm__init();
   RootIn *args = static_cast<RootIn *>(malloc(sizeof(RootIn)));
 
-  //  args->input = input;
-  //  args->input_bytes = 0;
+  args->input = input;
+  args->input_bytes = 0;
   args->conv2d_1_w = conv2d_1_w;
   args->conv2d_1_w_bytes = 0;
   args->conv2d_1_b = conv2d_1_b;
@@ -320,37 +320,15 @@ int main() {
   args->dense_2_b = dense_2_b;
   args->dense_2_b_bytes = 0;
 
-  int batch_size = 500;
-  int test_input_size = 5000;
-  int batch_count = test_input_size / batch_size;
-
-  startMemTracking();
-  startProfiling();
-
-  for (int i = 0; i < batch_count; i++) {
-
-    int start = i * batch_size;
-    int end = (i + 1) * batch_size;
-
-    void *input = readInputBatch(input_path.c_str(), 0, start, end, 1, 28, 28);
-
-    args->input = input;
-    args->input_bytes = 0;
-
-    void *dfg = __hpvm__launch(0, root, (void *)args);
-
-    __hpvm__wait(dfg);
-
-    void *result = static_cast<RootIn *>(args)->r.tensor;
-    hpvm_request_tensor(result, 0);
+  void *dfg = __hpvm__launch(0, root, (void *)args);
 
-    llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end);
+  __hpvm__wait(dfg);
 
-    freeBatchMemory();
-  }
+  void *result = static_cast<RootIn *>(args)->r.tensor;
+  hpvm_request_tensor(result, 0);
 
-  stopProfiling();
   __hpvm__cleanup();
+  computeAccuracy3(labels, result);
 
   return 0;
 }
diff --git a/hpvm/test/dnn_benchmarks/benchmarks/mobilenet/mobilenet.cpp b/hpvm/test/dnn_benchmarks/benchmarks/mobilenet/mobilenet.cpp
index b32dccabc2f29b54e8da35551f8d982cd13a378c..047697767d9fa0d7f428a02eeb6b8a9566597137 100644
--- a/hpvm/test/dnn_benchmarks/benchmarks/mobilenet/mobilenet.cpp
+++ b/hpvm/test/dnn_benchmarks/benchmarks/mobilenet/mobilenet.cpp
@@ -11,7 +11,7 @@
 #include <config.h>
 
 void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -63,7 +63,7 @@ void var_5_node(void *t1, size_t bytes_t1) {
 }
 
 void var_6_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
@@ -115,7 +115,7 @@ void var_11_node(void *t1, size_t bytes_t1) {
 }
 
 void var_12_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
@@ -167,7 +167,7 @@ void var_17_node(void *t1, size_t bytes_t1) {
 }
 
 void var_18_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
@@ -219,7 +219,7 @@ void var_23_node(void *t1, size_t bytes_t1) {
 }
 
 void var_24_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
@@ -271,7 +271,7 @@ void var_29_node(void *t1, size_t bytes_t1) {
 }
 
 void var_30_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
@@ -323,7 +323,7 @@ void var_35_node(void *t1, size_t bytes_t1) {
 }
 
 void var_36_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
@@ -375,7 +375,7 @@ void var_41_node(void *t1, size_t bytes_t1) {
 }
 
 void var_42_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
@@ -427,7 +427,7 @@ void var_47_node(void *t1, size_t bytes_t1) {
 }
 
 void var_48_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
@@ -479,7 +479,7 @@ void var_53_node(void *t1, size_t bytes_t1) {
 }
 
 void var_54_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
@@ -531,7 +531,7 @@ void var_59_node(void *t1, size_t bytes_t1) {
 }
 
 void var_60_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
@@ -583,7 +583,7 @@ void var_65_node(void *t1, size_t bytes_t1) {
 }
 
 void var_66_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
@@ -635,7 +635,7 @@ void var_71_node(void *t1, size_t bytes_t1) {
 }
 
 void var_72_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
@@ -687,7 +687,7 @@ void var_77_node(void *t1, size_t bytes_t1) {
 }
 
 void var_78_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
@@ -721,7 +721,7 @@ void var_81_node(void *t1, size_t bytes_t1) {
 }
 
 void var_82_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_mul(t1, t2);
@@ -729,7 +729,7 @@ void var_82_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_83_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -1966,11 +1966,10 @@ typedef struct __attribute__((__packed__)) {
 } RootIn;
 
 int main() {
-
   std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/mobilenet/";
 
   std::string input_path = dir_prefix + std::string("input.bin");
-  std::string labels_path = dir_prefix + std::string("labels.bin");
+  std::string labels_path = dir_prefix + std::string("labels32.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
   void *conv2d_1_w =
       readTrainedWeights(conv2d_1_w_path.c_str(), 0, 32, 3, 3, 3);
@@ -2502,14 +2501,14 @@ int main() {
       readTrainedWeights(dense_1_w_path.c_str(), 0, 1, 1, 1024, 10);
   std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin");
   void *dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0, 1, 10, 1, 1);
-  void *input = readTrainedWeights(input_path.c_str(), 0, 5000, 3, 32, 32);
-  uint32_t *labels = readLabels3(labels_path.c_str(), 5000);
+  // void* input = readTrainedWeights(input_path.c_str(), 0, 5000,3,32,32);
+  // uint8_t* labels = readLabels(labels_path.c_str(), 5000);
 
   __hpvm__init();
   RootIn *args = static_cast<RootIn *>(malloc(sizeof(RootIn)));
 
-  args->input = input;
-  args->input_bytes = 0;
+  // args->input = input;
+  // args->input_bytes = 0;
   args->conv2d_1_w = conv2d_1_w;
   args->conv2d_1_w_bytes = 0;
   args->batch_normalization_1_gamma = batch_normalization_1_gamma;
@@ -2785,14 +2784,39 @@ int main() {
   args->dense_1_b = dense_1_b;
   args->dense_1_b_bytes = 0;
 
-  void *dfg = __hpvm__launch(0, root, (void *)args);
+  int batch_size = 500;
+  int test_input_size = 5000;
+  int batch_count = test_input_size / batch_size;
+
+  // void* input = create4DTensor(0,nchw,batch_size,3,32,32);
+
+  startMemTracking();
+  startProfiling();
+
+  for (int i = 0; i < batch_count; i++) {
 
-  __hpvm__wait(dfg);
+    int start = i * batch_size;
+    int end = (i + 1) * batch_size;
 
-  void *result = static_cast<RootIn *>(args)->r.tensor;
-  hpvm_request_tensor(result, 0);
+    // copyInputBatch(input_path.c_str(),start,end,3,32,32, input);
+    void *input = readInputBatch(input_path.c_str(), 0, start, end, 3, 32, 32);
 
+    args->input = input;
+    args->input_bytes = 0;
+
+    void *dfg = __hpvm__launch(0, root, (void *)args);
+
+    __hpvm__wait(dfg);
+
+    void *result = static_cast<RootIn *>(args)->r.tensor;
+    hpvm_request_tensor(result, 0);
+
+    llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end);
+
+    freeBatchMemory();
+  }
+  stopProfiling();
   __hpvm__cleanup();
-  computeAccuracy3(labels, result);
+
   return 0;
 }
diff --git a/hpvm/test/dnn_benchmarks/benchmarks/mobilenet/mobilenet_loop.cpp b/hpvm/test/dnn_benchmarks/benchmarks/mobilenet/mobilenet_cudnn.cpp
similarity index 98%
rename from hpvm/test/dnn_benchmarks/benchmarks/mobilenet/mobilenet_loop.cpp
rename to hpvm/test/dnn_benchmarks/benchmarks/mobilenet/mobilenet_cudnn.cpp
index 8200b1bfcc1662db20e56c839a982a65464c9ff1..b32dccabc2f29b54e8da35551f8d982cd13a378c 100644
--- a/hpvm/test/dnn_benchmarks/benchmarks/mobilenet/mobilenet_loop.cpp
+++ b/hpvm/test/dnn_benchmarks/benchmarks/mobilenet/mobilenet_cudnn.cpp
@@ -11,7 +11,7 @@
 #include <config.h>
 
 void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -63,7 +63,7 @@ void var_5_node(void *t1, size_t bytes_t1) {
 }
 
 void var_6_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
@@ -115,7 +115,7 @@ void var_11_node(void *t1, size_t bytes_t1) {
 }
 
 void var_12_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
@@ -167,7 +167,7 @@ void var_17_node(void *t1, size_t bytes_t1) {
 }
 
 void var_18_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
@@ -219,7 +219,7 @@ void var_23_node(void *t1, size_t bytes_t1) {
 }
 
 void var_24_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
@@ -271,7 +271,7 @@ void var_29_node(void *t1, size_t bytes_t1) {
 }
 
 void var_30_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
@@ -323,7 +323,7 @@ void var_35_node(void *t1, size_t bytes_t1) {
 }
 
 void var_36_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
@@ -375,7 +375,7 @@ void var_41_node(void *t1, size_t bytes_t1) {
 }
 
 void var_42_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
@@ -427,7 +427,7 @@ void var_47_node(void *t1, size_t bytes_t1) {
 }
 
 void var_48_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
@@ -479,7 +479,7 @@ void var_53_node(void *t1, size_t bytes_t1) {
 }
 
 void var_54_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
@@ -531,7 +531,7 @@ void var_59_node(void *t1, size_t bytes_t1) {
 }
 
 void var_60_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
@@ -583,7 +583,7 @@ void var_65_node(void *t1, size_t bytes_t1) {
 }
 
 void var_66_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
@@ -635,7 +635,7 @@ void var_71_node(void *t1, size_t bytes_t1) {
 }
 
 void var_72_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
@@ -687,7 +687,7 @@ void var_77_node(void *t1, size_t bytes_t1) {
 }
 
 void var_78_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
@@ -721,7 +721,7 @@ void var_81_node(void *t1, size_t bytes_t1) {
 }
 
 void var_82_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_mul(t1, t2);
@@ -729,7 +729,7 @@ void var_82_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_83_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -1966,6 +1966,7 @@ typedef struct __attribute__((__packed__)) {
 } RootIn;
 
 int main() {
+
   std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/mobilenet/";
 
   std::string input_path = dir_prefix + std::string("input.bin");
@@ -2501,14 +2502,14 @@ int main() {
       readTrainedWeights(dense_1_w_path.c_str(), 0, 1, 1, 1024, 10);
   std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin");
   void *dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0, 1, 10, 1, 1);
-  // void* input = readTrainedWeights(input_path.c_str(), 0, 5000,3,32,32);
-  // uint8_t* labels = readLabels(labels_path.c_str(), 5000);
+  void *input = readTrainedWeights(input_path.c_str(), 0, 5000, 3, 32, 32);
+  uint32_t *labels = readLabels3(labels_path.c_str(), 5000);
 
   __hpvm__init();
   RootIn *args = static_cast<RootIn *>(malloc(sizeof(RootIn)));
 
-  // args->input = input;
-  // args->input_bytes = 0;
+  args->input = input;
+  args->input_bytes = 0;
   args->conv2d_1_w = conv2d_1_w;
   args->conv2d_1_w_bytes = 0;
   args->batch_normalization_1_gamma = batch_normalization_1_gamma;
@@ -2784,39 +2785,14 @@ int main() {
   args->dense_1_b = dense_1_b;
   args->dense_1_b_bytes = 0;
 
-  int batch_size = 500;
-  int test_input_size = 5000;
-  int batch_count = test_input_size / batch_size;
-
-  // void* input = create4DTensor(0,nchw,batch_size,3,32,32);
-
-  startMemTracking();
-  startProfiling();
-
-  for (int i = 0; i < batch_count; i++) {
+  void *dfg = __hpvm__launch(0, root, (void *)args);
 
-    int start = i * batch_size;
-    int end = (i + 1) * batch_size;
+  __hpvm__wait(dfg);
 
-    // copyInputBatch(input_path.c_str(),start,end,3,32,32, input);
-    void *input = readInputBatch(input_path.c_str(), 0, start, end, 3, 32, 32);
+  void *result = static_cast<RootIn *>(args)->r.tensor;
+  hpvm_request_tensor(result, 0);
 
-    args->input = input;
-    args->input_bytes = 0;
-
-    void *dfg = __hpvm__launch(0, root, (void *)args);
-
-    __hpvm__wait(dfg);
-
-    void *result = static_cast<RootIn *>(args)->r.tensor;
-    hpvm_request_tensor(result, 0);
-
-    llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end);
-
-    freeBatchMemory();
-  }
-  stopProfiling();
   __hpvm__cleanup();
-
+  computeAccuracy3(labels, result);
   return 0;
 }
diff --git a/hpvm/test/dnn_benchmarks/benchmarks/resnet18/data/quant_ranges_rt.txt b/hpvm/test/dnn_benchmarks/benchmarks/resnet18_cifar10/data/quant_ranges_rt.txt
similarity index 100%
rename from hpvm/test/dnn_benchmarks/benchmarks/resnet18/data/quant_ranges_rt.txt
rename to hpvm/test/dnn_benchmarks/benchmarks/resnet18_cifar10/data/quant_ranges_rt.txt
diff --git a/hpvm/test/dnn_benchmarks/benchmarks/resnet18/data/tuner_confs.txt b/hpvm/test/dnn_benchmarks/benchmarks/resnet18_cifar10/data/tuner_confs.txt
similarity index 100%
rename from hpvm/test/dnn_benchmarks/benchmarks/resnet18/data/tuner_confs.txt
rename to hpvm/test/dnn_benchmarks/benchmarks/resnet18_cifar10/data/tuner_confs.txt
diff --git a/hpvm/test/dnn_benchmarks/benchmarks/resnet18/resnet18_loop.cpp b/hpvm/test/dnn_benchmarks/benchmarks/resnet18_cifar10/resnet18_cifar10.cpp
similarity index 100%
rename from hpvm/test/dnn_benchmarks/benchmarks/resnet18/resnet18_loop.cpp
rename to hpvm/test/dnn_benchmarks/benchmarks/resnet18_cifar10/resnet18_cifar10.cpp
diff --git a/hpvm/test/dnn_benchmarks/benchmarks/resnet18/resnet18.cpp b/hpvm/test/dnn_benchmarks/benchmarks/resnet18_cifar10/resnet18_cifar10_cudnn.cpp
similarity index 100%
rename from hpvm/test/dnn_benchmarks/benchmarks/resnet18/resnet18.cpp
rename to hpvm/test/dnn_benchmarks/benchmarks/resnet18_cifar10/resnet18_cifar10_cudnn.cpp
diff --git a/hpvm/test/dnn_benchmarks/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp b/hpvm/test/dnn_benchmarks/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp
index c4bd6be08b5afad0367e93f640c54b45e7d41938..42bad74ac39511a64ee4fd20e589cec5caf14836 100644
--- a/hpvm/test/dnn_benchmarks/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp
+++ b/hpvm/test/dnn_benchmarks/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp
@@ -11,32 +11,36 @@
 #include <config.h>
 
 void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(1);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 3, 3, 2, 2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(2);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_2_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(3);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_3_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(4);
 
   void *r = __hpvm__tensor_pool_max(t1, 3, 3, 0, 0, 2, 2);
   __hpvm__return(2, r, (size_t)0);
@@ -45,24 +49,27 @@ void var_3_node(void *t1, size_t bytes_t1) {
 void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                 size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                 size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(5);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_5_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(6);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_6_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(7);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -71,32 +78,36 @@ void var_6_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_7_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                 size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                 size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(8);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_8_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(9);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_9_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(10);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_10_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(11);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -105,32 +116,36 @@ void var_10_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_11_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(12);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_12_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(13);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_13_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(14);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_14_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(15);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -139,24 +154,27 @@ void var_14_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_15_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(16);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_16_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(17);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_17_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(18);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -165,40 +183,45 @@ void var_17_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_18_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(19);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_19_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(20);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_20_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(21);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_21_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(22);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_22_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(23);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -207,32 +230,36 @@ void var_22_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_23_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(24);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_24_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(25);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_25_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(26);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_26_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(27);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -241,32 +268,36 @@ void var_26_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_27_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(28);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_28_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(29);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_29_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(30);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_30_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(31);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -275,40 +306,45 @@ void var_30_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_31_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(32);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_32_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(33);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_33_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(34);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_34_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(35);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_35_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(36);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -317,32 +353,36 @@ void var_35_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_36_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(37);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_37_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(38);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_38_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(39);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_39_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(40);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -351,32 +391,36 @@ void var_39_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_40_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(41);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_41_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(42);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_42_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(43);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_43_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(44);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -385,40 +429,45 @@ void var_43_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_44_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(45);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_45_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(46);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_46_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(47);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_47_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(48);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 2, 2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_48_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(49);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -427,32 +476,36 @@ void var_48_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_49_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(50);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_50_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(51);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_51_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(52);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_52_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(53);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -461,32 +514,36 @@ void var_52_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_53_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(54);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_54_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(55);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_55_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(56);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_56_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(57);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -495,24 +552,27 @@ void var_56_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_57_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(58);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_58_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(59);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 2, 2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_59_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(60);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -521,40 +581,45 @@ void var_59_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_60_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(61);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_61_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(62);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_62_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(63);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_63_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(64);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_64_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(65);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -563,32 +628,36 @@ void var_64_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_65_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(66);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_66_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(67);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_67_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(68);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_68_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(69);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -597,32 +666,36 @@ void var_68_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_69_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(70);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_70_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(71);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_71_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(72);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_72_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(73);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -631,40 +704,45 @@ void var_72_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_73_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(74);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_74_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(75);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_75_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(76);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_76_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(77);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_77_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(78);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -673,32 +751,36 @@ void var_77_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_78_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(79);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_79_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(80);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_80_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(81);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_81_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(82);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -707,32 +789,36 @@ void var_81_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_82_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(83);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_83_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(84);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_84_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(85);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_85_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(86);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -741,40 +827,45 @@ void var_85_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_86_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(87);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_87_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(88);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_88_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(89);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_89_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(90);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_90_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(91);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -783,32 +874,36 @@ void var_90_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_91_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(92);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_92_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(93);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_93_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(94);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_94_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(95);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -817,32 +912,36 @@ void var_94_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_95_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(96);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_96_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(97);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_97_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(98);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_98_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(99);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -851,40 +950,45 @@ void var_98_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_99_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(100);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_100_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(101);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_101_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(102);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_102_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(103);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 2, 2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_103_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(104);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -893,32 +997,36 @@ void var_103_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_104_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(105);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_105_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(106);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_106_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(107);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_107_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(108);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -927,32 +1035,36 @@ void var_107_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_108_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(109);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_109_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(110);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_110_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(111);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_111_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(112);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -961,24 +1073,27 @@ void var_111_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_112_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(113);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_113_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(114);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 2, 2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_114_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(115);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -987,40 +1102,45 @@ void var_114_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_115_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(116);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_116_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(117);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_117_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(118);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_118_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(119);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_119_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(120);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1029,32 +1149,36 @@ void var_119_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_120_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(121);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_121_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(122);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_122_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(123);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_123_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(124);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1063,32 +1187,36 @@ void var_123_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_124_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(125);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_125_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(126);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_126_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(127);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_127_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(128);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1097,40 +1225,45 @@ void var_127_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_128_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(129);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_129_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(130);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_130_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(131);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_131_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(132);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_132_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(133);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1139,32 +1272,36 @@ void var_132_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_133_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(134);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_134_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(135);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_135_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(136);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_136_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(137);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1173,32 +1310,36 @@ void var_136_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_137_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(138);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_138_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(139);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_139_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(140);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_140_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(141);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1207,40 +1348,45 @@ void var_140_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_141_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(142);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_142_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(143);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_143_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(144);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_144_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(145);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_145_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(146);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1249,32 +1395,36 @@ void var_145_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_146_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(147);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_147_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(148);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_148_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(149);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_149_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(150);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1283,32 +1433,36 @@ void var_149_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_150_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(151);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_151_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(152);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_152_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(153);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_153_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(154);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1317,40 +1471,45 @@ void var_153_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_154_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(155);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_155_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(156);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_156_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(157);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_157_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(158);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_158_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(159);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1359,32 +1518,36 @@ void var_158_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_159_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(160);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_160_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(161);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_161_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(162);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_162_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(163);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1393,32 +1556,36 @@ void var_162_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_163_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(164);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_164_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(165);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_165_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(166);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_166_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(167);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1427,40 +1594,45 @@ void var_166_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_167_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(168);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_168_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(169);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_169_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(170);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_170_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(171);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_171_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(172);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1469,32 +1641,36 @@ void var_171_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_172_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(173);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_173_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(174);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_174_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(175);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_175_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(176);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1503,32 +1679,36 @@ void var_175_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_176_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(177);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_177_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(178);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_178_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(179);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_179_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(180);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1537,40 +1717,45 @@ void var_179_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_180_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(181);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_181_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(182);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_182_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(183);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_183_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(184);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 2, 2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_184_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(185);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1579,32 +1764,36 @@ void var_184_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_185_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(186);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_186_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(187);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_187_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(188);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_188_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(189);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1613,32 +1802,36 @@ void var_188_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_189_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(190);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_190_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(191);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_191_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(192);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_192_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(193);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1647,24 +1840,27 @@ void var_192_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_193_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(194);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_194_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(195);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 2, 2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_195_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(196);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1673,40 +1869,45 @@ void var_195_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_196_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(197);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_197_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(198);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_198_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(199);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_199_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(200);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_200_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(201);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1715,32 +1916,36 @@ void var_200_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_201_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(202);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_202_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(203);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_203_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(204);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_204_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(205);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1749,32 +1954,36 @@ void var_204_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_205_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(206);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_206_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(207);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_207_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(208);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_208_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(209);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1783,40 +1992,45 @@ void var_208_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_209_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(210);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_210_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(211);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_211_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(212);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_212_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(213);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_213_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(214);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1825,32 +2039,36 @@ void var_213_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_214_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(215);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_215_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(216);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_216_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(217);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_217_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(218);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1859,32 +2077,36 @@ void var_217_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_218_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(219);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_219_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(220);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_220_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(221);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_221_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(222);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1893,56 +2115,63 @@ void var_221_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_222_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
+  __hpvm__node_id(223);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_223_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(224);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_224_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(225);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_225_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(226);
 
   void *r = __hpvm__tensor_pool_mean(t1, 7, 7, 0, 0, 7, 7);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_226_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(227);
 
   void *r = __hpvm__tensor_mul(t1, t2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_227_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
+  __hpvm__node_id(228);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_228_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
+  __hpvm__node_id(229);
 
   void *r = __hpvm__tensor_softmax(t1);
   __hpvm__return(2, r, (size_t)0);
@@ -6081,14 +6310,13 @@ int main() {
   std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin");
   void *dense_1_b =
       readTrainedWeights(dense_1_b_path.c_str(), 0, 1, 1000, 1, 1);
-  void *input = readTrainedWeights(input_path.c_str(), 0, 100, 3, 224, 224);
-  uint32_t *labels = readLabels3(labels_path.c_str(), 100);
+
+  // void* input = readTrainedWeights(input_path.c_str(), 0,100,3,224,224);
+  // uint32_t* labels = readLabelsBatch3(labels_path.c_str(),0,100);
 
   __hpvm__init();
   RootIn *args = static_cast<RootIn *>(malloc(sizeof(RootIn)));
 
-  args->input = input;
-  args->input_bytes = 0;
   args->conv2d_1_w = conv2d_1_w;
   args->conv2d_1_w_bytes = 0;
   args->conv2d_1_b = conv2d_1_b;
@@ -6730,14 +6958,39 @@ int main() {
   args->dense_1_b = dense_1_b;
   args->dense_1_b_bytes = 0;
 
-  void *dfg = __hpvm__launch(0, root, (void *)args);
+  startMemTracking();
+  startProfiling();
+
+  unsigned int batch_size = 50;
+  unsigned int test_input_size = 1000;
+  unsigned int batch_count = test_input_size / batch_size;
+
+  for (int j = 0; j < 1; j++) {
+    for (int i = 0; i < batch_count; i++) {
+      int start = i * batch_size;
+      int end = (i + 1) * batch_size;
+
+      void *input =
+          readInputBatch(input_path.c_str(), 0, start, end, 3, 224, 224);
+
+      args->input = input;
+      args->input_bytes = 0;
 
-  __hpvm__wait(dfg);
+      void *dfg = __hpvm__launch(0, root, (void *)args);
 
-  void *result = static_cast<RootIn *>(args)->r.tensor;
-  hpvm_request_tensor(result, 0);
+      __hpvm__wait(dfg);
 
+      void *result = static_cast<RootIn *>(args)->r.tensor;
+      hpvm_request_tensor(result, 0);
+
+      llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end);
+
+      freeBatchMemory();
+    }
+  }
+
+  stopProfiling();
   __hpvm__cleanup();
-  computeAccuracy3(labels, result);
+
   return 0;
 }
diff --git a/hpvm/test/dnn_benchmarks/benchmarks/resnet50_imagenet/resnet50_imagenet_loop.cpp b/hpvm/test/dnn_benchmarks/benchmarks/resnet50_imagenet/resnet50_imagenet_cudnn.cpp
similarity index 94%
rename from hpvm/test/dnn_benchmarks/benchmarks/resnet50_imagenet/resnet50_imagenet_loop.cpp
rename to hpvm/test/dnn_benchmarks/benchmarks/resnet50_imagenet/resnet50_imagenet_cudnn.cpp
index 42bad74ac39511a64ee4fd20e589cec5caf14836..c4bd6be08b5afad0367e93f640c54b45e7d41938 100644
--- a/hpvm/test/dnn_benchmarks/benchmarks/resnet50_imagenet/resnet50_imagenet_loop.cpp
+++ b/hpvm/test/dnn_benchmarks/benchmarks/resnet50_imagenet/resnet50_imagenet_cudnn.cpp
@@ -11,36 +11,32 @@
 #include <config.h>
 
 void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(1);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 3, 3, 2, 2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(2);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_2_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(3);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_3_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(4);
 
   void *r = __hpvm__tensor_pool_max(t1, 3, 3, 0, 0, 2, 2);
   __hpvm__return(2, r, (size_t)0);
@@ -49,27 +45,24 @@ void var_3_node(void *t1, size_t bytes_t1) {
 void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                 size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                 size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(5);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_5_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(6);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_6_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(7);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -78,36 +71,32 @@ void var_6_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_7_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                 size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                 size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(8);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_8_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(9);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_9_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(10);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_10_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(11);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -116,36 +105,32 @@ void var_10_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_11_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(12);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_12_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(13);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_13_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(14);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_14_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(15);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -154,27 +139,24 @@ void var_14_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_15_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(16);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_16_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(17);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_17_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(18);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -183,45 +165,40 @@ void var_17_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_18_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(19);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_19_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(20);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_20_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(21);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_21_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(22);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_22_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(23);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -230,36 +207,32 @@ void var_22_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_23_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(24);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_24_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(25);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_25_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(26);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_26_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(27);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -268,36 +241,32 @@ void var_26_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_27_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(28);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_28_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(29);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_29_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(30);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_30_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(31);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -306,45 +275,40 @@ void var_30_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_31_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(32);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_32_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(33);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_33_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(34);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_34_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(35);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_35_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(36);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -353,36 +317,32 @@ void var_35_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_36_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(37);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_37_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(38);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_38_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(39);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_39_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(40);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -391,36 +351,32 @@ void var_39_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_40_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(41);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_41_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(42);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_42_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(43);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_43_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(44);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -429,45 +385,40 @@ void var_43_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_44_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(45);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_45_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(46);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_46_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(47);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_47_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(48);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 2, 2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_48_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(49);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -476,36 +427,32 @@ void var_48_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_49_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(50);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_50_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(51);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_51_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(52);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_52_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(53);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -514,36 +461,32 @@ void var_52_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_53_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(54);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_54_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(55);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_55_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(56);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_56_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(57);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -552,27 +495,24 @@ void var_56_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_57_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(58);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_58_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(59);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 2, 2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_59_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(60);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -581,45 +521,40 @@ void var_59_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_60_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(61);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_61_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(62);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_62_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(63);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_63_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(64);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_64_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(65);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -628,36 +563,32 @@ void var_64_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_65_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(66);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_66_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(67);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_67_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(68);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_68_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(69);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -666,36 +597,32 @@ void var_68_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_69_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(70);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_70_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(71);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_71_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(72);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_72_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(73);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -704,45 +631,40 @@ void var_72_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_73_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(74);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_74_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(75);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_75_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(76);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_76_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(77);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_77_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(78);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -751,36 +673,32 @@ void var_77_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_78_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(79);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_79_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(80);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_80_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(81);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_81_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(82);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -789,36 +707,32 @@ void var_81_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_82_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(83);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_83_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(84);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_84_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(85);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_85_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(86);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -827,45 +741,40 @@ void var_85_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_86_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(87);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_87_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(88);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_88_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(89);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_89_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(90);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_90_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(91);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -874,36 +783,32 @@ void var_90_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_91_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(92);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_92_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(93);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_93_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(94);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_94_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(95);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -912,36 +817,32 @@ void var_94_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_95_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(96);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_96_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(97);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_97_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(98);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_98_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(99);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -950,45 +851,40 @@ void var_98_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_99_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3,
                  size_t bytes_t3, void *t4, size_t bytes_t4, void *t5,
                  size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(100);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_100_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(101);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_101_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(102);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_102_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(103);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 2, 2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_103_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(104);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -997,36 +893,32 @@ void var_103_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_104_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(105);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_105_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(106);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_106_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(107);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_107_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(108);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1035,36 +927,32 @@ void var_107_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_108_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(109);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_109_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(110);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_110_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(111);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_111_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(112);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1073,27 +961,24 @@ void var_111_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_112_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(113);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_113_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(114);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 2, 2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_114_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(115);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1102,45 +987,40 @@ void var_114_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_115_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(116);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_116_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(117);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_117_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(118);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_118_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(119);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_119_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(120);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1149,36 +1029,32 @@ void var_119_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_120_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(121);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_121_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(122);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_122_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(123);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_123_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(124);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1187,36 +1063,32 @@ void var_123_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_124_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(125);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_125_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(126);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_126_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(127);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_127_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(128);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1225,45 +1097,40 @@ void var_127_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_128_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(129);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_129_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(130);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_130_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(131);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_131_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(132);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_132_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(133);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1272,36 +1139,32 @@ void var_132_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_133_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(134);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_134_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(135);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_135_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(136);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_136_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(137);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1310,36 +1173,32 @@ void var_136_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_137_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(138);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_138_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(139);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_139_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(140);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_140_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(141);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1348,45 +1207,40 @@ void var_140_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_141_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(142);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_142_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(143);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_143_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(144);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_144_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(145);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_145_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(146);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1395,36 +1249,32 @@ void var_145_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_146_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(147);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_147_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(148);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_148_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(149);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_149_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(150);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1433,36 +1283,32 @@ void var_149_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_150_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(151);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_151_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(152);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_152_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(153);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_153_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(154);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1471,45 +1317,40 @@ void var_153_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_154_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(155);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_155_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(156);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_156_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(157);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_157_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(158);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_158_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(159);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1518,36 +1359,32 @@ void var_158_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_159_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(160);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_160_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(161);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_161_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(162);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_162_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(163);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1556,36 +1393,32 @@ void var_162_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_163_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(164);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_164_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(165);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_165_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(166);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_166_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(167);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1594,45 +1427,40 @@ void var_166_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_167_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(168);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_168_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(169);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_169_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(170);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_170_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(171);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_171_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(172);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1641,36 +1469,32 @@ void var_171_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_172_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(173);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_173_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(174);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_174_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(175);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_175_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(176);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1679,36 +1503,32 @@ void var_175_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_176_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(177);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_177_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(178);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_178_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(179);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_179_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(180);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1717,45 +1537,40 @@ void var_179_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_180_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(181);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_181_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(182);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_182_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(183);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_183_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(184);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 2, 2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_184_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(185);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1764,36 +1579,32 @@ void var_184_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_185_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(186);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_186_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(187);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_187_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(188);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_188_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(189);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1802,36 +1613,32 @@ void var_188_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_189_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(190);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_190_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(191);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_191_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(192);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_192_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(193);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1840,27 +1647,24 @@ void var_192_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_193_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(194);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_194_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(195);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 2, 2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_195_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(196);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1869,45 +1673,40 @@ void var_195_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_196_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(197);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_197_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(198);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_198_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(199);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_199_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(200);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_200_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(201);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1916,36 +1715,32 @@ void var_200_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_201_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(202);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_202_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(203);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_203_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(204);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_204_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(205);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1954,36 +1749,32 @@ void var_204_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_205_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(206);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_206_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(207);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_207_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(208);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_208_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(209);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -1992,45 +1783,40 @@ void var_208_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_209_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(210);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_210_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(211);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_211_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(212);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_212_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(213);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_213_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(214);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -2039,36 +1825,32 @@ void var_213_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_214_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(215);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_215_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(216);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_216_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(217);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_217_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(218);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -2077,36 +1859,32 @@ void var_217_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_218_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(219);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_219_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(220);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_220_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(221);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_221_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(222);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
@@ -2115,63 +1893,56 @@ void var_221_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 void var_222_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2,
                   void *t3, size_t bytes_t3, void *t4, size_t bytes_t4,
                   void *t5, size_t bytes_t5) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(5, t1, t2, t3, t4, t5, 0);
-  __hpvm__node_id(223);
 
   void *r = __hpvm__tensor_batchnorm(t1, t2, t3, t4, t5, 0.001);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_223_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(224);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_224_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(225);
 
   void *r = __hpvm__tensor_relu(t1);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_225_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(226);
 
   void *r = __hpvm__tensor_pool_mean(t1, 7, 7, 0, 0, 7, 7);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_226_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(227);
 
   void *r = __hpvm__tensor_mul(t1, t2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_227_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
-  __hpvm__node_id(228);
 
   void *r = __hpvm__tensor_add(t1, t2);
   __hpvm__return(2, r, (size_t)0);
 }
 
 void var_228_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
-  __hpvm__node_id(229);
 
   void *r = __hpvm__tensor_softmax(t1);
   __hpvm__return(2, r, (size_t)0);
@@ -6310,13 +6081,14 @@ int main() {
   std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin");
   void *dense_1_b =
       readTrainedWeights(dense_1_b_path.c_str(), 0, 1, 1000, 1, 1);
-
-  // void* input = readTrainedWeights(input_path.c_str(), 0,100,3,224,224);
-  // uint32_t* labels = readLabelsBatch3(labels_path.c_str(),0,100);
+  void *input = readTrainedWeights(input_path.c_str(), 0, 100, 3, 224, 224);
+  uint32_t *labels = readLabels3(labels_path.c_str(), 100);
 
   __hpvm__init();
   RootIn *args = static_cast<RootIn *>(malloc(sizeof(RootIn)));
 
+  args->input = input;
+  args->input_bytes = 0;
   args->conv2d_1_w = conv2d_1_w;
   args->conv2d_1_w_bytes = 0;
   args->conv2d_1_b = conv2d_1_b;
@@ -6958,39 +6730,14 @@ int main() {
   args->dense_1_b = dense_1_b;
   args->dense_1_b_bytes = 0;
 
-  startMemTracking();
-  startProfiling();
-
-  unsigned int batch_size = 50;
-  unsigned int test_input_size = 1000;
-  unsigned int batch_count = test_input_size / batch_size;
-
-  for (int j = 0; j < 1; j++) {
-    for (int i = 0; i < batch_count; i++) {
-      int start = i * batch_size;
-      int end = (i + 1) * batch_size;
-
-      void *input =
-          readInputBatch(input_path.c_str(), 0, start, end, 3, 224, 224);
-
-      args->input = input;
-      args->input_bytes = 0;
+  void *dfg = __hpvm__launch(0, root, (void *)args);
 
-      void *dfg = __hpvm__launch(0, root, (void *)args);
+  __hpvm__wait(dfg);
 
-      __hpvm__wait(dfg);
+  void *result = static_cast<RootIn *>(args)->r.tensor;
+  hpvm_request_tensor(result, 0);
 
-      void *result = static_cast<RootIn *>(args)->r.tensor;
-      hpvm_request_tensor(result, 0);
-
-      llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end);
-
-      freeBatchMemory();
-    }
-  }
-
-  stopProfiling();
   __hpvm__cleanup();
-
+  computeAccuracy3(labels, result);
   return 0;
 }
diff --git a/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp b/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp
index f1533c75b4b838f5b86dfbf915cfd359b9682636..059bff6d22a51853090700072d4cf3915ed5f796 100644
--- a/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp
+++ b/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp
@@ -11,7 +11,7 @@
 #include <config.h>
 
 void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -19,7 +19,7 @@ void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -27,7 +27,7 @@ void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_2_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -35,7 +35,7 @@ void var_2_node(void *t1, size_t bytes_t1) {
 }
 
 void var_3_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -43,7 +43,7 @@ void var_3_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -51,7 +51,7 @@ void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_5_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -59,7 +59,7 @@ void var_5_node(void *t1, size_t bytes_t1) {
 }
 
 void var_6_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
@@ -67,7 +67,7 @@ void var_6_node(void *t1, size_t bytes_t1) {
 }
 
 void var_7_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -75,7 +75,7 @@ void var_7_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_8_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -83,7 +83,7 @@ void var_8_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_9_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -91,7 +91,7 @@ void var_9_node(void *t1, size_t bytes_t1) {
 }
 
 void var_10_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -99,7 +99,7 @@ void var_10_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_11_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -107,7 +107,7 @@ void var_11_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_12_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -115,7 +115,7 @@ void var_12_node(void *t1, size_t bytes_t1) {
 }
 
 void var_13_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
@@ -123,7 +123,7 @@ void var_13_node(void *t1, size_t bytes_t1) {
 }
 
 void var_14_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -131,7 +131,7 @@ void var_14_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_15_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -139,7 +139,7 @@ void var_15_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_16_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -147,7 +147,7 @@ void var_16_node(void *t1, size_t bytes_t1) {
 }
 
 void var_17_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -155,7 +155,7 @@ void var_17_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_18_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -163,7 +163,7 @@ void var_18_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_19_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -171,7 +171,7 @@ void var_19_node(void *t1, size_t bytes_t1) {
 }
 
 void var_20_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -179,7 +179,7 @@ void var_20_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_21_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -187,7 +187,7 @@ void var_21_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_22_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -195,7 +195,7 @@ void var_22_node(void *t1, size_t bytes_t1) {
 }
 
 void var_23_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
@@ -203,7 +203,7 @@ void var_23_node(void *t1, size_t bytes_t1) {
 }
 
 void var_24_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -211,7 +211,7 @@ void var_24_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_25_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -219,7 +219,7 @@ void var_25_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_26_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -227,7 +227,7 @@ void var_26_node(void *t1, size_t bytes_t1) {
 }
 
 void var_27_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -235,7 +235,7 @@ void var_27_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_28_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -243,7 +243,7 @@ void var_28_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_29_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -251,7 +251,7 @@ void var_29_node(void *t1, size_t bytes_t1) {
 }
 
 void var_30_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -259,7 +259,7 @@ void var_30_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_31_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -267,7 +267,7 @@ void var_31_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_32_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -275,7 +275,7 @@ void var_32_node(void *t1, size_t bytes_t1) {
 }
 
 void var_33_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
@@ -283,7 +283,7 @@ void var_33_node(void *t1, size_t bytes_t1) {
 }
 
 void var_34_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -291,7 +291,7 @@ void var_34_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_35_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -299,7 +299,7 @@ void var_35_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_36_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -307,7 +307,7 @@ void var_36_node(void *t1, size_t bytes_t1) {
 }
 
 void var_37_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -315,7 +315,7 @@ void var_37_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_38_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -323,7 +323,7 @@ void var_38_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_39_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -331,7 +331,7 @@ void var_39_node(void *t1, size_t bytes_t1) {
 }
 
 void var_40_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -339,7 +339,7 @@ void var_40_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_41_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -347,7 +347,7 @@ void var_41_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_42_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -355,7 +355,7 @@ void var_42_node(void *t1, size_t bytes_t1) {
 }
 
 void var_43_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
@@ -363,7 +363,7 @@ void var_43_node(void *t1, size_t bytes_t1) {
 }
 
 void var_44_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_mul(t1, t2);
@@ -371,7 +371,7 @@ void var_44_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_45_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -379,7 +379,7 @@ void var_45_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_46_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -387,7 +387,7 @@ void var_46_node(void *t1, size_t bytes_t1) {
 }
 
 void var_47_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_mul(t1, t2);
@@ -395,7 +395,7 @@ void var_47_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_48_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -920,14 +920,14 @@ int main() {
       readTrainedWeights(dense_2_w_path.c_str(), 0, 1, 1, 512, 10);
   std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin");
   void *dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0, 1, 10, 1, 1);
-  void *input = readTrainedWeights(input_path.c_str(), 0, 2000, 3, 32, 32);
-  uint32_t *labels = readLabels3(labels_path.c_str(), 2000);
+  // void* input = readTrainedWeights(input_path.c_str(), 0,2000,3,32,32);
+  // uint32_t* labels = readLabels3(labels_path.c_str(),2000);
 
   __hpvm__init();
   RootIn *args = static_cast<RootIn *>(malloc(sizeof(RootIn)));
 
-  args->input = input;
-  args->input_bytes = 0;
+  // args->input = input;
+  // args->input_bytes = 0;
   args->conv2d_1_w = conv2d_1_w;
   args->conv2d_1_w_bytes = 0;
   args->conv2d_1_b = conv2d_1_b;
@@ -989,27 +989,40 @@ int main() {
   args->dense_2_b = dense_2_b;
   args->dense_2_b_bytes = 0;
 
+  int batch_size = 500;
+  int test_input_size = 5000;
+  int batch_count = test_input_size / batch_size;
+
   startMemTracking();
   startProfiling();
 
-  input = readTrainedWeights(input_path.c_str(), 0, 2000, 3, 32, 32);
+  for (int j = 0; j < 1; j++) {
+    for (int i = 0; i < batch_count; i++) {
 
-  args->input = input;
-  args->input_bytes = 0;
+      int start = i * batch_size;
+      int end = (i + 1) * batch_size;
 
-  void *dfg = __hpvm__launch(0, root, (void *)args);
+      // copyInputBatch(input_path.c_str(),start,end,3,32,32, input);
+      void *input =
+          readInputBatch(input_path.c_str(), 0, start, end, 3, 32, 32);
 
-  __hpvm__wait(dfg);
+      args->input = input;
+      args->input_bytes = 0;
 
-  void *result = static_cast<RootIn *>(args)->r.tensor;
-  hpvm_request_tensor(result, 0);
+      void *dfg = __hpvm__launch(0, root, (void *)args);
 
-  computeAccuracy3(labels, result);
+      __hpvm__wait(dfg);
 
-  freeBatchMemory();
+      void *result = static_cast<RootIn *>(args)->r.tensor;
+      hpvm_request_tensor(result, 0);
 
-  stopProfiling();
+      llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end);
+
+      freeBatchMemory();
+    }
+  }
 
+  stopProfiling();
   __hpvm__cleanup();
 
   return 0;
diff --git a/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar100/vgg16_cifar100_loop.cpp b/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar10/vgg16_cifar10_cudnn.cpp
similarity index 91%
rename from hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar100/vgg16_cifar100_loop.cpp
rename to hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar10/vgg16_cifar10_cudnn.cpp
index 3a853d3a0f5399057164594951a884222a02e105..f1533c75b4b838f5b86dfbf915cfd359b9682636 100644
--- a/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar100/vgg16_cifar100_loop.cpp
+++ b/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar10/vgg16_cifar10_cudnn.cpp
@@ -11,7 +11,7 @@
 #include <config.h>
 
 void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -19,7 +19,7 @@ void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -27,7 +27,7 @@ void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_2_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -35,7 +35,7 @@ void var_2_node(void *t1, size_t bytes_t1) {
 }
 
 void var_3_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -43,7 +43,7 @@ void var_3_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -51,7 +51,7 @@ void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_5_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -59,7 +59,7 @@ void var_5_node(void *t1, size_t bytes_t1) {
 }
 
 void var_6_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
@@ -67,7 +67,7 @@ void var_6_node(void *t1, size_t bytes_t1) {
 }
 
 void var_7_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -75,7 +75,7 @@ void var_7_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_8_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -83,7 +83,7 @@ void var_8_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_9_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -91,7 +91,7 @@ void var_9_node(void *t1, size_t bytes_t1) {
 }
 
 void var_10_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -99,7 +99,7 @@ void var_10_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_11_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -107,7 +107,7 @@ void var_11_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_12_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -115,7 +115,7 @@ void var_12_node(void *t1, size_t bytes_t1) {
 }
 
 void var_13_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
@@ -123,7 +123,7 @@ void var_13_node(void *t1, size_t bytes_t1) {
 }
 
 void var_14_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -131,7 +131,7 @@ void var_14_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_15_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -139,7 +139,7 @@ void var_15_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_16_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -147,7 +147,7 @@ void var_16_node(void *t1, size_t bytes_t1) {
 }
 
 void var_17_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -155,7 +155,7 @@ void var_17_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_18_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -163,7 +163,7 @@ void var_18_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_19_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -171,7 +171,7 @@ void var_19_node(void *t1, size_t bytes_t1) {
 }
 
 void var_20_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -179,7 +179,7 @@ void var_20_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_21_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -187,7 +187,7 @@ void var_21_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_22_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -195,7 +195,7 @@ void var_22_node(void *t1, size_t bytes_t1) {
 }
 
 void var_23_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
@@ -203,7 +203,7 @@ void var_23_node(void *t1, size_t bytes_t1) {
 }
 
 void var_24_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -211,7 +211,7 @@ void var_24_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_25_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -219,7 +219,7 @@ void var_25_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_26_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -227,7 +227,7 @@ void var_26_node(void *t1, size_t bytes_t1) {
 }
 
 void var_27_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -235,7 +235,7 @@ void var_27_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_28_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -243,7 +243,7 @@ void var_28_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_29_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -251,7 +251,7 @@ void var_29_node(void *t1, size_t bytes_t1) {
 }
 
 void var_30_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -259,7 +259,7 @@ void var_30_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_31_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -267,7 +267,7 @@ void var_31_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_32_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -275,7 +275,7 @@ void var_32_node(void *t1, size_t bytes_t1) {
 }
 
 void var_33_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
@@ -283,7 +283,7 @@ void var_33_node(void *t1, size_t bytes_t1) {
 }
 
 void var_34_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -291,7 +291,7 @@ void var_34_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_35_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -299,7 +299,7 @@ void var_35_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_36_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -307,7 +307,7 @@ void var_36_node(void *t1, size_t bytes_t1) {
 }
 
 void var_37_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -315,7 +315,7 @@ void var_37_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_38_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -323,7 +323,7 @@ void var_38_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_39_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -331,7 +331,7 @@ void var_39_node(void *t1, size_t bytes_t1) {
 }
 
 void var_40_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -339,7 +339,7 @@ void var_40_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_41_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -347,7 +347,7 @@ void var_41_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_42_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -355,7 +355,7 @@ void var_42_node(void *t1, size_t bytes_t1) {
 }
 
 void var_43_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
@@ -363,7 +363,7 @@ void var_43_node(void *t1, size_t bytes_t1) {
 }
 
 void var_44_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_mul(t1, t2);
@@ -371,7 +371,7 @@ void var_44_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_45_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -379,7 +379,7 @@ void var_45_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_46_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -387,7 +387,7 @@ void var_46_node(void *t1, size_t bytes_t1) {
 }
 
 void var_47_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_mul(t1, t2);
@@ -395,7 +395,7 @@ void var_47_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_48_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -829,8 +829,7 @@ typedef struct __attribute__((__packed__)) {
 
 int main() {
 
-  std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/vgg16_cifar100/";
-
+  std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/vgg16_cifar10/";
   std::string input_path = dir_prefix + std::string("input.bin");
   std::string labels_path = dir_prefix + std::string("labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
@@ -918,18 +917,17 @@ int main() {
   void *dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0, 1, 512, 1, 1);
   std::string dense_2_w_path = dir_prefix + std::string("dense_2_w.bin");
   void *dense_2_w =
-      readTrainedWeights(dense_2_w_path.c_str(), 0, 1, 1, 512, 100);
+      readTrainedWeights(dense_2_w_path.c_str(), 0, 1, 1, 512, 10);
   std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin");
-  void *dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0, 1, 100, 1, 1);
-
-  //  void* input = readTrainedWeights(input_path.c_str(), 0,2000,3,32,32);
-  //  uint32_t* labels = readLabels3(labels_path.c_str(),2000);
+  void *dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0, 1, 10, 1, 1);
+  void *input = readTrainedWeights(input_path.c_str(), 0, 2000, 3, 32, 32);
+  uint32_t *labels = readLabels3(labels_path.c_str(), 2000);
 
   __hpvm__init();
   RootIn *args = static_cast<RootIn *>(malloc(sizeof(RootIn)));
 
-  //  args->input = input;
-  //  args->input_bytes = 0;
+  args->input = input;
+  args->input_bytes = 0;
   args->conv2d_1_w = conv2d_1_w;
   args->conv2d_1_w_bytes = 0;
   args->conv2d_1_b = conv2d_1_b;
@@ -991,39 +989,27 @@ int main() {
   args->dense_2_b = dense_2_b;
   args->dense_2_b_bytes = 0;
 
-  int batch_size = 500;
-  int test_input_size = 5000;
-  int batch_count = test_input_size / batch_size;
-
   startMemTracking();
   startProfiling();
 
-  for (int j = 0; j < 14; j++) {
-    for (int i = 0; i < batch_count; i++) {
+  input = readTrainedWeights(input_path.c_str(), 0, 2000, 3, 32, 32);
 
-      int start = i * batch_size;
-      int end = (i + 1) * batch_size;
+  args->input = input;
+  args->input_bytes = 0;
 
-      void *input =
-          readInputBatch(input_path.c_str(), 0, start, end, 3, 32, 32);
+  void *dfg = __hpvm__launch(0, root, (void *)args);
 
-      args->input = input;
-      args->input_bytes = 0;
+  __hpvm__wait(dfg);
 
-      void *dfg = __hpvm__launch(0, root, (void *)args);
+  void *result = static_cast<RootIn *>(args)->r.tensor;
+  hpvm_request_tensor(result, 0);
 
-      __hpvm__wait(dfg);
+  computeAccuracy3(labels, result);
 
-      void *result = static_cast<RootIn *>(args)->r.tensor;
-      hpvm_request_tensor(result, 0);
-
-      llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end);
-
-      freeBatchMemory();
-    }
-  }
+  freeBatchMemory();
 
   stopProfiling();
+
   __hpvm__cleanup();
 
   return 0;
diff --git a/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp b/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp
index 41fe9ae0f34c5c5086f8c16491f5035d5a382702..3a853d3a0f5399057164594951a884222a02e105 100644
--- a/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp
+++ b/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp
@@ -11,7 +11,7 @@
 #include <config.h>
 
 void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -19,7 +19,7 @@ void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -27,7 +27,7 @@ void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_2_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -35,7 +35,7 @@ void var_2_node(void *t1, size_t bytes_t1) {
 }
 
 void var_3_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -43,7 +43,7 @@ void var_3_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -51,7 +51,7 @@ void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_5_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -59,7 +59,7 @@ void var_5_node(void *t1, size_t bytes_t1) {
 }
 
 void var_6_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
@@ -67,7 +67,7 @@ void var_6_node(void *t1, size_t bytes_t1) {
 }
 
 void var_7_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -75,7 +75,7 @@ void var_7_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_8_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -83,7 +83,7 @@ void var_8_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_9_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -91,7 +91,7 @@ void var_9_node(void *t1, size_t bytes_t1) {
 }
 
 void var_10_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -99,7 +99,7 @@ void var_10_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_11_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -107,7 +107,7 @@ void var_11_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_12_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -115,7 +115,7 @@ void var_12_node(void *t1, size_t bytes_t1) {
 }
 
 void var_13_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
@@ -123,7 +123,7 @@ void var_13_node(void *t1, size_t bytes_t1) {
 }
 
 void var_14_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -131,7 +131,7 @@ void var_14_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_15_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -139,7 +139,7 @@ void var_15_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_16_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -147,7 +147,7 @@ void var_16_node(void *t1, size_t bytes_t1) {
 }
 
 void var_17_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -155,7 +155,7 @@ void var_17_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_18_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -163,7 +163,7 @@ void var_18_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_19_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -171,7 +171,7 @@ void var_19_node(void *t1, size_t bytes_t1) {
 }
 
 void var_20_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -179,7 +179,7 @@ void var_20_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_21_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -187,7 +187,7 @@ void var_21_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_22_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -195,7 +195,7 @@ void var_22_node(void *t1, size_t bytes_t1) {
 }
 
 void var_23_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
@@ -203,7 +203,7 @@ void var_23_node(void *t1, size_t bytes_t1) {
 }
 
 void var_24_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -211,7 +211,7 @@ void var_24_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_25_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -219,7 +219,7 @@ void var_25_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_26_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -227,7 +227,7 @@ void var_26_node(void *t1, size_t bytes_t1) {
 }
 
 void var_27_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -235,7 +235,7 @@ void var_27_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_28_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -243,7 +243,7 @@ void var_28_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_29_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -251,7 +251,7 @@ void var_29_node(void *t1, size_t bytes_t1) {
 }
 
 void var_30_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -259,7 +259,7 @@ void var_30_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_31_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -267,7 +267,7 @@ void var_31_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_32_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -275,7 +275,7 @@ void var_32_node(void *t1, size_t bytes_t1) {
 }
 
 void var_33_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
@@ -283,7 +283,7 @@ void var_33_node(void *t1, size_t bytes_t1) {
 }
 
 void var_34_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -291,7 +291,7 @@ void var_34_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_35_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -299,7 +299,7 @@ void var_35_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_36_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -307,7 +307,7 @@ void var_36_node(void *t1, size_t bytes_t1) {
 }
 
 void var_37_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -315,7 +315,7 @@ void var_37_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_38_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -323,7 +323,7 @@ void var_38_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_39_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -331,7 +331,7 @@ void var_39_node(void *t1, size_t bytes_t1) {
 }
 
 void var_40_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -339,7 +339,7 @@ void var_40_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_41_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -347,7 +347,7 @@ void var_41_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_42_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -355,7 +355,7 @@ void var_42_node(void *t1, size_t bytes_t1) {
 }
 
 void var_43_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
@@ -363,7 +363,7 @@ void var_43_node(void *t1, size_t bytes_t1) {
 }
 
 void var_44_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_mul(t1, t2);
@@ -371,7 +371,7 @@ void var_44_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_45_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -379,7 +379,7 @@ void var_45_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_46_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -387,7 +387,7 @@ void var_46_node(void *t1, size_t bytes_t1) {
 }
 
 void var_47_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_mul(t1, t2);
@@ -395,7 +395,7 @@ void var_47_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_48_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -922,14 +922,14 @@ int main() {
   std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin");
   void *dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0, 1, 100, 1, 1);
 
-  void *input = readTrainedWeights(input_path.c_str(), 0, 2000, 3, 32, 32);
-  uint32_t *labels = readLabels3(labels_path.c_str(), 2000);
+  //  void* input = readTrainedWeights(input_path.c_str(), 0,2000,3,32,32);
+  //  uint32_t* labels = readLabels3(labels_path.c_str(),2000);
 
   __hpvm__init();
   RootIn *args = static_cast<RootIn *>(malloc(sizeof(RootIn)));
 
-  args->input = input;
-  args->input_bytes = 0;
+  //  args->input = input;
+  //  args->input_bytes = 0;
   args->conv2d_1_w = conv2d_1_w;
   args->conv2d_1_w_bytes = 0;
   args->conv2d_1_b = conv2d_1_b;
@@ -991,14 +991,40 @@ int main() {
   args->dense_2_b = dense_2_b;
   args->dense_2_b_bytes = 0;
 
-  void *dfg = __hpvm__launch(0, root, (void *)args);
+  int batch_size = 500;
+  int test_input_size = 5000;
+  int batch_count = test_input_size / batch_size;
+
+  startMemTracking();
+  startProfiling();
+
+  for (int j = 0; j < 14; j++) {
+    for (int i = 0; i < batch_count; i++) {
+
+      int start = i * batch_size;
+      int end = (i + 1) * batch_size;
+
+      void *input =
+          readInputBatch(input_path.c_str(), 0, start, end, 3, 32, 32);
+
+      args->input = input;
+      args->input_bytes = 0;
+
+      void *dfg = __hpvm__launch(0, root, (void *)args);
+
+      __hpvm__wait(dfg);
+
+      void *result = static_cast<RootIn *>(args)->r.tensor;
+      hpvm_request_tensor(result, 0);
+
+      llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end);
 
-  __hpvm__wait(dfg);
-  void *result = static_cast<RootIn *>(args)->r.tensor;
-  hpvm_request_tensor(result, 0);
+      freeBatchMemory();
+    }
+  }
 
+  stopProfiling();
   __hpvm__cleanup();
-  computeAccuracy3(labels, result);
 
   return 0;
 }
diff --git a/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar10/vgg16_cifar10_loop.cpp b/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar100/vgg16_cifar100_cudnn.cpp
similarity index 91%
rename from hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar10/vgg16_cifar10_loop.cpp
rename to hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar100/vgg16_cifar100_cudnn.cpp
index 059bff6d22a51853090700072d4cf3915ed5f796..41fe9ae0f34c5c5086f8c16491f5035d5a382702 100644
--- a/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar10/vgg16_cifar10_loop.cpp
+++ b/hpvm/test/dnn_benchmarks/benchmarks/vgg16_cifar100/vgg16_cifar100_cudnn.cpp
@@ -11,7 +11,7 @@
 #include <config.h>
 
 void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -19,7 +19,7 @@ void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -27,7 +27,7 @@ void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_2_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -35,7 +35,7 @@ void var_2_node(void *t1, size_t bytes_t1) {
 }
 
 void var_3_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -43,7 +43,7 @@ void var_3_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -51,7 +51,7 @@ void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_5_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -59,7 +59,7 @@ void var_5_node(void *t1, size_t bytes_t1) {
 }
 
 void var_6_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
@@ -67,7 +67,7 @@ void var_6_node(void *t1, size_t bytes_t1) {
 }
 
 void var_7_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -75,7 +75,7 @@ void var_7_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_8_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -83,7 +83,7 @@ void var_8_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_9_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -91,7 +91,7 @@ void var_9_node(void *t1, size_t bytes_t1) {
 }
 
 void var_10_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -99,7 +99,7 @@ void var_10_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_11_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -107,7 +107,7 @@ void var_11_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_12_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -115,7 +115,7 @@ void var_12_node(void *t1, size_t bytes_t1) {
 }
 
 void var_13_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
@@ -123,7 +123,7 @@ void var_13_node(void *t1, size_t bytes_t1) {
 }
 
 void var_14_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -131,7 +131,7 @@ void var_14_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_15_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -139,7 +139,7 @@ void var_15_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_16_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -147,7 +147,7 @@ void var_16_node(void *t1, size_t bytes_t1) {
 }
 
 void var_17_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -155,7 +155,7 @@ void var_17_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_18_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -163,7 +163,7 @@ void var_18_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_19_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -171,7 +171,7 @@ void var_19_node(void *t1, size_t bytes_t1) {
 }
 
 void var_20_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -179,7 +179,7 @@ void var_20_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_21_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -187,7 +187,7 @@ void var_21_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_22_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -195,7 +195,7 @@ void var_22_node(void *t1, size_t bytes_t1) {
 }
 
 void var_23_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
@@ -203,7 +203,7 @@ void var_23_node(void *t1, size_t bytes_t1) {
 }
 
 void var_24_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -211,7 +211,7 @@ void var_24_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_25_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -219,7 +219,7 @@ void var_25_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_26_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -227,7 +227,7 @@ void var_26_node(void *t1, size_t bytes_t1) {
 }
 
 void var_27_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -235,7 +235,7 @@ void var_27_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_28_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -243,7 +243,7 @@ void var_28_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_29_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -251,7 +251,7 @@ void var_29_node(void *t1, size_t bytes_t1) {
 }
 
 void var_30_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -259,7 +259,7 @@ void var_30_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_31_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -267,7 +267,7 @@ void var_31_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_32_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -275,7 +275,7 @@ void var_32_node(void *t1, size_t bytes_t1) {
 }
 
 void var_33_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
@@ -283,7 +283,7 @@ void var_33_node(void *t1, size_t bytes_t1) {
 }
 
 void var_34_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -291,7 +291,7 @@ void var_34_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_35_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -299,7 +299,7 @@ void var_35_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_36_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -307,7 +307,7 @@ void var_36_node(void *t1, size_t bytes_t1) {
 }
 
 void var_37_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -315,7 +315,7 @@ void var_37_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_38_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -323,7 +323,7 @@ void var_38_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_39_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -331,7 +331,7 @@ void var_39_node(void *t1, size_t bytes_t1) {
 }
 
 void var_40_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -339,7 +339,7 @@ void var_40_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_41_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -347,7 +347,7 @@ void var_41_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_42_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -355,7 +355,7 @@ void var_42_node(void *t1, size_t bytes_t1) {
 }
 
 void var_43_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
@@ -363,7 +363,7 @@ void var_43_node(void *t1, size_t bytes_t1) {
 }
 
 void var_44_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_mul(t1, t2);
@@ -371,7 +371,7 @@ void var_44_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_45_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -379,7 +379,7 @@ void var_45_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_46_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -387,7 +387,7 @@ void var_46_node(void *t1, size_t bytes_t1) {
 }
 
 void var_47_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_mul(t1, t2);
@@ -395,7 +395,7 @@ void var_47_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_48_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -829,7 +829,8 @@ typedef struct __attribute__((__packed__)) {
 
 int main() {
 
-  std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/vgg16_cifar10/";
+  std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/vgg16_cifar100/";
+
   std::string input_path = dir_prefix + std::string("input.bin");
   std::string labels_path = dir_prefix + std::string("labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
@@ -917,17 +918,18 @@ int main() {
   void *dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0, 1, 512, 1, 1);
   std::string dense_2_w_path = dir_prefix + std::string("dense_2_w.bin");
   void *dense_2_w =
-      readTrainedWeights(dense_2_w_path.c_str(), 0, 1, 1, 512, 10);
+      readTrainedWeights(dense_2_w_path.c_str(), 0, 1, 1, 512, 100);
   std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin");
-  void *dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0, 1, 10, 1, 1);
-  // void* input = readTrainedWeights(input_path.c_str(), 0,2000,3,32,32);
-  // uint32_t* labels = readLabels3(labels_path.c_str(),2000);
+  void *dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0, 1, 100, 1, 1);
+
+  void *input = readTrainedWeights(input_path.c_str(), 0, 2000, 3, 32, 32);
+  uint32_t *labels = readLabels3(labels_path.c_str(), 2000);
 
   __hpvm__init();
   RootIn *args = static_cast<RootIn *>(malloc(sizeof(RootIn)));
 
-  // args->input = input;
-  // args->input_bytes = 0;
+  args->input = input;
+  args->input_bytes = 0;
   args->conv2d_1_w = conv2d_1_w;
   args->conv2d_1_w_bytes = 0;
   args->conv2d_1_b = conv2d_1_b;
@@ -989,41 +991,14 @@ int main() {
   args->dense_2_b = dense_2_b;
   args->dense_2_b_bytes = 0;
 
-  int batch_size = 500;
-  int test_input_size = 5000;
-  int batch_count = test_input_size / batch_size;
-
-  startMemTracking();
-  startProfiling();
-
-  for (int j = 0; j < 1; j++) {
-    for (int i = 0; i < batch_count; i++) {
-
-      int start = i * batch_size;
-      int end = (i + 1) * batch_size;
-
-      // copyInputBatch(input_path.c_str(),start,end,3,32,32, input);
-      void *input =
-          readInputBatch(input_path.c_str(), 0, start, end, 3, 32, 32);
-
-      args->input = input;
-      args->input_bytes = 0;
-
-      void *dfg = __hpvm__launch(0, root, (void *)args);
-
-      __hpvm__wait(dfg);
-
-      void *result = static_cast<RootIn *>(args)->r.tensor;
-      hpvm_request_tensor(result, 0);
-
-      llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end);
+  void *dfg = __hpvm__launch(0, root, (void *)args);
 
-      freeBatchMemory();
-    }
-  }
+  __hpvm__wait(dfg);
+  void *result = static_cast<RootIn *>(args)->r.tensor;
+  hpvm_request_tensor(result, 0);
 
-  stopProfiling();
   __hpvm__cleanup();
+  computeAccuracy3(labels, result);
 
   return 0;
 }
diff --git a/hpvm/test/dnn_benchmarks/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp b/hpvm/test/dnn_benchmarks/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp
index f269aa9091521809751cd2214a46d039379c0114..2bd129300adc5ffb609df1e46c951630d682b883 100644
--- a/hpvm/test/dnn_benchmarks/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp
+++ b/hpvm/test/dnn_benchmarks/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp
@@ -11,7 +11,7 @@
 #include <config.h>
 
 void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -19,7 +19,7 @@ void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -27,7 +27,7 @@ void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_2_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -35,7 +35,7 @@ void var_2_node(void *t1, size_t bytes_t1) {
 }
 
 void var_3_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -43,7 +43,7 @@ void var_3_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -51,7 +51,7 @@ void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_5_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -59,7 +59,7 @@ void var_5_node(void *t1, size_t bytes_t1) {
 }
 
 void var_6_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
@@ -67,7 +67,7 @@ void var_6_node(void *t1, size_t bytes_t1) {
 }
 
 void var_7_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -75,7 +75,7 @@ void var_7_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_8_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -83,7 +83,7 @@ void var_8_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_9_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -91,7 +91,7 @@ void var_9_node(void *t1, size_t bytes_t1) {
 }
 
 void var_10_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -99,7 +99,7 @@ void var_10_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_11_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -107,7 +107,7 @@ void var_11_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_12_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -115,7 +115,7 @@ void var_12_node(void *t1, size_t bytes_t1) {
 }
 
 void var_13_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
@@ -123,7 +123,7 @@ void var_13_node(void *t1, size_t bytes_t1) {
 }
 
 void var_14_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -131,7 +131,7 @@ void var_14_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_15_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -139,7 +139,7 @@ void var_15_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_16_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -147,7 +147,7 @@ void var_16_node(void *t1, size_t bytes_t1) {
 }
 
 void var_17_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -155,7 +155,7 @@ void var_17_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_18_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -163,7 +163,7 @@ void var_18_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_19_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -171,7 +171,7 @@ void var_19_node(void *t1, size_t bytes_t1) {
 }
 
 void var_20_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -179,7 +179,7 @@ void var_20_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_21_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -187,7 +187,7 @@ void var_21_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_22_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -195,7 +195,7 @@ void var_22_node(void *t1, size_t bytes_t1) {
 }
 
 void var_23_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
@@ -203,7 +203,7 @@ void var_23_node(void *t1, size_t bytes_t1) {
 }
 
 void var_24_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -211,7 +211,7 @@ void var_24_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_25_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -219,7 +219,7 @@ void var_25_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_26_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -227,7 +227,7 @@ void var_26_node(void *t1, size_t bytes_t1) {
 }
 
 void var_27_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -235,7 +235,7 @@ void var_27_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_28_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -243,7 +243,7 @@ void var_28_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_29_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -251,7 +251,7 @@ void var_29_node(void *t1, size_t bytes_t1) {
 }
 
 void var_30_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -259,7 +259,7 @@ void var_30_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_31_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -267,7 +267,7 @@ void var_31_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_32_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -275,7 +275,7 @@ void var_32_node(void *t1, size_t bytes_t1) {
 }
 
 void var_33_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
@@ -283,7 +283,7 @@ void var_33_node(void *t1, size_t bytes_t1) {
 }
 
 void var_34_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -291,7 +291,7 @@ void var_34_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_35_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -299,7 +299,7 @@ void var_35_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_36_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -307,7 +307,7 @@ void var_36_node(void *t1, size_t bytes_t1) {
 }
 
 void var_37_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -315,7 +315,7 @@ void var_37_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_38_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -323,7 +323,7 @@ void var_38_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_39_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -331,7 +331,7 @@ void var_39_node(void *t1, size_t bytes_t1) {
 }
 
 void var_40_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -339,7 +339,7 @@ void var_40_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_41_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -347,7 +347,7 @@ void var_41_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_42_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -355,7 +355,7 @@ void var_42_node(void *t1, size_t bytes_t1) {
 }
 
 void var_43_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
@@ -363,7 +363,7 @@ void var_43_node(void *t1, size_t bytes_t1) {
 }
 
 void var_44_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_mul(t1, t2);
@@ -371,7 +371,7 @@ void var_44_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_45_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -379,7 +379,7 @@ void var_45_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_46_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -387,7 +387,7 @@ void var_46_node(void *t1, size_t bytes_t1) {
 }
 
 void var_47_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_mul(t1, t2);
@@ -395,7 +395,7 @@ void var_47_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_48_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -403,7 +403,7 @@ void var_48_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_49_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -411,7 +411,7 @@ void var_49_node(void *t1, size_t bytes_t1) {
 }
 
 void var_50_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_mul(t1, t2);
@@ -419,7 +419,7 @@ void var_50_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_51_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::CUDNN_TARGET);
+  __hpvm__hint(hpvm::TENSOR_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -877,7 +877,8 @@ typedef struct __attribute__((__packed__)) {
 
 int main() {
 
-  std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/vgg16_imagenet/";
+  std::string dir_prefix =
+      std::string(MODEL_PARAMS_DIR) + "/vgg16_imagenet/";
   std::string input_path = dir_prefix + std::string("input.bin");
   std::string labels_path = dir_prefix + std::string("labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
@@ -976,14 +977,10 @@ int main() {
   std::string dense_3_b_path = dir_prefix + std::string("dense_3_b.bin");
   void *dense_3_b =
       readTrainedWeights(dense_3_b_path.c_str(), 0, 1, 1000, 1, 1);
-  void *input = readTrainedWeights(input_path.c_str(), 0, 100, 3, 224, 224);
-  uint32_t *labels = readLabelsBatch3(labels_path.c_str(), 0, 100);
 
   __hpvm__init();
   RootIn *args = static_cast<RootIn *>(malloc(sizeof(RootIn)));
 
-  args->input = input;
-  args->input_bytes = 0;
   args->conv2d_1_w = conv2d_1_w;
   args->conv2d_1_w_bytes = 0;
   args->conv2d_1_b = conv2d_1_b;
@@ -1049,14 +1046,40 @@ int main() {
   args->dense_3_b = dense_3_b;
   args->dense_3_b_bytes = 0;
 
-  void *dfg = __hpvm__launch(0, root, (void *)args);
+  startMemTracking();
+  startProfiling();
+
+  unsigned int batch_size = 50;        // inputs handed to the graph per launch
+  unsigned int test_input_size = 1000; // total inputs walked by the loop below
+  unsigned int batch_count = test_input_size / batch_size;
+
+  for (int j = 0; j < 1; j++) { // outer loop currently runs exactly once
+    for (unsigned int i = 0; i < batch_count; i++) { // unsigned: matches batch_count
 
-  __hpvm__wait(dfg);
+      int start = i * batch_size;     // first input index of this batch
+      int end = (i + 1) * batch_size; // presumably exclusive upper bound -- TODO confirm
 
-  void *result = static_cast<RootIn *>(args)->r.tensor;
-  hpvm_request_tensor(result, 0);
+      void *input = // batch [start, end) read with trailing dims 3, 224, 224
+          readInputBatch(input_path.c_str(), 0, start, end, 3, 224, 224);
 
+      args->input = input; // only the input slot changes between launches
+      args->input_bytes = 0;
+
+      void *dfg = __hpvm__launch(0, root, (void *)args);
+
+      __hpvm__wait(dfg); // presumably blocks until this launch completes
+
+      void *result = args->r.tensor; // args is already a RootIn *, no cast needed
+      hpvm_request_tensor(result, 0);
+
+      llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end);
+
+      freeBatchMemory(); // presumably releases per-batch tensors -- TODO confirm
+    }
+  }
+
+  stopProfiling();
   __hpvm__cleanup();
-  computeAccuracy3(labels, result);
+
   return 0;
 }
diff --git a/hpvm/test/dnn_benchmarks/benchmarks/vgg16_imagenet/vgg16_imagenet_loop.cpp b/hpvm/test/dnn_benchmarks/benchmarks/vgg16_imagenet/vgg16_imagenet_cudnn.cpp
similarity index 91%
rename from hpvm/test/dnn_benchmarks/benchmarks/vgg16_imagenet/vgg16_imagenet_loop.cpp
rename to hpvm/test/dnn_benchmarks/benchmarks/vgg16_imagenet/vgg16_imagenet_cudnn.cpp
index 2bd129300adc5ffb609df1e46c951630d682b883..f269aa9091521809751cd2214a46d039379c0114 100644
--- a/hpvm/test/dnn_benchmarks/benchmarks/vgg16_imagenet/vgg16_imagenet_loop.cpp
+++ b/hpvm/test/dnn_benchmarks/benchmarks/vgg16_imagenet/vgg16_imagenet_cudnn.cpp
@@ -11,7 +11,7 @@
 #include <config.h>
 
 void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -19,7 +19,7 @@ void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -27,7 +27,7 @@ void var_1_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_2_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -35,7 +35,7 @@ void var_2_node(void *t1, size_t bytes_t1) {
 }
 
 void var_3_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -43,7 +43,7 @@ void var_3_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -51,7 +51,7 @@ void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_5_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -59,7 +59,7 @@ void var_5_node(void *t1, size_t bytes_t1) {
 }
 
 void var_6_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
@@ -67,7 +67,7 @@ void var_6_node(void *t1, size_t bytes_t1) {
 }
 
 void var_7_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -75,7 +75,7 @@ void var_7_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_8_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -83,7 +83,7 @@ void var_8_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_9_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -91,7 +91,7 @@ void var_9_node(void *t1, size_t bytes_t1) {
 }
 
 void var_10_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -99,7 +99,7 @@ void var_10_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_11_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -107,7 +107,7 @@ void var_11_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_12_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -115,7 +115,7 @@ void var_12_node(void *t1, size_t bytes_t1) {
 }
 
 void var_13_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
@@ -123,7 +123,7 @@ void var_13_node(void *t1, size_t bytes_t1) {
 }
 
 void var_14_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -131,7 +131,7 @@ void var_14_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_15_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -139,7 +139,7 @@ void var_15_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_16_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -147,7 +147,7 @@ void var_16_node(void *t1, size_t bytes_t1) {
 }
 
 void var_17_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -155,7 +155,7 @@ void var_17_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_18_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -163,7 +163,7 @@ void var_18_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_19_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -171,7 +171,7 @@ void var_19_node(void *t1, size_t bytes_t1) {
 }
 
 void var_20_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -179,7 +179,7 @@ void var_20_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_21_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -187,7 +187,7 @@ void var_21_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_22_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -195,7 +195,7 @@ void var_22_node(void *t1, size_t bytes_t1) {
 }
 
 void var_23_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
@@ -203,7 +203,7 @@ void var_23_node(void *t1, size_t bytes_t1) {
 }
 
 void var_24_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -211,7 +211,7 @@ void var_24_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_25_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -219,7 +219,7 @@ void var_25_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_26_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -227,7 +227,7 @@ void var_26_node(void *t1, size_t bytes_t1) {
 }
 
 void var_27_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -235,7 +235,7 @@ void var_27_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_28_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -243,7 +243,7 @@ void var_28_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_29_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -251,7 +251,7 @@ void var_29_node(void *t1, size_t bytes_t1) {
 }
 
 void var_30_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -259,7 +259,7 @@ void var_30_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_31_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -267,7 +267,7 @@ void var_31_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_32_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -275,7 +275,7 @@ void var_32_node(void *t1, size_t bytes_t1) {
 }
 
 void var_33_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
@@ -283,7 +283,7 @@ void var_33_node(void *t1, size_t bytes_t1) {
 }
 
 void var_34_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -291,7 +291,7 @@ void var_34_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_35_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -299,7 +299,7 @@ void var_35_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_36_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -307,7 +307,7 @@ void var_36_node(void *t1, size_t bytes_t1) {
 }
 
 void var_37_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -315,7 +315,7 @@ void var_37_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_38_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -323,7 +323,7 @@ void var_38_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_39_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -331,7 +331,7 @@ void var_39_node(void *t1, size_t bytes_t1) {
 }
 
 void var_40_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_convolution(t1, t2, 1, 1, 1, 1);
@@ -339,7 +339,7 @@ void var_40_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_41_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -347,7 +347,7 @@ void var_41_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_42_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -355,7 +355,7 @@ void var_42_node(void *t1, size_t bytes_t1) {
 }
 
 void var_43_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
@@ -363,7 +363,7 @@ void var_43_node(void *t1, size_t bytes_t1) {
 }
 
 void var_44_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_mul(t1, t2);
@@ -371,7 +371,7 @@ void var_44_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_45_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -379,7 +379,7 @@ void var_45_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_46_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -387,7 +387,7 @@ void var_46_node(void *t1, size_t bytes_t1) {
 }
 
 void var_47_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_mul(t1, t2);
@@ -395,7 +395,7 @@ void var_47_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_48_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -403,7 +403,7 @@ void var_48_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_49_node(void *t1, size_t bytes_t1) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(1, t1, 0);
 
   void *r = __hpvm__tensor_relu(t1);
@@ -411,7 +411,7 @@ void var_49_node(void *t1, size_t bytes_t1) {
 }
 
 void var_50_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_mul(t1, t2);
@@ -419,7 +419,7 @@ void var_50_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
 }
 
 void var_51_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
-  __hpvm__hint(hpvm::TENSOR_TARGET);
+  __hpvm__hint(hpvm::CUDNN_TARGET);
   __hpvm__attributes(2, t1, t2, 0);
 
   void *r = __hpvm__tensor_add(t1, t2);
@@ -877,8 +877,7 @@ typedef struct __attribute__((__packed__)) {
 
 int main() {
 
-  std::string dir_prefix =
-      std::string(MODEL_PARAMS_DIR) + "/vgg16_imagenet/";
+  std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/vgg16_imagenet/";
   std::string input_path = dir_prefix + std::string("input.bin");
   std::string labels_path = dir_prefix + std::string("labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
@@ -977,10 +976,14 @@ int main() {
   std::string dense_3_b_path = dir_prefix + std::string("dense_3_b.bin");
   void *dense_3_b =
       readTrainedWeights(dense_3_b_path.c_str(), 0, 1, 1000, 1, 1);
+  void *input = readTrainedWeights(input_path.c_str(), 0, 100, 3, 224, 224);
+  uint32_t *labels = readLabelsBatch3(labels_path.c_str(), 0, 100);
 
   __hpvm__init();
   RootIn *args = static_cast<RootIn *>(malloc(sizeof(RootIn)));
 
+  args->input = input;
+  args->input_bytes = 0;
   args->conv2d_1_w = conv2d_1_w;
   args->conv2d_1_w_bytes = 0;
   args->conv2d_1_b = conv2d_1_b;
@@ -1046,40 +1049,14 @@ int main() {
   args->dense_3_b = dense_3_b;
   args->dense_3_b_bytes = 0;
 
-  startMemTracking();
-  startProfiling();
-
-  unsigned int batch_size = 50;
-  unsigned int test_input_size = 1000;
-  unsigned int batch_count = test_input_size / batch_size;
-
-  for (int j = 0; j < 1; j++) {
-    for (int i = 0; i < batch_count; i++) {
+  void *dfg = __hpvm__launch(0, root, (void *)args); // one launch of `root` over the packed args
 
-      int start = i * batch_size;
-      int end = (i + 1) * batch_size;
+  __hpvm__wait(dfg); // presumably blocks until the launch completes -- TODO confirm
 
-      void *input =
-          readInputBatch(input_path.c_str(), 0, start, end, 3, 224, 224);
+  void *result = static_cast<RootIn *>(args)->r.tensor; // output tensor slot of the argument struct
+  hpvm_request_tensor(result, 0); // NOTE(review): meaning of the 0 argument is not visible here
 
-      args->input = input;
-      args->input_bytes = 0;
-
-      void *dfg = __hpvm__launch(0, root, (void *)args);
-
-      __hpvm__wait(dfg);
-
-      void *result = static_cast<RootIn *>(args)->r.tensor;
-      hpvm_request_tensor(result, 0);
-
-      llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end);
-
-      freeBatchMemory();
-    }
-  }
-
-  stopProfiling();
   __hpvm__cleanup();
-
+  computeAccuracy3(labels, result); // NOTE(review): reads result after __hpvm__cleanup(); confirm the tensor is still valid here
   return 0;
 }