diff --git a/hpvm/projects/torch2hpvm/torch2hpvm/template_hpvm.cpp.in b/hpvm/projects/torch2hpvm/torch2hpvm/template_hpvm.cpp.in
index 1c4a386ce2fd9e50953a49377df20c9d3ebf75da..1f6dd875ffa6b39ab57609d7690c9a9ad3944b44 100644
--- a/hpvm/projects/torch2hpvm/torch2hpvm/template_hpvm.cpp.in
+++ b/hpvm/projects/torch2hpvm/torch2hpvm/template_hpvm.cpp.in
@@ -93,7 +93,9 @@ int main(int argc, char *argv[]){
   #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++){
     int start = i * batch_size, end = start + batch_size;
-    copyInputBatch(input_path.c_str(), start, end, {{input_shape|join(', ')}}, {{input_name}});
+    void *{{input_name}} = readInputBatch(input_path.c_str(), 0, start, end, {{input_shape|join(', ')}});
+    args->{{input_name}} = {{input_name}};
+    args->{{input_name}}_bytes = 0;
 
     void* dfg = __hpvm__launch(0, root, (void*) args);
     __hpvm__wait(dfg);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10.cpp
index ebbfec271686292ced4cf830ae8787303e8cda68..860e3b6423bc78d073096a981f765bed10fb73a7 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10.cpp
@@ -512,15 +512,13 @@ int main(int argc, char *argv[]) {
     llvm_hpvm_initializeRuntimeController(config_path.c_str());
   }
 
-  if (config_path != "") {
-    llvm_hpvm_initializeRuntimeController(config_path.c_str());
-  }
-
   startMemTracking();
 #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++) {
     int start = i * batch_size, end = start + batch_size;
-    copyInputBatch(input_path.c_str(), start, end, 3, 32, 32, input);
+    void* input = readInputBatch(input_path.c_str(), nchw, start, end, 3, 32, 32);
+    args->input = input;
+    args->input_bytes = 0;
 
     void *dfg = __hpvm__launch(0, root, (void *)args);
     __hpvm__wait(dfg);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10_cudnn.cpp
index 44b8e0b37d9e774b939608a0aa5988b9f2ba9565..f44e19dece121cb01a1f3e6a8bf9e27ea945e6ce 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10_cudnn.cpp
@@ -517,16 +517,14 @@ int main(int argc, char *argv[]) {
     llvm_hpvm_initializeRuntimeController(config_path.c_str());
   }
 
-  if (config_path != "") {
-    llvm_hpvm_initializeRuntimeController(config_path.c_str());
-  }
-
   float total_accuracy = 0;
   startMemTracking();
 #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++) {
     int start = i * batch_size, end = start + batch_size;
-    copyInputBatch(input_path.c_str(), start, end, 3, 32, 32, input);
+    void* input = readInputBatch(input_path.c_str(), nchw, start, end, 3, 32, 32);
+    args->input = input;
+    args->input_bytes = 0;
 
     void *dfg = __hpvm__launch(0, root, (void *)args);
     __hpvm__wait(dfg);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10.cpp
index fad49161eb2b6a6c248ba2531bd1c31fe00023b8..6d8973ad982b1aa3b206a0cf40ee1888c37e293f 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10.cpp
@@ -454,15 +454,13 @@ int main(int argc, char *argv[]) {
     llvm_hpvm_initializeRuntimeController(config_path.c_str());
   }
 
-  if (config_path != "") {
-    llvm_hpvm_initializeRuntimeController(config_path.c_str());
-  }
-
   startMemTracking();
 #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++) {
     int start = i * batch_size, end = start + batch_size;
-    copyInputBatch(input_path.c_str(), start, end, 3, 32, 32, input);
+    void* input = readInputBatch(input_path.c_str(), nchw, start, end, 3, 32, 32);
+    args->input = input;
+    args->input_bytes = 0;
 
     void *dfg = __hpvm__launch(0, root, (void *)args);
     __hpvm__wait(dfg);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10_cudnn.cpp
index 1fe72045f0ca22925c6e395ee5a7a2d0d9beef9b..b2a940d501d8b1c2e29dbe7240012ace8197bbb4 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10_cudnn.cpp
@@ -460,16 +460,14 @@ int main(int argc, char *argv[]) {
     llvm_hpvm_initializeRuntimeController(config_path.c_str());
   }
 
-  if (config_path != "") {
-    llvm_hpvm_initializeRuntimeController(config_path.c_str());
-  }
-
   float total_accuracy = 0;
   startMemTracking();
 #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++) {
     int start = i * batch_size, end = start + batch_size;
-    copyInputBatch(input_path.c_str(), start, end, 3, 32, 32, input);
+    void* input = readInputBatch(input_path.c_str(), nchw, start, end, 3, 32, 32);
+    args->input = input;
+    args->input_bytes = 0;
 
     void *dfg = __hpvm__launch(0, root, (void *)args);
     __hpvm__wait(dfg);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp
index dcb8b4faf825e0814bdaf38a83a2683763fa8cd4..474ab64cadf3eac158d39e6e1e6686765c3bac36 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp
@@ -572,15 +572,13 @@ int main(int argc, char *argv[]) {
     llvm_hpvm_initializeRuntimeController(config_path.c_str());
   }
 
-  if (config_path != "") {
-    llvm_hpvm_initializeRuntimeController(config_path.c_str());
-  }
-
   startMemTracking();
 #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++) {
     int start = i * batch_size, end = start + batch_size;
-    copyInputBatch(input_path.c_str(), start, end, 3, 224, 224, input);
+    void* input = readInputBatch(input_path.c_str(), nchw, start, end, 3, 224, 224);
+    args->input = input;
+    args->input_bytes = 0;
 
     void *dfg = __hpvm__launch(0, root, (void *)args);
     __hpvm__wait(dfg);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet_cudnn.cpp
index 64a781102a6f7545a573016df5a49b7b04ca8fd6..10e95202f2e2188a9dcd1c12a168a612f897fcf9 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet_cudnn.cpp
@@ -577,16 +577,14 @@ int main(int argc, char *argv[]) {
     llvm_hpvm_initializeRuntimeController(config_path.c_str());
   }
 
-  if (config_path != "") {
-    llvm_hpvm_initializeRuntimeController(config_path.c_str());
-  }
-
   float total_accuracy = 0;
   startMemTracking();
 #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++) {
     int start = i * batch_size, end = start + batch_size;
-    copyInputBatch(input_path.c_str(), start, end, 3, 224, 224, input);
+    void* input = readInputBatch(input_path.c_str(), nchw, start, end, 3, 224, 224);
+    args->input = input;
+    args->input_bytes = 0;
 
     void *dfg = __hpvm__launch(0, root, (void *)args);
     __hpvm__wait(dfg);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist.cpp
index 63d8bd1cc735f9d9dbfd52ac4ec56aebbb06ceeb..5c42f6953cfd9256cea73b39868a7ec571f18565 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist.cpp
@@ -335,15 +335,13 @@ int main(int argc, char *argv[]) {
     llvm_hpvm_initializeRuntimeController(config_path.c_str());
   }
 
-  if (config_path != "") {
-    llvm_hpvm_initializeRuntimeController(config_path.c_str());
-  }
-
   startMemTracking();
 #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++) {
     int start = i * batch_size, end = start + batch_size;
-    copyInputBatch(input_path.c_str(), start, end, 1, 28, 28, input);
+    void* input = readInputBatch(input_path.c_str(), nchw, start, end, 1, 28, 28);
+    args->input = input;
+    args->input_bytes = 0;
 
     void *dfg = __hpvm__launch(0, root, (void *)args);
     __hpvm__wait(dfg);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist_cudnn.cpp
index 558e7effa1cf0d3395e9fc21b26441210a15b033..0c2568f81b701cb474a257b190be61b4bba45f3e 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist_cudnn.cpp
@@ -340,16 +340,14 @@ int main(int argc, char *argv[]) {
     llvm_hpvm_initializeRuntimeController(config_path.c_str());
   }
 
-  if (config_path != "") {
-    llvm_hpvm_initializeRuntimeController(config_path.c_str());
-  }
-
   float total_accuracy = 0;
   startMemTracking();
 #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++) {
     int start = i * batch_size, end = start + batch_size;
-    copyInputBatch(input_path.c_str(), start, end, 1, 28, 28, input);
+    void* input = readInputBatch(input_path.c_str(), nchw, start, end, 1, 28, 28);
+    args->input = input;
+    args->input_bytes = 0;
 
     void *dfg = __hpvm__launch(0, root, (void *)args);
     __hpvm__wait(dfg);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10.cpp
index fc7835dedd4967a1fd03b3f878b944173974dd21..01d027341686291c83e605bdeee1bbcffa68d6e9 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10.cpp
@@ -2804,15 +2804,13 @@ int main(int argc, char *argv[]) {
     llvm_hpvm_initializeRuntimeController(config_path.c_str());
   }
 
-  if (config_path != "") {
-    llvm_hpvm_initializeRuntimeController(config_path.c_str());
-  }
-
   startMemTracking();
 #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++) {
     int start = i * batch_size, end = start + batch_size;
-    copyInputBatch(input_path.c_str(), start, end, 3, 32, 32, input);
+    void* input = readInputBatch(input_path.c_str(), nchw, start, end, 3, 32, 32);
+    args->input = input;
+    args->input_bytes = 0;
 
     void *dfg = __hpvm__launch(0, root, (void *)args);
     __hpvm__wait(dfg);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10_cudnn.cpp
index 70083420c2af4eacee3f8e68b6a96b63c75563ba..e51e85dd980dd910389ec4415174e6e005f75c41 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10_cudnn.cpp
@@ -2809,16 +2809,14 @@ int main(int argc, char *argv[]) {
     llvm_hpvm_initializeRuntimeController(config_path.c_str());
   }
 
-  if (config_path != "") {
-    llvm_hpvm_initializeRuntimeController(config_path.c_str());
-  }
-
   float total_accuracy = 0;
   startMemTracking();
 #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++) {
     int start = i * batch_size, end = start + batch_size;
-    copyInputBatch(input_path.c_str(), start, end, 3, 32, 32, input);
+    void* input = readInputBatch(input_path.c_str(), nchw, start, end, 3, 32, 32);
+    args->input = input;
+    args->input_bytes = 0;
 
     void *dfg = __hpvm__launch(0, root, (void *)args);
     __hpvm__wait(dfg);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10.cpp
index 4aa288f18e4720ab22de1ead1b91115689cdba8b..fa83c534d0639241205758018f8f7c37401e6b22 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10.cpp
@@ -1550,15 +1550,13 @@ int main(int argc, char *argv[]) {
     llvm_hpvm_initializeRuntimeController(config_path.c_str());
   }
 
-  if (config_path != "") {
-    llvm_hpvm_initializeRuntimeController(config_path.c_str());
-  }
-
   startMemTracking();
 #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++) {
     int start = i * batch_size, end = start + batch_size;
-    copyInputBatch(input_path.c_str(), start, end, 3, 32, 32, input);
+    void* input = readInputBatch(input_path.c_str(), nchw, start, end, 3, 32, 32);
+    args->input = input;
+    args->input_bytes = 0;
 
     void *dfg = __hpvm__launch(0, root, (void *)args);
     __hpvm__wait(dfg);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10_cudnn.cpp
index 064834b707300962250040a29c0dccfb801f8412..c7b789c2343a8dfd1e847652af2bd1d6adfd51f1 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10_cudnn.cpp
@@ -1482,16 +1482,14 @@ int main(int argc, char *argv[]) {
     llvm_hpvm_initializeRuntimeController(config_path.c_str());
   }
 
-  if (config_path != "") {
-    llvm_hpvm_initializeRuntimeController(config_path.c_str());
-  }
-
   float total_accuracy = 0;
   startMemTracking();
 #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++) {
     int start = i * batch_size, end = start + batch_size;
-    copyInputBatch(input_path.c_str(), start, end, 3, 32, 32, input);
+    void* input = readInputBatch(input_path.c_str(), nchw, start, end, 3, 32, 32);
+    args->input = input;
+    args->input_bytes = 0;
 
     void *dfg = __hpvm__launch(0, root, (void *)args);
     __hpvm__wait(dfg);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp
index 28447c6b401c90baf06de3bd8e7f77503623b6d9..91d07e30469e675fd2027f29290e35a0db888174 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp
@@ -6977,15 +6977,13 @@ int main(int argc, char *argv[]) {
     llvm_hpvm_initializeRuntimeController(config_path.c_str());
   }
 
-  if (config_path != "") {
-    llvm_hpvm_initializeRuntimeController(config_path.c_str());
-  }
-
   startMemTracking();
 #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++) {
     int start = i * batch_size, end = start + batch_size;
-    copyInputBatch(input_path.c_str(), start, end, 3, 224, 224, input);
+    void* input = readInputBatch(input_path.c_str(), nchw, start, end, 3, 224, 224);
+    args->input = input;
+    args->input_bytes = 0;
 
     void *dfg = __hpvm__launch(0, root, (void *)args);
     __hpvm__wait(dfg);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet_cudnn.cpp
index 1f2903754dd8ed52302990aa05af2fad0e09dabc..932580e03e7ccc4495d8d76be2f7147369e36d68 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet_cudnn.cpp
@@ -6754,16 +6754,14 @@ int main(int argc, char *argv[]) {
     llvm_hpvm_initializeRuntimeController(config_path.c_str());
   }
 
-  if (config_path != "") {
-    llvm_hpvm_initializeRuntimeController(config_path.c_str());
-  }
-
   float total_accuracy = 0;
   startMemTracking();
 #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++) {
     int start = i * batch_size, end = start + batch_size;
-    copyInputBatch(input_path.c_str(), start, end, 3, 224, 224, input);
+    void* input = readInputBatch(input_path.c_str(), nchw, start, end, 3, 224, 224);
+    args->input = input;
+    args->input_bytes = 0;
 
     void *dfg = __hpvm__launch(0, root, (void *)args);
     __hpvm__wait(dfg);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp
index 3651eb176ea947882569cb5409706989289d103c..195c676c11d53b19e0d18ed4908198a929d188aa 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp
@@ -1007,15 +1007,13 @@ int main(int argc, char *argv[]) {
     llvm_hpvm_initializeRuntimeController(config_path.c_str());
   }
 
-  if (config_path != "") {
-    llvm_hpvm_initializeRuntimeController(config_path.c_str());
-  }
-
   startMemTracking();
 #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++) {
     int start = i * batch_size, end = start + batch_size;
-    copyInputBatch(input_path.c_str(), start, end, 3, 32, 32, input);
+    void* input = readInputBatch(input_path.c_str(), nchw, start, end, 3, 32, 32);
+    args->input = input;
+    args->input_bytes = 0;
 
     void *dfg = __hpvm__launch(0, root, (void *)args);
     __hpvm__wait(dfg);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10_cudnn.cpp
index f2f6557614fceb32f205287cd1490fbad3ed004a..c304237ea57ba15d48cff0773860cdc469fc2a04 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10_cudnn.cpp
@@ -1013,16 +1013,14 @@ int main(int argc, char *argv[]) {
     llvm_hpvm_initializeRuntimeController(config_path.c_str());
   }
 
-  if (config_path != "") {
-    llvm_hpvm_initializeRuntimeController(config_path.c_str());
-  }
-
   float total_accuracy = 0;
   startMemTracking();
 #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++) {
     int start = i * batch_size, end = start + batch_size;
-    copyInputBatch(input_path.c_str(), start, end, 3, 32, 32, input);
+    void* input = readInputBatch(input_path.c_str(), nchw, start, end, 3, 32, 32);
+    args->input = input;
+    args->input_bytes = 0;
 
     void *dfg = __hpvm__launch(0, root, (void *)args);
     __hpvm__wait(dfg);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp
index 2315433e100db05ef6f2a209ba41d22aeac3f656..4cd5c134293d85983146352175e278915ab1d2ba 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp
@@ -1007,15 +1007,13 @@ int main(int argc, char *argv[]) {
     llvm_hpvm_initializeRuntimeController(config_path.c_str());
   }
 
-  if (config_path != "") {
-    llvm_hpvm_initializeRuntimeController(config_path.c_str());
-  }
-
   startMemTracking();
 #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++) {
     int start = i * batch_size, end = start + batch_size;
-    copyInputBatch(input_path.c_str(), start, end, 3, 32, 32, input);
+    void* input = readInputBatch(input_path.c_str(), nchw, start, end, 3, 32, 32);
+    args->input = input;
+    args->input_bytes = 0;
 
     void *dfg = __hpvm__launch(0, root, (void *)args);
     __hpvm__wait(dfg);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100_cudnn.cpp
index 6d5957bb5318694bacfe51ecfbb76436a5d310bd..532fca6b856f296624c21e9a18421763c4b70f48 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100_cudnn.cpp
@@ -1012,16 +1012,14 @@ int main(int argc, char *argv[]) {
     llvm_hpvm_initializeRuntimeController(config_path.c_str());
   }
 
-  if (config_path != "") {
-    llvm_hpvm_initializeRuntimeController(config_path.c_str());
-  }
-
   float total_accuracy = 0;
   startMemTracking();
 #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++) {
     int start = i * batch_size, end = start + batch_size;
-    copyInputBatch(input_path.c_str(), start, end, 3, 32, 32, input);
+    void* input = readInputBatch(input_path.c_str(), nchw, start, end, 3, 32, 32);
+    args->input = input;
+    args->input_bytes = 0;
 
     void *dfg = __hpvm__launch(0, root, (void *)args);
     __hpvm__wait(dfg);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp
index 7a9534b7fed94cfd6652bd9a972767dd0ba76bfe..8e299f40e6ddd04a3ce9f8d9dffff49b1de36189 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp
@@ -1067,15 +1067,13 @@ int main(int argc, char *argv[]) {
     llvm_hpvm_initializeRuntimeController(config_path.c_str());
   }
 
-  if (config_path != "") {
-    llvm_hpvm_initializeRuntimeController(config_path.c_str());
-  }
-
   startMemTracking();
 #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++) {
     int start = i * batch_size, end = start + batch_size;
-    copyInputBatch(input_path.c_str(), start, end, 3, 224, 224, input);
+    void* input = readInputBatch(input_path.c_str(), nchw, start, end, 3, 224, 224);
+    args->input = input;
+    args->input_bytes = 0;
 
     void *dfg = __hpvm__launch(0, root, (void *)args);
     __hpvm__wait(dfg);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet_cudnn.cpp
index 40bac1f901985099dd73103f9a6589a862b84ade..930a33e43c706e6e91475fc97671c39c23f63387 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet_cudnn.cpp
@@ -1073,16 +1073,14 @@ int main(int argc, char *argv[]) {
     llvm_hpvm_initializeRuntimeController(config_path.c_str());
   }
 
-  if (config_path != "") {
-    llvm_hpvm_initializeRuntimeController(config_path.c_str());
-  }
-
   float total_accuracy = 0;
   startMemTracking();
 #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++) {
     int start = i * batch_size, end = start + batch_size;
-    copyInputBatch(input_path.c_str(), start, end, 3, 224, 224, input);
+    void* input = readInputBatch(input_path.c_str(), nchw, start, end, 3, 224, 224);
+    args->input = input;
+    args->input_bytes = 0;
 
     void *dfg = __hpvm__launch(0, root, (void *)args);
     __hpvm__wait(dfg);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/include/tensorUtils.h b/hpvm/test/dnn_benchmarks/hpvm-c/include/tensorUtils.h
index 05d9157a6473fb74061e6edefc4455080368f706..1d5ac7d908b0990f21de885c645786997640264c 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/include/tensorUtils.h
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/include/tensorUtils.h
@@ -329,46 +329,6 @@ struct Tensor *readInputBatch(const char *file_name, long data_type, long start,
   return weights;
 }
 
-void *copyInputBatch(const char *file_name, long start, long end,
-                     long dim2_size, long dim3_size, long dim4_size,
-                     void *inputTensor_ptr) {
-
-  struct Tensor *inputTensor = (struct Tensor *)inputTensor_ptr;
-
-  int dim1_size = end - start;
-  // FIXIT: Don't assume floating point types
-  int type_size = 4; // NOTE: Assuming floating point tensors
-  long int num_elems = dim1_size * dim2_size * dim3_size * dim4_size;
-  long int size_in_bytes =
-      type_size * dim1_size * dim2_size * dim3_size * dim4_size;
-  float *tensor_data = (float *)malloc(sizeof(float) * num_elems);
-  long int file_header_size =
-      type_size * start * dim2_size * dim3_size * dim4_size;
-
-  FILE *file = fopen(file_name, "rb");
-  if (file == NULL) {
-    printf("Data file %s is not found. Aborting... \n", file_name);
-    abort();
-  }
-
-  fseek(file, file_header_size, SEEK_SET); // Skipping the file header
-  size_t bytes_read = fread(tensor_data, 1, size_in_bytes, file);
-
-  fclose(file);
-
-  initTensorData(inputTensor, tensor_data, size_in_bytes);
-  free(tensor_data);
-
-  printf("******NOTE: tensor Dims = %d \n", inputTensor->dims.num_dims);
-  if (inputTensor->host_data == NULL || inputTensor->gpu_data == NULL)
-    printf("ERROR: NULL data pointers \n");
-
-  // Chaning Tensor Placement to HOST
-  changeTensorPlacement(inputTensor, HOST);
-
-  return inputTensor;
-}
-
 uint8_t *readLabels(const char *labels_file, int num_labels) {
 
   uint8_t *labels = (uint8_t *)malloc(sizeof(uint8_t) * num_labels);