From c1de48841b21cf827f4dd85ff0d979bfa97be34f Mon Sep 17 00:00:00 2001
From: Yifan Zhao <yifanz16@illinois.edu>
Date: Fri, 26 Mar 2021 01:06:37 -0500
Subject: [PATCH] Remove copyInputBatch altogether

---
 .../torch2hpvm/template_hpvm.cpp.in           |  4 +-
 .../alexnet2_cifar10/alexnet2_cifar10.cpp     |  8 ++--
 .../alexnet2_cifar10_cudnn.cpp                |  8 ++--
 .../alexnet_cifar10/alexnet_cifar10.cpp       |  8 ++--
 .../alexnet_cifar10/alexnet_cifar10_cudnn.cpp |  8 ++--
 .../alexnet_imagenet/alexnet_imagenet.cpp     |  8 ++--
 .../alexnet_imagenet_cudnn.cpp                |  8 ++--
 .../benchmarks/lenet_mnist/lenet_mnist.cpp    |  8 ++--
 .../lenet_mnist/lenet_mnist_cudnn.cpp         |  8 ++--
 .../mobilenet_cifar10/mobilenet_cifar10.cpp   |  8 ++--
 .../mobilenet_cifar10_cudnn.cpp               |  8 ++--
 .../resnet18_cifar10/resnet18_cifar10.cpp     |  8 ++--
 .../resnet18_cifar10_cudnn.cpp                |  8 ++--
 .../resnet50_imagenet/resnet50_imagenet.cpp   |  8 ++--
 .../resnet50_imagenet_cudnn.cpp               |  8 ++--
 .../vgg16_cifar10/vgg16_cifar10.cpp           |  8 ++--
 .../vgg16_cifar10/vgg16_cifar10_cudnn.cpp     |  8 ++--
 .../vgg16_cifar100/vgg16_cifar100.cpp         |  8 ++--
 .../vgg16_cifar100/vgg16_cifar100_cudnn.cpp   |  8 ++--
 .../vgg16_imagenet/vgg16_imagenet.cpp         |  8 ++--
 .../vgg16_imagenet/vgg16_imagenet_cudnn.cpp   |  8 ++--
 .../hpvm-c/include/tensorUtils.h              | 40 -------------------
 22 files changed, 63 insertions(+), 141 deletions(-)

diff --git a/hpvm/projects/torch2hpvm/torch2hpvm/template_hpvm.cpp.in b/hpvm/projects/torch2hpvm/torch2hpvm/template_hpvm.cpp.in
index 1c4a386ce2..1f6dd875ff 100644
--- a/hpvm/projects/torch2hpvm/torch2hpvm/template_hpvm.cpp.in
+++ b/hpvm/projects/torch2hpvm/torch2hpvm/template_hpvm.cpp.in
@@ -93,7 +93,9 @@ int main(int argc, char *argv[]){
   #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++){
     int start = i * batch_size, end = start + batch_size;
-    copyInputBatch(input_path.c_str(), start, end, {{input_shape|join(', ')}}, {{input_name}});
+    void *{{input_name}} = readInputBatch(input_path.c_str(), 0, start, end, {{input_shape|join(', ')}});
+    args->{{input_name}} = {{input_name}};
+    args->{{input_name}}_bytes = 0;
 
     void* dfg = __hpvm__launch(0, root, (void*) args);
     __hpvm__wait(dfg);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10.cpp
index ebbfec2716..860e3b6423 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10.cpp
@@ -512,15 +512,13 @@ int main(int argc, char *argv[]) {
     llvm_hpvm_initializeRuntimeController(config_path.c_str());
   }
 
-  if (config_path != "") {
-    llvm_hpvm_initializeRuntimeController(config_path.c_str());
-  }
-
   startMemTracking();
 #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++) {
     int start = i * batch_size, end = start + batch_size;
-    copyInputBatch(input_path.c_str(), start, end, 3, 32, 32, input);
+    void* input = readInputBatch(input_path.c_str(), nchw, start, end, 3, 32, 32);
+    args->input = input;
+    args->input_bytes = 0;
 
     void *dfg = __hpvm__launch(0, root, (void *)args);
     __hpvm__wait(dfg);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10_cudnn.cpp
index 44b8e0b37d..f44e19dece 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10_cudnn.cpp
@@ -517,16 +517,14 @@ int main(int argc, char *argv[]) {
     llvm_hpvm_initializeRuntimeController(config_path.c_str());
   }
 
-  if (config_path != "") {
-    llvm_hpvm_initializeRuntimeController(config_path.c_str());
-  }
-
   float total_accuracy = 0;
   startMemTracking();
 #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++) {
     int start = i * batch_size, end = start + batch_size;
-    copyInputBatch(input_path.c_str(), start, end, 3, 32, 32, input);
+    void* input = readInputBatch(input_path.c_str(), nchw, start, end, 3, 32, 32);
+    args->input = input;
+    args->input_bytes = 0;
 
     void *dfg = __hpvm__launch(0, root, (void *)args);
     __hpvm__wait(dfg);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10.cpp
index fad49161eb..6d8973ad98 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10.cpp
@@ -454,15 +454,13 @@ int main(int argc, char *argv[]) {
     llvm_hpvm_initializeRuntimeController(config_path.c_str());
   }
 
-  if (config_path != "") {
-    llvm_hpvm_initializeRuntimeController(config_path.c_str());
-  }
-
   startMemTracking();
 #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++) {
     int start = i * batch_size, end = start + batch_size;
-    copyInputBatch(input_path.c_str(), start, end, 3, 32, 32, input);
+    void* input = readInputBatch(input_path.c_str(), nchw, start, end, 3, 32, 32);
+    args->input = input;
+    args->input_bytes = 0;
 
     void *dfg = __hpvm__launch(0, root, (void *)args);
     __hpvm__wait(dfg);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10_cudnn.cpp
index 1fe72045f0..b2a940d501 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10_cudnn.cpp
@@ -460,16 +460,14 @@ int main(int argc, char *argv[]) {
     llvm_hpvm_initializeRuntimeController(config_path.c_str());
   }
 
-  if (config_path != "") {
-    llvm_hpvm_initializeRuntimeController(config_path.c_str());
-  }
-
   float total_accuracy = 0;
   startMemTracking();
 #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++) {
     int start = i * batch_size, end = start + batch_size;
-    copyInputBatch(input_path.c_str(), start, end, 3, 32, 32, input);
+    void* input = readInputBatch(input_path.c_str(), nchw, start, end, 3, 32, 32);
+    args->input = input;
+    args->input_bytes = 0;
 
     void *dfg = __hpvm__launch(0, root, (void *)args);
     __hpvm__wait(dfg);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp
index dcb8b4faf8..474ab64cad 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp
@@ -572,15 +572,13 @@ int main(int argc, char *argv[]) {
     llvm_hpvm_initializeRuntimeController(config_path.c_str());
   }
 
-  if (config_path != "") {
-    llvm_hpvm_initializeRuntimeController(config_path.c_str());
-  }
-
   startMemTracking();
 #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++) {
     int start = i * batch_size, end = start + batch_size;
-    copyInputBatch(input_path.c_str(), start, end, 3, 224, 224, input);
+    void* input = readInputBatch(input_path.c_str(), nchw, start, end, 3, 224, 224);
+    args->input = input;
+    args->input_bytes = 0;
 
     void *dfg = __hpvm__launch(0, root, (void *)args);
     __hpvm__wait(dfg);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet_cudnn.cpp
index 64a781102a..10e95202f2 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet_cudnn.cpp
@@ -577,16 +577,14 @@ int main(int argc, char *argv[]) {
     llvm_hpvm_initializeRuntimeController(config_path.c_str());
   }
 
-  if (config_path != "") {
-    llvm_hpvm_initializeRuntimeController(config_path.c_str());
-  }
-
   float total_accuracy = 0;
   startMemTracking();
 #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++) {
     int start = i * batch_size, end = start + batch_size;
-    copyInputBatch(input_path.c_str(), start, end, 3, 224, 224, input);
+    void* input = readInputBatch(input_path.c_str(), nchw, start, end, 3, 224, 224);
+    args->input = input;
+    args->input_bytes = 0;
 
     void *dfg = __hpvm__launch(0, root, (void *)args);
     __hpvm__wait(dfg);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist.cpp
index 63d8bd1cc7..5c42f6953c 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist.cpp
@@ -335,15 +335,13 @@ int main(int argc, char *argv[]) {
     llvm_hpvm_initializeRuntimeController(config_path.c_str());
   }
 
-  if (config_path != "") {
-    llvm_hpvm_initializeRuntimeController(config_path.c_str());
-  }
-
   startMemTracking();
 #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++) {
     int start = i * batch_size, end = start + batch_size;
-    copyInputBatch(input_path.c_str(), start, end, 1, 28, 28, input);
+    void* input = readInputBatch(input_path.c_str(), nchw, start, end, 1, 28, 28);
+    args->input = input;
+    args->input_bytes = 0;
 
     void *dfg = __hpvm__launch(0, root, (void *)args);
     __hpvm__wait(dfg);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist_cudnn.cpp
index 558e7effa1..0c2568f81b 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist_cudnn.cpp
@@ -340,16 +340,14 @@ int main(int argc, char *argv[]) {
     llvm_hpvm_initializeRuntimeController(config_path.c_str());
   }
 
-  if (config_path != "") {
-    llvm_hpvm_initializeRuntimeController(config_path.c_str());
-  }
-
   float total_accuracy = 0;
   startMemTracking();
 #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++) {
     int start = i * batch_size, end = start + batch_size;
-    copyInputBatch(input_path.c_str(), start, end, 1, 28, 28, input);
+    void* input = readInputBatch(input_path.c_str(), nchw, start, end, 1, 28, 28);
+    args->input = input;
+    args->input_bytes = 0;
 
     void *dfg = __hpvm__launch(0, root, (void *)args);
     __hpvm__wait(dfg);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10.cpp
index fc7835dedd..01d0273416 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10.cpp
@@ -2804,15 +2804,13 @@ int main(int argc, char *argv[]) {
     llvm_hpvm_initializeRuntimeController(config_path.c_str());
   }
 
-  if (config_path != "") {
-    llvm_hpvm_initializeRuntimeController(config_path.c_str());
-  }
-
   startMemTracking();
 #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++) {
     int start = i * batch_size, end = start + batch_size;
-    copyInputBatch(input_path.c_str(), start, end, 3, 32, 32, input);
+    void* input = readInputBatch(input_path.c_str(), nchw, start, end, 3, 32, 32);
+    args->input = input;
+    args->input_bytes = 0;
 
     void *dfg = __hpvm__launch(0, root, (void *)args);
     __hpvm__wait(dfg);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10_cudnn.cpp
index 70083420c2..e51e85dd98 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10_cudnn.cpp
@@ -2809,16 +2809,14 @@ int main(int argc, char *argv[]) {
     llvm_hpvm_initializeRuntimeController(config_path.c_str());
   }
 
-  if (config_path != "") {
-    llvm_hpvm_initializeRuntimeController(config_path.c_str());
-  }
-
   float total_accuracy = 0;
   startMemTracking();
 #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++) {
     int start = i * batch_size, end = start + batch_size;
-    copyInputBatch(input_path.c_str(), start, end, 3, 32, 32, input);
+    void* input = readInputBatch(input_path.c_str(), nchw, start, end, 3, 32, 32);
+    args->input = input;
+    args->input_bytes = 0;
 
     void *dfg = __hpvm__launch(0, root, (void *)args);
     __hpvm__wait(dfg);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10.cpp
index 4aa288f18e..fa83c534d0 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10.cpp
@@ -1550,15 +1550,13 @@ int main(int argc, char *argv[]) {
     llvm_hpvm_initializeRuntimeController(config_path.c_str());
   }
 
-  if (config_path != "") {
-    llvm_hpvm_initializeRuntimeController(config_path.c_str());
-  }
-
   startMemTracking();
 #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++) {
     int start = i * batch_size, end = start + batch_size;
-    copyInputBatch(input_path.c_str(), start, end, 3, 32, 32, input);
+    void* input = readInputBatch(input_path.c_str(), nchw, start, end, 3, 32, 32);
+    args->input = input;
+    args->input_bytes = 0;
 
     void *dfg = __hpvm__launch(0, root, (void *)args);
     __hpvm__wait(dfg);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10_cudnn.cpp
index 064834b707..c7b789c234 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10_cudnn.cpp
@@ -1482,16 +1482,14 @@ int main(int argc, char *argv[]) {
     llvm_hpvm_initializeRuntimeController(config_path.c_str());
   }
 
-  if (config_path != "") {
-    llvm_hpvm_initializeRuntimeController(config_path.c_str());
-  }
-
   float total_accuracy = 0;
   startMemTracking();
 #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++) {
     int start = i * batch_size, end = start + batch_size;
-    copyInputBatch(input_path.c_str(), start, end, 3, 32, 32, input);
+    void* input = readInputBatch(input_path.c_str(), nchw, start, end, 3, 32, 32);
+    args->input = input;
+    args->input_bytes = 0;
 
     void *dfg = __hpvm__launch(0, root, (void *)args);
     __hpvm__wait(dfg);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp
index 28447c6b40..91d07e3046 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp
@@ -6977,15 +6977,13 @@ int main(int argc, char *argv[]) {
     llvm_hpvm_initializeRuntimeController(config_path.c_str());
   }
 
-  if (config_path != "") {
-    llvm_hpvm_initializeRuntimeController(config_path.c_str());
-  }
-
   startMemTracking();
 #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++) {
     int start = i * batch_size, end = start + batch_size;
-    copyInputBatch(input_path.c_str(), start, end, 3, 224, 224, input);
+    void* input = readInputBatch(input_path.c_str(), nchw, start, end, 3, 224, 224);
+    args->input = input;
+    args->input_bytes = 0;
 
     void *dfg = __hpvm__launch(0, root, (void *)args);
     __hpvm__wait(dfg);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet_cudnn.cpp
index 1f2903754d..932580e03e 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet_cudnn.cpp
@@ -6754,16 +6754,14 @@ int main(int argc, char *argv[]) {
     llvm_hpvm_initializeRuntimeController(config_path.c_str());
   }
 
-  if (config_path != "") {
-    llvm_hpvm_initializeRuntimeController(config_path.c_str());
-  }
-
   float total_accuracy = 0;
   startMemTracking();
 #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++) {
     int start = i * batch_size, end = start + batch_size;
-    copyInputBatch(input_path.c_str(), start, end, 3, 224, 224, input);
+    void* input = readInputBatch(input_path.c_str(), nchw, start, end, 3, 224, 224);
+    args->input = input;
+    args->input_bytes = 0;
 
     void *dfg = __hpvm__launch(0, root, (void *)args);
     __hpvm__wait(dfg);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp
index 3651eb176e..195c676c11 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp
@@ -1007,15 +1007,13 @@ int main(int argc, char *argv[]) {
     llvm_hpvm_initializeRuntimeController(config_path.c_str());
   }
 
-  if (config_path != "") {
-    llvm_hpvm_initializeRuntimeController(config_path.c_str());
-  }
-
   startMemTracking();
 #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++) {
     int start = i * batch_size, end = start + batch_size;
-    copyInputBatch(input_path.c_str(), start, end, 3, 32, 32, input);
+    void* input = readInputBatch(input_path.c_str(), nchw, start, end, 3, 32, 32);
+    args->input = input;
+    args->input_bytes = 0;
 
     void *dfg = __hpvm__launch(0, root, (void *)args);
     __hpvm__wait(dfg);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10_cudnn.cpp
index f2f6557614..c304237ea5 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10_cudnn.cpp
@@ -1013,16 +1013,14 @@ int main(int argc, char *argv[]) {
     llvm_hpvm_initializeRuntimeController(config_path.c_str());
   }
 
-  if (config_path != "") {
-    llvm_hpvm_initializeRuntimeController(config_path.c_str());
-  }
-
   float total_accuracy = 0;
   startMemTracking();
 #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++) {
     int start = i * batch_size, end = start + batch_size;
-    copyInputBatch(input_path.c_str(), start, end, 3, 32, 32, input);
+    void* input = readInputBatch(input_path.c_str(), nchw, start, end, 3, 32, 32);
+    args->input = input;
+    args->input_bytes = 0;
 
     void *dfg = __hpvm__launch(0, root, (void *)args);
     __hpvm__wait(dfg);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp
index 2315433e10..4cd5c13429 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp
@@ -1007,15 +1007,13 @@ int main(int argc, char *argv[]) {
     llvm_hpvm_initializeRuntimeController(config_path.c_str());
   }
 
-  if (config_path != "") {
-    llvm_hpvm_initializeRuntimeController(config_path.c_str());
-  }
-
   startMemTracking();
 #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++) {
     int start = i * batch_size, end = start + batch_size;
-    copyInputBatch(input_path.c_str(), start, end, 3, 32, 32, input);
+    void* input = readInputBatch(input_path.c_str(), nchw, start, end, 3, 32, 32);
+    args->input = input;
+    args->input_bytes = 0;
 
     void *dfg = __hpvm__launch(0, root, (void *)args);
     __hpvm__wait(dfg);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100_cudnn.cpp
index 6d5957bb53..532fca6b85 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100_cudnn.cpp
@@ -1012,16 +1012,14 @@ int main(int argc, char *argv[]) {
     llvm_hpvm_initializeRuntimeController(config_path.c_str());
   }
 
-  if (config_path != "") {
-    llvm_hpvm_initializeRuntimeController(config_path.c_str());
-  }
-
   float total_accuracy = 0;
   startMemTracking();
 #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++) {
     int start = i * batch_size, end = start + batch_size;
-    copyInputBatch(input_path.c_str(), start, end, 3, 32, 32, input);
+    void* input = readInputBatch(input_path.c_str(), nchw, start, end, 3, 32, 32);
+    args->input = input;
+    args->input_bytes = 0;
 
     void *dfg = __hpvm__launch(0, root, (void *)args);
     __hpvm__wait(dfg);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp
index 7a9534b7fe..8e299f40e6 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp
@@ -1067,15 +1067,13 @@ int main(int argc, char *argv[]) {
     llvm_hpvm_initializeRuntimeController(config_path.c_str());
   }
 
-  if (config_path != "") {
-    llvm_hpvm_initializeRuntimeController(config_path.c_str());
-  }
-
   startMemTracking();
 #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++) {
     int start = i * batch_size, end = start + batch_size;
-    copyInputBatch(input_path.c_str(), start, end, 3, 224, 224, input);
+    void* input = readInputBatch(input_path.c_str(), nchw, start, end, 3, 224, 224);
+    args->input = input;
+    args->input_bytes = 0;
 
     void *dfg = __hpvm__launch(0, root, (void *)args);
     __hpvm__wait(dfg);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet_cudnn.cpp
index 40bac1f901..930a33e43c 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet_cudnn.cpp
@@ -1073,16 +1073,14 @@ int main(int argc, char *argv[]) {
     llvm_hpvm_initializeRuntimeController(config_path.c_str());
   }
 
-  if (config_path != "") {
-    llvm_hpvm_initializeRuntimeController(config_path.c_str());
-  }
-
   float total_accuracy = 0;
   startMemTracking();
 #pragma clang loop unroll(disable)
   for (int i = 0; i < batch_count; i++) {
     int start = i * batch_size, end = start + batch_size;
-    copyInputBatch(input_path.c_str(), start, end, 3, 224, 224, input);
+    void* input = readInputBatch(input_path.c_str(), nchw, start, end, 3, 224, 224);
+    args->input = input;
+    args->input_bytes = 0;
 
     void *dfg = __hpvm__launch(0, root, (void *)args);
     __hpvm__wait(dfg);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/include/tensorUtils.h b/hpvm/test/dnn_benchmarks/hpvm-c/include/tensorUtils.h
index 05d9157a64..1d5ac7d908 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/include/tensorUtils.h
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/include/tensorUtils.h
@@ -329,46 +329,6 @@ struct Tensor *readInputBatch(const char *file_name, long data_type, long start,
   return weights;
 }
 
-void *copyInputBatch(const char *file_name, long start, long end,
-                     long dim2_size, long dim3_size, long dim4_size,
-                     void *inputTensor_ptr) {
-
-  struct Tensor *inputTensor = (struct Tensor *)inputTensor_ptr;
-
-  int dim1_size = end - start;
-  // FIXIT: Don't assume floating point types
-  int type_size = 4; // NOTE: Assuming floating point tensors
-  long int num_elems = dim1_size * dim2_size * dim3_size * dim4_size;
-  long int size_in_bytes =
-      type_size * dim1_size * dim2_size * dim3_size * dim4_size;
-  float *tensor_data = (float *)malloc(sizeof(float) * num_elems);
-  long int file_header_size =
-      type_size * start * dim2_size * dim3_size * dim4_size;
-
-  FILE *file = fopen(file_name, "rb");
-  if (file == NULL) {
-    printf("Data file %s is not found. Aborting... \n", file_name);
-    abort();
-  }
-
-  fseek(file, file_header_size, SEEK_SET); // Skipping the file header
-  size_t bytes_read = fread(tensor_data, 1, size_in_bytes, file);
-
-  fclose(file);
-
-  initTensorData(inputTensor, tensor_data, size_in_bytes);
-  free(tensor_data);
-
-  printf("******NOTE: tensor Dims = %d \n", inputTensor->dims.num_dims);
-  if (inputTensor->host_data == NULL || inputTensor->gpu_data == NULL)
-    printf("ERROR: NULL data pointers \n");
-
-  // Chaning Tensor Placement to HOST
-  changeTensorPlacement(inputTensor, HOST);
-
-  return inputTensor;
-}
-
 uint8_t *readLabels(const char *labels_file, int num_labels) {
 
   uint8_t *labels = (uint8_t *)malloc(sizeof(uint8_t) * num_labels);
-- 
GitLab