From 86323b34ee0793c288b058c2fef358f248e9ca40 Mon Sep 17 00:00:00 2001
From: Yifan Zhao <yifanz16@illinois.edu>
Date: Tue, 5 Nov 2019 15:26:16 -0600
Subject: [PATCH] Benchmark I/O

---
 .../benchmarks/canny_test/Makefile            |   3 +-
 .../benchmarks/canny_test/src/canny_test.cpp  | 304 +++++++++---------
 2 files changed, 150 insertions(+), 157 deletions(-)

diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/Makefile b/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/Makefile
index 3287830462..4b9b5baedd 100644
--- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/Makefile
+++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/Makefile
@@ -24,8 +24,7 @@ LINKER_FLAGS = -L $(CUDA_TOOLKIT_ROOT_DIR)/lib64 -lpthread -lcudart -lcurand -lc
 HPVM_LIB_DIR = $(HPVM_BUILD_DIR)/lib
 
 
-# FIXME:
-CONF_FILE_PATH=
+CONF_FILE_PATH=$(realpath data/tuner_confs.txt)
 # NOTE: Needs proper handling in the WRAPPER backend because Quant range not needed for IMAGE Benchmarks
 WRAPPER_API_QUANT_FILE_PATH=
 
diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/src/canny_test.cpp b/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/src/canny_test.cpp
index 5f254f5e95..eb25c085e2 100644
--- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/src/canny_test.cpp
+++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/src/canny_test.cpp
@@ -1,233 +1,227 @@
 
+#include <cstring>
+#include <fcntl.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
 #include <sys/stat.h>
-#include <cstring>
-#include <visc.h>
 #include <tensorTypes.h>
 #include <tensorUtils.h>
-
-// __device__ float avg(float x) {
-//     return __fdividef(x, 3.0f);
-// }
-
-// __device__ void *avg_ptr = (void *) avg;
+#include <unistd.h>
+#include <visc.h>
 
 /* 0. Grayscale */
 
-void var_0_node(void* t1, size_t bytes_t1) {
-    __visc__hint(visc::GPU_TARGET);
-    __visc__attributes(1, t1, 0);
+void var_0_node(void *t1, size_t bytes_t1) {
+  __visc__hint(visc::GPU_TARGET);
+  __visc__attributes(1, t1, 0);
 
-    void *r = __visc__tensor_reduce(t1, 1, nullptr); //device::fadd_ptrptr);
-    __visc__return(2, r, (size_t) 0);
+  void *r = __visc__tensor_reduce(t1, 1, device::fadd_ptrptr);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_1_node(void* t1, size_t bytes_t1) {
-    __visc__hint(visc::GPU_TARGET);
-    __visc__attributes(1, t1, 0);
+void var_1_node(void *t1, size_t bytes_t1) {
+  __visc__hint(visc::GPU_TARGET);
+  __visc__attributes(1, t1, 0);
 
-    void *r = __visc__tensor_map1(nullptr, t1); // (void *) &avg_ptr
-    __visc__return(2, r, (size_t) 0);
+  void *r = __visc__tensor_map1(device::favg3_ptrptr, t1);
+  __visc__return(2, r, (size_t)0);
 }
 
 /* 1. Denoise */
 
-void var_2_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
-    __visc__hint(visc::PROMISE_TARGET);
-    __visc__attributes(2, t1, t2, 0);
+void var_2_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
+  __visc__hint(visc::GPU_TARGET);
+  __visc__attributes(2, t1, t2, 0);
 
-    void* r = __visc__tensor_convolution(t1, t2, 2, 2, 1, 1);
-    __visc__return(2, r, (size_t) 0);
+  void *r = __visc__tensor_convolution(t1, t2, 2, 2, 1, 1);
+  __visc__return(2, r, (size_t)0);
 }
 
 /* 2. Get edge gradient / direction */
 
-void var_3_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
-    __visc__hint(visc::PROMISE_TARGET);
-    __visc__attributes(2, t1, t2, 0);
+void var_3_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
+  __visc__hint(visc::GPU_TARGET);
+  __visc__attributes(2, t1, t2, 0);
 
-    void* r = __visc__tensor_convolution(t1, t2, 2, 2, 1, 1);
-    __visc__return(2, r, (size_t) 0);
+  void *r = __visc__tensor_convolution(t1, t2, 2, 2, 1, 1);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_4_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
-    __visc__hint(visc::PROMISE_TARGET);
-    __visc__attributes(2, t1, t2, 0);
+void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
+  __visc__hint(visc::GPU_TARGET);
+  __visc__attributes(2, t1, t2, 0);
 
-    void* r = __visc__tensor_convolution(t1, t2, 2, 2, 1, 1);
-    __visc__return(2, r, (size_t) 0);
+  void *r = __visc__tensor_convolution(t1, t2, 2, 2, 1, 1);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_5_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
-    __visc__hint(visc::PROMISE_TARGET);
-    __visc__attributes(2, t1, t2, 0);
+void var_5_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
+  __visc__hint(visc::GPU_TARGET);
+  __visc__attributes(2, t1, t2, 0);
 
-    void* r = __visc__tensor_map2(nullptr, t1, t2); // device::fhypot_ptrptr
-    __visc__return(2, r, (size_t) 0);
+  void *r = __visc__tensor_map2(device::fhypot_ptrptr, t1, t2);
+  __visc__return(2, r, (size_t)0);
 }
 
 /* 3. Normalize grad magnitude */
 
-void var_6_node(void* t1, size_t bytes_t1) {
-    __visc__hint(visc::GPU_TARGET);
-    __visc__attributes(1, t1, 0);
+void var_6_node(void *t1, size_t bytes_t1) {
+  __visc__hint(visc::GPU_TARGET);
+  __visc__attributes(1, t1, 0);
 
-    void *r = __visc__tensor_reduce(t1, 2, nullptr); // device::fmax_ptrptr
-    __visc__return(2, r, (size_t) 0);
+  void *r = __visc__tensor_reduce(t1, 2, device::fmax_ptrptr);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_7_node(void* t1, size_t bytes_t1) {
-    __visc__hint(visc::GPU_TARGET);
-    __visc__attributes(1, t1, 0);
+void var_7_node(void *t1, size_t bytes_t1) {
+  __visc__hint(visc::GPU_TARGET);
+  __visc__attributes(1, t1, 0);
 
-    void *r = __visc__tensor_reduce(t1, 3, nullptr); // device::fmax_ptrptr
-    __visc__return(2, r, (size_t) 0);
+  void *r = __visc__tensor_reduce(t1, 3, device::fmax_ptrptr);
+  __visc__return(2, r, (size_t)0);
 }
 
-void var_8_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
-    __visc__hint(visc::PROMISE_TARGET);
-    __visc__attributes(2, t1, t2, 0);
+void var_8_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
+  __visc__hint(visc::GPU_TARGET);
+  __visc__attributes(2, t1, t2, 0);
 
-    void* r = __visc__tensor_map2(nullptr, t1, t2); // device::fdiv_ptrptr
-    __visc__return(2, r, (size_t) 0);
+  void *r = __visc__tensor_map2(device::fdiv_ptrptr, t1, t2);
+  __visc__return(2, r, (size_t)0);
 }
 
 void root(
-    void* input, size_t input_bytes,
-    void* gaussian, size_t gaussian_bytes,
-    void* sobel_x, size_t sobel_x_bytes,
-    void* sobel_y, size_t sobel_y_bytes
-){
-    __visc__hint(visc::CPU_TARGET);
-    __visc__attributes(8, input, input_bytes, gaussian, gaussian_bytes, sobel_x, sobel_x_bytes, sobel_y, sobel_y_bytes, 0);
+    void *input, size_t input_bytes, void *gaussian, size_t gaussian_bytes,
+    void *sobel_x, size_t sobel_x_bytes, void *sobel_y, size_t sobel_y_bytes) {
+  __visc__hint(visc::CPU_TARGET);
+  __visc__attributes(
+      8, input, input_bytes, gaussian, gaussian_bytes, sobel_x, sobel_x_bytes,
+      sobel_y, sobel_y_bytes, 0);
 
-    void* var_0 = __visc__createNodeND(0, var_0_node);
+  void *var_0 = __visc__createNodeND(0, var_0_node);
 
-    __visc__bindIn(var_0, 0, 0, 0);
-    __visc__bindIn(var_0, 1, 1, 0);
+  __visc__bindIn(var_0, 0, 0, 0);
+  __visc__bindIn(var_0, 1, 1, 0);
 
-    void* var_1 = __visc__createNodeND(0, var_1_node);
+  void *var_1 = __visc__createNodeND(0, var_1_node);
 
-    __visc__edge(var_0, var_1, 1, 0, 0, 0);
-    __visc__edge(var_0, var_1, 1, 1, 1, 0);
+  __visc__edge(var_0, var_1, 1, 0, 0, 0);
+  __visc__edge(var_0, var_1, 1, 1, 1, 0);
 
-    void* var_2 = __visc__createNodeND(0, var_2_node);
+  void *var_2 = __visc__createNodeND(0, var_2_node);
 
-    __visc__edge(var_1, var_2, 1, 0, 0, 0);
-    __visc__edge(var_1, var_2, 1, 1, 1, 0);
-    __visc__bindIn(var_2, 2, 2, 0);
-    __visc__bindIn(var_2, 3, 3, 0);
+  __visc__edge(var_1, var_2, 1, 0, 0, 0);
+  __visc__edge(var_1, var_2, 1, 1, 1, 0);
+  __visc__bindIn(var_2, 2, 2, 0);
+  __visc__bindIn(var_2, 3, 3, 0);
 
-    void* var_3 = __visc__createNodeND(0, var_3_node);
+  void *var_3 = __visc__createNodeND(0, var_3_node);
 
-    __visc__edge(var_2, var_3, 1, 0, 0, 0);
-    __visc__edge(var_2, var_3, 1, 1, 1, 0);
-    __visc__bindIn(var_3, 4, 2, 0);
-    __visc__bindIn(var_3, 5, 3, 0);
+  __visc__edge(var_2, var_3, 1, 0, 0, 0);
+  __visc__edge(var_2, var_3, 1, 1, 1, 0);
+  __visc__bindIn(var_3, 4, 2, 0);
+  __visc__bindIn(var_3, 5, 3, 0);
 
-    void* var_4 = __visc__createNodeND(0, var_4_node);
+  void *var_4 = __visc__createNodeND(0, var_4_node);
 
-    __visc__edge(var_2, var_4, 1, 0, 0, 0);
-    __visc__edge(var_2, var_4, 1, 1, 1, 0);
-    __visc__bindIn(var_4, 6, 2, 0);
-    __visc__bindIn(var_4, 7, 3, 0);
+  __visc__edge(var_2, var_4, 1, 0, 0, 0);
+  __visc__edge(var_2, var_4, 1, 1, 1, 0);
+  __visc__bindIn(var_4, 6, 2, 0);
+  __visc__bindIn(var_4, 7, 3, 0);
 
-    void* var_5 = __visc__createNodeND(0, var_5_node);
+  void *var_5 = __visc__createNodeND(0, var_5_node);
 
-    __visc__edge(var_3, var_5, 1, 0, 0, 0);
-    __visc__edge(var_3, var_5, 1, 1, 1, 0);
-    __visc__edge(var_4, var_5, 1, 0, 2, 0);
-    __visc__edge(var_4, var_5, 1, 1, 3, 0);
+  __visc__edge(var_3, var_5, 1, 0, 0, 0);
+  __visc__edge(var_3, var_5, 1, 1, 1, 0);
+  __visc__edge(var_4, var_5, 1, 0, 2, 0);
+  __visc__edge(var_4, var_5, 1, 1, 3, 0);
 
-    void* var_6 = __visc__createNodeND(0, var_6_node);
+  void *var_6 = __visc__createNodeND(0, var_6_node);
 
-    __visc__edge(var_5, var_6, 1, 0, 0, 0);
-    __visc__edge(var_5, var_6, 1, 1, 1, 0);
+  __visc__edge(var_5, var_6, 1, 0, 0, 0);
+  __visc__edge(var_5, var_6, 1, 1, 1, 0);
 
-    void* var_7 = __visc__createNodeND(0, var_7_node);
+  void *var_7 = __visc__createNodeND(0, var_7_node);
 
-    __visc__edge(var_6, var_7, 1, 0, 0, 0);
-    __visc__edge(var_6, var_7, 1, 1, 1, 0);
+  __visc__edge(var_6, var_7, 1, 0, 0, 0);
+  __visc__edge(var_6, var_7, 1, 1, 1, 0);
 
-    void* var_8 = __visc__createNodeND(0, var_8_node);
+  void *var_8 = __visc__createNodeND(0, var_8_node);
 
-    __visc__edge(var_5, var_8, 1, 0, 0, 0);
-    __visc__edge(var_5, var_8, 1, 1, 1, 0);
-    __visc__edge(var_7, var_8, 1, 0, 2, 0);
-    __visc__edge(var_7, var_8, 1, 1, 3, 0);
+  __visc__edge(var_5, var_8, 1, 0, 0, 0);
+  __visc__edge(var_5, var_8, 1, 1, 1, 0);
+  __visc__edge(var_7, var_8, 1, 0, 2, 0);
+  __visc__edge(var_7, var_8, 1, 1, 3, 0);
 
-    __visc__bindOut(var_8, 0, 0, 0);
-    __visc__bindOut(var_8, 1, 1, 0);
+  __visc__bindOut(var_8, 0, 0, 0);
+  __visc__bindOut(var_8, 1, 1, 0);
 }
 
 struct ret_t {
-    void* tensor;
-    size_t bytes;
+  void *tensor;
+  size_t bytes;
 };
 
 struct __attribute__((__packed__)) RootIn {
-    void* input;
-    size_t input_bytes;
-    void* gaussian;
-    size_t gaussian_bytes;
-    void* sobel_x;
-    size_t sobel_x_bytes;
-    void* sobel_y;
-    size_t sobel_y_bytes;
-    struct ret_t r;
+  void *input;
+  size_t input_bytes;
+  void *gaussian;
+  size_t gaussian_bytes;
+  void *sobel_x;
+  size_t sobel_x_bytes;
+  void *sobel_y;
+  size_t sobel_y_bytes;
+  struct ret_t r;
 };
 
-// Tensor *gaussianFilter(float sigma, size_t w, size_t h, size_t n_chan) {
-//     int64_t m = (w - 1) / 2, n = (h - 1) / 2;
-//     auto *data = new float[w * h];
-//     float sum = 0.0f;
-//     for (int64_t i = -m; i <= m; i++)
-//         for (int64_t j = -n; j <= n; j++) {
-//             size_t idx = (i + m) * h + (j + n);
-//             float exponent = -(i * i + j * j) / (2.0 * sigma * sigma);
-//             data[idx] = exp(exponent);
-//             sum += data[idx];
-//         }
-//     if (sum != 0.0f)
-//         for (size_t i = 0; i < w * h; i++)
-//             data[i] /= sum;
-//     return (Tensor *) createFilterFromData(float_, data, w, h, n_chan);
-// }
-
-// std::pair<Tensor *, Tensor *> getSobelKernels() {
-//     std::vector<float> k1({-1, 0, 1, -2, 0, 2, -1, 0, 1});
-//     std::vector<float> k2({1, 2, 1, 0, 0, 0, -1, -2, -1});
-//     auto *t1 = (Tensor *) createFilterFromData(float_, k1.data(), 3, 3, 1);
-//     auto *t2 = (Tensor *) createFilterFromData(float_, k2.data(), 3, 3, 1);
-//     return std::make_pair(t1, t2);
-// }
+Tensor *gaussianFilter(float sigma, size_t w, size_t h, size_t n_chan) {
+  int64_t m = (w - 1) / 2, n = (h - 1) / 2;
+  auto *data = new float[w * h];
+  float sum = 0.0f;
+  for (int64_t i = -m; i <= m; i++)
+    for (int64_t j = -n; j <= n; j++) {
+      size_t idx = (i + m) * h + (j + n);
+      float exponent = -(i * i + j * j) / (2.0 * sigma * sigma);
+      data[idx] = exp(exponent);
+      sum += data[idx];
+    }
+  if (sum != 0.0f)
+    for (size_t i = 0; i < w * h; i++)
+      data[i] /= sum;
+  return (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, data, w, h, n_chan);
+}
+
+std::pair<Tensor *, Tensor *> getSobelKernels() {
+  std::vector<float> k1({-1, 0, 1, -2, 0, 2, -1, 0, 1});
+  std::vector<float> k2({1, 2, 1, 0, 0, 0, -1, -2, -1});
+  auto *t1 =
+      (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, k1.data(), 3, 3, 1);
+  auto *t2 =
+      (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, k2.data(), 3, 3, 1);
+  return std::make_pair(t1, t2);
+}
 
 int main(int argc, char *argv[]) {
-    if (argc < 3)
-        return 0;
-    std::string output_path = argv[2];
-    // Tensor *image = readDataSet(argv[1]);
-    // Tensor *gaussian = gaussianFilter(1.4, 5, 5, 1);
-    // Tensor *sobel_x, *sobel_y;
-    // std::tie(sobel_x, sobel_y) = getSobelKernels();
-    Tensor *image = nullptr, *gaussian = nullptr, *sobel_x = nullptr, *sobel_y = nullptr;
+  if (argc < 3)
+    return 0;
+  Tensor *image = readDataSet(argv[1]);
+  Tensor *gaussian = gaussianFilter(1.4, 5, 5, 1);
+  Tensor *sobel_x = nullptr, *sobel_y = nullptr;
+  std::tie(sobel_x, sobel_y) = getSobelKernels();
 
-    __visc__init();
-    RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn)));
-    *args = RootIn{image, 0, gaussian, 0, sobel_x, 0, sobel_y, 0};
+  __visc__init();
+  RootIn *args = static_cast<RootIn *>(malloc(sizeof(RootIn)));
+  *args = RootIn{image, 0, gaussian, 0, sobel_x, 0, sobel_y, 0};
 
-    void* dfg = __visc__launch(0, root, (void*) args);
+  void *dfg = __visc__launch(0, root, (void *)args);
 
-    __visc__wait(dfg);
+  __visc__wait(dfg);
 
-    void *result = static_cast<RootIn*>(args)->input;
-    hpvm_request_tensor(result, 0);
+  void *result = static_cast<RootIn *>(args)->input;
+  hpvm_request_tensor(result, 0);
 
-    __visc__cleanup();
-    // saveDataSet(output_path, (Tensor *) result);
-    // computeAccuracy2(labels, 10000, result);
-    return 0;
+  __visc__cleanup();
+  saveDataSet(argv[2], "", (Tensor *)result);
+
+  // computeAccuracy2(labels, 10000, result);
+  return 0;
 }
-- 
GitLab