From 5f5ce3a0c18185df00410bcf30c3a19fea1c3064 Mon Sep 17 00:00:00 2001
From: Yifan Zhao <yifanz16@illinois.edu>
Date: Tue, 5 Nov 2019 19:56:52 -0600
Subject: [PATCH] Add some debug prints and fix NHWC dimension ordering in create4DTensor calls

---
 .../include/functional/reduce.cuh             |  6 +--
 .../tensor_runtime/src/img_tensor_runtime.cu  | 12 +++---
 .../tensor_runtime/src/img_tensor_utils.cpp   | 39 ++++++++++---------
 3 files changed, 29 insertions(+), 28 deletions(-)

diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/functional/reduce.cuh b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/functional/reduce.cuh
index bcd58f90bd..51d8c4da3f 100644
--- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/functional/reduce.cuh
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/functional/reduce.cuh
@@ -146,7 +146,7 @@ __host__ Tensor *reduceDim(
 
   // Calculate approximation parameters  
   if (skip_rate != 0.0f)
-    INFO("Approximation happening...");
+    INFO("Approximation happening...\n");
   size_t approx_row_size = (size_t)((1 - skip_rate) * row_size);
 
   // If # of output entries is small, and row_size is enough for 16 threads,
@@ -154,7 +154,7 @@ __host__ Tensor *reduceDim(
   // Remember if reducing dim in parallel, threads must be (16, 32).
   if (num_rows < NThreads * MaxNBlocks && row_size >= AlongDimTh * 8) {
     DEBUG(
-        "Reducing in parallel, row size = %lu, actually using %lu", row_size,
+        "Reducing in parallel, row size = %lu, actually using %lu\n", row_size,
         approx_row_size);
     size_t grid_x = std::min(MaxBlocksPerDim, ceilDiv(num_irows, 32ul));
     size_t grid_y = std::min(
@@ -166,7 +166,7 @@ __host__ Tensor *reduceDim(
         num_irows, num_orows, row_size, approx_row_size);
   } else {
     DEBUG(
-        "Reducing sequentially, row size = %lu, actually using %lu", row_size,
+        "Reducing sequentially, row size = %lu, actually using %lu\n", row_size,
         approx_row_size);
     // Reduce sequentially.
     size_t threads = std::min(NThreads, num_irows);
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/img_tensor_runtime.cu b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/img_tensor_runtime.cu
index 4abcc52c99..633c5a6655 100644
--- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/img_tensor_runtime.cu
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/img_tensor_runtime.cu
@@ -18,7 +18,7 @@
 void *tensorFft(void *input) {
   // https://docs.nvidia.com/cuda/cufft/index.html#twod-complex-to-real-transforms
   // Tensor checking
-  INFO("FFT");
+  INFO("FFT\n");
   auto *t_input = (Tensor *)input;
   if (t_input->data_type != CUDNN_DATA_FLOAT)
     throw std::runtime_error("Only float32 is supported");
@@ -54,7 +54,7 @@ void *tensorFft(void *input) {
 }
 
 void *tensorReduce(void *input, size_t axis, void *func) {
-  INFO("Reduce");
+  INFO("Reduce\n");
   auto *src = (Tensor *)input;
   if (axis >= src->dims.num_dims)
     throw std::runtime_error("Dimension out of range");
@@ -67,7 +67,7 @@ void *tensorReduce(void *input, size_t axis, void *func) {
 
 void *tensorReductionSamplingReduce(
     void *input, size_t axis, void *func, int skip_level) {
-  INFO("Reduce with sampling");
+  INFO("Reduce with sampling\n");
   auto *src = (Tensor *)input;
   if (axis >= src->dims.num_dims)
     throw std::runtime_error("Dimension out of range");
@@ -92,19 +92,19 @@ void *tensorProjectiveT(void *input, void *transformation) {
 }
 
 void *tensorMap1(void *f, void *i) {
-  INFO("Map1");
+  INFO("Map1\n");
   auto *src = (Tensor *)i;
   return mapGeneral<float, 1>(f, {src});
 }
 
 void *tensorMap2(void *f2, void *i1, void *i2) {
-  INFO("Map2");
+  INFO("Map2\n");
   auto *src1 = (Tensor *)i1, *src2 = (Tensor *)i2;
   return mapGeneral<float, 2>(f2, {src1, src2});
 }
 
 void *tensorMap3(void *f3, void *i1, void *i2, void *i3) {
-  INFO("Map3");
+  INFO("Map3\n");
   auto *src1 = (Tensor *)i1, *src2 = (Tensor *)i2, *src3 = (Tensor *)i3;
   return mapGeneral<float, 3>(f3, {src1, src2, src3});
 }
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/img_tensor_utils.cpp b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/img_tensor_utils.cpp
index 6d2477ab46..172229d532 100644
--- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/img_tensor_utils.cpp
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/img_tensor_utils.cpp
@@ -22,7 +22,7 @@ static inline bool isRegFile(const char *path, dirent *dp) {
     return false;
   struct stat sb {};
   if (lstat(path, &sb) == -1) {
-    INFO("lstat failed for file %s", path);
+    INFO("lstat failed for file %s\n", path);
     return false;
   }
   mode_t type = sb.st_mode & S_IFMT;
@@ -72,19 +72,19 @@ static inline float *uint8_to_float(const uint8_t *ui, size_t len) {
 
 static Tensor *to_nhwc(Tensor *t) {
   if (t->data_format == CUDNN_TENSOR_NHWC) {
-    DEBUG("Tensor already in NHWC format, no conversion needed");
+    DEBUG("Tensor already in NHWC format, no conversion needed\n");
     return t;
   } else if (t->data_format != CUDNN_TENSOR_NCHW) {
     throw std::runtime_error(
         "Unknown tensor format: " + std::to_string(t->data_format));
   } else {
-    DEBUG("Converting to NHWC format");
+    DEBUG("Converting to NHWC format\n");
   }
 
   size_t *dim_arr = t->dims.dim_sizes;
   size_t n = dim_arr[0], c = dim_arr[1], h = dim_arr[2], w = dim_arr[3];
   auto *out_tensor =
-      (Tensor *)create4DTensor(t->data_type, CUDNN_TENSOR_NHWC, n, c, h, w);
+      (Tensor *)create4DTensor(t->data_type, CUDNN_TENSOR_NHWC, n, h, w, c);
   size_t nhwc_offset = 0;
   size_t element_size = getTypeSize(t->data_type);
   char *out_data = (char *)(out_tensor->host_data),
@@ -104,13 +104,13 @@ static Tensor *to_nhwc(Tensor *t) {
 
 static Tensor *to_nchw(Tensor *t) {
   if (t->data_format == CUDNN_TENSOR_NCHW) {
-    DEBUG("Tensor already in NCHW format, no conversion needed");
+    DEBUG("Tensor already in NCHW format, no conversion needed\n");
     return t;
   } else if (t->data_format != CUDNN_TENSOR_NHWC) {
     throw std::runtime_error(
         "Unknown tensor format: " + std::to_string(t->data_format));
   } else {
-    DEBUG("Converting to NCHW format");
+    DEBUG("Converting to NCHW format\n");
   }
   size_t *dim_arr = t->dims.dim_sizes;
   size_t n = dim_arr[0], h = dim_arr[1], w = dim_arr[2], c = dim_arr[3];
@@ -134,15 +134,15 @@ static Tensor *to_nchw(Tensor *t) {
 }
 
 Tensor *readDataSet(const char *path, size_t n_color) {
-  INFO("Loading image dataset from path %s", path);
+  INFO("Loading image dataset from path %s\n", path);
   auto *first_image = (Tensor *)loadAsImage(sample_file(path).c_str(), n_color);
   std::vector<size_t> sizes = ::sizes(first_image);
   delete first_image;
   size_t h = sizes[2], w = sizes[3];
   size_t count = count_file(path);
-  DEBUG("Counted %d images in path.", count);
+  DEBUG("Loading shape: (%lu, %lu, %lu, %lu)(NCHW)\n", count, n_color, h, w);
   auto *batch = (Tensor *)create4DTensor(
-      CUDNN_DATA_FLOAT, CUDNN_TENSOR_NHWC, count, n_color, h, w);
+      CUDNN_DATA_FLOAT, CUDNN_TENSOR_NHWC, count, h, w, n_color);
   size_t n_floats = n_color * h * w;
   auto *base_data = (float *)batch->host_data;
   auto dirp = opendir(path);
@@ -164,18 +164,18 @@ Tensor *readDataSet(const char *path, size_t n_color) {
   (void)closedir(dirp);
   auto *nchw_batch = to_nchw(batch);
   delete batch;
-  DEBUG("Loaded all images.");
+  DEBUG("Loaded all images.\n");
   return nchw_batch;
 }
 
 void saveDataSet(
     const char *path, const char *prefix, Tensor *batch) {
-  INFO("Saving image dataset to path %s", path);
-  DEBUG("Copying to CPU before printing");
+  INFO("Saving image dataset to path %s\n", path);
+  DEBUG("Copying to CPU before printing\n");
   deviceToHostCopy(batch);
   Tensor *converted_batch = batch;
   if (batch->data_format == CUDNN_TENSOR_NCHW) {
-    DEBUG("Copy-converting to NHWC format");
+    DEBUG("Copy-converting to NHWC format\n");
     converted_batch = to_nhwc(batch);
   }
   std::vector<size_t> sizes = ::sizes(converted_batch);
@@ -200,14 +200,15 @@ void saveDataSet(
 }
 
 void *loadAsImage(const char *filename, size_t n_color) {
-  INFO("Loading image from path=%s", filename);
+  INFO("Loading image from path=%s\n", filename);
   int x, y, n; // x = width, y = height, n = # 8-bit components per pixel
   uint8_t *data = stbi_load(filename, &x, &y, &n, n_color);
   if (data == nullptr)
     throw std::runtime_error("Image load failed");
   float *converted = uint8_to_float(data, x * y * n);
+  DEBUG("Loading shape: (1, %d, %d, %lu)(NHWC)\n", y, x, n_color);
   auto *image =
-      (Tensor *)create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NHWC, 1, n, y, x);
+      (Tensor *)create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NHWC, 1, y, x, n);
   std::memcpy(image->host_data, converted, x * y * n * sizeof(float));
   auto *nchw_image = to_nchw(image);
   stbi_image_free(data);
@@ -216,11 +217,11 @@ void *loadAsImage(const char *filename, size_t n_color) {
 }
 
 void saveToImage(const char *filename, Tensor *tensor) {
-  INFO("Saving image data to path=%s", filename);
+  INFO("Saving image data to path=%s\n", filename);
   deviceToHostCopy(tensor);
   Tensor *converted_tensor = tensor;
   if (tensor->data_format == CUDNN_TENSOR_NCHW) {
-    DEBUG("Copy-converting to NHWC format");
+    DEBUG("Copy-converting to NHWC format\n");
     converted_tensor = to_nhwc(tensor);
   }
   auto *hdr_data = (float *)converted_tensor->host_data;
@@ -236,7 +237,7 @@ void saveToImage(const char *filename, Tensor *tensor) {
 
 void *createFilterFromData(
     int data_type, void *data, size_t w, size_t h, size_t n_chan) {
-  DEBUG("Creating filter from data");
+  DEBUG("Creating filter from data\n");
   auto *tensor =
       (Tensor *)create4DTensor(data_type, CUDNN_TENSOR_NCHW, 1, n_chan, h, w);
   char *tensor_data;
@@ -265,7 +266,7 @@ std::vector<float> PSNR(void *gold_ptr, void *approx_ptr) {
     size_t batch_dim = dim_sizes[0];
     size_t image_size = dim_sizes[1] * dim_sizes[2] * dim_sizes[3];
     float image_size_f = image_size;
-    DEBUG("batch_dim = %lu, image_size = %lu", batch_dim, image_size);
+    DEBUG("batch_dim = %lu, image_size = %lu\n", batch_dim, image_size);
     auto *image_size_tensor = (Tensor *)create4DTensor(
       CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 1, 1, 1
     );
-- 
GitLab