Commit 5f5ce3a0 authored by Yifan Zhao

Add some debug prints

parent 74187f97
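The log calls touched in the diff below are printf-style: the format string is passed straight through, so a trailing newline has to be written explicitly, which is what the added "\n" suffixes do. (The same diff also fixes the dimension order passed to create4DTensor for NHWC tensors; see the to_nhwc, readDataSet, and loadAsImage hunks.) A minimal sketch of what such macros could look like, assuming they are thin fprintf wrappers that do not append a newline themselves; the definitions below are hypothetical and not part of this commit:

// Hypothetical printf-style logging macros (GNU-style ##__VA_ARGS__ so a bare
// format string like INFO("FFT\n") still compiles). They do not append a
// trailing newline, so callers must end the format string with "\n".
#include <cstdio>

#define INFO(fmt, ...) std::fprintf(stderr, "INFO: " fmt, ##__VA_ARGS__)
#define DEBUG(fmt, ...) std::fprintf(stderr, "DEBUG: " fmt, ##__VA_ARGS__)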
@@ -146,7 +146,7 @@ __host__ Tensor *reduceDim(
// Calculate approximation parameters
if (skip_rate != 0.0f)
INFO("Approximation happening...");
INFO("Approximation happening...\n");
size_t approx_row_size = (size_t)((1 - skip_rate) * row_size);
// If # of output entries is small, and row_size is enough for 16 threads,
@@ -154,7 +154,7 @@ __host__ Tensor *reduceDim(
// Remember if reducing dim in parallel, threads must be (16, 32).
if (num_rows < NThreads * MaxNBlocks && row_size >= AlongDimTh * 8) {
DEBUG(
"Reducing in parallel, row size = %lu, actually using %lu", row_size,
"Reducing in parallel, row size = %lu, actually using %lu\n", row_size,
approx_row_size);
size_t grid_x = std::min(MaxBlocksPerDim, ceilDiv(num_irows, 32ul));
size_t grid_y = std::min(
@@ -166,7 +166,7 @@ __host__ Tensor *reduceDim(
num_irows, num_orows, row_size, approx_row_size);
} else {
DEBUG(
"Reducing sequentially, row size = %lu, actually using %lu", row_size,
"Reducing sequentially, row size = %lu, actually using %lu\n", row_size,
approx_row_size);
// Reduce sequentially.
size_t threads = std::min(NThreads, num_irows);
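For context on the sizing arithmetic in the hunks above: approx_row_size trims each reduced row by skip_rate, and ceilDiv is presumably the usual round-up integer division used to size the launch grid. A small sketch under that assumption (the real helper is defined elsewhere in the runtime, not in this commit):

// Round-up integer division, as assumed for the grid sizing above.
#include <cstddef>

static inline size_t ceilDiv(size_t a, size_t b) { return (a + b - 1) / b; }

// Example: skip_rate = 0.25 and row_size = 1000 give
// approx_row_size = (1 - 0.25) * 1000 = 750 elements actually reduced per row,
// and ceilDiv(num_irows, 32) blocks along x when num_irows is not a multiple of 32.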
@@ -18,7 +18,7 @@
void *tensorFft(void *input) {
// https://docs.nvidia.com/cuda/cufft/index.html#twod-complex-to-real-transforms
// Tensor checking
INFO("FFT");
INFO("FFT\n");
auto *t_input = (Tensor *)input;
if (t_input->data_type != CUDNN_DATA_FLOAT)
throw std::runtime_error("Only float32 is supported");
@@ -54,7 +54,7 @@ void *tensorFft(void *input) {
}
void *tensorReduce(void *input, size_t axis, void *func) {
INFO("Reduce");
INFO("Reduce\n");
auto *src = (Tensor *)input;
if (axis >= src->dims.num_dims)
throw std::runtime_error("Dimension out of range");
@@ -67,7 +67,7 @@ void *tensorReduce(void *input, size_t axis, void *func) {
void *tensorReductionSamplingReduce(
void *input, size_t axis, void *func, int skip_level) {
INFO("Reduce with sampling");
INFO("Reduce with sampling\n");
auto *src = (Tensor *)input;
if (axis >= src->dims.num_dims)
throw std::runtime_error("Dimension out of range");
@@ -92,19 +92,19 @@ void *tensorProjectiveT(void *input, void *transformation) {
}
void *tensorMap1(void *f, void *i) {
INFO("Map1");
INFO("Map1\n");
auto *src = (Tensor *)i;
return mapGeneral<float, 1>(f, {src});
}
void *tensorMap2(void *f2, void *i1, void *i2) {
INFO("Map2");
INFO("Map2\n");
auto *src1 = (Tensor *)i1, *src2 = (Tensor *)i2;
return mapGeneral<float, 2>(f2, {src1, src2});
}
void *tensorMap3(void *f3, void *i1, void *i2, void *i3) {
INFO("Map3");
INFO("Map3\n");
auto *src1 = (Tensor *)i1, *src2 = (Tensor *)i2, *src3 = (Tensor *)i3;
return mapGeneral<float, 3>(f3, {src1, src2, src3});
}
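tensorMap1/2/3 above are thin wrappers that forward to mapGeneral<float, N> with the input tensors; conceptually each applies a scalar function elementwise across one to three same-shaped tensors. A host-side reference of the two-input case (mapGeneral itself presumably launches a CUDA kernel, which is not shown in this diff):

// CPU reference for what a 2-ary elementwise map computes; the real
// mapGeneral<float, 2> would do this on the GPU.
#include <cstddef>

using BinFn = float (*)(float, float);

static void map2_reference(BinFn f, const float *a, const float *b,
                           float *out, size_t n) {
  for (size_t i = 0; i < n; ++i)
    out[i] = f(a[i], b[i]); // apply f to corresponding elements
}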
@@ -22,7 +22,7 @@ static inline bool isRegFile(const char *path, dirent *dp) {
return false;
struct stat sb {};
if (lstat(path, &sb) == -1) {
INFO("lstat failed for file %s", path);
INFO("lstat failed for file %s\n", path);
return false;
}
mode_t type = sb.st_mode & S_IFMT;
@@ -72,19 +72,19 @@ static inline float *uint8_to_float(const uint8_t *ui, size_t len) {
static Tensor *to_nhwc(Tensor *t) {
if (t->data_format == CUDNN_TENSOR_NHWC) {
DEBUG("Tensor already in NHWC format, no conversion needed");
DEBUG("Tensor already in NHWC format, no conversion needed\n");
return t;
} else if (t->data_format != CUDNN_TENSOR_NCHW) {
throw std::runtime_error(
"Unknown tensor format: " + std::to_string(t->data_format));
} else {
DEBUG("Converting to NHWC format");
DEBUG("Converting to NHWC format\n");
}
size_t *dim_arr = t->dims.dim_sizes;
size_t n = dim_arr[0], c = dim_arr[1], h = dim_arr[2], w = dim_arr[3];
auto *out_tensor =
-(Tensor *)create4DTensor(t->data_type, CUDNN_TENSOR_NHWC, n, c, h, w);
+(Tensor *)create4DTensor(t->data_type, CUDNN_TENSOR_NHWC, n, h, w, c);
size_t nhwc_offset = 0;
size_t element_size = getTypeSize(t->data_type);
char *out_data = (char *)(out_tensor->host_data),
@@ -104,13 +104,13 @@ static Tensor *to_nhwc(Tensor *t) {
static Tensor *to_nchw(Tensor *t) {
if (t->data_format == CUDNN_TENSOR_NCHW) {
DEBUG("Tensor already in NCHW format, no conversion needed");
DEBUG("Tensor already in NCHW format, no conversion needed\n");
return t;
} else if (t->data_format != CUDNN_TENSOR_NHWC) {
throw std::runtime_error(
"Unknown tensor format: " + std::to_string(t->data_format));
} else {
DEBUG("Converting to NCHW format");
DEBUG("Converting to NCHW format\n");
}
size_t *dim_arr = t->dims.dim_sizes;
size_t n = dim_arr[0], h = dim_arr[1], w = dim_arr[2], c = dim_arr[3];
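The create4DTensor fixes in the two hunks above matter because the four trailing arguments are the dimension sizes in the tensor's own layout order: (n, c, h, w) for NCHW but (n, h, w, c) for NHWC. The conversion routines then walk the flat buffer with the corresponding strides; a self-contained sketch of the offset mapping behind to_nhwc (float-only, for illustration):

// Element (ni, ci, hi, wi) moves from NCHW offset ((ni*c + ci)*h + hi)*w + wi
// to NHWC offset ((ni*h + hi)*w + wi)*c + ci.
#include <cstddef>

static void nchw_to_nhwc(const float *in, float *out,
                         size_t n, size_t c, size_t h, size_t w) {
  for (size_t ni = 0; ni < n; ++ni)
    for (size_t ci = 0; ci < c; ++ci)
      for (size_t hi = 0; hi < h; ++hi)
        for (size_t wi = 0; wi < w; ++wi) {
          size_t nchw_off = ((ni * c + ci) * h + hi) * w + wi;
          size_t nhwc_off = ((ni * h + hi) * w + wi) * c + ci;
          out[nhwc_off] = in[nchw_off];
        }
}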
@@ -134,15 +134,15 @@ static Tensor *to_nchw(Tensor *t) {
}
Tensor *readDataSet(const char *path, size_t n_color) {
INFO("Loading image dataset from path %s", path);
INFO("Loading image dataset from path %s\n", path);
auto *first_image = (Tensor *)loadAsImage(sample_file(path).c_str(), n_color);
std::vector<size_t> sizes = ::sizes(first_image);
delete first_image;
size_t h = sizes[2], w = sizes[3];
size_t count = count_file(path);
DEBUG("Counted %d images in path.", count);
DEBUG("Loading shape: (%lu, %lu, %lu, %lu)\n", count, n_color, h, w);
auto *batch = (Tensor *)create4DTensor(
-CUDNN_DATA_FLOAT, CUDNN_TENSOR_NHWC, count, n_color, h, w);
+CUDNN_DATA_FLOAT, CUDNN_TENSOR_NHWC, count, h, w, n_color);
size_t n_floats = n_color * h * w;
auto *base_data = (float *)batch->host_data;
auto dirp = opendir(path);
@@ -164,18 +164,18 @@ Tensor *readDataSet(const char *path, size_t n_color) {
(void)closedir(dirp);
auto *nchw_batch = to_nchw(batch);
delete batch;
DEBUG("Loaded all images.");
DEBUG("Loaded all images.\n");
return nchw_batch;
}
void saveDataSet(
const char *path, const char *prefix, Tensor *batch) {
INFO("Saving image dataset to path %s", path);
DEBUG("Copying to CPU before printing");
INFO("Saving image dataset to path %s\n", path);
DEBUG("Copying to CPU before printing\n");
deviceToHostCopy(batch);
Tensor *converted_batch = batch;
if (batch->data_format == CUDNN_TENSOR_NCHW) {
DEBUG("Copy-converting to NHWC format");
DEBUG("Copy-converting to NHWC format\n");
converted_batch = to_nhwc(batch);
}
std::vector<size_t> sizes = ::sizes(converted_batch);
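Putting the two entry points above together, a round trip through the dataset helpers would look roughly like this; the paths and prefix below are placeholders, not values from this repository:

// Hypothetical usage of readDataSet / saveDataSet (paths are placeholders).
Tensor *batch = readDataSet("/data/images", /*n_color=*/3); // NCHW float batch
// ... run some pipeline over `batch` ...
saveDataSet("/data/output", "out_", batch); // presumably one file per batch entry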
@@ -200,14 +200,15 @@ void saveDataSet(
}
void *loadAsImage(const char *filename, size_t n_color) {
INFO("Loading image from path=%s", filename);
INFO("Loading image from path=%s\n", filename);
int x, y, n; // x = width, y = height, n = # 8-bit components per pixel
uint8_t *data = stbi_load(filename, &x, &y, &n, n_color);
if (data == nullptr)
throw std::runtime_error("Image load failed");
float *converted = uint8_to_float(data, x * y * n);
DEBUG("Loading shape: (1, %lu, %lu, %lu)(NHWC)\n", y, x, n_color);
auto *image =
-(Tensor *)create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NHWC, 1, n, y, x);
+(Tensor *)create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NHWC, 1, y, x, n);
std::memcpy(image->host_data, converted, x * y * n * sizeof(float));
auto *nchw_image = to_nchw(image);
stbi_image_free(data);
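loadAsImage runs the 8-bit stbi pixel buffer through uint8_to_float before copying it into the NHWC tensor. That helper is only referenced here; a plausible sketch is below, with the caveat that whether the real version also normalizes to [0, 1] is not visible in this diff:

// Sketch of uint8_to_float: widen 8-bit pixel values to float.
// Whether the real helper divides by 255 is not shown in this commit.
#include <cstddef>
#include <cstdint>

static float *uint8_to_float_sketch(const uint8_t *ui, size_t len) {
  auto *out = new float[len]; // caller is responsible for delete[]
  for (size_t i = 0; i < len; ++i)
    out[i] = static_cast<float>(ui[i]);
  return out;
}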
@@ -216,11 +217,11 @@ void *loadAsImage(const char *filename, size_t n_color) {
}
void saveToImage(const char *filename, Tensor *tensor) {
INFO("Saving image data to path=%s", filename);
INFO("Saving image data to path=%s\n", filename);
deviceToHostCopy(tensor);
Tensor *converted_tensor = tensor;
if (tensor->data_format == CUDNN_TENSOR_NCHW) {
DEBUG("Copy-converting to NHWC format");
DEBUG("Copy-converting to NHWC format\n");
converted_tensor = to_nhwc(tensor);
}
auto *hdr_data = (float *)converted_tensor->host_data;
@@ -236,7 +237,7 @@ void saveToImage(const char *filename, Tensor *tensor) {
void *createFilterFromData(
int data_type, void *data, size_t w, size_t h, size_t n_chan) {
DEBUG("Creating filter from data");
DEBUG("Creating filter from data\n");
auto *tensor =
(Tensor *)create4DTensor(data_type, CUDNN_TENSOR_NCHW, 1, n_chan, h, w);
char *tensor_data;
@@ -265,7 +266,7 @@ std::vector<float> PSNR(void *gold_ptr, void *approx_ptr) {
size_t batch_dim = dim_sizes[0];
size_t image_size = dim_sizes[1] * dim_sizes[2] * dim_sizes[3];
float image_size_f = image_size;
DEBUG("batch_dim = %lu, image_size = %lu", batch_dim, image_size);
DEBUG("batch_dim = %lu, image_size = %lu\n", batch_dim, image_size);
auto *image_size_tensor = (Tensor *)create4DTensor(
CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 1, 1, 1
);
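For reference, the quantity this function computes per batch entry is the standard peak signal-to-noise ratio over image_size pixels. A CPU sketch of the formula (max_val is the peak signal value, e.g. 255 for 8-bit images; which convention the runtime uses is not visible in this hunk):

// PSNR reference: MSE = mean((gold - approx)^2) over one image,
// PSNR = 10 * log10(max_val^2 / MSE).
#include <cmath>
#include <cstddef>

static float psnr_reference(const float *gold, const float *approx,
                            size_t image_size, float max_val) {
  double mse = 0.0;
  for (size_t i = 0; i < image_size; ++i) {
    double d = gold[i] - approx[i];
    mse += d * d;
  }
  mse /= static_cast<double>(image_size);
  return static_cast<float>(10.0 * std::log10(static_cast<double>(max_val) * max_val / mse));
}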