From ed00fc37de574607e41ec1b500ede2a12c3da963 Mon Sep 17 00:00:00 2001
From: Hashim Sharif <hsharif3@miranda.cs.illinois.edu>
Date: Tue, 27 Apr 2021 22:46:55 -0500
Subject: [PATCH] More fixes to Tensor runtime prints

---
 .../include/approxhpvm_runtime_utils.h        | 27 +++++++-----
 .../tensor_runtime/src/group_conv.cu          |  6 +--
 .../tensor_runtime/src/tensor_runtime.cu      | 22 +++++-----
 .../tensor_runtime/src/wrapper_runtime.cu     | 44 +++++++++----------
 4 files changed, 50 insertions(+), 49 deletions(-)

diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/include/approxhpvm_runtime_utils.h b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/include/approxhpvm_runtime_utils.h
index c318a8fb6a..8dfa287afc 100644
--- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/include/approxhpvm_runtime_utils.h
+++ b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/include/approxhpvm_runtime_utils.h
@@ -111,9 +111,10 @@ void *handleTensorConvApproximationTuples_CPU(
     }
     case CPUNodeConfiguration::APPROX::PERFORATION: {
       PerfParams params = perfParamSet->getPerfParams(param);
-      INFO("perforation param = %i\n", param);
-      INFO("params.row = %i, params.col = %i, params.skip_offset = %i\n",
-           params.row, params.col, params.skip_offset);
+
+      DEBUG("perforation param = %i\n", param);
+      DEBUG("params.row = %i, params.col = %i, params.skip_offset = %i\n", params.row, params.col, params.skip_offset);
+
       void *t_out;
       RC->resume_profiler();
       t_out = tensorConvApproxCPU(
@@ -131,9 +132,10 @@ void *handleTensorConvApproximationTuples_CPU(
     }
     case CPUNodeConfiguration::APPROX::INPUT_SAMPLING: {
       SampParams params = sampParamSet->getSampParams(param);
-      INFO("sampling param = %i\n", param);
-      INFO("params.skip_rate = %i, params.skip_offset = %i\n", params.skip_rate,
-           params.skip_offset);
+
+      DEBUG("sampling param = %i\n", param);
+      DEBUG("params.skip_rate = %i, params.skip_offset = %i\n", params.skip_rate, params.skip_offset);
+
       void *t_out;
       RC->resume_profiler();
       t_out = tensorConvApproxCPU(input, filter, conv_pad_h, conv_pad_w,
@@ -536,9 +538,10 @@ void *handleTensorConvApproximationTuples(
     case GPUNodeConfiguration::APPROX::PERFORATION:
     case GPUNodeConfiguration::APPROX::PERFORATION_HP: {
       PerfParams params = perfParamSet->getPerfParams(param);
-      INFO("perforation param = %i\n", param);
-      INFO("params.row = %i, params.col = %i, params.skip_offset = %i\n",
-           params.row, params.col, params.skip_offset);
+
+      DEBUG("perforation param = %i\n", param);
+      DEBUG("params.row = %i, params.col = %i, params.skip_offset = %i\n", params.row, params.col, params.skip_offset);
+
       void *t_out;
       RC->resume_profiler();
       t_out = tensorConvApproxHalf2(
@@ -557,9 +560,11 @@ void *handleTensorConvApproximationTuples(
     case GPUNodeConfiguration::APPROX::INPUT_SAMPLING:
     case GPUNodeConfiguration::APPROX::INPUT_SAMPLING_HP: {
       SampParams params = sampParamSet->getSampParams(param);
-      INFO("sampling param = %i\n", param);
-      INFO("params.skip_rate = %i, params.skip_offset = %i\n", params.skip_rate,
+
+      DEBUG("sampling param = %i\n", param);
+      DEBUG("params.skip_rate = %i, params.skip_offset = %i\n", params.skip_rate,
            params.skip_offset);
+
       void *t_out;
       RC->resume_profiler();
       t_out = tensorConvApproxHalf2(input, filter, conv_pad_h, conv_pad_w,
diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/group_conv.cu b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/group_conv.cu
index 6a3fcc12e0..e42b2cbc06 100644
--- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/group_conv.cu
+++ b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/group_conv.cu
@@ -218,7 +218,7 @@ void *tensorConvCutlass(void *input_ptr, void *filter_ptr, int vertical_pad,
                         int horizontal_pad, int vertical_stride,
                         int horizontal_stride, int conv_mode, int conv_groups) {
 
-  INFO("*** TensorConvolution \n");
+  DEBUG("*** TensorGroupConvolution \n");
   profileEvent("Conv");
 
   Tensor *input = (Tensor *)input_ptr;
@@ -290,7 +290,7 @@ void *tensorConvCutlass(void *input_ptr, void *filter_ptr, int vertical_pad,
     hostToDeviceCopy(input);
     hostToDeviceCopy(filter);
 
-    INFO("vertical_stride = %lu, horizontal_stride = %lu \n", vertical_stride,
+    DEBUG("vertical_stride = %d, horizontal_stride = %d \n", vertical_stride,
          horizontal_stride);
 
     checkCUDNN(cudnnCreateConvolutionDescriptor(&convDesc));
@@ -386,7 +386,7 @@ void *tensorHalfConvCutlass(void *input_ptr, void *filter_ptr, int vertical_pad,
                             int horizontal_stride, int conv_mode,
                             int conv_groups) {
 
-  INFO("*** TensorHConvolution \n");
+  DEBUG("*** TensorHConvolution \n");
   profileEvent("#Conv");
 
   Tensor *input = (Tensor *)input_ptr;
diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu
index 253f761433..9f69e54437 100644
--- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu
+++ b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu
@@ -52,7 +52,7 @@ void *tensorAdd(void *x_ptr, void *bias_ptr) {
   Tensor *x = (Tensor *)x_ptr;
   Tensor *bias = (Tensor *)bias_ptr;
 
-  INFO("*** TensorAdd \n");
+  //INFO("*** TensorAdd \n");
   profileEvent("Add");
 
   float alpha = 1.0f;
@@ -85,7 +85,8 @@ void *tensorConvolution(void *input_ptr, void *filter_ptr, int vertical_pad,
                         int horizontal_pad, int vertical_stride,
                         int horizontal_stride, int conv_mode, int conv_groups) {
 
-  INFO("*** TensorConvolution \n");
+
+  //INFO("*** TensorConvolution \n");
   profileEvent("Conv");
 
   Tensor *input = (Tensor *)input_ptr;
@@ -213,7 +214,6 @@ void *tensorPooling(void *input_ptr, int poolFunction, int window_height,
                     int window_width, int vertical_pad, int horizontal_pad,
                     int vertical_stride, int horizontal_stride) {
 
-  INFO("*** TensorPooling \n");
   profileEvent("Pool");
 
   Tensor *input = (Tensor *)input_ptr;
@@ -272,7 +272,7 @@ void *tensorPooling(void *input_ptr, int poolFunction, int window_height,
  * https://gist.github.com/peterwittek/6303527 */
 void *tensorGemmGPU(void *lhs_ptr, void *rhs_ptr) {
 
-  INFO("*** TensorGemmGPU \n");
+  //INFO("*** TensorGemmGPU \n");
   profileEvent("Mul");
 
   Tensor *lhs = (Tensor *)lhs_ptr;
@@ -364,7 +364,7 @@ void *tensorRelu(void *input_ptr) {
 // Think: Should Softmax be broken into multiple IR operations?
 void *tensorSoftmax(void *input_ptr) {
 
-  INFO("*** TensorSoftmax \n");
+  //INFO("*** TensorSoftmax \n");
   profileEvent("Softmax");
 
   Tensor *input = (Tensor *)input_ptr;
@@ -386,7 +386,7 @@ void *tensorSoftmax(void *input_ptr) {
 
 void *tensorRelu2(void *input_ptr, float min, float max) {
 
-  INFO("*** TensorClippedRelu *** \n");
+  //INFO("*** TensorClippedRelu *** \n");
   profileEvent("Relu");
 
   cudnnActivationDescriptor_t reluDesc;
@@ -413,7 +413,7 @@ void *tensorRelu2(void *input_ptr, float min, float max) {
 
 void *tensorTanh(void *input_ptr) {
 
-  INFO("*** TensorTanh \n");
+  //INFO("*** TensorTanh \n");
   profileEvent("Tanh");
 
   Tensor *input = (Tensor *)input_ptr;
@@ -441,7 +441,7 @@ void *tensorTanh(void *input_ptr) {
 void *tensorBatchNorm(void *input_ptr, void *gamma_ptr, void *beta_ptr,
                       void *mean_ptr, void *variance_ptr, double epsilon) {
 
-  INFO("*** TensorBatchNorm \n");
+  // INFO("*** TensorBatchNorm \n");
   profileEvent("BatchNorm");
 
   Tensor *input = (Tensor *)input_ptr;
@@ -477,7 +477,7 @@ void *tensorBatchNorm(void *input_ptr, void *gamma_ptr, void *beta_ptr,
 // TODO: benchmark performance of tensorSplit
 void **tensorSplit(void *tensor_ptr, int num_splits, int split_dim) {
 
-  INFO("*** TensorSplit \n");
+  //INFO("*** TensorSplit \n");
   profileEvent("tensorSplit");
 
   Tensor *tensor = (Tensor *)tensor_ptr;
@@ -533,7 +533,7 @@ void **tensorSplit(void *tensor_ptr, int num_splits, int split_dim) {
 
 void *tensorConcat(void **tensors_ptr, int num_splits, int split_dim) {
 
-  INFO("*** TensorConcat \n");
+  //INFO("*** TensorConcat \n");
   profileEvent("tensorConcat");
 
   Tensor **tensors = (Tensor **)tensors_ptr;
@@ -595,7 +595,7 @@ void *tensorConcat(void **tensors_ptr, int num_splits, int split_dim) {
 void *tensorLRN(void *input_ptr, unsigned int LRN_window, double LRN_alpha,
                 double LRN_beta, double LRN_k) {
 
-  INFO("*** TensorLRN \n");
+  //INFO("*** TensorLRN \n");
   profileEvent("tensorLRN");
 
   Tensor *input = (Tensor *)input_ptr;
diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/wrapper_runtime.cu b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/wrapper_runtime.cu
index 79ea02592d..a972d097bf 100644
--- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/wrapper_runtime.cu
+++ b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/wrapper_runtime.cu
@@ -75,9 +75,7 @@ extern "C" {
 
 
 
-
-void *
-wrapper_ConvLayer(const char *hpvm_node_id, void *input, void *filter,
+void *wrapper_ConvLayer(const char *hpvm_node_id, void *input, void *filter,
                   void *bias, int conv_pad_h, int conv_pad_w, int conv_stride_h,
                   int conv_stride_w, int pool_id, int pool_size,
                   int activation_id,
@@ -323,15 +321,14 @@ wrapper_ConvLayer(const char *hpvm_node_id, void *input, void *filter,
   return NULL;
 }
 
-void *wrapper_ConvLayer2(
-    const char *hpvm_node_id, void *input, void *filter, void *bias,
-    int conv_pad_h, int conv_pad_w, int conv_stride_h, int conv_stride_w,
-    int pool_id, int pool_size_v, int pool_size_h, int pool_pad_v,
-    int pool_pad_h, int pool_stride_v, int pool_stride_h, int activation_id,
-    // NOTE: out_min, out_max are only relevant for ClippedRelu
-    float out_min, float out_max) {
+void *wrapper_ConvLayer2(const char *hpvm_node_id, void *input, void *filter, void *bias,
+                         int conv_pad_h, int conv_pad_w, int conv_stride_h, int conv_stride_w,
+                         int pool_id, int pool_size_v, int pool_size_h, int pool_pad_v,
+                         int pool_pad_h, int pool_stride_v, int pool_stride_h, int activation_id,
+                         // NOTE: out_min, out_max are only relevant for ClippedRelu
+                         float out_min, float out_max) {
 
-  INFO("***  TensorConv \n");
+  INFO("***  ConvLayer \n");
 
   NodeConfiguration *NodeConf = RC->getNodeConfiguration(hpvm_node_id);
   if (NodeConf->isGPUNodeConfiguration()) {
@@ -523,7 +520,6 @@ void *wrapper_ConvLayer2(
        }
 
        void* pool_out;
-
        if (pool_size_v > 0) {
          switch (pool_id) {
          case 0:
@@ -591,6 +587,8 @@ wrapper_FCLayer(const char *hpvm_node_id, void *input, void *weights,
                 // NOTE: out_min and out_max are only relevant for ClippedRelu
                 float out_min, float out_max) {
 
+  INFO("***  DenseLayer \n");
+
   NodeConfiguration *NodeConf = RC->getNodeConfiguration(hpvm_node_id);
   if (NodeConf->isGPUNodeConfiguration()) {
     DEBUG("GPU Configuration for FCLayer\n");
@@ -730,7 +728,7 @@ wrapper_FCLayer(const char *hpvm_node_id, void *input, void *weights,
 
 void *wrapper_tensorRelu(const char *hpvm_node_id, void *input_ptr) {
 
-  INFO("*** TensorRelu \n");
+  INFO("***  TensorRelu \n");
 
   NodeConfiguration *NodeConf = RC->getNodeConfiguration(hpvm_node_id);
 
@@ -884,10 +882,8 @@ void *wrapper_tensorBatchNorm(const char *hpvm_node_id, void *input_ptr,
                               void *gamma_ptr, void *beta_ptr, void *mean_ptr,
                               void *variance_ptr, double epsilon) {
 
-  INFO("*** TensorBatchNorm  \n");
-
+  INFO("***  TensorBatchNorm  \n");
   NodeConfiguration *NodeConf = RC->getNodeConfiguration(hpvm_node_id);
-
   
   if (NodeConf->isGPUNodeConfiguration()) {
       // mapped to GPU - get a GPU configuration
@@ -898,14 +894,8 @@ void *wrapper_tensorBatchNorm(const char *hpvm_node_id, void *input_ptr,
           std::pair<GPUNodeConfiguration::TENSOR_OP,
                     std::vector<std::pair<GPUNodeConfiguration::APPROX, int>>>>
           &ApproxChoices =
-
               GPUConf->getApproxChoices();
 
-      // printf("*** BatchNorm \n ApproxChoice = %d \n  BatchNorm = %d \n CONV = %d
-      // \n", ApproxChoices[0].first,
-      //	       GPUNodeConfiguration::TENSOR_OP::BATCHNORM,
-      //	       GPUNodeConfiguration::TENSOR_OP::CONV);
-
       // Approximation choices must be for a batchnorm operation
       CUSTOM_ASSERT(
           ApproxChoices.size() == 1 &&
@@ -946,6 +936,8 @@ void *wrapper_tensorBatchNorm(const char *hpvm_node_id, void *input_ptr,
 void *wrapper_tensorAdd(const char *hpvm_node_id, void *input_ptr,
                         void *bias_ptr) {
 
+  INFO("***  TensorAdd \n");
+
   NodeConfiguration *NodeConf = RC->getNodeConfiguration(hpvm_node_id);
 
   if (NodeConf->isGPUNodeConfiguration()) {
@@ -999,7 +991,7 @@ void *wrapper_tensorPooling(const char *hpvm_node_id, void *input_ptr,
                             int horizontal_pad, int vertical_stride,
                             int horizontal_stride) {
 
-  INFO("*** TensorPooling  \n");
+  INFO("***  TensorPooling  \n");
 
   NodeConfiguration *NodeConf = RC->getNodeConfiguration(hpvm_node_id);
 
@@ -1075,6 +1067,9 @@ void *wrapper_tensorGroupConvolution(const char *hpvm_node_id, void *input,
                                      int horizontal_pad, int vertical_stride,
                                      int horizontal_stride, int conv_mode,
                                      int conv_groups) {
+
+  INFO("***  TensorGroupConv  \n");
+
   NodeConfiguration *NodeConf = RC->getNodeConfiguration(hpvm_node_id);
 
   if (NodeConf->isGPUNodeConfiguration()) {
@@ -1128,8 +1123,9 @@ void *wrapper_tensorGroupConvolution(const char *hpvm_node_id, void *input,
 }
 
 void *wrapper_tensorSoftmax(const char *hpvm_node_id, void *input_ptr) {
-  //  return tensorSoftmax(input_ptr);
 
+  INFO("***  TensorSoftmax \n");
+
   NodeConfiguration *NodeConf = RC->getNodeConfiguration(hpvm_node_id);
   if (NodeConf->isGPUNodeConfiguration()) {
 
-- 
GitLab