From ed00fc37de574607e41ec1b500ede2a12c3da963 Mon Sep 17 00:00:00 2001 From: Hashim Sharif <hsharif3@miranda.cs.illinois.edu> Date: Tue, 27 Apr 2021 22:46:55 -0500 Subject: [PATCH] More fixes to Tensor runtime prints --- .../include/approxhpvm_runtime_utils.h | 27 +++++++----- .../tensor_runtime/src/group_conv.cu | 6 +-- .../tensor_runtime/src/tensor_runtime.cu | 22 +++++----- .../tensor_runtime/src/wrapper_runtime.cu | 44 +++++++++---------- 4 files changed, 50 insertions(+), 49 deletions(-) diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/include/approxhpvm_runtime_utils.h b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/include/approxhpvm_runtime_utils.h index c318a8fb6a..8dfa287afc 100644 --- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/include/approxhpvm_runtime_utils.h +++ b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/include/approxhpvm_runtime_utils.h @@ -111,9 +111,10 @@ void *handleTensorConvApproximationTuples_CPU( } case CPUNodeConfiguration::APPROX::PERFORATION: { PerfParams params = perfParamSet->getPerfParams(param); - INFO("perforation param = %i\n", param); - INFO("params.row = %i, params.col = %i, params.skip_offset = %i\n", - params.row, params.col, params.skip_offset); + + DEBUG("perforation param = %i\n", param); + DEBUG("params.row = %i, params.col = %i, params.skip_offset = %i\n", params.row, params.col, params.skip_offset); + void *t_out; RC->resume_profiler(); t_out = tensorConvApproxCPU( @@ -131,9 +132,10 @@ void *handleTensorConvApproximationTuples_CPU( } case CPUNodeConfiguration::APPROX::INPUT_SAMPLING: { SampParams params = sampParamSet->getSampParams(param); - INFO("sampling param = %i\n", param); - INFO("params.skip_rate = %i, params.skip_offset = %i\n", params.skip_rate, - params.skip_offset); + + DEBUG("sampling param = %i\n", param); + DEBUG("params.skip_rate = %i, params.skip_offset = %i\n", params.skip_rate, params.skip_offset); + void *t_out; RC->resume_profiler(); t_out = tensorConvApproxCPU(input, filter, conv_pad_h, conv_pad_w, @@ -536,9 +538,10 @@ void *handleTensorConvApproximationTuples( case GPUNodeConfiguration::APPROX::PERFORATION: case GPUNodeConfiguration::APPROX::PERFORATION_HP: { PerfParams params = perfParamSet->getPerfParams(param); - INFO("perforation param = %i\n", param); - INFO("params.row = %i, params.col = %i, params.skip_offset = %i\n", - params.row, params.col, params.skip_offset); + + DEBUG("perforation param = %i\n", param); + DEBUG("params.row = %i, params.col = %i, params.skip_offset = %i\n", params.row, params.col, params.skip_offset); + void *t_out; RC->resume_profiler(); t_out = tensorConvApproxHalf2( @@ -557,9 +560,11 @@ void *handleTensorConvApproximationTuples( case GPUNodeConfiguration::APPROX::INPUT_SAMPLING: case GPUNodeConfiguration::APPROX::INPUT_SAMPLING_HP: { SampParams params = sampParamSet->getSampParams(param); - INFO("sampling param = %i\n", param); - INFO("params.skip_rate = %i, params.skip_offset = %i\n", params.skip_rate, + + DEBUG("sampling param = %i\n", param); + DEBUG("params.skip_rate = %i, params.skip_offset = %i\n", params.skip_rate, params.skip_offset); + void *t_out; RC->resume_profiler(); t_out = tensorConvApproxHalf2(input, filter, conv_pad_h, conv_pad_w, diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/group_conv.cu b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/group_conv.cu index 6a3fcc12e0..e42b2cbc06 100644 --- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/group_conv.cu +++ b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/group_conv.cu @@ -218,7 +218,7 @@ void *tensorConvCutlass(void *input_ptr, void *filter_ptr, int vertical_pad, int horizontal_pad, int vertical_stride, int horizontal_stride, int conv_mode, int conv_groups) { - INFO("*** TensorConvolution \n"); + //INFO("*** TensorGroupConvolution \n"); profileEvent("Conv"); Tensor *input = (Tensor *)input_ptr; @@ -290,7 +290,7 @@ void *tensorConvCutlass(void *input_ptr, void *filter_ptr, int vertical_pad, hostToDeviceCopy(input); hostToDeviceCopy(filter); - INFO("vertical_stride = %lu, horizontal_stride = %lu \n", vertical_stride, + DEBUG("vertical_stride = %lu, horizontal_stride = %lu \n", vertical_stride, horizontal_stride); checkCUDNN(cudnnCreateConvolutionDescriptor(&convDesc)); @@ -386,7 +386,7 @@ void *tensorHalfConvCutlass(void *input_ptr, void *filter_ptr, int vertical_pad, int horizontal_stride, int conv_mode, int conv_groups) { - INFO("*** TensorHConvolution \n"); + DEBUG("*** TensorHConvolution \n"); profileEvent("#Conv"); Tensor *input = (Tensor *)input_ptr; diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu index 253f761433..9f69e54437 100644 --- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu +++ b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu @@ -52,7 +52,7 @@ void *tensorAdd(void *x_ptr, void *bias_ptr) { Tensor *x = (Tensor *)x_ptr; Tensor *bias = (Tensor *)bias_ptr; - INFO("*** TensorAdd \n"); + //INFO("*** TensorAdd \n"); profileEvent("Add"); float alpha = 1.0f; @@ -85,7 +85,8 @@ void *tensorConvolution(void *input_ptr, void *filter_ptr, int vertical_pad, int horizontal_pad, int vertical_stride, int horizontal_stride, int conv_mode, int conv_groups) { - INFO("*** TensorConvolution \n"); + + //INFO("*** TensorConvolution \n"); profileEvent("Conv"); Tensor *input = (Tensor *)input_ptr; @@ -213,7 +214,6 @@ void *tensorPooling(void *input_ptr, int poolFunction, int window_height, int window_width, int vertical_pad, int horizontal_pad, int vertical_stride, int horizontal_stride) { - INFO("*** TensorPooling \n"); profileEvent("Pool"); Tensor *input = (Tensor *)input_ptr; @@ -272,7 +272,7 @@ void *tensorPooling(void *input_ptr, int poolFunction, int window_height, * https://gist.github.com/peterwittek/6303527 */ void *tensorGemmGPU(void *lhs_ptr, void *rhs_ptr) { - INFO("*** TensorGemmGPU \n"); + //INFO("*** TensorGemmGPU \n"); profileEvent("Mul"); Tensor *lhs = (Tensor *)lhs_ptr; @@ -364,7 +364,7 @@ void *tensorRelu(void *input_ptr) { // Think: Should Softmax be broken into multiple IR operations? void *tensorSoftmax(void *input_ptr) { - INFO("*** TensorSoftmax \n"); + //INFO("*** TensorSoftmax \n"); profileEvent("Softmax"); Tensor *input = (Tensor *)input_ptr; @@ -386,7 +386,7 @@ void *tensorSoftmax(void *input_ptr) { void *tensorRelu2(void *input_ptr, float min, float max) { - INFO("*** TensorClippedRelu *** \n"); + //INFO("*** TensorClippedRelu *** \n"); profileEvent("Relu"); cudnnActivationDescriptor_t reluDesc; @@ -413,7 +413,7 @@ void *tensorRelu2(void *input_ptr, float min, float max) { void *tensorTanh(void *input_ptr) { - INFO("*** TensorTanh \n"); + //INFO("*** TensorTanh \n"); profileEvent("Tanh"); Tensor *input = (Tensor *)input_ptr; @@ -441,7 +441,7 @@ void *tensorTanh(void *input_ptr) { void *tensorBatchNorm(void *input_ptr, void *gamma_ptr, void *beta_ptr, void *mean_ptr, void *variance_ptr, double epsilon) { - INFO("*** TensorBatchNorm \n"); + // INFO("*** TensorBatchNorm \n"); profileEvent("BatchNorm"); Tensor *input = (Tensor *)input_ptr; @@ -477,7 +477,7 @@ void *tensorBatchNorm(void *input_ptr, void *gamma_ptr, void *beta_ptr, // TODO: benchmark performance of tensorSplit void **tensorSplit(void *tensor_ptr, int num_splits, int split_dim) { - INFO("*** TensorSplit \n"); + //INFO("*** TensorSplit \n"); profileEvent("tensorSplit"); Tensor *tensor = (Tensor *)tensor_ptr; @@ -533,7 +533,7 @@ void **tensorSplit(void *tensor_ptr, int num_splits, int split_dim) { void *tensorConcat(void **tensors_ptr, int num_splits, int split_dim) { - INFO("*** TensorConcat \n"); + //INFO("*** TensorConcat \n"); profileEvent("tensorConcat"); Tensor **tensors = (Tensor **)tensors_ptr; @@ -595,7 +595,7 @@ void *tensorConcat(void **tensors_ptr, int num_splits, int split_dim) { void *tensorLRN(void *input_ptr, unsigned int LRN_window, double LRN_alpha, double LRN_beta, double LRN_k) { - INFO("*** TensorLRN \n"); + //INFO("*** TensorLRN \n"); profileEvent("tensorLRN"); Tensor *input = (Tensor *)input_ptr; diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/wrapper_runtime.cu b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/wrapper_runtime.cu index 79ea02592d..a972d097bf 100644 --- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/wrapper_runtime.cu +++ b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/wrapper_runtime.cu @@ -75,9 +75,7 @@ extern "C" { - -void * -wrapper_ConvLayer(const char *hpvm_node_id, void *input, void *filter, + void* wrapper_ConvLayer(const char *hpvm_node_id, void *input, void *filter, void *bias, int conv_pad_h, int conv_pad_w, int conv_stride_h, int conv_stride_w, int pool_id, int pool_size, int activation_id, @@ -323,15 +321,14 @@ wrapper_ConvLayer(const char *hpvm_node_id, void *input, void *filter, return NULL; } -void *wrapper_ConvLayer2( - const char *hpvm_node_id, void *input, void *filter, void *bias, - int conv_pad_h, int conv_pad_w, int conv_stride_h, int conv_stride_w, - int pool_id, int pool_size_v, int pool_size_h, int pool_pad_v, - int pool_pad_h, int pool_stride_v, int pool_stride_h, int activation_id, - // NOTE: out_min, out_max are only relevant for ClippedRelu - float out_min, float out_max) { +void *wrapper_ConvLayer2(const char *hpvm_node_id, void *input, void *filter, void *bias, + int conv_pad_h, int conv_pad_w, int conv_stride_h, int conv_stride_w, + int pool_id, int pool_size_v, int pool_size_h, int pool_pad_v, + int pool_pad_h, int pool_stride_v, int pool_stride_h, int activation_id, + // NOTE: out_min, out_max are only relevant for ClippedRelu + float out_min, float out_max) { - INFO("*** TensorConv \n"); + INFO("*** ConvLayer \n"); NodeConfiguration *NodeConf = RC->getNodeConfiguration(hpvm_node_id); if (NodeConf->isGPUNodeConfiguration()) { @@ -523,7 +520,6 @@ void *wrapper_ConvLayer2( } void* pool_out; - if (pool_size_v > 0) { switch (pool_id) { case 0: @@ -591,6 +587,8 @@ wrapper_FCLayer(const char *hpvm_node_id, void *input, void *weights, // NOTE: out_min and out_max are only relevant for ClippedRelu float out_min, float out_max) { + INFO("*** DenseLayer \n"); + NodeConfiguration *NodeConf = RC->getNodeConfiguration(hpvm_node_id); if (NodeConf->isGPUNodeConfiguration()) { DEBUG("GPU Configuration for FCLayer\n"); @@ -730,7 +728,7 @@ wrapper_FCLayer(const char *hpvm_node_id, void *input, void *weights, void *wrapper_tensorRelu(const char *hpvm_node_id, void *input_ptr) { - INFO("*** TensorRelu \n"); + INFO("*** TensorRelu \n"); NodeConfiguration *NodeConf = RC->getNodeConfiguration(hpvm_node_id); @@ -884,10 +882,8 @@ void *wrapper_tensorBatchNorm(const char *hpvm_node_id, void *input_ptr, void *gamma_ptr, void *beta_ptr, void *mean_ptr, void *variance_ptr, double epsilon) { - INFO("*** TensorBatchNorm \n"); - + INFO("*** TensorBatchNorm \n"); NodeConfiguration *NodeConf = RC->getNodeConfiguration(hpvm_node_id); - if (NodeConf->isGPUNodeConfiguration()) { // mapped to GPU - get a GPU configuration @@ -898,14 +894,8 @@ void *wrapper_tensorBatchNorm(const char *hpvm_node_id, void *input_ptr, std::pair<GPUNodeConfiguration::TENSOR_OP, std::vector<std::pair<GPUNodeConfiguration::APPROX, int>>>> &ApproxChoices = - GPUConf->getApproxChoices(); - // printf("*** BatchNorm \n ApproxChoice = %d \n BatchNorm = %d \n CONV = %d - // \n", ApproxChoices[0].first, - // GPUNodeConfiguration::TENSOR_OP::BATCHNORM, - // GPUNodeConfiguration::TENSOR_OP::CONV); - // Approximation choices must be for a batchnorm operation CUSTOM_ASSERT( ApproxChoices.size() == 1 && @@ -946,6 +936,8 @@ void *wrapper_tensorBatchNorm(const char *hpvm_node_id, void *input_ptr, void *wrapper_tensorAdd(const char *hpvm_node_id, void *input_ptr, void *bias_ptr) { + INFO("*** TensorAdd \n"); + NodeConfiguration *NodeConf = RC->getNodeConfiguration(hpvm_node_id); if (NodeConf->isGPUNodeConfiguration()) { @@ -999,7 +991,7 @@ void *wrapper_tensorPooling(const char *hpvm_node_id, void *input_ptr, int horizontal_pad, int vertical_stride, int horizontal_stride) { - INFO("*** TensorPooling \n"); + INFO("*** TensorPooling \n"); NodeConfiguration *NodeConf = RC->getNodeConfiguration(hpvm_node_id); @@ -1075,6 +1067,9 @@ void *wrapper_tensorGroupConvolution(const char *hpvm_node_id, void *input, int horizontal_pad, int vertical_stride, int horizontal_stride, int conv_mode, int conv_groups) { + + INFO("*** TensorGroupConv \n"); + NodeConfiguration *NodeConf = RC->getNodeConfiguration(hpvm_node_id); if (NodeConf->isGPUNodeConfiguration()) { @@ -1128,8 +1123,9 @@ void *wrapper_tensorGroupConvolution(const char *hpvm_node_id, void *input, } void *wrapper_tensorSoftmax(const char *hpvm_node_id, void *input_ptr) { - // return tensorSoftmax(input_ptr); + INFO("*** TensorSoftmax \n "); + NodeConfiguration *NodeConf = RC->getNodeConfiguration(hpvm_node_id); if (NodeConf->isGPUNodeConfiguration()) { -- GitLab