From 5f5ce3a0c18185df00410bcf30c3a19fea1c3064 Mon Sep 17 00:00:00 2001 From: Yifan Zhao <yifanz16@illinois.edu> Date: Tue, 5 Nov 2019 19:56:52 -0600 Subject: [PATCH] Add some debug prints --- .../include/functional/reduce.cuh | 6 +-- .../tensor_runtime/src/img_tensor_runtime.cu | 12 +++--- .../tensor_runtime/src/img_tensor_utils.cpp | 39 ++++++++++--------- 3 files changed, 29 insertions(+), 28 deletions(-) diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/functional/reduce.cuh b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/functional/reduce.cuh index bcd58f90bd..51d8c4da3f 100644 --- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/functional/reduce.cuh +++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/functional/reduce.cuh @@ -146,7 +146,7 @@ __host__ Tensor *reduceDim( // Calculate approximation parameters if (skip_rate != 0.0f) - INFO("Approximation happening..."); + INFO("Approximation happening...\n"); size_t approx_row_size = (size_t)((1 - skip_rate) * row_size); // If # of output entries is small, and row_size is enough for 16 threads, @@ -154,7 +154,7 @@ __host__ Tensor *reduceDim( // Remember if reducing dim in parallel, threads must be (16, 32). if (num_rows < NThreads * MaxNBlocks && row_size >= AlongDimTh * 8) { DEBUG( - "Reducing in parallel, row size = %lu, actually using %lu", row_size, + "Reducing in parallel, row size = %lu, actually using %lu\n", row_size, approx_row_size); size_t grid_x = std::min(MaxBlocksPerDim, ceilDiv(num_irows, 32ul)); size_t grid_y = std::min( @@ -166,7 +166,7 @@ __host__ Tensor *reduceDim( num_irows, num_orows, row_size, approx_row_size); } else { DEBUG( - "Reducing sequentially, row size = %lu, actually using %lu", row_size, + "Reducing sequentially, row size = %lu, actually using %lu\n", row_size, approx_row_size); // Reduce sequentially. 
size_t threads = std::min(NThreads, num_irows); diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/img_tensor_runtime.cu b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/img_tensor_runtime.cu index 4abcc52c99..633c5a6655 100644 --- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/img_tensor_runtime.cu +++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/img_tensor_runtime.cu @@ -18,7 +18,7 @@ void *tensorFft(void *input) { // https://docs.nvidia.com/cuda/cufft/index.html#twod-complex-to-real-transforms // Tensor checking - INFO("FFT"); + INFO("FFT\n"); auto *t_input = (Tensor *)input; if (t_input->data_type != CUDNN_DATA_FLOAT) throw std::runtime_error("Only float32 is supported"); @@ -54,7 +54,7 @@ void *tensorFft(void *input) { } void *tensorReduce(void *input, size_t axis, void *func) { - INFO("Reduce"); + INFO("Reduce\n"); auto *src = (Tensor *)input; if (axis >= src->dims.num_dims) throw std::runtime_error("Dimension out of range"); @@ -67,7 +67,7 @@ void *tensorReduce(void *input, size_t axis, void *func) { void *tensorReductionSamplingReduce( void *input, size_t axis, void *func, int skip_level) { - INFO("Reduce with sampling"); + INFO("Reduce with sampling\n"); auto *src = (Tensor *)input; if (axis >= src->dims.num_dims) throw std::runtime_error("Dimension out of range"); @@ -92,19 +92,19 @@ void *tensorProjectiveT(void *input, void *transformation) { } void *tensorMap1(void *f, void *i) { - INFO("Map1"); + INFO("Map1\n"); auto *src = (Tensor *)i; return mapGeneral<float, 1>(f, {src}); } void *tensorMap2(void *f2, void *i1, void *i2) { - INFO("Map2"); + INFO("Map2\n"); auto *src1 = (Tensor *)i1, *src2 = (Tensor *)i2; return mapGeneral<float, 2>(f2, {src1, src2}); } void *tensorMap3(void *f3, void *i1, void *i2, void *i3) { - INFO("Map3"); + INFO("Map3\n"); auto *src1 = (Tensor *)i1, *src2 = (Tensor *)i2, *src3 = (Tensor *)i3; return mapGeneral<float, 3>(f3, {src1, src2, src3}); } diff --git 
a/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/img_tensor_utils.cpp b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/img_tensor_utils.cpp index 6d2477ab46..172229d532 100644 --- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/img_tensor_utils.cpp +++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/img_tensor_utils.cpp @@ -22,7 +22,7 @@ static inline bool isRegFile(const char *path, dirent *dp) { return false; struct stat sb {}; if (lstat(path, &sb) == -1) { - INFO("lstat failed for file %s", path); + INFO("lstat failed for file %s\n", path); return false; } mode_t type = sb.st_mode & S_IFMT; @@ -72,19 +72,19 @@ static inline float *uint8_to_float(const uint8_t *ui, size_t len) { static Tensor *to_nhwc(Tensor *t) { if (t->data_format == CUDNN_TENSOR_NHWC) { - DEBUG("Tensor already in NHWC format, no conversion needed"); + DEBUG("Tensor already in NHWC format, no conversion needed\n"); return t; } else if (t->data_format != CUDNN_TENSOR_NCHW) { throw std::runtime_error( "Unknown tensor format: " + std::to_string(t->data_format)); } else { - DEBUG("Converting to NHWC format"); + DEBUG("Converting to NHWC format\n"); } size_t *dim_arr = t->dims.dim_sizes; size_t n = dim_arr[0], c = dim_arr[1], h = dim_arr[2], w = dim_arr[3]; auto *out_tensor = - (Tensor *)create4DTensor(t->data_type, CUDNN_TENSOR_NHWC, n, c, h, w); + (Tensor *)create4DTensor(t->data_type, CUDNN_TENSOR_NHWC, n, h, w, c); size_t nhwc_offset = 0; size_t element_size = getTypeSize(t->data_type); char *out_data = (char *)(out_tensor->host_data), @@ -104,13 +104,13 @@ static Tensor *to_nhwc(Tensor *t) { static Tensor *to_nchw(Tensor *t) { if (t->data_format == CUDNN_TENSOR_NCHW) { - DEBUG("Tensor already in NCHW format, no conversion needed"); + DEBUG("Tensor already in NCHW format, no conversion needed\n"); return t; } else if (t->data_format != CUDNN_TENSOR_NHWC) { throw std::runtime_error( "Unknown tensor format: " + std::to_string(t->data_format)); } else { - DEBUG("Converting to NCHW format"); + 
DEBUG("Converting to NCHW format\n"); } size_t *dim_arr = t->dims.dim_sizes; size_t n = dim_arr[0], h = dim_arr[1], w = dim_arr[2], c = dim_arr[3]; @@ -134,15 +134,15 @@ static Tensor *to_nchw(Tensor *t) { } Tensor *readDataSet(const char *path, size_t n_color) { - INFO("Loading image dataset from path %s", path); + INFO("Loading image dataset from path %s\n", path); auto *first_image = (Tensor *)loadAsImage(sample_file(path).c_str(), n_color); std::vector<size_t> sizes = ::sizes(first_image); delete first_image; size_t h = sizes[2], w = sizes[3]; size_t count = count_file(path); - DEBUG("Counted %d images in path.", count); + DEBUG("Loading shape: (%lu, %lu, %lu, %lu)\n", count, n_color, h, w); auto *batch = (Tensor *)create4DTensor( - CUDNN_DATA_FLOAT, CUDNN_TENSOR_NHWC, count, n_color, h, w); + CUDNN_DATA_FLOAT, CUDNN_TENSOR_NHWC, count, h, w, n_color); size_t n_floats = n_color * h * w; auto *base_data = (float *)batch->host_data; auto dirp = opendir(path); @@ -164,18 +164,18 @@ Tensor *readDataSet(const char *path, size_t n_color) { (void)closedir(dirp); auto *nchw_batch = to_nchw(batch); delete batch; - DEBUG("Loaded all images."); + DEBUG("Loaded all images.\n"); return nchw_batch; } void saveDataSet( const char *path, const char *prefix, Tensor *batch) { - INFO("Saving image dataset to path %s", path); - DEBUG("Copying to CPU before printing"); + INFO("Saving image dataset to path %s\n", path); + DEBUG("Copying to CPU before printing\n"); deviceToHostCopy(batch); Tensor *converted_batch = batch; if (batch->data_format == CUDNN_TENSOR_NCHW) { - DEBUG("Copy-converting to NHWC format"); + DEBUG("Copy-converting to NHWC format\n"); converted_batch = to_nhwc(batch); } std::vector<size_t> sizes = ::sizes(converted_batch); @@ -200,14 +200,15 @@ void saveDataSet( } void *loadAsImage(const char *filename, size_t n_color) { - INFO("Loading image from path=%s", filename); + INFO("Loading image from path=%s\n", filename); int x, y, n; // x = width, y = height, n = # 
8-bit components per pixel uint8_t *data = stbi_load(filename, &x, &y, &n, n_color); if (data == nullptr) throw std::runtime_error("Image load failed"); float *converted = uint8_to_float(data, x * y * n); + DEBUG("Loading shape: (1, %d, %d, %lu)(NHWC)\n", y, x, n_color); auto *image = - (Tensor *)create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NHWC, 1, n, y, x); + (Tensor *)create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NHWC, 1, y, x, n); std::memcpy(image->host_data, converted, x * y * n * sizeof(float)); auto *nchw_image = to_nchw(image); stbi_image_free(data); @@ -216,11 +217,11 @@ void *loadAsImage(const char *filename, size_t n_color) { } void saveToImage(const char *filename, Tensor *tensor) { - INFO("Saving image data to path=%s", filename); + INFO("Saving image data to path=%s\n", filename); deviceToHostCopy(tensor); Tensor *converted_tensor = tensor; if (tensor->data_format == CUDNN_TENSOR_NCHW) { - DEBUG("Copy-converting to NHWC format"); + DEBUG("Copy-converting to NHWC format\n"); converted_tensor = to_nhwc(tensor); } auto *hdr_data = (float *)converted_tensor->host_data; @@ -236,7 +237,7 @@ void saveToImage(const char *filename, Tensor *tensor) { void *createFilterFromData( int data_type, void *data, size_t w, size_t h, size_t n_chan) { - DEBUG("Creating filter from data"); + DEBUG("Creating filter from data\n"); auto *tensor = (Tensor *)create4DTensor(data_type, CUDNN_TENSOR_NCHW, 1, n_chan, h, w); char *tensor_data; @@ -265,7 +266,7 @@ std::vector<float> PSNR(void *gold_ptr, void *approx_ptr) { size_t batch_dim = dim_sizes[0]; size_t image_size = dim_sizes[1] * dim_sizes[2] * dim_sizes[3]; float image_size_f = image_size; - DEBUG("batch_dim = %lu, image_size = %lu", batch_dim, image_size); + DEBUG("batch_dim = %lu, image_size = %lu\n", batch_dim, image_size); auto *image_size_tensor = (Tensor *)create4DTensor( CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 1, 1, 1 ); -- GitLab