Commit 5f5ce3a0 authored by Yifan Zhao

Add some debug prints

parent 74187f97
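The log calls touched in the diff below are printf-style: the format string is passed straight through, so a trailing newline has to be written explicitly, which is what the added "\n" suffixes do. (The same diff also fixes the dimension order passed to create4DTensor for NHWC tensors; see the to_nhwc, readDataSet, and loadAsImage hunks.) A minimal sketch of what such macros could look like, assuming they are thin fprintf wrappers that do not append a newline themselves; the definitions below are hypothetical and not part of this commit:

// Hypothetical printf-style logging macros (GNU-style ##__VA_ARGS__ so a bare
// format string like INFO("FFT\n") still compiles). They do not append a
// trailing newline, so callers must end the format string with "\n".
#include <cstdio>

#define INFO(fmt, ...) std::fprintf(stderr, "INFO: " fmt, ##__VA_ARGS__)
#define DEBUG(fmt, ...) std::fprintf(stderr, "DEBUG: " fmt, ##__VA_ARGS__)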
@@ -146,7 +146,7 @@ __host__ Tensor *reduceDim(
// Calculate approximation parameters
if (skip_rate != 0.0f)
INFO("Approximation happening...");
INFO("Approximation happening...\n");
size_t approx_row_size = (size_t)((1 - skip_rate) * row_size);
// If # of output entries is small, and row_size is enough for 16 threads,
@@ -154,7 +154,7 @@ __host__ Tensor *reduceDim(
// Remember if reducing dim in parallel, threads must be (16, 32).
if (num_rows < NThreads * MaxNBlocks && row_size >= AlongDimTh * 8) {
DEBUG(
"Reducing in parallel, row size = %lu, actually using %lu", row_size,
"Reducing in parallel, row size = %lu, actually using %lu\n", row_size,
approx_row_size);
size_t grid_x = std::min(MaxBlocksPerDim, ceilDiv(num_irows, 32ul));
size_t grid_y = std::min(
@@ -166,7 +166,7 @@ __host__ Tensor *reduceDim(
num_irows, num_orows, row_size, approx_row_size);
} else {
DEBUG(
"Reducing sequentially, row size = %lu, actually using %lu", row_size,
"Reducing sequentially, row size = %lu, actually using %lu\n", row_size,
approx_row_size);
// Reduce sequentially.
size_t threads = std::min(NThreads, num_irows);
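For context on the sizing arithmetic in the hunks above: approx_row_size trims each reduced row by skip_rate, and ceilDiv is presumably the usual round-up integer division used to size the launch grid. A small sketch under that assumption (the real helper is defined elsewhere in the runtime, not in this commit):

// Round-up integer division, as assumed for the grid sizing above.
#include <cstddef>

static inline size_t ceilDiv(size_t a, size_t b) { return (a + b - 1) / b; }

// Example: skip_rate = 0.25 and row_size = 1000 give
// approx_row_size = (1 - 0.25) * 1000 = 750 elements actually reduced per row,
// and ceilDiv(num_irows, 32) blocks along x when num_irows is not a multiple of 32.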
@@ -18,7 +18,7 @@
void *tensorFft(void *input) {
// https://docs.nvidia.com/cuda/cufft/index.html#twod-complex-to-real-transforms
// Tensor checking
INFO("FFT");
INFO("FFT\n");
auto *t_input = (Tensor *)input;
if (t_input->data_type != CUDNN_DATA_FLOAT)
throw std::runtime_error("Only float32 is supported");
@@ -54,7 +54,7 @@ void *tensorFft(void *input) {
}
void *tensorReduce(void *input, size_t axis, void *func) {
INFO("Reduce");
INFO("Reduce\n");
auto *src = (Tensor *)input;
if (axis >= src->dims.num_dims)
throw std::runtime_error("Dimension out of range");
@@ -67,7 +67,7 @@ void *tensorReduce(void *input, size_t axis, void *func) {
void *tensorReductionSamplingReduce(
void *input, size_t axis, void *func, int skip_level) {
INFO("Reduce with sampling");
INFO("Reduce with sampling\n");
auto *src = (Tensor *)input;
if (axis >= src->dims.num_dims)
throw std::runtime_error("Dimension out of range");
@@ -92,19 +92,19 @@ void *tensorProjectiveT(void *input, void *transformation) {
}
void *tensorMap1(void *f, void *i) {
INFO("Map1");
INFO("Map1\n");
auto *src = (Tensor *)i;
return mapGeneral<float, 1>(f, {src});
}
void *tensorMap2(void *f2, void *i1, void *i2) {
INFO("Map2");
INFO("Map2\n");
auto *src1 = (Tensor *)i1, *src2 = (Tensor *)i2;
return mapGeneral<float, 2>(f2, {src1, src2});
}
void *tensorMap3(void *f3, void *i1, void *i2, void *i3) {
INFO("Map3");
INFO("Map3\n");
auto *src1 = (Tensor *)i1, *src2 = (Tensor *)i2, *src3 = (Tensor *)i3;
return mapGeneral<float, 3>(f3, {src1, src2, src3});
}
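tensorMap1/2/3 above are thin wrappers that forward to mapGeneral<float, N> with the input tensors; conceptually each applies a scalar function elementwise across one to three same-shaped tensors. A host-side reference of the two-input case (mapGeneral itself presumably launches a CUDA kernel, which is not shown in this diff):

// CPU reference for what a 2-ary elementwise map computes; the real
// mapGeneral<float, 2> would do this on the GPU.
#include <cstddef>

using BinFn = float (*)(float, float);

static void map2_reference(BinFn f, const float *a, const float *b,
                           float *out, size_t n) {
  for (size_t i = 0; i < n; ++i)
    out[i] = f(a[i], b[i]); // apply f to corresponding elements
}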
@@ -22,7 +22,7 @@ static inline bool isRegFile(const char *path, dirent *dp) {
return false;
struct stat sb {};
if (lstat(path, &sb) == -1) {
INFO("lstat failed for file %s", path);
INFO("lstat failed for file %s\n", path);
return false;
}
mode_t type = sb.st_mode & S_IFMT;
@@ -72,19 +72,19 @@ static inline float *uint8_to_float(const uint8_t *ui, size_t len) {
static Tensor *to_nhwc(Tensor *t) {
if (t->data_format == CUDNN_TENSOR_NHWC) {
DEBUG("Tensor already in NHWC format, no conversion needed");
DEBUG("Tensor already in NHWC format, no conversion needed\n");
return t;
} else if (t->data_format != CUDNN_TENSOR_NCHW) {
throw std::runtime_error(
"Unknown tensor format: " + std::to_string(t->data_format));
} else {
DEBUG("Converting to NHWC format");
DEBUG("Converting to NHWC format\n");
}
size_t *dim_arr = t->dims.dim_sizes;
size_t n = dim_arr[0], c = dim_arr[1], h = dim_arr[2], w = dim_arr[3];
auto *out_tensor =
-(Tensor *)create4DTensor(t->data_type, CUDNN_TENSOR_NHWC, n, c, h, w);
+(Tensor *)create4DTensor(t->data_type, CUDNN_TENSOR_NHWC, n, h, w, c);
size_t nhwc_offset = 0;
size_t element_size = getTypeSize(t->data_type);
char *out_data = (char *)(out_tensor->host_data),
@@ -104,13 +104,13 @@ static Tensor *to_nhwc(Tensor *t) {
static Tensor *to_nchw(Tensor *t) {
if (t->data_format == CUDNN_TENSOR_NCHW) {
DEBUG("Tensor already in NCHW format, no conversion needed");
DEBUG("Tensor already in NCHW format, no conversion needed\n");
return t;
} else if (t->data_format != CUDNN_TENSOR_NHWC) {
throw std::runtime_error(
"Unknown tensor format: " + std::to_string(t->data_format));
} else {
DEBUG("Converting to NCHW format");
DEBUG("Converting to NCHW format\n");
}
size_t *dim_arr = t->dims.dim_sizes;
size_t n = dim_arr[0], h = dim_arr[1], w = dim_arr[2], c = dim_arr[3];
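The create4DTensor fixes in the two hunks above matter because the four trailing arguments are the dimension sizes in the tensor's own layout order: (n, c, h, w) for NCHW but (n, h, w, c) for NHWC. The conversion routines then walk the flat buffer with the corresponding strides; a self-contained sketch of the offset mapping behind to_nhwc (float-only, for illustration):

// Element (ni, ci, hi, wi) moves from NCHW offset ((ni*c + ci)*h + hi)*w + wi
// to NHWC offset ((ni*h + hi)*w + wi)*c + ci.
#include <cstddef>

static void nchw_to_nhwc(const float *in, float *out,
                         size_t n, size_t c, size_t h, size_t w) {
  for (size_t ni = 0; ni < n; ++ni)
    for (size_t ci = 0; ci < c; ++ci)
      for (size_t hi = 0; hi < h; ++hi)
        for (size_t wi = 0; wi < w; ++wi) {
          size_t nchw_off = ((ni * c + ci) * h + hi) * w + wi;
          size_t nhwc_off = ((ni * h + hi) * w + wi) * c + ci;
          out[nhwc_off] = in[nchw_off];
        }
}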
@@ -134,15 +134,15 @@ static Tensor *to_nchw(Tensor *t) {
}
Tensor *readDataSet(const char *path, size_t n_color) {
INFO("Loading image dataset from path %s", path);
INFO("Loading image dataset from path %s\n", path);
auto *first_image = (Tensor *)loadAsImage(sample_file(path).c_str(), n_color);
std::vector<size_t> sizes = ::sizes(first_image);
delete first_image;
size_t h = sizes[2], w = sizes[3];
size_t count = count_file(path);
DEBUG("Counted %d images in path.", count);
DEBUG("Loading shape: (%lu, %lu, %lu, %lu)\n", count, n_color, h, w);
auto *batch = (Tensor *)create4DTensor(
-CUDNN_DATA_FLOAT, CUDNN_TENSOR_NHWC, count, n_color, h, w);
+CUDNN_DATA_FLOAT, CUDNN_TENSOR_NHWC, count, h, w, n_color);
size_t n_floats = n_color * h * w;
auto *base_data = (float *)batch->host_data;
auto dirp = opendir(path);
@@ -164,18 +164,18 @@ Tensor *readDataSet(const char *path, size_t n_color) {
(void)closedir(dirp);
auto *nchw_batch = to_nchw(batch);
delete batch;
DEBUG("Loaded all images.");
DEBUG("Loaded all images.\n");
return nchw_batch;
}
void saveDataSet(
const char *path, const char *prefix, Tensor *batch) {
INFO("Saving image dataset to path %s", path);
DEBUG("Copying to CPU before printing");
INFO("Saving image dataset to path %s\n", path);
DEBUG("Copying to CPU before printing\n");
deviceToHostCopy(batch);
Tensor *converted_batch = batch;
if (batch->data_format == CUDNN_TENSOR_NCHW) {
DEBUG("Copy-converting to NHWC format");
DEBUG("Copy-converting to NHWC format\n");
converted_batch = to_nhwc(batch);
}
std::vector<size_t> sizes = ::sizes(converted_batch);
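Putting the two entry points above together, a round trip through the dataset helpers would look roughly like this; the paths and prefix below are placeholders, not values from this repository:

// Hypothetical usage of readDataSet / saveDataSet (paths are placeholders).
Tensor *batch = readDataSet("/data/images", /*n_color=*/3); // NCHW float batch
// ... run some pipeline over `batch` ...
saveDataSet("/data/output", "out_", batch); // presumably one file per batch entry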
@@ -200,14 +200,15 @@ void saveDataSet(
}
void *loadAsImage(const char *filename, size_t n_color) {
INFO("Loading image from path=%s", filename);
INFO("Loading image from path=%s\n", filename);
int x, y, n; // x = width, y = height, n = # 8-bit components per pixel
uint8_t *data = stbi_load(filename, &x, &y, &n, n_color);
if (data == nullptr)
throw std::runtime_error("Image load failed");
float *converted = uint8_to_float(data, x * y * n);
DEBUG("Loading shape: (1, %lu, %lu, %lu)(NHWC)\n", y, x, n_color);
auto *image =
-(Tensor *)create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NHWC, 1, n, y, x);
+(Tensor *)create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NHWC, 1, y, x, n);
std::memcpy(image->host_data, converted, x * y * n * sizeof(float));
auto *nchw_image = to_nchw(image);
stbi_image_free(data);
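loadAsImage runs the 8-bit stbi pixel buffer through uint8_to_float before copying it into the NHWC tensor. That helper is only referenced here; a plausible sketch is below, with the caveat that whether the real version also normalizes to [0, 1] is not visible in this diff:

// Sketch of uint8_to_float: widen 8-bit pixel values to float.
// Whether the real helper divides by 255 is not shown in this commit.
#include <cstddef>
#include <cstdint>

static float *uint8_to_float_sketch(const uint8_t *ui, size_t len) {
  auto *out = new float[len]; // caller is responsible for delete[]
  for (size_t i = 0; i < len; ++i)
    out[i] = static_cast<float>(ui[i]);
  return out;
}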
@@ -216,11 +217,11 @@ void *loadAsImage(const char *filename, size_t n_color) {
}
void saveToImage(const char *filename, Tensor *tensor) {
INFO("Saving image data to path=%s", filename);
INFO("Saving image data to path=%s\n", filename);
deviceToHostCopy(tensor);
Tensor *converted_tensor = tensor;
if (tensor->data_format == CUDNN_TENSOR_NCHW) {
DEBUG("Copy-converting to NHWC format");
DEBUG("Copy-converting to NHWC format\n");
converted_tensor = to_nhwc(tensor);
}
auto *hdr_data = (float *)converted_tensor->host_data;
@@ -236,7 +237,7 @@ void saveToImage(const char *filename, Tensor *tensor) {
void *createFilterFromData(
int data_type, void *data, size_t w, size_t h, size_t n_chan) {
DEBUG("Creating filter from data");
DEBUG("Creating filter from data\n");
auto *tensor =
(Tensor *)create4DTensor(data_type, CUDNN_TENSOR_NCHW, 1, n_chan, h, w);
char *tensor_data;
@@ -265,7 +266,7 @@ std::vector<float> PSNR(void *gold_ptr, void *approx_ptr) {
size_t batch_dim = dim_sizes[0];
size_t image_size = dim_sizes[1] * dim_sizes[2] * dim_sizes[3];
float image_size_f = image_size;
DEBUG("batch_dim = %lu, image_size = %lu", batch_dim, image_size);
DEBUG("batch_dim = %lu, image_size = %lu\n", batch_dim, image_size);
auto *image_size_tensor = (Tensor *)create4DTensor(
CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 1, 1, 1
);
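For reference, the quantity this function computes per batch entry is the standard peak signal-to-noise ratio over image_size pixels. A CPU sketch of the formula (max_val is the peak signal value, e.g. 255 for 8-bit images; which convention the runtime uses is not visible in this hunk):

// PSNR reference: MSE = mean((gold - approx)^2) over one image,
// PSNR = 10 * log10(max_val^2 / MSE).
#include <cmath>
#include <cstddef>

static float psnr_reference(const float *gold, const float *approx,
                            size_t image_size, float max_val) {
  double mse = 0.0;
  for (size_t i = 0; i < image_size; ++i) {
    double d = gold[i] - approx[i];
    mse += d * d;
  }
  mse /= static_cast<double>(image_size);
  return static_cast<float>(10.0 * std::log10(static_cast<double>(max_val) * max_val / mse));
}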