diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/src/canny_direct_call.cpp b/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/src/canny_direct_call.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/src/canny_direct_call.cpp
@@ -0,0 +1,141 @@
+#include "tensor_runtime.h"
+#include "visc.h"
+
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+// Builds a w x h Gaussian kernel, normalises it so the weights sum to 1, and
+// wraps it in a filter tensor via createFilterFromData.
+//
+//   sigma  - standard deviation of the Gaussian; must be non-zero.
+//   w, h   - kernel extents; must be non-zero. Taps are centred on
+//            ((w - 1) / 2, (h - 1) / 2), so with an even extent the last
+//            row/column is never visited by the fill loop and stays 0.
+//   n_chan - forwarded unchanged to createFilterFromData.
+Tensor *gaussianFilter(float sigma, size_t w, size_t h, size_t n_chan) {
+  const int64_t m = (w - 1) / 2;
+  const int64_t n = (h - 1) / 2;
+  // Scope-owned and zero-filled, so nothing leaks and taps the fill loop never
+  // visits (even-sized kernels) are well-defined zeros.
+  // NOTE(review): relies on createFilterFromData copying its input, which
+  // getSobelKernels already assumes by passing a local buffer - confirm.
+  std::vector<float> data(w * h, 0.0f);
+  float sum = 0.0f;
+  for (int64_t i = -m; i <= m; i++)
+    for (int64_t j = -n; j <= n; j++) {
+      const size_t idx =
+          static_cast<size_t>(i + m) * h + static_cast<size_t>(j + n);
+      const float exponent = -(i * i + j * j) / (2.0 * sigma * sigma);
+      data[idx] = std::exp(exponent);
+      sum += data[idx];
+    }
+  // Leave the weights untouched when they sum to exactly 0 (no division by 0).
+  if (sum != 0.0f)
+    for (float &weight : data)
+      weight /= sum;
+  return (Tensor *)createFilterFromData(
+      CUDNN_DATA_FLOAT, data.data(), w, h, n_chan);
+}
+
+// Creates the two 3x3 single-channel Sobel filter tensors.
+// Returns {tensor of k1, tensor of k2}; main_fp32 uses them as the x and y
+// kernels respectively.
+std::pair<Tensor *, Tensor *> getSobelKernels() {
+  std::vector<float> k1({-1, 0, 1, -2, 0, 2, -1, 0, 1});
+  std::vector<float> k2({1, 2, 1, 0, 0, 0, -1, -2, -1});
+  auto *t1 =
+      (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, k1.data(), 3, 3, 1);
+  auto *t2 =
+      (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, k2.data(), 3, 3, 1);
+  return std::make_pair(t1, t2);
+}
+
+// Runs the edge-detection pipeline on one batch: grayscale, Gaussian denoise,
+// Sobel gradients, gradient magnitude, then division by the result of the two
+// fmax reductions.
+//
+//   dataset - input tensor handle, passed straight to the first reduce.
+//
+// Returns the normalised gradient-magnitude tensor.
+// NOTE(review): the leading string of each wrapper_* call looks like a
+// per-operation id for the runtime controller - confirm.
+void *main_fp32(void *dataset) {
+  Tensor *gaussian = gaussianFilter(1.4, 5, 5, 1);
+  Tensor *kernel_x, *kernel_y;
+  std::tie(kernel_x, kernel_y) = getSobelKernels();
+
+  // 0. Grayscale
+  auto *summed_image =
+      wrapper_tensorReduce("1", dataset, 1, device::fadd_ptrptr);
+  auto *grayscale_image =
+      wrapper_tensorMap1("2", device::favg3_ptrptr, summed_image);
+  // 1. Denoise
+  auto *image2 = wrapper_ConvLayer(
+      "3", grayscale_image, gaussian, nullptr, 2, 2, 1, 1, 0, 0, -1, 0.0, 0.0);
+  // 2. Get edge gradient / direction
+  auto *grad_x = wrapper_ConvLayer(
+      "4", image2, kernel_x, nullptr, 1, 1, 1, 1, 0, 0, -1, 0.0, 0.0);
+  auto *grad_y = wrapper_ConvLayer(
+      "5", image2, kernel_y, nullptr, 1, 1, 1, 1, 0, 0, -1, 0.0, 0.0);
+  auto *grad_mag =
+      wrapper_tensorMap2("6", device::fhypot_ptrptr, grad_x, grad_y);
+  // 2.5. Normalize grad magnitude
+  auto *grad_max_1D =
+      wrapper_tensorReduce("7", grad_mag, 2, device::fmax_ptrptr);
+  auto *grad_max =
+      wrapper_tensorReduce("8", grad_max_1D, 3, device::fmax_ptrptr);
+  auto *grad_mag_norm =
+      wrapper_tensorMap2("9", device::fdiv_ptrptr, grad_mag, grad_max);
+  return grad_mag_norm;
+}
+
+extern void llvm_hpvm_initializeRuntimeController(
+    const char *ConfigFile, const char *QRangeFile);
+
+extern void llvm_hpvm_clearRuntimeController();
+
+extern void llvm_hpvm_initTensorRt(int gpuid);
+
+extern void llvm_hpvm_nextIter();
+
+// Batch size requested from readDataSet; bstart also advances by this much.
+const size_t batch_size = 1000;
+
+// Usage: <program> <input> <output>
+// Reads <input> batch by batch, runs main_fp32 on each batch and saves the
+// result to <output>, until readDataSet returns nullptr.
+int main(int argc, char *argv[]) {
+  if (argc < 3) {
+    // Report misuse instead of silently exiting with a success status.
+    std::fprintf(
+        stderr, "Usage: %s <input> <output>\n",
+        argc > 0 ? argv[0] : "canny_direct_call");
+    return 1;
+  }
+  llvm_hpvm_initTensorRt(0);
+  llvm_hpvm_initializeRuntimeController("data/tuner_confs.txt", "");
+
+  size_t bstart = 0;
+  startMemTracking();
+  startProfiling();
+  while (true) {
+    Tensor *batch = readDataSet(argv[1], bstart, batch_size);
+    if (batch == nullptr)
+      break;
+
+    auto *result_fp32 = main_fp32(batch);
+    saveDataSet(argv[2], static_cast<Tensor *>(result_fp32), bstart);
+
+    bstart += batch_size;
+    llvm_hpvm_nextIter();
+    freeBatchMemory();
+    clearTensorMap();
+  }
+  stopProfiling();
+  llvm_hpvm_clearRuntimeController();
+  return 0;
+}
diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/src/canny_test.cpp b/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/src/canny_test.cpp
index eb25c085e2b6c5aa556092329dc17473392b8a40..84df96e2458058d4849f0164fc20d9a443700624 100644
--- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/src/canny_test.cpp
+++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/src/canny_test.cpp
@@ -43,7 +43,7 @@ void var_3_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
   __visc__hint(visc::GPU_TARGET);
   __visc__attributes(2, t1, t2, 0);
 
-  void *r = __visc__tensor_convolution(t1, t2, 2, 2, 1, 1);
+  void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1);
   __visc__return(2, r, (size_t)0);
 }
 
@@ -51,7 +51,7 @@ void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
   __visc__hint(visc::GPU_TARGET);
   __visc__attributes(2, t1, t2, 0);
 
-  void *r = __visc__tensor_convolution(t1, t2, 2, 2, 1, 1);
+  void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1);
   __visc__return(2, r, (size_t)0);
 }
 