diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet_canny.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet_canny.cc new file mode 100644 index 0000000000000000000000000000000000000000..0aa33bc43dace1f847f44ed7ad6dcfc0082d014a --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet_canny.cc @@ -0,0 +1,221 @@ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <string.h> +#include "tensor_runtime.h" +#include "utils.h" + + + + +Tensor *gaussianFilter(float sigma, size_t w, size_t h, size_t n_chan) { + int64_t m = (w - 1) / 2, n = (h - 1) / 2; + auto *data = new float[w * h]; + float sum = 0.0f; + for (int64_t i = -m; i <= m; i++) + for (int64_t j = -n; j <= n; j++) { + size_t idx = (i + m) * h + (j + n); + float exponent = -(i * i + j * j) / (2.0 * sigma * sigma); + data[idx] = exp(exponent); + sum += data[idx]; + } + if (sum != 0.0f) + for (size_t i = 0; i < w * h; i++) + data[i] /= sum; + return (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, data, w, h, n_chan); +} + +std::pair<Tensor*, Tensor*> getSobelKernels() { + std::vector<float> k1({-1, 0, 1, -2, 0, 2, -1, 0, 1}); + std::vector<float> k2({1, 2, 1, 0, 0, 0, -1, -2, -1}); + auto *t1 = + (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, k1.data(), 3, 3, 1); + auto *t2 = + (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, k2.data(), 3, 3, 1); + return std::make_pair(t1, t2); +} + +/*** + +TODOs: + +* Precision calculation? +* tensorArgMax? +* tensorSelect? +* tensorContract +* autotuning support for these functions +* FP32 vs F16 versions of sampling perforation? +* Need tensorRT version and a PROMISE API version +* How to Profile? are profileEvent calls added +* Pytorch version + + +****/ + +void* canny_filter(void* dataset) { + Tensor *gaussian = gaussianFilter(1.4, 5, 5, 1); + Tensor *kernel_x, *kernel_y; + std::tie(kernel_x, kernel_y) = getSobelKernels(); + + // 0. 
Grayscale + auto *summed_image = autotuner_tensorReduce(dataset, 1, MathOp::Add); + auto *grayscale_image = autotuner_tensorMap1(MathOp::Avg3, summed_image); + // 1. Denoise + auto *image2 = ConvLayer_PROMISE(grayscale_image, 0.0, 0.0, gaussian, + 0.0, 0.0, nullptr, 0.0, 0.0, 2, 2, 1, + 1, 0, 0, -1, 0.0, 0.0, 0); + // 2. Get edge gradient / direction + auto *grad_x = ConvLayer_PROMISE( + image2, 0.0, 0.0, kernel_x, 0.0, 0.0, nullptr, 0.0, 0.0, 1, 1, 1, 1, 0, 0, + -1, 0.0, 0.0, 0); + auto *grad_y = ConvLayer_PROMISE( + image2, 0.0, 0.0, kernel_y, 0.0, 0.0, nullptr, 0.0, 0.0, 1, 1, 1, 1, 0, 0, + -1, 0.0, 0.0, 0); + auto *grad_mag = autotuner_tensorMap2(MathOp::Hypot, grad_x, grad_y); + // 2.5. Normalize grad magnitude + auto *grad_max_1D = autotuner_tensorReduce(grad_mag, 2, MathOp::Max); + auto *grad_max = autotuner_tensorReduce(grad_max_1D, 3, MathOp::Max); + auto *grad_mag_norm = autotuner_tensorMap2(MathOp::Div, grad_mag, grad_max); + return grad_mag_norm; +} + +const size_t batch_size = 500, total_max = 3000; +const float psnr_threshold = 30.0; + + + + +int main() { + const char *input_path = "../model_params/image_processing_5k"; + const char *ref_output_path = "../model_params/canny_ref_output"; + std::vector<float> psnr; + llvm_hpvm_initTensorRt(1); + size_t bstart = 0; + startMemTracking(); + while (true) { + Tensor *batch = readDataSet(input_path, bstart, batch_size); + if (batch == nullptr) + break; + + auto *result = main_procedure(batch); + auto *ref_output = readDataSet(ref_output_path, bstart, batch_size, 1); + std::vector<float> psnr_batch = PSNR(ref_output, result); + std::copy(psnr_batch.begin(), psnr_batch.end(), std::back_inserter(psnr)); + bstart += batch_size; + if (bstart >= total_max) + break; + freeBatchMemory(); + } + float violation = violationRate(psnr, psnr_threshold); + float mean_psnr = mean(psnr); + std::ofstream of("final_accuracy"); + of << violation * 100 << ", " << mean_psnr << '\n'; + return 0; +} + + + + + + + + +int main(){ + + 
  // Initialize the HPVM tensor runtime on GPU id 0.
  llvm_hpvm_initTensorRt(0);


  //std::string dir_prefix = std::string("../model_params/alexnet_cifar10_front/");
  // All model parameters and inputs are read from this directory.
  std::string dir_prefix = std::string("../model_params/alexnet_cifar10_front/");

  std::string input_path = dir_prefix + std::string("input.bin");
  //void* input = readTrainedWeights(input_path.c_str(), 0,10000,3,32,32);
  std::string labels_path = dir_prefix + std::string("labels.bin");
  //uint8_t* labels = readLabels(labels_path.c_str(),10000);
  // Load conv/FC weights and biases. The trailing numbers are the tensor
  // dimensions passed to readTrainedWeights (filters, channels, height,
  // width) — e.g. conv1 is 64 filters of 3x11x11.
  std::string conv2d_1_w_path = dir_prefix + std::string("conv0.bin");
  void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,11,11);
  std::string conv2d_1_b_path = dir_prefix + std::string("conv_bias0.bin");
  void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1);
  std::string conv2d_2_w_path = dir_prefix + std::string("conv3.bin");
  void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,192,64,5,5);
  std::string conv2d_2_b_path = dir_prefix + std::string("conv_bias3.bin");
  void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,192,1,1);
  std::string conv2d_3_w_path = dir_prefix + std::string("conv6.bin");
  void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,384,192,3,3);
  std::string conv2d_3_b_path = dir_prefix + std::string("conv_bias6.bin");
  void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,384,1,1);
  std::string conv2d_4_w_path = dir_prefix + std::string("conv7.bin");
  void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,256,384,3,3);
  std::string conv2d_4_b_path = dir_prefix + std::string("conv_bias7.bin");
  void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,256,1,1);
  std::string conv2d_5_w_path = dir_prefix + std::string("conv8.bin");
  void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,256,3,3);
  std::string conv2d_5_b_path = dir_prefix + std::string("conv_bias8.bin");
  void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1);
  std::string dense_1_w_path = dir_prefix + std::string("fc12.bin");
  void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,4096,10);
  std::string dense_1_b_path = dir_prefix + std::string("fc_bias12.bin");
  void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1);


  // Track per-batch allocations so freeBatchMemory() can reclaim them.
  startMemTracking();

  // With test_input_size == batch_size, batch_count is 1: the whole 2000-image
  // test set is processed in a single batch.
  int test_input_size = 2000;
  int batch_size = 2000;
  int batch_count = test_input_size / batch_size;
  float final_accuracy = 0.0;

  // NOTE: Starting time profiling
  startProfiling();

  for(int i = 0; i < batch_count; i++){

    int start = i * batch_size;
    int end = (i + 1) * batch_size;
    // Read images [start, end) as 3x32x32 (CIFAR-10 sized) inputs.
    void* input = readInputBatch(input_path.c_str(), 0,start,end,3,32,32);

    // Block 1: 11x11 conv (pad 5, stride 1) -> bias -> tanh -> 2x2 pooling
    // (pool type 0 — presumably max; confirm against the runtime).
    void* var_0 = tensorConvolution(input, conv2d_1_w, 5, 5, 1, 1, 1, 0);
    void* var_1 = tensorAdd(var_0, conv2d_1_b);
    void* var_2 = tensorTanh(var_1);
    void* var_3 = tensorPooling(var_2,0,2,2,0,0,2,2);
    // Block 2: 5x5 conv (pad 2) -> bias -> tanh -> 2x2 pooling.
    void* var_5 = tensorConvolution(var_3, conv2d_2_w, 2, 2, 1, 1, 1, 0);
    void* var_6 = tensorAdd(var_5, conv2d_2_b);
    void* var_7 = tensorTanh(var_6);
    void* var_8 = tensorPooling(var_7,0,2,2,0,0,2,2);
    // Block 3: 3x3 conv (pad 1) -> bias -> tanh (no pooling).
    void* var_10 = tensorConvolution(var_8, conv2d_3_w, 1, 1, 1, 1, 1, 0);
    void* var_11 = tensorAdd(var_10, conv2d_3_b);
    void* var_12 = tensorTanh(var_11);
    // Block 4: 3x3 conv (pad 1) -> bias -> tanh.
    void* var_13 = tensorConvolution(var_12, conv2d_4_w, 1, 1, 1, 1, 1, 0);
    void* var_14 = tensorAdd(var_13, conv2d_4_b);
    void* var_15 = tensorTanh(var_14);
    // Block 5: 3x3 conv (pad 1) -> bias -> tanh -> 2x2 pooling.
    void* var_16 = tensorConvolution(var_15, conv2d_5_w, 1, 1, 1, 1, 1, 0);
    void* var_17 = tensorAdd(var_16, conv2d_5_b);
    void* var_18 = tensorTanh(var_17);
    void* var_19 = tensorPooling(var_18,0,2,2,0,0,2,2);
    // Classifier: 4096 -> 10 fully-connected layer, bias, softmax.
    void* var_22 = tensorGemmGPU(var_19, dense_1_w);
    void* var_23 = tensorAdd(var_22, dense_1_b);
    void* var_24 = tensorSoftmax(var_23);

    uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end);

    // Top-1 accuracy of the softmax output against the ground-truth labels.
    float accuracy = computeAccuracy2(labels,batch_size,var_24);
    final_accuracy += accuracy;

    freeBatchMemory();
  }

  stopProfiling();

  // Average accuracy across batches, written out for the harness to read.
  final_accuracy = final_accuracy / batch_count;
  dumpFinalAccuracy(final_accuracy);


  llvm_hpvm_cleanupTensorRt();

  return 0;

}