Skip to content
Snippets Groups Projects
Commit 790f3e16 authored by Hashim Sharif's avatar Hashim Sharif
Browse files

Adding Alexnet-canny

parent 57ecf570
No related branches found
No related tags found
No related merge requests found
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <string.h>

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <fstream>
#include <iterator>
#include <tuple>
#include <utility>
#include <vector>

#include "tensor_runtime.h"
#include "utils.h"
// Builds a w x h Gaussian blur kernel with standard deviation `sigma`,
// normalized so its weights sum to 1, and wraps it in a filter tensor with
// `n_chan` channels.
// NOTE(review): assumes w and h are odd so the kernel has a true center —
// for even dims the (w-1)/2 half-extents silently truncate; confirm callers.
Tensor *gaussianFilter(float sigma, size_t w, size_t h, size_t n_chan) {
  // Half-extents of the kernel around its center element.
  int64_t m = (w - 1) / 2, n = (h - 1) / 2;
  // Locally-owned buffer; the original `new float[w*h]` was never freed.
  // NOTE(review): assumes createFilterFromData copies the data into the
  // tensor — confirm, otherwise the buffer must outlive the Tensor.
  std::vector<float> data(w * h);
  float sum = 0.0f;
  for (int64_t i = -m; i <= m; i++)
    for (int64_t j = -n; j <= n; j++) {
      // Row-major index for kernel cell (i+m, j+n).
      size_t idx = (i + m) * h + (j + n);
      float exponent = -(i * i + j * j) / (2.0 * sigma * sigma);
      data[idx] = std::exp(exponent);
      sum += data[idx];
    }
  // Normalize to unit sum; guard against divide-by-zero for degenerate sigma.
  if (sum != 0.0f)
    for (float &v : data)
      v /= sum;
  return (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, data.data(), w, h,
                                        n_chan);
}
std::pair<Tensor*, Tensor*> getSobelKernels() {
std::vector<float> k1({-1, 0, 1, -2, 0, 2, -1, 0, 1});
std::vector<float> k2({1, 2, 1, 0, 0, 0, -1, -2, -1});
auto *t1 =
(Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, k1.data(), 3, 3, 1);
auto *t2 =
(Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, k2.data(), 3, 3, 1);
return std::make_pair(t1, t2);
}
/***
TODOs:
* Precision calculation?
* tensorArgMax?
* tensorSelect?
* tensorContract?
* Autotuning support for these functions
* FP32 vs FP16 versions of sampling / perforation?
* Need a TensorRT version and a PROMISE API version
* How to profile? Are profileEvent calls added?
* PyTorch version
****/
// Canny-style edge-detection pipeline built from tensor-runtime ops:
// grayscale -> Gaussian denoise -> Sobel gradients -> normalized gradient
// magnitude. Takes a batch of images (as an opaque tensor handle) and
// returns the normalized gradient-magnitude tensor.
// NOTE(review): the later Canny stages (non-maximum suppression, hysteresis
// thresholding) are not implemented here — only gradient magnitude.
void* canny_filter(void* dataset) {
// 5x5 Gaussian blur kernel with sigma = 1.4 (a classic Canny choice).
Tensor *gaussian = gaussianFilter(1.4, 5, 5, 1);
Tensor *kernel_x, *kernel_y;
std::tie(kernel_x, kernel_y) = getSobelKernels();
// 0. Grayscale: reduce (sum) over dim 1, then Avg3 — presumably dim 1 is
// the RGB channel axis and Avg3 divides by 3; confirm against the runtime.
auto *summed_image = autotuner_tensorReduce(dataset, 1, MathOp::Add);
auto *grayscale_image = autotuner_tensorMap1(MathOp::Avg3, summed_image);
// 1. Denoise: convolve with the Gaussian. The positional args (2, 2 after
// the bias) look like padding of 2, which keeps spatial size for a 5x5
// kernel — TODO confirm against ConvLayer_PROMISE's signature.
auto *image2 = ConvLayer_PROMISE(grayscale_image, 0.0, 0.0, gaussian,
0.0, 0.0, nullptr, 0.0, 0.0, 2, 2, 1,
1, 0, 0, -1, 0.0, 0.0, 0);
// 2. Edge gradients: Sobel convolutions in x and y, then per-pixel
// magnitude via hypot(gx, gy).
auto *grad_x = ConvLayer_PROMISE(
image2, 0.0, 0.0, kernel_x, 0.0, 0.0, nullptr, 0.0, 0.0, 1, 1, 1, 1, 0, 0,
-1, 0.0, 0.0, 0);
auto *grad_y = ConvLayer_PROMISE(
image2, 0.0, 0.0, kernel_y, 0.0, 0.0, nullptr, 0.0, 0.0, 1, 1, 1, 1, 0, 0,
-1, 0.0, 0.0, 0);
auto *grad_mag = autotuner_tensorMap2(MathOp::Hypot, grad_x, grad_y);
// 2.5. Normalize: reduce Max over dims 2 then 3 (presumably H then W,
// yielding a per-image max) and divide — relies on broadcasting in Div.
auto *grad_max_1D = autotuner_tensorReduce(grad_mag, 2, MathOp::Max);
auto *grad_max = autotuner_tensorReduce(grad_max_1D, 3, MathOp::Max);
auto *grad_mag_norm = autotuner_tensorMap2(MathOp::Div, grad_mag, grad_max);
return grad_mag_norm;
}
const size_t batch_size = 500, total_max = 3000;
const float psnr_threshold = 30.0;
int main() {
const char *input_path = "../model_params/image_processing_5k";
const char *ref_output_path = "../model_params/canny_ref_output";
std::vector<float> psnr;
llvm_hpvm_initTensorRt(1);
size_t bstart = 0;
startMemTracking();
while (true) {
Tensor *batch = readDataSet(input_path, bstart, batch_size);
if (batch == nullptr)
break;
auto *result = main_procedure(batch);
auto *ref_output = readDataSet(ref_output_path, bstart, batch_size, 1);
std::vector<float> psnr_batch = PSNR(ref_output, result);
std::copy(psnr_batch.begin(), psnr_batch.end(), std::back_inserter(psnr));
bstart += batch_size;
if (bstart >= total_max)
break;
freeBatchMemory();
}
float violation = violationRate(psnr, psnr_threshold);
float mean_psnr = mean(psnr);
std::ofstream of("final_accuracy");
of << violation * 100 << ", " << mean_psnr << '\n';
return 0;
}
// AlexNet inference driver for CIFAR-10 (3x32x32 inputs, 10 classes):
// loads pretrained weights from ../model_params/alexnet_cifar10_front/,
// runs batched inference through 5 conv layers + 1 dense layer, and writes
// the mean top-1 accuracy via dumpFinalAccuracy().
// NOTE(review): another main() appears earlier in this file; the duplicate
// entry points conflict and one must be renamed or removed before this links.
int main(){
// Argument 0: runtime/device selection flag — semantics defined by the
// tensor runtime; confirm against llvm_hpvm_initTensorRt's declaration.
llvm_hpvm_initTensorRt(0);
//std::string dir_prefix = std::string("../model_params/alexnet_cifar10_front/");
std::string dir_prefix = std::string("../model_params/alexnet_cifar10_front/");
std::string input_path = dir_prefix + std::string("input.bin");
//void* input = readTrainedWeights(input_path.c_str(), 0,10000,3,32,32);
std::string labels_path = dir_prefix + std::string("labels.bin");
//uint8_t* labels = readLabels(labels_path.c_str(),10000);
// Weights/biases below: the four trailing dims passed to readTrainedWeights
// are presumably (N, C, H, W) filter shapes — e.g. conv1 is 64 filters of
// 3x11x11; biases are 1xCx1x1. TODO confirm against readTrainedWeights.
std::string conv2d_1_w_path = dir_prefix + std::string("conv0.bin");
void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,11,11);
std::string conv2d_1_b_path = dir_prefix + std::string("conv_bias0.bin");
void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1);
std::string conv2d_2_w_path = dir_prefix + std::string("conv3.bin");
void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,192,64,5,5);
std::string conv2d_2_b_path = dir_prefix + std::string("conv_bias3.bin");
void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,192,1,1);
std::string conv2d_3_w_path = dir_prefix + std::string("conv6.bin");
void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,384,192,3,3);
std::string conv2d_3_b_path = dir_prefix + std::string("conv_bias6.bin");
void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,384,1,1);
std::string conv2d_4_w_path = dir_prefix + std::string("conv7.bin");
void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,256,384,3,3);
std::string conv2d_4_b_path = dir_prefix + std::string("conv_bias7.bin");
void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,256,1,1);
std::string conv2d_5_w_path = dir_prefix + std::string("conv8.bin");
void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,256,3,3);
std::string conv2d_5_b_path = dir_prefix + std::string("conv_bias8.bin");
void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1);
// Dense layer: 4096 -> 10 weight matrix plus a 10-element bias.
std::string dense_1_w_path = dir_prefix + std::string("fc12.bin");
void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,4096,10);
std::string dense_1_b_path = dir_prefix + std::string("fc_bias12.bin");
void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1);
startMemTracking();
// NOTE(review): this local batch_size shadows the file-level
// `const size_t batch_size = 500` declared above for the canny driver.
// With test_input_size == batch_size, batch_count is 1 (a single batch).
int test_input_size = 2000;
int batch_size = 2000;
int batch_count = test_input_size / batch_size;
float final_accuracy = 0.0;
// NOTE: Starting time profiling
startProfiling();
for(int i = 0; i < batch_count; i++){
// Half-open input range [start, end) for this batch.
int start = i * batch_size;
int end = (i + 1) * batch_size;
void* input = readInputBatch(input_path.c_str(), 0,start,end,3,32,32);
// Conv -> bias-add -> tanh (-> pool) stages. The numeric args to
// tensorConvolution/tensorPooling look like padding/stride tuples —
// TODO confirm against the tensor runtime's declarations.
void* var_0 = tensorConvolution(input, conv2d_1_w, 5, 5, 1, 1, 1, 0);
void* var_1 = tensorAdd(var_0, conv2d_1_b);
void* var_2 = tensorTanh(var_1);
void* var_3 = tensorPooling(var_2,0,2,2,0,0,2,2);
void* var_5 = tensorConvolution(var_3, conv2d_2_w, 2, 2, 1, 1, 1, 0);
void* var_6 = tensorAdd(var_5, conv2d_2_b);
void* var_7 = tensorTanh(var_6);
void* var_8 = tensorPooling(var_7,0,2,2,0,0,2,2);
void* var_10 = tensorConvolution(var_8, conv2d_3_w, 1, 1, 1, 1, 1, 0);
void* var_11 = tensorAdd(var_10, conv2d_3_b);
void* var_12 = tensorTanh(var_11);
void* var_13 = tensorConvolution(var_12, conv2d_4_w, 1, 1, 1, 1, 1, 0);
void* var_14 = tensorAdd(var_13, conv2d_4_b);
void* var_15 = tensorTanh(var_14);
void* var_16 = tensorConvolution(var_15, conv2d_5_w, 1, 1, 1, 1, 1, 0);
void* var_17 = tensorAdd(var_16, conv2d_5_b);
void* var_18 = tensorTanh(var_17);
void* var_19 = tensorPooling(var_18,0,2,2,0,0,2,2);
// Classifier head: GEMM + bias + softmax over the 10 classes.
void* var_22 = tensorGemmGPU(var_19, dense_1_w);
void* var_23 = tensorAdd(var_22, dense_1_b);
void* var_24 = tensorSoftmax(var_23);
// Score this batch against its ground-truth labels.
uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end);
float accuracy = computeAccuracy2(labels,batch_size,var_24);
final_accuracy += accuracy;
freeBatchMemory();
}
stopProfiling();
// Average per-batch accuracy across all batches.
final_accuracy = final_accuracy / batch_count;
dumpFinalAccuracy(final_accuracy);
llvm_hpvm_cleanupTensorRt();
return 0;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment