From 61a39cc1e6aa663a11bb7d342481dd6b329713a3 Mon Sep 17 00:00:00 2001 From: Hashim Sharif <hsharif3@tyler.cs.illinois.edu> Date: Sat, 23 Feb 2019 16:55:19 -0600 Subject: [PATCH] Reducing compile-time warnings in HPVM tensor runtime --- .../dnn_sources/include/utils.h | 28 ++++----- .../dnn_sources/src/alexnet2_cifar10.cc | 2 +- .../dnn_sources/src/alexnet_cifar10.cc | 8 +-- .../dnn_sources/src/fc2_clipped.cc | 2 +- .../dnn_sources/src/fc3_clipped.cc | 2 +- .../dnn_sources/src/fc4_clipped.cc | 2 +- .../dnn_sources/src/lenet_front.cc | 61 +++++++++++++++++++ .../dnn_sources/src/lenet_keras.cc | 6 +- .../tensor_runtime/include/debug.h | 8 +-- .../tensor_runtime/include/error.h | 10 +-- .../include/half_precision_api.h | 3 +- .../tensor_runtime/include/op_overheads.h | 2 +- .../tensor_runtime/include/tensor_runtime.h | 33 +++++++--- .../tensor_runtime/include/tensor_utils.cu | 1 + .../tensor_runtime/src/tensor_runtime.cu | 21 ++++--- 15 files changed, 136 insertions(+), 53 deletions(-) create mode 100644 llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_front.cc diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h b/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h index 86754b82d7..ec28fccb78 100644 --- a/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h @@ -18,9 +18,9 @@ void printTensorInfo(void* tensor_ptr){ } printf("tensor dims = %d \n", tensor->dims.num_dims); - printf("dim1_size = %d \n", tensor->dims.dim_sizes[0]); - printf("dim2_size = %d \n", tensor->dims.dim_sizes[1]); - printf("num_elems = %d \n", tensor->num_elems); + printf("dim1_size = %lu \n", tensor->dims.dim_sizes[0]); + printf("dim2_size = %lu \n", tensor->dims.dim_sizes[1]); + printf("num_elems = %lu \n", tensor->num_elems); } @@ -37,9 +37,9 @@ void dumpWeightsToFile(char* file_name, void* weights_ptr){ abort(); } - printf("size_in_bytes = %d \n", weights->size_in_bytes); + printf("size_in_bytes = %lu \n", weights->size_in_bytes); size_t bytes_written = fwrite(weights->host_data, 1, weights->size_in_bytes, fp); - printf("bytes_written = %d \n", bytes_written); + printf("bytes_written = %lu \n", bytes_written); fclose(fp); } @@ -133,9 +133,9 @@ void printTensorDims(void* tensor_ptr){ struct Tensor* tensor = (struct Tensor*) tensor_ptr; - printf("Num_elems = %d \n", tensor->num_elems); + printf("Num_elems = %lu \n", tensor->num_elems); for (int i = 0; i < tensor->dims.num_dims; i++){ - printf("dim[%d] = %d \n", i, tensor->dims.dim_sizes[i]); + printf("dim[%d] = %lu \n", i, tensor->dims.dim_sizes[i]); } } @@ -178,7 +178,7 @@ void compareValues(void* tensor_ptr, float* data, size_t num_elems){ } -void* readInputTensor(char* file_name, int data_type, int dim1_size, int dim2_size, +void* readInputTensor(const char* file_name, int data_type, int dim1_size, int dim2_size, int dim3_size, int dim4_size){ int type_size = 4; // NOTE: Assuming floating point tensors @@ -271,13 +271,13 @@ uint8_t* readLabels(const char* labels_file, int num_labels){ fclose(file); - printf("--labels bytes_read = %d \n", bytes_read); + printf("--labels bytes_read = %lu \n", bytes_read); return labels; } -void computeAccuracy(char* labels_file, int num_labels, void* result_ptr){ +void computeAccuracy(const char* labels_file, int num_labels, void* result_ptr){ struct Tensor* result = (struct Tensor*) result_ptr; @@ -318,19 +318,18 @@ void computeAccuracy(char* labels_file, int num_labels, void* result_ptr){ -void computeAccuracy2(uint8_t* labels, int num_labels, void* result_ptr, unsigned num_classes = 10){ +float computeAccuracy2(uint8_t* labels, int num_labels, void* result_ptr, unsigned num_classes = 10){ unsigned num_zeros = 0; struct Tensor* result = (struct Tensor*) result_ptr; - //uint8_t* labels = readLabels(labels_file, num_labels); size_t batch_dim = result->dims.dim_sizes[0]; size_t channels = result->dims.dim_sizes[1]; float* data = (float*) result->host_data; int num_errors = 0; - printf("batch_dim = %d, channels = %d \n", batch_dim, channels); + printf("batch_dim = %lu, channels = %lu \n", batch_dim, channels); for(int i = 0; i < batch_dim; i++){ int chosen = 0; @@ -338,7 +337,6 @@ void computeAccuracy2(uint8_t* labels, int num_labels, void* result_ptr, unsigne if (data[i * channels + chosen] < data[i * channels + id]) chosen = id; } - //printf("chosen = %d, label = %d \n", chosen, labels[i]); if(labels[i] == 0) num_zeros++; @@ -350,7 +348,6 @@ void computeAccuracy2(uint8_t* labels, int num_labels, void* result_ptr, unsigne printf("****** Accuracy = %f \n\n", accuracy); printf("****** Zero class labels %d \n", num_zeros); - FILE* fp = fopen("final_accuracy", "w+"); if(fp != NULL){ @@ -363,6 +360,7 @@ void computeAccuracy2(uint8_t* labels, int num_labels, void* result_ptr, unsigne fclose(fp); + return accuracy; } diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet2_cifar10.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet2_cifar10.cc index b0107a4da0..b9a73c2e2a 100644 --- a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet2_cifar10.cc +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet2_cifar10.cc @@ -74,7 +74,7 @@ void testCifarNet(){ if(Opentuner_run){ - char* myfifo = "/tmp/myfifo"; + const char* myfifo = "/tmp/myfifo"; int fd = open(myfifo, O_RDONLY); int ret_val = fcntl(fd, F_GETFD); diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet_cifar10.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet_cifar10.cc index 2576c8aac1..3e5cec7d07 100644 --- a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet_cifar10.cc +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet_cifar10.cc @@ -19,7 +19,7 @@ bool Opentuner_run = false; /* NOTE: Reference Architecture to use for profiling */ void testCifarNet(){ - int total_runs = 1; + int total_runs = 100; if(Opentuner_run){ total_runs = 1000000; } @@ -28,7 +28,7 @@ void testCifarNet(){ printf("********* CIFAR-10 DNN ********** \n"); // FIXIT: Extend this to batch of images - currently 5 images - int test_batch_size = 10000; + int test_batch_size = 5000; //uint8_t* labels = readLabels("../model_params/cifar_keras/labels.bin", test_batch_size); uint8_t* labels = readLabels("../model_params/alexnet_cifar10/test_labels.bin", test_batch_size); @@ -71,7 +71,7 @@ void testCifarNet(){ if(Opentuner_run){ - char* myfifo = "/tmp/myfifo"; + const char* myfifo = "/tmp/myfifo"; int fd = open(myfifo, O_RDONLY); int ret_val = fcntl(fd, F_GETFD); @@ -160,7 +160,7 @@ void testCifarNet(){ if(Opentuner_run){ - char* myfifo = "/tmp/myfifo"; + const char* myfifo = "/tmp/myfifo"; int fd_out = open(myfifo, O_WRONLY); int ret_val = fcntl(fd_out, F_GETFD); if(ret_val == -1){ diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc2_clipped.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc2_clipped.cc index de19b94b55..575f9b164f 100644 --- a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc2_clipped.cc +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc2_clipped.cc @@ -48,7 +48,7 @@ void test_2_Layer_clipped_FC(){ if(Opentuner_run){ - char* myfifo = "/tmp/myfifo"; + const char* myfifo = "/tmp/myfifo"; int fd = open(myfifo, O_RDONLY); int ret_val = fcntl(fd, F_GETFD); diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc3_clipped.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc3_clipped.cc index a66c4b3ec3..f566fd98a7 100644 --- a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc3_clipped.cc +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc3_clipped.cc @@ -57,7 +57,7 @@ void test_3_Layer_clipped_FC(){ if(Opentuner_run){ - char* myfifo = "/tmp/myfifo"; + const char* myfifo = "/tmp/myfifo"; int fd = open(myfifo, O_RDONLY); int ret_val = fcntl(fd, F_GETFD); if(ret_val == -1){ diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc4_clipped.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc4_clipped.cc index ec409aaa42..24a4d88812 100644 --- a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc4_clipped.cc +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc4_clipped.cc @@ -57,7 +57,7 @@ void test_4_Layer_clipped_FC(){ if(Opentuner_run){ - char* myfifo = "/tmp/myfifo"; + const char* myfifo = "/tmp/myfifo"; int fd = open(myfifo, O_RDONLY); int ret_val = fcntl(fd, F_GETFD); diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_front.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_front.cc new file mode 100644 index 0000000000..effb293a8b --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_front.cc @@ -0,0 +1,61 @@ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <string.h> +#include "../../tensor_runtime/include/tensor_runtime.h" +#include "../include/utils.h" + +int main(){ + + llvm_hpvm_initTensorRt(0); + + std::string dir_prefix = std::string("../model_params/lenet_front/"); + std::string input_path = dir_prefix + std::string("input.bin"); + void* input = readTrainedWeights(input_path.c_str(), 0,10000,1,28,28); + std::string labels_path = dir_prefix + std::string("labels.bin"); + uint8_t* labels = readLabels(labels_path.c_str(),10000); + std::string conv2d_1_w_path = dir_prefix + std::string("conv0.bin"); + void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,1,5,5); + std::string conv2d_1_b_path = dir_prefix + std::string("conv_bias0.bin"); + void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,32,1,1); + std::string conv2d_2_w_path = dir_prefix + std::string("conv2.bin"); + void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,5,5); + std::string conv2d_2_b_path = dir_prefix + std::string("conv_bias2.bin"); + void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); + std::string dense_1_w_path = dir_prefix + std::string("fc5.bin"); + void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,3136,1024); + std::string dense_1_b_path = dir_prefix + std::string("fc_bias5.bin"); + void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,1024,1,1); + std::string dense_2_w_path = dir_prefix + std::string("fc6.bin"); + void* dense_2_w = readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,1024,10); + std::string dense_2_b_path = dir_prefix + std::string("fc_bias6.bin"); + void* dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0,1,10,1,1); + + + void* var_0 = tensorConvolution(input, conv2d_1_w, 2, 2, 1, 1, 1, 0); + void* var_1 = tensorAdd(var_0, conv2d_1_b); + void* var_2 = tensorTanh(var_1); + void* var_3 = tensorPooling(var_2,0,2,2,0,0,2,2); + void* var_4 = tensorConvolution(var_3, conv2d_2_w, 2, 2, 1, 1, 1, 0); + void* var_5 = tensorAdd(var_4, conv2d_2_b); + void* var_6 = tensorTanh(var_5); + void* var_7 = tensorPooling(var_6,0,2,2,0,0,2,2); + void* var_9 = tensorGemmGPU(var_7, dense_1_w); + void* var_10 = tensorAdd(var_9, dense_1_b); + void* var_11 = tensorTanh(var_10); + void* var_12 = tensorGemmGPU(var_11, dense_2_w); + void* var_13 = tensorAdd(var_12, dense_2_b); + void* var_14 = tensorTanh(var_13); + void* var_15 = tensorSoftmax(var_14); + + computeAccuracy2(labels, 10000, var_15); + + llvm_hpvm_cleanupTensorRt(); + + return 0; + +} diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_keras.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_keras.cc index b6854f4563..edf3641076 100644 --- a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_keras.cc +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_keras.cc @@ -28,7 +28,7 @@ void testLenetTanh(){ printf("********* Lenet-2 Architecture ********** \n"); // FIXIT: Extend this to batch of images - currently 5 images - int test_batch_size = 5000; + int test_batch_size = 10000; uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size); @@ -64,7 +64,7 @@ void testLenetTanh(){ if(Opentuner_run){ - char* myfifo = "/tmp/myfifo"; + const char* myfifo = "/tmp/myfifo"; int fd = open(myfifo, O_RDONLY); int ret_val = fcntl(fd, F_GETFD); @@ -135,7 +135,7 @@ void testLenetTanh(){ if(Opentuner_run){ - char* myfifo = "/tmp/myfifo"; + const char* myfifo = "/tmp/myfifo"; int fd_out = open(myfifo, O_WRONLY); int ret_val = fcntl(fd_out, F_GETFD); if(ret_val == -1){ diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/debug.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/debug.h index 4d38c6eea4..a0a52a2edb 100644 --- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/debug.h +++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/debug.h @@ -4,7 +4,7 @@ #define RUNTIME_DEBUG #define LOG_DEBUG 0 // Sets the debug logging to true -#define LOG_INFO 0 // Sets the info logging to true +#define LOG_INFO 1 // Sets the info logging to true #include "tensor.h" @@ -38,7 +38,7 @@ -void INFO(char* format, ...){ +void INFO(const char* format, ...){ if(!LOG_INFO) // Don't print if logging info is disabled return; va_list args; @@ -48,7 +48,7 @@ void INFO(char* format, ...){ va_end(args); } -void DEBUG(char* format, ...){ +void DEBUG(const char* format, ...){ if(!LOG_DEBUG) // Don't print if logging info is disabled return; va_list args; @@ -58,7 +58,7 @@ void DEBUG(char* format, ...){ va_end(args); } -void ERROR(char* format, ...){ +void ERROR(const char* format, ...){ if(!LOG_DEBUG) // Don't print if logging info is disabled return; va_list args; diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/error.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/error.h index 31baef8fd6..ed3269d50b 100644 --- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/error.h +++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/error.h @@ -33,7 +33,7 @@ -void readOpenTunerFlags(char* file_name){ +void readOpenTunerFlags(const char* file_name){ total_ops = 0; op_counter = 0; @@ -467,8 +467,6 @@ void* addBitError(void* x_ptr, int error_scale){ unsigned char* data_arr = reinterpret_cast<unsigned char*>(x->host_data); // FIXIT: Need to be careful about floating point datatype assumptions - int size_of_elem = 4; - long int total_bytes = x->size_in_bytes; long int error_iterations = total_bytes * 0.01 * error_freq; INFO("total_bytes = %lu, error_iterations = %lu \n", total_bytes, error_iterations); @@ -618,7 +616,7 @@ void* addGaussianError(void* x_ptr, int error_scale){ // NOTE: Check if a large gridSize will work with really large tensors vecMul<<<gridSize, blockSize>>>((float*) x->gpu_data, (float*) bias->gpu_data, x->num_elems); - float alpha = 1.0f, beta = 0.0f; + float alpha = 1.0f; // FIXIT: routine fails for 3D tensors checkCUDNN(cudnnAddTensor(cudnnHandle, &alpha, bias->tensor_desc, @@ -701,7 +699,8 @@ void* addPromiseError(void* x_ptr, int error_scale){ // NOTE: Check if a large gridSize will work with really large tensors vecMul<<<gridSize, blockSize>>>((float*) x->gpu_data, (float*) bias->gpu_data, x->num_elems); - float alpha = 1.0f, beta = 0.0f; + float alpha = 1.0f; + //float beta = 0.0f; checkCUDNN(cudnnAddTensor(cudnnHandle, &alpha, bias->tensor_desc, bias->gpu_data, &alpha, x->tensor_desc, x->gpu_data)); @@ -747,6 +746,7 @@ __global__ void quantizeElem(float* A, int n, float mul_factor, float min){ void* quantizeTensorPromise(void* input_ptr, float min, float max){ + INFO("QuantizeTensorPROMISE \n"); Tensor* input = (Tensor*) input_ptr; int quantize_range = 256; diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/half_precision_api.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/half_precision_api.h index 01e312efe4..e942ae2b72 100644 --- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/half_precision_api.h +++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/half_precision_api.h @@ -500,7 +500,8 @@ void* tensorHalfAdd(void* x_ptr, void* bias_ptr){ INFO("*** TensorHalfAdd \n"); profileEvent("tensorHalfAdd"); - float alpha = 1.0f, beta = 0.0f; + float alpha = 1.0f; + // float beta = 0.0f; hostToDeviceCopy(x); hostToDeviceCopy(bias); diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/op_overheads.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/op_overheads.h index 5ea0b28545..ddd718515a 100644 --- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/op_overheads.h +++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/op_overheads.h @@ -171,7 +171,7 @@ void add_relu_overheads(void* input_ptr, int error_scale){ } -float add_pool_overheads(void* input_ptr, int kernel_size, +void add_pool_overheads(void* input_ptr, int kernel_size, int stride_size, int error_scale){ Tensor* input = (Tensor*) input_ptr; diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h index 1551ceea5e..ba49976406 100644 --- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h +++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h @@ -48,9 +48,9 @@ extern "C"{ int vertical_stride, int horizontal_stride, int conv_mode, int compute_precision); void* tensorHalfConvolution(void* input, void* filter, - int vertical_pad, int horizontal_pad, - int vertical_stride, int horizontal_stride, - int conv_mode, int compute_precision); + int vertical_pad, int horizontal_pad, + int vertical_stride, int horizontal_stride, + int conv_mode, int compute_precision); void* tensorPooling(void* input, int poolFunction, @@ -59,10 +59,10 @@ extern "C"{ int vertical_stride, int horizontal_stride); void* tensorHalfPooling(void* input, - int poolFunction, - int window_height, int window_width, - int vertical_pad, int horizontal_pad, - int vertical_stride, int horizontal_stride); + int poolFunction, + int window_height, int window_width, + int vertical_pad, int horizontal_pad, + int vertical_stride, int horizontal_stride); void* tensorLRN(void* input, unsigned int LRN_window, @@ -119,11 +119,28 @@ extern "C"{ int activation_id, float out_min, float out_max, int swing); // NOTE: min_val, max_val apply to 'ClippedRelu' + + void* ConvLayer_GPU(void* input, + void* filter, + void* bias, + int conv_pad_h, int conv_pad_w, int conv_stride_h, int conv_stride_w, + int pool_id, int pool_size, + int activation_id, // Relu, Tanh, ClipRelu + float out_min, float out_max); + + + void* FCLayer_GPU(void* input, + void* weights, + void* bias, + int activation_id, + float out_min, float out_max); + + } void dumpAccuracyNorms(); -void readOpenTunerFlags(char* file_name); +void readOpenTunerFlags(const char* file_name); void clearOpCounter(); void clearTensorMap(); void freeOutputTensors(); diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_utils.cu b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_utils.cu index e5db155e1e..9ed220b273 100644 --- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_utils.cu +++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_utils.cu @@ -57,6 +57,7 @@ struct Tensor* getRuntimeTensor(struct Tensor_t* hpvm_tensor){ return tensorsArr[tensor_id]; else ERROR("Tensor not found in runtime. Aborting ..."); + return NULL; } diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu index 27e683e694..48f5d31f5c 100644 --- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu +++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu @@ -309,14 +309,15 @@ void printTensorDims2(void* tensor_ptr){ struct Tensor* tensor = (struct Tensor*) tensor_ptr; - printf("Num_elems = %d \n", tensor->num_elems); + printf("Num_elems = %lu \n", tensor->num_elems); for (int i = 0; i < tensor->dims.num_dims; i++){ - printf("dim[%d] = %d \n", i, tensor->dims.dim_sizes[i]); + printf("dim[%d] = %lu \n", i, tensor->dims.dim_sizes[i]); } } + // FIXIT: Apparently this is not working for 3D tensors or dimensions other than 4D // Perhaps 3D, 2D tensors can be remapped to 4D tensors to make this work? void* tensorAdd(void* x_ptr, void* bias_ptr){ @@ -329,7 +330,8 @@ void* tensorAdd(void* x_ptr, void* bias_ptr){ INFO("*** TensorAdd \n"); profileEvent("tensorAdd"); - float alpha = 1.0f, beta = 0.0f; + float alpha = 1.0f; + //float beta = 0.0f; hostToDeviceCopy(x); hostToDeviceCopy(bias); @@ -621,7 +623,8 @@ void* tensorGemmCPU(void* lhs_ptr, void* rhs_ptr){ INFO("lhs->dims.num_dims = %d \n", lhs->dims.num_dims); // FIXIT: Need to be more aware of the implications of alpha and beta - float alpha = 1.0f, beta = 0.0f; + //float alpha = 1.0f; + // float beta = 0.0f; // 'm' holds the batch dimension - assuming NCHW format Tensors int m = lhs->dims.dim_sizes[0]; // The rhs must be a 2D tensor @@ -1221,10 +1224,11 @@ void* ConvLayer_PROMISE(void* input, float i_min, float i_max, break; } - if(swing < 8){ + + if(swing < 8 && activation_id != -1){ activation_out = quantizeTensorPromise(activation_out, out_min, out_max); } - + return activation_out; } @@ -1305,8 +1309,9 @@ void* FCLayer_PROMISE(void* input, float i_min, float i_max, ERROR("Activation id %d NOT supported \n", activation_out); break; } - - if(swing < 8){ + + + if(swing < 8 && activation_id != -1){ activation_out = quantizeTensorPromise(activation_out, out_min, out_max); } -- GitLab