From 61a39cc1e6aa663a11bb7d342481dd6b329713a3 Mon Sep 17 00:00:00 2001
From: Hashim Sharif <hsharif3@tyler.cs.illinois.edu>
Date: Sat, 23 Feb 2019 16:55:19 -0600
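Subject: [PATCH] Reducing compile-time warnings in HPVM tensor runtime

Warning cleanups:
- printf format specifiers changed from %d to %lu in utils.h and
  tensor_runtime.cu
- string-literal parameters and locals made const char* (readInputTensor,
  computeAccuracy, readOpenTunerFlags, INFO/DEBUG/ERROR, myfifo)
- unused locals removed or commented out (size_of_elem, beta, and alpha in
  tensorGemmCPU)
- add_pool_overheads now returns void; getRuntimeTensor returns NULL after
  the ERROR path; computeAccuracy2 now returns the accuracy as a float

Changes in this patch that are not warning fixes:
- LOG_INFO is switched from 0 to 1 in debug.h
- alexnet_cifar10: total_runs 1 -> 100, test_batch_size 10000 -> 5000
- lenet_keras: test_batch_size 5000 -> 10000
- ConvLayer_PROMISE and FCLayer_PROMISE only quantize the output when
  activation_id != -1
- ConvLayer_GPU and FCLayer_GPU are declared in tensor_runtime.h
- quantizeTensorPromise logs an INFO message on entry
- new benchmark source dnn_sources/src/lenet_front.cc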
---
 .../dnn_sources/include/utils.h               | 28 ++++-----
 .../dnn_sources/src/alexnet2_cifar10.cc       |  2 +-
 .../dnn_sources/src/alexnet_cifar10.cc        |  8 +--
 .../dnn_sources/src/fc2_clipped.cc            |  2 +-
 .../dnn_sources/src/fc3_clipped.cc            |  2 +-
 .../dnn_sources/src/fc4_clipped.cc            |  2 +-
 .../dnn_sources/src/lenet_front.cc            | 61 +++++++++++++++++++
 .../dnn_sources/src/lenet_keras.cc            |  6 +-
 .../tensor_runtime/include/debug.h            |  8 +--
 .../tensor_runtime/include/error.h            | 10 +--
 .../include/half_precision_api.h              |  3 +-
 .../tensor_runtime/include/op_overheads.h     |  2 +-
 .../tensor_runtime/include/tensor_runtime.h   | 33 +++++++---
 .../tensor_runtime/include/tensor_utils.cu    |  1 +
 .../tensor_runtime/src/tensor_runtime.cu      | 21 ++++---
 15 files changed, 136 insertions(+), 53 deletions(-)
 create mode 100644 llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_front.cc

diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h b/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
index 86754b82d7..ec28fccb78 100644
--- a/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
@@ -18,9 +18,9 @@ void printTensorInfo(void* tensor_ptr){
   }
 
   printf("tensor dims = %d \n", tensor->dims.num_dims);
-  printf("dim1_size = %d \n", tensor->dims.dim_sizes[0]);
-  printf("dim2_size = %d \n", tensor->dims.dim_sizes[1]);
-  printf("num_elems = %d \n", tensor->num_elems);
+  printf("dim1_size = %lu \n", tensor->dims.dim_sizes[0]);
+  printf("dim2_size = %lu \n", tensor->dims.dim_sizes[1]);
+  printf("num_elems = %lu \n", tensor->num_elems);
 }
 
 
@@ -37,9 +37,9 @@ void dumpWeightsToFile(char* file_name, void* weights_ptr){
     abort();
   }
 
-  printf("size_in_bytes = %d \n", weights->size_in_bytes);
+  printf("size_in_bytes = %lu \n", weights->size_in_bytes);
   size_t bytes_written = fwrite(weights->host_data, 1, weights->size_in_bytes, fp);
-  printf("bytes_written = %d \n", bytes_written);
+  printf("bytes_written = %lu \n", bytes_written);
   fclose(fp);
 }
 
@@ -133,9 +133,9 @@ void printTensorDims(void* tensor_ptr){
 
   struct Tensor* tensor = (struct Tensor*) tensor_ptr;
 
-  printf("Num_elems = %d \n", tensor->num_elems);
+  printf("Num_elems = %lu \n", tensor->num_elems);
   for (int i = 0; i < tensor->dims.num_dims; i++){
-    printf("dim[%d] = %d \n", i, tensor->dims.dim_sizes[i]);
+    printf("dim[%d] = %lu \n", i, tensor->dims.dim_sizes[i]);
   }
 }
 
@@ -178,7 +178,7 @@ void compareValues(void* tensor_ptr, float* data, size_t num_elems){
 }
 
 
-void* readInputTensor(char* file_name, int data_type, int dim1_size, int dim2_size,
+void* readInputTensor(const char* file_name, int data_type, int dim1_size, int dim2_size,
 		      int dim3_size, int dim4_size){
 
   int type_size = 4; // NOTE: Assuming floating point tensors
@@ -271,13 +271,13 @@ uint8_t* readLabels(const char* labels_file, int num_labels){
 
   fclose(file);
   
-  printf("--labels bytes_read = %d \n", bytes_read);
+  printf("--labels bytes_read = %lu \n", bytes_read);
   return labels;
 }
 
 
 
-void computeAccuracy(char* labels_file, int num_labels, void* result_ptr){
+void computeAccuracy(const char* labels_file, int num_labels, void* result_ptr){
 
   struct Tensor* result = (struct Tensor*) result_ptr;
   
@@ -318,19 +318,18 @@ void computeAccuracy(char* labels_file, int num_labels, void* result_ptr){
 
 
 
-void computeAccuracy2(uint8_t* labels, int num_labels, void* result_ptr, unsigned num_classes = 10){
+float computeAccuracy2(uint8_t* labels, int num_labels, void* result_ptr, unsigned num_classes = 10){
 
   unsigned num_zeros = 0;
   
   struct Tensor* result = (struct Tensor*) result_ptr;
   
-  //uint8_t* labels = readLabels(labels_file, num_labels);
   size_t batch_dim = result->dims.dim_sizes[0];
   size_t channels = result->dims.dim_sizes[1];
   float* data = (float*) result->host_data;
   int num_errors = 0;
 
-  printf("batch_dim = %d, channels = %d \n", batch_dim, channels);
+  printf("batch_dim = %lu, channels = %lu \n", batch_dim, channels);
   
   for(int i = 0; i < batch_dim; i++){
     int chosen = 0;
@@ -338,7 +337,6 @@ void computeAccuracy2(uint8_t* labels, int num_labels, void* result_ptr, unsigne
       if (data[i * channels + chosen] < data[i * channels + id]) chosen = id;
     }
     
-    //printf("chosen = %d, label = %d \n", chosen, labels[i]);
     if(labels[i] == 0)
       num_zeros++;
       
@@ -350,7 +348,6 @@ void computeAccuracy2(uint8_t* labels, int num_labels, void* result_ptr, unsigne
   printf("****** Accuracy = %f \n\n", accuracy);
   printf("****** Zero class labels %d \n", num_zeros);
 
-
   FILE* fp = fopen("final_accuracy", "w+");
   if(fp != NULL){
 
@@ -363,6 +360,7 @@ void computeAccuracy2(uint8_t* labels, int num_labels, void* result_ptr, unsigne
 
   fclose(fp);
 
+  return accuracy;    
 }
 
 
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet2_cifar10.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet2_cifar10.cc
index b0107a4da0..b9a73c2e2a 100644
--- a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet2_cifar10.cc
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet2_cifar10.cc
@@ -74,7 +74,7 @@ void testCifarNet(){
 
     if(Opentuner_run){
 
-      char* myfifo = "/tmp/myfifo";
+      const char* myfifo = "/tmp/myfifo";
       int fd = open(myfifo, O_RDONLY);
 
       int ret_val = fcntl(fd, F_GETFD);
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet_cifar10.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet_cifar10.cc
index 2576c8aac1..3e5cec7d07 100644
--- a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet_cifar10.cc
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet_cifar10.cc
@@ -19,7 +19,7 @@ bool Opentuner_run = false;
 /* NOTE: Reference Architecture to use for profiling */
 void testCifarNet(){
 
-  int total_runs = 1;
+  int total_runs = 100;
   if(Opentuner_run){
     total_runs = 1000000;
   }
@@ -28,7 +28,7 @@ void testCifarNet(){
   printf("********* CIFAR-10 DNN ********** \n");
   // FIXIT: Extend this to batch of images - currently 5 images
 
-  int test_batch_size = 10000;
+  int test_batch_size = 5000;
 
   //uint8_t* labels = readLabels("../model_params/cifar_keras/labels.bin", test_batch_size);
   uint8_t* labels = readLabels("../model_params/alexnet_cifar10/test_labels.bin", test_batch_size);
@@ -71,7 +71,7 @@ void testCifarNet(){
 
     if(Opentuner_run){
 
-      char* myfifo = "/tmp/myfifo";
+      const char* myfifo = "/tmp/myfifo";
       int fd = open(myfifo, O_RDONLY);
 
       int ret_val = fcntl(fd, F_GETFD);
@@ -160,7 +160,7 @@ void testCifarNet(){
 
     if(Opentuner_run){
 
-      char* myfifo = "/tmp/myfifo";
+      const char* myfifo = "/tmp/myfifo";
       int fd_out = open(myfifo, O_WRONLY);
       int ret_val = fcntl(fd_out, F_GETFD);
       if(ret_val == -1){
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc2_clipped.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc2_clipped.cc
index de19b94b55..575f9b164f 100644
--- a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc2_clipped.cc
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc2_clipped.cc
@@ -48,7 +48,7 @@ void test_2_Layer_clipped_FC(){
 
     if(Opentuner_run){
 
-      char* myfifo = "/tmp/myfifo";
+      const char* myfifo = "/tmp/myfifo";
       int fd = open(myfifo, O_RDONLY);
 
       int ret_val = fcntl(fd, F_GETFD);
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc3_clipped.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc3_clipped.cc
index a66c4b3ec3..f566fd98a7 100644
--- a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc3_clipped.cc
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc3_clipped.cc
@@ -57,7 +57,7 @@ void test_3_Layer_clipped_FC(){
 
     if(Opentuner_run){
 
-      char* myfifo = "/tmp/myfifo";
+      const char* myfifo = "/tmp/myfifo";
       int fd = open(myfifo, O_RDONLY);
       int ret_val = fcntl(fd, F_GETFD);
       if(ret_val == -1){
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc4_clipped.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc4_clipped.cc
index ec409aaa42..24a4d88812 100644
--- a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc4_clipped.cc
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/fc4_clipped.cc
@@ -57,7 +57,7 @@ void test_4_Layer_clipped_FC(){
 
     if(Opentuner_run){
 
-      char* myfifo = "/tmp/myfifo";
+      const char* myfifo = "/tmp/myfifo";
       int fd = open(myfifo, O_RDONLY);
 
       int ret_val = fcntl(fd, F_GETFD);
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_front.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_front.cc
new file mode 100644
index 0000000000..effb293a8b
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_front.cc
@@ -0,0 +1,61 @@
+
+#include <stdio.h> 
+#include <stdlib.h> 
+#include <unistd.h> 
+#include <fcntl.h> 
+#include <sys/types.h> 
+#include <sys/stat.h> 
+#include <string.h> 
+#include "../../tensor_runtime/include/tensor_runtime.h" 
+#include "../include/utils.h" 
+
+int main(){ 
+  // Loads the lenet_front input batch, labels and layer parameters, runs one forward pass, and reports accuracy.
+  llvm_hpvm_initTensorRt(0); // NOTE(review): 0 is presumably a GPU/device id -- confirm against the runtime
+  // Every tensor file below is read from dir_prefix, a path relative to the process working directory.
+  std::string dir_prefix = std::string("../model_params/lenet_front/"); // NOTE(review): <string> is not included directly here; presumably it arrives via a project header -- confirm
+  std::string input_path =  dir_prefix + std::string("input.bin"); 
+  void* input = readTrainedWeights(input_path.c_str(), 0,10000,1,28,28); // NOTE(review): args after the path look like (data type, four dim sizes) -- confirm
+  std::string labels_path =  dir_prefix + std::string("labels.bin"); 
+  uint8_t* labels = readLabels(labels_path.c_str(),10000); // 10000 labels, matching the first size passed for the input above
+  std::string conv2d_1_w_path =  dir_prefix + std::string("conv0.bin"); 
+  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,1,5,5); 
+  std::string conv2d_1_b_path =  dir_prefix + std::string("conv_bias0.bin"); 
+  void* conv2d_1_b =  readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,32,1,1); 
+  std::string conv2d_2_w_path =  dir_prefix + std::string("conv2.bin"); 
+  void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,5,5); 
+  std::string conv2d_2_b_path =  dir_prefix + std::string("conv_bias2.bin"); 
+  void* conv2d_2_b =  readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); 
+  std::string dense_1_w_path =  dir_prefix + std::string("fc5.bin"); 
+  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,3136,1024); 
+  std::string dense_1_b_path =  dir_prefix + std::string("fc_bias5.bin"); 
+  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,1024,1,1); 
+  std::string dense_2_w_path =  dir_prefix + std::string("fc6.bin"); 
+  void* dense_2_w =  readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,1024,10); 
+  std::string dense_2_b_path =  dir_prefix + std::string("fc_bias6.bin"); 
+  void* dense_2_b =  readTrainedWeights(dense_2_b_path.c_str(), 0,1,10,1,1); 
+  // Forward pass: two (convolution, add, tanh, pooling) stages, then two (GEMM, add, tanh) stages, then softmax.
+  // Per the tensor_runtime.h prototypes: conv args are pad (2,2), stride (1,1), conv_mode 1, compute_precision 0; pooling args are poolFunction 0, 2x2 window, pad (0,0), stride (2,2).
+  void* var_0 = tensorConvolution(input, conv2d_1_w, 2, 2, 1, 1, 1, 0); 
+  void* var_1 = tensorAdd(var_0, conv2d_1_b); 
+  void* var_2 = tensorTanh(var_1); 
+  void* var_3 = tensorPooling(var_2,0,2,2,0,0,2,2); 
+  void* var_4 = tensorConvolution(var_3, conv2d_2_w, 2, 2, 1, 1, 1, 0); 
+  void* var_5 = tensorAdd(var_4, conv2d_2_b); 
+  void* var_6 = tensorTanh(var_5); 
+  void* var_7 = tensorPooling(var_6,0,2,2,0,0,2,2); 
+  void* var_9 = tensorGemmGPU(var_7, dense_1_w); // numbering skips var_8; no such variable exists in this file
+  void* var_10 = tensorAdd(var_9, dense_1_b); 
+  void* var_11 = tensorTanh(var_10); 
+  void* var_12 = tensorGemmGPU(var_11, dense_2_w); 
+  void* var_13 = tensorAdd(var_12, dense_2_b); 
+  void* var_14 = tensorTanh(var_13); 
+  void* var_15 = tensorSoftmax(var_14); 
+  // computeAccuracy2 is called with its default of 10 classes; its float return value is discarded here.
+  computeAccuracy2(labels, 10000, var_15);
+
+  llvm_hpvm_cleanupTensorRt(); 
+
+  return 0; 
+
+}
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_keras.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_keras.cc
index b6854f4563..edf3641076 100644
--- a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_keras.cc
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_keras.cc
@@ -28,7 +28,7 @@ void testLenetTanh(){
   printf("********* Lenet-2 Architecture ********** \n");
   // FIXIT: Extend this to batch of images - currently 5 images
 
-  int test_batch_size = 5000;
+  int test_batch_size = 10000;
 
   uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size);
   
@@ -64,7 +64,7 @@ void testLenetTanh(){
 
     if(Opentuner_run){
 
-      char* myfifo = "/tmp/myfifo";
+      const char* myfifo = "/tmp/myfifo";
       int fd = open(myfifo, O_RDONLY);
 
       int ret_val = fcntl(fd, F_GETFD);
@@ -135,7 +135,7 @@ void testLenetTanh(){
 
     if(Opentuner_run){
 
-      char* myfifo = "/tmp/myfifo";
+      const char* myfifo = "/tmp/myfifo";
       int fd_out = open(myfifo, O_WRONLY);
       int ret_val = fcntl(fd_out, F_GETFD);
       if(ret_val == -1){
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/debug.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/debug.h
index 4d38c6eea4..a0a52a2edb 100644
--- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/debug.h
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/debug.h
@@ -4,7 +4,7 @@
 #define RUNTIME_DEBUG
 
 #define LOG_DEBUG 0   // Sets the debug logging to true
-#define LOG_INFO 0  // Sets the info logging to true
+#define LOG_INFO 1  // Sets the info logging to true
 
 #include "tensor.h"
 
@@ -38,7 +38,7 @@
 
 
 
-void INFO(char* format, ...){
+void INFO(const char* format, ...){
   if(!LOG_INFO) // Don't print if logging info is disabled
     return;
   va_list args;
@@ -48,7 +48,7 @@ void INFO(char* format, ...){
   va_end(args);
 }
 
-void DEBUG(char* format, ...){
+void DEBUG(const char* format, ...){
   if(!LOG_DEBUG) // Don't print if logging info is disabled
     return;
   va_list args;
@@ -58,7 +58,7 @@ void DEBUG(char* format, ...){
   va_end(args);
 }
 
-void ERROR(char* format, ...){
+void ERROR(const char* format, ...){
   if(!LOG_DEBUG) // Don't print if logging info is disabled
     return;
   va_list args;
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/error.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/error.h
index 31baef8fd6..ed3269d50b 100644
--- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/error.h
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/error.h
@@ -33,7 +33,7 @@
 
 
 
-void readOpenTunerFlags(char* file_name){
+void readOpenTunerFlags(const char* file_name){
 
   total_ops = 0;
   op_counter = 0;
@@ -467,8 +467,6 @@ void* addBitError(void* x_ptr, int error_scale){
 
   unsigned char* data_arr = reinterpret_cast<unsigned char*>(x->host_data);
   // FIXIT: Need to be careful about floating point datatype assumptions
-  int size_of_elem = 4; 
-
   long int total_bytes = x->size_in_bytes;
   long int error_iterations = total_bytes * 0.01 * error_freq;
   INFO("total_bytes = %lu, error_iterations = %lu \n", total_bytes, error_iterations);
@@ -618,7 +616,7 @@ void* addGaussianError(void* x_ptr, int error_scale){
   // NOTE: Check if a large gridSize will work with really large tensors
   vecMul<<<gridSize, blockSize>>>((float*) x->gpu_data, (float*) bias->gpu_data, x->num_elems);
   
-  float alpha = 1.0f, beta = 0.0f;
+  float alpha = 1.0f;
     
   // FIXIT: routine fails for 3D tensors
   checkCUDNN(cudnnAddTensor(cudnnHandle, &alpha, bias->tensor_desc,
@@ -701,7 +699,8 @@ void* addPromiseError(void* x_ptr, int error_scale){
   // NOTE: Check if a large gridSize will work with really large tensors
   vecMul<<<gridSize, blockSize>>>((float*) x->gpu_data, (float*) bias->gpu_data, x->num_elems);
   
-  float alpha = 1.0f, beta = 0.0f;    
+  float alpha = 1.0f;
+  //float beta = 0.0f;    
   checkCUDNN(cudnnAddTensor(cudnnHandle, &alpha, bias->tensor_desc,
 			    bias->gpu_data, &alpha, x->tensor_desc, x->gpu_data));
 
@@ -747,6 +746,7 @@ __global__ void quantizeElem(float* A, int n, float mul_factor, float min){
 
 void* quantizeTensorPromise(void* input_ptr, float min, float max){
 
+  INFO("QuantizeTensorPROMISE \n");
   Tensor* input = (Tensor*) input_ptr;
   
   int quantize_range = 256;
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/half_precision_api.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/half_precision_api.h
index 01e312efe4..e942ae2b72 100644
--- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/half_precision_api.h
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/half_precision_api.h
@@ -500,7 +500,8 @@ void* tensorHalfAdd(void* x_ptr, void* bias_ptr){
   INFO("*** TensorHalfAdd \n");
   profileEvent("tensorHalfAdd");
 
-  float alpha = 1.0f, beta = 0.0f;
+  float alpha = 1.0f;
+  // float beta = 0.0f;
   hostToDeviceCopy(x);
   hostToDeviceCopy(bias);
 
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/op_overheads.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/op_overheads.h
index 5ea0b28545..ddd718515a 100644
--- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/op_overheads.h
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/op_overheads.h
@@ -171,7 +171,7 @@ void add_relu_overheads(void* input_ptr, int error_scale){
 }
 
 
-float add_pool_overheads(void* input_ptr, int kernel_size,
+void add_pool_overheads(void* input_ptr, int kernel_size,
 			 int stride_size, int error_scale){
 
   Tensor* input = (Tensor*) input_ptr;
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
index 1551ceea5e..ba49976406 100644
--- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
@@ -48,9 +48,9 @@ extern "C"{
 			  int vertical_stride, int horizontal_stride,
 			  int conv_mode, int compute_precision);
   void* tensorHalfConvolution(void* input, void* filter,
-			  int vertical_pad, int horizontal_pad,
-			  int vertical_stride, int horizontal_stride,
-			  int conv_mode, int compute_precision);
+			      int vertical_pad, int horizontal_pad,
+			      int vertical_stride, int horizontal_stride,
+			      int conv_mode, int compute_precision);
 
   void* tensorPooling(void* input,
 		      int poolFunction,
@@ -59,10 +59,10 @@ extern "C"{
 		      int vertical_stride, int horizontal_stride);
 
   void* tensorHalfPooling(void* input,
-		       int poolFunction,
-		       int window_height, int window_width,
-		       int vertical_pad, int horizontal_pad,
-		       int vertical_stride, int horizontal_stride);
+			  int poolFunction,
+			  int window_height, int window_width,
+			  int vertical_pad, int horizontal_pad,
+			  int vertical_stride, int horizontal_stride);
 
   
   void* tensorLRN(void* input, unsigned int LRN_window,
@@ -119,11 +119,28 @@ extern "C"{
 			int activation_id,
 			float out_min, float out_max, int swing); // NOTE: min_val, max_val apply to 'ClippedRelu'
 
+  // Prototype for a GPU conv-layer call taking filter, bias, pooling and activation settings together. NOTE(review): this patch adds no definition -- confirm one exists.
+  void* ConvLayer_GPU(void* input, 
+		      void* filter, 
+		      void* bias, 
+		      int conv_pad_h, int conv_pad_w, int conv_stride_h, int conv_stride_w,
+		      int pool_id, int pool_size,
+		      int activation_id, // Relu, Tanh, ClipRelu
+		      float out_min, float out_max);  
+
+  // Prototype for a GPU fully-connected-layer call taking weights, bias and activation settings together. NOTE(review): this patch adds no definition -- confirm one exists.
+  void* FCLayer_GPU(void* input, 
+		    void* weights, 
+		    void* bias, 
+		    int activation_id,
+		    float out_min, float out_max);
+  
+  
 }
 
 
 void dumpAccuracyNorms();
-void readOpenTunerFlags(char* file_name);
+void readOpenTunerFlags(const char* file_name);
 void clearOpCounter();
 void clearTensorMap();
 void freeOutputTensors();
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_utils.cu b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_utils.cu
index e5db155e1e..9ed220b273 100644
--- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_utils.cu
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_utils.cu
@@ -57,6 +57,7 @@ struct Tensor* getRuntimeTensor(struct Tensor_t* hpvm_tensor){
     return tensorsArr[tensor_id];
   else
     ERROR("Tensor not found in runtime. Aborting ...");
+  return NULL;
 }
 
 
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu
index 27e683e694..48f5d31f5c 100644
--- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu
@@ -309,14 +309,15 @@ void printTensorDims2(void* tensor_ptr){
 
   struct Tensor* tensor = (struct Tensor*) tensor_ptr;
 
-  printf("Num_elems = %d \n", tensor->num_elems);
+  printf("Num_elems = %lu \n", tensor->num_elems);
   for (int i = 0; i < tensor->dims.num_dims; i++){
-    printf("dim[%d] = %d \n", i, tensor->dims.dim_sizes[i]);
+    printf("dim[%d] = %lu \n", i, tensor->dims.dim_sizes[i]);
   }
 }
 
 
 
+
 // FIXIT: Apparently this is not working for 3D tensors or dimensions other than 4D
 // Perhaps 3D, 2D tensors can be remapped to 4D tensors to make this work?
 void* tensorAdd(void* x_ptr, void* bias_ptr){
@@ -329,7 +330,8 @@ void* tensorAdd(void* x_ptr, void* bias_ptr){
   INFO("*** TensorAdd \n");  
   profileEvent("tensorAdd");
     
-  float alpha = 1.0f, beta = 0.0f;
+  float alpha = 1.0f;
+  //float beta = 0.0f;
   hostToDeviceCopy(x);
   hostToDeviceCopy(bias);
 
@@ -621,7 +623,8 @@ void* tensorGemmCPU(void* lhs_ptr, void* rhs_ptr){
   INFO("lhs->dims.num_dims = %d \n", lhs->dims.num_dims);
 
   // FIXIT: Need to be more aware of the implications of alpha and beta
-  float alpha = 1.0f, beta = 0.0f;
+  //float alpha = 1.0f;
+  // float beta = 0.0f;
   // 'm' holds the batch dimension - assuming NCHW format Tensors
   int m = lhs->dims.dim_sizes[0];
   // The rhs must be a 2D tensor
@@ -1221,10 +1224,11 @@ void* ConvLayer_PROMISE(void* input, float i_min, float i_max,
     break;
   }
 
-  if(swing < 8){
+
+  if(swing < 8 && activation_id != -1){
     activation_out = quantizeTensorPromise(activation_out, out_min, out_max);
   }
-    
+  
   return activation_out;
 }
 
@@ -1305,8 +1309,9 @@ void* FCLayer_PROMISE(void* input, float i_min, float i_max,
     ERROR("Activation id %d NOT supported \n", activation_out);
     break;
   }
-
-  if(swing < 8){
+  
+  
+  if(swing < 8 && activation_id != -1){
     activation_out = quantizeTensorPromise(activation_out, out_min, out_max);
   }
   
-- 
GitLab