Skip to content
Snippets Groups Projects
Commit 74d0793f authored by Hashim Sharif's avatar Hashim Sharif
Browse files

Adding PROMISE-like API to tensor runtime+test src

parent c723cf01
No related branches found
No related tags found
No related merge requests found
......@@ -84,3 +84,8 @@ target_link_libraries(lenet_tanh_half tensor_runtime)
add_executable(lenet_keras_half dnn_sources/src/half/lenet_keras_half.cc)
target_link_libraries(lenet_keras_half tensor_runtime)
# Promise API sources
add_executable(lenet_keras_promise dnn_sources/src/promise/lenet_keras_promise.cc)
target_link_libraries(lenet_keras_promise tensor_runtime)
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <string.h>
#include "../../../tensor_runtime/include/tensor_runtime.h"
#include "../../include/utils.h"
bool Opentuner_run = false;
/* NOTE: Reference Architecture to use for profiling */
/* NOTE: Reference Architecture to use for profiling */

/* Runs the LeNet (tanh) reference network through the PROMISE layer API and
 * reports accuracy. When Opentuner_run is set, each iteration synchronizes
 * with an external OpenTuner driver over the FIFO /tmp/myfifo:
 * reads a command before the run ("stop_run" aborts) and writes a
 * completion message afterwards. */
void testLenetTanh(){

  int total_runs = 100;
  if(Opentuner_run){
    // Under OpenTuner the driver decides when to stop; use a large cap.
    total_runs = 100000;
  }

  printf("********* Lenet-2 Architecture ********** \n");

  // FIXIT: Extend this to batch of images - currently 5 images
  int test_batch_size = 5000;

  uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size);

  void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte",
                                CUDNN_DATA_FLOAT,
                                test_batch_size, 1, 28, 28);

  // NOTE: Filter descriptors do NOT have batch size
  // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels)
  // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class
  void* conv1_filter = readTrainedWeights("../model_params/lenet_keras/conv1.bin",
                                          float_type, 32, 1, 5, 5);
  void* conv1_bias = readTrainedWeights("../model_params/lenet_keras/conv1_bias.bin",
                                        float_type, 1, 32, 1, 1);
  void* conv2_filter = readTrainedWeights("../model_params/lenet_keras/conv2.bin",
                                          float_type, 64, 32, 5, 5);
  void* conv2_bias = readTrainedWeights("../model_params/lenet_keras/conv2_bias.bin",
                                        float_type, 1, 64, 1, 1);
  void* fc1_weights = readTrainedWeights("../model_params/lenet_keras/fc1.bin",
                                         float_type, 1, 1, 7*7*64, 1024);
  void* fc1_bias = readTrainedWeights("../model_params/lenet_keras/fc1_bias.bin",
                                      float_type, 1, 1024, 1, 1);
  void* fc2_weights = readTrainedWeights("../model_params/lenet_keras/fc2.bin",
                                         float_type, 1, 1, 1024, 10);
  void* fc2_bias = readTrainedWeights("../model_params/lenet_keras/fc2_bias.bin",
                                      float_type, 1, 10, 1, 1);

  clearTensorMap();

  for(int i = 0; i < total_runs; i++){

    if(Opentuner_run){

      const char* myfifo = "/tmp/myfifo";
      int fd = open(myfifo, O_RDONLY);
      // FIX: check open() itself before probing the descriptor with fcntl
      if(fd == -1 || fcntl(fd, F_GETFD) == -1){
        printf("Invalid descriptor \n");
        abort();
      }

      char str[100];
      // FIX: check the read result and NUL-terminate before strcmp —
      // the previous code compared a possibly-unterminated buffer (UB).
      ssize_t bytes_read = read(fd, str, sizeof(str) - 1);
      if(bytes_read < 0){
        bytes_read = 0;
      }
      str[bytes_read] = '\0';

      if(strcmp(str, "stop_run") == 0){
        abort();
      }

      close(fd);
    }

    readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters

    // Start power and performance profiling
    startProfiling();

    void* conv1_out = ConvLayer_PROMISE(input, conv1_filter, conv1_bias, 2, 2, 1, 1, 0, 2, 0, 0.0, 0.0);
    void* conv2_out = ConvLayer_PROMISE(conv1_out, conv2_filter, conv2_bias, 2, 2, 1, 1, 0, 2, 0, 0.0, 0.0);
    void* fc1_out = FCLayer_PROMISE(conv2_out, fc1_weights, fc1_bias, 0, 0.0, 0.0);
    void* fc2_out = FCLayer_PROMISE(fc1_out, fc2_weights, fc2_bias, 0, 0.0, 0.0);

    void* result = tensorSoftmax(fc2_out);

    // End profiling and dump output to profile.txt
    stopProfiling();

    computeAccuracy2(labels, test_batch_size, result);

    dumpAccuracyNorms();
    freeOutputTensors();

    if(Opentuner_run){

      const char* myfifo = "/tmp/myfifo";
      int fd_out = open(myfifo, O_WRONLY);
      if(fd_out == -1 || fcntl(fd_out, F_GETFD) == -1){
        printf("Invalid descriptor \n");
        abort();
      }

      const char* str = "completed***!\n";
      // FIX: the original wrote 80 bytes from a ~15-byte string literal,
      // an out-of-bounds read. Write exactly the message plus its NUL.
      write(fd_out, str, strlen(str) + 1);
      close(fd_out);
    }

  }
}
/* Entry point: any command-line argument enables OpenTuner mode,
 * then the LeNet test is run inside a tensor-runtime init/cleanup pair. */
int main(int argc, char* argv[]){

  // Opentuner_run is initialized to false at file scope; a single
  // assignment from the argument count reproduces the original logic.
  Opentuner_run = (argc > 1);

  llvm_hpvm_initTensorRt(0);

  testLenetTanh();

  llvm_hpvm_cleanupTensorRt();

  return 0;
}
......@@ -5,6 +5,7 @@
#include <cmath>
#include <memory>
#include <string>
//#include "runtime_types.h"
#ifndef CUDNN_HEADER
......@@ -97,7 +98,22 @@ extern "C"{
void* tensorSoftmax(void* input);
/* Error injection API - used for accuracy tuning */
void* tensorAddError(void* x_ptr, int error_scale);
void* tensorAddError(void* x_ptr, int error_scale);
/**** PROMISE API *****/
void* ConvLayer_PROMISE(void* input, void* filter, void* bias,
int conv_pad_h, int conv_pad_w, int conv_stride_h, int conv_stride_w,
int pool_id, int pool_size,
int activation_id, // Relu, Tanh, ClipRelu
float min_val, float max_val); // NOTE: min_val, max_val apply to 'ClippedRelu'
void* FCLayer_PROMISE(void* input, void* weights, void* bias, int activation_id,
float min_val, float max_val); // NOTE: min_val, max_val apply to 'ClippedRelu'
}
......
......@@ -34,5 +34,8 @@ void dummyFunction(){
void* tensorTanhPtr = (void*) &tensorTanh;
void* tensorHalfTanhPtr = (void*) &tensorHalfTanh;
void* tensorSoftmaxPtr = (void*) &tensorSoftmax;
void* tensorAddErrorPtr = (void*) &tensorAddError;
void* tensorAddErrorPtr = (void*) &tensorAddError;
void* ConvLayer = (void*) &ConvLayer_PROMISE;
void* FCLayer = (void*) &FCLayer_PROMISE;
}
......@@ -1027,3 +1027,96 @@ void* tensorTanh(void* input_ptr){
/*********** PROMISE API **************/
void* ConvLayer_PROMISE(void* input, void* filter, void* bias,
int conv_pad_h, int conv_pad_w, int conv_stride_h, int conv_stride_w,
int pool_id, int pool_size,
int activation_id, // Relu, Tanh, ClipRelu
float min_val, float max_val){ // NOTE: min_val, max_val apply to 'ClippedRelu'
void* conv_out = tensorConvolution(input, filter,
conv_pad_h, conv_pad_w,
conv_stride_h, conv_stride_w,
1, 0);
void* conv_add;
if(bias != NULL){
conv_add = tensorAdd(conv_out, bias);
}
else{
conv_add = conv_out;
}
void* pool_out;
// NOTE: Skip pooling on negative pool sizes
if(pool_size > 0){
//FIXME: Currently only using MaxPooling
pool_out = tensorPooling(conv_add, 0, pool_size, pool_size, 0, 0, pool_size, pool_size);
}
else{
pool_out = conv_add;
}
void* activation_out;
switch(activation_id){
case -1:
activation_out = pool_out;
INFO("NO Activation Function \n");
break;
case 0:
activation_out = tensorTanh(pool_out);
break;
case 1:
activation_out = tensorRelu(pool_out);
break;
case 2:
activation_out = tensorRelu2(pool_out, min_val, max_val);
break;
default:
ERROR("Activation id %d NOT supported \n", activation_out);
break;
}
return activation_out;
}
void* FCLayer_PROMISE(void* input, void* weights, void* bias, int activation_id,
float min_val, float max_val){ // NOTE: min_val, max_val apply to 'ClippedRelu'
void* gemm_out = tensorGemmGPU(input, weights);
void* gemmbias_out;
if(bias != NULL){
gemmbias_out = tensorAdd(gemm_out, bias);
}
else{
gemmbias_out = gemm_out;
}
void* activation_out;
switch(activation_id){
case -1:
activation_out = gemmbias_out;
INFO("No Activation Function \n");
break;
case 0:
activation_out = tensorTanh(gemmbias_out);
break;
case 1:
activation_out = tensorRelu(gemmbias_out);
break;
case 2:
activation_out = tensorRelu2(gemmbias_out, min_val, max_val);
break;
default:
ERROR("Activation id %d NOT supported \n", activation_out);
break;
}
return activation_out;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment