Adding BatchNorm and DepthwiseConv2D sources

e5779328 · Hashim Sharif · 63ab0581 · e5779328 · e5779328 · e5779328
Commit e5779328 authored 5 years ago by Hashim Sharif
--- a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/depthwise.cc
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/depthwise.cc
+#include <stdio.h> 
+#include <stdlib.h> 
+#include <unistd.h> 
+#include <fcntl.h> 
+#include <sys/types.h> 
+#include <sys/stat.h> 
+#include <string.h> 
+#include "../../tensor_runtime/include/tensor_runtime.h" 
+#include "../include/utils.h" 
+// Driver for the "depthwise_test_8" model: loads the trained parameters,
+// runs the network over the test inputs batch by batch and reports the
+// mean per-batch accuracy through dumpFinalAccuracy().
+//
+// Operation sequence issued per batch:
+//   conv -> add -> relu -> pool -> conv (depthwise weights) -> add -> relu
+//   -> gemm -> add -> relu -> gemm -> add -> relu -> softmax
+//
+// NOTE(review): the numeric arguments handed to readTrainedWeights,
+// readInputBatch, tensorConvolution and tensorPooling are forwarded verbatim;
+// their meaning is defined by tensor_runtime.h / utils.h, not visible here.
+int main(){
+  llvm_hpvm_initTensorRt(0);
+
+  const std::string model_dir("../model_params/depthwise_test_8/");
+  const std::string input_file = model_dir + "input.bin";
+  const std::string labels_file = model_dir + "labels.bin";
+
+  // Parameters of the first convolution.
+  const std::string conv_w_file = model_dir + "conv2d_1_w.bin";
+  void* conv_w = readTrainedWeights(conv_w_file.c_str(), 0,32,1,5,5);
+  const std::string conv_b_file = model_dir + "conv2d_1_b.bin";
+  void* conv_b = readTrainedWeights(conv_b_file.c_str(), 0,1,32,1,1);
+
+  // Parameters of the depthwise convolution.
+  const std::string dw_w_file = model_dir + "depthwise_conv2d_1_w.bin";
+  void* dw_w = readTrainedWeights(dw_w_file.c_str(), 0,32,1,3,3);
+  const std::string dw_b_file = model_dir + "depthwise_conv2d_1_b.bin";
+  void* dw_b = readTrainedWeights(dw_b_file.c_str(), 0,1,32,1,1);
+
+  // Parameters of the two dense layers.
+  const std::string fc1_w_file = model_dir + "dense_1_w.bin";
+  void* fc1_w = readTrainedWeights(fc1_w_file.c_str(), 0,1,1,6272,1024);
+  const std::string fc1_b_file = model_dir + "dense_1_b.bin";
+  void* fc1_b = readTrainedWeights(fc1_b_file.c_str(), 0,1,1024,1,1);
+  const std::string fc2_w_file = model_dir + "dense_2_w.bin";
+  void* fc2_w = readTrainedWeights(fc2_w_file.c_str(), 0,1,1,1024,10);
+  const std::string fc2_b_file = model_dir + "dense_2_b.bin";
+  void* fc2_b = readTrainedWeights(fc2_b_file.c_str(), 0,1,10,1,1);
+
+  // Weights are loaded before tracking starts; everything allocated from here
+  // on is handed to freeBatchMemory() at the end of each batch.
+  // NOTE(review): exact tracking semantics live in the runtime - confirm.
+  startMemTracking();
+
+  const int test_input_size = 10000;
+  const int batch_size = 10000;
+  const int batch_count = test_input_size / batch_size;
+
+  float accuracy_sum = 0.0f;
+  for (int batch = 0; batch < batch_count; ++batch) {
+    const int first = batch * batch_size;
+    const int last = (batch + 1) * batch_size;
+
+    void* images = readInputBatch(input_file.c_str(),0,first,last,1,28,28);
+
+    // Convolution block followed by pooling.
+    void* conv_out = tensorConvolution(images, conv_w, 2, 2, 1, 1, 1, 1);
+    void* conv_biased = tensorAdd(conv_out, conv_b);
+    void* conv_act = tensorRelu(conv_biased);
+    void* pooled = tensorPooling(conv_act,0,2,2,0,0,2,2);
+
+    // Depthwise convolution block.
+    // NOTE(review): the trailing 32 is presumably the group count - confirm.
+    void* dw_out = tensorConvolution(pooled, dw_w, 1, 1, 1, 1, 1, 32);
+    void* dw_biased = tensorAdd(dw_out, dw_b);
+    void* dw_act = tensorRelu(dw_biased);
+
+    // Two dense layers, then softmax.
+    void* fc1_out = tensorGemmGPU(dw_act, fc1_w);
+    void* fc1_biased = tensorAdd(fc1_out, fc1_b);
+    void* fc1_act = tensorRelu(fc1_biased);
+    void* fc2_out = tensorGemmGPU(fc1_act, fc2_w);
+    void* fc2_biased = tensorAdd(fc2_out, fc2_b);
+    void* fc2_act = tensorRelu(fc2_biased);
+    void* probs = tensorSoftmax(fc2_act);
+
+    // NOTE(review): the labels buffer is never released in this function -
+    // confirm ownership in utils.h.
+    uint8_t* batch_labels = readLabelsBatch(labels_file.c_str(),first,last);
+    const float batch_accuracy = computeAccuracy2(batch_labels, batch_size, probs);
+    accuracy_sum += batch_accuracy;
+
+    freeBatchMemory();
+  }
+
+  const float mean_accuracy = accuracy_sum / batch_count;
+  dumpFinalAccuracy(mean_accuracy);
+
+  llvm_hpvm_cleanupTensorRt();
+  return 0;
+}
--- a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/depthwise_batchnorm.cc
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/depthwise_batchnorm.cc
+#include <stdio.h> 
+#include <stdlib.h> 
+#include <unistd.h> 
+#include <fcntl.h> 
+#include <sys/types.h> 
+#include <sys/stat.h> 
+#include <string.h> 
+#include "../../tensor_runtime/include/tensor_runtime.h" 
+#include "../include/utils.h" 
+// Driver for the "depthwise_batchnorm2" model: loads the trained parameters,
+// runs the network over the test inputs batch by batch and reports the
+// mean per-batch accuracy through dumpFinalAccuracy().
+//
+// Operation sequence issued per batch:
+//   conv -> add -> relu -> batchnorm -> pool
+//   -> conv (depthwise weights) -> add -> relu -> batchnorm
+//   -> gemm -> add -> relu -> gemm -> add -> relu -> softmax
+//
+// NOTE(review): the numeric arguments handed to readTrainedWeights,
+// readInputBatch, tensorConvolution, tensorPooling and tensorBatchNorm are
+// forwarded verbatim; their meaning is defined by tensor_runtime.h / utils.h,
+// not visible here.
+int main(){
+  llvm_hpvm_initTensorRt(0);
+
+  const std::string model_dir("../model_params/depthwise_batchnorm2/");
+  const std::string input_file = model_dir + "input.bin";
+  const std::string labels_file = model_dir + "labels.bin";
+
+  // Parameters of the first convolution.
+  const std::string conv_w_file = model_dir + "conv2d_1_w.bin";
+  void* conv_w = readTrainedWeights(conv_w_file.c_str(), 0,32,1,5,5);
+  const std::string conv_b_file = model_dir + "conv2d_1_b.bin";
+  void* conv_b = readTrainedWeights(conv_b_file.c_str(), 0,1,32,1,1);
+
+  // Parameters of the first batch normalization.
+  const std::string bn1_gamma_file = model_dir + "batch_normalization_1_gamma.bin";
+  void* bn1_gamma = readTrainedWeights(bn1_gamma_file.c_str(), 0,1,32,1,1);
+  const std::string bn1_beta_file = model_dir + "batch_normalization_1_beta.bin";
+  void* bn1_beta = readTrainedWeights(bn1_beta_file.c_str(), 0,1,32,1,1);
+  const std::string bn1_mean_file = model_dir + "batch_normalization_1_mean.bin";
+  void* bn1_mean = readTrainedWeights(bn1_mean_file.c_str(), 0,1,32,1,1);
+  const std::string bn1_variance_file = model_dir + "batch_normalization_1_variance.bin";
+  void* bn1_variance = readTrainedWeights(bn1_variance_file.c_str(), 0,1,32,1,1);
+
+  // Parameters of the depthwise convolution.
+  const std::string dw_w_file = model_dir + "depthwise_conv2d_1_w.bin";
+  void* dw_w = readTrainedWeights(dw_w_file.c_str(), 0,32,1,3,3);
+  const std::string dw_b_file = model_dir + "depthwise_conv2d_1_b.bin";
+  void* dw_b = readTrainedWeights(dw_b_file.c_str(), 0,1,32,1,1);
+
+  // Parameters of the second batch normalization.
+  const std::string bn2_gamma_file = model_dir + "batch_normalization_2_gamma.bin";
+  void* bn2_gamma = readTrainedWeights(bn2_gamma_file.c_str(), 0,1,32,1,1);
+  const std::string bn2_beta_file = model_dir + "batch_normalization_2_beta.bin";
+  void* bn2_beta = readTrainedWeights(bn2_beta_file.c_str(), 0,1,32,1,1);
+  const std::string bn2_mean_file = model_dir + "batch_normalization_2_mean.bin";
+  void* bn2_mean = readTrainedWeights(bn2_mean_file.c_str(), 0,1,32,1,1);
+  const std::string bn2_variance_file = model_dir + "batch_normalization_2_variance.bin";
+  void* bn2_variance = readTrainedWeights(bn2_variance_file.c_str(), 0,1,32,1,1);
+
+  // Parameters of the two dense layers.
+  const std::string fc1_w_file = model_dir + "dense_1_w.bin";
+  void* fc1_w = readTrainedWeights(fc1_w_file.c_str(), 0,1,1,6272,1024);
+  const std::string fc1_b_file = model_dir + "dense_1_b.bin";
+  void* fc1_b = readTrainedWeights(fc1_b_file.c_str(), 0,1,1024,1,1);
+  const std::string fc2_w_file = model_dir + "dense_2_w.bin";
+  void* fc2_w = readTrainedWeights(fc2_w_file.c_str(), 0,1,1,1024,10);
+  const std::string fc2_b_file = model_dir + "dense_2_b.bin";
+  void* fc2_b = readTrainedWeights(fc2_b_file.c_str(), 0,1,10,1,1);
+
+  // Weights are loaded before tracking starts; everything allocated from here
+  // on is handed to freeBatchMemory() at the end of each batch.
+  // NOTE(review): exact tracking semantics live in the runtime - confirm.
+  startMemTracking();
+
+  const int test_input_size = 10000;
+  const int batch_size = 10000;
+  const int batch_count = test_input_size / batch_size;
+
+  float accuracy_sum = 0.0f;
+  for (int batch = 0; batch < batch_count; ++batch) {
+    const int first = batch * batch_size;
+    const int last = (batch + 1) * batch_size;
+
+    void* images = readInputBatch(input_file.c_str(),0,first,last,1,28,28);
+
+    // Convolution block, normalized and then pooled.
+    void* conv_out = tensorConvolution(images, conv_w, 2, 2, 1, 1, 1, 1);
+    void* conv_biased = tensorAdd(conv_out, conv_b);
+    void* conv_act = tensorRelu(conv_biased);
+    void* conv_norm = tensorBatchNorm(conv_act, bn1_gamma, bn1_beta, bn1_mean, bn1_variance, 0.001);
+    void* pooled = tensorPooling(conv_norm,0,2,2,0,0,2,2);
+
+    // Depthwise convolution block, normalized.
+    // NOTE(review): the trailing 32 is presumably the group count - confirm.
+    void* dw_out = tensorConvolution(pooled, dw_w, 1, 1, 1, 1, 1, 32);
+    void* dw_biased = tensorAdd(dw_out, dw_b);
+    void* dw_act = tensorRelu(dw_biased);
+    void* dw_norm = tensorBatchNorm(dw_act, bn2_gamma, bn2_beta, bn2_mean, bn2_variance, 0.001);
+
+    // Two dense layers, then softmax.
+    void* fc1_out = tensorGemmGPU(dw_norm, fc1_w);
+    void* fc1_biased = tensorAdd(fc1_out, fc1_b);
+    void* fc1_act = tensorRelu(fc1_biased);
+    void* fc2_out = tensorGemmGPU(fc1_act, fc2_w);
+    void* fc2_biased = tensorAdd(fc2_out, fc2_b);
+    void* fc2_act = tensorRelu(fc2_biased);
+    void* probs = tensorSoftmax(fc2_act);
+
+    // NOTE(review): the labels buffer is never released in this function -
+    // confirm ownership in utils.h.
+    uint8_t* batch_labels = readLabelsBatch(labels_file.c_str(),first,last);
+    const float batch_accuracy = computeAccuracy2(batch_labels, batch_size, probs);
+    accuracy_sum += batch_accuracy;
+
+    freeBatchMemory();
+  }
+
+  const float mean_accuracy = accuracy_sum / batch_count;
+  dumpFinalAccuracy(mean_accuracy);
+
+  llvm_hpvm_cleanupTensorRt();
+  return 0;
+}
--- a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/depthwise_batchnorm_promise.cc
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/depthwise_batchnorm_promise.cc
+#include <stdio.h> 
+#include <stdlib.h> 
+#include <unistd.h> 
+#include <fcntl.h> 
+#include <sys/types.h> 
+#include <sys/stat.h> 
+#include <string.h> 
+#include "../../../tensor_runtime/include/tensor_runtime.h" 
+#include "../../include/utils.h" 
+// PROMISE variant of the "depthwise_batchnorm2" driver. For each of
+// total_runs runs it evaluates the model batch by batch, reports the mean
+// per-batch accuracy through dumpFinalAccuracy(), and finally calls
+// dumpExecutionAccuracies().
+//
+// Operation sequence issued per batch:
+//   ConvLayer_PROMISE -> batchnorm -> pool
+//   -> conv (depthwise weights) -> add -> relu -> batchnorm
+//   -> FCLayer_PROMISE -> FCLayer_PROMISE -> softmax
+//
+// NOTE(review): the numeric arguments handed to the runtime calls (including
+// the floating-point constants of the *_PROMISE layers) are forwarded
+// verbatim; their meaning is defined by tensor_runtime.h / utils.h, not
+// visible here.
+int main(){
+  llvm_hpvm_initTensorRt(0);
+
+  const int total_runs = 1;
+  for (int run = 0; run < total_runs; ++run) {
+    startMemTracking();
+
+    const int test_input_size = 10000;
+    const int batch_size = 10000;
+    const int batch_count = test_input_size / batch_size;
+
+    float accuracy_sum = 0.0f;
+    for (int batch = 0; batch < batch_count; ++batch) {
+      // Parameters are (re)loaded for every batch, after startMemTracking().
+      // NOTE(review): presumably so that freeBatchMemory() releases them
+      // together with the batch tensors - confirm against the runtime.
+      const std::string model_dir("../model_params/depthwise_batchnorm2/");
+      const std::string input_file = model_dir + "input.bin";
+      const std::string labels_file = model_dir + "labels.bin";
+
+      // Parameters of the first convolution.
+      const std::string conv_w_file = model_dir + "conv2d_1_w.bin";
+      void* conv_w = readTrainedWeights(conv_w_file.c_str(), 0,32,1,5,5);
+      const std::string conv_b_file = model_dir + "conv2d_1_b.bin";
+      void* conv_b = readTrainedWeights(conv_b_file.c_str(), 0,1,32,1,1);
+
+      // Parameters of the first batch normalization.
+      const std::string bn1_gamma_file = model_dir + "batch_normalization_1_gamma.bin";
+      void* bn1_gamma = readTrainedWeights(bn1_gamma_file.c_str(), 0,1,32,1,1);
+      const std::string bn1_beta_file = model_dir + "batch_normalization_1_beta.bin";
+      void* bn1_beta = readTrainedWeights(bn1_beta_file.c_str(), 0,1,32,1,1);
+      const std::string bn1_mean_file = model_dir + "batch_normalization_1_mean.bin";
+      void* bn1_mean = readTrainedWeights(bn1_mean_file.c_str(), 0,1,32,1,1);
+      const std::string bn1_variance_file = model_dir + "batch_normalization_1_variance.bin";
+      void* bn1_variance = readTrainedWeights(bn1_variance_file.c_str(), 0,1,32,1,1);
+
+      // Parameters of the depthwise convolution.
+      const std::string dw_w_file = model_dir + "depthwise_conv2d_1_w.bin";
+      void* dw_w = readTrainedWeights(dw_w_file.c_str(), 0,32,1,3,3);
+      const std::string dw_b_file = model_dir + "depthwise_conv2d_1_b.bin";
+      void* dw_b = readTrainedWeights(dw_b_file.c_str(), 0,1,32,1,1);
+
+      // Parameters of the second batch normalization.
+      const std::string bn2_gamma_file = model_dir + "batch_normalization_2_gamma.bin";
+      void* bn2_gamma = readTrainedWeights(bn2_gamma_file.c_str(), 0,1,32,1,1);
+      const std::string bn2_beta_file = model_dir + "batch_normalization_2_beta.bin";
+      void* bn2_beta = readTrainedWeights(bn2_beta_file.c_str(), 0,1,32,1,1);
+      const std::string bn2_mean_file = model_dir + "batch_normalization_2_mean.bin";
+      void* bn2_mean = readTrainedWeights(bn2_mean_file.c_str(), 0,1,32,1,1);
+      const std::string bn2_variance_file = model_dir + "batch_normalization_2_variance.bin";
+      void* bn2_variance = readTrainedWeights(bn2_variance_file.c_str(), 0,1,32,1,1);
+
+      // Parameters of the two dense layers.
+      const std::string fc1_w_file = model_dir + "dense_1_w.bin";
+      void* fc1_w = readTrainedWeights(fc1_w_file.c_str(), 0,1,1,6272,1024);
+      const std::string fc1_b_file = model_dir + "dense_1_b.bin";
+      void* fc1_b = readTrainedWeights(fc1_b_file.c_str(), 0,1,1024,1,1);
+      const std::string fc2_w_file = model_dir + "dense_2_w.bin";
+      void* fc2_w = readTrainedWeights(fc2_w_file.c_str(), 0,1,1,1024,10);
+      const std::string fc2_b_file = model_dir + "dense_2_b.bin";
+      void* fc2_b = readTrainedWeights(fc2_b_file.c_str(), 0,1,10,1,1);
+
+      const int first = batch * batch_size;
+      const int last = (batch + 1) * batch_size;
+      void* images = readInputBatch(input_file.c_str(),0,first,last,1,28,28);
+
+      // First convolution through the PROMISE layer API, then normalize and pool.
+      void* conv_out = ConvLayer_PROMISE(images, 0.0, 1.0, conv_w, -0.21894497, 0.20240873, conv_b, -0.2508162, 0.031047817, 2, 2, 1, 1, -1, 0, 1, 0.0, 0.3616602423787114, 9);
+      void* conv_norm = tensorBatchNorm(conv_out, bn1_gamma, bn1_beta, bn1_mean, bn1_variance, 0.001);
+      void* pooled = tensorPooling(conv_norm,0,2,2,0,0,2,2);
+
+      // Depthwise convolution block, normalized.
+      // NOTE(review): the trailing 32 is presumably the group count - confirm.
+      void* dw_out = tensorConvolution(pooled, dw_w, 1, 1, 1, 1, 1, 32);
+      void* dw_biased = tensorAdd(dw_out, dw_b);
+      void* dw_act = tensorRelu(dw_biased);
+      void* dw_norm = tensorBatchNorm(dw_act, bn2_gamma, bn2_beta, bn2_mean, bn2_variance, 0.001);
+
+      // Two dense layers through the PROMISE layer API, then softmax.
+      void* fc1_out = FCLayer_PROMISE(dw_norm, -1.0247770547866821, 5.120966439247134, fc1_w, -0.04442959, 0.04483322, fc1_b, -0.001500695, 0.002055318, 1, 0.0, 4.769639563560498, 9);
+      void* fc2_out = FCLayer_PROMISE(fc1_out, 0.0, 4.769639563560498, fc2_w, -0.123055816, 0.12699054, fc2_b, -0.00430397, 0.004860983, 1, 0.0, 21.660391826629642, 9);
+      void* probs = tensorSoftmax(fc2_out);
+
+      // NOTE(review): the labels buffer is never released in this function -
+      // confirm ownership in utils.h.
+      uint8_t* batch_labels = readLabelsBatch(labels_file.c_str(),first,last);
+      const float batch_accuracy = computeAccuracy2(batch_labels, batch_size, probs);
+      accuracy_sum += batch_accuracy;
+
+      freeBatchMemory();
+    }
+
+    const float mean_accuracy = accuracy_sum / batch_count;
+    dumpFinalAccuracy(mean_accuracy);
+  }
+
+  dumpExecutionAccuracies();
+  llvm_hpvm_cleanupTensorRt();
+  return 0;
+}
--- a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/depthwise_promise.cc
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/depthwise_promise.cc
+#include <stdio.h> 
+#include <stdlib.h> 
+#include <unistd.h> 
+#include <fcntl.h> 
+#include <sys/types.h> 
+#include <sys/stat.h> 
+#include <string.h> 
+#include "../../../tensor_runtime/include/tensor_runtime.h" 
+#include "../../include/utils.h" 
+// PROMISE variant of the "depthwise_test_8" driver. For each of total_runs
+// runs it evaluates the model batch by batch, reports the mean per-batch
+// accuracy through dumpFinalAccuracy(), and finally calls
+// dumpExecutionAccuracies().
+//
+// Operation sequence issued per batch:
+//   ConvLayer_PROMISE -> conv (depthwise weights) -> add -> relu
+//   -> FCLayer_PROMISE -> FCLayer_PROMISE -> softmax
+//
+// NOTE(review): the numeric arguments handed to the runtime calls (including
+// the floating-point constants of the *_PROMISE layers) are forwarded
+// verbatim; their meaning is defined by tensor_runtime.h / utils.h, not
+// visible here.
+int main(){
+  // NOTE(review): initialised with 1, whereas the sibling depthwise sources
+  // pass 0 - confirm this is intended.
+  llvm_hpvm_initTensorRt(1);
+
+  const int total_runs = 1;
+  for (int run = 0; run < total_runs; ++run) {
+    startMemTracking();
+
+    const int test_input_size = 10000;
+    const int batch_size = 10000;
+    const int batch_count = test_input_size / batch_size;
+
+    float accuracy_sum = 0.0f;
+    for (int batch = 0; batch < batch_count; ++batch) {
+      // Parameters are (re)loaded for every batch, after startMemTracking().
+      // NOTE(review): presumably so that freeBatchMemory() releases them
+      // together with the batch tensors - confirm against the runtime.
+      const std::string model_dir("../model_params/depthwise_test_8/");
+      const std::string input_file = model_dir + "input.bin";
+      const std::string labels_file = model_dir + "labels.bin";
+
+      // Parameters of the first convolution.
+      const std::string conv_w_file = model_dir + "conv2d_1_w.bin";
+      void* conv_w = readTrainedWeights(conv_w_file.c_str(), 0,32,1,5,5);
+      const std::string conv_b_file = model_dir + "conv2d_1_b.bin";
+      void* conv_b = readTrainedWeights(conv_b_file.c_str(), 0,1,32,1,1);
+
+      // Parameters of the depthwise convolution.
+      const std::string dw_w_file = model_dir + "depthwise_conv2d_1_w.bin";
+      void* dw_w = readTrainedWeights(dw_w_file.c_str(), 0,32,1,3,3);
+      const std::string dw_b_file = model_dir + "depthwise_conv2d_1_b.bin";
+      void* dw_b = readTrainedWeights(dw_b_file.c_str(), 0,1,32,1,1);
+
+      // Parameters of the two dense layers.
+      const std::string fc1_w_file = model_dir + "dense_1_w.bin";
+      void* fc1_w = readTrainedWeights(fc1_w_file.c_str(), 0,1,1,6272,1024);
+      const std::string fc1_b_file = model_dir + "dense_1_b.bin";
+      void* fc1_b = readTrainedWeights(fc1_b_file.c_str(), 0,1,1024,1,1);
+      const std::string fc2_w_file = model_dir + "dense_2_w.bin";
+      void* fc2_w = readTrainedWeights(fc2_w_file.c_str(), 0,1,1,1024,10);
+      const std::string fc2_b_file = model_dir + "dense_2_b.bin";
+      void* fc2_b = readTrainedWeights(fc2_b_file.c_str(), 0,1,10,1,1);
+
+      const int first = batch * batch_size;
+      const int last = (batch + 1) * batch_size;
+      void* images = readInputBatch(input_file.c_str(),0,first,last,1,28,28);
+
+      // First convolution through the PROMISE layer API.
+      void* conv_out = ConvLayer_PROMISE(images, 0.0, 1.0, conv_w, -0.45243406, 0.4331673, conv_b, -0.2225991, 0.05682303, 2, 2, 1, 1, 0, 2, 1, 0.0, 2.593297730684286, 9);
+
+      // Depthwise convolution block.
+      // NOTE(review): the trailing 32 is presumably the group count - confirm.
+      void* dw_out = tensorConvolution(conv_out, dw_w, 1, 1, 1, 1, 1, 32);
+      void* dw_biased = tensorAdd(dw_out, dw_b);
+      void* dw_act = tensorRelu(dw_biased);
+
+      // Two dense layers through the PROMISE layer API, then softmax.
+      void* fc1_out = FCLayer_PROMISE(dw_act, 0.0, 1.4831079334020663, fc1_w, -0.1172131, 0.105426796, fc1_b, -0.027105594, 0.04015947, 1, 0.0, 2.723612790107728, 9);
+      void* fc2_out = FCLayer_PROMISE(fc1_out, 0.0, 2.723612790107728, fc2_w, -0.23769215, 0.20409682, fc2_b, -0.004073992, 0.049675815, 1, 0.0, 24.92628944396973, 9);
+      void* probs = tensorSoftmax(fc2_out);
+
+      // NOTE(review): the labels buffer is never released in this function -
+      // confirm ownership in utils.h.
+      uint8_t* batch_labels = readLabelsBatch(labels_file.c_str(),first,last);
+      const float batch_accuracy = computeAccuracy2(batch_labels, batch_size, probs);
+      accuracy_sum += batch_accuracy;
+
+      freeBatchMemory();
+    }
+
+    const float mean_accuracy = accuracy_sum / batch_count;
+    dumpFinalAccuracy(mean_accuracy);
+  }
+
+  dumpExecutionAccuracies();
+  llvm_hpvm_cleanupTensorRt();
+  return 0;
+}