From 2509a5a42b1f78075efde3d87895a59e6a05e5e8 Mon Sep 17 00:00:00 2001
From: Hashim Sharif <hsharif3@tyler.cs.illinois.edu>
Date: Tue, 18 Dec 2018 20:56:14 -0600
Subject: [PATCH] Lenet-5 working with ApproxHPVM backends and Tensor Runtime

---
 .../benchmarks/lenet/src/lenet.cpp            | 250 +++++++++++++++++-
 .../common/include/tensorUtils.h              |   2 +-
 2 files changed, 237 insertions(+), 15 deletions(-)

diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet/src/lenet.cpp b/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet/src/lenet.cpp
index a245f1a208..7698e1511b 100644
--- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet/src/lenet.cpp
+++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet/src/lenet.cpp
@@ -9,6 +9,7 @@
 
 using namespace std;
 
+/** DNN Layer 1 **/
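+// Each leaf node below wraps a single tensor intrinsic. A node receives
+// (tensor, size) argument pairs and returns a (tensor, size) pair via
+// __visc__return(2, r, (size_t) 0).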
 void tensorConvNode1(void *t1, size_t bytes1, void *t2, size_t bytes2) {
     __visc__hint(visc::CUDNN_TARGET);
     __visc__attributes(2, t1, t2, 0);
@@ -43,21 +44,142 @@ void tensorTanhNode1(void *t1, size_t bytest1) {
     __visc__return(2, r, (size_t) 0);
 }
 
+/** End of Layer 1 **/
+
+
+
+/** DNN Layer 2 **/
+void tensorConvNode2(void *t1, size_t bytes1, void *t2, size_t bytes2) {
+    __visc__hint(visc::CUDNN_TARGET);
+    __visc__attributes(2, t1, t2, 0);
+
+    // X * W: t1 is the input X, t2 the filter weights W
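+    // __visc__tensor_convolution(input, filter, pad_h, pad_w, stride_h, stride_w):
+    // 2x2 padding with 1x1 strides preserves the spatial size under 5x5 filters.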
+    void *r = __visc__tensor_convolution(t1, t2, 2, 2, 1, 1);
+    __visc__return(2, r, (size_t) 0);
+}
+
+
+void tensorAddNode2(void *t1, size_t bytest1, void *t2, size_t bytest2) {
+    __visc__hint(visc::CUDNN_TARGET);
+    __visc__attributes(2, t1, t2, 0);
+
+    void* r = __visc__tensor_add(t1, t2);
+    __visc__return(2, r, (size_t) 0);
+}
+
+void tensorPoolNode2(void *t1, size_t bytest1) {
+    __visc__hint(visc::CUDNN_TARGET);
+    __visc__attributes(1, t1, 0);
+
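+    // __visc__tensor_pool_max(input, win_h, win_w, pad_v, pad_h, stride_v, stride_h):
+    // a 2x2 window with 2x2 strides halves each spatial dimension (14x14 -> 7x7).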
+    void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
+    __visc__return(2, r, (size_t) 0);
+}
+
+void tensorTanhNode2(void *t1, size_t bytest1) {
+    __visc__hint(visc::CUDNN_TARGET);
+    __visc__attributes(1, t1, 0);
+
+    void* r = __visc__tensor_tanh(t1);
+    __visc__return(2, r, (size_t) 0);
+}
+
+/** End of Layer 2 **/
+
+
+/** DNN Layer 3 **/
+void tensorMulNode3(void *t1, size_t bytes1, void *t2, size_t bytes2) {
+    __visc__hint(visc::CUDNN_TARGET);
+    __visc__attributes(2, t1, t2, 0);
+
+    // X * W: t1 is the input X, t2 the weight matrix W
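+    // The tensor runtime treats the 4-D input as a (batch x features) matrix
+    // for this GEMM, matching the (7*7*64 x 1024) fc1 weight shape.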
+    void *r = __visc__tensor_mul(t1, t2);
+    __visc__return(2, r, (size_t) 0);
+}
+
+void tensorAddNode3(void *t1, size_t bytest1, void *t2, size_t bytest2) {
+    __visc__hint(visc::CUDNN_TARGET);
+    __visc__attributes(2, t1, t2, 0);
+
+    void* r = __visc__tensor_add(t1, t2);
+    __visc__return(2, r, (size_t) 0);
+}
+
+void tensorTanhNode3(void *t1, size_t bytest1) {
+    __visc__hint(visc::CUDNN_TARGET);
+    __visc__attributes(1, t1, 0);
+
+    void* r = __visc__tensor_tanh(t1);
+    __visc__return(2, r, (size_t) 0);
+}
+
+/** End of Layer 3 **/
+
+
+
+/** DNN Layer 4 **/
+void tensorMulNode4(void *t1, size_t bytes1, void *t2, size_t bytes2) {
+    __visc__hint(visc::CUDNN_TARGET);
+    __visc__attributes(2, t1, t2, 0);
+
+    // X * W: t1 is the input X, t2 the weight matrix W
+    void *r = __visc__tensor_mul(t1, t2);
+    __visc__return(2, r, (size_t) 0);
+}
+
+void tensorAddNode4(void *t1, size_t bytest1, void *t2, size_t bytest2) {
+    __visc__hint(visc::CUDNN_TARGET);
+    __visc__attributes(2, t1, t2, 0);
+
+    void* r = __visc__tensor_add(t1, t2);
+    __visc__return(2, r, (size_t) 0);
+}
+
+void tensorTanhNode4(void *t1, size_t bytest1) {
+    __visc__hint(visc::CUDNN_TARGET);
+    __visc__attributes(1, t1, 0);
+
+    void* r = __visc__tensor_tanh(t1);
+    __visc__return(2, r, (size_t) 0);
+}
+
+/** End of Layer 4 **/
+
+
 
 void root(void *x, size_t x_bytes,
 	  void *conv1_w, size_t conv1_w_bytes,
 	  void *conv1_b, size_t conv1_b_bytes,
 	  void *conv2_w, size_t conv2_w_bytes,
-	  void *conv2_b, size_t conv2_b_bytes){
+	  void *conv2_b, size_t conv2_b_bytes,
+	  void *fc1_w, size_t fc1_w_bytes,
+	  void *fc1_b, size_t fc1_b_bytes,
+	  void *fc2_w, size_t fc2_w_bytes,
+	  void *fc2_b, size_t fc2_b_bytes){
   
     __visc__hint(visc::CPU_TARGET);
-    __visc__attributes(5, x, conv1_w, conv1_b, conv2_w, conv2_b, 0);
+    __visc__attributes(9, x, conv1_w, conv1_b, conv2_w, conv2_b,
+                       fc1_w, fc1_b, fc2_w, fc2_b, 0);
 
+    // Conv1 Nodes
     void *nodeConv1 = __visc__createNodeND(0, tensorConvNode1);
     void *nodeAdd1 = __visc__createNodeND(0, tensorAddNode1);
     void *nodePool1 = __visc__createNodeND(0, tensorPoolNode1);
     void *nodeTanh1 = __visc__createNodeND(0, tensorTanhNode1);
-
+    // Conv2 Nodes
+    void *nodeConv2 = __visc__createNodeND(0, tensorConvNode2);
+    void *nodeAdd2 = __visc__createNodeND(0, tensorAddNode2);
+    void *nodePool2 = __visc__createNodeND(0, tensorPoolNode2);
+    void *nodeTanh2 = __visc__createNodeND(0, tensorTanhNode2);
+    // FC1 Nodes
+    void *nodeMul3 = __visc__createNodeND(0, tensorMulNode3);
+    void *nodeAdd3 = __visc__createNodeND(0, tensorAddNode3);
+    void *nodeTanh3 = __visc__createNodeND(0, tensorTanhNode3);
+    // FC2 Nodes
+    void *nodeMul4 = __visc__createNodeND(0, tensorMulNode4);
+    void *nodeAdd4 = __visc__createNodeND(0, tensorAddNode4);
+    void *nodeTanh4 = __visc__createNodeND(0, tensorTanhNode4);
+
+
+    /**** Conv Layer 1 ****/
     // node, src, dst, stream
     __visc__bindIn(nodeConv1, 0, 0, 0);
     __visc__bindIn(nodeConv1, 1, 1, 0);
@@ -79,10 +201,76 @@ void root(void *x, size_t x_bytes,
     // node, node, type, src, dst, stream
     __visc__edge(nodePool1, nodeTanh1, 1, 0, 0, 0);
     __visc__edge(nodePool1, nodeTanh1, 1, 1, 1, 0);
+
+
+    /**** Conv Layer 2 ****/
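+    // Root arguments arrive as flattened (pointer, bytes) pairs:
+    // x = slots 0/1, conv1_w = 2/3, conv1_b = 4/5, conv2_w = 6/7, conv2_b = 8/9,
+    // fc1_w = 10/11, fc1_b = 12/13, fc2_w = 14/15, fc2_b = 16/17.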
+    // ConvOp2
+    __visc__bindIn(nodeConv2, 6, 2, 0);
+    __visc__bindIn(nodeConv2, 7, 3, 0);
     
+    __visc__edge(nodeTanh1, nodeConv2, 1, 0, 0, 0);
+    __visc__edge(nodeTanh1, nodeConv2, 1, 1, 1, 0);
+
+    // AddOp2
+    __visc__bindIn(nodeAdd2, 8, 2, 0);
+    __visc__bindIn(nodeAdd2, 9, 3, 0);
+
+    __visc__edge(nodeConv2, nodeAdd2, 1, 0, 0, 0);
+    __visc__edge(nodeConv2, nodeAdd2, 1, 1, 1, 0);
+
+    // PoolOp2
+    __visc__edge(nodeAdd2, nodePool2, 1, 0, 0, 0);
+    __visc__edge(nodeAdd2, nodePool2, 1, 1, 1, 0);
+
+    // TanhOp2
+    __visc__edge(nodePool2, nodeTanh2, 1, 0, 0, 0);
+    __visc__edge(nodePool2, nodeTanh2, 1, 1, 1, 0);
+
+
+    /**** FC Layer 1 ****/
+    // MulOp3
+    __visc__bindIn(nodeMul3, 10, 2, 0);
+    __visc__bindIn(nodeMul3, 11, 3, 0);
     
-    __visc__bindOut(nodeTanh1, 0, 0, 0);
-    __visc__bindOut(nodeTanh1, 1, 1, 0);
+    __visc__edge(nodeTanh2, nodeMul3, 1, 0, 0, 0);
+    __visc__edge(nodeTanh2, nodeMul3, 1, 1, 1, 0);
+
+    // AddOp3
+    __visc__bindIn(nodeAdd3, 12, 2, 0);
+    __visc__bindIn(nodeAdd3, 13, 3, 0);
+
+    __visc__edge(nodeMul3, nodeAdd3, 1, 0, 0, 0);
+    __visc__edge(nodeMul3, nodeAdd3, 1, 1, 1, 0);
+
+    // TanhOp3
+    __visc__edge(nodeAdd3, nodeTanh3, 1, 0, 0, 0);
+    __visc__edge(nodeAdd3, nodeTanh3, 1, 1, 1, 0);
+
+
+    /**** FC Layer 2 ****/
+    // MulOp4
+    __visc__bindIn(nodeMul4, 14, 2, 0);
+    __visc__bindIn(nodeMul4, 15, 3, 0);
+    
+    __visc__edge(nodeTanh3, nodeMul4, 1, 0, 0, 0);
+    __visc__edge(nodeTanh3, nodeMul4, 1, 1, 1, 0);
+
+    // AddOp4
+    __visc__bindIn(nodeAdd4, 16, 2, 0);
+    __visc__bindIn(nodeAdd4, 17, 3, 0);
+
+    __visc__edge(nodeMul4, nodeAdd4, 1, 0, 0, 0);
+    __visc__edge(nodeMul4, nodeAdd4, 1, 1, 1, 0);
+
+    // TanhOp4
+    __visc__edge(nodeAdd4, nodeTanh4, 1, 0, 0, 0);
+    __visc__edge(nodeAdd4, nodeTanh4, 1, 1, 1, 0);
+
+
+
+    /**** Output Binding ****/
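+    // bindOut maps a node output slot to a root output slot; both outputs
+    // (tensor pointer and size) land in RootIn's ret_t field.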
+    __visc__bindOut(nodeTanh4, 0, 0, 0);
+    __visc__bindOut(nodeTanh4, 1, 1, 0);
 
 }
 
@@ -97,44 +285,66 @@ typedef struct __attribute__((__packed__)) {
     void *x;
     size_t x_bytes;
     // 1st Layer parameters
-    void *conv1_w;
+    void* conv1_w;
     size_t conv1_w_bytes;
-    void *conv1_b;
+    void* conv1_b;
     size_t conv1_b_bytes;
     // 2nd Layer parameters
-    void *conv2_w;
+    void* conv2_w;
     size_t conv2_w_bytes;
-    void *conv2_b;
+    void* conv2_b;
     size_t conv2_b_bytes;
-
+    // 3rd Layer parameters
+    void* fc1_w;
+    size_t fc1_w_bytes;
+    void* fc1_b;
+    size_t fc1_b_bytes;
+    // 4th Layer parameters
+    void* fc2_w;
+    size_t fc2_w_bytes;
+    void* fc2_b;
+    size_t fc2_b_bytes;
+
     struct ret_t r;
 }
 RootIn;
 
 int main() {
 
-    int test_batch_size = 1000;
+    int test_batch_size = 10000;
     std::string prefix = "../../../../../../projects/hpvm-tensor-rt/model_params";
     std::string input_data_path = prefix + std::string("/FC_network2/mnist_float_input.bin");
+    std::string labels_path = prefix + std::string("/lenet_params/datasets/t10k-labels-idx1-ubyte");
     std::string conv1_w_path = prefix + std::string("/lenet_keras/conv1.bin");			  
     std::string conv1_b_path = prefix + std::string("/lenet_keras/conv1_bias.bin");
     std::string conv2_w_path = prefix + std::string("/lenet_keras/conv2.bin");			  
-    std::string conv2_b_path = prefix + std::string("/lenet_keras/conv2_bias.bin");  
+    std::string conv2_b_path = prefix + std::string("/lenet_keras/conv2_bias.bin");
+    std::string fc1_w_path = prefix + std::string("/lenet_keras/fc1.bin");			  
+    std::string fc1_b_path = prefix + std::string("/lenet_keras/fc1_bias.bin");
+    std::string fc2_w_path = prefix + std::string("/lenet_keras/fc2.bin");			  
+    std::string fc2_b_path = prefix + std::string("/lenet_keras/fc2_bias.bin");  
+
     
     printf("Reading Input Data from = %s \n", input_data_path.c_str());
-    
+
+    uint8_t* labels = readLabels(labels_path.c_str(), test_batch_size);
     void* x = readTrainedWeights(input_data_path.c_str(), float_type,
 				 test_batch_size, 1, 28, 28);
     void* conv1_w = readTrainedWeights(conv1_w_path.c_str(), float_type, 32, 1, 5, 5);
     void* conv1_b = readTrainedWeights(conv1_b_path.c_str(), float_type, 1, 32, 1, 1);
     void* conv2_w = readTrainedWeights(conv2_w_path.c_str(), float_type, 64, 32, 5, 5);
     void* conv2_b = readTrainedWeights(conv2_b_path.c_str(), float_type, 1, 64, 1, 1);
+    void* fc1_w = readTrainedWeights(fc1_w_path.c_str(), float_type, 1, 1, 7*7*64, 1024);
+    void* fc1_b = readTrainedWeights(fc1_b_path.c_str(), float_type, 1, 1024, 1, 1);
+    void* fc2_w = readTrainedWeights(fc2_w_path.c_str(), float_type, 1, 1, 1024, 10);
+    void* fc2_b = readTrainedWeights(fc2_b_path.c_str(), float_type, 1, 10, 1, 1);
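+    // fc1 is (7*7*64 x 1024): conv2's 64 feature maps, max-pooled to 7x7,
+    // flattened into a single feature vector per image.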
 
     __visc__init();
 
     RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn)));
     args->x = x;
     args->x_bytes = 0;
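+    // NOTE: the *_bytes fields appear to be unused placeholders; the tensor
+    // runtime tracks tensor sizes internally, so every size slot is set to 0.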
+    // Conv Layers params
     args->conv1_w = conv1_w;
     args->conv1_w_bytes = 0;
     args->conv1_b = conv1_b;
@@ -143,6 +353,15 @@ int main() {
     args->conv2_w_bytes = 0;
     args->conv2_b = conv2_b;
     args->conv2_b_bytes = 0;
+    // FC Layers params
+    args->fc1_w = fc1_w;
+    args->fc1_w_bytes = 0;
+    args->fc1_b = fc1_b;
+    args->fc1_b_bytes = 0;
+    args->fc2_w = fc2_w;
+    args->fc2_w_bytes = 0;
+    args->fc2_b = fc2_b;
+    args->fc2_b_bytes = 0;
 
     void *dfg = __visc__launch(0, root, (void *)args);
 
@@ -150,10 +369,13 @@ int main() {
 
     // FIXME: Value returned in the wrong index!!
     //void *r = static_cast<RootIn*>(args)->r.tensor;
-    void *r = static_cast<RootIn*>(args)->x;
-    hpvm_request_tensor(r, 0);
+    void *result = static_cast<RootIn*>(args)->x;
+    hpvm_request_tensor(result, 0);
 
     __visc__cleanup();
+
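+    // Check the returned output tensor against the MNIST test labels.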
+    computeAccuracy2(labels, test_batch_size, result);
+    
     return 0;
 }
 
diff --git a/llvm/test/VISC/DNN_Benchmarks/common/include/tensorUtils.h b/llvm/test/VISC/DNN_Benchmarks/common/include/tensorUtils.h
index 4200ad1569..3e83c5dd89 100644
--- a/llvm/test/VISC/DNN_Benchmarks/common/include/tensorUtils.h
+++ b/llvm/test/VISC/DNN_Benchmarks/common/include/tensorUtils.h
@@ -247,7 +247,7 @@ struct Tensor* readTrainedWeights(const char* file_name, int data_type, int dim1
 }
 
 
-uint8_t* readLabels(char* labels_file, int num_labels){
+uint8_t* readLabels(const char* labels_file, int num_labels){
 
   int file_header_size = 8;
   uint8_t* labels = (uint8_t*) malloc(sizeof(uint8_t) * num_labels);
-- 
GitLab