Commit 2509a5a4 authored by Hashim Sharif

Lenet-5 working with ApproxHPVM backends and Tensor Runtime

parent ecacc226
@@ -9,6 +9,7 @@
using namespace std;
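// Note: every leaf node below follows the same pattern: it takes
// (tensor pointer, byte size) pairs, declares a backend hint and its
// tensor inputs via __visc__attributes, applies one tensor intrinsic,
// and returns the (result tensor, size) pair through __visc__return.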
/**** DNN Layer 1 ****/
void tensorConvNode1(void *t1, size_t bytes1, void *t2, size_t bytes2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
@@ -43,21 +44,142 @@ void tensorTanhNode1(void *t1, size_t bytest1) {
__visc__return(2, r, (size_t) 0);
}
/** End of Layer 1 **/
/**** DNN Layer 2 ****/
void tensorConvNode2(void *t1, size_t bytes1, void *t2, size_t bytes2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
// X * W = t2 * t1
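// Trailing arguments appear to be (pad_h, pad_w, stride_h, stride_w):
// padding 2 with stride 1 preserves the spatial size for a 5x5 kernel.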
void *r = __visc__tensor_convolution(t1, t2, 2, 2, 1, 1);
__visc__return(2, r, (size_t) 0);
}
void tensorAddNode2(void *t1, size_t bytest1, void *t2, size_t bytest2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
void* r = __visc__tensor_add(t1, t2);
__visc__return(2, r, (size_t) 0);
}
void tensorPoolNode2(void *t1, size_t bytest1) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(1, t1, 0);
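// Pool arguments appear to be (window_h, window_w, pad_h, pad_w, stride_h, stride_w):
// a 2x2 window with stride 2 and no padding halves each spatial dimension.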
void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
__visc__return(2, r, (size_t) 0);
}
void tensorTanhNode2(void *t1, size_t bytest1) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_tanh(t1);
__visc__return(2, r, (size_t) 0);
}
/** End of Layer 2 **/
/**** DNN Layer 3 ****/
void tensorMulNode3(void *t1, size_t bytes1, void *t2, size_t bytes2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
// X * W = t2 * t1
void *r = __visc__tensor_mul(t1, t2);
__visc__return(2, r, (size_t) 0);
}
void tensorAddNode3(void *t1, size_t bytest1, void *t2, size_t bytest2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
void* r = __visc__tensor_add(t1, t2);
__visc__return(2, r, (size_t) 0);
}
void tensorTanhNode3(void *t1, size_t bytest1) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_tanh(t1);
__visc__return(2, r, (size_t) 0);
}
/** End of Layer 3 **/
/**** DNN Layer 4 ****/
void tensorMulNode4(void *t1, size_t bytes1, void *t2, size_t bytes2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
// X * W = t2 * t1
void *r = __visc__tensor_mul(t1, t2);
__visc__return(2, r, (size_t) 0);
}
void tensorAddNode4(void *t1, size_t bytest1, void *t2, size_t bytest2) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(2, t1, t2, 0);
void* r = __visc__tensor_add(t1, t2);
__visc__return(2, r, (size_t) 0);
}
void tensorTanhNode4(void *t1, size_t bytest1) {
__visc__hint(visc::CUDNN_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_tanh(t1);
__visc__return(2, r, (size_t) 0);
}
/** End of Layer 4 **/
void root(void *x, size_t x_bytes,
void *conv1_w, size_t conv1_w_bytes,
void *conv1_b, size_t conv1_b_bytes,
void *conv2_w, size_t conv2_w_bytes,
void *conv2_b, size_t conv2_b_bytes,
void *fc1_w, size_t fc1_w_bytes,
void *fc1_b, size_t fc1_b_bytes,
void *fc2_w, size_t fc2_w_bytes,
void *fc2_b, size_t fc2_b_bytes){
__visc__hint(visc::CPU_TARGET);
__visc__attributes(9, x, conv1_w, conv1_b, conv2_w, conv2_b, fc1_w, fc1_b, fc2_w, fc2_b, 0);
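// __visc__createNodeND(0, fn) wraps fn as a single-instance
// (zero-dimensional) dataflow node in the graph built by root().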
// Conv1 Nodes
void *nodeConv1 = __visc__createNodeND(0, tensorConvNode1);
void *nodeAdd1 = __visc__createNodeND(0, tensorAddNode1);
void *nodePool1 = __visc__createNodeND(0, tensorPoolNode1);
void *nodeTanh1 = __visc__createNodeND(0, tensorTanhNode1);
// Conv2 Nodes
void *nodeConv2 = __visc__createNodeND(0, tensorConvNode2);
void *nodeAdd2 = __visc__createNodeND(0, tensorAddNode2);
void *nodePool2 = __visc__createNodeND(0, tensorPoolNode2);
void *nodeTanh2 = __visc__createNodeND(0, tensorTanhNode2);
// FC1 Nodes
void *nodeMul3 = __visc__createNodeND(0, tensorMulNode3);
void *nodeAdd3 = __visc__createNodeND(0, tensorAddNode3);
void *nodeTanh3 = __visc__createNodeND(0, tensorTanhNode3);
// FC2 Nodes
void *nodeMul4 = __visc__createNodeND(0, tensorMulNode4);
void *nodeAdd4 = __visc__createNodeND(0, tensorAddNode4);
void *nodeTanh4 = __visc__createNodeND(0, tensorTanhNode4);
/**** Conv Layer 1 ****/
// node, src, dst, stream
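// __visc__bindIn forwards root argument <src> to node input <dst>; each
// tensor occupies two consecutive root slots (pointer, then byte size),
// which explains the even/odd index pairs used throughout.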
__visc__bindIn(nodeConv1, 0, 0, 0);
__visc__bindIn(nodeConv1, 1, 1, 0);
@@ -79,10 +201,76 @@ void root(void *x, size_t x_bytes,
// node, node, type, src, dst, stream
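// __visc__edge connects output <src> of one node to input <dst> of the
// next; the tensor and its size travel as two separate edges.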
__visc__edge(nodePool1, nodeTanh1, 1, 0, 0, 0);
__visc__edge(nodePool1, nodeTanh1, 1, 1, 1, 0);
/**** Conv Layer 2 ****/
// ConvOp2
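// conv2_w/conv2_w_bytes occupy root slots 6/7 (after x and the two
// conv1 parameters, two slots each).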
__visc__bindIn(nodeConv2, 6, 2, 0);
__visc__bindIn(nodeConv2, 7, 3, 0);
__visc__edge(nodeTanh1, nodeConv2, 1, 0, 0, 0);
__visc__edge(nodeTanh1, nodeConv2, 1, 1, 1, 0);
// AddOp2
__visc__bindIn(nodeAdd2, 8, 2, 0);
__visc__bindIn(nodeAdd2, 9, 3, 0);
__visc__edge(nodeConv2, nodeAdd2, 1, 0, 0, 0);
__visc__edge(nodeConv2, nodeAdd2, 1, 1, 1, 0);
// PoolOp2
__visc__edge(nodeAdd2, nodePool2, 1, 0, 0, 0);
__visc__edge(nodeAdd2, nodePool2, 1, 1, 1, 0);
// TanhOp2
__visc__edge(nodePool2, nodeTanh2, 1, 0, 0, 0);
__visc__edge(nodePool2, nodeTanh2, 1, 1, 1, 0);
/**** FC Layer 1 ****/
// MulOp3
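// fc1_w/fc1_w_bytes occupy root slots 10/11; the input tensor arrives
// over the edges from nodeTanh2 below.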
__visc__bindIn(nodeMul3, 10, 2, 0);
__visc__bindIn(nodeMul3, 11, 3, 0);
__visc__edge(nodeTanh2, nodeMul3, 1, 0, 0, 0);
__visc__edge(nodeTanh2, nodeMul3, 1, 1, 1, 0);
// AddOp3
__visc__bindIn(nodeAdd3, 12, 2, 0);
__visc__bindIn(nodeAdd3, 13, 3, 0);
__visc__edge(nodeMul3, nodeAdd3, 1, 0, 0, 0);
__visc__edge(nodeMul3, nodeAdd3, 1, 1, 1, 0);
// TanhOp3
__visc__edge(nodeAdd3, nodeTanh3, 1, 0, 0, 0);
__visc__edge(nodeAdd3, nodeTanh3, 1, 1, 1, 0);
/**** FC Layer 2 ****/
// MulOp4
__visc__bindIn(nodeMul4, 14, 2, 0);
__visc__bindIn(nodeMul4, 15, 3, 0);
__visc__edge(nodeTanh3, nodeMul4, 1, 0, 0, 0);
__visc__edge(nodeTanh3, nodeMul4, 1, 1, 1, 0);
// AddOp4
__visc__bindIn(nodeAdd4, 16, 2, 0);
__visc__bindIn(nodeAdd4, 17, 3, 0);
__visc__edge(nodeMul4, nodeAdd4, 1, 0, 0, 0);
__visc__edge(nodeMul4, nodeAdd4, 1, 1, 1, 0);
// TanhOp4
__visc__edge(nodeAdd4, nodeTanh4, 1, 0, 0, 0);
__visc__edge(nodeAdd4, nodeTanh4, 1, 1, 1, 0);
/**** Output Binding ****/
__visc__bindOut(nodeTanh4, 0, 0, 0);
__visc__bindOut(nodeTanh4, 1, 1, 0);
}
@@ -97,44 +285,66 @@ typedef struct __attribute__((__packed__)) {
void *x;
size_t x_bytes;
// 1st Layer parameters
void* conv1_w;
size_t conv1_w_bytes;
void* conv1_b;
size_t conv1_b_bytes;
// 2nd Layer parameters
void* conv2_w;
size_t conv2_w_bytes;
void* conv2_b;
size_t conv2_b_bytes;
// 3rd Layer parameters
void* fc1_w;
size_t fc1_w_bytes;
void* fc1_b;
size_t fc1_b_bytes;
// 4th Layer parameters
void* fc2_w;
size_t fc2_w_bytes;
void* fc2_b;
size_t fc2_b_bytes;
struct ret_t r;
} RootIn;
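// The packed struct mirrors the root() parameter list field-for-field,
// so the launch runtime can read arguments directly out of this buffer.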
int main() {
int test_batch_size = 10000;
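// full MNIST test set (t10k): 10,000 images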
std::string prefix = "../../../../../../projects/hpvm-tensor-rt/model_params";
std::string input_data_path = prefix + std::string("/FC_network2/mnist_float_input.bin");
std::string labels_path = prefix + std::string("/lenet_params/datasets/t10k-labels-idx1-ubyte");
std::string conv1_w_path = prefix + std::string("/lenet_keras/conv1.bin");
std::string conv1_b_path = prefix + std::string("/lenet_keras/conv1_bias.bin");
std::string conv2_w_path = prefix + std::string("/lenet_keras/conv2.bin");
std::string conv2_b_path = prefix + std::string("/lenet_keras/conv2_bias.bin");
std::string fc1_w_path = prefix + std::string("/lenet_keras/fc1.bin");
std::string fc1_b_path = prefix + std::string("/lenet_keras/fc1_bias.bin");
std::string fc2_w_path = prefix + std::string("/lenet_keras/fc2.bin");
std::string fc2_b_path = prefix + std::string("/lenet_keras/fc2_bias.bin");
printf("Reading Input Data from = %s \n", input_data_path.c_str());
uint8_t* labels = readLabels(labels_path.c_str(), test_batch_size);
void* x = readTrainedWeights(input_data_path.c_str(), float_type,
test_batch_size, 1, 28, 28);
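// readTrainedWeights(path, type, d1, d2, d3, d4) presumably loads a raw
// float blob as a 4-D NCHW tensor; here, a batch of 28x28 single-channel images.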
void* conv1_w = readTrainedWeights(conv1_w_path.c_str(), float_type, 32, 1, 5, 5);
void* conv1_b = readTrainedWeights(conv1_b_path.c_str(), float_type, 1, 32, 1, 1);
void* conv2_w = readTrainedWeights(conv2_w_path.c_str(), float_type, 64, 32, 5, 5);
void* conv2_b = readTrainedWeights(conv2_b_path.c_str(), float_type, 1, 64, 1, 1);
void* fc1_w = readTrainedWeights(fc1_w_path.c_str(), float_type, 1, 1, 7*7*64, 1024);
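// FC1 input size: two 2x2 pools reduce 28x28 to 7x7, times 64 channels
// = 3136 flattened features, mapped to 1024 units.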
void* fc1_b = readTrainedWeights(fc1_b_path.c_str(), float_type, 1, 1024, 1, 1);
void* fc2_w = readTrainedWeights(fc2_w_path.c_str(), float_type, 1, 1, 1024, 10);
void* fc2_b = readTrainedWeights(fc2_b_path.c_str(), float_type, 1, 10, 1, 1);
__visc__init();
RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn)));
args->x = x;
args->x_bytes = 0;
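// The *_bytes fields are set to 0 throughout; the sizes appear to be
// unused by this launch path.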
// Conv Layers params
args->conv1_w = conv1_w;
args->conv1_w_bytes = 0;
args->conv1_b = conv1_b;
@@ -143,6 +353,15 @@ int main() {
args->conv2_w_bytes = 0;
args->conv2_b = conv2_b;
args->conv2_b_bytes = 0;
// FC Layers params
args->fc1_w = fc1_w;
args->fc1_w_bytes = 0;
args->fc1_b = fc1_b;
args->fc1_b_bytes = 0;
args->fc2_w = fc2_w;
args->fc2_w_bytes = 0;
args->fc2_b = fc2_b;
args->fc2_b_bytes = 0;
void *dfg = __visc__launch(0, root, (void *)args);
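// Non-streaming launch (first argument 0); the returned handle
// identifies the running dataflow graph.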
@@ -150,10 +369,13 @@ int main() {
// FIXME: Value returned in the wrong index!!
//void *r = static_cast<RootIn*>(args)->r.tensor;
void *result = static_cast<RootIn*>(args)->x;
hpvm_request_tensor(result, 0);
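// Bring the output tensor back to host memory before the CPU reads it.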
__visc__cleanup();
computeAccuracy2(labels, test_batch_size, result);
return 0;
}
@@ -247,7 +247,7 @@ struct Tensor* readTrainedWeights(const char* file_name, int data_type, int dim1
}
uint8_t* readLabels(const char* labels_file, int num_labels){
int file_header_size = 8;
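// idx1-ubyte label files begin with an 8-byte header (magic number and
// item count), which is skipped before reading labels.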
uint8_t* labels = (uint8_t*) malloc(sizeof(uint8_t) * num_labels);