Skip to content
Snippets Groups Projects
Commit 0c446fbf authored by Abdul Rafae Noor's avatar Abdul Rafae Noor
Browse files

fixing code examples in backend passes doc

parent 0f160178
No related branches found
No related tags found
No related merge requests found
......@@ -380,7 +380,7 @@ Let’s consider the end result of the `FuseHPVMTensorNodes` example:
void *r2 = __hpvm__tensor_add(r1, t3);
void *r3 = __hpvm__tensor_relu(r2);
void *r4 = __hpvm__tensor_pool_max(r3, 3, 3, 0, 0, 2, 2);
__hpvm__return(2, r3, (size_t)0);
__hpvm__return(2, r4, (size_t)0);
}
Similar to the FuseHPVMTensorNodes example, the DFG2LLVM_WrapperAPI pass also has fusion patterns. However in this pass, the tensor operations are within a single node.
......@@ -405,6 +405,8 @@ codeGen(DFLeafNode* )
void all_fused_wrapper_api(void *t1, size_t bytes_t1, void *t2,size_t bytes_t2, void *t3, size_t bytes_t3) {
__hpvm_request_tensor(t1, /* GPU */ 1);
__hpvm_request_tensor(t2, /* GPU */ 1);
__hpvm_request_tensor(t3, /* GPU */ 1);
......@@ -412,7 +414,7 @@ codeGen(DFLeafNode* )
void *r2 = __hpvm__tensor_add(r1, t3);
void *r3 = __hpvm__tensor_relu(r2);
void *r4 = __hpvm__tensor_pool_max(r3, 3, 3, 0, 0, 2, 2);
__hpvm__return(2, r3, (size_t)0);
__hpvm__return(2, r4, (size_t)0);
}
......@@ -423,6 +425,7 @@ codeGen(DFLeafNode* )
void all_fused_wrapper_api(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3) {
__hpvm_request_tensor(t1, /* GPU */ 1);
__hpvm_request_tensor(t2, /* GPU */ 1);
__hpvm_request_tensor(t3, /* GPU */ 1);
void* w1 = wrapper_ConvLayer2( "all_fused_wra...", ...);
......@@ -432,7 +435,7 @@ codeGen(DFLeafNode* )
void *r2 = __hpvm__tensor_add(r1, t3);
void *r3 = __hpvm__tensor_relu(r2);
void *r4 = __hpvm__tensor_pool_max(r3, 3, 3, 0, 0, 2, 2);
__hpvm__return(2, r3, (size_t)0);
__hpvm__return(2, r4, (size_t)0);
}
3. The remaining arguments of the wrapper_ConvLayer2 call are taken from the arguments passed to the individual tensor operations from which the fused call is made.
......@@ -442,6 +445,7 @@ codeGen(DFLeafNode* )
void all_fused_wrapper_api(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3) {
__hpvm_request_tensor(t1, /* GPU */ 1);
__hpvm_request_tensor(t2, /* GPU */ 1);
__hpvm_request_tensor(t3, /* GPU */ 1);
void* w1 = wrapper_ConvLayer2( "all_fused_wra...", t1, t2, t3, 2, 2, 4, 4, 0, 3, 3, 0, 0, 2, 2, 0.0, 0.0);
......@@ -450,7 +454,7 @@ codeGen(DFLeafNode* )
void *r2 = __hpvm__tensor_add(r1, t3);
void *r3 = __hpvm__tensor_relu(r2);
void *r4 = __hpvm__tensor_pool_max(r3, 3, 3, 0, 0, 2, 2);
__hpvm__return(2, r3, (size_t)0);
__hpvm__return(2, r4, (size_t)0);
}
4. Finally, the original operations are removed and the uses of their final value are replaced with the wrapper function call result.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment