From 0c446fbf817b0eea0758af478dcff450e0b8666a Mon Sep 17 00:00:00 2001
From: Abdul Rafae Noor <arnoor2@tyler.cs.illinois.edu>
Date: Thu, 8 Apr 2021 16:19:22 -0500
Subject: [PATCH] fixing code examples in backend passes doc

---
 hpvm/docs/developerdocs/backend-passes.rst | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/hpvm/docs/developerdocs/backend-passes.rst b/hpvm/docs/developerdocs/backend-passes.rst
index d8e2c2bffa..ea740656b1 100644
--- a/hpvm/docs/developerdocs/backend-passes.rst
+++ b/hpvm/docs/developerdocs/backend-passes.rst
@@ -380,7 +380,7 @@ Let’s consider the end result of the `FuseHPVMTensorNodes` example:
     void *r2 = __hpvm__tensor_add(r1, t3);
     void *r3 = __hpvm__tensor_relu(r2);
     void *r4 = __hpvm__tensor_pool_max(r3, 3, 3, 0, 0, 2, 2);
-    __hpvm__return(2, r3, (size_t)0);
+    __hpvm__return(2, r4, (size_t)0);
   }
 
 Similar to the `FuseHPVMTensorNodes` example, the `DFG2LLVM_WrapperAPI` pass also has fusion patterns. However, in this pass the tensor operations are within a single node.
@@ -405,6 +405,8 @@ codeGen(DFLeafNode* )
  void all_fused_wrapper_api(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3) {
     __hpvm_request_tensor(t1, /* GPU */ 1);
     __hpvm_request_tensor(t2, /* GPU */ 1);
+    __hpvm_request_tensor(t3, /* GPU */ 1);
+
 
 
 
@@ -412,7 +414,7 @@ codeGen(DFLeafNode* )
     void *r2 = __hpvm__tensor_add(r1, t3);
     void *r3 = __hpvm__tensor_relu(r2);
     void *r4 = __hpvm__tensor_pool_max(r3, 3, 3, 0, 0, 2, 2);
-    __hpvm__return(2, r3, (size_t)0);
+    __hpvm__return(2, r4, (size_t)0);
   }
 
 
@@ -423,6 +425,7 @@ codeGen(DFLeafNode* )
   void all_fused_wrapper_api(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3) {
     __hpvm_request_tensor(t1, /* GPU */ 1);
     __hpvm_request_tensor(t2, /* GPU */ 1);
+    __hpvm_request_tensor(t3, /* GPU */ 1);
 
 
    void* w1 = wrapper_ConvLayer2( "all_fused_wra...", …);
@@ -432,7 +435,7 @@ codeGen(DFLeafNode* )
     void *r2 = __hpvm__tensor_add(r1, t3);
     void *r3 = __hpvm__tensor_relu(r2);
     void *r4 = __hpvm__tensor_pool_max(r3, 3, 3, 0, 0, 2, 2);
-    __hpvm__return(2, r3, (size_t)0);
+    __hpvm__return(2, r4, (size_t)0);
   }
 
 3. The remaining arguments of the wrapper_ConvLayer2 call are taken from the arguments passed to the individual tensor operations that the fused call replaces.
@@ -442,6 +445,7 @@ codeGen(DFLeafNode* )
   void all_fused_wrapper_api(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3) {
     __hpvm_request_tensor(t1, /* GPU */ 1);
     __hpvm_request_tensor(t2, /* GPU */ 1);
+    __hpvm_request_tensor(t3, /* GPU */ 1);
 
 
    void* w1 = wrapper_ConvLayer2( "all_fused_wra...", t1, t2, t3, 2, 2, 4, 4, 0, 3, 3, 0, 0, 2, 2, 0.0, 0.0);
@@ -450,7 +454,7 @@ codeGen(DFLeafNode* )
     void *r2 = __hpvm__tensor_add(r1, t3);
     void *r3 = __hpvm__tensor_relu(r2);
     void *r4 = __hpvm__tensor_pool_max(r3, 3, 3, 0, 0, 2, 2);
-    __hpvm__return(2, r3, (size_t)0);
+    __hpvm__return(2, r4, (size_t)0);
   }
 
 4. Finally, the original tensor operations are removed, and the uses of their final values are replaced with the result of the wrapper function call.
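 As an illustrative sketch of this final step (assumed, not copied from the pass output: in particular, `w1` replacing the former use of `r4` in the return is an assumption), the node function might then look like:

   void all_fused_wrapper_api(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3) {
     __hpvm_request_tensor(t1, /* GPU */ 1);
     __hpvm_request_tensor(t2, /* GPU */ 1);
     __hpvm_request_tensor(t3, /* GPU */ 1);

     /* Single fused call; the individual tensor operations have been removed. */
     void* w1 = wrapper_ConvLayer2( "all_fused_wra...", t1, t2, t3, 2, 2, 4, 4, 0, 3, 3, 0, 0, 2, 2, 0.0, 0.0);
     /* Assumption: the wrapper result replaces the former use of r4. */
     __hpvm__return(2, w1, (size_t)0);
   }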
-- 
GitLab