From 0c446fbf817b0eea0758af478dcff450e0b8666a Mon Sep 17 00:00:00 2001
From: Abdul Rafae Noor <arnoor2@tyler.cs.illinois.edu>
Date: Thu, 8 Apr 2021 16:19:22 -0500
Subject: [PATCH] fixing code examples in backend passes doc

---
 hpvm/docs/developerdocs/backend-passes.rst | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/hpvm/docs/developerdocs/backend-passes.rst b/hpvm/docs/developerdocs/backend-passes.rst
index d8e2c2bffa..ea740656b1 100644
--- a/hpvm/docs/developerdocs/backend-passes.rst
+++ b/hpvm/docs/developerdocs/backend-passes.rst
@@ -380,7 +380,7 @@ Let’s consider the end result of the `FuseHPVMTensorNodes` example:
       void *r2 = __hpvm__tensor_add(r1, t3);
       void *r3 = __hpvm__tensor_relu(r2);
       void *r4 = __hpvm__tensor_pool_max(r3, 3, 3, 0, 0, 2, 2);
-      __hpvm__return(2, r3, (size_t)0);
+      __hpvm__return(2, r4, (size_t)0);
     }
 
 Similar to the FuseHPVMTensorNodes example, the DFG2LLVM_WrapperAPI pass also has fusion patterns. However in this pass, the tensor operations are within a single node.
@@ -405,6 +405,8 @@ codeGen(DFLeafNode* )
    void all_fused_wrapper_api(void *t1, size_t bytes_t1, void *t2,size_t bytes_t2, void *t3, size_t bytes_t3) {
      __hpvm_request_tensor(t1, /* GPU */ 1);
      __hpvm_request_tensor(t2, /* GPU */ 1);
+     __hpvm_request_tensor(t3, /* GPU */ 1);
+
 
 
 
@@ -412,7 +414,7 @@ codeGen(DFLeafNode* )
      void *r2 = __hpvm__tensor_add(r1, t3);
      void *r3 = __hpvm__tensor_relu(r2);
      void *r4 = __hpvm__tensor_pool_max(r3, 3, 3, 0, 0, 2, 2);
-     __hpvm__return(2, r3, (size_t)0);
+     __hpvm__return(2, r4, (size_t)0);
    }
 
 
@@ -423,6 +425,7 @@ codeGen(DFLeafNode* )
    void all_fused_wrapper_api(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3) {
      __hpvm_request_tensor(t1, /* GPU */ 1);
      __hpvm_request_tensor(t2, /* GPU */ 1);
+     __hpvm_request_tensor(t3, /* GPU */ 1);
 
      void* w1 = wrapper_ConvLayer2( “all_fused_wra...”, …);
 
@@ -432,7 +435,7 @@ codeGen(DFLeafNode* )
      void *r2 = __hpvm__tensor_add(r1, t3);
      void *r3 = __hpvm__tensor_relu(r2);
      void *r4 = __hpvm__tensor_pool_max(r3, 3, 3, 0, 0, 2, 2);
-     __hpvm__return(2, r3, (size_t)0);
+     __hpvm__return(2, r4, (size_t)0);
    }
 
 3. The remaining arguments of the wrapper_convLayer2 call are taken from the arguments passed to the individual tensor operations from which the fused call is made.
@@ -442,6 +445,7 @@ codeGen(DFLeafNode* )
    void all_fused_wrapper_api(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3) {
      __hpvm_request_tensor(t1, /* GPU */ 1);
      __hpvm_request_tensor(t2, /* GPU */ 1);
+     __hpvm_request_tensor(t3, /* GPU */ 1);
 
      void* w1 = wrapper_ConvLayer2( “all_fused_wra...”, t1, t2, t3, 2, 2, 4, 4, 0, 3, 3, 0, 0, 2, 2, 0.0, 0.0);
 
@@ -450,7 +454,7 @@ codeGen(DFLeafNode* )
      void *r2 = __hpvm__tensor_add(r1, t3);
      void *r3 = __hpvm__tensor_relu(r2);
      void *r4 = __hpvm__tensor_pool_max(r3, 3, 3, 0, 0, 2, 2);
-     __hpvm__return(2, r3, (size_t)0);
+     __hpvm__return(2, r4, (size_t)0);
    }
 
 4. Finally, the original operations are removed and the final values uses are replaced with the wrapper function call result.
-- 
GitLab