Skip to content
Snippets Groups Projects
Commit 0c446fbf authored by Abdul Rafae Noor's avatar Abdul Rafae Noor
Browse files

fixing code examples in backend passes doc

parent 0f160178
No related branches found
No related tags found
No related merge requests found
......@@ -380,7 +380,7 @@ Let’s consider the end result of the `FuseHPVMTensorNodes` example:
void *r2 = __hpvm__tensor_add(r1, t3);
void *r3 = __hpvm__tensor_relu(r2);
void *r4 = __hpvm__tensor_pool_max(r3, 3, 3, 0, 0, 2, 2);
__hpvm__return(2, r3, (size_t)0);
__hpvm__return(2, r4, (size_t)0);
}
Similar to the FuseHPVMTensorNodes example, the DFG2LLVM_WrapperAPI pass also has fusion patterns. However in this pass, the tensor operations are within a single node.
......@@ -405,6 +405,8 @@ codeGen(DFLeafNode* )
void all_fused_wrapper_api(void *t1, size_t bytes_t1, void *t2,size_t bytes_t2, void *t3, size_t bytes_t3) {
__hpvm_request_tensor(t1, /* GPU */ 1);
__hpvm_request_tensor(t2, /* GPU */ 1);
__hpvm_request_tensor(t3, /* GPU */ 1);
......@@ -412,7 +414,7 @@ codeGen(DFLeafNode* )
void *r2 = __hpvm__tensor_add(r1, t3);
void *r3 = __hpvm__tensor_relu(r2);
void *r4 = __hpvm__tensor_pool_max(r3, 3, 3, 0, 0, 2, 2);
__hpvm__return(2, r3, (size_t)0);
__hpvm__return(2, r4, (size_t)0);
}
......@@ -423,6 +425,7 @@ codeGen(DFLeafNode* )
void all_fused_wrapper_api(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3) {
__hpvm_request_tensor(t1, /* GPU */ 1);
__hpvm_request_tensor(t2, /* GPU */ 1);
__hpvm_request_tensor(t3, /* GPU */ 1);
void* w1 = wrapper_ConvLayer2( "all_fused_wra...", ...);
......@@ -432,7 +435,7 @@ codeGen(DFLeafNode* )
void *r2 = __hpvm__tensor_add(r1, t3);
void *r3 = __hpvm__tensor_relu(r2);
void *r4 = __hpvm__tensor_pool_max(r3, 3, 3, 0, 0, 2, 2);
__hpvm__return(2, r3, (size_t)0);
__hpvm__return(2, r4, (size_t)0);
}
3. The remaining arguments of the wrapper_ConvLayer2 call are taken from the arguments passed to the individual tensor operations from which the fused call is made.
......@@ -442,6 +445,7 @@ codeGen(DFLeafNode* )
void all_fused_wrapper_api(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2, void *t3, size_t bytes_t3) {
__hpvm_request_tensor(t1, /* GPU */ 1);
__hpvm_request_tensor(t2, /* GPU */ 1);
__hpvm_request_tensor(t3, /* GPU */ 1);
void* w1 = wrapper_ConvLayer2( "all_fused_wra...", t1, t2, t3, 2, 2, 4, 4, 0, 3, 3, 0, 0, 2, 2, 0.0, 0.0);
......@@ -450,7 +454,7 @@ codeGen(DFLeafNode* )
void *r2 = __hpvm__tensor_add(r1, t3);
void *r3 = __hpvm__tensor_relu(r2);
void *r4 = __hpvm__tensor_pool_max(r3, 3, 3, 0, 0, 2, 2);
__hpvm__return(2, r3, (size_t)0);
__hpvm__return(2, r4, (size_t)0);
}
4. Finally, the original operations are removed and the uses of their final value are replaced with the wrapper function call result.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment