diff --git a/hpvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.cpp b/hpvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.cpp index 9b6ca06f631104d5d65711495e18f64babbcf6e7..5117cc23d30a7392ee53107e63e7c2d13a4f9692 100644 --- a/hpvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.cpp +++ b/hpvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.cpp @@ -9,14 +9,12 @@ // // This pass is uses fuses HPVM nodes based on the tensor operations contained // the nodes. This helps create the groundwork for indicating to the compiler -// that a set of tensor operations in a node are fusionable and it can have +// that a set of tensor operations in a node are fusionable and it can have // implications on performance and energy consumption of set of tensor // operations in question. // //===----------------------------------------------------------------------===// - - #define DEBUG_TYPE "FuseTensorNodes" #include "llvm/IR/ValueMap.h" @@ -141,6 +139,9 @@ static DFNode *findNextNodeInSequence(DFNode *SrcN) { } } + if (!DstN) + return NULL; + // If we reach this point, DstN is the unique successor of SrcN // Now, test that the DstN has a single predeccessor except Root (dummy) @@ -672,7 +673,9 @@ void FuseHPVMTensorNodes::updateParentNodeFunction(IntrinsicInst *II1, IItoRemove.push_back(II); } break; case Intrinsic::hpvm_bind_output: { - assert(false && "Source node of node fusion not expected in bind.out\n"); + // Replace BindOut node argument with fused function node. + II->setArgOperand(0, IInew); + } break; default: llvm_unreachable("Unknown use of HPVM createNode handle\n"); @@ -822,7 +825,7 @@ void FindFusionTargetsTraversal::codeGen(DFLeafNode *N) { } errs() << "THIS IS NOT A DUMMY NODE\n"; errs() << "INTRINSIC: " << *isValidHPVMTensorNode(N) << "\n"; - if(!preferredTargetIncludes(N, hpvm::TENSOR_TARGET)) { + if (!preferredTargetIncludes(N, hpvm::TENSOR_TARGET)) { // Only fuse if we plan to target PROMISE/Layers API // The CUDNN backend would be able to generate calls for the fused node, // but not the other way around @@ -987,13 +990,24 @@ bool FuseHPVMTensorNodesWrapper::runOnModule(Module &M) { FindFusionTargetsTraversal *FTTVisitor = new FindFusionTargetsTraversal(M, DFG); + // Visit each DFG only once + std::set<Function *> Visited; + errs() << "Find targets\n"; // Iterate over all the DFGs and produce code for each one of them for (auto rootNode : Roots) { + + Function *rootFunc = rootNode->getFuncPointer(); + if (Visited.find(rootFunc) != Visited.end()) + continue; + // Initiate code generation for root DFNode FTTVisitor->visit(rootNode); + + Visited.insert(rootFunc); } + errs() << "Finished visiting DFGs ...\n"; FuseHPVMTensorNodes::FusionTargets &FTs = FTTVisitor->getFusionTargets(); FuseHPVMTensorNodes Fuse; diff --git a/hpvm/test/regressionTests/DFG2LLVM_CUDNN/batchNorm.hpvm.ll b/hpvm/test/regressionTests/DFG2LLVM_CUDNN/batchNorm.hpvm.ll new file mode 100644 index 0000000000000000000000000000000000000000..3afd9273f86c1bb58f85fbab9dce3ec14d8243ab --- /dev/null +++ b/hpvm/test/regressionTests/DFG2LLVM_CUDNN/batchNorm.hpvm.ll @@ -0,0 +1,151 @@ +; RUN: opt -load LLVMBuildDFG.so -load LLVMInPlaceDFGAnalysis.so -load LLVMDFG2LLVM_CUDNN.so -S -inplace -dfg2llvm-cudnn < %s | FileCheck %s +; ModuleID = 'batchNorm.ll' +source_filename = "batchNorm.cpp" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.out._Z9relu_nodePvm = type <{ i8*, i64 }> +%struct.out._Z19batchNormLayer_nodePvmS_mS_mS_mS_m = type <{ i8*, i64 }> +%struct.out._Z4rootPvmS_mS_mS_mS_m = type <{ i8*, i64 }> + + +; CHECK-LABEL: i32 @main( +; CHECK: call void @llvm_hpvm_initTensorRt(i32 0) +; CHECK-NEXT: call void @llvm.hpvm.init() +; CHECK: call void @hpvm_request_tensor( +; CHECK: call void @llvm_hpvm_cleanupTensorRt() +; CHECK-NEXT: call void @llvm.hpvm.cleanup() + +; CHECK-LABEL: @_Z19batchNormLayer_nodePvmS_mS_mS_mS_m_cloned_cudnn( +; CHECK: call void @hpvm_request_tensor( +; CHECK-NEXT: call void @hpvm_request_tensor( +; CHECK-NEXT: call void @hpvm_request_tensor( +; CHECK-NEXT: call void @hpvm_request_tensor( +; CHECK-NEXT: call void @hpvm_request_tensor( +; CHECK-NOT: call i8* @llvm.hpvm.tensor.batchnorm(i8* %t1, i8* %t2, i8* %t3, i8* %t4, i8* %t5, double 1.000000e-03) +; CHECK: call i8* @tensorBatchNorm(i8* %t1, i8* %t2, i8* %t3, i8* %t4, i8* %t5, double 1.000000e-03) +; CHECK: ret + + +; CHECK-LABEL: @_Z9relu_nodePvm_cloned_cudnn( +; CHECK: call void @hpvm_request_tensor( +; CHECK-NOT: call i8* @llvm.hpvm.tensor.relu(i8* %t1) +; CHECK: call i8* @tensorRelu(i8* %t1) +; CHECK: ret + + +; Function Attrs: norecurse nounwind uwtable +define dso_local i32 @main() local_unnamed_addr #0 { +entry: + call void @llvm.hpvm.init() + %call = tail call noalias i8* @malloc(i64 96) #3 + %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out._Z4rootPvmS_mS_mS_mS_m (i8*, i64, i8*, i64, i8*, i64, i8*, i64, i8*, i64)* @_Z4rootPvmS_mS_mS_mS_m_cloned to i8*), i8* %call, i1 false) + call void @llvm.hpvm.wait(i8* %graphID) + %input = bitcast i8* %call to i8** + %0 = load i8*, i8** %input, align 1, !tbaa !5 + tail call void @hpvm_request_tensor(i8* %0, i32 1) #3 + call void @llvm.hpvm.cleanup() + ret i32 0 +} + +; Function Attrs: nofree nounwind +declare dso_local noalias i8* @malloc(i64) local_unnamed_addr #1 + +declare dso_local void @hpvm_request_tensor(i8*, i32) local_unnamed_addr #2 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.relu(i8*) #3 + +; Function Attrs: nounwind uwtable +define dso_local %struct.out._Z9relu_nodePvm @_Z9relu_nodePvm_cloned(i8* in %t1, i64 %bytes_t1) #4 { +entry: + %call1 = call i8* @llvm.hpvm.tensor.relu(i8* %t1) + %returnStruct = insertvalue %struct.out._Z9relu_nodePvm undef, i8* %call1, 0 + %returnStruct2 = insertvalue %struct.out._Z9relu_nodePvm %returnStruct, i64 0, 1 + ret %struct.out._Z9relu_nodePvm %returnStruct2 +} + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.batchnorm(i8*, i8*, i8*, i8*, i8*, double) #3 + +; Function Attrs: nounwind uwtable +define dso_local %struct.out._Z19batchNormLayer_nodePvmS_mS_mS_mS_m @_Z19batchNormLayer_nodePvmS_mS_mS_mS_m_cloned(i8* in %t1, i64 %bytes_t1, i8* in %t2, i64 %bytes_t2, i8* in %t3, i64 %bytes_t3, i8* in %t4, i64 %bytes_t4, i8* in %t5, i64 %bytes_t5) #4 { +entry: + %call1 = call i8* @llvm.hpvm.tensor.batchnorm(i8* %t1, i8* %t2, i8* %t3, i8* %t4, i8* %t5, double 1.000000e-03) + %returnStruct = insertvalue %struct.out._Z19batchNormLayer_nodePvmS_mS_mS_mS_m undef, i8* %call1, 0 + %returnStruct2 = insertvalue %struct.out._Z19batchNormLayer_nodePvmS_mS_mS_mS_m %returnStruct, i64 0, 1 + ret %struct.out._Z19batchNormLayer_nodePvmS_mS_mS_mS_m %returnStruct2 +} + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.createNode(i8*) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.bind.input(i8*, i32, i32, i1) #3 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32, i1) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.bind.output(i8*, i32, i32, i1) #3 + +; Function Attrs: nounwind uwtable +define dso_local %struct.out._Z4rootPvmS_mS_mS_mS_m @_Z4rootPvmS_mS_mS_mS_m_cloned(i8* in %input, i64 %input_bytes, i8* in %batch_normalization_1_gamma, i64 %batch_normalization_1_gamma_bytes, i8* in %batch_normalization_1_beta, i64 %batch_normalization_1_beta_bytes, i8* in %batch_normalization_1_mean, i64 %batch_normalization_1_mean_bytes, i8* in %batch_normalization_1_variance, i64 %batch_normalization_1_variance_bytes) #4 { +entry: + %_Z19batchNormLayer_nodePvmS_mS_mS_mS_m_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out._Z19batchNormLayer_nodePvmS_mS_mS_mS_m (i8*, i64, i8*, i64, i8*, i64, i8*, i64, i8*, i64)* @_Z19batchNormLayer_nodePvmS_mS_mS_mS_m_cloned to i8*)) + call void @llvm.hpvm.bind.input(i8* %_Z19batchNormLayer_nodePvmS_mS_mS_mS_m_cloned.node, i32 0, i32 0, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z19batchNormLayer_nodePvmS_mS_mS_mS_m_cloned.node, i32 1, i32 1, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z19batchNormLayer_nodePvmS_mS_mS_mS_m_cloned.node, i32 2, i32 2, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z19batchNormLayer_nodePvmS_mS_mS_mS_m_cloned.node, i32 3, i32 3, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z19batchNormLayer_nodePvmS_mS_mS_mS_m_cloned.node, i32 4, i32 4, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z19batchNormLayer_nodePvmS_mS_mS_mS_m_cloned.node, i32 5, i32 5, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z19batchNormLayer_nodePvmS_mS_mS_mS_m_cloned.node, i32 6, i32 6, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z19batchNormLayer_nodePvmS_mS_mS_mS_m_cloned.node, i32 7, i32 7, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z19batchNormLayer_nodePvmS_mS_mS_mS_m_cloned.node, i32 8, i32 8, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z19batchNormLayer_nodePvmS_mS_mS_mS_m_cloned.node, i32 9, i32 9, i1 false) + %_Z9relu_nodePvm_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out._Z9relu_nodePvm (i8*, i64)* @_Z9relu_nodePvm_cloned to i8*)) + %output = call i8* @llvm.hpvm.createEdge(i8* %_Z19batchNormLayer_nodePvmS_mS_mS_mS_m_cloned.node, i8* %_Z9relu_nodePvm_cloned.node, i1 true, i32 0, i32 0, i1 false) + %output1 = call i8* @llvm.hpvm.createEdge(i8* %_Z19batchNormLayer_nodePvmS_mS_mS_mS_m_cloned.node, i8* %_Z9relu_nodePvm_cloned.node, i1 true, i32 1, i32 1, i1 false) + call void @llvm.hpvm.bind.output(i8* %_Z9relu_nodePvm_cloned.node, i32 0, i32 0, i1 false) + call void @llvm.hpvm.bind.output(i8* %_Z9relu_nodePvm_cloned.node, i32 1, i32 1, i1 false) + ret %struct.out._Z4rootPvmS_mS_mS_mS_m undef +} + +; Function Attrs: nounwind +declare void @llvm.hpvm.init() #3 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.launch(i8*, i8*, i1) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.wait(i8*) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.cleanup() #3 + +attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #1 = { nofree nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #3 = { nounwind } +attributes #4 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} +!hpvm_hint_cudnn = !{!2, !3} +!hpvm_hint_gpu = !{} +!hpvm_hint_cpu = !{!4} +!hpvm_hint_cpu_gpu = !{} +!hpvm_hint_promise = !{} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 5c964d17b48694847d60e6755519cbfa0603770f)"} +!2 = !{%struct.out._Z9relu_nodePvm (i8*, i64)* @_Z9relu_nodePvm_cloned} +!3 = !{%struct.out._Z19batchNormLayer_nodePvmS_mS_mS_mS_m (i8*, i64, i8*, i64, i8*, i64, i8*, i64, i8*, i64)* @_Z19batchNormLayer_nodePvmS_mS_mS_mS_m_cloned} +!4 = !{%struct.out._Z4rootPvmS_mS_mS_mS_m (i8*, i64, i8*, i64, i8*, i64, i8*, i64, i8*, i64)* @_Z4rootPvmS_mS_mS_mS_m_cloned} +!5 = !{!6, !7, i64 0} +!6 = !{!"_ZTS6RootIn", !7, i64 0, !10, i64 8, !7, i64 16, !10, i64 24, !7, i64 32, !10, i64 40, !7, i64 48, !10, i64 56, !7, i64 64, !10, i64 72, !11, i64 80} +!7 = !{!"any pointer", !8, i64 0} +!8 = !{!"omnipotent char", !9, i64 0} +!9 = !{!"Simple C++ TBAA"} +!10 = !{!"long", !8, i64 0} +!11 = !{!"_ZTS5ret_t", !7, i64 0, !10, i64 8} diff --git a/hpvm/test/regressionTests/DFG2LLVM_CUDNN/matMul.hpvm.ll b/hpvm/test/regressionTests/DFG2LLVM_CUDNN/matMul.hpvm.ll new file mode 100644 index 0000000000000000000000000000000000000000..978f3f87001249e111b56584fd85f6a28bfb30f7 --- /dev/null +++ b/hpvm/test/regressionTests/DFG2LLVM_CUDNN/matMul.hpvm.ll @@ -0,0 +1,135 @@ +; RUN: opt -load LLVMBuildDFG.so -load LLVMInPlaceDFGAnalysis.so -load LLVMDFG2LLVM_CUDNN.so -S -inplace -dfg2llvm-cudnn < %s | FileCheck %s +; ModuleID = 'matMul.ll' +source_filename = "matMul.cpp" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.out._Z9relu_nodePvm = type <{ i8*, i64 }> +%struct.out._Z11matMul_nodePvmS_m = type <{ i8*, i64 }> +%struct.out._Z4rootPvmS_m = type <{ i8*, i64 }> + + +; CHECK-LABEL: i32 @main( +; CHECK: call void @llvm_hpvm_initTensorRt(i32 0) +; CHECK-NEXT: call void @llvm.hpvm.init() +; CHECK: call void @hpvm_request_tensor( +; CHECK: call void @llvm_hpvm_cleanupTensorRt() +; CHECK-NEXT: call void @llvm.hpvm.cleanup() + +; CHECK-LABEL: @_Z11matMul_nodePvmS_m_cloned_cudnn( +; CHECK: call void @hpvm_request_tensor( +; CHECK-NEXT: call void @hpvm_request_tensor( +; CHECK-NOT: call i8* @__hpvm__tensor_mul(i8* %t1, i8* %t2) +; CHECK: call i8* @tensorGemmGPU(i8* %t1, i8* %t2) +; CHECK: ret + + +; Function Attrs: norecurse nounwind uwtable +define dso_local i32 @main() local_unnamed_addr #0 { +entry: + call void @llvm.hpvm.init() + %call = tail call noalias i8* @malloc(i64 48) #3 + %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out._Z4rootPvmS_m (i8*, i64, i8*, i64)* @_Z4rootPvmS_m_cloned to i8*), i8* %call, i1 false) + call void @llvm.hpvm.wait(i8* %graphID) + %input = bitcast i8* %call to i8** + %0 = load i8*, i8** %input, align 1, !tbaa !5 + tail call void @hpvm_request_tensor(i8* %0, i32 1) #3 + call void @llvm.hpvm.cleanup() + ret i32 0 +} + +; Function Attrs: nofree nounwind +declare dso_local noalias i8* @malloc(i64) local_unnamed_addr #1 + +declare dso_local void @hpvm_request_tensor(i8*, i32) local_unnamed_addr #2 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.relu(i8*) #3 + +; Function Attrs: nounwind uwtable +define dso_local %struct.out._Z9relu_nodePvm @_Z9relu_nodePvm_cloned(i8* in %t1, i64 %bytes_t1) #4 { +entry: + %call1 = call i8* @llvm.hpvm.tensor.relu(i8* %t1) + %returnStruct = insertvalue %struct.out._Z9relu_nodePvm undef, i8* %call1, 0 + %returnStruct2 = insertvalue %struct.out._Z9relu_nodePvm %returnStruct, i64 0, 1 + ret %struct.out._Z9relu_nodePvm %returnStruct2 +} + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.mul(i8*, i8*) #3 + +; Function Attrs: nounwind uwtable +define dso_local %struct.out._Z11matMul_nodePvmS_m @_Z11matMul_nodePvmS_m_cloned(i8* in %t1, i64 %bytes_t1, i8* in %t2, i64 %bytes_t2) #4 { +entry: + %call1 = call i8* @llvm.hpvm.tensor.mul(i8* %t1, i8* %t2) + %returnStruct = insertvalue %struct.out._Z11matMul_nodePvmS_m undef, i8* %call1, 0 + %returnStruct2 = insertvalue %struct.out._Z11matMul_nodePvmS_m %returnStruct, i64 0, 1 + ret %struct.out._Z11matMul_nodePvmS_m %returnStruct2 +} + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.createNode(i8*) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.bind.input(i8*, i32, i32, i1) #3 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32, i1) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.bind.output(i8*, i32, i32, i1) #3 + +; Function Attrs: nounwind uwtable +define dso_local %struct.out._Z4rootPvmS_m @_Z4rootPvmS_m_cloned(i8* in %input, i64 %input_bytes, i8* in %m1, i64 %m1_bytes) #4 { +entry: + %_Z11matMul_nodePvmS_m_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out._Z11matMul_nodePvmS_m (i8*, i64, i8*, i64)* @_Z11matMul_nodePvmS_m_cloned to i8*)) + call void @llvm.hpvm.bind.input(i8* %_Z11matMul_nodePvmS_m_cloned.node, i32 0, i32 0, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z11matMul_nodePvmS_m_cloned.node, i32 1, i32 1, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z11matMul_nodePvmS_m_cloned.node, i32 2, i32 2, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z11matMul_nodePvmS_m_cloned.node, i32 3, i32 3, i1 false) + %_Z9relu_nodePvm_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out._Z9relu_nodePvm (i8*, i64)* @_Z9relu_nodePvm_cloned to i8*)) + %output = call i8* @llvm.hpvm.createEdge(i8* %_Z11matMul_nodePvmS_m_cloned.node, i8* %_Z9relu_nodePvm_cloned.node, i1 true, i32 0, i32 0, i1 false) + %output1 = call i8* @llvm.hpvm.createEdge(i8* %_Z11matMul_nodePvmS_m_cloned.node, i8* %_Z9relu_nodePvm_cloned.node, i1 true, i32 1, i32 1, i1 false) + call void @llvm.hpvm.bind.output(i8* %_Z9relu_nodePvm_cloned.node, i32 0, i32 0, i1 false) + call void @llvm.hpvm.bind.output(i8* %_Z9relu_nodePvm_cloned.node, i32 1, i32 1, i1 false) + ret %struct.out._Z4rootPvmS_m undef +} + +; Function Attrs: nounwind +declare void @llvm.hpvm.init() #3 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.launch(i8*, i8*, i1) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.wait(i8*) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.cleanup() #3 + +attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #1 = { nofree nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #3 = { nounwind } +attributes #4 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} +!hpvm_hint_cudnn = !{!2, !3} +!hpvm_hint_gpu = !{} +!hpvm_hint_cpu = !{!4} +!hpvm_hint_cpu_gpu = !{} +!hpvm_hint_promise = !{} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 5c964d17b48694847d60e6755519cbfa0603770f)"} +!2 = !{%struct.out._Z9relu_nodePvm (i8*, i64)* @_Z9relu_nodePvm_cloned} +!3 = !{%struct.out._Z11matMul_nodePvmS_m (i8*, i64, i8*, i64)* @_Z11matMul_nodePvmS_m_cloned} +!4 = !{%struct.out._Z4rootPvmS_m (i8*, i64, i8*, i64)* @_Z4rootPvmS_m_cloned} +!5 = !{!6, !7, i64 0} +!6 = !{!"_ZTS6RootIn", !7, i64 0, !10, i64 8, !7, i64 16, !10, i64 24, !11, i64 32} +!7 = !{!"any pointer", !8, i64 0} +!8 = !{!"omnipotent char", !9, i64 0} +!9 = !{!"Simple C++ TBAA"} +!10 = !{!"long", !8, i64 0} +!11 = !{!"_ZTS5ret_t", !7, i64 0, !10, i64 8} diff --git a/hpvm/test/regressionTests/DFG2LLVM_CUDNN/meanPoolWithSoftmax.hpvm.ll b/hpvm/test/regressionTests/DFG2LLVM_CUDNN/meanPoolWithSoftmax.hpvm.ll new file mode 100644 index 0000000000000000000000000000000000000000..c42ce49d803ca82cd559f7ab3b3c8107fe0a9b0f --- /dev/null +++ b/hpvm/test/regressionTests/DFG2LLVM_CUDNN/meanPoolWithSoftmax.hpvm.ll @@ -0,0 +1,190 @@ +; RUN: opt -load LLVMBuildDFG.so -load LLVMInPlaceDFGAnalysis.so -load LLVMDFG2LLVM_CUDNN.so -S -inplace -dfg2llvm-cudnn < %s | FileCheck %s +; ModuleID = 'meanPoolWithSoftmax.ll' +source_filename = "softmaxActivation.cpp" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.out._Z9conv_nodePvmS_m = type <{ i8*, i64 }> +%struct.out._Z13bias_add_nodePvmS_m = type <{ i8*, i64 }> +%struct.out._Z12softmax_nodePvm = type <{ i8*, i64 }> +%struct.out._Z14pool_mean_nodePvm = type <{ i8*, i64 }> +%struct.out._Z4rootPvmS_mS_m = type <{ i8*, i64 }> + +; CHECK-LABEL: i32 @main( +; CHECK: call void @llvm_hpvm_initTensorRt(i32 0) +; CHECK-NEXT: call void @llvm.hpvm.init() +; CHECK: call void @hpvm_request_tensor( +; CHECK: call void @llvm_hpvm_cleanupTensorRt() +; CHECK-NEXT: call void @llvm.hpvm.cleanup() + + +; CHECK-LABEL: @_Z9conv_nodePvmS_m_cloned_cudnn( +; CHECK: call void @hpvm_request_tensor( +; CHECK: call void @hpvm_request_tensor( +; CHECK-NOT: call i8* @__hpvm__tensor_convolution(i8* %t1, i8* %t2, i32 2, i32 2, i32 1, i32 1) +; CHECK: call i8* @tensorConvolution(i8* %t1, i8* %t2, i32 2, i32 2, i32 1, i32 1, i32 1, i32 0) +; CHECK: ret + +; CHECK-LABEL: @_Z13bias_add_nodePvmS_m_cloned_cudnn( +; CHECK: call void @hpvm_request_tensor( +; CHECK: call void @hpvm_request_tensor( +; CHECK-NOT: call i8* @__hpvm__tensor_add(i8* %t1, i8* %t2) +; CHECK: call i8* @tensorAdd(i8* %t1, i8* %t2) +; CHECK: ret + +; CHECK-LABEL: @_Z12softmax_nodePvm_cloned_cudnn( +; CHECK: call void @hpvm_request_tensor( +; CHECK-NOT: call i8* @__hpvm__tensor_softmax(i8* %t1) +; CHECK: call i8* @tensorSoftmax(i8* %t1) +; CHECK: ret + +; CHECK-LABEL: @_Z14pool_mean_nodePvm_cloned_cudnn( +; CHECK: call void @hpvm_request_tensor( +; CHECK-NOT: %call = tail call i8* @__hpvm__tensor_pool_mean(i8* %t1, i32 2, i32 2, i32 0, i32 0, i32 2, i32 2) +; CHECK: call i8* @tensorPooling(i8* %t1, i32 1, i32 2, i32 2, i32 0, i32 0, i32 2, i32 2) +; CHECK: ret + + +; Function Attrs: norecurse nounwind uwtable +define dso_local i32 @main() local_unnamed_addr #0 { +entry: + call void @llvm.hpvm.init() + %call = tail call noalias i8* @malloc(i64 64) #3 + %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out._Z4rootPvmS_mS_m (i8*, i64, i8*, i64, i8*, i64)* @_Z4rootPvmS_mS_m_cloned to i8*), i8* %call, i1 false) + call void @llvm.hpvm.wait(i8* %graphID) + %input = bitcast i8* %call to i8** + %0 = load i8*, i8** %input, align 1, !tbaa !7 + tail call void @hpvm_request_tensor(i8* %0, i32 1) #3 + call void @llvm.hpvm.cleanup() + ret i32 0 +} + +; Function Attrs: nofree nounwind +declare dso_local noalias i8* @malloc(i64) local_unnamed_addr #1 + +declare dso_local void @hpvm_request_tensor(i8*, i32) local_unnamed_addr #2 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.convolution(i8*, i8*, i32, i32, i32, i32) #3 + +; Function Attrs: nounwind uwtable +define dso_local %struct.out._Z9conv_nodePvmS_m @_Z9conv_nodePvmS_m_cloned(i8* in %t1, i64 %bytes_t1, i8* in %t2, i64 %bytes_t2) #4 { +entry: + %call1 = call i8* @llvm.hpvm.tensor.convolution(i8* %t1, i8* %t2, i32 2, i32 2, i32 1, i32 1) + %returnStruct = insertvalue %struct.out._Z9conv_nodePvmS_m undef, i8* %call1, 0 + %returnStruct2 = insertvalue %struct.out._Z9conv_nodePvmS_m %returnStruct, i64 0, 1 + ret %struct.out._Z9conv_nodePvmS_m %returnStruct2 +} + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.add(i8*, i8*) #3 + +; Function Attrs: nounwind uwtable +define dso_local %struct.out._Z13bias_add_nodePvmS_m @_Z13bias_add_nodePvmS_m_cloned(i8* in %t1, i64 %bytes_t1, i8* in %t2, i64 %bytes_t2) #4 { +entry: + %call1 = call i8* @llvm.hpvm.tensor.add(i8* %t1, i8* %t2) + %returnStruct = insertvalue %struct.out._Z13bias_add_nodePvmS_m undef, i8* %call1, 0 + %returnStruct2 = insertvalue %struct.out._Z13bias_add_nodePvmS_m %returnStruct, i64 0, 1 + ret %struct.out._Z13bias_add_nodePvmS_m %returnStruct2 +} + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.softmax(i8*) #3 + +; Function Attrs: nounwind uwtable +define dso_local %struct.out._Z12softmax_nodePvm @_Z12softmax_nodePvm_cloned(i8* in %t1, i64 %bytes_t1) #4 { +entry: + %call1 = call i8* @llvm.hpvm.tensor.softmax(i8* %t1) + %returnStruct = insertvalue %struct.out._Z12softmax_nodePvm undef, i8* %call1, 0 + %returnStruct2 = insertvalue %struct.out._Z12softmax_nodePvm %returnStruct, i64 0, 1 + ret %struct.out._Z12softmax_nodePvm %returnStruct2 +} + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.pool.mean(i8*, i32, i32, i32, i32, i32, i32) #3 + +; Function Attrs: nounwind uwtable +define dso_local %struct.out._Z14pool_mean_nodePvm @_Z14pool_mean_nodePvm_cloned(i8* in %t1, i64 %bytes_t1) #4 { +entry: + %call1 = call i8* @llvm.hpvm.tensor.pool.mean(i8* %t1, i32 2, i32 2, i32 0, i32 0, i32 2, i32 2) + %returnStruct = insertvalue %struct.out._Z14pool_mean_nodePvm undef, i8* %call1, 0 + %returnStruct2 = insertvalue %struct.out._Z14pool_mean_nodePvm %returnStruct, i64 0, 1 + ret %struct.out._Z14pool_mean_nodePvm %returnStruct2 +} + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.createNode(i8*) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.bind.input(i8*, i32, i32, i1) #3 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32, i1) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.bind.output(i8*, i32, i32, i1) #3 + +; Function Attrs: nounwind uwtable +define dso_local %struct.out._Z4rootPvmS_mS_m @_Z4rootPvmS_mS_m_cloned(i8* in %input, i64 %input_bytes, i8* in %conv2d_1_w, i64 %conv2d_1_w_bytes, i8* nocapture readnone %conv2d_1_b, i64 %conv2d_1_b_bytes) #4 { +entry: + %_Z9conv_nodePvmS_m_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out._Z9conv_nodePvmS_m (i8*, i64, i8*, i64)* @_Z9conv_nodePvmS_m_cloned to i8*)) + call void @llvm.hpvm.bind.input(i8* %_Z9conv_nodePvmS_m_cloned.node, i32 0, i32 0, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z9conv_nodePvmS_m_cloned.node, i32 1, i32 1, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z9conv_nodePvmS_m_cloned.node, i32 2, i32 2, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z9conv_nodePvmS_m_cloned.node, i32 3, i32 3, i1 false) + %_Z13bias_add_nodePvmS_m_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out._Z13bias_add_nodePvmS_m (i8*, i64, i8*, i64)* @_Z13bias_add_nodePvmS_m_cloned to i8*)) + %output = call i8* @llvm.hpvm.createEdge(i8* %_Z9conv_nodePvmS_m_cloned.node, i8* %_Z13bias_add_nodePvmS_m_cloned.node, i1 true, i32 0, i32 0, i1 false) + %output1 = call i8* @llvm.hpvm.createEdge(i8* %_Z9conv_nodePvmS_m_cloned.node, i8* %_Z13bias_add_nodePvmS_m_cloned.node, i1 true, i32 1, i32 1, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z13bias_add_nodePvmS_m_cloned.node, i32 4, i32 2, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z13bias_add_nodePvmS_m_cloned.node, i32 5, i32 3, i1 false) + %_Z12softmax_nodePvm_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out._Z12softmax_nodePvm (i8*, i64)* @_Z12softmax_nodePvm_cloned to i8*)) + %output2 = call i8* @llvm.hpvm.createEdge(i8* %_Z13bias_add_nodePvmS_m_cloned.node, i8* %_Z12softmax_nodePvm_cloned.node, i1 true, i32 0, i32 0, i1 false) + %output3 = call i8* @llvm.hpvm.createEdge(i8* %_Z13bias_add_nodePvmS_m_cloned.node, i8* %_Z12softmax_nodePvm_cloned.node, i1 true, i32 1, i32 1, i1 false) + %_Z14pool_mean_nodePvm_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out._Z14pool_mean_nodePvm (i8*, i64)* @_Z14pool_mean_nodePvm_cloned to i8*)) + %output4 = call i8* @llvm.hpvm.createEdge(i8* %_Z12softmax_nodePvm_cloned.node, i8* %_Z14pool_mean_nodePvm_cloned.node, i1 true, i32 0, i32 0, i1 false) + %output5 = call i8* @llvm.hpvm.createEdge(i8* %_Z12softmax_nodePvm_cloned.node, i8* %_Z14pool_mean_nodePvm_cloned.node, i1 true, i32 1, i32 1, i1 false) + call void @llvm.hpvm.bind.output(i8* %_Z14pool_mean_nodePvm_cloned.node, i32 0, i32 0, i1 false) + call void @llvm.hpvm.bind.output(i8* %_Z14pool_mean_nodePvm_cloned.node, i32 1, i32 1, i1 false) + ret %struct.out._Z4rootPvmS_mS_m undef +} + +; Function Attrs: nounwind +declare void @llvm.hpvm.init() #3 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.launch(i8*, i8*, i1) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.wait(i8*) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.cleanup() #3 + +attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #1 = { nofree nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #3 = { nounwind } +attributes #4 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} +!hpvm_hint_cudnn = !{!2, !3, !4, !5} +!hpvm_hint_gpu = !{} +!hpvm_hint_cpu = !{!6} +!hpvm_hint_cpu_gpu = !{} +!hpvm_hint_promise = !{} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 5c964d17b48694847d60e6755519cbfa0603770f)"} +!2 = !{%struct.out._Z9conv_nodePvmS_m (i8*, i64, i8*, i64)* @_Z9conv_nodePvmS_m_cloned} +!3 = !{%struct.out._Z13bias_add_nodePvmS_m (i8*, i64, i8*, i64)* @_Z13bias_add_nodePvmS_m_cloned} +!4 = !{%struct.out._Z12softmax_nodePvm (i8*, i64)* @_Z12softmax_nodePvm_cloned} +!5 = !{%struct.out._Z14pool_mean_nodePvm (i8*, i64)* @_Z14pool_mean_nodePvm_cloned} +!6 = !{%struct.out._Z4rootPvmS_mS_m (i8*, i64, i8*, i64, i8*, i64)* @_Z4rootPvmS_mS_m_cloned} +!7 = !{!8, !9, i64 0} +!8 = !{!"_ZTS6RootIn", !9, i64 0, !12, i64 8, !9, i64 16, !12, i64 24, !9, i64 32, !12, i64 40, !13, i64 48} +!9 = !{!"any pointer", !10, i64 0} +!10 = !{!"omnipotent char", !11, i64 0} +!11 = !{!"Simple C++ TBAA"} +!12 = !{!"long", !10, i64 0} +!13 = !{!"_ZTS5ret_t", !9, i64 0, !12, i64 8} diff --git a/hpvm/test/regressionTests/DFG2LLVM_CUDNN/singleConvLayer.hpvm.ll b/hpvm/test/regressionTests/DFG2LLVM_CUDNN/singleConvLayer.hpvm.ll new file mode 100644 index 0000000000000000000000000000000000000000..91586784b04f12a5919f4dc48a6b786dc89481a0 --- /dev/null +++ b/hpvm/test/regressionTests/DFG2LLVM_CUDNN/singleConvLayer.hpvm.ll @@ -0,0 +1,192 @@ +; RUN: opt -load LLVMBuildDFG.so -load LLVMInPlaceDFGAnalysis.so -load LLVMDFG2LLVM_CUDNN.so -S -inplace -dfg2llvm-cudnn < %s | FileCheck %s +; ModuleID = 'singleConvLayer.ll' +source_filename = "singleConv.cpp" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.out._Z9conv_nodePvmS_m = type <{ i8*, i64 }> +%struct.out._Z13bias_add_nodePvmS_m = type <{ i8*, i64 }> +%struct.out._Z9tanh_nodePvm = type <{ i8*, i64 }> +%struct.out._Z13pool_max_nodePvm = type <{ i8*, i64 }> +%struct.out._Z4rootPvmS_mS_m = type <{ i8*, i64 }> + + +; CHECK-LABEL: i32 @main( +; CHECK: call void @llvm_hpvm_initTensorRt(i32 0) +; CHECK-NEXT: call void @llvm.hpvm.init() +; CHECK: call void @hpvm_request_tensor( +; CHECK: call void @llvm_hpvm_cleanupTensorRt() +; CHECK-NEXT: call void @llvm.hpvm.cleanup() + + +; CHECK-LABEL: @_Z9conv_nodePvmS_m_cloned_cudnn( +; CHECK: call void @hpvm_request_tensor( +; CHECK: call void @hpvm_request_tensor( +; CHECK-NOT: call i8* @__hpvm__tensor_convolution(i8* %t1, i8* %t2, i32 2, i32 2, i32 1, i32 1) +; CHECK: call i8* @tensorConvolution(i8* %t1, i8* %t2, i32 2, i32 2, i32 1, i32 1, i32 1, i32 0) +; CHECK: ret + +; CHECK-LABEL: @_Z13bias_add_nodePvmS_m_cloned_cudnn( +; CHECK: call void @hpvm_request_tensor( +; CHECK: call void @hpvm_request_tensor( +; CHECK-NOT: call i8* @__hpvm__tensor_add(i8* %t1, i8* %t2) +; CHECK: call i8* @tensorAdd(i8* %t1, i8* %t2) +; CHECK: ret + +; CHECK-LABEL: @_Z9tanh_nodePvm_cloned_cudnn( +; CHECK: call void @hpvm_request_tensor( +; CHECK-NOT: call i8* @__hpvm__tensor_tanh(i8* %t1) +; CHECK: call i8* @tensorTanh(i8* %t1) +; CHECK: ret + +; CHECK-LABEL: @_Z13pool_max_nodePvm_cloned_cudnn( +; CHECK: call void @hpvm_request_tensor( +; CHECK-NOT: call i8* @__hpvm__tensor_pool_max(i8* %t1, i32 2, i32 2, i32 0, i32 0, i32 2, i32 2) +; CHECK: call i8* @tensorPooling(i8* %t1, i32 0, i32 2, i32 2, i32 0, i32 0, i32 2, i32 2) +; CHECK: ret + + + +; Function Attrs: norecurse nounwind uwtable +define dso_local i32 @main() local_unnamed_addr #0 { +entry: + call void @llvm.hpvm.init() + %call = tail call noalias i8* @malloc(i64 64) #3 + %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out._Z4rootPvmS_mS_m (i8*, i64, i8*, i64, i8*, i64)* @_Z4rootPvmS_mS_m_cloned to i8*), i8* %call, i1 false) + call void @llvm.hpvm.wait(i8* %graphID) + %input = bitcast i8* %call to i8** + %0 = load i8*, i8** %input, align 1, !tbaa !7 + tail call void @hpvm_request_tensor(i8* %0, i32 1) #3 + call void @llvm.hpvm.cleanup() + ret i32 0 +} + +; Function Attrs: nofree nounwind +declare dso_local noalias i8* @malloc(i64) local_unnamed_addr #1 + +declare dso_local void @hpvm_request_tensor(i8*, i32) local_unnamed_addr #2 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.convolution(i8*, i8*, i32, i32, i32, i32) #3 + +; Function Attrs: nounwind uwtable +define dso_local %struct.out._Z9conv_nodePvmS_m @_Z9conv_nodePvmS_m_cloned(i8* in %t1, i64 %bytes_t1, i8* in %t2, i64 %bytes_t2) #4 { +entry: + %call1 = call i8* @llvm.hpvm.tensor.convolution(i8* %t1, i8* %t2, i32 2, i32 2, i32 1, i32 1) + %returnStruct = insertvalue %struct.out._Z9conv_nodePvmS_m undef, i8* %call1, 0 + %returnStruct2 = insertvalue %struct.out._Z9conv_nodePvmS_m %returnStruct, i64 0, 1 + ret %struct.out._Z9conv_nodePvmS_m %returnStruct2 +} + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.add(i8*, i8*) #3 + +; Function Attrs: nounwind uwtable +define dso_local %struct.out._Z13bias_add_nodePvmS_m @_Z13bias_add_nodePvmS_m_cloned(i8* in %t1, i64 %bytes_t1, i8* in %t2, i64 %bytes_t2) #4 { +entry: + %call1 = call i8* @llvm.hpvm.tensor.add(i8* %t1, i8* %t2) + %returnStruct = insertvalue %struct.out._Z13bias_add_nodePvmS_m undef, i8* %call1, 0 + %returnStruct2 = insertvalue %struct.out._Z13bias_add_nodePvmS_m %returnStruct, i64 0, 1 + ret %struct.out._Z13bias_add_nodePvmS_m %returnStruct2 +} + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.tanh(i8*) #3 + +; Function Attrs: nounwind uwtable +define dso_local %struct.out._Z9tanh_nodePvm @_Z9tanh_nodePvm_cloned(i8* in %t1, i64 %bytes_t1) #4 { +entry: + %call1 = call i8* @llvm.hpvm.tensor.tanh(i8* %t1) + %returnStruct = insertvalue %struct.out._Z9tanh_nodePvm undef, i8* %call1, 0 + %returnStruct2 = insertvalue %struct.out._Z9tanh_nodePvm %returnStruct, i64 0, 1 + ret %struct.out._Z9tanh_nodePvm %returnStruct2 +} + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.pool.max(i8*, i32, i32, i32, i32, i32, i32) #3 + +; Function Attrs: nounwind uwtable +define dso_local %struct.out._Z13pool_max_nodePvm @_Z13pool_max_nodePvm_cloned(i8* in %t1, i64 %bytes_t1) #4 { +entry: + %call1 = call i8* @llvm.hpvm.tensor.pool.max(i8* %t1, i32 2, i32 2, i32 0, i32 0, i32 2, i32 2) + %returnStruct = insertvalue %struct.out._Z13pool_max_nodePvm undef, i8* %call1, 0 + %returnStruct2 = insertvalue %struct.out._Z13pool_max_nodePvm %returnStruct, i64 0, 1 + ret %struct.out._Z13pool_max_nodePvm %returnStruct2 +} + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.createNode(i8*) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.bind.input(i8*, i32, i32, i1) #3 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32, i1) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.bind.output(i8*, i32, i32, i1) #3 + +; Function Attrs: nounwind uwtable +define dso_local %struct.out._Z4rootPvmS_mS_m @_Z4rootPvmS_mS_m_cloned(i8* in %input, i64 %input_bytes, i8* in %conv2d_1_w, i64 %conv2d_1_w_bytes, i8* nocapture readnone %conv2d_1_b, i64 %conv2d_1_b_bytes) #4 { +entry: + %_Z9conv_nodePvmS_m_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out._Z9conv_nodePvmS_m (i8*, i64, i8*, i64)* @_Z9conv_nodePvmS_m_cloned to i8*)) + call void @llvm.hpvm.bind.input(i8* %_Z9conv_nodePvmS_m_cloned.node, i32 0, i32 0, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z9conv_nodePvmS_m_cloned.node, i32 1, i32 1, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z9conv_nodePvmS_m_cloned.node, i32 2, i32 2, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z9conv_nodePvmS_m_cloned.node, i32 3, i32 3, i1 false) + %_Z13bias_add_nodePvmS_m_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out._Z13bias_add_nodePvmS_m (i8*, i64, i8*, i64)* @_Z13bias_add_nodePvmS_m_cloned to i8*)) + %output = call i8* @llvm.hpvm.createEdge(i8* %_Z9conv_nodePvmS_m_cloned.node, i8* %_Z13bias_add_nodePvmS_m_cloned.node, i1 true, i32 0, i32 0, i1 false) + %output1 = call i8* @llvm.hpvm.createEdge(i8* %_Z9conv_nodePvmS_m_cloned.node, i8* %_Z13bias_add_nodePvmS_m_cloned.node, i1 true, i32 1, i32 1, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z13bias_add_nodePvmS_m_cloned.node, i32 4, i32 2, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z13bias_add_nodePvmS_m_cloned.node, i32 5, i32 3, i1 false) + %_Z9tanh_nodePvm_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out._Z9tanh_nodePvm (i8*, i64)* @_Z9tanh_nodePvm_cloned to i8*)) + %output2 = call i8* @llvm.hpvm.createEdge(i8* %_Z13bias_add_nodePvmS_m_cloned.node, i8* %_Z9tanh_nodePvm_cloned.node, i1 true, i32 0, i32 0, i1 false) + %output3 = call i8* @llvm.hpvm.createEdge(i8* %_Z13bias_add_nodePvmS_m_cloned.node, i8* %_Z9tanh_nodePvm_cloned.node, i1 true, i32 1, i32 1, i1 false) + %_Z13pool_max_nodePvm_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out._Z13pool_max_nodePvm (i8*, i64)* @_Z13pool_max_nodePvm_cloned to i8*)) + %output4 = call i8* @llvm.hpvm.createEdge(i8* %_Z9tanh_nodePvm_cloned.node, i8* %_Z13pool_max_nodePvm_cloned.node, i1 true, i32 0, i32 0, i1 false) + %output5 = call i8* @llvm.hpvm.createEdge(i8* %_Z9tanh_nodePvm_cloned.node, i8* %_Z13pool_max_nodePvm_cloned.node, i1 true, i32 1, i32 1, i1 false) + call void @llvm.hpvm.bind.output(i8* %_Z13pool_max_nodePvm_cloned.node, i32 0, i32 0, i1 false) + call void @llvm.hpvm.bind.output(i8* %_Z13pool_max_nodePvm_cloned.node, i32 1, i32 1, i1 false) + ret %struct.out._Z4rootPvmS_mS_m undef +} + +; Function Attrs: nounwind +declare void @llvm.hpvm.init() #3 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.launch(i8*, i8*, i1) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.wait(i8*) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.cleanup() #3 + +attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #1 = { nofree nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #3 = { nounwind } +attributes #4 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} +!hpvm_hint_cudnn = !{!2, !3, !4, !5} +!hpvm_hint_gpu = !{} +!hpvm_hint_cpu = !{!6} +!hpvm_hint_cpu_gpu = !{} +!hpvm_hint_promise = !{} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 5c964d17b48694847d60e6755519cbfa0603770f)"} +!2 = !{%struct.out._Z9conv_nodePvmS_m (i8*, i64, i8*, i64)* @_Z9conv_nodePvmS_m_cloned} +!3 = !{%struct.out._Z13bias_add_nodePvmS_m (i8*, i64, i8*, i64)* @_Z13bias_add_nodePvmS_m_cloned} +!4 = !{%struct.out._Z9tanh_nodePvm (i8*, i64)* @_Z9tanh_nodePvm_cloned} +!5 = !{%struct.out._Z13pool_max_nodePvm (i8*, i64)* @_Z13pool_max_nodePvm_cloned} +!6 = !{%struct.out._Z4rootPvmS_mS_m (i8*, i64, i8*, i64, i8*, i64)* @_Z4rootPvmS_mS_m_cloned} +!7 = !{!8, !9, i64 0} +!8 = !{!"_ZTS6RootIn", !9, i64 0, !12, i64 8, !9, i64 16, !12, i64 24, !9, i64 32, !12, i64 40, !13, i64 48} +!9 = !{!"any pointer", !10, i64 0} +!10 = !{!"omnipotent char", !11, i64 0} +!11 = !{!"Simple C++ TBAA"} +!12 = !{!"long", !10, i64 0} +!13 = !{!"_ZTS5ret_t", !9, i64 0, !12, i64 8} diff --git a/hpvm/test/regressionTests/DFG2LLVM_WrapperAPI/wrapAddTanh.ll b/hpvm/test/regressionTests/DFG2LLVM_WrapperAPI/wrapAddTanh.ll new file mode 100644 index 0000000000000000000000000000000000000000..5bfec9230636970bb5b57deca7a2096a0d48e100 --- /dev/null +++ b/hpvm/test/regressionTests/DFG2LLVM_WrapperAPI/wrapAddTanh.ll @@ -0,0 +1,140 @@ +; RUN: opt -load LLVMBuildDFG.so -load LLVMInPlaceDFGAnalysis.so -load LLVMDFG2LLVM_WrapperAPI.so -S -inplace -dfg2llvm-wrapperapi -quantization-levels-filename=q_file.txt --configuration-inputs-filename=conf_file.txt < %s | FileCheck %s +; ModuleID = 'addTanh.hpvm.ll' +source_filename = "addTanh.cpp" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.out._Z9tanh_nodePvm = type <{ i8*, i64 }> +%struct.out._Z11matadd_nodePvmS_m = type <{ i8*, i64 }> +%struct.out._Z4rootPvmS_m = type <{ i8*, i64 }> + +; CHECK-LABEL: i32 @main( +; CHECK: call void @llvm_hpvm_initApproxhpvmRt(i32 0) +; CHECK-NEXT: call void @llvm_hpvm_initializeRuntimeController( +; CHECK-NEXT: call void @llvm.hpvm.init() +; CHECK: call void @hpvm_request_tensor( +; CHECK: call void @llvm_hpvm_cleanupApproxhpvmRt() +; CHECK: call void @llvm_hpvm_clearRuntimeController() +; CHECK-NEXT: call void @llvm.hpvm.cleanup() + +; CHECK-LABEL: @_Z11matadd_nodePvmS_m_cloned_wrapper_api( +; CHECK: call void @hpvm_request_tensor( +; CHECK: call void @hpvm_request_tensor( +; CHECK: call i8* @wrapper_tensorAdd( +; CHECK: ret + +; CHECK-LABEL: @_Z9tanh_nodePvm_cloned_wrapper_api( +; CHECK: call void @hpvm_request_tensor( +; CHECK: call i8* @wrapper_tensorTanh( +; CHECK: ret + + +; Function Attrs: norecurse uwtable +define dso_local i32 @main() local_unnamed_addr #0 { +entry: + call void @llvm.hpvm.init() + %call = tail call noalias i8* @malloc(i64 48) #3 + %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out._Z4rootPvmS_m (i8*, i64, i8*, i64)* @_Z4rootPvmS_m_cloned to i8*), i8* %call, i1 false) + call void @llvm.hpvm.wait(i8* %graphID) + %input = bitcast i8* %call to i8** + %0 = load i8*, i8** %input, align 1, !tbaa !5 + tail call void @hpvm_request_tensor(i8* %0, i32 1) + call void @llvm.hpvm.cleanup() + ret i32 0 +} + +; Function Attrs: nofree nounwind +declare dso_local noalias i8* @malloc(i64) local_unnamed_addr #1 + +declare dso_local void @hpvm_request_tensor(i8*, i32) local_unnamed_addr #2 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.tanh(i8*) #3 + +; Function Attrs: uwtable +define dso_local %struct.out._Z9tanh_nodePvm @_Z9tanh_nodePvm_cloned(i8* in %t1, i64 %bytes_t1) #4 { +entry: + %call1 = call i8* @llvm.hpvm.tensor.tanh(i8* %t1) + %returnStruct = insertvalue %struct.out._Z9tanh_nodePvm undef, i8* %call1, 0 + %returnStruct2 = insertvalue %struct.out._Z9tanh_nodePvm %returnStruct, i64 0, 1 + ret %struct.out._Z9tanh_nodePvm %returnStruct2 +} + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.add(i8*, i8*) #3 + +; Function Attrs: uwtable +define dso_local %struct.out._Z11matadd_nodePvmS_m @_Z11matadd_nodePvmS_m_cloned(i8* in %t1, i64 %bytes_t1, i8* in %t2, i64 %bytes_t2) #4 { +entry: + %call1 = call i8* @llvm.hpvm.tensor.add(i8* %t1, i8* %t2) + %returnStruct = insertvalue %struct.out._Z11matadd_nodePvmS_m undef, i8* %call1, 0 + %returnStruct2 = insertvalue %struct.out._Z11matadd_nodePvmS_m %returnStruct, i64 0, 1 + ret %struct.out._Z11matadd_nodePvmS_m %returnStruct2 +} + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.createNode(i8*) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.bind.input(i8*, i32, i32, i1) #3 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32, i1) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.bind.output(i8*, i32, i32, i1) #3 + +; Function Attrs: uwtable +define dso_local %struct.out._Z4rootPvmS_m @_Z4rootPvmS_m_cloned(i8* in %input, i64 %input_bytes, i8* in %m1, i64 %m1_bytes) #4 { +entry: + %_Z11matadd_nodePvmS_m_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out._Z11matadd_nodePvmS_m (i8*, i64, i8*, i64)* @_Z11matadd_nodePvmS_m_cloned to i8*)) + call void @llvm.hpvm.bind.input(i8* %_Z11matadd_nodePvmS_m_cloned.node, i32 0, i32 0, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z11matadd_nodePvmS_m_cloned.node, i32 1, i32 1, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z11matadd_nodePvmS_m_cloned.node, i32 2, i32 2, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z11matadd_nodePvmS_m_cloned.node, i32 3, i32 3, i1 false) + %_Z9tanh_nodePvm_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out._Z9tanh_nodePvm (i8*, i64)* @_Z9tanh_nodePvm_cloned to i8*)) + %output = call i8* @llvm.hpvm.createEdge(i8* %_Z11matadd_nodePvmS_m_cloned.node, i8* %_Z9tanh_nodePvm_cloned.node, i1 true, i32 0, i32 0, i1 false) + %output1 = call i8* @llvm.hpvm.createEdge(i8* %_Z11matadd_nodePvmS_m_cloned.node, i8* %_Z9tanh_nodePvm_cloned.node, i1 true, i32 1, i32 1, i1 false) + call void @llvm.hpvm.bind.output(i8* %_Z9tanh_nodePvm_cloned.node, i32 0, i32 0, i1 false) + call void @llvm.hpvm.bind.output(i8* %_Z9tanh_nodePvm_cloned.node, i32 1, i32 1, i1 false) + ret %struct.out._Z4rootPvmS_m undef +} + +; Function Attrs: nounwind +declare void @llvm.hpvm.init() #3 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.launch(i8*, i8*, i1) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.wait(i8*) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.cleanup() #3 + +attributes #0 = { norecurse uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nofree nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind } +attributes #4 = { uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} +!hpvm_hint_promise = !{!2, !3, !4} +!hpvm_hint_gpu = !{} +!hpvm_hint_cpu = !{} +!hpvm_hint_cpu_gpu = !{} +!hpvm_hint_cudnn = !{} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 5c2a920901bb51fcc2e51f920c0f726cbd6d3f0d)"} +!2 = !{%struct.out._Z9tanh_nodePvm (i8*, i64)* @_Z9tanh_nodePvm_cloned} +!3 = !{%struct.out._Z11matadd_nodePvmS_m (i8*, i64, i8*, i64)* @_Z11matadd_nodePvmS_m_cloned} +!4 = !{%struct.out._Z4rootPvmS_m (i8*, i64, i8*, i64)* @_Z4rootPvmS_m_cloned} +!5 = !{!6, !7, i64 0} +!6 = !{!"_ZTS6RootIn", !7, i64 0, !10, i64 8, !7, i64 16, !10, i64 24, !11, i64 32} +!7 = !{!"any pointer", !8, i64 0} +!8 = !{!"omnipotent char", !9, i64 0} +!9 = !{!"Simple C++ TBAA"} +!10 = !{!"long", !8, i64 0} +!11 = !{!"_ZTS5ret_t", !7, i64 0, !10, i64 8} diff --git a/hpvm/test/regressionTests/DFG2LLVM_WrapperAPI/wrapBatchNormRelu.ll b/hpvm/test/regressionTests/DFG2LLVM_WrapperAPI/wrapBatchNormRelu.ll new file mode 100644 index 0000000000000000000000000000000000000000..c67d15ab6cac5fdf3538f661aa8583c6f0ce9771 --- /dev/null +++ b/hpvm/test/regressionTests/DFG2LLVM_WrapperAPI/wrapBatchNormRelu.ll @@ -0,0 +1,151 @@ +; RUN: opt -load LLVMBuildDFG.so -load LLVMInPlaceDFGAnalysis.so -load LLVMDFG2LLVM_WrapperAPI.so -S -inplace -dfg2llvm-wrapperapi -quantization-levels-filename=q_file.txt --configuration-inputs-filename=conf_file.txt < %s | FileCheck %s +; ModuleID = 'batchNorm.hpvm.ll' +source_filename = "batchNorm.cpp" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.out._Z9relu_nodePvm = type <{ i8*, i64 }> +%struct.out._Z19batchNormLayer_nodePvmS_mS_mS_mS_m = type <{ i8*, i64 }> +%struct.out._Z4rootPvmS_mS_mS_mS_m = type <{ i8*, i64 }> + + +; CHECK-LABEL: i32 @main( +; CHECK: call void @llvm_hpvm_initApproxhpvmRt(i32 0) +; CHECK-NEXT: call void @llvm_hpvm_initializeRuntimeController( +; CHECK-NEXT: call void @llvm.hpvm.init() +; CHECK: call void @hpvm_request_tensor( +; CHECK: call void @llvm_hpvm_cleanupApproxhpvmRt() +; CHECK: call void @llvm_hpvm_clearRuntimeController() +; CHECK-NEXT: call void @llvm.hpvm.cleanup() + +; CHECK-LABEL: @_Z19batchNormLayer_nodePvmS_mS_mS_mS_m_cloned_wrapper_api( +; CHECK: call void @hpvm_request_tensor( +; CHECK: call void @hpvm_request_tensor( +; CHECK: call void @hpvm_request_tensor( +; CHECK: call void @hpvm_request_tensor( +; CHECK: call void @hpvm_request_tensor( +; CHECK: call i8* @wrapper_tensorBatchNorm( +; CHECK: ret + +; CHECK-LABEL: @_Z9relu_nodePvm_cloned_wrapper_api( +; CHECK: call void @hpvm_request_tensor( +; CHECK: call i8* @wrapper_tensorRelu( +; CHECK: ret + + + +; Function Attrs: norecurse nounwind uwtable +define dso_local i32 @main() local_unnamed_addr #0 { +entry: + call void @llvm.hpvm.init() + %call = tail call noalias i8* @malloc(i64 96) #3 + %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out._Z4rootPvmS_mS_mS_mS_m (i8*, i64, i8*, i64, i8*, i64, i8*, i64, i8*, i64)* @_Z4rootPvmS_mS_mS_mS_m_cloned to i8*), i8* %call, i1 false) + call void @llvm.hpvm.wait(i8* %graphID) + %input = bitcast i8* %call to i8** + %0 = load i8*, i8** %input, align 1, !tbaa !5 + tail call void @hpvm_request_tensor(i8* %0, i32 1) #3 + call void @llvm.hpvm.cleanup() + ret i32 0 +} + +; Function Attrs: nofree nounwind +declare dso_local noalias i8* @malloc(i64) local_unnamed_addr #1 + +declare dso_local void @hpvm_request_tensor(i8*, i32) local_unnamed_addr #2 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.relu(i8*) #3 + +; Function Attrs: nounwind uwtable +define dso_local %struct.out._Z9relu_nodePvm @_Z9relu_nodePvm_cloned(i8* in %t1, i64 %bytes_t1) #4 { +entry: + %call1 = call i8* @llvm.hpvm.tensor.relu(i8* %t1) + %returnStruct = insertvalue %struct.out._Z9relu_nodePvm undef, i8* %call1, 0 + %returnStruct2 = insertvalue %struct.out._Z9relu_nodePvm %returnStruct, i64 0, 1 + ret %struct.out._Z9relu_nodePvm %returnStruct2 +} + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.batchnorm(i8*, i8*, i8*, i8*, i8*, double) #3 + +; Function Attrs: nounwind uwtable +define dso_local %struct.out._Z19batchNormLayer_nodePvmS_mS_mS_mS_m @_Z19batchNormLayer_nodePvmS_mS_mS_mS_m_cloned(i8* in %t1, i64 %bytes_t1, i8* in %t2, i64 %bytes_t2, i8* in %t3, i64 %bytes_t3, i8* in %t4, i64 %bytes_t4, i8* in %t5, i64 %bytes_t5) #4 { +entry: + %call1 = call i8* @llvm.hpvm.tensor.batchnorm(i8* %t1, i8* %t2, i8* %t3, i8* %t4, i8* %t5, double 1.000000e-03) + %returnStruct = insertvalue %struct.out._Z19batchNormLayer_nodePvmS_mS_mS_mS_m undef, i8* %call1, 0 + %returnStruct2 = insertvalue %struct.out._Z19batchNormLayer_nodePvmS_mS_mS_mS_m %returnStruct, i64 0, 1 + ret %struct.out._Z19batchNormLayer_nodePvmS_mS_mS_mS_m %returnStruct2 +} + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.createNode(i8*) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.bind.input(i8*, i32, i32, i1) #3 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32, i1) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.bind.output(i8*, i32, i32, i1) #3 + +; Function Attrs: nounwind uwtable +define dso_local %struct.out._Z4rootPvmS_mS_mS_mS_m @_Z4rootPvmS_mS_mS_mS_m_cloned(i8* in %input, i64 %input_bytes, i8* in %batch_normalization_1_gamma, i64 %batch_normalization_1_gamma_bytes, i8* in %batch_normalization_1_beta, i64 %batch_normalization_1_beta_bytes, i8* in %batch_normalization_1_mean, i64 %batch_normalization_1_mean_bytes, i8* in %batch_normalization_1_variance, i64 %batch_normalization_1_variance_bytes) #4 { +entry: + %_Z19batchNormLayer_nodePvmS_mS_mS_mS_m_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out._Z19batchNormLayer_nodePvmS_mS_mS_mS_m (i8*, i64, i8*, i64, i8*, i64, i8*, i64, i8*, i64)* @_Z19batchNormLayer_nodePvmS_mS_mS_mS_m_cloned to i8*)) + call void @llvm.hpvm.bind.input(i8* %_Z19batchNormLayer_nodePvmS_mS_mS_mS_m_cloned.node, i32 0, i32 0, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z19batchNormLayer_nodePvmS_mS_mS_mS_m_cloned.node, i32 1, i32 1, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z19batchNormLayer_nodePvmS_mS_mS_mS_m_cloned.node, i32 2, i32 2, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z19batchNormLayer_nodePvmS_mS_mS_mS_m_cloned.node, i32 3, i32 3, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z19batchNormLayer_nodePvmS_mS_mS_mS_m_cloned.node, i32 4, i32 4, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z19batchNormLayer_nodePvmS_mS_mS_mS_m_cloned.node, i32 5, i32 5, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z19batchNormLayer_nodePvmS_mS_mS_mS_m_cloned.node, i32 6, i32 6, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z19batchNormLayer_nodePvmS_mS_mS_mS_m_cloned.node, i32 7, i32 7, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z19batchNormLayer_nodePvmS_mS_mS_mS_m_cloned.node, i32 8, i32 8, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z19batchNormLayer_nodePvmS_mS_mS_mS_m_cloned.node, i32 9, i32 9, i1 false) + %_Z9relu_nodePvm_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out._Z9relu_nodePvm (i8*, i64)* @_Z9relu_nodePvm_cloned to i8*)) + %output = call i8* @llvm.hpvm.createEdge(i8* %_Z19batchNormLayer_nodePvmS_mS_mS_mS_m_cloned.node, i8* %_Z9relu_nodePvm_cloned.node, i1 true, i32 0, i32 0, i1 false) + %output1 = call i8* @llvm.hpvm.createEdge(i8* %_Z19batchNormLayer_nodePvmS_mS_mS_mS_m_cloned.node, i8* %_Z9relu_nodePvm_cloned.node, i1 true, i32 1, i32 1, i1 false) + call void @llvm.hpvm.bind.output(i8* %_Z9relu_nodePvm_cloned.node, i32 0, i32 0, i1 false) + call void @llvm.hpvm.bind.output(i8* %_Z9relu_nodePvm_cloned.node, i32 1, i32 1, i1 false) + ret %struct.out._Z4rootPvmS_mS_mS_mS_m undef +} + +; Function Attrs: nounwind +declare void @llvm.hpvm.init() #3 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.launch(i8*, i8*, i1) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.wait(i8*) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.cleanup() #3 + +attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #1 = { nofree nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #3 = { nounwind } +attributes #4 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} +!hpvm_hint_promise = !{!2, !3, !4} +!hpvm_hint_gpu = !{} +!hpvm_hint_cpu = !{} +!hpvm_hint_cpu_gpu = !{} +!hpvm_hint_cudnn = !{} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 5c964d17b48694847d60e6755519cbfa0603770f)"} +!2 = !{%struct.out._Z9relu_nodePvm (i8*, i64)* @_Z9relu_nodePvm_cloned} +!3 = !{%struct.out._Z19batchNormLayer_nodePvmS_mS_mS_mS_m (i8*, i64, i8*, i64, i8*, i64, i8*, i64, i8*, i64)* @_Z19batchNormLayer_nodePvmS_mS_mS_mS_m_cloned} +!4 = !{%struct.out._Z4rootPvmS_mS_mS_mS_m (i8*, i64, i8*, i64, i8*, i64, i8*, i64, i8*, i64)* @_Z4rootPvmS_mS_mS_mS_m_cloned} +!5 = !{!6, !7, i64 0} +!6 = !{!"_ZTS6RootIn", !7, i64 0, !10, i64 8, !7, i64 16, !10, i64 24, !7, i64 32, !10, i64 40, !7, i64 48, !10, i64 56, !7, i64 64, !10, i64 72, !11, i64 80} +!7 = !{!"any pointer", !8, i64 0} +!8 = !{!"omnipotent char", !9, i64 0} +!9 = !{!"Simple C++ TBAA"} +!10 = !{!"long", !8, i64 0} +!11 = !{!"_ZTS5ret_t", !7, i64 0, !10, i64 8} diff --git a/hpvm/test/regressionTests/DFG2LLVM_WrapperAPI/wrapConv.ll b/hpvm/test/regressionTests/DFG2LLVM_WrapperAPI/wrapConv.ll new file mode 100644 index 0000000000000000000000000000000000000000..b61979c89dff2127d423aa1421c02d7991309462 --- /dev/null +++ b/hpvm/test/regressionTests/DFG2LLVM_WrapperAPI/wrapConv.ll @@ -0,0 +1,136 @@ +; RUN: opt -load LLVMBuildDFG.so -load LLVMInPlaceDFGAnalysis.so -load LLVMDFG2LLVM_WrapperAPI.so -S -inplace -dfg2llvm-wrapperapi -quantization-levels-filename=q_file.txt --configuration-inputs-filename=conf_file.txt < %s | FileCheck %s + +; ModuleID = 'fuseConv.hpvm_.ll' +source_filename = "fuseConv.cpp" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.out._Z4rootPvmS_mS_m = type <{ i8*, i64 }> +%struct.out._Z13pool_max_nodePvm = type <{ i8*, i64 }> + +; CHECK-LABEL: i32 @main( +; CHECK: call void @llvm_hpvm_initApproxhpvmRt(i32 0) +; CHECK-NEXT: call void @llvm_hpvm_initializeRuntimeController( +; CHECK-NEXT: call void @llvm.hpvm.init() +; CHECK: call void @hpvm_request_tensor( +; CHECK: call void @llvm_hpvm_cleanupApproxhpvmRt() +; CHECK: call void @llvm_hpvm_clearRuntimeController() +; CHECK-NEXT: call void @llvm.hpvm.cleanup() + + + +; CHECK-LABEL: @_Z9conv_nodePvmS_m_cloned__Z13bias_add_nodePvmS_m_cloned__Z9tanh_nodePvm_cloned__Z13pool_max_nodePvm_cloned_wrapper_api( +; CHECK: call void @hpvm_request_tensor( +; CHECK: call void @hpvm_request_tensor( +; CHECK: call void @hpvm_request_tensor( +; CHECK: call i8* @wrapper_ConvLayer2( +; CHECK: ret + + + +; Function Attrs: norecurse nounwind uwtable +define dso_local i32 @main() local_unnamed_addr #0 { +entry: + call void @llvm.hpvm.init() + %call = tail call noalias i8* @malloc(i64 64) #3 + %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out._Z4rootPvmS_mS_m (i8*, i64, i8*, i64, i8*, i64)* @_Z4rootPvmS_mS_m_cloned to i8*), i8* %call, i1 false) + call void @llvm.hpvm.wait(i8* %graphID) + %input = bitcast i8* %call to i8** + %0 = load i8*, i8** %input, align 1, !tbaa !4 + tail call void @hpvm_request_tensor(i8* %0, i32 1) #3 + call void @llvm.hpvm.cleanup() + ret i32 0 +} + +; Function Attrs: nofree nounwind +declare dso_local noalias i8* @malloc(i64) local_unnamed_addr #1 + +declare dso_local void @hpvm_request_tensor(i8*, i32) local_unnamed_addr #2 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.convolution(i8*, i8*, i32, i32, i32, i32) #3 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.add(i8*, i8*) #3 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.tanh(i8*) #3 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.pool.max(i8*, i32, i32, i32, i32, i32, i32) #3 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.createNode(i8*) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.bind.input(i8*, i32, i32, i1) #3 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32, i1) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.bind.output(i8*, i32, i32, i1) #3 + +; Function Attrs: nounwind uwtable +define dso_local %struct.out._Z4rootPvmS_mS_m @_Z4rootPvmS_mS_m_cloned(i8* in %input, i64 %input_bytes, i8* in %conv2d_1_w, i64 %conv2d_1_w_bytes, i8* in %conv2d_1_b, i64 %conv2d_1_b_bytes) #4 { +entry: + %_Z9conv_nodePvmS_m_cloned__Z13bias_add_nodePvmS_m_cloned__Z9tanh_nodePvm_cloned__Z13pool_max_nodePvm_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out._Z13pool_max_nodePvm (i8*, i64, i8*, i64, i8*, i64)* @_Z9conv_nodePvmS_m_cloned__Z13bias_add_nodePvmS_m_cloned__Z9tanh_nodePvm_cloned__Z13pool_max_nodePvm_cloned to i8*)) + call void @llvm.hpvm.bind.input(i8* %_Z9conv_nodePvmS_m_cloned__Z13bias_add_nodePvmS_m_cloned__Z9tanh_nodePvm_cloned__Z13pool_max_nodePvm_cloned.node, i32 0, i32 0, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z9conv_nodePvmS_m_cloned__Z13bias_add_nodePvmS_m_cloned__Z9tanh_nodePvm_cloned__Z13pool_max_nodePvm_cloned.node, i32 1, i32 1, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z9conv_nodePvmS_m_cloned__Z13bias_add_nodePvmS_m_cloned__Z9tanh_nodePvm_cloned__Z13pool_max_nodePvm_cloned.node, i32 2, i32 2, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z9conv_nodePvmS_m_cloned__Z13bias_add_nodePvmS_m_cloned__Z9tanh_nodePvm_cloned__Z13pool_max_nodePvm_cloned.node, i32 3, i32 3, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z9conv_nodePvmS_m_cloned__Z13bias_add_nodePvmS_m_cloned__Z9tanh_nodePvm_cloned__Z13pool_max_nodePvm_cloned.node, i32 4, i32 4, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z9conv_nodePvmS_m_cloned__Z13bias_add_nodePvmS_m_cloned__Z9tanh_nodePvm_cloned__Z13pool_max_nodePvm_cloned.node, i32 5, i32 5, i1 false) + call void @llvm.hpvm.bind.output(i8* %_Z9conv_nodePvmS_m_cloned__Z13bias_add_nodePvmS_m_cloned__Z9tanh_nodePvm_cloned__Z13pool_max_nodePvm_cloned.node, i32 0, i32 0, i1 false) + call void @llvm.hpvm.bind.output(i8* %_Z9conv_nodePvmS_m_cloned__Z13bias_add_nodePvmS_m_cloned__Z9tanh_nodePvm_cloned__Z13pool_max_nodePvm_cloned.node, i32 1, i32 1, i1 false) + ret %struct.out._Z4rootPvmS_mS_m undef +} + +; Function Attrs: nounwind +declare void @llvm.hpvm.init() #3 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.launch(i8*, i8*, i1) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.wait(i8*) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.cleanup() #3 + +define %struct.out._Z13pool_max_nodePvm @_Z9conv_nodePvmS_m_cloned__Z13bias_add_nodePvmS_m_cloned__Z9tanh_nodePvm_cloned__Z13pool_max_nodePvm_cloned(i8* in %s_s_s_t1, i64 %s_s_s_bytes_t1, i8* in %s_s_s_t2, i64 %s_s_s_bytes_t2, i8* in %s_s_d_t2, i64 %s_s_d_bytes_t2) { +entry: + %s_s_s_call1 = call i8* @llvm.hpvm.tensor.convolution(i8* %s_s_s_t1, i8* %s_s_s_t2, i32 2, i32 2, i32 1, i32 1) + %s_s_call1 = call i8* @llvm.hpvm.tensor.add(i8* %s_s_s_call1, i8* %s_s_d_t2) + %s_call1 = call i8* @llvm.hpvm.tensor.tanh(i8* %s_s_call1) + %call1 = call i8* @llvm.hpvm.tensor.pool.max(i8* %s_call1, i32 2, i32 2, i32 0, i32 0, i32 2, i32 2) + %returnStruct = insertvalue %struct.out._Z13pool_max_nodePvm undef, i8* %call1, 0 + %returnStruct2 = insertvalue %struct.out._Z13pool_max_nodePvm %returnStruct, i64 0, 1 + ret %struct.out._Z13pool_max_nodePvm %returnStruct2 +} + +attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #1 = { nofree nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #3 = { nounwind } +attributes #4 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} +!hpvm_hint_promise = !{!2, !3} +!hpvm_hint_gpu = !{} +!hpvm_hint_cpu = !{} +!hpvm_hint_cpu_gpu = !{} +!hpvm_hint_cudnn = !{} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 5ccb2a532b5a0d82cee5c0d29a629a29dec2307c)"} +!2 = !{%struct.out._Z4rootPvmS_mS_m (i8*, i64, i8*, i64, i8*, i64)* @_Z4rootPvmS_mS_m_cloned} +!3 = !{%struct.out._Z13pool_max_nodePvm (i8*, i64, i8*, i64, i8*, i64)* @_Z9conv_nodePvmS_m_cloned__Z13bias_add_nodePvmS_m_cloned__Z9tanh_nodePvm_cloned__Z13pool_max_nodePvm_cloned} +!4 = !{!5, !6, i64 0} +!5 = !{!"_ZTS6RootIn", !6, i64 0, !9, i64 8, !6, i64 16, !9, i64 24, !6, i64 32, !9, i64 40, !10, i64 48} +!6 = !{!"any pointer", !7, i64 0} +!7 = !{!"omnipotent char", !8, i64 0} +!8 = !{!"Simple C++ TBAA"} +!9 = !{!"long", !7, i64 0} +!10 = !{!"_ZTS5ret_t", !6, i64 0, !9, i64 8} diff --git a/hpvm/test/regressionTests/DFG2LLVM_WrapperAPI/wrapGroupPoolSoftmax.ll b/hpvm/test/regressionTests/DFG2LLVM_WrapperAPI/wrapGroupPoolSoftmax.ll new file mode 100644 index 0000000000000000000000000000000000000000..d3aa94081c2248245eee364e108bf2028c69534d --- /dev/null +++ b/hpvm/test/regressionTests/DFG2LLVM_WrapperAPI/wrapGroupPoolSoftmax.ll @@ -0,0 +1,164 @@ +; RUN: opt -load LLVMBuildDFG.so -load LLVMInPlaceDFGAnalysis.so -load LLVMDFG2LLVM_WrapperAPI.so -S -inplace -dfg2llvm-wrapperapi -quantization-levels-filename=q_file.txt --configuration-inputs-filename=conf_file.txt < %s | FileCheck %s +; ModuleID = 'groupPoolSoftmax.hpvm.ll' +source_filename = "groupPoolSoftmax.cpp" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.out._Z14groupConv_nodePvmS_m = type <{ i8*, i64 }> +%struct.out._Z12softmax_nodePvm = type <{ i8*, i64 }> +%struct.out._Z13pool_max_nodePvm = type <{ i8*, i64 }> +%struct.out._Z4rootPvmS_m = type <{ i8*, i64 }> + + +; CHECK-LABEL: i32 @main( +; CHECK: call void @llvm_hpvm_initApproxhpvmRt(i32 0) +; CHECK-NEXT: call void @llvm_hpvm_initializeRuntimeController( +; CHECK-NEXT: call void @llvm.hpvm.init() +; CHECK: call void @hpvm_request_tensor( +; CHECK: call void @llvm_hpvm_cleanupApproxhpvmRt() +; CHECK: call void @llvm_hpvm_clearRuntimeController() +; CHECK-NEXT: call void @llvm.hpvm.cleanup() + +; CHECK-LABEL: @_Z14groupConv_nodePvmS_m_cloned_wrapper_api( +; CHECK: call void @hpvm_request_tensor( +; CHECK: call void @hpvm_request_tensor( +; CHECK: call i8* @wrapper_tensorGroupConvolution(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @2, i32 0, i32 0), i8* %t1, i8* %t2, i32 1, i32 1, i32 1, i32 1, i32 1, i32 32) +; CHECK: ret + +; CHECK-LABEL: @_Z13pool_max_nodePvm_cloned_wrapper_api( +; CHECK: call void @hpvm_request_tensor( +; CHECK: call i8* @wrapper_tensorPooling(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @3, i32 0, i32 0), i8* %t1, i32 0, i32 2, i32 2, i32 0, i32 0, i32 2, i32 2) +; CHECK: ret + +; CHECK-LABEL: @_Z12softmax_nodePvm_cloned_wrapper_api( +; CHECK: call void @hpvm_request_tensor( +; CHECK: call i8* @wrapper_tensorSoftmax( +; CHECK: ret + + + +; Function Attrs: norecurse uwtable +define dso_local i32 @main() local_unnamed_addr #0 { +entry: + call void @llvm.hpvm.init() + %call = tail call noalias i8* @malloc(i64 48) #3 + %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out._Z4rootPvmS_m (i8*, i64, i8*, i64)* @_Z4rootPvmS_m_cloned to i8*), i8* %call, i1 false) + call void @llvm.hpvm.wait(i8* %graphID) + %input = bitcast i8* %call to i8** + %0 = load i8*, i8** %input, align 1, !tbaa !6 + tail call void @hpvm_request_tensor(i8* %0, i32 1) + call void @llvm.hpvm.cleanup() + ret i32 0 +} + +; Function Attrs: nofree nounwind +declare dso_local noalias i8* @malloc(i64) local_unnamed_addr #1 + +declare dso_local void @hpvm_request_tensor(i8*, i32) local_unnamed_addr #2 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.group.convolution(i8*, i8*, i32, i32, i32, i32, i32, i32) #3 + +; Function Attrs: uwtable +define dso_local %struct.out._Z14groupConv_nodePvmS_m @_Z14groupConv_nodePvmS_m_cloned(i8* in %t1, i64 %bytes_t1, i8* in %t2, i64 %bytes_t2) #4 { +entry: + %call1 = call i8* @llvm.hpvm.tensor.group.convolution(i8* %t1, i8* %t2, i32 1, i32 1, i32 1, i32 1, i32 1, i32 32) + %returnStruct = insertvalue %struct.out._Z14groupConv_nodePvmS_m undef, i8* %call1, 0 + %returnStruct2 = insertvalue %struct.out._Z14groupConv_nodePvmS_m %returnStruct, i64 0, 1 + ret %struct.out._Z14groupConv_nodePvmS_m %returnStruct2 +} + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.softmax(i8*) #3 + +; Function Attrs: uwtable +define dso_local %struct.out._Z12softmax_nodePvm @_Z12softmax_nodePvm_cloned(i8* in %t1, i64 %bytes_t1) #4 { +entry: + %call1 = call i8* @llvm.hpvm.tensor.softmax(i8* %t1) + %returnStruct = insertvalue %struct.out._Z12softmax_nodePvm undef, i8* %call1, 0 + %returnStruct2 = insertvalue %struct.out._Z12softmax_nodePvm %returnStruct, i64 0, 1 + ret %struct.out._Z12softmax_nodePvm %returnStruct2 +} + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.pool.max(i8*, i32, i32, i32, i32, i32, i32) #3 + +; Function Attrs: uwtable +define dso_local %struct.out._Z13pool_max_nodePvm @_Z13pool_max_nodePvm_cloned(i8* in %t1, i64 %bytes_t1) #4 { +entry: + %call1 = call i8* @llvm.hpvm.tensor.pool.max(i8* %t1, i32 2, i32 2, i32 0, i32 0, i32 2, i32 2) + %returnStruct = insertvalue %struct.out._Z13pool_max_nodePvm undef, i8* %call1, 0 + %returnStruct2 = insertvalue %struct.out._Z13pool_max_nodePvm %returnStruct, i64 0, 1 + ret %struct.out._Z13pool_max_nodePvm %returnStruct2 +} + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.createNode(i8*) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.bind.input(i8*, i32, i32, i1) #3 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32, i1) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.bind.output(i8*, i32, i32, i1) #3 + +; Function Attrs: uwtable +define dso_local %struct.out._Z4rootPvmS_m @_Z4rootPvmS_m_cloned(i8* in %input, i64 %input_bytes, i8* in %gconv, i64 %gconv_bytes) #4 { +entry: + %_Z14groupConv_nodePvmS_m_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out._Z14groupConv_nodePvmS_m (i8*, i64, i8*, i64)* @_Z14groupConv_nodePvmS_m_cloned to i8*)) + call void @llvm.hpvm.bind.input(i8* %_Z14groupConv_nodePvmS_m_cloned.node, i32 0, i32 0, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z14groupConv_nodePvmS_m_cloned.node, i32 1, i32 1, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z14groupConv_nodePvmS_m_cloned.node, i32 2, i32 2, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z14groupConv_nodePvmS_m_cloned.node, i32 3, i32 3, i1 false) + %_Z13pool_max_nodePvm_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out._Z13pool_max_nodePvm (i8*, i64)* @_Z13pool_max_nodePvm_cloned to i8*)) + %output = call i8* @llvm.hpvm.createEdge(i8* %_Z14groupConv_nodePvmS_m_cloned.node, i8* %_Z13pool_max_nodePvm_cloned.node, i1 true, i32 0, i32 0, i1 false) + %output1 = call i8* @llvm.hpvm.createEdge(i8* %_Z14groupConv_nodePvmS_m_cloned.node, i8* %_Z13pool_max_nodePvm_cloned.node, i1 true, i32 1, i32 1, i1 false) + %_Z12softmax_nodePvm_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out._Z12softmax_nodePvm (i8*, i64)* @_Z12softmax_nodePvm_cloned to i8*)) + %output2 = call i8* @llvm.hpvm.createEdge(i8* %_Z13pool_max_nodePvm_cloned.node, i8* %_Z12softmax_nodePvm_cloned.node, i1 true, i32 0, i32 0, i1 false) + %output3 = call i8* @llvm.hpvm.createEdge(i8* %_Z13pool_max_nodePvm_cloned.node, i8* %_Z12softmax_nodePvm_cloned.node, i1 true, i32 1, i32 1, i1 false) + call void @llvm.hpvm.bind.output(i8* %_Z12softmax_nodePvm_cloned.node, i32 0, i32 0, i1 false) + call void @llvm.hpvm.bind.output(i8* %_Z12softmax_nodePvm_cloned.node, i32 1, i32 1, i1 false) + ret %struct.out._Z4rootPvmS_m undef +} + +; Function Attrs: nounwind +declare void @llvm.hpvm.init() #3 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.launch(i8*, i8*, i1) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.wait(i8*) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.cleanup() #3 + +attributes #0 = { norecurse uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nofree nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind } +attributes #4 = { uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} +!hpvm_hint_promise = !{!2, !3, !4, !5} +!hpvm_hint_gpu = !{} +!hpvm_hint_cpu = !{} +!hpvm_hint_cpu_gpu = !{} +!hpvm_hint_cudnn = !{} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 5c2a920901bb51fcc2e51f920c0f726cbd6d3f0d)"} +!2 = !{%struct.out._Z14groupConv_nodePvmS_m (i8*, i64, i8*, i64)* @_Z14groupConv_nodePvmS_m_cloned} +!3 = !{%struct.out._Z12softmax_nodePvm (i8*, i64)* @_Z12softmax_nodePvm_cloned} +!4 = !{%struct.out._Z13pool_max_nodePvm (i8*, i64)* @_Z13pool_max_nodePvm_cloned} +!5 = !{%struct.out._Z4rootPvmS_m (i8*, i64, i8*, i64)* @_Z4rootPvmS_m_cloned} +!6 = !{!7, !8, i64 0} +!7 = !{!"_ZTS6RootIn", !8, i64 0, !11, i64 8, !8, i64 16, !11, i64 24, !12, i64 32} +!8 = !{!"any pointer", !9, i64 0} +!9 = !{!"omnipotent char", !10, i64 0} +!10 = !{!"Simple C++ TBAA"} +!11 = !{!"long", !9, i64 0} +!12 = !{!"_ZTS5ret_t", !8, i64 0, !11, i64 8} diff --git a/hpvm/test/regressionTests/DFG2LLVM_WrapperAPI/wrapMatMul.ll b/hpvm/test/regressionTests/DFG2LLVM_WrapperAPI/wrapMatMul.ll new file mode 100644 index 0000000000000000000000000000000000000000..ce901a6d411379a84e1878d416d5e7c77d3d9c5f --- /dev/null +++ b/hpvm/test/regressionTests/DFG2LLVM_WrapperAPI/wrapMatMul.ll @@ -0,0 +1,153 @@ +; RUN: opt -load LLVMBuildDFG.so -load LLVMInPlaceDFGAnalysis.so -load LLVMDFG2LLVM_WrapperAPI.so -S -inplace -dfg2llvm-wrapperapi -quantization-levels-filename=q_file.txt --configuration-inputs-filename=conf_file.txt < %s | FileCheck %s + +; ModuleID = 'fuseMatMul.hpvm.ll' +source_filename = "fuseMatMul.cpp" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.out._Z13pool_max_nodePvm = type <{ i8*, i64 }> +%struct.out._Z4rootPvmS_mS_m = type <{ i8*, i64 }> +%struct.out._Z9tanh_nodePvm = type <{ i8*, i64 }> + +; CHECK-LABEL: i32 @main( +; CHECK: call void @llvm_hpvm_initApproxhpvmRt(i32 0) +; CHECK-NEXT: call void @llvm_hpvm_initializeRuntimeController( +; CHECK-NEXT: call void @llvm.hpvm.init() +; CHECK: call void @hpvm_request_tensor( +; CHECK: call void @llvm_hpvm_cleanupApproxhpvmRt() +; CHECK: call void @llvm_hpvm_clearRuntimeController() +; CHECK-NEXT: call void @llvm.hpvm.cleanup() + + +; CHECK-LABEL: @_Z11matmul_nodePvmS_m_cloned__Z13bias_add_nodePvmS_m_cloned__Z9tanh_nodePvm_cloned_wrapper_api( +; CHECK: call void @hpvm_request_tensor( +; CHECK: call void @hpvm_request_tensor( +; CHECK: call void @hpvm_request_tensor( +; CHECK: call i8* @wrapper_FCLayer( +; CHECK: ret + +; CHECK-LABEL: @_Z13pool_max_nodePvm_cloned_wrapper_api( +; CHECK: call void @hpvm_request_tensor(i8* %t1, i32 1) +; CHECK: call i8* @wrapper_tensorPooling( +; CHECK: ret + + + +; Function Attrs: norecurse nounwind uwtable +define dso_local i32 @main() local_unnamed_addr #0 { +entry: + call void @llvm.hpvm.init() + %call = tail call noalias i8* @malloc(i64 64) #3 + %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out._Z4rootPvmS_mS_m (i8*, i64, i8*, i64, i8*, i64)* @_Z4rootPvmS_mS_m_cloned to i8*), i8* %call, i1 false) + call void @llvm.hpvm.wait(i8* %graphID) + %input = bitcast i8* %call to i8** + %0 = load i8*, i8** %input, align 1, !tbaa !5 + tail call void @hpvm_request_tensor(i8* %0, i32 1) #3 + call void @llvm.hpvm.cleanup() + ret i32 0 +} + +; Function Attrs: nofree nounwind +declare dso_local noalias i8* @malloc(i64) local_unnamed_addr #1 + +declare dso_local void @hpvm_request_tensor(i8*, i32) local_unnamed_addr #2 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.mul(i8*, i8*) #3 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.add(i8*, i8*) #3 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.tanh(i8*) #3 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.pool.max(i8*, i32, i32, i32, i32, i32, i32) #3 + +; Function Attrs: nounwind uwtable +define dso_local %struct.out._Z13pool_max_nodePvm @_Z13pool_max_nodePvm_cloned(i8* in %t1, i64 %bytes_t1) #4 { +entry: + %call1 = call i8* @llvm.hpvm.tensor.pool.max(i8* %t1, i32 2, i32 2, i32 0, i32 0, i32 2, i32 2) + %returnStruct = insertvalue %struct.out._Z13pool_max_nodePvm undef, i8* %call1, 0 + %returnStruct2 = insertvalue %struct.out._Z13pool_max_nodePvm %returnStruct, i64 0, 1 + ret %struct.out._Z13pool_max_nodePvm %returnStruct2 +} + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.createNode(i8*) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.bind.input(i8*, i32, i32, i1) #3 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32, i1) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.bind.output(i8*, i32, i32, i1) #3 + +; Function Attrs: nounwind uwtable +define dso_local %struct.out._Z4rootPvmS_mS_m @_Z4rootPvmS_mS_m_cloned(i8* in %input, i64 %input_bytes, i8* in %matmul2d_1_w, i64 %matmul2d_1_w_bytes, i8* in %matmul2d_1_b, i64 %matmul2d_1_b_bytes) #4 { +entry: + %_Z11matmul_nodePvmS_m_cloned__Z13bias_add_nodePvmS_m_cloned__Z9tanh_nodePvm_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out._Z9tanh_nodePvm (i8*, i64, i8*, i64, i8*, i64)* @_Z11matmul_nodePvmS_m_cloned__Z13bias_add_nodePvmS_m_cloned__Z9tanh_nodePvm_cloned to i8*)) + call void @llvm.hpvm.bind.input(i8* %_Z11matmul_nodePvmS_m_cloned__Z13bias_add_nodePvmS_m_cloned__Z9tanh_nodePvm_cloned.node, i32 0, i32 0, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z11matmul_nodePvmS_m_cloned__Z13bias_add_nodePvmS_m_cloned__Z9tanh_nodePvm_cloned.node, i32 1, i32 1, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z11matmul_nodePvmS_m_cloned__Z13bias_add_nodePvmS_m_cloned__Z9tanh_nodePvm_cloned.node, i32 2, i32 2, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z11matmul_nodePvmS_m_cloned__Z13bias_add_nodePvmS_m_cloned__Z9tanh_nodePvm_cloned.node, i32 3, i32 3, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z11matmul_nodePvmS_m_cloned__Z13bias_add_nodePvmS_m_cloned__Z9tanh_nodePvm_cloned.node, i32 4, i32 4, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z11matmul_nodePvmS_m_cloned__Z13bias_add_nodePvmS_m_cloned__Z9tanh_nodePvm_cloned.node, i32 5, i32 5, i1 false) + %_Z13pool_max_nodePvm_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out._Z13pool_max_nodePvm (i8*, i64)* @_Z13pool_max_nodePvm_cloned to i8*)) + %output4 = call i8* @llvm.hpvm.createEdge(i8* %_Z11matmul_nodePvmS_m_cloned__Z13bias_add_nodePvmS_m_cloned__Z9tanh_nodePvm_cloned.node, i8* %_Z13pool_max_nodePvm_cloned.node, i1 true, i32 0, i32 0, i1 false) + %output5 = call i8* @llvm.hpvm.createEdge(i8* %_Z11matmul_nodePvmS_m_cloned__Z13bias_add_nodePvmS_m_cloned__Z9tanh_nodePvm_cloned.node, i8* %_Z13pool_max_nodePvm_cloned.node, i1 true, i32 1, i32 1, i1 false) + call void @llvm.hpvm.bind.output(i8* %_Z13pool_max_nodePvm_cloned.node, i32 0, i32 0, i1 false) + call void @llvm.hpvm.bind.output(i8* %_Z13pool_max_nodePvm_cloned.node, i32 1, i32 1, i1 false) + ret %struct.out._Z4rootPvmS_mS_m undef +} + +; Function Attrs: nounwind +declare void @llvm.hpvm.init() #3 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.launch(i8*, i8*, i1) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.wait(i8*) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.cleanup() #3 + +define %struct.out._Z9tanh_nodePvm @_Z11matmul_nodePvmS_m_cloned__Z13bias_add_nodePvmS_m_cloned__Z9tanh_nodePvm_cloned(i8* in %s_s_t1, i64 %s_s_bytes_t1, i8* in %s_s_t2, i64 %s_s_bytes_t2, i8* in %s_d_t2, i64 %s_d_bytes_t2) { +entry: + %s_s_call1 = call i8* @llvm.hpvm.tensor.mul(i8* %s_s_t1, i8* %s_s_t2) + %s_call1 = call i8* @llvm.hpvm.tensor.add(i8* %s_s_call1, i8* %s_d_t2) + %call1 = call i8* @llvm.hpvm.tensor.tanh(i8* %s_call1) + %returnStruct = insertvalue %struct.out._Z9tanh_nodePvm undef, i8* %call1, 0 + %returnStruct2 = insertvalue %struct.out._Z9tanh_nodePvm %returnStruct, i64 0, 1 + ret %struct.out._Z9tanh_nodePvm %returnStruct2 +} + +attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #1 = { nofree nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #3 = { nounwind } +attributes #4 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} +!hpvm_hint_promise = !{!2, !3, !4} +!hpvm_hint_gpu = !{} +!hpvm_hint_cpu = !{} +!hpvm_hint_cpu_gpu = !{} +!hpvm_hint_cudnn = !{} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 5ccb2a532b5a0d82cee5c0d29a629a29dec2307c)"} +!2 = !{%struct.out._Z13pool_max_nodePvm (i8*, i64)* @_Z13pool_max_nodePvm_cloned} +!3 = !{%struct.out._Z4rootPvmS_mS_m (i8*, i64, i8*, i64, i8*, i64)* @_Z4rootPvmS_mS_m_cloned} +!4 = !{%struct.out._Z9tanh_nodePvm (i8*, i64, i8*, i64, i8*, i64)* @_Z11matmul_nodePvmS_m_cloned__Z13bias_add_nodePvmS_m_cloned__Z9tanh_nodePvm_cloned} +!5 = !{!6, !7, i64 0} +!6 = !{!"_ZTS6RootIn", !7, i64 0, !10, i64 8, !7, i64 16, !10, i64 24, !7, i64 32, !10, i64 40, !11, i64 48} +!7 = !{!"any pointer", !8, i64 0} +!8 = !{!"omnipotent char", !9, i64 0} +!9 = !{!"Simple C++ TBAA"} +!10 = !{!"long", !8, i64 0} +!11 = !{!"_ZTS5ret_t", !7, i64 0, !10, i64 8} diff --git a/hpvm/test/regressionTests/FuseHPVMTensorNodes/fuseConv.hpvm.ll b/hpvm/test/regressionTests/FuseHPVMTensorNodes/fuseConv.hpvm.ll new file mode 100644 index 0000000000000000000000000000000000000000..4c60c749ff808d4d04f599be39e4992bbc7c8db1 --- /dev/null +++ b/hpvm/test/regressionTests/FuseHPVMTensorNodes/fuseConv.hpvm.ll @@ -0,0 +1,177 @@ +; RUN: opt -load LLVMBuildDFG.so -load LLVMInPlaceDFGAnalysis.so -load LLVMFuseHPVMTensorNodes.so -S -inplace -hpvm-fuse < %s | FileCheck %s +; ModuleID = 'fuseConv.ll' +source_filename = "fuseConv.cpp" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.out._Z9conv_nodePvmS_m = type <{ i8*, i64 }> +%struct.out._Z13bias_add_nodePvmS_m = type <{ i8*, i64 }> +%struct.out._Z9tanh_nodePvm = type <{ i8*, i64 }> +%struct.out._Z13pool_max_nodePvm = type <{ i8*, i64 }> +%struct.out._Z4rootPvmS_mS_m = type <{ i8*, i64 }> + +; CHECK-LABEL: @_Z4rootPvmS_mS_m_cloned( +; CHECK: call i8* @llvm.hpvm.createNode( +; CHECK-NEXT: call void @llvm.hpvm.bind.input( +; CHECK-NEXT: call void @llvm.hpvm.bind.input( +; CHECK-NEXT: call void @llvm.hpvm.bind.input( +; CHECK-NEXT: call void @llvm.hpvm.bind.input( +; CHECK-NEXT: call void @llvm.hpvm.bind.input( +; CHECK-NEXT: call void @llvm.hpvm.bind.input( +; CHECK: call void @llvm.hpvm.bind.output( +; CHECK: call void @llvm.hpvm.bind.output( +; CHECK: ret + + +; CHECK-LABEL: @_Z9conv_nodePvmS_m_cloned__Z13bias_add_nodePvmS_m_cloned__Z9tanh_nodePvm_cloned__Z13pool_max_nodePvm_cloned( +; CHECK: call i8* @llvm.hpvm.tensor.convolution( +; CHECK-NEXT: call i8* @llvm.hpvm.tensor.add( +; CHECK-NEXT: call i8* @llvm.hpvm.tensor.tanh( +; CHECK-NEXT: call i8* @llvm.hpvm.tensor.pool.max( +; CHECK: ret + + + +; Function Attrs: norecurse nounwind uwtable +define dso_local i32 @main() local_unnamed_addr #0 { +entry: + call void @llvm.hpvm.init() + %call = tail call noalias i8* @malloc(i64 64) #3 + %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out._Z4rootPvmS_mS_m (i8*, i64, i8*, i64, i8*, i64)* @_Z4rootPvmS_mS_m_cloned to i8*), i8* %call, i1 false) + call void @llvm.hpvm.wait(i8* %graphID) + %input = bitcast i8* %call to i8** + %0 = load i8*, i8** %input, align 1, !tbaa !7 + tail call void @hpvm_request_tensor(i8* %0, i32 1) #3 + call void @llvm.hpvm.cleanup() + ret i32 0 +} + +; Function Attrs: nofree nounwind +declare dso_local noalias i8* @malloc(i64) local_unnamed_addr #1 + +declare dso_local void @hpvm_request_tensor(i8*, i32) local_unnamed_addr #2 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.convolution(i8*, i8*, i32, i32, i32, i32) #3 + +; Function Attrs: nounwind uwtable +define dso_local %struct.out._Z9conv_nodePvmS_m @_Z9conv_nodePvmS_m_cloned(i8* in %t1, i64 %bytes_t1, i8* in %t2, i64 %bytes_t2) #4 { +entry: + %call1 = call i8* @llvm.hpvm.tensor.convolution(i8* %t1, i8* %t2, i32 2, i32 2, i32 1, i32 1) + %returnStruct = insertvalue %struct.out._Z9conv_nodePvmS_m undef, i8* %call1, 0 + %returnStruct2 = insertvalue %struct.out._Z9conv_nodePvmS_m %returnStruct, i64 0, 1 + ret %struct.out._Z9conv_nodePvmS_m %returnStruct2 +} + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.add(i8*, i8*) #3 + +; Function Attrs: nounwind uwtable +define dso_local %struct.out._Z13bias_add_nodePvmS_m @_Z13bias_add_nodePvmS_m_cloned(i8* in %t1, i64 %bytes_t1, i8* in %t2, i64 %bytes_t2) #4 { +entry: + %call1 = call i8* @llvm.hpvm.tensor.add(i8* %t1, i8* %t2) + %returnStruct = insertvalue %struct.out._Z13bias_add_nodePvmS_m undef, i8* %call1, 0 + %returnStruct2 = insertvalue %struct.out._Z13bias_add_nodePvmS_m %returnStruct, i64 0, 1 + ret %struct.out._Z13bias_add_nodePvmS_m %returnStruct2 +} + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.tanh(i8*) #3 + +; Function Attrs: nounwind uwtable +define dso_local %struct.out._Z9tanh_nodePvm @_Z9tanh_nodePvm_cloned(i8* in %t1, i64 %bytes_t1) #4 { +entry: + %call1 = call i8* @llvm.hpvm.tensor.tanh(i8* %t1) + %returnStruct = insertvalue %struct.out._Z9tanh_nodePvm undef, i8* %call1, 0 + %returnStruct2 = insertvalue %struct.out._Z9tanh_nodePvm %returnStruct, i64 0, 1 + ret %struct.out._Z9tanh_nodePvm %returnStruct2 +} + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.pool.max(i8*, i32, i32, i32, i32, i32, i32) #3 + +; Function Attrs: nounwind uwtable +define dso_local %struct.out._Z13pool_max_nodePvm @_Z13pool_max_nodePvm_cloned(i8* in %t1, i64 %bytes_t1) #4 { +entry: + %call1 = call i8* @llvm.hpvm.tensor.pool.max(i8* %t1, i32 2, i32 2, i32 0, i32 0, i32 2, i32 2) + %returnStruct = insertvalue %struct.out._Z13pool_max_nodePvm undef, i8* %call1, 0 + %returnStruct2 = insertvalue %struct.out._Z13pool_max_nodePvm %returnStruct, i64 0, 1 + ret %struct.out._Z13pool_max_nodePvm %returnStruct2 +} + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.createNode(i8*) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.bind.input(i8*, i32, i32, i1) #3 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32, i1) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.bind.output(i8*, i32, i32, i1) #3 + +; Function Attrs: nounwind uwtable +define dso_local %struct.out._Z4rootPvmS_mS_m @_Z4rootPvmS_mS_m_cloned(i8* in %input, i64 %input_bytes, i8* in %conv2d_1_w, i64 %conv2d_1_w_bytes, i8* in %conv2d_1_b, i64 %conv2d_1_b_bytes) #4 { +entry: + %_Z9conv_nodePvmS_m_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out._Z9conv_nodePvmS_m (i8*, i64, i8*, i64)* @_Z9conv_nodePvmS_m_cloned to i8*)) + call void @llvm.hpvm.bind.input(i8* %_Z9conv_nodePvmS_m_cloned.node, i32 0, i32 0, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z9conv_nodePvmS_m_cloned.node, i32 1, i32 1, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z9conv_nodePvmS_m_cloned.node, i32 2, i32 2, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z9conv_nodePvmS_m_cloned.node, i32 3, i32 3, i1 false) + %_Z13bias_add_nodePvmS_m_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out._Z13bias_add_nodePvmS_m (i8*, i64, i8*, i64)* @_Z13bias_add_nodePvmS_m_cloned to i8*)) + %output = call i8* @llvm.hpvm.createEdge(i8* %_Z9conv_nodePvmS_m_cloned.node, i8* %_Z13bias_add_nodePvmS_m_cloned.node, i1 true, i32 0, i32 0, i1 false) + %output1 = call i8* @llvm.hpvm.createEdge(i8* %_Z9conv_nodePvmS_m_cloned.node, i8* %_Z13bias_add_nodePvmS_m_cloned.node, i1 true, i32 1, i32 1, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z13bias_add_nodePvmS_m_cloned.node, i32 4, i32 2, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z13bias_add_nodePvmS_m_cloned.node, i32 5, i32 3, i1 false) + %_Z9tanh_nodePvm_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out._Z9tanh_nodePvm (i8*, i64)* @_Z9tanh_nodePvm_cloned to i8*)) + %output2 = call i8* @llvm.hpvm.createEdge(i8* %_Z13bias_add_nodePvmS_m_cloned.node, i8* %_Z9tanh_nodePvm_cloned.node, i1 true, i32 0, i32 0, i1 false) + %output3 = call i8* @llvm.hpvm.createEdge(i8* %_Z13bias_add_nodePvmS_m_cloned.node, i8* %_Z9tanh_nodePvm_cloned.node, i1 true, i32 1, i32 1, i1 false) + %_Z13pool_max_nodePvm_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out._Z13pool_max_nodePvm (i8*, i64)* @_Z13pool_max_nodePvm_cloned to i8*)) + %output4 = call i8* @llvm.hpvm.createEdge(i8* %_Z9tanh_nodePvm_cloned.node, i8* %_Z13pool_max_nodePvm_cloned.node, i1 true, i32 0, i32 0, i1 false) + %output5 = call i8* @llvm.hpvm.createEdge(i8* %_Z9tanh_nodePvm_cloned.node, i8* %_Z13pool_max_nodePvm_cloned.node, i1 true, i32 1, i32 1, i1 false) + call void @llvm.hpvm.bind.output(i8* %_Z13pool_max_nodePvm_cloned.node, i32 0, i32 0, i1 false) + call void @llvm.hpvm.bind.output(i8* %_Z13pool_max_nodePvm_cloned.node, i32 1, i32 1, i1 false) + ret %struct.out._Z4rootPvmS_mS_m undef +} + +; Function Attrs: nounwind +declare void @llvm.hpvm.init() #3 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.launch(i8*, i8*, i1) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.wait(i8*) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.cleanup() #3 + +attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #1 = { nofree nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #3 = { nounwind } +attributes #4 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} +!hpvm_hint_promise = !{!2, !3, !4, !5} +!hpvm_hint_gpu = !{} +!hpvm_hint_cpu = !{!6} +!hpvm_hint_cpu_gpu = !{} +!hpvm_hint_cudnn = !{} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 5ccb2a532b5a0d82cee5c0d29a629a29dec2307c)"} +!2 = !{%struct.out._Z9conv_nodePvmS_m (i8*, i64, i8*, i64)* @_Z9conv_nodePvmS_m_cloned} +!3 = !{%struct.out._Z13bias_add_nodePvmS_m (i8*, i64, i8*, i64)* @_Z13bias_add_nodePvmS_m_cloned} +!4 = !{%struct.out._Z9tanh_nodePvm (i8*, i64)* @_Z9tanh_nodePvm_cloned} +!5 = !{%struct.out._Z13pool_max_nodePvm (i8*, i64)* @_Z13pool_max_nodePvm_cloned} +!6 = !{%struct.out._Z4rootPvmS_mS_m (i8*, i64, i8*, i64, i8*, i64)* @_Z4rootPvmS_mS_m_cloned} +!7 = !{!8, !9, i64 0} +!8 = !{!"_ZTS6RootIn", !9, i64 0, !12, i64 8, !9, i64 16, !12, i64 24, !9, i64 32, !12, i64 40, !13, i64 48} +!9 = !{!"any pointer", !10, i64 0} +!10 = !{!"omnipotent char", !11, i64 0} +!11 = !{!"Simple C++ TBAA"} +!12 = !{!"long", !10, i64 0} +!13 = !{!"_ZTS5ret_t", !9, i64 0, !12, i64 8} diff --git a/hpvm/test/regressionTests/FuseHPVMTensorNodes/fuseConvNoPool.hpvm.ll b/hpvm/test/regressionTests/FuseHPVMTensorNodes/fuseConvNoPool.hpvm.ll new file mode 100644 index 0000000000000000000000000000000000000000..3e521bb5297f6d8087fec1b2e381c06d290f2d0f --- /dev/null +++ b/hpvm/test/regressionTests/FuseHPVMTensorNodes/fuseConvNoPool.hpvm.ll @@ -0,0 +1,159 @@ +; RUN: opt -load LLVMBuildDFG.so -load LLVMInPlaceDFGAnalysis.so -load LLVMFuseHPVMTensorNodes.so -S -inplace -hpvm-fuse < %s | FileCheck %s +; ModuleID = 'fuseConvNoPool.ll' +source_filename = "fuseConvNoPool.cpp" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + + +; CHECK-LABEL: @_Z4rootPvmS_mS_m_cloned( +; CHECK: call i8* @llvm.hpvm.createNode( +; CHECK-NEXT: call void @llvm.hpvm.bind.input( +; CHECK-NEXT: call void @llvm.hpvm.bind.input( +; CHECK-NEXT: call void @llvm.hpvm.bind.input( +; CHECK-NEXT: call void @llvm.hpvm.bind.input( +; CHECK-NEXT: call void @llvm.hpvm.bind.input( +; CHECK-NEXT: call void @llvm.hpvm.bind.input( +; CHECK: call void @llvm.hpvm.bind.output( +; CHECK: call void @llvm.hpvm.bind.output( +; CHECK: ret + +; CHECK-LABEL: @_Z9conv_nodePvmS_m_cloned__Z13bias_add_nodePvmS_m_cloned__Z9relu_nodePvm_cloned( +; CHECK: call i8* @llvm.hpvm.tensor.convolution( +; CHECK: call i8* @llvm.hpvm.tensor.add( +; CHECK: call i8* @llvm.hpvm.tensor.relu( +; CHECK: ret + + + +%struct.out._Z9conv_nodePvmS_m = type <{ i8*, i64 }> +%struct.out._Z13bias_add_nodePvmS_m = type <{ i8*, i64 }> +%struct.out._Z9relu_nodePvm = type <{ i8*, i64 }> +%struct.out._Z4rootPvmS_mS_m = type <{ i8*, i64 }> + +; Function Attrs: norecurse nounwind uwtable +define dso_local i32 @main() local_unnamed_addr #0 { +entry: + call void @llvm.hpvm.init() + %call = tail call noalias i8* @malloc(i64 64) #3 + %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out._Z4rootPvmS_mS_m (i8*, i64, i8*, i64, i8*, i64)* @_Z4rootPvmS_mS_m_cloned to i8*), i8* %call, i1 false) + call void @llvm.hpvm.wait(i8* %graphID) + %input = bitcast i8* %call to i8** + %0 = load i8*, i8** %input, align 1, !tbaa !6 + tail call void @hpvm_request_tensor(i8* %0, i32 1) #3 + call void @llvm.hpvm.cleanup() + ret i32 0 +} + +; Function Attrs: nofree nounwind +declare dso_local noalias i8* @malloc(i64) local_unnamed_addr #1 + +declare dso_local void @hpvm_request_tensor(i8*, i32) local_unnamed_addr #2 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.convolution(i8*, i8*, i32, i32, i32, i32) #3 + +; Function Attrs: nounwind uwtable +define dso_local %struct.out._Z9conv_nodePvmS_m @_Z9conv_nodePvmS_m_cloned(i8* in %t1, i64 %bytes_t1, i8* in %t2, i64 %bytes_t2) #4 { +entry: + %call1 = call i8* @llvm.hpvm.tensor.convolution(i8* %t1, i8* %t2, i32 2, i32 2, i32 1, i32 1) + %returnStruct = insertvalue %struct.out._Z9conv_nodePvmS_m undef, i8* %call1, 0 + %returnStruct2 = insertvalue %struct.out._Z9conv_nodePvmS_m %returnStruct, i64 0, 1 + ret %struct.out._Z9conv_nodePvmS_m %returnStruct2 +} + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.add(i8*, i8*) #3 + +; Function Attrs: nounwind uwtable +define dso_local %struct.out._Z13bias_add_nodePvmS_m @_Z13bias_add_nodePvmS_m_cloned(i8* in %t1, i64 %bytes_t1, i8* in %t2, i64 %bytes_t2) #4 { +entry: + %call1 = call i8* @llvm.hpvm.tensor.add(i8* %t1, i8* %t2) + %returnStruct = insertvalue %struct.out._Z13bias_add_nodePvmS_m undef, i8* %call1, 0 + %returnStruct2 = insertvalue %struct.out._Z13bias_add_nodePvmS_m %returnStruct, i64 0, 1 + ret %struct.out._Z13bias_add_nodePvmS_m %returnStruct2 +} + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.relu(i8*) #3 + +; Function Attrs: nounwind uwtable +define dso_local %struct.out._Z9relu_nodePvm @_Z9relu_nodePvm_cloned(i8* in %t1, i64 %bytes_t1) #4 { +entry: + %call1 = call i8* @llvm.hpvm.tensor.relu(i8* %t1) + %returnStruct = insertvalue %struct.out._Z9relu_nodePvm undef, i8* %call1, 0 + %returnStruct2 = insertvalue %struct.out._Z9relu_nodePvm %returnStruct, i64 0, 1 + ret %struct.out._Z9relu_nodePvm %returnStruct2 +} + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.createNode(i8*) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.bind.input(i8*, i32, i32, i1) #3 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32, i1) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.bind.output(i8*, i32, i32, i1) #3 + +; Function Attrs: nounwind uwtable +define dso_local %struct.out._Z4rootPvmS_mS_m @_Z4rootPvmS_mS_m_cloned(i8* in %input, i64 %input_bytes, i8* in %conv2d_1_w, i64 %conv2d_1_w_bytes, i8* nocapture readnone %conv2d_1_b, i64 %conv2d_1_b_bytes) #4 { +entry: + %_Z9conv_nodePvmS_m_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out._Z9conv_nodePvmS_m (i8*, i64, i8*, i64)* @_Z9conv_nodePvmS_m_cloned to i8*)) + call void @llvm.hpvm.bind.input(i8* %_Z9conv_nodePvmS_m_cloned.node, i32 0, i32 0, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z9conv_nodePvmS_m_cloned.node, i32 1, i32 1, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z9conv_nodePvmS_m_cloned.node, i32 2, i32 2, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z9conv_nodePvmS_m_cloned.node, i32 3, i32 3, i1 false) + %_Z13bias_add_nodePvmS_m_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out._Z13bias_add_nodePvmS_m (i8*, i64, i8*, i64)* @_Z13bias_add_nodePvmS_m_cloned to i8*)) + %output = call i8* @llvm.hpvm.createEdge(i8* %_Z9conv_nodePvmS_m_cloned.node, i8* %_Z13bias_add_nodePvmS_m_cloned.node, i1 true, i32 0, i32 0, i1 false) + %output1 = call i8* @llvm.hpvm.createEdge(i8* %_Z9conv_nodePvmS_m_cloned.node, i8* %_Z13bias_add_nodePvmS_m_cloned.node, i1 true, i32 1, i32 1, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z13bias_add_nodePvmS_m_cloned.node, i32 4, i32 2, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z13bias_add_nodePvmS_m_cloned.node, i32 5, i32 3, i1 false) + %_Z9relu_nodePvm_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out._Z9relu_nodePvm (i8*, i64)* @_Z9relu_nodePvm_cloned to i8*)) + %output2 = call i8* @llvm.hpvm.createEdge(i8* %_Z13bias_add_nodePvmS_m_cloned.node, i8* %_Z9relu_nodePvm_cloned.node, i1 true, i32 0, i32 0, i1 false) + %output3 = call i8* @llvm.hpvm.createEdge(i8* %_Z13bias_add_nodePvmS_m_cloned.node, i8* %_Z9relu_nodePvm_cloned.node, i1 true, i32 1, i32 1, i1 false) + call void @llvm.hpvm.bind.output(i8* %_Z9relu_nodePvm_cloned.node, i32 0, i32 0, i1 false) + call void @llvm.hpvm.bind.output(i8* %_Z9relu_nodePvm_cloned.node, i32 1, i32 1, i1 false) + ret %struct.out._Z4rootPvmS_mS_m undef +} + +; Function Attrs: nounwind +declare void @llvm.hpvm.init() #3 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.launch(i8*, i8*, i1) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.wait(i8*) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.cleanup() #3 + +attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #1 = { nofree nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #3 = { nounwind } +attributes #4 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} +!hpvm_hint_promise = !{!2, !3, !4, !5} +!hpvm_hint_gpu = !{} +!hpvm_hint_cpu = !{} +!hpvm_hint_cpu_gpu = !{} +!hpvm_hint_cudnn = !{} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 5ccb2a532b5a0d82cee5c0d29a629a29dec2307c)"} +!2 = !{%struct.out._Z9conv_nodePvmS_m (i8*, i64, i8*, i64)* @_Z9conv_nodePvmS_m_cloned} +!3 = !{%struct.out._Z13bias_add_nodePvmS_m (i8*, i64, i8*, i64)* @_Z13bias_add_nodePvmS_m_cloned} +!4 = !{%struct.out._Z9relu_nodePvm (i8*, i64)* @_Z9relu_nodePvm_cloned} +!5 = !{%struct.out._Z4rootPvmS_mS_m (i8*, i64, i8*, i64, i8*, i64)* @_Z4rootPvmS_mS_m_cloned} +!6 = !{!7, !8, i64 0} +!7 = !{!"_ZTS6RootIn", !8, i64 0, !11, i64 8, !8, i64 16, !11, i64 24, !8, i64 32, !11, i64 40, !12, i64 48} +!8 = !{!"any pointer", !9, i64 0} +!9 = !{!"omnipotent char", !10, i64 0} +!10 = !{!"Simple C++ TBAA"} +!11 = !{!"long", !9, i64 0} +!12 = !{!"_ZTS5ret_t", !8, i64 0, !11, i64 8} diff --git a/hpvm/test/regressionTests/FuseHPVMTensorNodes/fuseDiffTargets.hpvm.ll b/hpvm/test/regressionTests/FuseHPVMTensorNodes/fuseDiffTargets.hpvm.ll new file mode 100644 index 0000000000000000000000000000000000000000..5bdfe023bb6854e85d7f725a32c3920c5edc01d8 --- /dev/null +++ b/hpvm/test/regressionTests/FuseHPVMTensorNodes/fuseDiffTargets.hpvm.ll @@ -0,0 +1,195 @@ +; RUN: opt -load LLVMBuildDFG.so -load LLVMInPlaceDFGAnalysis.so -load LLVMFuseHPVMTensorNodes.so -S -inplace -hpvm-fuse < %s | FileCheck %s +; ModuleID = 'fuseDiffTargets.ll' +source_filename = "fuseDiffTargets.cpp" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.out._Z9conv_nodePvmS_m = type <{ i8*, i64 }> +%struct.out._Z13bias_add_nodePvmS_m = type <{ i8*, i64 }> +%struct.out._Z9tanh_nodePvm = type <{ i8*, i64 }> +%struct.out._Z13pool_max_nodePvm = type <{ i8*, i64 }> +%struct.out._Z4rootPvmS_mS_m = type <{ i8*, i64 }> + + +; CHECK-LABEL: @_Z9conv_nodePvmS_m_cloned( +; CHECK: call i8* @llvm.hpvm.tensor.convolution( +; CHECK: ret + +; CHECK-LABEL: @_Z13bias_add_nodePvmS_m_cloned( +; CHECK: call i8* @llvm.hpvm.tensor.add( +; CHECK: ret + +; CHECK-LABEL: @_Z9tanh_nodePvm_cloned( +; CHECK: call i8* @llvm.hpvm.tensor.tanh( +; CHECK: ret + +; CHECK-LABEL: @_Z13pool_max_nodePvm_cloned( +; CHECK: call i8* @llvm.hpvm.tensor.pool.max( +; CHECK: ret + +; CHECK-LABEL: @_Z4rootPvmS_mS_m_cloned( +; CHECK: call i8* @llvm.hpvm.createNode( +; CHECK-NEXT: call void @llvm.hpvm.bind.input( +; CHECK-NEXT: call void @llvm.hpvm.bind.input( +; CHECK-NEXT: call void @llvm.hpvm.bind.input( +; CHECK-NEXT: call void @llvm.hpvm.bind.input( +; CHECK: call i8* @llvm.hpvm.createNode( +; CHECK-NEXT: call i8* @llvm.hpvm.createEdge( +; CHECK-NEXT: call i8* @llvm.hpvm.createEdge( +; CHECK-NEXT: call void @llvm.hpvm.bind.input( +; CHECK-NEXT: call void @llvm.hpvm.bind.input( +; CHECK: call i8* @llvm.hpvm.createNode( +; CHECK-NEXT: call i8* @llvm.hpvm.createEdge( +; CHECK-NEXT: call i8* @llvm.hpvm.createEdge( +; CHECK: call i8* @llvm.hpvm.createNode( +; CHECK-NEXT: call i8* @llvm.hpvm.createEdge( +; CHECK-NEXT: call i8* @llvm.hpvm.createEdge( +; CHECK: call void @llvm.hpvm.bind.output( +; CHECK: call void @llvm.hpvm.bind.output( +; CHECK: ret + + + +; Function Attrs: norecurse nounwind uwtable +define dso_local i32 @main() local_unnamed_addr #0 { +entry: + call void @llvm.hpvm.init() + %call = tail call noalias i8* @malloc(i64 64) #3 + %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out._Z4rootPvmS_mS_m (i8*, i64, i8*, i64, i8*, i64)* @_Z4rootPvmS_mS_m_cloned to i8*), i8* %call, i1 false) + call void @llvm.hpvm.wait(i8* %graphID) + %input = bitcast i8* %call to i8** + %0 = load i8*, i8** %input, align 1, !tbaa !7 + tail call void @hpvm_request_tensor(i8* %0, i32 1) #3 + call void @llvm.hpvm.cleanup() + ret i32 0 +} + +; Function Attrs: nofree nounwind +declare dso_local noalias i8* @malloc(i64) local_unnamed_addr #1 + +declare dso_local void @hpvm_request_tensor(i8*, i32) local_unnamed_addr #2 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.convolution(i8*, i8*, i32, i32, i32, i32) #3 + +; Function Attrs: nounwind uwtable +define dso_local %struct.out._Z9conv_nodePvmS_m @_Z9conv_nodePvmS_m_cloned(i8* in %t1, i64 %bytes_t1, i8* in %t2, i64 %bytes_t2) #4 { +entry: + %call1 = call i8* @llvm.hpvm.tensor.convolution(i8* %t1, i8* %t2, i32 2, i32 2, i32 1, i32 1) + %returnStruct = insertvalue %struct.out._Z9conv_nodePvmS_m undef, i8* %call1, 0 + %returnStruct2 = insertvalue %struct.out._Z9conv_nodePvmS_m %returnStruct, i64 0, 1 + ret %struct.out._Z9conv_nodePvmS_m %returnStruct2 +} + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.add(i8*, i8*) #3 + +; Function Attrs: nounwind uwtable +define dso_local %struct.out._Z13bias_add_nodePvmS_m @_Z13bias_add_nodePvmS_m_cloned(i8* in %t1, i64 %bytes_t1, i8* in %t2, i64 %bytes_t2) #4 { +entry: + %call1 = call i8* @llvm.hpvm.tensor.add(i8* %t1, i8* %t2) + %returnStruct = insertvalue %struct.out._Z13bias_add_nodePvmS_m undef, i8* %call1, 0 + %returnStruct2 = insertvalue %struct.out._Z13bias_add_nodePvmS_m %returnStruct, i64 0, 1 + ret %struct.out._Z13bias_add_nodePvmS_m %returnStruct2 +} + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.tanh(i8*) #3 + +; Function Attrs: nounwind uwtable +define dso_local %struct.out._Z9tanh_nodePvm @_Z9tanh_nodePvm_cloned(i8* in %t1, i64 %bytes_t1) #4 { +entry: + %call1 = call i8* @llvm.hpvm.tensor.tanh(i8* %t1) + %returnStruct = insertvalue %struct.out._Z9tanh_nodePvm undef, i8* %call1, 0 + %returnStruct2 = insertvalue %struct.out._Z9tanh_nodePvm %returnStruct, i64 0, 1 + ret %struct.out._Z9tanh_nodePvm %returnStruct2 +} + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.pool.max(i8*, i32, i32, i32, i32, i32, i32) #3 + +; Function Attrs: nounwind uwtable +define dso_local %struct.out._Z13pool_max_nodePvm @_Z13pool_max_nodePvm_cloned(i8* in %t1, i64 %bytes_t1) #4 { +entry: + %call1 = call i8* @llvm.hpvm.tensor.pool.max(i8* %t1, i32 2, i32 2, i32 0, i32 0, i32 2, i32 2) + %returnStruct = insertvalue %struct.out._Z13pool_max_nodePvm undef, i8* %call1, 0 + %returnStruct2 = insertvalue %struct.out._Z13pool_max_nodePvm %returnStruct, i64 0, 1 + ret %struct.out._Z13pool_max_nodePvm %returnStruct2 +} + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.createNode(i8*) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.bind.input(i8*, i32, i32, i1) #3 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32, i1) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.bind.output(i8*, i32, i32, i1) #3 + +; Function Attrs: nounwind uwtable +define dso_local %struct.out._Z4rootPvmS_mS_m @_Z4rootPvmS_mS_m_cloned(i8* in %input, i64 %input_bytes, i8* in %conv2d_1_w, i64 %conv2d_1_w_bytes, i8* in %conv2d_1_b, i64 %conv2d_1_b_bytes) #4 { +entry: + %_Z9conv_nodePvmS_m_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out._Z9conv_nodePvmS_m (i8*, i64, i8*, i64)* @_Z9conv_nodePvmS_m_cloned to i8*)) + call void @llvm.hpvm.bind.input(i8* %_Z9conv_nodePvmS_m_cloned.node, i32 0, i32 0, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z9conv_nodePvmS_m_cloned.node, i32 1, i32 1, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z9conv_nodePvmS_m_cloned.node, i32 2, i32 2, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z9conv_nodePvmS_m_cloned.node, i32 3, i32 3, i1 false) + %_Z13bias_add_nodePvmS_m_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out._Z13bias_add_nodePvmS_m (i8*, i64, i8*, i64)* @_Z13bias_add_nodePvmS_m_cloned to i8*)) + %output = call i8* @llvm.hpvm.createEdge(i8* %_Z9conv_nodePvmS_m_cloned.node, i8* %_Z13bias_add_nodePvmS_m_cloned.node, i1 true, i32 0, i32 0, i1 false) + %output1 = call i8* @llvm.hpvm.createEdge(i8* %_Z9conv_nodePvmS_m_cloned.node, i8* %_Z13bias_add_nodePvmS_m_cloned.node, i1 true, i32 1, i32 1, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z13bias_add_nodePvmS_m_cloned.node, i32 4, i32 2, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z13bias_add_nodePvmS_m_cloned.node, i32 5, i32 3, i1 false) + %_Z9tanh_nodePvm_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out._Z9tanh_nodePvm (i8*, i64)* @_Z9tanh_nodePvm_cloned to i8*)) + %output2 = call i8* @llvm.hpvm.createEdge(i8* %_Z13bias_add_nodePvmS_m_cloned.node, i8* %_Z9tanh_nodePvm_cloned.node, i1 true, i32 0, i32 0, i1 false) + %output3 = call i8* @llvm.hpvm.createEdge(i8* %_Z13bias_add_nodePvmS_m_cloned.node, i8* %_Z9tanh_nodePvm_cloned.node, i1 true, i32 1, i32 1, i1 false) + %_Z13pool_max_nodePvm_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out._Z13pool_max_nodePvm (i8*, i64)* @_Z13pool_max_nodePvm_cloned to i8*)) + %output4 = call i8* @llvm.hpvm.createEdge(i8* %_Z9tanh_nodePvm_cloned.node, i8* %_Z13pool_max_nodePvm_cloned.node, i1 true, i32 0, i32 0, i1 false) + %output5 = call i8* @llvm.hpvm.createEdge(i8* %_Z9tanh_nodePvm_cloned.node, i8* %_Z13pool_max_nodePvm_cloned.node, i1 true, i32 1, i32 1, i1 false) + call void @llvm.hpvm.bind.output(i8* %_Z13pool_max_nodePvm_cloned.node, i32 0, i32 0, i1 false) + call void @llvm.hpvm.bind.output(i8* %_Z13pool_max_nodePvm_cloned.node, i32 1, i32 1, i1 false) + ret %struct.out._Z4rootPvmS_mS_m undef +} + +; Function Attrs: nounwind +declare void @llvm.hpvm.init() #3 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.launch(i8*, i8*, i1) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.wait(i8*) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.cleanup() #3 + +attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #1 = { nofree nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #3 = { nounwind } +attributes #4 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} +!hpvm_hint_gpu = !{!2, !3, !4} +!hpvm_hint_cpu = !{!5, !6} +!hpvm_hint_cpu_gpu = !{} +!hpvm_hint_cudnn = !{} +!hpvm_hint_promise = !{} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 5ccb2a532b5a0d82cee5c0d29a629a29dec2307c)"} +!2 = !{%struct.out._Z9conv_nodePvmS_m (i8*, i64, i8*, i64)* @_Z9conv_nodePvmS_m_cloned} +!3 = !{%struct.out._Z9tanh_nodePvm (i8*, i64)* @_Z9tanh_nodePvm_cloned} +!4 = !{%struct.out._Z13pool_max_nodePvm (i8*, i64)* @_Z13pool_max_nodePvm_cloned} +!5 = !{%struct.out._Z13bias_add_nodePvmS_m (i8*, i64, i8*, i64)* @_Z13bias_add_nodePvmS_m_cloned} +!6 = !{%struct.out._Z4rootPvmS_mS_m (i8*, i64, i8*, i64, i8*, i64)* @_Z4rootPvmS_mS_m_cloned} +!7 = !{!8, !9, i64 0} +!8 = !{!"_ZTS6RootIn", !9, i64 0, !12, i64 8, !9, i64 16, !12, i64 24, !9, i64 32, !12, i64 40, !13, i64 48} +!9 = !{!"any pointer", !10, i64 0} +!10 = !{!"omnipotent char", !11, i64 0} +!11 = !{!"Simple C++ TBAA"} +!12 = !{!"long", !10, i64 0} +!13 = !{!"_ZTS5ret_t", !9, i64 0, !12, i64 8} diff --git a/hpvm/test/regressionTests/FuseHPVMTensorNodes/fuseMatMul.hpvm.ll b/hpvm/test/regressionTests/FuseHPVMTensorNodes/fuseMatMul.hpvm.ll new file mode 100644 index 0000000000000000000000000000000000000000..e888631a3027a4cb30468ffca223e3429caa772f --- /dev/null +++ b/hpvm/test/regressionTests/FuseHPVMTensorNodes/fuseMatMul.hpvm.ll @@ -0,0 +1,187 @@ +; RUN: opt -load LLVMBuildDFG.so -load LLVMInPlaceDFGAnalysis.so -load LLVMFuseHPVMTensorNodes.so -S -inplace -hpvm-fuse < %s | FileCheck %s +; ModuleID = 'fuseMatMul.ll' +source_filename = "fuseMatMul.cpp" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.out._Z11matmul_nodePvmS_m = type <{ i8*, i64 }> +%struct.out._Z13bias_add_nodePvmS_m = type <{ i8*, i64 }> +%struct.out._Z9tanh_nodePvm = type <{ i8*, i64 }> +%struct.out._Z13pool_max_nodePvm = type <{ i8*, i64 }> +%struct.out._Z4rootPvmS_mS_m = type <{ i8*, i64 }> + +; CHECK-LABEL: @_Z13pool_max_nodePvm_cloned( +; CHECK: call i8* @llvm.hpvm.tensor.pool.max( +; CHECK: ret + + +; CHECK-LABEL: @_Z4rootPvmS_mS_m_cloned( +; CHECK: call i8* @llvm.hpvm.createNode( +; CHECK-NEXT: call void @llvm.hpvm.bind.input( +; CHECK-NEXT: call void @llvm.hpvm.bind.input( +; CHECK-NEXT: call void @llvm.hpvm.bind.input( +; CHECK-NEXT: call void @llvm.hpvm.bind.input( +; CHECK-NEXT: call void @llvm.hpvm.bind.input( +; CHECK-NEXT: call void @llvm.hpvm.bind.input( +; CHECK: call i8* @llvm.hpvm.createNode( +; CHECK-NEXT: call i8* @llvm.hpvm.createEdge( +; CHECK-NEXT: call i8* @llvm.hpvm.createEdge( +; CHECK: call void @llvm.hpvm.bind.output( +; CHECK: call void @llvm.hpvm.bind.output( +; CHECK: ret + + +; CHECK-LABEL: @_Z11matmul_nodePvmS_m_cloned__Z13bias_add_nodePvmS_m_cloned__Z9tanh_nodePvm_cloned( +; CHECK: call i8* @llvm.hpvm.tensor.mul( +; CHECK: call i8* @llvm.hpvm.tensor.add( +; CHECK: call i8* @llvm.hpvm.tensor.tanh( +; CHECK-NOT: call i8* @llvm.hpvm.tensor.pool.max( +; CHECK: ret + + + + + +; Function Attrs: norecurse nounwind uwtable +define dso_local i32 @main() local_unnamed_addr #0 { +entry: + call void @llvm.hpvm.init() + %call = tail call noalias i8* @malloc(i64 64) #3 + %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out._Z4rootPvmS_mS_m (i8*, i64, i8*, i64, i8*, i64)* @_Z4rootPvmS_mS_m_cloned to i8*), i8* %call, i1 false) + call void @llvm.hpvm.wait(i8* %graphID) + %input = bitcast i8* %call to i8** + %0 = load i8*, i8** %input, align 1, !tbaa !7 + tail call void @hpvm_request_tensor(i8* %0, i32 1) #3 + call void @llvm.hpvm.cleanup() + ret i32 0 +} + +; Function Attrs: nofree nounwind +declare dso_local noalias i8* @malloc(i64) local_unnamed_addr #1 + +declare dso_local void @hpvm_request_tensor(i8*, i32) local_unnamed_addr #2 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.mul(i8*, i8*) #3 + +; Function Attrs: nounwind uwtable +define dso_local %struct.out._Z11matmul_nodePvmS_m @_Z11matmul_nodePvmS_m_cloned(i8* in %t1, i64 %bytes_t1, i8* in %t2, i64 %bytes_t2) #4 { +entry: + %call1 = call i8* @llvm.hpvm.tensor.mul(i8* %t1, i8* %t2) + %returnStruct = insertvalue %struct.out._Z11matmul_nodePvmS_m undef, i8* %call1, 0 + %returnStruct2 = insertvalue %struct.out._Z11matmul_nodePvmS_m %returnStruct, i64 0, 1 + ret %struct.out._Z11matmul_nodePvmS_m %returnStruct2 +} + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.add(i8*, i8*) #3 + +; Function Attrs: nounwind uwtable +define dso_local %struct.out._Z13bias_add_nodePvmS_m @_Z13bias_add_nodePvmS_m_cloned(i8* in %t1, i64 %bytes_t1, i8* in %t2, i64 %bytes_t2) #4 { +entry: + %call1 = call i8* @llvm.hpvm.tensor.add(i8* %t1, i8* %t2) + %returnStruct = insertvalue %struct.out._Z13bias_add_nodePvmS_m undef, i8* %call1, 0 + %returnStruct2 = insertvalue %struct.out._Z13bias_add_nodePvmS_m %returnStruct, i64 0, 1 + ret %struct.out._Z13bias_add_nodePvmS_m %returnStruct2 +} + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.tanh(i8*) #3 + +; Function Attrs: nounwind uwtable +define dso_local %struct.out._Z9tanh_nodePvm @_Z9tanh_nodePvm_cloned(i8* in %t1, i64 %bytes_t1) #4 { +entry: + %call1 = call i8* @llvm.hpvm.tensor.tanh(i8* %t1) + %returnStruct = insertvalue %struct.out._Z9tanh_nodePvm undef, i8* %call1, 0 + %returnStruct2 = insertvalue %struct.out._Z9tanh_nodePvm %returnStruct, i64 0, 1 + ret %struct.out._Z9tanh_nodePvm %returnStruct2 +} + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.pool.max(i8*, i32, i32, i32, i32, i32, i32) #3 + +; Function Attrs: nounwind uwtable +define dso_local %struct.out._Z13pool_max_nodePvm @_Z13pool_max_nodePvm_cloned(i8* in %t1, i64 %bytes_t1) #4 { +entry: + %call1 = call i8* @llvm.hpvm.tensor.pool.max(i8* %t1, i32 2, i32 2, i32 0, i32 0, i32 2, i32 2) + %returnStruct = insertvalue %struct.out._Z13pool_max_nodePvm undef, i8* %call1, 0 + %returnStruct2 = insertvalue %struct.out._Z13pool_max_nodePvm %returnStruct, i64 0, 1 + ret %struct.out._Z13pool_max_nodePvm %returnStruct2 +} + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.createNode(i8*) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.bind.input(i8*, i32, i32, i1) #3 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32, i1) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.bind.output(i8*, i32, i32, i1) #3 + +; Function Attrs: nounwind uwtable +define dso_local %struct.out._Z4rootPvmS_mS_m @_Z4rootPvmS_mS_m_cloned(i8* in %input, i64 %input_bytes, i8* in %matmul2d_1_w, i64 %matmul2d_1_w_bytes, i8* in %matmul2d_1_b, i64 %matmul2d_1_b_bytes) #4 { +entry: + %_Z11matmul_nodePvmS_m_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out._Z11matmul_nodePvmS_m (i8*, i64, i8*, i64)* @_Z11matmul_nodePvmS_m_cloned to i8*)) + call void @llvm.hpvm.bind.input(i8* %_Z11matmul_nodePvmS_m_cloned.node, i32 0, i32 0, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z11matmul_nodePvmS_m_cloned.node, i32 1, i32 1, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z11matmul_nodePvmS_m_cloned.node, i32 2, i32 2, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z11matmul_nodePvmS_m_cloned.node, i32 3, i32 3, i1 false) + %_Z13bias_add_nodePvmS_m_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out._Z13bias_add_nodePvmS_m (i8*, i64, i8*, i64)* @_Z13bias_add_nodePvmS_m_cloned to i8*)) + %output = call i8* @llvm.hpvm.createEdge(i8* %_Z11matmul_nodePvmS_m_cloned.node, i8* %_Z13bias_add_nodePvmS_m_cloned.node, i1 true, i32 0, i32 0, i1 false) + %output1 = call i8* @llvm.hpvm.createEdge(i8* %_Z11matmul_nodePvmS_m_cloned.node, i8* %_Z13bias_add_nodePvmS_m_cloned.node, i1 true, i32 1, i32 1, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z13bias_add_nodePvmS_m_cloned.node, i32 4, i32 2, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z13bias_add_nodePvmS_m_cloned.node, i32 5, i32 3, i1 false) + %_Z9tanh_nodePvm_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out._Z9tanh_nodePvm (i8*, i64)* @_Z9tanh_nodePvm_cloned to i8*)) + %output2 = call i8* @llvm.hpvm.createEdge(i8* %_Z13bias_add_nodePvmS_m_cloned.node, i8* %_Z9tanh_nodePvm_cloned.node, i1 true, i32 0, i32 0, i1 false) + %output3 = call i8* @llvm.hpvm.createEdge(i8* %_Z13bias_add_nodePvmS_m_cloned.node, i8* %_Z9tanh_nodePvm_cloned.node, i1 true, i32 1, i32 1, i1 false) + %_Z13pool_max_nodePvm_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out._Z13pool_max_nodePvm (i8*, i64)* @_Z13pool_max_nodePvm_cloned to i8*)) + %output4 = call i8* @llvm.hpvm.createEdge(i8* %_Z9tanh_nodePvm_cloned.node, i8* %_Z13pool_max_nodePvm_cloned.node, i1 true, i32 0, i32 0, i1 false) + %output5 = call i8* @llvm.hpvm.createEdge(i8* %_Z9tanh_nodePvm_cloned.node, i8* %_Z13pool_max_nodePvm_cloned.node, i1 true, i32 1, i32 1, i1 false) + call void @llvm.hpvm.bind.output(i8* %_Z13pool_max_nodePvm_cloned.node, i32 0, i32 0, i1 false) + call void @llvm.hpvm.bind.output(i8* %_Z13pool_max_nodePvm_cloned.node, i32 1, i32 1, i1 false) + ret %struct.out._Z4rootPvmS_mS_m undef +} + +; Function Attrs: nounwind +declare void @llvm.hpvm.init() #3 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.launch(i8*, i8*, i1) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.wait(i8*) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.cleanup() #3 + +attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #1 = { nofree nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #3 = { nounwind } +attributes #4 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} +!hpvm_hint_promise = !{!2, !3, !4, !5, !6} +!hpvm_hint_gpu = !{} +!hpvm_hint_cpu = !{} +!hpvm_hint_cpu_gpu = !{} +!hpvm_hint_cudnn = !{} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 5ccb2a532b5a0d82cee5c0d29a629a29dec2307c)"} +!2 = !{%struct.out._Z11matmul_nodePvmS_m (i8*, i64, i8*, i64)* @_Z11matmul_nodePvmS_m_cloned} +!3 = !{%struct.out._Z13bias_add_nodePvmS_m (i8*, i64, i8*, i64)* @_Z13bias_add_nodePvmS_m_cloned} +!4 = !{%struct.out._Z9tanh_nodePvm (i8*, i64)* @_Z9tanh_nodePvm_cloned} +!5 = !{%struct.out._Z13pool_max_nodePvm (i8*, i64)* @_Z13pool_max_nodePvm_cloned} +!6 = !{%struct.out._Z4rootPvmS_mS_m (i8*, i64, i8*, i64, i8*, i64)* @_Z4rootPvmS_mS_m_cloned} +!7 = !{!8, !9, i64 0} +!8 = !{!"_ZTS6RootIn", !9, i64 0, !12, i64 8, !9, i64 16, !12, i64 24, !9, i64 32, !12, i64 40, !13, i64 48} +!9 = !{!"any pointer", !10, i64 0} +!10 = !{!"omnipotent char", !11, i64 0} +!11 = !{!"Simple C++ TBAA"} +!12 = !{!"long", !10, i64 0} +!13 = !{!"_ZTS5ret_t", !9, i64 0, !12, i64 8} diff --git a/hpvm/test/regressionTests/FuseHPVMTensorNodes/multiLaunch.hpvm.ll b/hpvm/test/regressionTests/FuseHPVMTensorNodes/multiLaunch.hpvm.ll new file mode 100644 index 0000000000000000000000000000000000000000..6c40d752cc71b5bec7931257e5e431041073668e --- /dev/null +++ b/hpvm/test/regressionTests/FuseHPVMTensorNodes/multiLaunch.hpvm.ll @@ -0,0 +1,184 @@ +; RUN: opt -load LLVMBuildDFG.so -load LLVMInPlaceDFGAnalysis.so -load LLVMFuseHPVMTensorNodes.so -S -inplace -hpvm-fuse < %s | FileCheck %s +; ModuleID = 'multiLaunch.ll' +source_filename = "multiLaunch.cpp" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.out._Z9conv_nodePvmS_m = type <{ i8*, i64 }> +%struct.out._Z13bias_add_nodePvmS_m = type <{ i8*, i64 }> +%struct.out._Z9tanh_nodePvm = type <{ i8*, i64 }> +%struct.out._Z13pool_max_nodePvm = type <{ i8*, i64 }> +%struct.out._Z4rootPvmS_mS_m = type <{ i8*, i64 }> + + +; CHECK-LABEL: @_Z4rootPvmS_mS_m_cloned( +; CHECK: call i8* @llvm.hpvm.createNode( +; CHECK-NEXT: call void @llvm.hpvm.bind.input( +; CHECK-NEXT: call void @llvm.hpvm.bind.input( +; CHECK-NEXT: call void @llvm.hpvm.bind.input( +; CHECK-NEXT: call void @llvm.hpvm.bind.input( +; CHECK-NEXT: call void @llvm.hpvm.bind.input( +; CHECK-NEXT: call void @llvm.hpvm.bind.input( +; CHECK: call void @llvm.hpvm.bind.output( +; CHECK: call void @llvm.hpvm.bind.output( +; CHECK: ret + + +; CHECK-LABEL: @_Z9conv_nodePvmS_m_cloned__Z13bias_add_nodePvmS_m_cloned__Z9tanh_nodePvm_cloned__Z13pool_max_nodePvm_cloned( +; CHECK: call i8* @llvm.hpvm.tensor.convolution( +; CHECK-NEXT: call i8* @llvm.hpvm.tensor.add( +; CHECK-NEXT: call i8* @llvm.hpvm.tensor.tanh( +; CHECK-NEXT: call i8* @llvm.hpvm.tensor.pool.max( +; CHECK: ret + + + +; Function Attrs: norecurse uwtable +define dso_local i32 @main() local_unnamed_addr #0 { +entry: + call void @llvm.hpvm.init() + %call = tail call noalias i8* @malloc(i64 64) #3 + %call1 = tail call noalias i8* @malloc(i64 64) #3 + %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out._Z4rootPvmS_mS_m (i8*, i64, i8*, i64, i8*, i64)* @_Z4rootPvmS_mS_m_cloned to i8*), i8* %call, i1 false) + call void @llvm.hpvm.wait(i8* %graphID) + %input = bitcast i8* %call to i8** + %0 = load i8*, i8** %input, align 1, !tbaa !7 + tail call void @hpvm_request_tensor(i8* %0, i32 1) + %graphID1 = call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out._Z4rootPvmS_mS_m (i8*, i64, i8*, i64, i8*, i64)* @_Z4rootPvmS_mS_m_cloned to i8*), i8* %call1, i1 false) + call void @llvm.hpvm.wait(i8* %graphID1) + %input4 = bitcast i8* %call1 to i8** + %1 = load i8*, i8** %input4, align 1, !tbaa !7 + tail call void @hpvm_request_tensor(i8* %1, i32 1) + call void @llvm.hpvm.cleanup() + ret i32 0 +} + +; Function Attrs: nofree nounwind +declare dso_local noalias i8* @malloc(i64) local_unnamed_addr #1 + +declare dso_local void @hpvm_request_tensor(i8*, i32) local_unnamed_addr #2 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.convolution(i8*, i8*, i32, i32, i32, i32) #3 + +; Function Attrs: uwtable +define dso_local %struct.out._Z9conv_nodePvmS_m @_Z9conv_nodePvmS_m_cloned(i8* in %t1, i64 %bytes_t1, i8* in %t2, i64 %bytes_t2) #4 { +entry: + %call1 = call i8* @llvm.hpvm.tensor.convolution(i8* %t1, i8* %t2, i32 2, i32 2, i32 1, i32 1) + %returnStruct = insertvalue %struct.out._Z9conv_nodePvmS_m undef, i8* %call1, 0 + %returnStruct2 = insertvalue %struct.out._Z9conv_nodePvmS_m %returnStruct, i64 0, 1 + ret %struct.out._Z9conv_nodePvmS_m %returnStruct2 +} + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.add(i8*, i8*) #3 + +; Function Attrs: uwtable +define dso_local %struct.out._Z13bias_add_nodePvmS_m @_Z13bias_add_nodePvmS_m_cloned(i8* in %t1, i64 %bytes_t1, i8* in %t2, i64 %bytes_t2) #4 { +entry: + %call1 = call i8* @llvm.hpvm.tensor.add(i8* %t1, i8* %t2) + %returnStruct = insertvalue %struct.out._Z13bias_add_nodePvmS_m undef, i8* %call1, 0 + %returnStruct2 = insertvalue %struct.out._Z13bias_add_nodePvmS_m %returnStruct, i64 0, 1 + ret %struct.out._Z13bias_add_nodePvmS_m %returnStruct2 +} + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.tanh(i8*) #3 + +; Function Attrs: uwtable +define dso_local %struct.out._Z9tanh_nodePvm @_Z9tanh_nodePvm_cloned(i8* in %t1, i64 %bytes_t1) #4 { +entry: + %call1 = call i8* @llvm.hpvm.tensor.tanh(i8* %t1) + %returnStruct = insertvalue %struct.out._Z9tanh_nodePvm undef, i8* %call1, 0 + %returnStruct2 = insertvalue %struct.out._Z9tanh_nodePvm %returnStruct, i64 0, 1 + ret %struct.out._Z9tanh_nodePvm %returnStruct2 +} + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.tensor.pool.max(i8*, i32, i32, i32, i32, i32, i32) #3 + +; Function Attrs: uwtable +define dso_local %struct.out._Z13pool_max_nodePvm @_Z13pool_max_nodePvm_cloned(i8* in %t1, i64 %bytes_t1) #4 { +entry: + %call1 = call i8* @llvm.hpvm.tensor.pool.max(i8* %t1, i32 2, i32 2, i32 0, i32 0, i32 2, i32 2) + %returnStruct = insertvalue %struct.out._Z13pool_max_nodePvm undef, i8* %call1, 0 + %returnStruct2 = insertvalue %struct.out._Z13pool_max_nodePvm %returnStruct, i64 0, 1 + ret %struct.out._Z13pool_max_nodePvm %returnStruct2 +} + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.createNode(i8*) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.bind.input(i8*, i32, i32, i1) #3 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32, i1) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.bind.output(i8*, i32, i32, i1) #3 + +; Function Attrs: uwtable +define dso_local %struct.out._Z4rootPvmS_mS_m @_Z4rootPvmS_mS_m_cloned(i8* in %input, i64 %input_bytes, i8* in %conv2d_1_w, i64 %conv2d_1_w_bytes, i8* in %conv2d_1_b, i64 %conv2d_1_b_bytes) #4 { +entry: + %_Z9conv_nodePvmS_m_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out._Z9conv_nodePvmS_m (i8*, i64, i8*, i64)* @_Z9conv_nodePvmS_m_cloned to i8*)) + call void @llvm.hpvm.bind.input(i8* %_Z9conv_nodePvmS_m_cloned.node, i32 0, i32 0, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z9conv_nodePvmS_m_cloned.node, i32 1, i32 1, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z9conv_nodePvmS_m_cloned.node, i32 2, i32 2, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z9conv_nodePvmS_m_cloned.node, i32 3, i32 3, i1 false) + %_Z13bias_add_nodePvmS_m_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out._Z13bias_add_nodePvmS_m (i8*, i64, i8*, i64)* @_Z13bias_add_nodePvmS_m_cloned to i8*)) + %output = call i8* @llvm.hpvm.createEdge(i8* %_Z9conv_nodePvmS_m_cloned.node, i8* %_Z13bias_add_nodePvmS_m_cloned.node, i1 true, i32 0, i32 0, i1 false) + %output1 = call i8* @llvm.hpvm.createEdge(i8* %_Z9conv_nodePvmS_m_cloned.node, i8* %_Z13bias_add_nodePvmS_m_cloned.node, i1 true, i32 1, i32 1, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z13bias_add_nodePvmS_m_cloned.node, i32 4, i32 2, i1 false) + call void @llvm.hpvm.bind.input(i8* %_Z13bias_add_nodePvmS_m_cloned.node, i32 5, i32 3, i1 false) + %_Z9tanh_nodePvm_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out._Z9tanh_nodePvm (i8*, i64)* @_Z9tanh_nodePvm_cloned to i8*)) + %output2 = call i8* @llvm.hpvm.createEdge(i8* %_Z13bias_add_nodePvmS_m_cloned.node, i8* %_Z9tanh_nodePvm_cloned.node, i1 true, i32 0, i32 0, i1 false) + %output3 = call i8* @llvm.hpvm.createEdge(i8* %_Z13bias_add_nodePvmS_m_cloned.node, i8* %_Z9tanh_nodePvm_cloned.node, i1 true, i32 1, i32 1, i1 false) + %_Z13pool_max_nodePvm_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out._Z13pool_max_nodePvm (i8*, i64)* @_Z13pool_max_nodePvm_cloned to i8*)) + %output4 = call i8* @llvm.hpvm.createEdge(i8* %_Z9tanh_nodePvm_cloned.node, i8* %_Z13pool_max_nodePvm_cloned.node, i1 true, i32 0, i32 0, i1 false) + %output5 = call i8* @llvm.hpvm.createEdge(i8* %_Z9tanh_nodePvm_cloned.node, i8* %_Z13pool_max_nodePvm_cloned.node, i1 true, i32 1, i32 1, i1 false) + call void @llvm.hpvm.bind.output(i8* %_Z13pool_max_nodePvm_cloned.node, i32 0, i32 0, i1 false) + call void @llvm.hpvm.bind.output(i8* %_Z13pool_max_nodePvm_cloned.node, i32 1, i32 1, i1 false) + ret %struct.out._Z4rootPvmS_mS_m undef +} + +; Function Attrs: nounwind +declare void @llvm.hpvm.init() #3 + +; Function Attrs: nounwind +declare i8* @llvm.hpvm.launch(i8*, i8*, i1) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.wait(i8*) #3 + +; Function Attrs: nounwind +declare void @llvm.hpvm.cleanup() #3 + +attributes #0 = { norecurse uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nofree nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind } +attributes #4 = { uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} +!hpvm_hint_promise = !{!2, !3, !4, !5, !6} +!hpvm_hint_gpu = !{} +!hpvm_hint_cpu = !{} +!hpvm_hint_cpu_gpu = !{} +!hpvm_hint_cudnn = !{} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 0bba65ccd19f0ff92a84eaadbc0450fa7a0f0ccc)"} +!2 = !{%struct.out._Z9conv_nodePvmS_m (i8*, i64, i8*, i64)* @_Z9conv_nodePvmS_m_cloned} +!3 = !{%struct.out._Z13bias_add_nodePvmS_m (i8*, i64, i8*, i64)* @_Z13bias_add_nodePvmS_m_cloned} +!4 = !{%struct.out._Z9tanh_nodePvm (i8*, i64)* @_Z9tanh_nodePvm_cloned} +!5 = !{%struct.out._Z13pool_max_nodePvm (i8*, i64)* @_Z13pool_max_nodePvm_cloned} +!6 = !{%struct.out._Z4rootPvmS_mS_m (i8*, i64, i8*, i64, i8*, i64)* @_Z4rootPvmS_mS_m_cloned} +!7 = !{!8, !9, i64 0} +!8 = !{!"_ZTS6RootIn", !9, i64 0, !12, i64 8, !9, i64 16, !12, i64 24, !9, i64 32, !12, i64 40, !13, i64 48} +!9 = !{!"any pointer", !10, i64 0} +!10 = !{!"omnipotent char", !11, i64 0} +!11 = !{!"Simple C++ TBAA"} +!12 = !{!"long", !10, i64 0} +!13 = !{!"_ZTS5ret_t", !9, i64 0, !12, i64 8}