Commit 4f8007cf authored by Yifan Zhao
Browse files

Merge branch 'hpvm-release-internal' into hpvm-release

parents 86f9a54a 8a4c1734
; RUN: opt -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -load LLVMDFG2LLVM_CPU.so -S -localmem -dfg2llvm-opencl -dfg2llvm-cpu < %s | FileCheck %s
; ModuleID = 'ThreeLevel.ll'
source_filename = "ThreeLevel.c"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
%struct.Root = type { i32*, i64, i32*, i64 }
%emptyStruct = type <{}>
%emptyStruct.0 = type <{}>
%emptyStruct.1 = type <{}>
%emptyStruct.2 = type <{}>
declare dso_local void @__hpvm__hint(i32) local_unnamed_addr #0
declare dso_local void @__hpvm__attributes(i32, ...) local_unnamed_addr #0
; Function Attrs: argmemonly nounwind
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
declare dso_local i8* @__hpvm__getNode(...) local_unnamed_addr #0
declare dso_local i8* @__hpvm__getParentNode(i8*) local_unnamed_addr #0
declare dso_local i64 @__hpvm__getNodeInstanceID_x(i8*) local_unnamed_addr #0
declare dso_local i64 @__hpvm__getNodeInstanceID_y(i8*) local_unnamed_addr #0
declare dso_local i64 @__hpvm__getNumNodeInstances_x(i8*) local_unnamed_addr #0
declare dso_local i64 @__hpvm__getNumNodeInstances_y(i8*) local_unnamed_addr #0
; Function Attrs: argmemonly nounwind
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
declare dso_local i8* @__hpvm__createNodeND(i32, ...) local_unnamed_addr #0
declare dso_local void @__hpvm__bindIn(i8*, i32, i32, i32) local_unnamed_addr #0
; CHECK-LABEL: @Launch(
; CHECK: call i8* @llvm_hpvm_cpu_launch(i8*
; CHECK-NEXT: call i8* @llvm.hpvm.launch(i8*
; CHECK-NEXT: call void @llvm_hpvm_cpu_wait(i8*
; @Launch builds a %struct.Root argument record on the stack, launches the
; dataflow graph whose root function is @PipeRoot_cloned through
; @llvm.hpvm.launch, and passes the returned handle to @llvm.hpvm.wait.
; Takes no parameters and returns void.
; NOTE(review): the two malloc results are neither null-checked nor freed in
; this function.
; Function Attrs: noinline nounwind uwtable
define dso_local void @Launch() local_unnamed_addr #2 {
entry:
; Stack slot for the argument record; 32 bytes = four 8-byte fields under the
; module's data layout.
%RootArgs = alloca %struct.Root, align 8
%0 = bitcast %struct.Root* %RootArgs to i8*
call void @llvm.lifetime.start.p0i8(i64 32, i8* nonnull %0) #6
; Field 0: first 1024-byte heap buffer (stored through an i8** view of the record).
%call = tail call noalias i8* @malloc(i64 1024) #6
%1 = bitcast %struct.Root* %RootArgs to i8**
store i8* %call, i8** %1, align 8, !tbaa !6
; Field 1: size value 1024.
%Insize = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 1
store i64 1024, i64* %Insize, align 8, !tbaa !12
; Field 2: second 1024-byte heap buffer.
%output = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 2
%call1 = tail call noalias i8* @malloc(i64 1024) #6
%2 = bitcast i32** %output to i8**
store i8* %call1, i8** %2, align 8, !tbaa !13
; Field 3: size value 1024.
%Outsize = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 3
store i64 1024, i64* %Outsize, align 8, !tbaa !14
; %3 is the same address as %0, recomputed for the launch call.
%3 = bitcast %struct.Root* %RootArgs to i8*
; The i1 false operand is presumably the "streaming" flag -- TODO confirm
; against the HPVM intrinsic specification.
%graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%emptyStruct.2 (i32*, i64, i32*, i64)* @PipeRoot_cloned to i8*), i8* %3, i1 false)
call void @llvm.hpvm.wait(i8* %graphID)
call void @llvm.lifetime.end.p0i8(i64 32, i8* nonnull %0) #6
ret void
}
; Function Attrs: nofree nounwind
declare dso_local noalias i8* @malloc(i64) local_unnamed_addr #3
declare dso_local i8* @__hpvm__launch(i32, ...) local_unnamed_addr #0
declare dso_local void @__hpvm__wait(i8*) local_unnamed_addr #0
; CHECK-LABEL: @main(
; CHECK: call i8* @llvm_hpvm_ocl_initContext(i32
; CHECK: call i8* @llvm_hpvm_ocl_launch(i8*
; CHECK: call void @llvm_hpvm_ocl_clearContext(i8*
; CHECK-LABEL: @Func2_cloned.2_cloned_cloned_cloned_cloned_cloned_cloned(
; CHECK: call i8* @llvm_hpvm_ocl_argument_ptr(i8*
; CHECK: call void @llvm_hpvm_ocl_argument_scalar(i8*
; CHECK: call i8* @llvm_hpvm_ocl_argument_ptr(i8*
; CHECK: call void @llvm_hpvm_ocl_argument_scalar(i8*
; CHECK: call i8* @llvm_hpvm_ocl_executeNode(i8*
; CHECK-NEXT: call void @llvm_hpvm_ocl_wait(i8*
; CHECK-NEXT: call void @llvm_hpvm_ocl_free(i8*
; CHECK-NEXT: call void @llvm_hpvm_ocl_free(i8*
; CHECK-LABEL: @PipeRoot_cloned.3(
; CHECK: call void @llvm_hpvm_cpu_dstack_push(
; CHECK-NEXT: @Func2_cloned.2_cloned_cloned_cloned_cloned_cloned_cloned(
; CHECK-NEXT: call void @llvm_hpvm_cpu_dstack_pop(
; CHECK-LABEL: define i8* @LaunchDataflowGraph(i8*
; @main brackets a single call to @Launch with @llvm.hpvm.init and
; @llvm.hpvm.cleanup, then returns 0.
; Function Attrs: nounwind uwtable
define dso_local i32 @main() local_unnamed_addr #4 {
entry:
call void @llvm.hpvm.init()
tail call void @Launch()
call void @llvm.hpvm.cleanup()
ret i32 0
}
declare dso_local void @__hpvm__init(...) local_unnamed_addr #0
declare dso_local void @__hpvm__cleanup(...) local_unnamed_addr #0
declare i8* @llvm_hpvm_initializeTimerSet()
declare void @llvm_hpvm_switchToTimer(i8**, i32)
declare void @llvm_hpvm_printTimerSet(i8**, i8*)
; Function Attrs: nounwind readnone
declare i8* @llvm.hpvm.getNode() #5
; Function Attrs: nounwind readnone
declare i8* @llvm.hpvm.getParentNode(i8*) #5
; Function Attrs: nounwind readnone
declare i64 @llvm.hpvm.getNodeInstanceID.x(i8*) #5
; Function Attrs: nounwind readnone
declare i64 @llvm.hpvm.getNodeInstanceID.y(i8*) #5
; Function Attrs: nounwind readnone
declare i64 @llvm.hpvm.getNumNodeInstances.x(i8*) #5
; Function Attrs: nounwind readnone
declare i64 @llvm.hpvm.getNumNodeInstances.y(i8*) #5
; Function Attrs: nounwind
declare i8* @llvm.hpvm.createNode2D(i8*, i64, i64) #6
; @Func1_cloned is the node function that touches memory in this test: it
; performs Out[ix] += In[iy], where ix and iy are computed from the HPVM node
; query intrinsics below.
;   %In      - i32 buffer that is read (marked `in`).
;   %Insize  - not referenced in the body.
;   %Out     - i32 buffer that is read and written (marked `in out`).
;   %Outsize - not referenced in the body.
; Returns an undef %emptyStruct (the struct has no fields).
; NOTE(review): no bounds test against %Insize/%Outsize is performed here.
; Function Attrs: nounwind uwtable
define dso_local %emptyStruct @Func1_cloned(i32* in %In, i64 %Insize, i32* in out %Out, i64 %Outsize) #4 {
entry:
; %call4 is the getNode result; %call15 is getParentNode applied to it.
%call4 = call i8* @llvm.hpvm.getNode()
%call15 = call i8* @llvm.hpvm.getParentNode(i8* %call4)
; Instance IDs (x, y) queried on %call4, then on %call15.
%call26 = call i64 @llvm.hpvm.getNodeInstanceID.x(i8* %call4)
%call37 = call i64 @llvm.hpvm.getNodeInstanceID.y(i8* %call4)
%call58 = call i64 @llvm.hpvm.getNodeInstanceID.x(i8* %call15)
%call79 = call i64 @llvm.hpvm.getNodeInstanceID.y(i8* %call15)
; Instance counts (x, y) queried on %call4.
%call910 = call i64 @llvm.hpvm.getNumNodeInstances.x(i8* %call4)
%call1111 = call i64 @llvm.hpvm.getNumNodeInstances.y(i8* %call4)
; %add   = count.x(%call4) * id.x(%call15) + id.x(%call4)
%mul = mul i64 %call910, %call58
%add = add i64 %mul, %call26
; %add14 = count.y(%call4) * id.y(%call15) + id.y(%call4)
%mul13 = mul i64 %call1111, %call79
%add14 = add i64 %mul13, %call37
; shl 32 followed by ashr 32 sign-extends the low 32 bits of %add14; the
; result indexes %In.
%sext = shl i64 %add14, 32
%idxprom = ashr exact i64 %sext, 32
%arrayidx = getelementptr inbounds i32, i32* %In, i64 %idxprom
%0 = load i32, i32* %arrayidx, align 4, !tbaa !15
; Same sign-extension applied to %add; the result indexes %Out.
%sext36 = shl i64 %add, 32
%idxprom15 = ashr exact i64 %sext36, 32
%arrayidx16 = getelementptr inbounds i32, i32* %Out, i64 %idxprom15
; Read-modify-write of the %Out element.
%1 = load i32, i32* %arrayidx16, align 4, !tbaa !15
%add17 = add nsw i32 %1, %0
store i32 %add17, i32* %arrayidx16, align 4, !tbaa !15
ret %emptyStruct undef
}
; Function Attrs: nounwind
declare void @llvm.hpvm.bind.input(i8*, i32, i32, i1) #6
; @Func3_cloned creates one child node running @Func1_cloned via
; @llvm.hpvm.createNode2D with the two i64 operands 3 and 5, then issues four
; @llvm.hpvm.bind.input calls pairing index k with index k for k = 0..3.
; None of the four parameters is used directly in the body.
; Returns an undef %emptyStruct.0.
; Function Attrs: nounwind uwtable
define dso_local %emptyStruct.0 @Func3_cloned(i32* in %In, i64 %Insize, i32* in out %Out, i64 %Outsize) #4 {
entry:
%Func1_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%emptyStruct (i32*, i64, i32*, i64)* @Func1_cloned to i8*), i64 3, i64 5)
; bind.input operands: (child node, i32, i32, i1) -- presumably parent argument
; index, child argument index, streaming flag; TODO confirm against the HPVM
; intrinsic specification.
call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node, i32 0, i32 0, i1 false)
call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node, i32 1, i32 1, i1 false)
call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node, i32 2, i32 2, i1 false)
call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node, i32 3, i32 3, i1 false)
ret %emptyStruct.0 undef
}
; Function Attrs: nounwind
declare i8* @llvm.hpvm.createNode(i8*) #6
; @Func2_cloned creates one child node running @Func3_cloned via
; @llvm.hpvm.createNode2D with the two i64 operands 3 and 5, then issues four
; @llvm.hpvm.bind.input calls pairing index k with index k for k = 0..3.
; None of the four parameters is used directly in the body.
; Returns an undef %emptyStruct.1.
; Function Attrs: nounwind uwtable
define dso_local %emptyStruct.1 @Func2_cloned(i32* in %In, i64 %Insize, i32* in out %Out, i64 %Outsize) #4 {
entry:
%Func3_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%emptyStruct.0 (i32*, i64, i32*, i64)* @Func3_cloned to i8*), i64 3, i64 5)
; bind.input operands: (child node, i32, i32, i1) -- presumably parent argument
; index, child argument index, streaming flag; TODO confirm against the HPVM
; intrinsic specification.
call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node, i32 0, i32 0, i1 false)
call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node, i32 1, i32 1, i1 false)
call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node, i32 2, i32 2, i1 false)
call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node, i32 3, i32 3, i1 false)
ret %emptyStruct.1 undef
}
; Function Attrs: nounwind
declare i8* @llvm.hpvm.launch(i8*, i8*, i1) #6
; @PipeRoot_cloned is the function handed to @llvm.hpvm.launch by @Launch.
; It creates one child node running @Func2_cloned via @llvm.hpvm.createNode
; (the variant that takes no dimension operands), then issues four
; @llvm.hpvm.bind.input calls pairing index k with index k for k = 0..3.
; None of the four parameters is used directly in the body.
; Returns an undef %emptyStruct.2.
; Function Attrs: nounwind uwtable
define dso_local %emptyStruct.2 @PipeRoot_cloned(i32* in %In, i64 %Insize, i32* in out %Out, i64 %Outsize) #4 {
entry:
%Func2_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%emptyStruct.1 (i32*, i64, i32*, i64)* @Func2_cloned to i8*))
; bind.input operands: (child node, i32, i32, i1) -- presumably parent argument
; index, child argument index, streaming flag; TODO confirm against the HPVM
; intrinsic specification.
call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 0, i32 0, i1 false)
call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 1, i32 1, i1 false)
call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 2, i32 2, i1 false)
call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 3, i32 3, i1 false)
ret %emptyStruct.2 undef
}
; Function Attrs: nounwind
declare void @llvm.hpvm.wait(i8*) #6
; Function Attrs: nounwind
declare void @llvm.hpvm.init() #6
; Function Attrs: nounwind
declare void @llvm.hpvm.cleanup() #6
; Attribute groups referenced by the declarations and definitions in this module.
;   #0 - external __hpvm__* declarations
;   #1 - llvm.lifetime.* intrinsics
;   #2 - @Launch (noinline)
;   #3 - @malloc
;   #4 - @main and the *_cloned node functions
;   #5 - read-only llvm.hpvm.get* query intrinsics
;   #6 - remaining llvm.hpvm.* intrinsics and nounwind call sites
; "target-cpu" is "x86-64" to agree with the x86_64-unknown-linux-gnu triple;
; a mechanical x86 -> cpu rename had turned it into "cpu-64", which is not a
; processor name for this target.
attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
attributes #1 = { argmemonly nounwind }
attributes #2 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
attributes #3 = { nofree nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
attributes #4 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
attributes #5 = { nounwind readnone }
attributes #6 = { nounwind }
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!hpvm_hint_gpu = !{!2}
!hpvm_hint_cpu = !{!3, !4, !5}
!hpvm_hint_spir = !{}
!hpvm_hint_cudnn = !{}
!hpvm_hint_promise = !{}
!hpvm_hint_cpu_gpu = !{}
!hpvm_hint_cpu_spir = !{}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 6690f9e7e8b46b96aea222d3e85315cd63545953)"}
!2 = !{%emptyStruct (i32*, i64, i32*, i64)* @Func1_cloned}
!3 = !{%emptyStruct.0 (i32*, i64, i32*, i64)* @Func3_cloned}
!4 = !{%emptyStruct.1 (i32*, i64, i32*, i64)* @Func2_cloned}
!5 = !{%emptyStruct.2 (i32*, i64, i32*, i64)* @PipeRoot_cloned}
!6 = !{!7, !8, i64 0}
!7 = !{!"Root", !8, i64 0, !11, i64 8, !8, i64 16, !11, i64 24}
!8 = !{!"any pointer", !9, i64 0}
!9 = !{!"omnipotent char", !10, i64 0}
!10 = !{!"Simple C/C++ TBAA"}
!11 = !{!"long", !9, i64 0}
!12 = !{!7, !11, i64 8}
!13 = !{!7, !8, i64 16}
!14 = !{!7, !11, i64 24}
!15 = !{!16, !16, i64 0}
!16 = !{!"int", !9, i64 0}
; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -S -dfg2llvm-x86 < %s | FileCheck %s
; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_CPU.so -S -dfg2llvm-cpu < %s | FileCheck %s
; ModuleID = 'ThreeLevel.ll'
source_filename = "ThreeLevel.c"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
......@@ -13,9 +13,9 @@ target triple = "x86_64-unknown-linux-gnu"
; CHECK-LABEL: i32 @main(
; CHECK: call void @llvm.hpvm.init()
; CHECK: call i8* @llvm_hpvm_x86_launch(i8* (i8*)* @LaunchDataflowGraph, i8*
; CHECK: call i8* @llvm_hpvm_cpu_launch(i8* (i8*)* @LaunchDataflowGraph, i8*
; CHECK-NEXT: call i8* @llvm.hpvm.launch(i8*
; CHECK-NEXT: call void @llvm_hpvm_x86_wait(i8*
; CHECK-NEXT: call void @llvm_hpvm_cpu_wait(i8*
; CHECK-LABEL: @Func3_cloned(
; CHECK: call i8* @llvm.hpvm.createNode2D(
......@@ -42,26 +42,26 @@ target triple = "x86_64-unknown-linux-gnu"
; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func2_cloned.node
; CHECK-LABEL: @Func1_cloned.1_cloned_cloned_cloned_cloned_cloned_cloned
; CHECK: call i8* @llvm_hpvm_x86_argument_ptr(
; CHECK: call i8* @llvm_hpvm_cpu_argument_ptr(
; CHECK-LABEL: @Func3_cloned.2_cloned_cloned_cloned_cloned_cloned_cloned(
; CHECK-LABEL: for.body1:
; CHECK: %index.y = phi i64 [ 0, %for.body ], [ %index.y.inc, %for.body1 ]
; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_push(
; CHECK-NEXT: call void @llvm_hpvm_cpu_dstack_push(
; CHECK-NEXT: @Func1_cloned.1_cloned_cloned_cloned_cloned_cloned_cloned(
; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_pop()
; CHECK-NEXT: call void @llvm_hpvm_cpu_dstack_pop()
; CHECK-LABEL: @Func2_cloned.3_cloned_cloned_cloned_cloned_cloned_cloned(
; CHECK-LABEL: for.body:
; CHECK-NEXT: %index.x = phi i64 [ 0, %entry ], [ %index.x.inc, %for.body ]
; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_push(
; CHECK-NEXT: call void @llvm_hpvm_cpu_dstack_push(
; CHECK-NEXT: @Func3_cloned.2_cloned_cloned_cloned_cloned_cloned_cloned(
; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_pop()
; CHECK-NEXT: call void @llvm_hpvm_cpu_dstack_pop()
; CHECK-LABEL: @PipeRoot_cloned.4(
; CHECK: call void @llvm_hpvm_x86_dstack_push(
; CHECK: call void @llvm_hpvm_cpu_dstack_push(
; CHECK-NEXT: @Func2_cloned.3_cloned_cloned_cloned_cloned_cloned_cloned(
; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_pop()
; CHECK-NEXT: call void @llvm_hpvm_cpu_dstack_pop()
; CHECK-LABEL: @LaunchDataflowGraph(
; CHECK: call %struct.out.PipeRoot @PipeRoot_cloned.4(
......@@ -210,9 +210,9 @@ declare void @llvm.hpvm.wait(i8*) #3
; Function Attrs: nounwind
declare void @llvm.hpvm.cleanup() #3
attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cpu-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
attributes #1 = { argmemonly nounwind }
attributes #2 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
attributes #2 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cpu-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
attributes #3 = { nounwind }
!llvm.module.flags = !{!0}
......
; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_CPU.so -S -dfg2llvm-cpu < %s | FileCheck %s
; ModuleID = 'ThreeLevel.cond.ll'
source_filename = "ThreeLevel.cond.c"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
%struct.Root = type { i32*, i64, i32*, i64 }
%emptyStruct = type <{}>
%emptyStruct.0 = type <{}>
%emptyStruct.1 = type <{}>
%emptyStruct.2 = type <{}>
; CHECK-LABEL: @Launch(
; CHECK: call i8* @llvm_hpvm_cpu_launch(i8* (i8*)* @LaunchDataflowGraph, i8*
; CHECK-NEXT: call i8* @llvm.hpvm.launch(i8*
; CHECK-NEXT: call void @llvm_hpvm_cpu_wait(i8*
; CHECK-LABEL: i32 @main(
; CHECK: call void @llvm.hpvm.init()
; CHECK: call void @llvm.hpvm.cleanup()
; CHECK-LABEL: @Func3_cloned(
; CHECK: call i8* @llvm.hpvm.createNode2D(
; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node
; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node
; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node
; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node
; CHECK-LABEL: @Func2_cloned(
; CHECK: call i8* @llvm.hpvm.createNode2D(
; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node
; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node
; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node
; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node
; CHECK-LABEL: @PipeRoot_cloned(
; CHECK: call i8* @llvm.hpvm.createNode(
; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node
; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node
; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node
; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node
; CHECK-LABEL: @Func1_cloned.1_cloned_cloned_cloned_cloned_cloned_cloned
; CHECK: call i8* @llvm_hpvm_cpu_argument_ptr(
; CHECK: call i8* @llvm_hpvm_cpu_argument_ptr(
; CHECK: call i64 @llvm_hpvm_cpu_getDimInstance(
; CHECK: call i64 @llvm_hpvm_cpu_getDimInstance(
; CHECK-LABEL: @Func3_cloned.2_cloned_cloned_cloned_cloned_cloned_cloned(
; CHECK: call void @llvm_hpvm_cpu_dstack_push(
; CHECK-NEXT: @Func1_cloned.1_cloned_cloned_cloned_cloned_cloned_cloned(
; CHECK-NEXT: call void @llvm_hpvm_cpu_dstack_pop()
; CHECK: br i1 %cond.y, label %for.body1, label %for.end2
; CHECK-LABEL: for.end2:
; CHECK: br i1 %cond.x, label %for.body, label %for.end
; CHECK-LABEL: for.end:
; CHECK-LABEL: @Func2_cloned.3_cloned_cloned_cloned_cloned_cloned_cloned(
; CHECK-LABEL: for.body:
; CHECK-NEXT: %index.x = phi i64 [ 0, %entry ], [ %index.x.inc, %for.end2 ]
; CHECK-LABEL: for.body1:
; CHECK-NEXT: %index.y = phi i64 [ 0, %for.body ], [ %index.y.inc, %for.body1 ]
; CHECK-NEXT: call void @llvm_hpvm_cpu_dstack_push(
; CHECK-NEXT: @Func3_cloned.2_cloned_cloned_cloned_cloned_cloned_cloned(
; CHECK-NEXT: call void @llvm_hpvm_cpu_dstack_pop()
; CHECK: br i1 %cond.y, label %for.body1, label %for.end2
; CHECK-LABEL: for.end2:
; CHECK: br i1 %cond.x, label %for.body, label %for.end
; CHECK-LABEL: for.end:
; CHECK-LABEL: @PipeRoot_cloned.4(
; CHECK: call void @llvm_hpvm_cpu_dstack_push(
; CHECK-NEXT: @Func2_cloned.3_cloned_cloned_cloned_cloned_cloned_cloned(
; CHECK-NEXT: call void @llvm_hpvm_cpu_dstack_pop()
; CHECK-LABEL: @LaunchDataflowGraph(
; CHECK: call %emptyStruct.2 @PipeRoot_cloned.4(
declare dso_local void @__hpvm__hint(i32) local_unnamed_addr #0
declare dso_local void @__hpvm__attributes(i32, ...) local_unnamed_addr #0
; Function Attrs: argmemonly nounwind
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
declare dso_local i8* @__hpvm__getNode(...) local_unnamed_addr #0
declare dso_local i8* @__hpvm__getParentNode(i8*) local_unnamed_addr #0
declare dso_local i64 @__hpvm__getNodeInstanceID_x(i8*) local_unnamed_addr #0
declare dso_local i64 @__hpvm__getNodeInstanceID_y(i8*) local_unnamed_addr #0
declare dso_local i64 @__hpvm__getNumNodeInstances_x(i8*) local_unnamed_addr #0
declare dso_local i64 @__hpvm__getNumNodeInstances_y(i8*) local_unnamed_addr #0
; Function Attrs: argmemonly nounwind
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
declare dso_local i8* @__hpvm__createNodeND(i32, ...) local_unnamed_addr #0
declare dso_local void @__hpvm__bindIn(i8*, i32, i32, i32) local_unnamed_addr #0
; @Launch builds a %struct.Root argument record on the stack, launches the
; dataflow graph whose root function is @PipeRoot_cloned through
; @llvm.hpvm.launch, and passes the returned handle to @llvm.hpvm.wait.
; Takes no parameters and returns void.
; NOTE(review): the two malloc results are neither null-checked nor freed in
; this function.
; Function Attrs: noinline nounwind uwtable
define dso_local void @Launch() local_unnamed_addr #2 {
entry:
; Stack slot for the argument record; 32 bytes = four 8-byte fields under the
; module's data layout.
%RootArgs = alloca %struct.Root, align 8
%0 = bitcast %struct.Root* %RootArgs to i8*
call void @llvm.lifetime.start.p0i8(i64 32, i8* nonnull %0) #6
; Field 0: first 1024-byte heap buffer (stored through an i8** view of the record).
%call = tail call noalias i8* @malloc(i64 1024) #6
%1 = bitcast %struct.Root* %RootArgs to i8**
store i8* %call, i8** %1, align 8, !tbaa !6
; Field 1: size value 1024.
%Insize = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 1
store i64 1024, i64* %Insize, align 8, !tbaa !12
; Field 2: second 1024-byte heap buffer.
%output = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 2
%call1 = tail call noalias i8* @malloc(i64 1024) #6
%2 = bitcast i32** %output to i8**
store i8* %call1, i8** %2, align 8, !tbaa !13
; Field 3: size value 1024.
%Outsize = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 3
store i64 1024, i64* %Outsize, align 8, !tbaa !14
; %3 is the same address as %0, recomputed for the launch call.
%3 = bitcast %struct.Root* %RootArgs to i8*
; The i1 false operand is presumably the "streaming" flag -- TODO confirm
; against the HPVM intrinsic specification.
%graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%emptyStruct.2 (i32*, i64, i32*, i64)* @PipeRoot_cloned to i8*), i8* %3, i1 false)
call void @llvm.hpvm.wait(i8* %graphID)
call void @llvm.lifetime.end.p0i8(i64 32, i8* nonnull %0) #6
ret void
}
; Function Attrs: nofree nounwind
declare dso_local noalias i8* @malloc(i64) local_unnamed_addr #3
declare dso_local i8* @__hpvm__launch(i32, ...) local_unnamed_addr #0
declare dso_local void @__hpvm__wait(i8*) local_unnamed_addr #0
; @main brackets a single call to @Launch with @llvm.hpvm.init and
; @llvm.hpvm.cleanup, then returns 0.
; Function Attrs: nounwind uwtable
define dso_local i32 @main() local_unnamed_addr #4 {
entry:
call void @llvm.hpvm.init()
tail call void @Launch()
call void @llvm.hpvm.cleanup()
ret i32 0
}
declare dso_local void @__hpvm__init(...) local_unnamed_addr #0
declare dso_local void @__hpvm__cleanup(...) local_unnamed_addr #0
declare i8* @llvm_hpvm_initializeTimerSet()
declare void @llvm_hpvm_switchToTimer(i8**, i32)
declare void @llvm_hpvm_printTimerSet(i8**, i8*)
; Function Attrs: nounwind readnone
declare i8* @llvm.hpvm.getNode() #5
; Function Attrs: nounwind readnone
declare i8* @llvm.hpvm.getParentNode(i8*) #5
; Function Attrs: nounwind readnone
declare i64 @llvm.hpvm.getNodeInstanceID.x(i8*) #5
; Function Attrs: nounwind readnone
declare i64 @llvm.hpvm.getNodeInstanceID.y(i8*) #5
; Function Attrs: nounwind readnone
declare i64 @llvm.hpvm.getNumNodeInstances.x(i8*) #5
; Function Attrs: nounwind readnone
declare i64 @llvm.hpvm.getNumNodeInstances.y(i8*) #5
; Function Attrs: nounwind
declare i8* @llvm.hpvm.createNode2D(i8*, i64, i64) #6
; @Func1_cloned is the conditional variant of the node function: it computes
; two i32 values from the HPVM node query intrinsics and, only when they
; differ, performs Out[x-value] += In[3].
;   %In      - i32 buffer; only element 3 is read (marked `in`).
;   %Insize  - not referenced in the body.
;   %Out     - i32 buffer that is read and written (marked `in out`).
;   %Outsize - not referenced in the body.
; Returns an undef %emptyStruct (the struct has no fields).
; NOTE(review): no bounds test against %Insize/%Outsize is performed here.
; Function Attrs: nounwind uwtable
define dso_local %emptyStruct @Func1_cloned(i32* in %In, i64 %Insize, i32* in out %Out, i64 %Outsize) #4 {
entry:
; %call4 is the getNode result; %call15 is getParentNode applied to it.
%call4 = call i8* @llvm.hpvm.getNode()
%call15 = call i8* @llvm.hpvm.getParentNode(i8* %call4)
; Each i64 query result below is truncated to i32 before use.
%call26 = call i64 @llvm.hpvm.getNodeInstanceID.x(i8* %call4)
%conv = trunc i64 %call26 to i32
%call37 = call i64 @llvm.hpvm.getNodeInstanceID.y(i8* %call4)
%conv4 = trunc i64 %call37 to i32
%call58 = call i64 @llvm.hpvm.getNodeInstanceID.x(i8* %call15)
%conv6 = trunc i64 %call58 to i32
%call79 = call i64 @llvm.hpvm.getNodeInstanceID.y(i8* %call15)
%conv8 = trunc i64 %call79 to i32
%call910 = call i64 @llvm.hpvm.getNumNodeInstances.x(i8* %call4)
%conv10 = trunc i64 %call910 to i32
%call1111 = call i64 @llvm.hpvm.getNumNodeInstances.y(i8* %call4)
%conv12 = trunc i64 %call1111 to i32
; %add   = count.x(%call4) * id.x(%call15) + id.x(%call4)
%mul = mul nsw i32 %conv10, %conv6
%add = add nsw i32 %mul, %conv
; %add14 = count.y(%call4) * id.y(%call15) + id.y(%call4)
%mul13 = mul nsw i32 %conv12, %conv8
%add14 = add nsw i32 %mul13, %conv4
; Equal values skip the memory update entirely.
%cmp = icmp eq i32 %add, %add14
br i1 %cmp, label %if.end, label %if.then
if.then: ; preds = %entry
; Fixed read of In[3]; the Out index is %add sign-extended to i64.
%arrayidx = getelementptr inbounds i32, i32* %In, i64 3
%0 = load i32, i32* %arrayidx, align 4, !tbaa !15
%idxprom = sext i32 %add to i64
%arrayidx16 = getelementptr inbounds i32, i32* %Out, i64 %idxprom
; Read-modify-write of the %Out element.
%1 = load i32, i32* %arrayidx16, align 4, !tbaa !15
%add17 = add nsw i32 %1, %0
store i32 %add17, i32* %arrayidx16, align 4, !tbaa !15
br label %if.end
if.end: ; preds = %if.then, %entry
ret %emptyStruct undef
}
; Function Attrs: nounwind
declare void @llvm.hpvm.bind.input(i8*, i32, i32, i1) #6
; @Func3_cloned creates one child node running @Func1_cloned via
; @llvm.hpvm.createNode2D with the two i64 operands 3 and 5, then issues four
; @llvm.hpvm.bind.input calls pairing index k with index k for k = 0..3.
; None of the four parameters is used directly in the body.
; Returns an undef %emptyStruct.0.
; Function Attrs: nounwind uwtable
define dso_local %emptyStruct.0 @Func3_cloned(i32* in %In, i64 %Insize, i32* in out %Out, i64 %Outsize) #4 {
entry:
%Func1_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%emptyStruct (i32*, i64, i32*, i64)* @Func1_cloned to i8*), i64 3, i64 5)
; bind.input operands: (child node, i32, i32, i1) -- presumably parent argument
; index, child argument index, streaming flag; TODO confirm against the HPVM
; intrinsic specification.
call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node, i32 0, i32 0, i1 false)
call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node, i32 1, i32 1, i1 false)
call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node, i32 2, i32 2, i1 false)
call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node, i32 3, i32 3, i1 false)
ret %emptyStruct.0 undef
}
; Function Attrs: nounwind
declare i8* @llvm.hpvm.createNode(i8*) #6
; @Func2_cloned creates one child node running @Func3_cloned via
; @llvm.hpvm.createNode2D with the two i64 operands 3 and 5, then issues four
; @llvm.hpvm.bind.input calls pairing index k with index k for k = 0..3.
; None of the four parameters is used directly in the body.
; Returns an undef %emptyStruct.1.
; Function Attrs: nounwind uwtable
define dso_local %emptyStruct.1 @Func2_cloned(i32* in %In, i64 %Insize, i32* in out %Out, i64 %Outsize) #4 {
entry:
%Func3_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%emptyStruct.0 (i32*, i64, i32*, i64)* @Func3_cloned to i8*), i64 3, i64 5)
; bind.input operands: (child node, i32, i32, i1) -- presumably parent argument
; index, child argument index, streaming flag; TODO confirm against the HPVM
; intrinsic specification.
call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node, i32 0, i32 0, i1 false)
call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node, i32 1, i32 1, i1 false)
call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node, i32 2, i32 2, i1 false)
call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node, i32 3, i32 3, i1 false)
ret %emptyStruct.1 undef
}
; Function Attrs: nounwind
declare i8* @llvm.hpvm.launch(i8*, i8*, i1) #6
; @PipeRoot_cloned is the function handed to @llvm.hpvm.launch by @Launch.
; It creates one child node running @Func2_cloned via @llvm.hpvm.createNode
; (the variant that takes no dimension operands), then issues four
; @llvm.hpvm.bind.input calls pairing index k with index k for k = 0..3.
; None of the four parameters is used directly in the body.
; Returns an undef %emptyStruct.2.
; Function Attrs: nounwind uwtable
define dso_local %emptyStruct.2 @PipeRoot_cloned(i32* in %In, i64 %Insize, i32* in out %Out, i64 %Outsize) #4 {
entry:
%Func2_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%emptyStruct.1 (i32*, i64, i32*, i64)* @Func2_cloned to i8*))
; bind.input operands: (child node, i32, i32, i1) -- presumably parent argument
; index, child argument index, streaming flag; TODO confirm against the HPVM
; intrinsic specification.
call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 0, i32 0, i1 false)
call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 1, i32 1, i1 false)
call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 2, i32 2, i1 false)
call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 3, i32 3, i1 false)
ret %emptyStruct.2 undef
}