From 86f9a54aa50fd430b33c011ac907f03bc45818dc Mon Sep 17 00:00:00 2001
From: Akash Kothari <akashk4@tyler.cs.illinois.edu>
Date: Mon, 27 Jan 2020 11:25:58 -0600
Subject: [PATCH] Modifying failing regression tests

---
 .../BuildDFG/AllocationNode.ll                | 229 ++++++++++++++++
 .../BuildDFG/ThreeLevelEdge.genvisc.ll        | 248 -----------------
 .../BuildDFG/ThreeLevelEdge.ll                | 257 ++++++++++--------
 hpvm/test/regressionTests/BuildDFG/TwoNode.ll | 197 --------------
 .../regressionTests/GenHPVM/AllocationNode.ll | 253 ++++++++---------
 5 files changed, 494 insertions(+), 690 deletions(-)
 create mode 100644 hpvm/test/regressionTests/BuildDFG/AllocationNode.ll
 delete mode 100644 hpvm/test/regressionTests/BuildDFG/ThreeLevelEdge.genvisc.ll
 delete mode 100644 hpvm/test/regressionTests/BuildDFG/TwoNode.ll

diff --git a/hpvm/test/regressionTests/BuildDFG/AllocationNode.ll b/hpvm/test/regressionTests/BuildDFG/AllocationNode.ll
new file mode 100644
index 0000000000..2bbe422e72
--- /dev/null
+++ b/hpvm/test/regressionTests/BuildDFG/AllocationNode.ll
@@ -0,0 +1,229 @@
+; RUN: opt -load LLVMBuildDFG.so -S < %s | FileCheck %s
+; ModuleID = 'AllocationNode.ll'
+source_filename = "ThreeLevel.allocation.c"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.Root = type { i32*, i64, i32*, i64 }
+%struct.out.Allocation = type <{ i8*, i64 }>
+%emptyStruct = type <{}>
+%emptyStruct.0 = type <{}>
+%emptyStruct.1 = type <{}>
+%emptyStruct.2 = type <{}>
+
+
+; CHECK-LABEL: %struct.out.Allocation =
+
+; CHECK-LABEL: void @Launch(
+; CHECK: call i8* @llvm.hpvm.launch(i8*
+; CHECK-NEXT: call void @llvm.hpvm.wait(i8*
+
+; CHECK-LABEL: i32 @main(
+; CHECK: call void @llvm.hpvm.init()
+; CHECK-NEXT: tail call void @Launch(
+; CHECK-NEXT: call void @llvm.hpvm.cleanup()
+
+; CHECK-LABEL: @Allocation_cloned(
+; CHECK: call i8* @llvm.hpvm.malloc(i64
+
+; CHECK-LABEL: @Func1_cloned(
+
+; CHECK-LABEL: @Func3_cloned(
+; CHECK: %Func1_cloned.node = call i8* @llvm.hpvm.createNode2D(i8*
+; CHECK: %Allocation_cloned.node = call i8* @llvm.hpvm.createNode(i8*
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Allocation_cloned.node
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node
+; CHECK-NEXT: call i8* @llvm.hpvm.createEdge(i8* %Allocation_cloned.node
+; CHECK-NEXT: call i8* @llvm.hpvm.createEdge(i8* %Allocation_cloned.node
+
+; CHECK-LABEL: @Func2_cloned(
+; CHECK: %Func3_cloned.node = call i8* @llvm.hpvm.createNode2D(
+; CHECK-NEXT:  call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node
+; CHECK-NEXT:  call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node
+; CHECK-NEXT:  call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node
+; CHECK-NEXT:  call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node
+
+; CHECK-LABEL: @PipeRoot_cloned(
+; CHECK: %Func2_cloned.node = call i8* @llvm.hpvm.createNode(i8*
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node
+
+
+declare dso_local void @__hpvm__hint(i32) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__attributes(i32, ...) local_unnamed_addr #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
+
+declare dso_local i8* @__hpvm__malloc(i64) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__return(i32, ...) local_unnamed_addr #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
+
+declare dso_local i8* @__hpvm__createNodeND(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__bindIn(i8*, i32, i32, i32) local_unnamed_addr #0
+
+declare dso_local i8* @__hpvm__edge(i8*, i8*, i32, i32, i32, i32) local_unnamed_addr #0
+
+; Function Attrs: noinline nounwind uwtable. Launch fills a stack %struct.Root with two @malloc(i64 1024) results and the constant 1024 as each size, passes it to llvm.hpvm.launch with @PipeRoot_cloned, and waits on the returned value.
+define dso_local void @Launch() local_unnamed_addr #2 {
+entry:
+  %RootArgs = alloca %struct.Root, align 8
+  %0 = bitcast %struct.Root* %RootArgs to i8*
+  call void @llvm.lifetime.start.p0i8(i64 32, i8* nonnull %0) #5
+  %call = tail call noalias i8* @malloc(i64 1024) #5
+  %1 = bitcast %struct.Root* %RootArgs to i8** ; struct base address reused as the slot for field 0
+  store i8* %call, i8** %1, align 8, !tbaa !6 ; field 0 <- first malloc result
+  %Insize = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 1
+  store i64 1024, i64* %Insize, align 8, !tbaa !12 ; field 1 <- 1024
+  %output = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 2
+  %call1 = tail call noalias i8* @malloc(i64 1024) #5
+  %2 = bitcast i32** %output to i8**
+  store i8* %call1, i8** %2, align 8, !tbaa !13 ; field 2 <- second malloc result
+  %Outsize = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 3
+  store i64 1024, i64* %Outsize, align 8, !tbaa !14 ; field 3 <- 1024
+  %3 = bitcast %struct.Root* %RootArgs to i8* ; whole argument struct passed as an opaque i8*
+  %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%emptyStruct.2 (i32*, i64, i32*, i64)* @PipeRoot_cloned to i8*), i8* %3, i1 false)
+  call void @llvm.hpvm.wait(i8* %graphID) ; the test expects this call on the line directly after the launch call
+  call void @llvm.lifetime.end.p0i8(i64 32, i8* nonnull %0) #5
+  ret void ; NOTE(review): neither malloc result is freed in this function
+}
+
+; Function Attrs: nofree nounwind
+declare dso_local noalias i8* @malloc(i64) local_unnamed_addr #3
+
+declare dso_local i8* @__hpvm__launch(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__wait(i8*) local_unnamed_addr #0
+
+; Function Attrs: nounwind uwtable. main calls llvm.hpvm.init, then @Launch, then llvm.hpvm.cleanup, and returns 0.
+define dso_local i32 @main() local_unnamed_addr #4 {
+entry:
+  call void @llvm.hpvm.init()
+  tail call void @Launch() ; the test expects init, this call and cleanup on three consecutive lines
+  call void @llvm.hpvm.cleanup()
+  ret i32 0
+}
+
+declare dso_local void @__hpvm__init(...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__cleanup(...) local_unnamed_addr #0
+
+declare i8* @llvm_hpvm_initializeTimerSet()
+
+declare void @llvm_hpvm_switchToTimer(i8**, i32)
+
+declare void @llvm_hpvm_printTimerSet(i8**, i8*)
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.malloc(i64) #5
+
+; Function Attrs: nounwind uwtable. Allocation_cloned returns a %struct.out.Allocation whose element 0 is the result of llvm.hpvm.malloc(%block) and whose element 1 is %block itself.
+define dso_local %struct.out.Allocation @Allocation_cloned(i64 %block) #4 {
+entry:
+  %call1 = call i8* @llvm.hpvm.malloc(i64 %block) ; NOTE(review): %block is presumably a byte count; confirm against the intrinsic definition
+  %returnStruct = insertvalue %struct.out.Allocation undef, i8* %call1, 0 ; element 0 <- pointer
+  %returnStruct2 = insertvalue %struct.out.Allocation %returnStruct, i64 %block, 1 ; element 1 <- %block
+  ret %struct.out.Allocation %returnStruct2
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createNode2D(i8*, i64, i64) #5
+
+; Function Attrs: nounwind uwtable. Func1_cloned only returns an undef %emptyStruct and uses none of its arguments; @Func3_cloned passes it to llvm.hpvm.createNode2D.
+define dso_local %emptyStruct @Func1_cloned(i32* in %In, i64 %Insize, i32* in out %Out, i64 %Outsize) #4 {
+entry:
+  ret %emptyStruct undef
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createNode(i8*) #5
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.bind.input(i8*, i32, i32, i1) #5
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32, i1) #5
+
+; Function Attrs: nounwind uwtable. Func3_cloned creates one node from @Func1_cloned and one from @Allocation_cloned, issues three bind.input calls, and creates two edges from the Allocation node to the Func1 node.
+define dso_local %emptyStruct.0 @Func3_cloned(i32* in %In, i64 %Insize, i32* in out %Out, i64 %Outsize) #4 {
+entry:
+  %Func1_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%emptyStruct (i32*, i64, i32*, i64)* @Func1_cloned to i8*), i64 3, i64 5) ; value name is matched literally by the test directives
+  %Allocation_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Allocation (i64)* @Allocation_cloned to i8*)) ; value name is matched literally by the test directives
+  call void @llvm.hpvm.bind.input(i8* %Allocation_cloned.node, i32 1, i32 0, i1 false) ; NOTE(review): the i32 pair is presumably (parent position, child position); confirm
+  call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node, i32 2, i32 2, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node, i32 3, i32 3, i1 false)
+  %output = call i8* @llvm.hpvm.createEdge(i8* %Allocation_cloned.node, i8* %Func1_cloned.node, i1 true, i32 0, i32 0, i1 false) ; first edge, i32 operands 0 and 0
+  %output1 = call i8* @llvm.hpvm.createEdge(i8* %Allocation_cloned.node, i8* %Func1_cloned.node, i1 true, i32 1, i32 1, i1 false) ; second edge, i32 operands 1 and 1
+  ret %emptyStruct.0 undef
+}
+
+; Function Attrs: nounwind uwtable. Func2_cloned creates a single node from @Func3_cloned via createNode2D (i64 3, i64 5) and issues four bind.input calls on it with matching i32 pairs 0..3.
+define dso_local %emptyStruct.1 @Func2_cloned(i32* in %In, i64 %Insize, i32* in out %Out, i64 %Outsize) #4 {
+entry:
+  %Func3_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%emptyStruct.0 (i32*, i64, i32*, i64)* @Func3_cloned to i8*), i64 3, i64 5) ; value name is matched literally by the test directives
+  call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node, i32 0, i32 0, i1 false) ; the test expects the four bind.input calls on consecutive lines
+  call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node, i32 1, i32 1, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node, i32 2, i32 2, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node, i32 3, i32 3, i1 false)
+  ret %emptyStruct.1 undef
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.launch(i8*, i8*, i1) #5
+
+; Function Attrs: nounwind uwtable. PipeRoot_cloned is the function @Launch hands to llvm.hpvm.launch; it creates a single node from @Func2_cloned via createNode and issues four bind.input calls on it with matching i32 pairs 0..3.
+define dso_local %emptyStruct.2 @PipeRoot_cloned(i32* in %In, i64 %Insize, i32* in out %Out, i64 %Outsize) #4 {
+entry:
+  %Func2_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%emptyStruct.1 (i32*, i64, i32*, i64)* @Func2_cloned to i8*)) ; value name is matched literally by the test directives
+  call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 0, i32 0, i1 false) ; the test expects the four bind.input calls on consecutive lines
+  call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 1, i32 1, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 2, i32 2, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 3, i32 3, i1 false)
+  ret %emptyStruct.2 undef
+}
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.wait(i8*) #5
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.init() #5
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.cleanup() #5
+
+attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nofree nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #4 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #5 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+!hpvm_hint_gpu = !{!2}
+!hpvm_hint_cpu = !{!3, !4, !5}
+!hpvm_hint_cpu_gpu = !{}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 3551132592a00cab6c966df508ab511598269f78)"}
+!2 = !{%emptyStruct (i32*, i64, i32*, i64)* @Func1_cloned}
+!3 = !{%emptyStruct.0 (i32*, i64, i32*, i64)* @Func3_cloned}
+!4 = !{%emptyStruct.1 (i32*, i64, i32*, i64)* @Func2_cloned}
+!5 = !{%emptyStruct.2 (i32*, i64, i32*, i64)* @PipeRoot_cloned}
+!6 = !{!7, !8, i64 0}
+!7 = !{!"Root", !8, i64 0, !11, i64 8, !8, i64 16, !11, i64 24}
+!8 = !{!"any pointer", !9, i64 0}
+!9 = !{!"omnipotent char", !10, i64 0}
+!10 = !{!"Simple C/C++ TBAA"}
+!11 = !{!"long", !9, i64 0}
+!12 = !{!7, !11, i64 8}
+!13 = !{!7, !8, i64 16}
+!14 = !{!7, !11, i64 24}
diff --git a/hpvm/test/regressionTests/BuildDFG/ThreeLevelEdge.genvisc.ll b/hpvm/test/regressionTests/BuildDFG/ThreeLevelEdge.genvisc.ll
deleted file mode 100644
index 8c76da27e8..0000000000
--- a/hpvm/test/regressionTests/BuildDFG/ThreeLevelEdge.genvisc.ll
+++ /dev/null
@@ -1,248 +0,0 @@
-; RUN: opt -load LLVMBuildDFG.so -S <  %s | FileCheck %s
-; ModuleID = 'ThreeLevelEdge.ll'
-source_filename = "ThreeLevelEdge.c"
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-%struct.Root = type { i32*, i32*, i32* }
-%struct.out.Func1 = type <{ i32* }>
-%struct.out.Func4 = type <{ i32* }>
-%struct.out.Func5 = type <{ i32* }>
-%struct.out.Func3 = type <{ i32* }>
-%struct.out.Func2 = type <{ i32* }>
-%struct.out.PipeRoot = type <{ i32* }>
-
-; CHECK-LABEL: struct.Root =
-
-; CHECK-LABEL: %struct.out.Func1 =
-; CHECK-LABEL: %struct.out.Func4 =
-; CHECK-LABEL: %struct.out.Func5 =
-; CHECK-LABEL: %struct.out.Func3 =
-; CHECK-LABEL: %struct.out.Func2 =
-; CHECK-LABEL: %struct.out.PipeRoot =
-
-; CHECK-LABEL: i32 @main(
-; CHECK: [[ALLOCA:%[1-9a-zA-Z]+]] = alloca %struct.Root
-; CHECK: call void @llvm.hpvm.init()
-; CHECK:  [[REGISTER:%[1-9]+]] = bitcast %struct.Root* [[ALLOCA]] to i8*
-; CHECK: call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot (i32*, i32*, i32*)* @PipeRoot_cloned to i8*), i8* [[REGISTER]],
-; CHECK-NEXT: call void @llvm.hpvm.wait(i8*
-
-; CHECK-LABEL: @Func1_cloned(
-; CHECK: [[RET1:%[1-9a-zA-Z]+]] = insertvalue %struct.out.Func1 undef,
-; CHECK-NEXT: ret %struct.out.Func1 [[RET1]]
-
-; CHECK-LABEL: @Func4_cloned(
-; CHECK: [[RET4:%[1-9a-zA-Z]+]] = insertvalue %struct.out.Func4 undef,
-; CHECK-NEXT: ret %struct.out.Func4 [[RET4]]
-
-; CHECK-LABEL: @Func5_cloned(
-; CHECK: [[RET5:%[1-9a-zA-Z]+]] = insertvalue %struct.out.Func5 undef,
-; CHECK-NEXT: ret %struct.out.Func5 [[RET5]]
-
-; CHECK-LABEL: @Func3_cloned(
-; CHECK: %Func4_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%struct.out.Func4 (i32*
-; CHECK-NEXT: %Func5_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%struct.out.Func5 (i32*
-; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func4_cloned.node,
-; CHECK-NEXT: call i8* @llvm.hpvm.createEdge(i8* %Func4_cloned.node, i8* %Func5_cloned.node,
-; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func5_cloned.node,
-
-; CHECK-LABEL: @Func2_cloned(
-; CHECK: %Func3_cloned.node = call i8* @llvm.hpvm.createNode1D(i8* bitcast (%struct.out.Func3 (i32*
-; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node,
-; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func3_cloned.node,
-
-; CHECK-LABEL: @PipeRoot_cloned(i32*
-; CHECK: %Func1_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func1 (i32*
-; CHECK-NEXT: %Func2_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func2 (i32*
-; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node,
-; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node,
-; CHECK-NEXT: call i8* @llvm.hpvm.createEdge(i8* %Func1_cloned.node, i8* %Func2_cloned.node,
-; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func2_cloned.node,
-
-
-declare dso_local void @__hpvm__hint(i32) local_unnamed_addr #0
-
-declare dso_local void @__hpvm__attributes(i32, ...) local_unnamed_addr #0
-
-declare dso_local void @__hpvm__return(i32, ...) local_unnamed_addr #0
-
-; Function Attrs: argmemonly nounwind
-declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
-
-declare dso_local i8* @__hpvm__createNodeND(i32, ...) local_unnamed_addr #0
-
-declare dso_local void @__hpvm__bindIn(i8*, i32, i32, i32) local_unnamed_addr #0
-
-declare dso_local i8* @__hpvm__edge(i8*, i8*, i32, i32, i32, i32) local_unnamed_addr #0
-
-declare dso_local void @__hpvm__bindOut(i8*, i32, i32, i32) local_unnamed_addr #0
-
-; Function Attrs: argmemonly nounwind
-declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
-
-; Function Attrs: nounwind uwtable
-define dso_local i32 @main() local_unnamed_addr #2 {
-entry:
-  %In1 = alloca i32, align 4
-  %In2 = alloca i32, align 4
-  %Out = alloca i32, align 4
-  %RootArgs = alloca %struct.Root, align 8
-  %0 = bitcast i32* %In1 to i8*
-  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #3
-  store i32 1, i32* %In1, align 4, !tbaa !8
-  %1 = bitcast i32* %In2 to i8*
-  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) #3
-  store i32 2, i32* %In2, align 4, !tbaa !8
-  %2 = bitcast i32* %Out to i8*
-  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %2) #3
-  store i32 0, i32* %Out, align 4, !tbaa !8
-  %3 = bitcast %struct.Root* %RootArgs to i8*
-  call void @llvm.lifetime.start.p0i8(i64 24, i8* nonnull %3) #3
-  %input1 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 0
-  store i32* %In1, i32** %input1, align 8, !tbaa !12
-  %intput2 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 1
-  store i32* %In2, i32** %intput2, align 8, !tbaa !15
-  %output = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 2
-  store i32* %Out, i32** %output, align 8, !tbaa !16
-  call void @llvm.hpvm.init()
-  %4 = bitcast %struct.Root* %RootArgs to i8*
-  %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot (i32*, i32*, i32*)* @PipeRoot_cloned to i8*), i8* %4, i1 false)
-  call void @llvm.hpvm.wait(i8* %graphID)
-  call void @llvm.hpvm.cleanup()
-  call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull %3) #3
-  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %2) #3
-  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) #3
-  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #3
-  ret i32 0
-}
-
-declare dso_local void @__hpvm__init(...) local_unnamed_addr #0
-
-declare dso_local i8* @__hpvm__launch(i32, ...) local_unnamed_addr #0
-
-declare dso_local void @__hpvm__wait(i8*) local_unnamed_addr #0
-
-declare dso_local void @__hpvm__cleanup(...) local_unnamed_addr #0
-
-declare i8* @llvm_hpvm_initializeTimerSet()
-
-declare void @llvm_hpvm_switchToTimer(i8**, i32)
-
-declare void @llvm_hpvm_printTimerSet(i8**, i8*)
-
-; Function Attrs: nounwind uwtable
-define dso_local %struct.out.Func1 @Func1_cloned(i32* in %In, i32* out %Out) #2 {
-entry:
-  %returnStruct = insertvalue %struct.out.Func1 undef, i32* %Out, 0
-  ret %struct.out.Func1 %returnStruct
-}
-
-; Function Attrs: nounwind uwtable
-define dso_local %struct.out.Func4 @Func4_cloned(i32* in %In, i32* out %Out) #2 {
-entry:
-  %returnStruct = insertvalue %struct.out.Func4 undef, i32* %Out, 0
-  ret %struct.out.Func4 %returnStruct
-}
-
-; Function Attrs: nounwind uwtable
-define dso_local %struct.out.Func5 @Func5_cloned(i32* in %In1, i32* in %In2, i32* out %Out) #2 {
-entry:
-  %returnStruct = insertvalue %struct.out.Func5 undef, i32* %Out, 0
-  ret %struct.out.Func5 %returnStruct
-}
-
-; Function Attrs: nounwind
-declare i8* @llvm.hpvm.createNode2D(i8*, i64, i64) #3
-
-; Function Attrs: nounwind
-declare void @llvm.hpvm.bind.input(i8*, i32, i32, i1) #3
-
-; Function Attrs: nounwind
-declare i8* @llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32, i1) #3
-
-; Function Attrs: nounwind
-declare void @llvm.hpvm.bind.output(i8*, i32, i32, i1) #3
-
-; Function Attrs: nounwind uwtable
-define dso_local %struct.out.Func3 @Func3_cloned(i32* in %In, i32* out %Out) #2 {
-entry:
-  %Func4_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%struct.out.Func4 (i32*, i32*)* @Func4_cloned to i8*), i64 3, i64 6)
-  %Func5_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%struct.out.Func5 (i32*, i32*, i32*)* @Func5_cloned to i8*), i64 4, i64 5)
-  call void @llvm.hpvm.bind.input(i8* %Func4_cloned.node, i32 0, i32 0, i1 false)
-  %output = call i8* @llvm.hpvm.createEdge(i8* %Func4_cloned.node, i8* %Func5_cloned.node, i1 false, i32 1, i32 1, i1 false)
-  call void @llvm.hpvm.bind.output(i8* %Func5_cloned.node, i32 0, i32 0, i1 false)
-  ret %struct.out.Func3 undef
-}
-
-; Function Attrs: nounwind
-declare i8* @llvm.hpvm.createNode1D(i8*, i64) #3
-
-; Function Attrs: nounwind uwtable
-define dso_local %struct.out.Func2 @Func2_cloned(i32* in %BindIn, i32* in %SrcIn, i32* out %Out) #2 {
-entry:
-  %Func3_cloned.node = call i8* @llvm.hpvm.createNode1D(i8* bitcast (%struct.out.Func3 (i32*, i32*)* @Func3_cloned to i8*), i64 3)
-  call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node, i32 0, i32 0, i1 false)
-  call void @llvm.hpvm.bind.output(i8* %Func3_cloned.node, i32 0, i32 0, i1 false)
-  ret %struct.out.Func2 undef
-}
-
-; Function Attrs: nounwind
-declare i8* @llvm.hpvm.createNode(i8*) #3
-
-; Function Attrs: nounwind uwtable
-define dso_local %struct.out.PipeRoot @PipeRoot_cloned(i32* in %In1, i32* in %In2, i32* out %Out) #2 {
-entry:
-  %Func1_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func1 (i32*, i32*)* @Func1_cloned to i8*))
-  %Func2_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func2 (i32*, i32*, i32*)* @Func2_cloned to i8*))
-  call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node, i32 0, i32 0, i1 false)
-  call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 1, i32 0, i1 false)
-  %output = call i8* @llvm.hpvm.createEdge(i8* %Func1_cloned.node, i8* %Func2_cloned.node, i1 false, i32 1, i32 1, i1 false)
-  call void @llvm.hpvm.bind.output(i8* %Func2_cloned.node, i32 0, i32 0, i1 false)
-  ret %struct.out.PipeRoot undef
-}
-
-; Function Attrs: nounwind
-declare void @llvm.hpvm.init() #3
-
-; Function Attrs: nounwind
-declare i8* @llvm.hpvm.launch(i8*, i8*, i1) #3
-
-; Function Attrs: nounwind
-declare void @llvm.hpvm.wait(i8*) #3
-
-; Function Attrs: nounwind
-declare void @llvm.hpvm.cleanup() #3
-
-attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
-attributes #1 = { argmemonly nounwind }
-attributes #2 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
-attributes #3 = { nounwind }
-
-!llvm.module.flags = !{!0}
-!llvm.ident = !{!1}
-!hpvm_hint_cpu = !{!2, !3, !4, !5, !6, !7}
-!hpvm_hint_gpu = !{}
-!hpvm_hint_spir = !{}
-!hpvm_hint_cudnn = !{}
-!hpvm_hint_promise = !{}
-!hpvm_hint_cpu_gpu = !{}
-!hpvm_hint_cpu_spir = !{}
-
-!0 = !{i32 1, !"wchar_size", i32 4}
-!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 6690f9e7e8b46b96aea222d3e85315cd63545953)"}
-!2 = !{%struct.out.Func1 (i32*, i32*)* @Func1_cloned}
-!3 = !{%struct.out.Func4 (i32*, i32*)* @Func4_cloned}
-!4 = !{%struct.out.Func5 (i32*, i32*, i32*)* @Func5_cloned}
-!5 = !{%struct.out.Func3 (i32*, i32*)* @Func3_cloned}
-!6 = !{%struct.out.Func2 (i32*, i32*, i32*)* @Func2_cloned}
-!7 = !{%struct.out.PipeRoot (i32*, i32*, i32*)* @PipeRoot_cloned}
-!8 = !{!9, !9, i64 0}
-!9 = !{!"int", !10, i64 0}
-!10 = !{!"omnipotent char", !11, i64 0}
-!11 = !{!"Simple C/C++ TBAA"}
-!12 = !{!13, !14, i64 0}
-!13 = !{!"Root", !14, i64 0, !14, i64 8, !14, i64 16}
-!14 = !{!"any pointer", !10, i64 0}
-!15 = !{!13, !14, i64 8}
-!16 = !{!13, !14, i64 16}
diff --git a/hpvm/test/regressionTests/BuildDFG/ThreeLevelEdge.ll b/hpvm/test/regressionTests/BuildDFG/ThreeLevelEdge.ll
index 4e97523049..8c76da27e8 100644
--- a/hpvm/test/regressionTests/BuildDFG/ThreeLevelEdge.ll
+++ b/hpvm/test/regressionTests/BuildDFG/ThreeLevelEdge.ll
@@ -1,10 +1,17 @@
-; RUN: opt -load LLVMGenHPVM.so -S -genhpvm <  %s | FileCheck %s
-; ModuleID = 'ThreeLevelEdge.c'
+; RUN: opt -load LLVMBuildDFG.so -S <  %s | FileCheck %s
+; ModuleID = 'ThreeLevelEdge.ll'
 source_filename = "ThreeLevelEdge.c"
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
 %struct.Root = type { i32*, i32*, i32* }
+%struct.out.Func1 = type <{ i32* }>
+%struct.out.Func4 = type <{ i32* }>
+%struct.out.Func5 = type <{ i32* }>
+%struct.out.Func3 = type <{ i32* }>
+%struct.out.Func2 = type <{ i32* }>
+%struct.out.PipeRoot = type <{ i32* }>
+
 ; CHECK-LABEL: struct.Root =
 
 ; CHECK-LABEL: %struct.out.Func1 =
@@ -54,99 +61,28 @@ target triple = "x86_64-unknown-linux-gnu"
 ; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func2_cloned.node,
 
 
-; Function Attrs: nounwind uwtable
-define dso_local void @Func1(i32* %In, i32* %Out) #0 {
-; CHECK-NOT: @Func1(
-entry:
-  tail call void @__hpvm__hint(i32 1) #3
-  tail call void (i32, ...) @__hpvm__attributes(i32 1, i32* %In, i32 1, i32* %Out) #3
-  tail call void (i32, ...) @__hpvm__return(i32 1, i32* %Out) #3
-  ret void
-}
-
-declare dso_local void @__hpvm__hint(i32) local_unnamed_addr #1
-
-declare dso_local void @__hpvm__attributes(i32, ...) local_unnamed_addr #1
+declare dso_local void @__hpvm__hint(i32) local_unnamed_addr #0
 
-declare dso_local void @__hpvm__return(i32, ...) local_unnamed_addr #1
-
-; Function Attrs: nounwind uwtable
-define dso_local void @Func4(i32* %In, i32* %Out) #0 {
-; CHECK-NOT: @Func4(
-entry:
-  tail call void @__hpvm__hint(i32 1) #3
-  tail call void (i32, ...) @__hpvm__attributes(i32 1, i32* %In, i32 1, i32* %Out) #3
-  tail call void (i32, ...) @__hpvm__return(i32 1, i32* %Out) #3
-  ret void
-}
+declare dso_local void @__hpvm__attributes(i32, ...) local_unnamed_addr #0
 
-; Function Attrs: nounwind uwtable
-define dso_local void @Func5(i32* %In1, i32* %In2, i32* %Out) #0 {
-; CHECK-NOT: @Func3(
-entry:
-  tail call void @__hpvm__hint(i32 1) #3
-  tail call void (i32, ...) @__hpvm__attributes(i32 2, i32* %In1, i32* %In2, i32 1, i32* %Out) #3
-  tail call void (i32, ...) @__hpvm__return(i32 1, i32* %Out) #3
-  ret void
-}
-
-; Function Attrs: nounwind uwtable
-define dso_local void @Func3(i32* %In, i32* %Out) #0 {
-; CHECK-NOT: @Func3(
-entry:
-  tail call void @__hpvm__hint(i32 1) #3
-  tail call void (i32, ...) @__hpvm__attributes(i32 1, i32* %In, i32 1, i32* %Out) #3
-  %call = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 2, void (i32*, i32*)* nonnull @Func4, i64 3, i64 6) #3
-  %call1 = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 2, void (i32*, i32*, i32*)* nonnull @Func5, i64 4, i64 5) #3
-  tail call void @__hpvm__bindIn(i8* %call, i32 0, i32 0, i32 0) #3
-  %call2 = tail call i8* @__hpvm__edge(i8* %call, i8* %call1, i32 0, i32 1, i32 1, i32 0) #3
-  tail call void @__hpvm__bindOut(i8* %call1, i32 0, i32 0, i32 0) #3
-  ret void
-}
+declare dso_local void @__hpvm__return(i32, ...) local_unnamed_addr #0
 
 ; Function Attrs: argmemonly nounwind
-declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
 
-declare dso_local i8* @__hpvm__createNodeND(i32, ...) local_unnamed_addr #1
+declare dso_local i8* @__hpvm__createNodeND(i32, ...) local_unnamed_addr #0
 
-declare dso_local void @__hpvm__bindIn(i8*, i32, i32, i32) local_unnamed_addr #1
+declare dso_local void @__hpvm__bindIn(i8*, i32, i32, i32) local_unnamed_addr #0
 
-declare dso_local i8* @__hpvm__edge(i8*, i8*, i32, i32, i32, i32) local_unnamed_addr #1
+declare dso_local i8* @__hpvm__edge(i8*, i8*, i32, i32, i32, i32) local_unnamed_addr #0
 
-declare dso_local void @__hpvm__bindOut(i8*, i32, i32, i32) local_unnamed_addr #1
+declare dso_local void @__hpvm__bindOut(i8*, i32, i32, i32) local_unnamed_addr #0
 
 ; Function Attrs: argmemonly nounwind
-declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2
-
-; Function Attrs: nounwind uwtable
-define dso_local void @Func2(i32* %BindIn, i32* %SrcIn, i32* %Out) #0 {
-; CHECK-NOT: @Func2(
-entry:
-  tail call void @__hpvm__hint(i32 1) #3
-  tail call void (i32, ...) @__hpvm__attributes(i32 2, i32* %BindIn, i32* %SrcIn, i32 1, i32* %Out) #3
-  %call = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 1, void (i32*, i32*)* nonnull @Func3, i64 3) #3
-  tail call void @__hpvm__bindIn(i8* %call, i32 0, i32 0, i32 0) #3
-  tail call void @__hpvm__bindOut(i8* %call, i32 0, i32 0, i32 0) #3
-  ret void
-}
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
 
 ; Function Attrs: nounwind uwtable
-define dso_local void @PipeRoot(i32* %In1, i32* %In2, i32* %Out) #0 {
-; CHECK-NOT: @PipeRoot(
-entry:
-  tail call void @__hpvm__hint(i32 1) #3
-  tail call void (i32, ...) @__hpvm__attributes(i32 2, i32* %In1, i32* %In2, i32 1, i32* %Out) #3
-  %call = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 0, void (i32*, i32*)* nonnull @Func1) #3
-  %call1 = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 0, void (i32*, i32*, i32*)* nonnull @Func2) #3
-  tail call void @__hpvm__bindIn(i8* %call, i32 0, i32 0, i32 0) #3
-  tail call void @__hpvm__bindIn(i8* %call1, i32 1, i32 0, i32 0) #3
-  %call2 = tail call i8* @__hpvm__edge(i8* %call, i8* %call1, i32 0, i32 1, i32 1, i32 0) #3
-  tail call void @__hpvm__bindOut(i8* %call1, i32 0, i32 0, i32 0) #3
-  ret void
-}
-
-; Function Attrs: nounwind uwtable
-define dso_local i32 @main() local_unnamed_addr #0 {
+define dso_local i32 @main() local_unnamed_addr #2 {
 entry:
   %In1 = alloca i32, align 4
   %In2 = alloca i32, align 4
@@ -154,25 +90,26 @@ entry:
   %RootArgs = alloca %struct.Root, align 8
   %0 = bitcast i32* %In1 to i8*
   call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #3
-  store i32 1, i32* %In1, align 4, !tbaa !2
+  store i32 1, i32* %In1, align 4, !tbaa !8
   %1 = bitcast i32* %In2 to i8*
   call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) #3
-  store i32 2, i32* %In2, align 4, !tbaa !2
+  store i32 2, i32* %In2, align 4, !tbaa !8
   %2 = bitcast i32* %Out to i8*
   call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %2) #3
-  store i32 0, i32* %Out, align 4, !tbaa !2
+  store i32 0, i32* %Out, align 4, !tbaa !8
   %3 = bitcast %struct.Root* %RootArgs to i8*
   call void @llvm.lifetime.start.p0i8(i64 24, i8* nonnull %3) #3
   %input1 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 0
-  store i32* %In1, i32** %input1, align 8, !tbaa !6
+  store i32* %In1, i32** %input1, align 8, !tbaa !12
   %intput2 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 1
-  store i32* %In2, i32** %intput2, align 8, !tbaa !9
+  store i32* %In2, i32** %intput2, align 8, !tbaa !15
   %output = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 2
-  store i32* %Out, i32** %output, align 8, !tbaa !10
-  call void (...) @__hpvm__init() #3
-  %call = call i8* (i32, ...) @__hpvm__launch(i32 0, void (i32*, i32*, i32*)* nonnull @PipeRoot, %struct.Root* nonnull %RootArgs) #3
-  call void @__hpvm__wait(i8* %call) #3
-  call void (...) @__hpvm__cleanup() #3
+  store i32* %Out, i32** %output, align 8, !tbaa !16
+  call void @llvm.hpvm.init()
+  %4 = bitcast %struct.Root* %RootArgs to i8*
+  %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot (i32*, i32*, i32*)* @PipeRoot_cloned to i8*), i8* %4, i1 false)
+  call void @llvm.hpvm.wait(i8* %graphID)
+  call void @llvm.hpvm.cleanup()
   call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull %3) #3
   call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %2) #3
   call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) #3
@@ -180,30 +117,132 @@ entry:
   ret i32 0
 }
 
-declare dso_local void @__hpvm__init(...) local_unnamed_addr #1
+declare dso_local void @__hpvm__init(...) local_unnamed_addr #0
+
+declare dso_local i8* @__hpvm__launch(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__wait(i8*) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__cleanup(...) local_unnamed_addr #0
+
+declare i8* @llvm_hpvm_initializeTimerSet()
+
+declare void @llvm_hpvm_switchToTimer(i8**, i32)
+
+declare void @llvm_hpvm_printTimerSet(i8**, i8*)
+
+; Function Attrs: nounwind uwtable
+define dso_local %struct.out.Func1 @Func1_cloned(i32* in %In, i32* out %Out) #2 {
+entry:
+  %returnStruct = insertvalue %struct.out.Func1 undef, i32* %Out, 0
+  ret %struct.out.Func1 %returnStruct
+}
+
+; Function Attrs: nounwind uwtable
+define dso_local %struct.out.Func4 @Func4_cloned(i32* in %In, i32* out %Out) #2 {
+entry:
+  %returnStruct = insertvalue %struct.out.Func4 undef, i32* %Out, 0
+  ret %struct.out.Func4 %returnStruct
+}
+
+; Function Attrs: nounwind uwtable
+define dso_local %struct.out.Func5 @Func5_cloned(i32* in %In1, i32* in %In2, i32* out %Out) #2 {
+entry:
+  %returnStruct = insertvalue %struct.out.Func5 undef, i32* %Out, 0
+  ret %struct.out.Func5 %returnStruct
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createNode2D(i8*, i64, i64) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.bind.input(i8*, i32, i32, i1) #3
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32, i1) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.bind.output(i8*, i32, i32, i1) #3
+
+; Function Attrs: nounwind uwtable
+define dso_local %struct.out.Func3 @Func3_cloned(i32* in %In, i32* out %Out) #2 {
+entry:
+  %Func4_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%struct.out.Func4 (i32*, i32*)* @Func4_cloned to i8*), i64 3, i64 6)
+  %Func5_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%struct.out.Func5 (i32*, i32*, i32*)* @Func5_cloned to i8*), i64 4, i64 5)
+  call void @llvm.hpvm.bind.input(i8* %Func4_cloned.node, i32 0, i32 0, i1 false)
+  %output = call i8* @llvm.hpvm.createEdge(i8* %Func4_cloned.node, i8* %Func5_cloned.node, i1 false, i32 1, i32 1, i1 false)
+  call void @llvm.hpvm.bind.output(i8* %Func5_cloned.node, i32 0, i32 0, i1 false)
+  ret %struct.out.Func3 undef
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createNode1D(i8*, i64) #3
+
+; Function Attrs: nounwind uwtable
+define dso_local %struct.out.Func2 @Func2_cloned(i32* in %BindIn, i32* in %SrcIn, i32* out %Out) #2 {
+entry:
+  %Func3_cloned.node = call i8* @llvm.hpvm.createNode1D(i8* bitcast (%struct.out.Func3 (i32*, i32*)* @Func3_cloned to i8*), i64 3)
+  call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node, i32 0, i32 0, i1 false)
+  call void @llvm.hpvm.bind.output(i8* %Func3_cloned.node, i32 0, i32 0, i1 false)
+  ret %struct.out.Func2 undef
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createNode(i8*) #3
+
+; Function Attrs: nounwind uwtable
+define dso_local %struct.out.PipeRoot @PipeRoot_cloned(i32* in %In1, i32* in %In2, i32* out %Out) #2 {
+entry:
+  %Func1_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func1 (i32*, i32*)* @Func1_cloned to i8*))
+  %Func2_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func2 (i32*, i32*, i32*)* @Func2_cloned to i8*))
+  call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node, i32 0, i32 0, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 1, i32 0, i1 false)
+  %output = call i8* @llvm.hpvm.createEdge(i8* %Func1_cloned.node, i8* %Func2_cloned.node, i1 false, i32 1, i32 1, i1 false)
+  call void @llvm.hpvm.bind.output(i8* %Func2_cloned.node, i32 0, i32 0, i1 false)
+  ret %struct.out.PipeRoot undef
+}
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.init() #3
 
-declare dso_local i8* @__hpvm__launch(i32, ...) local_unnamed_addr #1
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.launch(i8*, i8*, i1) #3
 
-declare dso_local void @__hpvm__wait(i8*) local_unnamed_addr #1
+; Function Attrs: nounwind
+declare void @llvm.hpvm.wait(i8*) #3
 
-declare dso_local void @__hpvm__cleanup(...) local_unnamed_addr #1
+; Function Attrs: nounwind
+declare void @llvm.hpvm.cleanup() #3
 
-attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
-attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
-attributes #2 = { argmemonly nounwind }
+attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
 attributes #3 = { nounwind }
 
 !llvm.module.flags = !{!0}
 !llvm.ident = !{!1}
+!hpvm_hint_cpu = !{!2, !3, !4, !5, !6, !7}
+!hpvm_hint_gpu = !{}
+!hpvm_hint_spir = !{}
+!hpvm_hint_cudnn = !{}
+!hpvm_hint_promise = !{}
+!hpvm_hint_cpu_gpu = !{}
+!hpvm_hint_cpu_spir = !{}
 
 !0 = !{i32 1, !"wchar_size", i32 4}
 !1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 6690f9e7e8b46b96aea222d3e85315cd63545953)"}
-!2 = !{!3, !3, i64 0}
-!3 = !{!"int", !4, i64 0}
-!4 = !{!"omnipotent char", !5, i64 0}
-!5 = !{!"Simple C/C++ TBAA"}
-!6 = !{!7, !8, i64 0}
-!7 = !{!"Root", !8, i64 0, !8, i64 8, !8, i64 16}
-!8 = !{!"any pointer", !4, i64 0}
-!9 = !{!7, !8, i64 8}
-!10 = !{!7, !8, i64 16}
+!2 = !{%struct.out.Func1 (i32*, i32*)* @Func1_cloned}
+!3 = !{%struct.out.Func4 (i32*, i32*)* @Func4_cloned}
+!4 = !{%struct.out.Func5 (i32*, i32*, i32*)* @Func5_cloned}
+!5 = !{%struct.out.Func3 (i32*, i32*)* @Func3_cloned}
+!6 = !{%struct.out.Func2 (i32*, i32*, i32*)* @Func2_cloned}
+!7 = !{%struct.out.PipeRoot (i32*, i32*, i32*)* @PipeRoot_cloned}
+!8 = !{!9, !9, i64 0}
+!9 = !{!"int", !10, i64 0}
+!10 = !{!"omnipotent char", !11, i64 0}
+!11 = !{!"Simple C/C++ TBAA"}
+!12 = !{!13, !14, i64 0}
+!13 = !{!"Root", !14, i64 0, !14, i64 8, !14, i64 16}
+!14 = !{!"any pointer", !10, i64 0}
+!15 = !{!13, !14, i64 8}
+!16 = !{!13, !14, i64 16}
diff --git a/hpvm/test/regressionTests/BuildDFG/TwoNode.ll b/hpvm/test/regressionTests/BuildDFG/TwoNode.ll
deleted file mode 100644
index 49b9b35205..0000000000
--- a/hpvm/test/regressionTests/BuildDFG/TwoNode.ll
+++ /dev/null
@@ -1,197 +0,0 @@
-; RUN: opt -load LLVMBuildDFG.so -S < %s | FileCheck %s
-; ModuleID = 'CreateNodeAndEdge.ll'
-source_filename = "CreateNodeAndEdge.c"
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-%struct.Root = type { i32*, i32*, i32* }
-%struct.out.Func1 = type <{ i32* }>
-%struct.out.Func2 = type <{ i32* }>
-%struct.out.PipeRoot = type <{ i32* }>
-
-; CHECK-LABEL: struct.Root =
-
-; CHECK-LABEL: %struct.out.Func1 =
-; CHECK-LABEL: %struct.out.Func3 =
-; CHECK-LABEL: %struct.out.Func2 =
-; CHECK-LABEL: %struct.out.PipeRoot =
-
-; CHECK-LABEL: i32 @main(
-; CHECK: [[ALLOCA:%[1-9a-zA-Z]+]] = alloca %struct.Root
-; CHECK: call void @llvm.hpvm.init()
-; CHECK:  [[REGISTER:%[1-9]+]] = bitcast %struct.Root* [[ALLOCA]] to i8*
-; CHECK: call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot (i32*, i32*, i32*)* @PipeRoot_cloned to i8*), i8* %4,
-; CHECK-NEXT: call void @llvm.hpvm.wait(i8*
-
-; CHECK-LABEL: @Func1_cloned(
-; CHECK: [[RET1:%[1-9a-zA-Z]+]] = insertvalue %struct.out.Func1 undef,
-; CHECK-NEXT: ret %struct.out.Func1 [[RET1]]
-
-; CHECK-LABEL: @Func3_cloned(
-; CHECK: [[RET3:%[1-9a-zA-Z]+]] = insertvalue %struct.out.Func3 undef,
-; CHECK-NEXT: ret %struct.out.Func3 [[RET3]]
-
-; CHECK-LABEL: @Func2_cloned(
-; CHECK: %Func3_cloned.node = call i8* @llvm.hpvm.createNode1D(i8* bitcast (%struct.out.Func3 (i32*
-; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node,
-; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func3_cloned.node,
-
-; CHECK-LABEL: @PipeRoot_cloned(i32*
-; CHECK: %Func1_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func1 (i32*
-; CHECK-NEXT: %Func2_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func2 (i32*
-; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node,
-; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node,
-; CHECK-NEXT: call i8* @llvm.hpvm.createEdge(i8* %Func1_cloned.node, i8* %Func2_cloned.node,
-; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func2_cloned.node,
-
-
-
-declare dso_local void @__hpvm__hint(i32) local_unnamed_addr #0
-
-declare dso_local void @__hpvm__attributes(i32, ...) local_unnamed_addr #0
-
-declare dso_local void @__hpvm__return(i32, ...) local_unnamed_addr #0
-
-; Function Attrs: argmemonly nounwind
-declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
-
-declare dso_local i8* @__hpvm__createNodeND(i32, ...) local_unnamed_addr #0
-
-declare dso_local void @__hpvm__bindIn(i8*, i32, i32, i32) local_unnamed_addr #0
-
-declare dso_local i8* @__hpvm__edge(i8*, i8*, i32, i32, i32, i32) local_unnamed_addr #0
-
-declare dso_local void @__hpvm__bindOut(i8*, i32, i32, i32) local_unnamed_addr #0
-
-; Function Attrs: argmemonly nounwind
-declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
-
-; Function Attrs: nounwind uwtable
-define dso_local i32 @main() local_unnamed_addr #2 {
-entry:
-  %In1 = alloca i32, align 4
-  %In2 = alloca i32, align 4
-  %Out = alloca i32, align 4
-  %RootArgs = alloca %struct.Root, align 8
-  %0 = bitcast i32* %In1 to i8*
-  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #3
-  store i32 1, i32* %In1, align 4, !tbaa !5
-  %1 = bitcast i32* %In2 to i8*
-  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) #3
-  store i32 2, i32* %In2, align 4, !tbaa !5
-  %2 = bitcast i32* %Out to i8*
-  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %2) #3
-  store i32 0, i32* %Out, align 4, !tbaa !5
-  %3 = bitcast %struct.Root* %RootArgs to i8*
-  call void @llvm.lifetime.start.p0i8(i64 24, i8* nonnull %3) #3
-  %input1 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 0
-  store i32* %In1, i32** %input1, align 8, !tbaa !9
-  %input2 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 1
-  store i32* %In2, i32** %input2, align 8, !tbaa !12
-  %output = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 2
-  store i32* %Out, i32** %output, align 8, !tbaa !13
-  call void @llvm.hpvm.init()
-  %4 = bitcast %struct.Root* %RootArgs to i8*
-  %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot (i32*, i32*, i32*)* @PipeRoot_cloned to i8*), i8* %4, i1 false)
-  call void @llvm.hpvm.wait(i8* %graphID)
-  call void @llvm.hpvm.cleanup()
-  call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull %3) #3
-  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %2) #3
-  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) #3
-  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #3
-  ret i32 0
-}
-
-declare dso_local void @__hpvm__init(...) local_unnamed_addr #0
-
-declare dso_local i8* @__hpvm__launch(i32, ...) local_unnamed_addr #0
-
-declare dso_local void @__hpvm__wait(i8*) local_unnamed_addr #0
-
-declare dso_local void @__hpvm__cleanup(...) local_unnamed_addr #0
-
-declare i8* @llvm_hpvm_initializeTimerSet()
-
-declare void @llvm_hpvm_switchToTimer(i8**, i32)
-
-declare void @llvm_hpvm_printTimerSet(i8**, i8*)
-
-; Function Attrs: nounwind uwtable
-define dso_local %struct.out.Func1 @Func1_cloned(i32* in %In, i32* out %Out) #2 {
-entry:
-  %returnStruct = insertvalue %struct.out.Func1 undef, i32* %Out, 0
-  ret %struct.out.Func1 %returnStruct
-}
-
-; Function Attrs: nounwind uwtable
-define dso_local %struct.out.Func2 @Func2_cloned(i32* in %BindIn, i32* in %SrcIn, i32* out %Out) #2 {
-entry:
-  %returnStruct = insertvalue %struct.out.Func2 undef, i32* %Out, 0
-  ret %struct.out.Func2 %returnStruct
-}
-
-; Function Attrs: nounwind
-declare i8* @llvm.hpvm.createNode(i8*) #3
-
-; Function Attrs: nounwind
-declare void @llvm.hpvm.bind.input(i8*, i32, i32, i1) #3
-
-; Function Attrs: nounwind
-declare i8* @llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32, i1) #3
-
-; Function Attrs: nounwind
-declare void @llvm.hpvm.bind.output(i8*, i32, i32, i1) #3
-
-; Function Attrs: nounwind uwtable
-define dso_local %struct.out.PipeRoot @PipeRoot_cloned(i32* in %In1, i32* in %In2, i32* out %Out) #2 {
-entry:
-  %Func1_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func1 (i32*, i32*)* @Func1_cloned to i8*))
-  %Func2_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func2 (i32*, i32*, i32*)* @Func2_cloned to i8*))
-  call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node, i32 0, i32 0, i1 false)
-  call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 1, i32 0, i1 false)
-  %output = call i8* @llvm.hpvm.createEdge(i8* %Func1_cloned.node, i8* %Func2_cloned.node, i1 false, i32 1, i32 1, i1 false)
-  call void @llvm.hpvm.bind.output(i8* %Func2_cloned.node, i32 0, i32 0, i1 false)
-  ret %struct.out.PipeRoot undef
-}
-
-; Function Attrs: nounwind
-declare void @llvm.hpvm.init() #3
-
-; Function Attrs: nounwind
-declare i8* @llvm.hpvm.launch(i8*, i8*, i1) #3
-
-; Function Attrs: nounwind
-declare void @llvm.hpvm.wait(i8*) #3
-
-; Function Attrs: nounwind
-declare void @llvm.hpvm.cleanup() #3
-
-attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
-attributes #1 = { argmemonly nounwind }
-attributes #2 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
-attributes #3 = { nounwind }
-
-!llvm.module.flags = !{!0}
-!llvm.ident = !{!1}
-!hpvm_hint_cpu = !{!2, !3, !4}
-!hpvm_hint_gpu = !{}
-!hpvm_hint_spir = !{}
-!hpvm_hint_cudnn = !{}
-!hpvm_hint_promise = !{}
-!hpvm_hint_cpu_gpu = !{}
-!hpvm_hint_cpu_spir = !{}
-
-!0 = !{i32 1, !"wchar_size", i32 4}
-!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 6690f9e7e8b46b96aea222d3e85315cd63545953)"}
-!2 = !{%struct.out.Func1 (i32*, i32*)* @Func1_cloned}
-!3 = !{%struct.out.Func2 (i32*, i32*, i32*)* @Func2_cloned}
-!4 = !{%struct.out.PipeRoot (i32*, i32*, i32*)* @PipeRoot_cloned}
-!5 = !{!6, !6, i64 0}
-!6 = !{!"int", !7, i64 0}
-!7 = !{!"omnipotent char", !8, i64 0}
-!8 = !{!"Simple C/C++ TBAA"}
-!9 = !{!10, !11, i64 0}
-!10 = !{!"Root", !11, i64 0, !11, i64 8, !11, i64 16}
-!11 = !{!"any pointer", !7, i64 0}
-!12 = !{!10, !11, i64 8}
-!13 = !{!10, !11, i64 16}
diff --git a/hpvm/test/regressionTests/GenHPVM/AllocationNode.ll b/hpvm/test/regressionTests/GenHPVM/AllocationNode.ll
index db5f3ebbfc..8f23a425cc 100644
--- a/hpvm/test/regressionTests/GenHPVM/AllocationNode.ll
+++ b/hpvm/test/regressionTests/GenHPVM/AllocationNode.ll
@@ -1,67 +1,54 @@
 ; RUN: opt -load LLVMGenHPVM.so -S -genhpvm <  %s | FileCheck %s
-; ModuleID = 'AllocationNode.c'
-source_filename = "AllocationNode.c"
+; ModuleID = 'ThreeLevel.allocation.c'
+source_filename = "ThreeLevel.allocation.c"
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-%struct.Root = type { i64*, i64* }
-; CHECK-LABEL: struct.Root =
+%struct.Root = type { i32*, i64, i32*, i64 }
+; CHECK-LABEL: %struct.out.Allocation =
 
-; CHECK-LABEL: %struct.out.Func1 =
-; CHECK-LABEL: %struct.out.Func4 =
-; CHECK-LABEL: %struct.out.Func5 =
-; CHECK-LABEL: %struct.out.Func3 =
-; CHECK-LABEL: %struct.out.Func2 =
-; CHECK-LABEL: %struct.out.PipeRoot =
+; CHECK-LABEL: void @Launch(
+; CHECK: call i8* @llvm.hpvm.launch(i8*
+; CHECK-NEXT: call void @llvm.hpvm.wait(i8*
 
 ; CHECK-LABEL: i32 @main(
-; CHECK: [[ALLOCA:%[1-9a-zA-Z]+]] = alloca %struct.Root
 ; CHECK: call void @llvm.hpvm.init()
-; CHECK:  [[REGISTER:%[1-9]+]] = bitcast %struct.Root* [[ALLOCA]] to i8*
-; CHECK: call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot (i64*, i64*)* @PipeRoot_cloned to i8*), i8* [[REGISTER]], i1 false)
-; CHECK-NEXT: call void @llvm.hpvm.wait(i8*
+; CHECK-NEXT: tail call void @Launch(
+; CHECK-NEXT: call void @llvm.hpvm.cleanup()
 
-; CHECK-LABEL: @Func1_cloned(
-; CHECK: [[RET1:%[1-9a-zA-Z]+]] = insertvalue %struct.out.Func1 undef,
-; CHECK-NEXT: ret %struct.out.Func1 [[RET1]]
+; CHECK-LABEL: @Allocation_cloned(
+; CHECK: call i8* @llvm.hpvm.malloc(i64
 
-; CHECK-LABEL: @Func4_cloned(
-; CHECK: [[RET4:%[1-9a-zA-Z]+]] = call i8* @llvm.hpvm.malloc(i64
-; CHECK: [[RET4:%[1-9a-zA-Z]+]] = insertvalue %struct.out.Func4 undef, i8* [[RET4]], 0
-; CHECK-NEXT: ret %struct.out.Func4 [[RET4]]
-
-; CHECK-LABEL: @Func5_cloned(
-; CHECK: [[RET5:%[1-9a-zA-Z]+]] = insertvalue %struct.out.Func5 undef,
-; CHECK-NEXT: ret %struct.out.Func5 [[RET5]]
+; CHECK-LABEL: @Func1_cloned(
 
 ; CHECK-LABEL: @Func3_cloned(
-; CHECK: %Func4_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%struct.out.Func4 (i64*
-; CHECK-NEXT: %Func5_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%struct.out.Func5 (i64*
-; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func4_cloned.node,
-; CHECK-NEXT: call i8* @llvm.hpvm.createEdge(i8* %Func4_cloned.node, i8* %Func5_cloned.node,
-; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func4_cloned.node,
+; CHECK: %Func1_cloned.node = call i8* @llvm.hpvm.createNode2D(i8*
+; CHECK: %Allocation_cloned.node = call i8* @llvm.hpvm.createNode(i8*
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Allocation_cloned.node
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node
+; CHECK-NEXT: call i8* @llvm.hpvm.createEdge(i8* %Allocation_cloned.node
+; CHECK-NEXT: call i8* @llvm.hpvm.createEdge(i8* %Allocation_cloned.node
 
 ; CHECK-LABEL: @Func2_cloned(
-; CHECK: %Func3_cloned.node = call i8* @llvm.hpvm.createNode1D(i8* bitcast (%struct.out.Func3 (i64*
-; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node,
-; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func3_cloned.node,
-
-; CHECK-LABEL: @PipeRoot_cloned(i64*
-; CHECK: %Func1_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func1 (i64*
-; CHECK-NEXT: %Func2_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func2 (i64*
-; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node,
-; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node,
-; CHECK-NEXT: call i8* @llvm.hpvm.createEdge(i8* %Func1_cloned.node, i8* %Func2_cloned.node,
-; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func1_cloned.node,
-
+; CHECK: %Func3_cloned.node = call i8* @llvm.hpvm.createNode2D(
+; CHECK-NEXT:  call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node
+; CHECK-NEXT:  call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node
+; CHECK-NEXT:  call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node
+; CHECK-NEXT:  call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node
+
+; CHECK-LABEL: @PipeRoot_cloned(
+; CHECK: %Func2_cloned.node = call i8* @llvm.hpvm.createNode(i8*
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node
 
 ; Function Attrs: nounwind uwtable
-define dso_local void @Func1(i64* %In, i64* %Out) #0 {
-; CHECK-NOT: @Func1(
+define dso_local void @Func1(i32* %In, i64 %Insize, i32* %Out, i64 %Outsize) #0 {
 entry:
-  tail call void @__hpvm__hint(i32 1) #3
-  tail call void (i32, ...) @__hpvm__attributes(i32 1, i64* %In, i32 1, i64* %Out) #3
-  tail call void (i32, ...) @__hpvm__return(i32 1, i64* %Out) #3
+  tail call void @__hpvm__hint(i32 2) #5
+  tail call void (i32, ...) @__hpvm__attributes(i32 2, i32* %In, i32* %Out, i32 1, i32* %Out) #5
   ret void
 }
 
@@ -69,138 +56,132 @@ declare dso_local void @__hpvm__hint(i32) local_unnamed_addr #1
 
 declare dso_local void @__hpvm__attributes(i32, ...) local_unnamed_addr #1
 
-declare dso_local void @__hpvm__return(i32, ...) local_unnamed_addr #1
-
 ; Function Attrs: nounwind uwtable
-define dso_local void @Func4(i64* %In, i64* %Out) #0 {
-; CHECK-NOT: @Func4(
+define dso_local void @Allocation(i64 %block) #0 {
 entry:
-  tail call void @__hpvm__hint(i32 1) #3
-  tail call void (i32, ...) @__hpvm__attributes(i32 1, i64* %In, i32 1, i64* %Out) #3
-  %0 = load i64, i64* %In, align 8, !tbaa !2
-  %call = tail call i8* @__hpvm__malloc(i64 %0) #3
-  tail call void (i32, ...) @__hpvm__return(i32 1, i8* %call) #3
+  %call = tail call i8* @__hpvm__malloc(i64 %block) #5
+  tail call void (i32, ...) @__hpvm__return(i32 2, i8* %call, i64 %block) #5
   ret void
 }
 
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2
+
 declare dso_local i8* @__hpvm__malloc(i64) local_unnamed_addr #1
 
-; Function Attrs: nounwind uwtable
-define dso_local void @Func5(i64* %In, i64* %Out) #0 {
-; CHECK-NOT: @Func5(
-entry:
-  tail call void @__hpvm__hint(i32 1) #3
-  tail call void (i32, ...) @__hpvm__attributes(i32 1, i64* %In, i32 1, i64* %Out) #3
-  tail call void (i32, ...) @__hpvm__return(i32 1, i64* %Out) #3
-  ret void
-}
+declare dso_local void @__hpvm__return(i32, ...) local_unnamed_addr #1
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2
 
 ; Function Attrs: nounwind uwtable
-define dso_local void @Func3(i64* %In, i64* %Out) #0 {
-; CHECK-NOT: @Func3(
+define dso_local void @Func3(i32* %In, i64 %Insize, i32* %Out, i64 %Outsize) #0 {
 entry:
-  tail call void @__hpvm__hint(i32 1) #3
-  tail call void (i32, ...) @__hpvm__attributes(i32 1, i64* %In, i32 1, i64* %Out) #3
-  %call = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 2, void (i64*, i64*)* nonnull @Func4, i64 3, i64 6) #3
-  %call1 = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 2, void (i64*, i64*)* nonnull @Func5, i64 4, i64 5) #3
-  tail call void @__hpvm__bindIn(i8* %call, i32 0, i32 0, i32 0) #3
-  %call2 = tail call i8* @__hpvm__edge(i8* %call, i8* %call1, i32 1, i32 0, i32 1, i32 0) #3
-  tail call void @__hpvm__bindOut(i8* %call, i32 0, i32 0, i32 0) #3
+  tail call void @__hpvm__hint(i32 1) #5
+  tail call void (i32, ...) @__hpvm__attributes(i32 2, i32* %In, i32* %Out, i32 1, i32* %Out) #5
+  %call = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 2, void (i32*, i64, i32*, i64)* nonnull @Func1, i64 3, i64 5) #5
+  %call1 = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 0, void (i64)* nonnull @Allocation) #5
+  tail call void @__hpvm__bindIn(i8* %call1, i32 1, i32 0, i32 0) #5
+  tail call void @__hpvm__bindIn(i8* %call, i32 2, i32 2, i32 0) #5
+  tail call void @__hpvm__bindIn(i8* %call, i32 3, i32 3, i32 0) #5
+  %call2 = tail call i8* @__hpvm__edge(i8* %call1, i8* %call, i32 1, i32 0, i32 0, i32 0) #5
+  %call3 = tail call i8* @__hpvm__edge(i8* %call1, i8* %call, i32 1, i32 1, i32 1, i32 0) #5
   ret void
 }
 
-; Function Attrs: argmemonly nounwind
-declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2
-
 declare dso_local i8* @__hpvm__createNodeND(i32, ...) local_unnamed_addr #1
 
 declare dso_local void @__hpvm__bindIn(i8*, i32, i32, i32) local_unnamed_addr #1
 
 declare dso_local i8* @__hpvm__edge(i8*, i8*, i32, i32, i32, i32) local_unnamed_addr #1
 
-declare dso_local void @__hpvm__bindOut(i8*, i32, i32, i32) local_unnamed_addr #1
-
-; Function Attrs: argmemonly nounwind
-declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2
-
 ; Function Attrs: nounwind uwtable
-define dso_local void @Func2(i64* %BindIn, i64* %SrcIn, i64* %Out) #0 {
-; CHECK-NOT: @Func2(
+define dso_local void @Func2(i32* %In, i64 %Insize, i32* %Out, i64 %Outsize) #0 {
 entry:
-  tail call void @__hpvm__hint(i32 1) #3
-  tail call void (i32, ...) @__hpvm__attributes(i32 2, i64* %BindIn, i64* %SrcIn, i32 1, i64* %Out) #3
-  %call = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 1, void (i64*, i64*)* nonnull @Func3, i64 3) #3
-  tail call void @__hpvm__bindIn(i8* %call, i32 0, i32 0, i32 0) #3
-  tail call void @__hpvm__bindOut(i8* %call, i32 0, i32 0, i32 0) #3
+  tail call void @__hpvm__hint(i32 1) #5
+  tail call void (i32, ...) @__hpvm__attributes(i32 2, i32* %In, i32* %Out, i32 1, i32* %Out) #5
+  %call = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 2, void (i32*, i64, i32*, i64)* nonnull @Func3, i64 3, i64 5) #5
+  tail call void @__hpvm__bindIn(i8* %call, i32 0, i32 0, i32 0) #5
+  tail call void @__hpvm__bindIn(i8* %call, i32 1, i32 1, i32 0) #5
+  tail call void @__hpvm__bindIn(i8* %call, i32 2, i32 2, i32 0) #5
+  tail call void @__hpvm__bindIn(i8* %call, i32 3, i32 3, i32 0) #5
   ret void
 }
 
 ; Function Attrs: nounwind uwtable
-define dso_local void @PipeRoot(i64* %In, i64* %Out) #0 {
-; CHECK-NOT: @PipeRoot(
+define dso_local void @PipeRoot(i32* %In, i64 %Insize, i32* %Out, i64 %Outsize) #0 {
 entry:
-  tail call void @__hpvm__hint(i32 1) #3
-  tail call void (i32, ...) @__hpvm__attributes(i32 1, i64* %In, i32 1, i64* %Out) #3
-  %call = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 0, void (i64*, i64*)* nonnull @Func1) #3
-  %call1 = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 0, void (i64*, i64*, i64*)* nonnull @Func2) #3
-  tail call void @__hpvm__bindIn(i8* %call, i32 0, i32 0, i32 0) #3
-  tail call void @__hpvm__bindIn(i8* %call1, i32 0, i32 0, i32 0) #3
-  %call2 = tail call i8* @__hpvm__edge(i8* %call, i8* %call1, i32 1, i32 0, i32 1, i32 0) #3
-  tail call void @__hpvm__bindOut(i8* %call, i32 0, i32 0, i32 0) #3
+  tail call void @__hpvm__hint(i32 1) #5
+  tail call void (i32, ...) @__hpvm__attributes(i32 2, i32* %In, i32* %Out, i32 1, i32* %Out) #5
+  %call = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 0, void (i32*, i64, i32*, i64)* nonnull @Func2) #5
+  tail call void @__hpvm__bindIn(i8* %call, i32 0, i32 0, i32 0) #5
+  tail call void @__hpvm__bindIn(i8* %call, i32 1, i32 1, i32 0) #5
+  tail call void @__hpvm__bindIn(i8* %call, i32 2, i32 2, i32 0) #5
+  tail call void @__hpvm__bindIn(i8* %call, i32 3, i32 3, i32 0) #5
   ret void
 }
 
-; Function Attrs: nounwind uwtable
-define dso_local i32 @main() local_unnamed_addr #0 {
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @Launch() local_unnamed_addr #3 {
 entry:
-  %In = alloca i64, align 8
-  %Out = alloca i64, align 8
   %RootArgs = alloca %struct.Root, align 8
-  %0 = bitcast i64* %In to i8*
-  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %0) #3
-  store i64 1, i64* %In, align 8, !tbaa !2
-  %1 = bitcast i64* %Out to i8*
-  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %1) #3
-  store i64 0, i64* %Out, align 8, !tbaa !2
-  %2 = bitcast %struct.Root* %RootArgs to i8*
-  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %2) #3
-  %input = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 0
-  store i64* %In, i64** %input, align 8, !tbaa !6
-  %output = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 1
-  store i64* %Out, i64** %output, align 8, !tbaa !9
-  call void (...) @__hpvm__init() #3
-  %call = call i8* (i32, ...) @__hpvm__launch(i32 0, void (i64*, i64*)* nonnull @PipeRoot, %struct.Root* nonnull %RootArgs) #3
-  call void @__hpvm__wait(i8* %call) #3
-  call void (...) @__hpvm__cleanup() #3
-  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %2) #3
-  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %1) #3
-  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %0) #3
-  ret i32 0
+  %0 = bitcast %struct.Root* %RootArgs to i8*
+  call void @llvm.lifetime.start.p0i8(i64 32, i8* nonnull %0) #5
+  %call = tail call noalias i8* @malloc(i64 1024) #5
+  %1 = bitcast %struct.Root* %RootArgs to i8**
+  store i8* %call, i8** %1, align 8, !tbaa !2
+  %Insize = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 1
+  store i64 1024, i64* %Insize, align 8, !tbaa !8
+  %output = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 2
+  %call1 = tail call noalias i8* @malloc(i64 1024) #5
+  %2 = bitcast i32** %output to i8**
+  store i8* %call1, i8** %2, align 8, !tbaa !9
+  %Outsize = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 3
+  store i64 1024, i64* %Outsize, align 8, !tbaa !10
+  %call2 = call i8* (i32, ...) @__hpvm__launch(i32 0, void (i32*, i64, i32*, i64)* nonnull @PipeRoot, %struct.Root* nonnull %RootArgs) #5
+  call void @__hpvm__wait(i8* %call2) #5
+  call void @llvm.lifetime.end.p0i8(i64 32, i8* nonnull %0) #5
+  ret void
 }
 
-declare dso_local void @__hpvm__init(...) local_unnamed_addr #1
+; Function Attrs: nofree nounwind
+declare dso_local noalias i8* @malloc(i64) local_unnamed_addr #4
 
 declare dso_local i8* @__hpvm__launch(i32, ...) local_unnamed_addr #1
 
 declare dso_local void @__hpvm__wait(i8*) local_unnamed_addr #1
 
+; Function Attrs: nounwind uwtable
+define dso_local i32 @main() local_unnamed_addr #0 {
+entry:
+  tail call void (...) @__hpvm__init() #5
+  tail call void @Launch()
+  tail call void (...) @__hpvm__cleanup() #5
+  ret i32 0
+}
+
+declare dso_local void @__hpvm__init(...) local_unnamed_addr #1
+
 declare dso_local void @__hpvm__cleanup(...) local_unnamed_addr #1
 
-attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
-attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #2 = { argmemonly nounwind }
-attributes #3 = { nounwind }
+attributes #3 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #4 = { nofree nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #5 = { nounwind }
 
 !llvm.module.flags = !{!0}
 !llvm.ident = !{!1}
 
 !0 = !{i32 1, !"wchar_size", i32 4}
-!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 6690f9e7e8b46b96aea222d3e85315cd63545953)"}
-!2 = !{!3, !3, i64 0}
-!3 = !{!"long", !4, i64 0}
-!4 = !{!"omnipotent char", !5, i64 0}
-!5 = !{!"Simple C/C++ TBAA"}
-!6 = !{!7, !8, i64 0}
-!7 = !{!"Root", !8, i64 0, !8, i64 8}
-!8 = !{!"any pointer", !4, i64 0}
-!9 = !{!7, !8, i64 8}
+!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 3551132592a00cab6c966df508ab511598269f78)"}
+!2 = !{!3, !4, i64 0}
+!3 = !{!"Root", !4, i64 0, !7, i64 8, !4, i64 16, !7, i64 24}
+!4 = !{!"any pointer", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
+!7 = !{!"long", !5, i64 0}
+!8 = !{!3, !7, i64 8}
+!9 = !{!3, !4, i64 16}
+!10 = !{!3, !7, i64 24}
-- 
GitLab