From a4edeab0b969c4ef1a94fd49fe973baef4d4d1b3 Mon Sep 17 00:00:00 2001
From: Akash Kothari <akashk4@tyler.cs.illinois.edu>
Date: Fri, 24 Jan 2020 16:39:03 -0600
Subject: [PATCH] Adding regression tests

---
 hpvm/CMakeLists.txt                           |  21 ++
 hpvm/test/CMakeLists.txt                      | 169 ++++++++++
 .../BuildDFG/CreateNode.genvisc.ll            | 151 +++++++++
 .../BuildDFG/CreateNodeAndEdge.genvisc.ll     | 191 +++++++++++
 .../BuildDFG/LeafBindEdge.genvisc.ll          | 250 ++++++++++++++
 .../BuildDFG/LeafInComingBindEdge.ll          | 200 ++++++++++++
 .../BuildDFG/LeafNodeGetters.genvisc.ll       | 304 ++++++++++++++++++
 hpvm/test/regressionTests/BuildDFG/OneRoot.ll | 111 +++++++
 .../BuildDFG/ThreeLevel.genvisc.ll            | 231 +++++++++++++
 .../BuildDFG/ThreeLevelEdge.genvisc.ll        | 248 ++++++++++++++
 .../BuildDFG/ThreeLevelEdge.ll                | 209 ++++++++++++
 .../BuildDFG/TwoLaunch.genvisc.ll             | 122 +++++++
 .../test/regressionTests/BuildDFG/TwoLevel.ll | 167 ++++++++++
 .../regressionTests/BuildDFG/TwoLevelGraph.ll | 210 ++++++++++++
 hpvm/test/regressionTests/BuildDFG/TwoNode.ll | 197 ++++++++++++
 .../ThreeLevel.atomic.genvisc.ll              | 280 ++++++++++++++++
 .../DFG2LLVM_NVPTX/ThreeLevel.genvisc.ll      | 239 ++++++++++++++
 .../DFG2LLVM_NVPTX/ThreeLevel.opt.genvisc.ll  | 261 +++++++++++++++
 .../DFG2LLVM_X86/CreateNode.dfg.ll            | 182 +++++++++++
 .../DFG2LLVM_X86/ThreeLevel.dfg.ll            | 246 ++++++++++++++
 .../DFG2LLVM_X86/TwoLaunch.dfg.ll             | 230 +++++++++++++
 .../DFG2LLVM_X86/TwoLevel.dfg.ll              | 213 ++++++++++++
 .../DFG2LLVM_X86/oneLaunchAlloca.dfg.ll       | 121 +++++++
 .../regressionTests/GenVISC/AllocationNode.ll | 206 ++++++++++++
 .../regressionTests/GenVISC/CreateNode.ll     | 119 +++++++
 .../GenVISC/CreateNodeAndEdge.ll              | 153 +++++++++
 .../regressionTests/GenVISC/LeafBindEdge.ll   | 212 ++++++++++++
 .../GenVISC/LeafNodeGetters.ll                | 242 ++++++++++++++
 .../regressionTests/GenVISC/PopAndPush.ll     |  92 ++++++
 .../regressionTests/GenVISC/ThreeLevel.ll     | 190 +++++++++++
 .../regressionTests/GenVISC/ThreeLevelEdge.ll | 209 ++++++++++++
 .../test/regressionTests/GenVISC/TwoLaunch.ll |  96 ++++++
 hpvm/test/regressionTests/GenVISC/TwoLevel.ll | 167 ++++++++++
 .../GenVISC/oneLaunchAlloca.ll                |  86 +++++
 .../GenVISC/oneLaunchMalloc.ll                |  84 +++++
 35 files changed, 6409 insertions(+)
 create mode 100644 hpvm/test/CMakeLists.txt
 create mode 100644 hpvm/test/regressionTests/BuildDFG/CreateNode.genvisc.ll
 create mode 100644 hpvm/test/regressionTests/BuildDFG/CreateNodeAndEdge.genvisc.ll
 create mode 100644 hpvm/test/regressionTests/BuildDFG/LeafBindEdge.genvisc.ll
 create mode 100644 hpvm/test/regressionTests/BuildDFG/LeafInComingBindEdge.ll
 create mode 100644 hpvm/test/regressionTests/BuildDFG/LeafNodeGetters.genvisc.ll
 create mode 100644 hpvm/test/regressionTests/BuildDFG/OneRoot.ll
 create mode 100644 hpvm/test/regressionTests/BuildDFG/ThreeLevel.genvisc.ll
 create mode 100644 hpvm/test/regressionTests/BuildDFG/ThreeLevelEdge.genvisc.ll
 create mode 100644 hpvm/test/regressionTests/BuildDFG/ThreeLevelEdge.ll
 create mode 100644 hpvm/test/regressionTests/BuildDFG/TwoLaunch.genvisc.ll
 create mode 100644 hpvm/test/regressionTests/BuildDFG/TwoLevel.ll
 create mode 100644 hpvm/test/regressionTests/BuildDFG/TwoLevelGraph.ll
 create mode 100644 hpvm/test/regressionTests/BuildDFG/TwoNode.ll
 create mode 100644 hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.atomic.genvisc.ll
 create mode 100644 hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.genvisc.ll
 create mode 100644 hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.opt.genvisc.ll
 create mode 100644 hpvm/test/regressionTests/DFG2LLVM_X86/CreateNode.dfg.ll
 create mode 100644 hpvm/test/regressionTests/DFG2LLVM_X86/ThreeLevel.dfg.ll
 create mode 100644 hpvm/test/regressionTests/DFG2LLVM_X86/TwoLaunch.dfg.ll
 create mode 100644 hpvm/test/regressionTests/DFG2LLVM_X86/TwoLevel.dfg.ll
 create mode 100644 hpvm/test/regressionTests/DFG2LLVM_X86/oneLaunchAlloca.dfg.ll
 create mode 100644 hpvm/test/regressionTests/GenVISC/AllocationNode.ll
 create mode 100644 hpvm/test/regressionTests/GenVISC/CreateNode.ll
 create mode 100644 hpvm/test/regressionTests/GenVISC/CreateNodeAndEdge.ll
 create mode 100644 hpvm/test/regressionTests/GenVISC/LeafBindEdge.ll
 create mode 100644 hpvm/test/regressionTests/GenVISC/LeafNodeGetters.ll
 create mode 100644 hpvm/test/regressionTests/GenVISC/PopAndPush.ll
 create mode 100644 hpvm/test/regressionTests/GenVISC/ThreeLevel.ll
 create mode 100644 hpvm/test/regressionTests/GenVISC/ThreeLevelEdge.ll
 create mode 100644 hpvm/test/regressionTests/GenVISC/TwoLaunch.ll
 create mode 100644 hpvm/test/regressionTests/GenVISC/TwoLevel.ll
 create mode 100644 hpvm/test/regressionTests/GenVISC/oneLaunchAlloca.ll
 create mode 100644 hpvm/test/regressionTests/GenVISC/oneLaunchMalloc.ll

diff --git a/hpvm/CMakeLists.txt b/hpvm/CMakeLists.txt
index eeccea523c..9fc8bd884b 100644
--- a/hpvm/CMakeLists.txt
+++ b/hpvm/CMakeLists.txt
@@ -1,3 +1,24 @@
 include_directories(./include/)
 add_subdirectory(lib)
 add_subdirectory(projects)
+
+add_subdirectory(test)
+
+# Add a global check rule now that all subdirectories have been traversed
+# and we know the total set of lit testsuites.
+get_property(LLVM_LIT_TESTSUITES GLOBAL PROPERTY LLVM_LIT_TESTSUITES)
+get_property(LLVM_LIT_PARAMS GLOBAL PROPERTY LLVM_LIT_PARAMS)
+get_property(LLVM_LIT_DEPENDS GLOBAL PROPERTY LLVM_LIT_DEPENDS)
+get_property(LLVM_LIT_EXTRA_ARGS GLOBAL PROPERTY LLVM_LIT_EXTRA_ARGS)
+get_property(LLVM_ADDITIONAL_TEST_TARGETS GLOBAL PROPERTY LLVM_ADDITIONAL_TEST_TARGETS)
+
+add_lit_target(hpvm-check-all
+  "Running all regression tests"
+  ${LLVM_LIT_TESTSUITES}
+  PARAMS ${LLVM_LIT_PARAMS}
+  DEPENDS ${LLVM_LIT_DEPENDS} ${LLVM_ADDITIONAL_TEST_TARGETS}
+  ARGS ${LLVM_LIT_EXTRA_ARGS}
+  )
+# Keep the earlier misspelled name 'hpmv-check-all' working as an alias.
+add_custom_target(hpmv-check-all)
+add_dependencies(hpmv-check-all hpvm-check-all)
diff --git a/hpvm/test/CMakeLists.txt b/hpvm/test/CMakeLists.txt
new file mode 100644
index 0000000000..8bd57df5eb
--- /dev/null
+++ b/hpvm/test/CMakeLists.txt
@@ -0,0 +1,169 @@
+# Canonicalize the boolean options consumed by the lit site configuration.
+llvm_canonicalize_cmake_booleans(
+  BUILD_SHARED_LIBS
+  HAVE_LIBXAR
+  HAVE_LIBZ
+  HAVE_OCAMLOPT
+  HAVE_OCAML_OUNIT
+  LLVM_BUILD_EXAMPLES
+  LLVM_ENABLE_DIA_SDK
+  LLVM_ENABLE_FFI
+  LLVM_ENABLE_PLUGINS
+  LLVM_ENABLE_THREADS
+  LLVM_INCLUDE_GO_TESTS
+  LLVM_LIBXML2_ENABLED
+  LLVM_LINK_LLVM_DYLIB
+  LLVM_TOOL_LTO_BUILD
+  LLVM_USE_INTEL_JITEVENTS)
+
+# Main regression suite: instantiate its lit site config in the build tree.
+configure_lit_site_cfg(
+  ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in
+  ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg.py
+  MAIN_CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py
+)
+# Unit suite: the same, under Unit/.
+# NOTE(review): the *.in templates and lit.cfg.py inputs must already exist - confirm.
+configure_lit_site_cfg(
+  ${CMAKE_CURRENT_SOURCE_DIR}/Unit/lit.site.cfg.py.in
+  ${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.site.cfg.py
+  MAIN_CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/Unit/lit.cfg.py
+)
+
+# Targets that must be built before lit runs. Held in a variable so that the
+# conditional blocks below can extend it. Keep test/lit.cfg substitutions in sync.
+set(LLVM_TEST_DEPENDS
+  BugpointPasses
+  FileCheck
+  LLVMHello
+  UnitTests
+  bugpoint
+  count
+  llc
+  lli
+  lli-child-target
+  llvm-addr2line
+  llvm-ar
+  llvm-as
+  llvm-bcanalyzer
+  llvm-c-test
+  llvm-cat
+  llvm-cfi-verify
+  llvm-config
+  llvm-cov
+  llvm-cvtres
+  llvm-cxxdump
+  llvm-cxxfilt
+  llvm-cxxmap
+  llvm-diff
+  llvm-dis
+  llvm-dlltool
+  dsymutil
+  llvm-dwarfdump
+  llvm-dwp
+  llvm-elfabi
+  llvm-exegesis
+  llvm-extract
+  llvm-isel-fuzzer
+  llvm-jitlink
+  llvm-lib
+  llvm-link
+  llvm-lipo
+  llvm-lto2
+  llvm-mc
+  llvm-mca
+  llvm-modextract
+  llvm-mt
+  llvm-nm
+  llvm-objcopy
+  llvm-objdump
+  llvm-opt-fuzzer
+  llvm-opt-report
+  llvm-pdbutil
+  llvm-profdata
+  llvm-ranlib
+  llvm-rc
+  llvm-readobj
+  llvm-readelf
+  llvm-rtdyld
+  llvm-size
+  llvm-split
+  llvm-strings
+  llvm-strip
+  llvm-symbolizer
+  llvm-tblgen
+  llvm-undname
+  llvm-xray
+  not
+  obj2yaml
+  opt
+  sancov
+  sanstats
+  verify-uselistorder
+  yaml-bench
+  yaml2obj
+)
+
+# Optional LLVM tools and plugins: depend on each one only when the current
+# build actually defines the target.
+foreach(optional_target llvm-lto LLVMgold llvm-go LTO)
+  if(TARGET ${optional_target})
+    list(APPEND LLVM_TEST_DEPENDS ${optional_target})
+  endif()
+endforeach()
+
+# The regression tests run `opt -load LLVMBuildDFG.so`, so that plugin has to be
+# built before lit starts. The target name is inferred from the library file
+# name, hence the guard. TODO: list the other HPVM pass plugins the tests load.
+if(TARGET LLVMBuildDFG)
+  list(APPEND LLVM_TEST_DEPENDS LLVMBuildDFG)
+endif()
+
+# If Intel JIT events are supported, depend on a tool that tests the listener.
+if(LLVM_USE_INTEL_JITEVENTS)
+  list(APPEND LLVM_TEST_DEPENDS llvm-jitlistener)
+endif()
+
+# OCaml binding targets, when the bindings are part of this build.
+if(TARGET ocaml_llvm)
+  # Clear all non-OCaml cross-target dependencies when building out-of-tree.
+  if(LLVM_OCAML_OUT_OF_TREE)
+    set(LLVM_TEST_DEPENDS)
+  endif()
+
+  list(APPEND LLVM_TEST_DEPENDS
+    ocaml_llvm
+    ocaml_llvm_all_backends
+    ocaml_llvm_analysis
+    ocaml_llvm_bitreader
+    ocaml_llvm_bitwriter
+    ocaml_llvm_executionengine
+    ocaml_llvm_irreader
+    ocaml_llvm_linker
+    ocaml_llvm_target
+    ocaml_llvm_ipo
+    ocaml_llvm_passmgr_builder
+    ocaml_llvm_scalar_opts
+    ocaml_llvm_transform_utils
+    ocaml_llvm_vectorize
+    )
+endif()
+
+# The suites below depend on ${LLVM_TEST_DEPENDS} directly, so no separate
+# "*-test-depends" helper target is defined here.
+
+add_lit_testsuite(check-hpvm "Running the HPVM regression tests"
+  ${CMAKE_CURRENT_BINARY_DIR}
+  DEPENDS ${LLVM_TEST_DEPENDS}
+  )
+set_target_properties(check-hpvm PROPERTIES FOLDER "Tests")
+
+# Also register the lit suites found under this source directory with the HPVM prefix.
+add_lit_testsuites(HPVM ${CMAKE_CURRENT_SOURCE_DIR}
+  DEPENDS ${LLVM_TEST_DEPENDS}
+  )
+
+# 'hpvm-check' is a convenience alias that simply builds 'check-hpvm'.
+add_custom_target(hpvm-check)
+add_dependencies(hpvm-check check-hpvm)
+set_target_properties(hpvm-check PROPERTIES FOLDER "Tests")
diff --git a/hpvm/test/regressionTests/BuildDFG/CreateNode.genvisc.ll b/hpvm/test/regressionTests/BuildDFG/CreateNode.genvisc.ll
new file mode 100644
index 0000000000..7de0471e45
--- /dev/null
+++ b/hpvm/test/regressionTests/BuildDFG/CreateNode.genvisc.ll
@@ -0,0 +1,151 @@
+; RUN: opt -load LLVMBuildDFG.so  -S < %s | FileCheck %s
+; ModuleID = 'CreateNode.ll'
+source_filename = "CreateNode.c"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.Root = type { i32*, i32* }
+%struct.out.Func = type <{ i32* }>
+%struct.out.PipeRoot = type <{ i32* }>
+; Expected output, grouped per function; SSA names are captured as FileCheck variables so exact numbering is not pinned.
+; CHECK-LABEL: %struct.Root =
+; CHECK-LABEL: %struct.out.Func =
+; CHECK-LABEL: %struct.out.PipeRoot =
+
+; CHECK-LABEL: i32 @main(
+; CHECK: [[ALLOCA:%[0-9a-zA-Z._]+]] = alloca %struct.Root
+; CHECK: call void @llvm.hpvm.init()
+; CHECK: [[REGISTER:%[0-9]+]] = bitcast %struct.Root* [[ALLOCA]] to i8*
+; CHECK: call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot (i32*, i32*)* @PipeRoot_cloned to i8*), i8* [[REGISTER]], i1 false)
+; CHECK-NEXT: call void @llvm.hpvm.wait(i8*
+
+; CHECK-LABEL: @Func_cloned(
+; CHECK: [[RET:%[0-9a-zA-Z._]+]] = insertvalue %struct.out.Func undef,
+; CHECK-NEXT: ret %struct.out.Func [[RET]]
+
+; CHECK-LABEL: @PipeRoot_cloned(i32*
+; CHECK: %Func_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func (i32*
+; CHECK-NEXT: @llvm.hpvm.bind.input(i8* %Func_cloned.node
+; CHECK-NEXT: @llvm.hpvm.bind.output(i8* %Func_cloned.node
+
+declare dso_local void @__hpvm__hint(i32) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__attributes(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__return(i32, ...) local_unnamed_addr #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
+
+declare dso_local i8* @__hpvm__createNodeND(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__bindIn(i8*, i32, i32, i32) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__bindOut(i8*, i32, i32, i32) local_unnamed_addr #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
+
+; Function Attrs: nounwind uwtable. Test driver: stores %In and %Out into %RootArgs, then calls llvm.hpvm.init, launch (on @PipeRoot_cloned), wait and cleanup.
+define dso_local i32 @main() local_unnamed_addr #2 {
+entry:
+  %In = alloca i32, align 4
+  %Out = alloca i32, align 4
+  %RootArgs = alloca %struct.Root, align 8
+  %0 = bitcast i32* %In to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #3
+  %1 = bitcast i32* %Out to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) #3
+  %2 = bitcast %struct.Root* %RootArgs to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %2) #3
+  %input = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 0
+  store i32* %In, i32** %input, align 8, !tbaa !4
+  %output = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 1
+  store i32* %Out, i32** %output, align 8, !tbaa !9
+  call void @llvm.hpvm.init()
+  %3 = bitcast %struct.Root* %RootArgs to i8* ; second cast of %RootArgs; this is the pointer handed to launch
+  %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot (i32*, i32*)* @PipeRoot_cloned to i8*), i8* %3, i1 false)
+  call void @llvm.hpvm.wait(i8* %graphID)
+  call void @llvm.hpvm.cleanup()
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %2) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #3
+  ret i32 0
+}
+
+declare dso_local void @__hpvm__init(...) local_unnamed_addr #0
+
+declare dso_local i8* @__hpvm__launch(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__wait(i8*) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__cleanup(...) local_unnamed_addr #0
+
+declare i8* @llvm_hpvm_initializeTimerSet()
+
+declare void @llvm_hpvm_switchToTimer(i8**, i32)
+
+declare void @llvm_hpvm_printTimerSet(i8**, i8*)
+
+; Function Attrs: nounwind uwtable. Node function referenced by createNode in @PipeRoot_cloned: returns %struct.out.Func with field 0 set to %Out; %In is unused.
+define dso_local %struct.out.Func @Func_cloned(i32* in %In, i32* out %Out) #2 {
+entry:
+  %returnStruct = insertvalue %struct.out.Func undef, i32* %Out, 0
+  ret %struct.out.Func %returnStruct
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createNode(i8*) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.bind.input(i8*, i32, i32, i1) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.bind.output(i8*, i32, i32, i1) #3
+
+; Function Attrs: nounwind uwtable. Graph function passed to llvm.hpvm.launch in @main: creates one node for @Func_cloned, issues one bind.input and one bind.output on it, returns undef.
+define dso_local %struct.out.PipeRoot @PipeRoot_cloned(i32* in %In, i32* out %Out) #2 {
+entry:
+  %Func_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func (i32*, i32*)* @Func_cloned to i8*))
+  call void @llvm.hpvm.bind.input(i8* %Func_cloned.node, i32 0, i32 0, i1 false)
+  call void @llvm.hpvm.bind.output(i8* %Func_cloned.node, i32 0, i32 0, i1 false)
+  ret %struct.out.PipeRoot undef
+}
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.init() #3
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.launch(i8*, i8*, i1) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.wait(i8*) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.cleanup() #3
+
+attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #3 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+!hpvm_hint_cpu = !{!2, !3}
+!hpvm_hint_gpu = !{}
+!hpvm_hint_spir = !{}
+!hpvm_hint_cudnn = !{}
+!hpvm_hint_promise = !{}
+!hpvm_hint_cpu_gpu = !{}
+!hpvm_hint_cpu_spir = !{}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 6690f9e7e8b46b96aea222d3e85315cd63545953)"}
+!2 = !{%struct.out.Func (i32*, i32*)* @Func_cloned}
+!3 = !{%struct.out.PipeRoot (i32*, i32*)* @PipeRoot_cloned}
+!4 = !{!5, !6, i64 0}
+!5 = !{!"Root", !6, i64 0, !6, i64 8}
+!6 = !{!"any pointer", !7, i64 0}
+!7 = !{!"omnipotent char", !8, i64 0}
+!8 = !{!"Simple C/C++ TBAA"}
+!9 = !{!5, !6, i64 8}
diff --git a/hpvm/test/regressionTests/BuildDFG/CreateNodeAndEdge.genvisc.ll b/hpvm/test/regressionTests/BuildDFG/CreateNodeAndEdge.genvisc.ll
new file mode 100644
index 0000000000..8118932225
--- /dev/null
+++ b/hpvm/test/regressionTests/BuildDFG/CreateNodeAndEdge.genvisc.ll
@@ -0,0 +1,191 @@
+; RUN: opt -load LLVMBuildDFG.so -S < %s | FileCheck %s
+; ModuleID = 'CreateNodeAndEdge.ll'
+source_filename = "CreateNodeAndEdge.c"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.Root = type { i32*, i32*, i32* }
+%struct.out.Func1 = type <{ i32* }>
+%struct.out.Func2 = type <{ i32* }>
+%struct.out.PipeRoot = type <{ i32* }>
+
+; Expected output, grouped per function; SSA names are captured as FileCheck variables so exact numbering is not pinned.
+; CHECK-LABEL: %struct.Root =
+
+; CHECK-LABEL: %struct.out.Func1 =
+; CHECK-LABEL: %struct.out.Func2 =
+; CHECK-LABEL: %struct.out.PipeRoot =
+
+; CHECK-LABEL: i32 @main(
+; CHECK: [[ALLOCA:%[0-9a-zA-Z._]+]] = alloca %struct.Root
+; CHECK: call void @llvm.hpvm.init()
+; CHECK: [[REGISTER:%[0-9]+]] = bitcast %struct.Root* [[ALLOCA]] to i8*
+; CHECK: call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot (i32*, i32*, i32*)* @PipeRoot_cloned to i8*), i8* [[REGISTER]],
+; CHECK-NEXT: call void @llvm.hpvm.wait(i8*
+
+; CHECK-LABEL: @Func1_cloned(
+; CHECK: [[RET1:%[0-9a-zA-Z._]+]] = insertvalue %struct.out.Func1 undef,
+; CHECK-NEXT: ret %struct.out.Func1 [[RET1]]
+
+; CHECK-LABEL: @Func2_cloned(
+; CHECK: [[RET2:%[0-9a-zA-Z._]+]] = insertvalue %struct.out.Func2 undef,
+; CHECK-NEXT: ret %struct.out.Func2 [[RET2]]
+
+; CHECK-LABEL: @PipeRoot_cloned(i32*
+; CHECK: %Func1_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func1 (i32*
+; CHECK: %Func2_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func2 (i32*
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node,
+; CHECK-NEXT: call i8* @llvm.hpvm.createEdge(i8* %Func1_cloned.node, i8* %Func2_cloned.node,
+; CHECK-NEXT: @llvm.hpvm.bind.output(i8* %Func2_cloned.node
+
+
+declare dso_local void @__hpvm__hint(i32) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__attributes(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__return(i32, ...) local_unnamed_addr #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
+
+declare dso_local i8* @__hpvm__createNodeND(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__bindIn(i8*, i32, i32, i32) local_unnamed_addr #0
+
+declare dso_local i8* @__hpvm__edge(i8*, i8*, i32, i32, i32, i32) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__bindOut(i8*, i32, i32, i32) local_unnamed_addr #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
+
+; Function Attrs: nounwind uwtable. Test driver: initializes %In1=1, %In2=2, %Out=0, stores their addresses into %RootArgs, then calls llvm.hpvm.init, launch (on @PipeRoot_cloned), wait and cleanup.
+define dso_local i32 @main() local_unnamed_addr #2 {
+entry:
+  %In1 = alloca i32, align 4
+  %In2 = alloca i32, align 4
+  %Out = alloca i32, align 4
+  %RootArgs = alloca %struct.Root, align 8
+  %0 = bitcast i32* %In1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #3
+  store i32 1, i32* %In1, align 4, !tbaa !5
+  %1 = bitcast i32* %In2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) #3
+  store i32 2, i32* %In2, align 4, !tbaa !5
+  %2 = bitcast i32* %Out to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %2) #3
+  store i32 0, i32* %Out, align 4, !tbaa !5
+  %3 = bitcast %struct.Root* %RootArgs to i8*
+  call void @llvm.lifetime.start.p0i8(i64 24, i8* nonnull %3) #3
+  %input1 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 0
+  store i32* %In1, i32** %input1, align 8, !tbaa !9
+  %input2 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 1
+  store i32* %In2, i32** %input2, align 8, !tbaa !12
+  %output = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 2
+  store i32* %Out, i32** %output, align 8, !tbaa !13
+  call void @llvm.hpvm.init()
+  %4 = bitcast %struct.Root* %RootArgs to i8* ; second cast of %RootArgs; this is the pointer handed to launch
+  %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot (i32*, i32*, i32*)* @PipeRoot_cloned to i8*), i8* %4, i1 false)
+  call void @llvm.hpvm.wait(i8* %graphID)
+  call void @llvm.hpvm.cleanup()
+  call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull %3) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %2) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #3
+  ret i32 0
+}
+
+declare dso_local void @__hpvm__init(...) local_unnamed_addr #0
+
+declare dso_local i8* @__hpvm__launch(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__wait(i8*) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__cleanup(...) local_unnamed_addr #0
+
+declare i8* @llvm_hpvm_initializeTimerSet()
+
+declare void @llvm_hpvm_switchToTimer(i8**, i32)
+
+declare void @llvm_hpvm_printTimerSet(i8**, i8*)
+
+; Function Attrs: nounwind uwtable. Node function referenced by the first createNode in @PipeRoot_cloned: returns %struct.out.Func1 with field 0 set to %Out; %In is unused.
+define dso_local %struct.out.Func1 @Func1_cloned(i32* in %In, i32* out %Out) #2 {
+entry:
+  %returnStruct = insertvalue %struct.out.Func1 undef, i32* %Out, 0
+  ret %struct.out.Func1 %returnStruct
+}
+
+; Function Attrs: nounwind uwtable. Node function referenced by the second createNode in @PipeRoot_cloned: returns %struct.out.Func2 with field 0 set to %Out; %BindIn and %SrcIn are unused.
+define dso_local %struct.out.Func2 @Func2_cloned(i32* in %BindIn, i32* in %SrcIn, i32* out %Out) #2 {
+entry:
+  %returnStruct = insertvalue %struct.out.Func2 undef, i32* %Out, 0
+  ret %struct.out.Func2 %returnStruct
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createNode(i8*) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.bind.input(i8*, i32, i32, i1) #3
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32, i1) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.bind.output(i8*, i32, i32, i1) #3
+
+; Function Attrs: nounwind uwtable. Graph function passed to llvm.hpvm.launch in @main: creates nodes for @Func1_cloned and @Func2_cloned, binds one input on each, passes both nodes to createEdge, binds one output on the Func2 node, returns undef.
+define dso_local %struct.out.PipeRoot @PipeRoot_cloned(i32* in %In1, i32* in %In2, i32* out %Out) #2 {
+entry:
+  %Func1_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func1 (i32*, i32*)* @Func1_cloned to i8*))
+  %Func2_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func2 (i32*, i32*, i32*)* @Func2_cloned to i8*))
+  call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node, i32 0, i32 0, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 1, i32 0, i1 false)
+  %output = call i8* @llvm.hpvm.createEdge(i8* %Func1_cloned.node, i8* %Func2_cloned.node, i1 false, i32 1, i32 1, i1 false)
+  call void @llvm.hpvm.bind.output(i8* %Func2_cloned.node, i32 0, i32 0, i1 false)
+  ret %struct.out.PipeRoot undef
+}
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.init() #3
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.launch(i8*, i8*, i1) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.wait(i8*) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.cleanup() #3
+
+attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #3 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+!hpvm_hint_cpu = !{!2, !3, !4}
+!hpvm_hint_gpu = !{}
+!hpvm_hint_spir = !{}
+!hpvm_hint_cudnn = !{}
+!hpvm_hint_promise = !{}
+!hpvm_hint_cpu_gpu = !{}
+!hpvm_hint_cpu_spir = !{}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 6690f9e7e8b46b96aea222d3e85315cd63545953)"}
+!2 = !{%struct.out.Func1 (i32*, i32*)* @Func1_cloned}
+!3 = !{%struct.out.Func2 (i32*, i32*, i32*)* @Func2_cloned}
+!4 = !{%struct.out.PipeRoot (i32*, i32*, i32*)* @PipeRoot_cloned}
+!5 = !{!6, !6, i64 0}
+!6 = !{!"int", !7, i64 0}
+!7 = !{!"omnipotent char", !8, i64 0}
+!8 = !{!"Simple C/C++ TBAA"}
+!9 = !{!10, !11, i64 0}
+!10 = !{!"Root", !11, i64 0, !11, i64 8, !11, i64 16}
+!11 = !{!"any pointer", !7, i64 0}
+!12 = !{!10, !11, i64 8}
+!13 = !{!10, !11, i64 16}
diff --git a/hpvm/test/regressionTests/BuildDFG/LeafBindEdge.genvisc.ll b/hpvm/test/regressionTests/BuildDFG/LeafBindEdge.genvisc.ll
new file mode 100644
index 0000000000..67f5c20e10
--- /dev/null
+++ b/hpvm/test/regressionTests/BuildDFG/LeafBindEdge.genvisc.ll
@@ -0,0 +1,250 @@
+; RUN: opt -load LLVMBuildDFG.so -S < %s | FileCheck %s
+; ModuleID = 'LeafBindEdge.ll'
+source_filename = "LeafBindEdge.c"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.Root = type { i32*, i32*, i32* }
+%struct.out.Func1 = type <{ i32* }>
+%struct.out.Func4 = type <{ i32* }>
+%struct.out.Func5 = type <{ i32* }>
+%struct.out.Func3 = type <{ i32* }>
+%struct.out.Func2 = type <{ i32* }>
+%struct.out.PipeRoot = type <{ i32* }>
+
+; CHECK-LABEL: %struct.Root =
+
+; CHECK-LABEL: %struct.out.Func1 =
+; CHECK-LABEL: %struct.out.Func4 =
+; CHECK-LABEL: %struct.out.Func5 =
+; CHECK-LABEL: %struct.out.Func3 =
+; CHECK-LABEL: %struct.out.Func2 =
+; CHECK-LABEL: %struct.out.PipeRoot =
+
+; CHECK-LABEL: i32 @main(
+; CHECK: [[ALLOCA:%[0-9a-zA-Z]+]] = alloca %struct.Root
+; CHECK: call void @llvm.hpvm.init()
+; CHECK: [[REGISTER:%[0-9]+]] = bitcast %struct.Root* [[ALLOCA]] to i8*
+; CHECK: call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot (i32*, i32*, i32*)* @PipeRoot_cloned to i8*), i8* [[REGISTER]],
+; CHECK-NEXT: call void @llvm.hpvm.wait(i8*
+
+; CHECK-LABEL: @Func1_cloned(
+; CHECK: [[RET1:%[0-9a-zA-Z]+]] = insertvalue %struct.out.Func1 undef,
+; CHECK-NEXT: ret %struct.out.Func1 [[RET1]]
+
+; CHECK-LABEL: @Func4_cloned(
+; CHECK: [[RET4:%[0-9a-zA-Z]+]] = insertvalue %struct.out.Func4 undef,
+; CHECK-NEXT: ret %struct.out.Func4 [[RET4]]
+
+; CHECK-LABEL: @Func5_cloned(
+; CHECK: [[RET5:%[0-9a-zA-Z]+]] = insertvalue %struct.out.Func5 undef,
+; CHECK-NEXT: ret %struct.out.Func5 [[RET5]]
+
+; CHECK-LABEL: @Func3_cloned(
+; CHECK: %Func4_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%struct.out.Func4 (i32*
+; CHECK-NEXT: %Func5_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%struct.out.Func5 (i32*
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func4_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func5_cloned.node,
+; CHECK-NEXT: call i8* @llvm.hpvm.createEdge(i8* %Func4_cloned.node, i8* %Func5_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func5_cloned.node,
+
+; CHECK-LABEL: @Func2_cloned(
+; CHECK: %Func3_cloned.node = call i8* @llvm.hpvm.createNode1D(i8* bitcast (%struct.out.Func3 (i32*
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func3_cloned.node,
+
+; CHECK-LABEL: @PipeRoot_cloned(i32*
+; CHECK: %Func1_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func1 (i32*
+; CHECK-NEXT: %Func2_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func2 (i32*
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node,
+; CHECK-NEXT: call i8* @llvm.hpvm.createEdge(i8* %Func1_cloned.node, i8* %Func2_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func2_cloned.node,
+
+
+declare dso_local void @__hpvm__hint(i32) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__attributes(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__return(i32, ...) local_unnamed_addr #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
+
+declare dso_local i8* @__hpvm__createNodeND(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__bindIn(i8*, i32, i32, i32) local_unnamed_addr #0
+
+declare dso_local i8* @__hpvm__edge(i8*, i8*, i32, i32, i32, i32) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__bindOut(i8*, i32, i32, i32) local_unnamed_addr #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
+
+; Function Attrs: nounwind uwtable
+define dso_local i32 @main() local_unnamed_addr #2 {
+entry:
+  %In1 = alloca i32, align 4
+  %In2 = alloca i32, align 4
+  %Out = alloca i32, align 4
+  %RootArgs = alloca %struct.Root, align 8
+  %0 = bitcast i32* %In1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #3
+  store i32 1, i32* %In1, align 4, !tbaa !8
+  %1 = bitcast i32* %In2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) #3
+  store i32 2, i32* %In2, align 4, !tbaa !8
+  %2 = bitcast i32* %Out to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %2) #3
+  store i32 0, i32* %Out, align 4, !tbaa !8
+  %3 = bitcast %struct.Root* %RootArgs to i8*
+  call void @llvm.lifetime.start.p0i8(i64 24, i8* nonnull %3) #3
+  %input1 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 0
+  store i32* %In1, i32** %input1, align 8, !tbaa !12
+  %input2 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 1
+  store i32* %In2, i32** %input2, align 8, !tbaa !15
+  %output = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 2
+  store i32* %Out, i32** %output, align 8, !tbaa !16
+  call void @llvm.hpvm.init()
+  %4 = bitcast %struct.Root* %RootArgs to i8*
+  %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot (i32*, i32*, i32*)* @PipeRoot_cloned to i8*), i8* %4, i1 false)
+  call void @llvm.hpvm.wait(i8* %graphID)
+  call void @llvm.hpvm.cleanup()
+  call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull %3) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %2) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #3
+  ret i32 0
+}
+
+declare dso_local void @__hpvm__init(...) local_unnamed_addr #0
+
+declare dso_local i8* @__hpvm__launch(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__wait(i8*) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__cleanup(...) local_unnamed_addr #0
+
+declare i8* @llvm_hpvm_initializeTimerSet()
+
+declare void @llvm_hpvm_switchToTimer(i8**, i32)
+
+declare void @llvm_hpvm_printTimerSet(i8**, i8*)
+
+; Function Attrs: nounwind uwtable
+define dso_local %struct.out.Func1 @Func1_cloned(i32* in %In, i32* out %Out) #2 {
+entry:
+  %returnStruct = insertvalue %struct.out.Func1 undef, i32* %Out, 0
+  ret %struct.out.Func1 %returnStruct
+}
+
+; Function Attrs: nounwind uwtable
+define dso_local %struct.out.Func4 @Func4_cloned(i32* in %In, i32* out %Out) #2 {
+entry:
+  %returnStruct = insertvalue %struct.out.Func4 undef, i32* %Out, 0
+  ret %struct.out.Func4 %returnStruct
+}
+
+; Function Attrs: nounwind uwtable
+define dso_local %struct.out.Func5 @Func5_cloned(i32* in %In1, i32* in %In2, i32* out %Out) #2 {
+entry:
+  %returnStruct = insertvalue %struct.out.Func5 undef, i32* %Out, 0
+  ret %struct.out.Func5 %returnStruct
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createNode2D(i8*, i64, i64) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.bind.input(i8*, i32, i32, i1) #3
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32, i1) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.bind.output(i8*, i32, i32, i1) #3
+
+; Function Attrs: nounwind uwtable
+define dso_local %struct.out.Func3 @Func3_cloned(i32* in %In, i32* out %Out) #2 {
+entry:
+  %Func4_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%struct.out.Func4 (i32*, i32*)* @Func4_cloned to i8*), i64 3, i64 6)
+  %Func5_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%struct.out.Func5 (i32*, i32*, i32*)* @Func5_cloned to i8*), i64 4, i64 5)
+  call void @llvm.hpvm.bind.input(i8* %Func4_cloned.node, i32 0, i32 0, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func5_cloned.node, i32 1, i32 0, i1 false)
+  %output = call i8* @llvm.hpvm.createEdge(i8* %Func4_cloned.node, i8* %Func5_cloned.node, i1 false, i32 1, i32 1, i1 false)
+  call void @llvm.hpvm.bind.output(i8* %Func5_cloned.node, i32 0, i32 0, i1 false)
+  ret %struct.out.Func3 undef
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createNode1D(i8*, i64) #3
+
+; Function Attrs: nounwind uwtable
+define dso_local %struct.out.Func2 @Func2_cloned(i32* in %BindIn, i32* in %SrcIn, i32* out %Out) #2 {
+entry:
+  %Func3_cloned.node = call i8* @llvm.hpvm.createNode1D(i8* bitcast (%struct.out.Func3 (i32*, i32*)* @Func3_cloned to i8*), i64 3)
+  call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node, i32 0, i32 0, i1 false)
+  call void @llvm.hpvm.bind.output(i8* %Func3_cloned.node, i32 0, i32 0, i1 false)
+  ret %struct.out.Func2 undef
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createNode(i8*) #3
+
+; Function Attrs: nounwind uwtable
+define dso_local %struct.out.PipeRoot @PipeRoot_cloned(i32* in %In1, i32* in %In2, i32* out %Out) #2 {
+entry:
+  %Func1_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func1 (i32*, i32*)* @Func1_cloned to i8*))
+  %Func2_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func2 (i32*, i32*, i32*)* @Func2_cloned to i8*))
+  call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node, i32 0, i32 0, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 1, i32 0, i1 false)
+  %output = call i8* @llvm.hpvm.createEdge(i8* %Func1_cloned.node, i8* %Func2_cloned.node, i1 false, i32 1, i32 1, i1 false)
+  call void @llvm.hpvm.bind.output(i8* %Func2_cloned.node, i32 0, i32 0, i1 false)
+  ret %struct.out.PipeRoot undef
+}
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.init() #3
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.launch(i8*, i8*, i1) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.wait(i8*) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.cleanup() #3
+
+attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #3 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+!hpvm_hint_cpu = !{!2, !3, !4, !5, !6, !7}
+!hpvm_hint_gpu = !{}
+!hpvm_hint_spir = !{}
+!hpvm_hint_cudnn = !{}
+!hpvm_hint_promise = !{}
+!hpvm_hint_cpu_gpu = !{}
+!hpvm_hint_cpu_spir = !{}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 6690f9e7e8b46b96aea222d3e85315cd63545953)"}
+!2 = !{%struct.out.Func1 (i32*, i32*)* @Func1_cloned}
+!3 = !{%struct.out.Func4 (i32*, i32*)* @Func4_cloned}
+!4 = !{%struct.out.Func5 (i32*, i32*, i32*)* @Func5_cloned}
+!5 = !{%struct.out.Func3 (i32*, i32*)* @Func3_cloned}
+!6 = !{%struct.out.Func2 (i32*, i32*, i32*)* @Func2_cloned}
+!7 = !{%struct.out.PipeRoot (i32*, i32*, i32*)* @PipeRoot_cloned}
+!8 = !{!9, !9, i64 0}
+!9 = !{!"int", !10, i64 0}
+!10 = !{!"omnipotent char", !11, i64 0}
+!11 = !{!"Simple C/C++ TBAA"}
+!12 = !{!13, !14, i64 0}
+!13 = !{!"Root", !14, i64 0, !14, i64 8, !14, i64 16}
+!14 = !{!"any pointer", !10, i64 0}
+!15 = !{!13, !14, i64 8}
+!16 = !{!13, !14, i64 16}
diff --git a/hpvm/test/regressionTests/BuildDFG/LeafInComingBindEdge.ll b/hpvm/test/regressionTests/BuildDFG/LeafInComingBindEdge.ll
new file mode 100644
index 0000000000..7ea6bbf056
--- /dev/null
+++ b/hpvm/test/regressionTests/BuildDFG/LeafInComingBindEdge.ll
@@ -0,0 +1,200 @@
+; RUN: opt -load LLVMBuildDFG.so < %s
+; ModuleID = 'LeafBindEdge.ll'
+source_filename = "LeafBindEdge.c"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.Root = type { i32*, i32*, i32* }
+%struct.out.Func1 = type <{ i32* }>
+%struct.out.Func4 = type <{ i32* }>
+%struct.out.Func5 = type <{ i32* }>
+%struct.out.Func3 = type <{ i32* }>
+%struct.out.Func2 = type <{ i32* }>
+%struct.out.PipeRoot = type <{ i32* }>
+
+declare dso_local void @__hpvm__hint(i32) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__attributes(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__return(i32, ...) local_unnamed_addr #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
+
+declare dso_local i8* @__hpvm__createNodeND(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__bindIn(i8*, i32, i32, i32) local_unnamed_addr #0
+
+declare dso_local i8* @__hpvm__edge(i8*, i8*, i32, i32, i32, i32) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__bindOut(i8*, i32, i32, i32) local_unnamed_addr #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
+
+; Function Attrs: nounwind uwtable
+define dso_local i32 @main() local_unnamed_addr #2 {
+entry:
+  %In1 = alloca i32, align 4
+  %In2 = alloca i32, align 4
+  %Out = alloca i32, align 4
+  %RootArgs = alloca %struct.Root, align 8
+  %0 = bitcast i32* %In1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #3
+  store i32 1, i32* %In1, align 4, !tbaa !8
+  %1 = bitcast i32* %In2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) #3
+  store i32 2, i32* %In2, align 4, !tbaa !8
+  %2 = bitcast i32* %Out to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %2) #3
+  store i32 0, i32* %Out, align 4, !tbaa !8
+  %3 = bitcast %struct.Root* %RootArgs to i8*
+  call void @llvm.lifetime.start.p0i8(i64 24, i8* nonnull %3) #3
+  %input1 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 0
+  store i32* %In1, i32** %input1, align 8, !tbaa !12
+  %input2 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 1
+  store i32* %In2, i32** %input2, align 8, !tbaa !15
+  %output = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 2
+  store i32* %Out, i32** %output, align 8, !tbaa !16
+  call void @llvm.hpvm.init()
+  %4 = bitcast %struct.Root* %RootArgs to i8*
+  %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot (i32*, i32*, i32*)* @PipeRoot_cloned to i8*), i8* %4, i1 false)
+  call void @llvm.hpvm.wait(i8* %graphID)
+  call void @llvm.hpvm.cleanup()
+  call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull %3) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %2) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #3
+  ret i32 0
+}
+
+declare dso_local void @__hpvm__init(...) local_unnamed_addr #0
+
+declare dso_local i8* @__hpvm__launch(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__wait(i8*) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__cleanup(...) local_unnamed_addr #0
+
+declare i8* @llvm_hpvm_initializeTimerSet()
+
+declare void @llvm_hpvm_switchToTimer(i8**, i32)
+
+declare void @llvm_hpvm_printTimerSet(i8**, i8*)
+
+; Function Attrs: nounwind uwtable
+define dso_local %struct.out.Func1 @Func1_cloned(i32* in %In, i32* out %Out) #2 {
+entry:
+  %returnStruct = insertvalue %struct.out.Func1 undef, i32* %Out, 0
+  ret %struct.out.Func1 %returnStruct
+}
+
+; Function Attrs: nounwind uwtable
+define dso_local %struct.out.Func4 @Func4_cloned(i32* in %In, i32* out %Out) #2 {
+entry:
+  %returnStruct = insertvalue %struct.out.Func4 undef, i32* %Out, 0
+  ret %struct.out.Func4 %returnStruct
+}
+
+; Function Attrs: nounwind uwtable
+define dso_local %struct.out.Func5 @Func5_cloned(i32* in %In1, i32* in %In2, i32* out %Out) #2 {
+entry:
+  %returnStruct = insertvalue %struct.out.Func5 undef, i32* %Out, 0
+  ret %struct.out.Func5 %returnStruct
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createNode2D(i8*, i64, i64) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.bind.input(i8*, i32, i32, i1) #3
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32, i1) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.bind.output(i8*, i32, i32, i1) #3
+
+; Function Attrs: nounwind uwtable
+define dso_local %struct.out.Func3 @Func3_cloned(i32* in %In, i32* out %Out) #2 {
+entry:
+  %Func4_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%struct.out.Func4 (i32*, i32*)* @Func4_cloned to i8*), i64 3, i64 6)
+  %Func5_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%struct.out.Func5 (i32*, i32*, i32*)* @Func5_cloned to i8*), i64 4, i64 5)
+  call void @llvm.hpvm.bind.input(i8* %Func4_cloned.node, i32 0, i32 0, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func5_cloned.node, i32 1, i32 0, i1 false)
+  %output = call i8* @llvm.hpvm.createEdge(i8* %Func4_cloned.node, i8* %Func5_cloned.node, i1 false, i32 1, i32 1, i1 false)
+  call void @llvm.hpvm.bind.output(i8* %Func5_cloned.node, i32 0, i32 0, i1 false)
+  ret %struct.out.Func3 undef
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createNode1D(i8*, i64) #3
+
+; Function Attrs: nounwind uwtable
+define dso_local %struct.out.Func2 @Func2_cloned(i32* in %BindIn, i32* in %SrcIn, i32* out %Out) #2 {
+entry:
+  %Func3_cloned.node = call i8* @llvm.hpvm.createNode1D(i8* bitcast (%struct.out.Func3 (i32*, i32*)* @Func3_cloned to i8*), i64 3)
+  call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node, i32 0, i32 0, i1 false)
+  call void @llvm.hpvm.bind.output(i8* %Func3_cloned.node, i32 0, i32 0, i1 false)
+  ret %struct.out.Func2 undef
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createNode(i8*) #3
+
+; Function Attrs: nounwind uwtable
+define dso_local %struct.out.PipeRoot @PipeRoot_cloned(i32* in %In1, i32* in %In2, i32* out %Out) #2 {
+entry:
+  %Func1_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func1 (i32*, i32*)* @Func1_cloned to i8*))
+  %Func2_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func2 (i32*, i32*, i32*)* @Func2_cloned to i8*))
+  call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node, i32 0, i32 0, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 1, i32 0, i1 false)
+  %output = call i8* @llvm.hpvm.createEdge(i8* %Func1_cloned.node, i8* %Func2_cloned.node, i1 false, i32 1, i32 1, i1 false)
+  call void @llvm.hpvm.bind.output(i8* %Func2_cloned.node, i32 0, i32 0, i1 false)
+  ret %struct.out.PipeRoot undef
+}
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.init() #3
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.launch(i8*, i8*, i1) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.wait(i8*) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.cleanup() #3
+
+attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #3 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+!hpvm_hint_cpu = !{!2, !3, !4, !5, !6, !7}
+!hpvm_hint_gpu = !{}
+!hpvm_hint_spir = !{}
+!hpvm_hint_cudnn = !{}
+!hpvm_hint_promise = !{}
+!hpvm_hint_cpu_gpu = !{}
+!hpvm_hint_cpu_spir = !{}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 6690f9e7e8b46b96aea222d3e85315cd63545953)"}
+!2 = !{%struct.out.Func1 (i32*, i32*)* @Func1_cloned}
+!3 = !{%struct.out.Func4 (i32*, i32*)* @Func4_cloned}
+!4 = !{%struct.out.Func5 (i32*, i32*, i32*)* @Func5_cloned}
+!5 = !{%struct.out.Func3 (i32*, i32*)* @Func3_cloned}
+!6 = !{%struct.out.Func2 (i32*, i32*, i32*)* @Func2_cloned}
+!7 = !{%struct.out.PipeRoot (i32*, i32*, i32*)* @PipeRoot_cloned}
+!8 = !{!9, !9, i64 0}
+!9 = !{!"int", !10, i64 0}
+!10 = !{!"omnipotent char", !11, i64 0}
+!11 = !{!"Simple C/C++ TBAA"}
+!12 = !{!13, !14, i64 0}
+!13 = !{!"Root", !14, i64 0, !14, i64 8, !14, i64 16}
+!14 = !{!"any pointer", !10, i64 0}
+!15 = !{!13, !14, i64 8}
+!16 = !{!13, !14, i64 16}
diff --git a/hpvm/test/regressionTests/BuildDFG/LeafNodeGetters.genvisc.ll b/hpvm/test/regressionTests/BuildDFG/LeafNodeGetters.genvisc.ll
new file mode 100644
index 0000000000..4eb82515dc
--- /dev/null
+++ b/hpvm/test/regressionTests/BuildDFG/LeafNodeGetters.genvisc.ll
@@ -0,0 +1,304 @@
+; RUN: opt -load LLVMBuildDFG.so -S < %s | FileCheck %s
+; ModuleID = 'LeafNodeGetters.ll'
+source_filename = "LeafNodeGetters.c"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.Root = type { i32*, i32*, i32* }
+%struct.out.Func1 = type <{ i32* }>
+%struct.out.Func4 = type <{ i32* }>
+%struct.out.Func5 = type <{ i32* }>
+%struct.out.Func3 = type <{ i32* }>
+%struct.out.Func2 = type <{ i32* }>
+%struct.out.PipeRoot = type <{ i32* }>
+; CHECK-LABEL: %struct.Root =
+
+; CHECK-LABEL: %struct.out.Func1 =
+; CHECK-LABEL: %struct.out.Func4 =
+; CHECK-LABEL: %struct.out.Func5 =
+; CHECK-LABEL: %struct.out.Func3 =
+; CHECK-LABEL: %struct.out.Func2 =
+; CHECK-LABEL: %struct.out.PipeRoot =
+
+; CHECK-LABEL: i32 @main(
+; CHECK: [[ALLOCA:%[0-9a-zA-Z]+]] = alloca %struct.Root
+; CHECK: call void @llvm.hpvm.init()
+; CHECK: [[REGISTER:%[0-9]+]] = bitcast %struct.Root* [[ALLOCA]] to i8*
+; CHECK: call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot (i32*, i32*, i32*)* @PipeRoot_cloned to i8*), i8* [[REGISTER]],
+; CHECK-NEXT: call void @llvm.hpvm.wait(i8*
+
+; CHECK-LABEL: @Func1_cloned(
+; CHECK: [[RET1:%[0-9a-zA-Z]+]] = insertvalue %struct.out.Func1 undef,
+; CHECK-NEXT: ret %struct.out.Func1 [[RET1]]
+
+; CHECK-LABEL: @Func4_cloned(
+; CHECK: [[RET4:%[0-9a-zA-Z]+]] = insertvalue %struct.out.Func4 undef,
+; CHECK-NEXT: ret %struct.out.Func4 [[RET4]]
+
+; CHECK-LABEL: @Func5_cloned(
+; CHECK: [[NODE:%[0-9a-zA-Z]+]] = call i8* @llvm.hpvm.getNode()
+; CHECK-NEXT: call i8* @llvm.hpvm.getParentNode(i8* [[NODE]])
+; CHECK-NEXT: call i64 @llvm.hpvm.getNodeInstanceID.x(i8* [[NODE]])
+; CHECK-NEXT: call i64 @llvm.hpvm.getNodeInstanceID.y(i8* [[NODE]])
+; CHECK-NEXT: call i64 @llvm.hpvm.getNodeInstanceID.z(i8* [[NODE]])
+; CHECK-NEXT: call i64 @llvm.hpvm.getNumNodeInstances.x(i8* [[NODE]])
+; CHECK-NEXT: call i64 @llvm.hpvm.getNumNodeInstances.y(i8* [[NODE]])
+; CHECK-NEXT: call i64 @llvm.hpvm.getNumNodeInstances.z(i8* [[NODE]])
+
+; CHECK-LABEL: @Func3_cloned(
+; CHECK: %Func4_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%struct.out.Func4 (i32*
+; CHECK-NEXT: %Func5_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%struct.out.Func5 (i32*
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func4_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func5_cloned.node,
+; CHECK-NEXT: call i8* @llvm.hpvm.createEdge(i8* %Func4_cloned.node, i8* %Func5_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func5_cloned.node,
+
+; CHECK-LABEL: @Func2_cloned(
+; CHECK: %Func3_cloned.node = call i8* @llvm.hpvm.createNode1D(i8* bitcast (%struct.out.Func3 (i32*
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func3_cloned.node,
+
+; CHECK-LABEL: @PipeRoot_cloned(i32*
+; CHECK: %Func1_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func1 (i32*
+; CHECK-NEXT: %Func2_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func2 (i32*
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node,
+; CHECK-NEXT: call i8* @llvm.hpvm.createEdge(i8* %Func1_cloned.node, i8* %Func2_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func2_cloned.node,
+
+
+declare dso_local void @__hpvm__hint(i32) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__attributes(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__return(i32, ...) local_unnamed_addr #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
+
+declare dso_local i8* @__hpvm__getNode(...) local_unnamed_addr #0
+
+declare dso_local i8* @__hpvm__getParentNode(i8*) local_unnamed_addr #0
+
+declare dso_local i64 @__hpvm__getNodeInstanceID_x(i8*) local_unnamed_addr #0
+
+declare dso_local i64 @__hpvm__getNodeInstanceID_y(i8*) local_unnamed_addr #0
+
+declare dso_local i64 @__hpvm__getNodeInstanceID_z(i8*) local_unnamed_addr #0
+
+declare dso_local i64 @__hpvm__getNumNodeInstances_x(i8*) local_unnamed_addr #0
+
+declare dso_local i64 @__hpvm__getNumNodeInstances_y(i8*) local_unnamed_addr #0
+
+declare dso_local i64 @__hpvm__getNumNodeInstances_z(i8*) local_unnamed_addr #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
+
+declare dso_local i8* @__hpvm__createNodeND(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__bindIn(i8*, i32, i32, i32) local_unnamed_addr #0
+
+declare dso_local i8* @__hpvm__edge(i8*, i8*, i32, i32, i32, i32) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__bindOut(i8*, i32, i32, i32) local_unnamed_addr #0
+
+; Function Attrs: nounwind uwtable
+define dso_local i32 @main() local_unnamed_addr #2 {
+entry:
+  %In1 = alloca i32, align 4
+  %In2 = alloca i32, align 4
+  %Out = alloca i32, align 4
+  %RootArgs = alloca %struct.Root, align 8
+  %0 = bitcast i32* %In1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #4
+  store i32 1, i32* %In1, align 4, !tbaa !8
+  %1 = bitcast i32* %In2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) #4
+  store i32 2, i32* %In2, align 4, !tbaa !8
+  %2 = bitcast i32* %Out to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %2) #4
+  store i32 0, i32* %Out, align 4, !tbaa !8
+  %3 = bitcast %struct.Root* %RootArgs to i8*
+  call void @llvm.lifetime.start.p0i8(i64 24, i8* nonnull %3) #4
+  %input1 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 0
+  store i32* %In1, i32** %input1, align 8, !tbaa !12
+  %input2 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 1
+  store i32* %In2, i32** %input2, align 8, !tbaa !15
+  %output = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 2
+  store i32* %Out, i32** %output, align 8, !tbaa !16
+  call void @llvm.hpvm.init()
+  %4 = bitcast %struct.Root* %RootArgs to i8*
+  %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot (i32*, i32*, i32*)* @PipeRoot_cloned to i8*), i8* %4, i1 false)
+  call void @llvm.hpvm.wait(i8* %graphID)
+  call void @llvm.hpvm.cleanup()
+  call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull %3) #4
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %2) #4
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) #4
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #4
+  ret i32 0
+}
+
+declare dso_local void @__hpvm__init(...) local_unnamed_addr #0
+
+declare dso_local i8* @__hpvm__launch(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__wait(i8*) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__cleanup(...) local_unnamed_addr #0
+
+declare i8* @llvm_hpvm_initializeTimerSet()
+
+declare void @llvm_hpvm_switchToTimer(i8**, i32)
+
+declare void @llvm_hpvm_printTimerSet(i8**, i8*)
+
+; Function Attrs: nounwind uwtable
+define dso_local %struct.out.Func1 @Func1_cloned(i32* in %In, i32* out %Out) #2 {
+entry:
+  %returnStruct = insertvalue %struct.out.Func1 undef, i32* %Out, 0
+  ret %struct.out.Func1 %returnStruct
+}
+
+; Function Attrs: nounwind uwtable
+define dso_local %struct.out.Func4 @Func4_cloned(i32* in %In, i32* out %Out) #2 {
+entry:
+  %returnStruct = insertvalue %struct.out.Func4 undef, i32* %Out, 0
+  ret %struct.out.Func4 %returnStruct
+}
+
+; Function Attrs: nounwind readnone
+declare i8* @llvm.hpvm.getNode() #3
+
+; Function Attrs: nounwind readnone
+declare i8* @llvm.hpvm.getParentNode(i8*) #3
+
+; Function Attrs: nounwind readnone
+declare i64 @llvm.hpvm.getNodeInstanceID.x(i8*) #3
+
+; Function Attrs: nounwind readnone
+declare i64 @llvm.hpvm.getNodeInstanceID.y(i8*) #3
+
+; Function Attrs: nounwind readnone
+declare i64 @llvm.hpvm.getNodeInstanceID.z(i8*) #3
+
+; Function Attrs: nounwind readnone
+declare i64 @llvm.hpvm.getNumNodeInstances.x(i8*) #3
+
+; Function Attrs: nounwind readnone
+declare i64 @llvm.hpvm.getNumNodeInstances.y(i8*) #3
+
+; Function Attrs: nounwind readnone
+declare i64 @llvm.hpvm.getNumNodeInstances.z(i8*) #3
+
+; Function Attrs: nounwind uwtable
+define dso_local %struct.out.Func5 @Func5_cloned(i32* in %In1, i32* in %In2, i32* out %Out) #2 {
+entry:
+  %call8 = call i8* @llvm.hpvm.getNode()
+  %call19 = call i8* @llvm.hpvm.getParentNode(i8* %call8)
+  %call210 = call i64 @llvm.hpvm.getNodeInstanceID.x(i8* %call8)
+  %call311 = call i64 @llvm.hpvm.getNodeInstanceID.y(i8* %call8)
+  %call412 = call i64 @llvm.hpvm.getNodeInstanceID.z(i8* %call8)
+  %call513 = call i64 @llvm.hpvm.getNumNodeInstances.x(i8* %call8)
+  %call614 = call i64 @llvm.hpvm.getNumNodeInstances.y(i8* %call8)
+  %call715 = call i64 @llvm.hpvm.getNumNodeInstances.z(i8* %call8)
+  %returnStruct = insertvalue %struct.out.Func5 undef, i32* %Out, 0
+  ret %struct.out.Func5 %returnStruct
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createNode2D(i8*, i64, i64) #4
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.bind.input(i8*, i32, i32, i1) #4
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32, i1) #4
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.bind.output(i8*, i32, i32, i1) #4
+
+; Function Attrs: nounwind uwtable
+define dso_local %struct.out.Func3 @Func3_cloned(i32* in %In, i32* out %Out) #2 {
+entry:
+  %Func4_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%struct.out.Func4 (i32*, i32*)* @Func4_cloned to i8*), i64 3, i64 6)
+  %Func5_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%struct.out.Func5 (i32*, i32*, i32*)* @Func5_cloned to i8*), i64 4, i64 5)
+  call void @llvm.hpvm.bind.input(i8* %Func4_cloned.node, i32 0, i32 0, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func5_cloned.node, i32 1, i32 0, i1 false)
+  %output = call i8* @llvm.hpvm.createEdge(i8* %Func4_cloned.node, i8* %Func5_cloned.node, i1 false, i32 1, i32 1, i1 false)
+  call void @llvm.hpvm.bind.output(i8* %Func5_cloned.node, i32 0, i32 0, i1 false)
+  ret %struct.out.Func3 undef
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createNode1D(i8*, i64) #4
+
+; Function Attrs: nounwind uwtable
+define dso_local %struct.out.Func2 @Func2_cloned(i32* in %BindIn, i32* in %SrcIn, i32* out %Out) #2 {
+entry:
+  %Func3_cloned.node = call i8* @llvm.hpvm.createNode1D(i8* bitcast (%struct.out.Func3 (i32*, i32*)* @Func3_cloned to i8*), i64 3)
+  call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node, i32 0, i32 0, i1 false)
+  call void @llvm.hpvm.bind.output(i8* %Func3_cloned.node, i32 0, i32 0, i1 false)
+  ret %struct.out.Func2 undef
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createNode(i8*) #4
+
+; Function Attrs: nounwind uwtable
+define dso_local %struct.out.PipeRoot @PipeRoot_cloned(i32* in %In1, i32* in %In2, i32* out %Out) #2 {
+entry:
+  %Func1_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func1 (i32*, i32*)* @Func1_cloned to i8*))
+  %Func2_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func2 (i32*, i32*, i32*)* @Func2_cloned to i8*))
+  call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node, i32 0, i32 0, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 1, i32 0, i1 false)
+  %output = call i8* @llvm.hpvm.createEdge(i8* %Func1_cloned.node, i8* %Func2_cloned.node, i1 false, i32 1, i32 1, i1 false)
+  call void @llvm.hpvm.bind.output(i8* %Func2_cloned.node, i32 0, i32 0, i1 false)
+  ret %struct.out.PipeRoot undef
+}
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.init() #4
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.launch(i8*, i8*, i1) #4
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.wait(i8*) #4
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.cleanup() #4
+
+attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #3 = { nounwind readnone }
+attributes #4 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+!hpvm_hint_cpu = !{!2, !3, !4, !5, !6, !7}
+!hpvm_hint_gpu = !{}
+!hpvm_hint_spir = !{}
+!hpvm_hint_cudnn = !{}
+!hpvm_hint_promise = !{}
+!hpvm_hint_cpu_gpu = !{}
+!hpvm_hint_cpu_spir = !{}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 6690f9e7e8b46b96aea222d3e85315cd63545953)"}
+!2 = !{%struct.out.Func1 (i32*, i32*)* @Func1_cloned}
+!3 = !{%struct.out.Func4 (i32*, i32*)* @Func4_cloned}
+!4 = !{%struct.out.Func5 (i32*, i32*, i32*)* @Func5_cloned}
+!5 = !{%struct.out.Func3 (i32*, i32*)* @Func3_cloned}
+!6 = !{%struct.out.Func2 (i32*, i32*, i32*)* @Func2_cloned}
+!7 = !{%struct.out.PipeRoot (i32*, i32*, i32*)* @PipeRoot_cloned}
+!8 = !{!9, !9, i64 0}
+!9 = !{!"int", !10, i64 0}
+!10 = !{!"omnipotent char", !11, i64 0}
+!11 = !{!"Simple C/C++ TBAA"}
+!12 = !{!13, !14, i64 0}
+!13 = !{!"Root", !14, i64 0, !14, i64 8, !14, i64 16}
+!14 = !{!"any pointer", !10, i64 0}
+!15 = !{!13, !14, i64 8}
+!16 = !{!13, !14, i64 16}
diff --git a/hpvm/test/regressionTests/BuildDFG/OneRoot.ll b/hpvm/test/regressionTests/BuildDFG/OneRoot.ll
new file mode 100644
index 0000000000..3881f6943d
--- /dev/null
+++ b/hpvm/test/regressionTests/BuildDFG/OneRoot.ll
@@ -0,0 +1,111 @@
+; RUN: opt -load LLVMBuildDFG.so -S < %s | FileCheck %s
+; ModuleID = 'oneLaunchAlloca.ll'
+source_filename = "oneLaunchAlloca.c"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.Root = type { i32*, i32* }
+%struct.out.PipeRoot = type <{ i32* }>
+
+declare dso_local void @__hpvm__hint(i32) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__attributes(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__return(i32, ...) local_unnamed_addr #0
+
+; Function Attrs: nounwind uwtable -- host entry point: stores the %In and %Out pointers into %RootArgs, then calls init, launch(@PipeRoot_cloned, %RootArgs), wait and cleanup.
+define dso_local i32 @main() local_unnamed_addr #1 {
+; CHECK-LABEL: i32 @main(
+; CHECK: [[ALLOCA:%[0-9a-zA-Z]+]] = alloca %struct.Root
+; CHECK: call void @llvm.hpvm.init()
+; CHECK:  [[REGISTER:%[0-9]+]] = bitcast %struct.Root* [[ALLOCA]] to i8*
+; CHECK: call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot (i32*, i32*)* @PipeRoot_cloned to i8*), i8* [[REGISTER]], i1 false)
+; CHECK-NEXT: call void @llvm.hpvm.wait(i8*
+entry:
+  %In = alloca i32, align 4
+  %Out = alloca i32, align 4
+  %RootArgs = alloca %struct.Root, align 8
+  %0 = bitcast i32* %In to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #3
+  %1 = bitcast i32* %Out to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) #3
+  %2 = bitcast %struct.Root* %RootArgs to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %2) #3
+  %input = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 0
+  store i32* %In, i32** %input, align 8, !tbaa !3
+  %output = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 1
+  store i32* %Out, i32** %output, align 8, !tbaa !8
+  call void @llvm.hpvm.init()
+  %3 = bitcast %struct.Root* %RootArgs to i8*
+  %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot (i32*, i32*)* @PipeRoot_cloned to i8*), i8* %3, i1 false)
+  call void @llvm.hpvm.wait(i8* %graphID)
+  call void @llvm.hpvm.cleanup()
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %2) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #3
+  ret i32 0
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2
+
+declare dso_local void @__hpvm__init(...) local_unnamed_addr #0
+
+declare dso_local i8* @__hpvm__launch(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__wait(i8*) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__cleanup(...) local_unnamed_addr #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2
+
+declare i8* @llvm_hpvm_initializeTimerSet()
+
+declare void @llvm_hpvm_switchToTimer(i8**, i32)
+
+declare void @llvm_hpvm_printTimerSet(i8**, i8*)
+
+; Function Attrs: nounwind uwtable -- launch target of @main: returns %Out in field 0 of %struct.out.PipeRoot; %In is not read.
+define dso_local %struct.out.PipeRoot @PipeRoot_cloned(i32* in %In, i32* out %Out) #1 {
+entry:
+  %returnStruct = insertvalue %struct.out.PipeRoot undef, i32* %Out, 0
+  ret %struct.out.PipeRoot %returnStruct
+}
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.init() #3
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.launch(i8*, i8*, i1) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.wait(i8*) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.cleanup() #3
+
+attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #2 = { argmemonly nounwind }
+attributes #3 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+!hpvm_hint_cpu = !{!2}
+!hpvm_hint_gpu = !{}
+!hpvm_hint_spir = !{}
+!hpvm_hint_cudnn = !{}
+!hpvm_hint_promise = !{}
+!hpvm_hint_cpu_gpu = !{}
+!hpvm_hint_cpu_spir = !{}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 6690f9e7e8b46b96aea222d3e85315cd63545953)"}
+!2 = !{%struct.out.PipeRoot (i32*, i32*)* @PipeRoot_cloned}
+!3 = !{!4, !5, i64 0}
+!4 = !{!"Root", !5, i64 0, !5, i64 8}
+!5 = !{!"any pointer", !6, i64 0}
+!6 = !{!"omnipotent char", !7, i64 0}
+!7 = !{!"Simple C/C++ TBAA"}
+!8 = !{!4, !5, i64 8}
diff --git a/hpvm/test/regressionTests/BuildDFG/ThreeLevel.genvisc.ll b/hpvm/test/regressionTests/BuildDFG/ThreeLevel.genvisc.ll
new file mode 100644
index 0000000000..431e604804
--- /dev/null
+++ b/hpvm/test/regressionTests/BuildDFG/ThreeLevel.genvisc.ll
@@ -0,0 +1,231 @@
+; RUN: opt -load LLVMBuildDFG.so -S < %s | FileCheck %s
+; ModuleID = 'ThreeLevel.ll'
+source_filename = "ThreeLevel.c"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.Root = type { i32*, i32*, i32* }
+%struct.out.Func1 = type <{ i32* }>
+%struct.out.Func4 = type <{ i32* }>
+%struct.out.Func3 = type <{ i32* }>
+%struct.out.Func2 = type <{ i32* }>
+%struct.out.PipeRoot = type <{ i32* }>
+
+; CHECK-LABEL: %struct.Root =
+; Value-name patterns below accept every digit, 0 included, so temporaries such as %10 still match.
+; CHECK-LABEL: %struct.out.Func1 =
+; CHECK-LABEL: %struct.out.Func4 =
+; CHECK-LABEL: %struct.out.Func3 =
+; CHECK-LABEL: %struct.out.Func2 =
+; CHECK-LABEL: %struct.out.PipeRoot =
+; Host code: the root argument struct is allocated, cast to i8* after init, launched, and waited on.
+; CHECK-LABEL: i32 @main(
+; CHECK: [[ALLOCA:%[0-9a-zA-Z]+]] = alloca %struct.Root
+; CHECK: call void @llvm.hpvm.init()
+; CHECK:  [[REGISTER:%[0-9]+]] = bitcast %struct.Root* [[ALLOCA]] to i8*
+; CHECK: call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot (i32*, i32*, i32*)* @PipeRoot_cloned to i8*), i8* [[REGISTER]],
+; CHECK-NEXT: call void @llvm.hpvm.wait(i8*
+; Func1 and Func4 build their result with insertvalue and return it on the very next line.
+; CHECK-LABEL: @Func1_cloned(
+; CHECK: [[RET1:%[0-9a-zA-Z]+]] = insertvalue %struct.out.Func1 undef,
+; CHECK-NEXT: ret %struct.out.Func1 [[RET1]]
+
+; CHECK-LABEL: @Func4_cloned(
+; CHECK: [[RET4:%[0-9a-zA-Z]+]] = insertvalue %struct.out.Func4 undef,
+; CHECK-NEXT: ret %struct.out.Func4 [[RET4]]
+
+; CHECK-LABEL: @Func3_cloned(
+; CHECK: %Func4_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%struct.out.Func4 (i32*
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func4_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func4_cloned.node,
+
+; CHECK-LABEL: @Func2_cloned(
+; CHECK: %Func3_cloned.node = call i8* @llvm.hpvm.createNode1D(i8* bitcast (%struct.out.Func3 (i32*
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func3_cloned.node,
+
+; CHECK-LABEL: @PipeRoot_cloned(i32*
+; CHECK: %Func1_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func1 (i32*
+; CHECK-NEXT: %Func2_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func2 (i32*
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node,
+; CHECK-NEXT: call i8* @llvm.hpvm.createEdge(i8* %Func1_cloned.node, i8* %Func2_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func2_cloned.node,
+
+
+
+declare dso_local void @__hpvm__hint(i32) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__attributes(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__return(i32, ...) local_unnamed_addr #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
+
+declare dso_local i8* @__hpvm__createNodeND(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__bindIn(i8*, i32, i32, i32) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__bindOut(i8*, i32, i32, i32) local_unnamed_addr #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
+
+declare dso_local i8* @__hpvm__edge(i8*, i8*, i32, i32, i32, i32) local_unnamed_addr #0
+
+; Function Attrs: nounwind uwtable -- host entry point: stores 1, 2, 0 into %In1, %In2, %Out, packs the three pointers into %RootArgs, then calls init, launch(@PipeRoot_cloned, %RootArgs), wait and cleanup.
+define dso_local i32 @main() local_unnamed_addr #2 {
+entry:
+  %In1 = alloca i32, align 4
+  %In2 = alloca i32, align 4
+  %Out = alloca i32, align 4
+  %RootArgs = alloca %struct.Root, align 8
+  %0 = bitcast i32* %In1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #3
+  store i32 1, i32* %In1, align 4, !tbaa !7
+  %1 = bitcast i32* %In2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) #3
+  store i32 2, i32* %In2, align 4, !tbaa !7
+  %2 = bitcast i32* %Out to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %2) #3
+  store i32 0, i32* %Out, align 4, !tbaa !7
+  %3 = bitcast %struct.Root* %RootArgs to i8*
+  call void @llvm.lifetime.start.p0i8(i64 24, i8* nonnull %3) #3
+  %input1 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 0
+  store i32* %In1, i32** %input1, align 8, !tbaa !11
+  %input2 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 1
+  store i32* %In2, i32** %input2, align 8, !tbaa !14
+  %output = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 2
+  store i32* %Out, i32** %output, align 8, !tbaa !15
+  call void @llvm.hpvm.init()
+  %4 = bitcast %struct.Root* %RootArgs to i8*
+  %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot (i32*, i32*, i32*)* @PipeRoot_cloned to i8*), i8* %4, i1 false)
+  call void @llvm.hpvm.wait(i8* %graphID)
+  call void @llvm.hpvm.cleanup()
+  call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull %3) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %2) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #3
+  ret i32 0
+}
+
+declare dso_local void @__hpvm__init(...) local_unnamed_addr #0
+
+declare dso_local i8* @__hpvm__launch(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__wait(i8*) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__cleanup(...) local_unnamed_addr #0
+
+declare i8* @llvm_hpvm_initializeTimerSet()
+
+declare void @llvm_hpvm_switchToTimer(i8**, i32)
+
+declare void @llvm_hpvm_printTimerSet(i8**, i8*)
+
+; Function Attrs: nounwind uwtable -- returns %Out in field 0 of %struct.out.Func1; %In is not read.
+define dso_local %struct.out.Func1 @Func1_cloned(i32* in %In, i32* out %Out) #2 {
+entry:
+  %returnStruct = insertvalue %struct.out.Func1 undef, i32* %Out, 0
+  ret %struct.out.Func1 %returnStruct
+}
+
+; Function Attrs: nounwind uwtable -- returns %Out in field 0 of %struct.out.Func4; %In is not read.
+define dso_local %struct.out.Func4 @Func4_cloned(i32* in %In, i32* out %Out) #2 {
+entry:
+  %returnStruct = insertvalue %struct.out.Func4 undef, i32* %Out, 0
+  ret %struct.out.Func4 %returnStruct
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createNode2D(i8*, i64, i64) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.bind.input(i8*, i32, i32, i1) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.bind.output(i8*, i32, i32, i1) #3
+
+; Function Attrs: nounwind uwtable -- creates one createNode2D child for @Func4_cloned (i64 arguments 3 and 6, presumably its dimension sizes), calls bind.input and bind.output on it, and returns undef.
+define dso_local %struct.out.Func3 @Func3_cloned(i32* in %In, i32* out %Out) #2 {
+entry:
+  %Func4_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%struct.out.Func4 (i32*, i32*)* @Func4_cloned to i8*), i64 3, i64 6)
+  call void @llvm.hpvm.bind.input(i8* %Func4_cloned.node, i32 0, i32 0, i1 false)
+  call void @llvm.hpvm.bind.output(i8* %Func4_cloned.node, i32 0, i32 0, i1 false)
+  ret %struct.out.Func3 undef
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createNode1D(i8*, i64) #3
+
+; Function Attrs: nounwind uwtable -- creates one createNode1D child for @Func3_cloned (i64 argument 3, presumably its dimension size), calls bind.input and bind.output on it, and returns undef.
+define dso_local %struct.out.Func2 @Func2_cloned(i32* in %BindIn, i32* in %SrcIn, i32* out %Out) #2 {
+entry:
+  %Func3_cloned.node = call i8* @llvm.hpvm.createNode1D(i8* bitcast (%struct.out.Func3 (i32*, i32*)* @Func3_cloned to i8*), i64 3)
+  call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node, i32 0, i32 0, i1 false)
+  call void @llvm.hpvm.bind.output(i8* %Func3_cloned.node, i32 0, i32 0, i1 false)
+  ret %struct.out.Func2 undef
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createNode(i8*) #3
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32, i1) #3
+
+; Function Attrs: nounwind uwtable -- launch target of @main: creates nodes for @Func1_cloned and @Func2_cloned, calls bind.input on each, createEdge from the Func1 node to the Func2 node, bind.output on the Func2 node, and returns undef.
+define dso_local %struct.out.PipeRoot @PipeRoot_cloned(i32* in %In1, i32* in %In2, i32* out %Out) #2 {
+entry:
+  %Func1_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func1 (i32*, i32*)* @Func1_cloned to i8*))
+  %Func2_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func2 (i32*, i32*, i32*)* @Func2_cloned to i8*))
+  call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node, i32 0, i32 0, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 1, i32 0, i1 false)
+  %output = call i8* @llvm.hpvm.createEdge(i8* %Func1_cloned.node, i8* %Func2_cloned.node, i1 false, i32 1, i32 1, i1 false)
+  call void @llvm.hpvm.bind.output(i8* %Func2_cloned.node, i32 0, i32 0, i1 false)
+  ret %struct.out.PipeRoot undef
+}
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.init() #3
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.launch(i8*, i8*, i1) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.wait(i8*) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.cleanup() #3
+
+attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #3 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+!hpvm_hint_cpu = !{!2, !3, !4, !5, !6}
+!hpvm_hint_gpu = !{}
+!hpvm_hint_spir = !{}
+!hpvm_hint_cudnn = !{}
+!hpvm_hint_promise = !{}
+!hpvm_hint_cpu_gpu = !{}
+!hpvm_hint_cpu_spir = !{}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 6690f9e7e8b46b96aea222d3e85315cd63545953)"}
+!2 = !{%struct.out.Func1 (i32*, i32*)* @Func1_cloned}
+!3 = !{%struct.out.Func4 (i32*, i32*)* @Func4_cloned}
+!4 = !{%struct.out.Func3 (i32*, i32*)* @Func3_cloned}
+!5 = !{%struct.out.Func2 (i32*, i32*, i32*)* @Func2_cloned}
+!6 = !{%struct.out.PipeRoot (i32*, i32*, i32*)* @PipeRoot_cloned}
+!7 = !{!8, !8, i64 0}
+!8 = !{!"int", !9, i64 0}
+!9 = !{!"omnipotent char", !10, i64 0}
+!10 = !{!"Simple C/C++ TBAA"}
+!11 = !{!12, !13, i64 0}
+!12 = !{!"Root", !13, i64 0, !13, i64 8, !13, i64 16}
+!13 = !{!"any pointer", !9, i64 0}
+!14 = !{!12, !13, i64 8}
+!15 = !{!12, !13, i64 16}
diff --git a/hpvm/test/regressionTests/BuildDFG/ThreeLevelEdge.genvisc.ll b/hpvm/test/regressionTests/BuildDFG/ThreeLevelEdge.genvisc.ll
new file mode 100644
index 0000000000..8c76da27e8
--- /dev/null
+++ b/hpvm/test/regressionTests/BuildDFG/ThreeLevelEdge.genvisc.ll
@@ -0,0 +1,248 @@
+; RUN: opt -load LLVMBuildDFG.so -S <  %s | FileCheck %s
+; ModuleID = 'ThreeLevelEdge.ll'
+source_filename = "ThreeLevelEdge.c"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.Root = type { i32*, i32*, i32* }
+%struct.out.Func1 = type <{ i32* }>
+%struct.out.Func4 = type <{ i32* }>
+%struct.out.Func5 = type <{ i32* }>
+%struct.out.Func3 = type <{ i32* }>
+%struct.out.Func2 = type <{ i32* }>
+%struct.out.PipeRoot = type <{ i32* }>
+
+; CHECK-LABEL: %struct.Root =
+; Value-name patterns below accept every digit, 0 included, so temporaries such as %10 still match.
+; CHECK-LABEL: %struct.out.Func1 =
+; CHECK-LABEL: %struct.out.Func4 =
+; CHECK-LABEL: %struct.out.Func5 =
+; CHECK-LABEL: %struct.out.Func3 =
+; CHECK-LABEL: %struct.out.Func2 =
+; CHECK-LABEL: %struct.out.PipeRoot =
+; Host code: the root argument struct is allocated, cast to i8* after init, launched, and waited on.
+; CHECK-LABEL: i32 @main(
+; CHECK: [[ALLOCA:%[0-9a-zA-Z]+]] = alloca %struct.Root
+; CHECK: call void @llvm.hpvm.init()
+; CHECK:  [[REGISTER:%[0-9]+]] = bitcast %struct.Root* [[ALLOCA]] to i8*
+; CHECK: call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot (i32*, i32*, i32*)* @PipeRoot_cloned to i8*), i8* [[REGISTER]],
+; CHECK-NEXT: call void @llvm.hpvm.wait(i8*
+; Func1, Func4 and Func5 build their result with insertvalue and return it on the very next line.
+; CHECK-LABEL: @Func1_cloned(
+; CHECK: [[RET1:%[0-9a-zA-Z]+]] = insertvalue %struct.out.Func1 undef,
+; CHECK-NEXT: ret %struct.out.Func1 [[RET1]]
+
+; CHECK-LABEL: @Func4_cloned(
+; CHECK: [[RET4:%[0-9a-zA-Z]+]] = insertvalue %struct.out.Func4 undef,
+; CHECK-NEXT: ret %struct.out.Func4 [[RET4]]
+
+; CHECK-LABEL: @Func5_cloned(
+; CHECK: [[RET5:%[0-9a-zA-Z]+]] = insertvalue %struct.out.Func5 undef,
+; CHECK-NEXT: ret %struct.out.Func5 [[RET5]]
+
+; CHECK-LABEL: @Func3_cloned(
+; CHECK: %Func4_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%struct.out.Func4 (i32*
+; CHECK-NEXT: %Func5_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%struct.out.Func5 (i32*
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func4_cloned.node,
+; CHECK-NEXT: call i8* @llvm.hpvm.createEdge(i8* %Func4_cloned.node, i8* %Func5_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func5_cloned.node,
+
+; CHECK-LABEL: @Func2_cloned(
+; CHECK: %Func3_cloned.node = call i8* @llvm.hpvm.createNode1D(i8* bitcast (%struct.out.Func3 (i32*
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func3_cloned.node,
+
+; CHECK-LABEL: @PipeRoot_cloned(i32*
+; CHECK: %Func1_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func1 (i32*
+; CHECK-NEXT: %Func2_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func2 (i32*
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node,
+; CHECK-NEXT: call i8* @llvm.hpvm.createEdge(i8* %Func1_cloned.node, i8* %Func2_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func2_cloned.node,
+
+
+declare dso_local void @__hpvm__hint(i32) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__attributes(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__return(i32, ...) local_unnamed_addr #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
+
+declare dso_local i8* @__hpvm__createNodeND(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__bindIn(i8*, i32, i32, i32) local_unnamed_addr #0
+
+declare dso_local i8* @__hpvm__edge(i8*, i8*, i32, i32, i32, i32) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__bindOut(i8*, i32, i32, i32) local_unnamed_addr #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
+
+; Function Attrs: nounwind uwtable -- host entry point: stores 1, 2, 0 into %In1, %In2, %Out, packs the three pointers into %RootArgs, then calls init, launch(@PipeRoot_cloned, %RootArgs), wait and cleanup.
+define dso_local i32 @main() local_unnamed_addr #2 {
+entry:
+  %In1 = alloca i32, align 4
+  %In2 = alloca i32, align 4
+  %Out = alloca i32, align 4
+  %RootArgs = alloca %struct.Root, align 8
+  %0 = bitcast i32* %In1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #3
+  store i32 1, i32* %In1, align 4, !tbaa !8
+  %1 = bitcast i32* %In2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) #3
+  store i32 2, i32* %In2, align 4, !tbaa !8
+  %2 = bitcast i32* %Out to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %2) #3
+  store i32 0, i32* %Out, align 4, !tbaa !8
+  %3 = bitcast %struct.Root* %RootArgs to i8*
+  call void @llvm.lifetime.start.p0i8(i64 24, i8* nonnull %3) #3
+  %input1 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 0
+  store i32* %In1, i32** %input1, align 8, !tbaa !12
+  %input2 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 1
+  store i32* %In2, i32** %input2, align 8, !tbaa !15
+  %output = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 2
+  store i32* %Out, i32** %output, align 8, !tbaa !16
+  call void @llvm.hpvm.init()
+  %4 = bitcast %struct.Root* %RootArgs to i8*
+  %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot (i32*, i32*, i32*)* @PipeRoot_cloned to i8*), i8* %4, i1 false)
+  call void @llvm.hpvm.wait(i8* %graphID)
+  call void @llvm.hpvm.cleanup()
+  call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull %3) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %2) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #3
+  ret i32 0
+}
+
+declare dso_local void @__hpvm__init(...) local_unnamed_addr #0
+
+declare dso_local i8* @__hpvm__launch(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__wait(i8*) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__cleanup(...) local_unnamed_addr #0
+
+declare i8* @llvm_hpvm_initializeTimerSet()
+
+declare void @llvm_hpvm_switchToTimer(i8**, i32)
+
+declare void @llvm_hpvm_printTimerSet(i8**, i8*)
+
+; Function Attrs: nounwind uwtable -- returns %Out in field 0 of %struct.out.Func1; %In is not read.
+define dso_local %struct.out.Func1 @Func1_cloned(i32* in %In, i32* out %Out) #2 {
+entry:
+  %returnStruct = insertvalue %struct.out.Func1 undef, i32* %Out, 0
+  ret %struct.out.Func1 %returnStruct
+}
+
+; Function Attrs: nounwind uwtable -- returns %Out in field 0 of %struct.out.Func4; %In is not read.
+define dso_local %struct.out.Func4 @Func4_cloned(i32* in %In, i32* out %Out) #2 {
+entry:
+  %returnStruct = insertvalue %struct.out.Func4 undef, i32* %Out, 0
+  ret %struct.out.Func4 %returnStruct
+}
+
+; Function Attrs: nounwind uwtable -- returns %Out in field 0 of %struct.out.Func5; %In1 and %In2 are not read.
+define dso_local %struct.out.Func5 @Func5_cloned(i32* in %In1, i32* in %In2, i32* out %Out) #2 {
+entry:
+  %returnStruct = insertvalue %struct.out.Func5 undef, i32* %Out, 0
+  ret %struct.out.Func5 %returnStruct
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createNode2D(i8*, i64, i64) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.bind.input(i8*, i32, i32, i1) #3
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32, i1) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.bind.output(i8*, i32, i32, i1) #3
+
+; Function Attrs: nounwind uwtable -- creates createNode2D children for @Func4_cloned and @Func5_cloned, calls bind.input on the Func4 node, createEdge from the Func4 node to the Func5 node, bind.output on the Func5 node, and returns undef.
+define dso_local %struct.out.Func3 @Func3_cloned(i32* in %In, i32* out %Out) #2 {
+entry:
+  %Func4_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%struct.out.Func4 (i32*, i32*)* @Func4_cloned to i8*), i64 3, i64 6)
+  %Func5_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%struct.out.Func5 (i32*, i32*, i32*)* @Func5_cloned to i8*), i64 4, i64 5)
+  call void @llvm.hpvm.bind.input(i8* %Func4_cloned.node, i32 0, i32 0, i1 false)
+  %output = call i8* @llvm.hpvm.createEdge(i8* %Func4_cloned.node, i8* %Func5_cloned.node, i1 false, i32 1, i32 1, i1 false)
+  call void @llvm.hpvm.bind.output(i8* %Func5_cloned.node, i32 0, i32 0, i1 false)
+  ret %struct.out.Func3 undef
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createNode1D(i8*, i64) #3
+
+; Function Attrs: nounwind uwtable -- creates one createNode1D child for @Func3_cloned (i64 argument 3, presumably its dimension size), calls bind.input and bind.output on it, and returns undef.
+define dso_local %struct.out.Func2 @Func2_cloned(i32* in %BindIn, i32* in %SrcIn, i32* out %Out) #2 {
+entry:
+  %Func3_cloned.node = call i8* @llvm.hpvm.createNode1D(i8* bitcast (%struct.out.Func3 (i32*, i32*)* @Func3_cloned to i8*), i64 3)
+  call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node, i32 0, i32 0, i1 false)
+  call void @llvm.hpvm.bind.output(i8* %Func3_cloned.node, i32 0, i32 0, i1 false)
+  ret %struct.out.Func2 undef
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createNode(i8*) #3
+
+; Function Attrs: nounwind uwtable -- launch target of @main: creates nodes for @Func1_cloned and @Func2_cloned, calls bind.input on each, createEdge from the Func1 node to the Func2 node, bind.output on the Func2 node, and returns undef.
+define dso_local %struct.out.PipeRoot @PipeRoot_cloned(i32* in %In1, i32* in %In2, i32* out %Out) #2 {
+entry:
+  %Func1_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func1 (i32*, i32*)* @Func1_cloned to i8*))
+  %Func2_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func2 (i32*, i32*, i32*)* @Func2_cloned to i8*))
+  call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node, i32 0, i32 0, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 1, i32 0, i1 false)
+  %output = call i8* @llvm.hpvm.createEdge(i8* %Func1_cloned.node, i8* %Func2_cloned.node, i1 false, i32 1, i32 1, i1 false)
+  call void @llvm.hpvm.bind.output(i8* %Func2_cloned.node, i32 0, i32 0, i1 false)
+  ret %struct.out.PipeRoot undef
+}
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.init() #3
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.launch(i8*, i8*, i1) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.wait(i8*) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.cleanup() #3
+
+attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #3 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+!hpvm_hint_cpu = !{!2, !3, !4, !5, !6, !7}
+!hpvm_hint_gpu = !{}
+!hpvm_hint_spir = !{}
+!hpvm_hint_cudnn = !{}
+!hpvm_hint_promise = !{}
+!hpvm_hint_cpu_gpu = !{}
+!hpvm_hint_cpu_spir = !{}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 6690f9e7e8b46b96aea222d3e85315cd63545953)"}
+!2 = !{%struct.out.Func1 (i32*, i32*)* @Func1_cloned}
+!3 = !{%struct.out.Func4 (i32*, i32*)* @Func4_cloned}
+!4 = !{%struct.out.Func5 (i32*, i32*, i32*)* @Func5_cloned}
+!5 = !{%struct.out.Func3 (i32*, i32*)* @Func3_cloned}
+!6 = !{%struct.out.Func2 (i32*, i32*, i32*)* @Func2_cloned}
+!7 = !{%struct.out.PipeRoot (i32*, i32*, i32*)* @PipeRoot_cloned}
+!8 = !{!9, !9, i64 0}
+!9 = !{!"int", !10, i64 0}
+!10 = !{!"omnipotent char", !11, i64 0}
+!11 = !{!"Simple C/C++ TBAA"}
+!12 = !{!13, !14, i64 0}
+!13 = !{!"Root", !14, i64 0, !14, i64 8, !14, i64 16}
+!14 = !{!"any pointer", !10, i64 0}
+!15 = !{!13, !14, i64 8}
+!16 = !{!13, !14, i64 16}
diff --git a/hpvm/test/regressionTests/BuildDFG/ThreeLevelEdge.ll b/hpvm/test/regressionTests/BuildDFG/ThreeLevelEdge.ll
new file mode 100644
index 0000000000..d1b930b529
--- /dev/null
+++ b/hpvm/test/regressionTests/BuildDFG/ThreeLevelEdge.ll
@@ -0,0 +1,209 @@
+; RUN: opt -load LLVMGenVISC.so -S -genhpvm <  %s | FileCheck %s
+; ModuleID = 'ThreeLevelEdge.c'
+source_filename = "ThreeLevelEdge.c"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.Root = type { i32*, i32*, i32* }
+; CHECK-LABEL: struct.Root =
+
+; CHECK-LABEL: %struct.out.Func1 =
+; CHECK-LABEL: %struct.out.Func4 =
+; CHECK-LABEL: %struct.out.Func5 =
+; CHECK-LABEL: %struct.out.Func3 =
+; CHECK-LABEL: %struct.out.Func2 =
+; CHECK-LABEL: %struct.out.PipeRoot =
+
+; CHECK-LABEL: i32 @main(
+; CHECK: [[ALLOCA:%[0-9a-zA-Z]+]] = alloca %struct.Root
+; CHECK: call void @llvm.hpvm.init()
+; CHECK:  [[REGISTER:%[0-9]+]] = bitcast %struct.Root* [[ALLOCA]] to i8*
+; CHECK: call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot (i32*, i32*, i32*)* @PipeRoot_cloned to i8*), i8* [[REGISTER]],
+; CHECK-NEXT: call void @llvm.hpvm.wait(i8*
+
+; CHECK-LABEL: @Func1_cloned(
+; CHECK: [[RET1:%[1-9a-zA-Z]+]] = insertvalue %struct.out.Func1 undef,
+; CHECK-NEXT: ret %struct.out.Func1 [[RET1]]
+
+; CHECK-LABEL: @Func4_cloned(
+; CHECK: [[RET4:%[1-9a-zA-Z]+]] = insertvalue %struct.out.Func4 undef,
+; CHECK-NEXT: ret %struct.out.Func4 [[RET4]]
+
+; CHECK-LABEL: @Func5_cloned(
+; CHECK: [[RET5:%[1-9a-zA-Z]+]] = insertvalue %struct.out.Func5 undef,
+; CHECK-NEXT: ret %struct.out.Func5 [[RET5]]
+
+; CHECK-LABEL: @Func3_cloned(
+; CHECK: %Func4_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%struct.out.Func4 (i32*
+; CHECK-NEXT: %Func5_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%struct.out.Func5 (i32*
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func4_cloned.node,
+; CHECK-NEXT: call i8* @llvm.hpvm.createEdge(i8* %Func4_cloned.node, i8* %Func5_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func5_cloned.node,
+
+; CHECK-LABEL: @Func2_cloned(
+; CHECK: %Func3_cloned.node = call i8* @llvm.hpvm.createNode1D(i8* bitcast (%struct.out.Func3 (i32*
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func3_cloned.node,
+
+; CHECK-LABEL: @PipeRoot_cloned(i32*
+; CHECK: %Func1_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func1 (i32*
+; CHECK-NEXT: %Func2_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func2 (i32*
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node,
+; CHECK-NEXT: call i8* @llvm.hpvm.createEdge(i8* %Func1_cloned.node, i8* %Func2_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func2_cloned.node,
+
+
+; Function Attrs: nounwind uwtable
+define dso_local void @Func1(i32* %In, i32* %Out) #0 {
+; CHECK-NOT: @Func1(
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 1, i32* %In, i32 1, i32* %Out) #3
+  tail call void (i32, ...) @__hpvm__return(i32 1, i32* %Out) #3
+  ret void
+}
+
+declare dso_local void @__hpvm__hint(i32) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__attributes(i32, ...) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__return(i32, ...) local_unnamed_addr #1
+
+; Function Attrs: nounwind uwtable
+define dso_local void @Func4(i32* %In, i32* %Out) #0 {
+; CHECK-NOT: @Func4(
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 1, i32* %In, i32 1, i32* %Out) #3
+  tail call void (i32, ...) @__hpvm__return(i32 1, i32* %Out) #3
+  ret void
+}
+
+; Function Attrs: nounwind uwtable
+define dso_local void @Func5(i32* %In1, i32* %In2, i32* %Out) #0 {
+; CHECK-NOT: @Func5(
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 2, i32* %In1, i32* %In2, i32 1, i32* %Out) #3
+  tail call void (i32, ...) @__hpvm__return(i32 1, i32* %Out) #3
+  ret void
+}
+
+; Function Attrs: nounwind uwtable
+define dso_local void @Func3(i32* %In, i32* %Out) #0 {
+; CHECK-NOT: @Func3(
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 1, i32* %In, i32 1, i32* %Out) #3
+  %call = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 2, void (i32*, i32*)* nonnull @Func4, i64 3, i64 6) #3
+  %call1 = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 2, void (i32*, i32*, i32*)* nonnull @Func5, i64 4, i64 5) #3
+  tail call void @__hpvm__bindIn(i8* %call, i32 0, i32 0, i32 0) #3
+  %call2 = tail call i8* @__hpvm__edge(i8* %call, i8* %call1, i32 0, i32 1, i32 1, i32 0) #3
+  tail call void @__hpvm__bindOut(i8* %call1, i32 0, i32 0, i32 0) #3
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2
+
+declare dso_local i8* @__hpvm__createNodeND(i32, ...) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__bindIn(i8*, i32, i32, i32) local_unnamed_addr #1
+
+declare dso_local i8* @__hpvm__edge(i8*, i8*, i32, i32, i32, i32) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__bindOut(i8*, i32, i32, i32) local_unnamed_addr #1
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2
+
+; Function Attrs: nounwind uwtable
+define dso_local void @Func2(i32* %BindIn, i32* %SrcIn, i32* %Out) #0 {
+; CHECK-NOT: @Func2(
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 2, i32* %BindIn, i32* %SrcIn, i32 1, i32* %Out) #3
+  %call = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 1, void (i32*, i32*)* nonnull @Func3, i64 3) #3
+  tail call void @__hpvm__bindIn(i8* %call, i32 0, i32 0, i32 0) #3
+  tail call void @__hpvm__bindOut(i8* %call, i32 0, i32 0, i32 0) #3
+  ret void
+}
+
+; Function Attrs: nounwind uwtable
+define dso_local void @PipeRoot(i32* %In1, i32* %In2, i32* %Out) #0 {
+; CHECK-NOT: @PipeRoot(
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 2, i32* %In1, i32* %In2, i32 1, i32* %Out) #3
+  %call = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 0, void (i32*, i32*)* nonnull @Func1) #3
+  %call1 = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 0, void (i32*, i32*, i32*)* nonnull @Func2) #3
+  tail call void @__hpvm__bindIn(i8* %call, i32 0, i32 0, i32 0) #3
+  tail call void @__hpvm__bindIn(i8* %call1, i32 1, i32 0, i32 0) #3
+  %call2 = tail call i8* @__hpvm__edge(i8* %call, i8* %call1, i32 0, i32 1, i32 1, i32 0) #3
+  tail call void @__hpvm__bindOut(i8* %call1, i32 0, i32 0, i32 0) #3
+  ret void
+}
+
+; Function Attrs: nounwind uwtable
+define dso_local i32 @main() local_unnamed_addr #0 {
+entry:
+  %In1 = alloca i32, align 4
+  %In2 = alloca i32, align 4
+  %Out = alloca i32, align 4
+  %RootArgs = alloca %struct.Root, align 8
+  %0 = bitcast i32* %In1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #3
+  store i32 1, i32* %In1, align 4, !tbaa !2
+  %1 = bitcast i32* %In2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) #3
+  store i32 2, i32* %In2, align 4, !tbaa !2
+  %2 = bitcast i32* %Out to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %2) #3
+  store i32 0, i32* %Out, align 4, !tbaa !2
+  %3 = bitcast %struct.Root* %RootArgs to i8*
+  call void @llvm.lifetime.start.p0i8(i64 24, i8* nonnull %3) #3
+  %input1 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 0
+  store i32* %In1, i32** %input1, align 8, !tbaa !6
+  %intput2 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 1
+  store i32* %In2, i32** %intput2, align 8, !tbaa !9
+  %output = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 2
+  store i32* %Out, i32** %output, align 8, !tbaa !10
+  call void (...) @__hpvm__init() #3
+  %call = call i8* (i32, ...) @__hpvm__launch(i32 0, void (i32*, i32*, i32*)* nonnull @PipeRoot, %struct.Root* nonnull %RootArgs) #3
+  call void @__hpvm__wait(i8* %call) #3
+  call void (...) @__hpvm__cleanup() #3
+  call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull %3) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %2) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #3
+  ret i32 0
+}
+
+declare dso_local void @__hpvm__init(...) local_unnamed_addr #1
+
+declare dso_local i8* @__hpvm__launch(i32, ...) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__wait(i8*) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__cleanup(...) local_unnamed_addr #1
+
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #2 = { argmemonly nounwind }
+attributes #3 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 6690f9e7e8b46b96aea222d3e85315cd63545953)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
+!6 = !{!7, !8, i64 0}
+!7 = !{!"Root", !8, i64 0, !8, i64 8, !8, i64 16}
+!8 = !{!"any pointer", !4, i64 0}
+!9 = !{!7, !8, i64 8}
+!10 = !{!7, !8, i64 16}
diff --git a/hpvm/test/regressionTests/BuildDFG/TwoLaunch.genvisc.ll b/hpvm/test/regressionTests/BuildDFG/TwoLaunch.genvisc.ll
new file mode 100644
index 0000000000..b775f67a27
--- /dev/null
+++ b/hpvm/test/regressionTests/BuildDFG/TwoLaunch.genvisc.ll
@@ -0,0 +1,122 @@
+; RUN: opt -load LLVMBuildDFG.so -S < %s | FileCheck %s
+; ModuleID = 'TwoLaunch.ll'
+source_filename = "TwoLaunch.c"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.out.PipeRoot1 = type <{ i32* }>
+%struct.out.PipeRoot2 = type <{ i32* }>
+
+declare dso_local void @__hpvm__hint(i32) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__attributes(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__return(i32, ...) local_unnamed_addr #0
+
+; Function Attrs: nounwind uwtable
+define dso_local i32 @main() local_unnamed_addr #1 {
+; CHECK-LABEL: i32 @main(
+; CHECK: call void @llvm.hpvm.init()
+; CHECK: call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot1 (i32*, i32*)* @PipeRoot1_cloned to i8*), i8* %call, i1 false)
+; CHECK-NEXT: call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot2 (i32*, i32*)* @PipeRoot2_cloned to i8*), i8* %call, i1 false)
+; CHECK-NEXT: call void @llvm.hpvm.wait(i8*
+; CHECK-NEXT: call void @llvm.hpvm.wait(i8*
+entry:
+  %In = alloca i32, align 4
+  %Out = alloca i32, align 4
+  %0 = bitcast i32* %In to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #4
+  %1 = bitcast i32* %Out to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) #4
+  %call = tail call noalias i8* @malloc(i64 16) #4
+  %input = bitcast i8* %call to i32**
+  store i32* %In, i32** %input, align 8, !tbaa !4
+  %output = getelementptr inbounds i8, i8* %call, i64 8
+  %2 = bitcast i8* %output to i32**
+  store i32* %Out, i32** %2, align 8, !tbaa !9
+  call void @llvm.hpvm.init()
+  %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot1 (i32*, i32*)* @PipeRoot1_cloned to i8*), i8* %call, i1 false)
+  %graphID1 = call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot2 (i32*, i32*)* @PipeRoot2_cloned to i8*), i8* %call, i1 false)
+  call void @llvm.hpvm.wait(i8* %graphID)
+  call void @llvm.hpvm.wait(i8* %graphID1)
+  call void @llvm.hpvm.cleanup()
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) #4
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #4
+  ret i32 0
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2
+
+; Function Attrs: nofree nounwind
+declare dso_local noalias i8* @malloc(i64) local_unnamed_addr #3
+
+declare dso_local void @__hpvm__init(...) local_unnamed_addr #0
+
+declare dso_local i8* @__hpvm__launch(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__wait(i8*) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__cleanup(...) local_unnamed_addr #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2
+
+declare i8* @llvm_hpvm_initializeTimerSet()
+
+declare void @llvm_hpvm_switchToTimer(i8**, i32)
+
+declare void @llvm_hpvm_printTimerSet(i8**, i8*)
+
+; Function Attrs: nounwind uwtable
+define dso_local %struct.out.PipeRoot1 @PipeRoot1_cloned(i32* in %In, i32* out %Out) #1 {
+entry:
+  %returnStruct = insertvalue %struct.out.PipeRoot1 undef, i32* %Out, 0
+  ret %struct.out.PipeRoot1 %returnStruct
+}
+
+; Function Attrs: nounwind uwtable
+define dso_local %struct.out.PipeRoot2 @PipeRoot2_cloned(i32* in %In, i32* out %Out) #1 {
+entry:
+  %returnStruct = insertvalue %struct.out.PipeRoot2 undef, i32* %Out, 0
+  ret %struct.out.PipeRoot2 %returnStruct
+}
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.init() #4
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.launch(i8*, i8*, i1) #4
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.wait(i8*) #4
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.cleanup() #4
+
+attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #2 = { argmemonly nounwind }
+attributes #3 = { nofree nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #4 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+!hpvm_hint_cpu = !{!2, !3}
+!hpvm_hint_gpu = !{}
+!hpvm_hint_spir = !{}
+!hpvm_hint_cudnn = !{}
+!hpvm_hint_promise = !{}
+!hpvm_hint_cpu_gpu = !{}
+!hpvm_hint_cpu_spir = !{}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 6690f9e7e8b46b96aea222d3e85315cd63545953)"}
+!2 = !{%struct.out.PipeRoot1 (i32*, i32*)* @PipeRoot1_cloned}
+!3 = !{%struct.out.PipeRoot2 (i32*, i32*)* @PipeRoot2_cloned}
+!4 = !{!5, !6, i64 0}
+!5 = !{!"Root", !6, i64 0, !6, i64 8}
+!6 = !{!"any pointer", !7, i64 0}
+!7 = !{!"omnipotent char", !8, i64 0}
+!8 = !{!"Simple C/C++ TBAA"}
+!9 = !{!5, !6, i64 8}
diff --git a/hpvm/test/regressionTests/BuildDFG/TwoLevel.ll b/hpvm/test/regressionTests/BuildDFG/TwoLevel.ll
new file mode 100644
index 0000000000..e5b4c21681
--- /dev/null
+++ b/hpvm/test/regressionTests/BuildDFG/TwoLevel.ll
@@ -0,0 +1,167 @@
+; RUN: opt -load LLVMGenVISC.so -S -genhpvm <  %s | FileCheck %s
+; ModuleID = 'TwoLevel.c'
+source_filename = "TwoLevel.c"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.Root = type { i32*, i32*, i32* }
+; CHECK-LABEL: struct.Root =
+
+; CHECK-LABEL: %struct.out.Func1 =
+; CHECK-LABEL: %struct.out.Func3 =
+; CHECK-LABEL: %struct.out.Func2 =
+; CHECK-LABEL: %struct.out.PipeRoot =
+
+; CHECK-LABEL: i32 @main(
+; CHECK: [[ALLOCA:%[0-9a-zA-Z]+]] = alloca %struct.Root
+; CHECK: call void @llvm.hpvm.init()
+; CHECK:  [[REGISTER:%[0-9]+]] = bitcast %struct.Root* [[ALLOCA]] to i8*
+; CHECK: call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot (i32*, i32*, i32*)* @PipeRoot_cloned to i8*), i8* [[REGISTER]],
+; CHECK-NEXT: call void @llvm.hpvm.wait(i8*
+
+; CHECK-LABEL: @Func1_cloned(
+; CHECK: [[RET1:%[1-9a-zA-Z]+]] = insertvalue %struct.out.Func1 undef,
+; CHECK-NEXT: ret %struct.out.Func1 [[RET1]]
+
+; CHECK-LABEL: @Func3_cloned(
+; CHECK: [[RET3:%[1-9a-zA-Z]+]] = insertvalue %struct.out.Func3 undef,
+; CHECK-NEXT: ret %struct.out.Func3 [[RET3]]
+
+; CHECK-LABEL: @Func2_cloned(
+; CHECK: %Func3_cloned.node = call i8* @llvm.hpvm.createNode1D(i8* bitcast (%struct.out.Func3 (i32*
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func3_cloned.node,
+
+; CHECK-LABEL: @PipeRoot_cloned(i32*
+; CHECK: %Func1_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func1 (i32*
+; CHECK-NEXT: %Func2_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func2 (i32*
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node,
+; CHECK-NEXT: call i8* @llvm.hpvm.createEdge(i8* %Func1_cloned.node, i8* %Func2_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func2_cloned.node,
+
+; Function Attrs: nounwind uwtable
+define dso_local void @Func1(i32* %In, i32* %Out) #0 {
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 1, i32* %In, i32 1, i32* %Out) #3
+  tail call void (i32, ...) @__hpvm__return(i32 1, i32* %Out) #3
+  ret void
+}
+
+declare dso_local void @__hpvm__hint(i32) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__attributes(i32, ...) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__return(i32, ...) local_unnamed_addr #1
+
+; Function Attrs: nounwind uwtable
+define dso_local void @Func3(i32* %In, i32* %Out) #0 {
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 1, i32* %In, i32 1, i32* %Out) #3
+  tail call void (i32, ...) @__hpvm__return(i32 1, i32* %Out) #3
+  ret void
+}
+
+; Function Attrs: nounwind uwtable
+define dso_local void @Func2(i32* %BindIn, i32* %SrcIn, i32* %Out) #0 {
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 2, i32* %BindIn, i32* %SrcIn, i32 1, i32* %Out) #3
+  %call = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 1, void (i32*, i32*)* nonnull @Func3, i64 3) #3
+  tail call void @__hpvm__bindIn(i8* %call, i32 0, i32 0, i32 0) #3
+  tail call void @__hpvm__bindOut(i8* %call, i32 0, i32 0, i32 0) #3
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2
+
+declare dso_local i8* @__hpvm__createNodeND(i32, ...) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__bindIn(i8*, i32, i32, i32) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__bindOut(i8*, i32, i32, i32) local_unnamed_addr #1
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2
+
+; Function Attrs: nounwind uwtable
+define dso_local void @PipeRoot(i32* %In1, i32* %In2, i32* %Out) #0 {
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 2, i32* %In1, i32* %In2, i32 1, i32* %Out) #3
+  %call = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 0, void (i32*, i32*)* nonnull @Func1) #3
+  %call1 = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 0, void (i32*, i32*, i32*)* nonnull @Func2) #3
+  tail call void @__hpvm__bindIn(i8* %call, i32 0, i32 0, i32 0) #3
+  tail call void @__hpvm__bindIn(i8* %call1, i32 1, i32 0, i32 0) #3
+  %call2 = tail call i8* @__hpvm__edge(i8* %call, i8* %call1, i32 0, i32 1, i32 1, i32 0) #3
+  tail call void @__hpvm__bindOut(i8* %call1, i32 0, i32 0, i32 0) #3
+  ret void
+}
+
+declare dso_local i8* @__hpvm__edge(i8*, i8*, i32, i32, i32, i32) local_unnamed_addr #1
+
+; Function Attrs: nounwind uwtable
+define dso_local i32 @main() local_unnamed_addr #0 {
+entry:
+  %In1 = alloca i32, align 4
+  %In2 = alloca i32, align 4
+  %Out = alloca i32, align 4
+  %RootArgs = alloca %struct.Root, align 8
+  %0 = bitcast i32* %In1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #3
+  store i32 1, i32* %In1, align 4, !tbaa !2
+  %1 = bitcast i32* %In2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) #3
+  store i32 2, i32* %In2, align 4, !tbaa !2
+  %2 = bitcast i32* %Out to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %2) #3
+  store i32 0, i32* %Out, align 4, !tbaa !2
+  %3 = bitcast %struct.Root* %RootArgs to i8*
+  call void @llvm.lifetime.start.p0i8(i64 24, i8* nonnull %3) #3
+  %input1 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 0
+  store i32* %In1, i32** %input1, align 8, !tbaa !6
+  %input2 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 1
+  store i32* %In2, i32** %input2, align 8, !tbaa !9
+  %output = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 2
+  store i32* %Out, i32** %output, align 8, !tbaa !10
+  call void (...) @__hpvm__init() #3
+  %call = call i8* (i32, ...) @__hpvm__launch(i32 0, void (i32*, i32*, i32*)* nonnull @PipeRoot, %struct.Root* nonnull %RootArgs) #3
+  call void @__hpvm__wait(i8* %call) #3
+  call void (...) @__hpvm__cleanup() #3
+  call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull %3) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %2) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #3
+  ret i32 0
+}
+
+declare dso_local void @__hpvm__init(...) local_unnamed_addr #1
+
+declare dso_local i8* @__hpvm__launch(i32, ...) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__wait(i8*) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__cleanup(...) local_unnamed_addr #1
+
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #2 = { argmemonly nounwind }
+attributes #3 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 6690f9e7e8b46b96aea222d3e85315cd63545953)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
+!6 = !{!7, !8, i64 0}
+!7 = !{!"Root", !8, i64 0, !8, i64 8, !8, i64 16}
+!8 = !{!"any pointer", !4, i64 0}
+!9 = !{!7, !8, i64 8}
+!10 = !{!7, !8, i64 16}
diff --git a/hpvm/test/regressionTests/BuildDFG/TwoLevelGraph.ll b/hpvm/test/regressionTests/BuildDFG/TwoLevelGraph.ll
new file mode 100644
index 0000000000..9257dd7e35
--- /dev/null
+++ b/hpvm/test/regressionTests/BuildDFG/TwoLevelGraph.ll
@@ -0,0 +1,210 @@
+; RUN: opt -load LLVMBuildDFG.so -S < %s | FileCheck %s
+; ModuleID = 'TwoLevel.ll'
+source_filename = "TwoLevel.c"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.Root = type { i32*, i32*, i32* }
+%struct.out.Func1 = type <{ i32* }>
+%struct.out.Func3 = type <{ i32* }>
+%struct.out.Func2 = type <{ i32* }>
+%struct.out.PipeRoot = type <{ i32* }>
+
+; CHECK-LABEL: struct.Root =
+
+; CHECK-LABEL: %struct.out.Func1 =
+; CHECK-LABEL: %struct.out.Func3 =
+; CHECK-LABEL: %struct.out.Func2 =
+; CHECK-LABEL: %struct.out.PipeRoot =
+
+; CHECK-LABEL: i32 @main(
+; CHECK: [[ALLOCA:%[0-9a-zA-Z]+]] = alloca %struct.Root
+; CHECK: call void @llvm.hpvm.init()
+; CHECK:  [[REGISTER:%[0-9]+]] = bitcast %struct.Root* [[ALLOCA]] to i8*
+; CHECK: call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot (i32*, i32*, i32*)* @PipeRoot_cloned to i8*), i8* [[REGISTER]],
+; CHECK-NEXT: call void @llvm.hpvm.wait(i8*
+
+; CHECK-LABEL: @Func1_cloned(
+; CHECK: [[RET1:%[1-9a-zA-Z]+]] = insertvalue %struct.out.Func1 undef,
+; CHECK-NEXT: ret %struct.out.Func1 [[RET1]]
+
+; CHECK-LABEL: @Func3_cloned(
+; CHECK: [[RET3:%[1-9a-zA-Z]+]] = insertvalue %struct.out.Func3 undef,
+; CHECK-NEXT: ret %struct.out.Func3 [[RET3]]
+
+; CHECK-LABEL: @Func2_cloned(
+; CHECK: %Func3_cloned.node = call i8* @llvm.hpvm.createNode1D(i8* bitcast (%struct.out.Func3 (i32*
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func3_cloned.node,
+
+; CHECK-LABEL: @PipeRoot_cloned(i32*
+; CHECK: %Func1_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func1 (i32*
+; CHECK-NEXT: %Func2_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func2 (i32*
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node,
+; CHECK-NEXT: call i8* @llvm.hpvm.createEdge(i8* %Func1_cloned.node, i8* %Func2_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func2_cloned.node,
+
+
+declare dso_local void @__hpvm__hint(i32) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__attributes(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__return(i32, ...) local_unnamed_addr #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
+
+declare dso_local i8* @__hpvm__createNodeND(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__bindIn(i8*, i32, i32, i32) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__bindOut(i8*, i32, i32, i32) local_unnamed_addr #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
+
+declare dso_local i8* @__hpvm__edge(i8*, i8*, i32, i32, i32, i32) local_unnamed_addr #0
+
+; Function Attrs: nounwind uwtable
+define dso_local i32 @main() local_unnamed_addr #2 {
+entry:
+  %In1 = alloca i32, align 4
+  %In2 = alloca i32, align 4
+  %Out = alloca i32, align 4
+  %RootArgs = alloca %struct.Root, align 8
+  %0 = bitcast i32* %In1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #3
+  store i32 1, i32* %In1, align 4, !tbaa !6
+  %1 = bitcast i32* %In2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) #3
+  store i32 2, i32* %In2, align 4, !tbaa !6
+  %2 = bitcast i32* %Out to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %2) #3
+  store i32 0, i32* %Out, align 4, !tbaa !6
+  %3 = bitcast %struct.Root* %RootArgs to i8*
+  call void @llvm.lifetime.start.p0i8(i64 24, i8* nonnull %3) #3
+  %input1 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 0
+  store i32* %In1, i32** %input1, align 8, !tbaa !10
+  %input2 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 1
+  store i32* %In2, i32** %input2, align 8, !tbaa !13
+  %output = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 2
+  store i32* %Out, i32** %output, align 8, !tbaa !14
+  call void @llvm.hpvm.init()
+  %4 = bitcast %struct.Root* %RootArgs to i8*
+  %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot (i32*, i32*, i32*)* @PipeRoot_cloned to i8*), i8* %4, i1 false)
+  call void @llvm.hpvm.wait(i8* %graphID)
+  call void @llvm.hpvm.cleanup()
+  call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull %3) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %2) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #3
+  ret i32 0
+}
+
+declare dso_local void @__hpvm__init(...) local_unnamed_addr #0
+
+declare dso_local i8* @__hpvm__launch(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__wait(i8*) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__cleanup(...) local_unnamed_addr #0
+
+declare i8* @llvm_hpvm_initializeTimerSet()
+
+declare void @llvm_hpvm_switchToTimer(i8**, i32)
+
+declare void @llvm_hpvm_printTimerSet(i8**, i8*)
+
+; Function Attrs: nounwind uwtable
+define dso_local %struct.out.Func1 @Func1_cloned(i32* in %In, i32* out %Out) #2 {
+entry:
+  %returnStruct = insertvalue %struct.out.Func1 undef, i32* %Out, 0
+  ret %struct.out.Func1 %returnStruct
+}
+
+; Function Attrs: nounwind uwtable
+define dso_local %struct.out.Func3 @Func3_cloned(i32* in %In, i32* out %Out) #2 {
+entry:
+  %returnStruct = insertvalue %struct.out.Func3 undef, i32* %Out, 0
+  ret %struct.out.Func3 %returnStruct
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createNode1D(i8*, i64) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.bind.input(i8*, i32, i32, i1) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.bind.output(i8*, i32, i32, i1) #3
+
+; Function Attrs: nounwind uwtable
+define dso_local %struct.out.Func2 @Func2_cloned(i32* in %BindIn, i32* in %SrcIn, i32* out %Out) #2 {
+entry:
+  %Func3_cloned.node = call i8* @llvm.hpvm.createNode1D(i8* bitcast (%struct.out.Func3 (i32*, i32*)* @Func3_cloned to i8*), i64 3)
+  call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node, i32 0, i32 0, i1 false)
+  call void @llvm.hpvm.bind.output(i8* %Func3_cloned.node, i32 0, i32 0, i1 false)
+  ret %struct.out.Func2 undef
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createNode(i8*) #3
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32, i1) #3
+
+; Function Attrs: nounwind uwtable
+define dso_local %struct.out.PipeRoot @PipeRoot_cloned(i32* in %In1, i32* in %In2, i32* out %Out) #2 {
+entry:
+  %Func1_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func1 (i32*, i32*)* @Func1_cloned to i8*))
+  %Func2_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func2 (i32*, i32*, i32*)* @Func2_cloned to i8*))
+  call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node, i32 0, i32 0, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 1, i32 0, i1 false)
+  %output = call i8* @llvm.hpvm.createEdge(i8* %Func1_cloned.node, i8* %Func2_cloned.node, i1 false, i32 1, i32 1, i1 false)
+  call void @llvm.hpvm.bind.output(i8* %Func2_cloned.node, i32 0, i32 0, i1 false)
+  ret %struct.out.PipeRoot undef
+}
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.init() #3
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.launch(i8*, i8*, i1) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.wait(i8*) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.cleanup() #3
+
+attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #3 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+!hpvm_hint_cpu = !{!2, !3, !4, !5}
+!hpvm_hint_gpu = !{}
+!hpvm_hint_spir = !{}
+!hpvm_hint_cudnn = !{}
+!hpvm_hint_promise = !{}
+!hpvm_hint_cpu_gpu = !{}
+!hpvm_hint_cpu_spir = !{}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 6690f9e7e8b46b96aea222d3e85315cd63545953)"}
+!2 = !{%struct.out.Func1 (i32*, i32*)* @Func1_cloned}
+!3 = !{%struct.out.Func3 (i32*, i32*)* @Func3_cloned}
+!4 = !{%struct.out.Func2 (i32*, i32*, i32*)* @Func2_cloned}
+!5 = !{%struct.out.PipeRoot (i32*, i32*, i32*)* @PipeRoot_cloned}
+!6 = !{!7, !7, i64 0}
+!7 = !{!"int", !8, i64 0}
+!8 = !{!"omnipotent char", !9, i64 0}
+!9 = !{!"Simple C/C++ TBAA"}
+!10 = !{!11, !12, i64 0}
+!11 = !{!"Root", !12, i64 0, !12, i64 8, !12, i64 16}
+!12 = !{!"any pointer", !8, i64 0}
+!13 = !{!11, !12, i64 8}
+!14 = !{!11, !12, i64 16}
diff --git a/hpvm/test/regressionTests/BuildDFG/TwoNode.ll b/hpvm/test/regressionTests/BuildDFG/TwoNode.ll
new file mode 100644
index 0000000000..49b9b35205
--- /dev/null
+++ b/hpvm/test/regressionTests/BuildDFG/TwoNode.ll
@@ -0,0 +1,197 @@
+; RUN: opt -load LLVMBuildDFG.so -S < %s | FileCheck %s
+; ModuleID = 'CreateNodeAndEdge.ll'
+source_filename = "CreateNodeAndEdge.c"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.Root = type { i32*, i32*, i32* }
+%struct.out.Func1 = type <{ i32* }>
+%struct.out.Func2 = type <{ i32* }>
+%struct.out.PipeRoot = type <{ i32* }>
+
+; Expected output for the two-node graph in this module: PipeRoot is the root,
+; Func1 and Func2 are its leaf children. Only names defined below are matched.
+
+; CHECK-LABEL: struct.Root =
+; CHECK-LABEL: %struct.out.Func1 =
+; CHECK-LABEL: %struct.out.Func2 =
+; CHECK-LABEL: %struct.out.PipeRoot =
+
+; CHECK-LABEL: i32 @main(
+; CHECK: [[ALLOCA:%[0-9a-zA-Z_.]+]] = alloca %struct.Root
+; CHECK: call void @llvm.hpvm.init()
+; CHECK: [[REGISTER:%[0-9]+]] = bitcast %struct.Root* [[ALLOCA]] to i8*
+; CHECK: call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot (i32*, i32*, i32*)* @PipeRoot_cloned to i8*), i8* [[REGISTER]],
+; CHECK-NEXT: call void @llvm.hpvm.wait(i8*
+
+; Func1 is a leaf: it only wraps %Out in its return struct.
+; CHECK-LABEL: @Func1_cloned(
+; CHECK: [[RET1:%[0-9a-zA-Z_.]+]] = insertvalue %struct.out.Func1 undef,
+; CHECK-NEXT: ret %struct.out.Func1 [[RET1]]
+
+; Func2 is a leaf as well; it creates no child nodes.
+; CHECK-LABEL: @Func2_cloned(
+; CHECK: [[RET2:%[0-9a-zA-Z_.]+]] = insertvalue %struct.out.Func2 undef,
+; CHECK-NEXT: ret %struct.out.Func2 [[RET2]]
+
+; PipeRoot creates both nodes, binds their inputs, connects Func1 to Func2
+; with one edge and binds Func2's output.
+; CHECK-LABEL: @PipeRoot_cloned(i32*
+; CHECK: %Func1_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func1 (i32*
+; CHECK-NEXT: %Func2_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func2 (i32*
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node,
+; CHECK-NEXT: call i8* @llvm.hpvm.createEdge(i8* %Func1_cloned.node, i8* %Func2_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func2_cloned.node,
+
+
+
+declare dso_local void @__hpvm__hint(i32) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__attributes(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__return(i32, ...) local_unnamed_addr #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
+
+declare dso_local i8* @__hpvm__createNodeND(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__bindIn(i8*, i32, i32, i32) local_unnamed_addr #0
+
+declare dso_local i8* @__hpvm__edge(i8*, i8*, i32, i32, i32, i32) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__bindOut(i8*, i32, i32, i32) local_unnamed_addr #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
+
+; Function Attrs: nounwind uwtable -- Host entry point: fills a %struct.Root with pointers to three stack ints, launches the PipeRoot_cloned graph, waits for it and returns 0.
+define dso_local i32 @main() local_unnamed_addr #2 {
+entry:
+  %In1 = alloca i32, align 4
+  %In2 = alloca i32, align 4
+  %Out = alloca i32, align 4
+  %RootArgs = alloca %struct.Root, align 8  ; argument block handed to the launch intrinsic
+  %0 = bitcast i32* %In1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #3
+  store i32 1, i32* %In1, align 4, !tbaa !5  ; In1 = 1
+  %1 = bitcast i32* %In2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) #3
+  store i32 2, i32* %In2, align 4, !tbaa !5  ; In2 = 2
+  %2 = bitcast i32* %Out to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %2) #3
+  store i32 0, i32* %Out, align 4, !tbaa !5  ; Out = 0
+  %3 = bitcast %struct.Root* %RootArgs to i8*
+  call void @llvm.lifetime.start.p0i8(i64 24, i8* nonnull %3) #3
+  %input1 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 0  ; field 0
+  store i32* %In1, i32** %input1, align 8, !tbaa !9
+  %input2 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 1  ; field 1
+  store i32* %In2, i32** %input2, align 8, !tbaa !12
+  %output = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 2  ; field 2
+  store i32* %Out, i32** %output, align 8, !tbaa !13
+  call void @llvm.hpvm.init()
+  %4 = bitcast %struct.Root* %RootArgs to i8*  ; same cast as %3; this is the value the FileCheck directives capture
+  %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot (i32*, i32*, i32*)* @PipeRoot_cloned to i8*), i8* %4, i1 false)  ; NOTE(review): trailing i1 is presumably the streaming flag -- confirm against the HPVM spec
+  call void @llvm.hpvm.wait(i8* %graphID)  ; waits on the handle returned by launch
+  call void @llvm.hpvm.cleanup()
+  call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull %3) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %2) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #3
+  ret i32 0
+}
+
+declare dso_local void @__hpvm__init(...) local_unnamed_addr #0
+
+declare dso_local i8* @__hpvm__launch(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__wait(i8*) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__cleanup(...) local_unnamed_addr #0
+
+declare i8* @llvm_hpvm_initializeTimerSet()
+
+declare void @llvm_hpvm_switchToTimer(i8**, i32)
+
+declare void @llvm_hpvm_printTimerSet(i8**, i8*)
+
+; Function Attrs: nounwind uwtable -- Leaf node function: returns a %struct.out.Func1 whose single field is %Out; %In is not read.
+define dso_local %struct.out.Func1 @Func1_cloned(i32* in %In, i32* out %Out) #2 {
+entry:
+  %returnStruct = insertvalue %struct.out.Func1 undef, i32* %Out, 0  ; field 0 <- %Out
+  ret %struct.out.Func1 %returnStruct
+}
+
+; Function Attrs: nounwind uwtable -- Leaf node function: returns a %struct.out.Func2 whose single field is %Out; %BindIn and %SrcIn are not read.
+define dso_local %struct.out.Func2 @Func2_cloned(i32* in %BindIn, i32* in %SrcIn, i32* out %Out) #2 {
+entry:
+  %returnStruct = insertvalue %struct.out.Func2 undef, i32* %Out, 0  ; field 0 <- %Out
+  ret %struct.out.Func2 %returnStruct
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createNode(i8*) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.bind.input(i8*, i32, i32, i1) #3
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32, i1) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.bind.output(i8*, i32, i32, i1) #3
+
+; Function Attrs: nounwind uwtable -- Root node function: creates one node each for Func1_cloned and Func2_cloned, binds inputs, adds one edge Func1 -> Func2 and binds Func2's output. Returns undef.
+define dso_local %struct.out.PipeRoot @PipeRoot_cloned(i32* in %In1, i32* in %In2, i32* out %Out) #2 {
+entry:
+  %Func1_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func1 (i32*, i32*)* @Func1_cloned to i8*))
+  %Func2_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func2 (i32*, i32*, i32*)* @Func2_cloned to i8*))
+  call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node, i32 0, i32 0, i1 false)  ; NOTE(review): operands presumably (child, parent input idx, child input idx, isStream) -- confirm
+  call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 1, i32 0, i1 false)
+  %output = call i8* @llvm.hpvm.createEdge(i8* %Func1_cloned.node, i8* %Func2_cloned.node, i1 false, i32 1, i32 1, i1 false)  ; NOTE(review): if the first i32 is a source output index, %struct.out.Func1 has only field 0 -- confirm index 1 is intended
+  call void @llvm.hpvm.bind.output(i8* %Func2_cloned.node, i32 0, i32 0, i1 false)
+  ret %struct.out.PipeRoot undef
+}
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.init() #3
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.launch(i8*, i8*, i1) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.wait(i8*) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.cleanup() #3
+
+attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #3 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+!hpvm_hint_cpu = !{!2, !3, !4}
+!hpvm_hint_gpu = !{}
+!hpvm_hint_spir = !{}
+!hpvm_hint_cudnn = !{}
+!hpvm_hint_promise = !{}
+!hpvm_hint_cpu_gpu = !{}
+!hpvm_hint_cpu_spir = !{}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 6690f9e7e8b46b96aea222d3e85315cd63545953)"}
+!2 = !{%struct.out.Func1 (i32*, i32*)* @Func1_cloned}
+!3 = !{%struct.out.Func2 (i32*, i32*, i32*)* @Func2_cloned}
+!4 = !{%struct.out.PipeRoot (i32*, i32*, i32*)* @PipeRoot_cloned}
+!5 = !{!6, !6, i64 0}
+!6 = !{!"int", !7, i64 0}
+!7 = !{!"omnipotent char", !8, i64 0}
+!8 = !{!"Simple C/C++ TBAA"}
+!9 = !{!10, !11, i64 0}
+!10 = !{!"Root", !11, i64 0, !11, i64 8, !11, i64 16}
+!11 = !{!"any pointer", !7, i64 0}
+!12 = !{!10, !11, i64 8}
+!13 = !{!10, !11, i64 16}
diff --git a/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.atomic.genvisc.ll b/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.atomic.genvisc.ll
new file mode 100644
index 0000000000..451035b21e
--- /dev/null
+++ b/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.atomic.genvisc.ll
@@ -0,0 +1,280 @@
+; RUN: opt -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_NVPTX.so -S -localmem -dfg2llvm-nvptx <  %s | FileCheck %s
+; ModuleID = 'ThreeLevel.atomic.ll'
+source_filename = "ThreeLevel.constmem.c"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.Root = type { i32*, i64, i32*, i64 }
+%struct.out.Allocation = type <{ i8*, i64 }>
+%emptyStruct = type <{}>
+%emptyStruct.0 = type <{}>
+%emptyStruct.1 = type <{}>
+%emptyStruct.2 = type <{}>
+
+declare dso_local void @__hpvm__hint(i32) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__attributes(i32, ...) local_unnamed_addr #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
+
+declare dso_local i8* @__hpvm__getNode(...) local_unnamed_addr #0
+
+declare dso_local i8* @__hpvm__getParentNode(i8*) local_unnamed_addr #0
+
+declare dso_local i64 @__hpvm__getNodeInstanceID_x(i8*) local_unnamed_addr #0
+
+declare dso_local i64 @__hpvm__getNodeInstanceID_y(i8*) local_unnamed_addr #0
+
+declare dso_local i64 @__hpvm__getNumNodeInstances_x(i8*) local_unnamed_addr #0
+
+declare dso_local i64 @__hpvm__getNumNodeInstances_y(i8*) local_unnamed_addr #0
+
+declare dso_local i32 @__hpvm__atomic_add(i32*, i32) local_unnamed_addr #0
+
+declare dso_local i32 @__hpvm__atomic_sub(i32*, i32) local_unnamed_addr #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
+
+declare dso_local i8* @__hpvm__malloc(i64) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__return(i32, ...) local_unnamed_addr #0
+
+declare dso_local i8* @__hpvm__createNodeND(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__bindIn(i8*, i32, i32, i32) local_unnamed_addr #0
+
+declare dso_local i8* @__hpvm__edge(i8*, i8*, i32, i32, i32, i32) local_unnamed_addr #0
+
+; Function Attrs: noinline nounwind uwtable -- Builds a %struct.Root (two malloc'd 1024-byte buffers plus their sizes), launches the PipeRoot_cloned graph and waits for it.
+define dso_local void @Launch() local_unnamed_addr #2 {
+entry:
+  %RootArgs = alloca %struct.Root, align 8
+  %0 = bitcast %struct.Root* %RootArgs to i8*
+  call void @llvm.lifetime.start.p0i8(i64 32, i8* nonnull %0) #6
+  %call = tail call noalias i8* @malloc(i64 1024) #6  ; first buffer
+  %1 = bitcast %struct.Root* %RootArgs to i8**  ; address of field 0, viewed as i8**
+  store i8* %call, i8** %1, align 8, !tbaa !6
+  %Insize = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 1
+  store i64 1024, i64* %Insize, align 8, !tbaa !12  ; field 1 = 1024
+  %output = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 2
+  %call1 = tail call noalias i8* @malloc(i64 1024) #6  ; second buffer
+  %2 = bitcast i32** %output to i8**
+  store i8* %call1, i8** %2, align 8, !tbaa !13  ; field 2 = second buffer
+  %Outsize = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 3
+  store i64 1024, i64* %Outsize, align 8, !tbaa !14  ; field 3 = 1024
+  %3 = bitcast %struct.Root* %RootArgs to i8*
+  %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%emptyStruct.2 (i32*, i64, i32*, i64)* @PipeRoot_cloned to i8*), i8* %3, i1 false)
+  call void @llvm.hpvm.wait(i8* %graphID)
+  call void @llvm.lifetime.end.p0i8(i64 32, i8* nonnull %0) #6  ; neither malloc'd buffer is freed in this function
+  ret void
+}
+
+; Function Attrs: nofree nounwind
+declare dso_local noalias i8* @malloc(i64) local_unnamed_addr #3
+
+declare dso_local i8* @__hpvm__launch(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__wait(i8*) local_unnamed_addr #0
+
+; CHECK-LABEL: @main(
+; CHECK: call i8* @llvm_hpvm_ocl_initContext(i32
+; CHECK: call i8* @llvm_hpvm_ocl_launch(i8*
+; CHECK: call void @llvm_hpvm_ocl_clearContext(i8*
+
+; CHECK-LABEL: @Func1_cloned(
+; CHECK: call i32 @llvm.hpvm.atomic.add
+; CHECK: call i32 @llvm.hpvm.atomic.sub
+
+; CHECK-LABEL: @Func2_cloned.3_cloned_cloned_cloned_cloned_cloned_cloned
+; CHECK: call i8* @llvm_hpvm_ocl_argument_ptr(i8*
+; CHECK: call void @llvm_hpvm_ocl_argument_scalar(i8*
+; CHECK: call void @llvm_hpvm_ocl_argument_shared(i8*
+; CHECK: call void @llvm_hpvm_ocl_argument_scalar(i8*
+; CHECK: call i8* @llvm_hpvm_ocl_executeNode(i8*
+; CHECK-NEXT: call void @llvm_hpvm_ocl_wait(i8*
+; CHECK-NEXT: call void @llvm_hpvm_ocl_free(i8*
+
+
+; Function Attrs: nounwind uwtable -- Host entry point: brackets a single call to @Launch with llvm.hpvm.init / llvm.hpvm.cleanup and returns 0.
+define dso_local i32 @main() local_unnamed_addr #4 {
+entry:
+  call void @llvm.hpvm.init()
+  tail call void @Launch()
+  call void @llvm.hpvm.cleanup()
+  ret i32 0
+}
+
+declare dso_local void @__hpvm__init(...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__cleanup(...) local_unnamed_addr #0
+
+declare i8* @llvm_hpvm_initializeTimerSet()
+
+declare void @llvm_hpvm_switchToTimer(i8**, i32)
+
+declare void @llvm_hpvm_printTimerSet(i8**, i8*)
+
+; Function Attrs: nounwind readnone
+declare i8* @llvm.hpvm.getNode() #5
+
+; Function Attrs: nounwind readnone
+declare i8* @llvm.hpvm.getParentNode(i8*) #5
+
+; Function Attrs: nounwind readnone
+declare i64 @llvm.hpvm.getNodeInstanceID.x(i8*) #5
+
+; Function Attrs: nounwind readnone
+declare i64 @llvm.hpvm.getNodeInstanceID.y(i8*) #5
+
+; Function Attrs: nounwind readnone
+declare i64 @llvm.hpvm.getNumNodeInstances.x(i8*) #5
+
+; Function Attrs: nounwind readnone
+declare i64 @llvm.hpvm.getNumNodeInstances.y(i8*) #5
+
+; Function Attrs: nounwind
+declare i32 @llvm.hpvm.atomic.add(i8*, i32) #6
+
+; Function Attrs: nounwind
+declare i32 @llvm.hpvm.atomic.sub(i8*, i32) #6
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.malloc(i64) #6
+
+; Function Attrs: nounwind uwtable -- Node function: calls llvm.hpvm.malloc(%block) and returns the pointer together with %block as a %struct.out.Allocation.
+define dso_local %struct.out.Allocation @Allocation_cloned(i64 %block) #4 {
+entry:
+  %call1 = call i8* @llvm.hpvm.malloc(i64 %block)
+  %returnStruct = insertvalue %struct.out.Allocation undef, i8* %call1, 0  ; field 0 <- allocated pointer
+  %returnStruct2 = insertvalue %struct.out.Allocation %returnStruct, i64 %block, 1  ; field 1 <- requested size
+  ret %struct.out.Allocation %returnStruct2
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createNode2D(i8*, i64, i64) #6
+
+; Function Attrs: nounwind uwtable -- Node function exercising the atomic intrinsics: derives two indices from node/parent instance IDs, then runs atomic.add on %In and atomic.sub on %Out, storing each result into %Out. %Insize and %Outsize are not read.
+define dso_local %emptyStruct @Func1_cloned(i32* in %In, i64 %Insize, i32* in out %Out, i64 %Outsize) #4 {
+entry:
+  %call4 = call i8* @llvm.hpvm.getNode()  ; handle of this node
+  %call16 = call i8* @llvm.hpvm.getParentNode(i8* %call4)  ; handle of its parent
+  %call27 = call i64 @llvm.hpvm.getNodeInstanceID.x(i8* %call4)
+  %call38 = call i64 @llvm.hpvm.getNodeInstanceID.y(i8* %call4)
+  %call59 = call i64 @llvm.hpvm.getNodeInstanceID.x(i8* %call16)
+  %call710 = call i64 @llvm.hpvm.getNodeInstanceID.y(i8* %call16)
+  %call911 = call i64 @llvm.hpvm.getNumNodeInstances.x(i8* %call4)
+  %call1112 = call i64 @llvm.hpvm.getNumNodeInstances.y(i8* %call4)
+  %mul = mul i64 %call911, %call59
+  %add = add i64 %mul, %call27  ; x index = numInstances.x(node) * parentID.x + nodeID.x
+  %mul13 = mul i64 %call1112, %call710
+  %add14 = add i64 %mul13, %call38  ; y index = numInstances.y(node) * parentID.y + nodeID.y
+  %sext = shl i64 %add14, 32
+  %idxprom = ashr exact i64 %sext, 32  ; shl/ashr pair keeps the low 32 bits of the y index, sign-extended
+  %arrayidx = getelementptr inbounds i32, i32* %Out, i64 %idxprom  ; &Out[y index]
+  %0 = load i32, i32* %arrayidx, align 4, !tbaa !15
+  %1 = bitcast i32* %In to i8*
+  %call1513 = call i32 @llvm.hpvm.atomic.add(i8* %1, i32 %0)  ; atomic add of Out[y index] at address %In
+  %sext47 = shl i64 %add, 32
+  %idxprom16 = ashr exact i64 %sext47, 32  ; same 32-bit sign-extension for the x index
+  %arrayidx17 = getelementptr inbounds i32, i32* %Out, i64 %idxprom16  ; &Out[x index]
+  store i32 %call1513, i32* %arrayidx17, align 4, !tbaa !15  ; Out[x index] = value returned by atomic.add
+  %2 = load i32, i32* %arrayidx, align 4, !tbaa !15  ; reload Out[y index]
+  %3 = bitcast i32* %Out to i8*
+  %call2014 = call i32 @llvm.hpvm.atomic.sub(i8* %3, i32 %2)  ; atomic sub of Out[y index] at address %Out
+  store i32 %call2014, i32* %arrayidx17, align 4, !tbaa !15  ; overwrites Out[x index] with value returned by atomic.sub
+  ret %emptyStruct undef
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createNode(i8*) #6
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.bind.input(i8*, i32, i32, i1) #6
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32, i1) #6
+
+; Function Attrs: nounwind uwtable -- Internal node function: creates a 2D Func1_cloned node and an Allocation_cloned node, binds three inputs and adds two edges Allocation -> Func1. %In (argument 0) is not bound to any child here.
+define dso_local %emptyStruct.0 @Func3_cloned(i32* in %In, i64 %Insize, i32* in out %Out, i64 %Outsize) #4 {
+entry:
+  %Func1_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%emptyStruct (i32*, i64, i32*, i64)* @Func1_cloned to i8*), i64 3, i64 5)  ; i64 operands 3 and 5
+  %Allocation_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Allocation (i64)* @Allocation_cloned to i8*))
+  call void @llvm.hpvm.bind.input(i8* %Allocation_cloned.node, i32 1, i32 0, i1 false)  ; NOTE(review): operands presumably (child, parent input idx, child input idx, isStream) -- confirm
+  call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node, i32 2, i32 2, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node, i32 3, i32 3, i1 false)
+  %output = call i8* @llvm.hpvm.createEdge(i8* %Allocation_cloned.node, i8* %Func1_cloned.node, i1 true, i32 0, i32 0, i1 false)  ; edge with index pair 0 -> 0; third operand is i1 true here
+  %output1 = call i8* @llvm.hpvm.createEdge(i8* %Allocation_cloned.node, i8* %Func1_cloned.node, i1 true, i32 1, i32 1, i1 false)  ; edge with index pair 1 -> 1
+  ret %emptyStruct.0 undef
+}
+
+; Function Attrs: nounwind uwtable -- Internal node function: creates a 2D Func3_cloned node and issues bind.input with matching index pairs 0..3.
+define dso_local %emptyStruct.1 @Func2_cloned(i32* in %In, i64 %Insize, i32* in out %Out, i64 %Outsize) #4 {
+entry:
+  %Func3_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%emptyStruct.0 (i32*, i64, i32*, i64)* @Func3_cloned to i8*), i64 3, i64 5)  ; i64 operands 3 and 5
+  call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node, i32 0, i32 0, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node, i32 1, i32 1, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node, i32 2, i32 2, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node, i32 3, i32 3, i1 false)
+  ret %emptyStruct.1 undef
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.launch(i8*, i8*, i1) #6
+
+; Function Attrs: nounwind uwtable -- Root node function (the one passed to llvm.hpvm.launch in @Launch): creates a single Func2_cloned node and issues bind.input with matching index pairs 0..3.
+define dso_local %emptyStruct.2 @PipeRoot_cloned(i32* in %In, i64 %Insize, i32* in out %Out, i64 %Outsize) #4 {
+entry:
+  %Func2_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%emptyStruct.1 (i32*, i64, i32*, i64)* @Func2_cloned to i8*))
+  call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 0, i32 0, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 1, i32 1, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 2, i32 2, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 3, i32 3, i1 false)
+  ret %emptyStruct.2 undef
+}
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.wait(i8*) #6
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.init() #6
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.cleanup() #6
+
+attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #3 = { nofree nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #4 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #5 = { nounwind readnone }
+attributes #6 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+!hpvm_hint_gpu = !{!2}
+!hpvm_hint_cpu = !{!3, !4, !5}
+!hpvm_hint_spir = !{}
+!hpvm_hint_cudnn = !{}
+!hpvm_hint_promise = !{}
+!hpvm_hint_cpu_gpu = !{}
+!hpvm_hint_cpu_spir = !{}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 3551132592a00cab6c966df508ab511598269f78)"}
+!2 = !{%emptyStruct (i32*, i64, i32*, i64)* @Func1_cloned}
+!3 = !{%emptyStruct.0 (i32*, i64, i32*, i64)* @Func3_cloned}
+!4 = !{%emptyStruct.1 (i32*, i64, i32*, i64)* @Func2_cloned}
+!5 = !{%emptyStruct.2 (i32*, i64, i32*, i64)* @PipeRoot_cloned}
+!6 = !{!7, !8, i64 0}
+!7 = !{!"Root", !8, i64 0, !11, i64 8, !8, i64 16, !11, i64 24}
+!8 = !{!"any pointer", !9, i64 0}
+!9 = !{!"omnipotent char", !10, i64 0}
+!10 = !{!"Simple C/C++ TBAA"}
+!11 = !{!"long", !9, i64 0}
+!12 = !{!7, !11, i64 8}
+!13 = !{!7, !8, i64 16}
+!14 = !{!7, !11, i64 24}
+!15 = !{!16, !16, i64 0}
+!16 = !{!"int", !9, i64 0}
diff --git a/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.genvisc.ll b/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.genvisc.ll
new file mode 100644
index 0000000000..ed99bee9f7
--- /dev/null
+++ b/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.genvisc.ll
@@ -0,0 +1,239 @@
+; RUN: opt -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_NVPTX.so -S -localmem -dfg2llvm-nvptx <  %s | FileCheck %s
+; ModuleID = 'ThreeLevel.ll'
+source_filename = "ThreeLevel.c"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.Root = type { i32*, i64, i32*, i64 }
+%emptyStruct = type <{}>
+%emptyStruct.0 = type <{}>
+%emptyStruct.1 = type <{}>
+%emptyStruct.2 = type <{}>
+
+declare dso_local void @__hpvm__hint(i32) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__attributes(i32, ...) local_unnamed_addr #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
+
+declare dso_local i8* @__hpvm__getNode(...) local_unnamed_addr #0
+
+declare dso_local i8* @__hpvm__getParentNode(i8*) local_unnamed_addr #0
+
+declare dso_local i64 @__hpvm__getNodeInstanceID_x(i8*) local_unnamed_addr #0
+
+declare dso_local i64 @__hpvm__getNodeInstanceID_y(i8*) local_unnamed_addr #0
+
+declare dso_local i64 @__hpvm__getNumNodeInstances_x(i8*) local_unnamed_addr #0
+
+declare dso_local i64 @__hpvm__getNumNodeInstances_y(i8*) local_unnamed_addr #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
+
+declare dso_local i8* @__hpvm__createNodeND(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__bindIn(i8*, i32, i32, i32) local_unnamed_addr #0
+
+; Function Attrs: noinline nounwind uwtable -- Builds a %struct.Root (two malloc'd 1024-byte buffers plus their sizes), launches the PipeRoot_cloned graph and waits for it.
+define dso_local void @Launch() local_unnamed_addr #2 {
+entry:
+  %RootArgs = alloca %struct.Root, align 8
+  %0 = bitcast %struct.Root* %RootArgs to i8*
+  call void @llvm.lifetime.start.p0i8(i64 32, i8* nonnull %0) #6
+  %call = tail call noalias i8* @malloc(i64 1024) #6  ; first buffer
+  %1 = bitcast %struct.Root* %RootArgs to i8**  ; address of field 0, viewed as i8**
+  store i8* %call, i8** %1, align 8, !tbaa !6
+  %Insize = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 1
+  store i64 1024, i64* %Insize, align 8, !tbaa !12  ; field 1 = 1024
+  %output = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 2
+  %call1 = tail call noalias i8* @malloc(i64 1024) #6  ; second buffer
+  %2 = bitcast i32** %output to i8**
+  store i8* %call1, i8** %2, align 8, !tbaa !13  ; field 2 = second buffer
+  %Outsize = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 3
+  store i64 1024, i64* %Outsize, align 8, !tbaa !14  ; field 3 = 1024
+  %3 = bitcast %struct.Root* %RootArgs to i8*
+  %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%emptyStruct.2 (i32*, i64, i32*, i64)* @PipeRoot_cloned to i8*), i8* %3, i1 false)
+  call void @llvm.hpvm.wait(i8* %graphID)
+  call void @llvm.lifetime.end.p0i8(i64 32, i8* nonnull %0) #6  ; neither malloc'd buffer is freed in this function
+  ret void
+}
+
+; Function Attrs: nofree nounwind
+declare dso_local noalias i8* @malloc(i64) local_unnamed_addr #3
+
+declare dso_local i8* @__hpvm__launch(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__wait(i8*) local_unnamed_addr #0
+
+; CHECK-LABEL: @main(
+; CHECK: call i8* @llvm_hpvm_ocl_initContext(i32
+; CHECK: call i8* @llvm_hpvm_ocl_launch(i8*
+; CHECK: call void @llvm_hpvm_ocl_clearContext(i8*
+
+; CHECK-LABEL: @Func2_cloned.2_cloned_cloned_cloned_cloned_cloned_cloned
+; CHECK: call i8* @llvm_hpvm_ocl_argument_ptr(i8*
+; CHECK: call void @llvm_hpvm_ocl_argument_scalar(i8*
+; CHECK: call i8* @llvm_hpvm_ocl_argument_ptr(i8*
+; CHECK: call void @llvm_hpvm_ocl_argument_scalar(i8*
+; CHECK: call i8* @llvm_hpvm_ocl_executeNode(i8*
+; CHECK-NEXT: call void @llvm_hpvm_ocl_wait(i8*
+; CHECK-NEXT: call void @llvm_hpvm_ocl_free(i8*
+; CHECK-NEXT: call void @llvm_hpvm_ocl_free(i8*
+
+
+; Function Attrs: nounwind uwtable -- Host entry point: brackets a single call to @Launch with llvm.hpvm.init / llvm.hpvm.cleanup and returns 0.
+define dso_local i32 @main() local_unnamed_addr #4 {
+entry:
+  call void @llvm.hpvm.init()
+  tail call void @Launch()
+  call void @llvm.hpvm.cleanup()
+  ret i32 0
+}
+
+declare dso_local void @__hpvm__init(...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__cleanup(...) local_unnamed_addr #0
+
+declare i8* @llvm_hpvm_initializeTimerSet()
+
+declare void @llvm_hpvm_switchToTimer(i8**, i32)
+
+declare void @llvm_hpvm_printTimerSet(i8**, i8*)
+
+; Function Attrs: nounwind readnone
+declare i8* @llvm.hpvm.getNode() #5
+
+; Function Attrs: nounwind readnone
+declare i8* @llvm.hpvm.getParentNode(i8*) #5
+
+; Function Attrs: nounwind readnone
+declare i64 @llvm.hpvm.getNodeInstanceID.x(i8*) #5
+
+; Function Attrs: nounwind readnone
+declare i64 @llvm.hpvm.getNodeInstanceID.y(i8*) #5
+
+; Function Attrs: nounwind readnone
+declare i64 @llvm.hpvm.getNumNodeInstances.x(i8*) #5
+
+; Function Attrs: nounwind readnone
+declare i64 @llvm.hpvm.getNumNodeInstances.y(i8*) #5
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createNode2D(i8*, i64, i64) #6
+
+; Function Attrs: nounwind uwtable -- Node function: derives two indices from node/parent instance IDs and performs Out[x index] += In[y index]. %Insize and %Outsize are not read.
+define dso_local %emptyStruct @Func1_cloned(i32* in %In, i64 %Insize, i32* in out %Out, i64 %Outsize) #4 {
+entry:
+  %call4 = call i8* @llvm.hpvm.getNode()  ; handle of this node
+  %call15 = call i8* @llvm.hpvm.getParentNode(i8* %call4)  ; handle of its parent
+  %call26 = call i64 @llvm.hpvm.getNodeInstanceID.x(i8* %call4)
+  %call37 = call i64 @llvm.hpvm.getNodeInstanceID.y(i8* %call4)
+  %call58 = call i64 @llvm.hpvm.getNodeInstanceID.x(i8* %call15)
+  %call79 = call i64 @llvm.hpvm.getNodeInstanceID.y(i8* %call15)
+  %call910 = call i64 @llvm.hpvm.getNumNodeInstances.x(i8* %call4)
+  %call1111 = call i64 @llvm.hpvm.getNumNodeInstances.y(i8* %call4)
+  %mul = mul i64 %call910, %call58
+  %add = add i64 %mul, %call26  ; x index = numInstances.x(node) * parentID.x + nodeID.x
+  %mul13 = mul i64 %call1111, %call79
+  %add14 = add i64 %mul13, %call37  ; y index = numInstances.y(node) * parentID.y + nodeID.y
+  %sext = shl i64 %add14, 32
+  %idxprom = ashr exact i64 %sext, 32  ; shl/ashr pair keeps the low 32 bits of the y index, sign-extended
+  %arrayidx = getelementptr inbounds i32, i32* %In, i64 %idxprom  ; &In[y index]
+  %0 = load i32, i32* %arrayidx, align 4, !tbaa !15
+  %sext36 = shl i64 %add, 32
+  %idxprom15 = ashr exact i64 %sext36, 32  ; same 32-bit sign-extension for the x index
+  %arrayidx16 = getelementptr inbounds i32, i32* %Out, i64 %idxprom15  ; &Out[x index]
+  %1 = load i32, i32* %arrayidx16, align 4, !tbaa !15
+  %add17 = add nsw i32 %1, %0
+  store i32 %add17, i32* %arrayidx16, align 4, !tbaa !15  ; Out[x index] = Out[x index] + In[y index]
+  ret %emptyStruct undef
+}
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.bind.input(i8*, i32, i32, i1) #6
+
+; Function Attrs: nounwind uwtable -- Internal node function: creates a 2D Func1_cloned node and issues bind.input with matching index pairs 0..3.
+define dso_local %emptyStruct.0 @Func3_cloned(i32* in %In, i64 %Insize, i32* in out %Out, i64 %Outsize) #4 {
+entry:
+  %Func1_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%emptyStruct (i32*, i64, i32*, i64)* @Func1_cloned to i8*), i64 3, i64 5)  ; i64 operands 3 and 5
+  call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node, i32 0, i32 0, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node, i32 1, i32 1, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node, i32 2, i32 2, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node, i32 3, i32 3, i1 false)
+  ret %emptyStruct.0 undef
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createNode(i8*) #6
+
+; Function Attrs: nounwind uwtable -- Internal node function: creates a 2D Func3_cloned node and issues bind.input with matching index pairs 0..3.
+define dso_local %emptyStruct.1 @Func2_cloned(i32* in %In, i64 %Insize, i32* in out %Out, i64 %Outsize) #4 {
+entry:
+  %Func3_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%emptyStruct.0 (i32*, i64, i32*, i64)* @Func3_cloned to i8*), i64 3, i64 5)  ; i64 operands 3 and 5
+  call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node, i32 0, i32 0, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node, i32 1, i32 1, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node, i32 2, i32 2, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node, i32 3, i32 3, i1 false)
+  ret %emptyStruct.1 undef
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.launch(i8*, i8*, i1) #6
+
+; Function Attrs: nounwind uwtable -- Root node function (the one passed to llvm.hpvm.launch in @Launch): creates a single Func2_cloned node and issues bind.input with matching index pairs 0..3.
+define dso_local %emptyStruct.2 @PipeRoot_cloned(i32* in %In, i64 %Insize, i32* in out %Out, i64 %Outsize) #4 {
+entry:
+  %Func2_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%emptyStruct.1 (i32*, i64, i32*, i64)* @Func2_cloned to i8*))
+  call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 0, i32 0, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 1, i32 1, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 2, i32 2, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 3, i32 3, i1 false)
+  ret %emptyStruct.2 undef
+}
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.wait(i8*) #6
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.init() #6
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.cleanup() #6
+
+attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #3 = { nofree nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #4 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #5 = { nounwind readnone }
+attributes #6 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+!hpvm_hint_gpu = !{!2}
+!hpvm_hint_cpu = !{!3, !4, !5}
+!hpvm_hint_spir = !{}
+!hpvm_hint_cudnn = !{}
+!hpvm_hint_promise = !{}
+!hpvm_hint_cpu_gpu = !{}
+!hpvm_hint_cpu_spir = !{}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 6690f9e7e8b46b96aea222d3e85315cd63545953)"}
+!2 = !{%emptyStruct (i32*, i64, i32*, i64)* @Func1_cloned}
+!3 = !{%emptyStruct.0 (i32*, i64, i32*, i64)* @Func3_cloned}
+!4 = !{%emptyStruct.1 (i32*, i64, i32*, i64)* @Func2_cloned}
+!5 = !{%emptyStruct.2 (i32*, i64, i32*, i64)* @PipeRoot_cloned}
+!6 = !{!7, !8, i64 0}
+!7 = !{!"Root", !8, i64 0, !11, i64 8, !8, i64 16, !11, i64 24}
+!8 = !{!"any pointer", !9, i64 0}
+!9 = !{!"omnipotent char", !10, i64 0}
+!10 = !{!"Simple C/C++ TBAA"}
+!11 = !{!"long", !9, i64 0}
+!12 = !{!7, !11, i64 8}
+!13 = !{!7, !8, i64 16}
+!14 = !{!7, !11, i64 24}
+!15 = !{!16, !16, i64 0}
+!16 = !{!"int", !9, i64 0}
diff --git a/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.opt.genvisc.ll b/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.opt.genvisc.ll
new file mode 100644
index 0000000000..060608fdc5
--- /dev/null
+++ b/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.opt.genvisc.ll
@@ -0,0 +1,261 @@
+; RUN: opt -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_NVPTX.so -S -localmem -dfg2llvm-nvptx <  %s | FileCheck %s
+; ModuleID = 'ThreeLevel.opt.ll'
+source_filename = "ThreeLevel.opt.c"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.Root = type { i32*, i64, i32*, i64 }
+%struct.out.Allocation = type <{ i8*, i64 }>
+%emptyStruct = type <{}>
+%emptyStruct.0 = type <{}>
+%emptyStruct.1 = type <{}>
+%emptyStruct.2 = type <{}>
+
+declare dso_local void @__hpvm__hint(i32) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__attributes(i32, ...) local_unnamed_addr #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
+
+declare dso_local i8* @__hpvm__getNode(...) local_unnamed_addr #0
+
+declare dso_local i8* @__hpvm__getParentNode(i8*) local_unnamed_addr #0
+
+declare dso_local i64 @__hpvm__getNodeInstanceID_x(i8*) local_unnamed_addr #0
+
+declare dso_local i64 @__hpvm__getNodeInstanceID_y(i8*) local_unnamed_addr #0
+
+declare dso_local i64 @__hpvm__getNumNodeInstances_x(i8*) local_unnamed_addr #0
+
+declare dso_local i64 @__hpvm__getNumNodeInstances_y(i8*) local_unnamed_addr #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
+
+declare dso_local i8* @__hpvm__malloc(i64) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__return(i32, ...) local_unnamed_addr #0
+
+declare dso_local i8* @__hpvm__createNodeND(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__bindIn(i8*, i32, i32, i32) local_unnamed_addr #0
+
+declare dso_local i8* @__hpvm__edge(i8*, i8*, i32, i32, i32, i32) local_unnamed_addr #0
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @Launch() local_unnamed_addr #2 {  ; Fills a stack %struct.Root, launches the graph rooted at @PipeRoot_cloned and waits on it.
+entry:
+  %RootArgs = alloca %struct.Root, align 8
+  %0 = bitcast %struct.Root* %RootArgs to i8*
+  call void @llvm.lifetime.start.p0i8(i64 32, i8* nonnull %0) #6
+  %call = tail call noalias i8* @malloc(i64 1024) #6  ; first 1024-byte buffer; no matching free appears in this function
+  %1 = bitcast %struct.Root* %RootArgs to i8**
+  store i8* %call, i8** %1, align 8, !tbaa !6  ; field 0 <- first buffer
+  %Insize = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 1
+  store i64 1024, i64* %Insize, align 8, !tbaa !12  ; field 1 <- 1024
+  %output = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 2
+  %call1 = tail call noalias i8* @malloc(i64 1024) #6  ; second 1024-byte buffer; also not freed here
+  %2 = bitcast i32** %output to i8**
+  store i8* %call1, i8** %2, align 8, !tbaa !13  ; field 2 <- second buffer
+  %Outsize = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 3
+  store i64 1024, i64* %Outsize, align 8, !tbaa !14  ; field 3 <- 1024
+  %3 = bitcast %struct.Root* %RootArgs to i8*
+  %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%emptyStruct.2 (i32*, i64, i32*, i64)* @PipeRoot_cloned to i8*), i8* %3, i1 false)  ; root function and packed arguments are both passed as i8*
+  call void @llvm.hpvm.wait(i8* %graphID)  ; waits on the handle returned by the launch above
+  call void @llvm.lifetime.end.p0i8(i64 32, i8* nonnull %0) #6
+  ret void
+}
+
+; Function Attrs: nofree nounwind
+declare dso_local noalias i8* @malloc(i64) local_unnamed_addr #3
+
+declare dso_local i8* @__hpvm__launch(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__wait(i8*) local_unnamed_addr #0
+
+; CHECK-LABEL: @main(
+; CHECK: call i8* @llvm_hpvm_ocl_initContext(i32
+; CHECK: call i8* @llvm_hpvm_ocl_launch(i8*
+; CHECK: call void @llvm_hpvm_ocl_clearContext(i8*
+
+; CHECK-LABEL: @Allocation_cloned.2
+
+; CHECK-LABEL: @Func2_cloned.3_cloned_cloned_cloned_cloned_cloned_cloned
+; CHECK: call i8* @llvm_hpvm_ocl_argument_ptr(i8*
+; CHECK: call void @llvm_hpvm_ocl_argument_scalar(i8*
+; CHECK: @Allocation_cloned.2(
+; CHECK: call void @llvm_hpvm_ocl_argument_shared(i8*
+; CHECK: call void @llvm_hpvm_ocl_argument_scalar(i8*
+; CHECK: call i8* @llvm_hpvm_ocl_executeNode(i8*
+; CHECK-NEXT: call void @llvm_hpvm_ocl_wait(i8*
+; CHECK-NEXT: call void @llvm_hpvm_ocl_free(i8*
+
+
+; Function Attrs: nounwind uwtable
+define dso_local i32 @main() local_unnamed_addr #4 {  ; Test entry point: brackets a single @Launch call with the HPVM init/cleanup intrinsics.
+entry:
+  call void @llvm.hpvm.init()
+  tail call void @Launch()  ; @Launch performs the graph launch and the wait
+  call void @llvm.hpvm.cleanup()
+  ret i32 0  ; always returns 0
+}
+
+declare dso_local void @__hpvm__init(...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__cleanup(...) local_unnamed_addr #0
+
+declare i8* @llvm_hpvm_initializeTimerSet()
+
+declare void @llvm_hpvm_switchToTimer(i8**, i32)
+
+declare void @llvm_hpvm_printTimerSet(i8**, i8*)
+
+; Function Attrs: nounwind readnone
+declare i8* @llvm.hpvm.getNode() #5
+
+; Function Attrs: nounwind readnone
+declare i8* @llvm.hpvm.getParentNode(i8*) #5
+
+; Function Attrs: nounwind readnone
+declare i64 @llvm.hpvm.getNodeInstanceID.x(i8*) #5
+
+; Function Attrs: nounwind readnone
+declare i64 @llvm.hpvm.getNodeInstanceID.y(i8*) #5
+
+; Function Attrs: nounwind readnone
+declare i64 @llvm.hpvm.getNumNodeInstances.x(i8*) #5
+
+; Function Attrs: nounwind readnone
+declare i64 @llvm.hpvm.getNumNodeInstances.y(i8*) #5
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.malloc(i64) #6
+
+; Function Attrs: nounwind uwtable
+define dso_local %struct.out.Allocation @Allocation_cloned(i64 %block) #4 {  ; Returns { llvm.hpvm.malloc(%block), %block } packed in %struct.out.Allocation.
+entry:
+  %call1 = call i8* @llvm.hpvm.malloc(i64 %block)  ; request of %block units via the HPVM malloc intrinsic
+  %returnStruct = insertvalue %struct.out.Allocation undef, i8* %call1, 0  ; element 0 <- returned pointer
+  %returnStruct2 = insertvalue %struct.out.Allocation %returnStruct, i64 %block, 1  ; element 1 <- the requested size, passed through
+  ret %struct.out.Allocation %returnStruct2
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createNode2D(i8*, i64, i64) #6
+
+; Function Attrs: nounwind uwtable
+define dso_local %emptyStruct @Func1_cloned(i32* in %In, i64 %Insize, i32* in out %Out, i64 %Outsize) #4 {  ; Computes Out[idx] += In[3]; creates no child nodes; listed under !hpvm_hint_gpu in this module.
+entry:
+  %call4 = call i8* @llvm.hpvm.getNode()  ; handle for this node
+  %call15 = call i8* @llvm.hpvm.getParentNode(i8* %call4)  ; handle for its parent
+  %call26 = call i64 @llvm.hpvm.getNodeInstanceID.x(i8* %call4)
+  %call37 = call i64 @llvm.hpvm.getNodeInstanceID.y(i8* %call4)  ; result not used below
+  %call58 = call i64 @llvm.hpvm.getNodeInstanceID.x(i8* %call15)
+  %call79 = call i64 @llvm.hpvm.getNodeInstanceID.y(i8* %call15)  ; result not used below
+  %call910 = call i64 @llvm.hpvm.getNumNodeInstances.x(i8* %call4)
+  %call1111 = call i64 @llvm.hpvm.getNumNodeInstances.y(i8* %call4)  ; result not used below
+  %mul = mul i64 %call910, %call58  ; numInstances.x(this) * instanceID.x(parent)
+  %add = add i64 %mul, %call26  ; ... + instanceID.x(this)
+  %arrayidx = getelementptr inbounds i32, i32* %In, i64 3  ; fixed element In[3]
+  %0 = load i32, i32* %arrayidx, align 4, !tbaa !15
+  %sext = shl i64 %add, 32  ; shl/ashr pair keeps the low 32 bits of %add and sign-extends them
+  %idxprom = ashr exact i64 %sext, 32
+  %arrayidx15 = getelementptr inbounds i32, i32* %Out, i64 %idxprom
+  %1 = load i32, i32* %arrayidx15, align 4, !tbaa !15
+  %add16 = add nsw i32 %1, %0
+  store i32 %add16, i32* %arrayidx15, align 4, !tbaa !15  ; read-modify-write of Out[idx]
+  ret %emptyStruct undef
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createNode(i8*) #6
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.bind.input(i8*, i32, i32, i1) #6
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32, i1) #6
+
+; Function Attrs: nounwind uwtable
+define dso_local %emptyStruct.0 @Func3_cloned(i32* in %In, i64 %Insize, i32* in out %Out, i64 %Outsize) #4 {  ; Internal node: creates a @Func1_cloned node and an @Allocation_cloned node and wires them together.
+entry:
+  %Func1_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%emptyStruct (i32*, i64, i32*, i64)* @Func1_cloned to i8*), i64 3, i64 5)  ; 2D node with extent operands 3 and 5
+  %Allocation_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Allocation (i64)* @Allocation_cloned to i8*))
+  call void @llvm.hpvm.bind.input(i8* %Allocation_cloned.node, i32 1, i32 0, i1 false)  ; presumably parent input 1 (%Insize) -> allocation input 0 (%block); confirm operand order against the HPVM spec
+  call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node, i32 2, i32 2, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node, i32 3, i32 3, i1 false)  ; only indices 2 and 3 of the Func1 node are bound directly
+  %output = call i8* @llvm.hpvm.createEdge(i8* %Allocation_cloned.node, i8* %Func1_cloned.node, i1 true, i32 0, i32 0, i1 false)  ; presumably allocation output 0 -> Func1 input 0; confirm
+  %output1 = call i8* @llvm.hpvm.createEdge(i8* %Allocation_cloned.node, i8* %Func1_cloned.node, i1 true, i32 1, i32 1, i1 false)  ; presumably allocation output 1 -> Func1 input 1; confirm
+  ret %emptyStruct.0 undef
+}
+
+; Function Attrs: nounwind uwtable
+define dso_local %emptyStruct.1 @Func2_cloned(i32* in %In, i64 %Insize, i32* in out %Out, i64 %Outsize) #4 {  ; Internal node: creates one 2D @Func3_cloned node and binds inputs 0-3 with matching indices.
+entry:
+  %Func3_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%emptyStruct.0 (i32*, i64, i32*, i64)* @Func3_cloned to i8*), i64 3, i64 5)  ; extent operands 3 and 5
+  call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node, i32 0, i32 0, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node, i32 1, i32 1, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node, i32 2, i32 2, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node, i32 3, i32 3, i1 false)
+  ret %emptyStruct.1 undef  ; no outputs are bound; the return type is the empty struct
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.launch(i8*, i8*, i1) #6
+
+; Function Attrs: nounwind uwtable
+define dso_local %emptyStruct.2 @PipeRoot_cloned(i32* in %In, i64 %Insize, i32* in out %Out, i64 %Outsize) #4 {  ; Graph root passed to llvm.hpvm.launch in @Launch: creates one @Func2_cloned node and binds inputs 0-3.
+entry:
+  %Func2_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%emptyStruct.1 (i32*, i64, i32*, i64)* @Func2_cloned to i8*))  ; createNode takes no extent operands
+  call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 0, i32 0, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 1, i32 1, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 2, i32 2, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 3, i32 3, i1 false)
+  ret %emptyStruct.2 undef
+}
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.wait(i8*) #6
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.init() #6
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.cleanup() #6
+
+attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #3 = { nofree nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #4 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #5 = { nounwind readnone }
+attributes #6 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+!hpvm_hint_gpu = !{!2}
+!hpvm_hint_cpu = !{!3, !4, !5}
+!hpvm_hint_spir = !{}
+!hpvm_hint_cudnn = !{}
+!hpvm_hint_promise = !{}
+!hpvm_hint_cpu_gpu = !{}
+!hpvm_hint_cpu_spir = !{}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 6690f9e7e8b46b96aea222d3e85315cd63545953)"}
+!2 = !{%emptyStruct (i32*, i64, i32*, i64)* @Func1_cloned}
+!3 = !{%emptyStruct.0 (i32*, i64, i32*, i64)* @Func3_cloned}
+!4 = !{%emptyStruct.1 (i32*, i64, i32*, i64)* @Func2_cloned}
+!5 = !{%emptyStruct.2 (i32*, i64, i32*, i64)* @PipeRoot_cloned}
+!6 = !{!7, !8, i64 0}
+!7 = !{!"Root", !8, i64 0, !11, i64 8, !8, i64 16, !11, i64 24}
+!8 = !{!"any pointer", !9, i64 0}
+!9 = !{!"omnipotent char", !10, i64 0}
+!10 = !{!"Simple C/C++ TBAA"}
+!11 = !{!"long", !9, i64 0}
+!12 = !{!7, !11, i64 8}
+!13 = !{!7, !8, i64 16}
+!14 = !{!7, !11, i64 24}
+!15 = !{!16, !16, i64 0}
+!16 = !{!"int", !9, i64 0}
diff --git a/hpvm/test/regressionTests/DFG2LLVM_X86/CreateNode.dfg.ll b/hpvm/test/regressionTests/DFG2LLVM_X86/CreateNode.dfg.ll
new file mode 100644
index 0000000000..1373d13159
--- /dev/null
+++ b/hpvm/test/regressionTests/DFG2LLVM_X86/CreateNode.dfg.ll
@@ -0,0 +1,182 @@
+; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -S -dfg2llvm-x86 <  %s | FileCheck %s
+; ModuleID = 'CreateNode.ll'
+source_filename = "CreateNode.c"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.Root = type { i32*, i64, i32*, i64, i32*, i64 }
+%struct.out.Func = type <{ i32* }>
+%struct.out.PipeRoot = type <{ i32* }>
+
+; CHECK-LABEL: i32 @main(
+; CHECK: call void @llvm.hpvm.init()
+; CHECK: call i8* @llvm_hpvm_x86_launch(i8* (i8*)* @LaunchDataflowGraph, i8*
+; CHECK-NEXT: call i8* @llvm.hpvm.launch(i8*
+; CHECK-NEXT: call void @llvm_hpvm_x86_wait(i8*
+
+; CHECK-LABEL: @PipeRoot_cloned(
+; CHECK: call i8* @llvm.hpvm.createNode(
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func_cloned.node
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func_cloned.node
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func_cloned.node
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func_cloned.node
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func_cloned.node
+
+; CHECK-LABEL: @Func_cloned.1_cloned_cloned_cloned_cloned_cloned_cloned
+; CHECK: call i8* @llvm_hpvm_x86_argument_ptr(
+
+; CHECK-LABEL: @PipeRoot_cloned.2(
+; CHECK: call void @llvm_hpvm_x86_dstack_push(
+; CHECK-NEXT: @Func_cloned.1_cloned_cloned_cloned_cloned_cloned_cloned(
+; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_pop()
+
+; CHECK-LABEL: @LaunchDataflowGraph(i8*
+; CHECK: call %struct.out.PipeRoot @PipeRoot_cloned.2(
+
+
+declare dso_local void @__hpvm__hint(i32) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__attributes(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__return(i32, ...) local_unnamed_addr #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
+
+declare dso_local i8* @__hpvm__createNodeND(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__bindIn(i8*, i32, i32, i32) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__bindOut(i8*, i32, i32, i32) local_unnamed_addr #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
+
+; Function Attrs: nounwind uwtable
+define dso_local i32 @main() local_unnamed_addr #2 {  ; Test entry point: packs three stack i32 slots into %struct.Root, launches @PipeRoot_cloned and waits.
+entry:
+  %In1 = alloca i32, align 4
+  %In2 = alloca i32, align 4
+  %Out = alloca i32, align 4
+  %RootArgs = alloca %struct.Root, align 8
+  %0 = bitcast i32* %In1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #3
+  store i32 1, i32* %In1, align 4, !tbaa !4  ; In1 = 1
+  %1 = bitcast i32* %In2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) #3
+  store i32 2, i32* %In2, align 4, !tbaa !4  ; In2 = 2
+  %2 = bitcast i32* %Out to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %2) #3
+  store i32 0, i32* %Out, align 4, !tbaa !4  ; Out = 0
+  %3 = bitcast %struct.Root* %RootArgs to i8*
+  call void @llvm.lifetime.start.p0i8(i64 48, i8* nonnull %3) #3
+  %input1 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 0
+  store i32* %In1, i32** %input1, align 8, !tbaa !8  ; field 0 <- &In1
+  %Insize1 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 1
+  store i64 32, i64* %Insize1, align 8, !tbaa !12  ; NOTE(review): size field is 32 while %In1 is a single 4-byte i32; confirm intended units
+  %input2 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 2
+  store i32* %In2, i32** %input2, align 8, !tbaa !13  ; field 2 <- &In2
+  %Insize2 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 3
+  store i64 32, i64* %Insize2, align 8, !tbaa !14  ; field 3 <- 32
+  %output = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 4
+  store i32* %Out, i32** %output, align 8, !tbaa !15  ; field 4 <- &Out
+  %Outsize = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 5
+  store i64 32, i64* %Outsize, align 8, !tbaa !16  ; field 5 <- 32
+  call void @llvm.hpvm.init()
+  %4 = bitcast %struct.Root* %RootArgs to i8*
+  %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot (i32*, i64, i32*, i64, i32*, i64)* @PipeRoot_cloned to i8*), i8* %4, i1 false)  ; root function and packed arguments are both passed as i8*
+  call void @llvm.hpvm.wait(i8* %graphID)  ; waits on the handle returned by the launch above
+  call void @llvm.hpvm.cleanup()
+  call void @llvm.lifetime.end.p0i8(i64 48, i8* nonnull %3) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %2) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #3
+  ret i32 0
+}
+
+declare dso_local void @__hpvm__init(...) local_unnamed_addr #0
+
+declare dso_local i8* @__hpvm__launch(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__wait(i8*) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__cleanup(...) local_unnamed_addr #0
+
+declare i8* @llvm_hpvm_initializeTimerSet()
+
+declare void @llvm_hpvm_switchToTimer(i8**, i32)
+
+declare void @llvm_hpvm_printTimerSet(i8**, i8*)
+
+; Function Attrs: nounwind uwtable
+define dso_local %struct.out.Func @Func_cloned(i32* in %In, i64 %Insize, i32* out %Out, i64 %Outsize) #2 {  ; Returns %Out wrapped in %struct.out.Func; performs no loads or stores.
+entry:
+  %returnStruct = insertvalue %struct.out.Func undef, i32* %Out, 0  ; element 0 <- the %Out pointer argument, unchanged
+  ret %struct.out.Func %returnStruct
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createNode(i8*) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.bind.input(i8*, i32, i32, i1) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.bind.output(i8*, i32, i32, i1) #3
+
+; Function Attrs: nounwind uwtable
+define dso_local %struct.out.PipeRoot @PipeRoot_cloned(i32* in %In1, i64 %Insize1, i32* in %In2, i64 %InSize2, i32* out %Out, i64 %Outsize) #2 {  ; Graph root passed to llvm.hpvm.launch in @main: creates one @Func_cloned node and binds its inputs and output.
+entry:
+  %Func_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func (i32*, i64, i32*, i64)* @Func_cloned to i8*))
+  call void @llvm.hpvm.bind.input(i8* %Func_cloned.node, i32 0, i32 0, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func_cloned.node, i32 1, i32 1, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func_cloned.node, i32 2, i32 2, i1 false)  ; NOTE(review): index 2 is %In2 in this signature, so the child's %Out would receive %In2 if operands are (parent index, child index); confirm
+  call void @llvm.hpvm.bind.input(i8* %Func_cloned.node, i32 3, i32 3, i1 false)  ; parameters at indices 4 and 5 (%Out, %Outsize) are not bound by any call here
+  call void @llvm.hpvm.bind.output(i8* %Func_cloned.node, i32 0, i32 0, i1 false)
+  ret %struct.out.PipeRoot undef  ; the IR-level return value is undef
+}
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.init() #3
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.launch(i8*, i8*, i1) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.wait(i8*) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.cleanup() #3
+
+attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #3 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+!hpvm_hint_cpu = !{!2, !3}
+!hpvm_hint_gpu = !{}
+!hpvm_hint_spir = !{}
+!hpvm_hint_cudnn = !{}
+!hpvm_hint_promise = !{}
+!hpvm_hint_cpu_gpu = !{}
+!hpvm_hint_cpu_spir = !{}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 6690f9e7e8b46b96aea222d3e85315cd63545953)"}
+!2 = !{%struct.out.Func (i32*, i64, i32*, i64)* @Func_cloned}
+!3 = !{%struct.out.PipeRoot (i32*, i64, i32*, i64, i32*, i64)* @PipeRoot_cloned}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"int", !6, i64 0}
+!6 = !{!"omnipotent char", !7, i64 0}
+!7 = !{!"Simple C/C++ TBAA"}
+!8 = !{!9, !10, i64 0}
+!9 = !{!"Root", !10, i64 0, !11, i64 8, !10, i64 16, !11, i64 24, !10, i64 32, !11, i64 40}
+!10 = !{!"any pointer", !6, i64 0}
+!11 = !{!"long", !6, i64 0}
+!12 = !{!9, !11, i64 8}
+!13 = !{!9, !10, i64 16}
+!14 = !{!9, !11, i64 24}
+!15 = !{!9, !10, i64 32}
+!16 = !{!9, !11, i64 40}
diff --git a/hpvm/test/regressionTests/DFG2LLVM_X86/ThreeLevel.dfg.ll b/hpvm/test/regressionTests/DFG2LLVM_X86/ThreeLevel.dfg.ll
new file mode 100644
index 0000000000..a60f28a08a
--- /dev/null
+++ b/hpvm/test/regressionTests/DFG2LLVM_X86/ThreeLevel.dfg.ll
@@ -0,0 +1,246 @@
+; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -S -dfg2llvm-x86 <  %s | FileCheck %s
+; ModuleID = 'ThreeLevel.ll'
+source_filename = "ThreeLevel.c"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.Root = type { i32*, i64, i32*, i64, i32*, i64 }
+%struct.out.Func1 = type <{ i32* }>
+%struct.out.Func3 = type <{ i32* }>
+%struct.out.Func2 = type <{ i32* }>
+%struct.out.PipeRoot = type <{ i32* }>
+
+
+; CHECK-LABEL: i32 @main(
+; CHECK: call void @llvm.hpvm.init()
+; CHECK: call i8* @llvm_hpvm_x86_launch(i8* (i8*)* @LaunchDataflowGraph, i8*
+; CHECK-NEXT: call i8* @llvm.hpvm.launch(i8*
+; CHECK-NEXT: call void @llvm_hpvm_x86_wait(i8*
+
+; CHECK-LABEL: @Func3_cloned(
+; CHECK: call i8* @llvm.hpvm.createNode2D(
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func1_cloned.node
+
+; CHECK-LABEL: @Func2_cloned(
+; CHECK: call i8* @llvm.hpvm.createNode1D(
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func3_cloned.node
+
+; CHECK-LABEL: @PipeRoot_cloned(
+; CHECK: call i8* @llvm.hpvm.createNode(
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func2_cloned.node
+
+; CHECK-LABEL: @Func1_cloned.1_cloned_cloned_cloned_cloned_cloned_cloned
+; CHECK: call i8* @llvm_hpvm_x86_argument_ptr(
+
+; CHECK-LABEL: @Func3_cloned.2_cloned_cloned_cloned_cloned_cloned_cloned(
+; CHECK-LABEL: for.body1:
+; CHECK: %index.y = phi i64 [ 0, %for.body ], [ %index.y.inc, %for.body1 ]
+; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_push(
+; CHECK-NEXT: @Func1_cloned.1_cloned_cloned_cloned_cloned_cloned_cloned(
+; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_pop()
+
+; CHECK-LABEL: @Func2_cloned.3_cloned_cloned_cloned_cloned_cloned_cloned(
+; CHECK-LABEL: for.body:
+; CHECK-NEXT: %index.x = phi i64 [ 0, %entry ], [ %index.x.inc, %for.body ]
+; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_push(
+; CHECK-NEXT: @Func3_cloned.2_cloned_cloned_cloned_cloned_cloned_cloned(
+; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_pop()
+
+; CHECK-LABEL: @PipeRoot_cloned.4(
+; CHECK: call void @llvm_hpvm_x86_dstack_push(
+; CHECK-NEXT: @Func2_cloned.3_cloned_cloned_cloned_cloned_cloned_cloned(
+; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_pop()
+
+; CHECK-LABEL: @LaunchDataflowGraph(
+; CHECK: call %struct.out.PipeRoot @PipeRoot_cloned.4(
+
+declare dso_local void @__hpvm__hint(i32) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__attributes(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__return(i32, ...) local_unnamed_addr #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
+
+declare dso_local i8* @__hpvm__createNodeND(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__bindIn(i8*, i32, i32, i32) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__bindOut(i8*, i32, i32, i32) local_unnamed_addr #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
+
+; Function Attrs: nounwind uwtable
+define dso_local i32 @main() local_unnamed_addr #2 {  ; Test entry point: packs three stack i32 slots into %struct.Root, launches @PipeRoot_cloned and waits.
+entry:
+  %In1 = alloca i32, align 4
+  %In2 = alloca i32, align 4
+  %Out = alloca i32, align 4
+  %RootArgs = alloca %struct.Root, align 8
+  %0 = bitcast i32* %In1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #3
+  store i32 1, i32* %In1, align 4, !tbaa !6  ; In1 = 1
+  %1 = bitcast i32* %In2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) #3
+  store i32 2, i32* %In2, align 4, !tbaa !6  ; In2 = 2
+  %2 = bitcast i32* %Out to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %2) #3
+  store i32 0, i32* %Out, align 4, !tbaa !6  ; Out = 0
+  %3 = bitcast %struct.Root* %RootArgs to i8*
+  call void @llvm.lifetime.start.p0i8(i64 48, i8* nonnull %3) #3
+  %input1 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 0
+  store i32* %In1, i32** %input1, align 8, !tbaa !10  ; field 0 <- &In1
+  %Insize1 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 1
+  store i64 32, i64* %Insize1, align 8, !tbaa !14  ; NOTE(review): size field is 32 while %In1 is a single 4-byte i32; confirm intended units
+  %input2 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 2
+  store i32* %In2, i32** %input2, align 8, !tbaa !15  ; field 2 <- &In2
+  %Insize2 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 3
+  store i64 32, i64* %Insize2, align 8, !tbaa !16  ; field 3 <- 32
+  %output = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 4
+  store i32* %Out, i32** %output, align 8, !tbaa !17  ; field 4 <- &Out
+  %Outsize = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 5
+  store i64 32, i64* %Outsize, align 8, !tbaa !18  ; field 5 <- 32
+  call void @llvm.hpvm.init()
+  %4 = bitcast %struct.Root* %RootArgs to i8*
+  %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot (i32*, i64, i32*, i64, i32*, i64)* @PipeRoot_cloned to i8*), i8* %4, i1 false)  ; root function and packed arguments are both passed as i8*
+  call void @llvm.hpvm.wait(i8* %graphID)  ; waits on the handle returned by the launch above
+  call void @llvm.hpvm.cleanup()
+  call void @llvm.lifetime.end.p0i8(i64 48, i8* nonnull %3) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %2) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #3
+  ret i32 0
+}
+
+declare dso_local void @__hpvm__init(...) local_unnamed_addr #0
+
+declare dso_local i8* @__hpvm__launch(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__wait(i8*) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__cleanup(...) local_unnamed_addr #0
+
+declare i8* @llvm_hpvm_initializeTimerSet()
+
+declare void @llvm_hpvm_switchToTimer(i8**, i32)
+
+declare void @llvm_hpvm_printTimerSet(i8**, i8*)
+
+; Function Attrs: nounwind uwtable
+define dso_local %struct.out.Func1 @Func1_cloned(i32* in %In, i64 %Insize, i32* out %Out, i64 %Outsize) #2 {  ; Returns %Out wrapped in %struct.out.Func1; performs no loads or stores.
+entry:
+  %returnStruct = insertvalue %struct.out.Func1 undef, i32* %Out, 0  ; element 0 <- the %Out pointer argument, unchanged
+  ret %struct.out.Func1 %returnStruct
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createNode2D(i8*, i64, i64) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.bind.input(i8*, i32, i32, i1) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.bind.output(i8*, i32, i32, i1) #3
+
+; Function Attrs: nounwind uwtable
+define dso_local %struct.out.Func3 @Func3_cloned(i32* in %In, i64 %Insize, i32* out %Out, i64 %Outsize) #2 {  ; Internal node: creates one 2D @Func1_cloned node and binds inputs 0-3 and output 0.
+entry:
+  %Func1_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%struct.out.Func1 (i32*, i64, i32*, i64)* @Func1_cloned to i8*), i64 3, i64 5)  ; extent operands 3 and 5
+  call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node, i32 0, i32 0, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node, i32 1, i32 1, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node, i32 2, i32 2, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node, i32 3, i32 3, i1 false)
+  call void @llvm.hpvm.bind.output(i8* %Func1_cloned.node, i32 0, i32 0, i1 false)
+  ret %struct.out.Func3 undef  ; the IR-level return value is undef
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createNode1D(i8*, i64) #3
+
+; Function Attrs: nounwind uwtable
+define dso_local %struct.out.Func2 @Func2_cloned(i32* in %In, i64 %Insize, i32* out %Out, i64 %Outsize) #2 {  ; Internal node: creates one 1D @Func3_cloned node and binds inputs 0-3 and output 0.
+entry:
+  %Func3_cloned.node = call i8* @llvm.hpvm.createNode1D(i8* bitcast (%struct.out.Func3 (i32*, i64, i32*, i64)* @Func3_cloned to i8*), i64 3)  ; single extent operand 3
+  call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node, i32 0, i32 0, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node, i32 1, i32 1, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node, i32 2, i32 2, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node, i32 3, i32 3, i1 false)
+  call void @llvm.hpvm.bind.output(i8* %Func3_cloned.node, i32 0, i32 0, i1 false)
+  ret %struct.out.Func2 undef  ; the IR-level return value is undef
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createNode(i8*) #3
+
+; Function Attrs: nounwind uwtable
+define dso_local %struct.out.PipeRoot @PipeRoot_cloned(i32* in %In1, i64 %Insize1, i32* in %In2, i64 %InSize2, i32* out %Out, i64 %Outsize) #2 {  ; Graph root passed to llvm.hpvm.launch in @main: creates one @Func2_cloned node and binds its inputs and output.
+entry:
+  %Func2_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func2 (i32*, i64, i32*, i64)* @Func2_cloned to i8*))
+  call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 0, i32 0, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 1, i32 1, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 2, i32 2, i1 false)  ; NOTE(review): index 2 is %In2 in this signature, so the child's %Out would receive %In2 if operands are (parent index, child index); confirm
+  call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 3, i32 3, i1 false)  ; parameters at indices 4 and 5 (%Out, %Outsize) are not bound by any call here
+  call void @llvm.hpvm.bind.output(i8* %Func2_cloned.node, i32 0, i32 0, i1 false)
+  ret %struct.out.PipeRoot undef  ; the IR-level return value is undef
+}
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.init() #3
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.launch(i8*, i8*, i1) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.wait(i8*) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.cleanup() #3
+
+attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #3 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+!hpvm_hint_cpu = !{!2, !3, !4, !5}
+!hpvm_hint_gpu = !{}
+!hpvm_hint_spir = !{}
+!hpvm_hint_cudnn = !{}
+!hpvm_hint_promise = !{}
+!hpvm_hint_cpu_gpu = !{}
+!hpvm_hint_cpu_spir = !{}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 6690f9e7e8b46b96aea222d3e85315cd63545953)"}
+!2 = !{%struct.out.Func1 (i32*, i64, i32*, i64)* @Func1_cloned}
+!3 = !{%struct.out.Func3 (i32*, i64, i32*, i64)* @Func3_cloned}
+!4 = !{%struct.out.Func2 (i32*, i64, i32*, i64)* @Func2_cloned}
+!5 = !{%struct.out.PipeRoot (i32*, i64, i32*, i64, i32*, i64)* @PipeRoot_cloned}
+!6 = !{!7, !7, i64 0}
+!7 = !{!"int", !8, i64 0}
+!8 = !{!"omnipotent char", !9, i64 0}
+!9 = !{!"Simple C/C++ TBAA"}
+!10 = !{!11, !12, i64 0}
+!11 = !{!"Root", !12, i64 0, !13, i64 8, !12, i64 16, !13, i64 24, !12, i64 32, !13, i64 40}
+!12 = !{!"any pointer", !8, i64 0}
+!13 = !{!"long", !8, i64 0}
+!14 = !{!11, !13, i64 8}
+!15 = !{!11, !12, i64 16}
+!16 = !{!11, !13, i64 24}
+!17 = !{!11, !12, i64 32}
+!18 = !{!11, !13, i64 40}
diff --git a/hpvm/test/regressionTests/DFG2LLVM_X86/TwoLaunch.dfg.ll b/hpvm/test/regressionTests/DFG2LLVM_X86/TwoLaunch.dfg.ll
new file mode 100644
index 0000000000..5ce7a58e21
--- /dev/null
+++ b/hpvm/test/regressionTests/DFG2LLVM_X86/TwoLaunch.dfg.ll
@@ -0,0 +1,230 @@
+; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -S -dfg2llvm-x86 <  %s | FileCheck %s
+; ModuleID = 'TwoLaunch.ll'
+source_filename = "TwoLaunch.c"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.Root = type { i32*, i64, i32*, i64, i32*, i64 }
+%struct.out.Func1 = type <{ i32* }>
+%struct.out.Func2 = type <{ i32* }>
+%struct.out.PipeRoot = type <{ i32* }>
+
+; CHECK-LABEL: i32 @main(
+; CHECK: call void @llvm.hpvm.init()
+; CHECK: @llvm_hpvm_x86_launch(i8* (i8*)* @LaunchDataflowGraph, i8*
+; CHECK-NEXT: call i8* @llvm.hpvm.launch(i8*
+; CHECK: @llvm_hpvm_x86_launch(i8* (i8*)* @LaunchDataflowGraph.7, i8*
+; CHECK-NEXT: call i8* @llvm.hpvm.launch(i8*
+; CHECK-NEXT: call void @llvm_hpvm_x86_wait(i8*
+; CHECK-NEXT: call void @llvm_hpvm_x86_wait(i8*
+
+; CHECK-LABEL: @Func2_cloned(
+; CHECK: call i8* @llvm.hpvm.createNode1D(
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func1_cloned.node
+
+; CHECK-LABEL: @PipeRoot_cloned(
+; CHECK: call i8* @llvm.hpvm.createNode(
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func2_cloned.node
+
+; CHECK-LABEL: @Func1_cloned.1_cloned_cloned_cloned_cloned_cloned_cloned(
+; CHECK: call i8* @llvm_hpvm_x86_argument_ptr(
+
+; CHECK-LABEL: @Func2_cloned.2_cloned_cloned_cloned_cloned_cloned_cloned(
+; CHECK: %index.x = phi i64 [ 0, %entry ], [ %index.x.inc, %for.body ]
+; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_push(
+; CHECK-NEXT: @Func1_cloned.1_cloned_cloned_cloned_cloned_cloned_cloned(
+; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_pop()
+
+; CHECK-LABEL: @PipeRoot_cloned.3(
+; CHECK: call void @llvm_hpvm_x86_dstack_push(
+; CHECK-NEXT: @Func2_cloned.2_cloned_cloned_cloned_cloned_cloned_cloned(
+; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_pop()
+
+; CHECK-LABEL: @LaunchDataflowGraph(i8*
+; CHECK: call %struct.out.PipeRoot @PipeRoot_cloned.3(
+
+; CHECK-LABEL: @Func1_cloned.4_cloned_cloned_cloned_cloned_cloned_cloned(
+; CHECK: @llvm_hpvm_x86_argument_ptr(
+
+; CHECK-LABEL: @Func2_cloned.5_cloned_cloned_cloned_cloned_cloned_cloned(
+; CHECK: %index.x = phi i64 [ 0, %entry ], [ %index.x.inc, %for.body ]
+; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_push(
+; CHECK-NEXT: @Func1_cloned.4_cloned_cloned_cloned_cloned_cloned_cloned(
+; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_pop()
+
+; CHECK-LABEL: @LaunchDataflowGraph.7(i8*
+; CHECK: call %struct.out.PipeRoot @PipeRoot_cloned.6(
+; (Second launch wrapper: expected to call the root clone numbered after @Func1_cloned.4 / @Func2_cloned.5 -- TODO confirm against pass output.)
+declare dso_local void @__hpvm__hint(i32) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__attributes(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__return(i32, ...) local_unnamed_addr #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
+
+declare dso_local i8* @__hpvm__createNodeND(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__bindIn(i8*, i32, i32, i32) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__bindOut(i8*, i32, i32, i32) local_unnamed_addr #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
+
+; Function Attrs: nounwind uwtable -- host entry point: fills %RootArgs with pointers to %In1/%In2/%Out and a size of 32 for each, launches @PipeRoot_cloned twice on that same struct, waits on both handles, then cleans up.
+define dso_local i32 @main() local_unnamed_addr #2 {
+entry:
+  %In1 = alloca i32, align 4
+  %In2 = alloca i32, align 4
+  %Out = alloca i32, align 4
+  %RootArgs = alloca %struct.Root, align 8
+  %0 = bitcast i32* %In1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #3
+  store i32 1, i32* %In1, align 4, !tbaa !5
+  %1 = bitcast i32* %In2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) #3
+  store i32 2, i32* %In2, align 4, !tbaa !5
+  %2 = bitcast i32* %Out to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %2) #3
+  store i32 0, i32* %Out, align 4, !tbaa !5
+  %3 = bitcast %struct.Root* %RootArgs to i8*
+  call void @llvm.lifetime.start.p0i8(i64 48, i8* nonnull %3) #3
+  %input1 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 0
+  store i32* %In1, i32** %input1, align 8, !tbaa !9
+  %Insize1 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 1
+  store i64 32, i64* %Insize1, align 8, !tbaa !13 ; NOTE(review): size field is 32 although %In1 is a 4-byte alloca -- confirm intended unit
+  %input2 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 2
+  store i32* %In2, i32** %input2, align 8, !tbaa !14
+  %Insize2 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 3
+  store i64 32, i64* %Insize2, align 8, !tbaa !15
+  %output = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 4
+  store i32* %Out, i32** %output, align 8, !tbaa !16
+  %Outsize = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 5
+  store i64 32, i64* %Outsize, align 8, !tbaa !17
+  call void @llvm.hpvm.init()
+  %4 = bitcast %struct.Root* %RootArgs to i8*
+  %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot (i32*, i64, i32*, i64, i32*, i64)* @PipeRoot_cloned to i8*), i8* %4, i1 false) ; first launch of the root
+  %5 = bitcast %struct.Root* %RootArgs to i8*
+  %graphID1 = call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot (i32*, i64, i32*, i64, i32*, i64)* @PipeRoot_cloned to i8*), i8* %5, i1 false) ; second launch, same root and same argument struct
+  call void @llvm.hpvm.wait(i8* %graphID)
+  call void @llvm.hpvm.wait(i8* %graphID1)
+  call void @llvm.hpvm.cleanup()
+  call void @llvm.lifetime.end.p0i8(i64 48, i8* nonnull %3) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %2) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #3
+  ret i32 0
+}
+
+declare dso_local void @__hpvm__init(...) local_unnamed_addr #0
+
+declare dso_local i8* @__hpvm__launch(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__wait(i8*) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__cleanup(...) local_unnamed_addr #0
+
+declare i8* @llvm_hpvm_initializeTimerSet()
+
+declare void @llvm_hpvm_switchToTimer(i8**, i32)
+
+declare void @llvm_hpvm_printTimerSet(i8**, i8*)
+
+; Function Attrs: nounwind uwtable -- node function that creates no child nodes: returns %struct.out.Func1 holding the %Out pointer; %In, %Insize and %Outsize are not read.
+define dso_local %struct.out.Func1 @Func1_cloned(i32* in %In, i64 %Insize, i32* out %Out, i64 %Outsize) #2 {
+entry:
+  %returnStruct = insertvalue %struct.out.Func1 undef, i32* %Out, 0
+  ret %struct.out.Func1 %returnStruct
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createNode1D(i8*, i64) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.bind.input(i8*, i32, i32, i1) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.bind.output(i8*, i32, i32, i1) #3
+
+; Function Attrs: nounwind uwtable -- intermediate node: creates one 1-D child node running @Func1_cloned (i64 argument 3), binds index pairs 0..3 as inputs and pair (0,0) as output, and returns undef.
+define dso_local %struct.out.Func2 @Func2_cloned(i32* in %In, i64 %Insize, i32* out %Out, i64 %Outsize) #2 {
+entry:
+  %Func1_cloned.node = call i8* @llvm.hpvm.createNode1D(i8* bitcast (%struct.out.Func1 (i32*, i64, i32*, i64)* @Func1_cloned to i8*), i64 3) ; presumably 3 instances along the single dimension -- confirm against the HPVM spec
+  call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node, i32 0, i32 0, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node, i32 1, i32 1, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node, i32 2, i32 2, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node, i32 3, i32 3, i1 false)
+  call void @llvm.hpvm.bind.output(i8* %Func1_cloned.node, i32 0, i32 0, i1 false)
+  ret %struct.out.Func2 undef
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createNode(i8*) #3
+
+; Function Attrs: nounwind uwtable -- root node passed to llvm.hpvm.launch in @main: creates a single child node running @Func2_cloned, binds index pairs 0..3 as inputs and (0,0) as output; indices 4 and 5 of its own six parameters are not bound here.
+define dso_local %struct.out.PipeRoot @PipeRoot_cloned(i32* in %In1, i64 %Insize1, i32* in %In2, i64 %InSize2, i32* out %Out, i64 %Outsize) #2 {
+entry:
+  %Func2_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func2 (i32*, i64, i32*, i64)* @Func2_cloned to i8*))
+  call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 0, i32 0, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 1, i32 1, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 2, i32 2, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 3, i32 3, i1 false)
+  call void @llvm.hpvm.bind.output(i8* %Func2_cloned.node, i32 0, i32 0, i1 false)
+  ret %struct.out.PipeRoot undef
+}
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.init() #3
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.launch(i8*, i8*, i1) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.wait(i8*) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.cleanup() #3
+
+attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #3 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+!hpvm_hint_cpu = !{!2, !3, !4}
+!hpvm_hint_gpu = !{}
+!hpvm_hint_spir = !{}
+!hpvm_hint_cudnn = !{}
+!hpvm_hint_promise = !{}
+!hpvm_hint_cpu_gpu = !{}
+!hpvm_hint_cpu_spir = !{}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 6690f9e7e8b46b96aea222d3e85315cd63545953)"}
+!2 = !{%struct.out.Func1 (i32*, i64, i32*, i64)* @Func1_cloned}
+!3 = !{%struct.out.Func2 (i32*, i64, i32*, i64)* @Func2_cloned}
+!4 = !{%struct.out.PipeRoot (i32*, i64, i32*, i64, i32*, i64)* @PipeRoot_cloned}
+!5 = !{!6, !6, i64 0}
+!6 = !{!"int", !7, i64 0}
+!7 = !{!"omnipotent char", !8, i64 0}
+!8 = !{!"Simple C/C++ TBAA"}
+!9 = !{!10, !11, i64 0}
+!10 = !{!"Root", !11, i64 0, !12, i64 8, !11, i64 16, !12, i64 24, !11, i64 32, !12, i64 40}
+!11 = !{!"any pointer", !7, i64 0}
+!12 = !{!"long", !7, i64 0}
+!13 = !{!10, !12, i64 8}
+!14 = !{!10, !11, i64 16}
+!15 = !{!10, !12, i64 24}
+!16 = !{!10, !11, i64 32}
+!17 = !{!10, !12, i64 40}
diff --git a/hpvm/test/regressionTests/DFG2LLVM_X86/TwoLevel.dfg.ll b/hpvm/test/regressionTests/DFG2LLVM_X86/TwoLevel.dfg.ll
new file mode 100644
index 0000000000..b218b70fd0
--- /dev/null
+++ b/hpvm/test/regressionTests/DFG2LLVM_X86/TwoLevel.dfg.ll
@@ -0,0 +1,213 @@
+; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -S -dfg2llvm-x86 <  %s | FileCheck %s
+; ModuleID = 'TwoLevel.ll'
+source_filename = "TwoLevel.c"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.Root = type { i32*, i64, i32*, i64, i32*, i64 }
+%struct.out.Func1 = type <{ i32* }>
+%struct.out.Func2 = type <{ i32* }>
+%struct.out.PipeRoot = type <{ i32* }>
+
+; CHECK-LABEL: i32 @main(
+; CHECK: call void @llvm.hpvm.init()
+; CHECK: call i8* @llvm_hpvm_x86_launch(i8* (i8*)* @LaunchDataflowGraph, i8*
+; CHECK-NEXT: call i8* @llvm.hpvm.launch(i8* 
+; CHECK-NEXT: call void @llvm_hpvm_x86_wait(i8*
+
+; CHECK-LABEL: @Func2_cloned(
+; CHECK: call i8* @llvm.hpvm.createNode1D(
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func1_cloned.node
+
+; CHECK-LABEL: @PipeRoot_cloned(
+; CHECK: call i8* @llvm.hpvm.createNode(
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func2_cloned.node
+
+; CHECK-LABEL: @Func1_cloned.1_cloned_cloned_cloned_cloned_cloned_cloned(
+; CHECK: call i8* @llvm_hpvm_x86_argument_ptr(
+
+; CHECK-LABEL: @Func2_cloned.2_cloned_cloned_cloned_cloned_cloned_cloned(
+; CHECK-LABEL: for.body
+; CHECK: %index.x = phi i64 [ 0, %entry ], [ %index.x.inc, %for.body ]
+; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_push(
+; CHECK-NEXT: @Func1_cloned.1_cloned_cloned_cloned_cloned_cloned_cloned(
+; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_pop()
+
+; CHECK-LABEL: @PipeRoot_cloned.3(
+; CHECK: call void @llvm_hpvm_x86_dstack_push(
+; CHECK-NEXT: @Func2_cloned.2_cloned_cloned_cloned_cloned_cloned_cloned(
+; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_pop()
+
+; CHECK-LABEL: @LaunchDataflowGraph(i8*
+; CHECK: call %struct.out.PipeRoot @PipeRoot_cloned.3(
+; (The launch wrapper is expected to call the root clone whose label is matched above as @PipeRoot_cloned.3.)
+declare dso_local void @__hpvm__hint(i32) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__attributes(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__return(i32, ...) local_unnamed_addr #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
+
+declare dso_local i8* @__hpvm__createNodeND(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__bindIn(i8*, i32, i32, i32) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__bindOut(i8*, i32, i32, i32) local_unnamed_addr #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
+
+; Function Attrs: nounwind uwtable -- host entry point: fills %RootArgs with pointers to %In1/%In2/%Out and a size of 32 for each, launches @PipeRoot_cloned once, waits on the returned handle, then cleans up.
+define dso_local i32 @main() local_unnamed_addr #2 {
+entry:
+  %In1 = alloca i32, align 4
+  %In2 = alloca i32, align 4
+  %Out = alloca i32, align 4
+  %RootArgs = alloca %struct.Root, align 8
+  %0 = bitcast i32* %In1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #3
+  store i32 1, i32* %In1, align 4, !tbaa !5
+  %1 = bitcast i32* %In2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) #3
+  store i32 2, i32* %In2, align 4, !tbaa !5
+  %2 = bitcast i32* %Out to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %2) #3
+  store i32 0, i32* %Out, align 4, !tbaa !5
+  %3 = bitcast %struct.Root* %RootArgs to i8*
+  call void @llvm.lifetime.start.p0i8(i64 48, i8* nonnull %3) #3
+  %input1 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 0
+  store i32* %In1, i32** %input1, align 8, !tbaa !9
+  %Insize1 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 1
+  store i64 32, i64* %Insize1, align 8, !tbaa !13 ; NOTE(review): size field is 32 although %In1 is a 4-byte alloca -- confirm intended unit
+  %input2 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 2
+  store i32* %In2, i32** %input2, align 8, !tbaa !14
+  %Insize2 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 3
+  store i64 32, i64* %Insize2, align 8, !tbaa !15
+  %output = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 4
+  store i32* %Out, i32** %output, align 8, !tbaa !16
+  %Outsize = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 5
+  store i64 32, i64* %Outsize, align 8, !tbaa !17
+  call void @llvm.hpvm.init()
+  %4 = bitcast %struct.Root* %RootArgs to i8*
+  %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot (i32*, i64, i32*, i64, i32*, i64)* @PipeRoot_cloned to i8*), i8* %4, i1 false) ; the only launch in this test
+  call void @llvm.hpvm.wait(i8* %graphID)
+  call void @llvm.hpvm.cleanup()
+  call void @llvm.lifetime.end.p0i8(i64 48, i8* nonnull %3) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %2) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #3
+  ret i32 0
+}
+
+declare dso_local void @__hpvm__init(...) local_unnamed_addr #0
+
+declare dso_local i8* @__hpvm__launch(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__wait(i8*) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__cleanup(...) local_unnamed_addr #0
+
+declare i8* @llvm_hpvm_initializeTimerSet()
+
+declare void @llvm_hpvm_switchToTimer(i8**, i32)
+
+declare void @llvm_hpvm_printTimerSet(i8**, i8*)
+
+; Function Attrs: nounwind uwtable -- node function that creates no child nodes: returns %struct.out.Func1 holding the %Out pointer; %In, %Insize and %Outsize are not read.
+define dso_local %struct.out.Func1 @Func1_cloned(i32* in %In, i64 %Insize, i32* out %Out, i64 %Outsize) #2 {
+entry:
+  %returnStruct = insertvalue %struct.out.Func1 undef, i32* %Out, 0
+  ret %struct.out.Func1 %returnStruct
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createNode1D(i8*, i64) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.bind.input(i8*, i32, i32, i1) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.bind.output(i8*, i32, i32, i1) #3
+
+; Function Attrs: nounwind uwtable -- intermediate node: creates one 1-D child node running @Func1_cloned (i64 argument 3), binds index pairs 0..3 as inputs and pair (0,0) as output, and returns undef.
+define dso_local %struct.out.Func2 @Func2_cloned(i32* in %In, i64 %Insize, i32* out %Out, i64 %Outsize) #2 {
+entry:
+  %Func1_cloned.node = call i8* @llvm.hpvm.createNode1D(i8* bitcast (%struct.out.Func1 (i32*, i64, i32*, i64)* @Func1_cloned to i8*), i64 3) ; presumably 3 instances along the single dimension -- confirm against the HPVM spec
+  call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node, i32 0, i32 0, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node, i32 1, i32 1, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node, i32 2, i32 2, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node, i32 3, i32 3, i1 false)
+  call void @llvm.hpvm.bind.output(i8* %Func1_cloned.node, i32 0, i32 0, i1 false)
+  ret %struct.out.Func2 undef
+}
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.createNode(i8*) #3
+
+; Function Attrs: nounwind uwtable -- root node passed to llvm.hpvm.launch in @main: creates a single child node running @Func2_cloned, binds index pairs 0..3 as inputs and (0,0) as output; indices 4 and 5 of its own six parameters are not bound here.
+define dso_local %struct.out.PipeRoot @PipeRoot_cloned(i32* in %In1, i64 %Insize1, i32* in %In2, i64 %InSize2, i32* out %Out, i64 %Outsize) #2 {
+entry:
+  %Func2_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func2 (i32*, i64, i32*, i64)* @Func2_cloned to i8*))
+  call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 0, i32 0, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 1, i32 1, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 2, i32 2, i1 false)
+  call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node, i32 3, i32 3, i1 false)
+  call void @llvm.hpvm.bind.output(i8* %Func2_cloned.node, i32 0, i32 0, i1 false)
+  ret %struct.out.PipeRoot undef
+}
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.init() #3
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.launch(i8*, i8*, i1) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.wait(i8*) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.cleanup() #3
+
+attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #3 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+!hpvm_hint_cpu = !{!2, !3, !4}
+!hpvm_hint_gpu = !{}
+!hpvm_hint_spir = !{}
+!hpvm_hint_cudnn = !{}
+!hpvm_hint_promise = !{}
+!hpvm_hint_cpu_gpu = !{}
+!hpvm_hint_cpu_spir = !{}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 6690f9e7e8b46b96aea222d3e85315cd63545953)"}
+!2 = !{%struct.out.Func1 (i32*, i64, i32*, i64)* @Func1_cloned}
+!3 = !{%struct.out.Func2 (i32*, i64, i32*, i64)* @Func2_cloned}
+!4 = !{%struct.out.PipeRoot (i32*, i64, i32*, i64, i32*, i64)* @PipeRoot_cloned}
+!5 = !{!6, !6, i64 0}
+!6 = !{!"int", !7, i64 0}
+!7 = !{!"omnipotent char", !8, i64 0}
+!8 = !{!"Simple C/C++ TBAA"}
+!9 = !{!10, !11, i64 0}
+!10 = !{!"Root", !11, i64 0, !12, i64 8, !11, i64 16, !12, i64 24, !11, i64 32, !12, i64 40}
+!11 = !{!"any pointer", !7, i64 0}
+!12 = !{!"long", !7, i64 0}
+!13 = !{!10, !12, i64 8}
+!14 = !{!10, !11, i64 16}
+!15 = !{!10, !12, i64 24}
+!16 = !{!10, !11, i64 32}
+!17 = !{!10, !12, i64 40}
diff --git a/hpvm/test/regressionTests/DFG2LLVM_X86/oneLaunchAlloca.dfg.ll b/hpvm/test/regressionTests/DFG2LLVM_X86/oneLaunchAlloca.dfg.ll
new file mode 100644
index 0000000000..a0f0f6ecfc
--- /dev/null
+++ b/hpvm/test/regressionTests/DFG2LLVM_X86/oneLaunchAlloca.dfg.ll
@@ -0,0 +1,121 @@
+; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -S -dfg2llvm-x86 <  %s | FileCheck %s
+; ModuleID = 'oneLaunchAlloca.ll'
+source_filename = "oneLaunchAlloca.c"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.Root = type { i32*, i64, i32*, i64 }
+%emptyStruct = type <{}>
+
+declare dso_local void @__hpvm__hint(i32) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__attributes(i32, ...) local_unnamed_addr #0
+
+; CHECK-LABEL: i32 @main(
+; CHECK: call void @llvm.hpvm.init()
+; CHECK: call i8* @llvm_hpvm_x86_launch(i8* (i8*)* @LaunchDataflowGraph, i8*
+; CHECK-NEXT: call i8* @llvm.hpvm.launch(i8*
+; CHECK-NEXT: call void @llvm_hpvm_x86_wait(i8*
+
+; CHECK-LABEL: @PipeRoot_cloned.1(
+
+; CHECK-LABEL: @LaunchDataflowGraph(i8*
+; CHECK: call %emptyStruct @PipeRoot_cloned.1(
+; (The root in this module returns %emptyStruct; there is no %struct.out.PipeRoot type here -- TODO confirm the clone keeps that return type.)
+
+; Function Attrs: nounwind uwtable -- host entry point: fills the four fields of %RootArgs with pointers to %In/%Out and a size of 32 for each, launches @PipeRoot_cloned once and waits; %In and %Out themselves are never stored to.
+define dso_local i32 @main() local_unnamed_addr #1 {
+entry:
+  %In = alloca i32, align 4
+  %Out = alloca i32, align 4
+  %RootArgs = alloca %struct.Root, align 8
+  %0 = bitcast i32* %In to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #3
+  %1 = bitcast i32* %Out to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) #3
+  %2 = bitcast %struct.Root* %RootArgs to i8*
+  call void @llvm.lifetime.start.p0i8(i64 32, i8* nonnull %2) #3
+  %input = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 0
+  store i32* %In, i32** %input, align 8, !tbaa !3
+  %insize = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 1
+  store i64 32, i64* %insize, align 8, !tbaa !9 ; NOTE(review): size field is 32 although %In is a 4-byte alloca -- confirm intended unit
+  %output = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 2
+  store i32* %Out, i32** %output, align 8, !tbaa !10
+  %outsize = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 3
+  store i64 32, i64* %outsize, align 8, !tbaa !11
+  call void @llvm.hpvm.init()
+  %3 = bitcast %struct.Root* %RootArgs to i8*
+  %graphID = call i8* @llvm.hpvm.launch(i8* bitcast (%emptyStruct (i32*, i64, i32*, i64)* @PipeRoot_cloned to i8*), i8* %3, i1 false) ; the only launch in this test
+  call void @llvm.hpvm.wait(i8* %graphID)
+  call void @llvm.hpvm.cleanup()
+  call void @llvm.lifetime.end.p0i8(i64 32, i8* nonnull %2) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #3
+  ret i32 0
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2
+
+declare dso_local void @__hpvm__init(...) local_unnamed_addr #0
+
+declare dso_local i8* @__hpvm__launch(i32, ...) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__wait(i8*) local_unnamed_addr #0
+
+declare dso_local void @__hpvm__cleanup(...) local_unnamed_addr #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2
+
+declare i8* @llvm_hpvm_initializeTimerSet()
+
+declare void @llvm_hpvm_switchToTimer(i8**, i32)
+
+declare void @llvm_hpvm_printTimerSet(i8**, i8*)
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.init() #3
+
+; Function Attrs: nounwind
+declare i8* @llvm.hpvm.launch(i8*, i8*, i1) #3
+
+; Function Attrs: nounwind uwtable -- root node passed to llvm.hpvm.launch in @main: creates no child nodes and returns an undef %emptyStruct; none of its four parameters is read.
+define dso_local %emptyStruct @PipeRoot_cloned(i32* in %In, i64 %insize, i32* out %Out, i64 %outsize) #1 {
+entry:
+  ret %emptyStruct undef
+}
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.wait(i8*) #3
+
+; Function Attrs: nounwind
+declare void @llvm.hpvm.cleanup() #3
+
+attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #2 = { argmemonly nounwind }
+attributes #3 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+!hpvm_hint_cpu = !{!2}
+!hpvm_hint_gpu = !{}
+!hpvm_hint_spir = !{}
+!hpvm_hint_cudnn = !{}
+!hpvm_hint_promise = !{}
+!hpvm_hint_cpu_gpu = !{}
+!hpvm_hint_cpu_spir = !{}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 6690f9e7e8b46b96aea222d3e85315cd63545953)"}
+!2 = !{%emptyStruct (i32*, i64, i32*, i64)* @PipeRoot_cloned}
+!3 = !{!4, !5, i64 0}
+!4 = !{!"Root", !5, i64 0, !8, i64 8, !5, i64 16, !8, i64 24}
+!5 = !{!"any pointer", !6, i64 0}
+!6 = !{!"omnipotent char", !7, i64 0}
+!7 = !{!"Simple C/C++ TBAA"}
+!8 = !{!"long", !6, i64 0}
+!9 = !{!4, !8, i64 8}
+!10 = !{!4, !5, i64 16}
+!11 = !{!4, !8, i64 24}
diff --git a/hpvm/test/regressionTests/GenVISC/AllocationNode.ll b/hpvm/test/regressionTests/GenVISC/AllocationNode.ll
new file mode 100644
index 0000000000..e3b2d81b94
--- /dev/null
+++ b/hpvm/test/regressionTests/GenVISC/AllocationNode.ll
@@ -0,0 +1,206 @@
+; RUN: opt -load LLVMGenVISC.so -S -genhpvm <  %s | FileCheck %s
+; ModuleID = 'AllocationNode.c'
+source_filename = "AllocationNode.c"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.Root = type { i64*, i64* }
+; CHECK-LABEL: struct.Root =
+
+; CHECK-LABEL: %struct.out.Func1 =
+; CHECK-LABEL: %struct.out.Func4 =
+; CHECK-LABEL: %struct.out.Func5 =
+; CHECK-LABEL: %struct.out.Func3 =
+; CHECK-LABEL: %struct.out.Func2 =
+; CHECK-LABEL: %struct.out.PipeRoot =
+
+; CHECK-LABEL: i32 @main(
+; CHECK: [[ALLOCA:%[0-9a-zA-Z_.]+]] = alloca %struct.Root
+; CHECK: call void @llvm.hpvm.init()
+; CHECK: [[REGISTER:%[0-9]+]] = bitcast %struct.Root* [[ALLOCA]] to i8*
+; CHECK: call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot (i64*, i64*)* @PipeRoot_cloned to i8*), i8* [[REGISTER]], i1 false)
+; CHECK-NEXT: call void @llvm.hpvm.wait(i8*
+
+; CHECK-LABEL: @Func1_cloned(
+; CHECK: [[RET1:%[0-9a-zA-Z_.]+]] = insertvalue %struct.out.Func1 undef,
+; CHECK-NEXT: ret %struct.out.Func1 [[RET1]]
+; Func4: the malloc result and the returned struct get separate pattern variables, because a variable defined and used on one line refers to that line's own definition.
+; CHECK-LABEL: @Func4_cloned(
+; CHECK: [[MALLOC4:%[0-9a-zA-Z_.]+]] = call i8* @llvm.hpvm.malloc(i64
+; CHECK: [[RET4:%[0-9a-zA-Z_.]+]] = insertvalue %struct.out.Func4 undef, i8* [[MALLOC4]], 0
+; CHECK-NEXT: ret %struct.out.Func4 [[RET4]]
+
+; CHECK-LABEL: @Func5_cloned(
+; CHECK: [[RET5:%[0-9a-zA-Z_.]+]] = insertvalue %struct.out.Func5 undef,
+; CHECK-NEXT: ret %struct.out.Func5 [[RET5]]
+
+; CHECK-LABEL: @Func3_cloned(
+; CHECK: %Func4_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%struct.out.Func4 (i64*
+; CHECK-NEXT: %Func5_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%struct.out.Func5 (i64*
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func4_cloned.node,
+; CHECK-NEXT: call i8* @llvm.hpvm.createEdge(i8* %Func4_cloned.node, i8* %Func5_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func4_cloned.node,
+
+; CHECK-LABEL: @Func2_cloned(
+; CHECK: %Func3_cloned.node = call i8* @llvm.hpvm.createNode1D(i8* bitcast (%struct.out.Func3 (i64*
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func3_cloned.node,
+
+; CHECK-LABEL: @PipeRoot_cloned(i64*
+; CHECK: %Func1_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func1 (i64*
+; CHECK-NEXT: %Func2_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func2 (i64*
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node,
+; CHECK-NEXT: call i8* @llvm.hpvm.createEdge(i8* %Func1_cloned.node, i8* %Func2_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func1_cloned.node,
+
+
+; Function Attrs: nounwind uwtable -- source-level node function expressed with __hpvm__ marker calls: hint value 1, an attributes call listing %In then %Out (each preceded by a count of 1), and a return call passing %Out. The negative directive inside the body requires that the text "@Func1(" is absent from the pass output.
+define dso_local void @Func1(i64* %In, i64* %Out) #0 {
+; CHECK-NOT: @Func1(
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 1, i64* %In, i32 1, i64* %Out) #3
+  tail call void (i32, ...) @__hpvm__return(i32 1, i64* %Out) #3
+  ret void
+}
+
+declare dso_local void @__hpvm__hint(i32) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__attributes(i32, ...) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__return(i32, ...) local_unnamed_addr #1
+
+; Function Attrs: nounwind uwtable -- source-level node function that loads an i64 from %In, passes it to @__hpvm__malloc, and hands the resulting i8* to the __hpvm__return marker; %Out appears only in the attributes call. The negative directive inside the body requires that "@Func4(" is absent from the pass output.
+define dso_local void @Func4(i64* %In, i64* %Out) #0 {
+; CHECK-NOT: @Func4(
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 1, i64* %In, i32 1, i64* %Out) #3
+  %0 = load i64, i64* %In, align 8, !tbaa !2 ; value read here becomes the malloc argument
+  %call = tail call i8* @__hpvm__malloc(i64 %0) #3
+  tail call void (i32, ...) @__hpvm__return(i32 1, i8* %call) #3
+  ret void
+}
+
+declare dso_local i8* @__hpvm__malloc(i64) local_unnamed_addr #1
+
+; Function Attrs: nounwind uwtable -- source-level node function with the same body shape as @Func1: hint value 1, an attributes call listing %In then %Out, and a return call passing %Out. The negative directive inside the body requires that "@Func5(" is absent from the pass output.
+define dso_local void @Func5(i64* %In, i64* %Out) #0 {
+; CHECK-NOT: @Func5(
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 1, i64* %In, i32 1, i64* %Out) #3
+  tail call void (i32, ...) @__hpvm__return(i32 1, i64* %Out) #3
+  ret void
+}
+
+; Function Attrs: nounwind uwtable -- source-level internal node: creates two child nodes via __hpvm__createNodeND with leading argument 2 (@Func4 with extents 3,6 and @Func5 with extents 4,5), binds input and output on the @Func4 node, and adds one edge from the @Func4 node to the @Func5 node. The negative directive inside the body requires that "@Func3(" is absent from the pass output.
+define dso_local void @Func3(i64* %In, i64* %Out) #0 {
+; CHECK-NOT: @Func3(
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 1, i64* %In, i32 1, i64* %Out) #3
+  %call = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 2, void (i64*, i64*)* nonnull @Func4, i64 3, i64 6) #3
+  %call1 = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 2, void (i64*, i64*)* nonnull @Func5, i64 4, i64 5) #3
+  tail call void @__hpvm__bindIn(i8* %call, i32 0, i32 0, i32 0) #3
+  %call2 = tail call i8* @__hpvm__edge(i8* %call, i8* %call1, i32 1, i32 0, i32 1, i32 0) #3 ; presumably (src, dst, replication type, src port, dst port, streaming flag) -- confirm against the HPVM API
+  tail call void @__hpvm__bindOut(i8* %call, i32 0, i32 0, i32 0) #3
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2
+
+declare dso_local i8* @__hpvm__createNodeND(i32, ...) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__bindIn(i8*, i32, i32, i32) local_unnamed_addr #1
+
+declare dso_local i8* @__hpvm__edge(i8*, i8*, i32, i32, i32, i32) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__bindOut(i8*, i32, i32, i32) local_unnamed_addr #1
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2
+
+; Function Attrs: nounwind uwtable -- internal node wrapping a single child node for @Func3; it has no __hpvm__return call.
+define dso_local void @Func2(i64* %BindIn, i64* %SrcIn, i64* %Out) #0 {
+; CHECK-NOT: @Func2(
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 2, i64* %BindIn, i64* %SrcIn, i32 1, i64* %Out) #3 ; count-prefixed lists: 2 pointers, then 1 pointer
+  %call = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 1, void (i64*, i64*)* nonnull @Func3, i64 3) #3 ; i32 1 followed by one i64 value (3)
+  tail call void @__hpvm__bindIn(i8* %call, i32 0, i32 0, i32 0) #3
+  tail call void @__hpvm__bindOut(i8* %call, i32 0, i32 0, i32 0) #3
+  ret void
+}
+
+; Function Attrs: nounwind uwtable -- root graph function passed to __hpvm__launch: nodes for @Func1 and @Func2 plus one edge.
+define dso_local void @PipeRoot(i64* %In, i64* %Out) #0 {
+; CHECK-NOT: @PipeRoot(
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 1, i64* %In, i32 1, i64* %Out) #3
+  %call = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 0, void (i64*, i64*)* nonnull @Func1) #3 ; i32 0: no trailing i64 values
+  %call1 = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 0, void (i64*, i64*, i64*)* nonnull @Func2) #3
+  tail call void @__hpvm__bindIn(i8* %call, i32 0, i32 0, i32 0) #3
+  tail call void @__hpvm__bindIn(i8* %call1, i32 0, i32 0, i32 0) #3
+  %call2 = tail call i8* @__hpvm__edge(i8* %call, i8* %call1, i32 1, i32 0, i32 1, i32 0) #3 ; edge from the @Func1 node to the @Func2 node
+  tail call void @__hpvm__bindOut(i8* %call, i32 0, i32 0, i32 0) #3 ; output is bound on the @Func1 node (%call)
+  ret void
+}
+
+; Function Attrs: nounwind uwtable -- test driver: packs pointers to %In/%Out into %struct.Root and launches @PipeRoot.
+define dso_local i32 @main() local_unnamed_addr #0 {
+entry:
+  %In = alloca i64, align 8
+  %Out = alloca i64, align 8
+  %RootArgs = alloca %struct.Root, align 8
+  %0 = bitcast i64* %In to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %0) #3
+  store i64 1, i64* %In, align 8, !tbaa !2
+  %1 = bitcast i64* %Out to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %1) #3
+  store i64 0, i64* %Out, align 8, !tbaa !2
+  %2 = bitcast %struct.Root* %RootArgs to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %2) #3
+  %input = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 0
+  store i64* %In, i64** %input, align 8, !tbaa !6
+  %output = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 1
+  store i64* %Out, i64** %output, align 8, !tbaa !9
+  call void (...) @__hpvm__init() #3
+  %call = call i8* (i32, ...) @__hpvm__launch(i32 0, void (i64*, i64*)* nonnull @PipeRoot, %struct.Root* nonnull %RootArgs) #3 ; launch takes the root function and the packed argument struct
+  call void @__hpvm__wait(i8* %call) #3 ; waits on the handle returned by the launch call
+  call void (...) @__hpvm__cleanup() #3
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %2) #3
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %1) #3
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %0) #3
+  ret i32 0
+}
+
+declare dso_local void @__hpvm__init(...) local_unnamed_addr #1
+
+declare dso_local i8* @__hpvm__launch(i32, ...) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__wait(i8*) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__cleanup(...) local_unnamed_addr #1
+
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #2 = { argmemonly nounwind }
+attributes #3 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 6690f9e7e8b46b96aea222d3e85315cd63545953)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"long", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
+!6 = !{!7, !8, i64 0}
+!7 = !{!"Root", !8, i64 0, !8, i64 8}
+!8 = !{!"any pointer", !4, i64 0}
+!9 = !{!7, !8, i64 8}
diff --git a/hpvm/test/regressionTests/GenVISC/CreateNode.ll b/hpvm/test/regressionTests/GenVISC/CreateNode.ll
new file mode 100644
index 0000000000..72c3d110e2
--- /dev/null
+++ b/hpvm/test/regressionTests/GenVISC/CreateNode.ll
@@ -0,0 +1,119 @@
+; RUN: opt -load LLVMGenVISC.so -S -genhpvm <  %s | FileCheck %s
+; ModuleID = 'CreateNode.c'
+source_filename = "CreateNode.c"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.Root = type { i32*, i32* }
+; CHECK-LABEL: struct.Root =
+; One %struct.out.<name> return type is expected for each node function.
+; CHECK-LABEL: %struct.out.Func =
+; CHECK-LABEL: %struct.out.PipeRoot =
+; Value-name patterns below accept the digit 0, so names such as %0 or %10 match.
+; CHECK-LABEL: i32 @main(
+; CHECK: [[ALLOCA:%[0-9a-zA-Z._]+]] = alloca %struct.Root
+; CHECK: call void @llvm.hpvm.init()
+; CHECK:  [[REGISTER:%[0-9]+]] = bitcast %struct.Root* [[ALLOCA]] to i8*
+; CHECK: call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot (i32*, i32*)* @PipeRoot_cloned to i8*), i8* [[REGISTER]], i1 false)
+; CHECK-NEXT: call void @llvm.hpvm.wait(i8*
+; The leaf clone builds its return struct and returns it on the next line.
+; CHECK-LABEL: @Func_cloned(
+; CHECK: [[RET:%[0-9a-zA-Z._]+]] = insertvalue %struct.out.Func undef,
+; CHECK-NEXT: ret %struct.out.Func [[RET]]
+; The root clone creates the Func node, then binds its input and its output.
+; CHECK-LABEL: @PipeRoot_cloned(i32*
+; CHECK: %Func_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func (i32*
+; CHECK-NEXT: @llvm.hpvm.bind.input(i8* %Func_cloned.node
+; CHECK-NEXT: @llvm.hpvm.bind.output(i8* %Func_cloned.node
+
+
+; Function Attrs: nounwind uwtable -- node body made only of HPVM marker calls; it hands %Out to __hpvm__return.
+define dso_local void @Func(i32* %In, i32* %Out) #0 {
+; CHECK-NOT: @Func(
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 1, i32* %In, i32 1, i32* %Out) #3 ; two count-prefixed pointer lists: 1 x %In, then 1 x %Out
+  tail call void (i32, ...) @__hpvm__return(i32 1, i32* %Out) #3
+  ret void
+}
+
+declare dso_local void @__hpvm__hint(i32) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__attributes(i32, ...) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__return(i32, ...) local_unnamed_addr #1
+
+; Function Attrs: nounwind uwtable -- root graph function passed to __hpvm__launch: a single node for @Func.
+define dso_local void @PipeRoot(i32* %In, i32* %Out) #0 {
+; CHECK-NOT: @PipeRoot(
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 1, i32* %In, i32 1, i32* %Out) #3
+  %call = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 0, void (i32*, i32*)* nonnull @Func) #3 ; i32 0: no trailing i64 values; the checks above expect a plain createNode
+  tail call void @__hpvm__bindIn(i8* %call, i32 0, i32 0, i32 0) #3
+  tail call void @__hpvm__bindOut(i8* %call, i32 0, i32 0, i32 0) #3
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2
+
+declare dso_local i8* @__hpvm__createNodeND(i32, ...) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__bindIn(i8*, i32, i32, i32) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__bindOut(i8*, i32, i32, i32) local_unnamed_addr #1
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2
+
+; Function Attrs: nounwind uwtable -- test driver: packs pointers to %In/%Out into %struct.Root and launches @PipeRoot.
+define dso_local i32 @main() local_unnamed_addr #0 {
+entry:
+  %In = alloca i32, align 4
+  %Out = alloca i32, align 4
+  %RootArgs = alloca %struct.Root, align 8
+  %0 = bitcast i32* %In to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #3
+  %1 = bitcast i32* %Out to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) #3
+  %2 = bitcast %struct.Root* %RootArgs to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %2) #3
+  %input = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 0
+  store i32* %In, i32** %input, align 8, !tbaa !2
+  %output = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 1
+  store i32* %Out, i32** %output, align 8, !tbaa !7
+  call void (...) @__hpvm__init() #3
+  %call = call i8* (i32, ...) @__hpvm__launch(i32 0, void (i32*, i32*)* nonnull @PipeRoot, %struct.Root* nonnull %RootArgs) #3 ; launch takes the root function and the packed argument struct
+  call void @__hpvm__wait(i8* %call) #3 ; waits on the handle returned by the launch call
+  call void (...) @__hpvm__cleanup() #3
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %2) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #3
+  ret i32 0
+}
+
+declare dso_local void @__hpvm__init(...) local_unnamed_addr #1
+
+declare dso_local i8* @__hpvm__launch(i32, ...) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__wait(i8*) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__cleanup(...) local_unnamed_addr #1
+
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #2 = { argmemonly nounwind }
+attributes #3 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 6690f9e7e8b46b96aea222d3e85315cd63545953)"}
+!2 = !{!3, !4, i64 0}
+!3 = !{!"Root", !4, i64 0, !4, i64 8}
+!4 = !{!"any pointer", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
+!7 = !{!3, !4, i64 8}
diff --git a/hpvm/test/regressionTests/GenVISC/CreateNodeAndEdge.ll b/hpvm/test/regressionTests/GenVISC/CreateNodeAndEdge.ll
new file mode 100644
index 0000000000..a4e802f5cd
--- /dev/null
+++ b/hpvm/test/regressionTests/GenVISC/CreateNodeAndEdge.ll
@@ -0,0 +1,153 @@
+; RUN: opt -load LLVMGenVISC.so -S -genhpvm <  %s | FileCheck %s
+; ModuleID = 'CreateNodeAndEdge.c'
+source_filename = "CreateNodeAndEdge.c"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.Root = type { i32*, i32*, i32* }
+; CHECK-LABEL: struct.Root =
+
+; CHECK-LABEL: %struct.out.Func1 =
+; CHECK-LABEL: %struct.out.Func2 =
+; CHECK-LABEL: %struct.out.PipeRoot =
+
+; CHECK-LABEL: i32 @main(
+; CHECK: [[ALLOCA:%[0-9a-zA-Z._]+]] = alloca %struct.Root
+; CHECK: call void @llvm.hpvm.init()
+; CHECK:  [[REGISTER:%[0-9]+]] = bitcast %struct.Root* [[ALLOCA]] to i8*
+; CHECK: call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot (i32*, i32*, i32*)* @PipeRoot_cloned to i8*), i8* [[REGISTER]],
+; CHECK-NEXT: call void @llvm.hpvm.wait(i8*
+; Value-name patterns accept the digit 0, so names such as %0 or %10 match.
+; CHECK-LABEL: @Func1_cloned(
+; CHECK: [[RET1:%[0-9a-zA-Z._]+]] = insertvalue %struct.out.Func1 undef,
+; CHECK-NEXT: ret %struct.out.Func1 [[RET1]]
+
+; CHECK-LABEL: @Func2_cloned(
+; CHECK: [[RET2:%[0-9a-zA-Z._]+]] = insertvalue %struct.out.Func2 undef,
+; CHECK-NEXT: ret %struct.out.Func2 [[RET2]]
+
+; The root clone creates both nodes, binds their inputs, connects them, then binds the Func2 output.
+; CHECK-LABEL: @PipeRoot_cloned(i32*
+; CHECK: %Func1_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func1 (i32*
+; CHECK: %Func2_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func2 (i32*
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node,
+; CHECK-NEXT: call i8* @llvm.hpvm.createEdge(i8* %Func1_cloned.node, i8* %Func2_cloned.node,
+; CHECK-NEXT: @llvm.hpvm.bind.output(i8* %Func2_cloned.node
+
+
+
+; Function Attrs: nounwind uwtable -- node body made only of HPVM marker calls; it hands %Out to __hpvm__return.
+define dso_local void @Func1(i32* %In, i32* %Out) #0 {
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 1, i32* %In, i32 1, i32* %Out) #3 ; two count-prefixed pointer lists: 1 x %In, then 1 x %Out
+  tail call void (i32, ...) @__hpvm__return(i32 1, i32* %Out) #3
+  ret void
+}
+
+declare dso_local void @__hpvm__hint(i32) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__attributes(i32, ...) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__return(i32, ...) local_unnamed_addr #1
+
+; Function Attrs: nounwind uwtable -- three-pointer node body made only of HPVM marker calls; it hands %Out to __hpvm__return.
+define dso_local void @Func2(i32* %BindIn, i32* %SrcIn, i32* %Out) #0 {
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 2, i32* %BindIn, i32* %SrcIn, i32 1, i32* %Out) #3 ; count-prefixed lists: 2 pointers, then 1 pointer
+  tail call void (i32, ...) @__hpvm__return(i32 1, i32* %Out) #3
+  ret void
+}
+
+; Function Attrs: nounwind uwtable -- root graph function passed to __hpvm__launch: nodes for @Func1 and @Func2 plus one edge.
+define dso_local void @PipeRoot(i32* %In1, i32* %In2, i32* %Out) #0 {
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 2, i32* %In1, i32* %In2, i32 1, i32* %Out) #3
+  %call = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 0, void (i32*, i32*)* nonnull @Func1) #3 ; i32 0: no trailing i64 values; the checks above expect a plain createNode
+  %call1 = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 0, void (i32*, i32*, i32*)* nonnull @Func2) #3
+  tail call void @__hpvm__bindIn(i8* %call, i32 0, i32 0, i32 0) #3
+  tail call void @__hpvm__bindIn(i8* %call1, i32 1, i32 0, i32 0) #3
+  %call2 = tail call i8* @__hpvm__edge(i8* %call, i8* %call1, i32 0, i32 1, i32 1, i32 0) #3 ; edge from the @Func1 node to the @Func2 node
+  tail call void @__hpvm__bindOut(i8* %call1, i32 0, i32 0, i32 0) #3 ; output is bound on the @Func2 node (%call1)
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2
+
+declare dso_local i8* @__hpvm__createNodeND(i32, ...) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__bindIn(i8*, i32, i32, i32) local_unnamed_addr #1
+
+declare dso_local i8* @__hpvm__edge(i8*, i8*, i32, i32, i32, i32) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__bindOut(i8*, i32, i32, i32) local_unnamed_addr #1
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2
+
+; Function Attrs: nounwind uwtable -- test driver: packs pointers to %In1/%In2/%Out into %struct.Root and launches @PipeRoot.
+define dso_local i32 @main() local_unnamed_addr #0 {
+entry:
+  %In1 = alloca i32, align 4
+  %In2 = alloca i32, align 4
+  %Out = alloca i32, align 4
+  %RootArgs = alloca %struct.Root, align 8
+  %0 = bitcast i32* %In1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #3
+  store i32 1, i32* %In1, align 4, !tbaa !2
+  %1 = bitcast i32* %In2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) #3
+  store i32 2, i32* %In2, align 4, !tbaa !2
+  %2 = bitcast i32* %Out to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %2) #3
+  store i32 0, i32* %Out, align 4, !tbaa !2
+  %3 = bitcast %struct.Root* %RootArgs to i8*
+  call void @llvm.lifetime.start.p0i8(i64 24, i8* nonnull %3) #3
+  %input1 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 0
+  store i32* %In1, i32** %input1, align 8, !tbaa !6
+  %input2 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 1
+  store i32* %In2, i32** %input2, align 8, !tbaa !9
+  %output = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 2
+  store i32* %Out, i32** %output, align 8, !tbaa !10
+  call void (...) @__hpvm__init() #3
+  %call = call i8* (i32, ...) @__hpvm__launch(i32 0, void (i32*, i32*, i32*)* nonnull @PipeRoot, %struct.Root* nonnull %RootArgs) #3 ; launch takes the root function and the packed argument struct
+  call void @__hpvm__wait(i8* %call) #3 ; waits on the handle returned by the launch call
+  call void (...) @__hpvm__cleanup() #3
+  call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull %3) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %2) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #3
+  ret i32 0
+}
+
+declare dso_local void @__hpvm__init(...) local_unnamed_addr #1
+
+declare dso_local i8* @__hpvm__launch(i32, ...) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__wait(i8*) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__cleanup(...) local_unnamed_addr #1
+
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #2 = { argmemonly nounwind }
+attributes #3 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 6690f9e7e8b46b96aea222d3e85315cd63545953)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
+!6 = !{!7, !8, i64 0}
+!7 = !{!"Root", !8, i64 0, !8, i64 8, !8, i64 16}
+!8 = !{!"any pointer", !4, i64 0}
+!9 = !{!7, !8, i64 8}
+!10 = !{!7, !8, i64 16}
diff --git a/hpvm/test/regressionTests/GenVISC/LeafBindEdge.ll b/hpvm/test/regressionTests/GenVISC/LeafBindEdge.ll
new file mode 100644
index 0000000000..3f89f80c00
--- /dev/null
+++ b/hpvm/test/regressionTests/GenVISC/LeafBindEdge.ll
@@ -0,0 +1,212 @@
+; RUN: opt -load LLVMGenVISC.so -S -genhpvm <  %s | FileCheck %s
+; ModuleID = 'LeafBindEdge.c'
+source_filename = "LeafBindEdge.c"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.Root = type { i32*, i32*, i32* }
+; CHECK-LABEL: struct.Root =
+
+; CHECK-LABEL: %struct.out.Func1 =
+; CHECK-LABEL: %struct.out.Func4 =
+; CHECK-LABEL: %struct.out.Func5 =
+; CHECK-LABEL: %struct.out.Func3 =
+; CHECK-LABEL: %struct.out.Func2 =
+; CHECK-LABEL: %struct.out.PipeRoot =
+
+; CHECK-LABEL: i32 @main(
+; CHECK: [[ALLOCA:%[0-9a-zA-Z._]+]] = alloca %struct.Root
+; CHECK: call void @llvm.hpvm.init()
+; CHECK:  [[REGISTER:%[0-9]+]] = bitcast %struct.Root* [[ALLOCA]] to i8*
+; CHECK: call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot (i32*, i32*, i32*)* @PipeRoot_cloned to i8*), i8* [[REGISTER]],
+; CHECK-NEXT: call void @llvm.hpvm.wait(i8*
+; Value-name patterns accept the digit 0, so names such as %0 or %10 match.
+; CHECK-LABEL: @Func1_cloned(
+; CHECK: [[RET1:%[0-9a-zA-Z._]+]] = insertvalue %struct.out.Func1 undef,
+; CHECK-NEXT: ret %struct.out.Func1 [[RET1]]
+
+; CHECK-LABEL: @Func4_cloned(
+; CHECK: [[RET4:%[0-9a-zA-Z._]+]] = insertvalue %struct.out.Func4 undef,
+; CHECK-NEXT: ret %struct.out.Func4 [[RET4]]
+
+; CHECK-LABEL: @Func5_cloned(
+; CHECK: [[RET5:%[0-9a-zA-Z._]+]] = insertvalue %struct.out.Func5 undef,
+; CHECK-NEXT: ret %struct.out.Func5 [[RET5]]
+; Func3 creates its two child nodes with the 2D intrinsic and wires Func4 into Func5.
+; CHECK-LABEL: @Func3_cloned(
+; CHECK: %Func4_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%struct.out.Func4 (i32*
+; CHECK-NEXT: %Func5_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%struct.out.Func5 (i32*
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func4_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func5_cloned.node,
+; CHECK-NEXT: call i8* @llvm.hpvm.createEdge(i8* %Func4_cloned.node, i8* %Func5_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func5_cloned.node,
+; Func2 creates its single child node with the 1D intrinsic.
+; CHECK-LABEL: @Func2_cloned(
+; CHECK: %Func3_cloned.node = call i8* @llvm.hpvm.createNode1D(i8* bitcast (%struct.out.Func3 (i32*
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func3_cloned.node,
+; The root creates its two child nodes with the plain createNode intrinsic.
+; CHECK-LABEL: @PipeRoot_cloned(i32*
+; CHECK: %Func1_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func1 (i32*
+; CHECK-NEXT: %Func2_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func2 (i32*
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node,
+; CHECK-NEXT: call i8* @llvm.hpvm.createEdge(i8* %Func1_cloned.node, i8* %Func2_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func2_cloned.node,
+
+
+
+; Function Attrs: nounwind uwtable -- node body made only of HPVM marker calls; it hands %Out to __hpvm__return.
+define dso_local void @Func1(i32* %In, i32* %Out) #0 {
+; CHECK-NOT: @Func1(
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 1, i32* %In, i32 1, i32* %Out) #3 ; two count-prefixed pointer lists: 1 x %In, then 1 x %Out
+  tail call void (i32, ...) @__hpvm__return(i32 1, i32* %Out) #3
+  ret void
+}
+
+declare dso_local void @__hpvm__hint(i32) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__attributes(i32, ...) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__return(i32, ...) local_unnamed_addr #1
+
+; Function Attrs: nounwind uwtable -- node body made only of HPVM marker calls; used by @Func3 as the source node of its edge.
+define dso_local void @Func4(i32* %In, i32* %Out) #0 {
+; CHECK-NOT: @Func4(
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 1, i32* %In, i32 1, i32* %Out) #3 ; two count-prefixed pointer lists: 1 x %In, then 1 x %Out
+  tail call void (i32, ...) @__hpvm__return(i32 1, i32* %Out) #3
+  ret void
+}
+
+; Function Attrs: nounwind uwtable -- three-pointer node body made only of HPVM marker calls; used by @Func3 as the destination node of its edge.
+define dso_local void @Func5(i32* %In1, i32* %In2, i32* %Out) #0 {
+; CHECK-NOT: @Func5(
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 2, i32* %In1, i32* %In2, i32 1, i32* %Out) #3 ; count-prefixed lists: 2 pointers, then 1 pointer
+  tail call void (i32, ...) @__hpvm__return(i32 1, i32* %Out) #3
+  ret void
+}
+
+; Function Attrs: nounwind uwtable -- internal node: creates nodes for @Func4 and @Func5 and joins them with one edge.
+define dso_local void @Func3(i32* %In, i32* %Out) #0 {
+; CHECK-NOT: @Func3(
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 1, i32* %In, i32 1, i32* %Out) #3
+  %call = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 2, void (i32*, i32*)* nonnull @Func4, i64 3, i64 6) #3 ; i32 2 plus two i64 values; the checks above expect the 2D intrinsic here
+  %call1 = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 2, void (i32*, i32*, i32*)* nonnull @Func5, i64 4, i64 5) #3
+  tail call void @__hpvm__bindIn(i8* %call, i32 0, i32 0, i32 0) #3
+  tail call void @__hpvm__bindIn(i8* %call1, i32 1, i32 0, i32 0) #3
+  %call2 = tail call i8* @__hpvm__edge(i8* %call, i8* %call1, i32 0, i32 1, i32 1, i32 0) #3 ; edge from the @Func4 node to the @Func5 node
+  tail call void @__hpvm__bindOut(i8* %call1, i32 0, i32 0, i32 0) #3 ; output is bound on the @Func5 node (%call1)
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2
+
+declare dso_local i8* @__hpvm__createNodeND(i32, ...) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__bindIn(i8*, i32, i32, i32) local_unnamed_addr #1
+
+declare dso_local i8* @__hpvm__edge(i8*, i8*, i32, i32, i32, i32) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__bindOut(i8*, i32, i32, i32) local_unnamed_addr #1
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2
+
+; Function Attrs: nounwind uwtable -- internal node wrapping a single child node for @Func3; it has no __hpvm__return call.
+define dso_local void @Func2(i32* %BindIn, i32* %SrcIn, i32* %Out) #0 {
+; CHECK-NOT: @Func2(
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 2, i32* %BindIn, i32* %SrcIn, i32 1, i32* %Out) #3 ; count-prefixed lists: 2 pointers, then 1 pointer
+  %call = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 1, void (i32*, i32*)* nonnull @Func3, i64 3) #3 ; i32 1 plus one i64 value; the checks above expect the 1D intrinsic here
+  tail call void @__hpvm__bindIn(i8* %call, i32 0, i32 0, i32 0) #3
+  tail call void @__hpvm__bindOut(i8* %call, i32 0, i32 0, i32 0) #3
+  ret void
+}
+
+; Function Attrs: nounwind uwtable -- root graph function passed to __hpvm__launch: nodes for @Func1 and @Func2 plus one edge.
+define dso_local void @PipeRoot(i32* %In1, i32* %In2, i32* %Out) #0 {
+; CHECK-NOT: @PipeRoot(
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 2, i32* %In1, i32* %In2, i32 1, i32* %Out) #3
+  %call = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 0, void (i32*, i32*)* nonnull @Func1) #3 ; i32 0: no trailing i64 values; the checks above expect a plain createNode
+  %call1 = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 0, void (i32*, i32*, i32*)* nonnull @Func2) #3
+  tail call void @__hpvm__bindIn(i8* %call, i32 0, i32 0, i32 0) #3
+  tail call void @__hpvm__bindIn(i8* %call1, i32 1, i32 0, i32 0) #3
+  %call2 = tail call i8* @__hpvm__edge(i8* %call, i8* %call1, i32 0, i32 1, i32 1, i32 0) #3 ; edge from the @Func1 node to the @Func2 node
+  tail call void @__hpvm__bindOut(i8* %call1, i32 0, i32 0, i32 0) #3 ; output is bound on the @Func2 node (%call1)
+  ret void
+}
+
+; Function Attrs: nounwind uwtable -- test driver: packs pointers to %In1/%In2/%Out into %struct.Root and launches @PipeRoot.
+define dso_local i32 @main() local_unnamed_addr #0 {
+entry:
+  %In1 = alloca i32, align 4
+  %In2 = alloca i32, align 4
+  %Out = alloca i32, align 4
+  %RootArgs = alloca %struct.Root, align 8
+  %0 = bitcast i32* %In1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #3
+  store i32 1, i32* %In1, align 4, !tbaa !2
+  %1 = bitcast i32* %In2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) #3
+  store i32 2, i32* %In2, align 4, !tbaa !2
+  %2 = bitcast i32* %Out to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %2) #3
+  store i32 0, i32* %Out, align 4, !tbaa !2
+  %3 = bitcast %struct.Root* %RootArgs to i8*
+  call void @llvm.lifetime.start.p0i8(i64 24, i8* nonnull %3) #3
+  %input1 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 0
+  store i32* %In1, i32** %input1, align 8, !tbaa !6
+  %input2 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 1
+  store i32* %In2, i32** %input2, align 8, !tbaa !9
+  %output = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 2
+  store i32* %Out, i32** %output, align 8, !tbaa !10
+  call void (...) @__hpvm__init() #3
+  %call = call i8* (i32, ...) @__hpvm__launch(i32 0, void (i32*, i32*, i32*)* nonnull @PipeRoot, %struct.Root* nonnull %RootArgs) #3 ; launch takes the root function and the packed argument struct
+  call void @__hpvm__wait(i8* %call) #3 ; waits on the handle returned by the launch call
+  call void (...) @__hpvm__cleanup() #3
+  call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull %3) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %2) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #3
+  ret i32 0
+}
+
+declare dso_local void @__hpvm__init(...) local_unnamed_addr #1
+
+declare dso_local i8* @__hpvm__launch(i32, ...) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__wait(i8*) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__cleanup(...) local_unnamed_addr #1
+
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #2 = { argmemonly nounwind }
+attributes #3 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 6690f9e7e8b46b96aea222d3e85315cd63545953)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
+!6 = !{!7, !8, i64 0}
+!7 = !{!"Root", !8, i64 0, !8, i64 8, !8, i64 16}
+!8 = !{!"any pointer", !4, i64 0}
+!9 = !{!7, !8, i64 8}
+!10 = !{!7, !8, i64 16}
diff --git a/hpvm/test/regressionTests/GenVISC/LeafNodeGetters.ll b/hpvm/test/regressionTests/GenVISC/LeafNodeGetters.ll
new file mode 100644
index 0000000000..523499eccb
--- /dev/null
+++ b/hpvm/test/regressionTests/GenVISC/LeafNodeGetters.ll
@@ -0,0 +1,242 @@
+; RUN: opt -load LLVMGenVISC.so -S -genhpvm <  %s | FileCheck %s
+; ModuleID = 'LeafNodeGetters.c'
+source_filename = "LeafNodeGetters.c"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.Root = type { i32*, i32*, i32* }
+; CHECK-LABEL: struct.Root =
+
+; CHECK-LABEL: %struct.out.Func1 =
+; CHECK-LABEL: %struct.out.Func4 =
+; CHECK-LABEL: %struct.out.Func5 =
+; CHECK-LABEL: %struct.out.Func3 =
+; CHECK-LABEL: %struct.out.Func2 =
+; CHECK-LABEL: %struct.out.PipeRoot =
+
+; CHECK-LABEL: i32 @main(
+; CHECK: [[ALLOCA:%[0-9a-zA-Z]+]] = alloca %struct.Root
+; CHECK: call void @llvm.hpvm.init()
+; CHECK:  [[REGISTER:%[0-9]+]] = bitcast %struct.Root* [[ALLOCA]] to i8*
+; CHECK: call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot (i32*, i32*, i32*)* @PipeRoot_cloned to i8*), i8* [[REGISTER]],
+; CHECK-NEXT: call void @llvm.hpvm.wait(i8*
+
+; CHECK-LABEL: @Func1_cloned(
+; CHECK: [[RET1:%[0-9a-zA-Z]+]] = insertvalue %struct.out.Func1 undef,
+; CHECK-NEXT: ret %struct.out.Func1 [[RET1]]
+
+; CHECK-LABEL: @Func4_cloned(
+; CHECK: [[RET4:%[0-9a-zA-Z]+]] = insertvalue %struct.out.Func4 undef,
+; CHECK-NEXT: ret %struct.out.Func4 [[RET4]]
+
+; CHECK-LABEL: @Func5_cloned(
+; CHECK: [[NODE:%[0-9a-zA-Z]+]] = call i8* @llvm.hpvm.getNode()
+; CHECK-NEXT: call i8* @llvm.hpvm.getParentNode(i8* [[NODE]])
+; CHECK-NEXT: call i64 @llvm.hpvm.getNodeInstanceID.x(i8* [[NODE]])
+; CHECK-NEXT: call i64 @llvm.hpvm.getNodeInstanceID.y(i8* [[NODE]])
+; CHECK-NEXT: call i64 @llvm.hpvm.getNodeInstanceID.z(i8* [[NODE]])
+; CHECK-NEXT: call i64 @llvm.hpvm.getNumNodeInstances.x(i8* [[NODE]])
+; CHECK-NEXT: call i64 @llvm.hpvm.getNumNodeInstances.y(i8* [[NODE]])
+; CHECK-NEXT: call i64 @llvm.hpvm.getNumNodeInstances.z(i8* [[NODE]])
+
+; CHECK-LABEL: @Func3_cloned(
+; CHECK: %Func4_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%struct.out.Func4 (i32*
+; CHECK-NEXT: %Func5_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%struct.out.Func5 (i32*
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func4_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func5_cloned.node,
+; CHECK-NEXT: call i8* @llvm.hpvm.createEdge(i8* %Func4_cloned.node, i8* %Func5_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func5_cloned.node,
+
+; CHECK-LABEL: @Func2_cloned(
+; CHECK: %Func3_cloned.node = call i8* @llvm.hpvm.createNode1D(i8* bitcast (%struct.out.Func3 (i32*
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func3_cloned.node,
+
+; CHECK-LABEL: @PipeRoot_cloned(i32*
+; CHECK: %Func1_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func1 (i32*
+; CHECK-NEXT: %Func2_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func2 (i32*
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node,
+; CHECK-NEXT: call i8* @llvm.hpvm.createEdge(i8* %Func1_cloned.node, i8* %Func2_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func2_cloned.node,
+
+
+
+; Function Attrs: nounwind uwtable
+define dso_local void @Func1(i32* %In, i32* %Out) #0 {
+; CHECK-NOT: @Func1(
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 1, i32* %In, i32 1, i32* %Out) #3
+  tail call void (i32, ...) @__hpvm__return(i32 1, i32* %Out) #3
+  ret void
+}
+
+declare dso_local void @__hpvm__hint(i32) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__attributes(i32, ...) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__return(i32, ...) local_unnamed_addr #1
+
+; Function Attrs: nounwind uwtable
+define dso_local void @Func4(i32* %In, i32* %Out) #0 {
+; CHECK-NOT: @Func4(
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 1, i32* %In, i32 1, i32* %Out) #3
+  tail call void (i32, ...) @__hpvm__return(i32 1, i32* %Out) #3
+  ret void
+}
+
+; Function Attrs: nounwind uwtable
+define dso_local void @Func5(i32* %In1, i32* %In2, i32* %Out) #0 {
+; CHECK-NOT: @Func5(
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 2, i32* %In1, i32* %In2, i32 1, i32* %Out) #3
+  %call = tail call i8* (...) @__hpvm__getNode() #3
+  %call1 = tail call i8* @__hpvm__getParentNode(i8* %call) #3
+  %call2 = tail call i64 @__hpvm__getNodeInstanceID_x(i8* %call) #3
+  %call3 = tail call i64 @__hpvm__getNodeInstanceID_y(i8* %call) #3
+  %call4 = tail call i64 @__hpvm__getNodeInstanceID_z(i8* %call) #3
+  %call5 = tail call i64 @__hpvm__getNumNodeInstances_x(i8* %call) #3
+  %call6 = tail call i64 @__hpvm__getNumNodeInstances_y(i8* %call) #3
+  %call7 = tail call i64 @__hpvm__getNumNodeInstances_z(i8* %call) #3
+  tail call void (i32, ...) @__hpvm__return(i32 1, i32* %Out) #3
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2
+
+declare dso_local i8* @__hpvm__getNode(...) local_unnamed_addr #1
+
+declare dso_local i8* @__hpvm__getParentNode(i8*) local_unnamed_addr #1
+
+declare dso_local i64 @__hpvm__getNodeInstanceID_x(i8*) local_unnamed_addr #1
+
+declare dso_local i64 @__hpvm__getNodeInstanceID_y(i8*) local_unnamed_addr #1
+
+declare dso_local i64 @__hpvm__getNodeInstanceID_z(i8*) local_unnamed_addr #1
+
+declare dso_local i64 @__hpvm__getNumNodeInstances_x(i8*) local_unnamed_addr #1
+
+declare dso_local i64 @__hpvm__getNumNodeInstances_y(i8*) local_unnamed_addr #1
+
+declare dso_local i64 @__hpvm__getNumNodeInstances_z(i8*) local_unnamed_addr #1
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2
+
+; Function Attrs: nounwind uwtable
+define dso_local void @Func3(i32* %In, i32* %Out) #0 {
+; CHECK-NOT: @Func3(
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 1, i32* %In, i32 1, i32* %Out) #3
+  %call = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 2, void (i32*, i32*)* nonnull @Func4, i64 3, i64 6) #3
+  %call1 = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 2, void (i32*, i32*, i32*)* nonnull @Func5, i64 4, i64 5) #3
+  tail call void @__hpvm__bindIn(i8* %call, i32 0, i32 0, i32 0) #3
+  tail call void @__hpvm__bindIn(i8* %call1, i32 1, i32 0, i32 0) #3
+  %call2 = tail call i8* @__hpvm__edge(i8* %call, i8* %call1, i32 0, i32 1, i32 1, i32 0) #3
+  tail call void @__hpvm__bindOut(i8* %call1, i32 0, i32 0, i32 0) #3
+  ret void
+}
+
+declare dso_local i8* @__hpvm__createNodeND(i32, ...) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__bindIn(i8*, i32, i32, i32) local_unnamed_addr #1
+
+declare dso_local i8* @__hpvm__edge(i8*, i8*, i32, i32, i32, i32) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__bindOut(i8*, i32, i32, i32) local_unnamed_addr #1
+
+; Function Attrs: nounwind uwtable
+define dso_local void @Func2(i32* %BindIn, i32* %SrcIn, i32* %Out) #0 {
+; CHECK-NOT: @Func2(
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 2, i32* %BindIn, i32* %SrcIn, i32 1, i32* %Out) #3
+  %call = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 1, void (i32*, i32*)* nonnull @Func3, i64 3) #3
+  tail call void @__hpvm__bindIn(i8* %call, i32 0, i32 0, i32 0) #3
+  tail call void @__hpvm__bindOut(i8* %call, i32 0, i32 0, i32 0) #3
+  ret void
+}
+
+; Function Attrs: nounwind uwtable
+define dso_local void @PipeRoot(i32* %In1, i32* %In2, i32* %Out) #0 {
+; CHECK-NOT: @PipeRoot(
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 2, i32* %In1, i32* %In2, i32 1, i32* %Out) #3
+  %call = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 0, void (i32*, i32*)* nonnull @Func1) #3
+  %call1 = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 0, void (i32*, i32*, i32*)* nonnull @Func2) #3
+  tail call void @__hpvm__bindIn(i8* %call, i32 0, i32 0, i32 0) #3
+  tail call void @__hpvm__bindIn(i8* %call1, i32 1, i32 0, i32 0) #3
+  %call2 = tail call i8* @__hpvm__edge(i8* %call, i8* %call1, i32 0, i32 1, i32 1, i32 0) #3
+  tail call void @__hpvm__bindOut(i8* %call1, i32 0, i32 0, i32 0) #3
+  ret void
+}
+
+; Function Attrs: nounwind uwtable
+define dso_local i32 @main() local_unnamed_addr #0 {
+entry:
+  %In1 = alloca i32, align 4
+  %In2 = alloca i32, align 4
+  %Out = alloca i32, align 4
+  %RootArgs = alloca %struct.Root, align 8
+  %0 = bitcast i32* %In1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #3
+  store i32 1, i32* %In1, align 4, !tbaa !2
+  %1 = bitcast i32* %In2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) #3
+  store i32 2, i32* %In2, align 4, !tbaa !2
+  %2 = bitcast i32* %Out to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %2) #3
+  store i32 0, i32* %Out, align 4, !tbaa !2
+  %3 = bitcast %struct.Root* %RootArgs to i8*
+  call void @llvm.lifetime.start.p0i8(i64 24, i8* nonnull %3) #3
+  %input1 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 0
+  store i32* %In1, i32** %input1, align 8, !tbaa !6
+  %input2 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 1
+  store i32* %In2, i32** %input2, align 8, !tbaa !9
+  %output = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 2
+  store i32* %Out, i32** %output, align 8, !tbaa !10
+  call void (...) @__hpvm__init() #3
+  %call = call i8* (i32, ...) @__hpvm__launch(i32 0, void (i32*, i32*, i32*)* nonnull @PipeRoot, %struct.Root* nonnull %RootArgs) #3
+  call void @__hpvm__wait(i8* %call) #3
+  call void (...) @__hpvm__cleanup() #3
+  call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull %3) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %2) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #3
+  ret i32 0
+}
+
+declare dso_local void @__hpvm__init(...) local_unnamed_addr #1
+
+declare dso_local i8* @__hpvm__launch(i32, ...) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__wait(i8*) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__cleanup(...) local_unnamed_addr #1
+
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #2 = { argmemonly nounwind }
+attributes #3 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 6690f9e7e8b46b96aea222d3e85315cd63545953)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
+!6 = !{!7, !8, i64 0}
+!7 = !{!"Root", !8, i64 0, !8, i64 8, !8, i64 16}
+!8 = !{!"any pointer", !4, i64 0}
+!9 = !{!7, !8, i64 8}
+!10 = !{!7, !8, i64 16}
diff --git a/hpvm/test/regressionTests/GenVISC/PopAndPush.ll b/hpvm/test/regressionTests/GenVISC/PopAndPush.ll
new file mode 100644
index 0000000000..f21b072f5e
--- /dev/null
+++ b/hpvm/test/regressionTests/GenVISC/PopAndPush.ll
@@ -0,0 +1,92 @@
+; RUN: opt -load LLVMGenVISC.so -S -genhpvm <  %s | FileCheck %s
+; ModuleID = 'PopAndPush.c'
+source_filename = "PopAndPush.c"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.Root = type { i32*, i32* }
+
+; Function Attrs: nounwind uwtable
+define dso_local void @PipeRoot(i32* %In, i32* %Out) #0 {
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 1, i32* %In, i32 1, i32* %Out) #3
+  tail call void (i32, ...) @__hpvm__return(i32 1, i32* %Out) #3
+  ret void
+}
+
+declare dso_local void @__hpvm__hint(i32) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__attributes(i32, ...) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__return(i32, ...) local_unnamed_addr #1
+
+; Function Attrs: nounwind uwtable
+define dso_local i32 @main() local_unnamed_addr #0 {
+; CHECK-LABEL: i32 @main(
+; CHECK: [[ALLOCA:%[0-9a-zA-Z]+]] = alloca %struct.Root
+; CHECK: call void @llvm.hpvm.init()
+; CHECK:  [[REGISTER:%[0-9]+]] = bitcast %struct.Root* [[ALLOCA]] to i8*
+; CHECK: [[GRAPH:%[a-zA-Z0-9]+]] = call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot (i32*, i32*)* @PipeRoot_cloned to i8*), i8* [[REGISTER]],
+; CHECK-NEXT: call void @llvm.hpvm.wait(i8*
+; CHECK-NEXT: call void @llvm.hpvm.push(i8* [[GRAPH]],
+entry:
+  %In = alloca i32, align 4
+  %Out = alloca i32, align 4
+  %RootArgs = alloca %struct.Root, align 8
+  %0 = bitcast i32* %In to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #3
+  %1 = bitcast i32* %Out to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) #3
+  %2 = bitcast %struct.Root* %RootArgs to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %2) #3
+  %input = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 0
+  store i32* %In, i32** %input, align 8, !tbaa !2
+  %output = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 1
+  store i32* %Out, i32** %output, align 8, !tbaa !7
+  call void (...) @__hpvm__init() #3
+  %call = call i8* (i32, ...) @__hpvm__launch(i32 0, void (i32*, i32*)* nonnull @PipeRoot, %struct.Root* nonnull %RootArgs) #3
+  call void @__hpvm__wait(i8* %call) #3
+  call void @__hpvm__push(i8* %call, i8* nonnull %2) #3
+  %call1 = call i8* @__hpvm__pop(i8* nonnull %2) #3
+  call void (...) @__hpvm__cleanup() #3
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %2) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #3
+  ret i32 0
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2
+
+declare dso_local void @__hpvm__init(...) local_unnamed_addr #1
+
+declare dso_local i8* @__hpvm__launch(i32, ...) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__wait(i8*) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__push(i8*, i8*) local_unnamed_addr #1
+
+declare dso_local i8* @__hpvm__pop(i8*) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__cleanup(...) local_unnamed_addr #1
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2
+
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #2 = { argmemonly nounwind }
+attributes #3 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 6690f9e7e8b46b96aea222d3e85315cd63545953)"}
+!2 = !{!3, !4, i64 0}
+!3 = !{!"Root", !4, i64 0, !4, i64 8}
+!4 = !{!"any pointer", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
+!7 = !{!3, !4, i64 8}
diff --git a/hpvm/test/regressionTests/GenVISC/ThreeLevel.ll b/hpvm/test/regressionTests/GenVISC/ThreeLevel.ll
new file mode 100644
index 0000000000..aeb9840302
--- /dev/null
+++ b/hpvm/test/regressionTests/GenVISC/ThreeLevel.ll
@@ -0,0 +1,190 @@
+; RUN: opt -load LLVMGenVISC.so -S -genhpvm <  %s | FileCheck %s
+; ModuleID = 'ThreeLevel.c'
+source_filename = "ThreeLevel.c"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.Root = type { i32*, i32*, i32* }
+; CHECK-LABEL: struct.Root =
+
+; CHECK-LABEL: %struct.out.Func1 =
+; CHECK-LABEL: %struct.out.Func4 =
+; CHECK-LABEL: %struct.out.Func3 =
+; CHECK-LABEL: %struct.out.Func2 =
+; CHECK-LABEL: %struct.out.PipeRoot =
+
+; CHECK-LABEL: i32 @main(
+; CHECK: [[ALLOCA:%[0-9a-zA-Z]+]] = alloca %struct.Root
+; CHECK: call void @llvm.hpvm.init()
+; CHECK:  [[REGISTER:%[0-9]+]] = bitcast %struct.Root* [[ALLOCA]] to i8*
+; CHECK: call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot (i32*, i32*, i32*)* @PipeRoot_cloned to i8*), i8* [[REGISTER]],
+; CHECK-NEXT: call void @llvm.hpvm.wait(i8*
+
+; CHECK-LABEL: @Func1_cloned(
+; CHECK: [[RET1:%[0-9a-zA-Z]+]] = insertvalue %struct.out.Func1 undef,
+; CHECK-NEXT: ret %struct.out.Func1 [[RET1]]
+
+; CHECK-LABEL: @Func4_cloned(
+; CHECK: [[RET4:%[0-9a-zA-Z]+]] = insertvalue %struct.out.Func4 undef,
+; CHECK-NEXT: ret %struct.out.Func4 [[RET4]]
+
+; CHECK-LABEL: @Func3_cloned(
+; CHECK: %Func4_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%struct.out.Func4 (i32*
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func4_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func4_cloned.node,
+
+; CHECK-LABEL: @Func2_cloned(
+; CHECK: %Func3_cloned.node = call i8* @llvm.hpvm.createNode1D(i8* bitcast (%struct.out.Func3 (i32*
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func3_cloned.node,
+
+; CHECK-LABEL: @PipeRoot_cloned(i32*
+; CHECK: %Func1_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func1 (i32*
+; CHECK-NEXT: %Func2_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func2 (i32*
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node,
+; CHECK-NEXT: call i8* @llvm.hpvm.createEdge(i8* %Func1_cloned.node, i8* %Func2_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func2_cloned.node,
+
+
+; Function Attrs: nounwind uwtable
+define dso_local void @Func1(i32* %In, i32* %Out) #0 {
+; CHECK-NOT: @Func1(
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 1, i32* %In, i32 1, i32* %Out) #3
+  tail call void (i32, ...) @__hpvm__return(i32 1, i32* %Out) #3
+  ret void
+}
+
+declare dso_local void @__hpvm__hint(i32) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__attributes(i32, ...) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__return(i32, ...) local_unnamed_addr #1
+
+; Function Attrs: nounwind uwtable
+define dso_local void @Func4(i32* %In, i32* %Out) #0 {
+; CHECK-NOT: @Func4(
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 1, i32* %In, i32 1, i32* %Out) #3
+  tail call void (i32, ...) @__hpvm__return(i32 1, i32* %Out) #3
+  ret void
+}
+
+; Function Attrs: nounwind uwtable
+define dso_local void @Func3(i32* %In, i32* %Out) #0 {
+; CHECK-NOT: @Func3(
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 1, i32* %In, i32 1, i32* %Out) #3
+  %call = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 2, void (i32*, i32*)* nonnull @Func4, i64 3, i64 6) #3
+  tail call void @__hpvm__bindIn(i8* %call, i32 0, i32 0, i32 0) #3
+  tail call void @__hpvm__bindOut(i8* %call, i32 0, i32 0, i32 0) #3
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2
+
+declare dso_local i8* @__hpvm__createNodeND(i32, ...) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__bindIn(i8*, i32, i32, i32) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__bindOut(i8*, i32, i32, i32) local_unnamed_addr #1
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2
+
+; Function Attrs: nounwind uwtable
+define dso_local void @Func2(i32* %BindIn, i32* %SrcIn, i32* %Out) #0 {
+; CHECK-NOT: @Func2(
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 2, i32* %BindIn, i32* %SrcIn, i32 1, i32* %Out) #3
+  %call = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 1, void (i32*, i32*)* nonnull @Func3, i64 3) #3
+  tail call void @__hpvm__bindIn(i8* %call, i32 0, i32 0, i32 0) #3
+  tail call void @__hpvm__bindOut(i8* %call, i32 0, i32 0, i32 0) #3
+  ret void
+}
+
+; Function Attrs: nounwind uwtable
+define dso_local void @PipeRoot(i32* %In1, i32* %In2, i32* %Out) #0 {
+; CHECK-NOT: @PipeRoot(
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 2, i32* %In1, i32* %In2, i32 1, i32* %Out) #3
+  %call = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 0, void (i32*, i32*)* nonnull @Func1) #3
+  %call1 = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 0, void (i32*, i32*, i32*)* nonnull @Func2) #3
+  tail call void @__hpvm__bindIn(i8* %call, i32 0, i32 0, i32 0) #3
+  tail call void @__hpvm__bindIn(i8* %call1, i32 1, i32 0, i32 0) #3
+  %call2 = tail call i8* @__hpvm__edge(i8* %call, i8* %call1, i32 0, i32 1, i32 1, i32 0) #3
+  tail call void @__hpvm__bindOut(i8* %call1, i32 0, i32 0, i32 0) #3
+  ret void
+}
+
+declare dso_local i8* @__hpvm__edge(i8*, i8*, i32, i32, i32, i32) local_unnamed_addr #1
+
+; Function Attrs: nounwind uwtable
+define dso_local i32 @main() local_unnamed_addr #0 {
+entry:
+  %In1 = alloca i32, align 4
+  %In2 = alloca i32, align 4
+  %Out = alloca i32, align 4
+  %RootArgs = alloca %struct.Root, align 8
+  %0 = bitcast i32* %In1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #3
+  store i32 1, i32* %In1, align 4, !tbaa !2
+  %1 = bitcast i32* %In2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) #3
+  store i32 2, i32* %In2, align 4, !tbaa !2
+  %2 = bitcast i32* %Out to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %2) #3
+  store i32 0, i32* %Out, align 4, !tbaa !2
+  %3 = bitcast %struct.Root* %RootArgs to i8*
+  call void @llvm.lifetime.start.p0i8(i64 24, i8* nonnull %3) #3
+  %input1 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 0
+  store i32* %In1, i32** %input1, align 8, !tbaa !6
+  %input2 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 1
+  store i32* %In2, i32** %input2, align 8, !tbaa !9
+  %output = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 2
+  store i32* %Out, i32** %output, align 8, !tbaa !10
+  call void (...) @__hpvm__init() #3
+  %call = call i8* (i32, ...) @__hpvm__launch(i32 0, void (i32*, i32*, i32*)* nonnull @PipeRoot, %struct.Root* nonnull %RootArgs) #3
+  call void @__hpvm__wait(i8* %call) #3
+  call void (...) @__hpvm__cleanup() #3
+  call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull %3) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %2) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #3
+  ret i32 0
+}
+
+declare dso_local void @__hpvm__init(...) local_unnamed_addr #1
+
+declare dso_local i8* @__hpvm__launch(i32, ...) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__wait(i8*) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__cleanup(...) local_unnamed_addr #1
+
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #2 = { argmemonly nounwind }
+attributes #3 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 6690f9e7e8b46b96aea222d3e85315cd63545953)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
+!6 = !{!7, !8, i64 0}
+!7 = !{!"Root", !8, i64 0, !8, i64 8, !8, i64 16}
+!8 = !{!"any pointer", !4, i64 0}
+!9 = !{!7, !8, i64 8}
+!10 = !{!7, !8, i64 16}
diff --git a/hpvm/test/regressionTests/GenVISC/ThreeLevelEdge.ll b/hpvm/test/regressionTests/GenVISC/ThreeLevelEdge.ll
new file mode 100644
index 0000000000..d1b930b529
--- /dev/null
+++ b/hpvm/test/regressionTests/GenVISC/ThreeLevelEdge.ll
@@ -0,0 +1,209 @@
+; RUN: opt -load LLVMGenVISC.so -S -genhpvm <  %s | FileCheck %s
+; ModuleID = 'ThreeLevelEdge.c'
+source_filename = "ThreeLevelEdge.c"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.Root = type { i32*, i32*, i32* }
+; CHECK-LABEL: struct.Root =
+
+; CHECK-LABEL: %struct.out.Func1 =
+; CHECK-LABEL: %struct.out.Func4 =
+; CHECK-LABEL: %struct.out.Func5 =
+; CHECK-LABEL: %struct.out.Func3 =
+; CHECK-LABEL: %struct.out.Func2 =
+; CHECK-LABEL: %struct.out.PipeRoot =
+
+; CHECK-LABEL: i32 @main(
+; CHECK: [[ALLOCA:%[0-9a-zA-Z]+]] = alloca %struct.Root
+; CHECK: call void @llvm.hpvm.init()
+; CHECK:  [[REGISTER:%[0-9]+]] = bitcast %struct.Root* [[ALLOCA]] to i8*
+; CHECK: call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot (i32*, i32*, i32*)* @PipeRoot_cloned to i8*), i8* [[REGISTER]],
+; CHECK-NEXT: call void @llvm.hpvm.wait(i8*
+
+; CHECK-LABEL: @Func1_cloned(
+; CHECK: [[RET1:%[0-9a-zA-Z]+]] = insertvalue %struct.out.Func1 undef,
+; CHECK-NEXT: ret %struct.out.Func1 [[RET1]]
+
+; CHECK-LABEL: @Func4_cloned(
+; CHECK: [[RET4:%[0-9a-zA-Z]+]] = insertvalue %struct.out.Func4 undef,
+; CHECK-NEXT: ret %struct.out.Func4 [[RET4]]
+
+; CHECK-LABEL: @Func5_cloned(
+; CHECK: [[RET5:%[0-9a-zA-Z]+]] = insertvalue %struct.out.Func5 undef,
+; CHECK-NEXT: ret %struct.out.Func5 [[RET5]]
+
+; CHECK-LABEL: @Func3_cloned(
+; CHECK: %Func4_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%struct.out.Func4 (i32*
+; CHECK-NEXT: %Func5_cloned.node = call i8* @llvm.hpvm.createNode2D(i8* bitcast (%struct.out.Func5 (i32*
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func4_cloned.node,
+; CHECK-NEXT: call i8* @llvm.hpvm.createEdge(i8* %Func4_cloned.node, i8* %Func5_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func5_cloned.node,
+
+; CHECK-LABEL: @Func2_cloned(
+; CHECK: %Func3_cloned.node = call i8* @llvm.hpvm.createNode1D(i8* bitcast (%struct.out.Func3 (i32*
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func3_cloned.node,
+
+; CHECK-LABEL: @PipeRoot_cloned(i32*
+; CHECK: %Func1_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func1 (i32*
+; CHECK-NEXT: %Func2_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func2 (i32*
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node,
+; CHECK-NEXT: call i8* @llvm.hpvm.createEdge(i8* %Func1_cloned.node, i8* %Func2_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func2_cloned.node,
+
+
+; Function Attrs: nounwind uwtable
+define dso_local void @Func1(i32* %In, i32* %Out) #0 {
+; CHECK-NOT: @Func1(
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 1, i32* %In, i32 1, i32* %Out) #3
+  tail call void (i32, ...) @__hpvm__return(i32 1, i32* %Out) #3
+  ret void
+}
+
+declare dso_local void @__hpvm__hint(i32) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__attributes(i32, ...) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__return(i32, ...) local_unnamed_addr #1
+
+; Function Attrs: nounwind uwtable
+define dso_local void @Func4(i32* %In, i32* %Out) #0 {
+; CHECK-NOT: @Func4(
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 1, i32* %In, i32 1, i32* %Out) #3
+  tail call void (i32, ...) @__hpvm__return(i32 1, i32* %Out) #3
+  ret void
+}
+
+; Function Attrs: nounwind uwtable
+define dso_local void @Func5(i32* %In1, i32* %In2, i32* %Out) #0 {
+; CHECK-NOT: @Func5(
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 2, i32* %In1, i32* %In2, i32 1, i32* %Out) #3
+  tail call void (i32, ...) @__hpvm__return(i32 1, i32* %Out) #3
+  ret void
+}
+
+; Function Attrs: nounwind uwtable
+define dso_local void @Func3(i32* %In, i32* %Out) #0 {
+; CHECK-NOT: @Func3(
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 1, i32* %In, i32 1, i32* %Out) #3
+  %call = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 2, void (i32*, i32*)* nonnull @Func4, i64 3, i64 6) #3
+  %call1 = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 2, void (i32*, i32*, i32*)* nonnull @Func5, i64 4, i64 5) #3
+  tail call void @__hpvm__bindIn(i8* %call, i32 0, i32 0, i32 0) #3
+  %call2 = tail call i8* @__hpvm__edge(i8* %call, i8* %call1, i32 0, i32 1, i32 1, i32 0) #3
+  tail call void @__hpvm__bindOut(i8* %call1, i32 0, i32 0, i32 0) #3
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2
+
+declare dso_local i8* @__hpvm__createNodeND(i32, ...) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__bindIn(i8*, i32, i32, i32) local_unnamed_addr #1
+
+declare dso_local i8* @__hpvm__edge(i8*, i8*, i32, i32, i32, i32) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__bindOut(i8*, i32, i32, i32) local_unnamed_addr #1
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2
+
+; Function Attrs: nounwind uwtable -- @Func2 creates one child node for @Func3 and binds it; the directive in its body expects the text "@Func2(" to be absent from the pass output.
+define dso_local void @Func2(i32* %BindIn, i32* %SrcIn, i32* %Out) #0 {
+; CHECK-NOT: @Func2(
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 2, i32* %BindIn, i32* %SrcIn, i32 1, i32* %Out) #3 ; count 2 then %BindIn/%SrcIn, count 1 then %Out
+  %call = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 1, void (i32*, i32*)* nonnull @Func3, i64 3) #3 ; node for @Func3; presumably 1 dimension of extent 3 -- TODO confirm
+  tail call void @__hpvm__bindIn(i8* %call, i32 0, i32 0, i32 0) #3 ; input binding on the @Func3 node
+  tail call void @__hpvm__bindOut(i8* %call, i32 0, i32 0, i32 0) #3 ; output binding on the @Func3 node
+  ret void
+}
+
+; Function Attrs: nounwind uwtable -- @PipeRoot is the function handed to the launch call in @main; it creates nodes for @Func1 and @Func2 and connects them with one edge.
+define dso_local void @PipeRoot(i32* %In1, i32* %In2, i32* %Out) #0 {
+; CHECK-NOT: @PipeRoot(
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 2, i32* %In1, i32* %In2, i32 1, i32* %Out) #3
+  %call = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 0, void (i32*, i32*)* nonnull @Func1) #3 ; node for @Func1, leading argument 0 and no extent arguments
+  %call1 = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 0, void (i32*, i32*, i32*)* nonnull @Func2) #3 ; node for @Func2, leading argument 0 and no extent arguments
+  tail call void @__hpvm__bindIn(i8* %call, i32 0, i32 0, i32 0) #3 ; input binding on the @Func1 node
+  tail call void @__hpvm__bindIn(i8* %call1, i32 1, i32 0, i32 0) #3 ; input binding on the @Func2 node
+  %call2 = tail call i8* @__hpvm__edge(i8* %call, i8* %call1, i32 0, i32 1, i32 1, i32 0) #3 ; edge from the @Func1 node to the @Func2 node; result unused
+  tail call void @__hpvm__bindOut(i8* %call1, i32 0, i32 0, i32 0) #3 ; output binding on the @Func2 node
+  ret void
+}
+
+; Function Attrs: nounwind uwtable -- @main fills a stack %struct.Root with pointers to three i32 slots, launches @PipeRoot on it, waits on the returned handle and returns 0.
+define dso_local i32 @main() local_unnamed_addr #0 {
+entry:
+  %In1 = alloca i32, align 4
+  %In2 = alloca i32, align 4
+  %Out = alloca i32, align 4
+  %RootArgs = alloca %struct.Root, align 8
+  %0 = bitcast i32* %In1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #3
+  store i32 1, i32* %In1, align 4, !tbaa !2
+  %1 = bitcast i32* %In2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) #3
+  store i32 2, i32* %In2, align 4, !tbaa !2
+  %2 = bitcast i32* %Out to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %2) #3
+  store i32 0, i32* %Out, align 4, !tbaa !2
+  %3 = bitcast %struct.Root* %RootArgs to i8*
+  call void @llvm.lifetime.start.p0i8(i64 24, i8* nonnull %3) #3
+  %input1 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 0
+  store i32* %In1, i32** %input1, align 8, !tbaa !6 ; field 0 <- address of %In1
+  %intput2 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 1 ; NOTE(review): "intput2" looks like a misspelling of "input2" -- confirm against the C source
+  store i32* %In2, i32** %intput2, align 8, !tbaa !9 ; field 1 <- address of %In2
+  %output = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 2
+  store i32* %Out, i32** %output, align 8, !tbaa !10 ; field 2 <- address of %Out
+  call void (...) @__hpvm__init() #3
+  %call = call i8* (i32, ...) @__hpvm__launch(i32 0, void (i32*, i32*, i32*)* nonnull @PipeRoot, %struct.Root* nonnull %RootArgs) #3 ; launch @PipeRoot with the argument struct
+  call void @__hpvm__wait(i8* %call) #3 ; wait on the handle returned by the launch
+  call void (...) @__hpvm__cleanup() #3
+  call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull %3) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %2) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #3
+  ret i32 0
+}
+
+declare dso_local void @__hpvm__init(...) local_unnamed_addr #1
+
+declare dso_local i8* @__hpvm__launch(i32, ...) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__wait(i8*) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__cleanup(...) local_unnamed_addr #1
+
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #2 = { argmemonly nounwind }
+attributes #3 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 6690f9e7e8b46b96aea222d3e85315cd63545953)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
+!6 = !{!7, !8, i64 0}
+!7 = !{!"Root", !8, i64 0, !8, i64 8, !8, i64 16}
+!8 = !{!"any pointer", !4, i64 0}
+!9 = !{!7, !8, i64 8}
+!10 = !{!7, !8, i64 16}
diff --git a/hpvm/test/regressionTests/GenVISC/TwoLaunch.ll b/hpvm/test/regressionTests/GenVISC/TwoLaunch.ll
new file mode 100644
index 0000000000..36a61feb1c
--- /dev/null
+++ b/hpvm/test/regressionTests/GenVISC/TwoLaunch.ll
@@ -0,0 +1,96 @@
+; RUN: opt -load LLVMGenVISC.so -S -genhpvm <  %s | FileCheck %s
+; ModuleID = 'TwoLaunch.c'
+source_filename = "TwoLaunch.c"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable -- @PipeRoot1 is the target of the first launch in @main; it creates no child nodes and only issues the hint, attributes and return calls.
+define dso_local void @PipeRoot1(i32* %In, i32* %Out) #0 {
+entry:
+  tail call void @__hpvm__hint(i32 1) #4 ; meaning of the constant 1 is not visible in this file -- TODO confirm against the HPVM headers
+  tail call void (i32, ...) @__hpvm__attributes(i32 1, i32* %In, i32 1, i32* %Out) #4 ; count 1 then %In, count 1 then %Out (presumably inputs, then outputs -- confirm)
+  tail call void (i32, ...) @__hpvm__return(i32 1, i32* %Out) #4 ; count 1 followed by %Out
+  ret void
+}
+
+declare dso_local void @__hpvm__hint(i32) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__attributes(i32, ...) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__return(i32, ...) local_unnamed_addr #1
+
+; Function Attrs: nounwind uwtable -- @PipeRoot2 is the target of the second launch in @main; its body is identical to that of @PipeRoot1.
+define dso_local void @PipeRoot2(i32* %In, i32* %Out) #0 {
+entry:
+  tail call void @__hpvm__hint(i32 1) #4 ; meaning of the constant 1 is not visible in this file -- TODO confirm against the HPVM headers
+  tail call void (i32, ...) @__hpvm__attributes(i32 1, i32* %In, i32 1, i32* %Out) #4 ; count 1 then %In, count 1 then %Out (presumably inputs, then outputs -- confirm)
+  tail call void (i32, ...) @__hpvm__return(i32 1, i32* %Out) #4 ; count 1 followed by %Out
+  ret void
+}
+
+; Function Attrs: nounwind uwtable -- @main packs %In/%Out pointers into one malloc'ed 16-byte buffer and launches @PipeRoot1 and @PipeRoot2 on that same buffer; the directives expect both launches back to back, then both waits.
+define dso_local i32 @main() local_unnamed_addr #0 {
+; CHECK-LABEL: i32 @main(
+; CHECK: call void @llvm.hpvm.init()
+; CHECK: call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot1 (i32*, i32*)* @PipeRoot1_cloned to i8*), i8* %call, i1 false)
+; CHECK-NEXT: call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot2 (i32*, i32*)* @PipeRoot2_cloned to i8*), i8* %call, i1 false)
+; CHECK-NEXT: call void @llvm.hpvm.wait(i8*
+; CHECK-NEXT: call void @llvm.hpvm.wait(i8*
+entry:
+  %In = alloca i32, align 4
+  %Out = alloca i32, align 4
+  %0 = bitcast i32* %In to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #4
+  %1 = bitcast i32* %Out to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) #4
+  %call = tail call noalias i8* @malloc(i64 16) #4 ; argument buffer shared by both launches; never freed in this function
+  %input = bitcast i8* %call to i32**
+  store i32* %In, i32** %input, align 8, !tbaa !2 ; offset 0 <- address of %In
+  %output = getelementptr inbounds i8, i8* %call, i64 8
+  %2 = bitcast i8* %output to i32**
+  store i32* %Out, i32** %2, align 8, !tbaa !7 ; offset 8 <- address of %Out
+  call void (...) @__hpvm__init() #4
+  %call1 = call i8* (i32, ...) @__hpvm__launch(i32 0, void (i32*, i32*)* nonnull @PipeRoot1, i8* %call) #4 ; first launch
+  %call2 = call i8* (i32, ...) @__hpvm__launch(i32 0, void (i32*, i32*)* nonnull @PipeRoot2, i8* %call) #4 ; second launch, same buffer
+  call void @__hpvm__wait(i8* %call1) #4 ; waits are issued in launch order
+  call void @__hpvm__wait(i8* %call2) #4
+  call void (...) @__hpvm__cleanup() #4
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) #4
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #4
+  ret i32 0
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2
+
+; Function Attrs: nofree nounwind
+declare dso_local noalias i8* @malloc(i64) local_unnamed_addr #3
+
+declare dso_local void @__hpvm__init(...) local_unnamed_addr #1
+
+declare dso_local i8* @__hpvm__launch(i32, ...) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__wait(i8*) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__cleanup(...) local_unnamed_addr #1
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2
+
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #2 = { argmemonly nounwind }
+attributes #3 = { nofree nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #4 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 6690f9e7e8b46b96aea222d3e85315cd63545953)"}
+!2 = !{!3, !4, i64 0}
+!3 = !{!"Root", !4, i64 0, !4, i64 8}
+!4 = !{!"any pointer", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
+!7 = !{!3, !4, i64 8}
diff --git a/hpvm/test/regressionTests/GenVISC/TwoLevel.ll b/hpvm/test/regressionTests/GenVISC/TwoLevel.ll
new file mode 100644
index 0000000000..e5b4c21681
--- /dev/null
+++ b/hpvm/test/regressionTests/GenVISC/TwoLevel.ll
@@ -0,0 +1,167 @@
+; RUN: opt -load LLVMGenVISC.so -S -genhpvm <  %s | FileCheck %s
+; ModuleID = 'TwoLevel.c'
+source_filename = "TwoLevel.c"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.Root = type { i32*, i32*, i32* }
+; CHECK-LABEL: struct.Root =
+; Expected output of the genhpvm pass follows as FileCheck directives; the input IR comes after them.
+; CHECK-LABEL: %struct.out.Func1 =
+; CHECK-LABEL: %struct.out.Func3 =
+; CHECK-LABEL: %struct.out.Func2 =
+; CHECK-LABEL: %struct.out.PipeRoot =
+; In @main the launch must receive the i8* cast of the %struct.Root alloca that appears after the init call.
+; CHECK-LABEL: i32 @main(
+; CHECK: [[ALLOCA:%[0-9a-zA-Z]+]] = alloca %struct.Root
+; CHECK: call void @llvm.hpvm.init()
+; CHECK:  [[REGISTER:%[0-9]+]] = bitcast %struct.Root* [[ALLOCA]] to i8*
+; CHECK: call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot (i32*, i32*, i32*)* @PipeRoot_cloned to i8*), i8* [[REGISTER]],
+; CHECK-NEXT: call void @llvm.hpvm.wait(i8*
+
+; CHECK-LABEL: @Func1_cloned(
+; CHECK: [[RET1:%[1-9a-zA-Z]+]] = insertvalue %struct.out.Func1 undef,
+; CHECK-NEXT: ret %struct.out.Func1 [[RET1]]
+
+; CHECK-LABEL: @Func3_cloned(
+; CHECK: [[RET3:%[1-9a-zA-Z]+]] = insertvalue %struct.out.Func3 undef,
+; CHECK-NEXT: ret %struct.out.Func3 [[RET3]]
+
+; CHECK-LABEL: @Func2_cloned(
+; CHECK: %Func3_cloned.node = call i8* @llvm.hpvm.createNode1D(i8* bitcast (%struct.out.Func3 (i32*
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func3_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func3_cloned.node,
+
+; CHECK-LABEL: @PipeRoot_cloned(i32*
+; CHECK: %Func1_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func1 (i32*
+; CHECK-NEXT: %Func2_cloned.node = call i8* @llvm.hpvm.createNode(i8* bitcast (%struct.out.Func2 (i32*
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func1_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.input(i8* %Func2_cloned.node,
+; CHECK-NEXT: call i8* @llvm.hpvm.createEdge(i8* %Func1_cloned.node, i8* %Func2_cloned.node,
+; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func2_cloned.node,
+
+; Function Attrs: nounwind uwtable -- @Func1 creates no child nodes; @PipeRoot instantiates it as a node and the directives above expect a clone named @Func1_cloned returning %struct.out.Func1.
+define dso_local void @Func1(i32* %In, i32* %Out) #0 {
+entry:
+  tail call void @__hpvm__hint(i32 1) #3 ; meaning of the constant 1 is not visible in this file -- TODO confirm against the HPVM headers
+  tail call void (i32, ...) @__hpvm__attributes(i32 1, i32* %In, i32 1, i32* %Out) #3 ; count 1 then %In, count 1 then %Out (presumably inputs, then outputs -- confirm)
+  tail call void (i32, ...) @__hpvm__return(i32 1, i32* %Out) #3 ; count 1 followed by %Out
+  ret void
+}
+
+declare dso_local void @__hpvm__hint(i32) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__attributes(i32, ...) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__return(i32, ...) local_unnamed_addr #1
+
+; Function Attrs: nounwind uwtable -- @Func3 creates no child nodes; @Func2 instantiates it as a node and the directives above expect a clone named @Func3_cloned returning %struct.out.Func3.
+define dso_local void @Func3(i32* %In, i32* %Out) #0 {
+entry:
+  tail call void @__hpvm__hint(i32 1) #3 ; meaning of the constant 1 is not visible in this file -- TODO confirm against the HPVM headers
+  tail call void (i32, ...) @__hpvm__attributes(i32 1, i32* %In, i32 1, i32* %Out) #3 ; count 1 then %In, count 1 then %Out (presumably inputs, then outputs -- confirm)
+  tail call void (i32, ...) @__hpvm__return(i32 1, i32* %Out) #3 ; count 1 followed by %Out
+  ret void
+}
+
+; Function Attrs: nounwind uwtable -- @Func2 creates one child node for @Func3 and binds it; the directives above expect createNode1D, bind.input and bind.output calls in @Func2_cloned.
+define dso_local void @Func2(i32* %BindIn, i32* %SrcIn, i32* %Out) #0 {
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 2, i32* %BindIn, i32* %SrcIn, i32 1, i32* %Out) #3 ; count 2 then %BindIn/%SrcIn, count 1 then %Out
+  %call = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 1, void (i32*, i32*)* nonnull @Func3, i64 3) #3 ; node for @Func3; presumably 1 dimension of extent 3 -- TODO confirm
+  tail call void @__hpvm__bindIn(i8* %call, i32 0, i32 0, i32 0) #3 ; input binding on the @Func3 node
+  tail call void @__hpvm__bindOut(i8* %call, i32 0, i32 0, i32 0) #3 ; output binding on the @Func3 node
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2
+
+declare dso_local i8* @__hpvm__createNodeND(i32, ...) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__bindIn(i8*, i32, i32, i32) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__bindOut(i8*, i32, i32, i32) local_unnamed_addr #1
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2
+
+; Function Attrs: nounwind uwtable -- @PipeRoot is the function handed to the launch call in @main; it creates nodes for @Func1 and @Func2 and connects them with one edge.
+define dso_local void @PipeRoot(i32* %In1, i32* %In2, i32* %Out) #0 {
+entry:
+  tail call void @__hpvm__hint(i32 1) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 2, i32* %In1, i32* %In2, i32 1, i32* %Out) #3
+  %call = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 0, void (i32*, i32*)* nonnull @Func1) #3 ; node for @Func1, leading argument 0 and no extent arguments
+  %call1 = tail call i8* (i32, ...) @__hpvm__createNodeND(i32 0, void (i32*, i32*, i32*)* nonnull @Func2) #3 ; node for @Func2, leading argument 0 and no extent arguments
+  tail call void @__hpvm__bindIn(i8* %call, i32 0, i32 0, i32 0) #3 ; input binding on the @Func1 node
+  tail call void @__hpvm__bindIn(i8* %call1, i32 1, i32 0, i32 0) #3 ; input binding on the @Func2 node
+  %call2 = tail call i8* @__hpvm__edge(i8* %call, i8* %call1, i32 0, i32 1, i32 1, i32 0) #3 ; edge from the @Func1 node to the @Func2 node; result unused
+  tail call void @__hpvm__bindOut(i8* %call1, i32 0, i32 0, i32 0) #3 ; output binding on the @Func2 node
+  ret void
+}
+
+declare dso_local i8* @__hpvm__edge(i8*, i8*, i32, i32, i32, i32) local_unnamed_addr #1
+
+; Function Attrs: nounwind uwtable -- @main fills a stack %struct.Root with pointers to three i32 slots, launches @PipeRoot on it, waits on the returned handle and returns 0.
+define dso_local i32 @main() local_unnamed_addr #0 {
+entry:
+  %In1 = alloca i32, align 4
+  %In2 = alloca i32, align 4
+  %Out = alloca i32, align 4
+  %RootArgs = alloca %struct.Root, align 8
+  %0 = bitcast i32* %In1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #3
+  store i32 1, i32* %In1, align 4, !tbaa !2
+  %1 = bitcast i32* %In2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) #3
+  store i32 2, i32* %In2, align 4, !tbaa !2
+  %2 = bitcast i32* %Out to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %2) #3
+  store i32 0, i32* %Out, align 4, !tbaa !2
+  %3 = bitcast %struct.Root* %RootArgs to i8*
+  call void @llvm.lifetime.start.p0i8(i64 24, i8* nonnull %3) #3
+  %input1 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 0
+  store i32* %In1, i32** %input1, align 8, !tbaa !6 ; field 0 <- address of %In1
+  %input2 = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 1
+  store i32* %In2, i32** %input2, align 8, !tbaa !9 ; field 1 <- address of %In2
+  %output = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 2
+  store i32* %Out, i32** %output, align 8, !tbaa !10 ; field 2 <- address of %Out
+  call void (...) @__hpvm__init() #3
+  %call = call i8* (i32, ...) @__hpvm__launch(i32 0, void (i32*, i32*, i32*)* nonnull @PipeRoot, %struct.Root* nonnull %RootArgs) #3 ; launch @PipeRoot with the argument struct
+  call void @__hpvm__wait(i8* %call) #3 ; wait on the handle returned by the launch
+  call void (...) @__hpvm__cleanup() #3
+  call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull %3) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %2) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #3
+  ret i32 0
+}
+
+declare dso_local void @__hpvm__init(...) local_unnamed_addr #1
+
+declare dso_local i8* @__hpvm__launch(i32, ...) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__wait(i8*) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__cleanup(...) local_unnamed_addr #1
+
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #2 = { argmemonly nounwind }
+attributes #3 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 6690f9e7e8b46b96aea222d3e85315cd63545953)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
+!6 = !{!7, !8, i64 0}
+!7 = !{!"Root", !8, i64 0, !8, i64 8, !8, i64 16}
+!8 = !{!"any pointer", !4, i64 0}
+!9 = !{!7, !8, i64 8}
+!10 = !{!7, !8, i64 16}
diff --git a/hpvm/test/regressionTests/GenVISC/oneLaunchAlloca.ll b/hpvm/test/regressionTests/GenVISC/oneLaunchAlloca.ll
new file mode 100644
index 0000000000..1c26746919
--- /dev/null
+++ b/hpvm/test/regressionTests/GenVISC/oneLaunchAlloca.ll
@@ -0,0 +1,86 @@
+; RUN: opt -load LLVMGenVISC.so -S -genhpvm <  %s | FileCheck %s
+; ModuleID = 'oneLaunchAlloca.c'
+source_filename = "oneLaunchAlloca.c"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.Root = type { i32*, i32* } ; argument struct passed to the launch call in @main; field 0 receives the address of %In, field 1 the address of %Out
+; CHECK-LABEL: struct.Root
+
+; Function Attrs: nounwind uwtable -- @PipeRoot is the function handed to the launch call in @main; it creates no child nodes and only issues the hint, attributes and return calls.
+define dso_local void @PipeRoot(i32* %In, i32* %Out) #0 {
+entry:
+  tail call void @__hpvm__hint(i32 1) #3 ; meaning of the constant 1 is not visible in this file -- TODO confirm against the HPVM headers
+  tail call void (i32, ...) @__hpvm__attributes(i32 1, i32* %In, i32 1, i32* %Out) #3 ; count 1 then %In, count 1 then %Out (presumably inputs, then outputs -- confirm)
+  tail call void (i32, ...) @__hpvm__return(i32 1, i32* %Out) #3 ; count 1 followed by %Out
+  ret void
+}
+
+declare dso_local void @__hpvm__hint(i32) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__attributes(i32, ...) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__return(i32, ...) local_unnamed_addr #1
+
+; Function Attrs: nounwind uwtable -- @main launches @PipeRoot on a stack-allocated %struct.Root; the directives expect the launch to receive an i8* cast of that alloca emitted after the init call.
+define dso_local i32 @main() local_unnamed_addr #0 {
+; CHECK-LABEL: i32 @main(
+; CHECK: [[ALLOCA:%[0-9a-zA-Z]+]] = alloca %struct.Root
+; CHECK: call void @llvm.hpvm.init()
+; CHECK:  [[REGISTER:%[0-9]+]] = bitcast %struct.Root* [[ALLOCA]] to i8*
+; CHECK: call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot (i32*, i32*)* @PipeRoot_cloned to i8*), i8* [[REGISTER]], i1 false)
+; CHECK-NEXT: call void @llvm.hpvm.wait(i8*
+entry:
+  %In = alloca i32, align 4
+  %Out = alloca i32, align 4
+  %RootArgs = alloca %struct.Root, align 8
+  %0 = bitcast i32* %In to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #3
+  %1 = bitcast i32* %Out to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) #3
+  %2 = bitcast %struct.Root* %RootArgs to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %2) #3
+  %input = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 0
+  store i32* %In, i32** %input, align 8, !tbaa !2 ; field 0 <- address of %In
+  %output = getelementptr inbounds %struct.Root, %struct.Root* %RootArgs, i64 0, i32 1
+  store i32* %Out, i32** %output, align 8, !tbaa !7 ; field 1 <- address of %Out
+  call void (...) @__hpvm__init() #3
+  %call = call i8* (i32, ...) @__hpvm__launch(i32 0, void (i32*, i32*)* nonnull @PipeRoot, %struct.Root* nonnull %RootArgs) #3 ; launch @PipeRoot with the argument struct
+  call void @__hpvm__wait(i8* %call) #3 ; wait on the handle returned by the launch
+  call void (...) @__hpvm__cleanup() #3
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %2) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #3
+  ret i32 0
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2
+
+declare dso_local void @__hpvm__init(...) local_unnamed_addr #1
+
+declare dso_local i8* @__hpvm__launch(i32, ...) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__wait(i8*) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__cleanup(...) local_unnamed_addr #1
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2
+
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #2 = { argmemonly nounwind }
+attributes #3 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 6690f9e7e8b46b96aea222d3e85315cd63545953)"}
+!2 = !{!3, !4, i64 0}
+!3 = !{!"Root", !4, i64 0, !4, i64 8}
+!4 = !{!"any pointer", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
+!7 = !{!3, !4, i64 8}
diff --git a/hpvm/test/regressionTests/GenVISC/oneLaunchMalloc.ll b/hpvm/test/regressionTests/GenVISC/oneLaunchMalloc.ll
new file mode 100644
index 0000000000..892049e674
--- /dev/null
+++ b/hpvm/test/regressionTests/GenVISC/oneLaunchMalloc.ll
@@ -0,0 +1,84 @@
+; RUN: opt -load LLVMGenVISC.so -S -genhpvm <  %s | FileCheck %s
+; ModuleID = 'oneLaunchMalloc.c'
+source_filename = "oneLaunchMalloc.c"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable -- @PipeRoot is the function handed to the launch call in @main; it creates no child nodes and only issues the hint, attributes and return calls.
+define dso_local void @PipeRoot(i32* %In, i32* %Out) #0 {
+entry:
+  tail call void @__hpvm__hint(i32 1) #4 ; meaning of the constant 1 is not visible in this file -- TODO confirm against the HPVM headers
+  tail call void (i32, ...) @__hpvm__attributes(i32 1, i32* %In, i32 1, i32* %Out) #4 ; count 1 then %In, count 1 then %Out (presumably inputs, then outputs -- confirm)
+  tail call void (i32, ...) @__hpvm__return(i32 1, i32* %Out) #4 ; count 1 followed by %Out
+  ret void
+}
+
+declare dso_local void @__hpvm__hint(i32) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__attributes(i32, ...) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__return(i32, ...) local_unnamed_addr #1
+
+; Function Attrs: nounwind uwtable -- @main packs %In/%Out pointers into a malloc'ed 16-byte buffer and launches @PipeRoot on it; the directives expect the malloc result %call to be passed straight to the launch.
+define dso_local i32 @main() local_unnamed_addr #0 {
+; CHECK-LABEL: i32 @main(
+; CHECK: %call = tail call noalias i8* @malloc(i64
+; CHECK: call void @llvm.hpvm.init()
+; CHECK: call i8* @llvm.hpvm.launch(i8* bitcast (%struct.out.PipeRoot (i32*, i32*)* @PipeRoot_cloned to i8*), i8* %call, i1 false)
+; CHECK-NEXT: call void @llvm.hpvm.wait(i8*
+entry:
+  %In = alloca i32, align 4
+  %Out = alloca i32, align 4
+  %0 = bitcast i32* %In to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #4
+  %1 = bitcast i32* %Out to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) #4
+  %call = tail call noalias i8* @malloc(i64 16) #4 ; argument buffer for the launch; never freed in this function
+  %input = bitcast i8* %call to i32**
+  store i32* %In, i32** %input, align 8, !tbaa !2 ; offset 0 <- address of %In
+  %output = getelementptr inbounds i8, i8* %call, i64 8
+  %2 = bitcast i8* %output to i32**
+  store i32* %Out, i32** %2, align 8, !tbaa !7 ; offset 8 <- address of %Out
+  call void (...) @__hpvm__init() #4
+  %call1 = call i8* (i32, ...) @__hpvm__launch(i32 0, void (i32*, i32*)* nonnull @PipeRoot, i8* %call) #4 ; launch @PipeRoot with the buffer
+  call void @__hpvm__wait(i8* %call1) #4 ; wait on the handle returned by the launch
+  call void (...) @__hpvm__cleanup() #4
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) #4
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #4
+  ret i32 0
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2
+
+; Function Attrs: nofree nounwind
+declare dso_local noalias i8* @malloc(i64) local_unnamed_addr #3
+
+declare dso_local void @__hpvm__init(...) local_unnamed_addr #1
+
+declare dso_local i8* @__hpvm__launch(i32, ...) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__wait(i8*) local_unnamed_addr #1
+
+declare dso_local void @__hpvm__cleanup(...) local_unnamed_addr #1
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2
+
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #2 = { argmemonly nounwind }
+attributes #3 = { nofree nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #4 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 9.0.0 (https://gitlab.engr.illinois.edu/llvm/hpvm.git 6690f9e7e8b46b96aea222d3e85315cd63545953)"}
+!2 = !{!3, !4, i64 0}
+!3 = !{!"Root", !4, i64 0, !4, i64 8}
+!4 = !{!"any pointer", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
+!7 = !{!3, !4, i64 8}
-- 
GitLab