diff --git a/llvm/test/VISC/unitTests/3level.ll b/llvm/test/VISC/unitTests/3level.ll
index 72248b3b95d837cbbc6b0671c984b341b7690ba5..340020ee54a3fd6fd265240ddad1948e2ebc08b8 100644
--- a/llvm/test/VISC/unitTests/3level.ll
+++ b/llvm/test/VISC/unitTests/3level.ll
@@ -1,12 +1,14 @@
 ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s
 ; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
 ; RUN: %t.bin 5
 ; ModuleID = '/home/psrivas2/current-test/unitTests/3level.ll'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-%rtype = type { i32, i32 }
+%rtype = type <{i32, i32}> ; packed struct: two i32 fields
+%rtype_internal = type <{i32}> ; packed single-i32 return type of producer/consumer/foo/subNode
+%struct.arg = type <{ i32, %rtype }> ; block handed to llvm.visc.launch: i32 written by main, then the %rtype main reads back after wait
 
 @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
 
@@ -34,18 +36,18 @@ declare void @llvm.visc.bind.output(i8*, i32, i32)
 ; Function Attrs: nounwind uwtable
 define i32 @main(i32 %argc, i8** nocapture %argv) #1 {
 entry:
-  %in.addr = alloca { i32, %rtype }
+  %in.addr = alloca %struct.arg
   %arrayidx = getelementptr inbounds i8** %argv, i64 1
   %0 = load i8** %arrayidx, align 8, !tbaa !0
   %call.i = tail call i64 @strtol(i8* nocapture %0, i8** null, i32 10) #0
   %conv.i = trunc i64 %call.i to i32
-  %1 = bitcast { i32, %rtype }* %in.addr to i32*
+  %1 = bitcast %struct.arg* %in.addr to i32*
   store i32 %conv.i, i32* %1
-  %args = bitcast { i32, %rtype }* %in.addr to i8*
+  %args = bitcast %struct.arg* %in.addr to i8*
   %graphID = call i8* @llvm.visc.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args)
   %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0
   call void @llvm.visc.wait(i8* %graphID)
-  %2 = getelementptr { i32, %rtype }* %in.addr, i32 0, i32 1
+  %2 = getelementptr %struct.arg* %in.addr, i32 0, i32 1
   %outputstruct = load %rtype* %2
   %output1 = extractvalue %rtype %outputstruct, 0
   %output2 = extractvalue %rtype %outputstruct, 1
@@ -54,35 +56,35 @@ entry:
   ret i32 0
 }
 
-define { i32 } @producer(i32 %id) {
+define %rtype_internal @producer(i32 %id) { ; node function: returns %id + 4
   %sum = add i32 4, %id
-  %output = insertvalue { i32 } undef, i32 %sum, 0
-  ret { i32 } %output
+  %output = insertvalue %rtype_internal undef, i32 %sum, 0 ; field 0 carries the sum
+  ret %rtype_internal %output
 }
 
-define { i32 } @consumer(i32 %id) {
+define %rtype_internal @consumer(i32 %id) { ; node function: returns %id + 10
   %sum = add i32 10, %id
-  %output = insertvalue { i32 } undef, i32 %sum, 0
-  ret { i32 } %output
+  %output = insertvalue %rtype_internal undef, i32 %sum, 0 ; field 0 carries the sum
+  ret %rtype_internal %output
 }
 
-define { i32 } @foo(i32 %id) {
+define %rtype_internal @foo(i32 %id) { ; node function: returns %id + 15
   %sum = add i32 15, %id
-  %output = insertvalue { i32 } undef, i32 %sum, 0
-  ret { i32 } %output
+  %output = insertvalue %rtype_internal undef, i32 %sum, 0 ; field 0 carries the sum
+  ret %rtype_internal %output
 }
 
-define { i32 } @subNode(i32 %id) {
-  %foo_node = call i8* @llvm.visc.createNode(i8* bitcast ({ i32 } (i32)* @foo to i8*))
+define %rtype_internal @subNode(i32 %id) { ; creates one child node from @foo and binds its input 0 and output 0
+  %foo_node = call i8* @llvm.visc.createNode(i8* bitcast (%rtype_internal (i32)* @foo to i8*))
   call void @llvm.visc.bind.input(i8* %foo_node, i32 0, i32 0)
   call void @llvm.visc.bind.output(i8* %foo_node, i32 0, i32 0)
-  ret { i32 } zeroinitializer
+  ret %rtype_internal zeroinitializer ; returns an all-zero value; presumably the real output arrives via bind.output - TODO confirm
 }
 
 define %rtype @Root(i32 %id) {
-  %p_node = call i8* @llvm.visc.createNode(i8* bitcast ({ i32 } (i32)* @producer to i8*))
-  %c_node = call i8* @llvm.visc.createNode(i8* bitcast ({ i32 } (i32)* @consumer to i8*))
-  %sub_node = call i8* @llvm.visc.createNode(i8* bitcast ({ i32 } (i32)* @subNode to i8*))
+  %p_node = call i8* @llvm.visc.createNode(i8* bitcast (%rtype_internal (i32)* @producer to i8*))
+  %c_node = call i8* @llvm.visc.createNode(i8* bitcast (%rtype_internal (i32)* @consumer to i8*))
+  %sub_node = call i8* @llvm.visc.createNode(i8* bitcast (%rtype_internal (i32)* @subNode to i8*))
   %edge = call i8* @llvm.visc.createEdge(i8* %p_node, i8* %c_node, i1 false, i32 0, i32 0)
   call void @llvm.visc.bind.input(i8* %p_node, i32 0, i32 0)
   call void @llvm.visc.bind.output(i8* %c_node, i32 0, i32 0)
diff --git a/llvm/test/VISC/unitTests/Makefile b/llvm/test/VISC/unitTests/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..539ee5e8fbf010d33663c98470b245bb2710eeea
--- /dev/null
+++ b/llvm/test/VISC/unitTests/Makefile
@@ -0,0 +1,17 @@
+# Helper Makefile for the VISC unit tests; the only rule defined here is `clean`.
+PASSES :=
+
+.PHONY: clean
+
+# Toolchain locations default to the original author's checkout; override
+# them from the environment or the make command line on other machines.
+LLVM_INSTALL?=/home/psrivas2/Hetero/VISC/Code/trunk/llvm-install
+LIBCLC?=/home/psrivas2/Hetero/VISC/Code/trunk/libclc
+HOST:=gemm_opencl
+KERNELS:=matrixMul
+LLVM_CC:=$(LLVM_INSTALL)/bin/clang
+LLVM_LINK:=$(LLVM_INSTALL)/bin/llvm-link
+
+# Remove DataflowGraph.dot* files from the current directory.
+clean :
+	rm -f DataflowGraph.dot*
diff --git a/llvm/test/VISC/unitTests/query2D.ll b/llvm/test/VISC/unitTests/query2D.ll
index 9b2ad72c5abac5dd6488a3d8f0848d262d45f521..6d2bb9cea2649cee3b2955c3c5744520f60944cf 100644
--- a/llvm/test/VISC/unitTests/query2D.ll
+++ b/llvm/test/VISC/unitTests/query2D.ll
@@ -1,12 +1,13 @@
 ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s
 ; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
 ; RUN: %t.bin 5
 ; ModuleID = '/home/psrivas2/current-test/unitTests/query2D.ll'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-%rtype = type { i32 }
+%rtype = type <{i32}>
+%struct.arg = type <{ i32, %rtype }>
 
 @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
 
@@ -23,70 +24,68 @@ declare i8* @llvm.visc.createNode2D(i8*, i32, i32) #0
 declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32) #0
 
 ; Function Attrs: nounwind
-declare i32 @llvm.visc.launch(i8**, i8*, i8*) #0
+declare i8* @llvm.visc.launch(i8*, i8*) #0
 
 ; Function Attrs: nounwind
-declare i32 @llvm.visc.wait(i8*) #0
+declare void @llvm.visc.wait(i8*) #0
 
 ; Function Attrs: nounwind
-declare i8* @llvm.visc.getNode() #0
+declare void @llvm.visc.bind.input(i8*, i32, i32)
 
 ; Function Attrs: nounwind
-declare i8* @llvm.visc.getParentNode(i8*) #0
+declare void @llvm.visc.bind.output(i8*, i32, i32)
 
 ; Function Attrs: nounwind
-declare i32 @llvm.visc.getNumDims(i8*) #0
+declare i8* @llvm.visc.getNode() #0
 
 ; Function Attrs: nounwind
-declare i32 @llvm.visc.getNumNodeInstances.x(i8*) #0
+declare i8* @llvm.visc.getParentNode(i8*) #0
 
 ; Function Attrs: nounwind
-declare void @llvm.visc.bind.input(i8*, i32, i32)
+declare i32 @llvm.visc.getNumDims(i8*) #0
 
 ; Function Attrs: nounwind
-declare void @llvm.visc.bind.output(i8*, i32, i32)
+declare i32 @llvm.visc.getNumNodeInstances.x(i8*) #0
 
 ; Function Attrs: nounwind uwtable
 define i32 @main(i32 %argc, i8** nocapture %argv) #1 {
 entry:
-  %in.addr = alloca { i32, %rtype }
+  %in.addr = alloca %struct.arg ; argument block passed to llvm.visc.launch below
   %arrayidx = getelementptr inbounds i8** %argv, i64 1
   %0 = load i8** %arrayidx, align 8, !tbaa !0
   %call.i = tail call i64 @strtol(i8* nocapture %0, i8** null, i32 10) #0
   %conv.i = trunc i64 %call.i to i32
-  %1 = bitcast { i32, %rtype }* %in.addr to i32*
+  %1 = bitcast %struct.arg* %in.addr to i32* ; address of field 0, the i32 input
   store i32 %conv.i, i32* %1
-  %args = bitcast { i32, %rtype }* %in.addr to i8*
-  %graphIDloc = alloca i8*
-  %launch = call i32 @llvm.visc.launch(i8** %graphIDloc, i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args)
+  %args = bitcast %struct.arg* %in.addr to i8*
+  %graphID = call i8* @llvm.visc.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args) ; returned i8* is what llvm.visc.wait takes below
   %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0
-  %graphID = load i8** %graphIDloc
-  %wait = call i32 @llvm.visc.wait(i8* %graphID)
-  %2 = getelementptr { i32, %rtype }* %in.addr, i32 0, i32 1
+  call void @llvm.visc.wait(i8* %graphID)
+  %2 = getelementptr %struct.arg* %in.addr, i32 0, i32 1 ; address of field 1, the %rtype result
   %outputstruct = load %rtype* %2
   %output = extractvalue %rtype %outputstruct, 0
   %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output) #0
   ret i32 0
 }
 
-define { i32 } @producer(i32 %id) {
+define %rtype @producer(i32 %id) {
   %sum = add i32 4, %id
   %this_node = call i8* @llvm.visc.getNode()
   %dim = call i32 @llvm.visc.getNumNodeInstances.x(i8* %this_node)
   %sum2 = add i32 %sum, %dim
-  %output = insertvalue { i32 } undef, i32 %sum2, 0
-  ret { i32 } %output
+  %output = insertvalue %rtype undef, i32 %sum2, 0
+  ret %rtype %output
 }
 
-define { i32 } @consumer(i32 %id) {
+define %rtype @consumer(i32 %id) {
   %sum = add i32 10, %id
-  %output = insertvalue { i32 } undef, i32 %sum, 0
-  ret { i32 } %output
+  %output = insertvalue %rtype undef, i32 %sum, 0
+  ret %rtype %output
 }
 
 define %rtype @Root(i32 %dimension) {
-  %p_node = call i8* @llvm.visc.createNode2D(i8* bitcast ({ i32 } (i32)* @producer to i8*), i32 %dimension, i32 %dimension)
-  %c_node = call i8* @llvm.visc.createNode(i8* bitcast ({ i32 } (i32)* @consumer to i8*))
+  %p_node = call i8* @llvm.visc.createNode2D(i8* bitcast (%rtype (i32)* @producer to i8*), i32 %dimension, i32 %dimension)
+  %c_node = call i8* @llvm.visc.createNode(i8* bitcast (%rtype (i32)* @consumer to i8*))
   %edge = call i8* @llvm.visc.createEdge(i8* %p_node, i8* %c_node, i1 false, i32 0, i32 0)
   call void @llvm.visc.bind.input(i8* %p_node, i32 0, i32 0)
   call void @llvm.visc.bind.output(i8* %c_node, i32 0, i32 0)
diff --git a/llvm/test/VISC/unitTests/query3D.ll b/llvm/test/VISC/unitTests/query3D.ll
index bec2cb9ce94cf2be87fa8b2c107a26e7acae43c0..923a29c51ec847b50a9ff76ae077777d19de9e29 100644
--- a/llvm/test/VISC/unitTests/query3D.ll
+++ b/llvm/test/VISC/unitTests/query3D.ll
@@ -1,12 +1,13 @@
 ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s
 ; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
 ; RUN: %t.bin 5
 ; ModuleID = '/home/psrivas2/current-test/unitTests/query3D.ll'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-%rtype = type { i32 }
+%rtype = type <{i32}>
+%struct.arg = type <{ i32, %rtype }>
 
 @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
 
@@ -26,10 +27,16 @@ declare i8* @llvm.visc.createNode3D(i8*, i32, i32, i32) #0
 declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32) #0
 
 ; Function Attrs: nounwind
-declare i32 @llvm.visc.launch(i8**, i8*, i8*) #0
+declare i8* @llvm.visc.launch(i8*, i8*) #0
 
 ; Function Attrs: nounwind
-declare i32 @llvm.visc.wait(i8*) #0
+declare void @llvm.visc.wait(i8*) #0
+
+; Function Attrs: nounwind
+declare void @llvm.visc.bind.input(i8*, i32, i32)
+
+; Function Attrs: nounwind
+declare void @llvm.visc.bind.output(i8*, i32, i32)
 
 ; Function Attrs: nounwind
 declare i8* @llvm.visc.getNode() #0
@@ -44,52 +51,47 @@ declare i32 @llvm.visc.getNumDims(i8*) #0
 declare i32 @llvm.visc.getNumNodeInstances.x(i8*) #0
 
 ; Function Attrs: nounwind
-declare void @llvm.visc.bind.input(i8*, i32, i32)
-
-; Function Attrs: nounwind
-declare void @llvm.visc.bind.output(i8*, i32, i32)
+declare i32 @llvm.visc.getNumNodeInstances.y(i8*) #0
 
 ; Function Attrs: nounwind uwtable
 define i32 @main(i32 %argc, i8** nocapture %argv) #1 {
 entry:
-  %in.addr = alloca { i32, %rtype }
+  %in.addr = alloca %struct.arg
   %arrayidx = getelementptr inbounds i8** %argv, i64 1
   %0 = load i8** %arrayidx, align 8, !tbaa !0
   %call.i = tail call i64 @strtol(i8* nocapture %0, i8** null, i32 10) #0
   %conv.i = trunc i64 %call.i to i32
-  %1 = bitcast { i32, %rtype }* %in.addr to i32*
+  %1 = bitcast %struct.arg* %in.addr to i32*
   store i32 %conv.i, i32* %1
-  %args = bitcast { i32, %rtype }* %in.addr to i8*
-  %graphIDloc = alloca i8*
-  %launch = call i32 @llvm.visc.launch(i8** %graphIDloc, i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args)
+  %args = bitcast %struct.arg* %in.addr to i8*
+  %graphID = call i8* @llvm.visc.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args)
   %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0
-  %graphID = load i8** %graphIDloc
-  %wait = call i32 @llvm.visc.wait(i8* %graphID)
-  %2 = getelementptr { i32, %rtype }* %in.addr, i32 0, i32 1
+  call void @llvm.visc.wait(i8* %graphID)
+  %2 = getelementptr %struct.arg* %in.addr, i32 0, i32 1
   %outputstruct = load %rtype* %2
   %output = extractvalue %rtype %outputstruct, 0
   %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output) #0
   ret i32 0
 }
 
-define { i32 } @producer(i32 %id) {
+define %rtype @producer(i32 %id) { ; node function: returns %id + 4 + the getNumNodeInstances.y result
   %sum = add i32 4, %id
   %this_node = call i8* @llvm.visc.getNode()
-  %dim = call i32 @llvm.visc.getNumNodeInstances.x(i8* %this_node)
+  %dim = call i32 @llvm.visc.getNumNodeInstances.y(i8* %this_node) ; this test now queries .y instead of .x
   %sum2 = add i32 %sum, %dim
-  %output = insertvalue { i32 } undef, i32 %sum2, 0
-  ret { i32 } %output
+  %output = insertvalue %rtype undef, i32 %sum2, 0
+  ret %rtype %output
 }
 
-define { i32 } @consumer(i32 %id) {
+define %rtype @consumer(i32 %id) {
   %sum = add i32 10, %id
-  %output = insertvalue { i32 } undef, i32 %sum, 0
-  ret { i32 } %output
+  %output = insertvalue %rtype undef, i32 %sum, 0
+  ret %rtype %output
 }
 
 define %rtype @Root(i32 %dimension) {
-  %p_node = call i8* @llvm.visc.createNode3D(i8* bitcast ({ i32 } (i32)* @producer to i8*), i32 %dimension, i32 10, i32 30)
-  %c_node = call i8* @llvm.visc.createNode(i8* bitcast ({ i32 } (i32)* @consumer to i8*))
+  %p_node = call i8* @llvm.visc.createNode3D(i8* bitcast (%rtype (i32)* @producer to i8*), i32 %dimension, i32 10, i32 30)
+  %c_node = call i8* @llvm.visc.createNode(i8* bitcast (%rtype (i32)* @consumer to i8*))
   %edge = call i8* @llvm.visc.createEdge(i8* %p_node, i8* %c_node, i1 false, i32 0, i32 0)
   call void @llvm.visc.bind.input(i8* %p_node, i32 0, i32 0)
   call void @llvm.visc.bind.output(i8* %c_node, i32 0, i32 0)
diff --git a/llvm/test/VISC/unitTests/queryNodeInst.ll b/llvm/test/VISC/unitTests/queryNodeInst.ll
index 258dff23dd3cd4b20891c04d9f990ae32397b041..6572ae36339f596685c24d2537d6477eec3ca754 100644
--- a/llvm/test/VISC/unitTests/queryNodeInst.ll
+++ b/llvm/test/VISC/unitTests/queryNodeInst.ll
@@ -1,12 +1,13 @@
 ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s
 ; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
 ; RUN: %t.bin 5
 ; ModuleID = '/home/psrivas2/current-test/unitTests/twoNode.ll'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-%rtype = type { i32 }
+%rtype = type <{i32}>
+%struct.arg = type <{ i32, %rtype }>
 
 @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
 
@@ -17,67 +18,63 @@ declare i8* @llvm.visc.createNode(i8*) #0
 declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32) #0
 
 ; Function Attrs: nounwind
-declare i32 @llvm.visc.launch(i8**, i8*, i8*) #0
+declare i8* @llvm.visc.launch(i8*, i8*) #0
 
 ; Function Attrs: nounwind
-declare i32 @llvm.visc.wait(i8*) #0
+declare void @llvm.visc.wait(i8*) #0
 
 ; Function Attrs: nounwind
-declare i8* @llvm.visc.getNode() #0
+declare void @llvm.visc.bind.input(i8*, i32, i32)
 
 ; Function Attrs: nounwind
-declare i8* @llvm.visc.getParentNode(i8*) #0
+declare void @llvm.visc.bind.output(i8*, i32, i32)
 
 ; Function Attrs: nounwind
-declare i32 @llvm.visc.getNumDims(i8*) #0
+declare i8* @llvm.visc.getNode() #0
 
 ; Function Attrs: nounwind
-declare void @llvm.visc.bind.input(i8*, i32, i32)
+declare i32 @llvm.visc.getNumDims(i8*) #0
 
-; Function Attrs: nounwind
-declare void @llvm.visc.bind.output(i8*, i32, i32)
 
 ; Function Attrs: nounwind uwtable
 define i32 @main(i32 %argc, i8** nocapture %argv) #1 {
 entry:
-  %in.addr = alloca { i32, %rtype }
+  %in.addr = alloca %struct.arg
   %arrayidx = getelementptr inbounds i8** %argv, i64 1
   %0 = load i8** %arrayidx, align 8, !tbaa !0
   %call.i = tail call i64 @strtol(i8* nocapture %0, i8** null, i32 10) #0
   %conv.i = trunc i64 %call.i to i32
-  %1 = bitcast { i32, %rtype }* %in.addr to i32*
+  %1 = bitcast %struct.arg* %in.addr to i32*
   store i32 %conv.i, i32* %1
-  %args = bitcast { i32, %rtype }* %in.addr to i8*
-  %graphIDloc = alloca i8*
-  %launch = call i32 @llvm.visc.launch(i8** %graphIDloc, i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args)
+  %args = bitcast %struct.arg* %in.addr to i8*
+  %graphID = call i8* @llvm.visc.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args)
   %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0
-  %graphID = load i8** %graphIDloc
-  %wait = call i32 @llvm.visc.wait(i8* %graphID)
-  %2 = getelementptr { i32, %rtype }* %in.addr, i32 0, i32 1
+  call void @llvm.visc.wait(i8* %graphID)
+  %2 = getelementptr %struct.arg* %in.addr, i32 0, i32 1
   %outputstruct = load %rtype* %2
   %output = extractvalue %rtype %outputstruct, 0
   %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output) #0
   ret i32 0
 }
 
-define { i32 } @producer(i32 %id) {
+define %rtype @producer(i32 %id) { ; node function: returns %id + 4 + the getNumDims result
   %sum = add i32 4, %id
   %this_node = call i8* @llvm.visc.getNode()
   %numDim = call i32 @llvm.visc.getNumDims(i8* %this_node)
   %sum2 = add i32 %sum, %numDim
-  %output = insertvalue { i32 } undef, i32 %sum, 0
-  ret { i32 } %output
+  %output = insertvalue %rtype undef, i32 %sum2, 0 ; use %sum2 so the queried value reaches the output instead of being dead
+  ret %rtype %output
 }
 
-define { i32 } @consumer(i32 %id) {
+define %rtype @consumer(i32 %id) {
   %sum = add i32 10, %id
-  %output = insertvalue { i32 } undef, i32 %sum, 0
-  ret { i32 } %output
+  %output = insertvalue %rtype undef, i32 %sum, 0
+  ret %rtype %output
 }
 
 define %rtype @Root(i32 %id) {
-  %p_node = call i8* @llvm.visc.createNode(i8* bitcast ({ i32 } (i32)* @producer to i8*))
-  %c_node = call i8* @llvm.visc.createNode(i8* bitcast ({ i32 } (i32)* @consumer to i8*))
+  %p_node = call i8* @llvm.visc.createNode(i8* bitcast (%rtype (i32)* @producer to i8*))
+  %c_node = call i8* @llvm.visc.createNode(i8* bitcast (%rtype (i32)* @consumer to i8*))
   %edge = call i8* @llvm.visc.createEdge(i8* %p_node, i8* %c_node, i1 false, i32 0, i32 0)
   call void @llvm.visc.bind.input(i8* %p_node, i32 0, i32 0)
   call void @llvm.visc.bind.output(i8* %c_node, i32 0, i32 0)
diff --git a/llvm/test/VISC/unitTests/queryNumDim.ll b/llvm/test/VISC/unitTests/queryNumDim.ll
index 258dff23dd3cd4b20891c04d9f990ae32397b041..21de1ded737277e5071a60d0630a9023ccf8948c 100644
--- a/llvm/test/VISC/unitTests/queryNumDim.ll
+++ b/llvm/test/VISC/unitTests/queryNumDim.ll
@@ -1,12 +1,13 @@
 ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s
 ; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
 ; RUN: %t.bin 5
 ; ModuleID = '/home/psrivas2/current-test/unitTests/twoNode.ll'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-%rtype = type { i32 }
+%rtype = type <{i32}>
+%struct.arg = type <{ i32, %rtype }>
 
 @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
 
@@ -17,10 +18,10 @@ declare i8* @llvm.visc.createNode(i8*) #0
 declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32) #0
 
 ; Function Attrs: nounwind
-declare i32 @llvm.visc.launch(i8**, i8*, i8*) #0
+declare i8* @llvm.visc.launch(i8*, i8*) #0
 
 ; Function Attrs: nounwind
-declare i32 @llvm.visc.wait(i8*) #0
+declare void @llvm.visc.wait(i8*) #0
 
 ; Function Attrs: nounwind
 declare i8* @llvm.visc.getNode() #0
@@ -40,44 +41,42 @@ declare void @llvm.visc.bind.output(i8*, i32, i32)
 ; Function Attrs: nounwind uwtable
 define i32 @main(i32 %argc, i8** nocapture %argv) #1 {
 entry:
-  %in.addr = alloca { i32, %rtype }
+  %in.addr = alloca %struct.arg
   %arrayidx = getelementptr inbounds i8** %argv, i64 1
   %0 = load i8** %arrayidx, align 8, !tbaa !0
   %call.i = tail call i64 @strtol(i8* nocapture %0, i8** null, i32 10) #0
   %conv.i = trunc i64 %call.i to i32
-  %1 = bitcast { i32, %rtype }* %in.addr to i32*
+  %1 = bitcast %struct.arg* %in.addr to i32*
   store i32 %conv.i, i32* %1
-  %args = bitcast { i32, %rtype }* %in.addr to i8*
-  %graphIDloc = alloca i8*
-  %launch = call i32 @llvm.visc.launch(i8** %graphIDloc, i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args)
+  %args = bitcast %struct.arg* %in.addr to i8*
+  %graphID = call i8* @llvm.visc.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args)
   %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0
-  %graphID = load i8** %graphIDloc
-  %wait = call i32 @llvm.visc.wait(i8* %graphID)
-  %2 = getelementptr { i32, %rtype }* %in.addr, i32 0, i32 1
+  call void @llvm.visc.wait(i8* %graphID)
+  %2 = getelementptr %struct.arg* %in.addr, i32 0, i32 1
   %outputstruct = load %rtype* %2
   %output = extractvalue %rtype %outputstruct, 0
   %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output) #0
   ret i32 0
 }
 
-define { i32 } @producer(i32 %id) {
+define %rtype @producer(i32 %id) { ; node function: returns %id + 4 + the getNumDims result
   %sum = add i32 4, %id
   %this_node = call i8* @llvm.visc.getNode()
   %numDim = call i32 @llvm.visc.getNumDims(i8* %this_node)
   %sum2 = add i32 %sum, %numDim
-  %output = insertvalue { i32 } undef, i32 %sum, 0
-  ret { i32 } %output
+  %output = insertvalue %rtype undef, i32 %sum2, 0 ; use %sum2 so the queried value reaches the output instead of being dead
+  ret %rtype %output
 }
 
-define { i32 } @consumer(i32 %id) {
+define %rtype @consumer(i32 %id) {
   %sum = add i32 10, %id
-  %output = insertvalue { i32 } undef, i32 %sum, 0
-  ret { i32 } %output
+  %output = insertvalue %rtype undef, i32 %sum, 0
+  ret %rtype %output
 }
 
 define %rtype @Root(i32 %id) {
-  %p_node = call i8* @llvm.visc.createNode(i8* bitcast ({ i32 } (i32)* @producer to i8*))
-  %c_node = call i8* @llvm.visc.createNode(i8* bitcast ({ i32 } (i32)* @consumer to i8*))
+  %p_node = call i8* @llvm.visc.createNode(i8* bitcast (%rtype (i32)* @producer to i8*))
+  %c_node = call i8* @llvm.visc.createNode(i8* bitcast (%rtype (i32)* @consumer to i8*))
   %edge = call i8* @llvm.visc.createEdge(i8* %p_node, i8* %c_node, i1 false, i32 0, i32 0)
   call void @llvm.visc.bind.input(i8* %p_node, i32 0, i32 0)
   call void @llvm.visc.bind.output(i8* %c_node, i32 0, i32 0)
diff --git a/llvm/test/VISC/unitTests/queryNumNodeInst.ll b/llvm/test/VISC/unitTests/queryNumNodeInst.ll
index ce6a639215f8ad2602452581d46d719980db36bf..ae96791491d0663e9055857c897ed4d5153b6c12 100644
--- a/llvm/test/VISC/unitTests/queryNumNodeInst.ll
+++ b/llvm/test/VISC/unitTests/queryNumNodeInst.ll
@@ -1,12 +1,13 @@
 ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s
 ; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
 ; RUN: %t.bin 5
 ; ModuleID = '/home/psrivas2/current-test/unitTests/twoNode.ll'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-%rtype = type { i32 }
+%rtype = type <{i32}>
+%struct.arg = type <{ i32, %rtype }>
 
 @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
 
@@ -20,10 +21,10 @@ declare i8* @llvm.visc.createNode1D(i8*, i32) #0
 declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32) #0
 
 ; Function Attrs: nounwind
-declare i32 @llvm.visc.launch(i8**, i8*, i8*) #0
+declare i8* @llvm.visc.launch(i8*, i8*) #0
 
 ; Function Attrs: nounwind
-declare i32 @llvm.visc.wait(i8*) #0
+declare void @llvm.visc.wait(i8*) #0
 
 ; Function Attrs: nounwind
 declare i8* @llvm.visc.getNode() #0
@@ -46,44 +47,42 @@ declare void @llvm.visc.bind.output(i8*, i32, i32)
 ; Function Attrs: nounwind uwtable
 define i32 @main(i32 %argc, i8** nocapture %argv) #1 {
 entry:
-  %in.addr = alloca { i32, %rtype }
+  %in.addr = alloca %struct.arg
   %arrayidx = getelementptr inbounds i8** %argv, i64 1
   %0 = load i8** %arrayidx, align 8, !tbaa !0
   %call.i = tail call i64 @strtol(i8* nocapture %0, i8** null, i32 10) #0
   %conv.i = trunc i64 %call.i to i32
-  %1 = bitcast { i32, %rtype }* %in.addr to i32*
+  %1 = bitcast %struct.arg* %in.addr to i32*
   store i32 %conv.i, i32* %1
-  %args = bitcast { i32, %rtype }* %in.addr to i8*
-  %graphIDloc = alloca i8*
-  %launch = call i32 @llvm.visc.launch(i8** %graphIDloc, i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args)
+  %args = bitcast %struct.arg* %in.addr to i8*
+  %graphID = call i8* @llvm.visc.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args)
   %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0
-  %graphID = load i8** %graphIDloc
-  %wait = call i32 @llvm.visc.wait(i8* %graphID)
-  %2 = getelementptr { i32, %rtype }* %in.addr, i32 0, i32 1
+  call void @llvm.visc.wait(i8* %graphID)
+  %2 = getelementptr %struct.arg* %in.addr, i32 0, i32 1
   %outputstruct = load %rtype* %2
   %output = extractvalue %rtype %outputstruct, 0
   %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output) #0
   ret i32 0
 }
 
-define { i32 } @producer(i32 %id) {
+define %rtype @producer(i32 %id) {
   %sum = add i32 4, %id
   %this_node = call i8* @llvm.visc.getNode()
   %dim = call i32 @llvm.visc.getNumNodeInstances.x(i8* %this_node)
   %sum2 = add i32 %sum, %dim
-  %output = insertvalue { i32 } undef, i32 %sum2, 0
-  ret { i32 } %output
+  %output = insertvalue %rtype undef, i32 %sum2, 0
+  ret %rtype %output
 }
 
-define { i32 } @consumer(i32 %id) {
+define %rtype @consumer(i32 %id) {
   %sum = add i32 10, %id
-  %output = insertvalue { i32 } undef, i32 %sum, 0
-  ret { i32 } %output
+  %output = insertvalue %rtype undef, i32 %sum, 0
+  ret %rtype %output
 }
 
 define %rtype @Root(i32 %dimension) {
-  %p_node = call i8* @llvm.visc.createNode1D(i8* bitcast ({ i32 } (i32)* @producer to i8*), i32 %dimension)
-  %c_node = call i8* @llvm.visc.createNode(i8* bitcast ({ i32 } (i32)* @consumer to i8*))
+  %p_node = call i8* @llvm.visc.createNode1D(i8* bitcast (%rtype (i32)* @producer to i8*), i32 %dimension)
+  %c_node = call i8* @llvm.visc.createNode(i8* bitcast (%rtype (i32)* @consumer to i8*))
   %edge = call i8* @llvm.visc.createEdge(i8* %p_node, i8* %c_node, i1 false, i32 0, i32 0)
   call void @llvm.visc.bind.input(i8* %p_node, i32 0, i32 0)
   call void @llvm.visc.bind.output(i8* %c_node, i32 0, i32 0)
diff --git a/llvm/test/VISC/unitTests/singleNode.ll b/llvm/test/VISC/unitTests/singleNode.ll
index 1d45ff55cb467301dc1ab49b642c221e7933ab5b..94bf7314f20f61c23fa359e9c606a500b3345986 100644
--- a/llvm/test/VISC/unitTests/singleNode.ll
+++ b/llvm/test/VISC/unitTests/singleNode.ll
@@ -1,12 +1,13 @@
 ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s
 ; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
 ; RUN: %t.bin 5
 ; ModuleID = '/home/psrivas2/current-test/unitTests/singleNode.ll'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-%rtype = type { }
+%rtype = type <{i32}>
+%struct.arg = type <{ %rtype }>
 
 @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
 
@@ -17,36 +18,41 @@ declare i8* @llvm.visc.createNode(i8*) #0
 declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32) #0
 
 ; Function Attrs: nounwind
-declare i32 @llvm.visc.launch(i8**, i8*, i8*) #0
+declare i8* @llvm.visc.launch(i8*, i8*) #0
 
 ; Function Attrs: nounwind
-declare i32 @llvm.visc.wait(i8*) #0
+declare void @llvm.visc.wait(i8*) #0
+
+; Function Attrs: nounwind
+declare void @llvm.visc.bind.input(i8*, i32, i32)
+
+; Function Attrs: nounwind
+declare void @llvm.visc.bind.output(i8*, i32, i32)
 
 ; Function Attrs: nounwind uwtable
 define i32 @main(i32 %argc, i8** nocapture %argv) #1 {
 entry:
-  %in.addr = alloca { %rtype }
+  %in.addr = alloca %struct.arg
   %arrayidx = getelementptr inbounds i8** %argv, i64 1
   %0 = load i8** %arrayidx, align 8, !tbaa !0
   %call.i = tail call i64 @strtol(i8* nocapture %0, i8** null, i32 10) #0
   %conv.i = trunc i64 %call.i to i32
-  %args = bitcast { %rtype }* %in.addr to i8*
-  %graphIDloc = alloca i8*
-  %launch = call i32 @llvm.visc.launch(i8** %graphIDloc, i8* bitcast (%rtype ()* @Root to i8*), i8* %args)
+  %args = bitcast %struct.arg* %in.addr to i8*
+  %graphID = call i8* @llvm.visc.launch(i8* bitcast (%rtype ()* @Root to i8*), i8* %args)
   %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0
-  %graphID = load i8** %graphIDloc
-  %wait = call i32 @llvm.visc.wait(i8* %graphID)
+  call void @llvm.visc.wait(i8* %graphID)
   ret i32 0
 }
 
-define { i32 } @foo() {
+define %rtype @foo() {
   %sum = add i32 4, 10 
-  %output = insertvalue { i32 } undef, i32 %sum, 0
-  ret { i32 } %output
+  %output = insertvalue %rtype undef, i32 %sum, 0
+  ret %rtype %output
 }
 
 define %rtype @Root() {
-  %node = call i8* @llvm.visc.createNode(i8* bitcast ({ i32 } ()* @foo to i8*))
+  %node = call i8* @llvm.visc.createNode(i8* bitcast (%rtype ()* @foo to i8*)) ; single child node built from @foo
+  call void @llvm.visc.bind.output(i8* %node, i32 0, i32 0) ; newly added; presumably maps the child's output 0 to Root's output 0 - TODO confirm
   ret %rtype zeroinitializer
 }
 
diff --git a/llvm/test/VISC/unitTests/twoNode.ll b/llvm/test/VISC/unitTests/twoNode.ll
index b0626a988ff68ee8aebb79a7edf85fc9bbb40a55..3fcc9a353206c4ce84fb6558b6751437227bf981 100644
--- a/llvm/test/VISC/unitTests/twoNode.ll
+++ b/llvm/test/VISC/unitTests/twoNode.ll
@@ -1,12 +1,12 @@
 ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s
 ; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
 ; RUN: %t.bin 5
 ; ModuleID = '/home/psrivas2/current-test/unitTests/twoNode.ll'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
-
-%rtype = type { }
+%rtype = type <{i32}>  ; single-i32 result type returned by @producer, @consumer and @Root
+%struct.arg = type <{ i32, %rtype }>  ; i32 input for @Root's %id (stored by @main), then space for the %rtype result - NOTE(review): result placement assumed from the sibling two-node tests
 
 @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
 
@@ -17,46 +17,51 @@ declare i8* @llvm.visc.createNode(i8*) #0
 declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32) #0
 
 ; Function Attrs: nounwind
-declare i32 @llvm.visc.launch(i8**, i8*, i8*) #0
+declare i8* @llvm.visc.launch(i8*, i8*) #0
+
+; Function Attrs: nounwind
+declare void @llvm.visc.wait(i8*) #0
+
+; Function Attrs: nounwind
+declare void @llvm.visc.bind.input(i8*, i32, i32)
 
 ; Function Attrs: nounwind
-declare i32 @llvm.visc.wait(i8*) #0
+declare void @llvm.visc.bind.output(i8*, i32, i32)
 
 ; Function Attrs: nounwind uwtable
 define i32 @main(i32 %argc, i8** nocapture %argv) #1 {
 entry:
-  %in.addr = alloca { i32, %rtype }
+  %in.addr = alloca %struct.arg  ; argument block handed to the launch intrinsic as raw bytes
   %arrayidx = getelementptr inbounds i8** %argv, i64 1
   %0 = load i8** %arrayidx, align 8, !tbaa !0
   %call.i = tail call i64 @strtol(i8* nocapture %0, i8** null, i32 10) #0
   %conv.i = trunc i64 %call.i to i32
-  %1 = bitcast { i32, %rtype }* %in.addr to i32*
+  %1 = bitcast %struct.arg* %in.addr to i32*  ; i32 view of the start of the block; the parsed argv[1] value is stored there next
   store i32 %conv.i, i32* %1
-  %args = bitcast { i32, %rtype }* %in.addr to i8*
-  %graphIDloc = alloca i8*
-  %launch = call i32 @llvm.visc.launch(i8** %graphIDloc, i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args)
+  %args = bitcast %struct.arg* %in.addr to i8*  ; type-erased view of the block for the i8* parameter
+  %graphID = call i8* @llvm.visc.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args)  ; launch @Root; the returned handle is passed to wait below
   %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0
-  %graphID = load i8** %graphIDloc
-  %wait = call i32 @llvm.visc.wait(i8* %graphID)
+  call void @llvm.visc.wait(i8* %graphID)  ; NOTE(review): presumably blocks until the launched graph completes - confirm
   ret i32 0
 }
 
-define { i32 } @producer() {
+define %rtype @producer() {  ; source node function: takes no input, returns the constant 4 + 10 in field 0
   %sum = add i32 4, 10
-  %output = insertvalue { i32 } undef, i32 %sum, 0
-  ret { i32 } %output
+  %output = insertvalue %rtype undef, i32 %sum, 0  ; wrap the scalar in the named result struct
+  ret %rtype %output
 }
 
-define { i32 } @consumer(i32 %id) {
+define %rtype @consumer(i32 %id) {  ; node function: returns 10 + %id in field 0
   %sum = add i32 10, %id
-  %output = insertvalue { i32 } undef, i32 %sum, 0
-  ret { i32 } %output
+  %output = insertvalue %rtype undef, i32 %sum, 0  ; wrap the scalar in the named result struct
+  ret %rtype %output
 }
 
 define %rtype @Root(i32 %id) {
-  %p_node = call i8* @llvm.visc.createNode(i8* bitcast ({ i32 } ()* @producer to i8*))
-  %c_node = call i8* @llvm.visc.createNode(i8* bitcast ({ i32 } (i32)* @consumer to i8*))
+  %p_node = call i8* @llvm.visc.createNode(i8* bitcast (%rtype ()* @producer to i8*))  ; node handle for @producer
+  %c_node = call i8* @llvm.visc.createNode(i8* bitcast (%rtype (i32)* @consumer to i8*))  ; node handle for @consumer
   %edge = call i8* @llvm.visc.createEdge(i8* %p_node, i8* %c_node, i1 false, i32 0, i32 0)
+  call void @llvm.visc.bind.output(i8* %c_node, i32 0, i32 0)  ; NOTE(review): presumably maps output 0 of %c_node to output 0 of @Root - confirm operand order
   ret %rtype zeroinitializer
 }
 
diff --git a/llvm/test/VISC/unitTests/twoNodeConnect.ll b/llvm/test/VISC/unitTests/twoNodeConnect.ll
index a005c8fadf11e555a892ab82114b09b6b8d75e39..e61636c71ab3fd8dbf7a6dca210bdaa04636048e 100644
--- a/llvm/test/VISC/unitTests/twoNodeConnect.ll
+++ b/llvm/test/VISC/unitTests/twoNodeConnect.ll
@@ -1,12 +1,12 @@
 ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s
 ; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
 ; RUN: %t.bin 5
 ; ModuleID = '/home/psrivas2/current-test/unitTests/twoNodeConnect.ll'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
-
-%rtype = type { i32 }
+%rtype = type <{i32}>
+%struct.arg = type <{ i32, %rtype }>
 
 @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
 
@@ -17,13 +17,10 @@ declare i8* @llvm.visc.createNode(i8*) #0
 declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32) #0
 
 ; Function Attrs: nounwind
-declare i32 @llvm.visc.launch(i8**, i8*, i8*) #0
-
-; Function Attrs: nounwind
-declare i32 @llvm.visc.wait(i8*) #0
+declare i8* @llvm.visc.launch(i8*, i8*) #0
 
 ; Function Attrs: nounwind
-declare i8* @llvm.visc.getNode() #0
+declare void @llvm.visc.wait(i8*) #0
 
 ; Function Attrs: nounwind
 declare void @llvm.visc.bind.input(i8*, i32, i32)
@@ -34,41 +31,39 @@ declare void @llvm.visc.bind.output(i8*, i32, i32)
 ; Function Attrs: nounwind uwtable
 define i32 @main(i32 %argc, i8** nocapture %argv) #1 {
 entry:
-  %in.addr = alloca { i32, %rtype }
+  %in.addr = alloca %struct.arg  ; argument block: i32 input followed by a %rtype member that is read back after wait
   %arrayidx = getelementptr inbounds i8** %argv, i64 1
   %0 = load i8** %arrayidx, align 8, !tbaa !0
   %call.i = tail call i64 @strtol(i8* nocapture %0, i8** null, i32 10) #0
   %conv.i = trunc i64 %call.i to i32
-  %1 = bitcast { i32, %rtype }* %in.addr to i32*
+  %1 = bitcast %struct.arg* %in.addr to i32*  ; i32 view of field 0; the parsed argv[1] value is stored there next
   store i32 %conv.i, i32* %1
-  %args = bitcast { i32, %rtype }* %in.addr to i8*
-  %graphIDloc = alloca i8*
-  %launch = call i32 @llvm.visc.launch(i8** %graphIDloc, i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args)
+  %args = bitcast %struct.arg* %in.addr to i8*  ; type-erased view of the block for the i8* parameter
+  %graphID = call i8* @llvm.visc.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args)  ; launch @Root; the returned handle is passed to wait below
   %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0
-  %graphID = load i8** %graphIDloc
-  %wait = call i32 @llvm.visc.wait(i8* %graphID)
-  %2 = getelementptr { i32, %rtype }* %in.addr, i32 0, i32 1
+  call void @llvm.visc.wait(i8* %graphID)  ; NOTE(review): presumably blocks until the launched graph completes - confirm
+  %2 = getelementptr %struct.arg* %in.addr, i32 0, i32 1  ; address of field 1 (the %rtype member), loaded and printed below
   %outputstruct = load %rtype* %2
   %output = extractvalue %rtype %outputstruct, 0
   %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output) #0
   ret i32 0
 }
 
-define { i32 } @producer(i32 %id) {
+define %rtype @producer(i32 %id) {  ; node function: returns 4 + %id in field 0
   %sum = add i32 4, %id
-  %output = insertvalue { i32 } undef, i32 %sum, 0
-  ret { i32 } %output
+  %output = insertvalue %rtype undef, i32 %sum, 0  ; wrap the scalar in the named result struct
+  ret %rtype %output
 }
 
-define { i32 } @consumer(i32 %id) {
+define %rtype @consumer(i32 %id) {  ; node function: returns 10 + %id in field 0
   %sum = add i32 10, %id
-  %output = insertvalue { i32 } undef, i32 %sum, 0
-  ret { i32 } %output
+  %output = insertvalue %rtype undef, i32 %sum, 0  ; wrap the scalar in the named result struct
+  ret %rtype %output
 }
 
 define %rtype @Root(i32 %id) {
-  %p_node = call i8* @llvm.visc.createNode(i8* bitcast ({ i32 } (i32)* @producer to i8*))
-  %c_node = call i8* @llvm.visc.createNode(i8* bitcast ({ i32 } (i32)* @consumer to i8*))
+  %p_node = call i8* @llvm.visc.createNode(i8* bitcast (%rtype (i32)* @producer to i8*))
+  %c_node = call i8* @llvm.visc.createNode(i8* bitcast (%rtype (i32)* @consumer to i8*))
   %edge = call i8* @llvm.visc.createEdge(i8* %p_node, i8* %c_node, i1 false, i32 0, i32 0)
   call void @llvm.visc.bind.input(i8* %p_node, i32 0, i32 0)
   call void @llvm.visc.bind.output(i8* %c_node, i32 0, i32 0)
diff --git a/llvm/test/VISC/unitTests/twoNodeQuery.ll b/llvm/test/VISC/unitTests/twoNodeQuery.ll
index 08c69507be55c425f53aa8d41ea7f68d62e3ae48..a9df546b24c7d788144fc983a794acba068881e1 100644
--- a/llvm/test/VISC/unitTests/twoNodeQuery.ll
+++ b/llvm/test/VISC/unitTests/twoNodeQuery.ll
@@ -1,12 +1,12 @@
 ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s
 ; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
 ; RUN: %t.bin 5
 ; ModuleID = '/home/psrivas2/current-test/unitTests/twoNodeQuery.ll'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
-
-%rtype = type { i32 }
+%rtype = type <{i32}>
+%struct.arg = type <{ i32, %rtype }>
 
 @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
 
@@ -17,67 +17,65 @@ declare i8* @llvm.visc.createNode(i8*) #0
 declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32) #0
 
 ; Function Attrs: nounwind
-declare i32 @llvm.visc.launch(i8**, i8*, i8*) #0
+declare i8* @llvm.visc.launch(i8*, i8*) #0
 
 ; Function Attrs: nounwind
-declare i32 @llvm.visc.wait(i8*) #0
+declare void @llvm.visc.wait(i8*) #0
 
 ; Function Attrs: nounwind
-declare i8* @llvm.visc.getNode() #0
+declare void @llvm.visc.bind.input(i8*, i32, i32)
 
 ; Function Attrs: nounwind
-declare i8* @llvm.visc.getParentNode(i8*) #0
+declare void @llvm.visc.bind.output(i8*, i32, i32)
 
 ; Function Attrs: nounwind
-declare i32 @llvm.visc.getNumDims(i8*) #0
+declare i8* @llvm.visc.getNode() #0
 
 ; Function Attrs: nounwind
-declare void @llvm.visc.bind.input(i8*, i32, i32)
+declare i8* @llvm.visc.getParentNode(i8*) #0
 
 ; Function Attrs: nounwind
-declare void @llvm.visc.bind.output(i8*, i32, i32)
+declare i32 @llvm.visc.getNumDims(i8*) #0
 
 ; Function Attrs: nounwind uwtable
 define i32 @main(i32 %argc, i8** nocapture %argv) #1 {
 entry:
-  %in.addr = alloca { i32, %rtype }
+  %in.addr = alloca %struct.arg  ; argument block: i32 input followed by a %rtype member that is read back after wait
   %arrayidx = getelementptr inbounds i8** %argv, i64 1
   %0 = load i8** %arrayidx, align 8, !tbaa !0
   %call.i = tail call i64 @strtol(i8* nocapture %0, i8** null, i32 10) #0
   %conv.i = trunc i64 %call.i to i32
-  %1 = bitcast { i32, %rtype }* %in.addr to i32*
+  %1 = bitcast %struct.arg* %in.addr to i32*  ; i32 view of field 0; the parsed argv[1] value is stored there next
   store i32 %conv.i, i32* %1
-  %args = bitcast { i32, %rtype }* %in.addr to i8*
-  %graphIDloc = alloca i8*
-  %launch = call i32 @llvm.visc.launch(i8** %graphIDloc, i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args)
+  %args = bitcast %struct.arg* %in.addr to i8*  ; type-erased view of the block for the i8* parameter
+  %graphID = call i8* @llvm.visc.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args)  ; launch @Root; the returned handle is passed to wait below
   %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0
-  %graphID = load i8** %graphIDloc
-  %wait = call i32 @llvm.visc.wait(i8* %graphID)
-  %2 = getelementptr { i32, %rtype }* %in.addr, i32 0, i32 1
+  call void @llvm.visc.wait(i8* %graphID)  ; NOTE(review): presumably blocks until the launched graph completes - confirm
+  %2 = getelementptr %struct.arg* %in.addr, i32 0, i32 1  ; address of field 1 (the %rtype member), loaded and printed below
   %outputstruct = load %rtype* %2
   %output = extractvalue %rtype %outputstruct, 0
   %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output) #0
   ret i32 0
 }
 
-define { i32 } @producer(i32 %id) {
+define %rtype @producer(i32 %id) {  ; node function: calls the getNode/getNumDims query intrinsics and returns 4 + %id in field 0
   %sum = add i32 4, %id
   %this_node = call i8* @llvm.visc.getNode()
   %numDim = call i32 @llvm.visc.getNumDims(i8* %this_node)
   %sum2 = add i32 %sum, %numDim
-  %output = insertvalue { i32 } undef, i32 %sum, 0
-  ret { i32 } %output
+  %output = insertvalue %rtype undef, i32 %sum, 0  ; NOTE(review): returns %sum, so %sum2 (which adds %numDim) is never used - confirm this is intended
+  ret %rtype %output
 }
 
-define { i32 } @consumer(i32 %id) {
+define %rtype @consumer(i32 %id) {  ; node function: returns 10 + %id in field 0
   %sum = add i32 10, %id
-  %output = insertvalue { i32 } undef, i32 %sum, 0
-  ret { i32 } %output
+  %output = insertvalue %rtype undef, i32 %sum, 0  ; wrap the scalar in the named result struct
+  ret %rtype %output
 }
 
 define %rtype @Root(i32 %id) {
-  %p_node = call i8* @llvm.visc.createNode(i8* bitcast ({ i32 } (i32)* @producer to i8*))
-  %c_node = call i8* @llvm.visc.createNode(i8* bitcast ({ i32 } (i32)* @consumer to i8*))
+  %p_node = call i8* @llvm.visc.createNode(i8* bitcast (%rtype (i32)* @producer to i8*))
+  %c_node = call i8* @llvm.visc.createNode(i8* bitcast (%rtype (i32)* @consumer to i8*))
   %edge = call i8* @llvm.visc.createEdge(i8* %p_node, i8* %c_node, i1 false, i32 0, i32 0)
   call void @llvm.visc.bind.input(i8* %p_node, i32 0, i32 0)
   call void @llvm.visc.bind.output(i8* %c_node, i32 0, i32 0)