diff --git a/llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/visc_mri-q.ll b/llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/visc_mri-q.ll
index cd698d8e0b97127e5f75b9372017db7b9dd36e28..a767cca31e88c3a2dc4c63f7baf9ec2c94e887f5 100644
--- a/llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/visc_mri-q.ll
+++ b/llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/visc_mri-q.ll
@@ -271,7 +271,8 @@ declare noalias i8* @malloc(i64) #1
 ; ------------------ VISC Intrinsics -------------------
 ; Return Type of VISC computePhiMag and computeQ
 %rtype = type {}
-%struct.arg.phiMag = type <{ float*, i64, float*, i64, float*, i64, i32, %rtype }>
+%struct.arg.phiMag = type <{ float*, i64, float*, i64, float*, i64, i32, i64, i64 %rtype }>
+%struct.arg.Q = type <{i32, i32, float*, i64, float*, i64, float*, i64, float*, i64, float*, i64, %struct.kValues*, i64, i64, i64, %rtype}>
 
 ; Function Attrs: nounwind
 declare i8* @llvm.visc.launch(i8*, i8*) #0
@@ -323,7 +324,7 @@ declare void @llvm.visc.bind.output(i8*, i32, i32)
 ; ----------------- VISC intrinsics end ------------------
 
 ; Function Attrs: noinline nounwind uwtable
-define void @computePhiMag_kernel(float* in %phiR, i64 %bytes_phiR, float* in %phiI, i64 %bytes_phiI, float* out %phiMag, i64 %bytes_phiMag, i32 %numK) #4 {
+define %rtype @computePhiMag_kernel(float* in %phiR, i64 %bytes_phiR, float* in %phiI, i64 %bytes_phiI, float* out %phiMag, i64 %bytes_phiMag, i32 %numK) #4 {
   %1 = alloca float*, align 8
   %2 = alloca i64, align 8
   %3 = alloca float*, align 8
@@ -345,7 +346,7 @@ define void @computePhiMag_kernel(float* in %phiR, i64 %bytes_phiR, float* in %p
   ; ------------------------- VISC changes ------------------
   ; Replace get_global_id call with call to getNode followed by getNumNodeInstances.x
   ; Replaced statement -- 
-  ; -- %8 = call i32 (i32, ...)* bitcast (i32 (...)* @get_global_id to i32 (i32, ...)*)(i32 0)
+  ; -- %global_id = call i32 (i32, ...)* bitcast (i32 (...)* @get_global_id to i32 (i32, ...)*)(i32 0)
   %this_node = call i8* @llvm.visc.getNode()
   %L = call i32 @llvm.visc.getNodeInstanceID.x(i8* %this_node)
   %LLimit = call i32 @llvm.visc.getNumNodeInstances.x(i8* %this_node)
@@ -354,11 +355,11 @@ define void @computePhiMag_kernel(float* in %phiR, i64 %bytes_phiR, float* in %p
   %G = call i32 @llvm.visc.getNodeInstanceID.x(i8* %parent_node)
 
   %tmp = mul i32 %G, %LLimit
-  %8 = add i32 %tmp, %L
+  %global_id = add i32 %tmp, %L
 
-  ;%8 = call i32 (i32, ...)* bitcast (i32 (...)* @get_global_id to i32 (i32, ...)*)(i32 0)
+  ;%global_id = call i32 (i32, ...)* bitcast (i32 (...)* @get_global_id to i32 (i32, ...)*)(i32 0)
   ; ---------------------- VISC changes End ------------------
-  store i32 %8, i32* %indexK, align 4
+  store i32 %global_id, i32* %indexK, align 4
   %9 = load i32* %indexK, align 4
   %10 = load i32* %7, align 4
   %11 = icmp slt i32 %9, %10
@@ -392,16 +393,38 @@ define void @computePhiMag_kernel(float* in %phiR, i64 %bytes_phiR, float* in %p
   br label %34
 
 ; <label>:34                                      ; preds = %12, %0
-  ret void
+  ret %rtype undef
 }
 
-define void @computePhiMagi_internal(float* in %phiR, i64 %bytes_phiR, float* in %phiI, i64 %bytes_phiI, float* out %phiMag, i64 %bytes_phiMag, i32 %numK, i64 %DimPhiMagBlock) {
-
+; ----------------- Compute Phi Mag internal node ----------------
+define %rtype @computePhiMag_internal(float* in %phiR, i64 %bytes_phiR, float* in %phiI, i64 %bytes_phiI, float* out %phiMag, i64 %bytes_phiMag, i32 %numK, i64 %DimPhiMagBlock) {
+  %kernel = call i8* @llvm.visc.createNode1D(i8* bitcast (%rtype (float*, i64, float*, i64, float*, i64, i32)* @computePhiMag_kernel to i8*), i64 %DimPhiMagBlock)
+  ; Bind Inputs
+  call void @llvm.visc.bind.input(i8* %kernel, i32 0, i32 0); phiR
+  call void @llvm.visc.bind.input(i8* %kernel, i32 1, i32 1); bytes_phiR
+  call void @llvm.visc.bind.input(i8* %kernel, i32 2, i32 2); phiI
+  call void @llvm.visc.bind.input(i8* %kernel, i32 3, i32 3); bytes_phiI
+  call void @llvm.visc.bind.input(i8* %kernel, i32 4, i32 4); phiMag
+  call void @llvm.visc.bind.input(i8* %kernel, i32 5, i32 5); bytes_phiMag
+  call void @llvm.visc.bind.input(i8* %kernel, i32 6, i32 6); numK
+  ; Bind Outputs
+  ret %rtype undef
 }
 
-define void @computePhiMagi_root(float* in %phiR, i64 %bytes_phiR, float* in %phiI, i64 %bytes_phiI, float* out %phiMag, i64 %bytes_phiMag, i32 %numK, i64 %DimPhiMagBlock, i64 %DimPhiMagGrid) {
-
-  
+; ----------------- Compute Phi Mag root node ----------------
+define %rtype @computePhiMag_root(float* in %phiR, i64 %bytes_phiR, float* in %phiI, i64 %bytes_phiI, float* out %phiMag, i64 %bytes_phiMag, i32 %numK, i64 %DimPhiMagBlock, i64 %DimPhiMagGrid) {
+  %kernel = call i8* @llvm.visc.createNode1D(i8* bitcast (%rtype (float*, i64, float*, i64, float*, i64, i32, i64)* @computePhiMagi_internal to i8*), i64 %DimPhiMagGrid)
+  ; Bind Inputs
+  call void @llvm.visc.bind.input(i8* %kernel, i32 0, i32 0); phiR
+  call void @llvm.visc.bind.input(i8* %kernel, i32 1, i32 1); bytes_phiR
+  call void @llvm.visc.bind.input(i8* %kernel, i32 2, i32 2); phiI
+  call void @llvm.visc.bind.input(i8* %kernel, i32 3, i32 3); bytes_phiI
+  call void @llvm.visc.bind.input(i8* %kernel, i32 4, i32 4); phiMag
+  call void @llvm.visc.bind.input(i8* %kernel, i32 5, i32 5); bytes_phiMag
+  call void @llvm.visc.bind.input(i8* %kernel, i32 6, i32 6); numK
+  call void @llvm.visc.bind.input(i8* %kernel, i32 7, i32 7); DimPhiMagBlock
+  ; Bind Outputs
+  ret %rtype undef 
 }
 
 ; Function Attrs: noinline nounwind uwtable
@@ -449,12 +472,47 @@ define void @computePhiMag(i32 %numK, float* %phiR, float* %phiI, float* %phiMag
   %24 = load float** %4, align 8
   %25 = load i64* %bytes_phi, align 8
   %26 = load i32* %1, align 4
-  call void @computePhiMag_kernel(float* %20, i64 %21, float* %22, i64 %23, float* %24, i64 %25, i32 %26)
+  ; ---------------------------------- Adding VISC Launch Call --------------------------------
+  ; Replaced - call void @computePhiMag_kernel(float* %20, i64 %21, float* %22, i64 %23, float* %24, i64 %25, i32 %26)
+  ; Setting up launch input args
+  %in.addr = alloca %struct.arg.phiMag
+
+  ; Store arguments
+  %in.addr.phiR = getelementptr %struct.arg* %in.addr, i32 0, i32 0
+  %in.addr.bytes_phiR = getelementptr %struct.arg* %in.addr, i32 0, i32 1
+  %in.addr.phiI = getelementptr %struct.arg* %in.addr, i32 0, i32 2
+  %in.addr.bytes_phiI = getelementptr %struct.arg* %in.addr, i32 0, i32 3
+  %in.addr.phiMag = getelementptr %struct.arg* %in.addr, i32 0, i32 4
+  %in.addr.bytes_phiMag = getelementptr %struct.arg* %in.addr, i32 0, i32 5
+  %in.addr.numK = getelementptr %struct.arg* %in.addr, i32 0, i32 6
+  %in.addr.DimPhiMagBlock = getelementptr %struct.arg* %in.addr, i32 0, i32 7
+  %in.addr.DimPhiMagGrid = getelementptr %struct.arg* %in.addr, i32 0, i32 8
+
+  store float* %20, float** %in.addr.phiR
+  store i64 %21, i64* %in.addr.bytes_phiR
+  store float* %22, float** %in.addr.phiI
+  store i64 %23, i64* %in.addr.bytes_phiI
+  store float* %24, float** %in.addr.phiMag
+  store i64 %25, i64* %in.addr.bytes_phiMag
+  store i32 %26, i32* %in.addr.numK
+  store i64 256, i64* %in.addr.DimPhiMagBlock
+  store i64 %16, i64* %in.addr.DimPhiMagGrid
+
+  ; Change type to i8* and VISC launch call to computePhiMag_root
+  %args = bitcast %struct.arg.phiMag* %in.addr to i8*
+  %graphID = call i8* @llvm.visc.launch(i8* bitcast (%rtype (float*, i64, float*, i64, float*, i64, i32, i64, i64)* @computePhiMag_root to i8*), i8* %args)
+
+  ; Wait for result
+  call void @llvm.visc.wait(i8* %graphID)
+
+  ; Get the result
+  ; -- Not required as all output is through side effects --
+  ; -------------------------------- Completed VISC Launch Call --------------------------------
   ret void
 }
 
 ; Function Attrs: noinline nounwind uwtable
-define void @computeQ_kernel(i32 %numK, i32 %kGlobalIndex, float* %x, i64 %bytes_x, float* %y, i64 %bytes_y, float* %z, i64 %bytes_z, float* %Qr, i64 %bytes_Qr, float* %Qi, i64 %bytes_Qi, %struct.kValues* %ck, i64 %bytes_ck) #4 {
+define %rtype @computeQ_kernel(i32 %numK, i32 %kGlobalIndex, float* %x, i64 %bytes_x, float* %y, i64 %bytes_y, float* %z, i64 %bytes_z, float* %Qr, i64 %bytes_Qr, float* %Qi, i64 %bytes_Qi, %struct.kValues* %ck, i64 %bytes_ck) #4 {
   %1 = alloca i32, align 4
   %2 = alloca i32, align 4
   %3 = alloca float*, align 8
@@ -508,9 +566,19 @@ define void @computeQ_kernel(i32 %numK, i32 %kGlobalIndex, float* %x, i64 %bytes
   br i1 %17, label %18, label %69
 
 ; <label>:18                                      ; preds = %15
-  %19 = call i32 (i32, ...)* bitcast (i32 (...)* @get_group_id to i32 (i32, ...)*)(i32 0)
+  ; ------------------------- VISC changes ------------------
+  ; Replace get_group_id / get_local_id calls with calls to Graph query intrinsics
+  ; Replaced statement -- 
+  ; %19 = call i32 (i32, ...)* bitcast (i32 (...)* @get_group_id to i32 (i32, ...)*)(i32 0)
+  ; %21 = call i32 (i32, ...)* bitcast (i32 (...)* @get_local_id to i32 (i32, ...)*)(i32 0)
+  %this_node = call i8* @llvm.visc.getNode()
+  %parent_node = call i8* @llvm.visc.getParentNode(i8* %this_node)
+
+  %19 = call i32 @llvm.visc.getNodeInstanceID.x(i8* %parent_node)
+  %21 = call i32 @llvm.visc.getNodeInstanceID.x(i8* %this_node)
+  ; ---------------------- VISC changes End ------------------
+
   %20 = mul nsw i32 %19, 256
-  %21 = call i32 (i32, ...)* bitcast (i32 (...)* @get_local_id to i32 (i32, ...)*)(i32 0)
   %22 = mul nsw i32 4, %21
   %23 = add nsw i32 %20, %22
   %24 = load i32* %tx, align 4
@@ -702,9 +770,21 @@ define void @computeQ_kernel(i32 %numK, i32 %kGlobalIndex, float* %x, i64 %bytes
   br i1 %165, label %166, label %193
 
 ; <label>:166                                     ; preds = %163
-  %167 = call i32 (i32, ...)* bitcast (i32 (...)* @get_group_id to i32 (i32, ...)*)(i32 0)
+  ; ------------------------- VISC changes ------------------
+  ; Replace get_group_id / get_local_id calls with calls to Graph query intrinsics
+  ; Replaced statement -- 
+  ; %167 = call i32 (i32, ...)* bitcast (i32 (...)* @get_group_id to i32 (i32, ...)*)(i32 0)
+  ; %169 = call i32 (i32, ...)* bitcast (i32 (...)* @get_local_id to i32 (i32, ...)*)(i32 0)
+  %this_node = call i8* @llvm.visc.getNode()
+  %parent_node = call i8* @llvm.visc.getParentNode(i8* %this_node)
+
+  %167 = call i32 @llvm.visc.getNodeInstanceID.x(i8* %parent_node)
+  %169 = call i32 @llvm.visc.getNodeInstanceID.x(i8* %this_node)
+  ; ---------------------- VISC changes End ------------------
+
+
+
   %168 = mul nsw i32 %167, 256
-  %169 = call i32 (i32, ...)* bitcast (i32 (...)* @get_local_id to i32 (i32, ...)*)(i32 0)
   %170 = mul nsw i32 4, %169
   %171 = add nsw i32 %168, %170
   %172 = load i32* %tx2, align 4
@@ -737,19 +817,62 @@ define void @computeQ_kernel(i32 %numK, i32 %kGlobalIndex, float* %x, i64 %bytes
   br label %163
 
 ; <label>:193                                     ; preds = %163
-  ret void
+  ret %rtype undef
 }
 
-declare i32 @get_group_id(...) #1
-
-declare i32 @get_local_id(...) #1
-
 ; Function Attrs: nounwind readnone
 declare double @cos(double) #5
 
 ; Function Attrs: nounwind readnone
 declare double @sin(double) #5
 
+
+; ----------------- Compute Q internal node ----------------
+define %rtype @computeQ_internal(i32 %numK, i32 %kGlobalIndex, float* %x, i64 %bytes_x, float* %y, i64 %bytes_y, float* %z, i64 %bytes_z, float* %Qr, i64 %bytes_Qr, float* %Qi, i64 %bytes_Qi, %struct.kValues* %ck, i64 %bytes_ck, i64 %DimQBlock) #4 {
+  %kernel = call i8* @llvm.visc.createNode1D(i8* bitcast (%rtype (i32, i32, float*, i64, float*, i64, float*, i64, float*, i64, float*, i64, %struct.kValues*, i64)* @computeQ_kernel to i8*), i64 %DimQBlock)
+  ; Bind Inputs
+  call void @llvm.visc.bind.input(i8* %kernel, i32 0, i32 0); numK
+  call void @llvm.visc.bind.input(i8* %kernel, i32 1, i32 1); kGlobalIndex
+  call void @llvm.visc.bind.input(i8* %kernel, i32 2, i32 2); x
+  call void @llvm.visc.bind.input(i8* %kernel, i32 3, i32 3); bytes_x
+  call void @llvm.visc.bind.input(i8* %kernel, i32 4, i32 4); y
+  call void @llvm.visc.bind.input(i8* %kernel, i32 5, i32 5); bytes_y
+  call void @llvm.visc.bind.input(i8* %kernel, i32 6, i32 6); z
+  call void @llvm.visc.bind.input(i8* %kernel, i32 7, i32 7); bytes_z
+  call void @llvm.visc.bind.input(i8* %kernel, i32 8, i32 8); Qr
+  call void @llvm.visc.bind.input(i8* %kernel, i32 9, i32 9); bytes_Qr
+  call void @llvm.visc.bind.input(i8* %kernel, i32 10, i32 10); Qi
+  call void @llvm.visc.bind.input(i8* %kernel, i32 11, i32 11); bytes_i
+  call void @llvm.visc.bind.input(i8* %kernel, i32 12, i32 12); ck
+  call void @llvm.visc.bind.input(i8* %kernel, i32 13, i32 13); bytes_ck
+  ; Bind Outputs
+  ret %rtype undef
+
+}
+
+; ----------------- Compute Q root node ----------------
+define %rtype @computeQ_root(i32 %numK, i32 %kGlobalIndex, float* %x, i64 %bytes_x, float* %y, i64 %bytes_y, float* %z, i64 %bytes_z, float* %Qr, i64 %bytes_Qr, float* %Qi, i64 %bytes_Qi, %struct.kValues* %ck, i64 %bytes_ck, i64 %DimQBlock, i64 %DimQGrid) #4 {
+  %kernel = call i8* @llvm.visc.createNode1D(i8* bitcast (%rtype (i32, i32, float*, i64, float*, i64, float*, i64, float*, i64, float*, i64, %struct.kValues*, i64, i64)* @computeQ_internal to i8*), i64 %DimQGrid)
+  ; Bind Inputs
+  call void @llvm.visc.bind.input(i8* %kernel, i32 0, i32 0); numK
+  call void @llvm.visc.bind.input(i8* %kernel, i32 1, i32 1); kGlobalIndex
+  call void @llvm.visc.bind.input(i8* %kernel, i32 2, i32 2); x
+  call void @llvm.visc.bind.input(i8* %kernel, i32 3, i32 3); bytes_x
+  call void @llvm.visc.bind.input(i8* %kernel, i32 4, i32 4); y
+  call void @llvm.visc.bind.input(i8* %kernel, i32 5, i32 5); bytes_y
+  call void @llvm.visc.bind.input(i8* %kernel, i32 6, i32 6); z
+  call void @llvm.visc.bind.input(i8* %kernel, i32 7, i32 7); bytes_z
+  call void @llvm.visc.bind.input(i8* %kernel, i32 8, i32 8); Qr
+  call void @llvm.visc.bind.input(i8* %kernel, i32 9, i32 9); bytes_Qr
+  call void @llvm.visc.bind.input(i8* %kernel, i32 10, i32 10); Qi
+  call void @llvm.visc.bind.input(i8* %kernel, i32 11, i32 11); bytes_i
+  call void @llvm.visc.bind.input(i8* %kernel, i32 12, i32 12); ck
+  call void @llvm.visc.bind.input(i8* %kernel, i32 13, i32 13); bytes_ck
+  call void @llvm.visc.bind.input(i8* %kernel, i32 14, i32 14); DimQBlock
+  ; Bind Outputs
+  ret %rtype undef
+}
+
 ; Function Attrs: noinline nounwind uwtable
 define void @computeQ(i32 %numK, i32 %numX, float* %x, float* %y, float* %z, %struct.kValues* %kVals, float* %Qr, float* %Qi) #4 {
   %1 = alloca i32, align 4