diff --git a/llvm/test/VISC/gemm_opencl/matrixMul/visc_gemm_opencl.ll b/llvm/test/VISC/gemm_opencl/matrixMul/visc_gemm_opencl.ll
index da7bd5ec6a0c76a3e5cd539eb4a49af2c310cebc..d0f23ce14ea399adad3a91068980a226b1426a2c 100644
--- a/llvm/test/VISC/gemm_opencl/matrixMul/visc_gemm_opencl.ll
+++ b/llvm/test/VISC/gemm_opencl/matrixMul/visc_gemm_opencl.ll
@@ -542,6 +542,21 @@ declare i32 @clReleaseProgram(%struct._cl_program*) #3
 
 declare i32 @clReleaseCommandQueue(%struct._cl_command_queue*) #3
 
+define %rtype @MatrixMulRoot(float* %h_A, i32 %bytes_A, float* %h_B, i32 %bytes_B, i32 %WA, i32 %WB, i32 %HA) { ; root function handed to @llvm.visc.launch
+  %kernel = call i8* @llvm.visc.createNode2D(i8* bitcast (%rtype (float*, i32, float*, i32, i32, i32)* @matrixMul to i8*), i32 %WB, i32 %HA) ; 2D node wrapping @matrixMul; dimension operands are %WB and %HA
+  ; Bind Inputs: indices 0..5 are paired one-to-one (presumably parent input -> kernel input; confirm against the intrinsic's declaration)
+  call void @llvm.visc.bind.input(i8* %kernel, i32 0, i32 0)
+  call void @llvm.visc.bind.input(i8* %kernel, i32 1, i32 1)
+  call void @llvm.visc.bind.input(i8* %kernel, i32 2, i32 2)
+  call void @llvm.visc.bind.input(i8* %kernel, i32 3, i32 3)
+  call void @llvm.visc.bind.input(i8* %kernel, i32 4, i32 4)
+  call void @llvm.visc.bind.input(i8* %kernel, i32 5, i32 5) ; %HA (index 6) is not bound; it is only used as a node dimension above
+  ; Bind Outputs: indices 0 and 1 are paired one-to-one (presumably kernel output -> parent output; confirm against the intrinsic's declaration)
+  call void @llvm.visc.bind.output(i8* %kernel, i32 0, i32 0)
+  call void @llvm.visc.bind.output(i8* %kernel, i32 1, i32 1)
+  ret %rtype zeroinitializer ; returns an all-zero %rtype; NOTE(review): real results presumably flow through the output bindings - confirm
+}
+
 ; Function Attrs: nounwind uwtable
 define i32 @main(i32 %argc, i8** nocapture %argv) #0 {
 entry:
@@ -583,7 +598,7 @@ randomInit.exit41:                                ; preds = %for.body.i40
   %call12 = tail call noalias i8* @malloc(i64 4194304) #4
   %2 = bitcast i8* %call12 to float*
   
-  ; ----- Adding VISC Launch Call -----
+  ; ---------------------------------- Adding VISC Launch Call --------------------------------
   ; Setting up launch input args
   %in.addr = alloca %struct.arg
 
@@ -598,7 +613,7 @@ randomInit.exit41:                                ; preds = %for.body.i40
   store i32 4194304, i32* %in.addr.bytes_B
 
   ; Change type to i8* and VISC Launch call
-  %args = bitcast { float*, i32, float*, i32, %rtype }* %in.addr to i8*
+  %args = bitcast %struct.arg* %in.addr to i8*
   %graphID = call i8* @llvm.visc.launch(i8* bitcast (%rtype (float*, i32, float*, i32)* @MatrixMulRoot to i8*), i8* %args)
   ;tail call void @computeMatrixMul(float* %0, i32 4194304, float* %1, i32 4194304, float* %2, i32 4194304)
 
@@ -610,9 +625,9 @@ randomInit.exit41:                                ; preds = %for.body.i40
   %out = load %rtype* %out.addr
   %out.h_C = extractvalue %rtype %out, 0
   ;%2 = extractvalue %rtype %out, 0
-  %out.bytes_C = extractvalue %rtype %outputstruct, 1
+  %out.bytes_C = extractvalue %rtype %out, 1
 
-  ; ----- Completed VISC Launch Call Code -----
+  ; -------------------------------- Completed VISC Launch Call Code --------------------------------
   
   br label %for.cond4.preheader.i
 
@@ -652,6 +667,7 @@ for.body12.i:                                     ; preds = %for.body12.i, %for.
 
 for.end.i:                                        ; preds = %for.body12.i
   %11 = add nsw i64 %indvars.iv88.i, %3
+  ; Replace use of %2 with %out.h_C
   ;%arrayidx34.i = getelementptr inbounds float* %2, i64 %11
   %arrayidx34.i = getelementptr inbounds float* %out.h_C, i64 %11
   %12 = load float* %arrayidx34.i, align 4, !tbaa !0
@@ -688,6 +704,7 @@ if.end:                                           ; preds = %if.else, %if.then
   ret i32 0
 }
 
+
 ; Function Attrs: nounwind
 declare void @srand(i32) #1