diff --git a/llvm/test/VISC/testKernel/kernel-spir32.ll b/llvm/test/VISC/testKernel/kernel-spir32.ll
new file mode 100644
index 0000000000000000000000000000000000000000..e5c403f87b49073edfe691515d47d42b7846933c
--- /dev/null
+++ b/llvm/test/VISC/testKernel/kernel-spir32.ll
@@ -0,0 +1,38 @@
+; ModuleID = '/tmp/qt_temp.w24812'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024"
+target triple = "spir-unknown-unknown"
+
+define cc76 void @mysgemmNT(float addrspace(1)* nocapture %A, float addrspace(1)* nocapture %B, float addrspace(1)* nocapture %C) nounwind {
+  %1 = tail call cc75 i32 @_Z13get_global_idj(i32 0) nounwind readnone
+  %2 = getelementptr inbounds float addrspace(1)* %A, i32 %1
+  %3 = load float addrspace(1)* %2, align 4, !tbaa !9
+  %4 = getelementptr inbounds float addrspace(1)* %B, i32 %1
+  %5 = load float addrspace(1)* %4, align 4, !tbaa !9
+  %6 = fmul float %3, %5
+  %7 = getelementptr inbounds float addrspace(1)* %C, i32 %1
+  store float %6, float addrspace(1)* %7, align 4, !tbaa !9
+  ret void
+}
+
+declare cc75 i32 @_Z13get_global_idj(i32) nounwind readnone
+
+!opencl.kernels = !{!0}
+!opencl.enable.FP_CONTRACT = !{}
+!opencl.spir.version = !{!7}
+!opencl.ocl.version = !{!7}
+!opencl.used.extensions = !{!8}
+!opencl.used.optional.core.features = !{!8}
+!opencl.compiler.options = !{!8}
+
+!0 = metadata !{void (float addrspace(1)*, float addrspace(1)*, float addrspace(1)*)* @mysgemmNT, metadata !1, metadata !2, metadata !3, metadata !4, metadata !5, metadata !6}
+!1 = metadata !{metadata !"kernel_arg_addr_space", i32 1, i32 1, i32 1}
+!2 = metadata !{metadata !"kernel_arg_access_qual", metadata !"none", metadata !"none", metadata !"none"}
+!3 = metadata !{metadata !"kernel_arg_type", metadata !"float*", metadata !"float*", metadata !"float*"}
+!4 = metadata !{metadata !"kernel_arg_type_qual", metadata !"const", metadata !"const", metadata !""}
+!5 = metadata !{metadata !"kernel_arg_base_type", metadata !"float*", metadata !"float*", metadata !"float*"}
+!6 = metadata !{metadata !"kernel_arg_name", metadata !"A", metadata !"B", metadata !"C"}
+!7 = metadata !{i32 1, i32 2}
+!8 = metadata !{}
+!9 = metadata !{metadata !"float", metadata !10}
+!10 = metadata !{metadata !"omnipotent char", metadata !11}
+!11 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/llvm/test/VISC/testKernel/kernel-spir64.ll b/llvm/test/VISC/testKernel/kernel-spir64.ll
new file mode 100644
index 0000000000000000000000000000000000000000..9d97d957fa316110c5461cbd329e731491dc5c89
--- /dev/null
+++ b/llvm/test/VISC/testKernel/kernel-spir64.ll
@@ -0,0 +1,40 @@
+; ModuleID = '/tmp/qt_temp.w24812'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024"
+target triple = "spir64-unknown-unknown"
+
+define cc76 void @mysgemmNT(float addrspace(1)* nocapture %A, float addrspace(1)* nocapture %B, float addrspace(1)* nocapture %C) nounwind {
+  %1 = tail call cc75 i64 @_Z13get_global_idj(i32 0) nounwind readnone
+  %sext = shl i64 %1, 32
+  %2 = ashr exact i64 %sext, 32
+  %3 = getelementptr inbounds float addrspace(1)* %A, i64 %2
+  %4 = load float addrspace(1)* %3, align 4, !tbaa !9
+  %5 = getelementptr inbounds float addrspace(1)* %B, i64 %2
+  %6 = load float addrspace(1)* %5, align 4, !tbaa !9
+  %7 = fmul float %4, %6
+  %8 = getelementptr inbounds float addrspace(1)* %C, i64 %2
+  store float %7, float addrspace(1)* %8, align 4, !tbaa !9
+  ret void
+}
+
+declare cc75 i64 @_Z13get_global_idj(i32) nounwind readnone
+
+!opencl.kernels = !{!0}
+!opencl.enable.FP_CONTRACT = !{}
+!opencl.spir.version = !{!7}
+!opencl.ocl.version = !{!7}
+!opencl.used.extensions = !{!8}
+!opencl.used.optional.core.features = !{!8}
+!opencl.compiler.options = !{!8}
+
+!0 = metadata !{void (float addrspace(1)*, float addrspace(1)*, float addrspace(1)*)* @mysgemmNT, metadata !1, metadata !2, metadata !3, metadata !4, metadata !5, metadata !6}
+!1 = metadata !{metadata !"kernel_arg_addr_space", i32 1, i32 1, i32 1}
+!2 = metadata !{metadata !"kernel_arg_access_qual", metadata !"none", metadata !"none", metadata !"none"}
+!3 = metadata !{metadata !"kernel_arg_type", metadata !"float*", metadata !"float*", metadata !"float*"}
+!4 = metadata !{metadata !"kernel_arg_type_qual", metadata !"const", metadata !"const", metadata !""}
+!5 = metadata !{metadata !"kernel_arg_base_type", metadata !"float*", metadata !"float*", metadata !"float*"}
+!6 = metadata !{metadata !"kernel_arg_name", metadata !"A", metadata !"B", metadata !"C"}
+!7 = metadata !{i32 1, i32 2}
+!8 = metadata !{}
+!9 = metadata !{metadata !"float", metadata !10}
+!10 = metadata !{metadata !"omnipotent char", metadata !11}
+!11 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/llvm/test/VISC/testKernel/kernel.asm b/llvm/test/VISC/testKernel/kernel.asm
new file mode 100644
index 0000000000000000000000000000000000000000..fe6cadaf41b3683ae89710711ea763f76096edf3
--- /dev/null
+++ b/llvm/test/VISC/testKernel/kernel.asm
@@ -0,0 +1,83 @@
+	.file	"main"
+	.text
+	.globl	mysgemmNT
+	.align	16, 0x90
+	.type	mysgemmNT,@function
+mysgemmNT:
+	.cfi_startproc
+	pushq	%rbp
+.Ltmp3:
+	.cfi_def_cfa_offset 16
+.Ltmp4:
+	.cfi_offset %rbp, -16
+	movq	%rsp, %rbp
+.Ltmp5:
+	.cfi_def_cfa_register %rbp
+	pushq	%r14
+	pushq	%rbx
+	andq	$-8, %rsp
+.Ltmp6:
+	.cfi_offset %rbx, -32
+.Ltmp7:
+	.cfi_offset %r14, -24
+	movq	(%rsi), %r9
+	movq	32(%rdi), %r8
+	movq	16(%rdi), %r14
+	movq	(%rdi), %rcx
+	movq	8(%rdi), %rdx
+	movq	80(%rdi), %r10
+	movq	%r10, %rsi
+	sarq	$2, %rsi
+	je	.LBB0_3
+	movl	%r10d, %eax
+	imull	%r9d, %eax
+	addl	%r8d, %eax
+	shlq	$32, %rax
+	movabsq	$17179869184, %r11
+	movq	%rsi, %rbx
+	.align	16, 0x90
+.LBB0_2:
+	movq	%rax, %rdi
+	sarq	$32, %rdi
+	vmovups	(%rdx,%rdi,4), %xmm0
+	vmulps	(%rcx,%rdi,4), %xmm0, %xmm0
+	vmovups	%xmm0, (%r14,%rdi,4)
+	addq	%r11, %rax
+	decq	%rbx
+	jne	.LBB0_2
+.LBB0_3:
+	movq	%r10, %rax
+	andq	$-4, %rax
+	cmpq	%rax, %r10
+	je	.LBB0_6
+	shlq	$2, %rsi
+	movq	%r10, %rdi
+	subq	%rsi, %rdi
+	negq	%rdi
+	imull	%r9d, %r10d
+	addl	%r10d, %r8d
+	addl	%r8d, %esi
+	shlq	$32, %rsi
+	movabsq	$4294967296, %r8
+	.align	16, 0x90
+.LBB0_5:
+	movq	%rsi, %rax
+	sarq	$32, %rax
+	vmovss	(%rcx,%rax,4), %xmm0
+	vmulss	(%rdx,%rax,4), %xmm0, %xmm0
+	vmovss	%xmm0, (%r14,%rax,4)
+	addq	%r8, %rsi
+	incq	%rdi
+	jne	.LBB0_5
+.LBB0_6:
+	leaq	-16(%rbp), %rsp
+	popq	%rbx
+	popq	%r14
+	popq	%rbp
+	ret
+.Ltmp8:
+	.size	mysgemmNT, .Ltmp8-mysgemmNT
+	.cfi_endproc
+
+
+	.section	".note.GNU-stack","",@progbits
diff --git a/llvm/test/VISC/testKernel/kernel.cl b/llvm/test/VISC/testKernel/kernel.cl
new file mode 100644
index 0000000000000000000000000000000000000000..235a6498c4a01d43af7933f1bc2ef585a80fdedf
--- /dev/null
+++ b/llvm/test/VISC/testKernel/kernel.cl
@@ -0,0 +1,18 @@
+/***************************************************************************
+ *cr
+ *cr            (C) Copyright 2010 The Board of Trustees of the
+ *cr                        University of Illinois
+ *cr                         All Rights Reserved
+ *cr
+ ***************************************************************************/
+
+/* 
+ * Kernel of dense matrix-matrix multiplication kernel.
+ */
+
+__kernel void mysgemmNT( __global const float *A, __global const float *B, __global float* C)
+{
+    int m = get_global_id(0);
+ 
+    C[m] = A[m] * B[m];
+}
diff --git a/llvm/test/VISC/testKernel/kernel.ll b/llvm/test/VISC/testKernel/kernel.ll
new file mode 100644
index 0000000000000000000000000000000000000000..03a29ff6979b9648bae22f3caf6722e102fd78dd
--- /dev/null
+++ b/llvm/test/VISC/testKernel/kernel.ll
@@ -0,0 +1,154 @@
+; ModuleID = 'main'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux"
+
+; Function Attrs: nounwind
+declare void @__mysgemmNT_before.AddImplicitArgs(float addrspace(1)* nocapture, float addrspace(1)* nocapture, float addrspace(1)* nocapture) #0
+
+; Function Attrs: nounwind readnone
+declare i64 @_Z13get_global_idj(i32) #1
+
+declare [7 x i64] @__WG.boundaries.mysgemmNT_before.AddImplicitArgs(float addrspace(1)*, float addrspace(1)*, float addrspace(1)*)
+
+declare i64 @_Z14get_local_sizej(i32)
+
+declare i64 @get_base_global_id.(i32)
+
+declare i1 @__ocl_allOne(i1)
+
+declare i1 @__ocl_allZero(i1)
+
+; Function Attrs: alwaysinline nounwind
+declare void @__mysgemmNT_separated_args(float addrspace(1)* nocapture, float addrspace(1)* nocapture, float addrspace(1)* nocapture, i8 addrspace(3)* noalias, { i64, [3 x i64], [3 x i64], [2 x [3 x i64]], [3 x i64], {}*, {}* }* noalias, i64* noalias, [4 x i64], i8* noalias, {}* noalias) #2
+
+declare [7 x i64] @WG.boundaries.mysgemmNT(float addrspace(1)*, float addrspace(1)*, float addrspace(1)*, i8 addrspace(3)* noalias, { i64, [3 x i64], [3 x i64], [2 x [3 x i64]], [3 x i64], {}*, {}* }* noalias, i64* noalias, [4 x i64], i8* noalias, {}* noalias)
+
+define void @mysgemmNT(i8* noalias %pUniformArgs, i64* noalias %pWGId, {}* noalias %RuntimeHandle) {
+wrapper_entry:
+  %0 = bitcast i8* %pUniformArgs to float addrspace(1)**
+  %explicit_0 = load float addrspace(1)** %0, align 8
+  %1 = getelementptr i8* %pUniformArgs, i64 8
+  %2 = bitcast i8* %1 to float addrspace(1)**
+  %explicit_1 = load float addrspace(1)** %2, align 8
+  %3 = getelementptr i8* %pUniformArgs, i64 16
+  %4 = bitcast i8* %3 to float addrspace(1)**
+  %explicit_2 = load float addrspace(1)** %4, align 8
+  %5 = getelementptr i8* %pUniformArgs, i64 80
+  %6 = bitcast i8* %5 to i64*
+  %LocalSize_0 = load i64* %6, align 8
+  %7 = getelementptr i8* %pUniformArgs, i64 32
+  %8 = bitcast i8* %7 to i64*
+  %GlobalOffset_0 = load i64* %8, align 8
+  %GroupID_0 = load i64* %pWGId, align 8
+  %vector.size.i = ashr i64 %LocalSize_0, 2
+  %num.vector.wi.i = shl nsw i64 %vector.size.i, 2
+  %9 = icmp eq i64 %vector.size.i, 0
+  br i1 %9, label %scalarIf.i, label %dim_0_vector_pre_head.i.preheader
+
+dim_0_vector_pre_head.i.preheader:                ; preds = %wrapper_entry
+  %10 = mul i64 %LocalSize_0, %GroupID_0
+  %11 = add i64 %GlobalOffset_0, %10
+  %12 = mul i64 %11, 4294967296
+  br label %dim_0_vector_pre_head.i
+
+dim_0_vector_pre_head.i:                          ; preds = %dim_0_vector_pre_head.i.preheader, %dim_0_vector_pre_head.i
+  %lsr.iv5 = phi i64 [ %12, %dim_0_vector_pre_head.i.preheader ], [ %lsr.iv.next6, %dim_0_vector_pre_head.i ]
+  %lsr.iv3 = phi i64 [ %vector.size.i, %dim_0_vector_pre_head.i.preheader ], [ %lsr.iv.next4, %dim_0_vector_pre_head.i ]
+  %extractvector_func.i = ashr exact i64 %lsr.iv5, 32
+  %13 = getelementptr inbounds float addrspace(1)* %explicit_0, i64 %extractvector_func.i
+  %ptrTypeCastvector_func.i = bitcast float addrspace(1)* %13 to <4 x float> addrspace(1)*
+  %14 = load <4 x float> addrspace(1)* %ptrTypeCastvector_func.i, align 1
+  %15 = getelementptr inbounds float addrspace(1)* %explicit_1, i64 %extractvector_func.i
+  %ptrTypeCast5vector_func.i = bitcast float addrspace(1)* %15 to <4 x float> addrspace(1)*
+  %16 = load <4 x float> addrspace(1)* %ptrTypeCast5vector_func.i, align 1
+  %17 = fmul <4 x float> %14, %16
+  %18 = getelementptr inbounds float addrspace(1)* %explicit_2, i64 %extractvector_func.i
+  %ptrTypeCast6vector_func.i = bitcast float addrspace(1)* %18 to <4 x float> addrspace(1)*
+  store <4 x float> %17, <4 x float> addrspace(1)* %ptrTypeCast6vector_func.i, align 1
+  %lsr.iv.next4 = add i64 %lsr.iv3, -1
+  %lsr.iv.next6 = add i64 %lsr.iv5, 17179869184
+  %dim_0_vector_cmp.to.max.i = icmp eq i64 %lsr.iv.next4, 0
+  br i1 %dim_0_vector_cmp.to.max.i, label %scalarIf.i, label %dim_0_vector_pre_head.i
+
+scalarIf.i:                                       ; preds = %dim_0_vector_pre_head.i, %wrapper_entry
+  %19 = icmp eq i64 %LocalSize_0, %num.vector.wi.i
+  br i1 %19, label %__mysgemmNT_separated_args.exit, label %scalar_kernel_entry.i.preheader
+
+scalar_kernel_entry.i.preheader:                  ; preds = %scalarIf.i
+  %20 = mul i64 %vector.size.i, 4
+  %21 = sub i64 %LocalSize_0, %20
+  %22 = mul i64 %LocalSize_0, %GroupID_0
+  %23 = add i64 %GlobalOffset_0, %22
+  %24 = add i64 %23, %20
+  %25 = mul i64 %24, 4294967296
+  %26 = sub i64 0, %21
+  br label %scalar_kernel_entry.i
+
+scalar_kernel_entry.i:                            ; preds = %scalar_kernel_entry.i.preheader, %scalar_kernel_entry.i
+  %lsr.iv7 = phi i64 [ %26, %scalar_kernel_entry.i.preheader ], [ %lsr.iv.next8, %scalar_kernel_entry.i ]
+  %lsr.iv1 = phi i64 [ %25, %scalar_kernel_entry.i.preheader ], [ %lsr.iv.next2, %scalar_kernel_entry.i ]
+  %27 = ashr exact i64 %lsr.iv1, 32
+  %28 = getelementptr inbounds float addrspace(1)* %explicit_0, i64 %27
+  %29 = load float addrspace(1)* %28, align 1
+  %30 = getelementptr inbounds float addrspace(1)* %explicit_1, i64 %27
+  %31 = load float addrspace(1)* %30, align 1
+  %32 = fmul float %29, %31
+  %33 = getelementptr inbounds float addrspace(1)* %explicit_2, i64 %27
+  store float %32, float addrspace(1)* %33, align 1
+  %lsr.iv.next2 = add i64 %lsr.iv1, 4294967296
+  %lsr.iv.next8 = add i64 %lsr.iv7, 1
+  %dim_0_cmp.to.max.i = icmp eq i64 %lsr.iv.next8, 0
+  br i1 %dim_0_cmp.to.max.i, label %__mysgemmNT_separated_args.exit, label %scalar_kernel_entry.i
+
+__mysgemmNT_separated_args.exit:                  ; preds = %scalar_kernel_entry.i, %scalarIf.i
+  ret void
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { alwaysinline nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!opencl.kernels = !{!0}
+!opencl.enable.FP_CONTRACT = !{}
+!opencl.spir.version = !{!7}
+!opencl.ocl.version = !{!7}
+!opencl.used.extensions = !{!8}
+!opencl.used.optional.core.features = !{!8}
+!opencl.compiler.options = !{!8}
+!opencl.kernel_info = !{!9}
+!opencl.module_info_list = !{!26}
+!llvm.functions_info = !{}
+!opencl.functions_stats = !{}
+!opencl.stat_descriptions = !{}
+!opencl.module_stat_info = !{}
+
+!0 = metadata !{void (float addrspace(1)*, float addrspace(1)*, float addrspace(1)*, i8 addrspace(3)*, { i64, [3 x i64], [3 x i64], [2 x [3 x i64]], [3 x i64], {}*, {}* }*, i64*, [4 x i64], i8*, {}*)* @__mysgemmNT_separated_args, metadata !1, metadata !2, metadata !3, metadata !4, metadata !5, metadata !6}
+!1 = metadata !{metadata !"kernel_arg_addr_space", i32 1, i32 1, i32 1}
+!2 = metadata !{metadata !"kernel_arg_access_qual", metadata !"none", metadata !"none", metadata !"none"}
+!3 = metadata !{metadata !"kernel_arg_type", metadata !"float*", metadata !"float*", metadata !"float*"}
+!4 = metadata !{metadata !"kernel_arg_type_qual", metadata !"const", metadata !"const", metadata !""}
+!5 = metadata !{metadata !"kernel_arg_base_type", metadata !"float*", metadata !"float*", metadata !"float*"}
+!6 = metadata !{metadata !"kernel_arg_name", metadata !"A", metadata !"B", metadata !"C"}
+!7 = metadata !{i32 1, i32 2}
+!8 = metadata !{}
+!9 = metadata !{void (float addrspace(1)*, float addrspace(1)*, float addrspace(1)*, i8 addrspace(3)*, { i64, [3 x i64], [3 x i64], [2 x [3 x i64]], [3 x i64], {}*, {}* }*, i64*, [4 x i64], i8*, {}*)* @__mysgemmNT_separated_args, metadata !10}
+!10 = metadata !{metadata !11, metadata !12, metadata !13, metadata !14, metadata !15, metadata !16, metadata !17, metadata !18, metadata !19, metadata !20, metadata !21, metadata !22, metadata !23, metadata !24, metadata !25}
+!11 = metadata !{metadata !"local_buffer_size", i32 0}
+!12 = metadata !{metadata !"barrier_buffer_size", i32 0}
+!13 = metadata !{metadata !"kernel_execution_length", i32 11}
+!14 = metadata !{metadata !"max_wg_dimensions", i32 1}
+!15 = metadata !{metadata !"kernel_has_barrier", i1 false}
+!16 = metadata !{metadata !"kernel_has_global_sync", i1 false}
+!17 = metadata !{metadata !"no_barrier_path", i1 true}
+!18 = metadata !{metadata !"vectorized_kernel", null}
+!19 = metadata !{metadata !"vectorized_width", i32 4}
+!20 = metadata !{metadata !"kernel_wrapper", void (i8*, i64*, {}*)* @mysgemmNT}
+!21 = metadata !{metadata !"scalarized_kernel", null}
+!22 = metadata !{metadata !"block_literal_size", null}
+!23 = metadata !{metadata !"private_memory_size", i32 0}
+!24 = metadata !{metadata !"vectorization_dimension", i32 0}
+!25 = metadata !{metadata !"can_unite_workgroups", i1 true}
+!26 = metadata !{metadata !27, metadata !28, metadata !29}
+!27 = metadata !{metadata !"global_variable_total_size", i64 0}
+!28 = metadata !{metadata !"gen_addr_space_pointer_counter", null}
+!29 = metadata !{metadata !"gen_addr_space_pointer_warnings"}