diff --git a/llvm/test/VISC/testKernel/kernel-spir32.ll b/llvm/test/VISC/testKernel/kernel-spir32.ll new file mode 100644 index 0000000000000000000000000000000000000000..e5c403f87b49073edfe691515d47d42b7846933c --- /dev/null +++ b/llvm/test/VISC/testKernel/kernel-spir32.ll @@ -0,0 +1,38 @@ +; ModuleID = '/tmp/qt_temp.w24812' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024" +target triple = "spir-unknown-unknown" + +define cc76 void @mysgemmNT(float addrspace(1)* nocapture %A, float addrspace(1)* nocapture %B, float addrspace(1)* nocapture %C) nounwind { + %1 = tail call cc75 i32 @_Z13get_global_idj(i32 0) nounwind readnone + %2 = getelementptr inbounds float addrspace(1)* %A, i32 %1 + %3 = load float addrspace(1)* %2, align 4, !tbaa !9 + %4 = getelementptr inbounds float addrspace(1)* %B, i32 %1 + %5 = load float addrspace(1)* %4, align 4, !tbaa !9 + %6 = fmul float %3, %5 + %7 = getelementptr inbounds float addrspace(1)* %C, i32 %1 + store float %6, float addrspace(1)* %7, align 4, !tbaa !9 + ret void +} + +declare cc75 i32 @_Z13get_global_idj(i32) nounwind readnone + +!opencl.kernels = !{!0} +!opencl.enable.FP_CONTRACT = !{} +!opencl.spir.version = !{!7} +!opencl.ocl.version = !{!7} +!opencl.used.extensions = !{!8} +!opencl.used.optional.core.features = !{!8} +!opencl.compiler.options = !{!8} + +!0 = metadata !{void (float addrspace(1)*, float addrspace(1)*, float addrspace(1)*)* @mysgemmNT, metadata !1, metadata !2, metadata !3, metadata !4, metadata !5, metadata !6} +!1 = metadata !{metadata !"kernel_arg_addr_space", i32 1, i32 1, i32 1} +!2 = metadata !{metadata !"kernel_arg_access_qual", metadata !"none", metadata !"none", metadata !"none"} +!3 = metadata !{metadata !"kernel_arg_type", metadata !"float*", metadata !"float*", metadata !"float*"} +!4 = metadata !{metadata !"kernel_arg_type_qual", metadata !"const", metadata !"const", metadata !""} +!5 = metadata !{metadata !"kernel_arg_base_type", metadata !"float*", metadata !"float*", metadata !"float*"} +!6 = metadata !{metadata !"kernel_arg_name", metadata !"A", metadata !"B", metadata !"C"} +!7 = metadata !{i32 1, i32 2} +!8 = metadata !{} +!9 = metadata !{metadata !"float", metadata !10} +!10 = metadata !{metadata !"omnipotent char", metadata !11} +!11 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/llvm/test/VISC/testKernel/kernel-spir64.ll b/llvm/test/VISC/testKernel/kernel-spir64.ll new file mode 100644 index 0000000000000000000000000000000000000000..9d97d957fa316110c5461cbd329e731491dc5c89 --- /dev/null +++ b/llvm/test/VISC/testKernel/kernel-spir64.ll @@ -0,0 +1,40 @@ +; ModuleID = '/tmp/qt_temp.w24812' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024" +target triple = "spir64-unknown-unknown" + +define cc76 void @mysgemmNT(float addrspace(1)* nocapture %A, float addrspace(1)* nocapture %B, float addrspace(1)* nocapture %C) nounwind { + %1 = tail call cc75 i64 @_Z13get_global_idj(i32 0) nounwind readnone + %sext = shl i64 %1, 32 + %2 = ashr exact i64 %sext, 32 + %3 = getelementptr inbounds float addrspace(1)* %A, i64 %2 + %4 = load float addrspace(1)* %3, align 4, !tbaa !9 + %5 = getelementptr inbounds float addrspace(1)* %B, i64 %2 + %6 = load float addrspace(1)* %5, align 4, !tbaa !9 + %7 = fmul float %4, %6 + %8 = getelementptr inbounds float addrspace(1)* %C, i64 %2 + store float %7, float addrspace(1)* %8, align 4, !tbaa !9 + ret void +} + +declare cc75 i64 @_Z13get_global_idj(i32) nounwind readnone + +!opencl.kernels = !{!0} +!opencl.enable.FP_CONTRACT = !{} +!opencl.spir.version = !{!7} +!opencl.ocl.version = !{!7} +!opencl.used.extensions = !{!8} +!opencl.used.optional.core.features = !{!8} +!opencl.compiler.options = !{!8} + +!0 = metadata !{void (float addrspace(1)*, float addrspace(1)*, float addrspace(1)*)* @mysgemmNT, metadata !1, metadata !2, metadata !3, metadata !4, metadata !5, metadata !6} +!1 = metadata !{metadata !"kernel_arg_addr_space", i32 1, i32 1, i32 1} +!2 = metadata !{metadata !"kernel_arg_access_qual", metadata !"none", metadata !"none", metadata !"none"} +!3 = metadata !{metadata !"kernel_arg_type", metadata !"float*", metadata !"float*", metadata !"float*"} +!4 = metadata !{metadata !"kernel_arg_type_qual", metadata !"const", metadata !"const", metadata !""} +!5 = metadata !{metadata !"kernel_arg_base_type", metadata !"float*", metadata !"float*", metadata !"float*"} +!6 = metadata !{metadata !"kernel_arg_name", metadata !"A", metadata !"B", metadata !"C"} +!7 = metadata !{i32 1, i32 2} +!8 = metadata !{} +!9 = metadata !{metadata !"float", metadata !10} +!10 = metadata !{metadata !"omnipotent char", metadata !11} +!11 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/llvm/test/VISC/testKernel/kernel.asm b/llvm/test/VISC/testKernel/kernel.asm new file mode 100644 index 0000000000000000000000000000000000000000..fe6cadaf41b3683ae89710711ea763f76096edf3 --- /dev/null +++ b/llvm/test/VISC/testKernel/kernel.asm @@ -0,0 +1,83 @@ + .file "main" + .text + .globl mysgemmNT + .align 16, 0x90 + .type mysgemmNT,@function +mysgemmNT: + .cfi_startproc + pushq %rbp +.Ltmp3: + .cfi_def_cfa_offset 16 +.Ltmp4: + .cfi_offset %rbp, -16 + movq %rsp, %rbp +.Ltmp5: + .cfi_def_cfa_register %rbp + pushq %r14 + pushq %rbx + andq $-8, %rsp +.Ltmp6: + .cfi_offset %rbx, -32 +.Ltmp7: + .cfi_offset %r14, -24 + movq (%rsi), %r9 + movq 32(%rdi), %r8 + movq 16(%rdi), %r14 + movq (%rdi), %rcx + movq 8(%rdi), %rdx + movq 80(%rdi), %r10 + movq %r10, %rsi + sarq $2, %rsi + je .LBB0_3 + movl %r10d, %eax + imull %r9d, %eax + addl %r8d, %eax + shlq $32, %rax + movabsq $17179869184, %r11 + movq %rsi, %rbx + .align 16, 0x90 +.LBB0_2: + movq %rax, %rdi + sarq $32, %rdi + vmovups (%rdx,%rdi,4), %xmm0 + vmulps (%rcx,%rdi,4), %xmm0, %xmm0 + vmovups %xmm0, (%r14,%rdi,4) + addq %r11, %rax + decq %rbx + jne .LBB0_2 +.LBB0_3: + movq %r10, %rax + andq $-4, %rax + cmpq %rax, %r10 + je .LBB0_6 + shlq $2, %rsi + movq %r10, %rdi + subq %rsi, %rdi + negq %rdi + imull %r9d, %r10d + addl %r10d, %r8d + addl %r8d, %esi + shlq $32, %rsi + movabsq $4294967296, %r8 + .align 16, 0x90 +.LBB0_5: + movq %rsi, %rax + sarq $32, %rax + vmovss (%rcx,%rax,4), %xmm0 + vmulss (%rdx,%rax,4), %xmm0, %xmm0 + vmovss %xmm0, (%r14,%rax,4) + addq %r8, %rsi + incq %rdi + jne .LBB0_5 +.LBB0_6: + leaq -16(%rbp), %rsp + popq %rbx + popq %r14 + popq %rbp + ret +.Ltmp8: + .size mysgemmNT, .Ltmp8-mysgemmNT + .cfi_endproc + + + .section ".note.GNU-stack","",@progbits diff --git a/llvm/test/VISC/testKernel/kernel.cl b/llvm/test/VISC/testKernel/kernel.cl new file mode 100644 index 0000000000000000000000000000000000000000..235a6498c4a01d43af7933f1bc2ef585a80fdedf --- /dev/null +++ b/llvm/test/VISC/testKernel/kernel.cl @@ -0,0 +1,18 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2010 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ + +/* + * Kernel of dense matrix-matrix multiplication kernel. + */ + +__kernel void mysgemmNT( __global const float *A, __global const float *B, __global float* C) +{ + int m = get_global_id(0); + + C[m] = A[m] * B[m]; +} diff --git a/llvm/test/VISC/testKernel/kernel.ll b/llvm/test/VISC/testKernel/kernel.ll new file mode 100644 index 0000000000000000000000000000000000000000..03a29ff6979b9648bae22f3caf6722e102fd78dd --- /dev/null +++ b/llvm/test/VISC/testKernel/kernel.ll @@ -0,0 +1,154 @@ +; ModuleID = 'main' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-pc-linux" + +; Function Attrs: nounwind +declare void @__mysgemmNT_before.AddImplicitArgs(float addrspace(1)* nocapture, float addrspace(1)* nocapture, float addrspace(1)* nocapture) #0 + +; Function Attrs: nounwind readnone +declare i64 @_Z13get_global_idj(i32) #1 + +declare [7 x i64] @__WG.boundaries.mysgemmNT_before.AddImplicitArgs(float addrspace(1)*, float addrspace(1)*, float addrspace(1)*) + +declare i64 @_Z14get_local_sizej(i32) + +declare i64 @get_base_global_id.(i32) + +declare i1 @__ocl_allOne(i1) + +declare i1 @__ocl_allZero(i1) + +; Function Attrs: alwaysinline nounwind +declare void @__mysgemmNT_separated_args(float addrspace(1)* nocapture, float addrspace(1)* nocapture, float addrspace(1)* nocapture, i8 addrspace(3)* noalias, { i64, [3 x i64], [3 x i64], [2 x [3 x i64]], [3 x i64], {}*, {}* }* noalias, i64* noalias, [4 x i64], i8* noalias, {}* noalias) #2 + +declare [7 x i64] @WG.boundaries.mysgemmNT(float addrspace(1)*, float addrspace(1)*, float addrspace(1)*, i8 addrspace(3)* noalias, { i64, [3 x i64], [3 x i64], [2 x [3 x i64]], [3 x i64], {}*, {}* }* noalias, i64* noalias, [4 x i64], i8* noalias, {}* noalias) + +define void @mysgemmNT(i8* noalias %pUniformArgs, i64* noalias %pWGId, {}* noalias %RuntimeHandle) { +wrapper_entry: + %0 = bitcast i8* %pUniformArgs to float addrspace(1)** + %explicit_0 = load float addrspace(1)** %0, align 8 + %1 = getelementptr i8* %pUniformArgs, i64 8 + %2 = bitcast i8* %1 to float addrspace(1)** + %explicit_1 = load float addrspace(1)** %2, align 8 + %3 = getelementptr i8* %pUniformArgs, i64 16 + %4 = bitcast i8* %3 to float addrspace(1)** + %explicit_2 = load float addrspace(1)** %4, align 8 + %5 = getelementptr i8* %pUniformArgs, i64 80 + %6 = bitcast i8* %5 to i64* + %LocalSize_0 = load i64* %6, align 8 + %7 = getelementptr i8* %pUniformArgs, i64 32 + %8 = bitcast i8* %7 to i64* + %GlobalOffset_0 = load i64* %8, align 8 + %GroupID_0 = load i64* %pWGId, align 8 + %vector.size.i = ashr i64 %LocalSize_0, 2 + %num.vector.wi.i = shl nsw i64 %vector.size.i, 2 + %9 = icmp eq i64 %vector.size.i, 0 + br i1 %9, label %scalarIf.i, label %dim_0_vector_pre_head.i.preheader + +dim_0_vector_pre_head.i.preheader: ; preds = %wrapper_entry + %10 = mul i64 %LocalSize_0, %GroupID_0 + %11 = add i64 %GlobalOffset_0, %10 + %12 = mul i64 %11, 4294967296 + br label %dim_0_vector_pre_head.i + +dim_0_vector_pre_head.i: ; preds = %dim_0_vector_pre_head.i.preheader, %dim_0_vector_pre_head.i + %lsr.iv5 = phi i64 [ %12, %dim_0_vector_pre_head.i.preheader ], [ %lsr.iv.next6, %dim_0_vector_pre_head.i ] + %lsr.iv3 = phi i64 [ %vector.size.i, %dim_0_vector_pre_head.i.preheader ], [ %lsr.iv.next4, %dim_0_vector_pre_head.i ] + %extractvector_func.i = ashr exact i64 %lsr.iv5, 32 + %13 = getelementptr inbounds float addrspace(1)* %explicit_0, i64 %extractvector_func.i + %ptrTypeCastvector_func.i = bitcast float addrspace(1)* %13 to <4 x float> addrspace(1)* + %14 = load <4 x float> addrspace(1)* %ptrTypeCastvector_func.i, align 1 + %15 = getelementptr inbounds float addrspace(1)* %explicit_1, i64 %extractvector_func.i + %ptrTypeCast5vector_func.i = bitcast float addrspace(1)* %15 to <4 x float> addrspace(1)* + %16 = load <4 x float> addrspace(1)* %ptrTypeCast5vector_func.i, align 1 + %17 = fmul <4 x float> %14, %16 + %18 = getelementptr inbounds float addrspace(1)* %explicit_2, i64 %extractvector_func.i + %ptrTypeCast6vector_func.i = bitcast float addrspace(1)* %18 to <4 x float> addrspace(1)* + store <4 x float> %17, <4 x float> addrspace(1)* %ptrTypeCast6vector_func.i, align 1 + %lsr.iv.next4 = add i64 %lsr.iv3, -1 + %lsr.iv.next6 = add i64 %lsr.iv5, 17179869184 + %dim_0_vector_cmp.to.max.i = icmp eq i64 %lsr.iv.next4, 0 + br i1 %dim_0_vector_cmp.to.max.i, label %scalarIf.i, label %dim_0_vector_pre_head.i + +scalarIf.i: ; preds = %dim_0_vector_pre_head.i, %wrapper_entry + %19 = icmp eq i64 %LocalSize_0, %num.vector.wi.i + br i1 %19, label %__mysgemmNT_separated_args.exit, label %scalar_kernel_entry.i.preheader + +scalar_kernel_entry.i.preheader: ; preds = %scalarIf.i + %20 = mul i64 %vector.size.i, 4 + %21 = sub i64 %LocalSize_0, %20 + %22 = mul i64 %LocalSize_0, %GroupID_0 + %23 = add i64 %GlobalOffset_0, %22 + %24 = add i64 %23, %20 + %25 = mul i64 %24, 4294967296 + %26 = sub i64 0, %21 + br label %scalar_kernel_entry.i + +scalar_kernel_entry.i: ; preds = %scalar_kernel_entry.i.preheader, %scalar_kernel_entry.i + %lsr.iv7 = phi i64 [ %26, %scalar_kernel_entry.i.preheader ], [ %lsr.iv.next8, %scalar_kernel_entry.i ] + %lsr.iv1 = phi i64 [ %25, %scalar_kernel_entry.i.preheader ], [ %lsr.iv.next2, %scalar_kernel_entry.i ] + %27 = ashr exact i64 %lsr.iv1, 32 + %28 = getelementptr inbounds float addrspace(1)* %explicit_0, i64 %27 + %29 = load float addrspace(1)* %28, align 1 + %30 = getelementptr inbounds float addrspace(1)* %explicit_1, i64 %27 + %31 = load float addrspace(1)* %30, align 1 + %32 = fmul float %29, %31 + %33 = getelementptr inbounds float addrspace(1)* %explicit_2, i64 %27 + store float %32, float addrspace(1)* %33, align 1 + %lsr.iv.next2 = add i64 %lsr.iv1, 4294967296 + %lsr.iv.next8 = add i64 %lsr.iv7, 1 + %dim_0_cmp.to.max.i = icmp eq i64 %lsr.iv.next8, 0 + br i1 %dim_0_cmp.to.max.i, label %__mysgemmNT_separated_args.exit, label %scalar_kernel_entry.i + +__mysgemmNT_separated_args.exit: ; preds = %scalar_kernel_entry.i, %scalarIf.i + ret void +} + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { alwaysinline nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!opencl.kernels = !{!0} +!opencl.enable.FP_CONTRACT = !{} +!opencl.spir.version = !{!7} +!opencl.ocl.version = !{!7} +!opencl.used.extensions = !{!8} +!opencl.used.optional.core.features = !{!8} +!opencl.compiler.options = !{!8} +!opencl.kernel_info = !{!9} +!opencl.module_info_list = !{!26} +!llvm.functions_info = !{} +!opencl.functions_stats = !{} +!opencl.stat_descriptions = !{} +!opencl.module_stat_info = !{} + +!0 = metadata !{void (float addrspace(1)*, float addrspace(1)*, float addrspace(1)*, i8 addrspace(3)*, { i64, [3 x i64], [3 x i64], [2 x [3 x i64]], [3 x i64], {}*, {}* }*, i64*, [4 x i64], i8*, {}*)* @__mysgemmNT_separated_args, metadata !1, metadata !2, metadata !3, metadata !4, metadata !5, metadata !6} +!1 = metadata !{metadata !"kernel_arg_addr_space", i32 1, i32 1, i32 1} +!2 = metadata !{metadata !"kernel_arg_access_qual", metadata !"none", metadata !"none", metadata !"none"} +!3 = metadata !{metadata !"kernel_arg_type", metadata !"float*", metadata !"float*", metadata !"float*"} +!4 = metadata !{metadata !"kernel_arg_type_qual", metadata !"const", metadata !"const", metadata !""} +!5 = metadata !{metadata !"kernel_arg_base_type", metadata !"float*", metadata !"float*", metadata !"float*"} +!6 = metadata !{metadata !"kernel_arg_name", metadata !"A", metadata !"B", metadata !"C"} +!7 = metadata !{i32 1, i32 2} +!8 = metadata !{} +!9 = metadata !{void (float addrspace(1)*, float addrspace(1)*, float addrspace(1)*, i8 addrspace(3)*, { i64, [3 x i64], [3 x i64], [2 x [3 x i64]], [3 x i64], {}*, {}* }*, i64*, [4 x i64], i8*, {}*)* @__mysgemmNT_separated_args, metadata !10} +!10 = metadata !{metadata !11, metadata !12, metadata !13, metadata !14, metadata !15, metadata !16, metadata !17, metadata !18, metadata !19, metadata !20, metadata !21, metadata !22, metadata !23, metadata !24, metadata !25} +!11 = metadata !{metadata !"local_buffer_size", i32 0} +!12 = metadata !{metadata !"barrier_buffer_size", i32 0} +!13 = metadata !{metadata !"kernel_execution_length", i32 11} +!14 = metadata !{metadata !"max_wg_dimensions", i32 1} +!15 = metadata !{metadata !"kernel_has_barrier", i1 false} +!16 = metadata !{metadata !"kernel_has_global_sync", i1 false} +!17 = metadata !{metadata !"no_barrier_path", i1 true} +!18 = metadata !{metadata !"vectorized_kernel", null} +!19 = metadata !{metadata !"vectorized_width", i32 4} +!20 = metadata !{metadata !"kernel_wrapper", void (i8*, i64*, {}*)* @mysgemmNT} +!21 = metadata !{metadata !"scalarized_kernel", null} +!22 = metadata !{metadata !"block_literal_size", null} +!23 = metadata !{metadata !"private_memory_size", i32 0} +!24 = metadata !{metadata !"vectorization_dimension", i32 0} +!25 = metadata !{metadata !"can_unite_workgroups", i1 true} +!26 = metadata !{metadata !27, metadata !28, metadata !29} +!27 = metadata !{metadata !"global_variable_total_size", i64 0} +!28 = metadata !{metadata !"gen_addr_space_pointer_counter", null} +!29 = metadata !{metadata !"gen_addr_space_pointer_warnings"}