Skip to content
Snippets Groups Projects
Commit fe96896b authored by Prakalp Srivastava's avatar Prakalp Srivastava
Browse files

Added a very simple testKernel to understand intel opencl compilation

parent a2f95ffa
No related branches found
No related tags found
No related merge requests found
; ModuleID = '/tmp/qt_temp.w24812'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024"
target triple = "spir-unknown-unknown"
define cc76 void @mysgemmNT(float addrspace(1)* nocapture %A, float addrspace(1)* nocapture %B, float addrspace(1)* nocapture %C) nounwind {
%1 = tail call cc75 i32 @_Z13get_global_idj(i32 0) nounwind readnone
%2 = getelementptr inbounds float addrspace(1)* %A, i32 %1
%3 = load float addrspace(1)* %2, align 4, !tbaa !9
%4 = getelementptr inbounds float addrspace(1)* %B, i32 %1
%5 = load float addrspace(1)* %4, align 4, !tbaa !9
%6 = fmul float %3, %5
%7 = getelementptr inbounds float addrspace(1)* %C, i32 %1
store float %6, float addrspace(1)* %7, align 4, !tbaa !9
ret void
}
declare cc75 i32 @_Z13get_global_idj(i32) nounwind readnone
!opencl.kernels = !{!0}
!opencl.enable.FP_CONTRACT = !{}
!opencl.spir.version = !{!7}
!opencl.ocl.version = !{!7}
!opencl.used.extensions = !{!8}
!opencl.used.optional.core.features = !{!8}
!opencl.compiler.options = !{!8}
!0 = metadata !{void (float addrspace(1)*, float addrspace(1)*, float addrspace(1)*)* @mysgemmNT, metadata !1, metadata !2, metadata !3, metadata !4, metadata !5, metadata !6}
!1 = metadata !{metadata !"kernel_arg_addr_space", i32 1, i32 1, i32 1}
!2 = metadata !{metadata !"kernel_arg_access_qual", metadata !"none", metadata !"none", metadata !"none"}
!3 = metadata !{metadata !"kernel_arg_type", metadata !"float*", metadata !"float*", metadata !"float*"}
!4 = metadata !{metadata !"kernel_arg_type_qual", metadata !"const", metadata !"const", metadata !""}
!5 = metadata !{metadata !"kernel_arg_base_type", metadata !"float*", metadata !"float*", metadata !"float*"}
!6 = metadata !{metadata !"kernel_arg_name", metadata !"A", metadata !"B", metadata !"C"}
!7 = metadata !{i32 1, i32 2}
!8 = metadata !{}
!9 = metadata !{metadata !"float", metadata !10}
!10 = metadata !{metadata !"omnipotent char", metadata !11}
!11 = metadata !{metadata !"Simple C/C++ TBAA"}
; ModuleID = '/tmp/qt_temp.w24812'
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024"
target triple = "spir64-unknown-unknown"
define cc76 void @mysgemmNT(float addrspace(1)* nocapture %A, float addrspace(1)* nocapture %B, float addrspace(1)* nocapture %C) nounwind {
%1 = tail call cc75 i64 @_Z13get_global_idj(i32 0) nounwind readnone
%sext = shl i64 %1, 32
%2 = ashr exact i64 %sext, 32
%3 = getelementptr inbounds float addrspace(1)* %A, i64 %2
%4 = load float addrspace(1)* %3, align 4, !tbaa !9
%5 = getelementptr inbounds float addrspace(1)* %B, i64 %2
%6 = load float addrspace(1)* %5, align 4, !tbaa !9
%7 = fmul float %4, %6
%8 = getelementptr inbounds float addrspace(1)* %C, i64 %2
store float %7, float addrspace(1)* %8, align 4, !tbaa !9
ret void
}
declare cc75 i64 @_Z13get_global_idj(i32) nounwind readnone
!opencl.kernels = !{!0}
!opencl.enable.FP_CONTRACT = !{}
!opencl.spir.version = !{!7}
!opencl.ocl.version = !{!7}
!opencl.used.extensions = !{!8}
!opencl.used.optional.core.features = !{!8}
!opencl.compiler.options = !{!8}
!0 = metadata !{void (float addrspace(1)*, float addrspace(1)*, float addrspace(1)*)* @mysgemmNT, metadata !1, metadata !2, metadata !3, metadata !4, metadata !5, metadata !6}
!1 = metadata !{metadata !"kernel_arg_addr_space", i32 1, i32 1, i32 1}
!2 = metadata !{metadata !"kernel_arg_access_qual", metadata !"none", metadata !"none", metadata !"none"}
!3 = metadata !{metadata !"kernel_arg_type", metadata !"float*", metadata !"float*", metadata !"float*"}
!4 = metadata !{metadata !"kernel_arg_type_qual", metadata !"const", metadata !"const", metadata !""}
!5 = metadata !{metadata !"kernel_arg_base_type", metadata !"float*", metadata !"float*", metadata !"float*"}
!6 = metadata !{metadata !"kernel_arg_name", metadata !"A", metadata !"B", metadata !"C"}
!7 = metadata !{i32 1, i32 2}
!8 = metadata !{}
!9 = metadata !{metadata !"float", metadata !10}
!10 = metadata !{metadata !"omnipotent char", metadata !11}
!11 = metadata !{metadata !"Simple C/C++ TBAA"}
.file "main"
.text
.globl mysgemmNT
.align 16, 0x90
.type mysgemmNT,@function
mysgemmNT:
.cfi_startproc
pushq %rbp
.Ltmp3:
.cfi_def_cfa_offset 16
.Ltmp4:
.cfi_offset %rbp, -16
movq %rsp, %rbp
.Ltmp5:
.cfi_def_cfa_register %rbp
pushq %r14
pushq %rbx
andq $-8, %rsp
.Ltmp6:
.cfi_offset %rbx, -32
.Ltmp7:
.cfi_offset %r14, -24
movq (%rsi), %r9
movq 32(%rdi), %r8
movq 16(%rdi), %r14
movq (%rdi), %rcx
movq 8(%rdi), %rdx
movq 80(%rdi), %r10
movq %r10, %rsi
sarq $2, %rsi
je .LBB0_3
movl %r10d, %eax
imull %r9d, %eax
addl %r8d, %eax
shlq $32, %rax
movabsq $17179869184, %r11
movq %rsi, %rbx
.align 16, 0x90
.LBB0_2:
movq %rax, %rdi
sarq $32, %rdi
vmovups (%rdx,%rdi,4), %xmm0
vmulps (%rcx,%rdi,4), %xmm0, %xmm0
vmovups %xmm0, (%r14,%rdi,4)
addq %r11, %rax
decq %rbx
jne .LBB0_2
.LBB0_3:
movq %r10, %rax
andq $-4, %rax
cmpq %rax, %r10
je .LBB0_6
shlq $2, %rsi
movq %r10, %rdi
subq %rsi, %rdi
negq %rdi
imull %r9d, %r10d
addl %r10d, %r8d
addl %r8d, %esi
shlq $32, %rsi
movabsq $4294967296, %r8
.align 16, 0x90
.LBB0_5:
movq %rsi, %rax
sarq $32, %rax
vmovss (%rcx,%rax,4), %xmm0
vmulss (%rdx,%rax,4), %xmm0, %xmm0
vmovss %xmm0, (%r14,%rax,4)
addq %r8, %rsi
incq %rdi
jne .LBB0_5
.LBB0_6:
leaq -16(%rbp), %rsp
popq %rbx
popq %r14
popq %rbp
ret
.Ltmp8:
.size mysgemmNT, .Ltmp8-mysgemmNT
.cfi_endproc
.section ".note.GNU-stack","",@progbits
/***************************************************************************
*cr
*cr (C) Copyright 2010 The Board of Trustees of the
*cr University of Illinois
*cr All Rights Reserved
*cr
***************************************************************************/
/*
* Kernel of dense matrix-matrix multiplication kernel.
*/
__kernel void mysgemmNT( __global const float *A, __global const float *B, __global float* C)
{
int m = get_global_id(0);
C[m] = A[m] * B[m];
}
; ModuleID = 'main'
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux"
; Function Attrs: nounwind
declare void @__mysgemmNT_before.AddImplicitArgs(float addrspace(1)* nocapture, float addrspace(1)* nocapture, float addrspace(1)* nocapture) #0
; Function Attrs: nounwind readnone
declare i64 @_Z13get_global_idj(i32) #1
declare [7 x i64] @__WG.boundaries.mysgemmNT_before.AddImplicitArgs(float addrspace(1)*, float addrspace(1)*, float addrspace(1)*)
declare i64 @_Z14get_local_sizej(i32)
declare i64 @get_base_global_id.(i32)
declare i1 @__ocl_allOne(i1)
declare i1 @__ocl_allZero(i1)
; Function Attrs: alwaysinline nounwind
declare void @__mysgemmNT_separated_args(float addrspace(1)* nocapture, float addrspace(1)* nocapture, float addrspace(1)* nocapture, i8 addrspace(3)* noalias, { i64, [3 x i64], [3 x i64], [2 x [3 x i64]], [3 x i64], {}*, {}* }* noalias, i64* noalias, [4 x i64], i8* noalias, {}* noalias) #2
declare [7 x i64] @WG.boundaries.mysgemmNT(float addrspace(1)*, float addrspace(1)*, float addrspace(1)*, i8 addrspace(3)* noalias, { i64, [3 x i64], [3 x i64], [2 x [3 x i64]], [3 x i64], {}*, {}* }* noalias, i64* noalias, [4 x i64], i8* noalias, {}* noalias)
define void @mysgemmNT(i8* noalias %pUniformArgs, i64* noalias %pWGId, {}* noalias %RuntimeHandle) {
wrapper_entry:
%0 = bitcast i8* %pUniformArgs to float addrspace(1)**
%explicit_0 = load float addrspace(1)** %0, align 8
%1 = getelementptr i8* %pUniformArgs, i64 8
%2 = bitcast i8* %1 to float addrspace(1)**
%explicit_1 = load float addrspace(1)** %2, align 8
%3 = getelementptr i8* %pUniformArgs, i64 16
%4 = bitcast i8* %3 to float addrspace(1)**
%explicit_2 = load float addrspace(1)** %4, align 8
%5 = getelementptr i8* %pUniformArgs, i64 80
%6 = bitcast i8* %5 to i64*
%LocalSize_0 = load i64* %6, align 8
%7 = getelementptr i8* %pUniformArgs, i64 32
%8 = bitcast i8* %7 to i64*
%GlobalOffset_0 = load i64* %8, align 8
%GroupID_0 = load i64* %pWGId, align 8
%vector.size.i = ashr i64 %LocalSize_0, 2
%num.vector.wi.i = shl nsw i64 %vector.size.i, 2
%9 = icmp eq i64 %vector.size.i, 0
br i1 %9, label %scalarIf.i, label %dim_0_vector_pre_head.i.preheader
dim_0_vector_pre_head.i.preheader: ; preds = %wrapper_entry
%10 = mul i64 %LocalSize_0, %GroupID_0
%11 = add i64 %GlobalOffset_0, %10
%12 = mul i64 %11, 4294967296
br label %dim_0_vector_pre_head.i
dim_0_vector_pre_head.i: ; preds = %dim_0_vector_pre_head.i.preheader, %dim_0_vector_pre_head.i
%lsr.iv5 = phi i64 [ %12, %dim_0_vector_pre_head.i.preheader ], [ %lsr.iv.next6, %dim_0_vector_pre_head.i ]
%lsr.iv3 = phi i64 [ %vector.size.i, %dim_0_vector_pre_head.i.preheader ], [ %lsr.iv.next4, %dim_0_vector_pre_head.i ]
%extractvector_func.i = ashr exact i64 %lsr.iv5, 32
%13 = getelementptr inbounds float addrspace(1)* %explicit_0, i64 %extractvector_func.i
%ptrTypeCastvector_func.i = bitcast float addrspace(1)* %13 to <4 x float> addrspace(1)*
%14 = load <4 x float> addrspace(1)* %ptrTypeCastvector_func.i, align 1
%15 = getelementptr inbounds float addrspace(1)* %explicit_1, i64 %extractvector_func.i
%ptrTypeCast5vector_func.i = bitcast float addrspace(1)* %15 to <4 x float> addrspace(1)*
%16 = load <4 x float> addrspace(1)* %ptrTypeCast5vector_func.i, align 1
%17 = fmul <4 x float> %14, %16
%18 = getelementptr inbounds float addrspace(1)* %explicit_2, i64 %extractvector_func.i
%ptrTypeCast6vector_func.i = bitcast float addrspace(1)* %18 to <4 x float> addrspace(1)*
store <4 x float> %17, <4 x float> addrspace(1)* %ptrTypeCast6vector_func.i, align 1
%lsr.iv.next4 = add i64 %lsr.iv3, -1
%lsr.iv.next6 = add i64 %lsr.iv5, 17179869184
%dim_0_vector_cmp.to.max.i = icmp eq i64 %lsr.iv.next4, 0
br i1 %dim_0_vector_cmp.to.max.i, label %scalarIf.i, label %dim_0_vector_pre_head.i
scalarIf.i: ; preds = %dim_0_vector_pre_head.i, %wrapper_entry
%19 = icmp eq i64 %LocalSize_0, %num.vector.wi.i
br i1 %19, label %__mysgemmNT_separated_args.exit, label %scalar_kernel_entry.i.preheader
scalar_kernel_entry.i.preheader: ; preds = %scalarIf.i
%20 = mul i64 %vector.size.i, 4
%21 = sub i64 %LocalSize_0, %20
%22 = mul i64 %LocalSize_0, %GroupID_0
%23 = add i64 %GlobalOffset_0, %22
%24 = add i64 %23, %20
%25 = mul i64 %24, 4294967296
%26 = sub i64 0, %21
br label %scalar_kernel_entry.i
scalar_kernel_entry.i: ; preds = %scalar_kernel_entry.i.preheader, %scalar_kernel_entry.i
%lsr.iv7 = phi i64 [ %26, %scalar_kernel_entry.i.preheader ], [ %lsr.iv.next8, %scalar_kernel_entry.i ]
%lsr.iv1 = phi i64 [ %25, %scalar_kernel_entry.i.preheader ], [ %lsr.iv.next2, %scalar_kernel_entry.i ]
%27 = ashr exact i64 %lsr.iv1, 32
%28 = getelementptr inbounds float addrspace(1)* %explicit_0, i64 %27
%29 = load float addrspace(1)* %28, align 1
%30 = getelementptr inbounds float addrspace(1)* %explicit_1, i64 %27
%31 = load float addrspace(1)* %30, align 1
%32 = fmul float %29, %31
%33 = getelementptr inbounds float addrspace(1)* %explicit_2, i64 %27
store float %32, float addrspace(1)* %33, align 1
%lsr.iv.next2 = add i64 %lsr.iv1, 4294967296
%lsr.iv.next8 = add i64 %lsr.iv7, 1
%dim_0_cmp.to.max.i = icmp eq i64 %lsr.iv.next8, 0
br i1 %dim_0_cmp.to.max.i, label %__mysgemmNT_separated_args.exit, label %scalar_kernel_entry.i
__mysgemmNT_separated_args.exit: ; preds = %scalar_kernel_entry.i, %scalarIf.i
ret void
}
attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { alwaysinline nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
!opencl.kernels = !{!0}
!opencl.enable.FP_CONTRACT = !{}
!opencl.spir.version = !{!7}
!opencl.ocl.version = !{!7}
!opencl.used.extensions = !{!8}
!opencl.used.optional.core.features = !{!8}
!opencl.compiler.options = !{!8}
!opencl.kernel_info = !{!9}
!opencl.module_info_list = !{!26}
!llvm.functions_info = !{}
!opencl.functions_stats = !{}
!opencl.stat_descriptions = !{}
!opencl.module_stat_info = !{}
!0 = metadata !{void (float addrspace(1)*, float addrspace(1)*, float addrspace(1)*, i8 addrspace(3)*, { i64, [3 x i64], [3 x i64], [2 x [3 x i64]], [3 x i64], {}*, {}* }*, i64*, [4 x i64], i8*, {}*)* @__mysgemmNT_separated_args, metadata !1, metadata !2, metadata !3, metadata !4, metadata !5, metadata !6}
!1 = metadata !{metadata !"kernel_arg_addr_space", i32 1, i32 1, i32 1}
!2 = metadata !{metadata !"kernel_arg_access_qual", metadata !"none", metadata !"none", metadata !"none"}
!3 = metadata !{metadata !"kernel_arg_type", metadata !"float*", metadata !"float*", metadata !"float*"}
!4 = metadata !{metadata !"kernel_arg_type_qual", metadata !"const", metadata !"const", metadata !""}
!5 = metadata !{metadata !"kernel_arg_base_type", metadata !"float*", metadata !"float*", metadata !"float*"}
!6 = metadata !{metadata !"kernel_arg_name", metadata !"A", metadata !"B", metadata !"C"}
!7 = metadata !{i32 1, i32 2}
!8 = metadata !{}
!9 = metadata !{void (float addrspace(1)*, float addrspace(1)*, float addrspace(1)*, i8 addrspace(3)*, { i64, [3 x i64], [3 x i64], [2 x [3 x i64]], [3 x i64], {}*, {}* }*, i64*, [4 x i64], i8*, {}*)* @__mysgemmNT_separated_args, metadata !10}
!10 = metadata !{metadata !11, metadata !12, metadata !13, metadata !14, metadata !15, metadata !16, metadata !17, metadata !18, metadata !19, metadata !20, metadata !21, metadata !22, metadata !23, metadata !24, metadata !25}
!11 = metadata !{metadata !"local_buffer_size", i32 0}
!12 = metadata !{metadata !"barrier_buffer_size", i32 0}
!13 = metadata !{metadata !"kernel_execution_length", i32 11}
!14 = metadata !{metadata !"max_wg_dimensions", i32 1}
!15 = metadata !{metadata !"kernel_has_barrier", i1 false}
!16 = metadata !{metadata !"kernel_has_global_sync", i1 false}
!17 = metadata !{metadata !"no_barrier_path", i1 true}
!18 = metadata !{metadata !"vectorized_kernel", null}
!19 = metadata !{metadata !"vectorized_width", i32 4}
!20 = metadata !{metadata !"kernel_wrapper", void (i8*, i64*, {}*)* @mysgemmNT}
!21 = metadata !{metadata !"scalarized_kernel", null}
!22 = metadata !{metadata !"block_literal_size", null}
!23 = metadata !{metadata !"private_memory_size", i32 0}
!24 = metadata !{metadata !"vectorization_dimension", i32 0}
!25 = metadata !{metadata !"can_unite_workgroups", i1 true}
!26 = metadata !{metadata !27, metadata !28, metadata !29}
!27 = metadata !{metadata !"global_variable_total_size", i64 0}
!28 = metadata !{metadata !"gen_addr_space_pointer_counter", null}
!29 = metadata !{metadata !"gen_addr_space_pointer_warnings"}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment