Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
H
hpvm-release
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
llvm
hpvm-release
Commits
4434c2e8
Commit
4434c2e8
authored
10 years ago
by
Prakalp Srivastava
Browse files
Options
Downloads
Patches
Plain Diff
Added matrix mul ptx test case to repo
parent
881903ec
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
llvm/test/VISC/MatrixMultiplication/visc_gemm.ll
+1
-1
1 addition, 1 deletion
llvm/test/VISC/MatrixMultiplication/visc_gemm.ll
llvm/test/VISC/MatrixMultiplication/visc_gemm_ptx.ll
+413
-0
413 additions, 0 deletions
llvm/test/VISC/MatrixMultiplication/visc_gemm_ptx.ll
with
414 additions
and
1 deletion
llvm/test/VISC/MatrixMultiplication/visc_gemm.ll
+
1
−
1
View file @
4434c2e8
; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S
<
%s
; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S %s
; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
; RUN: clang -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
; RUN: clang -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
; RUN: %t.bin
; RUN: %t.bin
...
...
This diff is collapsed.
Click to expand it.
llvm/test/VISC/MatrixMultiplication/visc_gemm_ptx.ll
0 → 100644
+
413
−
0
View file @
4434c2e8
; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_NVPTX.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-nvptx -dfg2llvm-x86 -clearDFG -o %t.ll -S %s
; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
; RUN: clang -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
; RUN: %t.bin
; ModuleID = 'gemm_opencl.c'
target
datalayout
=
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target
triple
=
"x86_64-unknown-linux-gnu"
@custom_str
=
private
unnamed_addr
constant
[
12
x
i8
]
c"Value = %d\0A\00"
,
align
1
@hex_str
=
private
unnamed_addr
constant
[
14
x
i8
]
c"Value = 0x%x\0A\00"
,
align
1
@.str
=
private
unnamed_addr
constant
[
45
x
i8
]
c"Mismatch at %d,%d --- C = %f and goldC = %f\0A\00"
,
align
1
@.str2
=
private
unnamed_addr
constant
[
28
x
i8
]
c"Computing element (%d, %d)\0A\00"
,
align
1
@.str3
=
private
unnamed_addr
constant
[
32
x
i8
]
c"Accessing k = %d, A[%d], B[%d]\0A\00"
,
align
1
@str
=
private
unnamed_addr
constant
[
17
x
i8
]
c"Entered function\00"
@str10
=
private
unnamed_addr
constant
[
16
x
i8
]
c"Result computed\00"
@str11
=
private
unnamed_addr
constant
[
20
x
i8
]
c"Result written to C\00"
@str12
=
private
unnamed_addr
constant
[
17
x
i8
]
c"Output allocated\00"
@str13
=
private
unnamed_addr
constant
[
9
x
i8
]
c"\0AFailed!\00"
@str14
=
private
unnamed_addr
constant
[
7
x
i8
]
c"\0ADone!\00"
@str15
=
private
unnamed_addr
constant
[
7
x
i8
]
c"\0APass!\00"
; Function Attrs: nounwind uwtable
define
void
@randomInit
(
float
*
nocapture
%data
,
i32
%size
)
#0
{
entry:
%cmp3
=
icmp
sgt
i32
%size
,
0
br
i1
%cmp3
,
label
%for.body
,
label
%for.end
for.body:
; preds = %entry, %for.body
%indvars.iv
=
phi
i64
[
%indvars.iv.next
,
%for.body
],
[
0
,
%entry
]
%call
=
tail
call
i32
@rand
()
#5
%conv
=
sitofp
i32
%call
to
float
%div
=
fmul
float
%conv
,
0x3E00000000000000
%arrayidx
=
getelementptr
inbounds
float
*
%data
,
i64
%indvars.iv
store
float
%div
,
float
*
%arrayidx
,
align
4
,
!tbaa
!0
%indvars.iv.next
=
add
i64
%indvars.iv
,
1
%lftr.wideiv
=
trunc
i64
%indvars.iv.next
to
i32
%exitcond
=
icmp
eq
i32
%lftr.wideiv
,
%size
br
i1
%exitcond
,
label
%for.end
,
label
%for.body
for.end:
; preds = %for.body, %entry
ret
void
}
; Function Attrs: nounwind
declare
i32
@rand
()
#1
; Function Attrs: nounwind readnone uwtable
define
i32
@isEqual
(
float
%a
,
float
%b
)
#2
{
entry:
%sub
=
fsub
float
%a
,
%b
%fabsf
=
tail
call
float
@fabsf
(
float
%sub
)
#6
%0
=
fpext
float
%fabsf
to
double
%cmp
=
fcmp
olt
double
%0
,
1.000000e-03
%conv1
=
zext
i1
%cmp
to
i32
ret
i32
%conv1
}
; Function Attrs: noinline nounwind uwtable
define
i32
@checkResults
(
float
*
nocapture
%A
,
float
*
nocapture
%B
,
float
*
nocapture
%C
)
#3
{
entry:
br
label
%for.cond4.preheader
for.cond4.preheader:
; preds = %entry, %for.inc50
%indvars.iv92
=
phi
i64
[
0
,
%entry
],
[
%indvars.iv.next93
,
%for.inc50
]
%i.081
=
phi
i32
[
0
,
%entry
],
[
%inc51
,
%for.inc50
]
%0
=
shl
nsw
i64
%indvars.iv92
,
10
br
label
%for.body7
for.cond4:
; preds = %for.end
%inc48
=
add
nsw
i32
%j.079
,
1
%1
=
trunc
i64
%indvars.iv.next89
to
i32
%cmp5
=
icmp
slt
i32
%1
,
1024
br
i1
%cmp5
,
label
%for.body7
,
label
%for.inc50
for.body7:
; preds = %for.cond4.preheader, %for.cond4
%indvars.iv88
=
phi
i64
[
0
,
%for.cond4.preheader
],
[
%indvars.iv.next89
,
%for.cond4
]
%j.079
=
phi
i32
[
0
,
%for.cond4.preheader
],
[
%inc48
,
%for.cond4
]
%2
=
add
nsw
i64
%indvars.iv88
,
%0
br
label
%for.body12
for.body12:
; preds = %for.body12, %for.body7
%indvars.iv
=
phi
i64
[
0
,
%for.body7
],
[
%indvars.iv.next
,
%for.body12
]
%3
=
phi
float
[
0.000000e+00
,
%for.body7
],
[
%add26
,
%for.body12
]
%4
=
add
nsw
i64
%indvars.iv
,
%0
%arrayidx16
=
getelementptr
inbounds
float
*
%A
,
i64
%4
%5
=
load
float
*
%arrayidx16
,
align
4
,
!tbaa
!0
%6
=
shl
i64
%indvars.iv
,
10
%7
=
add
nsw
i64
%6
,
%indvars.iv88
%arrayidx20
=
getelementptr
inbounds
float
*
%B
,
i64
%7
%8
=
load
float
*
%arrayidx20
,
align
4
,
!tbaa
!0
%mul21
=
fmul
float
%5
,
%8
%add26
=
fadd
float
%3
,
%mul21
%indvars.iv.next
=
add
i64
%indvars.iv
,
1
%lftr.wideiv
=
trunc
i64
%indvars.iv.next
to
i32
%exitcond
=
icmp
eq
i32
%lftr.wideiv
,
1024
br
i1
%exitcond
,
label
%for.end
,
label
%for.body12
for.end:
; preds = %for.body12
%arrayidx34
=
getelementptr
inbounds
float
*
%C
,
i64
%2
%9
=
load
float
*
%arrayidx34
,
align
4
,
!tbaa
!0
%sub.i
=
fsub
float
%add26
,
%9
%fabsf.i
=
tail
call
float
@fabsf
(
float
%sub.i
)
#6
%10
=
fpext
float
%fabsf.i
to
double
%cmp.i
=
fcmp
olt
double
%10
,
1.000000e-03
%indvars.iv.next89
=
add
i64
%indvars.iv88
,
1
br
i1
%cmp.i
,
label
%for.cond4
,
label
%if.then
if.then:
; preds = %for.end
%conv40
=
fpext
float
%9
to
double
%conv45
=
fpext
float
%add26
to
double
%call46
=
tail
call
i32
(
i8
*,
...)*
@printf
(
i8
*
getelementptr
inbounds
([
45
x
i8
]*
@.str
,
i64
0
,
i64
0
),
i32
%i.081
,
i32
%j.079
,
double
%conv40
,
double
%conv45
)
#5
br
label
%return
for.inc50:
; preds = %for.cond4
%indvars.iv.next93
=
add
i64
%indvars.iv92
,
1
%inc51
=
add
nsw
i32
%i.081
,
1
%11
=
trunc
i64
%indvars.iv.next93
to
i32
%cmp
=
icmp
slt
i32
%11
,
1024
br
i1
%cmp
,
label
%for.cond4.preheader
,
label
%return
return:
; preds = %for.inc50, %if.then
%retval.0
=
phi
i32
[
0
,
%if.then
],
[
1
,
%for.inc50
]
ret
i32
%retval.0
}
; Function Attrs: nounwind
declare
noalias
i8
*
@malloc
(
i64
)
#1
; Function Attrs: nounwind
declare
i32
@printf
(
i8
*
nocapture
,
...)
#1
; --------------- VISC Intrinsics ---------------
; Return Type of VISC Compute Matrix Mul
%rtype
=
type
{
float
*,
i64
}
%struct.arg
=
type
<{
float
*,
i64
,
float
*,
i64
,
float
*,
i64
,
i32
,
i32
,
i32
,
%rtype
}>
; Function Attrs: nounwind
declare
i8
*
@llvm.visc.launch
(
i8
*,
i8
*)
#0
; Function Attrs: nounwind
declare
void
@llvm.visc.wait
(
i8
*)
#0
; Function Attrs: nounwind
declare
i8
*
@llvm.visc.createNode
(
i8
*)
#0
; Function Attrs: nounwind
declare
i8
*
@llvm.visc.createNode1D
(
i8
*,
i32
)
#0
; Function Attrs: nounwind
declare
i8
*
@llvm.visc.createNode2D
(
i8
*,
i32
,
i32
)
#0
; Function Attrs: nounwind
declare
i8
*
@llvm.visc.createNode3D
(
i8
*,
i32
,
i32
,
i32
)
#0
; Function Attrs: nounwind
declare
i8
*
@llvm.visc.createEdge
(
i8
*,
i8
*,
i1
,
i32
,
i32
)
#0
; Function Attrs: nounwind
declare
i8
*
@llvm.visc.getNode
()
#0
; Function Attrs: nounwind
declare
i8
*
@llvm.visc.getParentNode
(
i8
*)
#0
; Function Attrs: nounwind
declare
i32
@llvm.visc.getNumDims
(
i8
*)
#0
; Function Attrs: nounwind
declare
i32
@llvm.visc.getNodeInstanceID.x
(
i8
*)
#0
; Function Attrs: nounwind
declare
i32
@llvm.visc.getNodeInstanceID.y
(
i8
*)
#0
; Function Attrs: nounwind
declare
void
@llvm.visc.bind.input
(
i8
*,
i32
,
i32
)
; Function Attrs: nounwind
declare
void
@llvm.visc.bind.output
(
i8
*,
i32
,
i32
)
; ----------------- VISC intrinsics end ------------------
; Function Attrs: nounwind uwtable
define
%rtype
@matrixMul
(
float
*
nocapture
%A
,
i64
%bytes_A
,
float
*
nocapture
%B
,
i64
%bytes_B
,
float
*
%C
,
i64
%bytes_C
,
i32
%k
,
i32
%n
,
i32
%m
)
#0
{
entry:
;%puts = tail call i32 @puts(i8* getelementptr inbounds ([17 x i8]* @str, i64 0, i64 0))
; ------------------------- VISC changes ------------------
; Replace get_global_id calls with calls to getNode followed by getNumNodeInstances.x
; Replaced statement --
; -- %call1 = tail call i32 (i32, ...)* bitcast (i32 (...)* @get_global_id to i32 (i32, ...)*)(i32 0) #5
; -- %call2 = tail call i32 (i32, ...)* bitcast (i32 (...)* @get_global_id to i32 (i32, ...)*)(i32 1) #5
%this_node
=
call
i8
*
@llvm.visc.getNode
()
%call1
=
call
i32
@llvm.visc.getNodeInstanceID.x
(
i8
*
%this_node
)
%call2
=
call
i32
@llvm.visc.getNodeInstanceID.y
(
i8
*
%this_node
)
; ---------------------- VISC changes End ------------------
;%call3 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([28 x i8]* @.str2, i64 0, i64 0), i32 %call1, i32 %call2) #5
%cmp44
=
icmp
eq
i32
%k
,
0
br
i1
%cmp44
,
label
%for.end
,
label
%for.body.lr.ph
for.body.lr.ph:
; preds = %entry
%mul
=
mul
i32
%call2
,
%k
br
label
%for.body
for.body:
; preds = %for.body, %for.body.lr.ph
%indvars.iv
=
phi
i64
[
0
,
%for.body.lr.ph
],
[
%indvars.iv.next
,
%for.body
]
%res.046
=
phi
float
[
0.000000e+00
,
%for.body.lr.ph
],
[
%add14
,
%for.body
]
%0
=
trunc
i64
%indvars.iv
to
i32
%add
=
add
i32
%0
,
%mul
%mul4
=
mul
i32
%0
,
%n
%add5
=
add
i32
%mul4
,
%call1
;%call6 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([32 x i8]* @.str3, i64 0, i64 0), i32 %k, i32 %add, i32 %add5) #5
%idxprom
=
zext
i32
%add
to
i64
%arrayidx
=
getelementptr
inbounds
float
*
%A
,
i64
%idxprom
%1
=
load
float
*
%arrayidx
,
align
4
,
!tbaa
!0
%idxprom11
=
zext
i32
%add5
to
i64
%arrayidx12
=
getelementptr
inbounds
float
*
%B
,
i64
%idxprom11
%2
=
load
float
*
%arrayidx12
,
align
4
,
!tbaa
!0
%mul13
=
fmul
float
%1
,
%2
%add14
=
fadd
float
%res.046
,
%mul13
%indvars.iv.next
=
add
i64
%indvars.iv
,
1
%lftr.wideiv
=
trunc
i64
%indvars.iv.next
to
i32
%exitcond
=
icmp
eq
i32
%lftr.wideiv
,
%k
br
i1
%exitcond
,
label
%for.end
,
label
%for.body
for.end:
; preds = %for.body, %entry
%res.0.lcssa
=
phi
float
[
0.000000e+00
,
%entry
],
[
%add14
,
%for.body
]
;%puts41 = tail call i32 @puts(i8* getelementptr inbounds ([16 x i8]* @str10, i64 0, i64 0))
%mul16
=
mul
i32
%call2
,
%n
%add17
=
add
i32
%mul16
,
%call1
%idxprom18
=
zext
i32
%add17
to
i64
%arrayidx19
=
getelementptr
inbounds
float
*
%C
,
i64
%idxprom18
store
float
%res.0.lcssa
,
float
*
%arrayidx19
,
align
4
,
!tbaa
!0
;%puts42 = tail call i32 @puts(i8* getelementptr inbounds ([20 x i8]* @str11, i64 0, i64 0))
;%puts43 = tail call i32 @puts(i8* getelementptr inbounds ([17 x i8]* @str12, i64 0, i64 0))
%.fca.0.insert
=
insertvalue
%rtype
undef
,
float
*
%C
,
0
%.fca.1.insert
=
insertvalue
%rtype
%.fca.0.insert
,
i64
%bytes_C
,
1
ret
%rtype
%.fca.1.insert
}
; ----------------- VISC SGEMM root node ----------------
define
%rtype
@MatrixMulRoot
(
float
*
%h_A
,
i64
%bytes_A
,
float
*
%h_B
,
i64
%bytes_B
,
float
*
%h_C
,
i64
%bytes_C
,
i32
%WA
,
i32
%WB
,
i32
%HA
)
{
%kernel
=
call
i8
*
@llvm.visc.createNode2D
(
i8
*
bitcast
(
%rtype
(
float
*,
i64
,
float
*,
i64
,
float
*,
i64
,
i32
,
i32
,
i32
)*
@matrixMul
to
i8
*),
i32
%WB
,
i32
%HA
)
; Bind Inputs
call
void
@llvm.visc.bind.input
(
i8
*
%kernel
,
i32
0
,
i32
0
)
; h_A
call
void
@llvm.visc.bind.input
(
i8
*
%kernel
,
i32
1
,
i32
1
)
; bytes_A
call
void
@llvm.visc.bind.input
(
i8
*
%kernel
,
i32
2
,
i32
2
)
; h_B
call
void
@llvm.visc.bind.input
(
i8
*
%kernel
,
i32
3
,
i32
3
)
; bytes_B
call
void
@llvm.visc.bind.input
(
i8
*
%kernel
,
i32
4
,
i32
4
)
; h_C
call
void
@llvm.visc.bind.input
(
i8
*
%kernel
,
i32
5
,
i32
5
)
; bytes_C
call
void
@llvm.visc.bind.input
(
i8
*
%kernel
,
i32
6
,
i32
6
)
; WA = HB = k
call
void
@llvm.visc.bind.input
(
i8
*
%kernel
,
i32
7
,
i32
7
)
; WB = WC = n
call
void
@llvm.visc.bind.input
(
i8
*
%kernel
,
i32
8
,
i32
8
)
; HA = HC = m
; Bind Outputs
call
void
@llvm.visc.bind.output
(
i8
*
%kernel
,
i32
0
,
i32
0
)
; d_C
call
void
@llvm.visc.bind.output
(
i8
*
%kernel
,
i32
1
,
i32
1
)
; bytes_C
ret
%rtype
zeroinitializer
}
; Function Attrs: noinline nounwind uwtable
;define %rtype @computeMatrixMul(float* nocapture %h_A, i64 %bytes_A, float* nocapture %h_B, i64 %bytes_B, float* %h_C, i64 %bytes_C, i32 %k, i32 %n, i32 %m) #3 {
;entry:
; %cmp18 = icmp eq i32 %m, 0
; %cmp215 = icmp eq i32 %n, 0
; %or.cond = or i1 %cmp18, %cmp215
; br i1 %or.cond, label %for.end6, label %for.body3.lr.ph.us
;
;for.inc4.us: ; preds = %for.body3.us
; %0 = extractvalue %rtype %call.us, 0
; %1 = extractvalue %rtype %call.us, 1
; %inc5.us = add i32 %i.019.us, 1
; %exitcond24 = icmp eq i32 %inc5.us, %m
; br i1 %exitcond24, label %for.end6, label %for.body3.lr.ph.us
;
;for.body3.us: ; preds = %for.body3.us, %for.body3.lr.ph.us
; %j.016.us = phi i32 [ 0, %for.body3.lr.ph.us ], [ %inc.us, %for.body3.us ]
; %call.us = tail call %rtype @matrixMul(float* %h_A, i64 undef, float* %h_B, i64 undef, float* %h_C, i64 %bytes_C, i32 %k, i32 %n, i32 undef, i32 undef, i32 undef)
; %inc.us = add i32 %j.016.us, 1
; %exitcond = icmp eq i32 %inc.us, %n
; br i1 %exitcond, label %for.inc4.us, label %for.body3.us
;
;for.body3.lr.ph.us: ; preds = %entry, %for.inc4.us
; %i.019.us = phi i32 [ %inc5.us, %for.inc4.us ], [ 0, %entry ]
; br label %for.body3.us
;
;for.end6: ; preds = %for.inc4.us, %entry
; %Out.sroa.1.0.lcssa = phi i32 [ undef, %entry ], [ %1, %for.inc4.us ]
; %Out.sroa.0.0.lcssa = phi float* [ undef, %entry ], [ %0, %for.inc4.us ]
; %.fca.0.insert = insertvalue %rtype undef, float* %Out.sroa.0.0.lcssa, 0
; %.fca.1.insert = insertvalue %rtype %.fca.0.insert, i32 %Out.sroa.1.0.lcssa, 1
; ret %rtype %.fca.1.insert
;}
; Function Attrs: nounwind uwtable
define
i32
@main
(
i32
%argc
,
i8
**
nocapture
%argv
)
#0
{
entry:
tail
call
void
@srand
(
i32
2006
)
#5
%call
=
tail
call
noalias
i8
*
@malloc
(
i64
4194304
)
#5
%0
=
bitcast
i8
*
%call
to
float
*
%call7
=
tail
call
noalias
i8
*
@malloc
(
i64
4194304
)
#5
br
label
%for.body.i
for.body.i:
; preds = %for.body.i, %entry
%indvars.iv.i
=
phi
i64
[
%indvars.iv.next.i
,
%for.body.i
],
[
0
,
%entry
]
%call.i
=
tail
call
i32
@rand
()
#5
%conv.i
=
sitofp
i32
%call.i
to
float
%div.i
=
fmul
float
%conv.i
,
0x3E00000000000000
%arrayidx.i
=
getelementptr
inbounds
float
*
%0
,
i64
%indvars.iv.i
store
float
%div.i
,
float
*
%arrayidx.i
,
align
4
,
!tbaa
!0
%indvars.iv.next.i
=
add
i64
%indvars.iv.i
,
1
%lftr.wideiv42
=
trunc
i64
%indvars.iv.next.i
to
i32
%exitcond43
=
icmp
eq
i32
%lftr.wideiv42
,
1048576
br
i1
%exitcond43
,
label
%for.body.i40.preheader
,
label
%for.body.i
for.body.i40.preheader:
; preds = %for.body.i
%1
=
bitcast
i8
*
%call7
to
float
*
br
label
%for.body.i40
for.body.i40:
; preds = %for.body.i40.preheader, %for.body.i40
%indvars.iv.i32
=
phi
i64
[
%indvars.iv.next.i37
,
%for.body.i40
],
[
0
,
%for.body.i40.preheader
]
%call.i33
=
tail
call
i32
@rand
()
#5
%conv.i34
=
sitofp
i32
%call.i33
to
float
%div.i35
=
fmul
float
%conv.i34
,
0x3E00000000000000
%arrayidx.i36
=
getelementptr
inbounds
float
*
%1
,
i64
%indvars.iv.i32
store
float
%div.i35
,
float
*
%arrayidx.i36
,
align
4
,
!tbaa
!0
%indvars.iv.next.i37
=
add
i64
%indvars.iv.i32
,
1
%lftr.wideiv
=
trunc
i64
%indvars.iv.next.i37
to
i32
%exitcond
=
icmp
eq
i32
%lftr.wideiv
,
1048576
br
i1
%exitcond
,
label
%randomInit.exit41
,
label
%for.body.i40
randomInit.exit41:
; preds = %for.body.i40
%call12
=
tail
call
noalias
i8
*
@malloc
(
i64
4194304
)
#5
%2
=
bitcast
i8
*
%call12
to
float
*
; ---------------------------------- Adding VISC Launch Call --------------------------------
; Replaced - %out = tail call %rtype @computeMatrixMul(float* %0, i32 undef, float* %1, i32 undef, float* %2, i32 4194304, i32 1024, i32 1024, i32 1024)
; Setting up launch input args
%in.addr
=
alloca
%struct.arg
; Store arguments
%in.addr.h_A
=
getelementptr
%struct.arg
*
%in.addr
,
i32
0
,
i32
0
%in.addr.bytes_A
=
getelementptr
%struct.arg
*
%in.addr
,
i32
0
,
i32
1
%in.addr.h_B
=
getelementptr
%struct.arg
*
%in.addr
,
i32
0
,
i32
2
%in.addr.bytes_B
=
getelementptr
%struct.arg
*
%in.addr
,
i32
0
,
i32
3
%in.addr.h_C
=
getelementptr
%struct.arg
*
%in.addr
,
i32
0
,
i32
4
%in.addr.bytes_C
=
getelementptr
%struct.arg
*
%in.addr
,
i32
0
,
i32
5
%in.addr.WA
=
getelementptr
%struct.arg
*
%in.addr
,
i32
0
,
i32
6
%in.addr.WB
=
getelementptr
%struct.arg
*
%in.addr
,
i32
0
,
i32
7
%in.addr.HA
=
getelementptr
%struct.arg
*
%in.addr
,
i32
0
,
i32
8
store
float
*
%0
,
float
**
%in.addr.h_A
store
i64
4194304
,
i64
*
%in.addr.bytes_A
store
float
*
%1
,
float
**
%in.addr.h_B
store
i64
4194304
,
i64
*
%in.addr.bytes_B
store
float
*
%2
,
float
**
%in.addr.h_C
store
i64
4194304
,
i64
*
%in.addr.bytes_C
store
i32
1024
,
i32
*
%in.addr.WA
store
i32
1024
,
i32
*
%in.addr.WB
store
i32
1024
,
i32
*
%in.addr.HA
; Change type to i8* and VISC Launch call
%args
=
bitcast
%struct.arg
*
%in.addr
to
i8
*
%graphID
=
call
i8
*
@llvm.visc.launch
(
i8
*
bitcast
(
%rtype
(
float
*,
i64
,
float
*,
i64
,
float
*,
i64
,
i32
,
i32
,
i32
)*
@MatrixMulRoot
to
i8
*),
i8
*
%args
)
; Wait for result
call
void
@llvm.visc.wait
(
i8
*
%graphID
)
; Get the result
%out.addr
=
getelementptr
%struct.arg
*
%in.addr
,
i32
0
,
i32
9
%out
=
load
%rtype
*
%out.addr
; -------------------------------- Completed VISC Launch Call --------------------------------
%3
=
extractvalue
%rtype
%out
,
0
%call14
=
tail
call
i32
@checkResults
(
float
*
%0
,
float
*
%1
,
float
*
%3
)
%tobool
=
icmp
eq
i32
%call14
,
0
br
i1
%tobool
,
label
%if.else
,
label
%if.then
if.then:
; preds = %randomInit.exit41
%puts31
=
tail
call
i32
@puts
(
i8
*
getelementptr
inbounds
([
7
x
i8
]*
@str15
,
i64
0
,
i64
0
))
br
label
%if.end
if.else:
; preds = %randomInit.exit41
%puts
=
tail
call
i32
@puts
(
i8
*
getelementptr
inbounds
([
9
x
i8
]*
@str13
,
i64
0
,
i64
0
))
br
label
%if.end
if.end:
; preds = %if.else, %if.then
%puts30
=
tail
call
i32
@puts
(
i8
*
getelementptr
inbounds
([
7
x
i8
]*
@str14
,
i64
0
,
i64
0
))
tail
call
void
@free
(
i8
*
%call
)
#5
tail
call
void
@free
(
i8
*
%call7
)
#5
tail
call
void
@free
(
i8
*
%call12
)
#5
ret
i32
0
}
; Function Attrs: nounwind
declare
void
@srand
(
i32
)
#1
; Function Attrs: nounwind
declare
void
@free
(
i8
*
nocapture
)
#1
declare
float
@fabsf
(
float
)
; Function Attrs: nounwind
declare
i32
@puts
(
i8
*
nocapture
)
#5
attributes
#0
=
{
nounwind
uwtable
"less-precise-fpmad"
=
"false"
"no-frame-pointer-elim"
=
"false"
"no-frame-pointer-elim-non-leaf"
=
"false"
"no-infs-fp-math"
=
"false"
"no-nans-fp-math"
=
"false"
"unsafe-fp-math"
=
"false"
"use-soft-float"
=
"false"
}
attributes
#1
=
{
nounwind
"less-precise-fpmad"
=
"false"
"no-frame-pointer-elim"
=
"false"
"no-frame-pointer-elim-non-leaf"
=
"false"
"no-infs-fp-math"
=
"false"
"no-nans-fp-math"
=
"false"
"unsafe-fp-math"
=
"false"
"use-soft-float"
=
"false"
}
attributes
#2
=
{
nounwind
readnone
uwtable
"less-precise-fpmad"
=
"false"
"no-frame-pointer-elim"
=
"false"
"no-frame-pointer-elim-non-leaf"
=
"false"
"no-infs-fp-math"
=
"false"
"no-nans-fp-math"
=
"false"
"unsafe-fp-math"
=
"false"
"use-soft-float"
=
"false"
}
attributes
#3
=
{
noinline
nounwind
uwtable
"less-precise-fpmad"
=
"false"
"no-frame-pointer-elim"
=
"false"
"no-frame-pointer-elim-non-leaf"
=
"false"
"no-infs-fp-math"
=
"false"
"no-nans-fp-math"
=
"false"
"unsafe-fp-math"
=
"false"
"use-soft-float"
=
"false"
}
attributes
#4
=
{
"less-precise-fpmad"
=
"false"
"no-frame-pointer-elim"
=
"false"
"no-frame-pointer-elim-non-leaf"
=
"false"
"no-infs-fp-math"
=
"false"
"no-nans-fp-math"
=
"false"
"unsafe-fp-math"
=
"false"
"use-soft-float"
=
"false"
}
attributes
#5
=
{
nounwind
}
attributes
#6
=
{
nounwind
readnone
"less-precise-fpmad"
=
"false"
"no-frame-pointer-elim"
=
"false"
"no-frame-pointer-elim-non-leaf"
=
"false"
"no-infs-fp-math"
=
"false"
"no-nans-fp-math"
=
"false"
"unsafe-fp-math"
=
"false"
"use-soft-float"
=
"false"
}
!0
=
metadata
!{
metadata
!"float"
,
metadata
!1
}
!1
=
metadata
!{
metadata
!"omnipotent char"
,
metadata
!2
}
!2
=
metadata
!{
metadata
!"Simple C/C++ TBAA"
}
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment