Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
H
hpvm-release
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
llvm
hpvm-release
Commits
a12e4c7b
Commit
a12e4c7b
authored
10 years ago
by
Prakalp Srivastava
Browse files
Options
Downloads
Patches
Plain Diff
Example visc_sgemm almost complete
parent
bdf25051
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
llvm/test/VISC/gemm_opencl/matrixMul/visc_gemm_opencl.ll
+71
-13
71 additions, 13 deletions
llvm/test/VISC/gemm_opencl/matrixMul/visc_gemm_opencl.ll
with
71 additions
and
13 deletions
llvm/test/VISC/gemm_opencl/matrixMul/visc_gemm_opencl.ll
+
71
−
13
View file @
a12e4c7b
...
...
@@ -19,7 +19,7 @@ target triple = "x86_64-unknown-linux-gnu"
; Return Type of VISC Compute Matrix Mul
%rtype
=
type
{
float
*,
i32
}
%struct.arg
=
type
{
float
*,
i32
,
float
*,
i32
,
%rtype
}
%struct.arg
=
type
{
float
*,
i32
,
float
*,
i32
,
float
*,
i32
,
i32
,
i32
,
i32
,
%rtype
}
; Function Attrs: nounwind
declare
i8
*
@llvm.visc.launch
(
i8
*,
i8
*)
#0
...
...
@@ -542,18 +542,65 @@ declare i32 @clReleaseProgram(%struct._cl_program*) #3
declare
i32
@clReleaseCommandQueue
(
%struct._cl_command_queue
*)
#3
define
%rtype
@MatrixMulRoot
(
float
*
h_A
,
i32
bytes_A
,
float
*
h_B
,
i32
bytes_B
,
i32
WA
,
i32
WB
,
i32
HA
)
{
%kernel
=
call
i8
*
@llvm.visc.createNode2D
(
i8
*
bitcast
(
%rtype
(
float
*,
i32
,
float
*,
i32
,
i32
,
i32
)*
@matrixMul
to
i8
*),
i32
WB
,
i32
HA
)
; Function Attrs: nounwind uwtable
define
%rtype
@matrixMul
(
float
*
nocapture
%A
,
i32
%bytes_A
,
float
*
nocapture
%B
,
i32
%bytes_B
,
float
*
%C
,
i32
%bytes_C
,
i32
%k
,
i32
%n
,
i32
%m
)
#0
{
entry:
%call
=
tail
call
i32
(
i32
,
...)*
bitcast
(
i32
(...)*
@get_global_id
to
i32
(
i32
,
...)*)(
i32
0
)
#2
%call1
=
tail
call
i32
(
i32
,
...)*
bitcast
(
i32
(...)*
@get_global_id
to
i32
(
i32
,
...)*)(
i32
1
)
#2
%cmp22
=
icmp
sgt
i32
%k
,
0
br
i1
%cmp22
,
label
%for.body.lr.ph
,
label
%for.end
for.body.lr.ph:
; preds = %entry
%mul
=
mul
nsw
i32
%call1
,
%k
%0
=
sext
i32
%mul
to
i64
br
label
%for.body
for.body:
; preds = %for.body, %for.body.lr.ph
%indvars.iv
=
phi
i64
[
0
,
%for.body.lr.ph
],
[
%indvars.iv.next
,
%for.body
]
%res.024
=
phi
float
[
0.000000e+00
,
%for.body.lr.ph
],
[
%add7
,
%for.body
]
%1
=
add
nsw
i64
%indvars.iv
,
%0
%arrayidx
=
getelementptr
inbounds
float
*
%A
,
i64
%1
%2
=
load
float
*
%arrayidx
,
align
4
,
!tbaa
!0
%3
=
trunc
i64
%indvars.iv
to
i32
%mul2
=
mul
nsw
i32
%3
,
%n
%add3
=
add
nsw
i32
%mul2
,
%call
%idxprom4
=
sext
i32
%add3
to
i64
%arrayidx5
=
getelementptr
inbounds
float
*
%B
,
i64
%idxprom4
%4
=
load
float
*
%arrayidx5
,
align
4
,
!tbaa
!0
%mul6
=
fmul
float
%2
,
%4
%add7
=
fadd
float
%res.024
,
%mul6
%indvars.iv.next
=
add
i64
%indvars.iv
,
1
%lftr.wideiv
=
trunc
i64
%indvars.iv.next
to
i32
%exitcond
=
icmp
eq
i32
%lftr.wideiv
,
%k
br
i1
%exitcond
,
label
%for.end
,
label
%for.body
for.end:
; preds = %for.body, %entry
%res.0.lcssa
=
phi
float
[
0.000000e+00
,
%entry
],
[
%add7
,
%for.body
]
%mul8
=
mul
nsw
i32
%call1
,
%n
%add9
=
add
nsw
i32
%mul8
,
%call
%idxprom10
=
sext
i32
%add9
to
i64
%arrayidx11
=
getelementptr
inbounds
float
*
%C
,
i64
%idxprom10
store
float
%res.0.lcssa
,
float
*
%arrayidx11
,
align
4
,
!tbaa
!0
%.fca.0.insert
=
insertvalue
{
float
*,
i32
}
undef
,
float
*
%C
,
0
%.fca.1.insert
=
insertvalue
{
float
*,
i32
}
%.fca.0.insert
,
i32
%bytes_C
,
1
ret
{
float
*,
i32
}
%.fca.1.insert
}
define
%rtype
@MatrixMulRoot
(
float
*
%h_A
,
i32
%bytes_A
,
float
*
%h_B
,
i32
%bytes_B
,
float
*
%h_C
,
i32
%bytes_C
,
i32
%WA
,
i32
%WB
,
i32
%HA
)
{
%kernel
=
call
i8
*
@llvm.visc.createNode2D
(
i8
*
bitcast
(
%rtype
(
float
*,
i32
,
float
*,
i32
,
float
*,
i32
,
i32
,
i32
,
i32
)*
@matrixMul
to
i8
*),
i32
%WB
,
i32
%HA
)
; Bind Inputs
call
void
@llvm.visc.bind.input
(
i8
*
%kernel
,
i32
0
,
i32
0
)
call
void
@llvm.visc.bind.input
(
i8
*
%kernel
,
i32
1
,
i32
1
)
call
void
@llvm.visc.bind.input
(
i8
*
%kernel
,
i32
2
,
i32
2
)
call
void
@llvm.visc.bind.input
(
i8
*
%kernel
,
i32
3
,
i32
3
)
call
void
@llvm.visc.bind.input
(
i8
*
%kernel
,
i32
4
,
i32
4
)
call
void
@llvm.visc.bind.input
(
i8
*
%kernel
,
i32
5
,
i32
5
)
call
void
@llvm.visc.bind.input
(
i8
*
%kernel
,
i32
0
,
i32
0
)
; h_A
call
void
@llvm.visc.bind.input
(
i8
*
%kernel
,
i32
1
,
i32
1
)
; bytes_A
call
void
@llvm.visc.bind.input
(
i8
*
%kernel
,
i32
2
,
i32
2
)
; h_B
call
void
@llvm.visc.bind.input
(
i8
*
%kernel
,
i32
3
,
i32
3
)
; bytes_B
call
void
@llvm.visc.bind.input
(
i8
*
%kernel
,
i32
4
,
i32
4
)
; h_C
call
void
@llvm.visc.bind.input
(
i8
*
%kernel
,
i32
5
,
i32
5
)
; bytes_C
call
void
@llvm.visc.bind.input
(
i8
*
%kernel
,
i32
6
,
i32
6
)
; WA = HB = k
call
void
@llvm.visc.bind.input
(
i8
*
%kernel
,
i32
7
,
i32
7
)
; WB = WC = n
call
void
@llvm.visc.bind.input
(
i8
*
%kernel
,
i32
8
,
i32
8
)
; HA = HC = m
; Bind Outputs
call
void
@llvm.visc.bind.output
(
i8
*
%kernel
,
i32
0
,
i32
0
)
call
void
@llvm.visc.bind.output
(
i8
*
%kernel
,
i32
1
,
i32
1
)
call
void
@llvm.visc.bind.output
(
i8
*
%kernel
,
i32
0
,
i32
0
)
; d_C
call
void
@llvm.visc.bind.output
(
i8
*
%kernel
,
i32
1
,
i32
1
)
; bytes_C
ret
%rtype
zeroinitializer
}
...
...
@@ -607,14 +654,25 @@ randomInit.exit41: ; preds = %for.body.i40
%in.addr.bytes_A
=
getelementptr
%struct.arg
*
%in.addr
,
i32
0
,
i32
1
%in.addr.h_B
=
getelementptr
%struct.arg
*
%in.addr
,
i32
0
,
i32
2
%in.addr.bytes_B
=
getelementptr
%struct.arg
*
%in.addr
,
i32
0
,
i32
3
%in.addr.h_C
=
getelementptr
%struct.arg
*
%in.addr
,
i32
0
,
i32
4
%in.addr.bytes_C
=
getelementptr
%struct.arg
*
%in.addr
,
i32
0
,
i32
5
%in.addr.WA
=
getelementptr
%struct.arg
*
%in.addr
,
i32
0
,
i32
6
%in.addr.WB
=
getelementptr
%struct.arg
*
%in.addr
,
i32
0
,
i32
7
%in.addr.HA
=
getelementptr
%struct.arg
*
%in.addr
,
i32
0
,
i32
8
store
float
*
%0
,
float
**
%in.addr.h_A
store
i32
4194304
,
i32
*
%in.addr.bytes_A
store
float
*
%1
,
float
**
%in.addr.h_B
store
i32
4194304
,
i32
*
%in.addr.bytes_B
store
float
*
%2
,
float
**
%in.addr.h_C
store
i32
4194304
,
i32
*
%in.addr.bytes_C
store
i32
1024
,
i32
*
%in.addr.WA
store
i32
1024
,
i32
*
%in.addr.WB
store
i32
1024
,
i32
*
%in.addr.HA
; Change type to i8* and VISC Launch call
%args
=
bitcast
%struct.arg
*
%in.addr
to
i8
*
%graphID
=
call
i8
*
@llvm.visc.launch
(
i8
*
bitcast
(
%rtype
(
float
*,
i32
,
float
*,
i32
)*
@MatrixMulRoot
to
i8
*),
i8
*
%args
)
%graphID
=
call
i8
*
@llvm.visc.launch
(
i8
*
bitcast
(
%rtype
(
float
*,
i32
,
float
*,
i32
,
float
*,
i32
,
i32
,
i32
,
i32
)*
@MatrixMulRoot
to
i8
*),
i8
*
%args
)
;tail call void @computeMatrixMul(float* %0, i32 4194304, float* %1, i32 4194304, float* %2, i32 4194304)
; Wait for result
...
...
@@ -627,7 +685,7 @@ randomInit.exit41: ; preds = %for.body.i40
;%2 = extractvalue %rtype %out, 0
%out.bytes_C
=
extractvalue
%rtype
%out
,
1
; -------------------------------- Completed VISC Launch Call
Cod
--------------------------------
; -------------------------------- Completed VISC Launch Call --------------------------------
br
label
%for.cond4.preheader.i
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment