Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
H
hpvm-release
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
llvm
hpvm-release
Commits
aa480186
Commit
aa480186
authored
10 years ago
by
Prakalp Srivastava
Browse files
Options
Downloads
Patches
Plain Diff
First cut of example visc_gemm_opencl.ll
parent
a12e4c7b
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
llvm/test/VISC/gemm_opencl/matrixMul/visc_gemm_opencl.ll
+53
-372
53 additions, 372 deletions
llvm/test/VISC/gemm_opencl/matrixMul/visc_gemm_opencl.ll
with
53 additions
and
372 deletions
llvm/test/VISC/gemm_opencl/matrixMul/visc_gemm_opencl.ll
+
53
−
372
View file @
aa480186
...
@@ -17,16 +17,6 @@ target triple = "x86_64-unknown-linux-gnu"
...
@@ -17,16 +17,6 @@ target triple = "x86_64-unknown-linux-gnu"
%struct._cl_kernel
=
type
opaque
%struct._cl_kernel
=
type
opaque
%struct._cl_event
=
type
opaque
%struct._cl_event
=
type
opaque
; Return Type of VISC Compute Matrix Mul
%rtype
=
type
{
float
*,
i32
}
%struct.arg
=
type
{
float
*,
i32
,
float
*,
i32
,
float
*,
i32
,
i32
,
i32
,
i32
,
%rtype
}
; Function Attrs: nounwind
declare
i8
*
@llvm.visc.launch
(
i8
*,
i8
*)
#0
; Function Attrs: nounwind
declare
void
@llvm.visc.wait
(
i8
*)
#0
@.str
=
private
unnamed_addr
constant
[
3
x
i8
]
c"rb\00"
,
align
1
@.str
=
private
unnamed_addr
constant
[
3
x
i8
]
c"rb\00"
,
align
1
@.str1
=
private
unnamed_addr
constant
[
45
x
i8
]
c"Mismatch at %d,%d --- C = %f and goldC = %f\0A\00"
,
align
1
@.str1
=
private
unnamed_addr
constant
[
45
x
i8
]
c"Mismatch at %d,%d --- C = %f and goldC = %f\0A\00"
,
align
1
@.str2
=
private
unnamed_addr
constant
[
35
x
i8
]
c"Failure to get number of platforms\00"
,
align
1
@.str2
=
private
unnamed_addr
constant
[
35
x
i8
]
c"Failure to get number of platforms\00"
,
align
1
...
@@ -80,64 +70,6 @@ for.end: ; preds = %for.body, %entry
...
@@ -80,64 +70,6 @@ for.end: ; preds = %for.body, %entry
; Function Attrs: nounwind
; Function Attrs: nounwind
declare
i32
@rand
()
#1
declare
i32
@rand
()
#1
; Function Attrs: nounwind uwtable
define
noalias
i8
*
@LoadProgSource
(
i8
*
nocapture
%cFilename
,
i64
*
%szFinalLength
)
#0
{
entry:
%call
=
tail
call
%struct._IO_FILE
*
@fopen
(
i8
*
%cFilename
,
i8
*
getelementptr
inbounds
([
3
x
i8
]*
@.str
,
i64
0
,
i64
0
))
#4
%cmp
=
icmp
eq
%struct._IO_FILE
*
%call
,
null
br
i1
%cmp
,
label
%return
,
label
%if.end
if.end:
; preds = %entry
%call1
=
tail
call
i32
@fseek
(
%struct._IO_FILE
*
%call
,
i64
0
,
i32
2
)
#4
%call2
=
tail
call
i64
@ftell
(
%struct._IO_FILE
*
%call
)
#4
%call3
=
tail
call
i32
@fseek
(
%struct._IO_FILE
*
%call
,
i64
0
,
i32
0
)
#4
%add
=
add
i64
%call2
,
1
%call4
=
tail
call
noalias
i8
*
@malloc
(
i64
%add
)
#4
%call5
=
tail
call
i64
@fread
(
i8
*
%call4
,
i64
%call2
,
i64
1
,
%struct._IO_FILE
*
%call
)
#4
%cmp6
=
icmp
eq
i64
%call5
,
1
%call8
=
tail
call
i32
@fclose
(
%struct._IO_FILE
*
%call
)
#4
br
i1
%cmp6
,
label
%if.end9
,
label
%if.then7
if.then7:
; preds = %if.end
tail
call
void
@free
(
i8
*
%call4
)
#4
br
label
%return
if.end9:
; preds = %if.end
%cmp11
=
icmp
eq
i64
*
%szFinalLength
,
null
br
i1
%cmp11
,
label
%if.end13
,
label
%if.then12
if.then12:
; preds = %if.end9
store
i64
%call2
,
i64
*
%szFinalLength
,
align
8
,
!tbaa
!3
br
label
%if.end13
if.end13:
; preds = %if.end9, %if.then12
%arrayidx
=
getelementptr
inbounds
i8
*
%call4
,
i64
%call2
store
i8
0
,
i8
*
%arrayidx
,
align
1
,
!tbaa
!1
br
label
%return
return:
; preds = %entry, %if.end13, %if.then7
%retval.0
=
phi
i8
*
[
null
,
%if.then7
],
[
%call4
,
%if.end13
],
[
null
,
%entry
]
ret
i8
*
%retval.0
}
; Function Attrs: nounwind
declare
noalias
%struct._IO_FILE
*
@fopen
(
i8
*
nocapture
,
i8
*
nocapture
)
#1
; Function Attrs: nounwind
declare
i32
@fseek
(
%struct._IO_FILE
*
nocapture
,
i64
,
i32
)
#1
; Function Attrs: nounwind
declare
i64
@ftell
(
%struct._IO_FILE
*
nocapture
)
#1
; Function Attrs: nounwind
declare
noalias
i8
*
@malloc
(
i64
)
#1
; Function Attrs: nounwind
declare
i64
@fread
(
i8
*
nocapture
,
i64
,
i64
,
%struct._IO_FILE
*
nocapture
)
#1
; Function Attrs: nounwind
declare
i32
@fclose
(
%struct._IO_FILE
*
nocapture
)
#1
; Function Attrs: nounwind
; Function Attrs: nounwind
declare
void
@free
(
i8
*
nocapture
)
#1
declare
void
@free
(
i8
*
nocapture
)
#1
...
@@ -223,330 +155,76 @@ return: ; preds = %for.inc50, %if.then
...
@@ -223,330 +155,76 @@ return: ; preds = %for.inc50, %if.then
; Function Attrs: nounwind
; Function Attrs: nounwind
declare
i32
@printf
(
i8
*
nocapture
,
...)
#1
declare
i32
@printf
(
i8
*
nocapture
,
...)
#1
; Function Attrs: nounwind uwtable
define
void
@computeMatrixMul
(
float
*
%h_A
,
i32
%bytes_A
,
float
*
%h_B
,
i32
%bytes_B
,
float
*
%h_C
,
i32
%bytes_C
)
#0
{
entry:
%dataBytes
=
alloca
i64
,
align
8
%errcode
=
alloca
i32
,
align
4
%d_A
=
alloca
%struct._cl_mem
*,
align
8
%d_B
=
alloca
%struct._cl_mem
*,
align
8
%d_C
=
alloca
%struct._cl_mem
*,
align
8
%numPlatforms
=
alloca
i32
,
align
4
%buffer
=
alloca
[
10240
x
i8
],
align
16
%properties
=
alloca
[
3
x
i64
],
align
16
%binaryLength
=
alloca
i64
,
align
8
%clMatrixMul
=
alloca
i8
*,
align
8
%binaryStatus
=
alloca
i32
,
align
4
%localWorkSize
=
alloca
[
2
x
i64
],
align
16
%globalWorkSize
=
alloca
[
2
x
i64
],
align
16
%wA
=
alloca
i32
,
align
4
%wC
=
alloca
i32
,
align
4
%call
=
call
i32
@clGetPlatformIDs
(
i32
0
,
%struct._cl_platform_id
**
null
,
i32
*
%numPlatforms
)
#4
store
i32
%call
,
i32
*
%errcode
,
align
4
,
!tbaa
!4
%cmp.i
=
icmp
eq
i32
%call
,
0
br
i1
%cmp.i
,
label
%checkErr.exit
,
label
%if.then.i
if.then.i:
; preds = %entry
%0
=
load
%struct._IO_FILE
**
@stderr
,
align
8
,
!tbaa
!5
%call.i
=
call
i32
(
%struct._IO_FILE
*,
i8
*,
...)*
@fprintf
(
%struct._IO_FILE
*
%0
,
i8
*
getelementptr
inbounds
([
11
x
i8
]*
@.str25
,
i64
0
,
i64
0
),
i8
*
getelementptr
inbounds
([
35
x
i8
]*
@.str2
,
i64
0
,
i64
0
))
#4
call
void
@exit
(
i32
1
)
#7
unreachable
checkErr.exit:
; preds = %entry
%1
=
load
i32
*
%numPlatforms
,
align
4
,
!tbaa
!4
%2
=
zext
i32
%1
to
i64
%vla
=
alloca
%struct._cl_platform_id
*,
i64
%2
,
align
16
%call1
=
call
i32
@clGetPlatformIDs
(
i32
%1
,
%struct._cl_platform_id
**
%vla
,
i32
*
null
)
#4
store
i32
%call1
,
i32
*
%errcode
,
align
4
,
!tbaa
!4
%cmp.i105
=
icmp
eq
i32
%call1
,
0
br
i1
%cmp.i105
,
label
%for.cond.preheader
,
label
%if.then.i107
for.cond.preheader:
; preds = %checkErr.exit
%3
=
load
i32
*
%numPlatforms
,
align
4
,
!tbaa
!4
%cmp148
=
icmp
eq
i32
%3
,
0
br
i1
%cmp148
,
label
%for.end
,
label
%for.body.lr.ph
for.body.lr.ph:
; preds = %for.cond.preheader
%4
=
getelementptr
inbounds
[
10240
x
i8
]*
%buffer
,
i64
0
,
i64
0
br
label
%for.body
if.then.i107:
; preds = %checkErr.exit
%5
=
load
%struct._IO_FILE
**
@stderr
,
align
8
,
!tbaa
!5
%call.i106
=
call
i32
(
%struct._IO_FILE
*,
i8
*,
...)*
@fprintf
(
%struct._IO_FILE
*
%5
,
i8
*
getelementptr
inbounds
([
11
x
i8
]*
@.str25
,
i64
0
,
i64
0
),
i8
*
getelementptr
inbounds
([
28
x
i8
]*
@.str3
,
i64
0
,
i64
0
))
#4
call
void
@exit
(
i32
1
)
#7
unreachable
for.body:
; preds = %for.body.lr.ph, %for.body
%i.0149
=
phi
i32
[
0
,
%for.body.lr.ph
],
[
%inc
,
%for.body
]
call
void
@llvm.lifetime.start
(
i64
10240
,
i8
*
%4
)
#4
%call2
=
call
i32
(
i8
*,
...)*
@printf
(
i8
*
getelementptr
inbounds
([
12
x
i8
]*
@.str4
,
i64
0
,
i64
0
),
i32
%i.0149
)
#4
%idxprom
=
zext
i32
%i.0149
to
i64
%arrayidx
=
getelementptr
inbounds
%struct._cl_platform_id
**
%vla
,
i64
%idxprom
%6
=
load
%struct._cl_platform_id
**
%arrayidx
,
align
8
,
!tbaa
!5
%call3
=
call
i32
@clGetPlatformInfo
(
%struct._cl_platform_id
*
%6
,
i32
2304
,
i64
10240
,
i8
*
%4
,
i64
*
null
)
#4
%call5
=
call
i32
(
i8
*,
...)*
@printf
(
i8
*
getelementptr
inbounds
([
16
x
i8
]*
@.str5
,
i64
0
,
i64
0
),
i8
*
%4
)
#4
%7
=
load
%struct._cl_platform_id
**
%arrayidx
,
align
8
,
!tbaa
!5
%call9
=
call
i32
@clGetPlatformInfo
(
%struct._cl_platform_id
*
%7
,
i32
2305
,
i64
10240
,
i8
*
%4
,
i64
*
null
)
#4
%call11
=
call
i32
(
i8
*,
...)*
@printf
(
i8
*
getelementptr
inbounds
([
16
x
i8
]*
@.str6
,
i64
0
,
i64
0
),
i8
*
%4
)
#4
%8
=
load
%struct._cl_platform_id
**
%arrayidx
,
align
8
,
!tbaa
!5
%call15
=
call
i32
@clGetPlatformInfo
(
%struct._cl_platform_id
*
%8
,
i32
2306
,
i64
10240
,
i8
*
%4
,
i64
*
null
)
#4
%call17
=
call
i32
(
i8
*,
...)*
@printf
(
i8
*
getelementptr
inbounds
([
13
x
i8
]*
@.str7
,
i64
0
,
i64
0
),
i8
*
%4
)
#4
%9
=
load
%struct._cl_platform_id
**
%arrayidx
,
align
8
,
!tbaa
!5
%call21
=
call
i32
@clGetPlatformInfo
(
%struct._cl_platform_id
*
%9
,
i32
2307
,
i64
10240
,
i8
*
%4
,
i64
*
null
)
#4
%call23
=
call
i32
(
i8
*,
...)*
@printf
(
i8
*
getelementptr
inbounds
([
15
x
i8
]*
@.str8
,
i64
0
,
i64
0
),
i8
*
%4
)
#4
%10
=
load
%struct._cl_platform_id
**
%arrayidx
,
align
8
,
!tbaa
!5
%call27
=
call
i32
@clGetPlatformInfo
(
%struct._cl_platform_id
*
%10
,
i32
2308
,
i64
10240
,
i8
*
%4
,
i64
*
null
)
#4
%call29
=
call
i32
(
i8
*,
...)*
@printf
(
i8
*
getelementptr
inbounds
([
19
x
i8
]*
@.str9
,
i64
0
,
i64
0
),
i8
*
%4
)
#4
call
void
@llvm.lifetime.end
(
i64
10240
,
i8
*
%4
)
#4
%inc
=
add
i32
%i.0149
,
1
%11
=
load
i32
*
%numPlatforms
,
align
4
,
!tbaa
!4
%cmp
=
icmp
ult
i32
%inc
,
%11
br
i1
%cmp
,
label
%for.body
,
label
%for.end
for.end:
; preds = %for.body, %for.cond.preheader
%arrayinit.begin
=
getelementptr
inbounds
[
3
x
i64
]*
%properties
,
i64
0
,
i64
0
store
i64
4228
,
i64
*
%arrayinit.begin
,
align
16
,
!tbaa
!3
%arrayinit.element
=
getelementptr
inbounds
[
3
x
i64
]*
%properties
,
i64
0
,
i64
1
%12
=
load
%struct._cl_platform_id
**
%vla
,
align
16
,
!tbaa
!5
%13
=
ptrtoint
%struct._cl_platform_id
*
%12
to
i64
%sext
=
shl
i64
%13
,
32
%conv
=
ashr
exact
i64
%sext
,
32
store
i64
%conv
,
i64
*
%arrayinit.element
,
align
8
,
!tbaa
!3
%arrayinit.element31
=
getelementptr
inbounds
[
3
x
i64
]*
%properties
,
i64
0
,
i64
2
store
i64
0
,
i64
*
%arrayinit.element31
,
align
16
,
!tbaa
!3
%call33
=
call
%struct._cl_context
*
@clCreateContextFromType
(
i64
*
%arrayinit.begin
,
i64
4
,
void
(
i8
*,
i8
*,
i64
,
i8
*)*
null
,
i8
*
null
,
i32
*
%errcode
)
#4
%14
=
load
i32
*
%errcode
,
align
4
,
!tbaa
!4
%cmp.i109
=
icmp
eq
i32
%14
,
0
br
i1
%cmp.i109
,
label
%checkErr.exit112
,
label
%if.then.i111
if.then.i111:
; preds = %for.end
%15
=
load
%struct._IO_FILE
**
@stderr
,
align
8
,
!tbaa
!5
%call.i110
=
call
i32
(
%struct._IO_FILE
*,
i8
*,
...)*
@fprintf
(
%struct._IO_FILE
*
%15
,
i8
*
getelementptr
inbounds
([
11
x
i8
]*
@.str25
,
i64
0
,
i64
0
),
i8
*
getelementptr
inbounds
([
30
x
i8
]*
@.str10
,
i64
0
,
i64
0
))
#4
call
void
@exit
(
i32
1
)
#7
unreachable
checkErr.exit112:
; preds = %for.end
%call34
=
call
i32
@clGetContextInfo
(
%struct._cl_context
*
%call33
,
i32
4225
,
i64
0
,
i8
*
null
,
i64
*
%dataBytes
)
#4
store
i32
%call34
,
i32
*
%errcode
,
align
4
,
!tbaa
!4
%16
=
load
i64
*
%dataBytes
,
align
8
,
!tbaa
!3
%call35
=
call
noalias
i8
*
@malloc
(
i64
%16
)
#4
%17
=
bitcast
i8
*
%call35
to
%struct._cl_device_id
**
%call36
=
call
i32
@clGetContextInfo
(
%struct._cl_context
*
%call33
,
i32
4225
,
i64
%16
,
i8
*
%call35
,
i64
*
null
)
#4
%18
=
load
i32
*
%errcode
,
align
4
,
!tbaa
!4
%or
=
or
i32
%18
,
%call36
store
i32
%or
,
i32
*
%errcode
,
align
4
,
!tbaa
!4
%cmp.i113
=
icmp
eq
i32
%or
,
0
br
i1
%cmp.i113
,
label
%checkErr.exit116
,
label
%if.then.i115
if.then.i115:
; preds = %checkErr.exit112
%19
=
load
%struct._IO_FILE
**
@stderr
,
align
8
,
!tbaa
!5
%call.i114
=
call
i32
(
%struct._IO_FILE
*,
i8
*,
...)*
@fprintf
(
%struct._IO_FILE
*
%19
,
i8
*
getelementptr
inbounds
([
11
x
i8
]*
@.str25
,
i64
0
,
i64
0
),
i8
*
getelementptr
inbounds
([
28
x
i8
]*
@.str11
,
i64
0
,
i64
0
))
#4
call
void
@exit
(
i32
1
)
#7
unreachable
checkErr.exit116:
; preds = %checkErr.exit112
%20
=
load
%struct._cl_device_id
**
%17
,
align
8
,
!tbaa
!5
%call38
=
call
%struct._cl_command_queue
*
@clCreateCommandQueue
(
%struct._cl_context
*
%call33
,
%struct._cl_device_id
*
%20
,
i64
0
,
i32
*
%errcode
)
#4
%21
=
load
i32
*
%errcode
,
align
4
,
!tbaa
!4
%cmp.i117
=
icmp
eq
i32
%21
,
0
br
i1
%cmp.i117
,
label
%checkErr.exit120
,
label
%if.then.i119
if.then.i119:
; preds = %checkErr.exit116
%22
=
load
%struct._IO_FILE
**
@stderr
,
align
8
,
!tbaa
!5
%call.i118
=
call
i32
(
%struct._IO_FILE
*,
i8
*,
...)*
@fprintf
(
%struct._IO_FILE
*
%22
,
i8
*
getelementptr
inbounds
([
11
x
i8
]*
@.str25
,
i64
0
,
i64
0
),
i8
*
getelementptr
inbounds
([
32
x
i8
]*
@.str12
,
i64
0
,
i64
0
))
#4
call
void
@exit
(
i32
1
)
#7
unreachable
checkErr.exit120:
; preds = %checkErr.exit116
%conv39
=
zext
i32
%bytes_C
to
i64
%call40
=
call
%struct._cl_mem
*
@clCreateBuffer
(
%struct._cl_context
*
%call33
,
i64
1
,
i64
%conv39
,
i8
*
null
,
i32
*
%errcode
)
#4
store
%struct._cl_mem
*
%call40
,
%struct._cl_mem
**
%d_C
,
align
8
,
!tbaa
!5
%conv41
=
zext
i32
%bytes_A
to
i64
%23
=
bitcast
float
*
%h_A
to
i8
*
%call42
=
call
%struct._cl_mem
*
@clCreateBuffer
(
%struct._cl_context
*
%call33
,
i64
33
,
i64
%conv41
,
i8
*
%23
,
i32
*
%errcode
)
#4
store
%struct._cl_mem
*
%call42
,
%struct._cl_mem
**
%d_A
,
align
8
,
!tbaa
!5
%conv43
=
zext
i32
%bytes_B
to
i64
%24
=
bitcast
float
*
%h_B
to
i8
*
%call44
=
call
%struct._cl_mem
*
@clCreateBuffer
(
%struct._cl_context
*
%call33
,
i64
33
,
i64
%conv43
,
i8
*
%24
,
i32
*
%errcode
)
#4
store
%struct._cl_mem
*
%call44
,
%struct._cl_mem
**
%d_B
,
align
8
,
!tbaa
!5
%call45
=
call
i8
*
@LoadProgSource
(
i8
*
getelementptr
inbounds
([
18
x
i8
]*
@.str13
,
i64
0
,
i64
0
),
i64
*
%binaryLength
)
store
i8
*
%call45
,
i8
**
%clMatrixMul
,
align
8
,
!tbaa
!5
%cmp46
=
icmp
eq
i8
*
%call45
,
null
br
i1
%cmp46
,
label
%if.then.i122
,
label
%checkErr.exit123
if.then.i122:
; preds = %checkErr.exit120
%25
=
load
%struct._IO_FILE
**
@stderr
,
align
8
,
!tbaa
!5
%call.i121
=
call
i32
(
%struct._IO_FILE
*,
i8
*,
...)*
@fprintf
(
%struct._IO_FILE
*
%25
,
i8
*
getelementptr
inbounds
([
11
x
i8
]*
@.str25
,
i64
0
,
i64
0
),
i8
*
getelementptr
inbounds
([
31
x
i8
]*
@.str14
,
i64
0
,
i64
0
))
#4
call
void
@exit
(
i32
1
)
#7
unreachable
checkErr.exit123:
; preds = %checkErr.exit120
%call49
=
call
%struct._cl_program
*
@clCreateProgramWithBinary
(
%struct._cl_context
*
%call33
,
i32
1
,
%struct._cl_device_id
**
%17
,
i64
*
%binaryLength
,
i8
**
%clMatrixMul
,
i32
*
%binaryStatus
,
i32
*
%errcode
)
#4
%26
=
load
i32
*
%errcode
,
align
4
,
!tbaa
!4
%cmp.i124
=
icmp
eq
i32
%26
,
0
br
i1
%cmp.i124
,
label
%checkErr.exit127
,
label
%if.then.i126
if.then.i126:
; preds = %checkErr.exit123
%27
=
load
%struct._IO_FILE
**
@stderr
,
align
8
,
!tbaa
!5
%call.i125
=
call
i32
(
%struct._IO_FILE
*,
i8
*,
...)*
@fprintf
(
%struct._IO_FILE
*
%27
,
i8
*
getelementptr
inbounds
([
11
x
i8
]*
@.str25
,
i64
0
,
i64
0
),
i8
*
getelementptr
inbounds
([
38
x
i8
]*
@.str15
,
i64
0
,
i64
0
))
#4
call
void
@exit
(
i32
1
)
#7
unreachable
checkErr.exit127:
; preds = %checkErr.exit123
%call50
=
call
i32
@clBuildProgram
(
%struct._cl_program
*
%call49
,
i32
0
,
%struct._cl_device_id
**
null
,
i8
*
null
,
void
(
%struct._cl_program
*,
i8
*)*
null
,
i8
*
null
)
#4
store
i32
%call50
,
i32
*
%errcode
,
align
4
,
!tbaa
!4
%cmp.i128
=
icmp
eq
i32
%call50
,
0
br
i1
%cmp.i128
,
label
%checkErr.exit131
,
label
%if.then.i130
if.then.i130:
; preds = %checkErr.exit127
%28
=
load
%struct._IO_FILE
**
@stderr
,
align
8
,
!tbaa
!5
%call.i129
=
call
i32
(
%struct._IO_FILE
*,
i8
*,
...)*
@fprintf
(
%struct._IO_FILE
*
%28
,
i8
*
getelementptr
inbounds
([
11
x
i8
]*
@.str25
,
i64
0
,
i64
0
),
i8
*
getelementptr
inbounds
([
25
x
i8
]*
@.str16
,
i64
0
,
i64
0
))
#4
call
void
@exit
(
i32
1
)
#7
unreachable
checkErr.exit131:
; preds = %checkErr.exit127
%call51
=
call
%struct._cl_kernel
*
@clCreateKernel
(
%struct._cl_program
*
%call49
,
i8
*
getelementptr
inbounds
([
10
x
i8
]*
@.str17
,
i64
0
,
i64
0
),
i32
*
%errcode
)
#4
%29
=
load
i32
*
%errcode
,
align
4
,
!tbaa
!4
%cmp.i132
=
icmp
eq
i32
%29
,
0
br
i1
%cmp.i132
,
label
%checkErr.exit135
,
label
%if.then.i134
if.then.i134:
; preds = %checkErr.exit131
%30
=
load
%struct._IO_FILE
**
@stderr
,
align
8
,
!tbaa
!5
%call.i133
=
call
i32
(
%struct._IO_FILE
*,
i8
*,
...)*
@fprintf
(
%struct._IO_FILE
*
%30
,
i8
*
getelementptr
inbounds
([
11
x
i8
]*
@.str25
,
i64
0
,
i64
0
),
i8
*
getelementptr
inbounds
([
25
x
i8
]*
@.str18
,
i64
0
,
i64
0
))
#4
call
void
@exit
(
i32
1
)
#7
unreachable
checkErr.exit135:
; preds = %checkErr.exit131
store
i32
1024
,
i32
*
%wA
,
align
4
,
!tbaa
!4
store
i32
1024
,
i32
*
%wC
,
align
4
,
!tbaa
!4
%31
=
bitcast
%struct._cl_mem
**
%d_C
to
i8
*
%call52
=
call
i32
@clSetKernelArg
(
%struct._cl_kernel
*
%call51
,
i32
0
,
i64
8
,
i8
*
%31
)
#4
store
i32
%call52
,
i32
*
%errcode
,
align
4
,
!tbaa
!4
%32
=
bitcast
%struct._cl_mem
**
%d_A
to
i8
*
%call53
=
call
i32
@clSetKernelArg
(
%struct._cl_kernel
*
%call51
,
i32
1
,
i64
8
,
i8
*
%32
)
#4
%33
=
load
i32
*
%errcode
,
align
4
,
!tbaa
!4
%or54
=
or
i32
%33
,
%call53
store
i32
%or54
,
i32
*
%errcode
,
align
4
,
!tbaa
!4
%34
=
bitcast
%struct._cl_mem
**
%d_B
to
i8
*
%call55
=
call
i32
@clSetKernelArg
(
%struct._cl_kernel
*
%call51
,
i32
2
,
i64
8
,
i8
*
%34
)
#4
%35
=
load
i32
*
%errcode
,
align
4
,
!tbaa
!4
%or56
=
or
i32
%35
,
%call55
store
i32
%or56
,
i32
*
%errcode
,
align
4
,
!tbaa
!4
%36
=
bitcast
i32
*
%wA
to
i8
*
%call57
=
call
i32
@clSetKernelArg
(
%struct._cl_kernel
*
%call51
,
i32
3
,
i64
4
,
i8
*
%36
)
#4
%37
=
load
i32
*
%errcode
,
align
4
,
!tbaa
!4
%or58
=
or
i32
%37
,
%call57
store
i32
%or58
,
i32
*
%errcode
,
align
4
,
!tbaa
!4
%38
=
bitcast
i32
*
%wC
to
i8
*
%call59
=
call
i32
@clSetKernelArg
(
%struct._cl_kernel
*
%call51
,
i32
4
,
i64
4
,
i8
*
%38
)
#4
%39
=
load
i32
*
%errcode
,
align
4
,
!tbaa
!4
%or60
=
or
i32
%39
,
%call59
store
i32
%or60
,
i32
*
%errcode
,
align
4
,
!tbaa
!4
%cmp.i136
=
icmp
eq
i32
%or60
,
0
br
i1
%cmp.i136
,
label
%checkErr.exit139
,
label
%if.then.i138
if.then.i138:
; preds = %checkErr.exit135
%40
=
load
%struct._IO_FILE
**
@stderr
,
align
8
,
!tbaa
!5
%call.i137
=
call
i32
(
%struct._IO_FILE
*,
i8
*,
...)*
@fprintf
(
%struct._IO_FILE
*
%40
,
i8
*
getelementptr
inbounds
([
11
x
i8
]*
@.str25
,
i64
0
,
i64
0
),
i8
*
getelementptr
inbounds
([
32
x
i8
]*
@.str19
,
i64
0
,
i64
0
))
#4
call
void
@exit
(
i32
1
)
#7
unreachable
checkErr.exit139:
; preds = %checkErr.exit135
%arrayidx61
=
getelementptr
inbounds
[
2
x
i64
]*
%localWorkSize
,
i64
0
,
i64
0
store
i64
16
,
i64
*
%arrayidx61
,
align
16
,
!tbaa
!3
%arrayidx62
=
getelementptr
inbounds
[
2
x
i64
]*
%localWorkSize
,
i64
0
,
i64
1
store
i64
16
,
i64
*
%arrayidx62
,
align
8
,
!tbaa
!3
%arrayidx63
=
getelementptr
inbounds
[
2
x
i64
]*
%globalWorkSize
,
i64
0
,
i64
0
store
i64
1024
,
i64
*
%arrayidx63
,
align
16
,
!tbaa
!3
%arrayidx64
=
getelementptr
inbounds
[
2
x
i64
]*
%globalWorkSize
,
i64
0
,
i64
1
store
i64
1024
,
i64
*
%arrayidx64
,
align
8
,
!tbaa
!3
%call67
=
call
i32
@clEnqueueNDRangeKernel
(
%struct._cl_command_queue
*
%call38
,
%struct._cl_kernel
*
%call51
,
i32
2
,
i64
*
null
,
i64
*
%arrayidx63
,
i64
*
%arrayidx61
,
i32
0
,
%struct._cl_event
**
null
,
%struct._cl_event
**
null
)
#4
store
i32
%call67
,
i32
*
%errcode
,
align
4
,
!tbaa
!4
%cmp.i140
=
icmp
eq
i32
%call67
,
0
br
i1
%cmp.i140
,
label
%checkErr.exit143
,
label
%if.then.i142
if.then.i142:
; preds = %checkErr.exit139
%41
=
load
%struct._IO_FILE
**
@stderr
,
align
8
,
!tbaa
!5
%call.i141
=
call
i32
(
%struct._IO_FILE
*,
i8
*,
...)*
@fprintf
(
%struct._IO_FILE
*
%41
,
i8
*
getelementptr
inbounds
([
11
x
i8
]*
@.str25
,
i64
0
,
i64
0
),
i8
*
getelementptr
inbounds
([
26
x
i8
]*
@.str20
,
i64
0
,
i64
0
))
#4
call
void
@exit
(
i32
1
)
#7
unreachable
checkErr.exit143:
; preds = %checkErr.exit139
%42
=
load
%struct._cl_mem
**
%d_C
,
align
8
,
!tbaa
!5
%43
=
bitcast
float
*
%h_C
to
i8
*
%call69
=
call
i32
@clEnqueueReadBuffer
(
%struct._cl_command_queue
*
%call38
,
%struct._cl_mem
*
%42
,
i32
1
,
i64
0
,
i64
%conv39
,
i8
*
%43
,
i32
0
,
%struct._cl_event
**
null
,
%struct._cl_event
**
null
)
#4
store
i32
%call69
,
i32
*
%errcode
,
align
4
,
!tbaa
!4
%cmp.i144
=
icmp
eq
i32
%call69
,
0
br
i1
%cmp.i144
,
label
%checkErr.exit147
,
label
%if.then.i146
if.then.i146:
; preds = %checkErr.exit143
%44
=
load
%struct._IO_FILE
**
@stderr
,
align
8
,
!tbaa
!5
%call.i145
=
call
i32
(
%struct._IO_FILE
*,
i8
*,
...)*
@fprintf
(
%struct._IO_FILE
*
%44
,
i8
*
getelementptr
inbounds
([
11
x
i8
]*
@.str25
,
i64
0
,
i64
0
),
i8
*
getelementptr
inbounds
([
23
x
i8
]*
@.str21
,
i64
0
,
i64
0
))
#4
call
void
@exit
(
i32
1
)
#7
unreachable
checkErr.exit147:
; preds = %checkErr.exit143
%45
=
load
%struct._cl_mem
**
%d_A
,
align
8
,
!tbaa
!5
%call70
=
call
i32
@clReleaseMemObject
(
%struct._cl_mem
*
%45
)
#4
%46
=
load
%struct._cl_mem
**
%d_C
,
align
8
,
!tbaa
!5
%call71
=
call
i32
@clReleaseMemObject
(
%struct._cl_mem
*
%46
)
#4
%47
=
load
%struct._cl_mem
**
%d_B
,
align
8
,
!tbaa
!5
%call72
=
call
i32
@clReleaseMemObject
(
%struct._cl_mem
*
%47
)
#4
call
void
@free
(
i8
*
%call35
)
#4
%48
=
load
i8
**
%clMatrixMul
,
align
8
,
!tbaa
!5
call
void
@free
(
i8
*
%48
)
#4
%call73
=
call
i32
@clReleaseContext
(
%struct._cl_context
*
%call33
)
#4
%call74
=
call
i32
@clReleaseKernel
(
%struct._cl_kernel
*
%call51
)
#4
%call75
=
call
i32
@clReleaseProgram
(
%struct._cl_program
*
%call49
)
#4
%call76
=
call
i32
@clReleaseCommandQueue
(
%struct._cl_command_queue
*
%call38
)
#4
ret
void
}
declare
i32
@clGetPlatformIDs
(
i32
,
%struct._cl_platform_id
**,
i32
*)
#3
; Function Attrs: nounwind
; Function Attrs: nounwind
declare
void
@llvm.lifetime.start
(
i64
,
i8
*
nocapture
)
#4
declare
void
@llvm.lifetime.start
(
i64
,
i8
*
nocapture
)
#4
declare
i32
@clGetPlatformInfo
(
%struct._cl_platform_id
*,
i32
,
i64
,
i8
*,
i64
*)
#3
; Function Attrs: nounwind
; Function Attrs: nounwind
declare
void
@llvm.lifetime.end
(
i64
,
i8
*
nocapture
)
#4
declare
void
@llvm.lifetime.end
(
i64
,
i8
*
nocapture
)
#4
declare
%struct._cl_context
*
@clCreateContextFromType
(
i64
*,
i64
,
void
(
i8
*,
i8
*,
i64
,
i8
*)*,
i8
*,
i32
*)
#3
; --------------- VISC Intrinsics ---------------
; Return Type of VISC Compute Matrix Mul
%rtype
=
type
{
float
*,
i32
}
%struct.arg
=
type
{
float
*,
i32
,
float
*,
i32
,
float
*,
i32
,
i32
,
i32
,
i32
,
%rtype
}
declare
i32
@clGetContextInfo
(
%struct._cl_context
*,
i32
,
i64
,
i8
*,
i64
*)
#3
; Function Attrs: nounwind
declare
i8
*
@llvm.visc.launch
(
i8
*,
i8
*)
#0
declare
%struct._cl_command_queue
*
@clCreateCommandQueue
(
%struct._cl_context
*,
%struct._cl_device_id
*,
i64
,
i32
*)
#3
; Function Attrs: nounwind
declare
void
@llvm.visc.wait
(
i8
*)
#0
declare
%struct._cl_mem
*
@clCreateBuffer
(
%struct._cl_context
*,
i64
,
i64
,
i8
*,
i32
*)
#3
; Function Attrs: nounwind
declare
i8
*
@llvm.visc.createNode
(
i8
*)
#0
declare
%struct._cl_program
*
@clCreateProgramWithBinary
(
%struct._cl_context
*,
i32
,
%struct._cl_device_id
**,
i64
*,
i8
**,
i32
*,
i32
*)
#3
; Function Attrs: nounwind
declare
i8
*
@llvm.visc.createNode1D
(
i8
*,
i32
)
#0
declare
i32
@clBuildProgram
(
%struct._cl_program
*,
i32
,
%struct._cl_device_id
**,
i8
*,
void
(
%struct._cl_program
*,
i8
*)*,
i8
*)
#3
; Function Attrs: nounwind
declare
i8
*
@llvm.visc.createNode2D
(
i8
*,
i32
,
i32
)
#0
; Function Attrs: nounwind
declare
i8
*
@llvm.visc.createNode3D
(
i8
*,
i32
,
i32
,
i32
)
#0
; Function Attrs: nounwind
declare
i8
*
@llvm.visc.createEdge
(
i8
*,
i8
*,
i1
,
i32
,
i32
)
#0
declare
%struct._cl_kernel
*
@clCreateKernel
(
%struct._cl_program
*,
i8
*,
i32
*)
#3
; Function Attrs: nounwind
declare
i8
*
@llvm.visc.getNode
()
#0
declare
i32
@clSetKernelArg
(
%struct._cl_kernel
*,
i32
,
i64
,
i8
*)
#3
; Function Attrs: nounwind
declare
i8
*
@llvm.visc.getParentNode
(
i8
*)
#0
declare
i32
@clEnqueueNDRangeKernel
(
%struct._cl_command_queue
*,
%struct._cl_kernel
*,
i32
,
i64
*,
i64
*,
i64
*,
i32
,
%struct._cl_event
**,
%struct._cl_event
**)
#3
; Function Attrs: nounwind
declare
i32
@llvm.visc.getNumDims
(
i8
*)
#0
declare
i32
@clEnqueueReadBuffer
(
%struct._cl_command_queue
*,
%struct._cl_mem
*,
i32
,
i64
,
i64
,
i8
*,
i32
,
%struct._cl_event
**,
%struct._cl_event
**)
#3
; Function Attrs: nounwind
declare
i32
@llvm.visc.getNumNodeInstances.x
(
i8
*)
#0
declare
i32
@clReleaseMemObject
(
%struct._cl_mem
*)
#3
; Function Attrs: nounwind
declare
i32
@llvm.visc.getNumNodeInstances.y
(
i8
*)
#0
declare
i32
@clReleaseContext
(
%struct._cl_context
*)
#3
; Function Attrs: nounwind
declare
void
@llvm.visc.bind.input
(
i8
*,
i32
,
i32
)
declare
i32
@clReleaseKernel
(
%struct._cl_kernel
*)
#3
; Function Attrs: nounwind
declare
void
@llvm.visc.bind.output
(
i8
*,
i32
,
i32
)
; ----------------- VISC intrinsics end ------------------
declare
i32
@clReleaseProgram
(
%struct._cl_program
*)
#3
declare
i32
@clReleaseCommandQueue
(
%struct._cl_command_queue
*)
#3
; Function Attrs: nounwind uwtable
; Function Attrs: nounwind uwtable
define
%rtype
@matrixMul
(
float
*
nocapture
%A
,
i32
%bytes_A
,
float
*
nocapture
%B
,
i32
%bytes_B
,
float
*
%C
,
i32
%bytes_C
,
i32
%k
,
i32
%n
,
i32
%m
)
#0
{
define
%rtype
@matrixMul
(
float
*
nocapture
%A
,
i32
%bytes_A
,
float
*
nocapture
%B
,
i32
%bytes_B
,
float
*
%C
,
i32
%bytes_C
,
i32
%k
,
i32
%n
,
i32
%m
)
#0
{
entry:
entry:
%call
=
tail
call
i32
(
i32
,
...)*
bitcast
(
i32
(...)*
@get_global_id
to
i32
(
i32
,
...)*)(
i32
0
)
#2
; ------------------------- VISC changes ------------------
%call1
=
tail
call
i32
(
i32
,
...)*
bitcast
(
i32
(...)*
@get_global_id
to
i32
(
i32
,
...)*)(
i32
1
)
#2
; Replace get_global_id calls with calls to getNode followed by getNumNodeInstances.x
; Replaced statement -- %call = tail call i32 (i32, ...)* bitcast (i32 (...)* @get_global_id to i32 (i32, ...)*)(i32 0) #2
%this_node
=
call
i8
*
@llvm.visc.getNode
()
%call
=
tail
call
i32
@llvm.visc.getNumNodeInstances.x
(
i8
*
%this_node
)
; Replace get_global_id calls with calls to getNode followed by getNumNodeInstances.y
; Replaced statement -- %call1 = tail call i32 (i32, ...)* bitcast (i32 (...)* @get_global_id to i32 (i32, ...)*)(i32 1) #2
%call1
=
tail
call
i32
@llvm.visc.getNumNodeInstances.y
(
i8
*
%this_node
)
; ---------------------- VISC changes End ------------------
%cmp22
=
icmp
sgt
i32
%k
,
0
%cmp22
=
icmp
sgt
i32
%k
,
0
br
i1
%cmp22
,
label
%for.body.lr.ph
,
label
%for.end
br
i1
%cmp22
,
label
%for.body.lr.ph
,
label
%for.end
...
@@ -581,9 +259,9 @@ for.end: ; preds = %for.body, %entry
...
@@ -581,9 +259,9 @@ for.end: ; preds = %for.body, %entry
%idxprom10
=
sext
i32
%add9
to
i64
%idxprom10
=
sext
i32
%add9
to
i64
%arrayidx11
=
getelementptr
inbounds
float
*
%C
,
i64
%idxprom10
%arrayidx11
=
getelementptr
inbounds
float
*
%C
,
i64
%idxprom10
store
float
%res.0.lcssa
,
float
*
%arrayidx11
,
align
4
,
!tbaa
!0
store
float
%res.0.lcssa
,
float
*
%arrayidx11
,
align
4
,
!tbaa
!0
%.fca.0.insert
=
insertvalue
{
float
*,
i32
}
undef
,
float
*
%C
,
0
%.fca.0.insert
=
insertvalue
%rtype
undef
,
float
*
%C
,
0
%.fca.1.insert
=
insertvalue
{
float
*,
i32
}
%.fca.0.insert
,
i32
%bytes_C
,
1
%.fca.1.insert
=
insertvalue
%rtype
%.fca.0.insert
,
i32
%bytes_C
,
1
ret
{
float
*,
i32
}
%.fca.1.insert
ret
%rtype
%.fca.1.insert
}
}
define
%rtype
@MatrixMulRoot
(
float
*
%h_A
,
i32
%bytes_A
,
float
*
%h_B
,
i32
%bytes_B
,
float
*
%h_C
,
i32
%bytes_C
,
i32
%WA
,
i32
%WB
,
i32
%HA
)
{
define
%rtype
@MatrixMulRoot
(
float
*
%h_A
,
i32
%bytes_A
,
float
*
%h_B
,
i32
%bytes_B
,
float
*
%h_C
,
i32
%bytes_C
,
i32
%WA
,
i32
%WB
,
i32
%HA
)
{
...
@@ -604,6 +282,9 @@ define %rtype @MatrixMulRoot(float* %h_A, i32 %bytes_A, float* %h_B, i32 %bytes_
...
@@ -604,6 +282,9 @@ define %rtype @MatrixMulRoot(float* %h_A, i32 %bytes_A, float* %h_B, i32 %bytes_
ret
%rtype
zeroinitializer
ret
%rtype
zeroinitializer
}
}
; Function Attrs: nounwind
declare
noalias
i8
*
@malloc
(
i64
)
#1
; Function Attrs: nounwind uwtable
; Function Attrs: nounwind uwtable
define
i32
@main
(
i32
%argc
,
i8
**
nocapture
%argv
)
#0
{
define
i32
@main
(
i32
%argc
,
i8
**
nocapture
%argv
)
#0
{
entry:
entry:
...
@@ -679,7 +360,7 @@ randomInit.exit41: ; preds = %for.body.i40
...
@@ -679,7 +360,7 @@ randomInit.exit41: ; preds = %for.body.i40
call
void
@llvm.visc.wait
(
i8
*
%graphID
)
call
void
@llvm.visc.wait
(
i8
*
%graphID
)
; Get the result
; Get the result
%out.addr
=
getelementptr
%struct.arg
*
%in.addr
,
i32
0
,
i32
4
%out.addr
=
getelementptr
%struct.arg
*
%in.addr
,
i32
0
,
i32
9
%out
=
load
%rtype
*
%out.addr
%out
=
load
%rtype
*
%out.addr
%out.h_C
=
extractvalue
%rtype
%out
,
0
%out.h_C
=
extractvalue
%rtype
%out
,
0
;%2 = extractvalue %rtype %out, 0
;%2 = extractvalue %rtype %out, 0
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment