Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
H
hpvm-release
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
llvm
hpvm-release
Commits
0ff16a7f
Commit
0ff16a7f
authored
10 years ago
by
Prakalp Srivastava
Browse files
Options
Downloads
Patches
Plain Diff
Creating VISC Runtime Library
parent
b2920eec
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
llvm/projects/visc-rt/visc-rt.cpp
+255
-0
255 additions, 0 deletions
llvm/projects/visc-rt/visc-rt.cpp
with
255 additions
and
0 deletions
llvm/projects/visc-rt/visc-rt.cpp
0 → 100644
+
255
−
0
View file @
0ff16a7f
#include
<pthread.h>
#include
<cstdlib>
#include
<cstdio>
#include
<CL/cl.h>
typedef
struct
{
pthread_t
threadID
;
}
DFNodeContext_X86
;
typedef
struct
{
cl_context
clGPUContext
;
cl_command_queue
clCommandQue
;
cl_program
clProgram
;
cl_kernel
clKernel
;
}
DFNodeContext_PTX
;
extern
"C"
__int32_t
llvm_visc_launch_x86
(
size_t
*
graphID
,
void
*
(
*
rootFunc
)(
void
*
),
void
*
arguments
)
{
DFNodeContext_X86
*
Context
=
(
DFNodeContext_X86
*
)
malloc
(
sizeof
(
DFNodeContext_X86
));
return
pthread_create
(
&
Context
->
threadID
,
NULL
,
rootFunc
,
arguments
);
*
graphID
=
(
size_t
)
Context
;
}
extern
"C"
__int32_t
llvm_visc_wait_x86
(
size_t
graphID
)
{
DFNodeContext_X86
*
Context
=
(
DFNodeContext_X86
*
)
graphID
;
return
pthread_join
(
Context
->
threadID
,
NULL
);
}
static
inline
void
checkErr
(
cl_int
err
,
cl_int
success
,
const
char
*
name
)
{
if
(
err
!=
success
)
{
printf
(
"ERROR: %s
\n
"
,
name
);
exit
(
EXIT_FAILURE
);
}
}
//////////////////////////////////////////////////////////////////////////////
//! Loads a Program binary file.
//!
//! @return the source string if succeeded, 0 otherwise
//! @param cFilename program filename
//! @param szFinalLength returned length of the code string
//////////////////////////////////////////////////////////////////////////////
static
char
*
LoadProgSource
(
const
char
*
cFilename
,
size_t
*
szFinalLength
)
{
// locals
FILE
*
pFileStream
=
NULL
;
size_t
szSourceLength
;
// open the OpenCL source code file
#ifdef _WIN32 // Windows version
if
(
fopen_s
(
&
pFileStream
,
cFilename
,
"rb"
)
!=
0
)
{
return
NULL
;
}
#else // Linux version
pFileStream
=
fopen
(
cFilename
,
"rb"
);
if
(
pFileStream
==
0
)
{
return
NULL
;
}
#endif
// get the length of the source code
fseek
(
pFileStream
,
0
,
SEEK_END
);
szSourceLength
=
ftell
(
pFileStream
);
fseek
(
pFileStream
,
0
,
SEEK_SET
);
// allocate a buffer for the source code string and read it in
char
*
cSourceString
=
(
char
*
)
malloc
(
szSourceLength
+
1
);
if
(
fread
((
cSourceString
),
szSourceLength
,
1
,
pFileStream
)
!=
1
)
{
fclose
(
pFileStream
);
free
(
cSourceString
);
return
0
;
}
// close the file and return the total length of the combined (preamble + source) string
fclose
(
pFileStream
);
if
(
szFinalLength
!=
0
)
{
*
szFinalLength
=
szSourceLength
;
}
cSourceString
[
szSourceLength
]
=
'\0'
;
return
cSourceString
;
}
extern
"C"
__int32_t
llvm_visc_launch_ptx
(
size_t
*
graphID
,
void
*
(
*
rootFunc
)
(
void
*
),
void
*
arguments
)
{
// Initialize OpenCL
// OpenCL specific variables
DFNodeContext_PTX
*
Context
=
(
DFNodeContext_PTX
*
)
malloc
(
sizeof
(
DFNodeContext_PTX
));
// Return Context pointer as grpahID;
*
graphID
=
(
size_t
)
Context
;
size_t
dataBytes
;
size_t
kernelLength
;
cl_int
errcode
;
/* Application Specific Code
// OpenCL device memory for matrices
cl_mem d_A;
cl_mem d_B;
cl_mem d_C;
*/
/*****************************************/
/* Initialize OpenCL */
/*****************************************/
// query the number of platforms
cl_uint
numPlatforms
;
errcode
=
clGetPlatformIDs
(
0
,
NULL
,
&
numPlatforms
);
checkErr
(
errcode
,
CL_SUCCESS
,
"Failure to get number of platforms"
);
// now get all the platform IDs
cl_platform_id
platforms
[
numPlatforms
];
errcode
=
clGetPlatformIDs
(
numPlatforms
,
platforms
,
NULL
);
checkErr
(
errcode
,
CL_SUCCESS
,
"Failure to get platform IDs"
);
for
(
unsigned
i
=
0
;
i
<
numPlatforms
;
i
++
)
{
char
buffer
[
10240
];
printf
(
" -- %d --
\n
"
,
i
);
clGetPlatformInfo
(
platforms
[
i
],
CL_PLATFORM_PROFILE
,
10240
,
buffer
,
NULL
);
printf
(
" PROFILE = %s
\n
"
,
buffer
);
clGetPlatformInfo
(
platforms
[
i
],
CL_PLATFORM_VERSION
,
10240
,
buffer
,
NULL
);
printf
(
" VERSION = %s
\n
"
,
buffer
);
clGetPlatformInfo
(
platforms
[
i
],
CL_PLATFORM_NAME
,
10240
,
buffer
,
NULL
);
printf
(
" NAME = %s
\n
"
,
buffer
);
clGetPlatformInfo
(
platforms
[
i
],
CL_PLATFORM_VENDOR
,
10240
,
buffer
,
NULL
);
printf
(
" VENDOR = %s
\n
"
,
buffer
);
clGetPlatformInfo
(
platforms
[
i
],
CL_PLATFORM_EXTENSIONS
,
10240
,
buffer
,
NULL
);
printf
(
" EXTENSIONS = %s
\n
"
,
buffer
);
}
// set platform property - just pick the first one
cl_context_properties
properties
[]
=
{
CL_CONTEXT_PLATFORM
,
(
long
)
platforms
[
0
],
0
};
Context
->
clGPUContext
=
clCreateContextFromType
(
properties
,
CL_DEVICE_TYPE_GPU
,
NULL
,
NULL
,
&
errcode
);
checkErr
(
errcode
,
CL_SUCCESS
,
"Failure to create GPU context"
);
// get the list of GPU devices associated with context
errcode
=
clGetContextInfo
(
Context
->
clGPUContext
,
CL_CONTEXT_DEVICES
,
0
,
NULL
,
&
dataBytes
);
cl_device_id
*
clDevices
=
(
cl_device_id
*
)
malloc
(
dataBytes
);
errcode
|=
clGetContextInfo
(
Context
->
clGPUContext
,
CL_CONTEXT_DEVICES
,
dataBytes
,
clDevices
,
NULL
);
checkErr
(
errcode
,
CL_SUCCESS
,
"Failure to get context info"
);
//Create a command-queue
Context
->
clCommandQue
=
clCreateCommandQueue
(
Context
->
clGPUContext
,
clDevices
[
0
],
0
,
&
errcode
);
checkErr
(
errcode
,
CL_SUCCESS
,
"Failure to create command queue"
);
/* Application specific code
// Setup device memory
d_C = clCreateBuffer(clGPUContext, CL_MEM_READ_WRITE, bytes_C, NULL,
&errcode);
d_A = clCreateBuffer(clGPUContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
bytes_A, h_A, &errcode);
d_B = clCreateBuffer(clGPUContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
bytes_B, h_B, &errcode);
*/
char
*
clMatrixMul
=
LoadProgSource
(
"matrixMul.nvptx.s"
,
&
kernelLength
);
checkErr
(
clMatrixMul
!=
NULL
,
1
/*bool true*/
,
"Failure to load Program Binary"
);
cl_int
binaryStatus
;
Context
->
clProgram
=
clCreateProgramWithBinary
(
Context
->
clGPUContext
,
1
,
&
clDevices
[
0
],
&
kernelLength
,
(
const
unsigned
char
**
)
&
clMatrixMul
,
&
binaryStatus
,
&
errcode
);
checkErr
(
errcode
,
CL_SUCCESS
,
"Failure to create program from binary"
);
errcode
=
clBuildProgram
(
Context
->
clProgram
,
0
,
NULL
,
NULL
,
NULL
,
NULL
);
checkErr
(
errcode
,
CL_SUCCESS
,
"Failure to build program"
);
Context
->
clKernel
=
clCreateKernel
(
Context
->
clProgram
,
"matrixMul"
,
&
errcode
);
checkErr
(
errcode
,
CL_SUCCESS
,
"Failure to create kernel"
);
// Invoke the callback function to put memory allocations in place
rootFunc
(
graphID
);
/* Application Specific Code
// Launch OpenCL kernel
size_t localWorkSize[2], globalWorkSize[2];
int wA = WA;
int wC = WC;
errcode = clSetKernelArg(clKernel, 0, sizeof(cl_mem), (void *)&d_C);
errcode |= clSetKernelArg(clKernel, 1, sizeof(cl_mem), (void *)&d_A);
errcode |= clSetKernelArg(clKernel, 2, sizeof(cl_mem), (void *)&d_B);
errcode |= clSetKernelArg(clKernel, 3, sizeof(int), (void *)&wA);
errcode |= clSetKernelArg(clKernel, 4, sizeof(int), (void *)&wC);
checkErr(errcode, CL_SUCCESS, "Failure to set kernel arguments");
localWorkSize[0] = BLOCK_SIZE;
localWorkSize[1] = BLOCK_SIZE;
globalWorkSize[0] = ((WB-1)/BLOCK_SIZE + 1) * BLOCK_SIZE;
globalWorkSize[1] = ((HA-1)/BLOCK_SIZE + 1) * BLOCK_SIZE;
errcode = clEnqueueNDRangeKernel(clCommandQue, clKernel, 2, NULL,
globalWorkSize, localWorkSize,
0, NULL, NULL);
checkErr(errcode, CL_SUCCESS, "Failure to enqueue kernel");
// Retrieve result from device
errcode = clEnqueueReadBuffer(clCommandQue, d_C, CL_TRUE, 0, bytes_C,
h_C, 0, NULL, NULL);
checkErr(errcode, CL_SUCCESS, "Failure to read buffer");
*/
/* App specific code
// Deallocate memory
free(h_A);
free(h_B);
free(h_C);
clReleaseMemObject(d_A);
clReleaseMemObject(d_C);
clReleaseMemObject(d_B);
*/
free
(
clDevices
);
free
(
clMatrixMul
);
/*
// Free in wait implementation
clReleaseContext(Context->clGPUContext);
clReleaseKernel(Context->clKernel);
clReleaseProgram(Context->clProgram);
*/
return
0
;
}
extern
"C"
__int32_t
llvm_visc_wait_ptx
(
size_t
graphID
)
{
DFNodeContext_PTX
*
Context
=
(
DFNodeContext_PTX
*
)
graphID
;
clFinish
(
Context
->
clCommandQue
);
// Release
clReleaseContext
(
Context
->
clGPUContext
);
clReleaseKernel
(
Context
->
clKernel
);
clReleaseProgram
(
Context
->
clProgram
);
return
0
;
}
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment