Skip to content
Snippets Groups Projects
Commit 0ff16a7f authored by Prakalp Srivastava's avatar Prakalp Srivastava
Browse files

Creating VISC Runtime Library

parent b2920eec
No related branches found
No related tags found
No related merge requests found
#include <pthread.h>
#include <cstdlib>
#include <cstdio>
#include <CL/cl.h>
typedef struct {
pthread_t threadID;
} DFNodeContext_X86;
typedef struct {
cl_context clGPUContext;
cl_command_queue clCommandQue;
cl_program clProgram;
cl_kernel clKernel;
} DFNodeContext_PTX;
extern "C"
__int32_t llvm_visc_launch_x86(size_t* graphID, void* (*rootFunc)(void*), void* arguments) {
DFNodeContext_X86 *Context = (DFNodeContext_X86 *) malloc(sizeof(DFNodeContext_X86));
return pthread_create(&Context->threadID, NULL, rootFunc, arguments);
*graphID = (size_t) Context;
}
extern "C"
__int32_t llvm_visc_wait_x86(size_t graphID) {
DFNodeContext_X86* Context = (DFNodeContext_X86*) graphID;
return pthread_join(Context->threadID, NULL);
}
static inline void checkErr(cl_int err, cl_int success, const char * name) {
if (err != success) {
printf("ERROR: %s\n", name);
exit(EXIT_FAILURE);
}
}
//////////////////////////////////////////////////////////////////////////////
//! Loads a Program binary file.
//!
//! @return the source string if succeeded, 0 otherwise
//! @param cFilename program filename
//! @param szFinalLength returned length of the code string
//////////////////////////////////////////////////////////////////////////////
static char* LoadProgSource(const char* cFilename, size_t* szFinalLength)
{
// locals
FILE* pFileStream = NULL;
size_t szSourceLength;
// open the OpenCL source code file
#ifdef _WIN32 // Windows version
if(fopen_s(&pFileStream, cFilename, "rb") != 0)
{
return NULL;
}
#else // Linux version
pFileStream = fopen(cFilename, "rb");
if(pFileStream == 0)
{
return NULL;
}
#endif
// get the length of the source code
fseek(pFileStream, 0, SEEK_END);
szSourceLength = ftell(pFileStream);
fseek(pFileStream, 0, SEEK_SET);
// allocate a buffer for the source code string and read it in
char* cSourceString = (char *)malloc(szSourceLength + 1);
if (fread((cSourceString), szSourceLength, 1, pFileStream) != 1)
{
fclose(pFileStream);
free(cSourceString);
return 0;
}
// close the file and return the total length of the combined (preamble + source) string
fclose(pFileStream);
if(szFinalLength != 0)
{
*szFinalLength = szSourceLength;
}
cSourceString[szSourceLength] = '\0';
return cSourceString;
}
extern "C"
__int32_t llvm_visc_launch_ptx(size_t* graphID, void* (*rootFunc) (void*), void* arguments) {
// Initialize OpenCL
// OpenCL specific variables
DFNodeContext_PTX *Context = (DFNodeContext_PTX *) malloc(sizeof(DFNodeContext_PTX));
// Return Context pointer as grpahID;
*graphID = (size_t) Context;
size_t dataBytes;
size_t kernelLength;
cl_int errcode;
/* Application Specific Code
// OpenCL device memory for matrices
cl_mem d_A;
cl_mem d_B;
cl_mem d_C;
*/
/*****************************************/
/* Initialize OpenCL */
/*****************************************/
// query the number of platforms
cl_uint numPlatforms;
errcode = clGetPlatformIDs(0, NULL, &numPlatforms);
checkErr(errcode, CL_SUCCESS, "Failure to get number of platforms");
// now get all the platform IDs
cl_platform_id platforms[numPlatforms];
errcode = clGetPlatformIDs(numPlatforms, platforms, NULL);
checkErr(errcode, CL_SUCCESS, "Failure to get platform IDs");
for(unsigned i=0; i < numPlatforms; i++) {
char buffer[10240];
printf(" -- %d --\n", i);
clGetPlatformInfo(platforms[i], CL_PLATFORM_PROFILE, 10240, buffer, NULL);
printf(" PROFILE = %s\n", buffer);
clGetPlatformInfo(platforms[i], CL_PLATFORM_VERSION, 10240, buffer, NULL);
printf(" VERSION = %s\n", buffer);
clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, 10240, buffer, NULL);
printf(" NAME = %s\n", buffer);
clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, 10240, buffer, NULL);
printf(" VENDOR = %s\n", buffer);
clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, 10240, buffer, NULL);
printf(" EXTENSIONS = %s\n", buffer);
}
// set platform property - just pick the first one
cl_context_properties properties[] = {CL_CONTEXT_PLATFORM,
(long) platforms[0],
0};
Context->clGPUContext = clCreateContextFromType(properties, CL_DEVICE_TYPE_GPU,
NULL, NULL, &errcode);
checkErr(errcode, CL_SUCCESS, "Failure to create GPU context");
// get the list of GPU devices associated with context
errcode = clGetContextInfo(Context->clGPUContext, CL_CONTEXT_DEVICES, 0,
NULL, &dataBytes);
cl_device_id *clDevices = (cl_device_id *) malloc(dataBytes);
errcode |= clGetContextInfo(Context->clGPUContext, CL_CONTEXT_DEVICES, dataBytes,
clDevices, NULL);
checkErr(errcode, CL_SUCCESS, "Failure to get context info");
//Create a command-queue
Context->clCommandQue = clCreateCommandQueue(Context->clGPUContext, clDevices[0], 0, &errcode);
checkErr(errcode, CL_SUCCESS, "Failure to create command queue");
/* Application specific code
// Setup device memory
d_C = clCreateBuffer(clGPUContext, CL_MEM_READ_WRITE, bytes_C, NULL,
&errcode);
d_A = clCreateBuffer(clGPUContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
bytes_A, h_A, &errcode);
d_B = clCreateBuffer(clGPUContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
bytes_B, h_B, &errcode);
*/
char *clMatrixMul = LoadProgSource("matrixMul.nvptx.s", &kernelLength);
checkErr(clMatrixMul != NULL, 1 /*bool true*/, "Failure to load Program Binary");
cl_int binaryStatus;
Context->clProgram = clCreateProgramWithBinary(Context->clGPUContext, 1, &clDevices[0],
&kernelLength,
(const unsigned char **)&clMatrixMul,
&binaryStatus, &errcode);
checkErr(errcode, CL_SUCCESS, "Failure to create program from binary");
errcode = clBuildProgram(Context->clProgram, 0, NULL, NULL, NULL, NULL);
checkErr(errcode, CL_SUCCESS, "Failure to build program");
Context->clKernel = clCreateKernel(Context->clProgram, "matrixMul", &errcode);
checkErr(errcode, CL_SUCCESS, "Failure to create kernel");
// Invoke the callback function to put memory allocations in place
rootFunc(graphID);
/* Application Specific Code
// Launch OpenCL kernel
size_t localWorkSize[2], globalWorkSize[2];
int wA = WA;
int wC = WC;
errcode = clSetKernelArg(clKernel, 0, sizeof(cl_mem), (void *)&d_C);
errcode |= clSetKernelArg(clKernel, 1, sizeof(cl_mem), (void *)&d_A);
errcode |= clSetKernelArg(clKernel, 2, sizeof(cl_mem), (void *)&d_B);
errcode |= clSetKernelArg(clKernel, 3, sizeof(int), (void *)&wA);
errcode |= clSetKernelArg(clKernel, 4, sizeof(int), (void *)&wC);
checkErr(errcode, CL_SUCCESS, "Failure to set kernel arguments");
localWorkSize[0] = BLOCK_SIZE;
localWorkSize[1] = BLOCK_SIZE;
globalWorkSize[0] = ((WB-1)/BLOCK_SIZE + 1) * BLOCK_SIZE;
globalWorkSize[1] = ((HA-1)/BLOCK_SIZE + 1) * BLOCK_SIZE;
errcode = clEnqueueNDRangeKernel(clCommandQue, clKernel, 2, NULL,
globalWorkSize, localWorkSize,
0, NULL, NULL);
checkErr(errcode, CL_SUCCESS, "Failure to enqueue kernel");
// Retrieve result from device
errcode = clEnqueueReadBuffer(clCommandQue, d_C, CL_TRUE, 0, bytes_C,
h_C, 0, NULL, NULL);
checkErr(errcode, CL_SUCCESS, "Failure to read buffer");
*/
/* App specific code
// Deallocate memory
free(h_A);
free(h_B);
free(h_C);
clReleaseMemObject(d_A);
clReleaseMemObject(d_C);
clReleaseMemObject(d_B);
*/
free(clDevices);
free(clMatrixMul);
/*
// Free in wait implementation
clReleaseContext(Context->clGPUContext);
clReleaseKernel(Context->clKernel);
clReleaseProgram(Context->clProgram);
*/
return 0;
}
extern "C"
__int32_t llvm_visc_wait_ptx(size_t graphID) {
DFNodeContext_PTX *Context = (DFNodeContext_PTX*) graphID;
clFinish(Context->clCommandQue);
// Release
clReleaseContext(Context->clGPUContext);
clReleaseKernel(Context->clKernel);
clReleaseProgram(Context->clProgram);
return 0;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment