diff --git a/llvm/projects/visc-rt/visc-rt.cpp b/llvm/projects/visc-rt/visc-rt.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7945230f264f1ec6a5ca9fd7fe02d0e3b7fbb42c --- /dev/null +++ b/llvm/projects/visc-rt/visc-rt.cpp @@ -0,0 +1,255 @@ +#include <pthread.h> +#include <cstdlib> +#include <cstdio> +#include <CL/cl.h> + +typedef struct { + pthread_t threadID; +} DFNodeContext_X86; + +typedef struct { + cl_context clGPUContext; + cl_command_queue clCommandQue; + cl_program clProgram; + cl_kernel clKernel; +} DFNodeContext_PTX; + +extern "C" +__int32_t llvm_visc_launch_x86(size_t* graphID, void* (*rootFunc)(void*), void* arguments) { + DFNodeContext_X86 *Context = (DFNodeContext_X86 *) malloc(sizeof(DFNodeContext_X86)); + return pthread_create(&Context->threadID, NULL, rootFunc, arguments); + *graphID = (size_t) Context; +} + +extern "C" +__int32_t llvm_visc_wait_x86(size_t graphID) { + DFNodeContext_X86* Context = (DFNodeContext_X86*) graphID; + return pthread_join(Context->threadID, NULL); +} + +static inline void checkErr(cl_int err, cl_int success, const char * name) { + if (err != success) { + printf("ERROR: %s\n", name); + exit(EXIT_FAILURE); + } +} + +////////////////////////////////////////////////////////////////////////////// +//! Loads a Program binary file. +//! +//! @return the source string if succeeded, 0 otherwise +//! @param cFilename program filename +//! @param szFinalLength returned length of the code string +////////////////////////////////////////////////////////////////////////////// +static char* LoadProgSource(const char* cFilename, size_t* szFinalLength) +{ + // locals + FILE* pFileStream = NULL; + size_t szSourceLength; + + // open the OpenCL source code file + #ifdef _WIN32 // Windows version + if(fopen_s(&pFileStream, cFilename, "rb") != 0) + { + return NULL; + } + #else // Linux version + pFileStream = fopen(cFilename, "rb"); + if(pFileStream == 0) + { + return NULL; + } + #endif + + // get the length of the source code + fseek(pFileStream, 0, SEEK_END); + szSourceLength = ftell(pFileStream); + fseek(pFileStream, 0, SEEK_SET); + + // allocate a buffer for the source code string and read it in + char* cSourceString = (char *)malloc(szSourceLength + 1); + if (fread((cSourceString), szSourceLength, 1, pFileStream) != 1) + { + fclose(pFileStream); + free(cSourceString); + return 0; + } + + // close the file and return the total length of the combined (preamble + source) string + fclose(pFileStream); + if(szFinalLength != 0) + { + *szFinalLength = szSourceLength; + } + cSourceString[szSourceLength] = '\0'; + + return cSourceString; +} + +extern "C" +__int32_t llvm_visc_launch_ptx(size_t* graphID, void* (*rootFunc) (void*), void* arguments) { + // Initialize OpenCL + + // OpenCL specific variables + DFNodeContext_PTX *Context = (DFNodeContext_PTX *) malloc(sizeof(DFNodeContext_PTX)); + + // Return Context pointer as grpahID; + *graphID = (size_t) Context; + + + size_t dataBytes; + size_t kernelLength; + cl_int errcode; + + /* Application Specific Code + // OpenCL device memory for matrices + cl_mem d_A; + cl_mem d_B; + cl_mem d_C; + */ + + /*****************************************/ + /* Initialize OpenCL */ + /*****************************************/ + // query the number of platforms + cl_uint numPlatforms; + errcode = clGetPlatformIDs(0, NULL, &numPlatforms); + checkErr(errcode, CL_SUCCESS, "Failure to get number of platforms"); + + // now get all the platform IDs + cl_platform_id platforms[numPlatforms]; + errcode = clGetPlatformIDs(numPlatforms, platforms, NULL); + checkErr(errcode, CL_SUCCESS, "Failure to get platform IDs"); + + for(unsigned i=0; i < numPlatforms; i++) { + char buffer[10240]; + printf(" -- %d --\n", i); + clGetPlatformInfo(platforms[i], CL_PLATFORM_PROFILE, 10240, buffer, NULL); + printf(" PROFILE = %s\n", buffer); + clGetPlatformInfo(platforms[i], CL_PLATFORM_VERSION, 10240, buffer, NULL); + printf(" VERSION = %s\n", buffer); + clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, 10240, buffer, NULL); + printf(" NAME = %s\n", buffer); + clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, 10240, buffer, NULL); + printf(" VENDOR = %s\n", buffer); + clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, 10240, buffer, NULL); + printf(" EXTENSIONS = %s\n", buffer); + } + // set platform property - just pick the first one + cl_context_properties properties[] = {CL_CONTEXT_PLATFORM, + (long) platforms[0], + 0}; + Context->clGPUContext = clCreateContextFromType(properties, CL_DEVICE_TYPE_GPU, + NULL, NULL, &errcode); + checkErr(errcode, CL_SUCCESS, "Failure to create GPU context"); + + // get the list of GPU devices associated with context + errcode = clGetContextInfo(Context->clGPUContext, CL_CONTEXT_DEVICES, 0, + NULL, &dataBytes); + cl_device_id *clDevices = (cl_device_id *) malloc(dataBytes); + errcode |= clGetContextInfo(Context->clGPUContext, CL_CONTEXT_DEVICES, dataBytes, + clDevices, NULL); + checkErr(errcode, CL_SUCCESS, "Failure to get context info"); + + //Create a command-queue + Context->clCommandQue = clCreateCommandQueue(Context->clGPUContext, clDevices[0], 0, &errcode); + checkErr(errcode, CL_SUCCESS, "Failure to create command queue"); + + /* Application specific code + // Setup device memory + d_C = clCreateBuffer(clGPUContext, CL_MEM_READ_WRITE, bytes_C, NULL, + &errcode); + d_A = clCreateBuffer(clGPUContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + bytes_A, h_A, &errcode); + d_B = clCreateBuffer(clGPUContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + bytes_B, h_B, &errcode); + */ + + char *clMatrixMul = LoadProgSource("matrixMul.nvptx.s", &kernelLength); + checkErr(clMatrixMul != NULL, 1 /*bool true*/, "Failure to load Program Binary"); + + cl_int binaryStatus; + Context->clProgram = clCreateProgramWithBinary(Context->clGPUContext, 1, &clDevices[0], + &kernelLength, + (const unsigned char **)&clMatrixMul, + &binaryStatus, &errcode); + checkErr(errcode, CL_SUCCESS, "Failure to create program from binary"); + + errcode = clBuildProgram(Context->clProgram, 0, NULL, NULL, NULL, NULL); + checkErr(errcode, CL_SUCCESS, "Failure to build program"); + + Context->clKernel = clCreateKernel(Context->clProgram, "matrixMul", &errcode); + checkErr(errcode, CL_SUCCESS, "Failure to create kernel"); + + + + // Invoke the callback function to put memory allocations in place + rootFunc(graphID); + + /* Application Specific Code + // Launch OpenCL kernel + size_t localWorkSize[2], globalWorkSize[2]; + + int wA = WA; + int wC = WC; + errcode = clSetKernelArg(clKernel, 0, sizeof(cl_mem), (void *)&d_C); + errcode |= clSetKernelArg(clKernel, 1, sizeof(cl_mem), (void *)&d_A); + errcode |= clSetKernelArg(clKernel, 2, sizeof(cl_mem), (void *)&d_B); + errcode |= clSetKernelArg(clKernel, 3, sizeof(int), (void *)&wA); + errcode |= clSetKernelArg(clKernel, 4, sizeof(int), (void *)&wC); + checkErr(errcode, CL_SUCCESS, "Failure to set kernel arguments"); + + localWorkSize[0] = BLOCK_SIZE; + localWorkSize[1] = BLOCK_SIZE; + globalWorkSize[0] = ((WB-1)/BLOCK_SIZE + 1) * BLOCK_SIZE; + globalWorkSize[1] = ((HA-1)/BLOCK_SIZE + 1) * BLOCK_SIZE; + + errcode = clEnqueueNDRangeKernel(clCommandQue, clKernel, 2, NULL, + globalWorkSize, localWorkSize, + 0, NULL, NULL); + checkErr(errcode, CL_SUCCESS, "Failure to enqueue kernel"); + + + // Retrieve result from device + errcode = clEnqueueReadBuffer(clCommandQue, d_C, CL_TRUE, 0, bytes_C, + h_C, 0, NULL, NULL); + checkErr(errcode, CL_SUCCESS, "Failure to read buffer"); + */ + + /* App specific code + // Deallocate memory + free(h_A); + free(h_B); + free(h_C); + + clReleaseMemObject(d_A); + clReleaseMemObject(d_C); + clReleaseMemObject(d_B); + */ + + free(clDevices); + free(clMatrixMul); + + /* + // Free in wait implementation + clReleaseContext(Context->clGPUContext); + clReleaseKernel(Context->clKernel); + clReleaseProgram(Context->clProgram); + */ + + return 0; +} + + +extern "C" +__int32_t llvm_visc_wait_ptx(size_t graphID) { + DFNodeContext_PTX *Context = (DFNodeContext_PTX*) graphID; + clFinish(Context->clCommandQue); + + // Release + clReleaseContext(Context->clGPUContext); + clReleaseKernel(Context->clKernel); + clReleaseProgram(Context->clProgram); + + return 0; +}