Skip to content
Snippets Groups Projects
Commit 802e35f6 authored by Prakalp Srivastava's avatar Prakalp Srivastava
Browse files

Changes to makefile and visc-rt to hide print statements by default and only...

Changes to makefile and visc-rt to hide print statements by default and only enable when using make DEBUG=1
parent ce1305e0
No related branches found
No related tags found
No related merge requests found
# Builds the VISC runtime: compiles $(TARGET).cpp to LLVM IR ($(TARGET).ll).
#
# Usage:
#   make            - build with the runtime's debug trace output compiled out
#   make DEBUG=1    - build with -DDEBUG_BUILD so the DEBUG(...) traces are kept
#   make clean      - remove the generated .ll file

# Locations of the LLVM source tree and of the installed toolchain.
LLVM_SRC_ROOT = /home/psrivas2/Hetero/VISC/Code/trunk/llvm
LLVM_INSTALL = /home/psrivas2/Hetero/VISC/Code/trunk/llvm-install

# Include paths: LLVM headers (llvm/Support/Debug.h) and the OpenCL headers
# taken from the CUDA include directory.
CPP_FLAGS = -I $(LLVM_SRC_ROOT)/include -I /usr/local/cuda/include

TARGET:=visc-rt
LLVM_CC:=$(LLVM_INSTALL)/bin/clang
LLVM_CXX:=$(LLVM_INSTALL)/bin/clang++

# Extra compiler options; DEBUG=1 on the command line enables DEBUG_BUILD.
OPTS =
ifeq ($(DEBUG),1)
OPTS+=-DDEBUG_BUILD
endif

# Neither target names a real file, so keep make from being confused by
# stray files called "all" or "clean".
.PHONY: all clean

all: $(TARGET:%=%.ll)

# Static pattern rule: visc-rt.ll is produced from visc-rt.cpp. The source is
# C++, so it is compiled once with clang++ and with $(OPTS) applied.
$(TARGET:%=%.ll):%.ll:%.cpp
	$(LLVM_CXX) -O3 -S -emit-llvm $(CPP_FLAGS) $(OPTS) $< -o $@

clean :
	rm -f $(TARGET).ll
#include <iostream>
#include <pthread.h>
#include <cstdlib>
#include <cstdio>
#include <string>
#include <CL/cl.h>
#include <cassert>
#include "llvm/Support/Debug.h"
//#include "visc-rt.h"
// DEBUG(stmt): trace helper used as a statement, e.g. DEBUG(cout << "x\n");
// When DEBUG_BUILD is defined the wrapped statement is executed; otherwise the
// macro expands to an empty statement and the argument is discarded.
//
// llvm/Support/Debug.h may already provide its own DEBUG macro, so any earlier
// definition is dropped first to avoid a macro-redefinition diagnostic.
#ifdef DEBUG
#undef DEBUG
#endif
// The do { } while (0) wrapper makes DEBUG(...); behave as exactly one
// statement, so it is safe as the body of an un-braced if/else.
#ifndef DEBUG_BUILD
#define DEBUG(s) do { } while (0)
#else
#define DEBUG(s) do { s; } while (0)
#endif
using namespace std;
// Per-launch state for a dataflow graph executed on the host through pthreads.
typedef struct {
  pthread_t threadID;  // thread created by pthread_create, later joined on wait
} DFNodeContext_X86;
typedef struct {
......@@ -22,7 +29,7 @@ cl_context globalGPUContext;
// Terminates the program when a status code differs from the expected one.
//   err     - status code that was returned by the call being checked
//   success - status code that counts as success for that call
//   name    - description of the failed operation, printed after "ERROR: "
// On mismatch the message is written once to stdout and the process exits with
// EXIT_FAILURE; when the codes match the function simply returns.
static inline void checkErr(cl_int err, cl_int success, const char * name) {
  if (err != success) {
    cout << "ERROR: " << name << "\n";
    exit(EXIT_FAILURE);
  }
}
......@@ -32,16 +39,16 @@ void* llvm_visc_x86_launch(void* (*rootFunc)(void*), void* arguments) {
DFNodeContext_X86 *Context = (DFNodeContext_X86 *) malloc(sizeof(DFNodeContext_X86));
int err;
if((err = pthread_create(&Context->threadID, NULL, rootFunc, arguments)) != 0)
printf("Failed to create pthread. Error code = %d\n", err);
cout << "Failed to create pthread. Error code = " << err << "\n";
return Context;
}
// Blocks until the pthread stored in the given context has finished.
//   graphID - opaque handle that is cast to DFNodeContext_X86*; presumably the
//             pointer returned by llvm_visc_x86_launch (TODO confirm).
// Progress messages are written to stdout. A failing pthread_join is reported
// with its error code but is not treated as fatal.
extern "C"
void llvm_visc_x86_wait(void* graphID) {
  cout << "Waiting for pthread to finish ...\n";
  DFNodeContext_X86* Context = (DFNodeContext_X86*) graphID;
  int err = pthread_join(Context->threadID, NULL);
  if (err != 0) {
    cout << "Failed to join pthread. Error code = " << err << "\n";
  }
  cout << "\t... pthread Done!\n";
}
extern "C"
......@@ -58,17 +65,17 @@ void* llvm_visc_ptx_initContext() {
for(unsigned i=0; i < numPlatforms; i++) {
char buffer[10240];
printf(" -- Device %d Info --\n", i);
DEBUG(cout << "Device " << i << " Info -->\n");
clGetPlatformInfo(platforms[i], CL_PLATFORM_PROFILE, 10240, buffer, NULL);
printf(" PROFILE = %s\n", buffer);
DEBUG(cout << "\tPROFILE = " << buffer << "\n");
clGetPlatformInfo(platforms[i], CL_PLATFORM_VERSION, 10240, buffer, NULL);
printf(" VERSION = %s\n", buffer);
DEBUG(cout << "\tVERSION = "<< buffer << "\n");
clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, 10240, buffer, NULL);
printf(" NAME = %s\n", buffer);
DEBUG(cout << "\tNAME = " << buffer << "\n");
clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, 10240, buffer, NULL);
printf(" VENDOR = %s\n", buffer);
DEBUG(cout << "\tVENDOR = " << buffer << "\n");
clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, 10240, buffer, NULL);
printf(" EXTENSIONS = %s\n", buffer);
DEBUG(cout << "\tEXTENSIONS = " << buffer << "\n");
}
// set platform property - just pick the first one
cl_context_properties properties[] = {CL_CONTEXT_PLATFORM,
......@@ -82,13 +89,14 @@ void* llvm_visc_ptx_initContext() {
// Releases the global OpenCL context held in globalGPUContext.
// The "Clear Context" trace goes through DEBUG(), so it is only printed when
// the file is compiled with DEBUG_BUILD defined.
extern "C"
void llvm_visc_ptx_clearContext() {
  DEBUG(cout << "Clear Context\n");
  clReleaseContext(globalGPUContext);
}
extern "C"
void llvm_visc_ptx_argument_scalar(void* graphID, void* input, int arg_index, size_t size) {
printf("Set Scalar Input. Argument Index = %d, Size = %lu\n", arg_index, size);
DEBUG(cout << "Set Scalar Input:");
DEBUG(cout << "\tArgument Index = " << arg_index << ", Size = " << size << "\n");
DFNodeContext_PTX* Context = (DFNodeContext_PTX*) graphID;
cl_int errcode = clSetKernelArg(Context->clKernel, arg_index, size, input);
checkErr(errcode, CL_SUCCESS, "Failure to set constant input argument");
......@@ -96,10 +104,11 @@ void llvm_visc_ptx_argument_scalar(void* graphID, void* input, int arg_index, si
extern "C"
void* llvm_visc_ptx_argument_ptr(void* graphID, void* input, int arg_index, size_t size, bool isInput, bool isOutput) {
printf("Set Pointer Input. Argument Index = %d, Ptr = %p, Size = %lu\n", arg_index, input, size);
DEBUG(cout << "Set Pointer Input:");
DEBUG(cout << "\tArgument Index = " << arg_index << ", Ptr = " << input << ", Size = "<< size << "\n");
// Size should be non-zero
assert(size != 0 && "Size of data pointed to has to be non-zero!");
printf("\tInput: %d, Output: %d\n", isInput, isOutput);
DEBUG(cout << "\tInput = "<< isInput << "\tOutput = " << isOutput << "\n");
DFNodeContext_PTX* Context = (DFNodeContext_PTX*) graphID;
cl_int errcode;
cl_mem_flags clFlags;
......@@ -112,13 +121,14 @@ void* llvm_visc_ptx_argument_ptr(void* graphID, void* input, int arg_index, size
checkErr(errcode, CL_SUCCESS, "Failure to allocate memory on device");
errcode |= clSetKernelArg(Context->clKernel, arg_index, sizeof(cl_mem), (void*)&d_input);
checkErr(errcode, CL_SUCCESS, "Failure to set pointer argument");
printf("\tDevicePtr = %p, Size = %lu\n", d_input, size);
DEBUG(cout << "\tDevicePtr = " << d_input << "\n");
return d_input;
}
extern "C"
void* llvm_visc_ptx_getOutput(void* graphID, void* h_output, void* d_output, size_t size) {
printf("Get Output: HostPtr = %p, DevicePtr = %p, Size = %lu\n", h_output, d_output, size);
DEBUG(cout << "Get Output:\n");
DEBUG(cout << "\tHostPtr = " << h_output << ", DevicePtr = " << d_output << ", Size = "<< size << "\n");
if(h_output == NULL)
h_output = malloc(size);
DFNodeContext_PTX* Context = (DFNodeContext_PTX*) graphID;
......@@ -134,26 +144,19 @@ void* llvm_visc_ptx_executeNode(void* graphID, unsigned workDim , const size_t*
size_t GlobalWG[3];
size_t LocalWG[3];
printf("Execute Node: Number of Dimensions = %u\n", workDim);
// OpenCL's clEnqueueNDRangeKernel function results in a segmentation fault if
// we directly use the local and global work group arguments. Hence, we
// allocate them on the stack and copy the values over.
printf("Global Work Group: ");
for(unsigned i=0; i<workDim; i++) {
printf("%lu ", globalWorkSize[i]);
GlobalWG[i] = globalWorkSize[i];
}
printf("\n");
// OpenCL allows local workgroup to be null.
if(localWorkSize != NULL) {
printf("Local Work Group: ");
for(unsigned i=0; i<workDim; i++) {
printf("%lu ", localWorkSize[i]);
LocalWG[i] = localWorkSize[i];
}
printf("\n");
}
DFNodeContext_PTX* Context = (DFNodeContext_PTX*) graphID;
......@@ -161,10 +164,26 @@ void* llvm_visc_ptx_executeNode(void* graphID, unsigned workDim , const size_t*
// Currently, passing the event parameter results in a segmentation fault with
// clEnqueueNDRangeKernel.
cl_event* event;
printf("Enqueuing kernel: %p, %p, %d, %lu, %lu\n", Context->clCommandQue, Context->clKernel, workDim, GlobalWG[0], GlobalWG[1]);
DEBUG(cout << "Enqueuing kernel:\n");
DEBUG(cout << "\tCommand Queue: " << Context->clCommandQue << "\n");
DEBUG(cout << "\tKernel: " << Context->clKernel << "\n");
DEBUG(cout << "\tNumber of dimensions: " << workDim << "\n");
DEBUG(cout << "\tGlobal Work Group: ( ");
for(unsigned i = 0; i<workDim; i++) {
DEBUG(cout << GlobalWG[i] << " ");
}
DEBUG(cout << ")\n");
if(localWorkSize != NULL) {
DEBUG(cout << "\tLocal Work Group: ( ");
for(unsigned i = 0; i<workDim; i++) {
DEBUG(cout << LocalWG[i] << " ");
}
DEBUG(cout << ")\n");
}
cl_int errcode = clEnqueueNDRangeKernel(Context->clCommandQue,
Context->clKernel, workDim, NULL, GlobalWG, NULL, 0, NULL, NULL);
printf("Enqueued kernel\n");
DEBUG(cout << "Enqueued kernel\n");
checkErr(errcode, CL_SUCCESS, "Failure to enqueue kernel");
return event;
}
......@@ -179,7 +198,7 @@ void* llvm_visc_ptx_executeNode(void* graphID, unsigned workDim , const size_t*
//////////////////////////////////////////////////////////////////////////////
static char* LoadProgSource(const char* Filename, size_t* szFinalLength)
{
printf("Load Prog Source\n");
DEBUG(cout << "Load Prog Source\n");
// locals
FILE* pFileStream = NULL;
size_t szSourceLength;
......@@ -218,7 +237,7 @@ static char* LoadProgSource(const char* Filename, size_t* szFinalLength)
extern "C"
void* llvm_visc_ptx_launch(const char* FileName, const char* KernelName) {
printf("Launch PTX Kernel\n");
DEBUG(cout << "Launch PTX Kernel\n");
// Initialize OpenCL
// OpenCL specific variables
......@@ -255,22 +274,22 @@ void* llvm_visc_ptx_launch(const char* FileName, const char* KernelName) {
&binaryStatus, &errcode);
checkErr(errcode, CL_SUCCESS, "Failure to create program from binary");
printf("Building kernel - %s from file %s\n", KernelName, FileName);
DEBUG(cout << "Building kernel - " << KernelName << " from file " << FileName << "\n");
errcode = clBuildProgram(Context->clProgram, 0, NULL, NULL, NULL, NULL);
// If build fails, get build log from device
if(errcode != CL_SUCCESS) {
printf("Failure to build program\n");
cout << "ERROR: Failure to build program\n";
size_t len = 0;
errcode = clGetProgramBuildInfo(Context->clProgram, clDevices[0] , CL_PROGRAM_BUILD_LOG, 0,
NULL, &len);
printf("LOG LENGTH: %lu\n", len);
cout << "LOG LENGTH: " << len << "\n";
checkErr(errcode, CL_SUCCESS, "Failure to collect program build log length");
char *log = (char*) malloc(len*sizeof(char));
errcode = clGetProgramBuildInfo(Context->clProgram, clDevices[0], CL_PROGRAM_BUILD_LOG, len,
log, NULL);
checkErr(errcode, CL_SUCCESS, "Failure to collect program build log");
printf("Device Build Log:\n%s\n", log);
cout << "Device Build Log:\n" << log << "\n";
exit(EXIT_FAILURE);
}
......@@ -286,7 +305,7 @@ void* llvm_visc_ptx_launch(const char* FileName, const char* KernelName) {
extern "C"
void llvm_visc_ptx_wait(void* graphID) {
printf("Wait\n");
DEBUG(cout << "Wait\n");
DFNodeContext_PTX *Context = (DFNodeContext_PTX*) graphID;
clFinish(Context->clCommandQue);
......@@ -294,5 +313,5 @@ void llvm_visc_ptx_wait(void* graphID) {
// clReleaseContext(Context->clGPUContext);
clReleaseKernel(Context->clKernel);
clReleaseProgram(Context->clProgram);
printf("Done with PTX kernel\n");
DEBUG(cout << "Done with PTX kernel\n");
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment