From 802e35f6ce509f072c9e0b4ef5e99cec8274bb2e Mon Sep 17 00:00:00 2001 From: Prakalp Srivastava <psrivas2@illinois.edu> Date: Tue, 18 Nov 2014 18:35:56 +0000 Subject: [PATCH] Changes to makefile and visc-rt to hide print statements by default and only enable when using make DEBUG=1 --- llvm/projects/visc-rt/Makefile | 15 ++++- llvm/projects/visc-rt/visc-rt.cpp | 91 +++++++++++++++++++------------ 2 files changed, 68 insertions(+), 38 deletions(-) diff --git a/llvm/projects/visc-rt/Makefile b/llvm/projects/visc-rt/Makefile index 68e3e5f79c..8d1cde4708 100644 --- a/llvm/projects/visc-rt/Makefile +++ b/llvm/projects/visc-rt/Makefile @@ -1,11 +1,22 @@ -LLVM_INSTALL:=/home/psrivas2/Hetero/VISC/Code/trunk/llvm-install +LLVM_SRC_ROOT = /home/psrivas2/Hetero/VISC/Code/trunk/llvm +LLVM_INSTALL = /home/psrivas2/Hetero/VISC/Code/trunk/llvm-install + +CPP_FLAGS = -I $(LLVM_SRC_ROOT)/include -I /usr/local/cuda/include TARGET:=visc-rt + LLVM_CC:=$(LLVM_INSTALL)/bin/clang +LLVM_CXX:=$(LLVM_INSTALL)/bin/clang++ + +OPTS = + +ifeq ($(DEBUG),1) + OPTS+=-DDEBUG_BUILD +endif all: $(TARGET:%=%.ll) $(TARGET:%=%.ll):%.ll:%.cpp - $(LLVM_CC) -O3 -S -emit-llvm -I /usr/local/cuda/include $< -o $@ + $(LLVM_CXX) -O3 -S -emit-llvm $(CPP_FLAGS) $(OPTS) $< -o $@ clean : rm -f $(TARGET).ll diff --git a/llvm/projects/visc-rt/visc-rt.cpp b/llvm/projects/visc-rt/visc-rt.cpp index e1d34c7133..ef9990785f 100644 --- a/llvm/projects/visc-rt/visc-rt.cpp +++ b/llvm/projects/visc-rt/visc-rt.cpp @@ -1,14 +1,21 @@ +#include <iostream> #include <pthread.h> #include <cstdlib> #include <cstdio> #include <string> #include <CL/cl.h> #include <cassert> -#include "llvm/Support/Debug.h" -//#include "visc-rt.h" + +#ifndef DEBUG_BUILD +#define DEBUG(s) {} +#else +#define DEBUG(s) s +#endif + +using namespace std; typedef struct { - pthread_t threadID; + pthread_t threadID; } DFNodeContext_X86; typedef struct { @@ -22,7 +29,7 @@ cl_context globalGPUContext; static inline void checkErr(cl_int err, cl_int success, const char * name) { if (err != success) { - printf("ERROR: %s\n", name); + cout << "ERROR:" << name << "\n"; exit(EXIT_FAILURE); } } @@ -32,16 +39,16 @@ void* llvm_visc_x86_launch(void* (*rootFunc)(void*), void* arguments) { DFNodeContext_X86 *Context = (DFNodeContext_X86 *) malloc(sizeof(DFNodeContext_X86)); int err; if((err = pthread_create(&Context->threadID, NULL, rootFunc, arguments)) != 0) - printf("Failed to create pthread. Error code = %d\n", err); + cout << "Failed to create pthread. Error code = " << err << "\n"; return Context; } extern "C" void llvm_visc_x86_wait(void* graphID) { - printf("Waiting for pthread to finish ...\n"); + cout << "Waiting for pthread to finish ...\n"; DFNodeContext_X86* Context = (DFNodeContext_X86*) graphID; pthread_join(Context->threadID, NULL); - printf("\t... pthread Done!\n"); + cout << "\t... pthread Done!\n"; } extern "C" @@ -58,17 +65,17 @@ void* llvm_visc_ptx_initContext() { for(unsigned i=0; i < numPlatforms; i++) { char buffer[10240]; - printf(" -- Device %d Info --\n", i); + DEBUG(cout << "Device " << i << " Info -->\n"); clGetPlatformInfo(platforms[i], CL_PLATFORM_PROFILE, 10240, buffer, NULL); - printf(" PROFILE = %s\n", buffer); + DEBUG(cout << "\tPROFILE = " << buffer << "\n"); clGetPlatformInfo(platforms[i], CL_PLATFORM_VERSION, 10240, buffer, NULL); - printf(" VERSION = %s\n", buffer); + DEBUG(cout << "\tVERSION = "<< buffer << "\n"); clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, 10240, buffer, NULL); - printf(" NAME = %s\n", buffer); + DEBUG(cout << "\tNAME = " << buffer << "\n"); clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, 10240, buffer, NULL); - printf(" VENDOR = %s\n", buffer); + DEBUG(cout << "\tVENDOR = " << buffer << "\n"); clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, 10240, buffer, NULL); - printf(" EXTENSIONS = %s\n", buffer); + DEBUG(cout << "\tEXTENSIONS = " << buffer << "\n"); } // set platform property - just pick the first one cl_context_properties properties[] = {CL_CONTEXT_PLATFORM, @@ -82,13 +89,14 @@ void* llvm_visc_ptx_initContext() { extern "C" void llvm_visc_ptx_clearContext() { - printf("Clear Context\n"); + DEBUG(cout << "Clear Context\n"); clReleaseContext(globalGPUContext); } extern "C" void llvm_visc_ptx_argument_scalar(void* graphID, void* input, int arg_index, size_t size) { - printf("Set Scalar Input. Argument Index = %d, Size = %lu\n", arg_index, size); + DEBUG(cout << "Set Scalar Input:"); + DEBUG(cout << "\tArgument Index = " << arg_index << ", Size = " << size << "\n"); DFNodeContext_PTX* Context = (DFNodeContext_PTX*) graphID; cl_int errcode = clSetKernelArg(Context->clKernel, arg_index, size, input); checkErr(errcode, CL_SUCCESS, "Failure to set constant input argument"); @@ -96,10 +104,11 @@ void llvm_visc_ptx_argument_scalar(void* graphID, void* input, int arg_index, si extern "C" void* llvm_visc_ptx_argument_ptr(void* graphID, void* input, int arg_index, size_t size, bool isInput, bool isOutput) { - printf("Set Pointer Input. Argument Index = %d, Ptr = %p, Size = %lu\n", arg_index, input, size); + DEBUG(cout << "Set Pointer Input:"); + DEBUG(cout << "\tArgument Index = " << arg_index << ", Ptr = " << input << ", Size = "<< size << "\n"); // Size should be non-zero assert(size != 0 && "Size of data pointed to has to be non-zero!"); - printf("\tInput: %d, Output: %d\n", isInput, isOutput); + DEBUG(cout << "\tInput = "<< isInput << "\tOutput = " << isOutput << "\n"); DFNodeContext_PTX* Context = (DFNodeContext_PTX*) graphID; cl_int errcode; cl_mem_flags clFlags; @@ -112,13 +121,14 @@ void* llvm_visc_ptx_argument_ptr(void* graphID, void* input, int arg_index, size checkErr(errcode, CL_SUCCESS, "Failure to allocate memory on device"); errcode |= clSetKernelArg(Context->clKernel, arg_index, sizeof(cl_mem), (void*)&d_input); checkErr(errcode, CL_SUCCESS, "Failure to set pointer argument"); - printf("\tDevicePtr = %p, Size = %lu\n", d_input, size); + DEBUG(cout << "\tDevicePtr = " << d_input << "\n"); return d_input; } extern "C" void* llvm_visc_ptx_getOutput(void* graphID, void* h_output, void* d_output, size_t size) { - printf("Get Output: HostPtr = %p, DevicePtr = %p, Size = %lu\n", h_output, d_output, size); + DEBUG(cout << "Get Output:\n"); + DEBUG(cout << "\tHostPtr = " << h_output << ", DevicePtr = " << d_output << ", Size = "<< size << "\n"); if(h_output == NULL) h_output = malloc(size); DFNodeContext_PTX* Context = (DFNodeContext_PTX*) graphID; @@ -134,26 +144,19 @@ void* llvm_visc_ptx_executeNode(void* graphID, unsigned workDim , const size_t* size_t GlobalWG[3]; size_t LocalWG[3]; - printf("Execute Node: Number of Dimensions = %u\n", workDim); // OpenCL EnqeueNDRangeKernel function results in segementation fault if we // directly use local and global work groups arguments. Hence, allocating it // on stack and copying. - printf("Global Work Group: "); for(unsigned i=0; i<workDim; i++) { - printf("%lu ", globalWorkSize[i]); GlobalWG[i] = globalWorkSize[i]; } - printf("\n"); // OpenCL allows local workgroup to be null. if(localWorkSize != NULL) { - printf("Local Work Group: "); for(unsigned i=0; i<workDim; i++) { - printf("%lu ", localWorkSize[i]); LocalWG[i] = localWorkSize[i]; } - printf("\n"); } DFNodeContext_PTX* Context = (DFNodeContext_PTX*) graphID; @@ -161,10 +164,26 @@ void* llvm_visc_ptx_executeNode(void* graphID, unsigned workDim , const size_t* // Currently passing the event paratemeter results in seg fault with // clEnqueueNDRangeKernel. cl_event* event; - printf("Enqueuing kernel: %p, %p, %d, %lu, %lu\n", Context->clCommandQue, Context->clKernel, workDim, GlobalWG[0], GlobalWG[1]); + DEBUG(cout << "Enqueuing kernel:\n"); + DEBUG(cout << "\tCommand Queue: " << Context->clCommandQue << "\n"); + DEBUG(cout << "\tKernel: " << Context->clKernel << "\n"); + DEBUG(cout << "\tNumber of dimensions: " << workDim << "\n"); + DEBUG(cout << "\tGlobal Work Group: ( "); + for(unsigned i = 0; i<workDim; i++) { + DEBUG(cout << GlobalWG[i] << " "); + } + DEBUG(cout << ")\n"); + if(localWorkSize != NULL) { + DEBUG(cout << "\tLocal Work Group: ( "); + for(unsigned i = 0; i<workDim; i++) { + DEBUG(cout << LocalWG[i] << " "); + } + DEBUG(cout << ")\n"); + } + cl_int errcode = clEnqueueNDRangeKernel(Context->clCommandQue, Context->clKernel, workDim, NULL, GlobalWG, NULL, 0, NULL, NULL); - printf("Enqueued kernel\n"); + DEBUG(cout << "Enqueued kernel\n"); checkErr(errcode, CL_SUCCESS, "Failure to enqueue kernel"); return event; } @@ -179,7 +198,7 @@ void* llvm_visc_ptx_executeNode(void* graphID, unsigned workDim , const size_t* ////////////////////////////////////////////////////////////////////////////// static char* LoadProgSource(const char* Filename, size_t* szFinalLength) { - printf("Load Prog Source\n"); + DEBUG(cout << "Load Prog Source\n"); // locals FILE* pFileStream = NULL; size_t szSourceLength; @@ -218,7 +237,7 @@ static char* LoadProgSource(const char* Filename, size_t* szFinalLength) extern "C" void* llvm_visc_ptx_launch(const char* FileName, const char* KernelName) { - printf("Launch PTX Kernel\n"); + DEBUG(cout << "Launch PTX Kernel\n"); // Initialize OpenCL // OpenCL specific variables @@ -255,22 +274,22 @@ void* llvm_visc_ptx_launch(const char* FileName, const char* KernelName) { &binaryStatus, &errcode); checkErr(errcode, CL_SUCCESS, "Failure to create program from binary"); - printf("Building kernel - %s from file %s\n", KernelName, FileName); + DEBUG(cout << "Building kernel - " << KernelName << " from file " << FileName << "\n"); errcode = clBuildProgram(Context->clProgram, 0, NULL, NULL, NULL, NULL); // If build fails, get build log from device if(errcode != CL_SUCCESS) { - printf("Failure to build program\n"); + cout << "ERROR: Failure to build program\n"; size_t len = 0; errcode = clGetProgramBuildInfo(Context->clProgram, clDevices[0] , CL_PROGRAM_BUILD_LOG, 0, NULL, &len); - printf("LOG LENGTH: %lu\n", len); + cout << "LOG LENGTH: " << len << "\n"; checkErr(errcode, CL_SUCCESS, "Failure to collect program build log length"); char *log = (char*) malloc(len*sizeof(char)); errcode = clGetProgramBuildInfo(Context->clProgram, clDevices[0], CL_PROGRAM_BUILD_LOG, len, log, NULL); checkErr(errcode, CL_SUCCESS, "Failure to collect program build log"); - printf("Device Build Log:\n%s\n", log); + cout << "Device Build Log:\n" << log << "\n"; exit(EXIT_FAILURE); } @@ -286,7 +305,7 @@ void* llvm_visc_ptx_launch(const char* FileName, const char* KernelName) { extern "C" void llvm_visc_ptx_wait(void* graphID) { - printf("Wait\n"); + DEBUG(cout << "Wait\n"); DFNodeContext_PTX *Context = (DFNodeContext_PTX*) graphID; clFinish(Context->clCommandQue); @@ -294,5 +313,5 @@ void llvm_visc_ptx_wait(void* graphID) { // clReleaseContext(Context->clGPUContext); clReleaseKernel(Context->clKernel); clReleaseProgram(Context->clProgram); - printf("Done with PTX kernel\n"); + DEBUG(cout << "Done with PTX kernel\n"); } -- GitLab