From 802e35f6ce509f072c9e0b4ef5e99cec8274bb2e Mon Sep 17 00:00:00 2001
From: Prakalp Srivastava <psrivas2@illinois.edu>
Date: Tue, 18 Nov 2014 18:35:56 +0000
Subject: [PATCH] Changes to makefile and visc-rt to hide print statements by
 default and only enable when using make DEBUG=1

---
 llvm/projects/visc-rt/Makefile    | 20 ++++-
 llvm/projects/visc-rt/visc-rt.cpp | 91 +++++++++++++------
 2 files changed, 73 insertions(+), 38 deletions(-)

diff --git a/llvm/projects/visc-rt/Makefile b/llvm/projects/visc-rt/Makefile
index 68e3e5f79c..8d1cde4708 100644
--- a/llvm/projects/visc-rt/Makefile
+++ b/llvm/projects/visc-rt/Makefile
@@ -1,11 +1,27 @@
-LLVM_INSTALL:=/home/psrivas2/Hetero/VISC/Code/trunk/llvm-install
+# Compiles visc-rt.cpp to LLVM IR (visc-rt.ll) with clang++.
+# `make DEBUG=1` adds -DDEBUG_BUILD, which enables the runtime's DEBUG(...) output.
+
+# Default tree locations; `?=` lets the environment or command line override them.
+LLVM_SRC_ROOT ?= /home/psrivas2/Hetero/VISC/Code/trunk/llvm
+LLVM_INSTALL ?= /home/psrivas2/Hetero/VISC/Code/trunk/llvm-install
+
+CPP_FLAGS = -I $(LLVM_SRC_ROOT)/include -I /usr/local/cuda/include
 TARGET:=visc-rt
+
 LLVM_CC:=$(LLVM_INSTALL)/bin/clang
+LLVM_CXX:=$(LLVM_INSTALL)/bin/clang++
+
+OPTS =
+
+ifeq ($(DEBUG),1)
+  OPTS+=-DDEBUG_BUILD
+endif
 
+.PHONY: all clean
 all: $(TARGET:%=%.ll)
 
 $(TARGET:%=%.ll):%.ll:%.cpp
-	$(LLVM_CC) -O3 -S -emit-llvm -I /usr/local/cuda/include $< -o $@
+	$(LLVM_CXX) -O3 -S -emit-llvm $(CPP_FLAGS) $(OPTS) $< -o $@
 
 clean :
 	rm -f $(TARGET).ll
diff --git a/llvm/projects/visc-rt/visc-rt.cpp b/llvm/projects/visc-rt/visc-rt.cpp
index e1d34c7133..ef9990785f 100644
--- a/llvm/projects/visc-rt/visc-rt.cpp
+++ b/llvm/projects/visc-rt/visc-rt.cpp
@@ -1,14 +1,21 @@
+#include <iostream>
 #include <pthread.h>
 #include <cstdlib>
 #include <cstdio>
 #include <string>
 #include <CL/cl.h>
 #include <cassert>
-#include "llvm/Support/Debug.h"
-//#include "visc-rt.h"
+
+#ifndef DEBUG_BUILD  // DEBUG_BUILD is defined by the Makefile only for `make DEBUG=1`
+#define DEBUG(s) do { } while (0)  // tracing compiled out; still exactly one statement
+#else
+#define DEBUG(s) do { s; } while (0)  // run the trace statement; safe in an unbraced if/else
+#endif  // DEBUG_BUILD
+
+using namespace std;
 
 typedef struct {
-  pthread_t threadID; 
+  pthread_t threadID;
 } DFNodeContext_X86;
 
 typedef struct {
@@ -22,7 +29,7 @@ cl_context globalGPUContext;
 
 static inline void checkErr(cl_int err, cl_int success, const char * name) {
   if (err != success) {
-    printf("ERROR: %s\n", name);
+    cout << "ERROR: " << name << "\n";  // not DEBUG-gated: always shown; keeps the old "ERROR: <msg>" format
     exit(EXIT_FAILURE);
   }
 }
@@ -32,16 +39,16 @@ void* llvm_visc_x86_launch(void* (*rootFunc)(void*), void* arguments) {
   DFNodeContext_X86 *Context = (DFNodeContext_X86 *) malloc(sizeof(DFNodeContext_X86));
   int err;
   if((err = pthread_create(&Context->threadID, NULL, rootFunc, arguments)) != 0)
-    printf("Failed to create pthread. Error code = %d\n", err);
+    cout << "Failed to create pthread. Error code = " << err << "\n";
   return Context;
 }
 
 extern "C"
 void llvm_visc_x86_wait(void* graphID) {
-  printf("Waiting for pthread to finish ...\n");
+  DEBUG(cout << "Waiting for pthread to finish ...\n");  // progress trace: DEBUG builds only, like llvm_visc_ptx_wait
   DFNodeContext_X86* Context = (DFNodeContext_X86*) graphID;
   pthread_join(Context->threadID, NULL);
-  printf("\t... pthread Done!\n");
+  DEBUG(cout << "\t... pthread Done!\n");  // progress trace: DEBUG builds only
 }
 
 extern "C"
@@ -58,17 +65,17 @@ void* llvm_visc_ptx_initContext() {
 
   for(unsigned i=0; i < numPlatforms; i++) {
     char buffer[10240];
-    printf("  -- Device %d Info --\n", i);
+    DEBUG(cout << "Device " << i << " Info -->\n");
     clGetPlatformInfo(platforms[i], CL_PLATFORM_PROFILE, 10240, buffer, NULL);
-    printf("  PROFILE = %s\n", buffer);
+    DEBUG(cout << "\tPROFILE = " << buffer << "\n");
     clGetPlatformInfo(platforms[i], CL_PLATFORM_VERSION, 10240, buffer, NULL);
-    printf("  VERSION = %s\n", buffer);
+    DEBUG(cout << "\tVERSION = "<< buffer << "\n");
     clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, 10240, buffer, NULL);
-    printf("  NAME = %s\n", buffer);
+    DEBUG(cout << "\tNAME = " << buffer << "\n");
     clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, 10240, buffer, NULL);
-    printf("  VENDOR = %s\n", buffer);
+    DEBUG(cout << "\tVENDOR = " << buffer << "\n");
     clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, 10240, buffer, NULL);
-    printf("  EXTENSIONS = %s\n", buffer);
+    DEBUG(cout << "\tEXTENSIONS = " << buffer << "\n");
   }
   // set platform property - just pick the first one
   cl_context_properties properties[] = {CL_CONTEXT_PLATFORM,
@@ -82,13 +89,14 @@ void* llvm_visc_ptx_initContext() {
 
 extern "C"
 void llvm_visc_ptx_clearContext() {
-  printf("Clear Context\n");
+  DEBUG(cout << "Clear Context\n");  // trace only; compiled in when built with -DDEBUG_BUILD
   clReleaseContext(globalGPUContext);
 }
 
 extern "C"
 void llvm_visc_ptx_argument_scalar(void* graphID, void* input, int arg_index, size_t size) {
-  printf("Set Scalar Input. Argument Index = %d, Size = %lu\n", arg_index, size);
+  DEBUG(cout << "Set Scalar Input:");
+  DEBUG(cout << "\tArgument Index = " << arg_index << ", Size = " << size << "\n");
   DFNodeContext_PTX* Context = (DFNodeContext_PTX*) graphID;
   cl_int errcode = clSetKernelArg(Context->clKernel, arg_index, size, input);
   checkErr(errcode, CL_SUCCESS, "Failure to set constant input argument");
@@ -96,10 +104,11 @@ void llvm_visc_ptx_argument_scalar(void* graphID, void* input, int arg_index, si
 
 extern "C"
 void* llvm_visc_ptx_argument_ptr(void* graphID, void* input, int arg_index, size_t size, bool isInput, bool isOutput) {
-  printf("Set Pointer Input. Argument Index = %d, Ptr = %p, Size = %lu\n", arg_index, input, size);
+  DEBUG(cout << "Set Pointer Input:");
+  DEBUG(cout << "\tArgument Index = " << arg_index << ", Ptr = " << input << ", Size = "<< size << "\n");
   // Size should be non-zero
   assert(size != 0 && "Size of data pointed to has to be non-zero!");
-  printf("\tInput: %d, Output: %d\n", isInput, isOutput);
+  DEBUG(cout << "\tInput = "<< isInput << "\tOutput = " << isOutput << "\n");
   DFNodeContext_PTX* Context = (DFNodeContext_PTX*) graphID;
   cl_int errcode;
   cl_mem_flags clFlags;
@@ -112,13 +121,14 @@ void* llvm_visc_ptx_argument_ptr(void* graphID, void* input, int arg_index, size
   checkErr(errcode, CL_SUCCESS, "Failure to allocate memory on device");
   errcode |= clSetKernelArg(Context->clKernel, arg_index, sizeof(cl_mem), (void*)&d_input);
   checkErr(errcode, CL_SUCCESS, "Failure to set pointer argument");
-  printf("\tDevicePtr = %p, Size = %lu\n", d_input, size);
+  DEBUG(cout << "\tDevicePtr = " << d_input << "\n");
   return d_input;
 }
 
 extern "C"
 void* llvm_visc_ptx_getOutput(void* graphID, void* h_output, void* d_output, size_t size) {
-  printf("Get Output: HostPtr = %p, DevicePtr = %p, Size = %lu\n", h_output, d_output, size);
+  DEBUG(cout << "Get Output:\n");
+  DEBUG(cout << "\tHostPtr = " << h_output << ", DevicePtr = " << d_output << ", Size = "<< size << "\n");
   if(h_output == NULL)
     h_output = malloc(size);
   DFNodeContext_PTX* Context = (DFNodeContext_PTX*) graphID;
@@ -134,26 +144,19 @@ void* llvm_visc_ptx_executeNode(void* graphID, unsigned workDim , const size_t*
 
   size_t GlobalWG[3];
   size_t LocalWG[3];
-  printf("Execute Node: Number of Dimensions = %u\n", workDim);
 
   // OpenCL EnqeueNDRangeKernel function results in segementation fault if we
   // directly use local and global work groups arguments. Hence, allocating it
   // on stack and copying.
-  printf("Global Work Group: ");
   for(unsigned i=0; i<workDim; i++) {
-    printf("%lu ", globalWorkSize[i]);
     GlobalWG[i] = globalWorkSize[i];
   }
-  printf("\n");
 
   // OpenCL allows local workgroup to be null.
   if(localWorkSize != NULL) {
-    printf("Local Work Group: ");
     for(unsigned i=0; i<workDim; i++) {
-      printf("%lu ", localWorkSize[i]);
       LocalWG[i] = localWorkSize[i];
     }
-    printf("\n");
   }
 
   DFNodeContext_PTX* Context = (DFNodeContext_PTX*) graphID;
@@ -161,10 +164,26 @@ void* llvm_visc_ptx_executeNode(void* graphID, unsigned workDim , const size_t*
   // Currently passing the event paratemeter results in seg fault with
   // clEnqueueNDRangeKernel.
   cl_event* event;
-  printf("Enqueuing kernel: %p, %p, %d, %lu, %lu\n", Context->clCommandQue, Context->clKernel,  workDim, GlobalWG[0], GlobalWG[1]);
+  DEBUG(cout << "Enqueuing kernel:\n");
+  DEBUG(cout << "\tCommand Queue: " << Context->clCommandQue << "\n");
+  DEBUG(cout << "\tKernel: " << Context->clKernel << "\n");
+  DEBUG(cout << "\tNumber of dimensions: " << workDim << "\n");
+  DEBUG(cout << "\tGlobal Work Group: ( ");
+  for(unsigned i = 0; i<workDim; i++) {
+    DEBUG(cout << GlobalWG[i] << " ");
+  }
+  DEBUG(cout << ")\n");
+  if(localWorkSize != NULL) {
+    DEBUG(cout << "\tLocal Work Group: ( ");
+    for(unsigned i = 0; i<workDim; i++) {
+      DEBUG(cout << LocalWG[i] << " ");
+    }
+    DEBUG(cout << ")\n");
+  }
+
   cl_int errcode = clEnqueueNDRangeKernel(Context->clCommandQue,
       Context->clKernel, workDim, NULL, GlobalWG, NULL, 0, NULL, NULL);
-  printf("Enqueued kernel\n");
+  DEBUG(cout << "Enqueued kernel\n");
   checkErr(errcode, CL_SUCCESS, "Failure to enqueue kernel");
   return event;
 }
@@ -179,7 +198,7 @@ void* llvm_visc_ptx_executeNode(void* graphID, unsigned workDim , const size_t*
 //////////////////////////////////////////////////////////////////////////////
 static char* LoadProgSource(const char* Filename, size_t* szFinalLength)
 {
-  printf("Load Prog Source\n");
+  DEBUG(cout << "Load Prog Source\n");
   // locals
   FILE* pFileStream = NULL;
   size_t szSourceLength;
@@ -218,7 +237,7 @@ static char* LoadProgSource(const char* Filename, size_t* szFinalLength)
 
 extern "C"
 void* llvm_visc_ptx_launch(const char* FileName, const char* KernelName) {
-  printf("Launch PTX Kernel\n");
+  DEBUG(cout << "Launch PTX Kernel\n");
   // Initialize OpenCL
 
   // OpenCL specific variables
@@ -255,22 +274,22 @@ void* llvm_visc_ptx_launch(const char* FileName, const char* KernelName) {
                                         &binaryStatus, &errcode);
   checkErr(errcode, CL_SUCCESS, "Failure to create program from binary");
 
-  printf("Building kernel - %s from file %s\n", KernelName, FileName);
+  DEBUG(cout << "Building kernel - " << KernelName << " from file " << FileName << "\n");
   errcode = clBuildProgram(Context->clProgram, 0, NULL, NULL, NULL, NULL);
   // If build fails, get build log from device
   if(errcode != CL_SUCCESS) {
-    printf("Failure to build program\n");
+    cout << "ERROR: Failure to build program\n";
     size_t len = 0;
     errcode = clGetProgramBuildInfo(Context->clProgram, clDevices[0] , CL_PROGRAM_BUILD_LOG, 0,
         NULL, &len);
-    printf("LOG LENGTH: %lu\n", len);
+    cout << "LOG LENGTH: " << len << "\n";
     checkErr(errcode, CL_SUCCESS, "Failure to collect program build log length");
     char *log = (char*) malloc(len*sizeof(char));
     errcode = clGetProgramBuildInfo(Context->clProgram, clDevices[0], CL_PROGRAM_BUILD_LOG, len,
         log, NULL);
     checkErr(errcode, CL_SUCCESS, "Failure to collect program build log");
 
-    printf("Device Build Log:\n%s\n", log);
+    cout << "Device Build Log:\n" << log << "\n";
     exit(EXIT_FAILURE);
   }
 
@@ -286,7 +305,7 @@ void* llvm_visc_ptx_launch(const char* FileName, const char* KernelName) {
 
 extern "C" 
 void llvm_visc_ptx_wait(void* graphID) {
-  printf("Wait\n");
+  DEBUG(cout << "Wait\n");
   DFNodeContext_PTX *Context = (DFNodeContext_PTX*) graphID;
   clFinish(Context->clCommandQue);
 
@@ -294,5 +313,5 @@ void llvm_visc_ptx_wait(void* graphID) {
   // clReleaseContext(Context->clGPUContext);
   clReleaseKernel(Context->clKernel);
   clReleaseProgram(Context->clProgram);
-  printf("Done with PTX kernel\n");
+  DEBUG(cout << "Done with PTX kernel\n");
 }
-- 
GitLab