From d737aa6fa841aaedfd2bcca6e316e203a85952ec Mon Sep 17 00:00:00 2001 From: Akash Kothari <akashk4@tyler.cs.illinois.edu> Date: Tue, 4 Feb 2020 02:43:09 -0600 Subject: [PATCH] Remove commented code from HPVM runtime --- hpvm/projects/hpvm-rt/hpvm-rt.cpp | 88 ------------------------------- hpvm/projects/hpvm-rt/hpvm-rt.h | 17 ------ hpvm/projects/hpvm-rt/policy.h | 9 ---- 3 files changed, 114 deletions(-) diff --git a/hpvm/projects/hpvm-rt/hpvm-rt.cpp b/hpvm/projects/hpvm-rt/hpvm-rt.cpp index b42668f2b4..56a84a2711 100644 --- a/hpvm/projects/hpvm-rt/hpvm-rt.cpp +++ b/hpvm/projects/hpvm-rt/hpvm-rt.cpp @@ -31,7 +31,6 @@ typedef struct { std::vector<pthread_t> *threads; // Map from InputPort to Size std::map<unsigned, uint64_t> *ArgInPortSizeMap; - // std::vector<uint64_t>* BindInSizes; std::vector<unsigned> *BindInSourcePort; std::vector<uint64_t> *BindOutSizes; std::vector<uint64_t> *EdgeSizes; @@ -326,18 +325,14 @@ static void *llvm_hpvm_ocl_request_mem(void *ptr, size_t size, clFlags = CL_MEM_READ_ONLY; hpvm_SwitchToTimer(&kernel_timer, hpvm_TimerID_COPY); - // pthread_mutex_lock(&ocl_mtx); cl_mem d_input = clCreateBuffer(Context->clOCLContext, clFlags, size, NULL, &errcode); - // pthread_mutex_unlock(&ocl_mtx); checkErr(errcode, CL_SUCCESS, "Failure to allocate memory on device"); DEBUG(cout << "\nMemory allocated on device: " << d_input << flush << "\n"); if (isInput) { DEBUG(cout << "\tCopying ..."); - // pthread_mutex_lock(&ocl_mtx); errcode = clEnqueueWriteBuffer(Context->clCommandQue, d_input, CL_TRUE, 0, size, MTE->getAddress(), 0, NULL, NULL); - // pthread_mutex_unlock(&ocl_mtx); checkErr(errcode, CL_SUCCESS, "Failure to copy memory to device"); } @@ -435,30 +430,14 @@ static void insert_marker(struct hpvm_TimerSet *tset, enum hpvm_TimerID timer) { *new_event = (struct hpvm_async_time_marker_list *)malloc( sizeof(struct hpvm_async_time_marker_list)); (*new_event)->marker = calloc(1, sizeof(cl_event)); - /* - // I don't think this is needed at all. I believe clEnqueueMarker 'creates' -the event #if ( __OPENCL_VERSION__ >= CL_VERSION_1_1 ) fprintf(stderr, "Creating -Marker [%d]\n", timer); - *((cl_event *)((*new_event)->marker)) = clCreateUserEvent(*clContextPtr, -&ciErrNum); if (ciErrNum != CL_SUCCESS) { fprintf(stderr, "Error Creating User -Event Object!\n"); - } - ciErrNum = clSetUserEventStatus(*((cl_event *)((*new_event)->marker)), -CL_QUEUED); if (ciErrNum != CL_SUCCESS) { fprintf(stderr, "Error Setting User -Event Status!\n"); - } -#endif -*/ (*new_event)->next = NULL; } /* valid event handle now aquired: insert the event record */ (*new_event)->label = NULL; (*new_event)->timerID = timer; - // pthread_mutex_lock(&ocl_mtx); ciErrNum = clEnqueueMarker(globalCommandQue, (cl_event *)(*new_event)->marker); - // pthread_mutex_unlock(&ocl_mtx); if (ciErrNum != CL_SUCCESS) { fprintf(stderr, "Error Enqueueing Marker!\n"); } @@ -477,29 +456,14 @@ static void insert_submarker(struct hpvm_TimerSet *tset, char *label, *new_event = (struct hpvm_async_time_marker_list *)malloc( sizeof(struct hpvm_async_time_marker_list)); (*new_event)->marker = calloc(1, sizeof(cl_event)); - /* -#if ( __OPENCL_VERSION__ >= CL_VERSION_1_1 ) -fprintf(stderr, "Creating SubMarker %s[%d]\n", label, timer); - *((cl_event *)((*new_event)->marker)) = clCreateUserEvent(*clContextPtr, -&ciErrNum); if (ciErrNum != CL_SUCCESS) { fprintf(stderr, "Error Creating User -Event Object!\n"); - } - ciErrNum = clSetUserEventStatus(*((cl_event *)((*new_event)->marker)), -CL_QUEUED); if (ciErrNum != CL_SUCCESS) { fprintf(stderr, "Error Setting User -Event Status!\n"); - } -#endif -*/ (*new_event)->next = NULL; } /* valid event handle now aquired: insert the event record */ (*new_event)->label = label; (*new_event)->timerID = timer; - // pthread_mutex_lock(&ocl_mtx); ciErrNum = clEnqueueMarker(globalCommandQue, (cl_event *)(*new_event)->marker); - // pthread_mutex_unlock(&ocl_mtx); if (ciErrNum != CL_SUCCESS) { fprintf(stderr, "Error Enqueueing Marker!\n"); } @@ -617,7 +581,6 @@ void hpvm_StartTimerAndSubTimer(struct hpvm_Timer *timer, numNotStopped &= 0x2; // Zero out 2^0 } if (numNotStopped == 0x0) { - // fputs("Ignoring attempt to start running timer and subtimer\n", stderr); return; } @@ -681,7 +644,6 @@ void hpvm_StopTimerAndSubTimer(struct hpvm_Timer *timer, numNotRunning &= 0x2; // Zero out 2^0 } if (numNotRunning == 0x0) { - // fputs("Ignoring attempt to stop stopped timer and subtimer\n", stderr); return; } @@ -1127,7 +1089,6 @@ void hpvm_DestroyTimerSet(struct hpvm_TimerSet *timers) { free(event); - // (*event) = NULL; event = next; } @@ -1157,7 +1118,6 @@ void *llvm_hpvm_streamLaunch(void (*LaunchFunc)(void *, void *), void *args) { Context->threads = new std::vector<pthread_t>(); Context->ArgInPortSizeMap = new std::map<unsigned, uint64_t>(); - // Context->BindInSizes = new std::vector<uint64_t>(); Context->BindInSourcePort = new std::vector<unsigned>(); Context->BindOutSizes = new std::vector<uint64_t>(); Context->EdgeSizes = new std::vector<uint64_t>(); @@ -1185,7 +1145,6 @@ void llvm_hpvm_streamPush(void *graphID, void *args) { for (unsigned j = 0; j < Ctx->BindInputBuffers->size(); j++) { if (Ctx->BindInSourcePort->at(j) == i) { // Push to all bind buffers connected to parent node at this port - // DEBUG(cout << "\tPushing Value " << element << " to buffer\n"); llvm_hpvm_bufferPush(Ctx->BindInputBuffers->at(j), element); } } @@ -1206,7 +1165,6 @@ void *llvm_hpvm_streamPop(void *graphID) { unsigned offset = 0; for (unsigned i = 0; i < Ctx->BindOutputBuffers->size(); i++) { uint64_t element = llvm_hpvm_bufferPop(Ctx->BindOutputBuffers->at(i)); - // DEBUG(cout << "\tPopped Value " << element << " from buffer\n"); memcpy((char *)output + offset, &element, Ctx->BindOutSizes->at(i)); offset += Ctx->BindOutSizes->at(i); } @@ -1220,7 +1178,6 @@ void llvm_hpvm_streamWait(void *graphID) { // Push garbage to all other input buffers for (unsigned i = 0; i < Ctx->BindInputBuffers->size(); i++) { uint64_t element = 0; - // DEBUG(cout << "\tPushing Value " << element << " to buffer\n"); llvm_hpvm_bufferPush(Ctx->BindInputBuffers->at(i), element); } // Push 1 in isLastInput buffers of all child nodes @@ -1250,7 +1207,6 @@ void *llvm_hpvm_createBindOutBuffer(void *graphID, uint64_t size) { DEBUG(cout << "Create BindOutBuffer -- Graph: " << graphID << ", Size: " << size << flush << "\n"); DFNodeContext_CPU *Context = (DFNodeContext_CPU *)graphID; - // Twine name = Twine("Bind.Out.")+Twine(Context->BindOutputBuffers->size()); CircularBuffer<uint64_t> *bufferID = new CircularBuffer<uint64_t>(BUFFER_SIZE, "BindOut"); DEBUG(cout << "\tNew Buffer: " << bufferID << flush << "\n"); @@ -1262,7 +1218,6 @@ void *llvm_hpvm_createEdgeBuffer(void *graphID, uint64_t size) { DEBUG(cout << "Create EdgeBuffer -- Graph: " << graphID << ", Size: " << size << flush << "\n"); DFNodeContext_CPU *Context = (DFNodeContext_CPU *)graphID; - // Twine name = Twine("Edge.")+Twine(Context->EdgeBuffers->size()); CircularBuffer<uint64_t> *bufferID = new CircularBuffer<uint64_t>(BUFFER_SIZE, "Edge"); DEBUG(cout << "\tNew Buffer: " << bufferID << flush << "\n"); @@ -1275,7 +1230,6 @@ void *llvm_hpvm_createLastInputBuffer(void *graphID, uint64_t size) { DEBUG(cout << "Create isLastInputBuffer -- Graph: " << graphID << ", Size: " << size << flush << "\n"); DFNodeContext_CPU *Context = (DFNodeContext_CPU *)graphID; - // Twine name = Twine("isLastInput.")+Twine(Context->EdgeBuffers->size()); CircularBuffer<uint64_t> *bufferID = new CircularBuffer<uint64_t>(BUFFER_SIZE, "LastInput"); DEBUG(cout << "\tNew Buffer: " << bufferID << flush << "\n"); @@ -1346,8 +1300,6 @@ void *llvm_hpvm_cpu_launch(void *(*rootFunc)(void *), void *arguments) { void llvm_hpvm_cpu_wait(void *graphID) { DEBUG(cout << "Waiting for pthread to finish ...\n"); - // DFNodeContext_CPU* Context = (DFNodeContext_CPU*) graphID; - // pthread_join(Context->threadID, NULL); free(graphID); DEBUG(cout << "\t... pthread Done!\n"); } @@ -1500,9 +1452,6 @@ void *llvm_hpvm_ocl_initContext(enum hpvm::Target T) { cl_uint numDevices; clCreateSubDevices(clDevices[0], props, num_entries, subdevice_id, &numDevices); - // printf("Num of devices = %d\n", numDevices); - // for(unsigned i =0 ; i< numDevices; i++) - // printf("Subdevice id %d = %p\n", i, subdevice_id[i]); clDevices[0] = subdevice_id[0]; globalOCLContext = clCreateContext(properties, 1, clDevices, NULL, NULL, &errcode); @@ -1527,10 +1476,6 @@ void llvm_hpvm_ocl_clearContext(void *graphID) { // FIXME: Have separate function to release command queue and clear context. // Would be useful when a context has multiple command queues clReleaseKernel(Context->clKernel); - // clReleaseProgram(Context->clProgram); - // clReleaseCommandQueue(Context->clCommandQue); - // clReleaseContext(globalOCLContext); - // DEBUG(cout << "Released context at: " << globalOCLContext); free(Context); DEBUG(cout << "Done with OCL kernel\n"); cout << "Printing HPVM Timer: KernelTimer\n"; @@ -1546,9 +1491,7 @@ void llvm_hpvm_ocl_argument_shared(void *graphID, int arg_index, size_t size) { DFNodeContext_OCL *Context = (DFNodeContext_OCL *)graphID; DEBUG(cout << "Using Context: " << Context << flush << "\n"); DEBUG(cout << "Using clKernel: " << Context->clKernel << flush << "\n"); - // pthread_mutex_lock(&ocl_mtx); cl_int errcode = clSetKernelArg(Context->clKernel, arg_index, size, NULL); - // pthread_mutex_unlock(&ocl_mtx); checkErr(errcode, CL_SUCCESS, "Failure to set shared memory argument"); pthread_mutex_unlock(&ocl_mtx); } @@ -1562,9 +1505,7 @@ void llvm_hpvm_ocl_argument_scalar(void *graphID, void *input, int arg_index, DFNodeContext_OCL *Context = (DFNodeContext_OCL *)graphID; DEBUG(cout << "Using Context: " << Context << flush << "\n"); DEBUG(cout << "Using clKernel: " << Context->clKernel << flush << "\n"); - // pthread_mutex_lock(&ocl_mtx); cl_int errcode = clSetKernelArg(Context->clKernel, arg_index, size, input); - // pthread_mutex_unlock(&ocl_mtx); checkErr(errcode, CL_SUCCESS, "Failure to set constant input argument"); pthread_mutex_unlock(&ocl_mtx); } @@ -1588,10 +1529,8 @@ void *llvm_hpvm_ocl_argument_ptr(void *graphID, void *input, int arg_index, pthread_mutex_lock(&ocl_mtx); // Set Kernel Argument - // pthread_mutex_lock(&ocl_mtx); cl_int errcode = clSetKernelArg(Context->clKernel, arg_index, sizeof(cl_mem), (void *)&d_input); - // pthread_mutex_unlock(&ocl_mtx); checkErr(errcode, CL_SUCCESS, "Failure to set pointer argument"); DEBUG(cout << "\tDevicePtr = " << d_input << flush << "\n"); pthread_mutex_unlock(&ocl_mtx); @@ -1605,15 +1544,11 @@ void *llvm_hpvm_ocl_output_ptr(void *graphID, int arg_index, size_t size) { << flush << "\n"); DFNodeContext_OCL *Context = (DFNodeContext_OCL *)graphID; cl_int errcode; - // pthread_mutex_lock(&ocl_mtx); cl_mem d_output = clCreateBuffer(Context->clOCLContext, CL_MEM_WRITE_ONLY, size, NULL, &errcode); - // pthread_mutex_unlock(&ocl_mtx); checkErr(errcode, CL_SUCCESS, "Failure to create output buffer on device"); - // pthread_mutex_lock(&ocl_mtx); errcode = clSetKernelArg(Context->clKernel, arg_index, sizeof(cl_mem), (void *)&d_output); - // pthread_mutex_unlock(&ocl_mtx); checkErr(errcode, CL_SUCCESS, "Failure to set pointer argument"); DEBUG(cout << "\tDevicePtr = " << d_output << flush << "\n"); pthread_mutex_unlock(&ocl_mtx); @@ -1621,9 +1556,6 @@ void *llvm_hpvm_ocl_output_ptr(void *graphID, int arg_index, size_t size) { } void llvm_hpvm_ocl_free(void *ptr) { - // DEBUG(cout << "Release Device Pointer: " << ptr << flush << "\n"); - // cl_mem d_ptr = (cl_mem) ptr; - // clReleaseMemObject(d_ptr); } void *llvm_hpvm_ocl_getOutput(void *graphID, void *h_output, void *d_output, @@ -1635,11 +1567,9 @@ void *llvm_hpvm_ocl_getOutput(void *graphID, void *h_output, void *d_output, if (h_output == NULL) h_output = malloc(size); DFNodeContext_OCL *Context = (DFNodeContext_OCL *)graphID; - // pthread_mutex_lock(&ocl_mtx); cl_int errcode = clEnqueueReadBuffer(Context->clCommandQue, (cl_mem)d_output, CL_TRUE, 0, size, h_output, 0, NULL, NULL); - // pthread_mutex_unlock(&ocl_mtx); checkErr(errcode, CL_SUCCESS, "[getOutput] Failure to read output"); pthread_mutex_unlock(&ocl_mtx); return h_output; @@ -1687,22 +1617,13 @@ void *llvm_hpvm_ocl_executeNode(void *graphID, unsigned workDim, } DEBUG(cout << ")\n"); } - // pthread_mutex_lock(&ocl_mtx); clFinish(Context->clCommandQue); - // pthread_mutex_unlock(&ocl_mtx); hpvm_SwitchToTimer(&kernel_timer, hpvm_TimerID_COMPUTATION); - // for(int i=0 ;i < NUM_TESTS; i++) { - // cout << "Iteration = " << i << flush << "\n"; - // pthread_mutex_lock(&ocl_mtx); cl_int errcode = clEnqueueNDRangeKernel( Context->clCommandQue, Context->clKernel, workDim, NULL, GlobalWG, (localWorkSize == NULL) ? NULL : LocalWG, 0, NULL, NULL); - // pthread_mutex_unlock(&ocl_mtx); checkErr(errcode, CL_SUCCESS, "Failure to enqueue kernel"); - //} - // pthread_mutex_lock(&ocl_mtx); clFinish(Context->clCommandQue); - // pthread_mutex_unlock(&ocl_mtx); hpvm_SwitchToTimer(&kernel_timer, hpvm_TimerID_NONE); pthread_mutex_unlock(&ocl_mtx); @@ -1768,11 +1689,9 @@ void *llvm_hpvm_ocl_launch(const char *FileName, const char *KernelName) { Context->clOCLContext = globalOCLContext; // Create a command-queue - // pthread_mutex_lock(&ocl_mtx); Context->clCommandQue = clCreateCommandQueue( Context->clOCLContext, clDevices[0], CL_QUEUE_PROFILING_ENABLE, &errcode); globalCommandQue = Context->clCommandQue; - // pthread_mutex_unlock(&ocl_mtx); checkErr(errcode, CL_SUCCESS, "Failure to create command queue"); DEBUG(cout << "Loading program binary: " << FileName << flush << "\n"); @@ -1780,10 +1699,8 @@ void *llvm_hpvm_ocl_launch(const char *FileName, const char *KernelName) { checkErr(programSource != NULL, 1 /*bool true*/, "Failure to load Program Binary"); - // pthread_mutex_lock(&ocl_mtx); Context->clProgram = clCreateProgramWithSource( Context->clOCLContext, 1, (const char **)&programSource, NULL, &errcode); - // pthread_mutex_unlock(&ocl_mtx); checkErr(errcode, CL_SUCCESS, "Failure to create program from binary"); DEBUG(cout << "Building kernel - " << KernelName << " from file " << FileName @@ -1814,7 +1731,6 @@ void *llvm_hpvm_ocl_launch(const char *FileName, const char *KernelName) { checkErr(errcode, CL_SUCCESS, "Failure to create kernel"); DEBUG(cout << "Kernel ID = " << Context->clKernel << "\n"); - // free(clDevices); free(programSource); pthread_mutex_unlock(&ocl_mtx); @@ -1825,16 +1741,12 @@ void llvm_hpvm_ocl_wait(void *graphID) { pthread_mutex_lock(&ocl_mtx); DEBUG(cout << "Wait\n"); DFNodeContext_OCL *Context = (DFNodeContext_OCL *)graphID; - // pthread_mutex_lock(&ocl_mtx); clFinish(Context->clCommandQue); - // pthread_mutex_unlock(&ocl_mtx); pthread_mutex_unlock(&ocl_mtx); } void llvm_hpvm_switchToTimer(void **timerSet, enum hpvm_TimerID timer) { - // cout << "Switching to timer " << timer << flush << "\n"; pthread_mutex_lock(&ocl_mtx); - // hpvm_SwitchToTimer((hpvm_TimerSet*)(*timerSet), timer); pthread_mutex_unlock(&ocl_mtx); } void llvm_hpvm_printTimerSet(void **timerSet, char *timerName) { diff --git a/hpvm/projects/hpvm-rt/hpvm-rt.h b/hpvm/projects/hpvm-rt/hpvm-rt.h index 94fe5b5ef0..8f8b3eef69 100644 --- a/hpvm/projects/hpvm-rt/hpvm-rt.h +++ b/hpvm/projects/hpvm-rt/hpvm-rt.h @@ -11,7 +11,6 @@ #include <pthread.h> #include <string> #include <vector> -//#include <condition_variable> #include "../../include/SupportHPVM/HPVMHint.h" #include "../../include/SupportHPVM/HPVMTimer.h" @@ -206,43 +205,27 @@ public: template <class ElementType> bool CircularBuffer<ElementType>::push(ElementType E) { - // DEBUG(cout << name << " Buffer[" << ID << "]: Push " << E << flush << - // "\n"); unique_lock<mutex> lk(mtx); pthread_mutex_lock(&mtx); if ((Head + 1) % bufferSize == Tail) { - // DEBUG(cout << name << " Buffer[" << ID << "]: Push going to sleep - // ...\n"); cv.wait(lk); pthread_cond_wait(&cv, &mtx); - // DEBUG(cout << name << " Buffer[" << ID << "]: Push woke up\n"); } buffer[Head] = E; Head = (Head + 1) % bufferSize; numElements++; - // DEBUG(cout << name << " Buffer[" << ID << "]: Total Elements = " << - // numElements << flush << "\n"); lk.unlock(); pthread_mutex_unlock(&mtx); - // cv.notify_one(); pthread_cond_signal(&cv); return true; } template <class ElementType> ElementType CircularBuffer<ElementType>::pop() { - // unique_lock<mutex> lk(mtx); - // DEBUG(cout << name << " Buffer[" << ID << "]: Pop\n"); pthread_mutex_lock(&mtx); if (Tail == Head) { - // DEBUG(cout << name << " Buffer[" << ID << "]: Pop going to sleep ...\n"); - // cv.wait(lk); pthread_cond_wait(&cv, &mtx); - // DEBUG(cout << name << " Buffer[" << ID << "]: Pop woke up\n"); } ElementType E = buffer[Tail]; Tail = (Tail + 1) % bufferSize; numElements--; - // DEBUG(cout << name << " Buffer[" << ID << "]: Total Elements = " << - // numElements << flush << "\n"); lk.unlock(); pthread_mutex_unlock(&mtx); - // cv.notify_one(); pthread_cond_signal(&cv); return E; } diff --git a/hpvm/projects/hpvm-rt/policy.h b/hpvm/projects/hpvm-rt/policy.h index d50e65868b..78aacfc94a 100644 --- a/hpvm/projects/hpvm-rt/policy.h +++ b/hpvm/projects/hpvm-rt/policy.h @@ -24,8 +24,6 @@ private: class NodePolicy : public Policy { virtual int getVersion(const char *name, int64_t it) override { std::string s(name); - // std::string NodeNames[1] = { - // "_Z9mysgemmNTPfiS_iS_iiff_clonedInternal_level2_cloned" }; std::string NodeNames[] = { "WrapperGaussianSmoothing_cloned", "WrapperlaplacianEstimate_cloned", @@ -34,10 +32,6 @@ class NodePolicy : public Policy { "WrapperComputeMaxGradient_cloned", "WrapperRejectZeroCrossings_cloned", }; - // if (!s.compare(NodeNames[4])) { - // std::cout << s << ": CPU" << "\n"; - // return 0; - //} return 2; } }; @@ -54,17 +48,14 @@ class IterationPolicy : public Policy { class DeviceStatusPolicy : public Policy { virtual int getVersion(const char *name, int64_t it) override { if (deviceStatus) { - // std::cout << "Returning GPU\n"; return 2; } else { - // std::cout << "Returning CPU\n"; return 0; } } }; /* ------------------------------------------------------------------------- */ -// Added for the CFAR interactive policy demo. class InteractivePolicy : public Policy { private: -- GitLab