Commit d737aa6f authored by Akash Kothari

Remove commented code from HPVM runtime

parent 517533da
@@ -31,7 +31,6 @@ typedef struct {
std::vector<pthread_t> *threads;
// Map from InputPort to Size
std::map<unsigned, uint64_t> *ArgInPortSizeMap;
// std::vector<uint64_t>* BindInSizes;
std::vector<unsigned> *BindInSourcePort;
std::vector<uint64_t> *BindOutSizes;
std::vector<uint64_t> *EdgeSizes;
@@ -326,18 +325,14 @@ static void *llvm_hpvm_ocl_request_mem(void *ptr, size_t size,
clFlags = CL_MEM_READ_ONLY;
hpvm_SwitchToTimer(&kernel_timer, hpvm_TimerID_COPY);
// pthread_mutex_lock(&ocl_mtx);
cl_mem d_input =
clCreateBuffer(Context->clOCLContext, clFlags, size, NULL, &errcode);
// pthread_mutex_unlock(&ocl_mtx);
checkErr(errcode, CL_SUCCESS, "Failure to allocate memory on device");
DEBUG(cout << "\nMemory allocated on device: " << d_input << flush << "\n");
if (isInput) {
DEBUG(cout << "\tCopying ...");
// pthread_mutex_lock(&ocl_mtx);
errcode = clEnqueueWriteBuffer(Context->clCommandQue, d_input, CL_TRUE, 0,
size, MTE->getAddress(), 0, NULL, NULL);
// pthread_mutex_unlock(&ocl_mtx);
checkErr(errcode, CL_SUCCESS, "Failure to copy memory to device");
}
@@ -435,30 +430,14 @@ static void insert_marker(struct hpvm_TimerSet *tset, enum hpvm_TimerID timer) {
*new_event = (struct hpvm_async_time_marker_list *)malloc(
sizeof(struct hpvm_async_time_marker_list));
(*new_event)->marker = calloc(1, sizeof(cl_event));
/*
// I don't think this is needed at all. I believe clEnqueueMarker 'creates' the event
#if ( __OPENCL_VERSION__ >= CL_VERSION_1_1 )
fprintf(stderr, "Creating Marker [%d]\n", timer);
*((cl_event *)((*new_event)->marker)) = clCreateUserEvent(*clContextPtr, &ciErrNum);
if (ciErrNum != CL_SUCCESS) {
  fprintf(stderr, "Error Creating User Event Object!\n");
}
ciErrNum = clSetUserEventStatus(*((cl_event *)((*new_event)->marker)), CL_QUEUED);
if (ciErrNum != CL_SUCCESS) {
  fprintf(stderr, "Error Setting User Event Status!\n");
}
#endif
*/
(*new_event)->next = NULL;
}
/* valid event handle now acquired: insert the event record */
(*new_event)->label = NULL;
(*new_event)->timerID = timer;
// pthread_mutex_lock(&ocl_mtx);
ciErrNum =
clEnqueueMarker(globalCommandQue, (cl_event *)(*new_event)->marker);
// pthread_mutex_unlock(&ocl_mtx);
if (ciErrNum != CL_SUCCESS) {
fprintf(stderr, "Error Enqueueing Marker!\n");
}
@@ -477,29 +456,14 @@ static void insert_submarker(struct hpvm_TimerSet *tset, char *label,
*new_event = (struct hpvm_async_time_marker_list *)malloc(
sizeof(struct hpvm_async_time_marker_list));
(*new_event)->marker = calloc(1, sizeof(cl_event));
/*
#if ( __OPENCL_VERSION__ >= CL_VERSION_1_1 )
fprintf(stderr, "Creating SubMarker %s[%d]\n", label, timer);
*((cl_event *)((*new_event)->marker)) = clCreateUserEvent(*clContextPtr, &ciErrNum);
if (ciErrNum != CL_SUCCESS) {
  fprintf(stderr, "Error Creating User Event Object!\n");
}
ciErrNum = clSetUserEventStatus(*((cl_event *)((*new_event)->marker)), CL_QUEUED);
if (ciErrNum != CL_SUCCESS) {
  fprintf(stderr, "Error Setting User Event Status!\n");
}
#endif
*/
(*new_event)->next = NULL;
}
/* valid event handle now acquired: insert the event record */
(*new_event)->label = label;
(*new_event)->timerID = timer;
// pthread_mutex_lock(&ocl_mtx);
ciErrNum =
clEnqueueMarker(globalCommandQue, (cl_event *)(*new_event)->marker);
// pthread_mutex_unlock(&ocl_mtx);
if (ciErrNum != CL_SUCCESS) {
fprintf(stderr, "Error Enqueueing Marker!\n");
}
@@ -617,7 +581,6 @@ void hpvm_StartTimerAndSubTimer(struct hpvm_Timer *timer,
numNotStopped &= 0x2; // Zero out 2^0
}
if (numNotStopped == 0x0) {
// fputs("Ignoring attempt to start running timer and subtimer\n", stderr);
return;
}
@@ -681,7 +644,6 @@ void hpvm_StopTimerAndSubTimer(struct hpvm_Timer *timer,
numNotRunning &= 0x2; // Zero out 2^0
}
if (numNotRunning == 0x0) {
// fputs("Ignoring attempt to stop stopped timer and subtimer\n", stderr);
return;
}
@@ -1127,7 +1089,6 @@ void hpvm_DestroyTimerSet(struct hpvm_TimerSet *timers) {
free(event);
// (*event) = NULL;
event = next;
}
@@ -1157,7 +1118,6 @@ void *llvm_hpvm_streamLaunch(void (*LaunchFunc)(void *, void *), void *args) {
Context->threads = new std::vector<pthread_t>();
Context->ArgInPortSizeMap = new std::map<unsigned, uint64_t>();
// Context->BindInSizes = new std::vector<uint64_t>();
Context->BindInSourcePort = new std::vector<unsigned>();
Context->BindOutSizes = new std::vector<uint64_t>();
Context->EdgeSizes = new std::vector<uint64_t>();
@@ -1185,7 +1145,6 @@ void llvm_hpvm_streamPush(void *graphID, void *args) {
for (unsigned j = 0; j < Ctx->BindInputBuffers->size(); j++) {
if (Ctx->BindInSourcePort->at(j) == i) {
// Push to all bind buffers connected to parent node at this port
// DEBUG(cout << "\tPushing Value " << element << " to buffer\n");
llvm_hpvm_bufferPush(Ctx->BindInputBuffers->at(j), element);
}
}
@@ -1206,7 +1165,6 @@ void *llvm_hpvm_streamPop(void *graphID) {
unsigned offset = 0;
for (unsigned i = 0; i < Ctx->BindOutputBuffers->size(); i++) {
uint64_t element = llvm_hpvm_bufferPop(Ctx->BindOutputBuffers->at(i));
// DEBUG(cout << "\tPopped Value " << element << " from buffer\n");
memcpy((char *)output + offset, &element, Ctx->BindOutSizes->at(i));
offset += Ctx->BindOutSizes->at(i);
}
@@ -1220,7 +1178,6 @@ void llvm_hpvm_streamWait(void *graphID) {
// Push garbage to all other input buffers
for (unsigned i = 0; i < Ctx->BindInputBuffers->size(); i++) {
uint64_t element = 0;
// DEBUG(cout << "\tPushing Value " << element << " to buffer\n");
llvm_hpvm_bufferPush(Ctx->BindInputBuffers->at(i), element);
}
// Push 1 in isLastInput buffers of all child nodes
@@ -1250,7 +1207,6 @@ void *llvm_hpvm_createBindOutBuffer(void *graphID, uint64_t size) {
DEBUG(cout << "Create BindOutBuffer -- Graph: " << graphID
<< ", Size: " << size << flush << "\n");
DFNodeContext_CPU *Context = (DFNodeContext_CPU *)graphID;
// Twine name = Twine("Bind.Out.")+Twine(Context->BindOutputBuffers->size());
CircularBuffer<uint64_t> *bufferID =
new CircularBuffer<uint64_t>(BUFFER_SIZE, "BindOut");
DEBUG(cout << "\tNew Buffer: " << bufferID << flush << "\n");
@@ -1262,7 +1218,6 @@ void *llvm_hpvm_createEdgeBuffer(void *graphID, uint64_t size) {
DEBUG(cout << "Create EdgeBuffer -- Graph: " << graphID << ", Size: " << size
<< flush << "\n");
DFNodeContext_CPU *Context = (DFNodeContext_CPU *)graphID;
// Twine name = Twine("Edge.")+Twine(Context->EdgeBuffers->size());
CircularBuffer<uint64_t> *bufferID =
new CircularBuffer<uint64_t>(BUFFER_SIZE, "Edge");
DEBUG(cout << "\tNew Buffer: " << bufferID << flush << "\n");
@@ -1275,7 +1230,6 @@ void *llvm_hpvm_createLastInputBuffer(void *graphID, uint64_t size) {
DEBUG(cout << "Create isLastInputBuffer -- Graph: " << graphID
<< ", Size: " << size << flush << "\n");
DFNodeContext_CPU *Context = (DFNodeContext_CPU *)graphID;
// Twine name = Twine("isLastInput.")+Twine(Context->EdgeBuffers->size());
CircularBuffer<uint64_t> *bufferID =
new CircularBuffer<uint64_t>(BUFFER_SIZE, "LastInput");
DEBUG(cout << "\tNew Buffer: " << bufferID << flush << "\n");
@@ -1346,8 +1300,6 @@ void *llvm_hpvm_cpu_launch(void *(*rootFunc)(void *), void *arguments) {
void llvm_hpvm_cpu_wait(void *graphID) {
DEBUG(cout << "Waiting for pthread to finish ...\n");
// DFNodeContext_CPU* Context = (DFNodeContext_CPU*) graphID;
// pthread_join(Context->threadID, NULL);
free(graphID);
DEBUG(cout << "\t... pthread Done!\n");
}
@@ -1500,9 +1452,6 @@ void *llvm_hpvm_ocl_initContext(enum hpvm::Target T) {
cl_uint numDevices;
clCreateSubDevices(clDevices[0], props, num_entries, subdevice_id,
&numDevices);
// printf("Num of devices = %d\n", numDevices);
// for(unsigned i =0 ; i< numDevices; i++)
// printf("Subdevice id %d = %p\n", i, subdevice_id[i]);
clDevices[0] = subdevice_id[0];
globalOCLContext =
clCreateContext(properties, 1, clDevices, NULL, NULL, &errcode);
@@ -1527,10 +1476,6 @@ void llvm_hpvm_ocl_clearContext(void *graphID) {
// FIXME: Have separate function to release command queue and clear context.
// Would be useful when a context has multiple command queues
clReleaseKernel(Context->clKernel);
// clReleaseProgram(Context->clProgram);
// clReleaseCommandQueue(Context->clCommandQue);
// clReleaseContext(globalOCLContext);
// DEBUG(cout << "Released context at: " << globalOCLContext);
free(Context);
DEBUG(cout << "Done with OCL kernel\n");
cout << "Printing HPVM Timer: KernelTimer\n";
@@ -1546,9 +1491,7 @@ void llvm_hpvm_ocl_argument_shared(void *graphID, int arg_index, size_t size) {
DFNodeContext_OCL *Context = (DFNodeContext_OCL *)graphID;
DEBUG(cout << "Using Context: " << Context << flush << "\n");
DEBUG(cout << "Using clKernel: " << Context->clKernel << flush << "\n");
// pthread_mutex_lock(&ocl_mtx);
cl_int errcode = clSetKernelArg(Context->clKernel, arg_index, size, NULL);
// pthread_mutex_unlock(&ocl_mtx);
checkErr(errcode, CL_SUCCESS, "Failure to set shared memory argument");
pthread_mutex_unlock(&ocl_mtx);
}
@@ -1562,9 +1505,7 @@ void llvm_hpvm_ocl_argument_scalar(void *graphID, void *input, int arg_index,
DFNodeContext_OCL *Context = (DFNodeContext_OCL *)graphID;
DEBUG(cout << "Using Context: " << Context << flush << "\n");
DEBUG(cout << "Using clKernel: " << Context->clKernel << flush << "\n");
// pthread_mutex_lock(&ocl_mtx);
cl_int errcode = clSetKernelArg(Context->clKernel, arg_index, size, input);
// pthread_mutex_unlock(&ocl_mtx);
checkErr(errcode, CL_SUCCESS, "Failure to set constant input argument");
pthread_mutex_unlock(&ocl_mtx);
}
@@ -1588,10 +1529,8 @@ void *llvm_hpvm_ocl_argument_ptr(void *graphID, void *input, int arg_index,
pthread_mutex_lock(&ocl_mtx);
// Set Kernel Argument
// pthread_mutex_lock(&ocl_mtx);
cl_int errcode = clSetKernelArg(Context->clKernel, arg_index, sizeof(cl_mem),
(void *)&d_input);
// pthread_mutex_unlock(&ocl_mtx);
checkErr(errcode, CL_SUCCESS, "Failure to set pointer argument");
DEBUG(cout << "\tDevicePtr = " << d_input << flush << "\n");
pthread_mutex_unlock(&ocl_mtx);
@@ -1605,15 +1544,11 @@ void *llvm_hpvm_ocl_output_ptr(void *graphID, int arg_index, size_t size) {
<< flush << "\n");
DFNodeContext_OCL *Context = (DFNodeContext_OCL *)graphID;
cl_int errcode;
// pthread_mutex_lock(&ocl_mtx);
cl_mem d_output = clCreateBuffer(Context->clOCLContext, CL_MEM_WRITE_ONLY,
size, NULL, &errcode);
// pthread_mutex_unlock(&ocl_mtx);
checkErr(errcode, CL_SUCCESS, "Failure to create output buffer on device");
// pthread_mutex_lock(&ocl_mtx);
errcode = clSetKernelArg(Context->clKernel, arg_index, sizeof(cl_mem),
(void *)&d_output);
// pthread_mutex_unlock(&ocl_mtx);
checkErr(errcode, CL_SUCCESS, "Failure to set pointer argument");
DEBUG(cout << "\tDevicePtr = " << d_output << flush << "\n");
pthread_mutex_unlock(&ocl_mtx);
@@ -1621,9 +1556,6 @@ void *llvm_hpvm_ocl_output_ptr(void *graphID, int arg_index, size_t size) {
}
void llvm_hpvm_ocl_free(void *ptr) {
// DEBUG(cout << "Release Device Pointer: " << ptr << flush << "\n");
// cl_mem d_ptr = (cl_mem) ptr;
// clReleaseMemObject(d_ptr);
}
void *llvm_hpvm_ocl_getOutput(void *graphID, void *h_output, void *d_output,
@@ -1635,11 +1567,9 @@ void *llvm_hpvm_ocl_getOutput(void *graphID, void *h_output, void *d_output,
if (h_output == NULL)
h_output = malloc(size);
DFNodeContext_OCL *Context = (DFNodeContext_OCL *)graphID;
// pthread_mutex_lock(&ocl_mtx);
cl_int errcode =
clEnqueueReadBuffer(Context->clCommandQue, (cl_mem)d_output, CL_TRUE, 0,
size, h_output, 0, NULL, NULL);
// pthread_mutex_unlock(&ocl_mtx);
checkErr(errcode, CL_SUCCESS, "[getOutput] Failure to read output");
pthread_mutex_unlock(&ocl_mtx);
return h_output;
@@ -1687,22 +1617,13 @@ void *llvm_hpvm_ocl_executeNode(void *graphID, unsigned workDim,
}
DEBUG(cout << ")\n");
}
// pthread_mutex_lock(&ocl_mtx);
clFinish(Context->clCommandQue);
// pthread_mutex_unlock(&ocl_mtx);
hpvm_SwitchToTimer(&kernel_timer, hpvm_TimerID_COMPUTATION);
// for(int i=0 ;i < NUM_TESTS; i++) {
// cout << "Iteration = " << i << flush << "\n";
// pthread_mutex_lock(&ocl_mtx);
cl_int errcode = clEnqueueNDRangeKernel(
Context->clCommandQue, Context->clKernel, workDim, NULL, GlobalWG,
(localWorkSize == NULL) ? NULL : LocalWG, 0, NULL, NULL);
// pthread_mutex_unlock(&ocl_mtx);
checkErr(errcode, CL_SUCCESS, "Failure to enqueue kernel");
//}
// pthread_mutex_lock(&ocl_mtx);
clFinish(Context->clCommandQue);
// pthread_mutex_unlock(&ocl_mtx);
hpvm_SwitchToTimer(&kernel_timer, hpvm_TimerID_NONE);
pthread_mutex_unlock(&ocl_mtx);
@@ -1768,11 +1689,9 @@ void *llvm_hpvm_ocl_launch(const char *FileName, const char *KernelName) {
Context->clOCLContext = globalOCLContext;
// Create a command-queue
// pthread_mutex_lock(&ocl_mtx);
Context->clCommandQue = clCreateCommandQueue(
Context->clOCLContext, clDevices[0], CL_QUEUE_PROFILING_ENABLE, &errcode);
globalCommandQue = Context->clCommandQue;
// pthread_mutex_unlock(&ocl_mtx);
checkErr(errcode, CL_SUCCESS, "Failure to create command queue");
DEBUG(cout << "Loading program binary: " << FileName << flush << "\n");
@@ -1780,10 +1699,8 @@ void *llvm_hpvm_ocl_launch(const char *FileName, const char *KernelName) {
checkErr(programSource != NULL, 1 /*bool true*/,
"Failure to load Program Binary");
// pthread_mutex_lock(&ocl_mtx);
Context->clProgram = clCreateProgramWithSource(
Context->clOCLContext, 1, (const char **)&programSource, NULL, &errcode);
// pthread_mutex_unlock(&ocl_mtx);
checkErr(errcode, CL_SUCCESS, "Failure to create program from binary");
DEBUG(cout << "Building kernel - " << KernelName << " from file " << FileName
@@ -1814,7 +1731,6 @@ void *llvm_hpvm_ocl_launch(const char *FileName, const char *KernelName) {
checkErr(errcode, CL_SUCCESS, "Failure to create kernel");
DEBUG(cout << "Kernel ID = " << Context->clKernel << "\n");
// free(clDevices);
free(programSource);
pthread_mutex_unlock(&ocl_mtx);
@@ -1825,16 +1741,12 @@ void llvm_hpvm_ocl_wait(void *graphID) {
pthread_mutex_lock(&ocl_mtx);
DEBUG(cout << "Wait\n");
DFNodeContext_OCL *Context = (DFNodeContext_OCL *)graphID;
// pthread_mutex_lock(&ocl_mtx);
clFinish(Context->clCommandQue);
// pthread_mutex_unlock(&ocl_mtx);
pthread_mutex_unlock(&ocl_mtx);
}
void llvm_hpvm_switchToTimer(void **timerSet, enum hpvm_TimerID timer) {
// cout << "Switching to timer " << timer << flush << "\n";
pthread_mutex_lock(&ocl_mtx);
// hpvm_SwitchToTimer((hpvm_TimerSet*)(*timerSet), timer);
pthread_mutex_unlock(&ocl_mtx);
}
void llvm_hpvm_printTimerSet(void **timerSet, char *timerName) {
@@ -11,7 +11,6 @@
#include <pthread.h>
#include <string>
#include <vector>
//#include <condition_variable>
#include "../../include/SupportHPVM/HPVMHint.h"
#include "../../include/SupportHPVM/HPVMTimer.h"
@@ -206,43 +205,27 @@ public:
template <class ElementType>
bool CircularBuffer<ElementType>::push(ElementType E) {
// DEBUG(cout << name << " Buffer[" << ID << "]: Push " << E << flush << "\n");
// unique_lock<mutex> lk(mtx);
pthread_mutex_lock(&mtx);
if ((Head + 1) % bufferSize == Tail) {
// DEBUG(cout << name << " Buffer[" << ID << "]: Push going to sleep ...\n");
// cv.wait(lk);
pthread_cond_wait(&cv, &mtx);
// DEBUG(cout << name << " Buffer[" << ID << "]: Push woke up\n");
}
buffer[Head] = E;
Head = (Head + 1) % bufferSize;
numElements++;
// DEBUG(cout << name << " Buffer[" << ID << "]: Total Elements = " << numElements << flush << "\n");
// lk.unlock();
pthread_mutex_unlock(&mtx);
// cv.notify_one();
pthread_cond_signal(&cv);
return true;
}
template <class ElementType> ElementType CircularBuffer<ElementType>::pop() {
// unique_lock<mutex> lk(mtx);
// DEBUG(cout << name << " Buffer[" << ID << "]: Pop\n");
pthread_mutex_lock(&mtx);
if (Tail == Head) {
// DEBUG(cout << name << " Buffer[" << ID << "]: Pop going to sleep ...\n");
// cv.wait(lk);
pthread_cond_wait(&cv, &mtx);
// DEBUG(cout << name << " Buffer[" << ID << "]: Pop woke up\n");
}
ElementType E = buffer[Tail];
Tail = (Tail + 1) % bufferSize;
numElements--;
// DEBUG(cout << name << " Buffer[" << ID << "]: Total Elements = " << numElements << flush << "\n");
// lk.unlock();
pthread_mutex_unlock(&mtx);
// cv.notify_one();
pthread_cond_signal(&cv);
return E;
}
@@ -24,8 +24,6 @@ private:
class NodePolicy : public Policy {
virtual int getVersion(const char *name, int64_t it) override {
std::string s(name);
// std::string NodeNames[1] = {
// "_Z9mysgemmNTPfiS_iS_iiff_clonedInternal_level2_cloned" };
std::string NodeNames[] = {
"WrapperGaussianSmoothing_cloned",
"WrapperlaplacianEstimate_cloned",
@@ -34,10 +32,6 @@ class NodePolicy : public Policy {
"WrapperComputeMaxGradient_cloned",
"WrapperRejectZeroCrossings_cloned",
};
// if (!s.compare(NodeNames[4])) {
// std::cout << s << ": CPU" << "\n";
// return 0;
//}
return 2;
}
};
@@ -54,17 +48,14 @@ class IterationPolicy : public Policy {
class DeviceStatusPolicy : public Policy {
virtual int getVersion(const char *name, int64_t it) override {
if (deviceStatus) {
// std::cout << "Returning GPU\n";
return 2;
} else {
// std::cout << "Returning CPU\n";
return 0;
}
}
};
/* ------------------------------------------------------------------------- */
// Added for the CFAR interactive policy demo.
class InteractivePolicy : public Policy {
private: