Skip to content
Snippets Groups Projects
Commit d737aa6f authored by Akash Kothari's avatar Akash Kothari
Browse files

Remove commented code from HPVM runtime

parent 517533da
No related branches found
No related tags found
No related merge requests found
...@@ -31,7 +31,6 @@ typedef struct { ...@@ -31,7 +31,6 @@ typedef struct {
std::vector<pthread_t> *threads; std::vector<pthread_t> *threads;
// Map from InputPort to Size // Map from InputPort to Size
std::map<unsigned, uint64_t> *ArgInPortSizeMap; std::map<unsigned, uint64_t> *ArgInPortSizeMap;
// std::vector<uint64_t>* BindInSizes;
std::vector<unsigned> *BindInSourcePort; std::vector<unsigned> *BindInSourcePort;
std::vector<uint64_t> *BindOutSizes; std::vector<uint64_t> *BindOutSizes;
std::vector<uint64_t> *EdgeSizes; std::vector<uint64_t> *EdgeSizes;
...@@ -326,18 +325,14 @@ static void *llvm_hpvm_ocl_request_mem(void *ptr, size_t size, ...@@ -326,18 +325,14 @@ static void *llvm_hpvm_ocl_request_mem(void *ptr, size_t size,
clFlags = CL_MEM_READ_ONLY; clFlags = CL_MEM_READ_ONLY;
hpvm_SwitchToTimer(&kernel_timer, hpvm_TimerID_COPY); hpvm_SwitchToTimer(&kernel_timer, hpvm_TimerID_COPY);
// pthread_mutex_lock(&ocl_mtx);
cl_mem d_input = cl_mem d_input =
clCreateBuffer(Context->clOCLContext, clFlags, size, NULL, &errcode); clCreateBuffer(Context->clOCLContext, clFlags, size, NULL, &errcode);
// pthread_mutex_unlock(&ocl_mtx);
checkErr(errcode, CL_SUCCESS, "Failure to allocate memory on device"); checkErr(errcode, CL_SUCCESS, "Failure to allocate memory on device");
DEBUG(cout << "\nMemory allocated on device: " << d_input << flush << "\n"); DEBUG(cout << "\nMemory allocated on device: " << d_input << flush << "\n");
if (isInput) { if (isInput) {
DEBUG(cout << "\tCopying ..."); DEBUG(cout << "\tCopying ...");
// pthread_mutex_lock(&ocl_mtx);
errcode = clEnqueueWriteBuffer(Context->clCommandQue, d_input, CL_TRUE, 0, errcode = clEnqueueWriteBuffer(Context->clCommandQue, d_input, CL_TRUE, 0,
size, MTE->getAddress(), 0, NULL, NULL); size, MTE->getAddress(), 0, NULL, NULL);
// pthread_mutex_unlock(&ocl_mtx);
checkErr(errcode, CL_SUCCESS, "Failure to copy memory to device"); checkErr(errcode, CL_SUCCESS, "Failure to copy memory to device");
} }
...@@ -435,30 +430,14 @@ static void insert_marker(struct hpvm_TimerSet *tset, enum hpvm_TimerID timer) { ...@@ -435,30 +430,14 @@ static void insert_marker(struct hpvm_TimerSet *tset, enum hpvm_TimerID timer) {
*new_event = (struct hpvm_async_time_marker_list *)malloc( *new_event = (struct hpvm_async_time_marker_list *)malloc(
sizeof(struct hpvm_async_time_marker_list)); sizeof(struct hpvm_async_time_marker_list));
(*new_event)->marker = calloc(1, sizeof(cl_event)); (*new_event)->marker = calloc(1, sizeof(cl_event));
/*
// I don't think this is needed at all. I believe clEnqueueMarker 'creates'
the event #if ( __OPENCL_VERSION__ >= CL_VERSION_1_1 ) fprintf(stderr, "Creating
Marker [%d]\n", timer);
*((cl_event *)((*new_event)->marker)) = clCreateUserEvent(*clContextPtr,
&ciErrNum); if (ciErrNum != CL_SUCCESS) { fprintf(stderr, "Error Creating User
Event Object!\n");
}
ciErrNum = clSetUserEventStatus(*((cl_event *)((*new_event)->marker)),
CL_QUEUED); if (ciErrNum != CL_SUCCESS) { fprintf(stderr, "Error Setting User
Event Status!\n");
}
#endif
*/
(*new_event)->next = NULL; (*new_event)->next = NULL;
} }
/* valid event handle now aquired: insert the event record */ /* valid event handle now aquired: insert the event record */
(*new_event)->label = NULL; (*new_event)->label = NULL;
(*new_event)->timerID = timer; (*new_event)->timerID = timer;
// pthread_mutex_lock(&ocl_mtx);
ciErrNum = ciErrNum =
clEnqueueMarker(globalCommandQue, (cl_event *)(*new_event)->marker); clEnqueueMarker(globalCommandQue, (cl_event *)(*new_event)->marker);
// pthread_mutex_unlock(&ocl_mtx);
if (ciErrNum != CL_SUCCESS) { if (ciErrNum != CL_SUCCESS) {
fprintf(stderr, "Error Enqueueing Marker!\n"); fprintf(stderr, "Error Enqueueing Marker!\n");
} }
...@@ -477,29 +456,14 @@ static void insert_submarker(struct hpvm_TimerSet *tset, char *label, ...@@ -477,29 +456,14 @@ static void insert_submarker(struct hpvm_TimerSet *tset, char *label,
*new_event = (struct hpvm_async_time_marker_list *)malloc( *new_event = (struct hpvm_async_time_marker_list *)malloc(
sizeof(struct hpvm_async_time_marker_list)); sizeof(struct hpvm_async_time_marker_list));
(*new_event)->marker = calloc(1, sizeof(cl_event)); (*new_event)->marker = calloc(1, sizeof(cl_event));
/*
#if ( __OPENCL_VERSION__ >= CL_VERSION_1_1 )
fprintf(stderr, "Creating SubMarker %s[%d]\n", label, timer);
*((cl_event *)((*new_event)->marker)) = clCreateUserEvent(*clContextPtr,
&ciErrNum); if (ciErrNum != CL_SUCCESS) { fprintf(stderr, "Error Creating User
Event Object!\n");
}
ciErrNum = clSetUserEventStatus(*((cl_event *)((*new_event)->marker)),
CL_QUEUED); if (ciErrNum != CL_SUCCESS) { fprintf(stderr, "Error Setting User
Event Status!\n");
}
#endif
*/
(*new_event)->next = NULL; (*new_event)->next = NULL;
} }
/* valid event handle now aquired: insert the event record */ /* valid event handle now aquired: insert the event record */
(*new_event)->label = label; (*new_event)->label = label;
(*new_event)->timerID = timer; (*new_event)->timerID = timer;
// pthread_mutex_lock(&ocl_mtx);
ciErrNum = ciErrNum =
clEnqueueMarker(globalCommandQue, (cl_event *)(*new_event)->marker); clEnqueueMarker(globalCommandQue, (cl_event *)(*new_event)->marker);
// pthread_mutex_unlock(&ocl_mtx);
if (ciErrNum != CL_SUCCESS) { if (ciErrNum != CL_SUCCESS) {
fprintf(stderr, "Error Enqueueing Marker!\n"); fprintf(stderr, "Error Enqueueing Marker!\n");
} }
...@@ -617,7 +581,6 @@ void hpvm_StartTimerAndSubTimer(struct hpvm_Timer *timer, ...@@ -617,7 +581,6 @@ void hpvm_StartTimerAndSubTimer(struct hpvm_Timer *timer,
numNotStopped &= 0x2; // Zero out 2^0 numNotStopped &= 0x2; // Zero out 2^0
} }
if (numNotStopped == 0x0) { if (numNotStopped == 0x0) {
// fputs("Ignoring attempt to start running timer and subtimer\n", stderr);
return; return;
} }
...@@ -681,7 +644,6 @@ void hpvm_StopTimerAndSubTimer(struct hpvm_Timer *timer, ...@@ -681,7 +644,6 @@ void hpvm_StopTimerAndSubTimer(struct hpvm_Timer *timer,
numNotRunning &= 0x2; // Zero out 2^0 numNotRunning &= 0x2; // Zero out 2^0
} }
if (numNotRunning == 0x0) { if (numNotRunning == 0x0) {
// fputs("Ignoring attempt to stop stopped timer and subtimer\n", stderr);
return; return;
} }
...@@ -1127,7 +1089,6 @@ void hpvm_DestroyTimerSet(struct hpvm_TimerSet *timers) { ...@@ -1127,7 +1089,6 @@ void hpvm_DestroyTimerSet(struct hpvm_TimerSet *timers) {
free(event); free(event);
// (*event) = NULL;
event = next; event = next;
} }
...@@ -1157,7 +1118,6 @@ void *llvm_hpvm_streamLaunch(void (*LaunchFunc)(void *, void *), void *args) { ...@@ -1157,7 +1118,6 @@ void *llvm_hpvm_streamLaunch(void (*LaunchFunc)(void *, void *), void *args) {
Context->threads = new std::vector<pthread_t>(); Context->threads = new std::vector<pthread_t>();
Context->ArgInPortSizeMap = new std::map<unsigned, uint64_t>(); Context->ArgInPortSizeMap = new std::map<unsigned, uint64_t>();
// Context->BindInSizes = new std::vector<uint64_t>();
Context->BindInSourcePort = new std::vector<unsigned>(); Context->BindInSourcePort = new std::vector<unsigned>();
Context->BindOutSizes = new std::vector<uint64_t>(); Context->BindOutSizes = new std::vector<uint64_t>();
Context->EdgeSizes = new std::vector<uint64_t>(); Context->EdgeSizes = new std::vector<uint64_t>();
...@@ -1185,7 +1145,6 @@ void llvm_hpvm_streamPush(void *graphID, void *args) { ...@@ -1185,7 +1145,6 @@ void llvm_hpvm_streamPush(void *graphID, void *args) {
for (unsigned j = 0; j < Ctx->BindInputBuffers->size(); j++) { for (unsigned j = 0; j < Ctx->BindInputBuffers->size(); j++) {
if (Ctx->BindInSourcePort->at(j) == i) { if (Ctx->BindInSourcePort->at(j) == i) {
// Push to all bind buffers connected to parent node at this port // Push to all bind buffers connected to parent node at this port
// DEBUG(cout << "\tPushing Value " << element << " to buffer\n");
llvm_hpvm_bufferPush(Ctx->BindInputBuffers->at(j), element); llvm_hpvm_bufferPush(Ctx->BindInputBuffers->at(j), element);
} }
} }
...@@ -1206,7 +1165,6 @@ void *llvm_hpvm_streamPop(void *graphID) { ...@@ -1206,7 +1165,6 @@ void *llvm_hpvm_streamPop(void *graphID) {
unsigned offset = 0; unsigned offset = 0;
for (unsigned i = 0; i < Ctx->BindOutputBuffers->size(); i++) { for (unsigned i = 0; i < Ctx->BindOutputBuffers->size(); i++) {
uint64_t element = llvm_hpvm_bufferPop(Ctx->BindOutputBuffers->at(i)); uint64_t element = llvm_hpvm_bufferPop(Ctx->BindOutputBuffers->at(i));
// DEBUG(cout << "\tPopped Value " << element << " from buffer\n");
memcpy((char *)output + offset, &element, Ctx->BindOutSizes->at(i)); memcpy((char *)output + offset, &element, Ctx->BindOutSizes->at(i));
offset += Ctx->BindOutSizes->at(i); offset += Ctx->BindOutSizes->at(i);
} }
...@@ -1220,7 +1178,6 @@ void llvm_hpvm_streamWait(void *graphID) { ...@@ -1220,7 +1178,6 @@ void llvm_hpvm_streamWait(void *graphID) {
// Push garbage to all other input buffers // Push garbage to all other input buffers
for (unsigned i = 0; i < Ctx->BindInputBuffers->size(); i++) { for (unsigned i = 0; i < Ctx->BindInputBuffers->size(); i++) {
uint64_t element = 0; uint64_t element = 0;
// DEBUG(cout << "\tPushing Value " << element << " to buffer\n");
llvm_hpvm_bufferPush(Ctx->BindInputBuffers->at(i), element); llvm_hpvm_bufferPush(Ctx->BindInputBuffers->at(i), element);
} }
// Push 1 in isLastInput buffers of all child nodes // Push 1 in isLastInput buffers of all child nodes
...@@ -1250,7 +1207,6 @@ void *llvm_hpvm_createBindOutBuffer(void *graphID, uint64_t size) { ...@@ -1250,7 +1207,6 @@ void *llvm_hpvm_createBindOutBuffer(void *graphID, uint64_t size) {
DEBUG(cout << "Create BindOutBuffer -- Graph: " << graphID DEBUG(cout << "Create BindOutBuffer -- Graph: " << graphID
<< ", Size: " << size << flush << "\n"); << ", Size: " << size << flush << "\n");
DFNodeContext_CPU *Context = (DFNodeContext_CPU *)graphID; DFNodeContext_CPU *Context = (DFNodeContext_CPU *)graphID;
// Twine name = Twine("Bind.Out.")+Twine(Context->BindOutputBuffers->size());
CircularBuffer<uint64_t> *bufferID = CircularBuffer<uint64_t> *bufferID =
new CircularBuffer<uint64_t>(BUFFER_SIZE, "BindOut"); new CircularBuffer<uint64_t>(BUFFER_SIZE, "BindOut");
DEBUG(cout << "\tNew Buffer: " << bufferID << flush << "\n"); DEBUG(cout << "\tNew Buffer: " << bufferID << flush << "\n");
...@@ -1262,7 +1218,6 @@ void *llvm_hpvm_createEdgeBuffer(void *graphID, uint64_t size) { ...@@ -1262,7 +1218,6 @@ void *llvm_hpvm_createEdgeBuffer(void *graphID, uint64_t size) {
DEBUG(cout << "Create EdgeBuffer -- Graph: " << graphID << ", Size: " << size DEBUG(cout << "Create EdgeBuffer -- Graph: " << graphID << ", Size: " << size
<< flush << "\n"); << flush << "\n");
DFNodeContext_CPU *Context = (DFNodeContext_CPU *)graphID; DFNodeContext_CPU *Context = (DFNodeContext_CPU *)graphID;
// Twine name = Twine("Edge.")+Twine(Context->EdgeBuffers->size());
CircularBuffer<uint64_t> *bufferID = CircularBuffer<uint64_t> *bufferID =
new CircularBuffer<uint64_t>(BUFFER_SIZE, "Edge"); new CircularBuffer<uint64_t>(BUFFER_SIZE, "Edge");
DEBUG(cout << "\tNew Buffer: " << bufferID << flush << "\n"); DEBUG(cout << "\tNew Buffer: " << bufferID << flush << "\n");
...@@ -1275,7 +1230,6 @@ void *llvm_hpvm_createLastInputBuffer(void *graphID, uint64_t size) { ...@@ -1275,7 +1230,6 @@ void *llvm_hpvm_createLastInputBuffer(void *graphID, uint64_t size) {
DEBUG(cout << "Create isLastInputBuffer -- Graph: " << graphID DEBUG(cout << "Create isLastInputBuffer -- Graph: " << graphID
<< ", Size: " << size << flush << "\n"); << ", Size: " << size << flush << "\n");
DFNodeContext_CPU *Context = (DFNodeContext_CPU *)graphID; DFNodeContext_CPU *Context = (DFNodeContext_CPU *)graphID;
// Twine name = Twine("isLastInput.")+Twine(Context->EdgeBuffers->size());
CircularBuffer<uint64_t> *bufferID = CircularBuffer<uint64_t> *bufferID =
new CircularBuffer<uint64_t>(BUFFER_SIZE, "LastInput"); new CircularBuffer<uint64_t>(BUFFER_SIZE, "LastInput");
DEBUG(cout << "\tNew Buffer: " << bufferID << flush << "\n"); DEBUG(cout << "\tNew Buffer: " << bufferID << flush << "\n");
...@@ -1346,8 +1300,6 @@ void *llvm_hpvm_cpu_launch(void *(*rootFunc)(void *), void *arguments) { ...@@ -1346,8 +1300,6 @@ void *llvm_hpvm_cpu_launch(void *(*rootFunc)(void *), void *arguments) {
void llvm_hpvm_cpu_wait(void *graphID) { void llvm_hpvm_cpu_wait(void *graphID) {
DEBUG(cout << "Waiting for pthread to finish ...\n"); DEBUG(cout << "Waiting for pthread to finish ...\n");
// DFNodeContext_CPU* Context = (DFNodeContext_CPU*) graphID;
// pthread_join(Context->threadID, NULL);
free(graphID); free(graphID);
DEBUG(cout << "\t... pthread Done!\n"); DEBUG(cout << "\t... pthread Done!\n");
} }
...@@ -1500,9 +1452,6 @@ void *llvm_hpvm_ocl_initContext(enum hpvm::Target T) { ...@@ -1500,9 +1452,6 @@ void *llvm_hpvm_ocl_initContext(enum hpvm::Target T) {
cl_uint numDevices; cl_uint numDevices;
clCreateSubDevices(clDevices[0], props, num_entries, subdevice_id, clCreateSubDevices(clDevices[0], props, num_entries, subdevice_id,
&numDevices); &numDevices);
// printf("Num of devices = %d\n", numDevices);
// for(unsigned i =0 ; i< numDevices; i++)
// printf("Subdevice id %d = %p\n", i, subdevice_id[i]);
clDevices[0] = subdevice_id[0]; clDevices[0] = subdevice_id[0];
globalOCLContext = globalOCLContext =
clCreateContext(properties, 1, clDevices, NULL, NULL, &errcode); clCreateContext(properties, 1, clDevices, NULL, NULL, &errcode);
...@@ -1527,10 +1476,6 @@ void llvm_hpvm_ocl_clearContext(void *graphID) { ...@@ -1527,10 +1476,6 @@ void llvm_hpvm_ocl_clearContext(void *graphID) {
// FIXME: Have separate function to release command queue and clear context. // FIXME: Have separate function to release command queue and clear context.
// Would be useful when a context has multiple command queues // Would be useful when a context has multiple command queues
clReleaseKernel(Context->clKernel); clReleaseKernel(Context->clKernel);
// clReleaseProgram(Context->clProgram);
// clReleaseCommandQueue(Context->clCommandQue);
// clReleaseContext(globalOCLContext);
// DEBUG(cout << "Released context at: " << globalOCLContext);
free(Context); free(Context);
DEBUG(cout << "Done with OCL kernel\n"); DEBUG(cout << "Done with OCL kernel\n");
cout << "Printing HPVM Timer: KernelTimer\n"; cout << "Printing HPVM Timer: KernelTimer\n";
...@@ -1546,9 +1491,7 @@ void llvm_hpvm_ocl_argument_shared(void *graphID, int arg_index, size_t size) { ...@@ -1546,9 +1491,7 @@ void llvm_hpvm_ocl_argument_shared(void *graphID, int arg_index, size_t size) {
DFNodeContext_OCL *Context = (DFNodeContext_OCL *)graphID; DFNodeContext_OCL *Context = (DFNodeContext_OCL *)graphID;
DEBUG(cout << "Using Context: " << Context << flush << "\n"); DEBUG(cout << "Using Context: " << Context << flush << "\n");
DEBUG(cout << "Using clKernel: " << Context->clKernel << flush << "\n"); DEBUG(cout << "Using clKernel: " << Context->clKernel << flush << "\n");
// pthread_mutex_lock(&ocl_mtx);
cl_int errcode = clSetKernelArg(Context->clKernel, arg_index, size, NULL); cl_int errcode = clSetKernelArg(Context->clKernel, arg_index, size, NULL);
// pthread_mutex_unlock(&ocl_mtx);
checkErr(errcode, CL_SUCCESS, "Failure to set shared memory argument"); checkErr(errcode, CL_SUCCESS, "Failure to set shared memory argument");
pthread_mutex_unlock(&ocl_mtx); pthread_mutex_unlock(&ocl_mtx);
} }
...@@ -1562,9 +1505,7 @@ void llvm_hpvm_ocl_argument_scalar(void *graphID, void *input, int arg_index, ...@@ -1562,9 +1505,7 @@ void llvm_hpvm_ocl_argument_scalar(void *graphID, void *input, int arg_index,
DFNodeContext_OCL *Context = (DFNodeContext_OCL *)graphID; DFNodeContext_OCL *Context = (DFNodeContext_OCL *)graphID;
DEBUG(cout << "Using Context: " << Context << flush << "\n"); DEBUG(cout << "Using Context: " << Context << flush << "\n");
DEBUG(cout << "Using clKernel: " << Context->clKernel << flush << "\n"); DEBUG(cout << "Using clKernel: " << Context->clKernel << flush << "\n");
// pthread_mutex_lock(&ocl_mtx);
cl_int errcode = clSetKernelArg(Context->clKernel, arg_index, size, input); cl_int errcode = clSetKernelArg(Context->clKernel, arg_index, size, input);
// pthread_mutex_unlock(&ocl_mtx);
checkErr(errcode, CL_SUCCESS, "Failure to set constant input argument"); checkErr(errcode, CL_SUCCESS, "Failure to set constant input argument");
pthread_mutex_unlock(&ocl_mtx); pthread_mutex_unlock(&ocl_mtx);
} }
...@@ -1588,10 +1529,8 @@ void *llvm_hpvm_ocl_argument_ptr(void *graphID, void *input, int arg_index, ...@@ -1588,10 +1529,8 @@ void *llvm_hpvm_ocl_argument_ptr(void *graphID, void *input, int arg_index,
pthread_mutex_lock(&ocl_mtx); pthread_mutex_lock(&ocl_mtx);
// Set Kernel Argument // Set Kernel Argument
// pthread_mutex_lock(&ocl_mtx);
cl_int errcode = clSetKernelArg(Context->clKernel, arg_index, sizeof(cl_mem), cl_int errcode = clSetKernelArg(Context->clKernel, arg_index, sizeof(cl_mem),
(void *)&d_input); (void *)&d_input);
// pthread_mutex_unlock(&ocl_mtx);
checkErr(errcode, CL_SUCCESS, "Failure to set pointer argument"); checkErr(errcode, CL_SUCCESS, "Failure to set pointer argument");
DEBUG(cout << "\tDevicePtr = " << d_input << flush << "\n"); DEBUG(cout << "\tDevicePtr = " << d_input << flush << "\n");
pthread_mutex_unlock(&ocl_mtx); pthread_mutex_unlock(&ocl_mtx);
...@@ -1605,15 +1544,11 @@ void *llvm_hpvm_ocl_output_ptr(void *graphID, int arg_index, size_t size) { ...@@ -1605,15 +1544,11 @@ void *llvm_hpvm_ocl_output_ptr(void *graphID, int arg_index, size_t size) {
<< flush << "\n"); << flush << "\n");
DFNodeContext_OCL *Context = (DFNodeContext_OCL *)graphID; DFNodeContext_OCL *Context = (DFNodeContext_OCL *)graphID;
cl_int errcode; cl_int errcode;
// pthread_mutex_lock(&ocl_mtx);
cl_mem d_output = clCreateBuffer(Context->clOCLContext, CL_MEM_WRITE_ONLY, cl_mem d_output = clCreateBuffer(Context->clOCLContext, CL_MEM_WRITE_ONLY,
size, NULL, &errcode); size, NULL, &errcode);
// pthread_mutex_unlock(&ocl_mtx);
checkErr(errcode, CL_SUCCESS, "Failure to create output buffer on device"); checkErr(errcode, CL_SUCCESS, "Failure to create output buffer on device");
// pthread_mutex_lock(&ocl_mtx);
errcode = clSetKernelArg(Context->clKernel, arg_index, sizeof(cl_mem), errcode = clSetKernelArg(Context->clKernel, arg_index, sizeof(cl_mem),
(void *)&d_output); (void *)&d_output);
// pthread_mutex_unlock(&ocl_mtx);
checkErr(errcode, CL_SUCCESS, "Failure to set pointer argument"); checkErr(errcode, CL_SUCCESS, "Failure to set pointer argument");
DEBUG(cout << "\tDevicePtr = " << d_output << flush << "\n"); DEBUG(cout << "\tDevicePtr = " << d_output << flush << "\n");
pthread_mutex_unlock(&ocl_mtx); pthread_mutex_unlock(&ocl_mtx);
...@@ -1621,9 +1556,6 @@ void *llvm_hpvm_ocl_output_ptr(void *graphID, int arg_index, size_t size) { ...@@ -1621,9 +1556,6 @@ void *llvm_hpvm_ocl_output_ptr(void *graphID, int arg_index, size_t size) {
} }
void llvm_hpvm_ocl_free(void *ptr) { void llvm_hpvm_ocl_free(void *ptr) {
// DEBUG(cout << "Release Device Pointer: " << ptr << flush << "\n");
// cl_mem d_ptr = (cl_mem) ptr;
// clReleaseMemObject(d_ptr);
} }
void *llvm_hpvm_ocl_getOutput(void *graphID, void *h_output, void *d_output, void *llvm_hpvm_ocl_getOutput(void *graphID, void *h_output, void *d_output,
...@@ -1635,11 +1567,9 @@ void *llvm_hpvm_ocl_getOutput(void *graphID, void *h_output, void *d_output, ...@@ -1635,11 +1567,9 @@ void *llvm_hpvm_ocl_getOutput(void *graphID, void *h_output, void *d_output,
if (h_output == NULL) if (h_output == NULL)
h_output = malloc(size); h_output = malloc(size);
DFNodeContext_OCL *Context = (DFNodeContext_OCL *)graphID; DFNodeContext_OCL *Context = (DFNodeContext_OCL *)graphID;
// pthread_mutex_lock(&ocl_mtx);
cl_int errcode = cl_int errcode =
clEnqueueReadBuffer(Context->clCommandQue, (cl_mem)d_output, CL_TRUE, 0, clEnqueueReadBuffer(Context->clCommandQue, (cl_mem)d_output, CL_TRUE, 0,
size, h_output, 0, NULL, NULL); size, h_output, 0, NULL, NULL);
// pthread_mutex_unlock(&ocl_mtx);
checkErr(errcode, CL_SUCCESS, "[getOutput] Failure to read output"); checkErr(errcode, CL_SUCCESS, "[getOutput] Failure to read output");
pthread_mutex_unlock(&ocl_mtx); pthread_mutex_unlock(&ocl_mtx);
return h_output; return h_output;
...@@ -1687,22 +1617,13 @@ void *llvm_hpvm_ocl_executeNode(void *graphID, unsigned workDim, ...@@ -1687,22 +1617,13 @@ void *llvm_hpvm_ocl_executeNode(void *graphID, unsigned workDim,
} }
DEBUG(cout << ")\n"); DEBUG(cout << ")\n");
} }
// pthread_mutex_lock(&ocl_mtx);
clFinish(Context->clCommandQue); clFinish(Context->clCommandQue);
// pthread_mutex_unlock(&ocl_mtx);
hpvm_SwitchToTimer(&kernel_timer, hpvm_TimerID_COMPUTATION); hpvm_SwitchToTimer(&kernel_timer, hpvm_TimerID_COMPUTATION);
// for(int i=0 ;i < NUM_TESTS; i++) {
// cout << "Iteration = " << i << flush << "\n";
// pthread_mutex_lock(&ocl_mtx);
cl_int errcode = clEnqueueNDRangeKernel( cl_int errcode = clEnqueueNDRangeKernel(
Context->clCommandQue, Context->clKernel, workDim, NULL, GlobalWG, Context->clCommandQue, Context->clKernel, workDim, NULL, GlobalWG,
(localWorkSize == NULL) ? NULL : LocalWG, 0, NULL, NULL); (localWorkSize == NULL) ? NULL : LocalWG, 0, NULL, NULL);
// pthread_mutex_unlock(&ocl_mtx);
checkErr(errcode, CL_SUCCESS, "Failure to enqueue kernel"); checkErr(errcode, CL_SUCCESS, "Failure to enqueue kernel");
//}
// pthread_mutex_lock(&ocl_mtx);
clFinish(Context->clCommandQue); clFinish(Context->clCommandQue);
// pthread_mutex_unlock(&ocl_mtx);
hpvm_SwitchToTimer(&kernel_timer, hpvm_TimerID_NONE); hpvm_SwitchToTimer(&kernel_timer, hpvm_TimerID_NONE);
pthread_mutex_unlock(&ocl_mtx); pthread_mutex_unlock(&ocl_mtx);
...@@ -1768,11 +1689,9 @@ void *llvm_hpvm_ocl_launch(const char *FileName, const char *KernelName) { ...@@ -1768,11 +1689,9 @@ void *llvm_hpvm_ocl_launch(const char *FileName, const char *KernelName) {
Context->clOCLContext = globalOCLContext; Context->clOCLContext = globalOCLContext;
// Create a command-queue // Create a command-queue
// pthread_mutex_lock(&ocl_mtx);
Context->clCommandQue = clCreateCommandQueue( Context->clCommandQue = clCreateCommandQueue(
Context->clOCLContext, clDevices[0], CL_QUEUE_PROFILING_ENABLE, &errcode); Context->clOCLContext, clDevices[0], CL_QUEUE_PROFILING_ENABLE, &errcode);
globalCommandQue = Context->clCommandQue; globalCommandQue = Context->clCommandQue;
// pthread_mutex_unlock(&ocl_mtx);
checkErr(errcode, CL_SUCCESS, "Failure to create command queue"); checkErr(errcode, CL_SUCCESS, "Failure to create command queue");
DEBUG(cout << "Loading program binary: " << FileName << flush << "\n"); DEBUG(cout << "Loading program binary: " << FileName << flush << "\n");
...@@ -1780,10 +1699,8 @@ void *llvm_hpvm_ocl_launch(const char *FileName, const char *KernelName) { ...@@ -1780,10 +1699,8 @@ void *llvm_hpvm_ocl_launch(const char *FileName, const char *KernelName) {
checkErr(programSource != NULL, 1 /*bool true*/, checkErr(programSource != NULL, 1 /*bool true*/,
"Failure to load Program Binary"); "Failure to load Program Binary");
// pthread_mutex_lock(&ocl_mtx);
Context->clProgram = clCreateProgramWithSource( Context->clProgram = clCreateProgramWithSource(
Context->clOCLContext, 1, (const char **)&programSource, NULL, &errcode); Context->clOCLContext, 1, (const char **)&programSource, NULL, &errcode);
// pthread_mutex_unlock(&ocl_mtx);
checkErr(errcode, CL_SUCCESS, "Failure to create program from binary"); checkErr(errcode, CL_SUCCESS, "Failure to create program from binary");
DEBUG(cout << "Building kernel - " << KernelName << " from file " << FileName DEBUG(cout << "Building kernel - " << KernelName << " from file " << FileName
...@@ -1814,7 +1731,6 @@ void *llvm_hpvm_ocl_launch(const char *FileName, const char *KernelName) { ...@@ -1814,7 +1731,6 @@ void *llvm_hpvm_ocl_launch(const char *FileName, const char *KernelName) {
checkErr(errcode, CL_SUCCESS, "Failure to create kernel"); checkErr(errcode, CL_SUCCESS, "Failure to create kernel");
DEBUG(cout << "Kernel ID = " << Context->clKernel << "\n"); DEBUG(cout << "Kernel ID = " << Context->clKernel << "\n");
// free(clDevices);
free(programSource); free(programSource);
pthread_mutex_unlock(&ocl_mtx); pthread_mutex_unlock(&ocl_mtx);
...@@ -1825,16 +1741,12 @@ void llvm_hpvm_ocl_wait(void *graphID) { ...@@ -1825,16 +1741,12 @@ void llvm_hpvm_ocl_wait(void *graphID) {
pthread_mutex_lock(&ocl_mtx); pthread_mutex_lock(&ocl_mtx);
DEBUG(cout << "Wait\n"); DEBUG(cout << "Wait\n");
DFNodeContext_OCL *Context = (DFNodeContext_OCL *)graphID; DFNodeContext_OCL *Context = (DFNodeContext_OCL *)graphID;
// pthread_mutex_lock(&ocl_mtx);
clFinish(Context->clCommandQue); clFinish(Context->clCommandQue);
// pthread_mutex_unlock(&ocl_mtx);
pthread_mutex_unlock(&ocl_mtx); pthread_mutex_unlock(&ocl_mtx);
} }
void llvm_hpvm_switchToTimer(void **timerSet, enum hpvm_TimerID timer) { void llvm_hpvm_switchToTimer(void **timerSet, enum hpvm_TimerID timer) {
// cout << "Switching to timer " << timer << flush << "\n";
pthread_mutex_lock(&ocl_mtx); pthread_mutex_lock(&ocl_mtx);
// hpvm_SwitchToTimer((hpvm_TimerSet*)(*timerSet), timer);
pthread_mutex_unlock(&ocl_mtx); pthread_mutex_unlock(&ocl_mtx);
} }
void llvm_hpvm_printTimerSet(void **timerSet, char *timerName) { void llvm_hpvm_printTimerSet(void **timerSet, char *timerName) {
......
...@@ -11,7 +11,6 @@ ...@@ -11,7 +11,6 @@
#include <pthread.h> #include <pthread.h>
#include <string> #include <string>
#include <vector> #include <vector>
//#include <condition_variable>
#include "../../include/SupportHPVM/HPVMHint.h" #include "../../include/SupportHPVM/HPVMHint.h"
#include "../../include/SupportHPVM/HPVMTimer.h" #include "../../include/SupportHPVM/HPVMTimer.h"
...@@ -206,43 +205,27 @@ public: ...@@ -206,43 +205,27 @@ public:
template <class ElementType> template <class ElementType>
bool CircularBuffer<ElementType>::push(ElementType E) { bool CircularBuffer<ElementType>::push(ElementType E) {
// DEBUG(cout << name << " Buffer[" << ID << "]: Push " << E << flush <<
// "\n"); unique_lock<mutex> lk(mtx);
pthread_mutex_lock(&mtx); pthread_mutex_lock(&mtx);
if ((Head + 1) % bufferSize == Tail) { if ((Head + 1) % bufferSize == Tail) {
// DEBUG(cout << name << " Buffer[" << ID << "]: Push going to sleep
// ...\n"); cv.wait(lk);
pthread_cond_wait(&cv, &mtx); pthread_cond_wait(&cv, &mtx);
// DEBUG(cout << name << " Buffer[" << ID << "]: Push woke up\n");
} }
buffer[Head] = E; buffer[Head] = E;
Head = (Head + 1) % bufferSize; Head = (Head + 1) % bufferSize;
numElements++; numElements++;
// DEBUG(cout << name << " Buffer[" << ID << "]: Total Elements = " <<
// numElements << flush << "\n"); lk.unlock();
pthread_mutex_unlock(&mtx); pthread_mutex_unlock(&mtx);
// cv.notify_one();
pthread_cond_signal(&cv); pthread_cond_signal(&cv);
return true; return true;
} }
template <class ElementType> ElementType CircularBuffer<ElementType>::pop() { template <class ElementType> ElementType CircularBuffer<ElementType>::pop() {
// unique_lock<mutex> lk(mtx);
// DEBUG(cout << name << " Buffer[" << ID << "]: Pop\n");
pthread_mutex_lock(&mtx); pthread_mutex_lock(&mtx);
if (Tail == Head) { if (Tail == Head) {
// DEBUG(cout << name << " Buffer[" << ID << "]: Pop going to sleep ...\n");
// cv.wait(lk);
pthread_cond_wait(&cv, &mtx); pthread_cond_wait(&cv, &mtx);
// DEBUG(cout << name << " Buffer[" << ID << "]: Pop woke up\n");
} }
ElementType E = buffer[Tail]; ElementType E = buffer[Tail];
Tail = (Tail + 1) % bufferSize; Tail = (Tail + 1) % bufferSize;
numElements--; numElements--;
// DEBUG(cout << name << " Buffer[" << ID << "]: Total Elements = " <<
// numElements << flush << "\n"); lk.unlock();
pthread_mutex_unlock(&mtx); pthread_mutex_unlock(&mtx);
// cv.notify_one();
pthread_cond_signal(&cv); pthread_cond_signal(&cv);
return E; return E;
} }
......
...@@ -24,8 +24,6 @@ private: ...@@ -24,8 +24,6 @@ private:
class NodePolicy : public Policy { class NodePolicy : public Policy {
virtual int getVersion(const char *name, int64_t it) override { virtual int getVersion(const char *name, int64_t it) override {
std::string s(name); std::string s(name);
// std::string NodeNames[1] = {
// "_Z9mysgemmNTPfiS_iS_iiff_clonedInternal_level2_cloned" };
std::string NodeNames[] = { std::string NodeNames[] = {
"WrapperGaussianSmoothing_cloned", "WrapperGaussianSmoothing_cloned",
"WrapperlaplacianEstimate_cloned", "WrapperlaplacianEstimate_cloned",
...@@ -34,10 +32,6 @@ class NodePolicy : public Policy { ...@@ -34,10 +32,6 @@ class NodePolicy : public Policy {
"WrapperComputeMaxGradient_cloned", "WrapperComputeMaxGradient_cloned",
"WrapperRejectZeroCrossings_cloned", "WrapperRejectZeroCrossings_cloned",
}; };
// if (!s.compare(NodeNames[4])) {
// std::cout << s << ": CPU" << "\n";
// return 0;
//}
return 2; return 2;
} }
}; };
...@@ -54,17 +48,14 @@ class IterationPolicy : public Policy { ...@@ -54,17 +48,14 @@ class IterationPolicy : public Policy {
class DeviceStatusPolicy : public Policy { class DeviceStatusPolicy : public Policy {
virtual int getVersion(const char *name, int64_t it) override { virtual int getVersion(const char *name, int64_t it) override {
if (deviceStatus) { if (deviceStatus) {
// std::cout << "Returning GPU\n";
return 2; return 2;
} else { } else {
// std::cout << "Returning CPU\n";
return 0; return 0;
} }
} }
}; };
/* ------------------------------------------------------------------------- */ /* ------------------------------------------------------------------------- */
// Added for the CFAR interactive policy demo.
class InteractivePolicy : public Policy { class InteractivePolicy : public Policy {
private: private:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment