hpvm-rt.cpp 60.3 KB
Newer Older
Yifan Zhao's avatar
Yifan Zhao committed
1
#include <CL/cl.h>
Yifan Zhao's avatar
Yifan Zhao committed
2
#include <algorithm>
Yifan Zhao's avatar
Yifan Zhao committed
3
#include <cassert>
4
#include <cstdio>
Yifan Zhao's avatar
Yifan Zhao committed
5
#include <cstdlib>
6
#include <cstring>
Yifan Zhao's avatar
Yifan Zhao committed
7
#include <iostream>
8
#include <map>
Yifan Zhao's avatar
Yifan Zhao committed
9
10
#include <pthread.h>
#include <string>
11
12
13
14

#include <unistd.h>

#if _POSIX_VERSION >= 200112L
Yifan Zhao's avatar
Yifan Zhao committed
15
#include <sys/time.h>
16
#endif
Yifan Zhao's avatar
Yifan Zhao committed
17
#include "hpvm-rt.h"
18

19
//#define DEBUG_BUILD
20
#ifndef DEBUG_BUILD
Yifan Zhao's avatar
Yifan Zhao committed
21
22
#define DEBUG(s)                                                               \
  {}
23
24
25
26
#else
#define DEBUG(s) s
#endif

Yifan Zhao's avatar
Yifan Zhao committed
27
#define BILLION 1000000000LL
28
29
30

// Context record whose name suggests it belongs to a dataflow node run on the
// CPU. NOTE(review): none of the code in this part of the file reads these
// fields, so the per-field notes below are derived from the names only --
// confirm against the code that fills the struct in.
typedef struct {
  pthread_t threadID;
  // presumably the threads spawned on behalf of this node -- TODO confirm
  std::vector<pthread_t> *threads;
  // Map from InputPort to Size
  std::map<unsigned, uint64_t> *ArgInPortSizeMap;
  std::vector<unsigned> *BindInSourcePort;
  std::vector<uint64_t> *BindOutSizes;
  std::vector<uint64_t> *EdgeSizes;
  // Circular buffers, one vector per kind of connection (bind-in, bind-out,
  // edge) plus one vector for "is last input" flags (naming-based reading).
  std::vector<CircularBuffer<uint64_t> *> *BindInputBuffers;
  std::vector<CircularBuffer<uint64_t> *> *BindOutputBuffers;
  std::vector<CircularBuffer<uint64_t> *> *EdgeBuffers;
  std::vector<CircularBuffer<uint64_t> *> *isLastInputBuffers;
} DFNodeContext_CPU;
42
43
44
45
46
47
48
49
50

// Bundle of OpenCL handles that travels with one node context.
typedef struct {
  // Context in which llvm_hpvm_ocl_request_mem creates device buffers; it is
  // also compared against a tracked entry's context to detect a reusable copy.
  cl_context clOCLContext;
  // Queue used for the blocking host<->device buffer transfers in this file.
  cl_command_queue clCommandQue;
  // Program and kernel handles; not referenced by the code visible here.
  cl_program clProgram;
  cl_kernel clKernel;
} DFNodeContext_OCL;

cl_context globalOCLContext;
Yifan Zhao's avatar
Yifan Zhao committed
51
cl_device_id *clDevices;
52
53
54
55
56
57
58
59
cl_command_queue globalCommandQue;

MemTracker MTracker;
vector<DFGDepth> DStack;
// Mutex to prevent concurrent access by multiple threads in pipeline
pthread_mutex_t ocl_mtx;

#define NUM_TESTS 1
Yifan Zhao's avatar
Yifan Zhao committed
60
hpvm_TimerSet kernel_timer;
61

Yifan Zhao's avatar
Yifan Zhao committed
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
// Translate an OpenCL status code into the name of the matching CL_* constant.
//
// The known codes form three contiguous ranges (0..-19, -30..-68 and
// -1000..-1005), so each range is stored as a table indexed by the distance
// from the first code of the range. Any other value yields
// "Unknown OpenCL error". The returned pointer refers to static storage.
static const char *getErrorString(cl_int error) {
  // run-time and JIT compiler errors: 0 down to -19
  static const char *const runtimeNames[] = {
      "CL_SUCCESS",
      "CL_DEVICE_NOT_FOUND",
      "CL_DEVICE_NOT_AVAILABLE",
      "CL_COMPILER_NOT_AVAILABLE",
      "CL_MEM_OBJECT_ALLOCATION_FAILURE",
      "CL_OUT_OF_RESOURCES",
      "CL_OUT_OF_HOST_MEMORY",
      "CL_PROFILING_INFO_NOT_AVAILABLE",
      "CL_MEM_COPY_OVERLAP",
      "CL_IMAGE_FORMAT_MISMATCH",
      "CL_IMAGE_FORMAT_NOT_SUPPORTED",
      "CL_BUILD_PROGRAM_FAILURE",
      "CL_MAP_FAILURE",
      "CL_MISALIGNED_SUB_BUFFER_OFFSET",
      "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST",
      "CL_COMPILE_PROGRAM_FAILURE",
      "CL_LINKER_NOT_AVAILABLE",
      "CL_LINK_PROGRAM_FAILURE",
      "CL_DEVICE_PARTITION_FAILED",
      "CL_KERNEL_ARG_INFO_NOT_AVAILABLE",
  };

  // compile-time errors: -30 down to -68
  static const char *const compileNames[] = {
      "CL_INVALID_VALUE",
      "CL_INVALID_DEVICE_TYPE",
      "CL_INVALID_PLATFORM",
      "CL_INVALID_DEVICE",
      "CL_INVALID_CONTEXT",
      "CL_INVALID_QUEUE_PROPERTIES",
      "CL_INVALID_COMMAND_QUEUE",
      "CL_INVALID_HOST_PTR",
      "CL_INVALID_MEM_OBJECT",
      "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR",
      "CL_INVALID_IMAGE_SIZE",
      "CL_INVALID_SAMPLER",
      "CL_INVALID_BINARY",
      "CL_INVALID_BUILD_OPTIONS",
      "CL_INVALID_PROGRAM",
      "CL_INVALID_PROGRAM_EXECUTABLE",
      "CL_INVALID_KERNEL_NAME",
      "CL_INVALID_KERNEL_DEFINITION",
      "CL_INVALID_KERNEL",
      "CL_INVALID_ARG_INDEX",
      "CL_INVALID_ARG_VALUE",
      "CL_INVALID_ARG_SIZE",
      "CL_INVALID_KERNEL_ARGS",
      "CL_INVALID_WORK_DIMENSION",
      "CL_INVALID_WORK_GROUP_SIZE",
      "CL_INVALID_WORK_ITEM_SIZE",
      "CL_INVALID_GLOBAL_OFFSET",
      "CL_INVALID_EVENT_WAIT_LIST",
      "CL_INVALID_EVENT",
      "CL_INVALID_OPERATION",
      "CL_INVALID_GL_OBJECT",
      "CL_INVALID_BUFFER_SIZE",
      "CL_INVALID_MIP_LEVEL",
      "CL_INVALID_GLOBAL_WORK_SIZE",
      "CL_INVALID_PROPERTY",
      "CL_INVALID_IMAGE_DESCRIPTOR",
      "CL_INVALID_COMPILER_OPTIONS",
      "CL_INVALID_LINKER_OPTIONS",
      "CL_INVALID_DEVICE_PARTITION_COUNT",
  };

  // extension errors: -1000 down to -1005
  static const char *const extensionNames[] = {
      "CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR",
      "CL_PLATFORM_NOT_FOUND_KHR",
      "CL_INVALID_D3D10_DEVICE_KHR",
      "CL_INVALID_D3D10_RESOURCE_KHR",
      "CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR",
      "CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR",
  };

  if (error <= 0 && error >= -19)
    return runtimeNames[0 - error];
  if (error <= -30 && error >= -68)
    return compileNames[-30 - error];
  if (error <= -1000 && error >= -1005)
    return extensionNames[-1000 - error];
  return "Unknown OpenCL error";
}

Yifan Zhao's avatar
Yifan Zhao committed
204
// Abort the process when an OpenCL call did not return the expected status.
//
// err     - status returned by the call
// success - status the caller regards as success
// name    - message identifying the failed operation
//
// On mismatch the message and the symbolic error name are written to cout and
// the process exits with EXIT_FAILURE; otherwise the function does nothing.
static inline void checkErr(cl_int err, cl_int success, const char *name) {
  if (err == success)
    return;

  cout << "ERROR: " << name << flush << "\n";
  cout << "ErrorCode: " << getErrorString(err) << flush << "\n";
  exit(EXIT_FAILURE);
}

/************************* Depth Stack Routines ***************************/

214
void llvm_hpvm_cpu_dstack_push(unsigned n, uint64_t limitX, uint64_t iX,
Yifan Zhao's avatar
Yifan Zhao committed
215
216
217
218
219
220
221
222
223
224
225
                               uint64_t limitY, uint64_t iY, uint64_t limitZ,
                               uint64_t iZ) {
  DEBUG(cout << "Pushing node information on stack:\n");
  DEBUG(cout << "\tNumDim = " << n << "\t Limit(" << limitX << ", " << limitY
             << ", " << limitZ << ")\n");
  DEBUG(cout << "\tInstance(" << iX << ", " << iY << ", " << iZ << ")\n");
  DFGDepth nodeInfo(n, limitX, iX, limitY, iY, limitZ, iZ);
  pthread_mutex_lock(&ocl_mtx);
  DStack.push_back(nodeInfo);
  DEBUG(cout << "DStack size = " << DStack.size() << flush << "\n");
  pthread_mutex_unlock(&ocl_mtx);
226
227
}

228
// Remove the most recently pushed entry from the depth stack (DStack).
//
// The removal is performed while holding ocl_mtx. Popping an empty stack is a
// caller error: std::vector::pop_back on an empty vector is undefined
// behaviour, so it is caught by an assertion in builds with assertions
// enabled.
void llvm_hpvm_cpu_dstack_pop() {
  DEBUG(cout << "Popping from depth stack\n");
  pthread_mutex_lock(&ocl_mtx);
  assert(!DStack.empty() && "depth stack popped while empty");
  DStack.pop_back();
  DEBUG(cout << "DStack size = " << DStack.size() << flush << "\n");
  pthread_mutex_unlock(&ocl_mtx);
}

236
// Return getDimLimit(dim) of the depth-stack entry `level` positions below the
// top (level 0 is the most recently pushed entry).
//
// The lookup is performed while holding ocl_mtx. `level` must be smaller than
// the current stack size; otherwise the index computation would wrap around
// and read out of bounds, so the precondition is asserted.
uint64_t llvm_hpvm_cpu_getDimLimit(unsigned level, unsigned dim) {
  DEBUG(cout << "Request limit for dim " << dim << " of ancestor " << level
             << flush << "\n");
  pthread_mutex_lock(&ocl_mtx);
  // size_t avoids truncating the container size to 32 bits.
  const size_t depth = DStack.size();
  assert(level < depth && "ancestor level exceeds depth-stack size");
  uint64_t result = DStack[depth - level - 1].getDimLimit(dim);
  DEBUG(cout << "\t Return: " << result << flush << "\n");
  pthread_mutex_unlock(&ocl_mtx);
  return result;
}

248
// Return getDimInstance(dim) of the depth-stack entry `level` positions below
// the top (level 0 is the most recently pushed entry).
//
// The lookup is performed while holding ocl_mtx. `level` must be smaller than
// the current stack size; otherwise the index computation would wrap around
// and read out of bounds, so the precondition is asserted.
uint64_t llvm_hpvm_cpu_getDimInstance(unsigned level, unsigned dim) {
  DEBUG(cout << "Request instance id for dim " << dim << " of ancestor "
             << level << flush << "\n");
  pthread_mutex_lock(&ocl_mtx);
  // size_t avoids truncating the container size to 32 bits.
  const size_t depth = DStack.size();
  assert(level < depth && "ancestor level exceeds depth-stack size");
  uint64_t result = DStack[depth - level - 1].getDimInstance(dim);
  DEBUG(cout << "\t Return: " << result << flush << "\n");
  pthread_mutex_unlock(&ocl_mtx);
  return result;
}

/********************** Memory Tracking Routines **************************/

Yifan Zhao's avatar
Yifan Zhao committed
262
void llvm_hpvm_track_mem(void *ptr, size_t size) {
263
  DEBUG(cout << "Start tracking memory: " << ptr << flush << "\n");
Yifan Zhao's avatar
Yifan Zhao committed
264
265
  MemTrackerEntry *MTE = MTracker.lookup(ptr);
  if (MTE != NULL) {
266
267
268
269
270
271
272
273
    DEBUG(cout << "ID " << ptr << " already present in the MemTracker Table\n");
    return;
  }
  DEBUG(cout << "Inserting ID " << ptr << " in the MemTracker Table\n");
  MTracker.insert(ptr, size, MemTrackerEntry::HOST, ptr);
  DEBUG(MTracker.print());
}

Yifan Zhao's avatar
Yifan Zhao committed
274
void llvm_hpvm_untrack_mem(void *ptr) {
275
  DEBUG(cout << "Stop tracking memory: " << ptr << flush << "\n");
Yifan Zhao's avatar
Yifan Zhao committed
276
277
278
279
  MemTrackerEntry *MTE = MTracker.lookup(ptr);
  if (MTE == NULL) {
    cout << "WARNING: Trying to remove ID " << ptr
         << " not present in the MemTracker Table\n";
280
281
282
    return;
  }
  DEBUG(cout << "Removing ID " << ptr << " from MemTracker Table\n");
Yifan Zhao's avatar
Yifan Zhao committed
283
284
  if (MTE->getLocation() == MemTrackerEntry::DEVICE)
    clReleaseMemObject((cl_mem)MTE->getAddress());
285
286
287
288
  MTracker.remove(ptr);
  DEBUG(MTracker.print());
}

Yifan Zhao's avatar
Yifan Zhao committed
289
static void *llvm_hpvm_ocl_request_mem(void *ptr, size_t size,
Yifan Zhao's avatar
Yifan Zhao committed
290
291
                                       DFNodeContext_OCL *Context, bool isInput,
                                       bool isOutput) {
292
  pthread_mutex_lock(&ocl_mtx);
Yifan Zhao's avatar
Yifan Zhao committed
293
294
295
  DEBUG(cout << "[OCL] Request memory: " << ptr
             << " for context: " << Context->clOCLContext << flush << "\n");
  MemTrackerEntry *MTE = MTracker.lookup(ptr);
296
297
298
299
300
301
302
  if (MTE == NULL) {
    MTracker.print();
    cout << "ERROR: Requesting memory not present in Table\n";
    exit(EXIT_FAILURE);
  }
  // If already on device
  if (MTE->getLocation() == MemTrackerEntry::DEVICE &&
Yifan Zhao's avatar
Yifan Zhao committed
303
304
305
306
      ((DFNodeContext_OCL *)MTE->getContext())->clOCLContext ==
          Context->clOCLContext) {
    DEBUG(cout << "\tMemory found on device at: " << MTE->getAddress() << flush
               << "\n");
307
308
309
    pthread_mutex_unlock(&ocl_mtx);
    return MTE->getAddress();
  }
Yifan Zhao's avatar
Yifan Zhao committed
310
311
312

  DEBUG(cout << "\tMemory found on host at: " << MTE->getAddress() << flush
             << "\n");
313
314
315
316
317
  DEBUG(cout << "\t"; MTE->print(); cout << flush << "\n");
  // Else copy and update the latest copy
  cl_mem_flags clFlags;
  cl_int errcode;

Yifan Zhao's avatar
Yifan Zhao committed
318
319
320
321
322
323
324
325
  if (isInput && isOutput)
    clFlags = CL_MEM_READ_WRITE;
  else if (isInput)
    clFlags = CL_MEM_READ_ONLY;
  else if (isOutput)
    clFlags = CL_MEM_WRITE_ONLY;
  else
    clFlags = CL_MEM_READ_ONLY;
326

Yifan Zhao's avatar
Yifan Zhao committed
327
  hpvm_SwitchToTimer(&kernel_timer, hpvm_TimerID_COPY);
Yifan Zhao's avatar
Yifan Zhao committed
328
329
  cl_mem d_input =
      clCreateBuffer(Context->clOCLContext, clFlags, size, NULL, &errcode);
330
  checkErr(errcode, CL_SUCCESS, "Failure to allocate memory on device");
Yifan Zhao's avatar
Yifan Zhao committed
331
332
  DEBUG(cout << "\nMemory allocated on device: " << d_input << flush << "\n");
  if (isInput) {
333
    DEBUG(cout << "\tCopying ...");
Yifan Zhao's avatar
Yifan Zhao committed
334
335
    errcode = clEnqueueWriteBuffer(Context->clCommandQue, d_input, CL_TRUE, 0,
                                   size, MTE->getAddress(), 0, NULL, NULL);
336
337
338
    checkErr(errcode, CL_SUCCESS, "Failure to copy memory to device");
  }

Yifan Zhao's avatar
Yifan Zhao committed
339
  hpvm_SwitchToTimer(&kernel_timer, hpvm_TimerID_NONE);
340
  DEBUG(cout << " done\n");
Yifan Zhao's avatar
Yifan Zhao committed
341
  MTE->update(MemTrackerEntry::DEVICE, (void *)d_input, Context);
342
343
344
345
346
347
  DEBUG(cout << "Updated Table\n");
  DEBUG(MTracker.print());
  pthread_mutex_unlock(&ocl_mtx);
  return d_input;
}

348
void *llvm_hpvm_cpu_argument_ptr(void *ptr, size_t size) {
Yifan Zhao's avatar
Yifan Zhao committed
349
  return llvm_hpvm_request_mem(ptr, size);
350
351
}

Yifan Zhao's avatar
Yifan Zhao committed
352
void *llvm_hpvm_request_mem(void *ptr, size_t size) {
353
  pthread_mutex_lock(&ocl_mtx);
354
  DEBUG(cout << "[CPU] Request memory: " << ptr << flush << "\n");
Yifan Zhao's avatar
Yifan Zhao committed
355
356
  MemTrackerEntry *MTE = MTracker.lookup(ptr);
  if (MTE == NULL) {
357
358
359
360
361
    cout << "ERROR: Requesting memory not present in Table\n";
    pthread_mutex_unlock(&ocl_mtx);
    exit(EXIT_FAILURE);
  }
  // If already on host
Yifan Zhao's avatar
Yifan Zhao committed
362
363
364
  if (MTE->getLocation() == MemTrackerEntry::HOST) {
    DEBUG(cout << "\tMemory found on host at: " << MTE->getAddress() << flush
               << "\n");
365
366
367
368
369
    pthread_mutex_unlock(&ocl_mtx);
    return MTE->getAddress();
  }

  // Else copy from device and update table
Yifan Zhao's avatar
Yifan Zhao committed
370
371
  DEBUG(cout << "\tMemory found on device at: " << MTE->getAddress() << flush
             << "\n");
372
  DEBUG(cout << "\tCopying ...");
Yifan Zhao's avatar
Yifan Zhao committed
373
  hpvm_SwitchToTimer(&kernel_timer, hpvm_TimerID_COPY);
Yifan Zhao's avatar
Yifan Zhao committed
374
375
376
377
378
  // pthread_mutex_lock(&ocl_mtx);
  cl_int errcode = clEnqueueReadBuffer(
      ((DFNodeContext_OCL *)MTE->getContext())->clCommandQue,
      (cl_mem)MTE->getAddress(), CL_TRUE, 0, size, ptr, 0, NULL, NULL);
  // pthread_mutex_unlock(&ocl_mtx);
Yifan Zhao's avatar
Yifan Zhao committed
379
  hpvm_SwitchToTimer(&kernel_timer, hpvm_TimerID_NONE);
380
381
382
  DEBUG(cout << " done\n");
  checkErr(errcode, CL_SUCCESS, "[request mem] Failure to read output");
  DEBUG(cout << "Free mem object on device\n");
Yifan Zhao's avatar
Yifan Zhao committed
383
  clReleaseMemObject((cl_mem)MTE->getAddress());
384
385
386
387
388
389
390
391
392
  DEBUG(cout << "Updated Table\n");
  MTE->update(MemTrackerEntry::HOST, ptr);
  DEBUG(MTracker.print());
  pthread_mutex_unlock(&ocl_mtx);
  return ptr;
}

/*************************** Timer Routines **********************************/

Yifan Zhao's avatar
Yifan Zhao committed
393
394
// Nonzero for the timer categories this file treats as asynchronous:
// hpvm_TimerID_KERNEL and hpvm_TimerID_COPY_ASYNC.
static int is_async(enum hpvm_TimerID timer) {
  switch (timer) {
  case hpvm_TimerID_KERNEL:
  case hpvm_TimerID_COPY_ASYNC:
    return 1;
  default:
    return 0;
  }
}

Yifan Zhao's avatar
Yifan Zhao committed
397
398
// Nonzero for the timer categories this file treats as blocking:
// hpvm_TimerID_COPY and hpvm_TimerID_NONE.
static int is_blocking(enum hpvm_TimerID timer) {
  switch (timer) {
  case hpvm_TimerID_COPY:
  case hpvm_TimerID_NONE:
    return 1;
  default:
    return 0;
  }
}

Yifan Zhao's avatar
Yifan Zhao committed
401
#define INVALID_TIMERID hpvm_TimerID_LAST
402

Yifan Zhao's avatar
Yifan Zhao committed
403
static int asyncs_outstanding(struct hpvm_TimerSet *timers) {
404
  return (timers->async_markers != NULL) &&
Yifan Zhao's avatar
Yifan Zhao committed
405
         (timers->async_markers->timerID != INVALID_TIMERID);
406
407
}

Yifan Zhao's avatar
Yifan Zhao committed
408
409
static struct hpvm_async_time_marker_list *
get_last_async(struct hpvm_TimerSet *timers) {
410
  /* Find the last event recorded thus far */
Yifan Zhao's avatar
Yifan Zhao committed
411
  struct hpvm_async_time_marker_list *last_event = timers->async_markers;
Yifan Zhao's avatar
Yifan Zhao committed
412
413
414
  if (last_event != NULL && last_event->timerID != INVALID_TIMERID) {
    while (last_event->next != NULL &&
           last_event->next->timerID != INVALID_TIMERID)
415
416
417
418
419
420
      last_event = last_event->next;
    return last_event;
  } else
    return NULL;
}

Yifan Zhao's avatar
Yifan Zhao committed
421
static void insert_marker(struct hpvm_TimerSet *tset, enum hpvm_TimerID timer) {
422
  cl_int ciErrNum = CL_SUCCESS;
Yifan Zhao's avatar
Yifan Zhao committed
423
  struct hpvm_async_time_marker_list **new_event = &(tset->async_markers);
424

Yifan Zhao's avatar
Yifan Zhao committed
425
  while (*new_event != NULL && (*new_event)->timerID != INVALID_TIMERID) {
426
427
428
    new_event = &((*new_event)->next);
  }

Yifan Zhao's avatar
Yifan Zhao committed
429
  if (*new_event == NULL) {
Yifan Zhao's avatar
Yifan Zhao committed
430
431
    *new_event = (struct hpvm_async_time_marker_list *)malloc(
        sizeof(struct hpvm_async_time_marker_list));
432
433
434
435
436
437
438
    (*new_event)->marker = calloc(1, sizeof(cl_event));
    (*new_event)->next = NULL;
  }

  /* valid event handle now aquired: insert the event record */
  (*new_event)->label = NULL;
  (*new_event)->timerID = timer;
Yifan Zhao's avatar
Yifan Zhao committed
439
440
  ciErrNum =
      clEnqueueMarker(globalCommandQue, (cl_event *)(*new_event)->marker);
441
  if (ciErrNum != CL_SUCCESS) {
Yifan Zhao's avatar
Yifan Zhao committed
442
    fprintf(stderr, "Error Enqueueing Marker!\n");
443
444
445
  }
}

Yifan Zhao's avatar
Yifan Zhao committed
446
447
static void insert_submarker(struct hpvm_TimerSet *tset, char *label,
                             enum hpvm_TimerID timer) {
448
  cl_int ciErrNum = CL_SUCCESS;
Yifan Zhao's avatar
Yifan Zhao committed
449
  struct hpvm_async_time_marker_list **new_event = &(tset->async_markers);
450

Yifan Zhao's avatar
Yifan Zhao committed
451
  while (*new_event != NULL && (*new_event)->timerID != INVALID_TIMERID) {
452
453
454
    new_event = &((*new_event)->next);
  }

Yifan Zhao's avatar
Yifan Zhao committed
455
  if (*new_event == NULL) {
Yifan Zhao's avatar
Yifan Zhao committed
456
457
    *new_event = (struct hpvm_async_time_marker_list *)malloc(
        sizeof(struct hpvm_async_time_marker_list));
458
459
460
461
462
463
464
    (*new_event)->marker = calloc(1, sizeof(cl_event));
    (*new_event)->next = NULL;
  }

  /* valid event handle now aquired: insert the event record */
  (*new_event)->label = label;
  (*new_event)->timerID = timer;
Yifan Zhao's avatar
Yifan Zhao committed
465
466
  ciErrNum =
      clEnqueueMarker(globalCommandQue, (cl_event *)(*new_event)->marker);
467
  if (ciErrNum != CL_SUCCESS) {
Yifan Zhao's avatar
Yifan Zhao committed
468
    fprintf(stderr, "Error Enqueueing Marker!\n");
469
470
471
472
  }
}

/* Assumes that all recorded events have completed */
Yifan Zhao's avatar
Yifan Zhao committed
473
474
475
476
// Fold the recorded markers into the timers and return the total time found.
//
// For every pair of consecutive valid markers, the difference between their
// CL_PROFILING_COMMAND_END values is added to the timer named by the first
// marker of the pair and, if that marker carries a label, to the subtimer with
// the same label. Every processed marker is set to INVALID_TIMERID so that its
// node can be reused.
//
// Returns 0 without touching anything when no valid marker exists. Without
// this guard the loop below would walk recycled nodes, index
// timers[INVALID_TIMERID] and dereference a NULL `next`.
static hpvm_Timestamp record_async_times(struct hpvm_TimerSet *tset) {
  struct hpvm_async_time_marker_list *next_interval = NULL;
  struct hpvm_async_time_marker_list *last_marker = get_last_async(tset);
  hpvm_Timestamp total_async_time = 0;

  if (last_marker == NULL)
    return total_async_time;

  for (next_interval = tset->async_markers; next_interval != last_marker;
       next_interval = next_interval->next) {
    cl_ulong command_start = 0, command_end = 0;
    cl_int ciErrNum = CL_SUCCESS;

    // The interval starts where this marker ended (COMMAND_END on purpose)...
    ciErrNum = clGetEventProfilingInfo(*((cl_event *)next_interval->marker),
                                       CL_PROFILING_COMMAND_END,
                                       sizeof(cl_ulong), &command_start, NULL);
    if (ciErrNum != CL_SUCCESS) {
      fprintf(stderr, "Error getting first EventProfilingInfo: %d\n", ciErrNum);
    }

    // ...and ends where the following marker ended.
    ciErrNum = clGetEventProfilingInfo(
        *((cl_event *)next_interval->next->marker), CL_PROFILING_COMMAND_END,
        sizeof(cl_ulong), &command_end, NULL);
    if (ciErrNum != CL_SUCCESS) {
      fprintf(stderr, "Error getting second EventProfilingInfo: %d\n",
              ciErrNum);
    }

    hpvm_Timestamp interval =
        (hpvm_Timestamp)(((double)(command_end - command_start)));
    tset->timers[next_interval->timerID].elapsed += interval;

    // A labelled marker may belong to a category without any subtimer list;
    // skip the lookup instead of dereferencing NULL.
    if (next_interval->label != NULL &&
        tset->sub_timer_list[next_interval->timerID] != NULL) {
      struct hpvm_SubTimer *subtimer =
          tset->sub_timer_list[next_interval->timerID]->subtimer_list;
      while (subtimer != NULL) {
        if (strcmp(subtimer->label, next_interval->label) == 0) {
          subtimer->timer.elapsed += interval;
          break;
        }
        subtimer = subtimer->next;
      }
    }
    total_async_time += interval;
    next_interval->timerID = INVALID_TIMERID;
  }

  // The loop stops at the last valid marker; retire it as well.
  if (next_interval != NULL)
    next_interval->timerID = INVALID_TIMERID;

  return total_async_time;
}

Yifan Zhao's avatar
Yifan Zhao committed
522
523
// Add the span from `start` to `end` to the running total `*accum`.
static void accumulate_time(hpvm_Timestamp *accum, hpvm_Timestamp start,
                            hpvm_Timestamp end) {
#if _POSIX_VERSION >= 200112L
  *accum = *accum + (end - start);
#else
#error "Timestamps not implemented for this system"
#endif
}

#if _POSIX_VERSION >= 200112L
Yifan Zhao's avatar
Yifan Zhao committed
532
static hpvm_Timestamp get_time() {
533
534
  struct timespec tv;
  clock_gettime(CLOCK_MONOTONIC, &tv);
Yifan Zhao's avatar
Yifan Zhao committed
535
  return (hpvm_Timestamp)(tv.tv_sec * BILLION + tv.tv_nsec);
536
537
}
#else
Yifan Zhao's avatar
Yifan Zhao committed
538
#error "no supported time libraries are available on this platform"
539
540
#endif

Yifan Zhao's avatar
Yifan Zhao committed
541
542
// Put `timer` into the stopped state with zero accumulated time.
void hpvm_ResetTimer(struct hpvm_Timer *timer) {
#if _POSIX_VERSION >= 200112L
  timer->elapsed = 0;
#else
#error "hpvm_ResetTimer: not implemented for this system"
#endif
  timer->state = hpvm_Timer_STOPPED;
}

Yifan Zhao's avatar
Yifan Zhao committed
551
552
void hpvm_StartTimer(struct hpvm_Timer *timer) {
  if (timer->state != hpvm_Timer_STOPPED) {
553
554
555
556
557
    // FIXME: Removing warning statement to avoid printing this error
    // fputs("Ignoring attempt to start a running timer\n", stderr);
    return;
  }

Yifan Zhao's avatar
Yifan Zhao committed
558
  timer->state = hpvm_Timer_RUNNING;
559
560
561
562
563
564
565
566

#if _POSIX_VERSION >= 200112L
  {
    struct timespec tv;
    clock_gettime(CLOCK_MONOTONIC, &tv);
    timer->init = tv.tv_sec * BILLION + tv.tv_nsec;
  }
#else
Yifan Zhao's avatar
Yifan Zhao committed
567
#error "hpvm_StartTimer: not implemented for this system"
568
569
570
#endif
}

Yifan Zhao's avatar
Yifan Zhao committed
571
572
void hpvm_StartTimerAndSubTimer(struct hpvm_Timer *timer,
                                struct hpvm_Timer *subtimer) {
573
574

  unsigned int numNotStopped = 0x3; // 11
Yifan Zhao's avatar
Yifan Zhao committed
575
  if (timer->state != hpvm_Timer_STOPPED) {
576
577
578
    fputs("Warning: Timer was not stopped\n", stderr);
    numNotStopped &= 0x1; // Zero out 2^1
  }
Yifan Zhao's avatar
Yifan Zhao committed
579
  if (subtimer->state != hpvm_Timer_STOPPED) {
580
581
582
583
584
585
586
    fputs("Warning: Subtimer was not stopped\n", stderr);
    numNotStopped &= 0x2; // Zero out 2^0
  }
  if (numNotStopped == 0x0) {
    return;
  }

Yifan Zhao's avatar
Yifan Zhao committed
587
588
  timer->state = hpvm_Timer_RUNNING;
  subtimer->state = hpvm_Timer_RUNNING;
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603

#if _POSIX_VERSION >= 200112L
  {
    struct timespec tv;
    clock_gettime(CLOCK_MONOTONIC, &tv);

    if (numNotStopped & 0x2) {
      timer->init = tv.tv_sec * BILLION + tv.tv_nsec;
    }

    if (numNotStopped & 0x1) {
      subtimer->init = tv.tv_sec * BILLION + tv.tv_nsec;
    }
  }
#else
Yifan Zhao's avatar
Yifan Zhao committed
604
#error "hpvm_StartTimer: not implemented for this system"
605
606
607
#endif
}

Yifan Zhao's avatar
Yifan Zhao committed
608
609
void hpvm_StopTimer(struct hpvm_Timer *timer) {
  hpvm_Timestamp fini;
610

Yifan Zhao's avatar
Yifan Zhao committed
611
  if (timer->state != hpvm_Timer_RUNNING) {
Yifan Zhao's avatar
Yifan Zhao committed
612
    // fputs("Ignoring attempt to stop a stopped timer\n", stderr);
613
614
615
    return;
  }

Yifan Zhao's avatar
Yifan Zhao committed
616
  timer->state = hpvm_Timer_STOPPED;
617
618
619
620
621
622
623
624

#if _POSIX_VERSION >= 200112L
  {
    struct timespec tv;
    clock_gettime(CLOCK_MONOTONIC, &tv);
    fini = tv.tv_sec * BILLION + tv.tv_nsec;
  }
#else
Yifan Zhao's avatar
Yifan Zhao committed
625
#error "hpvm_StopTimer: not implemented for this system"
626
627
628
629
630
631
#endif

  accumulate_time(&timer->elapsed, timer->init, fini);
  timer->init = fini;
}

Yifan Zhao's avatar
Yifan Zhao committed
632
633
void hpvm_StopTimerAndSubTimer(struct hpvm_Timer *timer,
                               struct hpvm_Timer *subtimer) {
634

Yifan Zhao's avatar
Yifan Zhao committed
635
  hpvm_Timestamp fini;
636
637

  unsigned int numNotRunning = 0x3; // 11
Yifan Zhao's avatar
Yifan Zhao committed
638
  if (timer->state != hpvm_Timer_RUNNING) {
639
640
641
    fputs("Warning: Timer was not running\n", stderr);
    numNotRunning &= 0x1; // Zero out 2^1
  }
Yifan Zhao's avatar
Yifan Zhao committed
642
  if (subtimer->state != hpvm_Timer_RUNNING) {
643
644
645
646
647
648
649
    fputs("Warning: Subtimer was not running\n", stderr);
    numNotRunning &= 0x2; // Zero out 2^0
  }
  if (numNotRunning == 0x0) {
    return;
  }

Yifan Zhao's avatar
Yifan Zhao committed
650
651
  timer->state = hpvm_Timer_STOPPED;
  subtimer->state = hpvm_Timer_STOPPED;
652
653
654
655
656
657
658
659

#if _POSIX_VERSION >= 200112L
  {
    struct timespec tv;
    clock_gettime(CLOCK_MONOTONIC, &tv);
    fini = tv.tv_sec * BILLION + tv.tv_nsec;
  }
#else
Yifan Zhao's avatar
Yifan Zhao committed
660
#error "hpvm_StopTimer: not implemented for this system"
661
662
663
664
665
666
667
668
669
670
671
672
673
674
#endif

  if (numNotRunning & 0x2) {
    accumulate_time(&timer->elapsed, timer->init, fini);
    timer->init = fini;
  }

  if (numNotRunning & 0x1) {
    accumulate_time(&subtimer->elapsed, subtimer->init, fini);
    subtimer->init = fini;
  }
}

/* Get the elapsed time in seconds. */
Yifan Zhao's avatar
Yifan Zhao committed
675
double hpvm_GetElapsedTime(struct hpvm_Timer *timer) {
676
677
  double ret;

Yifan Zhao's avatar
Yifan Zhao committed
678
  if (timer->state != hpvm_Timer_STOPPED) {
679
680
681
682
683
684
    fputs("Elapsed time from a running timer is inaccurate\n", stderr);
  }

#if _POSIX_VERSION >= 200112L
  ret = timer->elapsed / 1e9;
#else
Yifan Zhao's avatar
Yifan Zhao committed
685
#error "hpvm_GetElapsedTime: not implemented for this system"
686
687
688
689
#endif
  return ret;
}

Yifan Zhao's avatar
Yifan Zhao committed
690
void hpvm_InitializeTimerSet(struct hpvm_TimerSet *timers) {
691
692
693
  int n;

  timers->wall_begin = get_time();
Yifan Zhao's avatar
Yifan Zhao committed
694
  timers->current = hpvm_TimerID_NONE;
695
696
697

  timers->async_markers = NULL;

Yifan Zhao's avatar
Yifan Zhao committed
698
699
  for (n = 0; n < hpvm_TimerID_LAST; n++) {
    hpvm_ResetTimer(&timers->timers[n]);
700
701
702
703
    timers->sub_timer_list[n] = NULL;
  }
}

Yifan Zhao's avatar
Yifan Zhao committed
704
705
void hpvm_AddSubTimer(struct hpvm_TimerSet *timers, char *label,
                      enum hpvm_TimerID hpvm_Category) {
706

Yifan Zhao's avatar
Yifan Zhao committed
707
708
  struct hpvm_SubTimer *subtimer =
      (struct hpvm_SubTimer *)malloc(sizeof(struct hpvm_SubTimer));
709
710
711

  int len = strlen(label);

Yifan Zhao's avatar
Yifan Zhao committed
712
  subtimer->label = (char *)malloc(sizeof(char) * (len + 1));
713
714
  sprintf(subtimer->label, "%s", label);

Yifan Zhao's avatar
Yifan Zhao committed
715
  hpvm_ResetTimer(&subtimer->timer);
716
717
  subtimer->next = NULL;

Yifan Zhao's avatar
Yifan Zhao committed
718
719
  struct hpvm_SubTimerList *subtimerlist =
      timers->sub_timer_list[hpvm_Category];
720
  if (subtimerlist == NULL) {
Yifan Zhao's avatar
Yifan Zhao committed
721
    subtimerlist =
Yifan Zhao's avatar
Yifan Zhao committed
722
        (struct hpvm_SubTimerList *)calloc(1, sizeof(struct hpvm_SubTimerList));
723
    subtimerlist->subtimer_list = subtimer;
Yifan Zhao's avatar
Yifan Zhao committed
724
    timers->sub_timer_list[hpvm_Category] = subtimerlist;
725
726
  } else {
    // Append to list
Yifan Zhao's avatar
Yifan Zhao committed
727
    struct hpvm_SubTimer *element = subtimerlist->subtimer_list;
728
729
730
731
732
733
734
    while (element->next != NULL) {
      element = element->next;
    }
    element->next = subtimer;
  }
}

Yifan Zhao's avatar
Yifan Zhao committed
735
void hpvm_SwitchToTimer(struct hpvm_TimerSet *timers, enum hpvm_TimerID timer) {
Yifan Zhao's avatar
Yifan Zhao committed
736
  // cerr << "Switch to timer: " << timer << flush << "\n";
737
  /* Stop the currently running timer */
Yifan Zhao's avatar
Yifan Zhao committed
738
739
  if (timers->current != hpvm_TimerID_NONE) {
    struct hpvm_SubTimerList *subtimerlist =
Yifan Zhao's avatar
Yifan Zhao committed
740
        timers->sub_timer_list[timers->current];
Yifan Zhao's avatar
Yifan Zhao committed
741
    struct hpvm_SubTimer *currSubTimer =
Yifan Zhao's avatar
Yifan Zhao committed
742
        (subtimerlist != NULL) ? subtimerlist->current : NULL;
743

Yifan Zhao's avatar
Yifan Zhao committed
744
    if (!is_async(timers->current)) {
745
746
      if (timers->current != timer) {
        if (currSubTimer != NULL) {
Yifan Zhao's avatar
Yifan Zhao committed
747
          hpvm_StopTimerAndSubTimer(&timers->timers[timers->current],
Yifan Zhao's avatar
Yifan Zhao committed
748
                                    &currSubTimer->timer);
749
        } else {
Yifan Zhao's avatar
Yifan Zhao committed
750
          hpvm_StopTimer(&timers->timers[timers->current]);
751
752
753
        }
      } else {
        if (currSubTimer != NULL) {
Yifan Zhao's avatar
Yifan Zhao committed
754
          hpvm_StopTimer(&currSubTimer->timer);
755
756
757
758
759
        }
      }
    } else {
      insert_marker(timers, timer);
      if (!is_async(timer)) { // if switching to async too, keep driver going
Yifan Zhao's avatar
Yifan Zhao committed
760
        hpvm_StopTimer(&timers->timers[hpvm_TimerID_DRIVER]);
761
762
763
764
      }
    }
  }

Yifan Zhao's avatar
Yifan Zhao committed
765
  hpvm_Timestamp currentTime = get_time();
766
767
768
769

  /* The only cases we check for asynchronous task completion is
   * when an overlapping CPU operation completes, or the next
   * segment blocks on completion of previous async operations */
Yifan Zhao's avatar
Yifan Zhao committed
770
771
  if (asyncs_outstanding(timers) &&
      (!is_async(timers->current) || is_blocking(timer))) {
772

Yifan Zhao's avatar
Yifan Zhao committed
773
    struct hpvm_async_time_marker_list *last_event = get_last_async(timers);
774
775
776
777
778
    /* CL_COMPLETE if completed */

    cl_int ciErrNum = CL_SUCCESS;
    cl_int async_done = CL_COMPLETE;

Yifan Zhao's avatar
Yifan Zhao committed
779
780
781
    ciErrNum = clGetEventInfo(*((cl_event *)last_event->marker),
                              CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int),
                              &async_done, NULL);
782
783
784
785
    if (ciErrNum != CL_SUCCESS) {
      fprintf(stdout, "Error Querying EventInfo1!\n");
    }

Yifan Zhao's avatar
Yifan Zhao committed
786
    if (is_blocking(timer)) {
787
788
789
790
791
      /* Async operations completed after previous CPU operations:
       * overlapped time is the total CPU time since this set of async
       * operations were first issued */

      // timer to switch to is COPY or NONE
Yifan Zhao's avatar
Yifan Zhao committed
792
      if (async_done != CL_COMPLETE) {
Yifan Zhao's avatar
Yifan Zhao committed
793
        accumulate_time(&(timers->timers[hpvm_TimerID_OVERLAP].elapsed),
Yifan Zhao's avatar
Yifan Zhao committed
794
                        timers->async_begin, currentTime);
795
796
797
798
799
800
801
802
      }

      /* Wait on async operation completion */
      ciErrNum = clWaitForEvents(1, (cl_event *)last_event->marker);
      if (ciErrNum != CL_SUCCESS) {
        fprintf(stderr, "Error Waiting for Events!\n");
      }

Yifan Zhao's avatar
Yifan Zhao committed
803
      hpvm_Timestamp total_async_time = record_async_times(timers);
804
805
806

      /* Async operations completed before previous CPU operations:
       * overlapped time is the total async time */
Yifan Zhao's avatar
Yifan Zhao committed
807
808
809
      if (async_done == CL_COMPLETE) {
        // fprintf(stderr, "Async_done: total_async_type = %lld\n",
        // total_async_time);
Yifan Zhao's avatar
Yifan Zhao committed
810
        timers->timers[hpvm_TimerID_OVERLAP].elapsed += total_async_time;
811
812
813
      }

    } else
Yifan Zhao's avatar
Yifan Zhao committed
814
815
816
817
        /* implies (!is_async(timers->current) && asyncs_outstanding(timers)) */
        // i.e. Current Not Async (not KERNEL/COPY_ASYNC) but there are
        // outstanding so something is deeper in stack
        if (async_done == CL_COMPLETE) {
818
819
      /* Async operations completed before previous CPU operations:
       * overlapped time is the total async time */
Yifan Zhao's avatar
Yifan Zhao committed
820
      timers->timers[hpvm_TimerID_OVERLAP].elapsed +=
Yifan Zhao's avatar
Yifan Zhao committed
821
          record_async_times(timers);
822
823
824
825
    }
  }

  /* Start the new timer */
Yifan Zhao's avatar
Yifan Zhao committed
826
  if (timer != hpvm_TimerID_NONE) {
Yifan Zhao's avatar
Yifan Zhao committed
827
    if (!is_async(timer)) {
Yifan Zhao's avatar
Yifan Zhao committed
828
      hpvm_StartTimer(&timers->timers[timer]);
829
830
831
832
833
834
835
    } else {
      // toSwitchTo Is Async (KERNEL/COPY_ASYNC)
      if (!asyncs_outstanding(timers)) {
        /* No asyncs outstanding, insert a fresh async marker */

        insert_marker(timers, timer);
        timers->async_begin = currentTime;
Yifan Zhao's avatar
Yifan Zhao committed
836
      } else if (!is_async(timers->current)) {
837
838
839
840
841
        /* Previous asyncs still in flight, but a previous SwitchTo
         * already marked the end of the most recent async operation,
         * so we can rename that marker as the beginning of this async
         * operation */

Yifan Zhao's avatar
Yifan Zhao committed
842
        struct hpvm_async_time_marker_list *last_event = get_last_async(timers);
843
844
845
846
        last_event->label = NULL;
        last_event->timerID = timer;
      }
      if (!is_async(timers->current)) {
Yifan Zhao's avatar
Yifan Zhao committed
847
        hpvm_StartTimer(&timers->timers[hpvm_TimerID_DRIVER]);
848
849
850
851
852
853
      }
    }
  }
  timers->current = timer;
}

Yifan Zhao's avatar
Yifan Zhao committed
854
855
856
void hpvm_SwitchToSubTimer(struct hpvm_TimerSet *timers, char *label,
                           enum hpvm_TimerID category) {
  struct hpvm_SubTimerList *subtimerlist =
Yifan Zhao's avatar
Yifan Zhao committed
857
      timers->sub_timer_list[timers->current];
Yifan Zhao's avatar
Yifan Zhao committed
858
  struct hpvm_SubTimer *curr =
Yifan Zhao's avatar
Yifan Zhao committed
859
      (subtimerlist != NULL) ? subtimerlist->current : NULL;
860

Yifan Zhao's avatar
Yifan Zhao committed
861
  if (timers->current != hpvm_TimerID_NONE) {
Yifan Zhao's avatar
Yifan Zhao committed
862
    if (!is_async(timers->current)) {
863
864
      if (timers->current != category) {
        if (curr != NULL) {
Yifan Zhao's avatar
Yifan Zhao committed
865
          hpvm_StopTimerAndSubTimer(&timers->timers[timers->current],
Yifan Zhao's avatar
Yifan Zhao committed
866
                                    &curr->timer);
867
        } else {
Yifan Zhao's avatar
Yifan Zhao committed
868
          hpvm_StopTimer(&timers->timers[timers->current]);
869
870
871
        }
      } else {
        if (curr != NULL) {
Yifan Zhao's avatar
Yifan Zhao committed
872
          hpvm_StopTimer(&curr->timer);
873
874
875
876
877
        }
      }
    } else {
      insert_submarker(timers, label, category);
      if (!is_async(category)) { // if switching to async too, keep driver going
Yifan Zhao's avatar
Yifan Zhao committed
878
        hpvm_StopTimer(&timers->timers[hpvm_TimerID_DRIVER]);
879
880
881
882
      }
    }
  }

Yifan Zhao's avatar
Yifan Zhao committed
883
  hpvm_Timestamp currentTime = get_time();
884
885
886
887

  /* The only cases we check for asynchronous task completion is
   * when an overlapping CPU operation completes, or the next
   * segment blocks on completion of previous async operations */
Yifan Zhao's avatar
Yifan Zhao committed
888
889
  if (asyncs_outstanding(timers) &&
      (!is_async(timers->current) || is_blocking(category))) {
890

Yifan Zhao's avatar
Yifan Zhao committed
891
    struct hpvm_async_time_marker_list *last_event = get_last_async(timers);
892
893
894
895
896
    /* CL_COMPLETE if completed */

    cl_int ciErrNum = CL_SUCCESS;
    cl_int async_done = CL_COMPLETE;

Yifan Zhao's avatar
Yifan Zhao committed
897
898
899
    ciErrNum = clGetEventInfo(*((cl_event *)last_event->marker),
                              CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int),
                              &async_done, NULL);
900
901
902
903
    if (ciErrNum != CL_SUCCESS) {
      fprintf(stdout, "Error Querying EventInfo2!\n");
    }

Yifan Zhao's avatar
Yifan Zhao committed
904
    if (is_blocking(category)) {
905
906
907
908
909
      /* Async operations completed after previous CPU operations:
       * overlapped time is the total CPU time since this set of async
       * operations were first issued */

      // timer to switch to is COPY or NONE
Yifan Zhao's avatar
Yifan Zhao committed
910
911
912
913
914
      // if it hasn't already finished, then just take now and use that as the
      // elapsed time in OVERLAP anything happening after now isn't OVERLAP
      // because everything is being stopped to wait for synchronization it
      // seems that the extra sync wall time isn't being recorded anywhere
      if (async_done != CL_COMPLETE)
Yifan Zhao's avatar
Yifan Zhao committed
915
        accumulate_time(&(timers->timers[hpvm_TimerID_OVERLAP].elapsed),
Yifan Zhao's avatar
Yifan Zhao committed
916
                        timers->async_begin, currentTime);
917
918
919
920
921
922

      /* Wait on async operation completion */
      ciErrNum = clWaitForEvents(1, (cl_event *)last_event->marker);
      if (ciErrNum != CL_SUCCESS) {
        fprintf(stderr, "Error Waiting for Events!\n");
      }
Yifan Zhao's avatar
Yifan Zhao committed
923
      hpvm_Timestamp total_async_time = record_async_times(timers);
924
925
926

      /* Async operations completed before previous CPU operations:
       * overlapped time is the total async time */
Yifan Zhao's avatar
Yifan Zhao committed
927
928
929
930
      // If it did finish, then accumulate all the async time that did happen
      // into OVERLAP the immediately preceding EventSynchronize theoretically
      // didn't have any effect since it was already completed.
      if (async_done == CL_COMPLETE /*cudaSuccess*/)
Yifan Zhao's avatar
Yifan Zhao committed
931
        timers->timers[hpvm_TimerID_OVERLAP].elapsed += total_async_time;
932
933

    } else
Yifan Zhao's avatar
Yifan Zhao committed
934
935
936
937
        /* implies (!is_async(timers->current) && asyncs_outstanding(timers)) */
        // i.e. Current Not Async (not KERNEL/COPY_ASYNC) but there are
        // outstanding so something is deeper in stack
        if (async_done == CL_COMPLETE /*cudaSuccess*/) {
938
939
      /* Async operations completed before previous CPU operations:
       * overlapped time is the total async time */
Yifan Zhao's avatar
Yifan Zhao committed
940
      timers->timers[hpvm_TimerID_OVERLAP].elapsed +=
Yifan Zhao's avatar
Yifan Zhao committed
941
          record_async_times(timers);
942
943
944
945
946
    }
    // else, this isn't blocking, so just check the next time around
  }

  subtimerlist = timers->sub_timer_list[category];
Yifan Zhao's avatar
Yifan Zhao committed
947
  struct hpvm_SubTimer *subtimer = NULL;
948
949
950
951
952
953
954
955
956
957
958
959
960

  if (label != NULL) {
    subtimer = subtimerlist->subtimer_list;
    while (subtimer != NULL) {
      if (strcmp(subtimer->label, label) == 0) {
        break;
      } else {
        subtimer = subtimer->next;
      }
    }
  }

  /* Start the new timer */
Yifan Zhao's avatar
Yifan Zhao committed
961
  if (category != hpvm_TimerID_NONE) {
Yifan Zhao's avatar
Yifan Zhao committed
962
    if (!is_async(category)) {
963
964
965
966
967
      if (subtimerlist != NULL) {
        subtimerlist->current = subtimer;
      }

      if (category != timers->current && subtimer != NULL) {
Yifan Zhao's avatar
Yifan Zhao committed
968
        hpvm_StartTimerAndSubTimer(&timers->timers[category], &subtimer->timer);
969
      } else if (subtimer != NULL) {
Yifan Zhao's avatar
Yifan Zhao committed
970
        hpvm_StartTimer(&subtimer->timer);
971
      } else {
Yifan Zhao's avatar
Yifan Zhao committed
972
        hpvm_StartTimer(&timers->timers[category]);
973
974
975
976
977
978
979
980
981
982
983
      }
    } else {
      if (subtimerlist != NULL) {
        subtimerlist->current = subtimer;
      }

      // toSwitchTo Is Async (KERNEL/COPY_ASYNC)
      if (!asyncs_outstanding(timers)) {
        /* No asyncs outstanding, insert a fresh async marker */
        insert_submarker(timers, label, category);
        timers->async_begin = currentTime;
Yifan Zhao's avatar
Yifan Zhao committed
984
      } else if (!is_async(timers->current)) {
985
986
987
988
989
        /* Previous asyncs still in flight, but a previous SwitchTo
         * already marked the end of the most recent async operation,
         * so we can rename that marker as the beginning of this async
         * operation */

Yifan Zhao's avatar
Yifan Zhao committed
990
        struct hpvm_async_time_marker_list *last_event = get_last_async(timers);
991
992
993
994
        last_event->timerID = category;
        last_event->label = label;
      } // else, marker for switchToThis was already inserted

Yifan Zhao's avatar
Yifan Zhao committed
995
996
      // toSwitchto is already asynchronous, but if current/prev state is async
      // too, then DRIVER is already running
997
      if (!is_async(timers->current)) {
Yifan Zhao's avatar
Yifan Zhao committed
998
        hpvm_StartTimer(&timers->timers[hpvm_TimerID_DRIVER]);
999
1000
      }
    }
For faster browsing, not all history is shown. View entire blame