DFG2LLVM_CPU.cpp 68.7 KB
Newer Older
1
//===-------------------------- DFG2LLVM_CPU.cpp --------------------------===//
2
3
4
5
6
7
8
9
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

10
#define DEBUG_TYPE "DFG2LLVM_CPU"
Yifan Zhao's avatar
Yifan Zhao committed
11
12
13
#include "SupportHPVM/DFG2LLVM.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
14
#include "llvm/IR/InstIterator.h"
Yifan Zhao's avatar
Yifan Zhao committed
15
#include "llvm/IR/Module.h"
16
17
#include "llvm/IRReader/IRReader.h"
#include "llvm/Linker/Linker.h"
Yifan Zhao's avatar
Yifan Zhao committed
18
#include "llvm/Pass.h"
19
#include "llvm/Support/SourceMgr.h"
Yifan Zhao's avatar
Yifan Zhao committed
20
21
22
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
23

24
25
26
27
28
29
30
31
// LLVM_BUILD_DIR must be supplied on the compiler command line; it is turned
// into a string literal below (LLVM_BUILD_DIR_STR).
#if !defined(LLVM_BUILD_DIR)
#error LLVM_BUILD_DIR is not defined
#endif

// Two-level stringification: STRINGIFY expands its argument first, then
// STR_VALUE turns the expanded tokens into a string literal.
#define STR_VALUE(X) #X
#define STRINGIFY(X) STR_VALUE(X)
#define LLVM_BUILD_DIR_STR STRINGIFY(LLVM_BUILD_DIR)

32
33
using namespace llvm;
using namespace builddfg;
34
using namespace dfg2llvm;
35

Yifan Zhao's avatar
Yifan Zhao committed
36
// HPVM Command line option to use timer or not
37
static cl::opt<bool> HPVMTimer_CPU("hpvm-timers-cpu",
Yifan Zhao's avatar
Yifan Zhao committed
38
                                   cl::desc("Enable hpvm timers"));
39

40
41
namespace {

42
43
// DFG2LLVM_CPU - The first implementation.
struct DFG2LLVM_CPU : public DFG2LLVM {
44
  static char ID; // Pass identification, replacement for typeid
45
  DFG2LLVM_CPU() : DFG2LLVM(ID) {}
46

47
48
private:
  // Member variables
49

50
  // Functions
51

52
53
54
55
56
public:
  bool runOnModule(Module &M);
};

// Visitor for Code generation traversal (tree traversal for now)
57
class CGT_CPU : public CodeGenTraversal {
58
59

private:
Yifan Zhao's avatar
Yifan Zhao committed
60
  // Member variables
61

62
  FunctionCallee malloc;
Yifan Zhao's avatar
Yifan Zhao committed
63
  // HPVM Runtime API
64
65
66
  FunctionCallee llvm_hpvm_cpu_launch;
  FunctionCallee llvm_hpvm_cpu_wait;
  FunctionCallee llvm_hpvm_cpu_argument_ptr;
Yifan Zhao's avatar
Yifan Zhao committed
67
68
69
70
71
72
73
74
75
76
77
78

  FunctionCallee llvm_hpvm_streamLaunch;
  FunctionCallee llvm_hpvm_streamPush;
  FunctionCallee llvm_hpvm_streamPop;
  FunctionCallee llvm_hpvm_streamWait;
  FunctionCallee llvm_hpvm_createBindInBuffer;
  FunctionCallee llvm_hpvm_createBindOutBuffer;
  FunctionCallee llvm_hpvm_createEdgeBuffer;
  FunctionCallee llvm_hpvm_createLastInputBuffer;
  FunctionCallee llvm_hpvm_createThread;
  FunctionCallee llvm_hpvm_bufferPush;
  FunctionCallee llvm_hpvm_bufferPop;
79
80
81
82
  FunctionCallee llvm_hpvm_cpu_dstack_push;
  FunctionCallee llvm_hpvm_cpu_dstack_pop;
  FunctionCallee llvm_hpvm_cpu_getDimLimit;
  FunctionCallee llvm_hpvm_cpu_getDimInstance;
Yifan Zhao's avatar
Yifan Zhao committed
83
84
85
86
87

  // Functions
  std::vector<IntrinsicInst *> *getUseList(Value *LI);
  Value *addLoop(Instruction *I, Value *limit, const Twine &indexName = "");
  void addWhileLoop(Instruction *, Instruction *, Instruction *, Value *);
kotsifa2's avatar
kotsifa2 committed
88
  Instruction *addWhileLoopCounter(BasicBlock *, BasicBlock *, BasicBlock *);
Yifan Zhao's avatar
Yifan Zhao committed
89
  Argument *getArgumentFromEnd(Function *F, unsigned offset);
90
  Value *getInValueAt(DFNode *Child, unsigned i, Function *ParentF_CPU,
Yifan Zhao's avatar
Yifan Zhao committed
91
                      Instruction *InsertBefore);
92
  void invokeChild_CPU(DFNode *C, Function *F_CPU, ValueToValueMapTy &VMap,
Yifan Zhao's avatar
Yifan Zhao committed
93
                       Instruction *InsertBefore);
94
  void invokeChild_PTX(DFNode *C, Function *F_CPU, ValueToValueMapTy &VMap,
Yifan Zhao's avatar
Yifan Zhao committed
95
96
97
98
99
100
101
102
                       Instruction *InsertBefore);
  StructType *getArgumentListStructTy(DFNode *);
  Function *createFunctionFilter(DFNode *C);
  void startNodeThread(DFNode *, std::vector<Value *>,
                       DenseMap<DFEdge *, Value *>, Value *, Value *,
                       Instruction *);
  Function *createLaunchFunction(DFInternalNode *);

103
104
  // Virtual Functions
  void init() {
105
106
    HPVMTimer = HPVMTimer_CPU;
    TargetName = "CPU";
107
108
  }
  void initRuntimeAPI();
Yifan Zhao's avatar
Yifan Zhao committed
109
110
111
112
  void codeGen(DFInternalNode *N);
  void codeGen(DFLeafNode *N);
  Function *codeGenStreamPush(DFInternalNode *N);
  Function *codeGenStreamPop(DFInternalNode *N);
113

114
115
public:
  // Constructor
116
  CGT_CPU(Module &_M, BuildDFG &_DFG) : CodeGenTraversal(_M, _DFG) {
117
    init();
118
119
    initRuntimeAPI();
  }
120

Yifan Zhao's avatar
Yifan Zhao committed
121
122
  void codeGenLaunch(DFInternalNode *Root);
  void codeGenLaunchStreaming(DFInternalNode *Root);
123
};
124

125
126
// Run CPU code generation on module M.
//
// For every root dataflow graph reported by the BuildDFG analysis, the graph
// is visited by a CGT_CPU code generator and the launch of the graph is then
// replaced (streaming or regular variant, depending on the root node).
// Always returns true, i.e. the module is reported as modified.
bool DFG2LLVM_CPU::runOnModule(Module &M) {
  DEBUG(errs() << "\nDFG2LLVM_CPU PASS\n");

  // Get the BuildDFG Analysis Results:
  // - Dataflow graph
  // - Maps from i8* handles to DFNode and DFEdge
  BuildDFG &DFG = getAnalysis<BuildDFG>();

  std::vector<DFInternalNode *> Roots = DFG.getRoots();

  // Visitor for Code Generation Graph Traversal. It lives on the stack so it
  // is destroyed on every path out of this function without a manual delete.
  CGT_CPU CGTVisitor(M, DFG);

  // Iterate over all the DFGs and produce code for each one of them
  for (DFInternalNode *rootNode : Roots) {
    // Initiate code generation for root DFNode
    CGTVisitor.visit(rootNode);

    // Go ahead and replace the launch intrinsic with pthread call.
    // TODO: Later on, we might like to do this in a separate pass, which would
    // allow us the flexibility to switch between complete static code
    // generation for DFG or having a customized runtime+scheduler

    // Do streaming code generation if root node is streaming. Usual otherwise
    if (rootNode->isChildGraphStreaming())
      CGTVisitor.codeGenLaunchStreaming(rootNode);
    else
      CGTVisitor.codeGenLaunch(rootNode);
  }

  return true;
}
161

Yifan Zhao's avatar
Yifan Zhao committed
162
// Initialize the HPVM runtime API. This makes it easier to insert these calls
163
void CGT_CPU::initRuntimeAPI() {
164
165
166

  // Load Runtime API Module
  SMDiagnostic Err;
167

168
169
  std::string runtimeAPI = std::string(LLVM_BUILD_DIR_STR) +
                           "/tools/hpvm/projects/hpvm-rt/hpvm-rt.bc";
170

171
  runtimeModule = parseIRFile(runtimeAPI, Err, M.getContext());
Yifan Zhao's avatar
Yifan Zhao committed
172
  if (runtimeModule == nullptr) {
173
174
    DEBUG(errs() << Err.getMessage() << " " << runtimeAPI << "\n");
    assert(false && "couldn't parse runtime");
Yifan Zhao's avatar
Yifan Zhao committed
175
176
  } else
    DEBUG(errs() << "Successfully loaded hpvm-rt API module\n");
177
178

  // Get or insert the global declarations for launch/wait functions
179
  DECLARE(llvm_hpvm_cpu_launch);
180
  DECLARE(malloc);
181
182
  DECLARE(llvm_hpvm_cpu_wait);
  DECLARE(llvm_hpvm_cpu_argument_ptr);
Yifan Zhao's avatar
Yifan Zhao committed
183
184
185
186
187
188
189
190
191
192
193
  DECLARE(llvm_hpvm_streamLaunch);
  DECLARE(llvm_hpvm_streamPush);
  DECLARE(llvm_hpvm_streamPop);
  DECLARE(llvm_hpvm_streamWait);
  DECLARE(llvm_hpvm_createBindInBuffer);
  DECLARE(llvm_hpvm_createBindOutBuffer);
  DECLARE(llvm_hpvm_createEdgeBuffer);
  DECLARE(llvm_hpvm_createLastInputBuffer);
  DECLARE(llvm_hpvm_createThread);
  DECLARE(llvm_hpvm_bufferPush);
  DECLARE(llvm_hpvm_bufferPop);
194
195
196
197
  DECLARE(llvm_hpvm_cpu_dstack_push);
  DECLARE(llvm_hpvm_cpu_dstack_pop);
  DECLARE(llvm_hpvm_cpu_getDimLimit);
  DECLARE(llvm_hpvm_cpu_getDimInstance);
198
199
200

  // Get or insert timerAPI functions as well if you plan to use timers
  initTimerAPI();
201

202
  // Insert init context in main
Yifan Zhao's avatar
Yifan Zhao committed
203
204
  Function *VI = M.getFunction("llvm.hpvm.init");
  assert(VI->getNumUses() == 1 && "__hpvm__init should only be used once");
205
  DEBUG(errs() << "Inserting cpu timer initialization\n");
Yifan Zhao's avatar
Yifan Zhao committed
206
  Instruction *I = cast<Instruction>(*VI->user_begin());
207
  initializeTimerSet(I);
Yifan Zhao's avatar
Yifan Zhao committed
208
209
210
211
  switchToTimer(hpvm_TimerID_NONE, I);
  // Insert print instruction at hpvm exit
  Function *VC = M.getFunction("llvm.hpvm.cleanup");
  assert(VC->getNumUses() == 1 && "__hpvm__cleanup should only be used once");
212

213
  DEBUG(errs() << "Inserting cpu timer print\n");
214
  printTimerSet(I);
215
216
}

217
218
/* Returns vector of all wait instructions
 */
219
std::vector<IntrinsicInst *> *CGT_CPU::getUseList(Value *GraphID) {
Yifan Zhao's avatar
Yifan Zhao committed
220
  std::vector<IntrinsicInst *> *UseList = new std::vector<IntrinsicInst *>();
221
  // It must have been loaded from memory somewhere
Yifan Zhao's avatar
Yifan Zhao committed
222
223
224
225
  for (Value::user_iterator ui = GraphID->user_begin(),
                            ue = GraphID->user_end();
       ui != ue; ++ui) {
    if (IntrinsicInst *waitI = dyn_cast<IntrinsicInst>(*ui)) {
226
      UseList->push_back(waitI);
227
    } else {
228
      llvm_unreachable("Error: Operation on Graph ID not supported!\n");
229
230
    }
  }
231
  return UseList;
232
233
}

234
235
236
/* Traverse the function argument list in reverse order to get argument at a
 * distance offset fromt he end of argument list of function F
 */
237
Argument *CGT_CPU::getArgumentFromEnd(Function *F, unsigned offset) {
Yifan Zhao's avatar
Yifan Zhao committed
238
239
  assert((F->getFunctionType()->getNumParams() >= offset && offset > 0) &&
         "Invalid offset to access arguments!");
240
241
242
  Function::arg_iterator e = F->arg_end();
  // Last element of argument iterator is dummy. Skip it.
  e--;
Yifan Zhao's avatar
Yifan Zhao committed
243
244
  Argument *arg;
  for (; offset != 0; e--) {
245
    offset--;
kotsifa2's avatar
kotsifa2 committed
246
    arg = &*e;
247
248
249
250
  }
  return arg;
}

251
252
253
254
255
256
257
258
259
260
261
/* Add Loop around the instruction I
 * Algorithm:
 * (1) Split the basic block of instruction I into three parts, where the
 * middleblock/body would contain instruction I.
 * (2) Add phi node before instruction I. Add incoming edge to phi node from
 * predecessor
 * (3) Add increment and compare instruction to index variable
 * (4) Replace terminator/branch instruction of body with conditional branch
 * which loops over bidy if true and goes to end if false
 * (5) Update phi node of body
 */
262
void CGT_CPU::addWhileLoop(Instruction *CondBlockStart, Instruction *BodyStart,
Yifan Zhao's avatar
Yifan Zhao committed
263
264
265
266
267
                           Instruction *BodyEnd, Value *TerminationCond) {
  BasicBlock *Entry = CondBlockStart->getParent();
  BasicBlock *CondBlock = Entry->splitBasicBlock(CondBlockStart, "condition");
  BasicBlock *WhileBody = CondBlock->splitBasicBlock(BodyStart, "while.body");
  BasicBlock *WhileEnd = WhileBody->splitBasicBlock(BodyEnd, "while.end");
268
269
270

  // Replace the terminator instruction of conditional with new conditional
  // branch which goes to while.body if true and branches to while.end otherwise
Yifan Zhao's avatar
Yifan Zhao committed
271
  BranchInst *BI = BranchInst::Create(WhileEnd, WhileBody, TerminationCond);
272
273
274
  ReplaceInstWithInst(CondBlock->getTerminator(), BI);

  // While Body should jump to condition block
Yifan Zhao's avatar
Yifan Zhao committed
275
  BranchInst *UnconditionalBranch = BranchInst::Create(CondBlock);
276
277
278
  ReplaceInstWithInst(WhileBody->getTerminator(), UnconditionalBranch);
}

279
// Add an i64 iteration counter to a while loop made of the blocks Entry, Cond
// and Body. A PHI node "cnt" is inserted at the first non-PHI position of
// Cond; it is 0 when coming from Entry and "cnt_incr" (cnt + 1, nsw, inserted
// before the terminator of Body) when coming from Body.
// Returns the PHI node.
Instruction *CGT_CPU::addWhileLoopCounter(BasicBlock *Entry, BasicBlock *Cond,
                                          BasicBlock *Body) {
  LLVMContext &Ctx = Entry->getParent()->getParent()->getContext();
  Type *I64 = Type::getInt64Ty(Ctx);
  Constant *Zero = ConstantInt::get(I64, 0, true);
  Constant *One = ConstantInt::get(I64, 1, true);

  // Counter value seen by the condition block
  PHINode *Counter = PHINode::Create(I64, 2, "cnt", Cond->getFirstNonPHI());

  // Counter value after one more trip through the body
  Instruction *Bumped =
      BinaryOperator::CreateNSW(Instruction::BinaryOps::Add, Counter, One,
                                "cnt_incr", Body->getTerminator());

  // Set incoming values for Phi node
  Counter->addIncoming(Zero, Entry);
  Counter->addIncoming(Bumped, Body);

  return Counter;
}

303
304
305
306
307
308
309
310
311
312
313
/* Add Loop around the instruction I
 * Algorithm:
 * (1) Split the basic block of instruction I into three parts, where the
 * middleblock/body would contain instruction I.
 * (2) Add phi node before instruction I. Add incoming edge to phi node from
 * predecessor
 * (3) Add increment and compare instruction to index variable
 * (4) Replace terminator/branch instruction of body with conditional branch
 * which loops over bidy if true and goes to end if false
 * (5) Update phi node of body
 */
314
Value *CGT_CPU::addLoop(Instruction *I, Value *limit, const Twine &indexName) {
Yifan Zhao's avatar
Yifan Zhao committed
315
316
  BasicBlock *Entry = I->getParent();
  BasicBlock *ForBody = Entry->splitBasicBlock(I, "for.body");
317

kotsifa2's avatar
kotsifa2 committed
318
319
  BasicBlock::iterator i(I);
  ++i;
Yifan Zhao's avatar
Yifan Zhao committed
320
  Instruction *NextI = &*i;
321
322
  // Next Instruction should also belong to the same basic block as the basic
  // block will have a terminator instruction
Yifan Zhao's avatar
Yifan Zhao committed
323
324
325
  assert(NextI->getParent() == ForBody &&
         "Next Instruction should also belong to the same basic block!");
  BasicBlock *ForEnd = ForBody->splitBasicBlock(NextI, "for.end");
326
327

  // Add Phi Node for index variable
Yifan Zhao's avatar
Yifan Zhao committed
328
329
  PHINode *IndexPhi = PHINode::Create(Type::getInt64Ty(I->getContext()), 2,
                                      "index." + indexName, I);
330
331

  // Add incoming edge to phi
332
  IndexPhi->addIncoming(ConstantInt::get(Type::getInt64Ty(I->getContext()), 0),
333
334
                        Entry);
  // Increment index variable
Yifan Zhao's avatar
Yifan Zhao committed
335
336
337
338
  BinaryOperator *IndexInc = BinaryOperator::Create(
      Instruction::Add, IndexPhi,
      ConstantInt::get(Type::getInt64Ty(I->getContext()), 1),
      "index." + indexName + ".inc", ForBody->getTerminator());
339
340

  // Compare index variable with limit
Yifan Zhao's avatar
Yifan Zhao committed
341
342
343
  CmpInst *Cond =
      CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT, IndexInc, limit,
                      "cond." + indexName, ForBody->getTerminator());
344
345
346

  // Replace the terminator instruction of for.body with new conditional
  // branch which loops over body if true and branches to for.end otherwise
Yifan Zhao's avatar
Yifan Zhao committed
347
  BranchInst *BI = BranchInst::Create(ForBody, ForEnd, Cond);
348
349
350
351
352
353
354
  ReplaceInstWithInst(ForBody->getTerminator(), BI);

  // Add incoming edge to phi node in body
  IndexPhi->addIncoming(IndexInc, ForBody);
  return IndexPhi;
}

355
356
357
358
// Returns a packed struct type. The structtype is created by packing the input
// types, output types and isLastInput buffer type. All the streaming
// inputs/outputs are converted to i8*, since this is the type of buffer
// handles.
359
StructType *CGT_CPU::getArgumentListStructTy(DFNode *C) {
Yifan Zhao's avatar
Yifan Zhao committed
360
  std::vector<Type *> TyList;
361
  // Input types
Yifan Zhao's avatar
Yifan Zhao committed
362
363
364
365
  Function *CF = C->getFuncPointer();
  for (Function::arg_iterator ai = CF->arg_begin(), ae = CF->arg_end();
       ai != ae; ++ai) {
    if (C->getInDFEdgeAt(ai->getArgNo())->isStreamingEdge())
366
      TyList.push_back(Type::getInt8PtrTy(CF->getContext()));
Yifan Zhao's avatar
Yifan Zhao committed
367
    else
368
369
370
      TyList.push_back(ai->getType());
  }
  // Output Types
Yifan Zhao's avatar
Yifan Zhao committed
371
  StructType *OutStructTy = cast<StructType>(CF->getReturnType());
372
373
  for (unsigned i = 0; i < OutStructTy->getNumElements(); i++) {
    // All outputs of a node are streaming edge
Yifan Zhao's avatar
Yifan Zhao committed
374
375
    assert(C->getOutDFEdgeAt(i)->isStreamingEdge() &&
           "All output edges of child node have to be streaming");
376
377
378
379
380
    TyList.push_back(Type::getInt8PtrTy(CF->getContext()));
  }
  // isLastInput buffer element
  TyList.push_back(Type::getInt8PtrTy(CF->getContext()));

Yifan Zhao's avatar
Yifan Zhao committed
381
382
383
  StructType *STy =
      StructType::create(CF->getContext(), TyList,
                         Twine("struct.thread." + CF->getName()).str(), true);
384
385
386
  return STy;
}

387
void CGT_CPU::startNodeThread(DFNode *C, std::vector<Value *> Args,
Yifan Zhao's avatar
Yifan Zhao committed
388
389
390
391
392
                              DenseMap<DFEdge *, Value *> EdgeBufferMap,
                              Value *isLastInputBuffer, Value *graphID,
                              Instruction *IB) {
  DEBUG(errs() << "Starting Pipeline for child node: "
               << C->getFuncPointer()->getName() << "\n");
393
  // Create a filter/pipeline function for the child node
Yifan Zhao's avatar
Yifan Zhao committed
394
395
  Function *C_Pipeline = createFunctionFilter(C);
  Function *CF = C->getFuncPointer();
396
397
398

  // Get module context and i32 0 constant, as they would be frequently used in
  // this function.
Yifan Zhao's avatar
Yifan Zhao committed
399
400
  LLVMContext &Ctx = IB->getParent()->getContext();
  Constant *IntZero = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
401
402
403
404
405

  // Marshall arguments
  // Create a packed struct type with inputs of C followed by outputs and then
  // another i8* to indicate isLastInput buffer. Streaming inputs are replaced
  // by i8*
406
  //
Yifan Zhao's avatar
Yifan Zhao committed
407
  StructType *STy = getArgumentListStructTy(C);
408
  // Allocate the struct on heap *NOT* stack and bitcast i8* to STy*
Yifan Zhao's avatar
Yifan Zhao committed
409
410
411
412
413
414
415
  CallInst *CI =
      CallInst::Create(malloc, ArrayRef<Value *>(ConstantExpr::getSizeOf(STy)),
                       C->getFuncPointer()->getName() + ".inputs", IB);
  CastInst *Struct = BitCastInst::CreatePointerCast(
      CI, STy->getPointerTo(), CI->getName() + ".i8ptr", IB);
  // AllocaInst* AI = new AllocaInst(STy,
  // C->getFuncPointer()->getName()+".inputs", IB);
416
  // Insert elements in the struct
Yifan Zhao's avatar
Yifan Zhao committed
417
418
  DEBUG(errs() << "Marshall inputs for child node: "
               << C->getFuncPointer()->getName() << "\n");
419
  // Marshall Inputs
Yifan Zhao's avatar
Yifan Zhao committed
420
  for (unsigned i = 0; i < CF->getFunctionType()->getNumParams(); i++) {
421
    // Create constant int (i)
Yifan Zhao's avatar
Yifan Zhao committed
422
    Constant *Int_i = ConstantInt::get(Type::getInt32Ty(Ctx), i);
423
    // Get Element pointer instruction
Yifan Zhao's avatar
Yifan Zhao committed
424
425
426
427
428
    Value *GEPIndices[] = {IntZero, Int_i};
    GetElementPtrInst *GEP = GetElementPtrInst::Create(
        nullptr, Struct, ArrayRef<Value *>(GEPIndices, 2),
        Struct->getName() + ".arg_" + Twine(i), IB);
    DFEdge *E = C->getInDFEdgeAt(i);
429
430
    if (E->getSourceDF()->isEntryNode()) {
      // This is a Bind Input Edge
Yifan Zhao's avatar
Yifan Zhao committed
431
      if (E->isStreamingEdge()) {
432
        // Streaming Bind Input edge. Get buffer corresponding to it
Yifan Zhao's avatar
Yifan Zhao committed
433
434
        assert(EdgeBufferMap.count(E) &&
               "No mapping buffer for a Streaming Bind DFEdge!");
435
        new StoreInst(EdgeBufferMap[E], GEP, IB);
Yifan Zhao's avatar
Yifan Zhao committed
436
      } else {
437
438
439
        // Non-streaming Bind edge
        new StoreInst(Args[i], GEP, IB);
      }
Yifan Zhao's avatar
Yifan Zhao committed
440
441
    } else {
      // This is an edge between siblings.
442
443
      // This must be an streaming edge. As it is our assumption that all edges
      // between two nodes in a DFG are streaming.
Yifan Zhao's avatar
Yifan Zhao committed
444
445
      assert(EdgeBufferMap.count(E) &&
             "No mapping buffer for a Streaming DFEdge!");
446
447
448
449
      new StoreInst(EdgeBufferMap[E], GEP, IB);
    }
  }
  unsigned numInputs = CF->getFunctionType()->getNumParams();
450
  unsigned numOutputs = cast<StructType>(CF->getReturnType())->getNumElements();
451
  // Marshall Outputs
Yifan Zhao's avatar
Yifan Zhao committed
452
453
454
  DEBUG(errs() << "Marshall outputs for child node: "
               << C->getFuncPointer()->getName() << "\n");
  for (unsigned i = 0; i < numOutputs; i++) {
455
    // Create constant int (i+numInputs)
Yifan Zhao's avatar
Yifan Zhao committed
456
    Constant *Int_i = ConstantInt::get(Type::getInt32Ty(Ctx), i + numInputs);
457
    // Get Element pointer instruction
Yifan Zhao's avatar
Yifan Zhao committed
458
459
460
461
462
463
464
465
466
    Value *GEPIndices[] = {IntZero, Int_i};
    GetElementPtrInst *GEP = GetElementPtrInst::Create(
        nullptr, Struct, ArrayRef<Value *>(GEPIndices, 2),
        Struct->getName() + ".out_" + Twine(i), IB);
    DFEdge *E = C->getOutDFEdgeAt(i);
    assert(E->isStreamingEdge() &&
           "Output Edge must be streaming of all nodes");
    assert(EdgeBufferMap.count(E) &&
           "No mapping buffer for a Out Streaming DFEdge!");
467
468
469
    new StoreInst(EdgeBufferMap[E], GEP, IB);
  }
  // Marshall last argument. isLastInput buffer
Yifan Zhao's avatar
Yifan Zhao committed
470
471
  DEBUG(errs() << "Marshall isLastInput for child node: "
               << C->getFuncPointer()->getName() << "\n");
472
  // Create constant int (i+numInputs)
Yifan Zhao's avatar
Yifan Zhao committed
473
474
  Constant *Int_index =
      ConstantInt::get(Type::getInt32Ty(Ctx), numInputs + numOutputs);
475
  // Get Element pointer instruction
Yifan Zhao's avatar
Yifan Zhao committed
476
477
478
479
  Value *GEPIndices[] = {IntZero, Int_index};
  GetElementPtrInst *GEP = GetElementPtrInst::Create(
      nullptr, Struct, ArrayRef<Value *>(GEPIndices, 2),
      Struct->getName() + ".isLastInput", IB);
480
481
482
483
  new StoreInst(isLastInputBuffer, GEP, IB);

  // AllocaInst AI points to memory with all the arguments packed
  // Call runtime to create the thread with these arguments
Yifan Zhao's avatar
Yifan Zhao committed
484
485
486
  DEBUG(errs() << "Start Thread for child node: "
               << C->getFuncPointer()->getName() << "\n");
  // DEBUG(errs() << *llvm_hpvm_createThread << "\n");
487
488
  DEBUG(errs() << *graphID->getType() << "\n");
  DEBUG(errs() << *C_Pipeline->getType() << "\n");
489
  DEBUG(errs() << *Struct->getType() << "\n");
490
  // Bitcast AI to i8*
Yifan Zhao's avatar
Yifan Zhao committed
491
492
493
494
495
  CastInst *BI = BitCastInst::CreatePointerCast(Struct, Type::getInt8PtrTy(Ctx),
                                                Struct->getName(), IB);
  Value *CreateThreadArgs[] = {graphID, C_Pipeline, BI};
  CallInst::Create(llvm_hpvm_createThread,
                   ArrayRef<Value *>(CreateThreadArgs, 3), "", IB);
496
497
}

498
/* Generate the launch function of the streaming graph rooted at N.
 *
 * The launch function has type void (i8* data.addr, i8* graphID) and is named
 * "<root function name>.LaunchFunction". data.addr is the address of the
 * packed input data, i.e. the input arguments of the root function in order.
 * Generation steps:
 * (1) Create an empty launch function containing only a return.
 * (2) Extract each of the inputs from data.addr (streaming inputs are
 *     extracted as i8* buffers).
 * (3) Create buffers for all the streaming edges of the child graph, and an
 *     isLastInput buffer for every non-dummy child node.
 * (4) For each non-dummy child node, marshall its arguments (buffers in place
 *     of streaming arguments) and start its thread.
 * Returns the generated function.
 */
Function *CGT_CPU::createLaunchFunction(DFInternalNode *N) {
  DEBUG(errs() << "Generating Streaming Launch Function\n");
  // Function and child graph associated with Node N
  Function *NF = N->getFuncPointer();
  DFGraph *ChildGraph = N->getChildGraph();
  LLVMContext &Ctx = M.getContext();

  // Map from Streaming edge to buffer
  DenseMap<DFEdge *, Value *> EdgeBufferMap;

  // (1) Create Launch Function of type void (i8* args, i8* GraphID)
  Type *i8PtrTy = Type::getInt8Ty(Ctx)->getPointerTo();
  Type *LaunchParamTys[] = {i8PtrTy, i8PtrTy};
  FunctionType *LaunchFuncTy = FunctionType::get(
      Type::getVoidTy(Ctx), ArrayRef<Type *>(LaunchParamTys, 2), false);
  Function *LaunchFunc = Function::Create(
      LaunchFuncTy, NF->getLinkage(), NF->getName() + ".LaunchFunction", &M);
  DEBUG(errs() << "Generating Code for Streaming Launch Function\n");

  // Name the two arguments: the packed input data and the graph ID
  // NOTE-HS: Check correctness with Maria
  Function::arg_iterator LaunchArg = LaunchFunc->arg_begin();
  Argument *data = &*LaunchArg;
  Argument *graphID = &*(LaunchArg + 1);
  data->setName("data.addr");
  graphID->setName("graphID");

  // Add an entry block holding just a return; all generated code is inserted
  // before this return.
  DEBUG(errs() << *LaunchFunc->getReturnType() << "\n");
  BasicBlock *EntryBB = BasicBlock::Create(Ctx, "entry", LaunchFunc);
  ReturnInst *RI = ReturnInst::Create(Ctx, EntryBB);

  DEBUG(errs() << "Created Empty Launch Function\n");

  // (2) Extract each of inputs from data.addr
  std::vector<Type *> TyList;
  std::vector<std::string> names;
  auto *EntryNode = ChildGraph->getEntry();
  for (Argument &Arg : NF->args()) {
    if (EntryNode->getOutDFEdgeAt(Arg.getArgNo())->isStreamingEdge()) {
      // Streaming inputs arrive as buffer handles
      TyList.push_back(i8PtrTy);
      names.push_back(Twine(Arg.getName() + "_buffer").str());
    } else {
      TyList.push_back(Arg.getType());
      names.push_back(Arg.getName());
    }
  }
  std::vector<Value *> Args = extractElements(data, TyList, names, RI);
  DEBUG(errs() << "Launch function for " << NF->getName() << *LaunchFunc
               << "\n");

  // (3) Create buffers for all the streaming edges
  for (DFGraph::dfedge_iterator EI = ChildGraph->dfedge_begin(),
                                EE = ChildGraph->dfedge_end();
       EI != EE; ++EI) {
    DFEdge *Edge = *EI;
    DEBUG(errs() << *Edge->getType() << "\n");
    Value *size = ConstantExpr::getSizeOf(Edge->getType());
    if (!Edge->isStreamingEdge())
      continue;

    Value *CallArgs[] = {graphID, size};
    CallInst *Buffer;
    if (Edge->getSourceDF()->isEntryNode()) {
      // Bind Input Edge: the runtime is also told the source position
      Constant *Int_ArgNo =
          ConstantInt::get(Type::getInt32Ty(Ctx), Edge->getSourcePosition());
      Value *BindInCallArgs[] = {graphID, size, Int_ArgNo};
      Buffer = CallInst::Create(
          llvm_hpvm_createBindInBuffer, ArrayRef<Value *>(BindInCallArgs, 3),
          "BindIn." + Edge->getDestDF()->getFuncPointer()->getName(), RI);
    } else if (Edge->getDestDF()->isExitNode()) {
      // Bind Output Edge
      Buffer = CallInst::Create(
          llvm_hpvm_createBindOutBuffer, ArrayRef<Value *>(CallArgs, 2),
          "BindOut." + Edge->getSourceDF()->getFuncPointer()->getName(), RI);
    } else {
      // Streaming Edge between two child nodes
      Buffer = CallInst::Create(
          llvm_hpvm_createEdgeBuffer, ArrayRef<Value *>(CallArgs, 2),
          Edge->getSourceDF()->getFuncPointer()->getName() + "." +
              Edge->getDestDF()->getFuncPointer()->getName(),
          RI);
    }
    EdgeBufferMap[Edge] = Buffer;
  }

  // Create buffer for isLastInput for all the child nodes
  DenseMap<DFNode *, Value *> NodeLastInputMap;
  for (DFNode *Child : *ChildGraph) {
    if (Child->isDummyNode())
      continue;
    Value *size = ConstantExpr::getSizeOf(Type::getInt64Ty(Ctx));
    Value *CallArgs[] = {graphID, size};
    NodeLastInputMap[Child] = CallInst::Create(
        llvm_hpvm_createLastInputBuffer, ArrayRef<Value *>(CallArgs, 2),
        "BindIn.isLastInput." + Child->getFuncPointer()->getName(), RI);
  }

  DEBUG(errs() << "Start Each child node filter\n");
  // (4) Marshall arguments for each child node and start the thread with its
  //     pipeline function
  for (DFNode *Child : *ChildGraph) {
    // Skip dummy node call
    if (Child->isDummyNode())
      continue;

    // Marshall all the arguments for this node into an i8*
    // Pass to the runtime to create the thread
    // Start the thread for child node
    startNodeThread(Child, Args, EdgeBufferMap, NodeLastInputMap[Child],
                    graphID, RI);
  }

  DEBUG(errs() << "Launch function:\n");
  DEBUG(errs() << *LaunchFunc << "\n");

  return LaunchFunc;
}

/* This function does the steps necessary to launch a streaming graph
 * Steps
 * Create Pipeline/Filter function for each node in child graph of Root
 * Create Functions DFGLaunch, DFGPush, DFGPop, DFGWait
 * Modify each of the intrinsics in host code
 * Launch, Push, Pop, Wait
 */
646
void CGT_CPU::codeGenLaunchStreaming(DFInternalNode *Root) {
Yifan Zhao's avatar
Yifan Zhao committed
647
648
  IntrinsicInst *LI = Root->getInstruction();
  Function *RootLaunch = createLaunchFunction(Root);
649
  // Substitute launch intrinsic main
Yifan Zhao's avatar
Yifan Zhao committed
650
651
652
653
654
  DEBUG(errs() << "Substitute launch intrinsic\n");
  Value *LaunchInstArgs[] = {RootLaunch, LI->getArgOperand(1)};
  CallInst *LaunchInst = CallInst::Create(
      llvm_hpvm_streamLaunch, ArrayRef<Value *>(LaunchInstArgs, 2),
      "graph" + Root->getFuncPointer()->getName(), LI);
655
656

  DEBUG(errs() << *LaunchInst << "\n");
657
  // Replace all wait instructions with cpu specific wait instructions
Yifan Zhao's avatar
Yifan Zhao committed
658
659
660
661
662
663
664
665
666
  DEBUG(errs() << "Substitute wait, push, pop intrinsics\n");
  std::vector<IntrinsicInst *> *UseList = getUseList(LI);
  for (unsigned i = 0; i < UseList->size(); ++i) {
    IntrinsicInst *II = UseList->at(i);
    CallInst *CI;
    Value *PushArgs[] = {LaunchInst, II->getOperand(1)};
    switch (II->getIntrinsicID()) {
    case Intrinsic::hpvm_wait:
      CI = CallInst::Create(llvm_hpvm_streamWait, ArrayRef<Value *>(LaunchInst),
667
668
                            "");
      break;
Yifan Zhao's avatar
Yifan Zhao committed
669
670
671
    case Intrinsic::hpvm_push:
      CI = CallInst::Create(llvm_hpvm_streamPush,
                            ArrayRef<Value *>(PushArgs, 2), "");
672
      break;
Yifan Zhao's avatar
Yifan Zhao committed
673
674
    case Intrinsic::hpvm_pop:
      CI = CallInst::Create(llvm_hpvm_streamPop, ArrayRef<Value *>(LaunchInst),
675
676
677
                            "");
      break;
    default:
Yifan Zhao's avatar
Yifan Zhao committed
678
679
      llvm_unreachable(
          "GraphID is used by an instruction other than wait, push, pop");
680
681
682
683
684
685
686
    };
    DEBUG(errs() << "Replace:\n\t" << *II << "\n");
    ReplaceInstWithInst(II, CI);
    DEBUG(errs() << "\twith " << *CI << "\n");
  }
}

687
/* Lower the launch intrinsic of a (non-streaming) dataflow graph rooted at
 * Root.
 *
 * Creates the wrapper function "LaunchDataflowGraph" of type i8*(i8*). The
 * wrapper unpacks the arguments of the CPU function generated for Root from
 * its i8* parameter, calls that function and stores the result back through
 * the same pointer. A call to llvm_hpvm_cpu_launch taking the wrapper is then
 * inserted before the launch intrinsic, and every intrinsic reported by
 * getUseList for the launch intrinsic (wait, push, pop) is replaced with a
 * call to the corresponding runtime function.
 */
void CGT_CPU::codeGenLaunch(DFInternalNode *Root) {
  // TODO: Place an assert to check if the constant passed by launch intrinsic
  // as the number of arguments to DFG is same as the number of arguments of the
  // root of DFG
  DEBUG(errs() << "Generating Launch Function\n");

  // Get Launch Instruction
  IntrinsicInst *LI = Root->getInstruction();
  switchToTimer(hpvm_TimerID_PTHREAD_CREATE, LI);

  /* Now we have all the necessary global declarations necessary to generate the
   * Launch function, pointer to which can be passed to pthread utils to execute
   * DFG. The Launch function has just one input: i8* data.addr
   * This is the address of the all the input data that needs to be passed to
   * this function. In our case it contains the input arguments of the Root
   * function in the correct order.
   * (1) Create an empty Launch function of type i8*(i8*)
   * (2) Extract each of inputs from data.addr and pass them as arguments to the
   * call to Root function
   * (3) The return value from Root is stored in memory, pointer to which is
   * passed to pthread_exit call.
   */
  // Create Launch Function of type i8*(i8*) which calls the root function
  Type *i8Ty = Type::getInt8Ty(M.getContext());
  FunctionType *AppFuncTy = FunctionType::get(
      i8Ty->getPointerTo(), ArrayRef<Type *>(i8Ty->getPointerTo()), false);
  Function *AppFunc =
      Function::Create(AppFuncTy, Root->getFuncPointer()->getLinkage(),
                       "LaunchDataflowGraph", &M);

  // Give a name to the argument which is used pass data to this thread
  Value *data = &*AppFunc->arg_begin();
  data->setName("data.addr");

  // Add a basic block to this empty function and a return null statement to it.
  // All further instructions are inserted relative to this return (RI) or to
  // the call to the root function (CI).
  BasicBlock *BB = BasicBlock::Create(AppFunc->getContext(), "entry", AppFunc);
  ReturnInst *RI =
      ReturnInst::Create(AppFunc->getContext(),
                         Constant::getNullValue(AppFunc->getReturnType()), BB);
  switchToTimer(hpvm_TimerID_ARG_UNPACK, RI);

  DEBUG(errs() << "Created Empty Launch Function\n");

  // Find the CPU function generated for Root
  Function *RootF_CPU = Root->getGenFuncForTarget(hpvm::CPU_TARGET);
  assert(RootF_CPU && "Error: No generated CPU function for Root node\n");
  assert(Root->hasCPUGenFuncForTarget(hpvm::CPU_TARGET) &&
         "Error: Generated Function for Root node with no cpu wrapper\n");

  // Generate a call to RootF_CPU with null parameters for now; the real
  // operands are patched in below once they have been extracted from data.
  FunctionType *RootFTy = RootF_CPU->getFunctionType();
  std::vector<Value *> Args;
  Args.reserve(RootFTy->getNumParams());
  for (unsigned i = 0; i < RootFTy->getNumParams(); i++)
    Args.push_back(Constant::getNullValue(RootFTy->getParamType(i)));
  CallInst *CI =
      CallInst::Create(RootF_CPU, Args, RootF_CPU->getName() + ".output", RI);

  // Extract input data from i8* data.addr and patch them to correct argument of
  // call to RootF_CPU. For each argument record its type and its name.
  std::vector<Type *> TyList;
  std::vector<std::string> names;
  for (Function::arg_iterator ai = RootF_CPU->arg_begin(),
                              ae = RootF_CPU->arg_end();
       ai != ae; ++ai) {
    TyList.push_back(ai->getType());
    // Explicit conversion: getName() yields a StringRef, not a std::string.
    names.push_back(ai->getName().str());
  }
  std::vector<Value *> elements = extractElements(data, TyList, names, CI);

  // Patch the elements to the call arguments
  for (unsigned i = 0; i < CI->getNumArgOperands(); i++)
    CI->setArgOperand(i, elements[i]);

  // Add timers around Call to RootF_CPU function
  switchToTimer(hpvm_TimerID_COMPUTATION, CI);
  switchToTimer(hpvm_TimerID_OUTPUT_PACK, RI);

  StructType *RootRetTy = cast<StructType>(RootFTy->getReturnType());

  // if Root has non empty return
  if (RootRetTy->getNumElements()) {
    // We can't access the type of the arg struct - build it: one field per
    // argument of RootF_CPU followed by one field for the returned struct.
    std::vector<Type *> ArgStructFieldTys;
    for (Function::arg_iterator ai = RootF_CPU->arg_begin(),
                                ae = RootF_CPU->arg_end();
         ai != ae; ++ai)
      ArgStructFieldTys.push_back(ai->getType());
    ArgStructFieldTys.push_back(CI->getType());

    StructType *ArgStructTy = StructType::create(
        M.getContext(), ArrayRef<Type *>(ArgStructFieldTys),
        (RootF_CPU->getName() + ".arg.struct.ty").str(), true);

    // Cast the data pointer to the type of the arg struct
    CastInst *OutputAddrCast = CastInst::CreatePointerCast(
        data, ArgStructTy->getPointerTo(), "argStructCast.addr", RI);

    // Result struct is the last element of the packed struct passed to launch
    unsigned outStructIdx = ArgStructTy->getNumElements() - 1;

    ConstantInt *IntZero =
        ConstantInt::get(Type::getInt32Ty(M.getContext()), 0);
    ConstantInt *IntIdx =
        ConstantInt::get(Type::getInt32Ty(M.getContext()), outStructIdx);
    Value *GEPIIdxList[] = {IntZero, IntIdx};

    // Get data pointer to the last element of struct - result field
    GetElementPtrInst *OutGEPI = GetElementPtrInst::Create(
        ArgStructTy, OutputAddrCast, ArrayRef<Value *>(GEPIIdxList, 2),
        CI->getName() + ".addr", RI);

    // Store result there
    new StoreInst(CI, OutGEPI, RI);
  } else {
    // There is no return - no need to actually code gen, but for fewer
    // changes maintain what code was already doing
    // We were casting the data pointer to the result type of Root, and
    // returning result there. This would work at the LLVM level, but not
    // at the C level, thus the rewrite.
    CastInst *OutputAddrCast = CastInst::CreatePointerCast(
        data, CI->getType()->getPointerTo(), CI->getName() + ".addr", RI);
    new StoreInst(CI, OutputAddrCast, RI);
  }

  switchToTimer(hpvm_TimerID_NONE, RI);

  DEBUG(errs() << "Application specific function:\n");
  DEBUG(errs() << *AppFunc << "\n");

  // Substitute launch intrinsic main: the runtime launch call is inserted
  // before LI; LI itself is left in place here.
  Value *LaunchInstArgs[] = {AppFunc, LI->getArgOperand(1)};
  CallInst *LaunchInst = CallInst::Create(
      llvm_hpvm_cpu_launch, ArrayRef<Value *>(LaunchInstArgs, 2),
      "graph" + Root->getFuncPointer()->getName(), LI);

  DEBUG(errs() << *LaunchInst << "\n");

  // Replace all wait/push/pop intrinsics with cpu specific runtime calls
  // NOTE(review): getUseList returns a raw pointer whose ownership is not
  // visible from here - confirm whether the caller is expected to free it.
  Value *GraphHandle = LaunchInst;
  std::vector<IntrinsicInst *> *UseList = getUseList(LI);
  for (IntrinsicInst *II : *UseList) {
    CallInst *NewCall = nullptr;
    switch (II->getIntrinsicID()) {
    case Intrinsic::hpvm_wait:
      NewCall = CallInst::Create(llvm_hpvm_cpu_wait,
                                 ArrayRef<Value *>(GraphHandle), "");
      break;
    case Intrinsic::hpvm_push:
      // NOTE(review): unlike codeGenLaunchStreaming, the pushed operand is not
      // forwarded here - confirm against llvm_hpvm_bufferPush's declaration.
      NewCall = CallInst::Create(llvm_hpvm_bufferPush,
                                 ArrayRef<Value *>(GraphHandle), "");
      break;
    case Intrinsic::hpvm_pop:
      NewCall = CallInst::Create(llvm_hpvm_bufferPop,
                                 ArrayRef<Value *>(GraphHandle), "");
      break;
    default:
      llvm_unreachable(
          "GraphID is used by an instruction other than wait, push, pop");
    }
    ReplaceInstWithInst(II, NewCall);
    DEBUG(errs() << *NewCall << "\n");
  }
}
850

851
// Return the value feeding input port i of Child.
//  - If the incoming edge starts at the entry node, the value is the argument
//    of ParentF_CPU at the edge's source position.
//  - Otherwise the edge starts at a sibling whose output is recorded in
//    OutputMap; an extractvalue instruction selecting the edge's source
//    position is created before InsertBefore and returned.
Value *CGT_CPU::getInValueAt(DFNode *Child, unsigned i, Function *ParentF_CPU,
                             Instruction *InsertBefore) {
  // TODO: Assumption is that each input port of a node has just one
  // incoming edge. May change later on.

  // Look up the edge bound to the requested input port and its producer.
  DFEdge *InEdge = Child->getInDFEdgeAt(i);
  assert(InEdge && "No incoming edge or binding for input element!");
  DFNode *Producer = InEdge->getSourceDF();

  // Edge from the entry node: forward the parent's own argument.
  if (Producer->isEntryNode()) {
    Value *ParentArg = getArgumentAt(ParentF_CPU, InEdge->getSourcePosition());
    DEBUG(errs() << "Argument " << i << " = " << *ParentArg << "\n");
    return ParentArg;
  }

  // Edge from a sibling: code must already have been generated for it.
  assert(OutputMap.count(Producer) &&
         "Source node call not found. Dependency violation!");
  Value *ProducerOutput = OutputMap[Producer];

  // Pick the element at the edge's source position out of the sibling's output.
  std::vector<unsigned> ElementIndex(1, InEdge->getSourcePosition());
  DEBUG(errs() << "Going to generate ExtarctVal inst from " << *ProducerOutput
               << "\n");
  return ExtractValueInst::Create(ProducerOutput, ElementIndex, "",
                                  InsertBefore);
}

888
void CGT_CPU::invokeChild_CPU(DFNode *C, Function *F_CPU,
Yifan Zhao's avatar
Yifan Zhao committed
889
890
891
                              ValueToValueMapTy &VMap, Instruction *IB) {
  Function *CF = C->getFuncPointer();

892
893
894
  //  Function* CF_CPU = C->getGenFunc();
  Function *CF_CPU = C->getGenFuncForTarget(hpvm::CPU_TARGET);
  assert(CF_CPU != NULL &&
Yifan Zhao's avatar
Yifan Zhao committed
895
         "Found leaf node for which code generation has not happened yet!\n");
896
897
  assert(C->hasCPUGenFuncForTarget(hpvm::CPU_TARGET) &&
         "The generated function to be called from cpu backend is not an cpu "
Yifan Zhao's avatar
Yifan Zhao committed
898
         "function\n");
899
  DEBUG(errs() << "Invoking child node" << CF_CPU->getName() << "\n");
900

Yifan Zhao's avatar
Yifan Zhao committed
901
  std::vector<Value *> Args;
902
903
904
  // Create argument list to pass to call instruction
  // First find the correct values using the edges
  // The remaining six values are inserted as constants for now.
Yifan Zhao's avatar
Yifan Zhao committed
905
  for (unsigned i = 0; i < CF->getFunctionType()->getNumParams(); i++) {
906
    Args.push_back(getInValueAt(C, i, F_CPU, IB));
907
908
  }

909
  Value *I64Zero = ConstantInt::get(Type::getInt64Ty(F_CPU->getContext()), 0);
Yifan Zhao's avatar
Yifan Zhao committed
910
  for (unsigned j = 0; j < 6; j++)
kotsifa2's avatar
kotsifa2 committed
911
    Args.push_back(I64Zero);
912

913
  DEBUG(errs() << "Gen Function type: " << *CF_CPU->getType() << "\n");
Yifan Zhao's avatar
Yifan Zhao committed
914
915
  DEBUG(errs() << "Node Function type: " << *CF->getType() << "\n");
  DEBUG(errs() << "Arguments: " << Args.size() << "\n");
916

917
  // Call the F_CPU function associated with this node
Yifan Zhao's avatar
Yifan Zhao committed
918
  CallInst *CI =