DFG2LLVM_CPU.cpp 68.9 KB
Newer Older
1
//===-------------------------- DFG2LLVM_CPU.cpp --------------------------===//
2
3
4
5
6
7
8
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
9
10
11
12
13
//
// This pass is responsible for generating code for host code and kernel code 
// for CPU target using HPVM dataflow graph.
//
//===----------------------------------------------------------------------===//
14

15
#define DEBUG_TYPE "DFG2LLVM_CPU"
Yifan Zhao's avatar
Yifan Zhao committed
16
17
18
#include "SupportHPVM/DFG2LLVM.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
19
#include "llvm/IR/InstIterator.h"
Yifan Zhao's avatar
Yifan Zhao committed
20
#include "llvm/IR/Module.h"
21
22
#include "llvm/IRReader/IRReader.h"
#include "llvm/Linker/Linker.h"
Yifan Zhao's avatar
Yifan Zhao committed
23
#include "llvm/Pass.h"
24
#include "llvm/Support/SourceMgr.h"
Yifan Zhao's avatar
Yifan Zhao committed
25
26
27
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
28

29
30
31
32
33
34
35
36
// LLVM_BUILD_DIR must already be defined when this file is compiled; stop the
// build with a readable message if it is not.
#ifndef LLVM_BUILD_DIR
#error LLVM_BUILD_DIR is not defined
#endif

// Two-level stringification: STRINGIFY macro-expands its argument first and
// STR_VALUE then turns the expanded tokens into a string literal, so
// LLVM_BUILD_DIR_STR is the value of LLVM_BUILD_DIR as a string literal.
#define STR_VALUE(X) #X
#define STRINGIFY(X) STR_VALUE(X)
#define LLVM_BUILD_DIR_STR STRINGIFY(LLVM_BUILD_DIR)

37
38
using namespace llvm;
using namespace builddfg;
39
using namespace dfg2llvm;
40

Yifan Zhao's avatar
Yifan Zhao committed
41
// HPVM Command line option to use timer or not
42
static cl::opt<bool> HPVMTimer_CPU("hpvm-timers-cpu",
Yifan Zhao's avatar
Yifan Zhao committed
43
                                   cl::desc("Enable hpvm timers"));
44

45
46
namespace {

47
48
// DFG2LLVM_CPU - The first implementation.
struct DFG2LLVM_CPU : public DFG2LLVM {
49
  static char ID; // Pass identification, replacement for typeid
50
  DFG2LLVM_CPU() : DFG2LLVM(ID) {}
51

52
53
private:
  // Member variables
54

55
  // Functions
56

57
58
59
60
61
public:
  bool runOnModule(Module &M);
};

// Visitor for Code generation traversal (tree traversal for now)
62
class CGT_CPU : public CodeGenTraversal {
63
64

private:
Yifan Zhao's avatar
Yifan Zhao committed
65
  // Member variables
66

67
  FunctionCallee malloc;
Yifan Zhao's avatar
Yifan Zhao committed
68
  // HPVM Runtime API
69
70
71
  FunctionCallee llvm_hpvm_cpu_launch;
  FunctionCallee llvm_hpvm_cpu_wait;
  FunctionCallee llvm_hpvm_cpu_argument_ptr;
Yifan Zhao's avatar
Yifan Zhao committed
72
73
74
75
76
77
78
79
80
81
82
83

  FunctionCallee llvm_hpvm_streamLaunch;
  FunctionCallee llvm_hpvm_streamPush;
  FunctionCallee llvm_hpvm_streamPop;
  FunctionCallee llvm_hpvm_streamWait;
  FunctionCallee llvm_hpvm_createBindInBuffer;
  FunctionCallee llvm_hpvm_createBindOutBuffer;
  FunctionCallee llvm_hpvm_createEdgeBuffer;
  FunctionCallee llvm_hpvm_createLastInputBuffer;
  FunctionCallee llvm_hpvm_createThread;
  FunctionCallee llvm_hpvm_bufferPush;
  FunctionCallee llvm_hpvm_bufferPop;
84
85
86
87
  FunctionCallee llvm_hpvm_cpu_dstack_push;
  FunctionCallee llvm_hpvm_cpu_dstack_pop;
  FunctionCallee llvm_hpvm_cpu_getDimLimit;
  FunctionCallee llvm_hpvm_cpu_getDimInstance;
Yifan Zhao's avatar
Yifan Zhao committed
88
89
90
91
92

  // Functions
  std::vector<IntrinsicInst *> *getUseList(Value *LI);
  Value *addLoop(Instruction *I, Value *limit, const Twine &indexName = "");
  void addWhileLoop(Instruction *, Instruction *, Instruction *, Value *);
kotsifa2's avatar
kotsifa2 committed
93
  Instruction *addWhileLoopCounter(BasicBlock *, BasicBlock *, BasicBlock *);
Yifan Zhao's avatar
Yifan Zhao committed
94
  Argument *getArgumentFromEnd(Function *F, unsigned offset);
95
  Value *getInValueAt(DFNode *Child, unsigned i, Function *ParentF_CPU,
Yifan Zhao's avatar
Yifan Zhao committed
96
                      Instruction *InsertBefore);
97
  void invokeChild_CPU(DFNode *C, Function *F_CPU, ValueToValueMapTy &VMap,
Yifan Zhao's avatar
Yifan Zhao committed
98
                       Instruction *InsertBefore);
99
  void invokeChild_PTX(DFNode *C, Function *F_CPU, ValueToValueMapTy &VMap,
Yifan Zhao's avatar
Yifan Zhao committed
100
101
102
103
104
105
106
107
                       Instruction *InsertBefore);
  StructType *getArgumentListStructTy(DFNode *);
  Function *createFunctionFilter(DFNode *C);
  void startNodeThread(DFNode *, std::vector<Value *>,
                       DenseMap<DFEdge *, Value *>, Value *, Value *,
                       Instruction *);
  Function *createLaunchFunction(DFInternalNode *);

108
109
  // Virtual Functions
  void init() {
110
111
    HPVMTimer = HPVMTimer_CPU;
    TargetName = "CPU";
112
113
  }
  void initRuntimeAPI();
Yifan Zhao's avatar
Yifan Zhao committed
114
115
116
117
  void codeGen(DFInternalNode *N);
  void codeGen(DFLeafNode *N);
  Function *codeGenStreamPush(DFInternalNode *N);
  Function *codeGenStreamPop(DFInternalNode *N);
118

119
120
public:
  // Constructor
121
  CGT_CPU(Module &_M, BuildDFG &_DFG) : CodeGenTraversal(_M, _DFG) {
122
    init();
123
124
    initRuntimeAPI();
  }
125

Yifan Zhao's avatar
Yifan Zhao committed
126
127
  void codeGenLaunch(DFInternalNode *Root);
  void codeGenLaunchStreaming(DFInternalNode *Root);
128
};
129

130
131
// Pass entry point: generates code for every root dataflow graph found by the
// BuildDFG analysis and replaces its launch with the CPU (or streaming)
// launch sequence. Always returns true.
bool DFG2LLVM_CPU::runOnModule(Module &M) {
  DEBUG(errs() << "\nDFG2LLVM_CPU PASS\n");

  // Get the BuildDFG Analysis Results:
  // - Dataflow graph
  // - Maps from i8* handles to DFNode and DFEdge
  BuildDFG &DFG = getAnalysis<BuildDFG>();

  // DFInternalNode *Root = DFG.getRoot();
  std::vector<DFInternalNode *> Roots = DFG.getRoots();
  // BuildDFG::HandleToDFNode &HandleToDFNodeMap = DFG.getHandleToDFNodeMap();
  // BuildDFG::HandleToDFEdge &HandleToDFEdgeMap = DFG.getHandleToDFEdgeMap();

  // Visitor for Code Generation Graph Traversal. It lives on the stack so it
  // is destroyed on every path out of this function without a manual delete.
  CGT_CPU CGTVisitor(M, DFG);

  // Iterate over all the DFGs and produce code for each one of them
  for (DFInternalNode *rootNode : Roots) {
    // Initiate code generation for root DFNode
    CGTVisitor.visit(rootNode);
    // Go ahead and replace the launch intrinsic with pthread call, otherwise
    // return now.
    // TODO: Later on, we might like to do this in a separate pass, which would
    // allow us the flexibility to switch between complete static code
    // generation for DFG or having a customized runtime+scheduler

    // Do streaming code generation if root node is streaming. Usual otherwise
    if (rootNode->isChildGraphStreaming())
      CGTVisitor.codeGenLaunchStreaming(rootNode);
    else
      CGTVisitor.codeGenLaunch(rootNode);
  }

  return true;
}
166

Yifan Zhao's avatar
Yifan Zhao committed
167
// Initialize the HPVM runtime API. This makes it easier to insert these calls
168
void CGT_CPU::initRuntimeAPI() {
169
170
171

  // Load Runtime API Module
  SMDiagnostic Err;
172

173
174
  std::string runtimeAPI = std::string(LLVM_BUILD_DIR_STR) +
                           "/tools/hpvm/projects/hpvm-rt/hpvm-rt.bc";
175

176
  runtimeModule = parseIRFile(runtimeAPI, Err, M.getContext());
Yifan Zhao's avatar
Yifan Zhao committed
177
  if (runtimeModule == nullptr) {
178
179
    DEBUG(errs() << Err.getMessage() << " " << runtimeAPI << "\n");
    assert(false && "couldn't parse runtime");
Yifan Zhao's avatar
Yifan Zhao committed
180
181
  } else
    DEBUG(errs() << "Successfully loaded hpvm-rt API module\n");
182
183

  // Get or insert the global declarations for launch/wait functions
184
  DECLARE(llvm_hpvm_cpu_launch);
185
  DECLARE(malloc);
186
187
  DECLARE(llvm_hpvm_cpu_wait);
  DECLARE(llvm_hpvm_cpu_argument_ptr);
Yifan Zhao's avatar
Yifan Zhao committed
188
189
190
191
192
193
194
195
196
197
198
  DECLARE(llvm_hpvm_streamLaunch);
  DECLARE(llvm_hpvm_streamPush);
  DECLARE(llvm_hpvm_streamPop);
  DECLARE(llvm_hpvm_streamWait);
  DECLARE(llvm_hpvm_createBindInBuffer);
  DECLARE(llvm_hpvm_createBindOutBuffer);
  DECLARE(llvm_hpvm_createEdgeBuffer);
  DECLARE(llvm_hpvm_createLastInputBuffer);
  DECLARE(llvm_hpvm_createThread);
  DECLARE(llvm_hpvm_bufferPush);
  DECLARE(llvm_hpvm_bufferPop);
199
200
201
202
  DECLARE(llvm_hpvm_cpu_dstack_push);
  DECLARE(llvm_hpvm_cpu_dstack_pop);
  DECLARE(llvm_hpvm_cpu_getDimLimit);
  DECLARE(llvm_hpvm_cpu_getDimInstance);
203
204
205

  // Get or insert timerAPI functions as well if you plan to use timers
  initTimerAPI();
206

207
  // Insert init context in main
Yifan Zhao's avatar
Yifan Zhao committed
208
209
  Function *VI = M.getFunction("llvm.hpvm.init");
  assert(VI->getNumUses() == 1 && "__hpvm__init should only be used once");
210
  DEBUG(errs() << "Inserting cpu timer initialization\n");
Yifan Zhao's avatar
Yifan Zhao committed
211
  Instruction *I = cast<Instruction>(*VI->user_begin());
212
  initializeTimerSet(I);
Yifan Zhao's avatar
Yifan Zhao committed
213
214
215
216
  switchToTimer(hpvm_TimerID_NONE, I);
  // Insert print instruction at hpvm exit
  Function *VC = M.getFunction("llvm.hpvm.cleanup");
  assert(VC->getNumUses() == 1 && "__hpvm__cleanup should only be used once");
217

218
  DEBUG(errs() << "Inserting cpu timer print\n");
219
  printTimerSet(I);
220
221
}

222
223
/* Returns vector of all wait instructions
 */
224
std::vector<IntrinsicInst *> *CGT_CPU::getUseList(Value *GraphID) {
Yifan Zhao's avatar
Yifan Zhao committed
225
  std::vector<IntrinsicInst *> *UseList = new std::vector<IntrinsicInst *>();
226
  // It must have been loaded from memory somewhere
Yifan Zhao's avatar
Yifan Zhao committed
227
228
229
230
  for (Value::user_iterator ui = GraphID->user_begin(),
                            ue = GraphID->user_end();
       ui != ue; ++ui) {
    if (IntrinsicInst *waitI = dyn_cast<IntrinsicInst>(*ui)) {
231
      UseList->push_back(waitI);
232
    } else {
233
      llvm_unreachable("Error: Operation on Graph ID not supported!\n");
234
235
    }
  }
236
  return UseList;
237
238
}

239
240
241
/* Traverse the function argument list in reverse order to get argument at a
 * distance offset fromt he end of argument list of function F
 */
242
Argument *CGT_CPU::getArgumentFromEnd(Function *F, unsigned offset) {
Yifan Zhao's avatar
Yifan Zhao committed
243
244
  assert((F->getFunctionType()->getNumParams() >= offset && offset > 0) &&
         "Invalid offset to access arguments!");
245
246
247
  Function::arg_iterator e = F->arg_end();
  // Last element of argument iterator is dummy. Skip it.
  e--;
Yifan Zhao's avatar
Yifan Zhao committed
248
249
  Argument *arg;
  for (; offset != 0; e--) {
250
    offset--;
kotsifa2's avatar
kotsifa2 committed
251
    arg = &*e;
252
253
254
255
  }
  return arg;
}

256
257
258
259
260
261
262
263
264
265
266
/* Add Loop around the instruction I
 * Algorithm:
 * (1) Split the basic block of instruction I into three parts, where the
 * middleblock/body would contain instruction I.
 * (2) Add phi node before instruction I. Add incoming edge to phi node from
 * predecessor
 * (3) Add increment and compare instruction to index variable
 * (4) Replace terminator/branch instruction of body with conditional branch
 * which loops over bidy if true and goes to end if false
 * (5) Update phi node of body
 */
267
void CGT_CPU::addWhileLoop(Instruction *CondBlockStart, Instruction *BodyStart,
Yifan Zhao's avatar
Yifan Zhao committed
268
269
270
271
272
                           Instruction *BodyEnd, Value *TerminationCond) {
  BasicBlock *Entry = CondBlockStart->getParent();
  BasicBlock *CondBlock = Entry->splitBasicBlock(CondBlockStart, "condition");
  BasicBlock *WhileBody = CondBlock->splitBasicBlock(BodyStart, "while.body");
  BasicBlock *WhileEnd = WhileBody->splitBasicBlock(BodyEnd, "while.end");
273
274
275

  // Replace the terminator instruction of conditional with new conditional
  // branch which goes to while.body if true and branches to while.end otherwise
Yifan Zhao's avatar
Yifan Zhao committed
276
  BranchInst *BI = BranchInst::Create(WhileEnd, WhileBody, TerminationCond);
277
278
279
  ReplaceInstWithInst(CondBlock->getTerminator(), BI);

  // While Body should jump to condition block
Yifan Zhao's avatar
Yifan Zhao committed
280
  BranchInst *UnconditionalBranch = BranchInst::Create(CondBlock);
281
282
283
  ReplaceInstWithInst(WhileBody->getTerminator(), UnconditionalBranch);
}

284
// Adds an i64 iteration counter to a while loop made of the blocks Entry ->
// Cond -> Body. A PHI named "cnt" is placed at the start of Cond; it is 0 when
// control arrives from Entry and "cnt_incr" (cnt + 1, nsw) when it arrives
// from Body. The increment is inserted just before Body's terminator.
// Returns the PHI node.
Instruction *CGT_CPU::addWhileLoopCounter(BasicBlock *Entry, BasicBlock *Cond,
                                          BasicBlock *Body) {
  Module *Mod = Entry->getParent()->getParent();
  IntegerType *I64 = Type::getInt64Ty(Mod->getContext());
  ConstantInt *Zero = ConstantInt::get(I64, 0, true);
  ConstantInt *One = ConstantInt::get(I64, 1, true);

  // Counter PHI goes in front of the first non-PHI instruction of Cond.
  PHINode *Counter = PHINode::Create(I64, 2, "cnt", Cond->getFirstNonPHI());

  // cnt_incr = cnt + 1, computed at the end of the loop body.
  Instruction *Next =
      BinaryOperator::CreateNSW(Instruction::BinaryOps::Add, Counter, One,
                                "cnt_incr", Body->getTerminator());

  // Incoming values: 0 on loop entry, the incremented value on the back edge.
  Counter->addIncoming(Zero, Entry);
  Counter->addIncoming(Next, Body);

  return Counter;
}

308
309
310
311
312
313
314
315
316
317
318
/* Add Loop around the instruction I
 * Algorithm:
 * (1) Split the basic block of instruction I into three parts, where the
 * middleblock/body would contain instruction I.
 * (2) Add phi node before instruction I. Add incoming edge to phi node from
 * predecessor
 * (3) Add increment and compare instruction to index variable
 * (4) Replace terminator/branch instruction of body with conditional branch
 * which loops over bidy if true and goes to end if false
 * (5) Update phi node of body
 */
319
Value *CGT_CPU::addLoop(Instruction *I, Value *limit, const Twine &indexName) {
Yifan Zhao's avatar
Yifan Zhao committed
320
321
  BasicBlock *Entry = I->getParent();
  BasicBlock *ForBody = Entry->splitBasicBlock(I, "for.body");
322

kotsifa2's avatar
kotsifa2 committed
323
324
  BasicBlock::iterator i(I);
  ++i;
Yifan Zhao's avatar
Yifan Zhao committed
325
  Instruction *NextI = &*i;
326
327
  // Next Instruction should also belong to the same basic block as the basic
  // block will have a terminator instruction
Yifan Zhao's avatar
Yifan Zhao committed
328
329
330
  assert(NextI->getParent() == ForBody &&
         "Next Instruction should also belong to the same basic block!");
  BasicBlock *ForEnd = ForBody->splitBasicBlock(NextI, "for.end");
331
332

  // Add Phi Node for index variable
Yifan Zhao's avatar
Yifan Zhao committed
333
334
  PHINode *IndexPhi = PHINode::Create(Type::getInt64Ty(I->getContext()), 2,
                                      "index." + indexName, I);
335
336

  // Add incoming edge to phi
337
  IndexPhi->addIncoming(ConstantInt::get(Type::getInt64Ty(I->getContext()), 0),
338
339
                        Entry);
  // Increment index variable
Yifan Zhao's avatar
Yifan Zhao committed
340
341
342
343
  BinaryOperator *IndexInc = BinaryOperator::Create(
      Instruction::Add, IndexPhi,
      ConstantInt::get(Type::getInt64Ty(I->getContext()), 1),
      "index." + indexName + ".inc", ForBody->getTerminator());
344
345

  // Compare index variable with limit
Yifan Zhao's avatar
Yifan Zhao committed
346
347
348
  CmpInst *Cond =
      CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT, IndexInc, limit,
                      "cond." + indexName, ForBody->getTerminator());
349
350
351

  // Replace the terminator instruction of for.body with new conditional
  // branch which loops over body if true and branches to for.end otherwise
Yifan Zhao's avatar
Yifan Zhao committed
352
  BranchInst *BI = BranchInst::Create(ForBody, ForEnd, Cond);
353
354
355
356
357
358
359
  ReplaceInstWithInst(ForBody->getTerminator(), BI);

  // Add incoming edge to phi node in body
  IndexPhi->addIncoming(IndexInc, ForBody);
  return IndexPhi;
}

360
361
362
363
// Returns a packed struct type. The structtype is created by packing the input
// types, output types and isLastInput buffer type. All the streaming
// inputs/outputs are converted to i8*, since this is the type of buffer
// handles.
364
StructType *CGT_CPU::getArgumentListStructTy(DFNode *C) {
Yifan Zhao's avatar
Yifan Zhao committed
365
  std::vector<Type *> TyList;
366
  // Input types
Yifan Zhao's avatar
Yifan Zhao committed
367
368
369
370
  Function *CF = C->getFuncPointer();
  for (Function::arg_iterator ai = CF->arg_begin(), ae = CF->arg_end();
       ai != ae; ++ai) {
    if (C->getInDFEdgeAt(ai->getArgNo())->isStreamingEdge())
371
      TyList.push_back(Type::getInt8PtrTy(CF->getContext()));
Yifan Zhao's avatar
Yifan Zhao committed
372
    else
373
374
375
      TyList.push_back(ai->getType());
  }
  // Output Types
Yifan Zhao's avatar
Yifan Zhao committed
376
  StructType *OutStructTy = cast<StructType>(CF->getReturnType());
377
378
  for (unsigned i = 0; i < OutStructTy->getNumElements(); i++) {
    // All outputs of a node are streaming edge
Yifan Zhao's avatar
Yifan Zhao committed
379
380
    assert(C->getOutDFEdgeAt(i)->isStreamingEdge() &&
           "All output edges of child node have to be streaming");
381
382
383
384
385
    TyList.push_back(Type::getInt8PtrTy(CF->getContext()));
  }
  // isLastInput buffer element
  TyList.push_back(Type::getInt8PtrTy(CF->getContext()));

Yifan Zhao's avatar
Yifan Zhao committed
386
387
388
  StructType *STy =
      StructType::create(CF->getContext(), TyList,
                         Twine("struct.thread." + CF->getName()).str(), true);
389
390
391
  return STy;
}

392
void CGT_CPU::startNodeThread(DFNode *C, std::vector<Value *> Args,
Yifan Zhao's avatar
Yifan Zhao committed
393
394
395
396
397
                              DenseMap<DFEdge *, Value *> EdgeBufferMap,
                              Value *isLastInputBuffer, Value *graphID,
                              Instruction *IB) {
  DEBUG(errs() << "Starting Pipeline for child node: "
               << C->getFuncPointer()->getName() << "\n");
398
  // Create a filter/pipeline function for the child node
Yifan Zhao's avatar
Yifan Zhao committed
399
400
  Function *C_Pipeline = createFunctionFilter(C);
  Function *CF = C->getFuncPointer();
401
402
403

  // Get module context and i32 0 constant, as they would be frequently used in
  // this function.
Yifan Zhao's avatar
Yifan Zhao committed
404
405
  LLVMContext &Ctx = IB->getParent()->getContext();
  Constant *IntZero = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
406
407
408
409
410

  // Marshall arguments
  // Create a packed struct type with inputs of C followed by outputs and then
  // another i8* to indicate isLastInput buffer. Streaming inputs are replaced
  // by i8*
411
  //
Yifan Zhao's avatar
Yifan Zhao committed
412
  StructType *STy = getArgumentListStructTy(C);
413
  // Allocate the struct on heap *NOT* stack and bitcast i8* to STy*
Yifan Zhao's avatar
Yifan Zhao committed
414
415
416
417
418
419
420
  CallInst *CI =
      CallInst::Create(malloc, ArrayRef<Value *>(ConstantExpr::getSizeOf(STy)),
                       C->getFuncPointer()->getName() + ".inputs", IB);
  CastInst *Struct = BitCastInst::CreatePointerCast(
      CI, STy->getPointerTo(), CI->getName() + ".i8ptr", IB);
  // AllocaInst* AI = new AllocaInst(STy,
  // C->getFuncPointer()->getName()+".inputs", IB);
421
  // Insert elements in the struct
Yifan Zhao's avatar
Yifan Zhao committed
422
423
  DEBUG(errs() << "Marshall inputs for child node: "
               << C->getFuncPointer()->getName() << "\n");
424
  // Marshall Inputs
Yifan Zhao's avatar
Yifan Zhao committed
425
  for (unsigned i = 0; i < CF->getFunctionType()->getNumParams(); i++) {
426
    // Create constant int (i)
Yifan Zhao's avatar
Yifan Zhao committed
427
    Constant *Int_i = ConstantInt::get(Type::getInt32Ty(Ctx), i);
428
    // Get Element pointer instruction
Yifan Zhao's avatar
Yifan Zhao committed
429
430
431
432
433
    Value *GEPIndices[] = {IntZero, Int_i};
    GetElementPtrInst *GEP = GetElementPtrInst::Create(
        nullptr, Struct, ArrayRef<Value *>(GEPIndices, 2),
        Struct->getName() + ".arg_" + Twine(i), IB);
    DFEdge *E = C->getInDFEdgeAt(i);
434
435
    if (E->getSourceDF()->isEntryNode()) {
      // This is a Bind Input Edge
Yifan Zhao's avatar
Yifan Zhao committed
436
      if (E->isStreamingEdge()) {
437
        // Streaming Bind Input edge. Get buffer corresponding to it
Yifan Zhao's avatar
Yifan Zhao committed
438
439
        assert(EdgeBufferMap.count(E) &&
               "No mapping buffer for a Streaming Bind DFEdge!");
440
        new StoreInst(EdgeBufferMap[E], GEP, IB);
Yifan Zhao's avatar
Yifan Zhao committed
441
      } else {
442
443
444
        // Non-streaming Bind edge
        new StoreInst(Args[i], GEP, IB);
      }
Yifan Zhao's avatar
Yifan Zhao committed
445
446
    } else {
      // This is an edge between siblings.
447
448
      // This must be an streaming edge. As it is our assumption that all edges
      // between two nodes in a DFG are streaming.
Yifan Zhao's avatar
Yifan Zhao committed
449
450
      assert(EdgeBufferMap.count(E) &&
             "No mapping buffer for a Streaming DFEdge!");
451
452
453
454
      new StoreInst(EdgeBufferMap[E], GEP, IB);
    }
  }
  unsigned numInputs = CF->getFunctionType()->getNumParams();
455
  unsigned numOutputs = cast<StructType>(CF->getReturnType())->getNumElements();
456
  // Marshall Outputs
Yifan Zhao's avatar
Yifan Zhao committed
457
458
459
  DEBUG(errs() << "Marshall outputs for child node: "
               << C->getFuncPointer()->getName() << "\n");
  for (unsigned i = 0; i < numOutputs; i++) {
460
    // Create constant int (i+numInputs)
Yifan Zhao's avatar
Yifan Zhao committed
461
    Constant *Int_i = ConstantInt::get(Type::getInt32Ty(Ctx), i + numInputs);
462
    // Get Element pointer instruction
Yifan Zhao's avatar
Yifan Zhao committed
463
464
465
466
467
468
469
470
471
    Value *GEPIndices[] = {IntZero, Int_i};
    GetElementPtrInst *GEP = GetElementPtrInst::Create(
        nullptr, Struct, ArrayRef<Value *>(GEPIndices, 2),
        Struct->getName() + ".out_" + Twine(i), IB);
    DFEdge *E = C->getOutDFEdgeAt(i);
    assert(E->isStreamingEdge() &&
           "Output Edge must be streaming of all nodes");
    assert(EdgeBufferMap.count(E) &&
           "No mapping buffer for a Out Streaming DFEdge!");
472
473
474
    new StoreInst(EdgeBufferMap[E], GEP, IB);
  }
  // Marshall last argument. isLastInput buffer
Yifan Zhao's avatar
Yifan Zhao committed
475
476
  DEBUG(errs() << "Marshall isLastInput for child node: "
               << C->getFuncPointer()->getName() << "\n");
477
  // Create constant int (i+numInputs)
Yifan Zhao's avatar
Yifan Zhao committed
478
479
  Constant *Int_index =
      ConstantInt::get(Type::getInt32Ty(Ctx), numInputs + numOutputs);
480
  // Get Element pointer instruction
Yifan Zhao's avatar
Yifan Zhao committed
481
482
483
484
  Value *GEPIndices[] = {IntZero, Int_index};
  GetElementPtrInst *GEP = GetElementPtrInst::Create(
      nullptr, Struct, ArrayRef<Value *>(GEPIndices, 2),
      Struct->getName() + ".isLastInput", IB);
485
486
487
488
  new StoreInst(isLastInputBuffer, GEP, IB);

  // AllocaInst AI points to memory with all the arguments packed
  // Call runtime to create the thread with these arguments
Yifan Zhao's avatar
Yifan Zhao committed
489
490
491
  DEBUG(errs() << "Start Thread for child node: "
               << C->getFuncPointer()->getName() << "\n");
  // DEBUG(errs() << *llvm_hpvm_createThread << "\n");
492
493
  DEBUG(errs() << *graphID->getType() << "\n");
  DEBUG(errs() << *C_Pipeline->getType() << "\n");
494
  DEBUG(errs() << *Struct->getType() << "\n");
495
  // Bitcast AI to i8*
Yifan Zhao's avatar
Yifan Zhao committed
496
497
498
499
500
  CastInst *BI = BitCastInst::CreatePointerCast(Struct, Type::getInt8PtrTy(Ctx),
                                                Struct->getName(), IB);
  Value *CreateThreadArgs[] = {graphID, C_Pipeline, BI};
  CallInst::Create(llvm_hpvm_createThread,
                   ArrayRef<Value *>(CreateThreadArgs, 3), "", IB);
501
502
}

503
/* Creates the launch function of the streaming graph rooted at N and returns
 * it. The function is named "<root function name>.LaunchFunction" and has type
 * void (i8* data.addr, i8* graphID); data.addr holds the inputs of the root
 * function, in order.
 * Steps:
 * (1) Create an empty launch function with a single "entry" block and a
 *     return instruction; all code below is inserted before that return.
 * (2) Extract each input from data.addr (streaming inputs are extracted as
 *     i8* buffer handles).
 * (3) Create a buffer for every streaming edge of the child graph, plus one
 *     isLastInput buffer per non-dummy child node.
 * (4) For each non-dummy child node, marshall its arguments (buffers in place
 *     of streaming arguments) and start its thread.
 */
Function *CGT_CPU::createLaunchFunction(DFInternalNode *N) {
  DEBUG(errs() << "Generating Streaming Launch Function\n");
  // Function associated with node N
  Function *NF = N->getFuncPointer();
  DFGraph *G = N->getChildGraph();

  // Map from streaming edge to its buffer
  DenseMap<DFEdge *, Value *> EdgeBufferMap;

  // (1) Create Launch Function of type void (i8* args, i8* GraphID)
  Type *i8Ty = Type::getInt8Ty(M.getContext());
  Type *ArgTypes[] = {i8Ty->getPointerTo(), i8Ty->getPointerTo()};
  FunctionType *LaunchFuncTy = FunctionType::get(
      Type::getVoidTy(NF->getContext()), ArrayRef<Type *>(ArgTypes, 2), false);
  Function *LaunchFunc = Function::Create(
      LaunchFuncTy, NF->getLinkage(), NF->getName() + ".LaunchFunction", &M);
  DEBUG(errs() << "Generating Code for Streaming Launch Function\n");

  // Name the two arguments: packed input data and graph identifier.
  // NOTE-HS: Check correctness with Maria
  Argument *data = &*LaunchFunc->arg_begin();
  Argument *graphID = &*(LaunchFunc->arg_begin() + 1);
  data->setName("data.addr");
  graphID->setName("graphID");

  // Give the function an entry block ending in a void return.
  DEBUG(errs() << *LaunchFunc->getReturnType() << "\n");
  BasicBlock *BB =
      BasicBlock::Create(LaunchFunc->getContext(), "entry", LaunchFunc);
  ReturnInst *RI = ReturnInst::Create(LaunchFunc->getContext(), BB);

  DEBUG(errs() << "Created Empty Launch Function\n");

  // (2) Extract each of inputs from data.addr
  std::vector<Type *> TyList;
  std::vector<std::string> names;
  for (Argument &Arg : NF->args()) {
    const bool IsStreaming = N->getChildGraph()
                                 ->getEntry()
                                 ->getOutDFEdgeAt(Arg.getArgNo())
                                 ->isStreamingEdge();
    if (IsStreaming) {
      // Streaming inputs arrive as buffer handles.
      TyList.push_back(i8Ty->getPointerTo());
      names.push_back(Twine(Arg.getName() + "_buffer").str());
    } else {
      TyList.push_back(Arg.getType());
      names.push_back(Arg.getName().str());
    }
  }
  std::vector<Value *> Args = extractElements(data, TyList, names, RI);
  DEBUG(errs() << "Launch function for " << NF->getName() << *LaunchFunc
               << "\n");

  // (3) Create buffers for all the streaming edges
  for (DFGraph::dfedge_iterator di = G->dfedge_begin(), de = G->dfedge_end();
       di != de; ++di) {
    DFEdge *Edge = *di;
    DEBUG(errs() << *Edge->getType() << "\n");
    Value *size = ConstantExpr::getSizeOf(Edge->getType());
    if (!Edge->isStreamingEdge())
      continue;

    CallInst *Buffer;
    if (Edge->getSourceDF()->isEntryNode()) {
      // Bind Input Edge: also pass the source position of the edge
      Constant *Int_ArgNo = ConstantInt::get(
          Type::getInt32Ty(RI->getContext()), Edge->getSourcePosition());
      Value *BindInCallArgs[] = {graphID, size, Int_ArgNo};
      Buffer = CallInst::Create(
          llvm_hpvm_createBindInBuffer, ArrayRef<Value *>(BindInCallArgs, 3),
          "BindIn." + Edge->getDestDF()->getFuncPointer()->getName(), RI);
    } else {
      Value *CallArgs[] = {graphID, size};
      if (Edge->getDestDF()->isExitNode()) {
        // Bind Output Edge
        Buffer = CallInst::Create(
            llvm_hpvm_createBindOutBuffer, ArrayRef<Value *>(CallArgs, 2),
            "BindOut." + Edge->getSourceDF()->getFuncPointer()->getName(), RI);
      } else {
        // Streaming Edge between two child nodes
        Buffer = CallInst::Create(
            llvm_hpvm_createEdgeBuffer, ArrayRef<Value *>(CallArgs, 2),
            Edge->getSourceDF()->getFuncPointer()->getName() + "." +
                Edge->getDestDF()->getFuncPointer()->getName(),
            RI);
      }
    }
    EdgeBufferMap[Edge] = Buffer;
  }

  // Create buffer for isLastInput for all the child nodes
  DenseMap<DFNode *, Value *> NodeLastInputMap;
  Value *LastInputSize =
      ConstantExpr::getSizeOf(Type::getInt64Ty(NF->getContext()));
  for (DFNode *child : *G) {
    if (child->isDummyNode())
      continue;
    Value *CallArgs[] = {graphID, LastInputSize};
    NodeLastInputMap[child] = CallInst::Create(
        llvm_hpvm_createLastInputBuffer, ArrayRef<Value *>(CallArgs, 2),
        "BindIn.isLastInput." + child->getFuncPointer()->getName(), RI);
  }
  DEBUG(errs() << "Start Each child node filter\n");

  // (4) Marshall arguments for each child node and start the thread with its
  //     pipeline function
  for (DFNode *C : *G) {
    // Skip dummy node call
    if (C->isDummyNode())
      continue;

    // Marshall all the arguments for this node into an i8*, pass it to the
    // runtime and start the thread for child node C
    startNodeThread(C, Args, EdgeBufferMap, NodeLastInputMap[C], graphID, RI);
  }

  DEBUG(errs() << "Launch function:\n");
  DEBUG(errs() << *LaunchFunc << "\n");

  return LaunchFunc;
}

/* This function performs the steps necessary to launch a streaming graph.
 * Steps:
 * Create Pipeline/Filter function for each node in child graph of Root
 * Create Functions DFGLaunch, DFGPush, DFGPop, DFGWait
 * Modify each of the intrinsics in host code:
 * Launch, Push, Pop, Wait
 */
651
void CGT_CPU::codeGenLaunchStreaming(DFInternalNode *Root) {
Yifan Zhao's avatar
Yifan Zhao committed
652
653
  IntrinsicInst *LI = Root->getInstruction();
  Function *RootLaunch = createLaunchFunction(Root);
654
  // Substitute launch intrinsic main
Yifan Zhao's avatar
Yifan Zhao committed
655
656
657
658
659
  DEBUG(errs() << "Substitute launch intrinsic\n");
  Value *LaunchInstArgs[] = {RootLaunch, LI->getArgOperand(1)};
  CallInst *LaunchInst = CallInst::Create(
      llvm_hpvm_streamLaunch, ArrayRef<Value *>(LaunchInstArgs, 2),
      "graph" + Root->getFuncPointer()->getName(), LI);
660
661

  DEBUG(errs() << *LaunchInst << "\n");
662
  // Replace all wait instructions with cpu specific wait instructions
Yifan Zhao's avatar
Yifan Zhao committed
663
664
665
666
667
668
669
670
671
  DEBUG(errs() << "Substitute wait, push, pop intrinsics\n");
  std::vector<IntrinsicInst *> *UseList = getUseList(LI);
  for (unsigned i = 0; i < UseList->size(); ++i) {
    IntrinsicInst *II = UseList->at(i);
    CallInst *CI;
    Value *PushArgs[] = {LaunchInst, II->getOperand(1)};
    switch (II->getIntrinsicID()) {
    case Intrinsic::hpvm_wait:
      CI = CallInst::Create(llvm_hpvm_streamWait, ArrayRef<Value *>(LaunchInst),
672
673
                            "");
      break;
Yifan Zhao's avatar
Yifan Zhao committed
674
675
676
    case Intrinsic::hpvm_push:
      CI = CallInst::Create(llvm_hpvm_streamPush,
                            ArrayRef<Value *>(PushArgs, 2), "");
677
      break;
Yifan Zhao's avatar
Yifan Zhao committed
678
679
    case Intrinsic::hpvm_pop:
      CI = CallInst::Create(llvm_hpvm_streamPop, ArrayRef<Value *>(LaunchInst),
680
681
682
                            "");
      break;
    default:
Yifan Zhao's avatar
Yifan Zhao committed
683
684
      llvm_unreachable(
          "GraphID is used by an instruction other than wait, push, pop");
685
686
687
688
689
690
691
    };
    DEBUG(errs() << "Replace:\n\t" << *II << "\n");
    ReplaceInstWithInst(II, CI);
    DEBUG(errs() << "\twith " << *CI << "\n");
  }
}

692
// Generate the host-side launch code for the dataflow graph rooted at Root.
//
// Builds a wrapper function "LaunchDataflowGraph" of type i8*(i8*) that
//   (1) unpacks the inputs of Root's generated CPU function from its single
//       i8* argument (via extractElements),
//   (2) calls Root's generated CPU function with those inputs, and
//   (3) stores the call result back through the same pointer: into the last
//       field of a packed {args..., result} struct when Root returns a
//       non-empty struct, otherwise directly at the pointer.
// A call to llvm_hpvm_cpu_launch taking the wrapper and the launch
// intrinsic's second argument is inserted before the launch intrinsic, and
// every wait/push/pop intrinsic that getUseList reports for the launch
// intrinsic is replaced by the matching runtime call on the new call's result.
// Timer switches are emitted around each phase with switchToTimer.
void CGT_CPU::codeGenLaunch(DFInternalNode *Root) {
  // TODO: Place an assert to check if the constant passed by launch intrinsic
  // as the number of arguments to DFG is same as the number of arguments of the
  // root of DFG
  DEBUG(errs() << "Generating Launch Function\n");

  // Get Launch Instruction
  IntrinsicInst *LI = Root->getInstruction();
  switchToTimer(hpvm_TimerID_PTHREAD_CREATE, LI);

  // (1) Create an empty wrapper of type i8*(i8*); a pointer to it is what gets
  //     handed to the runtime launch call further down.
  Type *i8PtrTy = Type::getInt8Ty(M.getContext())->getPointerTo();
  FunctionType *AppFuncTy =
      FunctionType::get(i8PtrTy, ArrayRef<Type *>(i8PtrTy), false);
  Function *AppFunc =
      Function::Create(AppFuncTy, Root->getFuncPointer()->getLinkage(),
                       "LaunchDataflowGraph", &M);

  // Name the single argument through which the packed inputs arrive.
  Value *data = &*AppFunc->arg_begin();
  data->setName("data.addr");

  // Give the wrapper an entry block that just returns null; everything else
  // is inserted in front of this return.
  BasicBlock *BB = BasicBlock::Create(AppFunc->getContext(), "entry", AppFunc);
  ReturnInst *RI =
      ReturnInst::Create(AppFunc->getContext(),
                         Constant::getNullValue(AppFunc->getReturnType()), BB);
  switchToTimer(hpvm_TimerID_ARG_UNPACK, RI);

  DEBUG(errs() << "Created Empty Launch Function\n");

  // Find the CPU function generated for Root.
  Function *RootF_CPU = Root->getGenFuncForTarget(hpvm::CPU_TARGET);
  assert(RootF_CPU && "Error: No generated CPU function for Root node\n");
  assert(Root->hasCPUGenFuncForTarget(hpvm::CPU_TARGET) &&
         "Error: Generated Function for Root node with no cpu wrapper\n");
  FunctionType *RootFTy = RootF_CPU->getFunctionType();

  // (2) Call RootF_CPU with null placeholders first; the call instruction is
  //     needed as the insertion point for the unpacking code.
  std::vector<Value *> Args;
  Args.reserve(RootFTy->getNumParams());
  for (Type *ParamTy : RootFTy->params())
    Args.push_back(Constant::getNullValue(ParamTy));
  CallInst *CI =
      CallInst::Create(RootF_CPU, Args, RootF_CPU->getName() + ".output", RI);

  // Extract the inputs from data.addr (inserted before the call) and patch
  // them into the corresponding call operands.
  std::vector<Type *> TyList(RootFTy->param_begin(), RootFTy->param_end());
  std::vector<std::string> names;
  names.reserve(TyList.size());
  for (auto &Arg : RootF_CPU->args())
    names.push_back(Arg.getName().str());

  std::vector<Value *> elements = extractElements(data, TyList, names, CI);
  assert(elements.size() >= CI->getNumArgOperands() &&
         "extractElements returned fewer values than Root has arguments");
  for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
    CI->setArgOperand(i, elements[i]);

  // Add timers around Call to RootF_CPU function
  switchToTimer(hpvm_TimerID_COMPUTATION, CI);
  switchToTimer(hpvm_TimerID_OUTPUT_PACK, RI);

  StructType *RootRetTy = cast<StructType>(RootFTy->getReturnType());

  // (3) Write the result back through data.addr.
  if (RootRetTy->getNumElements()) {
    // We can't access the type of the arg struct - build it: one field per
    // Root argument followed by one field for the result.
    std::vector<Type *> ArgStructFields(RootFTy->param_begin(),
                                        RootFTy->param_end());
    ArgStructFields.push_back(CI->getType());

    StructType *ArgStructTy = StructType::create(
        M.getContext(), ArrayRef<Type *>(ArgStructFields),
        (RootF_CPU->getName() + ".arg.struct.ty").str(), true);

    // Cast the data pointer to the type of the arg struct
    CastInst *OutputAddrCast = CastInst::CreatePointerCast(
        data, ArgStructTy->getPointerTo(), "argStructCast.addr", RI);

    // Result struct is the last element of the packed struct passed to launch
    unsigned outStructIdx = ArgStructTy->getNumElements() - 1;

    ConstantInt *IntZero =
        ConstantInt::get(Type::getInt32Ty(M.getContext()), 0);
    ConstantInt *IntIdx =
        ConstantInt::get(Type::getInt32Ty(M.getContext()), outStructIdx);

    // Get data pointer to the last element of struct - result field
    Value *GEPIIdxList[] = {IntZero, IntIdx};
    GetElementPtrInst *OutGEPI = GetElementPtrInst::Create(
        ArgStructTy, OutputAddrCast, ArrayRef<Value *>(GEPIIdxList, 2),
        CI->getName() + ".addr", RI);

    // Store result there
    new StoreInst(CI, OutGEPI, RI);
  } else {
    // There is no return - no need to actually code gen, but for fewer
    // changes maintain what code was already doing
    // We were casting the data pointer to the result type of Root, and
    // returning result there. This would work at the LLVM level, but not
    // at the C level, thus the rewrite.
    CastInst *OutputAddrCast = CastInst::CreatePointerCast(
        data, CI->getType()->getPointerTo(), CI->getName() + ".addr", RI);
    new StoreInst(CI, OutputAddrCast, RI);
  }

  switchToTimer(hpvm_TimerID_NONE, RI);

  DEBUG(errs() << "Application specific function:\n");
  DEBUG(errs() << *AppFunc << "\n");

  // Emit the runtime launch call in front of the launch intrinsic. The
  // intrinsic itself is left in place here; only its users are rewritten.
  Value *LaunchInstArgs[] = {AppFunc, LI->getArgOperand(1)};
  CallInst *LaunchInst = CallInst::Create(
      llvm_hpvm_cpu_launch, ArrayRef<Value *>(LaunchInstArgs, 2),
      "graph" + Root->getFuncPointer()->getName(), LI);

  DEBUG(errs() << *LaunchInst << "\n");

  // Replace all wait instructions with cpu specific wait instructions
  // NOTE(review): getUseList hands back a raw vector pointer; ownership is not
  // visible from here - confirm whether the caller is expected to free it.
  std::vector<IntrinsicInst *> *UseList = getUseList(LI);
  for (IntrinsicInst *II : *UseList) {
    CallInst *Replacement = nullptr;
    switch (II->getIntrinsicID()) {
    case Intrinsic::hpvm_wait:
      Replacement = CallInst::Create(llvm_hpvm_cpu_wait,
                                     ArrayRef<Value *>(LaunchInst), "");
      break;
    case Intrinsic::hpvm_push:
      Replacement = CallInst::Create(llvm_hpvm_bufferPush,
                                     ArrayRef<Value *>(LaunchInst), "");
      break;
    case Intrinsic::hpvm_pop:
      Replacement = CallInst::Create(llvm_hpvm_bufferPop,
                                     ArrayRef<Value *>(LaunchInst), "");
      break;
    default:
      llvm_unreachable(
          "GraphID is used by an instruction other than wait, push, pop");
    }
    ReplaceInstWithInst(II, Replacement);
    DEBUG(errs() << *Replacement << "\n");
  }
}
855

856
// Return the value feeding input port i of Child.
//
// The incoming edge at port i is looked up on Child. If the edge's source is
// the entry node, the value is the argument of ParentF_CPU at the edge's
// source position. Otherwise the source is a sibling whose output must
// already be recorded in OutputMap; an extractvalue at the edge's source
// position is created before InsertBefore and returned.
Value *CGT_CPU::getInValueAt(DFNode *Child, unsigned i, Function *ParentF_CPU,
                             Instruction *InsertBefore) {
  // TODO: Assumption is that each input port of a node has just one
  // incoming edge. May change later on.
  DFEdge *InEdge = Child->getInDFEdgeAt(i);
  assert(InEdge && "No incoming edge or binding for input element!");
  DFNode *Producer = InEdge->getSourceDF();

  // Edge comes from the parent: forward the matching parent argument.
  if (Producer->isEntryNode()) {
    Value *ParentArg = getArgumentAt(ParentF_CPU, InEdge->getSourcePosition());
    DEBUG(errs() << "Argument " << i << " = " << *ParentArg << "\n");
    return ParentArg;
  }

  // Edge comes from a sibling: its code must have been generated already.
  assert(OutputMap.count(Producer) &&
         "Source node call not found. Dependency violation!");
  Value *ProducerOutput = OutputMap[Producer];

  // Pull the element at the edge's source position out of the sibling output.
  const unsigned SrcPos = InEdge->getSourcePosition();
  DEBUG(errs() << "Going to generate ExtarctVal inst from " << *ProducerOutput
               << "\n");
  return ExtractValueInst::Create(ProducerOutput, ArrayRef<unsigned>(SrcPos),
                                  "", InsertBefore);
}

893
void CGT_CPU::invokeChild_CPU(DFNode *C, Function *F_CPU,
Yifan Zhao's avatar
Yifan Zhao committed
894
895
896
                              ValueToValueMapTy &VMap, Instruction *IB) {
  Function *CF = C->getFuncPointer();

897
898
899
  //  Function* CF_CPU = C->getGenFunc();
  Function *CF_CPU = C->getGenFuncForTarget(hpvm::CPU_TARGET);
  assert(CF_CPU != NULL &&
Yifan Zhao's avatar
Yifan Zhao committed
900
         "Found leaf node for which code generation has not happened yet!\n");
901
902
  assert(C->hasCPUGenFuncForTarget(hpvm::CPU_TARGET) &&
         "The generated function to be called from cpu backend is not an cpu "
Yifan Zhao's avatar
Yifan Zhao committed
903
         "function\n");
904
  DEBUG(errs() << "Invoking child node" << CF_CPU->getName() << "\n");
905

Yifan Zhao's avatar
Yifan Zhao committed
906
  std::vector<Value *> Args;
907
908
909
  // Create argument list to pass to call instruction
  // First find the correct values using the edges
  // The remaining six values are inserted as constants for now.
Yifan Zhao's avatar
Yifan Zhao committed
910
  for (unsigned i = 0; i < CF->getFunctionType()->getNumParams(); i++) {
911
    Args.push_back(getInValueAt(C, i, F_CPU, IB));
912
913
  }

914
  Value *I64Zero = ConstantInt::get(Type::getInt64Ty(F_CPU->getContext()), 0);
Yifan Zhao's avatar
Yifan Zhao committed
915
  for (unsigned j = 0; j < 6; j++)
kotsifa2's avatar
kotsifa2 committed
916
    Args.push_back(I64Zero);
917

918
  DEBUG(errs() << "Gen Function type: " << *CF_CPU->getType() << "\n");
Yifan Zhao's avatar
Yifan Zhao committed
919
920
  DEBUG(errs() << "Node Function type: " << *CF->getType() << "\n");
  DEBUG(errs() << "Arguments: " << Args.size() << "\n");