diff --git a/llvm/include/llvm/BuildDFG/BuildDFG.h b/llvm/include/llvm/BuildDFG/BuildDFG.h
index 1bf4ee4ec74358739977d9078278a3b6b7f30b19..811b024d92af8d9b7a473752c8b6dd26e38a9f91 100644
--- a/llvm/include/llvm/BuildDFG/BuildDFG.h
+++ b/llvm/include/llvm/BuildDFG/BuildDFG.h
@@ -19,51 +19,51 @@
 using namespace llvm;
 
 namespace builddfg {
-  // BuildDFG - The first implementation.
-  struct BuildDFG : public ModulePass {
-    static char ID; // Pass identification, replacement for typeid
-    BuildDFG() : ModulePass(ID) {}
+// BuildDFG - The first implementation.
+struct BuildDFG : public ModulePass {
+  static char ID; // Pass identification, replacement for typeid
+  BuildDFG() : ModulePass(ID) {}
 
-    typedef ValueMap<Value*, DFNode*> HandleToDFNode;
-    typedef ValueMap<Value*, DFEdge*> HandleToDFEdge;
+  typedef ValueMap<Value*, DFNode*> HandleToDFNode;
+  typedef ValueMap<Value*, DFEdge*> HandleToDFEdge;
 
-    private:
-    // Member variables
-    DFInternalNode *Root;
+private:
+  // Member variables
+  DFInternalNode *Root;
 
-    HandleToDFNode HandleToDFNodeMap;   // This map associates the i8* pointer
-                                        // with the DFNode structure that it
-                                        // represents
-    HandleToDFEdge HandleToDFEdgeMap;   // This map associates the i8* pointer
-                                        // with the DFEdge structure that it
-                                        // represents
+  HandleToDFNode HandleToDFNodeMap;   // This map associates the i8* pointer
+                                      // with the DFNode structure that it
+                                      // represents
+  HandleToDFEdge HandleToDFEdgeMap;   // This map associates the i8* pointer
+                                      // with the DFEdge structure that it
+                                      // represents
 
 
-    // Functions
-    void handleCreateNode (DFInternalNode* N, IntrinsicInst* II);
-    void handleCreateEdge (DFInternalNode* N, IntrinsicInst* II);
-    void handleGetParentNode (DFInternalNode* N, IntrinsicInst* II);
-    void handleBindInput (DFInternalNode* N, IntrinsicInst* II);
-    void handleBindOutput (DFInternalNode* N, IntrinsicInst* II);
+  // Functions
+  void handleCreateNode (DFInternalNode* N, IntrinsicInst* II);
+  void handleCreateEdge (DFInternalNode* N, IntrinsicInst* II);
+  void handleGetParentNode (DFInternalNode* N, IntrinsicInst* II);
+  void handleBindInput (DFInternalNode* N, IntrinsicInst* II);
+  void handleBindOutput (DFInternalNode* N, IntrinsicInst* II);
 
-    void BuildGraph (DFInternalNode* N, Function* F);
+  void BuildGraph (DFInternalNode* N, Function* F);
 
-    public:
-    // Functions
-    virtual bool runOnModule(Module &M);
+public:
+  // Functions
+  virtual bool runOnModule(Module &M);
 
-    static bool isViscLaunchIntrinsic(Instruction * I);
-    static bool isViscGraphIntrinsic(Instruction * I);
-    static bool isViscQueryIntrinsic(Instruction* I);
-    static bool isViscIntrinsic(Instruction* I);
-    static bool isTypeCongruent(Type *L, Type *R);
+  static bool isViscLaunchIntrinsic(Instruction * I);
+  static bool isViscGraphIntrinsic(Instruction * I);
+  static bool isViscQueryIntrinsic(Instruction* I);
+  static bool isViscIntrinsic(Instruction* I);
+  static bool isTypeCongruent(Type *L, Type *R);
 
   //TODO: Maybe make these fields const
-    DFInternalNode *getRoot() const;
-    HandleToDFNode &getHandleToDFNodeMap();
-    HandleToDFEdge &getHandleToDFEdgeMap();
+  DFInternalNode *getRoot() const;
+  HandleToDFNode &getHandleToDFNodeMap();
+  HandleToDFEdge &getHandleToDFEdgeMap();
 
-  };
+};
 
 } // End of namespace
 
diff --git a/llvm/include/llvm/IR/DFGraph.h b/llvm/include/llvm/IR/DFGraph.h
index be4886bb2ff0d1d2b44893e4eb14aafb6653be56..2d935b3c5f3190f2341dd5ac39dfb81eef0f7074 100644
--- a/llvm/include/llvm/IR/DFGraph.h
+++ b/llvm/include/llvm/IR/DFGraph.h
@@ -251,6 +251,11 @@ public:
   DFNode(IntrinsicInst* _II, Function* _FuncPointer, DFInternalNode* _Parent,
          unsigned _NumOfDim, std::vector<Value*> _DimLimits, DFNodeKind _K);
 
+  bool isRoot() {
+    // It is a root node if it was created from a launch intrinsic
+    return II->getCalledFunction()->getName().equals("llvm.visc.launch");
+  }
+
   StructType* getOutputType() {
     return OutputType;
   }
@@ -761,7 +766,7 @@ struct DOTGraphTraits<DFGraph*> : public DefaultDOTGraphTraits {
 
 void viewDFGraph(DFGraph *G) {
   llvm::WriteGraph(G, "DataflowGraph");
-  llvm::ViewGraph(G, "DataflowGraph");
+  //llvm::ViewGraph(G, "DataflowGraph");
 }
 
 } // End llvm namespace
diff --git a/llvm/lib/Transforms/BuildDFG/BuildDFG.cpp b/llvm/lib/Transforms/BuildDFG/BuildDFG.cpp
index 6cc121aa5d9f9c206018decb2189c6d92e8a0c38..8081f4065eb6b51e2fd61e866f7255510b480617 100644
--- a/llvm/lib/Transforms/BuildDFG/BuildDFG.cpp
+++ b/llvm/lib/Transforms/BuildDFG/BuildDFG.cpp
@@ -21,337 +21,337 @@ STATISTIC(IntrinsicCounter, "Counts number of visc intrinsics greeted");
 
 namespace builddfg {
 
-  bool BuildDFG::runOnModule(Module &M) {
-
-    errs() << "-------- Searching for launch site ----------\n";
-
-    bool foundLaunchSite = false;
-    IntrinsicInst* II;
-
-    // Iterate over all functions in the module
-    for (Module::iterator mi = M.begin(),
-         me = M.end(); (mi != me) && (!foundLaunchSite); ++mi) {
-      Function* f = &*mi;
-      errs() << "Function: " << f->getName() << "\n";
-
-      for (inst_iterator i = inst_begin(f), e = inst_end(f);
-           (i != e) && (!foundLaunchSite); ++i) {
-        Instruction* I = &*i; // Grab pointer to Instruction
-        if (isViscLaunchIntrinsic(I)) {
-          errs() << "------------ Found launch site --------------\n";
-          foundLaunchSite = true;
-          II = cast<IntrinsicInst>(I);
-        }
-      }
-    }
+bool BuildDFG::runOnModule(Module &M) {
 
-    assert(foundLaunchSite && "Launch site not found!");
+  errs() << "-------- Searching for launch site ----------\n";
 
-    // Intrinsic Instruction has been initialized from this point on.
+  bool foundLaunchSite = false;
+  IntrinsicInst* II;
 
-    Function* F = cast<Function>((II->getOperand(0))->stripPointerCasts());
-    Root = DFInternalNode::Create(II, F);
-    BuildGraph(Root, F);
+  // Iterate over all functions in the module
+  for (Module::iterator mi = M.begin(),
+       me = M.end(); (mi != me) && (!foundLaunchSite); ++mi) {
+    Function* f = &*mi;
+    errs() << "Function: " << f->getName() << "\n";
 
-    for(DFGraph::children_iterator i = Root->getChildGraph()->begin(),
-        e = Root->getChildGraph()->end(); i!=e; i++) {
-      DFNode* N = *i;
-      errs() << "\t" << N->getFuncPointer()->getName() << "\n";
-    }
-    Root->getChildGraph()->sortChildren();
-    for(DFGraph::children_iterator i = Root->getChildGraph()->begin(),
-        e = Root->getChildGraph()->end(); i!=e; i++) {
-      DFNode* N = *i;
-      errs() << "\t" << N->getFuncPointer()->getName() << "\n";
+    for (inst_iterator i = inst_begin(f), e = inst_end(f);
+         (i != e) && (!foundLaunchSite); ++i) {
+      Instruction* I = &*i; // Grab pointer to Instruction
+      if (isViscLaunchIntrinsic(I)) {
+        errs() << "------------ Found launch site --------------\n";
+        foundLaunchSite = true;
+        II = cast<IntrinsicInst>(I);
+      }
     }
-    viewDFGraph(Root->getChildGraph());
-    return false; //TODO: What does returning "false" mean?
   }
 
-  DFInternalNode *BuildDFG::getRoot() const {
-    return Root;
-  }
+  assert(foundLaunchSite && "Launch site not found!");
 
-  //TODO: Maybe make this const
-  BuildDFG::HandleToDFNode &BuildDFG::getHandleToDFNodeMap() {
-    return HandleToDFNodeMap;
-  }
+  // Intrinsic Instruction has been initialized from this point on.
 
-  //TODO: Maybe make this const
-  BuildDFG::HandleToDFEdge &BuildDFG::getHandleToDFEdgeMap() {
-    return HandleToDFEdgeMap;
-  }
+  Function* F = cast<Function>((II->getOperand(0))->stripPointerCasts());
+  Root = DFInternalNode::Create(II, F);
+  BuildGraph(Root, F);
 
-  // Returns true if instruction I is a visc launch intrinsic, false otherwise
-  bool BuildDFG::isViscLaunchIntrinsic(Instruction* I) {
-    if(!isa<IntrinsicInst>(I))
-      return false;
-    IntrinsicInst* II = cast<IntrinsicInst>(I);
-    return (II->getCalledFunction()->getName()).equals("llvm.visc.launch");
+  for(DFGraph::children_iterator i = Root->getChildGraph()->begin(),
+      e = Root->getChildGraph()->end(); i!=e; i++) {
+    DFNode* N = *i;
+    errs() << "\t" << N->getFuncPointer()->getName() << "\n";
   }
-
-  // Returns true if instruction I is a visc graph intrinsic, false otherwise
-  bool BuildDFG::isViscGraphIntrinsic(Instruction* I) {
-    if(!isa<IntrinsicInst>(I))
-      return false;
-    IntrinsicInst* II = cast<IntrinsicInst>(I);
-    return (II->getCalledFunction()->getName()).startswith("llvm.visc.create")
-      || (II->getCalledFunction()->getName()).startswith("llvm.visc.bind");
+  Root->getChildGraph()->sortChildren();
+  for(DFGraph::children_iterator i = Root->getChildGraph()->begin(),
+      e = Root->getChildGraph()->end(); i!=e; i++) {
+    DFNode* N = *i;
+    errs() << "\t" << N->getFuncPointer()->getName() << "\n";
   }
-
-  // Returns true if instruction I is a visc query intrinsic, false otherwise
-  bool BuildDFG::isViscQueryIntrinsic(Instruction* I) {
-    if(!isa<IntrinsicInst>(I))
-      return false;
-    IntrinsicInst* II = cast<IntrinsicInst>(I);
-    return (II->getCalledFunction()->getName()).startswith("llvm.visc.get");
+  viewDFGraph(Root->getChildGraph());
+  return false; //TODO: What does returning "false" mean?
+}
+
+DFInternalNode *BuildDFG::getRoot() const {
+  return Root;
+}
+
+//TODO: Maybe make this const
+BuildDFG::HandleToDFNode &BuildDFG::getHandleToDFNodeMap() {
+  return HandleToDFNodeMap;
+}
+
+//TODO: Maybe make this const
+BuildDFG::HandleToDFEdge &BuildDFG::getHandleToDFEdgeMap() {
+  return HandleToDFEdgeMap;
+}
+
+// Returns true if instruction I is a visc launch intrinsic, false otherwise
+bool BuildDFG::isViscLaunchIntrinsic(Instruction* I) {
+  if(!isa<IntrinsicInst>(I))
+    return false;
+  IntrinsicInst* II = cast<IntrinsicInst>(I);
+  return (II->getCalledFunction()->getName()).equals("llvm.visc.launch");
+}
+
+// Returns true if instruction I is a visc graph intrinsic, false otherwise
+bool BuildDFG::isViscGraphIntrinsic(Instruction* I) {
+  if(!isa<IntrinsicInst>(I))
+    return false;
+  IntrinsicInst* II = cast<IntrinsicInst>(I);
+  return (II->getCalledFunction()->getName()).startswith("llvm.visc.create")
+         || (II->getCalledFunction()->getName()).startswith("llvm.visc.bind");
+}
+
+// Returns true if instruction I is a visc query intrinsic, false otherwise
+bool BuildDFG::isViscQueryIntrinsic(Instruction* I) {
+  if(!isa<IntrinsicInst>(I))
+    return false;
+  IntrinsicInst* II = cast<IntrinsicInst>(I);
+  return (II->getCalledFunction()->getName()).startswith("llvm.visc.get");
+}
+
+// Returns true if instruction I is a visc intrinsic, false otherwise
+bool BuildDFG::isViscIntrinsic(Instruction* I) {
+  if(!isa<IntrinsicInst>(I))
+    return false;
+  IntrinsicInst* II = cast<IntrinsicInst>(I);
+  return (II->getCalledFunction()->getName()).startswith("llvm.visc");
+}
+
+// Two types are "congruent" if they are identical, or if they are both
+// pointer types with different pointee types and the same address space.
+bool BuildDFG::isTypeCongruent(Type* L, Type* R) {
+  if(L == R)
+    return true;
+  PointerType *PL = dyn_cast<PointerType>(L);
+  PointerType *PR = dyn_cast<PointerType>(R);
+  if (!PL || !PR)
+    return false;
+  return PL->getAddressSpace() == PR->getAddressSpace();
+}
+
+// Handles all the createNodeXX visc intrinsics.
+void BuildDFG::handleCreateNode(DFInternalNode* N, IntrinsicInst* II) {
+  bool isInternalNode = false;
+
+  Function* F = cast<Function>((II->getOperand(0))->stripPointerCasts());
+
+  // Check if the function associated with this intrinsic is a leaf or
+  // internal node
+  for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) {
+    Instruction* I = &*i; // Grab pointer to Instruction
+    if (isViscGraphIntrinsic(I))
+      isInternalNode = true;
   }
 
-  // Returns true if instruction I is a visc intrinsic, false otherwise
-  bool BuildDFG::isViscIntrinsic(Instruction* I) {
-    if(!isa<IntrinsicInst>(I))
-      return false;
-    IntrinsicInst* II = cast<IntrinsicInst>(I);
-    return (II->getCalledFunction()->getName()).startswith("llvm.visc");
+  // Number of Dimensions would be equal to the (number of operands - 1) as
+  // the first operand is the pointer to associated Function and the
+  // remaining operands are the limits in each dimension.
+  unsigned numOfDim = II->getCalledFunction()->getFunctionType()->getNumParams()-1;
+  assert(numOfDim <= 3
+         && "Invalid number of dimensions for createNode intrinsic!");
+  std::vector<Value*> dimLimits;
+  for (unsigned i = 1; i <= numOfDim; i++) {
+    // The operands of II are the same as the operands of the called
+    // intrinsic. It has one extra operand at the end, which is the intrinsic
+    // being called.
+    dimLimits.push_back(cast<Value> (II->getOperand(i)));
   }
 
-  // Two types are "congruent" if they are identical, or if they are both
-  // pointer types with different pointee types and the same address space.
-  bool BuildDFG::isTypeCongruent(Type* L, Type* R) {
-    if(L == R)
-      return true;
-    PointerType *PL = dyn_cast<PointerType>(L);
-    PointerType *PR = dyn_cast<PointerType>(R);
-    if (!PL || !PR)
-      return false;
-    return PL->getAddressSpace() == PR->getAddressSpace();
+  if(isInternalNode) {
+    // Create Internal DFNode, add it to the map and recursively build its
+    // dataflow graph
+    DFInternalNode* childDFNode = DFInternalNode::Create(II, F, N, numOfDim, dimLimits);
+    N->addChildToDFGraph(childDFNode);
+    HandleToDFNodeMap[II] = childDFNode;
+    BuildGraph(childDFNode, F);
   }
-
-  // Handles all the createNodeXX visc intrinsics.
-  void BuildDFG::handleCreateNode(DFInternalNode* N, IntrinsicInst* II) {
-    bool isInternalNode = false;
-
-    Function* F = cast<Function>((II->getOperand(0))->stripPointerCasts());
-
-    // Check if the function associated with this intrinsic is a leaf or
-    // internal node
-    for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) {
-      Instruction* I = &*i; // Grab pointer to Instruction
-      if (isViscGraphIntrinsic(I))
-        isInternalNode = true;
-    }
-
-    // Number of Dimensions would be equal to the (number of operands - 1) as
-    // the first operand is the pointer to associated Function and the
-    // remaining operands are the limits in each dimension.
-    unsigned numOfDim = II->getCalledFunction()->getFunctionType()->getNumParams()-1;
-    assert(numOfDim <= 3
-      && "Invalid number of dimensions for createNode intrinsic!");
-    std::vector<Value*> dimLimits;
-    for (unsigned i = 1; i <= numOfDim; i++) {
-      // The operands of II are same as the operands of the called
-      // intrinsic. It has one extra operand at the end, which is the intrinsic
-      // being called.
-      dimLimits.push_back(cast<Value> (II->getOperand(i)));
-    }
-
-    if(isInternalNode) {
-      // Create Internal DFNode, add it to the map and recursively build its
-      // dataflow graph
-      DFInternalNode* childDFNode = DFInternalNode::Create(II, F, N, numOfDim, dimLimits);
-      N->addChildToDFGraph(childDFNode);
-      HandleToDFNodeMap[II] = childDFNode;
-      BuildGraph(childDFNode, F);
-    }
-    else {
-      // Create Leaf DFnode and add it to the map.
-      DFLeafNode* childDFNode = DFLeafNode::Create(II, F, N, numOfDim, dimLimits);
-      N->addChildToDFGraph(childDFNode);
-      HandleToDFNodeMap[II] = childDFNode;
-    }
+  else {
+    // Create Leaf DFnode and add it to the map.
+    DFLeafNode* childDFNode = DFLeafNode::Create(II, F, N, numOfDim, dimLimits);
+    N->addChildToDFGraph(childDFNode);
+    HandleToDFNodeMap[II] = childDFNode;
   }
-
-  void BuildDFG::handleCreateEdge (DFInternalNode* N, IntrinsicInst* II) {
-    // The DFNode structures must be in the map before the edge is processed
-    HandleToDFNode::iterator DFI = HandleToDFNodeMap.find(II->getOperand(0));
-    assert(DFI != HandleToDFNodeMap.end());
-    DFI = HandleToDFNodeMap.find(II->getOperand(1));
-    assert(DFI != HandleToDFNodeMap.end());
-
-    DFNode* SrcDF = HandleToDFNodeMap[II->getOperand(0)];
-    DFNode* DestDF = HandleToDFNodeMap[II->getOperand(1)];
-
-    bool EdgeType = !cast<ConstantInt>(II->getOperand(2))->isZero();
-
-    unsigned SourcePosition = cast<ConstantInt>(II->getOperand(3))->getZExtValue();
-    unsigned DestPosition = cast<ConstantInt>(II->getOperand(4))->getZExtValue();
-
-    Type *SrcTy, *DestTy;
-
-    // Get destination type
-    FunctionType *FT = DestDF->getFuncPointer()->getFunctionType();
-    assert((FT->getNumParams() > DestPosition) 
-      && "Invalid argument number for destination dataflow node!");
-    DestTy = FT->getParamType(DestPosition);
-
-    // Get source type
-    StructType* OutTy = SrcDF->getOutputType();
-    assert((OutTy->getNumElements() > SourcePosition)
-      && "Invalid argument number for source dataflow node!");
-    SrcTy = OutTy->getElementType(SourcePosition);
-
-    // check if the types are compatible
-    assert(isTypeCongruent(SrcTy, DestTy)
-      && "Source and destination type of edge do not match");
-
-    DFEdge* newDFEdge = DFEdge::Create(SrcDF,
-                                       DestDF,
-                                       EdgeType,
-                                       SourcePosition,
-                                       DestPosition,
-                                       DestTy);
-
-    HandleToDFEdgeMap[II] = newDFEdge;
-
-    // Add Edge to the dataflow graph associated with the parent node
-    N->addEdgeToDFGraph(newDFEdge);
-  }
-
-  void BuildDFG::handleBindInput(DFInternalNode* N, IntrinsicInst* II) {
-    // The DFNode structures must be in the map before the edge is processed
-    HandleToDFNode::iterator DFI = HandleToDFNodeMap.find(II->getOperand(0));
-    assert(DFI != HandleToDFNodeMap.end());
-
-    DFNode* SrcDF = N->getChildGraph()->getEntry();
-    DFNode* DestDF = HandleToDFNodeMap[II->getOperand(0)];
-
-    unsigned SourcePosition = cast<ConstantInt>(II->getOperand(1))->getZExtValue();
-    unsigned DestPosition = cast<ConstantInt>(II->getOperand(2))->getZExtValue();
-
-    // Get destination type
-    FunctionType *FT = DestDF->getFuncPointer()->getFunctionType();
-    assert((FT->getNumParams() > DestPosition) 
-      && "Invalid argument number for destination dataflow node!");
-    Type* DestTy = FT->getParamType(DestPosition);
-
-    // Get source type
-    FT = SrcDF->getFuncPointer()->getFunctionType();
-    assert((FT->getNumParams() > SourcePosition) 
-      && "Invalid argument number for parent dataflow node!");
-    Type* SrcTy = FT->getParamType(SourcePosition);
-
-    // check if the types are compatible
-    assert(isTypeCongruent(SrcTy, DestTy)
-      && "Source and destination type of edge do not match");
-
-    // Add Binding as an edge between Entry and child Node  
-    DFEdge* newDFEdge = DFEdge::Create(SrcDF,
-                                       DestDF,
-                                       false,
-                                       SourcePosition,
-                                       DestPosition,
-                                       DestTy);
-
-    HandleToDFEdgeMap[II] = newDFEdge;
-
-    // Add Edge to the dataflow graph associated with the parent node
-    N->addEdgeToDFGraph(newDFEdge);
-  }
-
-  void BuildDFG::handleBindOutput(DFInternalNode* N, IntrinsicInst* II) {
-    // The DFNode structures must be in the map before the edge is processed
-    HandleToDFNode::iterator DFI = HandleToDFNodeMap.find(II->getOperand(0));
-    assert(DFI != HandleToDFNodeMap.end());
-
-    DFNode* SrcDF = HandleToDFNodeMap[II->getOperand(0)];
-    DFNode* DestDF = N->getChildGraph()->getExit();
-
-    unsigned SourcePosition = cast<ConstantInt>(II->getOperand(1))->getZExtValue();
-    unsigned DestPosition = cast<ConstantInt>(II->getOperand(2))->getZExtValue();
-
-    // Get destination type
-    StructType* OutTy = DestDF->getOutputType();
-    assert((OutTy->getNumElements() > DestPosition)
-      && "Invalid argument number for destination parent dataflow node!");
-    Type* DestTy = OutTy->getElementType(DestPosition);
-
-    // Get source type
-    OutTy = SrcDF->getOutputType();
-    assert((OutTy->getNumElements() > SourcePosition)
-      && "Invalid argument number for source dataflow node!");
-    Type* SrcTy = OutTy->getElementType(SourcePosition);
-
-    // check if the types are compatible
-    assert(isTypeCongruent(SrcTy, DestTy)
-      && "Source and destination type of edge do not match");
-
-    // Add Binding as an edge between child and exit node 
-    DFEdge* newDFEdge = DFEdge::Create(SrcDF,
-                                       DestDF,
-                                       false,
-                                       SourcePosition,
-                                       DestPosition,
-                                       DestTy);
-
-    HandleToDFEdgeMap[II] = newDFEdge;
-
-    // Add Edge to the dataflow graph associated with the parent node
-    N->addEdgeToDFGraph(newDFEdge);
-  }
-
-  void BuildDFG::BuildGraph (DFInternalNode* N, Function *F) {
-
-    // TODO: Place checks for valid visc functions. For example one of the
-    // check can be that any function that contains visc dataflow graph
-    // construction intrinsics should not have other llvm IR statements.
-
-    // Iterate over all the instructions of a function and look for visc
-    // intrinsics.
-    for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) {
-      Instruction* I = &*i; // Grab pointer to instruction reference
-      errs() << *I << "\n";
-      if(IntrinsicInst* II = dyn_cast<IntrinsicInst>(I)) {
-        errs() << "IntrinsicID = " << II->getIntrinsicID() << ": " << II->getCalledFunction()->getName()<<"\n";
-        switch(II->getIntrinsicID()) {
-          case Intrinsic::visc_test:
-            errs() << "Found Test Intrinsic";
-            break;
-
-          case Intrinsic::visc_createNode:
-          case Intrinsic::visc_createNode1D:
-          case Intrinsic::visc_createNode2D:
-          case Intrinsic::visc_createNode3D:
-            handleCreateNode (N, II);
-            break;
-
-          case Intrinsic::visc_createEdge:
-            handleCreateEdge(N, II);
-            break;
-          case Intrinsic::visc_bind_input:
-            handleBindInput(N, II);
-            break;
-          case Intrinsic::visc_bind_output:
-            handleBindOutput(N, II);
-            break;
-
-          //TODO: Reconsider launch within a dataflow graph (recursion?) 
-          case Intrinsic::visc_launch:
-            errs() << "Error: Launch intrinsic used within a dataflow graph\n";
-            break;
-
-          default:
-            errs() << "Error: Invalid VISC Intrinsic inside Internal node!\n";
-            break;
-        }
-      }
-      else {
-        errs() << "Non-intrinsic instruction\n";
+}
+
+void BuildDFG::handleCreateEdge (DFInternalNode* N, IntrinsicInst* II) {
+  // The DFNode structures must be in the map before the edge is processed
+  HandleToDFNode::iterator DFI = HandleToDFNodeMap.find(II->getOperand(0));
+  assert(DFI != HandleToDFNodeMap.end());
+  DFI = HandleToDFNodeMap.find(II->getOperand(1));
+  assert(DFI != HandleToDFNodeMap.end());
+
+  DFNode* SrcDF = HandleToDFNodeMap[II->getOperand(0)];
+  DFNode* DestDF = HandleToDFNodeMap[II->getOperand(1)];
+
+  bool EdgeType = !cast<ConstantInt>(II->getOperand(2))->isZero();
+
+  unsigned SourcePosition = cast<ConstantInt>(II->getOperand(3))->getZExtValue();
+  unsigned DestPosition = cast<ConstantInt>(II->getOperand(4))->getZExtValue();
+
+  Type *SrcTy, *DestTy;
+
+  // Get destination type
+  FunctionType *FT = DestDF->getFuncPointer()->getFunctionType();
+  assert((FT->getNumParams() > DestPosition)
+         && "Invalid argument number for destination dataflow node!");
+  DestTy = FT->getParamType(DestPosition);
+
+  // Get source type
+  StructType* OutTy = SrcDF->getOutputType();
+  assert((OutTy->getNumElements() > SourcePosition)
+         && "Invalid argument number for source dataflow node!");
+  SrcTy = OutTy->getElementType(SourcePosition);
+
+  // check if the types are compatible
+  assert(isTypeCongruent(SrcTy, DestTy)
+         && "Source and destination type of edge do not match");
+
+  DFEdge* newDFEdge = DFEdge::Create(SrcDF,
+                                     DestDF,
+                                     EdgeType,
+                                     SourcePosition,
+                                     DestPosition,
+                                     DestTy);
+
+  HandleToDFEdgeMap[II] = newDFEdge;
+
+  // Add Edge to the dataflow graph associated with the parent node
+  N->addEdgeToDFGraph(newDFEdge);
+}
+
+void BuildDFG::handleBindInput(DFInternalNode* N, IntrinsicInst* II) {
+  // The DFNode structures must be in the map before the edge is processed
+  HandleToDFNode::iterator DFI = HandleToDFNodeMap.find(II->getOperand(0));
+  assert(DFI != HandleToDFNodeMap.end());
+
+  DFNode* SrcDF = N->getChildGraph()->getEntry();
+  DFNode* DestDF = HandleToDFNodeMap[II->getOperand(0)];
+
+  unsigned SourcePosition = cast<ConstantInt>(II->getOperand(1))->getZExtValue();
+  unsigned DestPosition = cast<ConstantInt>(II->getOperand(2))->getZExtValue();
+
+  // Get destination type
+  FunctionType *FT = DestDF->getFuncPointer()->getFunctionType();
+  assert((FT->getNumParams() > DestPosition)
+         && "Invalid argument number for destination dataflow node!");
+  Type* DestTy = FT->getParamType(DestPosition);
+
+  // Get source type
+  FT = SrcDF->getFuncPointer()->getFunctionType();
+  assert((FT->getNumParams() > SourcePosition)
+         && "Invalid argument number for parent dataflow node!");
+  Type* SrcTy = FT->getParamType(SourcePosition);
+
+  // check if the types are compatible
+  assert(isTypeCongruent(SrcTy, DestTy)
+         && "Source and destination type of edge do not match");
+
+  // Add Binding as an edge between Entry and child Node
+  DFEdge* newDFEdge = DFEdge::Create(SrcDF,
+                                     DestDF,
+                                     false,
+                                     SourcePosition,
+                                     DestPosition,
+                                     DestTy);
+
+  HandleToDFEdgeMap[II] = newDFEdge;
+
+  // Add Edge to the dataflow graph associated with the parent node
+  N->addEdgeToDFGraph(newDFEdge);
+}
+
+void BuildDFG::handleBindOutput(DFInternalNode* N, IntrinsicInst* II) {
+  // The DFNode structures must be in the map before the edge is processed
+  HandleToDFNode::iterator DFI = HandleToDFNodeMap.find(II->getOperand(0));
+  assert(DFI != HandleToDFNodeMap.end());
+
+  DFNode* SrcDF = HandleToDFNodeMap[II->getOperand(0)];
+  DFNode* DestDF = N->getChildGraph()->getExit();
+
+  unsigned SourcePosition = cast<ConstantInt>(II->getOperand(1))->getZExtValue();
+  unsigned DestPosition = cast<ConstantInt>(II->getOperand(2))->getZExtValue();
+
+  // Get destination type
+  StructType* OutTy = DestDF->getOutputType();
+  assert((OutTy->getNumElements() > DestPosition)
+         && "Invalid argument number for destination parent dataflow node!");
+  Type* DestTy = OutTy->getElementType(DestPosition);
+
+  // Get source type
+  OutTy = SrcDF->getOutputType();
+  assert((OutTy->getNumElements() > SourcePosition)
+         && "Invalid argument number for source dataflow node!");
+  Type* SrcTy = OutTy->getElementType(SourcePosition);
+
+  // check if the types are compatible
+  assert(isTypeCongruent(SrcTy, DestTy)
+         && "Source and destination type of edge do not match");
+
+  // Add Binding as an edge between child and exit node
+  DFEdge* newDFEdge = DFEdge::Create(SrcDF,
+                                     DestDF,
+                                     false,
+                                     SourcePosition,
+                                     DestPosition,
+                                     DestTy);
+
+  HandleToDFEdgeMap[II] = newDFEdge;
+
+  // Add Edge to the dataflow graph associated with the parent node
+  N->addEdgeToDFGraph(newDFEdge);
+}
+
+void BuildDFG::BuildGraph (DFInternalNode* N, Function *F) {
+
+  // TODO: Place checks for valid visc functions. For example, one of the
+  // checks can be that any function that contains visc dataflow graph
+  // construction intrinsics should not have other LLVM IR statements.
+
+  // Iterate over all the instructions of a function and look for visc
+  // intrinsics.
+  for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) {
+    Instruction* I = &*i; // Grab pointer to instruction reference
+    errs() << *I << "\n";
+    if(IntrinsicInst* II = dyn_cast<IntrinsicInst>(I)) {
+      errs() << "IntrinsicID = " << II->getIntrinsicID() << ": " << II->getCalledFunction()->getName()<<"\n";
+      switch(II->getIntrinsicID()) {
+      case Intrinsic::visc_test:
+        errs() << "Found Test Intrinsic";
+        break;
+
+      case Intrinsic::visc_createNode:
+      case Intrinsic::visc_createNode1D:
+      case Intrinsic::visc_createNode2D:
+      case Intrinsic::visc_createNode3D:
+        handleCreateNode (N, II);
+        break;
+
+      case Intrinsic::visc_createEdge:
+        handleCreateEdge(N, II);
+        break;
+      case Intrinsic::visc_bind_input:
+        handleBindInput(N, II);
+        break;
+      case Intrinsic::visc_bind_output:
+        handleBindOutput(N, II);
+        break;
+
+      //TODO: Reconsider launch within a dataflow graph (recursion?)
+      case Intrinsic::visc_launch:
+        errs() << "Error: Launch intrinsic used within a dataflow graph\n";
+        break;
+
+      default:
+        errs() << "Error: Invalid VISC Intrinsic inside Internal node!\n";
+        break;
       }
     }
+    else {
+      errs() << "Non-intrinsic instruction\n";
+    }
   }
+}
 
-  char BuildDFG::ID = 0;
-  static RegisterPass<BuildDFG> X("buildDFG", "Hierarchical Dataflow Graph Builder Pass", false, false);
+char BuildDFG::ID = 0;
+static RegisterPass<BuildDFG> X("buildDFG", "Hierarchical Dataflow Graph Builder Pass", false, false);
 
-} // End of namespace
+} // End of namespace builddfg
 
diff --git a/llvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp b/llvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp
index ec6717a50f124ad9e9dcb1956925f6048ab67d2c..fbb196fd70962ecac7e554a4e1d3cac771a10629 100644
--- a/llvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp
+++ b/llvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp
@@ -1,4 +1,4 @@
-//=== DFG2LLVM_X86.cpp ===//
+//===-------------------------- DFG2LLVM_X86.cpp --------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -22,349 +22,443 @@ using namespace builddfg;
 //STATISTIC(IntrinsicCounter, "Counts number of visc intrinsics greeted");
 
 namespace {
-  
-  // DFG2LLVM_X86 - The first implementation.
-  struct DFG2LLVM_X86 : public ModulePass {
-    static char ID; // Pass identification, replacement for typeid
-    DFG2LLVM_X86() : ModulePass(ID) {}
 
-    private:
-    // Member variables
+// DFG2LLVM_X86 - Module pass that drives code generation over the BuildDFG graph.
+struct DFG2LLVM_X86 : public ModulePass {
+  static char ID; // Pass identification, replacement for typeid
+  DFG2LLVM_X86() : ModulePass(ID) {}
 
-    // Functions
+private:
+  // Member variables (none so far)
 
-    public:
-    bool runOnModule(Module &M);
+  // Functions (none so far)
 
-    void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.addRequired<BuildDFG>();
-    }
+public:
+  bool runOnModule(Module &M); // Runs CodeGenTraversal from the DFG root
 
+  void getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.addRequired<BuildDFG>(); // runOnModule reads it via getAnalysis
+  }
 
-  };
-
-  // Visitor for Code generation traversal (tree traversal for now)
-  class CodeGenTraversal : public DFNodeVisitor {
-
-  private:
-    //Member variables
-    Module &M;
-    BuildDFG &DFG;
-
-    // Map from Old function associated with DFNode to new cloned function with
-    // extra index and dimension arguments. This map also serves to find out if
-    // we already have an index and dim extended function copy or not (i.e.,
-    // "Have we visited this function before?")
-    ValueMap<Function*, Function*> FMap; 
-    DenseMap<DFNode*, CallInst*> CallMap;
-
-    //Functions
-    void addIdxDimArgs(Function* F);
-    Value* addLoop(Instruction* I, Value* limit, const Twine& indexName = "");
-    Argument* getArgumentFromEnd(Function* F, unsigned offset);
-    Argument* getArgumentAt(Function* F, unsigned offset);
-    void codeGen(DFInternalNode* N);
-    void codeGen(DFLeafNode* N);
-  public:
-    // Constructor
-    CodeGenTraversal(Module &_M, BuildDFG &_DFG) : M(_M), DFG(_DFG) { }
-
-    virtual void visit(DFInternalNode* N) {
-      // Follows a bottom-up approach for code generation.
-      // First generate code for all the child nodes
-      errs() << "Start: Generating Code for Node (I) - " << N->getFuncPointer()->getName() << "\n";
-      for(DFGraph::children_iterator i = N->getChildGraph()->begin(),
-          e = N->getChildGraph()->end(); i != e; ++i) {
-        DFNode* child = *i;
-        child->applyDFNodeVisitor(*this);
-      }
-      // Generate code for this internal node now. This way all the cloned
-      // functions for children exist.
-      codeGen(N);
-      errs() << "DONE: Generating Code for Node (I) - " << N->getFuncPointer()->getName() << "\n";
-    }
 
-    virtual void visit(DFLeafNode* N) {
-      errs() << "Start: Generating Code for Node (L) - " << N->getFuncPointer()->getName() << "\n";
-      codeGen(N);
-      errs() << "DONE: Generating Code for Node (L) - " << N->getFuncPointer()->getName() << "\n";
+};
+
+// Visitor that generates code for a dataflow graph (tree traversal for now)
+class CodeGenTraversal : public DFNodeVisitor {
+
+private:
+  //Member variables
+  Module &M;     // Module that receives the generated functions/declarations
+  BuildDFG &DFG; // BuildDFG analysis result passed to the constructor
+
+  // Map from Old function associated with DFNode to new cloned function with
+  // extra index and dimension arguments. This map also serves to find out if
+  // we already have an index and dim extended function copy or not (i.e.,
+  // "Have we visited this function before?")
+  ValueMap<Function*, Function*> FMap;
+  DenseMap<DFNode*, CallInst*> CallMap; // Call emitted for each child node
+
+  //Functions
+  void addIdxDimArgs(Function* F); // Appends six i32 idx_*/dim_* arguments
+  Value* addLoop(Instruction* I, Value* limit, const Twine& indexName = "");
+  Argument* getArgumentFromEnd(Function* F, unsigned offset); // 1 == last
+  Argument* getArgumentAt(Function* F, unsigned offset);      // 0 == first
+  Constant* getOrInsertPThreadCreate(); // NOTE(review): confirm that the four
+  Constant* getOrInsertPThreadJoin();   // getOrInsert* helpers are defined;
+  Constant* getOrInsertPThreadExit();   // codeGenLaunch inserts the same
+  Constant* getOrInsertMalloc();        // declarations inline instead
+  void codeGenLaunch(DFInternalNode* Root); // Lowers launch to pthread calls
+  void codeGen(DFInternalNode* N);
+  void codeGen(DFLeafNode* N);
+public:
+  // Constructor: only stores the module and the BuildDFG result
+  CodeGenTraversal(Module &_M, BuildDFG &_DFG) : M(_M), DFG(_DFG) { }
+
+  virtual void visit(DFInternalNode* N) {
+    // Follows a bottom-up approach for code generation.
+    // First generate code for all the child nodes
+    errs() << "Start: Generating Code for Node (I) - " << N->getFuncPointer()->getName() << "\n";
+    for(DFGraph::children_iterator i = N->getChildGraph()->begin(),
+        e = N->getChildGraph()->end(); i != e; ++i) {
+      DFNode* child = *i;
+      child->applyDFNodeVisitor(*this);
     }
+    // Generate code for this internal node now. This way all the cloned
+    // functions for children exist.
+    codeGen(N);
+    errs() << "DONE: Generating Code for Node (I) - " << N->getFuncPointer()->getName() << "\n";
+  }
+
+  virtual void visit(DFLeafNode* N) {
+    errs() << "Start: Generating Code for Node (L) - " << N->getFuncPointer()->getName() << "\n";
+    codeGen(N); // Leaf nodes have no children: generate directly
+    errs() << "DONE: Generating Code for Node (L) - " << N->getFuncPointer()->getName() << "\n";
+  }
 
-  };
+};
 
-  bool DFG2LLVM_X86::runOnModule(Module &M) {
+bool DFG2LLVM_X86::runOnModule(Module &M) {
 
-    // Get the BuildDFG Analysis Results:
-    // - Dataflow graph
-    // - Maps from i8* hansles to DFNode and DFEdge
-    BuildDFG &DFG = getAnalysis<BuildDFG>();
+  // Pass entry point: fetch the dataflow graph built by BuildDFG and run code
+  // generation from its root. Always reports the module as modified.
+  // (BuildDFG also provides maps from i8* handles to DFNode and DFEdge.)
+  BuildDFG &DFG = getAnalysis<BuildDFG>();
 
-    DFInternalNode *Root = DFG.getRoot();
-    // BuildDFG::HandleToDFNode &HandleToDFNodeMap = DFG.getHandleToDFNodeMap();
-    // BuildDFG::HandleToDFEdge &HandleToDFEdgeMap = DFG.getHandleToDFEdgeMap();
+  DFInternalNode *Root = DFG.getRoot();
+  // BuildDFG::HandleToDFNode &HandleToDFNodeMap = DFG.getHandleToDFNodeMap();
+  // BuildDFG::HandleToDFEdge &HandleToDFEdgeMap = DFG.getHandleToDFEdgeMap();
 
-    // Visitor for Code Generation Graph Traversal
-    CodeGenTraversal *CGTVisitor = new CodeGenTraversal(M, DFG);
+  // Visitor for Code Generation Graph Traversal (on the stack: nothing to free)
+  CodeGenTraversal CGTVisitor(M, DFG);
 
-    // Initiate code generation for root DFNode
-    CGTVisitor->visit(Root);
+  // Initiate code generation for root DFNode
+  CGTVisitor.visit(Root);
 
-    return true;
-  } 
-  
-  void CodeGenTraversal::addIdxDimArgs(Function* F) {  
-    // Add Index and Dim arguments
-    std::string names[] = {"idx_x", "idx_y", "idx_z", "dim_x", "dim_y", "dim_z"}; 
-    for (int i = 0; i < 6; ++i) {
-      new Argument(Type::getInt32Ty(F->getContext()), names[i], F);
-    }
+  return true;
+}
 
-    // Create the argument type list with added argument types
-    std::vector<Type*> ArgTypes;
-    for(Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
-        ai != ae; ++ai) {
-      ArgTypes.push_back(ai->getType());
-    }
-    // Adding new arguments to the function argument list, would not change the
-    // function type. We need to change the type of this function to reflect the
-    // added arguments
-    FunctionType* FTy = FunctionType::get(F->getReturnType(), ArgTypes, F->isVarArg());
-    PointerType* PTy = PointerType::get(FTy, cast<PointerType>(F->getType())->getAddressSpace());
-
-    // Change the function type
-    F->mutateType(PTy);
+void CodeGenTraversal::addIdxDimArgs(Function* F) {
+  // Append six i32 arguments (index x/y/z, then dimension x/y/z) to F
+  const char* const IdxDimNames[] = {"idx_x", "idx_y", "idx_z", "dim_x", "dim_y", "dim_z"};
+  for (unsigned n = 0; n != 6; ++n) {
+    new Argument(Type::getInt32Ty(F->getContext()), IdxDimNames[n], F);
   }
 
-  /* Traverse the function argument list in reverse order to get argument at a
-   * distance offset fromt he end of argument list of function F
+  // Collect the argument types, now including the six appended ones
+  std::vector<Type*> ParamTys;
+  for(Function::const_arg_iterator it = F->arg_begin(), end = F->arg_end();
+      it != end; ++it) {
+    ParamTys.push_back(it->getType());
+  }
+  // Appending Argument objects does not update the function's type, so build
+  // the matching FunctionType (same return type and vararg flag) and address
+  // it through a pointer in the address space F already uses
+  FunctionType* NewFnTy = FunctionType::get(F->getReturnType(), ParamTys, F->isVarArg());
+  unsigned AddrSpace = cast<PointerType>(F->getType())->getAddressSpace();
+
+  // Retype F in place
+  F->mutateType(PointerType::get(NewFnTy, AddrSpace));
+}
+
+/* Return the argument that is `offset` positions from the end of F's argument
+ * list (offset 1 is the last argument). Requires 0 < offset <= #params.
+ */
+Argument* CodeGenTraversal::getArgumentFromEnd(Function* F, unsigned offset) {
+  assert((F->getFunctionType()->getNumParams() >= offset && offset > 0)
+         && "Invalid offset to access arguments!");
+  // arg_end() is the past-the-end position; step back exactly `offset` times.
+  // Stepping inside the loop body (instead of in the increment clause) never
+  // moves the iterator before arg_begin(), even when offset == #params.
+  Function::arg_iterator e = F->arg_end();
+  for( ; offset != 0; --offset) {
+    --e;
+  }
+  Argument* arg = e;
+  return arg;
+}
+
+/* Return F's argument at zero-based position offset (must be < #params). */
+Argument* CodeGenTraversal::getArgumentAt(Function* F, unsigned offset) {
+  // offset is unsigned, so only the upper bound needs checking.
+  assert(F->getFunctionType()->getNumParams() > offset
+         && "Invalid offset to access arguments!");
+
+  Function::arg_iterator i = F->arg_begin(), e = F->arg_end();
+  for(; offset != 0 && i != e; --offset) {
+    ++i;
+  }
+  Argument* arg = i;
+  errs() << *F;
+  errs() << *arg <<"\n";
+  return arg;
+}
+
+/* Wrap instruction I in a counted loop and return the loop index value.
+ * The exit test runs after the body, so I executes at least once. Steps:
+ * (1) Split I's basic block in three so that I sits alone (with a terminator)
+ * in the middle block, "for.body"; the tail becomes "for.end".
+ * (2) Insert an i32 phi in front of I as the index; it is 0 when entered from
+ * the preceding block.
+ * (3) Before the body's terminator compute index+1 and compare it (unsigned
+ * less-than) against limit.
+ * (4) Swap the body's terminator for a conditional branch: back to for.body
+ * when the comparison holds, on to for.end otherwise.
+ * (5) Feed the incremented index into the phi along the back edge.
+ */
+Value* CodeGenTraversal::addLoop(Instruction* I, Value* limit, const Twine& indexName) {
+  Type* IndexTy = Type::getInt32Ty(I->getContext());
+
+  // Carve out the loop body: everything from I onwards leaves the entry block
+  BasicBlock* Entry = I->getParent();
+  BasicBlock* ForBody = Entry->splitBasicBlock(I, "for.body");
+
+  // The instruction after I must still be in for.body, since every basic
+  // block ends in a terminator; it becomes the head of for.end
+  BasicBlock::iterator AfterI = I;
+  ++AfterI;
+  Instruction* NextI = AfterI;
+  assert(NextI->getParent() == ForBody
+         && "Next Instruction should also belong to the same basic block!");
+  BasicBlock* ForEnd = ForBody->splitBasicBlock(NextI, "for.end");
+
+  // Index variable: phi placed before I, starting at zero on entry
+  PHINode* IndexPhi = PHINode::Create(IndexTy, 2, "index."+indexName, I);
+  IndexPhi->addIncoming(ConstantInt::get(IndexTy, 0), Entry);
+
+  // index + 1, inserted ahead of the body's current terminator
+  Instruction* BodyTerm = ForBody->getTerminator();
+  BinaryOperator* IndexInc = BinaryOperator::Create(Instruction::Add,
+                             IndexPhi, ConstantInt::get(IndexTy, 1),
+                             "index."+indexName+".inc", BodyTerm);
+
+  // Continue looping while the incremented index is below the limit
+  CmpInst* Cond = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT, IndexInc,
+                                  limit, "cond."+indexName, BodyTerm);
+
+  // Replace the body's terminator with the conditional back edge / exit
+  ReplaceInstWithInst(BodyTerm, BranchInst::Create(ForBody, ForEnd, Cond));
+
+  // Close the cycle: the phi takes the incremented index from the back edge
+  IndexPhi->addIncoming(IndexInc, ForBody);
+  return IndexPhi;
+}
+
+void CodeGenTraversal::codeGenLaunch(DFInternalNode* Root) {
+  // Lower Root's launch intrinsic: emit a thread wrapper "LaunchDataflowGraph"
+  // around Root's X86 function and run it with pthread_create/pthread_join.
+  IntrinsicInst* LI = Root->getInstruction();
+  // Get frequently used types
+  Type* i64Ty = Type::getInt64Ty(LI->getContext());
+  Type* i32Ty = Type::getInt32Ty(LI->getContext());
+  Type* i8Ty = Type::getInt8Ty(LI->getContext());
+  Type* voidTy = Type::getVoidTy(LI->getContext());
+
+  /* Get or Insert pthread utilities necessary to run DFG as a separate thread
+   * (1) pthread_create
+   * (2) pthread_join
+   * (3) pthread_exit
+   * Also requires a new struct for pthread_attr_t
    */
-  Argument* CodeGenTraversal::getArgumentFromEnd(Function* F, unsigned offset) {
-    assert((F->getFunctionType()->getNumParams() >= offset && offset > 0) 
-      && "Invalid offset to access arguments!");
-    Function::arg_iterator e = F->arg_end();
-    // Last element of argument iterator is dummy. Skip it. 
-    e--;
-    Argument* arg;
-    for( ; offset != 0; e--) {
-      offset--;
-      arg = e;
-    }
-    return arg;
+  Type *PThreadTy, *PThreadAttrTy;
+  std::vector<Type*>Elements;
+  // pthread_t / pthread_attr_t: i64 / 56 bytes on 64-bit, i32 / 36 otherwise
+  if(M.getPointerSize() == Module::Pointer64) {
+    PThreadTy = i64Ty;
+    Elements.push_back(ArrayType::get(i8Ty, 56));
   }
-
-  /* Traverse the function F argument list to get argument at offset*/
-  Argument* CodeGenTraversal::getArgumentAt(Function* F, unsigned offset) {
-    assert((F->getFunctionType()->getNumParams() > offset && offset >= 0) 
-      && "Invalid offset to access arguments!");
-
-    Argument* arg;
-    Function::arg_iterator i = F->arg_begin(), e = F->arg_end();
-    for(; offset != 0 && i!=e; i++) {
-      offset--;
-    }
-    arg = i;
-    errs() << *F;
-    errs() << *arg <<"\n";
-    return arg;
+  else {
+    PThreadTy = i32Ty;
+    Elements.push_back(ArrayType::get(i8Ty, 36));
   }
 
-  /* Add Loop around the instruction I 
-   * Algorithm:
-   * (1) Split the basic block of instruction I into three parts, where the
-   * middleblock/body would contain instruction I. 
-   * (2) Add phi node before instruction I. Add incoming edge to phi node from
-   * predecessor
-   * (3) Add increment and compare instruction to index variable
-   * (4) Replace terminator/branch instruction of body with conditional branch
-   * which loops over bidy if true and goes to end if false
-   * (5) Update phi node of body
+  PThreadAttrTy = StructType::create(LI->getContext(), Elements, "union.pthread_attr_t");
+  // Type of a thread start routine: i8* (i8*)
+  FunctionType* PThreadFuncTy = FunctionType::get(i8Ty->getPointerTo(),
+                                                  ArrayRef<Type*>(i8Ty->getPointerTo()),
+                                                  false);
+
+  // Argument types for pthread_create
+  Type* ArgTypesPTCreate[] = {PThreadTy->getPointerTo(),
+                              PThreadAttrTy->getPointerTo(),
+                              PThreadFuncTy->getPointerTo(),
+                              i8Ty->getPointerTo()};
+  // Construct FunctionType of pthread_create call
+  FunctionType* PThreadCreateTy = FunctionType::get(i32Ty,
+                                                    ArrayRef<Type*>(ArgTypesPTCreate, 4),
+                                                    false);
+  // Argument types for pthread_join
+  Type* ArgTypesPTJoin[] = {PThreadTy,
+                            i8Ty->getPointerTo()->getPointerTo()};
+  // Construct FunctionType for pthread_join call
+  FunctionType* PThreadJoinTy = FunctionType::get(i32Ty,
+                                                  ArrayRef<Type*>(ArgTypesPTJoin, 2),
+                                                  false);
+  // Construct FunctionType for pthread_exit call
+  FunctionType* PThreadExitTy = FunctionType::get(voidTy,
+                                                  ArrayRef<Type*>(i8Ty->getPointerTo()),
+                                                  false);
+  // Get or insert the global declarations for pthread functions
+  Constant* PThreadCreate = M.getOrInsertFunction("pthread_create", PThreadCreateTy);
+  Constant* PThreadJoin = M.getOrInsertFunction("pthread_join", PThreadJoinTy);
+  Constant* PThreadExit = M.getOrInsertFunction("pthread_exit", PThreadExitTy);
+  // Construct FunctionType for malloc call
+  FunctionType* MallocTy = FunctionType::get(i8Ty->getPointerTo(),
+                                            ArrayRef<Type*>(i64Ty),
+                                            false);
+  // Get or insert the global declaration for malloc call
+  Constant* Malloc = M.getOrInsertFunction("malloc", MallocTy);
+
+  /* Now we have all the global declarations necessary to generate the
+   * Launch function, pointer to which can be passed to pthread utils to execute
+   * DFG. The Launch function has just one input: i8* data.addr
+   * This is the address of all the input data that needs to be passed to
+   * this function. In our case it contains the input arguments of the Root
+   * function in the correct order.
+   * (1) Create an empty Launch function of type i8*(i8*)
+   * (2) Extract each of the inputs from data.addr and pass them as arguments to
+   * the call to Root function
+   * (3) The return value from Root is stored in memory, pointer to which is
+   * passed to pthread_exit call.
    */
-  Value* CodeGenTraversal::addLoop(Instruction* I, Value* limit, const Twine& indexName) {
-    BasicBlock* Entry = I->getParent();
-    BasicBlock* ForBody = Entry->splitBasicBlock(I, "for.body");
-    
-    BasicBlock::iterator i = I;
-    Instruction* NextI = ++i;
-    // Next Instruction should also belong to the same basic block as the basic
-    // block will have a terminator instruction
-    assert(NextI->getParent() == ForBody
-      && "Next Instruction should also belong to the same basic block!");
-    BasicBlock* ForEnd = ForBody->splitBasicBlock(NextI, "for.end");
-
-    
-    // Add Phi Node for index variable
-    PHINode* IndexPhi = PHINode::Create(Type::getInt32Ty(I->getContext()),
-                                        2, "index."+indexName, I);
-
-    // Add incoming edge to phi
-    IndexPhi->addIncoming(ConstantInt::get(Type::getInt32Ty(I->getContext()), 0),
-                          Entry);
-    // Increment index variable
-    BinaryOperator* IndexInc = BinaryOperator::Create(Instruction::Add,
-      IndexPhi, ConstantInt::get(Type::getInt32Ty(I->getContext()), 1),
-      "index."+indexName+".inc", ForBody->getTerminator());
-    
-    // Compare index variable with limit
-    CmpInst* Cond = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT, IndexInc,
-      limit, "cond."+indexName, ForBody->getTerminator());
-    
-    // Replace the terminator instruction of for.body with new conditional
-    // branch which loops over body if true and branches to for.end otherwise
-    BranchInst* BI = BranchInst::Create(ForBody, ForEnd, Cond);
-    ReplaceInstWithInst(ForBody->getTerminator(), BI);
-   
-    // Add incoming edge to phi node in body
-    IndexPhi->addIncoming(IndexInc, ForBody);
-    return IndexPhi;
+  // Create Launch Function of type i8*(i8*) which calls the root function
+  Function* LaunchFunc = Function::Create(PThreadFuncTy,
+                                          Root->getFuncPointer()->getLinkage(),
+                                          "LaunchDataflowGraph",
+                                          &M);
+  // Give a name to the argument which is used to pass data to this thread
+  Value* data = LaunchFunc->arg_begin();
+  data->setName("data.addr");
+  // Add a basic block to this empty function and a return null statement to it
+  BasicBlock *BB = BasicBlock::Create(LaunchFunc->getContext(), "entry", LaunchFunc);
+  ReturnInst* RI = ReturnInst::Create(LaunchFunc->getContext(),
+                                      Constant::getNullValue(LaunchFunc->getReturnType()),
+                                      BB);
+  // Find the X86 function generated for Root; a missing entry comes back null
+  Function* RootF_X86 = FMap[Root->getFuncPointer()];
+  assert(RootF_X86 && "No X86 function has been generated for the root node!");
+  // Generate a call to RootF_X86 with null parameters for now
+  std::vector<Value*>Args;
+  for(unsigned i=0; i< RootF_X86->getFunctionType()->getNumParams(); i++) {
+    Args.push_back(Constant::getNullValue(RootF_X86->getFunctionType()->getParamType(i)));
+  }
+  CallInst* CI = CallInst::Create(RootF_X86, Args, RootF_X86->getName()+".output", RI);
+
+  // Extract input data from i8* data.addr and patch them to correct argument of
+  // call to RootF_X86, one argument per iteration
+  unsigned argNum = 0;
+  for(Function::const_arg_iterator i = RootF_X86->arg_begin(),
+      e = RootF_X86->arg_end(); i != e; i++) {
+    // BitCast: %arg.addr = bitcast i8* data.addr to <pointer-to-argType>
+    CastInst* BI = BitCastInst::CreatePointerCast(data,
+                                                  i->getType()->getPointerTo(),
+                                                  i->getName()+".addr",
+                                                  CI);
+    // Load: %arg = load <pointer-to-argType> %arg.addr
+    LoadInst* ArgLoad = new LoadInst(BI, i->getName(), CI);
+    // Patch argument to call instruction
+    CI->setArgOperand(argNum, ArgLoad);
+
+    // TODO: Minor Optimization - the GEP after the last argument is unneeded
+    // Increment using GEP: %nextArg = getelementptr <ptr-to-argType> %arg.addr, i64 1
+    // This essentially takes us to the next argument in memory
+    Constant* IntOne = ConstantInt::get(i64Ty, 1);
+    GetElementPtrInst* GEP = GetElementPtrInst::Create(BI,
+                                                      ArrayRef<Value*>(IntOne),
+                                                      "nextArg",
+                                                      CI);
+    // Increment argNum and for the next iteration use result of this GEP to
+    // extract next argument
+    argNum++;
+    data = GEP;
+  }
+  // Code for returning the output: malloc a slot, store the result, pass it on
+  Constant* SizeOf = ConstantExpr::getSizeOf(CI->getType());
+  CallInst* OutputAddr = CallInst::Create(Malloc, ArrayRef<Value*>(SizeOf), "output.addr", RI);
+  CastInst* OutputAddrCast = CastInst::CreatePointerCast(OutputAddr,
+                                                        CI->getType()->getPointerTo(),
+                                                        CI->getName()+".addr",
+                                                        RI);
+  new StoreInst(CI, OutputAddrCast, RI);
+
+  CallInst::Create(PThreadExit, ArrayRef<Value*>(OutputAddr), "", RI);
+  errs() << "Launch Function:\n";
+  errs() << *LaunchFunc << "\n";
+
+  // Substitute the launch intrinsic: create the thread where the launch was
+  AllocaInst* AI = new AllocaInst(PThreadTy, "DFG_thread", LI);
+  errs() << *AI << "\n";
+  Value* PTCreateArgs[] = {AI,
+                          Constant::getNullValue(PThreadCreateTy->getParamType(1)),
+                          LaunchFunc,
+                          LI->getArgOperand(1)};
+  CallInst* PTCreateInst = CallInst::Create(PThreadCreate,
+                                            ArrayRef<Value*>(PTCreateArgs,4),
+                                            "", LI);
+
+  errs() << *PTCreateInst << "\n";
+  // Place Join before the block's terminator; the thread's result is not read
+  LoadInst* LoadPThreadID = new LoadInst(AI, "DFG_thread", LI->getParent()->getTerminator());
+  Value* PTJoinArgs[] = {LoadPThreadID,
+                        Constant::getNullValue(PThreadJoinTy->getParamType(1))};
+  CallInst* PTJoinInst = CallInst::Create(PThreadJoin,
+                                            ArrayRef<Value*>(PTJoinArgs,2),
+                                            "",
+                                            LI->getParent()->getTerminator());
+  errs() << *PTJoinInst << "\n";
+
+  LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
+  LI->eraseFromParent();
+}
+void CodeGenTraversal::codeGen(DFInternalNode* N) {
+  Function* F = N->getFuncPointer();
+
+  // Check if clone already exists. If it does, it means we have visited this
+  // function before and nothing else needs to be done for this leaf node.
+  if(FMap.count(F))
+    return;
+
+  // Create of clone of F with no instructions. Only the type is the same as F
+  // without the extra arguments.
+  Function* F_X86;
+
+  // Clone the function, if we are seeing this function for the first time. We
+  // only need a clone in terms of type.
+  ValueToValueMapTy VMap;
+
+  // Create new function with the same type
+  F_X86 = Function::Create(F->getFunctionType(), F->getLinkage(), F->getName(), &M);
+
+  // Loop over the arguments, copying the names of arguments over.
+  Function::arg_iterator dest_iterator = F_X86->arg_begin();
+  for (Function::const_arg_iterator i = F->arg_begin(), e = F->arg_end();
+       i != e; ++i) {
+    dest_iterator->setName(i->getName()); // Copy the name over...
+    // Add mapping to VMap and increment dest iterator
+    VMap[i] = dest_iterator++;
   }
-    
-  void CodeGenTraversal::codeGen(DFInternalNode* N) {
-    Function* F = N->getFuncPointer();
-
-    // Check if clone already exists. If it does, it means we have visited this
-    // function before and nothing else needs to be done for this leaf node.
-    if(FMap.count(F))
-      return;
-
-    // Create of clone of F with no instructions. Only the type is the same as F
-    // without the extra arguments.
-    Function* F_X86;
-    
-    // Clone the function, if we are seeing this function for the first time. We
-    // only need a clone in terms of type.
-    ValueToValueMapTy VMap;
-    
-    // Create new function with the same type
-    F_X86 = Function::Create(F->getFunctionType(), F->getLinkage(), F->getName(), &M);
-
-    // Loop over the arguments, copying the names of arguments over.
-    Function::arg_iterator dest_iterator = F_X86->arg_begin();
-    for (Function::const_arg_iterator i = F->arg_begin(), e = F->arg_end();
-          i != e; ++i) {
-      dest_iterator->setName(i->getName()); // Copy the name over...
-      // Add mapping to VMap and increment dest iterator
-      VMap[i] = dest_iterator++;   
-    }
 
-    // Add a basic block to this empty function
-    BasicBlock *BB = BasicBlock::Create(F_X86->getContext(), "entry", F_X86);
-    ReturnInst* RI = ReturnInst::Create(F_X86->getContext(),
-                      UndefValue::get(F_X86->getReturnType()), BB);
+  // Add a basic block to this empty function
+  BasicBlock *BB = BasicBlock::Create(F_X86->getContext(), "entry", F_X86);
+  ReturnInst* RI = ReturnInst::Create(F_X86->getContext(),
+                                      UndefValue::get(F_X86->getReturnType()), BB);
 
-    //Add old func: new func pair to the FMap
-    FMap[F] = F_X86;
-    
-    // Add Index and Dim arguments
+  //Add old func: new func pair to the FMap
+  FMap[F] = F_X86;
+
+  // Add Index and Dim arguments except for the root node
+  if(!N->isRoot())
     addIdxDimArgs(F_X86);
 
-    // Sort children in topological order before code generation
-    N->getChildGraph()->sortChildren();
-
-    for(DFGraph::children_iterator ci = N->getChildGraph()->begin(),
-        ce = N->getChildGraph()->end(); ci != ce; ++ci) {
-      DFNode* C = *ci;
-      // Skip dummy node call
-      if (C->isDummyNode())
-        continue;
-
-      Function* CF = C->getFuncPointer();
-
-      assert(FMap.count(CF)
-        && "Found leaf node for which code generation has not happened yet!");
-      Function* CF_X86 = FMap[CF];
-      std::vector<Value*> Args;
-      // Create argument list to pass to call instruction
-      // First find the correct values using the edges
-      // The remaing six values are inserted as constants for now.
-      for(unsigned i=0; i<CF->getFunctionType()->getNumParams(); i++) {
-        
-        // TODO: Assumption is that each input port of a node has just one
-        // incoming edge. May change later on.
-        
-        // Find the incoming edge at the requested input port
-        DFEdge* E = C->getInDFEdgeAt(i);
-        assert(E && "No incoming edge or binding for input element!");
-        // Find the Source DFNode associated with the incoming edge
-        DFNode* SrcDF = E->getSourceDF();
-        
-        // If Source DFNode is a dummyNode, edge is from parent. Get the
-        // argument from argument list of this internal node
-        Value* inputVal;
-        if(SrcDF->isEntryNode()) {
-          inputVal = getArgumentAt(F_X86, i);
-          errs() << "Argument "<< i<< " = "  << *inputVal << "\n";
-        }
-        else {
-          // edge is from a sibling
-          // Check - code should already be generated for this source dfnode
-          assert(CallMap.count(SrcDF)
-            && "Source node call not found. Dependency violation!");
-          
-          // Find CallInst associated with the Source DFNode using FMap
-          CallInst* CI = CallMap[SrcDF];
-          
-          // Extract element at source position from this call instruction
-          std::vector<unsigned> IndexList;
-          IndexList.push_back(E->getSourcePosition());
-          DEBUG(errs() << "Going to generate ExtarctVal inst from "<< *CI <<"\n");
-          ExtractValueInst* EI = ExtractValueInst::Create(CI, IndexList,
-                                  "", RI);
-          inputVal = EI;
-        }
-        // input value has been obtained.
-        Args.push_back(inputVal);
-      }
-      
-      Value* I32Zero = ConstantInt::get(Type::getInt32Ty(F_X86->getContext()), 0);
-      for(unsigned j=0; j<6; j++)
-        Args.push_back(I32Zero);
-
-      // Call the F_X86 function associated with this node
-      CallInst* CI = CallInst::Create(CF_X86, Args,
-                                      CF_X86->getName()+"_output",
-                                      RI);
-      DEBUG(errs() << *CI << "\n");
-      CallMap[C] = CI;
-
-      // Find num of dimensions this node is replicated in. 
-      // Based on number of dimensions, insert loop instructions
-      std::string varNames[3] = {"x", "y", "z"};
-      for(unsigned j=0; j < C->getNumOfDim(); j++) {
-        Value* indexLimit;
-        // Limit can either be a constant or an arguement of the internal node.
-        // In case of constant we can use that constant value directly in the
-        // new F_X86 function. In case of an argument, we need to get the mapped
-        // value using VMap
-        if(isa<Constant>(C->getDimLimits()[j]))
-          indexLimit = C->getDimLimits()[j];
-        else
-          indexLimit = VMap[C->getDimLimits()[j]];
-        assert(indexLimit && "Invalid dimension limit!");
-        // Insert loop
-        Value* indexVar = addLoop(CI, indexLimit, varNames[j]);
-        unsigned numArgs = CI->getNumArgOperands();
-        // Insert index variable and limit arguments
-        CI->setArgOperand(numArgs-6+j, indexVar);
-        CI->setArgOperand(numArgs-3+j, indexLimit);
-      }
-    }
+  // Sort children in topological order before code generation
+  N->getChildGraph()->sortChildren();
+
+  for(DFGraph::children_iterator ci = N->getChildGraph()->begin(),
+      ce = N->getChildGraph()->end(); ci != ce; ++ci) {
+    DFNode* C = *ci;
+    // Skip dummy node call
+    if (C->isDummyNode())
+      continue;
+
+    Function* CF = C->getFuncPointer();
+
+    assert(FMap.count(CF)
+           && "Found leaf node for which code generation has not happened yet!");
+    Function* CF_X86 = FMap[CF];
+    std::vector<Value*> Args;
+    // Create argument list to pass to call instruction
+    // First find the correct values using the edges
+    // The remaining six values are inserted as constants for now.
+    for(unsigned i=0; i<CF->getFunctionType()->getNumParams(); i++) {
+
+      // TODO: Assumption is that each input port of a node has just one
+      // incoming edge. May change later on.
 
-    errs() << "*** Generating epilogue code for the function****\n";
-    // Generate code for output bindings
-    // Get Exit node
-    DFNode* C = N->getChildGraph()->getExit();
-    // Get OutputType of this node
-    StructType* OutTy = N->getOutputType();
-    Value *retVal = UndefValue::get(F_X86->getReturnType());
-    // Find all the input edges to exit node
-    for (unsigned i=0; i < OutTy->getNumElements(); i++) {
       // Find the incoming edge at the requested input port
       DFEdge* E = C->getInDFEdgeAt(i);
-      
-      assert(E && "No Binding for output element!");
+      assert(E && "No incoming edge or binding for input element!");
       // Find the Source DFNode associated with the incoming edge
       DFNode* SrcDF = E->getSourceDF();
-      
+
       // If Source DFNode is a dummyNode, edge is from parent. Get the
       // argument from argument list of this internal node
       Value* inputVal;
@@ -373,246 +467,334 @@ namespace {
         errs() << "Argument "<< i<< " = "  << *inputVal << "\n";
       }
       else {
-        // edge is from a internal node 
+        // edge is from a sibling
         // Check - code should already be generated for this source dfnode
         assert(CallMap.count(SrcDF)
-          && "Source node call not found. Dependency violation!");
-        
+               && "Source node call not found. Dependency violation!");
+
         // Find CallInst associated with the Source DFNode using FMap
         CallInst* CI = CallMap[SrcDF];
-        
+
         // Extract element at source position from this call instruction
         std::vector<unsigned> IndexList;
         IndexList.push_back(E->getSourcePosition());
         DEBUG(errs() << "Going to generate ExtarctVal inst from "<< *CI <<"\n");
         ExtractValueInst* EI = ExtractValueInst::Create(CI, IndexList,
-                                "",RI);
+                               "", RI);
         inputVal = EI;
       }
-      std::vector<unsigned> IdxList;
-      IdxList.push_back(i);
-      retVal = InsertValueInst::Create(retVal, inputVal, IdxList, "", RI);
+      // input value has been obtained.
+      Args.push_back(inputVal);
+    }
+
+    Value* I32Zero = ConstantInt::get(Type::getInt32Ty(F_X86->getContext()), 0);
+    for(unsigned j=0; j<6; j++)
+      Args.push_back(I32Zero);
+
+    // Call the F_X86 function associated with this node
+    CallInst* CI = CallInst::Create(CF_X86, Args,
+                                    CF_X86->getName()+"_output",
+                                    RI);
+    DEBUG(errs() << *CI << "\n");
+    CallMap[C] = CI;
+
+    // Find num of dimensions this node is replicated in.
+    // Based on number of dimensions, insert loop instructions
+    std::string varNames[3] = {"x", "y", "z"};
+    for(unsigned j=0; j < C->getNumOfDim(); j++) {
+      Value* indexLimit;
+      // Limit can either be a constant or an argument of the internal node.
+      // In case of constant we can use that constant value directly in the
+      // new F_X86 function. In case of an argument, we need to get the mapped
+      // value using VMap
+      if(isa<Constant>(C->getDimLimits()[j]))
+        indexLimit = C->getDimLimits()[j];
+      else
+        indexLimit = VMap[C->getDimLimits()[j]];
+      assert(indexLimit && "Invalid dimension limit!");
+      // Insert loop
+      Value* indexVar = addLoop(CI, indexLimit, varNames[j]);
+      unsigned numArgs = CI->getNumArgOperands();
+      // Insert index variable and limit arguments
+      CI->setArgOperand(numArgs-6+j, indexVar);
+      CI->setArgOperand(numArgs-3+j, indexLimit);
     }
-    retVal->setName("output");
-    ReturnInst* newRI = ReturnInst::Create(F_X86->getContext(), retVal);
-    ReplaceInstWithInst(RI, newRI);
   }
 
-  // Code generation for leaf nodes
-  void CodeGenTraversal::codeGen(DFLeafNode* N) {
-    // Skip code generation if it is a dummy node
-    if(N->isDummyNode()) {
-      DEBUG(errs() << "Skipping dummy node\n");
-      return;
+  errs() << "*** Generating epilogue code for the function****\n";
+  // Generate code for output bindings
+  // Get Exit node
+  DFNode* C = N->getChildGraph()->getExit();
+  // Get OutputType of this node
+  StructType* OutTy = N->getOutputType();
+  Value *retVal = UndefValue::get(F_X86->getReturnType());
+  // Find all the input edges to exit node
+  for (unsigned i=0; i < OutTy->getNumElements(); i++) {
+    // Find the incoming edge at the requested input port
+    DFEdge* E = C->getInDFEdgeAt(i);
+
+    assert(E && "No Binding for output element!");
+    // Find the Source DFNode associated with the incoming edge
+    DFNode* SrcDF = E->getSourceDF();
+
+    // If Source DFNode is a dummyNode, edge is from parent. Get the
+    // argument from argument list of this internal node
+    Value* inputVal;
+    if(SrcDF->isEntryNode()) {
+      inputVal = getArgumentAt(F_X86, i);
+      errs() << "Argument "<< i<< " = "  << *inputVal << "\n";
     }
-    
-    std::vector<IntrinsicInst *> IItoRemove;
-    std::vector<std::pair<IntrinsicInst *, Value *> > IItoReplace;
-    BuildDFG::HandleToDFNode Leaf_HandleToDFNodeMap;
-
-    // Get the function associated woth the dataflow node
-    Function *F = N->getFuncPointer();
-
-    // Check if clone already exists. If it does, it means we have visited this
-    // function before and nothing else needs to be done for this leaf node.
-    if(FMap.count(F))
-      return;
-
-    // Clone the function, if we are seeing this function for the first time.
-    Function *F_X86;
-    ValueToValueMapTy VMap;
-    F_X86 = CloneFunction(F, VMap, true);
-    // Insert the cloned function into the module
-    M.getFunctionList().push_back(F_X86);
-    //Add old func: new func pair to the FMap
-    FMap[F] = F_X86;
-
-    // Add the new argument to the argument list
-    addIdxDimArgs(F_X86);
+    else {
+      // edge is from an internal node
+      // Check - code should already be generated for this source dfnode
+      assert(CallMap.count(SrcDF)
+             && "Source node call not found. Dependency violation!");
+
+      // Find CallInst associated with the Source DFNode using CallMap
+      CallInst* CI = CallMap[SrcDF];
+
+      // Extract element at source position from this call instruction
+      std::vector<unsigned> IndexList;
+      IndexList.push_back(E->getSourcePosition());
+      DEBUG(errs() << "Going to generate ExtarctVal inst from "<< *CI <<"\n");
+      ExtractValueInst* EI = ExtractValueInst::Create(CI, IndexList,
+                             "",RI);
+      inputVal = EI;
+    }
+    std::vector<unsigned> IdxList;
+    IdxList.push_back(i);
+    retVal = InsertValueInst::Create(retVal, inputVal, IdxList, "", RI);
+  }
+  retVal->setName("output");
+  ReturnInst* newRI = ReturnInst::Create(F_X86->getContext(), retVal);
+  ReplaceInstWithInst(RI, newRI);
+
+  // If it is a root node, we can go ahead and replace the launch intrinsic with
+  // pthread call, otherwise return now.
+  // TODO: Later on, we might like to do this in a separate pass, which would
+  // allow us the flexibility to switch between complete static code generation
+  // for DFG or having a customized runtime+scheduler
+  if(!N->isRoot())
+    return;
+  
+  codeGenLaunch(N);
 
-    // Go through all the instructions
-    for (inst_iterator i = inst_begin(F_X86), e = inst_end(F_X86); i != e; ++i) {
-      Instruction *I = &(*i);
-      DEBUG(errs() << *I << "\n");
-      // Leaf nodes should not contain VISC graph intrinsics or launch
-      assert(!BuildDFG::isViscLaunchIntrinsic(I) && "Launch intrinsic within a dataflow graph!");
-      assert(!BuildDFG::isViscGraphIntrinsic(I) && "VISC graph intrinsic within a leaf dataflow node!");
-
-      if (BuildDFG::isViscQueryIntrinsic(I)) {
-        IntrinsicInst* II = cast<IntrinsicInst>(I);
-        IntrinsicInst* ArgII;
-        DFNode* ArgDFNode;
-
-        /***********************************************************************
-        *                        Handle VISC Query intrinsics                  *
-        ***********************************************************************/
-        switch (II->getIntrinsicID()) {
-          /**************************** llvm.visc.getNode() *******************/
-          case Intrinsic::visc_getNode: {
-            // add mapping <intrinsic, this node> to the node-specific map
-            Leaf_HandleToDFNodeMap[II] = N;
-            IItoRemove.push_back(II);            
-            break;
-          }
-          /************************* llvm.visc.getParentNode() ****************/
-          case Intrinsic::visc_getParentNode: {
-            // get the parent node of the arg node
-            // get argument node
-            ArgII = cast<IntrinsicInst>((II->getOperand(0))->stripPointerCasts());
-            // get the parent node of the arg node
-            ArgDFNode = Leaf_HandleToDFNodeMap[ArgII];
-            // Add mapping <intrinsic, parent node> to the node-specific map
-            // the argument node must have been added to the map, orelse the
-            // code could not refer to it
-            Leaf_HandleToDFNodeMap[II] = ArgDFNode->getParent();
-            IItoRemove.push_back(II);
-            break;
-          }
-          /*************************** llvm.visc.getNumDims() *****************/
-          case Intrinsic::visc_getNumDims: {
-            // get node from map
-            // get the appropriate field
-            ArgII = cast<IntrinsicInst>((II->getOperand(0))->stripPointerCasts());
-            int numOfDim = Leaf_HandleToDFNodeMap[ArgII]->getNumOfDim();
-            IntegerType* IntTy = Type::getInt32Ty(getGlobalContext());
-            ConstantInt* numOfDimConstant = ConstantInt::getSigned(IntTy, (int64_t) numOfDim);
-
-            II->replaceAllUsesWith(numOfDimConstant);
-            IItoRemove.push_back(II);
-            break;
-          }
-          /*********************** llvm.visc.getNodeInstanceID() **************/
-          case Intrinsic::visc_getNodeInstanceID_x:
-          case Intrinsic::visc_getNodeInstanceID_y:
-          case Intrinsic::visc_getNodeInstanceID_z: {
-            ArgII = cast<IntrinsicInst>((II->getOperand(0))->stripPointerCasts());
-            ArgDFNode = Leaf_HandleToDFNodeMap[ArgII];
-
-            // The dfnode argument should be an ancestor of this leaf node or
-            // the leaf node itself
-            int parentLevel = N->getAncestorHops(ArgDFNode);
-            assert(( parentLevel != 0 || ArgDFNode == (DFNode*)N )
-              && "Invalid DFNode argument to getNodeInstanceID_[xyz]!");
-            
-            //FIXME: Not handling cases where the arg node is an ancestor 
-            // To be removed later
-            assert((parentLevel == 0)
-              && "Currently not handling cases other than immediate ancestor!");
-
-            // Get specified dimension
-            // (dim = 0) => x
-            // (dim = 1) => y
-            // (dim = 2) => z
-            int dim = (int) (II->getIntrinsicID() -
-                              Intrinsic::visc_getNodeInstanceID_x);
-            assert((dim >= 0) && (dim < 3)
-              && "Invalid dimension for getNodeInstanceID_[xyz]. Check Intrinsic ID!");
-
-            // For immediate ancestor, use the extra argument introduced in
-            // F_X86
-            int numParamsF = F->getFunctionType()->getNumParams();
-            int numParamsF_X86 = F_X86->getFunctionType()->getNumParams();
-            assert((numParamsF_X86 - numParamsF == 6)
-              && "Difference of arguments between function and its clone is not 6!");
-
-            unsigned offset = 3 + (3-dim);
-            // Traverse argument list of F_X86 in reverse order to find the
-            // correct index or dim argument.
-            Argument* indexVal = getArgumentFromEnd(F_X86, offset);
-            assert(indexVal && "Index argument not found. Invalid offset!");
-            
-            DEBUG(errs() << *II << " replaced with " << *indexVal << "\n");
-
-            II->replaceAllUsesWith(indexVal);
-            IItoRemove.push_back(II); 
-            break;
-          }
-          /********************** llvm.visc.getNumNodeInstances() *************/
-          case Intrinsic::visc_getNumNodeInstances_x:
-          case Intrinsic::visc_getNumNodeInstances_y:
-          case Intrinsic::visc_getNumNodeInstances_z: {
-
-            ArgII = cast<IntrinsicInst>((II->getOperand(0))->stripPointerCasts());
-            ArgDFNode = Leaf_HandleToDFNodeMap[ArgII];
-
-            // The dfnode argument should be an ancestor of this leaf node or
-            // the leaf node itself
-            int parentLevel = N->getAncestorHops(ArgDFNode);
-            assert(( parentLevel != 0 || ArgDFNode == (DFNode*)N )
-              && "Invalid DFNode argument to getNodeInstanceID_[xyz]!");
-            
-            //FIXME: Not handling cases where the arg node is an ancestor 
-            // To be removed later
-            assert((parentLevel == 0)
-              && "Currently not handling cases other than immediate ancestor!");
-
-            // Get specified dimension
-            // (dim = 0) => x
-            // (dim = 1) => y
-            // (dim = 2) => z
-            int dim = (int) (II->getIntrinsicID() -
-                              Intrinsic::visc_getNumNodeInstances_x);
-            assert((dim >= 0) && (dim < 3)
-              && "Invalid dimension for getNumNodeInstances_[xyz]. Check Intrinsic ID!");
-
-            // For immediate ancestor, use the extra argument introduced in
-            // F_X86
-            int numParamsF = F->getFunctionType()->getNumParams();
-            int numParamsF_X86 = F_X86->getFunctionType()->getNumParams();
-            assert((numParamsF_X86 - numParamsF == 6)
-              && "Difference of arguments between function and its clone is not 6!");
-
-            unsigned offset = 3 - dim;
-            // Traverse argument list of F_X86 in reverse order to find the
-            // correct index or dim argument.
-            Argument* limitVal = getArgumentFromEnd(F_X86, offset);
-            assert(limitVal && "Limit argument not found. Invalid offset!");
-            
-            DEBUG(errs() << *II << " replaced with " <<  *limitVal << "\n");
-
-            II->replaceAllUsesWith(limitVal);
-            IItoRemove.push_back(II);
-            
-            break;
-          }
-          default:
-            DEBUG(errs() << "Found unknown intrinsic with ID = " <<
-              II->getIntrinsicID() << "\n");
-            DEBUG(errs() << Intrinsic::visc_getNumNodeInstances_x << "\n");
-            assert(false && "Unknown VISC Intrinsic!");
-            break;
-        }
+}
+
+// Code generation for leaf nodes
+void CodeGenTraversal::codeGen(DFLeafNode* N) {
+  // Skip code generation if it is a dummy node
+  if(N->isDummyNode()) {
+    DEBUG(errs() << "Skipping dummy node\n");
+    return;
+  }
 
-      } else {
-        //TODO: how to handle address space qualifiers in load/store
+  std::vector<IntrinsicInst *> IItoRemove;
+  std::vector<std::pair<IntrinsicInst *, Value *> > IItoReplace;
+  BuildDFG::HandleToDFNode Leaf_HandleToDFNodeMap;
+
+  // Get the function associated with the dataflow node
+  Function *F = N->getFuncPointer();
+
+  // Check if clone already exists. If it does, it means we have visited this
+  // function before and nothing else needs to be done for this leaf node.
+  if(FMap.count(F))
+    return;
+
+  // Clone the function, if we are seeing this function for the first time.
+  Function *F_X86;
+  ValueToValueMapTy VMap;
+  F_X86 = CloneFunction(F, VMap, true);
+  // Insert the cloned function into the module
+  M.getFunctionList().push_back(F_X86);
+  // Add old func: new func pair to the FMap
+  FMap[F] = F_X86;
+
+  // Add the new argument to the argument list
+  addIdxDimArgs(F_X86);
+
+  // Go through all the instructions
+  for (inst_iterator i = inst_begin(F_X86), e = inst_end(F_X86); i != e; ++i) {
+    Instruction *I = &(*i);
+    DEBUG(errs() << *I << "\n");
+    // Leaf nodes should not contain VISC graph intrinsics or launch
+    assert(!BuildDFG::isViscLaunchIntrinsic(I) && "Launch intrinsic within a dataflow graph!");
+    assert(!BuildDFG::isViscGraphIntrinsic(I) && "VISC graph intrinsic within a leaf dataflow node!");
+
+    if (BuildDFG::isViscQueryIntrinsic(I)) {
+      IntrinsicInst* II = cast<IntrinsicInst>(I);
+      IntrinsicInst* ArgII;
+      DFNode* ArgDFNode;
+
+      /***********************************************************************
+      *                        Handle VISC Query intrinsics                  *
+      ***********************************************************************/
+      switch (II->getIntrinsicID()) {
+      /**************************** llvm.visc.getNode() *******************/
+      case Intrinsic::visc_getNode: {
+        // add mapping <intrinsic, this node> to the node-specific map
+        Leaf_HandleToDFNodeMap[II] = N;
+        IItoRemove.push_back(II);
+        break;
+      }
+      /************************* llvm.visc.getParentNode() ****************/
+      case Intrinsic::visc_getParentNode: {
+        // get the parent node of the arg node
+        // get argument node
+        ArgII = cast<IntrinsicInst>((II->getOperand(0))->stripPointerCasts());
+        // get the parent node of the arg node
+        ArgDFNode = Leaf_HandleToDFNodeMap[ArgII];
+        // Add mapping <intrinsic, parent node> to the node-specific map
+        // the argument node must have been added to the map, or else the
+        // code could not refer to it
+        Leaf_HandleToDFNodeMap[II] = ArgDFNode->getParent();
+        IItoRemove.push_back(II);
+        break;
+      }
+      /*************************** llvm.visc.getNumDims() *****************/
+      case Intrinsic::visc_getNumDims: {
+        // get node from map
+        // get the appropriate field
+        ArgII = cast<IntrinsicInst>((II->getOperand(0))->stripPointerCasts());
+        int numOfDim = Leaf_HandleToDFNodeMap[ArgII]->getNumOfDim();
+        IntegerType* IntTy = Type::getInt32Ty(getGlobalContext());
+        ConstantInt* numOfDimConstant = ConstantInt::getSigned(IntTy, (int64_t) numOfDim);
+
+        II->replaceAllUsesWith(numOfDimConstant);
+        IItoRemove.push_back(II);
+        break;
+      }
+      /*********************** llvm.visc.getNodeInstanceID() **************/
+      case Intrinsic::visc_getNodeInstanceID_x:
+      case Intrinsic::visc_getNodeInstanceID_y:
+      case Intrinsic::visc_getNodeInstanceID_z: {
+        ArgII = cast<IntrinsicInst>((II->getOperand(0))->stripPointerCasts());
+        ArgDFNode = Leaf_HandleToDFNodeMap[ArgII];
+
+        // The dfnode argument should be an ancestor of this leaf node or
+        // the leaf node itself
+        int parentLevel = N->getAncestorHops(ArgDFNode);
+        assert(( parentLevel != 0 || ArgDFNode == (DFNode*)N )
+               && "Invalid DFNode argument to getNodeInstanceID_[xyz]!");
+
+        //FIXME: Not handling cases where the arg node is an ancestor
+        // To be removed later
+        assert((parentLevel == 0)
+               && "Currently not handling cases other than immediate ancestor!");
+
+        // Get specified dimension
+        // (dim = 0) => x
+        // (dim = 1) => y
+        // (dim = 2) => z
+        int dim = (int) (II->getIntrinsicID() -
+                         Intrinsic::visc_getNodeInstanceID_x);
+        assert((dim >= 0) && (dim < 3)
+               && "Invalid dimension for getNodeInstanceID_[xyz]. Check Intrinsic ID!");
+
+        // For immediate ancestor, use the extra argument introduced in
+        // F_X86
+        int numParamsF = F->getFunctionType()->getNumParams();
+        int numParamsF_X86 = F_X86->getFunctionType()->getNumParams();
+        assert((numParamsF_X86 - numParamsF == 6)
+               && "Difference of arguments between function and its clone is not 6!");
+
+        unsigned offset = 3 + (3-dim);
+        // Traverse argument list of F_X86 in reverse order to find the
+        // correct index or dim argument.
+        Argument* indexVal = getArgumentFromEnd(F_X86, offset);
+        assert(indexVal && "Index argument not found. Invalid offset!");
+
+        DEBUG(errs() << *II << " replaced with " << *indexVal << "\n");
+
+        II->replaceAllUsesWith(indexVal);
+        IItoRemove.push_back(II);
+        break;
+      }
+      /********************** llvm.visc.getNumNodeInstances() *************/
+      case Intrinsic::visc_getNumNodeInstances_x:
+      case Intrinsic::visc_getNumNodeInstances_y:
+      case Intrinsic::visc_getNumNodeInstances_z: {
+
+        ArgII = cast<IntrinsicInst>((II->getOperand(0))->stripPointerCasts());
+        ArgDFNode = Leaf_HandleToDFNodeMap[ArgII];
+
+        // The dfnode argument should be an ancestor of this leaf node or
+        // the leaf node itself
+        int parentLevel = N->getAncestorHops(ArgDFNode);
+        assert(( parentLevel != 0 || ArgDFNode == (DFNode*)N )
+               && "Invalid DFNode argument to getNodeInstanceID_[xyz]!");
+
+        //FIXME: Not handling cases where the arg node is an ancestor
+        // To be removed later
+        assert((parentLevel == 0)
+               && "Currently not handling cases other than immediate ancestor!");
+
+        // Get specified dimension
+        // (dim = 0) => x
+        // (dim = 1) => y
+        // (dim = 2) => z
+        int dim = (int) (II->getIntrinsicID() -
+                         Intrinsic::visc_getNumNodeInstances_x);
+        assert((dim >= 0) && (dim < 3)
+               && "Invalid dimension for getNumNodeInstances_[xyz]. Check Intrinsic ID!");
+
+        // For immediate ancestor, use the extra argument introduced in
+        // F_X86
+        int numParamsF = F->getFunctionType()->getNumParams();
+        int numParamsF_X86 = F_X86->getFunctionType()->getNumParams();
+        assert((numParamsF_X86 - numParamsF == 6)
+               && "Difference of arguments between function and its clone is not 6!");
+
+        unsigned offset = 3 - dim;
+        // Traverse argument list of F_X86 in reverse order to find the
+        // correct index or dim argument.
+        Argument* limitVal = getArgumentFromEnd(F_X86, offset);
+        assert(limitVal && "Limit argument not found. Invalid offset!");
+
+        DEBUG(errs() << *II << " replaced with " <<  *limitVal << "\n");
+
+        II->replaceAllUsesWith(limitVal);
+        IItoRemove.push_back(II);
+
+        break;
+      }
+      default:
+        DEBUG(errs() << "Found unknown intrinsic with ID = " <<
+              II->getIntrinsicID() << "\n");
+        assert(false && "Unknown VISC Intrinsic!");
+        break;
       }
 
+    } else {
+      //TODO: how to handle address space qualifiers in load/store
     }
 
-    //TODO:
-    // When to replace the uses?
-    // In which order is it safe to replace the instructions in
-    // IItoReplace?
-    // Probably in the reverse order in the vectors
-    // It is a good idea to have them in one vector and chech the type
-    // using dyn_cast in order to determine if we replace with inst or value
+  }
 
+  //TODO:
+  // When to replace the uses?
+  // In which order is it safe to replace the instructions in
+  // IItoReplace?
+  // Probably in the reverse order in the vectors
+  // It is a good idea to have them in one vector and check the type
+  // using dyn_cast in order to determine if we replace with inst or value
 
-    //TODO: maybe leave these instructions to be removed by a later DCE pass
-    for (std::vector<IntrinsicInst *>::iterator i = IItoRemove.begin();
-         i != IItoRemove.end(); ++i) {
-      (*i)->replaceAllUsesWith(UndefValue::get((*i)->getType()));
-      (*i)->eraseFromParent();
-    }
-  
-  DEBUG(errs() << *F_X86);
+
+  //TODO: maybe leave these instructions to be removed by a later DCE pass
+  for (std::vector<IntrinsicInst *>::iterator i = IItoRemove.begin();
+       i != IItoRemove.end(); ++i) {
+    (*i)->replaceAllUsesWith(UndefValue::get((*i)->getType()));
+    (*i)->eraseFromParent();
   }
 
+  DEBUG(errs() << *F_X86);
+}
+
 } // End of namespace
 
 char DFG2LLVM_X86::ID = 0;
 static RegisterPass<DFG2LLVM_X86> X("dfg2llvm-x86",
-                                "Dataflow Graph to LLVM for X86 backend",
-                                false /* does not modify the CFG */,
-                                true /* transformation, not just analysis */);
+                                    "Dataflow Graph to LLVM for X86 backend",
+                                    false /* does not modify the CFG */,
+                                    true /* transformation, not just analysis */);