diff --git a/llvm/include/llvm/IR/DFGraph.h b/llvm/include/llvm/IR/DFGraph.h
index 7820e8c20ed8182b0589fbdfc3c4d082b35a228b..11473cec41f0df45b14b0bc6ab4b480c08a5aedc 100644
--- a/llvm/include/llvm/IR/DFGraph.h
+++ b/llvm/include/llvm/IR/DFGraph.h
@@ -64,6 +64,12 @@ public:
 
   void addChildDFNode(DFNode* child) {
     ChildrenList.push_back(child);
+    // TODO: Add a control flow edge from Entry to the new child, e.g.
+    //DFEdge* E = DFEdge::Create( Entry, child, false,
+                        //0, 0, Type::getVoidTy(), false,true);
+    // TODO: Add a control flow edge from the new child to the exit node,
+    // mirroring the sketch above with the child as source. NOTE(review):
+    // Type::getVoidTy() presumably needs an LLVMContext argument - confirm.
   }
 
   // Dataflow edge connecting child dataflow nodes
@@ -481,22 +487,26 @@ private:
                                 ///< destination DFnode
   Type* ArgType;                ///< Type of the argument
   bool isStreaming;             ///< Is this an streaming edge
+  bool isControlEdge;           ///< Is this a control edge? For a control edge
+                                ///< only the source and destination DFNodes
+                                ///< matter; the other fields carry no meaning.
 
   // Functions
   DFEdge(DFNode* _SrcDF, DFNode* _DestDF, bool _EdgeType,
-         unsigned _SourcePosition, unsigned _DestPosition, Type* _ArgType, bool _isStreaming)
+         unsigned _SourcePosition, unsigned _DestPosition, Type* _ArgType, bool _isStreaming, bool _isControlEdge)
        : SrcDF(_SrcDF), DestDF(_DestDF), EdgeType(_EdgeType),
          SourcePosition(_SourcePosition), DestPosition(_DestPosition),
-         ArgType(_ArgType), isStreaming(_isStreaming) {}
+         ArgType(_ArgType), isStreaming(_isStreaming), isControlEdge(_isControlEdge) {}
 
 public:
   //TODO: Decide whether we need this type
 //  typedef enum {ONE_TO_ONE = false, ALL_TO_ALL} DFEdgeType;
 
   static DFEdge *Create(DFNode* SrcDF, DFNode* DestDF, bool EdgeType,
-                        unsigned SourcePosition, unsigned DestPosition, Type* ArgType, bool isStreaming = false) {
+                        unsigned SourcePosition, unsigned DestPosition,
+                        Type* ArgType, bool isStreaming = false, bool isControlEdge = false) {
     return new DFEdge(SrcDF, DestDF, EdgeType, SourcePosition, DestPosition,
-                      ArgType, isStreaming);
+                      ArgType, isStreaming, isControlEdge);
 
   }
 
@@ -528,6 +538,9 @@ public:
     return isStreaming;
   }
 
+  bool isControlFlowEdge() const {  // read-only query of the isControlEdge flag
+    return isControlEdge;
+  }
 };
 
 
diff --git a/llvm/include/llvm/IR/IntrinsicsVISC.td b/llvm/include/llvm/IR/IntrinsicsVISC.td
index c4df5f488c2ac78e92dd16300298672d454163cb..2dcd8a2f334df937e809a5e6097281d21245afc9 100644
--- a/llvm/include/llvm/IR/IntrinsicsVISC.td
+++ b/llvm/include/llvm/IR/IntrinsicsVISC.td
@@ -166,7 +166,7 @@ let TargetPrefix = "visc" in {
   /* Memory allocation inside the graph
-   * i8* llvm.visc.malloc();
+   * i8* llvm.visc.malloc(i64);
    */
-  def int_visc_malloc : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], []>;
+  def int_visc_malloc : Intrinsic<[llvm_ptr_ty], [llvm_i64_ty], []>;
 
   /* Find the vector length supported by target architecture
    * intrinsic -
diff --git a/llvm/include/llvm/SupportVISC/DFG2LLVM.h b/llvm/include/llvm/SupportVISC/DFG2LLVM.h
index 24be0152e19e24d634103d1fb7145d3fa66ba7a4..8a1d855c1426f39c855f7c8b1e62e15dfcf25f1e 100644
--- a/llvm/include/llvm/SupportVISC/DFG2LLVM.h
+++ b/llvm/include/llvm/SupportVISC/DFG2LLVM.h
@@ -31,6 +31,8 @@ namespace dfg2llvm {
 static inline ConstantInt* getTimerID(Module&, enum visc_TimerID);
 static inline ConstantInt* getTimerID(Module&, enum visc::Target);
 
+static inline bool hasAttribute(Function*, unsigned, Attribute::AttrKind); // header-local, like the helpers above
+
 // DFG2LLVM abstract class implementation
 class DFG2LLVM : public ModulePass {
 protected:
@@ -317,5 +319,10 @@ static inline ConstantInt* getTargetID(Module& M, enum visc::Target T) {
   return ConstantInt::get(Type::getInt32Ty(M.getContext()), T);
 }
 
+// Find if argument arg_index of F has attribute AK; inline as header-defined.
+inline bool hasAttribute(Function* F, unsigned arg_index, Attribute::AttrKind AK) {
+  return F->getAttributes().hasAttribute(arg_index+1, AK);
+}
+
 } // End of namespace
 
diff --git a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
index 59bdb622b93ef45a4d2b01ed415ff5e86b524470..525367fa05853010fa29f85b476bd44211a0899b 100644
--- a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
+++ b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
@@ -112,7 +112,6 @@ static void getExecuteNodeParams(Value* &, Value* &, Value* &, Kernel*,
                                  ValueToValueMapTy&, Instruction*);
 static Value* genWorkGroupPtr(std::vector<Value*>, ValueToValueMapTy&,
                               Instruction*, const Twine& WGName = "WGSize");
-static bool hasAttribute(Function*, unsigned, Attribute::AttrKind);
 static std::string getPTXFilename(const Module&);
 static std::string getFilenameFromModule(const Module& M);
 static void changeDataLayout(Module &);
@@ -1358,10 +1357,6 @@ static Value* genWorkGroupPtr(std::vector<Value*> WGSize, ValueToValueMapTy& VMa
 
 }
 
-// Find if argument has the given attribute
-static bool hasAttribute(Function* F, unsigned arg_index, Attribute::AttrKind AK) {
-  return F->getAttributes().hasAttribute(arg_index+1, AK);
-}
 // Get generated PTX binary name
 static std::string getPTXFilename(const Module& M) {
   std::string moduleID = M.getModuleIdentifier();
diff --git a/llvm/lib/Transforms/DFG2LLVM_SPIR/DFG2LLVM_SPIR.cpp b/llvm/lib/Transforms/DFG2LLVM_SPIR/DFG2LLVM_SPIR.cpp
index 51a40baa94bae48dc4fe4b5994d3bd43f07e8e4c..c608f1feeb71edf44a75fa3af0ad942b89ef77e8 100644
--- a/llvm/lib/Transforms/DFG2LLVM_SPIR/DFG2LLVM_SPIR.cpp
+++ b/llvm/lib/Transforms/DFG2LLVM_SPIR/DFG2LLVM_SPIR.cpp
@@ -114,7 +114,6 @@ static void getExecuteNodeParams(Value* &, Value* &, Value* &, Kernel*,
                                  ValueToValueMapTy&, Instruction*);
 static Value* genWorkGroupPtr(std::vector<Value*>, ValueToValueMapTy&,
                               Instruction*, const Twine& WGName = "WGSize");
-static bool hasAttribute(Function*, unsigned, Attribute::AttrKind);
 static std::string getSPIRFilename(const Module&);
 static std::string getFilenameFromModule(const Module& M);
 static void changeDataLayout(Module &);
@@ -1422,10 +1421,6 @@ static Value* genWorkGroupPtr(std::vector<Value*> WGSize, ValueToValueMapTy& VMa
 
 }
 
-// Find if argument has the given attribute
-static bool hasAttribute(Function* F, unsigned arg_index, Attribute::AttrKind AK) {
-  return F->getAttributes().hasAttribute(arg_index+1, AK);
-}
 //Get generated SPIR binary name
 static std::string getSPIRFilename(const Module& M) {
   std::string mid = M.getModuleIdentifier();
diff --git a/llvm/lib/Transforms/GenVISC/GenVISC.cpp b/llvm/lib/Transforms/GenVISC/GenVISC.cpp
index 2f1d9706cc652872c5095c6454d7778e8e1afc0b..bab672f0e92c7c5ae05eacd6bd6e8c98d019e371 100644
--- a/llvm/lib/Transforms/GenVISC/GenVISC.cpp
+++ b/llvm/lib/Transforms/GenVISC/GenVISC.cpp
@@ -56,6 +56,7 @@ IS_VISC_CALL(pop)
 IS_VISC_CALL(getNode)
 IS_VISC_CALL(getParentNode)
 IS_VISC_CALL(barrier)
+IS_VISC_CALL(malloc)
 IS_VISC_CALL(return)
 IS_VISC_CALL(getNodeInstanceID_x)
 IS_VISC_CALL(getNodeInstanceID_y)
@@ -123,7 +124,8 @@ static Value* genCodeForReturn(CallInst* CI) {
   for(unsigned i=0; i < CI->getNumArgOperands(); i++) {
     ArgTypes.push_back(CI->getArgOperand(i)->getType());
   }
-  StructType* RetTy = StructType::create(Ctx, ArgTypes, CI->getParent()->getParent()->getName(), true);
+  // Materialize the name inside the call; a Twine kept in a local dangles.
+  StructType* RetTy = StructType::create(Ctx, ArgTypes, ("struct.out." + CI->getParent()->getParent()->getName()).str(), true);
 
   InsertValueInst* IV = InsertValueInst::Create(UndefValue::get(RetTy),
                                                   CI->getArgOperand(0),
@@ -1071,6 +1073,16 @@ bool GenVISC::runOnModule(Module &M) {
         CI->replaceAllUsesWith(BarrierInst);
         toBeErased.push_back(CI);
       }
+      if (isVISCmallocCall(I)) {  // swap the call for the llvm.visc.malloc intrinsic
+        Function* MallocF = Intrinsic::getDeclaration(&M, Intrinsic::visc_malloc);
+        DEBUG(errs() << *MallocF << "\n");
+        CallInst* MallocInst = CallInst::Create(MallocF,
+                                CI->getArgOperand(0), "", CI);  // NOTE(review): operand 0 must be i64 to match int_visc_malloc - confirm
+        DEBUG(errs() << "Found visc malloc call: " << *CI << "\n");
+        DEBUG(errs() << "\tSubstitute with: " << *MallocInst << "\n");
+        CI->replaceAllUsesWith(MallocInst);
+        toBeErased.push_back(CI);  // queued in toBeErased instead of being erased here
+      }
       if (isVISCreturnCall(I)) {
         // The operands to this call are the values to be returned by the node
         Value* ReturnVal = genCodeForReturn(CI);
diff --git a/llvm/lib/Transforms/Makefile b/llvm/lib/Transforms/Makefile
index 88faa1c8ba55ccd9c2d1f3e21d0ee8a340064d7b..c4a477c36a93ed77ab4e0937e7dbfc7cd73edce4 100644
--- a/llvm/lib/Transforms/Makefile
+++ b/llvm/lib/Transforms/Makefile
@@ -9,7 +9,7 @@
 
 LEVEL = ../..
 PARALLEL_DIRS = Utils Instrumentation Scalar InstCombine IPO Vectorize Hello \
-                ObjCARC BuildDFG DFG2LLVM_NVPTX DFG2LLVM_SPIR DFG2LLVM_X86 \
+                ObjCARC BuildDFG LocalMem DFG2LLVM_NVPTX DFG2LLVM_SPIR DFG2LLVM_X86 \
                 ClearDFG GenVISC
 
 include $(LEVEL)/Makefile.config
diff --git a/llvm/test/VISC/parboil/benchmarks/bfs/src/visc_base/main.cpp b/llvm/test/VISC/parboil/benchmarks/bfs/src/visc_base/main.cpp
index e568981c8903debbaaa04edff2c16cd72175d7a9..134d288f91d793317c94665d4c104d7182f54468 100644
--- a/llvm/test/VISC/parboil/benchmarks/bfs/src/visc_base/main.cpp
+++ b/llvm/test/VISC/parboil/benchmarks/bfs/src/visc_base/main.cpp
@@ -112,11 +112,11 @@ void packData(RootIn* args,
   args->grid = grid;
 }
 
-void Allocation() {
+void Allocation(int block) {  // NOTE(review): block is not read below; presumably only a bindIn target - confirm
   // Memory shared between threadblocks
-  void* local_q_tail = malloc(sizeof(int));
-  void* local_q = malloc(LOCAL_MEM_SIZE*sizeof(int));
-  void* shift = malloc(sizeof(int));
+  void* local_q_tail = __visc__malloc(sizeof(int));
+  void* local_q = __visc__malloc(LOCAL_MEM_SIZE*sizeof(int));
+  void* shift = __visc__malloc(sizeof(int));
   
   __visc__return(local_q_tail, sizeof(int), local_q, LOCAL_MEM_SIZE*sizeof(int), shift, sizeof(int));
 }
@@ -250,6 +250,7 @@ void BlockingBFS(int *q1, size_t bytesq1,
   void* BFSLeafNode = __visc__createNode1D(BFSLeaf, block);
 
   // Bind edges
+  __visc__bindIn(AllocationNode, 17, 0, 0); // Bind block
   __visc__bindIn(BFSLeafNode, 0, 0, 0); // Bind q1
   __visc__bindIn(BFSLeafNode, 1, 1, 0); // Bind bytes_q1
   __visc__bindIn(BFSLeafNode, 2, 2, 0); // Bind q2