diff --git a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
index c402848cbd1328ba5a0e790586e4030ac9ca6f2c..7df93bd099e837335656eb6f5f30aa34e4cb0f41 100644
--- a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
+++ b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
@@ -55,7 +55,7 @@ public:
 class Kernel {
 public:
   Kernel(Function* _KF, std::vector<unsigned> _inArgMap =
-         std::vector<unsigned>(), unsigned _gridDim = 0, std::vector<Value*>
+           std::vector<unsigned>(), unsigned _gridDim = 0, std::vector<Value*>
          _globalWGSize = std::vector<Value*>(),
          unsigned _blockDim = 0,
          std::vector<Value*> _localWGSize = std::vector<Value*>())
@@ -79,11 +79,17 @@ public:
   std::vector<unsigned> getInArgMap() {
     return inArgMap;
   }
+
+  bool hasLocalWG() {  // True iff a local work-group (block) dimension was supplied.
+    return blockDim != 0;  // A zero blockDim is taken to mean "no local work group given".
+  }
 };
 
 // Helper function declarations
 static void getExecuteNodeParams(Value* &, Value* &, Value* &, Kernel*,
                                  ValueToValueMapTy&, Instruction*);
+static Value* genWorkGroupPtr(std::vector<Value*>&, ValueToValueMapTy&,
+                              Instruction*, const Twine& WGName = "WGSize");
 static bool hasAttribute(Function*, unsigned, Attribute::AttrKind);
 static std::string getPTXFilename(const Module&);
 static std::string getFilenameFromModule(const Module& M);
@@ -723,6 +729,10 @@ void CodeGenTraversal::codeGen(DFLeafNode* N) {
   int pLevel = PNode->getLevel();
   int pReplFactor = PNode->getNumOfDim();
 
+  // Choose parent node as kernel launch if:
+  // (1) Parent is the top level node i.e., Root of DFG
+  //                    OR
+  // (2) Parent does not have multiple instances
   if (!pLevel || !pReplFactor) {
     KernelLaunchNode = PNode;
     kernel = new Kernel(NULL, N->getInArgMap(), N->getNumOfDim(), N->getDimLimits());
@@ -742,7 +752,8 @@ void CodeGenTraversal::codeGen(DFLeafNode* N) {
     //}
   }
   else {
-    errs() << "*************** Entering else part **************\n";
+    // Converting a 2-level DFG to an OpenCL kernel
+    errs() << "*************** Kernel Gen: 2-Level Hierarchy **************\n";
     KernelLaunchNode = PNode->getParent();
     assert((PNode->getNumOfDim() == N->getNumOfDim()) && "Dimension number must match");
     // Contains the instructions generating the kernel configuration parameters
@@ -1272,53 +1283,65 @@ void CodeGenTraversal::transformFunctionToVoid(Function* F) {
 static void getExecuteNodeParams(Value* &workDim, Value* &LocalWGPtr, Value*
                                  &GlobalWGPtr, Kernel* kernel, ValueToValueMapTy& VMap, Instruction* IB) {
 
-  // Get int64_t and or ease of use
-  Type* Int64Ty = Type::getInt64Ty(getGlobalContext());
-
   // Assign number of dimenstions a constant value
   workDim = ConstantInt::get(Type::getInt32Ty(getGlobalContext()), kernel->gridDim);
 
-  // For now, local work group size if null
-  LocalWGPtr = Constant::getNullValue(Type::getInt64PtrTy(getGlobalContext()));
+  // If the kernel reports no local work group, pass a null i64*; otherwise build it from localWGSize
+  if(!kernel->hasLocalWG()) {
+    LocalWGPtr = Constant::getNullValue(Type::getInt64PtrTy(getGlobalContext()));
+  }
+  else {
+    LocalWGPtr = genWorkGroupPtr(kernel->localWGSize, VMap, IB, "LocalWGSize");
+  }
 
-  // Global Work Group type is [#dim x i64]
-  Type* GlobalWGTy = ArrayType::get(Int64Ty, kernel->gridDim);
+  GlobalWGPtr = genWorkGroupPtr(kernel->globalWGSize, VMap, IB, "GlobalWGSize"); // always built, one entry per globalWGSize element
+  DEBUG(errs() << "Pointer to global work group: " << *GlobalWGPtr << "\n");
+} // workDim, LocalWGPtr and GlobalWGPtr are all assigned on every path
+
+static Value* genWorkGroupPtr(std::vector<Value*>& WGSize, ValueToValueMapTy& VMap, Instruction* IB, const Twine& WGName) {
+  Value* WGPtr;
+  // Get the i64 type once for ease of use
+  Type* Int64Ty = Type::getInt64Ty(getGlobalContext());
+
+  // Work group array type is [WGSize.size() x i64]
+  Type* WGTy = ArrayType::get(Int64Ty, WGSize.size());
   // Allocate space of Global work group data on stack and get pointer to
   // first element.
-  AllocaInst* GlobalWG = new AllocaInst(GlobalWGTy, "GlobalWGSize", IB);
-  GlobalWGPtr = BitCastInst::CreatePointerCast(GlobalWG, Int64Ty->getPointerTo(), GlobalWG->getName()+".0", IB);
-  Value* nextDim = GlobalWGPtr;
-  DEBUG(errs() << *GlobalWGPtr << "\n");
+  AllocaInst* WG = new AllocaInst(WGTy, WGName, IB);
+  WGPtr = BitCastInst::CreatePointerCast(WG, Int64Ty->getPointerTo(), WG->getName()+".0", IB);
+  Value* nextDim = WGPtr;
+  DEBUG(errs() << *WGPtr << "\n");
 
   // Iterate over the number of dimensions and store the global work group
   // size in that dimension
-  for(unsigned i=0; i < kernel->gridDim; i++) {
-    assert(kernel->globalWGSize[i]->getType()->isIntegerTy() && "Dimension not an integer type!");
-    if(kernel->globalWGSize[i]->getType() != Int64Ty) {
+  for(unsigned i=0; i < WGSize.size(); i++) {
+    assert(WGSize[i]->getType()->isIntegerTy() && "Dimension not an integer type!");
+    if(WGSize[i]->getType() != Int64Ty) {
       // If number of dimensions are mentioned in any other integer format,
       // generate code to extend it to i64. We need to use the mapped value in
       // the new generated function, hence the use of VMap
-      kernel->globalWGSize[i] = BitCastInst::CreateIntegerCast(VMap[kernel->globalWGSize[i]], Int64Ty, true, "", IB);
-      StoreInst* SI = new StoreInst(kernel->globalWGSize[i], nextDim, IB);
+      // FIXME: Why overwrite the caller's WGSize entry (passed by reference)? NOTE(review): the cast below is signed, not zero-extending.
+      WGSize[i] = BitCastInst::CreateIntegerCast(VMap[WGSize[i]], Int64Ty, true, "", IB);
+      StoreInst* SI = new StoreInst(WGSize[i], nextDim, IB);
       DEBUG(errs() << "\tZero extended work group size: " << *SI << "\n");
     } else {
       // Store the value representing work group size in ith dimension on
       // stack
-      StoreInst* SI = new StoreInst(VMap[kernel->globalWGSize[i]], nextDim, IB);
+      StoreInst* SI = new StoreInst(VMap[WGSize[i]], nextDim, IB);
       DEBUG(errs() << "\t Work group size: " << *SI << "\n");
     }
-    if(i+1 < kernel->gridDim) {
+    if(i+1 < WGSize.size()) {
       // Move to next dimension
       GetElementPtrInst* GEP = GetElementPtrInst::Create(nextDim,
                                ArrayRef<Value*>(ConstantInt::get(Int64Ty, 1)),
-                               GlobalWG->getName()+"."+Twine(i+1),
+                               WG->getName()+"."+Twine(i+1),
                                IB);
       DEBUG(errs() << "\tPointer to next dimension on stack: " << *GEP << "\n");
       nextDim = GEP;
     }
   }
+  return WGPtr;
 
-  DEBUG(errs() << "Pointer to global work group: " << *GlobalWGPtr << "\n");
 }
 
 // Find if argument has the given attribute