diff --git a/hpvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp b/hpvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
index 27910715fe76685e1ad6e6cdd356c2c5a4e5288e..cea4ff3eb355686dd28c9b2011d5347f2faa67c5 100644
--- a/hpvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
+++ b/hpvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
@@ -1137,7 +1137,6 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) {
 // Function to replace call instructions to functions in the kernel
   std::map<Function *, Function *> OrgToClonedFuncMap;
   std::vector<Function *> FuncToBeRemoved;
-  std::vector<CallInst *> CallstoRemoved;
   auto CloneAndReplaceCall = [&] (CallInst *CI, Function *OrgFunc) {
     Function* NewFunc;
     // Check if the called function has already been cloned before.
@@ -1158,7 +1157,7 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) {
     CallInst* Inst = CallInst::Create(NewFunc, args,
         OrgFunc->getReturnType()->isVoidTy()? "" : CI->getName(), CI);
     CI->replaceAllUsesWith(Inst);
-    CallstoRemoved.push_back(CI);
+    IItoRemove.push_back(CI);
     return NewFunc;
   };
 
@@ -1383,9 +1382,7 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) {
       case Intrinsic::visc_atomic_sub:
       case Intrinsic::visc_atomic_xchg:
       case Intrinsic::visc_atomic_min:
-      case Intrinsic::visc_atomic_umin:
       case Intrinsic::visc_atomic_max:
-      case Intrinsic::visc_atomic_umax:
       case Intrinsic::visc_atomic_and:
       case Intrinsic::visc_atomic_or:
       case Intrinsic::visc_atomic_xor:
@@ -1403,196 +1400,177 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) {
         assert(Ptr->getType()->isPointerTy()
                && "First argument of supported atomics is expected to be a pointer");
         PointerType* PtrTy = cast<PointerType>(Ptr->getType());
-        if(PtrTy != Type::getInt32PtrTy(II->getContext(), PtrTy->getAddressSpace())) {
-          Ptr = CastInst::CreatePointerCast(Ptr, Type::getInt32PtrTy(II->getContext(), PtrTy->getAddressSpace()), "", II);
+				std::string name;
+        if(PtrTy == Type::getInt32PtrTy(II->getContext(), PtrTy->getAddressSpace())) {
+          if(II->getIntrinsicID() == Intrinsic::visc_atomic_add)
+            name = "atomic_add";
+          else if(II->getIntrinsicID() == Intrinsic::visc_atomic_sub)
+            name = "atomic_sub";
+          else if(II->getIntrinsicID() == Intrinsic::visc_atomic_xchg)
+            name = "atomic_xchg";
+          else if(II->getIntrinsicID() == Intrinsic::visc_atomic_min)
+            name = "atomic_min";
+          else if(II->getIntrinsicID() == Intrinsic::visc_atomic_max)
+            name = "atomic_max";
+          else if(II->getIntrinsicID() == Intrinsic::visc_atomic_and)
+            name = "atomic_and";
+          else if(II->getIntrinsicID() == Intrinsic::visc_atomic_or)
+            name = "atomic_or";
+          else if(II->getIntrinsicID() == Intrinsic::visc_atomic_xor)
+            name = "atomic_xor";
+        } else {
+          assert(PtrTy == Type::getInt64PtrTy(II->getContext(), PtrTy->getAddressSpace()) && "Invalid pointer type");
+          if(II->getIntrinsicID() == Intrinsic::visc_atomic_add)
+            name = "atom_add";
+          else if(II->getIntrinsicID() == Intrinsic::visc_atomic_sub)
+            name = "atom_sub";
+          else if(II->getIntrinsicID() == Intrinsic::visc_atomic_xchg)
+            name = "atom_xchg";
+          else if(II->getIntrinsicID() == Intrinsic::visc_atomic_min)
+            name = "atom_min";
+          else if(II->getIntrinsicID() == Intrinsic::visc_atomic_max)
+            name = "atom_max";
+          else if(II->getIntrinsicID() == Intrinsic::visc_atomic_and)
+            name = "atom_and";
+          else if(II->getIntrinsicID() == Intrinsic::visc_atomic_or)
+            name = "atom_or";
+          else if(II->getIntrinsicID() == Intrinsic::visc_atomic_xor)
+            name = "atom_xor";
         }
-        AtomicRMWInst* AtomicInst = new AtomicRMWInst(getAtomicOp(II->getIntrinsicID()),
-            Ptr, Val, AtomicOrdering::SequentiallyConsistent, SyncScope::System, II);
-        AtomicInst->setVolatile(true);
-        DEBUG(errs() << "Substitute with: " << *AtomicInst << "\n");
-        II->replaceAllUsesWith(AtomicInst);
+        Type* paramTypes[] = {PtrTy, Val->getType()};
+				FunctionType * AtomFuncT = FunctionType::get(II->getType(), ArrayRef<Type*>(paramTypes,2), false);	
+			  FunctionCallee AtomFunc = KernelM->getOrInsertFunction(name, AtomFuncT);				
+			  
+        Value* Params[] = {Ptr, Val};
+        CallInst* AtomCI = CallInst::Create(AtomFunc, ArrayRef<Value*>(Params,2), II->getName(), II);
+        DEBUG(errs() << "Substitute with: " << *AtomCI << "\n");
+        II->replaceAllUsesWith(AtomCI);
         IItoRemove.push_back(II);
       }
       break;
       default:
-        llvm_unreachable("Unknown VISC Intrinsic!");
-        break;
-			}
-
-		}
-		else if(MemCpyInst *MemCpyI = dyn_cast<MemCpyInst>(I)) {
-			IRBuilder<> Builder(I);
-			Value *Source = MemCpyI->getSource();
-			Value *Destination = MemCpyI->getArgOperand(0)->stripPointerCasts();
-			Value *Length = MemCpyI->getOperand(2);
-			DEBUG(errs() << "Found memcpy instruction: " << *I << "\n");
-			DEBUG(errs() << "Source: " << *Source << "\n"); 
-			DEBUG(errs() << "Destination: " << *Destination << "\n"); 
-			DEBUG(errs() << "Length: " << *Length << "\n");
-
-			size_t memcpy_length;
-			unsigned int memcpy_count;
-			if (ConstantInt* CI = dyn_cast<ConstantInt>(Length)) {
-				if (CI->getBitWidth() <= 64) {
-					memcpy_length = CI->getSExtValue();
-					DEBUG(errs() << "Memcpy lenght = " << memcpy_length << "\n");
-					Type *Source_Type = Source->getType()->getPointerElementType();
-					DEBUG(errs() << "Source Type : " << *Source_Type << "\n");
-					memcpy_count = memcpy_length / (Source_Type->getPrimitiveSizeInBits() / 8);
-					DEBUG(errs() << "Memcpy count = " << memcpy_count << "\n");
-					if (GetElementPtrInst *sourceGEPI = dyn_cast<GetElementPtrInst>(Source)) {
-						if (GetElementPtrInst *destGEPI = dyn_cast<GetElementPtrInst>(Destination)) {
-							Value *SourcePtrOperand = sourceGEPI->getPointerOperand();
-							Value *DestPtrOperand = destGEPI->getPointerOperand();
-							for(int i = 0; i < memcpy_count; ++i) {
-								Constant *increment;
-								LoadInst *newLoadI;
-								StoreInst *newStoreI;
-								// First, need to increment the correct index for both source and dest 
-								// This invluves checking to see how many indeces the GEP has
-								// Assume for now only 1 or 2 are the viable options.
-
-								std::vector<Value*> GEPlIndex;
-								if (sourceGEPI->getNumIndices() == 1) {
-									Value *Index = sourceGEPI->getOperand(1);      
-									increment = ConstantInt::get(Index->getType(), i, false);
-									Value *incAdd = Builder.CreateAdd(Index, increment);
-									DEBUG(errs() << "Add: " << *incAdd << "\n");
-									GEPlIndex.push_back(incAdd);
-									Value *newGEPIl = Builder.CreateGEP(SourcePtrOperand, ArrayRef<Value*>(GEPlIndex));
-									DEBUG(errs() << "Load GEP: " << *newGEPIl << "\n");
-									newLoadI = Builder.CreateLoad(newGEPIl);
-									DEBUG(errs() << "Load: " << *newLoadI << "\n");
-								} else { 
-									llvm_unreachable("Unhandled case where source GEPI has more than 1 indices!\n");
-								}
-
-
-								std::vector<Value*> GEPsIndex;
-								if (destGEPI->getNumIndices() == 1) {
-
-								} else if (destGEPI->getNumIndices() == 2) {
-									Value *Index0 = destGEPI->getOperand(1);      
-									GEPsIndex.push_back(Index0);
-									Value *Index1 = destGEPI->getOperand(2);      
-									increment = ConstantInt::get(Index1->getType(), i, false);
-									Value *incAdd = Builder.CreateAdd(Index1, increment);
-									DEBUG(errs() << "Add: " << *incAdd << "\n");
-									GEPsIndex.push_back(incAdd);
-									Value *newGEPIs = Builder.CreateGEP(DestPtrOperand, ArrayRef<Value*>(GEPsIndex));
-									DEBUG(errs() << "Store GEP: " << *newGEPIs << "\n");
-									newStoreI = Builder.CreateStore(newLoadI, newGEPIs, MemCpyI->isVolatile());
-									DEBUG(errs() << "Store: " << *newStoreI << "\n");
-								} else {
-									llvm_unreachable("Unhandled case where dest GEPI has more than 2 indices!\n");
-								}
-							}
-							IItoRemove.push_back(sourceGEPI);
-							IItoRemove.push_back(destGEPI);
-							Instruction *destBitcastI = dyn_cast<Instruction>(MemCpyI->getArgOperand(0));
-							Instruction *sourceBitcastI = dyn_cast<Instruction>(MemCpyI->getArgOperand(1));
-							IItoRemove.push_back(destBitcastI);
-							IItoRemove.push_back(sourceBitcastI);
-							IItoRemove.push_back(MemCpyI);
-						}
-					}
-
-				}
-			} else {
-				llvm_unreachable("MEMCPY length is not a constant, not handled!\n");
-			}
-			//      llvm_unreachable("HERE!");
-		}
-
-		else if(CallInst* CI = dyn_cast<CallInst>(I)) {
-			DEBUG(errs() << "Found a call: " << *CI << "\n");
-			Function* calleeF = cast<Function>(CI->getCalledValue()->stripPointerCasts());
-			if(calleeF->isDeclaration()) {
-				// Add the declaration to kernel module
-				DEBUG(errs() << "Adding declaration to Kernel module: " << *calleeF << "\n");
-				KernelM->getOrInsertFunction(calleeF->getName(), calleeF->getFunctionType());
-				if(IntrinsicInst* II = dyn_cast<IntrinsicInst>(CI)) {
-					// Now handle a few specific intrinsics
-					// For now, sin and cos are translated to their libclc equivalent
-					switch(II->getIntrinsicID()) {
-						case Intrinsic::sin:
-						case Intrinsic::cos:
-							{
-								DEBUG(errs() << "Found sincos: " << *II << "\n");
-								// Get the libclc function
-								// libclc uses mangled name for sin cos
-								assert(II->getType()->isFloatTy()
-										&& "Only handling sin(float) and cos(float)!");
-								std::string name;
-								if(II->getIntrinsicID() == Intrinsic::sin)
-									name = "sin";
-								else
-									name = "cos";
-
-								FunctionType* SinCosFT = FunctionType::get(II->getType(),
-										Type::getFloatTy(KernelM->getContext()),
-										false);
-								FunctionCallee LibclcFunction = KernelM->getOrInsertFunction(name, SinCosFT);
-								CallInst* CI = CallInst::Create(LibclcFunction, II->getArgOperand(0), II->getName(), II);
-
-								II->replaceAllUsesWith(CI);
-								IItoRemove.push_back(II);
-								break;
-							}
-						case Intrinsic::floor:
-							{
-								DEBUG(errs() << "Found floor intrinsic\n");
-								F = Intrinsic::getDeclaration(KernelM.get(), Intrinsic::nvvm_floor_f);
-								FunctionType* FTy = F->getFunctionType();
-								DEBUG(errs() << *F << "\n");
-
-								// Create argument list
-								std::vector<Value*> args;
-								assert(CI->getNumArgOperands() == FTy->getNumParams()
-										&& "Number of arguments of call do not match with Intrinsic");
-								for(unsigned i=0; i < CI->getNumArgOperands(); i++) {
-									Value* V = CI->getArgOperand(i);
-									// Either the type should match or both should be of pointer type
-									assert((V->getType() == FTy->getParamType(i) ||
-												(V->getType()->isPointerTy() && FTy->getParamType(i)->isPointerTy()))
-											&& "Dummy function call argument does not match with Intrinsic argument!");
-									// If the types do not match, then both must be pointer type and pointer
-									// cast needs to be performed
-									if(V->getType() != FTy->getParamType(i)) {
-										V = CastInst::CreatePointerCast(V, FTy->getParamType(i), "", CI);
-									}
-									args.push_back(V);
-								}
-								// Insert call instruction
-								CallInst* Inst = CallInst::Create(F, args,
-										F->getReturnType()->isVoidTy()? "" : CI->getName(), CI);
-								DEBUG(errs() << "\tSubstitute with: " << *Inst << "\n");
-								CI->replaceAllUsesWith(Inst);
-								IItoRemove.push_back(II);
-								break;
-							}
-						default:
-							errs() << "[WARNING] Found Intrinsic: " << *II << "\n" ;
-					}
-				}
-
-			}
-			else {
-				// Check if the called function has already been cloned before.
-				Function *NewFunc = CloneAndReplaceCall(CI, calleeF);
-				// Iterate over the new function to see if it calls any other functions
-				// in the module.
-				for(inst_iterator i = inst_begin(NewFunc), e = inst_end(NewFunc); i != e; ++i) {
-					if(auto *Call = dyn_cast<CallInst>(&*i)) {
-						Function *CalledFunc = cast<Function>(Call->getCalledValue()->stripPointerCasts());
-						CloneAndReplaceCall(Call, CalledFunc);
-					}
-				}
-			}
-			//TODO: how to handle address space qualifiers in load/store
-		}
-
-	}
+      llvm_unreachable("Unknown VISC Intrinsic!");
+      break;
+      }
+
+    }
+    else if(MemCpyInst *MemCpyI = dyn_cast<MemCpyInst>(I)) {
+      IRBuilder<> Builder(I);
+      Value *Source = MemCpyI->getSource();
+      Value *Destination = MemCpyI->getArgOperand(0)->stripPointerCasts();
+      Value *Length = MemCpyI->getOperand(2);
+      DEBUG(errs() << "Found memcpy instruction: " << *I << "\n");
+      DEBUG(errs() << "Source: " << *Source << "\n"); 
+      DEBUG(errs() << "Destination: " << *Destination << "\n"); 
+      DEBUG(errs() << "Length: " << *Length << "\n");
+
+      size_t memcpy_length;
+      unsigned int memcpy_count;
+      if (ConstantInt* CI = dyn_cast<ConstantInt>(Length)) {
+        if (CI->getBitWidth() <= 64) {
+          memcpy_length = CI->getSExtValue();
+          DEBUG(errs() << "Memcpy lenght = " << memcpy_length << "\n");
+          Type *Source_Type = Source->getType()->getPointerElementType();
+          DEBUG(errs() << "Source Type : " << *Source_Type << "\n");
+          memcpy_count = memcpy_length / (Source_Type->getPrimitiveSizeInBits() / 8);
+          DEBUG(errs() << "Memcpy count = " << memcpy_count << "\n");
+          if (GetElementPtrInst *sourceGEPI = dyn_cast<GetElementPtrInst>(Source)) {
+            if (GetElementPtrInst *destGEPI = dyn_cast<GetElementPtrInst>(Destination)) {
+              Value *SourcePtrOperand = sourceGEPI->getPointerOperand();
+              Value *DestPtrOperand = destGEPI->getPointerOperand();
+              for(int i = 0; i < memcpy_count; ++i) {
+                Constant *increment;
+                LoadInst *newLoadI;
+                StoreInst *newStoreI;
+                // First, need to increment the correct index for both source and dest 
+                // This involves checking to see how many indices the GEP has
+                // Assume for now only 1 or 2 are the viable options.
+
+                std::vector<Value*> GEPlIndex;
+                if (sourceGEPI->getNumIndices() == 1) {
+                  Value *Index = sourceGEPI->getOperand(1);      
+                  increment = ConstantInt::get(Index->getType(), i, false);
+                  Value *incAdd = Builder.CreateAdd(Index, increment);
+                  DEBUG(errs() << "Add: " << *incAdd << "\n");
+                  GEPlIndex.push_back(incAdd);
+                  Value *newGEPIl = Builder.CreateGEP(SourcePtrOperand, ArrayRef<Value*>(GEPlIndex));
+                  DEBUG(errs() << "Load GEP: " << *newGEPIl << "\n");
+                  newLoadI = Builder.CreateLoad(newGEPIl);
+                  DEBUG(errs() << "Load: " << *newLoadI << "\n");
+                } else { 
+                  llvm_unreachable("Unhandled case where source GEPI has more than 1 indices!\n");
+                }
+
+
+                std::vector<Value*> GEPsIndex;
+                if (destGEPI->getNumIndices() == 1) {
+
+                } else if (destGEPI->getNumIndices() == 2) {
+                  Value *Index0 = destGEPI->getOperand(1);      
+                  GEPsIndex.push_back(Index0);
+                  Value *Index1 = destGEPI->getOperand(2);      
+                  increment = ConstantInt::get(Index1->getType(), i, false);
+                  Value *incAdd = Builder.CreateAdd(Index1, increment);
+                  DEBUG(errs() << "Add: " << *incAdd << "\n");
+                  GEPsIndex.push_back(incAdd);
+                  Value *newGEPIs = Builder.CreateGEP(DestPtrOperand, ArrayRef<Value*>(GEPsIndex));
+                  DEBUG(errs() << "Store GEP: " << *newGEPIs << "\n");
+                  newStoreI = Builder.CreateStore(newLoadI, newGEPIs, MemCpyI->isVolatile());
+                  DEBUG(errs() << "Store: " << *newStoreI << "\n");
+                } else {
+                  llvm_unreachable("Unhandled case where dest GEPI has more than 2 indices!\n");
+                }
+              }
+              IItoRemove.push_back(sourceGEPI);
+              IItoRemove.push_back(destGEPI);
+              Instruction *destBitcastI = dyn_cast<Instruction>(MemCpyI->getArgOperand(0));
+              Instruction *sourceBitcastI = dyn_cast<Instruction>(MemCpyI->getArgOperand(1));
+              IItoRemove.push_back(destBitcastI);
+              IItoRemove.push_back(sourceBitcastI);
+              IItoRemove.push_back(MemCpyI);
+            }
+          }
+
+        }
+      } else {
+        llvm_unreachable("MEMCPY length is not a constant, not handled!\n");
+      }
+      //      llvm_unreachable("HERE!");
+    }
+
+    else if(CallInst* CI = dyn_cast<CallInst>(I)) {
+      DEBUG(errs() << "Found a call: " << *CI << "\n");
+      Function* calleeF = cast<Function>(CI->getCalledValue()->stripPointerCasts());
+      if(calleeF->isDeclaration()) {
+        // Add the declaration to kernel module
+        if (calleeF->getName() == "sqrtf") {
+          calleeF->setName(Twine("sqrt"));
+          DEBUG(errs() << "CaleeF: " << *calleeF << "\n");
+          DEBUG(errs() << "CI: " << *CI << "\n");
+        } else if (calleeF->getName() == "rsqrtf") {
+          calleeF->setName(Twine("rsqrt"));
+          DEBUG(errs() << "CaleeF: " << *calleeF << "\n");
+          DEBUG(errs() << "CI: " << *CI << "\n");
+        }  
+        DEBUG(errs() << "Adding declaration to Kernel module: " << *calleeF << "\n");
+        KernelM->getOrInsertFunction(calleeF->getName(), calleeF->getFunctionType());
+      }
+      else {
+        // Check if the called function has already been cloned before.
+        Function *NewFunc = CloneAndReplaceCall(CI, calleeF);
+        // Iterate over the new function to see if it calls any other functions
+        // in the module.
+        for(inst_iterator i = inst_begin(NewFunc), e = inst_end(NewFunc); i != e; ++i) {
+          if(auto *Call = dyn_cast<CallInst>(&*i)) {
+            Function *CalledFunc = cast<Function>(Call->getCalledValue()->stripPointerCasts());
+            CloneAndReplaceCall(Call, CalledFunc);
+          }
+        }
+      }
+      //TODO: how to handle address space qualifiers in load/store
+    }
+
+  }
   // search for pattern where float is being casted to int and loaded/stored and change it.	
   DEBUG(errs() << "finding pattern for replacement!\n");
   for (inst_iterator i = inst_begin(F_nvptx), e = inst_end(F_nvptx); i != e; ++i) {
@@ -1866,276 +1844,271 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) {
 
   }
 
-	// We need to do this explicitly: DCE pass will not remove them because we
-	// have assumed theworst memory behaviour for these function calls
-	// Traverse the vector backwards, otherwise definitions are deleted while
-	// their subsequent uses are still around
-	for (auto *I : reverse(IItoRemove)) {
-		DEBUG(errs() << "Erasing: " << *I << "\n");
-		I->eraseFromParent();
-	}
-
-	// Removed the cloned functions from the parent module into the new module 
-	for(auto *F : FuncToBeRemoved) {
-		F->removeFromParent(); //TODO: MARIA check
-		KernelM->getFunctionList().push_back(F);
-	}
-
-	 for (auto *I : reverse(CallstoRemoved)) {
-                DEBUG(errs() << "Erasing: " << *I << "\n");
-                I->eraseFromParent();
-        }
+  // We need to do this explicitly: DCE pass will not remove them because we
+  // have assumed the worst memory behaviour for these function calls
+  // Traverse the vector backwards, otherwise definitions are deleted while
+  // their subsequent uses are still around
+  for (auto *I : reverse(IItoRemove)) {
+    DEBUG(errs() << "Erasing: " << *I << "\n");
+    I->eraseFromParent();
+  }
 
-	addCLMetadata(F_nvptx);
-	kernel->KernelFunction = F_nvptx;
-	errs() << "Identified kernel - " << kernel->KernelFunction->getName() << "\n";
-	DEBUG(errs() << *KernelM);
+  // Move the cloned functions from the parent module into the new module
+  for(auto *F : FuncToBeRemoved) {
+    F->removeFromParent(); //TODO: MARIA check
+    KernelM->getFunctionList().push_back(F);
+  }
+
+  addCLMetadata(F_nvptx);
+  kernel->KernelFunction = F_nvptx;
+  errs() << "Identified kernel - " << kernel->KernelFunction->getName() << "\n";
+  DEBUG(errs() << *KernelM);
 
-	return;
+  return;
 }
 
 bool DFG2LLVM_NVPTX::runOnModule(Module &M) {
-	errs() << "\nDFG2LLVM_NVPTX PASS\n";
+  errs() << "\nDFG2LLVM_NVPTX PASS\n";
 
-	// Get the BuildDFG Analysis Results:
-	// - Dataflow graph
-	// - Maps from i8* hansles to DFNode and DFEdge
-	BuildDFG &DFG = getAnalysis<BuildDFG>();
+  // Get the BuildDFG Analysis Results:
+  // - Dataflow graph
+  // - Maps from i8* handles to DFNode and DFEdge
+  BuildDFG &DFG = getAnalysis<BuildDFG>();
 
-	// DFInternalNode *Root = DFG.getRoot();
-	std::vector<DFInternalNode*> Roots = DFG.getRoots();
-	//    BuildDFG::HandleToDFNode &HandleToDFNodeMap = DFG.getHandleToDFNodeMap();
-	//    BuildDFG::HandleToDFEdge &HandleToDFEdgeMap = DFG.getHandleToDFEdgeMap();
+  // DFInternalNode *Root = DFG.getRoot();
+  std::vector<DFInternalNode*> Roots = DFG.getRoots();
+  //    BuildDFG::HandleToDFNode &HandleToDFNodeMap = DFG.getHandleToDFNodeMap();
+  //    BuildDFG::HandleToDFEdge &HandleToDFEdgeMap = DFG.getHandleToDFEdgeMap();
 
-	// Visitor for Code Generation Graph Traversal
-	CGT_NVPTX *CGTVisitor = new CGT_NVPTX(M, DFG);
+  // Visitor for Code Generation Graph Traversal
+  CGT_NVPTX *CGTVisitor = new CGT_NVPTX(M, DFG);
 
-	// Iterate over all the DFGs and produce code for each one of them
-	for (auto rootNode: Roots) {
-		// Initiate code generation for root DFNode
-		CGTVisitor->visit(rootNode);
-	}
+  // Iterate over all the DFGs and produce code for each one of them
+  for (auto rootNode: Roots) {
+    // Initiate code generation for root DFNode
+    CGTVisitor->visit(rootNode);
+  }
 
-	CGTVisitor->writeKernelsModule();
+  CGTVisitor->writeKernelsModule();
 
-	//TODO: Edit module epilogue to remove the VISC intrinsic declarations
-	delete CGTVisitor;
+  //TODO: Edit module epilogue to remove the VISC intrinsic declarations
+  delete CGTVisitor;
 
-	return true;
+  return true;
 }
 
 std::string CGT_NVPTX::getKernelsModuleName(Module &M) {
-	/*SmallString<128> currentDir;
-		llvm::sys::fs::current_path(currentDir);
-		std::string fileName = getFilenameFromModule(M);
-		Twine output = Twine(currentDir) + "/Output/" + fileName + "";
-		return output.str().append(".kernels.ll");*/
-	std::string mid = M.getModuleIdentifier();
-	return mid.append(".kernels.ll");
+  /*SmallString<128> currentDir;
+    llvm::sys::fs::current_path(currentDir);
+    std::string fileName = getFilenameFromModule(M);
+    Twine output = Twine(currentDir) + "/Output/" + fileName + "";
+    return output.str().append(".kernels.ll");*/
+  std::string mid = M.getModuleIdentifier();
+  return mid.append(".kernels.ll");
 }
 
 void CGT_NVPTX::fixValueAddrspace(Value* V, unsigned addrspace) {
-	assert(isa<PointerType>(V->getType())
-			&& "Value should be of Pointer Type!");
-	PointerType* OldTy = cast<PointerType>(V->getType());
-	PointerType* NewTy = PointerType::get(OldTy->getElementType(), addrspace);
-	V->mutateType(NewTy);
-	for(Value::user_iterator ui = V->user_begin(), ue = V->user_end(); ui != ue; ui++) {
-		// Change all uses producing pointer type in same address space to new
-		// addressspace.
-		if(PointerType* PTy = dyn_cast<PointerType>((*ui)->getType())) {
-			if(PTy->getAddressSpace() == OldTy->getAddressSpace()) {
-				fixValueAddrspace(*ui, addrspace);
-			}
-		}
-	}
+  assert(isa<PointerType>(V->getType())
+      && "Value should be of Pointer Type!");
+  PointerType* OldTy = cast<PointerType>(V->getType());
+  PointerType* NewTy = PointerType::get(OldTy->getElementType(), addrspace);
+  V->mutateType(NewTy);
+  for(Value::user_iterator ui = V->user_begin(), ue = V->user_end(); ui != ue; ui++) {
+    // Change all uses producing pointer type in same address space to new
+    // address space.
+    if(PointerType* PTy = dyn_cast<PointerType>((*ui)->getType())) {
+      if(PTy->getAddressSpace() == OldTy->getAddressSpace()) {
+        fixValueAddrspace(*ui, addrspace);
+      }
+    }
+  }
 }
 
 
 std::vector<unsigned> CGT_NVPTX::globalToConstantMemoryOpt(std::vector<unsigned>* GlobalMemArgs, Function* F) {
-	std::vector<unsigned> ConstantMemArgs;
-	for(Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end();
-			ai != ae; ++ai) {
-		Argument* arg = &*ai; 
-		std::vector<unsigned>::iterator pos = std::find(GlobalMemArgs->begin(),
-				GlobalMemArgs->end(), arg->getArgNo());
-		// It has to be a global memory argument to be promotable
-		if(pos == GlobalMemArgs->end())
-			continue;
-
-		// Check if it can/should be promoted
-		if(canBePromoted(arg, F)) {
-			errs() << "Promoting << " << arg->getName()  << " to constant memory."<< "\n";
-			ConstantMemArgs.push_back(arg->getArgNo());
-			GlobalMemArgs->erase(pos);
-		}
-	}
-	return ConstantMemArgs;
+  std::vector<unsigned> ConstantMemArgs;
+  for(Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end();
+      ai != ae; ++ai) {
+    Argument* arg = &*ai; 
+    std::vector<unsigned>::iterator pos = std::find(GlobalMemArgs->begin(),
+        GlobalMemArgs->end(), arg->getArgNo());
+    // It has to be a global memory argument to be promotable
+    if(pos == GlobalMemArgs->end())
+      continue;
+
+    // Check if it can/should be promoted
+    if(canBePromoted(arg, F)) {
+      errs() << "Promoting << " << arg->getName()  << " to constant memory."<< "\n";
+      ConstantMemArgs.push_back(arg->getArgNo());
+      GlobalMemArgs->erase(pos);
+    }
+  }
+  return ConstantMemArgs;
 }
 
 Function* CGT_NVPTX::changeArgAddrspace(Function* F, std::vector<unsigned> &Args, unsigned addrspace) {
-	unsigned idx = 0;
-	std::vector<Type*> ArgTypes;
-	for(Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end();
-			ai != ae; ++ai) {
-		Argument *arg = &*ai;
-		DEBUG(errs() << *arg << "\n");
-		unsigned argno = arg->getArgNo();
-		if ((idx < Args.size()) && (argno == Args[idx])) {
-			fixValueAddrspace(arg, addrspace);
-			idx++;
-		}
-		ArgTypes.push_back(arg->getType());
-	}
-	FunctionType* newFT = FunctionType::get(F->getReturnType(), ArgTypes, false);
-
-	//F->mutateType(PTy);
-	Function* newF = cloneFunction(F, newFT, false);
-	replaceNodeFunctionInIR(*F->getParent(), F, newF);
-
-	DEBUG(errs() << *newF->getFunctionType() << "\n" <<*newF << "\n");
-	return newF;
+  unsigned idx = 0;
+  std::vector<Type*> ArgTypes;
+  for(Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end();
+      ai != ae; ++ai) {
+    Argument *arg = &*ai;
+    DEBUG(errs() << *arg << "\n");
+    unsigned argno = arg->getArgNo();
+    if ((idx < Args.size()) && (argno == Args[idx])) {
+      fixValueAddrspace(arg, addrspace);
+      idx++;
+    }
+    ArgTypes.push_back(arg->getType());
+  }
+  FunctionType* newFT = FunctionType::get(F->getReturnType(), ArgTypes, false);
+
+  //F->mutateType(PTy);
+  Function* newF = cloneFunction(F, newFT, false);
+  replaceNodeFunctionInIR(*F->getParent(), F, newF);
+
+  DEBUG(errs() << *newF->getFunctionType() << "\n" <<*newF << "\n");
+  return newF;
 }
 
 /* Add metadata to module KernelM, for OpenCL kernels */
 void CGT_NVPTX::addCLMetadata(Function *F) {
 
-	IRBuilder<> Builder(&*F->begin());
+  IRBuilder<> Builder(&*F->begin());
 
-	SmallVector<Metadata*,8> KernelMD;
-	KernelMD.push_back(ValueAsMetadata::get(F));
+  SmallVector<Metadata*,8> KernelMD;
+  KernelMD.push_back(ValueAsMetadata::get(F));
 
-	// TODO: There is additional metadata used by kernel files but we skip them as
-	// they are not mandatory. In future they might be useful to enable
-	// optimizations
+  // TODO: There is additional metadata used by kernel files but we skip them as
+  // they are not mandatory. In future they might be useful to enable
+  // optimizations
 
-	MDTuple *MDKernelNode = MDNode::get(KernelM->getContext(), KernelMD);
-	NamedMDNode *MDN_kernels = KernelM->getOrInsertNamedMetadata("opencl.kernels");
-	MDN_kernels->addOperand(MDKernelNode);
+  MDTuple *MDKernelNode = MDNode::get(KernelM->getContext(), KernelMD);
+  NamedMDNode *MDN_kernels = KernelM->getOrInsertNamedMetadata("opencl.kernels");
+  MDN_kernels->addOperand(MDKernelNode);
 
-	KernelMD.push_back(MDString::get(KernelM->getContext(), "kernel"));
-	// TODO: Replace 1 with the number of the kernel.
-	// Add when support for multiple launces is added
-	KernelMD.push_back(ValueAsMetadata::get(ConstantInt::get(Type::getInt32Ty(KernelM->getContext()),1)));
-	MDNode *MDNvvmAnnotationsNode = MDNode::get(KernelM->getContext(), KernelMD);
-	NamedMDNode *MDN_annotations = KernelM->getOrInsertNamedMetadata("nvvm.annotations");
-	MDN_annotations->addOperand(MDNvvmAnnotationsNode);
+  KernelMD.push_back(MDString::get(KernelM->getContext(), "kernel"));
+  // TODO: Replace 1 with the number of the kernel.
+  // Add when support for multiple launches is added
+  KernelMD.push_back(ValueAsMetadata::get(ConstantInt::get(Type::getInt32Ty(KernelM->getContext()),1)));
+  MDNode *MDNvvmAnnotationsNode = MDNode::get(KernelM->getContext(), KernelMD);
+  NamedMDNode *MDN_annotations = KernelM->getOrInsertNamedMetadata("nvvm.annotations");
+  MDN_annotations->addOperand(MDNvvmAnnotationsNode);
 
 }
 
 void CGT_NVPTX::writeKernelsModule() {
 
-	// In addition to deleting all other functions, we also want to spiff it
-	// up a little bit.  Do this now.
-	legacy::PassManager Passes;
+  // In addition to deleting all other functions, we also want to spiff it
+  // up a little bit.  Do this now.
+  legacy::PassManager Passes;
 
-	errs() << "Writing to File --- ";
-	errs() << getKernelsModuleName(M).c_str() << "\n";
-	std::error_code EC;
-	ToolOutputFile Out(getKernelsModuleName(M).c_str(), EC, sys::fs::F_None);
-	if (EC) {
-		errs() << EC.message() << '\n';
-	}
+  errs() << "Writing to File --- ";
+  errs() << getKernelsModuleName(M).c_str() << "\n";
+  std::error_code EC;
+  ToolOutputFile Out(getKernelsModuleName(M).c_str(), EC, sys::fs::F_None);
+  if (EC) {
+    errs() << EC.message() << '\n';
+  }
 
-	Passes.add(
-			createPrintModulePass(Out.os()));
+  Passes.add(
+      createPrintModulePass(Out.os()));
 
-	Passes.run(*KernelM);
+  Passes.run(*KernelM);
 
-	// Declare success.
-	Out.keep();
+  // Declare success.
+  Out.keep();
 }
 
 Function* CGT_NVPTX::transformFunctionToVoid(Function* F) {
 
-	DEBUG(errs() << "Transforming function to void: " << F->getName() << "\n");
-	// FIXME: Maybe do that using the Node?
-	StructType* FRetTy = dyn_cast<StructType>(F->getReturnType());
-	assert(FRetTy && "Return Type must always be a struct");
-
-	// Keeps return statements, because we will need to replace them
-	std::vector<ReturnInst *> RItoRemove;
-	findReturnInst(F, RItoRemove);
-
-	std::vector<Type *> RetArgTypes;
-	std::vector<Argument*> RetArgs;
-	std::vector<Argument*> Args;
-	// Check for { } return struct, which means that the function returns void
-	if (FRetTy->isEmptyTy()) {
-
-		DEBUG(errs() << "\tFunction output struct is void\n");
-		DEBUG(errs() << "\tNo parameters added\n");
-
-		// Replacing return statements with others returning void
-		for (auto *RI : RItoRemove) {
-			ReturnInst::Create((F->getContext()), 0, RI);
-			RI->eraseFromParent();
-		}
-		DEBUG(errs() << "\tChanged return statements to return void\n");
-	}
-	else {
-		// The struct has return values, thus needs to be converted to parameter
-
-		// Iterate over all element types of return struct and add arguments to the
-		// function
-		for (unsigned i=0; i<FRetTy->getNumElements(); i++) {
-			Argument* RetArg = new Argument(FRetTy->getElementType(i)->getPointerTo(), "ret_arg", F);
-			RetArgs.push_back(RetArg);
-			RetArgTypes.push_back(RetArg->getType());
-			DEBUG(errs() << "\tCreated parameter: " << *RetArg << "\n");
-		}
-
-		DEBUG(errs() << "\tReplacing Return statements\n");
-		// Replace return statements with extractValue and store instructions
-		for (auto *RI : RItoRemove) {
-			Value* RetVal = RI->getReturnValue();
-			for(unsigned i = 0; i < RetArgs.size(); i++) {
-				ExtractValueInst* EI = ExtractValueInst::Create(RetVal, ArrayRef<unsigned>(i),
-						RetArgs[i]->getName()+".val", RI);
-				new StoreInst(EI, RetArgs[i], RI);
-			}
-			// assert(RetVal && "Return value should not be null at this point");
-			// StructType* RetType = cast<StructType>(RetVal->getType());
-			// assert(RetType && "Return type is not a struct");
-
-			ReturnInst::Create((F->getContext()), 0, RI);
-			RI->eraseFromParent();
-
-		}
-	}
-	DEBUG(errs() << "\tReplaced return statements\n");
-
-	// Create the argument type list with the added argument's type
-	std::vector<Type*> ArgTypes;
-	for(Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
-			ai != ae; ++ai) {
-		ArgTypes.push_back(ai->getType());
-	}
-	for(auto *RATy: RetArgTypes) {
-		ArgTypes.push_back(RATy);
-	}
-
-	// Creating Args vector to use in cloning!
-	for(Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end();
-			ai != ae; ++ai) {
-		Args.push_back(&*ai);
-	}
-	for(auto *ai : RetArgs) {
-		Args.push_back(ai);
-	}
-
-	// Adding new arguments to the function argument list, would not change the
-	// function type. We need to change the type of this function to reflect the
-	// added arguments
-	Type* VoidRetType = Type::getVoidTy(F->getContext());
-	FunctionType* newFT = FunctionType::get(VoidRetType, ArgTypes, F->isVarArg());
-
-	// Change the function type
-	//F->mutateType(PTy);
-	Function* newF = cloneFunction(F, newFT, false, NULL, &Args);
-	replaceNodeFunctionInIR(*F->getParent(), F, newF);
-	//F->eraseFromParent();
-	return newF;
+  DEBUG(errs() << "Transforming function to void: " << F->getName() << "\n");
+  // FIXME: Maybe do that using the Node?
+  StructType* FRetTy = dyn_cast<StructType>(F->getReturnType());
+  assert(FRetTy && "Return Type must always be a struct");
+
+  // Keeps return statements, because we will need to replace them
+  std::vector<ReturnInst *> RItoRemove;
+  findReturnInst(F, RItoRemove);
+
+  std::vector<Type *> RetArgTypes;
+  std::vector<Argument*> RetArgs;
+  std::vector<Argument*> Args;
+  // Check for { } return struct, which means that the function returns void
+  if (FRetTy->isEmptyTy()) {
+
+    DEBUG(errs() << "\tFunction output struct is void\n");
+    DEBUG(errs() << "\tNo parameters added\n");
+
+    // Replacing return statements with others returning void
+    for (auto *RI : RItoRemove) {
+      ReturnInst::Create((F->getContext()), 0, RI);
+      RI->eraseFromParent();
+    }
+    DEBUG(errs() << "\tChanged return statements to return void\n");
+  }
+  else {
+    // The struct has return values, thus needs to be converted to parameter
+
+    // Iterate over all element types of return struct and add arguments to the
+    // function
+    for (unsigned i=0; i<FRetTy->getNumElements(); i++) {
+      Argument* RetArg = new Argument(FRetTy->getElementType(i)->getPointerTo(), "ret_arg", F);
+      RetArgs.push_back(RetArg);
+      RetArgTypes.push_back(RetArg->getType());
+      DEBUG(errs() << "\tCreated parameter: " << *RetArg << "\n");
+    }
+
+    DEBUG(errs() << "\tReplacing Return statements\n");
+    // Replace return statements with extractValue and store instructions
+    for (auto *RI : RItoRemove) {
+      Value* RetVal = RI->getReturnValue();
+      for(unsigned i = 0; i < RetArgs.size(); i++) {
+        ExtractValueInst* EI = ExtractValueInst::Create(RetVal, ArrayRef<unsigned>(i),
+            RetArgs[i]->getName()+".val", RI);
+        new StoreInst(EI, RetArgs[i], RI);
+      }
+      // assert(RetVal && "Return value should not be null at this point");
+      // StructType* RetType = cast<StructType>(RetVal->getType());
+      // assert(RetType && "Return type is not a struct");
+
+      ReturnInst::Create((F->getContext()), 0, RI);
+      RI->eraseFromParent();
+
+    }
+  }
+  DEBUG(errs() << "\tReplaced return statements\n");
+
+  // Create the argument type list with the added argument's type
+  std::vector<Type*> ArgTypes;
+  for(Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
+      ai != ae; ++ai) {
+    ArgTypes.push_back(ai->getType());
+  }
+  for(auto *RATy: RetArgTypes) {
+    ArgTypes.push_back(RATy);
+  }
+
+  // Creating Args vector to use in cloning!
+  for(Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end();
+      ai != ae; ++ai) {
+    Args.push_back(&*ai);
+  }
+  for(auto *ai : RetArgs) {
+    Args.push_back(ai);
+  }
+
+  // Adding new arguments to the function argument list, would not change the
+  // function type. We need to change the type of this function to reflect the
+  // added arguments
+  Type* VoidRetType = Type::getVoidTy(F->getContext());
+  FunctionType* newFT = FunctionType::get(VoidRetType, ArgTypes, F->isVarArg());
+
+  // Change the function type
+  //F->mutateType(PTy);
+  Function* newF = cloneFunction(F, newFT, false, NULL, &Args);
+  replaceNodeFunctionInIR(*F->getParent(), F, newF);
+  //F->eraseFromParent();
+  return newF;
 }
 
 /******************************************************************************
@@ -2147,333 +2120,333 @@ Function* CGT_NVPTX::transformFunctionToVoid(Function* F) {
 // 2. Loads not dependent on getNodeInstanceID itrinsic
 
 static bool findLoadStoreUses(Value* V, std::vector<Value*>*UseList, std::vector<Value*>*VisitedList) {
-	if(std::find(VisitedList->begin(), VisitedList->end(), V) != VisitedList->end()) {
-		DEBUG(errs() << "\tAlready visited value: " << *V << "\n");
-		return false;
-	}
-	VisitedList->push_back(V);
-	for(Value::user_iterator ui = V->user_begin(), ue = V->user_end();
-			ui != ue; ++ui) {
-		Instruction* I = dyn_cast<Instruction>(*ui);
-		if(!I) {
-			// if use is not an instruction, then skip it
-			continue;
-		}
-		DEBUG(errs() << "\t" << *I << "\n");
-		if(isa<LoadInst>(I)) {
-			DEBUG(errs() << "\tFound load instruction: " << *I << "\n");
-			DEBUG(errs() << "\tAdd to use list: " << *V << "\n");
-			UseList->push_back(V);
-		}
-		else if(isa<StoreInst>(I) || isa<AtomicRMWInst>(I)) {
-			// found a store in use chain
-			DEBUG(errs() << "Found store/atomicrmw instruction: " << *I << "\n");
-			return true;
-		}
-		else if(BuildDFG::isViscIntrinsic(I)) {
-			// If it is an atomic intrinsic, we found a store
-			IntrinsicInst* II = dyn_cast<IntrinsicInst>(I);
-			assert(II && II->getCalledValue()->getName().startswith("llvm.visc.atomic")
-					&& "Only visc atomic intrinsics can have an argument as input");
-			return true;
-		}
-		else {
-			DEBUG(errs() << "\tTraverse use chain of: " << *I << "\n");
-			if(findLoadStoreUses(I, UseList, VisitedList))
-				return true;
-		}
-	}
-	return false;
+  if(std::find(VisitedList->begin(), VisitedList->end(), V) != VisitedList->end()) {
+    DEBUG(errs() << "\tAlready visited value: " << *V << "\n");
+    return false;
+  }
+  VisitedList->push_back(V);
+  for(Value::user_iterator ui = V->user_begin(), ue = V->user_end();
+      ui != ue; ++ui) {
+    Instruction* I = dyn_cast<Instruction>(*ui);
+    if(!I) {
+      // if use is not an instruction, then skip it
+      continue;
+    }
+    DEBUG(errs() << "\t" << *I << "\n");
+    if(isa<LoadInst>(I)) {
+      DEBUG(errs() << "\tFound load instruction: " << *I << "\n");
+      DEBUG(errs() << "\tAdd to use list: " << *V << "\n");
+      UseList->push_back(V);
+    }
+    else if(isa<StoreInst>(I) || isa<AtomicRMWInst>(I)) {
+      // found a store in use chain
+      DEBUG(errs() << "Found store/atomicrmw instruction: " << *I << "\n");
+      return true;
+    }
+    else if(BuildDFG::isViscIntrinsic(I)) {
+      // If it is an atomic intrinsic, we found a store
+      IntrinsicInst* II = dyn_cast<IntrinsicInst>(I);
+      assert(II && II->getCalledValue()->getName().startswith("llvm.visc.atomic")
+          && "Only visc atomic intrinsics can have an argument as input");
+      return true;
+    }
+    else {
+      DEBUG(errs() << "\tTraverse use chain of: " << *I << "\n");
+      if(findLoadStoreUses(I, UseList, VisitedList))
+        return true;
+    }
+  }
+  return false;
 }
 
 static bool isDependentOnNodeInstanceID(Value* V, std::vector<Value*>*DependenceList) {
-	if(std::find(DependenceList->begin(), DependenceList->end(), V) != DependenceList->end()) {
-		DEBUG(errs() << "\tAlready visited value: " << *V << "\n");
-		return false;
-	}
-	DependenceList->push_back(V);
-	// If not an instruction, then not dependent on node instance id
-	if(!isa<Instruction>(V) || isa<Constant>(V)) {
-		DEBUG(errs() << "\tStop\n");
-		return false;
-	}
-
-	Instruction* I = cast<Instruction>(V);
-	for(unsigned i = 0; i < I->getNumOperands(); i++) {
-		Value* operand = I->getOperand(i);
-		if(IntrinsicInst* II = dyn_cast<IntrinsicInst>(operand)) {
-			if((II->getIntrinsicID() == Intrinsic::visc_getNodeInstanceID_x
-						|| II->getIntrinsicID() == Intrinsic::visc_getNodeInstanceID_y
-						|| II->getIntrinsicID() == Intrinsic::visc_getNodeInstanceID_z)) {
-				Value* Node = II->getArgOperand(0);
-				IntrinsicInst* GN = dyn_cast<IntrinsicInst>(Node);
-				assert(GN && "NodeInstanceID operande should be node/parent node intrinsic\n");
-				if(GN->getIntrinsicID() == Intrinsic::visc_getNode) {
-					DEBUG(errs() << "\tDependency found on Node instance ID: " << *II << "\n");
-					return true;
-				}
-			}
-		}
-		if(CmpInst* CI = dyn_cast<CmpInst>(operand)) {
-			DEBUG(errs() << "Found compare instruction: "<< *CI<<"\nNot following its dependency list\n");
-			continue;
-		}
-		DEBUG( errs() << "\tTraverse the operand chain of: " << *operand << "\n");
-		if(isDependentOnNodeInstanceID(operand, DependenceList)) {
-			return true;
-		}
-	}
-	return false;
+  if(std::find(DependenceList->begin(), DependenceList->end(), V) != DependenceList->end()) {
+    DEBUG(errs() << "\tAlready visited value: " << *V << "\n");
+    return false;
+  }
+  DependenceList->push_back(V);
+  // If not an instruction, then not dependent on node instance id
+  if(!isa<Instruction>(V) || isa<Constant>(V)) {
+    DEBUG(errs() << "\tStop\n");
+    return false;
+  }
+
+  Instruction* I = cast<Instruction>(V);
+  for(unsigned i = 0; i < I->getNumOperands(); i++) {
+    Value* operand = I->getOperand(i);
+    if(IntrinsicInst* II = dyn_cast<IntrinsicInst>(operand)) {
+      if((II->getIntrinsicID() == Intrinsic::visc_getNodeInstanceID_x
+            || II->getIntrinsicID() == Intrinsic::visc_getNodeInstanceID_y
+            || II->getIntrinsicID() == Intrinsic::visc_getNodeInstanceID_z)) {
+        Value* Node = II->getArgOperand(0);
+        IntrinsicInst* GN = dyn_cast<IntrinsicInst>(Node);
+        assert(GN && "NodeInstanceID operande should be node/parent node intrinsic\n");
+        if(GN->getIntrinsicID() == Intrinsic::visc_getNode) {
+          DEBUG(errs() << "\tDependency found on Node instance ID: " << *II << "\n");
+          return true;
+        }
+      }
+    }
+    if(CmpInst* CI = dyn_cast<CmpInst>(operand)) {
+      DEBUG(errs() << "Found compare instruction: "<< *CI<<"\nNot following its dependency list\n");
+      continue;
+    }
+    DEBUG( errs() << "\tTraverse the operand chain of: " << *operand << "\n");
+    if(isDependentOnNodeInstanceID(operand, DependenceList)) {
+      return true;
+    }
+  }
+  return false;
 }
 
 // Function to check if argument arg can be changed to a constant memory pointer
 static bool canBePromoted(Argument* arg, Function* F) {
-	DEBUG(errs() << "OPT: Check if Argument " << *arg << " can be changed to constant memory\n");
-	std::vector<Value*> UseList;
-	std::vector<Value*> VisitedList;
-	// recursively traverse use chain
-	// if find a store instruction return false, everything fails, cannot be
-	// promoted
-	// if find a load instruction as use, add the GEP instruction to list
-	bool foundStore = findLoadStoreUses(arg, &UseList, &VisitedList);
-	if(foundStore == true)
-		return false;
-	// See that the GEP instructions are not dependent on getNodeInstanceID
-	// intrinsic
-	DEBUG(errs() << foundStore << "\tNo Store Instruction found. Check dependence on node instance ID\n");
-	std::vector<Value*>DependenceList;
-	for(auto U: UseList) {
-		if(isDependentOnNodeInstanceID(U, &DependenceList))
-			return false;
-	}
-	DEBUG(errs() << "\tYes, Promotable to Constant Memory\n");
-	return true;
+  DEBUG(errs() << "OPT: Check if Argument " << *arg << " can be changed to constant memory\n");
+  std::vector<Value*> UseList;
+  std::vector<Value*> VisitedList;
+  // recursively traverse use chain
+  // if find a store instruction return false, everything fails, cannot be
+  // promoted
+  // if find a load instruction as use, add the GEP instruction to list
+  bool foundStore = findLoadStoreUses(arg, &UseList, &VisitedList);
+  if(foundStore == true)
+    return false;
+  // See that the GEP instructions are not dependent on getNodeInstanceID
+  // intrinsic
+  DEBUG(errs() << foundStore << "\tNo Store Instruction found. Check dependence on node instance ID\n");
+  std::vector<Value*>DependenceList;
+  for(auto U: UseList) {
+    if(isDependentOnNodeInstanceID(U, &DependenceList))
+      return false;
+  }
+  DEBUG(errs() << "\tYes, Promotable to Constant Memory\n");
+  return true;
 }
 
 
 // Calculate execute node parameters which include, number of diemnsions for
 // dynamic instances of the kernel, local and global work group sizes.
 static void getExecuteNodeParams(Module &M, Value* &workDim, Value* &LocalWGPtr, Value*
-		&GlobalWGPtr, Kernel* kernel, ValueToValueMapTy& VMap, Instruction* IB) {
-
-	// Assign number of dimenstions a constant value
-	workDim = ConstantInt::get(Type::getInt32Ty(M.getContext()), kernel->gridDim);
-
-	// If local work group size if null
-	if(!kernel->hasLocalWG()) {
-		LocalWGPtr = Constant::getNullValue(Type::getInt64PtrTy(M.getContext()));
-	}
-	else {
-		for(unsigned i = 0; i < kernel->localWGSize.size(); i++) {
-			if(isa<Argument>(kernel->localWGSize[i]))
-				kernel->localWGSize[i] = VMap[kernel->localWGSize[i]];
-		}
-		LocalWGPtr = genWorkGroupPtr(M, kernel->localWGSize, VMap, IB, "LocalWGSize");
-	}
-
-	for(unsigned i = 0; i < kernel->globalWGSize.size(); i++) {
-		if(isa<Argument>(kernel->globalWGSize[i]))
-			kernel->globalWGSize[i] = VMap[kernel->globalWGSize[i]];
-	}
-
-	// For OpenCL, global work group size is the total bumber of instances in each
-	// dimension. So, multiply local and global dim limits.
-	std::vector<Value*> globalWGSizeInsts;
-	if(kernel->hasLocalWG()) {
-		for (unsigned i = 0; i < kernel->gridDim; i++) {
-			BinaryOperator* MulInst = BinaryOperator::Create(Instruction::Mul, kernel->globalWGSize[i], kernel->localWGSize[i], "", IB);
-			globalWGSizeInsts.push_back(MulInst);
-		}
-	}
-	else {
-		globalWGSizeInsts = kernel->globalWGSize;
-	}
-	GlobalWGPtr = genWorkGroupPtr(M, globalWGSizeInsts, VMap, IB, "GlobalWGSize");
-	DEBUG(errs() << "Pointer to global work group: " << *GlobalWGPtr << "\n");
+    &GlobalWGPtr, Kernel* kernel, ValueToValueMapTy& VMap, Instruction* IB) {
+
+  // Assign the number of dimensions a constant value
+  workDim = ConstantInt::get(Type::getInt32Ty(M.getContext()), kernel->gridDim);
+
+  // If local work group size is null
+  if(!kernel->hasLocalWG()) {
+    LocalWGPtr = Constant::getNullValue(Type::getInt64PtrTy(M.getContext()));
+  }
+  else {
+    for(unsigned i = 0; i < kernel->localWGSize.size(); i++) {
+      if(isa<Argument>(kernel->localWGSize[i]))
+        kernel->localWGSize[i] = VMap[kernel->localWGSize[i]];
+    }
+    LocalWGPtr = genWorkGroupPtr(M, kernel->localWGSize, VMap, IB, "LocalWGSize");
+  }
+
+  for(unsigned i = 0; i < kernel->globalWGSize.size(); i++) {
+    if(isa<Argument>(kernel->globalWGSize[i]))
+      kernel->globalWGSize[i] = VMap[kernel->globalWGSize[i]];
+  }
+
+  // For OpenCL, global work group size is the total number of instances in each
+  // dimension. So, multiply local and global dim limits.
+  std::vector<Value*> globalWGSizeInsts;
+  if(kernel->hasLocalWG()) {
+    for (unsigned i = 0; i < kernel->gridDim; i++) {
+      BinaryOperator* MulInst = BinaryOperator::Create(Instruction::Mul, kernel->globalWGSize[i], kernel->localWGSize[i], "", IB);
+      globalWGSizeInsts.push_back(MulInst);
+    }
+  }
+  else {
+    globalWGSizeInsts = kernel->globalWGSize;
+  }
+  GlobalWGPtr = genWorkGroupPtr(M, globalWGSizeInsts, VMap, IB, "GlobalWGSize");
+  DEBUG(errs() << "Pointer to global work group: " << *GlobalWGPtr << "\n");
 }
 
 // CodeGen for allocating space for Work Group on stack and returning a pointer
 // to its address
 static Value* genWorkGroupPtr(Module &M, std::vector<Value*> WGSize, ValueToValueMapTy& VMap, Instruction* IB, const Twine& WGName) {
-	Value* WGPtr;
-	// Get int64_t and or ease of use
-	Type* Int64Ty = Type::getInt64Ty(M.getContext());
-
-	// Work Group type is [#dim x i64]
-	Type* WGTy = ArrayType::get(Int64Ty, WGSize.size());
-	// Allocate space of Global work group data on stack and get pointer to
-	// first element.
-	AllocaInst* WG = new AllocaInst(WGTy, 0, WGName, IB);
-	WGPtr = BitCastInst::CreatePointerCast(WG, Int64Ty->getPointerTo(), WG->getName()+".0", IB);
-	Value* nextDim = WGPtr;
-	DEBUG(errs() << *WGPtr << "\n");
-
-	// Iterate over the number of dimensions and store the global work group
-	// size in that dimension
-	for(unsigned i=0; i < WGSize.size(); i++) {
-		DEBUG(errs() << *WGSize[i] << "\n");
-		assert(WGSize[i]->getType()->isIntegerTy() && "Dimension not an integer type!");
-
-		if(WGSize[i]->getType() != Int64Ty) {
-			// If number of dimensions are mentioned in any other integer format,
-			// generate code to extend it to i64. We need to use the mapped value in
-			// the new generated function, hence the use of VMap
-			// FIXME: Why are we changing the kernel WGSize vector here?
-			DEBUG(errs() << "Not i64. Zero extend required.\n");
-			DEBUG(errs() << *WGSize[i] << "\n");
-			CastInst* CI = BitCastInst::CreateIntegerCast(WGSize[i], Int64Ty, true, "", IB);
-			DEBUG(errs() << "Bitcast done.\n");
-			StoreInst* SI = new StoreInst(CI, nextDim, IB);
-			DEBUG(errs() << "Zero extend done.\n");
-			DEBUG(errs() << "\tZero extended work group size: " << *SI << "\n");
-		} else {
-			// Store the value representing work group size in ith dimension on
-			// stack
-			StoreInst* SI = new StoreInst(WGSize[i], nextDim, IB);
-
-			DEBUG(errs() << "\t Work group size: " << *SI << "\n");
-		}
-		if(i+1 < WGSize.size()) {
-			// Move to next dimension
-			GetElementPtrInst* GEP = GetElementPtrInst::Create(nullptr, nextDim,
-					ArrayRef<Value*>(ConstantInt::get(Int64Ty, 1)),
-					WG->getName()+"."+Twine(i+1),
-					IB);
-			DEBUG(errs() << "\tPointer to next dimension on stack: " << *GEP << "\n");
-			nextDim = GEP;
-		}
-	}
-	return WGPtr;
+  Value* WGPtr;
+  // Get the int64_t type once, for ease of use
+  Type* Int64Ty = Type::getInt64Ty(M.getContext());
+
+  // Work Group type is [#dim x i64]
+  Type* WGTy = ArrayType::get(Int64Ty, WGSize.size());
+  // Allocate space of Global work group data on stack and get pointer to
+  // first element.
+  AllocaInst* WG = new AllocaInst(WGTy, 0, WGName, IB);
+  WGPtr = BitCastInst::CreatePointerCast(WG, Int64Ty->getPointerTo(), WG->getName()+".0", IB);
+  Value* nextDim = WGPtr;
+  DEBUG(errs() << *WGPtr << "\n");
+
+  // Iterate over the number of dimensions and store the global work group
+  // size in that dimension
+  for(unsigned i=0; i < WGSize.size(); i++) {
+    DEBUG(errs() << *WGSize[i] << "\n");
+    assert(WGSize[i]->getType()->isIntegerTy() && "Dimension not an integer type!");
+
+    if(WGSize[i]->getType() != Int64Ty) {
+      // If number of dimensions are mentioned in any other integer format,
+      // generate code to extend it to i64. We need to use the mapped value in
+      // the new generated function, hence the use of VMap
+      // FIXME: Why are we changing the kernel WGSize vector here?
+      DEBUG(errs() << "Not i64. Zero extend required.\n");
+      DEBUG(errs() << *WGSize[i] << "\n");
+      CastInst* CI = BitCastInst::CreateIntegerCast(WGSize[i], Int64Ty, true, "", IB);
+      DEBUG(errs() << "Bitcast done.\n");
+      StoreInst* SI = new StoreInst(CI, nextDim, IB);
+      DEBUG(errs() << "Zero extend done.\n");
+      DEBUG(errs() << "\tZero extended work group size: " << *SI << "\n");
+    } else {
+      // Store the value representing work group size in ith dimension on
+      // stack
+      StoreInst* SI = new StoreInst(WGSize[i], nextDim, IB);
+
+      DEBUG(errs() << "\t Work group size: " << *SI << "\n");
+    }
+    if(i+1 < WGSize.size()) {
+      // Move to next dimension
+      GetElementPtrInst* GEP = GetElementPtrInst::Create(nullptr, nextDim,
+          ArrayRef<Value*>(ConstantInt::get(Int64Ty, 1)),
+          WG->getName()+"."+Twine(i+1),
+          IB);
+      DEBUG(errs() << "\tPointer to next dimension on stack: " << *GEP << "\n");
+      nextDim = GEP;
+    }
+  }
+  return WGPtr;
 
 }
 
 // Get generated PTX binary name
 static std::string getPTXFilename(const Module& M) {
-	std::string moduleID = M.getModuleIdentifier();
-	moduleID.append(".kernels.cl");
-	return moduleID;
+  std::string moduleID = M.getModuleIdentifier();
+  moduleID.append(".kernels.cl");
+  return moduleID;
 }
 
 // Get the name of the input file from module ID
 static std::string getFilenameFromModule(const Module& M) {
-	std::string moduleID = M.getModuleIdentifier();
-	return moduleID.substr(moduleID.find_last_of("/")+1);
+  std::string moduleID = M.getModuleIdentifier();
+  return moduleID.substr(moduleID.find_last_of("/")+1);
 }
 
 // Changes the data layout of the Module to be compiled with NVPTX backend
 // TODO: Figure out when to call it, probably after duplicating the modules
 static void changeDataLayout(Module &M) {
-	std::string nvptx32_layoutStr = "e-p:32:32-i64:64-v16:16-v32:32-n16:32:64";
-	std::string nvptx64_layoutStr = "e-i64:64-v16:16-v32:32-n16:32:64";
+  std::string nvptx32_layoutStr = "e-p:32:32-i64:64-v16:16-v32:32-n16:32:64";
+  std::string nvptx64_layoutStr = "e-i64:64-v16:16-v32:32-n16:32:64";
 
-	if (TARGET_PTX == 32)
-		M.setDataLayout(StringRef(nvptx32_layoutStr));
-	else if (TARGET_PTX == 64)
-		M.setDataLayout(StringRef(nvptx64_layoutStr));
-	else assert(false && "Invalid PTX target");
+  if (TARGET_PTX == 32)
+    M.setDataLayout(StringRef(nvptx32_layoutStr));
+  else if (TARGET_PTX == 64)
+    M.setDataLayout(StringRef(nvptx64_layoutStr));
+  else assert(false && "Invalid PTX target");
 
-	return;
+  return;
 }
 
 static void changeTargetTriple(Module &M) {
-	std::string nvptx32_TargetTriple = "nvptx--nvidiacl";
-	std::string nvptx64_TargetTriple = "nvptx64--nvidiacl";
+  std::string nvptx32_TargetTriple = "nvptx--nvidiacl";
+  std::string nvptx64_TargetTriple = "nvptx64--nvidiacl";
 
-	if (TARGET_PTX == 32)
-		M.setTargetTriple(StringRef(nvptx32_TargetTriple));
-	else if (TARGET_PTX == 64)
-		M.setTargetTriple(StringRef(nvptx64_TargetTriple));
-	else assert(false && "Invalid PTX target");
+  if (TARGET_PTX == 32)
+    M.setTargetTriple(StringRef(nvptx32_TargetTriple));
+  else if (TARGET_PTX == 64)
+    M.setTargetTriple(StringRef(nvptx64_TargetTriple));
+  else assert(false && "Invalid PTX target");
 
-	return;
+  return;
 }
 
 // Helper function, populate a vector with all return statements in a function
 static void findReturnInst(Function* F, std::vector<ReturnInst *> & ReturnInstVec) {
-	for (auto &BB : *F) {
-		if(auto *RI = dyn_cast<ReturnInst>(BB.getTerminator()))
-			ReturnInstVec.push_back(RI);
-	}	
+  for (auto &BB : *F) {
+    if(auto *RI = dyn_cast<ReturnInst>(BB.getTerminator()))
+      ReturnInstVec.push_back(RI);
+  }	
 }
 
 // Helper function, populate a vector with all IntrinsicID intrinsics in a function
 static void findIntrinsicInst(Function* F, Intrinsic::ID IntrinsicID, std::vector<IntrinsicInst *> & IntrinsicInstVec) {
-	for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) {
-		Instruction *I = &(*i);
-		IntrinsicInst* II = dyn_cast<IntrinsicInst>(I);
-		if (II && II->getIntrinsicID() == IntrinsicID) {
-			IntrinsicInstVec.push_back(II);
-		}
-	}
+  for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) {
+    Instruction *I = &(*i);
+    IntrinsicInst* II = dyn_cast<IntrinsicInst>(I);
+    if (II && II->getIntrinsicID() == IntrinsicID) {
+      IntrinsicInstVec.push_back(II);
+    }
+  }
 }
 
 // Helper funtion, returns the atomicrmw op, corresponding to intrinsic atomic op
 static AtomicRMWInst::BinOp getAtomicOp(Intrinsic::ID ID) {
-	switch(ID) {
-		case Intrinsic::visc_atomic_add:
-			return AtomicRMWInst::Add;
-		case Intrinsic::visc_atomic_sub:
-			return AtomicRMWInst::Sub;
-		case Intrinsic::visc_atomic_min:
-			return AtomicRMWInst::Min;
-		case Intrinsic::visc_atomic_umin:
-			return AtomicRMWInst::UMin;
-		case Intrinsic::visc_atomic_max:
-			return AtomicRMWInst::Max;
-		case Intrinsic::visc_atomic_umax:
-			return AtomicRMWInst::UMax;
-			//case Intrinsic::visc_atomic_inc: return AtomicRMWInst::Inc;
-			//case Intrinsic::visc_atomic_dec: return AtomicRMWInst::Dec;
-		case Intrinsic::visc_atomic_xchg:
-			return AtomicRMWInst::Xchg;
-		case Intrinsic::visc_atomic_and:
-			return AtomicRMWInst::And;
-		case Intrinsic::visc_atomic_or:
-			return AtomicRMWInst::Or;
-		case Intrinsic::visc_atomic_xor:
-			return AtomicRMWInst::Xor;
-		default:
-			llvm_unreachable("Unsupported atomic intrinsic!");
-	};
+  switch(ID) {
+    case Intrinsic::visc_atomic_add:
+      return AtomicRMWInst::Add;
+    case Intrinsic::visc_atomic_sub:
+      return AtomicRMWInst::Sub;
+    case Intrinsic::visc_atomic_min:
+      return AtomicRMWInst::Min;
+    case Intrinsic::visc_atomic_umin:
+      return AtomicRMWInst::UMin;
+    case Intrinsic::visc_atomic_max:
+      return AtomicRMWInst::Max;
+    case Intrinsic::visc_atomic_umax:
+      return AtomicRMWInst::UMax;
+      //case Intrinsic::visc_atomic_inc: return AtomicRMWInst::Inc;
+      //case Intrinsic::visc_atomic_dec: return AtomicRMWInst::Dec;
+    case Intrinsic::visc_atomic_xchg:
+      return AtomicRMWInst::Xchg;
+    case Intrinsic::visc_atomic_and:
+      return AtomicRMWInst::And;
+    case Intrinsic::visc_atomic_or:
+      return AtomicRMWInst::Or;
+    case Intrinsic::visc_atomic_xor:
+      return AtomicRMWInst::Xor;
+    default:
+      llvm_unreachable("Unsupported atomic intrinsic!");
+  };
 }
 
 
 // Helper funtion, returns the OpenCL function name, corresponding to atomic op
 static std::string getAtomicOpName(Intrinsic::ID ID) {
-	switch(ID) {
-		case Intrinsic::visc_atomic_cmpxchg:
-			return "atom_cmpxchg";
-		case Intrinsic::visc_atomic_add:
-			return "atom_add";
-		case Intrinsic::visc_atomic_sub:
-			return "atom_sub";
-		case Intrinsic::visc_atomic_min:
-			return "atom_min";
-		case Intrinsic::visc_atomic_max:
-			return "atom_max";
-		case Intrinsic::visc_atomic_inc:
-			return "atom_inc";
-		case Intrinsic::visc_atomic_dec:
-			return "atom_dec";
-		case Intrinsic::visc_atomic_xchg:
-			return "atom_xchg";
-		case Intrinsic::visc_atomic_and:
-			return "atom_and";
-		case Intrinsic::visc_atomic_or:
-			return "atom_or";
-		case Intrinsic::visc_atomic_xor:
-			return "atom_xor";
-		default:
-			llvm_unreachable("Unsupported atomic intrinsic!");
-	};
+  switch(ID) {
+    case Intrinsic::visc_atomic_cmpxchg:
+      return "atom_cmpxchg";
+    case Intrinsic::visc_atomic_add:
+      return "atom_add";
+    case Intrinsic::visc_atomic_sub:
+      return "atom_sub";
+    case Intrinsic::visc_atomic_min:
+      return "atom_min";
+    case Intrinsic::visc_atomic_max:
+      return "atom_max";
+    case Intrinsic::visc_atomic_inc:
+      return "atom_inc";
+    case Intrinsic::visc_atomic_dec:
+      return "atom_dec";
+    case Intrinsic::visc_atomic_xchg:
+      return "atom_xchg";
+    case Intrinsic::visc_atomic_and:
+      return "atom_and";
+    case Intrinsic::visc_atomic_or:
+      return "atom_or";
+    case Intrinsic::visc_atomic_xor:
+      return "atom_xor";
+    default:
+      llvm_unreachable("Unsupported atomic intrinsic!");
+  };
 }
 
 } // End of namespace
 
 char DFG2LLVM_NVPTX::ID = 0;
 static RegisterPass<DFG2LLVM_NVPTX> X("dfg2llvm-nvptx",
-		"Dataflow Graph to LLVM for NVPTX Pass",
-		false /* does not modify the CFG */,
-		true /* transformation,   *
-					* not just analysis */);
+    "Dataflow Graph to LLVM for NVPTX Pass",
+    false /* does not modify the CFG */,
+    true /* transformation,   *
+          * not just analysis */);
 
diff --git a/hpvm/projects/llvm-cbe/lib/Target/CBackend/CBackend.cpp b/hpvm/projects/llvm-cbe/lib/Target/CBackend/CBackend.cpp
index 46faa737d125dc3207b6a40601939a1ad639afc9..cb4d43311ab6754adf270769e56b3dd210a90163 100644
--- a/hpvm/projects/llvm-cbe/lib/Target/CBackend/CBackend.cpp
+++ b/hpvm/projects/llvm-cbe/lib/Target/CBackend/CBackend.cpp
@@ -29,7 +29,7 @@
 
 #include <iostream>
 
-//#include "Graph.h"
+
 //#include "PHINodePass.h"
 
 //Jackson Korba 9/29/14
@@ -59,20 +59,20 @@ enum UnaryOps {
 };
 
 static bool isEmptyType(Type *Ty) {
-    if (StructType *STy = dyn_cast<StructType>(Ty))
-        return STy->getNumElements() == 0 ||
-            std::all_of(STy->element_begin(), STy->element_end(), [](Type *T){ return isEmptyType(T); });
-    if (VectorType *VTy = dyn_cast<VectorType>(Ty))
-        return VTy->getNumElements() == 0 ||
-            isEmptyType(VTy->getElementType());
-    if (ArrayType *ATy = dyn_cast<ArrayType>(Ty))
-        return ATy->getNumElements() == 0 ||
-            isEmptyType(ATy->getElementType());
-    return Ty->isVoidTy();
+  if (StructType *STy = dyn_cast<StructType>(Ty))
+    return STy->getNumElements() == 0 ||
+      std::all_of(STy->element_begin(), STy->element_end(), [](Type *T){ return isEmptyType(T); });
+  if (VectorType *VTy = dyn_cast<VectorType>(Ty))
+    return VTy->getNumElements() == 0 ||
+      isEmptyType(VTy->getElementType());
+  if (ArrayType *ATy = dyn_cast<ArrayType>(Ty))
+    return ATy->getNumElements() == 0 ||
+      isEmptyType(ATy->getElementType());
+  return Ty->isVoidTy();
 }
 
 bool CWriter::isEmptyType(Type *Ty) const {
-    return ::isEmptyType(Ty);
+  return ::isEmptyType(Ty);
 }
 
 /// isAddressExposed - Return true if the specified value's name needs to
@@ -130,7 +130,7 @@ bool CWriter::isInlinableInst(Instruction &I) const {
 // generate significantly better code than to emit alloca calls directly.
 //
 AllocaInst *CWriter::isDirectAlloca(Value *V) const {
-	//DEBUG(errs() << "Checking if " << *V << " is a direct alloca!\n");
+  //DEBUG(errs() << "Checking if " << *V << " is a direct alloca!\n");
   AllocaInst *AI = dyn_cast<AllocaInst>(V);
   if (!AI) return 0;
   // Modification to inline fixed size array alloca!
@@ -149,10 +149,11 @@ bool CWriter::isInlineAsm(Instruction& I) const {
 }
 
 bool CWriter::runOnFunction(Function &F) {
- // Do not codegen any 'available_externally' functions at all, they have
- // definitions outside the translation unit.
- if (F.hasAvailableExternallyLinkage())
-   return false;
+  // Do not codegen any 'available_externally' functions at all, they have
+  // definitions outside the translation unit.
+  errs() << "Running CBE on function: " << F.getName() << "\n";
+  if (F.hasAvailableExternallyLinkage())
+    return false;
 
   LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
   // Adding PDT pass to avoid code duplication
@@ -164,13 +165,13 @@ bool CWriter::runOnFunction(Function &F) {
   // Adding Assumption Cache
   AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
   // Adding IVUsers Pass for loop recongnition
-//  IU = &getAnalysis<IVUsersWrapperPass>().getIU();
+  //  IU = &getAnalysis<IVUsersWrapperPass>().getIU();
 
   BasicBlock* entry = &(F.getEntryBlock());
   for (df_iterator<BasicBlock*> BI = df_begin(entry), BE = df_end(entry); BI!=BE; ++BI) { 
     BasicBlock *BB = *BI;
     if (Loop *L = LI->getLoopFor(&*BB)) {
-      if(simplifyLoop(L, DT, LI, SE, AC, nullptr, true)) {
+      if(simplifyLoop(L, DT, LI, SE, AC, nullptr, /*true*/false)) {
         //DEBUG(errs() << "Simplified loop!\n" << *L << "\n");
       }
     }
@@ -224,46 +225,46 @@ CWriter::printTypeString(raw_ostream &Out, Type *Ty, bool isSigned) {
   }
 
   switch (Ty->getTypeID()) {
-  case Type::VoidTyID:   return Out << "void";
-  case Type::IntegerTyID: {
-    unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
-    if (NumBits == 1)
-      return Out << "bool";
-    else {
-      assert(NumBits <= 128 && "Bit widths > 128 not implemented yet");
-      return Out << (isSigned?"i":"u") << NumBits;
-    }
-  }
-  case Type::FloatTyID:    return Out << "f32";
-  case Type::DoubleTyID:   return Out << "f64";
-  case Type::X86_FP80TyID: return Out << "f80";
-  case Type::PPC_FP128TyID:
-  case Type::FP128TyID:    return Out << "f128";
-
-  case Type::X86_MMXTyID:
-    return Out << (isSigned ? "i32y2" : "u32y2");
-
-  case Type::VectorTyID: {
-    TypedefDeclTypes.insert(Ty);
-    VectorType *VTy = cast<VectorType>(Ty);
-    assert(VTy->getNumElements() != 0);
-    printTypeString(Out, VTy->getElementType(), isSigned);
-    return Out << "x" << VTy->getNumElements();
-  }
-
-  case Type::ArrayTyID: {
-    TypedefDeclTypes.insert(Ty);
-    ArrayType *ATy = cast<ArrayType>(Ty);
-    assert(ATy->getNumElements() != 0);
-    printTypeString(Out, ATy->getElementType(), isSigned);
-    return Out << "a" << ATy->getNumElements();
-  }
+    case Type::VoidTyID:   return Out << "void";
+    case Type::IntegerTyID: {
+                              unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
+                              if (NumBits == 1)
+                                return Out << "bool";
+                              else {
+                                assert(NumBits <= 128 && "Bit widths > 128 not implemented yet");
+                                return Out << (isSigned?"i":"u") << NumBits;
+                              }
+                            }
+    case Type::FloatTyID:    return Out << "f32";
+    case Type::DoubleTyID:   return Out << "f64";
+    case Type::X86_FP80TyID: return Out << "f80";
+    case Type::PPC_FP128TyID:
+    case Type::FP128TyID:    return Out << "f128";
+
+    case Type::X86_MMXTyID:
+                             return Out << (isSigned ? "i32y2" : "u32y2");
+
+    case Type::VectorTyID: {
+                             TypedefDeclTypes.insert(Ty);
+                             VectorType *VTy = cast<VectorType>(Ty);
+                             assert(VTy->getNumElements() != 0);
+                             printTypeString(Out, VTy->getElementType(), isSigned);
+                             return Out << "x" << VTy->getNumElements();
+                           }
+
+    case Type::ArrayTyID: {
+                            TypedefDeclTypes.insert(Ty);
+                            ArrayType *ATy = cast<ArrayType>(Ty);
+                            assert(ATy->getNumElements() != 0);
+                            printTypeString(Out, ATy->getElementType(), isSigned);
+                            return Out << "a" << ATy->getNumElements();
+                          }
 
-  default:
+    default:
 #ifndef NDEBUG
-    errs() << "Unknown primitive type: " << *Ty << "\n";
+                          errs() << "Unknown primitive type: " << *Ty << "\n";
 #endif
-    llvm_unreachable(0);
+                          llvm_unreachable(0);
   }
 }
 
@@ -279,68 +280,68 @@ std::string CWriter::getStructName(StructType *ST) {
 }
 
 std::string CWriter::getFunctionName(FunctionType *FT,
-				     std::pair<AttributeList, CallingConv::ID> PAL) {
+    std::pair<AttributeList, CallingConv::ID> PAL) {
   unsigned &id = UnnamedFunctionIDs[std::make_pair(FT, PAL)];
   if (id == 0)
-      id = ++NextFunctionNumber;
+    id = ++NextFunctionNumber;
   return "l_fptr_" + utostr(id);
 }
 
 std::string CWriter::getArrayName(ArrayType *AT) {
-    std::string astr;
-    raw_string_ostream ArrayInnards(astr);
-    // Arrays are wrapped in structs to allow them to have normal
-    // value semantics (avoiding the array "decay").
-    assert(!isEmptyType(AT));
-    printTypeName(ArrayInnards, AT->getElementType(), false);
-    return "struct l_array_" + utostr(AT->getNumElements()) + '_' + CBEMangle(ArrayInnards.str());
+  std::string astr;
+  raw_string_ostream ArrayInnards(astr);
+  // Arrays are wrapped in structs to allow them to have normal
+  // value semantics (avoiding the array "decay").
+  assert(!isEmptyType(AT));
+  printTypeName(ArrayInnards, AT->getElementType(), false);
+  return "struct l_array_" + utostr(AT->getNumElements()) + '_' + CBEMangle(ArrayInnards.str());
 }
 
 std::string CWriter::getVectorName(VectorType *VT, bool Aligned) {
-    std::string astr;
-    raw_string_ostream VectorInnards(astr);
-    // Vectors are handled like arrays
-    assert(!isEmptyType(VT));
-//    if (Aligned)
-//      Out << "__MSALIGN__(" << TD->getABITypeAlignment(VT) << ") ";
-    printTypeName(VectorInnards, VT->getElementType(), false);
-    return "struct l_vector_" + utostr(VT->getNumElements()) + '_' + CBEMangle(VectorInnards.str());
+  std::string astr;
+  raw_string_ostream VectorInnards(astr);
+  // Vectors are handled like arrays
+  assert(!isEmptyType(VT));
+  //    if (Aligned)
+  //      Out << "__MSALIGN__(" << TD->getABITypeAlignment(VT) << ") ";
+  printTypeName(VectorInnards, VT->getElementType(), false);
+  return "struct l_vector_" + utostr(VT->getNumElements()) + '_' + CBEMangle(VectorInnards.str());
 }
 
 
 static const std::string getCmpPredicateName(CmpInst::Predicate P) {
   switch (P) {
-  case FCmpInst::FCMP_FALSE: return "0";
-  case FCmpInst::FCMP_OEQ: return "oeq";
-  case FCmpInst::FCMP_OGT: return "ogt";
-  case FCmpInst::FCMP_OGE: return "oge";
-  case FCmpInst::FCMP_OLT: return "olt";
-  case FCmpInst::FCMP_OLE: return "ole";
-  case FCmpInst::FCMP_ONE: return "one";
-  case FCmpInst::FCMP_ORD: return "ord";
-  case FCmpInst::FCMP_UNO: return "uno";
-  case FCmpInst::FCMP_UEQ: return "ueq";
-  case FCmpInst::FCMP_UGT: return "ugt";
-  case FCmpInst::FCMP_UGE: return "uge";
-  case FCmpInst::FCMP_ULT: return "ult";
-  case FCmpInst::FCMP_ULE: return "ule";
-  case FCmpInst::FCMP_UNE: return "une";
-  case FCmpInst::FCMP_TRUE: return "1";
-  case ICmpInst::ICMP_EQ:  return "eq";
-  case ICmpInst::ICMP_NE:  return "ne";
-  case ICmpInst::ICMP_ULE: return "ule";
-  case ICmpInst::ICMP_SLE: return "sle";
-  case ICmpInst::ICMP_UGE: return "uge";
-  case ICmpInst::ICMP_SGE: return "sge";
-  case ICmpInst::ICMP_ULT: return "ult";
-  case ICmpInst::ICMP_SLT: return "slt";
-  case ICmpInst::ICMP_UGT: return "ugt";
-  case ICmpInst::ICMP_SGT: return "sgt";
-  default:
+    case FCmpInst::FCMP_FALSE: return "0";
+    case FCmpInst::FCMP_OEQ: return "oeq";
+    case FCmpInst::FCMP_OGT: return "ogt";
+    case FCmpInst::FCMP_OGE: return "oge";
+    case FCmpInst::FCMP_OLT: return "olt";
+    case FCmpInst::FCMP_OLE: return "ole";
+    case FCmpInst::FCMP_ONE: return "one";
+    case FCmpInst::FCMP_ORD: return "ord";
+    case FCmpInst::FCMP_UNO: return "uno";
+    case FCmpInst::FCMP_UEQ: return "ueq";
+    case FCmpInst::FCMP_UGT: return "ugt";
+    case FCmpInst::FCMP_UGE: return "uge";
+    case FCmpInst::FCMP_ULT: return "ult";
+    case FCmpInst::FCMP_ULE: return "ule";
+    case FCmpInst::FCMP_UNE: return "une";
+    case FCmpInst::FCMP_TRUE: return "1";
+    case ICmpInst::ICMP_EQ:  return "eq";
+    case ICmpInst::ICMP_NE:  return "ne";
+    case ICmpInst::ICMP_ULE: return "ule";
+    case ICmpInst::ICMP_SLE: return "sle";
+    case ICmpInst::ICMP_UGE: return "uge";
+    case ICmpInst::ICMP_SGE: return "sge";
+    case ICmpInst::ICMP_ULT: return "ult";
+    case ICmpInst::ICMP_SLT: return "slt";
+    case ICmpInst::ICMP_UGT: return "ugt";
+    case ICmpInst::ICMP_SGT: return "sgt";
+    default:
 #ifndef NDEBUG
-    errs() << "Invalid icmp predicate!" << P;
+                             errs() << "Invalid icmp predicate!" << P;
 #endif
-    llvm_unreachable(0);
+                             llvm_unreachable(0);
   }
 }
 
@@ -348,42 +349,42 @@ static const std::string getCmpPredicateName(CmpInst::Predicate P) {
 raw_ostream &
 CWriter::printSimpleType(raw_ostream &Out, Type *Ty, bool isSigned) {
   assert((Ty->isSingleValueType() || Ty->isVoidTy()) &&
-         "Invalid type for printSimpleType");
+      "Invalid type for printSimpleType");
   switch (Ty->getTypeID()) {
-  case Type::VoidTyID:   return Out << "void";
-  case Type::IntegerTyID: {
-    unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
-    if (NumBits == 1)
-      return Out << "bool";
-    else if (NumBits <= 8)
-      return Out << (isSigned?"char":"uchar");
-    else if (NumBits <= 16)
-      return Out << (isSigned?"short":"ushort");
-    else if (NumBits <= 32)
-      return Out << (isSigned?"int":"uint"); // !!FIX ME
-    else if (NumBits <= 64)
-      return Out << (isSigned?"long":"ulong");
-    else {
-      assert(NumBits <= 128 && "Bit widths > 128 not implemented yet");
-      return Out << (isSigned?"int128_t":"uint128_t");
-    }
-  }
-  case Type::FloatTyID:  return Out << "float";
-  case Type::DoubleTyID: return Out << "double";
-  // Lacking emulation of FP80 on PPC, etc., we assume whichever of these is
-  // present matches host 'long double'.
-  case Type::X86_FP80TyID:
-  case Type::PPC_FP128TyID:
-  case Type::FP128TyID:  return Out << "long double";
+    case Type::VoidTyID:   return Out << "void";
+    case Type::IntegerTyID: {
+                              unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
+                              if (NumBits == 1)
+                                return Out << "bool";
+                              else if (NumBits <= 8)
+                                return Out << (isSigned?"char":"uchar");
+                              else if (NumBits <= 16)
+                                return Out << (isSigned?"short":"ushort");
+                              else if (NumBits <= 32)
+                                return Out << (isSigned?"int":"uint"); // !!FIX ME
+                              else if (NumBits <= 64)
+                                return Out << (isSigned?"long":"ulong");
+                              else {
+                                assert(NumBits <= 128 && "Bit widths > 128 not implemented yet");
+                                return Out << (isSigned?"int128_t":"uint128_t");
+                              }
+                            }
+    case Type::FloatTyID:  return Out << "float";
+    case Type::DoubleTyID: return Out << "double";
+                           // Lacking emulation of FP80 on PPC, etc., we assume whichever of these is
+                           // present matches host 'long double'.
+    case Type::X86_FP80TyID:
+    case Type::PPC_FP128TyID:
+    case Type::FP128TyID:  return Out << "long double";
 
-  case Type::X86_MMXTyID:
-    return Out << (isSigned?"int":"uint") << " __attribute__((vector_size(8)))";
+    case Type::X86_MMXTyID:
+                           return Out << (isSigned?"int":"uint") << " __attribute__((vector_size(8)))";
 
-  default:
+    default:
 #ifndef NDEBUG
-    errs() << "Unknown primitive type: " << *Ty << "\n";
+                           errs() << "Unknown primitive type: " << *Ty << "\n";
 #endif
-    llvm_unreachable(0);
+                           llvm_unreachable(0);
   }
 }
 
@@ -391,9 +392,9 @@ CWriter::printSimpleType(raw_ostream &Out, Type *Ty, bool isSigned) {
 // declaration.
 //
 raw_ostream &CWriter::printTypeName(raw_ostream &Out, Type *Ty,
-				    bool isSigned,
-				    std::pair<AttributeList, CallingConv::ID> PAL) {
-  
+    bool isSigned,
+    std::pair<AttributeList, CallingConv::ID> PAL) {
+
   if (Ty->isSingleValueType() || Ty->isVoidTy()) {
     if (!Ty->isPointerTy() && !Ty->isVectorTy())
       return printSimpleType(Out, Ty, isSigned);
@@ -403,35 +404,35 @@ raw_ostream &CWriter::printTypeName(raw_ostream &Out, Type *Ty,
     return Out << "void";
 
   switch (Ty->getTypeID()) {
-  case Type::FunctionTyID: {
-    FunctionType *FTy = cast<FunctionType>(Ty);
-    return Out << getFunctionName(FTy, PAL);
-  }
-  case Type::StructTyID: {
-    TypedefDeclTypes.insert(Ty);
-    return Out << getStructName(cast<StructType>(Ty));
-  }
+    case Type::FunctionTyID: {
+                               FunctionType *FTy = cast<FunctionType>(Ty);
+                               return Out << getFunctionName(FTy, PAL);
+                             }
+    case Type::StructTyID: {
+                             TypedefDeclTypes.insert(Ty);
+                             return Out << getStructName(cast<StructType>(Ty));
+                           }
+
+    case Type::PointerTyID: {
+                              Type *ElTy = Ty->getPointerElementType();
+                              return printTypeName(Out, ElTy, false) << '*';
+                            }
 
-  case Type::PointerTyID: {
-    Type *ElTy = Ty->getPointerElementType();
-    return printTypeName(Out, ElTy, false) << '*';
-  }
+    case Type::ArrayTyID: {
+                            TypedefDeclTypes.insert(Ty);
+                            return Out << getArrayName(cast<ArrayType>(Ty));
+                          }
 
-  case Type::ArrayTyID: {
-    TypedefDeclTypes.insert(Ty);
-    return Out << getArrayName(cast<ArrayType>(Ty));
-  }
+    case Type::VectorTyID: {
+                             TypedefDeclTypes.insert(Ty);
+                             return Out << getVectorName(cast<VectorType>(Ty), true);
+                           }
 
-  case Type::VectorTyID: {
-    TypedefDeclTypes.insert(Ty);
-    return Out << getVectorName(cast<VectorType>(Ty), true);
-  }
-
-  default:
+    default:
 #ifndef NDEBUG
-    errs() << "Unexpected type: " << *Ty << "\n";
+                           errs() << "Unexpected type: " << *Ty << "\n";
 #endif
-    llvm_unreachable(0);
+                           llvm_unreachable(0);
   }
 }
 
@@ -452,16 +453,16 @@ raw_ostream &CWriter::printStructDeclaration(raw_ostream &Out, StructType *STy)
   Out << getStructName(STy) << " {\n";
   unsigned Idx = 0;
   for (StructType::element_iterator I = STy->element_begin(),
-         E = STy->element_end(); I != E; ++I, Idx++) {
+      E = STy->element_end(); I != E; ++I, Idx++) {
     Out << "  ";
     bool empty = isEmptyType(*I);
     if (empty)
-        Out << "/* "; // skip zero-sized types
+      Out << "/* "; // skip zero-sized types
     printTypeName(Out, *I, false) << " field" << utostr(Idx);
     if (empty)
-        Out << " */"; // skip zero-sized types
+      Out << " */"; // skip zero-sized types
     else
-        Out << ";\n";
+      Out << ";\n";
   }
   Out << '}';
   if (STy->isPacked())
@@ -473,31 +474,31 @@ raw_ostream &CWriter::printStructDeclaration(raw_ostream &Out, StructType *STy)
 }
 
 raw_ostream &CWriter::printFunctionDeclaration(raw_ostream &Out, FunctionType *Ty,
-					       std::pair<AttributeList, CallingConv::ID> PAL){
-					       
+    std::pair<AttributeList, CallingConv::ID> PAL){
+
   Out << "typedef ";
   printFunctionProto(Out, Ty, PAL, getFunctionName(Ty, PAL), NULL, false);
   return Out << ";\n";
 }
 
 raw_ostream &CWriter::printFunctionProto(raw_ostream &Out, FunctionType *FTy,
-					 std::pair<AttributeList, CallingConv::ID> Attrs,
-                                         const std::string &Name,
-					 Function::arg_iterator ArgList,
-                                         bool isKernel) {
+    std::pair<AttributeList, CallingConv::ID> Attrs,
+    const std::string &Name,
+    Function::arg_iterator ArgList,
+    bool isKernel) {
 
   // NOTE: AttributeSet is replaced by 'AttributeList' at function level in LLVM-9
   AttributeList &PAL = Attrs.first;
 
   if (PAL.hasAttribute(AttributeList::FunctionIndex, Attribute::NoReturn))
     Out << "__noreturn ";
-  
+
   if (isKernel)
     Out << "__kernel \n";
 
   // Should this function actually return a struct by-value?
   bool isStructReturn = PAL.hasAttribute(1, Attribute::StructRet) ||
-                        PAL.hasAttribute(2, Attribute::StructRet);
+    PAL.hasAttribute(2, Attribute::StructRet);
   // Get the return type for the function.
   Type *RetTy;
   if (!isStructReturn)
@@ -507,24 +508,24 @@ raw_ostream &CWriter::printFunctionProto(raw_ostream &Out, FunctionType *FTy,
     RetTy = cast<PointerType>(FTy->getParamType(0))->getElementType();
   }
   printTypeName(Out, RetTy,
-    /*isSigned=*/PAL.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt));
+      /*isSigned=*/PAL.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt));
 
   Out << "/* Processing Function: " << Name << ": " << Attrs.second << "*/\n";
   switch (Attrs.second) {
-   case CallingConv::C:
-    break;
-   case CallingConv::X86_StdCall:
-    Out << " __stdcall";
-    break;
-   case CallingConv::X86_FastCall:
-    Out << " __fastcall";
-    break;
-   case CallingConv::X86_ThisCall:
-    Out << " __thiscall";
-    break;
-   default:
-//    assert(0 && "Encountered Unhandled Calling Convention");
-    break;
+    case CallingConv::C:
+      break;
+    case CallingConv::X86_StdCall:
+      Out << " __stdcall";
+      break;
+    case CallingConv::X86_FastCall:
+      Out << " __fastcall";
+      break;
+    case CallingConv::X86_ThisCall:
+      Out << " __thiscall";
+      break;
+    default:
+      //    assert(0 && "Encountered Unhandled Calling Convention");
+      break;
   }
   Out << ' ' << Name << '(';
 
@@ -578,15 +579,15 @@ raw_ostream &CWriter::printFunctionProto(raw_ostream &Out, FunctionType *FTy,
     }
 
     printTypeNameUnaligned(Out, ArgTy,
-      /*isSigned=*/PAL.hasAttribute(Idx, Attribute::SExt));
+        /*isSigned=*/PAL.hasAttribute(Idx, Attribute::SExt));
     PrintedArg = true;
     bool noalias = false;
-		if (PAL.hasAttribute(Idx, Attribute::NoAlias)) {
-			noalias = true;
-		}
-		++Idx;
+    if (PAL.hasAttribute(Idx, Attribute::NoAlias)) {
+      noalias = true;
+    }
+    ++Idx;
     if (ArgList) {
-			
+
       Out << ' ' << (noalias ? " restrict " : "")  << GetValueName(&*ArgName);
       ++ArgName;
     }
@@ -688,11 +689,11 @@ bool CWriter::printConstantString(Constant *C, enum OperandContext Context) {
         case '\"': Out << "\\\""; break;
         case '\'': Out << "\\\'"; break;
         default:
-          Out << "\\x";
-          Out << (char)(( C/16  < 10) ? ( C/16 +'0') : ( C/16 -10+'A'));
-          Out << (char)(((C&15) < 10) ? ((C&15)+'0') : ((C&15)-10+'A'));
-          LastWasHex = true;
-          break;
+                   Out << "\\x";
+                   Out << (char)(( C/16  < 10) ? ( C/16 +'0') : ( C/16 -10+'A'));
+                   Out << (char)(((C&15) < 10) ? ((C&15)+'0') : ((C&15)-10+'A'));
+                   LastWasHex = true;
+                   break;
       }
     }
   }
@@ -828,146 +829,146 @@ void CWriter::printConstant(Constant *CPV, enum OperandContext Context) {
     assert(CE->getType()->isIntegerTy() || CE->getType()->isFloatingPointTy() || CE->getType()->isPointerTy()); // TODO: VectorType are valid here, but not supported
     GetElementPtrInst *GEPI;
     switch (CE->getOpcode()) {
-    case Instruction::Trunc:
-    case Instruction::ZExt:
-    case Instruction::SExt:
-    case Instruction::FPTrunc:
-    case Instruction::FPExt:
-    case Instruction::UIToFP:
-    case Instruction::SIToFP:
-    case Instruction::FPToUI:
-    case Instruction::FPToSI:
-    case Instruction::PtrToInt:
-    case Instruction::IntToPtr:
-    case Instruction::BitCast:
-      Out << "(";
-      printCast(CE->getOpcode(), CE->getOperand(0)->getType(), CE->getType());
-      if (CE->getOpcode() == Instruction::SExt &&
-          CE->getOperand(0)->getType() == Type::getInt1Ty(CPV->getContext())) {
-        // Make sure we really sext from bool here by subtracting from 0
-        Out << "0-";
-      }
-      printConstant(CE->getOperand(0), ContextCasted);
-      if (CE->getType() == Type::getInt1Ty(CPV->getContext()) &&
-          (CE->getOpcode() == Instruction::Trunc ||
-           CE->getOpcode() == Instruction::FPToUI ||
-           CE->getOpcode() == Instruction::FPToSI ||
-           CE->getOpcode() == Instruction::PtrToInt)) {
-        // Make sure we really truncate to bool here by anding with 1
-        Out << "&1u";
-      }
-      Out << ')';
-      return;
+      case Instruction::Trunc:
+      case Instruction::ZExt:
+      case Instruction::SExt:
+      case Instruction::FPTrunc:
+      case Instruction::FPExt:
+      case Instruction::UIToFP:
+      case Instruction::SIToFP:
+      case Instruction::FPToUI:
+      case Instruction::FPToSI:
+      case Instruction::PtrToInt:
+      case Instruction::IntToPtr:
+      case Instruction::BitCast:
+        Out << "(";
+        printCast(CE->getOpcode(), CE->getOperand(0)->getType(), CE->getType());
+        if (CE->getOpcode() == Instruction::SExt &&
+            CE->getOperand(0)->getType() == Type::getInt1Ty(CPV->getContext())) {
+          // Make sure we really sext from bool here by subtracting from 0
+          Out << "0-";
+        }
+        printConstant(CE->getOperand(0), ContextCasted);
+        if (CE->getType() == Type::getInt1Ty(CPV->getContext()) &&
+            (CE->getOpcode() == Instruction::Trunc ||
+             CE->getOpcode() == Instruction::FPToUI ||
+             CE->getOpcode() == Instruction::FPToSI ||
+             CE->getOpcode() == Instruction::PtrToInt)) {
+          // Make sure we really truncate to bool here by anding with 1
+          Out << "&1u";
+        }
+        Out << ')';
+        return;
 
-    case Instruction::GetElementPtr:
-      Out << "(";
-      //DEBUG(errs() << "\n----------\nCE: " << *CE << "\n");
-      GEPI = dyn_cast<GetElementPtrInst>(CE->getAsInstruction());
-      //DEBUG(errs() << "GEPI: " << *GEPI << "\n");
-      printGEPExpression(CE->getOperand(0), gep_type_begin(CPV), gep_type_end(CPV), CE->getOperand(0)->getType()->isArrayTy(), GEPI);
-      delete(GEPI);
-      //DEBUG(errs() << "Deleted GEPI!\n");
-      Out << ")";
-      return;
-    case Instruction::Select:
-      Out << '(';
-      printConstant(CE->getOperand(0), ContextCasted);
-      Out << '?';
-      printConstant(CE->getOperand(1), ContextNormal);
-      Out << ':';
-      printConstant(CE->getOperand(2), ContextNormal);
-      Out << ')';
-      return;
-    case Instruction::Add:
-    case Instruction::FAdd:
-    case Instruction::Sub:
-    case Instruction::FSub:
-    case Instruction::Mul:
-    case Instruction::FMul:
-    case Instruction::SDiv:
-    case Instruction::UDiv:
-    case Instruction::FDiv:
-    case Instruction::URem:
-    case Instruction::SRem:
-    case Instruction::FRem:
-    case Instruction::And:
-    case Instruction::Or:
-    case Instruction::Xor:
-    case Instruction::ICmp:
-    case Instruction::Shl:
-    case Instruction::LShr:
-    case Instruction::AShr:
-    {
-      Out << '(';
-      bool NeedsClosingParens = printConstExprCast(CE);
-      printConstantWithCast(CE->getOperand(0), CE->getOpcode());
-      switch (CE->getOpcode()) {
+      case Instruction::GetElementPtr:
+        Out << "(";
+        //DEBUG(errs() << "\n----------\nCE: " << *CE << "\n");
+        GEPI = dyn_cast<GetElementPtrInst>(CE->getAsInstruction());
+        //DEBUG(errs() << "GEPI: " << *GEPI << "\n");
+        printGEPExpression(CE->getOperand(0), gep_type_begin(CPV), gep_type_end(CPV), CE->getOperand(0)->getType()->isArrayTy(), GEPI);
+        delete(GEPI);
+        //DEBUG(errs() << "Deleted GEPI!\n");
+        Out << ")";
+        return;
+      case Instruction::Select:
+        Out << '(';
+        printConstant(CE->getOperand(0), ContextCasted);
+        Out << '?';
+        printConstant(CE->getOperand(1), ContextNormal);
+        Out << ':';
+        printConstant(CE->getOperand(2), ContextNormal);
+        Out << ')';
+        return;
       case Instruction::Add:
-      case Instruction::FAdd: Out << " + "; break;
+      case Instruction::FAdd:
       case Instruction::Sub:
-      case Instruction::FSub: Out << " - "; break;
+      case Instruction::FSub:
       case Instruction::Mul:
-      case Instruction::FMul: Out << " * "; break;
+      case Instruction::FMul:
+      case Instruction::SDiv:
+      case Instruction::UDiv:
+      case Instruction::FDiv:
       case Instruction::URem:
       case Instruction::SRem:
-      case Instruction::FRem: Out << " % "; break;
-      case Instruction::UDiv:
-      case Instruction::SDiv:
-      case Instruction::FDiv: Out << " / "; break;
-      case Instruction::And: Out << " & "; break;
-      case Instruction::Or:  Out << " | "; break;
-      case Instruction::Xor: Out << " ^ "; break;
-      case Instruction::Shl: Out << " << "; break;
-      case Instruction::LShr:
-      case Instruction::AShr: Out << " >> "; break;
+      case Instruction::FRem:
+      case Instruction::And:
+      case Instruction::Or:
+      case Instruction::Xor:
       case Instruction::ICmp:
-        switch (CE->getPredicate()) {
-          case ICmpInst::ICMP_EQ: Out << " == "; break;
-          case ICmpInst::ICMP_NE: Out << " != "; break;
-          case ICmpInst::ICMP_SLT:
-          case ICmpInst::ICMP_ULT: Out << " < "; break;
-          case ICmpInst::ICMP_SLE:
-          case ICmpInst::ICMP_ULE: Out << " <= "; break;
-          case ICmpInst::ICMP_SGT:
-          case ICmpInst::ICMP_UGT: Out << " > "; break;
-          case ICmpInst::ICMP_SGE:
-          case ICmpInst::ICMP_UGE: Out << " >= "; break;
-          default: llvm_unreachable("Illegal ICmp predicate");
+      case Instruction::Shl:
+      case Instruction::LShr:
+      case Instruction::AShr:
+        {
+          Out << '(';
+          bool NeedsClosingParens = printConstExprCast(CE);
+          printConstantWithCast(CE->getOperand(0), CE->getOpcode());
+          switch (CE->getOpcode()) {
+            case Instruction::Add:
+            case Instruction::FAdd: Out << " + "; break;
+            case Instruction::Sub:
+            case Instruction::FSub: Out << " - "; break;
+            case Instruction::Mul:
+            case Instruction::FMul: Out << " * "; break;
+            case Instruction::URem:
+            case Instruction::SRem:
+            case Instruction::FRem: Out << " % "; break;
+            case Instruction::UDiv:
+            case Instruction::SDiv:
+            case Instruction::FDiv: Out << " / "; break;
+            case Instruction::And: Out << " & "; break;
+            case Instruction::Or:  Out << " | "; break;
+            case Instruction::Xor: Out << " ^ "; break;
+            case Instruction::Shl: Out << " << "; break;
+            case Instruction::LShr:
+            case Instruction::AShr: Out << " >> "; break;
+            case Instruction::ICmp:
+                                    switch (CE->getPredicate()) {
+                                      case ICmpInst::ICMP_EQ: Out << " == "; break;
+                                      case ICmpInst::ICMP_NE: Out << " != "; break;
+                                      case ICmpInst::ICMP_SLT:
+                                      case ICmpInst::ICMP_ULT: Out << " < "; break;
+                                      case ICmpInst::ICMP_SLE:
+                                      case ICmpInst::ICMP_ULE: Out << " <= "; break;
+                                      case ICmpInst::ICMP_SGT:
+                                      case ICmpInst::ICMP_UGT: Out << " > "; break;
+                                      case ICmpInst::ICMP_SGE:
+                                      case ICmpInst::ICMP_UGE: Out << " >= "; break;
+                                      default: llvm_unreachable("Illegal ICmp predicate");
+                                    }
+                                    break;
+            default: llvm_unreachable("Illegal opcode here!");
+          }
+          printConstantWithCast(CE->getOperand(1), CE->getOpcode());
+          if (NeedsClosingParens)
+            Out << "))";
+          Out << ')';
+          return;
         }
-        break;
-      default: llvm_unreachable("Illegal opcode here!");
-      }
-      printConstantWithCast(CE->getOperand(1), CE->getOpcode());
-      if (NeedsClosingParens)
-        Out << "))";
-      Out << ')';
-      return;
-    }
-    case Instruction::FCmp: {
-      Out << '(';
-      bool NeedsClosingParens = printConstExprCast(CE);
-      if (CE->getPredicate() == FCmpInst::FCMP_FALSE)
-        Out << "0";
-      else if (CE->getPredicate() == FCmpInst::FCMP_TRUE)
-        Out << "1";
-      else {
-        Out << "llvm_fcmp_" << getCmpPredicateName((CmpInst::Predicate)CE->getPredicate()) << "(";
-        printConstant(CE->getOperand(0), ContextCasted);
-        Out << ", ";
-        printConstant(CE->getOperand(1), ContextCasted);
-        Out << ")";
-      }
-      if (NeedsClosingParens)
-        Out << "))";
-      Out << ')';
-      return;
-    }
-    default:
+      case Instruction::FCmp: {
+                                Out << '(';
+                                bool NeedsClosingParens = printConstExprCast(CE);
+                                if (CE->getPredicate() == FCmpInst::FCMP_FALSE)
+                                  Out << "0";
+                                else if (CE->getPredicate() == FCmpInst::FCMP_TRUE)
+                                  Out << "1";
+                                else {
+                                  Out << "llvm_fcmp_" << getCmpPredicateName((CmpInst::Predicate)CE->getPredicate()) << "(";
+                                  printConstant(CE->getOperand(0), ContextCasted);
+                                  Out << ", ";
+                                  printConstant(CE->getOperand(1), ContextCasted);
+                                  Out << ")";
+                                }
+                                if (NeedsClosingParens)
+                                  Out << "))";
+                                Out << ')';
+                                return;
+                              }
+      default:
 #ifndef NDEBUG
-      errs() << "CWriter Error: Unhandled constant expression: "
-           << *CE << "\n";
+                              errs() << "CWriter Error: Unhandled constant expression: "
+                                << *CE << "\n";
 #endif
-      llvm_unreachable(0);
+                              llvm_unreachable(0);
     }
   } else if (isa<UndefValue>(CPV) && CPV->getType()->isSingleValueType()) {
     if (CPV->getType()->isVectorTy()) {
@@ -1000,236 +1001,236 @@ void CWriter::printConstant(Constant *CPV, enum OperandContext Context) {
   if (ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
     Type* Ty = CI->getType();
     unsigned ActiveBits = CI->getValue().getMinSignedBits();
-//    DEBUG(errs() << "Here: " << *CI << ", " << *Ty << ", " << ActiveBits << "\n");
+    //    DEBUG(errs() << "Here: " << *CI << ", " << *Ty << ", " << ActiveBits << "\n");
     Out << CI->getSExtValue();
-//    if (Ty == Type::getInt1Ty(CPV->getContext())) {
-//      Out << (CI->getZExtValue() ? '1' : '0');
-//    } else if (Context != ContextNormal &&
-//              ActiveBits < 64 &&
-//              Ty->getPrimitiveSizeInBits() < 64 &&
-//              ActiveBits < Ty->getPrimitiveSizeInBits()) {
-//      if (ActiveBits >= 32)
-//        Out << "(long)";
-//      Out << CI->getSExtValue(); // most likely a shorter representation
-////      if (ActiveBits >= 32)
-////        Out << ")";
-//    } else if (Ty->getPrimitiveSizeInBits() < 32 && Context == ContextNormal) {
-//      Out << "((";
-//      printSimpleType(Out, Ty, false) << ')';
-//      if (CI->isMinValue(true))
-//        Out << CI->getZExtValue() << 'u';
-//      else
-//        Out << CI->getSExtValue();
-//      Out << ')';
-//    } else if (Ty->getPrimitiveSizeInBits() <= 32) {
-//      Out << CI->getZExtValue() << 'u';
-//    } else if (Ty->getPrimitiveSizeInBits() <= 64) {
-//      Out << "(ulong)" << CI->getZExtValue();
-////      Out << "UINT64_C(" << CI->getZExtValue() << ")";
-////    } else if (Ty->getPrimitiveSizeInBits() <= 128) {
-////      const APInt &V = CI->getValue();
-////      const APInt &Vlo = V.getLoBits(64);
-////      const APInt &Vhi = V.getHiBits(64);
-////      Out << (Context == ContextStatic ? "UINT128_C" : "llvm_ctor_u128");
-////      Out << "(UINT64_C(" << Vhi.getZExtValue() << "), UINT64_C(" << Vlo.getZExtValue() << "))";
-//    }
+    //    if (Ty == Type::getInt1Ty(CPV->getContext())) {
+    //      Out << (CI->getZExtValue() ? '1' : '0');
+    //    } else if (Context != ContextNormal &&
+    //              ActiveBits < 64 &&
+    //              Ty->getPrimitiveSizeInBits() < 64 &&
+    //              ActiveBits < Ty->getPrimitiveSizeInBits()) {
+    //      if (ActiveBits >= 32)
+    //        Out << "(long)";
+    //      Out << CI->getSExtValue(); // most likely a shorter representation
+    ////      if (ActiveBits >= 32)
+    ////        Out << ")";
+    //    } else if (Ty->getPrimitiveSizeInBits() < 32 && Context == ContextNormal) {
+    //      Out << "((";
+    //      printSimpleType(Out, Ty, false) << ')';
+    //      if (CI->isMinValue(true))
+    //        Out << CI->getZExtValue() << 'u';
+    //      else
+    //        Out << CI->getSExtValue();
+    //      Out << ')';
+    //    } else if (Ty->getPrimitiveSizeInBits() <= 32) {
+    //      Out << CI->getZExtValue() << 'u';
+    //    } else if (Ty->getPrimitiveSizeInBits() <= 64) {
+    //      Out << "(ulong)" << CI->getZExtValue();
+    ////      Out << "UINT64_C(" << CI->getZExtValue() << ")";
+    ////    } else if (Ty->getPrimitiveSizeInBits() <= 128) {
+    ////      const APInt &V = CI->getValue();
+    ////      const APInt &Vlo = V.getLoBits(64);
+    ////      const APInt &Vhi = V.getHiBits(64);
+    ////      Out << (Context == ContextStatic ? "UINT128_C" : "llvm_ctor_u128");
+    ////      Out << "(UINT64_C(" << Vhi.getZExtValue() << "), UINT64_C(" << Vlo.getZExtValue() << "))";
+    //    }
     return;
   }
 
   switch (CPV->getType()->getTypeID()) {
-  case Type::FloatTyID:
-  case Type::DoubleTyID:
-  case Type::X86_FP80TyID:
-  case Type::PPC_FP128TyID:
-  case Type::FP128TyID: {
-    ConstantFP *FPC = cast<ConstantFP>(CPV);
-    std::map<const ConstantFP*, unsigned>::iterator I = FPConstantMap.find(FPC);
-    if (I != FPConstantMap.end()) {
-      // Because of FP precision problems we must load from a stack allocated
-      // value that holds the value in hex.
-      Out << "(*(" << (FPC->getType() == Type::getFloatTy(CPV->getContext()) ?
-                       "float" :
-                       FPC->getType() == Type::getDoubleTy(CPV->getContext()) ?
-                       "double" :
-                       "long double")
-          << "*)&FPConstant" << I->second << ')';
-    } else {
-      double V;
-      if (FPC->getType() == Type::getFloatTy(CPV->getContext()))
-        V = FPC->getValueAPF().convertToFloat();
-      else if (FPC->getType() == Type::getDoubleTy(CPV->getContext()))
-        V = FPC->getValueAPF().convertToDouble();
-      else {
-        // Long double.  Convert the number to double, discarding precision.
-        // This is not awesome, but it at least makes the CBE output somewhat
-        // useful.
-        APFloat Tmp = FPC->getValueAPF();
-        bool LosesInfo;
-        Tmp.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &LosesInfo);
-        V = Tmp.convertToDouble();
-      }
-
-      if (std::isnan(V)) {
-        // The value is NaN
-
-        // FIXME the actual NaN bits should be emitted.
-        // The prefix for a quiet NaN is 0x7FF8. For a signalling NaN,
-        // it's 0x7ff4.
-        const unsigned long QuietNaN = 0x7ff8UL;
-        //const unsigned long SignalNaN = 0x7ff4UL;
-
-        // We need to grab the first part of the FP #
-        char Buffer[100];
-
-        uint64_t ll = DoubleToBits(V);
-        sprintf(Buffer, "0x%llx", static_cast<long long>(ll));
-
-        std::string Num(&Buffer[0], &Buffer[6]);
-        unsigned long Val = strtoul(Num.c_str(), 0, 16);
-
-        if (FPC->getType() == Type::getFloatTy(FPC->getContext()))
-          Out << "LLVM_NAN" << (Val == QuietNaN ? "" : "S") << "F(\""
-              << Buffer << "\") /*nan*/ ";
-        else
-          Out << "LLVM_NAN" << (Val == QuietNaN ? "" : "S") << "(\""
-              << Buffer << "\") /*nan*/ ";
-      } else if (std::isinf(V)) {
-        // The value is Inf
-        if (V < 0) Out << '-';
-        Out << "LLVM_INF" <<
-            (FPC->getType() == Type::getFloatTy(FPC->getContext()) ? "F" : "")
-            << " /*inf*/ ";
-      } else {
-        std::string Num;
+    case Type::FloatTyID:
+    case Type::DoubleTyID:
+    case Type::X86_FP80TyID:
+    case Type::PPC_FP128TyID:
+    case Type::FP128TyID: {
+                            ConstantFP *FPC = cast<ConstantFP>(CPV);
+                            std::map<const ConstantFP*, unsigned>::iterator I = FPConstantMap.find(FPC);
+                            if (I != FPConstantMap.end()) {
+                              // Because of FP precision problems we must load from a stack allocated
+                              // value that holds the value in hex.
+                              Out << "(*(" << (FPC->getType() == Type::getFloatTy(CPV->getContext()) ?
+                                      "float" :
+                                      FPC->getType() == Type::getDoubleTy(CPV->getContext()) ?
+                                      "double" :
+                                      "long double")
+                                << "*)&FPConstant" << I->second << ')';
+                            } else {
+                              double V;
+                              if (FPC->getType() == Type::getFloatTy(CPV->getContext()))
+                                V = FPC->getValueAPF().convertToFloat();
+                              else if (FPC->getType() == Type::getDoubleTy(CPV->getContext()))
+                                V = FPC->getValueAPF().convertToDouble();
+                              else {
+                                // Long double.  Convert the number to double, discarding precision.
+                                // This is not awesome, but it at least makes the CBE output somewhat
+                                // useful.
+                                APFloat Tmp = FPC->getValueAPF();
+                                bool LosesInfo;
+                                Tmp.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &LosesInfo);
+                                V = Tmp.convertToDouble();
+                              }
+
+                              if (std::isnan(V)) {
+                                // The value is NaN
+
+                                // FIXME the actual NaN bits should be emitted.
+                                // The prefix for a quiet NaN is 0x7FF8. For a signalling NaN,
+                                // it's 0x7ff4.
+                                const unsigned long QuietNaN = 0x7ff8UL;
+                                //const unsigned long SignalNaN = 0x7ff4UL;
+
+                                // We need to grab the first part of the FP #
+                                char Buffer[100];
+
+                                uint64_t ll = DoubleToBits(V);
+                                sprintf(Buffer, "0x%llx", static_cast<long long>(ll));
+
+                                std::string Num(&Buffer[0], &Buffer[6]);
+                                unsigned long Val = strtoul(Num.c_str(), 0, 16);
+
+                                if (FPC->getType() == Type::getFloatTy(FPC->getContext()))
+                                  Out << "LLVM_NAN" << (Val == QuietNaN ? "" : "S") << "F(\""
+                                    << Buffer << "\") /*nan*/ ";
+                                else
+                                  Out << "LLVM_NAN" << (Val == QuietNaN ? "" : "S") << "(\""
+                                    << Buffer << "\") /*nan*/ ";
+                              } else if (std::isinf(V)) {
+                                // The value is Inf
+                                if (V < 0) Out << '-';
+                                Out << "LLVM_INF" <<
+                                  (FPC->getType() == Type::getFloatTy(FPC->getContext()) ? "F" : "")
+                                  << " /*inf*/ ";
+                              } else {
+                                std::string Num;
 #if HAVE_PRINTF_A && ENABLE_CBE_PRINTF_A
-        // Print out the constant as a floating point number.
-        char Buffer[100];
-        sprintf(Buffer, "%a", V);
-        Num = Buffer;
+                                // Print out the constant as a floating point number.
+                                char Buffer[100];
+                                sprintf(Buffer, "%a", V);
+                                Num = Buffer;
 #else
-        Num = ftostr(FPC->getValueAPF());
+                                Num = ftostr(FPC->getValueAPF());
 #endif
-       Out << Num;
-      }
-    }
-    break;
-  }
-
-  case Type::ArrayTyID: {
-    if (printConstantString(CPV, Context)) break;
-    ArrayType *AT = cast<ArrayType>(CPV->getType());
-    assert(AT->getNumElements() != 0 && !isEmptyType(AT));
-    if (Context != ContextStatic) {
-      CtorDeclTypes.insert(AT);
-      Out << "llvm_ctor_";
-      printTypeString(Out, AT, false);
-      Out << "(";
-      Context = ContextCasted;
-    } else {
-      Out << "{ { "; // Arrays are wrapped in struct types.
-    }
-    if (ConstantArray *CA = dyn_cast<ConstantArray>(CPV)) {
-      printConstantArray(CA, Context);
-    } else if (ConstantDataSequential *CDS =
-                 dyn_cast<ConstantDataSequential>(CPV)) {
-      printConstantDataSequential(CDS, Context);
-    } else {
-      assert(isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV));
-      Constant *CZ = Constant::getNullValue(AT->getElementType());
-      printConstant(CZ, Context);
-      for (unsigned i = 1, e = AT->getNumElements(); i != e; ++i) {
-        Out << ", ";
-        printConstant(CZ, Context);
-      }
-    }
-    Out << (Context == ContextStatic ? " } }" : ")"); // Arrays are wrapped in struct types.
-    break;
-  }
-
-  case Type::VectorTyID: {
-    VectorType *VT = cast<VectorType>(CPV->getType());
-    assert(VT->getNumElements() != 0 && !isEmptyType(VT));
-    if (Context != ContextStatic) {
-      CtorDeclTypes.insert(VT);
-      Out << "llvm_ctor_";
-      printTypeString(Out, VT, false);
-      Out << "(";
-      Context = ContextCasted;
-    } else {
-      Out << "{ ";
-    }
-    if (ConstantVector *CV = dyn_cast<ConstantVector>(CPV)) {
-      printConstantVector(CV, Context);
-    } else if (ConstantDataSequential *CDS =
-               dyn_cast<ConstantDataSequential>(CPV)) {
-      printConstantDataSequential(CDS, Context);
-    } else {
-      assert(isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV));
-      Constant *CZ = Constant::getNullValue(VT->getElementType());
-      printConstant(CZ, Context);
-      for (unsigned i = 1, e = VT->getNumElements(); i != e; ++i) {
-        Out << ", ";
-        printConstant(CZ, Context);
-      }
-    }
-    Out << (Context == ContextStatic ? " }" : ")");
-    break;
-  }
-
-  case Type::StructTyID: {
-    StructType *ST = cast<StructType>(CPV->getType());
-    assert(!isEmptyType(ST));
-    if (Context != ContextStatic) {
-      CtorDeclTypes.insert(ST);
-      Out << "llvm_ctor_";
-      printTypeString(Out, ST, false);
-      Out << "(";
-      Context = ContextCasted;
-    } else {
-      Out << "{ ";
-    }
-
-    if (isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV)) {
-      bool printed = false;
-      for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
-        Type *ElTy = ST->getElementType(i);
-        if (isEmptyType(ElTy)) continue;
-        if (printed) Out << ", ";
-        printConstant(Constant::getNullValue(ElTy), Context);
-        printed = true;
-      }
-      assert(printed);
-    } else {
-      bool printed = false;
-      for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
-        Constant *C = cast<Constant>(CPV->getOperand(i));
-        if (isEmptyType(C->getType())) continue;
-        if (printed) Out << ", ";
-        printConstant(C, Context);
-        printed = true;
-      }
-      assert(printed);
-    }
-    Out << (Context == ContextStatic ? " }" : ")");
-    break;
-  }
-
-  case Type::PointerTyID:
-    if (isa<ConstantPointerNull>(CPV)) {
-      Out << "((";
-      printTypeName(Out, CPV->getType()); // sign doesn't matter
-      Out << ")/*NULL*/0)";
-      break;
-    } else if (GlobalValue *GV = dyn_cast<GlobalValue>(CPV)) {
-      writeOperand(GV);
-      break;
-    }
-    // FALL THROUGH
-  default:
+                                Out << Num;
+                              }
+                            }
+                            break;
+                          }
+
+    case Type::ArrayTyID: {
+                            if (printConstantString(CPV, Context)) break;
+                            ArrayType *AT = cast<ArrayType>(CPV->getType());
+                            assert(AT->getNumElements() != 0 && !isEmptyType(AT));
+                            if (Context != ContextStatic) {
+                              CtorDeclTypes.insert(AT);
+                              Out << "llvm_ctor_";
+                              printTypeString(Out, AT, false);
+                              Out << "(";
+                              Context = ContextCasted;
+                            } else {
+                              Out << "{ { "; // Arrays are wrapped in struct types.
+                            }
+                            if (ConstantArray *CA = dyn_cast<ConstantArray>(CPV)) {
+                              printConstantArray(CA, Context);
+                            } else if (ConstantDataSequential *CDS =
+                                dyn_cast<ConstantDataSequential>(CPV)) {
+                              printConstantDataSequential(CDS, Context);
+                            } else {
+                              assert(isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV));
+                              Constant *CZ = Constant::getNullValue(AT->getElementType());
+                              printConstant(CZ, Context);
+                              for (unsigned i = 1, e = AT->getNumElements(); i != e; ++i) {
+                                Out << ", ";
+                                printConstant(CZ, Context);
+                              }
+                            }
+                            Out << (Context == ContextStatic ? " } }" : ")"); // Arrays are wrapped in struct types.
+                            break;
+                          }
+
+    case Type::VectorTyID: {
+                             VectorType *VT = cast<VectorType>(CPV->getType());
+                             assert(VT->getNumElements() != 0 && !isEmptyType(VT));
+                             if (Context != ContextStatic) {
+                               CtorDeclTypes.insert(VT);
+                               Out << "llvm_ctor_";
+                               printTypeString(Out, VT, false);
+                               Out << "(";
+                               Context = ContextCasted;
+                             } else {
+                               Out << "{ ";
+                             }
+                             if (ConstantVector *CV = dyn_cast<ConstantVector>(CPV)) {
+                               printConstantVector(CV, Context);
+                             } else if (ConstantDataSequential *CDS =
+                                 dyn_cast<ConstantDataSequential>(CPV)) {
+                               printConstantDataSequential(CDS, Context);
+                             } else {
+                               assert(isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV));
+                               Constant *CZ = Constant::getNullValue(VT->getElementType());
+                               printConstant(CZ, Context);
+                               for (unsigned i = 1, e = VT->getNumElements(); i != e; ++i) {
+                                 Out << ", ";
+                                 printConstant(CZ, Context);
+                               }
+                             }
+                             Out << (Context == ContextStatic ? " }" : ")");
+                             break;
+                           }
+
+    case Type::StructTyID: {
+                             StructType *ST = cast<StructType>(CPV->getType());
+                             assert(!isEmptyType(ST));
+                             if (Context != ContextStatic) {
+                               CtorDeclTypes.insert(ST);
+                               Out << "llvm_ctor_";
+                               printTypeString(Out, ST, false);
+                               Out << "(";
+                               Context = ContextCasted;
+                             } else {
+                               Out << "{ ";
+                             }
+
+                             if (isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV)) {
+                               bool printed = false;
+                               for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
+                                 Type *ElTy = ST->getElementType(i);
+                                 if (isEmptyType(ElTy)) continue;
+                                 if (printed) Out << ", ";
+                                 printConstant(Constant::getNullValue(ElTy), Context);
+                                 printed = true;
+                               }
+                               assert(printed);
+                             } else {
+                               bool printed = false;
+                               for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
+                                 Constant *C = cast<Constant>(CPV->getOperand(i));
+                                 if (isEmptyType(C->getType())) continue;
+                                 if (printed) Out << ", ";
+                                 printConstant(C, Context);
+                                 printed = true;
+                               }
+                               assert(printed);
+                             }
+                             Out << (Context == ContextStatic ? " }" : ")");
+                             break;
+                           }
+
+    case Type::PointerTyID:
+                           if (isa<ConstantPointerNull>(CPV)) {
+                             Out << "((";
+                             printTypeName(Out, CPV->getType()); // sign doesn't matter
+                             Out << ")/*NULL*/0)";
+                             break;
+                           } else if (GlobalValue *GV = dyn_cast<GlobalValue>(CPV)) {
+                             writeOperand(GV);
+                             break;
+                           }
+                           // FALL THROUGH
+    default:
 #ifndef NDEBUG
-    errs() << "Unknown constant type: " << *CPV << "\n";
+                           errs() << "Unknown constant type: " << *CPV << "\n";
 #endif
-    llvm_unreachable(0);
+                           llvm_unreachable(0);
   }
 }
 
@@ -1241,41 +1242,41 @@ bool CWriter::printConstExprCast(ConstantExpr* CE) {
   Type *Ty = CE->getOperand(0)->getType();
   bool TypeIsSigned = false;
   switch (CE->getOpcode()) {
-  case Instruction::Add:
-  case Instruction::Sub:
-  case Instruction::Mul:
-    // We need to cast integer arithmetic so that it is always performed
-    // as unsigned, to avoid undefined behavior on overflow.
-  case Instruction::LShr:
-  case Instruction::URem:
-  case Instruction::UDiv: NeedsExplicitCast = true; break;
-  case Instruction::AShr:
-  case Instruction::SRem:
-  case Instruction::SDiv: NeedsExplicitCast = true; TypeIsSigned = true; break;
-  case Instruction::SExt:
-    Ty = CE->getType();
-    NeedsExplicitCast = true;
-    TypeIsSigned = true;
-    break;
-  case Instruction::ZExt:
-  case Instruction::Trunc:
-  case Instruction::FPTrunc:
-  case Instruction::FPExt:
-  case Instruction::UIToFP:
-  case Instruction::SIToFP:
-  case Instruction::FPToUI:
-  case Instruction::FPToSI:
-  case Instruction::PtrToInt:
-  case Instruction::IntToPtr:
-  case Instruction::BitCast:
-    Ty = CE->getType();
-    NeedsExplicitCast = true;
-    break;
-  default: break;
+    case Instruction::Add:
+    case Instruction::Sub:
+    case Instruction::Mul:
+      // We need to cast integer arithmetic so that it is always performed
+      // as unsigned, to avoid undefined behavior on overflow.
+    case Instruction::LShr:
+    case Instruction::URem:
+    case Instruction::UDiv: NeedsExplicitCast = true; break;
+    case Instruction::AShr:
+    case Instruction::SRem:
+    case Instruction::SDiv: NeedsExplicitCast = true; TypeIsSigned = true; break;
+    case Instruction::SExt:
+                            Ty = CE->getType();
+                            NeedsExplicitCast = true;
+                            TypeIsSigned = true;
+                            break;
+    case Instruction::ZExt:
+    case Instruction::Trunc:
+    case Instruction::FPTrunc:
+    case Instruction::FPExt:
+    case Instruction::UIToFP:
+    case Instruction::SIToFP:
+    case Instruction::FPToUI:
+    case Instruction::FPToSI:
+    case Instruction::PtrToInt:
+    case Instruction::IntToPtr:
+    case Instruction::BitCast:
+                            Ty = CE->getType();
+                            NeedsExplicitCast = true;
+                            break;
+    default: break;
   }
   if (NeedsExplicitCast) {
     Out << "((";
-      printTypeName(Out, Ty, TypeIsSigned); // not integer, sign doesn't matter
+    printTypeName(Out, Ty, TypeIsSigned); // not integer, sign doesn't matter
     Out << ")(";
   }
   return NeedsExplicitCast;
@@ -1308,7 +1309,7 @@ void CWriter::printConstantWithCast(Constant* CPV, unsigned Opcode) {
 }
 
 std::string CWriter::GetValueName(Value *Operand) {
-	//DEBUG(errs() << "In getvaluename: " << *Operand << "\n");
+  //DEBUG(errs() << "In getvaluename: " << *Operand << "\n");
 
   // Resolve potential alias.
   if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Operand)) {
@@ -1332,7 +1333,7 @@ std::string CWriter::GetValueName(Value *Operand) {
   VarName.reserve(Name.capacity());
 
   for (std::string::iterator I = Name.begin(), E = Name.end();
-       I != E; ++I) {
+      I != E; ++I) {
     unsigned char ch = *I;
 
     if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
@@ -1356,9 +1357,9 @@ void CWriter::writeInstComputationInline(Instruction &I) {
   unsigned mask = 0;
   Type *Ty = I.getType();
   if (Ty->isIntegerTy()) {
-     IntegerType *ITy = static_cast<IntegerType*>(Ty);
-     if (!ITy->isPowerOf2ByteWidth())
-       mask = ITy->getBitMask();
+    IntegerType *ITy = static_cast<IntegerType*>(Ty);
+    if (!ITy->isPowerOf2ByteWidth())
+      mask = ITy->getBitMask();
   }
 
   // If this is a non-trivial bool computation, make sure to truncate down to
@@ -1429,26 +1430,26 @@ void CWriter::writeOperandDeref(Value *Operand) {
 bool CWriter::writeInstructionCast(Instruction &I) {
   Type *Ty = I.getOperand(0)->getType();
   switch (I.getOpcode()) {
-  case Instruction::Add:
-  case Instruction::Sub:
-  case Instruction::Mul:
-    // We need to cast integer arithmetic so that it is always performed
-    // as unsigned, to avoid undefined behavior on overflow.
-  case Instruction::LShr:
-  case Instruction::URem:
-  case Instruction::UDiv:
-    Out << "((";
-    printSimpleType(Out, Ty, false);
-    Out << ")(";
-    return true;
-  case Instruction::AShr:
-  case Instruction::SRem:
-  case Instruction::SDiv:
-    Out << "((";
-    printSimpleType(Out, Ty, true);
-    Out << ")(";
-    return true;
-  default: break;
+    case Instruction::Add:
+    case Instruction::Sub:
+    case Instruction::Mul:
+      // We need to cast integer arithmetic so that it is always performed
+      // as unsigned, to avoid undefined behavior on overflow.
+    case Instruction::LShr:
+    case Instruction::URem:
+    case Instruction::UDiv:
+      Out << "((";
+      printSimpleType(Out, Ty, false);
+      Out << ")(";
+      return true;
+    case Instruction::AShr:
+    case Instruction::SRem:
+    case Instruction::SDiv:
+      Out << "((";
+      printSimpleType(Out, Ty, true);
+      Out << ")(";
+      return true;
+    default: break;
   }
   return false;
 }
@@ -1457,10 +1458,10 @@ bool CWriter::writeInstructionCast(Instruction &I) {
 // This will be used in cases where an instruction has specific type
 // requirements (usually signedness) for its operands.
 void CWriter::opcodeNeedsCast(unsigned Opcode,
-      // Indicate whether to do the cast or not.
-      bool &shouldCast,
-      // Indicate whether the cast should be to a signed type or not.
-      bool &castIsSigned) {
+    // Indicate whether to do the cast or not.
+    bool &shouldCast,
+    // Indicate whether the cast should be to a signed type or not.
+    bool &castIsSigned) {
 
   // Based on the Opcode for which this Operand is being written, determine
   // the new type to which the operand should be casted by setting the value
@@ -1493,24 +1494,24 @@ void CWriter::opcodeNeedsCast(unsigned Opcode,
 }
 
 void CWriter::writeOperandWithCast(Value* Operand, unsigned Opcode) {
-//  DEBUG(errs() << "Here: " << *Operand << "\n");
+  //  DEBUG(errs() << "Here: " << *Operand << "\n");
   // Write out the casted operand if we should, otherwise just write the
   // operand.
 
   // Extract the operand's type, we'll need it.
-//  bool shouldCast;
-//  bool castIsSigned;
-//  opcodeNeedsCast(Opcode, shouldCast, castIsSigned);
-//
-//  Type* OpTy = Operand->getType();
-//  if (shouldCast) {
-//    Out << "((";
-//    printSimpleType(Out, OpTy, castIsSigned);
-//    Out << ")";
-//    writeOperand(Operand, ContextCasted);
-//    Out << ")";
-//  } else
-    writeOperand(Operand, ContextNormal/*ContextCasted*/);
+  //  bool shouldCast;
+  //  bool castIsSigned;
+  //  opcodeNeedsCast(Opcode, shouldCast, castIsSigned);
+  //
+  //  Type* OpTy = Operand->getType();
+  //  if (shouldCast) {
+  //    Out << "((";
+  //    printSimpleType(Out, OpTy, castIsSigned);
+  //    Out << ")";
+  //    writeOperand(Operand, ContextCasted);
+  //    Out << ")";
+  //  } else
+  writeOperand(Operand, ContextNormal/*ContextCasted*/);
 }
 
 // Write the operand with a cast to another type based on the icmp predicate
@@ -1549,54 +1550,54 @@ void CWriter::writeOperandWithCast(Value* Operand, ICmpInst &Cmp) {
 // directives to cater to specific compilers as need be.
 //
 static void generateCompilerSpecificCode(raw_ostream& Out,
-                                         const DataLayout *TD) {
+    const DataLayout *TD) {
   // Alloca is hard to get, and we don't want to include stdlib.h here.
   Out << "/* get a declaration for alloca */\n"
-      << "#if defined(__CYGWIN__) || defined(__MINGW32__)\n"
-      << "#define  alloca(x) __builtin_alloca((x))\n"
-      << "#define _alloca(x) __builtin_alloca((x))\n"
-      << "#elif defined(__APPLE__)\n"
-      << "extern void *__builtin_alloca(unsigned long);\n"
-      << "#define alloca(x) __builtin_alloca(x)\n"
-      << "#define longjmp _longjmp\n"
-      << "#define setjmp _setjmp\n"
-      << "#elif defined(__sun__)\n"
-      << "#if defined(__sparcv9)\n"
-      << "extern void *__builtin_alloca(unsigned long);\n"
-      << "#else\n"
-      << "extern void *__builtin_alloca(unsigned int);\n"
-      << "#endif\n"
-      << "#define alloca(x) __builtin_alloca(x)\n"
-      << "#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__arm__)\n"
-      << "#define alloca(x) __builtin_alloca(x)\n"
-      << "#elif defined(_MSC_VER)\n"
-      << "#define alloca(x) _alloca(x)\n"
-      << "#else\n"
-      << "#include <alloca.h>\n"
-      << "#endif\n\n";
+    << "#if defined(__CYGWIN__) || defined(__MINGW32__)\n"
+    << "#define  alloca(x) __builtin_alloca((x))\n"
+    << "#define _alloca(x) __builtin_alloca((x))\n"
+    << "#elif defined(__APPLE__)\n"
+    << "extern void *__builtin_alloca(unsigned long);\n"
+    << "#define alloca(x) __builtin_alloca(x)\n"
+    << "#define longjmp _longjmp\n"
+    << "#define setjmp _setjmp\n"
+    << "#elif defined(__sun__)\n"
+    << "#if defined(__sparcv9)\n"
+    << "extern void *__builtin_alloca(unsigned long);\n"
+    << "#else\n"
+    << "extern void *__builtin_alloca(unsigned int);\n"
+    << "#endif\n"
+    << "#define alloca(x) __builtin_alloca(x)\n"
+    << "#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__arm__)\n"
+    << "#define alloca(x) __builtin_alloca(x)\n"
+    << "#elif defined(_MSC_VER)\n"
+    << "#define alloca(x) _alloca(x)\n"
+    << "#else\n"
+    << "#include <alloca.h>\n"
+    << "#endif\n\n";
 
   // On Mac OS X, "external weak" is spelled "__attribute__((weak_import))".
   Out << "#if defined(__GNUC__) && defined(__APPLE_CC__)\n"
-      << "#define __EXTERNAL_WEAK__ __attribute__((weak_import))\n"
-      << "#elif defined(__GNUC__)\n"
-      << "#define __EXTERNAL_WEAK__ __attribute__((weak))\n"
-      << "#else\n"
-      << "#define __EXTERNAL_WEAK__\n"
-      << "#endif\n\n";
+    << "#define __EXTERNAL_WEAK__ __attribute__((weak_import))\n"
+    << "#elif defined(__GNUC__)\n"
+    << "#define __EXTERNAL_WEAK__ __attribute__((weak))\n"
+    << "#else\n"
+    << "#define __EXTERNAL_WEAK__\n"
+    << "#endif\n\n";
 
   // For now, turn off the weak linkage attribute on Mac OS X. (See above.)
   Out << "#if defined(__GNUC__) && defined(__APPLE_CC__)\n"
-      << "#define __ATTRIBUTE_WEAK__\n"
-      << "#elif defined(__GNUC__)\n"
-      << "#define __ATTRIBUTE_WEAK__ __attribute__((weak))\n"
-      << "#else\n"
-      << "#define __ATTRIBUTE_WEAK__\n"
-      << "#endif\n\n";
+    << "#define __ATTRIBUTE_WEAK__\n"
+    << "#elif defined(__GNUC__)\n"
+    << "#define __ATTRIBUTE_WEAK__ __attribute__((weak))\n"
+    << "#else\n"
+    << "#define __ATTRIBUTE_WEAK__\n"
+    << "#endif\n\n";
 
   // Add hidden visibility support. FIXME: APPLE_CC?
   Out << "#if defined(__GNUC__)\n"
-      << "#define __HIDDEN__ __attribute__((visibility(\"hidden\")))\n"
-      << "#endif\n\n";
+    << "#define __HIDDEN__ __attribute__((visibility(\"hidden\")))\n"
+    << "#endif\n\n";
 
   // Define unaligned-load helper macro
   Out << "#ifdef _MSC_VER\n";
@@ -1653,96 +1654,96 @@ static void generateCompilerSpecificCode(raw_ostream& Out,
   //
   // Similar to __builtin_inf, except the return type is float.
   Out << "#ifdef __GNUC__\n"
-      << "#define LLVM_NAN(NanStr)   __builtin_nan(NanStr)   /* Double */\n"
-      << "#define LLVM_NANF(NanStr)  __builtin_nanf(NanStr)  /* Float */\n"
-      //<< "#define LLVM_NANS(NanStr)  __builtin_nans(NanStr)  /* Double */\n"
-      //<< "#define LLVM_NANSF(NanStr) __builtin_nansf(NanStr) /* Float */\n"
-      << "#define LLVM_INF           __builtin_inf()         /* Double */\n"
-      << "#define LLVM_INFF          __builtin_inff()        /* Float */\n"
-      << "#define LLVM_PREFETCH(addr,rw,locality) "
-                              "__builtin_prefetch(addr,rw,locality)\n"
-      << "#define __ATTRIBUTE_CTOR__ __attribute__((constructor))\n"
-      << "#define __ATTRIBUTE_DTOR__ __attribute__((destructor))\n"
-      << "#else\n"
-      << "#define LLVM_NAN(NanStr)   ((double)NAN)           /* Double */\n"
-      << "#define LLVM_NANF(NanStr)  ((float)NAN))           /* Float */\n"
-      //<< "#define LLVM_NANS(NanStr)  ((double)NAN)           /* Double */\n"
-      //<< "#define LLVM_NANSF(NanStr) ((single)NAN)           /* Float */\n"
-      << "#define LLVM_INF           ((double)INFINITY)      /* Double */\n"
-      << "#define LLVM_INFF          ((float)INFINITY)       /* Float */\n"
-      << "#define LLVM_PREFETCH(addr,rw,locality)            /* PREFETCH */\n"
-      << "#define __ATTRIBUTE_CTOR__ \"__attribute__((constructor)) not supported on this compiler\"\n"
-      << "#define __ATTRIBUTE_DTOR__ \"__attribute__((destructor)) not supported on this compiler\"\n"
-      << "#endif\n\n";
+    << "#define LLVM_NAN(NanStr)   __builtin_nan(NanStr)   /* Double */\n"
+    << "#define LLVM_NANF(NanStr)  __builtin_nanf(NanStr)  /* Float */\n"
+    //<< "#define LLVM_NANS(NanStr)  __builtin_nans(NanStr)  /* Double */\n"
+    //<< "#define LLVM_NANSF(NanStr) __builtin_nansf(NanStr) /* Float */\n"
+    << "#define LLVM_INF           __builtin_inf()         /* Double */\n"
+    << "#define LLVM_INFF          __builtin_inff()        /* Float */\n"
+    << "#define LLVM_PREFETCH(addr,rw,locality) "
+    "__builtin_prefetch(addr,rw,locality)\n"
+    << "#define __ATTRIBUTE_CTOR__ __attribute__((constructor))\n"
+    << "#define __ATTRIBUTE_DTOR__ __attribute__((destructor))\n"
+    << "#else\n"
+    << "#define LLVM_NAN(NanStr)   ((double)NAN)           /* Double */\n"
+    << "#define LLVM_NANF(NanStr)  ((float)NAN)            /* Float */\n" // non-GCC fallback; parentheses must balance
+    //<< "#define LLVM_NANS(NanStr)  ((double)NAN)           /* Double */\n"
+    //<< "#define LLVM_NANSF(NanStr) ((single)NAN)           /* Float */\n"
+    << "#define LLVM_INF           ((double)INFINITY)      /* Double */\n"
+    << "#define LLVM_INFF          ((float)INFINITY)       /* Float */\n"
+    << "#define LLVM_PREFETCH(addr,rw,locality)            /* PREFETCH */\n"
+    << "#define __ATTRIBUTE_CTOR__ \"__attribute__((constructor)) not supported on this compiler\"\n"
+    << "#define __ATTRIBUTE_DTOR__ \"__attribute__((destructor)) not supported on this compiler\"\n"
+    << "#endif\n\n";
 
   Out << "#if !defined(__GNUC__) || __GNUC__ < 4 /* Old GCC's, or compilers not GCC */ \n"
-      << "#define __builtin_stack_save() 0   /* not implemented */\n"
-      << "#define __builtin_stack_restore(X) /* noop */\n"
-      << "#endif\n\n";
+    << "#define __builtin_stack_save() 0   /* not implemented */\n"
+    << "#define __builtin_stack_restore(X) /* noop */\n"
+    << "#endif\n\n";
 
   // Output typedefs for 128-bit integers
   Out << "#if defined(__GNUC__) && defined(__LP64__) /* 128-bit integer types */\n"
-      << "typedef int __attribute__((mode(TI))) int128_t;\n"
-      << "typedef unsigned __attribute__((mode(TI))) uint128_t;\n"
-      << "#define UINT128_C(hi, lo) (((uint128_t)(hi) << 64) | (uint128_t)(lo))\n"
-      << "static __forceinline uint128_t llvm_ctor_u128(ulong hi, ulong lo) {"
-      << " return UINT128_C(hi, lo); }\n"
-      << "static __forceinline bool llvm_icmp_eq_u128(uint128_t l, uint128_t r) {"
-      << " return l == r; }\n"
-      << "static __forceinline bool llvm_icmp_ne_u128(uint128_t l, uint128_t r) {"
-      << " return l != r; }\n"
-      << "static __forceinline bool llvm_icmp_ule_u128(uint128_t l, uint128_t r) {"
-      << " return l <= r; }\n"
-      << "static __forceinline bool llvm_icmp_sle_i128(int128_t l, int128_t r) {"
-      << " return l <= r; }\n"
-      << "static __forceinline bool llvm_icmp_uge_u128(uint128_t l, uint128_t r) {"
-      << " return l >= r; }\n"
-      << "static __forceinline bool llvm_icmp_sge_i128(int128_t l, int128_t r) {"
-      << " return l >= r; }\n"
-      << "static __forceinline bool llvm_icmp_ult_u128(uint128_t l, uint128_t r) {"
-      << " return l < r; }\n"
-      << "static __forceinline bool llvm_icmp_slt_i128(int128_t l, int128_t r) {"
-      << " return l < r; }\n"
-      << "static __forceinline bool llvm_icmp_ugt_u128(uint128_t l, uint128_t r) {"
-      << " return l > r; }\n"
-      << "static __forceinline bool llvm_icmp_sgt_i128(int128_t l, int128_t r) {"
-      << " return l > r; }\n"
-
-      << "#else /* manual 128-bit types */\n"
-      // TODO: field order should be reversed for big-endian
-      << "typedef struct { ulong lo; ulong hi; } uint128_t;\n"
-      << "typedef uint128_t int128_t;\n"
-      << "#define UINT128_C(hi, lo) {(lo), (hi)}\n" // only use in Static context
-      << "static __forceinline uint128_t llvm_ctor_u128(ulong hi, ulong lo) {"
-      << " uint128_t r; r.lo = lo; r.hi = hi; return r; }\n"
-      << "static __forceinline bool llvm_icmp_eq_u128(uint128_t l, uint128_t r) {"
-      << " return l.hi == r.hi && l.lo == r.lo; }\n"
-      << "static __forceinline bool llvm_icmp_ne_u128(uint128_t l, uint128_t r) {"
-      << " return l.hi != r.hi || l.lo != r.lo; }\n"
-      << "static __forceinline bool llvm_icmp_ule_u128(uint128_t l, uint128_t r) {"
-      << " return l.hi < r.hi ? 1 : (l.hi == r.hi ? l.lo <= l.lo : 0); }\n"
-      << "static __forceinline bool llvm_icmp_sle_i128(int128_t l, int128_t r) {"
-      << " return (long)l.hi < (long)r.hi ? 1 : (l.hi == r.hi ? (long)l.lo <= (long)l.lo : 0); }\n"
-      << "static __forceinline bool llvm_icmp_uge_u128(uint128_t l, uint128_t r) {"
-      << " return l.hi > r.hi ? 1 : (l.hi == r.hi ? l.lo >= l.hi : 0); }\n"
-      << "static __forceinline bool llvm_icmp_sge_i128(int128_t l, int128_t r) {"
-      << " return (long)l.hi > (long)r.hi ? 1 : (l.hi == r.hi ? (long)l.lo >= (long)l.lo : 0); }\n"
-      << "static __forceinline bool llvm_icmp_ult_u128(uint128_t l, uint128_t r) {"
-      << " return l.hi < r.hi ? 1 : (l.hi == r.hi ? l.lo < l.hi : 0); }\n"
-      << "static __forceinline bool llvm_icmp_slt_i128(int128_t l, int128_t r) {"
-      << " return (long)l.hi < (long)r.hi ? 1 : (l.hi == r.hi ? (long)l.lo < (long)l.lo : 0); }\n"
-      << "static __forceinline bool llvm_icmp_ugt_u128(uint128_t l, uint128_t r) {"
-      << " return l.hi > r.hi ? 1 : (l.hi == r.hi ? l.lo > l.hi : 0); }\n"
-      << "static __forceinline bool llvm_icmp_sgt_i128(int128_t l, int128_t r) {"
-      << " return (long)l.hi > (long)r.hi ? 1 : (l.hi == r.hi ? (long)l.lo > (long)l.lo : 0); }\n"
-      << "#define __emulate_i128\n"
-      << "#endif\n\n";
+    << "typedef int __attribute__((mode(TI))) int128_t;\n"
+    << "typedef unsigned __attribute__((mode(TI))) uint128_t;\n"
+    << "#define UINT128_C(hi, lo) (((uint128_t)(hi) << 64) | (uint128_t)(lo))\n"
+    << "static __forceinline uint128_t llvm_ctor_u128(ulong hi, ulong lo) {"
+    << " return UINT128_C(hi, lo); }\n"
+    << "static __forceinline bool llvm_icmp_eq_u128(uint128_t l, uint128_t r) {"
+    << " return l == r; }\n"
+    << "static __forceinline bool llvm_icmp_ne_u128(uint128_t l, uint128_t r) {"
+    << " return l != r; }\n"
+    << "static __forceinline bool llvm_icmp_ule_u128(uint128_t l, uint128_t r) {"
+    << " return l <= r; }\n"
+    << "static __forceinline bool llvm_icmp_sle_i128(int128_t l, int128_t r) {"
+    << " return l <= r; }\n"
+    << "static __forceinline bool llvm_icmp_uge_u128(uint128_t l, uint128_t r) {"
+    << " return l >= r; }\n"
+    << "static __forceinline bool llvm_icmp_sge_i128(int128_t l, int128_t r) {"
+    << " return l >= r; }\n"
+    << "static __forceinline bool llvm_icmp_ult_u128(uint128_t l, uint128_t r) {"
+    << " return l < r; }\n"
+    << "static __forceinline bool llvm_icmp_slt_i128(int128_t l, int128_t r) {"
+    << " return l < r; }\n"
+    << "static __forceinline bool llvm_icmp_ugt_u128(uint128_t l, uint128_t r) {"
+    << " return l > r; }\n"
+    << "static __forceinline bool llvm_icmp_sgt_i128(int128_t l, int128_t r) {"
+    << " return l > r; }\n"
+
+    << "#else /* manual 128-bit types */\n"
+    // TODO: field order should be reversed for big-endian
+    << "typedef struct { ulong lo; ulong hi; } uint128_t;\n"
+    << "typedef uint128_t int128_t;\n"
+    << "#define UINT128_C(hi, lo) {(lo), (hi)}\n" // only use in Static context
+    << "static __forceinline uint128_t llvm_ctor_u128(ulong hi, ulong lo) {"
+    << " uint128_t r; r.lo = lo; r.hi = hi; return r; }\n"
+    << "static __forceinline bool llvm_icmp_eq_u128(uint128_t l, uint128_t r) {"
+    << " return l.hi == r.hi && l.lo == r.lo; }\n"
+    << "static __forceinline bool llvm_icmp_ne_u128(uint128_t l, uint128_t r) {"
+    << " return l.hi != r.hi || l.lo != r.lo; }\n"
+    << "static __forceinline bool llvm_icmp_ule_u128(uint128_t l, uint128_t r) {" // ordered compares: hi decides, lo breaks ties
+    << " return l.hi < r.hi ? 1 : (l.hi == r.hi ? l.lo <= r.lo : 0); }\n"
+    << "static __forceinline bool llvm_icmp_sle_i128(int128_t l, int128_t r) {" // signed: only hi is compared as signed
+    << " return (long)l.hi < (long)r.hi ? 1 : (l.hi == r.hi ? l.lo <= r.lo : 0); }\n"
+    << "static __forceinline bool llvm_icmp_uge_u128(uint128_t l, uint128_t r) {"
+    << " return l.hi > r.hi ? 1 : (l.hi == r.hi ? l.lo >= r.lo : 0); }\n"
+    << "static __forceinline bool llvm_icmp_sge_i128(int128_t l, int128_t r) {" // lo stays ulong: it carries no sign
+    << " return (long)l.hi > (long)r.hi ? 1 : (l.hi == r.hi ? l.lo >= r.lo : 0); }\n"
+    << "static __forceinline bool llvm_icmp_ult_u128(uint128_t l, uint128_t r) {"
+    << " return l.hi < r.hi ? 1 : (l.hi == r.hi ? l.lo < r.lo : 0); }\n"
+    << "static __forceinline bool llvm_icmp_slt_i128(int128_t l, int128_t r) {"
+    << " return (long)l.hi < (long)r.hi ? 1 : (l.hi == r.hi ? l.lo < r.lo : 0); }\n"
+    << "static __forceinline bool llvm_icmp_ugt_u128(uint128_t l, uint128_t r) {"
+    << " return l.hi > r.hi ? 1 : (l.hi == r.hi ? l.lo > r.lo : 0); }\n"
+    << "static __forceinline bool llvm_icmp_sgt_i128(int128_t l, int128_t r) {"
+    << " return (long)l.hi > (long)r.hi ? 1 : (l.hi == r.hi ? l.lo > r.lo : 0); }\n"
+    << "#define __emulate_i128\n"
+    << "#endif\n\n";
 
   // We output GCC specific attributes to preserve 'linkonce'ness on globals.
   // If we aren't being compiled with GCC, just drop these attributes.
   Out << "#ifdef _MSC_VER  /* Can only support \"linkonce\" vars with GCC */\n"
-      << "#define __attribute__(X)\n"
-      << "#endif\n\n";
+    << "#define __attribute__(X)\n"
+    << "#endif\n\n";
 }
 
 /// FindStaticTors - Given a static ctor/dtor list, unpack its contents into
@@ -1796,7 +1797,7 @@ static SpecialGlobalClass getGlobalVariableClass(GlobalVariable *GV) {
 // PrintEscapedString - Print each character of the specified string, escaping
 // it if it is not printable or if it is an escape char.
 static void PrintEscapedString(const char *Str, unsigned Length,
-                               raw_ostream &Out) {
+    raw_ostream &Out) {
   for (unsigned i = 0; i != Length; ++i) {
     unsigned char C = Str[i];
     if (isprint(C) && C != '\\' && C != '"')
@@ -1885,48 +1886,51 @@ void CWriter::generateHeader(Module &M) {
   // an attribute added to their prototypes.
   std::set<Function*> StaticCtors, StaticDtors;
   for (Module::global_iterator I = M.global_begin(), E = M.global_end();
-       I != E; ++I) {
+      I != E; ++I) {
     switch (getGlobalVariableClass(&*I)) {
-    default: break;
-    case GlobalCtors:
-      FindStaticTors(&*I, StaticCtors);
-      break;
-    case GlobalDtors:
-      FindStaticTors(&*I, StaticDtors);
-      break;
+      default: break;
+      case GlobalCtors:
+               FindStaticTors(&*I, StaticCtors);
+               break;
+      case GlobalDtors:
+               FindStaticTors(&*I, StaticDtors);
+               break;
     }
   }
 
   // get declaration for alloca
-//  Out << "/* Provide Declarations */\n";
-//  Out << "#include <stdarg.h>\n";      // Varargs support
-//  Out << "#include <setjmp.h>\n";      // Unwind support
-//  Out << "#include <limits.h>\n";      // With overflow intrinsics support.
-//  Out << "#include <stdint.h>\n";      // Sized integer support
-//  Out << "#include <math.h>\n";        // definitions for some math functions and numeric constants
-//  Out << "#include <APInt-C.h>\n";     // Implementations of many llvm intrinsics
-//  // Provide a definition for `bool' if not compiling with a C++ compiler.
-//  Out << "#ifndef __cplusplus\ntypedef unsigned char bool;\n#endif\n";
-//  Out << "\n";
-
-//  generateCompilerSpecificCode(Out, TD);
+  //  Out << "/* Provide Declarations */\n";
+  //  Out << "#include <stdarg.h>\n";      // Varargs support
+  //  Out << "#include <setjmp.h>\n";      // Unwind support
+  //  Out << "#include <limits.h>\n";      // With overflow intrinsics support.
+  //  Out << "#include <stdint.h>\n";      // Sized integer support
+  //  Out << "#include <math.h>\n";        // definitions for some math functions and numeric constants
+  //  Out << "#include <APInt-C.h>\n";     // Implementations of many llvm intrinsics
+  //  // Provide a definition for `bool' if not compiling with a C++ compiler.
+  //  Out << "#ifndef __cplusplus\ntypedef unsigned char bool;\n#endif\n";
+  //  Out << "\n";
+
+  //  generateCompilerSpecificCode(Out, TD);
 
   Out << "\n\n/* Support for floating point constants */\n"
-      << "typedef ulong ConstantDoubleTy;\n"
-      << "typedef uint ConstantFloatTy;\n"
-      << "typedef struct { ulong f1; ushort f2; "
-         "ushort pad[3]; } ConstantFP80Ty;\n"
-      // This is used for both kinds of 128-bit long double; meaning differs.
-      << "typedef struct { ulong f1; ulong f2; }"
-         " ConstantFP128Ty;\n"
-      << "\n\n/* Global Declarations */\n"
-      << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
+    << "typedef ulong ConstantDoubleTy;\n"
+    << "typedef uint ConstantFloatTy;\n"
+    << "typedef struct { ulong f1; ushort f2; "
+    "ushort pad[3]; } ConstantFP80Ty;\n"
+    // This is used for both kinds of 128-bit long double; meaning differs.
+    << "typedef struct { ulong f1; ulong f2; }"
+    " ConstantFP128Ty;\n"
+    << "\n\n/* OpenCL Pragmas */\n"
+    << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
+    << "#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n"
+    << "#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n"
+    << "\n\n/* Global Declarations */\n";
 
   // First output all the declarations for the program, because C requires
   // Functions & globals to be declared before they are used.
   if (!M.getModuleInlineAsm().empty()) {
     Out << "\n/* Module asm statements */\n"
-        << "__asm__ (";
+      << "__asm__ (";
 
     // Split the string into lines, to make it easier to read the .ll file.
     std::string Asm = M.getModuleInlineAsm();
@@ -1937,7 +1941,7 @@ void CWriter::generateHeader(Module &M) {
       // last newline up to this newline.
       Out << "\"";
       PrintEscapedString(std::string(Asm.begin()+CurPos, Asm.begin()+NewLine),
-                         Out);
+          Out);
       Out << "\\n\"\n";
       CurPos = NewLine+1;
       NewLine = Asm.find_first_of('\n', CurPos);
@@ -1945,13 +1949,13 @@ void CWriter::generateHeader(Module &M) {
     Out << "\"";
     PrintEscapedString(std::string(Asm.begin()+CurPos, Asm.end()), Out);
     Out << "\");\n"
-        << "/* End Module asm statements */\n";
+      << "/* End Module asm statements */\n";
   }
 
   // collect any remaining types
   raw_null_ostream NullOut;
   for (Module::global_iterator I = M.global_begin(), E = M.global_end();
-       I != E; ++I) {
+      I != E; ++I) {
     // Ignore special globals, such as debug info.
     if (getGlobalVariableClass(&*I))
       continue;
@@ -1963,7 +1967,7 @@ void CWriter::generateHeader(Module &M) {
   if (!M.global_empty()) {
     Out << "\n/* External Global Variable Declarations */\n";
     for (Module::global_iterator I = M.global_begin(), E = M.global_end();
-         I != E; ++I) {
+        I != E; ++I) {
       if (!I->isDeclaration() || isEmptyType(I->getType()->getPointerElementType()))
         continue;
 
@@ -1986,14 +1990,14 @@ void CWriter::generateHeader(Module &M) {
       unsigned Alignment = I->getAlignment();
       bool IsOveraligned = Alignment &&
         Alignment > TD->getABITypeAlignment(ElTy);
-//      if (IsOveraligned)
-//        Out << "__MSALIGN__(" << Alignment << ") ";
+      //      if (IsOveraligned)
+      //        Out << "__MSALIGN__(" << Alignment << ") ";
       printTypeName(Out, ElTy, false) << ' ' << GetValueName(&*I);
       if (IsOveraligned)
         Out << " __attribute__((aligned(" << Alignment << ")))";
 
       if (I->hasExternalWeakLinkage())
-         Out << " __EXTERNAL_WEAK__";
+        Out << " __EXTERNAL_WEAK__";
       Out << ";\n";
     }
   }
@@ -2059,7 +2063,7 @@ void CWriter::generateHeader(Module &M) {
         I->getName() == "_chkstk" ||
         I->getName() == "__chkstk" ||
         I->getName() == "___chkstk_ms")
-      continue;
+        continue;
 
     if (I->hasDLLImportStorageClass())
       Out << "__declspec(dllimport) ";
@@ -2092,7 +2096,7 @@ void CWriter::generateHeader(Module &M) {
   if (!M.global_empty()) {
     Out << "\n\n/* Global Variable Definitions and Initialization */\n";
     for (Module::global_iterator I = M.global_begin(), E = M.global_end();
-         I != E; ++I) {
+        I != E; ++I) {
       declareOneGlobalVariable(&*I);
     }
   }
@@ -2101,7 +2105,7 @@ void CWriter::generateHeader(Module &M) {
   if (!M.alias_empty()) {
     Out << "\n/* External Alias Declarations */\n";
     for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
-         I != E; ++I) {
+        I != E; ++I) {
       assert(!I->isDeclaration() && !isEmptyType(I->getType()->getPointerElementType()));
       if (I->hasLocalLinkage())
         continue; // Internal Global
@@ -2119,8 +2123,8 @@ void CWriter::generateHeader(Module &M) {
       unsigned Alignment = I->getAlignment();
       bool IsOveraligned = Alignment &&
         Alignment > TD->getABITypeAlignment(ElTy);
-//      if (IsOveraligned)
-//        Out << "__MSALIGN__(" << Alignment << ") ";
+      //      if (IsOveraligned)
+      //        Out << "__MSALIGN__(" << Alignment << ") ";
       // GetValueName would resolve the alias, which is not what we want,
       // so use getName directly instead (assuming that the Alias has a name...)
       printTypeName(Out, ElTy, false) << " *" << I->getName();
@@ -2128,7 +2132,7 @@ void CWriter::generateHeader(Module &M) {
         Out << " __attribute__((aligned(" << Alignment << ")))";
 
       if (I->hasExternalWeakLinkage())
-         Out << " __EXTERNAL_WEAK__";
+        Out << " __EXTERNAL_WEAK__";
       Out << " = ";
       writeOperand(I->getAliasee(), ContextStatic);
       Out << ";\n";
@@ -2174,7 +2178,7 @@ void CWriter::generateHeader(Module &M) {
 
   // Loop over all select operations
   for (std::set<Type*>::iterator it = SelectDeclTypes.begin(), end = SelectDeclTypes.end();
-       it != end; ++it) {
+      it != end; ++it) {
     // static __forceinline Rty llvm_select_u8x4(<bool x 4> condition, <u8 x 4> iftrue, <u8 x 4> ifnot) {
     //   Rty r = {
     //     condition[0] ? iftrue[0] : ifnot[0],
@@ -2214,7 +2218,7 @@ void CWriter::generateHeader(Module &M) {
 
   // Loop over all compare operations
   for (std::set< std::pair<CmpInst::Predicate, VectorType*> >::iterator it = CmpDeclTypes.begin(), end = CmpDeclTypes.end();
-       it != end; ++it) {
+      it != end; ++it) {
     // static __forceinline <bool x 4> llvm_icmp_ge_u8x4(<u8 x 4> l, <u8 x 4> r) {
     //   Rty c = {
     //     l[0] >= r[0],
@@ -2249,21 +2253,21 @@ void CWriter::generateHeader(Module &M) {
       } else {
         Out << "l.vector[" << n << "]";
         switch ((*it).first) {
-        case CmpInst::ICMP_EQ:  Out << " == "; break;
-        case CmpInst::ICMP_NE:  Out << " != "; break;
-        case CmpInst::ICMP_ULE:
-        case CmpInst::ICMP_SLE: Out << " <= "; break;
-        case CmpInst::ICMP_UGE:
-        case CmpInst::ICMP_SGE: Out << " >= "; break;
-        case CmpInst::ICMP_ULT:
-        case CmpInst::ICMP_SLT: Out << " < "; break;
-        case CmpInst::ICMP_UGT:
-        case CmpInst::ICMP_SGT: Out << " > "; break;
-        default:
+          case CmpInst::ICMP_EQ:  Out << " == "; break;
+          case CmpInst::ICMP_NE:  Out << " != "; break;
+          case CmpInst::ICMP_ULE:
+          case CmpInst::ICMP_SLE: Out << " <= "; break;
+          case CmpInst::ICMP_UGE:
+          case CmpInst::ICMP_SGE: Out << " >= "; break;
+          case CmpInst::ICMP_ULT:
+          case CmpInst::ICMP_SLT: Out << " < "; break;
+          case CmpInst::ICMP_UGT:
+          case CmpInst::ICMP_SGT: Out << " > "; break;
+          default:
 #ifndef NDEBUG
-          errs() << "Invalid icmp predicate!" << (*it).first;
+                                  errs() << "Invalid icmp predicate!" << (*it).first;
 #endif
-          llvm_unreachable(0);
+                                  llvm_unreachable(0);
         }
         Out << "r.vector[" << n << "];\n";
       }
@@ -2273,7 +2277,7 @@ void CWriter::generateHeader(Module &M) {
 
   // Loop over all (vector) cast operations
   for (std::set<std::pair<CastInst::CastOps, std::pair<Type*, Type*>>>::iterator it = CastOpDeclTypes.begin(), end = CastOpDeclTypes.end();
-       it != end; ++it) {
+      it != end; ++it) {
     // static __forceinline <u32 x 4> llvm_ZExt_u8x4_u32x4(<u8 x 4> in) { // Src->isVector == Dst->isVector
     //   Rty out = {
     //     in[0],
@@ -2296,18 +2300,18 @@ void CWriter::generateHeader(Module &M) {
     Type *DstTy = (*it).second.second;
     bool SrcSigned, DstSigned;
     switch (opcode) {
-    default:
-      SrcSigned = false;
-      DstSigned = false;
-    case Instruction::SIToFP:
-      SrcSigned = true;
-      DstSigned = false;
-    case Instruction::FPToSI:
-      SrcSigned = false;
-      DstSigned = true;
-    case Instruction::SExt:
-      SrcSigned = true;
-      DstSigned = true;
+      default: // every opcode not listed below: neither side is signed
+        SrcSigned = false; DstSigned = false;
+        break; // without the breaks every opcode fell through to the SExt arm
+      case Instruction::SIToFP: // signed source only
+        SrcSigned = true; DstSigned = false;
+        break;
+      case Instruction::FPToSI: // signed destination only
+        SrcSigned = false; DstSigned = true;
+        break;
+      case Instruction::SExt: // signed on both sides
+        SrcSigned = true; DstSigned = true;
+        break;
     }
 
     Out << "static __forceinline ";
@@ -2346,20 +2350,20 @@ void CWriter::generateHeader(Module &M) {
       Out << " out;\n";
       Out << "  LLVM";
       switch (opcode) {
-      case Instruction::UIToFP: Out << "UItoFP"; break;
-      case Instruction::SIToFP: Out << "SItoFP"; break;
-      case Instruction::Trunc: Out << "Trunc"; break;
-      //case Instruction::FPExt:
-      //case Instruction::FPTrunc:
-      case Instruction::ZExt: Out << "ZExt"; break;
-      case Instruction::FPToUI: Out << "FPtoUI"; break;
-      case Instruction::SExt: Out << "SExt"; break;
-      case Instruction::FPToSI: Out << "FPtoSI"; break;
-      default:
-        llvm_unreachable("Invalid cast opcode for i128");
+        case Instruction::UIToFP: Out << "UItoFP"; break;
+        case Instruction::SIToFP: Out << "SItoFP"; break;
+        case Instruction::Trunc: Out << "Trunc"; break;
+                                 //case Instruction::FPExt:
+                                 //case Instruction::FPTrunc:
+        case Instruction::ZExt: Out << "ZExt"; break;
+        case Instruction::FPToUI: Out << "FPtoUI"; break;
+        case Instruction::SExt: Out << "SExt"; break;
+        case Instruction::FPToSI: Out << "FPtoSI"; break;
+        default:
+                                  llvm_unreachable("Invalid cast opcode for i128");
       }
       Out << "(" << SrcTy->getPrimitiveSizeInBits() << ", &in, "
-                 << DstTy->getPrimitiveSizeInBits() << ", &out);\n";
+        << DstTy->getPrimitiveSizeInBits() << ", &out);\n";
       Out << "  return out;\n";
       Out << "#endif\n";
       Out << "}\n";
@@ -2368,7 +2372,7 @@ void CWriter::generateHeader(Module &M) {
 
   // Loop over all simple vector operations
   for (std::set<std::pair<unsigned, Type*>>::iterator it = InlineOpDeclTypes.begin(), end = InlineOpDeclTypes.end();
-       it != end; ++it) {
+      it != end; ++it) {
     // static __forceinline <u32 x 4> llvm_BinOp_u32x4(<u32 x 4> a, <u32 x 4> b) {
     //   Rty r = {
     //      a[0] OP b[0],
@@ -2413,9 +2417,9 @@ void CWriter::generateHeader(Module &M) {
     // C can't handle non-power-of-two integer types
     unsigned mask = 0;
     if (ElemTy->isIntegerTy()) {
-       IntegerType *ITy = static_cast<IntegerType*>(ElemTy);
-       if (!ITy->isPowerOf2ByteWidth())
-         mask = ITy->getBitMask();
+      IntegerType *ITy = static_cast<IntegerType*>(ElemTy);
+      if (!ITy->isPowerOf2ByteWidth())
+        mask = ITy->getBitMask();
     }
 
     if (isa<VectorType>(OpTy)) {
@@ -2440,29 +2444,29 @@ void CWriter::generateHeader(Module &M) {
         } else {
           Out << "a.vector[" << n << "]";
           switch (opcode) {
-          case Instruction::Add:
-          case Instruction::FAdd: Out << " + "; break;
-          case Instruction::Sub:
-          case Instruction::FSub: Out << " - "; break;
-          case Instruction::Mul:
-          case Instruction::FMul: Out << " * "; break;
-          case Instruction::URem:
-          case Instruction::SRem:
-          case Instruction::FRem: Out << " % "; break;
-          case Instruction::UDiv:
-          case Instruction::SDiv:
-          case Instruction::FDiv: Out << " / "; break;
-          case Instruction::And:  Out << " & "; break;
-          case Instruction::Or:   Out << " | "; break;
-          case Instruction::Xor:  Out << " ^ "; break;
-          case Instruction::Shl : Out << " << "; break;
-          case Instruction::LShr:
-          case Instruction::AShr: Out << " >> "; break;
-          default:
+            case Instruction::Add:
+            case Instruction::FAdd: Out << " + "; break;
+            case Instruction::Sub:
+            case Instruction::FSub: Out << " - "; break;
+            case Instruction::Mul:
+            case Instruction::FMul: Out << " * "; break;
+            case Instruction::URem:
+            case Instruction::SRem:
+            case Instruction::FRem: Out << " % "; break;
+            case Instruction::UDiv:
+            case Instruction::SDiv:
+            case Instruction::FDiv: Out << " / "; break;
+            case Instruction::And:  Out << " & "; break;
+            case Instruction::Or:   Out << " | "; break;
+            case Instruction::Xor:  Out << " ^ "; break;
+            case Instruction::Shl : Out << " << "; break;
+            case Instruction::LShr:
+            case Instruction::AShr: Out << " >> "; break;
+            default:
 #ifndef NDEBUG
-             errs() << "Invalid operator type!" << opcode;
+                                    errs() << "Invalid operator type!" << opcode;
 #endif
-             llvm_unreachable(0);
+                                    llvm_unreachable(0);
           }
           Out << "b.vector[" << n << "]";
         }
@@ -2483,24 +2487,24 @@ void CWriter::generateHeader(Module &M) {
       } else {
         Out << "a";
         switch (opcode) {
-        case Instruction::Add: Out << " + "; break;
-        case Instruction::Sub: Out << " - "; break;
-        case Instruction::Mul: Out << " * "; break;
-        case Instruction::URem:
-        case Instruction::SRem: Out << " % "; break;
-        case Instruction::UDiv:
-        case Instruction::SDiv: Out << " / "; break;
-        case Instruction::And:  Out << " & "; break;
-        case Instruction::Or:   Out << " | "; break;
-        case Instruction::Xor:  Out << " ^ "; break;
-        case Instruction::Shl:  Out << " << "; break;
-        case Instruction::LShr:
-        case Instruction::AShr: Out << " >> "; break;
-        default:
+          case Instruction::Add: Out << " + "; break;
+          case Instruction::Sub: Out << " - "; break;
+          case Instruction::Mul: Out << " * "; break;
+          case Instruction::URem:
+          case Instruction::SRem: Out << " % "; break;
+          case Instruction::UDiv:
+          case Instruction::SDiv: Out << " / "; break;
+          case Instruction::And:  Out << " & "; break;
+          case Instruction::Or:   Out << " | "; break;
+          case Instruction::Xor:  Out << " ^ "; break;
+          case Instruction::Shl:  Out << " << "; break;
+          case Instruction::LShr:
+          case Instruction::AShr: Out << " >> "; break;
+          default:
 #ifndef NDEBUG
-           errs() << "Invalid operator type!" << opcode;
+                                  errs() << "Invalid operator type!" << opcode;
 #endif
-           llvm_unreachable(0);
+                                  llvm_unreachable(0);
         }
         Out << "b;\n";
       }
@@ -2537,26 +2541,26 @@ void CWriter::generateHeader(Module &M) {
         // everything that hasn't been manually implemented above
         Out << "  LLVM";
         switch (opcode) {
-        //case BinaryNeg: Out << "Neg"; break;
-        //case BinaryNot: Out << "FlipAllBits"; break;
-        case Instruction::Add: Out << "Add"; break;
-        case Instruction::Sub: Out << "Sub"; break;
-        case Instruction::Mul: Out << "Mul"; break;
-        case Instruction::URem: Out << "URem"; break;
-        case Instruction::SRem: Out << "SRem"; break;
-        case Instruction::UDiv: Out << "UDiv"; break;
-        case Instruction::SDiv: Out << "SDiv"; break;
-        //case Instruction::And:  Out << "And"; break;
-        //case Instruction::Or:   Out << "Or"; break;
-        //case Instruction::Xor:  Out << "Xor"; break;
-        //case Instruction::Shl: Out << "Shl"; break;
-        case Instruction::LShr: Out << "LShr"; break;
-        case Instruction::AShr: Out << "AShr"; break;
-        default:
+          //case BinaryNeg: Out << "Neg"; break;
+          //case BinaryNot: Out << "FlipAllBits"; break;
+          case Instruction::Add: Out << "Add"; break;
+          case Instruction::Sub: Out << "Sub"; break;
+          case Instruction::Mul: Out << "Mul"; break;
+          case Instruction::URem: Out << "URem"; break;
+          case Instruction::SRem: Out << "SRem"; break;
+          case Instruction::UDiv: Out << "UDiv"; break;
+          case Instruction::SDiv: Out << "SDiv"; break;
+                                  //case Instruction::And:  Out << "And"; break;
+                                  //case Instruction::Or:   Out << "Or"; break;
+                                  //case Instruction::Xor:  Out << "Xor"; break;
+                                  //case Instruction::Shl: Out << "Shl"; break;
+          case Instruction::LShr: Out << "LShr"; break;
+          case Instruction::AShr: Out << "AShr"; break;
+          default:
 #ifndef NDEBUG
-           errs() << "Invalid operator type!" << opcode;
+                                  errs() << "Invalid operator type!" << opcode;
 #endif
-           llvm_unreachable(0);
+                                  llvm_unreachable(0);
         }
         Out << "(16, &a, &b, &r);\n";
       }
@@ -2581,29 +2585,29 @@ void CWriter::generateHeader(Module &M) {
       } else {
         Out << "a";
         switch (opcode) {
-        case Instruction::Add:
-        case Instruction::FAdd: Out << " + "; break;
-        case Instruction::Sub:
-        case Instruction::FSub: Out << " - "; break;
-        case Instruction::Mul:
-        case Instruction::FMul: Out << " * "; break;
-        case Instruction::URem:
-        case Instruction::SRem:
-        case Instruction::FRem: Out << " % "; break;
-        case Instruction::UDiv:
-        case Instruction::SDiv:
-        case Instruction::FDiv: Out << " / "; break;
-        case Instruction::And:  Out << " & "; break;
-        case Instruction::Or:   Out << " | "; break;
-        case Instruction::Xor:  Out << " ^ "; break;
-        case Instruction::Shl : Out << " << "; break;
-        case Instruction::LShr:
-        case Instruction::AShr: Out << " >> "; break;
-        default:
+          case Instruction::Add:
+          case Instruction::FAdd: Out << " + "; break;
+          case Instruction::Sub:
+          case Instruction::FSub: Out << " - "; break;
+          case Instruction::Mul:
+          case Instruction::FMul: Out << " * "; break;
+          case Instruction::URem:
+          case Instruction::SRem:
+          case Instruction::FRem: Out << " % "; break;
+          case Instruction::UDiv:
+          case Instruction::SDiv:
+          case Instruction::FDiv: Out << " / "; break;
+          case Instruction::And:  Out << " & "; break;
+          case Instruction::Or:   Out << " | "; break;
+          case Instruction::Xor:  Out << " ^ "; break;
+          case Instruction::Shl : Out << " << "; break;
+          case Instruction::LShr:
+          case Instruction::AShr: Out << " >> "; break;
+          default:
 #ifndef NDEBUG
-           errs() << "Invalid operator type!" << opcode;
+                                  errs() << "Invalid operator type!" << opcode;
 #endif
-           llvm_unreachable(0);
+                                  llvm_unreachable(0);
         }
         Out << "b";
         if (mask)
@@ -2616,7 +2620,7 @@ void CWriter::generateHeader(Module &M) {
 
   // Loop over all inline constructors
   for (std::set<Type*>::iterator it = CtorDeclTypes.begin(), end = CtorDeclTypes.end();
-       it != end; ++it) {
+      it != end; ++it) {
     // static __forceinline <u32 x 4> llvm_ctor_u32x4(u32 x1, u32 x2, u32 x3, u32 x4) {
     //   Rty r = {
     //     x1, x2, x3, x4
@@ -2634,41 +2638,41 @@ void CWriter::generateHeader(Module &M) {
     unsigned e = (STy ? STy->getNumElements() : (ATy ? ATy->getNumElements() : VTy->getNumElements()));
     bool printed = false;
     for (unsigned i = 0; i != e; ++i) {
-        Type *ElTy = STy ? STy->getElementType(i) : (*it)->getSequentialElementType();
-        if (isEmptyType(ElTy))
-          Out << " /* ";
-        else if (printed)
-          Out << ", ";
-        printTypeNameUnaligned(Out, ElTy);
-        Out << " x" << i;
-        if (isEmptyType(ElTy))
-          Out << " */";
-        else
-          printed = true;
+      Type *ElTy = STy ? STy->getElementType(i) : (*it)->getSequentialElementType();
+      if (isEmptyType(ElTy))
+        Out << " /* ";
+      else if (printed)
+        Out << ", ";
+      printTypeNameUnaligned(Out, ElTy);
+      Out << " x" << i;
+      if (isEmptyType(ElTy))
+        Out << " */";
+      else
+        printed = true;
     }
     Out << ") {\n  ";
     printTypeName(Out, *it);
     Out << " r;";
     for (unsigned i = 0; i != e; ++i) {
-        Type *ElTy = STy ? STy->getElementType(i) : (*it)->getSequentialElementType();
-        if (isEmptyType(ElTy))
-          continue;
-        if (STy)
-          Out << "\n  r.field" << i << " = x" << i << ";";
-        else if (ATy)
-          Out << "\n  r.array[" << i << "] = x" << i << ";";
-        else if (VTy)
-          Out << "\n  r.vector[" << i << "] = x" << i << ";";
-        else
-          assert(0);
+      Type *ElTy = STy ? STy->getElementType(i) : (*it)->getSequentialElementType();
+      if (isEmptyType(ElTy))
+        continue;
+      if (STy)
+        Out << "\n  r.field" << i << " = x" << i << ";";
+      else if (ATy)
+        Out << "\n  r.array[" << i << "] = x" << i << ";";
+      else if (VTy)
+        Out << "\n  r.vector[" << i << "] = x" << i << ";";
+      else
+        assert(0);
     }
     Out << "\n  return r;\n}\n";
   }
 
   // Emit definitions of the intrinsics.
   for (SmallVector<Function*, 16>::iterator
-       I = intrinsicsToDefine.begin(),
-       E = intrinsicsToDefine.end(); I != E; ++I) {
+      I = intrinsicsToDefine.begin(),
+      E = intrinsicsToDefine.end(); I != E; ++I) {
     printIntrinsicDefinition(**I, Out);
   }
 
@@ -2700,8 +2704,8 @@ void CWriter::declareOneGlobalVariable(GlobalVariable* I) {
   unsigned Alignment = I->getAlignment();
   bool IsOveraligned = Alignment &&
     Alignment > TD->getABITypeAlignment(ElTy);
-//  if (IsOveraligned)
-//    Out << "__MSALIGN__(" << Alignment << ") ";
+  //  if (IsOveraligned)
+  //    Out << "__MSALIGN__(" << Alignment << ") ";
   printTypeName(Out, ElTy, false) << ' ' << GetValueName(I);
   if (IsOveraligned)
     Out << " __attribute__((aligned(" << Alignment << ")))";
@@ -2753,9 +2757,9 @@ void CWriter::printFloatingPointConstants(Function &F) {
   // precision.
   //
   for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I)
-      for (Instruction::op_iterator I_Op = I->op_begin(), E_Op = I->op_end(); I_Op != E_Op; ++I_Op)
-          if (const Constant *C = dyn_cast<Constant>(I_Op))
-            printFloatingPointConstants(C);
+    for (Instruction::op_iterator I_Op = I->op_begin(), E_Op = I->op_end(); I_Op != E_Op; ++I_Op)
+      if (const Constant *C = dyn_cast<Constant>(I_Op))
+        printFloatingPointConstants(C);
   Out << '\n';
 }
 
@@ -2782,31 +2786,31 @@ void CWriter::printFloatingPointConstants(const Constant *C) {
     double Val = FPC->getValueAPF().convertToDouble();
     uint64_t i = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
     Out << "static const ConstantDoubleTy FPConstant" << FPCounter++
-    << " = 0x" << utohexstr(i)
-    << "ULL;    /* " << Val << " */\n";
+      << " = 0x" << utohexstr(i)
+      << "ULL;    /* " << Val << " */\n";
   } else if (FPC->getType() == Type::getFloatTy(FPC->getContext())) {
     float Val = FPC->getValueAPF().convertToFloat();
     uint32_t i = (uint32_t)FPC->getValueAPF().bitcastToAPInt().
-    getZExtValue();
+      getZExtValue();
     Out << "static const ConstantFloatTy FPConstant" << FPCounter++
-    << " = 0x" << utohexstr(i)
-    << "U;    /* " << Val << " */\n";
+      << " = 0x" << utohexstr(i)
+      << "U;    /* " << Val << " */\n";
   } else if (FPC->getType() == Type::getX86_FP80Ty(FPC->getContext())) {
     // api needed to prevent premature destruction
     const APInt api = FPC->getValueAPF().bitcastToAPInt();
     const uint64_t *p = api.getRawData();
     Out << "static const ConstantFP80Ty FPConstant" << FPCounter++
-    << " = { 0x" << utohexstr(p[0])
-    << "ULL, 0x" << utohexstr((uint16_t)p[1]) << ",{0,0,0}"
-    << "}; /* Long double constant */\n";
+      << " = { 0x" << utohexstr(p[0])
+      << "ULL, 0x" << utohexstr((uint16_t)p[1]) << ",{0,0,0}"
+      << "}; /* Long double constant */\n";
   } else if (FPC->getType() == Type::getPPC_FP128Ty(FPC->getContext()) ||
-             FPC->getType() == Type::getFP128Ty(FPC->getContext())) {
+      FPC->getType() == Type::getFP128Ty(FPC->getContext())) {
     const APInt api = FPC->getValueAPF().bitcastToAPInt();
     const uint64_t *p = api.getRawData();
     Out << "static const ConstantFP128Ty FPConstant" << FPCounter++
-    << " = { 0x"
-    << utohexstr(p[0]) << ", 0x" << utohexstr(p[1])
-    << "}; /* Long double constant */\n";
+      << " = { 0x"
+      << utohexstr(p[0]) << ", 0x" << utohexstr(p[1])
+      << "}; /* Long double constant */\n";
 
   } else {
     llvm_unreachable("Unknown float type!");
@@ -2862,14 +2866,14 @@ void CWriter::printModuleTypes(raw_ostream &Out) {
 
   // Question: Is UnnamedFunctionIDs ever non-empty?
   for (DenseMap<std::pair<FunctionType*,
-	 std::pair<AttributeList, CallingConv::ID> >, unsigned>::iterator
-       I = UnnamedFunctionIDs.begin(), E = UnnamedFunctionIDs.end();
-       I != E; ++I) {
-    
+      std::pair<AttributeList, CallingConv::ID> >, unsigned>::iterator
+      I = UnnamedFunctionIDs.begin(), E = UnnamedFunctionIDs.end();
+      I != E; ++I) {
+
     Out << '\n';
     std::pair<FunctionType*, std::pair<AttributeList, CallingConv::ID> > F = I->first;
     if (F.second.first == AttributeList() && F.second.second == CallingConv::C)
-        if (!TypesPrinted.insert(F.first).second) continue; // already printed this above
+      if (!TypesPrinted.insert(F.first).second) continue; // already printed this above
 
     // FIXME: Removing apparently unused function call - need to check
     printFunctionDeclaration(Out, F.first, F.second);
@@ -2878,8 +2882,8 @@ void CWriter::printModuleTypes(raw_ostream &Out) {
   // We may have collected some intrinsic prototypes to emit.
   // Emit them now, before the function that uses them is emitted
   for (std::vector<Function*>::iterator
-       I = prototypesToGen.begin(), E = prototypesToGen.end();
-       I != E; ++I) {
+      I = prototypesToGen.begin(), E = prototypesToGen.end();
+      I != E; ++I) {
     Out << '\n';
     Function *F = *I;
     printFunctionProto(Out, F);
@@ -2917,7 +2921,7 @@ void CWriter::forwardDeclareFunctionTypedefs(raw_ostream &Out, Type *Ty, std::se
 // this one depends on.
 //
 void CWriter::printContainedTypes(raw_ostream &Out, Type *Ty,
-                                    std::set<Type*> &TypesPrinted) {
+    std::set<Type*> &TypesPrinted) {
   // Check to see if we have already printed this struct.
   if (!TypesPrinted.insert(Ty).second) return;
   // Skip empty structs
@@ -2925,7 +2929,7 @@ void CWriter::printContainedTypes(raw_ostream &Out, Type *Ty,
 
   // Print all contained types first.
   for (Type::subtype_iterator I = Ty->subtype_begin(),
-       E = Ty->subtype_end(); I != E; ++I)
+      E = Ty->subtype_end(); I != E; ++I)
     printContainedTypes(Out, *I, TypesPrinted);
 
   if (StructType *ST = dyn_cast<StructType>(Ty)) {
@@ -2946,15 +2950,15 @@ static inline bool isFPIntBitCast(Instruction &I) {
   Type *SrcTy = I.getOperand(0)->getType();
   Type *DstTy = I.getType();
   return (SrcTy->isFloatingPointTy() && DstTy->isIntegerTy()) ||
-         (DstTy->isFloatingPointTy() && SrcTy->isIntegerTy());
+    (DstTy->isFloatingPointTy() && SrcTy->isIntegerTy());
 }
 
 void CWriter::printFunction(Function &F) {
   bool isKernel = false;
-  
+
   if (NamedMDNode * KernelMD = F.getParent()->getNamedMetadata("opencl.kernels")) {
     for (auto iter : KernelMD->operands()) {
-//      DEBUG( errs() << "Kernel Metadata: " << *iter << "\n");
+      //      DEBUG( errs() << "Kernel Metadata: " << *iter << "\n");
       const MDOperand *KernelMDOp = iter->operands().begin();
       Metadata *KMD = KernelMDOp->get();
       if(ValueAsMetadata *KMDVAM = dyn_cast<ValueAsMetadata>(KMD)){
@@ -2976,12 +2980,12 @@ void CWriter::printFunction(Function &F) {
   if (F.hasDLLExportStorageClass()) Out << "__declspec(dllexport) ";
   if (F.hasLocalLinkage()) Out << "static ";
   printFunctionProto(Out, F.getFunctionType(),
-		     std::make_pair(F.getAttributes(), F.getCallingConv()),
-		     GetValueName(&F),
-		     F.arg_begin(), // NOTE: replacing ArgumentList (LLVM-4) with arg iterator
-		     //&F.getArgumentList(),
-		     isKernel);
-  
+      std::make_pair(F.getAttributes(), F.getCallingConv()),
+      GetValueName(&F),
+      F.arg_begin(), // NOTE: replacing ArgumentList (LLVM-4) with arg iterator
+      //&F.getArgumentList(),
+      isKernel);
+
   Out << " {\n";
 
   // If this is a struct return function, handle the result with magic.
@@ -3001,26 +3005,26 @@ void CWriter::printFunction(Function &F) {
   // print local variable information for the function
   for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
     if (AllocaInst *AI = isDirectAlloca(&*I)) {
-			//DEBUG(errs() << "Processing alloca inst: " << *AI << "\n");
+      //DEBUG(errs() << "Processing alloca inst: " << *AI << "\n");
       unsigned Alignment = AI->getAlignment();
       bool IsOveraligned = Alignment &&
         Alignment > TD->getABITypeAlignment(AI->getAllocatedType());
       Out << "  ";
-//      if (IsOveraligned)
-//        Out << "__MSALIGN__(" << Alignment << ") ";
+      //      if (IsOveraligned)
+      //        Out << "__MSALIGN__(" << Alignment << ") ";
       printTypeName(Out, AI->getAllocatedType(), false) << ' ';
       Out << GetValueName(AI);
       if (IsOveraligned)
         Out << " __attribute__((aligned(" << Alignment << ")))";
       if (AI->isArrayAllocation()) {
-				//DEBUG(errs() << "Alloca is an array allocation!\n");
+        //DEBUG(errs() << "Alloca is an array allocation!\n");
         unsigned arraySize = dyn_cast<ConstantInt>(AI->getArraySize())->getZExtValue();
         Out << "[" << arraySize << "]";
       }
       Out << ";    /* Address-exposed local */\n";
       PrintedVar = true;
     } else if (!isEmptyType(I->getType()) &&
-               !isInlinableInst(*I)) {
+        !isInlinableInst(*I)) {
       Out << "  ";
       printTypeName(Out, I->getType(), false) << ' ' << GetValueName(&*I);
       Out << ";\n";
@@ -3037,7 +3041,7 @@ void CWriter::printFunction(Function &F) {
     // variable to hold the result of the BitCast.
     if (isFPIntBitCast(*I)) {
       Out << "  llvmBitCastUnion " << GetValueName(&*I)
-          << "__BITCAST_TEMPORARY;\n";
+        << "__BITCAST_TEMPORARY;\n";
       PrintedVar = true;
     }
   }
@@ -3046,25 +3050,25 @@ void CWriter::printFunction(Function &F) {
     Out << '\n';
 
   // print the basic blocks
-//  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
-  
+  //  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+
   std::set<BasicBlock*> VisitSet;
   BasicBlock* entry = &(F.getEntryBlock());
   // starting printing from entry, then CFG traversal will print the reachable blocks.
   printBBorLoop(entry);
-//  for (df_iterator<BasicBlock*> BI = df_begin(entry), BE = df_end(entry); BI!=BE; ++BI) { 
-//    BasicBlock *BB = *BI;
-//    printBBorLoop(BB);
-//    if(VisitedBlocks.find(BB) == VisitedBlocks.end()) {
-//      VisitedBlocks.insert(BB);
-//      if (Loop *L = LI->getLoopFor(&*BB)) {
-//        if (L->getHeader() == &*BB && L->getParentLoop() == 0)
-//          printLoop(L);
-//      } else {
-//        printBasicBlock(&*BB);
-//      }
-//    }
-//  }
+  //  for (df_iterator<BasicBlock*> BI = df_begin(entry), BE = df_end(entry); BI!=BE; ++BI) { 
+  //    BasicBlock *BB = *BI;
+  //    printBBorLoop(BB);
+  //    if(VisitedBlocks.find(BB) == VisitedBlocks.end()) {
+  //      VisitedBlocks.insert(BB);
+  //      if (Loop *L = LI->getLoopFor(&*BB)) {
+  //        if (L->getHeader() == &*BB && L->getParentLoop() == 0)
+  //          printLoop(L);
+  //      } else {
+  //        printBasicBlock(&*BB);
+  //      }
+  //    }
+  //  }
 
   Out << "}\n\n";
 }
@@ -3074,15 +3078,15 @@ bool CWriter::extractIndVarChain(Instruction *Inst, std::stack<Instruction*> *In
   //Traverse def-use chain of induction variable to make sure that
   //it ends at the branch. Keep stack of all instructions leading there.
   for(User *U : Inst->users()) {
-//    DEBUG(errs() << std::string(indent, '-'));
-//    DEBUG(errs() << "->Found user: " << *U << "\n");
+    //    DEBUG(errs() << std::string(indent, '-'));
+    //    DEBUG(errs() << "->Found user: " << *U << "\n");
     if(Instruction *UInst = dyn_cast<Instruction>(U)) {
       if(UInst == Branch) {
-//        DEBUG(errs() << "Found correct path, returning!\n");
+        //        DEBUG(errs() << "Found correct path, returning!\n");
         return true;
       }
       else if (isa<PHINode>(UInst)) {
-//        DEBUG(errs() << "Reached a PHI Node => Wrong path! Returning!\n");
+        //        DEBUG(errs() << "Reached a PHI Node => Wrong path! Returning!\n");
         continue;
       }
       else {
@@ -3091,7 +3095,7 @@ bool CWriter::extractIndVarChain(Instruction *Inst, std::stack<Instruction*> *In
           return true;
         }
         else {
-//          DEBUG(errs() << "Wrong path, popping: " << *(IndVarChain->top()) << "\n");
+          //          DEBUG(errs() << "Wrong path, popping: " << *(IndVarChain->top()) << "\n");
           IndVarChain->pop();
         }
       }
@@ -3103,24 +3107,24 @@ bool CWriter::extractIndVarChain(Instruction *Inst, std::stack<Instruction*> *In
 
 bool CWriter::findLoopBranch(BranchInst **LBranch, BasicBlock* CurBlock, BasicBlock* LHeader, std::set<BasicBlock*>*visitSet) {
   bool result = false;
-//  DEBUG(errs() << "Finding loop branch in " << CurBlock->getName() << "!\n");
+  //  DEBUG(errs() << "Finding loop branch in " << CurBlock->getName() << "!\n");
   if(BranchInst *LBranchTemp = dyn_cast<BranchInst>(CurBlock->getTerminator())) {
-//    DEBUG(errs() << "Branch: " << *LBranchTemp << "\n");
+    //    DEBUG(errs() << "Branch: " << *LBranchTemp << "\n");
     if(LBranchTemp->isConditional()) {
       if(LBranchTemp->getSuccessor(0) == LHeader || LBranchTemp->getSuccessor(1) == LHeader) {
         *LBranch = LBranchTemp;
-//        DEBUG(errs() << "Found Loop branch: " << **LBranch << "\n");
+        //        DEBUG(errs() << "Found Loop branch: " << **LBranch << "\n");
         result = true;
       } else {
         BasicBlock* NextBlock1 = LBranchTemp->getSuccessor(0);
         BasicBlock* NextBlock2 = LBranchTemp->getSuccessor(1);
         if(visitSet->find(NextBlock1) == visitSet->end()) {
-//          DEBUG(errs() << "Visiting unvisited node: " << NextBlock1->getName() << "\n");
+          //          DEBUG(errs() << "Visiting unvisited node: " << NextBlock1->getName() << "\n");
           visitSet->insert(NextBlock1);
           result |= findLoopBranch(LBranch, NextBlock1, LHeader, visitSet);
         }
         if(visitSet->find(NextBlock2) == visitSet->end()) {
-//          DEBUG(errs() << "Visiting unvisited node: " << NextBlock2->getName() << "\n");
+          //          DEBUG(errs() << "Visiting unvisited node: " << NextBlock2->getName() << "\n");
           visitSet->insert(NextBlock2);
           result |= findLoopBranch(LBranch, NextBlock2, LHeader, visitSet);
         }
@@ -3129,12 +3133,12 @@ bool CWriter::findLoopBranch(BranchInst **LBranch, BasicBlock* CurBlock, BasicBl
     } else {
       if(LBranchTemp->getSuccessor(0) == LHeader) {
         *LBranch = LBranchTemp;
-//        DEBUG(errs() << "Found Loop branch: " << **LBranch << "\n");
+        //        DEBUG(errs() << "Found Loop branch: " << **LBranch << "\n");
         result = true;
       } else {
         BasicBlock *NextBlock = LBranchTemp->getSuccessor(0);
         if(visitSet->find(NextBlock) == visitSet->end()) {
-//          DEBUG(errs() << "Visiting unvisited node: " << NextBlock->getName() << "\n");
+          //          DEBUG(errs() << "Visiting unvisited node: " << NextBlock->getName() << "\n");
           visitSet->insert(NextBlock);
           result |= findLoopBranch(LBranch, NextBlock, LHeader, visitSet);
         }
@@ -3145,15 +3149,15 @@ bool CWriter::findLoopBranch(BranchInst **LBranch, BasicBlock* CurBlock, BasicBl
 }
 
 bool CWriter::traverseUseDefChain(Instruction *I, PHINode *PI) {
-//  DEBUG(errs() << "traversing: " << *I << "\n");
+  //  DEBUG(errs() << "traversing: " << *I << "\n");
   bool result = false;
   if(PHINode *PHI = dyn_cast<PHINode>(I)) {
     if (PI == PHI) {
-//      DEBUG(errs() << "returning true\n");
+      //      DEBUG(errs() << "returning true\n");
       result = true;
 
     } else {
-//      DEBUG(errs() << "returning false\n");
+      //      DEBUG(errs() << "returning false\n");
       return false;
     }
   } else {
@@ -3167,35 +3171,35 @@ bool CWriter::traverseUseDefChain(Instruction *I, PHINode *PI) {
 }
 
 void CWriter::printLoop(Loop *L) {
-  
+
   PredicatedScalarEvolution PSE(*SE, *L);
   Out << "\n\n/* Processing Loop Block: " << L->getName() << " */\n";
 
-//  if(simplifyLoop(L, DT, LI, SE, AC, true)) {
-//    DEBUG(errs() << "Simplified loop!\n" << *L << "\n");
-//  }
+  //  if(simplifyLoop(L, DT, LI, SE, AC, true)) {
+  //    DEBUG(errs() << "Simplified loop!\n" << *L << "\n");
+  //  }
 
   PHINode *InductionVariable;
-//  auto *LoopLatch = L->getLoopLatch();
+  //  auto *LoopLatch = L->getLoopLatch();
   auto *ExitingBlock = L->getExitingBlock();
-//  DEBUG(errs() << "Exiting Block: " << ExitingBlock->getName() << "\n");
+  //  DEBUG(errs() << "Exiting Block: " << ExitingBlock->getName() << "\n");
   auto *ExitingBranch = ExitingBlock->getTerminator();
-//  DEBUG(errs() << "Exiting Branch: " << *ExitingBranch << "\n");
+  //  DEBUG(errs() << "Exiting Branch: " << *ExitingBranch << "\n");
   InductionDescriptor ID;
   if (L->getLoopPreheader()==nullptr) {
-//    DEBUG(errs() << "Loop has no preheader!\n");
+    //    DEBUG(errs() << "Loop has no preheader!\n");
   }
-//  DEBUG(errs() << "Looking for induction variables\n");
-//  if (PHINode *IndVar = L->getCanonicalInductionVariable()) {
-//    InductionVariable = IndVar;
-//    DEBUG(errs() << "Found canonical induction variable:\n" << *IndVar << "\n");
-//  }
+  //  DEBUG(errs() << "Looking for induction variables\n");
+  //  if (PHINode *IndVar = L->getCanonicalInductionVariable()) {
+  //    InductionVariable = IndVar;
+  //    DEBUG(errs() << "Found canonical induction variable:\n" << *IndVar << "\n");
+  //  }
   bool found = false;
   for (auto I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
     PHINode *PHI = cast<PHINode>(I);
-//    DEBUG(errs() << "Phi Node: " << *PHI << "\n");
+    //    DEBUG(errs() << "Phi Node: " << *PHI << "\n");
     if(InductionDescriptor::isInductionPHI(PHI,L,PSE,ID)) {
-//      DEBUG(errs() << "Found induction: " << *PHI << "\n");
+      //      DEBUG(errs() << "Found induction: " << *PHI << "\n");
       InductionVariable = PHI;
       found = true;
       break;
@@ -3208,73 +3212,135 @@ void CWriter::printLoop(Loop *L) {
 
   LInductionVars.insert(InductionVariable);
   LoopIndVarsMap.insert(std::pair<Loop*, PHINode*>(L,InductionVariable));
-  Value *StartValue = ID.getStartValue();
-  const SCEV *Step = ID.getStep();
-//  unsigned IterationCount = SE->getSmallConstantMaxTripCount(L); 
-//  DEBUG(errs() << "StartValue: " << *StartValue << "\nStep: " << *Step << "\nIterationCount: " << IterationCount << "\n");
-
-  std::string IVOp;
-  
-  if (const SCEVConstant *stepConst = dyn_cast<SCEVConstant>(Step)) {
-    if(stepConst->getAPInt().isNonNegative()) {
-      IVOp = " + ";  
-    }
-  }
 
   Value *IV = dyn_cast<Value>(InductionVariable);
   std::string IVName = GetValueName(IV);
-  
-  std::string BranchPredicate;
-  ICmpInst *BranchCondition = dyn_cast<ICmpInst>(dyn_cast<BranchInst>(ExitingBranch)->getCondition());
-  switch(BranchCondition->getPredicate()) {
-    case ICmpInst::ICMP_EQ:  BranchPredicate = " != "; break;
-    case ICmpInst::ICMP_NE:  BranchPredicate = " == "; break;
-    case ICmpInst::ICMP_ULE:
-    case ICmpInst::ICMP_SLE: BranchPredicate = " > "; break;
-    case ICmpInst::ICMP_UGE:
-    case ICmpInst::ICMP_SGE: BranchPredicate = " < "; break;
-    case ICmpInst::ICMP_ULT:
-    case ICmpInst::ICMP_SLT: BranchPredicate = " >= "; break;
-    case ICmpInst::ICMP_UGT:
-    case ICmpInst::ICMP_SGT: BranchPredicate = " <= "; break;
-    default: llvm_unreachable("Illegal ICmp predicate");
-  }
-
-//  DEBUG(errs() << "Branch Condition: " << *BranchCondition << "\n");
-  
-  std::string compLHS, compRHS;
-  Value *CondOp1 = BranchCondition->getOperand(0);
-//  DEBUG(errs() << "CondOp1: " << *CondOp1 << "\n");
-  if (Constant *constOp1 = dyn_cast<Constant>(CondOp1)) {
-//    DEBUG(errs() << "Condition Operand is a constant, inserting it as is.\n");
-    compLHS = (constOp1->getUniqueInteger()).toString(10,1);
-  } else {
-//    DEBUG(errs() << "Condition Operand is not a constant, ");
-    if(traverseUseDefChain(dyn_cast<Instruction>(CondOp1), InductionVariable)) {
-//      DEBUG(errs() << "it is the IV.\n");
-      compLHS = GetValueName(IV);
+
+  Optional<Loop::LoopBounds> OLB = L->getBounds(*SE);
+  if(OLB.hasValue()) {
+    Loop::LoopBounds LB = OLB.getValue();
+    Value *StartValue = &(LB.getInitialIVValue());
+    Instruction *StepInstruction = &(LB.getStepInst());
+    Value *StepValue = LB.getStepValue();
+    Value *FinalValue = &(LB.getFinalIVValue());
+    ICmpInst::Predicate LoopPredicate = LB.getCanonicalPredicate();
+    std::string BranchPredicate;
+    switch(LoopPredicate) {
+      case ICmpInst::ICMP_EQ:  BranchPredicate = " == "; break;
+      case ICmpInst::ICMP_NE:  BranchPredicate = " != "; break;
+      case ICmpInst::ICMP_ULE:
+      case ICmpInst::ICMP_SLE: BranchPredicate = " < "; break;
+      case ICmpInst::ICMP_UGE:
+      case ICmpInst::ICMP_SGE: BranchPredicate = " > "; break;
+      case ICmpInst::ICMP_ULT:
+      case ICmpInst::ICMP_SLT: BranchPredicate = " <= "; break;
+      case ICmpInst::ICMP_UGT:
+      case ICmpInst::ICMP_SGT: BranchPredicate = " >= "; break;
+      default: llvm_unreachable("Illegal ICmp predicate");
+    }
+    errs() << "IV: " << *IV<< "\n";
+    errs() << "StartValue: " << *StartValue<< "\n";
+    errs() << "StepInstruction: " << *StepInstruction<< "\n";
+    errs() << "StepValue: " << *StepValue<< "\n";
+    errs() << "FinalValue: " << *FinalValue<< "\n";
+    errs() << "Branch Predicate: " << BranchPredicate<< "\n";
+    errs() << "Direction: " << ((LB.getDirection() == Loop::LoopBounds::Direction::Increasing) 
+        ? "increasing" : "decreasing") << "\n";
+
+    std::string startStr; 
+    if (ConstantInt *startConst = dyn_cast<ConstantInt>(StartValue)) {
+      startStr = std::to_string(startConst->getSExtValue());
+    } else {
+      startStr = GetValueName(StartValue);
+    }
+    std::string finalStr; 
+    if (ConstantInt *finalConst = dyn_cast<ConstantInt>(FinalValue)) {
+      finalStr = std::to_string(finalConst->getSExtValue());
     } else {
-//      DEBUG(errs() << "it is another variable.\n");
-      compLHS = GetValueName(CondOp1);
+      finalStr = GetValueName(FinalValue);
     }
-  }
-  Value *CondOp2 = BranchCondition->getOperand(1);
-//  DEBUG(errs() << "CondOp2: " << *CondOp2 << "\n");
-  if (Constant *constOp2 = dyn_cast<Constant>(CondOp2)) {
-//    DEBUG(errs() << "Condition Operand is a constant, inserting it as is.\n");
-    compRHS = (constOp2->getUniqueInteger()).toString(10,1);
+    std::string stepStr; 
+    if (ConstantInt *stepConst = dyn_cast<ConstantInt>(StepValue)) {
+      stepStr = std::to_string(stepConst->getSExtValue());
+    } else {
+      stepStr = GetValueName(StepValue);
+    }
+
+    errs() << "\n  for ( " << IVName << " = " << startStr << "; " 
+      << IVName << BranchPredicate << finalStr << "; " 
+      << IVName << " = " << IVName << " + " << stepStr << ") {\n";
+
+    Out << "\n  for ( " << IVName << " = " << startStr << "; " 
+      << IVName << BranchPredicate << finalStr << "; " 
+      << IVName << " = " << IVName << " + " << stepStr << ") {\n";
+
   } else {
-//    DEBUG(errs() << "Condition Operand is not a constant.\n");
-    if(traverseUseDefChain(dyn_cast<Instruction>(CondOp2), InductionVariable)) {
-//      DEBUG(errs() << "It is the IV.\n");
-      compRHS = GetValueName(IV);
+    llvm_unreachable("No Loop Bounds!");
+    Value *StartValue = ID.getStartValue();
+    const SCEV *Step = ID.getStep();
+    //  unsigned IterationCount = SE->getSmallConstantMaxTripCount(L); 
+    //  DEBUG(errs() << "StartValue: " << *StartValue << "\nStep: " << *Step << "\nIterationCount: " << IterationCount << "\n");
+
+    std::string IVOp;
+
+    if (const SCEVConstant *stepConst = dyn_cast<SCEVConstant>(Step)) {
+      if(stepConst->getAPInt().isNonNegative()) {
+        IVOp = " + ";  
+      }
+    }
+
+
+    std::string BranchPredicate;
+    ICmpInst *BranchCondition = dyn_cast<ICmpInst>(dyn_cast<BranchInst>(ExitingBranch)->getCondition());
+    switch(BranchCondition->getPredicate()) {
+      case ICmpInst::ICMP_EQ:  BranchPredicate = " != "; break;
+      case ICmpInst::ICMP_NE:  BranchPredicate = " == "; break;
+      case ICmpInst::ICMP_ULE:
+      case ICmpInst::ICMP_SLE: BranchPredicate = " > "; break;
+      case ICmpInst::ICMP_UGE:
+      case ICmpInst::ICMP_SGE: BranchPredicate = " < "; break;
+      case ICmpInst::ICMP_ULT:
+      case ICmpInst::ICMP_SLT: BranchPredicate = " >= "; break;
+      case ICmpInst::ICMP_UGT:
+      case ICmpInst::ICMP_SGT: BranchPredicate = " <= "; break;
+      default: llvm_unreachable("Illegal ICmp predicate");
+    }
+
+    //  DEBUG(errs() << "Branch Condition: " << *BranchCondition << "\n");
+
+    std::string compLHS, compRHS;
+    Value *CondOp1 = BranchCondition->getOperand(0);
+    //  DEBUG(errs() << "CondOp1: " << *CondOp1 << "\n");
+    if (Constant *constOp1 = dyn_cast<Constant>(CondOp1)) {
+      //    DEBUG(errs() << "Condition Operand is a constant, inserting it as is.\n");
+      compLHS = (constOp1->getUniqueInteger()).toString(10,1);
     } else {
-//      DEBUG(errs() << "It is another variable.\n");
-      compRHS = GetValueName(CondOp2);
+      //    DEBUG(errs() << "Condition Operand is not a constant, ");
+      if(traverseUseDefChain(dyn_cast<Instruction>(CondOp1), InductionVariable)) {
+        //      DEBUG(errs() << "it is the IV.\n");
+        compLHS = GetValueName(IV);
+      } else {
+        //      DEBUG(errs() << "it is another variable.\n");
+        compLHS = GetValueName(CondOp1);
+      }
+    }
+    Value *CondOp2 = BranchCondition->getOperand(1);
+    //  DEBUG(errs() << "CondOp2: " << *CondOp2 << "\n");
+    if (Constant *constOp2 = dyn_cast<Constant>(CondOp2)) {
+      //    DEBUG(errs() << "Condition Operand is a constant, inserting it as is.\n");
+      compRHS = (constOp2->getUniqueInteger()).toString(10,1);
+    } else {
+      //    DEBUG(errs() << "Condition Operand is not a constant.\n");
+      if(traverseUseDefChain(dyn_cast<Instruction>(CondOp2), InductionVariable)) {
+        //      DEBUG(errs() << "It is the IV.\n");
+        compRHS = GetValueName(IV);
+      } else {
+        //      DEBUG(errs() << "It is another variable.\n");
+        compRHS = GetValueName(CondOp2);
+      }
     }
-  }
 
-  std::string startStr; 
+    std::string startStr; 
     if (Constant *startConst = dyn_cast<Constant>(StartValue)) {
       startStr = (startConst->getUniqueInteger()).toString(10,1);
     } else {
@@ -3282,203 +3348,24 @@ void CWriter::printLoop(Loop *L) {
     }
 
 
-//  DEBUG(errs() << "  for ( " << IVName << " = " << startStr << "; " 
-//    << compLHS << BranchPredicate << compRHS << "; " 
-//    << IVName << " = " << IVName << IVOp << *Step << ") {\n");
-  
-  Out << "\n  for ( " << IVName << " = " << startStr << "; " 
-    << compLHS << BranchPredicate << compRHS << "; " 
-    << IVName << " = " << IVName << IVOp << *Step << ") {\n";
-  
-//  //Stack for keeping track of induction variable chain
-//  std::stack<Instruction*> IndVarChain;
-//  Value *LBound, *IV, *InitialCondition;
-//  BasicBlock *LHeader = L->getHeader();
-//  std::string LCondition, IndVarIncrement, IndVarInit;
-//  DEBUG(errs() << "Printing loop header: ");
-//  DEBUG(errs() << LHeader->getName()<<"\n");
-//  BranchInst *LBranch = nullptr;
-//  BasicBlock *CurBlock = LHeader;
-//  std::set<BasicBlock*> *visitSet = new std::set<BasicBlock*>();
-//  visitSet->insert(CurBlock);
-//  //  if(findLoopBranch(&LBranch, CurBlock, LHeader, visitSet)) {
-//  //        DEBUG(errs() << "Found Loop branch: " << *LBranch << "\n");
-//  //  }
-//  if(LBranch = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator())) {
-//    DEBUG(errs() << "Printing loop branch: ");
-//    DEBUG(errs() << *(LBranch)<<"\n");
-//    if(ICmpInst *LCmp= dyn_cast<ICmpInst>(LBranch->getOperand(0))) {
-//      DEBUG(errs() << "Printing loop compare instruction: ");
-//      DEBUG(errs() << *(LCmp)<<"\n");
-//      if (PHINode *LIndVar = L->getCanonicalInductionVariable()) {
-//        LInductionVars.insert(LIndVar);
-//        //Loop has a canonical induction variable
-//        DEBUG(errs() << "Loop has canonical induction variable!\n");
-//        DEBUG(errs() << "Printing loop induction variable:\n");
-//        DEBUG(errs() << *(LIndVar)<<"\n");
-//        IndVarChain.push(dyn_cast<Instruction>(LIndVar));
-//        if(extractIndVarChain(dyn_cast<Instruction>(LIndVar), &IndVarChain, LBranch, 1)) {
-//          IV = dyn_cast<Value>(LIndVar);
-//          DEBUG(errs() << "Found Correct Path!\n");
-//          DEBUG(errs() << "Loop Induction Variable: " << *IV << "\n");
-//          // DEBUG(errs() << "Printing stack contents:\n");
-//          // while(!IndVarChain.empty()) {
-//          //   DEBUG(errs() << *(IndVarChain.top()) << "\n");
-//          //   IndVarChain.pop();
-//          // }
-//        }
-//      } else {
-//        DEBUG(errs() << "Loop does not have canonical induction variable, need to find one!\n");
-//        BasicBlock::iterator i,e;
-//        for(i = LHeader->begin(), e = LHeader->end(); i!=e; ++i) {
-//          if(PHINode *PNode = dyn_cast<PHINode>(&*i)) {
-//            DEBUG(errs() << "PHI Node: \n" << *PNode << "\n");
-//            IndVarChain.push(dyn_cast<Instruction>(PNode));
-//            if(extractIndVarChain(dyn_cast<Instruction>(PNode), &IndVarChain, LBranch, 1)) {
-//              IV = dyn_cast<Value>(PNode);
-//              DEBUG(errs() << "Found Correct Path!\n");
-//              DEBUG(errs() << "Loop Induction Variable: " << *IV << "\n");
-//              LInductionVars.insert(PNode);
-//              break;
-//            } else {
-//              IndVarChain.pop();
-//            }
-//          }
-//        }
-//        if(i==e)
-//          llvm_unreachable("No Induction Variable Found!! Something isn't right!\n");
-//      }
-//
-//      PHINode *IVPHI = dyn_cast<PHINode>(IV);
-//      for(BasicBlock *BB : IVPHI->blocks()) {
-//        if(L->contains(BB)) {
-//          DEBUG(errs() << "This is the IV increment: " << *IVPHI->getIncomingValueForBlock(BB) << "\n");
-//          auto *Increment = dyn_cast<Instruction>(IVPHI->getIncomingValueForBlock(BB));
-//
-//          DEBUG(errs() << "OU1: " << *(Increment->getOperandUse(1)) << "\n");
-//
-//          for(auto &U : Increment->uses()) {
-//            DEBUG(errs() << "Uses: " << *U << "\n");
-//          }
-//        } else {
-//          DEBUG(errs() << "This is the IV initial condition: " << *IVPHI->getIncomingValueForBlock(BB) << "\n");
-//          InitialCondition = IVPHI->getIncomingValueForBlock(BB);
-//          if(Constant *IndVarInitConst = dyn_cast<Constant>(InitialCondition)) {
-//            IndVarInit = (IndVarInitConst->getUniqueInteger()).toString(10,1);
-//          } 
-//          else {
-//            IndVarInit = GetValueName(InitialCondition);
-//          }
-//        }
-//      }
-//
-//      llvm_unreachable("HERE!");
-//
-//      while (!IndVarChain.empty()) {
-//        //Now that we have all the instructions acting on the induction variable, need to
-//        //extract the following: 1) Loop Bound, 2) initial condition, 3) increment
-//        Instruction *StackTop = IndVarChain.top();
-//        IndVarChain.pop();
-//        if(StackTop == dyn_cast<Instruction>(LCmp)) {
-//          //First instruction in the stack is going to be the cmpinst. Use it to extract bound 
-//          DEBUG(errs() << "Extracting Bounds from cmpinst!\n");
-//          for (Use &U : StackTop->operands()) {
-//            Value *v = U.get();
-//            if(v!=IndVarChain.top()) {
-//              DEBUG(errs() << *v << "\n");
-//              LBound = v;
-//            }
-//          }
-//          DEBUG(errs() << "Extracting cmp condition\n");
-//          switch(LCmp->getPredicate()) {
-//            case ICmpInst::ICMP_EQ:  LCondition = " == "; break;
-//            case ICmpInst::ICMP_NE:  LCondition = " != "; break;
-//            case ICmpInst::ICMP_ULE:
-//            case ICmpInst::ICMP_SLE: LCondition = " <= "; break;
-//            case ICmpInst::ICMP_UGE:
-//            case ICmpInst::ICMP_SGE: LCondition = " >= "; break;
-//            case ICmpInst::ICMP_ULT:
-//            case ICmpInst::ICMP_SLT: LCondition = " < "; break;
-//            case ICmpInst::ICMP_UGT:
-//            case ICmpInst::ICMP_SGT: LCondition = " > "; break;
-//            default: llvm_unreachable("Illegal ICmp predicate");
-//          }
-//        }
-//        else if(BinaryOperator *STBinOp = dyn_cast<BinaryOperator>(StackTop)) {
-//          // Next instructions are going to be arithmetic ops performed on the induction variable 
-//          switch(STBinOp->getOpcode()) { 
-//            case Instruction::Add:
-//            case Instruction::FAdd: IndVarIncrement += " + "; break;
-//            case Instruction::Sub:
-//            case Instruction::FSub: IndVarIncrement += " - "; break;
-//            case Instruction::Mul:
-//            case Instruction::FMul: IndVarIncrement += " * "; break;
-//            case Instruction::URem:
-//            case Instruction::SRem:
-//            case Instruction::FRem: IndVarIncrement += " % "; break;
-//            case Instruction::UDiv:
-//            case Instruction::SDiv:
-//            case Instruction::FDiv: IndVarIncrement += " / "; break;
-//            case Instruction::And: IndVarIncrement +=  " & "; break;
-//            case Instruction::Or: IndVarIncrement +=   " | "; break;
-//            case Instruction::Xor: IndVarIncrement +=  " ^ "; break;
-//            case Instruction::Shl : IndVarIncrement += " << "; break;
-//            case Instruction::LShr:
-//            case Instruction::AShr: IndVarIncrement += " >> "; break;
-//            default:
-//                                    DEBUG(errs() << "Invalid operator type!" << *STBinOp);
-//                                    llvm_unreachable(0);
-//          }
-//          DEBUG(errs() << "Extracting induction variable increment!\n");
-//          for (Use &U : StackTop->operands()) {
-//            Value *v = U.get();
-//            if(v!=IndVarChain.top()) {
-//              DEBUG(errs() << *v << "\n");
-//              if(Constant *LBoundConst = dyn_cast<Constant>(v)) {
-//                IndVarIncrement += (LBoundConst->getUniqueInteger()).toString(10,1);
-//              } 
-//              else {
-//                IndVarIncrement += v->getName();
-//              }
-//            }
-//          }
-//        }
-//        else if(StackTop == dyn_cast<Instruction>(IV)) {
-//          //Last instruction is going to be the induction variable phi node 
-//          DEBUG(errs() << "Extract initial condition from Phi node\n");
-//          PHINode *PN = dyn_cast<PHINode>(StackTop);
-//          for (BasicBlock* BB : PN->blocks()) {
-//            DEBUG(errs() << BB->getName()<< "\n");
-//            if(!L->contains(BB)) {
-//              DEBUG(errs() << "Found entry point to loop, reading initial condition\n");
-//              InitialCondition = PN->getIncomingValueForBlock(BB);
-//              if(Constant *IndVarInitConst = dyn_cast<Constant>(InitialCondition)) {
-//                IndVarInit = (IndVarInitConst->getUniqueInteger()).toString(10,1);
-//              } 
-//              else {
-//                IndVarInit = GetValueName(InitialCondition);
-//              }
-//              break;
-//            }
-//          }
-//          Out << "\n  for ( " << GetValueName(IV) << " = " << IndVarInit << "; " << GetValueName(IV) << LCondition << GetValueName(LBound) << "; " 
-//            << GetValueName(IV) << " = " << GetValueName(IV) << IndVarIncrement << ") {\n";
-//        }
-//      }
-//    }
-//  } else {
-//    llvm_unreachable("Unable to find loop branch!\n");
-//  }
+    //  DEBUG(errs() << "  for ( " << IVName << " = " << startStr << "; " 
+    //    << compLHS << BranchPredicate << compRHS << "; " 
+    //    << IVName << " = " << IVName << IVOp << *Step << ") {\n");
+
+    Out << "\n  for ( " << IVName << " = " << startStr << "; " 
+      << compLHS << BranchPredicate << compRHS << "; " 
+      << IVName << " = " << IVName << IVOp << *Step << ") {\n";
+  }
 
 
   BasicBlock *BB = L->getHeader();
-//  printBBorLoop(BB);
+  //  printBBorLoop(BB);
   printBasicBlock(BB);
-//  Loop *BBLoop = LI->getLoopFor(BB);
-//  if (BBLoop == L)
-//    printBasicBlock(BB);
-//  else if (BB == BBLoop->getHeader() && BBLoop->getParentLoop() == L)
-//    printLoop(BBLoop);
+  //  Loop *BBLoop = LI->getLoopFor(BB);
+  //  if (BBLoop == L)
+  //    printBasicBlock(BB);
+  //  else if (BB == BBLoop->getHeader() && BBLoop->getParentLoop() == L)
+  //    printLoop(BBLoop);
 
   //  Out << "  do {     /* Syntactic loop '" << L->getHeader()->getName()
   //    << "' to make GCC happy */\n";
@@ -3509,7 +3396,7 @@ void CWriter::printBasicBlock(BasicBlock *BB) {
       break;
     }
 
-//  if (NeedsLabel) Out << "/* " << GetValueName(BB) << ": */\n";
+  //  if (NeedsLabel) Out << "/* " << GetValueName(BB) << ": */\n";
   Out << "/* " << GetValueName(BB) << ": */\n";
 
   // Output all of the instructions in the basic block...
@@ -3525,10 +3412,10 @@ void CWriter::printBasicBlock(BasicBlock *BB) {
           bool UserPHI = false;
           bool UserCMP = false;
           bool UserOTHER = false;
-////          DEBUG(errs() << "Instruction uses induction variable\n");
+          ////          DEBUG(errs() << "Instruction uses induction variable\n");
           for (User *IUser : I->users()) {
             if (Instruction *UserInst = dyn_cast<Instruction>(IUser)) {
-//              DEBUG(errs() << "User: " << *UserInst << "\n");
+              //              DEBUG(errs() << "User: " << *UserInst << "\n");
               if (dyn_cast<PHINode>(UserInst)) {
                 UserPHI = true;
               } else if (dyn_cast<ICmpInst>(UserInst)) {
@@ -3536,8 +3423,8 @@ void CWriter::printBasicBlock(BasicBlock *BB) {
               } else {
                 UserOTHER = true;
               }
-//              skip = true;
-//              break;
+              //              skip = true;
+              //              break;
             }
           }
           if (UserPHI && UserCMP && !UserOTHER) {
@@ -3549,13 +3436,13 @@ void CWriter::printBasicBlock(BasicBlock *BB) {
         break;
     }
     if(skip){ 
-//      DEBUG(errs() << "Skipping instruction that increments Induction Variable!\n");
+      //      DEBUG(errs() << "Skipping instruction that increments Induction Variable!\n");
       Out << "/* Skipped induction variable use: " << *I << " */\n";
       continue;
     }
     if(PHINode *PN = dyn_cast<PHINode>(I)) {
       if (LInductionVars.find(PN) != LInductionVars.end()) { 
-//        DEBUG(errs() << "Skipping PHINode for Induction Variable!\n");
+        //        DEBUG(errs() << "Skipping PHINode for Induction Variable!\n");
         Out << "/* PHINode of induction variable was here */\n";
         continue;
       }
@@ -3622,11 +3509,7 @@ void CWriter::visitSwitchInst(SwitchInst &SI) {
     printPHICopiesForSuccessor (SI.getParent(), SI.getDefaultDest(), 2);
     printBranchToBlock(SI.getParent(), SI.getDefaultDest(), 2);
 
-    // Skip the first item since that's the default case.
 
-    //for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) {
-    //for (auto i = SI.case_begin(), e = SI.case_end(); i != e; ++i) {
-    
     // CHECK: Needs much testing
     for (auto Case : SI.cases()) {
       ConstantInt* CaseVal = Case.getCaseValue();
@@ -3644,7 +3527,6 @@ void CWriter::visitSwitchInst(SwitchInst &SI) {
 
   } else { // model as a series of if statements
     Out << "  ";
-    //    for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) {
     // CHECK: Needs much testing
     for (auto Case : SI.cases()) {
       Out << "if (";
@@ -3697,7 +3579,7 @@ void CWriter::printPHICopiesForSuccessor (BasicBlock *CurBlock,
   for (BasicBlock::iterator I = Successor->begin(); isa<PHINode>(I); ++I) {
     PHINode *PN = cast<PHINode>(I);
     if(LInductionVars.find(PN) == LInductionVars.end()) {
-			Out << "/* Printing phi node: " << *PN << " */\n";
+      Out << "/* Printing phi node: " << *PN << " */\n";
       // Now we have to do the printing.
       Value *IV = PN->getIncomingValueForBlock(CurBlock);
       if (!isa<UndefValue>(IV) && !isEmptyType(IV->getType())) {
@@ -3707,8 +3589,8 @@ void CWriter::printPHICopiesForSuccessor (BasicBlock *CurBlock,
         Out << ";   /* for PHI node */\n";
       }
     } else {
-			Out << "/* Skipping phi node: " << *PN << " */\n";
-		}
+      Out << "/* Skipping phi node: " << *PN << " */\n";
+    }
   }
 }
 
@@ -3730,7 +3612,7 @@ void CWriter::printBBorLoop (BasicBlock *BB) {
   } else if(!ImmPostDommBlocks.empty() && ImmPostDommBlocks.top() == BB) {
     //DEBUG(errs() << "Reached block that is top of stack, return instead!\n");
     Out << "/* " << BB->getName() << " is top of stack, return instead! */\n";
-//    ImmPostDommBlocks.pop();
+    //    ImmPostDommBlocks.pop();
   } else {
     VisitedBlocks.insert(BB);
     if(Loop *LL = LI->getLoopFor(BB)) {
@@ -3786,9 +3668,9 @@ bool CWriter::findMatch(BasicBlock *CurrBlock, BasicBlock *CompBlock, BasicBlock
     bool res = false;
     for (auto succ: successors(CompBlock)) {
       if(FindVisitedBlocks.find(succ) == FindVisitedBlocks.end()) {
-      //DEBUG(errs() << "Visiting successor " << succ->getName() << " of " << CompBlock->getName() << "\n");
-      res = res || findMatch(CurrBlock, succ, ImmPostDomm);
-      if (res == true) break;
+        //DEBUG(errs() << "Visiting successor " << succ->getName() << " of " << CompBlock->getName() << "\n");
+        res = res || findMatch(CurrBlock, succ, ImmPostDomm);
+        if (res == true) break;
       } else {
         //DEBUG(errs() << "Skipping successor " << succ->getName() << " of " << CompBlock->getName() << "\n");
       }
@@ -3800,7 +3682,7 @@ bool CWriter::findMatch(BasicBlock *CurrBlock, BasicBlock *CompBlock, BasicBlock
 // that immediately succeeds the current one.
 //
 void CWriter::visitBranchInst(BranchInst &I) {
-  //DEBUG(errs() << "Visiting Branch Instruction: " << I <<"\n");
+  errs() << "Visiting Branch Instruction: " << I <<"\n";
   Out << "\n/* Branch: " << I << " */\n";
 
   if (I.isConditional()) {
@@ -3813,16 +3695,15 @@ void CWriter::visitBranchInst(BranchInst &I) {
     if (ImmPostDomm != BB1 && ImmPostDomm != BB0) {
       findMatch(BB0, BB1, ImmPostDomm);
       FindVisitedBlocks.clear();
-      //llvm_unreachable("here!");
     }
     if(Loop *L = LI->getLoopFor(I.getParent())) {
       if(L == LI->getLoopFor(BB0) && !(L == LI->getLoopFor(BB1))) {
-        //DEBUG(errs() << "This is a loop branch!\n");
+        errs() << "This is a loop branch!\n";
         Out << "/* This is a loop branch! */\n";
         //BB0 is in the loop. Print it if it hsn't been printed
         if(VisitedBlocks.find(BB0) != VisitedBlocks.end()) {
-          //DEBUG(errs() << "Branching back to header: " << BB0->getName() << "\n");
-          //DEBUG(errs() << "This is the end of the loop, closing!\n");
+          errs() << "Branching back to header: " << BB0->getName() << "\n";
+          errs() << "This is the end of the loop, closing!\n";
           Out << "/* Branching back to header: " << BB0->getName() << " */\n";
           Out << "/* Closing loop! */\n";
           //BB0 is the loop header. CLose the loop then print BB1.
@@ -3830,39 +3711,18 @@ void CWriter::visitBranchInst(BranchInst &I) {
           Out << " }\n";
           printPHICopiesForSuccessor (I.getParent(), BB1, 2);
           printBBorLoop(BB1);
-//          if (!ImmPostDommBlocks.empty())  
-//            DEBUG(errs() <<  ImmPostDommBlocks.top()->getName() << " is the top of the stack!\n");
-//          if (!ImmPostDommBlocks.empty() && ImmPostDommBlocks.top() == BB1) {
-//            ImmPostDommBlocks.pop();
-//            DEBUG(errs() << BB1->getName() << ": skipping the print and popping from stack!\n");
-//          } else {
-//            printBBorLoop(BB1);
-//          }
-          //          printPHICopiesForSuccessor (I.getParent(), BB1, 2);
-          //          printBBorLoop(BB1);
         } else {
-          //DEBUG(errs() << "Not branching to header! Branching to: " << BB0->getName() << "\n");
+          errs() << "Not branching to header! Branching to: " << BB0->getName() << "\n";
           //BB0 is not the loop header. That means we are entering loop body
-//          printVBBorLoop(BB0);
-//          DEBUG(errs() << "Here!\n");
-//          printPHICopiesForSuccessor (I.getParent(), BB1, 2);
-//          if (!ImmPostDommBlocks.empty())  
-//            DEBUG(errs() <<  ImmPostDommBlocks.top()->getName() << " is the top of the stack!\n");
-//          if (!ImmPostDommBlocks.empty() && ImmPostDommBlocks.top() == BB1) {
-//            ImmPostDommBlocks.pop();
-//            DEBUG(errs() << BB1->getName() << ": skipping the print and popping from stack!\n");
-//          } else {
-//            printBBorLoop(BB1);
-//          }
 
           llvm_unreachable("loop branch unhandled!\n");
         }
       } else if(L == LI->getLoopFor(BB1) && !(L == LI->getLoopFor(BB0))) {
-        //DEBUG(errs() << "This is a loop branch!\n");
+        errs() << "This is a loop branch!\n";
         Out << "/* This is a loop branch! */\n";
         if(VisitedBlocks.find(BB1) != VisitedBlocks.end()) {
-          //DEBUG(errs() << "Branching back to header: " << BB1->getName() << "\n");
-          //DEBUG(errs() << "This is the end of the loop, closing!\n");
+          errs() << "Branching back to header: " << BB1->getName() << "\n";
+          errs() << "This is the end of the loop, closing!\n";
           Out << "/* Branching back to header: " << BB1->getName() << " */\n";
           Out << "/* Closing loop! */\n";
           //BB0 is the loop header. CLose the loop then print BB1.
@@ -3870,290 +3730,142 @@ void CWriter::visitBranchInst(BranchInst &I) {
           Out << " }\n";
           printPHICopiesForSuccessor (I.getParent(), BB0, 2);
           printBBorLoop(BB0);
-//          if (!ImmPostDommBlocks.empty())  
-//            DEBUG(errs() <<  ImmPostDommBlocks.top()->getName() << " is the top of the stack!\n");
-//          if (!ImmPostDommBlocks.empty() && ImmPostDommBlocks.top() == BB0) {
-//            ImmPostDommBlocks.pop();
-//            DEBUG(errs() << BB0->getName() << ": skipping the print and popping from stack!\n");
-//          } else {
-//            printBBorLoop(BB0);
-//          }
-          //          printPHICopiesForSuccessor (I.getParent(), BB0, 2);
-          //          printBBorLoop(BB0);
         } else {
-          //DEBUG(errs() << "Not branching to header! Branching to: " << BB1->getName() << "\n");
+          errs() << "Not branching to header! Branching to: " << BB1->getName() << "\n";
           //BB1 is not the loop header. That means we are entering loop body
-//          printBBorLoop(BB1);
-//          printPHICopiesForSuccessor (I.getParent(), BB1, 2);
-//          if (!ImmPostDommBlocks.empty())  
-//            DEBUG(errs() <<  ImmPostDommBlocks.top()->getName() << " is the top of the stack!\n");
-//          if (!ImmPostDommBlocks.empty() && ImmPostDommBlocks.top() == BB0) {
-//            ImmPostDommBlocks.pop();
-//            DEBUG(errs() << BB0->getName() << ": skipping the print and popping from stack!\n");
-//          } else {
-//            printBBorLoop(BB1);
-//          }
           llvm_unreachable("loop branch unhandled!\n");
         }
       } else {
-        //DEBUG(errs() << "This is a conditional statement within a loop!\n");
+        errs() << "This is a conditional statement within a loop!\n";
         Out << "/* This is a conditional statement within a loop! */\n";
-				//DEBUG(errs() << ImmPostDomm->getName() << " is the immediate post dominator of " << BB0->getName() << " and " << BB1->getName() << "\n");
-//    if(ImmPostDommBlocks.empty() || (!ImmPostDommBlocks.empty() && ImmPostDommBlocks.top() != ImmPostDomm)) {
-      if(VisitedBlocks.find(ImmPostDomm) != VisitedBlocks.end()) {
-        //DEBUG(errs() << "Not pushing " << ImmPostDomm->getName() << " because it has already been visited!\n");
-      } else {
-        //DEBUG(errs() << "Pushing " << ImmPostDomm->getName() << " onto stack!\n");
-        ImmPostDommBlocks.push(ImmPostDomm);
-      }
+        errs() << ImmPostDomm->getName() << " is the immediate post dominator of " << BB0->getName() << " and " << BB1->getName() << "\n";
+        if(VisitedBlocks.find(ImmPostDomm) != VisitedBlocks.end()) {
+          errs() << "Not pushing " << ImmPostDomm->getName() << " because it has already been visited!\n";
+        } else {
+          errs() << "Pushing " << ImmPostDomm->getName() << " onto stack!\n";
+          ImmPostDommBlocks.push(ImmPostDomm);
+        }
 
         bool noElse = false;
-//        DEBUG(errs() << "Pushing " << ImmPostDomm->getName() << " onto stack!\n");
-//        ImmPostDommBlocks.push(ImmPostDomm);
         if(BB1 == ImmPostDomm) {
-//          DEBUG(errs() << "Pushing " << ImmPostDomm->getName() << " onto stack!\n");
-//          ImmPostDommBlocks.push(ImmPostDomm);
-//        } else {
           noElse = true;
         }
         Out << "  if (";
         writeOperand(I.getCondition(), ContextCasted);
         Out << ") { /* " << I << "*/\n";
         printPHICopiesForSuccessor (I.getParent(), BB0, 2);
-//        printPHICopiesForSuccessor (I.getParent(), BB1, 2);
         printBBorLoop(BB0);
-        //DEBUG(errs() << "Back to handling " << I.getParent()->getName() << ": " << I << "\n");
+        errs() << "Back to handling " << I.getParent()->getName() << ": " << I << "\n";
         Out << "/* Back to handling " << I.getParent()->getName() << ": " << I << " */\n";
         if (!noElse) {
-          //DEBUG(errs() << "Printing else!\n");
+          errs() << "Printing else!\n";
           Out << "  } else { /*" << I << "*/\n";
           printPHICopiesForSuccessor (I.getParent(), BB1, 2);
           ElseBlocks.push(BB1);
           ElseBranches.push(&I);
           printBBorLoop(BB1);
-          //DEBUG(errs() << "Back to handling " << I.getParent()->getName() << ": " << I << "\n");
-          //DEBUG(errs() << "Check to see if else block is closed!\n");
+          errs() << "Back to handling " << I.getParent()->getName() << ": " << I << "\n";
+          errs() << "Check to see if else block is closed!\n";
           Out << "/* Back to handling " << I.getParent()->getName() << ": " << I << " */\n" ;
           Out << "/* Check to see if else block is closed! */\n" ;
           if(!ElseBlocks.empty() && ElseBlocks.top() == BB1) {
-            //DEBUG(errs() << "Else block not closed, need to close braces!\n");
+            errs() << "Else block not closed, need to close braces!\n";
             Out << "/* Else block not closed, need to close braces! */\n" ;
             Out << "} /* closing " << *(ElseBranches.top()) << " */\n";
             ElseBranches.pop();
             ElseBlocks.pop();
           }
           if(!ImmPostDommBlocks.empty() && ImmPostDommBlocks.top() == ImmPostDomm) {
-            //DEBUG(errs() << "Will now pop post dom them handle it!\n");
+            errs() << "Will now pop post dom them handle it!\n";
             ImmPostDommBlocks.pop();
             printBBorLoop(ImmPostDomm);
           } else {
-            //DEBUG(errs() << "*!*!*!*!*!*!Not sure what is happening here!*!*!*!*!*!*!\n");
+            errs() << "*!*!*!*!*!*!Not sure what is happening here!*!*!*!*!*!*!\n";
           }
-          //          Out << "  } /* closing " << I << "*/\n";
         } else {
-      //DEBUG(errs() << "No else block. Adding one for phis, then moving to " << BB1->getName() << "!\n");
-      Out << "/* (3913) No else block. Adding one for phis, then moving to " << BB1->getName() << "! */\n";
-      Out << "  } /* closing " << I << "*/\n";
-        //DEBUG(errs() << "Will now pop post dom them handle it!\n");
-        ImmPostDommBlocks.pop();
-			Out << "else {\n";
-      printPHICopiesForSuccessor (I.getParent(), BB1, 2);
-      Out << "}\n";
+          errs() << "No else block. Adding one for phis, then moving to " << BB1->getName() << "!\n";
+          Out << "/* (3913) No else block. Adding one for phis, then moving to " << BB1->getName() << "! */\n";
+          Out << "  } /* closing " << I << "*/\n";
+          errs() << "Will now pop post dom them handle it!\n";
+          ImmPostDommBlocks.pop();
+          Out << "else {\n";
+          printPHICopiesForSuccessor (I.getParent(), BB1, 2);
+          Out << "}\n";
           printBBorLoop(BB1);
         }
       }
-      //        Out << "  if (";
-      //        writeOperand(I.getCondition(), ContextCasted);
-      //        Out << ") {\n";
-      //        printPHICopiesForSuccessor (I.getParent(), BB0, 2);
-      //        printBBorLoop(BB0);
-      //        Out << "  } else {\n";
-      //        printPHICopiesForSuccessor (I.getParent(), BB1, 2);
-      //        printBBorLoop(BB1);
-      //        Out << "  }\n";
     } else {
-      //DEBUG(errs() << "This is a conditional statement!\n");
-    //DEBUG(errs() << ImmPostDomm->getName() << " is the immediate post dominator of " << BB0->getName() << " and " << BB1->getName() << "\n");
-//    if(ImmPostDommBlocks.empty() || (!ImmPostDommBlocks.empty() && ImmPostDommBlocks.top() != ImmPostDomm)) {
+      errs() << "This is a conditional statement!\n";
+      errs() << ImmPostDomm->getName() << " is the immediate post dominator of " << BB0->getName() << " and " << BB1->getName() << "\n";
       if(VisitedBlocks.find(ImmPostDomm) != VisitedBlocks.end()) {
-        //DEBUG(errs() << "Not pushing " << ImmPostDomm->getName() << " because it has already been visited!\n");
+        errs() << "Not pushing " << ImmPostDomm->getName() << " because it has already been visited!\n";
       } else {
-        //DEBUG(errs() << "Pushing " << ImmPostDomm->getName() << " onto stack!\n");
+        errs() << "Pushing " << ImmPostDomm->getName() << " onto stack!\n";
         ImmPostDommBlocks.push(ImmPostDomm);
       }
       bool noElse = false;
-      //      DEBUG(errs() << "Pushing " << ImmPostDomm->getName() << " onto stack!\n");
-      //      ImmPostDommBlocks.push(ImmPostDomm);
       if(BB1 == ImmPostDomm) {
-        //        DEBUG(errs() << "Pushing " << ImmPostDomm->getName() << " onto stack!\n");
-        //        ImmPostDommBlocks.push(ImmPostDomm);
-        //      } else {
         noElse = true;
-    }
-    Out << "  if (";
-    writeOperand(I.getCondition(), ContextCasted);
-    Out << ") { /* " << I << "*/\n";
-    printPHICopiesForSuccessor (I.getParent(), BB0, 2);
-    printBBorLoop(BB0);
-    //DEBUG(errs() << "Back to handling " << I.getParent()->getName() << ": " << I << "\n");
-    Out << "/* Back to handling " << I.getParent()->getName() << ": " << I << " */\n" ;
-    if (!noElse) {
-      //DEBUG(errs() << "Printing else!\n");
-      Out << "/* Printing else! */\n" ;
-      Out << "  } else { /*" << I << "*/\n";
-      printPHICopiesForSuccessor (I.getParent(), BB1, 2);
-      ElseBlocks.push(BB1);
-      ElseBranches.push(&I);
-      printBBorLoop(BB1);
-      //DEBUG(errs() << "Back to handling " << I.getParent()->getName() << ": " << I << "\n");
-      //DEBUG(errs() << "Check to see if else block is closed!\n");
-      Out << "/* Back to handling " << I.getParent()->getName() << ": " << I << " */\n";
-      Out << "/* Check to see if else block is closed! */\n";
-      if(!ElseBlocks.empty() && ElseBlocks.top() == BB1) {
-        //DEBUG(errs() << "Else block not closed, need to close braces!\n");
-        Out << "/* Else block not closed, need to close braces! */\n";
-        Out << "} /* closing " << *(ElseBranches.top()) << " */\n";
-        ElseBranches.pop();
-        ElseBlocks.pop();
       }
-      if(!ImmPostDommBlocks.empty() && ImmPostDommBlocks.top() == ImmPostDomm) {
-        //DEBUG(errs() << "Will now pop post dom them handle it!\n");
-        ImmPostDommBlocks.pop();
-        printBBorLoop(ImmPostDomm);
+      Out << "  if (";
+      writeOperand(I.getCondition(), ContextCasted);
+      Out << ") { /* " << I << "*/\n";
+      printPHICopiesForSuccessor (I.getParent(), BB0, 2);
+      printBBorLoop(BB0);
+      errs() << "Back to handling " << I.getParent()->getName() << ": " << I << "\n";
+      Out << "/* Back to handling " << I.getParent()->getName() << ": " << I << " */\n" ;
+      if (!noElse) {
+        errs() << "Printing else!\n";
+        Out << "/* Printing else! */\n" ;
+        Out << "  } else { /*" << I << "*/\n";
+        printPHICopiesForSuccessor (I.getParent(), BB1, 2);
+        ElseBlocks.push(BB1);
+        ElseBranches.push(&I);
+        printBBorLoop(BB1);
+        errs() << "Back to handling " << I.getParent()->getName() << ": " << I << "\n";
+        errs() << "Check to see if else block is closed!\n";
+        Out << "/* Back to handling " << I.getParent()->getName() << ": " << I << " */\n";
+        Out << "/* Check to see if else block is closed! */\n";
+        if(!ElseBlocks.empty() && ElseBlocks.top() == BB1) {
+          errs() << "Else block not closed, need to close braces!\n";
+          Out << "/* Else block not closed, need to close braces! */\n";
+          Out << "} /* closing " << *(ElseBranches.top()) << " */\n";
+          ElseBranches.pop();
+          ElseBlocks.pop();
+        }
+        if(!ImmPostDommBlocks.empty() && ImmPostDommBlocks.top() == ImmPostDomm) {
+          errs() << "Will now pop post dom them handle it!\n";
+          ImmPostDommBlocks.pop();
+          printBBorLoop(ImmPostDomm);
+        } else {
+          errs() << "*!*!*!*!*!*!Not sure what is happening here!*!*!*!*!*!*!\n";
+        }
       } else {
-        //DEBUG(errs() << "*!*!*!*!*!*!Not sure what is happening here!*!*!*!*!*!*!\n");
-      }
-      //        Out << "  }\n";
-    } else {
-      //DEBUG(errs() << "No else block. Adding one for phis, then moving to " << BB1->getName() << "!\n");
-      Out << "/* (3985) No else block. Adding one for phis, then moving to " << BB1->getName() << "! */\n";
-      Out << "  } /* closing " << I << "*/\n";
-        //DEBUG(errs() << "Will now pop post dom them handle it!\n");
+        errs() << "No else block. Adding one for phis, then moving to " << BB1->getName() << "!\n";
+        Out << "/* (3985) No else block. Adding one for phis, then moving to " << BB1->getName() << "! */\n";
+        Out << "  } /* closing " << I << "*/\n";
+        errs() << "Will now pop post dom them handle it!\n";
         ImmPostDommBlocks.pop();
-			Out << "else {\n";
-      printPHICopiesForSuccessor (I.getParent(), BB1, 2);
-      Out << "}\n";
-			printBBorLoop(BB1);
-    }
-
-    //      DEBUG(errs() << "Now need to print exit block!\n");
-    //      if(!ImmPostDommBlocks.empty() && ImmPostDommBlocks.top() == ImmPostDomm) {
-    //        DEBUG(errs() << "Top is still ImmPostDomm!\n");
-    //        ImmPostDommBlocks.pop();
-    //      } else {
-    //        DEBUG(errs() << "Printing " << ImmPostDomm->getName() << "\n");
-    //        printBBorLoop(ImmPostDomm);
-    //      }
+        Out << "else {\n";
+        printPHICopiesForSuccessor (I.getParent(), BB1, 2);
+        Out << "}\n";
+        printBBorLoop(BB1);
+      }
     }
   } else {
-    //DEBUG(errs() << "This is an unconditional branch!\n");
-    //    printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(0), 0);
+    errs() << "This is an unconditional branch!\n";
     BasicBlock *BB = I.getSuccessor(0); 
-    //    if(Loop *L = LI->getLoopFor(I.getParent())) {
-    //      if(L == LI->getLoopFor(BB)) {
-    //        DEBUG(errs() << "This is a loop branch!\n");
-    //        if(VisitedBlocks.find(BB) != VisitedBlocks.end()) {
-    //          DEBUG(errs() << "Branching back to header: " << BB->getName() << "\n");
-    //          //BB0 is the loop header. CLose the loop then print BB1.
-    //          printPHICopiesForSuccessor (I.getParent(), BB, 2);
-    //          Out << " }\n";
-    //        } else {
-    //          DEBUG(errs() << "Not branching to header! Branching to: " << BB->getName() << "\n");
-    //          printPHICopiesForSuccessor (I.getParent(), BB, 2);
-    //          if (!ImmPostDommBlocks.empty())  
-    //            DEBUG(errs() <<  ImmPostDommBlocks.top()->getName() << " is the top of the stack!\n");
-    //          if (!ImmPostDommBlocks.empty() && ImmPostDommBlocks.top() == BB) {
-    //            ImmPostDommBlocks.pop();
-    //            DEBUG(errs() << "Popping " << BB->getName() << " from the stack and returning!\n");
-    ////            if (!ImmPostDommBlocks.empty() && ImmPostDommBlocks.top() == BB) {
-    ////              DEBUG(errs() << BB->getName() << "is still in the stack, skipping the print for now!\n");
-    ////            } else {
-    ////              DEBUG(errs() << "Printing " << BB->getName() << " now!\n");
-    ////              printBBorLoop(BB);
-    ////            }
-    //          } else {
-    //            printBBorLoop(BB);
-    //          }
-    //          //BB0 is not the loop hearder. Case not handled!
-    //        }
-    //      } else {
-    //        DEBUG(errs()<<"This is not a loop branch!\n");
-    //        printPHICopiesForSuccessor (I.getParent(), BB, 2);
-    //        if (!ImmPostDommBlocks.empty())  
-    //          DEBUG(errs() <<  ImmPostDommBlocks.top()->getName() << " is the top of the stack!\n");
-    //        if (!ImmPostDommBlocks.empty() && ImmPostDommBlocks.top() == BB) {
-    //          ImmPostDommBlocks.pop();
-    //          DEBUG(errs() << "Popping " << BB->getName() << " from the stack and returning!\n");
-    ////          if (!ImmPostDommBlocks.empty() && ImmPostDommBlocks.top() == BB) {
-    ////            DEBUG(errs() << BB->getName() << "is still in the stack, skipping the print for now!\n");
-    ////          } else {
-    ////            DEBUG(errs() << "Printing " << BB->getName() << " now!\n");
-    ////            printBBorLoop(BB);
-    ////          }
-    //        } else {
-    //          printBBorLoop(BB);
-    //        }
-    //      }
-    //      llvm_unreachable("Unhandled unconditional branch!");
-    //    } else {
     printPHICopiesForSuccessor (I.getParent(), BB, 2);
     if (!ElseBlocks.empty() && I.getParent() == ElseBlocks.top()) {
-      //DEBUG(errs() << "Branch marks end of else block, need to close braces!\n");
+      errs() << "Branch marks end of else block, need to close braces!\n";
       Out << "/* Branch marks end of else block, need to close braces! */\n";
       Out << "} /* closing " << *(ElseBranches.top()) << " */\n";
       ElseBranches.pop();
       ElseBlocks.pop();
     }
     printBBorLoop(BB);
-    //      if (!ImmPostDommBlocks.empty())  
-    //        DEBUG(errs() <<  ImmPostDommBlocks.top()->getName() << " is the top of the stack!\n");
-    //      if (!ImmPostDommBlocks.empty() && ImmPostDommBlocks.top() == BB) {
-    //        ImmPostDommBlocks.pop();
-    //        DEBUG(errs() << "Popping " << BB->getName() << " from the stack and returning!\n");
-    ////        if (!ImmPostDommBlocks.empty() && ImmPostDommBlocks.top() == BB) {
-    ////          DEBUG(errs() << BB->getName() << "is still in the stack, skipping the print for now!\n");
-    ////        } else {
-    ////          DEBUG(errs() << "Printing " << BB->getName() << " now!\n");
-    ////          printBBorLoop(BB);
-    ////        }
-    //      } else {
-    //        printBBorLoop(BB);
-    //      }
-    // VisitedBlocks.insert(BB);
-    // if(Loop *LL = LI->getLoopFor(BB)) {
-    //   if (LL->getHeader() == BB)
-    //     printLoop(LL);
-    // } else {
-    //   printBasicBlock(BB);
-    // }
-    //    }
   }
   Out << "\n";
-
-
-  //    if (isGotoCodeNecessary(I.getParent(), I.getSuccessor(0))) {
-  //      Out << "  if (";
-  //      writeOperand(I.getCondition(), ContextCasted);
-  //      Out << ") {\n";
-  //
-  //      printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(0), 2);
-  //      printBranchToBlock(I.getParent(), I.getSuccessor(0), 2);
-  //
-  //      if (isGotoCodeNecessary(I.getParent(), I.getSuccessor(1))) {
-  //        Out << "  } else {\n";
-  //        printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(1), 2);
-  //        printBranchToBlock(I.getParent(), I.getSuccessor(1), 2);
-  //      }
-  //    } else {
-  //      // First goto not necessary, assume second one is...
-  //      Out << "  if (!";
-  //      writeOperand(I.getCondition(), ContextCasted);
-  //      Out << ") {\n";
-  //
-  //      printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(1), 2);
-  //      printBranchToBlock(I.getParent(), I.getSuccessor(1), 2);
-  //    }
-  //
 }
 
 // PHI nodes get copied into temporary values at the end of predecessor basic
@@ -4216,8 +3928,8 @@ static inline bool isConstantAllOnes(const Value *V) {
 bool isNot(const Value *V) {
   if (const BinaryOperator *Bop = dyn_cast<BinaryOperator>(V))
     return (Bop->getOpcode() == Instruction::Xor &&
-            (isConstantAllOnes(Bop->getOperand(1)) ||
-             isConstantAllOnes(Bop->getOperand(0))));
+        (isConstantAllOnes(Bop->getOperand(1)) ||  // true only for an Xor ...
+         isConstantAllOnes(Bop->getOperand(0))));  // ... with an operand, on either side, accepted by isConstantAllOnes
   return false;
 }
 
@@ -5007,7 +4719,7 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID) {
                llvm_unreachable(0);
                return false;
              }
-      
+
     case Intrinsic::dbg_value:
     case Intrinsic::dbg_declare:
              return true; // ignore these intrinsics
@@ -5359,9 +5071,9 @@ void CWriter::visitAllocaInst(AllocaInst &I) {
 
 void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I,
     gep_type_iterator E, bool isArrayType, GetElementPtrInst *GEPI) {
-    //DEBUG(errs() << "Printing GEP\n");
-    //DEBUG(errs() << "\tPtr: " << *Ptr << "\n");
-    //DEBUG(errs() << "\tGEPI: " << *GEPI <<"\n"); 
+  //DEBUG(errs() << "Printing GEP\n");
+  //DEBUG(errs() << "\tPtr: " << *Ptr << "\n");
+  //DEBUG(errs() << "\tGEPI: " << *GEPI <<"\n"); 
   // If there are no indices, just print out the pointer.
   if (I == E) {
     //DEBUG(errs() << "I==E: Calling writeOperand()\n");
@@ -5485,233 +5197,233 @@ void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I,
     Agg = CT->getTypeAtIndex(Index);
   }
   Out << ")";
-    //DEBUG(errs() << "Leaving printGEPExpression\n");
-}
+  //DEBUG(errs() << "Leaving printGEPExpression\n");
+  }
 
-void CWriter::writeMemoryAccess(Value *Operand, Type *OperandType,
-    bool IsVolatile, unsigned Alignment /*bytes*/) {
+  void CWriter::writeMemoryAccess(Value *Operand, Type *OperandType,
+      bool IsVolatile, unsigned Alignment /*bytes*/) {  // Writes the C text that accesses memory through Operand; called by visitLoadInst and visitStoreInst.
     //DEBUG(errs() << *OperandType << "; " << *Operand << "\n");
-  bool arrayAccess = false;
-  if(isa<GetElementPtrInst>(Operand)) {
-     //DEBUG(errs() << "ISA Get Element Pointer!\n");
-    arrayAccess = true;
-    GEPStack.push(dyn_cast<GetElementPtrInst>(Operand));
-  }
-//  if (isAddressExposed(Operand)) {
-//    DEBUG(errs() << "Is address exposed!!\n");
-//    writeOperandInternal(Operand);
-//    return;
-//  }
-
-  bool IsUnaligned = Alignment &&
-    Alignment < TD->getABITypeAlignment(OperandType);
-  if (!arrayAccess) {
-    if (!IsUnaligned)
-      Out << '*';
-
-    else if (IsUnaligned) {
-      Out << "__UNALIGNED_LOAD__(";
-      printTypeNameUnaligned(Out, OperandType, false);
-      if (IsVolatile) Out << " volatile";
-      Out << ", " << Alignment << ", ";
+    bool arrayAccess = false;  // set when Operand is a GEP; suppresses the dereference/cast prefix below
+    if(isa<GetElementPtrInst>(Operand)) {
+      //DEBUG(errs() << "ISA Get Element Pointer!\n");
+      arrayAccess = true;
+      GEPStack.push(dyn_cast<GetElementPtrInst>(Operand));  // the GEP is also recorded on GEPStack before the operand is written
     }
+    //  if (isAddressExposed(Operand)) {
+    //    DEBUG(errs() << "Is address exposed!!\n");
+    //    writeOperandInternal(Operand);
+    //    return;
+    //  }
+
+    bool IsUnaligned = Alignment &&
+      Alignment < TD->getABITypeAlignment(OperandType);  // an Alignment of 0 is never treated as unaligned
+    if (!arrayAccess) {
+      if (!IsUnaligned)
+        Out << '*';  // aligned, non-GEP operand: plain dereference
+
+      else if (IsUnaligned) {  // under-aligned operand: wrap the access in the __UNALIGNED_LOAD__ macro
+        Out << "__UNALIGNED_LOAD__(";
+        printTypeNameUnaligned(Out, OperandType, false);
+        if (IsVolatile) Out << " volatile";
+        Out << ", " << Alignment << ", ";
+      }
 
-    else if (IsVolatile) {
-      Out << "(";
-      printTypeName(Out, OperandType, false);
-      Out << "volatile";
-      Out << "*)";
-    } 
-  }
+      else if (IsVolatile) {  // NOTE(review): unreachable, the two branches above already cover both values of IsUnaligned
+        Out << "(";
+        printTypeName(Out, OperandType, false);
+        Out << "volatile";
+        Out << "*)";
+      } 
+    }
 
-  writeOperand(Operand,ContextNormal, arrayAccess );
+    writeOperand(Operand,ContextNormal, arrayAccess );  // arrayAccess is forwarded as the third argument
 
-  if (IsUnaligned) {
-    Out << ")";
+    if (IsUnaligned) {  // NOTE(review): also taken when arrayAccess skipped the "__UNALIGNED_LOAD__(" prefix; confirm the parenthesis stays balanced
+      Out << ")";
+    }
   }
-}
-
-void CWriter::visitLoadInst(LoadInst &I) {
-  //DEBUG(errs() << "Visiting Load instruction!\n");
-  //  DEBUG(errs() << "Visiting load: " << I << "\n");
-  writeMemoryAccess(I.getOperand(0), I.getType(), I.isVolatile(),
-      I.getAlignment());
 
-}
+  void CWriter::visitLoadInst(LoadInst &I) {  // A load is printed purely as the memory access produced by writeMemoryAccess.
+    //DEBUG(errs() << "Visiting Load instruction!\n");
+    //  DEBUG(errs() << "Visiting load: " << I << "\n");
+    writeMemoryAccess(I.getOperand(0), I.getType(), I.isVolatile(),
+        I.getAlignment());  // operand 0 is the address; the load's own type is passed as the accessed type
 
-void CWriter::visitStoreInst(StoreInst &I) {
-  //DEBUG(errs() << "Visiting store instruction!\n");
-  writeMemoryAccess(I.getPointerOperand(), I.getOperand(0)->getType(),
-      I.isVolatile(), I.getAlignment());
-  Out << " = ";
-  Value *Operand = I.getOperand(0);
-  unsigned BitMask = 0;
-  if (IntegerType* ITy = dyn_cast<IntegerType>(Operand->getType()))
-    if (!ITy->isPowerOf2ByteWidth())
-      // We have a bit width that doesn't match an even power-of-2 byte
-      // size. Consequently we must & the value with the type's bit mask
-      BitMask = ITy->getBitMask();
-  if (BitMask)
-    Out << "((";
-  writeOperand(Operand, BitMask ? ContextNormal : ContextCasted);
-  if (BitMask)
-    Out << ") & " << BitMask << ")";
-}
-
-void CWriter::visitGetElementPtrInst(GetElementPtrInst &I) {
-  //  DEBUG(errs() <<"Visiting GEP: " << I << "\n");
-  printGEPExpression(I.getPointerOperand(), gep_type_begin(I),
-      gep_type_end(I), I.getSourceElementType()->isArrayTy(), &I);
-}
+  }
 
-void CWriter::visitVAArgInst(VAArgInst &I) {
-  Out << "va_arg(*(va_list*)";
-  writeOperand(I.getOperand(0), ContextCasted);
-  Out << ", ";
-  printTypeName(Out, I.getType());
-  Out << ");\n ";
-}
+  void CWriter::visitStoreInst(StoreInst &I) {
+    // Emits "<destination> = <value>"; the value is and-ed with a bit mask when its integer width requires it.
+    writeMemoryAccess(I.getPointerOperand(), I.getOperand(0)->getType(),
+        I.isVolatile(), I.getAlignment());
+    Out << " = ";
+    Value *Operand = I.getOperand(0);
+    unsigned long long BitMask = 0;  // must hold the full getBitMask() result; plain 'unsigned' dropped the high bits for i33..i63
+    if (IntegerType* ITy = dyn_cast<IntegerType>(Operand->getType()))
+      if (!ITy->isPowerOf2ByteWidth())
+        // We have a bit width that doesn't match an even power-of-2 byte
+        // size. Consequently we must & the value with the type's bit mask
+        BitMask = ITy->getBitMask();
+    if (BitMask)
+      Out << "((";  // opens "((<value>) & <mask>)"
+    writeOperand(Operand, BitMask ? ContextNormal : ContextCasted);  // the casted context is used only when no mask is applied
+    if (BitMask)
+      Out << ") & " << BitMask << ")";
+  }
 
-void CWriter::visitInsertElementInst(InsertElementInst &I) {
-  // Start by copying the entire aggregate value into the result variable.
-  writeOperand(I.getOperand(0));
-  Type *EltTy = I.getType()->getElementType();
-  assert(I.getOperand(1)->getType() == EltTy);
-  if (isEmptyType(EltTy)) return;
-
-  // Then do the insert to update the field.
-  Out << ";\n  ";
-  Out << GetValueName(&I) << ".vector[";
-  writeOperand(I.getOperand(2));
-  Out << "] = ";
-  writeOperand(I.getOperand(1), ContextCasted);
-}
+  void CWriter::visitGetElementPtrInst(GetElementPtrInst &I) {
+    // Hand the GEP to printGEPExpression, flagging whether its source element type is an array.
+    const bool SrcIsArray = I.getSourceElementType()->isArrayTy();
+    printGEPExpression(I.getPointerOperand(), gep_type_begin(I), gep_type_end(I), SrcIsArray, &I);
+  }
 
-void CWriter::visitExtractElementInst(ExtractElementInst &I) {
-  assert(!isEmptyType(I.getType()));
-  if (isa<UndefValue>(I.getOperand(0))) {
-    Out << "(";
+  void CWriter::visitVAArgInst(VAArgInst &I) {  // Emits "va_arg(*(va_list*)<operand 0>, <result type>);".
+    Out << "va_arg(*(va_list*)";
+    writeOperand(I.getOperand(0), ContextCasted);  // operand 0 is printed as the va_list argument
+    Out << ", ";
     printTypeName(Out, I.getType());
-    Out << ") 0/*UNDEF*/";
-  } else {
-    Out << "(";
+    Out << ");\n ";  // the terminating ';' and a newline are written here as well
+  }
+
+  void CWriter::visitInsertElementInst(InsertElementInst &I) {  // Emits "<vector>;\n  <result>.vector[<index>] = <element>".
+    // Start by copying the entire aggregate value into the result variable.
     writeOperand(I.getOperand(0));
-    Out << ").vector[";
-    writeOperand(I.getOperand(1));
-    Out << "]";
+    Type *EltTy = I.getType()->getElementType();
+    assert(I.getOperand(1)->getType() == EltTy);  // the inserted value must have the vector's element type
+    if (isEmptyType(EltTy)) return;  // no element assignment is written for an empty element type
+
+    // Then do the insert to update the field.
+    Out << ";\n  ";
+    Out << GetValueName(&I) << ".vector[";  // the assignment targets the instruction's own result name
+    writeOperand(I.getOperand(2));  // operand 2: the index
+    Out << "] = ";
+    writeOperand(I.getOperand(1), ContextCasted);  // operand 1: the new element
   }
-}
 
-// <result> = shufflevector <n x <ty>> <v1>, <n x <ty>> <v2>, <m x i32> <mask>
-// ; yields <m x <ty>>
-void CWriter::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
-  VectorType *VT = SVI.getType();
-  Type *EltTy = VT->getElementType();
-  VectorType *InputVT = cast<VectorType>(SVI.getOperand(0)->getType());
-  assert(!isEmptyType(VT));
-  assert(InputVT->getElementType() == VT->getElementType());
+  void CWriter::visitExtractElementInst(ExtractElementInst &I) {
+    // Prints "(<vector>).vector[<index>]"; an undef source vector is printed as a typed zero instead.
+    assert(!isEmptyType(I.getType()));
+    Out << "(";
+    if (isa<UndefValue>(I.getOperand(0))) {
+      printTypeName(Out, I.getType());
+      Out << ") 0/*UNDEF*/";
+      return;
+    }
+    writeOperand(I.getOperand(0));
+    Out << ").vector[";
+    writeOperand(I.getOperand(1));
+    Out << "]";
+  }
 
-  CtorDeclTypes.insert(VT);
-  Out << "llvm_ctor_";
-  printTypeString(Out, VT, false);
-  Out << "(";
+  // <result> = shufflevector <n x <ty>> <v1>, <n x <ty>> <v2>, <m x i32> <mask>
+  // ; yields <m x <ty>>
+  void CWriter::visitShuffleVectorInst(ShuffleVectorInst &SVI) {  // Printed as a call "llvm_ctor_<type>(...)" with one argument per result lane.
+    VectorType *VT = SVI.getType();
+    Type *EltTy = VT->getElementType();
+    VectorType *InputVT = cast<VectorType>(SVI.getOperand(0)->getType());
+    assert(!isEmptyType(VT));
+    assert(InputVT->getElementType() == VT->getElementType());  // inputs and result must share an element type
 
-  Constant *Zero = Constant::getNullValue(EltTy);
-  unsigned NumElts = VT->getNumElements();
-  unsigned NumInputElts = InputVT->getNumElements(); // n
-  for (unsigned i = 0; i != NumElts; ++i) {
-    if (i) Out << ", ";
-    int SrcVal = SVI.getMaskValue(i);
-    if ((unsigned)SrcVal >= NumInputElts * 2) {
-      Out << "/*undef*/";
-      printConstant(Zero, ContextCasted);
-    } else {
-      // If SrcVal belongs [0, n - 1], it extracts value from <v1>
-      // If SrcVal belongs [n, 2 * n - 1], it extracts value from <v2>
-      // In C++, the value false is converted to zero and the value true is
-      // converted to one
-      Value *Op = SVI.getOperand((unsigned)SrcVal >= NumInputElts);
-      if (isa<Instruction>(Op)) {
-        // Do an extractelement of this value from the appropriate input.
-        Out << "(";
-        writeOperand(Op);
-        Out << ").vector[";
-        Out << ((unsigned)SrcVal >= NumInputElts ? SrcVal - NumInputElts : SrcVal);
-        Out << "]";
-      } else if (isa<ConstantAggregateZero>(Op) || isa<UndefValue>(Op)) {
+    CtorDeclTypes.insert(VT);  // presumably so the llvm_ctor_ helper for VT gets declared elsewhere; confirm
+    Out << "llvm_ctor_";
+    printTypeString(Out, VT, false);
+    Out << "(";
+
+    Constant *Zero = Constant::getNullValue(EltTy);  // printed for undef lanes and for zero/undef inputs
+    unsigned NumElts = VT->getNumElements();
+    unsigned NumInputElts = InputVT->getNumElements(); // n
+    for (unsigned i = 0; i != NumElts; ++i) {
+      if (i) Out << ", ";
+      int SrcVal = SVI.getMaskValue(i);
+      if ((unsigned)SrcVal >= NumInputElts * 2) {  // a negative mask value wraps to a large unsigned and lands here too
+        Out << "/*undef*/";
         printConstant(Zero, ContextCasted);
       } else {
-        printConstant(cast<ConstantVector>(Op)->getOperand(SrcVal &
-              (NumElts-1)),
-            ContextNormal);
+        // If SrcVal belongs [0, n - 1], it extracts value from <v1>
+        // If SrcVal belongs [n, 2 * n - 1], it extracts value from <v2>
+        // In C++, the value false is converted to zero and the value true is
+        // converted to one
+        Value *Op = SVI.getOperand((unsigned)SrcVal >= NumInputElts);
+        if (isa<Instruction>(Op)) {
+          // Do an extractelement of this value from the appropriate input.
+          Out << "(";
+          writeOperand(Op);
+          Out << ").vector[";
+          Out << ((unsigned)SrcVal >= NumInputElts ? SrcVal - NumInputElts : SrcVal);  // index relative to the chosen input
+          Out << "]";
+        } else if (isa<ConstantAggregateZero>(Op) || isa<UndefValue>(Op)) {
+          printConstant(Zero, ContextCasted);
+        } else {
+          printConstant(cast<ConstantVector>(Op)->getOperand(SrcVal &
+                (NumElts-1)),  // NOTE(review): masks with the result width NumElts, not NumInputElts; confirm for differing or non-power-of-two widths
+              ContextNormal);
+        }
       }
     }
+    Out << ")";
   }
-  Out << ")";
-}
 
-void CWriter::visitInsertValueInst(InsertValueInst &IVI) {
-  // Start by copying the entire aggregate value into the result variable.
-  writeOperand(IVI.getOperand(0));
-  Type *EltTy = IVI.getOperand(1)->getType();
-  if (isEmptyType(EltTy)) return;
-
-  // Then do the insert to update the field.
-  Out << ";\n  ";
-  Out << GetValueName(&IVI);
-  for (const unsigned *b = IVI.idx_begin(), *i = b, *e = IVI.idx_end();
-      i != e; ++i) {
-    Type *IndexedTy =
-      ExtractValueInst::getIndexedType(IVI.getOperand(0)->getType(),
-          makeArrayRef(b, i));
-    assert(IndexedTy);
-    if (IndexedTy->isArrayTy())
-      Out << ".array[" << *i << "]";
-    else
-      Out << ".field" << *i;
-  }
-  Out << " = ";
-  writeOperand(IVI.getOperand(1), ContextCasted);
-}
+  void CWriter::visitInsertValueInst(InsertValueInst &IVI) {  // Emits "<aggregate>;\n  <result><accessors> = <value>".
+    // Start by copying the entire aggregate value into the result variable.
+    writeOperand(IVI.getOperand(0));
+    Type *EltTy = IVI.getOperand(1)->getType();
+    if (isEmptyType(EltTy)) return;  // no member assignment is written when the inserted value has an empty type
 
-void CWriter::visitExtractValueInst(ExtractValueInst &EVI) {
-  Out << "(";
-  if (isa<UndefValue>(EVI.getOperand(0))) {
-    Out << "(";
-    printTypeName(Out, EVI.getType());
-    Out << ") 0/*UNDEF*/";
-  } else {
-    writeOperand(EVI.getOperand(0));
-    for (const unsigned *b = EVI.idx_begin(), *i = b, *e = EVI.idx_end();
+    // Then do the insert to update the field.
+    Out << ";\n  ";
+    Out << GetValueName(&IVI);  // accessors are appended to the instruction's own result name
+    for (const unsigned *b = IVI.idx_begin(), *i = b, *e = IVI.idx_end();
         i != e; ++i) {
       Type *IndexedTy =
-        ExtractValueInst::getIndexedType(EVI.getOperand(0)->getType(),
+        ExtractValueInst::getIndexedType(IVI.getOperand(0)->getType(),  // type reached by the indices before *i
             makeArrayRef(b, i));
+      assert(IndexedTy);  // guards the dereference on the next line
       if (IndexedTy->isArrayTy())
         Out << ".array[" << *i << "]";
       else
         Out << ".field" << *i;
     }
+    Out << " = ";
+    writeOperand(IVI.getOperand(1), ContextCasted);  // operand 1 is the value being inserted
   }
-  Out << ")";
-}
 
-//===----------------------------------------------------------------------===//
-//                       External Interface declaration
-//===----------------------------------------------------------------------===//
+  void CWriter::visitExtractValueInst(ExtractValueInst &EVI) {  // Emits "(<aggregate><accessors>)", one accessor per index.
+    Out << "(";
+    if (isa<UndefValue>(EVI.getOperand(0))) {
+      Out << "(";  // undef aggregate: print a zero cast to the result type instead
+      printTypeName(Out, EVI.getType());
+      Out << ") 0/*UNDEF*/";
+    } else {
+      writeOperand(EVI.getOperand(0));
+      for (const unsigned *b = EVI.idx_begin(), *i = b, *e = EVI.idx_end();
+          i != e; ++i) {
+        Type *IndexedTy = ExtractValueInst::getIndexedType(
+            EVI.getOperand(0)->getType(), makeArrayRef(b, i));  // type reached by the indices before *i
+        assert(IndexedTy && "extractvalue index prefix does not name a valid type");  // same guard as visitInsertValueInst
+        if (IndexedTy->isArrayTy())
+          Out << ".array[" << *i << "]";
+        else
+          Out << ".field" << *i;
+      }
+    }
+    Out << ")";
+  }
 
- bool CTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
-					  raw_pwrite_stream &Out,
-                                          raw_pwrite_stream *Out2,
-					  CodeGenFileType FileType,
-                                          bool DisableVerify,
-                                          MachineModuleInfo *MMI){
+  //===----------------------------------------------------------------------===//
+  //                       External Interface declaration
+  //===----------------------------------------------------------------------===//
 
-  if (FileType != TargetMachine::CGFT_AssemblyFile) return true;
+  bool CTargetMachine::addPassesToEmitFile(PassManagerBase &PM,  // Queues the lowering passes and the CWriter pass on PM.
+      raw_pwrite_stream &Out,
+      raw_pwrite_stream *Out2,  // not used in this body
+      CodeGenFileType FileType,
+      bool DisableVerify,  // not used in this body
+      MachineModuleInfo *MMI){  // not used in this body
 
-  PM.add(createGCLoweringPass());
-  PM.add(createLowerInvokePass());
-  PM.add(createCFGSimplificationPass());   // clean up after lower invoke.
-  PM.add(new CWriter(Out));
-  return false;
-}
+    if (FileType != TargetMachine::CGFT_AssemblyFile) return true;  // only assembly-file output is handled; presumably true means "unsupported", confirm against the TargetMachine contract
+
+    PM.add(createGCLoweringPass());
+    PM.add(createLowerInvokePass());
+    PM.add(createCFGSimplificationPass());   // clean up after lower invoke.
+    PM.add(new CWriter(Out));  // the writer pass is added last and writes to Out
+    return false;
+  }