From d8c8034103cde1910ad67c6823c05d868c48056f Mon Sep 17 00:00:00 2001
From: Yifan Zhao <yifanz16@illinois.edu>
Date: Mon, 27 Jan 2020 22:22:12 -0600
Subject: [PATCH] Also fix lower case nvptx

---
 .../Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.cpp  |  2 +-
 .../DFG2LLVM_OpenCL/DFG2LLVM_OpenCL.cpp       | 89 ++++++++++---------
 hpvm/test/benchmarks/hpvm-cava/Makefile       |  2 +-
 .../test/benchmarks/parboil/common/mk/hpvm.mk |  4 +-
 hpvm/test/benchmarks/pipeline/Makefile        |  2 +-
 hpvm/test/benchmarks/template/Makefile        |  2 +-
 .../ThreeLevel.atomic.genvisc.ll              |  2 +-
 .../DFG2LLVM_NVPTX/ThreeLevel.genvisc.ll      |  2 +-
 .../DFG2LLVM_NVPTX/ThreeLevel.opt.genvisc.ll  |  2 +-
 9 files changed, 54 insertions(+), 53 deletions(-)

diff --git a/hpvm/lib/Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.cpp b/hpvm/lib/Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.cpp
index 3f9f3101a3..25b1779bea 100644
--- a/hpvm/lib/Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.cpp
+++ b/hpvm/lib/Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.cpp
@@ -1407,7 +1407,7 @@ void CGT_CPU::codeGen(DFLeafNode *N) {
   // At this point, the CPU backend does not support code generation for
   // the case where allocation node is used, so we skip. This means that a
   // CPU version will not be created, and therefore code generation will
-  // only succeed if another backend (nvptx or spir) has been invoked to
+  // only succeed if another backend (opencl or spir) has been invoked to
   // generate a node function for the node including the allocation node.
   if (N->isAllocationNode()) {
     DEBUG(errs() << "Skipping allocation node\n");
diff --git a/hpvm/lib/Transforms/DFG2LLVM_OpenCL/DFG2LLVM_OpenCL.cpp b/hpvm/lib/Transforms/DFG2LLVM_OpenCL/DFG2LLVM_OpenCL.cpp
index b3ad2794b9..4567fd2acb 100644
--- a/hpvm/lib/Transforms/DFG2LLVM_OpenCL/DFG2LLVM_OpenCL.cpp
+++ b/hpvm/lib/Transforms/DFG2LLVM_OpenCL/DFG2LLVM_OpenCL.cpp
@@ -991,49 +991,49 @@ void CGT_OpenCL::codeGen(DFLeafNode *N) {
   // Look up if we have visited this function before. If we have, then just
   // get the cloned function pointer from DFNode. Otherwise, create the cloned
   // function and add it to the DFNode GenFunc.
-  //  Function *F_nvptx = N->getGenFunc();
-  Function *F_nvptx = N->getGenFuncForTarget(hpvm::GPU_TARGET);
+  //  Function *F_opencl = N->getGenFunc();
+  Function *F_opencl = N->getGenFuncForTarget(hpvm::GPU_TARGET);
 
-  assert(F_nvptx == NULL &&
+  assert(F_opencl == NULL &&
          "Error: Visiting a node for which code already generated");
   // Clone the function
   ValueToValueMapTy VMap;
 
-  // F_nvptx->setName(FName+"_nvptx");
+  // F_opencl->setName(FName+"_opencl");
 
   Twine FName = F->getName();
   StringRef fStr = FName.getSingleStringRef();
-  Twine newFName = Twine(fStr, "_nvptx");
-  F_nvptx = CloneFunction(F, VMap);
-  F_nvptx->setName(newFName);
+  Twine newFName = Twine(fStr, "_opencl");
+  F_opencl = CloneFunction(F, VMap);
+  F_opencl->setName(newFName);
 
   //  errs() << "Old Function Name: " << F->getName() << "\n";
-  //  errs() << "New Function Name: " << F_nvptx->getName() << "\n";
+  //  errs() << "New Function Name: " << F_opencl->getName() << "\n";
 
-  F_nvptx->removeFromParent();
+  F_opencl->removeFromParent();
 
   // Insert the cloned function into the kernels module
-  KernelM->getFunctionList().push_back(F_nvptx);
+  KernelM->getFunctionList().push_back(F_opencl);
 
-  // TODO: Iterate over all the instructions of F_nvptx and identify the
+  // TODO: Iterate over all the instructions of F_opencl and identify the
   // callees and clone them into this module.
-  DEBUG(errs() << *F_nvptx->getType());
-  DEBUG(errs() << *F_nvptx);
+  DEBUG(errs() << *F_opencl->getType());
+  DEBUG(errs() << *F_opencl);
 
   // Transform  the function to void and remove all target dependent attributes
   // from the function
-  F_nvptx = transformFunctionToVoid(F_nvptx);
+  F_opencl = transformFunctionToVoid(F_opencl);
 
   // Add generated function info to DFNode
-  //  N->setGenFunc(F_nvptx, hpvm::GPU_TARGET);
-  N->addGenFunc(F_nvptx, hpvm::GPU_TARGET, false);
+  //  N->setGenFunc(F_opencl, hpvm::GPU_TARGET);
+  N->addGenFunc(F_opencl, hpvm::GPU_TARGET, false);
 
   DEBUG(
       errs()
       << "Removing all attributes from Kernel Function and adding nounwind\n");
-  F_nvptx->removeAttributes(AttributeList::FunctionIndex,
-                            F_nvptx->getAttributes().getFnAttributes());
-  F_nvptx->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind);
+  F_opencl->removeAttributes(AttributeList::FunctionIndex,
+                             F_opencl->getAttributes().getFnAttributes());
+  F_opencl->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind);
 
   // FIXME: For now, assume only one allocation node
   kernel->AllocationNode = NULL;
@@ -1111,8 +1111,8 @@ void CGT_OpenCL::codeGen(DFLeafNode *N) {
   // global address space
   unsigned argIndex = 0;
   std::vector<unsigned> GlobalMemArgs;
-  for (Function::arg_iterator ai = F_nvptx->arg_begin(),
-                              ae = F_nvptx->arg_end();
+  for (Function::arg_iterator ai = F_opencl->arg_begin(),
+                              ae = F_opencl->arg_end();
        ai != ae; ++ai) {
     if (ai->getType()->isPointerTy()) {
       // If the arguement is already chosen for shared memory arguemnt list,
@@ -1133,11 +1133,11 @@ void CGT_OpenCL::codeGen(DFLeafNode *N) {
   // loads are not dependent on node id of current node, should be moved to
   // constant memory, subject to size of course
   std::vector<unsigned> ConstantMemArgs =
-      globalToConstantMemoryOpt(&GlobalMemArgs, F_nvptx);
+      globalToConstantMemoryOpt(&GlobalMemArgs, F_opencl);
 
-  F_nvptx = changeArgAddrspace(F_nvptx, ConstantMemArgs, GLOBAL_ADDRSPACE);
-  F_nvptx = changeArgAddrspace(F_nvptx, SharedMemArgs, SHARED_ADDRSPACE);
-  F_nvptx = changeArgAddrspace(F_nvptx, GlobalMemArgs, GLOBAL_ADDRSPACE);
+  F_opencl = changeArgAddrspace(F_opencl, ConstantMemArgs, GLOBAL_ADDRSPACE);
+  F_opencl = changeArgAddrspace(F_opencl, SharedMemArgs, SHARED_ADDRSPACE);
+  F_opencl = changeArgAddrspace(F_opencl, GlobalMemArgs, GLOBAL_ADDRSPACE);
 
   // Function to replace call instructions to functions in the kernel
   std::map<Function *, Function *> OrgToClonedFuncMap;
@@ -1168,7 +1168,7 @@ void CGT_OpenCL::codeGen(DFLeafNode *N) {
   };
 
   // Go through all the instructions
-  for (inst_iterator i = inst_begin(F_nvptx), e = inst_end(F_nvptx); i != e;
+  for (inst_iterator i = inst_begin(F_opencl), e = inst_end(F_opencl); i != e;
        ++i) {
     Instruction *I = &(*i);
     // Leaf nodes should not contain HPVM graph intrinsics or launch
@@ -1189,7 +1189,7 @@ void CGT_OpenCL::codeGen(DFLeafNode *N) {
       /**************************** llvm.hpvm.getNode()
        * *****************************/
       case Intrinsic::hpvm_getNode: {
-        DEBUG(errs() << F_nvptx->getName() << "\t: Handling getNode\n");
+        DEBUG(errs() << F_opencl->getName() << "\t: Handling getNode\n");
         // add mapping <intrinsic, this node> to the node-specific map
         Leaf_HandleToDFNodeMap[II] = N;
         IItoRemove.push_back(II);
@@ -1197,7 +1197,7 @@ void CGT_OpenCL::codeGen(DFLeafNode *N) {
       /************************* llvm.hpvm.getParentNode()
        * **************************/
       case Intrinsic::hpvm_getParentNode: {
-        DEBUG(errs() << F_nvptx->getName() << "\t: Handling getParentNode\n");
+        DEBUG(errs() << F_opencl->getName() << "\t: Handling getParentNode\n");
         // get the parent node of the arg node
         // get argument node
         ArgII = cast<IntrinsicInst>((II->getOperand(0))->stripPointerCasts());
@@ -1213,7 +1213,7 @@ void CGT_OpenCL::codeGen(DFLeafNode *N) {
       /*************************** llvm.hpvm.getNumDims()
        * ***************************/
       case Intrinsic::hpvm_getNumDims: {
-        DEBUG(errs() << F_nvptx->getName() << "\t: Handling getNumDims\n");
+        DEBUG(errs() << F_opencl->getName() << "\t: Handling getNumDims\n");
         // get node from map
         // get the appropriate field
         ArgII = cast<IntrinsicInst>((II->getOperand(0))->stripPointerCasts());
@@ -1234,7 +1234,8 @@ void CGT_OpenCL::codeGen(DFLeafNode *N) {
       case Intrinsic::hpvm_getNodeInstanceID_x:
       case Intrinsic::hpvm_getNodeInstanceID_y:
       case Intrinsic::hpvm_getNodeInstanceID_z: {
-        DEBUG(errs() << F_nvptx->getName() << "\t: Handling getNodeInstanceID\n"
+        DEBUG(errs() << F_opencl->getName()
+                     << "\t: Handling getNodeInstanceID\n"
                      << "\t: " << *II << "\n");
         ArgII = cast<IntrinsicInst>((II->getOperand(0))->stripPointerCasts());
         ArgDFNode = Leaf_HandleToDFNodeMap[ArgII];
@@ -1318,7 +1319,7 @@ void CGT_OpenCL::codeGen(DFLeafNode *N) {
         // then, why do we need to keep that info in the graph?  (only for the
         // kernel configuration during the call)
 
-        DEBUG(errs() << F_nvptx->getName()
+        DEBUG(errs() << F_opencl->getName()
                      << "\t: Handling getNumNodeInstances\n");
         ArgII = cast<IntrinsicInst>((II->getOperand(0))->stripPointerCasts());
         ArgDFNode = Leaf_HandleToDFNodeMap[ArgII];
@@ -1376,7 +1377,7 @@ void CGT_OpenCL::codeGen(DFLeafNode *N) {
         IItoRemove.push_back(II);
       } break;
       case Intrinsic::hpvm_barrier: {
-        DEBUG(errs() << F_nvptx->getName() << "\t: Handling barrier\n");
+        DEBUG(errs() << F_opencl->getName() << "\t: Handling barrier\n");
         DEBUG(errs() << "Substitute with barrier()\n");
         DEBUG(errs() << *II << "\n");
         FunctionType *FT = FunctionType::get(
@@ -1587,7 +1588,7 @@ void CGT_OpenCL::codeGen(DFLeafNode *N) {
   // search for pattern where float is being casted to int and loaded/stored and
   // change it.
   DEBUG(errs() << "finding pattern for replacement!\n");
-  for (inst_iterator i = inst_begin(F_nvptx), e = inst_end(F_nvptx); i != e;
+  for (inst_iterator i = inst_begin(F_opencl), e = inst_end(F_opencl); i != e;
        ++i) {
     bool cont = false;
     bool keepGEPI = false;
@@ -1867,8 +1868,8 @@ void CGT_OpenCL::codeGen(DFLeafNode *N) {
     KernelM->getFunctionList().push_back(F);
   }
 
-  addCLMetadata(F_nvptx);
-  kernel->KernelFunction = F_nvptx;
+  addCLMetadata(F_opencl);
+  kernel->KernelFunction = F_opencl;
   DEBUG(errs() << "Identified kernel - " << kernel->KernelFunction->getName()
                << "\n");
   DEBUG(errs() << *KernelM);
@@ -2364,13 +2365,13 @@ static std::string getFilenameFromModule(const Module &M) {
 // Changes the data layout of the Module to be compiled with OpenCL backend
 // TODO: Figure out when to call it, probably after duplicating the modules
 static void changeDataLayout(Module &M) {
-  std::string nvptx32_layoutStr = "e-p:32:32-i64:64-v16:16-v32:32-n16:32:64";
-  std::string nvptx64_layoutStr = "e-i64:64-v16:16-v32:32-n16:32:64";
+  std::string opencl32_layoutStr = "e-p:32:32-i64:64-v16:16-v32:32-n16:32:64";
+  std::string opencl64_layoutStr = "e-i64:64-v16:16-v32:32-n16:32:64";
 
   if (TARGET_PTX == 32)
-    M.setDataLayout(StringRef(nvptx32_layoutStr));
+    M.setDataLayout(StringRef(opencl32_layoutStr));
   else if (TARGET_PTX == 64)
-    M.setDataLayout(StringRef(nvptx64_layoutStr));
+    M.setDataLayout(StringRef(opencl64_layoutStr));
   else
     assert(false && "Invalid PTX target");
 
@@ -2378,13 +2379,13 @@ static void changeDataLayout(Module &M) {
 }
 
 static void changeTargetTriple(Module &M) {
-  std::string nvptx32_TargetTriple = "nvptx--nvidiacl";
-  std::string nvptx64_TargetTriple = "nvptx64--nvidiacl";
+  std::string opencl32_TargetTriple = "nvptx--nvidiacl";
+  std::string opencl64_TargetTriple = "nvptx64--nvidiacl";
 
   if (TARGET_PTX == 32)
-    M.setTargetTriple(StringRef(nvptx32_TargetTriple));
+    M.setTargetTriple(StringRef(opencl32_TargetTriple));
   else if (TARGET_PTX == 64)
-    M.setTargetTriple(StringRef(nvptx64_TargetTriple));
+    M.setTargetTriple(StringRef(opencl64_TargetTriple));
   else
     assert(false && "Invalid PTX target");
 
@@ -2465,7 +2466,7 @@ static std::string getAtomicOpName(Intrinsic::ID ID) {
 } // End of namespace
 
 char DFG2LLVM_OpenCL::ID = 0;
-static RegisterPass<DFG2LLVM_OpenCL> X("dfg2llvm-nvptx",
+static RegisterPass<DFG2LLVM_OpenCL> X("dfg2llvm-opencl",
 		"Dataflow Graph to LLVM for OpenCL Pass",
 		false /* does not modify the CFG */,
 		true /* transformation,   *
diff --git a/hpvm/test/benchmarks/hpvm-cava/Makefile b/hpvm/test/benchmarks/hpvm-cava/Makefile
index d7caf66883..58dfa72aac 100644
--- a/hpvm/test/benchmarks/hpvm-cava/Makefile
+++ b/hpvm/test/benchmarks/hpvm-cava/Makefile
@@ -65,7 +65,7 @@ ifeq ($(TARGET),seq)
   HPVM_OPTFLAGS += -hpvm-timers-cpu
 else
   DEVICE = GPU_TARGET
-  HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -localmem -dfg2llvm-nvptx -dfg2llvm-cpu -clearDFG
+  HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -localmem -dfg2llvm-opencl -dfg2llvm-cpu -clearDFG
   HPVM_OPTFLAGS += -hpvm-timers-cpu -hpvm-timers-ptx
 endif
   TESTGEN_OPTFLAGS += -hpvm-timers-gen
diff --git a/hpvm/test/benchmarks/parboil/common/mk/hpvm.mk b/hpvm/test/benchmarks/parboil/common/mk/hpvm.mk
index 8e3ab8e658..5938ca8758 100755
--- a/hpvm/test/benchmarks/parboil/common/mk/hpvm.mk
+++ b/hpvm/test/benchmarks/parboil/common/mk/hpvm.mk
@@ -15,14 +15,14 @@ HPVM_RT_PATH = $(LLVM_BUILD_DIR)/tools/hpvm/projects/hpvm-rt
 HPVM_RT_LIB = $(HPVM_RT_PATH)/hpvm-rt.bc
 
 TESTGEN_OPTFLAGS = -load LLVMGenHPVM.so -genhpvm -globaldce
-KERNEL_GEN_FLAGS = -O3 -target nvptx64-nvidia-nvcl
+KERNEL_GEN_FLAGS = -O3 -target nvptx64-nvidia-nvcl
 
 ifeq ($(TARGET),seq)
   DEVICE = CPU_TARGET
   HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -dfg2llvm-cpu -clearDFG
 else
   DEVICE = GPU_TARGET
-  HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -localmem -dfg2llvm-nvptx -dfg2llvm-cpu -clearDFG
+  HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -localmem -dfg2llvm-opencl -dfg2llvm-cpu -clearDFG
 endif
 
 CFLAGS += -DDEVICE=$(DEVICE)
diff --git a/hpvm/test/benchmarks/pipeline/Makefile b/hpvm/test/benchmarks/pipeline/Makefile
index 36f6a1f900..8a55393f24 100644
--- a/hpvm/test/benchmarks/pipeline/Makefile
+++ b/hpvm/test/benchmarks/pipeline/Makefile
@@ -52,7 +52,7 @@ ifeq ($(TARGET),seq)
   HPVM_OPTFLAGS += -hpvm-timers-cpu
 else
   DEVICE = GPU_TARGET
-  HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -localmem -dfg2llvm-nvptx -dfg2llvm-cpu -clearDFG
+  HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -localmem -dfg2llvm-opencl -dfg2llvm-cpu -clearDFG
   HPVM_OPTFLAGS += -hpvm-timers-cpu -hpvm-timers-ptx
 endif
   TESTGEN_OPTFLAGS += -hpvm-timers-gen
diff --git a/hpvm/test/benchmarks/template/Makefile b/hpvm/test/benchmarks/template/Makefile
index 46b1afe95d..fed129b32d 100644
--- a/hpvm/test/benchmarks/template/Makefile
+++ b/hpvm/test/benchmarks/template/Makefile
@@ -56,7 +56,7 @@ ifeq ($(TARGET),seq)
   HPVM_OPTFLAGS += -hpvm-timers-cpu
 else
   DEVICE = GPU_TARGET
-  HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -localmem -dfg2llvm-nvptx -dfg2llvm-cpu -clearDFG
+  HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -localmem -dfg2llvm-opencl -dfg2llvm-cpu -clearDFG
   HPVM_OPTFLAGS += -hpvm-timers-cpu -hpvm-timers-ptx
 endif
   TESTGEN_OPTFLAGS += -hpvm-timers-gen
diff --git a/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.atomic.genvisc.ll b/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.atomic.genvisc.ll
index e3570bcb66..ea6ec14d10 100644
--- a/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.atomic.genvisc.ll
+++ b/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.atomic.genvisc.ll
@@ -1,4 +1,4 @@
-; RUN: opt -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -S -localmem -dfg2llvm-nvptx <  %s | FileCheck %s
+; RUN: opt -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -S -localmem -dfg2llvm-opencl <  %s | FileCheck %s
 ; ModuleID = 'ThreeLevel.atomic.ll'
 source_filename = "ThreeLevel.constmem.c"
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.genvisc.ll b/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.genvisc.ll
index b08b951800..9af2e48674 100644
--- a/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.genvisc.ll
+++ b/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.genvisc.ll
@@ -1,4 +1,4 @@
-; RUN: opt -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -S -localmem -dfg2llvm-nvptx <  %s | FileCheck %s
+; RUN: opt -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -S -localmem -dfg2llvm-opencl <  %s | FileCheck %s
 ; ModuleID = 'ThreeLevel.ll'
 source_filename = "ThreeLevel.c"
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.opt.genvisc.ll b/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.opt.genvisc.ll
index b3cb659f96..5de9fd4e33 100644
--- a/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.opt.genvisc.ll
+++ b/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.opt.genvisc.ll
@@ -1,4 +1,4 @@
-; RUN: opt -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -S -localmem -dfg2llvm-nvptx <  %s | FileCheck %s
+; RUN: opt -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -S -localmem -dfg2llvm-opencl <  %s | FileCheck %s
 ; ModuleID = 'ThreeLevel.opt.ll'
 source_filename = "ThreeLevel.opt.c"
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-- 
GitLab