From d8c8034103cde1910ad67c6823c05d868c48056f Mon Sep 17 00:00:00 2001
From: Yifan Zhao <yifanz16@illinois.edu>
Date: Mon, 27 Jan 2020 22:22:12 -0600
Subject: [PATCH] Also fix lower case nvptx

---
 .../Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.cpp  |  2 +-
 .../DFG2LLVM_OpenCL/DFG2LLVM_OpenCL.cpp       | 89 ++++++++++---------
 hpvm/test/benchmarks/hpvm-cava/Makefile       |  2 +-
 .../test/benchmarks/parboil/common/mk/hpvm.mk |  2 +-
 hpvm/test/benchmarks/pipeline/Makefile        |  2 +-
 hpvm/test/benchmarks/template/Makefile        |  2 +-
 .../ThreeLevel.atomic.genvisc.ll              |  2 +-
 .../DFG2LLVM_NVPTX/ThreeLevel.genvisc.ll      |  2 +-
 .../DFG2LLVM_NVPTX/ThreeLevel.opt.genvisc.ll  |  2 +-
 9 files changed, 53 insertions(+), 52 deletions(-)

diff --git a/hpvm/lib/Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.cpp b/hpvm/lib/Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.cpp
index 3f9f3101a3..25b1779bea 100644
--- a/hpvm/lib/Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.cpp
+++ b/hpvm/lib/Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.cpp
@@ -1407,7 +1407,7 @@ void CGT_CPU::codeGen(DFLeafNode *N) {
   // At this point, the CPU backend does not support code generation for
   // the case where allocation node is used, so we skip. This means that a
   // CPU version will not be created, and therefore code generation will
-  // only succeed if another backend (nvptx or spir) has been invoked to
+  // only succeed if another backend (opencl or spir) has been invoked to
   // generate a node function for the node including the allocation node.
   if (N->isAllocationNode()) {
     DEBUG(errs() << "Skipping allocation node\n");
diff --git a/hpvm/lib/Transforms/DFG2LLVM_OpenCL/DFG2LLVM_OpenCL.cpp b/hpvm/lib/Transforms/DFG2LLVM_OpenCL/DFG2LLVM_OpenCL.cpp
index b3ad2794b9..4567fd2acb 100644
--- a/hpvm/lib/Transforms/DFG2LLVM_OpenCL/DFG2LLVM_OpenCL.cpp
+++ b/hpvm/lib/Transforms/DFG2LLVM_OpenCL/DFG2LLVM_OpenCL.cpp
@@ -991,49 +991,49 @@ void CGT_OpenCL::codeGen(DFLeafNode *N) {
   // Look up if we have visited this function before. If we have, then just
   // get the cloned function pointer from DFNode. Otherwise, create the cloned
   // function and add it to the DFNode GenFunc.
-  // Function *F_nvptx = N->getGenFunc();
-  Function *F_nvptx = N->getGenFuncForTarget(hpvm::GPU_TARGET);
+  // Function *F_opencl = N->getGenFunc();
+  Function *F_opencl = N->getGenFuncForTarget(hpvm::GPU_TARGET);
 
-  assert(F_nvptx == NULL &&
+  assert(F_opencl == NULL &&
          "Error: Visiting a node for which code already generated");
 
   // Clone the function
   ValueToValueMapTy VMap;
 
-  // F_nvptx->setName(FName+"_nvptx");
+  // F_opencl->setName(FName+"_opencl");
   Twine FName = F->getName();
   StringRef fStr = FName.getSingleStringRef();
-  Twine newFName = Twine(fStr, "_nvptx");
-  F_nvptx = CloneFunction(F, VMap);
-  F_nvptx->setName(newFName);
+  Twine newFName = Twine(fStr, "_opencl");
+  F_opencl = CloneFunction(F, VMap);
+  F_opencl->setName(newFName);
 
   // errs() << "Old Function Name: " << F->getName() << "\n";
-  // errs() << "New Function Name: " << F_nvptx->getName() << "\n";
+  // errs() << "New Function Name: " << F_opencl->getName() << "\n";
 
-  F_nvptx->removeFromParent();
+  F_opencl->removeFromParent();
 
   // Insert the cloned function into the kernels module
-  KernelM->getFunctionList().push_back(F_nvptx);
+  KernelM->getFunctionList().push_back(F_opencl);
 
-  // TODO: Iterate over all the instructions of F_nvptx and identify the
+  // TODO: Iterate over all the instructions of F_opencl and identify the
   // callees and clone them into this module.
-  DEBUG(errs() << *F_nvptx->getType());
-  DEBUG(errs() << *F_nvptx);
+  DEBUG(errs() << *F_opencl->getType());
+  DEBUG(errs() << *F_opencl);
 
   // Transform the function to void and remove all target dependent attributes
   // from the function
-  F_nvptx = transformFunctionToVoid(F_nvptx);
+  F_opencl = transformFunctionToVoid(F_opencl);
 
   // Add generated function info to DFNode
-  // N->setGenFunc(F_nvptx, hpvm::GPU_TARGET);
-  N->addGenFunc(F_nvptx, hpvm::GPU_TARGET, false);
+  // N->setGenFunc(F_opencl, hpvm::GPU_TARGET);
+  N->addGenFunc(F_opencl, hpvm::GPU_TARGET, false);
 
   DEBUG(
       errs()
       << "Removing all attributes from Kernel Function and adding nounwind\n");
-  F_nvptx->removeAttributes(AttributeList::FunctionIndex,
-                            F_nvptx->getAttributes().getFnAttributes());
-  F_nvptx->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind);
+  F_opencl->removeAttributes(AttributeList::FunctionIndex,
+                             F_opencl->getAttributes().getFnAttributes());
+  F_opencl->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind);
 
   // FIXME: For now, assume only one allocation node
   kernel->AllocationNode = NULL;
@@ -1111,8 +1111,8 @@ void CGT_OpenCL::codeGen(DFLeafNode *N) {
   // global address space
   unsigned argIndex = 0;
   std::vector<unsigned> GlobalMemArgs;
-  for (Function::arg_iterator ai = F_nvptx->arg_begin(),
-                              ae = F_nvptx->arg_end();
+  for (Function::arg_iterator ai = F_opencl->arg_begin(),
+                              ae = F_opencl->arg_end();
        ai != ae; ++ai) {
     if (ai->getType()->isPointerTy()) {
       // If the arguement is already chosen for shared memory arguemnt list,
@@ -1133,11 +1133,11 @@ void CGT_OpenCL::codeGen(DFLeafNode *N) {
   // loads are not dependent on node id of current node, should be moved to
   // constant memory, subject to size of course
   std::vector<unsigned> ConstantMemArgs =
-      globalToConstantMemoryOpt(&GlobalMemArgs, F_nvptx);
+      globalToConstantMemoryOpt(&GlobalMemArgs, F_opencl);
 
-  F_nvptx = changeArgAddrspace(F_nvptx, ConstantMemArgs, GLOBAL_ADDRSPACE);
-  F_nvptx = changeArgAddrspace(F_nvptx, SharedMemArgs, SHARED_ADDRSPACE);
-  F_nvptx = changeArgAddrspace(F_nvptx, GlobalMemArgs, GLOBAL_ADDRSPACE);
+  F_opencl = changeArgAddrspace(F_opencl, ConstantMemArgs, GLOBAL_ADDRSPACE);
+  F_opencl = changeArgAddrspace(F_opencl, SharedMemArgs, SHARED_ADDRSPACE);
+  F_opencl = changeArgAddrspace(F_opencl, GlobalMemArgs, GLOBAL_ADDRSPACE);
 
   // Function to replace call instructions to functions in the kernel
   std::map<Function *, Function *> OrgToClonedFuncMap;
@@ -1168,7 +1168,7 @@ void CGT_OpenCL::codeGen(DFLeafNode *N) {
   };
 
   // Go through all the instructions
-  for (inst_iterator i = inst_begin(F_nvptx), e = inst_end(F_nvptx); i != e;
+  for (inst_iterator i = inst_begin(F_opencl), e = inst_end(F_opencl); i != e;
        ++i) {
     Instruction *I = &(*i);
     // Leaf nodes should not contain HPVM graph intrinsics or launch
@@ -1189,7 +1189,7 @@ void CGT_OpenCL::codeGen(DFLeafNode *N) {
       /**************************** llvm.hpvm.getNode() *
        *****************************/
      case Intrinsic::hpvm_getNode: {
-        DEBUG(errs() << F_nvptx->getName() << "\t: Handling getNode\n");
+        DEBUG(errs() << F_opencl->getName() << "\t: Handling getNode\n");
        // add mapping <intrinsic, this node> to the node-specific map
        Leaf_HandleToDFNodeMap[II] = N;
        IItoRemove.push_back(II);
@@ -1197,7 +1197,7 @@ void CGT_OpenCL::codeGen(DFLeafNode *N) {
      } break;
      /************************* llvm.hpvm.getParentNode() *
       **************************/
      case Intrinsic::hpvm_getParentNode: {
-        DEBUG(errs() << F_nvptx->getName() << "\t: Handling getParentNode\n");
+        DEBUG(errs() << F_opencl->getName() << "\t: Handling getParentNode\n");
        // get the parent node of the arg node
        // get argument node
        ArgII = cast<IntrinsicInst>((II->getOperand(0))->stripPointerCasts());
@@ -1213,7 +1213,7 @@ void CGT_OpenCL::codeGen(DFLeafNode *N) {
      /*************************** llvm.hpvm.getNumDims() *
       ***************************/
      case Intrinsic::hpvm_getNumDims: {
-        DEBUG(errs() << F_nvptx->getName() << "\t: Handling getNumDims\n");
+        DEBUG(errs() << F_opencl->getName() << "\t: Handling getNumDims\n");
        // get node from map
        // get the appropriate field
        ArgII = cast<IntrinsicInst>((II->getOperand(0))->stripPointerCasts());
@@ -1234,7 +1234,8 @@ void CGT_OpenCL::codeGen(DFLeafNode *N) {
      case Intrinsic::hpvm_getNodeInstanceID_x:
      case Intrinsic::hpvm_getNodeInstanceID_y:
      case Intrinsic::hpvm_getNodeInstanceID_z: {
-        DEBUG(errs() << F_nvptx->getName() << "\t: Handling getNodeInstanceID\n"
+        DEBUG(errs() << F_opencl->getName()
+              << "\t: Handling getNodeInstanceID\n"
              << "\t: " << *II << "\n");
        ArgII = cast<IntrinsicInst>((II->getOperand(0))->stripPointerCasts());
        ArgDFNode = Leaf_HandleToDFNodeMap[ArgII];
@@ -1318,7 +1319,7 @@ void CGT_OpenCL::codeGen(DFLeafNode *N) {
        // then, why do we need to keep that info in the graph? (only for the
        // kernel configuration during the call)
 
-        DEBUG(errs() << F_nvptx->getName()
+        DEBUG(errs() << F_opencl->getName()
              << "\t: Handling getNumNodeInstances\n");
        ArgII = cast<IntrinsicInst>((II->getOperand(0))->stripPointerCasts());
        ArgDFNode = Leaf_HandleToDFNodeMap[ArgII];
@@ -1376,7 +1377,7 @@ void CGT_OpenCL::codeGen(DFLeafNode *N) {
        IItoRemove.push_back(II);
      } break;
      case Intrinsic::hpvm_barrier: {
-        DEBUG(errs() << F_nvptx->getName() << "\t: Handling barrier\n");
+        DEBUG(errs() << F_opencl->getName() << "\t: Handling barrier\n");
        DEBUG(errs() << "Substitute with barrier()\n");
        DEBUG(errs() << *II << "\n");
        FunctionType *FT = FunctionType::get(
@@ -1587,7 +1588,7 @@ void CGT_OpenCL::codeGen(DFLeafNode *N) {
   // search for pattern where float is being casted to int and loaded/stored and
   // change it.
DEBUG(errs() << "finding pattern for replacement!\n"); - for (inst_iterator i = inst_begin(F_nvptx), e = inst_end(F_nvptx); i != e; + for (inst_iterator i = inst_begin(F_opencl), e = inst_end(F_opencl); i != e; ++i) { bool cont = false; bool keepGEPI = false; @@ -1867,8 +1868,8 @@ void CGT_OpenCL::codeGen(DFLeafNode *N) { KernelM->getFunctionList().push_back(F); } - addCLMetadata(F_nvptx); - kernel->KernelFunction = F_nvptx; + addCLMetadata(F_opencl); + kernel->KernelFunction = F_opencl; DEBUG(errs() << "Identified kernel - " << kernel->KernelFunction->getName() << "\n"); DEBUG(errs() << *KernelM); @@ -2364,13 +2365,13 @@ static std::string getFilenameFromModule(const Module &M) { // Changes the data layout of the Module to be compiled with OpenCL backend // TODO: Figure out when to call it, probably after duplicating the modules static void changeDataLayout(Module &M) { - std::string nvptx32_layoutStr = "e-p:32:32-i64:64-v16:16-v32:32-n16:32:64"; - std::string nvptx64_layoutStr = "e-i64:64-v16:16-v32:32-n16:32:64"; + std::string opencl32_layoutStr = "e-p:32:32-i64:64-v16:16-v32:32-n16:32:64"; + std::string opencl64_layoutStr = "e-i64:64-v16:16-v32:32-n16:32:64"; if (TARGET_PTX == 32) - M.setDataLayout(StringRef(nvptx32_layoutStr)); + M.setDataLayout(StringRef(opencl32_layoutStr)); else if (TARGET_PTX == 64) - M.setDataLayout(StringRef(nvptx64_layoutStr)); + M.setDataLayout(StringRef(opencl64_layoutStr)); else assert(false && "Invalid PTX target"); @@ -2378,13 +2379,13 @@ static void changeDataLayout(Module &M) { } static void changeTargetTriple(Module &M) { - std::string nvptx32_TargetTriple = "nvptx--nvidiacl"; - std::string nvptx64_TargetTriple = "nvptx64--nvidiacl"; + std::string opencl32_TargetTriple = "opencl--nvidiacl"; + std::string opencl64_TargetTriple = "opencl64--nvidiacl"; if (TARGET_PTX == 32) - M.setTargetTriple(StringRef(nvptx32_TargetTriple)); + M.setTargetTriple(StringRef(opencl32_TargetTriple)); else if (TARGET_PTX == 64) - M.setTargetTriple(StringRef(nvptx64_TargetTriple)); + M.setTargetTriple(StringRef(opencl64_TargetTriple)); else assert(false && "Invalid PTX target"); @@ -2465,7 +2466,7 @@ static std::string getAtomicOpName(Intrinsic::ID ID) { } // End of namespace char DFG2LLVM_OpenCL::ID = 0; -static RegisterPass<DFG2LLVM_OpenCL> X("dfg2llvm-nvptx", +static RegisterPass<DFG2LLVM_OpenCL> X("dfg2llvm-opencl", "Dataflow Graph to LLVM for OpenCL Pass", false /* does not modify the CFG */, true /* transformation, * diff --git a/hpvm/test/benchmarks/hpvm-cava/Makefile b/hpvm/test/benchmarks/hpvm-cava/Makefile index d7caf66883..58dfa72aac 100644 --- a/hpvm/test/benchmarks/hpvm-cava/Makefile +++ b/hpvm/test/benchmarks/hpvm-cava/Makefile @@ -65,7 +65,7 @@ ifeq ($(TARGET),seq) HPVM_OPTFLAGS += -hpvm-timers-cpu else DEVICE = GPU_TARGET - HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -localmem -dfg2llvm-nvptx -dfg2llvm-cpu -clearDFG + HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -localmem -dfg2llvm-opencl -dfg2llvm-cpu -clearDFG HPVM_OPTFLAGS += -hpvm-timers-cpu -hpvm-timers-ptx endif TESTGEN_OPTFLAGS += -hpvm-timers-gen diff --git a/hpvm/test/benchmarks/parboil/common/mk/hpvm.mk b/hpvm/test/benchmarks/parboil/common/mk/hpvm.mk index 8e3ab8e658..5938ca8758 100755 --- a/hpvm/test/benchmarks/parboil/common/mk/hpvm.mk +++ b/hpvm/test/benchmarks/parboil/common/mk/hpvm.mk @@ -15,14 +15,14 @@ 
 HPVM_RT_PATH = $(LLVM_BUILD_DIR)/tools/hpvm/projects/hpvm-rt
 HPVM_RT_LIB = $(HPVM_RT_PATH)/hpvm-rt.bc
 TESTGEN_OPTFLAGS = -load LLVMGenHPVM.so -genhpvm -globaldce
 KERNEL_GEN_FLAGS = -O3 -target nvptx64-nvidia-nvcl
 
 ifeq ($(TARGET),seq)
   DEVICE = CPU_TARGET
   HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -dfg2llvm-cpu -clearDFG
 else
   DEVICE = GPU_TARGET
-  HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -localmem -dfg2llvm-nvptx -dfg2llvm-cpu -clearDFG
+  HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -localmem -dfg2llvm-opencl -dfg2llvm-cpu -clearDFG
 endif
 
 CFLAGS += -DDEVICE=$(DEVICE)
diff --git a/hpvm/test/benchmarks/pipeline/Makefile b/hpvm/test/benchmarks/pipeline/Makefile
index 36f6a1f900..8a55393f24 100644
--- a/hpvm/test/benchmarks/pipeline/Makefile
+++ b/hpvm/test/benchmarks/pipeline/Makefile
@@ -52,7 +52,7 @@ ifeq ($(TARGET),seq)
   HPVM_OPTFLAGS += -hpvm-timers-cpu
 else
   DEVICE = GPU_TARGET
-  HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -localmem -dfg2llvm-nvptx -dfg2llvm-cpu -clearDFG
+  HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -localmem -dfg2llvm-opencl -dfg2llvm-cpu -clearDFG
   HPVM_OPTFLAGS += -hpvm-timers-cpu -hpvm-timers-ptx
 endif
 TESTGEN_OPTFLAGS += -hpvm-timers-gen
diff --git a/hpvm/test/benchmarks/template/Makefile b/hpvm/test/benchmarks/template/Makefile
index 46b1afe95d..fed129b32d 100644
--- a/hpvm/test/benchmarks/template/Makefile
+++ b/hpvm/test/benchmarks/template/Makefile
@@ -56,7 +56,7 @@ ifeq ($(TARGET),seq)
   HPVM_OPTFLAGS += -hpvm-timers-cpu
 else
   DEVICE = GPU_TARGET
-  HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -localmem -dfg2llvm-nvptx -dfg2llvm-cpu -clearDFG
+  HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -localmem -dfg2llvm-opencl -dfg2llvm-cpu -clearDFG
   HPVM_OPTFLAGS += -hpvm-timers-cpu -hpvm-timers-ptx
 endif
 TESTGEN_OPTFLAGS += -hpvm-timers-gen
diff --git a/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.atomic.genvisc.ll b/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.atomic.genvisc.ll
index e3570bcb66..ea6ec14d10 100644
--- a/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.atomic.genvisc.ll
+++ b/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.atomic.genvisc.ll
@@ -1,4 +1,4 @@
-; RUN: opt -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -S -localmem -dfg2llvm-nvptx < %s | FileCheck %s
+; RUN: opt -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -S -localmem -dfg2llvm-opencl < %s | FileCheck %s
 ; ModuleID = 'ThreeLevel.atomic.ll'
 source_filename = "ThreeLevel.constmem.c"
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.genvisc.ll b/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.genvisc.ll
index b08b951800..9af2e48674 100644
--- a/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.genvisc.ll
+++ b/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.genvisc.ll
@@ -1,4 +1,4 @@
-; RUN: opt -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -S -localmem -dfg2llvm-nvptx < %s | FileCheck %s
+; RUN: opt -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -S -localmem -dfg2llvm-opencl < %s | FileCheck %s
 ; ModuleID = 'ThreeLevel.ll'
 source_filename = "ThreeLevel.c"
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.opt.genvisc.ll b/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.opt.genvisc.ll
index b3cb659f96..5de9fd4e33 100644
--- a/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.opt.genvisc.ll
+++ b/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.opt.genvisc.ll
@@ -1,4 +1,4 @@
-; RUN: opt -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -S -localmem -dfg2llvm-nvptx < %s | FileCheck %s
+; RUN: opt -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -S -localmem -dfg2llvm-opencl < %s | FileCheck %s
 ; ModuleID = 'ThreeLevel.opt.ll'
 source_filename = "ThreeLevel.opt.c"
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-- 
GitLab