Commit d8c80341 authored by Yifan Zhao's avatar Yifan Zhao
Browse files

Also fix lower case nvptx

parent 28683228
......@@ -1407,7 +1407,7 @@ void CGT_CPU::codeGen(DFLeafNode *N) {
// At this point, the CPU backend does not support code generation for
// the case where allocation node is used, so we skip. This means that a
// CPU version will not be created, and therefore code generation will
// only succeed if another backend (nvptx or spir) has been invoked to
// only succeed if another backend (opencl or spir) has been invoked to
// generate a node function for the node including the allocation node.
if (N->isAllocationNode()) {
DEBUG(errs() << "Skipping allocation node\n");
......
......@@ -991,49 +991,49 @@ void CGT_OpenCL::codeGen(DFLeafNode *N) {
// Look up if we have visited this function before. If we have, then just
// get the cloned function pointer from DFNode. Otherwise, create the cloned
// function and add it to the DFNode GenFunc.
// Function *F_nvptx = N->getGenFunc();
Function *F_nvptx = N->getGenFuncForTarget(hpvm::GPU_TARGET);
// Function *F_opencl = N->getGenFunc();
Function *F_opencl = N->getGenFuncForTarget(hpvm::GPU_TARGET);
assert(F_nvptx == NULL &&
assert(F_opencl == NULL &&
"Error: Visiting a node for which code already generated");
// Clone the function
ValueToValueMapTy VMap;
// F_nvptx->setName(FName+"_nvptx");
// F_opencl->setName(FName+"_opencl");
Twine FName = F->getName();
StringRef fStr = FName.getSingleStringRef();
Twine newFName = Twine(fStr, "_nvptx");
F_nvptx = CloneFunction(F, VMap);
F_nvptx->setName(newFName);
Twine newFName = Twine(fStr, "_opencl");
F_opencl = CloneFunction(F, VMap);
F_opencl->setName(newFName);
// errs() << "Old Function Name: " << F->getName() << "\n";
// errs() << "New Function Name: " << F_nvptx->getName() << "\n";
// errs() << "New Function Name: " << F_opencl->getName() << "\n";
F_nvptx->removeFromParent();
F_opencl->removeFromParent();
// Insert the cloned function into the kernels module
KernelM->getFunctionList().push_back(F_nvptx);
KernelM->getFunctionList().push_back(F_opencl);
// TODO: Iterate over all the instructions of F_nvptx and identify the
// TODO: Iterate over all the instructions of F_opencl and identify the
// callees and clone them into this module.
DEBUG(errs() << *F_nvptx->getType());
DEBUG(errs() << *F_nvptx);
DEBUG(errs() << *F_opencl->getType());
DEBUG(errs() << *F_opencl);
// Transform the function to void and remove all target dependent attributes
// from the function
F_nvptx = transformFunctionToVoid(F_nvptx);
F_opencl = transformFunctionToVoid(F_opencl);
// Add generated function info to DFNode
// N->setGenFunc(F_nvptx, hpvm::GPU_TARGET);
N->addGenFunc(F_nvptx, hpvm::GPU_TARGET, false);
// N->setGenFunc(F_opencl, hpvm::GPU_TARGET);
N->addGenFunc(F_opencl, hpvm::GPU_TARGET, false);
DEBUG(
errs()
<< "Removing all attributes from Kernel Function and adding nounwind\n");
F_nvptx->removeAttributes(AttributeList::FunctionIndex,
F_nvptx->getAttributes().getFnAttributes());
F_nvptx->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind);
F_opencl->removeAttributes(AttributeList::FunctionIndex,
F_opencl->getAttributes().getFnAttributes());
F_opencl->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind);
// FIXME: For now, assume only one allocation node
kernel->AllocationNode = NULL;
......@@ -1111,8 +1111,8 @@ void CGT_OpenCL::codeGen(DFLeafNode *N) {
// global address space
unsigned argIndex = 0;
std::vector<unsigned> GlobalMemArgs;
for (Function::arg_iterator ai = F_nvptx->arg_begin(),
ae = F_nvptx->arg_end();
for (Function::arg_iterator ai = F_opencl->arg_begin(),
ae = F_opencl->arg_end();
ai != ae; ++ai) {
if (ai->getType()->isPointerTy()) {
// If the arguement is already chosen for shared memory arguemnt list,
......@@ -1133,11 +1133,11 @@ void CGT_OpenCL::codeGen(DFLeafNode *N) {
// loads are not dependent on node id of current node, should be moved to
// constant memory, subject to size of course
std::vector<unsigned> ConstantMemArgs =
globalToConstantMemoryOpt(&GlobalMemArgs, F_nvptx);
globalToConstantMemoryOpt(&GlobalMemArgs, F_opencl);
F_nvptx = changeArgAddrspace(F_nvptx, ConstantMemArgs, GLOBAL_ADDRSPACE);
F_nvptx = changeArgAddrspace(F_nvptx, SharedMemArgs, SHARED_ADDRSPACE);
F_nvptx = changeArgAddrspace(F_nvptx, GlobalMemArgs, GLOBAL_ADDRSPACE);
F_opencl = changeArgAddrspace(F_opencl, ConstantMemArgs, GLOBAL_ADDRSPACE);
F_opencl = changeArgAddrspace(F_opencl, SharedMemArgs, SHARED_ADDRSPACE);
F_opencl = changeArgAddrspace(F_opencl, GlobalMemArgs, GLOBAL_ADDRSPACE);
// Function to replace call instructions to functions in the kernel
std::map<Function *, Function *> OrgToClonedFuncMap;
......@@ -1168,7 +1168,7 @@ void CGT_OpenCL::codeGen(DFLeafNode *N) {
};
// Go through all the instructions
for (inst_iterator i = inst_begin(F_nvptx), e = inst_end(F_nvptx); i != e;
for (inst_iterator i = inst_begin(F_opencl), e = inst_end(F_opencl); i != e;
++i) {
Instruction *I = &(*i);
// Leaf nodes should not contain HPVM graph intrinsics or launch
......@@ -1189,7 +1189,7 @@ void CGT_OpenCL::codeGen(DFLeafNode *N) {
/**************************** llvm.hpvm.getNode()
* *****************************/
case Intrinsic::hpvm_getNode: {
DEBUG(errs() << F_nvptx->getName() << "\t: Handling getNode\n");
DEBUG(errs() << F_opencl->getName() << "\t: Handling getNode\n");
// add mapping <intrinsic, this node> to the node-specific map
Leaf_HandleToDFNodeMap[II] = N;
IItoRemove.push_back(II);
......@@ -1197,7 +1197,7 @@ void CGT_OpenCL::codeGen(DFLeafNode *N) {
/************************* llvm.hpvm.getParentNode()
* **************************/
case Intrinsic::hpvm_getParentNode: {
DEBUG(errs() << F_nvptx->getName() << "\t: Handling getParentNode\n");
DEBUG(errs() << F_opencl->getName() << "\t: Handling getParentNode\n");
// get the parent node of the arg node
// get argument node
ArgII = cast<IntrinsicInst>((II->getOperand(0))->stripPointerCasts());
......@@ -1213,7 +1213,7 @@ void CGT_OpenCL::codeGen(DFLeafNode *N) {
/*************************** llvm.hpvm.getNumDims()
* ***************************/
case Intrinsic::hpvm_getNumDims: {
DEBUG(errs() << F_nvptx->getName() << "\t: Handling getNumDims\n");
DEBUG(errs() << F_opencl->getName() << "\t: Handling getNumDims\n");
// get node from map
// get the appropriate field
ArgII = cast<IntrinsicInst>((II->getOperand(0))->stripPointerCasts());
......@@ -1234,7 +1234,8 @@ void CGT_OpenCL::codeGen(DFLeafNode *N) {
case Intrinsic::hpvm_getNodeInstanceID_x:
case Intrinsic::hpvm_getNodeInstanceID_y:
case Intrinsic::hpvm_getNodeInstanceID_z: {
DEBUG(errs() << F_nvptx->getName() << "\t: Handling getNodeInstanceID\n"
DEBUG(errs() << F_opencl->getName()
<< "\t: Handling getNodeInstanceID\n"
<< "\t: " << *II << "\n");
ArgII = cast<IntrinsicInst>((II->getOperand(0))->stripPointerCasts());
ArgDFNode = Leaf_HandleToDFNodeMap[ArgII];
......@@ -1318,7 +1319,7 @@ void CGT_OpenCL::codeGen(DFLeafNode *N) {
// then, why do we need to keep that info in the graph? (only for the
// kernel configuration during the call)
DEBUG(errs() << F_nvptx->getName()
DEBUG(errs() << F_opencl->getName()
<< "\t: Handling getNumNodeInstances\n");
ArgII = cast<IntrinsicInst>((II->getOperand(0))->stripPointerCasts());
ArgDFNode = Leaf_HandleToDFNodeMap[ArgII];
......@@ -1376,7 +1377,7 @@ void CGT_OpenCL::codeGen(DFLeafNode *N) {
IItoRemove.push_back(II);
} break;
case Intrinsic::hpvm_barrier: {
DEBUG(errs() << F_nvptx->getName() << "\t: Handling barrier\n");
DEBUG(errs() << F_opencl->getName() << "\t: Handling barrier\n");
DEBUG(errs() << "Substitute with barrier()\n");
DEBUG(errs() << *II << "\n");
FunctionType *FT = FunctionType::get(
......@@ -1587,7 +1588,7 @@ void CGT_OpenCL::codeGen(DFLeafNode *N) {
// search for pattern where float is being casted to int and loaded/stored and
// change it.
DEBUG(errs() << "finding pattern for replacement!\n");
for (inst_iterator i = inst_begin(F_nvptx), e = inst_end(F_nvptx); i != e;
for (inst_iterator i = inst_begin(F_opencl), e = inst_end(F_opencl); i != e;
++i) {
bool cont = false;
bool keepGEPI = false;
......@@ -1867,8 +1868,8 @@ void CGT_OpenCL::codeGen(DFLeafNode *N) {
KernelM->getFunctionList().push_back(F);
}
addCLMetadata(F_nvptx);
kernel->KernelFunction = F_nvptx;
addCLMetadata(F_opencl);
kernel->KernelFunction = F_opencl;
DEBUG(errs() << "Identified kernel - " << kernel->KernelFunction->getName()
<< "\n");
DEBUG(errs() << *KernelM);
......@@ -2364,13 +2365,13 @@ static std::string getFilenameFromModule(const Module &M) {
// Changes the data layout of the Module to be compiled with OpenCL backend
// TODO: Figure out when to call it, probably after duplicating the modules
static void changeDataLayout(Module &M) {
std::string nvptx32_layoutStr = "e-p:32:32-i64:64-v16:16-v32:32-n16:32:64";
std::string nvptx64_layoutStr = "e-i64:64-v16:16-v32:32-n16:32:64";
std::string opencl32_layoutStr = "e-p:32:32-i64:64-v16:16-v32:32-n16:32:64";
std::string opencl64_layoutStr = "e-i64:64-v16:16-v32:32-n16:32:64";
if (TARGET_PTX == 32)
M.setDataLayout(StringRef(nvptx32_layoutStr));
M.setDataLayout(StringRef(opencl32_layoutStr));
else if (TARGET_PTX == 64)
M.setDataLayout(StringRef(nvptx64_layoutStr));
M.setDataLayout(StringRef(opencl64_layoutStr));
else
assert(false && "Invalid PTX target");
......@@ -2378,13 +2379,13 @@ static void changeDataLayout(Module &M) {
}
static void changeTargetTriple(Module &M) {
std::string nvptx32_TargetTriple = "nvptx--nvidiacl";
std::string nvptx64_TargetTriple = "nvptx64--nvidiacl";
std::string opencl32_TargetTriple = "nvptx--nvidiacl";
std::string opencl64_TargetTriple = "nvptx64--nvidiacl";
if (TARGET_PTX == 32)
M.setTargetTriple(StringRef(nvptx32_TargetTriple));
M.setTargetTriple(StringRef(opencl32_TargetTriple));
else if (TARGET_PTX == 64)
M.setTargetTriple(StringRef(nvptx64_TargetTriple));
M.setTargetTriple(StringRef(opencl64_TargetTriple));
else
assert(false && "Invalid PTX target");
......@@ -2465,7 +2466,7 @@ static std::string getAtomicOpName(Intrinsic::ID ID) {
} // End of namespace
char DFG2LLVM_OpenCL::ID = 0;
static RegisterPass<DFG2LLVM_OpenCL> X("dfg2llvm-nvptx",
static RegisterPass<DFG2LLVM_OpenCL> X("dfg2llvm-opencl",
"Dataflow Graph to LLVM for OpenCL Pass",
false /* does not modify the CFG */,
true /* transformation, *
......
......@@ -65,7 +65,7 @@ ifeq ($(TARGET),seq)
HPVM_OPTFLAGS += -hpvm-timers-cpu
else
DEVICE = GPU_TARGET
HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -localmem -dfg2llvm-nvptx -dfg2llvm-cpu -clearDFG
HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -localmem -dfg2llvm-opencl -dfg2llvm-cpu -clearDFG
HPVM_OPTFLAGS += -hpvm-timers-cpu -hpvm-timers-ptx
endif
TESTGEN_OPTFLAGS += -hpvm-timers-gen
......
......@@ -15,14 +15,14 @@ HPVM_RT_PATH = $(LLVM_BUILD_DIR)/tools/hpvm/projects/hpvm-rt
HPVM_RT_LIB = $(HPVM_RT_PATH)/hpvm-rt.bc
TESTGEN_OPTFLAGS = -load LLVMGenHPVM.so -genhpvm -globaldce
KERNEL_GEN_FLAGS = -O3 -target nvptx64-nvidia-nvcl
KERNEL_GEN_FLAGS = -O3 -target nvptx64-nvidia-nvcl
ifeq ($(TARGET),seq)
DEVICE = CPU_TARGET
HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -dfg2llvm-cpu -clearDFG
else
DEVICE = GPU_TARGET
HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -localmem -dfg2llvm-nvptx -dfg2llvm-cpu -clearDFG
HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -localmem -dfg2llvm-opencl -dfg2llvm-cpu -clearDFG
endif
CFLAGS += -DDEVICE=$(DEVICE)
......
......@@ -52,7 +52,7 @@ ifeq ($(TARGET),seq)
HPVM_OPTFLAGS += -hpvm-timers-cpu
else
DEVICE = GPU_TARGET
HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -localmem -dfg2llvm-nvptx -dfg2llvm-cpu -clearDFG
HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -localmem -dfg2llvm-opencl -dfg2llvm-cpu -clearDFG
HPVM_OPTFLAGS += -hpvm-timers-cpu -hpvm-timers-ptx
endif
TESTGEN_OPTFLAGS += -hpvm-timers-gen
......
......@@ -56,7 +56,7 @@ ifeq ($(TARGET),seq)
HPVM_OPTFLAGS += -hpvm-timers-cpu
else
DEVICE = GPU_TARGET
HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -localmem -dfg2llvm-nvptx -dfg2llvm-cpu -clearDFG
HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -localmem -dfg2llvm-opencl -dfg2llvm-cpu -clearDFG
HPVM_OPTFLAGS += -hpvm-timers-cpu -hpvm-timers-ptx
endif
TESTGEN_OPTFLAGS += -hpvm-timers-gen
......
; RUN: opt -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -S -localmem -dfg2llvm-nvptx < %s | FileCheck %s
; RUN: opt -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -S -localmem -dfg2llvm-opencl < %s | FileCheck %s
; ModuleID = 'ThreeLevel.atomic.ll'
source_filename = "ThreeLevel.constmem.c"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
......
; RUN: opt -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -S -localmem -dfg2llvm-nvptx < %s | FileCheck %s
; RUN: opt -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -S -localmem -dfg2llvm-opencl < %s | FileCheck %s
; ModuleID = 'ThreeLevel.ll'
source_filename = "ThreeLevel.c"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
......
; RUN: opt -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -S -localmem -dfg2llvm-nvptx < %s | FileCheck %s
; RUN: opt -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -S -localmem -dfg2llvm-opencl < %s | FileCheck %s
; ModuleID = 'ThreeLevel.opt.ll'
source_filename = "ThreeLevel.opt.c"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment