diff --git a/llvm/include/llvm/IR/IntrinsicsVISC.td b/llvm/include/llvm/IR/IntrinsicsVISC.td index 40f2a3a9ecbe7b777e6910e9a37db43b163165d9..3ff99354f084c1fc887fc2a9e7d82aec2396b8bd 100644 --- a/llvm/include/llvm/IR/IntrinsicsVISC.td +++ b/llvm/include/llvm/IR/IntrinsicsVISC.td @@ -124,38 +124,32 @@ let TargetPrefix = "visc" in { /* Find the unique indentifier of a dataflow node (with respect to his parent * node) in the specified dimension intrinsic - - * i32 llvm.visc.getNodeInstanceID(i8*, i32); */ -// def int_visc_getNodeInstanceID : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, -// llvm_i32_ty], []>; - /* i32 llvm.visc.getNodeInstanceID.[xyz](i8*); + /* i64 llvm.visc.getNodeInstanceID.[xyz](i8*); */ - def int_visc_getNodeInstanceID_x : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], + def int_visc_getNodeInstanceID_x : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], [IntrNoMem]>; - def int_visc_getNodeInstanceID_y : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], + def int_visc_getNodeInstanceID_y : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], [IntrNoMem]>; - def int_visc_getNodeInstanceID_z : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], + def int_visc_getNodeInstanceID_z : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], [IntrNoMem]>; /* Find the number of instances of a dataflow node in the specified dimension * intrinsic - - * i32 llvm.visc.getNumNodeInstances(i8*, i32); */ - /*def int_visc_getNumNodeInstances : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty,*/ - /*llvm_i32_ty], []>;*/ - /* i32 llvm.visc.getNumNodeInstances.[xyz](i8*); + /* i64 llvm.visc.getNumNodeInstances.[xyz](i8*); */ - def int_visc_getNumNodeInstances_x : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], + def int_visc_getNumNodeInstances_x : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], [IntrNoMem]>; - def int_visc_getNumNodeInstances_y : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], + def int_visc_getNumNodeInstances_y : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], [IntrNoMem]>; - def int_visc_getNumNodeInstances_z : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], + def int_visc_getNumNodeInstances_z : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], [IntrNoMem]>; /* Local Barrier diff --git a/llvm/include/llvm/SupportVISC/DFG2LLVM.h b/llvm/include/llvm/SupportVISC/DFG2LLVM.h index fff77dd0f49e41309ac207282c786a874ba15037..5cc8a7956456ac4a7af848dadc31f0918c414dc2 100644 --- a/llvm/include/llvm/SupportVISC/DFG2LLVM.h +++ b/llvm/include/llvm/SupportVISC/DFG2LLVM.h @@ -284,6 +284,7 @@ void CodeGenTraversal::initializeTimerSet(Instruction* InsertBefore) { GlobalValue::CommonLinkage, Constant::getNullValue(Type::getInt8PtrTy(M.getContext())), Twine("viscTimerSet_")+TargetName); + errs() << "New global variable: " << *TimerSet << "\n"; Value* TimerSetAddr = CallInst::Create(llvm_visc_initializeTimerSet, None, diff --git a/llvm/include/llvm/SupportVISC/VISCUtils.h b/llvm/include/llvm/SupportVISC/VISCUtils.h index 81fa8a4044cd863ac3ee5c63995e035d54d5e9f7..47e7582e5e878ec373bb958826929ad4e8f593b4 100644 --- a/llvm/include/llvm/SupportVISC/VISCUtils.h +++ b/llvm/include/llvm/SupportVISC/VISCUtils.h @@ -1,5 +1,4 @@ - - +// //===---- DFG2LLVM.h - Header file for "VISC Dataflow Graph to Target" ----===// // // The LLVM Compiler Infrastructure @@ -29,10 +28,17 @@ static bool isViscCreateNodeIntrinsic(Instruction* I) { return (II->getCalledFunction()->getName()).startswith("llvm.visc.createNode"); } +static bool isViscCreateNodeCall(Instruction* I) { + if(!isa<CallInst>(I)) + return false; + CallInst* CI = cast<CallInst>(I); + return (CI->getCalledValue()->stripPointerCasts()->getName()).startswith("__visc__createNode"); +} + // Creates a new createNode intrinsic, similar to II but with different // associated function F instead -IntrinsicInst* createIdenticalCreateNodeWithDifferentFunction(Function* F, - IntrinsicInst* II) { +IntrinsicInst* createIdenticalCreateNodeIntrinsicWithDifferentFunction(Function* F, + IntrinsicInst* II) { Module* M = F->getParent(); // Find which createNode intrinsic we need to create @@ -79,7 +85,44 @@ IntrinsicInst* createIdenticalCreateNodeWithDifferentFunction(Function* F, IntrinsicInst* CreateNodeII = cast<IntrinsicInst>(CI); return CreateNodeII; } +/* +CallInst* createIdenticalCreateNodeCallWithDifferentFunction(Function* F, + CallInst* CI) { + // Find which createNode function call we need to create + Function* CreateNodeF = CI->getCalledValue(); + + ArrayRef<Value*> CreateNodeArgs; + if ((CreateNodeF->stripPointerCasts()->getName()).equals("__visc__createNode")) { + // This is a createNode call + CreateNodeArgs = ArrayRef<Value*>(CreateNodeF); + } else if ((CreateNodeF->stripPointerCasts()->getName()).equals("__visc__createNode1D")) { + // This is a createNode1D call + Value* CreateNode1DArgs[] = {CreateNodeF, CI->getArgOperand(1)}; + CreateNodeArgs = ArrayRef<Value*>(CreateNode1DArgs, 2); + } else if ((CreateNodeF->stripPointerCasts()->getName()).equals("__visc__createNode2D")) { + // This is a createNode2D call + Value* CreateNode2DArgs[] = {CreateNodeF, + CI->getArgOperand(1), + CI->getArgOperand(2)}; + CreateNodeArgs = ArrayRef<Value*>(CreateNode2DArgs, 3); + } else if ((CreateNodeF->stripPointerCasts()->getName()).equals("__visc__createNode3D")) { + // This is a createNode3D call + Value* CreateNode3DArgs[] = {CreateNodeF, + CI->getArgOperand(1), + CI->getArgOperand(2), + CI->getArgOperand(3)}; + CreateNodeArgs = ArrayRef<Value*>(CreateNode3DArgs, 4); + } else { + assert(false && "Unknown createNode call"); + } + + CallInst* newCI = CallInst::Create(CreateNodeF, + CreateNodeArgs, + F->getName()+".cncall"); + return newCI; +} +*/ // Fix VISC hints for this function void fixHintMetadata(Module &M, Function* F, Function* G) { @@ -126,14 +169,25 @@ void replaceNodeFunctionInIR(Module &M, Function* F, Function* G) { if (II->getArgOperand(0) != F) continue; // skip it - errs() << "Fixing use: " << *II << "\n"; // Otherwise, create a new createNode similar to the other one, // but with the changed function as first operand. IntrinsicInst* CreateNodeII = - createIdenticalCreateNodeWithDifferentFunction(G, II); + createIdenticalCreateNodeIntrinsicWithDifferentFunction(G, II); II->replaceAllUsesWith(CreateNodeII); toBeErased.push_back(II); + } else if (isViscCreateNodeCall(I)) { + CallInst* CI = cast<CallInst>(I); + // The found createNode is not associated with the changed function + if (CI->getArgOperand(0) != F) + continue; // skip it + + errs() << "Fixing use: " << *CI << "\n"; + errs() << "in function: " << f->getName() << "\n"; + // Replace use of F with use of G + CI->setArgOperand(0, G); + errs() << "Fixed use: " << *CI << "\n"; } + } for(auto I: toBeErased) { diff --git a/llvm/lib/Transforms/ClearDFG/ClearDFG.cpp b/llvm/lib/Transforms/ClearDFG/ClearDFG.cpp index 2dc200b1834f2edd3177365b40a024dc6ef57c67..37f74325a74a04e11fad7352115214565c41a689 100644 --- a/llvm/lib/Transforms/ClearDFG/ClearDFG.cpp +++ b/llvm/lib/Transforms/ClearDFG/ClearDFG.cpp @@ -100,10 +100,21 @@ bool ClearDFG::runOnModule(Module &M) { // BuildDFG::HandleToDFEdge &HandleToDFEdgeMap = DFG.getHandleToDFEdgeMap(); Function* VI = M.getFunction("llvm.visc.init"); + assert(VI->hasOneUse() && "More than one use of llvm.visc.init\n"); + for(Value::user_iterator ui = VI->user_begin(), ue = VI->user_end(); ui != ue; ui++) { + Instruction* I = dyn_cast<Instruction>(*ui); + I->eraseFromParent(); + } VI->replaceAllUsesWith(UndefValue::get(VI->getType())); VI->eraseFromParent(); Function* VC = M.getFunction("llvm.visc.cleanup"); + assert(VC->hasOneUse() && "More than one use of llvm.visc.cleanup\n"); + for(Value::user_iterator ui = VC->user_begin(), ue = VC->user_end(); ui != ue; ui++) { + Instruction* I = dyn_cast<Instruction>(*ui); + I->eraseFromParent(); + } + VC->replaceAllUsesWith(UndefValue::get(VC->getType())); VC->eraseFromParent(); diff --git a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp index b9645c14afff31837275efec0a4d5e8421fb2f65..f4b7fe250005021a79bbee51fb750f0e6bfb866e 100644 --- a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp +++ b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// #define ENABLE_ASSERTS -#define TARGET_PTX 32 +#define TARGET_PTX 64 #define GENERIC_ADDRSPACE 0 #define GLOBAL_ADDRSPACE 1 #define CONSTANT_ADDRSPACE 4 @@ -164,7 +164,7 @@ class CGT_NVPTX : public CodeGenTraversal { private: //Member variables - Module &KernelM; + std::unique_ptr<Module> KernelM; DFNode* KernelLaunchNode = NULL; Kernel* kernel; @@ -202,15 +202,17 @@ private: public: // Constructor - CGT_NVPTX(Module &_M, BuildDFG &_DFG) : CodeGenTraversal(_M, _DFG), KernelM(*CloneModule(&_M)) { + CGT_NVPTX(Module &_M, BuildDFG &_DFG) : CodeGenTraversal(_M, _DFG), KernelM(CloneModule(&_M)) { init(); initRuntimeAPI(); + errs() << "Old module pointer: " << &_M << "\n"; + errs() << "New module pointer: " << KernelM.get() << "\n"; // Copying instead of creating new, in order to preserve required info (metadata) // Remove functions, global variables and aliases std::vector<GlobalVariable*> gvv = std::vector<GlobalVariable*>(); - for (Module::global_iterator mi = KernelM.global_begin(), - me = KernelM.global_end(); (mi != me); ++mi) { + for (Module::global_iterator mi = KernelM->global_begin(), + me = KernelM->global_end(); (mi != me); ++mi) { GlobalVariable* gv = &*mi; gvv.push_back(gv); } @@ -220,8 +222,8 @@ public: } std::vector<Function*> fv = std::vector<Function*>(); - for (Module::iterator mi = KernelM.begin(), - me = KernelM.end(); (mi != me); ++mi) { + for (Module::iterator mi = KernelM->begin(), + me = KernelM->end(); (mi != me); ++mi) { Function* f = &*mi; fv.push_back(f); } @@ -231,8 +233,8 @@ public: } std::vector<GlobalAlias*> av = std::vector<GlobalAlias*>(); - for (Module::alias_iterator mi = KernelM.alias_begin(), - me = KernelM.alias_end(); (mi != me); ++mi) { + for (Module::alias_iterator mi = KernelM->alias_begin(), + me = KernelM->alias_end(); (mi != me); ++mi) { GlobalAlias* a = &*mi; av.push_back(a); } @@ -241,11 +243,11 @@ public: (*vi)->eraseFromParent(); } - changeDataLayout(KernelM); - changeTargetTriple(KernelM); + changeDataLayout(*KernelM); + changeTargetTriple(*KernelM); - DEBUG(errs() << KernelM); + DEBUG(errs() << *KernelM); } @@ -984,7 +986,7 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) { // Insert the cloned function into the kernels module - KernelM.getFunctionList().push_back(F_nvptx); + KernelM->getFunctionList().push_back(F_nvptx); //TODO: Iterate over all the instructions of F_nvptx and identify the @@ -1144,7 +1146,7 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) { ArgDFNode = Leaf_HandleToDFNodeMap[ArgII]; int numOfDim = ArgDFNode->getNumOfDim(); DEBUG(errs() << "\t Got node dimension : " << numOfDim << "\n"); - IntegerType* IntTy = Type::getInt32Ty(KernelM.getContext()); + IntegerType* IntTy = Type::getInt32Ty(KernelM->getContext()); ConstantInt* numOfDimConstant = ConstantInt::getSigned(IntTy, (int64_t) numOfDim); // Replace the result of the intrinsic with the computed value @@ -1175,7 +1177,7 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) { // Argument of the function to be called ConstantInt * DimConstant = - ConstantInt::get(Type::getInt32Ty(KernelM.getContext()), dim); + ConstantInt::get(Type::getInt32Ty(KernelM->getContext()), dim); //ArrayRef<Value *> Args(DimConstant); // The following is to find which function to call @@ -1185,6 +1187,10 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) { DEBUG(errs() << "Parent Level = " << parentLevel << "\n"); DEBUG(errs() << "Parent Repl factor = " << parentReplFactor << "\n"); + FunctionType* FT = + FunctionType::get(Type::getInt64Ty(KernelM->getContext()), + Type::getInt32Ty(KernelM->getContext()), + false); if ((!parentLevel || !parentReplFactor) && ArgDFNode == N) { // We only have one level in the hierarchy or the parent node is not // replicated. This indicates that the parent node is the kernel @@ -1193,32 +1199,20 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) { // itself DEBUG(errs() << "Substitute with get_global_id()\n"); DEBUG(errs() << *II << "\n"); - FunctionType* FT = - FunctionType::get(Type::getInt32Ty(KernelM.getContext()), - std::vector<Type*>(1, Type::getInt32Ty(KernelM.getContext())), - false); OpenCLFunction = cast<Function> - (KernelM.getOrInsertFunction(StringRef("get_global_id"), FT)); + (KernelM->getOrInsertFunction(StringRef("get_global_id"), FT)); } else if (Leaf_HandleToDFNodeMap[ArgII] == N) { //DEBUG(errs() << "Here inside cond 2\n"); // We are asking for this node's id with respect to its parent // this is a local id call - FunctionType* FT = - FunctionType::get(Type::getInt32Ty(KernelM.getContext()), - std::vector<Type*>(1, Type::getInt32Ty(KernelM.getContext())), - false); OpenCLFunction = cast<Function> - (KernelM.getOrInsertFunction(StringRef("get_local_id"), FT)); + (KernelM->getOrInsertFunction(StringRef("get_local_id"), FT)); //DEBUG(errs() << "exiting condition 2\n"); } else if (Leaf_HandleToDFNodeMap[ArgII] == N->getParent()) { // We are asking for this node's parent's id with respect to its // parent: this is a group id call - FunctionType* FT = - FunctionType::get(Type::getInt32Ty(KernelM.getContext()), - std::vector<Type*>(1, Type::getInt32Ty(KernelM.getContext())), - false); OpenCLFunction = cast<Function> - (KernelM.getOrInsertFunction(StringRef("get_group_id"), FT)); + (KernelM->getOrInsertFunction(StringRef("get_group_id"), FT)); } else { errs() << N->getFuncPointer()->getName() << "\n"; errs() << N->getParent()->getFuncPointer()->getName() << "\n"; @@ -1267,7 +1261,7 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) { // Argument of the function to be called ConstantInt * DimConstant = - ConstantInt::get(Type::getInt32Ty(KernelM.getContext()), dim); + ConstantInt::get(Type::getInt32Ty(KernelM->getContext()), dim); //ArrayRef<Value *> Args(DimConstant); // The following is to find which function to call @@ -1275,8 +1269,8 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) { int parentLevel = ParentDFNode->getLevel(); int parentReplFactor = ParentDFNode->getNumOfDim(); FunctionType* FT = - FunctionType::get(Type::getInt32Ty(KernelM.getContext()), - Type::getInt32Ty(KernelM.getContext()), + FunctionType::get(Type::getInt64Ty(KernelM->getContext()), + Type::getInt32Ty(KernelM->getContext()), false); if ((N == ArgDFNode) && (!parentLevel || !parentReplFactor)) { @@ -1284,17 +1278,17 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) { // replicated. This indicates that the parent node is the kernel // launch, so the instances are global_size (gridDim x blockDim) OpenCLFunction = cast<Function> - (KernelM.getOrInsertFunction(StringRef("get_global_size"), FT)); + (KernelM->getOrInsertFunction(StringRef("get_global_size"), FT)); } else if (Leaf_HandleToDFNodeMap[ArgII] == N) { // We are asking for this node's instances // this is a local size (block dim) call OpenCLFunction = cast<Function> - (KernelM.getOrInsertFunction(StringRef("get_local_size"), FT)); + (KernelM->getOrInsertFunction(StringRef("get_local_size"), FT)); } else if (Leaf_HandleToDFNodeMap[ArgII] == N->getParent()) { // We are asking for this node's parent's instances // this is a (global_size/local_size) (grid dim) call OpenCLFunction = cast<Function> - (KernelM.getOrInsertFunction(StringRef("get_num_groups"), FT)); + (KernelM->getOrInsertFunction(StringRef("get_num_groups"), FT)); } else { assert(false && "Unable to translate getNumNodeInstances intrinsic"); } @@ -1312,13 +1306,13 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) { DEBUG(errs() << F_nvptx->getName() << "\t: Handling barrier\n"); DEBUG(errs() << "Substitute with barrier()\n"); DEBUG(errs() << *II << "\n"); - FunctionType* FT = FunctionType::get(Type::getVoidTy(KernelM.getContext()), - std::vector<Type*>(1, Type::getInt32Ty(KernelM.getContext())), + FunctionType* FT = FunctionType::get(Type::getVoidTy(KernelM->getContext()), + std::vector<Type*>(1, Type::getInt32Ty(KernelM->getContext())), false); Function* OpenCLFunction = cast<Function> - (KernelM.getOrInsertFunction(StringRef("barrier"), FT)); + (KernelM->getOrInsertFunction(StringRef("barrier"), FT)); CallInst* CI = CallInst::Create(OpenCLFunction, - ArrayRef<Value*>(ConstantInt::get(Type::getInt32Ty(KernelM.getContext()), 1)), + ArrayRef<Value*>(ConstantInt::get(Type::getInt32Ty(KernelM->getContext()), 1)), "", II); II->replaceAllUsesWith(CI); IItoRemove.push_back(II); @@ -1373,7 +1367,7 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) { if(calleeF->isDeclaration()) { // Add the declaration to kernel module DEBUG(errs() << "Adding declaration to Kernel module: " << *calleeF << "\n"); - KernelM.getOrInsertFunction(calleeF->getName(), calleeF->getFunctionType()); + KernelM->getOrInsertFunction(calleeF->getName(), calleeF->getFunctionType()); if(IntrinsicInst* II = dyn_cast<IntrinsicInst>(CI)) { // Now handle a few specific intrinsics // For now, sin and cos are translated to their libclc equivalent @@ -1393,10 +1387,10 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) { name = "_Z3cosf"; FunctionType* SinCosFT = FunctionType::get(II->getType(), - Type::getFloatTy(KernelM.getContext()), + Type::getFloatTy(KernelM->getContext()), false); Function* LibclcFunction = cast<Function> - (KernelM.getOrInsertFunction(name, SinCosFT)); + (KernelM->getOrInsertFunction(name, SinCosFT)); CallInst* CI = CallInst::Create(LibclcFunction, II->getArgOperand(0), II->getName(), II); II->replaceAllUsesWith(CI); @@ -1406,7 +1400,7 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) { case Intrinsic::floor: { DEBUG(errs() << "Found floor intrinsic\n"); - F = Intrinsic::getDeclaration(&KernelM, Intrinsic::nvvm_floor_f); + F = Intrinsic::getDeclaration(KernelM.get(), Intrinsic::nvvm_floor_f); FunctionType* FTy = F->getFunctionType(); DEBUG(errs() << *F << "\n"); @@ -1446,7 +1440,7 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) { ValueToValueMapTy VMap; Function* newCalleeF = CloneFunction(calleeF, VMap); newCalleeF->removeFromParent(); //TODO: MARIA check - KernelM.getFunctionList().push_back(newCalleeF); + KernelM->getFunctionList().push_back(newCalleeF); } //TODO: how to handle address space qualifiers in load/store } @@ -1466,7 +1460,7 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) { addCLMetadata(F_nvptx); kernel->KernelFunction = F_nvptx; errs() << "Identified kernel - " << kernel->KernelFunction->getName() << "\n"; - DEBUG(errs() << KernelM); + DEBUG(errs() << *KernelM); return; } @@ -1581,16 +1575,16 @@ void CGT_NVPTX::addCLMetadata(Function *F) { // they are not mandatory. In future they might be useful to enable // optimizations - MDTuple *MDKernelNode = MDNode::get(KernelM.getContext(), KernelMD); - NamedMDNode *MDN_kernels = KernelM.getOrInsertNamedMetadata("opencl.kernels"); + MDTuple *MDKernelNode = MDNode::get(KernelM->getContext(), KernelMD); + NamedMDNode *MDN_kernels = KernelM->getOrInsertNamedMetadata("opencl.kernels"); MDN_kernels->addOperand(MDKernelNode); - KernelMD.push_back(MDString::get(KernelM.getContext(), "kernel")); + KernelMD.push_back(MDString::get(KernelM->getContext(), "kernel")); // TODO: Replace 1 with the number of the kernel. // Add when support for multiple launces is added - KernelMD.push_back(ValueAsMetadata::get(ConstantInt::get(Type::getInt32Ty(KernelM.getContext()),1))); - MDNode *MDNvvmAnnotationsNode = MDNode::get(KernelM.getContext(), KernelMD); - NamedMDNode *MDN_annotations = KernelM.getOrInsertNamedMetadata("nvvm.annotations"); + KernelMD.push_back(ValueAsMetadata::get(ConstantInt::get(Type::getInt32Ty(KernelM->getContext()),1))); + MDNode *MDNvvmAnnotationsNode = MDNode::get(KernelM->getContext(), KernelMD); + NamedMDNode *MDN_annotations = KernelM->getOrInsertNamedMetadata("nvvm.annotations"); MDN_annotations->addOperand(MDNvvmAnnotationsNode); } @@ -1612,7 +1606,7 @@ void CGT_NVPTX::writeKernelsModule() { Passes.add( createPrintModulePass(Out.os())); - Passes.run(KernelM); + Passes.run(*KernelM); // Declare success. Out.keep(); diff --git a/llvm/lib/Transforms/GenVISC/GenVISC.cpp b/llvm/lib/Transforms/GenVISC/GenVISC.cpp index a8c465269d1695d9ec41b5c24ee96d621108892e..8c0429af57cafd3fd9c102fe540ca568121560df 100644 --- a/llvm/lib/Transforms/GenVISC/GenVISC.cpp +++ b/llvm/lib/Transforms/GenVISC/GenVISC.cpp @@ -975,6 +975,7 @@ bool GenVISC::runOnModule(Module &M) { Function* CreateNodeF = Intrinsic::getDeclaration(&M, Intrinsic::visc_createNode); DEBUG(errs() << *CreateNodeF << "\n"); errs() << *I << "\n"; + errs() << "in " << I->getParent()->getParent()->getName() << "\n"; // Get i8* cast to function pointer Function* graphFunc = cast<Function>(CI->getArgOperand(0)); @@ -1147,6 +1148,7 @@ bool GenVISC::runOnModule(Module &M) { ReplaceCallWithIntrinsic(I, Intrinsic::visc_malloc, &toBeErased); } if (isVISCCall_return(I)) { + errs() << "Function before visc return processing\n" << *I->getParent()->getParent() << "\n"; // The operands to this call are the values to be returned by the node Value* ReturnVal = genCodeForReturn(CI); DEBUG(errs() << *ReturnVal << "\n"); @@ -1184,6 +1186,7 @@ bool GenVISC::runOnModule(Module &M) { //CI->replaceAllUsesWith(RetInst); toBeErased.push_back(CI); ReplaceInstWithInst(oldReturn, RetInst); + errs() << "Function after visc return processing\n" << *I->getParent()->getParent() << "\n"; } @@ -1274,6 +1277,7 @@ bool GenVISC::runOnModule(Module &M) { } if(bind == mutateTypeCause::mtc_BIND || bind == mutateTypeCause::mtc_RETURN) { + errs() << "Function before fixing return type\n" << *f << "\n"; // Argument type list. std::vector<Type*> FArgTypes; for(Function::const_arg_iterator ai = f->arg_begin(), ae = f->arg_end(); @@ -1282,8 +1286,9 @@ bool GenVISC::runOnModule(Module &M) { } errs() << "Fixing return statements of Function " << f->getName() << "\n"; + Type* ReturnType = f->getReturnType(); - DEBUG(errs() << *ReturnType << "\n";); + errs() << *ReturnType << "\n"; assert((ReturnType->isVoidTy() || isa<StructType>(ReturnType)) && "Return type should either be a struct or void type!"); @@ -1323,6 +1328,8 @@ bool GenVISC::runOnModule(Module &M) { errs() << "New: " << newF->getName() << "\n"; errs() << "New Type: " << *newF->getType() << "\n"; replaceNodeFunctionInIR(*f->getParent(), f, newF); + errs() << "Function after fixing return type\n" << *newF << "\n"; + // Argument type list. } @@ -1580,6 +1587,12 @@ static Function* transformReturnTypeToStruct(Function* F) { // Currently only works for void return types DEBUG(errs() << "Transforming return type of function to Struct: " << F->getName() << "\n"); + if (isa<StructType>(F->getReturnType())) { + DEBUG(errs() << "Return type is already a Struct: " << F->getName() << ": " << *F->getReturnType() << "\n"); + errs() << "Return type is already a Struct:\nFunction:\n " << *F << ":\n\tReturn Type: " << *F->getReturnType() << "\n"; + return F; + } + assert(F->getReturnType()->isVoidTy() && "Unhandled case - Only void return type handled\n"); // Create the argument type list with added argument types diff --git a/llvm/projects/visc-rt/visc-rt.cpp b/llvm/projects/visc-rt/visc-rt.cpp index fe8d7ad566f6dc862c101f521938366ad1bec9f9..4cb652135b6b50060c8b7a8ed8979f68e5ce5ced 100644 --- a/llvm/projects/visc-rt/visc-rt.cpp +++ b/llvm/projects/visc-rt/visc-rt.cpp @@ -1327,6 +1327,11 @@ void* llvm_visc_ocl_argument_ptr(void* graphID, void* input, int arg_index, size pthread_mutex_lock(&ocl_mtx); // Set Kernel Argument //pthread_mutex_lock(&ocl_mtx); + cout << "clKernel:" << Context->clKernel << "\n"; + cout << "arg_index:" << arg_index << "\n"; + cout << "sizeof(clmem):" << sizeof(cl_mem) << "\n"; + cout << "sizeof(d_input):" << sizeof(d_input) << "\n"; + cout << "&d_input:" << (void*) d_input << "\n"; cl_int errcode = clSetKernelArg(Context->clKernel, arg_index, sizeof(cl_mem), (void*)&d_input); //pthread_mutex_unlock(&ocl_mtx); checkErr(errcode, CL_SUCCESS, "Failure to set pointer argument"); diff --git a/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_base/kernel.cl b/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_base/kernel.cl index f376a27d90003e3c7c18dafb9f64a8b459a40029..5ee2fd5df0a57bc59c1f714bc3efb6b3670b0386 100644 --- a/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_base/kernel.cl +++ b/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_base/kernel.cl @@ -16,10 +16,10 @@ __kernel void mysgemmNT( __global const float *A, int lda, __global const float int m = get_global_id(0); int n = get_global_id(1); - for (int i = 0; i < k; ++i) { - float a = A[m + i * lda]; - float b = B[n + i * ldb]; - c += a * b; - } - C[m+n*ldc] = C[m+n*ldc] * beta + alpha * c; + /*for (int i = 0; i < k; ++i) {*/ + /*float a = A[m + i * lda]; */ + /*float b = B[n + i * ldb];*/ + /*c += a * b;*/ + /*}*/ + C[m+n*ldc] = beta + alpha * c; } diff --git a/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_base/main.cc b/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_base/main.cc index cd91d26735295328745a975df42706cf6574b149..b20c6a996a815f818e25091073a94cd41939a1ec 100644 --- a/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_base/main.cc +++ b/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_base/main.cc @@ -145,6 +145,19 @@ int main (int argc, char *argv[]) { clStatus = clBuildProgram(clProgram,1,&clDevice,clOptions,NULL,NULL); CHECK_ERROR("clBuildProgram") + size_t binarySizes = 0; + clStatus = clGetProgramInfo(clProgram, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &binarySizes, NULL); + CHECK_ERROR("clGetProgramInfo") + + std::cout << "Binary Size = " << binarySizes << "\n"; + + unsigned char* binaries = (unsigned char*) malloc(binarySizes); + clStatus = clGetProgramInfo(clProgram, CL_PROGRAM_BINARIES, binarySizes, &binaries, NULL); + CHECK_ERROR("clGetProgramInfo") + + std::cout << "Binary = \n" << binaries << "\n"; + + cl_kernel clKernel = clCreateKernel(clProgram,"mysgemmNT",&clStatus); CHECK_ERROR("clCreateKernel") diff --git a/llvm/test/VISC/parboil/benchmarks/sgemm/src/visc/main.cc b/llvm/test/VISC/parboil/benchmarks/sgemm/src/visc/main.cc index b2f798a51490007331824b5a9e1237309f9579c2..17eb5656df92dd2f08d89c4d6149e2e5d06526e4 100644 --- a/llvm/test/VISC/parboil/benchmarks/sgemm/src/visc/main.cc +++ b/llvm/test/VISC/parboil/benchmarks/sgemm/src/visc/main.cc @@ -48,10 +48,7 @@ void mysgemmNT( float* A, int lda, float* B, int ldb, float* C, int ldc, int k, for (int i = 0; i < k; ++i) { float a = A[m + i * lda]; float b = B[n + i * ldb]; - //if(a>b) - c += a * b; - //else - //c = a*b; + c += a * b; } C[m+n*ldc] = C[m+n*ldc] * beta + alpha * c; } @@ -95,13 +92,20 @@ int main (int argc, char *argv[]) { /* Read command line. Expect 3 inputs: A, B and B^T in column-major layout*/ params = pb_ReadParameters(&argc, argv); + + unsigned iter = 0; + while(params->inpFiles[iter] != NULL) { + printf("Found input file %d - %s\n", iter, params->inpFiles[iter]); + iter++; + } if ((params->inpFiles[0] == NULL) || (params->inpFiles[1] == NULL) || (params->inpFiles[2] == NULL) || (params->inpFiles[3] != NULL)) { - fprintf(stderr, "Expecting three input filenames\n"); + printf("Expecting three input filenames\n"); exit(-1); + return 0; } /* Read in data */ @@ -109,6 +113,7 @@ int main (int argc, char *argv[]) { readColMajorMatrixFile(params->inpFiles[0], matArow, matAcol, matA); + printf("This is in between two reads\n"); // load B^T readColMajorMatrixFile(params->inpFiles[2], matBcol, matBrow, matBT); diff --git a/llvm/test/VISC/parboil/benchmarks/sgemm/src/visc_sh/main.cc b/llvm/test/VISC/parboil/benchmarks/sgemm/src/visc_sh/main.cc index 76e1400650926d78a5820ee5383f915d50a0a45a..663a984783be43eecb50e65a073ce20d06153261 100644 --- a/llvm/test/VISC/parboil/benchmarks/sgemm/src/visc_sh/main.cc +++ b/llvm/test/VISC/parboil/benchmarks/sgemm/src/visc_sh/main.cc @@ -107,13 +107,13 @@ void SgemmLeaf( float* A, size_t bytesA, int lda, float* B, size_t bytesB, int l void* thisNode = __visc__getNode(); void* parentNode = __visc__getParentNode(thisNode); - int lx = __visc__getNodeInstanceID_x(thisNode); - int ly = __visc__getNodeInstanceID_y(thisNode); + long lx = __visc__getNodeInstanceID_x(thisNode); + long ly = __visc__getNodeInstanceID_y(thisNode); - int gx = __visc__getNodeInstanceID_x(parentNode); - int gy = __visc__getNodeInstanceID_y(parentNode); + long gx = __visc__getNodeInstanceID_x(parentNode); + long gy = __visc__getNodeInstanceID_y(parentNode); - int dimx = __visc__getNumNodeInstances_x(thisNode); + long dimx = __visc__getNumNodeInstances_x(thisNode); float c[TILE_N]; for (int i=0; i < TILE_N; i++) diff --git a/llvm/test/VISC/parboil/common/include/visc.h b/llvm/test/VISC/parboil/common/include/visc.h index 62eca0c298ba7f7725080e677b7c61a1b8dec782..f25c3cc4dcf090b6a057c369da732b80895788d0 100644 --- a/llvm/test/VISC/parboil/common/include/visc.h +++ b/llvm/test/VISC/parboil/common/include/visc.h @@ -45,12 +45,12 @@ void* __visc__getNode(); void* __visc__getParentNode(void*); void __visc__barrier(); void* __visc__malloc(long); -unsigned __visc__getNodeInstanceID_x(void*); -unsigned __visc__getNodeInstanceID_y(void*); -unsigned __visc__getNodeInstanceID_z(void*); -unsigned __visc__getNumNodeInstances_x(void*); -unsigned __visc__getNumNodeInstances_y(void*); -unsigned __visc__getNumNodeInstances_z(void*); +long __visc__getNodeInstanceID_x(void*); +long __visc__getNodeInstanceID_y(void*); +long __visc__getNodeInstanceID_z(void*); +long __visc__getNumNodeInstances_x(void*); +long __visc__getNumNodeInstances_y(void*); +long __visc__getNumNodeInstances_z(void*); // Atomic // signed int @@ -90,10 +90,10 @@ float __visc__cos(float); #include <unistd.h> -int get_global_id(int); -int get_group_id(int); -int get_local_id(int); -int get_local_size(int); +long get_global_id(int); +long get_group_id(int); +long get_local_id(int); +long get_local_size(int); void llvm_visc_track_mem(void*, size_t); diff --git a/llvm/test/VISC/parboil/common/mk/opencl.mk b/llvm/test/VISC/parboil/common/mk/opencl.mk index 4a05a389bc9795e86418c1bbb252b988b57fda96..3e2df549b749574d14240d1bfd3d6de8f43d2803 100644 --- a/llvm/test/VISC/parboil/common/mk/opencl.mk +++ b/llvm/test/VISC/parboil/common/mk/opencl.mk @@ -11,7 +11,7 @@ CXXFLAGS=$(LANG_CXXFLAGS) $(PLATFORM_CXXFLAGS) $(APP_CXXFLAGS) LDFLAGS=$(LANG_LDFLAGS) $(PLATFORM_LDFLAGS) $(APP_LDFLAGS) -LLVM_INSTALL:=$(LLVM_SRC_ROOT)/Release+Asserts +LLVM_INSTALL:=$(LLVM_SRC_ROOT)/../build LIBCLC:=$(LLVM_SRC_ROOT)/../libclc LLVM_CC:=$(LLVM_INSTALL)/bin/clang LLVM_LINK:=$(LLVM_INSTALL)/bin/llvm-link diff --git a/llvm/test/VISC/parboil/common/mk/visc.mk b/llvm/test/VISC/parboil/common/mk/visc.mk index e3b541378df6d2e1c82410648761995aee63b820..325301aa11dca7998e75df88bc532d3597663ab7 100755 --- a/llvm/test/VISC/parboil/common/mk/visc.mk +++ b/llvm/test/VISC/parboil/common/mk/visc.mk @@ -14,10 +14,11 @@ LIBCLC_LIB_PATH = $(LLVM_SRC_ROOT)/../../libclc-install/lib/clc VISC_RT_PATH = $(LLVM_SRC_ROOT)/../build/projects/visc-rt VISC_RT_LIB = $(VISC_RT_PATH)/visc-rt.ll -LIBCLC_NVPTX_LIB = $(LIBCLC_LIB_PATH)/nvptx--nvidiacl.bc +#LIBCLC_NVPTX_LIB = $(LIBCLC_LIB_PATH)/nvptx--nvidiacl.bc +LIBCLC_NVPTX_LIB = nvptx64--nvidiacl.bc TESTGEN_OPTFLAGS = -load LLVMGenVISC.so -genvisc -globaldce -KERNEL_GEN_FLAGS = -O3 -target nvptx +KERNEL_GEN_FLAGS = -O3 -target nvptx64-nvidia-nvcl ifeq ($(TARGET),x86) DEVICE = SPIR_TARGET