From 9de3eae28281ab36772e6bf30e931adc0a60b6c0 Mon Sep 17 00:00:00 2001 From: Prakalp Srivastava <psrivas2@illinois.edu> Date: Fri, 11 Mar 2016 03:00:28 -0600 Subject: [PATCH] (1) Added atomic visc intrinsics and support for them in PTX backend - IntrinsicsVISC.td, visc.h, GenVISC.cpp (2) Simplified GenVISC to easily add support for new intrinsics which have a 1 to 1 mapping instruction during code gen (3) Added runtime api call to set ocl shared memory argument --- llvm/include/llvm/IR/IntrinsicsVISC.td | 50 ++- .../DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp | 223 +++++++---- llvm/lib/Transforms/GenVISC/GenVISC.cpp | 355 ++++++++---------- llvm/lib/Transforms/LocalMem/LocalMem.cpp | 2 +- llvm/projects/visc-rt/visc-rt.cpp | 12 +- llvm/projects/visc-rt/visc-rt.h | 1 + llvm/test/VISC/parboil/.ycm_extra_conf.py | 1 + llvm/test/VISC/parboil/common/include/visc.h | 25 ++ 8 files changed, 390 insertions(+), 279 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsVISC.td b/llvm/include/llvm/IR/IntrinsicsVISC.td index 7f56304384..cb30dffe52 100644 --- a/llvm/include/llvm/IR/IntrinsicsVISC.td +++ b/llvm/include/llvm/IR/IntrinsicsVISC.td @@ -144,8 +144,8 @@ let TargetPrefix = "visc" in { * intrinsic - * i32 llvm.visc.getNumNodeInstances(i8*, i32); */ -// def int_visc_getNumNodeInstances : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, -// llvm_i32_ty], []>; + /*def int_visc_getNumNodeInstances : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty,*/ + /*llvm_i32_ty], []>;*/ /* i32 llvm.visc.getNumNodeInstances.[xyz](i8*); */ @@ -175,6 +175,52 @@ let TargetPrefix = "visc" in { def int_visc_getVectorLength : Intrinsic<[llvm_i32_ty], [], []>; /* ============ Atomic intrinsics ============= */ + // Atomic arithmetic operations + + /* i32 llvm.visc.atomic.cmpxchg(i32*, i32)*/ + def int_visc_atomic_cmpxchg: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, + llvm_i32_ty], []>; + /* i32 llvm.visc.atomic.add(i32*, i32)*/ + def int_visc_atomic_add: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], + []>; + + /* i32 
llvm.visc.atomic.sub(i32*, i32)*/ + def int_visc_atomic_sub: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], + []>; + + /* i32 llvm.visc.atomic.xchg(i32*, i32)*/ + def int_visc_atomic_xchg: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], + []>; + + /* i32 llvm.visc.atomic.inc(i32*)*/ + def int_visc_atomic_inc: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], + []>; + + /* i32 llvm.visc.atomic.dec(i32*)*/ + def int_visc_atomic_dec: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], + []>; + + /* i32 llvm.visc.atomic.min(i32*, i32)*/ + def int_visc_atomic_min: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], + []>; + + /* i32 llvm.visc.atomic.max(i32*, i32)*/ + def int_visc_atomic_max: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], + []>; + + // Atomic bitwise operations + + /* i32 llvm.visc.atomic.and(i32*, i32)*/ + def int_visc_atomic_and: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], + []>; + + /* i32 llvm.visc.atomic.or(i32*, i32)*/ + def int_visc_atomic_or: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], + []>; + + /* i32 llvm.visc.atomic.xor(i32*, i32)*/ + def int_visc_atomic_xor: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], + []>; } diff --git a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp index be46aec0dd..f3a32a9828 100644 --- a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp +++ b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp @@ -134,6 +134,8 @@ static void changeDataLayout(Module &); static void changeTargetTriple(Module &); static void findReturnInst(Function *, std::vector<ReturnInst *> &); static void findIntrinsicInst(Function *, Intrinsic::ID, std::vector<IntrinsicInst *> &); +static AtomicRMWInst::BinOp getAtomicOp(Intrinsic::ID); +static std::string getAtomicOpName(Intrinsic::ID); // DFG2LLVM_NVPTX - The first implementation. 
struct DFG2LLVM_NVPTX : public DFG2LLVM { @@ -160,6 +162,7 @@ private: Constant* llvm_visc_ocl_wait; Constant* llvm_visc_ocl_initContext; Constant* llvm_visc_ocl_clearContext; + Constant* llvm_visc_ocl_argument_shared; Constant* llvm_visc_ocl_argument_scalar; Constant* llvm_visc_ocl_argument_ptr; Constant* llvm_visc_ocl_output_ptr; @@ -260,6 +263,7 @@ void CGT_NVPTX::initRuntimeAPI() { DECLARE(llvm_visc_ocl_wait); DECLARE(llvm_visc_ocl_initContext); DECLARE(llvm_visc_ocl_clearContext); + DECLARE(llvm_visc_ocl_argument_shared); DECLARE(llvm_visc_ocl_argument_scalar); DECLARE(llvm_visc_ocl_argument_ptr); DECLARE(llvm_visc_ocl_output_ptr); @@ -507,60 +511,55 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fi ArrayRef<Value*>(setInputArgs, 4), "", RI); } } - DEBUG(errs() << "Setup output edges of node and insert visc api\n"); - std::map<unsigned, std::pair<Value*, unsigned> > kernelSharedInArgMap = - K->getSharedInArgMap(); + DEBUG(errs() << "Setup shared memory arguments of node and insert visc api\n"); + // Check to see if all the allocation sizes are constant (determined + // statically) bool constSizes = true; - for (std::map<unsigned, std::pair<Value*, unsigned> >::iterator - ib = kernelSharedInArgMap.begin(), - ie = kernelSharedInArgMap.end(); ib != ie && constSizes; ++ib) { - Value* sizeVal = ib->second.first; - constSizes = isa<Constant>(sizeVal); + for (auto& e: K->getSharedInArgMap()) { + constSizes &= isa<Constant>(e.second.first); } + // If the sizes are all constant if (constSizes) { - for (std::map<unsigned, std::pair<Value*, unsigned> >::iterator - ib = kernelSharedInArgMap.begin(), - ie = kernelSharedInArgMap.end(); ib != ie; ++ib) { - unsigned i = ib->first; - Value* inputVal = ib->second.first; + for (auto& e: K->getSharedInArgMap()) { + unsigned argNum = e.first; + Value* allocSize = e.second.first; - DEBUG(errs() << "\tArgument "<< i<< " = " << *inputVal << "\n"); + DEBUG(errs() << "\tLocal Memory at "<< argNum << ", 
size = " << *allocSize << "\n"); - // input value has been obtained. - // inputVal is a scalar value - if (i % 2 == 0) { // Shared memory ptr argument - scalar at size position + if (KF->getFunctionType()->getParamType(argNum)->isPointerTy()) { + // Shared memory ptr argument - scalar at size position switchToTimer(visc_TimerID_COPY_SCALAR, RI); - assert(isa<Constant>(inputVal) && "Only constant shared memory size is supported"); + assert(isa<Constant>(allocSize) && "Constant shared memory size is expected"); Value* setInputArgs[] = {GraphID, - ConstantPointerNull::get(Type::getInt8PtrTy(M.getContext())), - ConstantInt::get(Type::getInt32Ty(M.getContext()),i), - inputVal + ConstantInt::get(Type::getInt32Ty(M.getContext()),argNum), + allocSize }; - CallInst::Create(llvm_visc_ocl_argument_scalar, - ArrayRef<Value*>(setInputArgs, 4), "", RI); - - } else { // Sharem memory size argument - scalar at address position + CallInst::Create(llvm_visc_ocl_argument_shared, + ArrayRef<Value*>(setInputArgs, 3), "", RI); + } + else { + // Sharem memory size argument - scalar at address position switchToTimer(visc_TimerID_COPY_SCALAR, RI); // Store the scalar value on stack and then pass the pointer to its // location - AllocaInst* inputValPtr = new AllocaInst(inputVal->getType(), - kernel->KernelFunction->getName()+".sharedMem."+Twine(i)+".ptr", RI); - StoreInst* SI = new StoreInst(inputVal, inputValPtr, RI); + AllocaInst* allocSizePtr = new AllocaInst(allocSize->getType(), + allocSize->getName()+".sharedMem.ptr", RI); + StoreInst* SI = new StoreInst(allocSize, allocSizePtr, RI); - Value* inputValI8Ptr = CastInst::CreatePointerCast(inputValPtr, + Value* allocSizeI8Ptr = CastInst::CreatePointerCast(allocSizePtr, Type::getInt8PtrTy(M.getContext()), - kernel->KernelFunction->getName()+".sharedMem."+Twine(i)+".i8ptr", + allocSize->getName()+".sharedMem.i8ptr", RI); Value* setInputArgs[] = {GraphID, - inputValI8Ptr, - ConstantInt::get(Type::getInt32Ty(M.getContext()),i), - 
ConstantExpr::getSizeOf(inputVal->getType()) + allocSizeI8Ptr, + ConstantInt::get(Type::getInt32Ty(M.getContext()),argNum), + ConstantExpr::getSizeOf(allocSize->getType()) }; CallInst::Create(llvm_visc_ocl_argument_scalar, ArrayRef<Value*>(setInputArgs, 4), "", RI); @@ -584,44 +583,41 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fi ExtractValueInstVec.push_back(EI); } - for (std::map<unsigned, std::pair<Value*, unsigned> >::iterator - ib = kernelSharedInArgMap.begin(), - ie = kernelSharedInArgMap.end(); ib != ie; ++ib) { - unsigned i = ib->first; - Value* inputVal = ExtractValueInstVec[ib->second.second/2]; + for (auto& e: K->getSharedInArgMap()) { + unsigned argNum = e.first; + Value* allocSize = ExtractValueInstVec[e.second.second/2]; - DEBUG(errs() << "\tArgument "<< i<< " = " << *inputVal << "\n"); + DEBUG(errs() << "\tLocal Memory at "<< argNum << ", size = " << *allocSize << "\n"); - // input value has been obtained. - // inputVal is a scalar value - if (i % 2 == 0) { // Shared memory ptr argument - scalar at size position + if (KF->getFunctionType()->getParamType(argNum)->isPointerTy()) { + // Shared memory ptr argument - scalar at size position switchToTimer(visc_TimerID_COPY_SCALAR, RI); Value* setInputArgs[] = {GraphID, - ConstantPointerNull::get(Type::getInt8PtrTy(M.getContext())), - ConstantInt::get(Type::getInt32Ty(M.getContext()),i), - inputVal + ConstantInt::get(Type::getInt32Ty(M.getContext()),argNum), + allocSize }; - CallInst::Create(llvm_visc_ocl_argument_scalar, - ArrayRef<Value*>(setInputArgs, 4), "", RI); - - } else { // Sharem memory size argument - scalar at address position + CallInst::Create(llvm_visc_ocl_argument_shared, + ArrayRef<Value*>(setInputArgs, 3), "", RI); + } + else { + // Sharem memory size argument - scalar at address position switchToTimer(visc_TimerID_COPY_SCALAR, RI); // Store the scalar value on stack and then pass the pointer to its // location - AllocaInst* inputValPtr = new 
AllocaInst(inputVal->getType(), - kernel->KernelFunction->getName()+".sharedMem."+Twine(i)+".ptr", RI); - StoreInst* SI = new StoreInst(inputVal, inputValPtr, RI); + AllocaInst* allocSizePtr = new AllocaInst(allocSize->getType(), + allocSize->getName()+".sharedMem.ptr", RI); + StoreInst* SI = new StoreInst(allocSize, allocSizePtr, RI); - Value* inputValI8Ptr = CastInst::CreatePointerCast(inputValPtr, + Value* allocSizeI8Ptr = CastInst::CreatePointerCast(allocSizePtr, Type::getInt8PtrTy(M.getContext()), - kernel->KernelFunction->getName()+".sharedMem."+Twine(i)+".i8ptr", + allocSize->getName()+".sharedMem.i8ptr", RI); Value* setInputArgs[] = {GraphID, - inputValI8Ptr, - ConstantInt::get(Type::getInt32Ty(M.getContext()),i), - ConstantExpr::getSizeOf(inputVal->getType()) + allocSizeI8Ptr, + ConstantInt::get(Type::getInt32Ty(M.getContext()),argNum), + ConstantExpr::getSizeOf(allocSize->getType()) }; CallInst::Create(llvm_visc_ocl_argument_scalar, ArrayRef<Value*>(setInputArgs, 4), "", RI); @@ -629,8 +625,8 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fi } } - DEBUG(errs() << "Setup shared memory arguments of node and insert visc api\n"); + DEBUG(errs() << "Setup output edges of node and insert visc api\n"); // Set output if struct is not an empty struct StructType* OutputTy = K->KernelLeafNode->getOutputType(); std::vector<Value*> d_Outputs; @@ -911,7 +907,6 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) { // Checking which node is the kernel launch DFNode* PNode = N->getParent(); - errs() << "Parent Node: " << PNode << " " << PNode->getFuncPointer()->getName() << "\n"; int pLevel = PNode->getLevel(); int pReplFactor = PNode->getNumOfDim(); @@ -986,9 +981,9 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) { for (DFNode::const_indfedge_iterator ieb = N->indfedge_begin(), iee = N->indfedge_end(); ieb != iee; ++ieb) { DFNode *SrcDFNode = (*ieb)->getSourceDF(); - errs() << "Found edge from node: " << " " << SrcDFNode->getFuncPointer()->getName() << 
"\n"; - errs() << "Current Node: " << N->getFuncPointer()->getName() << "\n"; - errs() << "isAllocationNode = "<< SrcDFNode->isAllocationNode() << "\n"; + DEBUG(errs() << "Found edge from node: " << " " << SrcDFNode->getFuncPointer()->getName() << "\n"); + DEBUG(errs() << "Current Node: " << N->getFuncPointer()->getName() << "\n"); + DEBUG(errs() << "isAllocationNode = "<< SrcDFNode->isAllocationNode() << "\n"); if (!SrcDFNode->isDummyNode()) { assert(SrcDFNode->isAllocationNode()); kernel->AllocationNode = dyn_cast<DFLeafNode>(SrcDFNode); @@ -1284,18 +1279,72 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) { errs() << F_nvptx->getName() << "\t: Handling barrier\n"; errs() << "Substitute with barrier()\n"; errs() << *II << "\n"; - FunctionType* FT = - FunctionType::get(Type::getVoidTy(getGlobalContext() /*KernelM.getContext()*/), + FunctionType* FT = FunctionType::get(Type::getVoidTy(getGlobalContext() /*KernelM.getContext()*/), std::vector<Type*>(1, Type::getInt32Ty(getGlobalContext() /*KernelM.getContext()*/)), false); - Function* OpenCLFunction = cast<Function> + Function* OpenCLFunction = cast<Function> (KernelM.getOrInsertFunction(StringRef("barrier"), FT)); CallInst* CI = CallInst::Create(OpenCLFunction, ArrayRef<Value*>(ConstantInt::get(Type::getInt32Ty(getGlobalContext()), 1)), "", II); II->replaceAllUsesWith(CI); + IItoRemove.push_back(II); + } + break; + case Intrinsic::visc_atomic_cmpxchg: + break; + case Intrinsic::visc_atomic_add: + case Intrinsic::visc_atomic_sub: + case Intrinsic::visc_atomic_xchg: + case Intrinsic::visc_atomic_min: + case Intrinsic::visc_atomic_max: + case Intrinsic::visc_atomic_and: + case Intrinsic::visc_atomic_or: + case Intrinsic::visc_atomic_xor: + //case Intrinsic::visc_atomic_inc: + //case Intrinsic::visc_atomic_dec: + { + errs() << *II << "\n"; + // Only have support for i32 atomic intrinsics + assert(II->getType() == Type::getInt32Ty(II->getContext()) + && "Only support i32 atomic intrinsics for now"); + // Store the argument 
types and operand values in vectors + //std::vector<Type*> ArgTypes; + //std::vector<Value*> ArgValues; + //for(unsigned i=0; i < II->getNumArgOperands(); i++) { + //Value* V = II->getArgOperand(i); + //if(V->getType()->isPointerTy()) { + //If it is a pointer type, then bit cast to i32* as intrinsics use + //i8* for all pointers + //V = CastInst::CreatePointerCast(V, Type::getInt32PtrTy(II->getContext()), "", II); + //errs() << *V << "\n"; + //} + //ArgTypes.push_back(V->getType()); + //ArgValues.push_back(V); + //} + // Substitute with atomicrmw instruction + assert(II->getNumArgOperands() == 2 && "Expecting 2 operands for these atomics"); + Value* Ptr = II->getArgOperand(0); + Value* Val = II->getArgOperand(1); + assert(Ptr->getType()->isPointerTy() + && "First argument of supported atomics is expected to be a pointer"); + PointerType* PtrTy = cast<PointerType>(Ptr->getType()); + if(PtrTy != Type::getInt32PtrTy(II->getContext(), PtrTy->getAddressSpace())) { + Ptr = CastInst::CreatePointerCast(Ptr, Type::getInt32PtrTy(II->getContext(), PtrTy->getAddressSpace()), "", II); + } + AtomicRMWInst* AtomicInst = new AtomicRMWInst(getAtomicOp(II->getIntrinsicID()), + Ptr, Val, llvm::SequentiallyConsistent, llvm::CrossThread, II); + AtomicInst->setVolatile(true); + // Create OpenCL function call + //FunctionType* FT = FunctionType::get(Type::getInt32Ty(getGlobalContext()), + //ArgTypes, false); + //Function* OpenCLFunction = cast<Function>(KernelM.getOrInsertFunction( + //StringRef(getAtomicOpName(II->getIntrinsicID())), FT)); + //CallInst* CI = CallInst::Create(OpenCLFunction, ArgValues, II->getName(), II); + //errs() << "Substitute with: " << *CI << "\n"; + errs() << "Substitute with: " << *AtomicInst << "\n"; + II->replaceAllUsesWith(AtomicInst); IItoRemove.push_back(II); - } break; default: @@ -1326,8 +1375,10 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) { // Traverse the vector backwards, otherwise definitions are deleted while // their subsequent uses are still around 
for (std::vector<IntrinsicInst *>::reverse_iterator ri = IItoRemove.rbegin(), - re = IItoRemove.rend(); ri != re; ++ri) + re = IItoRemove.rend(); ri != re; ++ri) { + errs() << "Erasing: " << **ri << "\n"; (*ri)->eraseFromParent(); + } addCLMetadata(F_nvptx); kernel->KernelFunction = F_nvptx; @@ -1714,6 +1765,44 @@ static void findIntrinsicInst(Function* F, Intrinsic::ID IntrinsicID, std::vecto } } +// Helper function, returns the atomicrmw op, corresponding to intrinsic atomic op +static AtomicRMWInst::BinOp getAtomicOp(Intrinsic::ID ID) { + switch(ID) { + case Intrinsic::visc_atomic_add: return AtomicRMWInst::Add; + case Intrinsic::visc_atomic_sub: return AtomicRMWInst::Sub; + case Intrinsic::visc_atomic_min: return AtomicRMWInst::Min; + case Intrinsic::visc_atomic_max: return AtomicRMWInst::Max; + //case Intrinsic::visc_atomic_inc: return AtomicRMWInst::Inc; + //case Intrinsic::visc_atomic_dec: return AtomicRMWInst::Dec; + case Intrinsic::visc_atomic_xchg: return AtomicRMWInst::Xchg; + case Intrinsic::visc_atomic_and: return AtomicRMWInst::And; + case Intrinsic::visc_atomic_or: return AtomicRMWInst::Or; + case Intrinsic::visc_atomic_xor: return AtomicRMWInst::Xor; + default: + llvm_unreachable("Unsupported atomic intrinsic!"); + }; +} + + +// Helper function, returns the OpenCL function name, corresponding to atomic op +static std::string getAtomicOpName(Intrinsic::ID ID) { + switch(ID) { + case Intrinsic::visc_atomic_cmpxchg: return "atom_cmpxchg"; + case Intrinsic::visc_atomic_add: return "atom_add"; + case Intrinsic::visc_atomic_sub: return "atom_sub"; + case Intrinsic::visc_atomic_min: return "atom_min"; + case Intrinsic::visc_atomic_max: return "atom_max"; + case Intrinsic::visc_atomic_inc: return "atom_inc"; + case Intrinsic::visc_atomic_dec: return "atom_dec"; + case Intrinsic::visc_atomic_xchg: return "atom_xchg"; + case Intrinsic::visc_atomic_and: return "atom_and"; + case Intrinsic::visc_atomic_or: return "atom_or"; + case Intrinsic::visc_atomic_xor: 
return "atom_xor"; + default: + llvm_unreachable("Unsupported atomic intrinsic!"); + }; +} + } // End of namespace char DFG2LLVM_NVPTX::ID = 0; diff --git a/llvm/lib/Transforms/GenVISC/GenVISC.cpp b/llvm/lib/Transforms/GenVISC/GenVISC.cpp index cac740e4a2..8957ebe2c8 100644 --- a/llvm/lib/Transforms/GenVISC/GenVISC.cpp +++ b/llvm/lib/Transforms/GenVISC/GenVISC.cpp @@ -37,13 +37,54 @@ static void transformReturnTypeToStruct(Function* F); // Check if the dummy function call is a __visc__node call #define IS_VISC_CALL(callName) \ - static bool isVISC##callName##Call(Instruction* I) { \ + static bool isVISCCall_##callName(Instruction* I) { \ if(!isa<CallInst>(I)) \ return false; \ CallInst* CI = cast<CallInst>(I); \ return (CI->getCalledValue()->stripPointerCasts()->getName()).equals("__visc__"#callName); \ } +static void ReplaceCallWithIntrinsic(Instruction* I, Intrinsic::ID IntrinsicID, std::vector<Instruction*>* Erase) { + // Check if the instruction is Call Instruction + assert(isa<CallInst>(I) && "Expecting CallInst"); + CallInst* CI = cast<CallInst>(I); + DEBUG(errs() << "Found call: " << *CI << "\n"); + + // Find the correct intrinsic call + Module* M = CI->getParent()->getParent()->getParent(); + Function* F = Intrinsic::getDeclaration(M, IntrinsicID); + FunctionType* FTy = F->getFunctionType(); + DEBUG(errs() << *F << "\n"); + + // Create argument list + assert(CI->getNumArgOperands() == FTy->getNumParams() + && "Number of arguments of call do not match with Intrinsic"); + std::vector<Value*> args; + for(unsigned i=0; i < CI->getNumArgOperands(); i++) { + Value* V = CI->getArgOperand(i); + // Either the type should match or both should be of pointer type + assert(V->getType() == FTy->getParamType(i) || + (V->getType()->isPointerTy() && FTy->getParamType(i)->isPointerTy()) + && "Dummy function call argument does not match with Intrinsic argument!"); + // If the types do not match, then both must be pointer type and pointer + // cast needs to be performed + 
if(V->getType() != FTy->getParamType(i)) { + V = CastInst::CreatePointerCast(V, FTy->getParamType(i), "", CI); + } + args.push_back(V); + } + // Insert call instruction + CallInst* Inst = CallInst::Create(F, args, CI->getName(), CI); + + DEBUG(errs() << "\tSubstitute with: " << *Inst << "\n"); + + CI->replaceAllUsesWith(Inst); + // If the previous instruction needs to be erased, insert it in the vector + // Erased + if(Erase != NULL) + Erase->push_back(CI); +} + IS_VISC_CALL(launch) /* Exists but not required */ IS_VISC_CALL(edge) /* Exists but not required */ IS_VISC_CALL(createNode) @@ -65,6 +106,18 @@ IS_VISC_CALL(getNodeInstanceID_z) IS_VISC_CALL(getNumNodeInstances_x) IS_VISC_CALL(getNumNodeInstances_y) IS_VISC_CALL(getNumNodeInstances_z) +// Atomics +IS_VISC_CALL(atomic_cmpxchg) +IS_VISC_CALL(atomic_add) +IS_VISC_CALL(atomic_sub) +IS_VISC_CALL(atomic_xchg) +IS_VISC_CALL(atomic_inc) +IS_VISC_CALL(atomic_dec) +IS_VISC_CALL(atomic_min) +IS_VISC_CALL(atomic_max) +IS_VISC_CALL(atomic_and) +IS_VISC_CALL(atomic_or) +IS_VISC_CALL(atomic_xor) IS_VISC_CALL(init) IS_VISC_CALL(node) @@ -119,7 +172,7 @@ static void addArgs(Function* F, unsigned numArgs, std::string names[]) { // values being returned into a struct and returning it static Value* genCodeForReturn(CallInst* CI) { LLVMContext& Ctx = CI->getContext(); - assert(isVISCreturnCall(CI) + assert(isVISCCall_return(CI) && "__visc__return instruction expected!"); std::vector<Type*> ArgTypes; for(unsigned i=0; i < CI->getNumArgOperands(); i++) { @@ -133,15 +186,15 @@ static Value* genCodeForReturn(CallInst* CI) { 0, "returnStruct", CI); - errs() << "Generate Instructin:\n"; - errs() << *IV << "\n"; + DEBUG(errs() << "Code generation for return:\n"); + DEBUG(errs() << *IV << "\n"); for(unsigned i=1; i < CI->getNumArgOperands(); i++) { IV = InsertValueInst::Create(IV, CI->getArgOperand(i), i, IV->getName(), CI); - errs() << *IV << "\n"; + DEBUG(errs() << *IV << "\n"); } return IV; @@ -279,7 +332,7 @@ static 
std::vector<CallInst*>* getWaitList(Value* GraphID) { for(Value::use_iterator ui = GraphID->use_begin(), ue = GraphID->use_end(); ui!=ue; ++ui) { if(CallInst* waitI = dyn_cast<CallInst>(*ui)) { - assert(isVISCwaitCall(waitI) + assert(isVISCCall_wait(waitI) && "GraphID can only be used by __visc__wait call"); WaitList->push_back(waitI); } @@ -726,7 +779,7 @@ bool GenVISC::runOnModule(Module &M) { errs() << "\nGENVISC PASS\n"; this->M = &M; -// Load Runtime API Module + // Load Runtime API Module SMDiagnostic Err; char* LLVM_SRC_ROOT = getenv("LLVM_SRC_ROOT"); @@ -792,7 +845,7 @@ bool GenVISC::runOnModule(Module &M) { LLVMContext& Ctx = CI->getContext(); // If __visc__node call found, generate the test case - if(isVISCnodeCall(I)) { + if(isVISCCall_node(I)) { errs() << "Found visc node call in Function: " << f->getName() << "\n"; assert(CI->getNumArgOperands() >= 5 && "__visc__node call should have atleast 5 arguments!"); @@ -800,62 +853,25 @@ bool GenVISC::runOnModule(Module &M) { // Place this call in the list of instructions to be erased. 
toBeErased.push_back(CI); } - if(isVISCinitCall(I)) { - Function* InitF = Intrinsic::getDeclaration(&M, Intrinsic::visc_init); - DEBUG(errs() << *InitF << "\n"); - CallInst* InitInst = CallInst::Create(InitF, - None, "", CI); - toBeErased.push_back(CI); - DEBUG(errs() << "Found visc init call: " << *CI << "\n"); - DEBUG(errs() << "\tSubstitute with: " << *InitInst << "\n"); + if(isVISCCall_init(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::visc_init, &toBeErased); } - if(isVISCcleanupCall(I)) { - Function* CleanupF = Intrinsic::getDeclaration(&M, Intrinsic::visc_cleanup); - CallInst* CleanupInst = CallInst::Create(CleanupF, - None, - "", CI); - DEBUG(errs() << "Found visc cleanup call: " << *CI << "\n"); - DEBUG(errs() << "\tSubstitute with: " << *CleanupInst << "\n"); - toBeErased.push_back(CI); + if(isVISCCall_cleanup(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::visc_cleanup, &toBeErased); } - if(isVISCwaitCall(I)) { - Function* WaitF = Intrinsic::getDeclaration(&M, Intrinsic::visc_wait); - DEBUG(errs() << *WaitF << "\n"); - CallInst* WaitInst = CallInst::Create(WaitF, - ArrayRef<Value*>(CI->getArgOperand(0)), - "", CI); - DEBUG(errs() << "Found visc wait call: " << *CI << "\n"); - DEBUG(errs() << "\tSubstitute with: " << *WaitInst << "\n"); - CI->replaceAllUsesWith(WaitInst); - toBeErased.push_back(CI); + if(isVISCCall_wait(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::visc_wait, &toBeErased); } - if(isVISCtrackMemoryCall(I)) { - Function* TrackMemoryF = Intrinsic::getDeclaration(&M, Intrinsic::visc_trackMemory); - Value* TrackMemArgs[] = {CI->getArgOperand(0), CI->getArgOperand(1)}; - CallInst* TrackMemInst = CallInst::Create(TrackMemoryF, - ArrayRef<Value*>(TrackMemArgs,2), - "", CI); - DEBUG(errs() << "Found visc track memory call: " << *CI << "\n"); - DEBUG(errs() << "\tSubstitute with: " << *TrackMemInst << "\n"); + if(isVISCCall_trackMemory(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::visc_trackMemory, &toBeErased); } - if(isVISCuntrackMemoryCall(I)) 
{ - Function* UntrackMemoryF = Intrinsic::getDeclaration(&M, Intrinsic::visc_untrackMemory); - CallInst* UntrackMemInst = CallInst::Create(UntrackMemoryF, - ArrayRef<Value*>(CI->getArgOperand(0)), - "", CI); - DEBUG(errs() << "Found visc *un*track memory call: " << *CI << "\n"); - DEBUG(errs() << "\tSubstitute with: " << *UntrackMemInst << "\n"); + if(isVISCCall_untrackMemory(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::visc_untrackMemory, &toBeErased); } - if(isVISCrequestMemoryCall(I)) { - Function* RequestMemoryF = Intrinsic::getDeclaration(&M, Intrinsic::visc_requestMemory); - Value* RequestMemArgs[] = {CI->getArgOperand(0), CI->getArgOperand(1)}; - CallInst* RequestMemInst = CallInst::Create(RequestMemoryF, - ArrayRef<Value*>(RequestMemArgs,2), - "", CI); - DEBUG(errs() << "Found visc request memory call: " << *CI << "\n"); - DEBUG(errs() << "\tSubstitute with: " << *RequestMemInst << "\n"); + if(isVISCCall_requestMemory(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::visc_requestMemory, &toBeErased); } - if(isVISChintCall(I)) { + if(isVISCCall_hint(I)) { assert(isa<ConstantInt>(CI->getArgOperand(0)) && "Argument to hint must be constant integer!"); ConstantInt* hint = cast<ConstantInt>(CI->getArgOperand(0)); @@ -866,7 +882,7 @@ bool GenVISC::runOnModule(Module &M) { DEBUG(errs() << "Found visc hint call: " << *CI << "\n"); toBeErased.push_back(CI); } - if(isVISClaunchCall(I)) { + if(isVISCCall_launch(I)) { Function* LaunchF = Intrinsic::getDeclaration(&M, Intrinsic::visc_launch); DEBUG(errs() << *LaunchF << "\n"); // Get i8* cast to function pointer @@ -886,33 +902,14 @@ bool GenVISC::runOnModule(Module &M) { DEBUG(errs() << "\tSubstitute with: " << *LaunchInst << "\n"); CI->replaceAllUsesWith(LaunchInst); toBeErased.push_back(CI); - } - if(isVISCpushCall(I)) { - Function* PushF = Intrinsic::getDeclaration(&M, Intrinsic::visc_push); - DEBUG(errs() << *PushF << "\n"); - - Value* PushArgs[] = {CI->getArgOperand(0), CI->getArgOperand(1)}; - CallInst* PushInst = 
CallInst::Create(PushF, - ArrayRef<Value*>(PushArgs, 2), - "", CI); - DEBUG(errs() << "Found visc push call: " << *CI << "\n"); - DEBUG(errs() << "\tSubstitute with: " << *PushInst << "\n"); - CI->replaceAllUsesWith(PushInst); - toBeErased.push_back(CI); + if(isVISCCall_push(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::visc_push, &toBeErased); } - if(isVISCpopCall(I)) { - Function* PopF = Intrinsic::getDeclaration(&M, Intrinsic::visc_pop); - DEBUG(errs() << *PopF << "\n"); - CallInst* PopInst = CallInst::Create(PopF, - ArrayRef<Value*>(CI->getArgOperand(0)), - "output", CI); - DEBUG(errs() << "Found visc pop call: " << *CI << "\n"); - DEBUG(errs() << "\tSubstitute with: " << *PopInst << "\n"); - CI->replaceAllUsesWith(PopInst); - toBeErased.push_back(CI); + if(isVISCCall_pop(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::visc_pop, &toBeErased); } - if(isVISCcreateNodeCall(I)) { + if(isVISCCall_createNode(I)) { Function* CreateNodeF = Intrinsic::getDeclaration(&M, Intrinsic::visc_createNode); DEBUG(errs() << *CreateNodeF << "\n"); @@ -930,7 +927,7 @@ bool GenVISC::runOnModule(Module &M) { toBeErased.push_back(CI); } - if(isVISCcreateNode1DCall(I)) { + if(isVISCCall_createNode1D(I)) { Function* CreateNode1DF = Intrinsic::getDeclaration(&M, Intrinsic::visc_createNode1D); DEBUG(errs() << *CreateNode1DF << "\n"); @@ -948,7 +945,7 @@ bool GenVISC::runOnModule(Module &M) { CI->replaceAllUsesWith(CreateNode1DInst); toBeErased.push_back(CI); } - if(isVISCcreateNode2DCall(I)) { + if(isVISCCall_createNode2D(I)) { Function* CreateNode2DF = Intrinsic::getDeclaration(&M, Intrinsic::visc_createNode2D); DEBUG(errs() << *CreateNode2DF << "\n"); @@ -966,7 +963,7 @@ bool GenVISC::runOnModule(Module &M) { CI->replaceAllUsesWith(CreateNode2DInst); toBeErased.push_back(CI); } - if(isVISCcreateNode3DCall(I)) { + if(isVISCCall_createNode3D(I)) { Function* CreateNode3DF = Intrinsic::getDeclaration(&M, Intrinsic::visc_createNode3D); DEBUG(errs() << *CreateNode3DF << "\n"); @@ -987,7 
+984,7 @@ bool GenVISC::runOnModule(Module &M) { toBeErased.push_back(CI); } - if(isVISCedgeCall(I)) { + if(isVISCCall_edge(I)) { Function* EdgeF = Intrinsic::getDeclaration(&M, Intrinsic::visc_createEdge); DEBUG(errs() << *EdgeF << "\n"); ConstantInt* Op = cast<ConstantInt>(CI->getArgOperand(4)); @@ -1005,7 +1002,7 @@ bool GenVISC::runOnModule(Module &M) { CI->replaceAllUsesWith(EdgeInst); toBeErased.push_back(CI); } - if(isVISCbindInCall(I)) { + if(isVISCCall_bindIn(I)) { Function* BindInF = Intrinsic::getDeclaration(&M, Intrinsic::visc_bind_input); DEBUG(errs() << *BindInF << "\n"); // Check if this is a streaming bind or not @@ -1023,7 +1020,7 @@ bool GenVISC::runOnModule(Module &M) { CI->replaceAllUsesWith(BindInInst); toBeErased.push_back(CI); } - if(isVISCbindOutCall(I)) { + if(isVISCCall_bindOut(I)) { Function* BindOutF = Intrinsic::getDeclaration(&M, Intrinsic::visc_bind_output); DEBUG(errs() << *BindOutF << "\n"); // Check if this is a streaming bind or not @@ -1041,55 +1038,24 @@ bool GenVISC::runOnModule(Module &M) { CI->replaceAllUsesWith(BindOutInst); toBeErased.push_back(CI); } - if(isVISCattributesCall(I)) { + if(isVISCCall_attributes(I)) { Function* F = CI->getParent()->getParent(); handleVISCAttributes(F, CI); toBeErased.push_back(CI); } - if (isVISCgetNodeCall(I)) { - Function* GetNodeF = Intrinsic::getDeclaration(&M, Intrinsic::visc_getNode); - DEBUG(errs() << *GetNodeF << "\n"); - CallInst* GetNodeInst = CallInst::Create(GetNodeF, - None, "this.node", CI); - DEBUG(errs() << "Found visc getNode call: " << *CI << "\n"); - DEBUG(errs() << "\tSubstitute with: " << *GetNodeInst << "\n"); - CI->replaceAllUsesWith(GetNodeInst); - toBeErased.push_back(CI); - + if (isVISCCall_getNode(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::visc_getNode, &toBeErased); } - if (isVISCgetParentNodeCall(I)) { - Function* GetParentNodeF = Intrinsic::getDeclaration(&M, Intrinsic::visc_getParentNode); - DEBUG(errs() << *GetParentNodeF << "\n"); - CallInst* 
GetParentNodeInst = CallInst::Create(GetParentNodeF, - ArrayRef<Value*>(CI->getArgOperand(0)), - "this.node", CI); - DEBUG(errs() << "Found visc getParentNode call: " << *CI << "\n"); - DEBUG(errs() << "\tSubstitute with: " << *GetParentNodeInst << "\n"); - CI->replaceAllUsesWith(GetParentNodeInst); - toBeErased.push_back(CI); - + if (isVISCCall_getParentNode(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::visc_getParentNode, &toBeErased); } - if (isVISCbarrierCall(I)) { - Function* BarrierF = Intrinsic::getDeclaration(&M, Intrinsic::visc_barrier); - DEBUG(errs() << *BarrierF << "\n"); - CallInst* BarrierInst = CallInst::Create(BarrierF, - None, "", CI); - DEBUG(errs() << "Found visc barrier call: " << *CI << "\n"); - DEBUG(errs() << "\tSubstitute with: " << *BarrierInst << "\n"); - CI->replaceAllUsesWith(BarrierInst); - toBeErased.push_back(CI); + if (isVISCCall_barrier(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::visc_barrier, &toBeErased); } - if (isVISCmallocCall(I)) { - Function* MallocF = Intrinsic::getDeclaration(&M, Intrinsic::visc_malloc); - DEBUG(errs() << *MallocF << "\n"); - CallInst* MallocInst = CallInst::Create(MallocF, - CI->getArgOperand(0), "", CI); - DEBUG(errs() << "Found visc malloc call: " << *CI << "\n"); - DEBUG(errs() << "\tSubstitute with: " << *MallocInst << "\n"); - CI->replaceAllUsesWith(MallocInst); - toBeErased.push_back(CI); + if (isVISCCall_malloc(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::visc_malloc, &toBeErased); } - if (isVISCreturnCall(I)) { + if (isVISCCall_return(I)) { // The operands to this call are the values to be returned by the node Value* ReturnVal = genCodeForReturn(CI); DEBUG(errs() << *ReturnVal << "\n"); @@ -1118,95 +1084,68 @@ bool GenVISC::runOnModule(Module &M) { && "Multiple returns with mismatching types"); ReturnInst* RetInst = ReturnInst::Create(Ctx, ReturnVal); - errs() << "Found visc return call: " << *CI << "\n"; + DEBUG(errs() << "Found visc return call: " << *CI << "\n"); Instruction* oldReturn = 
CI->getParent()->getTerminator(); assert(isa<ReturnInst>(oldReturn) && "Expecting a return to be the terminator of this BB!"); - errs() << "Found return statement of BB: " << *oldReturn << "\n"; - errs() << "\tSubstitute return with: " << *RetInst << "\n"; + DEBUG(errs() << "Found return statement of BB: " << *oldReturn << "\n"); + DEBUG(errs() << "\tSubstitute return with: " << *RetInst << "\n"); //CI->replaceAllUsesWith(RetInst); toBeErased.push_back(CI); ReplaceInstWithInst(oldReturn, RetInst); } - if (isVISCgetNodeInstanceID_xCall(I)) { - Function* NodeInstanceID_xF = Intrinsic::getDeclaration(&M, Intrinsic::visc_getNodeInstanceID_x); - DEBUG(errs() << *NodeInstanceID_xF << "\n"); - CallInst* NodeInstanceID_xInst = CallInst::Create(NodeInstanceID_xF, - ArrayRef<Value*>(CI->getArgOperand(0)), - "instanceID_x", CI); - DEBUG(errs() << "Found visc getNodeInstanceID_x call: " << *CI << "\n"); - DEBUG(errs() << "\tSubstitute with: " << *NodeInstanceID_xInst << "\n"); - CI->replaceAllUsesWith(NodeInstanceID_xInst); - toBeErased.push_back(CI); - + if (isVISCCall_getNodeInstanceID_x(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::visc_getNodeInstanceID_x, &toBeErased); } - if (isVISCgetNodeInstanceID_yCall(I)) { - Function* NodeInstanceID_yF = Intrinsic::getDeclaration(&M, Intrinsic::visc_getNodeInstanceID_y); - DEBUG(errs() << *NodeInstanceID_yF << "\n"); - // Check if this is a streaming bind or not - CallInst* NodeInstanceID_yInst = CallInst::Create(NodeInstanceID_yF, - ArrayRef<Value*>(CI->getArgOperand(0)), - "instanceID_x", CI); - DEBUG(errs() << "Found visc getNodeInstanceID_y call: " << *CI << "\n"); - DEBUG(errs() << "\tSubstitute with: " << *NodeInstanceID_yInst << "\n"); - CI->replaceAllUsesWith(NodeInstanceID_yInst); - toBeErased.push_back(CI); - + if (isVISCCall_getNodeInstanceID_y(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::visc_getNodeInstanceID_y, &toBeErased); } - if (isVISCgetNodeInstanceID_zCall(I)) { - Function* NodeInstanceID_zF = 
Intrinsic::getDeclaration(&M, Intrinsic::visc_getNodeInstanceID_z); - DEBUG(errs() << *NodeInstanceID_zF << "\n"); - // Check if this is a streaming bind or not - CallInst* NodeInstanceID_zInst = CallInst::Create(NodeInstanceID_zF, - ArrayRef<Value*>(CI->getArgOperand(0)), - "instanceID_x", CI); - DEBUG(errs() << "Found visc getNodeInstanceID_z call: " << *CI << "\n"); - DEBUG(errs() << "\tSubstitute with: " << *NodeInstanceID_zInst << "\n"); - CI->replaceAllUsesWith(NodeInstanceID_zInst); - toBeErased.push_back(CI); - + if (isVISCCall_getNodeInstanceID_z(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::visc_getNodeInstanceID_z, &toBeErased); } - if (isVISCgetNumNodeInstances_xCall(I)) { - Function* NumNodeInstances_xF = Intrinsic::getDeclaration(&M, Intrinsic::visc_getNumNodeInstances_x); - DEBUG(errs() << *NumNodeInstances_xF << "\n"); - // Check if this is a streaming bind or not - CallInst* NumNodeInstances_xInst = CallInst::Create(NumNodeInstances_xF, - ArrayRef<Value*>(CI->getArgOperand(0)), - "instanceID_x", CI); - DEBUG(errs() << "Found visc getNumNodeInstances_x call: " << *CI << "\n"); - DEBUG(errs() << "\tSubstitute with: " << *NumNodeInstances_xInst << "\n"); - CI->replaceAllUsesWith(NumNodeInstances_xInst); - toBeErased.push_back(CI); - + if (isVISCCall_getNumNodeInstances_x(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::visc_getNumNodeInstances_x, &toBeErased); } - if (isVISCgetNumNodeInstances_yCall(I)) { - Function* NumNodeInstances_yF = Intrinsic::getDeclaration(&M, Intrinsic::visc_getNumNodeInstances_y); - DEBUG(errs() << *NumNodeInstances_yF << "\n"); - // Check if this is a streaming bind or not - CallInst* NumNodeInstances_yInst = CallInst::Create(NumNodeInstances_yF, - ArrayRef<Value*>(CI->getArgOperand(0)), - "instanceID_x", CI); - DEBUG(errs() << "Found visc getNumNodeInstances_y call: " << *CI << "\n"); - DEBUG(errs() << "\tSubstitute with: " << *NumNodeInstances_yInst << "\n"); - CI->replaceAllUsesWith(NumNodeInstances_yInst); - 
toBeErased.push_back(CI); - + if (isVISCCall_getNumNodeInstances_y(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::visc_getNumNodeInstances_y, &toBeErased); } - if (isVISCgetNumNodeInstances_zCall(I)) { - Function* NumNodeInstances_zF = Intrinsic::getDeclaration(&M, Intrinsic::visc_getNumNodeInstances_z); - DEBUG(errs() << *NumNodeInstances_zF << "\n"); - // Check if this is a streaming bind or not - CallInst* NumNodeInstances_zInst = CallInst::Create(NumNodeInstances_zF, - ArrayRef<Value*>(CI->getArgOperand(0)), - "instanceID_x", CI); - DEBUG(errs() << "Found visc getNumNodeInstances_z call: " << *CI << "\n"); - DEBUG(errs() << "\tSubstitute with: " << *NumNodeInstances_zInst << "\n"); - CI->replaceAllUsesWith(NumNodeInstances_zInst); - toBeErased.push_back(CI); - + if (isVISCCall_getNumNodeInstances_z(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::visc_getNumNodeInstances_z, &toBeErased); + } + if (isVISCCall_atomic_cmpxchg(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::visc_atomic_cmpxchg, &toBeErased); + } + if (isVISCCall_atomic_add(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::visc_atomic_add, &toBeErased); + } + if (isVISCCall_atomic_sub(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::visc_atomic_sub, &toBeErased); + } + if (isVISCCall_atomic_xchg(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::visc_atomic_xchg, &toBeErased); + } + if (isVISCCall_atomic_inc(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::visc_atomic_inc, &toBeErased); + } + if (isVISCCall_atomic_dec(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::visc_atomic_dec, &toBeErased); + } + if (isVISCCall_atomic_min(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::visc_atomic_min, &toBeErased); + } + if (isVISCCall_atomic_max(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::visc_atomic_max, &toBeErased); + } + if (isVISCCall_atomic_and(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::visc_atomic_and, &toBeErased); + } + if (isVISCCall_atomic_or(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::visc_atomic_or, 
&toBeErased); + } + if (isVISCCall_atomic_xor(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::visc_atomic_xor, &toBeErased); } - } } @@ -1299,7 +1238,7 @@ void GenVISC::genKernel(Function* KernelF, CallInst* CI, StructType* RetTy) { // in/out to pointer arguments for (inst_iterator i = inst_begin(KernelF), e = inst_end(KernelF); i != e; ++i) { Instruction *I = &(*i); - if(isVISCattributesCall(I)) { + if(isVISCCall_attributes(I)) { handleVISCAttributes(KernelF, cast<CallInst>(I)); //I->eraseFromParent(); break; @@ -1484,9 +1423,9 @@ static inline ConstantInt* getTimerID(Module& M, enum visc_TimerID timer) { static void transformReturnTypeToStruct(Function* F) { // Currently only works for void return types - errs() << "Transforming return type of function to Struct: " << F->getName() << "\n"; + DEBUG(errs() << "Transforming return type of function to Struct: " << F->getName() << "\n"); if(!F->getReturnType()->isVoidTy()) { - errs() << "Warning: Unhandled case - Only void return type handled\n"; + errs() << "Warning: Unhandled case - Only void return type handled. 
Function: " << F->getName() << "\n"; return; } // Create the argument type list with added argument types diff --git a/llvm/lib/Transforms/LocalMem/LocalMem.cpp b/llvm/lib/Transforms/LocalMem/LocalMem.cpp index 7041a3bf36..0de36b51cb 100644 --- a/llvm/lib/Transforms/LocalMem/LocalMem.cpp +++ b/llvm/lib/Transforms/LocalMem/LocalMem.cpp @@ -74,7 +74,7 @@ public: }; bool LocalMem::runOnModule(Module &M) { - errs() << "\nLocalMem PASS\n"; + errs() << "\nLOCALMEM PASS\n"; // Get the BuildDFG Analysis Results: // - Dataflow graph diff --git a/llvm/projects/visc-rt/visc-rt.cpp b/llvm/projects/visc-rt/visc-rt.cpp index b087ceff6a..856ce0ea03 100644 --- a/llvm/projects/visc-rt/visc-rt.cpp +++ b/llvm/projects/visc-rt/visc-rt.cpp @@ -153,9 +153,9 @@ static void* llvm_visc_ocl_request_mem(void* ptr, size_t size, DFNodeContext_OCL else clFlags = CL_MEM_READ_ONLY; visc_SwitchToTimer(&kernel_timer, visc_TimerID_COPY); - cl_mem d_input = clCreateBuffer(Context->clOCLContext, clFlags, size, NULL, &errcode); checkErr(errcode, CL_SUCCESS, "Failure to allocate memory on device"); + DEBUG(cout<< "\nMemory allocated on device: " << d_input << "\n"); if(isInput) errcode = clEnqueueWriteBuffer(Context->clCommandQue, d_input, @@ -1252,6 +1252,16 @@ void llvm_visc_ocl_clearContext(void* graphID) { } +void llvm_visc_ocl_argument_shared(void* graphID, int arg_index, size_t size) { + DEBUG(cout << "Set Shared Memory Input:"); + DEBUG(cout << "\tArgument Index = " << arg_index << ", Size = " << size << "\n"); + DFNodeContext_OCL* Context = (DFNodeContext_OCL*) graphID; + DEBUG(cout << "Using Context: " << Context << "\n"); + DEBUG(cout << "Using clKernel: " << Context->clKernel << "\n"); + cl_int errcode = clSetKernelArg(Context->clKernel, arg_index, size, NULL); + checkErr(errcode, CL_SUCCESS, "Failure to set shared memory argument"); +} + void llvm_visc_ocl_argument_scalar(void* graphID, void* input, int arg_index, size_t size) { DEBUG(cout << "Set Scalar Input:"); DEBUG(cout << "\tArgument 
Index = " << arg_index << ", Size = " << size << "\n"); diff --git a/llvm/projects/visc-rt/visc-rt.h b/llvm/projects/visc-rt/visc-rt.h index 68bd51d45d..aa8745560a 100644 --- a/llvm/projects/visc-rt/visc-rt.h +++ b/llvm/projects/visc-rt/visc-rt.h @@ -161,6 +161,7 @@ void llvm_visc_x86_wait(void*); void* llvm_visc_ocl_initContext(enum visc::Target); void llvm_visc_ocl_clearContext(void*); +void llvm_visc_ocl_argument_shared(void*, int, size_t); void llvm_visc_ocl_argument_scalar(void*, void*, int, size_t); void* llvm_visc_ocl_argument_ptr(void*, void*, int, size_t, bool, bool); void* llvm_visc_ocl_output_ptr(void*, int, size_t); diff --git a/llvm/test/VISC/parboil/.ycm_extra_conf.py b/llvm/test/VISC/parboil/.ycm_extra_conf.py index 3615b034aa..bccfaddfeb 100644 --- a/llvm/test/VISC/parboil/.ycm_extra_conf.py +++ b/llvm/test/VISC/parboil/.ycm_extra_conf.py @@ -51,6 +51,7 @@ flags = [ '-I./include', '-isystem', '/opt/intel/opencl-sdk/include' '-isystem', '/usr/local/cuda/include', + '-isystem', '/home/psrivas2/current-src/include', ] # Set this to the absolute path to the folder (NOT the file!) 
containing the diff --git a/llvm/test/VISC/parboil/common/include/visc.h b/llvm/test/VISC/parboil/common/include/visc.h index ea706ccd0c..d407f256dc 100644 --- a/llvm/test/VISC/parboil/common/include/visc.h +++ b/llvm/test/VISC/parboil/common/include/visc.h @@ -40,6 +40,31 @@ unsigned __visc__getNumNodeInstances_x(void*); unsigned __visc__getNumNodeInstances_y(void*); unsigned __visc__getNumNodeInstances_z(void*); +// Atomic +// signed int +int __visc__atomic_cmpxchg(int*, int, int); +int __visc__atomic_add(int*, int); +int __visc__atomic_sub(int*, int); +int __visc__atomic_xchg(int*, int); +int __visc__atomic_inc(int*); +int __visc__atomic_dec(int*); +int __visc__atomic_min(int*, int); +int __visc__atomic_max(int*, int); +int __visc__atomic_and(int*, int); +int __visc__atomic_or(int*, int); +int __visc__atomic_xor(int*, int); +// unsigned int +//unsigned __visc__atomic_cmpxchg(unsigned*, unsigned, unsigned); +//unsigned __visc__atomic_add(unsigned*, unsigned); +//unsigned __visc__atomic_sub(unsigned*, unsigned); +//unsigned __visc__atomic_xchg(unsigned*, unsigned); +//unsigned __visc__atomic_inc(unsigned*); +//unsigned __visc__atomic_dec(unsigned*); +//unsigned __visc__atomic_min(unsigned*, unsigned); +//unsigned __visc__atomic_max(unsigned*, unsigned); +//unsigned __visc__atomic_and(unsigned*, unsigned); +//unsigned __visc__atomic_or(unsigned*, unsigned); +//unsigned __visc__atomic_xor(unsigned*, unsigned); #endif -- GitLab