diff --git a/llvm/lib/Transforms/DFG2LLVM_SPIR/DFG2LLVM_SPIR.cpp b/llvm/lib/Transforms/DFG2LLVM_SPIR/DFG2LLVM_SPIR.cpp
index c608f1feeb71edf44a75fa3af0ad942b89ef77e8..8282ed2374ad54d9fd0aae98b7f48f85a345115a 100644
--- a/llvm/lib/Transforms/DFG2LLVM_SPIR/DFG2LLVM_SPIR.cpp
+++ b/llvm/lib/Transforms/DFG2LLVM_SPIR/DFG2LLVM_SPIR.cpp
@@ -11,6 +11,7 @@
 #define TARGET_PTX 32
 #define GENERIC_ADDRSPACE 0
 #define GLOBAL_ADDRSPACE 1
+#define SHARED_ADDRSPACE 3
 
 #define DEBUG_TYPE "DFG2LLVM_SPIR"
 #include "llvm/IR/DataLayout.h"
@@ -60,15 +61,16 @@ public:
 // calls
 class Kernel {
 public:
-  Kernel(Function* _KF, DFLeafNode* _KLeafNode, std::vector<unsigned> _inArgMap =
-         std::vector<unsigned>(), std::vector<unsigned> _outArgMap =
-         std::vector<unsigned>(), unsigned _gridDim = 0, std::vector<Value*>
-         _globalWGSize = std::vector<Value*>(),
-         unsigned _blockDim = 0,
-         std::vector<Value*> _localWGSize = std::vector<Value*>())
+  Kernel(Function* _KF, DFLeafNode* _KLeafNode, std::map<unsigned, unsigned> _inArgMap =
+         std::map<unsigned, unsigned>(),
+         std::map<unsigned, std::pair<Value*, unsigned> > _sharedInArgMap =
+         std::map<unsigned, std::pair<Value*, unsigned> >(),
+         std::vector<unsigned> _outArgMap = std::vector<unsigned>(),
+         unsigned _gridDim = 0, std::vector<Value*> _globalWGSize = std::vector<Value*>(),
+         unsigned _blockDim = 0, std::vector<Value*> _localWGSize = std::vector<Value*>())
     : KernelFunction(_KF), KernelLeafNode(_KLeafNode), inArgMap(_inArgMap),
-      outArgMap(_outArgMap), gridDim(_gridDim), globalWGSize(_globalWGSize),
-      blockDim(_blockDim), localWGSize(_localWGSize) {
+      sharedInArgMap(_sharedInArgMap), outArgMap(_outArgMap), gridDim(_gridDim),
+      globalWGSize(_globalWGSize), blockDim(_blockDim), localWGSize(_localWGSize) {
 
     assert(gridDim == globalWGSize.size()
            && "gridDim should be same as the size of vector globalWGSize");
@@ -78,7 +80,14 @@ public:
 
   Function* KernelFunction;
   DFLeafNode* KernelLeafNode;
-  std::vector<unsigned> inArgMap;
+  std::map<unsigned, unsigned> inArgMap;
+  // Map for shared memory arguments
+  std::map<unsigned, std::pair<Value*, unsigned> > sharedInArgMap;
+  // Fields for (potential) allocation node
+  DFLeafNode* AllocationNode;
+  Function* AllocationFunction;
+  std::map<unsigned, unsigned> allocInArgMap;
+
   std::vector<unsigned> outArgMap;
   unsigned gridDim;
   std::vector<Value*> globalWGSize;
@@ -86,13 +95,20 @@ public:
   std::vector<Value*> localWGSize;
   std::vector<int> localDimMap;
 
-  std::vector<unsigned> getInArgMap() {
+  std::map<unsigned, unsigned> getInArgMap() {
    return inArgMap;
  }
-  void setInArgMap(std::vector<unsigned> map) {
+  void setInArgMap(std::map<unsigned, unsigned> map) {
    inArgMap = map;
  }
+  std::map<unsigned, std::pair<Value*, unsigned> > getSharedInArgMap() {
+    return sharedInArgMap;
+  }
+  void setSharedInArgMap(std::map<unsigned, std::pair<Value*, unsigned> > map) {
+    sharedInArgMap = map;
+  }
+
  std::vector<unsigned> getOutArgMap() {
    return outArgMap;
  }
@@ -121,6 +137,9 @@ static void changeTargetTriple(Module &);
 static std::string printType(Type*);
 static StringRef getMangledName(std::string);
 static void findReturnInst(Function *, std::vector<ReturnInst *> &);
+static void findIntrinsicInst(Function *, Intrinsic::ID, std::vector<IntrinsicInst *> &);
+static AtomicRMWInst::BinOp getAtomicOp(Intrinsic::ID);
+static std::string getAtomicOpName(Intrinsic::ID);
 
 // DFG2LLVM_SPIR - The first implementation.
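The vector-to-map change above lets kernel argument positions be sparse, since positions fed by an allocation node are handled separately. A minimal standalone sketch (hypothetical indices, plain STL, no LLVM types) of how the two maps compose in codeGen(DFInternalNode*) further down:

    #include <cstdio>
    #include <map>

    int main() {
      // Hypothetical indices: inmap2 maps kernel arg -> intermediate-node arg,
      // inmap1 maps intermediate-node arg -> parent arg.
      std::map<unsigned, unsigned> inmap1 = {{0, 2}, {1, 0}, {3, 1}};
      std::map<unsigned, unsigned> inmap2 = {{0, 1}, {2, 3}};

      // Composition built as inmapFinal in the patch: kernel arg -> parent arg.
      std::map<unsigned, unsigned> inmapFinal;
      for (std::map<unsigned, unsigned>::iterator ib = inmap2.begin(),
           ie = inmap2.end(); ib != ie; ++ib)
        inmapFinal[ib->first] = inmap1[ib->second];

      for (std::map<unsigned, unsigned>::iterator ib = inmapFinal.begin(),
           ie = inmapFinal.end(); ib != ie; ++ib)
        std::printf("kernel arg %u <- parent arg %u\n", ib->first, ib->second);
      return 0;
    }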
@@ -148,6 +167,7 @@ private:
   Constant* llvm_visc_ocl_wait;
   Constant* llvm_visc_ocl_initContext;
   Constant* llvm_visc_ocl_clearContext;
+  Constant* llvm_visc_ocl_argument_shared;
   Constant* llvm_visc_ocl_argument_scalar;
   Constant* llvm_visc_ocl_argument_ptr;
   Constant* llvm_visc_ocl_output_ptr;
@@ -159,6 +179,7 @@ private:
   std::string getKernelsModuleName(Module &M);
   void fixValueAddrspace(Value* V, unsigned addrspace);
   void changeArgAddrspace(Function* F, unsigned i);
+  void changeArgAddrspace(Function* F, std::vector<unsigned> &Args, unsigned i);
   void addCLMetadata(Function* F);
   void transformFunctionToVoid(Function* F);
   void removeInOutAttributes(Function* F);
@@ -249,6 +270,7 @@ void CGT_SPIR::initRuntimeAPI() {
   DECLARE(llvm_visc_ocl_wait);
   DECLARE(llvm_visc_ocl_initContext);
   DECLARE(llvm_visc_ocl_clearContext);
+  DECLARE(llvm_visc_ocl_argument_shared);
   DECLARE(llvm_visc_ocl_argument_scalar);
   DECLARE(llvm_visc_ocl_argument_ptr);
   DECLARE(llvm_visc_ocl_output_ptr);
@@ -409,11 +431,20 @@ void CGT_SPIR::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fil
   // Vector to hold the device memory object that need to be cleared before we release
   // context
   std::vector<Value*> DevicePointers;
+
+  std::map<unsigned, unsigned> kernelInArgMap = K->getInArgMap();
+/*
   for(unsigned i=0; i<KF->getFunctionType()->getNumParams(); i++) {

     // The kernel object gives us the mapping of arguments from kernel launch
     // node function (F_X86) to kernel (kernel->KF)
     Value* inputVal = getArgumentAt(F_X86, K->getInArgMap()[i]);
+
+*/
+  for(std::map<unsigned, unsigned>::iterator ib = kernelInArgMap.begin(),
+      ie = kernelInArgMap.end(); ib != ie; ++ib) {
+    unsigned i = ib->first;
+    Value* inputVal = getArgumentAt(F_X86, ib->second);

     DEBUG(errs() << "\tArgument "<< i<< " = " << *inputVal << "\n");
     // input value has been obtained.
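The loop above visits only mapped positions; the next hunk adds an assert for the convention it depends on: a pointer parameter at position i has its size mapped at i+1. A small self-contained sketch of that invariant, with hypothetical positions:

    #include <cassert>
    #include <map>

    int main() {
      // Hypothetical mapping from kernel argument positions to launch-function
      // (F_X86) argument positions; a pointer at 0 is paired with its i64 size at 1.
      std::map<unsigned, unsigned> kernelInArgMap = {{0, 5}, {1, 6}, {2, 7}};

      unsigned i = 0;  // position of a pointer-typed kernel argument
      // The invariant checked by the assert added in the next hunk:
      assert(kernelInArgMap.find(i + 1) != kernelInArgMap.end() &&
             "pointer argument must be followed by its size");
      return 0;
    }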
@@ -443,7 +474,12 @@ void CGT_SPIR::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fil
                                        Type::getInt8PtrTy(M.getContext()),
                                        inputVal->getName()+".i8ptr", RI);
-      Value* inputSize = getArgumentAt(F_X86, K->getInArgMap()[i+1]);
+
+      // Assert that the pointer argument size (next argument) is in the map
+      assert(kernelInArgMap.find(i+1) != kernelInArgMap.end());
+
+      Value* inputSize = getArgumentAt(F_X86, kernelInArgMap[i+1]);
+
       assert(inputSize->getType() == Type::getInt64Ty(M.getContext())
              && "Pointer type input must always be followed by size (integer type)");
       Value* setInputArgs[] = {GraphID,
@@ -481,8 +517,122 @@ void CGT_SPIR::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fil
       CallInst::Create(llvm_visc_ocl_argument_scalar,
                        ArrayRef<Value*>(setInputArgs, 4), "", RI);
     }
+  }
+
+  DEBUG(errs() << "Setup shared memory arguments of node and insert visc api\n");
+  // Check to see if all the allocation sizes are constant (determined
+  // statically)
+  bool constSizes = true;
+  for (auto& e: K->getSharedInArgMap()) {
+    constSizes &= isa<Constant>(e.second.first);
+  }
+
+  // If the sizes are all constant
+  if (constSizes) {
+    for (auto& e: K->getSharedInArgMap()) {
+      unsigned argNum = e.first;
+      Value* allocSize = e.second.first;
+
+      DEBUG(errs() << "\tLocal Memory at "<< argNum << ", size = " << *allocSize << "\n");
+
+      if (KF->getFunctionType()->getParamType(argNum)->isPointerTy()) {
+        // Shared memory ptr argument - scalar at size position
+        switchToTimer(visc_TimerID_COPY_SCALAR, RI);
+
+        assert(isa<Constant>(allocSize) && "Constant shared memory size is expected");
+
+        Value* setInputArgs[] = {GraphID,
+                                 ConstantInt::get(Type::getInt32Ty(M.getContext()),argNum),
+                                 allocSize
+                                };
+        CallInst::Create(llvm_visc_ocl_argument_shared,
+                         ArrayRef<Value*>(setInputArgs, 3), "", RI);
+      }
+      else {
+        // Shared memory size argument - scalar at address position
+        switchToTimer(visc_TimerID_COPY_SCALAR, RI);
+        // Store the scalar value on stack and then pass the pointer to its
+        // location
+        AllocaInst* allocSizePtr = new AllocaInst(allocSize->getType(),
+                                                  allocSize->getName()+".sharedMem.ptr", RI);
+        StoreInst* SI = new StoreInst(allocSize, allocSizePtr, RI);
+
+        Value* allocSizeI8Ptr = CastInst::CreatePointerCast(allocSizePtr,
+                                                            Type::getInt8PtrTy(M.getContext()),
+                                                            allocSize->getName()+".sharedMem.i8ptr",
+                                                            RI);
+
+        Value* setInputArgs[] = {GraphID,
+                                 allocSizeI8Ptr,
+                                 ConstantInt::get(Type::getInt32Ty(M.getContext()),argNum),
+                                 ConstantExpr::getSizeOf(allocSize->getType())
+                                };
+        CallInst::Create(llvm_visc_ocl_argument_scalar,
+                         ArrayRef<Value*>(setInputArgs, 4), "", RI);
+      }
+    }
+  } else {
+
+    Function *F_alloc = K->AllocationFunction;
+    StructType *FAllocRetTy = dyn_cast<StructType>(F_alloc->getReturnType());
+    assert(FAllocRetTy && "Allocation node with no struct return type");
+
+    std::vector<Value *> AllocInputArgs;
+    for (unsigned i = 0; i < K->allocInArgMap.size(); i++) {
+      AllocInputArgs.push_back(getArgumentAt(F_X86, K->allocInArgMap.at(i)));
+    }
+
+    CallInst *CI = CallInst::Create(F_alloc, AllocInputArgs, "", RI);
+    std::vector<ExtractValueInst *> ExtractValueInstVec;
+    for (unsigned i = 1; i < FAllocRetTy->getNumElements(); i += 2) {
+      ExtractValueInst *EI = ExtractValueInst::Create(CI, i, "", RI);
+      ExtractValueInstVec.push_back(EI);
+    }
+
+    for (auto& e: K->getSharedInArgMap()) {
+      unsigned argNum = e.first;
+      Value* allocSize = ExtractValueInstVec[e.second.second/2];
+
+      DEBUG(errs() << "\tLocal Memory at "<< argNum << ", size = " << *allocSize << "\n");
+
+      if (KF->getFunctionType()->getParamType(argNum)->isPointerTy()) {
+        // Shared memory ptr argument - scalar at size position
+        switchToTimer(visc_TimerID_COPY_SCALAR, RI);
+
+        Value* setInputArgs[] = {GraphID,
+                                 ConstantInt::get(Type::getInt32Ty(M.getContext()),argNum),
+                                 allocSize
+                                };
+        CallInst::Create(llvm_visc_ocl_argument_shared,
+                         ArrayRef<Value*>(setInputArgs, 3), "", RI);
+      }
+      else {
+        // Shared memory size argument - scalar at address position
+        switchToTimer(visc_TimerID_COPY_SCALAR, RI);
+        // Store the scalar value on stack and then pass the pointer to its
+        // location
+        AllocaInst* allocSizePtr = new AllocaInst(allocSize->getType(),
+                                                  allocSize->getName()+".sharedMem.ptr", RI);
+        StoreInst* SI = new StoreInst(allocSize, allocSizePtr, RI);
+
+        Value* allocSizeI8Ptr = CastInst::CreatePointerCast(allocSizePtr,
+                                                            Type::getInt8PtrTy(M.getContext()),
+                                                            allocSize->getName()+".sharedMem.i8ptr",
+                                                            RI);
+
+        Value* setInputArgs[] = {GraphID,
+                                 allocSizeI8Ptr,
+                                 ConstantInt::get(Type::getInt32Ty(M.getContext()),argNum),
+                                 ConstantExpr::getSizeOf(allocSize->getType())
+                                };
+        CallInst::Create(llvm_visc_ocl_argument_scalar,
+                         ArrayRef<Value*>(setInputArgs, 4), "", RI);
+      }
+    }
+  }
+
   DEBUG(errs() << "Setup output edges of node and insert visc api\n");
 
   // Set output if struct is not an empty struct
@@ -682,14 +832,17 @@ void CGT_SPIR::codeGen(DFInternalNode* N) {
   } else {
     DEBUG(errs() << "Found intermediate node. Getting size parameters.\n");
     // Keep track of the arguments order.
-    std::vector<unsigned> inmap1 = N->getInArgMap();
-    std::vector<unsigned> inmap2 = kernel->getInArgMap();
-    // TODO: Verify when we have incoming edges from more than one nodes
-    // The limit is the size of inmap2, because this is the number of kernel arguments
-    for (unsigned i = 0; i < inmap2.size(); i++) {
-      inmap2[i] = inmap1[inmap2[i]];
+    std::map<unsigned, unsigned> inmap1 = N->getInArgMap();
+    std::map<unsigned, unsigned> inmap2 = kernel->getInArgMap();
+    // TODO: Structure assumed: one thread node, one allocation node (at most),
+    // TB node
+    std::map<unsigned, unsigned> inmapFinal;
+    for (std::map<unsigned, unsigned>::iterator ib = inmap2.begin(), ie = inmap2.end();
+         ib != ie; ++ib) {
+      inmapFinal[ib->first] = inmap1[ib->second];
     }
-    kernel->setInArgMap(inmap2);
+
+    kernel->setInArgMap(inmapFinal);
 
     // Keep track of the output arguments order.
     std::vector<unsigned> outmap1 = N->getOutArgMap();
@@ -723,6 +876,10 @@ void CGT_SPIR::codeGen(DFInternalNode* N) {
       // find the source location in Parent of N. Retrieve the argument from
      // parent to insert in the vector.
       unsigned argNum = Arg->getArgNo();
+      // This argument will be coming from the parent node, not the allocation
+      // node
+      assert(N->getInArgMap().find(argNum) != N->getInArgMap().end());
+
       unsigned parentArgNum = N->getInArgMap()[argNum];
       Argument* A = getArgumentAt(N->getParent()->getFuncPointer(), parentArgNum);
       localWGSizeMapped.push_back(A);
@@ -770,6 +927,12 @@ void CGT_SPIR::codeGen(DFLeafNode* N) {
     return;
   }
 
+  // Skip code generation if it is an allocation node
+  if(N->isAllocationNode()) {
+    DEBUG(errs() << "Skipping allocation node\n");
+    return;
+  }
+
   // Generate code only if it has the right hint
   if(!checkPreferredTarget(N, visc::SPIR_TARGET)) {
     errs() << "Skipping node: "<< N->getFuncPointer()->getName() << "\n";
@@ -791,6 +954,7 @@ void CGT_SPIR::codeGen(DFLeafNode* N) {
     kernel = new Kernel(NULL,
                         N,
                         N->getInArgMap(),
+                        N->getSharedInArgMap(),
                         N->getOutArgMap(),
                         N->getNumOfDim(),
                         N->getDimLimits());
@@ -804,6 +968,7 @@ void CGT_SPIR::codeGen(DFLeafNode* N) {
     kernel = new Kernel(NULL,                 // kernel function
                         N,                    // kernel leaf node
                         N->getInArgMap(),     // kernel argument mapping
+                        N->getSharedInArgMap(),
                         N->getOutArgMap(),    // kernel output mapping from the leaf to the intermediate node
                         PNode->getNumOfDim(), // gridDim
                         PNode->getDimLimits(),// grid size
@@ -846,6 +1011,93 @@ void CGT_SPIR::codeGen(DFLeafNode* N) {
   transformFunctionToVoid(F_spir);
   removeInOutAttributes(F_spir);
 
+  // FIXME: For now, assume only one allocation node
+  kernel->AllocationNode = NULL;
+
+  for (DFNode::const_indfedge_iterator ieb = N->indfedge_begin(), iee = N->indfedge_end();
+       ieb != iee; ++ieb) {
+    DFNode *SrcDFNode = (*ieb)->getSourceDF();
+    DEBUG(errs() << "Found edge from node: " << " " << SrcDFNode->getFuncPointer()->getName() << "\n");
+    DEBUG(errs() << "Current Node: " << N->getFuncPointer()->getName() << "\n");
+    DEBUG(errs() << "isAllocationNode = "<< SrcDFNode->isAllocationNode() << "\n");
+    if (!SrcDFNode->isDummyNode()) {
+      assert(SrcDFNode->isAllocationNode());
+      kernel->AllocationNode = dyn_cast<DFLeafNode>(SrcDFNode);
+      kernel->allocInArgMap = SrcDFNode->getInArgMap();
+      break;
+    }
+  }
+
+  // Vector for shared memory arguments
+  std::vector<unsigned> SharedMemArgs;
+
+  // If no allocation node was found, SharedMemArgs is empty
+  if (kernel->AllocationNode) {
+
+    ValueToValueMapTy VMap;
+    Function *F_alloc = CloneFunction(kernel->AllocationNode->getFuncPointer(), VMap, true);
+    // Insert the cloned function into the kernels module
+    M.getFunctionList().push_back(F_alloc);
+
+    std::vector<IntrinsicInst *> ViscMallocInstVec;
+    findIntrinsicInst(F_alloc, Intrinsic::visc_malloc, ViscMallocInstVec);
+
+    for (unsigned i = 0; i < ViscMallocInstVec.size(); i++) {
+      IntrinsicInst *II = ViscMallocInstVec[i];
+      assert(II->hasOneUse() && "visc_malloc result is used more than once");
+      II->replaceAllUsesWith(ConstantPointerNull::get(Type::getInt8PtrTy(M.getContext())));
+      II->eraseFromParent();
+    }
+    kernel->AllocationFunction = F_alloc;
+
+    // This could be used to check that the allocation node has the appropriate
+    // number of fields in its return struct
+/*
+    ReturnInst *RI = ReturnInstVec[0];
+    Value *RetVal = RI->getReturnValue();
+    Type *RetTy = RetVal->getType();
+    StructType *RetStructTy = dyn_cast<StructType>(RetTy);
+    assert(RetStructTy && "Allocation node does not return a struct type");
+    unsigned numFields = RetStructTy->getNumElements();
+*/
+    std::map<unsigned, std::pair<Value*, unsigned> > sharedInMap = kernel->getSharedInArgMap();
+    AllocationNodeProperty* APN =
+      (AllocationNodeProperty*) kernel->AllocationNode->getProperty(DFNode::Allocation);
+    for (auto& AllocPair: APN->getAllocationList()) {
+      unsigned destPos = AllocPair.first->getDestPosition();
+      unsigned srcPos = AllocPair.first->getSourcePosition();
+      SharedMemArgs.push_back(destPos);
+      sharedInMap[destPos] = std::pair<Value *, unsigned>(AllocPair.second, srcPos+1);
+      sharedInMap[destPos+1] = std::pair<Value *, unsigned>(AllocPair.second, srcPos+1);
+    }
+    kernel->setSharedInArgMap(sharedInMap);
+  }
+  std::sort(SharedMemArgs.begin(), SharedMemArgs.end());
+
+  // All pointer args which are not shared memory pointers have to be moved to
+  // global address space
+  unsigned argIndex = 0;
+  std::vector<unsigned> GlobalMemArgs;
+  for(auto& Arg: F_spir->getArgumentList()) {
+    if (Arg.getType()->isPointerTy()) {
+      // If the argument is already chosen for the shared memory argument list,
+      // skip it. Else put it in the global memory argument list
+      if(std::count(SharedMemArgs.begin(), SharedMemArgs.end(), argIndex) == 0) {
+        GlobalMemArgs.push_back(argIndex);
+      }
+    }
+    argIndex++;
+  }
+  std::sort(GlobalMemArgs.begin(), GlobalMemArgs.end());
+
+  /* At this point, we assume the analysis pass has already checked that
+     SharedMemArgs contains only pointer arguments in GLOBAL_ADDRSPACE */
+
+  changeArgAddrspace(F_spir, SharedMemArgs, SHARED_ADDRSPACE);
+  changeArgAddrspace(F_spir, GlobalMemArgs, GLOBAL_ADDRSPACE);
+
   // Go through all the instructions
   for (inst_iterator i = inst_begin(F_spir), e = inst_end(F_spir); i != e; ++i) {
     Instruction *I = &(*i);
@@ -1047,6 +1299,79 @@ void CGT_SPIR::codeGen(DFLeafNode* N) {
         IItoRemove.push_back(II);
       }
       break;
+      case Intrinsic::visc_barrier:
+      {
+        errs() << F_spir->getName() << "\t: Handling barrier\n";
+        errs() << "Substitute with barrier()\n";
+        errs() << *II << "\n";
+        FunctionType* FT = FunctionType::get(Type::getVoidTy(getGlobalContext() /*KernelM.getContext()*/),
+                                             std::vector<Type*>(1, Type::getInt32Ty(getGlobalContext() /*KernelM.getContext()*/)),
+                                             false);
+        Function* OpenCLFunction = cast<Function>
+                                   (KernelM.getOrInsertFunction(getMangledName("barrier"), FT));
+        CallInst* CI = CallInst::Create(OpenCLFunction,
+                                        ArrayRef<Value*>(ConstantInt::get(Type::getInt32Ty(getGlobalContext()), 1)),
+                                        "", II);
+        II->replaceAllUsesWith(CI);
+        IItoRemove.push_back(II);
+      }
+      break;
+      case Intrinsic::visc_atomic_cmpxchg:
+      break;
+      case Intrinsic::visc_atomic_add:
+      case Intrinsic::visc_atomic_sub:
+      case Intrinsic::visc_atomic_xchg:
+      case Intrinsic::visc_atomic_min:
+      case Intrinsic::visc_atomic_max:
+      case Intrinsic::visc_atomic_and:
+      case Intrinsic::visc_atomic_or:
+      case Intrinsic::visc_atomic_xor:
+      //case Intrinsic::visc_atomic_inc:
+      //case Intrinsic::visc_atomic_dec:
+      {
+        errs() << *II << "\n";
+        // Only have support for i32 atomic intrinsics
+        assert(II->getType() == Type::getInt32Ty(II->getContext())
+               && "Only support i32 atomic intrinsics for now");
+        // Store the argument types and operand values in vectors
+        //std::vector<Type*> ArgTypes;
+        //std::vector<Value*> ArgValues;
+        //for(unsigned i=0; i < II->getNumArgOperands(); i++) {
+          //Value* V = II->getArgOperand(i);
+          //if(V->getType()->isPointerTy()) {
+            //If it is a pointer type, then bit cast to i32* as intrinsics use
+            //i8* for all pointers
+            //V = CastInst::CreatePointerCast(V, Type::getInt32PtrTy(II->getContext()), "", II);
+            //errs() << *V << "\n";
+          //}
+          //ArgTypes.push_back(V->getType());
+          //ArgValues.push_back(V);
+        //}
+        // Substitute with atomicrmw instruction
+        assert(II->getNumArgOperands() == 2 && "Expecting 2 operands for these atomics");
+        Value* Ptr = II->getArgOperand(0);
+        Value* Val = II->getArgOperand(1);
+        assert(Ptr->getType()->isPointerTy()
+               && "First argument of supported atomics is expected to be a pointer");
+        PointerType* PtrTy = cast<PointerType>(Ptr->getType());
+        if(PtrTy != Type::getInt32PtrTy(II->getContext(), PtrTy->getAddressSpace())) {
+          Ptr = CastInst::CreatePointerCast(Ptr, Type::getInt32PtrTy(II->getContext(), PtrTy->getAddressSpace()), "", II);
+        }
+        AtomicRMWInst* AtomicInst = new AtomicRMWInst(getAtomicOp(II->getIntrinsicID()),
+                                                      Ptr, Val, llvm::SequentiallyConsistent, llvm::CrossThread, II);
+        AtomicInst->setVolatile(true);
+        // Create OpenCL function call
+        //FunctionType* FT = FunctionType::get(Type::getInt32Ty(getGlobalContext()),
+        //                                     ArgTypes, false);
+        //Function* OpenCLFunction = cast<Function>(KernelM.getOrInsertFunction(
+        //                           StringRef(getAtomicOpName(II->getIntrinsicID())), FT));
+        //CallInst* CI = CallInst::Create(OpenCLFunction, ArgValues, II->getName(), II);
+        //errs() << "Substitute with: " << *CI << "\n";
+        errs() << "Substitute with: " << *AtomicInst << "\n";
+        II->replaceAllUsesWith(AtomicInst);
+        IItoRemove.push_back(II);
+      }
+      break;
       default:
         assert(false && "Unknown VISC Intrinsic!");
         break;
@@ -1161,6 +1486,25 @@ void CGT_SPIR::changeArgAddrspace(Function* F, unsigned addrspace) {
   DEBUG(errs() << *F->getFunctionType() << "\n" <<*F << "\n");
 }
 
+void CGT_SPIR::changeArgAddrspace(Function* F, std::vector<unsigned> &Args, unsigned addrspace) {
+  unsigned idx = 0;
+  std::vector<Type*> ArgTypes;
+  for(auto& arg: F->getArgumentList()) {
+    DEBUG(errs() << arg << "\n");
+    unsigned argno = arg.getArgNo();
+    if ((idx < Args.size()) && (argno == Args[idx])) {
+      fixValueAddrspace(&arg, addrspace);
+      idx++;
+    }
+    ArgTypes.push_back(arg.getType());
+  }
+  FunctionType* FTy = FunctionType::get(F->getReturnType(), ArgTypes, false);
+  PointerType* PTy = FTy->getPointerTo(cast<PointerType>(F->getType())->getAddressSpace());
+
+  F->mutateType(PTy);
+  DEBUG(errs() << *F->getFunctionType() << "\n" <<*F << "\n");
+}
+
 /* Add metadata to module KernelM, for OpenCL kernels */
 void CGT_SPIR::addCLMetadata(Function *F) {
   // TODO: There is additional metadata used by kernel files but we skip them as
@@ -1474,6 +1818,55 @@ static void findReturnInst(Function* F, std::vector<ReturnInst *> & ReturnInstVe
   }
 }
 
+// Helper function, populate a vector with all IntrinsicID intrinsics in a function
+static void findIntrinsicInst(Function* F, Intrinsic::ID IntrinsicID, std::vector<IntrinsicInst *> & IntrinsicInstVec) {
+  for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) {
+    Instruction *I = &(*i);
+    IntrinsicInst* II = dyn_cast<IntrinsicInst>(I);
+    if (II && II->getIntrinsicID() == IntrinsicID) {
+      IntrinsicInstVec.push_back(II);
+    }
+  }
+}
+
+// Helper function, returns the atomicrmw op corresponding to an intrinsic atomic op
+static AtomicRMWInst::BinOp getAtomicOp(Intrinsic::ID ID) {
+  switch(ID) {
+    case Intrinsic::visc_atomic_add: return AtomicRMWInst::Add;
+    case Intrinsic::visc_atomic_sub: return AtomicRMWInst::Sub;
+    case Intrinsic::visc_atomic_min: return AtomicRMWInst::Min;
+    case Intrinsic::visc_atomic_max: return AtomicRMWInst::Max;
+    //case Intrinsic::visc_atomic_inc: return AtomicRMWInst::Inc;
+    //case Intrinsic::visc_atomic_dec: return AtomicRMWInst::Dec;
+    case Intrinsic::visc_atomic_xchg: return AtomicRMWInst::Xchg;
+    case Intrinsic::visc_atomic_and: return AtomicRMWInst::And;
+    case Intrinsic::visc_atomic_or: return AtomicRMWInst::Or;
+    case Intrinsic::visc_atomic_xor: return AtomicRMWInst::Xor;
+    default:
+      llvm_unreachable("Unsupported atomic intrinsic!");
+  };
+}
+
+// Helper function, returns the OpenCL function name corresponding to an atomic op
+static std::string getAtomicOpName(Intrinsic::ID ID) {
+  switch(ID) {
+    case Intrinsic::visc_atomic_cmpxchg: return "atom_cmpxchg";
+    case Intrinsic::visc_atomic_add: return "atom_add";
+    case Intrinsic::visc_atomic_sub: return "atom_sub";
+    case Intrinsic::visc_atomic_min: return "atom_min";
+    case Intrinsic::visc_atomic_max: return "atom_max";
+    case Intrinsic::visc_atomic_inc: return "atom_inc";
+    case Intrinsic::visc_atomic_dec: return "atom_dec";
+    case Intrinsic::visc_atomic_xchg: return "atom_xchg";
+    case Intrinsic::visc_atomic_and: return "atom_and";
+    case Intrinsic::visc_atomic_or: return "atom_or";
+    case Intrinsic::visc_atomic_xor: return "atom_xor";
+    default:
+      llvm_unreachable("Unsupported atomic intrinsic!");
+  };
+}
+
+
 } // End of namespace
 
 char DFG2LLVM_SPIR::ID = 0;
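For reference, a small sketch pairing each handled intrinsic with the atomicrmw operation getAtomicOp returns and the OpenCL builtin name getAtomicOpName returns; cmpxchg, inc, and dec appear only in the name table, matching the cases left unhandled or commented out above:

    #include <cstdio>

    // Correspondence implemented by getAtomicOp()/getAtomicOpName() in the patch.
    struct AtomicMapping {
      const char *intrinsic;
      const char *atomicrmwOp;  // nullptr when getAtomicOp() has no case for it
      const char *openclName;
    };

    int main() {
      const AtomicMapping table[] = {
          {"visc_atomic_add", "add", "atom_add"},
          {"visc_atomic_sub", "sub", "atom_sub"},
          {"visc_atomic_min", "min", "atom_min"},
          {"visc_atomic_max", "max", "atom_max"},
          {"visc_atomic_xchg", "xchg", "atom_xchg"},
          {"visc_atomic_and", "and", "atom_and"},
          {"visc_atomic_or", "or", "atom_or"},
          {"visc_atomic_xor", "xor", "atom_xor"},
          {"visc_atomic_cmpxchg", nullptr, "atom_cmpxchg"},
          {"visc_atomic_inc", nullptr, "atom_inc"},
          {"visc_atomic_dec", nullptr, "atom_dec"},
      };
      for (const AtomicMapping &m : table)
        std::printf("%-20s -> atomicrmw %-5s / %s\n", m.intrinsic,
                    m.atomicrmwOp ? m.atomicrmwOp : "(n/a)", m.openclName);
      return 0;
    }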