diff --git a/hpvm/lib/Transforms/CMakeLists.txt b/hpvm/lib/Transforms/CMakeLists.txt index 8049471a16910a0b33b8e60ebca577d395627222..68724684e56648d307df52624e47ed7393bfd3f9 100644 --- a/hpvm/lib/Transforms/CMakeLists.txt +++ b/hpvm/lib/Transforms/CMakeLists.txt @@ -1,7 +1,6 @@ add_subdirectory(BuildDFG) add_subdirectory(ClearDFG) add_subdirectory(DFG2LLVM_NVPTX) -#add_subdirectory(DFG2LLVM_SPIR) add_subdirectory(DFG2LLVM_X86) add_subdirectory(GenVISC) add_subdirectory(LocalMem) diff --git a/hpvm/lib/Transforms/DFG2LLVM_SPIR/CMakeLists.txt b/hpvm/lib/Transforms/DFG2LLVM_SPIR/CMakeLists.txt deleted file mode 100644 index 15d3ab08ad185a6df26a02e4c4463936c9b963c5..0000000000000000000000000000000000000000 --- a/hpvm/lib/Transforms/DFG2LLVM_SPIR/CMakeLists.txt +++ /dev/null @@ -1,13 +0,0 @@ -if(WIN32 OR CYGWIN) - set(LLVM_LINK_COMPONENTS Core Support) -endif() - -add_llvm_library( LLVMDFG2LLVM_SPIR - MODULE - DFG2LLVM_SPIR.cpp - - DEPENDS - intrinsics_gen - PLUGIN_TOOL - opt - ) diff --git a/hpvm/lib/Transforms/DFG2LLVM_SPIR/DFG2LLVM_SPIR.cpp b/hpvm/lib/Transforms/DFG2LLVM_SPIR/DFG2LLVM_SPIR.cpp deleted file mode 100644 index e294a94537576ac5ca748af7fd17d70e5a133717..0000000000000000000000000000000000000000 --- a/hpvm/lib/Transforms/DFG2LLVM_SPIR/DFG2LLVM_SPIR.cpp +++ /dev/null @@ -1,2010 +0,0 @@ -//=== DFG2LLVM_SPIR.cpp ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#define ENABLE_ASSERTS -#define TARGET_PTX 32 -#define GENERIC_ADDRSPACE 0 -#define GLOBAL_ADDRSPACE 1 -#define SHARED_ADDRSPACE 3 - -#define DEBUG_TYPE "DFG2LLVM_SPIR" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -#include "llvm/IR/PassManager.h" -#include "llvm/IR/InstIterator.h" -#include "llvm/Transforms/Utils/ValueMapper.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/IRReader/IRReader.h" -#include "llvm/Linker/Linker.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Bitcode/BitcodeReader.h" -#include "llvm/Bitcode/BitcodeWriter.h" -#include "llvm/IR/Attributes.h" -#include "SupportVISC/VISCHint.h" -#include "SupportVISC/VISCTimer.h" -#include "SupportVISC/DFG2LLVM.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm-c/Core.h" - -#include "SupportVISC/VISCUtils.h" -#include "llvm/IR/IRPrintingPasses.h" -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/Support/ToolOutputFile.h" -#include "llvm/IR/UseListOrder.h" - -#include <sstream> - -using namespace llvm; -using namespace builddfg; -using namespace dfg2llvm; -using namespace viscUtils; - -// VISC Command line option to use timer or not -static cl::opt<bool> -VISCTimer_SPIR("visc-timers-spir", cl::desc("Enable visc timers")); - -namespace { -// Helper class declarations - -// Class to maintain the tuple of host pointer, device pointer and size -// in bytes. Would have preferred to use tuple but support not yet available -class OutputPtr { -public: - OutputPtr(Value* _h_ptr, Value* _d_ptr, Value* _bytes) - : h_ptr(_h_ptr), d_ptr(_d_ptr), bytes(_bytes) {} - - Value* h_ptr; - Value* d_ptr; - Value* bytes; -}; - -// Class to maintain important kernel info required for generating runtime -// calls -class Kernel { -public: - Kernel(Function* _KF, DFLeafNode* _KLeafNode, std::map<unsigned, unsigned> _inArgMap = - std::map<unsigned, unsigned>(), - std::map<unsigned, std::pair<Value*, unsigned> > _sharedInArgMap = - std::map<unsigned, std::pair<Value*, unsigned> >(), - std::vector<unsigned> _outArgMap = std::vector<unsigned>(), - unsigned _gridDim = 0, std::vector<Value*> _globalWGSize = std::vector<Value*>(), - unsigned _blockDim = 0, std::vector<Value*> _localWGSize = std::vector<Value*>()) - : KernelFunction(_KF), KernelLeafNode(_KLeafNode), inArgMap(_inArgMap), - sharedInArgMap(_sharedInArgMap), outArgMap(_outArgMap), gridDim(_gridDim), - globalWGSize(_globalWGSize), blockDim(_blockDim), localWGSize(_localWGSize) { - - assert(gridDim == globalWGSize.size() - && "gridDim should be same as the size of vector globalWGSize"); - assert(blockDim == localWGSize.size() - && "blockDim should be same as the size of vector localWGSize"); - } - - Function* KernelFunction; - DFLeafNode* KernelLeafNode; - std::map<unsigned, unsigned> inArgMap; - // Map for shared memory arguments - std::map<unsigned, std::pair<Value*, unsigned> > sharedInArgMap; - // Fields for (potential) allocation node - DFLeafNode* AllocationNode; - Function* AllocationFunction; - std::map<unsigned, unsigned> allocInArgMap; - - std::vector<unsigned> outArgMap; - unsigned gridDim; - std::vector<Value*> globalWGSize; - unsigned blockDim; - std::vector<Value*> localWGSize; - std::vector<int> localDimMap; - - std::map<unsigned, unsigned> getInArgMap() { - return inArgMap; - } - void setInArgMap(std::map<unsigned, unsigned> map) { - inArgMap = map; - } - - std::map<unsigned, std::pair<Value*, unsigned> > getSharedInArgMap() { - return sharedInArgMap; - } - void setSharedInArgMap(std::map<unsigned, std::pair<Value*, unsigned> > map) { - sharedInArgMap = map; - } - - std::vector<unsigned> getOutArgMap() { - return outArgMap; - } - void setOutArgMap(std::vector<unsigned> map) { - outArgMap = map; - } - - void setLocalWGSize(std::vector<Value*> V) { - localWGSize = V; - } - - bool hasLocalWG() { - return blockDim != 0; - } -}; - -// Helper function declarations -static void getExecuteNodeParams(Module &M, Value* &, Value* &, Value* &, Kernel*, - ValueToValueMapTy&, Instruction*); -static Value* genWorkGroupPtr(Module &M, std::vector<Value*>, ValueToValueMapTy&, - Instruction*, const Twine& WGName = "WGSize"); -static std::string getSPIRFilename(const Module&); -static std::string getFilenameFromModule(const Module& M); -static void changeDataLayout(Module &); -static void changeTargetTriple(Module &); -static std::string printType(Type*); -static StringRef getMangledName(std::string); -static StringRef getAtomicMangledName(std::string, unsigned, bool); -static void findReturnInst(Function *, std::vector<ReturnInst *> &); -static void findIntrinsicInst(Function *, Intrinsic::ID, std::vector<IntrinsicInst *> &); -static StringRef getAtomicOpName(Intrinsic::ID, unsigned); -static std::string getMathFunctionName(Intrinsic::ID); - -// DFG2LLVM_SPIR - The first implementation. -struct DFG2LLVM_SPIR : public DFG2LLVM { - static char ID; // Pass identification, replacement for typeid - DFG2LLVM_SPIR() : DFG2LLVM(ID) {} - -private: - -public: - bool runOnModule(Module &M); -}; - -// Visitor for Code generation traversal (tree traversal for now) -class CGT_SPIR : public CodeGenTraversal { - -private: - //Member variables - std::unique_ptr<Module> KernelM; - DFNode* KernelLaunchNode = nullptr; - Kernel* kernel; - - // VISC Runtime API - Constant* llvm_visc_ocl_launch; - Constant* llvm_visc_ocl_wait; - Constant* llvm_visc_ocl_initContext; - Constant* llvm_visc_ocl_clearContext; - Constant* llvm_visc_ocl_argument_shared; - Constant* llvm_visc_ocl_argument_scalar; - Constant* llvm_visc_ocl_argument_ptr; - Constant* llvm_visc_ocl_output_ptr; - Constant* llvm_visc_ocl_free; - Constant* llvm_visc_ocl_getOutput; - Constant* llvm_visc_ocl_executeNode; - - //Functions - std::string getKernelsModuleName(Module &M); - void fixValueAddrspace(Value* V, unsigned addrspace); - Function* changeArgAddrspace(Function* F, std::vector<unsigned> &Args, unsigned i); - void removeAttributeAtArguments(Function* F, std::vector<unsigned> &Ags, Attribute::AttrKind attrKind); - void addCLMetadata(Function* F); - Function* transformFunctionToVoid(Function* F); - void removeInOutAttributes(Function* F); - void insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& FileName); - - // Virtual Functions - void init() { - VISCTimer = VISCTimer_SPIR; - TargetName = "SPIR"; - } - void initRuntimeAPI(); - void codeGen(DFInternalNode* N); - void codeGen(DFLeafNode* N); - -public: - - // Constructor - CGT_SPIR(Module &_M, BuildDFG &_DFG) : CodeGenTraversal(_M, _DFG), KernelM(CloneModule(&_M)) { - KernelLaunchNode = NULL; - init(); - initRuntimeAPI(); - errs() << "Old module pointer: " << &_M << "\n"; - errs() << "New module pointer: " << KernelM.get() << "\n"; - // Copying instead of creating new, in order to preserve required info (metadata) - // Remove functions, global variables and aliases - std::vector<GlobalVariable*> gvv = std::vector<GlobalVariable*>(); - for (Module::global_iterator mi = KernelM->global_begin(), - me = KernelM->global_end(); (mi != me); ++mi) { - GlobalVariable* gv = &*mi; - gvv.push_back(gv); - } - for (std::vector<GlobalVariable*>::iterator vi = gvv.begin(); vi != gvv.end(); ++vi) { - (*vi)->replaceAllUsesWith(UndefValue::get((*vi)->getType())); - (*vi)->eraseFromParent(); - } - - std::vector<Function*> fv = std::vector<Function*>(); - for (Module::iterator mi = KernelM->begin(), - me = KernelM->end(); (mi != me); ++mi) { - Function* f = &*mi; - fv.push_back(f); - } - for (std::vector<Function*>::iterator vi = fv.begin(); vi != fv.end(); ++vi) { - (*vi)->replaceAllUsesWith(UndefValue::get((*vi)->getType())); - (*vi)->eraseFromParent(); - } - - std::vector<GlobalAlias*> av = std::vector<GlobalAlias*>(); - for (Module::alias_iterator mi = KernelM->alias_begin(), - me = KernelM->alias_end(); (mi != me); ++mi) { - GlobalAlias* a = &*mi; - av.push_back(a); - } - for (std::vector<GlobalAlias*>::iterator vi = av.begin(); vi != av.end(); ++vi) { - (*vi)->replaceAllUsesWith(UndefValue::get((*vi)->getType())); - (*vi)->eraseFromParent(); - } - - changeDataLayout(*KernelM); - changeTargetTriple(*KernelM); - - DEBUG(errs() << *KernelM); - - } - - void removeLLVMIntrinsics(); - void writeKernelsModule(); -}; - -// Initialize the VISC runtime API. This makes it easier to insert these calls -void CGT_SPIR::initRuntimeAPI() { - - // Load Runtime API Module - SMDiagnostic Err; - - char* LLVM_SRC_ROOT = getenv("LLVM_SRC_ROOT"); - assert(LLVM_SRC_ROOT != NULL && "Define LLVM_SRC_ROOT environment variable!"); - - Twine llvmSrcRoot = LLVM_SRC_ROOT; - Twine runtimeAPI = llvmSrcRoot + "/tools/hpvm/visc-rt/visc-rt.ll"; - errs() << "Open file: " << runtimeAPI.str() << "\n"; - runtimeModule = parseIRFile(runtimeAPI.str(), Err, M.getContext()); - if(runtimeModule == NULL) - DEBUG(errs() << Err.getMessage()); - else - errs() << "Successfully loaded visc-rt API module\n"; - - // Get or insert the global declarations for launch/wait functions - DECLARE(llvm_visc_ocl_launch); - DECLARE(llvm_visc_ocl_wait); - DECLARE(llvm_visc_ocl_initContext); - DECLARE(llvm_visc_ocl_clearContext); - DECLARE(llvm_visc_ocl_argument_shared); - DECLARE(llvm_visc_ocl_argument_scalar); - DECLARE(llvm_visc_ocl_argument_ptr); - DECLARE(llvm_visc_ocl_output_ptr); - DECLARE(llvm_visc_ocl_free); - DECLARE(llvm_visc_ocl_getOutput); - DECLARE(llvm_visc_ocl_executeNode); - - // Get or insert timerAPI functions as well if you plan to use timers - initTimerAPI(); - - // Insert init context in main - DEBUG(errs() << "Gen Code to initialize SPIR Timer\n"); - Function* VI = M.getFunction("llvm.visc.init"); - assert(VI->getNumUses() == 1 && "__visc__init should only be used once"); - - InitCall = cast<Instruction>(*VI->user_begin()); - initializeTimerSet(InitCall); - switchToTimer(visc_TimerID_INIT_CTX, InitCall); - CallInst::Create(llvm_visc_ocl_initContext, - ArrayRef<Value*>(getTargetID(M, visc::SPIR_TARGET)), - "", InitCall); - switchToTimer(visc_TimerID_NONE, InitCall); - - // Insert print instruction at visc exit - DEBUG(errs() << "Gen Code to print SPIR Timer\n"); - Function* VC = M.getFunction("llvm.visc.cleanup"); - DEBUG(errs() << *VC << "\n"); - assert(VC->getNumUses() == 1 && "__visc__clear should only be used once"); - - CleanupCall = cast<Instruction>(*VC->user_begin()); - printTimerSet(CleanupCall); - - -} - -// Generate Code to call the kernel -// The plan is to replace the internal node with a leaf node. This method is -// used to generate a function to associate with this leaf node. The function -// is responsible for all the memory allocation/transfer and invoking the -// kernel call on the device -void CGT_SPIR::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& FileName) { - // Check if clone already exists. If it does, it means we have visited this - // function before. -// assert(N->getGenFunc() == NULL && "Code already generated for this node"); - - assert(N->getGenFuncForTarget(visc::SPIR_TARGET) == NULL && - "Code already generated for this node"); - - // Useful values - Value* True = ConstantInt::get(Type::getInt1Ty(M.getContext()), 1); - Value* False = ConstantInt::get(Type::getInt1Ty(M.getContext()), 0); - - // If kernel struct has not been initialized with kernel function, then fail - assert(K != NULL && "No kernel found!!"); - - DEBUG(errs() << "Generating kernel call code\n"); - - Function* F = N->getFuncPointer(); - - - // Create of clone of F with no instructions. Only the type is the same as F - // without the extra arguments. - Function* F_X86; - - // Clone the function, if we are seeing this function for the first time. We - // only need a clone in terms of type. - ValueToValueMapTy VMap; - - // Create new function with the same type - F_X86 = Function::Create(F->getFunctionType(), F->getLinkage(), F->getName(), &M); - - // Loop over the arguments, copying the names of arguments over. - Function::arg_iterator dest_iterator = F_X86->arg_begin(); - for (Function::const_arg_iterator i = F->arg_begin(), e = F->arg_end(); - i != e; ++i) { - dest_iterator->setName(i->getName()); // Copy the name over... - // Increment dest iterator - ++dest_iterator; - } - - // Add a basic block to this empty function - BasicBlock *BB = BasicBlock::Create(M.getContext(), "entry", F_X86); - ReturnInst* RI = ReturnInst::Create(M.getContext(), - UndefValue::get(F_X86->getReturnType()), BB); - - // FIXME: Adding Index and Dim arguments are probably not required except - // for consistency purpose (DFG2LLVM_X86 does assume that all leaf nodes do - // have those arguments) - - // Add Index and Dim arguments except for the root node - if(!N->isRoot() && !N->getParent()->isChildGraphStreaming()) - F_X86 = addIdxDimArgs(F_X86); - - BB = &*F_X86->begin(); - RI = cast<ReturnInst>(BB->getTerminator()); - - //Add the generated function info to DFNode -// N->setGenFunc(F_X86, visc::CPU_TARGET); - N->addGenFunc(F_X86, visc::SPIR_TARGET, true); - - // Loop over the arguments, to create the VMap - dest_iterator = F_X86->arg_begin(); - for (Function::const_arg_iterator i = F->arg_begin(), e = F->arg_end(); - i != e; ++i) { - // Add mapping to VMap and increment dest iterator - VMap[&*i] = &*dest_iterator; - ++dest_iterator; - } - - /* TODO: Use this code to verufy if this is a good pattern for OCL kernel - - // Sort children in topological order before code generation for kernel call - N->getChildGraph()->sortChildren(); - - // The DFNode N has the property that it has only one child (leaving Entry - // and Exit dummy nodes). This child is the OCL kernel. This simplifies code - // generation for kernel calls significantly. All the inputs to this child - // node would either be constants or from the parent node N. - - assert(N->getChildGraph()->size() == 3 - && "Node expected to have just one non-dummy node!"); - - DFNode* C; - for(DFGraph::children_iterator ci = N->getChildGraph()->begin(), - ce = N->getChildGraph()->end(); ci != ce; ++ci) { - C = *ci; - // Skip dummy node call - if (!C->isDummyNode()) - break; - } - - assert(C->isDummyNode() == false && "Internal Node only contains dummy nodes!"); - - Function* CF = C->getFuncPointer(); - */ - Function* KF = K->KernelLeafNode->getFuncPointer(); - // Initialize context - //DEBUG(errs() << "Initializing context" << "\n"); - //CallInst::Create(llvm_visc_ocl_initContext, None, "", RI); - - DEBUG(errs() << "Initializing commandQ" << "\n"); - // Initialize command queue - switchToTimer(visc_TimerID_SETUP, InitCall); - Value* fileStr = getStringPointer(FileName, InitCall, "Filename"); - DEBUG(errs() << "Kernel Filename constant: " << *fileStr << "\n"); - DEBUG(errs() << "Generating code for kernel - " << K->KernelFunction->getName()<< "\n"); - Value* kernelStr = getStringPointer(K->KernelFunction->getName(), InitCall,"KernelName"); - - Value* LaunchInstArgs[] = {fileStr, kernelStr}; - - DEBUG(errs() << "Inserting launch call" << "\n"); - CallInst* SPIR_Ctx = CallInst::Create(llvm_visc_ocl_launch, - ArrayRef<Value*>(LaunchInstArgs, 2), - "graph"+KF->getName(), - InitCall); - DEBUG(errs() << *SPIR_Ctx << "\n"); - GraphIDAddr = new GlobalVariable(M, - SPIR_Ctx->getType(), - false, - GlobalValue::CommonLinkage, - Constant::getNullValue(SPIR_Ctx->getType()), - "graph"+KF->getName()+".addr"); - DEBUG(errs() << "Store at: " << *GraphIDAddr << "\n"); - StoreInst* SI = new StoreInst(SPIR_Ctx, GraphIDAddr, InitCall); - DEBUG(errs() << *SI << "\n"); - switchToTimer(visc_TimerID_NONE, InitCall); - switchToTimer(visc_TimerID_SETUP, RI); - Value* GraphID = new LoadInst(GraphIDAddr, "graph."+KF->getName(), RI); - - // Iterate over the required input edges of the node and use the visc-rt API - // to set inputs - DEBUG(errs() << "Iterate over input edges of node and insert visc api\n"); - std::vector<OutputPtr> OutputPointers; - // Vector to hold the device memory object that need to be cleared before we release - // context - std::vector<Value*> DevicePointers; - - std::map<unsigned, unsigned> kernelInArgMap = K->getInArgMap(); -/* - for(unsigned i=0; i<KF->getFunctionType()->getNumParams(); i++) { - - // The kernel object gives us the mapping of arguments from kernel launch - // node function (F_X86) to kernel (kernel->KF) - Value* inputVal = getArgumentAt(F_X86, K->getInArgMap()[i]); - -*/ - for(std::map<unsigned, unsigned>::iterator ib = kernelInArgMap.begin(), - ie = kernelInArgMap.end(); ib != ie; ++ib) { - unsigned i = ib->first; - Value* inputVal = getArgumentAt(F_X86, ib->second); - DEBUG(errs() << "\tArgument "<< i<< " = " << *inputVal << "\n"); - - // input value has been obtained. - // Check if input is a scalar value or a pointer operand - // For scalar values such as int, float, etc. the size is simply the size of - // type on target machine, but for pointers, the size of data would be the - // next integer argument - if(inputVal->getType()->isPointerTy()) { - - switchToTimer(visc_TimerID_COPY_PTR, RI); - // Pointer Input - // CheckAttribute - Value* isOutput = (hasAttribute(KF, i, Attribute::Out))? True : False; - Value* isInput = ((hasAttribute(KF, i, Attribute::Out)) - && !(hasAttribute(KF, i, Attribute::In)))? False : True; - - Argument* A = getArgumentAt(KF, i); - if(isOutput == True) { - DEBUG(errs() << *A << " is an OUTPUT argument\n"); - } - if(isInput == True) { - DEBUG(errs() << *A << " is an INPUT argument\n"); - } - - - Value* inputValI8Ptr = CastInst::CreatePointerCast(inputVal, - Type::getInt8PtrTy(M.getContext()), - inputVal->getName()+".i8ptr", - RI); - - // Assert that the pointer argument size (next argument) is in the map - assert(kernelInArgMap.find(i+1) != kernelInArgMap.end()); - - Value* inputSize = getArgumentAt(F_X86, kernelInArgMap[i+1]); - - assert(inputSize->getType() == Type::getInt64Ty(M.getContext()) - && "Pointer type input must always be followed by size (integer type)"); - Value* setInputArgs[] = {GraphID, - inputValI8Ptr, - ConstantInt::get(Type::getInt32Ty(M.getContext()),i), - inputSize, - isInput, - isOutput - }; - Value* d_ptr = CallInst::Create(llvm_visc_ocl_argument_ptr, - ArrayRef<Value*>(setInputArgs, 6), "", RI); - DevicePointers.push_back(d_ptr); - // If this has out attribute, store the returned device pointer in - // memory to read device memory later - if(isOutput == True) OutputPointers.push_back(OutputPtr(inputValI8Ptr, d_ptr, inputSize)); - } - else { - switchToTimer(visc_TimerID_COPY_SCALAR, RI); - // Scalar Input - // Store the scalar value on stack and then pass the pointer to its - // location - AllocaInst* inputValPtr = new AllocaInst(inputVal->getType(), inputVal->getName()+".ptr", RI); - StoreInst* SI = new StoreInst(inputVal, inputValPtr, RI); - - Value* inputValI8Ptr = CastInst::CreatePointerCast(inputValPtr, - Type::getInt8PtrTy(M.getContext()), - inputVal->getName()+".i8ptr", - RI); - - Value* setInputArgs[] = {GraphID, - inputValI8Ptr, - ConstantInt::get(Type::getInt32Ty(M.getContext()),i), - ConstantExpr::getSizeOf(inputVal->getType()) - }; - CallInst::Create(llvm_visc_ocl_argument_scalar, - ArrayRef<Value*>(setInputArgs, 4), "", RI); - } - } - - DEBUG(errs() << "Setup shared memory arguments of node and insert visc api\n"); - - // Check to see if all the allocation sizes are constant (determined - // statically) - bool constSizes = true; - for (auto& e: K->getSharedInArgMap()) { - constSizes &= isa<Constant>(e.second.first); - } - - // If the sizes are all constant - if (constSizes) { - for (auto& e: K->getSharedInArgMap()) { - unsigned argNum = e.first; - Value* allocSize = e.second.first; - - DEBUG(errs() << "\tLocal Memory at "<< argNum << ", size = " << *allocSize << "\n"); - - if (KF->getFunctionType()->getParamType(argNum)->isPointerTy()) { - // Shared memory ptr argument - scalar at size position - switchToTimer(visc_TimerID_COPY_SCALAR, RI); - - assert(isa<Constant>(allocSize) && "Constant shared memory size is expected"); - - Value* setInputArgs[] = {GraphID, - ConstantInt::get(Type::getInt32Ty(M.getContext()),argNum), - allocSize - }; - CallInst::Create(llvm_visc_ocl_argument_shared, - ArrayRef<Value*>(setInputArgs, 3), "", RI); - } - else { - // Sharem memory size argument - scalar at address position - switchToTimer(visc_TimerID_COPY_SCALAR, RI); - // Store the scalar value on stack and then pass the pointer to its - // location - AllocaInst* allocSizePtr = new AllocaInst(allocSize->getType(), - allocSize->getName()+".sharedMem.ptr", RI); - StoreInst* SI = new StoreInst(allocSize, allocSizePtr, RI); - - Value* allocSizeI8Ptr = CastInst::CreatePointerCast(allocSizePtr, - Type::getInt8PtrTy(M.getContext()), - allocSize->getName()+".sharedMem.i8ptr", - RI); - - Value* setInputArgs[] = {GraphID, - allocSizeI8Ptr, - ConstantInt::get(Type::getInt32Ty(M.getContext()),argNum), - ConstantExpr::getSizeOf(allocSize->getType()) - }; - CallInst::Create(llvm_visc_ocl_argument_scalar, - ArrayRef<Value*>(setInputArgs, 4), "", RI); - } - } - } else { - - Function *F_alloc = K->AllocationFunction; - StructType *FAllocRetTy = dyn_cast<StructType>(F_alloc->getReturnType()); - assert(FAllocRetTy && "Allocation node with no struct return type"); - - std::vector<Value *> AllocInputArgs; - for (unsigned i = 0; i < K->allocInArgMap.size(); i++) { - AllocInputArgs.push_back(getArgumentAt(F_X86, K->allocInArgMap.at(i))); - } - - CallInst *CI = CallInst::Create(F_alloc, AllocInputArgs, "", RI); - std::vector<ExtractValueInst *> ExtractValueInstVec; - for (unsigned i = 1; i < FAllocRetTy->getNumElements(); i += 2) { - ExtractValueInst *EI = ExtractValueInst::Create(CI, i, "", RI); - ExtractValueInstVec.push_back(EI); - } - - for (auto& e: K->getSharedInArgMap()) { - unsigned argNum = e.first; - Value* allocSize = ExtractValueInstVec[e.second.second/2]; - - DEBUG(errs() << "\tLocal Memory at "<< argNum << ", size = " << *allocSize << "\n"); - - if (KF->getFunctionType()->getParamType(argNum)->isPointerTy()) { - // Shared memory ptr argument - scalar at size position - switchToTimer(visc_TimerID_COPY_SCALAR, RI); - - Value* setInputArgs[] = {GraphID, - ConstantInt::get(Type::getInt32Ty(M.getContext()),argNum), - allocSize - }; - CallInst::Create(llvm_visc_ocl_argument_shared, - ArrayRef<Value*>(setInputArgs, 3), "", RI); - } - else { - // Sharem memory size argument - scalar at address position - switchToTimer(visc_TimerID_COPY_SCALAR, RI); - // Store the scalar value on stack and then pass the pointer to its - // location - AllocaInst* allocSizePtr = new AllocaInst(allocSize->getType(), - allocSize->getName()+".sharedMem.ptr", RI); - StoreInst* SI = new StoreInst(allocSize, allocSizePtr, RI); - - Value* allocSizeI8Ptr = CastInst::CreatePointerCast(allocSizePtr, - Type::getInt8PtrTy(M.getContext()), - allocSize->getName()+".sharedMem.i8ptr", - RI); - - Value* setInputArgs[] = {GraphID, - allocSizeI8Ptr, - ConstantInt::get(Type::getInt32Ty(M.getContext()),argNum), - ConstantExpr::getSizeOf(allocSize->getType()) - }; - CallInst::Create(llvm_visc_ocl_argument_scalar, - ArrayRef<Value*>(setInputArgs, 4), "", RI); - } - } - } - - - DEBUG(errs() << "Setup output edges of node and insert visc api\n"); - - // Set output if struct is not an empty struct - StructType* OutputTy = K->KernelLeafNode->getOutputType(); - std::vector<Value*> d_Outputs; - if(!OutputTy->isEmptyTy()) { - switchToTimer(visc_TimerID_COPY_PTR, RI); - // Not an empty struct - // Iterate over all elements of the struct and put them in - for(unsigned i=0; i < OutputTy->getNumElements(); i++) { - unsigned outputIndex = KF->getFunctionType()->getNumParams()+i; - Value* setOutputArgs[] = {GraphID, - ConstantInt::get(Type::getInt32Ty(M.getContext()),outputIndex), - ConstantExpr::getSizeOf(OutputTy->getElementType(i))}; - - CallInst* d_Output = CallInst::Create(llvm_visc_ocl_output_ptr, - ArrayRef<Value*>(setOutputArgs, 3), - "d_output."+KF->getName(), - RI); - d_Outputs.push_back(d_Output); - } - } - - // Enqueue kernel - // Need work dim, localworksize, globalworksize - // Allocate size_t[numDims] space on stack. Store the work group sizes and - // pass it as an argument to ExecNode - - switchToTimer(visc_TimerID_MISC, RI); - Value *workDim, *LocalWGPtr, *GlobalWGPtr; - getExecuteNodeParams(M, workDim, LocalWGPtr, GlobalWGPtr, K, VMap, RI); - switchToTimer(visc_TimerID_KERNEL, RI); - Value* ExecNodeArgs[] = {GraphID, - workDim, - LocalWGPtr, - GlobalWGPtr - }; - CallInst* Event = CallInst::Create(llvm_visc_ocl_executeNode, - ArrayRef<Value*>(ExecNodeArgs, 4), - "event."+KF->getName(), - RI); - DEBUG(errs() << "Execute Node Call: " << *Event << "\n"); - - // Wait for Kernel to Finish - CallInst::Create(llvm_visc_ocl_wait, - ArrayRef<Value*>(GraphID), - "", - RI); - - switchToTimer(visc_TimerID_READ_OUTPUT, RI); - // Read Output Struct if not empty - if(!OutputTy->isEmptyTy()) { - std::vector<Value*>h_Outputs; - Value* KernelOutput = UndefValue::get(OutputTy); - for(unsigned i=0; i < OutputTy->getNumElements(); i++) { - Value* GetOutputArgs[] = {GraphID, - Constant::getNullValue(Type::getInt8PtrTy(M.getContext())), - d_Outputs[i], - ConstantExpr::getSizeOf(OutputTy->getElementType(i)) - }; - CallInst* h_Output = CallInst::Create(llvm_visc_ocl_getOutput, - ArrayRef<Value*>(GetOutputArgs, 4), - "h_output."+KF->getName()+".addr", - RI); - // Read each device pointer listed in output struct - // Load the output struct - CastInst* BI = BitCastInst::CreatePointerCast(h_Output, - OutputTy->getElementType(i)->getPointerTo(), "output.ptr", RI); - - Value* OutputElement = new LoadInst(BI, "output."+KF->getName(), RI); - KernelOutput = InsertValueInst::Create(KernelOutput, OutputElement, ArrayRef<unsigned>(i), - KF->getName()+"output", RI); - } - OutputMap[K->KernelLeafNode] = KernelOutput; - } - - // Read all the pointer arguments which had side effects i.e., had out - // attribute - DEBUG(errs() << "Output Pointers : " << OutputPointers.size() << "\n"); - // FIXME: Not reading output pointers anymore as we read them when data is - // actually requested - /*for(auto output: OutputPointers) { - DEBUG(errs() << "Read: " << *output.d_ptr << "\n"); - DEBUG(errs() << "\tTo: " << *output.h_ptr << "\n"); - DEBUG(errs() << "\t#bytes: " << *output.bytes << "\n"); - - Value* GetOutputArgs[] = {GraphID, output.h_ptr, output.d_ptr, output.bytes}; - CallInst* CI = CallInst::Create(llvm_visc_ocl_getOutput, - ArrayRef<Value*>(GetOutputArgs, 4), - "", RI); - }*/ - switchToTimer(visc_TimerID_MEM_FREE, RI); - // Clear Context and free device memory - DEBUG(errs() << "Clearing context" << "\n"); - // Free Device Memory - for(auto d_ptr: DevicePointers) { - CallInst::Create(llvm_visc_ocl_free, ArrayRef<Value*>(d_ptr), "", RI); - } - switchToTimer(visc_TimerID_CLEAR_CTX, CleanupCall); - // Clear Context - LoadInst* LI = new LoadInst(GraphIDAddr, "", CleanupCall); - CallInst::Create(llvm_visc_ocl_clearContext, ArrayRef<Value*>(LI), "", CleanupCall); - switchToTimer(visc_TimerID_NONE, CleanupCall); - - switchToTimer(visc_TimerID_MISC, RI); - DEBUG(errs() << "*** Generating epilogue code for the function****\n"); - // Generate code for output bindings - // Get Exit node - DFNode* C = N->getChildGraph()->getExit(); - // Get OutputType of this node - StructType* OutTy = N->getOutputType(); - Value *retVal = UndefValue::get(F_X86->getReturnType()); - // Find the kernel's output arg map, to use instead of the bindings - std::vector<unsigned> outArgMap = kernel->getOutArgMap(); - // Find all the input edges to exit node - for (unsigned i=0; i < OutTy->getNumElements(); i++) { - DEBUG(errs() << "Output Edge " << i << "\n"); - // Find the incoming edge at the requested input port - DFEdge* E = C->getInDFEdgeAt(i); - - assert(E && "No Binding for output element!"); - // Find the Source DFNode associated with the incoming edge - DFNode* SrcDF = E->getSourceDF(); - - DEBUG(errs() << "Edge source -- " << SrcDF->getFuncPointer()->getName() << "\n"); - - // If Source DFNode is a dummyNode, edge is from parent. Get the - // argument from argument list of this internal node - Value* inputVal; - if(SrcDF->isEntryNode()) { - inputVal = getArgumentAt(F_X86, i); - DEBUG(errs() << "Argument "<< i<< " = " << *inputVal << "\n"); - } - else { - // edge is from a internal node - // Check - code should already be generated for this source dfnode - // FIXME: Since the 2-level kernel code gen has aspecific structure, we - // can assume the SrcDF is same as Kernel Leaf node. - // Use outArgMap to get correct mapping - SrcDF = K->KernelLeafNode; - assert(OutputMap.count(SrcDF) - && "Source node call not found. Dependency violation!"); - - // Find Output Value associated with the Source DFNode using OutputMap - Value* CI = OutputMap[SrcDF]; - - // Extract element at source position from this call instruction - std::vector<unsigned> IndexList; - // i is the destination of DFEdge E - // Use the mapping instead of the bindings -// IndexList.push_back(E->getSourcePosition()); - IndexList.push_back(outArgMap[i]); - DEBUG(errs() << "Going to generate ExtarctVal inst from "<< *CI <<"\n"); - ExtractValueInst* EI = ExtractValueInst::Create(CI, IndexList, - "",RI); - inputVal = EI; - } - std::vector<unsigned> IdxList; - IdxList.push_back(i); - retVal = InsertValueInst::Create(retVal, inputVal, IdxList, "", RI); - } - - DEBUG(errs() << "Extracted all\n"); - switchToTimer(visc_TimerID_NONE, RI); - retVal->setName("output"); - ReturnInst* newRI = ReturnInst::Create(F_X86->getContext(), retVal); - ReplaceInstWithInst(RI, newRI); -} - - -// Right now, only targeting the one level case. In general, device functions -// can return values so we don't need to change them -void CGT_SPIR::codeGen(DFInternalNode* N) { - errs () << "Inside node: " << N->getFuncPointer()->getName() << "\n"; - if(KernelLaunchNode == NULL) - errs () << "No kernel launch node\n"; - else { - errs () << "KernelLaunchNode: " << KernelLaunchNode->getFuncPointer()->getName() << "\n"; - } - - - if (!KernelLaunchNode) { - DEBUG(errs() << "No code generated (host code for kernel launch complete).\n"); - return; - } - - if (N == KernelLaunchNode) { - DEBUG(errs() << "Found kernel launch node. Generating host code.\n"); - //TODO - - // Now the remaining nodes to be visited should be ignored - KernelLaunchNode = NULL; - DEBUG(errs() << "Insert Runtime calls\n"); - insertRuntimeCalls(N, kernel, getSPIRFilename(M)); - - } else { - DEBUG(errs() << "Found intermediate node. Getting size parameters.\n"); - // Keep track of the arguments order. - std::map<unsigned, unsigned> inmap1 = N->getInArgMap(); - std::map<unsigned, unsigned> inmap2 = kernel->getInArgMap(); - // TODO: Structure assumed: one thread node, one allocation node (at most), - // TB node - std::map<unsigned, unsigned> inmapFinal; - for (std::map<unsigned, unsigned>::iterator ib = inmap2.begin(), ie = inmap2.end(); - ib != ie; ++ib) { - inmapFinal[ib->first] = inmap1[ib->second]; - } - - kernel->setInArgMap(inmapFinal); - - // Keep track of the output arguments order. - std::vector<unsigned> outmap1 = N->getOutArgMap(); - std::vector<unsigned> outmap2 = kernel->getOutArgMap(); - - // TODO: Change when we have incoming edges to the dummy exit node from more - // than one nodes. In this case, the number of bindings is the same, but - // their destination position, thus the index in outmap1, is not - // 0 ... outmap2.size()-1 - // The limit is the size of outmap2, because this is the number of kernel - // output arguments for which the mapping matters - // For now, it reasonable to assume that all the kernel arguments are returned, - // maybe plys some others from other nodes, thus outmap2.size() <= outmap1.size() - for (unsigned i = 0; i < outmap2.size(); i++) { - outmap1[i] = outmap2[outmap1[i]]; - } - kernel->setOutArgMap(outmap1); - - // Track the source of local dimlimits for the kernel - // Dimension limit can either be a constant or an argument of parent - // function. Since Internal node would no longer exist, we need to insert the - // localWGSize with values from the parent of N. - std::vector<Value*> localWGSizeMapped; - for (unsigned i = 0; i < kernel->localWGSize.size(); i++) { - if (isa<Constant>(kernel->localWGSize[i])) { - // if constant, use as it is - localWGSizeMapped.push_back(kernel->localWGSize[i]); - } - else if (Argument* Arg = dyn_cast<Argument>(kernel->localWGSize[i])) { - // if argument, find the argument location in N. Use InArgMap of N to - // find the source location in Parent of N. Retrieve the argument from - // parent to insert in the vector. - unsigned argNum = Arg->getArgNo(); - // This argument will be coming from the parent node, not the allocation - // Node - assert(N->getInArgMap().find(argNum) != N->getInArgMap().end()); - - unsigned parentArgNum = N->getInArgMap()[argNum]; - Argument* A = getArgumentAt(N->getParent()->getFuncPointer(), parentArgNum); - localWGSizeMapped.push_back(A); - } - else { - assert(false && "LocalWGsize using value which is neither argument nor constant!"); - } - } - // Update localWGSize vector of kernel - kernel->setLocalWGSize(localWGSizeMapped); - } - -} - -//static bool checkPreferredTarget(DFNode* N, visc::Target T) { - //Function* F = N->getFuncPointer(); - //Module* M = F->getParent(); - //NamedMDNode* HintNode; - //switch (T) { - //case visc::GPU_TARGET: - //HintNode = M->getOrInsertNamedMetadata("visc_hint_gpu"); - //break; - //case visc::SPIR_TARGET: - //HintNode = M->getOrInsertNamedMetadata("visc_hint_spir"); - //break; - //case visc::CPU_TARGET: - //HintNode = M->getOrInsertNamedMetadata("visc_hint_cpu"); - //break; - //default: - //llvm_unreachable("Target Not supported yet!"); - //} - //for (unsigned i = 0; i < HintNode->getNumOperands(); i++) { - //MDNode* MetaNode = HintNode->getOperand(i); - //if(F == MetaNode->getOperand(0)) - //return true; - //} - //return false; -//} - -void CGT_SPIR::codeGen(DFLeafNode* N) { - - // Skip code generation if it is a dummy node - if(N->isDummyNode()) { - DEBUG(errs() << "Skipping dummy node\n"); - return; - } - - // Skip code generation if it is an allocation node - if(N->isAllocationNode()) { - DEBUG(errs() << "Skipping allocation node\n"); - return; - } - - // Generate code only if it has the right hint -// if(!checkPreferredTarget(N, visc::SPIR_TARGET)) { -// errs() << "Skipping node: "<< N->getFuncPointer()->getName() << "\n"; -// return; -// } - if(!preferredTargetIncludes(N, visc::SPIR_TARGET)) { - errs() << "Skipping node: "<< N->getFuncPointer()->getName() << "\n"; - return; - } - - // Checking which node is the kernel launch - DFNode* PNode = N->getParent(); - int pLevel = PNode->getLevel(); - int pReplFactor = PNode->getNumOfDim(); - - // Choose parent node as kernel launch if: - // (1) Parent is the top level node i.e., Root of DFG - // OR - // (2) Parent does not have multiple instances - errs() << "pLevel = " << pLevel << "\n"; - errs() << "pReplFactor = " << pReplFactor << "\n"; - - if (!pLevel || !pReplFactor) { - errs() << "*************** Kernel Gen: 1-Level Hierarchy **************\n"; - KernelLaunchNode = PNode; - errs() << "Setting Kernel Launch Node\n"; - kernel = new Kernel(NULL, - N, - N->getInArgMap(), - N->getSharedInArgMap(), - N->getOutArgMap(), - N->getNumOfDim(), - N->getDimLimits()); - } - else { - // Converting a 2-level DFG to opencl kernel - errs() << "*************** Kernel Gen: 2-Level Hierarchy **************\n"; - KernelLaunchNode = PNode->getParent(); - assert((PNode->getNumOfDim() == N->getNumOfDim()) && "Dimension number must match"); - // Contains the instructions generating the kernel configuration parameters - kernel = new Kernel(NULL, // kernel function - N, // kernel leaf node - N->getInArgMap(), // kenel argument mapping - N->getSharedInArgMap(), - N->getOutArgMap(), // kernel output mapping from the leaf to the interemediate node - PNode->getNumOfDim(), // gridDim - PNode->getDimLimits(),// grid size - N->getNumOfDim(), // blockDim - N->getDimLimits()); // block size - - } - - std::vector<IntrinsicInst *> IItoRemove; - BuildDFG::HandleToDFNode Leaf_HandleToDFNodeMap; - - // Get the function associated with the dataflow node - Function *F = N->getFuncPointer(); - - // Look up if we have visited this function before. If we have, then just - // get the cloned function pointer from DFNode. Otherwise, create the cloned - // function and add it to the DFNode GenFunc. - Function *F_spir = N->getGenFuncForTarget(visc::SPIR_TARGET); - assert(F_spir == NULL && "Error: Visiting a node for which code already generated"); - - // Clone the function - ValueToValueMapTy VMap; - - Twine FName = F->getName(); - F_spir = CloneFunction(F, VMap); - F_spir->setName(FName+"_spir"); - errs() << "Old Function Name: " << F->getName() << "\n"; - errs() << "New Function Name: " << F_spir->getName() << "\n"; - - F_spir->removeFromParent(); - - // Insert the cloned function into the kernels module - KernelM->getFunctionList().push_back(F_spir); - - //TODO: Iterate over all the instructions of F_spir and identify the - //callees and clone them into this module. - DEBUG(errs() << *F_spir->getType()); - DEBUG(errs() << *F_spir); - - //Add generated function info to DFNode - //N->setGenFunc(F_spir, visc::SPIR_TARGET); - - F_spir = transformFunctionToVoid(F_spir); - - // Add generated function info to DFNode - //N->setGenFunc(F_spir, visc::SPIR_TARGET); - - removeInOutAttributes(F_spir); - - //Add generated function info to DFNode - N->addGenFunc(F_spir, visc::SPIR_TARGET, false); - - DEBUG(errs() << "Removing all attributes from Kernel Function and adding nounwind\n"); - F_spir->removeAttributes(AttributeSet::FunctionIndex, F_spir->getAttributes().getFnAttributes()); - F_spir->addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind); - - - //FIXME: For now, assume only one allocation node - kernel->AllocationNode = NULL; - - for (DFNode::const_indfedge_iterator ieb = N->indfedge_begin(), iee = N->indfedge_end(); - ieb != iee; ++ieb) { - DFNode *SrcDFNode = (*ieb)->getSourceDF(); - DEBUG(errs() << "Found edge from node: " << " " << SrcDFNode->getFuncPointer()->getName() << "\n"); - DEBUG(errs() << "Current Node: " << N->getFuncPointer()->getName() << "\n"); - DEBUG(errs() << "isAllocationNode = "<< SrcDFNode->isAllocationNode() << "\n"); - if (!SrcDFNode->isDummyNode()) { - assert(SrcDFNode->isAllocationNode()); - kernel->AllocationNode = dyn_cast<DFLeafNode>(SrcDFNode); - kernel->allocInArgMap = SrcDFNode->getInArgMap(); - break; - } - } - - // Vector for shared memory arguments - std::vector<unsigned> SharedMemArgs; - - // If no allocation node was found, SharedMemArgs is empty - if (kernel->AllocationNode) { - - ValueToValueMapTy VMap; - Function *F_alloc = CloneFunction(kernel->AllocationNode->getFuncPointer(), VMap); - //F_alloc->removeFromParent(); - // Insert the cloned function into the kernels module - //M.getFunctionList().push_back(F_alloc); - - std::vector<IntrinsicInst *> ViscMallocInstVec; - findIntrinsicInst(F_alloc, Intrinsic::visc_malloc, ViscMallocInstVec); - - for (unsigned i = 0; i < ViscMallocInstVec.size(); i++) { - IntrinsicInst *II = ViscMallocInstVec[i]; - assert(II->hasOneUse() && "visc_malloc result is used more than once"); - II->replaceAllUsesWith(ConstantPointerNull::get(Type::getInt8PtrTy(M.getContext()))); - II->eraseFromParent(); - } - kernel->AllocationFunction = F_alloc; - - // This could be used to check that the allocation node has the appropriate - // number of fields in its return struct -/* - ReturnInst *RI = ReturnInstVec[0]; - Value *RetVal = RI->getReturnValue(); - Type *RetTy = RetVal->getType(); - StructType *RetStructTy = dyn_cast<StructType>(RetTy); - assert(RetStructTy && "Allocation node does not return a struct type"); - unsigned numFields = RetStructTy->getNumElements(); -*/ - std::map<unsigned, std::pair<Value*, unsigned> > sharedInMap = kernel->getSharedInArgMap(); - AllocationNodeProperty* APN = - (AllocationNodeProperty*) kernel->AllocationNode->getProperty(DFNode::Allocation); - for (auto& AllocPair: APN->getAllocationList()) { - unsigned destPos = AllocPair.first->getDestPosition(); - unsigned srcPos = AllocPair.first->getSourcePosition(); - SharedMemArgs.push_back(destPos); - sharedInMap[destPos] = std::pair<Value *, unsigned>(AllocPair.second, srcPos+1); - sharedInMap[destPos+1] = std::pair<Value *, unsigned>(AllocPair.second, srcPos+1); - } - kernel->setSharedInArgMap(sharedInMap); - } - std::sort(SharedMemArgs.begin(), SharedMemArgs.end()); - - // All pointer args which are not shared memory pointers have to be moved to - // global address space - unsigned argIndex = 0; - std::vector<unsigned> GlobalMemArgs; - for(auto& Arg: F_spir->getArgumentList()) { - if (Arg.getType()->isPointerTy()) { - // If the arguement is already chosen for shared memory arguemnt list, skip. - // Else put it in Global memory arguement list - if(std::count(SharedMemArgs.begin(), SharedMemArgs.end(), argIndex) == 0) { - GlobalMemArgs.push_back(argIndex); - } - } - argIndex++; - } - std::sort(GlobalMemArgs.begin(), GlobalMemArgs.end()); - - /* At this point, we assume that chescks for the fact that SharedMemArgs only - contains pointer arguments to GLOBAL_ADDRSPACE have been performed by the - analysis pass */ - - F_spir = changeArgAddrspace(F_spir, SharedMemArgs, SHARED_ADDRSPACE); - removeAttributeAtArguments(F_spir, SharedMemArgs, Attribute::NoCapture); - F_spir = changeArgAddrspace(F_spir, GlobalMemArgs, GLOBAL_ADDRSPACE); - - - // Go through all the instructions - for (inst_iterator i = inst_begin(F_spir), e = inst_end(F_spir); i != e; ++i) { - Instruction *I = &(*i); - // Leaf nodes should not contain VISC graph intrinsics or launch - assert(!BuildDFG::isViscLaunchIntrinsic(I) && "Launch intrinsic within a dataflow graph!"); - assert(!BuildDFG::isViscGraphIntrinsic(I) && "VISC graph intrinsic within a leaf dataflow node!"); - - if (BuildDFG::isViscIntrinsic(I)) { - IntrinsicInst* II = dyn_cast<IntrinsicInst>(I); - IntrinsicInst* ArgII; - DFNode* ArgDFNode; - - /************************ Handle VISC Query intrinsics ************************/ - - switch (II->getIntrinsicID()) { - /**************************** llvm.visc.getNode() *****************************/ - case Intrinsic::visc_getNode: { - DEBUG(errs() << F_spir->getName() << "\t: Handling getNode\n"); - // add mapping <intrinsic, this node> to the node-specific map - Leaf_HandleToDFNodeMap[II] = N; - IItoRemove.push_back(II); - } - break; - /************************* llvm.visc.getParentNode() **************************/ - case Intrinsic::visc_getParentNode: { - DEBUG(errs() << F_spir->getName() << "\t: Handling getParentNode\n"); - // get the parent node of the arg node - // get argument node - ArgII = cast<IntrinsicInst>((II->getOperand(0))->stripPointerCasts()); - ArgDFNode = Leaf_HandleToDFNodeMap[ArgII]; - // get the parent node of the arg node - // Add mapping <intrinsic, parent node> to the node-specific map - // the argument node must have been added to the map, orelse the - // code could not refer to it - Leaf_HandleToDFNodeMap[II] = ArgDFNode->getParent(); - - IItoRemove.push_back(II); - } - break; - /*************************** llvm.visc.getNumDims() ***************************/ - case Intrinsic::visc_getNumDims: { - DEBUG(errs() << F_spir->getName() << "\t: Handling getNumDims\n"); - // get node from map - // get the appropriate field - ArgII = cast<IntrinsicInst>((II->getOperand(0))->stripPointerCasts()); - ArgDFNode = Leaf_HandleToDFNodeMap[ArgII]; - int numOfDim = ArgDFNode->getNumOfDim(); - DEBUG(errs() << "\t Got node dimension : " << numOfDim << "\n"); - IntegerType* IntTy = Type::getInt32Ty(KernelM->getContext()); - ConstantInt* numOfDimConstant = ConstantInt::getSigned(IntTy, (int64_t) numOfDim); - - // Replace the result of the intrinsic with the computed value - II->replaceAllUsesWith(numOfDimConstant); - - IItoRemove.push_back(II); - } - break; - /*********************** llvm.visc.getNodeInstanceID() ************************/ - case Intrinsic::visc_getNodeInstanceID_x: - case Intrinsic::visc_getNodeInstanceID_y: - case Intrinsic::visc_getNodeInstanceID_z: { - DEBUG(errs() << F_spir->getName() << "\t: Handling getNodeInstanceID\n"); - ArgII = cast<IntrinsicInst>((II->getOperand(0))->stripPointerCasts()); - ArgDFNode = Leaf_HandleToDFNodeMap[ArgII]; - assert(ArgDFNode && "Arg node is NULL"); - // A leaf node always has a parent - DFNode* ParentDFNode = ArgDFNode->getParent(); - assert(ParentDFNode && "Parent node of a leaf is NULL"); - - // Get the number associated with the required dimension - // FIXME: The order is important! - // These three intrinsics need to be consecutive x,y,z - uint64_t dim = II->getIntrinsicID() - - Intrinsic::visc_getNodeInstanceID_x; - assert((dim >= 0) && (dim < 3) && "Invalid dimension argument"); - DEBUG(errs() << "\t dimension = " << dim << "\n"); - - // Argument of the function to be called - ConstantInt * DimConstant = - ConstantInt::get(Type::getInt32Ty(KernelM->getContext()), dim); - //ArrayRef<Value *> Args(DimConstant); - - // The following is to find which function to call - Function * OpenCLFunction; - int parentLevel = N->getParent()->getLevel(); - int parentReplFactor = N->getParent()->getNumOfDim(); - DEBUG(errs() << "Parent Level = " << parentLevel << "\n"); - DEBUG(errs() << "Parent Repl factor = " << parentReplFactor << "\n"); - - FunctionType* FT = - FunctionType::get(Type::getInt64Ty(KernelM->getContext()), - ArrayRef<Type*>(Type::getInt32Ty(KernelM->getContext())), - false); - - if ((!parentLevel || !parentReplFactor) && ArgDFNode == N) { - // We only have one level in the hierarchy or the parent node is not - // replicated. This indicates that the parent node is the kernel - // launch, so we need to specify a global id. - // We can translate this only if the argument is the current node - // itself - DEBUG(errs() << "Substitute with get_global_id()\n"); - DEBUG(errs() << *II << "\n"); - OpenCLFunction = cast<Function> - (KernelM->getOrInsertFunction(getMangledName("get_global_id"), FT)); - } else if (Leaf_HandleToDFNodeMap[ArgII] == N) { - // We are asking for this node's id with respect to its parent - // this is a local id call - OpenCLFunction = cast<Function> - (KernelM->getOrInsertFunction(getMangledName("get_local_id"), FT)); - } else if (Leaf_HandleToDFNodeMap[ArgII] == N->getParent()) { - // We are asking for this node's parent's id with respect to its - // parent: this is a group id call - OpenCLFunction = cast<Function> - (KernelM->getOrInsertFunction(getMangledName("get_group_id"), FT)); - } else { - errs() << N->getFuncPointer()->getName() << "\n"; - errs() << N->getParent()->getFuncPointer()->getName() << "\n"; - errs() << *II << "\n"; - - assert(false && "Unable to translate getNodeInstanceID intrinsic"); - } - - // Create call instruction, insert it before the intrinsic and truncate - // the output to 32 bits and replace all the uses of the previous - // instruction with the new one - CallInst* CI = CallInst::Create(OpenCLFunction, DimConstant, "", II); - II->replaceAllUsesWith(CI); - - IItoRemove.push_back(II); - } - break; - /********************** llvm.visc.getNumNodeInstances() ***********************/ - case Intrinsic::visc_getNumNodeInstances_x: - case Intrinsic::visc_getNumNodeInstances_y: - case Intrinsic::visc_getNumNodeInstances_z: { -//TODO: think about whether this is the best way to go -// there are hw specific registers. therefore it is good to have the intrinsic -// but then, why do we need to keep that info in the graph? -// (only for the kernel configuration during the call) - - DEBUG(errs() << F_spir->getName() << "\t: Handling getNumNodeInstances\n"); - ArgII = cast<IntrinsicInst>((II->getOperand(0))->stripPointerCasts()); - ArgDFNode = Leaf_HandleToDFNodeMap[ArgII]; - // A leaf node always has a parent - DFNode* ParentDFNode = ArgDFNode->getParent(); - assert(ParentDFNode && "Parent node of a leaf is NULL"); - - // Get the number associated with the required dimension - // FIXME: The order is important! - // These three intrinsics need to be consecutive x,y,z - uint64_t dim = II->getIntrinsicID() - - Intrinsic::visc_getNumNodeInstances_x; - assert((dim >= 0) && (dim < 3) && "Invalid dimension argument"); - DEBUG(errs() << "\t dimension = " << dim << "\n"); - - // Argument of the function to be called - ConstantInt * DimConstant = - ConstantInt::get(Type::getInt32Ty(KernelM->getContext()), dim); - //ArrayRef<Value *> Args(DimConstant); - - // The following is to find which function to call - Function * OpenCLFunction; - int parentLevel = ParentDFNode->getLevel(); - int parentReplFactor = ParentDFNode->getNumOfDim(); - - FunctionType* FT = - FunctionType::get(Type::getInt64Ty(KernelM->getContext()), - Type::getInt32Ty(KernelM->getContext()), - false); - if ((N == ArgDFNode) && (!parentLevel || !parentReplFactor)) { - // We only have one level in the hierarchy or the parent node is not - // replicated. This indicates that the parent node is the kernel - // launch, so the instances are global_size (gridDim x blockDim) - OpenCLFunction = cast<Function> - (KernelM->getOrInsertFunction(getMangledName("get_global_size"), FT)); - } else if (Leaf_HandleToDFNodeMap[ArgII] == N) { - // We are asking for this node's instances - // this is a local size (block dim) call - OpenCLFunction = cast<Function> - (KernelM->getOrInsertFunction(getMangledName("get_local_size"), FT)); - } else if (Leaf_HandleToDFNodeMap[ArgII] == N->getParent()) { - // We are asking for this node's parent's instances - // this is a (global_size/local_size) (grid dim) call - OpenCLFunction = cast<Function> - (KernelM->getOrInsertFunction(getMangledName("get_num_groups"), FT)); - } else { - assert(false && "Unable to translate getNumNodeInstances intrinsic"); - } - - // Create call instruction, insert it before the intrinsic and truncate - // the output to 32 bits and replace all the uses of the previous - // instruction with the new one - CallInst* CI = CallInst::Create(OpenCLFunction, DimConstant, "", II); - II->replaceAllUsesWith(CI); - - IItoRemove.push_back(II); - } - break; - case Intrinsic::visc_barrier: - { - DEBUG(errs() << F_spir->getName() << "\t: Handling barrier\n"); - DEBUG(errs() << "Substitute with barrier()\n"); - DEBUG(errs() << *II << "\n"); - FunctionType* FT = FunctionType::get(Type::getVoidTy(KernelM->getContext()), - std::vector<Type*>(1, Type::getInt32Ty(KernelM->getContext())), - false); - Function* OpenCLFunction = cast<Function> - (KernelM->getOrInsertFunction(getMangledName("barrier"), FT)); - CallInst* CI = CallInst::Create(OpenCLFunction, - ArrayRef<Value*>(ConstantInt::get(Type::getInt32Ty(KernelM->getContext()), 1)), - "", II); - II->replaceAllUsesWith(CI); - IItoRemove.push_back(II); - } - break; - case Intrinsic::visc_atomic_cmpxchg: - case Intrinsic::visc_atomic_add: - case Intrinsic::visc_atomic_sub: - case Intrinsic::visc_atomic_xchg: - case Intrinsic::visc_atomic_min: - case Intrinsic::visc_atomic_umin: - case Intrinsic::visc_atomic_max: - case Intrinsic::visc_atomic_umax: - case Intrinsic::visc_atomic_and: - case Intrinsic::visc_atomic_or: - case Intrinsic::visc_atomic_xor: - case Intrinsic::visc_atomic_inc: - case Intrinsic::visc_atomic_dec: - { - DEBUG(errs() << *II << "\n"); - // Only have support for i32 atomic intrinsics - assert(II->getType() == Type::getInt32Ty(II->getContext()) - && "Only support i32 atomic intrinsics for now"); - // Substitute with appropriate atomic builtin - assert(II->getNumArgOperands() == 2 && "Expecting 2 operands for these atomics"); - - Value* Ptr = II->getArgOperand(0); - Value* Val = II->getArgOperand(1); - assert(Ptr->getType()->isPointerTy() - && "First argument of supported atomics is expected to be a pointer"); - PointerType* PtrTy = cast<PointerType>(Ptr->getType()); - if(PtrTy != Type::getInt32PtrTy(II->getContext(), PtrTy->getAddressSpace())) { - Ptr = CastInst::CreatePointerCast(Ptr, - Type::getInt32PtrTy(II->getContext(), - PtrTy->getAddressSpace()), "", II); - } - - StringRef name = getAtomicOpName(II->getIntrinsicID(), PtrTy->getAddressSpace()); - - Type* paramTypes[] = { Type::getInt32PtrTy(II->getContext(), PtrTy->getAddressSpace()), - Type::getInt32Ty(KernelM->getContext()) - }; - FunctionType* AtomicFT = FunctionType::get(II->getType(), - ArrayRef<Type*>(paramTypes, 2), - false); - Function* AtomicFunction = cast<Function> - (KernelM->getOrInsertFunction(name, AtomicFT)); - Value* atomicArgs[] = { Ptr, Val }; - CallInst* AtomicInst = CallInst::Create(AtomicFunction, - ArrayRef<Value*>(atomicArgs, 2), - "", II); - - DEBUG(errs() << "Substitute with: " << *AtomicInst << "\n"); - II->replaceAllUsesWith(AtomicInst); - IItoRemove.push_back(II); - } - break; - default: - assert(false && "Unknown VISC Intrinsic!"); - break; - } - - } - else if(CallInst* CI = dyn_cast<CallInst>(I)) { - DEBUG(errs() << "Found a call: " << *CI << "\n"); - Function* calleeF = cast<Function>(CI->getCalledValue()->stripPointerCasts()); - if(calleeF->isDeclaration()) { - // Add the declaration to kernel module - DEBUG(errs() << "Adding declaration to Kernel module: " << *calleeF << "\n"); - KernelM->getOrInsertFunction(calleeF->getName(), calleeF->getFunctionType()); - if(IntrinsicInst* II = dyn_cast<IntrinsicInst>(CI)) { - // Now handle a few specific intrinsics - // For now, sin and cos are translated to their libclc equivalent - switch(II->getIntrinsicID()) { - case Intrinsic::sin: - case Intrinsic::cos: - case Intrinsic::sqrt: - case Intrinsic::floor: - case Intrinsic::nvvm_rsqrt_approx_f: - { - DEBUG(errs() << "Found math function: " << *II << "\n"); - // Get the builtin function - // SPIR uses mangled name for builtin math functions - assert(II->getType()->isFloatTy() - && "Only handling sin(float) and cos(float)!"); - std::string name = getMathFunctionName(II->getIntrinsicID()); - - FunctionType* MathFT = FunctionType::get(II->getType(), - Type::getFloatTy(KernelM->getContext()), - false); - Function* MathFunction = cast<Function> - (KernelM->getOrInsertFunction(name, MathFT)); - CallInst* CI = CallInst::Create(MathFunction, II->getArgOperand(0), II->getName(), II); - - II->replaceAllUsesWith(CI); - IItoRemove.push_back(II); - break; - } - default: - DEBUG(errs() << "[WARNING] Found Intrinsic: " << *II << "\n" ); - } - } - - } - else { - // Clone the function - ValueToValueMapTy VMap; - Function* newCalleeF = CloneFunction(calleeF, VMap); - newCalleeF->removeFromParent(); //TODO: MARIA check - KernelM->getFunctionList().push_back(newCalleeF); - } - //TODO: how to handle address space qualifiers in load/store - } - - } - - // We need to do this explicitly: DCE pass will not remove them because we - // have assumed theworst memory behaviour for these function calls - // Traverse the vector backwards, otherwise definitions are deleted while - // their subsequent uses are still around - for (std::vector<IntrinsicInst *>::reverse_iterator ri = IItoRemove.rbegin(), - re = IItoRemove.rend(); ri != re; ++ri) - (*ri)->eraseFromParent(); - - addCLMetadata(F_spir); - kernel->KernelFunction = F_spir; - errs() << "Identified kernel - " << kernel->KernelFunction->getName() << "\n"; - DEBUG(errs() << *KernelM); - - return; -} - -bool DFG2LLVM_SPIR::runOnModule(Module &M) { - errs() << "\nDFG2LLVM_SPIR PASS\n"; - - // Get the BuildDFG Analysis Results: - // - Dataflow graph - // - Maps from i8* hansles to DFNode and DFEdge - BuildDFG &DFG = getAnalysis<BuildDFG>(); - - // DFInternalNode *Root = DFG.getRoot(); - std::vector<DFInternalNode*> Roots = DFG.getRoots(); - // BuildDFG::HandleToDFNode &HandleToDFNodeMap = DFG.getHandleToDFNodeMap(); - // BuildDFG::HandleToDFEdge &HandleToDFEdgeMap = DFG.getHandleToDFEdgeMap(); - - // Visitor for Code Generation Graph Traversal - CGT_SPIR *CGTVisitor = new CGT_SPIR(M, DFG); - - // Iterate over all the DFGs and produce code for each one of them - for (auto rootNode: Roots) { - // Initiate code generation for root DFNode - CGTVisitor->visit(rootNode); - } - - // This is not required. Itrinsics that do not have a use are not a problem - //CGTVisitor->removeLLVMIntrinsics(); - CGTVisitor->writeKernelsModule(); - - //TODO: Edit module epilogue to remove the VISC intrinsic declarations - delete CGTVisitor; - - return true; -} - -std::string CGT_SPIR::getKernelsModuleName(Module &M) { - /*SmallString<128> currentDir; - llvm::sys::fs::current_path(currentDir); - std::string fileName = getFilenameFromModule(M); - Twine output = Twine(currentDir) + "/Output/" + fileName + ""; - return output.str().append(".kernels.ll");*/ - std::string mid = M.getModuleIdentifier(); - return mid.append(".kernels.ll"); -} - -void CGT_SPIR::fixValueAddrspace(Value* V, unsigned addrspace) { - assert(isa<PointerType>(V->getType()) - && "Value should be of Pointer Type!"); - PointerType* OldTy = cast<PointerType>(V->getType()); - PointerType* NewTy = PointerType::get(OldTy->getElementType(), addrspace); - V->mutateType(NewTy); - for(Value::user_iterator ui = V->user_begin(), ue = V->user_end(); ui != ue; ui++) { - // Change all uses producing pointer type in same address space to new - // addressspace. - if(PointerType* PTy = dyn_cast<PointerType>((*ui)->getType())) { - if(PTy->getAddressSpace() == OldTy->getAddressSpace()) { - fixValueAddrspace(*ui, addrspace); - } - } - } -} - -Function* CGT_SPIR::changeArgAddrspace(Function* F, std::vector<unsigned> &Args, unsigned addrspace) { - unsigned idx = 0; - std::vector<Type*> ArgTypes; - for(auto& arg: F->getArgumentList()) { - DEBUG(errs() << arg << "\n"); - unsigned argno = arg.getArgNo(); - if ((idx < Args.size()) && (argno == Args[idx])) { - fixValueAddrspace(&arg, addrspace); - idx++; - } - ArgTypes.push_back(arg.getType()); - } - FunctionType* newFT = FunctionType::get(F->getReturnType(), ArgTypes, false); - - //F->mutateType(PTy); - Function* newF = cloneFunction(F, newFT, false); - replaceNodeFunctionInIR(*F->getParent(), F, newF); - - DEBUG(errs() << *newF->getFunctionType() << "\n" <<*newF << "\n"); - return newF; -} - -/* Remove the specified argument from arguments at positions denoted in Args */ -void CGT_SPIR::removeAttributeAtArguments(Function* F, std::vector<unsigned> &Args, Attribute::AttrKind attrKind) { - DEBUG(errs() << "Removing nocapture attribute from shared memory arguments of function " << F->getName() << "\n"); - - unsigned cnt = 0, arg_no = 0; - for(Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end(); - ai != ae && arg_no < Args.size(); ++ai, ++cnt) { - - if (Args[arg_no] == cnt) { - AttributeSet AS = F->getAttributes(); - AttrBuilder AB(AS, ai->getArgNo()+1); - AB.removeAttribute(attrKind); - AttributeSet argAS = AttributeSet::get(F->getContext(), ai->getArgNo()+1, AB); - F->removeAttributes(1+ai->getArgNo(), AS.getParamAttributes(ai->getArgNo() + 1)); - F->addAttributes(1+ai->getArgNo(), argAS); - - arg_no++; - } - } -} - -/* Add metadata to module KernelM, for OpenCL kernels */ -void CGT_SPIR::addCLMetadata(Function *F) { - // TODO: There is additional metadata used by kernel files but we skip them as - // they are not mandatory. In future they might be useful to enable - // optimizations - - IRBuilder<> Builder(&*F->begin()); - - // Create node for "kernel_arg_type" - SmallVector<Metadata*,8> argTypeNames; - argTypeNames.push_back(MDString::get(KernelM->getContext(), "kernel_arg_type")); - - for(Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end(); ai != ae; - ai++) { - argTypeNames.push_back(MDString::get(KernelM->getContext(), printType(ai->getType()))); - } - // All argument type names are in the vector. Create a metadata node - // "kernel_arg_type" - MDTuple* KernelArgTypes = MDNode::get(KernelM->getContext(), argTypeNames); - - // Create kernel metadata node containg the kernel function and the - // "kernel_arg_type" metadata node created above - SmallVector<Metadata*,8> KernelMD; - KernelMD.push_back(ValueAsMetadata::get(F)); - KernelMD.push_back(KernelArgTypes); - MDTuple *MDKernelNode = MDNode::get(KernelM->getContext(), KernelMD); - - // Create metadata node opencl.kernels. It points to the kernel metadata node - NamedMDNode *MDN_kernels = KernelM->getOrInsertNamedMetadata("opencl.kernels"); - MDN_kernels->addOperand(MDKernelNode); - - //KernelMD.push_back(MDString::get(KernelM->getContext(), "kernel")); - // TODO: Replace 1 with the number of the kernel. - // Add when support for multiple launces is added - //KernelMD.push_back(ConstantInt::get(Type::getInt32Ty(KernelM->getContext()),1)); - //MDNode *MDNvvmAnnotationsNode = MDNode::get(KernelM->getContext(), KernelMD); - //NamedMDNode *MDN_annotations = KernelM->getOrInsertNamedMetadata("nvvm.annotations"); - //MDN_annotations->addOperand(MDNvvmAnnotationsNode); - -} - -/* Function to remove all remaining declarations of llvm intrinsics, - * as they are not supported in SPIR. - */ -void CGT_SPIR::removeLLVMIntrinsics() { - - std::vector<Function*> fv = std::vector<Function*>(); - - for (Module::iterator mi = KernelM->begin(), me = KernelM->end(); (mi != me); ++mi) { - Function* F = &*mi; - if (F->isDeclaration() && F->getName().startswith("llvm.")) { - DEBUG(errs() << "Declaration: " << F->getName() << " with " << F->getNumUses() <<"uses.\n"); - assert(F->hasNUses(0) && "LLVM intrinsic function still in use"); - fv.push_back(F); - } - } - - for (std::vector<Function*>::iterator vi = fv.begin(); vi != fv.end(); ++vi) { - DEBUG(errs() << "Erasing declaration: " << (*vi)->getName() <<"\n"); - (*vi)->replaceAllUsesWith(UndefValue::get((*vi)->getType())); - (*vi)->eraseFromParent(); - } - -} - -void CGT_SPIR::writeKernelsModule() { - - // In addition to deleteing all otjer functions, we also want to spice it up a - // little bit. Do this now. - legacy::PassManager Passes; - - std::error_code EC; - tool_output_file Out(getKernelsModuleName(M).c_str(), EC, sys::fs::F_None); - if (EC) { - errs() << EC.message() << "\n"; - } - - Passes.add( - createPrintModulePass(Out.os())); - - Passes.run(*KernelM); - - // Declare success. - Out.keep(); -} - -Function* CGT_SPIR::transformFunctionToVoid(Function* F) { - - // FIXME: Maybe do that using the Node? - StructType* FRetTy = cast<StructType>(F->getReturnType()); - assert(FRetTy && "Return Type must always be a struct"); - - // Keeps return statements, because we will need to replace them - std::vector<ReturnInst *> RItoRemove; - findReturnInst(F, RItoRemove); - - - // Check for { } return struct, which means that the function returns void - if (FRetTy->isEmptyTy()) { - - DEBUG(errs() << "\tFunction output struct is void\n"); - DEBUG(errs() << "\tNo parameters added\n"); - - // Replacing return statements with others returning void - for (std::vector<ReturnInst *>::iterator i = RItoRemove.begin(), - e = RItoRemove.end(); i != e; ++i) { - ReturnInst::Create((F->getContext()), 0, (*i)); - (*i)->eraseFromParent(); - } - DEBUG(errs() << "\tChanged return statements to return void\n"); - } - else { - // The struct has return values, thus needs to be converted to parameter - - // Iterate over all element types of return struct and add arguments to the - // function - std::vector<Argument*> Args; - for (unsigned i=0; i<FRetTy->getNumElements(); i++) { - Argument* RetArg = new Argument(FRetTy->getElementType(i)->getPointerTo(), "ret_arg", F); - Args.push_back(RetArg); - DEBUG(errs() << "\tCreated parameter: " << *RetArg << "\n"); - } - - Function::arg_iterator ai, ae; - - DEBUG(errs() << "\tReplacing Return statements\n"); - // Replace return statements with extractValue and store instructions - for (std::vector<ReturnInst *>::iterator rii = RItoRemove.begin(), - rie = RItoRemove.end(); rii != rie; ++rii) { - ReturnInst* RI = (*rii); - Value* RetVal = RI->getReturnValue(); - for(unsigned i = 0; i < Args.size(); i++) { - ExtractValueInst* EI = ExtractValueInst::Create(RetVal, ArrayRef<unsigned>(i), - Args[i]->getName()+".val", RI); - new StoreInst(EI, Args[i], RI); - } - // assert(RetVal && "Return value should not be null at this point"); - // StructType* RetType = cast<StructType>(RetVal->getType()); - // assert(RetType && "Return type is not a struct"); - - ReturnInst::Create((F->getContext()), 0, RI); - RI->eraseFromParent(); - - } - } - DEBUG(errs() << "\tReplaced return statements\n"); - - // Create the argument type list with the added argument's type - std::vector<Type*> ArgTypes; - for(Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end(); - ai != ae; ++ai) { - ArgTypes.push_back(ai->getType()); - } - - // Adding new arguments to the function argument list, would not change the - // function type. We need to change the type of this function to reflect the - // added arguments - Type* VoidRetType = Type::getVoidTy(F->getContext()); - FunctionType* newFT = FunctionType::get(VoidRetType, ArgTypes, F->isVarArg()); - - // Change the function type - //F->mutateType(PTy); - Function* newF = cloneFunction(F, newFT, false); - replaceNodeFunctionInIR(*F->getParent(), F, newF); - - return newF; -} - -// Remove the visc in/out attributes from kernel function -void CGT_SPIR::removeInOutAttributes(Function* F) { - DEBUG(errs() << "Removing visc attributes from argument list of function " << F->getName() << "\n"); - for(Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end(); - ai != ae; ai++) { - - AttributeSet AS = F->getAttributes(); - AttrBuilder AB(AS, ai->getArgNo()+1); - AB.removeAttribute(Attribute::In); - AB.removeAttribute(Attribute::Out); - AB.removeAttribute(Attribute::InOut); - AttributeSet argAS = AttributeSet::get(F->getContext(), ai->getArgNo()+1, AB); - F->removeAttributes(1+ai->getArgNo(), AS.getParamAttributes(ai->getArgNo() + 1)); - F->addAttributes(1+ai->getArgNo(), argAS); - - } -} - -/****************************************************************************** - * Helper functions * - ******************************************************************************/ - -// Calculate execute node parameters which include, number of diemnsions for -// dynamic instances of the kernel, local and global work group sizes. -static void getExecuteNodeParams(Module &M, Value* &workDim, Value* &LocalWGPtr, Value* - &GlobalWGPtr, Kernel* kernel, ValueToValueMapTy& VMap, Instruction* IB) { - - // Assign number of dimenstions a constant value - workDim = ConstantInt::get(Type::getInt32Ty(M.getContext()), kernel->gridDim); - - // If local work group size if null - if(!kernel->hasLocalWG()) { - LocalWGPtr = Constant::getNullValue(Type::getInt64PtrTy(M.getContext())); - } - else { - for(unsigned i = 0; i < kernel->localWGSize.size(); i++) { - if(isa<Argument>(kernel->localWGSize[i])) - kernel->localWGSize[i] = VMap[kernel->localWGSize[i]]; - } - LocalWGPtr = genWorkGroupPtr(M, kernel->localWGSize, VMap, IB, "LocalWGSize"); - } - - for(unsigned i = 0; i < kernel->globalWGSize.size(); i++) { - if(isa<Argument>(kernel->globalWGSize[i])) - kernel->globalWGSize[i] = VMap[kernel->globalWGSize[i]]; - } - - // For OpenCL, global work group size is the total bumber of instances in each - // dimension. So, multiply local and global dim limits. - std::vector<Value*> globalWGSizeInsts; - if(kernel->hasLocalWG()) { - for (unsigned i = 0; i < kernel->gridDim; i++) { - BinaryOperator* MulInst = BinaryOperator::Create(Instruction::Mul, kernel->globalWGSize[i], kernel->localWGSize[i], "", IB); - globalWGSizeInsts.push_back(MulInst); - } - } - else { - globalWGSizeInsts = kernel->globalWGSize; - } - GlobalWGPtr = genWorkGroupPtr(M, globalWGSizeInsts, VMap, IB, "GlobalWGSize"); - DEBUG(errs() << "Pointer to global work group: " << *GlobalWGPtr << "\n"); -} - -// CodeGen for allocating space for Work Group on stack and returning a pointer -// to its address -static Value* genWorkGroupPtr(Module &M, std::vector<Value*> WGSize, ValueToValueMapTy& VMap, Instruction* IB, const Twine& WGName) { - Value* WGPtr; - // Get int64_t and or ease of use - Type* Int64Ty = Type::getInt64Ty(M.getContext()); - - // Work Group type is [#dim x i64] - Type* WGTy = ArrayType::get(Int64Ty, WGSize.size()); - // Allocate space of Global work group data on stack and get pointer to - // first element. - AllocaInst* WG = new AllocaInst(WGTy, WGName, IB); - WGPtr = BitCastInst::CreatePointerCast(WG, Int64Ty->getPointerTo(), WG->getName()+".0", IB); - Value* nextDim = WGPtr; - DEBUG(errs() << *WGPtr << "\n"); - - // Iterate over the number of dimensions and store the global work group - // size in that dimension - for(unsigned i=0; i < WGSize.size(); i++) { - assert(WGSize[i]->getType()->isIntegerTy() && "Dimension not an integer type!"); - - if(WGSize[i]->getType() != Int64Ty) { - // If number of dimensions are mentioned in any other integer format, - // generate code to extend it to i64. We need to use the mapped value in - // the new generated function, hence the use of VMap - // FIXME: Why are we changing the kernel WGSize vector here? - DEBUG(errs() << "Not i64. Zero extend required.\n"); - DEBUG(errs() << *WGSize[i] << "\n"); - CastInst* CI = BitCastInst::CreateIntegerCast(WGSize[i], Int64Ty, true, "", IB); - DEBUG(errs() << "Bitcast done.\n"); - StoreInst* SI = new StoreInst(CI, nextDim, IB); - DEBUG(errs() << "Zero extend done.\n"); - DEBUG(errs() << "\tZero extended work group size: " << *SI << "\n"); - } else { - // Store the value representing work group size in ith dimension on - // stack - StoreInst* SI = new StoreInst(WGSize[i], nextDim, IB); - - DEBUG(errs() << "\t Work group size: " << *SI << "\n"); - } - if(i+1 < WGSize.size()) { - // Move to next dimension - GetElementPtrInst* GEP = GetElementPtrInst::Create(nullptr, nextDim, - ArrayRef<Value*>(ConstantInt::get(Int64Ty, 1)), - WG->getName()+"."+Twine(i+1), - IB); - DEBUG(errs() << "\tPointer to next dimension on stack: " << *GEP << "\n"); - nextDim = GEP; - } - } - return WGPtr; - -} - -//Get generated SPIR binary name -static std::string getSPIRFilename(const Module& M) { - std::string mid = M.getModuleIdentifier(); - return mid.append(".kernels.bc"); - -} - -// Get the name of the input file from module ID -static std::string getFilenameFromModule(const Module& M) { - std::string moduleID = M.getModuleIdentifier(); - return moduleID.substr(moduleID.find_last_of("/")+1); -} - -// Changes the data layout of the Module to be compiled with SPIR backend -// TODO: Figure out when to call it, probably after duplicating the modules -static void changeDataLayout(Module &M) { - std::string spir64_layoutStr = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024"; - - M.setDataLayout(StringRef(spir64_layoutStr)); - return; -} - -static void changeTargetTriple(Module &M) { - std::string spir64_TargetTriple = "spir64-unknown-unknown"; - M.setTargetTriple(StringRef(spir64_TargetTriple)); -} - -// Helper function, generate a string representation of a type -static std::string printType(Type* ty) { - std::string type_str; - raw_string_ostream rso(type_str); - ty->print(rso); - return rso.str(); -} - -// Helper function to get mangled names of OpenCL built ins -static StringRef getMangledName(std::string name) { - Twine mangledName = "_Z"+Twine(name.size())+name+"j"; - return StringRef(mangledName.str()); -} - - -// Helper function, populate a vector with all return statements in a function -static void findReturnInst(Function* F, std::vector<ReturnInst *> & ReturnInstVec) { - for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) { - Instruction *I = &(*i); - ReturnInst* RI = dyn_cast<ReturnInst>(I); - if (RI) { - ReturnInstVec.push_back(RI); - } - } -} - -// Helper function, populate a vector with all IntrinsicID intrinsics in a function -static void findIntrinsicInst(Function* F, Intrinsic::ID IntrinsicID, std::vector<IntrinsicInst *> & IntrinsicInstVec) { - for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) { - Instruction *I = &(*i); - IntrinsicInst* II = dyn_cast<IntrinsicInst>(I); - if (II && II->getIntrinsicID() == IntrinsicID) { - IntrinsicInstVec.push_back(II); - } - } -} - -// Helper function to get mangled names of OpenCL built ins for atomics -static StringRef getAtomicMangledName(std::string name, unsigned addrspace, bool sign) { - Twine mangledName = "_Z" + - Twine(name.size())+name + - "PU3AS"+Twine(addrspace) + "jj"; -// ((sign) ? "ii" : "jj"); - return StringRef(mangledName.str()); -} - -// Helper funtion, returns the OpenCL function name corresponding to atomic op -static StringRef getAtomicOpName(Intrinsic::ID ID, unsigned addrspace) { - switch(ID) { - case Intrinsic::visc_atomic_cmpxchg: - return getAtomicMangledName("atom_cmpxchg", addrspace, true); - case Intrinsic::visc_atomic_add: - return getAtomicMangledName("atom_add", addrspace, true); - case Intrinsic::visc_atomic_sub: - return getAtomicMangledName("atom_sub", addrspace, true); - case Intrinsic::visc_atomic_min: - return getAtomicMangledName("atom_min", addrspace, true); - case Intrinsic::visc_atomic_umin: - return getAtomicMangledName("atom_min", addrspace, false); - case Intrinsic::visc_atomic_max: - return getAtomicMangledName("atom_max", addrspace, true); - case Intrinsic::visc_atomic_umax: - return getAtomicMangledName("atom_max", addrspace, false); - case Intrinsic::visc_atomic_inc: - return getAtomicMangledName("atom_inc", addrspace, true); - case Intrinsic::visc_atomic_dec: - return getAtomicMangledName("atom_dec", addrspace, true); - case Intrinsic::visc_atomic_xchg: - return getAtomicMangledName("atom_xchg", addrspace, true); - case Intrinsic::visc_atomic_and: - return getAtomicMangledName("atom_and", addrspace, true); - case Intrinsic::visc_atomic_or: - return getAtomicMangledName("atom_or", addrspace, true); - case Intrinsic::visc_atomic_xor: - return getAtomicMangledName("atom_xor", addrspace, true); - default: - llvm_unreachable("Unsupported atomic intrinsic!"); - }; -} - -static std::string getMathFunctionName(Intrinsic::ID ID) { - switch(ID) { - case Intrinsic::sin: return "_Z3sinf"; - case Intrinsic::cos: return "_Z3cosf"; - case Intrinsic::sqrt: return "_Z4sqrtf"; - case Intrinsic::floor: return "_Z5floorf"; - case Intrinsic::nvvm_rsqrt_approx_f: return "_Z5rsqrtf"; - default: - llvm_unreachable("Unsupported math function!"); - }; -} - -} // End of namespace - -char DFG2LLVM_SPIR::ID = 0; -static RegisterPass<DFG2LLVM_SPIR> X("dfg2llvm-spir", - "Dataflow Graph to LLVM for SPIR Pass", - false /* does not modify the CFG */, - true /* transformation, * - * not just analysis */); - diff --git a/hpvm/lib/Transforms/DFG2LLVM_SPIR/DFG2LLVM_SPIR.exports b/hpvm/lib/Transforms/DFG2LLVM_SPIR/DFG2LLVM_SPIR.exports deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/hpvm/lib/Transforms/DFG2LLVM_SPIR/LLVMBuild.txt b/hpvm/lib/Transforms/DFG2LLVM_SPIR/LLVMBuild.txt deleted file mode 100644 index 72c4de9efdc816ca74d54d96f4f66afd467d1639..0000000000000000000000000000000000000000 --- a/hpvm/lib/Transforms/DFG2LLVM_SPIR/LLVMBuild.txt +++ /dev/null @@ -1,21 +0,0 @@ -;===- ./lib/Transforms/DFG2LLVM_SPIR/LLVMBuild.txt -------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = DFG2LLVM_SPIR -parent = Transforms