diff --git a/hpvm/docs/compilation.md b/hpvm/docs/compilation.md
index 8e68d00174b6fb63bfb647a0dbee1aa5dbd10b6a..6381fec7d856c79fdd2ed31bc23fe02990c9e38d 100644
--- a/hpvm/docs/compilation.md
+++ b/hpvm/docs/compilation.md
@@ -5,11 +5,11 @@ Compilation of an HPVM program involves the following steps:
 2. `opt` takes (`main.ll`) and invokes the GenHPVM pass on it, which converts the HPVM-C function calls to HPVM intrinsics. This generates the HPVM textual representation (`main.hpvm.ll`).
 3. `opt` takes the HPVM textual representation (`main.hpvm.ll`) and invokes the following passes in sequence:
     * BuildDFG: Converts the textual representation to the internal HPVM representation.
-    * LocalMem and DFG2LLVM_NVPTX: Invoked only when GPU target is selected. Generates the kernel module (`main.kernels.ll`) and the portion of the host code that invokes the kernel into the host module (`main.host.ll`).
-    * DFG2LLVM_X86: Generates either all, or the remainder of the host module (`main.host.ll`) depending on the chosen target.
+    * LocalMem and DFG2LLVM_OpenCL: Invoked only when GPU target is selected. Generates the kernel module (`main.kernels.ll`) and the portion of the host code that invokes the kernel into the host module (`main.host.ll`).
+    * DFG2LLVM_CPU: Generates either all, or the remainder of the host module (`main.host.ll`) depending on the chosen target.
     * ClearDFG: Deletes the internal HPVM representation from memory.
 4. `clang` is used to compile any remaining project files that will later be linked with the host module.
 5. `llvm-link` takes the host module and all the other generated `ll` files, and links them with the HPVM runtime module (`hpvm-rt.bc`), to generate the linked host module (`main.host.linked.ll`).
 6. Generate the executable code from the generated `ll` files for all parts of the program:
     * GPU target: `llvm-cbe` takes the kernel module (`main.kernels.ll`) and generates an OpenCL representation of the kernels that will be invoked by the host.
-    * X86 target: `clang` takes the linked host module (`main.host.linked.ll`) and generates the X86 binary.
+    * CPU target: `clang` takes the linked host module (`main.host.linked.ll`) and generates the CPU binary.
diff --git a/hpvm/include/SupportHPVM/DFGraph.h b/hpvm/include/SupportHPVM/DFGraph.h
index d904e2401d7e9a58a38e9bca024de1a437cd56d1..2deb2ca8f5c17620da0ddf60e1ef269acde52235 100644
--- a/hpvm/include/SupportHPVM/DFGraph.h
+++ b/hpvm/include/SupportHPVM/DFGraph.h
@@ -51,11 +51,11 @@ struct TargetGenFunctions {
 };
 
 struct TargetGenFuncInfo {
-  bool cpu_hasX86Func;
-  bool gpu_hasX86Func;
-  bool spir_hasX86Func;
-  bool cudnn_hasX86Func;
-  bool promise_hasX86Func;
+  bool cpu_hasCPUFunc;
+  bool gpu_hasCPUFunc;
+  bool spir_hasCPUFunc;
+  bool cudnn_hasCPUFunc;
+  bool promise_hasCPUFunc;
 };
 
 class DFGraph {
@@ -191,7 +191,7 @@ private:
                                   ///< (if multiple are available)
   struct TargetGenFuncInfo GenFuncInfo;
   ///< True for each target generated function
-  ///< if the associated genFunc is an x86 function
+  ///< if the associated genFunc is a cpu function
   DFInternalNode *Parent;         ///< Pointer to parent dataflow Node
   unsigned NumOfDim;              ///< Number of dimensions
   std::vector<Value *> DimLimits; ///< Number of instances in each dimension
@@ -349,15 +349,15 @@ public:
 
   Function *getGenFunc() const { return GenFunc; }
 
-  void setHasX86FuncForTarget(hpvm::Target T, bool isX86Func) {
+  void setHasCPUFuncForTarget(hpvm::Target T, bool isCPUFunc) {
     switch (T) {
     case hpvm::None:
      return; // Do nothing.
    case hpvm::CPU_TARGET:
-      GenFuncInfo.cpu_hasX86Func = isX86Func;
+      GenFuncInfo.cpu_hasCPUFunc = isCPUFunc;
      break;
    case hpvm::GPU_TARGET:
-      GenFuncInfo.gpu_hasX86Func = isX86Func;
+      GenFuncInfo.gpu_hasCPUFunc = isCPUFunc;
      break;
    case hpvm::CPU_OR_GPU_TARGET:
      break;
@@ -368,14 +368,14 @@ public:
     return;
   }
 
-  bool hasX86GenFuncForTarget(hpvm::Target T) const {
+  bool hasCPUGenFuncForTarget(hpvm::Target T) const {
     switch (T) {
     case hpvm::None:
       return false;
     case hpvm::CPU_TARGET:
-      return GenFuncInfo.cpu_hasX86Func;
+      return GenFuncInfo.cpu_hasCPUFunc;
     case hpvm::GPU_TARGET:
-      return GenFuncInfo.gpu_hasX86Func;
+      return GenFuncInfo.gpu_hasCPUFunc;
     case hpvm::CPU_OR_GPU_TARGET:
       assert(false && "Single target expected (CPU/GPU/SPIR/CUDNN/PROMISE)\n");
     default:
@@ -384,7 +384,7 @@ public:
     return false;
   }
 
-  void addGenFunc(Function *F, hpvm::Target T, bool isX86Func) {
+  void addGenFunc(Function *F, hpvm::Target T, bool isCPUFunc) {
 
     switch (T) {
     case hpvm::CPU_TARGET:
@@ -393,7 +393,7 @@ public:
                      << FuncPointer->getName() << "\n");
       }
       GenFuncs.CPUGenFunc = F;
-      GenFuncInfo.cpu_hasX86Func = isX86Func;
+      GenFuncInfo.cpu_hasCPUFunc = isCPUFunc;
       break;
     case hpvm::GPU_TARGET:
       if (GenFuncs.GPUGenFunc != NULL) {
@@ -401,7 +401,7 @@ public:
                      << FuncPointer->getName() << "\n");
       }
       GenFuncs.GPUGenFunc = F;
-      GenFuncInfo.gpu_hasX86Func = isX86Func;
+      GenFuncInfo.gpu_hasCPUFunc = isCPUFunc;
       break;
     case hpvm::CPU_OR_GPU_TARGET:
       assert(false && "A node function should be set with a tag specifying its \
@@ -437,11 +437,11 @@ public:
       return;
     case hpvm::CPU_TARGET:
       GenFuncs.CPUGenFunc = NULL;
-      GenFuncInfo.cpu_hasX86Func = false;
+      GenFuncInfo.cpu_hasCPUFunc = false;
       break;
     case hpvm::GPU_TARGET:
       GenFuncs.GPUGenFunc = NULL;
-      GenFuncInfo.gpu_hasX86Func = false;
+      GenFuncInfo.gpu_hasCPUFunc = false;
       break;
     case hpvm::CPU_OR_GPU_TARGET:
       assert(false &&
@@ -690,11 +690,11 @@ DFNode::DFNode(IntrinsicInst *_II, Function *_FuncPointer, hpvm::Target _Hint,
   GenFuncs.CUDNNGenFunc = NULL;
   GenFuncs.PROMISEGenFunc = NULL;
 
-  GenFuncInfo.cpu_hasX86Func = false;
-  GenFuncInfo.gpu_hasX86Func = false;
-  GenFuncInfo.spir_hasX86Func = false;
-  GenFuncInfo.cudnn_hasX86Func = false;
-  GenFuncInfo.cudnn_hasX86Func = false;
+  GenFuncInfo.cpu_hasCPUFunc = false;
+  GenFuncInfo.gpu_hasCPUFunc = false;
+  GenFuncInfo.spir_hasCPUFunc = false;
+  GenFuncInfo.cudnn_hasCPUFunc = false;
+  GenFuncInfo.promise_hasCPUFunc = false;
 }
 
 void DFNode::setRank(unsigned r) {
diff --git a/hpvm/lib/Transforms/CMakeLists.txt b/hpvm/lib/Transforms/CMakeLists.txt
index 5c9b8b9fe026ea5612caa124535e02d28d619c53..74917773b04146456b84db9b2bbf0814cd9bf387 100644
--- a/hpvm/lib/Transforms/CMakeLists.txt
+++ b/hpvm/lib/Transforms/CMakeLists.txt
@@ -1,6 +1,6 @@
 add_subdirectory(BuildDFG)
 add_subdirectory(ClearDFG)
-add_subdirectory(DFG2LLVM_NVPTX)
-add_subdirectory(DFG2LLVM_X86)
+add_subdirectory(DFG2LLVM_OpenCL)
+add_subdirectory(DFG2LLVM_CPU)
 add_subdirectory(GenHPVM)
 add_subdirectory(LocalMem)
diff --git a/hpvm/lib/Transforms/DFG2LLVM_X86/CMakeLists.txt b/hpvm/lib/Transforms/DFG2LLVM_CPU/CMakeLists.txt
similarity index 79%
rename from hpvm/lib/Transforms/DFG2LLVM_X86/CMakeLists.txt
rename to hpvm/lib/Transforms/DFG2LLVM_CPU/CMakeLists.txt
index 0a3a225f1967dd73d44d1401a2bc45cb8d43ee69..b4e129ba01837cf328912f7787b861f843f4f581 100644
--- a/hpvm/lib/Transforms/DFG2LLVM_X86/CMakeLists.txt
+++ b/hpvm/lib/Transforms/DFG2LLVM_CPU/CMakeLists.txt
@@ -4,9 +4,9 @@ endif()
 
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DLLVM_BUILD_DIR=${PROJECT_BINARY_DIR}")
 
-add_llvm_library( LLVMDFG2LLVM_X86
+add_llvm_library( LLVMDFG2LLVM_CPU
   MODULE
-  DFG2LLVM_X86.cpp
+  DFG2LLVM_CPU.cpp
 
   DEPENDS intrinsics_gen
   PLUGIN_TOOL
diff --git a/hpvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp b/hpvm/lib/Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.cpp
similarity index 88%
rename from hpvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp
rename to hpvm/lib/Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.cpp
index 633afee593d01d6a579905cfd0f85f66e3060968..3f9f3101a3b0025a67ff432684163d6b859c6eb8 100644
--- a/hpvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp
+++ b/hpvm/lib/Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.cpp
@@ -1,4 +1,4 @@
-//===-------------------------- DFG2LLVM_X86.cpp --------------------------===//
+//===-------------------------- DFG2LLVM_CPU.cpp --------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -7,7 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "DFG2LLVM_X86"
+#define DEBUG_TYPE "DFG2LLVM_CPU"
 #include "SupportHPVM/DFG2LLVM.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
@@ -34,15 +34,15 @@ using namespace builddfg;
 using namespace dfg2llvm;
 
 // HPVM Command line option to use timer or not
-static cl::opt<bool> HPVMTimer_X86("hpvm-timers-x86",
-                                   cl::desc("Enable hpvm timers"));
+static cl::opt<bool> HPVMTimer_CPU("hpvm-timers-cpu",
+                                   cl::desc("Enable hpvm timers"));
 
 namespace {
 
-// DFG2LLVM_X86 - The first implementation.
-struct DFG2LLVM_X86 : public DFG2LLVM {
+// DFG2LLVM_CPU - The first implementation.
+struct DFG2LLVM_CPU : public DFG2LLVM {
   static char ID; // Pass identification, replacement for typeid
-  DFG2LLVM_X86() : DFG2LLVM(ID) {}
+  DFG2LLVM_CPU() : DFG2LLVM(ID) {}
 
 private:
   // Member variables
@@ -54,16 +54,16 @@ public:
 };
 
 // Visitor for Code generation traversal (tree traversal for now)
-class CGT_X86 : public CodeGenTraversal {
+class CGT_CPU : public CodeGenTraversal {
 
 private:
   // Member variables
 
   FunctionCallee malloc;
   // HPVM Runtime API
-  FunctionCallee llvm_hpvm_x86_launch;
-  FunctionCallee llvm_hpvm_x86_wait;
-  FunctionCallee llvm_hpvm_x86_argument_ptr;
+  FunctionCallee llvm_hpvm_cpu_launch;
+  FunctionCallee llvm_hpvm_cpu_wait;
+  FunctionCallee llvm_hpvm_cpu_argument_ptr;
 
   FunctionCallee llvm_hpvm_streamLaunch;
   FunctionCallee llvm_hpvm_streamPush;
@@ -76,10 +76,10 @@ private:
   FunctionCallee llvm_hpvm_createThread;
   FunctionCallee llvm_hpvm_bufferPush;
   FunctionCallee llvm_hpvm_bufferPop;
-  FunctionCallee llvm_hpvm_x86_dstack_push;
-  FunctionCallee llvm_hpvm_x86_dstack_pop;
-  FunctionCallee llvm_hpvm_x86_getDimLimit;
-  FunctionCallee llvm_hpvm_x86_getDimInstance;
+  FunctionCallee llvm_hpvm_cpu_dstack_push;
+  FunctionCallee llvm_hpvm_cpu_dstack_pop;
+  FunctionCallee llvm_hpvm_cpu_getDimLimit;
+  FunctionCallee llvm_hpvm_cpu_getDimInstance;
 
   // Functions
   std::vector<IntrinsicInst *> *getUseList(Value *LI);
@@ -87,11 +87,11 @@ private:
   void addWhileLoop(Instruction *, Instruction *, Instruction *, Value *);
   Instruction *addWhileLoopCounter(BasicBlock *, BasicBlock *, BasicBlock *);
   Argument *getArgumentFromEnd(Function *F, unsigned offset);
-  Value *getInValueAt(DFNode *Child, unsigned i, Function *ParentF_X86,
+  Value *getInValueAt(DFNode *Child, unsigned i, Function *ParentF_CPU,
                       Instruction *InsertBefore);
-  void invokeChild_X86(DFNode *C, Function *F_X86, ValueToValueMapTy &VMap,
+  void invokeChild_CPU(DFNode *C, Function *F_CPU, ValueToValueMapTy &VMap,
                        Instruction *InsertBefore);
-  void invokeChild_PTX(DFNode *C, Function *F_X86, ValueToValueMapTy &VMap,
-                       Instruction *InsertBefore);
+  void invokeChild_PTX(DFNode *C, Function *F_CPU,
+                       ValueToValueMapTy &VMap, Instruction *InsertBefore);
   StructType *getArgumentListStructTy(DFNode *);
   Function *createFunctionFilter(DFNode *C);
@@ -102,8 +102,8 @@ private:
 
   // Virtual Functions
   void init() {
-    HPVMTimer = HPVMTimer_X86;
-    TargetName = "X86";
+    HPVMTimer = HPVMTimer_CPU;
+    TargetName = "CPU";
   }
   void initRuntimeAPI();
   void codeGen(DFInternalNode *N);
@@ -113,7 +113,7 @@ private:
 
 public:
   // Constructor
-  CGT_X86(Module &_M, BuildDFG &_DFG) : CodeGenTraversal(_M, _DFG) {
+  CGT_CPU(Module &_M, BuildDFG &_DFG) : CodeGenTraversal(_M, _DFG) {
     init();
     initRuntimeAPI();
   }
@@ -122,8 +122,8 @@ public:
   void codeGenLaunchStreaming(DFInternalNode *Root);
 };
 
-bool DFG2LLVM_X86::runOnModule(Module &M) {
-  DEBUG(errs() << "\nDFG2LLVM_X86 PASS\n");
+bool DFG2LLVM_CPU::runOnModule(Module &M) {
+  DEBUG(errs() << "\nDFG2LLVM_CPU PASS\n");
 
   // Get the BuildDFG Analysis Results:
   // - Dataflow graph
@@ -136,7 +136,7 @@ bool DFG2LLVM_X86::runOnModule(Module &M) {
   // BuildDFG::HandleToDFEdge &HandleToDFEdgeMap = DFG.getHandleToDFEdgeMap();
 
   // Visitor for Code Generation Graph Traversal
-  CGT_X86 *CGTVisitor = new CGT_X86(M, DFG);
+  CGT_CPU *CGTVisitor = new CGT_CPU(M, DFG);
 
   // Iterate over all the DFGs and produce code for each one of them
   for (auto &rootNode : Roots) {
@@ -160,7 +160,7 @@ bool DFG2LLVM_X86::runOnModule(Module &M) {
 }
 
 // Initialize the HPVM runtime API. This makes it easier to insert these calls
-void CGT_X86::initRuntimeAPI() {
+void CGT_CPU::initRuntimeAPI() {
 
   // Load Runtime API Module
   SMDiagnostic Err;
@@ -176,10 +176,10 @@ void CGT_X86::initRuntimeAPI() {
   DEBUG(errs() << "Successfully loaded hpvm-rt API module\n");
 
   // Get or insert the global declarations for launch/wait functions
-  DECLARE(llvm_hpvm_x86_launch);
+  DECLARE(llvm_hpvm_cpu_launch);
   DECLARE(malloc);
-  DECLARE(llvm_hpvm_x86_wait);
-  DECLARE(llvm_hpvm_x86_argument_ptr);
+  DECLARE(llvm_hpvm_cpu_wait);
+  DECLARE(llvm_hpvm_cpu_argument_ptr);
   DECLARE(llvm_hpvm_streamLaunch);
   DECLARE(llvm_hpvm_streamPush);
   DECLARE(llvm_hpvm_streamPop);
@@ -191,10 +191,10 @@ void CGT_X86::initRuntimeAPI() {
   DECLARE(llvm_hpvm_createThread);
   DECLARE(llvm_hpvm_bufferPush);
   DECLARE(llvm_hpvm_bufferPop);
-  DECLARE(llvm_hpvm_x86_dstack_push);
-  DECLARE(llvm_hpvm_x86_dstack_pop);
-  DECLARE(llvm_hpvm_x86_getDimLimit);
-  DECLARE(llvm_hpvm_x86_getDimInstance);
+  DECLARE(llvm_hpvm_cpu_dstack_push);
+  DECLARE(llvm_hpvm_cpu_dstack_pop);
+  DECLARE(llvm_hpvm_cpu_getDimLimit);
+  DECLARE(llvm_hpvm_cpu_getDimInstance);
 
   // Get or insert timerAPI functions as well if you plan to use timers
   initTimerAPI();
@@ -202,7 +202,7 @@ void CGT_X86::initRuntimeAPI() {
   // Insert init context in main
   Function *VI = M.getFunction("llvm.hpvm.init");
   assert(VI->getNumUses() == 1 && "__hpvm__init should only be used once");
-  DEBUG(errs() << "Inserting x86 timer initialization\n");
+  DEBUG(errs() << "Inserting cpu timer initialization\n");
   Instruction *I = cast<Instruction>(*VI->user_begin());
   initializeTimerSet(I);
   switchToTimer(hpvm_TimerID_NONE, I);
@@ -210,13 +210,13 @@ void CGT_X86::initRuntimeAPI() {
   Function *VC = M.getFunction("llvm.hpvm.cleanup");
   assert(VC->getNumUses() == 1 && "__hpvm__cleanup should only be used once");
 
-  DEBUG(errs() << "Inserting x86 timer print\n");
+  DEBUG(errs() << "Inserting cpu timer print\n");
   printTimerSet(I);
 }
 
 /* Returns vector of all wait instructions */
-std::vector<IntrinsicInst *> *CGT_X86::getUseList(Value *GraphID) {
+std::vector<IntrinsicInst *> *CGT_CPU::getUseList(Value *GraphID) {
   std::vector<IntrinsicInst *> *UseList = new std::vector<IntrinsicInst *>();
   // It must have been loaded from memory somewhere
   for (Value::user_iterator ui = GraphID->user_begin(),
@@ -234,7 +234,7 @@ std::vector<IntrinsicInst *> *CGT_X86::getUseList(Value *GraphID) {
 
 /* Traverse the function argument list in reverse order to get argument at a
  * distance offset from the end of argument list of function F */
-Argument *CGT_X86::getArgumentFromEnd(Function *F, unsigned offset) {
+Argument *CGT_CPU::getArgumentFromEnd(Function *F, unsigned offset) {
   assert((F->getFunctionType()->getNumParams() >= offset && offset > 0) &&
          "Invalid offset to access arguments!");
   Function::arg_iterator e = F->arg_end();
@@ -259,7 +259,7 @@ Argument *CGT_X86::getArgumentFromEnd(Function *F, unsigned offset) {
  * which loops over body if true and goes to end if false
  * (5) Update phi node of body
  */
-void CGT_X86::addWhileLoop(Instruction *CondBlockStart, Instruction *BodyStart,
+void CGT_CPU::addWhileLoop(Instruction *CondBlockStart, Instruction *BodyStart,
                            Instruction *BodyEnd, Value *TerminationCond) {
   BasicBlock *Entry = CondBlockStart->getParent();
   BasicBlock *CondBlock = Entry->splitBasicBlock(CondBlockStart, "condition");
@@ -276,7 +276,7 @@ void CGT_X86::addWhileLoop(Instruction *CondBlockStart, Instruction *BodyStart,
   ReplaceInstWithInst(WhileBody->getTerminator(), UnconditionalBranch);
 }
 
-Instruction *CGT_X86::addWhileLoopCounter(BasicBlock *Entry, BasicBlock *Cond,
+Instruction *CGT_CPU::addWhileLoopCounter(BasicBlock *Entry, BasicBlock *Cond,
                                           BasicBlock *Body) {
   Module *M = Entry->getParent()->getParent();
   Type *Int64Ty = Type::getInt64Ty(M->getContext());
@@ -311,7 +311,7 @@ Instruction *CGT_X86::addWhileLoopCounter(BasicBlock *Entry, BasicBlock *Cond,
  * which loops over body if true and goes to end if false
  * (5) Update phi node of body
  */
-Value *CGT_X86::addLoop(Instruction *I, Value *limit, const Twine &indexName) {
+Value *CGT_CPU::addLoop(Instruction *I, Value *limit, const Twine &indexName) {
   BasicBlock *Entry = I->getParent();
   BasicBlock *ForBody = Entry->splitBasicBlock(I, "for.body");
@@ -356,7 +356,7 @@ Value *CGT_X86::addLoop(Instruction *I, Value *limit, const Twine &indexName) {
 // types, output types and isLastInput buffer type. All the streaming
 // inputs/outputs are converted to i8*, since this is the type of buffer
 // handles.
-StructType *CGT_X86::getArgumentListStructTy(DFNode *C) {
+StructType *CGT_CPU::getArgumentListStructTy(DFNode *C) {
   std::vector<Type *> TyList;
   // Input types
   Function *CF = C->getFuncPointer();
@@ -384,7 +384,7 @@ StructType *CGT_X86::getArgumentListStructTy(DFNode *C) {
   return STy;
 }
 
-void CGT_X86::startNodeThread(DFNode *C, std::vector<Value *> Args,
+void CGT_CPU::startNodeThread(DFNode *C, std::vector<Value *> Args,
                               DenseMap<DFEdge *, Value *> EdgeBufferMap,
                               Value *isLastInputBuffer, Value *graphID,
                               Instruction *IB) {
@@ -495,7 +495,7 @@ void CGT_X86::startNodeThread(DFNode *C, std::vector<Value *> Args,
                    ArrayRef<Value *>(CreateThreadArgs, 3), "", IB);
 }
 
-Function *CGT_X86::createLaunchFunction(DFInternalNode *N) {
+Function *CGT_CPU::createLaunchFunction(DFInternalNode *N) {
   DEBUG(errs() << "Generating Streaming Launch Function\n");
   // Get Function associated with Node N
   Function *NF = N->getFuncPointer();
@@ -643,7 +643,7 @@ Function *CGT_X86::createLaunchFunction(DFInternalNode *N) {
  * Modify each of the intrinsics in host code
  * Launch, Push, Pop, Wait
  */
-void CGT_X86::codeGenLaunchStreaming(DFInternalNode *Root) {
+void CGT_CPU::codeGenLaunchStreaming(DFInternalNode *Root) {
   IntrinsicInst *LI = Root->getInstruction();
   Function *RootLaunch = createLaunchFunction(Root);
   // Substitute launch intrinsic in main
@@ -654,7 +654,7 @@ void CGT_X86::codeGenLaunchStreaming(DFInternalNode *Root) {
                        "graph" + Root->getFuncPointer()->getName(), LI);
   DEBUG(errs() << *LaunchInst << "\n");
 
-  // Replace all wait instructions with x86 specific wait instructions
+  // Replace all wait instructions with cpu specific wait instructions
   DEBUG(errs() << "Substitute wait, push, pop intrinsics\n");
   std::vector<IntrinsicInst *> *UseList = getUseList(LI);
   for (unsigned i = 0; i < UseList->size(); ++i) {
@@ -684,7 +684,7 @@ void CGT_X86::codeGenLaunchStreaming(DFInternalNode *Root) {
   }
 }
 
-void CGT_X86::codeGenLaunch(DFInternalNode *Root) {
+void CGT_CPU::codeGenLaunch(DFInternalNode *Root) {
   // TODO: Place an assert to check if the constant passed by launch intrinsic
   // as the number of arguments to DFG is the same as the number of arguments of
   // the root of DFG
@@ -725,28 +725,28 @@ void CGT_X86::codeGenLaunch(DFInternalNode *Root) {
   switchToTimer(hpvm_TimerID_ARG_UNPACK, RI);
 
   DEBUG(errs() << "Created Empty Launch Function\n");
 
-  // Find the X86 function generated for Root and
-  // Function* RootF_X86 = Root->getGenFunc();
-  Function *RootF_X86 = Root->getGenFuncForTarget(hpvm::CPU_TARGET);
-  assert(RootF_X86 && "Error: No generated CPU function for Root node\n");
-  assert(Root->hasX86GenFuncForTarget(hpvm::CPU_TARGET) &&
-         "Error: Generated Function for Root node with no x86 wrapper\n");
-
-  // Generate a call to RootF_X86 with null parameters for now
+  // Find the CPU function generated for Root and
+  // Function* RootF_CPU = Root->getGenFunc();
+  Function *RootF_CPU = Root->getGenFuncForTarget(hpvm::CPU_TARGET);
+  assert(RootF_CPU && "Error: No generated CPU function for Root node\n");
+  assert(Root->hasCPUGenFuncForTarget(hpvm::CPU_TARGET) &&
+         "Error: Generated Function for Root node with no cpu wrapper\n");
+
+  // Generate a call to RootF_CPU with null parameters for now
   std::vector<Value *> Args;
-  for (unsigned i = 0; i < RootF_X86->getFunctionType()->getNumParams(); i++) {
+  for (unsigned i = 0; i < RootF_CPU->getFunctionType()->getNumParams(); i++) {
     Args.push_back(
-        Constant::getNullValue(RootF_X86->getFunctionType()->getParamType(i)));
+        Constant::getNullValue(RootF_CPU->getFunctionType()->getParamType(i)));
   }
   CallInst *CI =
-      CallInst::Create(RootF_X86, Args, RootF_X86->getName() + ".output", RI);
+      CallInst::Create(RootF_CPU, Args, RootF_CPU->getName() + ".output", RI);
 
   // Extract input data from i8* data.addr and patch them to correct argument of
-  // call to RootF_X86. For each argument
+  // call to RootF_CPU. For each argument
   std::vector<Type *> TyList;
   std::vector<std::string> names;
-  for (Function::arg_iterator ai = RootF_X86->arg_begin(),
-                              ae = RootF_X86->arg_end();
+  for (Function::arg_iterator ai = RootF_CPU->arg_begin(),
+                              ae = RootF_CPU->arg_end();
        ai != ae; ++ai) {
     TyList.push_back(ai->getType());
     names.push_back(ai->getName());
@@ -756,19 +756,19 @@ void CGT_X86::codeGenLaunch(DFInternalNode *Root) {
   for (unsigned i = 0; i < CI->getNumArgOperands(); i++)
     CI->setArgOperand(i, elements[i]);
 
-  // Add timers around Call to RootF_X86 function
+  // Add timers around Call to RootF_CPU function
   switchToTimer(hpvm_TimerID_COMPUTATION, CI);
   switchToTimer(hpvm_TimerID_OUTPUT_PACK, RI);
 
   StructType *RootRetTy =
-      cast<StructType>(RootF_X86->getFunctionType()->getReturnType());
+      cast<StructType>(RootF_CPU->getFunctionType()->getReturnType());
 
   // if Root has non-empty return
   if (RootRetTy->getNumElements()) {
     // We can't access the type of the arg struct - build it
     std::vector<Type *> TyList;
-    for (Function::arg_iterator ai = RootF_X86->arg_begin(),
-                                ae = RootF_X86->arg_end();
+    for (Function::arg_iterator ai = RootF_CPU->arg_begin(),
+                                ae = RootF_CPU->arg_end();
          ai != ae; ++ai) {
       TyList.push_back(ai->getType());
     }
@@ -776,7 +776,7 @@ void CGT_X86::codeGenLaunch(DFInternalNode *Root) {
 
     StructType *ArgStructTy = StructType::create(
         M.getContext(), ArrayRef<Type *>(TyList),
-        (RootF_X86->getName() + ".arg.struct.ty").str(), true);
+        (RootF_CPU->getName() + ".arg.struct.ty").str(), true);
 
     // Cast the data pointer to the type of the arg struct
     CastInst *OutputAddrCast = CastInst::CreatePointerCast(
@@ -816,19 +816,19 @@ void CGT_X86::codeGenLaunch(DFInternalNode *Root) {
   // Substitute launch intrinsic in main
   Value *LaunchInstArgs[] = {AppFunc, LI->getArgOperand(1)};
   CallInst *LaunchInst = CallInst::Create(
-      llvm_hpvm_x86_launch, ArrayRef<Value *>(LaunchInstArgs, 2),
+      llvm_hpvm_cpu_launch, ArrayRef<Value *>(LaunchInstArgs, 2),
       "graph" + Root->getFuncPointer()->getName(), LI);
   // ReplaceInstWithInst(LI, LaunchInst);
 
   DEBUG(errs() << *LaunchInst << "\n");
-  // Replace all wait instructions with x86 specific wait instructions
+  // Replace all wait instructions with cpu specific wait instructions
   std::vector<IntrinsicInst *> *UseList = getUseList(LI);
   for (unsigned i = 0; i < UseList->size(); ++i) {
     IntrinsicInst *II = UseList->at(i);
     CallInst *CI;
     switch (II->getIntrinsicID()) {
     case Intrinsic::hpvm_wait:
-      CI = CallInst::Create(llvm_hpvm_x86_wait, ArrayRef<Value *>(LaunchInst),
+      CI = CallInst::Create(llvm_hpvm_cpu_wait, ArrayRef<Value *>(LaunchInst),
                             "");
       break;
     case Intrinsic::hpvm_push:
@@ -848,7 +848,7 @@ void CGT_X86::codeGenLaunch(DFInternalNode *Root) {
   }
 }
 
-Value *CGT_X86::getInValueAt(DFNode *Child, unsigned i, Function *ParentF_X86,
+Value *CGT_CPU::getInValueAt(DFNode *Child, unsigned i, Function *ParentF_CPU,
                              Instruction *InsertBefore) {
   // TODO: Assumption is that each input port of a node has just one
   // incoming edge. May change later on.
@@ -863,7 +863,7 @@ Value *CGT_X86::getInValueAt(DFNode *Child, unsigned i, Function *ParentF_X86,
   // argument from argument list of this internal node
   Value *inputVal;
   if (SrcDF->isEntryNode()) {
-    inputVal = getArgumentAt(ParentF_X86, E->getSourcePosition());
+    inputVal = getArgumentAt(ParentF_CPU, E->getSourcePosition());
     DEBUG(errs() << "Argument " << i << " = " << *inputVal << "\n");
   } else {
     // edge is from a sibling
@@ -885,38 +885,38 @@ Value *CGT_X86::getInValueAt(DFNode *Child, unsigned i, Function *ParentF_X86,
   return inputVal;
 }
 
-void CGT_X86::invokeChild_X86(DFNode *C, Function *F_X86,
+void CGT_CPU::invokeChild_CPU(DFNode *C, Function *F_CPU,
                               ValueToValueMapTy &VMap, Instruction *IB) {
   Function *CF = C->getFuncPointer();
 
-  // Function* CF_X86 = C->getGenFunc();
-  Function *CF_X86 = C->getGenFuncForTarget(hpvm::CPU_TARGET);
-  assert(CF_X86 != NULL &&
+  // Function* CF_CPU = C->getGenFunc();
+  Function *CF_CPU = C->getGenFuncForTarget(hpvm::CPU_TARGET);
+  assert(CF_CPU != NULL &&
          "Found leaf node for which code generation has not happened yet!\n");
-  assert(C->hasX86GenFuncForTarget(hpvm::CPU_TARGET) &&
-         "The generated function to be called from x86 backend is not an x86 "
+  assert(C->hasCPUGenFuncForTarget(hpvm::CPU_TARGET) &&
+         "The generated function to be called from cpu backend is not a cpu "
          "function\n");
-  DEBUG(errs() << "Invoking child node" << CF_X86->getName() << "\n");
+  DEBUG(errs() << "Invoking child node " << CF_CPU->getName() << "\n");
 
   std::vector<Value *> Args;
   // Create argument list to pass to call instruction
   // First find the correct values using the edges
   // The remaining six values are inserted as constants for now.
   for (unsigned i = 0; i < CF->getFunctionType()->getNumParams(); i++) {
-    Args.push_back(getInValueAt(C, i, F_X86, IB));
+    Args.push_back(getInValueAt(C, i, F_CPU, IB));
   }
 
-  Value *I64Zero = ConstantInt::get(Type::getInt64Ty(F_X86->getContext()), 0);
+  Value *I64Zero = ConstantInt::get(Type::getInt64Ty(F_CPU->getContext()), 0);
   for (unsigned j = 0; j < 6; j++)
     Args.push_back(I64Zero);
 
-  DEBUG(errs() << "Gen Function type: " << *CF_X86->getType() << "\n");
+  DEBUG(errs() << "Gen Function type: " << *CF_CPU->getType() << "\n");
   DEBUG(errs() << "Node Function type: " << *CF->getType() << "\n");
   DEBUG(errs() << "Arguments: " << Args.size() << "\n");
 
-  // Call the F_X86 function associated with this node
+  // Call the F_CPU function associated with this node
   CallInst *CI =
-      CallInst::Create(CF_X86, Args, CF_X86->getName() + "_output", IB);
+      CallInst::Create(CF_CPU, Args, CF_CPU->getName() + "_output", IB);
   DEBUG(errs() << *CI << "\n");
   OutputMap[C] = CI;
 
@@ -928,7 +928,7 @@ void CGT_X86::invokeChild_X86(DFNode *C, Function *F_X86,
     Value *indexLimit = NULL;
     // Limit can either be a constant or an argument of the internal node.
     // In case of constant we can use that constant value directly in the
-    // new F_X86 function. In case of an argument, we need to get the mapped
+    // new F_CPU function. In case of an argument, we need to get the mapped
     // value using VMap
     if (isa<Constant>(C->getDimLimits()[j])) {
       indexLimit = C->getDimLimits()[j];
@@ -960,7 +960,7 @@ void CGT_X86::invokeChild_X86(DFNode *C, Function *F_X86,
         CI->getArgOperand(numArgs - 6 + 2) // iZ
     };
 
-    CallInst *Push = CallInst::Create(llvm_hpvm_x86_dstack_push,
+    CallInst *Push = CallInst::Create(llvm_hpvm_cpu_dstack_push,
                                       ArrayRef<Value *>(args, 7), "", CI);
     DEBUG(errs() << "Push on stack: " << *Push << "\n");
     // Insert call to runtime to pop the dim limits and instanceID from the depth
@@ -973,7 +973,7 @@ void CGT_X86::invokeChild_X86(DFNode *C, Function *F_X86,
     assert(NextI->getParent() == CI->getParent() &&
            "Next Instruction should also belong to the same basic block!");
 
-    CallInst *Pop = CallInst::Create(llvm_hpvm_x86_dstack_pop, None, "", NextI);
+    CallInst *Pop = CallInst::Create(llvm_hpvm_cpu_dstack_pop, None, "", NextI);
     DEBUG(errs() << "Pop from stack: " << *Pop << "\n");
     DEBUG(errs() << *CI->getParent()->getParent());
   }
@@ -994,7 +994,7 @@ void CGT_X86::invokeChild_X86(DFNode *C, Function *F_X86,
 // Add runtime API calls to push output for each of the streaming outputs
 // Add loop around the basic block, which exits the loop if isLastInput is false
-Function *CGT_X86::createFunctionFilter(DFNode *C) {
+Function *CGT_CPU::createFunctionFilter(DFNode *C) {
   DEBUG(errs() << "*********Creating Function filter for "
                << C->getFuncPointer()->getName() << "*****\n");
 
@@ -1160,7 +1160,7 @@ Function *CGT_X86::createFunctionFilter(DFNode *C) {
   return CF_Pipeline;
 }
 
-void CGT_X86::codeGen(DFInternalNode *N) {
+void CGT_CPU::codeGen(DFInternalNode *N) {
   // Check if N is root node and its graph is streaming. We do not do codeGen
   // for Root in such a case
   if (N->isRoot() && N->isChildGraphStreaming())
@@ -1182,7 +1182,7 @@ void CGT_X86::codeGen(DFInternalNode *N) {
   // Sort children in topological order before code generation
   N->getChildGraph()->sortChildren();
 
-  // Only process if all children have a CPU x86 function
+  // Only process if all children have a CPU function
   // Otherwise skip to end
   bool codeGen = true;
   for (DFGraph::children_iterator ci = N->getChildGraph()->begin(),
@@ -1193,8 +1193,8 @@ void CGT_X86::codeGen(DFInternalNode *N) {
     if (C->isDummyNode())
       continue;
 
-    if (!(C->hasX86GenFuncForTarget(hpvm::CPU_TARGET))) {
-      DEBUG(errs() << "No CPU x86 version for child node "
+    if (!(C->hasCPUGenFuncForTarget(hpvm::CPU_TARGET))) {
+      DEBUG(errs() << "No CPU version for child node "
                    << C->getFuncPointer()->getName()
                    << "\n Skip code gen for parent node "
                    << N->getFuncPointer()->getName() << "\n");
@@ -1206,18 +1206,18 @@ void CGT_X86::codeGen(DFInternalNode *N) {
   Function *F = N->getFuncPointer();
   // Create a clone of F with no instructions. Only the type is the same as F
   // without the extra arguments.
-  Function *F_X86;
+  Function *F_CPU;
 
   // Clone the function, if we are seeing this function for the first time. We
   // only need a clone in terms of type.
   ValueToValueMapTy VMap;
 
   // Create new function with the same type
-  F_X86 = Function::Create(F->getFunctionType(), F->getLinkage(),
+  F_CPU = Function::Create(F->getFunctionType(), F->getLinkage(),
                            F->getName(), &M);
 
   // Loop over the arguments, copying the names of arguments over.
-  Function::arg_iterator dest_iterator = F_X86->arg_begin();
+  Function::arg_iterator dest_iterator = F_CPU->arg_begin();
   for (Function::const_arg_iterator i = F->arg_begin(), e = F->arg_end();
        i != e; ++i) {
     dest_iterator->setName(i->getName()); // Copy the name over...
@@ -1226,24 +1226,24 @@ void CGT_X86::codeGen(DFInternalNode *N) {
   }
 
   // Add a basic block to this empty function
-  BasicBlock *BB = BasicBlock::Create(F_X86->getContext(), "entry", F_X86);
+  BasicBlock *BB = BasicBlock::Create(F_CPU->getContext(), "entry", F_CPU);
   ReturnInst *RI = ReturnInst::Create(
-      F_X86->getContext(), UndefValue::get(F_X86->getReturnType()), BB);
+      F_CPU->getContext(), UndefValue::get(F_CPU->getReturnType()), BB);
 
   // Add Index and Dim arguments except for the root node and the child graph
   // of parent node is not streaming
   if (!N->isRoot() && !N->getParent()->isChildGraphStreaming())
-    F_X86 = addIdxDimArgs(F_X86);
+    F_CPU = addIdxDimArgs(F_CPU);
 
-  BB = &*F_X86->begin();
+  BB = &*F_CPU->begin();
   RI = cast<ReturnInst>(BB->getTerminator());
 
   // Add generated function info to DFNode
-  // N->setGenFunc(F_X86, hpvm::CPU_TARGET);
-  N->addGenFunc(F_X86, hpvm::CPU_TARGET, true);
+  // N->setGenFunc(F_CPU, hpvm::CPU_TARGET);
+  N->addGenFunc(F_CPU, hpvm::CPU_TARGET, true);
 
   // Loop over the arguments, to create the VMap.
-  dest_iterator = F_X86->arg_begin();
+  dest_iterator = F_CPU->arg_begin();
   for (Function::const_arg_iterator i = F->arg_begin(), e = F->arg_end();
        i != e; ++i) {
     // Add mapping and increment dest iterator
@@ -1261,7 +1261,7 @@ void CGT_X86::codeGen(DFInternalNode *N) {
       continue;
 
     // Create calls to CPU function of child node
-    invokeChild_X86(C, F_X86, VMap, RI);
+    invokeChild_CPU(C, F_CPU, VMap, RI);
   }
 
   DEBUG(errs() << "*** Generating epilogue code for the function****\n");
@@ -1270,7 +1270,7 @@ void CGT_X86::codeGen(DFInternalNode *N) {
   DFNode *C = N->getChildGraph()->getExit();
   // Get OutputType of this node
   StructType *OutTy = N->getOutputType();
-  Value *retVal = UndefValue::get(F_X86->getReturnType());
+  Value *retVal = UndefValue::get(F_CPU->getReturnType());
   // Find all the input edges to exit node
   for (unsigned i = 0; i < OutTy->getNumElements(); i++) {
     DEBUG(errs() << "Output Edge " << i << "\n");
@@ -1288,7 +1288,7 @@ void CGT_X86::codeGen(DFInternalNode *N) {
     // argument from argument list of this internal node
     Value *inputVal;
     if (SrcDF->isEntryNode()) {
-      inputVal = getArgumentAt(F_X86, i);
+      inputVal = getArgumentAt(F_CPU, i);
       DEBUG(errs() << "Argument " << i << " = " << *inputVal << "\n");
     } else {
       // edge is from an internal node
@@ -1313,14 +1313,14 @@ void CGT_X86::codeGen(DFInternalNode *N) {
   }
   DEBUG(errs() << "Extracted all\n");
   retVal->setName("output");
-  ReturnInst *newRI = ReturnInst::Create(F_X86->getContext(), retVal);
+  ReturnInst *newRI = ReturnInst::Create(F_CPU->getContext(), retVal);
   ReplaceInstWithInst(RI, newRI);
   }
 
   //-------------------------------------------------------------------------//
   // Here, we need to check if this node (N) has more than one version
   // If so, we query the policy and have a call to each version
-  // If not, we see which version exists, check that it is in fact an x86
+  // If not, we see which version exists, check that it is in fact a cpu
   // function and save it as the CPU_TARGET function
 
   // TODO: hpvm_id per node, so we can use this for id for policies
@@ -1328,16 +1328,16 @@ void CGT_X86::codeGen(DFInternalNode *N) {
   Function *CF = N->getGenFuncForTarget(hpvm::CPU_TARGET);
   Function *GF = N->getGenFuncForTarget(hpvm::GPU_TARGET);
 
-  bool CFx86 = N->hasX86GenFuncForTarget(hpvm::CPU_TARGET);
-  bool GFx86 = N->hasX86GenFuncForTarget(hpvm::GPU_TARGET);
+  bool CFcpu = N->hasCPUGenFuncForTarget(hpvm::CPU_TARGET);
+  bool GFcpu = N->hasCPUGenFuncForTarget(hpvm::GPU_TARGET);
 
   DEBUG(errs() << "Before editing\n");
DEBUG(errs() << "Node: " << N->getFuncPointer()->getName() << " with tag " << N->getTag() << "\n"); DEBUG(errs() << "CPU Fun: " << (CF ? CF->getName() : "null") << "\n"); - DEBUG(errs() << "hasx86GenFuncForCPU : " << CFx86 << "\n"); + DEBUG(errs() << "hascpuGenFuncForCPU : " << CFcpu << "\n"); DEBUG(errs() << "GPU Fun: " << (GF ? GF->getName() : "null") << "\n"); - DEBUG(errs() << "hasx86GenFuncForGPU : " << GFx86 << "\n"); + DEBUG(errs() << "hascpuGenFuncForGPU : " << GFcpu << "\n"); if (N->getTag() == hpvm::None) { // No code is available for this node. This (usually) means that this @@ -1357,15 +1357,15 @@ void CGT_X86::codeGen(DFInternalNode *N) { switch (N->getTag()) { case hpvm::CPU_TARGET: assert(N->getGenFuncForTarget(hpvm::CPU_TARGET) && ""); - assert(N->hasX86GenFuncForTarget(hpvm::CPU_TARGET) && ""); + assert(N->hasCPUGenFuncForTarget(hpvm::CPU_TARGET) && ""); assert(!(N->getGenFuncForTarget(hpvm::GPU_TARGET)) && ""); - assert(!(N->hasX86GenFuncForTarget(hpvm::GPU_TARGET)) && ""); + assert(!(N->hasCPUGenFuncForTarget(hpvm::GPU_TARGET)) && ""); break; case hpvm::GPU_TARGET: assert(!(N->getGenFuncForTarget(hpvm::CPU_TARGET)) && ""); - assert(!(N->hasX86GenFuncForTarget(hpvm::CPU_TARGET)) && ""); + assert(!(N->hasCPUGenFuncForTarget(hpvm::CPU_TARGET)) && ""); assert(N->getGenFuncForTarget(hpvm::GPU_TARGET) && ""); - assert(N->hasX86GenFuncForTarget(hpvm::GPU_TARGET) && ""); + assert(N->hasCPUGenFuncForTarget(hpvm::GPU_TARGET) && ""); break; default: assert(false && "Unreachable: we checked that tag was single target!\n"); @@ -1380,16 +1380,16 @@ void CGT_X86::codeGen(DFInternalNode *N) { CF = N->getGenFuncForTarget(hpvm::CPU_TARGET); GF = N->getGenFuncForTarget(hpvm::GPU_TARGET); - CFx86 = N->hasX86GenFuncForTarget(hpvm::CPU_TARGET); - GFx86 = N->hasX86GenFuncForTarget(hpvm::GPU_TARGET); + CFcpu = N->hasCPUGenFuncForTarget(hpvm::CPU_TARGET); + GFcpu = N->hasCPUGenFuncForTarget(hpvm::GPU_TARGET); DEBUG(errs() << "After editing\n"); DEBUG(errs() << "Node: " << N->getFuncPointer()->getName() << " with tag " << N->getTag() << "\n"); DEBUG(errs() << "CPU Fun: " << (CF ? CF->getName() : "null") << "\n"); - DEBUG(errs() << "hasx86GenFuncForCPU : " << CFx86 << "\n"); + DEBUG(errs() << "hascpuGenFuncForCPU : " << CFcpu << "\n"); DEBUG(errs() << "GPU Fun: " << (GF ? GF->getName() : "null") << "\n"); - DEBUG(errs() << "hasx86GenFuncForGPU : " << GFx86 << "\n"); + DEBUG(errs() << "hascpuGenFuncForGPU : " << GFcpu << "\n"); } else { assert(false && "Multiple tags unsupported!"); @@ -1397,14 +1397,14 @@ void CGT_X86::codeGen(DFInternalNode *N) { } // Code generation for leaf nodes -void CGT_X86::codeGen(DFLeafNode *N) { +void CGT_CPU::codeGen(DFLeafNode *N) { // Skip code generation if it is a dummy node if (N->isDummyNode()) { DEBUG(errs() << "Skipping dummy node\n"); return; } - // At this point, the X86 backend does not support code generation for + // At this point, the CPU backend does not support code generation for // the case where allocation node is used, so we skip. 
   // CPU version will not be created, and therefore code generation will
   // only succeed if another backend (nvptx or spir) has been invoked to
@@ -1425,9 +1425,9 @@ void CGT_X86::codeGen(DFLeafNode *N) {
 
   switch (N->getTag()) {
   case hpvm::GPU_TARGET:
-    // A leaf node should not have an x86 function for GPU
-    // by design of DFG2LLVM_NVPTX backend
-    assert(!(N->hasX86GenFuncForTarget(hpvm::GPU_TARGET)) &&
+    // A leaf node should not have a cpu function for GPU
+    // by design of DFG2LLVM_OpenCL backend
+    assert(!(N->hasCPUGenFuncForTarget(hpvm::GPU_TARGET)) &&
            "Leaf node not expected to have GPU GenFunc");
     break;
   default:
@@ -1448,34 +1448,34 @@ void CGT_X86::codeGen(DFLeafNode *N) {
   Function *F = N->getFuncPointer();
 
   // Clone the function, if we are seeing this function for the first time.
-  Function *F_X86;
+  Function *F_CPU;
   ValueToValueMapTy VMap;
-  F_X86 = CloneFunction(F, VMap);
-  F_X86->removeFromParent();
+  F_CPU = CloneFunction(F, VMap);
+  F_CPU->removeFromParent();
   // Insert the cloned function into the module
-  M.getFunctionList().push_back(F_X86);
+  M.getFunctionList().push_back(F_CPU);
 
   // Add the new argument to the argument list. Add arguments only if the child
   // graph of parent node is not streaming
   if (!N->getParent()->isChildGraphStreaming())
-    F_X86 = addIdxDimArgs(F_X86);
+    F_CPU = addIdxDimArgs(F_CPU);
 
   // Add generated function info to DFNode
-  // N->setGenFunc(F_X86, hpvm::CPU_TARGET);
-  N->addGenFunc(F_X86, hpvm::CPU_TARGET, true);
+  // N->setGenFunc(F_CPU, hpvm::CPU_TARGET);
+  N->addGenFunc(F_CPU, hpvm::CPU_TARGET, true);
 
   // Go through the arguments, and any pointer arguments with in attribute need
-  // to have x86_argument_ptr call to get the x86 ptr of the argument
+  // to have cpu_argument_ptr call to get the cpu ptr of the argument
   // Insert these calls in a new BB which would dominate all other BBs
   // Create new BB
-  BasicBlock *EntryBB = &*F_X86->begin();
+  BasicBlock *EntryBB = &*F_CPU->begin();
   BasicBlock *BB =
-      BasicBlock::Create(M.getContext(), "getHPVMPtrArgs", F_X86, EntryBB);
+      BasicBlock::Create(M.getContext(), "getHPVMPtrArgs", F_CPU, EntryBB);
   BranchInst *Terminator = BranchInst::Create(EntryBB, BB);
   // Insert calls
-  for (Function::arg_iterator ai = F_X86->arg_begin(), ae = F_X86->arg_end();
+  for (Function::arg_iterator ai = F_CPU->arg_begin(), ae = F_CPU->arg_end();
        ai != ae; ++ai) {
-    if (F_X86->getAttributes().hasAttribute(ai->getArgNo() + 1,
+    if (F_CPU->getAttributes().hasAttribute(ai->getArgNo() + 1,
                                             Attribute::In)) {
       assert(ai->getType()->isPointerTy() &&
              "Only pointer arguments can have hpvm in/out attributes ");
@@ -1488,14 +1488,14 @@ void CGT_X86::codeGen(DFLeafNode *N) {
           &*ai, Type::getInt8PtrTy(M.getContext()), ai->getName() + ".i8ptr",
           Terminator);
       Value *ArgPtrCallArgs[] = {BI, size};
-      CallInst::Create(llvm_hpvm_x86_argument_ptr,
+      CallInst::Create(llvm_hpvm_cpu_argument_ptr,
                        ArrayRef<Value *>(ArgPtrCallArgs, 2), "", Terminator);
     }
   }
   DEBUG(errs() << *BB << "\n");
 
   // Go through all the instructions
-  for (inst_iterator i = inst_begin(F_X86), e = inst_end(F_X86); i != e; ++i) {
+  for (inst_iterator i = inst_begin(F_CPU), e = inst_end(F_CPU); i != e; ++i) {
     Instruction *I = &(*i);
     DEBUG(errs() << *I << "\n");
     // Leaf nodes should not contain HPVM graph intrinsics or launch
@@ -1572,19 +1572,19 @@ void CGT_X86::codeGen(DFLeafNode *N) {
                  "ID!");
 
           // For immediate ancestor, use the extra argument introduced in
-          // F_X86
+          // F_CPU
           int numParamsF = F->getFunctionType()->getNumParams();
-          int numParamsF_X86 = F_X86->getFunctionType()->getNumParams();
+          int numParamsF_CPU = F_CPU->getFunctionType()->getNumParams();
           assert(
-              (numParamsF_X86 - numParamsF == 6) &&
+              (numParamsF_CPU - numParamsF == 6) &&
               "Difference of arguments between function and its clone is not 6!");
 
           if (parentLevel == 0) {
             // Case when the query is for this node itself
             unsigned offset = 3 + (3 - dim);
-            // Traverse argument list of F_X86 in reverse order to find the
+            // Traverse argument list of F_CPU in reverse order to find the
             // correct index or dim argument.
-            Argument *indexVal = getArgumentFromEnd(F_X86, offset);
+            Argument *indexVal = getArgumentFromEnd(F_CPU, offset);
             assert(indexVal && "Index argument not found. Invalid offset!");
 
             DEBUG(errs() << *II << " replaced with " << *indexVal << "\n");
@@ -1596,7 +1596,7 @@ void CGT_X86::codeGen(DFLeafNode *N) {
             Value *args[] = {
                 ConstantInt::get(Type::getInt32Ty(II->getContext()), parentLevel),
                 ConstantInt::get(Type::getInt32Ty(II->getContext()), dim)};
-            CallInst *CI = CallInst::Create(llvm_hpvm_x86_getDimInstance,
+            CallInst *CI = CallInst::Create(llvm_hpvm_cpu_getDimInstance,
                                             ArrayRef<Value *>(args, 2),
                                             "nodeInstanceID", II);
             DEBUG(errs() << *II << " replaced with " << *CI << "\n");
@@ -1630,19 +1630,19 @@ void CGT_X86::codeGen(DFLeafNode *N) {
                  "Intrinsic ID!");
 
           // For immediate ancestor, use the extra argument introduced in
-          // F_X86
+          // F_CPU
           int numParamsF = F->getFunctionType()->getNumParams();
-          int numParamsF_X86 = F_X86->getFunctionType()->getNumParams();
+          int numParamsF_CPU = F_CPU->getFunctionType()->getNumParams();
           assert(
-              (numParamsF_X86 - numParamsF == 6) &&
+              (numParamsF_CPU - numParamsF == 6) &&
               "Difference of arguments between function and its clone is not 6!");
 
           if (parentLevel == 0) {
             // Case when the query is for this node itself
             unsigned offset = 3 - dim;
-            // Traverse argument list of F_X86 in reverse order to find the
+            // Traverse argument list of F_CPU in reverse order to find the
             // correct index or dim argument.
-            Argument *limitVal = getArgumentFromEnd(F_X86, offset);
+            Argument *limitVal = getArgumentFromEnd(F_CPU, offset);
             assert(limitVal && "Limit argument not found. Invalid offset!");
Invalid offset!"); DEBUG(errs() << *II << " replaced with " << *limitVal << "\n"); @@ -1654,7 +1654,7 @@ void CGT_X86::codeGen(DFLeafNode *N) { Value *args[] = { ConstantInt::get(Type::getInt32Ty(II->getContext()), parentLevel), ConstantInt::get(Type::getInt32Ty(II->getContext()), dim)}; - CallInst *CI = CallInst::Create(llvm_hpvm_x86_getDimLimit, + CallInst *CI = CallInst::Create(llvm_hpvm_cpu_getDimLimit, ArrayRef<Value *>(args, 2), "numNodeInstances", II); DEBUG(errs() << *II << " replaced with " << *CI << "\n"); @@ -1682,13 +1682,13 @@ void CGT_X86::codeGen(DFLeafNode *N) { (*i)->eraseFromParent(); } - DEBUG(errs() << *F_X86); + DEBUG(errs() << *F_CPU); } } // End of namespace -char DFG2LLVM_X86::ID = 0; -static RegisterPass<DFG2LLVM_X86> - X("dfg2llvm-x86", "Dataflow Graph to LLVM for X86 backend", +char DFG2LLVM_CPU::ID = 0; +static RegisterPass<DFG2LLVM_CPU> + X("dfg2llvm-cpu", "Dataflow Graph to LLVM for CPU backend", false /* does not modify the CFG */, true /* transformation, not just analysis */); diff --git a/hpvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.exports b/hpvm/lib/Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.exports similarity index 100% rename from hpvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.exports rename to hpvm/lib/Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.exports diff --git a/hpvm/lib/Transforms/DFG2LLVM_X86/LLVMBuild.txt b/hpvm/lib/Transforms/DFG2LLVM_CPU/LLVMBuild.txt similarity index 87% rename from hpvm/lib/Transforms/DFG2LLVM_X86/LLVMBuild.txt rename to hpvm/lib/Transforms/DFG2LLVM_CPU/LLVMBuild.txt index 1e82065bf06fe059cbd081b42a9f83e37352b703..30ba8a76365d02ca8fcdcb34948442ef89f5755e 100644 --- a/hpvm/lib/Transforms/DFG2LLVM_X86/LLVMBuild.txt +++ b/hpvm/lib/Transforms/DFG2LLVM_CPU/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./lib/Transforms/DFG2LLVM_X86/LLVMBuild.txt --------------*- Conf -*--===; +;===- ./lib/Transforms/DFG2LLVM_CPU/LLVMBuild.txt --------------*- Conf -*--===; ; ; The LLVM Compiler Infrastructure ; @@ -17,5 +17,5 @@ [component_0] type = Library -name = DFG2LLVM_X86 +name = DFG2LLVM_CPU parent = Transforms diff --git a/hpvm/lib/Transforms/DFG2LLVM_NVPTX/CMakeLists.txt b/hpvm/lib/Transforms/DFG2LLVM_OpenCL/CMakeLists.txt similarity index 78% rename from hpvm/lib/Transforms/DFG2LLVM_NVPTX/CMakeLists.txt rename to hpvm/lib/Transforms/DFG2LLVM_OpenCL/CMakeLists.txt index 832f6334a4bc048992ee545844941f44ef2c8fe0..00c651eaa250fc114f229f30e0cb7c121154ff96 100644 --- a/hpvm/lib/Transforms/DFG2LLVM_NVPTX/CMakeLists.txt +++ b/hpvm/lib/Transforms/DFG2LLVM_OpenCL/CMakeLists.txt @@ -4,9 +4,9 @@ endif() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DLLVM_BUILD_DIR=${PROJECT_BINARY_DIR}") -add_llvm_library( LLVMDFG2LLVM_NVPTX +add_llvm_library( LLVMDFG2LLVM_OpenCL MODULE - DFG2LLVM_NVPTX.cpp + DFG2LLVM_OpenCL.cpp DEPENDS intrinsics_gen diff --git a/hpvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp b/hpvm/lib/Transforms/DFG2LLVM_OpenCL/DFG2LLVM_OpenCL.cpp similarity index 96% rename from hpvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp rename to hpvm/lib/Transforms/DFG2LLVM_OpenCL/DFG2LLVM_OpenCL.cpp index d250562043b633aa69b4ac6bf77ba2bf51167093..b3ad2794b94614e9e866933151817942177c2589 100644 --- a/hpvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp +++ b/hpvm/lib/Transforms/DFG2LLVM_OpenCL/DFG2LLVM_OpenCL.cpp @@ -1,4 +1,4 @@ -//=== DFG2LLVM_NVPTX.cpp ===// +//=== DFG2LLVM_OpenCL.cpp ===// // // The LLVM Compiler Infrastructure // @@ -14,7 +14,7 @@ #define CONSTANT_ADDRSPACE 4 #define SHARED_ADDRSPACE 3 -#define DEBUG_TYPE "DFG2LLVM_NVPTX" +#define 
DEBUG_TYPE "DFG2LLVM_OpenCL" #include "SupportHPVM/DFG2LLVM.h" #include "SupportHPVM/HPVMTimer.h" #include "SupportHPVM/HPVMUtils.h" @@ -54,8 +54,8 @@ using namespace dfg2llvm; using namespace hpvmUtils; // HPVM Command line option to use timer or not -static cl::opt<bool> HPVMTimer_NVPTX("hpvm-timers-ptx", - cl::desc("Enable hpvm timers")); +static cl::opt<bool> HPVMTimer_OpenCL("hpvm-timers-ptx", + cl::desc("Enable hpvm timers")); namespace { // Helper class declarations @@ -149,10 +149,10 @@ static void findIntrinsicInst(Function *, Intrinsic::ID, static AtomicRMWInst::BinOp getAtomicOp(Intrinsic::ID); static std::string getAtomicOpName(Intrinsic::ID); -// DFG2LLVM_NVPTX - The first implementation. -struct DFG2LLVM_NVPTX : public DFG2LLVM { +// DFG2LLVM_OpenCL - The first implementation. +struct DFG2LLVM_OpenCL : public DFG2LLVM { static char ID; // Pass identification, replacement for typeid - DFG2LLVM_NVPTX() : DFG2LLVM(ID) {} + DFG2LLVM_OpenCL() : DFG2LLVM(ID) {} private: public: @@ -160,7 +160,7 @@ public: }; // Visitor for Code generation traversal (tree traversal for now) -class CGT_NVPTX : public CodeGenTraversal { +class CGT_OpenCL : public CodeGenTraversal { private: // Member variables @@ -194,8 +194,8 @@ private: // Virtual Functions void init() { - HPVMTimer = HPVMTimer_NVPTX; - TargetName = "NVPTX"; + HPVMTimer = HPVMTimer_OpenCL; + TargetName = "OpenCL"; } void initRuntimeAPI(); void codeGen(DFInternalNode *N); @@ -203,7 +203,7 @@ private: public: // Constructor - CGT_NVPTX(Module &_M, BuildDFG &_DFG) + CGT_OpenCL(Module &_M, BuildDFG &_DFG) : CodeGenTraversal(_M, _DFG), KernelM(CloneModule(_M)) { init(); initRuntimeAPI(); @@ -257,7 +257,7 @@ public: }; // Initialize the HPVM runtime API. This makes it easier to insert these calls -void CGT_NVPTX::initRuntimeAPI() { +void CGT_OpenCL::initRuntimeAPI() { // Load Runtime API Module SMDiagnostic Err; @@ -289,7 +289,7 @@ void CGT_NVPTX::initRuntimeAPI() { initTimerAPI(); // Insert init context in main - DEBUG(errs() << "Gen Code to initialize NVPTX Timer\n"); + DEBUG(errs() << "Gen Code to initialize OpenCL Timer\n"); Function *VI = M.getFunction("llvm.hpvm.init"); assert(VI->getNumUses() == 1 && "__hpvm__init should only be used once"); @@ -302,7 +302,7 @@ void CGT_NVPTX::initRuntimeAPI() { switchToTimer(hpvm_TimerID_NONE, InitCall); // Insert print instruction at hpvm exit - DEBUG(errs() << "Gen Code to print NVPTX Timer\n"); + DEBUG(errs() << "Gen Code to print OpenCL Timer\n"); Function *VC = M.getFunction("llvm.hpvm.cleanup"); DEBUG(errs() << *VC << "\n"); assert(VC->getNumUses() == 1 && "__hpvm__clear should only be used once"); @@ -316,8 +316,8 @@ void CGT_NVPTX::initRuntimeAPI() { // used to generate a function to associate with this leaf node. The function // is responsible for all the memory allocation/transfer and invoking the // kernel call on the device -void CGT_NVPTX::insertRuntimeCalls(DFInternalNode *N, Kernel *K, - const Twine &FileName) { +void CGT_OpenCL::insertRuntimeCalls(DFInternalNode *N, Kernel *K, + const Twine &FileName) { // Check if clone already exists. If it does, it means we have visited this // function before. // assert(N->getGenFunc() == NULL && "Code already generated for this node"); @@ -338,18 +338,18 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode *N, Kernel *K, // Create of clone of F with no instructions. Only the type is the same as F // without the extra arguments. - Function *F_X86; + Function *F_CPU; // Clone the function, if we are seeing this function for the first time. 
   // only need a clone in terms of type.
   ValueToValueMapTy VMap;
 
   // Create new function with the same type
-  F_X86 =
+  F_CPU =
       Function::Create(F->getFunctionType(), F->getLinkage(), F->getName(), &M);
 
   // Loop over the arguments, copying the names of arguments over.
-  Function::arg_iterator dest_iterator = F_X86->arg_begin();
+  Function::arg_iterator dest_iterator = F_CPU->arg_begin();
   for (Function::const_arg_iterator i = F->arg_begin(), e = F->arg_end();
        i != e; ++i) {
     dest_iterator->setName(i->getName()); // Copy the name over...
@@ -358,29 +358,29 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode *N, Kernel *K,
   }
 
   // Add a basic block to this empty function
-  BasicBlock *BB = BasicBlock::Create(M.getContext(), "entry", F_X86);
+  BasicBlock *BB = BasicBlock::Create(M.getContext(), "entry", F_CPU);
   ReturnInst *RI = ReturnInst::Create(
-      M.getContext(), UndefValue::get(F_X86->getReturnType()), BB);
+      M.getContext(), UndefValue::get(F_CPU->getReturnType()), BB);
 
   // FIXME: Adding Index and Dim arguments is probably not required except
-  // for consistency purpose (DFG2LLVM_X86 does assume that all leaf nodes do
+  // for consistency purposes (DFG2LLVM_CPU does assume that all leaf nodes do
   // have those arguments)
 
   // Add Index and Dim arguments except for the root node
   if (!N->isRoot() && !N->getParent()->isChildGraphStreaming())
-    F_X86 = addIdxDimArgs(F_X86);
+    F_CPU = addIdxDimArgs(F_CPU);
 
-  BB = &*F_X86->begin();
+  BB = &*F_CPU->begin();
   RI = cast<ReturnInst>(BB->getTerminator());
 
   // Add the generated function info to DFNode
-  // N->setGenFunc(F_X86, hpvm::CPU_TARGET);
-  N->addGenFunc(F_X86, hpvm::GPU_TARGET, true);
-  DEBUG(errs() << "Added GPUGenFunc: " << F_X86->getName() << " for node "
+  // N->setGenFunc(F_CPU, hpvm::CPU_TARGET);
+  N->addGenFunc(F_CPU, hpvm::GPU_TARGET, true);
+  DEBUG(errs() << "Added GPUGenFunc: " << F_CPU->getName() << " for node "
                << N->getFuncPointer()->getName() << "\n");
 
   // Loop over the arguments, to create the VMap
-  dest_iterator = F_X86->arg_begin();
+  dest_iterator = F_CPU->arg_begin();
   for (Function::const_arg_iterator i = F->arg_begin(), e = F->arg_end();
        i != e; ++i) {
     // Add mapping to VMap and increment dest iterator
@@ -435,16 +435,16 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode *N, Kernel *K,
   DEBUG(errs() << "Inserting launch call"
                << "\n");
-  CallInst *NVPTX_Ctx = CallInst::Create(llvm_hpvm_ocl_launch,
-                                         ArrayRef<Value *>(LaunchInstArgs, 2),
-                                         "graph" + KF->getName(), InitCall);
-  DEBUG(errs() << *NVPTX_Ctx << "\n");
-  GraphIDAddr = new GlobalVariable(M, NVPTX_Ctx->getType(), false,
-                                   GlobalValue::CommonLinkage,
-                                   Constant::getNullValue(NVPTX_Ctx->getType()),
-                                   "graph" + KF->getName() + ".addr");
+  CallInst *OpenCL_Ctx = CallInst::Create(llvm_hpvm_ocl_launch,
+                                          ArrayRef<Value *>(LaunchInstArgs, 2),
+                                          "graph" + KF->getName(), InitCall);
+  DEBUG(errs() << *OpenCL_Ctx << "\n");
+  GraphIDAddr = new GlobalVariable(
+      M, OpenCL_Ctx->getType(), false, GlobalValue::CommonLinkage,
+      Constant::getNullValue(OpenCL_Ctx->getType()),
+      "graph" + KF->getName() + ".addr");
   DEBUG(errs() << "Store at: " << *GraphIDAddr << "\n");
-  StoreInst *SI = new StoreInst(NVPTX_Ctx, GraphIDAddr, InitCall);
+  StoreInst *SI = new StoreInst(OpenCL_Ctx, GraphIDAddr, InitCall);
   DEBUG(errs() << *SI << "\n");
   switchToTimer(hpvm_TimerID_NONE, InitCall);
   switchToTimer(hpvm_TimerID_SETUP, RI);
@@ -463,14 +463,14 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode *N, Kernel *K,
   for(unsigned i=0; i<KF->getFunctionType()->getNumParams(); i++) {
 
     // The kernel object gives us the mapping of arguments from kernel launch
-    // node function (F_X86) to kernel (kernel->KF)
-    Value* inputVal = getArgumentAt(F_X86, K->getInArgMap()[i]);
+    // node function (F_CPU) to kernel (kernel->KF)
+    Value* inputVal = getArgumentAt(F_CPU, K->getInArgMap()[i]);
   */
 
   for (auto &InArgMapPair : kernelInArgMap) {
     unsigned i = InArgMapPair.first;
-    Value *inputVal = getArgumentAt(F_X86, InArgMapPair.second);
+    Value *inputVal = getArgumentAt(F_CPU, InArgMapPair.second);
     DEBUG(errs() << "\tArgument " << i << " = " << *inputVal << "\n");
 
     // input value has been obtained.
@@ -504,7 +504,7 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode *N, Kernel *K,
       // Assert that the pointer argument size (next argument) is in the map
       assert(kernelInArgMap.find(i + 1) != kernelInArgMap.end());
 
-      Value *inputSize = getArgumentAt(F_X86, kernelInArgMap[i + 1]);
+      Value *inputSize = getArgumentAt(F_CPU, kernelInArgMap[i + 1]);
       assert(
           inputSize->getType() == Type::getInt64Ty(M.getContext()) &&
           "Pointer type input must always be followed by size (integer type)");
@@ -606,7 +606,7 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode *N, Kernel *K,
 
   std::vector<Value *> AllocInputArgs;
   for (unsigned i = 0; i < K->allocInArgMap.size(); i++) {
-    AllocInputArgs.push_back(getArgumentAt(F_X86, K->allocInArgMap.at(i)));
+    AllocInputArgs.push_back(getArgumentAt(F_CPU, K->allocInArgMap.at(i)));
   }
 
   CallInst *CI = CallInst::Create(F_alloc, AllocInputArgs, "", RI);
@@ -759,7 +759,7 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode *N, Kernel *K,
   DFNode *C = N->getChildGraph()->getExit();
   // Get OutputType of this node
   StructType *OutTy = N->getOutputType();
-  Value *retVal = UndefValue::get(F_X86->getReturnType());
+  Value *retVal = UndefValue::get(F_CPU->getReturnType());
   // Find the kernel's output arg map, to use instead of the bindings
   std::vector<unsigned> outArgMap = kernel->getOutArgMap();
   // Find all the input edges to exit node
@@ -779,7 +779,7 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode *N, Kernel *K,
     // argument from argument list of this internal node
     Value *inputVal;
     if (SrcDF->isEntryNode()) {
-      inputVal = getArgumentAt(F_X86, i);
+      inputVal = getArgumentAt(F_CPU, i);
       DEBUG(errs() << "Argument " << i << " = " << *inputVal << "\n");
     } else {
       // edge is from an internal node
@@ -812,13 +812,13 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode *N, Kernel *K,
   DEBUG(errs() << "Extracted all\n");
   switchToTimer(hpvm_TimerID_NONE, RI);
   retVal->setName("output");
-  ReturnInst *newRI = ReturnInst::Create(F_X86->getContext(), retVal);
+  ReturnInst *newRI = ReturnInst::Create(F_CPU->getContext(), retVal);
   ReplaceInstWithInst(RI, newRI);
 }
 
 // Right now, only targeting the one level case.
 // In general, device functions
 // can return values so we don't need to change them
-void CGT_NVPTX::codeGen(DFInternalNode *N) {
+void CGT_OpenCL::codeGen(DFInternalNode *N) {
   DEBUG(errs() << "Inside internal node: " << N->getFuncPointer()->getName()
                << "\n");
   if (KernelLaunchNode == NULL)
@@ -910,7 +910,7 @@ void CGT_NVPTX::codeGen(DFInternalNode *N) {
   }
 }
 
-void CGT_NVPTX::codeGen(DFLeafNode *N) {
+void CGT_OpenCL::codeGen(DFLeafNode *N) {
   DEBUG(errs() << "Inside leaf node: " << N->getFuncPointer()->getName()
                << "\n");
 
@@ -1625,7 +1625,7 @@ void CGT_NVPTX::codeGen(DFLeafNode *N) {
           // check that addressspace is 1
           // if (GEPIaddrspace != 1) {
          //   // does not fit this pattern - addrspace of pointer
-          //argument is not global continue;
+          // argument is not global continue;
           // }
           if (!(GEPI->hasOneUse())) {
             // does not fit this pattern - more than one uses
@@ -1876,8 +1876,8 @@ void CGT_NVPTX::codeGen(DFLeafNode *N) {
   return;
 }
 
-bool DFG2LLVM_NVPTX::runOnModule(Module &M) {
-  DEBUG(errs() << "\nDFG2LLVM_NVPTX PASS\n");
+bool DFG2LLVM_OpenCL::runOnModule(Module &M) {
+  DEBUG(errs() << "\nDFG2LLVM_OpenCL PASS\n");
 
   // Get the BuildDFG Analysis Results:
   // - Dataflow graph
@@ -1891,7 +1891,7 @@ bool DFG2LLVM_NVPTX::runOnModule(Module &M) {
   //  = DFG.getHandleToDFEdgeMap();
 
   // Visitor for Code Generation Graph Traversal
-  CGT_NVPTX *CGTVisitor = new CGT_NVPTX(M, DFG);
+  CGT_OpenCL *CGTVisitor = new CGT_OpenCL(M, DFG);
 
   // Iterate over all the DFGs and produce code for each one of them
   for (auto rootNode : Roots) {
@@ -1907,7 +1907,7 @@ bool DFG2LLVM_NVPTX::runOnModule(Module &M) {
   return true;
 }
 
-std::string CGT_NVPTX::getKernelsModuleName(Module &M) {
+std::string CGT_OpenCL::getKernelsModuleName(Module &M) {
   /*SmallString<128> currentDir;
   llvm::sys::fs::current_path(currentDir);
   std::string fileName = getFilenameFromModule(M);
@@ -1917,7 +1917,7 @@ std::string CGT_NVPTX::getKernelsModuleName(Module &M) {
   return mid.append(".kernels.ll");
 }
 
-void CGT_NVPTX::fixValueAddrspace(Value *V, unsigned addrspace) {
+void CGT_OpenCL::fixValueAddrspace(Value *V, unsigned addrspace) {
   assert(isa<PointerType>(V->getType()) && "Value should be of Pointer Type!");
   PointerType *OldTy = cast<PointerType>(V->getType());
   PointerType *NewTy = PointerType::get(OldTy->getElementType(), addrspace);
@@ -1935,8 +1935,8 @@ void CGT_NVPTX::fixValueAddrspace(Value *V, unsigned addrspace) {
 }
 
 std::vector<unsigned>
-CGT_NVPTX::globalToConstantMemoryOpt(std::vector<unsigned> *GlobalMemArgs,
-                                     Function *F) {
+CGT_OpenCL::globalToConstantMemoryOpt(std::vector<unsigned> *GlobalMemArgs,
+                                      Function *F) {
   std::vector<unsigned> ConstantMemArgs;
   for (Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end(); ai != ae;
        ++ai) {
@@ -1959,9 +1959,9 @@ CGT_NVPTX::globalToConstantMemoryOpt(std::vector<unsigned> *GlobalMemArgs,
   return ConstantMemArgs;
 }
 
-Function *CGT_NVPTX::changeArgAddrspace(Function *F,
-                                        std::vector<unsigned> &Args,
-                                        unsigned addrspace) {
+Function *CGT_OpenCL::changeArgAddrspace(Function *F,
+                                         std::vector<unsigned> &Args,
+                                         unsigned addrspace) {
   unsigned idx = 0;
   std::vector<Type *> ArgTypes;
   for (Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end(); ai != ae;
@@ -1986,7 +1986,7 @@ Function *CGT_NVPTX::changeArgAddrspace(Function *F,
 }
 
 /* Add metadata to module KernelM, for OpenCL kernels */
-void CGT_NVPTX::addCLMetadata(Function *F) {
+void CGT_OpenCL::addCLMetadata(Function *F) {
 
   IRBuilder<> Builder(&*F->begin());
 
@@ -2013,7 +2013,7 @@ void CGT_NVPTX::addCLMetadata(Function *F) {
@@ -2013,7 +2013,7 @@ MDN_annotations->addOperand(MDNvvmAnnotationsNode); } -void CGT_NVPTX::writeKernelsModule() { +void CGT_OpenCL::writeKernelsModule() { // In addition to deleting all other functions, we also want to spiff it // up a little bit. Do this now. @@ -2035,7 +2035,7 @@ void CGT_NVPTX::writeKernelsModule() { Out.keep(); } -Function *CGT_NVPTX::transformFunctionToVoid(Function *F) { +Function *CGT_OpenCL::transformFunctionToVoid(Function *F) { DEBUG(errs() << "Transforming function to void: " << F->getName() << "\n"); // FIXME: Maybe do that using the Node? @@ -2361,7 +2361,7 @@ static std::string getFilenameFromModule(const Module &M) { return moduleID.substr(moduleID.find_last_of("/") + 1); } -// Changes the data layout of the Module to be compiled with NVPTX backend +// Changes the data layout of the Module to be compiled for OpenCL // TODO: Figure out when to call it, probably after duplicating the modules static void changeDataLayout(Module &M) { std::string nvptx32_layoutStr = "e-p:32:32-i64:64-v16:16-v32:32-n16:32:64"; @@ -2464,9 +2464,9 @@ static std::string getAtomicOpName(Intrinsic::ID ID) { } // End of namespace -char DFG2LLVM_NVPTX::ID = 0; -static RegisterPass<DFG2LLVM_NVPTX> X("dfg2llvm-nvptx", - "Dataflow Graph to LLVM for NVPTX Pass", +char DFG2LLVM_OpenCL::ID = 0; +static RegisterPass<DFG2LLVM_OpenCL> X("dfg2llvm-nvptx", - "Dataflow Graph to LLVM for OpenCL Pass", wait +
on stack:\n"); @@ -226,7 +226,7 @@ void llvm_hpvm_x86_dstack_push(unsigned n, uint64_t limitX, uint64_t iX, pthread_mutex_unlock(&ocl_mtx); } -void llvm_hpvm_x86_dstack_pop() { +void llvm_hpvm_cpu_dstack_pop() { DEBUG(cout << "Popping from depth stack\n"); pthread_mutex_lock(&ocl_mtx); DStack.pop_back(); @@ -234,7 +234,7 @@ void llvm_hpvm_x86_dstack_pop() { pthread_mutex_unlock(&ocl_mtx); } -uint64_t llvm_hpvm_x86_getDimLimit(unsigned level, unsigned dim) { +uint64_t llvm_hpvm_cpu_getDimLimit(unsigned level, unsigned dim) { DEBUG(cout << "Request limit for dim " << dim << " of ancestor " << level << flush << "\n"); pthread_mutex_lock(&ocl_mtx); @@ -246,7 +246,7 @@ uint64_t llvm_hpvm_x86_getDimLimit(unsigned level, unsigned dim) { return result; } -uint64_t llvm_hpvm_x86_getDimInstance(unsigned level, unsigned dim) { +uint64_t llvm_hpvm_cpu_getDimInstance(unsigned level, unsigned dim) { DEBUG(cout << "Request instance id for dim " << dim << " of ancestor " << level << flush << "\n"); pthread_mutex_lock(&ocl_mtx); @@ -350,13 +350,13 @@ static void *llvm_hpvm_ocl_request_mem(void *ptr, size_t size, return d_input; } -void *llvm_hpvm_x86_argument_ptr(void *ptr, size_t size) { +void *llvm_hpvm_cpu_argument_ptr(void *ptr, size_t size) { return llvm_hpvm_request_mem(ptr, size); } void *llvm_hpvm_request_mem(void *ptr, size_t size) { pthread_mutex_lock(&ocl_mtx); - DEBUG(cout << "[X86] Request memory: " << ptr << flush << "\n"); + DEBUG(cout << "[CPU] Request memory: " << ptr << flush << "\n"); MemTrackerEntry *MTE = MTracker.lookup(ptr); if (MTE == NULL) { cout << "ERROR: Requesting memory not present in Table\n"; @@ -1152,8 +1152,8 @@ void hpvm_DestroyTimerSet(struct hpvm_TimerSet *timers) { // Launch API for a streaming dataflow graph void *llvm_hpvm_streamLaunch(void (*LaunchFunc)(void *, void *), void *args) { - DFNodeContext_X86 *Context = - (DFNodeContext_X86 *)malloc(sizeof(DFNodeContext_X86)); + DFNodeContext_CPU *Context = + (DFNodeContext_CPU *)malloc(sizeof(DFNodeContext_CPU)); Context->threads = new std::vector<pthread_t>(); Context->ArgInPortSizeMap = new std::map<unsigned, uint64_t>(); @@ -1176,7 +1176,7 @@ void *llvm_hpvm_streamLaunch(void (*LaunchFunc)(void *, void *), void *args) { void llvm_hpvm_streamPush(void *graphID, void *args) { DEBUG(cout << "StreamPush -- Graph: " << graphID << ", Arguments: " << args << flush << "\n"); - DFNodeContext_X86 *Ctx = (DFNodeContext_X86 *)graphID; + DFNodeContext_CPU *Ctx = (DFNodeContext_CPU *)graphID; unsigned offset = 0; for (unsigned i = 0; i < Ctx->ArgInPortSizeMap->size(); i++) { uint64_t element; @@ -1198,7 +1198,7 @@ void llvm_hpvm_streamPush(void *graphID, void *args) { // Pop API for a streaming dataflow graph void *llvm_hpvm_streamPop(void *graphID) { DEBUG(cout << "StreamPop -- Graph: " << graphID << flush << "\n"); - DFNodeContext_X86 *Ctx = (DFNodeContext_X86 *)graphID; + DFNodeContext_CPU *Ctx = (DFNodeContext_CPU *)graphID; unsigned totalBytes = 0; for (uint64_t size : *(Ctx->BindOutSizes)) totalBytes += size; @@ -1216,7 +1216,7 @@ void *llvm_hpvm_streamPop(void *graphID) { // Wait API for a streaming dataflow graph void llvm_hpvm_streamWait(void *graphID) { DEBUG(cout << "StreamWait -- Graph: " << graphID << flush << "\n"); - DFNodeContext_X86 *Ctx = (DFNodeContext_X86 *)graphID; + DFNodeContext_CPU *Ctx = (DFNodeContext_CPU *)graphID; // Push garbage to all other input buffers for (unsigned i = 0; i < Ctx->BindInputBuffers->size(); i++) { uint64_t element = 0; @@ -1235,7 +1235,7 @@ void *llvm_hpvm_createBindInBuffer(void 
*graphID, uint64_t size, unsigned inArgPort) { DEBUG(cout << "Create BindInBuffer -- Graph: " << graphID << ", Size: " << size << flush << "\n"); - DFNodeContext_X86 *Context = (DFNodeContext_X86 *)graphID; + DFNodeContext_CPU *Context = (DFNodeContext_CPU *)graphID; CircularBuffer<uint64_t> *bufferID = new CircularBuffer<uint64_t>(BUFFER_SIZE, "BindIn"); DEBUG(cout << "\tNew Buffer: " << bufferID << flush << "\n"); @@ -1249,7 +1249,7 @@ void *llvm_hpvm_createBindInBuffer(void *graphID, uint64_t size, void *llvm_hpvm_createBindOutBuffer(void *graphID, uint64_t size) { DEBUG(cout << "Create BindOutBuffer -- Graph: " << graphID << ", Size: " << size << flush << "\n"); - DFNodeContext_X86 *Context = (DFNodeContext_X86 *)graphID; + DFNodeContext_CPU *Context = (DFNodeContext_CPU *)graphID; // Twine name = Twine("Bind.Out.")+Twine(Context->BindOutputBuffers->size()); CircularBuffer<uint64_t> *bufferID = new CircularBuffer<uint64_t>(BUFFER_SIZE, "BindOut"); @@ -1261,7 +1261,7 @@ void *llvm_hpvm_createBindOutBuffer(void *graphID, uint64_t size) { void *llvm_hpvm_createEdgeBuffer(void *graphID, uint64_t size) { DEBUG(cout << "Create EdgeBuffer -- Graph: " << graphID << ", Size: " << size << flush << "\n"); - DFNodeContext_X86 *Context = (DFNodeContext_X86 *)graphID; + DFNodeContext_CPU *Context = (DFNodeContext_CPU *)graphID; // Twine name = Twine("Edge.")+Twine(Context->EdgeBuffers->size()); CircularBuffer<uint64_t> *bufferID = new CircularBuffer<uint64_t>(BUFFER_SIZE, "Edge"); @@ -1274,7 +1274,7 @@ void *llvm_hpvm_createEdgeBuffer(void *graphID, uint64_t size) { void *llvm_hpvm_createLastInputBuffer(void *graphID, uint64_t size) { DEBUG(cout << "Create isLastInputBuffer -- Graph: " << graphID << ", Size: " << size << flush << "\n"); - DFNodeContext_X86 *Context = (DFNodeContext_X86 *)graphID; + DFNodeContext_CPU *Context = (DFNodeContext_CPU *)graphID; // Twine name = Twine("isLastInput.")+Twine(Context->EdgeBuffers->size()); CircularBuffer<uint64_t> *bufferID = new CircularBuffer<uint64_t>(BUFFER_SIZE, "LastInput"); @@ -1286,7 +1286,7 @@ void *llvm_hpvm_createLastInputBuffer(void *graphID, uint64_t size) { // Free buffers void llvm_hpvm_freeBuffers(void *graphID) { DEBUG(cout << "Free all buffers -- Graph: " << graphID << flush << "\n"); - DFNodeContext_X86 *Context = (DFNodeContext_X86 *)graphID; + DFNodeContext_CPU *Context = (DFNodeContext_CPU *)graphID; for (CircularBuffer<uint64_t> *bufferID : *(Context->BindInputBuffers)) delete bufferID; for (CircularBuffer<uint64_t> *bufferID : *(Context->BindOutputBuffers)) @@ -1314,7 +1314,7 @@ void llvm_hpvm_createThread(void *graphID, void *(*Func)(void *), void *arguments) { DEBUG(cout << "Create Thread -- Graph: " << graphID << ", Func: " << Func << ", Args: " << arguments << flush << "\n"); - DFNodeContext_X86 *Ctx = (DFNodeContext_X86 *)graphID; + DFNodeContext_CPU *Ctx = (DFNodeContext_CPU *)graphID; int err; pthread_t threadID; if ((err = pthread_create(&threadID, NULL, Func, arguments)) != 0) @@ -1326,16 +1326,16 @@ void llvm_hpvm_createThread(void *graphID, void *(*Func)(void *), // Wait for thread to finish void llvm_hpvm_freeThreads(void *graphID) { DEBUG(cout << "Free Threads -- Graph: " << graphID << flush << "\n"); - DFNodeContext_X86 *Ctx = (DFNodeContext_X86 *)graphID; + DFNodeContext_CPU *Ctx = (DFNodeContext_CPU *)graphID; for (pthread_t thread : *(Ctx->threads)) pthread_join(thread, NULL); } /************************ OPENCL & PTHREAD API ********************************/ -void *llvm_hpvm_x86_launch(void *(*rootFunc)(void *), void 
*arguments) { - DFNodeContext_X86 *Context = - (DFNodeContext_X86 *)malloc(sizeof(DFNodeContext_X86)); +void *llvm_hpvm_cpu_launch(void *(*rootFunc)(void *), void *arguments) { + DFNodeContext_CPU *Context = + (DFNodeContext_CPU *)malloc(sizeof(DFNodeContext_CPU)); // int err; // if((err = pthread_create(&Context->threadID, NULL, rootFunc, arguments)) != // 0) cout << "Failed to create pthread. Error code = " << err << flush << @@ -1344,9 +1344,9 @@ void *llvm_hpvm_x86_launch(void *(*rootFunc)(void *), void *arguments) { return Context; } -void llvm_hpvm_x86_wait(void *graphID) { +void llvm_hpvm_cpu_wait(void *graphID) { DEBUG(cout << "Waiting for pthread to finish ...\n"); - // DFNodeContext_X86* Context = (DFNodeContext_X86*) graphID; + // DFNodeContext_CPU* Context = (DFNodeContext_CPU*) graphID; // pthread_join(Context->threadID, NULL); free(graphID); DEBUG(cout << "\t... pthread Done!\n"); @@ -1451,8 +1451,7 @@ void *llvm_hpvm_ocl_initContext(enum hpvm::Target T) { DEBUG(cout << "\tNAME = " << buffer << flush << "\n"); clGetPlatformInfo(platformId, CL_PLATFORM_VENDOR, 10240, buffer, NULL); DEBUG(cout << "\tVENDOR = " << buffer << flush << "\n"); - clGetPlatformInfo(platformId, CL_PLATFORM_EXTENSIONS, 10240, buffer, - NULL); + clGetPlatformInfo(platformId, CL_PLATFORM_EXTENSIONS, 10240, buffer, NULL); DEBUG(cout << "\tEXTENSIONS = " << buffer << flush << "\n"); } else { platformId = findPlatform("intel"); @@ -1466,8 +1465,7 @@ void *llvm_hpvm_ocl_initContext(enum hpvm::Target T) { DEBUG(cout << "\tNAME = " << buffer << flush << "\n"); clGetPlatformInfo(platformId, CL_PLATFORM_VENDOR, 10240, buffer, NULL); DEBUG(cout << "\tVENDOR = " << buffer << flush << "\n"); - clGetPlatformInfo(platformId, CL_PLATFORM_EXTENSIONS, 10240, buffer, - NULL); + clGetPlatformInfo(platformId, CL_PLATFORM_EXTENSIONS, 10240, buffer, NULL); DEBUG(cout << "\tEXTENSIONS = " << buffer << flush << "\n"); } DEBUG(cout << "Found platform with id: " << platformId << "\n"); @@ -1483,7 +1481,7 @@ void *llvm_hpvm_ocl_initContext(enum hpvm::Target T) { errcode = clGetContextInfo(globalOCLContext, CL_CONTEXT_DEVICES, 0, NULL, &dataBytes); checkErr(errcode, CL_SUCCESS, "Failure to get context info length"); - + DEBUG(cout << "Got databytes: " << dataBytes << "\n"); clDevices = (cl_device_id *)malloc(dataBytes); diff --git a/hpvm/projects/hpvm-rt/hpvm-rt.h b/hpvm/projects/hpvm-rt/hpvm-rt.h index 519b467c9047fbbdeea3a4610bedda3a77c36fe2..94fe5b5ef0d82aca9f7556f7022aa513b9d2cc28 100644 --- a/hpvm/projects/hpvm-rt/hpvm-rt.h +++ b/hpvm/projects/hpvm-rt/hpvm-rt.h @@ -64,12 +64,12 @@ public: unsigned getNumDim() const { return numDim; } }; -void llvm_hpvm_x86_dstack_push(unsigned n, uint64_t limitX = 0, uint64_t iX = 0, +void llvm_hpvm_cpu_dstack_push(unsigned n, uint64_t limitX = 0, uint64_t iX = 0, uint64_t limitY = 0, uint64_t iY = 0, uint64_t limitZ = 0, uint64_t iZ = 0); -void llvm_hpvm_x86_dstack_pop(); -uint64_t llvm_hpvm_x86_getDimLimit(unsigned level, unsigned dim); -uint64_t llvm_hpvm_x86_getDimInstance(unsigned level, unsigned dim); +void llvm_hpvm_cpu_dstack_pop(); +uint64_t llvm_hpvm_cpu_getDimLimit(unsigned level, unsigned dim); +uint64_t llvm_hpvm_cpu_getDimInstance(unsigned level, unsigned dim); /********************* Memory Tracker **********************************/ class MemTrackerEntry { @@ -148,11 +148,11 @@ void llvm_hpvm_untrack_mem(void *); void *llvm_hpvm_request_mem(void *, size_t); /*********************** OPENCL & PTHREAD API **************************/ -void *llvm_hpvm_x86_launch(void *(void *), void
*); -void llvm_hpvm_x86_wait(void *); +void *llvm_hpvm_cpu_launch(void *(void *), void *); +void llvm_hpvm_cpu_wait(void *); void *llvm_hpvm_ocl_initContext(enum hpvm::Target); -void *llvm_hpvm_x86_argument_ptr(void *, size_t); +void *llvm_hpvm_cpu_argument_ptr(void *, size_t); void llvm_hpvm_ocl_clearContext(void *); void llvm_hpvm_ocl_argument_shared(void *, int, size_t); diff --git a/hpvm/test/benchmarks/hpvm-cava/Makefile b/hpvm/test/benchmarks/hpvm-cava/Makefile index 07bb7f06c0544dc87c8c4947bf04501e5e410e29..d7caf6688370b137e90ad300830bddd6cb8eacd4 100644 --- a/hpvm/test/benchmarks/hpvm-cava/Makefile +++ b/hpvm/test/benchmarks/hpvm-cava/Makefile @@ -61,12 +61,12 @@ TESTGEN_OPTFLAGS = -load LLVMGenHPVM.so -genhpvm -globaldce ifeq ($(TARGET),seq) DEVICE = CPU_TARGET - HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG - HPVM_OPTFLAGS += -hpvm-timers-x86 + HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -dfg2llvm-cpu -clearDFG + HPVM_OPTFLAGS += -hpvm-timers-cpu else DEVICE = GPU_TARGET - HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_NVPTX.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -localmem -dfg2llvm-nvptx -dfg2llvm-x86 -clearDFG - HPVM_OPTFLAGS += -hpvm-timers-x86 -hpvm-timers-ptx + HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -localmem -dfg2llvm-nvptx -dfg2llvm-cpu -clearDFG + HPVM_OPTFLAGS += -hpvm-timers-cpu -hpvm-timers-ptx endif TESTGEN_OPTFLAGS += -hpvm-timers-gen diff --git a/hpvm/test/benchmarks/parboil/common/mk/hpvm.mk b/hpvm/test/benchmarks/parboil/common/mk/hpvm.mk index 9e0318600a3a2d43ed60922e2f48e7e23ea290a7..8e3ab8e65856d7a80c8477748c2eccfcf7c78219 100755 --- a/hpvm/test/benchmarks/parboil/common/mk/hpvm.mk +++ b/hpvm/test/benchmarks/parboil/common/mk/hpvm.mk @@ -19,10 +19,10 @@ KERNEL_GEN_FLAGS = -O3 -target nvptx64-nvidia-nvcl ifeq ($(TARGET),seq) DEVICE = CPU_TARGET - HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG + HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -dfg2llvm-cpu -clearDFG else DEVICE = GPU_TARGET - HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_NVPTX.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -localmem -dfg2llvm-nvptx -dfg2llvm-x86 -clearDFG + HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -localmem -dfg2llvm-nvptx -dfg2llvm-cpu -clearDFG endif CFLAGS += -DDEVICE=$(DEVICE) @@ -30,16 +30,16 @@ CXXFLAGS += -DDEVICE=$(DEVICE) HOST_LINKFLAGS = -ifeq ($(TIMER),x86) - HPVM_OPTFLAGS += -hpvm-timers-x86 +ifeq ($(TIMER),cpu) + HPVM_OPTFLAGS += -hpvm-timers-cpu else ifeq ($(TIMER),gen) TESTGEN_OPTFLAGS += -hpvm-timers-gen else ifeq ($(TIMER),no) else ifeq ($(TARGET),seq) - HPVM_OPTFLAGS += -hpvm-timers-x86 + HPVM_OPTFLAGS += -hpvm-timers-cpu else - HPVM_OPTFLAGS += -hpvm-timers-x86 -hpvm-timers-ptx + HPVM_OPTFLAGS += -hpvm-timers-cpu -hpvm-timers-ptx endif TESTGEN_OPTFLAGS += -hpvm-timers-gen endif diff --git a/hpvm/test/benchmarks/pipeline/Makefile b/hpvm/test/benchmarks/pipeline/Makefile index 7a246a651a06ea67246578371d8797682aea5bfd..36f6a1f9005f3dadcf2a3a97c0ba27d6fb6f0ab2 100644 --- a/hpvm/test/benchmarks/pipeline/Makefile +++ b/hpvm/test/benchmarks/pipeline/Makefile @@ -48,12 +48,12 @@ TESTGEN_OPTFLAGS 
= -load LLVMGenHPVM.so -genhpvm -globaldce ifeq ($(TARGET),seq) DEVICE = CPU_TARGET - HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG - HPVM_OPTFLAGS += -hpvm-timers-x86 + HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -dfg2llvm-cpu -clearDFG + HPVM_OPTFLAGS += -hpvm-timers-cpu else DEVICE = GPU_TARGET - HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_NVPTX.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -localmem -dfg2llvm-nvptx -dfg2llvm-x86 -clearDFG - HPVM_OPTFLAGS += -hpvm-timers-x86 -hpvm-timers-ptx + HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -localmem -dfg2llvm-nvptx -dfg2llvm-cpu -clearDFG + HPVM_OPTFLAGS += -hpvm-timers-cpu -hpvm-timers-ptx endif TESTGEN_OPTFLAGS += -hpvm-timers-gen diff --git a/hpvm/test/benchmarks/pipeline/src/main.cc b/hpvm/test/benchmarks/pipeline/src/main.cc index cda1d975a63fc07c174ed57ddef1e72f0973f033..057c13b62745ba618b13b9f2c1443fb41ca45bdb 100644 --- a/hpvm/test/benchmarks/pipeline/src/main.cc +++ b/hpvm/test/benchmarks/pipeline/src/main.cc @@ -143,7 +143,7 @@ void packData(struct InStruct *args, float *I, size_t bytesI, float *Is, * Need 2D grid, a thread per pixel * No use of separable algorithm because we need to do this in one kernel * No use of shared memory because - * - we don't handle it in the X86 pass + * - we don't handle it in the CPU pass */ #define GAUSSIAN_SIZE 7 @@ -452,7 +452,7 @@ void WrapperComputeZeroCrossings(float *L, size_t bytesL, float *B, * Need 2D grid, a thread per pixel * No use of separable algorithm because we need to do this in one kernel * No use of shared memory because - * - we don't handle it in the X86 pass + * - we don't handle it in the CPU pass */ #define SOBEL_SIZE 3 @@ -834,7 +834,7 @@ int main(int argc, char *argv[]) { resize(E, out, Size(HEIGHT, WIDTH)); imshow(input_window, in); imshow(output_window, out); -// waitKey(0); + // waitKey(0); struct InStruct *args = (struct InStruct *)malloc(sizeof(InStruct)); packData(args, (float *)src.data, I_sz, (float *)Is.data, I_sz, @@ -873,7 +873,7 @@ int main(int argc, char *argv[]) { __hpvm__push(DFG, args); void *ret = __hpvm__pop(DFG); // This is reading the result of the streaming graph - size_t framesize = ((OutStruct *)ret)->ret; + size_t framesize = ((OutStruct *)ret)->ret; llvm_hpvm_request_mem(maxG, bytesMaxG); llvm_hpvm_request_mem(E.data, I_sz); diff --git a/hpvm/test/benchmarks/template/Makefile b/hpvm/test/benchmarks/template/Makefile index 5524f05286be7fb8bea1aac163f5732e1f31c966..46b1afe95df2f7f3a3f7e3a71b7952d744da5b65 100644 --- a/hpvm/test/benchmarks/template/Makefile +++ b/hpvm/test/benchmarks/template/Makefile @@ -52,12 +52,12 @@ TESTGEN_OPTFLAGS = -load LLVMGenHPVM.so -genhpvm -globaldce ifeq ($(TARGET),seq) DEVICE = CPU_TARGET - HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG - HPVM_OPTFLAGS += -hpvm-timers-x86 + HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -dfg2llvm-cpu -clearDFG + HPVM_OPTFLAGS += -hpvm-timers-cpu else DEVICE = GPU_TARGET - HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_NVPTX.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -localmem -dfg2llvm-nvptx -dfg2llvm-x86 -clearDFG - HPVM_OPTFLAGS += -hpvm-timers-x86 -hpvm-timers-ptx + HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load 
LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -localmem -dfg2llvm-nvptx -dfg2llvm-cpu -clearDFG + HPVM_OPTFLAGS += -hpvm-timers-cpu -hpvm-timers-ptx endif TESTGEN_OPTFLAGS += -hpvm-timers-gen diff --git a/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.atomic.genvisc.ll b/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.atomic.genvisc.ll index 451035b21ede68a4796ebd1a0baa3645a77a31ef..e3570bcb664811af5e07539d93d19cf8fc2bcddf 100644 --- a/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.atomic.genvisc.ll +++ b/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.atomic.genvisc.ll @@ -1,4 +1,4 @@ -; RUN: opt -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_NVPTX.so -S -localmem -dfg2llvm-nvptx < %s | FileCheck %s +; RUN: opt -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -S -localmem -dfg2llvm-nvptx < %s | FileCheck %s ; ModuleID = 'ThreeLevel.atomic.ll' source_filename = "ThreeLevel.constmem.c" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.genvisc.ll b/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.genvisc.ll index ed99bee9f704b3dff96abcbd50982ec64a38c2d5..b08b951800a0871b3eeb14d61246bfd032ad88e4 100644 --- a/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.genvisc.ll +++ b/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.genvisc.ll @@ -1,4 +1,4 @@ -; RUN: opt -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_NVPTX.so -S -localmem -dfg2llvm-nvptx < %s | FileCheck %s +; RUN: opt -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -S -localmem -dfg2llvm-nvptx < %s | FileCheck %s ; ModuleID = 'ThreeLevel.ll' source_filename = "ThreeLevel.c" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.opt.genvisc.ll b/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.opt.genvisc.ll index 060608fdc5ae28ff52382fd722e7288c5531874f..b3cb659f9600cbbd4b12e9b1131e5c2f5112eb67 100644 --- a/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.opt.genvisc.ll +++ b/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.opt.genvisc.ll @@ -1,4 +1,4 @@ -; RUN: opt -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_NVPTX.so -S -localmem -dfg2llvm-nvptx < %s | FileCheck %s +; RUN: opt -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -S -localmem -dfg2llvm-nvptx < %s | FileCheck %s ; ModuleID = 'ThreeLevel.opt.ll' source_filename = "ThreeLevel.opt.c" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/hpvm/test/regressionTests/DFG2LLVM_X86/CreateNode.dfg.ll b/hpvm/test/regressionTests/DFG2LLVM_X86/CreateNode.dfg.ll index 1373d13159ee90421d75a2f16e99e3d4a9a24bdd..f7f943a2550cb6745f0146d97ba6cea5cf5d9a6e 100644 --- a/hpvm/test/regressionTests/DFG2LLVM_X86/CreateNode.dfg.ll +++ b/hpvm/test/regressionTests/DFG2LLVM_X86/CreateNode.dfg.ll @@ -1,4 +1,4 @@ -; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -S -dfg2llvm-x86 < %s | FileCheck %s +; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_CPU.so -S -dfg2llvm-cpu < %s | FileCheck %s ; ModuleID = 'CreateNode.ll' source_filename = "CreateNode.c" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -10,9 +10,9 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-LABEL: i32 @main( ; CHECK: call void @llvm.hpvm.init() -; CHECK: call i8* @llvm_hpvm_x86_launch(i8* (i8*)* @LaunchDataflowGraph, i8* +; CHECK: call i8* 
@llvm_hpvm_cpu_launch(i8* (i8*)* @LaunchDataflowGraph, i8* ; CHECK-NEXT: call i8* @llvm.hpvm.launch(i8* -; CHECK-NEXT: call void @llvm_hpvm_x86_wait(i8* +; CHECK-NEXT: call void @llvm_hpvm_cpu_wait(i8* ; CHECK-LABEL: @PipeRoot_cloned( ; CHECK: call i8* @llvm.hpvm.createNode( @@ -23,12 +23,12 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func_cloned.node ; CHECK-LABEL: @Func_cloned.1_cloned_cloned_cloned_cloned_cloned_cloned -; CHECK: call i8* @llvm_hpvm_x86_argument_ptr( +; CHECK: call i8* @llvm_hpvm_cpu_argument_ptr( ; CHECK-LABEL: @PipeRoot_cloned.2( -; CHECK: call void @llvm_hpvm_x86_dstack_push( +; CHECK: call void @llvm_hpvm_cpu_dstack_push( ; CHECK-NEXT: @Func_cloned.1_cloned_cloned_cloned_cloned_cloned_cloned( -; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_pop() +; CHECK-NEXT: call void @llvm_hpvm_cpu_dstack_pop() ; CHECK-LABEL: @LaunchDataflowGraph(i8* ; call %struct.out.PipeRoot @PipeRoot_cloned.2( diff --git a/hpvm/test/regressionTests/DFG2LLVM_X86/ThreeLevel.dfg.ll b/hpvm/test/regressionTests/DFG2LLVM_X86/ThreeLevel.dfg.ll index a60f28a08a3bad2272687169bb1f4778f1bb8b6e..35d63f16ef3ccde72b8827ba63770c1e1afadd9f 100644 --- a/hpvm/test/regressionTests/DFG2LLVM_X86/ThreeLevel.dfg.ll +++ b/hpvm/test/regressionTests/DFG2LLVM_X86/ThreeLevel.dfg.ll @@ -1,4 +1,4 @@ -; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -S -dfg2llvm-x86 < %s | FileCheck %s +; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_CPU.so -S -dfg2llvm-cpu < %s | FileCheck %s ; ModuleID = 'ThreeLevel.ll' source_filename = "ThreeLevel.c" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -13,9 +13,9 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-LABEL: i32 @main( ; CHECK: call void @llvm.hpvm.init() -; CHECK: call i8* @llvm_hpvm_x86_launch(i8* (i8*)* @LaunchDataflowGraph, i8* +; CHECK: call i8* @llvm_hpvm_cpu_launch(i8* (i8*)* @LaunchDataflowGraph, i8* ; CHECK-NEXT: call i8* @llvm.hpvm.launch(i8* -; CHECK-NEXT: call void @llvm_hpvm_x86_wait(i8* +; CHECK-NEXT: call void @llvm_hpvm_cpu_wait(i8* ; CHECK-LABEL: @Func3_cloned( ; CHECK: call i8* @llvm.hpvm.createNode2D( @@ -42,26 +42,26 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func2_cloned.node ; CHECK-LABEL: @Func1_cloned.1_cloned_cloned_cloned_cloned_cloned_cloned -; CHECK: call i8* @llvm_hpvm_x86_argument_ptr( +; CHECK: call i8* @llvm_hpvm_cpu_argument_ptr( ; CHECK-LABEL: @Func3_cloned.2_cloned_cloned_cloned_cloned_cloned_cloned( ; CHECK-LABEL: for.body1: ; CHECK: %index.y = phi i64 [ 0, %for.body ], [ %index.y.inc, %for.body1 ] -; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_push( +; CHECK-NEXT: call void @llvm_hpvm_cpu_dstack_push( ; CHECK-NEXT: @Func1_cloned.1_cloned_cloned_cloned_cloned_cloned_cloned( -; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_pop() +; CHECK-NEXT: call void @llvm_hpvm_cpu_dstack_pop() ; CHECK-LABEL: @Func2_cloned.3_cloned_cloned_cloned_cloned_cloned_cloned( ; CHECK-LABEL: for.body: ; CHECK-NEXT: %index.x = phi i64 [ 0, %entry ], [ %index.x.inc, %for.body ] -; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_push( +; CHECK-NEXT: call void @llvm_hpvm_cpu_dstack_push( ; CHECK-NEXT: @Func3_cloned.2_cloned_cloned_cloned_cloned_cloned_cloned( -; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_pop() +; CHECK-NEXT: call void @llvm_hpvm_cpu_dstack_pop() ; CHECK-LABEL: @PipeRoot_cloned.4( -; CHECK: call void @llvm_hpvm_x86_dstack_push( +; CHECK: call void @llvm_hpvm_cpu_dstack_push( ; CHECK-NEXT: 
@Func2_cloned.3_cloned_cloned_cloned_cloned_cloned_cloned( -; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_pop() +; CHECK-NEXT: call void @llvm_hpvm_cpu_dstack_pop() ; CHECK-LABEL: @LaunchDataflowGraph( ; CHECK: call %struct.out.PipeRoot @PipeRoot_cloned.4( diff --git a/hpvm/test/regressionTests/DFG2LLVM_X86/TwoLaunch.dfg.ll b/hpvm/test/regressionTests/DFG2LLVM_X86/TwoLaunch.dfg.ll index 5ce7a58e2189d1a00806979af6bab0cbe1029852..3f74a190f31e707e17f939d9639814443aef642c 100644 --- a/hpvm/test/regressionTests/DFG2LLVM_X86/TwoLaunch.dfg.ll +++ b/hpvm/test/regressionTests/DFG2LLVM_X86/TwoLaunch.dfg.ll @@ -1,4 +1,4 @@ -; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -S -dfg2llvm-x86 < %s | FileCheck %s +; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_CPU.so -S -dfg2llvm-cpu < %s | FileCheck %s ; ModuleID = 'TwoLaunch.ll' source_filename = "TwoLaunch.c" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -11,12 +11,12 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-LABEL: i32 @main( ; CHECK: call void @llvm.hpvm.init() -; CHECK: @llvm_hpvm_x86_launch(i8* (i8*)* @LaunchDataflowGraph, i8* +; CHECK: @llvm_hpvm_cpu_launch(i8* (i8*)* @LaunchDataflowGraph, i8* ; CHECK-NEXT: call i8* @llvm.hpvm.launch(i8* -; CHECK: @llvm_hpvm_x86_launch(i8* (i8*)* @LaunchDataflowGraph.7, i8* +; CHECK: @llvm_hpvm_cpu_launch(i8* (i8*)* @LaunchDataflowGraph.7, i8* ; CHECK-NEXT: call i8* @llvm.hpvm.launch(i8* -; CHECK-NEXT: call void @llvm_hpvm_x86_wait(i8* -; CHECK-NEXT: call void @llvm_hpvm_x86_wait(i8* +; CHECK-NEXT: call void @llvm_hpvm_cpu_wait(i8* +; CHECK-NEXT: call void @llvm_hpvm_cpu_wait(i8* ; CHECK-LABEL: @Func2_cloned( ; CHECK: call i8* @llvm.hpvm.createNode1D( @@ -35,30 +35,30 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func2_cloned.node ; CHECK-LABEL: @Func1_cloned.1_cloned_cloned_cloned_cloned_cloned_cloned( -; CHECK: call i8* @llvm_hpvm_x86_argument_ptr( +; CHECK: call i8* @llvm_hpvm_cpu_argument_ptr( ; CHECK-LABEL: @Func2_cloned.2_cloned_cloned_cloned_cloned_cloned_cloned( ; CHECK: %index.x = phi i64 [ 0, %entry ], [ %index.x.inc, %for.body ] -; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_push( +; CHECK-NEXT: call void @llvm_hpvm_cpu_dstack_push( ; CHECK-NEXT: @Func1_cloned.1_cloned_cloned_cloned_cloned_cloned_cloned( -; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_pop() +; CHECK-NEXT: call void @llvm_hpvm_cpu_dstack_pop() ; CHECK-LABEL: @PipeRoot_cloned.3( -; CHECK: call void @llvm_hpvm_x86_dstack_push( +; CHECK: call void @llvm_hpvm_cpu_dstack_push( ; CHECK-NEXT: @Func2_cloned.2_cloned_cloned_cloned_cloned_cloned_cloned( -; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_pop() +; CHECK-NEXT: call void @llvm_hpvm_cpu_dstack_pop() ; CHECK-LABEL: @LaunchDataflowGraph(i8* ; CHECK: call %struct.out.PipeRoot @PipeRoot_cloned.3( ; CHECK-LABEL: @Func1_cloned.4_cloned_cloned_cloned_cloned_cloned_cloned( -; CHECK: @llvm_hpvm_x86_argument_ptr( +; CHECK: @llvm_hpvm_cpu_argument_ptr( ; CHECK-LABEL: @Func2_cloned.5_cloned_cloned_cloned_cloned_cloned_cloned( ; CHECK: %index.x = phi i64 [ 0, %entry ], [ %index.x.inc, %for.body ] -; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_push( +; CHECK-NEXT: call void @llvm_hpvm_cpu_dstack_push( ; CHECK-NEXT: @Func1_cloned.4_cloned_cloned_cloned_cloned_cloned_cloned( -; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_pop() +; CHECK-NEXT: call void @llvm_hpvm_cpu_dstack_pop() ; CHECK-LABEL: @LaunchDataflowGraph.7(i8* ; call %struct.out.PipeRoot @PipeRoot_cloned.6( diff --git 
a/hpvm/test/regressionTests/DFG2LLVM_X86/TwoLevel.dfg.ll b/hpvm/test/regressionTests/DFG2LLVM_X86/TwoLevel.dfg.ll index b218b70fd0e32b6e6222e7a14e88ab3a09f57977..f8ee61f1a70120a4e57bb94e272912083b7b3c1a 100644 --- a/hpvm/test/regressionTests/DFG2LLVM_X86/TwoLevel.dfg.ll +++ b/hpvm/test/regressionTests/DFG2LLVM_X86/TwoLevel.dfg.ll @@ -1,4 +1,4 @@ -; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -S -dfg2llvm-x86 < %s | FileCheck %s +; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_CPU.so -S -dfg2llvm-cpu < %s | FileCheck %s ; ModuleID = 'TwoLevel.ll' source_filename = "TwoLevel.c" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -11,9 +11,9 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-LABEL: i32 @main( ; CHECK: call void @llvm.hpvm.init() -; CHECK: call i8* @llvm_hpvm_x86_launch(i8* (i8*)* @LaunchDataflowGraph, i8* +; CHECK: call i8* @llvm_hpvm_cpu_launch(i8* (i8*)* @LaunchDataflowGraph, i8* ; CHECK-NEXT: call i8* @llvm.hpvm.launch(i8* -; CHECK-NEXT: call void @llvm_hpvm_x86_wait(i8* +; CHECK-NEXT: call void @llvm_hpvm_cpu_wait(i8* ; CHECK-LABEL: @Func2_cloned( ; CHECK: call i8* @llvm.hpvm.createNode1D( @@ -32,19 +32,19 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func2_cloned.node ; CHECK-LABEL: @Func1_cloned.1_cloned_cloned_cloned_cloned_cloned_cloned( -; CHECK: call i8* @llvm_hpvm_x86_argument_ptr( +; CHECK: call i8* @llvm_hpvm_cpu_argument_ptr( ; CHECK-LABEL: @Func2_cloned.2_cloned_cloned_cloned_cloned_cloned_cloned( ; CHECK-LABEL: for.body ; CHECK: %index.x = phi i64 [ 0, %entry ], [ %index.x.inc, %for.body ] -; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_push( +; CHECK-NEXT: call void @llvm_hpvm_cpu_dstack_push( ; CHECK-NEXT: @Func1_cloned.1_cloned_cloned_cloned_cloned_cloned_cloned( -; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_pop() +; CHECK-NEXT: call void @llvm_hpvm_cpu_dstack_pop() ; CHECK-LABEL: @PipeRoot_cloned.3( -; CHECK: call void @llvm_hpvm_x86_dstack_push( +; CHECK: call void @llvm_hpvm_cpu_dstack_push( ; CHECK-NEXT: @Func2_cloned.2_cloned_cloned_cloned_cloned_cloned_cloned( -; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_pop() +; CHECK-NEXT: call void @llvm_hpvm_cpu_dstack_pop() ; CHECK-LABEL: @LaunchDataflowGraph(i8* ; call %struct.out.PipeRoot @PipeRoot_cloned.3( diff --git a/hpvm/test/regressionTests/DFG2LLVM_X86/oneLaunchAlloca.dfg.ll b/hpvm/test/regressionTests/DFG2LLVM_X86/oneLaunchAlloca.dfg.ll index a0f0f6ecfc4b68cbc3f86272fb11cf3702f9b54e..1bfa5f0c0b3eb9237c242d0ba56ee6f17960dfec 100644 --- a/hpvm/test/regressionTests/DFG2LLVM_X86/oneLaunchAlloca.dfg.ll +++ b/hpvm/test/regressionTests/DFG2LLVM_X86/oneLaunchAlloca.dfg.ll @@ -1,4 +1,4 @@ -; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -S -dfg2llvm-x86 < %s | FileCheck %s +; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_CPU.so -S -dfg2llvm-cpu < %s | FileCheck %s ; ModuleID = 'oneLaunchAlloca.ll' source_filename = "oneLaunchAlloca.c" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -13,9 +13,9 @@ declare dso_local void @__hpvm__attributes(i32, ...) 
local_unnamed_addr #0 ; CHECK-LABEL: i32 @main( ; CHECK: call void @llvm.hpvm.init() -; CHECK: call i8* @llvm_hpvm_x86_launch(i8* (i8*)* @LaunchDataflowGraph, i8* +; CHECK: call i8* @llvm_hpvm_cpu_launch(i8* (i8*)* @LaunchDataflowGraph, i8* ; CHECK-NEXT: call i8* @llvm.hpvm.launch(i8* -; CHECK-NEXT: call void @llvm_hpvm_x86_wait(i8* +; CHECK-NEXT: call void @llvm_hpvm_cpu_wait(i8* ; CHECK-LABEL: @PipeRoot_cloned.1( diff --git a/hpvm/test/unitTests/ThreeLevel.ll b/hpvm/test/unitTests/ThreeLevel.ll index d8bf050234264e55be6af269e40ab5f2ef36a03b..840a2b5685d33d02584b72d96482fedda9a52fb6 100644 --- a/hpvm/test/unitTests/ThreeLevel.ll +++ b/hpvm/test/unitTests/ThreeLevel.ll @@ -1,4 +1,4 @@ -; RUN: opt - load LLVMGenHPVM.so -S -genhpvm < %s +; RUN: opt -load LLVMGenHPVM.so -S -genhpvm < %s ; ModuleID = 'TwoLevel.c' source_filename = "TwoLevel.c" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -10,7 +10,7 @@ target triple = "x86_64-unknown-linux-gnu" define dso_local void @Func1(i32* %In, i64 %Insize, i32* %Out, i64 %Outsize) #0 { entry: tail call void @__hpvm__hint(i32 1) #3 - tail call void (i32, ...) @__hpvm__attributes(i32 1, i32* %In, i32* %Out, i32 1, i32* %Out) #3 + tail call void (i32, ...) @__hpvm__attributes(i32 2, i32* %In, i32* %Out, i32 1, i32* %Out) #3 %0 = load i32, i32* %In, align 4, !tbaa !2 store i32 %0, i32* %Out, align 4, !tbaa !2 tail call void (i32, ...) @__hpvm__return(i32 1, i32* %Out) #3 diff --git a/hpvm/test/unitTests/TwoLevel.ll b/hpvm/test/unitTests/TwoLevel.ll index 0289319517b7d6a1f83f7b64d615bcbd72630821..840a2b5685d33d02584b72d96482fedda9a52fb6 100644 --- a/hpvm/test/unitTests/TwoLevel.ll +++ b/hpvm/test/unitTests/TwoLevel.ll @@ -10,7 +10,7 @@ target triple = "x86_64-unknown-linux-gnu" define dso_local void @Func1(i32* %In, i64 %Insize, i32* %Out, i64 %Outsize) #0 { entry: tail call void @__hpvm__hint(i32 1) #3 - tail call void (i32, ...) @__hpvm__attributes(i32 1, i32* %In, i32* %Out, i32 1, i32* %Out) #3 + tail call void (i32, ...) @__hpvm__attributes(i32 2, i32* %In, i32* %Out, i32 1, i32* %Out) #3 %0 = load i32, i32* %In, align 4, !tbaa !2 store i32 %0, i32* %Out, align 4, !tbaa !2 tail call void (i32, ...) @__hpvm__return(i32 1, i32* %Out) #3