Commit a750f6e8 authored by Yifan Zhao's avatar Yifan Zhao
Browse files

Now referring to X86 target as CPU

parent d9d6f341
......@@ -6,10 +6,10 @@ Compilation of an HPVM program involves the following steps:
3. `opt` takes the HPVM textual representation (`main.hpvm.ll`) and invokes the following passes in sequence:
* BuildDFG: Converts the textual representation to the internal HPVM representation.
* LocalMem and DFG2LLVM_NVPTX: Invoked only when GPU target is selected. Generates the kernel module (`main.kernels.ll`) and the portion of the host code that invokes the kernel into the host module (`main.host.ll`).
* DFG2LLVM_X86: Generates either all, or the remainder of the host module (`main.host.ll`) depending on the chosen target.
* DFG2LLVM_CPU: Generates either all, or the remainder of the host module (`main.host.ll`) depending on the chosen target.
* ClearDFG: Deletes the internal HPVM representation from memory.
4. `clang` is used to compile any remaining project files that would be later linked with the host module.
5. `llvm-link` takes the host module and all the other generated `ll` files, and links them with the HPVM runtime module (`hpvm-rt.bc`), to generate the linked host module (`main.host.linked.ll`).
6. Generate the executable code from the generated `ll` files for all parts of the program:
* GPU target: `llvm-cbe` takes the kernel module (`main.kernels.ll`) and generates an OpenCL representation of the kernels that will be invoked by the host.
* X86 target: `clang` takes the linked host module (`main.host.linked.ll`) and generates the X86 binary.
* CPU target: `clang` takes the linked host module (`main.host.linked.ll`) and generates the CPU binary.
......@@ -51,11 +51,11 @@ struct TargetGenFunctions {
};
// Per-target flags, one bool for each of the cpu, gpu, spir, cudnn and
// promise targets. Each flag records whether the function generated for that
// target is a host (x86/CPU) function; the flags are written through
// GenFuncInfo by the DFNode accessors (e.g. addGenFunc).
struct TargetGenFuncInfo {
// NOTE(review): the *_hasX86Func names look like the spellings from before
// the X86 -> CPU rename and the *_hasCPUFunc names the spellings after it;
// confirm which set is meant to survive.
bool cpu_hasX86Func;
bool gpu_hasX86Func;
bool spir_hasX86Func;
bool cudnn_hasX86Func;
bool promise_hasX86Func;
bool cpu_hasCPUFunc;
bool gpu_hasCPUFunc;
bool spir_hasCPUFunc;
bool cudnn_hasCPUFunc;
bool promise_hasCPUFunc;
};
class DFGraph {
......@@ -191,7 +191,7 @@ private:
///< (if multiple are available)
struct TargetGenFuncInfo GenFuncInfo;
///< True for each target generated function
///< if the associated genFunc is an x86 function
///< if the associated genFunc is a CPU function
DFInternalNode *Parent; ///< Pointer to parent dataflow Node
unsigned NumOfDim; ///< Number of dimensions
std::vector<Value *> DimLimits; ///< Number of instances in each dimension
......@@ -349,15 +349,15 @@ public:
Function *getGenFunc() const { return GenFunc; }
void setHasX86FuncForTarget(hpvm::Target T, bool isX86Func) {
void setHasCPUFuncForTarget(hpvm::Target T, bool isCPUFunc) {
switch (T) {
case hpvm::None:
return; // Do nothing.
case hpvm::CPU_TARGET:
GenFuncInfo.cpu_hasX86Func = isX86Func;
GenFuncInfo.cpu_hasCPUFunc = isCPUFunc;
break;
case hpvm::GPU_TARGET:
GenFuncInfo.gpu_hasX86Func = isX86Func;
GenFuncInfo.gpu_hasCPUFunc = isCPUFunc;
break;
case hpvm::CPU_OR_GPU_TARGET:
break;
......@@ -368,14 +368,14 @@ public:
return;
}
bool hasX86GenFuncForTarget(hpvm::Target T) const {
bool hasCPUGenFuncForTarget(hpvm::Target T) const {
switch (T) {
case hpvm::None:
return false;
case hpvm::CPU_TARGET:
return GenFuncInfo.cpu_hasX86Func;
return GenFuncInfo.cpu_hasCPUFunc;
case hpvm::GPU_TARGET:
return GenFuncInfo.gpu_hasX86Func;
return GenFuncInfo.gpu_hasCPUFunc;
case hpvm::CPU_OR_GPU_TARGET:
assert(false && "Single target expected (CPU/GPU/SPIR/CUDNN/PROMISE)\n");
default:
......@@ -384,7 +384,7 @@ public:
return false;
}
void addGenFunc(Function *F, hpvm::Target T, bool isX86Func) {
void addGenFunc(Function *F, hpvm::Target T, bool isCPUFunc) {
switch (T) {
case hpvm::CPU_TARGET:
......@@ -393,7 +393,7 @@ public:
<< FuncPointer->getName() << "\n");
}
GenFuncs.CPUGenFunc = F;
GenFuncInfo.cpu_hasX86Func = isX86Func;
GenFuncInfo.cpu_hasCPUFunc = isCPUFunc;
break;
case hpvm::GPU_TARGET:
if (GenFuncs.GPUGenFunc != NULL) {
......@@ -401,7 +401,7 @@ public:
<< FuncPointer->getName() << "\n");
}
GenFuncs.GPUGenFunc = F;
GenFuncInfo.gpu_hasX86Func = isX86Func;
GenFuncInfo.gpu_hasCPUFunc = isCPUFunc;
break;
case hpvm::CPU_OR_GPU_TARGET:
assert(false && "A node function should be set with a tag specifying its \
......@@ -437,11 +437,11 @@ public:
return;
case hpvm::CPU_TARGET:
GenFuncs.CPUGenFunc = NULL;
GenFuncInfo.cpu_hasX86Func = false;
GenFuncInfo.cpu_hasCPUFunc = false;
break;
case hpvm::GPU_TARGET:
GenFuncs.GPUGenFunc = NULL;
GenFuncInfo.gpu_hasX86Func = false;
GenFuncInfo.gpu_hasCPUFunc = false;
break;
case hpvm::CPU_OR_GPU_TARGET:
assert(false &&
......@@ -690,11 +690,11 @@ DFNode::DFNode(IntrinsicInst *_II, Function *_FuncPointer, hpvm::Target _Hint,
GenFuncs.CUDNNGenFunc = NULL;
GenFuncs.PROMISEGenFunc = NULL;
// A freshly constructed node has no generated function for any target, so
// every per-target flag starts out false; addGenFunc sets a flag when a
// generated function is registered. Each target (cpu, gpu, spir, cudnn,
// promise) is cleared exactly once so that no flag is left uninitialized.
GenFuncInfo.cpu_hasX86Func = false;
GenFuncInfo.gpu_hasX86Func = false;
GenFuncInfo.spir_hasX86Func = false;
GenFuncInfo.cudnn_hasX86Func = false;
GenFuncInfo.promise_hasX86Func = false;
GenFuncInfo.cpu_hasCPUFunc = false;
GenFuncInfo.gpu_hasCPUFunc = false;
GenFuncInfo.spir_hasCPUFunc = false;
GenFuncInfo.cudnn_hasCPUFunc = false;
GenFuncInfo.promise_hasCPUFunc = false;
}
void DFNode::setRank(unsigned r) {
......
add_subdirectory(BuildDFG)
add_subdirectory(ClearDFG)
add_subdirectory(DFG2LLVM_NVPTX)
add_subdirectory(DFG2LLVM_X86)
add_subdirectory(DFG2LLVM_CPU)
add_subdirectory(GenHPVM)
add_subdirectory(LocalMem)
......@@ -4,9 +4,9 @@ endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DLLVM_BUILD_DIR=${PROJECT_BINARY_DIR}")
add_llvm_library( LLVMDFG2LLVM_X86
add_llvm_library( LLVMDFG2LLVM_CPU
MODULE
DFG2LLVM_X86.cpp
DFG2LLVM_CPU.cpp
DEPENDS intrinsics_gen
PLUGIN_TOOL
......
;===- ./lib/Transforms/DFG2LLVM_X86/LLVMBuild.txt --------------*- Conf -*--===;
;===- ./lib/Transforms/DFG2LLVM_CPU/LLVMBuild.txt --------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
......@@ -17,5 +17,5 @@
[component_0]
type = Library
name = DFG2LLVM_X86
name = DFG2LLVM_CPU
parent = Transforms
//=== DFG2LLVM_NVPTX.cpp ===//
//=== DFG2LLVM_OpenCL.cpp ===//
//
// The LLVM Compiler Infrastructure
//
......@@ -14,7 +14,7 @@
#define CONSTANT_ADDRSPACE 4
#define SHARED_ADDRSPACE 3
#define DEBUG_TYPE "DFG2LLVM_NVPTX"
#define DEBUG_TYPE "DFG2LLVM_OpenCL"
#include "SupportHPVM/DFG2LLVM.h"
#include "SupportHPVM/HPVMTimer.h"
#include "SupportHPVM/HPVMUtils.h"
......@@ -54,8 +54,8 @@ using namespace dfg2llvm;
using namespace hpvmUtils;
// HPVM Command line option to use timer or not
static cl::opt<bool> HPVMTimer_NVPTX("hpvm-timers-ptx",
cl::desc("Enable hpvm timers"));
static cl::opt<bool> HPVMTimer_OpenCL("hpvm-timers-ptx",
cl::desc("Enable hpvm timers"));
namespace {
// Helper class declarations
......@@ -149,10 +149,10 @@ static void findIntrinsicInst(Function *, Intrinsic::ID,
static AtomicRMWInst::BinOp getAtomicOp(Intrinsic::ID);
static std::string getAtomicOpName(Intrinsic::ID);
// DFG2LLVM_NVPTX - The first implementation.
struct DFG2LLVM_NVPTX : public DFG2LLVM {
// DFG2LLVM_OpenCL - The first implementation.
struct DFG2LLVM_OpenCL : public DFG2LLVM {
static char ID; // Pass identification, replacement for typeid
DFG2LLVM_NVPTX() : DFG2LLVM(ID) {}
DFG2LLVM_OpenCL() : DFG2LLVM(ID) {}
private:
public:
......@@ -160,7 +160,7 @@ public:
};
// Visitor for Code generation traversal (tree traversal for now)
class CGT_NVPTX : public CodeGenTraversal {
class CGT_OpenCL : public CodeGenTraversal {
private:
// Member variables
......@@ -194,8 +194,8 @@ private:
// Virtual Functions
void init() {
HPVMTimer = HPVMTimer_NVPTX;
TargetName = "NVPTX";
HPVMTimer = HPVMTimer_OpenCL;
TargetName = "OpenCL";
}
void initRuntimeAPI();
void codeGen(DFInternalNode *N);
......@@ -203,7 +203,7 @@ private:
public:
// Constructor
CGT_NVPTX(Module &_M, BuildDFG &_DFG)
CGT_OpenCL(Module &_M, BuildDFG &_DFG)
: CodeGenTraversal(_M, _DFG), KernelM(CloneModule(_M)) {
init();
initRuntimeAPI();
......@@ -257,7 +257,7 @@ public:
};
// Initialize the HPVM runtime API. This makes it easier to insert these calls
void CGT_NVPTX::initRuntimeAPI() {
void CGT_OpenCL::initRuntimeAPI() {
// Load Runtime API Module
SMDiagnostic Err;
......@@ -289,7 +289,7 @@ void CGT_NVPTX::initRuntimeAPI() {
initTimerAPI();
// Insert init context in main
DEBUG(errs() << "Gen Code to initialize NVPTX Timer\n");
DEBUG(errs() << "Gen Code to initialize OpenCL Timer\n");
Function *VI = M.getFunction("llvm.hpvm.init");
assert(VI->getNumUses() == 1 && "__hpvm__init should only be used once");
......@@ -302,7 +302,7 @@ void CGT_NVPTX::initRuntimeAPI() {
switchToTimer(hpvm_TimerID_NONE, InitCall);
// Insert print instruction at hpvm exit
DEBUG(errs() << "Gen Code to print NVPTX Timer\n");
DEBUG(errs() << "Gen Code to print OpenCL Timer\n");
Function *VC = M.getFunction("llvm.hpvm.cleanup");
DEBUG(errs() << *VC << "\n");
assert(VC->getNumUses() == 1 && "__hpvm__clear should only be used once");
......@@ -316,8 +316,8 @@ void CGT_NVPTX::initRuntimeAPI() {
// used to generate a function to associate with this leaf node. The function
// is responsible for all the memory allocation/transfer and invoking the
// kernel call on the device
void CGT_NVPTX::insertRuntimeCalls(DFInternalNode *N, Kernel *K,
const Twine &FileName) {
void CGT_OpenCL::insertRuntimeCalls(DFInternalNode *N, Kernel *K,
const Twine &FileName) {
// Check if clone already exists. If it does, it means we have visited this
// function before.
// assert(N->getGenFunc() == NULL && "Code already generated for this node");
......@@ -338,18 +338,18 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode *N, Kernel *K,
// Create a clone of F with no instructions. Only the type is the same as F
// without the extra arguments.
Function *F_X86;
Function *F_CPU;
// Clone the function, if we are seeing this function for the first time. We
// only need a clone in terms of type.
ValueToValueMapTy VMap;
// Create new function with the same type
F_X86 =
F_CPU =
Function::Create(F->getFunctionType(), F->getLinkage(), F->getName(), &M);
// Loop over the arguments, copying the names of arguments over.
Function::arg_iterator dest_iterator = F_X86->arg_begin();
Function::arg_iterator dest_iterator = F_CPU->arg_begin();
for (Function::const_arg_iterator i = F->arg_begin(), e = F->arg_end();
i != e; ++i) {
dest_iterator->setName(i->getName()); // Copy the name over...
......@@ -358,29 +358,29 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode *N, Kernel *K,
}
// Add a basic block to this empty function
BasicBlock *BB = BasicBlock::Create(M.getContext(), "entry", F_X86);
BasicBlock *BB = BasicBlock::Create(M.getContext(), "entry", F_CPU);
ReturnInst *RI = ReturnInst::Create(
M.getContext(), UndefValue::get(F_X86->getReturnType()), BB);
M.getContext(), UndefValue::get(F_CPU->getReturnType()), BB);
// FIXME: Adding Index and Dim arguments are probably not required except
// for consistency purpose (DFG2LLVM_X86 does assume that all leaf nodes do
// for consistency purpose (DFG2LLVM_CPU does assume that all leaf nodes do
// have those arguments)
// Add Index and Dim arguments except for the root node
if (!N->isRoot() && !N->getParent()->isChildGraphStreaming())
F_X86 = addIdxDimArgs(F_X86);
F_CPU = addIdxDimArgs(F_CPU);
BB = &*F_X86->begin();
BB = &*F_CPU->begin();
RI = cast<ReturnInst>(BB->getTerminator());
// Add the generated function info to DFNode
// N->setGenFunc(F_X86, hpvm::CPU_TARGET);
N->addGenFunc(F_X86, hpvm::GPU_TARGET, true);
DEBUG(errs() << "Added GPUGenFunc: " << F_X86->getName() << " for node "
// N->setGenFunc(F_CPU, hpvm::CPU_TARGET);
N->addGenFunc(F_CPU, hpvm::GPU_TARGET, true);
DEBUG(errs() << "Added GPUGenFunc: " << F_CPU->getName() << " for node "
<< N->getFuncPointer()->getName() << "\n");
// Loop over the arguments, to create the VMap
dest_iterator = F_X86->arg_begin();
dest_iterator = F_CPU->arg_begin();
for (Function::const_arg_iterator i = F->arg_begin(), e = F->arg_end();
i != e; ++i) {
// Add mapping to VMap and increment dest iterator
......@@ -435,16 +435,16 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode *N, Kernel *K,
DEBUG(errs() << "Inserting launch call"
<< "\n");
CallInst *NVPTX_Ctx = CallInst::Create(llvm_hpvm_ocl_launch,
ArrayRef<Value *>(LaunchInstArgs, 2),
"graph" + KF->getName(), InitCall);
DEBUG(errs() << *NVPTX_Ctx << "\n");
GraphIDAddr = new GlobalVariable(M, NVPTX_Ctx->getType(), false,
GlobalValue::CommonLinkage,
Constant::getNullValue(NVPTX_Ctx->getType()),
"graph" + KF->getName() + ".addr");
CallInst *OpenCL_Ctx = CallInst::Create(llvm_hpvm_ocl_launch,
ArrayRef<Value *>(LaunchInstArgs, 2),
"graph" + KF->getName(), InitCall);
DEBUG(errs() << *OpenCL_Ctx << "\n");
GraphIDAddr = new GlobalVariable(
M, OpenCL_Ctx->getType(), false, GlobalValue::CommonLinkage,
Constant::getNullValue(OpenCL_Ctx->getType()),
"graph" + KF->getName() + ".addr");
DEBUG(errs() << "Store at: " << *GraphIDAddr << "\n");
StoreInst *SI = new StoreInst(NVPTX_Ctx, GraphIDAddr, InitCall);
StoreInst *SI = new StoreInst(OpenCL_Ctx, GraphIDAddr, InitCall);
DEBUG(errs() << *SI << "\n");
switchToTimer(hpvm_TimerID_NONE, InitCall);
switchToTimer(hpvm_TimerID_SETUP, RI);
......@@ -463,14 +463,14 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode *N, Kernel *K,
for(unsigned i=0; i<KF->getFunctionType()->getNumParams(); i++) {
// The kernel object gives us the mapping of arguments from kernel launch
// node function (F_X86) to kernel (kernel->KF)
Value* inputVal = getArgumentAt(F_X86, K->getInArgMap()[i]);
// node function (F_CPU) to kernel (kernel->KF)
Value* inputVal = getArgumentAt(F_CPU, K->getInArgMap()[i]);
*/
for (auto &InArgMapPair : kernelInArgMap) {
unsigned i = InArgMapPair.first;
Value *inputVal = getArgumentAt(F_X86, InArgMapPair.second);
Value *inputVal = getArgumentAt(F_CPU, InArgMapPair.second);
DEBUG(errs() << "\tArgument " << i << " = " << *inputVal << "\n");
// input value has been obtained.
......@@ -504,7 +504,7 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode *N, Kernel *K,
// Assert that the pointer argument size (next argument) is in the map
assert(kernelInArgMap.find(i + 1) != kernelInArgMap.end());
Value *inputSize = getArgumentAt(F_X86, kernelInArgMap[i + 1]);
Value *inputSize = getArgumentAt(F_CPU, kernelInArgMap[i + 1]);
assert(
inputSize->getType() == Type::getInt64Ty(M.getContext()) &&
"Pointer type input must always be followed by size (integer type)");
......@@ -606,7 +606,7 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode *N, Kernel *K,
std::vector<Value *> AllocInputArgs;
for (unsigned i = 0; i < K->allocInArgMap.size(); i++) {
AllocInputArgs.push_back(getArgumentAt(F_X86, K->allocInArgMap.at(i)));
AllocInputArgs.push_back(getArgumentAt(F_CPU, K->allocInArgMap.at(i)));
}
CallInst *CI = CallInst::Create(F_alloc, AllocInputArgs, "", RI);
......@@ -759,7 +759,7 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode *N, Kernel *K,
DFNode *C = N->getChildGraph()->getExit();
// Get OutputType of this node
StructType *OutTy = N->getOutputType();
Value *retVal = UndefValue::get(F_X86->getReturnType());
Value *retVal = UndefValue::get(F_CPU->getReturnType());
// Find the kernel's output arg map, to use instead of the bindings
std::vector<unsigned> outArgMap = kernel->getOutArgMap();
// Find all the input edges to exit node
......@@ -779,7 +779,7 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode *N, Kernel *K,
// argument from argument list of this internal node
Value *inputVal;
if (SrcDF->isEntryNode()) {
inputVal = getArgumentAt(F_X86, i);
inputVal = getArgumentAt(F_CPU, i);
DEBUG(errs() << "Argument " << i << " = " << *inputVal << "\n");
} else {
// edge is from an internal node
......@@ -812,13 +812,13 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode *N, Kernel *K,
DEBUG(errs() << "Extracted all\n");
switchToTimer(hpvm_TimerID_NONE, RI);
retVal->setName("output");
ReturnInst *newRI = ReturnInst::Create(F_X86->getContext(), retVal);
ReturnInst *newRI = ReturnInst::Create(F_CPU->getContext(), retVal);
ReplaceInstWithInst(RI, newRI);
}
// Right now, only targeting the one level case. In general, device functions
// can return values so we don't need to change them
void CGT_NVPTX::codeGen(DFInternalNode *N) {
void CGT_OpenCL::codeGen(DFInternalNode *N) {
DEBUG(errs() << "Inside internal node: " << N->getFuncPointer()->getName()
<< "\n");
if (KernelLaunchNode == NULL)
......@@ -910,7 +910,7 @@ void CGT_NVPTX::codeGen(DFInternalNode *N) {
}
}
void CGT_NVPTX::codeGen(DFLeafNode *N) {
void CGT_OpenCL::codeGen(DFLeafNode *N) {
DEBUG(errs() << "Inside leaf node: " << N->getFuncPointer()->getName()
<< "\n");
......@@ -1625,7 +1625,7 @@ void CGT_NVPTX::codeGen(DFLeafNode *N) {
// check that addressspace is 1
// if (GEPIaddrspace != 1) {
// // does not fit this pattern - addrspace of pointer
//argument is not global continue;
// argument is not global continue;
// }
if (!(GEPI->hasOneUse())) {
// does not fit this pattern - more than one uses
......@@ -1876,8 +1876,8 @@ void CGT_NVPTX::codeGen(DFLeafNode *N) {
return;
}
bool DFG2LLVM_NVPTX::runOnModule(Module &M) {
DEBUG(errs() << "\nDFG2LLVM_NVPTX PASS\n");
bool DFG2LLVM_OpenCL::runOnModule(Module &M) {
DEBUG(errs() << "\nDFG2LLVM_OpenCL PASS\n");
// Get the BuildDFG Analysis Results:
// - Dataflow graph
......@@ -1891,7 +1891,7 @@ bool DFG2LLVM_NVPTX::runOnModule(Module &M) {
// = DFG.getHandleToDFEdgeMap();
// Visitor for Code Generation Graph Traversal
CGT_NVPTX *CGTVisitor = new CGT_NVPTX(M, DFG);
CGT_OpenCL *CGTVisitor = new CGT_OpenCL(M, DFG);
// Iterate over all the DFGs and produce code for each one of them
for (auto rootNode : Roots) {
......@@ -1907,7 +1907,7 @@ bool DFG2LLVM_NVPTX::runOnModule(Module &M) {
return true;
}
std::string CGT_NVPTX::getKernelsModuleName(Module &M) {
std::string CGT_OpenCL::getKernelsModuleName(Module &M) {
/*SmallString<128> currentDir;
llvm::sys::fs::current_path(currentDir);
std::string fileName = getFilenameFromModule(M);
......@@ -1917,7 +1917,7 @@ std::string CGT_NVPTX::getKernelsModuleName(Module &M) {
return mid.append(".kernels.ll");
}
void CGT_NVPTX::fixValueAddrspace(Value *V, unsigned addrspace) {
void CGT_OpenCL::fixValueAddrspace(Value *V, unsigned addrspace) {
assert(isa<PointerType>(V->getType()) && "Value should be of Pointer Type!");
PointerType *OldTy = cast<PointerType>(V->getType());
PointerType *NewTy = PointerType::get(OldTy->getElementType(), addrspace);
......@@ -1935,8 +1935,8 @@ void CGT_NVPTX::fixValueAddrspace(Value *V, unsigned addrspace) {
}
std::vector<unsigned>
CGT_NVPTX::globalToConstantMemoryOpt(std::vector<unsigned> *GlobalMemArgs,
Function *F) {
CGT_OpenCL::globalToConstantMemoryOpt(std::vector<unsigned> *GlobalMemArgs,
Function *F) {
std::vector<unsigned> ConstantMemArgs;
for (Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end(); ai != ae;
++ai) {
......@@ -1959,9 +1959,9 @@ CGT_NVPTX::globalToConstantMemoryOpt(std::vector<unsigned> *GlobalMemArgs,
return ConstantMemArgs;
}
Function *CGT_NVPTX::changeArgAddrspace(Function *F,
std::vector<unsigned> &Args,
unsigned addrspace) {
Function *CGT_OpenCL::changeArgAddrspace(Function *F,
std::vector<unsigned> &Args,
unsigned addrspace) {
unsigned idx = 0;
std::vector<Type *> ArgTypes;
for (Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end(); ai != ae;
......@@ -1986,7 +1986,7 @@ Function *CGT_NVPTX::changeArgAddrspace(Function *F,
}
/* Add metadata to module KernelM, for OpenCL kernels */
void CGT_NVPTX::addCLMetadata(Function *F) {
void CGT_OpenCL::addCLMetadata(Function *F) {
IRBuilder<> Builder(&*F->begin());
......@@ -2013,7 +2013,7 @@ void CGT_NVPTX::addCLMetadata(Function *F) {
MDN_annotations->addOperand(MDNvvmAnnotationsNode);
}
void CGT_NVPTX::writeKernelsModule() {
void CGT_OpenCL::writeKernelsModule() {
// In addition to deleting all other functions, we also want to spiff it
// up a little bit. Do this now.
......@@ -2035,7 +2035,7 @@ void CGT_NVPTX::writeKernelsModule() {
Out.keep();
}
Function *CGT_NVPTX::transformFunctionToVoid(Function *F) {
Function *CGT_OpenCL::transformFunctionToVoid(Function *F) {
DEBUG(errs() << "Transforming function to void: " << F->getName() << "\n");
// FIXME: Maybe do that using the Node?
......@@ -2361,7 +2361,7 @@ static std::string getFilenameFromModule(const Module &M) {
return moduleID.substr(moduleID.find_last_of("/") + 1);
}
// Changes the data layout of the Module to be compiled with NVPTX backend
// Changes the data layout of the Module to be compiled with OpenCL backend
// TODO: Figure out when to call it, probably after duplicating the modules
static void changeDataLayout(Module &M) {
std::string nvptx32_layoutStr = "e-p:32:32-i64:64-v16:16-v32:32-n16:32:64";
......@@ -2464,9 +2464,9 @@ static std::string getAtomicOpName(Intrinsic::ID ID) {
} // End of namespace
char DFG2LLVM_NVPTX::ID = 0;
static RegisterPass<DFG2LLVM_NVPTX> X("dfg2llvm-nvptx",
"Dataflow Graph to LLVM for NVPTX Pass",
char DFG2LLVM_OpenCL::ID = 0;
static RegisterPass<DFG2LLVM_OpenCL> X("dfg2llvm-nvptx",
"Dataflow Graph to LLVM for OpenCL Pass",
false /* does not modify the CFG */,
true /* transformation, *
* not just analysis */);
......@@ -39,7 +39,7 @@ typedef struct {
std::vector<CircularBuffer<uint64_t> *> *BindOutputBuffers;
std::vector<CircularBuffer<uint64_t> *> *EdgeBuffers;
std::vector<CircularBuffer<uint64_t> *> *isLastInputBuffers;
} DFNodeContext_X86;
} DFNodeContext_CPU;
typedef struct {
cl_context clOCLContext;
......@@ -212,7 +212,7 @@ static inline void checkErr(cl_int err, cl_int success, const char *name) {
/************************* Depth Stack Routines ***************************/
void llvm_hpvm_x86_dstack_push(unsigned n, uint64_t limitX, uint64_t iX,
void llvm_hpvm_cpu_dstack_push(unsigned n, uint64_t limitX, uint64_t iX,
uint64_t limitY, uint64_t iY, uint64_t limitZ,
uint64_t iZ) {
DEBUG(cout << "Pushing node information on stack:\n");
......@@ -226,7 +226,7 @@ void llvm_hpvm_x86_dstack_push(unsigned n, uint64_t limitX, uint64_t iX,
pthread_mutex_unlock(&ocl_mtx);
}
void llvm_hpvm_x86_dstack_pop() {
void llvm_hpvm_cpu_dstack_pop() {
DEBUG(cout << "Popping from depth stack\n");
pthread_mutex_lock(&ocl_mtx);
DStack.pop_back();
......@@ -234,7 +234,7 @@ void llvm_hpvm_x86_dstack_pop() {
pthread_mutex_unlock(&ocl_mtx);
}
uint64_t llvm_hpvm_x86_getDimLimit(unsigned level, unsigned dim) {
uint64_t llvm_hpvm_cpu_getDimLimit(unsigned level, unsigned dim) {
DEBUG(cout << "Request limit for dim " << dim << " of ancestor " << level
<< flush << "\n");
pthread_mutex_lock(&ocl_mtx);
......@@ -246,7 +246,7 @@ uint64_t llvm_hpvm_x86_getDimLimit(unsigned level, unsigned dim) {
return result;
}
uint64_t llvm_hpvm_x86_getDimInstance(unsigned level, unsigned dim) {
uint64_t llvm_hpvm_cpu_getDimInstance(unsigned level, unsigned dim) {
DEBUG(cout << "Request instance id for dim " << dim << " of ancestor "
<< level << flush << "\n");
pthread_mutex_lock(&ocl_mtx);
......@@ -350,13 +350,13 @@ static void *llvm_hpvm_ocl_request_mem(void *ptr, size_t size,
return d_input;
}
void *llvm_hpvm_x86_argument_ptr(void *ptr, size_t size) {
void *llvm_hpvm_cpu_argument_ptr(void *ptr, size_t size) {
return llvm_hpvm_request_mem(ptr, size);
}