Skip to content
Snippets Groups Projects
Commit 3f87f33f authored by Prakalp Srivastava's avatar Prakalp Srivastava
Browse files

(1) Moved Kernel to a separate class

(2) Made all the local functions static so that they are not visible outside
    this file
(3) Runs correctly. Now have to finish support for attributes

Point to note: Writing a module out messes up its in-memory representation,
hence it should only be written once the module is no longer going to be used
parent 68411c2a
No related branches found
No related tags found
No related merge requests found
......@@ -25,6 +25,7 @@
#include "llvm/IRReader/IRReader.h"
#include "llvm/Linker.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/FileSystem.h"
#include <sstream>
......@@ -36,11 +37,13 @@ using namespace builddfg;
namespace {
// Helper function declarations
void changeDataLayout(Module &);
void changeTargetTriple(Module &);
std::string printType(Type*);
std::string convertInt(int);
void findReturnInst(Function *, std::vector<ReturnInst *> &);
static std::string getPTXFilename(const Module&);
static std::string getFilenameFromModule(const Module& M);
static void changeDataLayout(Module &);
static void changeTargetTriple(Module &);
static std::string printType(Type*);
static std::string convertInt(int);
static void findReturnInst(Function *, std::vector<ReturnInst *> &);
// DFG2LLVM_NVPTX - The first implementation.
struct DFG2LLVM_NVPTX : public ModulePass {
......@@ -64,6 +67,26 @@ namespace {
};
// Visitor for Code generation traversal (tree traversal for now)
// Kernel - Bundles the information describing one kernel: the kernel
// function together with the number of grid/block dimensions and the
// per-dimension global/local work group sizes.
class Kernel {
public:
  // Constructs a kernel description.
  //   _KF            kernel function; stored as-is (may be NULL)
  //   _gridDim       number of entries expected in _globalWGSize
  //   _globalWGSize  one Value* per grid dimension
  //   _blockDim      number of entries expected in _localWGSize
  //   _localWGSize   one Value* per block dimension
  // Asserts that each dimension count matches the size of its vector.
  //
  // The vectors are taken by const reference (instead of by value) so they
  // are copied only once, into the members. The initializers are listed in
  // member declaration order, which is the order in which C++ actually
  // performs them.
  Kernel(Function* _KF, unsigned _gridDim = 0,
         const std::vector<Value*>& _globalWGSize = std::vector<Value*>(),
         unsigned _blockDim = 0,
         const std::vector<Value*>& _localWGSize = std::vector<Value*>())
    : KernelFunction(_KF), gridDim(_gridDim), blockDim(_blockDim),
      globalWGSize(_globalWGSize), localWGSize(_localWGSize) {
    assert(gridDim == globalWGSize.size()
           && "gridDim should be same as the size of vector globalWGSize");
    assert(blockDim == localWGSize.size()
           && "blockDim should be same as the size of vector localWGSize");
  }

  Function* KernelFunction;         // kernel function
  unsigned gridDim;                 // == globalWGSize.size() at construction
  unsigned blockDim;                // == localWGSize.size() at construction
  std::vector<Value*> globalWGSize; // per-dimension global work group sizes
  std::vector<Value*> localWGSize;  // per-dimension local work group sizes
};
class CodeGenTraversal : public DFNodeVisitor {
private:
......@@ -71,14 +94,8 @@ namespace {
Module &M;
Module &KernelM;
BuildDFG &DFG;
DFNode * KernelLaunchNode;
struct { Function * KF;
unsigned gridDim;
unsigned blockDim;
std::vector<Value*> localWGSize;
std::vector<Value*> globalWGSize;
} kernel;
DFNode* KernelLaunchNode;
Kernel* kernel;
// Map from Old function associated with DFNode to new cloned function with
// extra index and dimension arguments. This map also serves to find out if
// we already have an index and dim extended function copy or not (i.e.,
......@@ -110,7 +127,7 @@ namespace {
Argument* getArgumentAt(Function* F, unsigned offset);
Value* getInValueAt(DFNode* Child, unsigned i, Function* ParentF_X86,
Instruction* InsertBefore);
void insertRuntimeCalls(DFInternalNode* N, const Twine& FileName, const Twine& Kernel);
void insertRuntimeCalls(DFInternalNode* N, const Twine& FileName);
void codeGen(DFInternalNode* N);
void codeGen(DFLeafNode* N);
......@@ -326,11 +343,14 @@ namespace {
// used to generate a function to associate with this leaf node. The function
// is responsible for all the memory allocation/transfer and invoking the
// kernel call on the device
void CodeGenTraversal::insertRuntimeCalls(DFInternalNode* N, const Twine& FileName, const Twine& KernelName) {
void CodeGenTraversal::insertRuntimeCalls(DFInternalNode* N, const Twine& FileName) {
// Check if clone already exists. If it does, it means we have visited this
// function before and nothing else needs to be done for this leaf node.
// function before.
assert(N->getGenFunc() == NULL && "Code already generated for this node");
// If kernel struct has not been initialized with kernel function, then fail
assert(kernel != NULL && "No kernel found!!");
DEBUG(errs() << "Generating kernel call code\n");
Function* F = N->getFuncPointer();
......@@ -401,10 +421,12 @@ namespace {
DEBUG(errs() << "Initializing commandQ" << "\n");
// Initialize command queue
Value* file = getStringPointer(FileName, RI, "Filename");
Value* kernel = getStringPointer(KernelName, RI,"KernelName");
Value* fileStr = getStringPointer(FileName, RI, "Filename");
errs() << *fileStr << "\n";
errs() << "Generating code for kernel - " << kernel->KernelFunction->getName()<< "\n";
Value* kernelStr = getStringPointer(kernel->KernelFunction->getName(), RI,"KernelName");
Value* LaunchInstArgs[] = {file, kernel};
Value* LaunchInstArgs[] = {fileStr, kernelStr};
DEBUG(errs() << "Inserting launch call" << "\n");
CallInst* GraphID = CallInst::Create(llvm_visc_ptx_launch,
......@@ -489,15 +511,46 @@ namespace {
// Need work dim, localworksize, globalworksize
// FIXME: Talk to DFG2LLVM_PTX pass to figure out the workdim, local work
// size and global work size
// Allocate size_t[numDims] space on stack. Store the work group sizes and
// pass it as an argument to ExecNode
Type* Int64Ty = Type::getInt64Ty(M.getContext());
Type* GlobalWGTy = ArrayType::get(Int64Ty, kernel->gridDim);
AllocaInst* GlobalWG = new AllocaInst(GlobalWGTy, "GlobalWGSize", RI);
Value* GlobalWGPtr = BitCastInst::CreatePointerCast(GlobalWG, Int64Ty->getPointerTo(), GlobalWG->getName()+".0", RI);
Value* nextDim = GlobalWGPtr;
errs() << *GlobalWGPtr << "\n";
Constant* IntOne = ConstantInt::get(Int64Ty, 1);
errs() << *IntOne << "\n";
for(unsigned i=0; i < kernel->gridDim; i++) {
errs() << *kernel->globalWGSize[i]->getType() << "\n";
errs() << *nextDim->getType() << "\n";
assert(kernel->globalWGSize[i]->getType()->isIntegerTy() && "Dimension not an integer type!");
if(kernel->globalWGSize[i]->getType() != Int64Ty) {
kernel->globalWGSize[i] = BitCastInst::CreateIntegerCast(VMap[kernel->globalWGSize[i]], Int64Ty, true, "", RI);
StoreInst* SI = new StoreInst(kernel->globalWGSize[i], nextDim, RI);
errs() << *SI << "\n";
} else {
StoreInst* SI = new StoreInst(VMap[kernel->globalWGSize[i]], nextDim, RI);
errs() << *SI << "\n";
}
if(i+1 < kernel->gridDim) {
GetElementPtrInst* GEP = GetElementPtrInst::Create(nextDim, ArrayRef<Value*>(IntOne), GlobalWG->getName()+"."+Twine(i+1), RI);
errs() << *GEP << "\n";
nextDim = GEP;
}
}
errs() << *llvm_visc_ptx_executeNode << "\n";
errs() << *GlobalWGPtr << "\n";
Value* ExecNodeArgs[] = {GraphID,
ConstantInt::get(Type::getInt32Ty(M.getContext()), C->getNumOfDim()),
Constant::getNullValue(Type::getInt64PtrTy(M.getContext())),
Constant::getNullValue(Type::getInt64PtrTy(M.getContext()))
GlobalWGPtr
};
CallInst* Event = CallInst::Create(llvm_visc_ptx_executeNode,
ArrayRef<Value*>(ExecNodeArgs, 4),
"event."+CF->getName(),
RI);
errs() << *Event << "\n";
// Wait for Kernel to Finish
CallInst::Create(llvm_visc_ptx_wait,
ArrayRef<Value*>(GraphID),
......@@ -615,9 +668,9 @@ namespace {
// Now the remaining nodes to be visited should be ignored
KernelLaunchNode = NULL;
writeKernelsModule();
errs() << "Insert Runtime calls\n";
insertRuntimeCalls(N, getKernelsModuleName(M), "matrixMul");
insertRuntimeCalls(N, getPTXFilename(M));
writeKernelsModule();
} else {
DEBUG(errs() << "Found intermediate node. Getting size parameters.\n");
......@@ -643,31 +696,36 @@ namespace {
if (!pLevel || !pReplFactor) {
KernelLaunchNode = PNode;
kernel = new Kernel(NULL, N->getNumOfDim(), N->getDimLimits());
// TODO: Find a better way of choosing parameters
kernel.gridDim = N->getNumOfDim();
kernel.blockDim = N->getNumOfDim();
kernel.globalWGSize = N->getDimLimits();
IntegerType* IntTy = Type::getInt32Ty(KernelM.getContext());
//kernel->gridDim = N->getNumOfDim();
//kernel->blockDim = N->getNumOfDim();
//kernel->globalWGSize = N->getDimLimits();
//kernel->localWGSize = N->getDimLimits();
//FIXME: Comment this out as we can provide localWGSize as null
//IntegerType* IntTy = Type::getInt32Ty(KernelM.getContext());
// TODO: How to choose the div factor;
ConstantInt* divFactor = ConstantInt::getSigned(IntTy, (int64_t) 16);
std::vector<Value*> tmp(kernel.gridDim, divFactor);
for (unsigned i = 0; i < kernel.gridDim; i++) {
BinaryOperator* SDivInst = BinaryOperator::CreateSDiv(kernel.globalWGSize[i],tmp[i]);
kernel.localWGSize.push_back(SDivInst);
}
//ConstantInt* divFactor = ConstantInt::getSigned(IntTy, (int64_t) 16);
//std::vector<Value*> tmp(kernel->gridDim, divFactor);
//for (unsigned i = 0; i < kernel->gridDim; i++) {
// BinaryOperator* SDivInst = BinaryOperator::CreateSDiv(kernel->globalWGSize[i],tmp[i]);
// kernel->localWGSize.push_back(SDivInst);
//}
}
else {
errs() << "*************** Entering else part **************\n";
/*
KernelLaunchNode = PNode->getParent();
kernel.gridDim = PNode->getNumOfDim();
kernel.blockDim = N->getNumOfDim();
kernel->gridDim = PNode->getNumOfDim();
kernel->blockDim = N->getNumOfDim();
// TODO: Handle different number of dimensions
assert((kernel.gridDim == kernel.blockDim) && "Dimension number must match");
assert((kernel->gridDim == kernel->blockDim) && "Dimension number must match");
std::vector<Value*> numOfBlocks = PNode->getDimLimits();
kernel.localWGSize = N->getDimLimits();
for (unsigned i = 0; i < kernel.gridDim; i++) {
BinaryOperator* MulInst = BinaryOperator::CreateMul(kernel.localWGSize[i],numOfBlocks[i]);
kernel.globalWGSize.push_back(MulInst);
}
kernel->localWGSize = N->getDimLimits();
for (unsigned i = 0; i < kernel->gridDim; i++) {
//BinaryOperator* MulInst = BinaryOperator::CreateMul(kernel->localWGSize[i],numOfBlocks[i]);
//kernel->globalWGSize.push_back(MulInst);
}*/
}
std::vector<IntrinsicInst *> IItoRemove;
......@@ -922,7 +980,8 @@ namespace {
(*ri)->eraseFromParent();
addCLMetadata(F_nvptx);
kernel.KF = F_nvptx;
kernel->KernelFunction = F_nvptx;
errs() << "Identified kernel - " << kernel->KernelFunction->getName() << "\n";
DEBUG(errs() << KernelM);
return;
......@@ -945,13 +1004,19 @@ namespace {
// Initiate code generation for root DFNode
CGTVisitor->visit(Root);
//TODO: Edit module epilogue to remove the VISC intrinsic declarations
delete CGTVisitor;
return true;
}
// Returns a file name for the kernels module, derived from the module
// identifier of M by appending a suffix.
std::string CodeGenTraversal::getKernelsModuleName(Module &M) {
// Disabled alternative: build the name as <current dir>/Output/<file name>
// followed by ".kernels.ll".
/*SmallString<128> currentDir;
llvm::sys::fs::current_path(currentDir);
std::string fileName = getFilenameFromModule(M);
Twine output = Twine(currentDir) + "/Output/" + fileName + "";
return output.str().append(".kernels.ll");*/
std::string mid = M.getModuleIdentifier();
// NOTE(review): there are two consecutive returns here. As written, the
// "_kernels.ll" return is taken and the ".kernels.ll" return after it is
// unreachable. This looks like the pre- and post-change lines both being
// present -- confirm which suffix is intended and drop the other.
return mid.append("_kernels.ll");
return mid.append(".kernels.ll");
}
void CodeGenTraversal::fixValueAddrspace(Value* V, unsigned addrspace) {
......@@ -1174,9 +1239,22 @@ namespace {
* Helper functions *
******************************************************************************/
// Name of the generated PTX file for module M: the module identifier with
// ".nvptx.s" appended.
static std::string getPTXFilename(const Module& M) {
  return std::string(M.getModuleIdentifier()) + ".nvptx.s";
}
// Strip any leading directory components from the module identifier of M:
// returns everything after the last '/', or the whole identifier when it
// contains no '/'.
static std::string getFilenameFromModule(const Module& M) {
  const std::string id = M.getModuleIdentifier();
  const std::string::size_type lastSlash = id.rfind('/');
  if (lastSlash == std::string::npos)
    return id;
  return id.substr(lastSlash + 1);
}
// Changes the data layout of the Module to be compiled with NVPTX backend
// TODO: Figure out when to call it, probably after duplicating the modules
void changeDataLayout(Module &M) {
static void changeDataLayout(Module &M) {
std::string nvptx32_layoutStr = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64";
std::string nvptx64_layoutStr = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64";
......@@ -1189,7 +1267,7 @@ namespace {
return;
}
void changeTargetTriple(Module &M) {
static void changeTargetTriple(Module &M) {
std::string nvptx32_TargetTriple = "nvptx--nvidiacl";
std::string nvptx64_TargetTriple = "nvptx64--nvidiacl";
......@@ -1203,7 +1281,7 @@ namespace {
}
// Helper function, generate a string representation of a type
std::string printType(Type* ty) {
static std::string printType(Type* ty) {
std::string type_str;
raw_string_ostream rso(type_str);
ty->print(rso);
......@@ -1211,14 +1289,14 @@ namespace {
}
// Helper function, convert int to string.
// Returns the decimal representation of `number` (with a leading '-' for
// negative values). File-local, matching its static forward declaration.
static std::string convertInt(int number) {
  std::ostringstream out; // only output is needed, so an output stream suffices
  out << number;
  return out.str();
}
// Helper function, populate a vector with all return statements in a function
void findReturnInst(Function* F, std::vector<ReturnInst *> & ReturnInstVec) {
static void findReturnInst(Function* F, std::vector<ReturnInst *> & ReturnInstVec) {
for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) {
Instruction *I = &(*i);
ReturnInst* RI = dyn_cast<ReturnInst>(I);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment