Skip to content
Snippets Groups Projects
Commit 62399b30 authored by Akash Kothari's avatar Akash Kothari
Browse files

LLVM-9-port DFG2LLVM_CUDNN pass

parent 85b25027
No related branches found
No related tags found
No related merge requests found
......@@ -2,7 +2,8 @@ if(WIN32 OR CYGWIN)
set(LLVM_LINK_COMPONENTS Core Support)
endif()
add_llvm_loadable_module( LLVMDFG2LLVM_CUDNN
add_llvm_library( LLVMDFG2LLVM_CUDNN
MODULE
DFG2LLVM_CUDNN.cpp
DEPENDS
......@@ -10,3 +11,4 @@ add_llvm_loadable_module( LLVMDFG2LLVM_CUDNN
PLUGIN_TOOL
opt
)
......@@ -9,6 +9,7 @@
#define ENABLE_ASSERTS
#define DEBUG_TYPE "DFG2LLVM_CUDNN"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
......@@ -23,9 +24,10 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/IR/Attributes.h"
#include "llvm-c/Core.h"
#include "llvm/SupportVISC/VISCTimer.h"
#include "llvm/SupportVISC/DFG2LLVM.h"
#include "llvm/InPlaceDFG/InPlaceDFGAnalysis.h"
#include "SupportHPVM/DFG2LLVM.h"
#include "InPlaceDFG/InPlaceDFGAnalysis.h"
#include <sstream>
using namespace llvm;
......@@ -64,9 +66,9 @@ private:
InPlaceDFGAnalysis::InPlaceDFGParameter *IPP;
// VISC Runtime API and Tensor runtime API
Constant* llvm_hpvm_initTensorRt;
Constant* llvm_hpvm_cleanupTensorRt;
Constant* hpvm_request_tensor;
FunctionCallee llvm_hpvm_initTensorRt;
FunctionCallee llvm_hpvm_cleanupTensorRt;
FunctionCallee hpvm_request_tensor;
// Functions
bool isValidOperandForInPlaceOperation(Value *Op, Function *Fgen, DFNode *N);
......@@ -145,7 +147,7 @@ void CGT_CUDNN::initRuntimeAPI() {
runtimeModule = parseIRFile(runtimeAPI.str(), Err, M.getContext());
if(runtimeModule == nullptr)
DEBUG(errs() << Err.getMessage());
else
else
DEBUG(errs() << "Successfully loaded hpvm-tensor-rt API module\n");
// Get or insert Global declarations for
......@@ -156,27 +158,18 @@ void CGT_CUDNN::initRuntimeAPI() {
DECLARE(llvm_hpvm_cleanupTensorRt);
DECLARE(hpvm_request_tensor);
// Find visc.init and visc.cleanup calls, and add placeholder methods
// Find hpvm.init and hpvm.cleanup calls, and add placeholder methods
// for initialization and cleanup of the hpvm tensor runtime
/*
LLVMContext &C = M.getContext();
auto *FuncType = FunctionType::get(Type::getVoidTy(C), ArrayRef<Type *>({Type::getInt32Ty(C)}), false);
llvm_hpvm_initTensorRt = M.getOrInsertFunction(StringRef("llvm_hpvm_initTensorRt"), FuncType);
FuncType = FunctionType::get(Type::getVoidTy(C), ArrayRef<Type *>({}), false);
llvm_hpvm_cleanupTensorRt = M.getOrInsertFunction(StringRef("llvm_hpvm_cleanupTensorRt"), FuncType);
FuncType = FunctionType::get(Type::getVoidTy(C), ArrayRef<Type *>({Type::getInt8PtrTy(C), Type::getInt32Ty(C)}), false);
hpvm_request_tensor = M.getOrInsertFunction(StringRef("hpvm_request_tensor"), FuncType);
*/
Function* VI = M.getFunction("llvm.visc.init");
assert(VI->getNumUses() == 1 && "__visc__init should only be used once\n");
Function* VI = M.getFunction("llvm.hpvm.init");
assert(VI->getNumUses() == 1 && "__hpvm__init should only be used once\n");
InitCall = cast<Instruction>(*VI->user_begin());
CallInst::Create(llvm_hpvm_initTensorRt,
ArrayRef<Value*>(ConstantInt::get(Type::getInt32Ty(M.getContext()), 0)),
"", InitCall);
Function* VC = M.getFunction("llvm.visc.cleanup");
assert(VC->getNumUses() == 1 && "__visc__clear should only be used once\n");
Function* VC = M.getFunction("llvm.hpvm.cleanup");
assert(VC->getNumUses() == 1 && "__hpvm__clear should only be used once\n");
CleanupCall = cast<Instruction>(*VC->user_begin());
CallInst::Create(llvm_hpvm_cleanupTensorRt, ArrayRef<Value*>(), "", CleanupCall);
......@@ -203,7 +196,7 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
}
// Generate code only if it has the right hint
if (!checkPreferredTarget(N, visc::CUDNN_TARGET)) {
if (!checkPreferredTarget(N, hpvm::CUDNN_TARGET)) {
errs() << "Skipping node: "<< N->getFuncPointer()->getName() << "\n";
return;
}
......@@ -226,7 +219,7 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
// Look up if we have visited this function before. If we have, then just
// get the cloned function pointer from DFNode. Otherwise, create the cloned
// function and add it to the DFNode GenFunc.
Function *F_cudnn = N->getGenFuncForTarget(visc::CUDNN_TARGET);
Function *F_cudnn = N->getGenFuncForTarget(hpvm::CUDNN_TARGET);
assert((F_cudnn == NULL) &&
"Error: Visiting a node for which code already generated");
......@@ -240,13 +233,13 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
F_cudnn->removeFromParent();
M.getFunctionList().push_back(F_cudnn);
N->addGenFunc(F_cudnn, visc::CUDNN_TARGET, true);
N->addGenFunc(F_cudnn, hpvm::CUDNN_TARGET, true);
// Adding nounwind to generated function : FIXME: needed?
DEBUG(errs() << "Adding nounwind to generated function\n");
F_cudnn->addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind);
F_cudnn->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind);
// Add llvm_visc_requestTensor calls for every pointer argument of the function
// Add llvm_hpvm_requestTensor calls for every pointer argument of the function
// (they are all expected to be tensors), at the beginning of the function.
// This is the first instruction of the function, insert them before this
Instruction* FI = &*(F_cudnn->getEntryBlock().begin());
......@@ -271,19 +264,19 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
for (inst_iterator i = inst_begin(F_cudnn), e = inst_end(F_cudnn); i != e; ++i) {
Instruction *I = &(*i);
if (BuildDFG::isViscIntrinsic(I)) {
if (BuildDFG::isHPVMIntrinsic(I)) {
IntrinsicInst* II = dyn_cast<IntrinsicInst>(I);
//assert((II->getCalledFunction()->getName()).startswith("llvm.visc.tensor")
//assert((II->getCalledFunction()->getName()).startswith("llvm.hpvm.tensor")
// && "Only HPVM tensor intrinsics allowed in ApproxHPVM leaf nodes\n");
//if (!(II->getCalledFunction()->getName()).startswith("llvm.visc.tensor")){
//if (!(II->getCalledFunction()->getName()).startswith("llvm.hpvm.tensor")){
//continue; // skip non-tensor ops
//}
/********************* Handle VISC Tensor intrinsics ********************/
switch (II->getIntrinsicID()) {
case Intrinsic::visc_tensor_convolution:
case Intrinsic::hpvm_tensor_convolution:
{ /* llvm.hpvm.tensor.convolution */
// Tensor convolution is not in place.
DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor convolution \n");
......@@ -304,7 +297,7 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
Args.push_back(conv_precision);
// Create cudnn runtime function call
Constant* tensorConvolution;
FunctionCallee tensorConvolution;
DECLARE(tensorConvolution);
CallInst* CI = CallInst::Create(tensorConvolution,
......@@ -317,7 +310,7 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
}
break;
case Intrinsic::visc_tensor_group_convolution:
case Intrinsic::hpvm_tensor_group_convolution:
{ /* llvm.hpvm.tensor.group.convolution */
// Tensor group convolution is not in place.
DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor convolution \n");
......@@ -337,7 +330,7 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
Args.push_back(II->getOperand(7));
// Create cudnn runtime function call
Constant* tensorConvolution;
FunctionCallee tensorConvolution;
DECLARE(tensorConvolution);
CallInst* CI = CallInst::Create(tensorConvolution,
......@@ -350,7 +343,7 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
}
break;
case Intrinsic::visc_tensor_batchnorm:
case Intrinsic::hpvm_tensor_batchnorm:
{ /* llvm.hpvm.tensor.batchnorm */
// Tensor batchnorm is in place.
// FIXME: Add Check for InPlace Analysis
......@@ -366,7 +359,7 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
Args.push_back(II->getOperand(5));
// Create cudnn runtime function call
Constant* tensorBatchNorm;
FunctionCallee tensorBatchNorm;
DECLARE(tensorBatchNorm);
CallInst* CI = CallInst::Create(tensorBatchNorm,
......@@ -380,7 +373,7 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
break;
case Intrinsic::visc_tensor_mul:
case Intrinsic::hpvm_tensor_mul:
{ /* llvm.hpvm.tensor.mul */
// Tensor mul is not in place.
DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor mul\n");
......@@ -391,7 +384,7 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
Args.push_back(II->getOperand(1));
// Create cudnn runtime function call
Constant* tensorGemmGPU;
FunctionCallee tensorGemmGPU;
DECLARE(tensorGemmGPU);
CallInst* CI = CallInst::Create(tensorGemmGPU,
......@@ -403,7 +396,7 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
IItoRemove.push_back(II);
}
break;
case Intrinsic::visc_tensor_add:
case Intrinsic::hpvm_tensor_add:
{ /* llvm.hpvm.tensor.add */
DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor add\n");
// Tensor add(a,b) is in place for argument a.
......@@ -424,7 +417,7 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
Args.push_back(II->getOperand(1));
// Create cudnn runtime function call
Constant* tensorAdd;
FunctionCallee tensorAdd;
DECLARE(tensorAdd);
CallInst::Create(tensorAdd, Args, "", II);
// We can replace the call to hpvm.tensor.add with the 1st argument
......@@ -435,9 +428,9 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
IItoRemove.push_back(II);
}
break;
case Intrinsic::visc_tensor_pool_max:
case Intrinsic::visc_tensor_pool_mean:
{ /* llvm.visc.tensor.relu */
case Intrinsic::hpvm_tensor_pool_max:
case Intrinsic::hpvm_tensor_pool_mean:
{ /* llvm.hpvm.tensor.pool.max / llvm.hpvm.tensor.pool.mean */
DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor_pool_max\n");
// Argument list - tensorPooling(input, poolFunction, window_height,
......@@ -447,10 +440,10 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
Args.push_back(II->getOperand(0));
int pool_type = 0;
if (II->getIntrinsicID() == Intrinsic::visc_tensor_pool_max){
if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_max){
pool_type = 0;
}
if (II->getIntrinsicID() == Intrinsic::visc_tensor_pool_mean){
if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_mean){
pool_type = 1;
}
......@@ -464,7 +457,7 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
Args.push_back(II->getOperand(6));
// Create cudnn runtime function call
Constant* tensorPooling;
FunctionCallee tensorPooling;
DECLARE(tensorPooling);
CallInst* CI = CallInst::Create(tensorPooling, Args, "", II);
......@@ -476,10 +469,10 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
}
break;
case Intrinsic::visc_tensor_relu:
case Intrinsic::visc_tensor_clipped_relu:
case Intrinsic::visc_tensor_tanh:
{ /* llvm.visc.tensor.relu */
case Intrinsic::hpvm_tensor_relu:
case Intrinsic::hpvm_tensor_clipped_relu:
case Intrinsic::hpvm_tensor_tanh:
{ /* llvm.hpvm.tensor.relu */
DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor activation functions \n");
// Tensor relu(a) is in place for argument a.
Value *Op = II->getOperand(0);
......@@ -495,22 +488,22 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
std::vector<Value*> Args;
Args.push_back(II->getOperand(0));
if (II->getIntrinsicID() == Intrinsic::visc_tensor_relu){
if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_relu){
// Create cudnn runtime function call
Constant* tensorRelu;
FunctionCallee tensorRelu;
DECLARE(tensorRelu);
CallInst::Create(tensorRelu, Args, "", II);
}
else if (II->getIntrinsicID() == Intrinsic::visc_tensor_clipped_relu){
else if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_clipped_relu){
// Create cudnn runtime function call
//-- Constant* tensorClippedRelu;
Constant* tensorRelu2;
//-- FunctionCallee tensorClippedRelu;
FunctionCallee tensorRelu2;
DECLARE(tensorRelu2);
CallInst::Create(tensorRelu2, Args, "", II);
}
else if (II->getIntrinsicID() == Intrinsic::visc_tensor_tanh){
else if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_tanh){
// Create cudnn runtime function call
Constant* tensorTanh;
FunctionCallee tensorTanh;
errs()<<"tensorTanh Call = \n\n";
DECLARE(tensorTanh);
//errs()<<"tensorTanh Call = "<<*tensorTanh<<"\l";
......@@ -525,8 +518,8 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
IItoRemove.push_back(II);
}
break;
case Intrinsic::visc_tensor_softmax:
{ /* llvm.visc.tensor.softmax */
case Intrinsic::hpvm_tensor_softmax:
{ /* llvm.hpvm.tensor.softmax */
DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor softmax\n");
// Tensor relu(a) is in place for argument a.
Value *Op = II->getOperand(0);
......@@ -543,7 +536,7 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
Args.push_back(II->getOperand(0));
// Create cudnn runtime function call
Constant* tensorSoftmax;
FunctionCallee tensorSoftmax;
DECLARE(tensorSoftmax);
CallInst::Create(tensorSoftmax, Args, "", II);
// We can replace the call to hpvm.tensor.softmax with the 1st argument
......@@ -555,8 +548,8 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
}
break;
case Intrinsic::visc_node_id:
{ /* llvm.visc.node.id */
case Intrinsic::hpvm_node_id:
{ /* llvm.hpvm.node.id */
DEBUG(errs() << F_cudnn->getName() << "\t: Handling Node ID Intrinsic \n");
// Get uint32 argument
Value *Op = II->getOperand(0);
......@@ -566,7 +559,7 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
Args.push_back(II->getOperand(0));
// Create hpvm-tensor-rt function call
Constant* tensor_set_node_id;
FunctionCallee tensor_set_node_id;
DECLARE(tensor_set_node_id);
CallInst::Create(tensor_set_node_id, Args, "", II);
......@@ -643,3 +636,4 @@ static RegisterPass<DFG2LLVM_CUDNN> X("dfg2llvm-cudnn",
true /* transformation, *
* not just analysis */);
;===- ./lib/Transforms/DFG2LLVM_NVPTX/LLVMBuild.txt ------------*- Conf -*--===;
;===- ./lib/Transforms/DFG2LLVM_WrapperAPI/LLVMBuild.txt -------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment