diff --git a/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp b/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp
index 229a48a529ebb46d2d982278f81ff08abf3a317a..add89f24ea0e50dc7e162ae2fd2e691eea58213a 100644
--- a/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp
+++ b/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp
@@ -357,9 +357,49 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
       IItoRemove.push_back(II);
     }
     break;
+    case Intrinsic::visc_tensor_pool_max:
+    { /* llvm.visc.tensor.pool.max — NOTE(review): pool_min/pool_mean intrinsics are generated by GenVISC but have no lowering case here yet */
+      DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor_pool_max\n");
+      // The tensor runtime only provides an in-place pooling operation, so
+      // the input operand must qualify for in-place use.
+      Value *Op = II->getOperand(0);
+
+      // Test the intrinsic operand for in place operation.
+      bool inplace = isValidOperandForInPlaceOperation(Op, F_cudnn, N);
+      // Code generation cannot continue if this is false, because the target
+      // only provides an in place operation
+      assert(inplace &&
+             "Operand not valid for in place operation. Code gen aborted.\n");
+
+      // Argument list - tensorPooling(input, poolFunction, window_height, window_width, vertical_pad, horizontal_pad,
+      // vertical_stride, horizontal_stride);
+      std::vector<Value*> Args;
+      Args.push_back(II->getOperand(0));
+      Constant* constZero = ConstantInt::get(Type::getInt32Ty(M.getContext()), 0);
+      Args.push_back(constZero); // ID 0 selects max pooling; Min/Avg have different (non-zero) IDs
+      Args.push_back(II->getOperand(1));
+      Args.push_back(II->getOperand(2));
+      Args.push_back(II->getOperand(3));
+      Args.push_back(II->getOperand(4));
+      Args.push_back(II->getOperand(5));
+      Args.push_back(II->getOperand(6));
+
+      // Create cudnn runtime function call
+      Constant* tensorPooling;
+      DECLARE(tensorPooling);
+      CallInst* CI = CallInst::Create(tensorPooling, Args, "", II);
+
+      // Replacing intrinsic result uses with the result of the tensor runtime operation
+      II->replaceAllUsesWith(CI);
+
+      // Mark to remove at the end
+      IItoRemove.push_back(II);
+    }
+    break;
     case Intrinsic::visc_tensor_relu:
+    case Intrinsic::visc_tensor_clipped_relu:
+    case Intrinsic::visc_tensor_tanh:
     { /* llvm.visc.tensor.relu */
-      DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor relu\n");
+      DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor activation functions \n");
       // Tensor relu(a) is in place for argument a.
       Value *Op = II->getOperand(0);
 
@@ -374,10 +414,26 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
       std::vector<Value*> Args;
       Args.push_back(II->getOperand(0));
 
-      // Create cudnn runtime function call
-      Constant* tensorRelu;
-      DECLARE(tensorRelu);
-      CallInst::Create(tensorRelu, Args, "", II);
+      if (II->getIntrinsicID() == Intrinsic::visc_tensor_relu){
+        // Create cudnn runtime function call
+        Constant* tensorRelu;
+        DECLARE(tensorRelu);
+        CallInst::Create(tensorRelu, Args, "", II);
+      }
+      else if (II->getIntrinsicID() == Intrinsic::visc_tensor_clipped_relu){
+        // Create cudnn runtime function call
+        //-- Constant* tensorClippedRelu; // NOTE(review): presumably the runtime names clipped ReLU "tensorRelu2" — confirm against the tensor runtime
+        Constant* tensorRelu2;
+        DECLARE(tensorRelu2);
+        CallInst::Create(tensorRelu2, Args, "", II);
+      }
+      else if (II->getIntrinsicID() == Intrinsic::visc_tensor_tanh){
+        // Create cudnn runtime function call
+        Constant* tensorTanh;
+        DECLARE(tensorTanh);
+        CallInst::Create(tensorTanh, Args, "", II);
+      }
+
+      // We can replace the call to the hpvm tensor activation intrinsic with its 1st argument
+      // that, due to in place operation, now contains the result
       II->replaceAllUsesWith(II->getOperand(0));
diff --git a/llvm/lib/Transforms/GenVISC/GenVISC.cpp b/llvm/lib/Transforms/GenVISC/GenVISC.cpp
index 01effd433eb6e0d27f6d2f891909b11b51b0257b..b884a0ba3ae7d7790f46ec08707f72002acb0af6 100644
--- a/llvm/lib/Transforms/GenVISC/GenVISC.cpp
+++ b/llvm/lib/Transforms/GenVISC/GenVISC.cpp
@@ -167,7 +167,11 @@ IS_VISC_CALL(hint)
 IS_VISC_CALL(tensor_mul)
 IS_VISC_CALL(tensor_convolution)
 IS_VISC_CALL(tensor_add)
+IS_VISC_CALL(tensor_pool_max)
+IS_VISC_CALL(tensor_pool_min)
+IS_VISC_CALL(tensor_pool_mean)
 IS_VISC_CALL(tensor_relu)
+IS_VISC_CALL(tensor_clipped_relu)
 IS_VISC_CALL(tensor_tanh)
 IS_VISC_CALL(tensor_sigmoid)
 IS_VISC_CALL(tensor_softmax)
@@ -1275,9 +1279,24 @@ bool GenVISC::runOnModule(Module &M) {
     if (isVISCCall_tensor_mul(I)) {
       ReplaceCallWithIntrinsic(I, Intrinsic::visc_tensor_mul, &toBeErased);
     }
+    if (isVISCCall_tensor_pool_max(I)) {
+      ReplaceCallWithIntrinsic(I, Intrinsic::visc_tensor_pool_max, &toBeErased);
+    }
+    if (isVISCCall_tensor_pool_min(I)) {
+      ReplaceCallWithIntrinsic(I, Intrinsic::visc_tensor_pool_min, &toBeErased);
+    }
+    if (isVISCCall_tensor_pool_mean(I)) {
+      ReplaceCallWithIntrinsic(I, Intrinsic::visc_tensor_pool_mean, &toBeErased);
+    }
     if (isVISCCall_tensor_relu(I)) {
       ReplaceCallWithIntrinsic(I, Intrinsic::visc_tensor_relu, &toBeErased);
     }
+    if (isVISCCall_tensor_tanh(I)) {
+      ReplaceCallWithIntrinsic(I, Intrinsic::visc_tensor_tanh, &toBeErased);
+    }
+    if (isVISCCall_tensor_clipped_relu(I)) {
+      ReplaceCallWithIntrinsic(I, Intrinsic::visc_tensor_clipped_relu, &toBeErased);
+    }
     if (isVISCCall_tensor_softmax(I)) {
       ReplaceCallWithIntrinsic(I, Intrinsic::visc_tensor_softmax, &toBeErased);
     }