From b6f6d14c82f6a5a87d29183d996f47221e438c02 Mon Sep 17 00:00:00 2001
From: Hashim Sharif <hsharif3@tyler.cs.illinois.edu>
Date: Fri, 14 Dec 2018 01:24:35 -0600
Subject: [PATCH] Add pooling, tanh, and clipped ReLU support in GenVISC and
 the CUDNN backend

---
 .../DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp         | 74 ++++++++++++++++++----
 llvm/lib/Transforms/GenVISC/GenVISC.cpp       | 19 ++++++
 2 files changed, 84 insertions(+), 9 deletions(-)
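
Notes (editorial sketch, not part of the commit):

Each DECLARE(name) below binds `name` to a tensor-runtime entry point of
the same name. A minimal sketch of the prototypes this patch appears to
assume, with the pooling parameter names taken from the argument-list
comment in the pool_max case; the return and parameter types here are
assumptions, not confirmed by this patch:

    // Hypothetical tensor-runtime prototypes (types are assumed).
    extern "C" {
      // poolFunction: 0 selects max pooling; min/mean use other IDs.
      void* tensorPooling(void* input, int poolFunction,
                          int window_height, int window_width,
                          int vertical_pad, int horizontal_pad,
                          int vertical_stride, int horizontal_stride);
      void* tensorRelu(void* input);   // in-place ReLU
      void* tensorRelu2(void* input);  // clipped ReLU; runtime name per this patch
      void* tensorTanh(void* input);   // in-place tanh
    }

Note that tensor_pool_min and tensor_pool_mean are registered in GenVISC
below, but only visc_tensor_pool_max is lowered in the CUDNN backend; a
frontend-level usage sketch follows the patch trailer.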

diff --git a/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp b/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp
index 229a48a529..add89f24ea 100644
--- a/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp
+++ b/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp
@@ -357,9 +357,49 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
         IItoRemove.push_back(II);
       }
       break;
+      case Intrinsic::visc_tensor_pool_max:
+      { /* llvm.visc.tensor.pool.max */
+        DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor_pool_max\n");
+        // Operand 0 is the input tensor.
+        Value *Op = II->getOperand(0);
+
+        // Test the intrinsic operand for in place operation.
+        bool inplace = isValidOperandForInPlaceOperation(Op, F_cudnn, N);
+        // Code generation cannot continue if this is false, because the target
+        // runtime only provides an in-place operation.
+        assert(inplace &&
+               "Operand not valid for in place operation. Code gen aborted.\n");
+
+        // Argument list: tensorPooling(input, poolFunction, window_height, window_width,
+        //                              vertical_pad, horizontal_pad, vertical_stride, horizontal_stride);
+        std::vector<Value*> Args;
+        Args.push_back(II->getOperand(0));
+        Constant* constZero = ConstantInt::get(Type::getInt32Ty(M.getContext()), 0);
+        Args.push_back(constZero); // ID for max pool; min/mean pooling use different (non-zero) IDs
+        Args.push_back(II->getOperand(1));
+        Args.push_back(II->getOperand(2));
+        Args.push_back(II->getOperand(3));
+        Args.push_back(II->getOperand(4));
+        Args.push_back(II->getOperand(5));
+        Args.push_back(II->getOperand(6));
+
+        // Create cudnn runtime function call
+        Constant* tensorPooling;
+        DECLARE(tensorPooling);
+        CallInst* CI = CallInst::Create(tensorPooling, Args, "", II);
+
+        // Replace uses of the intrinsic result with the result of the tensor runtime call
+        II->replaceAllUsesWith(CI);
+
+        // Mark to remove at the end
+        IItoRemove.push_back(II);
+      }
+      break;
       case Intrinsic::visc_tensor_relu:
+      case Intrinsic::visc_tensor_clipped_relu:
+      case Intrinsic::visc_tensor_tanh:
-      { /* llvm.visc.tensor.relu */
+      { /* llvm.visc.tensor.{relu,clipped.relu,tanh} */
-        DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor relu\n");
+        DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor activation function\n");
-        // Tensor relu(a) is in place for argument a.
+        // These tensor activations operate in place on argument a.
         Value *Op = II->getOperand(0);
 
@@ -374,10 +414,26 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
         std::vector<Value*> Args;
         Args.push_back(II->getOperand(0));
 
-        // Create cudnn runtime function call
-        Constant* tensorRelu;
-        DECLARE(tensorRelu);
-        CallInst::Create(tensorRelu, Args, "", II);
+        if (II->getIntrinsicID() == Intrinsic::visc_tensor_relu) {
+          // Create cudnn runtime function call
+          Constant* tensorRelu;
+          DECLARE(tensorRelu);
+          CallInst::Create(tensorRelu, Args, "", II);
+        }
+        else if (II->getIntrinsicID() == Intrinsic::visc_tensor_clipped_relu) {
+          // Create cudnn runtime function call; the runtime exposes
+          // clipped ReLU under the name tensorRelu2
+          Constant* tensorRelu2;
+          DECLARE(tensorRelu2);
+          CallInst::Create(tensorRelu2, Args, "", II);
+        }
+        else if (II->getIntrinsicID() == Intrinsic::visc_tensor_tanh) {
+          // Create cudnn runtime function call
+          Constant* tensorTanh;
+          DECLARE(tensorTanh);
+          CallInst::Create(tensorTanh, Args, "", II);
+        }
+
-        // We can replace the call to hpvm.tensor.relu with the 1st argument
-        // that, due to in place operation, now contains the result
+        // We can replace the call to the activation intrinsic with its 1st
+        // argument which, due to the in-place operation, now contains the result
         II->replaceAllUsesWith(II->getOperand(0));
diff --git a/llvm/lib/Transforms/GenVISC/GenVISC.cpp b/llvm/lib/Transforms/GenVISC/GenVISC.cpp
index 01effd433e..b884a0ba3a 100644
--- a/llvm/lib/Transforms/GenVISC/GenVISC.cpp
+++ b/llvm/lib/Transforms/GenVISC/GenVISC.cpp
@@ -167,7 +167,11 @@ IS_VISC_CALL(hint)
 IS_VISC_CALL(tensor_mul)
 IS_VISC_CALL(tensor_convolution)
 IS_VISC_CALL(tensor_add)
+IS_VISC_CALL(tensor_pool_max)
+IS_VISC_CALL(tensor_pool_min)
+IS_VISC_CALL(tensor_pool_mean)
 IS_VISC_CALL(tensor_relu)
+IS_VISC_CALL(tensor_clipped_relu)
 IS_VISC_CALL(tensor_tanh)
 IS_VISC_CALL(tensor_sigmoid)
 IS_VISC_CALL(tensor_softmax)
@@ -1275,9 +1279,24 @@ bool GenVISC::runOnModule(Module &M) {
       if (isVISCCall_tensor_mul(I)) {
         ReplaceCallWithIntrinsic(I, Intrinsic::visc_tensor_mul, &toBeErased);
       }
+      if (isVISCCall_tensor_pool_max(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::visc_tensor_pool_max, &toBeErased);
+      }
+      if (isVISCCall_tensor_pool_min(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::visc_tensor_pool_min, &toBeErased);
+      }
+      if (isVISCCall_tensor_pool_mean(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::visc_tensor_pool_mean, &toBeErased);
+      }
       if (isVISCCall_tensor_relu(I)) {
         ReplaceCallWithIntrinsic(I, Intrinsic::visc_tensor_relu, &toBeErased);
       }
+      if (isVISCCall_tensor_tanh(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::visc_tensor_tanh, &toBeErased);
+      }
+      if (isVISCCall_tensor_clipped_relu(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::visc_tensor_clipped_relu, &toBeErased);
+      }
       if (isVISCCall_tensor_softmax(I)) {
         ReplaceCallWithIntrinsic(I, Intrinsic::visc_tensor_softmax, &toBeErased);
       }
-- 
GitLab
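
Usage sketch (editorial, assumed frontend API): GenVISC matches calls to
frontend stubs named after the IS_VISC_CALL entries and rewrites each one
to the corresponding llvm.visc.* intrinsic via ReplaceCallWithIntrinsic.
Assuming the usual __visc__ stub naming convention, a leaf node could use
the new operations as below; the stub signatures and the 2x2/stride-2
window are illustrative, not taken from this patch:

    // Hypothetical leaf-node body; the __visc__tensor_* stubs and their
    // signatures are assumed, mirroring the operand order the CUDNN
    // backend reads: input, window_height, window_width, vertical_pad,
    // horizontal_pad, vertical_stride, horizontal_stride.
    // 'input' is a tensor handle produced earlier in the leaf node.
    void* pooled = __visc__tensor_pool_max(input, 2, 2, 0, 0, 2, 2);
    void* activated = __visc__tensor_clipped_relu(pooled);

GenVISC turns these calls into visc_tensor_pool_max and
visc_tensor_clipped_relu intrinsics, which DFG2LLVM_CUDNN then lowers to
the tensorPooling and tensorRelu2 runtime calls added in the first hunk.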