From e45db4b3dc248c69043448d7c4cf5f11897ec1ba Mon Sep 17 00:00:00 2001
From: Hashim Sharif <hsharif3@tyler.cs.illinois.edu>
Date: Fri, 12 Jul 2019 01:40:16 -0500
Subject: [PATCH] Adding backend support for DepthwiseConv2D

---
 llvm/include/llvm/IR/IntrinsicsVISC.td         | 13 +++++++
 .../DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp          | 35 +++++++++++++++++++
 llvm/lib/Transforms/GenVISC/GenVISC.cpp        |  4 +++
 .../VISC/DNN_Benchmarks/common/include/visc.h  |  1 +
 4 files changed, 53 insertions(+)

diff --git a/llvm/include/llvm/IR/IntrinsicsVISC.td b/llvm/include/llvm/IR/IntrinsicsVISC.td
index 131f1384cb..ce4e9ece4e 100644
--- a/llvm/include/llvm/IR/IntrinsicsVISC.td
+++ b/llvm/include/llvm/IR/IntrinsicsVISC.td
@@ -275,6 +275,19 @@ let TargetPrefix = "visc" in {
                                                llvm_i32_ty,
                                                llvm_i32_ty], []>;
 
+  /* Tensor group convolution intrinsic
+   * i8* llvm.visc.tensor.group.convolution(i8*, i8*, i32, i32, i32, i32, i32, i32);
+   */
+  def int_visc_tensor_group_convolution : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty,
+                                                     llvm_ptr_ty,
+                                                     llvm_i32_ty,
+                                                     llvm_i32_ty,
+                                                     llvm_i32_ty,
+                                                     llvm_i32_ty,
+                                                     llvm_i32_ty,
+                                                     llvm_i32_ty], []>;
+
+
   /* Tensor pool intrinsics: max, min, average
    * i8* llvm.visc.tensor.pool.max(i8*, i32, i32, i32, i32, i32, i32);
    * i8* llvm.visc.tensor.pool.min(i8*, i32, i32, i32, i32, i32, i32);
diff --git a/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp b/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp
index 1544538a46..185667e760 100644
--- a/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp
+++ b/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp
@@ -304,6 +304,40 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
     }
     break;
 
+    case Intrinsic::visc_tensor_group_convolution:
+    { /* llvm.visc.tensor.group.convolution */
+      // Tensor group convolution is not in place.
+      DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor group convolution \n");
+
+      // Argument list for the runtime call
+      std::vector<Value*> Args;
+      Args.push_back(II->getOperand(0));
+      Args.push_back(II->getOperand(1));
+      Args.push_back(II->getOperand(2));
+      Args.push_back(II->getOperand(3));
+      Args.push_back(II->getOperand(4));
+      Args.push_back(II->getOperand(5));
+
+      Constant* conv_mode = ConstantInt::get(Type::getInt32Ty(M.getContext()), 1);
+
+      Args.push_back(conv_mode);
+      Args.push_back(II->getOperand(7));
+
+      // Create cudnn runtime function call
+      Constant* tensorConvolution;
+      DECLARE(tensorConvolution);
+
+      CallInst* CI = CallInst::Create(tensorConvolution,
+                                      Args, "", II);
+      // We can replace the call to visc.tensor.group.convolution with the runtime call
+      II->replaceAllUsesWith(CI);
+
+      // Mark to remove at the end
+      IItoRemove.push_back(II);
+    }
+    break;
+
+
     case Intrinsic::visc_tensor_mul:
     { /* llvm.hpvm.tensor.mul */
       // Tensor mul is not in place.
@@ -407,6 +441,7 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
       IItoRemove.push_back(II);
     }
     break;
+
     case Intrinsic::visc_tensor_relu:
     case Intrinsic::visc_tensor_clipped_relu:
     case Intrinsic::visc_tensor_tanh:
diff --git a/llvm/lib/Transforms/GenVISC/GenVISC.cpp b/llvm/lib/Transforms/GenVISC/GenVISC.cpp
index b884a0ba3a..c438307f1b 100644
--- a/llvm/lib/Transforms/GenVISC/GenVISC.cpp
+++ b/llvm/lib/Transforms/GenVISC/GenVISC.cpp
@@ -166,6 +166,7 @@ IS_VISC_CALL(hint)
 // Tensor Operators
 IS_VISC_CALL(tensor_mul)
 IS_VISC_CALL(tensor_convolution)
+IS_VISC_CALL(tensor_group_convolution)
 IS_VISC_CALL(tensor_add)
 IS_VISC_CALL(tensor_pool_max)
 IS_VISC_CALL(tensor_pool_min)
@@ -1273,6 +1274,9 @@ bool GenVISC::runOnModule(Module &M) {
     if (isVISCCall_tensor_convolution(I)) {
       ReplaceCallWithIntrinsic(I, Intrinsic::visc_tensor_convolution, &toBeErased);
     }
+    if (isVISCCall_tensor_group_convolution(I)) {
+      ReplaceCallWithIntrinsic(I, Intrinsic::visc_tensor_group_convolution, &toBeErased);
+    }
     if (isVISCCall_tensor_add(I)) {
       ReplaceCallWithIntrinsic(I, Intrinsic::visc_tensor_add, &toBeErased);
     }
diff --git a/llvm/test/VISC/DNN_Benchmarks/common/include/visc.h b/llvm/test/VISC/DNN_Benchmarks/common/include/visc.h
index aaa70b15b3..ab4787ffa2 100644
--- a/llvm/test/VISC/DNN_Benchmarks/common/include/visc.h
+++ b/llvm/test/VISC/DNN_Benchmarks/common/include/visc.h
@@ -95,6 +95,7 @@ float __visc__cos(float);
 void* __visc__tensor_add(void*, void*);
 void* __visc__tensor_mul(void*, void*);
 void* __visc__tensor_convolution(void*, void*, int, int, int, int);
+void* __visc__tensor_group_convolution(void*, void*, int, int, int, int, int, int);
 void* __visc__tensor_pool_max(void*, int, int, int, int, int, int);
 void* __visc__tensor_pool_mean(void*, int, int, int, int, int, int);
 void* __visc__tensor_relu(void*);
--
GitLab
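
Usage sketch (not part of the patch above): one way the new builtin might be invoked from a DNN benchmark leaf node. Only the visc.h signature is confirmed by the diff; the meaning of the six int arguments (vertical/horizontal padding, vertical/horizontal stride, convolution mode, group count) is an assumption inferred from __visc__tensor_convolution and from the CUDNN lowering, which overrides operand 6 with the constant conv_mode = 1 and forwards operand 7 to the tensorConvolution runtime call. The node boilerplate (__visc__hint, __visc__attributes, __visc__return) follows the pattern used elsewhere under llvm/test/VISC/DNN_Benchmarks; the function name and the channel count 32 are hypothetical.

#include <stddef.h>
#include <visc.h>

// Hypothetical leaf node performing a depthwise convolution with 1x1
// padding and 1x1 stride. The last two ints are assumed to be the
// convolution mode and the group count (for a depthwise convolution,
// groups == number of input channels; 32 is a placeholder).
void depthwise_conv_node(void *t1, size_t bytes_t1,
                         void *t2, size_t bytes_t2) {
  __visc__hint(visc::CUDNN_TARGET);
  __visc__attributes(2, t1, t2, 0);

  void *r = __visc__tensor_group_convolution(t1, t2, 1, 1, 1, 1, 1, 32);
  __visc__return(2, r, (size_t) 0);
}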