From a5112c19e3f70dfc09b0dbfdd377c36abcb3da54 Mon Sep 17 00:00:00 2001
From: Hashim Sharif <hsharif3@tyler.cs.illinois.edu>
Date: Thu, 13 Dec 2018 19:09:03 -0600
Subject: [PATCH] Add support for convolutions in the CUDNN backend and GenVISC

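Adds the llvm.visc.tensor.convolution, llvm.visc.tensor.clipped.relu, and
llvm.visc.tensor.sigmoid intrinsics to IntrinsicsVISC.td. The CUDNN backend
lowers the convolution intrinsic to a tensorConvolution runtime call,
appending constant conv mode and precision arguments to the six intrinsic
operands. GenVISC now recognizes the __visc__tensor_convolution,
__visc__tensor_tanh, and __visc__tensor_sigmoid frontend calls and replaces
the convolution call with its intrinsic. Also declares
__visc__tensor_convolution and __visc__tensor_tanh in visc.h and corrects
two printf format specifiers in tensorUtils.h.
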
---
 llvm/include/llvm/IR/IntrinsicsVISC.td        | 10 +++++
 .../DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp         | 34 +++++++++++++++++
 llvm/lib/Transforms/GenVISC/GenVISC.cpp       |  6 +++
 .../common/include/tensorUtils.h              |  4 +-
 .../VISC/DNN_Benchmarks/common/include/visc.h |  2 +
 5 files changed, 54 insertions(+), 2 deletions(-)

diff --git a/llvm/include/llvm/IR/IntrinsicsVISC.td b/llvm/include/llvm/IR/IntrinsicsVISC.td
index c6ce86c504..131f1384cb 100644
--- a/llvm/include/llvm/IR/IntrinsicsVISC.td
+++ b/llvm/include/llvm/IR/IntrinsicsVISC.td
@@ -245,11 +245,21 @@ let TargetPrefix = "visc" in {
    */
   def int_visc_tensor_relu : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], []>;
 
+  /* Tensor clipped relu intrinsic
+   * i8* llvm.visc.tensor.clipped.relu(i8*);
+   */
+  def int_visc_tensor_clipped_relu : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], []>;
+
   /* Tensor tanh intrinsic
    * i8* llvm.visc.tensor.tanh(i8*);
    */
   def int_visc_tensor_tanh : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], []>;
 
+  /* Tensor sigmoid intrinsic
+   * i8* llvm.visc.tensor.sigmoid(i8*);
+   */
+  def int_visc_tensor_sigmoid : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], []>;
+
   /* Tensor softmax intrinsic
    * i8* llvm.visc.tensor.softmax(i8*);
    */
diff --git a/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp b/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp
index a6374406d2..229a48a529 100644
--- a/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp
+++ b/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp
@@ -270,6 +270,40 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
       /********************* Handle VISC Tensor intrinsics ********************/
       switch (II->getIntrinsicID()) {
 
+      case Intrinsic::visc_tensor_convolution:
+      { /* llvm.visc.tensor.convolution */
+        // Tensor convolution is not an in-place operation.
+        DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor convolution \n");
+
+        // Argument list for the runtime call
+        std::vector<Value*> Args;
+        Args.push_back(II->getOperand(0));
+        Args.push_back(II->getOperand(1));
+        Args.push_back(II->getOperand(2));
+        Args.push_back(II->getOperand(3));
+        Args.push_back(II->getOperand(4));
+        Args.push_back(II->getOperand(5));
+
+        Constant* conv_mode = ConstantInt::get(Type::getInt32Ty(M.getContext()), 1);
+        Constant* conv_precision = ConstantInt::get(Type::getInt32Ty(M.getContext()), 0);
+
+        Args.push_back(conv_mode);
+        Args.push_back(conv_precision);
+
+        // Create cudnn runtime function call
+        Constant* tensorConvolution;
+        DECLARE(tensorConvolution);
+
+        CallInst* CI = CallInst::Create(tensorConvolution,
+                                        Args, "", II);
+        // Replace the uses of llvm.visc.tensor.convolution with the runtime call
+        II->replaceAllUsesWith(CI);
+
+        // Mark the intrinsic for removal at the end
+        IItoRemove.push_back(II);
+      }
+      break;
+
       case Intrinsic::visc_tensor_mul:
       { /* llvm.hpvm.tensor.mul */
         // Tensor mul is not in place.
diff --git a/llvm/lib/Transforms/GenVISC/GenVISC.cpp b/llvm/lib/Transforms/GenVISC/GenVISC.cpp
index 2f6282deac..01effd433e 100644
--- a/llvm/lib/Transforms/GenVISC/GenVISC.cpp
+++ b/llvm/lib/Transforms/GenVISC/GenVISC.cpp
@@ -165,8 +165,11 @@ IS_VISC_CALL(hint)
 
 // Tensor Operators
 IS_VISC_CALL(tensor_mul)
+IS_VISC_CALL(tensor_convolution)
 IS_VISC_CALL(tensor_add)
 IS_VISC_CALL(tensor_relu)
+IS_VISC_CALL(tensor_tanh)
+IS_VISC_CALL(tensor_sigmoid)
 IS_VISC_CALL(tensor_softmax)
 
 // Return the constant integer represented by value V
@@ -1263,6 +1266,9 @@ bool GenVISC::runOnModule(Module &M) {
       if (isVISCCall_cos(I)) {
         ReplaceCallWithIntrinsic(I, Intrinsic::cos, &toBeErased);
       }
+      if (isVISCCall_tensor_convolution(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::visc_tensor_convolution, &toBeErased);
+      }
       if (isVISCCall_tensor_add(I)) {
         ReplaceCallWithIntrinsic(I, Intrinsic::visc_tensor_add, &toBeErased);
       }
diff --git a/llvm/test/VISC/DNN_Benchmarks/common/include/tensorUtils.h b/llvm/test/VISC/DNN_Benchmarks/common/include/tensorUtils.h
index b75d5520fe..4200ad1569 100644
--- a/llvm/test/VISC/DNN_Benchmarks/common/include/tensorUtils.h
+++ b/llvm/test/VISC/DNN_Benchmarks/common/include/tensorUtils.h
@@ -16,10 +16,10 @@ void printTensorInfo(void* tensor_ptr){
     printf("Successful cudaMalloc \n");
   }
 
-  printf("tensor dims = %zu \n", tensor->dims.num_dims);
+  printf("tensor dims = %d \n", tensor->dims.num_dims);
   printf("dim1_size = %zu \n", tensor->dims.dim_sizes[0]);
   printf("dim2_size = %zu \n", tensor->dims.dim_sizes[1]);
-  printf("num_elems = %d \n", tensor->num_elems);
+  printf("num_elems = %zu \n", tensor->num_elems);
 }
 
 
diff --git a/llvm/test/VISC/DNN_Benchmarks/common/include/visc.h b/llvm/test/VISC/DNN_Benchmarks/common/include/visc.h
index 30e15359bc..cf6180e406 100644
--- a/llvm/test/VISC/DNN_Benchmarks/common/include/visc.h
+++ b/llvm/test/VISC/DNN_Benchmarks/common/include/visc.h
@@ -94,7 +94,9 @@ float __visc__cos(float);
 
 void* __visc__tensor_add(void*, void*);
 void* __visc__tensor_mul(void*, void*);
+void* __visc__tensor_convolution(void*, void*, int, int, int, int);
 void* __visc__tensor_relu(void*);
+void* __visc__tensor_tanh(void*);
 void* __visc__tensor_softmax(void*);
 
 #include <unistd.h>
-- 
GitLab
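
A minimal usage sketch of the new __visc__tensor_convolution entry point
declared in visc.h. Reading the four integer arguments as vertical padding,
horizontal padding, vertical stride, and horizontal stride is an assumption
inferred from the six operands the CUDNN backend forwards to the
tensorConvolution runtime call; the node function name, the CUDNN_TARGET
hint value, and the attributes/return calls are illustrative, following the
style of the existing DNN benchmarks, and are not part of this patch.

    // Sketch only: assumes the int arguments are
    // (pad_vertical, pad_horizontal, stride_vertical, stride_horizontal).
    void conv_node(void *input, size_t bytes_input,
                   void *filter, size_t bytes_filter) {
      __visc__hint(visc::CUDNN_TARGET);        // map this node to the CUDNN backend
      __visc__attributes(2, input, filter, 0); // two tensor inputs, no outputs listed

      // 1-pixel padding and unit stride in both spatial dimensions.
      void *result = __visc__tensor_convolution(input, filter, 1, 1, 1, 1);
      __visc__return(2, result, (size_t) 0);
    }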