diff --git a/llvm/include/llvm/IR/IntrinsicsVISC.td b/llvm/include/llvm/IR/IntrinsicsVISC.td
index c6ce86c504efc6a56b3f6888977265335d5cc31e..131f1384cbdd35d0949056a9d3b083fefdc90a6c 100644
--- a/llvm/include/llvm/IR/IntrinsicsVISC.td
+++ b/llvm/include/llvm/IR/IntrinsicsVISC.td
@@ -245,11 +245,21 @@ let TargetPrefix = "visc" in {
    */
   def int_visc_tensor_relu : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], []>;
 
+  /* Tensor clipped relu intrinsic
+   * i8* llvm.visc.tensor.clipped.relu(i8*);
+   */
+  def int_visc_tensor_clipped_relu : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], []>;
+
   /* Tensor tanh intrinsic
    * i8* llvm.visc.tensor.tanh(i8*);
    */
   def int_visc_tensor_tanh : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], []>;
 
+  /* Tensor sigmoid intrinsic
+   * i8* llvm.visc.tensor.sigmoid(i8*);
+   */
+  def int_visc_tensor_sigmoid : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], []>;
+
   /* Tensor softmax intrinsic
    * i8* llvm.visc.tensor.softmax(i8*);
    */
diff --git a/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp b/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp
index a6374406d2b0da0b488798838a6175882eea1a3d..229a48a529ebb46d2d982278f81ff08abf3a317a 100644
--- a/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp
+++ b/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp
@@ -270,6 +270,40 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
     /********************* Handle VISC Tensor intrinsics ********************/
     switch (II->getIntrinsicID()) {
+
+    case Intrinsic::visc_tensor_convolution:
+    { /* llvm.hpvm.tensor.convolution */
+      // Tensor convolution is not in place.
+      DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor convolution \n");
+
+      // Argument list for the runtime call
+      std::vector<Value*> Args;
+      Args.push_back(II->getOperand(0));
+      Args.push_back(II->getOperand(1));
+      Args.push_back(II->getOperand(2));
+      Args.push_back(II->getOperand(3));
+      Args.push_back(II->getOperand(4));
+      Args.push_back(II->getOperand(5));
+
+      Constant* conv_mode = ConstantInt::get(Type::getInt32Ty(M.getContext()), 1);
+      Constant* conv_precision = ConstantInt::get(Type::getInt32Ty(M.getContext()), 0);
+
+      Args.push_back(conv_mode);
+      Args.push_back(conv_precision);
+
+      // Create cudnn runtime function call
+      Constant* tensorConvolution;
+      DECLARE(tensorConvolution);
+
+      CallInst* CI = CallInst::Create(tensorConvolution,
+                                      Args, "", II);
+      // We can replace the call to hpvm.tensor.convolution with the runtime call
+      II->replaceAllUsesWith(CI);
+
+      // Mark to remove at the end
+      IItoRemove.push_back(II);
+    }
+    break;
 
     case Intrinsic::visc_tensor_mul:
     { /* llvm.hpvm.tensor.mul */
       // Tensor mul is not in place.
@@ -388,12 +422,15 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
     }
   }
 
+  //--- errs()<<"IIToRemove.size() = "<<IItoRemove.size()<<"\n\n";
+
   // We need to do this explicitly: DCE pass may not remove them.
   // Traverse the vector backwards, otherwise definitions are deleted while
   // their subsequent uses are still around.
   for (std::vector<IntrinsicInst *>::reverse_iterator ri = IItoRemove.rbegin(),
        re = IItoRemove.rend(); ri != re; ++ri) {
     DEBUG(errs() << "Erasing: " << **ri << "\n");
+    errs() << "Erasing: " << **ri << "\n";
     (*ri)->eraseFromParent();
   }
 
diff --git a/llvm/lib/Transforms/GenVISC/GenVISC.cpp b/llvm/lib/Transforms/GenVISC/GenVISC.cpp
index 2f6282deac47a193abf5711d04cdf809466a9187..01effd433eb6e0d27f6d2f891909b11b51b0257b 100644
--- a/llvm/lib/Transforms/GenVISC/GenVISC.cpp
+++ b/llvm/lib/Transforms/GenVISC/GenVISC.cpp
@@ -165,8 +165,11 @@ IS_VISC_CALL(hint)
 
 // Tensor Operators
 IS_VISC_CALL(tensor_mul)
+IS_VISC_CALL(tensor_convolution)
 IS_VISC_CALL(tensor_add)
 IS_VISC_CALL(tensor_relu)
+IS_VISC_CALL(tensor_tanh)
+IS_VISC_CALL(tensor_sigmoid)
 IS_VISC_CALL(tensor_softmax)
 
 // Return the constant integer represented by value V
@@ -1263,6 +1266,9 @@ bool GenVISC::runOnModule(Module &M) {
       if (isVISCCall_cos(I)) {
         ReplaceCallWithIntrinsic(I, Intrinsic::cos, &toBeErased);
       }
+      if (isVISCCall_tensor_convolution(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::visc_tensor_convolution, &toBeErased);
+      }
       if (isVISCCall_tensor_add(I)) {
         ReplaceCallWithIntrinsic(I, Intrinsic::visc_tensor_add, &toBeErased);
       }
diff --git a/llvm/test/VISC/DNN_Benchmarks/common/include/tensorUtils.h b/llvm/test/VISC/DNN_Benchmarks/common/include/tensorUtils.h
index b75d5520fe7ac731ce69e3fe17b24238fa19440a..4200ad1569acce4f42a23e49fda3eec64409f980 100644
--- a/llvm/test/VISC/DNN_Benchmarks/common/include/tensorUtils.h
+++ b/llvm/test/VISC/DNN_Benchmarks/common/include/tensorUtils.h
@@ -16,10 +16,10 @@ void printTensorInfo(void* tensor_ptr){
     printf("Successful cudaMalloc \n");
   }
 
-  printf("tensor dims = %zu \n", tensor->dims.num_dims);
+  printf("tensor dims = %d \n", tensor->dims.num_dims);
   printf("dim1_size = %zu \n", tensor->dims.dim_sizes[0]);
   printf("dim2_size = %zu \n", tensor->dims.dim_sizes[1]);
-  printf("num_elems = %d \n", tensor->num_elems);
+  printf("num_elems = %zu \n", tensor->num_elems);
 }
 
 
diff --git a/llvm/test/VISC/DNN_Benchmarks/common/include/visc.h b/llvm/test/VISC/DNN_Benchmarks/common/include/visc.h
index 30e15359bcfcca793717025e96655bd90f09f80f..cf6180e40600469aee158b0c9881d57135d60d9e 100644
--- a/llvm/test/VISC/DNN_Benchmarks/common/include/visc.h
+++ b/llvm/test/VISC/DNN_Benchmarks/common/include/visc.h
@@ -94,7 +94,9 @@ float __visc__cos(float);
 
 void* __visc__tensor_add(void*, void*);
 void* __visc__tensor_mul(void*, void*);
+void* __visc__tensor_convolution(void*, void*, int, int, int, int);
 void* __visc__tensor_relu(void*);
+void* __visc__tensor_tanh(void*);
 void* __visc__tensor_softmax(void*);
 
 #include <unistd.h>
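
Usage sketch (not part of the patch): the two new front-end builtins declared in visc.h could be chained from a DNN benchmark leaf node as below. The interpretation of the four int parameters of __visc__tensor_convolution, taken here as vertical/horizontal padding followed by vertical/horizontal stride, is an assumption; the patch does not name them, and conv_then_tanh is a hypothetical function for illustration.

    #include "visc.h"

    /* Hypothetical leaf-node body: a convolution followed by tanh.
     * The order of the four int arguments (assumed: pad_h, pad_w,
     * stride_h, stride_w) is not documented by this patch. */
    void* conv_then_tanh(void* input, void* filter) {
      void* conv_out = __visc__tensor_convolution(input, filter, 1, 1, 1, 1);
      return __visc__tensor_tanh(conv_out);
    }

GenVISC rewrites each such call into the llvm.visc.tensor.* intrinsic, and the CUDNN back end then forwards the convolution intrinsic's six operands to the tensorConvolution runtime call, appending conv_mode = 1 and conv_precision = 0 as constant arguments.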