From ad3b24df1bf65e4e0ecd089ca6d88c59536faa68 Mon Sep 17 00:00:00 2001
From: Akash Kothari <akashk4@tyler.cs.illinois.edu>
Date: Mon, 21 Dec 2020 00:58:44 -0600
Subject: [PATCH] Moved out the HPVM intrinsics table file from the LLVM source
 tree

---
 llvm_patches/include/IR/IntrinsicsVISC.td | 331 ++++++++++++++++++++++
 1 file changed, 331 insertions(+)
 create mode 100644 llvm_patches/include/IR/IntrinsicsVISC.td

diff --git a/llvm_patches/include/IR/IntrinsicsVISC.td b/llvm_patches/include/IR/IntrinsicsVISC.td
new file mode 100644
index 0000000000..404903648f
--- /dev/null
+++ b/llvm_patches/include/IR/IntrinsicsVISC.td
@@ -0,0 +1,331 @@
+//===- IntrinsicsVISC.td - Defines VISC intrinsics ---------*- tablegen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the VISC-specific intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+let TargetPrefix = "visc" in {
+  /* All intrinsics start with "llvm.visc."
+   * As we do not want the compiler to mess with these intrinsics, we assume
+   * worst memory behavior for all these intrinsics.
+   */
+
+  /* Initialization intrinsic -
+   * void llvm.visc.init();
+   */
+  def int_visc_init : Intrinsic<[], [], []>;
+
+  /* Launch intrinsic - with streaming argument
+   * i8* llvm.visc.launch(i8*, ArgList*, i1);
+   */
+  def int_visc_launch : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty,
+                                  llvm_ptr_ty, llvm_i1_ty], []>;
+
+  /* Push intrinsic - push data on streaming pipeline
+   * void llvm.visc.push(i8*, ArgList*);
+   */
+  def int_visc_push : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty], []>;
+
+  /* Pop intrinsic - pop data from streaming pipeline
+   * i8* llvm.visc.pop(i8*);
+   */
+  def int_visc_pop : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], []>;
+
+  /* Cleanup intrinsic -
+   * void llvm.visc.cleanup();
+   */
+  def int_visc_cleanup : Intrinsic<[], [], []>;
+
+  /* Wait intrinsic -
+   * void llvm.visc.wait(graphID*);
+   */
+  def int_visc_wait : Intrinsic<[], [llvm_ptr_ty], []>;
+
+  /* Track memory intrinsic -
+   * void llvm.visc.trackMemory(i8*, i64);
+   */
+  def int_visc_trackMemory : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty], []>;
+
+  /* Track memory intrinsic -
+   * void llvm.visc.untrackMemory(i8*);
+   */
+  def int_visc_untrackMemory : Intrinsic<[], [llvm_ptr_ty], []>;
+
+  /* Request memory intrinsic -
+   * void llvm.visc.requestMemory(i8*, i64);
+   */
+  def int_visc_requestMemory : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty], []>;
+
+  /* Create Node intrinsic -
+   * i8* llvm.visc.createNode(function*);
+   */
+  def int_visc_createNode : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], []>;
+
+  /* Create Node 1D array intrinsic -
+   * i8* llvm.visc.createNode1D(function*, i64);
+   */
+  def int_visc_createNode1D : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty,
+                                        llvm_i64_ty], []>;
+
+  /* Create Node 2D array intrinsic -
+   * i8* llvm.visc.createNode2D(function*, i64, i64);
+   */
+  def int_visc_createNode2D : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty,
+                                        llvm_i64_ty, llvm_i64_ty], []>;
+
+  /* Create Node 3D array intrinsic -
+   * i8* llvm.visc.createNode3D(function*, i64, i64, i64);
+   */
+  def int_visc_createNode3D : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty,
+                                        llvm_i64_ty, llvm_i64_ty, llvm_i64_ty],
+                                        []>;
+
+  /* Create dataflow edge intrinsic -
+   * i8* llvm.visc.createEdge(i8*, i8*, i1, i32, i32, i1);
+   */
+  def int_visc_createEdge : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_ptr_ty,
+                                      llvm_i1_ty, llvm_i32_ty, llvm_i32_ty,
+                                      llvm_i1_ty],
+                                      []>;
+
+  /* Create bind input intrinsic -
+   * void llvm.visc.bind.input(i8*, i32, i32, i1);
+   */
+  def int_visc_bind_input : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty,
+                                      llvm_i32_ty, llvm_i1_ty], []>;
+
+  /* Create bind output intrinsic -
+   * void llvm.visc.bind.output(i8*, i32, i32, i1);
+   */
+  def int_visc_bind_output : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty,
+                                       llvm_i32_ty, llvm_i1_ty], []>;
+
+  /* Find associated dataflow node intrinsic -
+   * i8* llvm.visc.getNode();
+   */
+  def int_visc_getNode : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
+
+  /* Find parent dataflow node intrinsic -
+   * i8* llvm.visc.getParentNode(i8*);
+   */
+  def int_visc_getParentNode : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], [IntrNoMem]>;
+
+  /* Find the number of dimensions of a dataflow node intrinsic -
+   * i32 llvm.visc.getNumDims(i8*);
+   */
+  def int_visc_getNumDims : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>;
+
+  /* Find the unique identifier of a dataflow node (with respect to its parent
+   * node) in the specified dimension intrinsic -
+   */
+
+  /* i64 llvm.visc.getNodeInstanceID.[xyz](i8*);
+   */
+  def int_visc_getNodeInstanceID_x : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty],
+                                               [IntrNoMem]>;
+
+  def int_visc_getNodeInstanceID_y : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty],
+                                               [IntrNoMem]>;
+
+  def int_visc_getNodeInstanceID_z : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty],
+                                               [IntrNoMem]>;
+
+  /* Find the number of instances of a dataflow node in the specified dimension
+   * intrinsic -
+   */
+
+  /* i64 llvm.visc.getNumNodeInstances.[xyz](i8*);
+   */
+  def int_visc_getNumNodeInstances_x : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty],
+                                                 [IntrNoMem]>;
+
+  def int_visc_getNumNodeInstances_y : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty],
+                                                 [IntrNoMem]>;
+
+  def int_visc_getNumNodeInstances_z : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty],
+                                                 [IntrNoMem]>;
+
+  /* Local Barrier
+   * void llvm.visc.barrier();
+   */
+  def int_visc_barrier : Intrinsic<[], [], []>;
+
+  /* Memory allocation inside the graph
+   * i8* llvm.visc.malloc(i64);
+   */
+  def int_visc_malloc : Intrinsic<[llvm_ptr_ty], [llvm_i64_ty], []>;
+
+  /* Find the vector length supported by target architecture
+   * intrinsic -
+   * i32 llvm.visc.getVectorLength();
+   */
+  def int_visc_getVectorLength : Intrinsic<[llvm_i32_ty], [], []>;
+
+  /* ============ Atomic intrinsics ============= */
+  // Atomic arithmetic operations
+  
+  /* i32 llvm.visc.atomic.cmpxchg(i32*, i32)*/
+  def int_visc_atomic_cmpxchg: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty,
+                                          llvm_i32_ty], []>;
+
+  /* i32 llvm.visc.atomic.add(i32*, i32)*/
+  def int_visc_atomic_add: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty],
+                                    []>;
+
+  /* i32 llvm.visc.atomic.sub(i32*, i32)*/
+  def int_visc_atomic_sub: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty],
+                                    []>;
+
+  /* i32 llvm.visc.atomic.xchg(i32*, i32)*/
+  def int_visc_atomic_xchg: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty],
+                                    []>;
+
+  /* i32 llvm.visc.atomic.inc(i32*)*/
+  def int_visc_atomic_inc: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty],
+                                    []>;
+
+  /* i32 llvm.visc.atomic.dec(i32*)*/
+  def int_visc_atomic_dec: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty],
+                                    []>;
+
+  /* i32 llvm.visc.atomic.min(i32*, i32)*/
+  def int_visc_atomic_min: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty],
+                                    []>;
+
+  /* i32 llvm.visc.atomic.umin(i32*, i32)*/
+  def int_visc_atomic_umin: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty],
+                                    []>;
+
+  /* i32 llvm.visc.atomic.max(i32*, i32)*/
+  def int_visc_atomic_max: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty],
+                                    []>;
+
+  /* i32 llvm.visc.atomic.umax(i32*, i32)*/
+  def int_visc_atomic_umax: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty],
+                                    []>;
+
+  // Atomic bitwise operations
+
+  /* i32 llvm.visc.atomic.and(i32*, i32)*/
+  def int_visc_atomic_and: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty],
+                                    []>;
+
+  /* i32 llvm.visc.atomic.or(i32*, i32)*/
+  def int_visc_atomic_or: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty],
+                                    []>;
+
+  /* i32 llvm.visc.atomic.xor(i32*, i32)*/
+  def int_visc_atomic_xor: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty],
+                                    []>;
+  /***************************************************************************/
+  /*                            ApproxHPVM intrinsics                        */
+  /***************************************************************************/
+
+  /* Tensor add intrinsic
+   * i8* llvm.visc.tensor.add(i8*, i8*);
+   */
+  def int_visc_tensor_add : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty,
+                                   llvm_ptr_ty], []>;
+
+  /* Tensor mul intrinsic
+   * i8* llvm.visc.tensor.mul(i8*, i8*);
+   */
+  def int_visc_tensor_mul : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty,
+                                   llvm_ptr_ty], []>;
+
+  /* Tensor relu intrinsic
+   * i8* llvm.visc.tensor.relu(i8*);
+   */
+  def int_visc_tensor_relu : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], []>;
+
+  /* Tensor clipped relu intrinsic
+   * i8* llvm.visc.tensor.clipped.relu(i8*);
+   */
+  def int_visc_tensor_clipped_relu : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], []>;
+
+  /* Tensor tanh intrinsic
+   * i8* llvm.visc.tensor.tanh(i8*);
+   */
+  def int_visc_tensor_tanh : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], []>;
+
+  /* Tensor sigmoid intrinsic
+   * i8* llvm.visc.tensor.sigmoid(i8*);
+   */
+  def int_visc_tensor_sigmoid : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], []>;
+
+  /* Tensor softmax intrinsic
+   * i8* llvm.visc.tensor.softmax(i8*);
+   */
+  def int_visc_tensor_softmax : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], []>;
+
+  /* Tensor convolution intrinsic
+   * i8* llvm.visc.tensor.convolution(i8*, i8*, i32, i32, i32, i32);
+   */
+  def int_visc_tensor_convolution : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty,
+                                                              llvm_ptr_ty,
+                                                              llvm_i32_ty,
+                                                              llvm_i32_ty,
+                                                              llvm_i32_ty,
+                                                              llvm_i32_ty], []>;
+
+  /* Tensor group convolution intrinsic
+   * i8* llvm.visc.tensor.group.convolution(i8*, i8*, i32, i32, i32, i32, i32, i32);
+   */
+  def int_visc_tensor_group_convolution : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty,
+                                                              llvm_ptr_ty,
+                                                              llvm_i32_ty,
+                                                              llvm_i32_ty,
+                                                              llvm_i32_ty,
+                                                              llvm_i32_ty,
+							      llvm_i32_ty,
+							      llvm_i32_ty], []>;
+
+  /* Tensor BatchNorm intrinsic
+   * i8* llvm.visc.tensor.batchnorm(i8*, i8*, i8*, i8*, i8*, double);
+   */
+  def int_visc_tensor_batchnorm : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty,
+                                                            llvm_ptr_ty,
+							    llvm_ptr_ty,
+							    llvm_ptr_ty,
+							    llvm_ptr_ty,
+                                                            llvm_double_ty], []>;
+
+
+  /* Tensor pool intrinsics: max, min, mean
+   * i8* llvm.visc.tensor.pool.max(i8*, i32, i32, i32, i32, i32, i32);
+   * i8* llvm.visc.tensor.pool.min(i8*, i32, i32, i32, i32, i32, i32);
+   * i8* llvm.visc.tensor.pool.mean(i8*, i32, i32, i32, i32, i32, i32);
+   */
+  def int_visc_tensor_pool_max : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty,
+                                                           llvm_i32_ty,
+                                                           llvm_i32_ty,
+                                                           llvm_i32_ty,
+                                                           llvm_i32_ty,
+                                                           llvm_i32_ty,
+                                                           llvm_i32_ty], []>;
+  def int_visc_tensor_pool_min : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty,
+                                                           llvm_i32_ty,
+                                                           llvm_i32_ty,
+                                                           llvm_i32_ty,
+                                                           llvm_i32_ty,
+                                                           llvm_i32_ty,
+                                                           llvm_i32_ty], []>;
+  def int_visc_tensor_pool_mean : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty,
+                                                            llvm_i32_ty,
+                                                            llvm_i32_ty,
+                                                            llvm_i32_ty,
+                                                            llvm_i32_ty,
+                                                            llvm_i32_ty,
+                                                            llvm_i32_ty], []>;
+
+  def int_visc_node_id : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], []>;
+
+
+}
-- 
GitLab