diff --git a/hpvm/llvm_patches/include/IR/IntrinsicsVISC.td b/hpvm/llvm_patches/include/IR/IntrinsicsApproxHPVM.td similarity index 52% rename from hpvm/llvm_patches/include/IR/IntrinsicsVISC.td rename to hpvm/llvm_patches/include/IR/IntrinsicsApproxHPVM.td index 404903648f8656e00d283d50b27fd411cb930ac8..0287f083849d9385faa00cc2943834b5b58439b0 100644 --- a/hpvm/llvm_patches/include/IR/IntrinsicsVISC.td +++ b/hpvm/llvm_patches/include/IR/IntrinsicsApproxHPVM.td @@ -1,4 +1,4 @@ -//===- IntrinsicsVISC.td - Defines VISC intrinsics ---------*- tablegen -*-===// +//===- IntrinsicsApproxHPVM.td - Defines HPVM intrinsics ---*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -7,268 +7,250 @@ // //===----------------------------------------------------------------------===// // -// This file defines all of the VISC-specific intrinsics. +// This file defines all of the HPVM-specific intrinsics. // //===----------------------------------------------------------------------===// -let TargetPrefix = "visc" in { - /* All intrinsics start with "llvm.visc." +let TargetPrefix = "hpvm" in { + /* All intrinsics start with "llvm.hpvm." * As we do not want the compiler to mess with these intrinsics, we assume * worst memory behavior for all these intrinsics. 
*/ /* Initialization intrinsic - - * i8* llvm.visc.setup(function*); + * void llvm.hpvm.init(); */ - def int_visc_init : Intrinsic<[], [], []>; + def int_hpvm_init : Intrinsic<[], [], []>; /* Launch intrinsic - with streaming argument - * i8* llvm.visc.launch(i8*, ArgList*, i1); + * i8* llvm.hpvm.launch(i8*, ArgList*, i1); */ - def int_visc_launch : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, + def int_hpvm_launch : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_ptr_ty, llvm_i1_ty], []>; /* Push intrinsic - push data on streaming pipeline - * void llvm.visc.push(i8*, ArgList*); + * void llvm.hpvm.push(i8*, ArgList*); */ - def int_visc_push : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty], []>; + def int_hpvm_push : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty], []>; /* Pop intrinsic - pop data from streaming pipeline - * i8* llvm.visc.pop(i8*); + * i8* llvm.hpvm.pop(i8*); */ - def int_visc_pop : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], []>; + def int_hpvm_pop : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], []>; /* Cleanup intrinsic - - * void llvm.visc.cleanup(i8*); + * void llvm.hpvm.cleanup(); */ - def int_visc_cleanup : Intrinsic<[], [], []>; + def int_hpvm_cleanup : Intrinsic<[], [], []>; /* Wait intrinsic - - * void llvm.visc.wait(graphID*); + * void llvm.hpvm.wait(graphID*); */ - def int_visc_wait : Intrinsic<[], [llvm_ptr_ty], []>; + def int_hpvm_wait : Intrinsic<[], [llvm_ptr_ty], []>; /* Track memory intrinsic - - * void llvm.visc.trackMemory(i8*, i64); + * void llvm.hpvm.trackMemory(i8*, i64); */ - def int_visc_trackMemory : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty], []>; + def int_hpvm_trackMemory : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty], []>; /* Track memory intrinsic - - * void llvm.visc.untrackMemory(i8*); + * void llvm.hpvm.untrackMemory(i8*); */ - def int_visc_untrackMemory : Intrinsic<[], [llvm_ptr_ty], []>; + def int_hpvm_untrackMemory : Intrinsic<[], [llvm_ptr_ty], []>; /* Request memory intrinsic - - * void llvm.visc.requestMemory(i8*, i64); + * void 
llvm.hpvm.requestMemory(i8*, i64); */ - def int_visc_requestMemory : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty], []>; + def int_hpvm_requestMemory : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty], []>; /* Create Node intrinsic - - * i8* llvm.visc.createNode(function*); + * i8* llvm.hpvm.createNode(function*); */ - def int_visc_createNode : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], []>; + def int_hpvm_createNode : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], []>; /* Create Node 1D array intrinsic - - * i8* llvm.visc.createNode1D(function*, i64); + * i8* llvm.hpvm.createNode1D(function*, i64); */ - def int_visc_createNode1D : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, + def int_hpvm_createNode1D : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i64_ty], []>; /* Create Node 2D array intrinsic - - * i8* llvm.visc.createNode2D(function*, i64, i64); + * i8* llvm.hpvm.createNode2D(function*, i64, i64); */ - def int_visc_createNode2D : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, + def int_hpvm_createNode2D : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty], []>; /* Create Node 3D array intrinsic - - * i8* llvm.visc.createNode2D(function*, i64, i64, i64); + * i8* llvm.hpvm.createNode3D(function*, i64, i64, i64); */ - def int_visc_createNode3D : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, + def int_hpvm_createNode3D : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], []>; /* Create dataflow edge intrinsic - - * i8* llvm.visc.createEdge(i8*, i8*, i1, i32, i32, i1); + * i8* llvm.hpvm.createEdge(i8*, i8*, i1, i32, i32, i1); */ - def int_visc_createEdge : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_ptr_ty, + def int_hpvm_createEdge : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_ptr_ty, llvm_i1_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty], []>; /* Create bind input intrinsic - - * void llvm.visc.bind.input(i8*, i32, i32); + * void llvm.hpvm.bind.input(i8*, i32, i32); */ - def int_visc_bind_input : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, + def int_hpvm_bind_input : 
Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty], []>; /* Create bind output intrinsic - - * void llvm.visc.bind.output(i8*, i32, i32); + * void llvm.hpvm.bind.output(i8*, i32, i32); */ - def int_visc_bind_output : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, + def int_hpvm_bind_output : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty], []>; /* Find associated dataflow node intrinsic - - * i8* llvm.visc.getNode(); + * i8* llvm.hpvm.getNode(); */ - def int_visc_getNode : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; + def int_hpvm_getNode : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; /* Find parent dataflow node intrinsic - - * i8* llvm.visc.getParentNode(i8*); + * i8* llvm.hpvm.getParentNode(i8*); */ - def int_visc_getParentNode : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], [IntrNoMem]>; + def int_hpvm_getParentNode : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], [IntrNoMem]>; /* Find the number of dimensions of a dataflow node intrinsic - - * i32 llvm.visc.getNumDims(i8*); + * i32 llvm.hpvm.getNumDims(i8*); */ - def int_visc_getNumDims : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>; + def int_hpvm_getNumDims : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>; /* Find the unique indentifier of a dataflow node (with respect to his parent * node) in the specified dimension intrinsic - */ - /* i64 llvm.visc.getNodeInstanceID.[xyz](i8*); + /* i64 llvm.hpvm.getNodeInstanceID.[xyz](i8*); */ - def int_visc_getNodeInstanceID_x : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], + def int_hpvm_getNodeInstanceID_x : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], [IntrNoMem]>; - def int_visc_getNodeInstanceID_y : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], + def int_hpvm_getNodeInstanceID_y : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], [IntrNoMem]>; - def int_visc_getNodeInstanceID_z : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], + def int_hpvm_getNodeInstanceID_z : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], [IntrNoMem]>; /* Find the number of instances of a dataflow node in the 
specified dimension * intrinsic - */ - /* i64 llvm.visc.getNumNodeInstances.[xyz](i8*); + /* i64 llvm.hpvm.getNumNodeInstances.[xyz](i8*); */ - def int_visc_getNumNodeInstances_x : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], + def int_hpvm_getNumNodeInstances_x : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], [IntrNoMem]>; - def int_visc_getNumNodeInstances_y : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], + def int_hpvm_getNumNodeInstances_y : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], [IntrNoMem]>; - def int_visc_getNumNodeInstances_z : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], + def int_hpvm_getNumNodeInstances_z : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], [IntrNoMem]>; /* Local Barrier - * void llvm.visc.barrier(); + * void llvm.hpvm.barrier(); */ - def int_visc_barrier : Intrinsic<[], [], []>; + def int_hpvm_barrier : Intrinsic<[], [], []>; /* Memory allocation inside the graph - * i8* llvm.visc.malloc(); + * i8* llvm.hpvm.malloc(i64); */ - def int_visc_malloc : Intrinsic<[llvm_ptr_ty], [llvm_i64_ty], []>; + def int_hpvm_malloc : Intrinsic<[llvm_ptr_ty], [llvm_i64_ty], []>; /* Find the vector length supported by target architecture * intrinsic - - * i32 llvm.visc.getVectorLength(); + * i32 llvm.hpvm.getVectorLength(); */ - def int_visc_getVectorLength : Intrinsic<[llvm_i32_ty], [], []>; + def int_hpvm_getVectorLength : Intrinsic<[llvm_i32_ty], [], []>; /* ============ Atomic intrinsics ============= */ // Atomic arithmetic operations - - /* i32 llvm.visc.atomic.cmpxchg(i32*, i32)*/ - def int_visc_atomic_cmpxchg: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, - llvm_i32_ty], []>; - /* i32 llvm.visc.atomic.add(i32*, i32)*/ - def int_visc_atomic_add: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], + /* i32 llvm.hpvm.atomic.add(i32*, i32)*/ + def int_hpvm_atomic_add: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], []>; - /* i32 llvm.visc.atomic.sub(i32*, i32)*/ - def int_visc_atomic_sub: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], + /* i32 llvm.hpvm.atomic.sub(i32*, i32)*/ + def 
int_hpvm_atomic_sub: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], []>; - /* i32 llvm.visc.atomic.xchg(i32*, i32)*/ - def int_visc_atomic_xchg: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], + /* i32 llvm.hpvm.atomic.xchg(i32*, i32)*/ + def int_hpvm_atomic_xchg: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], []>; - /* i32 llvm.visc.atomic.inc(i32*, i32)*/ - def int_visc_atomic_inc: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], + /* i32 llvm.hpvm.atomic.min(i32*, i32)*/ + def int_hpvm_atomic_min: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], []>; - /* i32 llvm.visc.atomic.dec(i32*, i32)*/ - def int_visc_atomic_dec: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], + /* i32 llvm.hpvm.atomic.max(i32*, i32)*/ + def int_hpvm_atomic_max: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], []>; - /* i32 llvm.visc.atomic.min(i32*, i32)*/ - def int_visc_atomic_min: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], - []>; + // Atomic bitwise operations - /* i32 llvm.visc.atomic.umin(i32*, i32)*/ - def int_visc_atomic_umin: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], + /* i32 llvm.hpvm.atomic.and(i32*, i32)*/ + def int_hpvm_atomic_and: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], []>; - /* i32 llvm.visc.atomic.maxi32*, i32)*/ - def int_visc_atomic_max: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], + /* i32 llvm.hpvm.atomic.or(i32*, i32)*/ + def int_hpvm_atomic_or: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], []>; - /* i32 llvm.visc.atomic.umaxi32*, i32)*/ - def int_visc_atomic_umax: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], + /* i32 llvm.hpvm.atomic.xor(i32*, i32)*/ + def int_hpvm_atomic_xor: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], []>; - // Atomic bitwise operations - - /* i32 llvm.visc.atomic.and(i32*, i32)*/ - def int_visc_atomic_and: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], - []>; - /* i32 llvm.visc.atomic.or(i32*, i32)*/ - def int_visc_atomic_or: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, 
llvm_i32_ty], - []>; - - /* i32 llvm.visc.atomic.xor(i32*, i32)*/ - def int_visc_atomic_xor: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], - []>; - /***************************************************************************/ + /***************************************************************************/ /* ApproxHPVM intrinsics */ /***************************************************************************/ /* Tensor add intrinsic - * i8* llvm.visc.tensor.add(i8*, i8*); + * i8* llvm.hpvm.tensor.add(i8*, i8*); */ - def int_visc_tensor_add : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, + def int_hpvm_tensor_add : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_ptr_ty], []>; /* Tensor mul intrinsic - * i8* llvm.visc.tensor.mul(i8*, i8*); + * i8* llvm.hpvm.tensor.mul(i8*, i8*); */ - def int_visc_tensor_mul : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, + def int_hpvm_tensor_mul : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_ptr_ty], []>; /* Tensor relu intrinsic - * i8* llvm.visc.tensor.relu(i8*); + * i8* llvm.hpvm.tensor.relu(i8*); */ - def int_visc_tensor_relu : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], []>; + def int_hpvm_tensor_relu : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], []>; /* Tensor clipped relu intrinsic - * i8* llvm.visc.tensor.clipped.relu(i8*); + * i8* llvm.hpvm.tensor.clipped.relu(i8*); */ - def int_visc_tensor_clipped_relu : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], []>; + def int_hpvm_tensor_clipped_relu : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], []>; /* Tensor tanh intrinsic - * i8* llvm.visc.tensor.tanh(i8*); + * i8* llvm.hpvm.tensor.tanh(i8*); */ - def int_visc_tensor_tanh : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], []>; + def int_hpvm_tensor_tanh : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], []>; /* Tensor sigmoid intrinsic - * i8* llvm.visc.tensor.sigmoid(i8*); + * i8* llvm.hpvm.tensor.sigmoid(i8*); */ - def int_visc_tensor_sigmoid : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], []>; + def int_hpvm_tensor_sigmoid : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], []>; /* Tensor 
softmax intrinsic - * i8* llvm.visc.tensor.softmax(i8*); + * i8* llvm.hpvm.tensor.softmax(i8*); */ - def int_visc_tensor_softmax : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], []>; + def int_hpvm_tensor_softmax : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], []>; /* Tensor convolution intrinsic - * i8* llvm.visc.tensor.convolution(i8*, i8*, i32, i32, i32, i32); + * i8* llvm.hpvm.tensor.convolution(i8*, i8*, i32, i32, i32, i32); */ - def int_visc_tensor_convolution : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, + def int_hpvm_tensor_convolution : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, @@ -276,9 +258,9 @@ let TargetPrefix = "visc" in { llvm_i32_ty], []>; /* Tensor group convolution intrinsic - * i8* llvm.visc.tensor.group.convolution(i8*, i8*, i32, i32, i32, i32, i32, i32); + * i8* llvm.hpvm.tensor.group.convolution(i8*, i8*, i32, i32, i32, i32, i32, i32); */ - def int_visc_tensor_group_convolution : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, + def int_hpvm_tensor_group_convolution : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, @@ -288,9 +270,9 @@ let TargetPrefix = "visc" in { llvm_i32_ty], []>; /* Tensor BatchNorm intrinsic - * i8* llvm.visc.tensor.batchnorm(i8*, i8*, i8*, i8*, i8*, double); + * i8* llvm.hpvm.tensor.batchnorm(i8*, i8*, i8*, i8*, i8*, double); */ - def int_visc_tensor_batchnorm : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, + def int_hpvm_tensor_batchnorm : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty, @@ -299,25 +281,25 @@ let TargetPrefix = "visc" in { /* Tensor pool intrinsics: max, min, average - * i8* llvm.visc.tensor.pool.max(i8*, i32, i32, i32, i32, i32, i32); - * i8* llvm.visc.tensor.pool.min(i8*, i32, i32, i32, i32, i32, i32); - * i8* llvm.visc.tensor.pool.average(i8*, i32, i32, i32, i32, i32, i32); + * i8* llvm.hpvm.tensor.pool.max(i8*, i32, i32, i32, i32, i32, i32); + * i8* llvm.hpvm.tensor.pool.min(i8*, i32, i32, i32, i32, i32, i32); + * i8* 
llvm.hpvm.tensor.pool.average(i8*, i32, i32, i32, i32, i32, i32); */ - def int_visc_tensor_pool_max : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, + def int_hpvm_tensor_pool_max : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; - def int_visc_tensor_pool_min : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, + def int_hpvm_tensor_pool_min : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; - def int_visc_tensor_pool_mean : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, + def int_hpvm_tensor_pool_mean : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, @@ -325,7 +307,5 @@ let TargetPrefix = "visc" in { llvm_i32_ty, llvm_i32_ty], []>; - def int_visc_node_id : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], []>; - - + def int_hpvm_node_id : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], []>; }