diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/approx_techniques.cu b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/approx_techniques.cu index 1b770736bab93dd6a47cb4351dd0ad054e8eb14d..c1848f126750808a9438a4d2cf7729d1bf420fd1 100644 --- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/approx_techniques.cu +++ b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/approx_techniques.cu @@ -1031,7 +1031,10 @@ __global__ void convToGemmApprox(float * const __restrict__ output, } - +/// This function serves as an API to the custom implementation of convolution +/// with the perforation and filter sampling support. The compute precision is FP32. +/// This routine is invoked by the tuner for tuning approximations for convolutions. +/// void* tensorConvPerfCuda(void* input_ptr, void* filter_ptr, int vertical_pad, int horizontal_pad, int vertical_stride, int horizontal_stride, int conv_mode, int conv_groups, @@ -1245,6 +1248,9 @@ void switchMatrixFull(int N, int n, int c, int h, int w, } +/// This function serves as an API to the custom implementation of convolution +/// with the perforation and filter sampling support. The compute precision is FP32. +/// void* tensorConvApprox(void* input_ptr, void* filter_ptr, int vertical_pad, int horizontal_pad, int vertical_stride, int horizontal_stride, int conv_mode, int conv_groups, @@ -1528,6 +1534,10 @@ void switchMatrixHalf(int N, int n, int c, int h, int w, __half *old_data, __hal } +/// This function serves as an API to the custom implementation of the +/// half-precision convolution with the perforation and filter sampling +/// support. 
+/// void* tensorConvApproxHalf2(void* input_ptr, void* filter_ptr, int vertical_pad, int horizontal_pad, int vertical_stride, int horizontal_stride, diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/half_precision_api.cu b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/half_precision_api.cu index f24e8b58dbeb5a49e0eaf51cfac1f2d2f3148caa..e706080051a41dac1f7486027fcb9225793921bf 100644 --- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/half_precision_api.cu +++ b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/half_precision_api.cu @@ -1,4 +1,13 @@ - +//===--------------------------- half_precision_api.cu --------------------------===// +// +//===----------------------------------------------------------------------===// +// +// This file consists of the custom implementation of tensor precision changing +// kernels useful for approximated and non-approximated versions of tensor +// operations. This file also contains API for tensor operations operating on +// tensors with half-precision. +// +//===----------------------------------------------------------------------===// #ifndef HALF_API_HEADER #define HALF_API_HEADER diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/hpvm-rt-controller.cpp b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/hpvm-rt-controller.cpp index f46df0bcd8b94533058a520a97b362b74b5727a7..5b0f0beedb4a13bbe484175ade0e2f5364e7be13 100644 --- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/hpvm-rt-controller.cpp +++ b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/hpvm-rt-controller.cpp @@ -1,3 +1,14 @@ +//===--------------------------- hpvm-rt-controller.cpp ---------------------===// +// +//===----------------------------------------------------------------------===// +// +// This file contains code that allows the tensor runtime to adapt +// in response to external changes in conditions (such as frequency changes) +// by helping to choose correct approximation configurations. 
It also provides +// routines for the rest of the runtime to get performance and energy profiling. +// +//===----------------------------------------------------------------------===// + #include "hpvm-rt-controller.h" #include "img_tensor_utils.h" diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/profiling.cc b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/profiling.cc index b07bc477279bf110e3cae13ace2a2689130586c2..ad1d2e137d19d1c158afb031f35f278d9cdefaa0 100644 --- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/profiling.cc +++ b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/profiling.cc @@ -1,3 +1,12 @@ +//===----------------------------- profiling.cc --------------------------===// +// +//===----------------------------------------------------------------------===// +// +// This file provides the definition of the interface for +// applications to start and stop profiling for energy and performance. +// +//===----------------------------------------------------------------------===// + #ifndef PROFILING_HEADER #define PROFILING_HEADER diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_utils.cu b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_utils.cu index 9dd8def84ceecad0b51e413396e9170bc7472821..079a9898294b01ba8dfcb575f11998790f24abfa 100644 --- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_utils.cu +++ b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_utils.cu @@ -148,7 +148,9 @@ void allocateMem(struct Tensor* tensor, int data_type, size_t num_elems){ //host_ptr.push_back(tensor->host_data); } - +/// Two tensor formats are supported: NCHW and NHWC. +/// TODO: Make this more general in the future. +/// void setCudnnDataFormat(struct Tensor* tensor, int data_format){ switch(data_format){ @@ -267,7 +269,7 @@ void setTensorDescriptor(struct Tensor* tensor, int num_dims, } - +/// HPVM tensor runtime allows creation of 2D, 3D and 4D tensors. 
void* create2DTensor(int data_type, size_t dim1_size, size_t dim2_size){