diff --git a/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt b/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt
index 5c04604406eb81571c0a87539fb0568aad3c4e4d..67a00791b04ef3c0243327ab76ea78336e8b7698 100644
--- a/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt
+++ b/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt
@@ -58,7 +58,7 @@ set(
   RUNTIME_SRCS_FILENAME
   approx_knobs_utils.cc approx_simulation.cu
   approx_techniques.cu configuration.cpp
-  debug.cpp device_math.cu
+  debug.cpp error.cu
   fp16_gemm.cu freq_utils.cc
   global_data.cc
   group_conv.cu
diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/include/approx_api.h b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/include/approx_api.h
index 6b0f835f7361fb54b9826bdec7e1819333f989df..cac6b6fd686234cadf78096a729eecb1a3203250 100644
--- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/include/approx_api.h
+++ b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/include/approx_api.h
@@ -3,8 +3,6 @@
 
 #include "tensor.h"
 
-#include "device_math.h"
-
 extern "C" {
 
 // NOTE: API for tensorGroupConvolution
@@ -59,19 +57,6 @@ void *tensorConvSampSim2(void *input_ptr, void *filter_ptr, int vertical_pad,
                          int skip_rate, int skip_offset,
                          float interpolation_rate);
 
-void *autotuner_tensorFft(void *input, bool inverse);
-
-void *autotuner_tensorReduce(void *input, size_t axis, MathOp func);
-
-void *autotuner_tensorProjectiveT(void *input, void *transformation);
-
-void *autotuner_tensorMap1(MathOp func, void *input);
-
-void *autotuner_tensorMap2(MathOp func, void *input1, void *input2);
-
-void *autotuner_tensorMap3(MathOp func, void *input1, void *input2,
-                           void *input3);
-
 void *tensorConvInputHalf(void *input_ptr, void *filter_ptr, int vertical_pad,
                           int horizontal_pad, int vertical_stride,
                           int horizontal_stride, int conv_mode, int conv_groups,
diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/include/configuration.h b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/include/configuration.h
index 3b52cce9f62504753d63015a599d214194d48d98..4cab25ab593a40f11d17a96d4045fd11afa36530 100644
--- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/include/configuration.h
+++ b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/include/configuration.h
@@ -53,15 +53,7 @@ public:
     POOL_MEAN,
     POOL_MIN,
     SOFTMAX,
-    FFT,
-    REDUCE,
-    PROJECTIVE_T,
-    MAP1,
-    MAP2,
-    MAP3,
-    // STENCIL,
-    // COSINE_T,
-    // ADDITIONAL_TENSOR_OPERATION
+    // ADDITIONAL_TENSOR_OPERATION
     TENSOR_OP_END
   };
 
diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/include/device_math.h b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/include/device_math.h
deleted file mode 100644
index 83781b148c4bb41619bbbb54d9e69cc9fc7f2543..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/include/device_math.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
-device_math.h
-Provides pointer to CUDA math function and other properties ofa math operator
-(one among MathOp) on a certain scalar type.
-*/
-#ifndef DEVICE_MATH_H
-#define DEVICE_MATH_H
-
-#include <cuda_fp16.h>
-#include <device_launch_parameters.h>
-#include <limits>
-#include <stdexcept>
-
-#include "debug.h"
-
-enum class MathOp {
-  Hypot,
-  Atan2,
-  Add,
-  Sub,
-  Mul,
-  Div,
-  Sqrt,
-  Max,
-  Min,
-  Avg3,
-  Blend2,
-  AddWeighted,
-  PSNR
-};
-
-// Find the CUDA function for math operator `op`.
-// This is ONLY defined (through template specialization, in device_math.cu) for
-// float and half (see below).
-template <typename T> void *mathOpToFunc(MathOp op);
-
-template <> void *mathOpToFunc<float>(MathOp op);
-
-template <> void *mathOpToFunc<half>(MathOp op);
-
-// Returns the identity element of math operator `op`, for example, -inf for
-// MAX, 0 for ADD.
-// Specialization exists for half type.
-template <typename T> T reduceOpToIdentity(MathOp op) {
-  switch (op) {
-  case MathOp::Hypot:
-    return T(0.0f);
-  case MathOp::Add:
-    return T(0.0f);
-  case MathOp::Max:
-    return -std::numeric_limits<T>::max();
-  case MathOp::Min:
-    return std::numeric_limits<T>::max();
-  default:
-    ERROR("Operator does not have id value\n");
-  }
-  return T(); // For some compilers
-}
-
-template <> half reduceOpToIdentity<half>(MathOp op);
-
-#endif
diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/approx_simulation.cu b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/approx_simulation.cu
index 8a8ff8435db96607917fc627036e72318409ef9b..a472fcaa36484950de98f858a74f185900ab80b7 100644
--- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/approx_simulation.cu
+++ b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/approx_simulation.cu
@@ -22,7 +22,6 @@
 #include "op_overheads.h"
 #include "half_precision_api.h"
 #include "approx_utils.h"
-#include "device_math.h"
 #include "global_data.h"
 #include "approx_knob_utils.h"
 
diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/configuration.cpp b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/configuration.cpp
index c18ffcea26f93fe752500983f4d4a3fcfe59ded2..063112ab78641dc57f4d79259df837fa12177b42 100644
--- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/configuration.cpp
+++ b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/configuration.cpp
@@ -144,24 +144,6 @@ void GPUNodeConfiguration::print() {
       case G_TENSOR_OP::SOFTMAX:
         printf("softmax");
         break;
-      case G_TENSOR_OP::FFT:
-        printf("fft");
-        break;
-      case G_TENSOR_OP::REDUCE:
-        printf("reduce");
-        break;
-      case G_TENSOR_OP::PROJECTIVE_T:
-        printf("projectiveT");
-        break;
-      case G_TENSOR_OP::MAP1:
-        printf("map1");
-        break;
-      case G_TENSOR_OP::MAP2:
-        printf("map2");
-        break;
-      case G_TENSOR_OP::MAP3:
-        printf("map3");
-        break;
       // TODO additional operations to be printed here
       default:
         ERROR("Unknown tensor operation.");
diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/device_math.cu b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/device_math.cu
deleted file mode 100644
index 032443bd7a63a1640e463c0457dd362e09733be3..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/device_math.cu
+++ /dev/null
@@ -1,189 +0,0 @@
-/* device_math.cu defines */
-#include "device_math.h"
-#include "error.h"
-
-#include <thrust/complex.h>
-
-#define DEF_FUNC_PTR(fname) __device__ void *fname##_ptr = (void *)(fname);
-
-#define DEF_FUNC_PTR_CAST(fname, castf)                                        \
-  __device__ void *fname##_ptr = (void *)(castf);
-
-#define CASE_FUNC(ename, fname)                                                \
-  case MathOp::ename: {                                                        \
-    void *v_func_ptr = nullptr;                                                \
-    checkCudaErrors(cudaMemcpyFromSymbol(&v_func_ptr, _internal::fname##_ptr,  \
-                                         sizeof(void *)));                     \
-    return v_func_ptr;                                                         \
-  }
-
-namespace _internal {
-
-// The following functions are not used, but they reference their cuda
-// counterpart which is necessary, otherwise ptx compilation breaks
-__device__ float hypotf_(float x, float y) { return hypotf(x, y); }
-
-__device__ float atan2f_(float x, float y) { return atan2f(x, y); }
-
-__device__ float sqrtf_(float x) { return sqrtf(x); }
-
-__device__ float fmax_(float x, float y) { return fmax(x, y); }
-
-__device__ float fmin_(float x, float y) { return fmin(x, y); }
-
-__device__ float add(float x, float y) { return x + y; }
-
-__device__ float sub(float x, float y) { return x - y; }
-
-__device__ float mul(float x, float y) { return x * y; }
-
-__device__ float div(float x, float y) { return x / y; }
-
-__device__ float favg3(float x) { return __fdividef(x, 3.0f); }
-
-__device__ float blend2(float bg, float fg) { return bg * 0.6 + fg * 0.4; }
-
-__device__ float addWeighted(float blurred, float image) {
-  return 0.7 * image + 0.3 * blurred;
-}
-
-__device__ float psnr(float x) { return -10 * log10(x); }
-
-__device__ float2 f2mul(float2 x1, float2 x2) {
-  return {x1.x * x2.x - x1.y * x2.y, x1.x * x2.y + x1.y * x2.x};
-}
-
-__device__ half2 h2mul(half2 x1, half2 x2) {
-  return {x1.x * x2.x - x1.y * x2.y, x1.x * x2.y + x1.y * x2.x};
-}
-
-__device__ half2 h2hypot(half2 x, half2 y) {
-  return h2sqrt(__hfma2(x, x, __hmul2(y, y)));
-}
-
-__device__ half2 h2max(half2 x, half2 y) {
-  return __hfma2(__hgt2(x, y), x, __hmul2(__hle2(x, y), y));
-}
-
-__device__ half2 h2min(half2 x, half2 y) {
-  return __hfma2(__hlt2(x, y), x, __hmul2(__hge2(x, y), y));
-}
-
-__device__ half2 h2avg3(half2 x) {
-  half2 three = __floats2half2_rn(3.0f, 3.0f);
-  return __h2div(x, three);
-}
-
-__device__ half2 h2blend2(half2 bg, half2 fg) {
-  half2 c1 = __floats2half2_rn(0.6f, 0.6f), c2 = __floats2half2_rn(0.4f, 0.4f);
-  return __hfma2(bg, c1, __hmul2(fg, c2));
-}
-
-__device__ half2 h2addWeighted(half2 blurred, half2 image) {
-  half2 c1 = __floats2half2_rn(0.7f, 0.7f), c2 = __floats2half2_rn(0.3f, 0.3f);
-  return __hfma2(image, c1, __hmul2(blurred, c2));
-}
-
-DEF_FUNC_PTR(hypotf)
-DEF_FUNC_PTR(atan2f)
-DEF_FUNC_PTR(add)
-DEF_FUNC_PTR(sub)
-DEF_FUNC_PTR(mul)
-DEF_FUNC_PTR(div)
-DEF_FUNC_PTR(sqrtf)
-DEF_FUNC_PTR_CAST(fmax, (float (*)(float, float))fmax)
-DEF_FUNC_PTR_CAST(fmin, (float (*)(float, float))fmin)
-DEF_FUNC_PTR(favg3)
-DEF_FUNC_PTR(blend2)
-DEF_FUNC_PTR(addWeighted)
-DEF_FUNC_PTR(psnr)
-
-DEF_FUNC_PTR(f2mul)
-
-DEF_FUNC_PTR(h2mul)
-
-DEF_FUNC_PTR(h2hypot)
-DEF_FUNC_PTR(__hadd2)
-DEF_FUNC_PTR(__hsub2)
-DEF_FUNC_PTR(__h2div)
-DEF_FUNC_PTR(h2sqrt)
-DEF_FUNC_PTR(h2max)
-DEF_FUNC_PTR(h2min)
-DEF_FUNC_PTR(h2avg3)
-DEF_FUNC_PTR(h2blend2)
-DEF_FUNC_PTR(h2addWeighted)
-
-} // namespace _internal
-
-template <> void *mathOpToFunc<float2>(MathOp op) {
-  switch (op) {
-    CASE_FUNC(Mul, f2mul)
-  default:
-    ERROR("Float2 function not found\n");
-    return nullptr; // For some compilers
-  }
-}
-
-template <> void *mathOpToFunc<half2>(MathOp op) {
-  switch (op) {
-    CASE_FUNC(Mul, h2mul)
-  default:
-    ERROR("Half2 function not found\n");
-    return nullptr; // For some compilers
-  }
-}
-
-template <> void *mathOpToFunc<float>(MathOp op) {
-  switch (op) {
-    CASE_FUNC(Hypot, hypotf)
-    CASE_FUNC(Atan2, atan2f)
-    CASE_FUNC(Add, add)
-    CASE_FUNC(Sub, sub)
-    CASE_FUNC(Mul, mul)
-    CASE_FUNC(Div, div)
-    CASE_FUNC(Sqrt, sqrtf)
-    CASE_FUNC(Max, fmax)
-    CASE_FUNC(Min, fmin)
-    CASE_FUNC(Avg3, favg3)
-    CASE_FUNC(Blend2, blend2)
-    CASE_FUNC(AddWeighted, addWeighted)
-    CASE_FUNC(PSNR, psnr)
-  default:
-    ERROR("Float function not found\n");
-  }
-  return nullptr; // For some compilers
-}
-
-template <> void *mathOpToFunc<half>(MathOp op) {
-  switch (op) {
-    CASE_FUNC(Hypot, h2hypot)
-    CASE_FUNC(Add, __hadd2)
-    CASE_FUNC(Sub, __hsub2)
-    CASE_FUNC(Div, __h2div)
-    CASE_FUNC(Sqrt, h2sqrt)
-    CASE_FUNC(Max, h2max)
-    CASE_FUNC(Min, h2min)
-    CASE_FUNC(Avg3, h2avg3)
-    CASE_FUNC(Blend2, h2blend2)
-    CASE_FUNC(AddWeighted, h2addWeighted)
-  default:
-    ERROR("Half function not found\n");
-  }
-  return nullptr; // For some compilers
-}
-
-template <> half reduceOpToIdentity<half>(MathOp op) {
-  switch (op) {
-  case MathOp::Hypot:
-    return 0.0f;
-  case MathOp::Add:
-    return 0.0f;
-  case MathOp::Max:
-    return -65504.0f;
-  case MathOp::Min:
-    return 65504.0f;
-  default:
-    ERROR("Operator does not have id value\n");
-  }
-  return 0.0f; // For some compilers
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/hpvm-rt-controller.cpp b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/hpvm-rt-controller.cpp
index 5e1fbc99197af7797620f80ffbbc5aa41ee63517..b17285b5e05f6a8576c1e3991ad9f4ffa8735d9e 100644
--- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/hpvm-rt-controller.cpp
+++ b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/hpvm-rt-controller.cpp
@@ -797,36 +797,6 @@ void RuntimeController::readConfigurationFile(const char *str) {
             NodeConf->pushNewTensorOperation(
                 GPUNodeConfiguration::TENSOR_OP::SOFTMAX);
             idx++;
-          } else if (tokens[idx] == "fft") {
-            DEBUG("Found fft operation\n");
-            NodeConf->pushNewTensorOperation(
-                GPUNodeConfiguration::TENSOR_OP::FFT);
-            idx++;
-          } else if (tokens[idx] == "reduce") {
-            DEBUG("Found reduce operation\n");
-            NodeConf->pushNewTensorOperation(
-                GPUNodeConfiguration::TENSOR_OP::REDUCE);
-            idx++;
-          } else if (tokens[idx] == "projectiveT") {
-            DEBUG("Found projectiveT operation\n");
-            NodeConf->pushNewTensorOperation(
-                GPUNodeConfiguration::TENSOR_OP::PROJECTIVE_T);
-            idx++;
-          } else if (tokens[idx] == "map1") {
-            DEBUG("Found map1 operation\n");
-            NodeConf->pushNewTensorOperation(
-                GPUNodeConfiguration::TENSOR_OP::MAP1);
-            idx++;
-          } else if (tokens[idx] == "map2") {
-            DEBUG("Found map2 operation\n");
-            NodeConf->pushNewTensorOperation(
-                GPUNodeConfiguration::TENSOR_OP::MAP2);
-            idx++;
-          } else if (tokens[idx] == "map3") {
-            DEBUG("Found map3 operation\n");
-            NodeConf->pushNewTensorOperation(
-                GPUNodeConfiguration::TENSOR_OP::MAP3);
-            idx++;
           } else /*Not a new operation. This means an approximation option*/
               if (tokens[idx] == "fp32") {
             DEBUG("Found fp32 option\n");
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/include/hpvm.h b/hpvm/test/dnn_benchmarks/hpvm-c/include/hpvm.h
index 0587311910512c7c35ee69b8df5a440096da1484..9e2e6bc36e488a0d3e61bf0e2e8171bdce064115 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/include/hpvm.h
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/include/hpvm.h
@@ -83,15 +83,6 @@ void *__hpvm__tensor_pool_mean(void *, int, int, int, int, int, int);
 void *__hpvm__tensor_relu(void *);
 void *__hpvm__tensor_tanh(void *);
 void *__hpvm__tensor_softmax(void *);
-// Tensor ops for image processing
-void *__hpvm__tensor_fft(void *);
-void *__hpvm__tensor_reduce(void *, int, void *);
-void *__hpvm__tensor_projectiveT(void *, void *);
-void *__hpvm__tensor_map1(void *, void *);
-void *__hpvm__tensor_map2(void *, void *, void *);
-void *__hpvm__tensor_map3(void *, void *, void *, void *);
-void *__hpvm__tensor_cosineT(void *);
-void *__hpvm__tensor_stencil(void *);
 
 // New HPVM intrinsic for Setting Node ID
 void *__hpvm__node_id(int);