From ab5e780c68ce8ae5babb0d7b152542f5a2c8226f Mon Sep 17 00:00:00 2001
From: Yifan Zhao <yifanz16@illinois.edu>
Date: Sat, 18 Jul 2020 17:57:27 -0500
Subject: [PATCH] Fixing issue llvm/hpvm#60: removing all warnings

---
 llvm/projects/hpvm-tensor-rt/CMakeLists.txt   |  61 +++++-----
 .../dnn_sources/include/utils.h               |   2 +-
 .../dnn_sources/src/half/lenet_mnist_half.cc  |  53 +--------
 .../dnn_sources/src/lenet_mnist.cc            |  47 --------
 .../dnn_sources/src/unit_tests.cc             |  11 +-
 .../tensor_runtime/include/device_math.h      |   1 +
 .../include/functional/common.h               |   8 +-
 .../tensor_runtime/src/approx_simulation.cu   | 106 ++++++++----------
 .../tensor_runtime/src/device_math.cu         |   5 +
 .../tensor_runtime/src/img_tensor_runtime.cu  |   4 +-
 .../tensor_runtime/src/tensor_utils.cu        |   4 +-
 11 files changed, 97 insertions(+), 205 deletions(-)

diff --git a/llvm/projects/hpvm-tensor-rt/CMakeLists.txt b/llvm/projects/hpvm-tensor-rt/CMakeLists.txt
index d86c8fa3cb..76730ff314 100644
--- a/llvm/projects/hpvm-tensor-rt/CMakeLists.txt
+++ b/llvm/projects/hpvm-tensor-rt/CMakeLists.txt
@@ -1,49 +1,38 @@
 cmake_minimum_required (VERSION 2.6)
 project (hpvm-tensor-rt)
-
-
-
-SET(CUDA_SEPARABLE_COMPILATION ON)
 find_package(CUDA 6.5 REQUIRED)
+set(CUDA_SEPARABLE_COMPILATION ON CACHE BOOL "")
+set(CUDA_PROPAGATE_HOST_FLAGS OFF)
+# Addresses a bug where code is not compiled as C++11 in non-CUDA code and older g++ versions
+# Edit: using c++14 now
+set(CMAKE_CXX_STANDARD 14)
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -I/")
 set(
   CUDA_NVCC_FLAGS
   ${CUDA_NVCC_FLAGS};
   -gencode;arch=compute_60,code=sm_60;
   -gencode;arch=compute_60,code=compute_60;
-  -std=c++14 --expt-relaxed-constexpr -maxrregcount 32 # These are newly added
+  -std=c++14 --expt-relaxed-constexpr -maxrregcount 32 # These are for image ops
 )
-
-if (CMAKE_BUILD_TYPE STREQUAL "Debug")
+if(CMAKE_BUILD_TYPE STREQUAL "Debug")
   message("Debug mode")
-  set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-g;-lineinfo;-Xcompiler;-ggdb;-lcurand)
+  set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-g;-lineinfo;-Xcompiler;-ggdb)
 else()
-  set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DNDEBUG;-Xcompiler;-DNDEBUG;-lcurand)
+  set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DNDEBUG;-Xcompiler;-DNDEBUG)
 endif()
 
-set(CUDA_PROPAGATE_HOST_FLAGS OFF)
-
-# Addresses a bug where code is not compiled as C++11 in non-CUDA code and older g++ versions
-# Edit: using c++14 now
-set(CMAKE_CXX_STANDARD 14)
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -I/ " )
-
+# Default options
 if(USE_GFLAGS)
   add_definitions(-DUSE_GFLAGS)
 endif()
-
 if(USE_AUTOTUNER)
   remove_definitions(-DNO_INJECTION)
 endif()
-
-
 add_definitions(-DNO_INJECTION)
 add_definitions(-DPROMISE_TUNER_ENABLED)
 add_definitions(-DSIMULATION_MODE=true)
-add_definitions(-DONLINE_PROFILING=false)
-add_definitions(-DFP16_tuning=true)
-
-
+# Default include/link directories
 include_directories($ENV{CUDNN_PATH} $ENV{CUDNN_PATH}/include)
 include_directories(./tensor_runtime/include)
 include_directories(./dnn_sources/include)
@@ -51,6 +40,7 @@ include_directories(../gpu_profiler/include)
 include_directories(../soc_simulator/include)
 link_directories(${CUDA_TOOLKIT_ROOT_DIR}/lib64 $ENV{CUDNN_PATH} $ENV{CUDNN_PATH}/lib $ENV{CUDNN_PATH}/lib64)
 
+# Source files of runtime
 set(
   RUNTIME_SRCS_FILENAME
   approx_simulation.cu
@@ -79,44 +69,43 @@ foreach(FILE ${RUNTIME_SRCS_FILENAME})
   list(APPEND RUNTIME_SRCS "tensor_runtime/src/${FILE}")
 endforeach()
 
+# Default link libraries
 find_library(GPU_PROFILER_LIB
-  NAMES libgpu_profiler.a
-  HINTS ../gpu_profiler/lib
+  NAMES libgpu_profiler.a
+  HINTS ../gpu_profiler/lib
 )
 find_library(SOC_SIMULATOR_LIB
-  NAMES libpromise_profiler.a
-  HINTS ../soc_simulator/lib
+  NAMES libpromise_profiler.a
+  HINTS ../soc_simulator/lib
 )
-set(LINK_LIBS cudnn cufft stdc++fs -lcurand)
+set(LINK_LIBS cudnn cufft stdc++fs curand)
 if(USE_GFLAGS)
   list(APPEND LINK_LIBS gflags)
 endif()
-
 # Adding new rule for building a cuDNN runtime library
 # Offline version
 cuda_add_library(tensor_runtime ${RUNTIME_SRCS})
 cuda_add_cublas_to_target(tensor_runtime)
 target_link_libraries(tensor_runtime ${LINK_LIBS})
+target_compile_definitions(tensor_runtime PRIVATE -DONLINE_PROFILING=false -DFP16_tuning=true)
 
-
+# Install version (also offline)
 cuda_add_library(tensor_runtime_install ${RUNTIME_SRCS})
 cuda_add_cublas_to_target(tensor_runtime_install)
-target_link_libraries(tensor_runtime_install ${LINK_LIBS})
 # tensor_runtime_install is built AFTER tensor_runtime because of a nvcc bug (bug?)
 # that doesn't allow compiling the same file from multiple targets at once.
 # Same for tensor_runtime_online.
 add_dependencies(tensor_runtime_install tensor_runtime)
+target_link_libraries(tensor_runtime_install ${LINK_LIBS})
+target_compile_definitions(tensor_runtime_install PRIVATE -DONLINE_PROFILING=false -DFP16_tuning=true)
 
 # Online version
-remove_definitions(-DONLINE_PROFILING=false)
-add_definitions(-DONLINE_PROFILING=true)
-remove_definitions(-DFP16_tuning=true)
-add_definitions(-DFP16_tuning=false)
 cuda_add_library(tensor_runtime_online ${RUNTIME_SRCS})
 cuda_add_cublas_to_target(tensor_runtime_online)
-target_link_libraries(tensor_runtime_online ${LINK_LIBS})
 add_dependencies(tensor_runtime_online tensor_runtime)
+target_link_libraries(tensor_runtime_online ${LINK_LIBS})
+target_compile_definitions(tensor_runtime_online PRIVATE -DONLINE_PROFILING=true -DFP16_tuning=false)
 
 
 # Adding new rule for building a cuDNN runtime library
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h b/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
index ef0d9fe12f..500ff63bc8 100644
--- a/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
@@ -35,7 +35,7 @@ void printTensorInfo(void* tensor_ptr){
 
 
 // FIXIT: Move this to debug.h and include in all files
-void dumpWeightsToFile(char* file_name, void* weights_ptr){
+void dumpWeightsToFile(const char* file_name, void* weights_ptr){
 
   struct Tensor* weights = (Tensor*) weights_ptr;
   // Move data back to host
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/lenet_mnist_half.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/lenet_mnist_half.cc
index 123d3a4429..f04ec04164 100644
--- a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/lenet_mnist_half.cc
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/half/lenet_mnist_half.cc
@@ -13,18 +13,9 @@
 #include "utils.h"
 
-
-bool Opentuner_run = false;
-
 /* NOTE: Reference Architecture to use for profiling */
 void testLenetTanh(){
-
   int total_runs = 1;
-  if(Opentuner_run){
-    total_runs = 1000000;
-  }
-
-
   printf("********* Lenet-2 Architecture ********** \n");
 
   // FIXIT: Extend this to batch of images - currently 5 images
@@ -62,28 +53,6 @@ void testLenetTanh(){
   clearTensorMap();
 
   for(int i = 0; i < total_runs; i++){
-
-    if(Opentuner_run){
-
-      char* myfifo = "/tmp/myfifo";
-      int fd = open(myfifo, O_RDONLY);
-
-      int ret_val = fcntl(fd, F_GETFD);
-      if(ret_val == -1){
-        printf("Invalid descriptor \n");
-        abort();
-      }
-
-      char str[100];
-      read(fd, str, 80);
-      if(strcmp(str, "stop_run") == 0){
-        abort();
-      }
-
-      close(fd);
-    }
-
-
     readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
 
     // Start power and performnce profiling
@@ -132,23 +101,7 @@ void testLenetTanh(){
     computeAccuracy2(labels, test_batch_size, result);
 
     dumpAccuracyNorms();
-    freeOutputTensors();
-
-    if(Opentuner_run){
-
-      char* myfifo = "/tmp/myfifo";
-      int fd_out = open(myfifo, O_WRONLY);
-      int ret_val = fcntl(fd_out, F_GETFD);
-      if(ret_val == -1){
-        printf("Invalid descriptor \n");
-        abort();
-      }
-
-      const char* str = "completed***!\n\0";
-      write(fd_out, str, 80);
-      close(fd_out);
-    }
-
+    freeOutputTensors();
   }
 
 
@@ -157,10 +110,6 @@ void testLenetTanh(){
 
 
 int main(int argc, char* argv[]){
-
-  if(argc > 1)
-    Opentuner_run = true;
-
   llvm_hpvm_initTensorRt(0);
 
   testLenetTanh();
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_mnist.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_mnist.cc
index 1b73645354..c047ffe090 100644
--- a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_mnist.cc
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_mnist.cc
@@ -12,20 +12,11 @@
 #include "tensor_runtime.h"
 #include "utils.h"
 
-
-bool Opentuner_run = false;
-
 int total_runs = 1;
 
 /* NOTE: Reference Architecture to use for profiling */
 void testLenetTanh(){
-
-  if(Opentuner_run){
-    total_runs = 1000000;
-  }
-
-
   printf("********* Lenet-2 Architecture ********** \n");
 
   // FIXIT: Extend this to batch of images - currently 5 images
@@ -64,28 +55,6 @@ void testLenetTanh(){
   clearTensorMap();
 
   for(int i = 0; i < total_runs; i++){
-
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd = open(myfifo, O_RDONLY);
-
-      int ret_val = fcntl(fd, F_GETFD);
-      if(ret_val == -1){
-        printf("Invalid descriptor \n");
-        abort();
-      }
-
-      char str[100];
-      read(fd, str, 80);
-      if(strcmp(str, "stop_run") == 0){
-        abort();
-      }
-
-      close(fd);
-    }
-
-
     readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
 
     // Start power and performnce profiling
@@ -138,22 +107,6 @@ void testLenetTanh(){
     //FIXME: remove the comment below to use piped autotuner
     //dumpAccuracyNorms();
     freeOutputTensors();
-
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd_out = open(myfifo, O_WRONLY);
-      int ret_val = fcntl(fd_out, F_GETFD);
-      if(ret_val == -1){
-        printf("Invalid descriptor \n");
-        abort();
-      }
-
-      const char* str = "completed***!\n\0";
-      write(fd_out, str, 80);
-      close(fd_out);
-    }
-
   }
 
   dumpExecutionAccuracies();
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/unit_tests.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/unit_tests.cc
index 1ccc5bd1b4..0809c3616b 100644
--- a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/unit_tests.cc
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/unit_tests.cc
@@ -733,7 +733,7 @@ void testPerforationCalls(void* input, void* filter,
   float interpolation_rate = 1.0;
   for (int offset = 0; offset < 2; offset++){
 
-    printf("\n\n\n\**Test -- pad_h = %d pad_w = %d stride_h = %d stride_w = %d row = %d col = %d offset= %d \n\n",
+    printf("\n\n\n**Test -- pad_h = %d pad_w = %d stride_h = %d stride_w = %d row = %d col = %d offset= %d \n\n",
           pad_h, pad_w, stride_h, stride_w, row, col, offset);
 
 
@@ -800,7 +800,7 @@ void testPerforationCalls(void* input, void* filter,
   }
 
 
-  printf ("\n\n\n\ --- End of Test \n\n\n");
+  printf ("\n\n\n--- End of Test \n\n\n");
 }
 
 
@@ -1019,8 +1019,9 @@ void testSamplingCalls(void* input, void* filter,
   float interpolation_rate = 1.0;
   for (int offset = 0; offset < 2; offset++){
-
-    printf("\n\n\n\**Test -- pad_h = %d pad_w = %d stride_h = %d stride_w = %d skip_every = %d offset= %d interpolation_rate = %f \n\n",
+
+
+    printf("\n\n\n**Test -- pad_h = %d pad_w = %d stride_h = %d stride_w = %d skip_every = %d offset= %d interpolation_rate = %f \n\n",
           pad_h, pad_w, stride_h, stride_w, skip_every, offset, interpolation_rate);
 
 
@@ -1078,7 +1079,7 @@ void testSamplingCalls(void* input, void* filter,
   }
 
 
-  printf ("\n\n\n\ --- End of Test \n\n\n");
+  printf ("\n\n\n --- End of Test \n\n\n");
 }
 
 
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/device_math.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/device_math.h
index 0f3cd1970e..947556489d 100644
--- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/device_math.h
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/device_math.h
@@ -43,6 +43,7 @@ template <typename T> T reduceOpToIdentity(MathOp op) {
   default:
     ERROR("Operator does not have id value\n");
   }
+  return T(); // For some compilers
 }
 
 template <> half reduceOpToIdentity<half>(MathOp op);
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/functional/common.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/functional/common.h
index 6a20ff83c2..76a26d249c 100644
--- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/functional/common.h
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/functional/common.h
@@ -26,8 +26,8 @@ template <typename T> class HFProfileGuard {
   static const char *getEventName(bool end) {
     if (typeid(T) == typeid(half) || typeid(T) == typeid(half2))
       return end ? "F2H_end" : "F2H_start";
-    else
-      ERROR("Type not accepted\n");
+    ERROR("Type not accepted\n");
+    return ""; // For some compilers
   }
 
   static bool needProfiling() {
@@ -55,8 +55,10 @@ template <typename T> int getTensorType() {
     return (int)float2_type;
   else if (typeid(T) == typeid(half2))
     return (int)half2_type;
-  else
+  else {
     ERROR("Unsupported type!\n");
+    return 0; // For some compilers
+  }
 }
 
 template <typename T> T *convertAndGetGPUData(Tensor *t);
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/approx_simulation.cu b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/approx_simulation.cu
index 45876d4b35..5051b78089 100644
--- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/approx_simulation.cu
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/approx_simulation.cu
@@ -26,7 +26,7 @@
 #include <string>
 #include <vector>
 #include <map>
-
+#include <cassert>
 
 
 
@@ -1422,7 +1422,6 @@ void* Autotuner_ConvLayer(void* input, float i_min, float i_max,
       ERROR("Online Profiling cannot be enabled with PROMISE Simulation \n");
     }
   }
-
 
   swing = getSwing(swing);
 
@@ -1438,20 +1437,17 @@ void* Autotuner_ConvLayer(void* input, float i_min, float i_max,
                               out_min, out_max, swing);
   }
 
-  if (isGPULayer(swing)){
+  assert(isGPULayer(swing));
 
-    return Autotuner_GPU_ConvLayer(input, i_min, i_max,
-                                   filter, w_min, w_max,
-                                   bias, b_min, b_max,
-                                   conv_pad_h, conv_pad_w,
-                                   conv_stride_h, conv_stride_w,
-                                   pool_id, pool_size, pool_stride,
-                                   activation_id,
-                                   out_min, out_max, swing);
-
-  }
+  return Autotuner_GPU_ConvLayer(input, i_min, i_max,
+                                 filter, w_min, w_max,
+                                 bias, b_min, b_max,
+                                 conv_pad_h, conv_pad_w,
+                                 conv_stride_h, conv_stride_w,
+                                 pool_id, pool_size, pool_stride,
+                                 activation_id,
+                                 out_min, out_max, swing);
-
 }
 
 
@@ -1531,58 +1527,50 @@ void* FCLayer_PROMISE(void* input, float i_min, float i_max,
                             out_min, out_max, swing);
   }
 
+  assert(isGPULayer(swing));
 
-  if(isGPULayer(swing)){
-
-    void* gemm_out;
-    if(!isFullPrecision(swing)){
-      gemm_out = tensorHalfGemm(input, weights);
-    }
-    else{
-      gemm_out = tensorGemmGPU(input, weights);
-    }
+  void *gemm_out;
+  if (!isFullPrecision(swing)) {
+    gemm_out = tensorHalfGemm(input, weights);
+  } else {
+    gemm_out = tensorGemmGPU(input, weights);
+  }
 
-
-    void* gemmbias_out;
-    if(bias != NULL){
-      // Swing 8 corresponds to FP32
-      if( isFullPrecision(swing) || (!FP16_tuning) ){
-        gemmbias_out = tensorAdd(gemm_out, bias);
-      }
-      else{
-        gemmbias_out = tensorHalfAdd(gemm_out, bias);
-      }
-    }
-    else{
-      gemmbias_out = gemm_out;
+  void *gemmbias_out;
+  if (bias != NULL) {
+    // Swing 8 corresponds to FP32
+    if (isFullPrecision(swing) || (!FP16_tuning)) {
+      gemmbias_out = tensorAdd(gemm_out, bias);
+    } else {
+      gemmbias_out = tensorHalfAdd(gemm_out, bias);
     }
-
-    void* activation_out;
-    switch(activation_id){
+  } else {
+    gemmbias_out = gemm_out;
+  }
 
-    case -1:
-      activation_out = gemmbias_out;
-      INFO("No Activation Function \n");
-      break;
-    case 0:
-      activation_out = tensorTanh(gemmbias_out);
-      break;
-    case 1:
-      activation_out = tensorRelu(gemmbias_out);
-      break;
-    case 2:
-      activation_out = tensorRelu2(gemmbias_out, out_min, out_max);
-      break;
-    default:
-      ERROR("Activation id %d NOT supported \n", activation_out);
-      break;
-    }
-
-    return activation_out;
+  void *activation_out;
+  switch (activation_id) {
+  case -1:
+    activation_out = gemmbias_out;
+    INFO("No Activation Function \n");
+    break;
+  case 0:
+    activation_out = tensorTanh(gemmbias_out);
+    break;
+  case 1:
+    activation_out = tensorRelu(gemmbias_out);
+    break;
+  case 2:
+    activation_out = tensorRelu2(gemmbias_out, out_min, out_max);
+    break;
+  default:
+    ERROR("Activation id %d NOT supported \n", activation_out);
+    break;
   }
-
+  return activation_out;
 }
 
 
 #endif
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/device_math.cu b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/device_math.cu
index 002c14b3c7..163efd1a2d 100644
--- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/device_math.cu
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/device_math.cu
@@ -119,6 +119,7 @@ template <> void *mathOpToFunc<float2>(MathOp op) {
     CASE_FUNC(Mul, f2mul)
   default:
     ERROR("Float2 function not found\n");
+    return nullptr; // For some compilers
   }
 }
 
@@ -127,6 +128,7 @@ template <> void *mathOpToFunc<half2>(MathOp op) {
     CASE_FUNC(Mul, h2mul)
   default:
     ERROR("Half2 function not found\n");
+    return nullptr; // For some compilers
   }
 }
 
@@ -148,6 +150,7 @@ template <> void *mathOpToFunc<float>(MathOp op) {
   default:
     ERROR("Float function not found\n");
   }
+  return nullptr; // For some compilers
 }
 
 template <> void *mathOpToFunc<half>(MathOp op) {
@@ -165,6 +168,7 @@ template <> void *mathOpToFunc<half>(MathOp op) {
   default:
     ERROR("Half function not found\n");
   }
+  return nullptr; // For some compilers
 }
 
 template <> half reduceOpToIdentity<half>(MathOp op) {
@@ -180,4 +184,5 @@ template <> half reduceOpToIdentity<half>(MathOp op) {
   default:
     ERROR("Operator does not have id value\n");
   }
+  return 0.0f; // For some compilers
 }
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/img_tensor_runtime.cu b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/img_tensor_runtime.cu
index c2cc1ef5f7..9814e2c6a3 100644
--- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/img_tensor_runtime.cu
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/img_tensor_runtime.cu
@@ -497,8 +497,10 @@ void *tensorMap2Half(MathOp f2, void *i1, void *i2) {
     profileEvent("#tensorMap2_end");
     return ret;
   }
-  else
+  else {
     ERROR("Type not recognized\n");
+    return nullptr; // For some compilers
+  }
 }
 
 void *tensorMap3(MathOp f3, void *i1, void *i2, void *i3) {
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_utils.cu b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_utils.cu
index 74a87faa2d..6bbccfabaf 100644
--- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_utils.cu
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_utils.cu
@@ -70,8 +70,9 @@ int getTypeSize(int data_type){
   case half2_type:
     return 4;
   default:
-    ERROR("Unknown type %s\n", std::to_string(data_type));
+    ERROR("Unknown type %d\n", data_type);
   }
+  return 0;
 }
 
 static int getFullPrecTypeSize(int data_type){
@@ -90,6 +91,7 @@ static int getFullPrecTypeSize(int data_type){
   default:
     ERROR("Unknown type %d\n", data_type);
   }
+  return 0;
 }
 
 static bool isFP16Compound(int data_type) {
-- 
GitLab
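
Note on the recurring fix: most of the C++/CUDA changes above apply a single
idiom. When every arm of a switch (or if/else chain) either returns or calls
the runtime's ERROR() macro, which aborts the process but is not declared
[[noreturn]], some compilers still warn that control reaches the end of a
non-void function; the patch silences this by adding an unreachable return
after the dispatch (the "return T(); // For some compilers" lines). Below is
a minimal standalone sketch of the idiom; the ERROR macro here is a
hypothetical stand-in for the runtime's own, and the enum values are
illustrative only:

    #include <cstdio>
    #include <cstdlib>

    // Hypothetical stand-in for the runtime's ERROR() macro: it aborts the
    // process, but nothing tells the compiler that it never returns.
    #define ERROR(...)                                                        \
      do {                                                                    \
        std::fprintf(stderr, __VA_ARGS__);                                    \
        std::abort();                                                         \
      } while (0)

    enum class MathOp { Add, Mul, Max };

    // Every reachable path returns or aborts, yet without the trailing
    // return some compilers emit -Wreturn-type ("control reaches end of
    // non-void function") because ERROR() is not declared [[noreturn]].
    template <typename T> T reduceOpToIdentity(MathOp op) {
      switch (op) {
      case MathOp::Add:
        return T(0); // identity of addition
      case MathOp::Mul:
        return T(1); // identity of multiplication
      default:
        ERROR("Operator does not have id value\n");
      }
      return T(); // Unreachable; present only to silence the warning.
    }

    int main() { return static_cast<int>(reduceOpToIdentity<int>(MathOp::Add)); }

An alternative would be marking the handler behind ERROR() as [[noreturn]],
letting the compiler prove the fall-through is impossible; the per-function
returns used in this patch keep each change local instead.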