From 3f4461c266db39997935fec5d7dc14c91a610c31 Mon Sep 17 00:00:00 2001
From: Yifan Zhao <yifanz16@illinois.edu>
Date: Tue, 19 Jan 2021 06:50:58 -0600
Subject: [PATCH] Use CUDA-lang support to automate nvcc-gcc version check

---
 hpvm/projects/hpvm-tensor-rt/CMakeLists.txt | 185 +++++++-------------
 1 file changed, 63 insertions(+), 122 deletions(-)

diff --git a/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt b/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt
index fc4497ef91..492fdbacb4 100644
--- a/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt
+++ b/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt
@@ -1,50 +1,8 @@
 cmake_minimum_required(VERSION 3.17)
-project(hpvm-tensor-rt)
-find_package(CUDA 9.1 REQUIRED)
-set(CUDA_SEPARABLE_COMPILATION ON CACHE BOOL "")
-set(CUDA_PROPAGATE_HOST_FLAGS OFF)
-
-if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
-  # gcc > 8 are not supported
-  if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 8)
-    message(FATAL_ERROR "GCC versions later than 8 are not supported")
-  endif()
-elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
-  # clang < 3.2 || clang >= 9 unsupported
-  set(clang_v ${CMAKE_CXX_COMPILER_VERSION})
-  if (clang_v VERSION_GREATER_EQUAL 9 OR clang_v VERSION_LESS_EQUAL 3.2)
-    message(FATAL_ERROR "Clang<3.2 or clang>=9 are not supported")
-  endif()
-endif()
-# Addresses a bug where code is not compiled as C++11 in non-CUDA code and older g++ versions
-# Edit: using c++14 now
+project(hpvm-tensor-rt CUDA CXX)
 set(CMAKE_CXX_STANDARD 14)
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -I/")
-set(
-  CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};
-  -gencode;arch=compute_60,code=sm_60;
-  -gencode;arch=compute_60,code=compute_60;
-  -std=c++14 --expt-relaxed-constexpr -maxrregcount 32 # These are for image ops
-)
-if(CMAKE_BUILD_TYPE STREQUAL "Debug")
-  message("Debug mode")
-  set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-g;-lineinfo;-Xcompiler;-ggdb)
-else()
-  set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DNDEBUG;-Xcompiler;-DNDEBUG)
-endif()
-
-# Default options
-if(USE_GFLAGS)
-  add_definitions(-DUSE_GFLAGS)
-endif()
-if(USE_AUTOTUNER)
-  remove_definitions(-DNO_INJECTION)
-endif()
-add_definitions(-DNO_INJECTION)
-add_definitions(-DPROMISE_TUNER_ENABLED)
-add_definitions(-DSIMULATION_MODE=true)
-
-# Config path configuration file
+
+# -- Config path configuration file
 if(NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/global_knobs.txt)
   message(FATAL_ERROR "global_knobs.txt not found")
 endif()
@@ -56,76 +14,90 @@ configure_file(
   ${CMAKE_CURRENT_BINARY_DIR}/tensor_runtime/include/config.h
 )

-# Default include/link directories
+# -- Default include directories
 set(
   INCLUDES
-  $ENV{CUDNN_PATH} $ENV{CUDNN_PATH}/include
-  ${CUDA_INCLUDE_DIRS}
+  ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
   ./tensor_runtime/include ${CMAKE_CURRENT_BINARY_DIR}/tensor_runtime/include
   ./dnn_sources/include
   ../gpu_profiler/include ../soc_simulator/include
 )
-set(
-  LINK_DIRS
-  ${CUDA_TOOLKIT_ROOT_DIR}/lib64 $ENV{CUDNN_PATH}
-  $ENV{CUDNN_PATH}/lib $ENV{CUDNN_PATH}/lib64
-)
-include_directories(${INCLUDES})
-link_directories(${LINK_DIRS})
-# Source files of runtime
+# -- Link libraries
+# Configure gpu_profiler and soc_simulator, and setup all libs to link to
+# Conditionally add gpu_profiler project if we're building independently
+# (not building the whole hpvm)
+if(NOT LLVM_BUILD_DIR) # LLVM_BUILD_DIR (defined in ../CMakeLists.txt) is set when we're building inside LLVM
+  message(STATUS "Compiling hpvm-tensor-rt independently")
+  message(STATUS "Also compiling gpu_profiler and soc_simulator")
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/../gpu_profiler ${CMAKE_CURRENT_BINARY_DIR}/gpu_profiler)
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/../soc_simulator ${CMAKE_CURRENT_BINARY_DIR}/soc_simulator)
+endif()
+set(LINK_LIBS gpu_profiler promise_profiler stdc++fs cudnn curand cublas)
+if(USE_GFLAGS)
+  list(APPEND LINK_LIBS gflags)
+endif()
+find_package(OpenMP REQUIRED) # Provides ${OpenMP_CXX_FLAGS}
+
+# -- Definitions
+set(DEFS -DPROMISE_TUNER_ENABLED -DSIMULATION_MODE=true)
+if(USE_GFLAGS)
+  list(APPEND DEFS -DUSE_GFLAGS)
+endif()
+
+# -- Sources of runtime
 set(
   RUNTIME_SRCS_FILENAME
-  approx_simulation.cu
-  group_conv.cu
-  approx_techniques.cu
+  approx_knobs_utils.cc approx_simulation.cu approx_techniques.cu
   configuration.cpp
-  debug.cc
-  debug.cpp
-  device_math.cu
+  debug.cc debug.cpp device_math.cu
   error.cu
-  tensor_cpu_runtime.cc
-  fp16_gemm.cu
-  global_data.cc
-  half_precision_api.cu
-  hpvm-rt-controller.cpp
+  fp16_gemm.cu freq_utils.cc
+  global_data.cc group_conv.cu
+  half_precision_api.cu hpvm-rt-controller.cpp
+  init_api.cc
   op_overheads.cc
   profiling.cc
-  tensor_runtime.cu
-  tensor_utils.cu
+  tensor_cpu_runtime.cc tensor_runtime.cu tensor_utils.cu
   wrapper_runtime.cu
-  approx_knobs_utils.cc
-  init_api.cc
 )
 foreach(FILE ${RUNTIME_SRCS_FILENAME})
   list(APPEND RUNTIME_SRCS "tensor_runtime/src/${FILE}")
+  # Some files don't end in .cu or .cuh, but we know they are still CUDA files
+  set_source_files_properties("tensor_runtime/src/${FILE}" PROPERTIES LANGUAGE CUDA)
 endforeach()

-# Compile gpu_profiler and soc_simulator
-# Conditionally add gpu_profiler project if we're building independently
-# (not building the whole hpvm)
-get_filename_component(root_dir ${CMAKE_SOURCE_DIR} REALPATH)
-get_filename_component(our_dir ${CMAKE_CURRENT_SOURCE_DIR} REALPATH)
-if(${root_dir} STREQUAL ${our_dir})
-  message(STATUS "Compiling hpvm-tensor-rt independently")
-  message(STATUS "Also compiling gpu_profiler and soc_simulator")
-  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/../gpu_profiler ${CMAKE_CURRENT_BINARY_DIR}/gpu_profiler)
-  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/../soc_simulator ${CMAKE_CURRENT_BINARY_DIR}/soc_simulator)
-endif()
-set(LINK_LIBS gpu_profiler promise_profiler cudnn cufft stdc++fs curand)
-if(USE_GFLAGS)
-  list(APPEND LINK_LIBS gflags)
-endif()
+# -- Adding tensor_runtime targets
+function(add_tensor_runtime target_name)
+  add_library(${target_name} ${RUNTIME_SRCS})
+  set_property(TARGET ${target_name} PROPERTY CUDA_ARCHITECTURES 60)
+  target_compile_options(
+    ${target_name} PRIVATE
+    $<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr -maxrregcount 32>
+    $<$<AND:$<COMPILE_LANGUAGE:CUDA>,$<CONFIG:DEBUG>>:-lineinfo -Xcompiler -ggdb>
+    $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=${OpenMP_CXX_FLAGS}>
+  )
+  target_include_directories(${target_name} PUBLIC ${INCLUDES})
+  target_link_libraries(${target_name} PUBLIC ${LINK_LIBS})
+  target_compile_definitions(${target_name} PRIVATE ${DEFS} ${ARGN})
+endfunction(add_tensor_runtime)

 # Adding new rule for building a cuDNN runtime library
 # Offline version
-find_package(OpenMP REQUIRED)
-cuda_add_library(tensor_runtime ${RUNTIME_SRCS})
-cuda_add_cublas_to_target(tensor_runtime ${OpenMP_CXX_FLAGS})
-target_compile_options(tensor_runtime PRIVATE ${OpenMP_CXX_FLAGS})
-target_link_libraries(tensor_runtime ${LINK_LIBS} ${OpenMP_CXX_FLAGS})
-target_compile_definitions(tensor_runtime PRIVATE -DONLINE_PROFILING=false -DFP16_tuning=true)
+add_tensor_runtime(tensor_runtime -DONLINE_PROFILING=false -DFP16_tuning=true)
+# Online version
+add_tensor_runtime(tensor_runtime_online -DONLINE_PROFILING=true -DFP16_tuning=false)
+# tensor_runtime_online is built AFTER tensor_runtime because of an apparent nvcc bug
+# that doesn't allow compiling the same file from multiple targets at once.
+add_dependencies(tensor_runtime_online tensor_runtime)
+
+# Rule for the unit-test (debugging) executable
+add_executable(unit_tests dnn_sources/src/unit_tests.cc)
+target_link_libraries(unit_tests tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
+
+# -- Compile tensor_runtime.ll if possible
 if(LLVM_BUILD_DIR) # Defined in ../CMakeLists.txt. This means we're compiling in LLVM
   get_filename_component(LLVM_CLANG_XX ${LLVM_BUILD_DIR}/bin/clang++ REALPATH)
   # It's important that tensor_runtime.ll goes here if we're compiling with LLVM
@@ -158,31 +130,6 @@ if(LLVM_CLANG_XX)
   )
 endif()

-# Install version (also offline)
-cuda_add_library(tensor_runtime_install ${RUNTIME_SRCS})
-cuda_add_cublas_to_target(tensor_runtime_install)
-# tensor_runtime_install is built AFTER tensor_runtime because of a nvcc bug (bug?)
-# that doesn't allow compiling the same file from multiple targets at once.
-# Same for tensor_runtime_online.
-add_dependencies(tensor_runtime_install tensor_runtime)
-target_link_libraries(tensor_runtime_install ${LINK_LIBS})
-target_compile_definitions(tensor_runtime_install PRIVATE -DONLINE_PROFILING=false -DFP16_tuning=true)
-
-# Online version
-cuda_add_library(tensor_runtime_online ${RUNTIME_SRCS})
-cuda_add_cublas_to_target(tensor_runtime_online ${OpenMP_CXX_FLAGS})
-target_compile_options(tensor_runtime_online PRIVATE ${OpenMP_CXX_FLAGS})
-add_dependencies(tensor_runtime_online tensor_runtime)
-target_link_libraries(tensor_runtime_online ${LINK_LIBS} ${OpenMP_CXX_FLAGS})
-target_compile_definitions(tensor_runtime_online PRIVATE -DONLINE_PROFILING=true -DFP16_tuning=false)
-
-
-
-# -------------- Unit Test Source ----------------
-
-add_executable(unit_tests dnn_sources/src/unit_tests.cc)
-target_link_libraries(unit_tests tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
 #**************** FP32 TensorRT Source Builds ***********


@@ -216,9 +163,6 @@ target_link_libraries(vgg16_imagenet_fp32 tensor_runtime_online ${GPU_PROFILER

 add_executable(resnet50_imagenet_fp32 dnn_sources/src/fp32/resnet50_imagenet.cc)
 target_link_libraries(resnet50_imagenet_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-
-
 #********* FP16 TensorRT Source Builds ******

 add_executable(lenet_mnist_fp16 dnn_sources/src/fp16/lenet_mnist_half.cc)
@@ -243,12 +187,9 @@ add_executable(mobilenet_cifar10_fp16 dnn_sources/src/fp16/mobilenet_half.cc)
 target_link_libraries(mobilenet_cifar10_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})

-
-
 file(GLOB files "dnn_sources/src/dynamic/*.cpp")
 foreach(file ${files})
   get_filename_component(stem ${file} NAME_WE)
   add_executable(${stem} ${file})
   target_link_libraries(${stem} tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
 endforeach()

-
-- 
GitLab
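
Note: with CUDA declared as a first-class language in project(), CMake probes nvcc against the selected host compiler at configure time, so an incompatible nvcc/gcc pairing now fails during configuration instead of relying on the hand-written GCC/Clang version checks deleted in the first hunk. If the system's default compiler is too new for the installed CUDA toolkit, a compatible host compiler can be chosen when configuring. The sketch below is illustrative only and not part of the patch; the g++-8 path and the "build" directory are assumptions, while CMAKE_CUDA_HOST_COMPILER and the CUDAHOSTCXX environment variable are standard CMake mechanisms.

    # Sketch: point CMake's CUDA language support at a specific host compiler
    # (the path is hypothetical; any gcc accepted by the installed nvcc works).
    cmake -S hpvm/projects/hpvm-tensor-rt -B build \
          -DCMAKE_CUDA_HOST_COMPILER=/usr/bin/g++-8
    # Equivalently, export CUDAHOSTCXX=/usr/bin/g++-8 before running cmake.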