# CMakeLists.txt for hpvm-tensor-rt (originally authored by Hashim Sharif)
# cmake_minimum_required must precede project(); 3.18 is required for the
# CUDA_ARCHITECTURES target property used by add_tensor_runtime below.
cmake_minimum_required(VERSION 3.18)
project(hpvm-tensor-rt CUDA CXX)
set(CMAKE_CXX_STANDARD 14)
# Fail instead of silently falling back to an older standard.
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# Detect whether this project is configured standalone or inside HPVM.
# Comparing source dirs (rather than the old binary-dir vs. source-dir check)
# also classifies out-of-source standalone builds correctly.
if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) # This means we're NOT compiling in HPVM
  set(INDEP_BUILD True)
  message(STATUS "Compiling hpvm-tensor-rt independently")
else()
  set(INDEP_BUILD False)
  message(STATUS "Compiling hpvm-tensor-rt inside HPVM")
endif()
# -- Generate the path-configuration header (config.h) into the build tree
set(knobs_file ${CMAKE_CURRENT_SOURCE_DIR}/global_knobs.txt)
if(NOT EXISTS ${knobs_file})
  message(FATAL_ERROR "global_knobs.txt not found")
endif()
# GLOBAL_KNOBS_FILE is substituted into a C string literal, so whatever you
# do, please don't have a quotation mark (") in the filename.
get_filename_component(GLOBAL_KNOBS_FILE ${knobs_file} REALPATH)
configure_file(
  ${CMAKE_CURRENT_SOURCE_DIR}/tensor_runtime/include/config.h.in
  ${CMAKE_CURRENT_BINARY_DIR}/tensor_runtime/include/config.h
)
# -- Default include directories
# All entries are absolute: the relative "./..." forms happen to work in
# target_include_directories() (resolved against the source dir), but this
# list is also turned into raw "-I${dir}" flags for the clang custom command
# that emits tensor_runtime.ll, which runs from the build directory — there
# the relative paths resolved incorrectly.
set(
  INCLUDES
  ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
  ${CUDNN_INCLUDE_PATH}
  ${CMAKE_CURRENT_SOURCE_DIR}/tensor_runtime/include
  ${CMAKE_CURRENT_BINARY_DIR}/tensor_runtime/include
  ${CMAKE_CURRENT_SOURCE_DIR}/dnn_sources/include
  ${CMAKE_CURRENT_SOURCE_DIR}/../gpu_profiler/include
  ${CMAKE_CURRENT_SOURCE_DIR}/../soc_simulator/include
)
# -- Link libraries
find_package(OpenMP REQUIRED) # Provides ${OpenMP_CXX_FLAGS}
# Configure gpu_profiler and soc_simulator, and set up all libs to link to.
# Conditionally add the gpu_profiler/soc_simulator projects only when building
# independently (inside HPVM the parent build adds them).
if(INDEP_BUILD)
  message(STATUS "Also compiling gpu_profiler and soc_simulator")
  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/../gpu_profiler ${CMAKE_CURRENT_BINARY_DIR}/gpu_profiler)
  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/../soc_simulator ${CMAKE_CURRENT_BINARY_DIR}/soc_simulator)
endif()
# BUG FIX: the variable must be expanded — previously the literal string
# "CUDNN_LIBRARY_PATH" was passed to target_link_directories().
set(LINK_DIR ${CUDNN_LIBRARY_PATH})
set(LINK_LIBS gpu_profiler promise_profiler stdc++fs cudnn curand cublas)
if(USE_GFLAGS)
  list(APPEND LINK_LIBS gflags)
endif()
# -- Compile definitions shared by every tensor_runtime flavor
set(DEFS -DPROMISE_TUNER_ENABLED -DSIMULATION_MODE=true)
if(USE_GFLAGS)
  list(APPEND DEFS -DUSE_GFLAGS)
endif()
# -- Runtime sources (file names only; all live under tensor_runtime/src)
set(
  RUNTIME_SRCS_FILENAME
  approx_knobs_utils.cc
  approx_simulation.cu
  approx_techniques.cu
  configuration.cpp
  debug.cpp
  error.cu
  fp16_gemm.cu
  freq_utils.cc
  global_data.cc
  group_conv.cu
  half_precision_api.cu
  hpvm-rt-controller.cpp
  init_api.cc
  op_overheads.cc
  profiling.cc
  tensor_cpu_runtime.cc
  tensor_runtime.cu
  tensor_utils.cu
  wrapper_runtime.cu
)
foreach(src_name IN LISTS RUNTIME_SRCS_FILENAME)
  set(src_path "tensor_runtime/src/${src_name}")
  list(APPEND RUNTIME_SRCS ${src_path})
  # Some files don't end in .cu or .cuh, but we know they are still CUDA
  # sources — force them through the CUDA compiler.
  set_source_files_properties(${src_path} PROPERTIES LANGUAGE CUDA)
endforeach()
# -- Adding tensor_runtime targets
#
# add_tensor_runtime(<target_name> [extra -D definitions...])
#
# Defines one flavor of the tensor-runtime library from ${RUNTIME_SRCS},
# wired to the shared ${INCLUDES}/${LINK_DIR}/${LINK_LIBS}, compiled with
# ${DEFS} plus any extra definitions passed through ${ARGN}.
function(add_tensor_runtime target_name)
  add_library(${target_name} ${RUNTIME_SRCS})
  set_property(TARGET ${target_name} PROPERTY CUDA_ARCHITECTURES 60)
  target_compile_options(
    ${target_name} PRIVATE
    $<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr -maxrregcount 32>
    $<$<AND:$<COMPILE_LANGUAGE:CUDA>,$<CONFIG:DEBUG>>:-lineinfo -Xcompiler -ggdb>
    # Forward the host OpenMP flag through nvcc to the host compiler.
    $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=${OpenMP_CXX_FLAGS}>
  )
  target_include_directories(${target_name} PUBLIC ${INCLUDES})
  target_link_directories(${target_name} PUBLIC ${LINK_DIR})
  target_link_libraries(${target_name} PUBLIC ${LINK_LIBS})
  target_compile_definitions(${target_name} PRIVATE ${DEFS} ${ARGN})
endfunction()
# Adding new rules for building the cuDNN runtime library flavors.
# Offline version
add_tensor_runtime(tensor_runtime -DONLINE_PROFILING=false -DFP16_tuning=true)
# Online version
add_tensor_runtime(tensor_runtime_online -DONLINE_PROFILING=true -DFP16_tuning=false)
# tensor_runtime_online is built AFTER tensor_runtime because of an nvcc bug (bug?)
# that doesn't allow compiling the same file from multiple targets at once.
add_dependencies(tensor_runtime_online tensor_runtime)
# Debugging/unit-test executable for the runtime.
add_executable(unit_tests dnn_sources/src/unit_tests.cc)
# PRIVATE added: the keyword-less target_link_libraries() signature has legacy
# semantics and should not be used.
target_link_libraries(unit_tests PRIVATE tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB} ${OpenMP_CXX_FLAGS})
# -- Compile tensor_runtime.ll if possible
if(INDEP_BUILD)
  # If we're compiling outside of HPVM, we need a system-wide clang 9.
  execute_process(COMMAND clang-9 --version OUTPUT_VARIABLE clang_stdout ERROR_QUIET)
  if(clang_stdout)
    # NOTE(review): this writes into the source tree (lib/); consider moving
    # the output into the build tree.
    set(TENSOR_RT_LL_PREFIX ${CMAKE_CURRENT_SOURCE_DIR}/lib)
    # BUG FIX: set CLANG_NAME only when clang-9 was actually found.
    # Previously it was set unconditionally, so the custom command below ran
    # with an empty TENSOR_RT_LL_PREFIX whenever clang-9 was missing.
    set(CLANG_NAME clang-9)
  else()
    message(WARNING "System clang++ of version 9 not found; skipping tensor_runtime.ll generation")
  endif()
else()
  # It's important that tensor_runtime.ll goes here if we're compiling with LLVM:
  # some HPVM passes look for tensor_runtime.ll in this folder (usually build/lib).
  set(TENSOR_RT_LL_PREFIX ${CMAKE_LIBRARY_OUTPUT_DIRECTORY})
  # Per cmake documentation, if we're building in LLVM, then in add_custom_command
  # the command "clang" will be auto resolved to the path of the clang we're building.
  set(CLANG_NAME clang)
  add_dependencies(tensor_runtime clang)
endif()
# If a usable clang was found, create tensor_runtime.ll from tensor_signatures.cc
if(CLANG_NAME)
  message(STATUS "Creating tensor_runtime.ll in ${TENSOR_RT_LL_PREFIX}")
  foreach(dir ${INCLUDES})
    list(APPEND INCLUDE_COMPILER_STRINGS "-I${dir}")
  endforeach()
  add_custom_command(
    TARGET tensor_runtime POST_BUILD
    COMMAND ${CLANG_NAME} -x c++ ${INCLUDE_COMPILER_STRINGS} -S -emit-llvm
      ${CMAKE_CURRENT_SOURCE_DIR}/tensor_runtime/include/tensor_signatures.cc
      -o ${TENSOR_RT_LL_PREFIX}/tensor_runtime.ll
    VERBATIM
  )
endif()
#**************** FP32 / FP16 TensorRT Source Builds ***********
#
# add_dnn_binary(<target_name> <source_path>)
#
# Defines one DNN benchmark executable linked (PRIVATE) against the online
# runtime and the profiler libraries. One explicit call per binary keeps
# every target name exactly as before (some target names differ from their
# source file's base name, e.g. mobilenet_cifar10_fp32 <- mobilenet.cc).
function(add_dnn_binary target_name source_path)
  add_executable(${target_name} ${source_path})
  # PRIVATE: the keyword-less target_link_libraries() form has legacy semantics.
  target_link_libraries(${target_name} PRIVATE tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
endfunction()

# FP32 benchmarks
add_dnn_binary(lenet_mnist_fp32 dnn_sources/src/fp32/lenet_mnist.cc)
add_dnn_binary(alexnet_cifar10_fp32 dnn_sources/src/fp32/alexnet_cifar10.cc)
add_dnn_binary(alexnet2_cifar10_fp32 dnn_sources/src/fp32/alexnet2_cifar10.cc)
add_dnn_binary(vgg16_cifar10_fp32 dnn_sources/src/fp32/vgg16_cifar10.cc)
add_dnn_binary(resnet18_cifar10_fp32 dnn_sources/src/fp32/resnet18_cifar10.cc)
add_dnn_binary(vgg16_cifar100_fp32 dnn_sources/src/fp32/vgg16_cifar100.cc)
add_dnn_binary(mobilenet_cifar10_fp32 dnn_sources/src/fp32/mobilenet.cc)
add_dnn_binary(alexnet_imagenet_fp32 dnn_sources/src/fp32/alexnet_imagenet.cc)
add_dnn_binary(vgg16_imagenet_fp32 dnn_sources/src/fp32/vgg16_imagenet.cc)
add_dnn_binary(resnet50_imagenet_fp32 dnn_sources/src/fp32/resnet50_imagenet.cc)

# FP16 benchmarks
add_dnn_binary(lenet_mnist_fp16 dnn_sources/src/fp16/lenet_mnist_half.cc)
add_dnn_binary(alexnet_cifar10_fp16 dnn_sources/src/fp16/alexnet_cifar10_half.cc)
add_dnn_binary(alexnet2_cifar10_fp16 dnn_sources/src/fp16/alexnet2_cifar10_half.cc)
add_dnn_binary(resnet18_cifar10_fp16 dnn_sources/src/fp16/resnet18_cifar10_half.cc)
add_dnn_binary(vgg16_cifar10_fp16 dnn_sources/src/fp16/vgg16_cifar10_half.cc)
add_dnn_binary(vgg16_cifar100_fp16 dnn_sources/src/fp16/vgg16_cifar100_half.cc)
add_dnn_binary(mobilenet_cifar10_fp16 dnn_sources/src/fp16/mobilenet_half.cc)