project(hpvm-tensor-rt CUDA CXX)
set(CMAKE_CXX_STANDARD 14)

if(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR)  # This means we're NOT compiling in HPVM
  set(INDEP_BUILD True)
  message(STATUS "Compiling hpvm-tensor-rt independently")
else()
  set(INDEP_BUILD False)
  message(STATUS "Compiling hpvm-tensor-rt inside HPVM")
endif()
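
# A minimal sketch of the two build modes (commands are illustrative, not verbatim docs):
#   independent:  cd hpvm-tensor-rt && cmake -S . -B build && cmake --build build
#   inside HPVM:  the root HPVM CMakeLists pulls this directory in (e.g. via
#                 add_subdirectory), so CMAKE_SOURCE_DIR is the HPVM root instead.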

# -- Configure path configuration file
if(NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/global_knobs.txt)
  message(FATAL_ERROR "global_knobs.txt not found")
endif()
# GLOBAL_KNOBS_FILE is interpolated into a C string literal,
# so whatever you do, please don't put a quotation mark (") in the filename.
get_filename_component(GLOBAL_KNOBS_FILE ${CMAKE_CURRENT_SOURCE_DIR}/global_knobs.txt REALPATH)
configure_file(
  tensor_runtime/include/config.h.in
  ${CMAKE_CURRENT_BINARY_DIR}/tensor_runtime/include/config.h
)
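# For reference: configure_file() expands @VAR@ placeholders, so config.h.in
# presumably contains something along these lines (a sketch, not the verbatim file):
#   #define GLOBAL_KNOBS_FILE "@GLOBAL_KNOBS_FILE@"
# which is why an embedded quotation mark in the path would break the C string.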

# -- Default include directories
set(
  INCLUDES
  # Have to add these because of tensor_runtime.ll (see below)
  ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} ${CUDNN_INCLUDE_PATH}
  ./tensor_runtime/include ${CMAKE_CURRENT_BINARY_DIR}/tensor_runtime/include
  ./dnn_sources/include  # TODO: remove this.
)

# Build gpu_profiler and soc_simulator (dependencies)
add_library(gpu_profiler SHARED gpu_profiler/profiler.cpp)
target_include_directories(gpu_profiler PUBLIC gpu_profiler/)
target_link_libraries(gpu_profiler pthread)

add_library(soc_simulator SHARED soc_simulator/promise_timing_model.cpp)
target_include_directories(soc_simulator PUBLIC soc_simulator/)

# -- Link libraries
find_package(OpenMP REQUIRED)  # Provides ${OpenMP_CXX_FLAGS} and OpenMP::OpenMP_CXX
# This will use the CUDA found by CUDA language support in the root CMake,
# but it exports the CUDA::* targets (used below) so we can freely add libraries to link to.
find_package(CUDAToolkit REQUIRED)
set(
  LINK_LIBS
  gpu_profiler soc_simulator
  CUDA::cublas CUDA::curand CUDNN::cudnn
  OpenMP::OpenMP_CXX
)
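# Note: CUDNN::cudnn is not exported by find_package(CUDAToolkit); we assume it
# comes from the surrounding build (e.g. a FindCUDNN module in the HPVM root).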
if(USE_GFLAGS)
  list(APPEND LINK_LIBS gflags)
endif()

# -- Manually list the directories (TRT_LINK_DIRS) and libraries (TRT_LINK_LIBS)
# tensor_runtime links to, because CMake doesn't help us do this.
# This is needed by both approxhpvm.py and the RPATH setting step (below).
# First, take a guess at the paths to the libraries that are used here.
# (CMake, why do you make this so difficult?)
foreach(interface_lib ${LINK_LIBS})
  get_target_property(actual_libs ${interface_lib} INTERFACE_LINK_LIBRARIES)
  foreach(actual_lib ${actual_libs})
    # ${actual_lib} may not be a path; in that case its directory component is "".
    get_filename_component(libdir ${actual_lib} DIRECTORY)
    get_filename_component(libname ${actual_lib} NAME)
    if(NOT libdir STREQUAL "")
      list(APPEND TRT_LINK_DIRS ${libdir})
    endif()
    if(NOT libname STREQUAL "" AND NOT libname STREQUAL "actual_libs-NOTFOUND")
      list(APPEND TRT_LINK_LIBS ${libname})
    endif()
  endforeach()
endforeach()
# Dedup, just for shorter compiler arguments.
list(REMOVE_DUPLICATES TRT_LINK_DIRS)
list(REMOVE_DUPLICATES TRT_LINK_LIBS)
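
# Illustrative example (actual paths vary by system): if CUDA::cublas resolves to
# /usr/local/cuda/lib64/libcublas.so, then TRT_LINK_DIRS gains "/usr/local/cuda/lib64"
# and TRT_LINK_LIBS gains "libcublas.so".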

# -- Definitions
set(DEFS -DPROMISE_TUNER_ENABLED -DSIMULATION_MODE=true)
if(USE_GFLAGS)
  list(APPEND DEFS -DUSE_GFLAGS)
endif()

# -- Sources of runtime
set(
  RUNTIME_SRCS_FILENAME
  approx_knobs_utils.cc approx_simulation.cu approx_techniques.cu
  configuration.cpp
  debug.cpp
  error.cu
  fp16_gemm.cu freq_utils.cc
  global_data.cc group_conv.cu
  half_precision_api.cu hpvm-rt-controller.cpp
  init_api.cc
  op_overheads.cc
  profiling.cc
  tensor_cpu_runtime.cc tensor_runtime.cu tensor_utils.cu
  wrapper_runtime.cu
)
foreach(FILE ${RUNTIME_SRCS_FILENAME})
  list(APPEND RUNTIME_SRCS "tensor_runtime/src/${FILE}")
  # Some files don't end in .cu or .cuh, but they are still CUDA files
  set_source_files_properties("tensor_runtime/src/${FILE}" PROPERTIES LANGUAGE CUDA)
endforeach()

# -- Adding tensor_runtime targets
function(add_tensor_runtime target_name)
  add_library(${target_name} SHARED ${RUNTIME_SRCS})
  set_target_properties(${target_name} PROPERTIES CUDA_ARCHITECTURES 60)
  target_compile_options(
    ${target_name} PRIVATE
    --expt-relaxed-constexpr -maxrregcount 32 -Xcompiler=${OpenMP_CXX_FLAGS}
    $<$<CONFIG:DEBUG>:-lineinfo -Xcompiler=-ggdb>
  )
  target_include_directories(${target_name} PUBLIC ${INCLUDES})
  target_link_libraries(${target_name} PUBLIC ${LINK_LIBS})
  target_compile_definitions(${target_name} PRIVATE ${DEFS} ${ARGN})

  # We have to manually set rpath because cmake is not willing to comply...
  foreach(libdir ${TRT_LINK_DIRS})
    target_link_options(${target_name} PRIVATE "-Wl,-rpath,${libdir}")
  endforeach()
  # Also slap TRT_LINK_DIRS and TRT_LINK_LIBS onto this target
  # so that approxhpvm.py can read them. (These are custom properties of our own.)
  set_target_properties(
    ${target_name} PROPERTIES
    TRT_LINK_DIRS "${TRT_LINK_DIRS}"
    TRT_LINK_LIBS "${TRT_LINK_LIBS}"
  )
endfunction(add_tensor_runtime)
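
# The custom properties set above can be read back with get_target_property, e.g.
# (trt_dirs is an illustrative variable name):
#   get_target_property(trt_dirs tensor_runtime TRT_LINK_DIRS)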

# Adding rules for building the cuDNN-based runtime library
# Offline version
add_tensor_runtime(tensor_runtime -DONLINE_PROFILING=false -DFP16_tuning=true)

# Online version
add_tensor_runtime(tensor_runtime_online -DONLINE_PROFILING=true -DFP16_tuning=false)
# tensor_runtime_online is built AFTER tensor_runtime because of an nvcc bug (bug?)
# that doesn't allow compiling the same file from multiple targets at once.
add_dependencies(tensor_runtime_online tensor_runtime)

# Adding rule for the unit-test binary
add_executable(unit_tests dnn_sources/src/unit_tests.cc)
target_link_libraries(unit_tests tensor_runtime_online)

# -- Compile tensor_runtime.ll if possible
if(INDEP_BUILD)
  # If we're compiling outside of HPVM, we need the system-wide clang -- a clang 9.
  execute_process(COMMAND clang-9 --version OUTPUT_VARIABLE clang_stdout ERROR_QUIET)
  if(clang_stdout)
    set(TENSOR_RT_LL_PREFIX ${CMAKE_CURRENT_SOURCE_DIR}/lib)
    set(CLANG_NAME clang-9)
  else()
    message(WARNING "System clang++ of version 9 not found; skipping tensor_runtime.ll generation")
  endif()
else()
  # It's important that tensor_runtime.ll goes here if we're compiling with LLVM
  # Some HPVM passes look for tensor_runtime.ll in this folder (which is usually build/lib)
  set(TENSOR_RT_LL_PREFIX ${CMAKE_LIBRARY_OUTPUT_DIRECTORY})
  # Per cmake documentation, if we're building in LLVM, then in add_custom_command
  # the command "clang" will be auto resolved to the path to clang we're building
  set(CLANG_NAME clang)
  add_dependencies(tensor_runtime clang)
endif()
# If a usable clang was found, create tensor_runtime.ll from tensor_signatures.cc
if(CLANG_NAME)
  message(STATUS "Creating tensor_runtime.ll in ${TENSOR_RT_LL_PREFIX}")
  foreach(dir ${INCLUDES})
    list(APPEND INCLUDE_COMPILER_STRINGS "-I${dir}")
  endforeach()
  add_custom_command(
    TARGET tensor_runtime POST_BUILD
    COMMAND ${CLANG_NAME} -x c++ ${INCLUDE_COMPILER_STRINGS} -S -emit-llvm
    ${CMAKE_CURRENT_SOURCE_DIR}/tensor_runtime/include/tensor_signatures.cc
    -o ${TENSOR_RT_LL_PREFIX}/tensor_runtime.ll
  )
endif()
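
# For reference, the custom command above amounts to roughly this shell invocation
# (with one -I flag per entry of INCLUDES):
#   clang(-9) -x c++ -I... -S -emit-llvm tensor_runtime/include/tensor_signatures.cc \
#     -o ${TENSOR_RT_LL_PREFIX}/tensor_runtime.ll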


#**************** FP32 TensorRT Source Builds ****************

# Every benchmark binary below follows the same two-line pattern,
# so wrap it in a small helper.
function(add_dnn_benchmark target_name source_file)
  add_executable(${target_name} ${source_file})
  target_link_libraries(${target_name} tensor_runtime_online)
endfunction(add_dnn_benchmark)

add_dnn_benchmark(lenet_mnist_fp32 dnn_sources/src/fp32/lenet_mnist.cc)
add_dnn_benchmark(alexnet_cifar10_fp32 dnn_sources/src/fp32/alexnet_cifar10.cc)
add_dnn_benchmark(alexnet2_cifar10_fp32 dnn_sources/src/fp32/alexnet2_cifar10.cc)
add_dnn_benchmark(vgg16_cifar10_fp32 dnn_sources/src/fp32/vgg16_cifar10.cc)
add_dnn_benchmark(resnet18_cifar10_fp32 dnn_sources/src/fp32/resnet18_cifar10.cc)
add_dnn_benchmark(vgg16_cifar100_fp32 dnn_sources/src/fp32/vgg16_cifar100.cc)
add_dnn_benchmark(mobilenet_cifar10_fp32 dnn_sources/src/fp32/mobilenet.cc)
add_dnn_benchmark(alexnet_imagenet_fp32 dnn_sources/src/fp32/alexnet_imagenet.cc)
add_dnn_benchmark(vgg16_imagenet_fp32 dnn_sources/src/fp32/vgg16_imagenet.cc)
add_dnn_benchmark(resnet50_imagenet_fp32 dnn_sources/src/fp32/resnet50_imagenet.cc)

#**************** FP16 TensorRT Source Builds ****************

add_dnn_benchmark(lenet_mnist_fp16 dnn_sources/src/fp16/lenet_mnist_half.cc)
add_dnn_benchmark(alexnet_cifar10_fp16 dnn_sources/src/fp16/alexnet_cifar10_half.cc)
add_dnn_benchmark(alexnet2_cifar10_fp16 dnn_sources/src/fp16/alexnet2_cifar10_half.cc)
add_dnn_benchmark(resnet18_cifar10_fp16 dnn_sources/src/fp16/resnet18_cifar10_half.cc)
add_dnn_benchmark(vgg16_cifar10_fp16 dnn_sources/src/fp16/vgg16_cifar10_half.cc)
add_dnn_benchmark(vgg16_cifar100_fp16 dnn_sources/src/fp16/vgg16_cifar100_half.cc)
add_dnn_benchmark(mobilenet_cifar10_fp16 dnn_sources/src/fp16/mobilenet_half.cc)