# CMakeLists.txt for hpvm-tensor-rt (originally authored by Hashim Sharif)
# cmake_minimum_required must precede project(); 3.18 is required for the
# CUDA_ARCHITECTURES target property used by add_tensor_runtime below.
cmake_minimum_required(VERSION 3.18)
project(hpvm-tensor-rt CUDA CXX)
set(CMAKE_CXX_STANDARD 14)
# Fail instead of silently falling back to an older standard.
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# Detect whether this project is configured standalone or inside HPVM.
# Comparing source dirs (rather than the old binary-dir vs. source-dir check)
# also classifies out-of-source standalone builds correctly.
if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) # This means we're NOT compiling in HPVM
  set(INDEP_BUILD True)
  message(STATUS "Compiling hpvm-tensor-rt independently")
else()
  set(INDEP_BUILD False)
  message(STATUS "Compiling hpvm-tensor-rt inside HPVM")
endif()
# -- Generate the path-configuration header (config.h) into the build tree
set(knobs_file ${CMAKE_CURRENT_SOURCE_DIR}/global_knobs.txt)
if(NOT EXISTS ${knobs_file})
  message(FATAL_ERROR "global_knobs.txt not found")
endif()
# GLOBAL_KNOBS_FILE is substituted into a C string literal, so whatever you
# do, please don't have a quotation mark (") in the filename.
get_filename_component(GLOBAL_KNOBS_FILE ${knobs_file} REALPATH)
configure_file(
  ${CMAKE_CURRENT_SOURCE_DIR}/tensor_runtime/include/config.h.in
  ${CMAKE_CURRENT_BINARY_DIR}/tensor_runtime/include/config.h
)
# -- Default include directories
# All entries are absolute: the relative "./..." forms happen to work in
# target_include_directories() (resolved against the source dir), but this
# list is also turned into raw "-I${dir}" flags for the clang custom command
# that emits tensor_runtime.ll, which runs from the build directory — there
# the relative paths resolved incorrectly.
set(
  INCLUDES
  ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
  ${CUDNN_INCLUDE_PATH}
  ${CMAKE_CURRENT_SOURCE_DIR}/tensor_runtime/include
  ${CMAKE_CURRENT_BINARY_DIR}/tensor_runtime/include
  ${CMAKE_CURRENT_SOURCE_DIR}/dnn_sources/include
  ${CMAKE_CURRENT_SOURCE_DIR}/../gpu_profiler/include
  ${CMAKE_CURRENT_SOURCE_DIR}/../soc_simulator/include
)
# -- Link libraries
find_package(OpenMP REQUIRED) # Provides ${OpenMP_CXX_FLAGS}
# Configure gpu_profiler and soc_simulator, and set up all libs to link to.
# Conditionally add the gpu_profiler/soc_simulator projects only when building
# independently (inside HPVM the parent build adds them).
if(INDEP_BUILD)
  message(STATUS "Also compiling gpu_profiler and soc_simulator")
  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/../gpu_profiler ${CMAKE_CURRENT_BINARY_DIR}/gpu_profiler)
  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/../soc_simulator ${CMAKE_CURRENT_BINARY_DIR}/soc_simulator)
endif()
# BUG FIX: the variable must be expanded — previously the literal string
# "CUDNN_LIBRARY_PATH" was passed to target_link_directories().
set(LINK_DIR ${CUDNN_LIBRARY_PATH})
set(LINK_LIBS gpu_profiler promise_profiler stdc++fs cudnn curand cublas)
if(USE_GFLAGS)
  list(APPEND LINK_LIBS gflags)
endif()
# -- Compile definitions shared by every tensor_runtime flavor
set(DEFS -DPROMISE_TUNER_ENABLED -DSIMULATION_MODE=true)
if(USE_GFLAGS)
  list(APPEND DEFS -DUSE_GFLAGS)
endif()
# -- Runtime sources (file names only; all live under tensor_runtime/src)
set(
  RUNTIME_SRCS_FILENAME
  approx_knobs_utils.cc
  approx_simulation.cu
  approx_techniques.cu
  configuration.cpp
  debug.cpp
  error.cu
  fp16_gemm.cu
  freq_utils.cc
  global_data.cc
  group_conv.cu
  half_precision_api.cu
  hpvm-rt-controller.cpp
  init_api.cc
  op_overheads.cc
  profiling.cc
  tensor_cpu_runtime.cc
  tensor_runtime.cu
  tensor_utils.cu
  wrapper_runtime.cu
)
foreach(src_name IN LISTS RUNTIME_SRCS_FILENAME)
  set(src_path "tensor_runtime/src/${src_name}")
  list(APPEND RUNTIME_SRCS ${src_path})
  # Some files don't end in .cu or .cuh, but we know they are still CUDA
  # sources — force them through the CUDA compiler.
  set_source_files_properties(${src_path} PROPERTIES LANGUAGE CUDA)
endforeach()
# -- Adding tensor_runtime targets
#
# add_tensor_runtime(<target_name> [extra -D definitions...])
#
# Defines one flavor of the tensor-runtime library from ${RUNTIME_SRCS},
# wired to the shared ${INCLUDES}/${LINK_DIR}/${LINK_LIBS}, compiled with
# ${DEFS} plus any extra definitions passed through ${ARGN}.
function(add_tensor_runtime target_name)
  add_library(${target_name} ${RUNTIME_SRCS})
  set_property(TARGET ${target_name} PROPERTY CUDA_ARCHITECTURES 60)
  target_compile_options(
    ${target_name} PRIVATE
    $<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr -maxrregcount 32>
    $<$<AND:$<COMPILE_LANGUAGE:CUDA>,$<CONFIG:DEBUG>>:-lineinfo -Xcompiler -ggdb>
    # Forward the host OpenMP flag through nvcc to the host compiler.
    $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=${OpenMP_CXX_FLAGS}>
  )
  target_include_directories(${target_name} PUBLIC ${INCLUDES})
  target_link_directories(${target_name} PUBLIC ${LINK_DIR})
  target_link_libraries(${target_name} PUBLIC ${LINK_LIBS})
  target_compile_definitions(${target_name} PRIVATE ${DEFS} ${ARGN})
endfunction()
# Adding new rules for building the cuDNN runtime library flavors.
# Offline version
add_tensor_runtime(tensor_runtime -DONLINE_PROFILING=false -DFP16_tuning=true)
# Online version
add_tensor_runtime(tensor_runtime_online -DONLINE_PROFILING=true -DFP16_tuning=false)
# tensor_runtime_online is built AFTER tensor_runtime because of an nvcc bug (bug?)
# that doesn't allow compiling the same file from multiple targets at once.
add_dependencies(tensor_runtime_online tensor_runtime)
# Debugging/unit-test executable for the runtime.
add_executable(unit_tests dnn_sources/src/unit_tests.cc)
# PRIVATE added: the keyword-less target_link_libraries() signature has legacy
# semantics and should not be used.
target_link_libraries(unit_tests PRIVATE tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB} ${OpenMP_CXX_FLAGS})
# -- Compile tensor_runtime.ll if possible
if(INDEP_BUILD)
  # If we're compiling outside of HPVM, we need a system-wide clang 9.
  execute_process(COMMAND clang-9 --version OUTPUT_VARIABLE clang_stdout ERROR_QUIET)
  if(clang_stdout)
    # NOTE(review): this writes into the source tree (lib/); consider moving
    # the output into the build tree.
    set(TENSOR_RT_LL_PREFIX ${CMAKE_CURRENT_SOURCE_DIR}/lib)
    # BUG FIX: set CLANG_NAME only when clang-9 was actually found.
    # Previously it was set unconditionally, so the custom command below ran
    # with an empty TENSOR_RT_LL_PREFIX whenever clang-9 was missing.
    set(CLANG_NAME clang-9)
  else()
    message(WARNING "System clang++ of version 9 not found; skipping tensor_runtime.ll generation")
  endif()
else()
  # It's important that tensor_runtime.ll goes here if we're compiling with LLVM:
  # some HPVM passes look for tensor_runtime.ll in this folder (usually build/lib).
  set(TENSOR_RT_LL_PREFIX ${CMAKE_LIBRARY_OUTPUT_DIRECTORY})
  # Per cmake documentation, if we're building in LLVM, then in add_custom_command
  # the command "clang" will be auto resolved to the path of the clang we're building.
  set(CLANG_NAME clang)
  add_dependencies(tensor_runtime clang)
endif()
# If a usable clang was found, create tensor_runtime.ll from tensor_signatures.cc
if(CLANG_NAME)
  message(STATUS "Creating tensor_runtime.ll in ${TENSOR_RT_LL_PREFIX}")
  foreach(dir ${INCLUDES})
    list(APPEND INCLUDE_COMPILER_STRINGS "-I${dir}")
  endforeach()
  add_custom_command(
    TARGET tensor_runtime POST_BUILD
    COMMAND ${CLANG_NAME} -x c++ ${INCLUDE_COMPILER_STRINGS} -S -emit-llvm
      ${CMAKE_CURRENT_SOURCE_DIR}/tensor_runtime/include/tensor_signatures.cc
      -o ${TENSOR_RT_LL_PREFIX}/tensor_runtime.ll
    VERBATIM
  )
endif()
#**************** FP32 / FP16 TensorRT Source Builds ***********
#
# add_dnn_binary(<target_name> <source_path>)
#
# Defines one DNN benchmark executable linked (PRIVATE) against the online
# runtime and the profiler libraries. One explicit call per binary keeps
# every target name exactly as before (some target names differ from their
# source file's base name, e.g. mobilenet_cifar10_fp32 <- mobilenet.cc).
function(add_dnn_binary target_name source_path)
  add_executable(${target_name} ${source_path})
  # PRIVATE: the keyword-less target_link_libraries() form has legacy semantics.
  target_link_libraries(${target_name} PRIVATE tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
endfunction()

# FP32 benchmarks
add_dnn_binary(lenet_mnist_fp32 dnn_sources/src/fp32/lenet_mnist.cc)
add_dnn_binary(alexnet_cifar10_fp32 dnn_sources/src/fp32/alexnet_cifar10.cc)
add_dnn_binary(alexnet2_cifar10_fp32 dnn_sources/src/fp32/alexnet2_cifar10.cc)
add_dnn_binary(vgg16_cifar10_fp32 dnn_sources/src/fp32/vgg16_cifar10.cc)
add_dnn_binary(resnet18_cifar10_fp32 dnn_sources/src/fp32/resnet18_cifar10.cc)
add_dnn_binary(vgg16_cifar100_fp32 dnn_sources/src/fp32/vgg16_cifar100.cc)
add_dnn_binary(mobilenet_cifar10_fp32 dnn_sources/src/fp32/mobilenet.cc)
add_dnn_binary(alexnet_imagenet_fp32 dnn_sources/src/fp32/alexnet_imagenet.cc)
add_dnn_binary(vgg16_imagenet_fp32 dnn_sources/src/fp32/vgg16_imagenet.cc)
add_dnn_binary(resnet50_imagenet_fp32 dnn_sources/src/fp32/resnet50_imagenet.cc)

# FP16 benchmarks
add_dnn_binary(lenet_mnist_fp16 dnn_sources/src/fp16/lenet_mnist_half.cc)
add_dnn_binary(alexnet_cifar10_fp16 dnn_sources/src/fp16/alexnet_cifar10_half.cc)
add_dnn_binary(alexnet2_cifar10_fp16 dnn_sources/src/fp16/alexnet2_cifar10_half.cc)
add_dnn_binary(resnet18_cifar10_fp16 dnn_sources/src/fp16/resnet18_cifar10_half.cc)
add_dnn_binary(vgg16_cifar10_fp16 dnn_sources/src/fp16/vgg16_cifar10_half.cc)
add_dnn_binary(vgg16_cifar100_fp16 dnn_sources/src/fp16/vgg16_cifar100_half.cc)
add_dnn_binary(mobilenet_cifar10_fp16 dnn_sources/src/fp16/mobilenet_half.cc)