From 3f4461c266db39997935fec5d7dc14c91a610c31 Mon Sep 17 00:00:00 2001
From: Yifan Zhao <yifanz16@illinois.edu>
Date: Tue, 19 Jan 2021 06:50:58 -0600
Subject: [PATCH] Use CUDA-lang support to automate nvcc-gcc version check

---
 hpvm/projects/hpvm-tensor-rt/CMakeLists.txt | 185 +++++++-------------
 1 file changed, 63 insertions(+), 122 deletions(-)

diff --git a/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt b/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt
index fc4497ef91..492fdbacb4 100644
--- a/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt
+++ b/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt
@@ -1,50 +1,8 @@
 cmake_minimum_required(VERSION 3.17)
-project(hpvm-tensor-rt)
-find_package(CUDA 9.1 REQUIRED)
-set(CUDA_SEPARABLE_COMPILATION ON CACHE BOOL "")
-set(CUDA_PROPAGATE_HOST_FLAGS OFF)
-
-if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
-  # gcc > 8 are not supported
-  if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 8)
-    message(FATAL_ERROR "GCC versions later than 8 are not supported")
-  endif()
-elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
-  # clang < 3.2 || clang >= 9 unsupported
-  set(clang_v ${CMAKE_CXX_COMPILER_VERSION})
-  if (clang_v VERSION_GREATER_EQUAL 9 OR clang_v VERSION_LESS_EQUAL 3.2)
-    message(FATAL_ERROR "Clang<3.2 or clang>=9 are not supported")
-  endif()
-endif()
-# Addresses a bug where code is not compiled as C++11 in non-CUDA code and older g++ versions
-# Edit: using c++14 now
+project(hpvm-tensor-rt CUDA CXX)
 set(CMAKE_CXX_STANDARD 14)
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -I/")
-set(
-  CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};
-  -gencode;arch=compute_60,code=sm_60;
-  -gencode;arch=compute_60,code=compute_60;
-  -std=c++14 --expt-relaxed-constexpr -maxrregcount 32 # These are for image ops
-)
-if(CMAKE_BUILD_TYPE STREQUAL "Debug")
-  message("Debug mode")
-  set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-g;-lineinfo;-Xcompiler;-ggdb)
-else()
-  set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DNDEBUG;-Xcompiler;-DNDEBUG)
-endif()
-
-# Default options
-if(USE_GFLAGS)
-  add_definitions(-DUSE_GFLAGS)
-endif()
-if(USE_AUTOTUNER)
-  remove_definitions(-DNO_INJECTION)
-endif()
-add_definitions(-DNO_INJECTION)
-add_definitions(-DPROMISE_TUNER_ENABLED)
-add_definitions(-DSIMULATION_MODE=true)
-
-# Config path configuration file
+
+# -- Config path configuration file
 if(NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/global_knobs.txt)
   message(FATAL_ERROR "global_knobs.txt not found")
 endif()
@@ -56,76 +14,90 @@ configure_file(
   ${CMAKE_CURRENT_BINARY_DIR}/tensor_runtime/include/config.h
 )

-# Default include/link directories
+# -- Default include directories
 set(
   INCLUDES
-  $ENV{CUDNN_PATH} $ENV{CUDNN_PATH}/include
-  ${CUDA_INCLUDE_DIRS}
+  ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
   ./tensor_runtime/include ${CMAKE_CURRENT_BINARY_DIR}/tensor_runtime/include
   ./dnn_sources/include
   ../gpu_profiler/include ../soc_simulator/include
 )
-set(
-  LINK_DIRS
-  ${CUDA_TOOLKIT_ROOT_DIR}/lib64 $ENV{CUDNN_PATH}
-  $ENV{CUDNN_PATH}/lib $ENV{CUDNN_PATH}/lib64
-)
-include_directories(${INCLUDES})
-link_directories(${LINK_DIRS})
-# Source files of runtime
+# -- Link libraries
+# Configure gpu_profiler and soc_simulator, and setup all libs to link to
+# Conditionally add gpu_profiler project if we're building independently
+# (not building the whole hpvm)
+if(NOT LLVM_BUILD_DIR) # LLVM_BUILD_DIR (defined in ../CMakeLists.txt) is set when we're building inside LLVM
+  message(STATUS "Compiling hpvm-tensor-rt independently")
+  message(STATUS "Also compiling gpu_profiler and soc_simulator")
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/../gpu_profiler ${CMAKE_CURRENT_BINARY_DIR}/gpu_profiler)
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/../soc_simulator ${CMAKE_CURRENT_BINARY_DIR}/soc_simulator)
+endif()
+set(LINK_LIBS gpu_profiler promise_profiler stdc++fs cudnn curand cublas)
+if(USE_GFLAGS)
+  list(APPEND LINK_LIBS gflags)
+endif()
+find_package(OpenMP REQUIRED) # Provides ${OpenMP_CXX_FLAGS}
+
+# -- Definitions
+set(DEFS -DPROMISE_TUNER_ENABLED -DSIMULATION_MODE=true)
+if(USE_GFLAGS)
+  list(APPEND DEFS -DUSE_GFLAGS)
+endif()
+
+# -- Sources of runtime
 set(
   RUNTIME_SRCS_FILENAME
-  approx_simulation.cu
-  group_conv.cu
-  approx_techniques.cu
+  approx_knobs_utils.cc approx_simulation.cu approx_techniques.cu
   configuration.cpp
-  debug.cc
-  debug.cpp
-  device_math.cu
+  debug.cc debug.cpp device_math.cu
   error.cu
-  tensor_cpu_runtime.cc
-  fp16_gemm.cu
-  global_data.cc
-  half_precision_api.cu
-  hpvm-rt-controller.cpp
+  fp16_gemm.cu freq_utils.cc
+  global_data.cc group_conv.cu
+  half_precision_api.cu hpvm-rt-controller.cpp
+  init_api.cc
   op_overheads.cc
   profiling.cc
-  tensor_runtime.cu
-  tensor_utils.cu
+  tensor_cpu_runtime.cc tensor_runtime.cu tensor_utils.cu
   wrapper_runtime.cu
-  approx_knobs_utils.cc
-  init_api.cc
 )
 foreach(FILE ${RUNTIME_SRCS_FILENAME})
   list(APPEND RUNTIME_SRCS "tensor_runtime/src/${FILE}")
+  # Some files don't end in .cu or .cuh, but we know they are still CUDA files
+  set_source_files_properties("tensor_runtime/src/${FILE}" PROPERTIES LANGUAGE CUDA)
 endforeach()

-# Compile gpu_profiler and soc_simulator
-# Conditionally add gpu_profiler project if we're building independently
-# (not building the whole hpvm)
-get_filename_component(root_dir ${CMAKE_SOURCE_DIR} REALPATH)
-get_filename_component(our_dir ${CMAKE_CURRENT_SOURCE_DIR} REALPATH)
-if(${root_dir} STREQUAL ${our_dir})
-  message(STATUS "Compiling hpvm-tensor-rt independently")
-  message(STATUS "Also compiling gpu_profiler and soc_simulator")
-  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/../gpu_profiler ${CMAKE_CURRENT_BINARY_DIR}/gpu_profiler)
-  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/../soc_simulator ${CMAKE_CURRENT_BINARY_DIR}/soc_simulator)
-endif()
-set(LINK_LIBS gpu_profiler promise_profiler cudnn cufft stdc++fs curand)
-if(USE_GFLAGS)
-  list(APPEND LINK_LIBS gflags)
-endif()
+# -- Adding tensor_runtime targets
+function(add_tensor_runtime target_name)
+  add_library(${target_name} ${RUNTIME_SRCS})
+  set_property(TARGET ${target_name} PROPERTY CUDA_ARCHITECTURES 60)
+  target_compile_options(
+    ${target_name} PRIVATE
+    $<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr -maxrregcount 32>
+    $<$<AND:$<COMPILE_LANGUAGE:CUDA>,$<CONFIG:DEBUG>>:-lineinfo -Xcompiler -ggdb>
+    $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=${OpenMP_CXX_FLAGS}>
+  )
+  target_include_directories(${target_name} PUBLIC ${INCLUDES})
+  target_link_libraries(${target_name} PUBLIC ${LINK_LIBS})
+  target_compile_definitions(${target_name} PRIVATE ${DEFS} ${ARGN})
+endfunction(add_tensor_runtime)

 # Adding new rule for building a cuDNN runtime library
 # Offline version
-find_package(OpenMP REQUIRED)
-cuda_add_library(tensor_runtime ${RUNTIME_SRCS})
-cuda_add_cublas_to_target(tensor_runtime ${OpenMP_CXX_FLAGS})
-target_compile_options(tensor_runtime PRIVATE ${OpenMP_CXX_FLAGS})
-target_link_libraries(tensor_runtime ${LINK_LIBS} ${OpenMP_CXX_FLAGS})
-target_compile_definitions(tensor_runtime PRIVATE -DONLINE_PROFILING=false -DFP16_tuning=true)
+add_tensor_runtime(tensor_runtime -DONLINE_PROFILING=false -DFP16_tuning=true)
+# Online version
+add_tensor_runtime(tensor_runtime_online -DONLINE_PROFILING=true -DFP16_tuning=false)
+# tensor_runtime_online is built AFTER tensor_runtime because of an apparent nvcc bug
+# that doesn't allow compiling the same file from multiple targets at once.
+add_dependencies(tensor_runtime_online tensor_runtime)
+
+# Rule for the unit-test (debugging) executable
+add_executable(unit_tests dnn_sources/src/unit_tests.cc)
+target_link_libraries(unit_tests tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
+
+# -- Compile tensor_runtime.ll if possible
 if(LLVM_BUILD_DIR) # Defined in ../CMakeLists.txt. This means we're compiling in LLVM
   get_filename_component(LLVM_CLANG_XX ${LLVM_BUILD_DIR}/bin/clang++ REALPATH)
   # It's important that tensor_runtime.ll goes here if we're compiling with LLVM
@@ -158,31 +130,6 @@ if(LLVM_CLANG_XX)
   )
 endif()

-# Install version (also offline)
-cuda_add_library(tensor_runtime_install ${RUNTIME_SRCS})
-cuda_add_cublas_to_target(tensor_runtime_install)
-# tensor_runtime_install is built AFTER tensor_runtime because of a nvcc bug (bug?)
-# that doesn't allow compiling the same file from multiple targets at once.
-# Same for tensor_runtime_online.
-add_dependencies(tensor_runtime_install tensor_runtime)
-target_link_libraries(tensor_runtime_install ${LINK_LIBS})
-target_compile_definitions(tensor_runtime_install PRIVATE -DONLINE_PROFILING=false -DFP16_tuning=true)
-
-# Online version
-cuda_add_library(tensor_runtime_online ${RUNTIME_SRCS})
-cuda_add_cublas_to_target(tensor_runtime_online ${OpenMP_CXX_FLAGS})
-target_compile_options(tensor_runtime_online PRIVATE ${OpenMP_CXX_FLAGS})
-add_dependencies(tensor_runtime_online tensor_runtime)
-target_link_libraries(tensor_runtime_online ${LINK_LIBS} ${OpenMP_CXX_FLAGS})
-target_compile_definitions(tensor_runtime_online PRIVATE -DONLINE_PROFILING=true -DFP16_tuning=false)
-
-
-
-# -------------- Unit Test Source ----------------
-
-add_executable(unit_tests dnn_sources/src/unit_tests.cc)
-target_link_libraries(unit_tests tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
 #**************** FP32 TensorRT Source Builds ***********


@@ -216,9 +163,6 @@ target_link_libraries(vgg16_imagenet_fp32 tensor_runtime_online ${GPU_PROFILER

 add_executable(resnet50_imagenet_fp32 dnn_sources/src/fp32/resnet50_imagenet.cc)
 target_link_libraries(resnet50_imagenet_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-
-
 #********* FP16 TensorRT Source Builds ******

 add_executable(lenet_mnist_fp16 dnn_sources/src/fp16/lenet_mnist_half.cc)
@@ -243,12 +187,9 @@ add_executable(mobilenet_cifar10_fp16 dnn_sources/src/fp16/mobilenet_half.cc)
 target_link_libraries(mobilenet_cifar10_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})

-
-
 file(GLOB files "dnn_sources/src/dynamic/*.cpp")
 foreach(file ${files})
   get_filename_component(stem ${file} NAME_WE)
   add_executable(${stem} ${file})
   target_link_libraries(${stem} tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
 endforeach()

-
-- 
GitLab
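
Note: with CUDA declared as a first-class language in project(), CMake probes nvcc against the selected host compiler at configure time, so an incompatible nvcc/gcc pairing now fails during configuration instead of relying on the hand-written GCC/Clang version checks deleted in the first hunk. If the system's default compiler is too new for the installed CUDA toolkit, a compatible host compiler can be chosen when configuring. The sketch below is illustrative only and not part of the patch; the g++-8 path and the "build" directory are assumptions, while CMAKE_CUDA_HOST_COMPILER and the CUDAHOSTCXX environment variable are standard CMake mechanisms.

    # Sketch: point CMake's CUDA language support at a specific host compiler
    # (the path is hypothetical; any gcc accepted by the installed nvcc works).
    cmake -S hpvm/projects/hpvm-tensor-rt -B build \
          -DCMAKE_CUDA_HOST_COMPILER=/usr/bin/g++-8
    # Equivalently, export CUDAHOSTCXX=/usr/bin/g++-8 before running cmake.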