Skip to content
Snippets Groups Projects
Commit ce6714fd authored by Yifan Zhao's avatar Yifan Zhao
Browse files

Set RPATH for tensor_runtime and reduce hardcoding for approxhpvm.py

parent ce61e4d1
No related branches found
No related tags found
No related merge requests found
......@@ -27,7 +27,7 @@ set(
# Have to add these because of tensor_runtime.ll (see below)
${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} ${CUDNN_INCLUDE_PATH}
./tensor_runtime/include ${CMAKE_CURRENT_BINARY_DIR}/tensor_runtime/include
./dnn_sources/include
./dnn_sources/include # TODO: remove this.
)
# Build gpu_profiler and soc_simulator (dependencies)
......@@ -53,6 +53,29 @@ if(USE_GFLAGS)
list(APPEND LINK_LIBS gflags)
endif()
# -- Manually list the directories (TRT_LINK_DIRS) and libraries (TRT_LINK_LIBS)
# tensor_runtime links to, because CMake doesn't help us do this.
# This is needed by both approxhpvm.py and the RPATH setting step (below).
# First, take a guess at the paths to the libraries that are used here.
# (CMake, why do you make this so difficult?)
foreach(interface_lib ${LINK_LIBS})
  # get_target_property() is an error on a non-target; skip such entries
  # (e.g. raw flags or plain library names) instead of failing the configure.
  if(NOT TARGET ${interface_lib})
    continue()
  endif()
  get_target_property(actual_libs ${interface_lib} INTERFACE_LINK_LIBRARIES)
  foreach(actual_lib ${actual_libs})
    # ${actual_lib} may not be a path; DIRECTORY then returns "".
    get_filename_component(libdir "${actual_lib}" DIRECTORY)
    get_filename_component(libname "${actual_lib}" NAME)
    # Quote the expansions: an unquoted empty variable would collapse the
    # if() to `if(NOT STREQUAL "")`, which is a syntax error.
    if(NOT "${libdir}" STREQUAL "")
      list(APPEND TRT_LINK_DIRS "${libdir}")
    endif()
    # "actual_libs-NOTFOUND" is what get_target_property yields when the
    # target has no INTERFACE_LINK_LIBRARIES; don't record it as a library.
    if(NOT "${libname}" STREQUAL "" AND NOT "${libname}" STREQUAL "actual_libs-NOTFOUND")
      list(APPEND TRT_LINK_LIBS "${libname}")
    endif()
  endforeach()
endforeach()
# Dedup, just for shorter compiler arguments.
# Guard: REMOVE_DUPLICATES on a list that was never APPENDed to is an error.
if(TRT_LINK_DIRS)
  list(REMOVE_DUPLICATES TRT_LINK_DIRS)
endif()
if(TRT_LINK_LIBS)
  list(REMOVE_DUPLICATES TRT_LINK_LIBS)
endif()
# -- Definitions
set(DEFS -DPROMISE_TUNER_ENABLED -DSIMULATION_MODE=true)
if(USE_GFLAGS)
......@@ -93,6 +116,18 @@ function(add_tensor_runtime target_name)
target_include_directories(${target_name} PUBLIC ${INCLUDES})
target_link_libraries(${target_name} PUBLIC ${LINK_LIBS})
target_compile_definitions(${target_name} PRIVATE ${DEFS} ${ARGN})
# We have to manually set rpath because cmake is not willing to comply...
# Embed every collected link directory into the binary's RPATH so the
# dynamic loader can locate the libraries without LD_LIBRARY_PATH.
foreach(libdir ${TRT_LINK_DIRS})
target_link_options(${target_name} PRIVATE "-Wl,-rpath,${libdir}")
endforeach()
# Also slap TRT_LINK_DIRS and TRT_LINK_LIBS on this target
# so that approxhpvm.py can read them. (we'll create our own properties.)
# These are custom (non-CMake) property names; they are read back later
# with get_target_property() when configuring main.py.in.
set_target_properties(
${target_name} PROPERTIES
TRT_LINK_DIRS "${TRT_LINK_DIRS}"
TRT_LINK_LIBS "${TRT_LINK_LIBS}"
)
endfunction(add_tensor_runtime)
# Adding new rule for building a cuDNN runtime library
......@@ -108,7 +143,7 @@ add_dependencies(tensor_runtime_online tensor_runtime)
# Adding rule for the debugging source
add_executable(unit_tests dnn_sources/src/unit_tests.cc)
target_link_libraries(unit_tests tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB} ${OpenMP_CXX_FLAGS})
target_link_libraries(unit_tests tensor_runtime_online)
# -- Compile tensor_runtime.ll if possible
if(INDEP_BUILD)
......@@ -147,54 +182,54 @@ endif()
#**************** FP32 TensorRT Source Builds ***********
add_executable(lenet_mnist_fp32 dnn_sources/src/fp32/lenet_mnist.cc)
target_link_libraries(lenet_mnist_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
target_link_libraries(lenet_mnist_fp32 tensor_runtime_online)
add_executable(alexnet_cifar10_fp32 dnn_sources/src/fp32/alexnet_cifar10.cc)
target_link_libraries(alexnet_cifar10_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
target_link_libraries(alexnet_cifar10_fp32 tensor_runtime_online)
add_executable(alexnet2_cifar10_fp32 dnn_sources/src/fp32/alexnet2_cifar10.cc)
target_link_libraries(alexnet2_cifar10_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
target_link_libraries(alexnet2_cifar10_fp32 tensor_runtime_online)
add_executable(vgg16_cifar10_fp32 dnn_sources/src/fp32/vgg16_cifar10.cc)
target_link_libraries(vgg16_cifar10_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
target_link_libraries(vgg16_cifar10_fp32 tensor_runtime_online)
add_executable(resnet18_cifar10_fp32 dnn_sources/src/fp32/resnet18_cifar10.cc)
target_link_libraries(resnet18_cifar10_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
target_link_libraries(resnet18_cifar10_fp32 tensor_runtime_online)
add_executable(vgg16_cifar100_fp32 dnn_sources/src/fp32/vgg16_cifar100.cc)
target_link_libraries(vgg16_cifar100_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
target_link_libraries(vgg16_cifar100_fp32 tensor_runtime_online)
add_executable(mobilenet_cifar10_fp32 dnn_sources/src/fp32/mobilenet.cc)
target_link_libraries(mobilenet_cifar10_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
target_link_libraries(mobilenet_cifar10_fp32 tensor_runtime_online)
add_executable(alexnet_imagenet_fp32 dnn_sources/src/fp32/alexnet_imagenet.cc)
target_link_libraries(alexnet_imagenet_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
target_link_libraries(alexnet_imagenet_fp32 tensor_runtime_online)
add_executable(vgg16_imagenet_fp32 dnn_sources/src/fp32/vgg16_imagenet.cc)
target_link_libraries(vgg16_imagenet_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
target_link_libraries(vgg16_imagenet_fp32 tensor_runtime_online)
add_executable(resnet50_imagenet_fp32 dnn_sources/src/fp32/resnet50_imagenet.cc)
target_link_libraries(resnet50_imagenet_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
target_link_libraries(resnet50_imagenet_fp32 tensor_runtime_online)
#********* FP16 TensorRT Source Builds ******
add_executable(lenet_mnist_fp16 dnn_sources/src/fp16/lenet_mnist_half.cc)
target_link_libraries(lenet_mnist_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
target_link_libraries(lenet_mnist_fp16 tensor_runtime_online)
add_executable(alexnet_cifar10_fp16 dnn_sources/src/fp16/alexnet_cifar10_half.cc)
target_link_libraries(alexnet_cifar10_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
target_link_libraries(alexnet_cifar10_fp16 tensor_runtime_online)
add_executable(alexnet2_cifar10_fp16 dnn_sources/src/fp16/alexnet2_cifar10_half.cc)
target_link_libraries(alexnet2_cifar10_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
target_link_libraries(alexnet2_cifar10_fp16 tensor_runtime_online)
add_executable(resnet18_cifar10_fp16 dnn_sources/src/fp16/resnet18_cifar10_half.cc)
target_link_libraries(resnet18_cifar10_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
target_link_libraries(resnet18_cifar10_fp16 tensor_runtime_online)
add_executable(vgg16_cifar10_fp16 dnn_sources/src/fp16/vgg16_cifar10_half.cc)
target_link_libraries(vgg16_cifar10_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
target_link_libraries(vgg16_cifar10_fp16 tensor_runtime_online)
add_executable(vgg16_cifar100_fp16 dnn_sources/src/fp16/vgg16_cifar100_half.cc)
target_link_libraries(vgg16_cifar100_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
target_link_libraries(vgg16_cifar100_fp16 tensor_runtime_online)
add_executable(mobilenet_cifar10_fp16 dnn_sources/src/fp16/mobilenet_half.cc)
target_link_libraries(mobilenet_cifar10_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
target_link_libraries(mobilenet_cifar10_fp16 tensor_runtime_online)
# This file is very tightly coupled with main.py.in.
# Watch out and keep them in sync.
# main.py.in (to become approxhpvm.py) requires the following variables:
# LLVM_PROJECT_DIR, LLVM_BUILD_DIR
# TRT_PATH, TRT_INCLUDE_DIRS, TRT_LINK_DIRS, TRT_LINK_LIBS
# AVAILABLE_PASSES, HPVM_RT_PATH
# Variables below are substituted into main.py.in (see the comment above):
# paths to the LLVM tree/build, the tensor runtime, and its link flags.
# NOTE(review): CMAKE_SOURCE_DIR is the top of the whole build tree, not this
# project's root — intentional here since HPVM builds inside llvm-project.
set(LLVM_PROJECT_DIR ${CMAKE_SOURCE_DIR})
set(LLVM_BUILD_DIR ${CMAKE_BINARY_DIR})
set(LIB_DIR ${CMAKE_LIBRARY_OUTPUT_DIRECTORY})
# $<TARGET_FILE:...> is a generator expression; it is resolved by the
# file(GENERATE) step below, not by configure_file.
set(TRT_PATH "$<TARGET_FILE:tensor_runtime>")
# Read back the custom properties stashed on tensor_runtime by
# add_tensor_runtime().
get_target_property(TRT_LINK_DIRS tensor_runtime TRT_LINK_DIRS)
get_target_property(TRT_LINK_LIBS tensor_runtime TRT_LINK_LIBS)
# The hpvm-rt runtime
# This has to be explicitly set as hpvm-rt.bc is created in a custom_target
# and does not export its file location.
# Keep this in sync with hpvm/projects/hpvm-rt/CMakeLists.txt.
set(HPVM_RT_PATH ${LLVM_BUILD_DIR}/tools/hpvm/projects/hpvm-rt/hpvm-rt.bc)
# NOTE(review): TENSOR_RUNTIME_LIB duplicates TRT_PATH and looks like a
# leftover of the pre-rename variable — confirm main.py.in still reads it.
set(TENSOR_RUNTIME_LIB "$<TARGET_FILE:tensor_runtime>")
set(
AVAILABLE_PASSES
LLVMBuildDFG
......@@ -21,8 +28,7 @@ set(
LLVMClearDFG
LLVMGenHPVM
)
# CUDA_TOOLKIT_ROOT_DIR and CUDNN_LIBRARY_DIR has been defined globally
set(CUDNN_DIR ${CUDNN_LIBRARY_DIR})
# First resolve all `@symbol@` by configuring the file
configure_file(main.py.in ${CMAKE_CURRENT_BINARY_DIR}/main.py.conf)
# Then resolve all generator expressions we configured into the previous file
......@@ -32,17 +38,7 @@ file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/main.py INPUT ${CMAKE_CURRENT_B
# so we can set these dependencies on it
set(
DEPS
tensor_runtime
LLVMBuildDFG
LLVMInPlaceDFGAnalysis
LLVMDFG2LLVM_CPU
LLVMDFG2LLVM_CUDNN
LLVMDFG2LLVM_WrapperAPI
LLVMFuseHPVMTensorNodes
LLVMClearDFG
LLVMGenHPVM
hpvm-rt.bc
clang opt llvm-link
tensor_runtime hpvm-rt.bc clang opt llvm-link ${AVAILABLE_PASSES}
)
add_custom_command(
OUTPUT ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/approxhpvm.py
......
......@@ -7,25 +7,17 @@ from typing import List, Union, Optional
PathLike = Union[Path, str]
HPVM_PROJECT_DIR = Path("@LLVM_PROJECT_DIR@") / "tools/hpvm"
LLVM_BUILD_DIR = Path("@LLVM_BUILD_DIR@") / "bin"
CUDA_TOOLKIT_ROOT_DIR = Path("@CUDA_TOOLKIT_ROOT_DIR@")
TENSOR_RUNTIME_LIB = Path("@TENSOR_RUNTIME_LIB@")
LLVM_BUILD_BIN = Path("@LLVM_BUILD_DIR@") / "bin"
# Directories to include
TRT_PATH = Path("@TRT_PATH@")
TRT_INCLUDE_DIRS = "@TRT_INCLUDE_DIRS@".split(";")
TRT_LINK_DIRS = [Path(s) for s in "@TRT_LINK_DIRS@".split(";")]
TRT_LINK_LIBS = "@TRT_LINK_LIBS@".split(";")
AVAILABLE_PASSES = "@AVAILABLE_PASSES@".split(";")
HPVM_RT_PATH = "@HPVM_RT_PATH@"
CUDNN_DIR = "@CUDNN_DIR@"
# Directories to include
INCLUDE_DIRS = [
HPVM_PROJECT_DIR / "include", # HPVM include dir
# Tensor runtime include dir
HPVM_PROJECT_DIR / "projects/hpvm-tensor-rt/tensor_runtime/include",
HPVM_PROJECT_DIR / "test/dnn_benchmarks/hpvm-c/include", # hpvm-c intrinsics decl dir
CUDA_TOOLKIT_ROOT_DIR / "include", # CUDA include dir
]
LINK_DIRS = [CUDA_TOOLKIT_ROOT_DIR / "lib64", CUDNN_DIR, TENSOR_RUNTIME_LIB.parent]
LINK_LIBS = [
"pthread", "cudart", "curand", "cudnn", "cublas", "cufft", "OpenCL", "stdc++fs", "omp", "m"
]
COMPILE_FLAGS = ["fno-exceptions", "std=c++11", "O3"]
......@@ -72,10 +64,10 @@ def hpvm_c_to_ll(
flags: List[str] = None,
) -> List[str]:
extra_includes = extra_includes or []
includes = [f"-I{path}" for path in INCLUDE_DIRS + extra_includes]
includes = [f"-I{path}" for path in TRT_INCLUDE_DIRS + extra_includes]
flags = [f"-{flg}" for flg in (flags or []) + COMPILE_FLAGS]
return [
str(LLVM_BUILD_DIR / "clang++"), *includes, *flags, "-emit-llvm", "-S",
str(LLVM_BUILD_BIN / "clang++"), *includes, *flags, "-emit-llvm", "-S",
str(src_file), "-o", str(target_file)
]
......@@ -115,16 +107,16 @@ def opt_codegen_tensor(
def link_hpvm_rt(src_file: PathLike, target_file: PathLike) -> List[str]:
return [str(LLVM_BUILD_DIR / "llvm-link"), str(src_file), HPVM_RT_PATH, "-o", str(target_file)]
return [str(LLVM_BUILD_BIN / "llvm-link"), str(src_file), HPVM_RT_PATH, "-o", str(target_file)]
def link_binary(src_file: PathLike, target_file: PathLike) -> List[str]:
linker_dir_flags = []
for path in LINK_DIRS:
for path in TRT_LINK_DIRS:
linker_dir_flags.extend([f"-L{path}", f"-Wl,-rpath={path}"])
linker_lib_flags = [f"-l{lib}" for lib in LINK_LIBS]
linker_lib_flags = [f"-l{lib}" for lib in TRT_LINK_LIBS]
return [
str(LLVM_BUILD_DIR / "clang++"), str(src_file), str(TENSOR_RUNTIME_LIB), "-o", str(target_file),
str(LLVM_BUILD_BIN / "clang++"), str(src_file), str(TRT_PATH), "-o", str(target_file),
*linker_dir_flags, *linker_lib_flags
]
......@@ -141,7 +133,7 @@ def _run_opt(
load_passes_strs = [s for pass_ in pass_names for s in ["-load", f"{pass_}.so"]]
pass_flags_strs = [f"-{flag}" for flag in pass_flags]
return [
str(LLVM_BUILD_DIR / "opt"), *load_passes_strs, *pass_flags_strs,
str(LLVM_BUILD_BIN / "opt"), *load_passes_strs, *pass_flags_strs,
"-S", str(src_file), "-o", str(target_file)
]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment