diff --git a/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt b/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt
index 5652cdf71513ac4d93c3b0ec634f5013153321d8..e53d6c4d3cb5caf47970b96d94667bbebbad4284 100644
--- a/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt
+++ b/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt
@@ -27,7 +27,7 @@ set(
   # Have to add these because of tensor_runtime.ll (see below)
   ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} ${CUDNN_INCLUDE_PATH}
   ./tensor_runtime/include ${CMAKE_CURRENT_BINARY_DIR}/tensor_runtime/include
-  ./dnn_sources/include
+  ./dnn_sources/include # TODO: remove this.
 )
 
 # Build gpu_profiler and soc_simulator (dependencies)
@@ -53,6 +53,29 @@ if(USE_GFLAGS)
   list(APPEND LINK_LIBS gflags)
 endif()
 
+# -- Manually list the directories (TRT_LINK_DIRS) and libraries (TRT_LINK_LIBS)
+# tensor_runtime links to, because CMake doesn't help us do this.
+# This is needed by both approxhpvm.py and the RPATH setting step (below).
+# First, take a guess at the paths to the libraries that are used here.
+# (CMake, why do you make this so difficult?)
+foreach(interface_lib ${LINK_LIBS})
+  get_target_property(actual_libs ${interface_lib} INTERFACE_LINK_LIBRARIES)
+  foreach(actual_lib ${actual_libs})
+    # ${actual_lib} may not be a path, then taking the directory of it should return "".
+    get_filename_component(libdir ${actual_lib} DIRECTORY)
+    get_filename_component(libname ${actual_lib} NAME)
+    if(NOT ${libdir} STREQUAL "")
+      list(APPEND TRT_LINK_DIRS ${libdir})
+    endif()
+    if(NOT ${libname} STREQUAL "" AND NOT ${libname} STREQUAL "actual_libs-NOTFOUND")
+      list(APPEND TRT_LINK_LIBS ${libname})
+    endif()
+  endforeach()
+endforeach()
+# Dedup, just for shorter compiler arguments.
+list(REMOVE_DUPLICATES TRT_LINK_DIRS)
+list(REMOVE_DUPLICATES TRT_LINK_LIBS)
+
 # -- Definitions
 set(DEFS -DPROMISE_TUNER_ENABLED -DSIMULATION_MODE=true)
 if(USE_GFLAGS)
@@ -93,6 +116,18 @@ function(add_tensor_runtime target_name)
   target_include_directories(${target_name} PUBLIC ${INCLUDES})
   target_link_libraries(${target_name} PUBLIC ${LINK_LIBS})
   target_compile_definitions(${target_name} PRIVATE ${DEFS} ${ARGN})
+
+  # We have to manually set rpath because cmake is not willing to comply...
+  foreach(libdir ${TRT_LINK_DIRS})
+    target_link_options(${target_name} PRIVATE "-Wl,-rpath,${libdir}")
+  endforeach()
+  # Also slap TRT_LINK_DIRS and TRT_LINK_LIBS on this target
+  # so that approxhpvm.py can read them. (we'll create our own properties.)
+  set_target_properties(
+    ${target_name} PROPERTIES
+    TRT_LINK_DIRS "${TRT_LINK_DIRS}"
+    TRT_LINK_LIBS "${TRT_LINK_LIBS}"
+  )
 endfunction(add_tensor_runtime)
 
 # Adding new rule for building a cuDNN runtime library
@@ -108,7 +143,7 @@ add_dependencies(tensor_runtime_online tensor_runtime)
 
 # Adding rule for the debugging source
 add_executable(unit_tests dnn_sources/src/unit_tests.cc)
-target_link_libraries(unit_tests tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB} ${OpenMP_CXX_FLAGS})
+target_link_libraries(unit_tests tensor_runtime_online)
 
 # -- Compile tensor_runtime.ll if possible
 if(INDEP_BUILD)
@@ -147,54 +182,54 @@ endif()
 #**************** FP32 TensorRT Source Builds ***********
 
 add_executable(lenet_mnist_fp32 dnn_sources/src/fp32/lenet_mnist.cc)
-target_link_libraries(lenet_mnist_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
+target_link_libraries(lenet_mnist_fp32 tensor_runtime_online)
 
 add_executable(alexnet_cifar10_fp32 dnn_sources/src/fp32/alexnet_cifar10.cc)
-target_link_libraries(alexnet_cifar10_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
+target_link_libraries(alexnet_cifar10_fp32 tensor_runtime_online)
 
 add_executable(alexnet2_cifar10_fp32 dnn_sources/src/fp32/alexnet2_cifar10.cc)
-target_link_libraries(alexnet2_cifar10_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
+target_link_libraries(alexnet2_cifar10_fp32 tensor_runtime_online)
 
 add_executable(vgg16_cifar10_fp32 dnn_sources/src/fp32/vgg16_cifar10.cc)
-target_link_libraries(vgg16_cifar10_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
+target_link_libraries(vgg16_cifar10_fp32 tensor_runtime_online)
 
 add_executable(resnet18_cifar10_fp32 dnn_sources/src/fp32/resnet18_cifar10.cc)
-target_link_libraries(resnet18_cifar10_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
+target_link_libraries(resnet18_cifar10_fp32 tensor_runtime_online)
 
 add_executable(vgg16_cifar100_fp32 dnn_sources/src/fp32/vgg16_cifar100.cc)
-target_link_libraries(vgg16_cifar100_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
+target_link_libraries(vgg16_cifar100_fp32 tensor_runtime_online)
 
 add_executable(mobilenet_cifar10_fp32 dnn_sources/src/fp32/mobilenet.cc)
-target_link_libraries(mobilenet_cifar10_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
+target_link_libraries(mobilenet_cifar10_fp32 tensor_runtime_online)
 
 add_executable(alexnet_imagenet_fp32 dnn_sources/src/fp32/alexnet_imagenet.cc)
-target_link_libraries(alexnet_imagenet_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
+target_link_libraries(alexnet_imagenet_fp32 tensor_runtime_online)
 
 add_executable(vgg16_imagenet_fp32 dnn_sources/src/fp32/vgg16_imagenet.cc)
-target_link_libraries(vgg16_imagenet_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
+target_link_libraries(vgg16_imagenet_fp32 tensor_runtime_online)
 
 add_executable(resnet50_imagenet_fp32 dnn_sources/src/fp32/resnet50_imagenet.cc)
-target_link_libraries(resnet50_imagenet_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
+target_link_libraries(resnet50_imagenet_fp32 tensor_runtime_online)
 
 #********* FP16 TensorRT Source Builds ******
 
 add_executable(lenet_mnist_fp16 dnn_sources/src/fp16/lenet_mnist_half.cc)
-target_link_libraries(lenet_mnist_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
+target_link_libraries(lenet_mnist_fp16 tensor_runtime_online)
 
 add_executable(alexnet_cifar10_fp16 dnn_sources/src/fp16/alexnet_cifar10_half.cc)
-target_link_libraries(alexnet_cifar10_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
+target_link_libraries(alexnet_cifar10_fp16 tensor_runtime_online)
 
 add_executable(alexnet2_cifar10_fp16 dnn_sources/src/fp16/alexnet2_cifar10_half.cc)
-target_link_libraries(alexnet2_cifar10_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
+target_link_libraries(alexnet2_cifar10_fp16 tensor_runtime_online)
 
 add_executable(resnet18_cifar10_fp16 dnn_sources/src/fp16/resnet18_cifar10_half.cc)
-target_link_libraries(resnet18_cifar10_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
+target_link_libraries(resnet18_cifar10_fp16 tensor_runtime_online)
 
 add_executable(vgg16_cifar10_fp16 dnn_sources/src/fp16/vgg16_cifar10_half.cc)
-target_link_libraries(vgg16_cifar10_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
+target_link_libraries(vgg16_cifar10_fp16 tensor_runtime_online)
 
 add_executable(vgg16_cifar100_fp16 dnn_sources/src/fp16/vgg16_cifar100_half.cc)
-target_link_libraries(vgg16_cifar100_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
+target_link_libraries(vgg16_cifar100_fp16 tensor_runtime_online)
 
 add_executable(mobilenet_cifar10_fp16 dnn_sources/src/fp16/mobilenet_half.cc)
-target_link_libraries(mobilenet_cifar10_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
+target_link_libraries(mobilenet_cifar10_fp16 tensor_runtime_online)
diff --git a/hpvm/tools/py-approxhpvm/CMakeLists.txt b/hpvm/tools/py-approxhpvm/CMakeLists.txt
index d751823a08662aa927b4f5af6f2c6350a816a6a1..40951b2466f872bdcd4851f68b0a56e8dec1214d 100644
--- a/hpvm/tools/py-approxhpvm/CMakeLists.txt
+++ b/hpvm/tools/py-approxhpvm/CMakeLists.txt
@@ -1,15 +1,22 @@
 # This file is very tightly coupled with main.py.in.
 # Watch out and keep them in sync.
+# main.py.in (to become approxhpvm.py) requires the following variables:
+# LLVM_PROJECT_DIR, LLVM_BUILD_DIR
+# TRT_PATH, TRT_INCLUDE_DIRS, TRT_LINK_DIRS, TRT_LINK_LIBS
+# AVAILABLE_PASSES, HPVM_RT_PATH
 set(LLVM_PROJECT_DIR ${CMAKE_SOURCE_DIR})
 set(LLVM_BUILD_DIR ${CMAKE_BINARY_DIR})
-set(LIB_DIR ${CMAKE_LIBRARY_OUTPUT_DIRECTORY})
+
+set(TRT_PATH "$<TARGET_FILE:tensor_runtime>")
+get_target_property(TRT_LINK_DIRS tensor_runtime TRT_LINK_DIRS)
+get_target_property(TRT_LINK_LIBS tensor_runtime TRT_LINK_LIBS)
+
 # The hpvm-rt runtime
 # This has to be explicitly set as hpvm-rt.bc is created in a custom_target
 # and does not export its file location.
 # Keep this in sync with hpvm/projects/hpvm-rt/CMakeLists.txt.
 set(HPVM_RT_PATH ${LLVM_BUILD_DIR}/tools/hpvm/projects/hpvm-rt/hpvm-rt.bc)
-set(TENSOR_RUNTIME_LIB "$<TARGET_FILE:tensor_runtime>")
 
 set(
   AVAILABLE_PASSES
   LLVMBuildDFG
@@ -21,8 +28,7 @@ set(
   LLVMClearDFG
   LLVMGenHPVM
 )
-# CUDA_TOOLKIT_ROOT_DIR and CUDNN_LIBRARY_DIR has been defined globally
-set(CUDNN_DIR ${CUDNN_LIBRARY_DIR})
+
 # First resolve all `@symbol@` by configuring the file
 configure_file(main.py.in ${CMAKE_CURRENT_BINARY_DIR}/main.py.conf)
 # Then resolve all generator expressions we configured into the previous file
@@ -32,17 +38,7 @@ file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/main.py INPUT ${CMAKE_CURRENT_B
 # so we can set these dependencies on it
 set(
   DEPS
-  tensor_runtime
-  LLVMBuildDFG
-  LLVMInPlaceDFGAnalysis
-  LLVMDFG2LLVM_CPU
-  LLVMDFG2LLVM_CUDNN
-  LLVMDFG2LLVM_WrapperAPI
-  LLVMFuseHPVMTensorNodes
-  LLVMClearDFG
-  LLVMGenHPVM
-  hpvm-rt.bc
-  clang opt llvm-link
+  tensor_runtime hpvm-rt.bc clang opt llvm-link ${AVAILABLE_PASSES}
 )
 add_custom_command(
   OUTPUT ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/approxhpvm.py
diff --git a/hpvm/tools/py-approxhpvm/main.py.in b/hpvm/tools/py-approxhpvm/main.py.in
index af706a1eaa7a53879e525d87dd5034caf814db38..804710b94222dc90930ce7f16149b4135148b2b2 100644
--- a/hpvm/tools/py-approxhpvm/main.py.in
+++ b/hpvm/tools/py-approxhpvm/main.py.in
@@ -7,25 +7,17 @@ from typing import List, Union, Optional
 
 PathLike = Union[Path, str]
 
 HPVM_PROJECT_DIR = Path("@LLVM_PROJECT_DIR@") / "tools/hpvm"
-LLVM_BUILD_DIR = Path("@LLVM_BUILD_DIR@") / "bin"
-CUDA_TOOLKIT_ROOT_DIR = Path("@CUDA_TOOLKIT_ROOT_DIR@")
-TENSOR_RUNTIME_LIB = Path("@TENSOR_RUNTIME_LIB@")
+LLVM_BUILD_BIN = Path("@LLVM_BUILD_DIR@") / "bin"
+
+# Directories to include
+TRT_PATH = Path("@TRT_PATH@")
+TRT_INCLUDE_DIRS = "@TRT_INCLUDE_DIRS@".split(";")
+TRT_LINK_DIRS = [Path(s) for s in "@TRT_LINK_DIRS@".split(";")]
+TRT_LINK_LIBS = "@TRT_LINK_LIBS@".split(";")
+
 AVAILABLE_PASSES = "@AVAILABLE_PASSES@".split(";")
 HPVM_RT_PATH = "@HPVM_RT_PATH@"
-CUDNN_DIR = "@CUDNN_DIR@"
-# Directories to include
-INCLUDE_DIRS = [
-    HPVM_PROJECT_DIR / "include", # HPVM include dir
-    # Tensor runtime include dir
-    HPVM_PROJECT_DIR / "projects/hpvm-tensor-rt/tensor_runtime/include",
-    HPVM_PROJECT_DIR / "test/dnn_benchmarks/hpvm-c/include", # hpvm-c intrinsics decl dir
-    CUDA_TOOLKIT_ROOT_DIR / "include", # CUDA include dir
-]
-LINK_DIRS = [CUDA_TOOLKIT_ROOT_DIR / "lib64", CUDNN_DIR, TENSOR_RUNTIME_LIB.parent]
-LINK_LIBS = [
-    "pthread", "cudart", "curand", "cudnn", "cublas", "cufft", "OpenCL", "stdc++fs", "omp", "m"
-]
 
 
 COMPILE_FLAGS = ["fno-exceptions", "std=c++11", "O3"]
@@ -72,10 +64,10 @@ def hpvm_c_to_ll(
     flags: List[str] = None,
 ) -> List[str]:
     extra_includes = extra_includes or []
-    includes = [f"-I{path}" for path in INCLUDE_DIRS + extra_includes]
+    includes = [f"-I{path}" for path in TRT_INCLUDE_DIRS + extra_includes]
     flags = [f"-{flg}" for flg in (flags or []) + COMPILE_FLAGS]
     return [
-        str(LLVM_BUILD_DIR / "clang++"), *includes, *flags, "-emit-llvm", "-S",
+        str(LLVM_BUILD_BIN / "clang++"), *includes, *flags, "-emit-llvm", "-S",
        str(src_file), "-o", str(target_file)
     ]
 
@@ -115,16 +107,16 @@ def opt_codegen_tensor(
 
 
 def link_hpvm_rt(src_file: PathLike, target_file: PathLike) -> List[str]:
-    return [str(LLVM_BUILD_DIR / "llvm-link"), str(src_file), HPVM_RT_PATH, "-o", str(target_file)]
+    return [str(LLVM_BUILD_BIN / "llvm-link"), str(src_file), HPVM_RT_PATH, "-o", str(target_file)]
 
 
 def link_binary(src_file: PathLike, target_file: PathLike) -> List[str]:
     linker_dir_flags = []
-    for path in LINK_DIRS:
+    for path in TRT_LINK_DIRS:
         linker_dir_flags.extend([f"-L{path}", f"-Wl,-rpath={path}"])
-    linker_lib_flags = [f"-l{lib}" for lib in LINK_LIBS]
+    linker_lib_flags = [f"-l{lib}" for lib in TRT_LINK_LIBS]
     return [
-        str(LLVM_BUILD_DIR / "clang++"), str(src_file), str(TENSOR_RUNTIME_LIB), "-o", str(target_file),
+        str(LLVM_BUILD_BIN / "clang++"), str(src_file), str(TRT_PATH), "-o", str(target_file),
         *linker_dir_flags, *linker_lib_flags
     ]
 
@@ -141,7 +133,7 @@ def _run_opt(
     load_passes_strs = [s for pass_ in pass_names for s in ["-load", f"{pass_}.so"]]
     pass_flags_strs = [f"-{flag}" for flag in pass_flags]
     return [
-        str(LLVM_BUILD_DIR / "opt"), *load_passes_strs, *pass_flags_strs,
+        str(LLVM_BUILD_BIN / "opt"), *load_passes_strs, *pass_flags_strs,
         "-S", str(src_file), "-o", str(target_file)
     ]
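Note (illustrative, not part of the patch): after this change, link_binary() in main.py.in builds the final link command purely from the values CMake attached to the tensor_runtime target. The sketch below only restates that code with placeholders; the file names in angle brackets are hypothetical, and the real values are whatever @TRT_PATH@, TRT_LINK_DIRS, and TRT_LINK_LIBS were configured to.

    @LLVM_BUILD_DIR@/bin/clang++ <linked .ll file> @TRT_PATH@ -o <output binary>
        -L<dir> -Wl,-rpath=<dir>    ...one pair per entry of TRT_LINK_DIRS
        -l<lib>                     ...one flag per entry of TRT_LINK_LIBS

The same TRT_LINK_DIRS entries are also passed as -Wl,-rpath options when add_tensor_runtime() builds the runtime itself, so the library and the binaries produced by approxhpvm.py use a consistent set of search paths.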