diff --git a/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt b/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt index 81490cce664e56dbdc3fec1072fa883c8b60936d..dd774f8ffa8fc04d39c3c28a8a40d8bed28c6c2f 100644 --- a/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt +++ b/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.17) project(hpvm-tensor-rt) -find_package(CUDA 6.5 REQUIRED) +find_package(CUDA 9.1 REQUIRED) set(CUDA_SEPARABLE_COMPILATION ON CACHE BOOL "") set(CUDA_PROPAGATE_HOST_FLAGS OFF) diff --git a/hpvm/projects/hpvm-tensor-rt/README.md b/hpvm/projects/hpvm-tensor-rt/README.md index 530acc21099e52c2896d16398e5ca2bf4eaa88bf..e492c4c838df969f00baa85e6e3adcafdfc0a7f9 100644 --- a/hpvm/projects/hpvm-tensor-rt/README.md +++ b/hpvm/projects/hpvm-tensor-rt/README.md @@ -1,69 +1,63 @@ -# AppproxHPVM Tensor Runtime +# ApproxHPVM Tensor Runtime -## Dependencies +## Getting Started -* CUDNN-7.0 or above -* CUDA-9.1 or above -* CUBLAS-9.1 or above - included with cuda-toolkit +### Dependencies -## Dependent Library Builds +- CUDA-9.1 or above + - Your device must have a CUDA-enabled NVIDIA GPU + - CUBLAS-9.1 or above - included with CUDA by default -```shell -cd ../gpu_profiler -mkdir lib -cmake ../ -make +- cuDNN-7.0 or above -cd ../soc_simulator -mkdir lib -cmake ../ -make -``` +- `cmake >= 3.17` + +- `make >= 4` + +- `gcc < 8` or `3.2 <= clang < 9` + - We have an upper bound on the compiler version because CUDA does not support very recent compilers +### Building the Tensor Runtime -## BUILD +The following commands will compile the tensor runtime library (`build/libtensor_runtime.a`) +as well as a number of example benchmarks (DNN models): ```shell -source bin/setup_cuda_paths.sh -mkdir build -cd build +mkdir build && cd build cmake ../ -make +make -j ``` +### Tensor Runtime APIs -## Directory Structure +- `tensor_runtime/include/tensor_runtime.h` declares all the functions available in the runtime. 
-* ./tensor_runtime: - * ./tensor_runtime/include/: Include files for Tensor Runtime - * ./tensor_runtime/include/tensor_signatures.cc: Include file with Tensor RT signatures - * NOTE: UPDATE this with updated API - * ./tensor_runtime/src/: HPVM TensorRT sources - -* ./dnn_sources: - * ./dnn_sources/src/${BENCH}.cc: Per Bench FULL-precision source - * ./dnn_sources/src/half/${BENCH}.cc: Per Bench HALF-precision source - * ./dnn_sources/src/promise/${BENCH}.cc: Per Bench Layer-API source - -* ./lib: - * ./lib/tensor_runtime.ll - * NOTE: generated from ./tensor_runtime/include/tensor_signatures.cc - * ./lib/libtensor_runtime.a - * NOTE: Linked against HPVM benchmarks - * ./lib/libtensor_autotuner.a - * NOTE: error-injection library linked with benchmarks - -* ./bin: - * ./bin/install_runtime.sh: Script for moving Tensor RT files to ./lib - * ./bin/run_autotuner.py: Python script for running Autotuner experiments - * ./bin/setup_tyler_paths.sh: Tyler-specific path setup for Tensor RT - * ./bin/setup_jetson.sh: Jetson board specific path setup for Tensor RT - * ./bin/setup_cuda_paths.sh: Place-holder script for setting CUDA paths - * ./bin/swing_selection.py: Script for hardware mapping - * NOTE: Includes the L2,L1 norm mapping to hardware knobs - -* ./opentuner: - * ./opentuner/autotuner/: Autotuner scripts - * ./opentuner/autotuner/approxhpvm_tuner.py: Tuner script for ApproxHPVM binaries - * ./opentuner/autotuner/promise_tuner.py: Tuner script for tuning PROMISE voltage levels + TODO: the tensor runtime is generally under-documented at this time. + More documentation will be added in the first public release. + +- For examples of using `tensor_runtime` functions, see `dnn_sources/src/alexnet_cifar10.cc`. + - Also, try running `build/alexnet_cifar10` which is compiled from that file and runnable out of the box. 
+ + ## Developer Notes + + ### Directory Structure + + - ./tensor_runtime: + - ./tensor_runtime/include/: Include files for Tensor Runtime + - ./tensor_runtime/include/tensor_signatures.cc: Include file with Tensor RT signatures + - NOTE: UPDATE this with updated API + - ./tensor_runtime/src/: HPVM TensorRT sources +- ./dnn_sources: + - ./dnn_sources/src/${BENCH}.cc: Per Bench FULL-precision source + - ./dnn_sources/src/half/${BENCH}.cc: Per Bench HALF-precision source + - ./dnn_sources/src/promise/${BENCH}.cc: Per Bench Layer-API source + +- ./bin: + - ./bin/install_runtime.sh: Script for moving Tensor RT files to ./lib + - ./bin/run_autotuner.py: Python script for running Autotuner experiments + - ./bin/setup_tyler_paths.sh: Tyler-specific path setup for Tensor RT + - ./bin/setup_jetson.sh: Jetson board specific path setup for Tensor RT + - ./bin/setup_cuda_paths.sh: Placeholder script for setting CUDA paths + - ./bin/swing_selection.py: Script for hardware mapping + - NOTE: Includes the L2,L1 norm mapping to hardware knobs