diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 1a5fbf0af68a3f9fd37ba297a58b85291179377b..bd5edbd1a467666f67c66be132b3a9d9bbd2d540 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -12,7 +12,7 @@ cache: paths: - hpvm/build/ - hpvm/llvm/ - - hpvm/test/dnn_benchmarks/model_params/ + when: always build: stage: build @@ -24,15 +24,5 @@ build: - ./install.sh -j32 -t "X86" DCMAKE_BUILD_TYPE=Release - cd .. only: - changes: - - hpvm/scripts/hpvm_installer.py - -tests: - stage: test - tags: - - hpvm - script: - - pwd - - source activate hpvm && cd hpvm - - ./install.sh -j32 -t "X86" DCMAKE_BUILD_TYPE=Release - - cd build && make -j32 check-hpvm-pass + - hpvm-release-exp + - merge_requests diff --git a/hpvm/CMakeLists.txt b/hpvm/CMakeLists.txt index 809a30cfa52e16f436dac4e22843f4c5a3add3d9..fcfaf264a64d52bfe13e0023fe92ad12b7cf2016 100644 --- a/hpvm/CMakeLists.txt +++ b/hpvm/CMakeLists.txt @@ -7,11 +7,10 @@ message(STATUS "CUDA Architecture: ${CMAKE_CUDA_ARCHITECTURES}") # find_package will use the auxillary cmake/Find*.cmake we provide list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) -find_package(CUDNN 7 EXACT REQUIRED) # CUDNN_INCLUDE_PATH, CUDNN_LIBRARY_PATH +find_package(CUDNN 7 EXACT REQUIRED) # CUDNN_INCLUDE_PATH, CUDNN_LIBRARY_DIR and CUDNN::cudnn +find_package(OpenCL REQUIRED) # Defines ${OpenCL_INCLUDE_DIRS} and ${OpenCL_LIBRARY} include_directories(./include/) -# find_package will use the auxillary cmake/Find*.cmake we provide -list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) # Generate TENSOR_RT_PREFIX into config.h set(TENSOR_RT_PREFIX ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}) diff --git a/hpvm/cmake/FindCUDNN.cmake b/hpvm/cmake/FindCUDNN.cmake index e5a427f0317a6f3b8f7e7b2cc89fd176fd4362dc..bb4918f704a5fe210f0e25e893e1b33335189add 100644 --- a/hpvm/cmake/FindCUDNN.cmake +++ b/hpvm/cmake/FindCUDNN.cmake @@ -10,8 +10,9 @@ # The following are set after configuration is done: # CUDNN_FOUND # CUDNN_INCLUDE_PATH -# CUDNN_LIBRARY_PATH +# 
CUDNN_LIBRARY_DIR # +# It also provides the IMPORTed target CUDNN::cudnn. include(FindPackageHandleStandardArgs) @@ -45,11 +46,8 @@ endif() find_library(CUDNN_LIBRARY_PATH ${CUDNN_LIBNAME} PATHS ${CUDNN_LIBRARY} PATH_SUFFIXES lib lib64 cuda/lib cuda/lib64 lib/x64) -# Get director from filename ${CUDNN_LIBRARY_PATH} -get_filename_component( - CUDNN_LIBRARY_PATH - "${CUDNN_LIBRARY_PATH}/.." ABSOLUTE -) +# Get directory from filename ${CUDNN_LIBRARY_PATH} +get_filename_component(CUDNN_LIBRARY_DIR "${CUDNN_LIBRARY_PATH}/.." ABSOLUTE) # This version check is from OpenCV repo: https://github.com/opencv/opencv/blob/master/cmake/FindCUDNN.cmake # extract version from the include @@ -80,4 +78,11 @@ find_package_handle_standard_args( VERSION_VAR CUDNN_VERSION ) +if(CUDNN_FOUND AND NOT TARGET CUDNN::cudnn) + # Define the imported target once, and only when cuDNN was found, so a repeated find_package(CUDNN) is harmless. + add_library(CUDNN::cudnn IMPORTED INTERFACE) + target_include_directories(CUDNN::cudnn SYSTEM INTERFACE "${CUDNN_INCLUDE_PATH}") + target_link_libraries(CUDNN::cudnn INTERFACE "${CUDNN_LIBRARY_PATH}") +endif() + mark_as_advanced(CUDNN_ROOT CUDNN_INCLUDE_DIR CUDNN_LIBRARY) diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/include/hpvm.h b/hpvm/include/hpvm.h similarity index 100% rename from hpvm/test/dnn_benchmarks/hpvm-c/include/hpvm.h rename to hpvm/include/hpvm.h diff --git a/hpvm/projects/hpvm-rt/CMakeLists.txt b/hpvm/projects/hpvm-rt/CMakeLists.txt index 6efd8d3d0a9d86236adc87657fb68b782f3daaa0..ad78c35828cf9be9f66f23301fbe4d207222d4d1 100644 --- a/hpvm/projects/hpvm-rt/CMakeLists.txt +++ b/hpvm/projects/hpvm-rt/CMakeLists.txt @@ -3,8 +3,6 @@ add_definitions(-DNUM_CORES=8) SET(CMAKE_C_COMPILER ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/clang) SET(CMAKE_CXX_COMPILER ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/clang++) SET(CMAKE_CXX_STANDARD 11) -# Defines ${OpenCL_INCLUDE_DIRS} and ${OpenCL_LIBRARY} if found -find_package(OpenCL REQUIRED) # This puts libhpvm-rt.a in lib/ which we don't care about # we want ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/hpvm-rt.dir/hpvm-rt.cpp.o diff --git a/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt 
b/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt index a142d524b69cb605b85c496aa140c806ad258dfd..f6fed2ac296f93bc060fe09b3b889b42ee8c4a1a 100644 --- a/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt +++ b/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt @@ -24,10 +24,8 @@ configure_file( # -- Default include directories set( INCLUDES - ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} - ${CUDNN_INCLUDE_PATH} ./tensor_runtime/include ${CMAKE_CURRENT_BINARY_DIR}/tensor_runtime/include - ./dnn_sources/include + ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} ${CUDNN_INCLUDE_PATH} ) # Build gpu_profiler and soc_simulator (dependencies) @@ -39,13 +37,43 @@ add_library(soc_simulator SHARED soc_simulator/promise_timing_model.cpp) target_include_directories(soc_simulator PUBLIC soc_simulator/) # -- Link libraries -find_package(OpenMP REQUIRED) # Provides ${OpenMP_CXX_FLAGS} -set(LINK_DIR CUDNN_LIBRARY_PATH) -set(LINK_LIBS gpu_profiler soc_simulator stdc++fs cudnn curand cublas) +find_package(OpenMP REQUIRED) # Provides ${OpenMP_CXX_FLAGS} and OpenMP::OpenMP_CXX +# This will use the CUDA found by CUDA language support in the root CMake, +# but it exports the CUDA::* targets (used below) so we can freely add libraries to link to. +find_package(CUDAToolkit REQUIRED) +set( + LINK_LIBS + gpu_profiler soc_simulator + CUDA::cublas CUDA::curand CUDNN::cudnn + OpenMP::OpenMP_CXX +) if(USE_GFLAGS) list(APPEND LINK_LIBS gflags) endif() +# -- Manually list the directories (TRT_LINK_DIRS) and libraries (TRT_LINK_LIBS) +# tensor_runtime links to, because CMake doesn't help us do this. +# This is needed by both approxhpvm.py and the RPATH setting step (below). +# First, take a guess at the paths to the libraries that are used here. +# (CMake, why do you make this so difficult?) +foreach(interface_lib ${LINK_LIBS}) + get_target_property(actual_libs ${interface_lib} INTERFACE_LINK_LIBRARIES) + foreach(actual_lib ${actual_libs}) + # ${actual_lib} may not be a path, then taking the directory of it should return "". 
+ get_filename_component(libdir "${actual_lib}" DIRECTORY) + get_filename_component(libname "${actual_lib}" NAME) + if(NOT "${libdir}" STREQUAL "") + list(APPEND TRT_LINK_DIRS ${libdir}) + endif() + if(NOT "${libname}" STREQUAL "" AND NOT "${libname}" STREQUAL "actual_libs-NOTFOUND") + list(APPEND TRT_LINK_LIBS ${libname}) + endif() + endforeach() +endforeach() +# Dedup, just for shorter compiler arguments. +list(REMOVE_DUPLICATES TRT_LINK_DIRS) +list(REMOVE_DUPLICATES TRT_LINK_LIBS) + # -- Definitions set(DEFS -DPROMISE_TUNER_ENABLED -DSIMULATION_MODE=true) if(USE_GFLAGS) @@ -77,16 +105,27 @@ endforeach() # -- Adding tensor_runtime targets function(add_tensor_runtime target_name) add_library(${target_name} SHARED ${RUNTIME_SRCS}) - set_property(TARGET ${target_name} PROPERTY CUDA_ARCHITECTURES 60) + set_target_properties(${target_name} PROPERTIES CUDA_ARCHITECTURES 60) target_compile_options( ${target_name} PRIVATE --expt-relaxed-constexpr -maxrregcount 32 -Xcompiler=${OpenMP_CXX_FLAGS} $<$<CONFIG:DEBUG>:-lineinfo -Xcompiler=-ggdb> ) target_include_directories(${target_name} PUBLIC ${INCLUDES}) - target_link_directories(${target_name} PUBLIC ${LINK_DIR}) - target_link_libraries(${target_name} PUBLIC ${LINK_LIBS} ${OpenMP_CXX_FLAGS}) + target_link_libraries(${target_name} PUBLIC ${LINK_LIBS}) target_compile_definitions(${target_name} PRIVATE ${DEFS} ${ARGN}) + + # We have to manually set rpath because cmake is not willing to comply... + foreach(libdir ${TRT_LINK_DIRS}) + target_link_options(${target_name} PRIVATE "-Wl,-rpath,${libdir}") + endforeach() + # Also slap TRT_LINK_DIRS and TRT_LINK_LIBS on this target + # so that approxhpvm.py can read them. (we'll create our own properties.) 
+ set_target_properties( + ${target_name} PROPERTIES + TRT_LINK_DIRS "${TRT_LINK_DIRS}" + TRT_LINK_LIBS "${TRT_LINK_LIBS}" + ) endfunction(add_tensor_runtime) # Adding new rule for building a cuDNN runtime library @@ -101,8 +140,8 @@ add_tensor_runtime(tensor_runtime_online -DONLINE_PROFILING=true -DFP16_tuning=f add_dependencies(tensor_runtime_online tensor_runtime) # Adding rule for the debugging source -add_executable(unit_tests dnn_sources/src/unit_tests.cc) -target_link_libraries(unit_tests tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB} ${OpenMP_CXX_FLAGS}) +add_executable(unit_tests tests/unit_tests.cc) +target_link_libraries(unit_tests tensor_runtime_online) # -- Compile tensor_runtime.ll if possible if(INDEP_BUILD) @@ -126,6 +165,8 @@ endif() # If some clang-9 is found, create a tensor_runtime.ll from tensor_signatures.cc if(CLANG_NAME) message(STATUS "Creating tensor_runtime.ll in ${TENSOR_RT_LL_PREFIX}") + # Manually add cuda includes because add_custom_command doesn't handle them + # (unlike add_library which has CUDA-lang support). 
foreach(dir ${INCLUDES}) list(APPEND INCLUDE_COMPILER_STRINGS "-I${dir}") endforeach() @@ -136,59 +177,3 @@ if(CLANG_NAME) -o ${TENSOR_RT_LL_PREFIX}/tensor_runtime.ll ) endif() - - -#**************** FP32 TensorRT Source Builds *********** - -add_executable(lenet_mnist_fp32 dnn_sources/src/fp32/lenet_mnist.cc) -target_link_libraries(lenet_mnist_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(alexnet_cifar10_fp32 dnn_sources/src/fp32/alexnet_cifar10.cc) -target_link_libraries(alexnet_cifar10_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(alexnet2_cifar10_fp32 dnn_sources/src/fp32/alexnet2_cifar10.cc) -target_link_libraries(alexnet2_cifar10_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(vgg16_cifar10_fp32 dnn_sources/src/fp32/vgg16_cifar10.cc) -target_link_libraries(vgg16_cifar10_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(resnet18_cifar10_fp32 dnn_sources/src/fp32/resnet18_cifar10.cc) -target_link_libraries(resnet18_cifar10_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(vgg16_cifar100_fp32 dnn_sources/src/fp32/vgg16_cifar100.cc) -target_link_libraries(vgg16_cifar100_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(mobilenet_cifar10_fp32 dnn_sources/src/fp32/mobilenet.cc) -target_link_libraries(mobilenet_cifar10_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(alexnet_imagenet_fp32 dnn_sources/src/fp32/alexnet_imagenet.cc) -target_link_libraries(alexnet_imagenet_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(vgg16_imagenet_fp32 dnn_sources/src/fp32/vgg16_imagenet.cc) -target_link_libraries(vgg16_imagenet_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(resnet50_imagenet_fp32 dnn_sources/src/fp32/resnet50_imagenet.cc) 
-target_link_libraries(resnet50_imagenet_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -#********* FP16 TensorRT Source Builds ****** - -add_executable(lenet_mnist_fp16 dnn_sources/src/fp16/lenet_mnist_half.cc) -target_link_libraries(lenet_mnist_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(alexnet_cifar10_fp16 dnn_sources/src/fp16/alexnet_cifar10_half.cc) -target_link_libraries(alexnet_cifar10_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(alexnet2_cifar10_fp16 dnn_sources/src/fp16/alexnet2_cifar10_half.cc) -target_link_libraries(alexnet2_cifar10_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(resnet18_cifar10_fp16 dnn_sources/src/fp16/resnet18_cifar10_half.cc) -target_link_libraries(resnet18_cifar10_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(vgg16_cifar10_fp16 dnn_sources/src/fp16/vgg16_cifar10_half.cc) -target_link_libraries(vgg16_cifar10_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(vgg16_cifar100_fp16 dnn_sources/src/fp16/vgg16_cifar100_half.cc) -target_link_libraries(vgg16_cifar100_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(mobilenet_cifar10_fp16 dnn_sources/src/fp16/mobilenet_half.cc) -target_link_libraries(mobilenet_cifar10_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensorUtils.h similarity index 50% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h rename to hpvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensorUtils.h index 61fd362afcc665e21a7ba8636c8df778ac95184e..d4961d19b9326daa4571d066dfe2b3177f6a78d4 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h +++ 
b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensorUtils.h @@ -3,23 +3,35 @@ #ifndef UTILS_HEADER #define UTILS_HEADER -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> #include <sstream> #include <vector> #include <bits/stdc++.h> #include <tensor_runtime.h> #include <tensor.h> #include <cmath> -#include <string.h> + std::vector<float> run_accuracies; std::string model_params_path = "../../test/dnn_benchmarks/model_params/"; + +void printTensorInfo(void *tensor_ptr) { + + struct Tensor *tensor = (struct Tensor *)tensor_ptr; + + if (tensor->gpu_data != NULL) { + printf("Successful cudaMalloc \n"); + } + + printf("tensor dims = %d \n", tensor->dims.num_dims); + printf("dim1_size = %lu \n", tensor->dims.dim_sizes[0]); + printf("dim2_size = %lu \n", tensor->dims.dim_sizes[1]); + printf("num_elems = %lu \n", tensor->num_elems); +} + // FIXIT: Move this to debug.h and include in all files -void dumpWeightsToFile(const char *file_name, void *weights_ptr) { +void dumpWeightsToFile(char *file_name, void *weights_ptr) { + struct Tensor *weights = (Tensor *)weights_ptr; // Move data back to host hpvm_request_tensor(weights, 0); @@ -31,6 +43,10 @@ void dumpWeightsToFile(const char *file_name, void *weights_ptr) { abort(); } + // printf("size_in_bytes = %lu \n", weights->size_in_bytes); + size_t bytes_written = + fwrite(weights->host_data, 1, weights->size_in_bytes, fp); + // printf("bytes_written = %lu \n", bytes_written); fclose(fp); } @@ -58,21 +74,13 @@ void fillWithOnesAndTwos(void *tensor_ptr) { // initialization is specific to the floating point type if (tensor->data_type == CUDNN_DATA_FLOAT) { float *data_arr = (float *)tensor->host_data; - - for (unsigned int i = 0; i < tensor->num_elems; i++) { - if (i % 2 == 0) - data_arr[i] = 1.0; - else - data_arr[i] = 2.0; - } - - /*for(unsigned int i = 0; i < tensor->num_elems/2; i++){ + for (unsigned int i = 0; i < tensor->num_elems / 2; i++) { data_arr[i] = 1.0; } - for(unsigned int i = 
tensor->num_elems/2; i < tensor->num_elems; i++){ + for (unsigned int i = tensor->num_elems / 2; i < tensor->num_elems; i++) { data_arr[i] = 2.0; - }*/ + } } } @@ -106,6 +114,18 @@ void fillTensorWithNegOnes(void *tensor_ptr) { } } +void fillTensorVals(void *tensor_ptr) { + + struct Tensor *tensor = (struct Tensor *)tensor_ptr; + // initialization is specific to the floating point type + if (tensor->data_type == CUDNN_DATA_FLOAT) { + float *data_arr = (float *)tensor->host_data; + for (unsigned int i = 0; i < tensor->num_elems; i++) { + data_arr[i] = i + 1; + } + } +} + void printTensorValues(void *tensor_ptr) { struct Tensor *tensor = (struct Tensor *)tensor_ptr; @@ -113,14 +133,11 @@ void printTensorValues(void *tensor_ptr) { hpvm_request_tensor(tensor, 0); // printing is specific to the floating point type - if (tensor->data_type != CUDNN_DATA_FLOAT) { - // printf("\n WARNING: The tensor is non-float type tensor \n\n"); - } - - float *data_arr = (float *)tensor->host_data; - - for (unsigned int i = 0; i < tensor->num_elems; i++) { - printf("%f,", data_arr[i]); + if (tensor->data_type == CUDNN_DATA_FLOAT) { + float *data_arr = (float *)tensor->host_data; + for (unsigned int i = 0; i < tensor->num_elems; i++) { + printf("%f,", data_arr[i]); + } } printf("\n"); @@ -131,11 +148,49 @@ void printTensorDims(void *tensor_ptr) { struct Tensor *tensor = (struct Tensor *)tensor_ptr; printf("Num_elems = %lu \n", tensor->num_elems); - for (int i = 0; i < tensor->dims.num_dims; i++) { + for (unsigned int i = 0; i < tensor->dims.num_dims; i++) { printf("dim[%d] = %lu \n", i, tensor->dims.dim_sizes[i]); } } +void compareTensors(void *tensor1_ptr, void *tensor2_ptr) { + + struct Tensor *tensor1 = (struct Tensor *)tensor1_ptr; + struct Tensor *tensor2 = (struct Tensor *)tensor2_ptr; + + hpvm_request_tensor(tensor1, 0); + hpvm_request_tensor(tensor2, 0); + + float *tensor_data1 = (float *)tensor1->host_data; + float *tensor_data2 = (float *)tensor2->host_data; + + for (unsigned 
int i = 0; i < tensor1->num_elems; i++) { + if (tensor_data1[i] != tensor_data2[i]) { + printf("Tensor data mismatch at index %d \n", i); + abort(); + } + } +} + +void compareValues(void *tensor_ptr, float *data, size_t num_elems) { + + struct Tensor *tensor = (struct Tensor *)tensor_ptr; + + hpvm_request_tensor(tensor, 0); + + float *tensor_data = (float *)tensor->host_data; + for (unsigned int i = 0; i < num_elems; i++) { + if (tensor_data[i] != data[i]) { + printf("Tensor data mismatch"); + abort(); + } + } +} + + + + + struct Tensor *readTrainedWeights(const char *file_name, int data_type, long int dim1_size, long int dim2_size, long int dim3_size, long int dim4_size) { @@ -146,7 +201,7 @@ struct Tensor *readTrainedWeights(const char *file_name, int data_type, long int size_in_bytes = type_size * dim1_size * dim2_size * dim3_size * dim4_size; float *tensor_data = (float *)malloc(sizeof(float) * num_elems); - // printf("size_in_bytes = %lu \n", size_in_bytes); + printf("size_in_bytes = %lu \n", size_in_bytes); int file_header_size = 0; @@ -157,7 +212,11 @@ struct Tensor *readTrainedWeights(const char *file_name, int data_type, } fseek(file, file_header_size, SEEK_CUR); // Skipping the file header - fread(tensor_data, 1, size_in_bytes, file); + size_t bytes_read = fread(tensor_data, 1, size_in_bytes, file); + + // printf("size in bytes = %lu, bytes read = %lu \n", size_in_bytes, + // bytes_read); + fclose(file); struct Tensor *weights = (struct Tensor *)create4DTensor( @@ -170,9 +229,9 @@ struct Tensor *readTrainedWeights(const char *file_name, int data_type, return weights; } -struct Tensor *readInputBatch(const char *file_name, int data_type, - long int start, long int end, long int dim2_size, - long int dim3_size, long int dim4_size) { +struct Tensor *readInputBatch(const char *file_name, long data_type, + long start, long end, + long dim2_size, long dim3_size, long dim4_size) { long int dim1_size = end - start; // FIXIT: Don't assume floating point types @@ 
-191,9 +250,12 @@ struct Tensor *readInputBatch(const char *file_name, int data_type, } fseek(file, file_header_size, SEEK_SET); // Skipping the file header - fread(tensor_data, 1, size_in_bytes, file); + size_t bytes_read = fread(tensor_data, 1, size_in_bytes, file); + fclose(file); + // printf ("FIXED input BATCH read \n"); + struct Tensor *weights = (struct Tensor *)create4DTensor( data_type, nchw, dim1_size, dim2_size, dim3_size, dim4_size); @@ -203,10 +265,7 @@ struct Tensor *readInputBatch(const char *file_name, int data_type, return weights; } -uint8_t *readLabelsBatch(const char *labels_file, int start, int end) { - - int num_labels = end - start; - int file_header_size = sizeof(uint8_t) * start; +uint8_t *readLabels(const char *labels_file, int num_labels) { uint8_t *labels = (uint8_t *)malloc(sizeof(uint8_t) * num_labels); FILE *file = fopen(labels_file, "rb"); @@ -215,14 +274,30 @@ uint8_t *readLabelsBatch(const char *labels_file, int start, int end) { abort(); } - fseek(file, file_header_size, SEEK_SET); // Skipping the file header - fread(labels, 1, sizeof(uint8_t) * num_labels, file); + size_t bytes_read = fread(labels, 1, sizeof(uint8_t) * num_labels, file); + + fclose(file); + + return labels; +} + +uint32_t *readLabels3(const char *labels_file, int num_labels) { + + uint32_t *labels = (uint32_t *)malloc(sizeof(uint32_t) * num_labels); + FILE *file = fopen(labels_file, "rb"); + if (file == NULL) { + printf("Data file %s is not found. 
Aborting...\n", labels_file); + abort(); + } + + size_t bytes_read = fread(labels, 1, sizeof(uint32_t) * num_labels, file); + fclose(file); - // printf("--labels bytes_read = %lu \n", bytes_read); return labels; } + uint32_t *readLabelsBatch3(const char *labels_file, int start, int end) { int num_labels = end - start; @@ -236,29 +311,31 @@ uint32_t *readLabelsBatch3(const char *labels_file, int start, int end) { } fseek(file, file_header_size, SEEK_SET); // Skipping the file header - fread(labels, 1, sizeof(uint32_t) * num_labels, file); + + size_t bytes_read = fread(labels, 1, sizeof(uint32_t) * num_labels, file); + fclose(file); return labels; } -// NOTE: batch_size and num_classes are Unused arguments -float computeAccuracy2(uint8_t *labels, int batch_size, void *result_ptr, - size_t num_classes = 10) { + + +float computeAccuracy3(uint32_t *labels, void *result_ptr) { struct Tensor *result = (struct Tensor *)result_ptr; size_t batch_dim = result->dims.dim_sizes[0]; - num_classes = result->dims.dim_sizes[1]; + size_t num_classes = result->dims.dim_sizes[1]; float *data = (float *)result->host_data; int num_errors = 0; - printf("batch_dim = %lu, channels = %lu \n", batch_dim, num_classes); + printf("batch_dim = %lu, num_classes = %lu \n", batch_dim, num_classes); for (unsigned int i = 0; i < batch_dim; i++) { int chosen = 0; - for (size_t id = 1; id < num_classes; ++id) { + for (unsigned int id = 1; id < num_classes; ++id) { if (data[i * num_classes + chosen] < data[i * num_classes + id]) chosen = id; } @@ -285,27 +362,49 @@ float computeAccuracy2(uint8_t *labels, int batch_size, void *result_ptr, return accuracy; } -float computeAccuracy3(uint32_t *labels, void *result_ptr) { +struct ClassProb { + float prob; + int index; +}; + +bool descendFloatComp(ClassProb obj1, ClassProb obj2) { + return obj1.prob > obj2.prob; +} + +float computeTop5Accuracy(uint8_t *labels, int num_labels, void *result_ptr, + unsigned num_classes = 10) { struct Tensor *result = (struct 
Tensor *)result_ptr; size_t batch_dim = result->dims.dim_sizes[0]; - size_t num_classes = result->dims.dim_sizes[1]; + size_t channels = result->dims.dim_sizes[1]; float *data = (float *)result->host_data; int num_errors = 0; - printf("batch_dim = %lu, num_classes = %lu \n", batch_dim, num_classes); + printf("batch_dim = %lu, channels = %lu \n", batch_dim, channels); - for (size_t i = 0; i < batch_dim; i++) { + for (unsigned int i = 0; i < num_labels; i++) { - uint32_t chosen = 0; - for (size_t id = 1; id < num_classes; ++id) { - if (data[i * num_classes + chosen] < data[i * num_classes + id]) - chosen = id; + std::vector<ClassProb> elem_probs; + for (unsigned int id = 0; id < num_classes; ++id) { + ClassProb cProb; + cProb.prob = data[i * channels + id]; + cProb.index = id; + elem_probs.push_back(cProb); } - if (chosen != labels[i]) - num_errors++; + // Rank the classes by descending probability. + std::sort(elem_probs.begin(), elem_probs.end(), descendFloatComp); + // Check if any of top-5 predictions matches (fewer when num_classes < 5) + bool matched = false; + for (size_t j = 0; j < 5 && j < elem_probs.size(); j++) { + ClassProb cProb = elem_probs[j]; + if (cProb.index == labels[i]) + matched = true; + } + + if (!matched) + num_errors += 1; } float accuracy = ((batch_dim - num_errors) * 1.0 / batch_dim * 1.0) * 100.0; @@ -344,11 +443,38 @@ void dumpFinalAccuracy(float accuracy) { run_accuracies.push_back(accuracy); } +void dumpAvgPSNR(float avg_psnr) { + + FILE *fp = fopen("avg_psnr", "w+"); + if (fp != NULL) { + std::ostringstream ss; + ss << std::fixed << avg_psnr; + std::string print_str = ss.str(); + fwrite(print_str.c_str(), 1, print_str.length(), fp); + // Close only a stream that was actually opened. + fclose(fp); + } +} + +void dumpPSNRStd(float psnr_std) { + + FILE *fp = fopen("psnr_std.txt", "w+"); + if (fp != NULL) { + std::ostringstream ss; + ss << std::fixed << psnr_std; + std::string print_str = ss.str(); + fwrite(print_str.c_str(), 1, print_str.length(), fp); + // Close only a stream that was actually opened. + fclose(fp); + } +} + + void dumpExecutionAccuracies() { FILE *fp = fopen("run_accuracies.txt", "w+"); if (fp != NULL) { - for (size_t 
i = 0; i < run_accuracies.size(); i++) { + for (unsigned int i = 0; i < run_accuracies.size(); i++) { float accuracy = run_accuracies[i]; std::ostringstream ss; ss << std::fixed << accuracy; @@ -360,4 +486,110 @@ void dumpExecutionAccuracies() { fclose(fp); } + +float readPSNRFromFile(const char *file_name) { + + float psnr = 0.0f; // defined result even if fscanf() parses nothing + FILE *pFile = fopen(file_name, "r"); + if (pFile == NULL) { + printf("ERROR: psnr.txt not found! \n"); + abort(); + } + fscanf(pFile, "%f", &psnr); + fclose(pFile); // release the handle once the value has been read + printf("**** PSNR read = %f \n\n", psnr); + return psnr; +} + +float computePSNRViolation(void *gold_ptr, void *approx_ptr, + float PSNR_threshold) { + + PSNR_threshold = readPSNRFromFile("psnr.txt"); + std::vector<float> psnr_list; + + struct Tensor *gold_tensor = (struct Tensor *)gold_ptr; + struct Tensor *approx_tensor = (struct Tensor *)approx_ptr; + + size_t *dim_sizes = gold_tensor->dims.dim_sizes; + size_t batch_dim = dim_sizes[0]; + size_t image_size = dim_sizes[1] * dim_sizes[2] * dim_sizes[3]; + + printf("batch_dim = %lu, image_size = %lu \n", batch_dim, image_size); + + float *gold_data = (float *)gold_tensor->host_data; + float *approx_data = (float *)approx_tensor->host_data; + + FILE *fp = fopen("img_psnr.txt", "w+"); + + float sum_psnr = 0.0; + int num_errors = 0; + for (size_t i = 0; i < batch_dim; i++) { + float mse_sum = 0.0; + float max_val = -999999; + size_t offset = i * image_size; + + for (size_t j = 0; j < image_size; j++) { + float diff = gold_data[offset + j] - approx_data[offset + j]; + float diff_square = diff * diff; + mse_sum += diff_square; + + if (max_val < gold_data[offset + j]) { + max_val = gold_data[offset + j]; + } + } + + mse_sum = mse_sum / image_size; + float psnr = 20 * log10(255 / sqrt(mse_sum)); + + sum_psnr += psnr; + if (psnr < PSNR_threshold) + num_errors += 1; + + printf("PSNR value = %f \n", psnr); + psnr_list.push_back(psnr); + + std::ostringstream ss; + ss << std::fixed << psnr; + std::string print_str = ss.str(); + fwrite(print_str.c_str(), 1, 
print_str.length(), fp); + fwrite("\n", 1, 1, fp); + } + + float violation_rate = (num_errors * 1.0) / batch_dim * 100.0; + printf("*** violation_rate= %f \n\n", violation_rate); + + float avg_psnr = sum_psnr / batch_dim; + printf("*** avg_psnr = %f \n\n", avg_psnr); + dumpAvgPSNR(avg_psnr); + + float success_rate = 100.0 - violation_rate; + dumpFinalAccuracy(success_rate); + + fclose(fp); + + float var = 0.0; + for (size_t i = 0; i < batch_dim; i++) { + var = var + (psnr_list[i] - avg_psnr) * (psnr_list[i] - avg_psnr); + } + + var /= batch_dim; + float std = sqrt(var); + + dumpPSNRStd(std); + + return violation_rate; +} + +void dumpOutput(void *output_ptr, const char *file_name) { + + struct Tensor *out_tensor = (struct Tensor *)output_ptr; + size_t size_in_bytes = out_tensor->size_in_bytes; + printf("** Output size = %lu \n", size_in_bytes); + + float *host_data = (float *)out_tensor->host_data; + FILE *fd = fopen(file_name, "w+"); + fwrite(host_data, 1, size_in_bytes, fd); + fclose(fd); +} + #endif diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/unit_tests.cc b/hpvm/projects/hpvm-tensor-rt/tests/unit_tests.cc similarity index 99% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/unit_tests.cc rename to hpvm/projects/hpvm-tensor-rt/tests/unit_tests.cc index 746f62bce19b25c3b74bec4908cdc3c87bee034a..ffb4c3a809b3e936f6c27ebd7c11aef5c4460104 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/unit_tests.cc +++ b/hpvm/projects/hpvm-tensor-rt/tests/unit_tests.cc @@ -6,7 +6,7 @@ #include <string.h> #include "tensor_runtime.h" #include "tensor_cpu_runtime.h" -#include "utils.h" +#include "tensorUtils.h" #include "tensor_custom_ops_cpu.h" using namespace std; diff --git a/hpvm/test/CMakeLists.txt b/hpvm/test/CMakeLists.txt index 660003538fe72d45e3dbfc1178fd296cdc7156b5..cb548b84dc9944e54c7dfdd0a0d74cf0aed9aafd 100644 --- a/hpvm/test/CMakeLists.txt +++ b/hpvm/test/CMakeLists.txt @@ -1,4 +1,5 @@ include(../cmake/TestFile.cmake) # Generation of 
`.test` files in CMake add_subdirectory(hpvm_pass) # Passes test suite -add_subdirectory(dnn_benchmarks/hpvm-c) # DNN accuracy test suite +add_subdirectory(dnn_benchmarks/hpvm-c) # HPVM-C DNN accuracy test suite +add_subdirectory(dnn_benchmarks/tensor-rt-src) # tensor_runtime DNN (build only, no tests) add_subdirectory(dnn_benchmarks/profiling) # hpvm-profiler test suite diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/CMakeLists.txt b/hpvm/test/dnn_benchmarks/hpvm-c/CMakeLists.txt index 3a0c6534e02ce82fdfd02f483f71c6be1a9ab433..6664827014d40c8a101f0aa30499228345edc460 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/CMakeLists.txt +++ b/hpvm/test/dnn_benchmarks/hpvm-c/CMakeLists.txt @@ -1,50 +1,18 @@ # First get approxhpvm.py which we then use to compile benchmarks. get_filename_component(APPROXHPVM_PY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/approxhpvm.py REALPATH) -# Configure config.h which tells the benchmarks where's the model parameter directory. -# We can also use the one in tensor_runtime, but we're avoiding that so as to -# decouple things. +# Each source file contains a @MODEL_PARAMS_DIR@ waiting to be filled in. set(MODEL_PARAMS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../model_params/") -configure_file( - "include/config.h.in" - "${CMAKE_CURRENT_BINARY_DIR}/include/config.h" -) -# This will be an extra include directory (specific to these benchmarks) -# and we'll give this to approxhpvm.py -set(CONFIG_INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/include") - -# --[ llvm-lit test setup -# lit.cfg.py looks for tests in CMAKE_CURRENT_BINARY_DIR (see lit.cfg.py) -# as most of the tests require some kind of compilation / generation -# which is best done over there. 
-configure_lit_site_cfg( - ../../lit.site.cfg.py.in - ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg.py - MAIN_CONFIG - ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py -) -add_lit_testsuite(check-hpvm-dnn "Running HPVM DNNs" - ${CMAKE_CURRENT_BINARY_DIR} - DEPENDS dnn_benchmarks # Compile all dnn benchmarks to run them - ARGS "-j1" # Run DNN benchmarks sequentially -) -# Install an accuracy comparator under build/bin -set(BIN_DIR ${LLVM_BINARY_DIR}/${LLVM_TOOLS_INSTALL_DIR}) -add_custom_command( - OUTPUT ${BIN_DIR}/check_dnn_acc.py - COMMAND cp ${CMAKE_CURRENT_SOURCE_DIR}/check_dnn_acc.py ${BIN_DIR} - COMMAND chmod +x ${BIN_DIR}/check_dnn_acc.py - DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/check_dnn_acc.py -) - set(test_compile_targets "") function(compile_hpvm_c bin_filename src_filepath codegen_target) + set(generated_file_path "${CMAKE_CURRENT_BINARY_DIR}/${bin_filename}.cpp") + configure_file(${src_filepath} ${generated_file_path} @ONLY) add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${bin_filename} - DEPENDS ${src_filepath} approxhpvm.py + DEPENDS ${generated_file_path} approxhpvm.py COMMAND ${APPROXHPVM_PY} - ${src_filepath} ${CMAKE_CURRENT_BINARY_DIR}/${bin_filename} - -t ${codegen_target} -I ${CONFIG_INCLUDE_DIR} ${ARGV} + ${generated_file_path} ${CMAKE_CURRENT_BINARY_DIR}/${bin_filename} + -t ${codegen_target} -I ${CMAKE_CURRENT_SOURCE_DIR}/include ${ARGN} ) add_custom_target(${bin_filename} DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${bin_filename}) set(test_compile_targets ${test_compile_targets} ${bin_filename} PARENT_SCOPE) @@ -79,6 +47,31 @@ foreach(dir ${entries}) hpvm_add_dnn_test(${dirname}_cudnn) endforeach(dir) +# Install an accuracy comparator under build/bin for test suite. 
+set(BIN_DIR ${LLVM_BINARY_DIR}/${LLVM_TOOLS_INSTALL_DIR}) +add_custom_command( + OUTPUT ${BIN_DIR}/check_dnn_acc.py + COMMAND cp ${CMAKE_CURRENT_SOURCE_DIR}/check_dnn_acc.py ${BIN_DIR} + COMMAND chmod +x ${BIN_DIR}/check_dnn_acc.py + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/check_dnn_acc.py +) + message(STATUS "List of HPVM-C DNN benchmarks: ${test_compile_targets}") add_custom_target(dnn_benchmarks DEPENDS ${test_compile_targets} ${BIN_DIR}/check_dnn_acc.py) message(STATUS "Target name for compiling all DNN benchmarks: dnn_benchmarks") + +# --[ llvm-lit test setup +# lit.cfg.py looks for tests in CMAKE_CURRENT_BINARY_DIR (see lit.cfg.py) +# as most of the tests require some kind of compilation / generation +# which is best done over there. +configure_lit_site_cfg( + ../../lit.site.cfg.py.in + ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg.py + MAIN_CONFIG + ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py +) +add_lit_testsuite(check-hpvm-dnn "Running HPVM DNNs" + ${CMAKE_CURRENT_BINARY_DIR} + DEPENDS dnn_benchmarks # Compile all dnn benchmarks to run them + ARGS "-j1" # Run DNN benchmarks sequentially +) diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10.cpp index 860e3b6423bc78d073096a981f765bed10fb73a7..39f49784d76470c4e0bab213127369806e1e2531 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10.cpp @@ -429,7 +429,7 @@ int main(int argc, char *argv[]) { } } - std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/alexnet2_cifar10/"; + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/alexnet2_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git 
a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10_cudnn.cpp index f44e19dece121cb01a1f3e6a8bf9e27ea945e6ce..dafd1a6ae084c4e1bf819ce1ac94e667c696eb24 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10_cudnn.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10_cudnn.cpp @@ -434,7 +434,7 @@ int main(int argc, char *argv[]) { } } - std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/alexnet2_cifar10/"; + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/alexnet2_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10.cpp index 6d8973ad982b1aa3b206a0cf40ee1888c37e293f..64350c590bb181fa4eaab4b2bf5fb37f69e11c09 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10.cpp @@ -380,7 +380,7 @@ int main(int argc, char *argv[]) { } } - std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/alexnet_cifar10/"; + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/alexnet_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); uint8_t *labels = readLabels(labels_path.c_str(), 5000); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10_cudnn.cpp index 
b2a940d501d8b1c2e29dbe7240012ace8197bbb4..72af2ff4a1b33aabac427d203101c32c4a7403c7 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10_cudnn.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10_cudnn.cpp @@ -386,7 +386,7 @@ int main(int argc, char *argv[]) { } } - std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/alexnet_cifar10/"; + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/alexnet_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); uint32_t *labels = readLabels3(labels_path.c_str(), 5000); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp index 474ab64cadf3eac158d39e6e1e6686765c3bac36..37e7a34a51a14b6903d549f271d3c0c83822fec8 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp @@ -478,7 +478,7 @@ int main(int argc, char *argv[]) { } } - std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/alexnet_imagenet/"; + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/alexnet_imagenet/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet_cudnn.cpp index 10e95202f2e2188a9dcd1c12a168a612f897fcf9..1206d7bac4b9dcff2b4cfd7183f4a3e5f65d73d9 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet_cudnn.cpp +++ 
b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet_cudnn.cpp @@ -483,7 +483,7 @@ int main(int argc, char *argv[]) { } } - std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/alexnet_imagenet/"; + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/alexnet_imagenet/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist.cpp index 5c42f6953cfd9256cea73b39868a7ec571f18565..d7ab4238ebac5598b92c432aced85a602bb5ce89 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist.cpp @@ -282,7 +282,7 @@ int main(int argc, char *argv[]) { } } - std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/lenet_mnist/"; + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/lenet_mnist/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist_cudnn.cpp index 0c2568f81b701cb474a257b190be61b4bba45f3e..26acc65a99287ea9f20e037dd996635315d76e48 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist_cudnn.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist_cudnn.cpp @@ -287,7 +287,7 @@ int main(int argc, char *argv[]) { } } - std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/lenet_mnist/"; + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + 
"/lenet_mnist/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10.cpp index 01d027341686291c83e605bdeee1bbcffa68d6e9..5f8c63dbfbfb800dc6f60f9ed9a6108dee0a9a48 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10.cpp @@ -1984,7 +1984,7 @@ int main(int argc, char *argv[]) { } std::string dir_prefix = - std::string(MODEL_PARAMS_DIR) + "/mobilenet_cifar10/"; + std::string("@MODEL_PARAMS_DIR@") + "/mobilenet_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10_cudnn.cpp index e51e85dd980dd910389ec4415174e6e005f75c41..2070089053ef0b6e7e0ca33c2c6cc4cea17b8e29 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10_cudnn.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10_cudnn.cpp @@ -1989,7 +1989,7 @@ int main(int argc, char *argv[]) { } std::string dir_prefix = - std::string(MODEL_PARAMS_DIR) + "/mobilenet_cifar10/"; + std::string("@MODEL_PARAMS_DIR@") + "/mobilenet_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git 
a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10.cpp index fa83c534d0639241205758018f8f7c37401e6b22..5b580f26821e67cc96c8347e485b792f40105176 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10.cpp @@ -1318,7 +1318,7 @@ int main(int argc, char *argv[]) { } } - std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/resnet18_cifar10/"; + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/resnet18_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10_cudnn.cpp index c7b789c2343a8dfd1e847652af2bd1d6adfd51f1..735e2c9abab91f00560faa5496e234321027b82c 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10_cudnn.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10_cudnn.cpp @@ -1249,7 +1249,7 @@ int main(int argc, char *argv[]) { } } - std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/resnet18_cifar10/"; + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/resnet18_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); uint32_t *labels = readLabels3(labels_path.c_str(), 5000); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp index 
91d07e30469e675fd2027f29290e35a0db888174..160563064cc47effd463c4915b0c7f0d93bff56f 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp @@ -5151,7 +5151,7 @@ int main(int argc, char *argv[]) { } std::string dir_prefix = - std::string(MODEL_PARAMS_DIR) + "/resnet50_imagenet/"; + std::string("@MODEL_PARAMS_DIR@") + "/resnet50_imagenet/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet_cudnn.cpp index 932580e03e7ccc4495d8d76be2f7147369e36d68..c5cf2cb3a0177a5cce9ad0cf460484e63ded0ecd 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet_cudnn.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet_cudnn.cpp @@ -4927,7 +4927,7 @@ int main(int argc, char *argv[]) { } std::string dir_prefix = - std::string(MODEL_PARAMS_DIR) + "/resnet50_imagenet/"; + std::string("@MODEL_PARAMS_DIR@") + "/resnet50_imagenet/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp index 195c676c11d53b19e0d18ed4908198a929d188aa..bec6139c2d089e90d09fa239e1b15c9a835fd4ea 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp +++ 
b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp @@ -845,7 +845,7 @@ int main(int argc, char *argv[]) { } } - std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/vgg16_cifar10/"; + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/vgg16_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10_cudnn.cpp index c304237ea57ba15d48cff0773860cdc469fc2a04..4fa7d5c121bacff122821fe983ed443e3c6db249 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10_cudnn.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10_cudnn.cpp @@ -850,7 +850,7 @@ int main(int argc, char *argv[]) { } } - std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/vgg16_cifar10/"; + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/vgg16_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp index 4cd5c134293d85983146352175e278915ab1d2ba..8666030fba4390d29d9324f5a5c7d60324325f05 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp @@ -845,7 +845,7 @@ int main(int argc, char *argv[]) { } } - std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/vgg16_cifar100/"; + std::string dir_prefix = 
std::string("@MODEL_PARAMS_DIR@") + "/vgg16_cifar100/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100_cudnn.cpp index 532fca6b856f296624c21e9a18421763c4b70f48..6d01caa3b7c0875cff4f3e16131ddd09195e92b7 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100_cudnn.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100_cudnn.cpp @@ -850,7 +850,7 @@ int main(int argc, char *argv[]) { } } - std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/vgg16_cifar100/"; + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/vgg16_cifar100/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp index 8e299f40e6ddd04a3ce9f8d9dffff49b1de36189..b1b2b4f2e312b6372e10a2fce3ef12eab2dddded 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp @@ -893,7 +893,7 @@ int main(int argc, char *argv[]) { } } - std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/vgg16_imagenet/"; + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/vgg16_imagenet/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + 
std::string("conv2d_1_w.bin"); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet_cudnn.cpp index 930a33e43c706e6e91475fc97671c39c23f63387..eb29e45805671072428318412f27b05d0da90199 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet_cudnn.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet_cudnn.cpp @@ -898,7 +898,7 @@ int main(int argc, char *argv[]) { } } - std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/vgg16_imagenet/"; + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/vgg16_imagenet/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/include/config.h.in b/hpvm/test/dnn_benchmarks/hpvm-c/include/config.h.in deleted file mode 100644 index 0eb8f3f24d0e51f2aaa12f1fd672043599490082..0000000000000000000000000000000000000000 --- a/hpvm/test/dnn_benchmarks/hpvm-c/include/config.h.in +++ /dev/null @@ -1 +0,0 @@ -#define MODEL_PARAMS_DIR "@MODEL_PARAMS_DIR@" diff --git a/hpvm/test/dnn_benchmarks/tensor-rt-src/CMakeLists.txt b/hpvm/test/dnn_benchmarks/tensor-rt-src/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..6e22eba67471855971005bf9e57ed0aa38dafff8 --- /dev/null +++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/CMakeLists.txt @@ -0,0 +1,40 @@ +#**************** FP32 and FP16 TensorRT Source Builds *********** +# Don't put binaries in build/bin. This doesn't affect global setting. 
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + +set(MODEL_PARAMS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../model_params/") +set(test_compile_targets "") +function(add_trt_source target_name filepath) + set(generated_file_path "${CMAKE_CURRENT_BINARY_DIR}/${target_name}.cpp") + configure_file(${filepath} ${generated_file_path}) + add_executable(${target_name} ${generated_file_path}) + target_link_libraries(${target_name} tensor_runtime_online) + set(test_compile_targets ${test_compile_targets} ${target_name} PARENT_SCOPE) +endfunction(add_trt_source) + +set( + DNN_NAMES + alexnet_cifar10 + alexnet2_cifar10 + vgg16_cifar10 + resnet18_cifar10 + vgg16_cifar100 + mobilenet_cifar10 + alexnet_imagenet + vgg16_imagenet + resnet50_imagenet +) +foreach(dnn_name ${DNN_NAMES}) + # FP32 version + if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/fp32/${dnn_name}.cc") + add_trt_source(${dnn_name}_fp32 "${CMAKE_CURRENT_SOURCE_DIR}/fp32/${dnn_name}.cc") + endif() + # FP16 version + if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/fp16/${dnn_name}_half.cc") + add_trt_source(${dnn_name}_fp16 "${CMAKE_CURRENT_SOURCE_DIR}/fp16/${dnn_name}_half.cc") + endif() +endforeach() + +message(STATUS "List of tensor_runtime DNN benchmarks: ${test_compile_targets}") +add_custom_target(trt_dnn_benchmarks DEPENDS ${test_compile_targets}) +message(STATUS "Target name for compiling all DNN benchmarks: trt_dnn_benchmarks") diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/alexnet2_cifar10_half.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/alexnet2_cifar10_half.cc similarity index 96% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/alexnet2_cifar10_half.cc rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/alexnet2_cifar10_half.cc index 0b344035296bdbab2744e32604f3a8881feb6230..ab80718fd33d0b9787be4a0f183e3a7a65dc76e7 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/alexnet2_cifar10_half.cc +++ 
b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/alexnet2_cifar10_half.cc @@ -1,7 +1,8 @@ -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" +#include "tensor_runtime.h" +#include "tensorUtils.h" + /* NOTE: Reference Architecture to use for profiling */ void testCifarNet() { @@ -9,8 +10,7 @@ void testCifarNet() { printf("********* Alexnet2 CIFAR-10 DNN ********** \n"); std::string dir_prefix = - model_params_path + std::string("/alexnet2_cifar10/"); - std::string input_path = dir_prefix + std::string("test_input.bin"); + std::string("@MODEL_PARAMS_DIR@") + "/alexnet2_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/alexnet_cifar10_half.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/alexnet_cifar10_half.cc similarity index 95% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/alexnet_cifar10_half.cc rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/alexnet_cifar10_half.cc index 7d493b8720ab701f87fdd53b315da7eafecf6637..b3b69d6b695eca9286b90685f3e071e234887d27 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/alexnet_cifar10_half.cc +++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/alexnet_cifar10_half.cc @@ -1,13 +1,14 @@ -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" +#include "tensor_runtime.h" +#include "tensorUtils.h" + int main() { llvm_hpvm_initTensorRt(0); - std::string dir_prefix = model_params_path + std::string("/alexnet_cifar10/"); + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/alexnet_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); diff --git 
a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/lenet_mnist_half.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/lenet_mnist_half.cc similarity index 97% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/lenet_mnist_half.cc rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/lenet_mnist_half.cc index 03dc905bbfcb07ad9a266fc153cd1a6a0db9837e..44b78b9169707fd6c7b9ff6503a4a9aa8d2ec947 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/lenet_mnist_half.cc +++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/lenet_mnist_half.cc @@ -1,7 +1,8 @@ #include "tensor_runtime.h" -#include "utils.h" +#include "tensorUtils.h" + /* NOTE: Reference Architecture to use for profiling */ void testLenetTanh() { @@ -11,7 +12,7 @@ void testLenetTanh() { int test_batch_size = 5000; - std::string dir_prefix = model_params_path + std::string("/lenet_mnist/"); + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/lenet_mnist/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/mobilenet_half.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/mobilenet_cifar10_half.cc similarity index 99% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/mobilenet_half.cc rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/mobilenet_cifar10_half.cc index d6eaef755743ce961d3d9c2f013eef26a77579f7..d4423bf4345756e72ad46b140ae8cafc26eae264 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/mobilenet_half.cc +++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/mobilenet_cifar10_half.cc @@ -1,15 +1,15 @@ -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" +#include "tensor_runtime.h" +#include "tensorUtils.h" + int main() { llvm_hpvm_initTensorRt(0); std::string dir_prefix = - model_params_path + std::string("/mobilenet_cifar10/"); - + 
std::string("@MODEL_PARAMS_DIR@") + "/mobilenet_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/resnet18_cifar10_half.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/resnet18_cifar10_half.cc similarity index 98% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/resnet18_cifar10_half.cc rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/resnet18_cifar10_half.cc index 40e128eb8a80f6e080c090589a3e91b80ffa082f..76dea5ef08713d22fe7086b678bb3274378d0fd9 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/resnet18_cifar10_half.cc +++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/resnet18_cifar10_half.cc @@ -1,15 +1,15 @@ -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" +#include "tensor_runtime.h" +#include "tensorUtils.h" + int main() { llvm_hpvm_initTensorRt(0); std::string dir_prefix = - model_params_path + std::string("/resnet18_cifar10/"); - + std::string("@MODEL_PARAMS_DIR@") + "/resnet18_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/vgg16_cifar100_half.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/vgg16_cifar100_half.cc similarity index 98% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/vgg16_cifar100_half.cc rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/vgg16_cifar100_half.cc index eb3275b83009ec4300e9cb713f3b182727661db4..2772fd3da42d50aa2ff5391d1e3c85c610a4960a 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/vgg16_cifar100_half.cc +++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/vgg16_cifar100_half.cc @@ -1,13 +1,14 @@ -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" +#include 
"tensor_runtime.h" +#include "tensorUtils.h" + int main() { llvm_hpvm_initTensorRt(0); - std::string dir_prefix = model_params_path + std::string("/vgg16_cifar100/"); + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/vgg16_cifar100/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/vgg16_cifar10_half.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/vgg16_cifar10_half.cc similarity index 98% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/vgg16_cifar10_half.cc rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/vgg16_cifar10_half.cc index 2a4b5a6a914698f621284e0f5b19843b817068df..954f6778b899d2cefb2b28d68a32fad33d52f70c 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/vgg16_cifar10_half.cc +++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/vgg16_cifar10_half.cc @@ -1,13 +1,14 @@ -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" +#include "tensor_runtime.h" +#include "tensorUtils.h" + int main() { llvm_hpvm_initTensorRt(0); - std::string dir_prefix = model_params_path + std::string("/vgg16_cifar10/"); + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/vgg16_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet2_cifar10.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/alexnet2_cifar10.cc similarity index 96% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet2_cifar10.cc rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/alexnet2_cifar10.cc index 396e9f11cae92c2f6613b5acb799caecbf025a59..e7431234d705449efa0fc5aafe23238e89be1d30 100644 --- 
a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet2_cifar10.cc +++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/alexnet2_cifar10.cc @@ -1,7 +1,8 @@ +#include "tensor_runtime.h" +#include "tensorUtils.h" + -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" /* NOTE: Reference Architecture to use for profiling */ void testCifarNet() { @@ -9,8 +10,7 @@ void testCifarNet() { printf("********* Alexnet2 CIFAR-10 DNN ********** \n"); std::string dir_prefix = - model_params_path + std::string("/alexnet2_cifar10/"); - std::string input_path = dir_prefix + std::string("test_input.bin"); + std::string("@MODEL_PARAMS_DIR@") + "/alexnet2_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet_cifar10.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/alexnet_cifar10.cc similarity index 96% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet_cifar10.cc rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/alexnet_cifar10.cc index 600512078563baf850f440ea97e78cb2d73be170..12c304c9b401c586a0da4658b092f2b791268983 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet_cifar10.cc +++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/alexnet_cifar10.cc @@ -1,13 +1,14 @@ -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" +#include "tensor_runtime.h" +#include "tensorUtils.h" + int main() { llvm_hpvm_initTensorRt(0); - std::string dir_prefix = model_params_path + std::string("/alexnet_cifar10/"); + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/alexnet_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); // std::string labels_path = dir_prefix + std::string("labels.bin"); diff --git 
a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet_imagenet.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/alexnet_imagenet.cc similarity index 98% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet_imagenet.cc rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/alexnet_imagenet.cc index 29909e5938ca0f700c4ee22165ae2ad354e53a32..b57e60c0fef41b283ad57a7b203759a8f014252d 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet_imagenet.cc +++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/alexnet_imagenet.cc @@ -1,15 +1,15 @@ #include "tensor_runtime.h" -#include "utils.h" +#include "tensorUtils.h" + int main() { llvm_hpvm_initTensorRt(0); std::string dir_prefix = - model_params_path + std::string("/alexnet_imagenet/"); - + std::string("@MODEL_PARAMS_DIR@") + "/alexnet_imagenet/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/lenet_mnist.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/lenet_mnist.cc similarity index 97% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/lenet_mnist.cc rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/lenet_mnist.cc index 61a0eeb441458ff6f91af8bc76ecc17a33428aec..9777670722b69c8b23a82a77312d17386f2d5c3f 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/lenet_mnist.cc +++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/lenet_mnist.cc @@ -1,7 +1,8 @@ #include "tensor_runtime.h" -#include "../include/utils.h" +#include "tensorUtils.h" + int total_runs = 1; @@ -10,7 +11,7 @@ void testLenetTanh() { int test_batch_size = 5000; - std::string dir_prefix = model_params_path + std::string("/lenet_mnist/"); + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/lenet_mnist/"; std::string input_path = dir_prefix + 
std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/mobilenet.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/mobilenet_cifar10.cc similarity index 99% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/mobilenet.cc rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/mobilenet_cifar10.cc index 85849126cf164693d12fb08aba8326033ca61b82..3e37bf7feb6641af3afdeb8fb9f3a65fdfcbdce3 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/mobilenet.cc +++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/mobilenet_cifar10.cc @@ -1,15 +1,14 @@ -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" +#include "tensor_runtime.h" +#include "tensorUtils.h" int main() { llvm_hpvm_initTensorRt(0); std::string dir_prefix = - model_params_path + std::string("/mobilenet_cifar10/"); - std::string input_path = dir_prefix + std::string("test_input.bin"); + std::string("@MODEL_PARAMS_DIR@") + "/mobilenet_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); void *conv2d_1_w = diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/resnet18_cifar10.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/resnet18_cifar10.cc similarity index 98% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/resnet18_cifar10.cc rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/resnet18_cifar10.cc index bd3dd7dc1ea23f3cb8ad91e8632b347dd51a848b..c8a99419a81d19b374642c21c977a511413f9ae2 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/resnet18_cifar10.cc +++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/resnet18_cifar10.cc @@ -1,15 +1,15 @@ -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" +#include 
"tensor_runtime.h" +#include "tensorUtils.h" + int main() { llvm_hpvm_initTensorRt(0); std::string dir_prefix = - model_params_path + std::string("/resnet18_cifar10/"); - + std::string("@MODEL_PARAMS_DIR@") + "/resnet18_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/resnet50_imagenet.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/resnet50_imagenet.cc similarity index 99% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/resnet50_imagenet.cc rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/resnet50_imagenet.cc index 0cccb124b0dca81d45887df50c4a9bcaf2a21db5..3aeabc22736e6955a9ad5ad07144fc38057616ea 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/resnet50_imagenet.cc +++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/resnet50_imagenet.cc @@ -1,15 +1,15 @@ -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" +#include "tensor_runtime.h" +#include "tensorUtils.h" + int main() { llvm_hpvm_initTensorRt(0); std::string dir_prefix = - model_params_path + std::string("/resnet50_imagenet/"); - + std::string("@MODEL_PARAMS_DIR@") + "/resnet50_imagenet/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_cifar10.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/vgg16_cifar10.cc similarity index 98% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_cifar10.cc rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/vgg16_cifar10.cc index 813874f0ed888ed5889b0574de454ca2720c944d..f7fffadfc36ba0fd248371efb35a1b7dfede68d3 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_cifar10.cc +++ 
b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/vgg16_cifar10.cc @@ -1,13 +1,13 @@ -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" +#include "tensor_runtime.h" +#include "tensorUtils.h" int main() { llvm_hpvm_initTensorRt(0); - std::string dir_prefix = model_params_path + std::string("/vgg16_cifar10/"); + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/vgg16_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_cifar100.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/vgg16_cifar100.cc similarity index 98% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_cifar100.cc rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/vgg16_cifar100.cc index 84164bf24bbff855b7a0975d7df2883e674b34c8..d3949c7cc568063f3b344d8497551fa1f4f4102c 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_cifar100.cc +++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/vgg16_cifar100.cc @@ -1,13 +1,14 @@ -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" +#include "tensor_runtime.h" +#include "tensorUtils.h" + int main() { llvm_hpvm_initTensorRt(0); - std::string dir_prefix = model_params_path + std::string("/vgg16_cifar100/"); + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/vgg16_cifar100/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_imagenet.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/vgg16_imagenet.cc similarity index 98% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_imagenet.cc rename to 
hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/vgg16_imagenet.cc index eca833b08ff374e208f0cafabbf598cd0f7b5d90..2bb1be2821a8d33062bf1cfd83bb978f59884fa9 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_imagenet.cc +++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/vgg16_imagenet.cc @@ -1,13 +1,14 @@ #include "tensor_runtime.h" -#include "utils.h" +#include "tensorUtils.h" + int main() { llvm_hpvm_initTensorRt(0); - std::string dir_prefix = model_params_path + std::string("/vgg16_imagenet/"); + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/vgg16_imagenet/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); diff --git a/hpvm/tools/py-approxhpvm/CMakeLists.txt b/hpvm/tools/py-approxhpvm/CMakeLists.txt index f93f96dd0578a67630cc859bba2e24e071b39299..f9d9d6ec60825fc1e858a28e0598189fd6b1cda5 100644 --- a/hpvm/tools/py-approxhpvm/CMakeLists.txt +++ b/hpvm/tools/py-approxhpvm/CMakeLists.txt @@ -1,15 +1,27 @@ # This file is very tightly coupled with main.py.in. # Watch out and keep them in sync. +# main.py.in (to become approxhpvm.py) requires the following variables: +# LLVM_PROJECT_DIR, LLVM_BUILD_DIR +# TRT_PATH, TRT_INCLUDE_DIRS, TRT_LINK_DIRS, TRT_LINK_LIBS +# DIRECT_LINK_LIBS +# AVAILABLE_PASSES, HPVM_RT_PATH set(LLVM_PROJECT_DIR ${CMAKE_SOURCE_DIR}) set(LLVM_BUILD_DIR ${CMAKE_BINARY_DIR}) -set(LIB_DIR ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}) + +get_target_property(TRT_INCLUDE_DIRS tensor_runtime INCLUDE_DIRECTORIES) +get_target_property(TRT_LINK_DIRS tensor_runtime TRT_LINK_DIRS) +get_target_property(TRT_LINK_LIBS tensor_runtime TRT_LINK_LIBS) + +# This is defined globally. We need to manually link to this +# because OpenCL functions are injected by HPVM Passes. 
+set(DIRECT_LINK_LIBS ${OpenCL_LIBRARY} "$<TARGET_FILE:tensor_runtime>") + # The hpvm-rt runtime # This has to be explicitly set as hpvm-rt.bc is created in a custom_target # and does not export its file location. # Keep this in sync with hpvm/projects/hpvm-rt/CMakeLists.txt. set(HPVM_RT_PATH ${LLVM_BUILD_DIR}/tools/hpvm/projects/hpvm-rt/hpvm-rt.bc) -set(TENSOR_RUNTIME_LIB "$<TARGET_FILE:tensor_runtime>") set( AVAILABLE_PASSES LLVMBuildDFG @@ -21,8 +33,7 @@ set( LLVMClearDFG LLVMGenHPVM ) -# CUDA_TOOLKIT_ROOT_DIR and CUDNN_LIBRARY_PATH has been defined globally -set(CUDNN_DIR ${CUDNN_LIBRARY_PATH}) + # First resolve all `@symbol@` by configuring the file configure_file(main.py.in ${CMAKE_CURRENT_BINARY_DIR}/main.py.conf) # Then resolve all generator expressions we configured into the previous file @@ -32,17 +43,7 @@ file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/main.py INPUT ${CMAKE_CURRENT_B # so we can set these dependencies on it set( DEPS - tensor_runtime - LLVMBuildDFG - LLVMInPlaceDFGAnalysis - LLVMDFG2LLVM_CPU - LLVMDFG2LLVM_CUDNN - LLVMDFG2LLVM_WrapperAPI - LLVMFuseHPVMTensorNodes - LLVMClearDFG - LLVMGenHPVM - hpvm-rt.bc - clang opt llvm-link + tensor_runtime hpvm-rt.bc clang opt llvm-link ${AVAILABLE_PASSES} ) add_custom_command( OUTPUT ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/approxhpvm.py diff --git a/hpvm/tools/py-approxhpvm/main.py.in b/hpvm/tools/py-approxhpvm/main.py.in index af706a1eaa7a53879e525d87dd5034caf814db38..7b211911643c64d8bf2c34ef8a43e3ac98cdd88a 100644 --- a/hpvm/tools/py-approxhpvm/main.py.in +++ b/hpvm/tools/py-approxhpvm/main.py.in @@ -7,25 +7,17 @@ from typing import List, Union, Optional PathLike = Union[Path, str] HPVM_PROJECT_DIR = Path("@LLVM_PROJECT_DIR@") / "tools/hpvm" -LLVM_BUILD_DIR = Path("@LLVM_BUILD_DIR@") / "bin" -CUDA_TOOLKIT_ROOT_DIR = Path("@CUDA_TOOLKIT_ROOT_DIR@") -TENSOR_RUNTIME_LIB = Path("@TENSOR_RUNTIME_LIB@") +LLVM_BUILD_BIN = Path("@LLVM_BUILD_DIR@") / "bin" + +# Directories to include +TRT_INCLUDE_DIRS = 
"@TRT_INCLUDE_DIRS@".split(";") +TRT_LINK_DIRS = [Path(s) for s in "@TRT_LINK_DIRS@".split(";")] +TRT_LINK_LIBS = "@TRT_LINK_LIBS@".split(";") +DIRECT_LINK_LIBS = "@DIRECT_LINK_LIBS@".split(";") + AVAILABLE_PASSES = "@AVAILABLE_PASSES@".split(";") HPVM_RT_PATH = "@HPVM_RT_PATH@" -CUDNN_DIR = "@CUDNN_DIR@" -# Directories to include -INCLUDE_DIRS = [ - HPVM_PROJECT_DIR / "include", # HPVM include dir - # Tensor runtime include dir - HPVM_PROJECT_DIR / "projects/hpvm-tensor-rt/tensor_runtime/include", - HPVM_PROJECT_DIR / "test/dnn_benchmarks/hpvm-c/include", # hpvm-c intrinsics decl dir - CUDA_TOOLKIT_ROOT_DIR / "include", # CUDA include dir -] -LINK_DIRS = [CUDA_TOOLKIT_ROOT_DIR / "lib64", CUDNN_DIR, TENSOR_RUNTIME_LIB.parent] -LINK_LIBS = [ - "pthread", "cudart", "curand", "cudnn", "cublas", "cufft", "OpenCL", "stdc++fs", "omp", "m" -] COMPILE_FLAGS = ["fno-exceptions", "std=c++11", "O3"] @@ -72,10 +64,10 @@ def hpvm_c_to_ll( flags: List[str] = None, ) -> List[str]: extra_includes = extra_includes or [] - includes = [f"-I{path}" for path in INCLUDE_DIRS + extra_includes] + includes = [f"-I{path}" for path in TRT_INCLUDE_DIRS + extra_includes] flags = [f"-{flg}" for flg in (flags or []) + COMPILE_FLAGS] return [ - str(LLVM_BUILD_DIR / "clang++"), *includes, *flags, "-emit-llvm", "-S", + str(LLVM_BUILD_BIN / "clang++"), *includes, *flags, "-emit-llvm", "-S", str(src_file), "-o", str(target_file) ] @@ -115,17 +107,31 @@ def opt_codegen_tensor( def link_hpvm_rt(src_file: PathLike, target_file: PathLike) -> List[str]: - return [str(LLVM_BUILD_DIR / "llvm-link"), str(src_file), HPVM_RT_PATH, "-o", str(target_file)] + return [str(LLVM_BUILD_BIN / "llvm-link"), str(src_file), HPVM_RT_PATH, "-o", str(target_file)] def link_binary(src_file: PathLike, target_file: PathLike) -> List[str]: + def drop_suffix(libname: str): + import re + + match = re.match(r"lib(.*)\.so", libname) + return libname if match is None else match.group(1) + + link_dirs, link_libnames = [], [] + for 
lib in DIRECT_LINK_LIBS: + lib = Path(lib) + link_dirs.append(lib.parent) + link_libnames.append(drop_suffix(lib.name)) + link_dirs += TRT_LINK_DIRS + link_libnames += TRT_LINK_LIBS + linker_dir_flags = [] - for path in LINK_DIRS: + for path in link_dirs: linker_dir_flags.extend([f"-L{path}", f"-Wl,-rpath={path}"]) - linker_lib_flags = [f"-l{lib}" for lib in LINK_LIBS] + linker_lib_flags = [f"-l{drop_suffix(lib)}" for lib in link_libnames] return [ - str(LLVM_BUILD_DIR / "clang++"), str(src_file), str(TENSOR_RUNTIME_LIB), "-o", str(target_file), - *linker_dir_flags, *linker_lib_flags + str(LLVM_BUILD_BIN / "clang++"), str(src_file), + "-o", str(target_file), *linker_dir_flags, *linker_lib_flags ] @@ -141,7 +147,7 @@ def _run_opt( load_passes_strs = [s for pass_ in pass_names for s in ["-load", f"{pass_}.so"]] pass_flags_strs = [f"-{flag}" for flag in pass_flags] return [ - str(LLVM_BUILD_DIR / "opt"), *load_passes_strs, *pass_flags_strs, + str(LLVM_BUILD_BIN / "opt"), *load_passes_strs, *pass_flags_strs, "-S", str(src_file), "-o", str(target_file) ] @@ -154,6 +160,7 @@ def parse_args(): "-t", "--codegen-target", type=str, + required=True, choices=["tensor", "cudnn"], help="Backend to use", ) @@ -165,7 +172,7 @@ def parse_args(): help="File to approximation configurations; required for 'tensor' target" ) parser.add_argument( - "-I", "--include", type=Path, nargs="+", + "-I", "--include", type=Path, action="append", help="Additional include directories to use" )