diff --git a/llvm/projects/hpvm-tensor-rt/CMakeLists.txt b/llvm/projects/hpvm-tensor-rt/CMakeLists.txt
index d980429560da047f4fdb7dab8eb228e3d1b3419f..c507f09e140dcad205fc17301fee3165faa9c317 100644
--- a/llvm/projects/hpvm-tensor-rt/CMakeLists.txt
+++ b/llvm/projects/hpvm-tensor-rt/CMakeLists.txt
@@ -51,10 +51,9 @@ cuda_add_cublas_to_target(tensor_runtime)
 
 # Adding new rule for building a cuDNN runtime library
 cuda_add_library(tensor_cpu_runtime tensor_runtime/src/tensor_cpu_runtime.cc)
+target_link_libraries(tensor_runtime cudnn cufft stdc++fs -lcurand)
 if(USE_GFLAGS)
-  target_link_libraries(tensor_runtime gflags cudnn cufft -lcurand)
-else()
-  target_link_libraries(tensor_runtime cudnn cufft -lcurand)
+  target_link_libraries(tensor_runtime gflags)
 endif()
 
 target_link_libraries(tensor_cpu_runtime)
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/img_tensor_utils.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/img_tensor_utils.h
index 89df319abbc9ca367a1c086eb8d1de204c49d03e..abd90b00555166d2b797f36a1f125ba6a96b707d 100644
--- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/img_tensor_utils.h
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/img_tensor_utils.h
@@ -13,12 +13,15 @@ void *loadAsImage(const char *filename, size_t n_color = N_RGB_CHAN);
 
 void saveToImage(const char *filename, Tensor *tensor);
 
-Tensor *readDataSet(const char *path, size_t n_color = N_RGB_CHAN);
+Tensor *readDataSet(
+    const char *path, size_t start = 0, size_t count = std::string::npos,
+    size_t n_color = N_RGB_CHAN);
 
-void saveDataSet(const char *path, const char *prefix, Tensor *batch);
+void saveDataSet(const char *path, Tensor *batch, size_t start_idx = 0);
 
 // Kernel constructor
-void *createFilterFromData(int data_type, void *data, size_t w, size_t h, size_t n_chan);
+void *createFilterFromData(
+    int data_type, void *data, size_t w, size_t h, size_t n_chan);
 
 std::vector<float> PSNR(void *gold_ptr, void *approx_ptr);
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/img_tensor_utils.cpp b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/img_tensor_utils.cpp
index 172229d5325531259d2e1f67f927b635a418ffae..6d909a1736482efcadb0797c1b47475c0084ce3c 100644
--- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/img_tensor_utils.cpp
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/img_tensor_utils.cpp
@@ -1,12 +1,11 @@
 #include <cstring>
-#include <dirent.h>
+#include <experimental/filesystem>
 #include <string>
-#include <sys/stat.h>
 
 #include "debug.h"
-#include "img_tensor_utils.h"
-#include "img_tensor_runtime.h"
 #include "device_math.h"
+#include "img_tensor_runtime.h"
+#include "img_tensor_utils.h"
 
 // Image I/O utilities
 #define STB_IMAGE_IMPLEMENTATION
@@ -15,44 +14,6 @@
 #include "image/stb_image.h"
 #include "image/stb_image_write.h"
 
-static inline bool isRegFile(const char *path, dirent *dp) {
-  if (dp->d_type == DT_REG)
-    return true;
-  if (dp->d_type != DT_UNKNOWN)
-    return false;
-  struct stat sb {};
-  if (lstat(path, &sb) == -1) {
-    INFO("lstat failed for file %s\n", path);
-    return false;
-  }
-  mode_t type = sb.st_mode & S_IFMT;
-  return type == S_IFREG;
-}
-
-static inline std::string sample_file(const char *path) {
-  auto dirp = opendir(path);
-  dirent *dp = nullptr;
-  while ((dp = readdir(dirp)) != nullptr) {
-    std::string filename = std::string(path) + "/" + dp->d_name;
-    if (isRegFile(filename.c_str(), dp))
-      return filename;
-  }
-  return "";
-}
-
-static inline size_t count_file(const char *path) {
-  auto dirp = opendir(path);
-  size_t count = 0;
-  dirent *dp = nullptr;
-  while ((dp = readdir(dirp)) != nullptr) {
-    std::string filename = std::string(path) + "/" + dp->d_name;
-    if (isRegFile(filename.c_str(), dp))
-      count++;
-  }
-  (void)closedir(dirp);
-  return count;
-}
-
 static inline uint8_t *float_to_uint8(const float *fl, size_t count) {
   auto *ret = new uint8_t[count];
   float max_v = 0;
@@ -133,26 +94,47 @@ static Tensor *to_nchw(Tensor *t) {
   return out_tensor;
 }
 
-Tensor *readDataSet(const char *path, size_t n_color) {
+namespace fs = std::experimental::filesystem;
+
+static inline std::vector<std::string> listFiles(const std::string &folder) {
+  std::vector<std::string> ret;
+  for (const auto &entry : fs::directory_iterator(folder))
+    ret.push_back(entry.path().string());
+  return ret;
+}
+
+template <typename T>
+std::vector<T>
+sliceVector(const std::vector<T> &in, size_t start, size_t count) {
+  auto slice_begin = in.begin() + start;
+  auto slice_end = count == std::string::npos ? in.end() : slice_begin + count;
+  if (slice_end > in.end())
+    return std::vector<T>();
+  return std::vector<T>(slice_begin, slice_end);
+}
+
+Tensor *
+readDataSet(const char *path, size_t start, size_t count, size_t n_color) {
   INFO("Loading image dataset from path %s\n", path);
-  auto *first_image = (Tensor *)loadAsImage(sample_file(path).c_str(), n_color);
+  std::vector<std::string> filenames =
+      sliceVector(listFiles(path), start, count);
+  if (filenames.empty()) {
+    INFO("Folder is empty or slice is empty\n");
+    return nullptr;
+  }
+
+  auto *first_image = (Tensor *)loadAsImage(filenames[0].c_str(), n_color);
   std::vector<size_t> sizes = ::sizes(first_image);
-  delete first_image;
   size_t h = sizes[2], w = sizes[3];
-  size_t count = count_file(path);
-  DEBUG("Loading shape: (%lu, %lu, %lu, %lu)\n", count, n_color, h, w);
+  DEBUG(
+      "Loading shape: (%lu, %lu, %lu, %lu)\n", filenames.size(), n_color, h, w);
   auto *batch = (Tensor *)create4DTensor(
-      CUDNN_DATA_FLOAT, CUDNN_TENSOR_NHWC, count, h, w, n_color);
+      CUDNN_DATA_FLOAT, CUDNN_TENSOR_NHWC, filenames.size(), h, w, n_color);
   size_t n_floats = n_color * h * w;
   auto *base_data = (float *)batch->host_data;
-  auto dirp = opendir(path);
-  dirent *dp = nullptr;
-  while ((dp = readdir(dirp)) != nullptr) {
-    if (dp->d_type != DT_REG)
-      continue;
-    std::string entry_path = std::string(path) + "/" + dp->d_name;
+  for (const auto &path : filenames) {
     int x, y, n; // x = width, y = height, n = # 8-bit components per pixel
-    uint8_t *data = stbi_load(entry_path.c_str(), &x, &y, &n, n_color);
+    uint8_t *data = stbi_load(path.c_str(), &x, &y, &n, n_color);
     if (data == nullptr)
       throw std::runtime_error("Image load failed");
     float *converted = uint8_to_float(data, n_floats);
@@ -161,15 +143,12 @@ Tensor *readDataSet(const char *path, size_t n_color) {
     delete[] converted;
     base_data += n_floats;
   }
-  (void)closedir(dirp);
   auto *nchw_batch = to_nchw(batch);
-  delete batch;
   DEBUG("Loaded all images.\n");
   return nchw_batch;
 }
 
-void saveDataSet(
-    const char *path, const char *prefix, Tensor *batch) {
+void saveDataSet(const char *path, Tensor *batch, size_t start_idx) {
   INFO("Saving image dataset to path %s\n", path);
   DEBUG("Copying to CPU before printing\n");
   deviceToHostCopy(batch);
@@ -181,10 +160,9 @@ void saveDataSet(
   std::vector<size_t> sizes = ::sizes(converted_batch);
   size_t h = sizes[1], w = sizes[2], c = sizes[3];
   auto *base_data = (float *)batch->host_data;
-  for (size_t i = 0; i < sizes[0]; i++) {
+  for (size_t i = start_idx; i < start_idx + sizes[0]; i++) {
     std::string name = path;
     name += "/";
-    name += prefix;
     name += std::to_string(i);
     name += ".png";
@@ -194,9 +172,6 @@ void saveDataSet(
     base_data += h * w * c;
   }
-  if (batch != converted_batch) {
-    delete converted_batch;
-  }
 }
 
 void *loadAsImage(const char *filename, size_t n_color) {
@@ -212,7 +187,6 @@ void *loadAsImage(const char *filename, size_t n_color) {
   std::memcpy(image->host_data, converted, x * y * n * sizeof(float));
   auto *nchw_image = to_nchw(image);
   stbi_image_free(data);
-  delete image;
   return nchw_image;
 }
 
@@ -230,9 +204,6 @@ void saveToImage(const char *filename, Tensor *tensor) {
   uint8_t *ldr = float_to_uint8(hdr_data, w * h * c);
   stbi_write_png(filename, w, h, c, ldr, 0);
   delete[] ldr;
-  if (tensor != converted_tensor) {
-    delete converted_tensor;
-  }
 }
 
 void *createFilterFromData(
@@ -253,32 +224,29 @@ void *createFilterFromData(
   return tensor;
 }
 
-__device__ float psnr(float x) {
-  return -10 * log10(x);
-}
+__device__ float psnr(float x) { return -10 * log10(x); }
 DEF_FUNC(psnr)
 
 std::vector<float> PSNR(void *gold_ptr, void *approx_ptr) {
-  auto *gold_tensor = (Tensor *) gold_ptr, *approx_tensor = (Tensor *) approx_ptr;
-
-  size_t *dim_sizes = gold_tensor->dims.dim_sizes;
-  size_t batch_dim = dim_sizes[0];
-  size_t image_size = dim_sizes[1] * dim_sizes[2] * dim_sizes[3];
-  float image_size_f = image_size;
-  DEBUG("batch_dim = %lu, image_size = %lu\n", batch_dim, image_size);
-  auto *image_size_tensor = (Tensor *)create4DTensor(
-    CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 1, 1, 1
-  );
-  std::memcpy(image_size_tensor->host_data, &image_size_f, sizeof(float));
-
-  auto *diff = tensorMap2(device::fsub_ptrptr, gold_tensor, approx_tensor);
-  auto *diffsqr = tensorMap2(device::fmul_ptrptr, diff, diff);
-  auto *mse_sum_1d = tensorReduce(diffsqr, 3, device::fadd_ptrptr);
-  auto *mse_sum = tensorReduce(mse_sum_1d, 2, device::fadd_ptrptr);
-  auto *mse_avg = tensorMap2(device::fdiv_ptrptr, mse_sum, image_size_tensor);
-  auto *psnr_val = (Tensor *) tensorMap1(psnr_ptrptr, mse_avg);
-
-  auto *float_data = (float*)psnr_val->host_data;
-  return std::vector<float>(float_data, float_data + batch_dim);
+  auto *gold_tensor = (Tensor *)gold_ptr, *approx_tensor = (Tensor *)approx_ptr;
+
+  size_t *dim_sizes = gold_tensor->dims.dim_sizes;
+  size_t batch_dim = dim_sizes[0];
+  size_t image_size = dim_sizes[1] * dim_sizes[2] * dim_sizes[3];
+  float image_size_f = image_size;
+  DEBUG("batch_dim = %lu, image_size = %lu\n", batch_dim, image_size);
+  auto *image_size_tensor =
+      (Tensor *)create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 1, 1, 1);
+  std::memcpy(image_size_tensor->host_data, &image_size_f, sizeof(float));
+
+  auto *diff = tensorMap2(device::fsub_ptrptr, gold_tensor, approx_tensor);
+  auto *diffsqr = tensorMap2(device::fmul_ptrptr, diff, diff);
+  auto *mse_sum_1d = tensorReduce(diffsqr, 3, device::fadd_ptrptr);
+  auto *mse_sum = tensorReduce(mse_sum_1d, 2, device::fadd_ptrptr);
+  auto *mse_avg = tensorMap2(device::fdiv_ptrptr, mse_sum, image_size_tensor);
+  auto *psnr_val = (Tensor *)tensorMap1(psnr_ptrptr, mse_avg);
+
+  auto *float_data = (float *)psnr_val->host_data;
+  return std::vector<float>(float_data, float_data + batch_dim);
 }
diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/Makefile b/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/Makefile
index 4b9b5baedd896f88429d86c1f9415d20a4b49010..c3666e03a1f50c6ff982c305d49926cc2f524a98 100644
--- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/Makefile
+++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/Makefile
@@ -19,7 +19,7 @@ CUSTOM_LIB_PATHS = $(HPVM_BUILD_DIR)/lib/libtensor_runtime.a $(HPVM_BUILD_DIR)/l
 CC_FLAGS = -I $(LLVM_INCLUDE_DIR) -I $(TENSOR_INCLUDE_DIR) -I $(TENSOR_RT_INCLUDE_DIR) -I $(CUDA_TOOLKIT_ROOT_DIR)/include -fno-exceptions -ffast-math -std=c++11 -O3
 CCFLAGS += -DDEVICE=CUDNN_TARGET
-LINKER_FLAGS = -L $(CUDA_TOOLKIT_ROOT_DIR)/lib64 -lpthread -lcudart -lcurand -lcudnn -lcublas -lOpenCL -lcufft
+LINKER_FLAGS = -L $(CUDA_TOOLKIT_ROOT_DIR)/lib64 -lstdc++fs -lpthread -lcudart -lcurand -lcudnn -lcublas -lOpenCL -lcufft
 
 HPVM_LIB_DIR = $(HPVM_BUILD_DIR)/lib
 
@@ -33,7 +33,7 @@ VISC_OPTFLAGS = -load $(HPVM_LIB_DIR)/LLVMBuildDFG.so -load $(HPVM_LIB_DIR)/LLV
 
-TARGET = $(BUILD_DIR)/$(APP).opt.bc
+TARGET = $(BUILD_DIR)/$(APP).opt.bc direct
 SOURCES = $(SRC_DIR)/$(APP).cpp
 VISC_RT_PATH = $(LLVM_SRC_ROOT)/../build/projects/visc-rt/visc-rt.ll
 
@@ -45,6 +45,9 @@ default: $(BUILD_DIR) $(TARGET)
 $(BUILD_DIR)/%.ll: $(SRC_DIR)/%.cpp
 	$(CC) $(CC_FLAGS) -emit-llvm src/$(APP).cpp -S -o $(BUILD_DIR)/$(APP).ll
 
+direct: $(SRC_DIR)/canny_direct_call.cpp
+	$(CC) $(CC_FLAGS) src/canny_direct_call.cpp $(CUSTOM_LIB_PATHS) -o $(BUILD_DIR)/canny_direct_call $(LINKER_FLAGS)
+
 $(BUILD_DIR)/%.opt.bc: $(BUILD_DIR)/%.ll
 	$(OPT) -load LLVMGenVISC.so -genvisc -globaldce $(BUILD_DIR)/$(APP).ll -S -o $(BUILD_DIR)/$(APP).visc.ll
 	$(OPT) $(VISC_OPTFLAGS) $(BUILD_DIR)/$(APP).visc.ll -o $(BUILD_DIR)/$(APP)_wrapper.bc
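For reference, below is a minimal standalone sketch of the directory-listing and slicing approach the patch introduces in img_tensor_utils.cpp. The names listFiles and sliceVector mirror the new helpers, but the bounds handling is a slightly defensive paraphrase rather than the exact patch code, and the file name sketch.cpp is only illustrative. It builds against GCC's Filesystem TS, which is why the patch links stdc++fs in both the CMake rules and the canny_test Makefile above.

// sketch.cpp -- build: g++ -std=c++11 sketch.cpp -lstdc++fs
#include <experimental/filesystem>
#include <iostream>
#include <string>
#include <vector>

namespace fs = std::experimental::filesystem;

// Collect every entry path under `folder` (mirrors the listFiles helper).
static std::vector<std::string> listFiles(const std::string &folder) {
  std::vector<std::string> ret;
  for (const auto &entry : fs::directory_iterator(folder))
    ret.push_back(entry.path().string());
  return ret;
}

// Return in[start, start + count); count == std::string::npos means
// "to the end", and an out-of-range slice yields an empty vector.
template <typename T>
static std::vector<T>
sliceVector(const std::vector<T> &in, size_t start, size_t count) {
  if (start > in.size())
    return std::vector<T>();
  size_t remaining = in.size() - start;
  size_t len = (count == std::string::npos) ? remaining : count;
  if (len > remaining)
    return std::vector<T>();
  return std::vector<T>(in.begin() + start, in.begin() + start + len);
}

int main(int argc, char **argv) {
  std::string folder = argc > 1 ? argv[1] : ".";
  // List the folder, then keep at most the first 10 entries -- the same
  // pattern readDataSet(path, start, count, n_color) now uses internally.
  for (const auto &p : sliceVector(listFiles(folder), 0, 10))
    std::cout << p << "\n";
  return 0;
}

Compared with the removed dirent-based sample_file/count_file pair, a single listing makes the image count and the first image available from the same vector, which is what allows readDataSet to take a (start, count) window over the dataset and saveDataSet to resume numbering from start_idx.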