diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000000000000000000000000000000000000..eae98cd748ea4690e5c8128fe95223ce9534a127
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "extern/spdlog"]
+	path = extern/spdlog
+	url = https://github.com/gabime/spdlog.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9aee9f94644d0133f1fc1dea0a6559506df4d9b6..9109ba62247dd177182d6f9721bf16d0b9bfaefa 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -64,11 +64,13 @@ endif()
 option(DEBUG "Build with debug flags" False)
 option(USE_NCCL "Use NCCL for single node GPU peer communication" False)
 option(USE_NVTX "Build with NVTX profiling ranges" False)
+option(USE_LOGGER "Build with SPDLog" True) # (not yet optional)
 
 message(STATUS "USE_CUDA: ${USE_CUDA}")
 message(STATUS "DEBUG: ${DEBUG}")
 message(STATUS "USE_NCCL: ${USE_NCCL}")
 message(STATUS "USE_NVTX: ${USE_NVTX}")
+message(STATUS "USE_LOGGER: ${USE_LOGGER}")
 
 ## Set flags before adding executable
 
@@ -96,6 +98,18 @@ if(USE_NCCL)
   add_definitions(-DUSE_NCCL)
   target_link_libraries("${PROJECT_NAME}" PRIVATE nccl)
 endif()
+if(USE_LOGGER)
+  # spdlog is vendored as a git submodule in extern/spdlog; add_subdirectory()
+  # provides the spdlog::spdlog_header_only target that is linked further down.
+  # Use an absolute include path: src/ and src/Tests/ also consume
+  # ${spdlog_DIR}, and a relative path would be resolved against those directories.
+  set(spdlog_DIR "${CMAKE_CURRENT_SOURCE_DIR}/extern/spdlog/include")
+  set(CMAKE_spdlog_DIR "${spdlog_DIR}")
+  add_subdirectory(extern/spdlog)
+  include_directories(${spdlog_DIR})
+  # Compile-time log threshold handed to the compiler as SPDLOG_ACTIVE_LEVEL.
+  set(SPDLOG_LEVEL SPDLOG_LEVEL_DEBUG)
+endif()
 
 ## Two lines below needed?
 set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)
@@ -147,5 +161,11 @@ endif()
 if(USE_NVTX)
   target_link_libraries("${PROJECT_NAME}" PUBLIC nvToolsExt)
 endif()
+if(USE_LOGGER)
+  add_definitions(-DUSE_LOGGER)
+  target_include_directories("${PROJECT_NAME}" PRIVATE ${spdlog_DIR})
+  target_link_libraries("${PROJECT_NAME}" PRIVATE spdlog::spdlog_header_only)
+  add_compile_definitions(SPDLOG_ACTIVE_LEVEL=${SPDLOG_LEVEL})
+endif()
 
 install(TARGETS "${PROJECT_NAME}")
diff --git a/extern/spdlog b/extern/spdlog
new file mode 160000
index 0000000000000000000000000000000000000000..1ef8d3ce348daf5d580e27fc68e91628ce42c1f4
--- /dev/null
+++ b/extern/spdlog
@@ -0,0 +1 @@
+Subproject commit 1ef8d3ce348daf5d580e27fc68e91628ce42c1f4
diff --git a/src/ARBDException.h b/src/ARBDException.h
index 810c143a8579061ed05ba4aad3041c8723535f79..d310a33857ff422f8da52359beeffe87f55fc539 100644
--- a/src/ARBDException.h
+++ b/src/ARBDException.h
@@ -9,6 +9,7 @@
 #include <string>
 #include <cstdarg>
 #include <exception>
+#include "SignalManager.h"
 
 enum ExceptionType {
     UnspeficiedError,
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index db6290e506a3458881fe33be5e05a201017a16b7..742832a7e83fa4fefc7c2117f96873e470694a01 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -11,3 +11,9 @@ add_library("lib${PROJECT_NAME}"
   SignalManager.cpp
   PatchOp.cu
 )
+if(USE_LOGGER)
+  add_definitions(-DUSE_LOGGER)
+  target_include_directories("lib${PROJECT_NAME}" PRIVATE ${spdlog_DIR})
+  target_link_libraries("lib${PROJECT_NAME}" PRIVATE spdlog::spdlog_header_only)
+  add_compile_definitions(SPDLOG_ACTIVE_LEVEL=${SPDLOG_LEVEL})
+endif()
diff --git a/src/GPUManager.cpp b/src/GPUManager.cpp
index 45a9faec474903e7534dee762f49ac3c037fadd2..3f8dab9920567b7edbcb85e3a1ec849e11158fd3 100644
--- a/src/GPUManager.cpp
+++ b/src/GPUManager.cpp
@@ -20,17 +20,18 @@ std::vector<GPU> GPUManager::allGpus, GPUManager::gpus, GPUManager::notimeouts;
 GPU::GPU(unsigned int id) : id(id) {
     cudaSetDevice(id);
     cudaGetDeviceProperties(&properties, id);
-    printf("[%d] %s ", id, properties.name);
+    // Optional fragment of the one-line device summary logged below.
+    const char* timeout_str = "";
     if (properties.kernelExecTimeoutEnabled) {
-        printf("(may timeout) ");
+        timeout_str = "(may timeout) ";
         may_timeout = true;
     } else {
         may_timeout = false;
     }
-    printf("| SM %d.%d, ", properties.major, properties.minor);
-    printf("%.2fGHz, ", (float) properties.clockRate * 10E-7);
-    printf("%.1fGB RAM\n", (float) properties.totalGlobalMem * 7.45058e-10);
-
+    INFO("[{}] {} {}| SM {}.{}, {:.2f}GHz, {:.1f}GB RAM",
+         id, properties.name, timeout_str, properties.major, properties.minor,
+         (float) properties.clockRate * 10E-7, (float) properties.totalGlobalMem * 7.45058e-10);
+
     streams_created = false;
     // fflush(stdout);
     // gpuErrchk( cudaDeviceSynchronize() );
@@ -59,12 +60,12 @@ void GPU::create_streams() {
 
 void GPU::destroy_streams() {
     int curr;
-    // printf("Destroying streams\n");
+    TRACE("Destroying streams");
     if (cudaGetDevice(&curr) == cudaSuccess) { // Avoid errors when program is shutting down
         gpuErrchk( cudaSetDevice(id) );
         if (streams_created) {
             for (int i = 0; i < NUMSTREAMS; i++) {
-                // printf(" destroying stream %d at %p\n", i, (void *) &streams[i]);
+                TRACE(" destroying stream {} at {}", i, fmt::ptr((void *) &streams[i]));
                 gpuErrchk( cudaStreamDestroy( streams[i] ) );
             }
         }
@@ -76,7 +77,7 @@ void GPU::destroy_streams() {
 
 void GPUManager::init() {
     gpuErrchk(cudaGetDeviceCount(&nGPUs));
-    printf("Found %d GPU(s)\n", nGPUs);
+    INFO("Found {} GPU(s)", nGPUs);
     for (int dev = 0; dev < nGPUs; dev++) {
         GPU g(dev);
         allGpus.push_back(g);
@@ -84,7 +85,9 @@ void GPUManager::init() {
     }
     is_safe = false;
     if (allGpus.size() == 0) {
-        fprintf(stderr, "Error: Did not find a GPU\n");
+        // NOTE(review): assumes Exception(...) reports the message (e.g. by throwing);
+        // if it only constructs an object the text is lost before exit(1) - confirm.
+        Exception(ValueError, "Did not find a GPU\n");
         exit(1);
     }
 }
@@ -96,19 +99,21 @@ void GPUManager::load_info() {
 }
 
 void GPUManager::init_devices() {
-    printf("Initializing devices... ");
+    INFO("Initializing GPU devices... ");
+    // Collect the ids into one string so the device list is logged as a single record.
+    std::string msg;
     for (unsigned int i = 0; i < gpus.size(); i++) {
         if (i != gpus.size() - 1 && gpus.size() > 1)
-            printf("%d, ", gpus[i].id);
+            msg += std::to_string(gpus[i].id) + ", ";
         else if (gpus.size() > 1)
-            printf("and %d\n", gpus[i].id);
+            msg += "and " + std::to_string(gpus[i].id);
         else
-            printf("%d\n", gpus[i].id);
-
+            msg += std::to_string(gpus[i].id);
         use(i);
         cudaDeviceSetCacheConfig( cudaFuncCachePreferL1 );
         gpus[i].create_streams();
     }
+    INFO("Initializing GPUs: {}", msg);
     use(0);
     gpuErrchk( cudaDeviceSynchronize() );
 }
diff --git a/src/Proxy.h b/src/Proxy.h
index bec32e21995bbd15ef6c49725188294c1c45636c..78ee47dc5d8643e1dcd392ebc708a7eb37acb8dd 100644
--- a/src/Proxy.h
+++ b/src/Proxy.h
@@ -1,5 +1,7 @@
 #pragma once
 
+#include <future>
 #include <iostream>
+#include <utility>
 #include "ARBDException.h"
 
@@ -10,7 +12,7 @@ struct Resource {
     /**
      * @brief Enum to specify the type of the resource (e.g., CPU or GPU).
      */
-    enum ResourceType {CPU, GPU};
+    enum ResourceType {CPU, MPI, GPU};
     ResourceType type; ///< Type of the resource.
     size_t id; ///< ID or any other identifier associated with the resource.
     // HOST DEVICE static bool is_local() { // check if thread/gpu idx matches some global idx };
@@ -56,6 +58,70 @@ public:
      */
     Resource location; ///< The device (thread/gpu) holding the data represented by the proxy.
     T* addr; ///< The address of the underlying object.
+
+    /**
+     * @brief Call a member function of the proxied object and return its result.
+     *
+     * The call arguments are deduced separately from the member function's
+     * parameter list, so implicit conversions and reference parameters work.
+     * Only non-const member functions match this signature.
+     *
+     * @param memberFunc Pointer to the member function of T to invoke on addr.
+     * @param args Arguments forwarded to memberFunc.
+     * @return The result of memberFunc when the resource is a CPU. For GPU, MPI
+     *         and unknown resource types nothing is called: an error is written
+     *         to std::cerr and a value-initialized RetType is returned.
+     */
+    template <typename RetType, typename... Args, typename... CallArgs>
+    RetType callSync(RetType (T::*memberFunc)(Args...), CallArgs&&... args) {
+        switch (location.type) {
+        case Resource::CPU:
+            return (addr->*memberFunc)(std::forward<CallArgs>(args)...);
+        case Resource::GPU:
+            // TODO: GPU dispatch is not implemented yet
+            std::cerr << "Error: GPU not implemented in synchronous call." << std::endl;
+            return RetType{};
+        case Resource::MPI:
+            // TODO: MPI dispatch is not implemented yet
+            std::cerr << "Error: MPI not implemented in synchronous call." << std::endl;
+            return RetType{};
+        default:
+            std::cerr << "Error: Unknown resource type." << std::endl;
+            return RetType{};
+        }
+    }
+
+    /**
+     * @brief Call a member function of the proxied object on a separate thread.
+     *
+     * The object address and decayed copies of the arguments are handed to
+     * std::async, so the task does not refer back to this Proxy. The proxied
+     * object itself must stay alive until the returned future is ready.
+     *
+     * @param memberFunc Pointer to the member function of T to invoke on addr.
+     * @param args Arguments copied (or moved) into the asynchronous task.
+     * @return A future for the result of memberFunc when the resource is a CPU.
+     *         For GPU, MPI and unknown resource types an error is written to
+     *         std::cerr and the future yields a value-initialized RetType.
+     */
+    template <typename RetType, typename... Args, typename... CallArgs>
+    std::future<RetType> callAsync(RetType (T::*memberFunc)(Args...), CallArgs&&... args) {
+        switch (location.type) {
+        case Resource::CPU:
+            return std::async(std::launch::async, memberFunc, addr, std::forward<CallArgs>(args)...);
+        case Resource::GPU:
+            // TODO: GPU dispatch is not implemented yet
+            std::cerr << "Error: GPU not implemented in asynchronous call." << std::endl;
+            return std::async(std::launch::async, [] { return RetType{}; });
+        case Resource::MPI:
+            // TODO: MPI dispatch is not implemented yet
+            std::cerr << "Error: MPI not implemented in asynchronous call." << std::endl;
+            return std::async(std::launch::async, [] { return RetType{}; });
+        default:
+            std::cerr << "Error: Unknown resource type." << std::endl;
+            return std::async(std::launch::async, [] { return RetType{}; });
+        }
+    }
 };
 
 /**
@@ -99,11 +165,10 @@ HOST inline Proxy<T> _send_ignoring_children(const Resource& location, T& obj, T
  */
 template <typename T, typename Dummy = void, typename std::enable_if_t<!has_send_children<T>::value, Dummy>* = nullptr>
 HOST inline Proxy<T> send(const Resource& location, T& obj, T* dest = nullptr) {
-    printf("Sending object %s @%x to device at %x\n", type_name<T>().c_str(), &obj, dest);
-
+    TRACE("...Sending object {} @{} to device at {}", type_name<T>().c_str(), fmt::ptr(&obj), fmt::ptr(dest));
     // Simple objects can simply be copied without worrying about contained objects and arrays
     auto ret = _send_ignoring_children<T>(location, obj, dest);
-    printf("...done\n");
+    TRACE("...done sending");
     return ret;
 }
 
@@ -118,11 +183,11 @@ HOST inline Proxy<T> send(const Resource& location, T& obj, T* dest = nullptr) {
  */
 template <typename T, typename Dummy = void, typename std::enable_if_t<has_send_children<T>::value, Dummy>* = nullptr>
 HOST inline Proxy<T> send(const Resource& location, T& obj, T* dest = nullptr) {
-    printf("Sending object %s @%x to device at %x\n", type_name<T>().c_str(), &obj, dest);
+    TRACE("Sending complex object {} @{} to device at {}", type_name<T>().c_str(), fmt::ptr(&obj), fmt::ptr(dest));
     auto dummy = obj.send_children(location); // function is expected to return an object of type obj with all pointers appropriately assigned to valid pointers on location
     Proxy<T> ret = _send_ignoring_children(location, dummy, dest);
-    printf("clearing...\n");
+    TRACE("... clearing dummy complex object");
     dummy.clear();
-    printf("...done\n");
+    TRACE("... done sending");
     return ret;
 }
diff --git a/src/SignalManager.cpp b/src/SignalManager.cpp
index 08a87ac7f202d2f7674527bc32a21a26cc95e03e..4605268518bf77987d12b7574888f2e8a4131fe6 100644
--- a/src/SignalManager.cpp
+++ b/src/SignalManager.cpp
@@ -1,4 +1,5 @@
 #include "SignalManager.h"
+
 #include <cstdio>
 #include <cstdlib>
 #ifdef SIGNAL
@@ -33,6 +34,10 @@ void SignalManager::segfault_handler(int sig, siginfo_t *info, void *secret)
 
 void SignalManager::manage_segfault()
 {
+#ifdef USE_LOGGER
+    // Runtime threshold = trace: emit every record that was not compiled out.
+    spdlog::set_level(spdlog::level::trace);
+#endif
     struct sigaction sa;
 
     sa.sa_sigaction = segfault_handler;
@@ -44,6 +49,11 @@ void SignalManager::manage_segfault()
 #else
 
 void SignalManager::segfault_handler(int sig, siginfo_t *info, void *secret) {}
-void SignalManager::manage_segfault() {}
+void SignalManager::manage_segfault() {
+#ifdef USE_LOGGER
+    // Runtime threshold = trace: emit every record that was not compiled out.
+    spdlog::set_level(spdlog::level::trace);
+#endif
+}
 
 #endif
diff --git a/src/SignalManager.h b/src/SignalManager.h
index c58ad2d9dface2c2d1e45a2b66dbc50a2aa676eb..0b9afb8ade5f583d1ff1d227ebbc7984301f80d8 100644
--- a/src/SignalManager.h
+++ b/src/SignalManager.h
@@ -6,6 +6,44 @@
 #ifndef SIGNALMANAGER_H_
 #define SIGNALMANAGER_H_
 
+#ifdef USE_LOGGER
+
+// spdlog compiles SPDLOG_* calls in or out according to SPDLOG_ACTIVE_LEVEL,
+// which it reads while its headers are processed; the fallback therefore has to
+// be defined before <spdlog/spdlog.h> is included.
+#ifndef SPDLOG_ACTIVE_LEVEL
+#define SPDLOG_ACTIVE_LEVEL SPDLOG_LEVEL_TRACE
+#endif
+
+#define FMT_HEADER_ONLY
+#include <spdlog/fmt/bundled/core.h>
+#include <spdlog/fmt/bundled/format.h>
+#include <spdlog/spdlog.h>
+
+// Logging macros; each forwards to the spdlog macro of the same level.
+// NOTE(review): DEBUG and ERROR are common macro names; confirm that no build
+// flag or system header already defines them.
+#define TRACE(...) SPDLOG_TRACE(__VA_ARGS__)
+#define DEBUG(...) SPDLOG_DEBUG(__VA_ARGS__)
+#define INFO(...) SPDLOG_INFO(__VA_ARGS__)
+#define WARN(...) SPDLOG_WARN(__VA_ARGS__)
+#define ERROR(...) SPDLOG_ERROR(__VA_ARGS__)
+#define CRITICAL(...) SPDLOG_CRITICAL(__VA_ARGS__)
+
+#else
+
+// Logging disabled: every macro expands to nothing.
+// NOTE to developers: only use the macros below for logging, only in host code
+#define TRACE(...)
+#define DEBUG(...)
+#define INFO(...)
+#define WARN(...)
+#define ERROR(...)
+#define CRITICAL(...)
+
+#endif
+
 // see http://www.linuxjournal.com/files/linuxjournal.com/linuxjournal/articles/063/6391/6391l3.html
 #include <csignal>
 #include <execinfo.h>
diff --git a/src/SimSystem.cpp b/src/SimSystem.cpp
index 20e87bfa4009cb3d92ba2b3d0c5146119e5e4f70..74df51316b9009dc4e839f3d5eb7b422122bb28a 100644
--- a/src/SimSystem.cpp
+++ b/src/SimSystem.cpp
@@ -17,7 +17,7 @@ void CellDecomposer::decompose(SimSystem& sys, ResourceCollection& resources) {
     Vector3 dr = max-min;
 
     // For starters, distribute patches uniformly among available resources
-    Vector3 n_p_v = (dr / cutoff).element_floor(); // ordered z-fast
+    Vector3 n_p_v = (dr / cutoff).element_floor();
     size_t n_r = resources.resources.size();
 
     size_t n_p = static_cast<size_t>(round(n_p_v[0]*n_p_v[1]*n_p_v[2]));
diff --git a/src/Tests/CMakeLists.txt b/src/Tests/CMakeLists.txt
index 4b2055025b40384e6a98abc2f04296bf1b582805..147d2fb19db29dd35ea71424ab107286173bd8d8 100644
--- a/src/Tests/CMakeLists.txt
+++ b/src/Tests/CMakeLists.txt
@@ -27,6 +27,12 @@ endif()
 if(USE_NVTX)
   target_link_libraries(arbd_tests PUBLIC nvToolsExt)
 endif()
+if(USE_LOGGER)
+  add_definitions(-DUSE_LOGGER)
+  target_include_directories(arbd_tests PRIVATE ${spdlog_DIR})
+  target_link_libraries(arbd_tests PRIVATE spdlog::spdlog_header_only)
+  add_compile_definitions(SPDLOG_ACTIVE_LEVEL=${SPDLOG_LEVEL})
+endif()
 
 ## catch_discover_tests("${PROJECT_NAME}_TESTS")
 
diff --git a/src/Tests/catch_boiler.h b/src/Tests/catch_boiler.h
index 571424f62e6af61b25e50a61575b4ba05dcd5fdf..03b7f10d043e8bf7ea82678cd72214ffe1311189 100644
--- a/src/Tests/catch_boiler.h
+++ b/src/Tests/catch_boiler.h
@@ -8,7 +8,6 @@
 #include <nvfunctional>
 
 #include "../type_name.h"
-
 /* #include <catch2/catch_tostring.hpp> */
 /* namespace Catch { */
 /*     template<typename T, bool b1, bool b2> */
@@ -34,6 +33,7 @@ namespace Tests {
     namespace Tests {\
     template<typename Op_t, typename R, typename ...T>\
     void run_trial( std::string name, R expected_result, T...args) {\
+        SignalManager::manage_segfault();\
         R *gpu_result_d, gpu_result, cpu_result;\
         cpu_result = Op_t::op(args...);\
         cudaMalloc((void **)&gpu_result_d, sizeof(R));\