Commit a1ad3be5 authored by cmaffeo2

Add support for logger

parent 9f1b7cb1
[submodule "extern/spdlog"]
path = extern/spdlog
url = https://github.com/gabime/spdlog.git
@@ -64,11 +64,13 @@ endif()
option(DEBUG "Build with debug flags" False)
option(USE_NCCL "Use NCCL for single node GPU peer communication" False)
option(USE_NVTX "Build with NVTX profiling ranges" False)
option(USE_LOGGER "Build with SPDLog" True)
# (not yet optional) message(STATUS "USE_CUDA: ${USE_CUDA}")
message(STATUS "DEBUG: ${DEBUG}")
message(STATUS "USE_NCCL: ${USE_NCCL}")
message(STATUS "USE_NVTX: ${USE_NVTX}")
message(STATUS "USE_LOGGER: ${USE_LOGGER}")
## Set flags before adding executable
@@ -96,6 +98,28 @@ if(USE_NCCL)
add_definitions(-DUSE_NCCL)
target_link_libraries("${PROJECT_NAME}" PRIVATE nccl)
endif()
if(USE_LOGGER)
# set(spdlog_DIR "extern/spdlog/include/spdlog")
# set(CMAKE_spdlog_DIR "${spdlog_DIR}")
# find_package(spdlog REQUIRED)
set(spdlog_DIR "extern/spdlog/include")
set(CMAKE_spdlog_DIR "${spdlog_DIR}")
add_subdirectory(extern/spdlog)
# find_package(spdlog REQUIRED)
include_directories(${spdlog_DIR})
set(SPDLOG_LEVEL SPDLOG_LEVEL_DEBUG)
# target_include_directories("lib${PROJECT_NAME}" PRIVATE ${spdlog_DIR})
# target_include_directories("${PROJECT_NAME}" PRIVATE ${spdlog_DIR})
# if(DEFINED ENV{CUDA_INCLUDE_DIRS})
# set(CUDA_INCLUDE_DIRS $ENV{CUDA_INCLUDE_DIRS})
# endif()
# target_link_libraries("${PROJECT_NAME}" PRIVATE spdlog::spdlog_header_only)
# target_link_libraries("lib${PROJECT_NAME}" PRIVATE spdlog::spdlog_header_only)
# target_link_libraries("lib${PROJECT_NAME}" spdlog)
# target_link_libraries("${PROJECT_NAME}" spdlog)
endif()
## Two lines below needed?
set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)
@@ -147,5 +171,12 @@ endif()
if(USE_NVTX)
target_link_libraries("${PROJECT_NAME}" PUBLIC nvToolsExt)
endif()
if(USE_LOGGER)
add_definitions(-DUSE_LOGGER)
# include_directories(${spdlog_DIR})
target_include_directories("${PROJECT_NAME}" PRIVATE ${spdlog_DIR})
target_link_libraries("${PROJECT_NAME}" PRIVATE spdlog::spdlog_header_only)
add_compile_definitions(SPDLOG_ACTIVE_LEVEL=${SPDLOG_LEVEL})
endif()
install(TARGETS "${PROJECT_NAME}")
Subproject commit 1ef8d3ce348daf5d580e27fc68e91628ce42c1f4
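For reference, a minimal sketch (not part of this commit) of how a translation unit behaves under the new build options: when CMake defines USE_LOGGER, the macros in SignalManager.h expand to spdlog calls and SPDLOG_ACTIVE_LEVEL (set from the SPDLOG_LEVEL cache variable) removes lower-severity calls at compile time; without USE_LOGGER the macros expand to nothing. The function name below is illustrative only.

// sketch.cpp -- illustrative only; relies on the TRACE/DEBUG/INFO macros from SignalManager.h
#include "SignalManager.h"

int example_logging(int n_gpus) {
    // These lines compile away entirely unless -DUSE_LOGGER is set by CMake.
    INFO("Found {} GPU(s)", n_gpus);   // spdlog/fmt-style formatting
    DEBUG("detail value = {}", 42);    // dropped at compile time if SPDLOG_ACTIVE_LEVEL > SPDLOG_LEVEL_DEBUG
    return n_gpus;
}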
@@ -9,6 +9,7 @@
#include <string>
#include <cstdarg>
#include <exception>
#include "SignalManager.h"
enum ExceptionType {
UnspeficiedError,
......
add_library("lib${PROJECT_NAME}"
ARBDException.cpp
GPUManager.cpp
@@ -11,3 +12,11 @@ add_library("lib${PROJECT_NAME}"
SignalManager.cpp
PatchOp.cu
)
if(USE_LOGGER)
add_definitions(-DUSE_LOGGER)
target_include_directories("lib${PROJECT_NAME}" PRIVATE ${spdlog_DIR})
target_link_libraries("lib${PROJECT_NAME}" PRIVATE spdlog::spdlog_header_only)
# target_link_libraries("lib${PROJECT_NAME}" spdlog)
# target_link_libraries("${PROJECT_NAME}" spdlog)
add_compile_definitions(SPDLOG_ACTIVE_LEVEL=${SPDLOG_LEVEL})
endif()
@@ -20,17 +20,17 @@ std::vector<GPU> GPUManager::allGpus, GPUManager::gpus, GPUManager::notimeouts;
GPU::GPU(unsigned int id) : id(id) {
cudaSetDevice(id);
cudaGetDeviceProperties(&properties, id);
printf("[%d] %s ", id, properties.name);
const char* timeout_str = "";
if (properties.kernelExecTimeoutEnabled) {
printf("(may timeout) ");
timeout_str = "(may timeout) ";
may_timeout = true;
} else {
may_timeout = false;
}
printf("| SM %d.%d, ", properties.major, properties.minor);
printf("%.2fGHz, ", (float) properties.clockRate * 10E-7);
printf("%.1fGB RAM\n", (float) properties.totalGlobalMem * 7.45058e-10);
INFO("[{}] {} {}| SM {}.{} {:.2f}GHz, {:.1f}GB RAM",
id, properties.name, timeout_str, properties.major, properties.minor,
(float) properties.clockRate * 10E-7, (float) properties.totalGlobalMem * 7.45058e-10);
streams_created = false;
// fflush(stdout);
// gpuErrchk( cudaDeviceSynchronize() );
@@ -59,12 +59,12 @@ void GPU::create_streams() {
void GPU::destroy_streams() {
int curr;
// printf("Destroying streams\n");
TRACE("Destroying streams");
if (cudaGetDevice(&curr) == cudaSuccess) { // Avoid errors when program is shutting down
gpuErrchk( cudaSetDevice(id) );
if (streams_created) {
for (int i = 0; i < NUMSTREAMS; i++) {
// printf(" destroying stream %d at %p\n", i, (void *) &streams[i]);
TRACE(" destroying stream {} at {}\n", i, fmt::ptr((void *) &streams[i]));
gpuErrchk( cudaStreamDestroy( streams[i] ) );
}
}
@@ -76,7 +76,7 @@ void GPU::destroy_streams() {
void GPUManager::init() {
gpuErrchk(cudaGetDeviceCount(&nGPUs));
printf("Found %d GPU(s)\n", nGPUs);
INFO("Found {} GPU(s)", nGPUs);
for (int dev = 0; dev < nGPUs; dev++) {
GPU g(dev);
allGpus.push_back(g);
@@ -84,7 +84,7 @@ void GPUManager::init() {
}
is_safe = false;
if (allGpus.size() == 0) {
fprintf(stderr, "Error: Did not find a GPU\n");
Exception(ValueError, "Did not find a GPU\n");
exit(1);
}
}
@@ -96,19 +96,20 @@ void GPUManager::load_info() {
}
void GPUManager::init_devices() {
printf("Initializing devices... ");
INFO("Initializing GPU devices... ");
char msg[256] = "";
for (unsigned int i = 0; i < gpus.size(); i++) {
if (i != gpus.size() - 1 && gpus.size() > 1)
printf("%d, ", gpus[i].id);
sprintf(msg, "%s%d, ", msg, gpus[i].id);
else if (gpus.size() > 1)
printf("and %d\n", gpus[i].id);
sprintf(msg, "%sand %d", msg, gpus[i].id);
else
printf("%d\n", gpus[i].id);
sprintf(msg, "%d", gpus[i].id);
use(i);
cudaDeviceSetCacheConfig( cudaFuncCachePreferL1 );
gpus[i].create_streams();
}
INFO("Initializing GPUs: {}", msg);
use(0);
gpuErrchk( cudaDeviceSynchronize() );
}
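Note that the sprintf calls above pass msg as both destination and source ("%s%d, ", msg, ...), which is undefined behavior for sprintf and could overflow the fixed 256-byte buffer with enough devices. A possible alternative, a sketch only and not part of the commit, is to accumulate the list in a std::string:

// Hypothetical rewrite of the device-list construction (illustrative only).
#include <string>

std::string device_list;
for (unsigned int i = 0; i < gpus.size(); i++) {
    if (i > 0) device_list += (i + 1 == gpus.size()) ? " and " : ", ";
    device_list += std::to_string(gpus[i].id);
    use(i);
    cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
    gpus[i].create_streams();
}
INFO("Initializing GPUs: {}", device_list);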
......
#pragma once
#include <future>
#include <iostream>
#include "ARBDException.h"
@@ -10,7 +11,7 @@ struct Resource {
/**
* @brief Enum to specify the type of the resource (e.g., CPU or GPU).
*/
enum ResourceType {CPU, GPU};
enum ResourceType {CPU, MPI, GPU};
ResourceType type; ///< Type of the resource.
size_t id; ///< ID or any other identifier associated with the resource.
// HOST DEVICE static bool is_local() { // check if thread/gpu idx matches some global idx };
@@ -56,6 +57,55 @@ public:
*/
Resource location; ///< The device (thread/gpu) holding the data represented by the proxy.
T* addr; ///< The address of the underlying object.
template <typename RetType, typename... Args>
RetType callSync(RetType (T::*memberFunc)(Args...), Args... args) {
switch (location.type) {
case Resource::CPU:
return (addr->*memberFunc)(args...);
case Resource::GPU:
// Handle GPU-specific logic
std::cerr << "Error: GPU not implemented in synchronous call." << std::endl;
// You may want to throw an exception or handle this case accordingly
return RetType{};
case Resource::MPI:
// Handle MPI-specific logic
std::cerr << "Error: MPI not implemented in synchronous call." << std::endl;
// You may want to throw an exception or handle this case accordingly
return RetType{};
default:
// Handle other cases or throw an exception
std::cerr << "Error: Unknown resource type." << std::endl;
// You may want to throw an exception or handle this case accordingly
return RetType{};
}
}
template <typename RetType, typename... Args>
std::future<RetType> callAsync(RetType (T::*memberFunc)(Args...), Args... args) {
switch (location.type) {
case Resource::CPU:
// Handle CPU-specific asynchronous logic
return std::async(std::launch::async, [this, memberFunc, args...] {
return (addr->*memberFunc)(args...);
});
case Resource::GPU:
// Handle GPU-specific asynchronous logic
std::cerr << "Error: GPU not implemented in asynchronous call." << std::endl;
// You may want to throw an exception or handle this case accordingly
return std::async(std::launch::async, [] { return RetType{}; });
case Resource::MPI:
// Handle MPI-specific asynchronous logic
std::cerr << "Error: MPI not implemented in asynchronous call." << std::endl;
// You may want to throw an exception or handle this case accordingly
return std::async(std::launch::async, [] { return RetType{}; });
default:
// Handle other cases or throw an exception
std::cerr << "Error: Unknown resource type." << std::endl;
// You may want to throw an exception or handle this case accordingly
return std::async(std::launch::async, [] { return RetType{}; });
}
}
};
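A short usage sketch for the new Proxy call helpers (illustrative only; Worker and its work() method are hypothetical names, not part of this commit):

// Hypothetical example of dispatching a member function through a Proxy.
struct Worker { int work(int x) { return x + 1; } };

void proxy_example(Proxy<Worker>& p) {
    int r1 = p.callSync(&Worker::work, 3);               // runs immediately when the object is CPU-resident
    std::future<int> f = p.callAsync(&Worker::work, 4);  // deferred via std::async for CPU resources
    int r2 = f.get();
    (void) r1; (void) r2;
}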
/**
@@ -99,11 +149,11 @@ HOST inline Proxy<T> _send_ignoring_children(const Resource& location, T& obj, T
*/
template <typename T, typename Dummy = void, typename std::enable_if_t<!has_send_children<T>::value, Dummy>* = nullptr>
HOST inline Proxy<T> send(const Resource& location, T& obj, T* dest = nullptr) {
printf("Sending object %s @%x to device at %x\n", type_name<T>().c_str(), &obj, dest);
TRACE("...Sending object {} @{} to device at {}", type_name<T>().c_str(), fmt::ptr(&obj), fmt::ptr(dest));
// Simple objects can simply be copied without worrying about contained objects and arrays
auto ret = _send_ignoring_children<T>(location, obj, dest);
printf("...done\n");
TRACE("...done sending");
// printf("...done\n");
return ret;
}
@@ -118,11 +168,11 @@ HOST inline Proxy<T> send(const Resource& location, T& obj, T* dest = nullptr) {
*/
template <typename T, typename Dummy = void, typename std::enable_if_t<has_send_children<T>::value, Dummy>* = nullptr>
HOST inline Proxy<T> send(const Resource& location, T& obj, T* dest = nullptr) {
printf("Sending object %s @%x to device at %x\n", type_name<T>().c_str(), &obj, dest);
TRACE("Sending complex object {} @{} to device at {}", type_name<T>().c_str(), fmt::ptr(&obj), fmt::ptr(dest));
auto dummy = obj.send_children(location); // function is expected to return an object of type obj with all pointers appropriately assigned to valid pointers on location
Proxy<T> ret = _send_ignoring_children(location, dummy, dest);
printf("clearing...\n");
TRACE("... clearing dummy complex object");
dummy.clear();
printf("...done\n");
TRACE("... done sending");
return ret;
}
#include "SignalManager.h"
#include <cstdio>
#include <cstdlib>
#ifdef SIGNAL
@@ -33,6 +34,9 @@ void SignalManager::segfault_handler(int sig, siginfo_t *info, void *secret)
void SignalManager::manage_segfault()
{
#ifdef USE_LOGGER
spdlog::set_level(spdlog::level::trace);
#endif
struct sigaction sa;
sa.sa_sigaction = segfault_handler;
@@ -44,6 +48,10 @@ void SignalManager::manage_segfault()
#else
void SignalManager::segfault_handler(int sig, siginfo_t *info, void *secret) {}
void SignalManager::manage_segfault() {}
void SignalManager::manage_segfault() {
#ifdef USE_LOGGER
spdlog::set_level(spdlog::level::trace);
#endif
}
#endif
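Aside (not part of the diff): spdlog::set_level() in manage_segfault() adjusts the runtime filter of the default logger, while SPDLOG_ACTIVE_LEVEL set by CMake removes lower-severity macro calls at compile time; both gates must admit a message for it to appear. A minimal sketch of the distinction, assuming the logger macros from SignalManager.h:

// Illustrative only: both the compile-time and runtime filters apply.
#ifdef USE_LOGGER
spdlog::set_level(spdlog::level::info);  // runtime: suppress trace/debug output
#endif
TRACE("compiled in only if SPDLOG_ACTIVE_LEVEL <= SPDLOG_LEVEL_TRACE, and still filtered at runtime");
INFO("emitted when the runtime level is info or lower");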
@@ -6,6 +6,39 @@
#ifndef SIGNALMANAGER_H_
#define SIGNALMANAGER_H_
#ifdef USE_LOGGER
#define FMT_HEADER_ONLY
#include <spdlog/fmt/bundled/core.h>
#include <spdlog/fmt/bundled/format.h>
#include <spdlog/spdlog.h>
#ifndef SPDLOG_ACTIVE_LEVEL
#define SPDLOG_ACTIVE_LEVEL SPDLOG_LEVEL_TRACE
#endif
#define TRACE(...) SPDLOG_TRACE(__VA_ARGS__)
#define DEBUG(...) SPDLOG_DEBUG(__VA_ARGS__)
// #define DEBUG(...) spdlog::debug(__VA_ARGS__)
#define INFO(...) SPDLOG_INFO(__VA_ARGS__)
#define WARN(...) SPDLOG_WARN(__VA_ARGS__)
#define ERROR(...) SPDLOG_ERROR(__VA_ARGS__)
#define CRITICAL(...) SPDLOG_CRITICAL(__VA_ARGS__)
// spdlog::set_level(spdlog::level::trace);
#else
// Disable logger macros
// NOTE to developers: use only the macros below for logging, and only in host code
#define TRACE(...)
#define DEBUG(...)
#define INFO(...)
#define WARN(...)
#define ERROR(...)
#define CRITICAL(...)
#endif
// see http://www.linuxjournal.com/files/linuxjournal.com/linuxjournal/articles/063/6391/6391l3.html
#include <csignal>
#include <execinfo.h>
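For completeness, a brief usage sketch of the macros defined above (illustrative, not part of the commit): arguments use fmt formatting, so raw pointers are typically wrapped in fmt::ptr and floats can carry precision specifiers, mirroring the GPUManager changes above.

// Hypothetical call sites using the logging macros from this header.
inline void logging_example(void* stream_ptr, float clock_ghz) {
    INFO("GPU clock: {:.2f} GHz", clock_ghz);     // fmt precision specifier
    TRACE("stream at {}", fmt::ptr(stream_ptr));  // pointers wrapped with fmt::ptr
    WARN("falling back to device {}", 0);
}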
......
@@ -17,7 +17,7 @@ void CellDecomposer::decompose(SimSystem& sys, ResourceCollection& resources) {
Vector3 dr = max-min;
// For starters, distribute patches uniformly among available resources
Vector3 n_p_v = (dr / cutoff).element_floor(); // ordered z-fast
Vector3 n_p_v = (dr / cutoff).element_floor();
size_t n_r = resources.resources.size();
size_t n_p = static_cast<size_t>(round(n_p_v[0]*n_p_v[1]*n_p_v[2]));
......
@@ -27,6 +27,12 @@ endif()
if(USE_NVTX)
target_link_libraries(arbd_tests PUBLIC nvToolsExt)
endif()
if(USE_LOGGER)
add_definitions(-DUSE_LOGGER)
target_include_directories(arbd_tests PRIVATE ${spdlog_DIR})
target_link_libraries(arbd_tests PRIVATE spdlog::spdlog_header_only)
add_compile_definitions(SPDLOG_ACTIVE_LEVEL=${SPDLOG_LEVEL})
endif()
## catch_discover_tests("${PROJECT_NAME}_TESTS")
......
@@ -8,7 +8,6 @@
#include <nvfunctional>
#include "../type_name.h"
/* #include <catch2/catch_tostring.hpp> */
/* namespace Catch { */
/* template<typename T, bool b1, bool b2> */
@@ -34,6 +33,7 @@ namespace Tests {
namespace Tests {\
template<typename Op_t, typename R, typename ...T>\
void run_trial( std::string name, R expected_result, T...args) {\
SignalManager::manage_segfault();\
R *gpu_result_d, gpu_result, cpu_result;\
cpu_result = Op_t::op(args...);\
cudaMalloc((void **)&gpu_result_d, sizeof(R));\
......