From 24d8fc88cedea15d6b1affce8b9af8750d2ff65f Mon Sep 17 00:00:00 2001
From: Chris Maffeo <cmaffeo2@illinois.edu>
Date: Wed, 17 Jan 2024 17:51:44 -0600
Subject: [PATCH] Started implementation of Random class

---
 CMakeLists.txt        |   7 +
 src/CMakeLists.txt    |   3 +
 src/Proxy.h           | 105 +++++++-----
 src/Random.cu         | 388 ++++++++++++++++++++++++++++++++++++++++++
 src/Random.h          |  96 +++++++++++
 src/SignalManager.cpp |   3 +-
 src/SignalManager.h   |  15 ++
 src/SimManager.cpp    |   8 +-
 8 files changed, 575 insertions(+), 50 deletions(-)
 create mode 100644 src/Random.cu
 create mode 100644 src/Random.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9109ba6..021c1d4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -83,6 +83,11 @@ if(USE_CUDA)
   endif()
   # message(STATUS "CUDA_INC: ${CUDA_INCLUDE_DIRS}")
   include_directories(${CUDA_INCLUDE_DIRS})
+
+  if(NOT DEFINED CMAKE_CUDA_STANDARD)
+    set(CMAKE_CUDA_STANDARD 14)
+    set(CMAKE_CUDA_STANDARD_REQUIRED ON)
+  endif()
 endif()
 
 if(DEBUG)
@@ -176,6 +181,8 @@ if(USE_LOGGER)
   # include_directories(${spdlog_DIR})
   target_include_directories("${PROJECT_NAME}" PRIVATE ${spdlog_DIR})
   target_link_libraries("${PROJECT_NAME}" PRIVATE spdlog::spdlog_header_only)
+  # find_package(fmt)
+  # target_link_libraries("${PROJECT_NAME}" PRIVATE fmt::fmt)
   add_compile_definitions(SPDLOG_ACTIVE_LEVEL=${SPDLOG_LEVEL})
 endif()
 
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 772b770..c331389 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -3,6 +3,7 @@ add_library("lib${PROJECT_NAME}"
   ARBDException.cpp
   GPUManager.cpp
   ParticlePatch.cpp
+  Random.cu
   SimSystem.cpp
   Integrator.cpp
   Integrator/CPU.cpp
@@ -19,5 +20,7 @@ if(USE_LOGGER)
   target_link_libraries("lib${PROJECT_NAME}" PRIVATE spdlog::spdlog_header_only)	
   # target_link_libraries("lib${PROJECT_NAME}" spdlog)	
   # target_link_libraries("${PROJECT_NAME}" spdlog)
+  # find_package(fmt)
+  # target_link_libraries("lib${PROJECT_NAME}" PRIVATE fmt::fmt)
   add_compile_definitions(SPDLOG_ACTIVE_LEVEL=${SPDLOG_LEVEL})
 endif()
diff --git a/src/Proxy.h b/src/Proxy.h
index f4b4c24..61dea75 100644
--- a/src/Proxy.h
+++ b/src/Proxy.h
@@ -14,6 +14,11 @@ struct Resource {
     enum ResourceType {CPU, MPI, GPU};
     ResourceType type; ///< Type of the resource.
     size_t id; ///< ID or any other identifier associated with the resource.
+
+    HOST DEVICE bool is_local() const {
+	WARN("Resource::is_local() not implemented; returning true");
+	return true;
+    };
     // HOST DEVICE static bool is_local() { // check if thread/gpu idx matches some global idx };
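+    // A possible sketch (assumes a hypothetical Resource::current() query
+    // returning the calling thread's resource, not yet part of this codebase):
+    //   HOST DEVICE bool is_local() const {
+    //       Resource here = Resource::current();
+    //       return here.type == type && here.id == id;
+    //   }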
 };
 
@@ -94,53 +99,48 @@ public:
     template <typename RetType, typename... Args1, typename... Args2>
     RetType callSync(RetType (T::*memberFunc)(Args1...), Args2&&... args) {
         switch (location.type) {
-            case Resource::CPU:
-	    // 	return ([&](auto&&... capturedArgs) {
-            //     return (addr->*memberFunc)(std::forward<decltype(capturedArgs)>(capturedArgs)...);
-            // })(std::forward<Args>(args)...);
+	case Resource::CPU:
+	    if (location.is_local()) {
 		return (addr->*memberFunc)(std::forward<Args2>(args)...);
-            case Resource::GPU:
-                // Handle GPU-specific logic
-                std::cerr << "Error: GPU not implemented in synchronous call." << std::endl;
-                // You may want to throw an exception or handle this case accordingly
-                return RetType{};
-            case Resource::MPI:
-                // Handle MPI-specific logic
-                std::cerr << "Error: MPI not implemented in synchronous call." << std::endl;
-                // You may want to throw an exception or handle this case accordingly
-                return RetType{};
-            default:
-                // Handle other cases or throw an exception
-                std::cerr << "Error: Unknown resource type." << std::endl;
-                // You may want to throw an exception or handle this case accordingly
-                return RetType{};
+	    } else {
+		Exception( NotImplementedError, "Proxy::callSync() non-local CPU calls" );
+	    }
+	case Resource::GPU:
+	    if (location.is_local()) {
+		Exception( NotImplementedError, "Proxy::callSync() local GPU calls" );
+	    } else {
+		Exception( NotImplementedError, "Proxy::callSync() non-local GPU calls" );
+	    }
+	case Resource::MPI:
+	    Exception( NotImplementedError, "MPI sync calls (deprecate?)" );
+	default:
+	    Exception( ValueError, "Proxy::callSync(): Unknown resource type" );
         }
+	return RetType{};
     }
 
-    template <typename RetType, typename... Args>
-    std::future<RetType> callAsync(RetType (T::*memberFunc)(Args...), Args... args) {
+    // TODO generalize to handle void RetType 
+    template <typename RetType, typename... Args1, typename... Args2>
+    std::future<RetType> callAsync(RetType (T::*memberFunc)(Args1...), Args2&&... args) {
         switch (location.type) {
-            case Resource::CPU:
-                // Handle CPU-specific asynchronous logic
-                return std::async(std::launch::async, [this, memberFunc, args...] {
-                    return (addr->*memberFunc)(args...);
-                });
-            case Resource::GPU:
-                // Handle GPU-specific asynchronous logic
-                std::cerr << "Error: GPU not implemented in asynchronous call." << std::endl;
-                // You may want to throw an exception or handle this case accordingly
-                return std::async(std::launch::async, [] { return RetType{}; });
-            case Resource::MPI:
-                // Handle MPI-specific asynchronous logic
-                std::cerr << "Error: MPI not implemented in asynchronous call." << std::endl;
-                // You may want to throw an exception or handle this case accordingly
-                return std::async(std::launch::async, [] { return RetType{}; });
-            default:
-                // Handle other cases or throw an exception
-                std::cerr << "Error: Unknown resource type." << std::endl;
-                // You may want to throw an exception or handle this case accordingly
-                return std::async(std::launch::async, [] { return RetType{}; });
+	case Resource::CPU:
+	    if (location.is_local()) {
+		return std::async(std::launch::async, memberFunc, addr, std::forward<Args2>(args)...);
+	    } else {
+		Exception( NotImplementedError, "Proxy::callAsync() non-local CPU calls" );
+	    }
+	case Resource::GPU:
+	    if (location.is_local()) {
+		Exception( NotImplementedError, "Proxy::callAsync() local GPU calls" );
+	    } else {
+		Exception( NotImplementedError, "Proxy::callAsync() non-local GPU calls" );
+	    }
+	case Resource::MPI:
+	    Exception( NotImplementedError, "MPI async calls (deprecate?)" );
+	default:
+	    Exception( ValueError, "Proxy::callAsync(): Unknown resource type" );
         }
+	return std::async(std::launch::async, [] { return RetType{}; });
     }
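+    // Example usage (sketch; `Foo` and `Foo::work` are hypothetical):
+    //   Proxy<Foo> p = send(location, foo);
+    //   int x = p.callSync(&Foo::work, 1.0f);     // blocking call
+    //   auto fx = p.callAsync(&Foo::work, 1.0f);  // std::future<int>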
 };
 
@@ -163,15 +163,26 @@ template <typename T>
 HOST inline Proxy<T> _send_ignoring_children(const Resource& location, T& obj, T* dest = nullptr) {
     switch (location.type) {
     case Resource::GPU:
-	if (dest == nullptr) { // allocate if needed
-	    // printf("   cudaMalloc for array\n");
-	    gpuErrchk(cudaMalloc(&dest, sizeof(T)));
+	if (location.is_local()) {
+	    if (dest == nullptr) { // allocate if needed
+		TRACE("   cudaMalloc for array");
+		gpuErrchk(cudaMalloc(&dest, sizeof(T)));
+	    }
+	    gpuErrchk(cudaMemcpy(dest, &obj, sizeof(T), cudaMemcpyHostToDevice));
+	} else {
+ 	    Exception( NotImplementedError, "`_send_ignoring_children(...)` on non-local GPU" );
 	}
-	gpuErrchk(cudaMemcpy(dest, &obj, sizeof(T), cudaMemcpyHostToDevice));
 	break;
     case Resource::CPU:
-	// not implemented
-	Exception( NotImplementedError, "`_send_ignoring_children(...)` on CPU" );
+	if (location.is_local()) {
+	    if (dest == nullptr) { // allocate if needed
+		TRACE("   Alloc CPU memory for ", decltype(T));
+		dest = new T;
+	    }
+	    memcpy(dest, &obj, sizeof(T));
+	} else {
+	    Exception( NotImplementedError, "`_send_ignoring_children(...)` on non-local CPU" );
+	}
 	break;
     default:
 	// error
diff --git a/src/Random.cu b/src/Random.cu
new file mode 100644
index 0000000..620c3f0
--- /dev/null
+++ b/src/Random.cu
@@ -0,0 +1,388 @@
+#include "Random.h"
+
+#ifdef USE_CUDA
+#include <cuda.h>
+#include <curand_kernel.h>
+#include <curand.h>
+
+#endif
+
+#define cuRandchk(ans) { cuRandAssert((ans), __FILE__, __LINE__); }
+inline void cuRandAssert(curandStatus code, const char *file, int line, bool abort=true) {
+	if (code != CURAND_STATUS_SUCCESS) {
+		fprintf(stderr, "CURAND Error: %d (%s:%d)\n", code, file, line);
+		if (abort) exit(code);
+	}
+}
+
+
+std::map<Random::Conf, Random*> Random::_objects;
+
+bool operator<(const Random::Conf x, const Random::Conf y) {
+    return x.backend == y.backend ? (x.seed == y.seed ?
+				     (x.num_threads < y.num_threads) :
+				     x.seed < y.seed) : x.backend < y.backend;
+}
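+// Equivalent ordering via std::tie (requires <tuple>), shown for clarity:
+//   return std::tie(x.backend, x.seed, x.num_threads)
+//        < std::tie(y.backend, y.seed, y.num_threads);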
+
+// class RandomDEPRECATED {
+// public:
+// 	static const size_t RAND_N = 1024*4; // max random numbers stored
+
+// 	curandState_t *states;
+// 	curandGenerator_t generator;
+// 	unsigned int *integer_h, *integer_d;
+// 	float *uniform_h, *uniform_d;
+// 	float *gaussian_h, *gaussian_d;
+// 	size_t integer_n, uniform_n, gaussian_n;
+
+// public:
+
+// 	Random() : states(NULL), integer_h(NULL), integer_d(NULL), uniform_h(NULL), uniform_d(NULL), gaussian_h(NULL), gaussian_d(NULL) { }
+// 	Random(int num, unsigned long seed=0) : states(NULL), integer_h(NULL), integer_d(NULL), uniform_h(NULL), uniform_d(NULL), gaussian_h(NULL), gaussian_d(NULL) {		
+// 		init(num, seed);
+// 	}
+
+// 	void init(int num, unsigned long seed);
+
+// 	DEVICE inline float gaussian(int idx, int num) {
+// 		// TODO do stuff
+// 		if (idx < num)
+// 			return curand_normal(&states[idx]);
+// 		return 0.0f;
+// 	}
+// 	DEVICE inline float gaussian(curandState* state) {
+// 		return curand_normal(state);
+// 	}
+
+// 	DEVICE inline Vector3 gaussian_vector(int idx, int num) {
+// 		// TODO do stuff
+// 		if (idx < num) {
+// 			curandState localState = states[idx];
+// 			Vector3 v = gaussian_vector(&localState);
+// 			states[idx] = localState;
+// 			return v;
+// 		} else return Vector3(0.0f);			
+// 	}
+// 	DEVICE inline Vector3 gaussian_vector(curandState* state) {
+// 		float x = gaussian(state);
+// 		float y = gaussian(state);
+// 		float z = gaussian(state);
+// 		return Vector3(x, y, z);
+// 	}
+
+// 	unsigned int integer();
+// 	unsigned int poisson(float lambda);
+// 	float uniform();
+
+// 	HOST inline float gaussian() {
+// 	    if (gaussian_n < 1) {
+// 		cuRandchk(curandGenerateNormal(generator, gaussian_d, RAND_N, 0, 1));
+// 		gpuErrchk(cudaMemcpy(gaussian_h, gaussian_d, sizeof(float) * RAND_N, cudaMemcpyDeviceToHost));
+// 	    }
+// 	    return gaussian_h[--gaussian_n];
+// 	}
+
+// 	void reorder(int *a, int n);
+// };
+
+// void Random::init(int num, unsigned long seed) {
+// 	if (states != NULL)
+// 		gpuErrchk(cudaFree(states));
+// 	gpuErrchk(cudaMalloc(&states, sizeof(curandState) * num));
+// 	int nBlocks = num / NUM_THREADS + 1;
+// 	initKernel<<< nBlocks, NUM_THREADS >>>(seed, states, num);
+// 	gpuErrchk(cudaDeviceSynchronize());
+
+// 	// Create RNG and set seed
+// 	curandCreateGenerator(&generator, CURAND_RNG_PSEUDO_XORWOW);
+// 	curandSetPseudoRandomGeneratorSeed(generator, seed);
+	
+// 	if (uniform_d != NULL) 
+//         {
+//             gpuErrchk(cudaFree(uniform_d));
+//             uniform_d = NULL;
+//         }
+//         if(integer_d!=NULL)
+//         {
+//             gpuErrchk(cudaFree(integer_d));
+//             integer_d = NULL;
+//         }
+//         if(gaussian_d!=NULL)
+//         {
+//             gpuErrchk(cudaFree(gaussian_d));
+//             gaussian_d = NULL;
+//         }
+//         if(integer_h!=NULL)
+//         {
+// 	    delete[] integer_h;
+//             integer_h = NULL;
+//         }
+//         if(uniform_h!=NULL)
+//         {
+// 	    delete[] uniform_h;
+//             uniform_h = NULL;
+// 	}
+//         if(gaussian_h!=NULL)
+//         {
+// 	    delete[] gaussian_h;
+//             gaussian_h = NULL;
+//         }
+// 	gpuErrchk(cudaMalloc((void**)&uniform_d, sizeof(float) * RAND_N));
+// 	gpuErrchk(cudaMalloc((void**)&integer_d, sizeof(unsigned int) * RAND_N));
+// 	gpuErrchk(cudaMalloc((void**)&gaussian_d, sizeof(float) * RAND_N));
+// 	integer_h = new unsigned int[RAND_N];
+// 	uniform_h = new float[RAND_N];
+// 	gaussian_h = new float[RAND_N];
+// 	uniform_n = 0;
+// 	integer_n = 0;
+// 	gaussian_n = 0;
+// }
+
+// float Random::uniform() {
+// 	if (uniform_n < 1) {
+// 		cuRandchk(curandGenerateUniform(generator, uniform_d, RAND_N));
+// 		gpuErrchk(cudaMemcpy(uniform_h, uniform_d, sizeof(float) * RAND_N, cudaMemcpyDeviceToHost));
+// 		uniform_n = RAND_N;
+// 	}
+// 	return uniform_h[--uniform_n];
+// }
+
+// unsigned int Random::poisson(float lambda) {
+// 	const float l = exp(-lambda);
+// 	unsigned int k = 0;
+// 	float p = uniform();
+// 	while (p >= l) {
+// 		p *= uniform();
+// 		k = k + 1;
+// 	}
+// 	return k;
+// }
+
+// unsigned int Random::integer() {
+// 	if (integer_n < 1) {
+// 		curandGenerate(generator, integer_d, RAND_N);
+// 		gpuErrchk(cudaMemcpy(integer_h, integer_d, sizeof(unsigned int) * RAND_N, cudaMemcpyDeviceToHost));
+// 		integer_n = RAND_N;
+// 	}
+// 	return integer_h[--integer_n];
+// }
+
+// void Random::reorder(int a[], int n) {
+// 	for (int i = 0; i < (n-1); ++i) {
+// 		unsigned int j = i + (integer() % (n-i));
+// 		if ( j == i )
+// 			continue;
+// 		std::swap<int>(a[i], a[j]);
+// 		const int tmp = a[j];
+// 		a[j] = a[i];
+// 		a[i] = tmp;
+// 	}
+// }
+
+// CPU reference backend: a single std::mt19937 shared by all callers.
+// Minimal but functional; proper per-thread streams can come later.
+class RandomImpl : public Random {
+public:
+    RandomImpl(unsigned long seed = 0) : gen(seed), normal(0.0f, 1.0f), unif(0.0f, 1.0f) {}
+
+    // Methods for manipulating the state
+    void init(size_t num, unsigned long seed, size_t offset) {
+	gen.seed(seed);
+	gen.discard(offset);
+    };
+
+    // Methods for generating random values
+    HOST inline float gaussian() {
+	return normal(gen);
+    };
+    HOST inline float gaussian(size_t idx, size_t thread) {
+	return gaussian();	// TODO: independent per-thread streams
+    };
+
+    // HOST DEVICE inline float gaussian(RandomState* state) {};
+    HOST inline Vector3 gaussian_vector(size_t idx, size_t thread) {
+	return Vector3(gaussian(), gaussian(), gaussian());
+    };
+    unsigned int integer() {
+	return gen();
+    };
+    unsigned int poisson(float lambda) {
+	std::poisson_distribution<unsigned int> pois(lambda);
+	return pois(gen);
+    };
+    float uniform() {
+	return unif(gen);
+    };
+    // void reorder(int *a, int n);
+
+private:
+    std::mt19937 gen;
+    std::normal_distribution<float> normal;
+    std::uniform_real_distribution<float> unif;
+};
+
+#ifdef USE_CUDA
+
+// Initialize one curandState per logical RNG stream; the grid-stride loop
+// lets any launch configuration cover all `num` states.
+__global__
+void RandomImplCUDA_init_kernel(unsigned long seed, curandState_t *state, size_t num) {
+    size_t idx  = blockIdx.x * blockDim.x + threadIdx.x;
+    size_t step = blockDim.x * gridDim.x;
+    for (size_t i = idx; i < num; i += step) {
+	curandState_t local;
+	// curand_init(clock64()+seed,i,0,&local);
+	// curand_init(clock64(),i,0,&state[i]);
+	curand_init(seed, i, 0, &local); // sequence i gives each stream an independent subsequence
+	state[i] = local;
+    }
+}
+
+
+// GPU-resident?
+template<size_t NUM_THREADS = 128, size_t buffer_size = 1024>
+class RandomImplCUDA : public Random {
+public:
+    // Device-resident state: per-stream curand states plus a device scratch buffer
+    struct Data {
+	Data() : states(nullptr), buffer(nullptr), num(0) {};
+	curandState_t *states;
+	size_t* buffer;		// What about Gauss vs int?
+	size_t num;		// Remove?
+    };
+
+    // Metadata stored on host even if Data is on device
+    struct Metadata {
+	curandGenerator_t generator;
+	Proxy<Data> data;	// state data may be found elsewhere
+	Data dev;		// host-side copy of the device pointers held by `data`
+	size_t* buffer;		// host staging buffer filled in bulk from the device
+	size_t num;		// values remaining in `buffer`
+    };
+
+
+    RandomImplCUDA(Random::Conf c, Resource&& location) {
+	// Note: NUM_THREADS is the CUDA block size, whereas c.num_threads is
+	// the number of independent RNG streams to initialize
+
+	INFO("Creating RandomImplCUDA with seed {}", c.seed);
+
+	assert( location.type == Resource::GPU );
+
+	// Guard against a default-constructed Conf (num_threads == 0), which
+	// would otherwise underflow the block count below
+	const size_t num_states = c.num_threads > 0 ? c.num_threads : NUM_THREADS;
+
+	// Allocate device-side state directly, then send the pointer bundle to 'location'
+	Data tmp;
+	gpuErrchk(cudaMalloc(&tmp.states, sizeof(curandState_t) * num_states));
+	gpuErrchk(cudaMalloc(&tmp.buffer, sizeof(size_t) * buffer_size));
+	tmp.num = 0;
+
+	INFO("...Sending data");
+	metadata.data = send(location, tmp);
+	metadata.dev = tmp;	// keep device pointers on the host for launches and copies
+	metadata.buffer = new size_t[buffer_size]; // TODO handle case if RandomImplCUDA is on location
+	metadata.num = 0;
+
+	// TODO consider location when creating generator!
+	// Create RNG and set seed
+	INFO("...Creating generator");
+	cuRandchk(curandCreateGenerator(&(metadata.generator), CURAND_RNG_PSEUDO_XORWOW));
+	cuRandchk(curandSetPseudoRandomGeneratorSeed( metadata.generator, c.seed ));
+
+	// Initialize the per-stream states
+	INFO("...Setting seed");
+	size_t num_blocks = 1 + (num_states-1)/NUM_THREADS;
+	RandomImplCUDA_init_kernel<<< num_blocks, NUM_THREADS >>>(c.seed, metadata.dev.states, num_states);
+	INFO("...Kernel launched");
+	gpuErrchk(cudaDeviceSynchronize());
+	INFO("...Done");
+    };
+   
+    // Methods for manipulating the state
+    void init(size_t num, unsigned long seed, size_t offset) {
+	// TODO: reseed the generator and reinitialize the per-stream states
+    };
+
+    // Methods for generating random values
+    HOST inline float gaussian() {
+	if (metadata.num == 0) {
+	    // Refill: generate normal variates into the device buffer, then stage them on the host
+	    cuRandchk(curandGenerateNormal( metadata.generator, (float*) metadata.dev.buffer, buffer_size, 0.0f, 1.0f ));
+	    gpuErrchk(cudaMemcpy(metadata.buffer, metadata.dev.buffer, sizeof(float) * buffer_size, cudaMemcpyDeviceToHost));
+	    metadata.num = buffer_size;
+	}
+	return ((float*) metadata.buffer)[--metadata.num];
+    };
+    HOST inline float gaussian(size_t idx, size_t thread) {
+	return 0;
+    };
+
+    // HOST DEVICE inline float gaussian(RandomState* state) {};
+    HOST inline Vector3 gaussian_vector(size_t idx, size_t thread) {
+	return Vector3();
+    };
+    unsigned int integer() {
+	return 0;
+    };
+    unsigned int poisson(float lambda) {
+	return 0;
+    };
+    float uniform() {
+	return 0;
+    };
+private:
+    // Proxy<Data> data;
+    Metadata metadata;
+
+    // void reorder(int *a, int n);
+};
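+
+// Design note: host-side draws are buffered. curand fills buffer_size values
+// in bulk on the device; they are staged into metadata.buffer and consumed
+// one at a time, amortizing launch and copy overhead across many draws.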
+#endif
+
+Random* Random::GetRandom(Conf&& conf) {
+	// Checks _objects for a matching configuration; returns the existing instance if found, otherwise creates one
+	if (conf.backend == Conf::Default) {
+#ifdef USE_CUDA
+	    conf.backend = Conf::CUDA;
+#else
+	    conf.backend = Conf::CPU;
+#endif
+	}
+
+	// Try to insert the configuration; emplace is a no-op if an equivalent key already exists
+	auto emplace_result = Random::_objects.emplace(conf, nullptr);
+	auto& it = emplace_result.first;
+	bool& inserted = emplace_result.second;
+	if (inserted) {
+	    // Conf not found, so create a new one 
+	    Random* tmp;
+
+	    switch (conf.backend) {
+	    case Conf::CUDA:
+#ifdef USE_CUDA
+		// TODO: replace Resource below / overload GetRandom() so it can target a resource 
+		tmp = new RandomImplCUDA<128,1024>(conf, Resource{Resource::GPU,0} );
+#else
+		WARN("Random::GetRandom(): CUDA disabled, creating CPU random instead");
+		tmp = new RandomImpl(conf.seed);
+#endif
+		break;
+	    case Conf::CPU:
+		tmp = new RandomImpl(conf.seed);
+		break;
+	    default:
+		Exception(ValueError, "Random::GetRandom(): Unrecognized backend");
+	    }
+	    it->second = tmp;
+	}
+	return it->second;
+}
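+
+// Example usage (sketch; seed/thread-count values are illustrative):
+//   Random* rng = Random::GetRandom(Random::Conf{Random::Conf::CUDA, 42, 128});
+//   float x = rng->gaussian();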
diff --git a/src/Random.h b/src/Random.h
new file mode 100644
index 0000000..c5d5786
--- /dev/null
+++ b/src/Random.h
@@ -0,0 +1,96 @@
+#pragma once
+
+#include "GPUManager.h"
+#include "Types.h"
+#include "Proxy.h"
+#include <map>
+
+// #include <cuda.h>
+// #include <curand_kernel.h>
+// #include <curand.h>
+
+// #define cuRandchk(ans) { cuRandAssert((ans), __FILE__, __LINE__); }
+// inline void cuRandAssert(curandStatus code, const char *file, int line, bool abort=true) {
+// 	if (code != CURAND_STATUS_SUCCESS) {
+// 		fprintf(stderr, "CURAND Error: %d (%s:%d)\n", code, file, line);
+// 		if (abort) exit(code);
+// 	}
+// }
+
+// class RandomState;
+
+// Parallel (multi-threaded) RNG interface; implementations live in Random.cu
+class Random {
+public:
+    struct Conf {
+	enum Backend { Default, CUDA, CPU };
+	Backend backend;
+	size_t seed;
+	size_t num_threads;
+	// explicit operator int() const { return backend*3*num_threads + num_threads + seed; };
+    };
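+    // Note: Conf is used as the key of Random::_objects below; the strict
+    // weak ordering (operator<) is defined in Random.cu.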
+    // size_t thread_idx;
+
+    struct State;
+    
+    static Random* GetRandom(Conf&& conf);
+
+    // Methods for manipulating the state
+    // virtual void init(size_t num, unsigned long seed, size_t offset) = 0;
+    
+    // Methods for generating random values
+    virtual inline float gaussian() = 0;
+    virtual inline float gaussian(size_t idx, size_t thread) = 0;
+    // HOST DEVICE inline float gaussian(RandomState* state);
+    virtual inline Vector3 gaussian_vector(size_t idx, size_t thread) = 0;
+    
+    virtual unsigned int integer() = 0;
+    virtual unsigned int poisson(float lambda) = 0;
+    virtual float uniform() = 0;
+    
+    // void reorder(int *a, int n);
+    
+protected:
+    static std::map<Conf, Random*> _objects;
+	// Random() : states(NULL), integer_h(NULL), integer_d(NULL), uniform_h(NULL), uniform_d(NULL), gaussian_h(NULL), gaussian_d(NULL) { }
+	// Random(int num, unsigned long seed=0) : states(NULL), integer_h(NULL), integer_d(NULL), uniform_h(NULL), uniform_d(NULL), gaussian_h(NULL), gaussian_d(NULL) {		
+	// 	init(num, seed);
+	// }
+
+};
+
+// class RandomImpl : public Random {
+// public:
+//     // Methods for manipulating the state
+//     void init(size_t num, unsigned long seed, size_t offset);
+
+//     // // Methods for generating random values
+//     HOST inline float gaussian();
+//     HOST inline float gaussian(size_t idx, size_t thread);
+//     // HOST DEVICE inline float gaussian(RandomState* state) {};
+//     HOST inline Vector3 gaussian_vector(size_t idx, size_t thread);
+//     unsigned int integer() { return 0; };
+//     unsigned int poisson(float lambda) { return 0; };
+//     float uniform() { return 0; };
+    
+//     // // void reorder(int *a, int n);
+
+// };
+
+#ifdef USE_CUDA
+// class RandomImplCUDA : public Random {
+// public:
+//     // Methods for maninpulating the state
+//     void init(size_t num, unsigned long seed, size_t offset);
+
+//     // // Methods for generating random values
+//     HOST inline float gaussian();
+//     HOST inline float gaussian(size_t idx, size_t thread);
+//     // HOST DEVICE inline float gaussian(RandomState* state) {};
+//     HOST inline Vector3 gaussian_vector(size_t idx, size_t thread);
+//     unsigned int integer() { return 0; };
+//     unsigned int poisson(float lambda) { return 0; };
+//     float uniform() { return 0; };
+
+// };
+#endif
diff --git a/src/SignalManager.cpp b/src/SignalManager.cpp
index 4605268..709acc3 100644
--- a/src/SignalManager.cpp
+++ b/src/SignalManager.cpp
@@ -50,7 +50,8 @@ void SignalManager::manage_segfault()
 void SignalManager::segfault_handler(int sig, siginfo_t *info, void *secret) {}
 void SignalManager::manage_segfault() {
 #ifdef USE_LOGGER
-    spdlog::set_level(spdlog::level::trace);
+    // Commented when moving to logger.h
+    // spdlog::set_level(spdlog::level::trace);
 #endif
 }
 
diff --git a/src/SignalManager.h b/src/SignalManager.h
index 0b9afb8..1d517d0 100644
--- a/src/SignalManager.h
+++ b/src/SignalManager.h
@@ -8,6 +8,10 @@
 
 #ifdef USE_LOGGER
 
+/* #define ARBD_LOG_ACTIVE_LEVEL 0 */
+/* #include "logger.h" */
+
+//*
 #define FMT_HEADER_ONLY
 #include <spdlog/fmt/bundled/core.h>
 #include <spdlog/fmt/bundled/format.h>
@@ -25,6 +29,17 @@
 #define ERROR(...) SPDLOG_ERROR(__VA_ARGS__)
 #define CRITICAL(...) SPDLOG_CRITICAL(__VA_ARGS__)
 // spdlog::set_level(spdlog::level::trace);
+//*/
+
+/*
+#define TRACE(...) ::arbd::log_trace(__VA_ARGS__)
+#define DEBUG(...) ::arbd::log_debug(__VA_ARGS__)
+// #define DEBUG(...) spdlog::debug(__VA_ARGS__)
+#define INFO(...) ::arbd::log_info(__VA_ARGS__)
+#define WARN(...) ::arbd::log_warn(__VA_ARGS__)
+#define ERROR(...) ::arbd::log_error(__VA_ARGS__)
+#define CRITICAL(...) ::arbd::log_critical(__VA_ARGS__)
+//*/
 
 #else
 
diff --git a/src/SimManager.cpp b/src/SimManager.cpp
index aec0e7d..16e5fb2 100644
--- a/src/SimManager.cpp
+++ b/src/SimManager.cpp
@@ -1,14 +1,18 @@
 #include "SimManager.h"
+#include "Random.h"
 #include <memory>
 
 void SimManager::run() {
-    std::cout << "running" << std::endl;
+    INFO("Running!");
+    // TODO: process a command queue?
 
     // SimSystem sys = SimSystem();
     // Patch p(10,0,0,sys);
 
     Patch p;
 
+    Random* r = Random::GetRandom(Random::Conf{});
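+    // Note: a default-constructed Conf resolves to the CUDA backend when
+    // USE_CUDA is defined (see Random::GetRandom); seed defaults to 0.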
+    
     //ProxyPatch p2(10,0,0);
 
     // p.add_compute( std::make_unique<LocalPairForce>() );
@@ -23,7 +27,7 @@ void SimManager::run() {
 // #endif
     
     for (size_t step = 0; step < 10; ++step) {
-	printf("Step\n");
+	INFO("Step {}: random {}", step, r->gaussian());
 	p.compute();
 #ifdef USE_CUDA
 	cudaDeviceSynchronize();
-- 
GitLab