Commit d8e07618 authored by cmaffeo2

Development on Proxy template class, with lots to clean up

parent 1f8f0a33
@@ -7,12 +7,29 @@
// // rb_pos = rb_orient = rb_mom = rb_ang_mom = type = rb_type = nullptr;
// };
size_t BasePatch::global_patch_idx = 0;
void Patch::initialize() {
LOGINFO("Creating Patch::Data");
/*
Data* data = new Data{nullptr,nullptr,nullptr,nullptr}; // TODO free
metadata = Metadata{0,0,0};
// metadata = Metadata{0,0,0};
// data->pos_force = data->momentum = nullptr;
// data->particle_types = data->particle_order = nullptr;
metadata.data = send( Resource{Resource::CPU,0}, *data );
LOGINFO("Sending Patch::Data @{}", fmt::ptr(data));
Resource r = Resource{Resource::CPU,0};
// if (&(metadata.data) != nullptr) {
// LOGINFO( "Freeing metadata.data");
// delete &(metadata.data);
// LOGINFO( "Freeing metadata");
// }
// send(r,*data);
metadata.data = send(r, *data);
LOGINFO("Data sent");
*/
LOGWARN("Patch::initialize(): Creating Data on CPU resource");
Resource r = Resource{Resource::CPU,0};
metadata.data = construct_remote<Data>(r, capacity);
};
Patch* Patch::copy_to_cuda(Patch* dev_ptr) const {
@@ -49,10 +66,12 @@ Patch* Patch::copy_to_cuda(Patch* dev_ptr) const {
};
Patch Patch::send_children(Resource location) const {
LOGWARN("Patch::send_children()");
Patch tmp(0);
Proxy<Data> newdata;
switch (location.type) {
case Resource::GPU:
Exception( NotImplementedError, "`send_children(...)` not implemented on GPU" );
// metadata.data is already a proxy because the data may reside on the GPU;
// Q: In what cases should we move metadata.data.addr?
// For example, we could always move, or we could never move, or we could move only if metadata.data.location is a CPU resource
@@ -80,11 +99,13 @@ Patch Patch::send_children(Resource location) const {
Patch Patch::copy_from_cuda(Patch* dev_ptr, Patch* dest) {
Exception(NotImplementedError, "Deprecated");
// TODO
Patch tmp = Patch();
return tmp;
};
void Patch::remove_from_cuda(Patch* dev_ptr) {
Exception(NotImplementedError, "Deprecated");
// TODO
};
@@ -99,7 +120,7 @@ void Patch::compute() {
// template<typename T>
// size_t Patch::send_particles_filtered( Proxy<Patch>& destination, T filter ) {
size_t Patch::send_particles_filtered( Proxy<Patch>& destination, std::function<bool(size_t,Patch::Data)> filter ) {
// TODO determine who allocates and cleans up temporary data
// TODO determine if there is a good way to avoid having temporary data (this can be done later if delegated to a reasonable object)
// TODO think about the design and whether it's possible to have a single array with start/end
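// Usage sketch (hypothetical call, not part of this commit): the caller
// supplies the predicate over (local index, data), e.g. to select particles
// destined for a neighboring patch:
//   size_t n_sent = patch.send_particles_filtered(dest_proxy,
//       [](size_t i, Patch::Data d) { return true; /* e.g. a bounds test */ });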
...
@@ -33,8 +33,28 @@ class BasePatch {
public:
// BasePatch(size_t num, short thread_id, short gpu_id, SimSystem& sys);
// BasePatch(size_t num, short thread_id, short gpu_id);
BasePatch() : num(0), capacity(0) {}
BasePatch(size_t capacity) : num(0), capacity(capacity) {}
BasePatch() : num(0), capacity(0), patch_idx(++global_patch_idx) {}
BasePatch(size_t capacity) : num(0), capacity(capacity), patch_idx(++global_patch_idx) {}
// Copy constructor
BasePatch(const BasePatch& other) : num(other.num), capacity(other.capacity), patch_idx(++global_patch_idx) {
LOGTRACE("Copy constructing {} @{}", type_name<*this>().c_str(), fmt::ptr(this));
}
// Move constructor
BasePatch(BasePatch&& other) : num(std::move(other.num)), capacity(std::move(other.capacity)), patch_idx(std::move(other.patch_idx)) {
LOGTRACE("Move constructing {} @{}", type_name<*this>().c_str(), fmt::ptr(this));
}
// Move assignment operator
BasePatch& operator=(BasePatch&& other) {
LOGTRACE("Move assigning {} @{}", type_name<*this>().c_str(), fmt::ptr(this));
num = std::move(other.num);
capacity = std::move(other.capacity);
patch_idx = std::move(other.patch_idx);
// lower_bound = std::move(other.lower_bound);
// upper_bound = std::move(other.upper_bound);
return *this;
}
// ~BasePatch();
protected:
@@ -44,12 +64,15 @@ protected:
// short thread_id; // MPI
// short gpu_id; // -1 if GPU unavailable
static size_t patch_idx; // Unique ID across ranks
static size_t global_patch_idx; // Unique ID across ranks // TODO: preallocate regions that will be used, or offload this to a parallel singleton class
/* const */ size_t patch_idx; // Unique ID across ranks
// Q: should we have different kinds of patches? E.g. Spheres? This specialization _could_ go into subclass, or we could have a ptr to a Region class that can have different implementations
Vector3 lower_bound;
Vector3 upper_bound;
};
// class PatchProxy {
// //
// public:
@@ -127,11 +150,22 @@ protected:
// Storage class that should be
class Patch : public BasePatch {
public:
Patch() : BasePatch() { initialize(); }
Patch() : BasePatch(), metadata() { LOGINFO("Creating Patch"); initialize(); LOGINFO("Done Creating Patch"); }
Patch(size_t capacity) : BasePatch(capacity) { initialize(); }
// Particle data arrays pointing to either CPU or GPU memory
struct Data {
Data(const size_t capacity = 0) {
if (capacity == 0) {
pos_force = momentum = nullptr;
particle_types = particle_order = nullptr;
} else {
pos_force = new VectorArr(capacity);
momentum = new VectorArr(capacity);
particle_types = new Array<size_t>(capacity);
particle_order = new Array<size_t>(capacity);
}
}
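// Cleanup sketch (not in this commit, addressing the "TODO free" notes above):
// Data owns these four heap arrays, so once ownership is settled a matching
// destructor would be needed, e.g.:
//   ~Data() { delete pos_force; delete momentum; delete particle_types; delete particle_order; }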
VectorArr* pos_force;
VectorArr* momentum;
Array<size_t>* particle_types;
@@ -152,7 +186,11 @@ public:
// Metadata stored on host even if Data is on device
struct Metadata {
Metadata() : num(0), capacity(0), min(0), max(0), data() {};
Metadata(const Patch& p) : num(p.metadata.num), capacity(p.metadata.capacity), min(p.metadata.min), max(p.metadata.max), data(p.metadata.data) {};
Metadata(const Metadata& other) : num(other.num), capacity(other.capacity), min(other.min), max(other.max), data(other.data) {};
size_t num;
size_t capacity;
Vector3 min;
Vector3 max;
Proxy<Data> data; // actual data may be found elsewhere
@@ -203,7 +241,6 @@ public:
return 1;
}
private:
void initialize();
...
@@ -16,38 +16,38 @@
#include "PatchOp.h"
template<typename Pos, typename Force>
class Patch {
// size_t num_replicas;
// template<typename Pos, typename Force>
// class Patch {
// // size_t num_replicas;
public:
Patch(SimParameters* sim);
// public:
// Patch(SimParameters* sim);
void compute();
// void compute();
private:
// private:
SimParameters* sim;
// SimParameters* sim;
Pos minimum;
Pos maximum;
// Pos minimum;
// Pos maximum;
ParticleTypes* types;
ParticleTypes* types_d;
// ParticleTypes* types;
// ParticleTypes* types_d;
// Particle data
size_t num_particles;
// // Particle data
// size_t num_particles;
idx_t* global_idx; // global index of particle
size_t* type_ids;
Pos* pos;
Force* force;
// idx_t* global_idx; // global index of particle
// size_t* type_ids;
// Pos* pos;
// Force* force;
size_t* type_ids_d;
Pos* pos_d;
Force* force_d;
// size_t* type_ids_d;
// Pos* pos_d;
// Force* force_d;
std::vector<Patch> neighbor_patches;
std::vector<BasePatchOp> ops;
};
// std::vector<Patch> neighbor_patches;
// std::vector<BasePatchOp> ops;
// };
@@ -4,6 +4,30 @@
#include <iostream>
#include "Resource.h"
#include "cuda.h"
#include "cuda_runtime.h"
// template<typename T, typename...Args1, typename... Args2>
// __global__ void proxy_sync_call_kernel_noreturn(T* addr, (T::*memberFunc(Args1...)), Args2...args);
#ifdef __CUDACC__
#include <cuda/std/utility>
#include <new> // placement new used in constructor_kernel below
// Kernels
template<typename T, typename RetType, typename...Args>
__global__ void proxy_sync_call_kernel(RetType* result, T* addr, RetType (T::*memberFunc)(Args...), Args...args) {
if (blockIdx.x == 0 && threadIdx.x == 0) { // a single thread performs the member call
*result = (addr->*memberFunc)(args...);
}
}
template<typename T, typename... Args>
__global__ void constructor_kernel(T* __restrict__ devptr, Args...args) {
if (blockIdx.x == 0 && threadIdx.x == 0) {
// Construct in the preallocated buffer with placement new; plain `new`
// would only reassign the kernel-local pointer and leak the object.
new (devptr) T{::cuda::std::forward<Args>(args)...};
}
}
#endif
// START traits
// These ugly bits of code help implement SFINAE in C++14 and should likely be removed if a newer standard is adopted
// https://stackoverflow.com/questions/55191505/c-compile-time-check-if-method-exists-in-template-type
@@ -18,7 +42,7 @@ struct has_send_children<T, decltype(std::declval<T>().send_children(Resource{Re
// template <typename T, typename = void>
// struct has_metadata : std::false_type {};
// template <typename T>
// template <typename _tT>
// struct has_metadata<T, decltype(std::declval<T>()::Metadata, void())> : std::true_type {};
template <typename...>
@@ -28,9 +52,15 @@ using void_t = void;
// Used by Proxy class
template <typename T, typename = void>
struct Metadata_t { };
struct Metadata_t {
Metadata_t(const T& obj) {};
Metadata_t(const Metadata_t<T>& other) {};
};
template <typename T>
struct Metadata_t<T, void_t<typename T::Metadata>> : T::Metadata { };
struct Metadata_t<T, void_t<typename T::Metadata>> : T::Metadata {
Metadata_t(const T& obj) : T::Metadata(obj) {};
Metadata_t(const Metadata_t<T>& other) : T::Metadata(other) {};
};
// struct Metadata_t<T, decltype(std::declval<typename T::Metadata>(), void())> : T::Metadata { };
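// Resolution sketch (hypothetical types A and B): a type with a nested
// Metadata struct, like Patch, selects the specialization and inherits from
// T::Metadata; any other type falls back to the empty primary template:
//   struct A { struct Metadata { Metadata(const A&) {} Metadata(const Metadata&) = default; }; };
//   struct B { };
//   static_assert(std::is_base_of<A::Metadata, Metadata_t<A>>::value, "specialization used");
//   static_assert(std::is_empty<Metadata_t<B>>::value, "primary template used");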
@@ -113,9 +143,52 @@ struct Proxy {
/**
* @brief Default constructor initializes the location to a default CPU resource and the address to nullptr.
*/
Proxy() : location(Resource{Resource::CPU,0}), addr(nullptr) {};
Proxy(const Resource& r, T* obj) : location(r), addr(obj) {};
Proxy() : location(Resource{Resource::CPU,0}), addr(nullptr), metadata(nullptr) {
LOGINFO("Constructing Proxy<{}> @{}", type_name<T>().c_str(), fmt::ptr(this));
};
Proxy(const Resource& r) : location(r), addr(nullptr), metadata(nullptr) {
LOGINFO("Constructing Proxy<{}> @{}", type_name<T>().c_str(), fmt::ptr(this));
};
Proxy(const Resource& r, T& obj, T* dest = nullptr) : location(r), addr(dest == nullptr ? &obj : dest) {
if (dest == nullptr) metadata = nullptr;
else metadata = new Metadata_t<T>(obj);
LOGINFO("Constructing Proxy<{}> @{} wrapping @{} with metadata @{}",
type_name<T>().c_str(), fmt::ptr(this), fmt::ptr(&obj), fmt::ptr(metadata));
};
// Copy constructor
Proxy(const Proxy<T>& other) : location(other.location), addr(other.addr), metadata(nullptr) {
LOGINFO("Copy Constructing Proxy<{}> @{}", type_name<T>().c_str(), fmt::ptr(this));
if (other.metadata != nullptr) {
const Metadata_t<T>& tmp = *(other.metadata);
metadata = new Metadata_t<T>(tmp);
}
};
Proxy<T>& operator=(const Proxy<T>& other) {
if (this != &other) {
// Free existing resources.
if (metadata != nullptr) delete metadata;
location = other.location;
addr = other.addr;
metadata = (other.metadata == nullptr) ? nullptr : new Metadata_t<T>(*other.metadata); // copy construct; guard against null metadata
// std::copy(other.metadata, other.metadata + sizeof(Metadata_t<T>), metadata);
}
return *this;
};
Proxy(Proxy<T>&& other) : addr(nullptr), metadata(nullptr) {
LOGINFO("Move Constructing Proxy<{}> @{}", type_name<T>().c_str(), fmt::ptr(this));
location = other.location;
addr = other.addr;
// For now we avoid std::move, but we may choose to change this behavior
// const Metadata_t<T>& tmp = *(other.metadata);
metadata = other.metadata;
other.metadata = nullptr;
};
~Proxy() {
LOGINFO("Deconstructing Proxy<{}> @{} with metadata @{}", type_name<T>().c_str(), fmt::ptr(this), fmt::ptr(metadata));
if (metadata != nullptr) delete metadata;
};
/**
* @brief Overloaded operator-> returns the address of the underlying object.
* @return The address of the underlying object.
@@ -142,11 +215,27 @@ struct Proxy {
}
break;
case Resource::GPU:
#ifdef __CUDACC__
if (location.is_local()) {
Exception( NotImplementedError, "Proxy::callSync() local GPU calls" );
if (sizeof(RetType) > 0) {
// Note: this only supports trivially-copyable RetType objects
RetType* dest;
RetType obj;
gpuErrchk(cudaMalloc(&dest, sizeof(RetType)));
proxy_sync_call_kernel<T, RetType, Args2...><<<1,32>>>(dest, addr, memberFunc, args...);
// proxy_sync_call_kernel<><<<1,32>>>(dest, addr, memberFunc, args...);
gpuErrchk(cudaMemcpy(&obj, dest, sizeof(RetType), cudaMemcpyDeviceToHost)); // copy the result back to the host
gpuErrchk(cudaFree(dest));
return obj;
} else {
Exception( NotImplementedError, "Proxy::callSync() local GPU calls" );
}
} else {
Exception( NotImplementedError, "Proxy::callSync() non-local GPU calls" );
}
#else
Exception( NotImplementedError, "Proxy::callSync() for GPU only defined for files compiled with nvvc" );
#endif
break;
case Resource::MPI:
Exception( NotImplementedError, "MPI sync calls (deprecate?)" );
@@ -262,8 +351,11 @@ struct Proxy<T, typename std::enable_if_t<std::is_arithmetic<T>::value>> {
*/
template <typename T>
HOST inline Proxy<T> _send_ignoring_children(const Resource& location, T& obj, T* dest = nullptr) {
LOGTRACE(" _send_ignoring_children...");
switch (location.type) {
case Resource::GPU:
LOGINFO(" GPU...");
#ifdef USE_CUDA
if (location.is_local()) {
if (dest == nullptr) { // allocate if needed
LOGTRACE(" cudaMalloc for array");
@@ -273,16 +365,25 @@ HOST inline Proxy<T> _send_ignoring_children(const Resource& location, T& obj, T
} else {
Exception( NotImplementedError, "`_send_ignoring_children(...)` on non-local GPU" );
}
#else
Exception( NotImplementedError, "USE_CUDA is not enabled" );
#endif
break;
case Resource::CPU:
LOGINFO(" CPU...");
if (location.is_local()) {
if (dest == nullptr) { // allocate if needed
LOGTRACE(" Allocate CPU memory for {}", decltype(T));
dest = new T;
}
memcpy(dest, &obj, sizeof(T));
LOGINFO(" local CPU...");
// if (dest == nullptr) { // allocate if needed
// LOGINFO(" allocate memory...");
// LOGTRACE(" Allocate CPU memory for {}", type_name<T>().c_str());
// dest = new T;
// }
// LOGINFO(" memcpying...");
// memcpy(dest, &obj, sizeof(T));
// dest = *obj;
} else {
Exception( NotImplementedError, "`_send_ignoring_children(...)` on non-local CPU" );
LOGINFO(" nonlocal...");
// Exception( NotImplementedError, "`_send_ignoring_children(...)` on non-local CPU" );
}
break;
default:
@@ -290,7 +391,16 @@ HOST inline Proxy<T> _send_ignoring_children(const Resource& location, T& obj, T
Exception( ValueError, "`_send_ignoring_children(...)` applied with unknown resource type" );
}
return Proxy<T>(location, dest);
LOGINFO(" creating Proxy...");
// Proxy<T>* ret = new Proxy<T>(location, dest); // Proxies should be explicitly removed
// LOGINFO(" ...done @{}", fmt::ptr(ret));
// Proxy<T>&& ret =
return Proxy<T>(location, obj, dest); // Proxies should be explicitly removed
//LOGINFO(" ...done @{}", fmt::ptr(&ret));
// return ret;
// LOGINFO(" ...done @{}", fmt::ptr(ret));
// return *ret;
}
/**
@@ -303,10 +413,10 @@ HOST inline Proxy<T> _send_ignoring_children(const Resource& location, T& obj, T
* @return A Proxy representing the data at the destination location.
*/
template <typename T, typename Dummy = void, typename std::enable_if_t<!has_send_children<T>::value, Dummy>* = nullptr>
HOST inline Proxy<T> send(const Resource& location, T& obj, T* dest = nullptr) {
LOGTRACE("...Sending object {} @{} to device at {}", type_name<T>().c_str(), fmt::ptr(&obj), fmt::ptr(dest));
HOST inline Proxy<T> send(const Resource& location, T& obj, T* dest = nullptr) { // return by value; a reference to a local would dangle
LOGINFO("...Sending object {} @{} to device at {}", type_name<T>().c_str(), fmt::ptr(&obj), fmt::ptr(dest));
// Simple objects can simply be copied without worrying about contained objects and arrays
auto ret = _send_ignoring_children(location, obj, dest);
Proxy<T> ret = _send_ignoring_children(location, obj, dest);
LOGTRACE("...done sending");
// printf("...done\n");
return ret;
@@ -323,7 +433,7 @@ HOST inline Proxy<T> send(const Resource& location, T& obj, T* dest = nullptr) {
*/
template <typename T, typename Dummy = void, typename std::enable_if_t<has_send_children<T>::value, Dummy>* = nullptr>
HOST inline Proxy<T> send(const Resource& location, T& obj, T* dest = nullptr) {
LOGTRACE("Sending complex object {} @{} to device at {}", type_name<T>().c_str(), fmt::ptr(&obj), fmt::ptr(dest));
LOGINFO("Sending complex object {} @{} to device at {}", type_name<T>().c_str(), fmt::ptr(&obj), fmt::ptr(dest));
auto dummy = obj.send_children(location); // function is expected to return an object of type obj with all pointers appropriately assigned to valid pointers on location
Proxy<T> ret = _send_ignoring_children<T>(location, dummy, dest);
LOGTRACE("... clearing dummy complex object");
@@ -332,4 +442,53 @@ HOST inline Proxy<T> send(const Resource& location, T& obj, T* dest = nullptr) {
return ret;
}
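// Dispatch sketch (hypothetical caller): has_send_children routes types that
// define send_children(), like Patch, through the deep-copy overload, while
// simple structs take the shallow _send_ignoring_children path:
//   struct Vec { float x, y, z; };    // hypothetical simple type
//   Resource gpu{Resource::GPU, 0};
//   Vec v{1, 2, 3};
//   Proxy<Vec> pv = send(gpu, v);     // shallow copy, no children
//   Patch p(64);
//   Proxy<Patch> pp = send(gpu, p);   // send_children() runs first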
// Utility function for constructing objects in remote memory address
// spaces, obviating the need to construct simple objects locally
// before copying. Returns a Proxy object, but in cases where the
// remote resource location is non-CPU or non-local, metadata for
// Proxy will be blank.
template<typename T, typename... Args>
Proxy<T> construct_remote(Resource location, Args&&...args) {
switch (location.type) {
case Resource::CPU:
if (location.is_local()) {
T* ptr = new T{std::forward<Args>(args)...};
return Proxy<T>(location, *ptr);
} else {
Exception( NotImplementedError, "construct_remote() non-local CPU calls" );
}
break;
case Resource::GPU:
#ifdef __CUDACC__
if (location.is_local()) {
T* devptr;
LOGWARN("construct_remote: TODO: switch to device associated with location");
gpuErrchk(cudaMalloc(&devptr, sizeof(T)));
constructor_kernel<<<1,32>>>(devptr, std::forward<Args>(args)...);
gpuErrchk(cudaDeviceSynchronize());
LOGWARN("construct_remote: proxy.metadata not set");
return Proxy<T>(location, *devptr); // wrap the device allocation; metadata is left empty (see LOGWARN above)
// Exception( NotImplementedError, "construct_remote() local GPU call" );
// Note: this only support basic RetType objects
// T* dest;
// T obj;
// gpuErrchk(cudaMalloc(&dest, sizeof(RetType)));
// proxy_sync_call_kernel<T, RetType, Args2...><<<1,32>>>(dest, addr, addr->*memberFunc, args...);
// gpuErrchk(cudaMemcpy(dest, &obj, sizeof(RetType), cudaMemcpyHostToDevice));
// gpuErrchk(cudaFree(dest));
} else {
Exception( NotImplementedError, "cunstruct_remote() non-local GPU call" );
}
#else
Exception( NotImplementedError, "construct_remote() for GPU only defined for files compiled with nvvc" );
#endif
break;
case Resource::MPI:
Exception( NotImplementedError, "construct_remote() for MPI" );
break;
default:
Exception( ValueError, "construct_remote(): unknown resource type" );
}
return Proxy<T>{};
}
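// Usage sketch (mirrors Patch::initialize() above; GPU path requires nvcc):
// construct Patch::Data directly in device memory, skipping a host temporary:
//   Resource gpu{Resource::GPU, 0};
//   Proxy<Patch::Data> d = construct_remote<Patch::Data>(gpu, size_t(128));
//   // d wraps the device allocation; its metadata is not populated (see LOGWARN)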
@@ -18,6 +18,10 @@ inline void cuRandAssert(curandStatus code, const char *file, int line, bool abo
if (abort) exit(code);
}
}
// #else
// #define QUALIFIERS __host__ __device__
// #include <curand_kernel.h>
// #undef QUALIFIERS
#endif
namespace Random {
@@ -79,6 +83,41 @@ public:
// void reorder(int *a, int n);
};
class RandomCPUOld {
public:
using state_t = void;
// Methods for manipulating the state
void init(size_t num, unsigned long seed, size_t offset) {
//
};
// Methods for generating random values
HOST DEVICE inline float gaussian(state_t* state) {
LOGWARN("RandomCPU::gaussian(): not implemented");
return 0;
};
HOST DEVICE inline state_t* get_gaussian_state() { return nullptr; };
HOST DEVICE inline void set_gaussian_state(state_t* state) {};
// // HOST DEVICE inline float gaussian(RandomState* state) {};
// HOST inline Vector3 gaussian_vector(size_t idx, size_t thread) {
// return Vector3();
// };
// unsigned int integer() {
// return 0;
// };
// unsigned int poisson(float lambda) {
// return 0;
// };
// float uniform() {
// return 0;
// };
// void reorder(int *a, int n);
};
#ifdef USE_CUDA
class RandomAcc {
@@ -157,6 +196,7 @@ public:
gpuErrchk(cudaMalloc((void**)&(rng->states), sizeof(state_t)*num_states));
gpuErrchk(cudaMemcpy(dest, rng, sizeof(RNG),cudaMemcpyHostToDevice));
rng->states = nullptr;
// Since we avoided the constructor, don't call `delete rng;`
return dest;
}
...
#include "Resource.h"
HOST DEVICE size_t current_thread_idx() {
HOST DEVICE size_t caller_id() {
#ifdef USE_MPI
Exception( NotImplementedError, "current_thread_idx() not implemented on GPU" );
// LOGINFO("... caller_id(): MPI path");
Exception( NotImplementedError, "caller_id() not implemented on GPU" );
int world_rank;
MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); // the call returns an error code, not the rank
return static_cast<size_t>(world_rank);
#else
#ifdef __CUDA_ACC__
Exception( NotImplementedError, "current_thread_idx() not implemented on GPU" );
#ifdef __CUDA_ARCH__
LOGINFO("... caller_id(): GPU path");
Exception( NotImplementedError, "caller_id() not implemented on GPU" );
return 0;
#else
// LOGWARN("... caller_id() CPU path: return 0");
return 0;
#endif
#endif
...
@@ -8,8 +8,11 @@
#include "GPUManager.h"
HOST DEVICE size_t current_thread_idx();
/**
* @brief Routine that returns the index of the calling resource,
* regardless of whether it is an MPI rank or a GPU.
*/
HOST DEVICE size_t caller_id();
/**
@@ -25,29 +28,21 @@ struct Resource {
Resource* parent; ///< Parent resource; nullptr unless type is GPU
/**
* @brief Checks if the resource is running on the calling thread.
* @brief Check if resource is local to the calling thread.
*/
HOST DEVICE bool is_running() const {
bool ret = true;
// #ifdef __CUDA_ACC__
// ret = (type == GPU);
// #else
// ret = (type == CPU);
// #endif
return ret;
}
// Q: should this return True for GPU that is attached/assigned to current thread? Currently assuming yes.
HOST DEVICE bool is_local() const {
bool ret = true;
#ifdef __CUDA_ACC__
#ifdef __CUDA_ARCH__ // branch on device execution, not merely nvcc compilation
ret = (type == GPU);
LOGWARN("Resource::is_local() not fully implemented on GPU devices; returning {}",ret);
LOGWARN("Resource::is_local(): called from GPU but not fully implemented on GPU devices, returning {}",ret);
#else
LOGINFO("Resource::is_local() out of CUDACC...");
if (type == GPU && parent != nullptr) {
LOGINFO("Resource::is_local(): called from CPU for resource on GPU, checking parent...");
ret = parent->is_local();
} else {
ret = (current_thread_idx() == id);
ret = (caller_id() == id);
LOGINFO("Resource::is_local(): called from CPU, returning {}", ret);
}
#endif
return ret;
@@ -56,7 +51,7 @@ struct Resource {
static Resource Local() {
LOGWARN("Resource::Local() not properly implemented");
#ifdef __CUDA_ACC__
#ifdef __CUDA_ARCH__
return Resource{ GPU, 0 };
#else
return Resource{ CPU, 0 };
@@ -65,4 +60,3 @@ struct Resource {
bool operator==(const Resource& other) const { return type == other.type && id == other.id; };
};
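// Locality sketch (hypothetical caller): is_local() gates the transfer path,
// as in _send_ignoring_children() and construct_remote() elsewhere in this commit:
//   Resource host{Resource::CPU, caller_id()};
//   if (host.is_local()) { /* direct pointer/memcpy path */ }
//   Resource dev{Resource::GPU, 0}; // parent == nullptr, so the id comparison is used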
@@ -10,8 +10,11 @@ void SimManager::run() {
// Patch p(10,0,0,sys);
Patch p;
LOGINFO("here2!");
//*
RandomCPU rng{};
LOGINFO("there2!");
// RandomAcc rng{};
//ProxyPatch p2(10,0,0);
// p.add_compute( std::make_unique<LocalPairForce>() );
@@ -25,18 +28,21 @@ void SimManager::run() {
// p.add_compute( std::make_unique<LocalBonded>() );
// #endif
LOGINFO("here!");
auto tmp = Random::get_gaussian_state(&rng);
LOGINFO("there!");
for (size_t step = 0; step < 10; ++step) {
LOGINFO("Step {}: random {:0.2f}", step, Random::gaussian(&rng,(RandomCPU::state_t*) nullptr));
LOGINFO("where!");
LOGINFO("Step {:d}: random", step); // {:0.2f}", step, Random::gaussian(&rng,(RandomCPU::state_t*) nullptr));
p.compute();
#ifdef USE_CUDA
cudaDeviceSynchronize();
#endif
}
#ifdef USE_CUDA
RandomGPU<128>::launch_test_kernel<64>((size_t) 1);
cudaDeviceSynchronize();
#endif
// */
// #ifdef USE_CUDA
// RandomGPU<128>::launch_test_kernel<64>((size_t) 1);
// cudaDeviceSynchronize();
// #endif
};
@@ -30,7 +30,7 @@ void CellDecomposer::decompose(SimSystem& sys, ResourceCollection& resources) {
Vector3_t<size_t> ijk = index_to_ijk( idx, n_p_v.x, n_p_v.y, n_p_v.z );
// size_t cap = 2*num_particles / n_r;
// auto p2 = Patch(cap);
auto p2 = Patch(); // don't allocate array locally
Patch p2 = Patch(); // don't allocate array locally
// TODO: generalize to non-orthogonal basis
Vector3 pmin = min + dr.element_mult(ijk);
Vector3 pmax = pmin + dr;
...
@@ -21,9 +21,11 @@ class BoundaryConditions {
public:
BoundaryConditions() : origin(0), basis{Vector3(5000,0,0),Vector3(0,5000,0),Vector3(0,0,5000)}, periodic{true,true,true} {
// do things
LOGINFO("BoundaryConditions()");
}
BoundaryConditions( Vector3 basis1, Vector3 basis2, Vector3 basis3, Vector3 origin = Vector3(0), bool periodic1 = true, bool periodic2 = true, bool periodic3 = true ) {
// do things
LOGINFO("BoundaryConditions(...)");
}
static constexpr size_t dim = 3;
@@ -105,6 +107,7 @@ public:
// explicit operator int() const {return object_type*16 + algorithm*4 + backend;};
};
inline // C++17 feature needed for compilation... unsure of why
static constexpr Conf default_conf = Conf{291.0f, Conf::CellDecomp, Conf::AllPeriodic, {5000.0f,5000.0f,5000.0f}, 50.0f };
SimSystem() : SimSystem(default_conf) {}
@@ -170,6 +173,7 @@ public:
// void consolidate_patches(); // maybe not needed
void distribute_patches() {
LOGINFO("distribute_patches()");
(void) decomp; // TODO: use the decomposition to assign patches to resources
}
...