Skip to content
Snippets Groups Projects
Commit 19a7f749 authored by cmaffeo2's avatar cmaffeo2
Browse files

progress with files added

parent 03cabdc6
No related branches found
No related tags found
No related merge requests found
add_library("lib${PROJECT_NAME}"
GPUManager.cpp
ParticlePatch.cpp
Integrator.cpp
Integrator/CPU.cpp
Integrator/CUDA.cu
Interaction.cpp
Interaction/CPU.cpp
Interaction/CUDA.cu
SignalManager.cpp
)
#pragma once
// #include "../useful.h"
#include "../Types.h"
#ifdef __CUDACC__
#define HOST __host__
#define DEVICE __device__
#else
#define HOST
#define DEVICE
#endif
namespace IntegratorKernels {
HOST DEVICE void __inline__ BDIntegrate() {
// std::cout << "Computes::BDIntegrate_inline" << std::endl;
printf("Integrator::BDIntegrate\n");
};
HOST DEVICE void __inline__ BDIntegrate(Vector3* __restrict__ pos, const Vector3 * const __restrict__ force, const int& idx, float& root_Dt, Vector3& normal_sample_3D) {
printf("Integrator::BDIntegrate\n");
pos[idx] = pos[idx] + force[idx] * root_Dt + normal_sample_3D;
};
}
#include "Interaction.h"
bool operator<(const LocalInteraction::Conf x, const LocalInteraction::Conf y) { return (int(x) < int(y)); };
std::map<LocalInteraction::Conf, LocalInteraction*> LocalInteraction::_interactions;
LocalInteraction* LocalInteraction::GetInteraction(Conf& conf) {
// Checks _interactions for a matching configuration, returns one if found, otherwise creates
if (conf.backend == Conf::Default) {
#ifdef USE_CUDA
conf.backend = Conf::CUDA;
#else
conf.backend = Conf::CPU;
#endif
}
// Insert configuration into map, if it exists
auto emplace_result = LocalInteraction::_interactions.emplace(conf, nullptr);
auto& it = emplace_result.first;
bool& inserted = emplace_result.second;
if (inserted) {
// Conf not found, so create a new one
LocalInteraction* tmp;
switch (conf.object_type) {
case Conf::Particle:
switch (conf.dof) {
case Conf::Bond:
switch (conf.form) {
case Conf::Harmonic:
switch (conf.backend) {
case Conf::CUDA:
#ifdef USE_CUDA
tmp = new LocalBondedCUDA();
#else
std::cerr << "WARNING: LocalInteraction::GetInteraction: "
<< "CUDA disabled, creating CPU interaction instead" << std::endl;
tmp = new LocalBonded();
#endif
break;
case Conf::CPU:
tmp = new LocalBonded();
break;
default:
std::cerr << "Error: LocalInteraction::GetInteraction: "
<< "Unrecognized backend; exiting" << std::endl;
}
default:
std::cerr << "Error: LocalInteraction::GetInteraction: "
<< "Unrecognized form; exiting" << std::endl;
assert(false);
}
break;
default:
std::cerr << "Error: Interaction::GetInteraction: "
<< "Unrecognized algorithm type; exiting" << std::endl;
assert(false);
}
break;
case Conf::RigidBody:
assert(false);
break;
default:
std::cerr << "Error: Interaction::GetInteraction: "
<< "Unrecognized object type; exiting" << std::endl;
assert(false);
}
it->second = tmp;
}
return it->second;
}
#include "CPU.h"
void LocalBonded::compute(Patch* p) {
std::cout << "LocalBonded::compute()" << std::endl;
InteractionKernels::HarmonicBonds();
};
#pragma once
#include "../Interaction.h"
class LocalBonded : public LocalInteraction {
public:
void compute(Patch* patch);
int num_patches() const { return 1; };
};
#include "CUDA.h"
#ifdef USE_CUDA
__global__ void HarmonicBonds_kernel() {
if (threadIdx.x == 0) {
printf("HarmonicBonds_kernel()\n");
InteractionKernels::HarmonicBonds();
}
};
void LocalBondedCUDA::compute(Patch* p) {
printf("HarmonicBondsCUDA::compute()\n");
HarmonicBonds_kernel<<<1,32>>>();
};
#endif
#pragma once
#include "../Interaction.h"
#ifdef USE_CUDA
class LocalBondedCUDA : public LocalInteraction {
public:
void compute(Patch* patch);
int num_patches() const { return 1; };
};
#endif
#pragma once
// #include "../useful.h"
#include "../Types.h"
#ifdef __CUDACC__
#define HOST __host__
#define DEVICE __device__
#else
#define HOST
#define DEVICE
#endif
namespace InteractionKernels {
HOST DEVICE void __inline__ HarmonicBonds() {
// std::cout << "Computes::BDIntegrate_inline" << std::endl;
printf("Interaction::HarmonicBondsDummy()\n");
};
HOST DEVICE void __inline__ HarmonicBonds(Vector3* __restrict__ pos, const Vector3 * const __restrict__ force) {
printf("Interaction::HarmonicBonds\n");
// pos[idx] = pos[idx] + force[idx] * root_Dt + normal_sample_3D;
};
}
find_package(Catch2 3 REQUIRED)
# include(doctest)
include(Catch) # contains some cmake macros that will help with discovery of tests
include(CTest)
## TODO, add a macro to add these projects, build with CUDA
add_executable("arbd_tests"
vector3_precision.cu
)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --extended-lambda")
set_property(TARGET arbd_tests PROPERTY CXX_STANDARD 14)
set_property(TARGET arbd_tests PROPERTY CUDA_STANDARD 14)
# target_link_libraries("arbd_tests" PRIVATE "lib${PROJECT_NAME}" Catch2::Catch2)
target_link_libraries("arbd_tests" PRIVATE "lib${PROJECT_NAME}" Catch2::Catch2WithMain)
## Add optional libraries
if(USE_CUDA)
target_link_libraries(arbd_tests PUBLIC curand)
endif()
if(USE_NCCL)
target_link_libraries(arbd_tests PUBLIC nccl)
endif()
if(USE_NVTX)
target_link_libraries(arbd_tests PUBLIC nvToolsExt)
endif()
## catch_discover_tests("${PROJECT_NAME}_TESTS")
catch_discover_tests(arbd_tests)
install(TARGETS arbd_tests)
#include <type_traits>
#include <typeinfo>
#ifndef _MSC_VER
#include <cxxabi.h>
#endif
#include <memory>
#include <string>
#include <cstdlib>
template <typename T>
std::string type_name() {
using TR = typename std::remove_reference<T>::type;
std::unique_ptr<char, void(*)(void*)> own
(
#ifndef _MSC_VER
abi::__cxa_demangle(typeid(TR).name(), nullptr,
nullptr, nullptr),
#else
nullptr,
#endif
std::free
);
std::string r = own != nullptr ? own.get() : typeid(TR).name();
if (std::is_const<TR>::value)
r += " const";
if (std::is_volatile<TR>::value)
r += " volatile";
if (std::is_lvalue_reference<T>::value)
r += "&";
else if (std::is_rvalue_reference<T>::value)
r += "&&";
return r;
}
#include <float.h>
#include <iostream>
#include <cstdio>
// #include "useful.h"
#include "../SignalManager.h"
#include "../Types.h"
#include <cuda.h>
#include <catch2/catch_test_macros.hpp>
#include <catch2/matchers/catch_matchers_floating_point.hpp>
#include "type_name.h"
// Test kernel function
// namespace TestVectorOps {
// ChatGPT("Add doxygen to each function, class and struct, where R represents a return parameter, T and U")
template<typename R, typename T, typename U>
HOST DEVICE R cross_vectors(T&& v1, U&& v2) {
return v1.cross(v2);
}
template<typename F, typename R, typename T, typename U>
__global__ void binary_op_test_kernel( F fn, R* result, T in1, U in2 ) {
if (blockIdx.x == 0) {
*result = (*fn)(in1,in2);
}
}
template<typename T, typename U>
void check_vectors_equal( T&& cpu, U&& gpu) {
CHECK( type_name<decltype(cpu)>() == type_name<decltype(gpu)>() ); // should be unneccesary
CHECK( cpu.x == gpu.x );
CHECK( cpu.y == gpu.y );
CHECK( cpu.z == gpu.z );
CHECK( cpu.w == gpu.w );
}
template<typename R, typename T, typename U>
struct TestOp {
std::string name;
typename R (*)(T,U) func;
};
TEST_CASE( "Check that Vector3_t binary operations are identical on GPU and CPU", "[Vector3Dcross]" ) {
// INFO("Test case start");
using T = Vector3_t<float>;
using U = Vector3_t<double>;
using R = std::common_type_t<T,U>;
T v1(1,0,0);
U v2(0,2,0);
R *gpu_result_d, gpu_result, cpu_result;
std::vector<TestOp> ops;
ops.push_back(TestOp("add", [] __host__ __device__ (T a, U b){ return static_cast<R>(b+a); }; ));
ops.push_back(TestOp("cross", [] __host__ __device__ (T a, U b){ return a.cross(b); }; ));
// auto sub = [] __host__ __device__ (T& a, U& b){ return a.cross(b); };
// auto dot = [] __host__ __device__ (T& a, U& b){ return a.cross(b); };
auto cross = [] __host__ __device__ (T a, U b){ return a.cross(b); };
using L = decltype(add);
std::vector<R(*)(T,U)> lambdas;
lambdas.push_back(add);
lambdas.push_back(cross);
// Get_result
cudaMalloc((void **)&gpu_result_d, sizeof(R));
for (auto& op: ops) {
INFO(op.name);
binary_op_test_kernel<<<1,1>>>(op.func, gpu_result_d, v1, v2);
cudaMemcpy(&gpu_result, gpu_result_d, sizeof(R), cudaMemcpyDeviceToHost);
cudaDeviceSynchronize();
// Get cpu_result
cpu_result = fn(v1,v2); // rvalue version failed on cpu, but works on gpu
// Check consistency
check_vectors_equal(cpu_result, gpu_result);
}
}
// int main(int argc, char* argv[]) {
// return 0;
// }
// int main(int argc, char* argv[]) {
// SignalManager::manage_segfault();
// Vector3_t<float> v1(1,0,0);
// Vector3_t<double> v2(0,2,0);
// v2.y = 6.62606957e-44;
// std::cout << "test_vector3.cpp" << std::endl;
// // // std::cout << v1.to_char() << std::endl;
// // std::cout << v1.to_char() << " x " << v2.to_char() <<
// // " = " << (v1.cross(v2)).to_char() << std::endl;
// // std::cout << (-v2).to_char() << " x " << v1.to_char() <<
// // " = " << (-v2.cross(v1)).to_char() << std::endl;
// // std::cout.precision(17);
// // std::cout << sizeof(v2.cross(v1).z) << " " << sizeof(v1.cross(v2).z) << std::endl;
// // std::cout << (v2.cross(v1).z) << " " << (v1.cross(v2).z) << std::endl;
// // std::cout << float(6.62606957e-44) << std::endl;
// // std::cout << " ... done" << std::endl;
// // test_kernel<<<1,1>>>(v1,v2);
// // cudaDeviceSynchronize();
// return 0;
// }
// #include <iostream>
// // Since C++ 11
// // template<typename T>
// // using func_t = T (*) (T, T);
// template<typename R, typename T, typename U>
// using func_t = R (*) (T, U);
// template <typename R, typename T, typename U>
// __host__ __device__ R add_func (T x, U y)
// {
// return x + y;
// }
// template <typename T>
// __host__ __device__ T mul_func (T x, T y)
// {
// return x * y;
// }
// // Required for functional pointer argument in kernel function
// // Static pointers to device functions
// template <typename R, typename T, typename U>
// __device__ func_t<R,T,U> p_add_func = add_func<R,T,U>;
// // template <typename T>
// // __device__ func_t<T> p_mul_func = mul_func<T>;
// template <typename R, typename T, typename U>
// __global__ void kernel(func_t<R,T,U> op, T * d_x, U * d_y, R * result)
// {
// *result = (*op)(*d_x, *d_y);
// }
// template <typename T, typename U>
// void mytest(T x, U y)
// {
// using R = std::common_type_t<T,U>;
// func_t<R,T,U> h_add_func;
// // func_t<T> h_mul_func;
// T * d_x;
// U * d_y;
// cudaMalloc(&d_x, sizeof(T));
// cudaMalloc(&d_y, sizeof(U));
// cudaMemcpy(d_x, &x, sizeof(T), cudaMemcpyHostToDevice);
// cudaMemcpy(d_y, &y, sizeof(U), cudaMemcpyHostToDevice);
// R result;
// R * d_result, * h_result;
// cudaMalloc(&d_result, sizeof(R));
// h_result = &result;
// // Copy device function pointer to host side
// cudaMemcpyFromSymbol(&h_add_func, p_add_func<R,T,U>, sizeof(func_t<R,T,U>));
// // cudaMemcpyFromSymbol(&h_mul_func, p_mul_func<T>, sizeof(func_t<T>));
// kernel<R,T,U><<<1,1>>>(h_add_func, d_x, d_y, d_result);
// cudaDeviceSynchronize();
// cudaMemcpy(h_result, d_result, sizeof(R), cudaMemcpyDeviceToHost);
// CHECK( result == add_func<R,T,U>(x,y) );
// // kernel<T><<<1,1>>>(h_mul_func, d_x, d_y, d_result);
// // cudaDeviceSynchronize();
// // cudaMemcpy(h_result, d_result, sizeof(T), cudaMemcpyDeviceToHost);
// // CHECK( result == mul_func(x,y) );
// }
// TEST_CASE( "Check that Vector3_t binary operations are identical on GPU and CPU", "[Vector3Dcross]" ) {
// INFO("TEST START");
// mytest<int,float>(2.05, 10.00);
// // mytest<float>(2.05, 10.00);
// // mytest<double>(2.05, 10.00);
// }
// */
#pragma once
#ifdef __CUDACC__
#define HOST __host__
#define DEVICE __device__
#else
#define HOST
#define DEVICE
#endif
#if defined(__CUDACC__) // NVCC
#define MY_ALIGN(n) __align__(n)
#elif defined(__GNUC__) // GCC
#define MY_ALIGN(n) __attribute__((aligned(n)))
#elif defined(_MSC_VER) // MSVC
#define MY_ALIGN(n) __declspec(align(n))
#else
#error "Please provide a definition for MY_ALIGN macro for your host compiler!"
#endif
#ifdef USE_CUDA
#include <cuda_runtime.h>
#else
struct MY_ALIGN(16) float4 {
float4() : x(0), y(0), z(0), w(0) {};
float4(float x, float y, float z, float w) : x(x), y(y), z(z), w(w) {};
float4 operator+(const float4&& o) {
return float4(x+o.x,y+o.y,z+o.z,w+o.w);
// float4 r;
// r.x = x+o.x; r.y = y+o.y; r.z = z+o.z; r.w = w+o.w;
// return r;
};
float4 operator*(const float&& s) {
return float4(x*s,y*s,z*s,w*s);
};
float x,y,z,w;
};
#endif
#include "Types/Vector3.h"
#include "Types/Vector3.h"
using Vector3 = Vector3_t<float>;
using Matrix3 = Vector3_t<float>; /* TODO: FIX */
/*********************************************************************
* @file Vector3.h
*
* @brief Declaration of templated Vector3_t class.
*********************************************************************/
#pragma once
#include <type_traits> // for std::common_type<T,U>
/**
* 3D vector utility class with common operations implemented on CPU and GPU.
*
* Implemented with 4D data storage for better GPU alignment; extra
* data can be stored in fourth varaible this->w
*
* @tparam T the type of data stored in the four fields, x,y,z,w; T
* Should usually be float or double.
g */
template<typename T>
class MY_ALIGN(4*sizeof(T)) Vector3_t {
public:
HOST DEVICE inline Vector3_t<T>() : x(0), y(0), z(0), w(0) {}
HOST DEVICE inline Vector3_t<T>(T s):x(s), y(s), z(s), w(s) {}
HOST DEVICE inline Vector3_t<T>(const Vector3_t<T>& v):x(v.x), y(v.y), z(v.z), w(v.w) {}
HOST DEVICE inline Vector3_t<T>(T x0, T y0, T z0) : x(x0), y(y0), z(z0), w(0) {}
HOST DEVICE inline Vector3_t<T>(const T* d) : x(d[0]), y(d[1]), z(d[2]), w(0) {}
HOST DEVICE inline Vector3_t<T>(const float4 a) : x(a.x), y(a.y), z(a.z), w(a.w) {}
#if __cplusplus >= 201703L
// TODO: test if c++17 constexpr performs better than std::common_type, otherwise get rid of this #if block
template <typename U>
HOST DEVICE inline Vector3_t<typename std::common_type<T,U>::type> cross(const Vector3_t<U>& w) const {
if constexpr(sizeof(U) < sizeof(T)) {
Vector3_t<T> v;
v.x = y*w.z - z*w.y;
v.y = z*w.x - x*w.z;
v.z = x*w.y - y*w.x;
return v;
} else {
Vector3_t<U> v;
v.x = y*w.z - z*w.y;
v.y = z*w.x - x*w.z;
v.z = x*w.y - y*w.x;
return v;
}
}
#else
template <typename U>
HOST DEVICE inline Vector3_t<typename std::common_type<T,U>::type> cross(const Vector3_t<U>& w) const {
using TU = typename std::common_type<T,U>::type;
Vector3_t<TU> v;
v.x = y*w.z - z*w.y;
v.y = z*w.x - x*w.z;
v.z = x*w.y - y*w.x;
return v;
}
#endif
HOST DEVICE inline Vector3_t<T>& operator=(const Vector3_t<T>&& v) {
x = v.x;
y = v.y;
z = v.z;
return *this;
}
HOST DEVICE inline Vector3_t<T>& operator+=(const Vector3_t<T>&& v) {
x += v.x;
y += v.y;
z += v.z;
return *this;
}
HOST DEVICE inline Vector3_t<T>& operator-=(const Vector3_t<T>&& v) {
x -= v.x;
y -= v.y;
z -= v.z;
return *this;
}
HOST DEVICE inline Vector3_t<T>& operator*=(float s) {
x *= s;
y *= s;
z *= s;
return *this;
}
HOST DEVICE inline Vector3_t<T>& operator/=(float s) {
const float sinv = 1.0f/s;
x *= sinv;
y *= sinv;
z *= sinv;
return *this;
}
HOST DEVICE inline Vector3_t<T> operator-() const {
Vector3_t<T> v;
v.x = -x;
v.y = -y;
v.z = -z;
return v;
}
template<typename U>
HOST DEVICE inline Vector3_t<T> operator+(const Vector3_t<U>& w ) const {
using TU = typename std::common_type<T,U>::type;
Vector3_t<TU> v;
v.x = x + w.x;
v.y = y + w.y;
v.z = z + w.z;
return v;
}
HOST DEVICE inline Vector3_t<T> operator-(const Vector3_t<T>& w ) const {
Vector3_t<T> v;
v.x = x - w.x;
v.y = y - w.y;
v.z = z - w.z;
return v;
}
template<typename U>
HOST DEVICE inline Vector3_t<T> operator*(U&& s) const {
// TODO, throw exception if int
using TU = typename std::common_type<T,U>::type;
Vector3_t<TU> v;
v.x = s*x;
v.y = s*y;
v.z = s*z;
return v;
}
template<typename U>
HOST DEVICE inline Vector3_t<T> operator/(U&& s) const {
const U inv = static_cast<U>(1.0)/s;
return (*this)*inv;
}
template<typename U>
HOST DEVICE inline float dot(const Vector3_t<U>& w) const { return x*w.x + y*w.y + z*w.z; }
HOST DEVICE inline float length() const { return sqrtf(x*x + y*y + z*z); }
HOST DEVICE inline float length2() const { return x*x + y*y + z*z; }
HOST DEVICE inline float rLength() const {
float l = length();
if (l != 0.0f)
return 1.0f / l;
return 0.0f;
}
HOST DEVICE inline float rLength2() const {
float l2 = length2();
if (l2 != 0.0f) return 1.0f / l2;
return 0.0f;
}
template<typename U>
HOST DEVICE static inline auto element_mult(const Vector3_t<T>&& v, const Vector3_t<U>&& w) {
using TU = typename std::common_type<T,U>::type;
Vector3_t<TU> ret(
v.x*w.x,
v.y*w.y,
v.z*w.z);
return ret;
}
HOST DEVICE static inline Vector3_t<T> element_sqrt(const Vector3_t<T>&& w) {
Vector3_t<T> ret(
sqrt(w.x),
sqrt(w.y),
sqrt(w.z));
return ret;
}
HOST DEVICE inline void print() const {
printf("%0.3f %0.3f %0.3f\n", x,y,z);
}
T x, y, z, w; //append a member w
char* to_char() const {
char* s = new char[128];
sprintf(s, "%.10g %.10g %.10g", x, y, z);
s[127] = 0;
return s;
}
};
/* template<typename T> */
/* HOST DEVICE inline Vector3_t<T> operator*(float s, Vector3_t<T> v) { */
/* v.x *= s; */
/* v.y *= s; */
/* v.z *= s; */
/* return v; */
/* } */
// template<typename T>
// HOST DEVICE inline Vector3_t<T> operator/(Vector3_t<T> v, float s) {
// const float sinv = 1.0f/s;
// return v*sinv;
// }
template<typename T, typename U>
HOST DEVICE inline auto operator/(U&& s, Vector3_t<T>&& v) {
// TODO, throw exception if int
using TU = typename std::common_type<T,U>::type;
Vector3_t<TU> ret;
ret.x = s / v.x;
ret.y = s / v.y;
ret.z = s / v.z;
return ret;
}
namespace std {
template<typename T,typename U>
struct common_type<Vector3_t<T>, Vector3_t<U>> {
using type = Vector3_t<common_type_t<T,U>>;
};
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment