From b40b0a7cf2744e68e9f89e14eca46c98e8cb9a70 Mon Sep 17 00:00:00 2001
From: Chris Maffeo <cmaffeo2@illinois.edu>
Date: Tue, 25 Apr 2023 14:40:52 -0500
Subject: [PATCH] Update

---
 src/CMakeLists.txt             |   1 +
 src/Interaction.h              |   2 +-
 src/PatchOp.cuh                |  95 +++++++++++
 src/PatchOp.h                  |   1 +
 src/Tests/CMakeLists.txt       |   1 +
 src/Tests/matrix3.cu           | 297 +++++++++++++++++++++++++++++++++
 src/Tests/type_name.h          |   2 +-
 src/Tests/vector3_precision.cu | 143 ++++++++--------
 src/Types.h                    |  30 +++-
 src/Types/Matrix3.h            | 234 ++++++++++++++++++++++++++
 src/Types/Vector3.h            |  48 ++++--
 11 files changed, 764 insertions(+), 90 deletions(-)
 create mode 100644 src/PatchOp.cuh
 create mode 100644 src/Tests/matrix3.cu
 create mode 100644 src/Types/Matrix3.h

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 4c3fa1e..fb681eb 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -8,4 +8,5 @@ add_library("lib${PROJECT_NAME}"
   Interaction/CPU.cpp
   Interaction/CUDA.cu
   SignalManager.cpp
+  PatchOp.cu
 )

diff --git a/src/Interaction.h b/src/Interaction.h
index 93a5f27..a088381 100644
--- a/src/Interaction.h
+++ b/src/Interaction.h
@@ -32,7 +32,7 @@ public:
      * @brief Struct for defining interaction configuration
      */
     struct Conf {
-	enum Object {Particle, RigidBody };
+	enum Object {Particle, RigidBody};
	enum DoF {Bond, Angle, Dihedral, Bonded, NeighborhoodPair};
	enum Form {Harmonic, Tabulated, LennardJones, Coulomb, LJ};
	enum Backend { Default, CUDA, CPU };

diff --git a/src/PatchOp.cuh b/src/PatchOp.cuh
new file mode 100644
index 0000000..52bf95b
--- /dev/null
+++ b/src/PatchOp.cuh
@@ -0,0 +1,95 @@
+#pragma once
+#include "PatchOp.h"
+
+struct ParticleType {
+    float diffusion = 1;
+};
+struct SimParameters {
+    float timestep = 1;
+    float kT = 1;
+};
+
+struct Random {
+    __host__ __device__ Vector3 gaussian_vector() {
+	return Vector3(0.0f);
+    }
+    int seed = 1;
+};
+
+
+namespace LocalOneParticleLoop {
+
+    namespace BD {
+	template<typename ...Op_t>
+	// sim and random are passed by value: a __global__ kernel cannot safely
+	// dereference references to host memory
+	__global__ void op_kernel(size_t num, Vector3* __restrict__ pos_force, size_t* type_ids, ParticleType* types, SimParameters sim, Random random) {
+	    // grid-stride loop: each thread advances by the total thread count
+	    for (size_t i = threadIdx.x + blockIdx.x*blockDim.x; i < num; i += gridDim.x*blockDim.x) {
+		Vector3& pos = pos_force[i];
+		Vector3& force = pos_force[num+i];
+		ParticleType& t = types[type_ids[i]];
+
+		// apply each op in the parameter pack, in order
+		using expander = int[];
+		(void)expander{0, (Op_t::op(pos, force, t, sim, random), 0)...};
+	    }
+	}
+    }
+
+    namespace MD {
+	template<typename ...Op_t>
+	__global__ void op_kernel(size_t num, Vector3* __restrict__ pos_force, size_t* type_ids, ParticleType* types) {
+	    for (size_t i = threadIdx.x + blockIdx.x*blockDim.x; i < num; i += gridDim.x*blockDim.x) {
+		Vector3& pos = pos_force[i];
+		Vector3& mom = pos_force[num+i];
+		Vector3& force = pos_force[2*num+i];
+		ParticleType& t = types[type_ids[i]];
+
+		using expander = int[];
+		// TODO: the BD ops below take (pos, force, type, sim, random, mom);
+		// MD-flavored ops with a matching signature still need to be defined
+		(void)expander{0, (Op_t::op(pos, force, t, &mom), 0)...};
+	    }
+	}
+    }
+
+    template<size_t block_size=32, bool is_BD=true, typename ...Op_t>
+    class LocalOneParticleLoop : public BasePatchOp {
+	void compute(Patch* patch) {
+	    size_t num_blocks = (patch->num + block_size - 1)/block_size; // ceil(num/block_size)
+	    if (is_BD) {
+		// TODO: plumb SimParameters and Random through to the BD kernel
+		BD::op_kernel<Op_t...><<<num_blocks, block_size>>>(patch->num, patch->pos, patch->type_ids, patch->types);
+	    } else {
+		MD::op_kernel<Op_t...><<<num_blocks, block_size>>>(patch->num, patch->pos, patch->type_ids, patch->types);
+	    }
+	}
+    };
+
+    struct OpIntegrateBD {
+	HOST DEVICE static void op(Vector3& __restrict__ pos,
+				   Vector3& __restrict__ force,
+				   ParticleType& type,
+				   SimParameters& sim,
+				   Random& random,
+				   Vector3* __restrict__ mom = nullptr) {
+
+	    const float Dt = type.diffusion*sim.timestep;
+	    Vector3 R = random.gaussian_vector(); // TODO who owns "random" object; how is its state stored and recalled (especially if hardware changes); how is the implementation efficient on HOST/DEVICE?
+
+	    const Vector3 new_pos = pos + (Dt / sim.kT) * force + sqrtf(2.0f * Dt) * R;
+	    pos = new_pos; // TODO decide how wrapping will be handled, e.g. pos = sim.wrap(new_pos)
+	}
+    };
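+
+    // Editor's note: a worked sanity check of the Euler-Maruyama-style update
+    // above (values hypothetical, chosen for illustration). With
+    // type.diffusion = 1, sim.timestep = 1 and sim.kT = 1, Dt = 1, so for
+    // force = (2,0,0) and a zero random kick R = (0,0,0),
+    //   new_pos = pos + (1/1)*(2,0,0) + sqrtf(2*1)*(0,0,0) = pos + (2,0,0),
+    // a pure drift step. With force = 0 the step reduces to the diffusive kick
+    // sqrtf(2*Dt)*R, matching the expected per-axis spread <dx^2> = 2*D*dt.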
+
+    struct OpComputeForceBD {
+	HOST DEVICE static void op(Vector3& __restrict__ pos,
+				   Vector3& __restrict__ force,
+				   ParticleType& type,
+				   SimParameters& sim,
+				   Random& random,
+				   Vector3* __restrict__ mom = nullptr) {
+
+	    // TODO: this body was copied from OpIntegrateBD as a placeholder;
+	    // it should accumulate forces rather than advance positions
+	    const float Dt = type.diffusion*sim.timestep;
+	    Vector3 R = random.gaussian_vector(); // TODO who owns "random" object; how is its state stored and recalled (especially if hardware changes); how is the implementation efficient on HOST/DEVICE?
+
+	    const Vector3 new_pos = pos + (Dt / sim.kT) * force + sqrtf(2.0f * Dt) * R;
+	    // pos = sim.wrap(new_pos); // TODO decide how wrapping will be handled
+	}
+    };
+
+}

diff --git a/src/PatchOp.h b/src/PatchOp.h
index ca4ce36..f660b76 100644
--- a/src/PatchOp.h
+++ b/src/PatchOp.h
@@ -44,5 +44,6 @@ private:
     void* compute_data;
 };
 
+
 #include "Integrator.h"
 #include "Interaction.h"

diff --git a/src/Tests/CMakeLists.txt b/src/Tests/CMakeLists.txt
index f85436c..15fb2f2 100644
--- a/src/Tests/CMakeLists.txt
+++ b/src/Tests/CMakeLists.txt
@@ -6,6 +6,7 @@ include(CTest)
 ## TODO, add a macro to add these projects, build with CUDA
 
 add_executable("arbd_tests"
+matrix3.cu
 vector3_precision.cu
 )
 set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --extended-lambda")

diff --git a/src/Tests/matrix3.cu b/src/Tests/matrix3.cu
new file mode 100644
index 0000000..4b54306
--- /dev/null
+++ b/src/Tests/matrix3.cu
@@ -0,0 +1,297 @@
+#include <iostream>
+#include <cstdio>
+
+#include "../SignalManager.h"
+#include "../Types.h"
+#include <cuda.h>
+#include <nvfunctional>
+
+#include "type_name.h"
+
+#include <catch2/catch_tostring.hpp>
+namespace Catch {
+    template<typename T, bool b1, bool b2>
+    struct StringMaker<Matrix3_t<T,b1,b2>> {
+	static std::string convert( Matrix3_t<T,b1,b2> const& value ) {
+	    return value.to_string();
+	}
+    };
+}
+#include <catch2/catch_test_macros.hpp>
+#include <catch2/matchers/catch_matchers_floating_point.hpp>
+
+
+
+namespace Tests {
+
+    template<typename Op_t, typename R, typename ...T>
+    __global__ void op_kernel(R* result, T...in) {
+	if (blockIdx.x == 0) {
+	    *result = Op_t::op(in...);
+	}
+    }
+
+
+    // C++14 lacks std::apply (added in C++17); unpack a tuple into a function's
+    // argument list via std::index_sequence instead
+    template <typename F, typename Tuple, size_t... I>
+    decltype(auto) apply_impl(F&& f, Tuple&& t, std::index_sequence<I...>)
+    {
+	return std::forward<F>(f)(std::get<I>(std::forward<Tuple>(t))...);
+    }
+    template <typename F, typename Tuple>
+    decltype(auto) apply(F&& f, Tuple&& t)
+    {
+	using Indices = std::make_index_sequence<std::tuple_size<std::decay_t<Tuple>>::value>;
+	return apply_impl(std::forward<F>(f), std::forward<Tuple>(t), Indices{});
+    }
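+
+    // Editor's sketch (hypothetical usage, mirroring C++17 std::apply):
+    //   apply([](int a, int b) { return a + b; }, std::make_tuple(1, 2)) == 3
+    // The index_sequence expands to std::get<0>(t), std::get<1>(t), ... so the
+    // tuple's elements become the call's arguments.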
+
+    template <typename F, typename Tuple, size_t... I>
+    decltype(auto) apply_each_impl(F&& f, Tuple&& t, std::index_sequence<I...>)
+    {
+	// TODO: currently identical to apply_impl; an element-wise version
+	// would invoke f once per tuple element instead
+	return std::forward<F>(f)(std::get<I>(std::forward<Tuple>(t))...);
+    }
+    template <typename F, typename Tuple>
+    decltype(auto) apply_each(F&& f, Tuple&& t)
+    {
+	using Indices = std::make_index_sequence<std::tuple_size<std::decay_t<Tuple>>::value>;
+	return apply_each_impl(std::forward<F>(f), std::forward<Tuple>(t), Indices{});
+    }
+
+    template<typename T>
+    void call_info(T t) {
+	// INFO( type_name(t) );
+	// UNSCOPED_INFO( type_name<T>() << " " << t );
+    }
+    // template<>
+    // void call_info(double t) {
+    //     UNSCOPED_INFO( "double " << t );
+    // }
+    // template<>
+    // void call_info(float t) {
+    //     UNSCOPED_INFO( "double " << t );
+    //     // INFO( type_name(t) );
+    //     INFO( "float" );
+    // }
+    // template<>
+    // void call_info(int t) {
+    //     // INFO( type_name(t) );
+    //     INFO( "int" );
+    // }
+
+    template<typename Op_t, typename R, typename ...T>
+    void run_trial( std::string name, R expected_result, T...args) {
+	R *gpu_result_d, gpu_result, cpu_result;
+	cpu_result = Op_t::op(args...);
+	cudaMalloc((void **)&gpu_result_d, sizeof(R));
+
+	op_kernel<Op_t, R, T...><<<1,1>>>(gpu_result_d, args...);
+	cudaDeviceSynchronize();
+	cudaMemcpy(&gpu_result, gpu_result_d, sizeof(R), cudaMemcpyDeviceToHost);
+	cudaFree(gpu_result_d); // avoid leaking one allocation per trial
+
+	INFO( name );
+	// INFO( type_name<T...>() );
+
+	// auto tmp = std::make_tuple(args...);
+	// for (int i = 0; i < sizeof(T...); ++i) {
+	//     auto& arg = std::get<i>(tmp);
+	//     CAPTURE( arg );
+	// }
+	// auto fn = [](auto a) { CAPTURE( a ); };
+
+	// auto fn = [](auto a) { INFO( a->to_string() ); };
+	// using expander = int[];
+	// (void)expander{0, (void(call_info<T>(std::forward<T>(args))), 0)...};
+
+	CAPTURE( cpu_result );
+	CAPTURE( expected_result );
+
+	REQUIRE( cpu_result == expected_result );
+	CHECK( cpu_result == gpu_result );
+    }
+}
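+
+// Editor's sketch (hypothetical call, using the AddOp defined below): run_trial
+// evaluates Op_t::op once on the host and once in a single-thread kernel, then
+// requires both results to equal the expected value, e.g.
+//   Tests::run_trial<Tests::Binary::AddOp<int,int,int>, int, int, int>("int add", 3, 1, 2);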
+
+namespace Tests::Unary {
+    template<typename R, typename T>
+    struct NegateOp { HOST DEVICE static R op(T in) { return static_cast<R>(-in); } };
+
+    template<typename R, typename T>
+    struct NormalizedOp { HOST DEVICE static R op(T in) { return static_cast<R>(in.normalized()); } };
+
+    namespace Matrix3 {
+	template<typename R, typename T>
+	struct DeterminantOp { HOST DEVICE static R op(T in) { return static_cast<R>(in.det()); } };
+
+	template<typename R, typename T>
+	struct NormalizeDetOp { HOST DEVICE static R op(T in) { return static_cast<R>(in.normalized().det()); } };
+
+	// template<typename R, typename T>
+	// struct NormalizeOp { HOST DEVICE static R op(T in) { return static_cast<R>(in.normalized()); } };
+
+	template<typename R, typename T>
+	struct InverseOp { HOST DEVICE static R op(T in) { return static_cast<R>(in.inverse()); } };
+
+	template<typename R, typename T>
+	struct TransposeOp { HOST DEVICE static R op(T in) { return static_cast<R>(in.transpose()); } };
+    }
+}
+
+namespace Tests::Binary {
+    // R is return type, T and U are types of operands
+    template<typename R, typename T, typename U>
+    struct AddOp { HOST DEVICE static R op(T a, U b) { return static_cast<R>(a+b); } };
+    template<typename R, typename T, typename U>
+    struct SubOp { HOST DEVICE static R op(T a, U b) { return static_cast<R>(a-b); } };
+    template<typename R, typename T, typename U>
+    struct MultOp { HOST DEVICE static R op(T a, U b) { return static_cast<R>(a*b); } };
+    template<typename R, typename T, typename U>
+    struct DivOp { HOST DEVICE static R op(T a, U b) { return static_cast<R>(a/b); } };
+
+    namespace Matrix3 {
+	template<typename R, typename T, typename U>
+	struct CrossOp { HOST DEVICE static R op(T a, U b) { return static_cast<R>(a.cross(b)); } };
+    }
+}
+
+namespace Tests::Unary::Matrix3 {
+
+    template<typename A, bool is_diag=false, bool check_diag=false>
+    void run_tests() {
+	using T = Matrix3_t<A, is_diag, check_diag>;
+	// std::ostream test_info;
+	INFO( "Testing " << type_name<T>() << " unary operators that" );
+	{
+	    using R = A;
+	    INFO( "  return " << type_name<R>() );
+	    run_trial<DeterminantOp<R,T>,R,T>( "Testing determinant", R(6), T(1,2,3) );
+	    run_trial<NormalizeDetOp<R,T>,R,T>( "Testing that normalized matrix has determinant == 1", R(1), T(1,1,1) );
+	    // run_trial<NormalizeDetOp<R,T>,R,T>( "Testing that normalized matrix has determinant == 1", R(1), T(2,2,2) );
+	    // run_trial<NormalizeDetOp<R,T>,R,T>( "Testing that normalized matrix has determinant == 1", R(1), T(1,2,3) );
+	}
+
+	{ // Test operators that return Matrix
+	    using R = T;
+	    INFO( "  return " << type_name<R>() );
+	    run_trial<TransposeOp<R,T>,R,T>( "Testing transpose",
+					     R(1,4,7,
+					       2,5,8,
+					       3,6,9),
+					     T(1,2,3,
+					       4,5,6,
+					       7,8,9) );
+	    run_trial<NegateOp<R,T>,R,T>( "Testing negation", R(-1,-2,-3), T(1,2,3) );
+	    // det(T) = 1*(1*1-2*2) - 2*(2*1-2*(-1)) + (-1)*(2*2-1*(-1)) = -16;
+	    // the expected entries are the cofactors divided by det
+	    run_trial<InverseOp<R,T>,R,T>( "Testing inversion",
+					   R( 3.0/16, 0.25, -5.0/16,
+					      0.25,   0,     0.25,
+					     -5.0/16, 0.25,  3.0/16),
+					   T(1,2,-1,
+					     2,1,2,
+					     -1,2,1) );
+	    run_trial<NormalizedOp<R,T>,R,T>( "Testing Matrix3_t<>.normalized()", R(1,1,1), T(1,1,1) );
+	    run_trial<NormalizedOp<R,T>,R,T>( "Testing Matrix3_t<>.normalized()", R(1,1,1), T(2,2,2) );
+	    // Gram-Schmidt normalization of a diagonal matrix with positive
+	    // entries yields the identity (the earlier expectation of
+	    // R(1.0/6, 2.0/6, 3.0/6) did not match normalized() as implemented)
+	    run_trial<NormalizedOp<R,T>,R,T>( "Testing Matrix3_t<>.normalized()", R(1,1,1), T(1,2,3) );
+	}
+    }
+    TEST_CASE( "Check that Matrix3_t unary operations are identical on GPU and CPU", "[Tests::Unary::Matrix3]" ) {
+	// INFO("Test case start");
+	const bool is_diag = false;
+	const bool check_diag = true;
+	run_tests<double, is_diag, check_diag>();
+	run_tests<float, is_diag, check_diag>();
+	run_tests<int, is_diag, check_diag>();
+    }
+}
+
+namespace Tests::Binary::Matrix3 {
+
+    template<typename A, typename B, bool is_diag=false, bool check_diag=false>
+    void run_tests() {
+	using T = Matrix3_t<A, is_diag, check_diag>;
+
+	{ // Test binary operators that return Matrix where U b is a scalar
+	    using U = B;
+	    using R = T;
+	    run_trial<MultOp<R,T,U>,R,T,U>( "Scale", R(2,4,6), T(1,2,3), U(2) );
+	}
+
+	{ // Test binary operators that return Vector where U b is Vector
+	    using U = Vector3_t<B>;
+	    using R = U;
+	    run_trial<MultOp<R,T,U>,R,T,U>( "Matrix.transform(Vector)",
+					    R(1+4+9,
+					      4+10+18,
+					      7+16+27),
+					    T(1,2,3,
+					      4,5,6,
+					      7,8,9),
+					    U(1,2,3) );
+	}
+
+	{ // Test binary operators that return Matrix and U b is Matrix
+	    using U = Matrix3_t<B,is_diag,check_diag>;
+	    using R = std::common_type_t<T,U>;
+	    run_trial<AddOp<R,T,U>,R,T,U>( "Matrix addition",
+					   R(2,4,6,
+					     8,10,12,
+					     14,16,18),
+					   T(1,2,3,
+					     4,5,6,
+					     7,8,9),
+					   U(1,2,3,
+					     4,5,6,
+					     7,8,9));
+
+	    run_trial<SubOp<R,T,U>,R,T,U>( "Matrix subtraction",
+					   R(0,0,0),
+					   T(1,2,3,
+					     4,5,6,
+					     7,8,9),
+					   U(1,2,3,
+					     4,5,6,
+					     7,8,9) );
+
+	    run_trial<MultOp<R,T,U>,R,T,U>( "Matrix transformation",
+					    R(1+8+21, 2+10+24, 3+12+27,
+					      4+20+42, 8+25+48, 12+30+54,
+					      7+32+63, 14+40+72, 21+48+81),
+					    T(1,2,3,
+					      4,5,6,
+					      7,8,9),
+					    U(1,2,3,
+					      4,5,6,
+					      7,8,9) );
+	}
+    }
+    TEST_CASE( "Check that Matrix3_t binary operations are identical on GPU and CPU", "[Tests::Binary::Matrix3]" ) {
+	// INFO("Test case start");
+	const bool is_diag = false;
+	const bool check_diag = true;
+	run_tests<double,double, is_diag, check_diag>();
+	// run_tests<float,double, is_diag, check_diag>();
+	//run_tests<int,double, is_diag, check_diag>();
+
+	//run_tests<double,float, is_diag, check_diag>();
+	run_tests<float,float, is_diag, check_diag>();
+	// run_tests<int,float, is_diag, check_diag>();
+
+	//run_tests<double,int, is_diag, check_diag>();
+	//run_tests<float,int, is_diag, check_diag>();
+
+	// run_tests<int,int, is_diag, check_diag>();
+
+    }
+}

diff --git a/src/Tests/type_name.h b/src/Tests/type_name.h
index 90e8309..f64e440 100644
--- a/src/Tests/type_name.h
+++ b/src/Tests/type_name.h
@@ -7,7 +7,7 @@
 #include <string>
 #include <cstdlib>
 
-template <typename T>
+template <typename T, typename ...Extras>
 std::string type_name() {
     using TR = typename std::remove_reference<T>::type;
     std::unique_ptr<char, void(*)(void*)> own

diff --git a/src/Tests/vector3_precision.cu b/src/Tests/vector3_precision.cu
index b02c3b4..a8b93db 100644
--- a/src/Tests/vector3_precision.cu
+++ b/src/Tests/vector3_precision.cu
@@ -13,92 +13,93 @@
 
 #include "type_name.h"
 
+namespace Tests::Vector3 {
+    enum BinaryOp_t { ADD, CROSS, DOT, SUB, FINAL };
+    BinaryOp_t& operator++(BinaryOp_t& op) { return op = static_cast<BinaryOp_t>( 1+static_cast<int>(op) ); }
-enum BinaryOp_t { ADD, CROSS, DOT, SUB, FINAL };
-BinaryOp_t& operator++(BinaryOp_t& op) { return op = static_cast<BinaryOp_t>( 1+static_cast<int>(op) ); }
-
-std::string get_binary_op_name( BinaryOp_t op ) {
-    switch (op) {
-    case ADD:
-	return "add";
-    case SUB:
-	return "subtract";
-    case CROSS:
-	return "cross";
-    case DOT:
-	return "dot";
+    std::string get_binary_op_name( BinaryOp_t op ) {
+	switch (op) {
+	case ADD:
+	    return "add";
+	case SUB:
+	    return "subtract";
+	case CROSS:
+	    return "cross";
+	case DOT:
+	    return "dot";
+	}
+	return std::string(""); // (static_cast<int>(op)));
     }
-    return std::string(""); // (static_cast<int>(op)));
-}
 
-template<typename R, typename T, typename U>
-__host__ __device__ nvstd::function<R(T,U)> get_binary_op_func( BinaryOp_t op) {
-    switch (op) {
-    case ADD:
-	return [] (T a, U b) {return static_cast<R>(b+a);};
-    case SUB:
-	return [] (T a, U b) {return static_cast<R>(b-a);};
-    case CROSS:
-	return [] (T a, U b) {return static_cast<R>(b.cross(a));};
-    case DOT:
-	return [] (T a, U b) {return static_cast<R>(b.dot(a));};
-    default:
-	assert(false);
-    }
-    return [] (T a, U b) {return static_cast<R>(b+a);};
-}
+    template<typename R, typename T, typename U>
+    __host__ __device__ nvstd::function<R(T,U)> get_binary_op_func( BinaryOp_t op) {
+	switch (op) {
+	case ADD:
+	    return [] (T a, U b) {return static_cast<R>(b+a);};
+	case SUB:
+	    return [] (T a, U b) {return static_cast<R>(b-a);};
+	case CROSS:
+	    return [] (T a, U b) {return static_cast<R>(b.cross(a));};
+	case DOT:
+	    return [] (T a, U b) {return static_cast<R>(b.dot(a));};
+	default:
+	    assert(false);
+	}
+	return [] (T a, U b) {return static_cast<R>(b+a);};
+    }
 
-template<typename R, typename T, typename U>
-__global__ void binary_op_test_kernel( BinaryOp_t op, R* result, T in1, U in2 ) {
-    nvstd::function<R(T,U)> fn = get_binary_op_func<R,T,U>(op);
-    if (blockIdx.x == 0) {
-	*result = fn(in1,in2);
-    }
-}
+    template<typename R, typename T, typename U>
+    __global__ void binary_op_test_kernel( BinaryOp_t op, R* result, T in1, U in2 ) {
+	nvstd::function<R(T,U)> fn = get_binary_op_func<R,T,U>(op);
+	if (blockIdx.x == 0) {
+	    *result = fn(in1,in2);
+	}
+    }
 
-template<typename T, typename U>
-void check_vectors_equal( T&& cpu, U&& gpu) {
-    CHECK( type_name<decltype(cpu)>() == type_name<decltype(gpu)>() ); // should be unneccesary
-    CHECK( cpu.x == gpu.x );
-    CHECK( cpu.y == gpu.y );
-    CHECK( cpu.z == gpu.z );
-    CHECK( cpu.w == gpu.w );
-}
+    template<typename T, typename U>
+    void check_vectors_equal( T&& cpu, U&& gpu) {
+	CHECK( type_name<decltype(cpu)>() == type_name<decltype(gpu)>() ); // should be unnecessary
+	CHECK( cpu.x == gpu.x );
+	CHECK( cpu.y == gpu.y );
+	CHECK( cpu.z == gpu.z );
+	CHECK( cpu.w == gpu.w );
+    }
 
-template<typename A, typename B>
-void run_tests() {
-    using T = Vector3_t<A>;
-    using U = Vector3_t<B>;
-    using R = std::common_type_t<T,U>;
-
-    T v1(1,1.005,0);
-    U v2(0,2,0);
-    R *gpu_result_d, gpu_result, cpu_result;
-    cudaMalloc((void **)&gpu_result_d, sizeof(R));
-
-    for (BinaryOp_t op = ADD; op < FINAL; ++op) {
-	INFO( get_binary_op_name( op ) );
-	binary_op_test_kernel<R,T,U><<<1,1>>>(op, gpu_result_d, v1, v2);
-	cudaMemcpy(&gpu_result, gpu_result_d, sizeof(R), cudaMemcpyDeviceToHost);
-	cudaDeviceSynchronize();
-
-	// Get cpu_result
-	cpu_result = (get_binary_op_func<R,T,U>(op))(v1,v2);
-
-	// Check consistency
-	check_vectors_equal(cpu_result, gpu_result);
-    }
-    cudaFree(gpu_result_d);
-}
+    template<typename A, typename B>
+    void run_tests() {
+	using T = Vector3_t<A>;
+	using U = Vector3_t<B>;
+	using R = std::common_type_t<T,U>;
+
+	T v1(1,1.005,0);
+	U v2(0,2,0);
+	R *gpu_result_d, gpu_result, cpu_result;
+	cudaMalloc((void **)&gpu_result_d, sizeof(R));
+
+	for (BinaryOp_t op = ADD; op < FINAL; ++op) {
+	    INFO( get_binary_op_name( op ) );
+	    binary_op_test_kernel<R,T,U><<<1,1>>>(op, gpu_result_d, v1, v2);
+	    cudaMemcpy(&gpu_result, gpu_result_d, sizeof(R), cudaMemcpyDeviceToHost);
+	    cudaDeviceSynchronize();
+
+	    // Get cpu_result
+	    cpu_result = (get_binary_op_func<R,T,U>(op))(v1,v2);
+
+	    // Check consistency
+	    check_vectors_equal(cpu_result, gpu_result);
+	}
+	cudaFree(gpu_result_d);
+    }
 
-TEST_CASE( "Check that Vector3_t binary operations are identical on GPU and CPU", "[Vector3]" ) {
-    // INFO("Test case start");
-
-    run_tests<int,int>();
-    run_tests<float,float>();
-    run_tests<double,double>();
-
-    run_tests<float,double>();
-    run_tests<double,float>();
-    run_tests<int,double>();
-}
+    TEST_CASE( "Check that Vector3_t binary operations are identical on GPU and CPU", "[Vector3]" ) {
+	// INFO("Test case start");
+
+	run_tests<int,int>();
+	run_tests<float,float>();
+	run_tests<double,double>();
+
+	run_tests<float,double>();
+	run_tests<double,float>();
+	run_tests<int,double>();
+    }
+}

diff --git a/src/Types.h b/src/Types.h
index 6cfa33e..765b1ab 100644
--- a/src/Types.h
+++ b/src/Types.h
@@ -21,15 +21,11 @@
 #ifdef USE_CUDA
 #include <cuda_runtime.h>
 #else
-
 struct MY_ALIGN(16) float4 {
     float4() : x(0), y(0), z(0), w(0) {};
     float4(float x, float y, float z, float w) : x(x), y(y), z(z), w(w) {};
     float4 operator+(const float4&& o) {
	return float4(x+o.x,y+o.y,z+o.z,w+o.w);
-	// float4 r;
-	// r.x = x+o.x; r.y = y+o.y; r.z = z+o.z; r.w = w+o.w;
-	// return r;
     };
     float4 operator*(const float&& s) {
	return float4(x*s,y*s,z*s,w*s);
@@ -39,9 +35,31 @@ struct MY_ALIGN(16) float4 {
 };
 #endif
 
+#include <stdarg.h>  // For va_start, etc.
+#include <memory>    // For std::unique_ptr
+#include <cstring>
+
+// from: https://stackoverflow.com/questions/2342162/stdstring-formatting-like-sprintf/8098080#8098080
+inline std::string string_format(const std::string fmt_str, ...) {
+    int final_n, n = ((int)fmt_str.size()) * 2; /* Reserve two times as much as the length of the fmt_str */
+    std::unique_ptr<char[]> formatted;
+    va_list ap;
+    while(1) {
+	formatted.reset(new char[n]); /* Wrap the plain char array into the unique_ptr */
+	va_start(ap, fmt_str);
+	final_n = vsnprintf(&formatted[0], n, fmt_str.c_str(), ap);
+	va_end(ap);
+	if (final_n < 0 || final_n >= n)
+	    n += abs(final_n - n + 1);
+	else
+	    break;
+    }
+    return std::string(formatted.get());
+}
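+
+// Editor's note (illustrative values, not from the source): the loop above
+// sizes the buffer by retrying, so string_format("<%d, %d, %d>", 1, 2, 3)
+// returns std::string("<1, 2, 3>"); the buffer grows whenever vsnprintf
+// reports truncation (final_n >= n) or an encoding error (final_n < 0).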
 
 #include "Types/Vector3.h"
-#include "Types/Vector3.h"
+#include "Types/Matrix3.h"
 
 using Vector3 = Vector3_t<float>;
-using Matrix3 = Vector3_t<float>; /* TODO: FIX */
+using Matrix3 = Matrix3_t<float,false>;

diff --git a/src/Types/Matrix3.h b/src/Types/Matrix3.h
new file mode 100644
index 0000000..6d0315f
--- /dev/null
+++ b/src/Types/Matrix3.h
@@ -0,0 +1,234 @@
+/*********************************************************************
+ * @file Matrix3.h
+ *
+ * @brief Declaration of templated Matrix3_t class.
+ *********************************************************************/
+#pragma once
+#include <cassert>
+#include <memory>
+#include <type_traits> // for std::common_type<T,U>
+
+/**
+// IDEA: use struct for bool options
+// PROBLEM: such a struct will have a size of 1 byte because it is not a "base class subobject"
+//  - Might not be optimized away... have to check this empirically
+//  - i.e. this approach is good for "large" flow objects, but may be bad for gpu objects
+// SOLUTION?: make a base class that defines the opts subobject, so the
+// empty-base optimization can remove its storage
+
+template<bool _is_diag=false, bool _check_diag=false>
+struct Matrix3_opts {
+    inline bool is_diag() const { return _is_diag; }
+    inline bool check_diag() const { return _check_diag; }
+};
+
+// Other strategies could include:
+// policy packs: https://stackoverflow.com/questions/21939217/combining-policy-classes-template-template-parameters-variadic-templates && https://www.modernescpp.com/index.php/policy-and-traits
+// inheritance is an interesting option here...
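+//
+// Editor's sketch (illustrative, not yet part of the design): both the
+// empty-base optimization and C++20 [[no_unique_address]] can eliminate the
+// 1-byte subobject noted above:
+//
+//   struct Opts {};                                   // empty options struct
+//   struct AsMember { Opts o; float x; };             // sizeof typically > sizeof(float)
+//   struct AsBase : Opts { float x; };                // sizeof == sizeof(float) via EBO
+//   struct AsNUA  { [[no_unique_address]] Opts o; float x; }; // typically sizeof(float)
+//
+// so carrying a Matrix3_opts need not cost storage in GPU-resident objects.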
+ +**/ + +template<typename T, bool is_diag=false, bool check_diag=false> +struct MY_ALIGN(16*sizeof(T)) Matrix3_t { + using Matrix3 = Matrix3_t<T,is_diag,check_diag>; + using Vector3 = Vector3_t<T>; + + HOST DEVICE inline Matrix3_t() { (*this) = Matrix3(1); } + HOST DEVICE Matrix3_t(T s) { (*this) = Matrix3(s,s,s); } + HOST DEVICE Matrix3_t(T x, T y, T z) { (*this) = Matrix3( x, 0, 0, 0, y, 0, 0, 0, z); } + HOST DEVICE Matrix3_t(const T* d) { (*this) = Matrix3(d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7], d[8]); } + HOST DEVICE Matrix3_t(const Vector3& ex, const Vector3& ey, const Vector3& ez) { (*this) = Matrix3(ex.x, ex.y, ex.z, ey.x, ey.y, ey.z, ez.x, ez.y, ez.z); } + HOST DEVICE Matrix3_t(T xx, T xy, T xz, T yx, T yy, T yz, T zx, T zy, T zz) : xx(xx), xy(xy), xz(xz), yx(yx), yy(yy), yz(yz), zx(zx), zy(zy), zz(zz) { diag_check(); } + + HOST DEVICE inline void diag_check() const { + // if (opts.check_diag() && opts.is_diag()) {} + if (check_diag && is_diag) { + assert(xy == xz == yx == yz == zx == zy == 0); + // assert(xy == 0 && xz == 0 && + // yx == 0 && yz == 0 && + // zx == 0 && zy == 0); + } + } + + // Operators + template<typename U> + HOST DEVICE inline auto operator*(U s) const { + Matrix3_t<std::common_type_t<T,U>, is_diag, check_diag> m; + m.xx = s*xx; m.xy = s*xy; m.xz = s*xz; + m.yx = s*yx; m.yy = s*yy; m.yz = s*yz; + m.zx = s*zx; m.zy = s*zy; m.zz = s*zz; + return m; + } + + template<typename U> + HOST DEVICE inline auto operator*(const Vector3_t<U>& v) const { return this->transform(v); } + + template<typename U, bool is_diag2, bool check_diag2> + HOST DEVICE inline auto operator*(const Matrix3_t<U,is_diag2,check_diag2>& m) const { return this->transform(m); } + + template<typename U, bool is_diag2, bool check_diag2> + HOST DEVICE inline auto operator+(const Matrix3_t<U,is_diag2,check_diag2>& m) const { + Matrix3_t<std::common_type_t<T,U>, is_diag && is_diag2, check_diag||check_diag2 > ret; + ret.xx = xx+m.xx; + ret.yy = yy+m.yy; + ret.zz = zz+m.zz; + + if ( (!is_diag) || (!is_diag2) ) { + ret.xy = xy+m.xy; ret.xz = xz+m.xz; + ret.yx = yx+m.yx; ret.yz = yz+m.yz; + ret.zx = zx+m.zx; ret.zy = zy+m.zy; + } + return ret; + } + + template<typename U, bool is_diag2, bool check_diag2> + HOST DEVICE inline auto operator-(const Matrix3_t<U,is_diag2,check_diag2>& m) const { return (*this)+(-m); } + + HOST DEVICE inline Matrix3 operator-() const { return Matrix3(-xx,-xy,-xz,-yx,-yy,-yz,-zx,-zy,-zz); } + HOST DEVICE inline Matrix3 transpose() const { return Matrix3(xx,yx,zx,xy,yy,zy,xz,yz,zz); } + + template<typename U> + HOST DEVICE inline auto transform(const Vector3_t<U>& v) const { + Vector3_t<std::common_type_t<T,U>> w; + if (is_diag) { + w.x = xx*v.x; + w.y = yy*v.y; + w.z = zz*v.z; + } else { + w.x = xx*v.x + xy*v.y + xz*v.z; + w.y = yx*v.x + yy*v.y + yz*v.z; + w.z = zx*v.x + zy*v.y + zz*v.z; + } + return w; + } + + template<typename U, bool is_diag2, bool check_diag2> + HOST DEVICE inline auto transform(const Matrix3_t<U,is_diag2,check_diag2>& m) const { + Matrix3_t<std::common_type_t<T,U>, is_diag && is_diag2, check_diag||check_diag2 > ret; + ret.xx = xx*m.xx + xy*m.yx + xz*m.zx; + ret.yy = yx*m.xy + yy*m.yy + yz*m.zy; + ret.zz = zx*m.xz + zy*m.yz + zz*m.zz; + + if ( (!is_diag) || (!is_diag2) ) { + ret.yx = yx*m.xx + yy*m.yx + yz*m.zx; + ret.zx = zx*m.xx + zy*m.yx + zz*m.zx; + + ret.xy = xx*m.xy + xy*m.yy + xz*m.zy; + ret.zy = zx*m.xy + zy*m.yy + zz*m.zy; + + ret.xz = xx*m.xz + xy*m.yz + xz*m.zz; + ret.yz = yx*m.xz + yy*m.yz + yz*m.zz; + } + return ret; + } + + HOST DEVICE 
+    HOST DEVICE
+    Matrix3 inverse() const {
+	if (is_diag) {
+	    return Matrix3(T(1.0)/xx,T(1.0)/yy,T(1.0)/zz);
+	} else {
+	    T det = this->det();
+	    return Matrix3( (yy*zz-yz*zy)/det,-(xy*zz-xz*zy)/det, (xy*yz-xz*yy)/det,
+			   -(yx*zz-yz*zx)/det, (xx*zz-xz*zx)/det,-(xx*yz-xz*yx)/det,
+			    (yx*zy-yy*zx)/det,-(xx*zy-xy*zx)/det, (xx*yy-xy*yx)/det );
+	}
+    }
+
+    HOST DEVICE
+    T det() const {
+	if (is_diag) {
+	    return xx*yy*zz;
+	} else {
+	    return xx*(yy*zz-yz*zy) - xy*(yx*zz-yz*zx) + xz*(yx*zy-yy*zx);
+	}
+    }
+
+    // Han-Yi Chou
+    // Gram-Schmidt-style re-orthonormalization: rebuild an orthonormal frame
+    // from the first two columns via cross products
+    HOST DEVICE inline Matrix3 normalized() const {
+	Vector3 x = this->ex();
+	Vector3 y = this->ey();
+	/*
+	x = x / x.length();
+	float error = x.dot(y);
+	y = y-(error*x);
+	y = y / y.length();
+	Vector3 z = x.cross(y);
+	z = z / z.length();*/
+	//x = (0.5*(3-x.dot(x)))*x; /* approximate normalization */
+	//y = (0.5*(3-y.dot(y)))*y;
+	//z = (0.5*(3-z.dot(z)))*z;
+	//return Matrix3(x,y,z);
+	Vector3 z = x.cross(y);
+	T l;
+	l = z.length();
+	z = l > 0 ? z/l : Vector3(T(0));
+	l = x.length();
+	x = l > 0 ? x/l : Vector3(T(0));
+	y = z.cross(x);
+	l = y.length();
+	y = l > 0 ? y/l : Vector3(T(0));
+	return Matrix3(x,y,z);
+    }
+
+    HOST DEVICE inline Vector3 ex() const { return Vector3(xx,yx,zx); }
+    HOST DEVICE inline Vector3 ey() const { return Vector3(xy,yy,zy); }
+    HOST DEVICE inline Vector3 ez() const { return Vector3(xz,yz,zz); }
+
+    auto to_string() const {
+	// cast to double so the %f conversions remain valid when T is integral
+	return string_format("%2.8f %2.8f %2.8f\n%2.8f %2.8f %2.8f\n%2.8f %2.8f %2.8f",
+			     (double)xx,(double)xy,(double)xz,
+			     (double)yx,(double)yy,(double)yz,
+			     (double)zx,(double)zy,(double)zz);
+    }
+
+    HOST DEVICE inline bool is_diagonal() const { return is_diag; }
+
+    template<typename U>
+    HOST DEVICE inline bool operator==(U b) const {
+	return xx == b.xx && xy == b.xy && xz == b.xz &&
+	       yx == b.yx && yy == b.yy && yz == b.yz &&
+	       zx == b.zx && zy == b.zy && zz == b.zz;
+    }
+
+    // Helper function for testing
+    template<typename U> void test_equal(const U&& b) const {
+	CHECK( xx == b.xx );
+	CHECK( xy == b.xy );
+	CHECK( xz == b.xz );
+	CHECK( yx == b.yx );
+	CHECK( yy == b.yy );
+	CHECK( yz == b.yz );
+	CHECK( zx == b.zx );
+	CHECK( zy == b.zy );
+	CHECK( zz == b.zz );
+    }
+
+    T xx, xy, xz;
+    T yx, yy, yz;
+    T zx, zy, zz;
+};
+
+// template<typename ...Args>
+// HOST std::ostream& operator << ( std::ostream& os, Matrix3_t<Args...> const& value ) {
+//     printf("MATRIX OP\n");
+//     os << value.to_string();
+//     return os;
+// }
+
+
+/* template<typename U> */
+/* HOST DEVICE friend inline Matrix3 operator*(float s, Matrix3 m) { return m*s; } */
+/* HOST DEVICE friend inline Matrix3 operator/(Matrix3 m, float s) { */
+/*     const float sinv = 1.0f/s; */
+/*     return m*sinv; */
+/* } */
+
+// HOST DEVICE void setIsDiag() {
+//     isDiag = (xy == 0 && xz == 0 &&
+// 	       yx == 0 && yz == 0 &&
+// 	       zx == 0 && zy == 0) ? true : false;
+// }

diff --git a/src/Types/Vector3.h b/src/Types/Vector3.h
index 52345b3..d500311 100644
--- a/src/Types/Vector3.h
+++ b/src/Types/Vector3.h
@@ -4,6 +4,7 @@
  * @brief Declaration of templated Vector3_t class.
  *********************************************************************/
 #pragma once
+#include <memory>
 #include <type_traits> // for std::common_type<T,U>
 
 /**
@@ -18,11 +19,11 @@ g
  */
 template<typename T>
 class MY_ALIGN(4*sizeof(T)) Vector3_t {
 public:
-	HOST DEVICE inline Vector3_t<T>() : x(0), y(0), z(0), w(0) {}
+	HOST DEVICE inline Vector3_t<T>() : x(T(0)), y(T(0)), z(T(0)), w(T(0)) {}
	HOST DEVICE inline Vector3_t<T>(T s):x(s), y(s), z(s), w(s) {}
	HOST DEVICE inline Vector3_t<T>(const Vector3_t<T>& v):x(v.x), y(v.y), z(v.z), w(v.w) {}
	HOST DEVICE inline Vector3_t<T>(T x0, T y0, T z0) : x(x0), y(y0), z(z0), w(0) {}
-	HOST DEVICE inline Vector3_t<T>(const T* d) : x(d[0]), y(d[1]), z(d[2]), w(0) {}
+	// HOST DEVICE inline Vector3_t<T>(const T* d) : x(d[0]), y(d[1]), z(d[2]), w(0) {}
	HOST DEVICE inline Vector3_t<T>(const float4 a) : x(a.x), y(a.y), z(a.z), w(a.w) {}
 
 #if __cplusplus >= 201703L
@@ -130,8 +131,8 @@ public:
	}
 
	template<typename U>
-	HOST DEVICE inline Vector3_t<std::common_type_t<T,U>> operator/(U&& s) const {
-		const U inv = static_cast<U>(1.0)/s;
+	HOST DEVICE inline Vector3_t<std::common_type_t<T,U>> operator/(const U& s) const {
+		const U inv = U(1.0)/s;
		return (*this)*inv;
	}
 
@@ -175,14 +176,19 @@ public:
		printf("%0.3f %0.3f %0.3f\n", x,y,z);
	}
 
-	T x, y, z, w; //append a member w
-
-	char* to_char() const {
-		char* s = new char[128];
-		sprintf(s, "%.10g %.10g %.10g", x, y, z);
-		s[127] = 0;
-		return s;
+	auto to_string() const {
+		char s[128];
+		// snprintf bounds the write; cast to double keeps %g valid when T is integral
+		snprintf(s, sizeof(s), "%.10g %.10g %.10g (%.10g)", (double)x, (double)y, (double)z, (double)w);
+		return std::string(s);
+	}
+
+	template<typename U>
+	HOST DEVICE inline bool operator==(U b) const {
+		return x == b.x && y == b.y && z == b.z && w == b.w;
	}
+
+	T x, y, z, w; //append a member w
 };
 
@@ -202,7 +208,7 @@
 // }
 
 template<typename T, typename U>
-HOST DEVICE inline auto operator/(U&& s, Vector3_t<T>&& v) {
+HOST DEVICE inline auto operator/(const U& s, const Vector3_t<T>& v) {
	// TODO, throw exception if int
	using TU = typename std::common_type<T,U>::type;
	Vector3_t<TU> ret;
	ret.x = s/v.x;
	ret.y = s/v.y;
	ret.z = s/v.z;
	return ret;
 }
 
@@ -212,6 +218,26 @@
+// template<typename T, typename U>
+// HOST DEVICE inline auto operator*(const U&& s, const Vector3_t<T>&& v) {
+// 	return v*s;
+// }
+// template<typename T, typename U>
+// HOST DEVICE inline auto operator*(const U& s, const Vector3_t<T>& v) {
+// 	return v*s;
+// }
+
+// template<typename T>
+// HOST DEVICE inline auto operator*(const float&& s, const Vector3_t<T>&& v) {
+// 	return v*s;
+// }
+
+template<typename T>
+HOST DEVICE inline auto operator*(const float& s, const Vector3_t<T>& v) {
+	return v*s;
+}
+
+// Provide common type for vectors
 namespace std {
	template<typename T,typename U>
	struct common_type<Vector3_t<T>, Vector3_t<U>> {
-- 
GitLab