From f14758898b87f48d8a97a13c286683650a372095 Mon Sep 17 00:00:00 2001
From: Chris Maffeo <cmaffeo2@illinois.edu>
Date: Tue, 14 Mar 2023 10:52:56 -0500
Subject: [PATCH] Updated test case, some docs

---
 src/CUDA.h                     |   7 --
 src/PatchOp.h                  |  37 ++++--
 src/Tests/vector3_precision.cu | 198 +++++++++------------------------
 src/Types/Vector3.h            |  27 ++---
 4 files changed, 98 insertions(+), 171 deletions(-)
 delete mode 100644 src/CUDA.h

diff --git a/src/CUDA.h b/src/CUDA.h
deleted file mode 100644
index d0b072f..0000000
--- a/src/CUDA.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#pragma once
-
-class BDIntegrateCUDA : public Integrator {
-public:
-    void compute(Patch* patch);
-    int num_patches() const { return 1; };
-};
diff --git a/src/PatchOp.h b/src/PatchOp.h
index b1a4223..ca4ce36 100644
--- a/src/PatchOp.h
+++ b/src/PatchOp.h
@@ -1,22 +1,45 @@
 /*********************************************************************
  * @file PatchOp.h
  *
- * @brief Declaration of BasePatchOp class.
- *
- * Includes headers of derived classes for convenient access to
- * factory methods. Virtual method `BasePatchOp::compute(Patch*
- * patch)` called by `patch->compute()` by any Patch to which the
- * PatchOp has been added.
+ * @brief Declaration of BasePatchOp class.
+ *
+ * @details This file contains the declaration of the abstract base
+ *          class BasePatchOp, which operates on Patch data. It also
+ *          includes headers of derived classes for convenient access
+ *          to factory methods. The virtual method
+ *          `BasePatchOp::compute(Patch* patch)` is called by
+ *          `patch->compute()` by any Patch to which the PatchOp has
+ *          been added.
 *********************************************************************/
+
 #pragma once
 
 class Patch;
 
-/// Abstract base class that operates on Patch data.
+/**
+ * @brief Abstract base class that operates on Patch data.
+ *
+ * @details BasePatchOp is an abstract base class for all classes that
+ *          operate on Patch data. It provides the `compute()`
+ *          method, which is called by `Patch::compute()` for each
+ *          PatchOp attached to a Patch.
+ */
 class BasePatchOp {
 public:
+    /**
+     * @brief Performs the computation for this PatchOp on the given Patch.
+     *
+     * @param patch The Patch on which the computation should be performed.
+     */
    virtual void compute(Patch* patch) = 0;
+
+    /**
+     * @brief Returns the number of patches that this PatchOp requires.
+     *
+     * @return The number of patches required by this PatchOp.
+     */
    virtual int num_patches() const = 0;
+
 private:
    void* compute_data;
 };
diff --git a/src/Tests/vector3_precision.cu b/src/Tests/vector3_precision.cu
index 8fba1f9..b02c3b4 100644
--- a/src/Tests/vector3_precision.cu
+++ b/src/Tests/vector3_precision.cu
@@ -6,6 +6,7 @@
 #include "../SignalManager.h"
 #include "../Types.h"
 #include <cuda.h>
+#include <nvfunctional>
 
 #include <catch2/catch_test_macros.hpp>
 #include <catch2/matchers/catch_matchers_floating_point.hpp>
@@ -13,20 +14,45 @@
 
 #include "type_name.h"
 
-// Test kernel function
-// namespace TestVectorOps {
+enum BinaryOp_t { ADD, CROSS, DOT, SUB, FINAL };
+BinaryOp_t& operator++(BinaryOp_t& op) { return op = static_cast<BinaryOp_t>( 1+static_cast<int>(op) ); }
 
-// ChatGPT("Add doxygen to each function, class and struct, where R represents a return parameter, T and U")
+std::string get_binary_op_name( BinaryOp_t op ) {
+    switch (op) {
+    case ADD:
+        return "add";
+    case SUB:
+        return "subtract";
+    case CROSS:
+        return "cross";
+    case DOT:
+        return "dot";
+    }
+    return std::string(""); // (static_cast<int>(op)));
+}
 
 template<typename R, typename T, typename U>
-HOST DEVICE R cross_vectors(T&& v1, U&& v2) {
-    return v1.cross(v2);
+__host__ __device__ nvstd::function<R(T,U)> get_binary_op_func( BinaryOp_t op) {
+    switch (op) {
+    case ADD:
+        return [] (T a, U b) {return static_cast<R>(b+a);};
+    case SUB:
+        return [] (T a, U b) {return static_cast<R>(b-a);};
+    case CROSS:
+        return [] (T a, U b) {return static_cast<R>(b.cross(a));};
+    case DOT:
+        return [] (T a, U b) {return static_cast<R>(b.dot(a));};
+    default:
+        assert(false);
+    }
+    return [] (T a, U b) {return static_cast<R>(b+a);};
 }
 
-template<typename F, typename R, typename T, typename U>
-__global__ void binary_op_test_kernel( F fn, R* result, T in1, U in2 ) {
+template<typename R, typename T, typename U>
+__global__ void binary_op_test_kernel( BinaryOp_t op, R* result, T in1, U in2 ) {
+    nvstd::function<R(T,U)> fn = get_binary_op_func<R,T,U>(op);
     if (blockIdx.x == 0) {
-        *result = (*fn)(in1,in2);
+        *result = fn(in1,in2);
     }
 }
 
@@ -39,156 +65,40 @@ void check_vectors_equal( T&& cpu, U&& gpu) {
     CHECK( cpu.w == gpu.w );
 }
 
-template<typename R, typename T, typename U>
-struct TestOp {
-    std::string name;
-    typename R (*)(T,U) func;
-};
-
-
-TEST_CASE( "Check that Vector3_t binary operations are identical on GPU and CPU", "[Vector3Dcross]" ) {
-    // INFO("Test case start");
-    using T = Vector3_t<float>;
-    using U = Vector3_t<double>;
+template<typename A, typename B>
+void run_tests() {
+    using T = Vector3_t<A>;
+    using U = Vector3_t<B>;
     using R = std::common_type_t<T,U>;
 
-    T v1(1,0,0);
+    T v1(1,1.005,0);
     U v2(0,2,0);
     R *gpu_result_d, gpu_result, cpu_result;
-
-
-    std::vector<TestOp> ops;
-    ops.push_back(TestOp("add", [] __host__ __device__ (T a, U b){ return static_cast<R>(b+a); }; ));
-    ops.push_back(TestOp("cross", [] __host__ __device__ (T a, U b){ return a.cross(b); }; ));
-    // auto sub = [] __host__ __device__ (T& a, U& b){ return a.cross(b); };
-    // auto dot = [] __host__ __device__ (T& a, U& b){ return a.cross(b); };
-    auto cross = [] __host__ __device__ (T a, U b){ return a.cross(b); };
-
-    using L = decltype(add);
-    std::vector<R(*)(T,U)> lambdas;
-    lambdas.push_back(add);
-    lambdas.push_back(cross);
-
-    // Get_result
     cudaMalloc((void **)&gpu_result_d, sizeof(R));
-    for (auto& op: ops) {
-        INFO(op.name);
-        binary_op_test_kernel<<<1,1>>>(op.func, gpu_result_d, v1, v2);
+    for (BinaryOp_t op = ADD; op < FINAL; ++op) {
+        INFO( get_binary_op_name( op ) );
+
+        binary_op_test_kernel<R,T,U><<<1,1>>>(op, gpu_result_d, v1, v2);
         cudaMemcpy(&gpu_result, gpu_result_d, sizeof(R), cudaMemcpyDeviceToHost);
         cudaDeviceSynchronize();
-
+
        // Get cpu_result
-        cpu_result = fn(v1,v2); // rvalue version failed on cpu, but works on gpu
+        cpu_result = (get_binary_op_func<R,T,U>(op))(v1,v2);
 
        // Check consistency
        check_vectors_equal(cpu_result, gpu_result);
     }
+    cudaFree(gpu_result_d);
 }
 
-// int main(int argc, char* argv[]) {
-//     return 0;
-// }
-
-// int main(int argc, char* argv[]) {
-//     SignalManager::manage_segfault();
-//     Vector3_t<float> v1(1,0,0);
-//     Vector3_t<double> v2(0,2,0);
-//     v2.y = 6.62606957e-44;
-
-//     std::cout << "test_vector3.cpp" << std::endl;
-//     // // std::cout << v1.to_char() << std::endl;
-
-//     // std::cout << v1.to_char() << " x " << v2.to_char() <<
-//     //     " = " << (v1.cross(v2)).to_char() << std::endl;
-//     // std::cout << (-v2).to_char() << " x " << v1.to_char() <<
-//     //     " = " << (-v2.cross(v1)).to_char() << std::endl;
-
-//     // std::cout.precision(17);
-//     // std::cout << sizeof(v2.cross(v1).z) << " " << sizeof(v1.cross(v2).z) << std::endl;
-//     // std::cout << (v2.cross(v1).z) << " " << (v1.cross(v2).z) << std::endl;
-//     // std::cout << float(6.62606957e-44) << std::endl;
-//     // std::cout << " ... done" << std::endl;
-
-//     // test_kernel<<<1,1>>>(v1,v2);
-//     // cudaDeviceSynchronize();
-//     return 0;
-// }
-
-// #include <iostream>
-
-// // Since C++ 11
-// // template<typename T>
-// // using func_t = T (*) (T, T);
-
-// template<typename R, typename T, typename U>
-// using func_t = R (*) (T, U);
-
-// template <typename R, typename T, typename U>
-// __host__ __device__ R add_func (T x, U y)
-// {
-//     return x + y;
-// }
-
-// template <typename T>
-// __host__ __device__ T mul_func (T x, T y)
-// {
-//     return x * y;
-// }
-
-// // Required for functional pointer argument in kernel function
-// // Static pointers to device functions
-// template <typename R, typename T, typename U>
-// __device__ func_t<R,T,U> p_add_func = add_func<R,T,U>;
-// // template <typename T>
-// // __device__ func_t<T> p_mul_func = mul_func<T>;
-
-
-// template <typename R, typename T, typename U>
-// __global__ void kernel(func_t<R,T,U> op, T * d_x, U * d_y, R * result)
-// {
-//     *result = (*op)(*d_x, *d_y);
-// }
-
-// template <typename T, typename U>
-// void mytest(T x, U y)
-// {
-//     using R = std::common_type_t<T,U>;
-//     func_t<R,T,U> h_add_func;
-//     // func_t<T> h_mul_func;
-
-//     T * d_x;
-//     U * d_y;
-//     cudaMalloc(&d_x, sizeof(T));
-//     cudaMalloc(&d_y, sizeof(U));
-//     cudaMemcpy(d_x, &x, sizeof(T), cudaMemcpyHostToDevice);
-//     cudaMemcpy(d_y, &y, sizeof(U), cudaMemcpyHostToDevice);
-
-//     R result;
-//     R * d_result, * h_result;
-//     cudaMalloc(&d_result, sizeof(R));
-//     h_result = &result;
-
-//     // Copy device function pointer to host side
-//     cudaMemcpyFromSymbol(&h_add_func, p_add_func<R,T,U>, sizeof(func_t<R,T,U>));
-//     // cudaMemcpyFromSymbol(&h_mul_func, p_mul_func<T>, sizeof(func_t<T>));
-
-//     kernel<R,T,U><<<1,1>>>(h_add_func, d_x, d_y, d_result);
-//     cudaDeviceSynchronize();
-//     cudaMemcpy(h_result, d_result, sizeof(R), cudaMemcpyDeviceToHost);
-//     CHECK( result == add_func<R,T,U>(x,y) );
+TEST_CASE( "Check that Vector3_t binary operations are identical on GPU and CPU", "[Vector3]" ) {
+    // INFO("Test case start");
 
-//     // kernel<T><<<1,1>>>(h_mul_func, d_x, d_y, d_result);
-//     // cudaDeviceSynchronize();
-//     // cudaMemcpy(h_result, d_result, sizeof(T), cudaMemcpyDeviceToHost);
-//     // CHECK( result == mul_func(x,y) );
-// }
+    run_tests<int,int>();
+    run_tests<float,float>();
+    run_tests<double,double>();
 
-// TEST_CASE( "Check that Vector3_t binary operations are identical on GPU and CPU", "[Vector3Dcross]" ) {
-//     INFO("TEST START");
-//     mytest<int,float>(2.05, 10.00);
-//     // mytest<float>(2.05, 10.00);
-//     // mytest<double>(2.05, 10.00);
-// }
-
-// */
+    run_tests<float,double>();
+    run_tests<double,float>();
+    run_tests<int,double>();
+}
diff --git a/src/Types/Vector3.h b/src/Types/Vector3.h
index 890de64..52345b3 100644
--- a/src/Types/Vector3.h
+++ b/src/Types/Vector3.h
@@ -99,9 +99,9 @@ public:
        return v;
    }
 
-   template<typename U>
-   HOST DEVICE inline Vector3_t<T> operator+(const Vector3_t<U>& w ) const {
-       using TU = typename std::common_type<T,U>::type;
+   template<typename U>
+   HOST DEVICE inline Vector3_t<std::common_type_t<T,U>> operator+(const Vector3_t<U>& w ) const {
+       using TU = typename std::common_type_t<T,U>;
        Vector3_t<TU> v;
        v.x = x + w.x;
        v.y = y + w.y;
@@ -109,18 +109,19 @@
        return v;
    }
 
-   HOST DEVICE inline Vector3_t<T> operator-(const Vector3_t<T>& w ) const {
-       Vector3_t<T> v;
-       v.x = x - w.x;
-       v.y = y - w.y;
-       v.z = z - w.z;
-       return v;
+   template<typename U>
+   HOST DEVICE inline Vector3_t<std::common_type_t<T,U>> operator-(const Vector3_t<U>& w ) const {
+       Vector3_t<std::common_type_t<T,U>> v;
+       v.x = x - w.x;
+       v.y = y - w.y;
+       v.z = z - w.z;
+       return v;
    }
 
    template<typename U>
-   HOST DEVICE inline Vector3_t<T> operator*(U&& s) const {
+   HOST DEVICE inline Vector3_t<std::common_type_t<T,U>> operator*(U&& s) const {
        // TODO, throw exception if int
-       using TU = typename std::common_type<T,U>::type;
+       using TU = typename std::common_type_t<T,U>;
        Vector3_t<TU> v;
        v.x = s*x;
        v.y = s*y;
@@ -129,13 +130,13 @@
    }
 
    template<typename U>
-   HOST DEVICE inline Vector3_t<T> operator/(U&& s) const {
+   HOST DEVICE inline Vector3_t<std::common_type_t<T,U>> operator/(U&& s) const {
        const U inv = static_cast<U>(1.0)/s;
        return (*this)*inv;
    }
 
    template<typename U>
-   HOST DEVICE inline float dot(const Vector3_t<U>& w) const { return x*w.x + y*w.y + z*w.z; }
+   HOST DEVICE inline auto dot(const Vector3_t<U>& w) const { return x*w.x + y*w.y + z*w.z; }
 
    HOST DEVICE inline float length() const { return sqrtf(x*x + y*y + z*z); }
    HOST DEVICE inline float length2() const { return x*x + y*y + z*z; }
--
GitLab
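
The PatchOp.h documentation above describes compute() being invoked through Patch::compute() for every PatchOp attached to a Patch. A minimal sketch of a subclass that satisfies the interface shown in this patch; the class name ExampleOp and its body are hypothetical and not part of the repository:

    #include "PatchOp.h"

    // Hypothetical PatchOp: implements the two pure-virtual members of BasePatchOp.
    class ExampleOp : public BasePatchOp {
    public:
        void compute(Patch* patch) override {
            // operate on the patch's data here; invoked via patch->compute()
            (void) patch;
        }
        int num_patches() const override { return 1; }  // this op acts on a single patch
    };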
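
For reference, a minimal host-side sketch of the mixed-precision behavior introduced in src/Types/Vector3.h above; the function name promotion_example and the include path are assumptions, and the snippet presumes Vector3_t's three-argument constructor and HOST/DEVICE macros are available as in the rest of the codebase:

    #include "Types/Vector3.h"  // assumed include path

    // Hypothetical sketch: mixed-precision operands now promote via
    // std::common_type_t instead of truncating to the left operand's type.
    void promotion_example() {
        Vector3_t<float>  a(1.0f, 2.0f, 3.0f);
        Vector3_t<double> b(0.5, 0.25, 0.125);

        auto s  = a + b;     // Vector3_t<double> (was Vector3_t<float> before this patch)
        auto d  = a - b;     // Vector3_t<double>; operator- is now a mixed-type template
        auto dp = a.dot(b);  // double; dot() previously returned float
        auto sc = a * 2.0;   // Vector3_t<double> via operator*(U&&)
    }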