diff --git a/src/GPUManager.h b/src/GPUManager.h
index 159a3ac0117d4208f34ffef6c8edee251756cec8..fe7741e2ac2275ee75262a1fdb73691186ad3743 100644
--- a/src/GPUManager.h
+++ b/src/GPUManager.h
@@ -33,6 +33,27 @@ struct MY_ALIGN(16) float4 {
 };
 #endif
 
+// START traits
+// https://stackoverflow.com/questions/55191505/c-compile-time-check-if-method-exists-in-template-type
+#include <type_traits>
+// template<class ...Ts>
+// struct voider{
+//     using type = void;
+// };
+
+// template<class T, class = void>
+// struct has_copy_to_cuda : std::false_type{};
+
+// template<class T>
+// struct has_copy_to_cuda<T, typename voider<decltype(std::declval<T>().copy_to_cuda())>::type> : std::true_type{};
+
+template <typename T, typename = void>
+struct has_copy_to_cuda : std::false_type {};
+
+template <typename T>
+struct has_copy_to_cuda<T, decltype(std::declval<T>().copy_to_cuda(), void())> : std::true_type {};
+// END traits
+
 #ifdef USE_CUDA
 
 #include <cstdio>
diff --git a/src/Tests/CMakeLists.txt b/src/Tests/CMakeLists.txt
index 26f5dcacaf5b298fc45707f8adbbd1c93ea84f38..4b2055025b40384e6a98abc2f04296bf1b582805 100644
--- a/src/Tests/CMakeLists.txt
+++ b/src/Tests/CMakeLists.txt
@@ -9,6 +9,7 @@ add_executable("arbd_tests"
 matrix3.cu
 vector3_precision.cu
 bitmask.cu
+array.cu
 )
 set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --extended-lambda")
 set_property(TARGET arbd_tests PROPERTY CXX_STANDARD 14)
diff --git a/src/Tests/array.cu b/src/Tests/array.cu
new file mode 100644
index 0000000000000000000000000000000000000000..3a2ec001c4eca0497cc91fa4451fa1b61cf9463e
--- /dev/null
+++ b/src/Tests/array.cu
@@ -0,0 +1,230 @@
+#include <float.h>
+#include <iostream>
+#include <cstdio>
+
+// #include "useful.h"
+#include "../SignalManager.h"
+#include "../Types.h"
+#include <cuda.h>
+#include <nvfunctional>
+
+#include <catch2/catch_test_macros.hpp>
+#include <catch2/matchers/catch_matchers_floating_point.hpp>
+
+namespace Tests::TestArray {
+    // enum BinaryOp_t { ADD, CROSS, DOT, SUB, FINAL };
+    // BinaryOp_t& operator++(BinaryOp_t& op) { return op = static_cast<BinaryOp_t>( 1+static_cast<int>(op) ); }
+
+    // std::string get_binary_op_name( BinaryOp_t op ) {
+    //     switch (op) {
+    //     case ADD:
+    //         return "add";
+    //     case SUB:
+    //         return "subtract";
+    //     case CROSS:
+    //         return "cross";
+    //     case DOT:
+    //         return "dot";
+    //     }
+    //     return std::string(""); // (static_cast<int>(op)));
+    // }
+
+    // template<typename R, typename T, typename U>
+    // __host__ __device__ nvstd::function<R(T,U)> get_binary_op_func( BinaryOp_t op) {
+    //     switch (op) {
+    //     case ADD:
+    //         return [] (T a, U b) {return static_cast<R>(b+a);};
+    //     case SUB:
+    //         return [] (T a, U b) {return static_cast<R>(b-a);};
+    //     case CROSS:
+    //         return [] (T a, U b) {return static_cast<R>(b.cross(a));};
+    //     case DOT:
+    //         return [] (T a, U b) {return static_cast<R>(b.dot(a));};
+    //     default:
+    //         assert(false);
+    //     }
+    //     return [] (T a, U b) {return static_cast<R>(b+a);};
+    // }
+
+    // template<typename R, typename T, typename U>
+    // __global__ void binary_op_test_kernel( BinaryOp_t op, R* result, T in1, U in2 ) {
+    //     nvstd::function<R(T,U)> fn = get_binary_op_func<R,T,U>(op);
+    //     if (blockIdx.x == 0) {
+    //         *result = fn(in1,in2);
+    //     }
+    // }
+
+    // template<typename T, typename U>
+    // void check_vectors_equal( T&& cpu, U&& gpu) {
+    //     CHECK( type_name<decltype(cpu)>() == type_name<decltype(gpu)>() ); // should be unneccesary
+    //     CHECK( cpu.x == gpu.x );
+    //     CHECK( cpu.y == gpu.y );
+    //     CHECK( cpu.z == gpu.z );
+    //     CHECK( cpu.w == gpu.w );
+    // }
+
+    // template<typename A, typename B>
+    // void run_tests() {
+    //     using T = Vector3_t<A>;
+    //     using U = Vector3_t<B>;
+    //     using R = std::common_type_t<T,U>;
+
+    //     T v1(1,1.005,0);
+    //     U v2(0,2,0);
+    //     R *gpu_result_d, gpu_result, cpu_result;
+    //     cudaMalloc((void **)&gpu_result_d, sizeof(R));
+
+    //     for (BinaryOp_t op = ADD; op < FINAL; ++op) {
+    //         INFO( get_binary_op_name( op ) );
+    //         binary_op_test_kernel<R,T,U><<<1,1>>>(op, gpu_result_d, v1, v2);
+    //         cudaMemcpy(&gpu_result, gpu_result_d, sizeof(R), cudaMemcpyDeviceToHost);
+    //         cudaDeviceSynchronize();
+
+    //         // Get cpu_result
+    //         cpu_result = (get_binary_op_func<R,T,U>(op))(v1,v2);
+
+    //         // Check consistency
+    //         check_vectors_equal(cpu_result, gpu_result);
+    //     }
+    //     cudaFree(gpu_result_d);
+    // }
+
+    // template <typename T>
+    // void print_enable_if_value_helper(std::true_type) {
+    //     std::cout << "has_copy_to_cuda is true" << std::endl;
+    // }
+
+    // template <typename T>
+    // void print_enable_if_value_helper(std::false_type) {
+    //     std::cout << "has_copy_to_cuda is false" << std::endl;
+    // }
+
+    // template <typename T>
+    // void print_enable_if_value() {
+    //     print_enable_if_value_helper<has_copy_to_cuda<T>>(typename has_copy_to_cuda<T>::type{});
+    // }
+
+    template <typename T>
+    void print_enable_if_value() {
+        if (has_copy_to_cuda<T>::value) {
+            std::cout << "has_copy_to_cuda is true" << std::endl;
+        } else {
+            std::cout << "has_copy_to_cuda is false" << std::endl;
+        }
+    }
+
+    template <typename T>
+    Array<T> create_array(size_t num) {
+        Array<T> arr(num);
+        return arr;
+    }
+    TEST_CASE( "Test Array assignment and copy_to_cuda", "[Array]" ) {
+        {
+            // Creation and copy assignment
+            Array<Vector3> a = create_array<Vector3>(10);
+        }
+
+        {
+            // Allocation and deallocation
+            VectorArr a(10);
+            a[0] = Vector3(1);
+            // a[0].print();
+            // a[1].print();
+            a[3] = Vector3(3);
+            // a[3].print();
+
+            VectorArr* a_d = a.copy_to_cuda();
+            VectorArr b(0);
+            VectorArr* b_d = b.copy_to_cuda();
+            VectorArr a_d_h = a_d->copy_from_cuda(a_d);
+            VectorArr b_d_h = b_d->copy_from_cuda(b_d);
+
+            // a_d_h[0].print();
+            // a_d_h[1].print();
+            // a_d_h[3].print();
+
+            REQUIRE( a[1] == a_d_h[1] );
+            REQUIRE( a[3] == a_d_h[3] );
+
+            VectorArr::remove_from_cuda(a_d);
+            VectorArr::remove_from_cuda(b_d);
+
+            // Exercise the has_copy_to_cuda trait: int and Vector3 lack copy_to_cuda, while the Array types provide it
+            print_enable_if_value<int>();
+            print_enable_if_value<Vector3>();
+            print_enable_if_value<VectorArr>();
+            print_enable_if_value<Array<VectorArr>>();
+
+            // b_d_h[0].print();
+        }
+    }
+    TEST_CASE( "Test Assignment and copying of Arrays of Arrays and copy_to_cuda", "[Array]" ) {
+        {
+            // Allocation and deallocation
+            // printf("Creating v1(10)\n");
+            VectorArr v1(10);
+            for (int i = 0; i < v1.size(); ++i) {
+                v1[i] = Vector3(i+1);
+            }
+            // printf("Creating v2(20)\n");
+            VectorArr v2(20);
+            for (int i = 0; i < v2.size(); ++i) {
+                v2[i] = Vector3(10*i+1);
+            }
+
+            // printf("Creating a(2)\n");
+            Array<VectorArr> a(3);
+            a[0] = v1;
+            a[1] = v2;
+            // a[1] = std::move(v2);
+
+            Array<VectorArr>* a_d = a.copy_to_cuda();
+            Array<VectorArr> a_d_h = a_d->copy_from_cuda(a_d);
+
+
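+            // Compare elements of the round-tripped device copy against the original host data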
+            REQUIRE( a[0][1] == a_d_h[0][1] );
+            // REQUIRE( a[0][5] == a_d_h[0][5] );
+
+            a_d->remove_from_cuda(a_d);
+        }
+    }
+    TEST_CASE( "Test Assignment and copying of Arrays of Arrays of Arrays", "[Array]" ) {
+        {
+            // Allocation and deallocation
+            // printf("Creating v1(10)\n");
+            VectorArr v1(10);
+            for (int i = 0; i < v1.size(); ++i) {
+                v1[i] = Vector3(i+1);
+            }
+            // printf("Creating v2(20)\n");
+            VectorArr v2(20);
+            for (int i = 0; i < v2.size(); ++i) {
+                v2[i] = Vector3(10*i+1);
+            }
+
+            // printf("Creating a(3)\n");
+            Array<VectorArr> a(3);
+            a[0] = v1;
+            a[1] = v2;
+
+            Array<Array<VectorArr>> b(3);
+            b[0] = a;
+            b[2] = std::move(a);
+
+            Array<Array<VectorArr>>* b_d = b.copy_to_cuda();
+            Array<Array<VectorArr>> b_d_h = b_d->copy_from_cuda(b_d);
+
+            REQUIRE( b[0][0][0] == b_d_h[0][0][0] );
+            b_d->remove_from_cuda(b_d);
+        }
+    }
+}
diff --git a/src/Tests/bitmask.cu b/src/Tests/bitmask.cu
index 7703e033e693928e9ae0b1b1d05ad687dcf5bfad..85089823169618e7d7b613c8dad3d899970f7225 100644
--- a/src/Tests/bitmask.cu
+++ b/src/Tests/bitmask.cu
@@ -32,11 +32,13 @@ namespace Tests::Bitmask {
     T* b_d = b.copy_to_cuda();
     cudaDeviceSynchronize();
 
-    T b2 = b.retrieve_from_cuda(b_d);
+    T b2 = b.copy_from_cuda(b_d);
     cudaDeviceSynchronize();
 
-
     REQUIRE( b == b2 );
+    b.remove_from_cuda(b_d);
+    cudaDeviceSynchronize();
+
 }
 
 }
diff --git a/src/Tests/catch_boiler.h b/src/Tests/catch_boiler.h
index 898a8a7d5a7d7607351c7edb7a793183ed74a93a..571424f62e6af61b25e50a61575b4ba05dcd5fdf 100644
--- a/src/Tests/catch_boiler.h
+++ b/src/Tests/catch_boiler.h
@@ -7,7 +7,7 @@
 #include <cuda.h>
 #include <nvfunctional>
 
-#include "type_name.h"
+#include "../type_name.h"
 
 /* #include <catch2/catch_tostring.hpp> */
 /* namespace Catch { */
diff --git a/src/Tests/matrix3.cu b/src/Tests/matrix3.cu
index 8f89488a2aede5ce98fa3e2b544901dba3625f58..3be6f5843c79ee002f72c4a2762e382984f84586 100644
--- a/src/Tests/matrix3.cu
+++ b/src/Tests/matrix3.cu
@@ -11,6 +11,8 @@ namespace Catch {
     };
 }
 
+#include "../type_name.h"
+
 DEF_RUN_TRIAL
 
 namespace Tests::Unary::Matrix3 {
diff --git a/src/Tests/vector3_precision.cu b/src/Tests/vector3_precision.cu
index a8b93db041a933919275add24674f2acc417986a..ec706e080d3791b66b54f9b1b0768b65048d96c7 100644
--- a/src/Tests/vector3_precision.cu
+++ b/src/Tests/vector3_precision.cu
@@ -11,7 +11,7 @@
 #include <catch2/catch_test_macros.hpp>
 #include <catch2/matchers/catch_matchers_floating_point.hpp>
 
-#include "type_name.h"
+#include "../type_name.h"
 
 namespace Tests::Vector3 {
     enum BinaryOp_t { ADD, CROSS, DOT, SUB, FINAL };
diff --git a/src/Types.h b/src/Types.h
index b1510301015b6e5114bd578210605e02c4677e23..ff72cbe3d249c66ed6eb943c5ca2dbe743459429 100644
--- a/src/Types.h
+++ b/src/Types.h
@@ -5,6 +5,8 @@
 #include <memory>  // For std::unique_ptr
 #include <cstring>
 
+#include "type_name.h"
+
 // Utility function used by types to return std::string using format syntax
 inline std::string string_format(const std::string fmt_str, ...)
 { // from: https://stackoverflow.com/questions/2342162/stdstring-formatting-like-sprintf/8098080#8098080
@@ -32,3 +34,6 @@ using Vector3 = Vector3_t<float>;
 using Matrix3 = Matrix3_t<float,false>;
 
 #include "Types/Bitmask.h"
+
+#include "Types/Array.h"
+using VectorArr = Array<Vector3>;
diff --git a/src/Types/Array.h b/src/Types/Array.h
new file mode 100644
index 0000000000000000000000000000000000000000..4f8d5510ae1eb321e62b4eec70fae6493552567f
--- /dev/null
+++ b/src/Types/Array.h
@@ -0,0 +1,247 @@
+/*********************************************************************
+ * @file Array.h
+ *
+ * @brief Declaration of templated Array class.
+ *********************************************************************/
+#pragma once
+#include <memory>
+#include <type_traits> // for std::common_type<T,U>
+#include <sstream>
+
+// Simple templated array object without resizing capabilities
+template<typename T>
+class Array {
+public:
+    HOST inline Array<T>() : num(0), values(nullptr) {} // printf("Creating Array1 %x\n",this);
+    HOST inline Array<T>(size_t num) : num(num), values(nullptr) {
+        // printf("Constructing Array<%s> %x with values %x\n", type_name<T>().c_str(), this, values);
+        host_allocate();
+        // printf("Array<%s> %x with values %x\n", type_name<T>().c_str(), this, values);
+    }
+    HOST inline Array<T>(size_t num, const T* inp ) : num(num), values(nullptr) {
+        // printf("Constructing Array<%s> %x with values %x\n", type_name<T>().c_str(), this, values);
+        host_allocate();
+        for (size_t i = 0; i < num; ++i) {
+            values[i] = inp[i];
+        }
+        // printf("Created Array3 %x with values %x\n",this, values);
+    }
+    HOST inline Array<T>(const Array<T>& a) : values(nullptr) { // copy constructor; values must start null so host_allocate() does not free an uninitialized pointer
+        // printf("Copy-constructing Array<T> %x from %x with values %x\n",this, &a, a.values);
+        num = a.num;
+        host_allocate();
+        for (size_t i = 0; i < num; ++i) {
+            values[i] = a[i];
+        }
+        // printf("Copy-constructed Array<T> %x with values %x\n",this, values);
+    }
+    HOST inline Array<T>(Array<T>&& a) { // move constructor
+        // printf("Move-constructing Array<T> from %x with values %x\n", &a, a.values);
+        num = a.num;
+        values = a.values;
+        a.values = nullptr;
+        a.num = 0; // not needed?
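+        // (clearing num keeps size() of the moved-from object consistent with its now-null values pointer)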
+        // printf("Move-constructed Array<T> with values %x\n", values);
+    }
+    HOST inline Array<T>& operator=(const Array<T>& a) { // copy assignment operator
+        if (this == &a) return *this; // guard against self-assignment; host_allocate() would otherwise free the source buffer
+        num = a.num;
+        host_allocate();
+        for (size_t i = 0; i < num; ++i) {
+            values[i] = a[i];
+        }
+        // printf("Copy-operator for Array<T> %x with values %x\n",this, values);
+        return *this;
+    }
+    HOST inline Array<T>& operator=(Array<T>&& a) { // move assignment operator
+        host_deallocate();
+        num = a.num;
+        values = a.values;
+        a.num = 0;
+        a.values = nullptr;
+        // printf("Move-operator for Array<T> %x with values %x\n",this, values);
+        return *this;
+    }
+    HOST DEVICE inline T& operator[](size_t i) {
+        assert( i < num );
+        return values[i];
+    }
+    HOST DEVICE inline const T& operator[](size_t i) const {
+        assert( i < num );
+        return values[i];
+    }
+    HOST inline ~Array<T>() {
+        // printf("Destroying Array %x with values %x\n",this, values);
+        host_deallocate();
+    }
+
+#ifdef USE_CUDA
+    // This ugly template allows overloading copy_to_cuda, depending on whether T.copy_to_cuda exists, using C++14-compatible SFINAE
+    template <typename Dummy = void, typename std::enable_if_t<!has_copy_to_cuda<T>::value, Dummy>* = nullptr>
+    HOST inline Array<T>* copy_to_cuda(Array<T>* dev_ptr = nullptr) const {
+        if (dev_ptr == nullptr) { // allocate if needed
+            // printf("  cudaMalloc for array\n");
+            gpuErrchk(cudaMalloc(&dev_ptr, sizeof(Array<T>)));
+        }
+
+        // Allocate values_d
+        T* values_d = nullptr;
+        if (num > 0) {
+            // printf("  cudaMalloc for %d items\n", num);
+            size_t sz = sizeof(T) * num;
+            gpuErrchk(cudaMalloc(&values_d, sz));
+
+            // Copy values
+            gpuErrchk(cudaMemcpy(values_d, values, sz, cudaMemcpyHostToDevice));
+        }
+
+        // Copy Array with pointers correctly assigned
+        Array<T> tmp(0);
+        tmp.num = num;
+        tmp.values = values_d;
+        gpuErrchk(cudaMemcpy(dev_ptr, &tmp, sizeof(Array<T>), cudaMemcpyHostToDevice));
+        tmp.num = 0;
+        tmp.values = nullptr; // detach tmp from the device buffer so its destructor does not delete[] a device pointer
+        // printf("Copying Array<%s> %x with %d values %x to device at %x\n", type_name<T>().c_str(), this, num, values, dev_ptr);
+        return dev_ptr;
+    }
+
+    template <typename Dummy = void, typename std::enable_if_t<has_copy_to_cuda<T>::value, Dummy>* = nullptr>
+    HOST inline Array<T>* copy_to_cuda(Array<T>* dev_ptr = nullptr) const {
+        if (dev_ptr == nullptr) { // allocate if needed
+            // printf("  cudaMalloc for array\n");
+            gpuErrchk(cudaMalloc(&dev_ptr, sizeof(Array<T>)));
+        }
+
+        // Allocate values_d
+        T* values_d = nullptr;
+        if (num > 0) {
+            size_t sz = sizeof(T) * num;
+            // printf("  cudaMalloc for %d items\n", num);
+            gpuErrchk(cudaMalloc(&values_d, sz));
+
+            // Copy values
+            for (size_t i = 0; i < num; ++i) {
+                values[i].copy_to_cuda(values_d + i);
+            }
+        }
+
+        // Copy Array with pointers correctly assigned
+        Array<T> tmp(0);
+        tmp.num = num;
+        tmp.values = values_d;
+        gpuErrchk(cudaMemcpy(dev_ptr, &tmp, sizeof(Array<T>), cudaMemcpyHostToDevice));
+        tmp.num = 0;
+        tmp.values = nullptr;
+        // printf("Copying Array %x with values %x to device at %x\n",this, values, dev_ptr);
+        return dev_ptr;
+    }
+
+    template <typename Dummy = void, typename std::enable_if_t<!has_copy_to_cuda<T>::value, Dummy>* = nullptr>
+    HOST static Array<T> copy_from_cuda(Array<T>* dev_ptr) {
+        // Create host object, copy raw device data over
+        Array<T> tmp(0);
+        if (dev_ptr != nullptr) {
+            gpuErrchk(cudaMemcpy(&tmp, dev_ptr, sizeof(Array<T>), cudaMemcpyDeviceToHost));
+
+            if (tmp.num > 0) {
+                T* values_d = tmp.values;
+                tmp.values = new T[tmp.num];
+
+                // Copy values
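+                // (a flat memcpy back to host is sufficient here: this overload is chosen when T has no copy_to_cuda, so elements hold no nested device allocations)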
+                size_t sz = sizeof(T) * tmp.num;
+                gpuErrchk(cudaMemcpy(tmp.values, values_d, sz, cudaMemcpyDeviceToHost));
+            } else {
+                tmp.values = nullptr;
+            }
+        }
+        // printf("Copying device Array %x to host %x with values %x\n", dev_ptr, &tmp, tmp.values);
+        return tmp;
+    }
+
+    template <typename Dummy = void, typename std::enable_if_t<has_copy_to_cuda<T>::value, Dummy>* = nullptr>
+    HOST static Array<T> copy_from_cuda(Array<T>* dev_ptr) {
+        // Create host object, copy raw device data over
+        Array<T> tmp(0);
+
+        if (dev_ptr != nullptr) {
+            gpuErrchk(cudaMemcpy(&tmp, dev_ptr, sizeof(Array<T>), cudaMemcpyDeviceToHost));
+
+            if (tmp.num > 0) {
+                T* values_d = tmp.values;
+                tmp.values = new T[tmp.num];
+
+                // Copy values
+                for (size_t i = 0; i < tmp.num; ++i) {
+                    tmp.values[i] = T::copy_from_cuda(values_d + i);
+                }
+            } else {
+                tmp.values = nullptr;
+            }
+        }
+        // printf("Copying device Array %x to host %x with values %x\n", dev_ptr, &tmp, tmp.values);
+        return tmp;
+    }
+
+    template <typename Dummy = void, typename std::enable_if_t<!has_copy_to_cuda<T>::value, Dummy>* = nullptr>
+    HOST static void remove_from_cuda(Array<T>* dev_ptr, bool remove_self = true) {
+        // printf("Removing device Array<%s> %x\n", typeid(T).name(), dev_ptr);
+        if (dev_ptr == nullptr) return;
+        Array<T> tmp(0);
+        gpuErrchk(cudaMemcpy(&tmp, dev_ptr, sizeof(Array<T>), cudaMemcpyDeviceToHost));
+        if (tmp.num > 0) {
+            // Remove values
+            gpuErrchk(cudaFree(tmp.values));
+        }
+        tmp.values = nullptr;
+        gpuErrchk(cudaMemset((void*) &(dev_ptr->values), 0, sizeof(T*))); // set nullptr on device
+        if (remove_self) {
+            gpuErrchk(cudaFree(dev_ptr));
+            dev_ptr = nullptr;
+        }
+        // printf("...done removing device Array<%s> %x\n", typeid(T).name(), dev_ptr);
+    }
+
+    template <typename Dummy = void, typename std::enable_if_t<has_copy_to_cuda<T>::value, Dummy>* = nullptr>
+    HOST static void remove_from_cuda(Array<T>* dev_ptr, bool remove_self = true) {
+        // printf("Removing device Array<%s> %x\n", typeid(T).name(), dev_ptr);
+        if (dev_ptr == nullptr) return;
+        Array<T> tmp(0);
+        gpuErrchk(cudaMemcpy(&tmp, dev_ptr, sizeof(Array<T>), cudaMemcpyDeviceToHost));
+        if (tmp.num > 0) {
+            // Remove nested device allocations held by each element, then the element buffer itself
+            for (size_t i = 0; i < tmp.num; ++i) {
+                T::remove_from_cuda(tmp.values+i, false);
+            }
+            gpuErrchk(cudaFree(tmp.values)); // free the element buffer allocated by copy_to_cuda
+        }
+        tmp.values = nullptr;
+        gpuErrchk(cudaMemset((void*) &(dev_ptr->values), 0, sizeof(T*))); // set nullptr on device
+        if (remove_self) {
+            gpuErrchk(cudaFree(dev_ptr));
+            dev_ptr = nullptr;
+        }
+        // printf("...done removing device Array<%s> %x\n", typeid(T).name(), dev_ptr);
+    }
+#endif
+    HOST DEVICE size_t size() const { return num; }
+
+private:
+    HOST void host_allocate() {
+        host_deallocate();
+        if (num > 0) {
+            values = new T[num];
+        } else {
+            values = nullptr;
+        }
+        // printf("Array<%s>.host_allocate() %d values at %x\n", typeid(T).name(), num, values);
+    }
+    HOST void host_deallocate() {
+        // printf("Array<%s>.host_deallocate() %d values at %x\n", typeid(T).name(), num, values);
+        if (values != nullptr) delete[] values;
+        values = nullptr;
+    }
+
+    size_t num;
+    T* values;
+};
diff --git a/src/Types/Bitmask.h b/src/Types/Bitmask.h
index 3e4ca4aaa3ab55d9665ebccbffed830eb7f2ea5d..1ca22ffb384519e2a53ce5ffd4a25185b2d14416 100644
--- a/src/Types/Bitmask.h
+++ b/src/Types/Bitmask.h
@@ -83,12 +83,13 @@ public:
 
 #ifdef USE_CUDA
     HOST
-    Bitmask* copy_to_cuda() const {
-        Bitmask* tmp_obj_d = nullptr;
+    Bitmask* copy_to_cuda(Bitmask* tmp_obj_d = nullptr) const {
         Bitmask obj_tmp(0);
         data_t* mask_d = nullptr;
         size_t sz = sizeof(data_t) * get_array_size();
-        gpuErrchk(cudaMalloc(&tmp_obj_d, sizeof(Bitmask)));
+        if (tmp_obj_d == nullptr) {
+            gpuErrchk(cudaMalloc(&tmp_obj_d, sizeof(Bitmask)));
+        }
         if (sz > 0) {
             gpuErrchk(cudaMalloc(&mask_d, sz));
             gpuErrchk(cudaMemcpy(mask_d, mask, sz, cudaMemcpyHostToDevice));
@@ -102,7 +103,7 @@ public:
     }
 
     HOST
-    static Bitmask retrieve_from_cuda(Bitmask* obj_d) {
+    static Bitmask copy_from_cuda(Bitmask* obj_d) {
         Bitmask obj_tmp(0);
         gpuErrchk(cudaMemcpy(&obj_tmp, obj_d, sizeof(Bitmask), cudaMemcpyDeviceToHost));
         printf("TEST: %d\n", obj_tmp.len);
diff --git a/src/Types/Vector3.h b/src/Types/Vector3.h
index f53f3bf6cd5c947f6aa6827b5a073436bcb40c65..4718273eb5a25622ccdbda8c08475babc37a0c61 100644
--- a/src/Types/Vector3.h
+++ b/src/Types/Vector3.h
@@ -6,6 +6,7 @@
 #pragma once
 #include <memory>
 #include <type_traits> // for std::common_type<T,U>
+#include <sstream>
 
 /**
  * 3D vector utility class with common operations implemented on CPU and GPU.
@@ -22,8 +23,8 @@ public:
     HOST DEVICE inline Vector3_t<T>() : x(T(0)), y(T(0)), z(T(0)), w(T(0)) {}
     HOST DEVICE inline Vector3_t<T>(T s):x(s), y(s), z(s), w(s) {}
     HOST DEVICE inline Vector3_t<T>(const Vector3_t<T>& v):x(v.x), y(v.y), z(v.z), w(v.w) {}
-    HOST DEVICE inline Vector3_t<T>(T x0, T y0, T z0) : x(x0), y(y0), z(z0), w(0) {}
-    HOST DEVICE inline Vector3_t<T>(T x0, T y0, T z0, T w0) : x(x0), y(y0), z(z0), w(w0) {}
+    HOST DEVICE inline Vector3_t<T>(T x, T y, T z) : x(x), y(y), z(z), w(0) {}
+    HOST DEVICE inline Vector3_t<T>(T x, T y, T z, T w) : x(x), y(y), z(z), w(w) {}
     // HOST DEVICE inline Vector3_t<T>(const T* d) : x(d[0]), y(d[1]), z(d[2]), w(0) {}
     HOST DEVICE inline Vector3_t<T>(const float4 a) : x(a.x), y(a.y), z(a.z), w(a.w) {}
 
@@ -57,6 +58,12 @@ public:
     }
 #endif
 
+    HOST DEVICE inline Vector3_t<T>& operator=(const Vector3_t<T>& v) {
+        x = v.x;
+        y = v.y;
+        z = v.z;
+        return *this;
+    }
     HOST DEVICE inline Vector3_t<T>& operator=(const Vector3_t<T>&& v) {
         x = v.x;
         y = v.y;
@@ -177,12 +184,17 @@ public:
         printf("%0.3f %0.3f %0.3f\n", x,y,z);
     }
 
-    auto to_string() const {
+    auto to_string_old() const {
         char s[128];
         sprintf(s, "%.10g %.10g %.10g (%.10g)", x, y, z, w);
         s[127] = 0;
         return std::string(s);
     }
+    auto to_string() const {
+        std::ostringstream oss;
+        oss << x << " " << y << " " << z << " (" << w << ")";
+        return oss.str();
+    }
 
     template<typename U>
     HOST DEVICE inline bool operator==(U b) const {
diff --git a/src/Tests/type_name.h b/src/type_name.h
similarity index 95%
rename from src/Tests/type_name.h
rename to src/type_name.h
index f64e440bef08d1df44fafd73fb64367d8542327e..8ad2039003f4a1900685be63404a8d6af210d18e 100644
--- a/src/Tests/type_name.h
+++ b/src/type_name.h
@@ -1,3 +1,5 @@
+#pragma once
+
 #include <type_traits>
 #include <typeinfo>
 #ifndef _MSC_VER
@@ -9,6 +11,7 @@
 template <typename T, typename ...Extras>
 std::string type_name()
 {
+    return typeid(T).name(); // short-circuit: return the mangled name and skip the demangling code below
     using TR = typename std::remove_reference<T>::type;
     std::unique_ptr<char, void(*)(void*)> own (