diff --git a/src/GPUManager.h b/src/GPUManager.h
index 159a3ac0117d4208f34ffef6c8edee251756cec8..fe7741e2ac2275ee75262a1fdb73691186ad3743 100644
--- a/src/GPUManager.h
+++ b/src/GPUManager.h
@@ -33,6 +33,27 @@ struct MY_ALIGN(16) float4 {
};
#endif
+// START traits
+// https://stackoverflow.com/questions/55191505/c-compile-time-check-if-method-exists-in-template-type
+#include <type_traits>
+// template<class ...Ts>
+// struct voider{
+// using type = void;
+// };
+
+// template<class T, class = void>
+// struct has_copy_to_cuda : std::false_type{};
+
+// template<class T>
+// struct has_copy_to_cuda<T, typename voider<decltype(std::declval<T>().copy_to_cuda())>::type> : std::true_type{};
+
+template <typename T, typename = void>
+struct has_copy_to_cuda : std::false_type {};
+
+template <typename T>
+struct has_copy_to_cuda<T, decltype(std::declval<T>().copy_to_cuda(), void())> : std::true_type {};
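+// A sketch of the intended behaviour (illustrative comment only; the types named below
+// are declared elsewhere and only provide copy_to_cuda() when USE_CUDA is defined):
+//   has_copy_to_cuda<int>::value            == false  (no copy_to_cuda() member)
+//   has_copy_to_cuda<Bitmask>::value        == true   (Bitmask defines copy_to_cuda())
+//   has_copy_to_cuda<Array<Vector3>>::value == true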
+// END traits
+
#ifdef USE_CUDA
#include <cstdio>
diff --git a/src/Tests/CMakeLists.txt b/src/Tests/CMakeLists.txt
index 26f5dcacaf5b298fc45707f8adbbd1c93ea84f38..4b2055025b40384e6a98abc2f04296bf1b582805 100644
--- a/src/Tests/CMakeLists.txt
+++ b/src/Tests/CMakeLists.txt
@@ -9,6 +9,7 @@ add_executable("arbd_tests"
matrix3.cu
vector3_precision.cu
bitmask.cu
+array.cu
)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --extended-lambda")
set_property(TARGET arbd_tests PROPERTY CXX_STANDARD 14)
diff --git a/src/Tests/array.cu b/src/Tests/array.cu
new file mode 100644
index 0000000000000000000000000000000000000000..3a2ec001c4eca0497cc91fa4451fa1b61cf9463e
--- /dev/null
+++ b/src/Tests/array.cu
@@ -0,0 +1,230 @@
+#include <float.h>
+#include <iostream>
+#include <cstdio>
+
+// #include "useful.h"
+#include "../SignalManager.h"
+#include "../Types.h"
+#include <cuda.h>
+#include <nvfunctional>
+
+#include <catch2/catch_test_macros.hpp>
+#include <catch2/matchers/catch_matchers_floating_point.hpp>
+
+namespace Tests::TestArray {
+ // enum BinaryOp_t { ADD, CROSS, DOT, SUB, FINAL };
+ // BinaryOp_t& operator++(BinaryOp_t& op) { return op = static_cast<BinaryOp_t>( 1+static_cast<int>(op) ); }
+
+ // std::string get_binary_op_name( BinaryOp_t op ) {
+ // switch (op) {
+ // case ADD:
+ // return "add";
+ // case SUB:
+ // return "subtract";
+ // case CROSS:
+ // return "cross";
+ // case DOT:
+ // return "dot";
+ // }
+ // return std::string(""); // (static_cast<int>(op)));
+ // }
+
+ // template<typename R, typename T, typename U>
+ // __host__ __device__ nvstd::function<R(T,U)> get_binary_op_func( BinaryOp_t op) {
+ // switch (op) {
+ // case ADD:
+ // return [] (T a, U b) {return static_cast<R>(b+a);};
+ // case SUB:
+ // return [] (T a, U b) {return static_cast<R>(b-a);};
+ // case CROSS:
+ // return [] (T a, U b) {return static_cast<R>(b.cross(a));};
+ // case DOT:
+ // return [] (T a, U b) {return static_cast<R>(b.dot(a));};
+ // default:
+ // assert(false);
+ // }
+ // return [] (T a, U b) {return static_cast<R>(b+a);};
+ // }
+
+ // template<typename R, typename T, typename U>
+ // __global__ void binary_op_test_kernel( BinaryOp_t op, R* result, T in1, U in2 ) {
+ // nvstd::function<R(T,U)> fn = get_binary_op_func<R,T,U>(op);
+ // if (blockIdx.x == 0) {
+ // *result = fn(in1,in2);
+ // }
+ // }
+
+ // template<typename T, typename U>
+ // void check_vectors_equal( T&& cpu, U&& gpu) {
+ // CHECK( type_name<decltype(cpu)>() == type_name<decltype(gpu)>() ); // should be unnecessary
+ // CHECK( cpu.x == gpu.x );
+ // CHECK( cpu.y == gpu.y );
+ // CHECK( cpu.z == gpu.z );
+ // CHECK( cpu.w == gpu.w );
+ // }
+
+ // template<typename A, typename B>
+ // void run_tests() {
+ // using T = Vector3_t<A>;
+ // using U = Vector3_t<B>;
+ // using R = std::common_type_t<T,U>;
+
+ // T v1(1,1.005,0);
+ // U v2(0,2,0);
+ // R *gpu_result_d, gpu_result, cpu_result;
+ // cudaMalloc((void **)&gpu_result_d, sizeof(R));
+
+ // for (BinaryOp_t op = ADD; op < FINAL; ++op) {
+ // INFO( get_binary_op_name( op ) );
+ // binary_op_test_kernel<R,T,U><<<1,1>>>(op, gpu_result_d, v1, v2);
+ // cudaMemcpy(&gpu_result, gpu_result_d, sizeof(R), cudaMemcpyDeviceToHost);
+ // cudaDeviceSynchronize();
+
+ // // Get cpu_result
+ // cpu_result = (get_binary_op_func<R,T,U>(op))(v1,v2);
+
+ // // Check consistency
+ // check_vectors_equal(cpu_result, gpu_result);
+ // }
+ // cudaFree(gpu_result_d);
+ // }
+
+ // template <typename T>
+ // void print_enable_if_value_helper(std::true_type) {
+ // std::cout << "has_copy_to_cuda is true" << std::endl;
+ // }
+
+ // template <typename T>
+ // void print_enable_if_value_helper(std::false_type) {
+ // std::cout << "has_copy_to_cuda is false" << std::endl;
+ // }
+
+ // template <typename T>
+ // void print_enable_if_value() {
+ // print_enable_if_value_helper<has_copy_to_cuda<T>>(typename has_copy_to_cuda<T>::type{});
+ // }
+
+ template <typename T>
+ void print_enable_if_value() {
+ if (has_copy_to_cuda<T>::value) {
+ std::cout << "has_copy_to_cuda is true" << std::endl;
+ } else {
+ std::cout << "has_copy_to_cuda is false" << std::endl;
+ }
+ }
+
+ template <typename T>
+ Array<T> create_array(size_t num) {
+ Array<T> arr(num);
+ return arr;
+ }
+ TEST_CASE( "Test Array assignment and copy_to_cuda", "[Array]" ) {
+ {
+ // Creation and copy assignment
+ Array<Vector3> a = create_array<Vector3>(10);
+ }
+
+ {
+ // Allocation and deallocation
+ VectorArr a(10);
+ a[0] = Vector3(1);
+ // a[0].print();
+ // a[1].print();
+ a[3] = Vector3(3);
+ // a[3].print();
+
+ VectorArr* a_d = a.copy_to_cuda();
+ VectorArr b(0);
+ VectorArr* b_d = b.copy_to_cuda();
+ VectorArr a_d_h = a_d->copy_from_cuda(a_d);
+ VectorArr b_d_h = b_d->copy_from_cuda(b_d);
+
+ // a_d_h[0].print();
+ // a_d_h[1].print();
+ // a_d_h[3].print();
+
+ REQUIRE( a[1] == a_d_h[1] );
+ REQUIRE( a[3] == a_d_h[3] );
+
+ VectorArr::remove_from_cuda(a_d);
+ VectorArr::remove_from_cuda(b_d);
+
+ print_enable_if_value<int>();              // expected: false (no copy_to_cuda member)
+ print_enable_if_value<Vector3>();          // expected: false
+ print_enable_if_value<VectorArr>();        // expected: true
+ print_enable_if_value<Array<VectorArr>>(); // expected: true
+
+ // b_d_h[0].print();
+ }
+ }
+ TEST_CASE( "Test Assigment and copying of Arrays of Arrays and copy_to_cuda", "[Array]" ) {
+ {
+ // Allocation and deallocation
+ // printf("Creating v1(10)\n");
+ VectorArr v1(10);
+ for (int i = 0; i < v1.size(); ++i) {
+ v1[i] = Vector3(i+1);
+ }
+ // printf("Creating v2(20)\n");
+ VectorArr v2(20);
+ for (int i = 0; i < v2.size(); ++i) {
+ v2[i] = Vector3(10*i+1);
+ }
+
+ // printf("Creating a(2)\n");
+ Array<VectorArr> a(3);
+ a[0] = v1;
+ a[1] = v2;
+ // a[1] = std::move(v2);
+
+ Array<VectorArr>* a_d = a.copy_to_cuda();
+ Array<VectorArr> a_d_h = a_d->copy_from_cuda(a_d);
+
+
+ REQUIRE( a[0][1] == a_d_h[0][1] );
+ // REQUIRE( a[0][5] == a_d_h[0][5] );
+
+ a_d->remove_from_cuda(a_d);
+ }
+ }
+ TEST_CASE( "Test Assigment and copying of Arrays of Arrays of Arrays", "[Array]" ) {
+ {
+ // Allocation and deallocation
+ // printf("Creating v1(10)\n");
+ VectorArr v1(10);
+ for (int i = 0; i < v1.size(); ++i) {
+ v1[i] = Vector3(i+1);
+ }
+ // printf("Creating v2(20)\n");
+ VectorArr v2(20);
+ for (int i = 0; i < v2.size(); ++i) {
+ v2[i] = Vector3(10*i+1);
+ }
+
+ // printf("Creating a(3)\n");
+ Array<VectorArr> a(3);
+ a[0] = v1;
+ a[1] = v2;
+
+ Array<Array<VectorArr>> b(3);
+ b[0] = a;
+ b[2] = std::move(a);
+
+ Array<Array<VectorArr>>* b_d = b.copy_to_cuda();
+ Array<Array<VectorArr>> b_d_h = b_d->copy_from_cuda(b_d);
+
+ REQUIRE( b[0][0][0] == b_d_h[0][0][0] );
+ b_d->remove_from_cuda(b_d);
+ }
+ }
+}
diff --git a/src/Tests/bitmask.cu b/src/Tests/bitmask.cu
index 7703e033e693928e9ae0b1b1d05ad687dcf5bfad..85089823169618e7d7b613c8dad3d899970f7225 100644
--- a/src/Tests/bitmask.cu
+++ b/src/Tests/bitmask.cu
@@ -32,11 +32,13 @@ namespace Tests::Bitmask {
T* b_d = b.copy_to_cuda();
cudaDeviceSynchronize();
- T b2 = b.retrieve_from_cuda(b_d);
+ T b2 = b.copy_from_cuda(b_d);
cudaDeviceSynchronize();
-
REQUIRE( b == b2 );
+ b.remove_from_cuda(b_d);
+ cudaDeviceSynchronize();
+
}
}
diff --git a/src/Tests/catch_boiler.h b/src/Tests/catch_boiler.h
index 898a8a7d5a7d7607351c7edb7a793183ed74a93a..571424f62e6af61b25e50a61575b4ba05dcd5fdf 100644
--- a/src/Tests/catch_boiler.h
+++ b/src/Tests/catch_boiler.h
@@ -7,7 +7,7 @@
#include <cuda.h>
#include <nvfunctional>
-#include "type_name.h"
+#include "../type_name.h"
/* #include <catch2/catch_tostring.hpp> */
/* namespace Catch { */
diff --git a/src/Tests/matrix3.cu b/src/Tests/matrix3.cu
index 8f89488a2aede5ce98fa3e2b544901dba3625f58..3be6f5843c79ee002f72c4a2762e382984f84586 100644
--- a/src/Tests/matrix3.cu
+++ b/src/Tests/matrix3.cu
@@ -11,6 +11,8 @@ namespace Catch {
};
}
+#include "../type_name.h"
+
DEF_RUN_TRIAL
namespace Tests::Unary::Matrix3 {
diff --git a/src/Tests/vector3_precision.cu b/src/Tests/vector3_precision.cu
index a8b93db041a933919275add24674f2acc417986a..ec706e080d3791b66b54f9b1b0768b65048d96c7 100644
--- a/src/Tests/vector3_precision.cu
+++ b/src/Tests/vector3_precision.cu
@@ -11,7 +11,7 @@
#include <catch2/catch_test_macros.hpp>
#include <catch2/matchers/catch_matchers_floating_point.hpp>
-#include "type_name.h"
+#include "../type_name.h"
namespace Tests::Vector3 {
enum BinaryOp_t { ADD, CROSS, DOT, SUB, FINAL };
diff --git a/src/Types.h b/src/Types.h
index b1510301015b6e5114bd578210605e02c4677e23..ff72cbe3d249c66ed6eb943c5ca2dbe743459429 100644
--- a/src/Types.h
+++ b/src/Types.h
@@ -5,6 +5,8 @@
#include <memory> // For std::unique_ptr
#include <cstring>
+#include "type_name.h"
+
// Utility function used by types to return std::string using format syntax
inline std::string string_format(const std::string fmt_str, ...) {
// from: https://stackoverflow.com/questions/2342162/stdstring-formatting-like-sprintf/8098080#8098080
@@ -32,3 +34,6 @@ using Vector3 = Vector3_t<float>;
using Matrix3 = Matrix3_t<float,false>;
#include "Types/Bitmask.h"
+
+#include "Types/Array.h"
+using VectorArr = Array<Vector3>;
diff --git a/src/Types/Array.h b/src/Types/Array.h
new file mode 100644
index 0000000000000000000000000000000000000000..4f8d5510ae1eb321e62b4eec70fae6493552567f
--- /dev/null
+++ b/src/Types/Array.h
@@ -0,0 +1,247 @@
+/*********************************************************************
+ * @file Array.h
+ *
+ * @brief Declaration of templated Array class.
+ *********************************************************************/
+#pragma once
+#include <memory>
+#include <type_traits> // for std::common_type<T,U>
+#include <sstream>
+
+// Simple templated array object without resizing capabilities
+template<typename T>
+class Array {
+public:
+ HOST inline Array<T>() : num(0), values(nullptr) {} // printf("Creating Array1 %x\n",this);
+ HOST inline Array<T>(size_t num) : num(num), values(nullptr) {
+ // printf("Constructing Array<%s> %x with values %x\n", type_name<T>().c_str(), this, values);
+ host_allocate();
+ // printf("Array<%s> %x with values %x\n", type_name<T>().c_str(), this, values);
+ }
+ HOST inline Array<T>(size_t num, const T* inp ) : num(num), values(nullptr) {
+ // printf("Constructing Array<%s> %x with values %x\n", type_name<T>().c_str(), this, values);
+ host_allocate();
+ for (size_t i = 0; i < num; ++i) {
+ values[i] = inp[i];
+ }
+ // printf("Created Array3 %x with values %x\n",this, values);
+ }
+ HOST inline Array<T>(const Array<T>& a) { // copy constructor
+ // printf("Copy-constructing Array<T> %x from %x with values %x\n",this, &a, a.values);
+ num = a.num;
+ host_allocate();
+ for (size_t i = 0; i < num; ++i) {
+ values[i] = a[i];
+ }
+ // printf("Copy-constructed Array<T> %x with values %x\n",this, values);
+ }
+ HOST inline Array<T>(Array<T>&& a) { // move constructor
+ // printf("Move-constructing Array<T> from %x with values %x\n", &a, a.values);
+ num = a.num;
+ values = a.values;
+ a.values = nullptr;
+ a.num = 0; // leave the moved-from object empty and consistent
+ // printf("Move-constructed Array<T> with values %x\n", values);
+ }
+ HOST inline Array<T>& operator=(const Array<T>& a) { // copy assignment operator
+ if (this == &a) return *this; // guard against self-assignment
+ num = a.num;
+ host_allocate();
+ for (size_t i = 0; i < num; ++i) {
+ values[i] = a[i];
+ }
+ // printf("Copy-operator for Array<T> %x with values %x\n",this, values);
+ return *this;
+ }
+ HOST inline Array<T>& operator=(Array<T>&& a) { // move assignment operator
+ host_deallocate();
+ num = a.num;
+ values = a.values;
+ a.num = 0;
+ a.values = nullptr;
+ printf("Move-operator for Array<T> %x with values %x\n",this, values);
+ return *this;
+ }
+ HOST DEVICE inline T& operator[](size_t i) {
+ assert( i < num );
+ return values[i];
+ }
+ HOST DEVICE inline const T& operator[](size_t i) const {
+ assert( i < num );
+ return values[i];
+ }
+ HOST inline ~Array<T>() {
+ // printf("Destroying Array %x with values %x\n",this, values);
+ host_deallocate();
+ }
+
+#ifdef USE_CUDA
+ // C++14-compatible SFINAE: select between two copy_to_cuda overloads depending on whether T provides a copy_to_cuda() member
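+ // Descriptive sketch of the dispatch: for a flat element type (e.g. T = Vector3) the
+ // overload directly below is selected and the value buffer is copied with a single
+ // cudaMemcpy; when T itself provides copy_to_cuda() (e.g. T = Array<U>), the second
+ // overload is selected and each element is deep-copied individually.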
+ template <typename Dummy = void, typename std::enable_if_t<!has_copy_to_cuda<T>::value, Dummy>* = nullptr>
+ HOST inline Array<T>* copy_to_cuda(Array<T>* dev_ptr = nullptr) const {
+ if (dev_ptr == nullptr) { // allocate if needed
+ // printf(" cudaMalloc for array\n");
+ gpuErrchk(cudaMalloc(&dev_ptr, sizeof(Array<T>)));
+ }
+
+ // Allocate values_d
+ T* values_d = nullptr;
+ if (num > 0) {
+ // printf(" cudaMalloc for %d items\n", num);
+ size_t sz = sizeof(T) * num;
+ gpuErrchk(cudaMalloc(&values_d, sz));
+
+ // Copy values
+ gpuErrchk(cudaMemcpy(values_d, values, sz, cudaMemcpyHostToDevice));
+ }
+
+ // Copy Array with pointers correctly assigned
+ Array<T> tmp(0);
+ tmp.num = num;
+ tmp.values = values_d;
+ gpuErrchk(cudaMemcpy(dev_ptr, &tmp, sizeof(Array<T>), cudaMemcpyHostToDevice));
+ tmp.num = 0; // reset tmp so its destructor does not free the device buffer
+ tmp.values = nullptr;
+ // printf("Copying Array<%s> %x with %d values %x to device at %x\n", type_name<T>().c_str(), this, num, values, dev_ptr);
+ return dev_ptr;
+ }
+
+ template <typename Dummy = void, typename std::enable_if_t<has_copy_to_cuda<T>::value, Dummy>* = nullptr>
+ HOST inline Array<T>* copy_to_cuda(Array<T>* dev_ptr = nullptr) const {
+ if (dev_ptr == nullptr) { // allocate if needed
+ // printf(" cudaMalloc for array\n");
+ gpuErrchk(cudaMalloc(&dev_ptr, sizeof(Array<T>)));
+ }
+
+ // Allocate values_d
+ T* values_d = nullptr;
+ if (num > 0) {
+ size_t sz = sizeof(T) * num;
+ // printf(" cudaMalloc for %d items\n", num);
+ gpuErrchk(cudaMalloc(&values_d, sz));
+
+ // Copy values
+ for (size_t i = 0; i < num; ++i) {
+ values[i].copy_to_cuda(values_d + i);
+ }
+ }
+
+ // Copy Array with pointers correctly assigned
+ Array<T> tmp(0);
+ tmp.num = num;
+ tmp.values = values_d;
+ gpuErrchk(cudaMemcpy(dev_ptr, &tmp, sizeof(Array<T>), cudaMemcpyHostToDevice));
+ tmp.num = 0;
+ tmp.values = nullptr;
+ // printf("Copying Array %x with values %x to device at %x\n",this, values, dev_ptr);
+ return dev_ptr;
+ }
+
+ template <typename Dummy = void, typename std::enable_if_t<!has_copy_to_cuda<T>::value, Dummy>* = nullptr>
+ HOST static Array<T> copy_from_cuda(Array<T>* dev_ptr) {
+ // Create host object, copy raw device data over
+ Array<T> tmp(0);
+ if (dev_ptr != nullptr) {
+ gpuErrchk(cudaMemcpy(&tmp, dev_ptr, sizeof(Array<T>), cudaMemcpyDeviceToHost));
+
+ if (tmp.num > 0) {
+ T* values_d = tmp.values;
+ tmp.values = new T[tmp.num];
+
+ // Copy values
+ size_t sz = sizeof(T) * tmp.num;
+ gpuErrchk(cudaMemcpy(tmp.values, values_d, sz, cudaMemcpyDeviceToHost));
+ } else {
+ tmp.values = nullptr;
+ }
+ }
+ // printf("Copying device Array %x to host %x with values %x\n", dev_ptr, &tmp, tmp.values);
+ return tmp;
+ }
+
+ template <typename Dummy = void, typename std::enable_if_t<has_copy_to_cuda<T>::value, Dummy>* = nullptr>
+ HOST static Array<T> copy_from_cuda(Array<T>* dev_ptr) {
+ // Create host object, copy raw device data over
+ Array<T> tmp(0);
+
+ if (dev_ptr != nullptr) {
+ gpuErrchk(cudaMemcpy(&tmp, dev_ptr, sizeof(Array<T>), cudaMemcpyDeviceToHost));
+
+ if (tmp.num > 0) {
+ T* values_d = tmp.values;
+ tmp.values = new T[tmp.num];
+
+ // Copy values
+ for (size_t i = 0; i < tmp.num; ++i) {
+ tmp.values[i] = T::copy_from_cuda(values_d + i);
+ }
+ } else {
+ tmp.values = nullptr;
+ }
+ }
+ // printf("Copying device Array %x to host %x with values %x\n", dev_ptr, &tmp, tmp.values);
+ return tmp;
+ }
+
+ template <typename Dummy = void, typename std::enable_if_t<!has_copy_to_cuda<T>::value, Dummy>* = nullptr>
+ HOST static void remove_from_cuda(Array<T>* dev_ptr, bool remove_self = true) {
+ // printf("Removing device Array<%s> %x\n", typeid(T).name(), dev_ptr);
+ if (dev_ptr == nullptr) return;
+ Array<T> tmp(0);
+ gpuErrchk(cudaMemcpy(&tmp, dev_ptr, sizeof(Array<T>), cudaMemcpyDeviceToHost));
+ if (tmp.num > 0) {
+ // Remove values
+ gpuErrchk(cudaFree(tmp.values));
+ }
+ tmp.values = nullptr;
+ gpuErrchk(cudaMemset((void*) &(dev_ptr->values), 0, sizeof(T*))); // set nullptr on device
+ if (remove_self) {
+ gpuErrchk(cudaFree(dev_ptr));
+ dev_ptr = nullptr;
+ }
+ // printf("...done removing device Array<%s> %x\n", typeid(T).name(), dev_ptr);
+ }
+
+ template <typename Dummy = void, typename std::enable_if_t<has_copy_to_cuda<T>::value, Dummy>* = nullptr>
+ HOST static void remove_from_cuda(Array<T>* dev_ptr, bool remove_self = true) {
+ // printf("Removing device Array<%s> %x\n", typeid(T).name(), dev_ptr);
+ if (dev_ptr == nullptr) return;
+ Array<T> tmp(0);
+ gpuErrchk(cudaMemcpy(&tmp, dev_ptr, sizeof(Array<T>), cudaMemcpyDeviceToHost));
+ if (tmp.num > 0) {
+ // Remove nested device allocations, then free the element buffer itself
+ for (size_t i = 0; i < tmp.num; ++i) {
+ T::remove_from_cuda(tmp.values+i, false);
+ }
+ gpuErrchk(cudaFree(tmp.values));
+ }
+ tmp.values = nullptr;
+ gpuErrchk(cudaMemset((void*) &(dev_ptr->values), 0, sizeof(T*))); // set nullptr on device
+ if (remove_self) {
+ gpuErrchk(cudaFree(dev_ptr));
+ dev_ptr = nullptr;
+ }
+ // printf("...done removing device Array<%s> %x\n", typeid(T).name(), dev_ptr);
+ }
+#endif
+ HOST DEVICE size_t size() const { return num; }
+
+private:
+ HOST void host_allocate() {
+ host_deallocate();
+ if (num > 0) {
+ values = new T[num];
+ } else {
+ values = nullptr;
+ }
+ // printf("Array<%s>.host_allocate() %d values at %x\n", typeid(T).name(), num, values);
+
+ }
+ HOST void host_deallocate() {
+ // printf("Array<%s>.host_deallocate() %d values at %x\n", typeid(T).name(), num, values);
+ if (values != nullptr) delete[] values;
+ values = nullptr;
+ }
+
+ size_t num;
+ T* values;
+};
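+
+// Usage sketch (assumes USE_CUDA; mirrors src/Tests/array.cu):
+//   Array<Vector3> a(10);
+//   a[0] = Vector3(1);
+//   Array<Vector3>* a_d = a.copy_to_cuda();                    // allocate and copy to device
+//   Array<Vector3> a_h = Array<Vector3>::copy_from_cuda(a_d);  // copy device data back to host
+//   Array<Vector3>::remove_from_cuda(a_d);                     // free device allocations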
diff --git a/src/Types/Bitmask.h b/src/Types/Bitmask.h
index 3e4ca4aaa3ab55d9665ebccbffed830eb7f2ea5d..1ca22ffb384519e2a53ce5ffd4a25185b2d14416 100644
--- a/src/Types/Bitmask.h
+++ b/src/Types/Bitmask.h
@@ -83,12 +83,13 @@ public:
#ifdef USE_CUDA
HOST
- Bitmask* copy_to_cuda() const {
- Bitmask* tmp_obj_d = nullptr;
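+ // The optional argument lets a caller (e.g. a containing Array's element-wise
+ // copy_to_cuda()) copy this Bitmask into a pre-allocated device slot instead of
+ // allocating a new one.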
+ Bitmask* copy_to_cuda(Bitmask* tmp_obj_d = nullptr) const {
Bitmask obj_tmp(0);
data_t* mask_d = nullptr;
size_t sz = sizeof(data_t) * get_array_size();
- gpuErrchk(cudaMalloc(&tmp_obj_d, sizeof(Bitmask)));
+ if (tmp_obj_d == nullptr) {
+ gpuErrchk(cudaMalloc(&tmp_obj_d, sizeof(Bitmask)));
+ }
if (sz > 0) {
gpuErrchk(cudaMalloc(&mask_d, sz));
gpuErrchk(cudaMemcpy(mask_d, mask, sz, cudaMemcpyHostToDevice));
@@ -102,7 +103,7 @@ public:
}
HOST
- static Bitmask retrieve_from_cuda(Bitmask* obj_d) {
+ static Bitmask copy_from_cuda(Bitmask* obj_d) {
Bitmask obj_tmp(0);
gpuErrchk(cudaMemcpy(&obj_tmp, obj_d, sizeof(Bitmask), cudaMemcpyDeviceToHost));
printf("TEST: %d\n", obj_tmp.len);
diff --git a/src/Types/Vector3.h b/src/Types/Vector3.h
index f53f3bf6cd5c947f6aa6827b5a073436bcb40c65..4718273eb5a25622ccdbda8c08475babc37a0c61 100644
--- a/src/Types/Vector3.h
+++ b/src/Types/Vector3.h
@@ -6,6 +6,7 @@
#pragma once
#include <memory>
#include <type_traits> // for std::common_type<T,U>
+#include <sstream>
/**
* 3D vector utility class with common operations implemented on CPU and GPU.
@@ -22,8 +23,8 @@ public:
HOST DEVICE inline Vector3_t<T>() : x(T(0)), y(T(0)), z(T(0)), w(T(0)) {}
HOST DEVICE inline Vector3_t<T>(T s):x(s), y(s), z(s), w(s) {}
HOST DEVICE inline Vector3_t<T>(const Vector3_t<T>& v):x(v.x), y(v.y), z(v.z), w(v.w) {}
- HOST DEVICE inline Vector3_t<T>(T x0, T y0, T z0) : x(x0), y(y0), z(z0), w(0) {}
- HOST DEVICE inline Vector3_t<T>(T x0, T y0, T z0, T w0) : x(x0), y(y0), z(z0), w(w0) {}
+ HOST DEVICE inline Vector3_t<T>(T x, T y, T z) : x(x), y(y), z(z), w(0) {}
+ HOST DEVICE inline Vector3_t<T>(T x, T y, T z, T w) : x(x), y(y), z(z), w(w) {}
// HOST DEVICE inline Vector3_t<T>(const T* d) : x(d[0]), y(d[1]), z(d[2]), w(0) {}
HOST DEVICE inline Vector3_t<T>(const float4 a) : x(a.x), y(a.y), z(a.z), w(a.w) {}
@@ -57,6 +58,12 @@ public:
}
#endif
+ HOST DEVICE inline Vector3_t<T>& operator=(const Vector3_t<T>& v) {
+ x = v.x;
+ y = v.y;
+ z = v.z;
+ w = v.w;
+ return *this;
+ }
HOST DEVICE inline Vector3_t<T>& operator=(const Vector3_t<T>&& v) {
x = v.x;
y = v.y;
@@ -177,12 +184,17 @@ public:
printf("%0.3f %0.3f %0.3f\n", x,y,z);
}
- auto to_string() const {
+ auto to_string_old() const {
char s[128];
sprintf(s, "%.10g %.10g %.10g (%.10g)", x, y, z, w);
s[127] = 0;
return std::string(s);
}
+ auto to_string() const {
+ std::ostringstream oss;
+ oss << x << " " << y << " " << z << " (" << w << ")";
+ return oss.str();
+ }
template<typename U>
HOST DEVICE inline bool operator==(U b) const {
diff --git a/src/Tests/type_name.h b/src/type_name.h
similarity index 95%
rename from src/Tests/type_name.h
rename to src/type_name.h
index f64e440bef08d1df44fafd73fb64367d8542327e..8ad2039003f4a1900685be63404a8d6af210d18e 100644
--- a/src/Tests/type_name.h
+++ b/src/type_name.h
@@ -1,3 +1,5 @@
+#pragma once
+
#include <type_traits>
#include <typeinfo>
#ifndef _MSC_VER
@@ -9,6 +11,7 @@
template <typename T, typename ...Extras>
std::string type_name() {
+ return typeid(T).name(); // NOTE: short-circuits the demangling code below and returns the mangled name
using TR = typename std::remove_reference<T>::type;
std::unique_ptr<char, void(*)(void*)> own
(