Commit 8e1618d7 authored by cmaffeo2

Add bitmask, ParticlePatch boilerplate code, tests

parent b40b0a7c
#pragma once
#ifdef __CUDACC__
#define HOST __host__
#define DEVICE __device__
#else
#define HOST
#define DEVICE
#endif
#if defined(__CUDACC__) // NVCC
#define MY_ALIGN(n) __align__(n)
#elif defined(__GNUC__) // GCC
#define MY_ALIGN(n) __attribute__((aligned(n)))
#elif defined(_MSC_VER) // MSVC
#define MY_ALIGN(n) __declspec(align(n))
#else
#error "Please provide a definition for MY_ALIGN macro for your host compiler!"
#endif
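// Usage sketch (illustrative, not part of this commit): HOST/DEVICE lets one
// function definition compile as both a host and a device function under nvcc,
// while MY_ALIGN pins struct alignment across nvcc, gcc, and MSVC.
//
//   struct MY_ALIGN(16) Quad { float a, b, c, d; };
//   HOST DEVICE inline float quad_sum(const Quad& q) { return q.a + q.b + q.c + q.d; }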
#ifndef USE_CUDA
struct MY_ALIGN(16) float4 {
float4() : x(0), y(0), z(0), w(0) {};
float4(float x, float y, float z, float w) : x(x), y(y), z(z), w(w) {};
float4 operator+(const float4& o) const {
return float4(x+o.x, y+o.y, z+o.z, w+o.w);
};
float4 operator*(float s) const {
return float4(x*s, y*s, z*s, w*s);
};
float x,y,z,w;
};
#endif
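// Minimal usage sketch of the fallback float4 (illustrative; CUDA builds get
// the vendor float4 from the CUDA headers instead):
//
//   float4 a(1, 2, 3, 4);
//   float4 b = a + float4(0.5f, 0.5f, 0.5f, 0.5f); // component-wise sum
//   float4 c = b * 2.0f;                           // uniform scale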
#ifdef USE_CUDA
#include <cstdio>
#include <vector>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <cuda_runtime.h>
// #include "useful.h"
......
......
@@ -6,3 +6,9 @@ public:
void compute(Patch* patch);
int num_patches() const { return 1; };
};
class LangevinIntegrate : public Integrator {
public:
void compute(Patch* patch);
int num_patches() const { return 1; };
};
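// Note: both integrators report num_patches() == 1, which appears to mark them
// as purely local PatchOps that read and write a single patch per call.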
#include "ParticlePatch.h"
Patch::Patch() : BasePatch() {
pos_force = momentum = nullptr;
// num_rb = num_rb_attached_particles = 0;
// rb_pos = rb_orient = rb_mom = rb_ang_mom = type = rb_type = nullptr;
};
void Patch::compute() {
for (auto& c_p: local_computes) {
c_p->compute(this);
......
......
@@ -27,22 +27,24 @@ class BasePatch {
public:
// BasePatch(size_t num, short thread_id, short gpu_id, SimSystem& sys);
// BasePatch(size_t num, short thread_id, short gpu_id);
// BasePatch();
BasePatch() : capacity(0), num(0) {} // initializer order matches declaration order below
// ~BasePatch();
private:
size_t capacity;
size_t num;
// short thread_id; // MPI
// short gpu_id; // -1 if GPU unavailable
static size_t patch_idx; // Unique ID across ranks
Vector3 lower_bound;
Vector3 upper_bound;
};
class PatchProxy {
//
public:
// ???
private:
......
@@ -51,50 +53,53 @@ private:
class Patch : public BasePatch {
public:
Patch();
// void deleteParticles(IndexList& p);
// void addParticles(size_t n, size_t typ);
// template<class T>
// void add_compute(std::unique_ptr<T>&& p) {
// std::unique_ptr<BasePatchOp> base_p = static_cast<std::unique_ptr<BasePatchOp>>(p);
// local_computes.emplace_back(p);
// };
void add_compute(std::unique_ptr<BasePatchOp>&& p) {
local_computes.emplace_back(std::move(p));
};
void add_point_particles(size_t num_added);
void add_point_particles(size_t num_added, Vector3* positions, Vector3* momenta = nullptr);
// TODO? emplace_point_particles
void compute();
private:
// void randomize_positions(size_t start = 0, size_t num = -1);
// std::vector<PatchProxy> neighbors;
std::vector<std::unique_ptr<BasePatchOp>> local_computes; // Operations that will be performed on this patch each timestep
std::vector<std::unique_ptr<BasePatchOp>> nonlocal_computes; // Operations involving data from neighboring patches, performed each timestep
// CPU particle arrays
Vector3* pos;
Vector3* pos_force;
Vector3* momentum;
Vector3* rb_pos;
Matrix3* rb_orient;
Vector3* rb_mom;
Vector3* rb_amom;
int* type; // particle types: 0, 1, ... -> num * numReplicas
int num_rb_attached_particles;
int num_group_sites;
int* groupSiteData_d;
// Device arrays
Vector3* pos_d;
Vector3* momentum_d;
Vector3* rb_pos_d;
Matrix3* rb_orient_d;
Vector3* rb_mom_d;
Vector3* rb_amom_d;
int* type_d;
// size_t num_rb;
// Vector3* rb_pos;
// Matrix3* rb_orient;
// Vector3* rb_mom;
// Vector3* rb_ang_mom;
// size_t* type; // particle types: 0, 1, ... -> num * numReplicas
// size_t* rb_type; // rigid body types: 0, 1, ... -> num * numReplicas
// size_t num_rb_attached_particles;
// TODO: add a rb_attached bitmask
size_t num_group_sites;
};
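// Usage sketch (illustrative; assumes the BDIntegrate PatchOp defined in
// CPU.h below): computes are attached to a patch once, then run each timestep.
//
//   Patch p;
//   p.add_compute(std::make_unique<BDIntegrate>());
//   p.compute(); // invokes BDIntegrate::compute(&p) via local_computes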
// // Patch::Patch(size_t num, short thread_id, short gpu_id) {};
......
#include "CPU.h"
void BDIntegrate::compute(Patch* p) {
std::cout << "BDIntegrate::compute()" << std::endl;
IntegratorKernels::BDIntegrate();
};
#pragma once
#include "../ParticlePatch.h"
#ifdef USE_CUDA
class PatchCPU : public Patch {
public:
void compute();
void add_compute(std::unique_ptr<BasePatchOp>&& p) {
local_computes.emplace_back(std::move(p));
};
void add_point_particles(size_t num_added);
void add_point_particles(size_t num_added, Vector3* positions, Vector3* momenta = nullptr);
};
#endif
#include "CUDA.h"
#ifdef USE_CUDA
// __global__ void BDIntegrate_kernel() {
// if (threadIdx.x == 0) {
// printf("BDIntegrate_kernel()\n");
// IntegratorKernels::BDIntegrate();
// }
// };
// void BDIntegrateCUDA::compute(Patch* p) {
// printf("BDIntegrateCUDA::compute()\n");
// BDIntegrate_kernel<<<1,32>>>();
// };
PatchCUDA::PatchCUDA() : Patch() {
pos_force_d = momentum_d = rb_pos_d = rb_mom_d = rb_ang_mom_d = nullptr; // Vector3* members
rb_orient_d = nullptr; type_d = rb_type_d = nullptr; groupSiteData_d = nullptr; // chained assignment must stay within one pointer type
}
#endif
#pragma once
#include "../GPUManager.h"
#include "../ParticlePatch.h"
class PatchCUDA : public Patch {
public:
PatchCUDA();
void compute();
// void assign_gpu(GPU);
private:
GPU my_gpu;
// Device arrays
size_t* groupSiteData_d;
Vector3* pos_force_d;
Vector3* momentum_d;
Vector3* rb_pos_d;
Matrix3* rb_orient_d;
Vector3* rb_mom_d;
Vector3* rb_ang_mom_d;
size_t* type_d;
size_t* rb_type_d;
};
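// Design note: PatchCUDA mirrors the host-side particle arrays with *_d device
// pointers and owns a GPU handle; the intended flow (as sketched by
// Bitmask::copy_to_cuda() elsewhere in this commit) appears to be: fill host
// arrays, memcpy to the *_d mirrors, launch kernels, copy results back.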
#pragma once
// #include "../useful.h"
#include "../Types.h"
#ifdef __CUDACC__
#define HOST __host__
#define DEVICE __device__
#else
#define HOST
#define DEVICE
#endif
namespace IntegratorKernels {
HOST DEVICE void __inline__ BDIntegrate() {
// std::cout << "Computes::BDIntegrate_inline" << std::endl;
printf("Integrator::BDIntegrate\n");
};
HOST DEVICE void __inline__ BDIntegrate(Vector3* __restrict__ pos, const Vector3* const __restrict__ force, const int& idx, const float& root_Dt, const Vector3& normal_sample_3D) {
printf("Integrator::BDIntegrate\n");
pos[idx] = pos[idx] + force[idx] * root_Dt + normal_sample_3D;
};
}
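// The overdamped (Brownian dynamics) step above advances each position by a
// deterministic drift plus a pre-sampled Gaussian kick:
//
//   pos[idx] += force[idx] * root_Dt + normal_sample_3D
//
// i.e. x(t+dt) = x(t) + F*drift + xi with xi ~ N(0, 2 D dt) per component; the
// exact diffusion scaling is presumably folded into root_Dt and
// normal_sample_3D by the caller (an assumption; not spelled out in this commit).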
#include "PatchOp.cuh" // TODO: refactor this file
......
@@ -7,7 +7,7 @@ void SimManager::run() {
// SimSystem sys = SimSystem();
// Patch p(10,0,0,sys);
Patch p;
//ProxyPatch p2(10,0,0);
......
@@ -28,6 +28,5 @@ void SimManager::run() {
#ifdef USE_CUDA
cudaDeviceSynchronize();
#endif
}
}
};
......
@@ -8,6 +8,7 @@ include(CTest)
add_executable("arbd_tests"
matrix3.cu
vector3_precision.cu
bitmask.cu
)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --extended-lambda")
set_property(TARGET arbd_tests PROPERTY CXX_STANDARD 14)
......
#include "catch_boiler.h"
#include "../Types.h"
#include <catch2/catch_tostring.hpp>
namespace Catch {
template <>
struct StringMaker<Bitmask> {
static std::string convert( Bitmask const& value ) {
return value.to_string();
}
};
}
DEF_RUN_TRIAL
namespace Tests::Bitmask {
void run_tests() {
using T = ::Bitmask;
// std::ostream test_info;
INFO( "Testing " << type_name<T>() << " functionally" );
{
int i = 10;
T b = T(i); // bitmask of length i
for (int j: {0,3,10,19}) {
if (j < i) b.set_mask(j,1);
}
REQUIRE( b.to_string() == "1001000000" );
T* b_d = b.copy_to_cuda();
cudaDeviceSynchronize();
T b2 = T::retrieve_from_cuda(b_d);
cudaDeviceSynchronize();
REQUIRE( b == b2 );
T::remove_from_cuda(b_d); // free the device-side copy
}
}
TEST_CASE( "Testing Bitmask for GPU/CPU consistency", "[Tests::Bitmask]" ) {
run_tests();
}
}
#include <iostream>
#include <cstdio>
#include "../SignalManager.h"
/* #include "../Types.h" */
#include "../GPUManager.h"
#include <cuda.h>
#include <nvfunctional>
#include "type_name.h"
/* #include <catch2/catch_tostring.hpp> */
/* namespace Catch { */
/* template<typename T, bool b1, bool b2> */
/* struct StringMaker<Matrix3_t<T,b1,b2>> { */
/* static std::string convert( Matrix3_t<T,b1,b2> const& value ) { */
/* return value.to_string(); */
/* } */
/* }; */
/* } */
#include <catch2/catch_test_macros.hpp>
#include <catch2/matchers/catch_matchers_floating_point.hpp>
namespace Tests {
template<typename Op_t, typename R, typename ...T>
__global__ void op_kernel(R* result, T...in) {
if (blockIdx.x == 0) {
*result = Op_t::op(in...);
}
}
}
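// op_kernel evaluates Op_t::op on the device so that run_trial (defined by
// DEF_RUN_TRIAL below) can compare the GPU result against the host result for
// the same inputs.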
#define DEF_RUN_TRIAL \
namespace Tests {\
template<typename Op_t, typename R, typename ...T>\
void run_trial( std::string name, R expected_result, T...args) {\
R *gpu_result_d, gpu_result, cpu_result;\
cpu_result = Op_t::op(args...);\
cudaMalloc((void **)&gpu_result_d, sizeof(R));\
\
op_kernel<Op_t, R, T...><<<1,1>>>(gpu_result_d, args...);\
cudaMemcpy(&gpu_result, gpu_result_d, sizeof(R), cudaMemcpyDeviceToHost);\
cudaDeviceSynchronize();\
cudaFree(gpu_result_d); /* release the device-side result buffer */\
\
INFO( name );\
\
CAPTURE( cpu_result );\
CAPTURE( expected_result );\
\
REQUIRE( cpu_result == expected_result );\
CHECK( cpu_result == gpu_result );\
}\
}
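// Usage sketch (illustrative): with the Binary ops below, a trial reads
//
//   run_trial<Tests::Binary::AddOp<float,float,float>, float>(
//       "float + float", 3.0f, 1.0f, 2.0f);
//
// REQUIRE checks the CPU result against the expectation; CHECK then compares
// the GPU result against the CPU one.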
namespace Tests::Unary {
template<typename R, typename T>
struct NegateOp { HOST DEVICE static R op(T in) { return static_cast<R>(-in); } };
template<typename R, typename T>
struct NormalizedOp { HOST DEVICE static R op(T in) { return static_cast<R>(in.normalized()); } };
}
namespace Tests::Binary {
// R is return type, T and U are types of operands
template<typename R, typename T, typename U>
struct AddOp { HOST DEVICE static R op(T a, U b) { return static_cast<R>(a+b); } };
template<typename R, typename T, typename U>
struct SubOp { HOST DEVICE static R op(T a, U b) { return static_cast<R>(a-b); } };
template<typename R, typename T, typename U>
struct MultOp { HOST DEVICE static R op(T a, U b) { return static_cast<R>(a*b); } };
template<typename R, typename T, typename U>
struct DivOp { HOST DEVICE static R op(T a, U b) { return static_cast<R>(a/b); } };
}
#include "catch_boiler.h"
#include "../Types.h"
#include <catch2/catch_tostring.hpp>
namespace Catch {
......
@@ -17,144 +10,21 @@ namespace Catch {
}
};
}
#include <catch2/catch_test_macros.hpp>
#include <catch2/matchers/catch_matchers_floating_point.hpp>

DEF_RUN_TRIAL
namespace Tests::Unary::Matrix3 {
template<typename R, typename T>
struct DeterminantOp { HOST DEVICE static R op(T in) { return static_cast<R>(in.det()); } };
template<typename R, typename T>
struct NormalizeDetOp { HOST DEVICE static R op(T in) { return static_cast<R>(in.normalized().det()); } };
template<typename R, typename T>
struct InverseOp { HOST DEVICE static R op(T in) { return static_cast<R>(in.inverse()); } };
template<typename R, typename T>
struct TransposeOp { HOST DEVICE static R op(T in) { return static_cast<R>(in.transpose()); } };
template<typename A, bool is_diag=false, bool check_diag=false>
void run_tests() {
......
@@ -204,7 +74,11 @@ namespace Tests::Unary::Matrix3 {
}
namespace Tests::Binary::Matrix3 {
// Custom Operations for Matrix3
template<typename R, typename T, typename U>
struct CrossOp { HOST DEVICE static R op(T a, U b) { return static_cast<R>(a.cross(b)); } };
// Run
template<typename A, typename B, bool is_diag=false, bool check_diag=false>
void run_tests() {
using T = Matrix3_t<A, is_diag, check_diag>;
......
#pragma once
#ifdef __CUDACC__
#define HOST __host__
#define DEVICE __device__
#else
#define HOST
#define DEVICE
#endif
#if defined(__CUDACC__) // NVCC
#define MY_ALIGN(n) __align__(n)
#elif defined(__GNUC__) // GCC
#define MY_ALIGN(n) __attribute__((aligned(n)))
#elif defined(_MSC_VER) // MSVC
#define MY_ALIGN(n) __declspec(align(n))
#else
#error "Please provide a definition for MY_ALIGN macro for your host compiler!"
#endif
#ifdef USE_CUDA
#include <cuda_runtime.h>
#else
struct MY_ALIGN(16) float4 {
float4() : x(0), y(0), z(0), w(0) {};
float4(float x, float y, float z, float w) : x(x), y(y), z(z), w(w) {};
float4 operator+(const float4& o) const {
return float4(x+o.x, y+o.y, z+o.z, w+o.w);
};
float4 operator*(float s) const {
return float4(x*s, y*s, z*s, w*s);
};
float x,y,z,w;
};
#endif
#include "GPUManager.h"
#include <stdarg.h> // For va_start, etc.
#include <memory> // For std::unique_ptr
#include <cstring>
// from: https://stackoverflow.com/questions/2342162/stdstring-formatting-like-sprintf/8098080#8098080
// Utility function used by types to return std::string using format syntax
inline std::string string_format(const std::string fmt_str, ...) {
int final_n, n = ((int)fmt_str.size()) * 2; /* Reserve two times as much as the length of the fmt_str */
std::unique_ptr<char[]> formatted;
va_list ap;
......
@@ -63,3 +30,5 @@ inline std::string string_format(const std::string fmt_str, ...) {
using Vector3 = Vector3_t<float>;
using Matrix3 = Matrix3_t<float,false>;
#include "Types/Bitmask.h"
#pragma once
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <cassert>
#include <string> // Bitmask::to_string
#include <initializer_list>
class BitmaskBase {
public:
BitmaskBase(const size_t len) : len(len) {}
virtual ~BitmaskBase() = default; // virtual: class has virtual member functions
HOST DEVICE
virtual void set_mask(size_t i, bool value) = 0;
HOST DEVICE
virtual bool get_mask(size_t i) const = 0;
size_t get_len() const { return len; }
virtual void print() {
for (size_t i = 0; i < len; ++i) {
printf("%d", static_cast<int>(get_mask(i)) );
}
printf("\n");
}
protected:
size_t len;
};
// Don't use base because virtual member functions require device malloc
class Bitmask {
typedef size_t idx_t;
typedef unsigned int data_t;
public:
Bitmask(const idx_t len) : len(len) {
idx_t tmp = get_array_size();
assert(tmp * data_stride >= len);
mask = (tmp > 0) ? new data_t[tmp] : nullptr;
for (idx_t i = 0; i < tmp; ++i) mask[i] = data_t(0);
}
~Bitmask() { if (mask != nullptr) delete[] mask; }
HOST DEVICE
idx_t get_len() const { return len; }
HOST DEVICE
void set_mask(idx_t i, bool value) {
assert(i < len);
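// locate the word holding bit i, then build a one-bit mask within that word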
idx_t ci = i/data_stride;
data_t change_bit = (data_t(1) << (i-ci*data_stride));
#ifdef __CUDA_ARCH__
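// device path: other threads may update the same word concurrently, so the
// read-modify-write must be atomic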
if (value) {
atomicOr( &mask[ci], change_bit );
} else {
atomicAnd( &mask[ci], ~change_bit );
}
#else
if (value) {
mask[ci] = mask[ci] | change_bit;
} else {
mask[ci] = mask[ci] & (~change_bit);
}
#endif
}
HOST DEVICE
bool get_mask(const idx_t i) const {
assert(i < len);
const idx_t ci = i/data_stride;
return mask[ci] & (data_t(1) << (i-ci*data_stride));
}
HOST DEVICE inline bool operator==(const Bitmask& b) const {
// Inefficient but straightforward approach; directly comparing underlying data would be fine, but then we need to deal with data strides
if (len != b.len) return false;
for (idx_t i = 0; i < len; ++i) {
if (get_mask(i) != b.get_mask(i)) return false;
}
return true;
}
#ifdef USE_CUDA
HOST
Bitmask* copy_to_cuda() const {
Bitmask* tmp_obj_d = nullptr;
Bitmask obj_tmp(0);
data_t* mask_d = nullptr;
size_t sz = sizeof(data_t) * get_array_size();
gpuErrchk(cudaMalloc(&tmp_obj_d, sizeof(Bitmask)));
if (sz > 0) {
gpuErrchk(cudaMalloc(&mask_d, sz));
gpuErrchk(cudaMemcpy(mask_d, mask, sz, cudaMemcpyHostToDevice));
}
// printf("Bitmask::copy_to_cuda() len(%lld) mask(%x)\n", len, mask_d);
obj_tmp.len = len;
obj_tmp.mask = mask_d;
gpuErrchk(cudaMemcpy(tmp_obj_d, &obj_tmp, sizeof(Bitmask), cudaMemcpyHostToDevice));
obj_tmp.mask = nullptr;
return tmp_obj_d;
}
HOST
static Bitmask retrieve_from_cuda(Bitmask* obj_d) {
Bitmask obj_tmp(0);
gpuErrchk(cudaMemcpy(&obj_tmp, obj_d, sizeof(Bitmask), cudaMemcpyDeviceToHost));
printf("TEST: %d\n", obj_tmp.len);
if (obj_tmp.len > 0) {
size_t array_size = obj_tmp.get_array_size();
size_t sz = sizeof(data_t) * array_size;
data_t *data_addr = obj_tmp.mask;
obj_tmp.mask = new data_t[array_size];
gpuErrchk(cudaMemcpy(obj_tmp.mask, data_addr, sz, cudaMemcpyDeviceToHost));
} else {
obj_tmp.mask = nullptr;
}
return obj_tmp;
}
HOST
static void remove_from_cuda(Bitmask* obj_d) {
Bitmask obj_tmp(0);
gpuErrchk(cudaMemcpy(&obj_tmp, obj_d, sizeof(Bitmask), cudaMemcpyDeviceToHost));
if (obj_tmp.len > 0) {
gpuErrchk(cudaFree(obj_tmp.mask));
}
obj_tmp.mask = nullptr;
gpuErrchk(cudaMemset((void*) &(obj_d->mask), 0, sizeof(data_t*))); // null out the mask pointer inside the device-side object before freeing it
gpuErrchk(cudaFree(obj_d));
// (obj_d is passed by value, so the caller's pointer is left unchanged)
}
#endif
HOST
auto to_string() const {
std::string s;
s.reserve(len);
for (size_t i = 0; i < len; ++i) {
s += get_mask(i) ? '1' : '0';
}
return s;
}
HOST DEVICE
void print() {
for (idx_t i = 0; i < len; ++i) {
printf("%d", (int) get_mask(i));
}
printf("\n");
}
private:
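// words needed to hold len bits: ceil(len/data_stride), with a minimum of one word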
idx_t get_array_size() const { return (len == 0) ? 1 : (len-1)/data_stride + 1; }
idx_t len;
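// bits stored per data_t word (CHAR_BIT * sizeof(data_t))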
const static idx_t data_stride = CHAR_BIT * sizeof(data_t)/sizeof(char);
data_t* __restrict__ mask;
};
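// Usage sketch (illustrative), mirroring the bitmask.cu test above:
//
//   Bitmask b(10);                    // ten bits, initially all zero
//   b.set_mask(0, 1); b.set_mask(3, 1);
//   assert(b.to_string() == "1001000000");
// #ifdef USE_CUDA
//   Bitmask* b_d = b.copy_to_cuda();               // deep copy to device
//   Bitmask b2 = Bitmask::retrieve_from_cuda(b_d); // deep copy back to host
//   Bitmask::remove_from_cuda(b_d);                // free the device copy
// #endif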
/*
template<size_t chunk_size>
class SparseBitmask : public BitmaskBase {
public:
SparseBitmask(const size_t len) : BitmaskBase(len) {
assert( chunk_size > 0 );
meta_len = (len-1)/data_stride + 1;
meta_mask = Bitmask(meta_len);
}
~SparseBitmask() {} // mask is a std::vector; nothing to free manually
void set_mask(size_t i, bool value) {
assert(i < len);
size_t meta_i = i/data_stride;
// TODO: keep track of
// bool is_set = get_mask(i);
// if ((!is_set) && value) {
// mask[ci] = mask[ci] + (1 << (i-ci*data_stride));
// } else if (is_set && (!value)) {
// mask[ci] = mask[ci] - (1 << (i-ci*data_stride));
// }
}
size_t meta_idx_to_char_idx(size_t meta_idx) {
return meta_idx * CHAR_BIT;
}
bool get_mask(size_t i) {
// assert(i < len);
// size_t ci = i/data_stride;
// return mask[ci] & (1 << (i-ci*data_stride));
}
private:
const static size_t data_stride = CHAR_BIT*chunk_size;
size_t meta_len;
Bitmask meta_mask;
std::vector<char> mask;
};
*/
/*
bool test_Bitmask() {
for (int i: {8,9,20}) {
Bitmask b = Bitmask(i);
for (int j: {0,3,10,19}) {
if (j < i) b.set_mask(j,1);
}
printf("Testing Bitmask(%d)\n", i);
b.print();
b.set_mask(3,1);
b.print();
b.set_mask(3,1);
b.print();
b.set_mask(3,0);
b.print();
b.set_mask(2,0);
b.print();
}
return true;
}
// */
/*
int main(int argc, char* argv[]) {
test_Bitmask();
return 1;
}
//*/