From cceb53ecf044f00c372221dec60ccd6ca280ce55 Mon Sep 17 00:00:00 2001 From: Chris Maffeo <cmaffeo2@illinois.edu> Date: Wed, 28 Apr 2021 16:06:29 -0500 Subject: [PATCH] Small bugfixes; the code is now minimally tested and seems to work --- src/GrandBrownTown.cu | 44 ++++++++++++++++++++++++++++++++------ src/GrandBrownTown.cuh | 39 ++++++++++++++++++++++++++++++--- src/RigidBody.cu | 6 +++--- src/RigidBodyController.cu | 7 ++---- 4 files changed, 78 insertions(+), 18 deletions(-) diff --git a/src/GrandBrownTown.cu b/src/GrandBrownTown.cu index 46f556a..0510a0e 100644 --- a/src/GrandBrownTown.cu +++ b/src/GrandBrownTown.cu @@ -610,9 +610,9 @@ void GrandBrownTown::RunNoseHooverLangevin() #pragma omp parallel for for(int i = 0; i < numReplicas; ++i) { RBC[i]->update_attached_particle_positions( - internal->getPos_d()[0]+i*(num+num_rb_attached_particles), - internal->getForceInternal_d()[0]+i*(num+num_rb_attached_particles), - internal->getEnergy()+i*(num+num_rb_attached_particles), + internal->getPos_d()[0]+num+i*(num+num_rb_attached_particles), + internal->getForceInternal_d()[0]+num+i*(num+num_rb_attached_particles), + internal->getEnergy()+num+i*(num+num_rb_attached_particles), sys_d, num, num_rb_attached_particles, numReplicas); } } @@ -694,6 +694,21 @@ void GrandBrownTown::RunNoseHooverLangevin() } } }//if inter-particle force + + if (get_energy) { + compute_position_dependent_force_for_rb_attached_particles + <<< numBlocks, NUM_THREADS >>> ( + internal -> getPos_d()[0], internal -> getForceInternal_d()[0], + internal -> getType_d(), part_d, electricField, num, num_rb_attached_particles, numReplicas, ParticleInterpolationType); + } else { + compute_position_dependent_force_for_rb_attached_particles + <<< numBlocks, NUM_THREADS >>> ( + internal -> getPos_d()[0], + internal -> getForceInternal_d()[0], internal -> getEnergy(), + internal -> getType_d(), part_d, electricField, num, num_rb_attached_particles, numReplicas, ParticleInterpolationType); + } + + #ifdef _OPENMP omp_set_num_threads(4); #endif @@ -775,9 +790,9 @@ void GrandBrownTown::RunNoseHooverLangevin() #pragma omp parallel for for(int i = 0; i < numReplicas; ++i) { RBC[i]->update_attached_particle_positions( - internal->getPos_d()[0]+i*(num+num_rb_attached_particles), - internal->getForceInternal_d()[0]+i*(num+num_rb_attached_particles), - internal->getEnergy()+i*(num+num_rb_attached_particles), + internal->getPos_d()[0]+num+i*(num+num_rb_attached_particles), + internal->getForceInternal_d()[0]+num+i*(num+num_rb_attached_particles), + internal->getEnergy()+num+i*(num+num_rb_attached_particles), sys_d, num, num_rb_attached_particles, numReplicas); } } @@ -947,6 +962,21 @@ void GrandBrownTown::RunNoseHooverLangevin() } } } + + if (get_energy) { + compute_position_dependent_force_for_rb_attached_particles + <<< numBlocks, NUM_THREADS >>> ( + internal -> getPos_d()[0], internal -> getForceInternal_d()[0], + internal -> getType_d(), part_d, electricField, num, num_rb_attached_particles, numReplicas, ParticleInterpolationType); + } else { + compute_position_dependent_force_for_rb_attached_particles + <<< numBlocks, NUM_THREADS >>> ( + internal -> getPos_d()[0], + internal -> getForceInternal_d()[0], internal -> getEnergy(), + internal -> getType_d(), part_d, electricField, num, num_rb_attached_particles, numReplicas, ParticleInterpolationType); + } + + //compute the force for rigid bodies #ifdef _OPENMP omp_set_num_threads(4); @@ -992,7 +1022,7 @@ void GrandBrownTown::RunNoseHooverLangevin() { if(particle_dynamic == String("Langevin") || particle_dynamic == String("NoseHooverLangevin")) { - gpuErrchk(cudaMemcpy(momentum, internal -> getMom_d(), sizeof(Vector3) * (num+num_rb_attached_particles) * numReplicas, cudaMemcpyDeviceToHost)); + gpuErrchk(cudaMemcpy(momentum, internal -> getMom_d(), sizeof(Vector3) * (num) * numReplicas, cudaMemcpyDeviceToHost)); } t = s*timestep; // Loop over all replicas diff --git a/src/GrandBrownTown.cuh b/src/GrandBrownTown.cuh index 6d3a887..a152dd1 100644 --- a/src/GrandBrownTown.cuh +++ b/src/GrandBrownTown.cuh @@ -17,7 +17,7 @@ Vector3 step(Vector3& r0, float kTlocal, Vector3 force, float diffusion, Vector3 inline __device__ ForceEnergy compute_position_dependent_force( const Vector3* __restrict__ pos, Vector3* __restrict__ forceInternal, - const int* __restrict__ type, const BrownianParticleType* __restrict__ * __restrict__ part, + const int* __restrict__ type, BrownianParticleType** part, const float electricField, const int scheme, const int idx) { int t = type[idx]; @@ -157,7 +157,7 @@ updateKernelNoseHooverLangevin(Vector3* __restrict__ pos, Vector3* __restrict__ { idx = (idx % num) + (idx/num) * (num+num_rb_attached_particles); - forceEnergy fe = compute_position_dependent_force( + ForceEnergy fe = compute_position_dependent_force( pos, forceInternal, type, part, electricField, scheme, idx ); int t = type[idx]; @@ -330,7 +330,6 @@ __global__ void LastUpdateKernelBAOAB(Vector3* pos,Vector3* momentum, Vector3* _ pos, forceInternal, type, part, electricField, scheme, idx ); if (get_energy) energy[idx] += fe.e; - int t = type[idx]; Vector3 r0 = pos[idx]; Vector3 p0 = momentum[idx]; @@ -652,3 +651,37 @@ __global__ void devicePrint(RigidBodyType* rb[]) { // cudaMalloc( &totalForce_h, sizeof(Vector3) ); // cudaMemcpyToSymbol(totalForce, &totalForce_h, sizeof(totalForce_h)); // } + +__global__ void compute_position_dependent_force_for_rb_attached_particles( + const Vector3* __restrict__ pos, + Vector3* __restrict__ forceInternal, float* __restrict__ energy, + const int* __restrict__ type, BrownianParticleType** __restrict__ part, + const float electricField, const int num, const int num_rb_attached_particles, + const int numReplicas, const int scheme) +{ + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < num_rb_attached_particles * numReplicas) + { + idx = num + (idx % num_rb_attached_particles) + (idx/num_rb_attached_particles) * (num+num_rb_attached_particles); + ForceEnergy fe = compute_position_dependent_force( + pos, forceInternal, type, part, electricField, scheme, idx ); + atomicAdd( &forceInternal[idx], fe.f ); + atomicAdd( &energy[idx], fe.e ); + } +} +__global__ void compute_position_dependent_force_for_rb_attached_particles( + const Vector3* __restrict__ pos, Vector3* __restrict__ forceInternal, + const int* __restrict__ type, BrownianParticleType** __restrict__ part, + const float electricField, const int num, const int num_rb_attached_particles, + const int numReplicas, const int scheme) +{ + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + if (idx < num_rb_attached_particles * numReplicas) + { + idx = num + (idx % num_rb_attached_particles) + (idx/num_rb_attached_particles) * (num+num_rb_attached_particles); + ForceEnergy fe = compute_position_dependent_force( + pos, forceInternal, type, part, electricField, scheme, idx ); + atomicAdd( &forceInternal[idx], fe.f ); + } +} diff --git a/src/RigidBody.cu b/src/RigidBody.cu index 0c9afe3..8aeb7aa 100644 --- a/src/RigidBody.cu +++ b/src/RigidBody.cu @@ -114,9 +114,9 @@ void update_particle_positions_kernel(Vector3* __restrict__ pos, const int start } } void RigidBody::update_particle_positions(Vector3* pos_d, Vector3* force_d, float* energy_d) { - int num = attached_particle_end - attached_particle_start; - int nb = floor(num/NUMTHREADS) + 1; - update_particle_positions_kernel<<<nb,NUMTHREADS>>>(pos_d, attached_particle_start, num, + int num_attached = attached_particle_end - attached_particle_start; + int nb = floor(num_attached/NUMTHREADS) + 1; + update_particle_positions_kernel<<<nb,NUMTHREADS>>>(pos_d, attached_particle_start, num_attached, t->attached_particle_positions_d, position, orientation); } diff --git a/src/RigidBodyController.cu b/src/RigidBodyController.cu index 087823a..b7fd3c8 100644 --- a/src/RigidBodyController.cu +++ b/src/RigidBodyController.cu @@ -536,11 +536,8 @@ void RigidBodyController::update_attached_particle_positions(Vector3* pos_d, Vec rigidBodyByType[i][j].update_particle_positions(pos_d, force_d, energy_d); } } - for (int rep = 0; rep < numReplicas; ++rep) { - size_t o = (num+num_rb_attached_particles)*rep; - gpuErrchk(cudaMemset((void*)&(force_d[num+o]),0,num_rb_attached_particles*sizeof(Vector3))); - } - gpuErrchk(cudaMemset((void*)&(energy_d[num]),0,num_rb_attached_particles*sizeof(float))); + gpuErrchk(cudaMemset((void*) force_d, 0, num_rb_attached_particles*sizeof(Vector3))); + gpuErrchk(cudaMemset((void*) energy_d, 0, num_rb_attached_particles*sizeof(float))); } -- GitLab