From 57eb95c029fab874f00736a724438d786e460105 Mon Sep 17 00:00:00 2001 From: Chris Maffeo <cmaffeo2@illinois.edu> Date: Thu, 4 Feb 2021 17:41:49 -0600 Subject: [PATCH] Use device sync to control flow of groupSites kernels --- src/GrandBrownTown.cu | 18 +++++++++++++----- src/GrandBrownTown.cuh | 5 ++++- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/src/GrandBrownTown.cu b/src/GrandBrownTown.cu index f1e3b73..f5c6c61 100644 --- a/src/GrandBrownTown.cu +++ b/src/GrandBrownTown.cu @@ -599,6 +599,8 @@ void GrandBrownTown::RunNoseHooverLangevin() internal->clear_force(); internal->clear_energy(); const std::vector<Vector3*>& _pos = internal->getPos_d(); + if (numGroupSites > 0) updateGroupSites<<<(numGroupSites/32+1),32>>>(_pos[0], groupSiteData_d, num, numGroupSites, numReplicas); + #ifdef USE_NCCL if (gpuman.gpus.size() > 1) { gpuman.nccl_broadcast(0, _pos, _pos, (num+numGroupSites)*numReplicas, -1); @@ -606,7 +608,6 @@ void GrandBrownTown::RunNoseHooverLangevin() #endif gpuman.sync(); - if (numGroupSites > 0) updateGroupSites<<<(numGroupSites/32+1),32>>>(_pos[0], groupSiteData_d, num, numGroupSites, numReplicas); #ifdef _OPENMP @@ -701,12 +702,12 @@ void GrandBrownTown::RunNoseHooverLangevin() } #endif + if (numGroupSites > 0) distributeGroupSiteForces<<<(numGroupSites/32+1),32>>>(internal->getForceInternal_d()[0], internal->getPos_d()[0], groupSiteData_d, num, numGroupSites, numReplicas); }//if step == 1 internal->clear_energy(); gpuman.sync(); - if (numGroupSites > 0) distributeGroupSiteForces<<<(numGroupSites/32+1),32>>>(internal->getForceInternal_d()[0], internal->getPos_d()[0], groupSiteData_d, num, numGroupSites, numReplicas); if(particle_dynamic == String("Langevin")) updateKernelBAOAB<<< numBlocks, NUM_THREADS >>>(internal->getPos_d()[0], internal->getMom_d(), internal->getForceInternal_d()[0], internal->getType_d(), part_d, kT, kTGrid_d, electricField, tl, timestep, num, sys_d, randoGen_d, numReplicas, ParticleInterpolationType); @@ -828,6 +829,12 @@ void GrandBrownTown::RunNoseHooverLangevin() #pragma omp parallel for for(int i = 0; i < numReplicas; ++i) RBC[i]->clearForceAndTorque(); + + if (numGroupSites > 0) { + updateGroupSites<<<(numGroupSites/32+1),32>>>(internal->getPos_d()[0], groupSiteData_d, num, numGroupSites, numReplicas); + gpuman.sync(); + } + if (imd_on && clientsock) internal->setForceInternalOnDevice(imdForces); // TODO ensure replicas are mutually exclusive with IMD // TODO add multigpu support with IMD else { @@ -841,8 +848,6 @@ void GrandBrownTown::RunNoseHooverLangevin() #endif } - if (numGroupSites > 0) updateGroupSites<<<(numGroupSites/32+1),32>>>(internal->getPos_d()[0], groupSiteData_d, num, numGroupSites, numReplicas); - if (interparticleForce) { // 'tabulatedPotential' - determines whether interaction is described with tabulated potentials or formulas @@ -915,7 +920,10 @@ void GrandBrownTown::RunNoseHooverLangevin() RBC[i]->updateForces((internal->getPos_d()[0])+i*num, (internal->getForceInternal_d()[0])+i*num, s, (internal->getEnergy())+i*num, get_energy, RigidBodyInterpolationType, sys, sys_d); - if (numGroupSites > 0) distributeGroupSiteForces<<<(numGroupSites/32+1),32>>>(internal->getForceInternal_d()[0], internal->getPos_d()[0], groupSiteData_d, num, numGroupSites, numReplicas); + if (numGroupSites > 0) { + distributeGroupSiteForces<<<(numGroupSites/32+1),32>>>(internal->getForceInternal_d()[0], internal->getPos_d()[0], groupSiteData_d, num, numGroupSites, numReplicas); + gpuman.sync(); + } if(particle_dynamic == String("Langevin") || particle_dynamic == String("NoseHooverLangevin")) LastUpdateKernelBAOAB<<< numBlocks, NUM_THREADS >>>(internal -> getPos_d()[0], internal -> getMom_d(), internal -> getForceInternal_d()[0], diff --git a/src/GrandBrownTown.cuh b/src/GrandBrownTown.cuh index 589fc2d..4ea6ce5 100644 --- a/src/GrandBrownTown.cuh +++ b/src/GrandBrownTown.cuh @@ -648,7 +648,10 @@ void distributeGroupSiteForces(Vector3 force[], Vector3 pos[], int* groupSiteDat const int start = groupSiteData[imod]; const int finish = groupSiteData[imod+1]; float weight = 1.0 / (finish-start); - + + // Vector3 tmp = force[num*numReplicas+i]; + // printf("GroupSite %d Force: %f %f %f\n",i, tmp.x, tmp.y, tmp.z); + for (int j = start; j < finish; j++) { const int aj = groupSiteData[j] + num*rep; atomicAdd( force+aj, weight * force[num*numReplicas+i] ); -- GitLab