From 57eb95c029fab874f00736a724438d786e460105 Mon Sep 17 00:00:00 2001
From: Chris Maffeo <cmaffeo2@illinois.edu>
Date: Thu, 4 Feb 2021 17:41:49 -0600
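Subject: [PATCH] Use device sync to control flow of groupSites kernels

In GrandBrownTown::RunNoseHooverLangevin():

- Launch updateGroupSites before the NCCL broadcast of positions and
  the gpuman.sync() that follows it, instead of after that sync.
- Launch distributeGroupSiteForces inside the "if step == 1" block,
  ahead of the existing gpuman.sync(), instead of after that sync.
- In the per-step force evaluation, launch updateGroupSites right
  after the rigid-body forces and torques are cleared, and follow the
  launch with gpuman.sync().
- Follow the per-step distributeGroupSiteForces launch with
  gpuman.sync() before LastUpdateKernelBAOAB is launched.

Also leave a commented-out printf of the group-site force in
distributeGroupSiteForces for debugging.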

---
 src/GrandBrownTown.cu  | 18 +++++++++++++-----
 src/GrandBrownTown.cuh |  5 ++++-
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/src/GrandBrownTown.cu b/src/GrandBrownTown.cu
index f1e3b73..f5c6c61 100644
--- a/src/GrandBrownTown.cu
+++ b/src/GrandBrownTown.cu
@@ -599,6 +599,8 @@ void GrandBrownTown::RunNoseHooverLangevin()
 	    internal->clear_force();
 	    internal->clear_energy();
 	    const std::vector<Vector3*>& _pos = internal->getPos_d();
+	    if (numGroupSites > 0) updateGroupSites<<<(numGroupSites/32+1),32>>>(_pos[0], groupSiteData_d, num, numGroupSites, numReplicas);
+
 	    #ifdef USE_NCCL
 	    if (gpuman.gpus.size() > 1) {
 		gpuman.nccl_broadcast(0, _pos, _pos, (num+numGroupSites)*numReplicas, -1);
@@ -606,7 +608,6 @@ void GrandBrownTown::RunNoseHooverLangevin()
 	    #endif
 	    gpuman.sync();
 
-	    if (numGroupSites > 0) updateGroupSites<<<(numGroupSites/32+1),32>>>(_pos[0], groupSiteData_d, num, numGroupSites, numReplicas);
 
 
             #ifdef _OPENMP
@@ -701,12 +702,12 @@ void GrandBrownTown::RunNoseHooverLangevin()
 	    }
 	    #endif
 
+	    if (numGroupSites > 0) distributeGroupSiteForces<<<(numGroupSites/32+1),32>>>(internal->getForceInternal_d()[0], internal->getPos_d()[0], groupSiteData_d, num, numGroupSites, numReplicas);
 
         }//if step == 1
 
 	internal->clear_energy();
 	gpuman.sync();
-	if (numGroupSites > 0) distributeGroupSiteForces<<<(numGroupSites/32+1),32>>>(internal->getForceInternal_d()[0], internal->getPos_d()[0], groupSiteData_d, num, numGroupSites, numReplicas);
 
         if(particle_dynamic == String("Langevin"))
             updateKernelBAOAB<<< numBlocks, NUM_THREADS >>>(internal->getPos_d()[0], internal->getMom_d(), internal->getForceInternal_d()[0], internal->getType_d(), part_d, kT, kTGrid_d, electricField, tl, timestep, num, sys_d, randoGen_d, numReplicas, ParticleInterpolationType);
@@ -828,6 +829,12 @@ void GrandBrownTown::RunNoseHooverLangevin()
         #pragma omp parallel for
         for(int i = 0; i < numReplicas; ++i) 
             RBC[i]->clearForceAndTorque();
+
+	if (numGroupSites > 0) {
+	    updateGroupSites<<<(numGroupSites/32+1),32>>>(internal->getPos_d()[0], groupSiteData_d, num, numGroupSites, numReplicas);
+	    gpuman.sync();
+	}
+
         if (imd_on && clientsock)
             internal->setForceInternalOnDevice(imdForces); // TODO ensure replicas are mutually exclusive with IMD // TODO add multigpu support with IMD
 	else {
@@ -841,8 +848,6 @@ void GrandBrownTown::RunNoseHooverLangevin()
 	    #endif
     	}
 
-	if (numGroupSites > 0) updateGroupSites<<<(numGroupSites/32+1),32>>>(internal->getPos_d()[0], groupSiteData_d, num, numGroupSites, numReplicas);
-
         if (interparticleForce)
         {
             // 'tabulatedPotential' - determines whether interaction is described with tabulated potentials or formulas
@@ -915,7 +920,10 @@ void GrandBrownTown::RunNoseHooverLangevin()
             RBC[i]->updateForces((internal->getPos_d()[0])+i*num, (internal->getForceInternal_d()[0])+i*num, s, (internal->getEnergy())+i*num, get_energy, 
                                  RigidBodyInterpolationType, sys, sys_d);
 
-	if (numGroupSites > 0) distributeGroupSiteForces<<<(numGroupSites/32+1),32>>>(internal->getForceInternal_d()[0], internal->getPos_d()[0], groupSiteData_d, num, numGroupSites, numReplicas);
+	if (numGroupSites > 0) {
+	    distributeGroupSiteForces<<<(numGroupSites/32+1),32>>>(internal->getForceInternal_d()[0], internal->getPos_d()[0], groupSiteData_d, num, numGroupSites, numReplicas);
+	    gpuman.sync();
+	}
 
         if(particle_dynamic == String("Langevin") || particle_dynamic == String("NoseHooverLangevin"))
             LastUpdateKernelBAOAB<<< numBlocks, NUM_THREADS >>>(internal -> getPos_d()[0], internal -> getMom_d(), internal -> getForceInternal_d()[0], 
diff --git a/src/GrandBrownTown.cuh b/src/GrandBrownTown.cuh
index 589fc2d..4ea6ce5 100644
--- a/src/GrandBrownTown.cuh
+++ b/src/GrandBrownTown.cuh
@@ -648,7 +648,10 @@ void distributeGroupSiteForces(Vector3 force[], Vector3 pos[], int* groupSiteDat
 	const int start  = groupSiteData[imod];
 	const int finish = groupSiteData[imod+1];
 	float weight = 1.0 / (finish-start);
-	    
+
+	// Vector3 tmp = force[num*numReplicas+i];
+	// printf("GroupSite %d Force: %f %f %f\n",i, tmp.x, tmp.y, tmp.z);
+
 	for (int j = start; j < finish; j++) {
 	    const int aj = groupSiteData[j] + num*rep;
 	    atomicAdd( force+aj, weight * force[num*numReplicas+i] );
-- 
GitLab