From 1e3c933c3a3204fc6f92f224ef4b829b786ae78c Mon Sep 17 00:00:00 2001
From: Han-Yi Chou <hchou10@illinois.edu>
Date: Tue, 30 Jan 2018 18:06:49 -0600
Subject: [PATCH] fix bug in RigidBodyController.cu

---
 src/RigidBodyController.cu | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/src/RigidBodyController.cu b/src/RigidBodyController.cu
index bb49a5e..ba9a97d 100644
--- a/src/RigidBodyController.cu
+++ b/src/RigidBodyController.cu
@@ -20,7 +20,11 @@ inline void gpuAssert(cudaError_t code, String file, int line, bool abort=true)
       if (abort) exit(code);
    }
 }
-
+// Static members of RigidBodyForcePair. `stream` is a malloc'd array of NUMSTREAMS handles; malloc leaves the handles uninitialized
+cudaStream_t *RigidBodyForcePair::stream = (cudaStream_t *) malloc(NUMSTREAMS * sizeof(cudaStream_t));
+int RigidBodyForcePair::nextStreamID = 0;        /* used during stream init */
+int RigidBodyForcePair::lastRbGridID = -1; /* used to schedule kernel interaction; -1 means no grid is awaiting force retrieval */
+RigidBodyForcePair* RigidBodyForcePair::lastRbForcePair = NULL; /* pair through which updateForces() retrieves the last grid's forces */
 /* #include <cuda.h> */
 /* #include <cuda_runtime.h> */
 /* #include <curand_kernel.h> */
@@ -292,7 +296,7 @@ void RigidBodyController::updateForces(Vector3* pos_d, Vector3* force_d, int s,
 	}
 	
 	// Grid–Grid forces
-	if ( (s % conf.rigidBodyGridGridPeriod) == 0 && forcePairs.size() > 0) {
+	if ( ((s % conf.rigidBodyGridGridPeriod) == 0 || s == 1 ) && forcePairs.size() > 0) {
 		for (int i=0; i < forcePairs.size(); i++) {
 			// TODO: performance: make this check occur less frequently
 			if (forcePairs[i].isWithinPairlistDist())
@@ -301,9 +305,11 @@ void RigidBodyController::updateForces(Vector3* pos_d, Vector3* force_d, int s,
 		
 		// each kernel call is followed by async memcpy for previous; now get last
 		RigidBodyForcePair* fp = RigidBodyForcePair::lastRbForcePair;
-		fp->retrieveForcesForGrid( fp->lastRbGridID );
-		fp->lastRbGridID = -1;
-
+		// Retrieve only when a pair was recorded and lastRbGridID has left its -1 sentinel
+		if (fp != NULL && RigidBodyForcePair::lastRbGridID >= 0) {
+			fp->retrieveForcesForGrid( RigidBodyForcePair::lastRbGridID );
+			RigidBodyForcePair::lastRbGridID = -1; // reset so the next call does not retrieve again
+		}
 		// stream sync was slower than device sync
 		/* for (int i = 0; i < NUMSTREAMS; i++) { */
 		/* 	const cudaStream_t &s = RigidBodyForcePair::stream[i]; */
@@ -448,12 +454,13 @@ void RigidBodyController::KineticEnergy()
     else
         return 0.;*/
 }
-
+#if 0
 // allocate and initialize an array of stream handles
 cudaStream_t *RigidBodyForcePair::stream = (cudaStream_t *) malloc(NUMSTREAMS * sizeof(cudaStream_t));
 int RigidBodyForcePair::nextStreamID = 0;	 /* used during stream init */
 int RigidBodyForcePair::lastRbGridID = -1; /* used to schedule kernel interaction */
 RigidBodyForcePair* RigidBodyForcePair::lastRbForcePair = NULL;
+#endif
 
 void RigidBodyForcePair::createStreams() {
 	for (int i = 0; i < NUMSTREAMS; i++)
-- 
GitLab