From cceb53ecf044f00c372221dec60ccd6ca280ce55 Mon Sep 17 00:00:00 2001
From: Chris Maffeo <cmaffeo2@illinois.edu>
Date: Wed, 28 Apr 2021 16:06:29 -0500
Subject: [PATCH] Small bugfixes; the code is now minimally tested and seems to
 work

---
 src/GrandBrownTown.cu      | 44 ++++++++++++++++++++++++++++++++------
 src/GrandBrownTown.cuh     | 39 ++++++++++++++++++++++++++++++---
 src/RigidBody.cu           |  6 +++---
 src/RigidBodyController.cu |  7 ++----
 4 files changed, 78 insertions(+), 18 deletions(-)

diff --git a/src/GrandBrownTown.cu b/src/GrandBrownTown.cu
index 46f556a..0510a0e 100644
--- a/src/GrandBrownTown.cu
+++ b/src/GrandBrownTown.cu
@@ -610,9 +610,9 @@ void GrandBrownTown::RunNoseHooverLangevin()
 		#pragma omp parallel for
 		for(int i = 0; i < numReplicas; ++i) {
 		    RBC[i]->update_attached_particle_positions(
-			internal->getPos_d()[0]+i*(num+num_rb_attached_particles),
-			internal->getForceInternal_d()[0]+i*(num+num_rb_attached_particles),
-			internal->getEnergy()+i*(num+num_rb_attached_particles),
+			internal->getPos_d()[0]+num+i*(num+num_rb_attached_particles),
+			internal->getForceInternal_d()[0]+num+i*(num+num_rb_attached_particles),
+			internal->getEnergy()+num+i*(num+num_rb_attached_particles),
 			sys_d, num, num_rb_attached_particles, numReplicas);
 		}
 	    }
@@ -694,6 +694,21 @@ void GrandBrownTown::RunNoseHooverLangevin()
                     }
                 }
             }//if inter-particle force
+
+	    if (get_energy) {
+		compute_position_dependent_force_for_rb_attached_particles
+		    <<< numBlocks, NUM_THREADS >>> (
+			internal -> getPos_d()[0], internal -> getForceInternal_d()[0],
+			internal -> getType_d(), part_d, electricField, num, num_rb_attached_particles, numReplicas, ParticleInterpolationType);
+	    } else {
+		compute_position_dependent_force_for_rb_attached_particles
+		    <<< numBlocks, NUM_THREADS >>> (
+			internal -> getPos_d()[0],
+			internal -> getForceInternal_d()[0], internal -> getEnergy(),
+			internal -> getType_d(), part_d, electricField, num, num_rb_attached_particles, numReplicas, ParticleInterpolationType);
+	    }
+
+
             #ifdef _OPENMP
             omp_set_num_threads(4);
             #endif
@@ -775,9 +790,9 @@ void GrandBrownTown::RunNoseHooverLangevin()
 	    #pragma omp parallel for
 	    for(int i = 0; i < numReplicas; ++i) {
 		RBC[i]->update_attached_particle_positions(
-		    internal->getPos_d()[0]+i*(num+num_rb_attached_particles),
-		    internal->getForceInternal_d()[0]+i*(num+num_rb_attached_particles),
-		    internal->getEnergy()+i*(num+num_rb_attached_particles),
+		    internal->getPos_d()[0]+num+i*(num+num_rb_attached_particles),
+		    internal->getForceInternal_d()[0]+num+i*(num+num_rb_attached_particles),
+		    internal->getEnergy()+num+i*(num+num_rb_attached_particles),
 		    sys_d, num, num_rb_attached_particles, numReplicas);
 	    }
 	}
@@ -947,6 +962,21 @@ void GrandBrownTown::RunNoseHooverLangevin()
                 }
             }
         }
+
+	if (get_energy) {
+	    compute_position_dependent_force_for_rb_attached_particles
+		<<< numBlocks, NUM_THREADS >>> (
+		    internal -> getPos_d()[0], internal -> getForceInternal_d()[0],
+		    internal -> getType_d(), part_d, electricField, num, num_rb_attached_particles, numReplicas, ParticleInterpolationType);
+	} else {
+	    compute_position_dependent_force_for_rb_attached_particles
+		<<< numBlocks, NUM_THREADS >>> (
+		    internal -> getPos_d()[0],
+		    internal -> getForceInternal_d()[0], internal -> getEnergy(),
+		    internal -> getType_d(), part_d, electricField, num, num_rb_attached_particles, numReplicas, ParticleInterpolationType);
+	}
+
+
         //compute the force for rigid bodies
         #ifdef _OPENMP
         omp_set_num_threads(4);
@@ -992,7 +1022,7 @@ void GrandBrownTown::RunNoseHooverLangevin()
         {
             if(particle_dynamic == String("Langevin") || particle_dynamic == String("NoseHooverLangevin"))
             {
-                gpuErrchk(cudaMemcpy(momentum, internal ->  getMom_d(), sizeof(Vector3) * (num+num_rb_attached_particles) * numReplicas, cudaMemcpyDeviceToHost));
+                gpuErrchk(cudaMemcpy(momentum, internal ->  getMom_d(), sizeof(Vector3) * (num) * numReplicas, cudaMemcpyDeviceToHost));
             }
             t = s*timestep;
             // Loop over all replicas
diff --git a/src/GrandBrownTown.cuh b/src/GrandBrownTown.cuh
index 6d3a887..a152dd1 100644
--- a/src/GrandBrownTown.cuh
+++ b/src/GrandBrownTown.cuh
@@ -17,7 +17,7 @@ Vector3 step(Vector3& r0, float kTlocal, Vector3 force, float diffusion, Vector3
 inline __device__
 ForceEnergy compute_position_dependent_force(
     const Vector3* __restrict__ pos, Vector3* __restrict__ forceInternal,
-    const int* __restrict__ type, const BrownianParticleType* __restrict__ * __restrict__ part,
+    const int* __restrict__ type, BrownianParticleType** part,
     const float electricField, const int scheme, const int idx)
 {
     int t = type[idx];
@@ -157,7 +157,7 @@ updateKernelNoseHooverLangevin(Vector3* __restrict__ pos, Vector3* __restrict__
     {
 	idx = (idx % num) + (idx/num) * (num+num_rb_attached_particles);
 
-	forceEnergy fe = compute_position_dependent_force(
+	ForceEnergy fe = compute_position_dependent_force(
 	    pos, forceInternal, type, part, electricField, scheme, idx );
 
         int t = type[idx];
@@ -330,7 +330,6 @@ __global__ void LastUpdateKernelBAOAB(Vector3* pos,Vector3* momentum, Vector3* _
 	    pos, forceInternal, type, part, electricField, scheme, idx );
 	if (get_energy) energy[idx] += fe.e;
 
-        int t = type[idx];
         Vector3 r0 = pos[idx];
         Vector3 p0 = momentum[idx];
 
@@ -652,3 +651,37 @@ __global__ void devicePrint(RigidBodyType* rb[]) {
 //     cudaMalloc( &totalForce_h, sizeof(Vector3) );
 //     cudaMemcpyToSymbol(totalForce, &totalForce_h, sizeof(totalForce_h));
 // }
+
+__global__ void compute_position_dependent_force_for_rb_attached_particles(
+    const Vector3* __restrict__ pos,
+    Vector3* __restrict__ forceInternal, float* __restrict__ energy,
+    const int* __restrict__ type, BrownianParticleType** __restrict__ part,
+    const float electricField, const int num, const int num_rb_attached_particles,
+    const int numReplicas, const int scheme)
+{
+    int idx = blockIdx.x * blockDim.x + threadIdx.x;
+    if (idx < num_rb_attached_particles * numReplicas)
+    {
+	idx = num + (idx % num_rb_attached_particles) + (idx/num_rb_attached_particles) * (num+num_rb_attached_particles);
+	ForceEnergy fe = compute_position_dependent_force(
+	    pos, forceInternal, type, part, electricField, scheme, idx );
+	atomicAdd( &forceInternal[idx], fe.f );
+	atomicAdd( &energy[idx], fe.e );
+    }
+}
+__global__ void compute_position_dependent_force_for_rb_attached_particles(
+    const Vector3* __restrict__ pos, Vector3* __restrict__ forceInternal,
+    const int* __restrict__ type, BrownianParticleType** __restrict__ part,
+    const float electricField, const int num, const int num_rb_attached_particles,
+    const int numReplicas, const int scheme)
+{
+    int idx = blockIdx.x * blockDim.x + threadIdx.x;
+
+    if (idx < num_rb_attached_particles * numReplicas)
+    {
+	idx = num + (idx % num_rb_attached_particles) + (idx/num_rb_attached_particles) * (num+num_rb_attached_particles);
+	ForceEnergy fe = compute_position_dependent_force(
+	    pos, forceInternal, type, part, electricField, scheme, idx );
+	atomicAdd( &forceInternal[idx], fe.f );
+    }
+}
diff --git a/src/RigidBody.cu b/src/RigidBody.cu
index 0c9afe3..8aeb7aa 100644
--- a/src/RigidBody.cu
+++ b/src/RigidBody.cu
@@ -114,9 +114,9 @@ void update_particle_positions_kernel(Vector3* __restrict__ pos, const int start
 	}
 }		
 void RigidBody::update_particle_positions(Vector3* pos_d, Vector3* force_d, float* energy_d) {
-    int num = attached_particle_end - attached_particle_start;
-    int nb = floor(num/NUMTHREADS) + 1;
-    update_particle_positions_kernel<<<nb,NUMTHREADS>>>(pos_d, attached_particle_start, num,
+    int num_attached = attached_particle_end - attached_particle_start;
+    int nb = floor(num_attached/NUMTHREADS) + 1;
+    update_particle_positions_kernel<<<nb,NUMTHREADS>>>(pos_d, attached_particle_start, num_attached,
 						 t->attached_particle_positions_d, position, orientation);
 }
 
diff --git a/src/RigidBodyController.cu b/src/RigidBodyController.cu
index 087823a..b7fd3c8 100644
--- a/src/RigidBodyController.cu
+++ b/src/RigidBodyController.cu
@@ -536,11 +536,8 @@ void RigidBodyController::update_attached_particle_positions(Vector3* pos_d, Vec
 	    rigidBodyByType[i][j].update_particle_positions(pos_d, force_d, energy_d);
 	}
     }
-    for (int rep = 0; rep < numReplicas; ++rep) {
-	size_t o = (num+num_rb_attached_particles)*rep;
-	gpuErrchk(cudaMemset((void*)&(force_d[num+o]),0,num_rb_attached_particles*sizeof(Vector3)));
-    }
-    gpuErrchk(cudaMemset((void*)&(energy_d[num]),0,num_rb_attached_particles*sizeof(float)));
+    gpuErrchk(cudaMemset((void*) force_d, 0, num_rb_attached_particles*sizeof(Vector3)));
+    gpuErrchk(cudaMemset((void*) energy_d, 0, num_rb_attached_particles*sizeof(float)));
 }
 
 
-- 
GitLab