From ec1b6e9d5379ced4200ff0fcd82c78d1c00fb7eb Mon Sep 17 00:00:00 2001 From: Chris Maffeo <cmaffeo2@illinois.edu> Date: Thu, 21 Mar 2024 14:29:02 -0500 Subject: [PATCH] Improved fetching energy so it will be exactly synced with coordinates --- src/GrandBrownTown.cu | 63 +++++++++++++++++++++++++----------------- src/GrandBrownTown.cuh | 30 +++++++++++++++----- 2 files changed, 61 insertions(+), 32 deletions(-) diff --git a/src/GrandBrownTown.cu b/src/GrandBrownTown.cu index 29f23b3..c3b313a 100644 --- a/src/GrandBrownTown.cu +++ b/src/GrandBrownTown.cu @@ -646,10 +646,16 @@ void GrandBrownTown::run() //float total_energy = 0.f; // Main loop over Brownian dynamics steps + + float kinetic_energy = 0.0f; + float potential_energy = 0.0f; + + bool get_energy = false; + bool print_energy = false; + for (long int s = 1; s < steps; s++) { PUSH_NVTX("Main loop timestep",0) - bool get_energy = ((s % outputEnergyPeriod) == 0); //At the very first time step, the force is computed if(s == 1) { @@ -750,13 +756,13 @@ void GrandBrownTown::run() if (get_energy) { compute_position_dependent_force_for_rb_attached_particles <<< numBlocks, NUM_THREADS >>> ( - internal -> getPos_d()[0], internal -> getForceInternal_d()[0], + internal -> getPos_d()[0], + internal -> getForceInternal_d()[0], internal -> getEnergy(), internal -> getType_d(), part_d, electricField, num, num_rb_attached_particles, numReplicas, ParticleInterpolationType); } else { compute_position_dependent_force_for_rb_attached_particles <<< numBlocks, NUM_THREADS >>> ( - internal -> getPos_d()[0], - internal -> getForceInternal_d()[0], internal -> getEnergy(), + internal -> getPos_d()[0], internal -> getForceInternal_d()[0], internal -> getType_d(), part_d, electricField, num, num_rb_attached_particles, numReplicas, ParticleInterpolationType); } } @@ -795,17 +801,14 @@ void GrandBrownTown::run() }//if step == 1 - PUSH_NVTX("Clear particle energy data",1) - internal->clear_energy(); gpuman.sync(); - POP_NVTX PUSH_NVTX("Integrate particles",2) if(particle_dynamic == String("Langevin")) - updateKernelBAOAB<<< numBlocks, NUM_THREADS >>>(internal->getPos_d()[0], internal->getMom_d(), internal->getForceInternal_d()[0], internal->getType_d(), part_d, kT, kTGrid_d, electricField, tl, timestep, num, num_rb_attached_particles, sys_d, randoGen_d, numReplicas, ParticleInterpolationType); + updateKernelBAOAB<s == 1><<< numBlocks, NUM_THREADS >>>(internal->getPos_d()[0], internal->getMom_d(), internal->getForceInternal_d()[0], internal->getType_d(), part_d, kT, kTGrid_d, electricField, tl, timestep, num, num_rb_attached_particles, sys_d, randoGen_d, numReplicas, ParticleInterpolationType); else if(particle_dynamic == String("NoseHooverLangevin")) //kernel for Nose-Hoover Langevin dynamic - updateKernelNoseHooverLangevin<<< numBlocks, NUM_THREADS >>>(internal -> getPos_d()[0], internal -> getMom_d(), + updateKernelNoseHooverLangevin<s == 1><<< numBlocks, NUM_THREADS >>>(internal -> getPos_d()[0], internal -> getMom_d(), internal -> getRan_d(), internal -> getForceInternal_d()[0], internal -> getType_d(), part_d, kT, kTGrid_d, electricField, tl, timestep, num, num_rb_attached_particles, sys_d, randoGen_d, numReplicas, ParticleInterpolationType); ////For Brownian motion @@ -953,10 +956,32 @@ void GrandBrownTown::run() POP_NVTX } + if (get_energy) { + PUSH_NVTX("Fetch particle energy data",1) + thrust::device_ptr<float> en_d(internal->getEnergy()); + potential_energy = (thrust::reduce(en_d, en_d+(num+num_rb_attached_particles)*numReplicas)) / numReplicas; + if(particle_dynamic == String("Langevin") || particle_dynamic == String("NoseHooverLangevin")) + { + gpuErrchk(cudaMemcpy(momentum, internal->getMom_d(), sizeof(Vector3)*num*numReplicas,cudaMemcpyDeviceToHost)); + kinetic_energy = KineticEnergy(); + } + gpuman.sync(); + POP_NVTX + } + + get_energy = (((s+1) % outputEnergyPeriod) == 0) || (((s+1) % outputPeriod) == 0); + print_energy = ((s % outputEnergyPeriod) == 0); + + if (get_energy) { + PUSH_NVTX("Clear particle energy data",1) + internal->clear_energy(); + POP_NVTX + } + if (imd_on && clientsock) internal->setForceInternalOnDevice(imdForces); // TODO ensure replicas are mutually exclusive with IMD // TODO add multigpu support with IMD else { - PUSH_NVTX("Clear particle forces",2) + PUSH_NVTX("Clear particle forces",2) internal->clear_force(); #ifdef USE_NCCL if (gpuman.gpus.size() > 1) { @@ -1135,7 +1160,7 @@ void GrandBrownTown::run() remember(t); } - if (get_energy) + if (print_energy) { wkf_timer_stop(timerS); t = s * timestep; @@ -1149,27 +1174,15 @@ void GrandBrownTown::run() // Do the output printf("\rStep %ld [%.2f%% complete | %.3f ms/step | %.3f ns/day]",s, percent, msPerStep, nsPerDay); - //} - //if (get_energy) - //{ // Copy positions from GPU to CPU. //gpuErrchk(cudaMemcpy(pos, internal->getPos_d(), sizeof(Vector3)*num*numReplicas,cudaMemcpyDeviceToHost)); - float e = 0.f; - float V = 0.f; - thrust::device_ptr<float> en_d(internal->getEnergy()); - V = (thrust::reduce(en_d, en_d+(num+num_rb_attached_particles)*numReplicas)) / numReplicas; - if(particle_dynamic == String("Langevin") || particle_dynamic == String("NoseHooverLangevin")) - { - gpuErrchk(cudaMemcpy(momentum, internal->getMom_d(), sizeof(Vector3)*num*numReplicas,cudaMemcpyDeviceToHost)); - e = KineticEnergy(); - } std::fstream energy_file; energy_file.open( (outFilePrefixes[0]+".energy.dat").c_str(), std::fstream::out | std::fstream::app); if(energy_file.is_open()) { - energy_file << "Kinetic Energy: " << e*num*0.5f*(2.388458509e-1) << " (kT) "<< std::endl; - energy_file << "Potential Energy: " << V << " (kcal/mol) " << std::endl; + energy_file << "Kinetic Energy: " << kinetic_energy*num*0.5f*(2.388458509e-1) << " (kT) "<< std::endl; + energy_file << "Potential Energy: " << potential_energy << " (kcal/mol) " << std::endl; energy_file.close(); } else diff --git a/src/GrandBrownTown.cuh b/src/GrandBrownTown.cuh index 2783943..b0b6bdc 100644 --- a/src/GrandBrownTown.cuh +++ b/src/GrandBrownTown.cuh @@ -61,6 +61,7 @@ ForceEnergy compute_position_dependent_force( ////The kernel is for Nose-Hoover Langevin dynamics +template <bool need_position_dep_force = false> __global__ void updateKernelNoseHooverLangevin(Vector3* __restrict__ pos, Vector3* __restrict__ momentum, float* random, Vector3* __restrict__ forceInternal, int type[], BrownianParticleType* part[], @@ -72,9 +73,12 @@ updateKernelNoseHooverLangevin(Vector3* __restrict__ pos, Vector3* __restrict__ { idx = (idx % num) + (idx/num) * (num+num_rb_attached_particles); - ForceEnergy fe = compute_position_dependent_force( - pos, forceInternal, type, part, electricField, scheme, idx ); - + ForceEnergy fe; + if (need_position_dep_force) { + fe = compute_position_dependent_force( + pos, forceInternal, type, part, electricField, scheme, idx ); + } + int t = type[idx]; Vector3 r0 = pos[idx]; Vector3 p0 = momentum[idx]; @@ -82,7 +86,10 @@ updateKernelNoseHooverLangevin(Vector3* __restrict__ pos, Vector3* __restrict__ const BrownianParticleType& pt = *part[t]; - Vector3 force = forceInternal[idx] + fe.f; + if (need_position_dep_force) { + forceInternal[idx] = forceInternal[idx] + fe.f; + } + Vector3 force = forceInternal[idx]; #ifdef Debug forceInternal[idx] = -force; #endif @@ -152,6 +159,7 @@ updateKernelNoseHooverLangevin(Vector3* __restrict__ pos, Vector3* __restrict__ //The original BBK kernel is no longer used since the random numbers should be reused //which is not possible in GPU code. //Han-Yi Chou +template <bool need_position_dep_force = false> __global__ void updateKernelBAOAB(Vector3* pos, Vector3* momentum, Vector3* __restrict__ forceInternal, int type[], BrownianParticleType* part[], float kT, BaseGrid* kTGrid, float electricField,int tGridLength, float timestep, @@ -165,8 +173,11 @@ __global__ void updateKernelBAOAB(Vector3* pos, Vector3* momentum, Vector3* __re { idx = (idx % num) + (idx/num) * (num+num_rb_attached_particles); - ForceEnergy fe = compute_position_dependent_force( - pos, forceInternal, type, part, electricField, scheme, idx ); + ForceEnergy fe; + if (need_position_dep_force) { + fe = compute_position_dependent_force( + pos, forceInternal, type, part, electricField, scheme, idx ); + } // if (get_energy) energy[idx] += fe.e; int t = type[idx]; @@ -174,7 +185,11 @@ __global__ void updateKernelBAOAB(Vector3* pos, Vector3* momentum, Vector3* __re Vector3 p0 = momentum[idx]; const BrownianParticleType& pt = *part[t]; - Vector3 force = forceInternal[idx] + fe.f; + if (need_position_dep_force) { // only needed at ts == 1 + forceInternal[idx] = forceInternal[idx] + fe.f; + } + Vector3 force = forceInternal[idx]; + #ifdef Debug forceInternal[idx] = -force; #endif @@ -249,6 +264,7 @@ __global__ void LastUpdateKernelBAOAB(Vector3* pos,Vector3* momentum, Vector3* _ Vector3 p0 = momentum[idx]; Vector3 force = forceInternal[idx] + fe.f; + forceInternal[idx] = forceInternal[idx] + fe.f; // Adding position-dep force so it doesn't need to be calculated in the "plain" kernel #ifdef Debug forceInternal[idx] = -force; #endif -- GitLab