Skip to content
Snippets Groups Projects
Commit ec1b6e9d authored by cmaffeo2's avatar cmaffeo2
Browse files

Improved fetching energy so it will be exactly synced with coordinates

parent cdc4900d
No related branches found
No related tags found
No related merge requests found
......@@ -646,10 +646,16 @@ void GrandBrownTown::run()
//float total_energy = 0.f;
// Main loop over Brownian dynamics steps
float kinetic_energy = 0.0f;
float potential_energy = 0.0f;
bool get_energy = false;
bool print_energy = false;
for (long int s = 1; s < steps; s++)
{
PUSH_NVTX("Main loop timestep",0)
bool get_energy = ((s % outputEnergyPeriod) == 0);
//At the very first time step, the force is computed
if(s == 1)
{
......@@ -750,13 +756,13 @@ void GrandBrownTown::run()
if (get_energy) {
compute_position_dependent_force_for_rb_attached_particles
<<< numBlocks, NUM_THREADS >>> (
internal -> getPos_d()[0], internal -> getForceInternal_d()[0],
internal -> getPos_d()[0],
internal -> getForceInternal_d()[0], internal -> getEnergy(),
internal -> getType_d(), part_d, electricField, num, num_rb_attached_particles, numReplicas, ParticleInterpolationType);
} else {
compute_position_dependent_force_for_rb_attached_particles
<<< numBlocks, NUM_THREADS >>> (
internal -> getPos_d()[0],
internal -> getForceInternal_d()[0], internal -> getEnergy(),
internal -> getPos_d()[0], internal -> getForceInternal_d()[0],
internal -> getType_d(), part_d, electricField, num, num_rb_attached_particles, numReplicas, ParticleInterpolationType);
}
}
......@@ -795,17 +801,14 @@ void GrandBrownTown::run()
}//if step == 1
PUSH_NVTX("Clear particle energy data",1)
internal->clear_energy();
gpuman.sync();
POP_NVTX
PUSH_NVTX("Integrate particles",2)
if(particle_dynamic == String("Langevin"))
updateKernelBAOAB<<< numBlocks, NUM_THREADS >>>(internal->getPos_d()[0], internal->getMom_d(), internal->getForceInternal_d()[0], internal->getType_d(), part_d, kT, kTGrid_d, electricField, tl, timestep, num, num_rb_attached_particles, sys_d, randoGen_d, numReplicas, ParticleInterpolationType);
updateKernelBAOAB<s == 1><<< numBlocks, NUM_THREADS >>>(internal->getPos_d()[0], internal->getMom_d(), internal->getForceInternal_d()[0], internal->getType_d(), part_d, kT, kTGrid_d, electricField, tl, timestep, num, num_rb_attached_particles, sys_d, randoGen_d, numReplicas, ParticleInterpolationType);
else if(particle_dynamic == String("NoseHooverLangevin"))
//kernel for Nose-Hoover Langevin dynamic
updateKernelNoseHooverLangevin<<< numBlocks, NUM_THREADS >>>(internal -> getPos_d()[0], internal -> getMom_d(),
updateKernelNoseHooverLangevin<s == 1><<< numBlocks, NUM_THREADS >>>(internal -> getPos_d()[0], internal -> getMom_d(),
internal -> getRan_d(), internal -> getForceInternal_d()[0], internal -> getType_d(), part_d, kT, kTGrid_d, electricField, tl, timestep, num, num_rb_attached_particles, sys_d,
randoGen_d, numReplicas, ParticleInterpolationType);
////For Brownian motion
......@@ -953,10 +956,32 @@ void GrandBrownTown::run()
POP_NVTX
}
if (get_energy) {
PUSH_NVTX("Fetch particle energy data",1)
thrust::device_ptr<float> en_d(internal->getEnergy());
potential_energy = (thrust::reduce(en_d, en_d+(num+num_rb_attached_particles)*numReplicas)) / numReplicas;
if(particle_dynamic == String("Langevin") || particle_dynamic == String("NoseHooverLangevin"))
{
gpuErrchk(cudaMemcpy(momentum, internal->getMom_d(), sizeof(Vector3)*num*numReplicas,cudaMemcpyDeviceToHost));
kinetic_energy = KineticEnergy();
}
gpuman.sync();
POP_NVTX
}
get_energy = (((s+1) % outputEnergyPeriod) == 0) || (((s+1) % outputPeriod) == 0);
print_energy = ((s % outputEnergyPeriod) == 0);
if (get_energy) {
PUSH_NVTX("Clear particle energy data",1)
internal->clear_energy();
POP_NVTX
}
if (imd_on && clientsock)
internal->setForceInternalOnDevice(imdForces); // TODO ensure replicas are mutually exclusive with IMD // TODO add multigpu support with IMD
else {
PUSH_NVTX("Clear particle forces",2)
PUSH_NVTX("Clear particle forces",2)
internal->clear_force();
#ifdef USE_NCCL
if (gpuman.gpus.size() > 1) {
......@@ -1135,7 +1160,7 @@ void GrandBrownTown::run()
remember(t);
}
if (get_energy)
if (print_energy)
{
wkf_timer_stop(timerS);
t = s * timestep;
......@@ -1149,27 +1174,15 @@ void GrandBrownTown::run()
// Do the output
printf("\rStep %ld [%.2f%% complete | %.3f ms/step | %.3f ns/day]",s, percent, msPerStep, nsPerDay);
//}
//if (get_energy)
//{
// Copy positions from GPU to CPU.
//gpuErrchk(cudaMemcpy(pos, internal->getPos_d(), sizeof(Vector3)*num*numReplicas,cudaMemcpyDeviceToHost));
float e = 0.f;
float V = 0.f;
thrust::device_ptr<float> en_d(internal->getEnergy());
V = (thrust::reduce(en_d, en_d+(num+num_rb_attached_particles)*numReplicas)) / numReplicas;
if(particle_dynamic == String("Langevin") || particle_dynamic == String("NoseHooverLangevin"))
{
gpuErrchk(cudaMemcpy(momentum, internal->getMom_d(), sizeof(Vector3)*num*numReplicas,cudaMemcpyDeviceToHost));
e = KineticEnergy();
}
std::fstream energy_file;
energy_file.open( (outFilePrefixes[0]+".energy.dat").c_str(), std::fstream::out | std::fstream::app);
if(energy_file.is_open())
{
energy_file << "Kinetic Energy: " << e*num*0.5f*(2.388458509e-1) << " (kT) "<< std::endl;
energy_file << "Potential Energy: " << V << " (kcal/mol) " << std::endl;
energy_file << "Kinetic Energy: " << kinetic_energy*num*0.5f*(2.388458509e-1) << " (kT) "<< std::endl;
energy_file << "Potential Energy: " << potential_energy << " (kcal/mol) " << std::endl;
energy_file.close();
}
else
......
......@@ -61,6 +61,7 @@ ForceEnergy compute_position_dependent_force(
////The kernel is for Nose-Hoover Langevin dynamics
template <bool need_position_dep_force = false>
__global__ void
updateKernelNoseHooverLangevin(Vector3* __restrict__ pos, Vector3* __restrict__ momentum, float* random,
Vector3* __restrict__ forceInternal, int type[], BrownianParticleType* part[],
......@@ -72,9 +73,12 @@ updateKernelNoseHooverLangevin(Vector3* __restrict__ pos, Vector3* __restrict__
{
idx = (idx % num) + (idx/num) * (num+num_rb_attached_particles);
ForceEnergy fe = compute_position_dependent_force(
pos, forceInternal, type, part, electricField, scheme, idx );
ForceEnergy fe;
if (need_position_dep_force) {
fe = compute_position_dependent_force(
pos, forceInternal, type, part, electricField, scheme, idx );
}
int t = type[idx];
Vector3 r0 = pos[idx];
Vector3 p0 = momentum[idx];
......@@ -82,7 +86,10 @@ updateKernelNoseHooverLangevin(Vector3* __restrict__ pos, Vector3* __restrict__
const BrownianParticleType& pt = *part[t];
Vector3 force = forceInternal[idx] + fe.f;
if (need_position_dep_force) {
forceInternal[idx] = forceInternal[idx] + fe.f;
}
Vector3 force = forceInternal[idx];
#ifdef Debug
forceInternal[idx] = -force;
#endif
......@@ -152,6 +159,7 @@ updateKernelNoseHooverLangevin(Vector3* __restrict__ pos, Vector3* __restrict__
//The original BBK kernel is no longer used since the random numbers should be reused
//which is not possible in GPU code.
//Han-Yi Chou
template <bool need_position_dep_force = false>
__global__ void updateKernelBAOAB(Vector3* pos, Vector3* momentum, Vector3* __restrict__ forceInternal,
int type[], BrownianParticleType* part[], float kT, BaseGrid* kTGrid,
float electricField,int tGridLength, float timestep,
......@@ -165,8 +173,11 @@ __global__ void updateKernelBAOAB(Vector3* pos, Vector3* momentum, Vector3* __re
{
idx = (idx % num) + (idx/num) * (num+num_rb_attached_particles);
ForceEnergy fe = compute_position_dependent_force(
pos, forceInternal, type, part, electricField, scheme, idx );
ForceEnergy fe;
if (need_position_dep_force) {
fe = compute_position_dependent_force(
pos, forceInternal, type, part, electricField, scheme, idx );
}
// if (get_energy) energy[idx] += fe.e;
int t = type[idx];
......@@ -174,7 +185,11 @@ __global__ void updateKernelBAOAB(Vector3* pos, Vector3* momentum, Vector3* __re
Vector3 p0 = momentum[idx];
const BrownianParticleType& pt = *part[t];
Vector3 force = forceInternal[idx] + fe.f;
if (need_position_dep_force) { // only needed at ts == 1
forceInternal[idx] = forceInternal[idx] + fe.f;
}
Vector3 force = forceInternal[idx];
#ifdef Debug
forceInternal[idx] = -force;
#endif
......@@ -249,6 +264,7 @@ __global__ void LastUpdateKernelBAOAB(Vector3* pos,Vector3* momentum, Vector3* _
Vector3 p0 = momentum[idx];
Vector3 force = forceInternal[idx] + fe.f;
forceInternal[idx] = forceInternal[idx] + fe.f; // Adding position-dep force so it doesn't need to be calculated in the "plain" kernel
#ifdef Debug
forceInternal[idx] = -force;
#endif
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment