From 75be5d8522fe91e5a329a0b490c47d1060371872 Mon Sep 17 00:00:00 2001 From: Chris Maffeo <cmaffeo2@illinois.edu> Date: Wed, 9 May 2018 19:30:23 -0500 Subject: [PATCH] Removed device synchronize barriers --- src/ComputeForce.cu | 1 - src/GrandBrownTown.cu | 10 ++++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/ComputeForce.cu b/src/ComputeForce.cu index 903b6c2..2bed3e5 100644 --- a/src/ComputeForce.cu +++ b/src/ComputeForce.cu @@ -699,7 +699,6 @@ float ComputeForce::computeTabulated(bool get_energy) { // int nb = (1+(decomp.nCells.x * decomp.nCells.y * decomp.nCells.z)) * 75; /* RBTODO: number of pairLists */ const int nb = 800; // printf("ComputeTabulated\n"); - gpuErrchk(cudaDeviceSynchronize()); // RBTODO: get_energy //if (get_energy) diff --git a/src/GrandBrownTown.cu b/src/GrandBrownTown.cu index 343a81f..5cd64dc 100644 --- a/src/GrandBrownTown.cu +++ b/src/GrandBrownTown.cu @@ -600,7 +600,7 @@ void GrandBrownTown::RunNoseHooverLangevin() } }//if step == 1 - gpuErrchk(cudaDeviceSynchronize()); + // gpuErrchk(cudaDeviceSynchronize()); if(particle_dynamic == String("Langevin")) updateKernelBAOAB<<< numBlocks, NUM_THREADS >>>(internal -> getPos_d(), internal -> getMom_d(), internal -> getForceInternal_d(), internal -> getType_d(), part_d, kT, kTGrid_d, electricField, tl, timestep, num, sys_d, randoGen_d, numReplicas); @@ -621,10 +621,11 @@ void GrandBrownTown::RunNoseHooverLangevin() else RBC.integrate(s); - if (s % outputPeriod == 0) + if (s % outputPeriod == 0) { // Copy particle positions back to CPU gpuErrchk(cudaDeviceSynchronize()); gpuErrchk(cudaMemcpy(pos, internal -> getPos_d(), sizeof(Vector3) * num * numReplicas, cudaMemcpyDeviceToHost)); + } if (imd_on && clientsock && s % outputPeriod == 0) { gpuErrchk(cudaDeviceSynchronize()); @@ -697,7 +698,7 @@ void GrandBrownTown::RunNoseHooverLangevin() internal->setForceInternalOnDevice(imdForces); // TODO ensure replicas are mutually exclusive with IMD RBC.clearForceAndTorque(); - gpuErrchk(cudaMemset((void*)(internal->getForceInternal_d()),0,num*numReplicas*sizeof(Vector3))); + gpuErrchk(cudaMemsetAsync((void*)(internal->getForceInternal_d()),0,num*numReplicas*sizeof(Vector3))); if (interparticleForce) { // 'tabulatedPotential' - determines whether interaction is described with tabulated potentials or formulas @@ -745,10 +746,8 @@ void GrandBrownTown::RunNoseHooverLangevin() } } } - gpuErrchk(cudaDeviceSynchronize()); //compute the force for rigid bodies RBC.updateForces(internal->getPos_d(), internal->getForceInternal_d(), s); - gpuErrchk(cudaDeviceSynchronize()); if(particle_dynamic == String("Langevin") || particle_dynamic == String("NoseHooverLangevin")) LastUpdateKernelBAOAB<<< numBlocks, NUM_THREADS >>>(internal -> getPos_d(), internal -> getMom_d(), internal -> getForceInternal_d(), internal -> getType_d(), part_d, kT, kTGrid_d, electricField, tl, timestep, num, sys_d, randoGen_d, numReplicas); @@ -762,7 +761,6 @@ void GrandBrownTown::RunNoseHooverLangevin() RBC.print(s); } - gpuErrchk(cudaDeviceSynchronize()); if (s % outputPeriod == 0) { if(particle_dynamic == String("Langevin") || particle_dynamic == String("NoseHooverLangevin")) -- GitLab