diff --git a/src/ComputeForce.cu b/src/ComputeForce.cu index 7b8c42ba37bdd160bef7526a481839a1c99e0f30..8f817614aa94e58923899ed977ee186e8304e0dd 100644 --- a/src/ComputeForce.cu +++ b/src/ComputeForce.cu @@ -766,7 +766,7 @@ float ComputeForce::computeTabulated(bool get_energy) { // TODO: Sum energy if (restraintIds_d != NULL ) - computeHarmonicRestraints<<<1, numThreads, 0, gpuman.get_next_stream()>>>(forceInternal_d[0], pos_d[0], sys_d[0], numRestraints*numReplicas, restraintIds_d, restraintLocs_d, restraintSprings_d); + computeHarmonicRestraints<<<1, numThreads, 0, gpuman.get_next_stream()>>>(forceInternal_d[0], pos_d[0], sys_d[0], numRestraints*numReplicas, restraintList_d, restraintLocs_d, restraintSprings_d); // Calculate the energy based on the array created by the kernel @@ -1079,9 +1079,7 @@ void ComputeForce::copyToCUDA(int simNum, int *type, Bond* bonds, int2* bondMap, // } // } -void ComputeForce::copyBondedListsToGPU(int3 *bondList, int4 *angleList, int4 *dihedralList, int *dihedralPotList, int4* bondAngleList) { - - +void ComputeForce::copyBondedListsToGPU(int3 *bondList, int4 *angleList, int4 *dihedralList, int *dihedralPotList, int4* bondAngleList, int2* restraintList) { size_t size; if (numBonds > 0) { @@ -1112,4 +1110,10 @@ void ComputeForce::copyBondedListsToGPU(int3 *bondList, int4 *angleList, int4 *d gpuErrchk( cudaMemcpyAsync( bondAngleList_d, bondAngleList, size, cudaMemcpyHostToDevice) ); } + if (numRestraints > 0) { + size = numRestraints * numReplicas * sizeof(int2); + gpuErrchk( cudaMalloc( &restraintList_d, size ) ); + gpuErrchk( cudaMemcpyAsync( restraintList_d, restraintList, size, cudaMemcpyHostToDevice) ); + } + } diff --git a/src/ComputeForce.cuh b/src/ComputeForce.cuh index 63778e529cc2e0f26b65fa31ec2f59267c6845f0..9c05c38c5b85e322f254110a8041d91fdeda2681 100644 --- a/src/ComputeForce.cuh +++ b/src/ComputeForce.cuh @@ -1068,14 +1068,16 @@ void computeTabulatedDihedrals(Vector3* force, const Vector3* __restrict__ pos, __global__ void computeHarmonicRestraints(Vector3* force, const Vector3* __restrict__ pos, const BaseGrid* __restrict__ sys, - int numRestraints, const int* const __restrict__ particleId, + int numRestraints, const int2* const __restrict__ restraintList, const Vector3* __restrict__ r0, const float* __restrict__ k) { // Loop over ALL dihedrals in ALL replicas for (int i = threadIdx.x+blockIdx.x*blockDim.x; i < numRestraints; i+=blockDim.x*gridDim.x) { - const int& id = particleId[i]; - const Vector3 dr = sys->wrapDiff(pos[id]-r0[i]); - Vector3 f = -k[i]*dr; + const int& id = restraintList[i].x; + const int& rid = restraintList[i].y; + + const Vector3 dr = sys->wrapDiff(pos[id]-r0[rid]); + Vector3 f = -k[rid]*dr; atomicAdd( &force[ id ], f ); } } diff --git a/src/ComputeForce.h b/src/ComputeForce.h index 39247282970c4b83713afffe9eb3e1a073c9d253..3d0e7041a259834e3179412782f4666d71206b01 100644 --- a/src/ComputeForce.h +++ b/src/ComputeForce.h @@ -103,7 +103,7 @@ public: void copyToCUDA(Vector3* forceInternal, Vector3* pos, Vector3* mom, float* random); // void createBondList(int3 *bondList); - void copyBondedListsToGPU(int3 *bondList, int4 *angleList, int4 *dihedralList, int *dihedralPotList, int4 *bondAngleList); + void copyBondedListsToGPU(int3 *bondList, int4 *angleList, int4 *dihedralList, int *dihedralPotList, int4 *bondAngleList, int2 *restraintList); //MLog: because of the move of a lot of private variables, some functions get starved necessary memory access to these variables, below is a list of functions that return the specified private variable. std::vector<Vector3*> getPos_d() @@ -171,14 +171,16 @@ public: } void clear_force() { + const size_t tot_num = (num+num_rb_attached_particles+numGroupSites) * numReplicas; for (std::size_t i = 0; i < gpuman.gpus.size(); ++i) { gpuman.use(i); - gpuErrchk(cudaMemsetAsync((void*)(forceInternal_d[i]),0,(num+numGroupSites)*numReplicas*sizeof(Vector3))); + gpuErrchk(cudaMemsetAsync((void*)(forceInternal_d[i]),0,tot_num*sizeof(Vector3))); } gpuman.use(0); // TODO move to a paradigm where gpu0 is not preferentially treated } void clear_energy() { - gpuErrchk(cudaMemsetAsync((void*)(energies_d), 0, sizeof(float)*(num+numGroupSites)*numReplicas)); // TODO make async + const size_t tot_num = (num+num_rb_attached_particles+numGroupSites) * numReplicas; + gpuErrchk(cudaMemsetAsync((void*)(energies_d), 0, sizeof(float)*tot_num)); // TODO make async } HOST DEVICE @@ -288,10 +290,13 @@ private: int4* angleList_d; int4* dihedralList_d; int* dihedralPotList_d; - + int2* restraintList_d; + int numRestraints; int* restraintIds_d; Vector3* restraintLocs_d; float* restraintSprings_d; + + }; diff --git a/src/GrandBrownTown.cu b/src/GrandBrownTown.cu index 5df60f03a558d739ad3042c4412b9b0c658ad24f..cc8e21720e8ca24e16cc5ee2768e4e15e7a1c712 100644 --- a/src/GrandBrownTown.cu +++ b/src/GrandBrownTown.cu @@ -395,14 +395,35 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg, exit(1); } int idx = i+k*numBondAngles; - bondAngleList[idx*2] = make_int4( bondAngles[i].ind1+k*num, bondAngles[i].ind2+k*num, - bondAngles[i].ind3+k*num, bondAngles[i].ind4+k*num ); + bondAngleList[idx*2] = make_int4( _get_index(bondAngles[i].ind1,k), + _get_index(bondAngles[i].ind2,k), + _get_index(bondAngles[i].ind3,k), + _get_index(bondAngles[i].ind4,k) ); bondAngleList[idx*2+1] = make_int4( bondAngles[i].tabFileIndex1, bondAngles[i].tabFileIndex2, bondAngles[i].tabFileIndex3, -1 ); } } } - internal->copyBondedListsToGPU(bondList,angleList,dihedralList,dihedralPotList,bondAngleList); + // TODO: remove older copy-to-cuda + int2* restraintList = NULL; + if (c.numRestraints > 0) { + restraintList = new int2[c.numRestraints * numReplicas]; + int j = 0; + for(int k = 0 ; k < numReplicas; k++) + { + for(int i = 0; i < c.numRestraints; ++i) + { + restraintList[j] = make_int2( _get_index(c.restraints[i].id, k), i ); + // cout << "Displaying: bondList["<< j <<"].x = " << bondList[j].x << ".\n" + // << "Displaying: bondList["<< j <<"].y = " << bondList[j].y << ".\n" + // << "Displaying: bondList["<< j <<"].z = " << bondList[j].z << ".\n"; + ++j; + } + } + } + + internal->copyBondedListsToGPU(bondList,angleList,dihedralList,dihedralPotList,bondAngleList,restraintList); + if (c.numRestraints > 0) delete[] restraintList; forceInternal = new Vector3[(num+num_rb_attached_particles+numGroupSites)*numReplicas]; if (fullLongRange != 0) @@ -657,11 +678,9 @@ void GrandBrownTown::run() #ifdef USE_NCCL if (gpuman.gpus.size() > 1) { gpuman.nccl_broadcast(0, _pos, _pos, (num+num_rb_attached_particles+numGroupSites)*numReplicas, -1); + gpuman.sync(); } #endif - gpuman.sync(); - - #ifdef _OPENMP omp_set_num_threads(4); @@ -729,20 +748,21 @@ void GrandBrownTown::run() } }//if inter-particle force - if (get_energy) { - compute_position_dependent_force_for_rb_attached_particles - <<< numBlocks, NUM_THREADS >>> ( - internal -> getPos_d()[0], internal -> getForceInternal_d()[0], - internal -> getType_d(), part_d, electricField, num, num_rb_attached_particles, numReplicas, ParticleInterpolationType); - } else { - compute_position_dependent_force_for_rb_attached_particles - <<< numBlocks, NUM_THREADS >>> ( - internal -> getPos_d()[0], - internal -> getForceInternal_d()[0], internal -> getEnergy(), - internal -> getType_d(), part_d, electricField, num, num_rb_attached_particles, numReplicas, ParticleInterpolationType); + if (num_rb_attached_particles > 0) { + if (get_energy) { + compute_position_dependent_force_for_rb_attached_particles + <<< numBlocks, NUM_THREADS >>> ( + internal -> getPos_d()[0], internal -> getForceInternal_d()[0], + internal -> getType_d(), part_d, electricField, num, num_rb_attached_particles, numReplicas, ParticleInterpolationType); + } else { + compute_position_dependent_force_for_rb_attached_particles + <<< numBlocks, NUM_THREADS >>> ( + internal -> getPos_d()[0], + internal -> getForceInternal_d()[0], internal -> getEnergy(), + internal -> getType_d(), part_d, electricField, num, num_rb_attached_particles, numReplicas, ParticleInterpolationType); + } } - #ifdef _OPENMP omp_set_num_threads(4); #endif @@ -1401,7 +1421,6 @@ void GrandBrownTown::init_cuda_group_sites() } // Create GPU-friendly data structure - assert(numReplicas == 1); // TODO make this work for replicas int* tmp = new int[numGroupSites+1+num_particles]; num_particles = 0; int i = 0;