diff --git a/src/ComputeForce.cu b/src/ComputeForce.cu index 4f39b3fe3a905fca9703f03423b357a401bef198..f06b4badfea31f23f917aabc7671e166c60aed4f 100644 --- a/src/ComputeForce.cu +++ b/src/ComputeForce.cu @@ -40,14 +40,15 @@ void runSort(int2 *d1, int *d2, float *key, unsigned int count); ComputeForce::ComputeForce(const Configuration& c, const int numReplicas = 1) : - num(c.num), numParts(c.numParts), sys(c.sys), switchStart(c.switchStart), + num(c.num), numParts(c.numParts), num_rb_attached_particles(c.num_rb_attached_particles), + sys(c.sys), switchStart(c.switchStart), switchLen(c.switchLen), electricConst(c.coulombConst), cutoff2((c.switchLen + c.switchStart) * (c.switchLen + c.switchStart)), decomp(c.sys->getBox(), c.sys->getOrigin(), c.switchStart + c.switchLen + c.pairlistDistance, numReplicas), numBonds(c.numBonds), numTabBondFiles(c.numTabBondFiles), numExcludes(c.numExcludes), numAngles(c.numAngles), numTabAngleFiles(c.numTabAngleFiles), numDihedrals(c.numDihedrals), - numTabDihedralFiles(c.numTabDihedralFiles), numRestraints(c.numRestraints), + numTabDihedralFiles(c.numTabDihedralFiles), numRestraints(c.numRestraints), numGroupSites(c.numGroupSites), numReplicas(numReplicas) { @@ -237,7 +238,7 @@ ComputeForce::ComputeForce(const Configuration& c, const int numReplicas = 1) : gridSize = num / NUM_THREADS + 1; // Create and allocate the energy arrays - gpuErrchk(cudaMalloc(&energies_d, sizeof(float) * (num+numGroupSites) * numReplicas)); + gpuErrchk(cudaMalloc(&energies_d, sizeof(float) * (num+num_rb_attached_particles+numGroupSites) * numReplicas)); cudaEventCreate(&start); cudaEventCreate(&stop); } @@ -894,7 +895,7 @@ float ComputeForce::computeTabulatedFull(bool get_energy) { void ComputeForce::copyToCUDA(Vector3* forceInternal, Vector3* pos) { - const size_t tot_num = (num+numGroupSites) * numReplicas; + const size_t tot_num = (num+num_rb_attached_particles+numGroupSites) * numReplicas; for (std::size_t i = 0; i < gpuman.gpus.size(); ++i) { gpuman.use(i); @@ -965,10 +966,10 @@ void ComputeForce::setForceInternalOnDevice(Vector3* f) { void ComputeForce::copyToCUDA(int simNum, int *type, Bond* bonds, int2* bondMap, Exclude* excludes, int2* excludeMap, Angle* angles, Dihedral* dihedrals, const Restraint* const restraints) { assert(simNum == numReplicas); // Not sure why we have both of these things - int tot_num = num * simNum; + int tot_num_with_rb = (num+num_rb_attached_particles) * simNum; // type_d - gpuErrchk(cudaMalloc(&type_d, sizeof(int) * tot_num)); - gpuErrchk(cudaMemcpyAsync(type_d, type, sizeof(int) * tot_num, cudaMemcpyHostToDevice)); + gpuErrchk(cudaMalloc(&type_d, sizeof(int) * tot_num_with_rb)); + gpuErrchk(cudaMemcpyAsync(type_d, type, sizeof(int) * tot_num_with_rb, cudaMemcpyHostToDevice)); if (numBonds > 0) { diff --git a/src/ComputeForce.h b/src/ComputeForce.h index 5d602e4a609482a73ba7c639f8a575fc3023b2a7..b3bc258541f9603f387221363f3c39fe7fbae881 100644 --- a/src/ComputeForce.h +++ b/src/ComputeForce.h @@ -174,6 +174,7 @@ private: int numReplicas; int num; int numParts; + int num_rb_attached_particles; int numBonds; int numExcludes; int numTabBondFiles; diff --git a/src/Configuration.h b/src/Configuration.h index 8bc023de4be3c92c07bc45d2e36fa63477475f8a..ff7879faf686ff4b29e2c63be454d99ca2a96cde 100644 --- a/src/Configuration.h +++ b/src/Configuration.h @@ -91,9 +91,11 @@ public: ~Configuration(); int find_particle_type(const char* s) const { - for (int j = 0; j < numParts; j++) - if (s == part[j].name) + for (int j = 0; j < numParts; j++) { + printf("Searching particle %d (%s) =? %s\n", j, part[j].name.val(), s); + if (strcmp(s,part[j].name.val()) == 0) return j; + } return -1; } diff --git a/src/GrandBrownTown.cu b/src/GrandBrownTown.cu index 339a9f37938bdb0481e8d52f022e7b3d6a288f8a..46f556aa46c85066ec564d48786965b8785690a5 100644 --- a/src/GrandBrownTown.cu +++ b/src/GrandBrownTown.cu @@ -613,7 +613,7 @@ void GrandBrownTown::RunNoseHooverLangevin() internal->getPos_d()[0]+i*(num+num_rb_attached_particles), internal->getForceInternal_d()[0]+i*(num+num_rb_attached_particles), internal->getEnergy()+i*(num+num_rb_attached_particles), - sys_d); + sys_d, num, num_rb_attached_particles, numReplicas); } } @@ -704,7 +704,7 @@ void GrandBrownTown::RunNoseHooverLangevin() s, internal->getEnergy()+i*(num+num_rb_attached_particles), get_energy, - RigidBodyInterpolationType, sys, sys_d); + RigidBodyInterpolationType, sys, sys_d, num, num_rb_attached_particles); if(rigidbody_dynamic == String("Langevin")) { #ifdef _OPENMP @@ -778,7 +778,7 @@ void GrandBrownTown::RunNoseHooverLangevin() internal->getPos_d()[0]+i*(num+num_rb_attached_particles), internal->getForceInternal_d()[0]+i*(num+num_rb_attached_particles), internal->getEnergy()+i*(num+num_rb_attached_particles), - sys_d); + sys_d, num, num_rb_attached_particles, numReplicas); } } @@ -954,7 +954,7 @@ void GrandBrownTown::RunNoseHooverLangevin() #pragma omp parallel for for(int i = 0; i < numReplicas; ++i) // TODO: Use different buffer for RB particle forces to avoid race condition RBC[i]->updateForces((internal->getPos_d()[0])+i*(num+num_rb_attached_particles), (internal->getForceInternal_d()[0])+i*(num+num_rb_attached_particles), s, (internal->getEnergy())+i*(num+num_rb_attached_particles), get_energy, - RigidBodyInterpolationType, sys, sys_d); + RigidBodyInterpolationType, sys, sys_d, num, num_rb_attached_particles); if (numGroupSites > 0) { gpuman.sync(); diff --git a/src/Makefile b/src/Makefile index 4f6bb5779b9248ff6e4ef7e018387e236b18ab58..68598f462bf925e748a759b91ab9bd5ab09d6afc 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,6 +1,7 @@ ### Paths, libraries, includes, options include ./findcudalib.mk +SUFFIX ?= "" ifeq ($(dbg),1) NV_FLAGS += -g -G -O0 @@ -99,17 +100,17 @@ ifeq ($(omp),1) TARGET := $(TARGET)_omp endif -all: $(TARGET) - @echo "Done ->" $(TARGET) +all: $(TARGET)$(SUFFIX) + @echo "Done ->" $(TARGET)$(SUFFIX) $(TARGET)_link.o: $(CU_OBJ) # $(NVCC) $(NVLD_FLAGS) $(CU_OBJ) $(CC_OBJ) -o $(TARGET)_link.o $(NVCC) $(NVLD_FLAGS) $(CU_OBJ) -o $(TARGET)_link.o -$(TARGET): $(TARGET)_link.o $(CU_OBJ) $(CC_OBJ) arbd.cpp vmdsock.c imd.c imd.h +$(TARGET)$(SUFFIX): $(TARGET)_link.o $(CU_OBJ) $(CC_OBJ) arbd.cpp vmdsock.c imd.c imd.h # $(NVCC) $(NVLD_FLAGS) $(CU_OBJ) $(CC_OBJ) -o $(TARGET)_link.o # $(NVCC) $(NVLD_FLAGS) $(CU_OBJ) -o $(TARGET)_link.o - $(CC) $(CC_FLAGS) $(EX_FLAGS) arbd.cpp vmdsock.c imd.c $(TARGET)_link.o $(CU_OBJ) $(CC_OBJ) $(LD_FLAGS) -o $(TARGET) + $(CC) $(CC_FLAGS) $(EX_FLAGS) arbd.cpp vmdsock.c imd.c $(TARGET)_link.o $(CU_OBJ) $(CC_OBJ) $(LD_FLAGS) -o $(TARGET)$(SUFFIX) .SECONDEXPANSION: diff --git a/src/RigidBody.cu b/src/RigidBody.cu index abd693bdd3895f19af9027655132825501b52c37..0c9afe316652b93026792abe01f0e39001f0e2c8 100644 --- a/src/RigidBody.cu +++ b/src/RigidBody.cu @@ -333,6 +333,20 @@ void RigidBody::callGridParticleForceKernel(Vector3* pos_d, Vector3* force_d, in } } +void RigidBody::apply_attached_particle_forces(const Vector3* force) { + const auto &rb_pos = t->attached_particle_positions; + int num = rb_pos.size(); + Vector3 total_force = Vector3(0.0f); + Vector3 torque = Vector3(0.0f); + for (int i = 0; i < num; ++i) { + const int j = i + attached_particle_start; + torque = torque + (orientation*rb_pos[i]).cross(force[j]); + total_force = total_force + force[j]; + } + addForce(total_force); + addTorque(torque); +} + void RigidBody::applyGridParticleForces(BaseGrid* sys, ForceEnergy* forcestorques, const std::vector<int>& forcestorques_offset, int& fto_idx) { // loop over potential grids for (int i = 0; i < t->numPotGrids; ++i) { diff --git a/src/RigidBody.h b/src/RigidBody.h index 636c80e94fd0f17475156bbc84e0cd74409d8e94..e3e81737ae1d7aee45f16e915d44f6aa413e1749 100644 --- a/src/RigidBody.h +++ b/src/RigidBody.h @@ -83,6 +83,7 @@ class RigidBody { // host side representation of rigid bodies void initializeParticleLists(); void updateParticleList(Vector3* pos_d, BaseGrid* sys_d); void callGridParticleForceKernel(Vector3* pos_d, Vector3* force_d, int s, float* energy, bool get_energy, int scheme, BaseGrid* sys, BaseGrid* sys_d, ForceEnergy* forcestorques_d, const std::vector<int>& forcestorques_offset, int& fto_idx); + void apply_attached_particle_forces(const Vector3* force); void applyGridParticleForces(BaseGrid* sys, ForceEnergy* forcestorques, const std::vector<int>& forcestorques_offset, int& fto_idx); bool langevin; diff --git a/src/RigidBodyController.cu b/src/RigidBodyController.cu index b16eedbe24610071705a3772919ac3561eef8840..087823a0f7e4a0b6cabf0a3238de87cab5b06feb 100644 --- a/src/RigidBodyController.cu +++ b/src/RigidBodyController.cu @@ -72,6 +72,7 @@ RigidBodyController::RigidBodyController(const Configuration& c, const char* pre } rigidBodyByType.push_back(tmp); } + attached_particle_forces = new Vector3[attached_particle_offset]; totalParticleForceNumBlocks = 0; for (int i=0; i < particleForceNumBlocks.size(); ++i) { @@ -98,6 +99,8 @@ RigidBodyController::~RigidBodyController() { for (int i = 0; i < rigidBodyByType.size(); i++) rigidBodyByType[i].clear(); rigidBodyByType.clear(); + + delete [] attached_particle_forces; delete random; } @@ -527,13 +530,17 @@ void RigidBodyController::initializeForcePairs() { } -void RigidBodyController::update_attached_particle_positions(Vector3* pos_d, Vector3* force_d, float* energy_d, BaseGrid* sys_d) { +void RigidBodyController::update_attached_particle_positions(Vector3* pos_d, Vector3* force_d, float* energy_d, BaseGrid* sys_d, int num, int num_rb_attached_particles, int numReplicas) { for (int i = 0; i < rigidBodyByType.size(); i++) { for (int j = 0; j < rigidBodyByType[i].size(); j++) { rigidBodyByType[i][j].update_particle_positions(pos_d, force_d, energy_d); } } - assert(false); // clear force and energy + for (int rep = 0; rep < numReplicas; ++rep) { + size_t o = (num+num_rb_attached_particles)*rep; + gpuErrchk(cudaMemset((void*)&(force_d[num+o]),0,num_rb_attached_particles*sizeof(Vector3))); + } + gpuErrchk(cudaMemset((void*)&(energy_d[num]),0,num_rb_attached_particles*sizeof(float))); } @@ -559,7 +566,7 @@ void RigidBodyController::clearForceAndTorque() } } -void RigidBodyController::updateForces(Vector3* pos_d, Vector3* force_d, int s, float* energy, bool get_energy, int scheme, BaseGrid* sys, BaseGrid* sys_d) +void RigidBodyController::updateForces(Vector3* pos_d, Vector3* force_d, int s, float* energy, bool get_energy, int scheme, BaseGrid* sys, BaseGrid* sys_d, int num, int num_rb_attached_particles) { //if (s <= 1) //gpuErrchk( cudaProfilerStart() ); @@ -576,6 +583,7 @@ void RigidBodyController::updateForces(Vector3* pos_d, Vector3* force_d, int s, // RBTODO: launch kernels ahead of time and sync using event and memcpyAsync gpuErrchk( cudaDeviceSynchronize() ); cudaMemcpy(particleForces, particleForces_d, sizeof(ForceEnergy)*2*totalParticleForceNumBlocks, cudaMemcpyDeviceToHost); + cudaMemcpyAsync(attached_particle_forces, &force_d[num], sizeof(Vector3)*(num_rb_attached_particles), cudaMemcpyDeviceToHost); pfo_idx=0; for (int i = 0; i < rigidBodyByType.size(); i++) { @@ -586,6 +594,16 @@ void RigidBodyController::updateForces(Vector3* pos_d, Vector3* force_d, int s, } } + { + gpuErrchk( cudaDeviceSynchronize() ); + for (auto &rbv: rigidBodyByType) { + for (auto &rb: rbv) { + rb.apply_attached_particle_forces( attached_particle_forces ); + } + } + } + + // Grid–Grid forces if ( ((s % conf.rigidBodyGridGridPeriod) == 0 || s == 1 ) && forcePairs.size() > 0) { for (int i=0; i < forcePairs.size(); i++) { diff --git a/src/RigidBodyController.h b/src/RigidBodyController.h index bf8e58637d2b9860a8d1fd132f2c34a108ecfcb2..fa2fa4e5d1efd6b8bf43a83e9e34b456d5f67245 100644 --- a/src/RigidBodyController.h +++ b/src/RigidBodyController.h @@ -108,9 +108,9 @@ public: void SetRandomTorques(); void integrate(BaseGrid* sys, int step); void integrateDLM(BaseGrid* sys, int step); - void updateForces(Vector3* pos_d, Vector3* force_d, int s, float* energy, bool get_energy, int scheme, BaseGrid* sys, BaseGrid* sys_d); + void updateForces(Vector3* pos_d, Vector3* force_d, int s, float* energy, bool get_energy, int scheme, BaseGrid* sys, BaseGrid* sys_d, int num, int num_rb_attached_particles); void updateParticleLists(Vector3* pos_d, BaseGrid* sys_d); - void update_attached_particle_positions(Vector3* pos_d, Vector3* force_d, float* energy_d, BaseGrid* sys_d); + void update_attached_particle_positions(Vector3* pos_d, Vector3* force_d, float* energy_d, BaseGrid* sys_d, int num, int num_rb_attached_particles, int numReplicas); void clearForceAndTorque(); void KineticEnergy(); void print(int step); @@ -152,7 +152,8 @@ private: void construct_grids(); void destruct_grids(); - //float* rb_energy; + //float* rb_energy; + Vector3* attached_particle_forces; ForceEnergy* particleForces; ForceEnergy* particleForces_d; std::vector<int> particleForceNumBlocks; diff --git a/src/RigidBodyType.cu b/src/RigidBodyType.cu index ec0f8b65c7a25587755326291ce45542af777c44..2137160943aaf63fa2345f48e0b115373a242704 100644 --- a/src/RigidBodyType.cu +++ b/src/RigidBodyType.cu @@ -110,7 +110,7 @@ void RigidBodyType::attach_particles() { // Particle_type | x | y | z // A line without exactly six tokens should be discarded. if (numTokens != 4) { - printf("Error: Invalid attached particle file line: %s\n", line); + printf("Error: Invalid attached particle file line: %s\n", tokenList[0].val()); fclose(inp); exit(-1); }