diff --git a/src/BrownParticlesKernel.h b/src/BrownParticlesKernel.h index 36f7daa3ea95f0e3ddafdabde661b143382f17c7..5475a454d4a7b3ef4c70a904c8ce262a8d71780e 100644 --- a/src/BrownParticlesKernel.h +++ b/src/BrownParticlesKernel.h @@ -4,7 +4,7 @@ template<const int BlockSize> static __global__ void BrownParticlesKineticEnergy(Vector3* P_n, int type[], BrownianParticleType* part[], - float *vec_red, int n) + float *vec_red, int num, int num_rb_attached_particles, int num_replicas) { __shared__ __align__(4) float sdata[BlockSize]; @@ -17,9 +17,12 @@ static __global__ void BrownParticlesKineticEnergy(Vector3* P_n, int type[], Bro sdata[tid] = 0.f; + int n = (num*num_replicas); + while (i < n) - { - int t1 = type[i]; + { + const int i1 = (i % num) + (i/num)*(num+num_rb_attached_particles); + const int t1 = type[i1]; const BrownianParticleType& pt1 = *part[t1]; p1 = P_n[i]; @@ -27,7 +30,8 @@ static __global__ void BrownParticlesKineticEnergy(Vector3* P_n, int type[], Bro if(i + BlockSize < n) { - int t2 = type[i+BlockSize]; + const int i2 = ((i+BlockSize) % num) + ((i+BlockSize)/num)*(num+num_rb_attached_particles); + const int t2 = type[i2]; const BrownianParticleType& pt2 = *part[t2]; p2 = P_n[i+BlockSize]; @@ -38,7 +42,7 @@ static __global__ void BrownParticlesKineticEnergy(Vector3* P_n, int type[], Bro else sdata[tid] += p1.length2() / mass1; - i += gridSize; + i += gridSize; } sdata[tid] *= 0.50f; diff --git a/src/Configuration.cpp b/src/Configuration.cpp index 04f2a94653b3942ff55be51c85fd6b4c9f9de3ec..679daac15e9d595c81ec74848126c6a58aad79bf 100644 --- a/src/Configuration.cpp +++ b/src/Configuration.cpp @@ -122,9 +122,7 @@ Configuration::Configuration(const char* config_file, int simNum, bool debug) : printf("%d groups\n", numGroupSites); // Allocate particle variables. - // First num*simNum entries are for point particles, next num_rb_attached_particles*simNum are for RB particles - - + // Each replica works with num+num_rb_attached_particles in array pos = new Vector3[ (num+num_rb_attached_particles) * simNum]; //Han-Yi Chou diff --git a/src/GrandBrownTown.cu b/src/GrandBrownTown.cu index 1fb98bc084e49aca00279e5c91611bfbe0a9f32e..f6c9d380421fbd63a22ccfe65d9dbcb234ed636d 100644 --- a/src/GrandBrownTown.cu +++ b/src/GrandBrownTown.cu @@ -97,10 +97,11 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg, bonds = c.bonds; numCap = c.numCap; // max number of particles num = c.num; // current number of particles + num_rb_attached_particles = c.num_rb_attached_particles; numGroupSites = c.numGroupSites; // Allocate arrays of positions, types and serial numbers - pos = new Vector3[(num+numGroupSites) * numReplicas]; // [HOST] array of particles' positions. + pos = new Vector3[(num+num_rb_attached_particles+numGroupSites) * numReplicas]; // [HOST] array of particles' positions. // Allocate arrays of momentum Han-Yi Chou if(particle_dynamic == String("Langevin") || particle_dynamic == String("NoseHooverLangevin")) { @@ -112,8 +113,8 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg, //for debug //force = new Vector3[num * numReplicas]; - type = new int[num * numReplicas]; // [HOST] array of particles' types. - serial = new int[num * numReplicas]; // [HOST] array of particles' serial numbers. + type = new int[(num+num_rb_attached_particles) * numReplicas]; // [HOST] array of particles' types. + serial = new int[(num+num_rb_attached_particles) * numReplicas]; // [HOST] array of particles' serial numbers. // Allocate things for rigid body // RBC = RigidBodyController(c); @@ -127,8 +128,10 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg, // Replicate identical initial conditions across all replicas for (int r = 0; r < numReplicas; ++r) { - std::copy(c.type, c.type + num, type + r*num); - std::copy(c.serial, c.serial + num, serial + r*num); + std::copy(c.type, c.type + num+num_rb_attached_particles, + type + r*(num+num_rb_attached_particles)); + std::copy(c.serial, c.serial + num + num_rb_attached_particles, + serial + r*(num+num_rb_attached_particles)); if (c.copyReplicaCoordinates > 0) std::copy(c.pos, c.pos + num, pos + r*num); } @@ -307,8 +310,12 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg, auto _get_index = [this](int idx, int replica) { // Convenient lambda function to deal with increasingly complicated indexing - auto num = this->num; auto numReplicas = this->numReplicas; auto numGroupSites = this->numGroupSites; - idx = (idx < num) ? idx + replica*num : (idx-num) + numReplicas*num + replica * numGroupSites; + auto num = this->num; + auto numReplicas = this->numReplicas; + auto num_rb_attached_particles = this->num_rb_attached_particles; + auto numGroupSites = this->numGroupSites; + idx = (idx < num+num_rb_attached_particles) ? idx + replica*(num+num_rb_attached_particles) + : (idx-num-num_rb_attached_particles) + numReplicas*(num+num_rb_attached_particles) + replica * numGroupSites; return idx; }; @@ -367,7 +374,7 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg, } internal->copyBondedListsToGPU(bondList,angleList,dihedralList,dihedralPotList); - forceInternal = new Vector3[(num+numGroupSites)*numReplicas]; + forceInternal = new Vector3[(num+num_rb_attached_particles+numGroupSites)*numReplicas]; if (fullLongRange != 0) printf("No cell decomposition created.\n"); @@ -569,16 +576,16 @@ void GrandBrownTown::RunNoseHooverLangevin() #endif #pragma omp parallel for for(int i = 0; i < numReplicas; ++i) - RBC[i]->updateParticleLists( (internal->getPos_d()[0])+i*num, sys_d); + RBC[i]->updateParticleLists( (internal->getPos_d()[0])+i*(num+conf.num_rb_attached_particles), sys_d); gpuErrchk(cudaDeviceSynchronize()); } float t; // simulation time - int numBlocks = (num * numReplicas) / NUM_THREADS + ((num * numReplicas) % NUM_THREADS == 0 ? 0 : 1); + int numBlocks = ((num+num_rb_attached_particles) * numReplicas) / NUM_THREADS + (((num+num_rb_attached_particles) * numReplicas) % NUM_THREADS == 0 ? 0 : 1); int tl = temperatureGridFile.length(); Vector3 *force_d; - gpuErrchk(cudaMalloc((void**)&force_d, sizeof(Vector3)*(num+numGroupSites) * numReplicas)); + gpuErrchk(cudaMalloc((void**)&force_d, sizeof(Vector3)*(num+num_rb_attached_particles+numGroupSites) * numReplicas)); printf("Configuration: %d particles | %d replicas\n", num, numReplicas); for (int i=0; i< gpuman.gpus.size(); ++i) { @@ -599,11 +606,11 @@ void GrandBrownTown::RunNoseHooverLangevin() internal->clear_force(); internal->clear_energy(); const std::vector<Vector3*>& _pos = internal->getPos_d(); - if (numGroupSites > 0) updateGroupSites<<<(numGroupSites/32+1),32>>>(_pos[0], groupSiteData_d, num, numGroupSites, numReplicas); + if (numGroupSites > 0) updateGroupSites<<<(numGroupSites/32+1),32>>>(_pos[0], groupSiteData_d, num + num_rb_attached_particles, numGroupSites, numReplicas); #ifdef USE_NCCL if (gpuman.gpus.size() > 1) { - gpuman.nccl_broadcast(0, _pos, _pos, (num+numGroupSites)*numReplicas, -1); + gpuman.nccl_broadcast(0, _pos, _pos, (num+num_rb_attached_particles+numGroupSites)*numReplicas, -1); } #endif gpuman.sync(); @@ -633,7 +640,7 @@ void GrandBrownTown::RunNoseHooverLangevin() #endif #pragma omp parallel for for(int i = 0; i < numReplicas; ++i) - RBC[i]->updateParticleLists( (internal->getPos_d()[0])+i*num, sys_d); + RBC[i]->updateParticleLists( (internal->getPos_d()[0])+i*(num+num_rb_attached_particles), sys_d); } internal -> computeTabulated(get_energy); break; @@ -657,7 +664,7 @@ void GrandBrownTown::RunNoseHooverLangevin() #endif #pragma omp parallel for for(int i = 0; i < numReplicas; ++i) - RBC[i]->updateParticleLists( (internal->getPos_d()[0])+i*num, sys_d); + RBC[i]->updateParticleLists( (internal->getPos_d()[0])+i*(num+num_rb_attached_particles), sys_d); } internal->compute(get_energy); break; @@ -681,8 +688,12 @@ void GrandBrownTown::RunNoseHooverLangevin() #endif #pragma omp parallel for for(int i = 0; i < numReplicas; ++i) - RBC[i]->updateForces((internal->getPos_d()[0])+i*num, (internal->getForceInternal_d()[0])+i*num, s, (internal->getEnergy())+i*num, get_energy, - RigidBodyInterpolationType, sys, sys_d); + RBC[i]->updateForces(internal->getPos_d()[0]+i*(num+num_rb_attached_particles), + internal->getForceInternal_d()[0]+i*(num+num_rb_attached_particles), + s, + internal->getEnergy()+i*(num+num_rb_attached_particles), + get_energy, + RigidBodyInterpolationType, sys, sys_d); if(rigidbody_dynamic == String("Langevin")) { #ifdef _OPENMP @@ -698,28 +709,29 @@ void GrandBrownTown::RunNoseHooverLangevin() #ifdef USE_NCCL if (gpuman.gpus.size() > 1) { const std::vector<Vector3*>& _f = internal->getForceInternal_d(); - gpuman.nccl_reduce(0, _f, _f, (num+numGroupSites)*numReplicas, -1); + gpuman.nccl_reduce(0, _f, _f, (num+num_rb_attached_particles+numGroupSites)*numReplicas, -1); } #endif - if (numGroupSites > 0) distributeGroupSiteForces<false><<<(numGroupSites/32+1),32>>>(internal->getForceInternal_d()[0], groupSiteData_d, num, numGroupSites, numReplicas); + if (numGroupSites > 0) distributeGroupSiteForces<false><<<(numGroupSites/32+1),32>>>(internal->getForceInternal_d()[0], groupSiteData_d, num+num_rb_attached_particles, numGroupSites, numReplicas); }//if step == 1 internal->clear_energy(); gpuman.sync(); + assert(false); // TODO update RB particels if(particle_dynamic == String("Langevin")) - updateKernelBAOAB<<< numBlocks, NUM_THREADS >>>(internal->getPos_d()[0], internal->getMom_d(), internal->getForceInternal_d()[0], internal->getType_d(), part_d, kT, kTGrid_d, electricField, tl, timestep, num, sys_d, randoGen_d, numReplicas, ParticleInterpolationType); + updateKernelBAOAB<<< numBlocks, NUM_THREADS >>>(internal->getPos_d()[0], internal->getMom_d(), internal->getForceInternal_d()[0], internal->getType_d(), part_d, kT, kTGrid_d, electricField, tl, timestep, num, num_rb_attached_particles, sys_d, randoGen_d, numReplicas, ParticleInterpolationType); else if(particle_dynamic == String("NoseHooverLangevin")) //kernel for Nose-Hoover Langevin dynamic updateKernelNoseHooverLangevin<<< numBlocks, NUM_THREADS >>>(internal -> getPos_d()[0], internal -> getMom_d(), - internal -> getRan_d(), internal -> getForceInternal_d()[0], internal -> getType_d(), part_d, kT, kTGrid_d, electricField, tl, timestep, num, sys_d, + internal -> getRan_d(), internal -> getForceInternal_d()[0], internal -> getType_d(), part_d, kT, kTGrid_d, electricField, tl, timestep, num, num_rb_attached_particles, sys_d, randoGen_d, numReplicas, ParticleInterpolationType); ////For Brownian motion else updateKernel<<< numBlocks, NUM_THREADS >>>(internal -> getPos_d()[0], internal -> getForceInternal_d()[0], internal -> getType_d(), - part_d, kT, kTGrid_d, electricField, tl, timestep, num, sys_d, randoGen_d, numReplicas, + part_d, kT, kTGrid_d, electricField, tl, timestep, num, num_rb_attached_particles, sys_d, randoGen_d, numReplicas, internal->getEnergy(), get_energy, ParticleInterpolationType); if(rigidbody_dynamic == String("Langevin")) @@ -751,7 +763,7 @@ void GrandBrownTown::RunNoseHooverLangevin() if (s % outputPeriod == 0) { // Copy particle positions back to CPU gpuErrchk(cudaDeviceSynchronize()); - gpuErrchk(cudaMemcpy(pos, internal ->getPos_d()[0], sizeof(Vector3) * num * numReplicas, cudaMemcpyDeviceToHost)); + gpuErrchk(cudaMemcpy(pos, internal ->getPos_d()[0], sizeof(Vector3) * (num+num_rb_attached_particles) * numReplicas, cudaMemcpyDeviceToHost)); } if (imd_on && clientsock && s % outputPeriod == 0) { @@ -832,7 +844,7 @@ void GrandBrownTown::RunNoseHooverLangevin() if (numGroupSites > 0) { gpuman.sync(); - updateGroupSites<<<(numGroupSites/32+1),32>>>(internal->getPos_d()[0], groupSiteData_d, num, numGroupSites, numReplicas); + updateGroupSites<<<(numGroupSites/32+1),32>>>(internal->getPos_d()[0], groupSiteData_d, num + num_rb_attached_particles, numGroupSites, numReplicas); gpuman.sync(); } @@ -844,7 +856,7 @@ void GrandBrownTown::RunNoseHooverLangevin() if (gpuman.gpus.size() > 1) { const std::vector<Vector3*>& _p = internal->getPos_d(); nccl_broadcast_streams[0] = gpuman.gpus[0].get_next_stream(); - gpuman.nccl_broadcast(0, _p, _p, (num+numGroupSites)*numReplicas, nccl_broadcast_streams); + gpuman.nccl_broadcast(0, _p, _p, (num+num_rb_attached_particles+numGroupSites)*numReplicas, nccl_broadcast_streams); } #endif } @@ -865,13 +877,13 @@ void GrandBrownTown::RunNoseHooverLangevin() #endif #pragma omp parallel for for(int i = 0; i < numReplicas; ++i) - RBC[i]->updateParticleLists( (internal->getPos_d()[0])+i*num, sys_d); + RBC[i]->updateParticleLists( (internal->getPos_d()[0])+i*(num+num_rb_attached_particles), sys_d); } internal -> computeTabulated(get_energy); #ifdef USE_NCCL if (gpuman.gpus.size() > 1) { const std::vector<Vector3*>& _f = internal->getForceInternal_d(); - gpuman.nccl_reduce(0, _f, _f, num*numReplicas, -1); + gpuman.nccl_reduce(0, _f, _f, (num+num_rb_attached_particles)*numReplicas, -1); } #endif break; @@ -918,22 +930,22 @@ void GrandBrownTown::RunNoseHooverLangevin() #endif #pragma omp parallel for for(int i = 0; i < numReplicas; ++i) // TODO: Use different buffer for RB particle forces to avoid race condition - RBC[i]->updateForces((internal->getPos_d()[0])+i*num, (internal->getForceInternal_d()[0])+i*num, s, (internal->getEnergy())+i*num, get_energy, + RBC[i]->updateForces((internal->getPos_d()[0])+i*(num+num_rb_attached_particles), (internal->getForceInternal_d()[0])+i*(num+num_rb_attached_particles), s, (internal->getEnergy())+i*(num+num_rb_attached_particles), get_energy, RigidBodyInterpolationType, sys, sys_d); if (numGroupSites > 0) { gpuman.sync(); // if ((s%100) == 0) { - distributeGroupSiteForces<true><<<(numGroupSites/32+1),32>>>(internal->getForceInternal_d()[0], groupSiteData_d, num, numGroupSites, numReplicas); + distributeGroupSiteForces<true><<<(numGroupSites/32+1),32>>>(internal->getForceInternal_d()[0], groupSiteData_d, num+num_rb_attached_particles, numGroupSites, numReplicas); // } else { - // distributeGroupSiteForces<false><<<(numGroupSites/32+1),32>>>(internal->getForceInternal_d()[0], groupSiteData_d, num, numGroupSites, numReplicas); + // distributeGroupSiteForces<false><<<(numGroupSites/32+1),32>>>(internal->getForceInternal_d()[0], groupSiteData_d, num+num_rb_attached_particles, numGroupSites, numReplicas); // } gpuman.sync(); } if(particle_dynamic == String("Langevin") || particle_dynamic == String("NoseHooverLangevin")) LastUpdateKernelBAOAB<<< numBlocks, NUM_THREADS >>>(internal -> getPos_d()[0], internal -> getMom_d(), internal -> getForceInternal_d()[0], - internal -> getType_d(), part_d, kT, kTGrid_d, electricField, tl, timestep, num, sys_d, randoGen_d, numReplicas, internal->getEnergy(), get_energy, + internal -> getType_d(), part_d, kT, kTGrid_d, electricField, tl, timestep, num, num_rb_attached_particles, sys_d, randoGen_d, numReplicas, internal->getEnergy(), get_energy, ParticleInterpolationType); //gpuErrchk(cudaDeviceSynchronize()); @@ -957,7 +969,7 @@ void GrandBrownTown::RunNoseHooverLangevin() { if(particle_dynamic == String("Langevin") || particle_dynamic == String("NoseHooverLangevin")) { - gpuErrchk(cudaMemcpy(momentum, internal -> getMom_d(), sizeof(Vector3) * num * numReplicas, cudaMemcpyDeviceToHost)); + gpuErrchk(cudaMemcpy(momentum, internal -> getMom_d(), sizeof(Vector3) * (num+num_rb_attached_particles) * numReplicas, cudaMemcpyDeviceToHost)); } t = s*timestep; // Loop over all replicas @@ -968,12 +980,12 @@ void GrandBrownTown::RunNoseHooverLangevin() updateNameList(); // no need for it here if particles stay the same // Write the trajectory. - writers[repID]->append(pos + (repID*num), name, serial, t, num); + writers[repID]->append(pos + repID*(num+num_rb_attached_particles), name, serial, t, num); if(particle_dynamic == String("Langevin") || particle_dynamic == String("NoseHooverLangevin")) { - momentum_writers[repID]->append(momentum + (repID * num), name, serial, t, num); - //force_writers[repID]->append(force + (repID * num), name, serial, t, num); + momentum_writers[repID]->append(momentum + repID * (num+num_rb_attached_particles), name, serial, t, num); + //force_writers[repID]->append(force + repID * (num+num_rb_attached_particles), name, serial, t, num); } } // TODO: Currently, not compatible with replicas. Needs a fix. @@ -1005,7 +1017,7 @@ void GrandBrownTown::RunNoseHooverLangevin() float e = 0.f; float V = 0.f; thrust::device_ptr<float> en_d(internal->getEnergy()); - V = (thrust::reduce(en_d, en_d+num*numReplicas)) / numReplicas; + V = (thrust::reduce(en_d, en_d+(num+num_rb_attached_particles)*numReplicas)) / numReplicas; if(particle_dynamic == String("Langevin") || particle_dynamic == String("NoseHooverLangevin")) { gpuErrchk(cudaMemcpy(momentum, internal->getMom_d(), sizeof(Vector3)*num*numReplicas,cudaMemcpyDeviceToHost)); @@ -1616,7 +1628,7 @@ void GrandBrownTown::run() { // void GrandBrownTown::populate() { for (int repID = 0; repID < numReplicas; repID++) { - const int offset = repID * num; + const int offset = repID * (num+num_rb_attached_particles); int pn = 0; int p = 0; for (int i = 0; i < num; i++) { @@ -1636,7 +1648,7 @@ void GrandBrownTown::populate() { void GrandBrownTown::writeRestart(int repID) const { FILE* out = fopen(restartFiles[repID].c_str(), "w"); - const int offset = repID * num; + const int offset = repID * (num+num_rb_attached_particles); for (int i = 0; i < num; ++i) { @@ -1671,7 +1683,7 @@ void GrandBrownTown::initialCondCen() { // Set random initial positions for all particles and replicas void GrandBrownTown::initialCond() { for (int repID = 0; repID < numReplicas; repID++) { - const int offset = repID * num; + const int offset = repID * (num+num_rb_attached_particles); for (int i = 0; i < num; i++) { pos[i + offset] = findPos(type[i + offset]); } @@ -1715,7 +1727,7 @@ float GrandBrownTown::KineticEnergy() gpuErrchk(cudaMalloc((void**)&energy, sizeof(float))); gpuErrchk(cudaMemset((void*)energy,0,sizeof(float))); - BrownParticlesKineticEnergy<64><<<dim3(512),dim3(64)>>>(internal->getMom_d(), internal -> getType_d(), part_d, vec_red, numReplicas*num); + BrownParticlesKineticEnergy<64><<<dim3(512),dim3(64)>>>(internal->getMom_d(), internal -> getType_d(), part_d, vec_red, num, num_rb_attached_particles, numReplicas); gpuErrchk(cudaDeviceSynchronize()); Reduction<64><<<dim3(1),dim3(64)>>>(vec_red, energy, 512); @@ -1763,7 +1775,7 @@ void GrandBrownTown::init_cuda_group_sites() } // Create GPU-friendly data structure - // TODO make this work for replicas + assert(numReplicas == 1); // TODO make this work for replicas int* tmp = new int[numGroupSites+1+num_particles]; num_particles = 0; int i = 0; diff --git a/src/GrandBrownTown.cuh b/src/GrandBrownTown.cuh index c4c059aea09aafe0f9a77c3f0cd6a6a1f71e7073..6857d21bd853d10f8d54629c1cb0ce120e01a93a 100644 --- a/src/GrandBrownTown.cuh +++ b/src/GrandBrownTown.cuh @@ -19,12 +19,14 @@ Vector3 step(Vector3& r0, float kTlocal, Vector3 force, float diffusion, Vector3 #if 0 __global__ void updateKernelABOBA(Vector3* pos, Vector3* momentum, Vector3* __restrict__ forceInternal, int type[], BrownianParticleType* part[], float kT, BaseGrid* kTGrid, float electricField, - int tGridLength, float timestep, int num, BaseGrid* sys, Random* randoGen, int numReplicas) + int tGridLength, float timestep, int num, int num_rb_attached_particles, BaseGrid* sys, Random* randoGen, int numReplicas) { - const int idx = blockIdx.x * blockDim.x + threadIdx.x; + int idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx < num * numReplicas) { + idx = (idx % num) + (idx/num) * (num+num_rb_attached_particles); + int t = type[idx]; //Vector3 r0(tex1Dfetch(PosTex, idx)); //Vector3 p0(tex1Dfetch(MomTex, idx)); @@ -101,11 +103,13 @@ __global__ void updateKernelNoseHooverLangevin(Vector3* __restrict__ pos, Vector3* __restrict__ momentum, float* random, Vector3* __restrict__ forceInternal, int type[], BrownianParticleType* part[], float kT, BaseGrid* kTGrid, float electricField, int tGridLength, float timestep, - int num, BaseGrid* sys, Random* randoGen, int numReplicas, int scheme) + int num, int num_rb_attached_particles, BaseGrid* sys, Random* randoGen, int numReplicas, int scheme) { - const int idx = blockIdx.x * blockDim.x + threadIdx.x; + int idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx < num * numReplicas) { + idx = (idx % num) + (idx/num) * (num+num_rb_attached_particles); + int t = type[idx]; Vector3 r0 = pos[idx]; @@ -214,13 +218,17 @@ updateKernelNoseHooverLangevin(Vector3* __restrict__ pos, Vector3* __restrict__ //Han-Yi Chou __global__ void updateKernelBAOAB(Vector3* pos, Vector3* momentum, Vector3* __restrict__ forceInternal, int type[], BrownianParticleType* part[], float kT, BaseGrid* kTGrid, - float electricField,int tGridLength, float timestep, int num, BaseGrid* sys, + float electricField,int tGridLength, float timestep, + int num, int num_rb_attached_particles, BaseGrid* sys, Random* randoGen, int numReplicas, int scheme) { - const int idx = blockIdx.x * blockDim.x + threadIdx.x; + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < num * numReplicas) { + idx = (idx % num) + (idx/num) * (num+num_rb_attached_particles); + int t = type[idx]; Vector3 r0 = pos[idx]; @@ -315,13 +323,15 @@ __global__ void updateKernelBAOAB(Vector3* pos, Vector3* momentum, Vector3* __re //update momentum in the last step of BAOAB integrator for the Langevin dynamics. Han-Yi Chou __global__ void LastUpdateKernelBAOAB(Vector3* pos,Vector3* momentum, Vector3* __restrict__ forceInternal, int type[], BrownianParticleType* part[], float kT, BaseGrid* kTGrid, - float electricField, int tGridLength, float timestep, int num, + float electricField, int tGridLength, float timestep, int num, int num_rb_attached_particles, BaseGrid* sys, Random* randoGen, int numReplicas, float* __restrict__ energy, bool get_energy,int scheme) { - const int idx = static_cast<int>(blockIdx.x * blockDim.x + threadIdx.x); + int idx = static_cast<int>(blockIdx.x * blockDim.x + threadIdx.x); if (idx < num * numReplicas) { + idx = (idx % num) + (idx/num) * (num+num_rb_attached_particles); + int t = type[idx]; Vector3 r0 = pos[idx]; Vector3 p0 = momentum[idx]; @@ -380,12 +390,14 @@ __global__ void LastUpdateKernelBAOAB(Vector3* pos,Vector3* momentum, Vector3* _ #if 0 __global__ void LastUpdateKernelABOBA(Vector3* pos, Vector3* momentum, Vector3* __restrict__ forceInternal, int type[], BrownianParticleType* part[], float kT, BaseGrid* kTGrid, float electricField, - int tGridLength, float timestep, int num, BaseGrid* sys, Random* randoGen, int numReplicas) + int tGridLength, float timestep, int num, int num_rb_attached_particles, BaseGrid* sys, Random* randoGen, int numReplicas) { - const int idx = blockIdx.x * blockDim.x + threadIdx.x; + int idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx < num * numReplicas) { + idx = (idx % num) + (idx/num) * (num+num_rb_attached_particles); + int t = type[idx]; Vector3 r0 = pos[idx]; Vector3 p0 = momentum[idx]; @@ -441,16 +453,17 @@ __global__ void LastUpdateKernelABOBA(Vector3* pos, Vector3* momentum, Vector3* __global__ void updateKernel(Vector3* pos, Vector3* __restrict__ forceInternal, int type[], BrownianParticleType* part[],float kT, BaseGrid* kTGrid, float electricField, - int tGridLength, float timestep, int num, BaseGrid* sys, + int tGridLength, float timestep, int num, int num_rb_attached_particles, BaseGrid* sys, Random* randoGen, int numReplicas, float* energy, bool get_energy, int scheme) { // Calculate this thread's ID - const int idx = static_cast<int>(blockIdx.x * blockDim.x + threadIdx.x); + int idx = static_cast<int>(blockIdx.x * blockDim.x + threadIdx.x); // TODO: Make this a grid-stride loop to make efficient reuse of RNG states // Loop over ALL particles in ALL replicas if (idx < num * numReplicas) { + idx = (idx % num) + (idx/num) * (num+num_rb_attached_particles); const int t = type[idx]; Vector3 p = pos[idx]; diff --git a/src/GrandBrownTown.h b/src/GrandBrownTown.h index c6479a081fb2b4461493f49269c1e67b29eb7323..6d8ed7f34c5bcdbed402929d8c5c3b41ffcb5f89 100644 --- a/src/GrandBrownTown.h +++ b/src/GrandBrownTown.h @@ -230,6 +230,8 @@ private: int numAngles; int numDihedrals; + int num_rb_attached_particles; + int numGroupSites; int* groupSiteData_d; diff --git a/src/RigidBody.cu b/src/RigidBody.cu index e224d3dbf42be8bb3afde7dfa1e28f37ff4d8d26..f65fcc75298816e893a6052045e4815c0ef08527 100644 --- a/src/RigidBody.cu +++ b/src/RigidBody.cu @@ -114,6 +114,7 @@ void RigidBody::addEnergy(float e) energy += e; } void RigidBody::updateParticleList(Vector3* pos_d, BaseGrid* sys_d) { + assert(false); // TODO update particles, excluding own particles for (int i = 0; i < t->numPotGrids; ++i) { numParticles[i] = 0; int& tnp = t->numParticles[i];