diff --git a/src/BrownParticlesKernel.h b/src/BrownParticlesKernel.h index 36f7daa3ea95f0e3ddafdabde661b143382f17c7..5475a454d4a7b3ef4c70a904c8ce262a8d71780e 100644 --- a/src/BrownParticlesKernel.h +++ b/src/BrownParticlesKernel.h @@ -4,7 +4,7 @@ template<const int BlockSize> static __global__ void BrownParticlesKineticEnergy(Vector3* P_n, int type[], BrownianParticleType* part[], - float *vec_red, int n) + float *vec_red, int num, int num_rb_attached_particles, int num_replicas) { __shared__ __align__(4) float sdata[BlockSize]; @@ -17,9 +17,12 @@ static __global__ void BrownParticlesKineticEnergy(Vector3* P_n, int type[], Bro sdata[tid] = 0.f; + int n = (num*num_replicas); + while (i < n) - { - int t1 = type[i]; + { + const int i1 = (i % num) + (i/num)*(num+num_rb_attached_particles); + const int t1 = type[i1]; const BrownianParticleType& pt1 = *part[t1]; p1 = P_n[i]; @@ -27,7 +30,8 @@ static __global__ void BrownParticlesKineticEnergy(Vector3* P_n, int type[], Bro if(i + BlockSize < n) { - int t2 = type[i+BlockSize]; + const int i2 = ((i+BlockSize) % num) + ((i+BlockSize)/num)*(num+num_rb_attached_particles); + const int t2 = type[i2]; const BrownianParticleType& pt2 = *part[t2]; p2 = P_n[i+BlockSize]; @@ -38,7 +42,7 @@ static __global__ void BrownParticlesKineticEnergy(Vector3* P_n, int type[], Bro else sdata[tid] += p1.length2() / mass1; - i += gridSize; + i += gridSize; } sdata[tid] *= 0.50f; diff --git a/src/ComputeForce.cu b/src/ComputeForce.cu index 4f39b3fe3a905fca9703f03423b357a401bef198..74710b86fd0fd131f53c22e372c85e341c809450 100644 --- a/src/ComputeForce.cu +++ b/src/ComputeForce.cu @@ -40,14 +40,15 @@ void runSort(int2 *d1, int *d2, float *key, unsigned int count); ComputeForce::ComputeForce(const Configuration& c, const int numReplicas = 1) : - num(c.num), numParts(c.numParts), sys(c.sys), switchStart(c.switchStart), + num(c.num), numParts(c.numParts), num_rb_attached_particles(c.num_rb_attached_particles), + sys(c.sys), switchStart(c.switchStart), switchLen(c.switchLen), electricConst(c.coulombConst), cutoff2((c.switchLen + c.switchStart) * (c.switchLen + c.switchStart)), decomp(c.sys->getBox(), c.sys->getOrigin(), c.switchStart + c.switchLen + c.pairlistDistance, numReplicas), numBonds(c.numBonds), numTabBondFiles(c.numTabBondFiles), numExcludes(c.numExcludes), numAngles(c.numAngles), numTabAngleFiles(c.numTabAngleFiles), numDihedrals(c.numDihedrals), - numTabDihedralFiles(c.numTabDihedralFiles), numRestraints(c.numRestraints), + numTabDihedralFiles(c.numTabDihedralFiles), numRestraints(c.numRestraints), numGroupSites(c.numGroupSites), numReplicas(numReplicas) { @@ -234,10 +235,10 @@ ComputeForce::ComputeForce(const Configuration& c, const int numReplicas = 1) : restraintIds_d = NULL; //Calculate the number of blocks the grid should contain - gridSize = num / NUM_THREADS + 1; + gridSize = (num+num_rb_attached_particles) / NUM_THREADS + 1; // Create and allocate the energy arrays - gpuErrchk(cudaMalloc(&energies_d, sizeof(float) * (num+numGroupSites) * numReplicas)); + gpuErrchk(cudaMalloc(&energies_d, sizeof(float) * (num+num_rb_attached_particles+numGroupSites) * numReplicas)); cudaEventCreate(&start); cudaEventCreate(&stop); } @@ -341,7 +342,7 @@ void ComputeForce::updateNumber(int newNum) { // Recalculate the number of blocks in the grid gridSize = 0; - while ((int)sqrt(NUM_THREADS) * gridSize < num) + while ((int)sqrt(NUM_THREADS) * gridSize < num+num_rb_attached_particles) ++gridSize; gpuErrchk(cudaFree(energies_d)); @@ -500,7 +501,7 @@ void ComputeForce::decompose() { cudaFree(decomp_d); decomp_d = NULL; } - decomp.decompose_d(pos_d[0], num); + decomp.decompose_d(pos_d[0], num+num_rb_attached_particles); decomp_d = decomp.copyToCUDA(); // Update pairlists using cell decomposition (not sure this is really needed or good) @@ -555,11 +556,11 @@ void ComputeForce::decompose() { //Han-Yi Chou 2017 my code #if __CUDA_ARCH__ >= 520 - createPairlists<64,64,8><<<dim3(128,128,numReplicas),dim3(64,1,1)>>>(pos_d[0], num, numReplicas, sys_d[0], decomp_d, nCells, numPairs_d[0], + createPairlists<64,64,8><<<dim3(128,128,numReplicas),dim3(64,1,1)>>>(pos_d[0], num+num_rb_attached_particles, numReplicas, sys_d[0], decomp_d, nCells, numPairs_d[0], pairLists_d[0], numParts, type_d, pairTabPotType_d[0], excludes_d, excludeMap_d, numExcludes, pairlistdist2, pos_tex[0], neighbors_tex); #else //__CUDA_ARCH__ == 300 - createPairlists<64,64,8><<<dim3(256,256,numReplicas),dim3(64,1,1)>>>(pos_d[0], num, numReplicas, sys_d[0], decomp_d, nCells, numPairs_d[0], + createPairlists<64,64,8><<<dim3(256,256,numReplicas),dim3(64,1,1)>>>(pos_d[0], num+num_rb_attached_particles, numReplicas, sys_d[0], decomp_d, nCells, numPairs_d[0], pairLists_d[0], numParts, type_d, pairTabPotType_d[0], excludes_d, excludeMap_d, numExcludes, pairlistdist2, pos_tex[0], neighbors_tex); #endif @@ -663,20 +664,20 @@ float ComputeForce::decompCutoff() { return decomp.getCutoff(); } float ComputeForce::computeFull(bool get_energy) { float energy = 0.0f; - gridSize = (num * numReplicas) / NUM_THREADS + 1; + gridSize = ((num+num_rb_attached_particles) * numReplicas) / NUM_THREADS + 1; dim3 numBlocks(gridSize, 1, 1); dim3 numThreads(NUM_THREADS, 1, 1); // Call the kernel to calculate forces computeFullKernel<<< numBlocks, numThreads >>>(forceInternal_d[0], pos_d[0], type_d, tableAlpha_d, - tableEps_d, tableRad6_d, num, numParts, sys_d[0], energies_d, gridSize, + tableEps_d, tableRad6_d, num+num_rb_attached_particles, numParts, sys_d[0], energies_d, gridSize, numReplicas, get_energy); // Calculate energy based on the array created by the kernel if (get_energy) { gpuErrchk(cudaDeviceSynchronize()); thrust::device_ptr<float> en_d(energies_d); - energy = thrust::reduce(en_d, en_d + num + numGroupSites); + energy = thrust::reduce(en_d, en_d + num + num_rb_attached_particles + numGroupSites); } return energy; @@ -684,20 +685,20 @@ float ComputeForce::computeFull(bool get_energy) { float ComputeForce::computeSoftcoreFull(bool get_energy) { float energy = 0.0f; - gridSize = (num * numReplicas) / NUM_THREADS + 1; + gridSize = ((num+num_rb_attached_particles) * numReplicas) / NUM_THREADS + 1; dim3 numBlocks(gridSize, 1, 1); dim3 numThreads(NUM_THREADS, 1, 1); // Call the kernel to calculate forces computeSoftcoreFullKernel<<<numBlocks, numThreads>>>(forceInternal_d[0], pos_d[0], type_d, - tableEps_d, tableRad6_d, num, numParts, sys_d[0], energies_d, gridSize, + tableEps_d, tableRad6_d, num+num_rb_attached_particles, numParts, sys_d[0], energies_d, gridSize, numReplicas, get_energy); // Calculate energy based on the array created by the kernel if (get_energy) { cudaDeviceSynchronize(); thrust::device_ptr<float> en_d(energies_d); - energy = thrust::reduce(en_d, en_d + num); + energy = thrust::reduce(en_d, en_d + num + num_rb_attached_particles); } return energy; @@ -729,13 +730,13 @@ float ComputeForce::computeElecFull(bool get_energy) { float ComputeForce::compute(bool get_energy) { float energy = 0.0f; - gridSize = (num * numReplicas) / NUM_THREADS + 1; + gridSize = ((num+num_rb_attached_particles) * numReplicas) / NUM_THREADS + 1; dim3 numBlocks(gridSize, 1, 1); dim3 numThreads(NUM_THREADS, 1, 1); // Call the kernel to calculate forces computeKernel<<<numBlocks, numThreads>>>(forceInternal_d[0], pos_d[0], type_d, - tableAlpha_d, tableEps_d, tableRad6_d, num, numParts, sys_d[0], + tableAlpha_d, tableEps_d, tableRad6_d, num+num_rb_attached_particles, numParts, sys_d[0], decomp_d, energies_d, switchStart, switchLen, gridSize, numReplicas, get_energy); @@ -744,7 +745,7 @@ float ComputeForce::compute(bool get_energy) { if (get_energy) { gpuErrchk(cudaDeviceSynchronize()); thrust::device_ptr<float> en_d(energies_d); - energy = thrust::reduce(en_d, en_d + num + numGroupSites); + energy = thrust::reduce(en_d, en_d + num + num_rb_attached_particles + numGroupSites); } return energy; @@ -757,7 +758,7 @@ float ComputeForce::compute(bool get_energy) { float ComputeForce::computeTabulated(bool get_energy) { float energy = 0.0f; - gridSize = (num * numReplicas) / NUM_THREADS + 1; + gridSize = ((num+num_rb_attached_particles) * numReplicas) / NUM_THREADS + 1; dim3 numBlocks(gridSize, 1, 1); dim3 numThreads(NUM_THREADS, 1, 1); @@ -773,7 +774,7 @@ float ComputeForce::computeTabulated(bool get_energy) { { //clearEnergies<<< nb, numThreads >>>(energies_d,num); //gpuErrchk(cudaDeviceSynchronize()); - cudaMemset((void*)energies_d, 0, sizeof(float)*(num+numGroupSites)*numReplicas); + cudaMemset((void*)energies_d, 0, sizeof(float)*(num+num_rb_attached_particles+numGroupSites)*numReplicas); computeTabulatedEnergyKernel<<< nb, numThreads >>>(forceInternal_d[0], pos_d[0], sys_d[0], cutoff2, numPairs_d[0], pairLists_d[0], pairTabPotType_d[0], tablePot_d[0], energies_d); } @@ -866,27 +867,27 @@ float ComputeForce::computeTabulated(bool get_energy) { float ComputeForce::computeTabulatedFull(bool get_energy) { energy = 0.0f; - gridSize = (num * numReplicas) / NUM_THREADS + 1; + gridSize = ((num+num_rb_attached_particles) * numReplicas) / NUM_THREADS + 1; dim3 numBlocks(gridSize, 1, 1); dim3 numThreads(NUM_THREADS, 1, 1); // Call the kernel to calculate forces - computeTabulatedFullKernel<<< numBlocks, numThreads >>>(forceInternal_d[0], pos_d[0], type_d, tablePot_d[0], tableBond_d, num, numParts, sys_d[0], bonds_d, bondMap_d, numBonds, excludes_d, excludeMap_d, numExcludes, energies_d, gridSize, numReplicas, get_energy, angles_d); + computeTabulatedFullKernel<<< numBlocks, numThreads >>>(forceInternal_d[0], pos_d[0], type_d, tablePot_d[0], tableBond_d, num+num_rb_attached_particles, numParts, sys_d[0], bonds_d, bondMap_d, numBonds, excludes_d, excludeMap_d, numExcludes, energies_d, gridSize, numReplicas, get_energy, angles_d); gpuErrchk(cudaDeviceSynchronize()); computeAngles<<< numBlocks, numThreads >>>(forceInternal_d[0], pos_d[0], angles_d, tableAngle_d, - numAngles, num, sys_d[0], energies_d, + numAngles, num+num_rb_attached_particles, sys_d[0], energies_d, get_energy); gpuErrchk(cudaDeviceSynchronize()); computeDihedrals<<< numBlocks, numThreads >>>(forceInternal_d[0], pos_d[0], dihedrals_d, tableDihedral_d, numDihedrals, - num, sys_d[0], energies_d, + num+num_rb_attached_particles, sys_d[0], energies_d, get_energy); // Calculate the energy based on the array created by the kernel if (get_energy) { gpuErrchk(cudaDeviceSynchronize()); thrust::device_ptr<float> en_d(energies_d); - energy = thrust::reduce(en_d, en_d + num); + energy = thrust::reduce(en_d, en_d + num + num_rb_attached_particles); } return energy; @@ -894,7 +895,7 @@ float ComputeForce::computeTabulatedFull(bool get_energy) { void ComputeForce::copyToCUDA(Vector3* forceInternal, Vector3* pos) { - const size_t tot_num = (num+numGroupSites) * numReplicas; + const size_t tot_num = (num+num_rb_attached_particles+numGroupSites) * numReplicas; for (std::size_t i = 0; i < gpuman.gpus.size(); ++i) { gpuman.use(i); @@ -935,11 +936,10 @@ void ComputeForce::copyToCUDA(Vector3* forceInternal, Vector3* pos) } void ComputeForce::copyToCUDA(Vector3* forceInternal, Vector3* pos, Vector3* mom) { - const size_t tot_num = (num+numGroupSites) * numReplicas; - const size_t small_tot_num = num * numReplicas; + const size_t tot_num = num * numReplicas; gpuErrchk(cudaMalloc(&mom_d, sizeof(Vector3) * tot_num)); - gpuErrchk(cudaMemcpyAsync(mom_d, mom, sizeof(Vector3) * small_tot_num, cudaMemcpyHostToDevice)); + gpuErrchk(cudaMemcpyAsync(mom_d, mom, sizeof(Vector3) * tot_num, cudaMemcpyHostToDevice)); copyToCUDA(forceInternal,pos); gpuErrchk(cudaDeviceSynchronize()); @@ -965,10 +965,11 @@ void ComputeForce::setForceInternalOnDevice(Vector3* f) { void ComputeForce::copyToCUDA(int simNum, int *type, Bond* bonds, int2* bondMap, Exclude* excludes, int2* excludeMap, Angle* angles, Dihedral* dihedrals, const Restraint* const restraints) { assert(simNum == numReplicas); // Not sure why we have both of these things - int tot_num = num * simNum; + int tot_num_with_rb = (num+num_rb_attached_particles) * simNum; + int tot_num_with_rb_group = (num+num_rb_attached_particles+numGroupSites) * simNum; // type_d - gpuErrchk(cudaMalloc(&type_d, sizeof(int) * tot_num)); - gpuErrchk(cudaMemcpyAsync(type_d, type, sizeof(int) * tot_num, cudaMemcpyHostToDevice)); + gpuErrchk(cudaMalloc(&type_d, sizeof(int) * tot_num_with_rb)); + gpuErrchk(cudaMemcpyAsync(type_d, type, sizeof(int) * tot_num_with_rb, cudaMemcpyHostToDevice)); if (numBonds > 0) { @@ -977,8 +978,8 @@ void ComputeForce::copyToCUDA(int simNum, int *type, Bond* bonds, int2* bondMap, gpuErrchk(cudaMemcpyAsync(bonds_d, bonds, sizeof(Bond) * numBonds, cudaMemcpyHostToDevice)); // bondMap_d - gpuErrchk(cudaMalloc(&bondMap_d, sizeof(int2) * num)); - gpuErrchk(cudaMemcpyAsync(bondMap_d, bondMap, sizeof(int2) * num, cudaMemcpyHostToDevice)); + gpuErrchk(cudaMalloc(&bondMap_d, sizeof(int2) * tot_num_with_rb_group)); + gpuErrchk(cudaMemcpyAsync(bondMap_d, bondMap, sizeof(int2) * tot_num_with_rb_group, cudaMemcpyHostToDevice)); } if (numExcludes > 0) { @@ -990,8 +991,8 @@ void ComputeForce::copyToCUDA(int simNum, int *type, Bond* bonds, int2* bondMap, cudaMemcpyHostToDevice)); // excludeMap_d - gpuErrchk(cudaMalloc(&excludeMap_d, sizeof(int2) * num)); - gpuErrchk(cudaMemcpyAsync(excludeMap_d, excludeMap, sizeof(int2) * num, + gpuErrchk(cudaMalloc(&excludeMap_d, sizeof(int2) * tot_num_with_rb)); + gpuErrchk(cudaMemcpyAsync(excludeMap_d, excludeMap, sizeof(int2) * tot_num_with_rb, cudaMemcpyHostToDevice)); } diff --git a/src/ComputeForce.h b/src/ComputeForce.h index 5d602e4a609482a73ba7c639f8a575fc3023b2a7..b3bc258541f9603f387221363f3c39fe7fbae881 100644 --- a/src/ComputeForce.h +++ b/src/ComputeForce.h @@ -174,6 +174,7 @@ private: int numReplicas; int num; int numParts; + int num_rb_attached_particles; int numBonds; int numExcludes; int numTabBondFiles; diff --git a/src/ComputeGridGrid.cu b/src/ComputeGridGrid.cu index 1ecfb9fad191cb1e70d787e384198b57b2a87883..f6cb72cc78899b85ca0395e28e9021dedd9c92ed 100644 --- a/src/ComputeGridGrid.cu +++ b/src/ComputeGridGrid.cu @@ -171,9 +171,11 @@ void computePartGridForce(const Vector3* __restrict__ pos, Vector3* particleForc } } + __global__ void createPartlist(const Vector3* __restrict__ pos, const int numTypeParticles, const int* __restrict__ typeParticles_d, + const int attached_particle_start, const int attached_particle_end, int* numParticles_d, int* particles_d, const Vector3 gridCenter, const float radius2, BaseGrid* sys_d) { const int tid = threadIdx.x; @@ -182,11 +184,13 @@ void createPartlist(const Vector3* __restrict__ pos, const int i = blockIdx.x * blockDim.x + threadIdx.x; if (i < numTypeParticles) { int aid = typeParticles_d[i]; - float dist = (sys_d->wrapDiff(pos[aid] - gridCenter)).length2(); - - if (dist <= radius2) { + if (aid < attached_particle_start || aid >= attached_particle_end) { + float dist = (sys_d->wrapDiff(pos[aid] - gridCenter)).length2(); + + if (dist <= radius2) { int tmp = atomicAggInc(numParticles_d, warpLane); particles_d[tmp] = aid; + } } } } diff --git a/src/ComputeGridGrid.cuh b/src/ComputeGridGrid.cuh index 739aa622e2514840f8586c47d0a277d306427ff4..27fc32ccb6b3b3fd43dd005fd9eda7eb1689c8f8 100644 --- a/src/ComputeGridGrid.cuh +++ b/src/ComputeGridGrid.cuh @@ -29,6 +29,7 @@ void computePartGridForce(const Vector3* __restrict__ pos, Vector3* particleForc extern __global__ void createPartlist(const Vector3* __restrict__ pos, const int numTypeParticles, const int* __restrict__ typeParticles_d, + const int attached_particle_start, const int attached_particle_end, int* numParticles_d, int* particles_d, const Vector3 gridCenter, const float radius2, BaseGrid* sys_d); diff --git a/src/Configuration.cpp b/src/Configuration.cpp index b1a89c49049548ef72024c4d4bfd69357acf60ab..e7bdfd71c57e163646c644d6ef3bb85485f4e7a7 100644 --- a/src/Configuration.cpp +++ b/src/Configuration.cpp @@ -97,8 +97,24 @@ Configuration::Configuration(const char* config_file, int simNum, bool debug) : } } // end result: variable "num" is set + // Count particles associated with rigid bodies + num_rb_attached_particles = 0; + if (numRigidTypes > 0) { + // grow list of rbs + for (int i = 0; i < numRigidTypes; i++) { + RigidBodyType &rbt = rigidBody[i]; + rbt.attach_particles(); + num_rb_attached_particles += rbt.num * rbt.num_attached_particles(); + } + } + assert( num_rb_attached_particles == 0 || simNum == 1 ); // replicas not yet implemented + // num = num+num_rb_attached_particles; + + // Set the number capacity printf("\n%d particles\n", num); + printf("%d particles attached to RBs\n", num_rb_attached_particles); + if (numCap <= 0) numCap = numCapFactor*num; // max number of particles if (numCap <= 0) numCap = 20; @@ -106,18 +122,35 @@ Configuration::Configuration(const char* config_file, int simNum, bool debug) : printf("%d groups\n", numGroupSites); // Allocate particle variables. - pos = new Vector3[num * simNum]; + // Each replica works with num+num_rb_attached_particles in array + pos = new Vector3[ (num+num_rb_attached_particles) * simNum]; + //Han-Yi Chou if (ParticleDynamicType == String("Langevin") || ParticleDynamicType == String("NoseHooverLangevin")) - momentum = new Vector3[num * simNum]; - - type = new int[num * simNum]; - serial = new int[num * simNum]; - posLast = new Vector3[num * simNum]; + momentum = new Vector3[(num+num_rb_attached_particles) * simNum]; + + type = new int[(num+num_rb_attached_particles) * simNum]; + serial = new int[(num+num_rb_attached_particles) * simNum]; + posLast = new Vector3[(num+num_rb_attached_particles) * simNum]; + + { + int pidx = 0; + for (int i = 0; i < numRigidTypes; i++) { // Loop over RB types + RigidBodyType &rbt = rigidBody[i]; + for (int j = 0; j < rbt.num; ++j) { // Loop over RBs + for (const int& t: rbt.get_attached_particle_types()) { + type[num+pidx] = t; + serial[num+pidx] = num+pidx; + pidx++; + } + } + } + } + //Han-Yi Chou if(ParticleDynamicType == String("Langevin") || ParticleDynamicType == String("NoseHooverLangevin")) - momLast = new Vector3[num * simNum]; - name = new String[num * simNum]; + momLast = new Vector3[(num+num_rb_attached_particles) * simNum]; + name = new String[(num+num_rb_attached_particles) * simNum]; currSerial = 0; @@ -164,7 +197,12 @@ Configuration::Configuration(const char* config_file, int simNum, bool debug) : String* tokenList = new String[numTokens]; partsFromFile[i].tokenize(tokenList); - int currType = 0; + int currType = find_particle_type(tokenList[2]); + if (currType == -1) { + printf("Error: Unable to find particle type %s\n", tokenList[2].val()); + exit(1); + + } for (int j = 0; j < numParts; j++) if (tokenList[2] == part[j].name) currType = j; @@ -216,7 +254,7 @@ Configuration::Configuration(const char* config_file, int simNum, bool debug) : printf("done!\n"); } else - loadedMomentum = Boltzmann(COM_Velocity, num * simNum); + loadedMomentum = Boltzmann(COM_Velocity, (num * simNum)); } } } @@ -420,13 +458,45 @@ Configuration::Configuration(const char* config_file, int simNum, bool debug) : excludes = new Exclude[numExcludes]; for (int i = 0; i < oldNumExcludes; i++) excludes[i] = tempExcludes[i]; - delete tempExcludes; + delete [] tempExcludes; } for (int i = oldNumExcludes; i < numExcludes; i++) excludes[i] = newExcludes[i - oldNumExcludes]; printf("Built %d exclusions.\n",numExcludes); } + { // Add exclusions for RB attached particles + std::vector<Exclude> ex; + int start = num; + for (int i = 0; i < numRigidTypes; i++) { // Loop over RB types + RigidBodyType &rbt = rigidBody[i]; + const int nap = rbt.num_attached_particles(); + for (int j = 0; j < rbt.num; ++j) { // Loop over RBs + for (int ai = 0; ai < nap-1; ++ai) { + for (int aj = ai+1; aj < nap; ++aj) { + ex.push_back( Exclude( ai+start, aj+start ) ); + } + } + start += nap; + } + } + // copy + int oldNumExcludes = numExcludes; + numExcludes = numExcludes + ex.size(); + if (excludes == NULL) { + excludes = new Exclude[numExcludes]; + } else if (numExcludes >= excludeCapacity) { + Exclude* tempExcludes = excludes; + excludes = new Exclude[numExcludes]; + for (int i = 0; i < oldNumExcludes; i++) + excludes[i] = tempExcludes[i]; + delete [] tempExcludes; + } + for (int i = oldNumExcludes; i < numExcludes; i++) + excludes[i] = ex[i - oldNumExcludes]; + } + + printf("Built %d exclusions.\n",numExcludes); buildExcludeMap(); // Count number of particles of each type @@ -434,7 +504,7 @@ Configuration::Configuration(const char* config_file, int simNum, bool debug) : for (int i = 0; i < numParts; ++i) { numPartsOfType[i] = 0; } - for (int i = 0; i < num; ++i) { + for (int i = 0; i < num+num_rb_attached_particles; ++i) { ++numPartsOfType[type[i]]; } @@ -679,7 +749,7 @@ void Configuration::copyToCUDA() { gpuErrchk(cudaMalloc(&sys_d, sizeof(BaseGrid))); gpuErrchk(cudaMemcpyAsync(sys_d, sys, sizeof(BaseGrid), cudaMemcpyHostToDevice)); /*gpuErrchk(cudaMalloc(&type_d, sizeof(int) * num * simNum)); - gpuErrchk(cudaMemcpyAsync(type_d, type, sizeof(int) * num * simNum, cudaMemcpyHostToDevice)); + gpuErrchk(cudaMemcpyAsync(type_d, type, sizeof(int+num_rb_attached_particles) * num * simNum, cudaMemcpyHostToDevice)); if (numBonds > 0) { // bonds_d @@ -698,7 +768,7 @@ void Configuration::copyToCUDA() { cudaMemcpyHostToDevice)); // excludeMap_d - gpuErrchk(cudaMalloc(&excludeMap_d, sizeof(int2) * num)); + gpuErrchk(cudaMalloc(&excludeMap_d, sizeof(int2) * (num)); gpuErrchk(cudaMemcpyAsync(excludeMap_d, excludeMap, sizeof(int2) * num, cudaMemcpyHostToDevice)); } @@ -1117,6 +1187,8 @@ int Configuration::readParameters(const char * config_file) { else if (param == String("rotDamping")) rigidBody[currRB].rotDamping = stringToVector3( value ); + else if (param == String("attachedParticles")) + rigidBody[currRB].append_attached_particle_file(value); else if (param == String("densityGrid")) rigidBody[currRB].addDensityGrid(value); else if (param == String("potentialGrid")) @@ -1275,6 +1347,7 @@ void Configuration::readAtoms() { for (int i = 0; i < numParts; i++) if (part[i].num == 0) found = false; + // assert(false); // TODO probably relax constraint that particle must be found; could just be in RB if (!found) { printf("ERROR: Number of particles not specified in config file.\n"); exit(1); @@ -1454,9 +1527,11 @@ void Configuration::readGroups() { std::vector<int> tmp; for (int i=1; i < numTokens; ++i) { const int ai = atoi(tokenList[i].val()); - if (ai >= num) { + if (ai >= num+num_rb_attached_particles) { printf("Error: Attempted to include invalid particle in group: %s\n", line); exit(-1); + } else if (ai >= num) { + printf("WARNING: including RB particles in group with line: %s\n", line); } tmp.push_back( ai ); } @@ -1514,7 +1589,8 @@ void Configuration::readBonds() { continue; } - if (ind1 < 0 || ind1 >= num+numGroupSites || ind2 < 0 || ind2 >= num+numGroupSites) { + if (ind1 < 0 || ind1 >= num+num_rb_attached_particles+numGroupSites || + ind2 < 0 || ind2 >= num+num_rb_attached_particles+numGroupSites) { printf("ERROR: Bond file line '%s' includes invalid index\n", line); exit(1); } @@ -1562,8 +1638,8 @@ void Configuration::readBonds() { * bondMap[i].x is the index in the bonds array where the ith particle's bonds begin * bondMap[i].y is the index in the bonds array where the ith particle's bonds end */ - bondMap = new int2[num+numGroupSites]; - for (int i = 0; i < num+numGroupSites; i++) { + bondMap = new int2[num+num_rb_attached_particles+numGroupSites]; + for (int i = 0; i < num+num_rb_attached_particles+numGroupSites; i++) { bondMap[i].x = -1; bondMap[i].y = -1; } @@ -1621,8 +1697,8 @@ void Configuration::readExcludes() } } void Configuration::addExclusion(int ind1, int ind2) { - if (ind1 >= num || ind2 >= num) { - printf("WARNING: Attempted to add an exclusion for an out-of-range particle index (%d or %d >= %d).\n", ind1, ind2, num); + if (ind1 >= num+num_rb_attached_particles || ind2 >= num+num_rb_attached_particles) { + printf("WARNING: Attempted to add an exclusion for an out-of-range particle index (%d or %d >= %d).\n", ind1, ind2, num+num_rb_attached_particles); return; } @@ -1665,8 +1741,8 @@ void Configuration::buildExcludeMap() { * excludeMap[i].x is the index in the excludes array where the ith particle's excludes begin * excludeMap[i].y is the index in the excludes array where the ith particle's excludes end */ - excludeMap = new int2[num]; - for (int i = 0; i < num; i++) { + excludeMap = new int2[num+num_rb_attached_particles]; + for (int i = 0; i < num+num_rb_attached_particles; i++) { excludeMap[i].x = -1; excludeMap[i].y = -1; } @@ -1675,7 +1751,7 @@ void Configuration::buildExcludeMap() { for (int i = 0; i < numExcludes; i++) { if (excludes[i].ind1 != currPart) { currPart = excludes[i].ind1; - assert(currPart < num); + assert(currPart < num+num_rb_attached_particles); excludeMap[currPart].x = i; if (lastPart >= 0) excludeMap[lastPart].y = i; @@ -1724,7 +1800,7 @@ void Configuration::readAngles() { int ind3 = atoi(tokenList[3].val()); String file_name = tokenList[4]; //printf("file_name %s\n", file_name.val()); - if (ind1 >= num+numGroupSites or ind2 >= num+numGroupSites or ind3 >= num+numGroupSites) + if (ind1 >= num+num_rb_attached_particles+numGroupSites or ind2 >= num+num_rb_attached_particles+numGroupSites or ind3 >= num+num_rb_attached_particles+numGroupSites) continue; if (numAngles >= capacity) { @@ -1785,8 +1861,10 @@ void Configuration::readDihedrals() { int ind4 = atoi(tokenList[4].val()); String file_name = tokenList[5]; //printf("file_name %s\n", file_name.val()); - if (ind1 >= num+numGroupSites or ind2 >= num+numGroupSites - or ind3 >= num+numGroupSites or ind4 >= num+numGroupSites) + if (ind1 >= num+num_rb_attached_particles+numGroupSites or + ind2 >= num+num_rb_attached_particles+numGroupSites or + ind3 >= num+num_rb_attached_particles+numGroupSites or + ind4 >= num+num_rb_attached_particles+numGroupSites) continue; if (numDihedrals >= capacity) { @@ -1845,7 +1923,7 @@ void Configuration::readRestraints() { float y0 = (float) strtod(tokenList[4].val(), NULL); float z0 = (float) strtod(tokenList[5].val(), NULL); - if (id >= num+numGroupSites) continue; + if (id >= num + num_rb_attached_particles + numGroupSites) continue; if (numRestraints >= capacity) { Restraint* temp = restraints; diff --git a/src/Configuration.h b/src/Configuration.h index fc70e2a1e61eed800c52235e868d8f3afd5bf6cc..ff7879faf686ff4b29e2c63be454d99ca2a96cde 100644 --- a/src/Configuration.h +++ b/src/Configuration.h @@ -65,6 +65,7 @@ class Configuration { void readDihedrals(); void readRestraints(); + bool readTableFile(const String& value, int currTab); bool readBondFile(const String& value, int currBond); bool readAngleFile(const String& value, int currAngle); @@ -89,6 +90,15 @@ public: Configuration(const char * config_file, int simNum = 0, bool debug=false); ~Configuration(); + int find_particle_type(const char* s) const { + for (int j = 0; j < numParts; j++) { + printf("Searching particle %d (%s) =? %s\n", j, part[j].name.val(), s); + if (strcmp(s,part[j].name.val()) == 0) + return j; + } + return -1; + } + void copyToCUDA(); // Output variables @@ -122,7 +132,8 @@ public: Bond* bonds; int numCap; // max number of particles int num; // current number of particles - Vector3* pos; // position of each particle + int num_rb_attached_particles; + Vector3* pos; // position of each particle Vector3* momentum; //momentum of each brownian particles Han-Yi Chou Vector3 COM_Velocity; //center of mass velocity Han-Yi Chou int* type; // type of each particle diff --git a/src/GrandBrownTown.cu b/src/GrandBrownTown.cu index 1fb98bc084e49aca00279e5c91611bfbe0a9f32e..0510a0e41881166cabb49556a27b9c2f278130e6 100644 --- a/src/GrandBrownTown.cu +++ b/src/GrandBrownTown.cu @@ -97,10 +97,11 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg, bonds = c.bonds; numCap = c.numCap; // max number of particles num = c.num; // current number of particles + num_rb_attached_particles = c.num_rb_attached_particles; numGroupSites = c.numGroupSites; // Allocate arrays of positions, types and serial numbers - pos = new Vector3[(num+numGroupSites) * numReplicas]; // [HOST] array of particles' positions. + pos = new Vector3[(num+num_rb_attached_particles+numGroupSites) * numReplicas]; // [HOST] array of particles' positions. // Allocate arrays of momentum Han-Yi Chou if(particle_dynamic == String("Langevin") || particle_dynamic == String("NoseHooverLangevin")) { @@ -112,8 +113,8 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg, //for debug //force = new Vector3[num * numReplicas]; - type = new int[num * numReplicas]; // [HOST] array of particles' types. - serial = new int[num * numReplicas]; // [HOST] array of particles' serial numbers. + type = new int[(num+num_rb_attached_particles) * numReplicas]; // [HOST] array of particles' types. + serial = new int[(num+num_rb_attached_particles) * numReplicas]; // [HOST] array of particles' serial numbers. // Allocate things for rigid body // RBC = RigidBodyController(c); @@ -127,8 +128,10 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg, // Replicate identical initial conditions across all replicas for (int r = 0; r < numReplicas; ++r) { - std::copy(c.type, c.type + num, type + r*num); - std::copy(c.serial, c.serial + num, serial + r*num); + std::copy(c.type, c.type + num+num_rb_attached_particles, + type + r*(num+num_rb_attached_particles)); + std::copy(c.serial, c.serial + num + num_rb_attached_particles, + serial + r*(num+num_rb_attached_particles)); if (c.copyReplicaCoordinates > 0) std::copy(c.pos, c.pos + num, pos + r*num); } @@ -307,8 +310,12 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg, auto _get_index = [this](int idx, int replica) { // Convenient lambda function to deal with increasingly complicated indexing - auto num = this->num; auto numReplicas = this->numReplicas; auto numGroupSites = this->numGroupSites; - idx = (idx < num) ? idx + replica*num : (idx-num) + numReplicas*num + replica * numGroupSites; + auto num = this->num; + auto numReplicas = this->numReplicas; + auto num_rb_attached_particles = this->num_rb_attached_particles; + auto numGroupSites = this->numGroupSites; + idx = (idx < num+num_rb_attached_particles) ? idx + replica*(num+num_rb_attached_particles) + : (idx-num-num_rb_attached_particles) + numReplicas*(num+num_rb_attached_particles) + replica * numGroupSites; return idx; }; @@ -367,7 +374,7 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg, } internal->copyBondedListsToGPU(bondList,angleList,dihedralList,dihedralPotList); - forceInternal = new Vector3[(num+numGroupSites)*numReplicas]; + forceInternal = new Vector3[(num+num_rb_attached_particles+numGroupSites)*numReplicas]; if (fullLongRange != 0) printf("No cell decomposition created.\n"); @@ -496,8 +503,7 @@ void GrandBrownTown::RunNoseHooverLangevin() // Open the files for recording ionic currents for (int repID = 0; repID < numReplicas; ++repID) { - - writers[repID]->newFile(pos + (repID * num), name, 0.0f, num); // 'pos + (repID*num)' == array-to-pointer decay + writers[repID]->newFile(pos + repID*(num+num_rb_attached_particles), name, 0.0f, num); // 'pos + (repID*num)' == array-to-pointer decay if(particle_dynamic == String("Langevin") || particle_dynamic == String("NoseHooverLangevin")) momentum_writers[repID]->newFile((momentum + repID * num), name, 0.0f, num); // 'pos + (repID*num)' == array-to-pointer decay //random_writers[repID]->newFile(random + (repID * num), name, 0.0f, num); @@ -569,16 +575,16 @@ void GrandBrownTown::RunNoseHooverLangevin() #endif #pragma omp parallel for for(int i = 0; i < numReplicas; ++i) - RBC[i]->updateParticleLists( (internal->getPos_d()[0])+i*num, sys_d); + RBC[i]->updateParticleLists( (internal->getPos_d()[0])+i*(num+conf.num_rb_attached_particles), sys_d); gpuErrchk(cudaDeviceSynchronize()); } float t; // simulation time - int numBlocks = (num * numReplicas) / NUM_THREADS + ((num * numReplicas) % NUM_THREADS == 0 ? 0 : 1); + int numBlocks = ((num+num_rb_attached_particles) * numReplicas) / NUM_THREADS + (((num+num_rb_attached_particles) * numReplicas) % NUM_THREADS == 0 ? 0 : 1); int tl = temperatureGridFile.length(); Vector3 *force_d; - gpuErrchk(cudaMalloc((void**)&force_d, sizeof(Vector3)*(num+numGroupSites) * numReplicas)); + gpuErrchk(cudaMalloc((void**)&force_d, sizeof(Vector3)*(num+num_rb_attached_particles+numGroupSites) * numReplicas)); printf("Configuration: %d particles | %d replicas\n", num, numReplicas); for (int i=0; i< gpuman.gpus.size(); ++i) { @@ -599,11 +605,23 @@ void GrandBrownTown::RunNoseHooverLangevin() internal->clear_force(); internal->clear_energy(); const std::vector<Vector3*>& _pos = internal->getPos_d(); - if (numGroupSites > 0) updateGroupSites<<<(numGroupSites/32+1),32>>>(_pos[0], groupSiteData_d, num, numGroupSites, numReplicas); + + if (num_rb_attached_particles > 0) { + #pragma omp parallel for + for(int i = 0; i < numReplicas; ++i) { + RBC[i]->update_attached_particle_positions( + internal->getPos_d()[0]+num+i*(num+num_rb_attached_particles), + internal->getForceInternal_d()[0]+num+i*(num+num_rb_attached_particles), + internal->getEnergy()+num+i*(num+num_rb_attached_particles), + sys_d, num, num_rb_attached_particles, numReplicas); + } + } + + if (numGroupSites > 0) updateGroupSites<<<(numGroupSites/32+1),32>>>(_pos[0], groupSiteData_d, num + num_rb_attached_particles, numGroupSites, numReplicas); #ifdef USE_NCCL if (gpuman.gpus.size() > 1) { - gpuman.nccl_broadcast(0, _pos, _pos, (num+numGroupSites)*numReplicas, -1); + gpuman.nccl_broadcast(0, _pos, _pos, (num+num_rb_attached_particles+numGroupSites)*numReplicas, -1); } #endif gpuman.sync(); @@ -633,7 +651,7 @@ void GrandBrownTown::RunNoseHooverLangevin() #endif #pragma omp parallel for for(int i = 0; i < numReplicas; ++i) - RBC[i]->updateParticleLists( (internal->getPos_d()[0])+i*num, sys_d); + RBC[i]->updateParticleLists( (internal->getPos_d()[0])+i*(num+num_rb_attached_particles), sys_d); } internal -> computeTabulated(get_energy); break; @@ -657,7 +675,7 @@ void GrandBrownTown::RunNoseHooverLangevin() #endif #pragma omp parallel for for(int i = 0; i < numReplicas; ++i) - RBC[i]->updateParticleLists( (internal->getPos_d()[0])+i*num, sys_d); + RBC[i]->updateParticleLists( (internal->getPos_d()[0])+i*(num+num_rb_attached_particles), sys_d); } internal->compute(get_energy); break; @@ -676,13 +694,32 @@ void GrandBrownTown::RunNoseHooverLangevin() } } }//if inter-particle force + + if (get_energy) { + compute_position_dependent_force_for_rb_attached_particles + <<< numBlocks, NUM_THREADS >>> ( + internal -> getPos_d()[0], internal -> getForceInternal_d()[0], + internal -> getType_d(), part_d, electricField, num, num_rb_attached_particles, numReplicas, ParticleInterpolationType); + } else { + compute_position_dependent_force_for_rb_attached_particles + <<< numBlocks, NUM_THREADS >>> ( + internal -> getPos_d()[0], + internal -> getForceInternal_d()[0], internal -> getEnergy(), + internal -> getType_d(), part_d, electricField, num, num_rb_attached_particles, numReplicas, ParticleInterpolationType); + } + + #ifdef _OPENMP omp_set_num_threads(4); #endif #pragma omp parallel for for(int i = 0; i < numReplicas; ++i) - RBC[i]->updateForces((internal->getPos_d()[0])+i*num, (internal->getForceInternal_d()[0])+i*num, s, (internal->getEnergy())+i*num, get_energy, - RigidBodyInterpolationType, sys, sys_d); + RBC[i]->updateForces(internal->getPos_d()[0]+i*(num+num_rb_attached_particles), + internal->getForceInternal_d()[0]+i*(num+num_rb_attached_particles), + s, + internal->getEnergy()+i*(num+num_rb_attached_particles), + get_energy, + RigidBodyInterpolationType, sys, sys_d, num, num_rb_attached_particles); if(rigidbody_dynamic == String("Langevin")) { #ifdef _OPENMP @@ -698,28 +735,29 @@ void GrandBrownTown::RunNoseHooverLangevin() #ifdef USE_NCCL if (gpuman.gpus.size() > 1) { const std::vector<Vector3*>& _f = internal->getForceInternal_d(); - gpuman.nccl_reduce(0, _f, _f, (num+numGroupSites)*numReplicas, -1); + gpuman.nccl_reduce(0, _f, _f, (num+num_rb_attached_particles+numGroupSites)*numReplicas, -1); } #endif - if (numGroupSites > 0) distributeGroupSiteForces<false><<<(numGroupSites/32+1),32>>>(internal->getForceInternal_d()[0], groupSiteData_d, num, numGroupSites, numReplicas); + if (numGroupSites > 0) distributeGroupSiteForces<false><<<(numGroupSites/32+1),32>>>(internal->getForceInternal_d()[0], groupSiteData_d, num+num_rb_attached_particles, numGroupSites, numReplicas); }//if step == 1 internal->clear_energy(); gpuman.sync(); + if(particle_dynamic == String("Langevin")) - updateKernelBAOAB<<< numBlocks, NUM_THREADS >>>(internal->getPos_d()[0], internal->getMom_d(), internal->getForceInternal_d()[0], internal->getType_d(), part_d, kT, kTGrid_d, electricField, tl, timestep, num, sys_d, randoGen_d, numReplicas, ParticleInterpolationType); + updateKernelBAOAB<<< numBlocks, NUM_THREADS >>>(internal->getPos_d()[0], internal->getMom_d(), internal->getForceInternal_d()[0], internal->getType_d(), part_d, kT, kTGrid_d, electricField, tl, timestep, num, num_rb_attached_particles, sys_d, randoGen_d, numReplicas, ParticleInterpolationType); else if(particle_dynamic == String("NoseHooverLangevin")) //kernel for Nose-Hoover Langevin dynamic updateKernelNoseHooverLangevin<<< numBlocks, NUM_THREADS >>>(internal -> getPos_d()[0], internal -> getMom_d(), - internal -> getRan_d(), internal -> getForceInternal_d()[0], internal -> getType_d(), part_d, kT, kTGrid_d, electricField, tl, timestep, num, sys_d, + internal -> getRan_d(), internal -> getForceInternal_d()[0], internal -> getType_d(), part_d, kT, kTGrid_d, electricField, tl, timestep, num, num_rb_attached_particles, sys_d, randoGen_d, numReplicas, ParticleInterpolationType); ////For Brownian motion else updateKernel<<< numBlocks, NUM_THREADS >>>(internal -> getPos_d()[0], internal -> getForceInternal_d()[0], internal -> getType_d(), - part_d, kT, kTGrid_d, electricField, tl, timestep, num, sys_d, randoGen_d, numReplicas, + part_d, kT, kTGrid_d, electricField, tl, timestep, num, num_rb_attached_particles, sys_d, randoGen_d, numReplicas, internal->getEnergy(), get_energy, ParticleInterpolationType); if(rigidbody_dynamic == String("Langevin")) @@ -748,10 +786,21 @@ void GrandBrownTown::RunNoseHooverLangevin() } } + if (num_rb_attached_particles > 0) { + #pragma omp parallel for + for(int i = 0; i < numReplicas; ++i) { + RBC[i]->update_attached_particle_positions( + internal->getPos_d()[0]+num+i*(num+num_rb_attached_particles), + internal->getForceInternal_d()[0]+num+i*(num+num_rb_attached_particles), + internal->getEnergy()+num+i*(num+num_rb_attached_particles), + sys_d, num, num_rb_attached_particles, numReplicas); + } + } + if (s % outputPeriod == 0) { // Copy particle positions back to CPU gpuErrchk(cudaDeviceSynchronize()); - gpuErrchk(cudaMemcpy(pos, internal ->getPos_d()[0], sizeof(Vector3) * num * numReplicas, cudaMemcpyDeviceToHost)); + gpuErrchk(cudaMemcpy(pos, internal ->getPos_d()[0], sizeof(Vector3) * (num+num_rb_attached_particles) * numReplicas, cudaMemcpyDeviceToHost)); } if (imd_on && clientsock && s % outputPeriod == 0) { @@ -830,9 +879,10 @@ void GrandBrownTown::RunNoseHooverLangevin() for(int i = 0; i < numReplicas; ++i) RBC[i]->clearForceAndTorque(); + if (numGroupSites > 0) { gpuman.sync(); - updateGroupSites<<<(numGroupSites/32+1),32>>>(internal->getPos_d()[0], groupSiteData_d, num, numGroupSites, numReplicas); + updateGroupSites<<<(numGroupSites/32+1),32>>>(internal->getPos_d()[0], groupSiteData_d, num + num_rb_attached_particles, numGroupSites, numReplicas); gpuman.sync(); } @@ -844,7 +894,7 @@ void GrandBrownTown::RunNoseHooverLangevin() if (gpuman.gpus.size() > 1) { const std::vector<Vector3*>& _p = internal->getPos_d(); nccl_broadcast_streams[0] = gpuman.gpus[0].get_next_stream(); - gpuman.nccl_broadcast(0, _p, _p, (num+numGroupSites)*numReplicas, nccl_broadcast_streams); + gpuman.nccl_broadcast(0, _p, _p, (num+num_rb_attached_particles+numGroupSites)*numReplicas, nccl_broadcast_streams); } #endif } @@ -865,13 +915,13 @@ void GrandBrownTown::RunNoseHooverLangevin() #endif #pragma omp parallel for for(int i = 0; i < numReplicas; ++i) - RBC[i]->updateParticleLists( (internal->getPos_d()[0])+i*num, sys_d); + RBC[i]->updateParticleLists( (internal->getPos_d()[0])+i*(num+num_rb_attached_particles), sys_d); } internal -> computeTabulated(get_energy); #ifdef USE_NCCL if (gpuman.gpus.size() > 1) { const std::vector<Vector3*>& _f = internal->getForceInternal_d(); - gpuman.nccl_reduce(0, _f, _f, num*numReplicas, -1); + gpuman.nccl_reduce(0, _f, _f, (num+num_rb_attached_particles)*numReplicas, -1); } #endif break; @@ -912,28 +962,43 @@ void GrandBrownTown::RunNoseHooverLangevin() } } } + + if (get_energy) { + compute_position_dependent_force_for_rb_attached_particles + <<< numBlocks, NUM_THREADS >>> ( + internal -> getPos_d()[0], internal -> getForceInternal_d()[0], + internal -> getType_d(), part_d, electricField, num, num_rb_attached_particles, numReplicas, ParticleInterpolationType); + } else { + compute_position_dependent_force_for_rb_attached_particles + <<< numBlocks, NUM_THREADS >>> ( + internal -> getPos_d()[0], + internal -> getForceInternal_d()[0], internal -> getEnergy(), + internal -> getType_d(), part_d, electricField, num, num_rb_attached_particles, numReplicas, ParticleInterpolationType); + } + + //compute the force for rigid bodies #ifdef _OPENMP omp_set_num_threads(4); #endif #pragma omp parallel for for(int i = 0; i < numReplicas; ++i) // TODO: Use different buffer for RB particle forces to avoid race condition - RBC[i]->updateForces((internal->getPos_d()[0])+i*num, (internal->getForceInternal_d()[0])+i*num, s, (internal->getEnergy())+i*num, get_energy, - RigidBodyInterpolationType, sys, sys_d); + RBC[i]->updateForces((internal->getPos_d()[0])+i*(num+num_rb_attached_particles), (internal->getForceInternal_d()[0])+i*(num+num_rb_attached_particles), s, (internal->getEnergy())+i*(num+num_rb_attached_particles), get_energy, + RigidBodyInterpolationType, sys, sys_d, num, num_rb_attached_particles); if (numGroupSites > 0) { gpuman.sync(); // if ((s%100) == 0) { - distributeGroupSiteForces<true><<<(numGroupSites/32+1),32>>>(internal->getForceInternal_d()[0], groupSiteData_d, num, numGroupSites, numReplicas); + distributeGroupSiteForces<true><<<(numGroupSites/32+1),32>>>(internal->getForceInternal_d()[0], groupSiteData_d, num+num_rb_attached_particles, numGroupSites, numReplicas); // } else { - // distributeGroupSiteForces<false><<<(numGroupSites/32+1),32>>>(internal->getForceInternal_d()[0], groupSiteData_d, num, numGroupSites, numReplicas); + // distributeGroupSiteForces<false><<<(numGroupSites/32+1),32>>>(internal->getForceInternal_d()[0], groupSiteData_d, num+num_rb_attached_particles, numGroupSites, numReplicas); // } gpuman.sync(); } if(particle_dynamic == String("Langevin") || particle_dynamic == String("NoseHooverLangevin")) LastUpdateKernelBAOAB<<< numBlocks, NUM_THREADS >>>(internal -> getPos_d()[0], internal -> getMom_d(), internal -> getForceInternal_d()[0], - internal -> getType_d(), part_d, kT, kTGrid_d, electricField, tl, timestep, num, sys_d, randoGen_d, numReplicas, internal->getEnergy(), get_energy, + internal -> getType_d(), part_d, kT, kTGrid_d, electricField, tl, timestep, num, num_rb_attached_particles, sys_d, randoGen_d, numReplicas, internal->getEnergy(), get_energy, ParticleInterpolationType); //gpuErrchk(cudaDeviceSynchronize()); @@ -957,7 +1022,7 @@ void GrandBrownTown::RunNoseHooverLangevin() { if(particle_dynamic == String("Langevin") || particle_dynamic == String("NoseHooverLangevin")) { - gpuErrchk(cudaMemcpy(momentum, internal -> getMom_d(), sizeof(Vector3) * num * numReplicas, cudaMemcpyDeviceToHost)); + gpuErrchk(cudaMemcpy(momentum, internal -> getMom_d(), sizeof(Vector3) * (num) * numReplicas, cudaMemcpyDeviceToHost)); } t = s*timestep; // Loop over all replicas @@ -968,12 +1033,12 @@ void GrandBrownTown::RunNoseHooverLangevin() updateNameList(); // no need for it here if particles stay the same // Write the trajectory. - writers[repID]->append(pos + (repID*num), name, serial, t, num); + writers[repID]->append(pos + repID*(num+num_rb_attached_particles), name, serial, t, num); if(particle_dynamic == String("Langevin") || particle_dynamic == String("NoseHooverLangevin")) { - momentum_writers[repID]->append(momentum + (repID * num), name, serial, t, num); - //force_writers[repID]->append(force + (repID * num), name, serial, t, num); + momentum_writers[repID]->append(momentum + repID * (num+num_rb_attached_particles), name, serial, t, num); + //force_writers[repID]->append(force + repID * (num+num_rb_attached_particles), name, serial, t, num); } } // TODO: Currently, not compatible with replicas. Needs a fix. @@ -1005,7 +1070,7 @@ void GrandBrownTown::RunNoseHooverLangevin() float e = 0.f; float V = 0.f; thrust::device_ptr<float> en_d(internal->getEnergy()); - V = (thrust::reduce(en_d, en_d+num*numReplicas)) / numReplicas; + V = (thrust::reduce(en_d, en_d+(num+num_rb_attached_particles)*numReplicas)) / numReplicas; if(particle_dynamic == String("Langevin") || particle_dynamic == String("NoseHooverLangevin")) { gpuErrchk(cudaMemcpy(momentum, internal->getMom_d(), sizeof(Vector3)*num*numReplicas,cudaMemcpyDeviceToHost)); @@ -1616,7 +1681,7 @@ void GrandBrownTown::run() { // void GrandBrownTown::populate() { for (int repID = 0; repID < numReplicas; repID++) { - const int offset = repID * num; + const int offset = repID * (num+num_rb_attached_particles); int pn = 0; int p = 0; for (int i = 0; i < num; i++) { @@ -1636,7 +1701,7 @@ void GrandBrownTown::populate() { void GrandBrownTown::writeRestart(int repID) const { FILE* out = fopen(restartFiles[repID].c_str(), "w"); - const int offset = repID * num; + const int offset = repID * (num+num_rb_attached_particles); for (int i = 0; i < num; ++i) { @@ -1671,7 +1736,7 @@ void GrandBrownTown::initialCondCen() { // Set random initial positions for all particles and replicas void GrandBrownTown::initialCond() { for (int repID = 0; repID < numReplicas; repID++) { - const int offset = repID * num; + const int offset = repID * (num+num_rb_attached_particles); for (int i = 0; i < num; i++) { pos[i + offset] = findPos(type[i + offset]); } @@ -1715,7 +1780,7 @@ float GrandBrownTown::KineticEnergy() gpuErrchk(cudaMalloc((void**)&energy, sizeof(float))); gpuErrchk(cudaMemset((void*)energy,0,sizeof(float))); - BrownParticlesKineticEnergy<64><<<dim3(512),dim3(64)>>>(internal->getMom_d(), internal -> getType_d(), part_d, vec_red, numReplicas*num); + BrownParticlesKineticEnergy<64><<<dim3(512),dim3(64)>>>(internal->getMom_d(), internal -> getType_d(), part_d, vec_red, num, num_rb_attached_particles, numReplicas); gpuErrchk(cudaDeviceSynchronize()); Reduction<64><<<dim3(1),dim3(64)>>>(vec_red, energy, 512); @@ -1763,7 +1828,7 @@ void GrandBrownTown::init_cuda_group_sites() } // Create GPU-friendly data structure - // TODO make this work for replicas + assert(numReplicas == 1); // TODO make this work for replicas int* tmp = new int[numGroupSites+1+num_particles]; num_particles = 0; int i = 0; diff --git a/src/GrandBrownTown.cuh b/src/GrandBrownTown.cuh index c4c059aea09aafe0f9a77c3f0cd6a6a1f71e7073..a152dd1ddef4f945e5f468c124a685b0b2d23a13 100644 --- a/src/GrandBrownTown.cuh +++ b/src/GrandBrownTown.cuh @@ -14,17 +14,66 @@ __device__ Vector3 step(Vector3& r0, float kTlocal, Vector3 force, float diffusion, Vector3 diffGrad, float timestep, BaseGrid *sys, Random *randoGen, int num); +inline __device__ +ForceEnergy compute_position_dependent_force( + const Vector3* __restrict__ pos, Vector3* __restrict__ forceInternal, + const int* __restrict__ type, BrownianParticleType** part, + const float electricField, const int scheme, const int idx) +{ + int t = type[idx]; + Vector3 r0 = pos[idx]; + const BrownianParticleType& pt = *part[t]; + Vector3 forceExternal = Vector3(0.0f, 0.0f, pt.charge * electricField); + + ForceEnergy fe(0.f, 0.f); + for(int i = 0; i < pt.numPartGridFiles; ++i) + { + ForceEnergy tmp(0.f, 0.f); + if(!scheme) { + BoundaryCondition bc = pt.pmf_boundary_conditions[i]; + INTERPOLATE_FORCE(tmp, pt.pmf[i]->interpolateForceDLinearly, bc, r0) + } else + tmp = pt.pmf[i]->interpolateForceD(r0); + fe.f += tmp.f * pt.pmf_scale[i]; + fe.e += tmp.e * pt.pmf_scale[i]; + } + // if(get_energy) + // energy[idx] += fe.e; + +#ifndef FORCEGRIDOFF + // Add a force defined via 3D FORCE maps (not 3D potential maps) + if(!scheme) + { + if (pt.forceXGrid != NULL) fe.f.x += pt.forceXGrid->interpolatePotentialLinearly(r0); + if (pt.forceYGrid != NULL) fe.f.y += pt.forceYGrid->interpolatePotentialLinearly(r0); + if (pt.forceZGrid != NULL) fe.f.z += pt.forceZGrid->interpolatePotentialLinearly(r0); + } + else + { + if (pt.forceXGrid != NULL) fe.f.x += pt.forceXGrid->interpolatePotential(r0); + if (pt.forceYGrid != NULL) fe.f.y += pt.forceYGrid->interpolatePotential(r0); + if (pt.forceZGrid != NULL) fe.f.z += pt.forceZGrid->interpolatePotential(r0); + } +#endif + fe.f = fe.f + forceExternal; + return fe; +} + + + //update both position and momentum for Langevin dynamics //Han-Yi Chou #if 0 __global__ void updateKernelABOBA(Vector3* pos, Vector3* momentum, Vector3* __restrict__ forceInternal, int type[], BrownianParticleType* part[], float kT, BaseGrid* kTGrid, float electricField, - int tGridLength, float timestep, int num, BaseGrid* sys, Random* randoGen, int numReplicas) + int tGridLength, float timestep, int num, int num_rb_attached_particles, BaseGrid* sys, Random* randoGen, int numReplicas) { - const int idx = blockIdx.x * blockDim.x + threadIdx.x; + int idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx < num * numReplicas) { + idx = (idx % num) + (idx/num) * (num+num_rb_attached_particles); + int t = type[idx]; //Vector3 r0(tex1Dfetch(PosTex, idx)); //Vector3 p0(tex1Dfetch(MomTex, idx)); @@ -101,48 +150,24 @@ __global__ void updateKernelNoseHooverLangevin(Vector3* __restrict__ pos, Vector3* __restrict__ momentum, float* random, Vector3* __restrict__ forceInternal, int type[], BrownianParticleType* part[], float kT, BaseGrid* kTGrid, float electricField, int tGridLength, float timestep, - int num, BaseGrid* sys, Random* randoGen, int numReplicas, int scheme) + int num, int num_rb_attached_particles, BaseGrid* sys, Random* randoGen, int numReplicas, int scheme) { - const int idx = blockIdx.x * blockDim.x + threadIdx.x; + int idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx < num * numReplicas) { - int t = type[idx]; + idx = (idx % num) + (idx/num) * (num+num_rb_attached_particles); + + ForceEnergy fe = compute_position_dependent_force( + pos, forceInternal, type, part, electricField, scheme, idx ); + int t = type[idx]; Vector3 r0 = pos[idx]; Vector3 p0 = momentum[idx]; float ran = random[idx]; const BrownianParticleType& pt = *part[t]; - Vector3 forceExternal = Vector3(0.0f, 0.0f, pt.charge * electricField); - ForceEnergy fe(0.f, 0.f); - for(int i = 0; i < pt.numPartGridFiles; ++i) - { - ForceEnergy tmp(0.f,0.f); - if(!scheme) { - const BoundaryCondition& bc = pt.pmf_boundary_conditions[i]; - INTERPOLATE_FORCE(tmp, pt.pmf[i]->interpolateForceDLinearly, bc, r0) - } else - tmp = pt.pmf[i]->interpolateForceD(r0); - fe.f += tmp.f * pt.pmf_scale[i]; - } - //ForceEnergy fe = pt.pmf->interpolateForceDLinearlyPeriodic(r0); -#ifndef FORCEGRIDOFF - // Add a force defined via 3D FORCE maps (not 3D potential maps) - if(!scheme) - { - if (pt.forceXGrid != NULL) fe.f.x += pt.forceXGrid->interpolatePotentialLinearly(r0); - if (pt.forceYGrid != NULL) fe.f.y += pt.forceYGrid->interpolatePotentialLinearly(r0); - if (pt.forceZGrid != NULL) fe.f.z += pt.forceZGrid->interpolatePotentialLinearly(r0); - } - else - { - if (pt.forceXGrid != NULL) fe.f.x += pt.forceXGrid->interpolatePotential(r0); - if (pt.forceYGrid != NULL) fe.f.y += pt.forceYGrid->interpolatePotential(r0); - if (pt.forceZGrid != NULL) fe.f.z += pt.forceZGrid->interpolatePotential(r0); - } -#endif - Vector3 force = forceInternal[idx] + forceExternal + fe.f; + Vector3 force = forceInternal[idx] + fe.f; #ifdef Debug forceInternal[idx] = -force; #endif @@ -214,50 +239,27 @@ updateKernelNoseHooverLangevin(Vector3* __restrict__ pos, Vector3* __restrict__ //Han-Yi Chou __global__ void updateKernelBAOAB(Vector3* pos, Vector3* momentum, Vector3* __restrict__ forceInternal, int type[], BrownianParticleType* part[], float kT, BaseGrid* kTGrid, - float electricField,int tGridLength, float timestep, int num, BaseGrid* sys, + float electricField,int tGridLength, float timestep, + int num, int num_rb_attached_particles, BaseGrid* sys, Random* randoGen, int numReplicas, int scheme) { - const int idx = blockIdx.x * blockDim.x + threadIdx.x; + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < num * numReplicas) { - int t = type[idx]; + idx = (idx % num) + (idx/num) * (num+num_rb_attached_particles); + + ForceEnergy fe = compute_position_dependent_force( + pos, forceInternal, type, part, electricField, scheme, idx ); + // if (get_energy) energy[idx] += fe.e; + int t = type[idx]; Vector3 r0 = pos[idx]; Vector3 p0 = momentum[idx]; - const BrownianParticleType& pt = *part[t]; - Vector3 forceExternal = Vector3(0.0f, 0.0f, pt.charge * electricField); - //ForceEnergy fe = pt.pmf->interpolateForceDLinearlyPeriodic(r0); - ForceEnergy fe(0.f, 0.f); - for(int i = 0; i < pt.numPartGridFiles; ++i) - { - ForceEnergy tmp(0.f, 0.f); - if(!scheme) { - BoundaryCondition bc = pt.pmf_boundary_conditions[i]; - INTERPOLATE_FORCE(tmp, pt.pmf[i]->interpolateForceDLinearly, bc, r0) - } else - tmp = pt.pmf[i]->interpolateForceD(r0); - fe.f += tmp.f * pt.pmf_scale[i]; - } - -#ifndef FORCEGRIDOFF - // Add a force defined via 3D FORCE maps (not 3D potential maps) - if(!scheme) - { - if (pt.forceXGrid != NULL) fe.f.x += pt.forceXGrid->interpolatePotentialLinearly(r0); - if (pt.forceYGrid != NULL) fe.f.y += pt.forceYGrid->interpolatePotentialLinearly(r0); - if (pt.forceZGrid != NULL) fe.f.z += pt.forceZGrid->interpolatePotentialLinearly(r0); - } - else - { - if (pt.forceXGrid != NULL) fe.f.x += pt.forceXGrid->interpolatePotential(r0); - if (pt.forceYGrid != NULL) fe.f.y += pt.forceYGrid->interpolatePotential(r0); - if (pt.forceZGrid != NULL) fe.f.z += pt.forceZGrid->interpolatePotential(r0); - } -#endif - Vector3 force = forceInternal[idx] + forceExternal + fe.f; + Vector3 force = forceInternal[idx] + fe.f; #ifdef Debug forceInternal[idx] = -force; #endif @@ -315,54 +317,23 @@ __global__ void updateKernelBAOAB(Vector3* pos, Vector3* momentum, Vector3* __re //update momentum in the last step of BAOAB integrator for the Langevin dynamics. Han-Yi Chou __global__ void LastUpdateKernelBAOAB(Vector3* pos,Vector3* momentum, Vector3* __restrict__ forceInternal, int type[], BrownianParticleType* part[], float kT, BaseGrid* kTGrid, - float electricField, int tGridLength, float timestep, int num, + float electricField, int tGridLength, float timestep, int num, int num_rb_attached_particles, BaseGrid* sys, Random* randoGen, int numReplicas, float* __restrict__ energy, bool get_energy,int scheme) { - const int idx = static_cast<int>(blockIdx.x * blockDim.x + threadIdx.x); + int idx = static_cast<int>(blockIdx.x * blockDim.x + threadIdx.x); if (idx < num * numReplicas) { - int t = type[idx]; + idx = (idx % num) + (idx/num) * (num+num_rb_attached_particles); + + ForceEnergy fe = compute_position_dependent_force( + pos, forceInternal, type, part, electricField, scheme, idx ); + if (get_energy) energy[idx] += fe.e; + Vector3 r0 = pos[idx]; Vector3 p0 = momentum[idx]; - const BrownianParticleType& pt = *part[t]; - Vector3 forceExternal = Vector3(0.0f, 0.0f, pt.charge * electricField); - - //ForceEnergy fe = pt.pmf->interpolateForceDLinearlyPeriodic(r0); - ForceEnergy fe(0.f, 0.f); - for(int i = 0; i < pt.numPartGridFiles; ++i) - { - ForceEnergy tmp(0.f, 0.f); - if(!scheme) { - BoundaryCondition bc = pt.pmf_boundary_conditions[i]; - INTERPOLATE_FORCE(tmp, pt.pmf[i]->interpolateForceDLinearly, bc, r0) - } else - tmp = pt.pmf[i]->interpolateForceD(r0); - - tmp.f = tmp.f * pt.pmf_scale[i]; - tmp.e = tmp.e * pt.pmf_scale[i]; - fe += tmp; - //fe.e += tmp.e; - } - if(get_energy) - energy[idx] += fe.e; -#ifndef FORCEGRIDOFF - // Add a force defined via 3D FORCE maps (not 3D potential maps) - if(!scheme) - { - if (pt.forceXGrid != NULL) fe.f.x += pt.forceXGrid->interpolatePotentialLinearly(r0); - if (pt.forceYGrid != NULL) fe.f.y += pt.forceYGrid->interpolatePotentialLinearly(r0); - if (pt.forceZGrid != NULL) fe.f.z += pt.forceZGrid->interpolatePotentialLinearly(r0); - } - else - { - if (pt.forceXGrid != NULL) fe.f.x += pt.forceXGrid->interpolatePotential(r0); - if (pt.forceYGrid != NULL) fe.f.y += pt.forceYGrid->interpolatePotential(r0); - if (pt.forceZGrid != NULL) fe.f.z += pt.forceZGrid->interpolatePotential(r0); - } -#endif - Vector3 force = forceInternal[idx] + forceExternal + fe.f; + Vector3 force = forceInternal[idx] + fe.f; #ifdef Debug forceInternal[idx] = -force; #endif @@ -380,12 +351,14 @@ __global__ void LastUpdateKernelBAOAB(Vector3* pos,Vector3* momentum, Vector3* _ #if 0 __global__ void LastUpdateKernelABOBA(Vector3* pos, Vector3* momentum, Vector3* __restrict__ forceInternal, int type[], BrownianParticleType* part[], float kT, BaseGrid* kTGrid, float electricField, - int tGridLength, float timestep, int num, BaseGrid* sys, Random* randoGen, int numReplicas) + int tGridLength, float timestep, int num, int num_rb_attached_particles, BaseGrid* sys, Random* randoGen, int numReplicas) { - const int idx = blockIdx.x * blockDim.x + threadIdx.x; + int idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx < num * numReplicas) { + idx = (idx % num) + (idx/num) * (num+num_rb_attached_particles); + int t = type[idx]; Vector3 r0 = pos[idx]; Vector3 p0 = momentum[idx]; @@ -441,16 +414,17 @@ __global__ void LastUpdateKernelABOBA(Vector3* pos, Vector3* momentum, Vector3* __global__ void updateKernel(Vector3* pos, Vector3* __restrict__ forceInternal, int type[], BrownianParticleType* part[],float kT, BaseGrid* kTGrid, float electricField, - int tGridLength, float timestep, int num, BaseGrid* sys, + int tGridLength, float timestep, int num, int num_rb_attached_particles, BaseGrid* sys, Random* randoGen, int numReplicas, float* energy, bool get_energy, int scheme) { // Calculate this thread's ID - const int idx = static_cast<int>(blockIdx.x * blockDim.x + threadIdx.x); + int idx = static_cast<int>(blockIdx.x * blockDim.x + threadIdx.x); // TODO: Make this a grid-stride loop to make efficient reuse of RNG states // Loop over ALL particles in ALL replicas if (idx < num * numReplicas) { + idx = (idx % num) + (idx/num) * (num+num_rb_attached_particles); const int t = type[idx]; Vector3 p = pos[idx]; @@ -458,46 +432,14 @@ void updateKernel(Vector3* pos, Vector3* __restrict__ forceInternal, int type[], /* printf("atom %d: forceInternal: %f %f %f\n", idx, forceInternal[idx].x, forceInternal[idx].y, forceInternal[idx].z); */ - // Compute external force - Vector3 forceExternal = Vector3(0.0f, 0.0f, pt.charge * electricField); - - // Compute PMF - // TODO: maybe make periodic and nonPeriodic versions of this kernel - //ForceEnergy fe = pt.pmf->interpolateForceDLinearlyPeriodic(p); - ForceEnergy fe(0.f, 0.f); - for(int i = 0; i < pt.numPartGridFiles; ++i) - { - ForceEnergy tmp(0.f,0.f); - if(!scheme) { - BoundaryCondition bc = pt.pmf_boundary_conditions[i]; - INTERPOLATE_FORCE(tmp, pt.pmf[i]->interpolateForceDLinearly, bc, p) - } else - tmp = pt.pmf[i]->interpolateForceD(p); - tmp.f *= pt.pmf_scale[i]; - tmp.e *= pt.pmf_scale[i]; - fe += tmp; - } -#ifndef FORCEGRIDOFF - // Add a force defined via 3D FORCE maps (not 3D potential maps) - if(!scheme) - { - if (pt.forceXGrid != NULL) fe.f.x += pt.forceXGrid->interpolatePotentialLinearly(p); - if (pt.forceYGrid != NULL) fe.f.y += pt.forceYGrid->interpolatePotentialLinearly(p); - if (pt.forceZGrid != NULL) fe.f.z += pt.forceZGrid->interpolatePotentialLinearly(p); - } - else - { - if (pt.forceXGrid != NULL) fe.f.x += pt.forceXGrid->interpolatePotential(p); - if (pt.forceYGrid != NULL) fe.f.y += pt.forceYGrid->interpolatePotential(p); - if (pt.forceZGrid != NULL) fe.f.z += pt.forceZGrid->interpolatePotential(p); - } -#endif + ForceEnergy fe = compute_position_dependent_force( + pos, forceInternal, type, part, electricField, scheme, idx ); // Compute total force: // Internal: interaction between particles // External: electric field (now this is basically a constant vector) // forceGrid: ADD force due to PMF or other potentials defined in 3D space - Vector3 force = forceInternal[idx] + forceExternal + fe.f; + Vector3 force = forceInternal[idx] + fe.f; #ifdef Debug forceInternal[idx] = -force; #endif @@ -709,3 +651,37 @@ __global__ void devicePrint(RigidBodyType* rb[]) { // cudaMalloc( &totalForce_h, sizeof(Vector3) ); // cudaMemcpyToSymbol(totalForce, &totalForce_h, sizeof(totalForce_h)); // } + +__global__ void compute_position_dependent_force_for_rb_attached_particles( + const Vector3* __restrict__ pos, + Vector3* __restrict__ forceInternal, float* __restrict__ energy, + const int* __restrict__ type, BrownianParticleType** __restrict__ part, + const float electricField, const int num, const int num_rb_attached_particles, + const int numReplicas, const int scheme) +{ + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < num_rb_attached_particles * numReplicas) + { + idx = num + (idx % num_rb_attached_particles) + (idx/num_rb_attached_particles) * (num+num_rb_attached_particles); + ForceEnergy fe = compute_position_dependent_force( + pos, forceInternal, type, part, electricField, scheme, idx ); + atomicAdd( &forceInternal[idx], fe.f ); + atomicAdd( &energy[idx], fe.e ); + } +} +__global__ void compute_position_dependent_force_for_rb_attached_particles( + const Vector3* __restrict__ pos, Vector3* __restrict__ forceInternal, + const int* __restrict__ type, BrownianParticleType** __restrict__ part, + const float electricField, const int num, const int num_rb_attached_particles, + const int numReplicas, const int scheme) +{ + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + if (idx < num_rb_attached_particles * numReplicas) + { + idx = num + (idx % num_rb_attached_particles) + (idx/num_rb_attached_particles) * (num+num_rb_attached_particles); + ForceEnergy fe = compute_position_dependent_force( + pos, forceInternal, type, part, electricField, scheme, idx ); + atomicAdd( &forceInternal[idx], fe.f ); + } +} diff --git a/src/GrandBrownTown.h b/src/GrandBrownTown.h index c6479a081fb2b4461493f49269c1e67b29eb7323..6d8ed7f34c5bcdbed402929d8c5c3b41ffcb5f89 100644 --- a/src/GrandBrownTown.h +++ b/src/GrandBrownTown.h @@ -230,6 +230,8 @@ private: int numAngles; int numDihedrals; + int num_rb_attached_particles; + int numGroupSites; int* groupSiteData_d; diff --git a/src/Makefile b/src/Makefile index b0356b86521cc4ca5448f5fdb8b0b35ea106b2dc..68598f462bf925e748a759b91ab9bd5ab09d6afc 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,6 +1,7 @@ ### Paths, libraries, includes, options include ./findcudalib.mk +SUFFIX ?= "" ifeq ($(dbg),1) NV_FLAGS += -g -G -O0 @@ -99,13 +100,18 @@ ifeq ($(omp),1) TARGET := $(TARGET)_omp endif -all: $(TARGET) - @echo "Done ->" $(TARGET) +all: $(TARGET)$(SUFFIX) + @echo "Done ->" $(TARGET)$(SUFFIX) -$(TARGET): $(CU_OBJ) $(CC_OBJ) arbd.cpp vmdsock.c imd.c imd.h - $(NVCC) $(NVLD_FLAGS) $(CU_OBJ) $(CC_OBJ) -o $(TARGET)_link.o +$(TARGET)_link.o: $(CU_OBJ) +# $(NVCC) $(NVLD_FLAGS) $(CU_OBJ) $(CC_OBJ) -o $(TARGET)_link.o $(NVCC) $(NVLD_FLAGS) $(CU_OBJ) -o $(TARGET)_link.o - $(CC) $(CC_FLAGS) $(EX_FLAGS) arbd.cpp vmdsock.c imd.c $(TARGET)_link.o $(CU_OBJ) $(CC_OBJ) $(LD_FLAGS) -o $(TARGET) + +$(TARGET)$(SUFFIX): $(TARGET)_link.o $(CU_OBJ) $(CC_OBJ) arbd.cpp vmdsock.c imd.c imd.h +# $(NVCC) $(NVLD_FLAGS) $(CU_OBJ) $(CC_OBJ) -o $(TARGET)_link.o +# $(NVCC) $(NVLD_FLAGS) $(CU_OBJ) -o $(TARGET)_link.o + $(CC) $(CC_FLAGS) $(EX_FLAGS) arbd.cpp vmdsock.c imd.c $(TARGET)_link.o $(CU_OBJ) $(CC_OBJ) $(LD_FLAGS) -o $(TARGET)$(SUFFIX) + .SECONDEXPANSION: $(CU_OBJ): %.o: %.cu $$(wildcard %.h) $$(wildcard %.cuh) diff --git a/src/RigidBody.cu b/src/RigidBody.cu index e224d3dbf42be8bb3afde7dfa1e28f37ff4d8d26..8aeb7aa65711c72fa02149e73cf969a611f4078f 100644 --- a/src/RigidBody.cu +++ b/src/RigidBody.cu @@ -18,10 +18,10 @@ inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=t } #endif -RigidBody::RigidBody(String name, const Configuration& cref, const RigidBodyType& tref, RigidBodyController* RBCref) - : name(name), c(&cref), t(&tref), RBC(RBCref), impulse_to_momentum(4.1867999435271e4) /*impulse_to_momentum(4.184e8f)*/ { init(); } +RigidBody::RigidBody(String name, const Configuration& cref, const RigidBodyType& tref, RigidBodyController* RBCref, int attached_particle_start, int attached_particle_end) + : name(name), c(&cref), t(&tref), RBC(RBCref), attached_particle_start(attached_particle_start), attached_particle_end(attached_particle_end), impulse_to_momentum(4.1867999435271e4) /*impulse_to_momentum(4.184e8f)*/ { init(); } RigidBody::RigidBody(const RigidBody& rb) - : name(rb.name), c(rb.c), t(rb.t), RBC(rb.RBC), impulse_to_momentum(4.1867999435271e4)/*impulse_to_momentum(4.184e8f)*/ { init(); } + : name(rb.name), c(rb.c), t(rb.t), RBC(rb.RBC), attached_particle_start(rb.attached_particle_start), attached_particle_end(rb.attached_particle_end), impulse_to_momentum(4.1867999435271e4)/*impulse_to_momentum(4.184e8f)*/ { init(); } void RigidBody::init() { // units "(kcal_mol/AA) * ns" "dalton AA/ns" * 4.184e+08 timestep = c->timestep; @@ -103,6 +103,23 @@ int RigidBody::appendNumParticleBlocks( std::vector<int>* blocks ) { return ret; } +__global__ +void update_particle_positions_kernel(Vector3* __restrict__ pos, const int start, const int num, + const Vector3* __restrict__ pos_rb, + const Vector3 center, const Matrix3 orientation) { + const int i = blockIdx.x * blockDim.x + threadIdx.x; + if (i < num) { + const int aid = i+start; + pos[aid] = orientation*pos_rb[i] + center; + } +} +void RigidBody::update_particle_positions(Vector3* pos_d, Vector3* force_d, float* energy_d) { + int num_attached = attached_particle_end - attached_particle_start; + int nb = floor(num_attached/NUMTHREADS) + 1; + update_particle_positions_kernel<<<nb,NUMTHREADS>>>(pos_d, attached_particle_start, num_attached, + t->attached_particle_positions_d, position, orientation); +} + void RigidBody::addForce(Force f) { force += f; } @@ -113,6 +130,141 @@ void RigidBody::addEnergy(float e) { energy += e; } + +// TODO move code snippet to more general location +template<class C, class T> +auto contains(const C& v, const T& x) +-> decltype(end(v), true) +{ + return end(v) != std::find(begin(v), end(v), x); +} +/* +void RigidBody::initialize_possible_particles() +{ + std::vector<int> atomic_ids; + + // Loop over potential grids + for (int i = 0; i < t->numPotGrids; ++i) { + atomic_ids.clear(); + + String& gridName = t->potentialGridKeys[i]; + + // Loop over particle types to count the number of particles + for (int j = 0; j < conf->numParts; ++j) { + if (contains(conf->partRigidBodyGrid[j], gridName)) { + // gridNames contained gridName, so add the particles to atomic_ids + for (int aid = 0; aid < conf->num + conf->num_rb_attached_particles; ++aid) { + if (conf->type[aid] == j && (aid < exclude_start || aid > exclude_end)) { + atomic_ids.push_back(aid); + } + } + } + } + + // Initialize device data + size_t sz = sizeof(int) * atomic_ids.size(); + gpuErrchk(cudaMalloc( &(possible_particles_d[i]), sz )); + gpuErrchk(cudaMemcpyAsync( possible_particles_d[i], &atomic_ids[0], sz, cudaMemcpyHostToDevice)) + + // // Add particles attached to OTHER RBs + // int rb_particle_offset = conf->num; + // for (const auto& rbs: RBC->rigidBodyByType) + // { + // int rb_type + // const RigidBodyType& rb_type = rbs[0].t; + // if (rbs.t != + // for (int& ptype: rb_type->attached_particle_types) { + // // This could be made much more efficient + // if (contains(conf->partRigidBodyGrid[ptype], gridName)) { + // // Add particles + // rb + // rbs[0].sum + // } + // rb_particle_offset += rb_type.attached_particle_types + + // for (int k = 0; k < t->gridNames.size(); ++k) { + // if (t->gridNames[k] == gridName) { + // t->numParticles[i] += conf->numPartsOfType[j]; + // } + + + // // Loop over rigid body grid names associated with particle type + // const std::vector<String>& gridNames = conf->partRigidBodyGrid[j]; + // for (int k = 0; k < gridNames.size(); ++k) { + // if (gridNames[k] == gridName) { + // // Copy type j particles to particles[i] + // memcpy( &(particles[i][pid]), tmp, sizeof(int)*currId ); + // assert(currId == conf->numPartsOfType[j]); + // pid += conf->numPartsOfType[j]; + // } + // } + // } + + // numParticles[i] = 0; + + // Count the particles interacting with potential grid i + // Loop over particle types + for (int j = 0; j < conf->numParts; ++j) { + + // Loop over rigid body grid names associated with particle type + const std::vector<String>& gridNames = conf->partRigidBodyGrid[j]; + for (int k = 0; k < t->gridNames.size(); ++k) { + if (t->gridNames[k] == gridName) { + numParticles[i] += conf->numPartsOfType[j]; + } + } + } + + // Add RB particles + for (const auto& rbv: RBC->rigidBodyByType) + { + int ptype = rbv[0].t->attached_particle_types; + const std::vector<String>& gridNames = conf->partRigidBodyGrid[ptype]; + for (int k = 0; k < t->gridNames.size(); ++k) { + if (t->gridNames[k] == gridName) { + t->numParticles[i] += conf->numPartsOfType[j]; + } + + attached_particle_ + + if (numParticles[i] > 0) { + + // allocate array of particle ids for the potential grid + particles[i] = new int[numParticles[i]]; + int pid = 0; + + // Loop over particle types to count the number of particles + for (int j = 0; j < conf->numParts; ++j) { + + // Build temporary id array of type j particles + int tmp[conf->numPartsOfType[j]]; + int currId = 0; + for (int aid = 0; aid < conf->num; ++aid) { + if (conf->type[aid] == j) + tmp[currId++] = aid; + } + if (currId == 0) continue; + + // Loop over rigid body grid names associated with particle type + const std::vector<String>& gridNames = conf->partRigidBodyGrid[j]; + for (int k = 0; k < gridNames.size(); ++k) { + if (gridNames[k] == gridName) { + // Copy type j particles to particles[i] + memcpy( &(particles[i][pid]), tmp, sizeof(int)*currId ); + assert(currId == conf->numPartsOfType[j]); + pid += conf->numPartsOfType[j]; + } + } + } + + // Initialize device data + size_t sz = sizeof(int) * numParticles[i]; + gpuErrchk(cudaMalloc( &(particles_d[i]), sz )); + gpuErrchk(cudaMemcpyAsync( particles_d[i], particles[i], sz, cudaMemcpyHostToDevice)); + } + } +} +*/ void RigidBody::updateParticleList(Vector3* pos_d, BaseGrid* sys_d) { for (int i = 0; i < t->numPotGrids; ++i) { numParticles[i] = 0; @@ -131,10 +283,12 @@ void RigidBody::updateParticleList(Vector3* pos_d, BaseGrid* sys_d) { int nb = floor(tnp/NUMTHREADS) + 1; #if __CUDA_ARCH__ >= 300 createPartlist<<<nb,NUMTHREADS>>>(pos_d, tnp, t->particles_d[i], + attached_particle_start, attached_particle_end, tmp_d, particles_d[i], gridCenter + position, cutoff*cutoff, sys_d); #else createPartlist<<<nb,NUMTHREADS,NUMTHREADS/WARPSIZE>>>(pos_d, tnp, t->particles_d[i], + attached_particle_start, attached_particle_end, tmp_d, particles_d[i], gridCenter + position, cutoff*cutoff, sys_d); #endif @@ -179,6 +333,20 @@ void RigidBody::callGridParticleForceKernel(Vector3* pos_d, Vector3* force_d, in } } +void RigidBody::apply_attached_particle_forces(const Vector3* force) { + const auto &rb_pos = t->attached_particle_positions; + int num = rb_pos.size(); + Vector3 total_force = Vector3(0.0f); + Vector3 torque = Vector3(0.0f); + for (int i = 0; i < num; ++i) { + const int j = i + attached_particle_start; + torque = torque + (orientation*rb_pos[i]).cross(force[j]); + total_force = total_force + force[j]; + } + addForce(total_force); + addTorque(torque); +} + void RigidBody::applyGridParticleForces(BaseGrid* sys, ForceEnergy* forcestorques, const std::vector<int>& forcestorques_offset, int& fto_idx) { // loop over potential grids for (int i = 0; i < t->numPotGrids; ++i) { diff --git a/src/RigidBody.h b/src/RigidBody.h index 5854a082a29f975d11281b9ad45ffd5bc8cc0fbf..e3e81737ae1d7aee45f16e915d44f6aa413e1749 100644 --- a/src/RigidBody.h +++ b/src/RigidBody.h @@ -31,7 +31,9 @@ class RigidBody { // host side representation of rigid bodies | splitting methods for rigid body molecular dynamics". J Chem Phys. (1997) | `––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––*/ public: - RigidBody(String name, const Configuration& c, const RigidBodyType& t, RigidBodyController* RBC); + RigidBody(String name, const Configuration& c, const RigidBodyType& t, RigidBodyController* RBC, + int attached_particle_start, int attached_particle_end); + RigidBody(const RigidBody& rb); // RigidBody(const RigidBody& rb) : RigidBody(rb.name, *rb.c, *rb.t) {}; void init(); @@ -40,6 +42,8 @@ class RigidBody { // host side representation of rigid bodies int appendNumParticleBlocks( std::vector<int>* blocks ); + void update_particle_positions(Vector3* pos_d, Vector3* force_d, float* energy_d); + HOST DEVICE void addForce(Force f); HOST DEVICE void addTorque(Force t); HOST DEVICE void addEnergy(float e); @@ -76,8 +80,10 @@ class RigidBody { // host side representation of rigid bodies return Vector3( angularMomentum.x, angularMomentum.y, angularMomentum.z); } + void initializeParticleLists(); void updateParticleList(Vector3* pos_d, BaseGrid* sys_d); void callGridParticleForceKernel(Vector3* pos_d, Vector3* force_d, int s, float* energy, bool get_energy, int scheme, BaseGrid* sys, BaseGrid* sys_d, ForceEnergy* forcestorques_d, const std::vector<int>& forcestorques_offset, int& fto_idx); + void apply_attached_particle_forces(const Vector3* force); void applyGridParticleForces(BaseGrid* sys, ForceEnergy* forcestorques, const std::vector<int>& forcestorques_offset, int& fto_idx); bool langevin; @@ -122,9 +128,12 @@ private: bool isFirstStep; int* numParticles; /* particles affected by potential grids */ + int** possible_particles_d; int** particles_d; const cudaStream_t** particleForceStreams; - + + int attached_particle_start, attached_particle_end; + /*–––––––––––––––––––––––––––––––––––––––––. | units "kcal_mol/AA * ns" "(AA/ns) * amu" | `–––––––––––––––––––––––––––––––––––––––––*/ diff --git a/src/RigidBodyController.cu b/src/RigidBodyController.cu index c5e855ebb34692a168a72198431ff1450d914a53..b7fd3c8964273b336705e4f912e1f7d4df8eedd3 100644 --- a/src/RigidBodyController.cu +++ b/src/RigidBodyController.cu @@ -45,15 +45,14 @@ RigidBodyController::RigidBodyController(const Configuration& c, const char* pre gpuErrchk(cudaDeviceSynchronize()); /* RBTODO: this should be extraneous */ construct_grids(); - for (int i = 0; i < conf.numRigidTypes; i++) - conf.rigidBody[i].initializeParticleLists(); int numRB = 0; // grow list of rbs - + int attached_particle_offset = 0; for (int i = 0; i < conf.numRigidTypes; i++) { numRB += conf.rigidBody[i].num; + int attached_particle_in_type = conf.rigidBody[i].num_attached_particles(); std::vector<RigidBody> tmp; // RBTODO: change conf.rigidBody to conf.rigidBodyType const int jmax = conf.rigidBody[i].num; @@ -64,13 +63,17 @@ RigidBodyController::RigidBodyController(const Configuration& c, const char* pre snprintf(stmp, 128, "#%d", j); name.add( stmp ); } - RigidBody r(name, conf, conf.rigidBody[i], this); + RigidBody r(name, conf, conf.rigidBody[i], this, + attached_particle_offset, attached_particle_offset+attached_particle_in_type); + attached_particle_offset += attached_particle_in_type; + int nb = r.appendNumParticleBlocks( &particleForceNumBlocks ); tmp.push_back( r ); } rigidBodyByType.push_back(tmp); } - + attached_particle_forces = new Vector3[attached_particle_offset]; + totalParticleForceNumBlocks = 0; for (int i=0; i < particleForceNumBlocks.size(); ++i) { particleForce_offset.push_back(2*totalParticleForceNumBlocks); @@ -96,6 +99,8 @@ RigidBodyController::~RigidBodyController() { for (int i = 0; i < rigidBodyByType.size(); i++) rigidBodyByType[i].clear(); rigidBodyByType.clear(); + + delete [] attached_particle_forces; delete random; } @@ -525,6 +530,17 @@ void RigidBodyController::initializeForcePairs() { } +void RigidBodyController::update_attached_particle_positions(Vector3* pos_d, Vector3* force_d, float* energy_d, BaseGrid* sys_d, int num, int num_rb_attached_particles, int numReplicas) { + for (int i = 0; i < rigidBodyByType.size(); i++) { + for (int j = 0; j < rigidBodyByType[i].size(); j++) { + rigidBodyByType[i][j].update_particle_positions(pos_d, force_d, energy_d); + } + } + gpuErrchk(cudaMemset((void*) force_d, 0, num_rb_attached_particles*sizeof(Vector3))); + gpuErrchk(cudaMemset((void*) energy_d, 0, num_rb_attached_particles*sizeof(float))); +} + + void RigidBodyController::updateParticleLists(Vector3* pos_d, BaseGrid* sys_d) { for (int i = 0; i < rigidBodyByType.size(); i++) { for (int j = 0; j < rigidBodyByType[i].size(); j++) { @@ -547,7 +563,7 @@ void RigidBodyController::clearForceAndTorque() } } -void RigidBodyController::updateForces(Vector3* pos_d, Vector3* force_d, int s, float* energy, bool get_energy, int scheme, BaseGrid* sys, BaseGrid* sys_d) +void RigidBodyController::updateForces(Vector3* pos_d, Vector3* force_d, int s, float* energy, bool get_energy, int scheme, BaseGrid* sys, BaseGrid* sys_d, int num, int num_rb_attached_particles) { //if (s <= 1) //gpuErrchk( cudaProfilerStart() ); @@ -564,6 +580,7 @@ void RigidBodyController::updateForces(Vector3* pos_d, Vector3* force_d, int s, // RBTODO: launch kernels ahead of time and sync using event and memcpyAsync gpuErrchk( cudaDeviceSynchronize() ); cudaMemcpy(particleForces, particleForces_d, sizeof(ForceEnergy)*2*totalParticleForceNumBlocks, cudaMemcpyDeviceToHost); + cudaMemcpyAsync(attached_particle_forces, &force_d[num], sizeof(Vector3)*(num_rb_attached_particles), cudaMemcpyDeviceToHost); pfo_idx=0; for (int i = 0; i < rigidBodyByType.size(); i++) { @@ -574,6 +591,16 @@ void RigidBodyController::updateForces(Vector3* pos_d, Vector3* force_d, int s, } } + { + gpuErrchk( cudaDeviceSynchronize() ); + for (auto &rbv: rigidBodyByType) { + for (auto &rb: rbv) { + rb.apply_attached_particle_forces( attached_particle_forces ); + } + } + } + + // Grid–Grid forces if ( ((s % conf.rigidBodyGridGridPeriod) == 0 || s == 1 ) && forcePairs.size() > 0) { for (int i=0; i < forcePairs.size(); i++) { diff --git a/src/RigidBodyController.h b/src/RigidBodyController.h index 0e2dbadbf1e5eb35f9ad4fd007fe1eeb145e8f81..fa2fa4e5d1efd6b8bf43a83e9e34b456d5f67245 100644 --- a/src/RigidBodyController.h +++ b/src/RigidBodyController.h @@ -108,8 +108,9 @@ public: void SetRandomTorques(); void integrate(BaseGrid* sys, int step); void integrateDLM(BaseGrid* sys, int step); - void updateForces(Vector3* pos_d, Vector3* force_d, int s, float* energy, bool get_energy, int scheme, BaseGrid* sys, BaseGrid* sys_d); + void updateForces(Vector3* pos_d, Vector3* force_d, int s, float* energy, bool get_energy, int scheme, BaseGrid* sys, BaseGrid* sys_d, int num, int num_rb_attached_particles); void updateParticleLists(Vector3* pos_d, BaseGrid* sys_d); + void update_attached_particle_positions(Vector3* pos_d, Vector3* force_d, float* energy_d, BaseGrid* sys_d, int num, int num_rb_attached_particles, int numReplicas); void clearForceAndTorque(); void KineticEnergy(); void print(int step); @@ -151,7 +152,8 @@ private: void construct_grids(); void destruct_grids(); - //float* rb_energy; + //float* rb_energy; + Vector3* attached_particle_forces; ForceEnergy* particleForces; ForceEnergy* particleForces_d; std::vector<int> particleForceNumBlocks; diff --git a/src/RigidBodyType.cu b/src/RigidBodyType.cu index 969a16ad7d6c3183d9a0f69ee0c4bffd142faf0f..2137160943aaf63fa2345f48e0b115373a242704 100644 --- a/src/RigidBodyType.cu +++ b/src/RigidBodyType.cu @@ -81,6 +81,58 @@ void RigidBodyType::setDampingCoeffs(float timestep) { /* MUST ONLY BE CALLED ON } +void RigidBodyType::attach_particles() { + for (const auto& filename: attached_particle_files) { + const size_t line_char = 256; + FILE* inp = fopen(filename.val(), "r"); + char line[line_char]; + + // If the particle file cannot be found, exit the program + if (inp == NULL) { + printf("ERROR: Could not open `%s'.\n", filename.val()); + fclose(inp); + exit(1); + } + + // Get and process all lines of input + while (fgets(line, line_char, inp) != NULL) { + // Lines in the particle file that begin with # are comments + if (line[0] == '#') continue; + + String s(line); + int numTokens = s.tokenCount(); + + // Break the line down into pieces (tokens) so we can process them individually + String* tokenList = new String[numTokens]; + s.tokenize(tokenList); + + // Legitimate GROUP input lines have at least 3 tokens: + // Particle_type | x | y | z + // A line without exactly six tokens should be discarded. + if (numTokens != 4) { + printf("Error: Invalid attached particle file line: %s\n", tokenList[0].val()); + fclose(inp); + exit(-1); + } + + // Make sure the index of this particle is unique. + // NOTE: The particle list is sorted by index. + int type_idx = conf->find_particle_type( tokenList[0].val() ); + if (type_idx < 0) { + printf("Error: Unrecognized particle type: %s\n", line); + fclose(inp); + exit(-1); + } + attached_particle_types.push_back( type_idx ); + attached_particle_positions.push_back( Vector3(atof(tokenList[1].val()), atof(tokenList[2].val()), atof(tokenList[3].val())) ); + } + fclose(inp); + } + size_t sz = sizeof(Vector3)*attached_particle_positions.size(); + gpuErrchk(cudaMalloc( &(attached_particle_positions_d), sz )); + gpuErrchk(cudaMemcpyAsync( attached_particle_positions_d, &attached_particle_positions[0], sz, cudaMemcpyHostToDevice)); +} + void RigidBodyType::addGrid(String s, std::vector<String> &keys, std::vector<String> &files) { // tokenize and return int numTokens = s.tokenCount(); diff --git a/src/RigidBodyType.h b/src/RigidBodyType.h index 575a31348eb24dd305e913bbf46bf0b095bb8649..5cc1442052a102da7198e1e6eeda806c4cdaa8b7 100644 --- a/src/RigidBodyType.h +++ b/src/RigidBodyType.h @@ -17,9 +17,10 @@ class BaseGrid; class RigidBodyGrid; class Configuration; class RigidBodyController; +class RigidBody; class RigidBodyType { - + friend class RigidBody; public: RigidBodyType(const String& name = "", const Configuration* conf = NULL ) : name(name), conf(conf), num(0), @@ -39,6 +40,11 @@ public: /* RigidBodyType& operator=(const RigidBodyType& src); */ void copyGridsToDevice(); + void append_attached_particle_file(String s) { attached_particle_files.push_back(s); } + void attach_particles(); + size_t num_attached_particles() const { return attached_particle_types.size() ;} + const std::vector<int>& get_attached_particle_types() const { return attached_particle_types; } + void addPotentialGrid(String s); void addDensityGrid(String s); void addPMF(String s); @@ -54,6 +60,12 @@ public: String name; private: const Configuration* conf; + std::vector<String> attached_particle_files; + std::vector<int>attached_particle_types; +private: + std::vector<Vector3>attached_particle_positions; + Vector3* attached_particle_positions_d; + public: int num; // number of particles of this type