diff --git a/BaseGrid.h b/BaseGrid.h index 3df015c508129e7dacd79cf819503c03cf788c75..716c6e335816f8c492824061ca4023dbc8d6eadb 100644 --- a/BaseGrid.h +++ b/BaseGrid.h @@ -160,10 +160,27 @@ public: // A matrix defining the basis for the entire system. Matrix3 getBox() const; - // The longest diagonal of the system. + // The diagonal (nx,ny,nz) of the system. Vector3 getExtent() const; - // The longest diagonal of the system. + // The length of diagonal (nx,ny,nz) of the system. float getDiagonal() const; + + HOST DEVICE inline int getRadius() const { + // return radius of smallest sphere circumscribing grid + float radius = basis.transform(Vector3(nx,ny,nz)).length2(); + + float tmp = basis.transform(Vector3(-nx,ny,nz)).length2(); + radius = tmp > radius ? tmp : radius; + + tmp = basis.transform(Vector3(nx,-ny,nz)).length2(); + radius = tmp > radius ? tmp : radius; + + tmp = basis.transform(Vector3(nx,ny,-nz)).length2(); + radius = tmp > radius ? tmp : radius; + + return 0.5 * sqrt(radius); + } + // The position farthest from the origin. Vector3 getDestination() const; // The center of the grid. diff --git a/ComputeGridGrid.cu b/ComputeGridGrid.cu index 2126f25c6c8ed460304b94337df24e9e894516e3..c6d1f27bcf515df601b004ee76b3df20f84897f3 100644 --- a/ComputeGridGrid.cu +++ b/ComputeGridGrid.cu @@ -110,6 +110,27 @@ void computePartGridForce(const Vector3* __restrict__ pos, Vector3* particleForc } } +__global__ +void createPartlist(const Vector3* __restrict__ pos, + const int numTypeParticles, const int* __restrict__ typeParticles_d, + int* numParticles_d, int* particles_d, + const Vector3 gridCenter, const float radius2) { + const int tid = threadIdx.x; + const int warpLane = tid % WARPSIZE; /* RBTODO: optimize */ + const int split = 32; /* numblocks should be divisible by split */ + /* const int blocksPerCell = gridDim.x/split; */ + + const int i = blockIdx.x * blockDim.x + threadIdx.x; + if (i < numTypeParticles) { + int aid = typeParticles_d[i]; + float dist = (pos[aid] - gridCenter).length2(); + + if (dist <= radius2) { + int tmp = atomicAggInc(numParticles_d, warpLane); + particles_d[tmp] = aid; + } + } +} __global__ void printRigidBodyGrid(const RigidBodyGrid* rho) { diff --git a/ComputeGridGrid.cuh b/ComputeGridGrid.cuh index 20067121dc23b853cdf362ac0a650445062e0bd3..670041ed15164ff4b3a3cf9511a472087da9c9ec 100644 --- a/ComputeGridGrid.cuh +++ b/ComputeGridGrid.cuh @@ -18,5 +18,11 @@ void computePartGridForce(const Vector3* __restrict__ pos, Vector3* particleForc const Matrix3 basis_u_inv, const Vector3 origin_u, Vector3* __restrict__ retForce, Vector3* __restrict__ retTorque); +extern __global__ +void createPartlist(const Vector3* __restrict__ pos, + const int numTypeParticles, const int* __restrict__ typeParticles_d, + int* numParticles_d, int* particles_d, + const Vector3 gridCenter, const float radius2); + extern __global__ void printRigidBodyGrid(const RigidBodyGrid* rho); diff --git a/Configuration.cpp b/Configuration.cpp index 2ed7f53aab9bea28d382e4e740815979184e1f38..ec6d3ff36cc546a63160daf55dd3455810d971f4 100644 --- a/Configuration.cpp +++ b/Configuration.cpp @@ -386,7 +386,7 @@ Configuration::~Configuration() { delete[] partForceZGridFile; delete[] partDiffusionGridFile; delete[] partReservoirFile; - delete[] partRigidBodyGrid; + partRigidBodyGrid.clear(); // TODO: plug memory leaks if (partsFromFile != NULL) delete[] partsFromFile; @@ -545,6 +545,7 @@ void Configuration::setDefaults() { electricField = 0.0f; cutoff = 10.0f; switchLen = 2.0f; + pairlistDistance = 2.0f; outputPeriod = 200; outputEnergyPeriod = -1; outputFormat = TrajectoryWriter::formatDcd; @@ -596,7 +597,7 @@ int Configuration::readParameters(const char * config_file) { partForceZGridFile = new String[numParts]; partDiffusionGridFile = new String[numParts]; partReservoirFile = new String[numParts]; - partRigidBodyGrid = new std::vector<String>[numParts]; + partRigidBodyGrid.resize(numParts); // Allocate the table variables. partTableFile = new String[numParts*numParts]; @@ -662,6 +663,8 @@ int Configuration::readParameters(const char * config_file) { cutoff = (float) strtod(value.val(), NULL); else if (param == String("switchLen")) switchLen = (float) strtod(value.val(), NULL); + else if (param == String("pairlistDistance")) + pairlistDistance = (float) strtod(value.val(), NULL); else if (param == String("outputPeriod")) outputPeriod = atoi(value.val()); else if (param == String("outputEnergyPeriod")) @@ -778,7 +781,7 @@ int Configuration::readParameters(const char * config_file) { // RIGID BODY else if (param == String("rigidBody")) { // part[++currPart] = BrownianParticleType(value); - rigidBody[++currRB] = RigidBodyType(value); + rigidBody[++currRB] = RigidBodyType(value, this); currPartClass = partClassRB; } else if (param == String("mass")) diff --git a/Configuration.h b/Configuration.h index 450cc8b9a52cb9645f3a51febd8376bc2a8760b6..fdcfa5575215711b5902b714500e7468d18b53e4 100644 --- a/Configuration.h +++ b/Configuration.h @@ -139,6 +139,7 @@ public: float coulombConst; float electricField; float cutoff; + float pairlistDistance; float switchLen; int outputPeriod; int outputEnergyPeriod; @@ -176,7 +177,7 @@ public: bool readAnglesFromFile; bool readDihedralsFromFile; String* partGridFile; - std::vector<String>* partRigidBodyGrid; + std::vector< std::vector<String> > partRigidBodyGrid; String* partDiffusionGridFile; String* partForceXGridFile; String* partForceYGridFile; diff --git a/GrandBrownTown.cu b/GrandBrownTown.cu index ba82eddecb4b99d6c048e1e31ecae00f51f7b773..a7104ba61c45fd373cfb63eaaa2d1c7f1480c3bb 100644 --- a/GrandBrownTown.cu +++ b/GrandBrownTown.cu @@ -364,6 +364,8 @@ void GrandBrownTown::run() { { // cudaSetDevice(0); internal->decompose(); + gpuErrchk(cudaDeviceSynchronize()); + RBC.updateParticleLists( internal->getPos_d() ); } float t; // simulation time @@ -395,6 +397,7 @@ void GrandBrownTown::run() { { // cudaSetDevice(0); internal -> decompose(); + RBC.updateParticleLists( internal->getPos_d() ); } //MLog: added Bond* bondList to the list of passed in variables. @@ -416,6 +419,7 @@ void GrandBrownTown::run() { { // cudaSetDevice(0); internal->decompose(); + RBC.updateParticleLists( internal->getPos_d() ); } energy = internal->compute(get_energy); break; diff --git a/RigidBody.cu b/RigidBody.cu index 67cea83a5f50ca0074957106d3e244bc7f1f7828..4d991c6ea99d2a1834de592d34ec8d9ccf0fc836 100644 --- a/RigidBody.cu +++ b/RigidBody.cu @@ -2,13 +2,25 @@ #include <typeinfo> #include "RigidBody.h" #include "Configuration.h" +#include "ComputeGridGrid.cuh" #include "Debug.h" -RigidBody::RigidBody(String name, const Configuration& cref, RigidBodyType& tref) - : name(name), c(&cref), t(&tref), impulse_to_momentum(4.184e8f) { - // units "(kcal_mol/AA) * ns" "dalton AA/ns" * 4.184e+08 - +#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } +inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true) { + if (code != cudaSuccess) { + fprintf(stderr,"CUDA Error: %s %s %d\n", cudaGetErrorString(code), __FILE__, line); + if (abort) exit(code); + } +} + + +RigidBody::RigidBody(String name, const Configuration& cref, const RigidBodyType& tref) + : name(name), c(&cref), t(&tref), impulse_to_momentum(4.184e8f) { init(); } +RigidBody::RigidBody(const RigidBody& rb) + : name(rb.name), c(rb.c), t(rb.t), impulse_to_momentum(4.184e8f) { init(); } +void RigidBody::init() { + // units "(kcal_mol/AA) * ns" "dalton AA/ns" * 4.184e+08 timestep = c->timestep; Temp = c->temperature; // RBTODO: use temperature grids @@ -25,6 +37,48 @@ RigidBody::RigidBody(String name, const Configuration& cref, RigidBodyType& tref angularMomentum.x *= t->inertia.x; angularMomentum.y *= t->inertia.y; angularMomentum.z *= t->inertia.z; + + // Memory allocation for forces between particles and grids + const int& numGrids = t->numPotGrids; + numParticles = new int[numGrids]; + particles_d = new int*[numGrids]; + particleForces = new Vector3*[numGrids]; + particleTorques = new Vector3*[numGrids]; + particleForces_d = new Vector3*[numGrids]; + particleTorques_d = new Vector3*[numGrids]; + for (int i = 0; i < numGrids; ++i) { + const int& n = t->numParticles[i]; + const int nb = (n/NUMTHREADS)+1; // max number of blocks + if (n > 0) { + gpuErrchk(cudaMalloc( &particles_d[i], sizeof(int)*n )); + particleForces[i] = new Vector3[nb]; + particleTorques[i] = new Vector3[nb]; + gpuErrchk(cudaMalloc( &particleForces_d[i], sizeof(Vector3)*nb )); + gpuErrchk(cudaMalloc( &particleTorques_d[i], sizeof(Vector3)*nb )); + } + } +} + +RigidBody::~RigidBody() { + const int& numGrids = t->numPotGrids; + for (int i = 0; i < numGrids; ++i) { + const int& n = t->numParticles[i]; + if (n > 0) { + gpuErrchk(cudaFree( particles_d[i] )); + delete[] particleForces[i]; + delete[] particleTorques[i]; + gpuErrchk(cudaFree( particleForces_d[i] )); + gpuErrchk(cudaFree( particleTorques_d[i] )); + } + } + if (numParticles != NULL) { + delete[] numParticles; + delete[] particles_d; + delete[] particleForces; + delete[] particleTorques; + delete[] particleForces_d; + delete[] particleTorques_d; + } } void RigidBody::addForce(Force f) { @@ -33,7 +87,91 @@ void RigidBody::addForce(Force f) { void RigidBody::addTorque(Force torq) { torque += torq; } -RigidBody::~RigidBody() {} + +void RigidBody::updateParticleList(Vector3* pos_d) { + for (int i = 0; i < t->numPotGrids; ++i) { + int& tnp = t->numParticles[i]; + if (tnp > 0) { + Vector3 gridCenter = t->potentialGrids[i].getCenter(); + float cutoff = gridCenter.length(); + cutoff += t->potentialGrids[i].getRadius(); + cutoff += c->pairlistDistance; + + numParticles[i] = 0; + int* tmp_d; + gpuErrchk(cudaMalloc( &tmp_d, sizeof(int) )); + gpuErrchk(cudaMemcpy( tmp_d, &numParticles[i], sizeof(int), cudaMemcpyHostToDevice )); + + int nb = floor(tnp/NUMTHREADS) + 1; + createPartlist<<<NUMTHREADS,nb>>>(pos_d, tnp, t->particles_d[i], + tmp_d, particles_d[i], + gridCenter + position, cutoff*cutoff); + + gpuErrchk(cudaMemcpy(&numParticles[i], tmp_d, sizeof(int), cudaMemcpyDeviceToHost )); + } + } +} + +void RigidBody::callGridParticleForceKernel(Vector3* pos_d, Vector3* force_d, int s) { + // Apply the force and torque on the rigid body, and forces on particles + + // RBTODO: performance: consolidate CUDA stream management + // loop over potential grids + for (int i = 0; i < t->numPotGrids; ++i) { + if (numParticles[i] == 0) continue; + // const int nb = 500; + /* + r: postion of particle in real space + B: grid Basis + o: grid origin + R: rigid body orientation + c: rigid body center + + B': R.B + c': R.o + c + */ + + Vector3 c = getOrientation()*t->potentialGrids[i].getOrigin() + getPosition(); + Matrix3 B = (getOrientation()*t->potentialGrids[i].getBasis()).inverse(); + + // RBTODO: get energy + // RBTODO: performance: Improve parellism here (use streams/events; overlap with other computations) + const int nb = (numParticles[i]/NUMTHREADS)+1; + + Vector3* forces = particleForces[i]; + Vector3* torques = particleTorques[i]; + Vector3* forces_d = particleForces_d[i]; + Vector3* torques_d = particleTorques_d[i]; + + for (int k=0; k < nb; ++k) { + forces[k] = Vector3(0.0f); + torques[k] = Vector3(0.0f); + } + gpuErrchk(cudaMemcpy(forces_d, forces, sizeof(Vector3)*nb, cudaMemcpyHostToDevice)); + gpuErrchk(cudaMemcpy(torques_d, torques, sizeof(Vector3)*nb, cudaMemcpyHostToDevice)); + + computePartGridForce<<< nb, NUMTHREADS, NUMTHREADS*2*sizeof(Vector3) >>>( + pos_d, force_d, numParticles[i], particles_d[i], + t->rawPotentialGrids_d[i], + B, c, forces_d, torques_d); + + gpuErrchk(cudaMemcpy(forces, forces_d, sizeof(Vector3)*nb, cudaMemcpyDeviceToHost)); + gpuErrchk(cudaMemcpy(torques, torques_d, sizeof(Vector3)*nb, cudaMemcpyDeviceToHost)); + + // Sum and apply forces and torques + Vector3 f = Vector3(0.0f); + Vector3 torq = Vector3(0.0f); + for (int k = 0; k < nb; ++k) { + f = f + forces[k]; + torq = torq + torques[k]; + } + + torq = -torq + (getPosition()-c).cross( f ); + addForce( -f ); + addTorque( torq ); + } +} + /*===========================================================================\ | Following "Algorithm for rigid-body Brownian dynamics" Dan Gordon, Matthew | | Hoyles, and Shin-Ho Chung | diff --git a/RigidBody.h b/RigidBody.h index 630db1feb90262ffb4edad9d149e3a54d5c059ad..f2be7912bb1a33787788f66ad94621ce99ddd370 100644 --- a/RigidBody.h +++ b/RigidBody.h @@ -27,9 +27,12 @@ class RigidBody { // host side representation of rigid bodies | splitting methods for rigid body molecular dynamics". J Chem Phys. (1997) | `––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––*/ public: - HOST DEVICE RigidBody(String name, const Configuration& c, RigidBodyType& t); + RigidBody(String name, const Configuration& c, const RigidBodyType& t); + RigidBody(const RigidBody& rb); + // RigidBody(const RigidBody& rb) : RigidBody(rb.name, *rb.c, *rb.t) {}; + void init(); /* HOST DEVICE RigidBody(RigidBodyType t); */ - HOST DEVICE ~RigidBody(); + ~RigidBody(); HOST DEVICE void addForce(Force f); HOST DEVICE void addTorque(Force t); @@ -46,6 +49,7 @@ class RigidBody { // host side representation of rigid bodies // HOST DEVICE inline String getKey() const { return t->name; } HOST DEVICE inline String getKey() const { return name; } + HOST DEVICE inline Vector3 transformBodyToLab(Vector3 v) const { return orientation*v + position; } HOST DEVICE inline Vector3 getPosition() const { return position; } HOST DEVICE inline Matrix3 getOrientation() const { return orientation; } // HOST DEVICE inline Matrix3 getBasis() const { return orientation; } @@ -56,9 +60,13 @@ class RigidBody { // host side representation of rigid bodies angularMomentum.y / t->inertia.y, angularMomentum.z / t->inertia.z ); } + void updateParticleList(Vector3* pos_d); + void callGridParticleForceKernel(Vector3* pos_d, Vector3* force_d, int s); + + bool langevin; Vector3 torque; // lab frame (except in integrate()) - + private: // String key; String name; @@ -82,12 +90,20 @@ private: // integration const Configuration* c; - RigidBodyType* t; /* RBTODO: const? */ + const RigidBodyType* t; float timestep; Vector3 force; // lab frame bool isFirstStep; - + + int* numParticles; /* particles affected by potential grids */ + int** particles_d; + Vector3** particleForces; + Vector3** particleTorques; + Vector3** particleForces_d; + Vector3** particleTorques_d; + + /*–––––––––––––––––––––––––––––––––––––––––. | units "kcal_mol/AA * ns" "(AA/ns) * amu" | `–––––––––––––––––––––––––––––––––––––––––*/ diff --git a/RigidBodyController.cu b/RigidBodyController.cu index 268e0dc9856c7f1c52c3e4cdb78b3d439d405533..66cb5cd6b863747a5a27afd2dd3882ffb45c43a4 100644 --- a/RigidBodyController.cu +++ b/RigidBodyController.cu @@ -32,14 +32,14 @@ inline void gpuAssert(cudaError_t code, String file, int line, bool abort=true) RigidBodyController::RigidBodyController(const Configuration& c, const char* outArg) : conf(c), outArg(outArg) { - - if (conf.numRigidTypes > 0) { - copyGridsToDevice(); - } + gpuErrchk(cudaDeviceSynchronize()); /* RBTODO: this should be extraneous */ + for (int i = 0; i < conf.numRigidTypes; i++) + conf.rigidBody[i].initializeParticleLists(); int numRB = 0; // grow list of rbs for (int i = 0; i < conf.numRigidTypes; i++) { + conf.rigidBody[i].copyGridsToDevice(); numRB += conf.rigidBody[i].num; std::vector<RigidBody> tmp; // RBTODO: change conf.rigidBody to conf.rigidBodyType @@ -53,15 +53,17 @@ RigidBodyController::RigidBodyController(const Configuration& c, const char* out } RigidBody r(name, conf, conf.rigidBody[i]); tmp.push_back( r ); - } + } rigidBodyByType.push_back(tmp); -} + } random = new RandomCPU(conf.seed + 1); /* +1 to avoid using same seed as RandomCUDA */ + gpuErrchk(cudaDeviceSynchronize()); /* RBTODO: this should be extraneous */ initializeForcePairs(); - initializeParticleLists(); + gpuErrchk(cudaDeviceSynchronize()); /* RBTODO: this should be extraneous */ } + RigidBodyController::~RigidBodyController() { for (int i = 0; i < rigidBodyByType.size(); i++) rigidBodyByType[i].clear(); @@ -168,85 +170,12 @@ void RigidBodyController::initializeForcePairs() { } -void RigidBodyController::initializeParticleLists() { - // Populate RigidBodyType.particles - - // TODO: ensure no duplicates in conf.partRigidBodyGrid[i] - - // Allocate RB type's numParticles array - for (int rb = 0; rb < conf.numRigidTypes; ++rb) { - RigidBodyType& t = conf.rigidBody[rb]; - t.numParticles = new int[t.numPotGrids]; - for (int i = 0; i < t.numPotGrids; ++i) t.numParticles[i] = 0; - } - - // Count the number of particles; Loop over particle types - for (int i = 0; i < conf.numParts; ++i) { - - // Loop over rigid body grid names associated with particle type - const std::vector<String>& gridNames = conf.partRigidBodyGrid[i]; - for (int j = 0; j < gridNames.size(); ++j) { - - // Loop over RB types - for (int rb = 0; rb < conf.numRigidTypes; ++rb) { - RigidBodyType& t = conf.rigidBody[rb]; - const std::vector<String>& keys = t.potentialGridKeys; - - // Loop over potential grids - for(int k = 0; k < keys.size(); k++) { - // printf(" checking grid keys "); - if (gridNames[j] == keys[k]) - t.numParticles[k] += conf.numPartsOfType[i]; - } - } - } - } - - // Allocate each particles array - for (int rb = 0; rb < conf.numRigidTypes; ++rb) { - RigidBodyType& t = conf.rigidBody[rb]; - t.particles = new int*[t.numPotGrids]; - for (int i = 0; i < t.numPotGrids; ++i) { - t.particles[i] = new int[t.numParticles[i]]; - t.numParticles[i] = 0; // now use this as a counter - } - } - - // Set the number of particles; Loop over particle types - for (int i = 0; i < conf.numParts; ++i) { - int tmp[conf.numPartsOfType[i]]; // temporary array holding particles of type i - int currId = 0; - for (int j = 0; j < conf.num; ++j) { - if (conf.type[j] == i) - tmp[currId++] = j; - } - - // Loop over rigid body grid names associated with particle type - const std::vector<String>& gridNames = conf.partRigidBodyGrid[i]; - for (int j = 0; j < gridNames.size(); ++j) { - - // Loop over RB types - for (int rb = 0; rb < conf.numRigidTypes; ++rb) { - RigidBodyType& t = conf.rigidBody[rb]; - const std::vector<String>& keys = t.potentialGridKeys; - - // Loop over potential grids - for(int k = 0; k < keys.size(); k++) { - // printf(" checking grid keys "); - if (gridNames[j] == keys[k]) { - memcpy( &(t.particles[k][t.numParticles[k]]), tmp, sizeof(int)*currId ); - t.numParticles[k] += currId; - } - } - } +void RigidBodyController::updateParticleLists(Vector3* pos_d) { + for (int i = 0; i < rigidBodyByType.size(); i++) { + for (int j = 0; j < rigidBodyByType[i].size(); j++) { + rigidBodyByType[i][j].updateParticleList(pos_d); } } - - // Initialize device data for RB force pairs after std::vector is done growing - - // for (int i = 0; i < forcePairs.size(); i++) - // forcePairs[i].initialize(); - } @@ -263,18 +192,24 @@ void RigidBodyController::updateForces(Vector3* pos_d, Vector3* force_d, int s) rb.clearTorque(); } } - + // Grid–particle forces for (int i = 0; i < rigidBodyByType.size(); i++) { - callGridParticleForceKernel( pos_d, force_d, conf.rigidBody[i], rigidBodyByType[i], s ); + for (int j = 0; j < rigidBodyByType[i].size(); j++) { + RigidBody& rb = rigidBodyByType[i][j]; + rb.callGridParticleForceKernel( pos_d, force_d, s ); + } } - + // Grid–Grid forces if (forcePairs.size() > 0) { - for (int i=0; i < forcePairs.size(); i++) - forcePairs[i].callGridForceKernel(i,s); - + for (int i=0; i < forcePairs.size(); i++) { + // TODO: performance: make this check occur less frequently + if (forcePairs[i].isWithinPairlistDist()) + forcePairs[i].callGridForceKernel(i,s); + } + // each kernel call is followed by async memcpy for previous; now get last RigidBodyForcePair* fp = RigidBodyForcePair::lastRbForcePair; fp->retrieveForcesForGrid( fp->lastRbGridID ); @@ -288,10 +223,11 @@ void RigidBodyController::updateForces(Vector3* pos_d, Vector3* force_d, int s) gpuErrchk(cudaDeviceSynchronize()); for (int i=0; i < forcePairs.size(); i++) - forcePairs[i].processGPUForces(); - + if (forcePairs[i].isWithinPairlistDist()) + forcePairs[i].processGPUForces(); } } + void RigidBodyController::integrate(int step) { // tell RBs to integrate for (int i = 0; i < rigidBodyByType.size(); i++) { @@ -360,14 +296,28 @@ void RigidBodyForcePair::createStreams() { gpuErrchk( cudaStreamCreate( &(stream[i]) ) ); // gpuErrchk( cudaStreamCreateWithFlags( &(stream[i]) , cudaStreamNonBlocking ) ); } +bool RigidBodyForcePair::isWithinPairlistDist() const { + float pairlistDist = 2.0f; /* TODO: get from conf */ + float rbDist = (rb1->getPosition() - rb2->getPosition()).length(); + + for (int i = 0; i < gridKeyId1.size(); ++i) { + const int k1 = gridKeyId1[i]; + const int k2 = gridKeyId2[i]; + float d1 = type1->densityGrids[k1].getRadius() + type1->densityGrids[k1].getCenter().length(); + float d2 = type2->potentialGrids[k2].getRadius() + type2->potentialGrids[k2].getCenter().length(); + if (rbDist < d1+d2+pairlistDist) + return true; + } + return false; +} Vector3 RigidBodyForcePair::getOrigin1(const int i) { const int k1 = gridKeyId1[i]; - return rb1->getOrientation()*type1->densityGrids[k1].getOrigin() + rb1->getPosition(); + return rb1->transformBodyToLab( type1->densityGrids[k1].getOrigin() ); } Vector3 RigidBodyForcePair::getOrigin2(const int i) { const int k2 = gridKeyId2[i]; if (!isPmf) - return rb2->getOrientation()*type2->potentialGrids[k2].getOrigin() + rb2->getPosition(); + return rb2->transformBodyToLab( type2->potentialGrids[k2].getOrigin() ); else return type2->rawPmfs[k2].getOrigin(); } @@ -441,68 +391,6 @@ void RigidBodyForcePair::callGridForceKernel(int pairId, int s) { lastRbGridID = i; } } -void RigidBodyController::callGridParticleForceKernel(Vector3* pos_d, Vector3* force_d, - const RigidBodyType& t, std::vector<RigidBody>& rbs, int s) { - // get the force/torque on a rigid body, and forces on particles - - // RBTODO: consolidate CUDA stream management - for (int i = 0; i < t.numPotGrids; ++i) { - if (t.numParticles[i] == 0) continue; - - for (int j = 0; j < rbs.size(); ++j) { - // const int nb = 500; - /* - r: postion of particle in real space - B: grid Basis - o: grid origin - R: rigid body orientation - c: rigid body center - - B': R.B - c': R.o + c - */ - // Matrix3 B1 = getBasis1(i); - Vector3 c = rbs[j].getOrientation()*t.potentialGrids[i].getOrigin() + rbs[j].getPosition(); - Matrix3 B = (rbs[j].getOrientation()*t.potentialGrids[i].getBasis()).inverse(); - - // RBTODO: get energy - const int nb = (t.numParticles[i]/NUMTHREADS)+1; - - // RBTODO: IMPORTANT: Improve this - Vector3 forces[nb]; - Vector3 torques[nb]; - for (int k=0; k < nb; ++k) { - forces[k] = Vector3(0.0f); - torques[k] = Vector3(0.0f); - } - Vector3* forces_d; - Vector3* torques_d; - gpuErrchk(cudaMalloc(&forces_d, sizeof(Vector3)*nb)); - gpuErrchk(cudaMalloc(&torques_d, sizeof(Vector3)*nb)); - gpuErrchk(cudaMemcpy(forces_d, forces, sizeof(Vector3)*nb, cudaMemcpyHostToDevice)); - gpuErrchk(cudaMemcpy(torques_d, torques, sizeof(Vector3)*nb, cudaMemcpyHostToDevice)); - - computePartGridForce<<< nb, NUMTHREADS, NUMTHREADS*2*sizeof(Vector3) >>>( - pos_d, force_d, t.numParticles[i], t.particles[i], - t.rawPotentialGrids_d[i], - B, c, forces_d, torques_d); - - gpuErrchk(cudaMemcpy(forces, forces_d, sizeof(Vector3)*nb, cudaMemcpyDeviceToHost)); - gpuErrchk(cudaMemcpy(torques, torques_d, sizeof(Vector3)*nb, cudaMemcpyDeviceToHost)); - - Vector3 f = Vector3(0.0f); - Vector3 t = Vector3(0.0f); - for (int k = 0; k < nb; ++k) { - f = f + forces[k]; - t = t + torques[j]; - } - - t = -t - (rbs[j].getPosition()-c).cross( -f ); - rbs[j].addForce( -f ); - rbs[j].addTorque( t ); - } - } -} void RigidBodyForcePair::retrieveForcesForGrid(const int i) { // i: grid ID (less than numGrids) @@ -553,137 +441,6 @@ void RigidBodyForcePair::processGPUForces() { // printf("force: %s\n", f.toString().val()); // printf("torque: %s\n", t.toString().val()); - -} - -void RigidBodyController::copyGridsToDevice() { - // RBTODO: clean this function up - RigidBodyType **rb_addr = new RigidBodyType*[conf.numRigidTypes]; /* temporary pointer to device pointer */ - - gpuErrchk(cudaMalloc(&rbType_d, sizeof(RigidBodyType*) * conf.numRigidTypes)); - // TODO: The above line fails when there is not enough memory. If it fails, stop. - - printf("Copying RBs\n"); - // Copy rigidbody types - // http://stackoverflow.com/questions/16024087/copy-an-object-to-device - for (int i = 0; i < conf.numRigidTypes; i++) - conf.rigidBody[i].updateRaw(); - - - // density grids - for (int i = 0; i < conf.numRigidTypes; i++) { - printf("Copying density grids of RB type %d\n",i); - RigidBodyType& rb = conf.rigidBody[i]; - - int ng = rb.numDenGrids; - rb.rawDensityGrids_d = new RigidBodyGrid*[ng]; /* not sure this is needed */ - - printf(" RigidBodyType %d: numGrids = %d\n", i, ng); - // copy grid data to device - for (int gid = 0; gid < ng; gid++) { - RigidBodyGrid* g = &(rb.rawDensityGrids[gid]); // convenience - // RigidBodyGrid* g_d = rb.rawDensityGrids_d[gid]; // convenience - int len = g->getSize(); - float* tmpData; - - size_t sz = sizeof(RigidBodyGrid); - gpuErrchk(cudaMalloc((void **) &(rb.rawDensityGrids_d[gid]), sz)); - /* gpuErrchk(cudaMemcpy(rb.rawDensityGrids_d[gid], g, */ - /* sz, cudaMemcpyHostToDevice)); */ - gpuErrchk(cudaMemcpy(rb.rawDensityGrids_d[gid], &(rb.rawDensityGrids[gid]), - sz, cudaMemcpyHostToDevice)); - - // allocate grid data on device - // copy temporary host pointer to device pointer - // copy data to device through temporary host pointer - sz = sizeof(float) * len; - gpuErrchk(cudaMalloc((void **) &tmpData, sz)); - // gpuErrchk(cudaMemcpy( tmpData, g->val, sz, cudaMemcpyHostToDevice)); - gpuErrchk(cudaMemcpy( tmpData, rb.rawDensityGrids[gid].val, sz, cudaMemcpyHostToDevice)); - gpuErrchk(cudaMemcpy( &(rb.rawDensityGrids_d[gid]->val), &tmpData, - sizeof(float*), cudaMemcpyHostToDevice)); - } - } - - for (int i = 0; i < conf.numRigidTypes; i++) { - printf("Working on RB %d\n",i); - RigidBodyType& rb = conf.rigidBody[i]; - - int ng = rb.numPotGrids; - rb.rawPotentialGrids_d = new RigidBodyGrid*[ng]; /* not 100% sure this is needed, possible memory leak */ - - printf(" RigidBodyType %d: numGrids = %d\n", i, ng); - // copy potential grid data to device - for (int gid = 0; gid < ng; gid++) { - RigidBodyGrid* g = &(rb.rawPotentialGrids[gid]); // convenience - // RigidBodyGrid* g_d = rb.rawDensityGrids_d[gid]; // convenience - int len = g->getSize(); - float* tmpData; - // tmpData = new float*[len]; - - size_t sz = sizeof(RigidBodyGrid); - gpuErrchk(cudaMalloc((void **) &(rb.rawPotentialGrids_d[gid]), sz)); - gpuErrchk(cudaMemcpy( rb.rawPotentialGrids_d[gid], &(rb.rawPotentialGrids[gid]), - sz, cudaMemcpyHostToDevice )); - - // allocate grid data on device - // copy temporary host pointer to device pointer - // copy data to device through temporary host pointer - sz = sizeof(float) * len; - gpuErrchk(cudaMalloc((void **) &tmpData, sz)); - // sz = sizeof(float) * len; - gpuErrchk(cudaMemcpy( tmpData, rb.rawPotentialGrids[gid].val, sz, cudaMemcpyHostToDevice)); - gpuErrchk(cudaMemcpy( &(rb.rawPotentialGrids_d[gid]->val), &tmpData, - sizeof(float*), cudaMemcpyHostToDevice)); - - // RBTODO: why can't tmpData be deleted? - // delete[] tmpData; - } - } - - for (int i = 0; i < conf.numRigidTypes; i++) { - printf("Copying PMFs for RB %d\n",i); - RigidBodyType& rb = conf.rigidBody[i]; - - int ng = rb.numPmfs; - rb.rawPmfs_d = new RigidBodyGrid*[ng]; /* not 100% sure this is needed, possible memory leak */ - - printf(" RigidBodyType %d: numPmfs = %d\n", i, ng); - - // copy pmf grid data to device - for (int gid = 0; gid < ng; gid++) { - RigidBodyGrid g = rb.rawPmfs[gid]; - int len = g.getSize(); - float* tmpData; - // tmpData = new float*[len]; - - size_t sz = sizeof(RigidBodyGrid); - gpuErrchk(cudaMalloc((void **) &(rb.rawPmfs_d[gid]), sz)); - gpuErrchk(cudaMemcpy( rb.rawPmfs_d[gid], &g, - sz, cudaMemcpyHostToDevice )); - - // allocate grid data on device - // copy temporary host pointer to device pointer - // copy data to device through temporary host pointer - sz = sizeof(float) * len; - gpuErrchk(cudaMalloc((void **) &tmpData, sz)); - // sz = sizeof(float) * len; - gpuErrchk(cudaMemcpy( tmpData, rb.rawPmfs[gid].val, sz, cudaMemcpyHostToDevice)); - gpuErrchk(cudaMemcpy( &(rb.rawPmfs_d[gid]->val), &tmpData, - sizeof(float*), cudaMemcpyHostToDevice)); - - } - } - - gpuErrchk(cudaDeviceSynchronize()); - printf("Done copying RBs\n"); - - /* // DEBUG */ - /* RigidBodyType& rb = conf.rigidBody[0]; */ - /* printRigidBodyGrid<<<1,1>>>( rb.rawPotentialGrids_d[0] ); */ - /* gpuErrchk(cudaDeviceSynchronize()); */ - /* printRigidBodyGrid<<<1,1>>>( rb.rawDensityGrids_d[0] ); */ - /* gpuErrchk(cudaDeviceSynchronize()); */ } void RigidBodyController::print(int step) { diff --git a/RigidBodyController.h b/RigidBodyController.h index 8aed324cd00d4ea7605a9264a00b625cfc44179e..66f40e2dd87685cdeb1e18ef131ff6b4de384f27 100644 --- a/RigidBodyController.h +++ b/RigidBodyController.h @@ -18,6 +18,8 @@ class RigidBody; class Configuration; class RandomCPU; + +// TODO: performance: create RigidBodyGridPair so pairlistdist check is done per grid pair, not per RB pair class RigidBodyForcePair { friend class RigidBodyController; @@ -42,10 +44,11 @@ public: printf(" Copying assigning RB force pair...\n"); swap(*this,o); return *this; - } - + } ~RigidBodyForcePair(); + bool isWithinPairlistDist() const; + private: int initialize(); void swap(RigidBodyForcePair& a, RigidBodyForcePair& b); @@ -95,12 +98,10 @@ public: void integrate(int step); void updateForces(Vector3* pos_d, Vector3* force_d, int s); - void callGridParticleForceKernel(Vector3* pos_d, Vector3* force_d, const RigidBodyType& t, std::vector<RigidBody>& rbs, int s); + void updateParticleLists(Vector3* pos_d); private: - void copyGridsToDevice(); void initializeForcePairs(); - void initializeParticleLists(); void print(int step); void printLegend(std::ofstream &file); diff --git a/RigidBodyGrid.cu b/RigidBodyGrid.cu index f3663d439f721674a35edd6519f6a0a6d560bd8c..5f6caf3ca444c4ae626dd01674074f85eae52137 100644 --- a/RigidBodyGrid.cu +++ b/RigidBodyGrid.cu @@ -27,80 +27,6 @@ RigidBodyGrid::RigidBodyGrid(int nx0, int ny0, int nz0) { zero(); } -// Make an orthogonal grid given the box dimensions and resolution. -RigidBodyGrid::RigidBodyGrid(Vector3 box, float dx) { - dx = fabsf(dx); - box.x = fabsf(box.x); - box.y = fabsf(box.y); - box.z = fabsf(box.z); - - // Tile the grid into the system box. - // The grid spacing is always a bit smaller than dx. - nx = int(ceilf(box.x/dx)); - ny = int(ceilf(box.y/dx)); - nz = int(ceilf(box.z/dx)); - if (nx <= 0) nx = 1; - if (ny <= 0) ny = 1; - if (nz <= 0) nz = 1; - - val = new float[nx*ny*nz]; - zero(); -} - -// The box gives the system geometry. -// The grid point numbers define the resolution. -RigidBodyGrid::RigidBodyGrid(Matrix3 box, int nx0, int ny0, int nz0) { - nx = nx0; - ny = ny0; - nz = nz0; - - // Tile the grid into the system box. - if (nx <= 0) nx = 1; - if (ny <= 0) ny = 1; - if (nz <= 0) nz = 1; - - val = new float[nx*ny*nz]; - zero(); -} - -// The box gives the system geometry. -// dx is the approx. resolution. -// The grid spacing is always a bit larger than dx. -RigidBodyGrid::RigidBodyGrid(Matrix3 box, Vector3 origin0, float dx) { - dx = fabs(dx); - - // Tile the grid into the system box. - // The grid spacing is always a bit larger than dx. - nx = int(floor(box.ex().length()/dx))-1; - ny = int(floor(box.ey().length()/dx))-1; - nz = int(floor(box.ez().length()/dx))-1; - if (nx <= 0) nx = 1; - if (ny <= 0) ny = 1; - if (nz <= 0) nz = 1; - - val = new float[nx*ny*nz]; - zero(); -} - -// The box gives the system geometry. -// dx is the approx. resolution. -// The grid spacing is always a bit smaller than dx. -RigidBodyGrid::RigidBodyGrid(Matrix3 box, float dx) { - dx = fabs(dx); - - // Tile the grid into the system box. - // The grid spacing is always a bit smaller than dx. - nx = int(ceilf(box.ex().length()/dx)); - ny = int(ceilf(box.ey().length()/dx)); - nz = int(ceilf(box.ez().length()/dx)); - if (nx <= 0) nx = 1; - if (ny <= 0) ny = 1; - if (nz <= 0) nz = 1; - - val = new float[nx*ny*nz]; - zero(); -} - RigidBodyGrid::RigidBodyGrid(const BaseGrid& g) { nx = g.nx; ny = g.ny; @@ -302,7 +228,7 @@ DEVICE ForceEnergy RigidBodyGrid::interpolateForceDLinearly(const Vector3& l) co Vector3 f; - const float wx = l.x - homeY; + const float wx = l.x - homeX; const float wy = l.y - homeY; const float wz = l.z - homeZ; diff --git a/RigidBodyGrid.h b/RigidBodyGrid.h index bcc44d1294862511b17d5d81b068a8cb73fe9b26..2c46b4ff1331f57a8d04cc4b21e79e124bb438ef 100644 --- a/RigidBodyGrid.h +++ b/RigidBodyGrid.h @@ -47,23 +47,6 @@ public: // The most obvious of constructors. RigidBodyGrid(int nx0, int ny0, int nz0); - // Make an orthogonal grid given the box dimensions and resolution. - RigidBodyGrid(Vector3 box, float dx); - - // The box gives the system geometry. - // The grid point numbers define the resolution. - RigidBodyGrid(Matrix3 box, int nx0, int ny0, int nz0); - - // The box gives the system geometry. - // dx is the approx. resolution. - // The grid spacing is always a bit larger than dx. - RigidBodyGrid(Matrix3 box, Vector3 origin0, float dx); - - // The box gives the system geometry. - // dx is the approx. resolution. - // The grid spacing is always a bit smaller than dx. - RigidBodyGrid(Matrix3 box, float dx); - // Make a copy of a BaseGrid grid. RigidBodyGrid(const BaseGrid& g); @@ -109,6 +92,22 @@ public: HOST DEVICE inline int getNz() const {return nz;} HOST DEVICE inline int getSize() const {return nx*ny*nz;} + HOST DEVICE inline int getRadius(Matrix3 basis) const { + // return radius of smallest sphere circumscribing grid + float radius = basis.transform(Vector3(nx,ny,nz)).length2(); + + float tmp = basis.transform(Vector3(-nx,ny,nz)).length2(); + radius = tmp > radius ? tmp : radius; + + tmp = basis.transform(Vector3(nx,-ny,nz)).length2(); + radius = tmp > radius ? tmp : radius; + + tmp = basis.transform(Vector3(nx,ny,-nz)).length2(); + radius = tmp > radius ? tmp : radius; + + return 0.5 * sqrt(radius); + } + // Add a fixed value to the grid. void shift(float s); diff --git a/RigidBodyType.cu b/RigidBodyType.cu index c3f600d9d0be96174900453ee3b465e1a29f9ed7..cc6577af2b16500739d4c6e94bec90a03728aaba 100644 --- a/RigidBodyType.cu +++ b/RigidBodyType.cu @@ -1,7 +1,15 @@ +#include "Configuration.h" #include "RigidBodyType.h" #include "Reservoir.h" #include "BaseGrid.h" #include "RigidBodyGrid.h" +#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } +inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true) { + if (code != cudaSuccess) { + fprintf(stderr,"CUDA Error: %s %s %d\n", cudaGetErrorString(code), __FILE__, line); + if (abort) exit(code); + } +} void RigidBodyType::clear() { num = 0; // RBTODO: not 100% sure about this @@ -19,14 +27,21 @@ void RigidBodyType::clear() { densityGridKeys.clear(); pmfKeys.clear(); - if (numParticles > 0) { - for (int i=0; i < numParticles[i]; ++i) { - delete[] particles[i]; + + if (numParticles != NULL) { + for (int i=0; i < numPotGrids; ++i) { + printf("CLEARING\n"); + if (numParticles[i] > 0) { + delete[] particles[i]; + gpuErrchk(cudaFree( particles_d[i] )); + } } delete[] numParticles; delete[] particles; + delete[] particles_d; + numParticles = NULL; } - + if (numPotGrids > 0) delete[] rawPotentialGrids; if (numDenGrids > 0) delete[] rawDensityGrids; rawPotentialGrids = NULL; @@ -43,18 +58,18 @@ void RigidBodyType::setDampingCoeffs(float timestep) { /* MUST ONLY BE CALLED ON | | | f[kcal/mol AA] = - dampingCoeff * momentum[amu AA/ns] | | | - | units "(1/ps) * (amu AA/ns)" "kcal_mol/AA" * 2.390e-06 | + | units "(1/ns) * (amu AA/ns)" "kcal_mol/AA" * 2.390e-09 | `–––––––––––––––––––––––––––––––––––––––––––––––––––––––*/ /*––––––––––––––––––––––––––––––––––––––––––––––––––––. | < f(t) f(t') > = 2 kT dampingCoeff mass delta(t-t') | | | - | units "sqrt( k K (1/ps) amu / ns )" "kcal_mol/AA" | - | * 6.8916889e-05 | + | units "sqrt( k K (1/ns) amu / ns )" "kcal_mol/AA" | + | * 2.1793421e-06 | `––––––––––––––––––––––––––––––––––––––––––––––––––––*/ // RBTODO: make units consistent with rest of RB code float Temp = 295; /* RBTODO: temperature should be read from grid? Or set in uniformly in config file */ - transForceCoeff = 6.8916889e-05 * Vector3::element_sqrt( 2*Temp*mass*transDamping/timestep ); + transForceCoeff = 2.1793421e-06 * Vector3::element_sqrt( 2*Temp*mass*transDamping/timestep ); // setup for langevin // langevin = rbParams->langevin; @@ -62,12 +77,12 @@ void RigidBodyType::setDampingCoeffs(float timestep) { /* MUST ONLY BE CALLED ON // T = - dampingCoeff * angularMomentum // < f(t) f(t') > = 2 kT dampingCoeff inertia delta(t-t') - rotTorqueCoeff = 6.8916889e-05 * + rotTorqueCoeff = 2.1793421e-06 * Vector3::element_sqrt( 2*Temp* Vector3::element_mult(inertia,rotDamping) / timestep ); - transDamping = 2.3900574e-6 * transDamping; - rotDamping = 2.3900574e-6 * rotDamping; + transDamping = 2.3900574e-9 * transDamping; + rotDamping = 2.3900574e-9 * rotDamping; // Also apply scale factors applyScaleFactors(); @@ -139,10 +154,73 @@ void RigidBodyType::scalePMF(String s) { addScaleFactor(s, pmfScaleKeys, pmfScale); } +// void RigidBodyType::copyGridToDevice(RigidBodyGrid* ptr_d, const RigidBodyGrid* g) { +// // copy grid data +// float* tmpData; +// size_t sz = sizeof(float) * g->getSize(); +// gpuErrchk(cudaMalloc( &tmpData, sz)); +// gpuErrchk(cudaMemcpy( tmpData, g->val, sz, cudaMemcpyHostToDevice)); + +// // create temporary grid +// sz = sizeof(RigidBodyGrid); +// RigidBodyGrid* tmp; +// memcpy(tmp, g, sz); +// tmp->val = tmpData; + +// // copy grid +// gpuErrchk(cudaMalloc(&ptr_d, sz)); +// gpuErrchk(cudaMemcpy(ptr_d, tmp, sz, cudaMemcpyHostToDevice)); + +// // gpuErrchk(cudaMemcpy( &(ptr_d->val), &tmpData, sizeof(float*), cudaMemcpyHostToDevice)); +// } +void RigidBodyType::copyGridToDevice(RigidBodyGrid*& ptr_d, RigidBodyGrid g) { + // copy grid data + float* tmpData; + size_t sz = sizeof(float) * g.getSize(); + gpuErrchk(cudaMalloc( &tmpData, sz)); + gpuErrchk(cudaMemcpy( tmpData, g.val, sz, cudaMemcpyHostToDevice)); + + // set grid pointer to device + g.val = tmpData; + + // copy grid + sz = sizeof(RigidBodyGrid); + gpuErrchk(cudaMalloc(&ptr_d, sz)); + gpuErrchk(cudaMemcpy(ptr_d, &g, sz, cudaMemcpyHostToDevice)); + + // prevent destructor from trying to delete device pointer + g.val = NULL; +} + +void RigidBodyType::freeGridFromDevice(RigidBodyGrid* ptr_d) { + gpuErrchk(cudaFree( ptr_d->val )); // free grid data + gpuErrchk(cudaFree( ptr_d )); // free grid +} + +void RigidBodyType::copyGridsToDevice() { + int ng = numDenGrids; + rawDensityGrids_d = new RigidBodyGrid*[ng]; + for (int gid = 0; gid < ng; gid++) + copyGridToDevice(rawDensityGrids_d[gid], rawDensityGrids[gid]); + + ng = numPotGrids; + rawPotentialGrids_d = new RigidBodyGrid*[ng]; + for (int gid = 0; gid < ng; gid++) + copyGridToDevice(rawPotentialGrids_d[gid], rawPotentialGrids[gid]); + + ng = numPmfs; + rawPmfs_d = new RigidBodyGrid*[ng]; + for (int gid = 0; gid < ng; gid++) { + //RigidBodyGrid tmp = RigidBodyGrid(rawPmfs[gid]); + //copyGridToDevice(rawPmfs_d[gid], &tmp); + copyGridToDevice(rawPmfs_d[gid], RigidBodyGrid(rawPmfs[gid])); + } +} + void RigidBodyType::updateRaw() { - if (numPotGrids > 0) delete[] rawPotentialGrids; - if (numDenGrids > 0) delete[] rawDensityGrids; - if (numDenGrids > 0) delete[] rawPmfs; + if (numPotGrids > 0 && rawPotentialGrids != NULL) delete[] rawPotentialGrids; + if (numDenGrids > 0 && rawDensityGrids != NULL) delete[] rawDensityGrids; + if (numPmfs > 0 && rawPmfs != NULL) delete[] rawPmfs; numPotGrids = potentialGrids.size(); numDenGrids = densityGrids.size(); numPmfs = pmfs.size(); @@ -174,3 +252,59 @@ void RigidBodyType::updateRaw() { rawPmfs[i] = pmfs[i]; } +void RigidBodyType::initializeParticleLists() { + updateRaw(); + + numParticles = new int[numPotGrids]; + particles = new int*[numPotGrids]; + particles_d = new int*[numPotGrids]; + + // Loop over potential grids + for (int i = 0; i < numPotGrids; ++i) { + String& gridName = potentialGridKeys[i]; + numParticles[i] = 0; + + // Loop over particle types to count the number of particles + for (int j = 0; j < conf->numParts; ++j) { + // Loop over rigid body grid names associated with particle type + const std::vector<String>& gridNames = conf->partRigidBodyGrid[j]; + for (int k = 0; k < gridNames.size(); ++k) { + if (gridNames[k] == gridName) { + numParticles[i] += conf->numPartsOfType[j]; + } + } + } + + // allocate array of particle ids for the potential grid + particles[i] = new int[numParticles[i]]; + int pid = 0; + + // Loop over particle types to count the number of particles + for (int j = 0; j < conf->numParts; ++j) { + + // Build temporary id array of type j particles + int tmp[conf->numPartsOfType[j]]; + int currId = 0; + for (int aid = 0; aid < conf->num; ++aid) { + if (conf->type[aid] == j) + tmp[currId++] = aid; + } + + // Loop over rigid body grid names associated with particle type + const std::vector<String>& gridNames = conf->partRigidBodyGrid[j]; + for (int k = 0; k < gridNames.size(); ++k) { + if (gridNames[k] == gridName) { + // Copy type j particles to particles[i] + memcpy( &(particles[i][pid]), tmp, sizeof(int)*currId ); + assert(currId == conf->numPartsOfType[j]); + pid += conf->numPartsOfType[j]; + } + } + } + + // Initialize device data + size_t sz = sizeof(int*) * numParticles[i]; + gpuErrchk(cudaMalloc( &(particles_d[i]), sz )); + gpuErrchk(cudaMemcpyAsync( particles_d[i], particles[i], sz, cudaMemcpyHostToDevice)); + } +} diff --git a/RigidBodyType.h b/RigidBodyType.h index 4d9849dea3fe999636d92417581c55348a1cb724..1c5d48da0718e70e7ce8a6b46f58e9dbad6dc2e6 100644 --- a/RigidBodyType.h +++ b/RigidBodyType.h @@ -15,8 +15,17 @@ class Reservoir; class BaseGrid; class RigidBodyGrid; +class Configuration; class RigidBodyType { + +public: +RigidBodyType(const String& name = "", const Configuration* conf = NULL ) : + name(name), conf(conf), num(0), + reservoir(NULL), mass(1.0f), inertia(), transDamping(), + rotDamping(), initPos(), initRot(Matrix3(1.0f)), + numPotGrids(0), numDenGrids(0), numPmfs(0), numParticles(NULL) { } + ~RigidBodyType() { clear(); } private: // Deletes all members void clear(); @@ -28,40 +37,31 @@ private: void applyScaleFactor( const std::vector<String> &scaleKeys, const std::vector<float> &scaleFactors, const std::vector<String> &gridKeys, std::vector<BaseGrid> &grids); + + void updateRaw(); + void copyGridToDevice(RigidBodyGrid*& ptr_d, RigidBodyGrid g); + void freeGridFromDevice(RigidBodyGrid* ptr_d); public: -/* RigidBodyType(const String& name = "") : */ -/* name(name), num(0), */ -/* reservoir(NULL), mass(1.0f), inertia(), transDamping(), */ -/* rotDamping(), potentialGrids(NULL), densityGrids(NULL), */ -/* potentialGrids_D(NULL), densityGrids_D(NULL) { } */ - -RigidBodyType(const String& name = "") : - name(name), num(0), - reservoir(NULL), mass(1.0f), inertia(), transDamping(), - rotDamping(), initPos(), initRot(Matrix3(1.0f)), - numPotGrids(0), numDenGrids(0), numPmfs(0), numParticles(0) { } - - /* RigidBodyType(const RigidBodyType& src) { copy(src); } */ - ~RigidBodyType() { clear(); } - /* RigidBodyType& operator=(const RigidBodyType& src); */ - - void addPotentialGrid(String s); + void copyGridsToDevice(); + + void addPotentialGrid(String s); void addDensityGrid(String s); void addPMF(String s); - void scalePotentialGrid(String s); + void scalePotentialGrid(String s); void scaleDensityGrid(String s); void scalePMF(String s); - void updateRaw(); void setDampingCoeffs(float timestep); + void initializeParticleLists(); // TODO: privatize public: - - String name; +private: + const Configuration* conf; +public: int num; // number of particles of this type Reservoir* reservoir; @@ -91,9 +91,6 @@ public: std::vector<float> potentialGridScale; std::vector<float> densityGridScale; std::vector<float> pmfScale; - - int* numParticles; /* particles affected by potential grids */ - int** particles; // RBTODO: clear std::vectors after initialization, (but keep offsets) // duplicates of std::vector grids for device @@ -101,6 +98,12 @@ public: int numPotGrids; int numDenGrids; int numPmfs; + + int* numParticles; /* particles affected by potential grids */ + int** particles; + int** particles_d; + + RigidBodyGrid* rawPotentialGrids; RigidBodyGrid* rawDensityGrids; BaseGrid* rawPmfs; @@ -116,4 +119,3 @@ public: RigidBodyGrid** rawPmfs_d; }; - diff --git a/TabulatedMethods.cuh b/TabulatedMethods.cuh index af169c82ad76026cee624d451e071062c7dd5ea5..7f2d2aa9bb70abb27067ef99d630d3b78caf6b79 100644 --- a/TabulatedMethods.cuh +++ b/TabulatedMethods.cuh @@ -51,7 +51,7 @@ __device__ inline void computeAngle(const TabulatedAnglePotential* __restrict__ float dUdx = (a->pot[(home+1)] - U0) / a->angle_step; // float energy = (dUdx * angle) + U0; float sin = sqrtf(1.0f - cos*cos); - dUdx /= abs(sin) > 1e-4 ? sin : 1e-4; // avoid singularity + dUdx /= abs(sin) > 1e-3 ? sin : 1e-3; // avoid singularity // Calculate the forces Vector3 force1 = (dUdx/distab) * (ab * (cos/distab) - bc / distbc); // force on particle 1 diff --git a/useful.h b/useful.h index 02f0e521e9181ec40555da97d305bff002412044..84b675e22338521d243611f1a10796e5f5d06131 100644 --- a/useful.h +++ b/useful.h @@ -236,9 +236,8 @@ public: return ret; } - HOST DEVICE inline void print() { - // printf("%0.3f %0.3f %0.3f\n", x,y,z); - printf("%0.12f %0.12f %0.12f\n", x,y,z); + HOST DEVICE inline void print() const { + printf("%0.3f %0.3f %0.3f\n", x,y,z); } String toString() const; @@ -389,9 +388,14 @@ public: HOST DEVICE inline Vector3 ey() const { return Vector3(exy,eyy,ezy); } HOST DEVICE inline Vector3 ez() const { return Vector3(exz,eyz,ezz); } String toString() const; - String toString1() const; + HOST DEVICE inline void print() const { + printf("%0.3f %0.3f %0.3f\n", exx,exy,exz); + printf("%0.3f %0.3f %0.3f\n", eyx,eyy,eyz); + printf("%0.3f %0.3f %0.3f\n", ezx,ezy,ezz); + } + HOST DEVICE inline bool isDiagonal() const { return isDiag; }