diff --git a/BaseGrid.h b/BaseGrid.h
index 3df015c508129e7dacd79cf819503c03cf788c75..716c6e335816f8c492824061ca4023dbc8d6eadb 100644
--- a/BaseGrid.h
+++ b/BaseGrid.h
@@ -160,10 +160,27 @@ public:
// A matrix defining the basis for the entire system.
Matrix3 getBox() const;
- // The longest diagonal of the system.
+ // The diagonal (nx,ny,nz) of the system.
Vector3 getExtent() const;
- // The longest diagonal of the system.
+ // The length of diagonal (nx,ny,nz) of the system.
float getDiagonal() const;
+
+ HOST DEVICE inline int getRadius() const {
+ // return radius of smallest sphere circumscribing grid
+ float radius = basis.transform(Vector3(nx,ny,nz)).length2();
+
+ float tmp = basis.transform(Vector3(-nx,ny,nz)).length2();
+ radius = tmp > radius ? tmp : radius;
+
+ tmp = basis.transform(Vector3(nx,-ny,nz)).length2();
+ radius = tmp > radius ? tmp : radius;
+
+ tmp = basis.transform(Vector3(nx,ny,-nz)).length2();
+ radius = tmp > radius ? tmp : radius;
+
+ return 0.5 * sqrt(radius);
+ }
+
// The position farthest from the origin.
Vector3 getDestination() const;
// The center of the grid.
diff --git a/ComputeGridGrid.cu b/ComputeGridGrid.cu
index 2126f25c6c8ed460304b94337df24e9e894516e3..c6d1f27bcf515df601b004ee76b3df20f84897f3 100644
--- a/ComputeGridGrid.cu
+++ b/ComputeGridGrid.cu
@@ -110,6 +110,27 @@ void computePartGridForce(const Vector3* __restrict__ pos, Vector3* particleForc
}
}
+__global__
+void createPartlist(const Vector3* __restrict__ pos,
+ const int numTypeParticles, const int* __restrict__ typeParticles_d,
+ int* numParticles_d, int* particles_d,
+ const Vector3 gridCenter, const float radius2) {
+ const int tid = threadIdx.x;
+ const int warpLane = tid % WARPSIZE; /* RBTODO: optimize */
+ const int split = 32; /* numblocks should be divisible by split */
+ /* const int blocksPerCell = gridDim.x/split; */
+
+ const int i = blockIdx.x * blockDim.x + threadIdx.x;
+ if (i < numTypeParticles) {
+ int aid = typeParticles_d[i];
+ float dist = (pos[aid] - gridCenter).length2();
+
+ if (dist <= radius2) {
+ int tmp = atomicAggInc(numParticles_d, warpLane);
+ particles_d[tmp] = aid;
+ }
+ }
+}
__global__
void printRigidBodyGrid(const RigidBodyGrid* rho) {
diff --git a/ComputeGridGrid.cuh b/ComputeGridGrid.cuh
index 20067121dc23b853cdf362ac0a650445062e0bd3..670041ed15164ff4b3a3cf9511a472087da9c9ec 100644
--- a/ComputeGridGrid.cuh
+++ b/ComputeGridGrid.cuh
@@ -18,5 +18,11 @@ void computePartGridForce(const Vector3* __restrict__ pos, Vector3* particleForc
const Matrix3 basis_u_inv, const Vector3 origin_u,
Vector3* __restrict__ retForce, Vector3* __restrict__ retTorque);
+extern __global__
+void createPartlist(const Vector3* __restrict__ pos,
+ const int numTypeParticles, const int* __restrict__ typeParticles_d,
+ int* numParticles_d, int* particles_d,
+ const Vector3 gridCenter, const float radius2);
+
extern __global__
void printRigidBodyGrid(const RigidBodyGrid* rho);
diff --git a/Configuration.cpp b/Configuration.cpp
index 2ed7f53aab9bea28d382e4e740815979184e1f38..ec6d3ff36cc546a63160daf55dd3455810d971f4 100644
--- a/Configuration.cpp
+++ b/Configuration.cpp
@@ -386,7 +386,7 @@ Configuration::~Configuration() {
delete[] partForceZGridFile;
delete[] partDiffusionGridFile;
delete[] partReservoirFile;
- delete[] partRigidBodyGrid;
+ partRigidBodyGrid.clear();
// TODO: plug memory leaks
if (partsFromFile != NULL) delete[] partsFromFile;
@@ -545,6 +545,7 @@ void Configuration::setDefaults() {
electricField = 0.0f;
cutoff = 10.0f;
switchLen = 2.0f;
+ pairlistDistance = 2.0f;
outputPeriod = 200;
outputEnergyPeriod = -1;
outputFormat = TrajectoryWriter::formatDcd;
@@ -596,7 +597,7 @@ int Configuration::readParameters(const char * config_file) {
partForceZGridFile = new String[numParts];
partDiffusionGridFile = new String[numParts];
partReservoirFile = new String[numParts];
- partRigidBodyGrid = new std::vector<String>[numParts];
+ partRigidBodyGrid.resize(numParts);
// Allocate the table variables.
partTableFile = new String[numParts*numParts];
@@ -662,6 +663,8 @@ int Configuration::readParameters(const char * config_file) {
cutoff = (float) strtod(value.val(), NULL);
else if (param == String("switchLen"))
switchLen = (float) strtod(value.val(), NULL);
+ else if (param == String("pairlistDistance"))
+ pairlistDistance = (float) strtod(value.val(), NULL);
else if (param == String("outputPeriod"))
outputPeriod = atoi(value.val());
else if (param == String("outputEnergyPeriod"))
@@ -778,7 +781,7 @@ int Configuration::readParameters(const char * config_file) {
// RIGID BODY
else if (param == String("rigidBody")) {
// part[++currPart] = BrownianParticleType(value);
- rigidBody[++currRB] = RigidBodyType(value);
+ rigidBody[++currRB] = RigidBodyType(value, this);
currPartClass = partClassRB;
}
else if (param == String("mass"))
diff --git a/Configuration.h b/Configuration.h
index 450cc8b9a52cb9645f3a51febd8376bc2a8760b6..fdcfa5575215711b5902b714500e7468d18b53e4 100644
--- a/Configuration.h
+++ b/Configuration.h
@@ -139,6 +139,7 @@ public:
float coulombConst;
float electricField;
float cutoff;
+ float pairlistDistance;
float switchLen;
int outputPeriod;
int outputEnergyPeriod;
@@ -176,7 +177,7 @@ public:
bool readAnglesFromFile;
bool readDihedralsFromFile;
String* partGridFile;
- std::vector<String>* partRigidBodyGrid;
+ std::vector< std::vector<String> > partRigidBodyGrid;
String* partDiffusionGridFile;
String* partForceXGridFile;
String* partForceYGridFile;
diff --git a/GrandBrownTown.cu b/GrandBrownTown.cu
index ba82eddecb4b99d6c048e1e31ecae00f51f7b773..a7104ba61c45fd373cfb63eaaa2d1c7f1480c3bb 100644
--- a/GrandBrownTown.cu
+++ b/GrandBrownTown.cu
@@ -364,6 +364,8 @@ void GrandBrownTown::run() {
{
// cudaSetDevice(0);
internal->decompose();
+ gpuErrchk(cudaDeviceSynchronize());
+ RBC.updateParticleLists( internal->getPos_d() );
}
float t; // simulation time
@@ -395,6 +397,7 @@ void GrandBrownTown::run() {
{
// cudaSetDevice(0);
internal -> decompose();
+ RBC.updateParticleLists( internal->getPos_d() );
}
//MLog: added Bond* bondList to the list of passed in variables.
@@ -416,6 +419,7 @@ void GrandBrownTown::run() {
{
// cudaSetDevice(0);
internal->decompose();
+ RBC.updateParticleLists( internal->getPos_d() );
}
energy = internal->compute(get_energy);
break;
diff --git a/RigidBody.cu b/RigidBody.cu
index 67cea83a5f50ca0074957106d3e244bc7f1f7828..4d991c6ea99d2a1834de592d34ec8d9ccf0fc836 100644
--- a/RigidBody.cu
+++ b/RigidBody.cu
@@ -2,13 +2,25 @@
#include <typeinfo>
#include "RigidBody.h"
#include "Configuration.h"
+#include "ComputeGridGrid.cuh"
#include "Debug.h"
-RigidBody::RigidBody(String name, const Configuration& cref, RigidBodyType& tref)
- : name(name), c(&cref), t(&tref), impulse_to_momentum(4.184e8f) {
- // units "(kcal_mol/AA) * ns" "dalton AA/ns" * 4.184e+08
-
+#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
+inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true) {
+ if (code != cudaSuccess) {
+ fprintf(stderr,"CUDA Error: %s %s %d\n", cudaGetErrorString(code), __FILE__, line);
+ if (abort) exit(code);
+ }
+}
+
+
+RigidBody::RigidBody(String name, const Configuration& cref, const RigidBodyType& tref)
+ : name(name), c(&cref), t(&tref), impulse_to_momentum(4.184e8f) { init(); }
+RigidBody::RigidBody(const RigidBody& rb)
+ : name(rb.name), c(rb.c), t(rb.t), impulse_to_momentum(4.184e8f) { init(); }
+void RigidBody::init() {
+ // units "(kcal_mol/AA) * ns" "dalton AA/ns" * 4.184e+08
timestep = c->timestep;
Temp = c->temperature;
// RBTODO: use temperature grids
@@ -25,6 +37,48 @@ RigidBody::RigidBody(String name, const Configuration& cref, RigidBodyType& tref
angularMomentum.x *= t->inertia.x;
angularMomentum.y *= t->inertia.y;
angularMomentum.z *= t->inertia.z;
+
+ // Memory allocation for forces between particles and grids
+ const int& numGrids = t->numPotGrids;
+ numParticles = new int[numGrids];
+ particles_d = new int*[numGrids];
+ particleForces = new Vector3*[numGrids];
+ particleTorques = new Vector3*[numGrids];
+ particleForces_d = new Vector3*[numGrids];
+ particleTorques_d = new Vector3*[numGrids];
+ for (int i = 0; i < numGrids; ++i) {
+ const int& n = t->numParticles[i];
+ const int nb = (n/NUMTHREADS)+1; // max number of blocks
+ if (n > 0) {
+ gpuErrchk(cudaMalloc( &particles_d[i], sizeof(int)*n ));
+ particleForces[i] = new Vector3[nb];
+ particleTorques[i] = new Vector3[nb];
+ gpuErrchk(cudaMalloc( &particleForces_d[i], sizeof(Vector3)*nb ));
+ gpuErrchk(cudaMalloc( &particleTorques_d[i], sizeof(Vector3)*nb ));
+ }
+ }
+}
+
+RigidBody::~RigidBody() {
+ const int& numGrids = t->numPotGrids;
+ for (int i = 0; i < numGrids; ++i) {
+ const int& n = t->numParticles[i];
+ if (n > 0) {
+ gpuErrchk(cudaFree( particles_d[i] ));
+ delete[] particleForces[i];
+ delete[] particleTorques[i];
+ gpuErrchk(cudaFree( particleForces_d[i] ));
+ gpuErrchk(cudaFree( particleTorques_d[i] ));
+ }
+ }
+ if (numParticles != NULL) {
+ delete[] numParticles;
+ delete[] particles_d;
+ delete[] particleForces;
+ delete[] particleTorques;
+ delete[] particleForces_d;
+ delete[] particleTorques_d;
+ }
}
void RigidBody::addForce(Force f) {
@@ -33,7 +87,91 @@ void RigidBody::addForce(Force f) {
void RigidBody::addTorque(Force torq) {
torque += torq;
}
-RigidBody::~RigidBody() {}
+
+void RigidBody::updateParticleList(Vector3* pos_d) {
+ for (int i = 0; i < t->numPotGrids; ++i) {
+ int& tnp = t->numParticles[i];
+ if (tnp > 0) {
+ Vector3 gridCenter = t->potentialGrids[i].getCenter();
+ float cutoff = gridCenter.length();
+ cutoff += t->potentialGrids[i].getRadius();
+ cutoff += c->pairlistDistance;
+
+ numParticles[i] = 0;
+ int* tmp_d;
+ gpuErrchk(cudaMalloc( &tmp_d, sizeof(int) ));
+ gpuErrchk(cudaMemcpy( tmp_d, &numParticles[i], sizeof(int), cudaMemcpyHostToDevice ));
+
+ int nb = floor(tnp/NUMTHREADS) + 1;
+ createPartlist<<<NUMTHREADS,nb>>>(pos_d, tnp, t->particles_d[i],
+ tmp_d, particles_d[i],
+ gridCenter + position, cutoff*cutoff);
+
+ gpuErrchk(cudaMemcpy(&numParticles[i], tmp_d, sizeof(int), cudaMemcpyDeviceToHost ));
+ }
+ }
+}
+
+void RigidBody::callGridParticleForceKernel(Vector3* pos_d, Vector3* force_d, int s) {
+ // Apply the force and torque on the rigid body, and forces on particles
+
+ // RBTODO: performance: consolidate CUDA stream management
+ // loop over potential grids
+ for (int i = 0; i < t->numPotGrids; ++i) {
+ if (numParticles[i] == 0) continue;
+ // const int nb = 500;
+ /*
+ r: postion of particle in real space
+ B: grid Basis
+ o: grid origin
+ R: rigid body orientation
+ c: rigid body center
+
+ B': R.B
+ c': R.o + c
+ */
+
+ Vector3 c = getOrientation()*t->potentialGrids[i].getOrigin() + getPosition();
+ Matrix3 B = (getOrientation()*t->potentialGrids[i].getBasis()).inverse();
+
+ // RBTODO: get energy
+ // RBTODO: performance: Improve parellism here (use streams/events; overlap with other computations)
+ const int nb = (numParticles[i]/NUMTHREADS)+1;
+
+ Vector3* forces = particleForces[i];
+ Vector3* torques = particleTorques[i];
+ Vector3* forces_d = particleForces_d[i];
+ Vector3* torques_d = particleTorques_d[i];
+
+ for (int k=0; k < nb; ++k) {
+ forces[k] = Vector3(0.0f);
+ torques[k] = Vector3(0.0f);
+ }
+ gpuErrchk(cudaMemcpy(forces_d, forces, sizeof(Vector3)*nb, cudaMemcpyHostToDevice));
+ gpuErrchk(cudaMemcpy(torques_d, torques, sizeof(Vector3)*nb, cudaMemcpyHostToDevice));
+
+ computePartGridForce<<< nb, NUMTHREADS, NUMTHREADS*2*sizeof(Vector3) >>>(
+ pos_d, force_d, numParticles[i], particles_d[i],
+ t->rawPotentialGrids_d[i],
+ B, c, forces_d, torques_d);
+
+ gpuErrchk(cudaMemcpy(forces, forces_d, sizeof(Vector3)*nb, cudaMemcpyDeviceToHost));
+ gpuErrchk(cudaMemcpy(torques, torques_d, sizeof(Vector3)*nb, cudaMemcpyDeviceToHost));
+
+ // Sum and apply forces and torques
+ Vector3 f = Vector3(0.0f);
+ Vector3 torq = Vector3(0.0f);
+ for (int k = 0; k < nb; ++k) {
+ f = f + forces[k];
+ torq = torq + torques[k];
+ }
+
+ torq = -torq + (getPosition()-c).cross( f );
+ addForce( -f );
+ addTorque( torq );
+ }
+}
+
/*===========================================================================\
| Following "Algorithm for rigid-body Brownian dynamics" Dan Gordon, Matthew |
| Hoyles, and Shin-Ho Chung |
diff --git a/RigidBody.h b/RigidBody.h
index 630db1feb90262ffb4edad9d149e3a54d5c059ad..f2be7912bb1a33787788f66ad94621ce99ddd370 100644
--- a/RigidBody.h
+++ b/RigidBody.h
@@ -27,9 +27,12 @@ class RigidBody { // host side representation of rigid bodies
| splitting methods for rigid body molecular dynamics". J Chem Phys. (1997) |
`––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––*/
public:
- HOST DEVICE RigidBody(String name, const Configuration& c, RigidBodyType& t);
+ RigidBody(String name, const Configuration& c, const RigidBodyType& t);
+ RigidBody(const RigidBody& rb);
+ // RigidBody(const RigidBody& rb) : RigidBody(rb.name, *rb.c, *rb.t) {};
+ void init();
/* HOST DEVICE RigidBody(RigidBodyType t); */
- HOST DEVICE ~RigidBody();
+ ~RigidBody();
HOST DEVICE void addForce(Force f);
HOST DEVICE void addTorque(Force t);
@@ -46,6 +49,7 @@ class RigidBody { // host side representation of rigid bodies
// HOST DEVICE inline String getKey() const { return t->name; }
HOST DEVICE inline String getKey() const { return name; }
+ HOST DEVICE inline Vector3 transformBodyToLab(Vector3 v) const { return orientation*v + position; }
HOST DEVICE inline Vector3 getPosition() const { return position; }
HOST DEVICE inline Matrix3 getOrientation() const { return orientation; }
// HOST DEVICE inline Matrix3 getBasis() const { return orientation; }
@@ -56,9 +60,13 @@ class RigidBody { // host side representation of rigid bodies
angularMomentum.y / t->inertia.y,
angularMomentum.z / t->inertia.z );
}
+ void updateParticleList(Vector3* pos_d);
+ void callGridParticleForceKernel(Vector3* pos_d, Vector3* force_d, int s);
+
+
bool langevin;
Vector3 torque; // lab frame (except in integrate())
-
+
private:
// String key;
String name;
@@ -82,12 +90,20 @@ private:
// integration
const Configuration* c;
- RigidBodyType* t; /* RBTODO: const? */
+ const RigidBodyType* t;
float timestep;
Vector3 force; // lab frame
bool isFirstStep;
-
+
+ int* numParticles; /* particles affected by potential grids */
+ int** particles_d;
+ Vector3** particleForces;
+ Vector3** particleTorques;
+ Vector3** particleForces_d;
+ Vector3** particleTorques_d;
+
+
/*–––––––––––––––––––––––––––––––––––––––––.
| units "kcal_mol/AA * ns" "(AA/ns) * amu" |
`–––––––––––––––––––––––––––––––––––––––––*/
diff --git a/RigidBodyController.cu b/RigidBodyController.cu
index 268e0dc9856c7f1c52c3e4cdb78b3d439d405533..66cb5cd6b863747a5a27afd2dd3882ffb45c43a4 100644
--- a/RigidBodyController.cu
+++ b/RigidBodyController.cu
@@ -32,14 +32,14 @@ inline void gpuAssert(cudaError_t code, String file, int line, bool abort=true)
RigidBodyController::RigidBodyController(const Configuration& c, const char* outArg) :
conf(c), outArg(outArg) {
-
- if (conf.numRigidTypes > 0) {
- copyGridsToDevice();
- }
+ gpuErrchk(cudaDeviceSynchronize()); /* RBTODO: this should be extraneous */
+ for (int i = 0; i < conf.numRigidTypes; i++)
+ conf.rigidBody[i].initializeParticleLists();
int numRB = 0;
// grow list of rbs
for (int i = 0; i < conf.numRigidTypes; i++) {
+ conf.rigidBody[i].copyGridsToDevice();
numRB += conf.rigidBody[i].num;
std::vector<RigidBody> tmp;
// RBTODO: change conf.rigidBody to conf.rigidBodyType
@@ -53,15 +53,17 @@ RigidBodyController::RigidBodyController(const Configuration& c, const char* out
}
RigidBody r(name, conf, conf.rigidBody[i]);
tmp.push_back( r );
- }
+ }
rigidBodyByType.push_back(tmp);
-}
+ }
random = new RandomCPU(conf.seed + 1); /* +1 to avoid using same seed as RandomCUDA */
+ gpuErrchk(cudaDeviceSynchronize()); /* RBTODO: this should be extraneous */
initializeForcePairs();
- initializeParticleLists();
+ gpuErrchk(cudaDeviceSynchronize()); /* RBTODO: this should be extraneous */
}
+
RigidBodyController::~RigidBodyController() {
for (int i = 0; i < rigidBodyByType.size(); i++)
rigidBodyByType[i].clear();
@@ -168,85 +170,12 @@ void RigidBodyController::initializeForcePairs() {
}
-void RigidBodyController::initializeParticleLists() {
- // Populate RigidBodyType.particles
-
- // TODO: ensure no duplicates in conf.partRigidBodyGrid[i]
-
- // Allocate RB type's numParticles array
- for (int rb = 0; rb < conf.numRigidTypes; ++rb) {
- RigidBodyType& t = conf.rigidBody[rb];
- t.numParticles = new int[t.numPotGrids];
- for (int i = 0; i < t.numPotGrids; ++i) t.numParticles[i] = 0;
- }
-
- // Count the number of particles; Loop over particle types
- for (int i = 0; i < conf.numParts; ++i) {
-
- // Loop over rigid body grid names associated with particle type
- const std::vector<String>& gridNames = conf.partRigidBodyGrid[i];
- for (int j = 0; j < gridNames.size(); ++j) {
-
- // Loop over RB types
- for (int rb = 0; rb < conf.numRigidTypes; ++rb) {
- RigidBodyType& t = conf.rigidBody[rb];
- const std::vector<String>& keys = t.potentialGridKeys;
-
- // Loop over potential grids
- for(int k = 0; k < keys.size(); k++) {
- // printf(" checking grid keys ");
- if (gridNames[j] == keys[k])
- t.numParticles[k] += conf.numPartsOfType[i];
- }
- }
- }
- }
-
- // Allocate each particles array
- for (int rb = 0; rb < conf.numRigidTypes; ++rb) {
- RigidBodyType& t = conf.rigidBody[rb];
- t.particles = new int*[t.numPotGrids];
- for (int i = 0; i < t.numPotGrids; ++i) {
- t.particles[i] = new int[t.numParticles[i]];
- t.numParticles[i] = 0; // now use this as a counter
- }
- }
-
- // Set the number of particles; Loop over particle types
- for (int i = 0; i < conf.numParts; ++i) {
- int tmp[conf.numPartsOfType[i]]; // temporary array holding particles of type i
- int currId = 0;
- for (int j = 0; j < conf.num; ++j) {
- if (conf.type[j] == i)
- tmp[currId++] = j;
- }
-
- // Loop over rigid body grid names associated with particle type
- const std::vector<String>& gridNames = conf.partRigidBodyGrid[i];
- for (int j = 0; j < gridNames.size(); ++j) {
-
- // Loop over RB types
- for (int rb = 0; rb < conf.numRigidTypes; ++rb) {
- RigidBodyType& t = conf.rigidBody[rb];
- const std::vector<String>& keys = t.potentialGridKeys;
-
- // Loop over potential grids
- for(int k = 0; k < keys.size(); k++) {
- // printf(" checking grid keys ");
- if (gridNames[j] == keys[k]) {
- memcpy( &(t.particles[k][t.numParticles[k]]), tmp, sizeof(int)*currId );
- t.numParticles[k] += currId;
- }
- }
- }
+void RigidBodyController::updateParticleLists(Vector3* pos_d) {
+ for (int i = 0; i < rigidBodyByType.size(); i++) {
+ for (int j = 0; j < rigidBodyByType[i].size(); j++) {
+ rigidBodyByType[i][j].updateParticleList(pos_d);
}
}
-
- // Initialize device data for RB force pairs after std::vector is done growing
-
- // for (int i = 0; i < forcePairs.size(); i++)
- // forcePairs[i].initialize();
-
}
@@ -263,18 +192,24 @@ void RigidBodyController::updateForces(Vector3* pos_d, Vector3* force_d, int s)
rb.clearTorque();
}
}
-
+
// Grid–particle forces
for (int i = 0; i < rigidBodyByType.size(); i++) {
- callGridParticleForceKernel( pos_d, force_d, conf.rigidBody[i], rigidBodyByType[i], s );
+ for (int j = 0; j < rigidBodyByType[i].size(); j++) {
+ RigidBody& rb = rigidBodyByType[i][j];
+ rb.callGridParticleForceKernel( pos_d, force_d, s );
+ }
}
-
+
// Grid–Grid forces
if (forcePairs.size() > 0) {
- for (int i=0; i < forcePairs.size(); i++)
- forcePairs[i].callGridForceKernel(i,s);
-
+ for (int i=0; i < forcePairs.size(); i++) {
+ // TODO: performance: make this check occur less frequently
+ if (forcePairs[i].isWithinPairlistDist())
+ forcePairs[i].callGridForceKernel(i,s);
+ }
+
// each kernel call is followed by async memcpy for previous; now get last
RigidBodyForcePair* fp = RigidBodyForcePair::lastRbForcePair;
fp->retrieveForcesForGrid( fp->lastRbGridID );
@@ -288,10 +223,11 @@ void RigidBodyController::updateForces(Vector3* pos_d, Vector3* force_d, int s)
gpuErrchk(cudaDeviceSynchronize());
for (int i=0; i < forcePairs.size(); i++)
- forcePairs[i].processGPUForces();
-
+ if (forcePairs[i].isWithinPairlistDist())
+ forcePairs[i].processGPUForces();
}
}
+
void RigidBodyController::integrate(int step) {
// tell RBs to integrate
for (int i = 0; i < rigidBodyByType.size(); i++) {
@@ -360,14 +296,28 @@ void RigidBodyForcePair::createStreams() {
gpuErrchk( cudaStreamCreate( &(stream[i]) ) );
// gpuErrchk( cudaStreamCreateWithFlags( &(stream[i]) , cudaStreamNonBlocking ) );
}
+bool RigidBodyForcePair::isWithinPairlistDist() const {
+ float pairlistDist = 2.0f; /* TODO: get from conf */
+ float rbDist = (rb1->getPosition() - rb2->getPosition()).length();
+
+ for (int i = 0; i < gridKeyId1.size(); ++i) {
+ const int k1 = gridKeyId1[i];
+ const int k2 = gridKeyId2[i];
+ float d1 = type1->densityGrids[k1].getRadius() + type1->densityGrids[k1].getCenter().length();
+ float d2 = type2->potentialGrids[k2].getRadius() + type2->potentialGrids[k2].getCenter().length();
+ if (rbDist < d1+d2+pairlistDist)
+ return true;
+ }
+ return false;
+}
Vector3 RigidBodyForcePair::getOrigin1(const int i) {
const int k1 = gridKeyId1[i];
- return rb1->getOrientation()*type1->densityGrids[k1].getOrigin() + rb1->getPosition();
+ return rb1->transformBodyToLab( type1->densityGrids[k1].getOrigin() );
}
Vector3 RigidBodyForcePair::getOrigin2(const int i) {
const int k2 = gridKeyId2[i];
if (!isPmf)
- return rb2->getOrientation()*type2->potentialGrids[k2].getOrigin() + rb2->getPosition();
+ return rb2->transformBodyToLab( type2->potentialGrids[k2].getOrigin() );
else
return type2->rawPmfs[k2].getOrigin();
}
@@ -441,68 +391,6 @@ void RigidBodyForcePair::callGridForceKernel(int pairId, int s) {
lastRbGridID = i;
}
}
-void RigidBodyController::callGridParticleForceKernel(Vector3* pos_d, Vector3* force_d,
- const RigidBodyType& t, std::vector<RigidBody>& rbs, int s) {
- // get the force/torque on a rigid body, and forces on particles
-
- // RBTODO: consolidate CUDA stream management
- for (int i = 0; i < t.numPotGrids; ++i) {
- if (t.numParticles[i] == 0) continue;
-
- for (int j = 0; j < rbs.size(); ++j) {
- // const int nb = 500;
- /*
- r: postion of particle in real space
- B: grid Basis
- o: grid origin
- R: rigid body orientation
- c: rigid body center
-
- B': R.B
- c': R.o + c
- */
- // Matrix3 B1 = getBasis1(i);
- Vector3 c = rbs[j].getOrientation()*t.potentialGrids[i].getOrigin() + rbs[j].getPosition();
- Matrix3 B = (rbs[j].getOrientation()*t.potentialGrids[i].getBasis()).inverse();
-
- // RBTODO: get energy
- const int nb = (t.numParticles[i]/NUMTHREADS)+1;
-
- // RBTODO: IMPORTANT: Improve this
- Vector3 forces[nb];
- Vector3 torques[nb];
- for (int k=0; k < nb; ++k) {
- forces[k] = Vector3(0.0f);
- torques[k] = Vector3(0.0f);
- }
- Vector3* forces_d;
- Vector3* torques_d;
- gpuErrchk(cudaMalloc(&forces_d, sizeof(Vector3)*nb));
- gpuErrchk(cudaMalloc(&torques_d, sizeof(Vector3)*nb));
- gpuErrchk(cudaMemcpy(forces_d, forces, sizeof(Vector3)*nb, cudaMemcpyHostToDevice));
- gpuErrchk(cudaMemcpy(torques_d, torques, sizeof(Vector3)*nb, cudaMemcpyHostToDevice));
-
- computePartGridForce<<< nb, NUMTHREADS, NUMTHREADS*2*sizeof(Vector3) >>>(
- pos_d, force_d, t.numParticles[i], t.particles[i],
- t.rawPotentialGrids_d[i],
- B, c, forces_d, torques_d);
-
- gpuErrchk(cudaMemcpy(forces, forces_d, sizeof(Vector3)*nb, cudaMemcpyDeviceToHost));
- gpuErrchk(cudaMemcpy(torques, torques_d, sizeof(Vector3)*nb, cudaMemcpyDeviceToHost));
-
- Vector3 f = Vector3(0.0f);
- Vector3 t = Vector3(0.0f);
- for (int k = 0; k < nb; ++k) {
- f = f + forces[k];
- t = t + torques[j];
- }
-
- t = -t - (rbs[j].getPosition()-c).cross( -f );
- rbs[j].addForce( -f );
- rbs[j].addTorque( t );
- }
- }
-}
void RigidBodyForcePair::retrieveForcesForGrid(const int i) {
// i: grid ID (less than numGrids)
@@ -553,137 +441,6 @@ void RigidBodyForcePair::processGPUForces() {
// printf("force: %s\n", f.toString().val());
// printf("torque: %s\n", t.toString().val());
-
-}
-
-void RigidBodyController::copyGridsToDevice() {
- // RBTODO: clean this function up
- RigidBodyType **rb_addr = new RigidBodyType*[conf.numRigidTypes]; /* temporary pointer to device pointer */
-
- gpuErrchk(cudaMalloc(&rbType_d, sizeof(RigidBodyType*) * conf.numRigidTypes));
- // TODO: The above line fails when there is not enough memory. If it fails, stop.
-
- printf("Copying RBs\n");
- // Copy rigidbody types
- // http://stackoverflow.com/questions/16024087/copy-an-object-to-device
- for (int i = 0; i < conf.numRigidTypes; i++)
- conf.rigidBody[i].updateRaw();
-
-
- // density grids
- for (int i = 0; i < conf.numRigidTypes; i++) {
- printf("Copying density grids of RB type %d\n",i);
- RigidBodyType& rb = conf.rigidBody[i];
-
- int ng = rb.numDenGrids;
- rb.rawDensityGrids_d = new RigidBodyGrid*[ng]; /* not sure this is needed */
-
- printf(" RigidBodyType %d: numGrids = %d\n", i, ng);
- // copy grid data to device
- for (int gid = 0; gid < ng; gid++) {
- RigidBodyGrid* g = &(rb.rawDensityGrids[gid]); // convenience
- // RigidBodyGrid* g_d = rb.rawDensityGrids_d[gid]; // convenience
- int len = g->getSize();
- float* tmpData;
-
- size_t sz = sizeof(RigidBodyGrid);
- gpuErrchk(cudaMalloc((void **) &(rb.rawDensityGrids_d[gid]), sz));
- /* gpuErrchk(cudaMemcpy(rb.rawDensityGrids_d[gid], g, */
- /* sz, cudaMemcpyHostToDevice)); */
- gpuErrchk(cudaMemcpy(rb.rawDensityGrids_d[gid], &(rb.rawDensityGrids[gid]),
- sz, cudaMemcpyHostToDevice));
-
- // allocate grid data on device
- // copy temporary host pointer to device pointer
- // copy data to device through temporary host pointer
- sz = sizeof(float) * len;
- gpuErrchk(cudaMalloc((void **) &tmpData, sz));
- // gpuErrchk(cudaMemcpy( tmpData, g->val, sz, cudaMemcpyHostToDevice));
- gpuErrchk(cudaMemcpy( tmpData, rb.rawDensityGrids[gid].val, sz, cudaMemcpyHostToDevice));
- gpuErrchk(cudaMemcpy( &(rb.rawDensityGrids_d[gid]->val), &tmpData,
- sizeof(float*), cudaMemcpyHostToDevice));
- }
- }
-
- for (int i = 0; i < conf.numRigidTypes; i++) {
- printf("Working on RB %d\n",i);
- RigidBodyType& rb = conf.rigidBody[i];
-
- int ng = rb.numPotGrids;
- rb.rawPotentialGrids_d = new RigidBodyGrid*[ng]; /* not 100% sure this is needed, possible memory leak */
-
- printf(" RigidBodyType %d: numGrids = %d\n", i, ng);
- // copy potential grid data to device
- for (int gid = 0; gid < ng; gid++) {
- RigidBodyGrid* g = &(rb.rawPotentialGrids[gid]); // convenience
- // RigidBodyGrid* g_d = rb.rawDensityGrids_d[gid]; // convenience
- int len = g->getSize();
- float* tmpData;
- // tmpData = new float*[len];
-
- size_t sz = sizeof(RigidBodyGrid);
- gpuErrchk(cudaMalloc((void **) &(rb.rawPotentialGrids_d[gid]), sz));
- gpuErrchk(cudaMemcpy( rb.rawPotentialGrids_d[gid], &(rb.rawPotentialGrids[gid]),
- sz, cudaMemcpyHostToDevice ));
-
- // allocate grid data on device
- // copy temporary host pointer to device pointer
- // copy data to device through temporary host pointer
- sz = sizeof(float) * len;
- gpuErrchk(cudaMalloc((void **) &tmpData, sz));
- // sz = sizeof(float) * len;
- gpuErrchk(cudaMemcpy( tmpData, rb.rawPotentialGrids[gid].val, sz, cudaMemcpyHostToDevice));
- gpuErrchk(cudaMemcpy( &(rb.rawPotentialGrids_d[gid]->val), &tmpData,
- sizeof(float*), cudaMemcpyHostToDevice));
-
- // RBTODO: why can't tmpData be deleted?
- // delete[] tmpData;
- }
- }
-
- for (int i = 0; i < conf.numRigidTypes; i++) {
- printf("Copying PMFs for RB %d\n",i);
- RigidBodyType& rb = conf.rigidBody[i];
-
- int ng = rb.numPmfs;
- rb.rawPmfs_d = new RigidBodyGrid*[ng]; /* not 100% sure this is needed, possible memory leak */
-
- printf(" RigidBodyType %d: numPmfs = %d\n", i, ng);
-
- // copy pmf grid data to device
- for (int gid = 0; gid < ng; gid++) {
- RigidBodyGrid g = rb.rawPmfs[gid];
- int len = g.getSize();
- float* tmpData;
- // tmpData = new float*[len];
-
- size_t sz = sizeof(RigidBodyGrid);
- gpuErrchk(cudaMalloc((void **) &(rb.rawPmfs_d[gid]), sz));
- gpuErrchk(cudaMemcpy( rb.rawPmfs_d[gid], &g,
- sz, cudaMemcpyHostToDevice ));
-
- // allocate grid data on device
- // copy temporary host pointer to device pointer
- // copy data to device through temporary host pointer
- sz = sizeof(float) * len;
- gpuErrchk(cudaMalloc((void **) &tmpData, sz));
- // sz = sizeof(float) * len;
- gpuErrchk(cudaMemcpy( tmpData, rb.rawPmfs[gid].val, sz, cudaMemcpyHostToDevice));
- gpuErrchk(cudaMemcpy( &(rb.rawPmfs_d[gid]->val), &tmpData,
- sizeof(float*), cudaMemcpyHostToDevice));
-
- }
- }
-
- gpuErrchk(cudaDeviceSynchronize());
- printf("Done copying RBs\n");
-
- /* // DEBUG */
- /* RigidBodyType& rb = conf.rigidBody[0]; */
- /* printRigidBodyGrid<<<1,1>>>( rb.rawPotentialGrids_d[0] ); */
- /* gpuErrchk(cudaDeviceSynchronize()); */
- /* printRigidBodyGrid<<<1,1>>>( rb.rawDensityGrids_d[0] ); */
- /* gpuErrchk(cudaDeviceSynchronize()); */
}
void RigidBodyController::print(int step) {
diff --git a/RigidBodyController.h b/RigidBodyController.h
index 8aed324cd00d4ea7605a9264a00b625cfc44179e..66f40e2dd87685cdeb1e18ef131ff6b4de384f27 100644
--- a/RigidBodyController.h
+++ b/RigidBodyController.h
@@ -18,6 +18,8 @@ class RigidBody;
class Configuration;
class RandomCPU;
+
+// TODO: performance: create RigidBodyGridPair so pairlistdist check is done per grid pair, not per RB pair
class RigidBodyForcePair {
friend class RigidBodyController;
@@ -42,10 +44,11 @@ public:
printf(" Copying assigning RB force pair...\n");
swap(*this,o);
return *this;
- }
-
+ }
~RigidBodyForcePair();
+ bool isWithinPairlistDist() const;
+
private:
int initialize();
void swap(RigidBodyForcePair& a, RigidBodyForcePair& b);
@@ -95,12 +98,10 @@ public:
void integrate(int step);
void updateForces(Vector3* pos_d, Vector3* force_d, int s);
- void callGridParticleForceKernel(Vector3* pos_d, Vector3* force_d, const RigidBodyType& t, std::vector<RigidBody>& rbs, int s);
+ void updateParticleLists(Vector3* pos_d);
private:
- void copyGridsToDevice();
void initializeForcePairs();
- void initializeParticleLists();
void print(int step);
void printLegend(std::ofstream &file);
diff --git a/RigidBodyGrid.cu b/RigidBodyGrid.cu
index f3663d439f721674a35edd6519f6a0a6d560bd8c..5f6caf3ca444c4ae626dd01674074f85eae52137 100644
--- a/RigidBodyGrid.cu
+++ b/RigidBodyGrid.cu
@@ -27,80 +27,6 @@ RigidBodyGrid::RigidBodyGrid(int nx0, int ny0, int nz0) {
zero();
}
-// Make an orthogonal grid given the box dimensions and resolution.
-RigidBodyGrid::RigidBodyGrid(Vector3 box, float dx) {
- dx = fabsf(dx);
- box.x = fabsf(box.x);
- box.y = fabsf(box.y);
- box.z = fabsf(box.z);
-
- // Tile the grid into the system box.
- // The grid spacing is always a bit smaller than dx.
- nx = int(ceilf(box.x/dx));
- ny = int(ceilf(box.y/dx));
- nz = int(ceilf(box.z/dx));
- if (nx <= 0) nx = 1;
- if (ny <= 0) ny = 1;
- if (nz <= 0) nz = 1;
-
- val = new float[nx*ny*nz];
- zero();
-}
-
-// The box gives the system geometry.
-// The grid point numbers define the resolution.
-RigidBodyGrid::RigidBodyGrid(Matrix3 box, int nx0, int ny0, int nz0) {
- nx = nx0;
- ny = ny0;
- nz = nz0;
-
- // Tile the grid into the system box.
- if (nx <= 0) nx = 1;
- if (ny <= 0) ny = 1;
- if (nz <= 0) nz = 1;
-
- val = new float[nx*ny*nz];
- zero();
-}
-
-// The box gives the system geometry.
-// dx is the approx. resolution.
-// The grid spacing is always a bit larger than dx.
-RigidBodyGrid::RigidBodyGrid(Matrix3 box, Vector3 origin0, float dx) {
- dx = fabs(dx);
-
- // Tile the grid into the system box.
- // The grid spacing is always a bit larger than dx.
- nx = int(floor(box.ex().length()/dx))-1;
- ny = int(floor(box.ey().length()/dx))-1;
- nz = int(floor(box.ez().length()/dx))-1;
- if (nx <= 0) nx = 1;
- if (ny <= 0) ny = 1;
- if (nz <= 0) nz = 1;
-
- val = new float[nx*ny*nz];
- zero();
-}
-
-// The box gives the system geometry.
-// dx is the approx. resolution.
-// The grid spacing is always a bit smaller than dx.
-RigidBodyGrid::RigidBodyGrid(Matrix3 box, float dx) {
- dx = fabs(dx);
-
- // Tile the grid into the system box.
- // The grid spacing is always a bit smaller than dx.
- nx = int(ceilf(box.ex().length()/dx));
- ny = int(ceilf(box.ey().length()/dx));
- nz = int(ceilf(box.ez().length()/dx));
- if (nx <= 0) nx = 1;
- if (ny <= 0) ny = 1;
- if (nz <= 0) nz = 1;
-
- val = new float[nx*ny*nz];
- zero();
-}
-
RigidBodyGrid::RigidBodyGrid(const BaseGrid& g) {
nx = g.nx;
ny = g.ny;
@@ -302,7 +228,7 @@ DEVICE ForceEnergy RigidBodyGrid::interpolateForceDLinearly(const Vector3& l) co
Vector3 f;
- const float wx = l.x - homeY;
+ const float wx = l.x - homeX;
const float wy = l.y - homeY;
const float wz = l.z - homeZ;
diff --git a/RigidBodyGrid.h b/RigidBodyGrid.h
index bcc44d1294862511b17d5d81b068a8cb73fe9b26..2c46b4ff1331f57a8d04cc4b21e79e124bb438ef 100644
--- a/RigidBodyGrid.h
+++ b/RigidBodyGrid.h
@@ -47,23 +47,6 @@ public:
// The most obvious of constructors.
RigidBodyGrid(int nx0, int ny0, int nz0);
- // Make an orthogonal grid given the box dimensions and resolution.
- RigidBodyGrid(Vector3 box, float dx);
-
- // The box gives the system geometry.
- // The grid point numbers define the resolution.
- RigidBodyGrid(Matrix3 box, int nx0, int ny0, int nz0);
-
- // The box gives the system geometry.
- // dx is the approx. resolution.
- // The grid spacing is always a bit larger than dx.
- RigidBodyGrid(Matrix3 box, Vector3 origin0, float dx);
-
- // The box gives the system geometry.
- // dx is the approx. resolution.
- // The grid spacing is always a bit smaller than dx.
- RigidBodyGrid(Matrix3 box, float dx);
-
// Make a copy of a BaseGrid grid.
RigidBodyGrid(const BaseGrid& g);
@@ -109,6 +92,22 @@ public:
HOST DEVICE inline int getNz() const {return nz;}
HOST DEVICE inline int getSize() const {return nx*ny*nz;}
+ HOST DEVICE inline int getRadius(Matrix3 basis) const {
+ // return radius of smallest sphere circumscribing grid
+ float radius = basis.transform(Vector3(nx,ny,nz)).length2();
+
+ float tmp = basis.transform(Vector3(-nx,ny,nz)).length2();
+ radius = tmp > radius ? tmp : radius;
+
+ tmp = basis.transform(Vector3(nx,-ny,nz)).length2();
+ radius = tmp > radius ? tmp : radius;
+
+ tmp = basis.transform(Vector3(nx,ny,-nz)).length2();
+ radius = tmp > radius ? tmp : radius;
+
+ return 0.5 * sqrt(radius);
+ }
+
// Add a fixed value to the grid.
void shift(float s);
diff --git a/RigidBodyType.cu b/RigidBodyType.cu
index c3f600d9d0be96174900453ee3b465e1a29f9ed7..cc6577af2b16500739d4c6e94bec90a03728aaba 100644
--- a/RigidBodyType.cu
+++ b/RigidBodyType.cu
@@ -1,7 +1,15 @@
+#include "Configuration.h"
#include "RigidBodyType.h"
#include "Reservoir.h"
#include "BaseGrid.h"
#include "RigidBodyGrid.h"
+#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
+inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true) {
+ if (code != cudaSuccess) {
+ fprintf(stderr,"CUDA Error: %s %s %d\n", cudaGetErrorString(code), __FILE__, line);
+ if (abort) exit(code);
+ }
+}
void RigidBodyType::clear() {
num = 0; // RBTODO: not 100% sure about this
@@ -19,14 +27,21 @@ void RigidBodyType::clear() {
densityGridKeys.clear();
pmfKeys.clear();
- if (numParticles > 0) {
- for (int i=0; i < numParticles[i]; ++i) {
- delete[] particles[i];
+
+ if (numParticles != NULL) {
+ for (int i=0; i < numPotGrids; ++i) {
+ printf("CLEARING\n");
+ if (numParticles[i] > 0) {
+ delete[] particles[i];
+ gpuErrchk(cudaFree( particles_d[i] ));
+ }
}
delete[] numParticles;
delete[] particles;
+ delete[] particles_d;
+ numParticles = NULL;
}
-
+
if (numPotGrids > 0) delete[] rawPotentialGrids;
if (numDenGrids > 0) delete[] rawDensityGrids;
rawPotentialGrids = NULL;
@@ -43,18 +58,18 @@ void RigidBodyType::setDampingCoeffs(float timestep) { /* MUST ONLY BE CALLED ON
| |
| f[kcal/mol AA] = - dampingCoeff * momentum[amu AA/ns] |
| |
- | units "(1/ps) * (amu AA/ns)" "kcal_mol/AA" * 2.390e-06 |
+ | units "(1/ns) * (amu AA/ns)" "kcal_mol/AA" * 2.390e-09 |
`–––––––––––––––––––––––––––––––––––––––––––––––––––––––*/
/*––––––––––––––––––––––––––––––––––––––––––––––––––––.
| < f(t) f(t') > = 2 kT dampingCoeff mass delta(t-t') |
| |
- | units "sqrt( k K (1/ps) amu / ns )" "kcal_mol/AA" |
- | * 6.8916889e-05 |
+ | units "sqrt( k K (1/ns) amu / ns )" "kcal_mol/AA" |
+ | * 2.1793421e-06 |
`––––––––––––––––––––––––––––––––––––––––––––––––––––*/
// RBTODO: make units consistent with rest of RB code
float Temp = 295; /* RBTODO: temperature should be read from grid? Or set in uniformly in config file */
- transForceCoeff = 6.8916889e-05 * Vector3::element_sqrt( 2*Temp*mass*transDamping/timestep );
+ transForceCoeff = 2.1793421e-06 * Vector3::element_sqrt( 2*Temp*mass*transDamping/timestep );
// setup for langevin
// langevin = rbParams->langevin;
@@ -62,12 +77,12 @@ void RigidBodyType::setDampingCoeffs(float timestep) { /* MUST ONLY BE CALLED ON
// T = - dampingCoeff * angularMomentum
// < f(t) f(t') > = 2 kT dampingCoeff inertia delta(t-t')
- rotTorqueCoeff = 6.8916889e-05 *
+ rotTorqueCoeff = 2.1793421e-06 *
Vector3::element_sqrt( 2*Temp* Vector3::element_mult(inertia,rotDamping) / timestep );
- transDamping = 2.3900574e-6 * transDamping;
- rotDamping = 2.3900574e-6 * rotDamping;
+ transDamping = 2.3900574e-9 * transDamping;
+ rotDamping = 2.3900574e-9 * rotDamping;
// Also apply scale factors
applyScaleFactors();
@@ -139,10 +154,73 @@ void RigidBodyType::scalePMF(String s) {
addScaleFactor(s, pmfScaleKeys, pmfScale);
}
+// void RigidBodyType::copyGridToDevice(RigidBodyGrid* ptr_d, const RigidBodyGrid* g) {
+// // copy grid data
+// float* tmpData;
+// size_t sz = sizeof(float) * g->getSize();
+// gpuErrchk(cudaMalloc( &tmpData, sz));
+// gpuErrchk(cudaMemcpy( tmpData, g->val, sz, cudaMemcpyHostToDevice));
+
+// // create temporary grid
+// sz = sizeof(RigidBodyGrid);
+// RigidBodyGrid* tmp;
+// memcpy(tmp, g, sz);
+// tmp->val = tmpData;
+
+// // copy grid
+// gpuErrchk(cudaMalloc(&ptr_d, sz));
+// gpuErrchk(cudaMemcpy(ptr_d, tmp, sz, cudaMemcpyHostToDevice));
+
+// // gpuErrchk(cudaMemcpy( &(ptr_d->val), &tmpData, sizeof(float*), cudaMemcpyHostToDevice));
+// }
+void RigidBodyType::copyGridToDevice(RigidBodyGrid*& ptr_d, RigidBodyGrid g) {
+ // copy grid data
+ float* tmpData;
+ size_t sz = sizeof(float) * g.getSize();
+ gpuErrchk(cudaMalloc( &tmpData, sz));
+ gpuErrchk(cudaMemcpy( tmpData, g.val, sz, cudaMemcpyHostToDevice));
+
+ // set grid pointer to device
+ g.val = tmpData;
+
+ // copy grid
+ sz = sizeof(RigidBodyGrid);
+ gpuErrchk(cudaMalloc(&ptr_d, sz));
+ gpuErrchk(cudaMemcpy(ptr_d, &g, sz, cudaMemcpyHostToDevice));
+
+ // prevent destructor from trying to delete device pointer
+ g.val = NULL;
+}
+
+void RigidBodyType::freeGridFromDevice(RigidBodyGrid* ptr_d) {
+ gpuErrchk(cudaFree( ptr_d->val )); // free grid data
+ gpuErrchk(cudaFree( ptr_d )); // free grid
+}
+
+void RigidBodyType::copyGridsToDevice() {
+ int ng = numDenGrids;
+ rawDensityGrids_d = new RigidBodyGrid*[ng];
+ for (int gid = 0; gid < ng; gid++)
+ copyGridToDevice(rawDensityGrids_d[gid], rawDensityGrids[gid]);
+
+ ng = numPotGrids;
+ rawPotentialGrids_d = new RigidBodyGrid*[ng];
+ for (int gid = 0; gid < ng; gid++)
+ copyGridToDevice(rawPotentialGrids_d[gid], rawPotentialGrids[gid]);
+
+ ng = numPmfs;
+ rawPmfs_d = new RigidBodyGrid*[ng];
+ for (int gid = 0; gid < ng; gid++) {
+ //RigidBodyGrid tmp = RigidBodyGrid(rawPmfs[gid]);
+ //copyGridToDevice(rawPmfs_d[gid], &tmp);
+ copyGridToDevice(rawPmfs_d[gid], RigidBodyGrid(rawPmfs[gid]));
+ }
+}
+
void RigidBodyType::updateRaw() {
- if (numPotGrids > 0) delete[] rawPotentialGrids;
- if (numDenGrids > 0) delete[] rawDensityGrids;
- if (numDenGrids > 0) delete[] rawPmfs;
+ if (numPotGrids > 0 && rawPotentialGrids != NULL) delete[] rawPotentialGrids;
+ if (numDenGrids > 0 && rawDensityGrids != NULL) delete[] rawDensityGrids;
+ if (numPmfs > 0 && rawPmfs != NULL) delete[] rawPmfs;
numPotGrids = potentialGrids.size();
numDenGrids = densityGrids.size();
numPmfs = pmfs.size();
@@ -174,3 +252,59 @@ void RigidBodyType::updateRaw() {
rawPmfs[i] = pmfs[i];
}
+void RigidBodyType::initializeParticleLists() {
+ updateRaw();
+
+ numParticles = new int[numPotGrids];
+ particles = new int*[numPotGrids];
+ particles_d = new int*[numPotGrids];
+
+ // Loop over potential grids
+ for (int i = 0; i < numPotGrids; ++i) {
+ String& gridName = potentialGridKeys[i];
+ numParticles[i] = 0;
+
+ // Loop over particle types to count the number of particles
+ for (int j = 0; j < conf->numParts; ++j) {
+ // Loop over rigid body grid names associated with particle type
+ const std::vector<String>& gridNames = conf->partRigidBodyGrid[j];
+ for (int k = 0; k < gridNames.size(); ++k) {
+ if (gridNames[k] == gridName) {
+ numParticles[i] += conf->numPartsOfType[j];
+ }
+ }
+ }
+
+ // allocate array of particle ids for the potential grid
+ particles[i] = new int[numParticles[i]];
+ int pid = 0;
+
+ // Loop over particle types to count the number of particles
+ for (int j = 0; j < conf->numParts; ++j) {
+
+ // Build temporary id array of type j particles
+ int tmp[conf->numPartsOfType[j]];
+ int currId = 0;
+ for (int aid = 0; aid < conf->num; ++aid) {
+ if (conf->type[aid] == j)
+ tmp[currId++] = aid;
+ }
+
+ // Loop over rigid body grid names associated with particle type
+ const std::vector<String>& gridNames = conf->partRigidBodyGrid[j];
+ for (int k = 0; k < gridNames.size(); ++k) {
+ if (gridNames[k] == gridName) {
+ // Copy type j particles to particles[i]
+ memcpy( &(particles[i][pid]), tmp, sizeof(int)*currId );
+ assert(currId == conf->numPartsOfType[j]);
+ pid += conf->numPartsOfType[j];
+ }
+ }
+ }
+
+ // Initialize device data
+ size_t sz = sizeof(int*) * numParticles[i];
+ gpuErrchk(cudaMalloc( &(particles_d[i]), sz ));
+ gpuErrchk(cudaMemcpyAsync( particles_d[i], particles[i], sz, cudaMemcpyHostToDevice));
+ }
+}
diff --git a/RigidBodyType.h b/RigidBodyType.h
index 4d9849dea3fe999636d92417581c55348a1cb724..1c5d48da0718e70e7ce8a6b46f58e9dbad6dc2e6 100644
--- a/RigidBodyType.h
+++ b/RigidBodyType.h
@@ -15,8 +15,17 @@
class Reservoir;
class BaseGrid;
class RigidBodyGrid;
+class Configuration;
class RigidBodyType {
+
+public:
+RigidBodyType(const String& name = "", const Configuration* conf = NULL ) :
+ name(name), conf(conf), num(0),
+ reservoir(NULL), mass(1.0f), inertia(), transDamping(),
+ rotDamping(), initPos(), initRot(Matrix3(1.0f)),
+ numPotGrids(0), numDenGrids(0), numPmfs(0), numParticles(NULL) { }
+ ~RigidBodyType() { clear(); }
private:
// Deletes all members
void clear();
@@ -28,40 +37,31 @@ private:
void applyScaleFactor(
const std::vector<String> &scaleKeys, const std::vector<float> &scaleFactors,
const std::vector<String> &gridKeys, std::vector<BaseGrid> &grids);
+
+ void updateRaw();
+ void copyGridToDevice(RigidBodyGrid*& ptr_d, RigidBodyGrid g);
+ void freeGridFromDevice(RigidBodyGrid* ptr_d);
public:
-/* RigidBodyType(const String& name = "") : */
-/* name(name), num(0), */
-/* reservoir(NULL), mass(1.0f), inertia(), transDamping(), */
-/* rotDamping(), potentialGrids(NULL), densityGrids(NULL), */
-/* potentialGrids_D(NULL), densityGrids_D(NULL) { } */
-
-RigidBodyType(const String& name = "") :
- name(name), num(0),
- reservoir(NULL), mass(1.0f), inertia(), transDamping(),
- rotDamping(), initPos(), initRot(Matrix3(1.0f)),
- numPotGrids(0), numDenGrids(0), numPmfs(0), numParticles(0) { }
-
- /* RigidBodyType(const RigidBodyType& src) { copy(src); } */
- ~RigidBodyType() { clear(); }
-
/* RigidBodyType& operator=(const RigidBodyType& src); */
-
- void addPotentialGrid(String s);
+ void copyGridsToDevice();
+
+ void addPotentialGrid(String s);
void addDensityGrid(String s);
void addPMF(String s);
- void scalePotentialGrid(String s);
+ void scalePotentialGrid(String s);
void scaleDensityGrid(String s);
void scalePMF(String s);
- void updateRaw();
void setDampingCoeffs(float timestep);
+ void initializeParticleLists();
// TODO: privatize
public:
-
-
String name;
+private:
+ const Configuration* conf;
+public:
int num; // number of particles of this type
Reservoir* reservoir;
@@ -91,9 +91,6 @@ public:
std::vector<float> potentialGridScale;
std::vector<float> densityGridScale;
std::vector<float> pmfScale;
-
- int* numParticles; /* particles affected by potential grids */
- int** particles;
// RBTODO: clear std::vectors after initialization, (but keep offsets)
// duplicates of std::vector grids for device
@@ -101,6 +98,12 @@ public:
int numPotGrids;
int numDenGrids;
int numPmfs;
+
+ int* numParticles; /* particles affected by potential grids */
+ int** particles;
+ int** particles_d;
+
+
RigidBodyGrid* rawPotentialGrids;
RigidBodyGrid* rawDensityGrids;
BaseGrid* rawPmfs;
@@ -116,4 +119,3 @@ public:
RigidBodyGrid** rawPmfs_d;
};
-
diff --git a/TabulatedMethods.cuh b/TabulatedMethods.cuh
index af169c82ad76026cee624d451e071062c7dd5ea5..7f2d2aa9bb70abb27067ef99d630d3b78caf6b79 100644
--- a/TabulatedMethods.cuh
+++ b/TabulatedMethods.cuh
@@ -51,7 +51,7 @@ __device__ inline void computeAngle(const TabulatedAnglePotential* __restrict__
float dUdx = (a->pot[(home+1)] - U0) / a->angle_step;
// float energy = (dUdx * angle) + U0;
float sin = sqrtf(1.0f - cos*cos);
- dUdx /= abs(sin) > 1e-4 ? sin : 1e-4; // avoid singularity
+ dUdx /= abs(sin) > 1e-3 ? sin : 1e-3; // avoid singularity
// Calculate the forces
Vector3 force1 = (dUdx/distab) * (ab * (cos/distab) - bc / distbc); // force on particle 1
diff --git a/useful.h b/useful.h
index 02f0e521e9181ec40555da97d305bff002412044..84b675e22338521d243611f1a10796e5f5d06131 100644
--- a/useful.h
+++ b/useful.h
@@ -236,9 +236,8 @@ public:
return ret;
}
- HOST DEVICE inline void print() {
- // printf("%0.3f %0.3f %0.3f\n", x,y,z);
- printf("%0.12f %0.12f %0.12f\n", x,y,z);
+ HOST DEVICE inline void print() const {
+ printf("%0.3f %0.3f %0.3f\n", x,y,z);
}
String toString() const;
@@ -389,9 +388,14 @@ public:
HOST DEVICE inline Vector3 ey() const { return Vector3(exy,eyy,ezy); }
HOST DEVICE inline Vector3 ez() const { return Vector3(exz,eyz,ezz); }
String toString() const;
-
String toString1() const;
+ HOST DEVICE inline void print() const {
+ printf("%0.3f %0.3f %0.3f\n", exx,exy,exz);
+ printf("%0.3f %0.3f %0.3f\n", eyx,eyy,eyz);
+ printf("%0.3f %0.3f %0.3f\n", ezx,ezy,ezz);
+ }
+
HOST DEVICE inline bool isDiagonal() const { return isDiag; }