diff --git a/ComputeGridGrid.cuh b/ComputeGridGrid.cuh index 82bc0ea22f3298f2a326f76f0088e82f7a930861..f78b705de7233f3609aa7cb602be14d7211a0047 100644 --- a/ComputeGridGrid.cuh +++ b/ComputeGridGrid.cuh @@ -1,40 +1,46 @@ // Included in RigidBodyController.cu #pragma once + __global__ void computeGridGridForce(const RigidBodyGrid* rho, const RigidBodyGrid* u, - const Matrix3 basis_rho, const Vector3 origin_rho, - const Matrix3 basis_u_inv, const Vector3 origin_u, - Vector3 * retForce, Vector3 * retTorque, int gridNum) { + const Matrix3 basis_rho, const Matrix3 basis_u_inv, + const Vector3 origin_rho_minus_origin_u, + Vector3 * retForce, Vector3 * retTorque) { - __shared__ Vector3 force [NUMTHREADS]; - __shared__ Vector3 torque [NUMTHREADS]; - - // RBTODO http://devblogs.nvidia.com/parallelforall/cuda-pro-tip-write-flexible-kernels-grid-stride-loops/ + extern __shared__ Vector3 s[]; + Vector3 *force = s; + Vector3 *torque = &s[NUMTHREADS]; + + // RBTODO: http://devblogs.nvidia.com/parallelforall/cuda-pro-tip-write-flexible-kernels-grid-stride-loops const int tid = threadIdx.x; const int r_id = blockIdx.x * blockDim.x + threadIdx.x; - if (r_id >= rho->getSize()) { // skip threads with no data + if (r_id >= rho->getSize()) { // skip threads with nothing to do force[tid] = Vector3(0.0f); torque[tid] = Vector3(0.0f); return; } - // RBTODO: reduce registers used; commenting out interpolatePotential / interpolateForceD still uses ~40 registers, otherwise 236!!! - // RBTODO: maybe organize data into compact regions for grid data for better use of shared memory... + // RBTODO: reduce registers used; + // commenting out interpolateForceD still uses ~40 registers + // -- the innocuous-looking fn below is responsible; consumes ~17 registers! + Vector3 r_pos= rho->getPosition(r_id); /* i,j,k value of voxel */ - Vector3 r_pos = rho->getPosition(r_id); /* i,j,k value of voxel */ - r_pos = basis_rho.transform( r_pos ) + origin_rho; /* real space */ - const Vector3 u_ijk_float = basis_u_inv.transform( r_pos - origin_u ); + r_pos = basis_rho.transform( r_pos ) + origin_rho_minus_origin_u; /* real space */ + const Vector3 u_ijk_float = basis_u_inv.transform( r_pos ); // RBTODO What about non-unit delta? + /* Vector3 tmpf = Vector3(0.0f); */ + /* float tmpe = 0.0f; */ + /* const ForceEnergy fe = ForceEnergy( tmpf, tmpe); */ const ForceEnergy fe = u->interpolateForceD( u_ijk_float ); /* in coord frame of u */ force[tid] = fe.f; const float r_val = rho->val[r_id]; /* maybe move to beginning of function? */ force[tid] = basis_u_inv.transpose().transform( r_val*force[tid] ); /* transform to lab frame, with correct scaling factor */ - // Calculate torque about lab-frame origin + // Calculate torque about origin_u in the lab frame torque[tid] = r_pos.cross(force[tid]); // RBTODO: test if sign is correct! diff --git a/RigidBodyController.cu b/RigidBodyController.cu index ffadd63a078864bd46d9fa82a2229043856a9c82..6e9c449ad670d28356f4df909b660b114dd9d19f 100644 --- a/RigidBodyController.cu +++ b/RigidBodyController.cu @@ -278,6 +278,28 @@ void RigidBodyForcePair::createStreams() { gpuErrchk( cudaStreamCreate( &(stream[i]) ) ); // gpuErrchk( cudaStreamCreateWithFlags( &(stream[i]) , cudaStreamNonBlocking ) ); } +Vector3 RigidBodyForcePair::getOrigin1(const int i) { + const int k1 = gridKeyId1[i]; + return rb1->getOrientation()*type1->densityGrids[k1].getOrigin() + rb1->getPosition(); +} +Vector3 RigidBodyForcePair::getOrigin2(const int i) { + const int k2 = gridKeyId2[i]; + if (!isPmf) + return rb2->getOrientation()*type2->potentialGrids[k2].getOrigin() + rb2->getPosition(); + else + return type2->rawPmfs[k2].getOrigin(); +} +Matrix3 RigidBodyForcePair::getBasis1(const int i) { + const int k1 = gridKeyId1[i]; + return rb1->getOrientation()*type1->densityGrids[k1].getBasis(); +} +Matrix3 RigidBodyForcePair::getBasis2(const int i) { + const int k2 = gridKeyId2[i]; + if (!isPmf) + return rb2->getOrientation()*type2->potentialGrids[k2].getBasis(); + else + return type2->rawPmfs[k2].getBasis(); +} // RBTODO: bundle several rigidbodypair evaluations in single kernel call void RigidBodyForcePair::callGridForceKernel(int pairId, int s) { @@ -294,7 +316,6 @@ void RigidBodyForcePair::callGridForceKernel(int pairId, int s) { const int k1 = gridKeyId1[i]; const int k2 = gridKeyId2[i]; const cudaStream_t &s = stream[streamID[i]]; - /* ijk: index of grid value r: postion of point ijk in real space @@ -308,39 +329,26 @@ void RigidBodyForcePair::callGridForceKernel(int pairId, int s) { /.––––––––––––––––––. | r = R.(B.ijk+o)+c | - | r = B'.ijk + c' | + | r = B'.ijk + c' | `––––––––––––––––––./ */ - Matrix3 B1 = rb1->getOrientation()*type1->densityGrids[k1].getBasis(); - Vector3 c1 = rb1->getOrientation()*type1->densityGrids[k1].getOrigin() + rb1->getPosition(); - - Matrix3 B2; - Vector3 c2; + Matrix3 B1 = getBasis1(i); + Vector3 c = getOrigin1(i) - getOrigin2(i); - /* printf(" Calculating grid forces\n"); */ - if (!isPmf) { /* pair of RBs */ - B2 = rb2->getOrientation()*type2->potentialGrids[k2].getBasis(); - c2 = rb2->getOrientation()*type2->potentialGrids[k2].getOrigin() + rb2->getPosition(); - B2 = B2.inverse(); + Matrix3 B2 = getBasis2(i).inverse(); - // RBTODO: get energy - computeGridGridForce<<< nb, numThreads, 0, s >>> + // RBTODO: get energy + if (!isPmf) { /* pair of RBs */ + computeGridGridForce<<< nb, numThreads, NUMTHREADS*2*sizeof(Vector3), s >>> (type1->rawDensityGrids_d[k1], type2->rawPotentialGrids_d[k2], - B1, c1, B2, c2, - forces_d[i], torques_d[i], pairId+i); + B1, B2, c, + forces_d[i], torques_d[i]); } else { /* RB with a PMF */ - // B2 = type2->rawPmfs[i].getBasis(); // not 100% certain k2 should be used rather than i - /// c2 = type2->rawPmfs[i].getOrigin(); - B2 = type2->rawPmfs[k2].getBasis(); - c2 = type2->rawPmfs[k2].getOrigin(); - B2 = B2.inverse(); - - computeGridGridForce<<< nb, numThreads, 0, s >>> + computeGridGridForce<<< nb, numThreads, NUMTHREADS*2*sizeof(Vector3), s >>> (type1->rawDensityGrids_d[k1], type2->rawPmfs_d[k2], - B1, c1, B2, c2, - forces_d[i], torques_d[i], pairId+i); + B1, B2, c, + forces_d[i], torques_d[i]); } - } } @@ -361,11 +369,23 @@ void RigidBodyForcePair::retrieveForces() { cudaMemcpyDeviceToHost, s)); gpuErrchk(cudaStreamSynchronize( s )); - + + Vector3 tmpF = Vector3(0.0f); + Vector3 tmpT = Vector3(0.0f); + for (int j = 0; j < nb; j++) { - f = f + forces[i][j]; - t = t + torques[i][j]; + tmpF = tmpF + forces[i][j]; + tmpT = tmpT + torques[i][j]; } + + // tmpT is the torque calculated about the origin of grid k2 (e.g. c2) + // so here we transform torque to be about rb1 + Vector3 c2 = getOrigin2(i); + tmpT = tmpT - (rb1->getPosition() - c2).cross( tmpF ); + + // sum forces and torques + f = f + tmpF; + t = t + tmpT; } // transform torque from lab-frame origin to rb centers @@ -375,14 +395,13 @@ void RigidBodyForcePair::retrieveForces() { /* /\* printf("rb1->position: (%f,%f,%f)\n", tmp.x, tmp.y, tmp.z); *\/ */ /* tmp = rb1->getPosition(); */ /* printf("rb1->getPosition(): (%f,%f,%f)\n", tmp.x, tmp.y, tmp.z); */ - Vector3 t1 = t - rb1->getPosition().cross( f ); rb1->addForce( f ); - rb1->addTorque(t1); + rb1->addTorque( t ); if (!isPmf) { - Vector3 t2 = -t - rb2->getPosition().cross( -f ); - rb2->addForce(-f); - rb2->addTorque(t2); + const Vector3 t2 = -t + (rb2->getPosition()-rb1->getPosition()).cross( f ); + rb2->addForce( -f ); + rb2->addTorque( t2 ); } /* printf("force: (%f,%f,%f)\n",f.x,f.y,f.z); */ @@ -590,46 +609,6 @@ void RigidBodyController::print(int step) { } } } - /* - // Output final coordinates - if (step == FILE_OUTPUT || step == END_OF_RUN) { - int realstep = ( step == FILE_OUTPUT ? - simParams->firstTimestep : simParams->N ); - iout << "WRITING RIGID BODY OUTPUT FILE AT STEP " << realstep << "\n" << endi; - static char fname[140]; - strcpy(fname, simParams->outputFilename); - strcat(fname, ".rigid"); - NAMD_backup_file(fname); - std::ofstream outputFile(fname); - while (!outputFile) { - if ( errno == EINTR ) { - CkPrintf("Warning: Interrupted system call opening rigid body output file, retrying.\n"); - outputFile.clear(); - outputFile.open(fname); - continue; - } - char err_msg[257]; - sprintf(err_msg, "Error opening rigid body output file %s",fname); - NAMD_err(err_msg); - } - outputFile << "# NAMD rigid body output file" << std::endl; - printLegend(outputFile); - printData(realstep,outputFile); - if (!outputFile) { - char err_msg[257]; - sprintf(err_msg, "Error writing rigid body output file %s",fname); - NAMD_err(err_msg); - } - } - - // Close trajectory file - if (step == END_OF_RUN) { - if ( trajFile.rdbuf()->is_open() ) { - trajFile.close(); - iout << "CLOSING RIGID BODY TRAJECTORY FILE\n" << endi; - } - } - */ } void RigidBodyController::printLegend(std::ofstream &file) { file << "#$LABELS step RigidBodyKey" @@ -661,146 +640,7 @@ void RigidBodyController::printData(int step,std::ofstream &file) { } } } -/* -void RigidBodyController::integrate(int step) { - DebugM(3, "RBC::integrate: step "<< step << "\n" << endi); - - DebugM(1, "RBC::integrate: Waiting for grid reduction\n" << endi); - gridReduction->require(); - - const Molecule * mol = Node::Object()->molecule; - - // pass reduction force and torque to each grid - // DebugM(3, "Summing forces on rigid bodies" << "\n" << endi); - for (int i=0; i < mol->rbReductionIdToRigidBody.size(); i++) { - Force f; - Force t; - for (int k = 0; k < 3; k++) { - f[k] = gridReduction->item(6*i + k); - t[k] = gridReduction->item(6*i + k + 3); - } - - if (step % 100 == 1) - DebugM(4, "RBC::integrate: reduction/rb " << i <<":" - << "\n\tposition: " - << rigidBodyList[mol->rbReductionIdToRigidBody[i]]->getPosition() - <<"\n\torientation: " - << rigidBodyList[mol->rbReductionIdToRigidBody[i]]->getOrientation() - << "\n" << endi); - - DebugM(4, "RBC::integrate: reduction/rb " << i <<": " - << "force " << f <<": "<< "torque: " << t << "\n" << endi); - rigidBodyList[mol->rbReductionIdToRigidBody[i]]->addForce(f); - rigidBodyList[mol->rbReductionIdToRigidBody[i]]->addTorque(t); - } - - // Langevin - for (int i=0; i<rigidBodyList.size(); i++) { - // continue; // debug - if (rigidBodyList[i]->langevin) { - DebugM(1, "RBC::integrate: reduction/rb " << i - <<": calling langevin" << "\n" << endi); - rigidBodyList[i]->addLangevin( - random->gaussian_vector(), random->gaussian_vector() ); - // DebugM(4, "RBC::integrate: reduction/rb " << i - // << " after langevin: force " << rigidBodyList[i]f <<": "<< "torque: " << t << "\n" << endi); - // <<": calling langevin" << "\n" << endi); - } - } - - if ( step >= 0 && simParams->rigidBodyOutputFrequency && - (step % simParams->rigidBodyOutputFrequency) == 0 ) { - DebugM(1, "RBC::integrate:integrating for before printing output" << "\n" << endi); - // PRINT - if ( step == simParams->firstTimestep ) { - print(step); - // first step so only start this cycle - for (int i=0; i<rigidBodyList.size(); i++) { - DebugM(2, "RBC::integrate: reduction/rb " << i - <<": starting integration cycle of step " - << step << "\n" << endi); - rigidBodyList[i]->integrate(&trans[i],&rot[i],0); - } - } else { - // finish last cycle - // DebugM(1, "RBC::integrate: reduction/rb " << i - // <<": firststep: calling rb->integrate" << "\n" << endi); - for (int i=0; i<rigidBodyList.size(); i++) { - DebugM(2, "RBC::integrate: reduction/rb " << i - <<": finishing integration cycle of step " - << step-1 << "\n" << endi); - rigidBodyList[i]->integrate(&trans[i],&rot[i],1); - } - print(step); - // start this cycle - for (int i=0; i<rigidBodyList.size(); i++) { - DebugM(2, "RBC::integrate: reduction/rb " << i - <<": starting integration cycle of step " - << step << "\n" << endi); - rigidBodyList[i]->integrate(&trans[i],&rot[i],0); - } - } - } else { - DebugM(1, "RBC::integrate: trans[0] before: " << trans[0] << "\n" << endi); - if ( step == simParams->firstTimestep ) { - // integrate the start of this cycle - for (int i=0; i<rigidBodyList.size(); i++) { - DebugM(2, "RBC::integrate: reduction/rb " << i - <<": starting integration cycle of (first) step " - << step << "\n" << endi); - rigidBodyList[i]->integrate(&trans[i],&rot[i],0); - } - } else { - // integrate end of last ts and start of this one - for (int i=0; i<rigidBodyList.size(); i++) { - DebugM(2, "RBC::integrate: reduction/rb " << i - <<": ending / starting integration cycle of step " - << step-1 << "-" << step << "\n" << endi); - rigidBodyList[i]->integrate(&trans[i],&rot[i],2); - } - } - DebugM(1, "RBC::integrate: trans[0] after: " << trans[0] << "\n" << endi); - } - - DebugM(3, "sendRigidBodyUpdate on step: " << step << "\n" << endi); - if (trans.size() != rot.size()) - NAMD_die("failed sanity check\n"); - RigidBodyMsg *msg = new RigidBodyMsg; - msg->trans.copy(trans); // perhaps .swap() would cause problems - msg->rot.copy(rot); - computeMgr->sendRigidBodyUpdate(msg); -} - - -RigidBodyParams* RigidBodyParamsList::find_key(const char* key) { - RBElem* cur = head; - RBElem* found = NULL; - RigidBodyParams* result = NULL; - - while (found == NULL && cur != NULL) { - if (!strcasecmp((cur->elem).rigidBodyKey,key)) { - found = cur; - } else { - cur = cur->nxt; - } - } - if (found != NULL) { - result = &(found->elem); - } - return result; -} -*/ -/* RigidBodyForcePair::RigidBodyForcePair(RigidBodyType* t1, RigidBodyType* t2, */ -/* RigidBody* rb1, RigidBody* rb2, */ -/* std::vector<int> gridKeyId1, std::vector<int> gridKeyId2) : */ -/* type1(t1), type2(t2), rb1(rb1), rb2(rb2), gridKeyId1(gridKeyId1), gridKeyId2(gridKeyId2) { */ - -/* printf(" Constructing RB force pair...\n"); */ -/* allocateMem(); */ -/* printf(" Done constructing RB force pair\n"); */ - -/* } */ int RigidBodyForcePair::initialize() { printf(" Initializing (memory for) RB force pair...\n"); @@ -832,10 +672,6 @@ int RigidBodyForcePair::initialize() { return nextStreamID; } -/* RigidBodyForcePair::RigidBodyForcePair(const RigidBodyForcePair& orig ) : */ - -/* } */ - void RigidBodyForcePair::swap(RigidBodyForcePair& a, RigidBodyForcePair& b) { using std::swap; swap(a.type1, b.type1); diff --git a/RigidBodyController.h b/RigidBodyController.h index b214722274aeafbb64e51ad65eb7ad5df251193d..76b270c35415d41c141665973d85716c3643e062 100644 --- a/RigidBodyController.h +++ b/RigidBodyController.h @@ -68,6 +68,10 @@ private: static void createStreams(); void callGridForceKernel(int pairId, int s); void retrieveForces(); + Matrix3 getBasis1(const int i); + Matrix3 getBasis2(const int i); + Vector3 getOrigin1(const int i); + Vector3 getOrigin2(const int i); }; class RigidBodyController { diff --git a/RigidBodyGrid.cu b/RigidBodyGrid.cu index 0649c39c40f879dbc655c69c88e0556476588218..d3467324368cf60720d0cddd305d5733d6edcda1 100644 --- a/RigidBodyGrid.cu +++ b/RigidBodyGrid.cu @@ -7,11 +7,10 @@ #define STRLEN 512 -// Initialize the variables that get used a lot. -// Also, allocate the main value array. -void RigidBodyGrid::init() { - val = new float[nx*ny*nz]; -} + /* \ + | CONSTRUCTORS, DESTRUCTORS, I/O | + \===============================*/ + RigidBodyGrid::RigidBodyGrid() { RigidBodyGrid tmp(1,1,1); val = new float[1]; @@ -24,7 +23,7 @@ RigidBodyGrid::RigidBodyGrid(int nx0, int ny0, int nz0) { ny = abs(ny0); nz = abs(nz0); - init(); + val = new float[nx*ny*nz]; zero(); } @@ -44,7 +43,7 @@ RigidBodyGrid::RigidBodyGrid(Vector3 box, float dx) { if (ny <= 0) ny = 1; if (nz <= 0) nz = 1; - init(); + val = new float[nx*ny*nz]; zero(); } @@ -60,7 +59,7 @@ RigidBodyGrid::RigidBodyGrid(Matrix3 box, int nx0, int ny0, int nz0) { if (ny <= 0) ny = 1; if (nz <= 0) nz = 1; - init(); + val = new float[nx*ny*nz]; zero(); } @@ -79,7 +78,7 @@ RigidBodyGrid::RigidBodyGrid(Matrix3 box, Vector3 origin0, float dx) { if (ny <= 0) ny = 1; if (nz <= 0) nz = 1; - init(); + val = new float[nx*ny*nz]; zero(); } @@ -98,7 +97,7 @@ RigidBodyGrid::RigidBodyGrid(Matrix3 box, float dx) { if (ny <= 0) ny = 1; if (nz <= 0) nz = 1; - init(); + val = new float[nx*ny*nz]; zero(); } @@ -107,7 +106,7 @@ RigidBodyGrid::RigidBodyGrid(const BaseGrid& g) { ny = g.ny; nz = g.nz; - init(); + val = new float[nx*ny*nz]; for (int i = 0; i < nx*ny*nz; i++) val[i] = g.val[i]; } @@ -117,7 +116,7 @@ RigidBodyGrid::RigidBodyGrid(const RigidBodyGrid& g) { ny = g.ny; nz = g.nz; - init(); + val = new float[nx*ny*nz]; for (int i = 0; i < nx*ny*nz; i++) val[i] = g.val[i]; } @@ -133,31 +132,12 @@ RigidBodyGrid& RigidBodyGrid::operator=(const RigidBodyGrid& g) { ny = g.ny; nz = g.nz; - init(); + val = new float[nx*ny*nz]; for (int i = 0; i < nx*ny*nz; i++) val[i] = g.val[i]; return *this; } - -// Make a copy of a grid, but at a different resolution. -RigidBodyGrid::RigidBodyGrid(const RigidBodyGrid& g, int nx0, int ny0, int nz0) : nx(nx0), ny(ny0), nz(nz0) { - if (nx <= 0) nx = 1; - if (ny <= 0) ny = 1; - if (nz <= 0) nz = 1; - - // // Tile the grid into the box of the template grid. - // Matrix3 box = g.getBox(); - - init(); - - // Do an interpolation to obtain the values. - for (int i = 0; i < nx*ny*nz; i++) { - Vector3 r = getPosition(i); - val[i] = g.interpolatePotential(r); - } -} - RigidBodyGrid::~RigidBodyGrid() { if (val != NULL) delete[] val; @@ -197,10 +177,10 @@ float RigidBodyGrid::getValue(int ix, int iy, int iz) const { return val[j]; } -Vector3 RigidBodyGrid::getPosition(int j) const { - int iz = j%nz; - int iy = (j/nz)%ny; - int ix = j/(nz*ny); +Vector3 RigidBodyGrid::getPosition(const int j) const { + const int iz = j%nz; + const int iy = (j/nz)%ny; + const int ix = j/(nz*ny); return Vector3(ix,iy,iz); } @@ -213,33 +193,6 @@ Vector3 RigidBodyGrid::getPosition(int j, Matrix3 basis, Vector3 origin) const { return basis.transform(Vector3(ix, iy, iz)) + origin; } -// // Does the point r fall in the grid? -// // Obviously this is without periodic boundary conditions. -// bool RigidBodyGrid::inGrid(Vector3 r,) const { -// Vector3 l = basisInv.transform(r-origin); - -// if (l.x < 0.0f || l.x >= nx) return false; -// if (l.y < 0.0f || l.y >= ny) return false; -// if (l.z < 0.0f || l.z >= nz) return false; -// return true; -// } - -// bool RigidBodyGrid::inGridInterp(Vector3 r) const { -// Vector3 l = basisInv.transform(r-origin); - -// if (l.x < 2.0f || l.x >= nx-3.0f) return false; -// if (l.y < 2.0f || l.y >= ny-3.0f) return false; -// if (l.z < 2.0f || l.z >= nz-3.0f) return false; -// return true; -// } - -// Vector3 RigidBodyGrid::transformTo(Vector3 r) const { -// return basisInv.transform(r-origin); -// } -// Vector3 RigidBodyGrid::transformFrom(Vector3 l) const { -// return basis.transform(l) + origin; -// } - IndexList RigidBodyGrid::index(int j) const { int iz = j%nz; int iy = (j/nz)%ny; @@ -255,35 +208,6 @@ int RigidBodyGrid::indexY(int j) const { return (j/nz)%ny; } int RigidBodyGrid::indexZ(int j) const { return j%nz; } int RigidBodyGrid::index(int ix, int iy, int iz) const { return iz + iy*nz + ix*ny*nz; } -// int RigidBodyGrid::index(Vector3 r) const { -// Vector3 l = basisInv.transform(r-origin); - -// int ix = int(floor(l.x)); -// int iy = int(floor(l.y)); -// int iz = int(floor(l.z)); - -// ix = wrap(ix, nx); -// iy = wrap(iy, ny); -// iz = wrap(iz, nz); - -// return iz + iy*nz + ix*ny*nz; -// } - -// int RigidBodyGrid::nearestIndex(Vector3 r) const { -// Vector3 l = basisInv.transform(r-origin); - -// int ix = int(floorf(l.x + 0.5f)); -// int iy = int(floorf(l.y + 0.5f)); -// int iz = int(floorf(l.z + 0.5f)); - -// ix = wrap(ix, nx); -// iy = wrap(iy, ny); -// iz = wrap(iz, nz); - -// return iz + iy*nz + ix*ny*nz; -// } - - // Add a fixed value to the grid. void RigidBodyGrid::shift(float s) { for (int i = 0; i < nx*ny*nz; i++) val[i] += s; @@ -294,66 +218,6 @@ void RigidBodyGrid::scale(float s) { for (int i = 0; i < nx*ny*nz; i++) val[i] *= s; } -// Get the mean of the entire grid. -float RigidBodyGrid::mean() const { - float sum = 0.0f; - for (int i = 0; i < nx*ny*nz; i++) sum += val[i]; - return sum/nx*ny*nz; -} - -// Get the potential at the closest node. -// float RigidBodyGrid::getPotential(Vector3 pos) const { -// // Find the nearest node. -// int j = nearestIndex(pos); - -// return val[j]; -// } - -HOST DEVICE float RigidBodyGrid::interpolatePotential(const Vector3& l) const { - // Find the home node. - const int homeX = int(floor(l.x)); - const int homeY = int(floor(l.y)); - const int homeZ = int(floor(l.z)); - - float g3[4]; - for (int iz = 0; iz < 4; iz++) { - float g2[4]; - for (int iy = 0; iy < 4; iy++) { - // Fetch values from nearby - float g1[4]; - for (volatile int ix = 0; ix < 4; ix++) { - volatile int jx = (ix-1 + homeX); - volatile int jy = (iy-1 + homeY); - volatile int jz = (iz-1 + homeZ); - const int ind = jz + jy*nz + jx*nz*ny; - g1[ix] = jz < 0 || jz >= nz || jy < 0 || jy >= ny || jx < 0 || jx >= nx ? - 0 : val[ind]; - } - // Mix along x. - const float a3 = 0.5f*(-g1[0] + 3.0f*g1[1] - 3.0f*g1[2] + g1[3]); - const float a2 = 0.5f*(2.0f*g1[0] - 5.0f*g1[1] + 4.0f*g1[2] - g1[3]); - const float a1 = 0.5f*(-g1[0] + g1[2]); - const float a0 = g1[1]; - const float wx = l.x - homeX; - g2[iy] = a3*wx*wx*wx + a2*wx*wx + a1*wx + a0; - } - // Mix along y. - const float a3 = 0.5f*(-g2[0] + 3.0f*g2[1] - 3.0f*g2[2] + g2[3]); - const float a2 = 0.5f*(2.0f*g2[0] - 5.0f*g2[1] + 4.0f*g2[2] - g2[3]); - const float a1 = 0.5f*(-g2[0] + g2[2]); - const float a0 = g2[1]; - const float wy = l.y - homeY; - g3[iz] = a3*wy*wy*wy + a2*wy*wy + a1*wy + a0; - } - // Mix along z. - const float a3 = 0.5f*(-g3[0] + 3.0f*g3[1] - 3.0f*g3[2] + g3[3]); - const float a2 = 0.5f*(2.0f*g3[0] - 5.0f*g3[1] + 4.0f*g3[2] - g3[3]); - const float a1 = 0.5f*(-g3[0] + g3[2]); - const float a0 = g3[1]; - const float wz = l.z - homeZ; - return a3*wz*wz*wz + a2*wz*wz + a1*wz + a0; -} - /** interpolateForce() to be used on CUDA Device **/ DEVICE ForceEnergy RigidBodyGrid::interpolateForceD(const Vector3 l) const { Vector3 f; @@ -366,9 +230,6 @@ DEVICE ForceEnergy RigidBodyGrid::interpolateForceD(const Vector3 l) const { const float wz = l.z - homeZ; const float wx2 = wx*wx; - // RBTODO: test against cpu algorithm; also see if its the same algorithm used by NAMD - // RBTODO: test NAMD alg. for speed - /* f.x */ float g3[3][4]; for (int iz = 0; iz < 4; iz++) { @@ -408,23 +269,17 @@ DEVICE ForceEnergy RigidBodyGrid::interpolateForceD(const Vector3 l) const { } // Mix along z. - { - f.x = -0.5f*(-g3[0][0] + 3.0f*g3[0][1] - 3.0f*g3[0][2] + g3[0][3])*wz*wz*wz + - -0.5f*(2.0f*g3[0][0] - 5.0f*g3[0][1] + 4.0f*g3[0][2] - g3[0][3])*wz*wz + - -0.5f*(-g3[0][0] + g3[0][2]) *wz - - g3[0][1]; - } - { - f.y = -0.5f*(-g3[1][0] + 3.0f*g3[1][1] - 3.0f*g3[1][2] + g3[1][3])*wz*wz*wz + - -0.5f*(2.0f*g3[1][0] - 5.0f*g3[1][1] + 4.0f*g3[1][2] - g3[1][3])*wz*wz + - -0.5f*(-g3[1][0] + g3[1][2]) *wz - - g3[1][1]; - } - { - f.z = -1.5f*(-g3[2][0] + 3.0f*g3[2][1] - 3.0f*g3[2][2] + g3[2][3])*wz*wz - - (2.0f*g3[2][0] - 5.0f*g3[2][1] + 4.0f*g3[2][2] - g3[2][3]) *wz - - 0.5f*(-g3[2][0] + g3[2][2]); - } + f.x = -0.5f*(-g3[0][0] + 3.0f*g3[0][1] - 3.0f*g3[0][2] + g3[0][3])*wz*wz*wz + + -0.5f*(2.0f*g3[0][0] - 5.0f*g3[0][1] + 4.0f*g3[0][2] - g3[0][3])*wz*wz + + -0.5f*(-g3[0][0] + g3[0][2]) *wz - + g3[0][1]; + f.y = -0.5f*(-g3[1][0] + 3.0f*g3[1][1] - 3.0f*g3[1][2] + g3[1][3])*wz*wz*wz + + -0.5f*(2.0f*g3[1][0] - 5.0f*g3[1][1] + 4.0f*g3[1][2] - g3[1][3])*wz*wz + + -0.5f*(-g3[1][0] + g3[1][2]) *wz - + g3[1][1]; + f.z = -1.5f*(-g3[2][0] + 3.0f*g3[2][1] - 3.0f*g3[2][2] + g3[2][3])*wz*wz - + (2.0f*g3[2][0] - 5.0f*g3[2][1] + 4.0f*g3[2][2] - g3[2][3]) *wz - + 0.5f*(-g3[2][0] + g3[2][2]); float e = 0.5f*(-g3[2][0] + 3.0f*g3[2][1] - 3.0f*g3[2][2] + g3[2][3])*wz*wz*wz + 0.5f*(2.0f*g3[2][0] - 5.0f*g3[2][1] + 4.0f*g3[2][2] - g3[2][3]) *wz*wz + 0.5f*(-g3[2][0] + g3[2][2]) *wz + @@ -433,39 +288,6 @@ DEVICE ForceEnergy RigidBodyGrid::interpolateForceD(const Vector3 l) const { return ForceEnergy(f,e); } - -DEVICE float RigidBodyGrid::interpolatePotentialLinearly(const Vector3& l) const { - // Find the home node. - const int homeX = int(floor(l.x)); - const int homeY = int(floor(l.y)); - const int homeZ = int(floor(l.z)); - - float g3[2]; - for (int iz = 0; iz < 2; iz++) { - float g2[2]; - for (int iy = 0; iy < 2; iy++) { - // Fetch values from nearby - float g1[2]; - for (volatile int ix = 0; ix < 2; ix++) { - volatile int jx = (ix + homeX); - volatile int jy = (iy + homeY); - volatile int jz = (iz + homeZ); - const int ind = jz + jy*nz + jx*nz*ny; - g1[ix] = jz < 0 || jz >= nz || jy < 0 || jy >= ny || jx < 0 || jx >= nx ? - 0 : val[ind]; - } - // Mix along x. - const float wx = l.x - homeX; - g2[iy] = wx * (g1[1] - g1[0]) + g1[0]; - } - // Mix along y. - const float wy = l.y - homeY; - g3[iz] = wy * (g2[1] - g2[0]) + g2[0]; - } - // Mix along z. - const float wz = l.z - homeZ; - return wz * (g3[1] - g3[0]) + g3[0]; -} DEVICE ForceEnergy RigidBodyGrid::interpolateForceDLinearly(const Vector3& l) const { // Find the home node. const int homeX = int(floor(l.x)); @@ -512,64 +334,3 @@ DEVICE ForceEnergy RigidBodyGrid::interpolateForceDLinearly(const Vector3& l) co return ForceEnergy(f,e); } - - -// Vector3 RigidBodyGrid::wrapDiffNearest(Vector3 r) const { -// Vector3 l = r; -// l.x = wrapDiff(l.x, nx); -// l.y = wrapDiff(l.y, ny); -// l.z = wrapDiff(l.z, nz); - -// float length2 = l.length2(); - -// for (int dx = -1; dx <= 1; dx++) { -// for (int dy = -1; dy <= 1; dy++) { -// for (int dz = -1; dz <= 1; dz++) { -// //if (dx == 0 && dy == 0 && dz == 0) continue; -// Vector3 tmp = Vector3(l.x+dx*nx, l.y+dy*ny, l.z+dz*nz); -// if (tmp.length2() < length2) { -// l = tmp; -// length2 = l.length2(); -// } -// } -// } -// } - -// return l; -// // return basis.transform(l); -// } - - -// Includes the home node. -// indexBuffer must have a size of at least 27. -void RigidBodyGrid::getNeighbors(int j, int* indexBuffer) const { - int jx = indexX(j); - int jy = indexY(j); - int jz = indexZ(j); - - int k = 0; - for (int ix = -1; ix <= 1; ix++) { - for (int iy = -1; iy <= 1; iy++) { - for (int iz = -1; iz <= 1; iz++) { - int ind = wrap(jz+iz,nz) + nz*wrap(jy+iy,ny) + ny*nz*wrap(jx+ix,nx); - indexBuffer[k] = ind; - k++; - } - } - } -} - - -// Get the values at the neighbors of a node. -// Note that homeX, homeY, and homeZ do not need to be wrapped, -// since we do it here. -void RigidBodyGrid::getNeighborValues(NeighborList* neigh, int homeX, int homeY, int homeZ) const { - for (int ix = -1; ix <= 1; ix++) { - for (int iy = -1; iy <= 1; iy++) { - for (int iz = -1; iz <= 1; iz++) { - int ind = wrap(homeZ+iz,nz) + nz*wrap(homeY+iy,ny) + ny*nz*wrap(homeX+ix,nx); - neigh->v[ix+1][iy+1][iz+1] = val[ind]; - } - } - } -} diff --git a/RigidBodyGrid.h b/RigidBodyGrid.h index d74f0d7142f0c4fba3cdcbd8f0ef69cd5f10a3ef..6206b9dc27e460acac7d688cdceee52e291ba005 100644 --- a/RigidBodyGrid.h +++ b/RigidBodyGrid.h @@ -28,7 +28,7 @@ using namespace std; class ForceEnergy { public: - DEVICE ForceEnergy(Vector3 f, float e) : + DEVICE ForceEnergy(Vector3 &f, float &e) : f(f), e(e) {}; Vector3 f; float e; @@ -36,13 +36,8 @@ public: class RigidBodyGrid { friend class SparseGrid; -private: - // Initialize the variables that get used a lot. - // Also, allocate the main value array. - void init(); - + public: - /* \ | CONSTRUCTORS, DESTRUCTORS, I/O | \===============================*/ @@ -78,9 +73,6 @@ public: RigidBodyGrid mult(const RigidBodyGrid& g); RigidBodyGrid& operator=(const RigidBodyGrid& g); - - // Make a copy of a grid, but at a different resolution. - RigidBodyGrid(const RigidBodyGrid& g, int nx0, int ny0, int nz0); virtual ~RigidBodyGrid(); @@ -98,20 +90,9 @@ public: virtual float getValue(int ix, int iy, int iz) const; - // Vector3 getPosition(int ix, int iy, int iz) const; HOST DEVICE Vector3 getPosition(int j) const; HOST DEVICE Vector3 getPosition(int j, Matrix3 basis, Vector3 origin) const; - /* // Does the point r fall in the grid? */ - /* // Obviously this is without periodic boundary conditions. */ - /* bool inGrid(Vector3 r) const; */ - - /* bool inGridInterp(Vector3 r) const; */ - - /* Vector3 transformTo(Vector3 r) const; */ - - /* Vector3 transformFrom(Vector3 l) const; */ - IndexList index(int j) const; int indexX(int j) const; int indexY(int j) const; @@ -134,81 +115,10 @@ public: // Multiply the grid by a fixed value. void scale(float s); - - /* \ - | COMPUTED | - \=========*/ - - // Get the mean of the entire grid. - float mean() const; - // Get the potential at the closest node. - /* virtual float getPotential(Vector3 pos) const; */ - - DEVICE float interpolatePotentialLinearly(const Vector3& l) const; DEVICE ForceEnergy interpolateForceDLinearly(const Vector3& l) const; - - HOST DEVICE float interpolatePotential(const Vector3& l) const; - - HOST DEVICE inline static int wrap(int i, int n) { - if (i < 0) { - i %= n; - i += n; - } - // The portion above allows i == n, so no else keyword. - if (i >= n) { - i %= n; - } - return i; - } - - /** interpolateForce() to be used on CUDA Device **/ DEVICE ForceEnergy interpolateForceD(Vector3 l) const; - - // Wrap coordinate: 0 <= x < l - HOST DEVICE inline float wrapFloat(float x, float l) const { - int image = int(floor(x/l)); - x -= image*l; - return x; - } - - // Wrap distance: -0.5*l <= x < 0.5*l - HOST DEVICE static inline float wrapDiff(float x, float l) { - int image = int(floor(x/l)); - x -= image*l; - if (x >= 0.5f * l) - x -= l; - return x; - } - - // Wrap vector, 0 <= x < lx && 0 <= y < ly && 0 <= z < lz - HOST DEVICE inline Vector3 wrap(Vector3 l) const { - l.x = wrapFloat(l.x, nx); - l.y = wrapFloat(l.y, ny); - l.z = wrapFloat(l.z, nz); - return l; - } - - // Wrap vector distance, -0.5*l <= x < 0.5*l && ... - HOST DEVICE inline Vector3 wrapDiff(Vector3 l) const { - l.x = wrapDiff(l.x, nx); - l.y = wrapDiff(l.y, ny); - l.z = wrapDiff(l.z, nz); - return l; - } - - /* Vector3 wrapDiffNearest(Vector3 r) const; */ - - // Includes the home node. - // indexBuffer must have a size of at least 27. - void getNeighbors(int j, int* indexBuffer) const; - // Get the values at the neighbors of a node. - // Note that homeX, homeY, and homeZ do not need to be wrapped, - // since we do it here. - void getNeighborValues(NeighborList* neigh, int homeX, int homeY, int homeZ) const; - inline void setVal(float* v) { val = v; } - public: int nx, ny, nz; int size;