diff --git a/.gitignore b/.gitignore index 99c1cd7b962d7d04c3f32359f72b9ad0e3a6d472..23e5af8024fb69f70ca20558365dc130e0dbf1bc 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,4 @@ TAGS TAGS.sh .dir-locals.el BD_example* -runBrownCUDA \ No newline at end of file +runBrownCUDA diff --git a/BaseGrid.h b/BaseGrid.h index e3032d5d88648ea42e19b87987fe8b7aa910f3f1..fe4c127cf1d838dad4e1cfc4aec23e630142260f 100644 --- a/BaseGrid.h +++ b/BaseGrid.h @@ -308,8 +308,6 @@ public: return -(3.0f*a3*w[2]*w[2] + 2.0f*a2*w[2] + a1); } - // RBTODO overload with optimized algorithm - // skip transforms (assume identity basis) HOST DEVICE inline float interpolatePotential(Vector3 pos) const { // Find the home node. Vector3 l = basisInv.transform(pos - origin); diff --git a/ComputeGridGrid.cuh b/ComputeGridGrid.cuh index 173e7bb32cada796ca76baa7fc9b1b413b346e1c..337b455c6ba531036929d1ef0bde6082a634c490 100644 --- a/ComputeGridGrid.cuh +++ b/ComputeGridGrid.cuh @@ -3,32 +3,64 @@ /* #include "useful.h" */ __global__ -void computeGridGridForce(const RigidBodyGrid& rho, const RigidBodyGrid& u, +void computeGridGridForce(const RigidBodyGrid* rho, const RigidBodyGrid* u, Matrix3 basis_rho, Vector3 origin_rho, - Matrix3 basis_u, Vector3 origin_u) { + Matrix3 basis_u, Vector3 origin_u, + Vector3 * retForce, Vector3 * retTorque) { + + printf("ComputeGridGridForce called\n"); + extern __shared__ Vector3 force []; + extern __shared__ Vector3 torque []; + // RBTODO http://devblogs.nvidia.com/parallelforall/cuda-pro-tip-write-flexible-kernels-grid-stride-loops/ + const unsigned int tid = threadIdx.x; const unsigned int r_id = blockIdx.x * blockDim.x + threadIdx.x; // RBTODO parallelize transform - if (r_id > rho.size) // skip threads with no data + if (r_id > rho->size) { // skip threads with no data + force[tid] = Vector3(0.0f); + torque[tid] = Vector3(0.0f); return; - + } // Maybe: Tile grid data into shared memory - // RBTODO: think about localizing regions of grid data - Vector3 p = rho.getPosition(r_id, basis_rho, origin_rho); - float val = rho.val[r_id]; + // RBTODO: think about localizing regions of grid datas + Vector3 r_ijk = rho->getPosition(r_id); /* i,j,k value of voxel */ + Vector3 r_pos = basis_rho.transform( r_ijk ) + origin_rho; + Vector3 u_ijk_float = basis_u.transform( r_pos - origin_u ); - // RBTODO potential basis and rho! + + float r_val = rho->val[r_id]; + float energy = r_val * u->interpolatePotential( u_ijk_float ); // RBTODO combine interp methods and reduce repetition! - float energy = val*u.interpolatePotential(p); - Vector3 f = val*u.interpolateForceD(p); - Vector3 t = p.cross(f); // test if sign is correct! + Vector3 f = r_val*u->interpolateForceD( u_ijk_float ); /* in coord frame of u */ + f = basis_u.inverse().transpose().transform( f ); /* transform to lab frame */ - // RBTODO reduce forces and torques + // Calculate torque about lab-frame origin + Vector3 t = r_pos.cross(f); // RBTODO: test if sign is correct! + + + force[tid] = f; + torque[tid] = t; + __syncthreads(); + + // Reduce force and torques // http://www.cuvilib.com/Reduction.pdf + // RBTODO optimize + for (unsigned int offset = blockDim.x/2; offset > 0; offset >>= 1) { + if (tid < offset) { + unsigned int oid = tid + offset; + force[tid] = force[tid] + force[oid]; + torque[tid] = torque[tid] + torque[oid]; + } + __syncthreads(); + } + if (tid == 0) { + retForce[blockIdx.x] = force[0]; + retTorque[blockIdx.x] = force[0]; + } + // RBTODO 3rd-law forces + torques (maybe outside kernel) - // must reference rigid bodies in some way } diff --git a/Configuration.cpp b/Configuration.cpp index d23362505c47a91074c80e631d173d415b6937cb..01278da1d399ee3d94badc2d5b69cd67f5462d1b 100644 --- a/Configuration.cpp +++ b/Configuration.cpp @@ -775,8 +775,8 @@ int Configuration::readParameters(const char * config_file) { else if (param == String("rotDamping")) rigidBody[currRB].rotDamping = stringToVector3( value ); - else if (param == String("potentialGrid")) - rigidBody[currRB].addPotentialGrid(value); + else if (param == String("densityGrid")) + rigidBody[currRB].addDensityGrid(value); else if (param == String("potentialGrid")) rigidBody[currRB].addPotentialGrid(value); diff --git a/GrandBrownTown.cu b/GrandBrownTown.cu index fa54f7e228686c1025b2254a275ced13b03be2c9..9f2dcc4f9f0df6585c97da96f2a6a1bd3418dfef 100644 --- a/GrandBrownTown.cu +++ b/GrandBrownTown.cu @@ -413,7 +413,7 @@ void GrandBrownTown::run() { printf("Position Update Time: %f ms\n", dt2 * 1000); */ - + RBC.updateForces(); /* for (int j = 0; j < t->num; j++) { */ /* computeGridGridForce<<< numBlocks, NUM_THREADS >>>(grid1_d, grid2_d); */ diff --git a/RigidBody.h b/RigidBody.h index e0fe66ef1a2d4056ba70f523e79d528e49a037d7..6e8af6d2a2206c0ddec2186ed49f2bc67122a13d 100644 --- a/RigidBody.h +++ b/RigidBody.h @@ -41,6 +41,7 @@ class RigidBody { // host side representation of rigid bodies HOST DEVICE void integrate(Vector3& old_trans, Matrix3& old_rot, int startFinishAll); HOST DEVICE inline String getKey() { return key; } + HOST DEVICE inline Vector3 getPosition() const { return position; } HOST DEVICE inline Matrix3 getOrientation() const { return orientation; } HOST DEVICE inline Matrix3 getBasis() const { return orientation; } diff --git a/RigidBodyController.cu b/RigidBodyController.cu index fea8194218db91debb1d97e5cbcf49e02ea088f5..bba8d815af8ffd99e80a047ebc11150c1c32068e 100644 --- a/RigidBodyController.cu +++ b/RigidBodyController.cu @@ -10,6 +10,8 @@ #include "RigidBodyType.h" #include "ComputeGridGrid.cuh" +// #include <vector> + /* #include "Random.h" */ #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } @@ -24,7 +26,6 @@ inline void gpuAssert(cudaError_t code, String file, int line, bool abort=true) /* #include <cuda_runtime.h> */ /* #include <curand_kernel.h> */ - RigidBodyController::RigidBodyController(const Configuration& c) : conf(c) { @@ -42,7 +43,9 @@ conf(c) { tmp.push_back( r ); } rigidBodyByType.push_back(tmp); - } + } + + initializeForcePairs(); } RigidBodyController::~RigidBodyController() { for (int i = 0; i < rigidBodyByType.size(); i++) @@ -50,6 +53,65 @@ RigidBodyController::~RigidBodyController() { rigidBodyByType.clear(); } +void RigidBodyController::initializeForcePairs() { + // Loop over all pairs of rigid body types + // the references here make the code more readable, but they may incur a performance loss + printf("Initializing force pairs\n"); + for (int ti = 0; ti < conf.numRigidTypes; ti++) { + RigidBodyType& t1 = conf.rigidBody[ti]; + for (int tj = ti; tj < conf.numRigidTypes; tj++) { + RigidBodyType& t2 = conf.rigidBody[tj]; + + + const std::vector<String>& keys1 = t1.densityGridKeys; + const std::vector<String>& keys2 = t2.potentialGridKeys; + + printf(" Working on type pair "); + t1.name.printInline(); printf(":"); t2.name.print(); + + // Loop over all pairs of grid keys (e.g. "Elec") + std::vector<int> gridKeyId1; + std::vector<int> gridKeyId2; + + printf(" Grid keys %d:%d\n",keys1.size(),keys2.size()); + + bool paired = false; + for(int k1 = 0; k1 < keys1.size(); k1++) { + for(int k2 = 0; k2 < keys2.size(); k2++) { + printf(" checking grid keys "); + keys1[k1].printInline(); printf(":"); keys2[k2].print(); + + if ( keys1[k1] == keys2[k2] ) { + gridKeyId1.push_back(k1); + gridKeyId2.push_back(k2); + paired = true; + } + } + } + + if (paired) { + // found matching keys => calculate force between all grid pairs + std::vector<RigidBody>& rbs1 = rigidBodyByType[ti]; + std::vector<RigidBody>& rbs2 = rigidBodyByType[tj]; + + // Loop over rigid bodies of these types + for (int i = 0; i < rbs1.size(); i++) { + for (int j = (ti==tj ? i+1 : 0); j < rbs2.size(); j++) { + RigidBody* rb1 = &(rbs1[i]); + RigidBody* rb2 = &(rbs2[j]); + + printf(" pushing RB force pair for %d:%d\n",i,j); + RigidBodyForcePair fp = RigidBodyForcePair(&(t1),&(t2),rb1,rb2,gridKeyId1,gridKeyId2); + gpuErrchk(cudaDeviceSynchronize()); /* RBTODO: this should be extraneous */ + forcePairs.push_back( fp ); + printf(" done pushing RB force pair for %d:%d\n",i,j); + } + } + } + } + } +} + void RigidBodyController::updateForces() { /*––{ RBTODO }–––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––. | probably coalesce kernel calls, or move this to a device kernel caller | @@ -67,49 +129,74 @@ void RigidBodyController::updateForces() { `–––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––*/ // int numBlocks = (num * numReplicas) / NUM_THREADS + (num * numReplicas % NUM_THREADS == 0 ? 0 : 1); // int numBlocks = 1; - int numThreads = 256; - - - // Loop over all pairs of rigid body types - // the references here make the code more readable, but they may incur a performance loss - for (int ti = 0; ti < conf.numRigidTypes; ti++) { - const RigidBodyType& t1 = conf.rigidBody[ti]; - for (int tj = ti; tj < conf.numRigidTypes; tj++) { - const RigidBodyType& t2 = conf.rigidBody[tj]; - - const std::vector<String>& keys1 = t1.densityGridKeys; - const std::vector<String>& keys2 = t2.potentialGridKeys; + /* int numThreads = 256; */ - // Loop over all pairs of grid keys (e.g. "Elec") - for(int k1 = 0; k1 < keys1.size(); k1++) { - for(int k2 = 0; k2 < keys2.size(); k2++) { - if ( keys1[k1] == keys2[k2] ) { - // found matching keys => calculate force between all grid pairs - // loop over rigid bodies of this type - const std::vector<RigidBody>& rbs1 = rigidBodyByType[ti]; - const std::vector<RigidBody>& rbs2 = rigidBodyByType[tj]; - - for (int i = 0; i < rbs1.size(); i++) { - for (int j = (ti==tj ? 0 : i); j < rbs2.size(); j++) { - const RigidBody& rb1 = rbs1[i]; - const RigidBody& rb2 = rbs2[j]; - - const int sz = t1.rawDensityGrids[k1].getSize(); - const int numBlocks = sz / numThreads + ((sz % numThreads == 0) ? 0:1 ); - computeGridGridForce<<< numBlocks, numThreads >>> - (t1.rawDensityGrids[k1], t2.rawPotentialGrids[k2], - rb1.getBasis(), rb1.getPosition(), - rb2.getBasis(), rb2.getPosition()); - } - } - } - } - } - } - } + for (int i=0; i < forcePairs.size(); i++) { + forcePairs[i].updateForces(); + } + // get 3rd law forces and torques // RBTODO: see if there is a better way to sync gpuErrchk(cudaDeviceSynchronize()); + +} + +void RigidBodyForcePair::updateForces() { + // get the force/torque between a pair of rigid bodies + printf(" Updating rbPair forces\n"); + const int numGrids = gridKeyId1.size(); + + // RBTODO: precompute certain common transformations and pass in kernel call + for (int i = 0; i < numGrids; i++) { + const int nb = numBlocks[i]; + const int k1 = gridKeyId1[i]; + const int k2 = gridKeyId2[i]; + + printf(" Calculating grid forces\n"); + + computeGridGridForce<<< nb, numThreads >>> + (type1->rawDensityGrids_d[k1], type2->rawPotentialGrids_d[k2], + rb1->getBasis(), rb1->getPosition(), /* RBTODO: include offset from grid */ + rb2->getBasis(), rb2->getPosition(), + forces_d[i], torques_d[i]); + + // RBTODO: ASYNCHRONOUSLY retrieve forces + } + gpuErrchk(cudaDeviceSynchronize()); + for (int i = 0; i < numGrids; i++) { + const int nb = numBlocks[i]; + gpuErrchk(cudaMemcpy(forces[i], &(forces_d[i]), sizeof(Vector3)*nb, + cudaMemcpyDeviceToHost)); + gpuErrchk(cudaMemcpy(torques[i], &(torques_d[i]), sizeof(Vector3)*nb, + cudaMemcpyDeviceToHost)); + } + + gpuErrchk(cudaDeviceSynchronize()); + + // sum forces + torques + Vector3 f = Vector3(0.0f); + Vector3 t = Vector3(0.0f); + + for (int i = 0; i < numGrids; i++) { + const int nb = numBlocks[i]; + for (int j = 0; j < nb; j++) { + f = f + forces[i][j]; + t = t + forces[i][j]; + } + } + + // transform torque from lab-frame origin to rb centers + Vector3 t1 = t - rb1->getPosition().cross( f ); + Vector3 t2 = -t - rb2->getPosition().cross( -f ); + + // add forces to rbs + rb1->addForce( f); + rb1->addTorque(t1); + rb2->addForce(-f); + rb2->addTorque(t2); + + // integrate + } void RigidBodyController::copyGridsToDevice() { @@ -138,7 +225,8 @@ void RigidBodyController::copyGridsToDevice() { RigidBodyGrid * gtmp; // gtmp = new RigidBodyGrid[ng]; size_t sz = sizeof(RigidBodyGrid)*ng; - + + printf("Copying potential grids!\n"); // allocate grids on device // copy temporary host pointer to device pointer // copy grids to device through temporary host poin @@ -160,10 +248,10 @@ void RigidBodyController::copyGridsToDevice() { float **tmpData; tmpData = new float*[len]; - printf(" RigidBodyType %d: potGrid[%d] size: %d\n", i, gid, len); - for (int k = 0; k < len; k++) - printf(" rbType_d[%d]->potGrid[%d].val[%d]: %g\n", - i, gid, k, g->val[k]); + /* printf(" RigidBodyType %d: potGrid[%d] size: %d\n", i, gid, len); */ + /* for (int k = 0; k < len; k++) */ + /* printf(" rbType_d[%d]->potGrid[%d].val[%d]: %g\n", */ + /* i, gid, k, g->val[k]); */ // allocate grid data on device // copy temporary host pointer to device pointer @@ -183,18 +271,27 @@ void RigidBodyController::copyGridsToDevice() { // density grids for (int i = 0; i < conf.numRigidTypes; i++) { - printf("working on RB %d\n",i); + printf("Copying density grids of RB type %d\n",i); RigidBodyType& rb = conf.rigidBody[i]; int ng = rb.numDenGrids; RigidBodyGrid * gtmp; size_t sz = sizeof(RigidBodyGrid)*ng; + + printf(" copying %d grids\n",ng); + for (int gid = 0; gid < ng; gid++) { + int gs = rb.rawDensityGrids[gid].getSize(); + printf(" grid %d contains %d values\n",gid, gs); + for (int idx = 0; idx < gs; idx++) { + printf(" val[%d] = %g\n",idx, rb.rawDensityGrids[gid].val[idx] ); + } + } // allocate grids on device // copy temporary host pointer to device pointer // copy grids to device through temporary host poin gpuErrchk(cudaMalloc((void **) >mp, sz)); - gpuErrchk(cudaMemcpy(&(rb_addr[i]->rawDensityGrids), >mp, + gpuErrchk(cudaMemcpy(&(rb_addr[i]->rawDensityGrids_d), >mp, sizeof(RigidBodyGrid*) * ng, cudaMemcpyHostToDevice )); gpuErrchk(cudaMemcpy(gtmp, &(rb.rawDensityGrids), sizeof(RigidBodyGrid) * ng, cudaMemcpyHostToDevice )); @@ -237,324 +334,94 @@ void RigidBodyController::copyGridsToDevice() { } +/* RigidBodyForcePair::RigidBodyForcePair(RigidBodyType* t1, RigidBodyType* t2, */ +/* RigidBody* rb1, RigidBody* rb2, */ +/* std::vector<int> gridKeyId1, std::vector<int> gridKeyId2) : */ +/* type1(t1), type2(t2), rb1(rb1), rb2(rb2), gridKeyId1(gridKeyId1), gridKeyId2(gridKeyId2) { */ -/* -void RigidBodyController::print(int step) { - // modeled after outputExtendedData() in Controller.C - if ( step >= 0 ) { - // Write RIGID BODY trajectory file - if ( (step % simParams->rigidBodyOutputFrequency) == 0 ) { - if ( ! trajFile.rdbuf()->is_open() ) { - // open file - iout << "OPENING RIGID BODY TRAJECTORY FILE\n" << endi; - NAMD_backup_file(simParams->rigidBodyTrajectoryFile); - trajFile.open(simParams->rigidBodyTrajectoryFile); - while (!trajFile) { - if ( errno == EINTR ) { - CkPrintf("Warning: Interrupted system call opening RIGIDBODY trajectory file, retrying.\n"); - trajFile.clear(); - trajFile.open(simParams->rigidBodyTrajectoryFile); - continue; - } - char err_msg[257]; - sprintf(err_msg, "Error opening RigidBody trajectory file %s",simParams->rigidBodyTrajectoryFile); - NAMD_err(err_msg); - } - trajFile << "# NAMD RigidBody trajectory file" << std::endl; - printLegend(trajFile); - } - printData(step,trajFile); - trajFile.flush(); - } - - // Write restart File - if ( simParams->restartFrequency && - ((step % simParams->restartFrequency) == 0) && - (step != simParams->firstTimestep) ) - { - iout << "RIGID BODY: WRITING RESTART FILE AT STEP " << step << "\n" << endi; - char fname[140]; - strcpy(fname, simParams->restartFilename); - - strcat(fname, ".rigid"); - NAMD_backup_file(fname,".old"); - std::ofstream restartFile(fname); - while (!restartFile) { - if ( errno == EINTR ) { - CkPrintf("Warning: Interrupted system call opening rigid body restart file, retrying.\n"); - restartFile.clear(); - restartFile.open(fname); - continue; - } - char err_msg[257]; - sprintf(err_msg, "Error opening rigid body restart file %s",fname); - NAMD_err(err_msg); - } - restartFile << "# NAMD rigid body restart file" << std::endl; - printLegend(restartFile); - printData(step,restartFile); - if (!restartFile) { - char err_msg[257]; - sprintf(err_msg, "Error writing rigid body restart file %s",fname); - NAMD_err(err_msg); - } - } - } - // Output final coordinates - if (step == FILE_OUTPUT || step == END_OF_RUN) { - int realstep = ( step == FILE_OUTPUT ? - simParams->firstTimestep : simParams->N ); - iout << "WRITING RIGID BODY OUTPUT FILE AT STEP " << realstep << "\n" << endi; - static char fname[140]; - strcpy(fname, simParams->outputFilename); - strcat(fname, ".rigid"); - NAMD_backup_file(fname); - std::ofstream outputFile(fname); - while (!outputFile) { - if ( errno == EINTR ) { - CkPrintf("Warning: Interrupted system call opening rigid body output file, retrying.\n"); - outputFile.clear(); - outputFile.open(fname); - continue; - } - char err_msg[257]; - sprintf(err_msg, "Error opening rigid body output file %s",fname); - NAMD_err(err_msg); - } - outputFile << "# NAMD rigid body output file" << std::endl; - printLegend(outputFile); - printData(realstep,outputFile); - if (!outputFile) { - char err_msg[257]; - sprintf(err_msg, "Error writing rigid body output file %s",fname); - NAMD_err(err_msg); - } - } - - // Close trajectory file - if (step == END_OF_RUN) { - if ( trajFile.rdbuf()->is_open() ) { - trajFile.close(); - iout << "CLOSING RIGID BODY TRAJECTORY FILE\n" << endi; +/* printf(" Constructing RB force pair...\n"); */ +/* allocateMem(); */ +/* printf(" Done constructing RB force pair\n"); */ + +/* } */ +void RigidBodyForcePair::initialize() { + printf(" Initializing (memory for) RB force pair...\n"); + + const int numGrids = gridKeyId1.size(); + // RBTODO assert gridKeysIds are same size + + // allocate memory for forces/torques + for (int i = 0; i < numGrids; i++) { + const int k1 = gridKeyId1[i]; + const int sz = type1->rawDensityGrids[k1].getSize(); + const int nb = sz / numThreads + ((sz % numThreads == 0) ? 0:1 ); + + numBlocks.push_back(nb); + forces.push_back( new Vector3[nb] ); + torques.push_back( new Vector3[nb] ); + + forces_d.push_back( new Vector3[nb] ); // RBTODO: correct? + torques_d.push_back( new Vector3[nb] ); + + // allocate device memory for numBlocks of torque, etc. + // printf(" Allocating device memory for forces/torques\n"); + gpuErrchk(cudaMalloc(&(forces_d[i]), sizeof(Vector3) * nb)); + gpuErrchk(cudaMalloc(&(torques_d[i]), sizeof(Vector3) * nb)); } - } + gpuErrchk(cudaDeviceSynchronize()); + // printf(" Done initializing RB force pair\n"); } -void RigidBodyController::printLegend(std::ofstream &file) { - file << "#$LABELS step RigidBodyKey" - << " posX posY posZ" - << " rotXX rotXY rotXZ" - << " rotYX rotYY rotYZ" - << " rotZX rotZY rotZZ" - << " velX velY velZ" - << " angVelX angVelY angVelZ" << std::endl; -} -void RigidBodyController::printData(int step,std::ofstream &file) { - iout << "WRITING RIGID BODY COORDINATES AT STEP "<< step << "\n" << endi; - for (int i; i < rigidBodyList.size(); i++) { - Vector v = rigidBodyList[i]->getPosition(); - Tensor t = rigidBodyList[i]->getOrientation(); - file << step <<" "<< rigidBodyList[i]->getKey() - <<" "<< v.x <<" "<< v.y <<" "<< v.z; - file <<" "<< t.xx <<" "<< t.xy <<" "<< t.xz - <<" "<< t.yx <<" "<< t.yy <<" "<< t.yz - <<" "<< t.zx <<" "<< t.zy <<" "<< t.zz; - v = rigidBodyList[i]->getVelocity(); - file <<" "<< v.x <<" "<< v.y <<" "<< v.z; - v = rigidBodyList[i]->getAngularVelocity(); - file <<" "<< v.x <<" "<< v.y <<" "<< v.z - << std::endl; - } + +/* RigidBodyForcePair::RigidBodyForcePair(const RigidBodyForcePair& orig ) : */ + +/* } */ + +void RigidBodyForcePair::swap(RigidBodyForcePair& a, RigidBodyForcePair& b) { + using std::swap; + swap(a.type1, b.type1); + swap(a.type2, b.type2); + swap(a.rb1, b.rb1); + swap(a.rb2, b.rb2); + + swap(a.gridKeyId1, b.gridKeyId1); + swap(a.gridKeyId2, b.gridKeyId2); + + swap(a.numBlocks, b.numBlocks); + + swap(a.forces, b.forces); + swap(a.forces_d, b.forces_d); + swap(a.torques, b.torques); + swap(a.torques_d, b.torques_d); } -void RigidBodyController::integrate(int step) { - DebugM(3, "RBC::integrate: step "<< step << "\n" << endi); - - DebugM(1, "RBC::integrate: Waiting for grid reduction\n" << endi); - gridReduction->require(); - - const Molecule * mol = Node::Object()->molecule; - - // pass reduction force and torque to each grid - // DebugM(3, "Summing forces on rigid bodies" << "\n" << endi); - for (int i=0; i < mol->rbReductionIdToRigidBody.size(); i++) { - Force f; - Force t; - for (int k = 0; k < 3; k++) { - f[k] = gridReduction->item(6*i + k); - t[k] = gridReduction->item(6*i + k + 3); - } - if (step % 100 == 1) - DebugM(4, "RBC::integrate: reduction/rb " << i <<":" - << "\n\tposition: " - << rigidBodyList[mol->rbReductionIdToRigidBody[i]]->getPosition() - <<"\n\torientation: " - << rigidBodyList[mol->rbReductionIdToRigidBody[i]]->getOrientation() - << "\n" << endi); - - DebugM(4, "RBC::integrate: reduction/rb " << i <<": " - << "force " << f <<": "<< "torque: " << t << "\n" << endi); - rigidBodyList[mol->rbReductionIdToRigidBody[i]]->addForce(f); - rigidBodyList[mol->rbReductionIdToRigidBody[i]]->addTorque(t); - } - - // Langevin - for (int i=0; i<rigidBodyList.size(); i++) { - // continue; // debug - if (rigidBodyList[i]->langevin) { - DebugM(1, "RBC::integrate: reduction/rb " << i - <<": calling langevin" << "\n" << endi); - rigidBodyList[i]->addLangevin( - random->gaussian_vector(), random->gaussian_vector() ); - // DebugM(4, "RBC::integrate: reduction/rb " << i - // << " after langevin: force " << rigidBodyList[i]f <<": "<< "torque: " << t << "\n" << endi); - // <<": calling langevin" << "\n" << endi); - } - } - - if ( step >= 0 && simParams->rigidBodyOutputFrequency && - (step % simParams->rigidBodyOutputFrequency) == 0 ) { - DebugM(1, "RBC::integrate:integrating for before printing output" << "\n" << endi); - // PRINT - if ( step == simParams->firstTimestep ) { - print(step); - // first step so only start this cycle - for (int i=0; i<rigidBodyList.size(); i++) { - DebugM(2, "RBC::integrate: reduction/rb " << i - <<": starting integration cycle of step " - << step << "\n" << endi); - rigidBodyList[i]->integrate(&trans[i],&rot[i],0); - } - } else { - // finish last cycle - // DebugM(1, "RBC::integrate: reduction/rb " << i - // <<": firststep: calling rb->integrate" << "\n" << endi); - for (int i=0; i<rigidBodyList.size(); i++) { - DebugM(2, "RBC::integrate: reduction/rb " << i - <<": finishing integration cycle of step " - << step-1 << "\n" << endi); - rigidBodyList[i]->integrate(&trans[i],&rot[i],1); - } - print(step); - // start this cycle - for (int i=0; i<rigidBodyList.size(); i++) { - DebugM(2, "RBC::integrate: reduction/rb " << i - <<": starting integration cycle of step " - << step << "\n" << endi); - rigidBodyList[i]->integrate(&trans[i],&rot[i],0); - } - } - } else { - DebugM(1, "RBC::integrate: trans[0] before: " << trans[0] << "\n" << endi); - if ( step == simParams->firstTimestep ) { - // integrate the start of this cycle - for (int i=0; i<rigidBodyList.size(); i++) { - DebugM(2, "RBC::integrate: reduction/rb " << i - <<": starting integration cycle of (first) step " - << step << "\n" << endi); - rigidBodyList[i]->integrate(&trans[i],&rot[i],0); - } - } else { - // integrate end of last ts and start of this one - for (int i=0; i<rigidBodyList.size(); i++) { - DebugM(2, "RBC::integrate: reduction/rb " << i - <<": ending / starting integration cycle of step " - << step-1 << "-" << step << "\n" << endi); - rigidBodyList[i]->integrate(&trans[i],&rot[i],2); - } +RigidBodyForcePair::~RigidBodyForcePair() { + printf(" Destructing RB force pair\n"); + const int numGrids = gridKeyId1.size(); + + // printf(" numGrids = %d\n",numGrids); + + // RBTODO assert gridKeysIds are same size + + // allocate memory for forces/torques + for (int i = 0; i < numGrids; i++) { + const int k1 = gridKeyId1[i]; + const int nb = numBlocks[i]; + + // free device memory for numBlocks of torque, etc. + // printf(" Freeing device memory for forces/torques\n"); + gpuErrchk(cudaFree( forces_d[i] )); + gpuErrchk(cudaFree( torques_d[i] )); } - DebugM(1, "RBC::integrate: trans[0] after: " << trans[0] << "\n" << endi); - } - - DebugM(3, "sendRigidBodyUpdate on step: " << step << "\n" << endi); - - if (trans.size() != rot.size()) - NAMD_die("failed sanity check\n"); - RigidBodyMsg *msg = new RigidBodyMsg; - msg->trans.copy(trans); // perhaps .swap() would cause problems - msg->rot.copy(rot); - computeMgr->sendRigidBodyUpdate(msg); + gpuErrchk(cudaDeviceSynchronize()); + + numBlocks.clear(); + forces.clear(); + forces_d.clear(); + torques.clear(); + torques_d.clear(); } -RigidBodyParams* RigidBodyParamsList::find_key(const char* key) -{ - RBElem* cur = head; - RBElem* found = NULL; - RigidBodyParams* result = NULL; - - while (found == NULL && cur != NULL) { - if (!strcasecmp((cur->elem).rigidBodyKey,key)) { - found = cur; - } else { - cur = cur->nxt; - } - } - if (found != NULL) { - result = &(found->elem); - } - return result; -} - -int RigidBodyParamsList::index_for_key(const char* key) -{ - RBElem* cur = head; - RBElem* found = NULL; - int result = -1; - - int idx = 0; - while (found == NULL && cur != NULL) { - if (!strcasecmp((cur->elem).rigidBodyKey,key)) { - found = cur; - } else { - cur = cur->nxt; - idx++; - } - } - if (found != NULL) { - result = idx; - } - return result; -} - -RigidBodyParams* RigidBodyParamsList::add(const char* key) -{ - // If the key is already in the list, we can't add it - if (find_key(key)!=NULL) { - return NULL; - } - - RBElem* new_elem = new RBElem(); - int len = strlen(key); - RigidBodyParams* elem = &(new_elem->elem); - elem->rigidBodyKey = new char[len+1]; - strncpy(elem->rigidBodyKey,key,len+1); - elem->mass = NULL; - elem->inertia = Vector(NULL); - elem->langevin = NULL; - elem->temperature = NULL; - elem->transDampingCoeff = Vector(NULL); - elem->rotDampingCoeff = Vector(NULL); - elem->gridList.clear(); - elem->position = Vector(NULL); - elem->velocity = Vector(NULL); - elem->orientation = Tensor(); - elem->orientationalVelocity = Vector(NULL); - - elem->next = NULL; - new_elem->nxt = NULL; - if (head == NULL) { - head = new_elem; - } - if (tail != NULL) { - tail->nxt = new_elem; - tail->elem.next = elem; - } - tail = new_elem; - n_elements++; - - return elem; -} - -*/ + diff --git a/RigidBodyController.h b/RigidBodyController.h index 528741008b1dc49257c9363631bcaed99396a58e..c6e162d750f87911eb3efa832fd70416f299c205 100644 --- a/RigidBodyController.h +++ b/RigidBodyController.h @@ -6,8 +6,55 @@ class Configuration; -struct gridInteractionList { +class RigidBodyForcePair { + friend class RigidBodyController; + +public: + RigidBodyForcePair(RigidBodyType* t1, RigidBodyType* t2, + RigidBody* rb1, RigidBody* rb2, + std::vector<int> gridKeyId1, std::vector<int> gridKeyId2) : + type1(t1), type2(t2), rb1(rb1), rb2(rb2), + gridKeyId1(gridKeyId1), gridKeyId2(gridKeyId2) + { + printf(" Constructing RB force pair...\n"); + initialize(); + // printf(" done constructing RB force pair\n"); + } + RigidBodyForcePair(const RigidBodyForcePair& o) : + type1(o.type1), type2(o.type2), rb1(o.rb1), rb2(o.rb2), + gridKeyId1(o.gridKeyId1), gridKeyId2(o.gridKeyId2) { + printf(" Copying RB force pair...\n"); + initialize(); + } + RigidBodyForcePair& operator=(RigidBodyForcePair& o) { + printf(" Copying assigning RB force pair...\n"); + swap(*this,o); + return *this; + } + + ~RigidBodyForcePair(); + +private: + void initialize(); + void swap(RigidBodyForcePair& a, RigidBodyForcePair& b); + + static const int numThreads = 256; + + RigidBodyType* type1; + RigidBodyType* type2; + RigidBody* rb1; + RigidBody* rb2; + + std::vector<int> gridKeyId1; + std::vector<int> gridKeyId2; + std::vector<int> numBlocks; + + std::vector<Vector3*> forces; + std::vector<Vector3*> forces_d; + std::vector<Vector3*> torques; + std::vector<Vector3*> torques_d; + void updateForces(); }; class RigidBodyController { @@ -17,13 +64,15 @@ public: ~RigidBodyController(); RigidBodyController(const Configuration& c); - DEVICE void integrate(int step); + void integrate(int step); DEVICE void print(int step); + + void updateForces(); private: void copyGridsToDevice(); - void updateForces(); - + void initializeForcePairs(); + /* void printLegend(std::ofstream &file); */ /* void printData(int step, std::ofstream &file); */ public: @@ -41,5 +90,5 @@ private: Matrix3* rot; // there are errors on rigidBody->integrate std::vector< std::vector<RigidBody> > rigidBodyByType; - + std::vector< RigidBodyForcePair > forcePairs; }; diff --git a/RigidBodyGrid.cu b/RigidBodyGrid.cu index 7075071b332bcb31a6e203c5e6ea1df2aa9b573a..c43fb364b3fb6c0ee33b35dbb3817a22a92159ad 100644 --- a/RigidBodyGrid.cu +++ b/RigidBodyGrid.cu @@ -319,6 +319,315 @@ float RigidBodyGrid::mean() const { // return val[j]; // } + // RBTODO overload with optimized algorithm + // skip transforms (assume identity basis) +HOST DEVICE float RigidBodyGrid::interpolatePotential(Vector3 l) const { + // Find the home node. + // Vector3 l = basisInv.transform(pos - origin); + int homeX = int(floor(l.x)); + int homeY = int(floor(l.y)); + int homeZ = int(floor(l.z)); + + // out of grid? return 0 + // RBTODO + + // Get the array jumps. + int jump[3]; + jump[0] = nz*ny; + jump[1] = nz; + jump[2] = 1; + + // Shift the indices in the home array. + int home[3]; + home[0] = homeX; + home[1] = homeY; + home[2] = homeZ; + + // Get the grid dimensions. + int g[3]; + g[0] = nx; + g[1] = ny; + g[2] = nz; + + // Get the interpolation coordinates. + float w[3]; + w[0] = l.x - homeX; + w[1] = l.y - homeY; + w[2] = l.z - homeZ; + + // Find the values at the neighbors. + float g1[4][4][4]; + for (int ix = 0; ix < 4; ix++) { + int jx = ix-1 + home[0]; + jx = wrap(jx, g[0]); + for (int iy = 0; iy < 4; iy++) { + int jy = iy-1 + home[1]; + jy = wrap(jy, g[1]); + for (int iz = 0; iz < 4; iz++) { + // Wrap around the periodic boundaries. + int jz = iz-1 + home[2]; + jz = wrap(jz, g[2]); + + int ind = jz*jump[2] + jy*jump[1] + jx*jump[0]; + g1[ix][iy][iz] = val[ind]; + } + } + } + float a0, a1, a2, a3; + + // Mix along x. + float g2[4][4]; + for (int iy = 0; iy < 4; iy++) { + for (int iz = 0; iz < 4; iz++) { + a3 = 0.5f*(-g1[0][iy][iz] + 3.0f*g1[1][iy][iz] - 3.0f*g1[2][iy][iz] + g1[3][iy][iz]); + a2 = 0.5f*(2.0f*g1[0][iy][iz] - 5.0f*g1[1][iy][iz] + 4.0f*g1[2][iy][iz] - g1[3][iy][iz]); + a1 = 0.5f*(-g1[0][iy][iz] + g1[2][iy][iz]); + a0 = g1[1][iy][iz]; + + g2[iy][iz] = a3*w[0]*w[0]*w[0] + a2*w[0]*w[0] + a1*w[0] + a0; + } + } + + // Mix along y. + float g3[4]; + for (int iz = 0; iz < 4; iz++) { + a3 = 0.5f*(-g2[0][iz] + 3.0f*g2[1][iz] - 3.0f*g2[2][iz] + g2[3][iz]); + a2 = 0.5f*(2.0f*g2[0][iz] - 5.0f*g2[1][iz] + 4.0f*g2[2][iz] - g2[3][iz]); + a1 = 0.5f*(-g2[0][iz] + g2[2][iz]); + a0 = g2[1][iz]; + + g3[iz] = a3*w[1]*w[1]*w[1] + a2*w[1]*w[1] + a1*w[1] + a0; + } + + // Mix along z. + a3 = 0.5f*(-g3[0] + 3.0f*g3[1] - 3.0f*g3[2] + g3[3]); + a2 = 0.5f*(2.0f*g3[0] - 5.0f*g3[1] + 4.0f*g3[2] - g3[3]); + a1 = 0.5f*(-g3[0] + g3[2]); + a0 = g3[1]; + + return a3*w[2]*w[2]*w[2] + a2*w[2]*w[2] + a1*w[2] + a0; +} + +/** interpolateForce() to be used on CUDA Device **/ +HOST DEVICE Vector3 RigidBodyGrid::interpolateForceD(Vector3 l) const { + Vector3 f; + // Vector3 l = basisInv.transform(pos - origin); + int homeX = int(floor(l.x)); + int homeY = int(floor(l.y)); + int homeZ = int(floor(l.z)); + // Get the array jumps with shifted indices. + int jump[3]; + jump[0] = nz*ny; + jump[1] = nz; + jump[2] = 1; + // Shift the indices in the home array. + int home[3]; + home[0] = homeX; + home[1] = homeY; + home[2] = homeZ; + + // Shift the indices in the grid dimensions. + int g[3]; + g[0] = nx; + g[1] = ny; + g[2] = nz; + + // Get the interpolation coordinates. + float w[3]; + w[0] = l.x - homeX; + w[1] = l.y - homeY; + w[2] = l.z - homeZ; + // Find the values at the neighbors. + float g1[4][4][4]; /* neighborhood */ + for (int ix = 0; ix < 4; ix++) { + for (int iy = 0; iy < 4; iy++) { + for (int iz = 0; iz < 4; iz++) { + // RBTODO: probably don't wrap! + // Wrap around the periodic boundaries. + int jx = ix-1 + home[0]; + jx = wrap(jx, g[0]); + int jy = iy-1 + home[1]; + jy = wrap(jy, g[1]); + int jz = iz-1 + home[2]; + jz = wrap(jz, g[2]); + int ind = jz*jump[2] + jy*jump[1] + jx*jump[0]; + g1[ix][iy][iz] = val[ind]; + } + } + } + f.x = interpolateDiffX(w, g1); + f.y = interpolateDiffY(w, g1); + f.z = interpolateDiffZ(w, g1); + // Vector3 f1 = basisInv.transpose().transform(f); + // return f1; + return f; +} + +/* inline virtual Vector3 interpolateForce(Vector3 pos) const { */ +/* Vector3 f; */ +/* Vector3 l = basisInv.transform(pos - origin); */ +/* int homeX = int(floor(l.x)); */ +/* int homeY = int(floor(l.y)); */ +/* int homeZ = int(floor(l.z)); */ +/* // Get the array jumps with shifted indices. */ +/* int jump[3]; */ +/* jump[0] = nz*ny; */ +/* jump[1] = nz; */ +/* jump[2] = 1; */ +/* // Shift the indices in the home array. */ +/* int home[3]; */ +/* home[0] = homeX; */ +/* home[1] = homeY; */ +/* home[2] = homeZ; */ + +/* // Shift the indices in the grid dimensions. */ +/* int g[3]; */ +/* g[0] = nx; */ +/* g[1] = ny; */ +/* g[2] = nz; */ + +/* // Get the interpolation coordinates. */ +/* float w[3]; */ +/* w[0] = l.x - homeX; */ +/* w[1] = l.y - homeY; */ +/* w[2] = l.z - homeZ; */ +/* // Find the values at the neighbors. */ +/* float g1[4][4][4]; */ +/* //RBTODO parallelize? */ +/* for (int ix = 0; ix < 4; ix++) { */ +/* for (int iy = 0; iy < 4; iy++) { */ +/* for (int iz = 0; iz < 4; iz++) { */ +/* // Wrap around the periodic boundaries. */ +/* int jx = ix-1 + home[0]; */ +/* jx = wrap(jx, g[0]); */ +/* int jy = iy-1 + home[1]; */ +/* jy = wrap(jy, g[1]); */ +/* int jz = iz-1 + home[2]; */ +/* jz = wrap(jz, g[2]); */ +/* int ind = jz*jump[2] + jy*jump[1] + jx*jump[0]; */ +/* g1[ix][iy][iz] = val[ind]; */ +/* } */ +/* } */ +/* } */ +/* f.x = interpolateDiffX(pos, w, g1); */ +/* f.y = interpolateDiffY(pos, w, g1); */ +/* f.z = interpolateDiffZ(pos, w, g1); */ +/* Vector3 f1 = basisInv.transpose().transform(f); */ +/* return f1; */ +/* } */ + +HOST DEVICE inline float RigidBodyGrid::interpolateDiffX(float w[3], float g1[4][4][4]) const { + float a0, a1, a2, a3; + // RBTODO further parallelize loops? unlikely? + + // Mix along x, taking the derivative. + float g2[4][4]; + for (int iy = 0; iy < 4; iy++) { + for (int iz = 0; iz < 4; iz++) { + a3 = 0.5f*(-g1[0][iy][iz] + 3.0f*g1[1][iy][iz] - 3.0f*g1[2][iy][iz] + g1[3][iy][iz]); + a2 = 0.5f*(2.0f*g1[0][iy][iz] - 5.0f*g1[1][iy][iz] + 4.0f*g1[2][iy][iz] - g1[3][iy][iz]); + a1 = 0.5f*(-g1[0][iy][iz] + g1[2][iy][iz]); + a0 = g1[1][iy][iz]; + + //g2[iy][iz] = a3*w[0]*w[0]*w[0] + a2*w[0]*w[0] + a1*w[0] + a0; + g2[iy][iz] = 3.0f*a3*w[0]*w[0] + 2.0f*a2*w[0] + a1; + } + } + + // Mix along y. + float g3[4]; + for (int iz = 0; iz < 4; iz++) { + a3 = 0.5f*(-g2[0][iz] + 3.0f*g2[1][iz] - 3.0f*g2[2][iz] + g2[3][iz]); + a2 = 0.5f*(2.0f*g2[0][iz] - 5.0f*g2[1][iz] + 4.0f*g2[2][iz] - g2[3][iz]); + a1 = 0.5f*(-g2[0][iz] + g2[2][iz]); + a0 = g2[1][iz]; + + g3[iz] = a3*w[1]*w[1]*w[1] + a2*w[1]*w[1] + a1*w[1] + a0; + } + + // Mix along z. + a3 = 0.5f*(-g3[0] + 3.0f*g3[1] - 3.0f*g3[2] + g3[3]); + a2 = 0.5f*(2.0f*g3[0] - 5.0f*g3[1] + 4.0f*g3[2] - g3[3]); + a1 = 0.5f*(-g3[0] + g3[2]); + a0 = g3[1]; + + float retval = -(a3*w[2]*w[2]*w[2] + a2*w[2]*w[2] + a1*w[2] + a0); + return retval; +} + +HOST DEVICE inline float RigidBodyGrid::interpolateDiffY(float w[3], float g1[4][4][4]) const { + float a0, a1, a2, a3; + + // Mix along x, taking the derivative. + float g2[4][4]; + for (int iy = 0; iy < 4; iy++) { + for (int iz = 0; iz < 4; iz++) { + a3 = 0.5f*(-g1[0][iy][iz] + 3.0f*g1[1][iy][iz] - 3.0f*g1[2][iy][iz] + g1[3][iy][iz]); + a2 = 0.5f*(2.0f*g1[0][iy][iz] - 5.0f*g1[1][iy][iz] + 4.0f*g1[2][iy][iz] - g1[3][iy][iz]); + a1 = 0.5f*(-g1[0][iy][iz] + g1[2][iy][iz]); + a0 = g1[1][iy][iz]; + + g2[iy][iz] = a3*w[0]*w[0]*w[0] + a2*w[0]*w[0] + a1*w[0] + a0; + } + } + + // Mix along y. + float g3[4]; + for (int iz = 0; iz < 4; iz++) { + a3 = 0.5f*(-g2[0][iz] + 3.0f*g2[1][iz] - 3.0f*g2[2][iz] + g2[3][iz]); + a2 = 0.5f*(2.0f*g2[0][iz] - 5.0f*g2[1][iz] + 4.0f*g2[2][iz] - g2[3][iz]); + a1 = 0.5f*(-g2[0][iz] + g2[2][iz]); + a0 = g2[1][iz]; + + //g3[iz] = a3*w[1]*w[1]*w[1] + a2*w[1]*w[1] + a1*w[1] + a0; + g3[iz] = 3.0f*a3*w[1]*w[1] + 2.0f*a2*w[1] + a1; + } + + // Mix along z. + a3 = 0.5f*(-g3[0] + 3.0f*g3[1] - 3.0f*g3[2] + g3[3]); + a2 = 0.5f*(2.0f*g3[0] - 5.0f*g3[1] + 4.0f*g3[2] - g3[3]); + a1 = 0.5f*(-g3[0] + g3[2]); + a0 = g3[1]; + + return -(a3*w[2]*w[2]*w[2] + a2*w[2]*w[2] + a1*w[2] + a0); +} + +HOST DEVICE inline float RigidBodyGrid::interpolateDiffZ(float w[3], float g1[4][4][4]) const { + float a0, a1, a2, a3; + + // Mix along x, taking the derivative. + float g2[4][4]; + for (int iy = 0; iy < 4; iy++) { + for (int iz = 0; iz < 4; iz++) { + a3 = 0.5f*(-g1[0][iy][iz] + 3.0f*g1[1][iy][iz] - 3.0f*g1[2][iy][iz] + g1[3][iy][iz]); + a2 = 0.5f*(2.0f*g1[0][iy][iz] - 5.0f*g1[1][iy][iz] + 4.0f*g1[2][iy][iz] - g1[3][iy][iz]); + a1 = 0.5f*(-g1[0][iy][iz] + g1[2][iy][iz]); + a0 = g1[1][iy][iz]; + + g2[iy][iz] = a3*w[0]*w[0]*w[0] + a2*w[0]*w[0] + a1*w[0] + a0; + } + } + + // Mix along y. + float g3[4]; + for (int iz = 0; iz < 4; iz++) { + a3 = 0.5f*(-g2[0][iz] + 3.0f*g2[1][iz] - 3.0f*g2[2][iz] + g2[3][iz]); + a2 = 0.5f*(2.0f*g2[0][iz] - 5.0f*g2[1][iz] + 4.0f*g2[2][iz] - g2[3][iz]); + a1 = 0.5f*(-g2[0][iz] + g2[2][iz]); + a0 = g2[1][iz]; + + g3[iz] = a3*w[1]*w[1]*w[1] + a2*w[1]*w[1] + a1*w[1] + a0; + } + + // Mix along z. + a3 = 0.5f*(-g3[0] + 3.0f*g3[1] - 3.0f*g3[2] + g3[3]); + a2 = 0.5f*(2.0f*g3[0] - 5.0f*g3[1] + 4.0f*g3[2] - g3[3]); + a1 = 0.5f*(-g3[0] + g3[2]); + a0 = g3[1]; + + return -(3.0f*a3*w[2]*w[2] + 2.0f*a2*w[2] + a1); +} + // Added by Rogan for times when simpler calculations are required. float RigidBodyGrid::interpolatePotentialLinearly(Vector3 pos) const { diff --git a/RigidBodyGrid.h b/RigidBodyGrid.h index 1aa49c1da08b91d69f10b9106ebbc2c33f08e9fa..b456af80c6673d02d783da97a0c0cbdb58a79cac 100644 --- a/RigidBodyGrid.h +++ b/RigidBodyGrid.h @@ -142,205 +142,11 @@ public: // Added by Rogan for times when simpler calculations are required. virtual float interpolatePotentialLinearly(Vector3 pos) const; - HOST DEVICE inline float interpolateDiffX(Vector3 pos, float w[3], float g1[4][4][4]) const { - float a0, a1, a2, a3; - - // RBTODO further parallelize loops? unlikely? - - // Mix along x, taking the derivative. - float g2[4][4]; - for (int iy = 0; iy < 4; iy++) { - for (int iz = 0; iz < 4; iz++) { - a3 = 0.5f*(-g1[0][iy][iz] + 3.0f*g1[1][iy][iz] - 3.0f*g1[2][iy][iz] + g1[3][iy][iz]); - a2 = 0.5f*(2.0f*g1[0][iy][iz] - 5.0f*g1[1][iy][iz] + 4.0f*g1[2][iy][iz] - g1[3][iy][iz]); - a1 = 0.5f*(-g1[0][iy][iz] + g1[2][iy][iz]); - a0 = g1[1][iy][iz]; - - //g2[iy][iz] = a3*w[0]*w[0]*w[0] + a2*w[0]*w[0] + a1*w[0] + a0; - g2[iy][iz] = 3.0f*a3*w[0]*w[0] + 2.0f*a2*w[0] + a1; - } - } - - - // Mix along y. - float g3[4]; - for (int iz = 0; iz < 4; iz++) { - a3 = 0.5f*(-g2[0][iz] + 3.0f*g2[1][iz] - 3.0f*g2[2][iz] + g2[3][iz]); - a2 = 0.5f*(2.0f*g2[0][iz] - 5.0f*g2[1][iz] + 4.0f*g2[2][iz] - g2[3][iz]); - a1 = 0.5f*(-g2[0][iz] + g2[2][iz]); - a0 = g2[1][iz]; - - g3[iz] = a3*w[1]*w[1]*w[1] + a2*w[1]*w[1] + a1*w[1] + a0; - } - - // Mix along z. - a3 = 0.5f*(-g3[0] + 3.0f*g3[1] - 3.0f*g3[2] + g3[3]); - a2 = 0.5f*(2.0f*g3[0] - 5.0f*g3[1] + 4.0f*g3[2] - g3[3]); - a1 = 0.5f*(-g3[0] + g3[2]); - a0 = g3[1]; - - return -(a3*w[2]*w[2]*w[2] + a2*w[2]*w[2] + a1*w[2] + a0); - } - - HOST DEVICE inline float interpolateDiffY(Vector3 pos, float w[3], float g1[4][4][4]) const { - float a0, a1, a2, a3; - - // Mix along x, taking the derivative. - float g2[4][4]; - for (int iy = 0; iy < 4; iy++) { - for (int iz = 0; iz < 4; iz++) { - a3 = 0.5f*(-g1[0][iy][iz] + 3.0f*g1[1][iy][iz] - 3.0f*g1[2][iy][iz] + g1[3][iy][iz]); - a2 = 0.5f*(2.0f*g1[0][iy][iz] - 5.0f*g1[1][iy][iz] + 4.0f*g1[2][iy][iz] - g1[3][iy][iz]); - a1 = 0.5f*(-g1[0][iy][iz] + g1[2][iy][iz]); - a0 = g1[1][iy][iz]; - - g2[iy][iz] = a3*w[0]*w[0]*w[0] + a2*w[0]*w[0] + a1*w[0] + a0; - } - } - - // Mix along y. - float g3[4]; - for (int iz = 0; iz < 4; iz++) { - a3 = 0.5f*(-g2[0][iz] + 3.0f*g2[1][iz] - 3.0f*g2[2][iz] + g2[3][iz]); - a2 = 0.5f*(2.0f*g2[0][iz] - 5.0f*g2[1][iz] + 4.0f*g2[2][iz] - g2[3][iz]); - a1 = 0.5f*(-g2[0][iz] + g2[2][iz]); - a0 = g2[1][iz]; - - //g3[iz] = a3*w[1]*w[1]*w[1] + a2*w[1]*w[1] + a1*w[1] + a0; - g3[iz] = 3.0f*a3*w[1]*w[1] + 2.0f*a2*w[1] + a1; - } - - // Mix along z. - a3 = 0.5f*(-g3[0] + 3.0f*g3[1] - 3.0f*g3[2] + g3[3]); - a2 = 0.5f*(2.0f*g3[0] - 5.0f*g3[1] + 4.0f*g3[2] - g3[3]); - a1 = 0.5f*(-g3[0] + g3[2]); - a0 = g3[1]; - - return -(a3*w[2]*w[2]*w[2] + a2*w[2]*w[2] + a1*w[2] + a0); - } - - HOST DEVICE inline float interpolateDiffZ(Vector3 pos, float w[3], float g1[4][4][4]) const { - float a0, a1, a2, a3; - - // Mix along x, taking the derivative. - float g2[4][4]; - for (int iy = 0; iy < 4; iy++) { - for (int iz = 0; iz < 4; iz++) { - a3 = 0.5f*(-g1[0][iy][iz] + 3.0f*g1[1][iy][iz] - 3.0f*g1[2][iy][iz] + g1[3][iy][iz]); - a2 = 0.5f*(2.0f*g1[0][iy][iz] - 5.0f*g1[1][iy][iz] + 4.0f*g1[2][iy][iz] - g1[3][iy][iz]); - a1 = 0.5f*(-g1[0][iy][iz] + g1[2][iy][iz]); - a0 = g1[1][iy][iz]; - - g2[iy][iz] = a3*w[0]*w[0]*w[0] + a2*w[0]*w[0] + a1*w[0] + a0; - } - } - - // Mix along y. - float g3[4]; - for (int iz = 0; iz < 4; iz++) { - a3 = 0.5f*(-g2[0][iz] + 3.0f*g2[1][iz] - 3.0f*g2[2][iz] + g2[3][iz]); - a2 = 0.5f*(2.0f*g2[0][iz] - 5.0f*g2[1][iz] + 4.0f*g2[2][iz] - g2[3][iz]); - a1 = 0.5f*(-g2[0][iz] + g2[2][iz]); - a0 = g2[1][iz]; - - g3[iz] = a3*w[1]*w[1]*w[1] + a2*w[1]*w[1] + a1*w[1] + a0; - } - - // Mix along z. - a3 = 0.5f*(-g3[0] + 3.0f*g3[1] - 3.0f*g3[2] + g3[3]); - a2 = 0.5f*(2.0f*g3[0] - 5.0f*g3[1] + 4.0f*g3[2] - g3[3]); - a1 = 0.5f*(-g3[0] + g3[2]); - a0 = g3[1]; - - return -(3.0f*a3*w[2]*w[2] + 2.0f*a2*w[2] + a1); - } + HOST DEVICE float interpolateDiffX(float w[3], float g1[4][4][4]) const; + HOST DEVICE float interpolateDiffY(float w[3], float g1[4][4][4]) const; + HOST DEVICE float interpolateDiffZ(float w[3], float g1[4][4][4]) const; - HOST DEVICE inline float interpolatePotential(Vector3 pos) const { - // Find the home node. - Vector3 l = pos; - int homeX = int(floor(l.x)); - int homeY = int(floor(l.y)); - int homeZ = int(floor(l.z)); - - // out of grid? return 0 - // RBTODO - - // Get the array jumps. - int jump[3]; - jump[0] = nz*ny; - jump[1] = nz; - jump[2] = 1; - - // Shift the indices in the home array. - int home[3]; - home[0] = homeX; - home[1] = homeY; - home[2] = homeZ; - - // Get the grid dimensions. - int g[3]; - g[0] = nx; - g[1] = ny; - g[2] = nz; - - // Get the interpolation coordinates. - float w[3]; - w[0] = l.x - homeX; - w[1] = l.y - homeY; - w[2] = l.z - homeZ; - - // Find the values at the neighbors. - float g1[4][4][4]; - for (int ix = 0; ix < 4; ix++) { - int jx = ix-1 + home[0]; - for (int iy = 0; iy < 4; iy++) { - int jy = iy-1 + home[1]; - for (int iz = 0; iz < 4; iz++) { - int jz = iz-1 + home[2]; - if (jx < 0 || jy < 0 || jz < 0 || - jx >= nx || jz >= nz || jz >= nz) { - g1[ix][iy][iz] = 0; - } else { - int ind = jz*jump[2] + jy*jump[1] + jx*jump[0]; - g1[ix][iy][iz] = val[ind]; - } - } - } - } - float a0, a1, a2, a3; - - // Mix along x. - float g2[4][4]; - for (int iy = 0; iy < 4; iy++) { - for (int iz = 0; iz < 4; iz++) { - a3 = 0.5f*(-g1[0][iy][iz] + 3.0f*g1[1][iy][iz] - 3.0f*g1[2][iy][iz] + g1[3][iy][iz]); - a2 = 0.5f*(2.0f*g1[0][iy][iz] - 5.0f*g1[1][iy][iz] + 4.0f*g1[2][iy][iz] - g1[3][iy][iz]); - a1 = 0.5f*(-g1[0][iy][iz] + g1[2][iy][iz]); - a0 = g1[1][iy][iz]; - - g2[iy][iz] = a3*w[0]*w[0]*w[0] + a2*w[0]*w[0] + a1*w[0] + a0; - } - } - - // Mix along y. - float g3[4]; - for (int iz = 0; iz < 4; iz++) { - a3 = 0.5f*(-g2[0][iz] + 3.0f*g2[1][iz] - 3.0f*g2[2][iz] + g2[3][iz]); - a2 = 0.5f*(2.0f*g2[0][iz] - 5.0f*g2[1][iz] + 4.0f*g2[2][iz] - g2[3][iz]); - a1 = 0.5f*(-g2[0][iz] + g2[2][iz]); - a0 = g2[1][iz]; - - g3[iz] = a3*w[1]*w[1]*w[1] + a2*w[1]*w[1] + a1*w[1] + a0; - } - - // Mix along z. - a3 = 0.5f*(-g3[0] + 3.0f*g3[1] - 3.0f*g3[2] + g3[3]); - a2 = 0.5f*(2.0f*g3[0] - 5.0f*g3[1] + 4.0f*g3[2] - g3[3]); - a1 = 0.5f*(-g3[0] + g3[2]); - a0 = g3[1]; - - return a3*w[2]*w[2]*w[2] + a2*w[2]*w[2] + a1*w[2] + a0; - } + HOST DEVICE float interpolatePotential(Vector3 l) const; HOST DEVICE inline static int wrap(int i, int n) { if (i < 0) { @@ -355,61 +161,7 @@ public: } /** interpolateForce() to be used on CUDA Device **/ - DEVICE inline Vector3 interpolateForceD(Vector3 pos) const { - Vector3 f; - Vector3 l = pos; - int homeX = int(floor(l.x)); - int homeY = int(floor(l.y)); - int homeZ = int(floor(l.z)); - // Get the array jumps with shifted indices. - int jump[3]; - jump[0] = nz*ny; - jump[1] = nz; - jump[2] = 1; - // Shift the indices in the home array. - int home[3]; - home[0] = homeX; - home[1] = homeY; - home[2] = homeZ; - - // Shift the indices in the grid dimensions. - int g[3]; - g[0] = nx; - g[1] = ny; - g[2] = nz; - - // Get the interpolation coordinates. - float w[3]; - w[0] = l.x - homeX; - w[1] = l.y - homeY; - w[2] = l.z - homeZ; - // Find the values at the neighbors. - float g1[4][4][4]; /* RBTODO: inefficient for my algorithm? */ - for (int ix = 0; ix < 4; ix++) { - int jx = ix-1 + home[0]; - for (int iy = 0; iy < 4; iy++) { - int jy = iy-1 + home[1]; - for (int iz = 0; iz < 4; iz++) { - // Assume zero value at edges - int jz = iz-1 + home[2]; - // RBTODO: possible branch divergence in warp? - if (jx < 0 || jy < 0 || jz < 0 || - jx >= nx || jz >= nz || jz >= nz) { - g1[ix][iy][iz] = 0; - } else { - int ind = jz*jump[2] + jy*jump[1] + jx*jump[0]; - g1[ix][iy][iz] = val[ind]; - } - } - } - } - f.x = interpolateDiffX(pos, w, g1); - f.y = interpolateDiffY(pos, w, g1); - f.z = interpolateDiffZ(pos, w, g1); - // Vector3 f1 = basisInv.transpose().transform(f); - // return f1; - return f; - } + HOST DEVICE Vector3 interpolateForceD(Vector3 l) const; inline virtual Vector3 interpolateForce(Vector3 pos) const { Vector3 f; @@ -457,9 +209,9 @@ public: } } } - f.x = interpolateDiffX(pos, w, g1); - f.y = interpolateDiffY(pos, w, g1); - f.z = interpolateDiffZ(pos, w, g1); + f.x = interpolateDiffX( w, g1); + f.y = interpolateDiffY( w, g1); + f.z = interpolateDiffZ( w, g1); // Vector3 f1 = basisInv.transpose().transform(f); // return f1; return f; @@ -482,7 +234,7 @@ public: } // Wrap vector, 0 <= x < lx && 0 <= y < ly && 0 <= z < lz - HOST DEVICE inline virtual Vector3 wrap(Vector3 l) const { + HOST DEVICE inline Vector3 wrap(Vector3 l) const { l.x = wrapFloat(l.x, nx); l.y = wrapFloat(l.y, ny); l.z = wrapFloat(l.z, nz); diff --git a/makefile b/makefile index 6f9a87576267ceea2020f279fd54a7ba0c9699b5..942255dcb3d6aa6a51a6d511ca1c59cbf22349fb 100644 --- a/makefile +++ b/makefile @@ -12,7 +12,7 @@ INCLUDE = $(CUDA_PATH)/include DEBUG = -g CC_FLAGS = -Wall -Wno-write-strings -I$(INCLUDE) $(DEBUG) -std=c++0x -pedantic -NV_FLAGS = $(DEBUG) +NV_FLAGS = -g -G #debug EX_FLAGS = -O3 -m$(OS_SIZE) ifneq ($(MAVERICKS),) diff --git a/notes.org b/notes.org index 25d583185188dfd1fb34d5546d133a0fd4ca0482..28b02e96b2e1f176e60d6a094632ee7d275448dd 100644 --- a/notes.org +++ b/notes.org @@ -1,6 +1,6 @@ * TODO active ** make rigid body object device pointer - +** branch rb-device: have RBType objects hold device pointers for raw RigidBodyGrids * organization diff --git a/useful.cu b/useful.cu index 443a3544a9b762e51dc88802457347eb79f7b404..01de12fd8a05800d247fc8aba23b93bdb0d9c8b3 100644 --- a/useful.cu +++ b/useful.cu @@ -35,10 +35,13 @@ int firstSpace(const char* s, int max) { // A classic growable string class. -void String::print() { +void String::print() const { + printInline(); + printf("\n"); +} +void String::printInline() const { for (int i = 0; i < len; i++) printf("%c", c[i]); - printf("\n"); } String& String::operator=(const String& s) { diff --git a/useful.h b/useful.h index 327cd7a7d9f0ab4962d9baa0c28613a7982635f9..19b88dc09f03232e5d97fa7a5d3d09b5192c9b47 100644 --- a/useful.h +++ b/useful.h @@ -73,7 +73,8 @@ public: void add(char c0); void add(const char* s); void add(String& s); - void print(); + void printInline() const; + void print() const; int length() const; // Negative indices go from the end.