From a7b708511ea945bdf20e51bf6ca8bfa29be9daa0 Mon Sep 17 00:00:00 2001 From: Chris Maffeo <cmaffeo2@illinois.edu> Date: Tue, 17 Nov 2015 16:12:21 -0600 Subject: [PATCH] moved cuda copy for rigidbodies to controller --- ComputeGridGrid.cuh | 4 +- Configuration.cpp | 119 ----------------- Configuration.h | 1 - GrandBrownTown.cu | 17 +-- RigidBody.cu | 4 +- RigidBody.h | 2 +- RigidBodyController.cu | 293 +++++++++++++++++++---------------------- RigidBodyController.h | 12 +- RigidBodyGrid.cu | 9 ++ RigidBodyType.h | 10 +- makefile | 14 +- notes.org | 12 +- 12 files changed, 192 insertions(+), 305 deletions(-) diff --git a/ComputeGridGrid.cuh b/ComputeGridGrid.cuh index 50a6ba6..173e7bb 100644 --- a/ComputeGridGrid.cuh +++ b/ComputeGridGrid.cuh @@ -1,6 +1,6 @@ #pragma once -#include "RigidBodyGrid.h" -#include "useful.h" +/* #include "RigidBodyGrid.h" */ +/* #include "useful.h" */ __global__ void computeGridGridForce(const RigidBodyGrid& rho, const RigidBodyGrid& u, diff --git a/Configuration.cpp b/Configuration.cpp index 89691e8..d233625 100644 --- a/Configuration.cpp +++ b/Configuration.cpp @@ -412,11 +412,9 @@ void Configuration::copyToCUDA() { printf("Copying to GPU %d\n", GPUManager::current()); BrownianParticleType **part_addr = new BrownianParticleType*[numParts]; - RigidBodyType **rb_addr = new RigidBodyType*[numRigidTypes]; // Copy the BaseGrid objects and their member variables/objects gpuErrchk(cudaMalloc(&part_d, sizeof(BrownianParticleType*) * numParts)); - gpuErrchk(cudaMalloc(&rbType_d, sizeof(RigidBodyType*) * numRigidTypes)); // TODO: The above line fails when there is not enough memory. If it fails, stop. for (int i = 0; i < numParts; i++) { @@ -459,123 +457,6 @@ void Configuration::copyToCUDA() { gpuErrchk(cudaMemcpyAsync(part_d, part_addr, sizeof(BrownianParticleType*) * numParts, cudaMemcpyHostToDevice)); - - printf("Copying RBs\n"); - // Copy rigidbody types - // http://stackoverflow.com/questions/16024087/copy-an-object-to-device - for (int i = 0; i < numRigidTypes; i++) { - printf("Working on RB %d\n",i); - RigidBodyType *rb = &(rigidBody[i]); // temporary for convenience - rb->updateRaw(); - - int ng = rb->numPotGrids; - - // copy rigidbody to device - // RigidBodyType *rb_d; - gpuErrchk(cudaMalloc(&rb_addr[i], sizeof(RigidBodyType))); - gpuErrchk(cudaMemcpy(rb_addr[i], rb, sizeof(RigidBodyType), - cudaMemcpyHostToDevice)); - - // copy rb->grid to device - BaseGrid * gtmp; - // gtmp = new BaseGrid[ng]; - size_t sz = sizeof(BaseGrid)*ng; - - // allocate grids on device - // copy temporary host pointer to device pointer - // copy grids to device through temporary host poin - gpuErrchk(cudaMalloc((void **) >mp, sz)); - gpuErrchk(cudaMemcpy(&(rb_addr[i]->rawPotentialGrids), >mp, - sizeof(BaseGrid*) * ng, cudaMemcpyHostToDevice )); - gpuErrchk(cudaMemcpy(gtmp, &(rb->rawPotentialGrids), - sizeof(BaseGrid) * ng, cudaMemcpyHostToDevice )); - for (int gid = 0; gid < ng; gid++) { - gpuErrchk(cudaMemcpy(&(gtmp[gid]), &(rb->rawPotentialGrids[gid]), - sizeof(BaseGrid), cudaMemcpyHostToDevice )); - } - - printf(" RigidBodyType %d: numGrids = %d\n", i, ng); - // copy grid data to device - for (int gid = 0; gid < ng; gid++) { - BaseGrid *g = &(rb->rawPotentialGrids[gid]); // convenience - int len = g->getSize(); - float **tmpData; - tmpData = new float*[len]; - - printf(" RigidBodyType %d: potGrid[%d] size: %d\n", i, gid, len); - for (int k = 0; k < len; k++) - printf(" rbType_d[%d]->potGrid[%d].val[%d]: %g\n", - i, gid, k, g->val[k]); - - // allocate grid data on device - // copy temporary host pointer to device pointer - // copy data to device through temporary host pointer - sz = sizeof(float*) * len; - gpuErrchk(cudaMalloc((void **) &tmpData, sz)); - // gpuErrchk(cudaMemcpy( &(rb_addr[i]->rawPotentialGrids[gid].val), &tmpData, - // sizeof(float*), cudaMemcpyHostToDevice)); - gpuErrchk(cudaMemcpy( &(gtmp[gid].val), &tmpData, - sizeof(float*), cudaMemcpyHostToDevice)); - sz = sizeof(float) * len; - gpuErrchk(cudaMemcpy( tmpData, g->val, sz, cudaMemcpyHostToDevice)); - // RBTODO: why can't this be deleted? - // delete[] tmpData; - } - } - - // density grids - for (int i = 0; i < numRigidTypes; i++) { - printf("working on RB %d\n",i); - RigidBodyType *rb = &(rigidBody[i]); // temporary for convenience - - int ng = rb->numDenGrids; - BaseGrid * gtmp; - size_t sz = sizeof(BaseGrid)*ng; - - // allocate grids on device - // copy temporary host pointer to device pointer - // copy grids to device through temporary host poin - gpuErrchk(cudaMalloc((void **) >mp, sz)); - gpuErrchk(cudaMemcpy(&(rb_addr[i]->rawDensityGrids), >mp, - sizeof(BaseGrid*) * ng, cudaMemcpyHostToDevice )); - gpuErrchk(cudaMemcpy(gtmp, &(rb->rawDensityGrids), - sizeof(BaseGrid) * ng, cudaMemcpyHostToDevice )); - for (int gid = 0; gid < ng; gid++) { - gpuErrchk(cudaMemcpy(&(gtmp[gid]), &(rb->rawDensityGrids[gid]), - sizeof(BaseGrid), cudaMemcpyHostToDevice )); - } - - printf(" RigidBodyType %d: numGrids = %d\n", i, ng); - // copy grid data to device - for (int gid = 0; gid < ng; gid++) { - BaseGrid *g = &(rb->rawDensityGrids[gid]); // convenience - int len = g->getSize(); - float **tmpData; - tmpData = new float*[len]; - - printf(" RigidBodyType %d: potGrid[%d] size: %d\n", i, gid, len); - for (int k = 0; k < len; k++) - printf(" rbType_d[%d]->potGrid[%d].val[%d]: %g\n", - i, gid, k, g->val[k]); - - // allocate grid data on device - // copy temporary host pointer to device pointer - // copy data to device through temporary host pointer - sz = sizeof(float*) * len; - gpuErrchk(cudaMalloc((void **) &tmpData, sz)); - gpuErrchk(cudaMemcpy( &(gtmp[gid].val), &tmpData, - sizeof(float*), cudaMemcpyHostToDevice)); - sz = sizeof(float) * len; - gpuErrchk(cudaMemcpy( tmpData, g->val, sz, cudaMemcpyHostToDevice)); - // RBTODO: why can't this be deleted? - // delete[] tmpData; - } - - } - gpuErrchk(cudaMemcpy(rbType_d, rb_addr, sizeof(RigidBodyType*) * numRigidTypes, - cudaMemcpyHostToDevice)); - printf("Done copying RBs\n"); - // kTGrid_d kTGrid_d = NULL; if (temperatureGrid.length() > 0) { diff --git a/Configuration.h b/Configuration.h index 68df323..cb255b1 100644 --- a/Configuration.h +++ b/Configuration.h @@ -82,7 +82,6 @@ public: // Device Variables int *type_d; BrownianParticleType **part_d; - RigidBodyType **rbType_d; BaseGrid *sys_d, *kTGrid_d; Bond* bonds_d; int2* bondMap_d; diff --git a/GrandBrownTown.cu b/GrandBrownTown.cu index 57b271b..fa54f7e 100644 --- a/GrandBrownTown.cu +++ b/GrandBrownTown.cu @@ -16,7 +16,8 @@ cudaEvent_t START, STOP; GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg, const long int randomSeed, bool debug, bool imd_on, unsigned int imd_port, int numReplicas) : - imd_on(imd_on), imd_port(imd_port), numReplicas(numReplicas), conf(c) { + imd_on(imd_on), imd_port(imd_port), numReplicas(numReplicas), + conf(c), RBC(RigidBodyController(c)) { for (int i = 0; i < numReplicas; i++) { std::stringstream curr_file, restart_file, out_prefix; @@ -28,13 +29,6 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg, outCurrFiles.push_back(curr_file.str()); restartFiles.push_back(restart_file.str()); outFilePrefixes.push_back(out_prefix.str()); - - printf("About to devicePrint\n"); - // devicePrint<<<1,1>>>(&(c.rigidBody[0])); - devicePrint<<<1,1>>>(c.rbType_d); - cudaDeviceSynchronize(); - printf("Done with devicePrint\n"); - } GrandBrownTown::DEBUG = debug; @@ -56,6 +50,13 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg, // Allocate things for rigid body // RBC = RigidBodyController(c); + printf("About to devicePrint\n"); + // devicePrint<<<1,1>>>(&(c.rigidBody[0])); + devicePrint<<<1,1>>>(RBC.rbType_d); + cudaDeviceSynchronize(); + printf("Done with devicePrint\n"); + + // Replicate identical initial conditions across all replicas // TODO: add an option to generate random initial conditions for all replicas diff --git a/RigidBody.cu b/RigidBody.cu index bfbbb90..d35d610 100644 --- a/RigidBody.cu +++ b/RigidBody.cu @@ -10,8 +10,10 @@ #include "Debug.h" + RigidBody::RigidBody(const Configuration& cref, RigidBodyType& tref) - : c(&cref), t(&tref) { + : c(&cref), t(&tref), impulse_to_momentum(0.0004184) { + timestep = c->timestep; // RBTODO: fix this Temp = 295; diff --git a/RigidBody.h b/RigidBody.h index 0dc6a9c..e0fe66e 100644 --- a/RigidBody.h +++ b/RigidBody.h @@ -84,7 +84,7 @@ private: /*–––––––––––––––––––––––––––––––––––––––––. | units "kcal_mol/AA * fs" "(AA/fs) * amu" | `–––––––––––––––––––––––––––––––––––––––––*/ - const BigReal impulse_to_momentum = 0.0004184; /* should be static, but fails */ + BigReal impulse_to_momentum; /* should be const, but copy constructor failed */ HOST DEVICE inline Matrix3 Rx(BigReal t); diff --git a/RigidBodyController.cu b/RigidBodyController.cu index feb471e..fea8194 100644 --- a/RigidBodyController.cu +++ b/RigidBodyController.cu @@ -7,8 +7,9 @@ /* #include "RigidBody.h" */ #include "RigidBodyController.h" #include "Configuration.h" - #include "RigidBodyType.h" +#include "ComputeGridGrid.cuh" + /* #include "Random.h" */ #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } @@ -27,6 +28,10 @@ inline void gpuAssert(cudaError_t code, String file, int line, bool abort=true) RigidBodyController::RigidBodyController(const Configuration& c) : conf(c) { + if (conf.numRigidTypes > 0) { + copyGridsToDevice(); + } + int numRB = 0; // grow list of rbs for (int i = 0; i < conf.numRigidTypes; i++) { @@ -37,8 +42,7 @@ conf(c) { tmp.push_back( r ); } rigidBodyByType.push_back(tmp); - } - + } } RigidBodyController::~RigidBodyController() { for (int i = 0; i < rigidBodyByType.size(); i++) @@ -62,8 +66,8 @@ void RigidBodyController::updateForces() { | Opportunities for memory bandwidth savings: | `–––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––*/ // int numBlocks = (num * numReplicas) / NUM_THREADS + (num * numReplicas % NUM_THREADS == 0 ? 0 : 1); - int numBlocks = 1; - int numThreads = 32; + // int numBlocks = 1; + int numThreads = 256; // Loop over all pairs of rigid body types @@ -89,7 +93,9 @@ void RigidBodyController::updateForces() { for (int j = (ti==tj ? 0 : i); j < rbs2.size(); j++) { const RigidBody& rb1 = rbs1[i]; const RigidBody& rb2 = rbs2[j]; - + + const int sz = t1.rawDensityGrids[k1].getSize(); + const int numBlocks = sz / numThreads + ((sz % numThreads == 0) ? 0:1 ); computeGridGridForce<<< numBlocks, numThreads >>> (t1.rawDensityGrids[k1], t2.rawPotentialGrids[k2], rb1.getBasis(), rb1.getPosition(), @@ -104,56 +110,135 @@ void RigidBodyController::updateForces() { // RBTODO: see if there is a better way to sync gpuErrchk(cudaDeviceSynchronize()); - } -/* -RigidBodyController::RigidBodyController(const NamdState *s, const int reductionTag, SimParameters *sp) : state(s), simParams(sp) -{ - DebugM(2, "Rigid Body Controller initializing" - << "\n" << endi); - - // initialize each RigidBody - RigidBodyParams *params = simParams->rigidBodyList.get_first(); - while (params != NULL) { - // check validity of params? - RigidBody *rb = new RigidBody(simParams, params); - rigidBodyList.push_back( rb ); - params = params->next; - } +void RigidBodyController::copyGridsToDevice() { + RigidBodyType **rb_addr = new RigidBodyType*[conf.numRigidTypes]; /* temporary pointer to device pointer */ - // initialize translation and rotation data - trans.resize( rigidBodyList.size() ); - rot.resize( rigidBodyList.size() ); - for (int i=0; i<rigidBodyList.size(); i++) { - trans[i] = rigidBodyList[i]->getPosition(); - rot[i] = rigidBodyList[i]->getOrientation(); - // trans.insert( rigidBodyList[i]->getPosition(), i ); - // rot.insert( rigidBodyList[i]->getOrientation(), i ); - } + gpuErrchk(cudaMalloc(&rbType_d, sizeof(RigidBodyType*) * conf.numRigidTypes)); + // TODO: The above line fails when there is not enough memory. If it fails, stop. - random = new Random(simParams->randomSeed); - // random->split(0,PatchMap::Object()->numPatches()+1); - - // inbound communication - DebugM(3, "RBC::init: requiring reduction "<<reductionTag<<" with "<<6*rigidBodyList.size()<<" elements\n" << endi); - gridReduction = ReductionMgr::Object()->willRequire(reductionTag ,6*rigidBodyList.size() ); + printf("Copying RBs\n"); + // Copy rigidbody types + // http://stackoverflow.com/questions/16024087/copy-an-object-to-device + for (int i = 0; i < conf.numRigidTypes; i++) { + printf("Working on RB %d\n",i); + RigidBodyType& rb = conf.rigidBody[i]; // temporary for convenience + rb.updateRaw(); - // outbound communication - CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr); - computeMgr = cm.ckLocalBranch(); + int ng = rb.numPotGrids; - if (trans.size() != rot.size()) - NAMD_die("failed sanity check\n"); - RigidBodyMsg *msg = new RigidBodyMsg; - msg->trans.copy(trans); // perhaps .swap() would cause problems - msg->rot.copy(rot); - computeMgr->sendRigidBodyUpdate(msg); -} -RigidBodyController::~RigidBodyController() { - delete gridReduction; + // copy rigidbody to device + // RigidBodyType *rb_d; + gpuErrchk(cudaMalloc(&rb_addr[i], sizeof(RigidBodyType))); + gpuErrchk(cudaMemcpy(rb_addr[i], &rb, sizeof(RigidBodyType), + cudaMemcpyHostToDevice)); + + // copy rb->grid to device + RigidBodyGrid * gtmp; + // gtmp = new RigidBodyGrid[ng]; + size_t sz = sizeof(RigidBodyGrid)*ng; + + // allocate grids on device + // copy temporary host pointer to device pointer + // copy grids to device through temporary host poin + gpuErrchk(cudaMalloc((void **) >mp, sz)); + gpuErrchk(cudaMemcpy(&(rb_addr[i]->rawPotentialGrids), >mp, + sizeof(RigidBodyGrid*) * ng, cudaMemcpyHostToDevice )); + gpuErrchk(cudaMemcpy(gtmp, &(rb.rawPotentialGrids), + sizeof(RigidBodyGrid) * ng, cudaMemcpyHostToDevice )); + for (int gid = 0; gid < ng; gid++) { + gpuErrchk(cudaMemcpy(&(gtmp[gid]), &(rb.rawPotentialGrids[gid]), + sizeof(RigidBodyGrid), cudaMemcpyHostToDevice )); + } + + printf(" RigidBodyType %d: numGrids = %d\n", i, ng); + // copy potential grid data to device + for (int gid = 0; gid < ng; gid++) { + RigidBodyGrid *g = &(rb.rawPotentialGrids[gid]); // convenience + int len = g->getSize(); + float **tmpData; + tmpData = new float*[len]; + + printf(" RigidBodyType %d: potGrid[%d] size: %d\n", i, gid, len); + for (int k = 0; k < len; k++) + printf(" rbType_d[%d]->potGrid[%d].val[%d]: %g\n", + i, gid, k, g->val[k]); + + // allocate grid data on device + // copy temporary host pointer to device pointer + // copy data to device through temporary host pointer + sz = sizeof(float*) * len; + gpuErrchk(cudaMalloc((void **) &tmpData, sz)); + // gpuErrchk(cudaMemcpy( &(rb_addr[i]->rawPotentialGrids[gid].val), &tmpData, + // sizeof(float*), cudaMemcpyHostToDevice)); + gpuErrchk(cudaMemcpy( &(gtmp[gid].val), &tmpData, + sizeof(float*), cudaMemcpyHostToDevice)); + sz = sizeof(float) * len; + gpuErrchk(cudaMemcpy( tmpData, g->val, sz, cudaMemcpyHostToDevice)); + // RBTODO: why can't this be deleted? + // delete[] tmpData; + } + } + + // density grids + for (int i = 0; i < conf.numRigidTypes; i++) { + printf("working on RB %d\n",i); + RigidBodyType& rb = conf.rigidBody[i]; + + int ng = rb.numDenGrids; + RigidBodyGrid * gtmp; + size_t sz = sizeof(RigidBodyGrid)*ng; + + // allocate grids on device + // copy temporary host pointer to device pointer + // copy grids to device through temporary host poin + gpuErrchk(cudaMalloc((void **) >mp, sz)); + gpuErrchk(cudaMemcpy(&(rb_addr[i]->rawDensityGrids), >mp, + sizeof(RigidBodyGrid*) * ng, cudaMemcpyHostToDevice )); + gpuErrchk(cudaMemcpy(gtmp, &(rb.rawDensityGrids), + sizeof(RigidBodyGrid) * ng, cudaMemcpyHostToDevice )); + for (int gid = 0; gid < ng; gid++) { + gpuErrchk(cudaMemcpy(&(gtmp[gid]), &(rb.rawDensityGrids[gid]), + sizeof(RigidBodyGrid), cudaMemcpyHostToDevice )); + } + + printf(" RigidBodyType %d: numGrids = %d\n", i, ng); + // copy grid data to device + for (int gid = 0; gid < ng; gid++) { + RigidBodyGrid& g = rb.rawDensityGrids[gid]; // convenience + int len = g.getSize(); + float **tmpData; + tmpData = new float*[len]; + + printf(" RigidBodyType %d: potGrid[%d] size: %d\n", i, gid, len); + for (int k = 0; k < len; k++) + printf(" rbType_d[%d]->potGrid[%d].val[%d]: %g\n", + i, gid, k, g.val[k]); + + // allocate grid data on device + // copy temporary host pointer to device pointer + // copy data to device through temporary host pointer + sz = sizeof(float*) * len; + gpuErrchk(cudaMalloc((void **) &tmpData, sz)); + gpuErrchk(cudaMemcpy( &(gtmp[gid].val), &tmpData, + sizeof(float*), cudaMemcpyHostToDevice)); + sz = sizeof(float) * len; + gpuErrchk(cudaMemcpy( tmpData, g.val, sz, cudaMemcpyHostToDevice)); + + // RBTODO: why can't this be deleted? + // delete[] tmpData; + } + + } + gpuErrchk(cudaMemcpy(rbType_d, rb_addr, sizeof(RigidBodyType*) * conf.numRigidTypes, + cudaMemcpyHostToDevice)); + printf("Done copying RBs\n"); } + + +/* void RigidBodyController::print(int step) { // modeled after outputExtendedData() in Controller.C if ( step >= 0 ) { @@ -472,116 +557,4 @@ RigidBodyParams* RigidBodyParamsList::add(const char* key) return elem; } -const void RigidBodyParams::print() { - iout << iINFO - << "printing RigidBodyParams("<<rigidBodyKey<<"):" - <<"\n\t" << "mass: " << mass - <<"\n\t" << "inertia: " << inertia - <<"\n\t" << "langevin: " << langevin - <<"\n\t" << "temperature: " << temperature - <<"\n\t" << "transDampingCoeff: " << transDampingCoeff - <<"\n\t" << "position: " << position - <<"\n\t" << "orientation: " << orientation - <<"\n\t" << "orientationalVelocity: " << orientationalVelocity - << "\n" << endi; - -} -const void RigidBodyParamsList::print() { - iout << iINFO << "Printing " << n_elements << " RigidBodyParams\n" << endi; - - RigidBodyParams *elem = get_first(); - while (elem != NULL) { - elem->print(); - elem = elem->next; - } -} -const void RigidBodyParamsList::print(char *s) { - iout << iINFO << "("<<s<<") Printing " << n_elements << " RigidBodyParams\n" << endi; - - RigidBodyParams *elem = get_first(); - while (elem != NULL) { - elem->print(); - elem = elem->next; - } -} - -void RigidBodyParamsList::pack_data(MOStream *msg) { - DebugM(4, "Packing rigid body parameter list\n"); - print(); - - int i = n_elements; - msg->put(n_elements); - - RigidBodyParams *elem = get_first(); - while (elem != NULL) { - DebugM(4, "Packing a new element\n"); - - int len; - Vector v; - - len = strlen(elem->rigidBodyKey) + 1; - msg->put(len); - msg->put(len,elem->rigidBodyKey); - msg->put(elem->mass); - - // v = elem-> - msg->put(&(elem->inertia)); - msg->put( (elem->langevin?1:0) ); - msg->put(elem->temperature); - msg->put(&(elem->transDampingCoeff)); - msg->put(&(elem->rotDampingCoeff)); - - // elem->gridList.clear(); - - msg->put(&(elem->position)); - msg->put(&(elem->velocity)); - // Tensor data = elem->orientation; - msg->put( & elem->orientation ); - msg->put(&(elem->orientationalVelocity)) ; - - i--; - elem = elem->next; - } - if (i != 0) - NAMD_die("MGridforceParams message packing error\n"); -} -void RigidBodyParamsList::unpack_data(MIStream *msg) { - DebugM(4, "Could be unpacking rigid body parameterlist (not used & not implemented)\n"); - - int elements; - msg->get(elements); - - for(int i=0; i < elements; i++) { - DebugM(4, "Unpacking a new element\n"); - - int len; - msg->get(len); - char *key = new char[len]; - msg->get(len,key); - RigidBodyParams *elem = add(key); - delete [] key; - - msg->get(&(elem->inertia)); - - int j; - msg->get(j); - elem->langevin = (j != 0); - - msg->get(elem->temperature); - msg->get(&(elem->transDampingCoeff)); - msg->get(&(elem->rotDampingCoeff)); - - // elem->gridList.clear(); - - msg->get(&(elem->position)); - msg->get(&(elem->velocity)); - msg->get( & elem->orientation ); - msg->get(&(elem->orientationalVelocity)) ; - - elem = elem->next; - } - - DebugM(4, "Finished unpacking rigid body parameter list\n"); - print(); -} */ diff --git a/RigidBodyController.h b/RigidBodyController.h index 1e06609..5287410 100644 --- a/RigidBodyController.h +++ b/RigidBodyController.h @@ -4,8 +4,6 @@ #include <cuda.h> #include <cuda_runtime.h> -#include "ComputeGridGrid.cuh" - class Configuration; struct gridInteractionList { @@ -23,11 +21,15 @@ public: DEVICE void print(int step); private: + void copyGridsToDevice(); void updateForces(); /* void printLegend(std::ofstream &file); */ /* void printData(int step, std::ofstream &file); */ - +public: + RigidBodyType** rbType_d; + +private: /* std::ofstream trajFile; */ const Configuration& conf; @@ -37,11 +39,7 @@ private: Vector3* trans; // would have made these static, but Matrix3* rot; // there are errors on rigidBody->integrate - std::vector< std::vector<RigidBody> > rigidBodyByType; - /* RigidBody* rigidBodyList; */ - }; - diff --git a/RigidBodyGrid.cu b/RigidBodyGrid.cu index 28bb6fe..7075071 100644 --- a/RigidBodyGrid.cu +++ b/RigidBodyGrid.cu @@ -112,6 +112,15 @@ RigidBodyGrid::RigidBodyGrid(Matrix3 box, float dx) { zero(); } +RigidBodyGrid::RigidBodyGrid(const BaseGrid& g) { + nx = g.nx; + ny = g.ny; + nz = g.nz; + + init(); + for (int i = 0; i < size; i++) val[i] = g.val[i]; +} + // Make an exact copy of a grid. RigidBodyGrid::RigidBodyGrid(const RigidBodyGrid& g) { nx = g.nx; diff --git a/RigidBodyType.h b/RigidBodyType.h index be452c3..04eb55a 100644 --- a/RigidBodyType.h +++ b/RigidBodyType.h @@ -61,8 +61,9 @@ public: std::vector<BaseGrid> potentialGrids; std::vector<BaseGrid> densityGrids; - - // for device + + // RBTODO: clear std::vectors after initialization + // duplicates of std::vector grids for device int numPotGrids; int numDenGrids; RigidBodyGrid* rawPotentialGrids; @@ -71,5 +72,10 @@ public: Matrix3* rawDensityBases; Vector3* rawPotentialOrigins; Vector3* rawDensityOrigins; + + // device pointers + RigidBodyGrid** rawPotentialGrids_d; + RigidBodyGrid** rawDensityGrids_d; + }; diff --git a/makefile b/makefile index b701a44..6f9a875 100644 --- a/makefile +++ b/makefile @@ -12,7 +12,7 @@ INCLUDE = $(CUDA_PATH)/include DEBUG = -g CC_FLAGS = -Wall -Wno-write-strings -I$(INCLUDE) $(DEBUG) -std=c++0x -pedantic -NV_FLAGS = -rdc=true $(DEBUG) +NV_FLAGS = $(DEBUG) EX_FLAGS = -O3 -m$(OS_SIZE) ifneq ($(MAVERICKS),) @@ -29,7 +29,6 @@ else LIBRARY = $(CUDA_PATH)/lib64 endif -LD_FLAGS = -L$(LIBRARY) -lcurand -lcudart -Wl,-rpath,$(LIBRARY) #CODE_10 := -gencode arch=compute_10,code=sm_10 #CODE_12 := -gencode arch=compute_12,code=sm_12 @@ -40,6 +39,11 @@ CODE_20 := -arch=sm_20 NV_FLAGS += $(CODE_10) $(CODE_12) $(CODE_20) $(CODE_30) $(CODE_35) +NVLD_FLAGS := $(NV_FLAGS) --device-link +NV_FLAGS += -rdc=true + +LD_FLAGS = -L$(LIBRARY) -lcurand -lcudart -Wl,-rpath,$(LIBRARY) + ### Sources CC_SRC := $(wildcard *.cpp) @@ -59,7 +63,11 @@ all: $(TARGET) @echo "Done ->" $(TARGET) $(TARGET): $(CU_OBJ) $(CC_OBJ) runBrownTown.cpp vmdsock.c imd.c imd.h - $(EXEC) $(CC) $(CC_FLAGS) $(LD_FLAGS) $(EX_FLAGS) runBrownTown.cpp vmdsock.c imd.c $(CU_OBJ) $(CC_OBJ) -o $(TARGET) + $(EXEC) $(NVCC) $(NVLD_FLAGS) $(CU_OBJ) $(CC_OBJ) -o $(TARGET)_link.o + $(EXEC) $(CC) $(CC_FLAGS) $(EX_FLAGS) runBrownTown.cpp vmdsock.c imd.c $(TARGET)_link.o $(CU_OBJ) $(CC_OBJ) $(LD_FLAGS) -o $(TARGET) + +# $(EXEC) $(NVCC) $(NVLD_FLAGS) $(CU_OBJ) -o $(TARGET)_link.o +# $(EXEC) $(CC) $(CC_FLAGS) $(LD_FLAGS) $(EX_FLAGS) runBrownTown.cpp vmdsock.c imd.c $(CU_OBJ) $(CC_OBJ) -o $(TARGET) .SECONDEXPANSION: $(CU_OBJ): %.o: %.cu $$(wildcard %.h) $$(wildcard %.cuh) diff --git a/notes.org b/notes.org index 5a3e805..25d5831 100644 --- a/notes.org +++ b/notes.org @@ -1,5 +1,15 @@ +* TODO active +** make rigid body object device pointer + + + +* organization +** RgidBodyController (RBC) holds device pointers, manages force evaluation and integration + + + * Opportunities for memory bandwidth savings -** each block should contain a compact set of density grid points +** each block should (ideally) contain a compact set of density grid points cache (automatically!?) potential grid lookups and coefficients! ** each block should have same transformation matrices applied to each grid point?! ** each block could have same inverse transformation matrix applied to each grid point -- GitLab