diff --git a/src/ComputeGridGrid.cu b/src/ComputeGridGrid.cu index 7fae6ae9b9d25a5468b39a1720da9f35740868df..8cefacd4882ccd2a917fcd6a6d25b6f6c014958a 100644 --- a/src/ComputeGridGrid.cu +++ b/src/ComputeGridGrid.cu @@ -6,7 +6,7 @@ //RBTODO: add __restrict__, benchmark (Q: how to restrict member data?) __global__ void computeGridGridForce(const RigidBodyGrid* rho, const RigidBodyGrid* u, const Matrix3 basis_rho, const Matrix3 basis_u_inv, const Vector3 origin_rho_minus_origin_u, - ForceEnergy* retForce, Vector3 * retTorque, int scheme) + ForceEnergy* retForce, Vector3 * retTorque, int scheme, BaseGrid* sys_d) { extern __shared__ ForceEnergy s[]; @@ -27,6 +27,7 @@ void computeGridGridForce(const RigidBodyGrid* rho, const RigidBodyGrid* u, cons Vector3 r_pos= rho->getPosition(r_id); /* i,j,k value of voxel */ r_pos = basis_rho.transform( r_pos ) + origin_rho_minus_origin_u; /* real space */ + r_pos = sys_d->wrapDiff(r_pos); /* TODO: wrap about center of RB, not origin of u */ const Vector3 u_ijk_float = basis_u_inv.transform( r_pos ); // RBTODO: Test for non-unit delta /* Vector3 tmpf = Vector3(0.0f); */ @@ -76,7 +77,7 @@ void computePartGridForce(const Vector3* __restrict__ pos, Vector3* particleForc const int num, const int* __restrict__ particleIds, const RigidBodyGrid* __restrict__ u, const Matrix3 basis_u_inv, const Vector3 origin_u, - ForceEnergy* __restrict__ retForce, Vector3* __restrict__ retTorque, float* __restrict__ energy, bool get_energy, int scheme) { + ForceEnergy* __restrict__ retForce, Vector3* __restrict__ retTorque, float* __restrict__ energy, bool get_energy, int scheme, BaseGrid* sys_d) { extern __shared__ ForceEnergy s[]; ForceEnergy *force = s; @@ -89,7 +90,8 @@ void computePartGridForce(const Vector3* __restrict__ pos, Vector3* particleForc torque[tid] = ForceEnergy(0.f,0.f); if (i < num) { const int id = particleIds[i]; - Vector3 p = pos[id] - origin_u; + //Vector3 p = pos[id] - origin_u; + Vector3 p = sys_d->wrapDiff(pos[id]-origin_u); /* TODO: wrap about RB center, not origin */ // TODO: wrap to center of u const Vector3 u_ijk_float = basis_u_inv.transform( p ); @@ -134,14 +136,14 @@ __global__ void createPartlist(const Vector3* __restrict__ pos, const int numTypeParticles, const int* __restrict__ typeParticles_d, int* numParticles_d, int* particles_d, - const Vector3 gridCenter, const float radius2) { + const Vector3 gridCenter, const float radius2, BaseGrid* sys_d) { const int tid = threadIdx.x; const int warpLane = tid % WARPSIZE; /* RBTODO: optimize */ const int i = blockIdx.x * blockDim.x + threadIdx.x; if (i < numTypeParticles) { int aid = typeParticles_d[i]; - float dist = (pos[aid] - gridCenter).length2(); + float dist = (sys_d->wrapDiff(pos[aid] - gridCenter)).length2(); if (dist <= radius2) { int tmp = atomicAggInc(numParticles_d, warpLane); diff --git a/src/ComputeGridGrid.cuh b/src/ComputeGridGrid.cuh index 98275a1068f364ef22515a2d151a03bafaa5a3f3..6223e0afa3ba56c6808da84d5dbcb0d01c680cf5 100644 --- a/src/ComputeGridGrid.cuh +++ b/src/ComputeGridGrid.cuh @@ -6,25 +6,26 @@ class RigidBodyGrid; class ForceEnergy; +class BaseGrid; extern __global__ void computeGridGridForce(const RigidBodyGrid* rho, const RigidBodyGrid* u, const Matrix3 basis_rho, const Matrix3 basis_u_inv, const Vector3 origin_rho_minus_origin_u, - ForceEnergy* retForce, Vector3 * retTorque, int scheme); + ForceEnergy* retForce, Vector3 * retTorque, int scheme, BaseGrid* sys_d); extern __global__ void computePartGridForce(const Vector3* __restrict__ pos, Vector3* particleForce, const int num, const int* __restrict__ particleIds, const RigidBodyGrid* __restrict__ u, const Matrix3 basis_u_inv, const Vector3 origin_u, - ForceEnergy* __restrict__ retForce, Vector3* __restrict__ retTorque, float* energy, bool get_energy, int scheme); + ForceEnergy* __restrict__ retForce, Vector3* __restrict__ retTorque, float* energy, bool get_energy, int scheme, BaseGrid* sys_d); extern __global__ void createPartlist(const Vector3* __restrict__ pos, const int numTypeParticles, const int* __restrict__ typeParticles_d, int* numParticles_d, int* particles_d, - const Vector3 gridCenter, const float radius2); + const Vector3 gridCenter, const float radius2, BaseGrid* sys_d); extern __global__ void printRigidBodyGrid(const RigidBodyGrid* rho); diff --git a/src/GrandBrownTown.cu b/src/GrandBrownTown.cu index 8b442d2deefc49dbdbcf750fd1ba99bb40013045..e3f95ef79b5ed532294a528211f3ad398e7c1260 100644 --- a/src/GrandBrownTown.cu +++ b/src/GrandBrownTown.cu @@ -35,7 +35,20 @@ cudaEvent_t START, STOP; GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg, bool debug, bool imd_on, unsigned int imd_port, int numReplicas) : imd_on(imd_on), imd_port(imd_port), numReplicas(numReplicas), - conf(c), RBC(RigidBodyController(c,outArg)) { + //conf(c), RBC(RigidBodyController(c,outArg)) { + conf(c){ + + gsl_rng *gslcpp_rng = gsl_rng_alloc(gsl_rng_default); + gsl_rng_set(gslcpp_rng, conf.seed); + RBC.resize(numReplicas); + for(int i = 0; i < numReplicas; ++i) + { + unsigned long int seed = gsl_rng_get(gslcpp_rng); + RigidBodyController* rb = new RigidBodyController(c, outArg, seed, i); + RBC[i] = rb; + } + gsl_rng_free(gslcpp_rng); + //printf("%d\n",__LINE__); //Determine which dynamic. Han-Yi Chou particle_dynamic = c.ParticleDynamicType; @@ -61,7 +74,7 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg, { std::stringstream restart_file_p, out_momentum_prefix, out_force_prefix; restart_file_p << outArg << '.' << i << ".momentum.restart"; - out_momentum_prefix << outArg << ".momentum." << i; + out_momentum_prefix << outArg << '.' << i << ".momentum"; //out_force_prefix << outArg << ".force." << i; restartMomentumFiles.push_back(restart_file_p.str());//Han-Yi Chou @@ -428,7 +441,12 @@ GrandBrownTown::~GrandBrownTown() { //curandDestroyGenerator(randoGen->generator); delete randoGen; gpuErrchk(cudaFree(randoGen_d)); - + for(std::vector<RigidBodyController*>::iterator iter = RBC.begin(); iter != RBC.end(); ++iter) + { + (*iter)->~RigidBodyController(); + delete *iter; + } + RBC.clear(); // Auxillary objects delete internal; internal = NULL; @@ -463,10 +481,6 @@ GrandBrownTown::~GrandBrownTown() { //Nose Hoover is now implement for particles. void GrandBrownTown::RunNoseHooverLangevin() { - //printf("\n\n"); - //printf("Testing for Nose-Hoover Langevin dynamics\n"); - //Vector3 runningNetForce(0.0f); - //comment this out because this is the origin points Han-Yi Chou // Open the files for recording ionic currents for (int repID = 0; repID < numReplicas; ++repID) @@ -534,7 +548,8 @@ void GrandBrownTown::RunNoseHooverLangevin() // cudaSetDevice(0); internal->decompose(); gpuErrchk(cudaDeviceSynchronize()); - RBC.updateParticleLists( internal->getPos_d() ); + for(std::vector<RigidBodyController*>::iterator iter = RBC.begin(); iter != RBC.end(); ++iter) + (*iter)->updateParticleLists( internal->getPos_d(), sys_d); } float t; // simulation time @@ -558,7 +573,8 @@ void GrandBrownTown::RunNoseHooverLangevin() // 'interparticleForce' - determines whether particles interact with each other gpuErrchk(cudaMemset((void*)(internal->getForceInternal_d()),0,num*numReplicas*sizeof(Vector3))); gpuErrchk(cudaMemset((void*)(internal->getEnergy()), 0, sizeof(float)*num*numReplicas)); - RBC.clearForceAndTorque(); //Han-Yi Chou + for(std::vector<RigidBodyController*>::iterator iter = RBC.begin(); iter != RBC.end(); ++iter) + (*iter)->clearForceAndTorque(); //Han-Yi Chou if (interparticleForce) { @@ -570,8 +586,9 @@ void GrandBrownTown::RunNoseHooverLangevin() if (s % decompPeriod == 0) { // cudaSetDevice(0); - internal -> decompose(); - RBC.updateParticleLists( internal->getPos_d() ); + internal -> decompose(); + for(std::vector<RigidBodyController*>::iterator iter = RBC.begin(); iter != RBC.end(); ++iter) + (*iter)->updateParticleLists( internal->getPos_d(), sys_d); } internal -> computeTabulated(get_energy); break; @@ -590,7 +607,8 @@ void GrandBrownTown::RunNoseHooverLangevin() { // cudaSetDevice(0); internal->decompose(); - RBC.updateParticleLists( internal->getPos_d() ); + for(std::vector<RigidBodyController*>::iterator iter = RBC.begin(); iter != RBC.end(); ++iter) + (*iter)->updateParticleLists( internal->getPos_d(), sys_d); } internal->compute(get_energy); break; @@ -610,11 +628,15 @@ void GrandBrownTown::RunNoseHooverLangevin() } }//if inter-particle force gpuErrchk(cudaDeviceSynchronize()); - RBC.updateForces(internal->getPos_d(), internal->getForceInternal_d(), s, internal->getEnergy(), get_energy, RigidBodyInterpolationType); + for(std::vector<RigidBodyController*>::iterator iter = RBC.begin(); iter != RBC.end(); ++iter) + (*iter)->updateForces(internal->getPos_d(), internal->getForceInternal_d(), s, internal->getEnergy(), get_energy, RigidBodyInterpolationType, sys, sys_d); if(rigidbody_dynamic == String("Langevin")) { - RBC.SetRandomTorques(); - RBC.AddLangevin(); + for(std::vector<RigidBodyController*>::iterator iter = RBC.begin(); iter != RBC.end(); ++iter) + { + (*iter)->SetRandomTorques(); + (*iter)->AddLangevin(); + } } }//if step == 1 @@ -635,12 +657,16 @@ void GrandBrownTown::RunNoseHooverLangevin() if(rigidbody_dynamic == String("Langevin")) { - RBC.integrateDLM(0); - RBC.integrateDLM(1); + for(std::vector<RigidBodyController*>::iterator iter = RBC.begin(); iter != RBC.end(); ++iter) + { + (*iter)->integrateDLM(0); + (*iter)->integrateDLM(1); + } } else { - RBC.integrate(s); + for(std::vector<RigidBodyController*>::iterator iter = RBC.begin(); iter != RBC.end(); ++iter) + (*iter)->integrate(s); //RBC.print(s); } gpuErrchk(cudaDeviceSynchronize()); @@ -717,8 +743,8 @@ void GrandBrownTown::RunNoseHooverLangevin() } if (imd_on && clientsock) internal->setForceInternalOnDevice(imdForces); // TODO ensure replicas are mutually exclusive with IMD - - RBC.clearForceAndTorque(); + for(std::vector<RigidBodyController*>::iterator iter = RBC.begin(); iter != RBC.end(); ++iter) + (*iter)->clearForceAndTorque(); gpuErrchk(cudaMemset((void*)(internal->getForceInternal_d()),0,num*numReplicas*sizeof(Vector3))); if (interparticleForce) { @@ -731,7 +757,8 @@ void GrandBrownTown::RunNoseHooverLangevin() if (s % decompPeriod == 0) { internal -> decompose(); - RBC.updateParticleLists( internal->getPos_d() ); + for(std::vector<RigidBodyController*>::iterator iter = RBC.begin(); iter != RBC.end(); ++iter) + (*iter)->updateParticleLists( internal->getPos_d(), sys_d); } internal -> computeTabulated(get_energy); break; @@ -749,7 +776,8 @@ void GrandBrownTown::RunNoseHooverLangevin() if (s % decompPeriod == 0) { internal->decompose(); - RBC.updateParticleLists( internal->getPos_d() ); + for(std::vector<RigidBodyController*>::iterator iter = RBC.begin(); iter != RBC.end(); ++iter) + (*iter)->updateParticleLists( internal->getPos_d(), sys_d); } internal->compute(get_energy); break; @@ -769,7 +797,8 @@ void GrandBrownTown::RunNoseHooverLangevin() } gpuErrchk(cudaDeviceSynchronize()); //compute the force for rigid bodies - RBC.updateForces(internal->getPos_d(), internal->getForceInternal_d(), s, internal->getEnergy(), get_energy, RigidBodyInterpolationType); + for(std::vector<RigidBodyController*>::iterator iter = RBC.begin(); iter != RBC.end(); ++iter) + (*iter)->updateForces(internal->getPos_d(), internal->getForceInternal_d(), s, internal->getEnergy(), get_energy, RigidBodyInterpolationType, sys, sys_d); gpuErrchk(cudaDeviceSynchronize()); if(particle_dynamic == String("Langevin") || particle_dynamic == String("NoseHooverLangevin")) @@ -780,10 +809,13 @@ void GrandBrownTown::RunNoseHooverLangevin() if(rigidbody_dynamic == String("Langevin")) { - RBC.SetRandomTorques(); - RBC.AddLangevin(); - RBC.integrateDLM(2); - RBC.print(s); + for(std::vector<RigidBodyController*>::iterator iter = RBC.begin(); iter != RBC.end(); ++iter) + { + (*iter)->SetRandomTorques(); + (*iter)->AddLangevin(); + (*iter)->integrateDLM(2); + (*iter)->print(s); + } } if (s % outputPeriod == 0) @@ -863,13 +895,27 @@ void GrandBrownTown::RunNoseHooverLangevin() } if(rigidbody_dynamic == String("Langevin")) - RBC.KineticEnergy(); + { + for(std::vector<RigidBodyController*>::iterator iter = RBC.begin(); iter != RBC.end(); ++iter) + (*iter)->KineticEnergy(); + } std::fstream rb_energy_file; rb_energy_file.open("rb_energy_config.txt", std::fstream::out | std::fstream::app); if(rb_energy_file.is_open()) { - RBC.printEnergyData(rb_energy_file); - energy_file.close(); + float k_tol = 0.f; + float v_tol = 0.f; + float (RigidBody::*func_ptr)(); + for(std::vector<RigidBodyController*>::iterator iter = RBC.begin(); iter != RBC.end(); ++iter) + { + func_ptr = &RigidBody::getKinetic; + k_tol += (*iter)->getEnergy(func_ptr); + func_ptr = &RigidBody::getEnergy; + v_tol += (*iter)->getEnergy(func_ptr); + } + rb_energy_file << "Kinetic Energy " << k_tol/numReplicas << " (kT)" << std::endl; + rb_energy_file << "Potential Energy " << v_tol/numReplicas << " (kcal/mol)" << std::endl; + rb_energy_file.close(); } else { diff --git a/src/GrandBrownTown.cuh b/src/GrandBrownTown.cuh index f42764da9e0aeb55af59c7eb4679568f83ca1de8..6d928c1e079cfa1696c08364112425979d6ea3c9 100644 --- a/src/GrandBrownTown.cuh +++ b/src/GrandBrownTown.cuh @@ -128,16 +128,29 @@ updateKernelNoseHooverLangevin(Vector3* __restrict__ pos, Vector3* __restrict__ #ifndef FORCEGRIDOFF // Add a force defined via 3D FORCE maps (not 3D potential maps) - if (pt.forceXGrid != NULL) fe.f.x += pt.forceXGrid->interpolatePotentialLinearly(r0); - if (pt.forceYGrid != NULL) fe.f.y += pt.forceYGrid->interpolatePotentialLinearly(r0); - if (pt.forceZGrid != NULL) fe.f.z += pt.forceZGrid->interpolatePotentialLinearly(r0); + if(!scheme) + { + if (pt.forceXGrid != NULL) fe.f.x += pt.forceXGrid->interpolatePotentialLinearly(r0); + if (pt.forceYGrid != NULL) fe.f.y += pt.forceYGrid->interpolatePotentialLinearly(r0); + if (pt.forceZGrid != NULL) fe.f.z += pt.forceZGrid->interpolatePotentialLinearly(r0); + } + else + { + if (pt.forceXGrid != NULL) fe.f.x += pt.forceXGrid->interpolatePotential(r0); + if (pt.forceYGrid != NULL) fe.f.y += pt.forceYGrid->interpolatePotential(r0); + if (pt.forceZGrid != NULL) fe.f.z += pt.forceZGrid->interpolatePotential(r0); + } #endif Vector3 force = forceInternal[idx] + forceExternal + fe.f; #ifdef Debug forceInternal[idx] = -force; #endif // Get local kT value - float kTlocal = (tGridLength == 0) ? kT : kTGrid->interpolatePotentialLinearly(r0); /* periodic */ + float kTlocal; + if(!scheme) + kTlocal = (tGridLength == 0) ? kT : kTGrid->interpolatePotentialLinearly(r0); /* periodic */ + else + kTlocal = (tGridLength == 0) ? kT : kTGrid->interpolatePotential(r0); /* periodic */ // Update the particle's position using the calculated values for time, force, etc. float mass = pt.mass; @@ -155,7 +168,11 @@ updateKernelNoseHooverLangevin(Vector3* __restrict__ pos, Vector3* __restrict__ 0.5*pt.diffusionGrid->nz)); Vector3 p2 = r0 - gridCenter; p2 = sys->wrapDiff( p2 ) + gridCenter; - ForceEnergy diff = pt.diffusionGrid->interpolateForceDLinearlyPeriodic(p2); + ForceEnergy diff; + if(!scheme) + diff = pt.diffusionGrid->interpolateForceDLinearlyPeriodic(p2); + else + diff = pt.diffusionGrid->interpolateForceD(p2); gamma = Vector3(kTlocal / (mass * diff.e)); } @@ -166,7 +183,7 @@ updateKernelNoseHooverLangevin(Vector3* __restrict__ pos, Vector3* __restrict__ p0 = p0 + 0.5f * timestep * force * Unit1; r0 = r0 + 0.5f * timestep * p0 * 1e4 / mass; - r0 = sys->wrap(r0); + //r0 = sys->wrap(r0); ran = ran + 0.5f * (p0.length2() / mass * 0.238845899f - 3.f * kTlocal) / mu; @@ -225,9 +242,18 @@ __global__ void updateKernelBAOAB(Vector3* pos, Vector3* momentum, Vector3* __re #ifndef FORCEGRIDOFF // Add a force defined via 3D FORCE maps (not 3D potential maps) - if (pt.forceXGrid != NULL) fe.f.x += pt.forceXGrid->interpolatePotentialLinearly(r0); - if (pt.forceYGrid != NULL) fe.f.y += pt.forceYGrid->interpolatePotentialLinearly(r0); - if (pt.forceZGrid != NULL) fe.f.z += pt.forceZGrid->interpolatePotentialLinearly(r0); + if(!scheme) + { + if (pt.forceXGrid != NULL) fe.f.x += pt.forceXGrid->interpolatePotentialLinearly(r0); + if (pt.forceYGrid != NULL) fe.f.y += pt.forceYGrid->interpolatePotentialLinearly(r0); + if (pt.forceZGrid != NULL) fe.f.z += pt.forceZGrid->interpolatePotentialLinearly(r0); + } + else + { + if (pt.forceXGrid != NULL) fe.f.x += pt.forceXGrid->interpolatePotential(r0); + if (pt.forceYGrid != NULL) fe.f.y += pt.forceYGrid->interpolatePotential(r0); + if (pt.forceZGrid != NULL) fe.f.z += pt.forceZGrid->interpolatePotential(r0); + } #endif Vector3 force = forceInternal[idx] + forceExternal + fe.f; #ifdef Debug @@ -235,7 +261,11 @@ __global__ void updateKernelBAOAB(Vector3* pos, Vector3* momentum, Vector3* __re #endif // Get local kT value - float kTlocal = (tGridLength == 0) ? kT : kTGrid->interpolatePotentialLinearly(r0); /* periodic */ + float kTlocal; + if(!scheme) + kTlocal = (tGridLength == 0) ? kT : kTGrid->interpolatePotentialLinearly(r0); /* periodic */ + else + kTlocal = (tGridLength == 0) ? kT : kTGrid->interpolatePotential(r0); /* periodic */ // Update the particle's position using the calculated values for time, force, etc. float mass = pt.mass; @@ -250,7 +280,11 @@ __global__ void updateKernelBAOAB(Vector3* pos, Vector3* momentum, Vector3* __re 0.5*pt.diffusionGrid->nz)); Vector3 p2 = r0 - gridCenter; p2 = sys->wrapDiff( p2 ) + gridCenter; - ForceEnergy diff = pt.diffusionGrid->interpolateForceDLinearlyPeriodic(p2); + ForceEnergy diff; + if(!scheme) + diff = pt.diffusionGrid->interpolateForceDLinearlyPeriodic(p2); + else + diff = pt.diffusionGrid->interpolateForceD(p2); gamma = Vector3(kTlocal / (mass * diff.e)); } @@ -310,9 +344,18 @@ __global__ void LastUpdateKernelBAOAB(Vector3* pos,Vector3* momentum, Vector3* _ energy[idx] += fe.e; #ifndef FORCEGRIDOFF // Add a force defined via 3D FORCE maps (not 3D potential maps) - if (pt.forceXGrid != NULL) fe.f.x += pt.forceXGrid->interpolatePotentialLinearly(r0); - if (pt.forceYGrid != NULL) fe.f.y += pt.forceYGrid->interpolatePotentialLinearly(r0); - if (pt.forceZGrid != NULL) fe.f.z += pt.forceZGrid->interpolatePotentialLinearly(r0); + if(!scheme) + { + if (pt.forceXGrid != NULL) fe.f.x += pt.forceXGrid->interpolatePotentialLinearly(r0); + if (pt.forceYGrid != NULL) fe.f.y += pt.forceYGrid->interpolatePotentialLinearly(r0); + if (pt.forceZGrid != NULL) fe.f.z += pt.forceZGrid->interpolatePotentialLinearly(r0); + } + else + { + if (pt.forceXGrid != NULL) fe.f.x += pt.forceXGrid->interpolatePotential(r0); + if (pt.forceYGrid != NULL) fe.f.y += pt.forceYGrid->interpolatePotential(r0); + if (pt.forceZGrid != NULL) fe.f.z += pt.forceZGrid->interpolatePotential(r0); + } #endif Vector3 force = forceInternal[idx] + forceExternal + fe.f; #ifdef Debug @@ -428,9 +471,18 @@ void updateKernel(Vector3* pos, Vector3* __restrict__ forceInternal, int type[], } #ifndef FORCEGRIDOFF // Add a force defined via 3D FORCE maps (not 3D potential maps) - if (pt.forceXGrid != NULL) fe.f.x += pt.forceXGrid->interpolatePotentialLinearly(p); - if (pt.forceYGrid != NULL) fe.f.y += pt.forceYGrid->interpolatePotentialLinearly(p); - if (pt.forceZGrid != NULL) fe.f.z += pt.forceZGrid->interpolatePotentialLinearly(p); + if(!scheme) + { + if (pt.forceXGrid != NULL) fe.f.x += pt.forceXGrid->interpolatePotentialLinearly(p); + if (pt.forceYGrid != NULL) fe.f.y += pt.forceYGrid->interpolatePotentialLinearly(p); + if (pt.forceZGrid != NULL) fe.f.z += pt.forceZGrid->interpolatePotentialLinearly(p); + } + else + { + if (pt.forceXGrid != NULL) fe.f.x += pt.forceXGrid->interpolatePotential(p); + if (pt.forceYGrid != NULL) fe.f.y += pt.forceYGrid->interpolatePotential(p); + if (pt.forceZGrid != NULL) fe.f.z += pt.forceZGrid->interpolatePotential(p); + } #endif // Compute total force: @@ -444,7 +496,11 @@ void updateKernel(Vector3* pos, Vector3* __restrict__ forceInternal, int type[], // Get local kT value - float kTlocal = (tGridLength == 0) ? kT : kTGrid->interpolatePotentialLinearly(p); /* periodic */ + float kTlocal; + if(!scheme) + kTlocal = (tGridLength == 0) ? kT : kTGrid->interpolatePotentialLinearly(p); /* periodic */ + else + kTlocal = (tGridLength == 0) ? kT : kTGrid->interpolatePotential(p); /* periodic */ // Update the particle's position using the calculated values for time, force, etc. float diffusion = pt.diffusion; @@ -453,7 +509,8 @@ void updateKernel(Vector3* pos, Vector3* __restrict__ forceInternal, int type[], // fe.f.x, fe.f.y, fe.f.z); // // force.x, force.y, force.z); - if (pt.diffusionGrid != NULL) { + if (pt.diffusionGrid != NULL) + { // printf("atom %d: pos: %f %f %f\n", idx, p.x, p.y, p.z); // p = pt.diffusionGrid->wrap(p); // illegal mem access; no origin/basis? @@ -465,8 +522,12 @@ void updateKernel(Vector3* pos, Vector3* __restrict__ forceInternal, int type[], p2 = sys->wrapDiff( p2 ) + gridCenter; /* p2 = sys->wrap( p2 ); */ /* p2 = p2 - gridCenter; */ - /* printf("atom %d: ps2: %f %f %f\n", idx, p2.x, p2.y, p2.z); */ - ForceEnergy diff = pt.diffusionGrid->interpolateForceDLinearlyPeriodic(p2); + /* printf("atom %d: ps2: %f %f %f\n", idx, p2.x, p2.y, p2.z); */ + ForceEnergy diff; + if(!scheme) + diff = pt.diffusionGrid->interpolateForceDLinearlyPeriodic(p2); + else + diff = pt.diffusionGrid->interpolateForceD(p2); diffusion = diff.e; diffGrad = diff.f; } diff --git a/src/GrandBrownTown.h b/src/GrandBrownTown.h index afbd008690df1d86ddfe18324fb94abff0acfb61..c9ef30630dae1dea4fecc6987e9a553ef5803eb6 100644 --- a/src/GrandBrownTown.h +++ b/src/GrandBrownTown.h @@ -167,7 +167,7 @@ private: float timeLast; // used with posLast float minimumSep; // minimum separation allowed with placing new particles - RigidBodyController RBC; + std::vector<RigidBodyController*> RBC; Vector3* rbPos; // rigid body positions // CUDA device variables diff --git a/src/RigidBody.cu b/src/RigidBody.cu index c0db5e73ae9b0c7bc3be76f0fdc1e0e4e149b9ef..faf886292b779938ada0869f334a9a31b9586821 100644 --- a/src/RigidBody.cu +++ b/src/RigidBody.cu @@ -67,12 +67,12 @@ void RigidBody::init() { } //Boltzmann distribution -void RigidBody::Boltzmann() +void RigidBody::Boltzmann(unsigned long int seed) { gsl_rng *gslcpp_rng = gsl_rng_alloc(gsl_rng_default); - std::srand(time(NULL)); - gsl_rng_set (gslcpp_rng, rand() % 100000); + //std::srand(time(NULL)); + gsl_rng_set (gslcpp_rng, seed); double sigma[4] = { sqrt(t->mass*Temp) * 2.046167135,sqrt(t->inertia.x*Temp) * 2.046167135, sqrt(t->inertia.y*Temp) * 2.046167135, sqrt(t->inertia.z*Temp) * 2.046167135 }; @@ -84,6 +84,7 @@ void RigidBody::Boltzmann() angularMomentum.z = gsl_ran_gaussian(gslcpp_rng,sigma[3]); printf("%f\n", Temp); printf("%f\n", Temperature()); + gsl_rng_free(gslcpp_rng); } RigidBody::~RigidBody() { @@ -118,7 +119,7 @@ void RigidBody::addEnergy(float e) { energy += e; } -void RigidBody::updateParticleList(Vector3* pos_d) { +void RigidBody::updateParticleList(Vector3* pos_d, BaseGrid* sys_d) { for (int i = 0; i < t->numPotGrids; ++i) { numParticles[i] = 0; int& tnp = t->numParticles[i]; @@ -136,11 +137,11 @@ void RigidBody::updateParticleList(Vector3* pos_d) { #if __CUDA_ARCH__ >= 300 createPartlist<<<nb,NUMTHREADS>>>(pos_d, tnp, t->particles_d[i], tmp_d, particles_d[i], - gridCenter + position, cutoff*cutoff); + gridCenter + position, cutoff*cutoff, sys_d); #else createPartlist<<<nb,NUMTHREADS,NUMTHREADS/WARPSIZE>>>(pos_d, tnp, t->particles_d[i], tmp_d, particles_d[i], - gridCenter + position, cutoff*cutoff); + gridCenter + position, cutoff*cutoff, sys_d); #endif gpuErrchk(cudaMemcpy(&numParticles[i], tmp_d, sizeof(int), cudaMemcpyDeviceToHost )); gpuErrchk(cudaFree( tmp_d )); @@ -148,7 +149,7 @@ void RigidBody::updateParticleList(Vector3* pos_d) { } } -void RigidBody::callGridParticleForceKernel(Vector3* pos_d, Vector3* force_d, int s, float* energy, bool get_energy, int scheme) { +void RigidBody::callGridParticleForceKernel(Vector3* pos_d, Vector3* force_d, int s, float* energy, bool get_energy, int scheme, BaseGrid* sys, BaseGrid* sys_d) { // Apply the force and torque on the rigid body, and forces on particles // RBTODO: performance: consolidate CUDA stream management @@ -193,7 +194,7 @@ void RigidBody::callGridParticleForceKernel(Vector3* pos_d, Vector3* force_d, in computePartGridForce<<< nb, NUMTHREADS,2*NUMTHREADS*sizeof(ForceEnergy)*nb >>>( pos_d, force_d, numParticles[i], particles_d[i], t->rawPotentialGrids_d[i], - B, c, forces_d, torques_d, energy, get_energy, scheme); + B, c, forces_d, torques_d, energy, get_energy, scheme, sys_d); //gpuErrchk(cudaMemcpy(forces, forces_d, sizeof(Vector3)*nb, cudaMemcpyDeviceToHost)); gpuErrchk(cudaMemcpy(forces, forces_d, sizeof(ForceEnergy)*nb, cudaMemcpyDeviceToHost)); @@ -207,8 +208,8 @@ void RigidBody::callGridParticleForceKernel(Vector3* pos_d, Vector3* force_d, in f = f + forces[k]; torq = torq + torques[k]; } - - torq = -torq + (getPosition()-c).cross( f.f ); + //why the force points are at the origin of the potential? + torq = -torq + (sys->wrapDiff(getPosition()-c)).cross( f.f ); addForce( -f.f ); addTorque( torq ); addEnergy( f.e ); @@ -323,7 +324,6 @@ void RigidBody::integrate(int startFinishAll) Vector3::element_mult( Vector3::element_sqrt( 2.0f * diffusion * timestep * 418.679994353), rando) ; position += offset; - rando = getRandomGaussVector(); Vector3 rotationOffset = Vector3::element_mult( (rotDiffusion / Temp) , orientation.transpose() * torque * timestep) * 418.679994353 + Vector3::element_mult( Vector3::element_sqrt( 2.0f * rotDiffusion * timestep * 418.679994353), rando ); diff --git a/src/RigidBody.h b/src/RigidBody.h index 99745110d1f2c35ebf7b65869e542b743cf333b3..6963211a10df6df49719ee8212d91cd3130aa2f1 100644 --- a/src/RigidBody.h +++ b/src/RigidBody.h @@ -19,7 +19,7 @@ class Configuration; class ForceEnergy; - +class BaseGrid; typedef float BigReal; /* strip this out later */ typedef Vector3 Force; @@ -63,8 +63,8 @@ class RigidBody { // host side representation of rigid bodies HOST DEVICE inline BigReal getMass() const { return t->mass; } //HOST DEVICE inline Vector3 getVelocity() const { return momentum/t->mass; } HOST DEVICE inline Vector3 getVelocity() const { return momentum; } - HOST DEVICE inline float getEnergy() const { return energy; } - HOST DEVICE inline float getKinetic() const { return kinetic; } + HOST float getEnergy() { return energy; } + HOST float getKinetic(){ return kinetic; } //HOST DEVICE inline Vector3 getAngularVelocity() const { // return Vector3( angularMomentum.x / t->inertia.x, // angularMomentum.y / t->inertia.y, @@ -74,8 +74,8 @@ class RigidBody { // host side representation of rigid bodies return Vector3( angularMomentum.x, angularMomentum.y, angularMomentum.z); } - void updateParticleList(Vector3* pos_d); - void callGridParticleForceKernel(Vector3* pos_d, Vector3* force_d, int s, float* energy, bool get_energy, int scheme); + void updateParticleList(Vector3* pos_d, BaseGrid* sys_d); + void callGridParticleForceKernel(Vector3* pos_d, Vector3* force_d, int s, float* energy, bool get_energy, int scheme, BaseGrid* sys, BaseGrid* sys_d); bool langevin; @@ -137,6 +137,6 @@ private: HOST DEVICE inline Matrix3 Rz(BigReal t); HOST DEVICE inline Matrix3 eulerToMatrix(const Vector3 e); float Temperature(); - void Boltzmann(); + void Boltzmann(unsigned long int); }; diff --git a/src/RigidBodyController.cu b/src/RigidBodyController.cu index da44fcf18a4518eb43f6ea76e140aac0dbb84546..09178c67778c7044d8d3c6bf9ee919df5c42228a 100644 --- a/src/RigidBodyController.cu +++ b/src/RigidBodyController.cu @@ -29,8 +29,14 @@ RigidBodyForcePair* RigidBodyForcePair::lastRbForcePair = NULL; /* #include <cuda_runtime.h> */ /* #include <curand_kernel.h> */ -RigidBodyController::RigidBodyController(const Configuration& c, const char* outArg) : - conf(c), outArg(outArg) { +RigidBodyController::RigidBodyController(const Configuration& c, const char* prefix, unsigned long int seed, int repID) : conf(c) +{ + char str[8]; + sprintf(str, "%d", repID); + strcpy(outArg, prefix); + strcat(outArg, "."); + strcat(outArg, str); + gpuErrchk(cudaDeviceSynchronize()); /* RBTODO: this should be extraneous */ for (int i = 0; i < conf.numRigidTypes; i++) conf.rigidBody[i].initializeParticleLists(); @@ -63,14 +69,14 @@ RigidBodyController::RigidBodyController(const Configuration& c, const char* out { RigidBody& rb = rigidBodyByType[i][j]; // thermostat - rb.Boltzmann(); + rb.Boltzmann(seed); } } if (conf.inputRBCoordinates.length() > 0) loadRBCoordinates(conf.inputRBCoordinates.val()); - random = new RandomCPU(conf.seed + 1); /* +1 to avoid using same seed as RandomCUDA */ + random = new RandomCPU(conf.seed+repID); /* +rep to avoid using same seed as RandomCUDA */ gpuErrchk(cudaDeviceSynchronize()); /* RBTODO: this should be extraneous */ initializeForcePairs(); @@ -261,10 +267,10 @@ void RigidBodyController::initializeForcePairs() { } -void RigidBodyController::updateParticleLists(Vector3* pos_d) { +void RigidBodyController::updateParticleLists(Vector3* pos_d, BaseGrid* sys_d) { for (int i = 0; i < rigidBodyByType.size(); i++) { for (int j = 0; j < rigidBodyByType[i].size(); j++) { - rigidBodyByType[i][j].updateParticleList(pos_d); + rigidBodyByType[i][j].updateParticleList(pos_d, sys_d); } } } @@ -283,7 +289,8 @@ void RigidBodyController::clearForceAndTorque() } } -void RigidBodyController::updateForces(Vector3* pos_d, Vector3* force_d, int s, float* energy, bool get_energy, int scheme) { +void RigidBodyController::updateForces(Vector3* pos_d, Vector3* force_d, int s, float* energy, bool get_energy, int scheme, BaseGrid* sys, BaseGrid* sys_d) +{ //if (s <= 1) //gpuErrchk( cudaProfilerStart() ); @@ -291,7 +298,7 @@ void RigidBodyController::updateForces(Vector3* pos_d, Vector3* force_d, int s, for (int i = 0; i < rigidBodyByType.size(); i++) { for (int j = 0; j < rigidBodyByType[i].size(); j++) { RigidBody& rb = rigidBodyByType[i][j]; - rb.callGridParticleForceKernel( pos_d, force_d, s, energy, get_energy, scheme ); + rb.callGridParticleForceKernel( pos_d, force_d, s, energy, get_energy, scheme, sys, sys_d ); } } @@ -299,8 +306,8 @@ void RigidBodyController::updateForces(Vector3* pos_d, Vector3* force_d, int s, if ( ((s % conf.rigidBodyGridGridPeriod) == 0 || s == 1 ) && forcePairs.size() > 0) { for (int i=0; i < forcePairs.size(); i++) { // TODO: performance: make this check occur less frequently - if (forcePairs[i].isWithinPairlistDist()) - forcePairs[i].callGridForceKernel(i, s, scheme); + if (forcePairs[i].isWithinPairlistDist(sys)) + forcePairs[i].callGridForceKernel(i, s, scheme, sys_d); } // each kernel call is followed by async memcpy for previous; now get last @@ -318,8 +325,8 @@ void RigidBodyController::updateForces(Vector3* pos_d, Vector3* force_d, int s, gpuErrchk(cudaDeviceSynchronize()); for (int i=0; i < forcePairs.size(); i++) - if (forcePairs[i].isWithinPairlistDist()) - forcePairs[i].processGPUForces(); + if (forcePairs[i].isWithinPairlistDist(sys)) + forcePairs[i].processGPUForces(sys); } } @@ -467,11 +474,12 @@ void RigidBodyForcePair::createStreams() { gpuErrchk( cudaStreamCreate( &(stream[i]) ) ); // gpuErrchk( cudaStreamCreateWithFlags( &(stream[i]) , cudaStreamNonBlocking ) ); } -bool RigidBodyForcePair::isWithinPairlistDist() const { +bool RigidBodyForcePair::isWithinPairlistDist(BaseGrid* sys) const { if (isPmf) return true; float pairlistDist = 2.0f; /* TODO: get from conf */ - float rbDist = (rb1->getPosition() - rb2->getPosition()).length(); - + //float rbDist = (rb1->getPosition() - rb2->getPosition()).length(); + float rbDist = (sys->wrapDiff(rb1->getPosition()-rb2->getPosition())).length(); + for (int i = 0; i < gridKeyId1.size(); ++i) { const int k1 = gridKeyId1[i]; const int k2 = gridKeyId2[i]; @@ -506,7 +514,8 @@ Matrix3 RigidBodyForcePair::getBasis2(const int i) { } // RBTODO: bundle several rigidbodypair evaluations in single kernel call -void RigidBodyForcePair::callGridForceKernel(int pairId, int s, int scheme) { +void RigidBodyForcePair::callGridForceKernel(int pairId, int s, int scheme, BaseGrid* sys_d) +{ // get the force/torque between a pair of rigid bodies /* printf(" Updating rbPair forces\n"); */ const int numGrids = gridKeyId1.size(); @@ -546,12 +555,12 @@ void RigidBodyForcePair::callGridForceKernel(int pairId, int s, int scheme) { computeGridGridForce<<< nb, NUMTHREADS, 2*sizeof(ForceEnergy)*NUMTHREADS, s>>> (type1->rawDensityGrids_d[k1], type2->rawPotentialGrids_d[k2], B1, B2, c, - forces_d[i], torques_d[i], scheme); + forces_d[i], torques_d[i], scheme, sys_d); } else { /* RB with a PMF */ computeGridGridForce<<< nb, NUMTHREADS, 2*sizeof(ForceEnergy)*NUMTHREADS, s>>> (type1->rawDensityGrids_d[k1], type2->rawPmfs_d[k2], B1, B2, c, - forces_d[i], torques_d[i], scheme); + forces_d[i], torques_d[i], scheme, sys_d); } // retrieveForcesForGrid(i); // this is slower than approach below, unsure why @@ -570,7 +579,7 @@ void RigidBodyForcePair::retrieveForcesForGrid(const int i) { gpuErrchk(cudaMemcpyAsync(forces[i], forces_d[i], sizeof(ForceEnergy)*nb, cudaMemcpyDeviceToHost, s)); gpuErrchk(cudaMemcpyAsync(torques[i], torques_d[i], sizeof(Vector3)*nb, cudaMemcpyDeviceToHost, s)); } -void RigidBodyForcePair::processGPUForces() { +void RigidBodyForcePair::processGPUForces(BaseGrid* sys) { const int numGrids = gridKeyId1.size(); Vector3 f = Vector3(0.f); @@ -591,7 +600,7 @@ void RigidBodyForcePair::processGPUForces() { // tmpT is the torque calculated about the origin of grid k2 (e.g. c2) // so here we transform torque to be about rb1 Vector3 o2 = getOrigin2(i); - tmpT = tmpT - (rb1->getPosition() - o2).cross( tmpF.f ); + tmpT = tmpT - sys->wrapDiff(rb1->getPosition() - o2).cross( tmpF.f ); // sum energies,forces and torques energy += tmpF.e; @@ -607,7 +616,7 @@ void RigidBodyForcePair::processGPUForces() { rb1->addEnergy( energy ); if (!isPmf) { - const Vector3 t2 = -t + (rb2->getPosition()-rb1->getPosition()).cross( f ); + const Vector3 t2 = -t + sys->wrapDiff(rb2->getPosition()-rb1->getPosition()).cross( f ); rb2->addForce( -f ); rb2->addTorque( t2 ); rb2->addEnergy( energy ); @@ -721,6 +730,22 @@ void RigidBodyController::printData(int step,std::ofstream &file) { } } +float RigidBodyController::getEnergy(float (RigidBody::*Get)()) +{ + float e = 0.f; + for (int i = 0; i < rigidBodyByType.size(); i++) + { + for(int j = 0; j < rigidBodyByType[i].size(); j++) + { + RigidBody& rb = rigidBodyByType[i][j]; + //e += rb.getKinetic(); + e += (rb.*Get)(); + } + } + return e; +} + +#if 0 void RigidBodyController::printEnergyData(std::fstream &file) { if(file.is_open()) @@ -741,6 +766,7 @@ void RigidBodyController::printEnergyData(std::fstream &file) std::cout << " Error in opening files\n"; } } +#endif int RigidBodyForcePair::initialize() { // printf(" Initializing (streams for) RB force pair...\n"); diff --git a/src/RigidBodyController.h b/src/RigidBodyController.h index 1aee3635d6bc99113b1c1e056d75706b136b28cc..eda6aa114c12ffc0cc7d40ef449fbbf70e313158 100644 --- a/src/RigidBodyController.h +++ b/src/RigidBodyController.h @@ -6,6 +6,7 @@ #include <cuda.h> #include <cuda_runtime.h> #include "useful.h" +#include "BaseGrid.h" #define NUMSTREAMS 8 @@ -46,7 +47,7 @@ public: } ~RigidBodyForcePair(); - bool isWithinPairlistDist() const; + bool isWithinPairlistDist(BaseGrid* sys) const; private: int initialize(); @@ -81,9 +82,9 @@ private: static RigidBodyForcePair* lastRbForcePair; static int lastRbGridID; - void callGridForceKernel(int pairId, int s,int scheme); + void callGridForceKernel(int pairId, int s,int scheme, BaseGrid* sys_d); void retrieveForcesForGrid(const int i); - void processGPUForces(); + void processGPUForces(BaseGrid*); Matrix3 getBasis1(const int i); Matrix3 getBasis2(const int i); Vector3 getOrigin1(const int i); @@ -95,18 +96,19 @@ public: /* DEVICE RigidBodyController(const NamdState *s, int reductionTag, SimParameters *sp); */ RigidBodyController(); ~RigidBodyController(); - RigidBodyController(const Configuration& c, const char* outArg); + RigidBodyController(const Configuration& c, const char* outArg, unsigned long int seed, int repID); void AddLangevin(); void SetRandomTorques(); void integrate(int step); void integrateDLM(int step); - void updateForces(Vector3* pos_d, Vector3* force_d, int s, float* energy, bool get_energy, int scheme); - void updateParticleLists(Vector3* pos_d); + void updateForces(Vector3* pos_d, Vector3* force_d, int s, float* energy, bool get_energy, int scheme, BaseGrid* sys, BaseGrid* sys_d); + void updateParticleLists(Vector3* pos_d, BaseGrid* sys_d); void clearForceAndTorque(); void KineticEnergy(); void print(int step); - void printEnergyData(std::fstream &file); + //void printEnergyData(std::fstream &file); + float getEnergy(float (RigidBody::*get)()); private: bool loadRBCoordinates(const char* fileName); void initializeForcePairs(); @@ -126,7 +128,7 @@ private: std::ofstream trajFile; const Configuration& conf; - const char* outArg; + char outArg[128]; RandomCPU* random; /* RequireReduction *gridReduction; */ @@ -137,6 +139,6 @@ private: std::vector< RigidBodyForcePair > forcePairs; - float* rb_energy; + //float* rb_energy; };