Something went wrong on our end
GrandBrownTown.cuh 21.45 KiB
// GrandBrownTown.cuh
//
// Terrance Howard <heyterrance@gmail.com>
#pragma once
//#define MDSTEP
#define Unit1 4.18679994e4
#define Unit2 2.046167337
//#define Debug
#include "CudaUtil.cuh"
#include "RigidBodyType.h"
#include "RigidBodyGrid.h"
__device__
Vector3 step(Vector3& r0, float kTlocal, Vector3 force, float diffusion, Vector3 diffGrad,
float timestep, BaseGrid *sys, Random *randoGen, int num);
inline __device__
ForceEnergy compute_position_dependent_force(
const Vector3* __restrict__ pos, Vector3* __restrict__ forceInternal,
const int* __restrict__ type, BrownianParticleType** part,
const float electricField, const int scheme, const int idx)
{
int t = type[idx];
Vector3 r0 = pos[idx];
const BrownianParticleType& pt = *part[t];
Vector3 forceExternal = Vector3(0.0f, 0.0f, pt.charge * electricField);
ForceEnergy fe(0.f, 0.f);
for(int i = 0; i < pt.numPartGridFiles; ++i)
{
ForceEnergy tmp(0.f, 0.f);
if(!scheme) {
BoundaryCondition bc = pt.pmf_boundary_conditions[i];
INTERPOLATE_FORCE(tmp, pt.pmf[i]->interpolateForceDLinearly, bc, r0)
} else
tmp = pt.pmf[i]->interpolateForceD(r0);
fe.f += tmp.f * pt.pmf_scale[i];
fe.e += tmp.e * pt.pmf_scale[i];
}
// if(get_energy)
// energy[idx] += fe.e;
#ifndef FORCEGRIDOFF
// Add a force defined via 3D FORCE maps (not 3D potential maps)
if(!scheme)
{
if (pt.forceXGrid != NULL) fe.f.x += pt.forceXGrid->interpolatePotentialLinearly(r0);
if (pt.forceYGrid != NULL) fe.f.y += pt.forceYGrid->interpolatePotentialLinearly(r0);
if (pt.forceZGrid != NULL) fe.f.z += pt.forceZGrid->interpolatePotentialLinearly(r0);
}
else
{
if (pt.forceXGrid != NULL) fe.f.x += pt.forceXGrid->interpolatePotential(r0);
if (pt.forceYGrid != NULL) fe.f.y += pt.forceYGrid->interpolatePotential(r0);
if (pt.forceZGrid != NULL) fe.f.z += pt.forceZGrid->interpolatePotential(r0);
}
#endif
fe.f = fe.f + forceExternal;
return fe;
}
////The kernel is for Nose-Hoover Langevin dynamics
template <bool need_position_dep_force = false>
__global__ void
updateKernelNoseHooverLangevin(Vector3* __restrict__ pos, Vector3* __restrict__ momentum, float* random,
Vector3* __restrict__ forceInternal, int type[], BrownianParticleType* part[],
float kT, BaseGrid* kTGrid, float electricField, int tGridLength, float timestep,
int num, int num_rb_attached_particles, BaseGrid* sys, Random* randoGen, int numReplicas, int scheme)
{
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < num * numReplicas)
{
idx = (idx % num) + (idx/num) * (num+num_rb_attached_particles);
ForceEnergy fe;
if (need_position_dep_force) {
fe = compute_position_dependent_force(
pos, forceInternal, type, part, electricField, scheme, idx );
}
int t = type[idx];
Vector3 r0 = pos[idx];
Vector3 p0 = momentum[idx];
float ran = random[idx];
const BrownianParticleType& pt = *part[t];
if (need_position_dep_force) {
forceInternal[idx] = forceInternal[idx] + fe.f;
}
Vector3 force = forceInternal[idx];
#ifdef Debug
forceInternal[idx] = -force;
#endif
// Get local kT value
float kTlocal;
if(!scheme)
kTlocal = (tGridLength == 0) ? kT : kTGrid->interpolatePotentialLinearly(r0); /* periodic */
else
kTlocal = (tGridLength == 0) ? kT : kTGrid->interpolatePotential(r0); /* periodic */
// Update the particle's position using the calculated values for time, force, etc.
float mass = pt.mass;
float mu = pt.mu;
Vector3 gamma = pt.transDamping;
float rando = (*randoGen).gaussian(idx, num * numReplicas);
float tmp = sqrtf(kTlocal * (1.f - expf(-2.f * gamma.x * timestep)) / mu);
if (pt.diffusionGrid != NULL)
{
Vector3 gridCenter = pt.diffusionGrid->origin +
pt.diffusionGrid->basis.transform( Vector3(0.5*pt.diffusionGrid->nx, 0.5*pt.diffusionGrid->ny,
0.5*pt.diffusionGrid->nz));
Vector3 p2 = r0 - gridCenter;
p2 = sys->wrapDiff( p2 ) + gridCenter;
ForceEnergy diff;
if(!scheme)
diff = pt.diffusionGrid->interpolateForceDLinearly<periodic>(p2);
else
diff = pt.diffusionGrid->interpolateForceD(p2);
gamma = Vector3(kTlocal / (mass * diff.e));
}
#ifdef MDSTEP
force = Vector3(-r0.x, -r0.y, -r0.z);
#endif
p0 = p0 + 0.5f * timestep * force * Unit1;
r0 = r0 + 0.5f * timestep * p0 * 1e4 / mass;
//r0 = sys->wrap(r0);
ran = ran + 0.5f * (p0.length2() / mass * 0.238845899f - 3.f * kTlocal) / mu;
p0 = expf(-0.5f * ran) * p0;
ran = expf(-gamma.x * timestep) * ran + tmp * rando;
p0 = expf(-0.5f * ran) * p0;
ran = ran + 0.5f * (p0.length2() / mass * 0.238845899 - 3.f * kTlocal) / mu;
r0 = r0 + 0.5f * timestep * p0 * 1e4 / mass;
r0 = sys->wrap(r0);
pos[idx] = r0;
momentum[idx] = p0;
random[idx]= ran;
}
}
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//This is the kernel for BAOAB (velocity verlet algorithm) which is symplectic. For more information,
//please infer http://www.MolecularDynamics.info
//The original BBK kernel is no longer used since the random numbers should be reused
//which is not possible in GPU code.
//Han-Yi Chou
template <bool need_position_dep_force = false>
__global__ void updateKernelBAOAB(Vector3* pos, Vector3* momentum, Vector3* __restrict__ forceInternal,
int type[], BrownianParticleType* part[], float kT, BaseGrid* kTGrid,
float electricField,int tGridLength, float timestep,
int num, int num_rb_attached_particles, BaseGrid* sys,
Random* randoGen, int numReplicas, int scheme)
{
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < num * numReplicas)
{
idx = (idx % num) + (idx/num) * (num+num_rb_attached_particles);
ForceEnergy fe;
if (need_position_dep_force) {
fe = compute_position_dependent_force(
pos, forceInternal, type, part, electricField, scheme, idx );
}
// if (get_energy) energy[idx] += fe.e;
int t = type[idx];
Vector3 r0 = pos[idx];
Vector3 p0 = momentum[idx];
const BrownianParticleType& pt = *part[t];
if (need_position_dep_force) { // only needed at ts == 1
forceInternal[idx] = forceInternal[idx] + fe.f;
}
Vector3 force = forceInternal[idx];
#ifdef Debug
forceInternal[idx] = -force;
#endif
// Get local kT value
float kTlocal;
if(!scheme)
kTlocal = (tGridLength == 0) ? kT : kTGrid->interpolatePotentialLinearly(r0); /* periodic */
else
kTlocal = (tGridLength == 0) ? kT : kTGrid->interpolatePotential(r0); /* periodic */
// Update the particle's position using the calculated values for time, force, etc.
float mass = pt.mass;
Vector3 gamma = pt.transDamping;
Vector3 rando = (*randoGen).gaussian_vector(idx, num * numReplicas);
//printf("%f %f %f\n", rando.x, rando.y, rando.z);
if (pt.diffusionGrid != NULL)
{
Vector3 gridCenter = pt.diffusionGrid->origin +
pt.diffusionGrid->basis.transform( Vector3(0.5*pt.diffusionGrid->nx, 0.5*pt.diffusionGrid->ny,
0.5*pt.diffusionGrid->nz));
Vector3 p2 = r0 - gridCenter;
p2 = sys->wrapDiff( p2 ) + gridCenter;
ForceEnergy diff;
if(!scheme)
diff = pt.diffusionGrid->interpolateForceDLinearly<periodic>(p2);
else
diff = pt.diffusionGrid->interpolateForceD(p2);
gamma = Vector3(kTlocal / (mass * diff.e));
}
#ifdef MDSTEP
force = Vector3(-r0.x, -r0.y, -r0.z);
#endif
p0 = p0 + 0.5f * timestep * force * Unit1;
r0 = r0 + 0.5f * timestep / mass * p0 * 1e4;
p0.x = expf(-timestep * gamma.x) * p0.x + sqrtf(mass * kTlocal * (1.f-expf(-2.f*timestep*gamma.x))) * rando.x * Unit2;
p0.y = expf(-timestep * gamma.y) * p0.y + sqrtf(mass * kTlocal * (1.f-expf(-2.f*timestep*gamma.y))) * rando.y * Unit2;
p0.z = expf(-timestep * gamma.z) * p0.z + sqrtf(mass * kTlocal * (1.f-expf(-2.f*timestep*gamma.z))) * rando.z * Unit2;
r0 = r0 + 0.5f * timestep * p0 * 1e4 / mass;
r0 = sys->wrap(r0);
pos[idx] = r0;
momentum[idx] = p0;
//if(idx == 0)
// printf("%f %f %f\n", pos[idx].x,pos[idx].y,pos[idx].z);
}
}
//update momentum in the last step of BAOAB integrator for the Langevin dynamics. Han-Yi Chou
__global__ void LastUpdateKernelBAOAB(Vector3* pos,Vector3* momentum, Vector3* __restrict__ forceInternal,
int type[], BrownianParticleType* part[], float kT, BaseGrid* kTGrid,
float electricField, int tGridLength, float timestep, int num, int num_rb_attached_particles,
BaseGrid* sys, Random* randoGen, int numReplicas, float* __restrict__ energy, bool get_energy,int scheme)
{
int idx = static_cast<int>(blockIdx.x * blockDim.x + threadIdx.x);
if (idx < num * numReplicas)
{
idx = (idx % num) + (idx/num) * (num+num_rb_attached_particles);
ForceEnergy fe = compute_position_dependent_force(
pos, forceInternal, type, part, electricField, scheme, idx );
if (get_energy) energy[idx] += fe.e;
Vector3 r0 = pos[idx];
Vector3 p0 = momentum[idx];
Vector3 force = forceInternal[idx] + fe.f;
forceInternal[idx] = forceInternal[idx] + fe.f; // Adding position-dep force so it doesn't need to be calculated in the "plain" kernel
#ifdef Debug
forceInternal[idx] = -force;
#endif
#ifdef MDSTEP
force = Vector3(-r0.x, -r0.y, -r0.z);
#endif
p0 = p0 + 0.5f * timestep * force * Unit1;
momentum[idx] = p0;
}
}
//Update kernel for Brownian dynamics
__global__
void updateKernel(Vector3* pos, Vector3* __restrict__ forceInternal, int type[],
BrownianParticleType* part[],float kT, BaseGrid* kTGrid, float electricField,
int tGridLength, float timestep, int num, int num_rb_attached_particles, BaseGrid* sys,
Random* randoGen, int numReplicas, float* energy, bool get_energy, int scheme)
{
// Calculate this thread's ID
int idx = static_cast<int>(blockIdx.x * blockDim.x + threadIdx.x);
// TODO: Make this a grid-stride loop to make efficient reuse of RNG states
// Loop over ALL particles in ALL replicas
if (idx < num * numReplicas)
{
idx = (idx % num) + (idx/num) * (num+num_rb_attached_particles);
const int t = type[idx];
Vector3 p = pos[idx];
const BrownianParticleType& pt = *part[t];
/* printf("atom %d: forceInternal: %f %f %f\n", idx, forceInternal[idx].x, forceInternal[idx].y, forceInternal[idx].z); */
ForceEnergy fe = compute_position_dependent_force(
pos, forceInternal, type, part, electricField, scheme, idx );
// Compute total force:
// Internal: interaction between particles
// External: electric field (now this is basically a constant vector)
// forceGrid: ADD force due to PMF or other potentials defined in 3D space
Vector3 force = forceInternal[idx] + fe.f;
#ifdef Debug
forceInternal[idx] = -force;
#endif
// Get local kT value
float kTlocal;
if(!scheme)
kTlocal = (tGridLength == 0) ? kT : kTGrid->interpolatePotentialLinearly(p); /* periodic */
else
kTlocal = (tGridLength == 0) ? kT : kTGrid->interpolatePotential(p); /* periodic */
// Update the particle's position using the calculated values for time, force, etc.
float diffusion = pt.diffusion;
Vector3 diffGrad = Vector3(0.0f);
// printf("force: %f %f %f %f %f %f\n", p.x, p.y, p.z,
// fe.f.x, fe.f.y, fe.f.z);
// // force.x, force.y, force.z);
if (pt.diffusionGrid != NULL)
{
// printf("atom %d: pos: %f %f %f\n", idx, p.x, p.y, p.z);
// p = pt.diffusionGrid->wrap(p); // illegal mem access; no origin/basis?
Vector3 gridCenter = pt.diffusionGrid->origin +
pt.diffusionGrid->basis.transform( Vector3(0.5*pt.diffusionGrid->nx,
0.5*pt.diffusionGrid->ny,
0.5*pt.diffusionGrid->nz));
Vector3 p2 = p - gridCenter;
p2 = sys->wrapDiff( p2 ) + gridCenter;
/* p2 = sys->wrap( p2 ); */
/* p2 = p2 - gridCenter; */
/* printf("atom %d: ps2: %f %f %f\n", idx, p2.x, p2.y, p2.z); */
ForceEnergy diff;
if(!scheme)
diff = pt.diffusionGrid->interpolateForceDLinearly<periodic>(p2);
else
diff = pt.diffusionGrid->interpolateForceD(p2);
diffusion = diff.e;
diffGrad = diff.f;
}
// if (idx == 0) {
// printf("force: "); force.print();
// }
Vector3 tmp = step(p, kTlocal, force, diffusion, -diffGrad, timestep, sys, randoGen,
num * numReplicas);
// assert( tmp.length() < 10000.0f );
pos[idx] = tmp;
if(get_energy)
{
float en_local = 0.f;
for(int i = 0; i < pt.numPartGridFiles; ++i)
{
float en_tmp = 0.0f;
if(!scheme)
en_tmp = pt.pmf[i]->interpolatePotentialLinearly(tmp);
else
en_tmp = pt.pmf[i]->interpolatePotential(tmp);
en_tmp *= pt.pmf_scale[i];
}
energy[idx] += en_local;
}
}
}
/*
//This is the BBK Langevin integrator for Langevin dynamics Han-Yi Chou
__device__ inline void step(Vector3& r0, Vector3& p0, float kTlocal, Vector3 force, float diffusion, Vector3 diffGrad,
float mass, Vector3& gamma, float timestep, BaseGrid *sys, Random *randoGen, int num)
{
int idx = blockIdx.x * blockDim.x + threadIdx.x;
Vector3 rando = randoGen->gaussian_vector(idx, num);
float tmp = sqrtf(diffusion * timestep);
p0.x = (1.0f - 0.50f * timestep * gamma.x) * p0.x + (0.50f * timestep * force.x * Unit1 + (tmp * sqrtf(gamma.x) * rando.x) * mass * Unit2);
p0.y = (1.0f - 0.50f * timestep * gamma.y) * p0.y + (0.50f * timestep * force.y * Unit1 + (tmp * sqrtf(gamma.y) * rando.y) * mass * Unit2);
p0.z = (1.0f - 0.50f * timestep * gamma.z) * p0.z + (0.50f * timestep * force.z * Unit1 + (tmp * sqrtf(gamma.z) * rando.z) * mass * Unit2);
r0 = r0 + (timestep * p0) / mass * 1e4;
Vector3 r = r0;
r0 = sys->wrap(r);
}
//This is the BBK integrator for updating momentum in Langevin dynamics Han-Yi Chou
__device__ inline void step(Vector3& p0, float kTlocal, Vector3 force, float diffusion, Vector3 diffGrad,
float mass, Vector3& gamma, float timestep, BaseGrid *sys, Random *randoGen, int num)
{
int idx = blockIdx.x * blockDim.x + threadIdx.x;
Vector3 rando = randoGen->gaussian_vector(idx, num);
float tmp = sqrtf(diffusion * timestep);
p0.x = (p0.x + (0.50 * timestep * force.x * Unit1 + (tmp * sqrtf(gamma.x) * rando.x) * mass * Unit2)) / (1.0f+0.5f*timestep*gamma.x);
p0.y = (p0.y + (0.50 * timestep * force.y * Unit1 + (tmp * sqrtf(gamma.y) * rando.y) * mass * Unit2)) / (1.0f+0.5f*timestep*gamma.y);
p0.z = (p0.z + (0.50 * timestep * force.z * Unit1 + (tmp * sqrtf(gamma.z) * rando.z) * mass * Unit2)) / (1.0f+0.5f*timestep*gamma.z);
}
*/
//For Brownian dynamics
__device__
inline Vector3 step(Vector3& r0, float kTlocal, Vector3 force, float diffusion,
Vector3 diffGrad, float timestep, BaseGrid *sys,
Random *randoGen, int num) {
const int idx = static_cast<int>(blockIdx.x * blockDim.x + threadIdx.x);
// TODO: improve performance by storing state locally, then sending it back to GPU
Vector3 rando = (*randoGen).gaussian_vector(idx, num);
diffusion *= timestep;
Vector3 r = r0 + (diffusion / kTlocal) * force
+ timestep * diffGrad
+ sqrtf(2.0f * diffusion) * rando;
// Wrap about periodic boundaries
return sys->wrap(r);
}
template<bool print=false>
__global__
void updateGroupSites(Vector3 pos[], int* groupSiteData, int num, int numGroupSites, int numReplicas) {
int i = blockIdx.x * blockDim.x + threadIdx.x;
// TODO: improve naive implementation so that each thread loads memory single pos elemment
// For all threads representing a valid pair of particles
if (i < numGroupSites*numReplicas) {
pos[num*numReplicas + i] = Vector3(0.0f);
}
// For all threads representing a valid pair of particles
if (i < numGroupSites*numReplicas) {
const int imod = i % numGroupSites;
const int rep = i/numGroupSites;
const int start = groupSiteData[imod];
const int finish = groupSiteData[imod+1];
float weight = 1.0 / (finish-start);
Vector3 tmp = Vector3(0.0f);
for (int j = start; j < finish; j++) {
const int aj = groupSiteData[j] + num*rep;
tmp += weight * pos[aj];
}
if (print) {
printf("GroupSite %d (rep %d/%d) COM (start,finish, x,y,z): (%d,%d, %f,%f,%f)\n",i, rep, numReplicas, start, finish, tmp.x, tmp.y, tmp.z);
}
pos[num*numReplicas + i] = tmp;
}
}
template<bool print=false>
__global__
void distributeGroupSiteForces(Vector3 force[], int* groupSiteData, int num, int numGroupSites, int numReplicas) {
// TODO, handle groupsite energies
int i = blockIdx.x * blockDim.x + threadIdx.x;
// For all threads representing a valid pair of particles
if (i < numGroupSites*numReplicas) {
const int imod = i % numGroupSites;
const int rep = i/numGroupSites;
const int start = groupSiteData[imod];
const int finish = groupSiteData[imod+1];
float weight = 1.0 / (finish-start);
const Vector3 tmp = weight*force[num*numReplicas+i];
if (print) {
printf("GroupSite %d Force rep %d/%d: %f %f %f\n",i, rep, numReplicas, tmp.x, tmp.y, tmp.z);
}
for (int j = start; j < finish; j++) {
const int aj = groupSiteData[j] + num*rep;
atomicAdd( force+aj, tmp );
}
}
}
__global__ void devicePrint(RigidBodyType* rb[]) {
// printf("Device printing\n");
int i = 0;
printf("RigidBodyType %d: numGrids = %d\n", i, rb[i]->numPotGrids);
// printf(" RigidBodyType %d: potGrid: %p\n", i, rb[i]->rawPotentialGrids);
// int j = 0;
// printf(" RigidBodyType %d: potGrid[%d]: %p\n", i, j, &(rb[i]->rawPotentialGrids[j]));
// printf(" RigidBodyType %d: potGrid[%d] size: %d\n", i, j, rb[i]->rawPotentialGrids[j].getSize());
// BaseGrid g = rb[i]->rawPotentialGrids[j];
// for (int k = 0; k < rb[i]->rawPotentialGrids[j].size(); k++)
// for (int k = 0; k < rb[i]->rawPotentialGrids[j].getSize(); k++)
// printf(" rbType_d[%d]->potGrid[%d].val[%d]: %g\n",
// i, j, k, rb[i]->rawPotentialGrids[j].val[k]);
// i, j, k, rb[i]->rawPotentialGrids[j]).val[k];
}
// __global__ void devicePrint(RigidBodyType* rb[]) {
// // printf("Device printing\n");
// int i = 0;
// printf("RigidBodyType %d: numGrids = %d\n", i, rb[i]->numPotGrids);
// printf("RigidBodyType %d: potGrid: %p\n", i, rb[i]->rawPotentialGrids);
// int j = 0;
// printf("RigidBodyType %d: potGrid[%d]: %p\n", i, &(rb[i]->rawPotentialGrids[j]));
// BaseGrid g = rb[i]->rawPotentialGrids[j];
// // for (int k = 0; k < rb[i]->rawPotentialGrids[j].size(); k++)
// for (int k = 0; k < g->getSize(); k++)
// printf("rbType_d[%d]->potGrid[%d].val[%d]: %g\n",
// i, j, k, g.val[k]);
// // i, j, k, rb[i]->rawPotentialGrids[j]).val[k];
// }
// __device__ Vector3* totalForce;
// Vector3* totalForce_h;
// void initTotalForce() {
// cudaMalloc( &totalForce_h, sizeof(Vector3) );
// cudaMemcpyToSymbol(totalForce, &totalForce_h, sizeof(totalForce_h));
// }
__global__ void compute_position_dependent_force_for_rb_attached_particles(
const Vector3* __restrict__ pos,
Vector3* __restrict__ forceInternal, float* __restrict__ energy,
const int* __restrict__ type, BrownianParticleType** __restrict__ part,
const float electricField, const int num, const int num_rb_attached_particles,
const int numReplicas, const int scheme)
{
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < num_rb_attached_particles * numReplicas)
{
idx = num + (idx % num_rb_attached_particles) + (idx/num_rb_attached_particles) * (num+num_rb_attached_particles);
ForceEnergy fe = compute_position_dependent_force(
pos, forceInternal, type, part, electricField, scheme, idx );
atomicAdd( &forceInternal[idx], fe.f );
atomicAdd( &energy[idx], fe.e );
}
}
__global__ void compute_position_dependent_force_for_rb_attached_particles(
const Vector3* __restrict__ pos, Vector3* __restrict__ forceInternal,
const int* __restrict__ type, BrownianParticleType** __restrict__ part,
const float electricField, const int num, const int num_rb_attached_particles,
const int numReplicas, const int scheme)
{
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < num_rb_attached_particles * numReplicas)
{
idx = num + (idx % num_rb_attached_particles) + (idx/num_rb_attached_particles) * (num+num_rb_attached_particles);
ForceEnergy fe = compute_position_dependent_force(
pos, forceInternal, type, part, electricField, scheme, idx );
atomicAdd( &forceInternal[idx], fe.f );
}
}