From bba585bba7cba1ce2a8f69df98035a72f9569219 Mon Sep 17 00:00:00 2001 From: Chris Maffeo <cmaffeo2@illinois.edu> Date: Tue, 17 Nov 2015 16:09:34 -0600 Subject: [PATCH] worked on rigidbody classes --- BaseGrid.h | 2 +- ComputeGridGrid.cuh | 46 +-- Configuration.cpp | 31 +- Configuration.h | 8 +- Debug.h | 2 +- GrandBrownTown.cu | 18 +- GrandBrownTown.cuh | 2 - GrandBrownTown.h | 10 +- RigidBody.c | 265 ------------------ RigidBody.cu | 226 +++++++++++++++ RigidBody.h | 252 ++++------------- ...BodyController.c => RigidBodyController.cu | 132 +++++++-- RigidBodyController.h | 47 ++++ RigidBodyGrid.cu | 39 +-- RigidBodyGrid.h | 81 +++--- RigidBodyType.cu | 67 ++++- RigidBodyType.h | 19 +- makefile | 2 +- notes.org | 191 ++++++++++++- useful.h | 22 +- 20 files changed, 820 insertions(+), 642 deletions(-) delete mode 100644 RigidBody.c create mode 100644 RigidBody.cu rename RigidBodyController.c => RigidBodyController.cu (78%) create mode 100644 RigidBodyController.h diff --git a/BaseGrid.h b/BaseGrid.h index bcdda0b..e3032d5 100644 --- a/BaseGrid.h +++ b/BaseGrid.h @@ -175,7 +175,7 @@ public: \=========*/ // Get the mean of the entire grid. - float mean() const + float mean() const; // Compute the average profile along an axis. // Assumes that the grid axis with index "axis" is aligned with the world axis of index "axis". diff --git a/ComputeGridGrid.cuh b/ComputeGridGrid.cuh index 11e2f04..50a6ba6 100644 --- a/ComputeGridGrid.cuh +++ b/ComputeGridGrid.cuh @@ -1,34 +1,34 @@ #pragma once #include "RigidBodyGrid.h" +#include "useful.h" __global__ -void computeGridGridForce(RigidBodyGrid* rho, RigidBodyGrid* u) { - unsigned int bidx = blockIdx.x; - unsigned int tidx = threadIdx.x; - - // GTX 980 has ~ 4 x 10x10x10 floats available for shared memory - // but code should work without access to this - - // RBTODO rewrite to use shared memory, break problem into subgrids - // will lookups of - -// http://devblogs.nvidia.com/parallelforall/cuda-pro-tip-write-flexible-kernels-grid-stride-loops/ - - const unsigned int r_id = bidx * blockDim.x + threadIdx.x; +void computeGridGridForce(const RigidBodyGrid& rho, const RigidBodyGrid& u, + Matrix3 basis_rho, Vector3 origin_rho, + Matrix3 basis_u, Vector3 origin_u) { + // RBTODO http://devblogs.nvidia.com/parallelforall/cuda-pro-tip-write-flexible-kernels-grid-stride-loops/ + const unsigned int r_id = blockIdx.x * blockDim.x + threadIdx.x; // RBTODO parallelize transform - if (r_id > rho->size) // skip threads with no data + if (r_id > rho.size) // skip threads with no data return; - - // Tile grid data into shared memory + + // Maybe: Tile grid data into shared memory // RBTODO: think about localizing regions of grid data - Vector3 p = rho->getPosition(r_id); - float val = rho->val[r_id]; + Vector3 p = rho.getPosition(r_id, basis_rho, origin_rho); + float val = rho.val[r_id]; + + // RBTODO potential basis and rho! + + // RBTODO combine interp methods and reduce repetition! + float energy = val*u.interpolatePotential(p); + Vector3 f = val*u.interpolateForceD(p); + Vector3 t = p.cross(f); // test if sign is correct! - // RBTODO reduce these + // RBTODO reduce forces and torques // http://www.cuvilib.com/Reduction.pdf - float energy = ; - Vector3 f = u->interpolateForceD(p); - Vector3 t = f * - + + // RBTODO 3rd-law forces + torques (maybe outside kernel) + + // must reference rigid bodies in some way } diff --git a/Configuration.cpp b/Configuration.cpp index 8e7328e..89691e8 100644 --- a/Configuration.cpp +++ b/Configuration.cpp @@ -174,18 +174,29 @@ Configuration::Configuration(const char* config_file, int simNum, bool debug) : name = new String[num * simNum]; currSerial = 0; - // RigidBodies... - if (numRigidTypes > 0) { - printf("\nCounting rigid bodies specified in the configuration file.\n"); - numRB = 0; - for (int i = 0; i < numRigidTypes; i++) numRB += rigidBody[i].num; - +// RBTODO: clean this mess up + /* // RigidBodies... */ + /* if (numRigidTypes > 0) { */ + /* printf("\nCounting rigid bodies specified in the configuration file.\n"); */ + /* numRB = 0; */ + + /* // grow list of rbs */ + /* for (int i = 0; i < numRigidTypes; i++) { */ + /* numRB += rigidBody[i].num; */ + + /* std::vector<RigidBody> tmp; */ + /* for (int j = 0; j < rigidBody[i].num; j++) { */ + /* tmp.push_back( new RigidBody( this, rigidBody[i] ) ); */ + /* } */ + + /* rbs.push_back(tmp); */ + /* } */ // // state data // rbPos = new Vector3[numRB * simNum]; // type = new int[numRB * simNum]; - } - printf("Initial RigidBodies: %d\n", numRB); + /* } */ + /* printf("Initial RigidBodies: %d\n", numRB); */ // Create exclusions from the exclude rule, if it was specified in the config file @@ -449,11 +460,11 @@ void Configuration::copyToCUDA() { cudaMemcpyHostToDevice)); - printf("copying RBs\n"); + printf("Copying RBs\n"); // Copy rigidbody types // http://stackoverflow.com/questions/16024087/copy-an-object-to-device for (int i = 0; i < numRigidTypes; i++) { - printf("working on RB %d\n",i); + printf("Working on RB %d\n",i); RigidBodyType *rb = &(rigidBody[i]); // temporary for convenience rb->updateRaw(); diff --git a/Configuration.h b/Configuration.h index 0734a33..68df323 100644 --- a/Configuration.h +++ b/Configuration.h @@ -10,6 +10,7 @@ #define CONFIGURATION_H #include <algorithm> // sort +#include <vector> #include <cuda.h> #include <cuda_runtime.h> @@ -27,6 +28,7 @@ #include "GPUManager.h" #include "Dihedral.h" #include "RigidBodyType.h" +#include "RigidBody.h" #include <cuda.h> #include <cuda_runtime.h> @@ -109,8 +111,9 @@ public: float minimumSep; // minimum separation allowed with placing new particles // RigidBody variables - int numRB; - + /* int numRB; */ + /* std::vector< std::vector<RigidBody> > rbs; */ + // System parameters String outputName; float timestep; @@ -194,7 +197,6 @@ public: RigidBodyType* rigidBody; int numRigidTypes; - }; #endif diff --git a/Debug.h b/Debug.h index c3bee20..a77f2b8 100644 --- a/Debug.h +++ b/Debug.h @@ -36,7 +36,7 @@ #ifdef DEBUGM -#include "InfoStream.h" +/* #include "InfoStream.h" */ #define Debug(x) (x) #define DebugM(level,format) \ diff --git a/GrandBrownTown.cu b/GrandBrownTown.cu index cd1650b..57b271b 100644 --- a/GrandBrownTown.cu +++ b/GrandBrownTown.cu @@ -1,5 +1,6 @@ #include "GrandBrownTown.h" #include "GrandBrownTown.cuh" +/* #include "ComputeGridGrid.cuh" */ #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true) { @@ -9,16 +10,13 @@ inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true) { } } -#include "RigidBodyType.h" //temporary - - bool GrandBrownTown::DEBUG; cudaEvent_t START, STOP; GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg, const long int randomSeed, bool debug, bool imd_on, unsigned int imd_port, int numReplicas) : - imd_on(imd_on), imd_port(imd_port), numReplicas(numReplicas) { + imd_on(imd_on), imd_port(imd_port), numReplicas(numReplicas), conf(c) { for (int i = 0; i < numReplicas; i++) { std::stringstream curr_file, restart_file, out_prefix; @@ -56,6 +54,8 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg, type = new int[num * numReplicas]; // [HOST] array of particles' types. serial = new int[num * numReplicas]; // [HOST] array of particles' serial numbers. + // Allocate things for rigid body + // RBC = RigidBodyController(c); // Replicate identical initial conditions across all replicas // TODO: add an option to generate random initial conditions for all replicas @@ -410,13 +410,11 @@ void GrandBrownTown::run() { rt_timer_stop(cputimer); float dt2 = rt_timer_time(cputimer); printf("Position Update Time: %f ms\n", dt2 * 1000); - // */ + */ - // calculateRigidBodyForce<<< numBlocks, NUM_THREADS >>>(rb_d, rbType_d, - // kT, kTGrid_d, - // tl, timestep, - // sys_d, randoGen_d, numReplicas); - computeGridGridForce<<< numBlocks, NUM_THREADS >>>(grid1_d, grid2_d); + + /* for (int j = 0; j < t->num; j++) { */ + /* computeGridGridForce<<< numBlocks, NUM_THREADS >>>(grid1_d, grid2_d); */ // int numBlocks = (numRB ) / NUM_THREADS + (num * numReplicas % NUM_THREADS == 0 ? 0 : 1); diff --git a/GrandBrownTown.cuh b/GrandBrownTown.cuh index 7b447e5..5f8c1a7 100644 --- a/GrandBrownTown.cuh +++ b/GrandBrownTown.cuh @@ -16,8 +16,6 @@ Vector3 step(Vector3 r0, float kTlocal, Vector3 force, float diffusion, __global__ void updateKernel(Vector3 pos[], Vector3 forceInternal[], int type[], BrownianParticleType* part[], - RigidBody* rb[], - RigidBodyType* rbType[], float kT, BaseGrid* kTGrid, float electricField, int tGridLength, float timestep, int num, BaseGrid* sys, diff --git a/GrandBrownTown.h b/GrandBrownTown.h index 5155fe4..43ba4d7 100644 --- a/GrandBrownTown.h +++ b/GrandBrownTown.h @@ -1,4 +1,3 @@ -// // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // / // Author: Jeff Comer <jcomer2@illinois.edu> #ifndef GRANDBROWNTOWN_H #define GRANDBROWNTOWN_H @@ -37,6 +36,10 @@ #include "Angle.h" #include "Configuration.h" #include "Dihedral.h" +/* #include "RigidBody.h" */ +/* #include "RigidBodyType.h" */ +/* #include "RigidBodyGrid.h" */ +#include "RigidBodyController.h" #include "util.h" // IMD @@ -111,6 +114,7 @@ public: Vector3 freePosition(Vector3 r0, Vector3 r1, float minDist); private: + const Configuration& conf; int numReplicas; // IMD variables @@ -146,6 +150,10 @@ private: float timeLast; // used with posLast float minimumSep; // minimum separation allowed with placing new particles + RigidBodyController RBC; + Vector3* rbPos; // rigid body positions + + // CUDA device variables Vector3 *pos_d, *forceInternal_d, *force_d; int *type_d; diff --git a/RigidBody.c b/RigidBody.c deleted file mode 100644 index 36bd3f8..0000000 --- a/RigidBody.c +++ /dev/null @@ -1,265 +0,0 @@ -/** -*** Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by -*** The Board of Trustees of the University of Illinois. -*** All rights reserved. -**/ - -#ifndef MIN_DEBUG_LEVEL -#define MIN_DEBUG_LEVEL 5 -#endif -#define DEBUGM -#include "Debug.h" - -#include <iostream> -#include <typeinfo> - -#include "RigidBody.h" -#include "Vector.h" -#include "RigidBodyParams.h" - - -RigidBody::RigidBody(SimParameters *simParams, RigidBodyParams *rbParams) -: impulse_to_momentum(0.0004184) { - - int len = strlen(rbParams->rigidBodyKey); - key = new char[len+1]; - strncpy(key,rbParams->rigidBodyKey,len+1); - - DebugM(4, "Initializing Rigid Body " << key << "\n" << endi); - - timestep = simParams->dt; - Temp = simParams->langevinTemp; - - mass = rbParams->mass; - inertia = rbParams->inertia; - - position = rbParams->position; - orientation = rbParams->orientation; // orientation a matrix that brings a vector from the RB frame to the lab frame - - momentum = rbParams->velocity * mass; // lab frame - DebugM(4, "velocity " << rbParams->velocity << "\n" << endi); - DebugM(4, "momentum " << momentum << "\n" << endi); - - angularMomentum = rbParams->orientationalVelocity; // rigid body frame - angularMomentum.x *= rbParams->inertia.x; - angularMomentum.y *= rbParams->inertia.y; - angularMomentum.z *= rbParams->inertia.z; - - isFirstStep = true; // this might not work flawlessly... - - clearForce(); - clearTorque(); - - DebugM(4, "RigidBody initial Force: " << force << "\n" << endi); - - // setup for langevin - langevin = rbParams->langevin; - if (langevin) { - /*=======================================================\ - | DiffCoeff = kT / dampingCoeff mass | - | | - | rbParams->DampingCoeff has units of (1/ps) | - | | - | f = - dampingCoeff * momentum | - | | - | units "(1/ps) * (amu AA/fs)" "kcal_mol/AA" * 2.3900574 | - \=======================================================*/ - transDampingCoeff = 2.3900574 * rbParams->transDampingCoeff; - - /*================================================================\ - | < f(t) f(t') > = 2 kT dampingCoeff mass delta(t-t') | - | | - | units "sqrt( k K (1/ps) amu / fs )" "kcal_mol/AA" * 0.068916889 | - \================================================================*/ - transForceCoeff = 0.068916889 * element_sqrt( 2*Temp*mass*rbParams->transDampingCoeff/timestep ); - - // T = - dampingCoeff * angularMomentum - rotDampingCoeff = 2.3900574 * rbParams->rotDampingCoeff; - // < f(t) f(t') > = 2 kT dampingCoeff inertia delta(t-t') - rotTorqueCoeff = 0.068916889 * element_sqrt( 2*Temp*element_mult(inertia, rbParams->rotDampingCoeff) / timestep ); - } -} - -void RigidBody::addForce(Force f) { - DebugM(1, "RigidBody "<<key<<" adding f ("<<f<<") to Force " << force << "\n" << endi); - force += f; -} -void RigidBody::addTorque(Force t) { - DebugM(1, "RigidBody adding t ("<<t<<") to torque " << torque << "\n" << endi); - torque += t; -} -RigidBody::~RigidBody() {} - -/*===========================================================================\ -| Following "Algorithm for rigid-body Brownian dynamics" Dan Gordon, Matthew | -| Hoyles, and Shin-Ho Chung | -| http://langevin.anu.edu.au/publications/PhysRevE_80_066703.pdf | -| | -| | -| BUT: assume diagonal friction tensor and no Wiener process / stochastic | -| calculus then this is just the same as for translation | -| | -| < T_i(t) T_i(t) > = 2 kT friction inertia | -| | -| friction / kt = Diff | -\===========================================================================*/ -void RigidBody::addLangevin(Vector w1, Vector w2) { - // w1 and w2 should be standard normal distributions - - // in RB frame - Force f = element_mult(transForceCoeff,w1) - - element_mult(transDampingCoeff, transpose(orientation)*momentum); - - Force t = element_mult(rotTorqueCoeff,w2) - - element_mult(rotDampingCoeff, angularMomentum); - - f = orientation * f; // return to lab frame - t = orientation * t; - - addForce(f); - addTorque(t); -} - -/*==========================================================================\ -| from: Dullweber, Leimkuhler, Maclachlan. Symplectic splitting methods for | -| rigid body molecular dynamics. JCP 107. (1997) | -| http://jcp.aip.org/resource/1/jcpsa6/v107/i15/p5840_s1 | -\==========================================================================*/ -void RigidBody::integrate(Vector *p_trans, Tensor *p_rot, int startFinishAll) { - Vector trans; // = *p_trans; - Tensor rot = Tensor::identity(); // = *p_rot; - -#ifdef DEBUGM - switch (startFinishAll) { - case 0: // start - DebugM(2, "Rigid Body integrating start of cycle" << "\n" << endi); - case 1: // finish - DebugM(2, "Rigid Body integrating finish of cycle" << "\n" << endi); - case 2: // finish and start - DebugM(2, "Rigid Body integrating finishing last cycle, starting this one" << "\n" << endi); - } -#endif - - if ( isnan(force.x) || isnan(torque.x) ) // NaN check - NAMD_die("Rigid Body force was NaN!\n"); - - // torque = Vector(0,0,10); // debug - Force tmpTorque = transpose(orientation)*torque; // bring to rigid body frame - - DebugM(3, "integrate" <<": force "<<force <<": velocity "<<getVelocity() << "\n" << endi); - DebugM(3, "integrate" <<": torque "<<tmpTorque <<": orientationalVelocity "<<getAngularVelocity() << "\n" << endi); - - if (startFinishAll == 0 || startFinishAll == 1) { - // propogate momenta by half step - momentum += 0.5 * timestep * force * impulse_to_momentum; - angularMomentum += 0.5 * timestep * tmpTorque * impulse_to_momentum; - } else { - // propogate momenta by a full timestep - momentum += timestep * force * impulse_to_momentum; - angularMomentum += timestep * tmpTorque * impulse_to_momentum; - } - - DebugM(3, " position before: " << position << "\n" << endi); - - if (startFinishAll == 0 || startFinishAll == 2) { - // update positions - // trans = Vector(0); if (false) { - trans = timestep * momentum / mass; - position += trans; // update CoM a full timestep - // } - - // update orientations a full timestep - Tensor R; // represents a rotation about a principle axis - R = Rx(0.5*timestep * angularMomentum.x / inertia.x ); // R1 - angularMomentum = R * angularMomentum; - rot = transpose(R); - DebugM(1, "R: " << R << "\n" << endi); - DebugM(1, "Rot 1: " << rot << "\n" << endi); - - R = Ry(0.5*timestep * angularMomentum.y / inertia.y ); // R2 - angularMomentum = R * angularMomentum; - rot = rot * transpose(R); - DebugM(1, "R: " << R << "\n" << endi); - DebugM(1, "Rot 2: " << rot << "\n" << endi); - - R = Rz( timestep * angularMomentum.z / inertia.z ); // R3 - angularMomentum = R * angularMomentum; - rot = rot * transpose(R); - DebugM(1, "R: " << R << "\n" << endi); - DebugM(1, "Rot 3: " << rot << "\n" << endi); - - R = Ry(0.5*timestep * angularMomentum.y / inertia.y ); // R4 - angularMomentum = R * angularMomentum; - rot = rot * transpose(R); - DebugM(1, "R: " << R << "\n" << endi); - DebugM(1, "Rot 4: " << rot << "\n" << endi); - - R = Rx(0.5*timestep * angularMomentum.x / inertia.x ); // R5 - angularMomentum = R * angularMomentum; - rot = rot * transpose(R); - DebugM(1, "R: " << R << "\n" << endi); - DebugM(1, "Rot 5: " << rot << "\n" << endi); - - // DebugM(3,"TEST: " << Ry(0.01) <<"\n" << endi); // DEBUG - - // update actual orientation - Tensor newOrientation = orientation*rot; // not 100% sure; rot could be in rb frame - orientation = newOrientation; - rot = transpose(rot); - - DebugM(2, "trans during: " << trans - << "\n" << endi); - DebugM(2, "rot during: " << rot - << "\n" << endi); - - clearForce(); - clearTorque(); - - *p_trans = trans; - *p_rot = rot; - } - DebugM(3, " position after: " << position << "\n" << endi); -} - -// Rotations about axes - -// for very small angles 10^-8, cos^2+sin^2 != 1 -// concerned about the accumulation of errors in non-unitary transformations! -Tensor RigidBody::Rx(BigReal t) { - BigReal qt = 0.25*t*t; // for approximate calculations of sin(t) and cos(t) - BigReal cos = (1-qt)/(1+qt); - BigReal sin = t/(1+qt); - - Tensor tmp; - tmp.xx = 1; tmp.xy = 0; tmp.xz = 0; - tmp.yx = 0; tmp.yy = cos; tmp.yz = -sin; - tmp.zx = 0; tmp.zy = sin; tmp.zz = cos; - return tmp; -} -Tensor RigidBody::Ry(BigReal t) { - BigReal qt = 0.25*t*t; // for approximate calculations of sin(t) and cos(t) - BigReal cos = (1-qt)/(1+qt); - BigReal sin = t/(1+qt); - - Tensor tmp; - tmp.xx = cos; tmp.xy = 0; tmp.xz = sin; - tmp.yx = 0; tmp.yy = 1; tmp.yz = 0; - tmp.zx = -sin; tmp.zy = 0; tmp.zz = cos; - return tmp; -} -Tensor RigidBody::Rz(BigReal t) { - BigReal qt = 0.25*t*t; // for approximate calculations of sin(t) and cos(t) - BigReal cos = (1-qt)/(1+qt); - BigReal sin = t/(1+qt); - - Tensor tmp; - tmp.xx = cos; tmp.xy = -sin; tmp.xz = 0; - tmp.yx = sin; tmp.yy = cos; tmp.yz = 0; - tmp.zx = 0; tmp.zy = 0; tmp.zz = 1; - return tmp; -} -Tensor RigidBody::eulerToMatrix(const Vector e) { - // convert euler angle input to rotation matrix - // http://en.wikipedia.org/wiki/Rotation_formalisms_in_three_dimensions#Conversion_formulae_between_formalisms - return Rz(e.z) * Ry(e.y) * Rx(e.x); -} diff --git a/RigidBody.cu b/RigidBody.cu new file mode 100644 index 0000000..bfbbb90 --- /dev/null +++ b/RigidBody.cu @@ -0,0 +1,226 @@ +/* #ifndef MIN_DEBUG_LEVEL */ +/* #define MIN_DEBUG_LEVEL 5 */ +/* #endif */ +/* #include "Debug.h" */ + +#include <iostream> +#include <typeinfo> +#include "RigidBody.h" +#include "Configuration.h" + +#include "Debug.h" + +RigidBody::RigidBody(const Configuration& cref, RigidBodyType& tref) + : c(&cref), t(&tref) { + timestep = c->timestep; + // RBTODO: fix this + Temp = 295; + // tempgrid = c->temperatureGrid; + + position = Vector3(); + + // Orientation matrix that brings vector from the RB frame to the lab frame + orientation = Matrix3(); + + momentum = Vector3() * t->mass; // lab frame + /* DebugM(4, "velocity " << rbParams->velocity << "\n" << endi); */ + DebugM(4, "momentum " << momentum << "\n" << endi); + + angularMomentum = Vector3(); // rigid body frame + angularMomentum.x *= t->inertia.x; + angularMomentum.y *= t->inertia.y; + angularMomentum.z *= t->inertia.z; + + /* isFirstStep = true; // this might not work flawlessly... */ + + /* clearForce(); */ + /* clearTorque(); */ + + /* DebugM(4, "RigidBody initial Force: " << force << "\n" << endi); */ +} + +void RigidBody::addForce(Force f) { + // DebugM(1, "RigidBody "<<key<<" adding f ("<<f<<") to Force " << force << "\n" << endi); + force += f; +} +void RigidBody::addTorque(Force torq) { + // DebugM(1, "RigidBody adding t ("<<t<<") to torque " << torque << "\n" << endi); + torque += torq; +} +RigidBody::~RigidBody() {} + +/*===========================================================================\ + | Following "Algorithm for rigid-body Brownian dynamics" Dan Gordon, Matthew | + | Hoyles, and Shin-Ho Chung | + | http://langevin.anu.edu.au/publications/PhysRevE_80_066703.pdf | + | | + | | + | BUT: assume diagonal friction tensor and no Wiener process / stochastic | + | calculus then this is just the same as for translation | + | | + | < T_i(t) T_i(t) > = 2 kT friction inertia | + | | + | friction / kt = Diff | + \===========================================================================*/ +void RigidBody::addLangevin(Vector3 w1, Vector3 w2) { + // w1 and w2 should be standard normal distributions + + // in RB frame + Vector3 tmp = orientation.transpose()*momentum; + Force f = Vector3::element_mult(t->transForceCoeff,w1) - + Vector3::element_mult(t->transDamping, orientation.transpose()*momentum); + + Force torq = Vector3::element_mult(t->rotTorqueCoeff,w2) - + Vector3::element_mult(t->rotDamping, angularMomentum); + + f = orientation * f; // return to lab frame + torq = orientation * torq; + + addForce(f); + addTorque(torq); +} + +/*==========================================================================\ + | from: Dullweber, Leimkuhler, Maclachlan. Symplectic splitting methods for | + | rigid body molecular dynamics. JCP 107. (1997) | + | http://jcp.aip.org/resource/1/jcpsa6/v107/i15/p5840_s1 | + \==========================================================================*/ +void RigidBody::integrate(Vector3& old_trans, Matrix3& old_rot, int startFinishAll) { + Vector3 trans; // = *p_trans; + Matrix3 rot = Matrix3(1); // = *p_rot; + +#ifdef DEBUGM + switch (startFinishAll) { + case 0: // start + DebugM(2, "Rigid Body integrating start of cycle" << "\n" << endi); + case 1: // finish + DebugM(2, "Rigid Body integrating finish of cycle" << "\n" << endi); + case 2: // finish and start + DebugM(2, "Rigid Body integrating finishing last cycle, starting this one" << "\n" << endi); + } +#endif + + if ( isnan(force.x) || isnan(torque.x) ) { // NaN check + printf("Rigid Body force was NaN!\n"); + exit(-1); + } + + // torque = Vector(0,0,10); // debug + Force tmpTorque = orientation.transpose()*torque; // bring to rigid body frame + + DebugM(3, "integrate" <<": force "<<force <<": velocity "<<getVelocity() << "\n" << endi); + DebugM(3, "integrate" <<": torque "<<tmpTorque <<": orientationalVelocity "<<getAngularVelocity() << "\n" << endi); + + if (startFinishAll == 0 || startFinishAll == 1) { + // propogate momenta by half step + momentum += 0.5 * timestep * force * impulse_to_momentum; + angularMomentum += 0.5 * timestep * tmpTorque * impulse_to_momentum; + } else { + // propogate momenta by a full timestep + momentum += timestep * force * impulse_to_momentum; + angularMomentum += timestep * tmpTorque * impulse_to_momentum; + } + + DebugM(3, " position before: " << position << "\n" << endi); + + if (startFinishAll == 0 || startFinishAll == 2) { + // update positions + // trans = Vector(0); if (false) { + trans = timestep * momentum / t->mass; + position += trans; // update CoM a full timestep + // } + + // update orientations a full timestep + Matrix3 R; // represents a rotation about a principle axis + R = Rx(0.5*timestep * angularMomentum.x / t->inertia.x ); // R1 + angularMomentum = R * angularMomentum; + rot = R.transpose(); + DebugM(1, "R: " << R << "\n" << endi); + DebugM(1, "Rot 1: " << rot << "\n" << endi); + + R = Ry(0.5*timestep * angularMomentum.y / t->inertia.y ); // R2 + angularMomentum = R * angularMomentum; + rot = rot * R.transpose(); + DebugM(1, "R: " << R << "\n" << endi); + DebugM(1, "Rot 2: " << rot << "\n" << endi); + + R = Rz( timestep * angularMomentum.z / t->inertia.z ); // R3 + angularMomentum = R * angularMomentum; + rot = rot * R.transpose(); + DebugM(1, "R: " << R << "\n" << endi); + DebugM(1, "Rot 3: " << rot << "\n" << endi); + + R = Ry(0.5*timestep * angularMomentum.y / t->inertia.y ); // R4 + angularMomentum = R * angularMomentum; + rot = rot * R.transpose(); + DebugM(1, "R: " << R << "\n" << endi); + DebugM(1, "Rot 4: " << rot << "\n" << endi); + + R = Rx(0.5*timestep * angularMomentum.x / t->inertia.x ); // R5 + angularMomentum = R * angularMomentum; + rot = rot * R.transpose(); + DebugM(1, "R: " << R << "\n" << endi); + DebugM(1, "Rot 5: " << rot << "\n" << endi); + + // DebugM(3,"TEST: " << Ry(0.01) <<"\n" << endi); // DEBUG + + // update actual orientation + Matrix3 newOrientation = orientation*rot; // not 100% sure; rot could be in rb frame + orientation = newOrientation; + rot = rot.transpose(); + + DebugM(2, "trans during: " << trans + << "\n" << endi); + DebugM(2, "rot during: " << rot + << "\n" << endi); + + clearForce(); + clearTorque(); + + old_trans = trans; + old_rot = rot; + } + DebugM(3, " position after: " << position << "\n" << endi); +} + +// Rotations about axes +// for very small angles 10^-8, cos^2+sin^2 != 1 +// concerned about the accumulation of errors in non-unitary transformations! +Matrix3 RigidBody::Rx(BigReal t) { + BigReal qt = 0.25*t*t; // for approximate calculations of sin(t) and cos(t) + BigReal cos = (1-qt)/(1+qt); + BigReal sin = t/(1+qt); + + Matrix3 tmp; + tmp.exx = 1; tmp.exy = 0; tmp.exz = 0; + tmp.eyx = 0; tmp.eyy = cos; tmp.eyz = -sin; + tmp.ezx = 0; tmp.ezy = sin; tmp.ezz = cos; + return tmp; +} +Matrix3 RigidBody::Ry(BigReal t) { + BigReal qt = 0.25*t*t; // for approximate calculations of sin(t) and cos(t) + BigReal cos = (1-qt)/(1+qt); + BigReal sin = t/(1+qt); + + Matrix3 tmp; + tmp.exx = cos; tmp.exy = 0; tmp.exz = sin; + tmp.eyx = 0; tmp.eyy = 1; tmp.eyz = 0; + tmp.ezx = -sin; tmp.ezy = 0; tmp.ezz = cos; + return tmp; +} +Matrix3 RigidBody::Rz(BigReal t) { + BigReal qt = 0.25*t*t; // for approximate calculations of sin(t) and cos(t) + BigReal cos = (1-qt)/(1+qt); + BigReal sin = t/(1+qt); + + Matrix3 tmp; + tmp.exx = cos; tmp.exy = -sin; tmp.exz = 0; + tmp.eyx = sin; tmp.eyy = cos; tmp.eyz = 0; + tmp.ezx = 0; tmp.ezy = 0; tmp.ezz = 1; + return tmp; +} +Matrix3 RigidBody::eulerToMatrix(const Vector3 e) { + // convert euler angle input to rotation matrix + // http://en.wikipedia.org/wiki/Rotation_formalisms_in_three_dimensions#Conversion_formulae_between_formalisms + return Rz(e.z) * Ry(e.y) * Rx(e.x); +} diff --git a/RigidBody.h b/RigidBody.h index 0f7c047..0dc6a9c 100644 --- a/RigidBody.h +++ b/RigidBody.h @@ -1,235 +1,95 @@ -/** -*** Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by -*** The Board of Trustees of the University of Illinois. -*** All rights reserved. -**/ - +/*===========================\ +| RigidBody Class for device | +\===========================*/ #pragma once -/* #include <set> */ -/* #include <vector> */ - #include "useful.h" -/* #include "Vector.h" */ -/* #include "Tensor.h" */ - - -/* #include "strlib.h" */ -/* #include "common.h" */ -/* #include "Vector.h" */ -/* #include "Tensor.h" */ -/* #include "InfoStream.h" */ -/* #include "MStream.h" */ +#include "RigidBodyType.h" -/* #include "SimParameters.h" */ -/* #include "NamdTypes.h" */ +#ifdef __CUDACC__ + #define HOST __host__ + #define DEVICE __device__ +#else + #define HOST + #define DEVICE +#endif -typedef BigReal float; /* strip this out later */ -typedef Force Vector3; +class Configuration; /* forward decleration */ -#define DEVICE __device__ +typedef float BigReal; /* strip this out later */ +typedef Vector3 Force; -/* class ComputeMgr; */ -/* class Random; */ -class RigidBody { - /*=====================================================================\ - | See Appendix A of: Dullweber, Leimkuhler and McLaclan. "Symplectic | - | splitting methods for rigid body molecular dynamics". J Chem | - | Phys. (1997) | - \=====================================================================*/ -public: - DEVICE RigidBody(SimParameters *simParams, RigidBodyParams *rbParams); - DEVICE ~RigidBody(); +class RigidBody { // host side representation of rigid bodies + /*––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––. + | See Appendix A of: Dullweber, Leimkuhler and McLaclan. "Symplectic | + | splitting methods for rigid body molecular dynamics". J Chem Phys. (1997) | + `––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––*/ + public: + HOST DEVICE RigidBody(const Configuration& c, RigidBodyType& t); + /* HOST DEVICE RigidBody(RigidBodyType t); */ + HOST DEVICE ~RigidBody(); - DEVICE void addForce(Force f); - DEVICE void addTorque(Force t); - DEVICE void addLangevin(Vector3 w1, Vector3 w2); - - DEVICE inline void clearForce() { force = Force(0); } - DEVICE inline void clearTorque() { torque = Force(0); } - - DEVICE void integrate(Vector3 *p_trans, Matrix3 *p_rot, int startFinishAll); + HOST DEVICE void addForce(Force f); + HOST DEVICE void addTorque(Force t); + HOST DEVICE void addLangevin(Vector3 w1, Vector3 w2); + + HOST DEVICE inline void clearForce() { force = Force(0.0f); } + HOST DEVICE inline void clearTorque() { torque = Force(0.0f); } - DEVICE inline char* getKey() { return key; } - DEVICE inline Vector3 getPosition() { return position; } - DEVICE inline Matrix3 getOrientation() { return orientation; } - DEVICE inline BigReal getMass() { return mass; } - DEVICE inline Vector3 getVelocity() { return momentum/mass; } - DEVICE inline Vector3 getAngularVelocity() { - return Vector3( angularMomentum.x / inertia.x, - angularMomentum.y / inertia.y, - angularMomentum.z / inertia.z ); + HOST DEVICE void integrate(Vector3& old_trans, Matrix3& old_rot, int startFinishAll); + + HOST DEVICE inline String getKey() { return key; } + HOST DEVICE inline Vector3 getPosition() const { return position; } + HOST DEVICE inline Matrix3 getOrientation() const { return orientation; } + HOST DEVICE inline Matrix3 getBasis() const { return orientation; } + HOST DEVICE inline BigReal getMass() const { return t->mass; } + HOST DEVICE inline Vector3 getVelocity() const { return momentum/t->mass; } + HOST DEVICE inline Vector3 getAngularVelocity() const { + return Vector3( angularMomentum.x / t->inertia.x, + angularMomentum.y / t->inertia.y, + angularMomentum.z / t->inertia.z ); } bool langevin; private: String key; /* static const SimParameters * simParams; */ - BigReal mass; - Vector3 position; Matrix3 orientation; - Vector3 inertia; // diagonal elements of inertia tensor Vector3 momentum; Vector3 angularMomentum; // angular momentum along corresponding principal axes // Langevin - Vector3 langevinTransFriction; + Vector3 langevinTransFriction; /* RBTODO: make this work with a grid */ Vector3 langevinRotFriction; BigReal Temp; - Vector3 transDampingCoeff; - Vector3 transForceCoeff; - Vector3 rotDampingCoeff; - Vector3 rotTorqueCoeff; + /* Vector3 transDampingCoeff; */ + /* Vector3 transForceCoeff; */ + /* Vector3 rotDampingCoeff; */ + /* Vector3 rotTorqueCoeff; */ // integration + const Configuration* c; + RigidBodyType* t; /* RBTODO: const? */ int timestep; Vector3 force; // lab frame Vector3 torque; // lab frame (except in integrate()) bool isFirstStep; - // units "kcal_mol/AA * fs" "(AA/fs) * amu" - const BigReal impulse_to_momentum; - - DEVICE inline Matrix3 Rx(BigReal t); - DEVICE inline Matrix3 Ry(BigReal t); - DEVICE inline Matrix3 Rz(BigReal t); - DEVICE inline Matrix3 eulerToMatrix(const Vector3 e); -}; - -class RigidBodyController { -public: - /* DEVICE RigidBodyController(const NamdState *s, int reductionTag, SimParameters *sp); */ - DEVICE RigidBodyController(const NamdState *s, int reductionTag, SimParameters *sp); - - DEVICE ~RigidBodyController(); - DEVICE void integrate(int step); - DEVICE void print(int step); - -private: - /* void printLegend(std::ofstream &file); */ - /* void printData(int step, std::ofstream &file); */ - - /* SimParameters* simParams; */ - /* const NamdState* state; */ - /* std::ofstream trajFile; */ + /*–––––––––––––––––––––––––––––––––––––––––. + | units "kcal_mol/AA * fs" "(AA/fs) * amu" | + `–––––––––––––––––––––––––––––––––––––––––*/ + const BigReal impulse_to_momentum = 0.0004184; /* should be static, but fails */ - Random* random; - /* RequireReduction *gridReduction; */ - - Vector3* trans; // would have made these static, but - Matrix3* rot; // there are errors on rigidBody->integrate - RigidBody* rigidBodyList; - -}; -class RigidBodyParams { -public: - RigidBodyParams() { - rigidBodyKey = 0; - mass = 0; - inertia = Vector3(); - langevin = FALSE; - temperature = 0; - transDampingCoeff = Vector3(); - rotDampingCoeff = Vector3(); - gridList; - position = Vector3(); - velocity = Vector3(); - orientation = Matrix3(); - orientationalVelocity = Vector3(); - } - int typeID; - - String *rigidBodyKey; - BigReal mass; - Vector3 inertia; - Bool langevin; - BigReal temperature; - Vector3 transDampingCoeff; - Vector3 rotDampingCoeff; - String *gridList; - - - Vector3 position; - Vector3 velocity; - Matrix3 orientation; - Vector3 orientationalVelocity; - - RigidBodyParams *next; - - const void print(); + HOST DEVICE inline Matrix3 Rx(BigReal t); + HOST DEVICE inline Matrix3 Ry(BigReal t); + HOST DEVICE inline Matrix3 Rz(BigReal t); + HOST DEVICE inline Matrix3 eulerToMatrix(const Vector3 e); }; - -/* class RigidBodyParamsList { */ -/* public: */ -/* RigidBodyParamsList() { */ -/* clear(); */ -/* } */ - -/* ~RigidBodyParamsList() */ -/* { */ -/* RBElem* cur; */ -/* while (head != NULL) { */ -/* cur = head; */ -/* head = cur->nxt; */ -/* delete cur; */ -/* } */ -/* clear(); */ -/* } */ -/* const void print(char *s); */ -/* const void print(); */ - -/* // The SimParameters bit copy overwrites these values with illegal pointers, */ -/* // So thise throws away the garbage and lets everything be reinitialized */ -/* // from scratch */ -/* void clear() { */ -/* head = tail = NULL; */ -/* n_elements = 0; */ -/* } */ - -/* RigidBodyParams* find_key(const char* key); */ -/* int index_for_key(const char* key); */ -/* RigidBodyParams* add(const char* key); */ - -/* RigidBodyParams *get_first() { */ -/* if (head == NULL) { */ -/* return NULL; */ -/* } else return &(head->elem); */ -/* } */ - -/* void pack_data(MOStream *msg); */ -/* void unpack_data(MIStream *msg); */ - -/* // convert from a string to Bool; returns 1(TRUE) 0(FALSE) or -1(if unknown) */ -/* static int atoBool(const char *s) */ -/* { */ -/* if (!strcasecmp(s, "on")) return 1; */ -/* if (!strcasecmp(s, "off")) return 0; */ -/* if (!strcasecmp(s, "true")) return 1; */ -/* if (!strcasecmp(s, "false")) return 0; */ -/* if (!strcasecmp(s, "yes")) return 1; */ -/* if (!strcasecmp(s, "no")) return 0; */ -/* if (!strcasecmp(s, "1")) return 1; */ -/* if (!strcasecmp(s, "0")) return 0; */ -/* return -1; */ -/* } */ - - -/* private: */ -/* class RBElem { */ -/* public: */ -/* RigidBodyParams elem; */ -/* RBElem* nxt; */ -/* }; */ -/* RBElem* head; */ -/* RBElem* tail; */ -/* int n_elements; */ - -/* }; */ diff --git a/RigidBodyController.c b/RigidBodyController.cu similarity index 78% rename from RigidBodyController.c rename to RigidBodyController.cu index ad6dcc1..feb471e 100644 --- a/RigidBodyController.c +++ b/RigidBodyController.cu @@ -1,44 +1,113 @@ -#ifndef MIN_DEBUG_LEVEL -#define MIN_DEBUG_LEVEL 5 -#endif -#define DEBUGM -#include "Debug.h" +/* #ifndef MIN_DEBUG_LEVEL */ +/* #define MIN_DEBUG_LEVEL 5 */ +/* #endif */ +/* #define DEBUGM */ +/* #include "Debug.h" */ -#include <iostream> -#include <typeinfo> - -#include "RigidBody.h" +/* #include "RigidBody.h" */ #include "RigidBodyController.h" -#include "RigidBodyMsgs.h" -// #include "Vector.h" -#include "Node.h" -#include "ReductionMgr.h" -//#include "Broadcasts.h" -#include "ComputeMgr.h" -#include "Random.h" -#include "Output.h" +#include "Configuration.h" -#include "SimParameters.h" -#include "RigidBodyParams.h" -#include "Molecule.h" -// #include "InfoStream.h" -#include "common.h" -#include <errno.h> +#include "RigidBodyType.h" +/* #include "Random.h" */ +#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } +inline void gpuAssert(cudaError_t code, String file, int line, bool abort=true) { + if (code != cudaSuccess) { + fprintf(stderr,"CUDA Error: %s %s %d\n", cudaGetErrorString(code), __FILE__, line); + if (abort) exit(code); + } +} -/*============================\ -| include "RigidBodyParams.h" | -\============================*/ +/* #include <cuda.h> */ +/* #include <cuda_runtime.h> */ +/* #include <curand_kernel.h> */ -#include "MStream.h" -#include "DataStream.h" -#include "InfoStream.h" +RigidBodyController::RigidBodyController(const Configuration& c) : +conf(c) { -#include "Debug.h" -#include <stdio.h> + int numRB = 0; + // grow list of rbs + for (int i = 0; i < conf.numRigidTypes; i++) { + numRB += conf.rigidBody[i].num; + std::vector<RigidBody> tmp; + for (int j = 0; j < conf.rigidBody[i].num; j++) { + RigidBody r(conf, conf.rigidBody[i]); + tmp.push_back( r ); + } + rigidBodyByType.push_back(tmp); + } +} +RigidBodyController::~RigidBodyController() { + for (int i = 0; i < rigidBodyByType.size(); i++) + rigidBodyByType[i].clear(); + rigidBodyByType.clear(); +} + +void RigidBodyController::updateForces() { + /*––{ RBTODO }–––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––. + | probably coalesce kernel calls, or move this to a device kernel caller | + | | + | - consider removing references (unless they are optimized out!) --- | + | - caclulate numthreads && numblocks | + | | + | all threads in a block should: ---------------------------------------- | + | (1) apply the same transformations to get the data point position in a | + | destination grid ----------------------------------------------------- | + | (2) reduce forces and torques to the same location ------------------- | + | (3) ??? ------------------------------------------------------------- | + | | + | Opportunities for memory bandwidth savings: | + `–––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––*/ + // int numBlocks = (num * numReplicas) / NUM_THREADS + (num * numReplicas % NUM_THREADS == 0 ? 0 : 1); + int numBlocks = 1; + int numThreads = 32; + + + // Loop over all pairs of rigid body types + // the references here make the code more readable, but they may incur a performance loss + for (int ti = 0; ti < conf.numRigidTypes; ti++) { + const RigidBodyType& t1 = conf.rigidBody[ti]; + for (int tj = ti; tj < conf.numRigidTypes; tj++) { + const RigidBodyType& t2 = conf.rigidBody[tj]; + + const std::vector<String>& keys1 = t1.densityGridKeys; + const std::vector<String>& keys2 = t2.potentialGridKeys; + + // Loop over all pairs of grid keys (e.g. "Elec") + for(int k1 = 0; k1 < keys1.size(); k1++) { + for(int k2 = 0; k2 < keys2.size(); k2++) { + if ( keys1[k1] == keys2[k2] ) { + // found matching keys => calculate force between all grid pairs + // loop over rigid bodies of this type + const std::vector<RigidBody>& rbs1 = rigidBodyByType[ti]; + const std::vector<RigidBody>& rbs2 = rigidBodyByType[tj]; + + for (int i = 0; i < rbs1.size(); i++) { + for (int j = (ti==tj ? 0 : i); j < rbs2.size(); j++) { + const RigidBody& rb1 = rbs1[i]; + const RigidBody& rb2 = rbs2[j]; + + computeGridGridForce<<< numBlocks, numThreads >>> + (t1.rawDensityGrids[k1], t2.rawPotentialGrids[k2], + rb1.getBasis(), rb1.getPosition(), + rb2.getBasis(), rb2.getPosition()); + } + } + } + } + } + } + } + + // RBTODO: see if there is a better way to sync + gpuErrchk(cudaDeviceSynchronize()); + +} +/* RigidBodyController::RigidBodyController(const NamdState *s, const int reductionTag, SimParameters *sp) : state(s), simParams(sp) { DebugM(2, "Rigid Body Controller initializing" @@ -515,3 +584,4 @@ void RigidBodyParamsList::unpack_data(MIStream *msg) { DebugM(4, "Finished unpacking rigid body parameter list\n"); print(); } +*/ diff --git a/RigidBodyController.h b/RigidBodyController.h new file mode 100644 index 0000000..1e06609 --- /dev/null +++ b/RigidBodyController.h @@ -0,0 +1,47 @@ +#include <vector> +#include "RigidBody.h" +/* #include "RandomCUDA.h" */ +#include <cuda.h> +#include <cuda_runtime.h> + +#include "ComputeGridGrid.cuh" + +class Configuration; + +struct gridInteractionList { + +}; + +class RigidBodyController { +public: + /* DEVICE RigidBodyController(const NamdState *s, int reductionTag, SimParameters *sp); */ + RigidBodyController(); + ~RigidBodyController(); + RigidBodyController(const Configuration& c); + + DEVICE void integrate(int step); + DEVICE void print(int step); + +private: + void updateForces(); + + /* void printLegend(std::ofstream &file); */ + /* void printData(int step, std::ofstream &file); */ + + /* std::ofstream trajFile; */ + + const Configuration& conf; + + /* Random* random; */ + /* RequireReduction *gridReduction; */ + + Vector3* trans; // would have made these static, but + Matrix3* rot; // there are errors on rigidBody->integrate + + std::vector< std::vector<RigidBody> > rigidBodyByType; + /* RigidBody* rigidBodyList; */ + + + +}; + diff --git a/RigidBodyGrid.cu b/RigidBodyGrid.cu index f893867..28bb6fe 100644 --- a/RigidBodyGrid.cu +++ b/RigidBodyGrid.cu @@ -1,4 +1,3 @@ - ////////////////////////////////////////////////////////////////////// // Grid base class that does just the basics. // Author: Jeff Comer <jcomer2@illinois.edu> @@ -6,7 +5,6 @@ #include "RigidBodyGrid.h" #include <cuda.h> - #define STRLEN 512 // Initialize the variables that get used a lot. @@ -17,7 +15,7 @@ void RigidBodyGrid::init() { val = new float[size]; } RigidBodyGrid::RigidBodyGrid() { - RigidBodyGrid tmp(Matrix3(),Vector3(),1,1,1); + RigidBodyGrid tmp(1,1,1); val = new float[1]; *this = tmp; // TODO: verify that this is OK @@ -149,8 +147,8 @@ RigidBodyGrid::RigidBodyGrid(const RigidBodyGrid& g, int nx0, int ny0, int nz0) if (ny <= 0) ny = 1; if (nz <= 0) nz = 1; - // Tile the grid into the box of the template grid. - Matrix3 box = g.getBox(); + // // Tile the grid into the box of the template grid. + // Matrix3 box = g.getBox(); init(); @@ -207,6 +205,7 @@ Vector3 RigidBodyGrid::getPosition(int j) const { return Vector3(ix,iy,iz); } + Vector3 RigidBodyGrid::getPosition(int j, Matrix3 basis, Vector3 origin) const { int iz = j%nz; int iy = (j/nz)%ny; @@ -285,7 +284,6 @@ int RigidBodyGrid::index(int ix, int iy, int iz) const { return iz + iy*nz + ix* // return iz + iy*nz + ix*ny*nz; // } -} // Add a fixed value to the grid. void RigidBodyGrid::shift(float s) { @@ -305,12 +303,12 @@ float RigidBodyGrid::mean() const { } // Get the potential at the closest node. -float RigidBodyGrid::getPotential(Vector3 pos) const { - // Find the nearest node. - int j = nearestIndex(pos); +// float RigidBodyGrid::getPotential(Vector3 pos) const { +// // Find the nearest node. +// int j = nearestIndex(pos); - return val[j]; -} +// return val[j]; +// } // Added by Rogan for times when simpler calculations are required. @@ -416,25 +414,6 @@ float RigidBodyGrid::interpolatePotentialLinearly(Vector3 pos) const { // } -// Includes the home node. -// indexBuffer must have a size of at least 27. -void RigidBodyGrid::getNeighbors(int j, int* indexBuffer) const { - int jx = indexX(j); - int jy = indexY(j); - int jz = indexZ(j); - - int k = 0; - for (int ix = -1; ix <= 1; ix++) { - for (int iy = -1; iy <= 1; iy++) { - for (int iz = -1; iz <= 1; iz++) { - int ind = wrap(jz+iz,nz) + nz*wrap(jy+iy,ny) + nynz*wrap(jx+ix,nx); - indexBuffer[k] = ind; - k++; - } - } - } -} - // Includes the home node. // indexBuffer must have a size of at least 27. void RigidBodyGrid::getNeighbors(int j, int* indexBuffer) const { diff --git a/RigidBodyGrid.h b/RigidBodyGrid.h index 8453332..1aa49c1 100644 --- a/RigidBodyGrid.h +++ b/RigidBodyGrid.h @@ -1,8 +1,9 @@ ////////////////////////////////////////////////////////////////////// // Copy of BaseGrid with some modificaitons // - -#pragma once +#ifndef RBBASEGRID_H +#define RBBASEGRID_H +// #pragma once #ifdef __CUDACC__ #define HOST __host__ @@ -12,6 +13,7 @@ #define DEVICE #endif +#include "BaseGrid.h" #include "useful.h" #include <cmath> #include <cstring> @@ -20,17 +22,14 @@ #include <ctime> #include <cuda.h> - using namespace std; #define STRLEN 512 -class NeighborList { -public: - float v[3][3][3]; -}; +// RBTODO: integrate with existing grid code? class RigidBodyGrid { + friend class SparseGrid; private: // Initialize the variables that get used a lot. // Also, allocate the main value array. @@ -45,7 +44,7 @@ public: // RBTODO Fix? RigidBodyGrid(); // cmaffeo2 (2015) moved this out of protected, cause I wanted RigidBodyGrid in a struct // The most obvious of constructors. - RigidBodyGrid(int nx0, int ny0, int nz0); + RigidBodyGrid(int nx0, int ny0, int nz0); // Make an orthogonal grid given the box dimensions and resolution. RigidBodyGrid(Vector3 box, float dx); @@ -64,6 +63,9 @@ public: // The grid spacing is always a bit smaller than dx. RigidBodyGrid(Matrix3 box, float dx); + // Make a copy of a BaseGrid grid. + RigidBodyGrid(const BaseGrid& g); + // Make an exact copy of a grid. RigidBodyGrid(const RigidBodyGrid& g); @@ -91,9 +93,9 @@ public: virtual float getValue(int ix, int iy, int iz) const; // Vector3 getPosition(int ix, int iy, int iz) const; - Vector3 getPosition(int j) const; - Vector3 getPosition(int j, Matrix3 basis, Vector3 origin) const { - + HOST DEVICE Vector3 getPosition(int j) const; + HOST DEVICE Vector3 getPosition(int j, Matrix3 basis, Vector3 origin) const; + /* // Does the point r fall in the grid? */ /* // Obviously this is without periodic boundary conditions. */ /* bool inGrid(Vector3 r) const; */ @@ -113,9 +115,7 @@ public: /* int index(Vector3 r) const; */ /* int nearestIndex(Vector3 r) const; */ - HOST DEVICE inline int length() const { - return size; - } + HOST DEVICE inline int length() const { return size; } HOST DEVICE inline int getNx() const {return nx;} HOST DEVICE inline int getNy() const {return ny;} @@ -134,18 +134,18 @@ public: \=========*/ // Get the mean of the entire grid. - float mean() const + float mean() const; // Get the potential at the closest node. - virtual float getPotential(Vector3 pos) const; + /* virtual float getPotential(Vector3 pos) const; */ // Added by Rogan for times when simpler calculations are required. virtual float interpolatePotentialLinearly(Vector3 pos) const; - HOST DEVICE inline float interpolateDiffX(Vector3 pos, float w[3], float g1[4][4][4]) const { + HOST DEVICE inline float interpolateDiffX(Vector3 pos, float w[3], float g1[4][4][4]) const { float a0, a1, a2, a3; - - // RBTODO parallelize loops? + + // RBTODO further parallelize loops? unlikely? // Mix along x, taking the derivative. float g2[4][4]; @@ -179,8 +179,7 @@ public: a1 = 0.5f*(-g3[0] + g3[2]); a0 = g3[1]; - float retval = -(a3*w[2]*w[2]*w[2] + a2*w[2]*w[2] + a1*w[2] + a0); - return retval; + return -(a3*w[2]*w[2]*w[2] + a2*w[2]*w[2] + a1*w[2] + a0); } HOST DEVICE inline float interpolateDiffY(Vector3 pos, float w[3], float g1[4][4][4]) const { @@ -256,8 +255,6 @@ public: return -(3.0f*a3*w[2]*w[2] + 2.0f*a2*w[2] + a1); } - // RBTODO overload with optimized algorithm - // skip transforms (assume identity basis) HOST DEVICE inline float interpolatePotential(Vector3 pos) const { // Find the home node. Vector3 l = pos; @@ -296,17 +293,17 @@ public: float g1[4][4][4]; for (int ix = 0; ix < 4; ix++) { int jx = ix-1 + home[0]; - jx = wrap(jx, g[0]); for (int iy = 0; iy < 4; iy++) { int jy = iy-1 + home[1]; - jy = wrap(jy, g[1]); for (int iz = 0; iz < 4; iz++) { - // Wrap around the periodic boundaries. int jz = iz-1 + home[2]; - jz = wrap(jz, g[2]); - - int ind = jz*jump[2] + jy*jump[1] + jx*jump[0]; - g1[ix][iy][iz] = val[ind]; + if (jx < 0 || jy < 0 || jz < 0 || + jx >= nx || jz >= nz || jz >= nz) { + g1[ix][iy][iz] = 0; + } else { + int ind = jz*jump[2] + jy*jump[1] + jx*jump[0]; + g1[ix][iy][iz] = val[ind]; + } } } } @@ -387,19 +384,22 @@ public: w[1] = l.y - homeY; w[2] = l.z - homeZ; // Find the values at the neighbors. - float g1[4][4][4]; + float g1[4][4][4]; /* RBTODO: inefficient for my algorithm? */ for (int ix = 0; ix < 4; ix++) { + int jx = ix-1 + home[0]; for (int iy = 0; iy < 4; iy++) { + int jy = iy-1 + home[1]; for (int iz = 0; iz < 4; iz++) { - // Wrap around the periodic boundaries. - int jx = ix-1 + home[0]; - jx = wrap(jx, g[0]); - int jy = iy-1 + home[1]; - jy = wrap(jy, g[1]); + // Assume zero value at edges int jz = iz-1 + home[2]; - jz = wrap(jz, g[2]); - int ind = jz*jump[2] + jy*jump[1] + jx*jump[0]; - g1[ix][iy][iz] = val[ind]; + // RBTODO: possible branch divergence in warp? + if (jx < 0 || jy < 0 || jz < 0 || + jx >= nx || jz >= nz || jz >= nz) { + g1[ix][iy][iz] = 0; + } else { + int ind = jz*jump[2] + jy*jump[1] + jx*jump[0]; + g1[ix][iy][iz] = val[ind]; + } } } } @@ -508,7 +508,7 @@ public: // since we do it here. void getNeighborValues(NeighborList* neigh, int homeX, int homeY, int homeZ) const; inline void setVal(float* v) { val = v; } - + public: int nx, ny, nz; int nynz; @@ -517,3 +517,4 @@ public: float* val; }; +#endif diff --git a/RigidBodyType.cu b/RigidBodyType.cu index 30322be..eca2b46 100644 --- a/RigidBodyType.cu +++ b/RigidBodyType.cu @@ -57,6 +57,43 @@ void RigidBodyType::clear() { // g.grid = * new BaseGrid(token[1]); // return g; // } + + +void RigidBodyType::setDampingCoeffs(float timestep, float tmp_mass, Vector3 tmp_inertia, float tmp_transDamping, float tmp_rotDamping) { + mass = tmp_mass; + inertia = tmp_inertia; + /*–––––––––––––––––––––––––––––––––––––––––––––––––––––––––. + | DiffCoeff = kT / dampingCoeff mass | + | | + | type->DampingCoeff has units of (1/ps) | + | | + | f[kcal/mol AA] = - dampingCoeff * momentum[amu AA/fs] | + | | + | units "(1/ps) * (amu AA/fs)" "kcal_mol/AA" * 2.3900574 | + `–––––––––––––––––––––––––––––––––––––––––––––––––––––––––*/ + transDamping = 2.3900574 * tmp_transDamping; + + /*––––––––––––––––––––––––––––––––––––––––––––––––––––. + | < f(t) f(t') > = 2 kT dampingCoeff mass delta(t-t') | + | | + | units "sqrt( k K (1/ps) amu / fs )" "kcal_mol/AA" | + | * 0.068916889 | + `––––––––––––––––––––––––––––––––––––––––––––––––––––*/ + float Temp = 295; /* RBTODO: Fix!!!! */ + transForceCoeff = 0.068916889 * Vector3::element_sqrt( 2*Temp*mass*tmp_transDamping/timestep ); + + // setup for langevin + // langevin = rbParams->langevin; + // if (langevin) { + // T = - dampingCoeff * angularMomentum + rotDamping = 2.3900574 * tmp_rotDamping; + + // < f(t) f(t') > = 2 kT dampingCoeff inertia delta(t-t') + rotTorqueCoeff = 0.068916889 * + Vector3::element_sqrt( 2*Temp* Vector3::element_mult(inertia,tmp_rotDamping) / timestep ); + // } +} + void RigidBodyType::addPotentialGrid(String s) { // tokenize and return int numTokens = s.tokenCount(); @@ -94,15 +131,27 @@ void RigidBodyType::updateRaw() { if (numDenGrids > 0) delete[] rawDensityGrids; numPotGrids = potentialGrids.size(); numDenGrids = densityGrids.size(); - if (numPotGrids > 0) - rawPotentialGrids = new BaseGrid[numPotGrids]; - if (numDenGrids > 0) - rawDensityGrids = new BaseGrid[numDenGrids]; - - for (int i=0; i < numPotGrids; i++) - rawPotentialGrids[i] = potentialGrids[i]; - for (int i=0; i < numDenGrids; i++) - rawDensityGrids[i] = densityGrids[i]; + if (numPotGrids > 0) { + rawPotentialGrids = new RigidBodyGrid[numPotGrids]; + rawPotentialBases = new Matrix3[numPotGrids]; + rawPotentialOrigins = new Vector3[numPotGrids]; + } + if (numDenGrids > 0) { + rawDensityGrids = new RigidBodyGrid[numDenGrids]; + rawDensityBases = new Matrix3[numDenGrids]; + rawDensityOrigins = new Vector3[numDenGrids]; + } + + for (int i=0; i < numPotGrids; i++) { + rawPotentialGrids[i] = potentialGrids[i]; + rawPotentialBases[i] = potentialGrids[i].getBasis(); + rawPotentialOrigins[i] = potentialGrids[i].getOrigin(); + } + for (int i=0; i < numDenGrids; i++) { + rawDensityGrids[i] = densityGrids[i]; + rawDensityBases[i] = densityGrids[i].getBasis(); + rawDensityOrigins[i] = densityGrids[i].getOrigin(); + } } diff --git a/RigidBodyType.h b/RigidBodyType.h index 4c334dd..be452c3 100644 --- a/RigidBodyType.h +++ b/RigidBodyType.h @@ -6,8 +6,9 @@ /* #include <thrust/host_vector.h> */ /* #include <thrust/device_vector.h> */ #include "Reservoir.h" -#include "useful.h" #include "BaseGrid.h" +#include "RigidBodyGrid.h" +#include "useful.h" #include <cstdio> @@ -37,9 +38,12 @@ RigidBodyType(const String& name = "") : void addPotentialGrid(String s); void addDensityGrid(String s); void updateRaw(); - + void setDampingCoeffs(float timestep, float tmp_mass, Vector3 tmp_inertia, + float tmp_transDamping, float tmp_rotDamping); + public: + String name; int num; // number of particles of this type @@ -49,6 +53,8 @@ public: Vector3 inertia; Vector3 transDamping; Vector3 rotDamping; + Vector3 transForceCoeff; + Vector3 rotTorqueCoeff; std::vector<String> potentialGridKeys; std::vector<String> densityGridKeys; @@ -59,8 +65,11 @@ public: // for device int numPotGrids; int numDenGrids; - BaseGrid* rawPotentialGrids; - BaseGrid* rawDensityGrids; - + RigidBodyGrid* rawPotentialGrids; + RigidBodyGrid* rawDensityGrids; + Matrix3* rawPotentialBases; + Matrix3* rawDensityBases; + Vector3* rawPotentialOrigins; + Vector3* rawDensityOrigins; }; diff --git a/makefile b/makefile index c119d1b..b701a44 100644 --- a/makefile +++ b/makefile @@ -12,7 +12,7 @@ INCLUDE = $(CUDA_PATH)/include DEBUG = -g CC_FLAGS = -Wall -Wno-write-strings -I$(INCLUDE) $(DEBUG) -std=c++0x -pedantic -NV_FLAGS = $(DEBUG) +NV_FLAGS = -rdc=true $(DEBUG) EX_FLAGS = -O3 -m$(OS_SIZE) ifneq ($(MAVERICKS),) diff --git a/notes.org b/notes.org index c6ee171..5a3e805 100644 --- a/notes.org +++ b/notes.org @@ -1,24 +1,189 @@ -* grid -** Q: overhead of dynamic parallelism? -** Q: overhead of classes? - Member data can't be shared +* Opportunities for memory bandwidth savings +** each block should contain a compact set of density grid points + cache (automatically!?) potential grid lookups and coefficients! +** each block should have same transformation matrices applied to each grid point?! +** each block could have same inverse transformation matrix applied to each grid point +*** how well this peforms will depend on the number +*** but also simplifies reductions + +** new data structure for grids? + each grid contains blocks of data of a size optmized for the device +** Each thread can operate on multiple data points if needed (any advantage?) + -** TODO read up on: -*** dynamic parallelism -*** registers -* TODO decide whether to write a monolithic kernel or to break problem into subkernel launches +* questions +** Q: overhead of dynamic parallelism? +Where does it make sense to have a kernel call subkernels? + A: At least: to sychronize blocks + +** Q: overhead for classes? +** Q: could algorithm use shared memory through persistent threads? + + +* bring in rigid body integrator * optimize algorithms for cuda (balance floating point calculation with memory transfers) -** parallize loops -** subthreads -** new algorithms may be improved through new data structures; introduce -*** RigidBodyGrid: methods pulled from BaseGrid, but transforms passed to functions +** share pieces of memory that are used repeatedly + +** RigidBodyGrid: methods pulled from BaseGrid, but transforms passed to functions **** no wrapping +** where exactly to parallelize: + Easy to break calculation for density grid, but each thread needs to perform + it's own lookup of potential grid; only O(10) values per thread + + neighboring density grid points will need to lookup similar region of potential grid + BUT the regions could be difficult to determine a priori + + + + possible to move potential grid data into shared memory tiles + e.g. blocks of 10x10x10 + this will be more important for global memory + Better to use cudaCache? + + + + * pairlists for rigid bodies ** maybe for grids, depending on parallel structure of code * other ideas -** interpolate density grids? +** interpolate density grid? + +BaseGrid.h: // RBTODO Fix? +BaseGrid.h- BaseGrid(); // cmaffeo2 (2015) moved this out of protected, cause I wanted BaseGrid in a struct +BaseGrid.h- // The most obvious of constructors. +BaseGrid.h- BaseGrid(Matrix3 basis0, Vector3 origin0, int nx0, int ny0, int nz0); +BaseGrid.h- +-- +BaseGrid.h- float a0, a1, a2, a3; +BaseGrid.h- +BaseGrid.h: // RBTODO parallelize loops? +BaseGrid.h- +BaseGrid.h- // Mix along x, taking the derivative. +BaseGrid.h- float g2[4][4]; +BaseGrid.h- for (int iy = 0; iy < 4; iy++) { +-- +BaseGrid.h- } +BaseGrid.h- +BaseGrid.h: // RBTODO overload with optimized algorithm +BaseGrid.h- // skip transforms (assume identity basis) +BaseGrid.h- HOST DEVICE inline float interpolatePotential(Vector3 pos) const { +BaseGrid.h- // Find the home node. +BaseGrid.h- Vector3 l = basisInv.transform(pos - origin); +-- +BaseGrid.h- +BaseGrid.h- // out of grid? return 0 +BaseGrid.h: // RBTODO +BaseGrid.h- +BaseGrid.h- // Get the array jumps. +BaseGrid.h- int jump[3]; +BaseGrid.h- jump[0] = nz*ny; +-- +BaseGrid.h- // Find the values at the neighbors. +BaseGrid.h- float g1[4][4][4]; +BaseGrid.h: //RBTODO parallelize? +BaseGrid.h- for (int ix = 0; ix < 4; ix++) { +BaseGrid.h- for (int iy = 0; iy < 4; iy++) { +BaseGrid.h- for (int iz = 0; iz < 4; iz++) { +BaseGrid.h- // Wrap around the periodic boundaries. +-- +ComputeGridGrid.cuh- Matrix3 basis_rho, Vector3 origin_rho, +ComputeGridGrid.cuh- Matrix3 basis_u, Vector3 origin_u) { +ComputeGridGrid.cuh: // RBTODO http://devblogs.nvidia.com/parallelforall/cuda-pro-tip-write-flexible-kernels-grid-stride-loops/ +ComputeGridGrid.cuh- const unsigned int r_id = blockIdx.x * blockDim.x + threadIdx.x; +ComputeGridGrid.cuh- +ComputeGridGrid.cuh: // RBTODO parallelize transform +ComputeGridGrid.cuh- if (r_id > rho->size) // skip threads with no data +ComputeGridGrid.cuh- return; +ComputeGridGrid.cuh- +ComputeGridGrid.cuh- // Maybe: Tile grid data into shared memory +ComputeGridGrid.cuh: // RBTODO: think about localizing regions of grid data +ComputeGridGrid.cuh- Vector3 p = rho->getPosition(r_id, basis, origin); +ComputeGridGrid.cuh- float val = rho->val[r_id]; +ComputeGridGrid.cuh- +ComputeGridGrid.cuh: // RBTODO reduce forces and torques +ComputeGridGrid.cuh- // http://www.cuvilib.com/Reduction.pdf +ComputeGridGrid.cuh- +ComputeGridGrid.cuh: // RBTODO combine interp methods and reduce repetition! +ComputeGridGrid.cuh- float energy = u->interpolatePotential(p); +ComputeGridGrid.cuh- Vector3 f = u->interpolateForceD(p); +ComputeGridGrid.cuh- Vector3 t = cross(p,f); // test if sign is correct! +ComputeGridGrid.cuh- +ComputeGridGrid.cuh: // RBTODO 3rd-law forces + torques +ComputeGridGrid.cuh-} +-- +Configuration.cpp- cudaMemcpyHostToDevice)); +Configuration.cpp- } +Configuration.cpp: // RBTODO: moved this out of preceding loop; was that correct? +Configuration.cpp- gpuErrchk(cudaMemcpyAsync(part_d, part_addr, sizeof(BrownianParticleType*) * numParts, +Configuration.cpp- cudaMemcpyHostToDevice)); +Configuration.cpp- +Configuration.cpp- +-- +Configuration.cpp- sz = sizeof(float) * len; +Configuration.cpp- gpuErrchk(cudaMemcpy( tmpData, g->val, sz, cudaMemcpyHostToDevice)); +Configuration.cpp: // RBTODO: why can't this be deleted? +Configuration.cpp- // delete[] tmpData; +Configuration.cpp- } +Configuration.cpp- } +Configuration.cpp- +-- +Configuration.cpp- sz = sizeof(float) * len; +Configuration.cpp- gpuErrchk(cudaMemcpy( tmpData, g->val, sz, cudaMemcpyHostToDevice)); +Configuration.cpp: // RBTODO: why can't this be deleted? +Configuration.cpp- // delete[] tmpData; +Configuration.cpp- } +Configuration.cpp- +Configuration.cpp- } +-- +RigidBodyGrid.h- \===============================*/ +RigidBodyGrid.h- +RigidBodyGrid.h: // RBTODO Fix? +RigidBodyGrid.h- RigidBodyGrid(); // cmaffeo2 (2015) moved this out of protected, cause I wanted RigidBodyGrid in a struct +RigidBodyGrid.h- // The most obvious of constructors. +RigidBodyGrid.h- RigidBodyGrid(int nx0, int ny0, int nz0); +RigidBodyGrid.h- +-- +RigidBodyGrid.h- float a0, a1, a2, a3; +RigidBodyGrid.h- +RigidBodyGrid.h: // RBTODO further parallelize loops? unlikely? +RigidBodyGrid.h- +RigidBodyGrid.h- // Mix along x, taking the derivative. +RigidBodyGrid.h- float g2[4][4]; +RigidBodyGrid.h- for (int iy = 0; iy < 4; iy++) { +-- +RigidBodyGrid.h- +RigidBodyGrid.h- // out of grid? return 0 +RigidBodyGrid.h: // RBTODO +RigidBodyGrid.h- +RigidBodyGrid.h- // Get the array jumps. +RigidBodyGrid.h- int jump[3]; +RigidBodyGrid.h- jump[0] = nz*ny; +-- +RigidBodyGrid.h- w[2] = l.z - homeZ; +RigidBodyGrid.h- // Find the values at the neighbors. +RigidBodyGrid.h: float g1[4][4][4]; /* RBTODO: inefficient for my algorithm? */ +RigidBodyGrid.h- for (int ix = 0; ix < 4; ix++) { +RigidBodyGrid.h- int jx = ix-1 + home[0]; +RigidBodyGrid.h- for (int iy = 0; iy < 4; iy++) { +RigidBodyGrid.h- int jy = iy-1 + home[1]; +-- +RigidBodyGrid.h- // Assume zero value at edges +RigidBodyGrid.h- int jz = iz-1 + home[2]; +RigidBodyGrid.h: // RBTODO: possible branch divergence in warp? +RigidBodyGrid.h- if (jx < 0 || jy < 0 || jz < 0 || +RigidBodyGrid.h- jx >= nx || jz >= nz || jz >= nz) { +RigidBodyGrid.h- g1[ix][iy][iz] = 0; +RigidBodyGrid.h- } else { +-- +RigidBodyGrid.h- // Find the values at the neighbors. +RigidBodyGrid.h- float g1[4][4][4]; +RigidBodyGrid.h: //RBTODO parallelize? +RigidBodyGrid.h- for (int ix = 0; ix < 4; ix++) { +RigidBodyGrid.h- for (int iy = 0; iy < 4; iy++) { +RigidBodyGrid.h- for (int iz = 0; iz < 4; iz++) { +RigidBodyGrid.h- // Wrap around the periodic boundaries. diff --git a/useful.h b/useful.h index 72070e8..327cd7a 100644 --- a/useful.h +++ b/useful.h @@ -194,7 +194,6 @@ public: return v; } - HOST DEVICE inline Vector3 operator*(float s) const { Vector3 v; v.x = s*x; @@ -220,6 +219,22 @@ public: return 0.0f; } + HOST DEVICE static inline Vector3 element_mult(const Vector3 v, const Vector3 w) { + Vector3 ret( + v.x*w.x, + v.y*w.y, + v.z*w.z); + return ret; + } + HOST DEVICE static inline Vector3 element_sqrt(const Vector3 w) { + Vector3 ret( + sqrt(w.x), + sqrt(w.y), + sqrt(w.z)); + return ret; + } + + String toString() const; float x, y, z; }; @@ -251,6 +266,8 @@ public: const Matrix3 operator*(float s) const; + HOST DEVICE const Vector3 operator*(const Vector3& v) const { return this->transform(v); } + const Matrix3 operator*(const Matrix3& m) const; const Matrix3 operator-() const; @@ -310,8 +327,11 @@ public: float exx, exy, exz; float eyx, eyy, eyz; float ezx, ezy, ezz; + }; + + Matrix3 operator*(float s, Matrix3 m); Matrix3 operator/(Matrix3 m, float s); -- GitLab