From 9c9cbc1e50efcfb066af9f7c9b74e100ce50f50d Mon Sep 17 00:00:00 2001 From: Chris Maffeo <cmaffeo2@illinois.edu> Date: Fri, 13 Nov 2015 11:22:50 -0600 Subject: [PATCH] starting work on grid-grid kernel --- .gitignore | 1 - BaseGrid.h | 37 +++- ComputeGridGrid.cuh | 34 +++ Configuration.cpp | 155 ++++--------- GrandBrownTown.cu | 13 +- GrandBrownTown.cuh | 2 + RigidBody.h | 312 +++++++++++++------------- RigidBodyGrid.cu | 470 +++++++++++++++++++++++++++++++++++++++ RigidBodyGrid.h | 519 ++++++++++++++++++++++++++++++++++++++++++++ RigidBodyType.cu | 8 +- RigidBodyType.h | 3 + notes.org | 25 ++- runBrownTown.cpp | 2 - useful.h | 2 +- 14 files changed, 1304 insertions(+), 279 deletions(-) create mode 100644 ComputeGridGrid.cuh create mode 100644 RigidBodyGrid.cu create mode 100644 RigidBodyGrid.h diff --git a/.gitignore b/.gitignore index 56e5b99..99c1cd7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,3 @@ -*.org *.o .svn *~ diff --git a/BaseGrid.h b/BaseGrid.h index 9156f95..bcdda0b 100644 --- a/BaseGrid.h +++ b/BaseGrid.h @@ -40,8 +40,13 @@ private: void init(); public: - BaseGrid(); // cmaffeo2 (2015) moved this out of protected, cause I wanted BaseGrid in a struct + /* \ + | CONSTRUCTORS, DESTRUCTORS, I/O | + \===============================*/ + + // RBTODO Fix? + BaseGrid(); // cmaffeo2 (2015) moved this out of protected, cause I wanted BaseGrid in a struct // The most obvious of constructors. BaseGrid(Matrix3 basis0, Vector3 origin0, int nx0, int ny0, int nz0); @@ -90,7 +95,11 @@ public: virtual ~BaseGrid(); - void zero(); + /* \ + | DATA METHODS | + \=============*/ + + void zero(); bool setValue(int j, float v); @@ -161,9 +170,13 @@ public: // Multiply the grid by a fixed value. void scale(float s); + /* \ + | COMPUTED | + \=========*/ + // Get the mean of the entire grid. - float mean() const; - + float mean() const + // Compute the average profile along an axis. // Assumes that the grid axis with index "axis" is aligned with the world axis of index "axis". void averageProfile(const char* fileName, int axis); @@ -183,7 +196,9 @@ public: HOST DEVICE inline float interpolateDiffX(Vector3 pos, float w[3], float g1[4][4][4]) const { float a0, a1, a2, a3; - + + // RBTODO parallelize loops? + // Mix along x, taking the derivative. float g2[4][4]; for (int iy = 0; iy < 4; iy++) { @@ -292,14 +307,19 @@ public: return -(3.0f*a3*w[2]*w[2] + 2.0f*a2*w[2] + a1); } - + + // RBTODO overload with optimized algorithm + // skip transforms (assume identity basis) HOST DEVICE inline float interpolatePotential(Vector3 pos) const { // Find the home node. Vector3 l = basisInv.transform(pos - origin); int homeX = int(floor(l.x)); int homeY = int(floor(l.y)); int homeZ = int(floor(l.z)); - + + // out of grid? return 0 + // RBTODO + // Get the array jumps. int jump[3]; jump[0] = nz*ny; @@ -317,7 +337,7 @@ public: g[0] = nx; g[1] = ny; g[2] = nz; - + // Get the interpolation coordinates. float w[3]; w[0] = l.x - homeX; @@ -472,6 +492,7 @@ public: w[2] = l.z - homeZ; // Find the values at the neighbors. float g1[4][4][4]; + //RBTODO parallelize? for (int ix = 0; ix < 4; ix++) { for (int iy = 0; iy < 4; iy++) { for (int iz = 0; iz < 4; iz++) { diff --git a/ComputeGridGrid.cuh b/ComputeGridGrid.cuh new file mode 100644 index 0000000..11e2f04 --- /dev/null +++ b/ComputeGridGrid.cuh @@ -0,0 +1,34 @@ +#pragma once +#include "RigidBodyGrid.h" + +__global__ +void computeGridGridForce(RigidBodyGrid* rho, RigidBodyGrid* u) { + unsigned int bidx = blockIdx.x; + unsigned int tidx = threadIdx.x; + + // GTX 980 has ~ 4 x 10x10x10 floats available for shared memory + // but code should work without access to this + + // RBTODO rewrite to use shared memory, break problem into subgrids + // will lookups of + +// http://devblogs.nvidia.com/parallelforall/cuda-pro-tip-write-flexible-kernels-grid-stride-loops/ + + const unsigned int r_id = bidx * blockDim.x + threadIdx.x; + + // RBTODO parallelize transform + if (r_id > rho->size) // skip threads with no data + return; + + // Tile grid data into shared memory + // RBTODO: think about localizing regions of grid data + Vector3 p = rho->getPosition(r_id); + float val = rho->val[r_id]; + + // RBTODO reduce these + // http://www.cuvilib.com/Reduction.pdf + float energy = ; + Vector3 f = u->interpolateForceD(p); + Vector3 t = f * + +} diff --git a/Configuration.cpp b/Configuration.cpp index 028b2fc..8e7328e 100644 --- a/Configuration.cpp +++ b/Configuration.cpp @@ -482,12 +482,8 @@ void Configuration::copyToCUDA() { gpuErrchk(cudaMemcpy(&(gtmp[gid]), &(rb->rawPotentialGrids[gid]), sizeof(BaseGrid), cudaMemcpyHostToDevice )); } - // RBTODO: segfault when gtmp is deleted --> why? - // delete[] gtmp; - printf(" RigidBodyType %d: numGrids = %d\n", i, ng); - - + printf(" RigidBodyType %d: numGrids = %d\n", i, ng); // copy grid data to device for (int gid = 0; gid < ng; gid++) { BaseGrid *g = &(rb->rawPotentialGrids[gid]); // convenience @@ -499,7 +495,6 @@ void Configuration::copyToCUDA() { for (int k = 0; k < len; k++) printf(" rbType_d[%d]->potGrid[%d].val[%d]: %g\n", i, gid, k, g->val[k]); - // allocate grid data on device // copy temporary host pointer to device pointer @@ -515,118 +510,56 @@ void Configuration::copyToCUDA() { // RBTODO: why can't this be deleted? // delete[] tmpData; } + } + // density grids + for (int i = 0; i < numRigidTypes; i++) { + printf("working on RB %d\n",i); + RigidBodyType *rb = &(rigidBody[i]); // temporary for convenience - - // // gpuErrchk(cudaMemcpy(&(rb_d->potentialGrids[i]), >mp, - // // sz, cudaMemcpyHostToDevice )); - - // // size_t sz; - // // for (int gid = 0; gid < ng; gid++) { - // // gpuErrchk(cudaMalloc((void**) &(gtmp[i]), sizeof(BaseGrid))); - // // gpuErrchk(cudaMemcpy(&(gtmp[i]->potentialGrids[i]), &(gtmp[i]), - // // sizeof(BaseGrid*), cudaMemcpyHostToDevice)); - // // } - + int ng = rb->numDenGrids; + BaseGrid * gtmp; + size_t sz = sizeof(BaseGrid)*ng; - // { - // BaseGrid *tmpData; - // size_t sz = sizeof(BaseGrid)*ng; - - // gpuErrchk(cudaMalloc((void **) &tmpData, sz)); - // gpuErrchk(cudaMemcpy(&(rb_d->potentialGrids[i]), &tmpData, - // sizeof(BaseGrid*), cudaMemcpyHostToDevice )); - // sz = sizeof(float) * ng - // gpuErrchk(cudaMemcpy(tmpData, rbg->val, sz, cudaMemcpyHostToDevice)); - // } - - - // size_t sz = sizeof(float) * rb->potentialGrids[gid]->getSize(); - // // gpuErrchk(cudaMalloc(&g_d, sizeof(BaseGrid))); - - // gpuErrchk(cudaMalloc(&tmpData, sz)); - // gpuErrchk(cudaMemcpyAsync(g_d, rb->potentialGrids[gid], - // sizeof(BaseGrid), cudaMemcpyHostToDevice)); - - // gpuErrchk(cudaMemcpy(val_d, rb->potentialGrids[gid], sz, cudaMemcpyHostToDevice)); - - // set rb pointers appropriately - // rb_d->potentialGrids[gid] = g_d; - // g_d->val = val_d; // hopefully? - - - - // BaseGrid *g_d; - // float *val_d; - // size_t sz = sizeof(float) * rb.potentialGrids[gid]->getSize(); - // gpuErrchk(cudaMalloc(&g_d, sizeof(BaseGrid))); - // gpuErrchk(cudaMalloc(&val_d, sz)); - // gpuErrchk(cudaMemcpyAsync(g_d, rb->potentialGrids[gid], - // sizeof(BaseGrid), cudaMemcpyHostToDevice)); - // gpuErrchk(cudaMemcpy(val_d, rb->potentialGrids[gid], sz, cudaMemcpyHostToDevice)); - - // // set rb pointers appropriately - // rb_d->potentialGrids[gid] = g_d; - // g_d->val = val_d; // hopefully? - - // } - - - // // Copy rigidbody types - // for (int i = 0; i < numRigidTypes; i++) { - // RigidBodyType *rb = rigidBody[i]; // temporary for convenience - // // gpuErrchk(cudaMemcpy(&rb, sizeof(RigidBodyType))); + // allocate grids on device + // copy temporary host pointer to device pointer + // copy grids to device through temporary host poin + gpuErrchk(cudaMalloc((void **) >mp, sz)); + gpuErrchk(cudaMemcpy(&(rb_addr[i]->rawDensityGrids), >mp, + sizeof(BaseGrid*) * ng, cudaMemcpyHostToDevice )); + gpuErrchk(cudaMemcpy(gtmp, &(rb->rawDensityGrids), + sizeof(BaseGrid) * ng, cudaMemcpyHostToDevice )); + for (int gid = 0; gid < ng; gid++) { + gpuErrchk(cudaMemcpy(&(gtmp[gid]), &(rb->rawDensityGrids[gid]), + sizeof(BaseGrid), cudaMemcpyHostToDevice )); + } - // RigidBodyType *rb_d; + printf(" RigidBodyType %d: numGrids = %d\n", i, ng); + // copy grid data to device + for (int gid = 0; gid < ng; gid++) { + BaseGrid *g = &(rb->rawDensityGrids[gid]); // convenience + int len = g->getSize(); + float **tmpData; + tmpData = new float*[len]; - - // for (int gid = 0; i < rb.numPotGrids) { - // BaseGrid *g_d; - // float *val_d; - // size_t sz = sizeof(float) * rb.potentialGrids[gid]->getSize(); - // gpuErrchk(cudaMalloc(&g_d, sizeof(BaseGrid))); - // gpuErrchk(cudaMalloc(&val_d, sz)); - // gpuErrchk(cudaMemcpyAsync(g_d, rb->potentialGrids[gid], - // sizeof(BaseGrid), cudaMemcpyHostToDevice)); - // gpuErrchk(cudaMemcpy(val_d, rb->potentialGrids[gid], sz, cudaMemcpyHostToDevice)); - - // // set rb pointers appropriately - // rb_d->potentialGrids[gid] = g_d; - // g_d->val = val_d; // hopefully? - - // } - // for (int gid = 0; i < rb.numDenGrids) { - // BaseGrid *g_d; - // float *val_d; - // size_t sz = sizeof(float) * rb->densityGrids[gid]->getSize(); - // gpuErrchk(cudaMalloc(&g_d, sizeof(BaseGrid))); - // gpuErrchk(cudaMalloc(&val_d, sz)); - // gpuErrchk(cudaMemcpyAsync(g_d, rb->densityGrids[gid], - // sizeof(BaseGrid), cudaMemcpyHostToDevice)); - // gpuErrchk(cudaMemcpy(val_d, rb->densityGrids[gid], sz, cudaMemcpyHostToDevice)); + printf(" RigidBodyType %d: potGrid[%d] size: %d\n", i, gid, len); + for (int k = 0; k < len; k++) + printf(" rbType_d[%d]->potGrid[%d].val[%d]: %g\n", + i, gid, k, g->val[k]); - // } - - - // // RBTODO - // // b->pmf = pmf; - // // b->diffusionGrid = diffusionGrid; - // gpuErrchk(cudaMalloc(&rb_addr[i], sizeof(BrownianParticleType))); - // gpuErrchk(cudaMemcpyAsync( rb_addr[i], rb_d, - // sizeof(BrownianParticleType), cudaMemcpyHostToDevice)); - - - // gpuErrchk(cudaMalloc(&rbType_d, sizeof(RigidBodyType*) * numRigidTypes)); - // rigidBody[i].potentialGrids_D = rigidBody[i].potentialGrids; - // rigidBody[i].densityGrids_D = rigidBody[i].densityGrids; - - // gpuErrchk(cudaMemcpyAsync(rb_addr[i], rb, sizeof(RigidBodyType), - // cudaMemcpyHostToDevice)); + // allocate grid data on device + // copy temporary host pointer to device pointer + // copy data to device through temporary host pointer + sz = sizeof(float*) * len; + gpuErrchk(cudaMalloc((void **) &tmpData, sz)); + gpuErrchk(cudaMemcpy( &(gtmp[gid].val), &tmpData, + sizeof(float*), cudaMemcpyHostToDevice)); + sz = sizeof(float) * len; + gpuErrchk(cudaMemcpy( tmpData, g->val, sz, cudaMemcpyHostToDevice)); + // RBTODO: why can't this be deleted? + // delete[] tmpData; + } - // gpuErrchk(cudaMemcpyAsync(part_d, part_addr, sizeof(BrownianParticleType*) * numParts, - // cudaMemcpyHostToDevice)); - - // } } gpuErrchk(cudaMemcpy(rbType_d, rb_addr, sizeof(RigidBodyType*) * numRigidTypes, cudaMemcpyHostToDevice)); diff --git a/GrandBrownTown.cu b/GrandBrownTown.cu index fdf5036..cd1650b 100644 --- a/GrandBrownTown.cu +++ b/GrandBrownTown.cu @@ -56,6 +56,7 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg, type = new int[num * numReplicas]; // [HOST] array of particles' types. serial = new int[num * numReplicas]; // [HOST] array of particles' serial numbers. + // Replicate identical initial conditions across all replicas // TODO: add an option to generate random initial conditions for all replicas for (int r = 0; r < numReplicas; ++r) { @@ -403,13 +404,23 @@ void GrandBrownTown::run() { electricField, tl, timestep, num, sys_d, randoGen_d, numReplicas); //gpuErrchk(cudaPeekAtLastError()); // Does not work on old GPUs (like mine). TODO: write a better wrapper around Peek - + + /* Time position computations. rt_timer_stop(cputimer); float dt2 = rt_timer_time(cputimer); printf("Position Update Time: %f ms\n", dt2 * 1000); // */ + // calculateRigidBodyForce<<< numBlocks, NUM_THREADS >>>(rb_d, rbType_d, + // kT, kTGrid_d, + // tl, timestep, + // sys_d, randoGen_d, numReplicas); + computeGridGridForce<<< numBlocks, NUM_THREADS >>>(grid1_d, grid2_d); + + // int numBlocks = (numRB ) / NUM_THREADS + (num * numReplicas % NUM_THREADS == 0 ? 0 : 1); + + Vector3 force0(0.0f); if (imd_on && clientsock && s % outputPeriod == 0) { diff --git a/GrandBrownTown.cuh b/GrandBrownTown.cuh index 5f8c1a7..7b447e5 100644 --- a/GrandBrownTown.cuh +++ b/GrandBrownTown.cuh @@ -16,6 +16,8 @@ Vector3 step(Vector3 r0, float kTlocal, Vector3 force, float diffusion, __global__ void updateKernel(Vector3 pos[], Vector3 forceInternal[], int type[], BrownianParticleType* part[], + RigidBody* rb[], + RigidBodyType* rbType[], float kT, BaseGrid* kTGrid, float electricField, int tGridLength, float timestep, int num, BaseGrid* sys, diff --git a/RigidBody.h b/RigidBody.h index ef3a788..0f7c047 100644 --- a/RigidBody.h +++ b/RigidBody.h @@ -6,11 +6,12 @@ #pragma once -#include <set> -#include <vector> +/* #include <set> */ +/* #include <vector> */ -#include "Vector.h" -#include "Tensor.h" +#include "useful.h" +/* #include "Vector.h" */ +/* #include "Tensor.h" */ /* #include "strlib.h" */ @@ -25,203 +26,210 @@ /* #include "NamdTypes.h" */ typedef BigReal float; /* strip this out later */ +typedef Force Vector3; -class ComputeMgr; -class Random; +#define DEVICE __device__ + +/* class ComputeMgr; */ +/* class Random; */ class RigidBody { /*=====================================================================\ - | See Appendix A of: Dullweber, Leimkuhler and McLaclan. "Symplectic | - | splitting methods for rigid body molecular dynamics". J Chem | - | Phys. (1997) | - \=====================================================================*/ + | See Appendix A of: Dullweber, Leimkuhler and McLaclan. "Symplectic | + | splitting methods for rigid body molecular dynamics". J Chem | + | Phys. (1997) | + \=====================================================================*/ public: - RigidBody(SimParameters *simParams, RigidBodyParams *rbParams); - ~RigidBody(); + DEVICE RigidBody(SimParameters *simParams, RigidBodyParams *rbParams); + DEVICE ~RigidBody(); - void addForce(Force f); - void addTorque(Force t); - void addLangevin(Vector w1, Vector w2); + DEVICE void addForce(Force f); + DEVICE void addTorque(Force t); + DEVICE void addLangevin(Vector3 w1, Vector3 w2); - inline void clearForce() { force = Force(0); } - inline void clearTorque() { torque = Force(0); } + DEVICE inline void clearForce() { force = Force(0); } + DEVICE inline void clearTorque() { torque = Force(0); } - void integrate(Vector *p_trans, Tensor *p_rot, int startFinishAll); + DEVICE void integrate(Vector3 *p_trans, Matrix3 *p_rot, int startFinishAll); - inline char* getKey() { return key; } - inline Vector getPosition() { return position; } - inline Tensor getOrientation() { return orientation; } - inline BigReal getMass() { return mass; } - inline Vector getVelocity() { return momentum/mass; } - inline Vector getAngularVelocity() { - return Vector( angularMomentum.x / inertia.x, + DEVICE inline char* getKey() { return key; } + DEVICE inline Vector3 getPosition() { return position; } + DEVICE inline Matrix3 getOrientation() { return orientation; } + DEVICE inline BigReal getMass() { return mass; } + DEVICE inline Vector3 getVelocity() { return momentum/mass; } + DEVICE inline Vector3 getAngularVelocity() { + return Vector3( angularMomentum.x / inertia.x, angularMomentum.y / inertia.y, angularMomentum.z / inertia.z ); } bool langevin; private: - char* key; - static const SimParameters * simParams; + String key; + /* static const SimParameters * simParams; */ BigReal mass; - Vector position; - Tensor orientation; + Vector3 position; + Matrix3 orientation; - Vector inertia; // diagonal elements of inertia tensor - Vector momentum; - Vector angularMomentum; // angular momentum along corresponding principal axes + Vector3 inertia; // diagonal elements of inertia tensor + Vector3 momentum; + Vector3 angularMomentum; // angular momentum along corresponding principal axes // Langevin - Vector langevinTransFriction; - Vector langevinRotFriction; + Vector3 langevinTransFriction; + Vector3 langevinRotFriction; BigReal Temp; - Vector transDampingCoeff; - Vector transForceCoeff; - Vector rotDampingCoeff; - Vector rotTorqueCoeff; + Vector3 transDampingCoeff; + Vector3 transForceCoeff; + Vector3 rotDampingCoeff; + Vector3 rotTorqueCoeff; // integration int timestep; - Vector force; // lab frame - Vector torque; // lab frame (except in integrate()) + Vector3 force; // lab frame + Vector3 torque; // lab frame (except in integrate()) bool isFirstStep; // units "kcal_mol/AA * fs" "(AA/fs) * amu" const BigReal impulse_to_momentum; - inline Tensor Rx(BigReal t); - inline Tensor Ry(BigReal t); - inline Tensor Rz(BigReal t); - inline Tensor eulerToMatrix(const Vector e); + DEVICE inline Matrix3 Rx(BigReal t); + DEVICE inline Matrix3 Ry(BigReal t); + DEVICE inline Matrix3 Rz(BigReal t); + DEVICE inline Matrix3 eulerToMatrix(const Vector3 e); }; class RigidBodyController { public: - RigidBodyController(const NamdState *s, int reductionTag, SimParameters *sp); - ~RigidBodyController(); - void integrate(int step); - void print(int step); + /* DEVICE RigidBodyController(const NamdState *s, int reductionTag, SimParameters *sp); */ + DEVICE RigidBodyController(const NamdState *s, int reductionTag, SimParameters *sp); + + DEVICE ~RigidBodyController(); + DEVICE void integrate(int step); + DEVICE void print(int step); private: - void printLegend(std::ofstream &file); - void printData(int step, std::ofstream &file); - - SimParameters *simParams; - const NamdState * state; - - std::ofstream trajFile; - - Random *random; - ComputeMgr *computeMgr; - RequireReduction *gridReduction; - - ResizeArray<Vector> trans; // would have made these static, but - ResizeArray<Tensor> rot; // there are errors on rigidBody->integrate - std::vector<RigidBody*> rigidBodyList; + /* void printLegend(std::ofstream &file); */ + /* void printData(int step, std::ofstream &file); */ + + /* SimParameters* simParams; */ + /* const NamdState* state; */ + /* std::ofstream trajFile; */ + + Random* random; + /* RequireReduction *gridReduction; */ + + Vector3* trans; // would have made these static, but + Matrix3* rot; // there are errors on rigidBody->integrate + RigidBody* rigidBodyList; + }; class RigidBodyParams { public: - RigidBodyParams() { - rigidBodyKey = 0; - mass = 0; - inertia = Vector(0); - langevin = FALSE; - temperature = 0; - transDampingCoeff = Vector(0); - rotDampingCoeff = Vector(0); - gridList; - position = Vector(0); - velocity = Vector(0); - orientation = Tensor(); - orientationalVelocity = Vector(0); - } - char *rigidBodyKey; - BigReal mass; - zVector inertia; - Bool langevin; - BigReal temperature; - zVector transDampingCoeff; - zVector rotDampingCoeff; - std::vector<std::string> gridList; + RigidBodyParams() { + rigidBodyKey = 0; + mass = 0; + inertia = Vector3(); + langevin = FALSE; + temperature = 0; + transDampingCoeff = Vector3(); + rotDampingCoeff = Vector3(); + gridList; + position = Vector3(); + velocity = Vector3(); + orientation = Matrix3(); + orientationalVelocity = Vector3(); + } + int typeID; + + String *rigidBodyKey; + BigReal mass; + Vector3 inertia; + Bool langevin; + BigReal temperature; + Vector3 transDampingCoeff; + Vector3 rotDampingCoeff; + String *gridList; + - zVector position; - zVector velocity; - Tensor orientation; - zVector orientationalVelocity; + Vector3 position; + Vector3 velocity; + Matrix3 orientation; + Vector3 orientationalVelocity; - RigidBodyParams *next; + RigidBodyParams *next; - const void print(); + const void print(); }; -class RigidBodyParamsList { -public: - RigidBodyParamsList() { - clear(); - } +/* class RigidBodyParamsList { */ +/* public: */ +/* RigidBodyParamsList() { */ +/* clear(); */ +/* } */ - ~RigidBodyParamsList() - { - RBElem* cur; - while (head != NULL) { - cur = head; - head = cur->nxt; - delete cur; - } - clear(); - } - const void print(char *s); - const void print(); - - // The SimParameters bit copy overwrites these values with illegal pointers, - // So thise throws away the garbage and lets everything be reinitialized - // from scratch - void clear() { - head = tail = NULL; - n_elements = 0; - } +/* ~RigidBodyParamsList() */ +/* { */ +/* RBElem* cur; */ +/* while (head != NULL) { */ +/* cur = head; */ +/* head = cur->nxt; */ +/* delete cur; */ +/* } */ +/* clear(); */ +/* } */ +/* const void print(char *s); */ +/* const void print(); */ + +/* // The SimParameters bit copy overwrites these values with illegal pointers, */ +/* // So thise throws away the garbage and lets everything be reinitialized */ +/* // from scratch */ +/* void clear() { */ +/* head = tail = NULL; */ +/* n_elements = 0; */ +/* } */ - RigidBodyParams* find_key(const char* key); - int index_for_key(const char* key); - RigidBodyParams* add(const char* key); +/* RigidBodyParams* find_key(const char* key); */ +/* int index_for_key(const char* key); */ +/* RigidBodyParams* add(const char* key); */ - RigidBodyParams *get_first() { - if (head == NULL) { - return NULL; - } else return &(head->elem); - } +/* RigidBodyParams *get_first() { */ +/* if (head == NULL) { */ +/* return NULL; */ +/* } else return &(head->elem); */ +/* } */ - void pack_data(MOStream *msg); - void unpack_data(MIStream *msg); +/* void pack_data(MOStream *msg); */ +/* void unpack_data(MIStream *msg); */ - // convert from a string to Bool; returns 1(TRUE) 0(FALSE) or -1(if unknown) - static int atoBool(const char *s) - { - if (!strcasecmp(s, "on")) return 1; - if (!strcasecmp(s, "off")) return 0; - if (!strcasecmp(s, "true")) return 1; - if (!strcasecmp(s, "false")) return 0; - if (!strcasecmp(s, "yes")) return 1; - if (!strcasecmp(s, "no")) return 0; - if (!strcasecmp(s, "1")) return 1; - if (!strcasecmp(s, "0")) return 0; - return -1; - } - - -private: - class RBElem { - public: - RigidBodyParams elem; - RBElem* nxt; - }; - RBElem* head; - RBElem* tail; - int n_elements; - -}; +/* // convert from a string to Bool; returns 1(TRUE) 0(FALSE) or -1(if unknown) */ +/* static int atoBool(const char *s) */ +/* { */ +/* if (!strcasecmp(s, "on")) return 1; */ +/* if (!strcasecmp(s, "off")) return 0; */ +/* if (!strcasecmp(s, "true")) return 1; */ +/* if (!strcasecmp(s, "false")) return 0; */ +/* if (!strcasecmp(s, "yes")) return 1; */ +/* if (!strcasecmp(s, "no")) return 0; */ +/* if (!strcasecmp(s, "1")) return 1; */ +/* if (!strcasecmp(s, "0")) return 0; */ +/* return -1; */ +/* } */ + + +/* private: */ +/* class RBElem { */ +/* public: */ +/* RigidBodyParams elem; */ +/* RBElem* nxt; */ +/* }; */ +/* RBElem* head; */ +/* RBElem* tail; */ +/* int n_elements; */ + +/* }; */ diff --git a/RigidBodyGrid.cu b/RigidBodyGrid.cu new file mode 100644 index 0000000..f893867 --- /dev/null +++ b/RigidBodyGrid.cu @@ -0,0 +1,470 @@ + +////////////////////////////////////////////////////////////////////// +// Grid base class that does just the basics. +// Author: Jeff Comer <jcomer2@illinois.edu> + +#include "RigidBodyGrid.h" +#include <cuda.h> + + +#define STRLEN 512 + +// Initialize the variables that get used a lot. +// Also, allocate the main value array. +void RigidBodyGrid::init() { + nynz = ny*nz; + size = nx*ny*nz; + val = new float[size]; +} +RigidBodyGrid::RigidBodyGrid() { + RigidBodyGrid tmp(Matrix3(),Vector3(),1,1,1); + val = new float[1]; + *this = tmp; // TODO: verify that this is OK + + // origin = Vector3(); + // nx = 1; + // ny = 1; + // nz = 1; + + // init(); + // zero(); +} + +// The most obvious of constructors. +RigidBodyGrid::RigidBodyGrid(int nx0, int ny0, int nz0) { + nx = abs(nx0); + ny = abs(ny0); + nz = abs(nz0); + + init(); + zero(); +} + +// Make an orthogonal grid given the box dimensions and resolution. +RigidBodyGrid::RigidBodyGrid(Vector3 box, float dx) { + dx = fabsf(dx); + box.x = fabsf(box.x); + box.y = fabsf(box.y); + box.z = fabsf(box.z); + + // Tile the grid into the system box. + // The grid spacing is always a bit smaller than dx. + nx = int(ceilf(box.x/dx)); + ny = int(ceilf(box.y/dx)); + nz = int(ceilf(box.z/dx)); + if (nx <= 0) nx = 1; + if (ny <= 0) ny = 1; + if (nz <= 0) nz = 1; + + init(); + zero(); +} + +// The box gives the system geometry. +// The grid point numbers define the resolution. +RigidBodyGrid::RigidBodyGrid(Matrix3 box, int nx0, int ny0, int nz0) { + nx = nx0; + ny = ny0; + nz = nz0; + + // Tile the grid into the system box. + if (nx <= 0) nx = 1; + if (ny <= 0) ny = 1; + if (nz <= 0) nz = 1; + + init(); + zero(); +} + +// The box gives the system geometry. +// dx is the approx. resolution. +// The grid spacing is always a bit larger than dx. +RigidBodyGrid::RigidBodyGrid(Matrix3 box, Vector3 origin0, float dx) { + dx = fabs(dx); + + // Tile the grid into the system box. + // The grid spacing is always a bit larger than dx. + nx = int(floor(box.ex().length()/dx))-1; + ny = int(floor(box.ey().length()/dx))-1; + nz = int(floor(box.ez().length()/dx))-1; + if (nx <= 0) nx = 1; + if (ny <= 0) ny = 1; + if (nz <= 0) nz = 1; + + init(); + zero(); +} + +// The box gives the system geometry. +// dx is the approx. resolution. +// The grid spacing is always a bit smaller than dx. +RigidBodyGrid::RigidBodyGrid(Matrix3 box, float dx) { + dx = fabs(dx); + + // Tile the grid into the system box. + // The grid spacing is always a bit smaller than dx. + nx = int(ceilf(box.ex().length()/dx)); + ny = int(ceilf(box.ey().length()/dx)); + nz = int(ceilf(box.ez().length()/dx)); + if (nx <= 0) nx = 1; + if (ny <= 0) ny = 1; + if (nz <= 0) nz = 1; + + init(); + zero(); +} + +// Make an exact copy of a grid. +RigidBodyGrid::RigidBodyGrid(const RigidBodyGrid& g) { + nx = g.nx; + ny = g.ny; + nz = g.nz; + + init(); + for (int i = 0; i < size; i++) val[i] = g.val[i]; +} + +RigidBodyGrid RigidBodyGrid::mult(const RigidBodyGrid& g) { + for (int i = 0; i < size; i++) val[i] *= g.val[i]; + return *this; +} + +RigidBodyGrid& RigidBodyGrid::operator=(const RigidBodyGrid& g) { + delete[] val; + val = NULL; + nx = g.nx; + ny = g.ny; + nz = g.nz; + + init(); + for (int i = 0; i < size; i++) val[i] = g.val[i]; + + return *this; +} + + +// Make a copy of a grid, but at a different resolution. +RigidBodyGrid::RigidBodyGrid(const RigidBodyGrid& g, int nx0, int ny0, int nz0) : nx(nx0), ny(ny0), nz(nz0) { + if (nx <= 0) nx = 1; + if (ny <= 0) ny = 1; + if (nz <= 0) nz = 1; + + // Tile the grid into the box of the template grid. + Matrix3 box = g.getBox(); + + init(); + + // Do an interpolation to obtain the values. + for (int i = 0; i < size; i++) { + Vector3 r = getPosition(i); + val[i] = g.interpolatePotential(r); + } +} + +RigidBodyGrid::~RigidBodyGrid() { + if (val != NULL) + delete[] val; +} + +void RigidBodyGrid::zero() { + for (int i = 0; i < size; i++) val[i] = 0.0f; +} + +bool RigidBodyGrid::setValue(int j, float v) { + if (j < 0 || j >= size) return false; + val[j] = v; + return true; +} + +bool RigidBodyGrid::setValue(int ix, int iy, int iz, float v) { + if (ix < 0 || ix >= nx) return false; + if (iy < 0 || iy >= ny) return false; + if (iz < 0 || iz >= nz) return false; + int j = iz + iy*nz + ix*ny*nz; + + val[j] = v; + return true; +} + +float RigidBodyGrid::getValue(int j) const { + if (j < 0 || j >= size) return 0.0f; + return val[j]; +} + +float RigidBodyGrid::getValue(int ix, int iy, int iz) const { + if (ix < 0 || ix >= nx) return 0.0f; + if (iy < 0 || iy >= ny) return 0.0f; + if (iz < 0 || iz >= nz) return 0.0f; + + int j = iz + iy*nz + ix*ny*nz; + return val[j]; +} + +Vector3 RigidBodyGrid::getPosition(int j) const { + int iz = j%nz; + int iy = (j/nz)%ny; + int ix = j/(nz*ny); + + return Vector3(ix,iy,iz); +} +Vector3 RigidBodyGrid::getPosition(int j, Matrix3 basis, Vector3 origin) const { + int iz = j%nz; + int iy = (j/nz)%ny; + int ix = j/(nz*ny); + + return basis.transform(Vector3(ix, iy, iz)) + origin; +} + +// // Does the point r fall in the grid? +// // Obviously this is without periodic boundary conditions. +// bool RigidBodyGrid::inGrid(Vector3 r,) const { +// Vector3 l = basisInv.transform(r-origin); + +// if (l.x < 0.0f || l.x >= nx) return false; +// if (l.y < 0.0f || l.y >= ny) return false; +// if (l.z < 0.0f || l.z >= nz) return false; +// return true; +// } + +// bool RigidBodyGrid::inGridInterp(Vector3 r) const { +// Vector3 l = basisInv.transform(r-origin); + +// if (l.x < 2.0f || l.x >= nx-3.0f) return false; +// if (l.y < 2.0f || l.y >= ny-3.0f) return false; +// if (l.z < 2.0f || l.z >= nz-3.0f) return false; +// return true; +// } + +// Vector3 RigidBodyGrid::transformTo(Vector3 r) const { +// return basisInv.transform(r-origin); +// } +// Vector3 RigidBodyGrid::transformFrom(Vector3 l) const { +// return basis.transform(l) + origin; +// } + +IndexList RigidBodyGrid::index(int j) const { + int iz = j%nz; + int iy = (j/nz)%ny; + int ix = j/(nz*ny); + IndexList ret; + ret.add(ix); + ret.add(iy); + ret.add(iz); + return ret; +} +int RigidBodyGrid::indexX(int j) const { return j/(nz*ny); } +int RigidBodyGrid::indexY(int j) const { return (j/nz)%ny; } +int RigidBodyGrid::indexZ(int j) const { return j%nz; } +int RigidBodyGrid::index(int ix, int iy, int iz) const { return iz + iy*nz + ix*ny*nz; } + +// int RigidBodyGrid::index(Vector3 r) const { +// Vector3 l = basisInv.transform(r-origin); + +// int ix = int(floor(l.x)); +// int iy = int(floor(l.y)); +// int iz = int(floor(l.z)); + +// ix = wrap(ix, nx); +// iy = wrap(iy, ny); +// iz = wrap(iz, nz); + +// return iz + iy*nz + ix*ny*nz; +// } + +// int RigidBodyGrid::nearestIndex(Vector3 r) const { +// Vector3 l = basisInv.transform(r-origin); + +// int ix = int(floorf(l.x + 0.5f)); +// int iy = int(floorf(l.y + 0.5f)); +// int iz = int(floorf(l.z + 0.5f)); + +// ix = wrap(ix, nx); +// iy = wrap(iy, ny); +// iz = wrap(iz, nz); + +// return iz + iy*nz + ix*ny*nz; +// } + +} + +// Add a fixed value to the grid. +void RigidBodyGrid::shift(float s) { + for (int i = 0; i < size; i++) val[i] += s; +} + +// Multiply the grid by a fixed value. +void RigidBodyGrid::scale(float s) { + for (int i = 0; i < size; i++) val[i] *= s; +} + +// Get the mean of the entire grid. +float RigidBodyGrid::mean() const { + float sum = 0.0f; + for (int i = 0; i < size; i++) sum += val[i]; + return sum/size; +} + +// Get the potential at the closest node. +float RigidBodyGrid::getPotential(Vector3 pos) const { + // Find the nearest node. + int j = nearestIndex(pos); + + return val[j]; +} + + +// Added by Rogan for times when simpler calculations are required. +float RigidBodyGrid::interpolatePotentialLinearly(Vector3 pos) const { + // Find the home node. + Vector3 l = pos; + int homeX = int(floorf(l.x)); + int homeY = int(floorf(l.y)); + int homeZ = int(floorf(l.z)); + + // Get the array jumps. + int jump[3]; + jump[0] = nz*ny; + jump[1] = nz; + jump[2] = 1; + + // Shift the indices in the home array. + int home[3]; + home[0] = homeX; + home[1] = homeY; + home[2] = homeZ; + + // Get the grid dimensions. + int g[3]; + g[0] = nx; + g[1] = ny; + g[2] = nz; + + // Get the interpolation coordinates. + float w[3]; + w[0] = l.x - homeX; + w[1] = l.y - homeY; + w[2] = l.z - homeZ; + + // Find the values at the neighbors. + float g1[2][2][2]; + for (int ix = 0; ix < 2; ix++) { + for (int iy = 0; iy < 2; iy++) { + for (int iz = 0; iz < 2; iz++) { + // Wrap around the periodic boundaries. + int jx = ix + home[0]; + jx = wrap(jx, g[0]); + int jy = iy + home[1]; + jy = wrap(jy, g[1]); + int jz = iz + home[2]; + jz = wrap(jz, g[2]); + + int ind = jz*jump[2] + jy*jump[1] + jx*jump[0]; + g1[ix][iy][iz] = val[ind]; + } + } + } + + // Mix along x. + float g2[2][2]; + for (int iy = 0; iy < 2; iy++) { + for (int iz = 0; iz < 2; iz++) { + // p = w[0] * g[0][iy][iz] + (1-w[0]) * g[1][iy][iz] + g2[iy][iz] = (1.0f-w[0])*g1[0][iy][iz] + w[0]*g1[1][iy][iz]; + } + } + + // Mix along y. + float g3[2]; + for (int iz = 0; iz < 2; iz++) { + g3[iz] = (1.0f-w[1])*g2[0][iz] + w[1]*g2[1][iz]; + } + + // DEBUG + //printf("(0,0,0)=%.1f (0,0,1)=%.1f (0,1,0)=%.1f (0,1,1)=%.1f (1,0,0)=%.1f (1,0,1)=%.1f (1,1,0)=%.1f (1,1,1)=%.1f ", + // g1[0][0][0], g1[0][0][1], g1[0][1][0], g1[0][1][1], g1[1][0][0], g1[1][0][1], g1[1][1][0], g1[1][1][1] ); + //printf ("%.2f\n",(1.0-w[2])*g3[0] + w[2]*g3[1]); + + // Mix along z + return (1.0f-w[2])*g3[0] + w[2]*g3[1]; +} + + + +// Vector3 RigidBodyGrid::wrapDiffNearest(Vector3 r) const { +// Vector3 l = r; +// l.x = wrapDiff(l.x, nx); +// l.y = wrapDiff(l.y, ny); +// l.z = wrapDiff(l.z, nz); + +// float length2 = l.length2(); + +// for (int dx = -1; dx <= 1; dx++) { +// for (int dy = -1; dy <= 1; dy++) { +// for (int dz = -1; dz <= 1; dz++) { +// //if (dx == 0 && dy == 0 && dz == 0) continue; +// Vector3 tmp = Vector3(l.x+dx*nx, l.y+dy*ny, l.z+dz*nz); +// if (tmp.length2() < length2) { +// l = tmp; +// length2 = l.length2(); +// } +// } +// } +// } + +// return l; +// // return basis.transform(l); +// } + + +// Includes the home node. +// indexBuffer must have a size of at least 27. +void RigidBodyGrid::getNeighbors(int j, int* indexBuffer) const { + int jx = indexX(j); + int jy = indexY(j); + int jz = indexZ(j); + + int k = 0; + for (int ix = -1; ix <= 1; ix++) { + for (int iy = -1; iy <= 1; iy++) { + for (int iz = -1; iz <= 1; iz++) { + int ind = wrap(jz+iz,nz) + nz*wrap(jy+iy,ny) + nynz*wrap(jx+ix,nx); + indexBuffer[k] = ind; + k++; + } + } + } +} + +// Includes the home node. +// indexBuffer must have a size of at least 27. +void RigidBodyGrid::getNeighbors(int j, int* indexBuffer) const { + int jx = indexX(j); + int jy = indexY(j); + int jz = indexZ(j); + + int k = 0; + for (int ix = -1; ix <= 1; ix++) { + for (int iy = -1; iy <= 1; iy++) { + for (int iz = -1; iz <= 1; iz++) { + int ind = wrap(jz+iz,nz) + nz*wrap(jy+iy,ny) + nynz*wrap(jx+ix,nx); + indexBuffer[k] = ind; + k++; + } + } + } +} + + +// Get the values at the neighbors of a node. +// Note that homeX, homeY, and homeZ do not need to be wrapped, +// since we do it here. +void RigidBodyGrid::getNeighborValues(NeighborList* neigh, int homeX, int homeY, int homeZ) const { + for (int ix = -1; ix <= 1; ix++) { + for (int iy = -1; iy <= 1; iy++) { + for (int iz = -1; iz <= 1; iz++) { + int ind = wrap(homeZ+iz,nz) + nz*wrap(homeY+iy,ny) + nynz*wrap(homeX+ix,nx); + neigh->v[ix+1][iy+1][iz+1] = val[ind]; + } + } + } +} diff --git a/RigidBodyGrid.h b/RigidBodyGrid.h new file mode 100644 index 0000000..8453332 --- /dev/null +++ b/RigidBodyGrid.h @@ -0,0 +1,519 @@ +////////////////////////////////////////////////////////////////////// +// Copy of BaseGrid with some modificaitons +// + +#pragma once + +#ifdef __CUDACC__ + #define HOST __host__ + #define DEVICE __device__ +#else + #define HOST + #define DEVICE +#endif + +#include "useful.h" +#include <cmath> +#include <cstring> +#include <cstdio> +#include <cstdlib> +#include <ctime> +#include <cuda.h> + + +using namespace std; + +#define STRLEN 512 + +class NeighborList { +public: + float v[3][3][3]; +}; + +class RigidBodyGrid { +private: + // Initialize the variables that get used a lot. + // Also, allocate the main value array. + void init(); + +public: + + /* \ + | CONSTRUCTORS, DESTRUCTORS, I/O | + \===============================*/ + + // RBTODO Fix? + RigidBodyGrid(); // cmaffeo2 (2015) moved this out of protected, cause I wanted RigidBodyGrid in a struct + // The most obvious of constructors. + RigidBodyGrid(int nx0, int ny0, int nz0); + + // Make an orthogonal grid given the box dimensions and resolution. + RigidBodyGrid(Vector3 box, float dx); + + // The box gives the system geometry. + // The grid point numbers define the resolution. + RigidBodyGrid(Matrix3 box, int nx0, int ny0, int nz0); + + // The box gives the system geometry. + // dx is the approx. resolution. + // The grid spacing is always a bit larger than dx. + RigidBodyGrid(Matrix3 box, Vector3 origin0, float dx); + + // The box gives the system geometry. + // dx is the approx. resolution. + // The grid spacing is always a bit smaller than dx. + RigidBodyGrid(Matrix3 box, float dx); + + // Make an exact copy of a grid. + RigidBodyGrid(const RigidBodyGrid& g); + + RigidBodyGrid mult(const RigidBodyGrid& g); + + RigidBodyGrid& operator=(const RigidBodyGrid& g); + + // Make a copy of a grid, but at a different resolution. + RigidBodyGrid(const RigidBodyGrid& g, int nx0, int ny0, int nz0); + + virtual ~RigidBodyGrid(); + + /* \ + | DATA METHODS | + \=============*/ + + void zero(); + + bool setValue(int j, float v); + + bool setValue(int ix, int iy, int iz, float v); + + virtual float getValue(int j) const; + + virtual float getValue(int ix, int iy, int iz) const; + + // Vector3 getPosition(int ix, int iy, int iz) const; + Vector3 getPosition(int j) const; + Vector3 getPosition(int j, Matrix3 basis, Vector3 origin) const { + + /* // Does the point r fall in the grid? */ + /* // Obviously this is without periodic boundary conditions. */ + /* bool inGrid(Vector3 r) const; */ + + /* bool inGridInterp(Vector3 r) const; */ + + /* Vector3 transformTo(Vector3 r) const; */ + + /* Vector3 transformFrom(Vector3 l) const; */ + + IndexList index(int j) const; + int indexX(int j) const; + int indexY(int j) const; + int indexZ(int j) const; + int index(int ix, int iy, int iz) const; + + /* int index(Vector3 r) const; */ + /* int nearestIndex(Vector3 r) const; */ + + HOST DEVICE inline int length() const { + return size; + } + + HOST DEVICE inline int getNx() const {return nx;} + HOST DEVICE inline int getNy() const {return ny;} + HOST DEVICE inline int getNz() const {return nz;} + HOST DEVICE inline int getSize() const {return size;} + + + // Add a fixed value to the grid. + void shift(float s); + + // Multiply the grid by a fixed value. + void scale(float s); + + /* \ + | COMPUTED | + \=========*/ + + // Get the mean of the entire grid. + float mean() const + + // Get the potential at the closest node. + virtual float getPotential(Vector3 pos) const; + + // Added by Rogan for times when simpler calculations are required. + virtual float interpolatePotentialLinearly(Vector3 pos) const; + + HOST DEVICE inline float interpolateDiffX(Vector3 pos, float w[3], float g1[4][4][4]) const { + float a0, a1, a2, a3; + + // RBTODO parallelize loops? + + // Mix along x, taking the derivative. + float g2[4][4]; + for (int iy = 0; iy < 4; iy++) { + for (int iz = 0; iz < 4; iz++) { + a3 = 0.5f*(-g1[0][iy][iz] + 3.0f*g1[1][iy][iz] - 3.0f*g1[2][iy][iz] + g1[3][iy][iz]); + a2 = 0.5f*(2.0f*g1[0][iy][iz] - 5.0f*g1[1][iy][iz] + 4.0f*g1[2][iy][iz] - g1[3][iy][iz]); + a1 = 0.5f*(-g1[0][iy][iz] + g1[2][iy][iz]); + a0 = g1[1][iy][iz]; + + //g2[iy][iz] = a3*w[0]*w[0]*w[0] + a2*w[0]*w[0] + a1*w[0] + a0; + g2[iy][iz] = 3.0f*a3*w[0]*w[0] + 2.0f*a2*w[0] + a1; + } + } + + + // Mix along y. + float g3[4]; + for (int iz = 0; iz < 4; iz++) { + a3 = 0.5f*(-g2[0][iz] + 3.0f*g2[1][iz] - 3.0f*g2[2][iz] + g2[3][iz]); + a2 = 0.5f*(2.0f*g2[0][iz] - 5.0f*g2[1][iz] + 4.0f*g2[2][iz] - g2[3][iz]); + a1 = 0.5f*(-g2[0][iz] + g2[2][iz]); + a0 = g2[1][iz]; + + g3[iz] = a3*w[1]*w[1]*w[1] + a2*w[1]*w[1] + a1*w[1] + a0; + } + + // Mix along z. + a3 = 0.5f*(-g3[0] + 3.0f*g3[1] - 3.0f*g3[2] + g3[3]); + a2 = 0.5f*(2.0f*g3[0] - 5.0f*g3[1] + 4.0f*g3[2] - g3[3]); + a1 = 0.5f*(-g3[0] + g3[2]); + a0 = g3[1]; + + float retval = -(a3*w[2]*w[2]*w[2] + a2*w[2]*w[2] + a1*w[2] + a0); + return retval; + } + + HOST DEVICE inline float interpolateDiffY(Vector3 pos, float w[3], float g1[4][4][4]) const { + float a0, a1, a2, a3; + + // Mix along x, taking the derivative. + float g2[4][4]; + for (int iy = 0; iy < 4; iy++) { + for (int iz = 0; iz < 4; iz++) { + a3 = 0.5f*(-g1[0][iy][iz] + 3.0f*g1[1][iy][iz] - 3.0f*g1[2][iy][iz] + g1[3][iy][iz]); + a2 = 0.5f*(2.0f*g1[0][iy][iz] - 5.0f*g1[1][iy][iz] + 4.0f*g1[2][iy][iz] - g1[3][iy][iz]); + a1 = 0.5f*(-g1[0][iy][iz] + g1[2][iy][iz]); + a0 = g1[1][iy][iz]; + + g2[iy][iz] = a3*w[0]*w[0]*w[0] + a2*w[0]*w[0] + a1*w[0] + a0; + } + } + + // Mix along y. + float g3[4]; + for (int iz = 0; iz < 4; iz++) { + a3 = 0.5f*(-g2[0][iz] + 3.0f*g2[1][iz] - 3.0f*g2[2][iz] + g2[3][iz]); + a2 = 0.5f*(2.0f*g2[0][iz] - 5.0f*g2[1][iz] + 4.0f*g2[2][iz] - g2[3][iz]); + a1 = 0.5f*(-g2[0][iz] + g2[2][iz]); + a0 = g2[1][iz]; + + //g3[iz] = a3*w[1]*w[1]*w[1] + a2*w[1]*w[1] + a1*w[1] + a0; + g3[iz] = 3.0f*a3*w[1]*w[1] + 2.0f*a2*w[1] + a1; + } + + // Mix along z. + a3 = 0.5f*(-g3[0] + 3.0f*g3[1] - 3.0f*g3[2] + g3[3]); + a2 = 0.5f*(2.0f*g3[0] - 5.0f*g3[1] + 4.0f*g3[2] - g3[3]); + a1 = 0.5f*(-g3[0] + g3[2]); + a0 = g3[1]; + + return -(a3*w[2]*w[2]*w[2] + a2*w[2]*w[2] + a1*w[2] + a0); + } + + HOST DEVICE inline float interpolateDiffZ(Vector3 pos, float w[3], float g1[4][4][4]) const { + float a0, a1, a2, a3; + + // Mix along x, taking the derivative. + float g2[4][4]; + for (int iy = 0; iy < 4; iy++) { + for (int iz = 0; iz < 4; iz++) { + a3 = 0.5f*(-g1[0][iy][iz] + 3.0f*g1[1][iy][iz] - 3.0f*g1[2][iy][iz] + g1[3][iy][iz]); + a2 = 0.5f*(2.0f*g1[0][iy][iz] - 5.0f*g1[1][iy][iz] + 4.0f*g1[2][iy][iz] - g1[3][iy][iz]); + a1 = 0.5f*(-g1[0][iy][iz] + g1[2][iy][iz]); + a0 = g1[1][iy][iz]; + + g2[iy][iz] = a3*w[0]*w[0]*w[0] + a2*w[0]*w[0] + a1*w[0] + a0; + } + } + + // Mix along y. + float g3[4]; + for (int iz = 0; iz < 4; iz++) { + a3 = 0.5f*(-g2[0][iz] + 3.0f*g2[1][iz] - 3.0f*g2[2][iz] + g2[3][iz]); + a2 = 0.5f*(2.0f*g2[0][iz] - 5.0f*g2[1][iz] + 4.0f*g2[2][iz] - g2[3][iz]); + a1 = 0.5f*(-g2[0][iz] + g2[2][iz]); + a0 = g2[1][iz]; + + g3[iz] = a3*w[1]*w[1]*w[1] + a2*w[1]*w[1] + a1*w[1] + a0; + } + + // Mix along z. + a3 = 0.5f*(-g3[0] + 3.0f*g3[1] - 3.0f*g3[2] + g3[3]); + a2 = 0.5f*(2.0f*g3[0] - 5.0f*g3[1] + 4.0f*g3[2] - g3[3]); + a1 = 0.5f*(-g3[0] + g3[2]); + a0 = g3[1]; + + return -(3.0f*a3*w[2]*w[2] + 2.0f*a2*w[2] + a1); + } + + // RBTODO overload with optimized algorithm + // skip transforms (assume identity basis) + HOST DEVICE inline float interpolatePotential(Vector3 pos) const { + // Find the home node. + Vector3 l = pos; + int homeX = int(floor(l.x)); + int homeY = int(floor(l.y)); + int homeZ = int(floor(l.z)); + + // out of grid? return 0 + // RBTODO + + // Get the array jumps. + int jump[3]; + jump[0] = nz*ny; + jump[1] = nz; + jump[2] = 1; + + // Shift the indices in the home array. + int home[3]; + home[0] = homeX; + home[1] = homeY; + home[2] = homeZ; + + // Get the grid dimensions. + int g[3]; + g[0] = nx; + g[1] = ny; + g[2] = nz; + + // Get the interpolation coordinates. + float w[3]; + w[0] = l.x - homeX; + w[1] = l.y - homeY; + w[2] = l.z - homeZ; + + // Find the values at the neighbors. + float g1[4][4][4]; + for (int ix = 0; ix < 4; ix++) { + int jx = ix-1 + home[0]; + jx = wrap(jx, g[0]); + for (int iy = 0; iy < 4; iy++) { + int jy = iy-1 + home[1]; + jy = wrap(jy, g[1]); + for (int iz = 0; iz < 4; iz++) { + // Wrap around the periodic boundaries. + int jz = iz-1 + home[2]; + jz = wrap(jz, g[2]); + + int ind = jz*jump[2] + jy*jump[1] + jx*jump[0]; + g1[ix][iy][iz] = val[ind]; + } + } + } + float a0, a1, a2, a3; + + // Mix along x. + float g2[4][4]; + for (int iy = 0; iy < 4; iy++) { + for (int iz = 0; iz < 4; iz++) { + a3 = 0.5f*(-g1[0][iy][iz] + 3.0f*g1[1][iy][iz] - 3.0f*g1[2][iy][iz] + g1[3][iy][iz]); + a2 = 0.5f*(2.0f*g1[0][iy][iz] - 5.0f*g1[1][iy][iz] + 4.0f*g1[2][iy][iz] - g1[3][iy][iz]); + a1 = 0.5f*(-g1[0][iy][iz] + g1[2][iy][iz]); + a0 = g1[1][iy][iz]; + + g2[iy][iz] = a3*w[0]*w[0]*w[0] + a2*w[0]*w[0] + a1*w[0] + a0; + } + } + + // Mix along y. + float g3[4]; + for (int iz = 0; iz < 4; iz++) { + a3 = 0.5f*(-g2[0][iz] + 3.0f*g2[1][iz] - 3.0f*g2[2][iz] + g2[3][iz]); + a2 = 0.5f*(2.0f*g2[0][iz] - 5.0f*g2[1][iz] + 4.0f*g2[2][iz] - g2[3][iz]); + a1 = 0.5f*(-g2[0][iz] + g2[2][iz]); + a0 = g2[1][iz]; + + g3[iz] = a3*w[1]*w[1]*w[1] + a2*w[1]*w[1] + a1*w[1] + a0; + } + + // Mix along z. + a3 = 0.5f*(-g3[0] + 3.0f*g3[1] - 3.0f*g3[2] + g3[3]); + a2 = 0.5f*(2.0f*g3[0] - 5.0f*g3[1] + 4.0f*g3[2] - g3[3]); + a1 = 0.5f*(-g3[0] + g3[2]); + a0 = g3[1]; + + return a3*w[2]*w[2]*w[2] + a2*w[2]*w[2] + a1*w[2] + a0; + } + + HOST DEVICE inline static int wrap(int i, int n) { + if (i < 0) { + i %= n; + i += n; + } + // The portion above allows i == n, so no else keyword. + if (i >= n) { + i %= n; + } + return i; + } + + /** interpolateForce() to be used on CUDA Device **/ + DEVICE inline Vector3 interpolateForceD(Vector3 pos) const { + Vector3 f; + Vector3 l = pos; + int homeX = int(floor(l.x)); + int homeY = int(floor(l.y)); + int homeZ = int(floor(l.z)); + // Get the array jumps with shifted indices. + int jump[3]; + jump[0] = nz*ny; + jump[1] = nz; + jump[2] = 1; + // Shift the indices in the home array. + int home[3]; + home[0] = homeX; + home[1] = homeY; + home[2] = homeZ; + + // Shift the indices in the grid dimensions. + int g[3]; + g[0] = nx; + g[1] = ny; + g[2] = nz; + + // Get the interpolation coordinates. + float w[3]; + w[0] = l.x - homeX; + w[1] = l.y - homeY; + w[2] = l.z - homeZ; + // Find the values at the neighbors. + float g1[4][4][4]; + for (int ix = 0; ix < 4; ix++) { + for (int iy = 0; iy < 4; iy++) { + for (int iz = 0; iz < 4; iz++) { + // Wrap around the periodic boundaries. + int jx = ix-1 + home[0]; + jx = wrap(jx, g[0]); + int jy = iy-1 + home[1]; + jy = wrap(jy, g[1]); + int jz = iz-1 + home[2]; + jz = wrap(jz, g[2]); + int ind = jz*jump[2] + jy*jump[1] + jx*jump[0]; + g1[ix][iy][iz] = val[ind]; + } + } + } + f.x = interpolateDiffX(pos, w, g1); + f.y = interpolateDiffY(pos, w, g1); + f.z = interpolateDiffZ(pos, w, g1); + // Vector3 f1 = basisInv.transpose().transform(f); + // return f1; + return f; + } + + inline virtual Vector3 interpolateForce(Vector3 pos) const { + Vector3 f; + Vector3 l = pos; + int homeX = int(floor(l.x)); + int homeY = int(floor(l.y)); + int homeZ = int(floor(l.z)); + // Get the array jumps with shifted indices. + int jump[3]; + jump[0] = nz*ny; + jump[1] = nz; + jump[2] = 1; + // Shift the indices in the home array. + int home[3]; + home[0] = homeX; + home[1] = homeY; + home[2] = homeZ; + + // Shift the indices in the grid dimensions. + int g[3]; + g[0] = nx; + g[1] = ny; + g[2] = nz; + + // Get the interpolation coordinates. + float w[3]; + w[0] = l.x - homeX; + w[1] = l.y - homeY; + w[2] = l.z - homeZ; + // Find the values at the neighbors. + float g1[4][4][4]; + //RBTODO parallelize? + for (int ix = 0; ix < 4; ix++) { + for (int iy = 0; iy < 4; iy++) { + for (int iz = 0; iz < 4; iz++) { + // Wrap around the periodic boundaries. + int jx = ix-1 + home[0]; + jx = wrap(jx, g[0]); + int jy = iy-1 + home[1]; + jy = wrap(jy, g[1]); + int jz = iz-1 + home[2]; + jz = wrap(jz, g[2]); + int ind = jz*jump[2] + jy*jump[1] + jx*jump[0]; + g1[ix][iy][iz] = val[ind]; + } + } + } + f.x = interpolateDiffX(pos, w, g1); + f.y = interpolateDiffY(pos, w, g1); + f.z = interpolateDiffZ(pos, w, g1); + // Vector3 f1 = basisInv.transpose().transform(f); + // return f1; + return f; + } + + // Wrap coordinate: 0 <= x < l + HOST DEVICE inline float wrapFloat(float x, float l) const { + int image = int(floor(x/l)); + x -= image*l; + return x; + } + + // Wrap distance: -0.5*l <= x < 0.5*l + HOST DEVICE static inline float wrapDiff(float x, float l) { + int image = int(floor(x/l)); + x -= image*l; + if (x >= 0.5f * l) + x -= l; + return x; + } + + // Wrap vector, 0 <= x < lx && 0 <= y < ly && 0 <= z < lz + HOST DEVICE inline virtual Vector3 wrap(Vector3 l) const { + l.x = wrapFloat(l.x, nx); + l.y = wrapFloat(l.y, ny); + l.z = wrapFloat(l.z, nz); + return l; + } + + // Wrap vector distance, -0.5*l <= x < 0.5*l && ... + HOST DEVICE inline Vector3 wrapDiff(Vector3 l) const { + l.x = wrapDiff(l.x, nx); + l.y = wrapDiff(l.y, ny); + l.z = wrapDiff(l.z, nz); + return l; + } + + /* Vector3 wrapDiffNearest(Vector3 r) const; */ + + // Includes the home node. + // indexBuffer must have a size of at least 27. + void getNeighbors(int j, int* indexBuffer) const; + + // Get the values at the neighbors of a node. + // Note that homeX, homeY, and homeZ do not need to be wrapped, + // since we do it here. + void getNeighborValues(NeighborList* neigh, int homeX, int homeY, int homeZ) const; + inline void setVal(float* v) { val = v; } + +public: + int nx, ny, nz; + int nynz; + int size; +public: + float* val; +}; + diff --git a/RigidBodyType.cu b/RigidBodyType.cu index b587174..30322be 100644 --- a/RigidBodyType.cu +++ b/RigidBodyType.cu @@ -11,6 +11,9 @@ void RigidBodyType::clear() { potentialGrids.clear(); densityGrids.clear(); + potentialGridKeys.clear(); + densityGridKeys.clear(); + if (numPotGrids > 0) delete[] rawPotentialGrids; if (numDenGrids > 0) delete[] rawDensityGrids; rawPotentialGrids = NULL; @@ -67,8 +70,7 @@ void RigidBodyType::addPotentialGrid(String s) { BaseGrid g(token[1]); potentialGrids.push_back( g ); - // [numPotGrids] = g; - // numPotGrids++; + potentialGridKeys.push_back( key ); } void RigidBodyType::addDensityGrid(String s) { // tokenize and return @@ -82,7 +84,9 @@ void RigidBodyType::addDensityGrid(String s) { String key = token[0]; BaseGrid g(token[1]); + densityGrids.push_back( g ); + densityGridKeys.push_back( key ); } void RigidBodyType::updateRaw() { diff --git a/RigidBodyType.h b/RigidBodyType.h index d20f1e6..4c334dd 100644 --- a/RigidBodyType.h +++ b/RigidBodyType.h @@ -50,6 +50,9 @@ public: Vector3 transDamping; Vector3 rotDamping; + std::vector<String> potentialGridKeys; + std::vector<String> densityGridKeys; + std::vector<BaseGrid> potentialGrids; std::vector<BaseGrid> densityGrids; diff --git a/notes.org b/notes.org index 9cf5d48..c6ee171 100644 --- a/notes.org +++ b/notes.org @@ -1 +1,24 @@ -* Q: vector classes in thrust look like they could simplify code; bad idea? +* grid +** Q: overhead of dynamic parallelism? +** Q: overhead of classes? + Member data can't be shared + +** TODO read up on: +*** dynamic parallelism +*** registers +* TODO decide whether to write a monolithic kernel or to break problem into subkernel launches + + + +* optimize algorithms for cuda (balance floating point calculation with memory transfers) +** parallize loops +** subthreads +** new algorithms may be improved through new data structures; introduce +*** RigidBodyGrid: methods pulled from BaseGrid, but transforms passed to functions +**** no wrapping + +* pairlists for rigid bodies +** maybe for grids, depending on parallel structure of code + +* other ideas +** interpolate density grids? diff --git a/runBrownTown.cpp b/runBrownTown.cpp index 4cd7714..5de068f 100644 --- a/runBrownTown.cpp +++ b/runBrownTown.cpp @@ -103,9 +103,7 @@ int main(int argc, char* argv[]) { GPUManager::init(); GPUManager::safe(safe); Configuration config(configFile, replicas, debug); - printf("Done with config\n"); GPUManager::set(0); - printf("Done with gpumanager\n"); config.copyToCUDA(); GrandBrownTown brown(config, outArg, randomSeed, diff --git a/useful.h b/useful.h index 78b6efc..72070e8 100644 --- a/useful.h +++ b/useful.h @@ -244,7 +244,7 @@ class Matrix3 { public: HOST DEVICE inline Matrix3() {} Matrix3(float s); - Matrix3(float xx, float xy, float xz, float yx, float yy, float yz, float zx, float zy, float zz); + HOST DEVICE Matrix3(float xx, float xy, float xz, float yx, float yy, float yz, float zx, float zy, float zz); Matrix3(float x, float y, float z); Matrix3(const Vector3& ex, const Vector3& ey, const Vector3& ez); Matrix3(const float* d); -- GitLab