Skip to content
Snippets Groups Projects
Commit 2a5a3261 authored by cmaffeo2's avatar cmaffeo2
Browse files

slightly improved tricubic interpolation; now 145 regs, a little faster

parent 9b1b3888
No related branches found
No related tags found
No related merge requests found
...@@ -49,8 +49,10 @@ void computeGridGridForce(const RigidBodyGrid* rho, const RigidBodyGrid* u, ...@@ -49,8 +49,10 @@ void computeGridGridForce(const RigidBodyGrid* rho, const RigidBodyGrid* u,
// RBTODO What about non-unit delta? // RBTODO What about non-unit delta?
// RBTODO combine interp methods and reduce repetition! // RBTODO combine interp methods and reduce repetition!
// Vector3 f = u->interpolateForceD( u_ijk_float ); /* in coord frame of u */ // Vector3 f = u->interpolateForceD( u_ijk_float ); /* in coord frame of u */
force[tid] = u->interpolateForceD( u_ijk_float ); /* in coord frame of u */ const ForceEnergy fe = u->interpolateForceD( u_ijk_float ); /* in coord frame of u */
const float r_val = rho->val[r_id]; force[tid] = fe.f;
const float r_val = rho->val[r_id]; /* move to beginning of script? */
// force[tid] = r_val*force[tid]; // force[tid] = r_val*force[tid];
force[tid] = basis_u_inv.transpose().transform( r_val*force[tid] ); /* transform to lab frame */ force[tid] = basis_u_inv.transpose().transform( r_val*force[tid] ); /* transform to lab frame */
// force[tid] = f; // force[tid] = f;
......
...@@ -5,8 +5,8 @@ ...@@ -5,8 +5,8 @@
#include <cuda.h> #include <cuda.h>
#include <cuda_runtime.h> #include <cuda_runtime.h>
#define NUMTHREADS 128 /* try with 64, every 32+ */ // #define NUMTHREADS 128 /* try with 64, every 32+ */
// #define NUMTHREADS 64 #define NUMTHREADS 64
#define NUMSTREAMS 8 #define NUMSTREAMS 8
class Configuration; class Configuration;
......
...@@ -364,7 +364,7 @@ HOST DEVICE float RigidBodyGrid::interpolatePotential(const Vector3& l) const { ...@@ -364,7 +364,7 @@ HOST DEVICE float RigidBodyGrid::interpolatePotential(const Vector3& l) const {
} }
/** interpolateForce() to be used on CUDA Device **/ /** interpolateForce() to be used on CUDA Device **/
DEVICE Vector3 RigidBodyGrid::interpolateForceD(const Vector3 l) const { DEVICE ForceEnergy RigidBodyGrid::interpolateForceD(const Vector3 l) const {
Vector3 f; Vector3 f;
// Vector3 l = basisInv.transform(pos - origin); // Vector3 l = basisInv.transform(pos - origin);
const int homeX = int(floor(l.x)); const int homeX = int(floor(l.x));
...@@ -373,6 +373,10 @@ DEVICE Vector3 RigidBodyGrid::interpolateForceD(const Vector3 l) const { ...@@ -373,6 +373,10 @@ DEVICE Vector3 RigidBodyGrid::interpolateForceD(const Vector3 l) const {
const float wx = l.x - homeX; const float wx = l.x - homeX;
const float wy = l.y - homeY; const float wy = l.y - homeY;
const float wz = l.z - homeZ; const float wz = l.z - homeZ;
const float wx2 = wx*wx;
// RBTODO: test against cpu algorithm; also see if its the same algorithm used by NAMD
// RBTODO: test NAMD alg. for speed
/* f.x */ /* f.x */
float g3[3][4]; float g3[3][4];
...@@ -388,55 +392,54 @@ DEVICE Vector3 RigidBodyGrid::interpolateForceD(const Vector3 l) const { ...@@ -388,55 +392,54 @@ DEVICE Vector3 RigidBodyGrid::interpolateForceD(const Vector3 l) const {
v[ix] = jz < 0 || jz >= nz || jy < 0 || jy >= ny || jx < 0 || jx >= nx ? v[ix] = jz < 0 || jz >= nz || jy < 0 || jy >= ny || jx < 0 || jx >= nx ?
0 : val[ind]; 0 : val[ind];
} }
const float a3 = 0.5f*(-v[0] + 3.0f*v[1] - 3.0f*v[2] + v[3]); const float a3 = 0.5f*(-v[0] + 3.0f*v[1] - 3.0f*v[2] + v[3])*wx2;
const float a2 = 0.5f*(2.0f*v[0] - 5.0f*v[1] + 4.0f*v[2] - v[3]); const float a2 = 0.5f*(2.0f*v[0] - 5.0f*v[1] + 4.0f*v[2] - v[3])*wx;
const float a1 = 0.5f*(-v[0] + v[2]); const float a1 = 0.5f*(-v[0] + v[2]);
const float a0 = v[1]; g2[0][iy] = 3.0f*a3 + 2.0f*a2 + a1; /* f.x (derivative) */
g2[1][iy] = a3*wx + a2*wx + a1*wx + v[1]; /* f.y & f.z */
g2[0][iy] = 3.0f*a3*wx*wx + 2.0f*a2*wx + a1; /* f.x (derivative) */
g2[1][iy] = a3*wx*wx*wx + a2*wx*wx + a1*wx + a0; /* f.y & f.z */
} }
// Mix along y. // Mix along y.
{ {
const float a3 = 0.5f*(-g2[0][0] + 3.0f*g2[0][1] - 3.0f*g2[0][2] + g2[0][3]); g3[0][iz] = 0.5f*(-g2[0][0] + 3.0f*g2[0][1] - 3.0f*g2[0][2] + g2[0][3])*wy*wy*wy +
const float a2 = 0.5f*(2.0f*g2[0][0] - 5.0f*g2[0][1] + 4.0f*g2[0][2] - g2[0][3]); 0.5f*(2.0f*g2[0][0] - 5.0f*g2[0][1] + 4.0f*g2[0][2] - g2[0][3]) *wy*wy +
const float a1 = 0.5f*(-g2[0][0] + g2[0][2]); 0.5f*(-g2[0][0] + g2[0][2]) *wy +
const float a0 = g2[0][1]; g2[0][1];
g3[0][iz] = a3*wy*wy*wy + a2*wy*wy + a1*wy + a0; /* f.x */
} }
{ {
const float a3 = 0.5f*(-g2[1][0] + 3.0f*g2[1][1] - 3.0f*g2[1][2] + g2[1][3]); const float a3 = 0.5f*(-g2[1][0] + 3.0f*g2[1][1] - 3.0f*g2[1][2] + g2[1][3])*wy*wy;
const float a2 = 0.5f*(2.0f*g2[1][0] - 5.0f*g2[1][1] + 4.0f*g2[1][2] - g2[1][3]); const float a2 = 0.5f*(2.0f*g2[1][0] - 5.0f*g2[1][1] + 4.0f*g2[1][2] - g2[1][3])*wy;
const float a1 = 0.5f*(-g2[1][0] + g2[1][2]); const float a1 = 0.5f*(-g2[1][0] + g2[1][2]);
const float a0 = g2[1][1]; g3[1][iz] = 3.0f*a3 + 2.0f*a2 + a1; /* f.y */
g3[1][iz] = 3.0f*a3*wy*wy + 2.0f*a2*wy + a1; /* f.y */ g3[2][iz] = a3*wy + a2*wy + a1*wy + g2[1][1]; /* f.z */
g3[2][iz] = a3*wy*wy*wy + a2*wy*wy + a1*wy + a0; /* f.z */
} }
} }
// Mix along z. // Mix along z.
{ {
const float a3 = 0.5f*(-g3[0][0] + 3.0f*g3[0][1] - 3.0f*g3[0][2] + g3[0][3]); f.x = -0.5f*(-g3[0][0] + 3.0f*g3[0][1] - 3.0f*g3[0][2] + g3[0][3])*wz*wz*wz +
const float a2 = 0.5f*(2.0f*g3[0][0] - 5.0f*g3[0][1] + 4.0f*g3[0][2] - g3[0][3]); -0.5f*(2.0f*g3[0][0] - 5.0f*g3[0][1] + 4.0f*g3[0][2] - g3[0][3])*wz*wz +
const float a1 = 0.5f*(-g3[0][0] + g3[0][2]); -0.5f*(-g3[0][0] + g3[0][2]) *wz -
const float a0 = g3[0][1]; g3[0][1];
f.x = -(a3*wz*wz*wz + a2*wz*wz + a1*wz + a0);
} }
{ {
const float a3 = 0.5f*(-g3[1][0] + 3.0f*g3[1][1] - 3.0f*g3[1][2] + g3[1][3]); f.y = -0.5f*(-g3[1][0] + 3.0f*g3[1][1] - 3.0f*g3[1][2] + g3[1][3])*wz*wz*wz +
const float a2 = 0.5f*(2.0f*g3[1][0] - 5.0f*g3[1][1] + 4.0f*g3[1][2] - g3[1][3]); -0.5f*(2.0f*g3[1][0] - 5.0f*g3[1][1] + 4.0f*g3[1][2] - g3[1][3])*wz*wz +
const float a1 = 0.5f*(-g3[1][0] + g3[1][2]); -0.5f*(-g3[1][0] + g3[1][2]) *wz -
const float a0 = g3[1][1]; g3[1][1];
f.y = -(a3*wz*wz*wz + a2*wz*wz + a1*wz + a0);
} }
{ {
const float a3 = 0.5f*(-g3[2][0] + 3.0f*g3[2][1] - 3.0f*g3[2][2] + g3[2][3]); f.z = -1.5f*(-g3[2][0] + 3.0f*g3[2][1] - 3.0f*g3[2][2] + g3[2][3])*wz*wz -
const float a2 = 0.5f*(2.0f*g3[2][0] - 5.0f*g3[2][1] + 4.0f*g3[2][2] - g3[2][3]); (2.0f*g3[2][0] - 5.0f*g3[2][1] + 4.0f*g3[2][2] - g3[2][3]) *wz -
const float a1 = 0.5f*(-g3[2][0] + g3[2][2]); 0.5f*(-g3[2][0] + g3[2][2]);
f.z = -(3.0f*a3*wz*wz + 2.0f*a2*wz + a1);
} }
return f; float e = 0.5f*(-g3[2][0] + 3.0f*g3[2][1] - 3.0f*g3[2][2] + g3[2][3])*wz*wz*wz +
0.5f*(2.0f*g3[2][0] - 5.0f*g3[2][1] + 4.0f*g3[2][2] - g3[2][3]) *wz*wz +
0.5f*(-g3[2][0] + g3[2][2]) *wz +
g3[2][1];
return ForceEnergy(f,e);
} }
...@@ -472,7 +475,7 @@ DEVICE float RigidBodyGrid::interpolatePotentialLinearly(const Vector3& l) const ...@@ -472,7 +475,7 @@ DEVICE float RigidBodyGrid::interpolatePotentialLinearly(const Vector3& l) const
const float wz = l.z - homeZ; const float wz = l.z - homeZ;
return wz * (g3[1] - g3[0]) + g3[0]; return wz * (g3[1] - g3[0]) + g3[0];
} }
DEVICE Vector3 RigidBodyGrid::interpolateForceDLinearly(const Vector3& l) const { DEVICE ForceEnergy RigidBodyGrid::interpolateForceDLinearly(const Vector3& l) const {
// Find the home node. // Find the home node.
const int homeX = int(floor(l.x)); const int homeX = int(floor(l.x));
const int homeY = int(floor(l.y)); const int homeY = int(floor(l.y));
...@@ -514,6 +517,8 @@ DEVICE Vector3 RigidBodyGrid::interpolateForceDLinearly(const Vector3& l) const ...@@ -514,6 +517,8 @@ DEVICE Vector3 RigidBodyGrid::interpolateForceDLinearly(const Vector3& l) const
f.x = -(wz * (g3[0][1] - g3[0][0]) + g3[0][0]); f.x = -(wz * (g3[0][1] - g3[0][0]) + g3[0][0]);
f.y = -(wz * (g3[1][1] - g3[1][0]) + g3[1][0]); f.y = -(wz * (g3[1][1] - g3[1][0]) + g3[1][0]);
f.z = - (g3[2][1] - g3[2][0]); f.z = - (g3[2][1] - g3[2][0]);
float e = wz * (g3[2][1] - g3[2][0]) + g3[2][0];
return ForceEnergy(f,e);
} }
......
...@@ -28,6 +28,14 @@ using namespace std; ...@@ -28,6 +28,14 @@ using namespace std;
// RBTODO: integrate with existing grid code? // RBTODO: integrate with existing grid code?
class ForceEnergy {
public:
DEVICE ForceEnergy(Vector3 f, float e) :
f(f), e(e) {};
Vector3 f;
float e;
};
class RigidBodyGrid { class RigidBodyGrid {
friend class SparseGrid; friend class SparseGrid;
private: private:
...@@ -140,7 +148,7 @@ public: ...@@ -140,7 +148,7 @@ public:
/* virtual float getPotential(Vector3 pos) const; */ /* virtual float getPotential(Vector3 pos) const; */
DEVICE float interpolatePotentialLinearly(const Vector3& l) const; DEVICE float interpolatePotentialLinearly(const Vector3& l) const;
DEVICE Vector3 interpolateForceDLinearly(const Vector3& l) const; DEVICE ForceEnergy interpolateForceDLinearly(const Vector3& l) const;
HOST DEVICE float interpolatePotential(const Vector3& l) const; HOST DEVICE float interpolatePotential(const Vector3& l) const;
...@@ -157,7 +165,7 @@ public: ...@@ -157,7 +165,7 @@ public:
} }
/** interpolateForce() to be used on CUDA Device **/ /** interpolateForce() to be used on CUDA Device **/
DEVICE Vector3 interpolateForceD(Vector3 l) const; DEVICE ForceEnergy interpolateForceD(Vector3 l) const;
// Wrap coordinate: 0 <= x < l // Wrap coordinate: 0 <= x < l
HOST DEVICE inline float wrapFloat(float x, float l) const { HOST DEVICE inline float wrapFloat(float x, float l) const {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment