From cf6ce09f56405c95da2ecb8a06492d3e4448f0b5 Mon Sep 17 00:00:00 2001 From: Chris Maffeo <cmaffeo2@illinois.edu> Date: Thu, 14 Dec 2017 08:40:48 -0600 Subject: [PATCH] HYC: Added namd-style interpolation schemes for grids; added null check to RigidBodyGrid before deletion of rigidbody memory; changed rigid body wrap from modulo to while loop --- src/BaseGrid.cu | 4 +- src/BaseGrid.h | 333 +++++++++++++++++++++++++++++++++++++++++- src/RigidBodyGrid.cu | 338 ++++++++++++++++++++++++++++++++++++++++++- src/RigidBodyGrid.h | 9 +- 4 files changed, 673 insertions(+), 11 deletions(-) diff --git a/src/BaseGrid.cu b/src/BaseGrid.cu index b5cb8d6..e70cbf1 100644 --- a/src/BaseGrid.cu +++ b/src/BaseGrid.cu @@ -382,7 +382,7 @@ float BaseGrid::getValue(int j) const { if (j < 0 || j >= size) return 0.0f; return val[j]; } - +/* float BaseGrid::getValue(int ix, int iy, int iz) const { if (ix < 0 || ix >= nx) return 0.0f; if (iy < 0 || iy >= ny) return 0.0f; @@ -391,7 +391,7 @@ float BaseGrid::getValue(int ix, int iy, int iz) const { int j = iz + iy*nz + ix*ny*nz; return val[j]; } - +*/ Vector3 BaseGrid::getPosition(int ix, int iy, int iz) const { return basis.transform(Vector3(ix, iy, iz)) + origin; } diff --git a/src/BaseGrid.h b/src/BaseGrid.h index eee5cf7..01419a0 100644 --- a/src/BaseGrid.h +++ b/src/BaseGrid.h @@ -117,7 +117,7 @@ public: virtual float getValue(int j) const; - virtual float getValue(int ix, int iy, int iz) const; + //virtual float getValue(int ix, int iy, int iz) const; Vector3 getPosition(int ix, int iy, int iz) const; @@ -383,8 +383,8 @@ public: } HOST DEVICE inline static int wrap(int i, int n) { - if (i < 0) { - i %= n; + while (i < 0) { + //i %= n; i += n; } // The portion above allows i == n, so no else keyword. @@ -556,7 +556,333 @@ public: float e = wz * (g3[2][1] - g3[2][0]) + g3[2][0]; return ForceEnergy(f,e); } + DEVICE inline ForceEnergy interpolateForceDnamd(const Vector3& pos) const { + Vector3 f; + const Vector3 l = basisInv.transform(pos - origin); + + const int homeX = int(floor(l.x)); + const int homeY = int(floor(l.y)); + const int homeZ = int(floor(l.z)); + const float wx = l.x - homeX; + const float wy = l.y - homeY; + const float wz = l.z - homeZ; + + Vector3 dg = Vector3(wx,wy,wz); + + int inds[3]; + inds[0] = homeX; + inds[1] = homeY; + inds[2] = homeZ; + + // TODO: handle edges + + // Compute b + float b[64]; // Matrix of values at 8 box corners + compute_b(b, inds); + + // Compute a + float a[64]; + compute_a(a, b); + + // Calculate powers of x, y, z for later use + // e.g. x[2] = x^2 + float x[4], y[4], z[4]; + x[0] = 1; y[0] = 1; z[0] = 1; + for (int j = 1; j < 4; j++) { + x[j] = x[j-1] * dg.x; + y[j] = y[j-1] * dg.y; + z[j] = z[j-1] * dg.z; + } + float e = compute_V(a, x, y, z); + f = compute_dV(a, x, y, z); + + f = basisInv.transpose().transform(f); + return ForceEnergy(f,e); + } + DEVICE inline float compute_V(float *a, float *x, float *y, float *z) const + { + float V = 0.0; + long int ind = 0; + for (int l = 0; l < 4; l++) { + for (int k = 0; k < 4; k++) { + for (int j = 0; j < 4; j++) { + V += a[ind] * x[j] * y[k] * z[l]; + ind++; + } + } + } + return V; + } + DEVICE inline Vector3 compute_dV(float *a, float *x, float *y, float *z) const + { + Vector3 dV = Vector3(0.0f); + long int ind = 0; + for (int l = 0; l < 4; l++) { + for (int k = 0; k < 4; k++) { + for (int j = 0; j < 4; j++) { + if (j > 0) dV.x += a[ind] * j * x[j-1] * y[k] * z[l]; // dV/dx + if (k > 0) dV.y += a[ind] * k * x[j] * y[k-1] * z[l]; // dV/dy + if (l > 0) dV.z += a[ind] * l * x[j] * y[k] * z[l-1]; // dV/dz + ind++; + } + } + } + return dV*(-1.f); + } + DEVICE inline void compute_a(float *a, float *b) const + { + // Static sparse 64x64 matrix times vector ... nicer looking way than this? + a[0] = b[0]; + a[1] = b[8]; + a[2] = -3*b[0] + 3*b[1] - 2*b[8] - b[9]; + a[3] = 2*b[0] - 2*b[1] + b[8] + b[9]; + a[4] = b[16]; + a[5] = b[32]; + a[6] = -3*b[16] + 3*b[17] - 2*b[32] - b[33]; + a[7] = 2*b[16] - 2*b[17] + b[32] + b[33]; + a[8] = -3*b[0] + 3*b[2] - 2*b[16] - b[18]; + a[9] = -3*b[8] + 3*b[10] - 2*b[32] - b[34]; + a[10] = 9*b[0] - 9*b[1] - 9*b[2] + 9*b[3] + 6*b[8] + 3*b[9] - 6*b[10] - 3*b[11] + + 6*b[16] - 6*b[17] + 3*b[18] - 3*b[19] + 4*b[32] + 2*b[33] + 2*b[34] + b[35]; + a[11] = -6*b[0] + 6*b[1] + 6*b[2] - 6*b[3] - 3*b[8] - 3*b[9] + 3*b[10] + 3*b[11] + - 4*b[16] + 4*b[17] - 2*b[18] + 2*b[19] - 2*b[32] - 2*b[33] - b[34] - b[35]; + a[12] = 2*b[0] - 2*b[2] + b[16] + b[18]; + a[13] = 2*b[8] - 2*b[10] + b[32] + b[34]; + a[14] = -6*b[0] + 6*b[1] + 6*b[2] - 6*b[3] - 4*b[8] - 2*b[9] + 4*b[10] + 2*b[11] + - 3*b[16] + 3*b[17] - 3*b[18] + 3*b[19] - 2*b[32] - b[33] - 2*b[34] - b[35]; + a[15] = 4*b[0] - 4*b[1] - 4*b[2] + 4*b[3] + 2*b[8] + 2*b[9] - 2*b[10] - 2*b[11] + + 2*b[16] - 2*b[17] + 2*b[18] - 2*b[19] + b[32] + b[33] + b[34] + b[35]; + a[16] = b[24]; + a[17] = b[40]; + a[18] = -3*b[24] + 3*b[25] - 2*b[40] - b[41]; + a[19] = 2*b[24] - 2*b[25] + b[40] + b[41]; + a[20] = b[48]; + a[21] = b[56]; + a[22] = -3*b[48] + 3*b[49] - 2*b[56] - b[57]; + a[23] = 2*b[48] - 2*b[49] + b[56] + b[57]; + a[24] = -3*b[24] + 3*b[26] - 2*b[48] - b[50]; + a[25] = -3*b[40] + 3*b[42] - 2*b[56] - b[58]; + a[26] = 9*b[24] - 9*b[25] - 9*b[26] + 9*b[27] + 6*b[40] + 3*b[41] - 6*b[42] - 3*b[43] + + 6*b[48] - 6*b[49] + 3*b[50] - 3*b[51] + 4*b[56] + 2*b[57] + 2*b[58] + b[59]; + a[27] = -6*b[24] + 6*b[25] + 6*b[26] - 6*b[27] - 3*b[40] - 3*b[41] + 3*b[42] + 3*b[43] + - 4*b[48] + 4*b[49] - 2*b[50] + 2*b[51] - 2*b[56] - 2*b[57] - b[58] - b[59]; + a[28] = 2*b[24] - 2*b[26] + b[48] + b[50]; + a[29] = 2*b[40] - 2*b[42] + b[56] + b[58]; + a[30] = -6*b[24] + 6*b[25] + 6*b[26] - 6*b[27] - 4*b[40] - 2*b[41] + 4*b[42] + 2*b[43] + - 3*b[48] + 3*b[49] - 3*b[50] + 3*b[51] - 2*b[56] - b[57] - 2*b[58] - b[59]; + a[31] = 4*b[24] - 4*b[25] - 4*b[26] + 4*b[27] + 2*b[40] + 2*b[41] - 2*b[42] - 2*b[43] + + 2*b[48] - 2*b[49] + 2*b[50] - 2*b[51] + b[56] + b[57] + b[58] + b[59]; + a[32] = -3*b[0] + 3*b[4] - 2*b[24] - b[28]; + a[33] = -3*b[8] + 3*b[12] - 2*b[40] - b[44]; + a[34] = 9*b[0] - 9*b[1] - 9*b[4] + 9*b[5] + 6*b[8] + 3*b[9] - 6*b[12] - 3*b[13] + + 6*b[24] - 6*b[25] + 3*b[28] - 3*b[29] + 4*b[40] + 2*b[41] + 2*b[44] + b[45]; + a[35] = -6*b[0] + 6*b[1] + 6*b[4] - 6*b[5] - 3*b[8] - 3*b[9] + 3*b[12] + 3*b[13] + - 4*b[24] + 4*b[25] - 2*b[28] + 2*b[29] - 2*b[40] - 2*b[41] - b[44] - b[45]; + a[36] = -3*b[16] + 3*b[20] - 2*b[48] - b[52]; + a[37] = -3*b[32] + 3*b[36] - 2*b[56] - b[60]; + a[38] = 9*b[16] - 9*b[17] - 9*b[20] + 9*b[21] + 6*b[32] + 3*b[33] - 6*b[36] - 3*b[37] + + 6*b[48] - 6*b[49] + 3*b[52] - 3*b[53] + 4*b[56] + 2*b[57] + 2*b[60] + b[61]; + a[39] = -6*b[16] + 6*b[17] + 6*b[20] - 6*b[21] - 3*b[32] - 3*b[33] + 3*b[36] + 3*b[37] + - 4*b[48] + 4*b[49] - 2*b[52] + 2*b[53] - 2*b[56] - 2*b[57] - b[60] - b[61]; + a[40] = 9*b[0] - 9*b[2] - 9*b[4] + 9*b[6] + 6*b[16] + 3*b[18] - 6*b[20] - 3*b[22] + + 6*b[24] - 6*b[26] + 3*b[28] - 3*b[30] + 4*b[48] + 2*b[50] + 2*b[52] + b[54]; + a[41] = 9*b[8] - 9*b[10] - 9*b[12] + 9*b[14] + 6*b[32] + 3*b[34] - 6*b[36] - 3*b[38] + + 6*b[40] - 6*b[42] + 3*b[44] - 3*b[46] + 4*b[56] + 2*b[58] + 2*b[60] + b[62]; + a[42] = -27*b[0] + 27*b[1] + 27*b[2] - 27*b[3] + 27*b[4] - 27*b[5] - 27*b[6] + 27*b[7] + - 18*b[8] - 9*b[9] + 18*b[10] + 9*b[11] + 18*b[12] + 9*b[13] - 18*b[14] - 9*b[15] + - 18*b[16] + 18*b[17] - 9*b[18] + 9*b[19] + 18*b[20] - 18*b[21] + 9*b[22] - 9*b[23] + - 18*b[24] + 18*b[25] + 18*b[26] - 18*b[27] - 9*b[28] + 9*b[29] + 9*b[30] - 9*b[31] + - 12*b[32] - 6*b[33] - 6*b[34] - 3*b[35] + 12*b[36] + 6*b[37] + 6*b[38] + 3*b[39] + - 12*b[40] - 6*b[41] + 12*b[42] + 6*b[43] - 6*b[44] - 3*b[45] + 6*b[46] + 3*b[47] + - 12*b[48] + 12*b[49] - 6*b[50] + 6*b[51] - 6*b[52] + 6*b[53] - 3*b[54] + 3*b[55] + - 8*b[56] - 4*b[57] - 4*b[58] - 2*b[59] - 4*b[60] - 2*b[61] - 2*b[62] - b[63]; + a[43] = 18*b[0] - 18*b[1] - 18*b[2] + 18*b[3] - 18*b[4] + 18*b[5] + 18*b[6] - 18*b[7] + + 9*b[8] + 9*b[9] - 9*b[10] - 9*b[11] - 9*b[12] - 9*b[13] + 9*b[14] + 9*b[15] + + 12*b[16] - 12*b[17] + 6*b[18] - 6*b[19] - 12*b[20] + 12*b[21] - 6*b[22] + 6*b[23] + + 12*b[24] - 12*b[25] - 12*b[26] + 12*b[27] + 6*b[28] - 6*b[29] - 6*b[30] + 6*b[31] + + 6*b[32] + 6*b[33] + 3*b[34] + 3*b[35] - 6*b[36] - 6*b[37] - 3*b[38] - 3*b[39] + + 6*b[40] + 6*b[41] - 6*b[42] - 6*b[43] + 3*b[44] + 3*b[45] - 3*b[46] - 3*b[47] + + 8*b[48] - 8*b[49] + 4*b[50] - 4*b[51] + 4*b[52] - 4*b[53] + 2*b[54] - 2*b[55] + + 4*b[56] + 4*b[57] + 2*b[58] + 2*b[59] + 2*b[60] + 2*b[61] + b[62] + b[63]; + a[44] = -6*b[0] + 6*b[2] + 6*b[4] - 6*b[6] - 3*b[16] - 3*b[18] + 3*b[20] + 3*b[22] + - 4*b[24] + 4*b[26] - 2*b[28] + 2*b[30] - 2*b[48] - 2*b[50] - b[52] - b[54]; + a[45] = -6*b[8] + 6*b[10] + 6*b[12] - 6*b[14] - 3*b[32] - 3*b[34] + 3*b[36] + 3*b[38] + - 4*b[40] + 4*b[42] - 2*b[44] + 2*b[46] - 2*b[56] - 2*b[58] - b[60] - b[62]; + a[46] = 18*b[0] - 18*b[1] - 18*b[2] + 18*b[3] - 18*b[4] + 18*b[5] + 18*b[6] - 18*b[7] + + 12*b[8] + 6*b[9] - 12*b[10] - 6*b[11] - 12*b[12] - 6*b[13] + 12*b[14] + 6*b[15] + + 9*b[16] - 9*b[17] + 9*b[18] - 9*b[19] - 9*b[20] + 9*b[21] - 9*b[22] + 9*b[23] + + 12*b[24] - 12*b[25] - 12*b[26] + 12*b[27] + 6*b[28] - 6*b[29] - 6*b[30] + 6*b[31] + + 6*b[32] + 3*b[33] + 6*b[34] + 3*b[35] - 6*b[36] - 3*b[37] - 6*b[38] - 3*b[39] + + 8*b[40] + 4*b[41] - 8*b[42] - 4*b[43] + 4*b[44] + 2*b[45] - 4*b[46] - 2*b[47] + + 6*b[48] - 6*b[49] + 6*b[50] - 6*b[51] + 3*b[52] - 3*b[53] + 3*b[54] - 3*b[55] + + 4*b[56] + 2*b[57] + 4*b[58] + 2*b[59] + 2*b[60] + b[61] + 2*b[62] + b[63]; + a[47] = -12*b[0] + 12*b[1] + 12*b[2] - 12*b[3] + 12*b[4] - 12*b[5] - 12*b[6] + 12*b[7] + - 6*b[8] - 6*b[9] + 6*b[10] + 6*b[11] + 6*b[12] + 6*b[13] - 6*b[14] - 6*b[15] + - 6*b[16] + 6*b[17] - 6*b[18] + 6*b[19] + 6*b[20] - 6*b[21] + 6*b[22] - 6*b[23] + - 8*b[24] + 8*b[25] + 8*b[26] - 8*b[27] - 4*b[28] + 4*b[29] + 4*b[30] - 4*b[31] + - 3*b[32] - 3*b[33] - 3*b[34] - 3*b[35] + 3*b[36] + 3*b[37] + 3*b[38] + 3*b[39] + - 4*b[40] - 4*b[41] + 4*b[42] + 4*b[43] - 2*b[44] - 2*b[45] + 2*b[46] + 2*b[47] + - 4*b[48] + 4*b[49] - 4*b[50] + 4*b[51] - 2*b[52] + 2*b[53] - 2*b[54] + 2*b[55] + - 2*b[56] - 2*b[57] - 2*b[58] - 2*b[59] - b[60] - b[61] - b[62] - b[63]; + a[48] = 2*b[0] - 2*b[4] + b[24] + b[28]; + a[49] = 2*b[8] - 2*b[12] + b[40] + b[44]; + a[50] = -6*b[0] + 6*b[1] + 6*b[4] - 6*b[5] - 4*b[8] - 2*b[9] + 4*b[12] + 2*b[13] + - 3*b[24] + 3*b[25] - 3*b[28] + 3*b[29] - 2*b[40] - b[41] - 2*b[44] - b[45]; + a[51] = 4*b[0] - 4*b[1] - 4*b[4] + 4*b[5] + 2*b[8] + 2*b[9] - 2*b[12] - 2*b[13] + + 2*b[24] - 2*b[25] + 2*b[28] - 2*b[29] + b[40] + b[41] + b[44] + b[45]; + a[52] = 2*b[16] - 2*b[20] + b[48] + b[52]; + a[53] = 2*b[32] - 2*b[36] + b[56] + b[60]; + a[54] = -6*b[16] + 6*b[17] + 6*b[20] - 6*b[21] - 4*b[32] - 2*b[33] + 4*b[36] + 2*b[37] + - 3*b[48] + 3*b[49] - 3*b[52] + 3*b[53] - 2*b[56] - b[57] - 2*b[60] - b[61]; + a[55] = 4*b[16] - 4*b[17] - 4*b[20] + 4*b[21] + 2*b[32] + 2*b[33] - 2*b[36] - 2*b[37] + + 2*b[48] - 2*b[49] + 2*b[52] - 2*b[53] + b[56] + b[57] + b[60] + b[61]; + a[56] = -6*b[0] + 6*b[2] + 6*b[4] - 6*b[6] - 4*b[16] - 2*b[18] + 4*b[20] + 2*b[22] + - 3*b[24] + 3*b[26] - 3*b[28] + 3*b[30] - 2*b[48] - b[50] - 2*b[52] - b[54]; + a[57] = -6*b[8] + 6*b[10] + 6*b[12] - 6*b[14] - 4*b[32] - 2*b[34] + 4*b[36] + 2*b[38] + - 3*b[40] + 3*b[42] - 3*b[44] + 3*b[46] - 2*b[56] - b[58] - 2*b[60] - b[62]; + a[58] = 18*b[0] - 18*b[1] - 18*b[2] + 18*b[3] - 18*b[4] + 18*b[5] + 18*b[6] - 18*b[7] + + 12*b[8] + 6*b[9] - 12*b[10] - 6*b[11] - 12*b[12] - 6*b[13] + 12*b[14] + 6*b[15] + + 12*b[16] - 12*b[17] + 6*b[18] - 6*b[19] - 12*b[20] + 12*b[21] - 6*b[22] + 6*b[23] + + 9*b[24] - 9*b[25] - 9*b[26] + 9*b[27] + 9*b[28] - 9*b[29] - 9*b[30] + 9*b[31] + + 8*b[32] + 4*b[33] + 4*b[34] + 2*b[35] - 8*b[36] - 4*b[37] - 4*b[38] - 2*b[39] + + 6*b[40] + 3*b[41] - 6*b[42] - 3*b[43] + 6*b[44] + 3*b[45] - 6*b[46] - 3*b[47] + + 6*b[48] - 6*b[49] + 3*b[50] - 3*b[51] + 6*b[52] - 6*b[53] + 3*b[54] - 3*b[55] + + 4*b[56] + 2*b[57] + 2*b[58] + b[59] + 4*b[60] + 2*b[61] + 2*b[62] + b[63]; + a[59] = -12*b[0] + 12*b[1] + 12*b[2] - 12*b[3] + 12*b[4] - 12*b[5] - 12*b[6] + 12*b[7] + - 6*b[8] - 6*b[9] + 6*b[10] + 6*b[11] + 6*b[12] + 6*b[13] - 6*b[14] - 6*b[15] + - 8*b[16] + 8*b[17] - 4*b[18] + 4*b[19] + 8*b[20] - 8*b[21] + 4*b[22] - 4*b[23] + - 6*b[24] + 6*b[25] + 6*b[26] - 6*b[27] - 6*b[28] + 6*b[29] + 6*b[30] - 6*b[31] + - 4*b[32] - 4*b[33] - 2*b[34] - 2*b[35] + 4*b[36] + 4*b[37] + 2*b[38] + 2*b[39] + - 3*b[40] - 3*b[41] + 3*b[42] + 3*b[43] - 3*b[44] - 3*b[45] + 3*b[46] + 3*b[47] + - 4*b[48] + 4*b[49] - 2*b[50] + 2*b[51] - 4*b[52] + 4*b[53] - 2*b[54] + 2*b[55] + - 2*b[56] - 2*b[57] - b[58] - b[59] - 2*b[60] - 2*b[61] - b[62] - b[63]; + a[60] = 4*b[0] - 4*b[2] - 4*b[4] + 4*b[6] + 2*b[16] + 2*b[18] - 2*b[20] - 2*b[22] + + 2*b[24] - 2*b[26] + 2*b[28] - 2*b[30] + b[48] + b[50] + b[52] + b[54]; + a[61] = 4*b[8] - 4*b[10] - 4*b[12] + 4*b[14] + 2*b[32] + 2*b[34] - 2*b[36] - 2*b[38] + + 2*b[40] - 2*b[42] + 2*b[44] - 2*b[46] + b[56] + b[58] + b[60] + b[62]; + a[62] = -12*b[0] + 12*b[1] + 12*b[2] - 12*b[3] + 12*b[4] - 12*b[5] - 12*b[6] + 12*b[7] + - 8*b[8] - 4*b[9] + 8*b[10] + 4*b[11] + 8*b[12] + 4*b[13] - 8*b[14] - 4*b[15] + - 6*b[16] + 6*b[17] - 6*b[18] + 6*b[19] + 6*b[20] - 6*b[21] + 6*b[22] - 6*b[23] + - 6*b[24] + 6*b[25] + 6*b[26] - 6*b[27] - 6*b[28] + 6*b[29] + 6*b[30] - 6*b[31] + - 4*b[32] - 2*b[33] - 4*b[34] - 2*b[35] + 4*b[36] + 2*b[37] + 4*b[38] + 2*b[39] + - 4*b[40] - 2*b[41] + 4*b[42] + 2*b[43] - 4*b[44] - 2*b[45] + 4*b[46] + 2*b[47] + - 3*b[48] + 3*b[49] - 3*b[50] + 3*b[51] - 3*b[52] + 3*b[53] - 3*b[54] + 3*b[55] + - 2*b[56] - b[57] - 2*b[58] - b[59] - 2*b[60] - b[61] - 2*b[62] - b[63]; + a[63] = 8*b[0] - 8*b[1] - 8*b[2] + 8*b[3] - 8*b[4] + 8*b[5] + 8*b[6] - 8*b[7] + + 4*b[8] + 4*b[9] - 4*b[10] - 4*b[11] - 4*b[12] - 4*b[13] + 4*b[14] + 4*b[15] + + 4*b[16] - 4*b[17] + 4*b[18] - 4*b[19] - 4*b[20] + 4*b[21] - 4*b[22] + 4*b[23] + + 4*b[24] - 4*b[25] - 4*b[26] + 4*b[27] + 4*b[28] - 4*b[29] - 4*b[30] + 4*b[31] + + 2*b[32] + 2*b[33] + 2*b[34] + 2*b[35] - 2*b[36] - 2*b[37] - 2*b[38] - 2*b[39] + + 2*b[40] + 2*b[41] - 2*b[42] - 2*b[43] + 2*b[44] + 2*b[45] - 2*b[46] - 2*b[47] + + 2*b[48] - 2*b[49] + 2*b[50] - 2*b[51] + 2*b[52] - 2*b[53] + 2*b[54] - 2*b[55] + + b[56] + b[57] + b[58] + b[59] + b[60] + b[61] + b[62] + b[63]; + } + DEVICE void compute_b(float * __restrict__ b, int * __restrict__ inds) const + { + int k[3]; + k[0] = nx; + k[1] = ny; + k[2] = nz; + + int inds2[3] = {0,0,0}; + + for (int i0 = 0; i0 < 8; i0++) { + inds2[0] = 0; + inds2[1] = 0; + inds2[2] = 0; + + /* printf("%d\n", inds2[0]); */ + /* printf("%d\n", inds2[1]); */ + /* printf("%d\n", inds2[2]); */ + + bool zero_derivs = false; + + int bit = 1; // bit = 2^i1 in the below loop + for (int i1 = 0; i1 < 3; i1++) { + inds2[i1] = (inds[i1] + ((i0 & bit) ? 1 : 0)) % k[i1]; + bit <<= 1; // i.e. multiply by 2 + } + int d_lo[3] = {1, 1, 1}; + float voffs[3] = {0.0f, 0.0f, 0.0f}; + float dscales[3] = {0.5, 0.5, 0.5}; + + for (int i1 = 0; i1 < 3; i1++) { + if (inds2[i1] == 0) { + zero_derivs = true; + } + else if (inds2[i1] == k[i1]-1) { + zero_derivs = true; + } + else { + // printf("%d\n",i1); + voffs[i1] = 0.0; + } + } + + // V + b[i0] = getValue(inds2[0],inds2[1],inds2[2]); + + if (zero_derivs) { + b[8+i0] = 0.0; + b[16+i0] = 0.0; + b[24+i0] = 0.0; + b[32+i0] = 0.0; + b[40+i0] = 0.0; + b[48+i0] = 0.0; + b[56+i0] = 0.0; + } else { + b[8+i0] = dscales[0] * (getValue(inds2[0]+1,inds2[1],inds2[2]) - getValue(inds2[0]-d_lo[0],inds2[1],inds2[2]) + voffs[0]); + b[16+i0] = dscales[1] * (getValue(inds2[0],inds2[1]+1,inds2[2]) - getValue(inds2[0],inds2[1]-d_lo[1],inds2[2]) + voffs[1]); + b[24+i0] = dscales[2] * (getValue(inds2[0],inds2[1],inds2[2]+1) - getValue(inds2[0],inds2[1],inds2[2]-d_lo[2]) + voffs[2]); + b[32+i0] = dscales[0] * dscales[1] + * (getValue(inds2[0]+1,inds2[1]+1,inds2[2]) - getValue(inds2[0]-d_lo[0],inds2[1]+1,inds2[2]) - + getValue(inds2[0]+1,inds2[1]-d_lo[1],inds2[2]) + getValue(inds2[0]-d_lo[0],inds2[1]-d_lo[1],inds2[2])); + b[40+i0] = dscales[0] * dscales[2] + * (getValue(inds2[0]+1,inds2[1],inds2[2]+1) - getValue(inds2[0]-d_lo[0],inds2[1],inds2[2]+1) - + getValue(inds2[0]+1,inds2[1],inds2[2]-d_lo[2]) + getValue(inds2[0]-d_lo[0],inds2[1],inds2[2]-d_lo[2])); + b[48+i0] = dscales[1] * dscales[2] + * (getValue(inds2[0],inds2[1]+1,inds2[2]+1) - getValue(inds2[0],inds2[1]-d_lo[1],inds2[2]+1) - + getValue(inds2[0],inds2[1]+1,inds2[2]-d_lo[2]) + getValue(inds2[0],inds2[1]-d_lo[1],inds2[2]-d_lo[2])); + b[56+i0] = dscales[0] * dscales[1] * dscales[2] + * (getValue(inds2[0]+1,inds2[1]+1,inds2[2]+1) - getValue(inds2[0]+1,inds2[1]+1,inds2[2]-d_lo[2]) - + getValue(inds2[0]+1,inds2[1]-d_lo[1],inds2[2]+1) - getValue(inds2[0]-d_lo[0],inds2[1]+1,inds2[2]+1) + + getValue(inds2[0]+1,inds2[1]-d_lo[1],inds2[2]-d_lo[2]) + getValue(inds2[0]-d_lo[0],inds2[1]+1,inds2[2]-d_lo[2]) + + getValue(inds2[0]-d_lo[0],inds2[1]-d_lo[1],inds2[2]+1) - getValue(inds2[0]-d_lo[0],inds2[1]-d_lo[1],inds2[2]-d_lo[2])); + } + } + } + HOST DEVICE inline float getValue(int ix, int iy, int iz) const { + + if (ix < 0 || ix >= nx) return 0.0f; + if (iy < 0 || iy >= ny) return 0.0f; + if (iz < 0 || iz >= nz) return 0.0f; + + int j = iz + iy*nz + ix*ny*nz; + return val[j]; + /* + if(ix < 0) ix = 0; + else if(ix >= nx) ix = nx -1; + + if(iy < 0) iy = 0; + else if(iy >= ny) iy = ny-1; + + if(iz < 0) iz = 0; + else if(iz >= nz) iz = nz-1; + + int j = iz + nz * (iy + ny * ix); + return val[j];*/ + } + + //#define cubic DEVICE inline ForceEnergy interpolateForceDLinearlyPeriodic(const Vector3& pos) const { + #ifdef cubic + return interpolateForceD(pos); + #elif defined(cubic_namd) + return interpolateForceDnamd(pos); + #else + return interpolateForceDLinearly(pos); + #endif + #if 0 const Vector3 l = basisInv.transform(pos - origin); // Find the home node. @@ -609,6 +935,7 @@ public: f = basisInv.transpose().transform(f); float e = wz * (g3[2][1] - g3[2][0]) + g3[2][0]; return ForceEnergy(f,e); + #endif } inline virtual Vector3 interpolateForce(Vector3 pos) const { diff --git a/src/RigidBodyGrid.cu b/src/RigidBodyGrid.cu index 5f6caf3..d9b32da 100644 --- a/src/RigidBodyGrid.cu +++ b/src/RigidBodyGrid.cu @@ -52,7 +52,8 @@ RigidBodyGrid RigidBodyGrid::mult(const RigidBodyGrid& g) { } RigidBodyGrid& RigidBodyGrid::operator=(const RigidBodyGrid& g) { - delete[] val; + if(val!=NULL) + delete[] val; val = NULL; nx = g.nx; ny = g.ny; @@ -66,7 +67,10 @@ RigidBodyGrid& RigidBodyGrid::operator=(const RigidBodyGrid& g) { RigidBodyGrid::~RigidBodyGrid() { if (val != NULL) + { delete[] val; + val = NULL; + } } void RigidBodyGrid::zero() { @@ -90,17 +94,37 @@ bool RigidBodyGrid::setValue(int ix, int iy, int iz, float v) { } float RigidBodyGrid::getValue(int j) const { + if (j < 0 || j >= nx*ny*nz) return 0.0f; return val[j]; +/* + Vector3 idx = getPosition(j) + return getValue(idx.x,idx.y,idx.z); +*/ } -float RigidBodyGrid::getValue(int ix, int iy, int iz) const { +HOST DEVICE float RigidBodyGrid::getValue(int ix, int iy, int iz) const { +/* + if(ix < 0) ix = 0; + else if(ix >= nx) ix = nx -1; + + if(iy < 0) iy = 0; + else if(iy >= ny) iy = ny-1; + + if(iz < 0) iz = 0; + else if(iz >= nz) iz = nz-1; + + int j = iz + nz * (iy + ny * ix); + return val[j]; +*/ + if (ix < 0 || ix >= nx) return 0.0f; if (iy < 0 || iy >= ny) return 0.0f; if (iz < 0 || iz >= nz) return 0.0f; int j = iz + iy*nz + ix*ny*nz; return val[j]; + } Vector3 RigidBodyGrid::getPosition(const int j) const { @@ -219,8 +243,13 @@ DEVICE ForceEnergy RigidBodyGrid::interpolateForceD(const Vector3 l) const { return ForceEnergy(f,e); } - +#define cubic DEVICE ForceEnergy RigidBodyGrid::interpolateForceDLinearly(const Vector3& l) const { +#ifdef cubic +return interpolateForceD(l); +#elif defined(cubic_namd) +return interpolateForceDnamd(l); +#else // Find the home node. const int homeX = int(floor(l.x)); const int homeY = int(floor(l.y)); @@ -264,5 +293,308 @@ DEVICE ForceEnergy RigidBodyGrid::interpolateForceDLinearly(const Vector3& l) co f.z = - (g3[2][1] - g3[2][0]); float e = wz * (g3[2][1] - g3[2][0]) + g3[2][0]; return ForceEnergy(f,e); +#endif } +DEVICE ForceEnergy RigidBodyGrid::interpolateForceDnamd(const Vector3& l) const +{ + Vector3 f; + //const Vector3 l = basisInv.transform(pos - origin); + + const int homeX = int(floor(l.x)); + const int homeY = int(floor(l.y)); + const int homeZ = int(floor(l.z)); + const float wx = l.x - homeX; + const float wy = l.y - homeY; + const float wz = l.z - homeZ; + + Vector3 dg = Vector3(wx,wy,wz); + + int inds[3]; + inds[0] = homeX; + inds[1] = homeY; + inds[2] = homeZ; + + // TODO: handle edges + + // Compute b + float b[64]; // Matrix of values at 8 box corners + compute_b(b, inds); + + // Compute a + float a[64]; + compute_a(a, b); + + // Calculate powers of x, y, z for later use + // e.g. x[2] = x^2 + float x[4], y[4], z[4]; + x[0] = 1; y[0] = 1; z[0] = 1; + for (int j = 1; j < 4; j++) { + x[j] = x[j-1] * dg.x; + y[j] = y[j-1] * dg.y; + z[j] = z[j-1] * dg.z; + } + + float e = compute_V(a, x, y, z); + f = compute_dV(a, x, y, z); + + //f = basisInv.transpose().transform(f); + return ForceEnergy(f,e); + } + +DEVICE float RigidBodyGrid::compute_V(float *a, float *x, float *y, float *z) const + { + float V = 0.0; + long int ind = 0; + for (int l = 0; l < 4; l++) { + for (int k = 0; k < 4; k++) { + for (int j = 0; j < 4; j++) { + V += a[ind] * x[j] * y[k] * z[l]; + ind++; + } + } + } + return V; + } +DEVICE Vector3 RigidBodyGrid::compute_dV(float *a, float *x, float *y, float *z) const + { + Vector3 dV = Vector3(0.0f); + long int ind = 0; + for (int l = 0; l < 4; l++) { + for (int k = 0; k < 4; k++) { + for (int j = 0; j < 4; j++) { + if (j > 0) dV.x += a[ind] * j * x[j-1] * y[k] * z[l]; // dV/dx + if (k > 0) dV.y += a[ind] * k * x[j] * y[k-1] * z[l]; // dV/dy + if (l > 0) dV.z += a[ind] * l * x[j] * y[k] * z[l-1]; // dV/dz + ind++; + } + } + } + return dV*(-1.f); + } +DEVICE void RigidBodyGrid::compute_a(float *a, float *b) const + { + // Static sparse 64x64 matrix times vector ... nicer looking way than this? + a[0] = b[0]; + a[1] = b[8]; + a[2] = -3*b[0] + 3*b[1] - 2*b[8] - b[9]; + a[3] = 2*b[0] - 2*b[1] + b[8] + b[9]; + a[4] = b[16]; + a[5] = b[32]; + a[6] = -3*b[16] + 3*b[17] - 2*b[32] - b[33]; + a[7] = 2*b[16] - 2*b[17] + b[32] + b[33]; + a[8] = -3*b[0] + 3*b[2] - 2*b[16] - b[18]; + a[9] = -3*b[8] + 3*b[10] - 2*b[32] - b[34]; + a[10] = 9*b[0] - 9*b[1] - 9*b[2] + 9*b[3] + 6*b[8] + 3*b[9] - 6*b[10] - 3*b[11] + + 6*b[16] - 6*b[17] + 3*b[18] - 3*b[19] + 4*b[32] + 2*b[33] + 2*b[34] + b[35]; + a[11] = -6*b[0] + 6*b[1] + 6*b[2] - 6*b[3] - 3*b[8] - 3*b[9] + 3*b[10] + 3*b[11] + - 4*b[16] + 4*b[17] - 2*b[18] + 2*b[19] - 2*b[32] - 2*b[33] - b[34] - b[35]; + a[12] = 2*b[0] - 2*b[2] + b[16] + b[18]; + a[13] = 2*b[8] - 2*b[10] + b[32] + b[34]; + a[14] = -6*b[0] + 6*b[1] + 6*b[2] - 6*b[3] - 4*b[8] - 2*b[9] + 4*b[10] + 2*b[11] + - 3*b[16] + 3*b[17] - 3*b[18] + 3*b[19] - 2*b[32] - b[33] - 2*b[34] - b[35]; + a[15] = 4*b[0] - 4*b[1] - 4*b[2] + 4*b[3] + 2*b[8] + 2*b[9] - 2*b[10] - 2*b[11] + + 2*b[16] - 2*b[17] + 2*b[18] - 2*b[19] + b[32] + b[33] + b[34] + b[35]; + a[16] = b[24]; + a[17] = b[40]; + a[18] = -3*b[24] + 3*b[25] - 2*b[40] - b[41]; + a[19] = 2*b[24] - 2*b[25] + b[40] + b[41]; + a[20] = b[48]; + a[21] = b[56]; + a[22] = -3*b[48] + 3*b[49] - 2*b[56] - b[57]; + a[23] = 2*b[48] - 2*b[49] + b[56] + b[57]; + a[24] = -3*b[24] + 3*b[26] - 2*b[48] - b[50]; + a[25] = -3*b[40] + 3*b[42] - 2*b[56] - b[58]; + a[26] = 9*b[24] - 9*b[25] - 9*b[26] + 9*b[27] + 6*b[40] + 3*b[41] - 6*b[42] - 3*b[43] + + 6*b[48] - 6*b[49] + 3*b[50] - 3*b[51] + 4*b[56] + 2*b[57] + 2*b[58] + b[59]; + a[27] = -6*b[24] + 6*b[25] + 6*b[26] - 6*b[27] - 3*b[40] - 3*b[41] + 3*b[42] + 3*b[43] + - 4*b[48] + 4*b[49] - 2*b[50] + 2*b[51] - 2*b[56] - 2*b[57] - b[58] - b[59]; + a[28] = 2*b[24] - 2*b[26] + b[48] + b[50]; + a[29] = 2*b[40] - 2*b[42] + b[56] + b[58]; + a[30] = -6*b[24] + 6*b[25] + 6*b[26] - 6*b[27] - 4*b[40] - 2*b[41] + 4*b[42] + 2*b[43] + - 3*b[48] + 3*b[49] - 3*b[50] + 3*b[51] - 2*b[56] - b[57] - 2*b[58] - b[59]; + a[31] = 4*b[24] - 4*b[25] - 4*b[26] + 4*b[27] + 2*b[40] + 2*b[41] - 2*b[42] - 2*b[43] + + 2*b[48] - 2*b[49] + 2*b[50] - 2*b[51] + b[56] + b[57] + b[58] + b[59]; + a[32] = -3*b[0] + 3*b[4] - 2*b[24] - b[28]; + a[33] = -3*b[8] + 3*b[12] - 2*b[40] - b[44]; + a[34] = 9*b[0] - 9*b[1] - 9*b[4] + 9*b[5] + 6*b[8] + 3*b[9] - 6*b[12] - 3*b[13] + + 6*b[24] - 6*b[25] + 3*b[28] - 3*b[29] + 4*b[40] + 2*b[41] + 2*b[44] + b[45]; + a[35] = -6*b[0] + 6*b[1] + 6*b[4] - 6*b[5] - 3*b[8] - 3*b[9] + 3*b[12] + 3*b[13] + - 4*b[24] + 4*b[25] - 2*b[28] + 2*b[29] - 2*b[40] - 2*b[41] - b[44] - b[45]; + a[36] = -3*b[16] + 3*b[20] - 2*b[48] - b[52]; + a[37] = -3*b[32] + 3*b[36] - 2*b[56] - b[60]; + a[38] = 9*b[16] - 9*b[17] - 9*b[20] + 9*b[21] + 6*b[32] + 3*b[33] - 6*b[36] - 3*b[37] + + 6*b[48] - 6*b[49] + 3*b[52] - 3*b[53] + 4*b[56] + 2*b[57] + 2*b[60] + b[61]; + a[39] = -6*b[16] + 6*b[17] + 6*b[20] - 6*b[21] - 3*b[32] - 3*b[33] + 3*b[36] + 3*b[37] + - 4*b[48] + 4*b[49] - 2*b[52] + 2*b[53] - 2*b[56] - 2*b[57] - b[60] - b[61]; + a[40] = 9*b[0] - 9*b[2] - 9*b[4] + 9*b[6] + 6*b[16] + 3*b[18] - 6*b[20] - 3*b[22] + + 6*b[24] - 6*b[26] + 3*b[28] - 3*b[30] + 4*b[48] + 2*b[50] + 2*b[52] + b[54]; + a[41] = 9*b[8] - 9*b[10] - 9*b[12] + 9*b[14] + 6*b[32] + 3*b[34] - 6*b[36] - 3*b[38] + + 6*b[40] - 6*b[42] + 3*b[44] - 3*b[46] + 4*b[56] + 2*b[58] + 2*b[60] + b[62]; + a[42] = -27*b[0] + 27*b[1] + 27*b[2] - 27*b[3] + 27*b[4] - 27*b[5] - 27*b[6] + 27*b[7] + - 18*b[8] - 9*b[9] + 18*b[10] + 9*b[11] + 18*b[12] + 9*b[13] - 18*b[14] - 9*b[15] + - 18*b[16] + 18*b[17] - 9*b[18] + 9*b[19] + 18*b[20] - 18*b[21] + 9*b[22] - 9*b[23] + - 18*b[24] + 18*b[25] + 18*b[26] - 18*b[27] - 9*b[28] + 9*b[29] + 9*b[30] - 9*b[31] + - 12*b[32] - 6*b[33] - 6*b[34] - 3*b[35] + 12*b[36] + 6*b[37] + 6*b[38] + 3*b[39] + - 12*b[40] - 6*b[41] + 12*b[42] + 6*b[43] - 6*b[44] - 3*b[45] + 6*b[46] + 3*b[47] + - 12*b[48] + 12*b[49] - 6*b[50] + 6*b[51] - 6*b[52] + 6*b[53] - 3*b[54] + 3*b[55] + - 8*b[56] - 4*b[57] - 4*b[58] - 2*b[59] - 4*b[60] - 2*b[61] - 2*b[62] - b[63]; + a[43] = 18*b[0] - 18*b[1] - 18*b[2] + 18*b[3] - 18*b[4] + 18*b[5] + 18*b[6] - 18*b[7] + + 9*b[8] + 9*b[9] - 9*b[10] - 9*b[11] - 9*b[12] - 9*b[13] + 9*b[14] + 9*b[15] + + 12*b[16] - 12*b[17] + 6*b[18] - 6*b[19] - 12*b[20] + 12*b[21] - 6*b[22] + 6*b[23] + + 12*b[24] - 12*b[25] - 12*b[26] + 12*b[27] + 6*b[28] - 6*b[29] - 6*b[30] + 6*b[31] + + 6*b[32] + 6*b[33] + 3*b[34] + 3*b[35] - 6*b[36] - 6*b[37] - 3*b[38] - 3*b[39] + + 6*b[40] + 6*b[41] - 6*b[42] - 6*b[43] + 3*b[44] + 3*b[45] - 3*b[46] - 3*b[47] + + 8*b[48] - 8*b[49] + 4*b[50] - 4*b[51] + 4*b[52] - 4*b[53] + 2*b[54] - 2*b[55] + + 4*b[56] + 4*b[57] + 2*b[58] + 2*b[59] + 2*b[60] + 2*b[61] + b[62] + b[63]; + a[44] = -6*b[0] + 6*b[2] + 6*b[4] - 6*b[6] - 3*b[16] - 3*b[18] + 3*b[20] + 3*b[22] + - 4*b[24] + 4*b[26] - 2*b[28] + 2*b[30] - 2*b[48] - 2*b[50] - b[52] - b[54]; + a[45] = -6*b[8] + 6*b[10] + 6*b[12] - 6*b[14] - 3*b[32] - 3*b[34] + 3*b[36] + 3*b[38] + - 4*b[40] + 4*b[42] - 2*b[44] + 2*b[46] - 2*b[56] - 2*b[58] - b[60] - b[62]; + a[46] = 18*b[0] - 18*b[1] - 18*b[2] + 18*b[3] - 18*b[4] + 18*b[5] + 18*b[6] - 18*b[7] + + 12*b[8] + 6*b[9] - 12*b[10] - 6*b[11] - 12*b[12] - 6*b[13] + 12*b[14] + 6*b[15] + + 9*b[16] - 9*b[17] + 9*b[18] - 9*b[19] - 9*b[20] + 9*b[21] - 9*b[22] + 9*b[23] + + 12*b[24] - 12*b[25] - 12*b[26] + 12*b[27] + 6*b[28] - 6*b[29] - 6*b[30] + 6*b[31] + + 6*b[32] + 3*b[33] + 6*b[34] + 3*b[35] - 6*b[36] - 3*b[37] - 6*b[38] - 3*b[39] + + 8*b[40] + 4*b[41] - 8*b[42] - 4*b[43] + 4*b[44] + 2*b[45] - 4*b[46] - 2*b[47] + + 6*b[48] - 6*b[49] + 6*b[50] - 6*b[51] + 3*b[52] - 3*b[53] + 3*b[54] - 3*b[55] + + 4*b[56] + 2*b[57] + 4*b[58] + 2*b[59] + 2*b[60] + b[61] + 2*b[62] + b[63]; + a[47] = -12*b[0] + 12*b[1] + 12*b[2] - 12*b[3] + 12*b[4] - 12*b[5] - 12*b[6] + 12*b[7] + - 6*b[8] - 6*b[9] + 6*b[10] + 6*b[11] + 6*b[12] + 6*b[13] - 6*b[14] - 6*b[15] + - 6*b[16] + 6*b[17] - 6*b[18] + 6*b[19] + 6*b[20] - 6*b[21] + 6*b[22] - 6*b[23] + - 8*b[24] + 8*b[25] + 8*b[26] - 8*b[27] - 4*b[28] + 4*b[29] + 4*b[30] - 4*b[31] + - 3*b[32] - 3*b[33] - 3*b[34] - 3*b[35] + 3*b[36] + 3*b[37] + 3*b[38] + 3*b[39] + - 4*b[40] - 4*b[41] + 4*b[42] + 4*b[43] - 2*b[44] - 2*b[45] + 2*b[46] + 2*b[47] + - 4*b[48] + 4*b[49] - 4*b[50] + 4*b[51] - 2*b[52] + 2*b[53] - 2*b[54] + 2*b[55] + - 2*b[56] - 2*b[57] - 2*b[58] - 2*b[59] - b[60] - b[61] - b[62] - b[63]; + a[48] = 2*b[0] - 2*b[4] + b[24] + b[28]; + a[49] = 2*b[8] - 2*b[12] + b[40] + b[44]; + a[50] = -6*b[0] + 6*b[1] + 6*b[4] - 6*b[5] - 4*b[8] - 2*b[9] + 4*b[12] + 2*b[13] + - 3*b[24] + 3*b[25] - 3*b[28] + 3*b[29] - 2*b[40] - b[41] - 2*b[44] - b[45]; + a[51] = 4*b[0] - 4*b[1] - 4*b[4] + 4*b[5] + 2*b[8] + 2*b[9] - 2*b[12] - 2*b[13] + + 2*b[24] - 2*b[25] + 2*b[28] - 2*b[29] + b[40] + b[41] + b[44] + b[45]; + a[52] = 2*b[16] - 2*b[20] + b[48] + b[52]; + a[53] = 2*b[32] - 2*b[36] + b[56] + b[60]; + a[54] = -6*b[16] + 6*b[17] + 6*b[20] - 6*b[21] - 4*b[32] - 2*b[33] + 4*b[36] + 2*b[37] + - 3*b[48] + 3*b[49] - 3*b[52] + 3*b[53] - 2*b[56] - b[57] - 2*b[60] - b[61]; + a[55] = 4*b[16] - 4*b[17] - 4*b[20] + 4*b[21] + 2*b[32] + 2*b[33] - 2*b[36] - 2*b[37] + + 2*b[48] - 2*b[49] + 2*b[52] - 2*b[53] + b[56] + b[57] + b[60] + b[61]; + a[56] = -6*b[0] + 6*b[2] + 6*b[4] - 6*b[6] - 4*b[16] - 2*b[18] + 4*b[20] + 2*b[22] + - 3*b[24] + 3*b[26] - 3*b[28] + 3*b[30] - 2*b[48] - b[50] - 2*b[52] - b[54]; + a[57] = -6*b[8] + 6*b[10] + 6*b[12] - 6*b[14] - 4*b[32] - 2*b[34] + 4*b[36] + 2*b[38] + - 3*b[40] + 3*b[42] - 3*b[44] + 3*b[46] - 2*b[56] - b[58] - 2*b[60] - b[62]; + a[58] = 18*b[0] - 18*b[1] - 18*b[2] + 18*b[3] - 18*b[4] + 18*b[5] + 18*b[6] - 18*b[7] + + 12*b[8] + 6*b[9] - 12*b[10] - 6*b[11] - 12*b[12] - 6*b[13] + 12*b[14] + 6*b[15] + + 12*b[16] - 12*b[17] + 6*b[18] - 6*b[19] - 12*b[20] + 12*b[21] - 6*b[22] + 6*b[23] + + 9*b[24] - 9*b[25] - 9*b[26] + 9*b[27] + 9*b[28] - 9*b[29] - 9*b[30] + 9*b[31] + + 8*b[32] + 4*b[33] + 4*b[34] + 2*b[35] - 8*b[36] - 4*b[37] - 4*b[38] - 2*b[39] + + 6*b[40] + 3*b[41] - 6*b[42] - 3*b[43] + 6*b[44] + 3*b[45] - 6*b[46] - 3*b[47] + + 6*b[48] - 6*b[49] + 3*b[50] - 3*b[51] + 6*b[52] - 6*b[53] + 3*b[54] - 3*b[55] + + 4*b[56] + 2*b[57] + 2*b[58] + b[59] + 4*b[60] + 2*b[61] + 2*b[62] + b[63]; + a[59] = -12*b[0] + 12*b[1] + 12*b[2] - 12*b[3] + 12*b[4] - 12*b[5] - 12*b[6] + 12*b[7] + - 6*b[8] - 6*b[9] + 6*b[10] + 6*b[11] + 6*b[12] + 6*b[13] - 6*b[14] - 6*b[15] + - 8*b[16] + 8*b[17] - 4*b[18] + 4*b[19] + 8*b[20] - 8*b[21] + 4*b[22] - 4*b[23] + - 6*b[24] + 6*b[25] + 6*b[26] - 6*b[27] - 6*b[28] + 6*b[29] + 6*b[30] - 6*b[31] + - 4*b[32] - 4*b[33] - 2*b[34] - 2*b[35] + 4*b[36] + 4*b[37] + 2*b[38] + 2*b[39] + - 3*b[40] - 3*b[41] + 3*b[42] + 3*b[43] - 3*b[44] - 3*b[45] + 3*b[46] + 3*b[47] + - 4*b[48] + 4*b[49] - 2*b[50] + 2*b[51] - 4*b[52] + 4*b[53] - 2*b[54] + 2*b[55] + - 2*b[56] - 2*b[57] - b[58] - b[59] - 2*b[60] - 2*b[61] - b[62] - b[63]; + a[60] = 4*b[0] - 4*b[2] - 4*b[4] + 4*b[6] + 2*b[16] + 2*b[18] - 2*b[20] - 2*b[22] + + 2*b[24] - 2*b[26] + 2*b[28] - 2*b[30] + b[48] + b[50] + b[52] + b[54]; + a[61] = 4*b[8] - 4*b[10] - 4*b[12] + 4*b[14] + 2*b[32] + 2*b[34] - 2*b[36] - 2*b[38] + + 2*b[40] - 2*b[42] + 2*b[44] - 2*b[46] + b[56] + b[58] + b[60] + b[62]; + a[62] = -12*b[0] + 12*b[1] + 12*b[2] - 12*b[3] + 12*b[4] - 12*b[5] - 12*b[6] + 12*b[7] + - 8*b[8] - 4*b[9] + 8*b[10] + 4*b[11] + 8*b[12] + 4*b[13] - 8*b[14] - 4*b[15] + - 6*b[16] + 6*b[17] - 6*b[18] + 6*b[19] + 6*b[20] - 6*b[21] + 6*b[22] - 6*b[23] + - 6*b[24] + 6*b[25] + 6*b[26] - 6*b[27] - 6*b[28] + 6*b[29] + 6*b[30] - 6*b[31] + - 4*b[32] - 2*b[33] - 4*b[34] - 2*b[35] + 4*b[36] + 2*b[37] + 4*b[38] + 2*b[39] + - 4*b[40] - 2*b[41] + 4*b[42] + 2*b[43] - 4*b[44] - 2*b[45] + 4*b[46] + 2*b[47] + - 3*b[48] + 3*b[49] - 3*b[50] + 3*b[51] - 3*b[52] + 3*b[53] - 3*b[54] + 3*b[55] + - 2*b[56] - b[57] - 2*b[58] - b[59] - 2*b[60] - b[61] - 2*b[62] - b[63]; + a[63] = 8*b[0] - 8*b[1] - 8*b[2] + 8*b[3] - 8*b[4] + 8*b[5] + 8*b[6] - 8*b[7] + + 4*b[8] + 4*b[9] - 4*b[10] - 4*b[11] - 4*b[12] - 4*b[13] + 4*b[14] + 4*b[15] + + 4*b[16] - 4*b[17] + 4*b[18] - 4*b[19] - 4*b[20] + 4*b[21] - 4*b[22] + 4*b[23] + + 4*b[24] - 4*b[25] - 4*b[26] + 4*b[27] + 4*b[28] - 4*b[29] - 4*b[30] + 4*b[31] + + 2*b[32] + 2*b[33] + 2*b[34] + 2*b[35] - 2*b[36] - 2*b[37] - 2*b[38] - 2*b[39] + + 2*b[40] + 2*b[41] - 2*b[42] - 2*b[43] + 2*b[44] + 2*b[45] - 2*b[46] - 2*b[47] + + 2*b[48] - 2*b[49] + 2*b[50] - 2*b[51] + 2*b[52] - 2*b[53] + 2*b[54] - 2*b[55] + + b[56] + b[57] + b[58] + b[59] + b[60] + b[61] + b[62] + b[63]; + } +DEVICE void RigidBodyGrid::compute_b(float * __restrict__ b, int * __restrict__ inds) const + { + int k[3]; + k[0] = nx; + k[1] = ny; + k[2] = nz; + + int inds2[3] = {0,0,0}; + + for (int i0 = 0; i0 < 8; i0++) { + inds2[0] = 0; + inds2[1] = 0; + inds2[2] = 0; + + /* printf("%d\n", inds2[0]); */ + /* printf("%d\n", inds2[1]); */ + /* printf("%d\n", inds2[2]); */ + + bool zero_derivs = false; + + int bit = 1; // bit = 2^i1 in the below loop + for (int i1 = 0; i1 < 3; i1++) { + inds2[i1] = (inds[i1] + ((i0 & bit) ? 1 : 0)) % k[i1]; + bit <<= 1; // i.e. multiply by 2 + } + //int d_hi[3] = {1, 1, 1}; + int d_lo[3] = {1, 1, 1}; + float voffs[3] = {0.0f, 0.0f, 0.0f}; + float dscales[3] = {0.5, 0.5, 0.5}; + + for (int i1 = 0; i1 < 3; i1++) { + if (inds2[i1] == 0) { + zero_derivs = true; + } + else if (inds2[i1] == k[i1]-1) { + zero_derivs = true; + } + else { + voffs[i1] = 0.0; + } + } + + // V + b[i0] = getValue(inds2[0],inds2[1],inds2[2]); + + if (zero_derivs) { + b[8+i0] = 0.0; + b[16+i0] = 0.0; + b[24+i0] = 0.0; + b[32+i0] = 0.0; + b[40+i0] = 0.0; + b[48+i0] = 0.0; + b[56+i0] = 0.0; + } else { + b[8+i0] = dscales[0] * (getValue(inds2[0]+1,inds2[1],inds2[2]) - getValue(inds2[0]-d_lo[0],inds2[1],inds2[2]) + voffs[0]); // dV/dx + b[16+i0] = dscales[1] * (getValue(inds2[0],inds2[1]+1,inds2[2]) - getValue(inds2[0],inds2[1]-d_lo[1],inds2[2]) + voffs[1]); // dV/dy + b[24+i0] = dscales[2] * (getValue(inds2[0],inds2[1],inds2[2]+1) - getValue(inds2[0],inds2[1],inds2[2]-d_lo[2]) + voffs[2]); // dV/dz + b[32+i0] = dscales[0] * dscales[1] * + (getValue(inds2[0]+1,inds2[1]+1,inds2[2]) - getValue(inds2[0]-d_lo[0],inds2[1]+1,inds2[2]) + - getValue(inds2[0]+1,inds2[1]-d_lo[1],inds2[2]) + getValue(inds2[0]-d_lo[0],inds2[1]-d_lo[1],inds2[2])); // d2V/dxdy + + b[40+i0] = dscales[0] * dscales[2] * + (getValue(inds2[0]+1,inds2[1],inds2[2]+1) - getValue(inds2[0]-d_lo[0],inds2[1],inds2[2]+1) + - getValue(inds2[0]+1,inds2[1],inds2[2]-d_lo[2]) + getValue(inds2[0]-d_lo[0],inds2[1],inds2[2]-d_lo[2])); // d2V/dxdz + + b[48+i0] = dscales[1] * dscales[2] * + (getValue(inds2[0],inds2[1]+1,inds2[2]+1) - getValue(inds2[0],inds2[1]-d_lo[1],inds2[2]+1) + - getValue(inds2[0],inds2[1]+1,inds2[2]-d_lo[2]) + getValue(inds2[0],inds2[1]-d_lo[1],inds2[2]-d_lo[2])); // d2V/dydz + + b[56+i0] = dscales[0] * dscales[1] * dscales[2] * // d3V/dxdydz + (getValue(inds2[0]+1,inds2[1]+1,inds2[2]+1) - getValue(inds2[0]+1,inds2[1]+1,inds2[2]-d_lo[2]) - + getValue(inds2[0]+1,inds2[1]-d_lo[1],inds2[2]+1) - getValue(inds2[0]-d_lo[0],inds2[1]+1,inds2[2]+1) + + getValue(inds2[0]+1,inds2[1]-d_lo[1],inds2[2]-d_lo[2]) + getValue(inds2[0]-d_lo[0],inds2[1]+1,inds2[2]-d_lo[2]) + + getValue(inds2[0]-d_lo[0],inds2[1]-d_lo[1],inds2[2]+1) - getValue(inds2[0]-d_lo[0],inds2[1]-d_lo[1],inds2[2]-d_lo[2])); + + } + } + } diff --git a/src/RigidBodyGrid.h b/src/RigidBodyGrid.h index 96a5d03..f586a41 100644 --- a/src/RigidBodyGrid.h +++ b/src/RigidBodyGrid.h @@ -71,7 +71,7 @@ public: virtual float getValue(int j) const; - virtual float getValue(int ix, int iy, int iz) const; + HOST DEVICE float getValue(int ix, int iy, int iz) const; HOST DEVICE Vector3 getPosition(int j) const; HOST DEVICE Vector3 getPosition(int j, Matrix3 basis, Vector3 origin) const; @@ -107,8 +107,11 @@ public: return 0.5 * sqrt(radius); } - - + DEVICE ForceEnergy interpolateForceDnamd(const Vector3& l) const; + DEVICE float compute_V(float *a, float *x, float *y, float *z) const; + DEVICE Vector3 compute_dV(float *a, float *x, float *y, float *z) const; + DEVICE void compute_a(float *a, float *b) const; + DEVICE void compute_b(float * __restrict__ b, int * __restrict__ inds) const; // Add a fixed value to the grid. void shift(float s); -- GitLab