diff --git a/src/ComputeForce.cu b/src/ComputeForce.cu index be5e545f1393fd354b751e34620ce928dc93b789..81d64c8eb278209c91693be3057ac2cedce179a0 100644 --- a/src/ComputeForce.cu +++ b/src/ComputeForce.cu @@ -55,6 +55,7 @@ ComputeForce::ComputeForce(const Configuration& c, const int numReplicas = 1) : for (int i = 0; i < gpuman.gpus.size(); ++i) { int s = gpuman.gpus.size(); sys_d = std::vector<BaseGrid*>(s); + tablePot_addr = std::vector<TabulatedPotential**>(s); tablePot_d = std::vector<TabulatedPotential**>(s); pairLists_d = std::vector<int2*>(s); pairLists_tex = std::vector<cudaTextureObject_t>(s); @@ -97,12 +98,11 @@ ComputeForce::ComputeForce(const Configuration& c, const int numReplicas = 1) : // Create the potential table tablePot = new TabulatedPotential*[np2]; - tablePot_addr = new TabulatedPotential*[np2]; - for (int i = 0; i < np2; ++i) { - tablePot_addr[i] = NULL; - tablePot[i] = NULL; - } + for (int i = 0; i < np2; ++i) tablePot[i] = NULL; + for (std::size_t i = 0; i < gpuman.gpus.size(); ++i) { + tablePot_addr[i] = new TabulatedPotential*[np2]; + for (int j = 0; j < np2; ++j) tablePot_addr[i][j] = NULL; gpuman.use(i); gpuErrchk(cudaMalloc(&tablePot_d[i], sizeof(TabulatedPotential*) * np2)); } @@ -255,7 +255,7 @@ ComputeForce::~ComputeForce() { for (int j = 0; j < numParts * numParts; ++j) delete tablePot[j]; delete[] tablePot; - delete[] tablePot_addr; + for (auto& tpa : tablePot_addr) delete[] tpa; for (int j = 0; j < numTabBondFiles; ++j) delete tableBond[j]; @@ -364,78 +364,44 @@ bool ComputeForce::addTabulatedPotential(String fileName, int type0, int type1) // If an entry already exists for this particle type, delete it if (tablePot[ind] != NULL) { delete tablePot[ind]; - gpuErrchk(cudaFree(tablePot_addr[ind])); - tablePot[ind] = NULL; - tablePot_addr[ind] = NULL; + // TODO free resources + // gpuErrchk(cudaFree(tablePot_d[ind])); + // tablePot[ind] = NULL; + // tablePot_addr[ind] = NULL; } if (tablePot[ind1] != NULL) { - gpuErrchk(cudaFree(tablePot_addr[ind1])); + // gpuErrchk(cudaFree(tablePot_addr[ind1])); delete tablePot[ind1]; - tablePot[ind1] = NULL; - tablePot_addr[ind1] = NULL; + // tablePot[ind1] = NULL; + // tablePot_addr[ind1] = NULL; } + + FullTabulatedPotential fullpot = FullTabulatedPotential(fileName); - tablePot[ind] = new TabulatedPotential(fileName); + tablePot[ind] = tablePot[ind1] = new TabulatedPotential(*fullpot.pot); tablePot[ind]->truncate(switchStart, sqrtf(cutoff2), 0.0f); - tablePot[ind1] = new TabulatedPotential(*(tablePot[ind])); - TabulatedPotential* t = new TabulatedPotential(*tablePot[ind]); for (std::size_t i = 0; i < gpuman.gpus.size(); ++i) { gpuman.use(i); - - // Copy tablePot[ind] to the device - float *v0, *v1, *v2, *v3; - size_t sz_n = sizeof(float) * tablePot[ind]->n; - gpuErrchk(cudaMalloc(&v0, sz_n)); - gpuErrchk(cudaMalloc(&v1, sz_n)); - gpuErrchk(cudaMalloc(&v2, sz_n)); - gpuErrchk(cudaMalloc(&v3, sz_n)); - gpuErrchk(cudaMemcpyAsync(v0, tablePot[ind]->v0, sz_n, cudaMemcpyHostToDevice)); - gpuErrchk(cudaMemcpyAsync(v1, tablePot[ind]->v1, sz_n, cudaMemcpyHostToDevice)); - gpuErrchk(cudaMemcpyAsync(v2, tablePot[ind]->v2, sz_n, cudaMemcpyHostToDevice)); - gpuErrchk(cudaMemcpyAsync(v3, tablePot[ind]->v3, sz_n, cudaMemcpyHostToDevice)); - t->v0 = v0; t->v1 = v1; - t->v2 = v2; t->v3 = v3; - // gpuErrchk(cudaMalloc(&tablePot_addr[ind], sizeof(TabulatedPotential))); - gpuErrchk(cudaMemcpy(tablePot_d[i][ind], t, sizeof(TabulatedPotential), cudaMemcpyHostToDevice)); - - /** Same thing for ind1 **/ - sz_n = sizeof(float) * tablePot[ind1]->n; - gpuErrchk(cudaMalloc(&v0, sz_n)); - gpuErrchk(cudaMalloc(&v1, sz_n)); - gpuErrchk(cudaMalloc(&v2, sz_n)); - gpuErrchk(cudaMalloc(&v3, sz_n)); - gpuErrchk(cudaMemcpyAsync(v0, tablePot[ind1]->v0, sz_n, cudaMemcpyHostToDevice)); - gpuErrchk(cudaMemcpyAsync(v1, tablePot[ind1]->v1, sz_n, cudaMemcpyHostToDevice)); - gpuErrchk(cudaMemcpyAsync(v2, tablePot[ind1]->v2, sz_n, cudaMemcpyHostToDevice)); - gpuErrchk(cudaMemcpyAsync(v3, tablePot[ind1]->v3, sz_n, cudaMemcpyHostToDevice)); - t->v0 = v0; t->v1 = v1; - t->v2 = v2; t->v3 = v3; - // gpuErrchk(cudaMalloc(&tablePot_addr[ind1], sizeof(TabulatedPotential))); - // gpuErrchk(cudaMemcpy(tablePot_addr[ind1], t, sizeof(TabulatedPotential), cudaMemcpyHostToDevice)); - gpuErrchk(cudaMemcpy(tablePot_d[i][ind1], t, sizeof(TabulatedPotential), cudaMemcpyHostToDevice)); - t->v0 = NULL; t->v1 = NULL; - t->v2 = NULL; t->v3 = NULL; - gpuErrchk(cudaMemcpy(tablePot_d[i], tablePot_addr, + tablePot_addr[i][ind] = tablePot_addr[i][ind1] = tablePot[ind]->copy_to_cuda(); + gpuErrchk(cudaMemcpy(tablePot_d[i], tablePot_addr[i], sizeof(TabulatedPotential*) * numParts * numParts, cudaMemcpyHostToDevice)); } gpuman.use(0); - delete t; - return true; } bool ComputeForce::addBondPotential(String fileName, int ind, Bond bonds[], BondAngle bondAngles[]) { - // TODO: see if tableBond_addr can be removed - if (tableBond[ind] != NULL) { - delete tableBond[ind]; - gpuErrchk(cudaFree(tableBond_addr[ind])); - tableBond[ind] = NULL; - tableBond_addr[ind] = NULL; - } - tableBond[ind] = new TabulatedPotential(fileName); + // TODO: see if tableBond_addr can be removed + if (tableBond[ind] != NULL) { + delete tableBond[ind]; + // gpuErrchk(cudaFree(tableBond_addr[ind])); //TODO free this a little more cleanly + } + + FullTabulatedPotential fullpot = FullTabulatedPotential(fileName); + tableBond[ind] = new TabulatedPotential(*fullpot.pot); for (int i = 0; i < numBonds; ++i) if (bonds[i].fileName == fileName) @@ -449,29 +415,9 @@ bool ComputeForce::addBondPotential(String fileName, int ind, Bond bonds[], Bond gpuErrchk(cudaMemcpyAsync(bonds_d, bonds, sizeof(Bond) * numBonds, cudaMemcpyHostToDevice)); - // Copy tableBond[ind] to the device - float *v0, *v1, *v2, *v3; - size_t sz_n = sizeof(float) * tableBond[ind]->n; - gpuErrchk(cudaMalloc(&v0, sz_n)); - gpuErrchk(cudaMalloc(&v1, sz_n)); - gpuErrchk(cudaMalloc(&v2, sz_n)); - gpuErrchk(cudaMalloc(&v3, sz_n)); - gpuErrchk(cudaMemcpyAsync(v0, tableBond[ind]->v0, sz_n, cudaMemcpyHostToDevice)); - gpuErrchk(cudaMemcpyAsync(v1, tableBond[ind]->v1, sz_n, cudaMemcpyHostToDevice)); - gpuErrchk(cudaMemcpyAsync(v2, tableBond[ind]->v2, sz_n, cudaMemcpyHostToDevice)); - gpuErrchk(cudaMemcpyAsync(v3, tableBond[ind]->v3, sz_n, cudaMemcpyHostToDevice)); - - gpuErrchk(cudaMalloc(&tableBond_addr[ind], sizeof(TabulatedPotential))); - TabulatedPotential t = TabulatedPotential(*tableBond[ind]); - t.v0 = v0; t.v1 = v1; - t.v2 = v2; t.v3 = v3; - gpuErrchk(cudaMemcpyAsync(tableBond_addr[ind], &t, - sizeof(TabulatedPotential), cudaMemcpyHostToDevice)); - + tableBond_addr[ind] = tableBond[ind]->copy_to_cuda(); gpuErrchk(cudaMemcpy(tableBond_d, tableBond_addr, - sizeof(TabulatedPotential*) * numTabBondFiles, cudaMemcpyHostToDevice)); - t.v0 = NULL; t.v1 = NULL; - t.v2 = NULL; t.v3 = NULL; + sizeof(TabulatedPotential*) * numParts * numParts, cudaMemcpyHostToDevice)); return true; } diff --git a/src/ComputeForce.h b/src/ComputeForce.h index b22231f491ee08fef899a0be7566cc6295841dd5..04ccb8d6a2c209f1a0dacd397efe7208e6c0c522 100644 --- a/src/ComputeForce.h +++ b/src/ComputeForce.h @@ -216,7 +216,7 @@ private: int numTabDihedralFiles; float *tableEps, *tableRad6, *tableAlpha; - TabulatedPotential **tablePot; + TabulatedPotential **tablePot; // 100% on Host TabulatedPotential **tableBond; TabulatedAnglePotential **tableAngle; TabulatedDihedralPotential **tableDihedral; @@ -232,9 +232,10 @@ private: float *energies_d; float *tableEps_d, *tableRad6_d, *tableAlpha_d; int gridSize; - // TabulatedPotential **tablePot_d, **tablePot_addr; - std::vector<TabulatedPotential**> tablePot_d; - TabulatedPotential** tablePot_addr; + // TabulatedPotential **tablePot_d, **tablePot_addr; + // We use this ugly approach because the array of tabulatePotentials may be sparse... but it probably won't be large enough to cause problems if we allocate more directly + std::vector<TabulatedPotential**> tablePot_addr; // per-gpu vector of host-allocated device pointers + std::vector<TabulatedPotential**> tablePot_d; // per-gpu vector of device-allocated device pointers TabulatedPotential **tableBond_d, **tableBond_addr; TabulatedAnglePotential **tableAngle_d, **tableAngle_addr; diff --git a/src/TabulatedPotential.cu b/src/TabulatedPotential.cu index b74a4734cd431a7c9c19a49da4d0a7490ff947cf..872c5ff087af46b97e6368d4889eb57fbac8d919 100644 --- a/src/TabulatedPotential.cu +++ b/src/TabulatedPotential.cu @@ -6,18 +6,102 @@ #include <cuda.h> TabulatedPotential::TabulatedPotential() { - n = 2; - r0 = 0.0f; - dr = 1.0f; - r1 = r0 + n*dr; - + n = 2; + drInv = 1.0f; + r0 = 0.0f; + v0 = new float[n]; + for (int i = 0; i < n; i++) v0[i] = 0.0f; +} + +TabulatedPotential::TabulatedPotential(const float* dist, const float* pot, int n0) { + n = abs(n0); + drInv = 1.0f/(dist[1]-dist[0]); + r0 = dist[0]; + v0 = new float[n]; - v0[0] = 0.0f; - v0[1] = 0.0f; - interpolate(); + for (int i = 0; i < n; i++) v0[i] = pot[i]; +} +TabulatedPotential::TabulatedPotential(const TabulatedPotential& tab) { + n = tab.n; + drInv = tab.drInv; + r0 = tab.r0; + + v0 = new float[n]; + for (int i = 0; i < n; i++) v0[i] = tab.v0[i]; +} + +TabulatedPotential::~TabulatedPotential() { + delete [] v0; } -TabulatedPotential::TabulatedPotential(const char* fileName) : fileName(fileName) { +void TabulatedPotential::truncate(float cutoff) { + int home = int(floor((cutoff - r0) * drInv)); + if (home > n) return; + + float v = v0[home]; + for (int i = home; i < n; i++) v0[i] = v; + // interpolate(); +} + +bool TabulatedPotential::truncate(float switchDist, float cutoff, float value) { + int indOff = int(floor((cutoff - r0) * drInv)); + int indSwitch = int(floor((switchDist - r0) * drInv)); + + if (indSwitch > n) return false; + + // Set everything after the cutoff to "value". + for (int i = indOff; i < n; i++) v0[i] = value; + + // Apply a linear switch. + float v = v0[indSwitch]; + float m = (value - v)/(indOff - indSwitch); + for (int i = indSwitch; i < indOff; i++) v0[i] = m*(i - indSwitch) + v; + + // interpolate(); + return true; +} + +// Vector3 TabulatedPotential::computeForce(Vector3 r) { +// float d = r.length(); +// Vector3 rUnit = -r/d; +// int home = int(floor((d - r0)*drInv)); + +// if (home < 0) return Vector3(0.0f); +// if (home >= n) return Vector3(0.0f); + +// float homeR = home*dr + r0; +// float w = (d - homeR)/dr; + +// // Interpolate. +// Vector3 force = -(3.0f*v3[home]*w*w + 2.0f*v2[home]*w + v1[home])*rUnit/dr; +// return force; +// } + + +// void TabulatedPotential::interpolate() { for cubic interpolation +// v1 = new float[n]; +// v2 = new float[n]; +// v3 = new float[n]; + +// for (int i = 0; i < n; i++) { +// int i0 = i - 1; +// int i1 = i; +// int i2 = i + 1; +// int i3 = i + 2; + +// if (i0 < 0) i0 = 0; +// if (i2 >= n) i2 = n-1; +// if (i3 >= n) i3 = n-1; + +// v3[i] = 0.5f*(-v0[i0] + 3.0f*v0[i1] - 3.0f*v0[i2] + v0[i3]); +// v2[i] = 0.5f*(2.0f*v0[i0] - 5.0f*v0[i1] + 4.0f*v0[i2] - v0[i3]); +// v1[i] = 0.5f*(-v0[i0] + v0[i2]); +// } +// e0 = v3[n-1] + v2[n-1] + v1[n-1] + v0[n-1]; +// } + + +FullTabulatedPotential::FullTabulatedPotential(const char* fileName) : fileName(fileName) { // printf("File: %s\n", fileName); FILE* inp = fopen(fileName, "r"); if (inp == NULL) { @@ -58,44 +142,25 @@ TabulatedPotential::TabulatedPotential(const char* fileName) : fileName(fileName delete[] tokenList; } fclose(inp); - init(r, v, count); - interpolate(); + *pot = TabulatedPotential(r,v,count); + //pot = new TabulatePotential(r,v,count); + // init(r, v, count); + // interpolate(); delete[] r; delete[] v; } -TabulatedPotential::TabulatedPotential(const TabulatedPotential& tab) { - n = tab.n; - dr = tab.dr; - r0 = tab.r0; - r1 = tab.r1; - - v0 = new float[n]; - v1 = new float[n]; - v2 = new float[n]; - v3 = new float[n]; - for (int i = 0; i < n; i++) { - v0[i] = tab.v0[i]; - v1[i] = tab.v1[i]; - v2[i] = tab.v2[i]; - v3[i] = tab.v3[i]; - } - e0 = tab.e0; +FullTabulatedPotential::FullTabulatedPotential(const FullTabulatedPotential& tab) { + pot = new TabulatedPotential(*tab.pot); + numLines = tab.numLines; + fileName = String(tab.fileName); } -TabulatedPotential::TabulatedPotential(const float* dist, const float* pot, int n0) { - init(dist, pot, n0); - interpolate(); +FullTabulatedPotential::~FullTabulatedPotential() { + delete pot; } -TabulatedPotential::~TabulatedPotential() { - delete[] v0; - delete[] v1; - delete[] v2; - delete[] v3; -} - -int TabulatedPotential::countValueLines(const char* fileName) { +int FullTabulatedPotential::countValueLines(const char* fileName) { FILE* inp = fopen(fileName, "r"); if (inp == NULL) { printf("TabulatedPotential::countValueLines Could not open file '%s'\n", fileName); @@ -117,101 +182,26 @@ int TabulatedPotential::countValueLines(const char* fileName) { return count; } -void TabulatedPotential::truncate(float cutoff) { - int home = int(floor((cutoff - r0)/dr)); - if (home > n) return; - - float v = v0[home]; - for (int i = home; i < n; i++) v0[i] = v; - interpolate(); -} - -bool TabulatedPotential::truncate(float switchDist, float cutoff, float value) { - int indOff = int(floor((cutoff - r0)/dr)); - int indSwitch = int(floor((switchDist - r0)/dr)); - - if (indSwitch > n) return false; - - // Set everything after the cutoff to "value". - for (int i = indOff; i < n; i++) v0[i] = value; - - // Apply a linear switch. - float v = v0[indSwitch]; - float m = (value - v)/(indOff - indSwitch); - for (int i = indSwitch; i < indOff; i++) v0[i] = m*(i - indSwitch) + v; - - interpolate(); - return true; -} - -Vector3 TabulatedPotential::computeForce(Vector3 r) { - float d = r.length(); - Vector3 rUnit = -r/d; - int home = int(floor((d - r0)/dr)); - - if (home < 0) return Vector3(0.0f); - if (home >= n) return Vector3(0.0f); - - float homeR = home*dr + r0; - float w = (d - homeR)/dr; - - // Interpolate. - Vector3 force = -(3.0f*v3[home]*w*w + 2.0f*v2[home]*w + v1[home])*rUnit/dr; - return force; -} - -void TabulatedPotential::init(const float* dist, const float* pot, int n0) { - n = abs(n0); - dr = dist[1]-dist[0]; - r0 = dist[0]; - r1 = r0 + n*dr; - - v0 = new float[n]; - for (int i = 0; i < n; i++) v0[i] = pot[i]; -} - -void TabulatedPotential::interpolate() { - v1 = new float[n]; - v2 = new float[n]; - v3 = new float[n]; - - for (int i = 0; i < n; i++) { - int i0 = i - 1; - int i1 = i; - int i2 = i + 1; - int i3 = i + 2; - - if (i0 < 0) i0 = 0; - if (i2 >= n) i2 = n-1; - if (i3 >= n) i3 = n-1; - - v3[i] = 0.5f*(-v0[i0] + 3.0f*v0[i1] - 3.0f*v0[i2] + v0[i3]); - v2[i] = 0.5f*(2.0f*v0[i0] - 5.0f*v0[i1] + 4.0f*v0[i2] - v0[i3]); - v1[i] = 0.5f*(-v0[i0] + v0[i2]); - } - e0 = v3[n-1] + v2[n-1] + v1[n-1] + v0[n-1]; -} - int countValueLines(const char* fileName) { - FILE* inp = fopen(fileName, "r"); - if (inp == NULL) { - printf("SimplePotential::countValueLines Could not open file '%s'\n", fileName); - exit(-1); - } - char line[256]; - int count = 0; + FILE* inp = fopen(fileName, "r"); + if (inp == NULL) { + printf("SimplePotential::countValueLines Could not open file '%s'\n", fileName); + exit(-1); + } + char line[256]; + int count = 0; + + while (fgets(line, 256, inp) != NULL) { + // Ignore comments. + int len = strlen(line); + if (line[0] == '#') continue; + if (len < 2) continue; + count++; + } + fclose(inp); + return count; +} - while (fgets(line, 256, inp) != NULL) { - // Ignore comments. - int len = strlen(line); - if (line[0] == '#') continue; - if (len < 2) continue; - count++; - } - fclose(inp); - return count; -} - SimplePotential::SimplePotential(const char* filename, SimplePotentialType type) : type(type) { FILE* inp = fopen(filename, "r"); if (inp == NULL) { diff --git a/src/TabulatedPotential.h b/src/TabulatedPotential.h index e409bdb0283228a6bc5ba8485c06535a7dcc986c..3f9cbb30120c4451146c102e161cc248dee88bde 100644 --- a/src/TabulatedPotential.h +++ b/src/TabulatedPotential.h @@ -22,6 +22,12 @@ #define BD_PI 3.1415927f +#ifndef gpuErrchk +#define delgpuErrchk +#define gpuErrchk(code) { if ((code) != cudaSuccess) { \ + fprintf(stderr,"CUDA Error: %s %s %d\n", cudaGetErrorString(code), __FILE__, __LINE__); \ + }} +#endif class EnergyForce { public: @@ -41,76 +47,82 @@ public: class TabulatedPotential { public: TabulatedPotential(); - TabulatedPotential(const char* fileName); TabulatedPotential(const TabulatedPotential& tab); TabulatedPotential(const float* dist, const float* pot, int n0); - ~TabulatedPotential(); - static int countValueLines(const char* fileName); - void truncate(float cutoff); bool truncate(float switchDist, float cutoff, float value); Vector3 computeForce(Vector3 r); - HOST DEVICE inline EnergyForce computeOLD(Vector3 r) { - float d = r.length(); - Vector3 rUnit = -r/d; - int home = int(floorf((d - r0)/dr)); - if (home < 0) return EnergyForce(v0[0], Vector3(0.0f)); - if (home >= n) return EnergyForce(e0, Vector3(0.0f)); - float homeR = home*dr + r0; - float w = (d - homeR)/dr; - - // Interpolate. - float energy = v3[home]*w*w*w + v2[home]*w*w + v1[home]*w + v0[home]; - Vector3 force = -(3.0f*v3[home] * w * w - + 2.0f*v2[home] * w - + v1[home]) * rUnit/dr; - return EnergyForce(energy,force); + int size() const { return n; } + + TabulatedPotential* copy_to_cuda() const { + // Allocate data for array + TabulatedPotential* dev_ptr; + TabulatedPotential tmp(*this); + + float *v; + { + size_t sz = sizeof(float) * n; + gpuErrchk(cudaMalloc(&v, sz)); + gpuErrchk(cudaMemcpy(v, v0, sz, cudaMemcpyHostToDevice)); } + tmp.v0 = v; + + size_t sz = sizeof(TabulatedPotential); + gpuErrchk(cudaMalloc(&dev_ptr, sz)); + gpuErrchk(cudaMemcpy(dev_ptr, &tmp, sz, cudaMemcpyHostToDevice)); + tmp.v0 = NULL; + return dev_ptr; + } + void free_from_cuda(TabulatedPotential* dev_ptr) const { + // TODO: copy pointers to local temporary object, then cudaFree that data + } + HOST DEVICE inline EnergyForce compute(Vector3 r) { float d = r.length(); - float w = (d - r0)/dr; + float w = (d - r0) * drInv; int home = int( floorf(w) ); w = w - home; // if (home < 0) return EnergyForce(v0[0], Vector3(0.0f)); home = home < 0 ? 0 : home; - if (home >= n) return EnergyForce(e0, Vector3(0.0f)); + if (home >= n) return EnergyForce(v0[n-1], Vector3(0.0f)); float u0 = v0[home]; float du = home+1 < n ? v0[home+1]-u0 : 0; // Interpolate. float energy = du*w+u0; - Vector3 force = (du/(d*dr))*r; + Vector3 force = (du*drInv/d)*r; return EnergyForce(energy,force); } + HOST DEVICE inline EnergyForce compute(const Vector3 r, float d) const { d = sqrt(d); // float d = r.length(); - float w = (d - r0)/dr; + float w = (d - r0) * drInv; int home = int( floorf(w) ); w = w - home; // if (home < 0) return EnergyForce(v0[0], Vector3(0.0f)); home = home < 0 ? 0 : home; - if (home >= n) return EnergyForce(e0, Vector3(0.0f)); + if (home >= n) return EnergyForce(v0[n-1], Vector3(0.0f)); float u0 = v0[home]; float du = home+1 < n ? v0[home+1]-u0 : 0; // Interpolate. float energy = du*w+u0; - Vector3 force = (du/(d*dr))*r; + Vector3 force = (du*drInv/d)*r; return EnergyForce(energy,force); } HOST DEVICE inline Vector3 computef(const Vector3 r, float d) const { d = sqrt(d); // float d = r.length(); // RBTODO: precompute so that initial blocks are zero; reduce computation here - float w = (d - r0)/dr; + float w = (d - r0)*drInv; int home = int( floorf(w) ); w = w - home; // if (home < 0) return EnergyForce(v0[0], Vector3(0.0f)); @@ -118,28 +130,50 @@ public: if (home >= n) return Vector3(0.0f); if (home+1 < n) - return ((v0[home+1]-v0[home])/(d*dr))*r; + return ((v0[home+1]-v0[home])*drInv/d)*r; else - return Vector3(0.0f); + return Vector3(0.0f); } // private: -public: - float* pot; +private: float* v0; - float* v1; - float* v2; - float* v3; int n; + float drInv; //TODO replace with drInv + float r0; +}; + +class FullTabulatedPotential { +public: + FullTabulatedPotential(); + FullTabulatedPotential(const char* fileName); + FullTabulatedPotential(const FullTabulatedPotential& tab); + ~FullTabulatedPotential(); + + static int countValueLines(const char* fileName); + + /* HOST DEVICE inline EnergyForce computeOLD(Vector3 r) { */ + /* float d = r.length(); */ + /* Vector3 rUnit = -r/d; */ + /* int home = int(floorf((d - r0)/dr)); */ + /* if (home < 0) return EnergyForce(v0[0], Vector3(0.0f)); */ + /* if (home >= n) return EnergyForce(e0, Vector3(0.0f)); */ + /* float homeR = home*dr + r0; */ + /* float w = (d - homeR)/dr; */ + + /* // Interpolate. */ + /* float energy = v3[home]*w*w*w + v2[home]*w*w + v1[home]*w + v0[home]; */ + /* Vector3 force = -(3.0f*v3[home] * w * w */ + /* + 2.0f*v2[home] * w */ + /* + v1[home]) * rUnit/dr; */ + /* return EnergyForce(energy,force); */ + /* } */ + + TabulatedPotential* pot; + +private: int numLines; - float dr; //TODO replace with drInv - float r0, r1; - float e0; String fileName; - - void init(const float* dist, const float* pot, int n0); - void interpolate(); - }; @@ -478,4 +512,8 @@ public: }; +#ifndef delgpuErrchk +#undef delgpuErrchk +#undef gpuErrchk +#endif #endif