diff --git a/src/ComputeForce.cu b/src/ComputeForce.cu
index be5e545f1393fd354b751e34620ce928dc93b789..81d64c8eb278209c91693be3057ac2cedce179a0 100644
--- a/src/ComputeForce.cu
+++ b/src/ComputeForce.cu
@@ -55,6 +55,7 @@ ComputeForce::ComputeForce(const Configuration& c, const int numReplicas = 1) :
 	for (int i = 0; i < gpuman.gpus.size(); ++i) {
 	    int s = gpuman.gpus.size();
 	    sys_d	= std::vector<BaseGrid*>(s);
+	    tablePot_addr = std::vector<TabulatedPotential**>(s);
 	    tablePot_d	= std::vector<TabulatedPotential**>(s);
 	    pairLists_d = std::vector<int2*>(s);
 	    pairLists_tex = std::vector<cudaTextureObject_t>(s);
@@ -97,12 +98,11 @@ ComputeForce::ComputeForce(const Configuration& c, const int numReplicas = 1) :
 
 	// Create the potential table
 	tablePot = new TabulatedPotential*[np2];
-	tablePot_addr = new TabulatedPotential*[np2];
-	for (int i = 0; i < np2; ++i) {
-		tablePot_addr[i] = NULL;
-		tablePot[i] = NULL;
-	}
+	for (int i = 0; i < np2; ++i) tablePot[i] = NULL;
+
 	for (std::size_t i = 0; i < gpuman.gpus.size(); ++i) {
+	    tablePot_addr[i] = new TabulatedPotential*[np2];
+	    for (int j = 0; j < np2; ++j) tablePot_addr[i][j] = NULL;
 	    gpuman.use(i);
 	    gpuErrchk(cudaMalloc(&tablePot_d[i], sizeof(TabulatedPotential*) * np2));
 	}
@@ -255,7 +255,7 @@ ComputeForce::~ComputeForce() {
 	for (int j = 0; j < numParts * numParts; ++j)
 		delete tablePot[j];
 	delete[] tablePot;
-	delete[] tablePot_addr;
+	for (auto& tpa : tablePot_addr) delete[] tpa;
 
 	for (int j = 0; j < numTabBondFiles; ++j)
 		delete tableBond[j];
@@ -364,78 +364,44 @@ bool ComputeForce::addTabulatedPotential(String fileName, int type0, int type1)
 	// If an entry already exists for this particle type, delete it
 	if (tablePot[ind] != NULL) {
 		delete tablePot[ind];
-		gpuErrchk(cudaFree(tablePot_addr[ind]));
-		tablePot[ind] = NULL;
-		tablePot_addr[ind] = NULL;
+		// TODO free resources
+		// gpuErrchk(cudaFree(tablePot_d[ind]));
+		// tablePot[ind] = NULL;
+		// tablePot_addr[ind] = NULL;
 	}
 	if (tablePot[ind1] != NULL) {
-		gpuErrchk(cudaFree(tablePot_addr[ind1]));
+	    // gpuErrchk(cudaFree(tablePot_addr[ind1]));
 		delete tablePot[ind1];
-		tablePot[ind1] = NULL;
-		tablePot_addr[ind1] = NULL;
+		// tablePot[ind1] = NULL;
+		// tablePot_addr[ind1] = NULL;
 	}
+	
+	FullTabulatedPotential fullpot = FullTabulatedPotential(fileName);
 
-	tablePot[ind] = new TabulatedPotential(fileName);
+	tablePot[ind] = tablePot[ind1] = new TabulatedPotential(*fullpot.pot);
 	tablePot[ind]->truncate(switchStart, sqrtf(cutoff2), 0.0f);
-	tablePot[ind1] = new TabulatedPotential(*(tablePot[ind]));
 
-	TabulatedPotential* t = new TabulatedPotential(*tablePot[ind]);
 
 	for (std::size_t i = 0; i < gpuman.gpus.size(); ++i) {
 	    gpuman.use(i);
-
-	    // Copy tablePot[ind] to the device
-	    float *v0, *v1, *v2, *v3;
-	    size_t sz_n = sizeof(float) * tablePot[ind]->n;
-	    gpuErrchk(cudaMalloc(&v0, sz_n));
-	    gpuErrchk(cudaMalloc(&v1, sz_n));
-	    gpuErrchk(cudaMalloc(&v2, sz_n));
-	    gpuErrchk(cudaMalloc(&v3, sz_n));
-	    gpuErrchk(cudaMemcpyAsync(v0, tablePot[ind]->v0, sz_n, cudaMemcpyHostToDevice));
-	    gpuErrchk(cudaMemcpyAsync(v1, tablePot[ind]->v1, sz_n, cudaMemcpyHostToDevice));
-	    gpuErrchk(cudaMemcpyAsync(v2, tablePot[ind]->v2, sz_n, cudaMemcpyHostToDevice));
-	    gpuErrchk(cudaMemcpyAsync(v3, tablePot[ind]->v3, sz_n, cudaMemcpyHostToDevice));
-	    t->v0 = v0; t->v1 = v1;
-	    t->v2 = v2; t->v3 = v3;
-	    // gpuErrchk(cudaMalloc(&tablePot_addr[ind], sizeof(TabulatedPotential)));
-	    gpuErrchk(cudaMemcpy(tablePot_d[i][ind], t, sizeof(TabulatedPotential), cudaMemcpyHostToDevice));
-
-	    /** Same thing for ind1 **/
-	    sz_n = sizeof(float) * tablePot[ind1]->n;
-	    gpuErrchk(cudaMalloc(&v0, sz_n));
-	    gpuErrchk(cudaMalloc(&v1, sz_n));
-	    gpuErrchk(cudaMalloc(&v2, sz_n));
-	    gpuErrchk(cudaMalloc(&v3, sz_n));
-	    gpuErrchk(cudaMemcpyAsync(v0, tablePot[ind1]->v0, sz_n, cudaMemcpyHostToDevice));
-	    gpuErrchk(cudaMemcpyAsync(v1, tablePot[ind1]->v1, sz_n, cudaMemcpyHostToDevice));
-	    gpuErrchk(cudaMemcpyAsync(v2, tablePot[ind1]->v2, sz_n, cudaMemcpyHostToDevice));
-	    gpuErrchk(cudaMemcpyAsync(v3, tablePot[ind1]->v3, sz_n, cudaMemcpyHostToDevice));
-	    t->v0 = v0; t->v1 = v1;
-	    t->v2 = v2; t->v3 = v3;
-	    // gpuErrchk(cudaMalloc(&tablePot_addr[ind1], sizeof(TabulatedPotential)));
-	    // gpuErrchk(cudaMemcpy(tablePot_addr[ind1], t, sizeof(TabulatedPotential), cudaMemcpyHostToDevice));
-	    gpuErrchk(cudaMemcpy(tablePot_d[i][ind1], t, sizeof(TabulatedPotential), cudaMemcpyHostToDevice));
-	    t->v0 = NULL; t->v1 = NULL;
-	    t->v2 = NULL; t->v3 = NULL;
-	    gpuErrchk(cudaMemcpy(tablePot_d[i], tablePot_addr,
+	    tablePot_addr[i][ind] = tablePot_addr[i][ind1] = tablePot[ind]->copy_to_cuda();
+	    gpuErrchk(cudaMemcpy(tablePot_d[i], tablePot_addr[i],
 				 sizeof(TabulatedPotential*) * numParts * numParts, cudaMemcpyHostToDevice));
 	}
 	gpuman.use(0);
-	delete t;
-
 	return true;
 }
 
 bool ComputeForce::addBondPotential(String fileName, int ind, Bond bonds[], BondAngle bondAngles[])
 {
-	// TODO: see if tableBond_addr can be removed
-	if (tableBond[ind] != NULL) {
-		delete tableBond[ind];
-		gpuErrchk(cudaFree(tableBond_addr[ind]));
-		tableBond[ind] = NULL;
-		tableBond_addr[ind] = NULL;
-	}
-	tableBond[ind] = new TabulatedPotential(fileName);
+    // TODO: see if tableBond_addr can be removed
+    if (tableBond[ind] != NULL) {
+	delete tableBond[ind];
+	// gpuErrchk(cudaFree(tableBond_addr[ind])); //TODO free this a little more cleanly
+    }
+
+    FullTabulatedPotential fullpot = FullTabulatedPotential(fileName);
+    tableBond[ind] = new TabulatedPotential(*fullpot.pot);
 
 	for (int i = 0; i < numBonds; ++i)
 		if (bonds[i].fileName == fileName)
@@ -449,29 +415,9 @@ bool ComputeForce::addBondPotential(String fileName, int ind, Bond bonds[], Bond
 
 	gpuErrchk(cudaMemcpyAsync(bonds_d, bonds, sizeof(Bond) * numBonds, cudaMemcpyHostToDevice));
 
-	// Copy tableBond[ind] to the device
-	float *v0, *v1, *v2, *v3;
-	size_t sz_n = sizeof(float) * tableBond[ind]->n;
-	gpuErrchk(cudaMalloc(&v0, sz_n));
-	gpuErrchk(cudaMalloc(&v1, sz_n));
-	gpuErrchk(cudaMalloc(&v2, sz_n));
-	gpuErrchk(cudaMalloc(&v3, sz_n));
-	gpuErrchk(cudaMemcpyAsync(v0, tableBond[ind]->v0, sz_n, cudaMemcpyHostToDevice));
-	gpuErrchk(cudaMemcpyAsync(v1, tableBond[ind]->v1, sz_n, cudaMemcpyHostToDevice));
-	gpuErrchk(cudaMemcpyAsync(v2, tableBond[ind]->v2, sz_n, cudaMemcpyHostToDevice));
-	gpuErrchk(cudaMemcpyAsync(v3, tableBond[ind]->v3, sz_n, cudaMemcpyHostToDevice));
-
-	gpuErrchk(cudaMalloc(&tableBond_addr[ind], sizeof(TabulatedPotential)));
-	TabulatedPotential t = TabulatedPotential(*tableBond[ind]);
-	t.v0 = v0; t.v1 = v1;
-	t.v2 = v2; t.v3 = v3;
-	gpuErrchk(cudaMemcpyAsync(tableBond_addr[ind], &t,
-			sizeof(TabulatedPotential), cudaMemcpyHostToDevice));
-
+	tableBond_addr[ind] = tableBond[ind]->copy_to_cuda();
 	gpuErrchk(cudaMemcpy(tableBond_d, tableBond_addr,
-			sizeof(TabulatedPotential*) * numTabBondFiles, cudaMemcpyHostToDevice));
-	t.v0 = NULL; t.v1 = NULL;
-	t.v2 = NULL; t.v3 = NULL;
+				 sizeof(TabulatedPotential*) * numParts * numParts, cudaMemcpyHostToDevice));
 	return true;
 }
 
diff --git a/src/ComputeForce.h b/src/ComputeForce.h
index b22231f491ee08fef899a0be7566cc6295841dd5..04ccb8d6a2c209f1a0dacd397efe7208e6c0c522 100644
--- a/src/ComputeForce.h
+++ b/src/ComputeForce.h
@@ -216,7 +216,7 @@ private:
 	int numTabDihedralFiles;
 
 	float *tableEps, *tableRad6, *tableAlpha;
-	TabulatedPotential **tablePot;
+	TabulatedPotential **tablePot; // 100% on Host 
 	TabulatedPotential **tableBond;
 	TabulatedAnglePotential **tableAngle;
 	TabulatedDihedralPotential **tableDihedral;
@@ -232,9 +232,10 @@ private:
 	float *energies_d;
 	float *tableEps_d, *tableRad6_d, *tableAlpha_d;
 	int gridSize;
-    // TabulatedPotential **tablePot_d, **tablePot_addr;
-    std::vector<TabulatedPotential**> tablePot_d;
-        TabulatedPotential** tablePot_addr;
+	// TabulatedPotential **tablePot_d, **tablePot_addr;
+	// We use this ugly approach because the array of tabulatePotentials may be sparse... but it probably won't be large enough to cause problems if we allocate more directly
+	std::vector<TabulatedPotential**> tablePot_addr; // per-gpu vector of host-allocated device pointers
+	std::vector<TabulatedPotential**> tablePot_d; // per-gpu vector of device-allocated device pointers
 
 	TabulatedPotential **tableBond_d, **tableBond_addr;
 	TabulatedAnglePotential **tableAngle_d, **tableAngle_addr;
diff --git a/src/TabulatedPotential.cu b/src/TabulatedPotential.cu
index b74a4734cd431a7c9c19a49da4d0a7490ff947cf..872c5ff087af46b97e6368d4889eb57fbac8d919 100644
--- a/src/TabulatedPotential.cu
+++ b/src/TabulatedPotential.cu
@@ -6,18 +6,102 @@
 #include <cuda.h>
 
 TabulatedPotential::TabulatedPotential() {
-	n = 2;
-	r0 = 0.0f;
-	dr = 1.0f;
-	r1 = r0 + n*dr;
-	
+    n = 2;
+    drInv = 1.0f;
+    r0 = 0.0f;
+    v0 = new float[n];
+    for (int i = 0; i < n; i++) v0[i] = 0.0f;
+}
+
+TabulatedPotential::TabulatedPotential(const float* dist, const float* pot, int n0) {
+	n = abs(n0);
+	drInv = 1.0f/(dist[1]-dist[0]);
+	r0 = dist[0];
+
 	v0 = new float[n];
-	v0[0] = 0.0f;
-	v0[1] = 0.0f;
-	interpolate();
+	for (int i = 0; i < n; i++) v0[i] = pot[i];
+}
+TabulatedPotential::TabulatedPotential(const TabulatedPotential& tab) {
+    n = tab.n;
+    drInv = tab.drInv;
+    r0 = tab.r0;
+
+    v0 = new float[n];
+    for (int i = 0; i < n; i++) v0[i] = tab.v0[i];
+}
+
+TabulatedPotential::~TabulatedPotential() {
+    delete [] v0;
 }
 
-TabulatedPotential::TabulatedPotential(const char* fileName) : fileName(fileName) {
+void TabulatedPotential::truncate(float cutoff) {
+	int home = int(floor((cutoff - r0) * drInv));
+	if (home > n) return;
+
+	float v = v0[home];
+	for (int i = home; i < n; i++) v0[i] = v;
+	// interpolate();
+}
+
+bool TabulatedPotential::truncate(float switchDist, float cutoff, float value) {
+	int indOff = int(floor((cutoff - r0) * drInv));
+	int indSwitch = int(floor((switchDist - r0) * drInv));
+
+	if (indSwitch > n) return false;
+
+	// Set everything after the cutoff to "value".
+	for (int i = indOff; i < n; i++) v0[i] = value;
+    
+	// Apply a linear switch.
+	float v = v0[indSwitch];
+	float m = (value - v)/(indOff - indSwitch);
+	for (int i = indSwitch; i < indOff; i++) v0[i] = m*(i - indSwitch) + v;
+
+	// interpolate();
+	return true;
+}
+
+// Vector3 TabulatedPotential::computeForce(Vector3 r) {
+// 	float d = r.length();
+// 	Vector3 rUnit = -r/d;
+// 	int home = int(floor((d - r0)*drInv));
+
+// 	if (home < 0) return Vector3(0.0f);
+// 	if (home >= n) return Vector3(0.0f);
+        
+// 	float homeR = home*dr + r0;
+// 	float w = (d - homeR)/dr;
+   
+// 	// Interpolate.
+// 	Vector3 force = -(3.0f*v3[home]*w*w + 2.0f*v2[home]*w + v1[home])*rUnit/dr;
+// 	return force;
+// }
+ 
+  
+// void TabulatedPotential::interpolate() { for cubic interpolation
+// 	v1 = new float[n];
+// 	v2 = new float[n];
+// 	v3 = new float[n];
+
+// 	for (int i = 0; i < n; i++) {
+// 		int i0 = i - 1;
+// 		int i1 = i;
+// 		int i2 = i + 1;
+// 		int i3 = i + 2;
+
+// 		if (i0 < 0) i0 = 0;
+// 		if (i2 >= n) i2 = n-1;
+// 		if (i3 >= n) i3 = n-1;
+
+// 		v3[i] = 0.5f*(-v0[i0] + 3.0f*v0[i1] - 3.0f*v0[i2] + v0[i3]);
+// 		v2[i] = 0.5f*(2.0f*v0[i0] - 5.0f*v0[i1] + 4.0f*v0[i2] - v0[i3]);
+// 		v1[i] = 0.5f*(-v0[i0] + v0[i2]);
+// 	}
+// 	e0 = v3[n-1] + v2[n-1] + v1[n-1] + v0[n-1];
+// }
+
+
+FullTabulatedPotential::FullTabulatedPotential(const char* fileName) : fileName(fileName) {
 	// printf("File: %s\n", fileName);
 	FILE* inp = fopen(fileName, "r");
 	if (inp == NULL) {
@@ -58,44 +142,25 @@ TabulatedPotential::TabulatedPotential(const char* fileName) : fileName(fileName
 		delete[] tokenList;
 	}
 	fclose(inp);
-	init(r, v, count);
-	interpolate();
+	*pot = TabulatedPotential(r,v,count);
+	//pot = new TabulatePotential(r,v,count);
+	// init(r, v, count);
+	// interpolate();
 	delete[] r;
 	delete[] v;
 }
 
-TabulatedPotential::TabulatedPotential(const TabulatedPotential& tab) {
-	n = tab.n;
-	dr = tab.dr;
-	r0 = tab.r0;
-	r1 = tab.r1;
-
-	v0 = new float[n];
-	v1 = new float[n];
-	v2 = new float[n];
-	v3 = new float[n];
-	for (int i = 0; i < n; i++) {
-		v0[i] = tab.v0[i];
-		v1[i] = tab.v1[i];
-		v2[i] = tab.v2[i];
-		v3[i] = tab.v3[i];
-	}
-	e0 = tab.e0;
+FullTabulatedPotential::FullTabulatedPotential(const FullTabulatedPotential& tab) {
+    pot = new TabulatedPotential(*tab.pot);
+    numLines = tab.numLines;
+    fileName = String(tab.fileName);
 }
 
-TabulatedPotential::TabulatedPotential(const float* dist, const float* pot, int n0) {
-	init(dist, pot, n0);
-	interpolate();
+FullTabulatedPotential::~FullTabulatedPotential() {
+	delete pot;
 }
 
-TabulatedPotential::~TabulatedPotential() {
-	delete[] v0;
-	delete[] v1;
-	delete[] v2;
-	delete[] v3;
-}
-
-int TabulatedPotential::countValueLines(const char* fileName) {
+int FullTabulatedPotential::countValueLines(const char* fileName) {
 	FILE* inp = fopen(fileName, "r");
 	if (inp == NULL) {
 		printf("TabulatedPotential::countValueLines Could not open file '%s'\n", fileName);
@@ -117,101 +182,26 @@ int TabulatedPotential::countValueLines(const char* fileName) {
 	return count;
 }
 
-void TabulatedPotential::truncate(float cutoff) {
-	int home = int(floor((cutoff - r0)/dr));
-	if (home > n) return;
-
-	float v = v0[home];
-	for (int i = home; i < n; i++) v0[i] = v;
-	interpolate();
-}
-
-bool TabulatedPotential::truncate(float switchDist, float cutoff, float value) {
-	int indOff = int(floor((cutoff - r0)/dr));
-	int indSwitch = int(floor((switchDist - r0)/dr));
-
-	if (indSwitch > n) return false;
-
-	// Set everything after the cutoff to "value".
-	for (int i = indOff; i < n; i++) v0[i] = value;
-    
-	// Apply a linear switch.
-	float v = v0[indSwitch];
-	float m = (value - v)/(indOff - indSwitch);
-	for (int i = indSwitch; i < indOff; i++) v0[i] = m*(i - indSwitch) + v;
-
-	interpolate();
-	return true;
-}
-
-Vector3 TabulatedPotential::computeForce(Vector3 r) {
-	float d = r.length();
-	Vector3 rUnit = -r/d;
-	int home = int(floor((d - r0)/dr));
-
-	if (home < 0) return Vector3(0.0f);
-	if (home >= n) return Vector3(0.0f);
-        
-	float homeR = home*dr + r0;
-	float w = (d - homeR)/dr;
-   
-	// Interpolate.
-	Vector3 force = -(3.0f*v3[home]*w*w + 2.0f*v2[home]*w + v1[home])*rUnit/dr;
-	return force;
-}
- 
-void TabulatedPotential::init(const float* dist, const float* pot, int n0) {
-	n = abs(n0);
-	dr = dist[1]-dist[0];
-	r0 = dist[0];
-	r1 = r0 + n*dr;
-
-	v0 = new float[n];
-	for (int i = 0; i < n; i++) v0[i] = pot[i];
-}
-  
-void TabulatedPotential::interpolate() {
-	v1 = new float[n];
-	v2 = new float[n];
-	v3 = new float[n];
-
-	for (int i = 0; i < n; i++) {
-		int i0 = i - 1;
-		int i1 = i;
-		int i2 = i + 1;
-		int i3 = i + 2;
-
-		if (i0 < 0) i0 = 0;
-		if (i2 >= n) i2 = n-1;
-		if (i3 >= n) i3 = n-1;
-
-		v3[i] = 0.5f*(-v0[i0] + 3.0f*v0[i1] - 3.0f*v0[i2] + v0[i3]);
-		v2[i] = 0.5f*(2.0f*v0[i0] - 5.0f*v0[i1] + 4.0f*v0[i2] - v0[i3]);
-		v1[i] = 0.5f*(-v0[i0] + v0[i2]);
-	}
-	e0 = v3[n-1] + v2[n-1] + v1[n-1] + v0[n-1];
-}
-
 int countValueLines(const char* fileName) {
-	FILE* inp = fopen(fileName, "r");
-	if (inp == NULL) {
-		printf("SimplePotential::countValueLines Could not open file '%s'\n", fileName);
-		exit(-1);
-	}
-	char line[256];
-	int count = 0;
+    FILE* inp = fopen(fileName, "r");
+    if (inp == NULL) {
+	printf("SimplePotential::countValueLines Could not open file '%s'\n", fileName);
+	exit(-1);
+    }
+    char line[256];
+    int count = 0;
+
+    while (fgets(line, 256, inp) != NULL) {
+	// Ignore comments.
+	int len = strlen(line);
+	if (line[0] == '#') continue;
+	if (len < 2) continue;
+	count++;
+    }
+    fclose(inp);
+    return count;
+} 
 
-	while (fgets(line, 256, inp) != NULL) {
-		// Ignore comments.
-		int len = strlen(line);
-		if (line[0] == '#') continue;
-		if (len < 2) continue;
-		count++;
-	}
-	fclose(inp);
-	return count;
-}
- 
 SimplePotential::SimplePotential(const char* filename, SimplePotentialType type) : type(type) {
 	FILE* inp = fopen(filename, "r");
 	if (inp == NULL) {
diff --git a/src/TabulatedPotential.h b/src/TabulatedPotential.h
index e409bdb0283228a6bc5ba8485c06535a7dcc986c..3f9cbb30120c4451146c102e161cc248dee88bde 100644
--- a/src/TabulatedPotential.h
+++ b/src/TabulatedPotential.h
@@ -22,6 +22,12 @@
 
 #define BD_PI 3.1415927f
 
+#ifndef gpuErrchk
+#define delgpuErrchk
+#define gpuErrchk(code) { if ((code) != cudaSuccess) {					       \
+	fprintf(stderr,"CUDA Error: %s %s %d\n", cudaGetErrorString(code), __FILE__, __LINE__); \
+    }}
+#endif
 
 class EnergyForce {
 public:
@@ -41,76 +47,82 @@ public:
 class TabulatedPotential {
 public:
   TabulatedPotential();
-  TabulatedPotential(const char* fileName);
   TabulatedPotential(const TabulatedPotential& tab);
   TabulatedPotential(const float* dist, const float* pot, int n0);
-
   ~TabulatedPotential();
 
-  static int countValueLines(const char* fileName);
-
   void truncate(float cutoff);
   bool truncate(float switchDist, float cutoff, float value);
 
   Vector3 computeForce(Vector3 r);
 
-  HOST DEVICE inline EnergyForce computeOLD(Vector3 r) {
-		float d = r.length();
-		Vector3 rUnit = -r/d;
-		int home = int(floorf((d - r0)/dr));
-		if (home < 0) return EnergyForce(v0[0], Vector3(0.0f));
-		if (home >= n) return EnergyForce(e0, Vector3(0.0f));
-		float homeR = home*dr + r0;
-		float w = (d - homeR)/dr;
-		
-		// Interpolate.
-		float energy = v3[home]*w*w*w + v2[home]*w*w + v1[home]*w + v0[home];
-		Vector3 force = -(3.0f*v3[home] * w * w
-										+ 2.0f*v2[home] * w
-										+ v1[home]) * rUnit/dr;
-		return EnergyForce(energy,force);
+  int size() const { return n; }
+
+    TabulatedPotential* copy_to_cuda() const {
+	// Allocate data for array 
+	TabulatedPotential* dev_ptr;
+	TabulatedPotential tmp(*this);
+
+	float *v;
+	{
+	    size_t sz = sizeof(float) * n;
+	    gpuErrchk(cudaMalloc(&v, sz));
+	    gpuErrchk(cudaMemcpy(v, v0, sz, cudaMemcpyHostToDevice));
 	}
+	tmp.v0 = v;
+
+	size_t sz = sizeof(TabulatedPotential);
+	gpuErrchk(cudaMalloc(&dev_ptr, sz));
+	gpuErrchk(cudaMemcpy(dev_ptr, &tmp, sz, cudaMemcpyHostToDevice));
+	tmp.v0 = NULL;
+	return dev_ptr;
+    }
+    void free_from_cuda(TabulatedPotential* dev_ptr) const {
+	// TODO: copy pointers to local temporary object, then cudaFree that data
+    }
+
 
   HOST DEVICE inline EnergyForce compute(Vector3 r) {
 		float d = r.length();
-		float w = (d - r0)/dr;
+		float w = (d - r0) * drInv;
 		int home = int( floorf(w) );
 		w = w - home;
 		// if (home < 0) return EnergyForce(v0[0], Vector3(0.0f));
 		home = home < 0 ? 0 : home;
-		if (home >= n) return EnergyForce(e0, Vector3(0.0f));
+		if (home >= n) return EnergyForce(v0[n-1], Vector3(0.0f));
 		
 		float u0 = v0[home];
 		float du = home+1 < n ? v0[home+1]-u0 : 0;
 				
 		// Interpolate.
 		float energy = du*w+u0;
-		Vector3 force = (du/(d*dr))*r;
+		Vector3 force = (du*drInv/d)*r;
 		return EnergyForce(energy,force);
 	}
+
   HOST DEVICE inline EnergyForce compute(const Vector3 r, float d) const {
 		d = sqrt(d);
 		// float d = r.length();
-		float w = (d - r0)/dr;
+		float w = (d - r0) * drInv;
 		int home = int( floorf(w) );
 		w = w - home;
 		// if (home < 0) return EnergyForce(v0[0], Vector3(0.0f));
 		home = home < 0 ? 0 : home;
-		if (home >= n) return EnergyForce(e0, Vector3(0.0f));
+		if (home >= n) return EnergyForce(v0[n-1], Vector3(0.0f));
 		
 		float u0 = v0[home];
 		float du = home+1 < n ? v0[home+1]-u0 : 0;
 				
 		// Interpolate.
 		float energy = du*w+u0;
-		Vector3 force = (du/(d*dr))*r;
+		Vector3 force = (du*drInv/d)*r;
 		return EnergyForce(energy,force);
 	}
   HOST DEVICE inline Vector3 computef(const Vector3 r, float d) const {
 		d = sqrt(d);
 		// float d = r.length();
 		// RBTODO: precompute so that initial blocks are zero; reduce computation here
-		float w = (d - r0)/dr;
+		float w = (d - r0)*drInv;
 		int home = int( floorf(w) );
 		w = w - home;
 		// if (home < 0) return EnergyForce(v0[0], Vector3(0.0f));
@@ -118,28 +130,50 @@ public:
 		if (home >= n) return Vector3(0.0f);
 		
 		if (home+1 < n) 
-			return ((v0[home+1]-v0[home])/(d*dr))*r;
+		    return ((v0[home+1]-v0[home])*drInv/d)*r;
 		else
-			return Vector3(0.0f);
+		    return Vector3(0.0f);
 	}
 
 // private:
-public:
-  float* pot;
+private:
   float* v0;
-  float* v1;
-  float* v2;
-  float* v3;
   int n;
+  float drInv; //TODO replace with drInv
+  float r0;
+};
+
+class FullTabulatedPotential {
+public:
+  FullTabulatedPotential();
+  FullTabulatedPotential(const char* fileName);
+  FullTabulatedPotential(const FullTabulatedPotential& tab);
+  ~FullTabulatedPotential();
+
+  static int countValueLines(const char* fileName);
+
+  /* HOST DEVICE inline EnergyForce computeOLD(Vector3 r) { */
+  /* 		float d = r.length(); */
+  /* 		Vector3 rUnit = -r/d; */
+  /* 		int home = int(floorf((d - r0)/dr)); */
+  /* 		if (home < 0) return EnergyForce(v0[0], Vector3(0.0f)); */
+  /* 		if (home >= n) return EnergyForce(e0, Vector3(0.0f)); */
+  /* 		float homeR = home*dr + r0; */
+  /* 		float w = (d - homeR)/dr; */
+		
+  /* 		// Interpolate. */
+  /* 		float energy = v3[home]*w*w*w + v2[home]*w*w + v1[home]*w + v0[home]; */
+  /* 		Vector3 force = -(3.0f*v3[home] * w * w */
+  /* 										+ 2.0f*v2[home] * w */
+  /* 										+ v1[home]) * rUnit/dr; */
+  /* 		return EnergyForce(energy,force); */
+  /* 	} */
+
+  TabulatedPotential* pot;
+
+private:
   int numLines;
-  float dr; //TODO replace with drInv
-  float r0, r1;
-  float e0;
   String fileName;
-	
-  void init(const float* dist, const float* pot, int n0);
-  void interpolate();
- 
 };
 
 
@@ -478,4 +512,8 @@ public:
 
 };
 
+#ifndef delgpuErrchk
+#undef  delgpuErrchk
+#undef  gpuErrchk
+#endif
 #endif