diff --git a/src/Angle.cu b/src/Angle.cu
index acc2a09dc389f96861b2ca3b1ebc5d7acc0f4d2b..220118759eb45508b49bea6f02b24eb9ac820825 100644
--- a/src/Angle.cu
+++ b/src/Angle.cu
@@ -15,3 +15,15 @@ String Angle::toString()
 	return String("ANGLE ") + ind1 + " " + ind2 + " " + ind3 + " " + fileName;
 }
 
+// void BondAngle::print()
+// {
+// //	printf("about to print fileName %p\n", fileName.val());
+// //	fileName.print();
+//     printf("BONDANGLE (%d %d %d) %s; %s; %s\n", ind1, ind2, ind3, angleFileName.val(), bondFileName1.val(), bondFileName2.val());
+// }
+
+// String BondAngle::toString()
+// {
+//     return String("BONDANGLE ") + ind1 + " " + ind2 + " " + ind3 + " " + angleFileName + " " + bondFileName1 + " " + bondFileName2;
+// }
+
diff --git a/src/Angle.h b/src/Angle.h
index a4e9542c0878b6b195092bfef5b3ea8a4633f1df..e03a77914ec9a22eff5a67df20f4d79416d32a88 100644
--- a/src/Angle.h
+++ b/src/Angle.h
@@ -59,4 +59,56 @@ public:
 	String toString();
 	void print();
 };
+
+// TODO consolidate with Angle using inheritence
+class BondAngle
+{
+public:
+	BondAngle() {}
+    BondAngle(int ind1, int ind2, int ind3, int ind4, String angleFileName1, String bondFileName, String angleFileName2) :
+	ind1(ind1), ind2(ind2), ind3(ind3), ind4(ind4), angleFileName1(angleFileName1), bondFileName(bondFileName), angleFileName2(angleFileName2), tabFileIndex1(-1), tabFileIndex2(-1), tabFileIndex3(-1) { }
+
+	int ind1, ind2, ind3, ind4;
+
+	String angleFileName1;
+	String bondFileName;
+	String angleFileName2;
+	// tabFileIndex will be assigned after ComputeForce loads the
+	// TabulatedAnglePotentials. The tabefileIndex is used by ComputeForce to
+	// discern which TabulatedAnglePotential this Angle uses.
+	int tabFileIndex1;
+	int tabFileIndex2;
+	int tabFileIndex3;
+
+    inline BondAngle(const BondAngle& a) : ind1(a.ind1), ind2(a.ind2), ind3(a.ind3), ind4(a.ind4),
+					   angleFileName1(a.angleFileName1), bondFileName(a.bondFileName), angleFileName2(a.angleFileName2),
+					   tabFileIndex1(a.tabFileIndex1), tabFileIndex2(a.tabFileIndex2), tabFileIndex3(a.tabFileIndex3) { }
+
+	// HOST DEVICE inline float calcAngle(Vector3* pos, BaseGrid* sys) {
+	// 	const Vector3& posa = pos[ind1];
+	// 	const Vector3& posb = pos[ind2];
+	// 	const Vector3& posc = pos[ind3];
+	// 	const float distab = sys->wrapDiff(posa - posb).length();
+	// 	const float distbc = sys->wrapDiff(posb - posc).length();
+	// 	const float distac = sys->wrapDiff(posc - posa).length();
+	// 	float cos = (distbc * distbc + distab * distab - distac * distac)
+	// 						  / (2.0f * distbc * distab);
+	// 	if (cos < -1.0f) cos = -1.0f;
+	// 	else if (cos > 1.0f) cos = 1.0f;
+	// 	float angle = acos(cos);
+	// 	return angle;
+	// }
+
+	// HOST DEVICE inline int getIndex(int index) {
+	// 	if (index == ind1) return 1;
+	// 	if (index == ind2) return 2;
+	// 	if (index == ind3) return 3;
+	// 	if (index == ind4) return 4;
+	// 	return -1;
+	// }
+
+	// String toString();
+	// void print();
+};
+
 #endif
diff --git a/src/ComputeForce.cu b/src/ComputeForce.cu
index 7ccf22acdab6420843b3266085cef3dc5a51f005..7b8c42ba37bdd160bef7526a481839a1c99e0f30 100644
--- a/src/ComputeForce.cu
+++ b/src/ComputeForce.cu
@@ -49,6 +49,7 @@ ComputeForce::ComputeForce(const Configuration& c, const int numReplicas = 1) :
     numExcludes(c.numExcludes), numAngles(c.numAngles),
     numTabAngleFiles(c.numTabAngleFiles), numDihedrals(c.numDihedrals),
     numTabDihedralFiles(c.numTabDihedralFiles), numRestraints(c.numRestraints),
+    numBondAngles(c.numBondAngles), numProductPotentials(c.numProductPotentials),
     numGroupSites(c.numGroupSites),
     numReplicas(numReplicas) {
 
@@ -233,6 +234,8 @@ ComputeForce::ComputeForce(const Configuration& c, const int numReplicas = 1) :
 	}
 	
 	restraintIds_d = NULL;
+	bondAngleList_d = NULL;
+	product_potential_list_d = NULL;
 
 	//Calculate the number of blocks the grid should contain
 	gridSize =  (num+num_rb_attached_particles) / NUM_THREADS + 1;
@@ -401,7 +404,7 @@ bool ComputeForce::addTabulatedPotential(String fileName, int type0, int type1)
 	return true;
 }
 
-bool ComputeForce::addBondPotential(String fileName, int ind, Bond bonds[])
+bool ComputeForce::addBondPotential(String fileName, int ind, Bond bonds[], BondAngle bondAngles[])
 {
     // TODO: see if tableBond_addr can be removed
     if (tableBond[ind] != NULL) {
@@ -415,6 +418,12 @@ bool ComputeForce::addBondPotential(String fileName, int ind, Bond bonds[])
 		if (bonds[i].fileName == fileName)
 			bonds[i].tabFileIndex = ind;
 
+	for (int i = 0; i < numBondAngles; i++)
+	{
+	    if (bondAngles[i].bondFileName == fileName)
+		bondAngles[i].tabFileIndex2 = ind;
+	}
+
 	gpuErrchk(cudaMemcpyAsync(bonds_d, bonds, sizeof(Bond) * numBonds, cudaMemcpyHostToDevice));
 
 	tableBond_addr[ind] = tableBond[ind]->copy_to_cuda();
@@ -423,7 +432,7 @@ bool ComputeForce::addBondPotential(String fileName, int ind, Bond bonds[])
 	return true;
 }
 
-bool ComputeForce::addAnglePotential(String fileName, int ind, Angle* angles) {
+bool ComputeForce::addAnglePotential(String fileName, int ind, Angle* angles, BondAngle* bondAngles) {
 	if (tableAngle[ind] != NULL) {
 		delete tableAngle[ind];
 		gpuErrchk(cudaFree(tableAngle_addr[ind]));
@@ -452,6 +461,12 @@ bool ComputeForce::addAnglePotential(String fileName, int ind, Angle* angles) {
 		if (angles[i].fileName == fileName)
 			angles[i].tabFileIndex = ind;
 
+	for (int i = 0; i < numBondAngles; i++) {
+	    if (bondAngles[i].angleFileName1 == fileName)
+		bondAngles[i].tabFileIndex1 = ind;
+	    if (bondAngles[i].angleFileName2 == fileName)
+		bondAngles[i].tabFileIndex3 = ind;
+	}
 	gpuErrchk(cudaMemcpy(angles_d, angles, sizeof(Angle) * numAngles,
 			cudaMemcpyHostToDevice));
 	return true;
@@ -714,6 +729,17 @@ float ComputeForce::computeTabulated(bool get_energy) {
 
 	//Mlog: the commented function doesn't use bondList, uncomment for testing.
 	//if(bondMap_d != NULL && tableBond_d != NULL)
+
+	if(product_potential_list_d != NULL && product_potentials_d != NULL)
+	{
+	    computeProductPotentials <<<nb, numThreads, 0, gpuman.get_next_stream()>>> ( forceInternal_d[0], pos_d[0], sys_d[0], numReplicas*numProductPotentials, product_potential_particles_d, product_potentials_d, product_potential_list_d, productCount_d, energies_d, get_energy);
+	}
+
+	if(bondAngleList_d != NULL && tableBond_d != NULL && tableAngle_d != NULL)
+	{
+	    computeTabulatedBondAngles <<<nb, numThreads, 0, gpuman.get_next_stream()>>> ( forceInternal_d[0], pos_d[0], sys_d[0], numReplicas*numBondAngles, bondAngleList_d, tableAngle_d, tableBond_d, energies_d, get_energy);
+	}
+
 	if(bondList_d != NULL && tableBond_d != NULL)
 
 	{
@@ -865,7 +891,7 @@ void ComputeForce::setForceInternalOnDevice(Vector3* f) {
 	gpuErrchk(cudaMemcpy(forceInternal_d[0], f, sizeof(Vector3) * tot_num, cudaMemcpyHostToDevice));
 }
 
-void ComputeForce::copyToCUDA(int simNum, int *type, Bond* bonds, int2* bondMap, Exclude* excludes, int2* excludeMap, Angle* angles, Dihedral* dihedrals, const Restraint* const restraints)
+void ComputeForce::copyToCUDA(int simNum, int *type, Bond* bonds, int2* bondMap, Exclude* excludes, int2* excludeMap, Angle* angles, Dihedral* dihedrals, const Restraint* const restraints, const BondAngle* const bondAngles, const XpotMap simple_potential_map, const std::vector<SimplePotential> simple_potentials, const ProductPotentialConf* const product_potential_confs)
 {
     assert(simNum == numReplicas); // Not sure why we have both of these things
     int tot_num_with_rb = (num+num_rb_attached_particles) * simNum;
@@ -936,6 +962,105 @@ void ComputeForce::copyToCUDA(int simNum, int *type, Bond* bonds, int2* bondMap,
 				      sizeof(float)   * numRestraints, cudaMemcpyHostToDevice));
 	}	    
 
+	if (numBondAngles > 0) {
+		gpuErrchk(cudaMalloc(&bondAngles_d, sizeof(BondAngle) * numBondAngles));
+		gpuErrchk(cudaMemcpyAsync(bondAngles_d, bondAngles, sizeof(BondAngle) * numBondAngles,
+				cudaMemcpyHostToDevice));
+	}
+
+	if (simple_potentials.size() > 0) {
+	    float **val = simple_potential_pots_d = new float*[simple_potentials.size()];
+	    // float **tmp = new float*[simple_potentials.size()];
+	    for (int i=0; i < simple_potentials.size(); ++i) {
+		const SimplePotential sp = simple_potentials[i];
+		gpuErrchk(cudaMalloc(&val[i], sizeof(float)*sp.size));
+		gpuErrchk(cudaMemcpyAsync(val[i], sp.pot, sizeof(float)*sp.size, cudaMemcpyHostToDevice));
+		// tmp[i] = sp.pot;
+		// // sp.pot = val[i];
+	    }
+
+	    // size_t sz =  sizeof(SimplePotential) * simple_potentials.size();
+	    // gpuErrchk(cudaMalloc(&simple_potentials_d, sz));
+	    // gpuErrchk(cudaMemcpyAsync(simple_potentials_d, &simple_potentials[0], sz,
+	    // 				  cudaMemcpyHostToDevice));
+	    
+	    // for (int i=0; i < simple_potentials.size(); ++i) { // Restore host pointers on host object
+	    // 	SimplePotential &sp = simple_potentials[i];
+	    // 	sp.pot = tmp[i];
+	    // }
+	    // // delete[] val;
+	    // delete[] tmp;
+
+	}
+	
+	if (numProductPotentials > 0) {
+	    // Count particles
+	    int n_pots = 0;
+	    int n_particles = 0;
+	    for (int i=0; i < numProductPotentials; ++i) {
+		const ProductPotentialConf& c = product_potential_confs[i];
+		n_pots += c.indices.size();
+		for (int j=0; j < c.indices.size(); ++j) {
+		    n_particles += c.indices[j].size();
+		}
+	    }
+	    // printf("DEBUG: Found %d particles participating in %d potentials forming %d productPotentials\n",
+	    // 	   n_particles, n_pots, numProductPotentials);
+
+	    // Build productPotentialLists on host
+	    int *particle_list = new int[n_particles*numReplicas];
+	    SimplePotential *product_potentials = new SimplePotential[n_pots];
+	    uint2 *product_potential_list = new uint2[numProductPotentials*numReplicas];
+	    unsigned short *productCount = new unsigned short[numProductPotentials*numReplicas];
+
+	    n_particles = 0;
+	    
+	    for (unsigned int r=0; r < numReplicas; ++r) {
+		n_pots = 0;
+		for (int i=0; i < numProductPotentials; ++i) {
+		    const ProductPotentialConf& c = product_potential_confs[i];
+		    product_potential_list[i+r*numProductPotentials] = make_uint2( n_pots, n_particles );
+
+		    for (int j=0; j < c.indices.size(); ++j) {
+			if (r == 0) {
+			    const unsigned int sp_i = simple_potential_map.find( std::string( c.potential_names[j].val() ) )->second;
+			    product_potentials[n_pots] = simple_potentials[sp_i];
+			    product_potentials[n_pots].pot = simple_potential_pots_d[sp_i];
+			}
+			++n_pots;
+			for (int k=0; k < c.indices[j].size(); ++k) {
+			    particle_list[n_particles++] = c.indices[j][k]+r*num;
+			}
+		    }
+		    productCount[i+r*numProductPotentials] = c.indices.size();
+		}
+	    }
+
+	    // Copy to device
+	    size_t sz = n_particles*numReplicas * sizeof(int);
+	    gpuErrchk(cudaMalloc(&product_potential_particles_d, sz));
+	    gpuErrchk(cudaMemcpyAsync(product_potential_particles_d, particle_list, sz,
+	    				  cudaMemcpyHostToDevice));
+	    sz = n_pots * sizeof(SimplePotential);
+	    gpuErrchk(cudaMalloc(&product_potentials_d, sz));
+	    gpuErrchk(cudaMemcpyAsync(product_potentials_d, product_potentials, sz,
+	    				  cudaMemcpyHostToDevice));
+	    sz = numProductPotentials*numReplicas * sizeof(uint2);
+	    gpuErrchk(cudaMalloc(&product_potential_list_d, sz));
+	    gpuErrchk(cudaMemcpyAsync(product_potential_list_d, product_potential_list, sz,
+	    				  cudaMemcpyHostToDevice));
+	    sz = numProductPotentials*numReplicas * sizeof(unsigned short);
+	    gpuErrchk(cudaMalloc(&productCount_d, sz));
+	    gpuErrchk(cudaMemcpyAsync(productCount_d, productCount, sz,
+	    				  cudaMemcpyHostToDevice));
+
+	    // Clean up
+	    delete[] particle_list;
+	    delete[] product_potentials;
+	    delete[] product_potential_list;
+	    delete[] productCount;
+	}
+
 	gpuErrchk(cudaDeviceSynchronize());
 }
 
@@ -954,7 +1079,7 @@ void ComputeForce::copyToCUDA(int simNum, int *type, Bond* bonds, int2* bondMap,
 // 	}
 // }
 
-void ComputeForce::copyBondedListsToGPU(int3 *bondList, int4 *angleList, int4 *dihedralList, int *dihedralPotList) {
+void ComputeForce::copyBondedListsToGPU(int3 *bondList, int4 *angleList, int4 *dihedralList, int *dihedralPotList, int4* bondAngleList) {
 
 	
 	size_t size;
@@ -980,4 +1105,11 @@ void ComputeForce::copyBondedListsToGPU(int3 *bondList, int4 *angleList, int4 *d
     gpuErrchk( cudaMalloc( &dihedralPotList_d, size ) );
     gpuErrchk( cudaMemcpyAsync( dihedralPotList_d, dihedralPotList, size, cudaMemcpyHostToDevice) );
 	}
+
+	if (numBondAngles > 0) {
+	    size = 2*numBondAngles * numReplicas * sizeof(int4);
+	    gpuErrchk( cudaMalloc( &bondAngleList_d, size ) );
+	    gpuErrchk( cudaMemcpyAsync( bondAngleList_d, bondAngleList, size, cudaMemcpyHostToDevice) );
+	}
+
 }
diff --git a/src/ComputeForce.cuh b/src/ComputeForce.cuh
index eb619c91cf054511913d6e0bb9756b12765f2bc1..63778e529cc2e0f26b65fa31ec2f59267c6845f0 100644
--- a/src/ComputeForce.cuh
+++ b/src/ComputeForce.cuh
@@ -3,7 +3,9 @@
 // Terrance Howard <heyterrance@gmail.com>
 #pragma once
 #include <cassert>
+
 #include "CudaUtil.cuh"
+
 #include "TabulatedMethods.cuh"
 
 // From TabulatedMethods.cuh: constexpr float BD_PI = 3.1415927f; 
@@ -865,6 +867,92 @@ void computeTabulatedAngles(Vector3* force,
 	}
 }
 
+__global__
+void computeTabulatedBondAngles(Vector3* force,
+				Vector3* __restrict__ pos,
+				BaseGrid* __restrict__ sys,
+				int numBondAngles, int4* __restrict__ bondAngleList_d, TabulatedAnglePotential** tableAngle,
+				TabulatedPotential** tableBond,
+				float* energy, bool get_energy) {
+	// Loop over ALL angles in ALL replicas
+	for (int i = threadIdx.x+blockIdx.x*blockDim.x; i<numBondAngles; i+=blockDim.x*gridDim.x) {
+		int atom1 = bondAngleList_d[2*i].x;
+		int atom2 = bondAngleList_d[2*i].y;
+		int atom3 = bondAngleList_d[2*i].z;
+		int atom4 = bondAngleList_d[2*i].w;
+
+		int angleInd1 = bondAngleList_d[2*i+1].x;
+		int bondInd   = bondAngleList_d[2*i+1].y;
+		int angleInd2 = bondAngleList_d[2*i+1].z;
+
+		computeBondAngle(tableAngle[ angleInd1 ], tableBond[ bondInd ], tableAngle[ angleInd2 ], sys, force, pos, atom1, atom2, atom3, atom4, energy, get_energy);
+	}
+}
+
+__global__
+void computeProductPotentials(Vector3* force,
+			      Vector3* __restrict__ pos,
+			      BaseGrid* __restrict__ sys,
+			      int numProductPotentials,
+			      int* __restrict__ productPotentialParticles,
+			      SimplePotential* __restrict__ potentialList,
+			      uint2* __restrict__ productPotential_list,
+			      unsigned short* __restrict__ productCount,
+			      float* energy, bool get_energy) {
+    /*
+      productPotential_list[i].x : index of first potential in potentialList for i_th productPotential
+      productPotential_list[i].y : index of first atom in productPotentialParticles for i_th productPotential
+
+      for three potentials, angle, bond, angle, we would have the following atomic indices in productPotentialParticles:
+        pot1 : productPotential_list[i].y, productPotential_list[i].y + 1 , productPotential_list[i].y + 2
+        pot2 : productPotential_list[i].y + 3, productPotential_list[i].y + 4
+	and
+        pot3 : productPotential_list[i].y + 5, productPotential_list[i].y + 6, productPotential_list[i].y + 7
+
+      productCount[i] : number of potentials in the i_th productPotential
+    */
+
+    // CRAPPY NAIVE IMPLEMENTATION
+    constexpr int MAX_XPOTS = 4;
+    float2 energy_and_deriv[MAX_XPOTS];
+    float tmp_force;
+
+    for (int i = threadIdx.x+blockIdx.x*blockDim.x; i<numProductPotentials; i+=blockDim.x*gridDim.x) {
+	unsigned short num_pots = productCount[i];
+
+	unsigned int part_idx = productPotential_list[i].y;
+#pragma unroll
+	for (unsigned short int j = 0; j < MAX_XPOTS; ++j) {
+	    if (j == num_pots) break;
+	    SimplePotential& p = potentialList[ productPotential_list[i].x + j ];
+
+	    // Hidden branch divergence in compute_value => sort potentials by type before running kernel
+	    float tmp = p.compute_value(pos,sys, &productPotentialParticles[part_idx]);
+	    energy_and_deriv[j] = p.compute_energy_and_deriv(tmp);
+	    part_idx += p.type==BOND? 2: p.type==ANGLE? 3: 4;
+	}
+
+	part_idx = productPotential_list[i].y;
+#pragma unroll
+	for (unsigned short int j = 0; j < MAX_XPOTS; ++j) {
+	    if (j == num_pots) break;
+	    tmp_force = energy_and_deriv[j].y;
+#pragma unroll
+	    for (unsigned short int k = 0; k < MAX_XPOTS; ++k) {
+		if (k == num_pots) break;
+		if (j == k) continue;
+		tmp_force *= energy_and_deriv[k].x;
+	    }
+	    SimplePotential& p = potentialList[ productPotential_list[i].x + j ];
+	    if (tmp_force != 0) {
+		// TODO add energy
+		p.apply_force(pos,sys, force, &productPotentialParticles[part_idx], tmp_force);
+	    }
+	    part_idx += p.type==BOND? 2: p.type==ANGLE? 3: 4;
+	}
+    }
+}
+
 
 __global__
 void computeDihedrals(Vector3 force[], Vector3 pos[],
diff --git a/src/ComputeForce.h b/src/ComputeForce.h
index b3bc258541f9603f387221363f3c39fe7fbae881..39247282970c4b83713afffe9eb3e1a073c9d253 100644
--- a/src/ComputeForce.h
+++ b/src/ComputeForce.h
@@ -1,8 +1,7 @@
 ///////////////////////////////////////////////////////////////////////
 // Brownian dynamics base class
 // Author: Jeff Comer <jcomer2@illinois.edu>
-#ifndef COMPUTEFORCE_H
-#define COMPUTEFORCE_H
+#pragma once
 
 #ifdef __CUDACC__
     #define HOST __host__
@@ -25,13 +24,34 @@
 #include "TabulatedPotential.h"
 #include "TabulatedAngle.h"
 #include "TabulatedDihedral.h"
+#include "ProductPotential.h"
 #include "GPUManager.h"
 
+// #include <map>
+
 #include <cstdio>
 // #include <cuda_runtime.h>
 #include <thrust/transform_reduce.h>	// thrust::reduce
 #include <thrust/functional.h>				// thrust::plus
 
+#ifdef USE_BOOST
+#include <boost/unordered_map.hpp>
+typedef boost::unordered_map<std::string,unsigned int> XpotMap;
+inline std::size_t hash_value(String const& s) {
+    if (s.length() == 0) return 0;
+    return boost::hash_range(s.val(), s.val()+s.length());
+}
+#else
+#include <map>
+typedef std::map<std::string,unsigned int> XpotMap;
+inline std::size_t hash_value(String const& s) {
+    if (s.length() == 0) return 0;
+    return std::hash<std::string>{}( std::string(s.val()) );
+}
+#endif
+
+
+
 const unsigned int NUM_THREADS = 256;
 
 // Configuration
@@ -46,8 +66,8 @@ public:
 	void makeTables(const BrownianParticleType* part);
 
 	bool addTabulatedPotential(String fileName, int type0, int type1);
-	bool addBondPotential(String fileName, int ind, Bond* bonds);
-	bool addAnglePotential(String fileName, int ind, Angle* angles);
+	bool addBondPotential(String fileName, int ind, Bond* bonds, BondAngle* bondAngles);
+	bool addAnglePotential(String fileName, int ind, Angle* angles, BondAngle* bondAngles);
 	bool addDihedralPotential(String fileName, int ind, Dihedral* dihedrals);
 
 	void decompose();
@@ -75,12 +95,15 @@ public:
 	
 	//MLog: new copy function to allocate memory required by ComputeForce class.
 	void copyToCUDA(Vector3* forceInternal, Vector3* pos);
-	void copyToCUDA(int simNum, int *type, Bond* bonds, int2* bondMap, Exclude* excludes, int2* excludeMap, Angle* angles, Dihedral* dihedrals, const Restraint* const restraints);
+	void copyToCUDA(int simNum, int *type, Bond* bonds, int2* bondMap, Exclude* excludes, int2* excludeMap, Angle* angles, Dihedral* dihedrals, const Restraint* const restraints, const BondAngle* const bondAngles,
+			const XpotMap simple_potential_map,
+			const std::vector<SimplePotential> simple_potentials,
+			const ProductPotentialConf* const product_potential_confs);
         void copyToCUDA(Vector3* forceInternal, Vector3* pos, Vector3* mom);
         void copyToCUDA(Vector3* forceInternal, Vector3* pos, Vector3* mom, float* random);
 	
 	// void createBondList(int3 *bondList);
-	void copyBondedListsToGPU(int3 *bondList, int4 *angleList, int4 *dihedralList, int *dihedralPotList);
+	void copyBondedListsToGPU(int3 *bondList, int4 *angleList, int4 *dihedralList, int *dihedralPotList, int4 *bondAngleList);
 	    
 	//MLog: because of the move of a lot of private variables, some functions get starved necessary memory access to these variables, below is a list of functions that return the specified private variable.
     std::vector<Vector3*> getPos_d()
@@ -249,6 +272,18 @@ private:
 	Angle* angles_d;
 	Dihedral* dihedrals_d;
 
+	int numBondAngles;
+	BondAngle* bondAngles_d;
+	int4* bondAngleList_d;
+
+    int numProductPotentials;
+    float** simple_potential_pots_d;
+    SimplePotential* simple_potentials_d;
+    int* product_potential_particles_d;
+    SimplePotential* product_potentials_d;
+    uint2* product_potential_list_d;
+    unsigned short* productCount_d;
+
 	int3* bondList_d;
 	int4* angleList_d;
 	int4* dihedralList_d;
@@ -260,5 +295,3 @@ private:
 	float* restraintSprings_d;
 
 };
-
-#endif
diff --git a/src/Configuration.cpp b/src/Configuration.cpp
index 8ee8754485ade2edd9a8124c684e7a68cb3d73b4..0df5f36bc5b567354d634450903ee9f51c22186a 100644
--- a/src/Configuration.cpp
+++ b/src/Configuration.cpp
@@ -2,6 +2,7 @@
 #include "Angle.h"
 #include "Dihedral.h"
 #include "Restraint.h"
+#include "ProductPotential.h"
 #include <cmath>
 #include <cassert>
 #include <stdlib.h>     /* srand, rand */
@@ -271,6 +272,9 @@ Configuration::Configuration(const char* config_file, int simNum, bool debug) :
 	if (readAnglesFromFile) readAngles();
 	if (readDihedralsFromFile) readDihedrals();
 	if (readRestraintsFromFile) readRestraints();
+	if (readBondAnglesFromFile) readBondAngles();
+	if (readProductPotentialsFromFile) readProductPotentials();
+
 
 	if (temperatureGridFile.length() != 0) {
 		printf("\nFound temperature grid file: %s\n", temperatureGridFile.val());
@@ -630,6 +634,8 @@ Configuration::~Configuration() {
 	if (excludeMap != NULL) delete[] excludeMap;
 	if (angles != NULL) delete[] angles;
 	if (dihedrals != NULL) delete[] dihedrals;
+	if (bondAngles != NULL) delete[] bondAngles;
+	if (productPotentials != NULL) delete[] productPotentials;
 
 	delete[] numPartsOfType;
 	  
@@ -861,6 +867,16 @@ void Configuration::setDefaults() {
 	dihedrals = NULL;
 	numTabDihedralFiles = 0;
 
+	readBondAnglesFromFile = false;
+	numBondAngles = 0;
+	bondAngles = NULL;
+
+	readProductPotentialsFromFile = false;
+	numProductPotentials = 0;
+	productPotentials = NULL;
+	simple_potential_ids = XpotMap();
+	simple_potentials = std::vector<SimplePotential>();
+
 	readRestraintsFromFile = false;
 	numRestraints = 0;
 	restraints = NULL;
@@ -1122,6 +1138,20 @@ int Configuration::readParameters(const char * config_file) {
 				angleFile = value;
 				readAnglesFromFile = true;
 			}
+		} else if (param == String("inputBondAngles")) {
+			if (readBondAnglesFromFile) {
+				printf("WARNING: More than one bondangle file specified. Ignoring new bondangle file.\n");
+			} else {
+			        bondAngleFile = value;
+				readBondAnglesFromFile = true;
+			}
+		} else if (param == String("inputProductPotentials")) {
+			if (readBondAnglesFromFile) {
+				printf("WARNING: More than one product potential file specified. Ignoring new file.\n");
+			} else {
+			        productPotentialFile = value;
+				readProductPotentialsFromFile = true;
+			}
 		} else if (param == String("tabulatedAngleFile")) {
 			if (numTabAngleFiles >= atfcap) {
 				String* temp = angleTableFile;
@@ -1605,7 +1635,7 @@ void Configuration::readBonds() {
 		      
 		String s(line);
 		int numTokens = s.tokenCount();
-		      
+
 		// Break the line down into pieces (tokens) so we can process them individually
 		String* tokenList = new String[numTokens];
 		s.tokenize(tokenList);
@@ -1925,6 +1955,178 @@ void Configuration::readDihedrals() {
 	// 	dihedrals[i].print();
 }
 
+void Configuration::readBondAngles() {
+	FILE* inp = fopen(bondAngleFile.val(), "r");
+	char line[256];
+	int capacity = 256;
+	numBondAngles = 0;
+	bondAngles = new BondAngle[capacity];
+
+	// If the angle file cannot be found, exit the program
+	if (inp == NULL) {
+		printf("WARNING: Could not open `%s'.\n", bondAngleFile.val());
+		printf("This simulation will not use angles.\n");
+		return;
+	}
+
+	while(fgets(line, 256, inp)) {
+		if (line[0] == '#') continue;
+		String s(line);
+		int numTokens = s.tokenCount();
+		String* tokenList = new String[numTokens];
+		s.tokenize(tokenList);
+
+		// Legitimate BONDANGLE inputs have 8 tokens
+		// BONDANGLE | INDEX1 | INDEX2 | INDEX3 | INDEX4 | ANGLE_FILENAME | BOND_FILENAME1 | BOND_FILENAME2
+		if (numTokens != 8) {
+			printf("WARNING: Invalid bond_angle input line: %s\n", line);
+			continue;
+		}
+
+		// Discard any empty line
+		if (tokenList == NULL)
+			continue;
+
+		int ind1 = atoi(tokenList[1].val());
+		int ind2 = atoi(tokenList[2].val());
+		int ind3 = atoi(tokenList[3].val());
+		int ind4 = atoi(tokenList[4].val());
+		String file_name1 = tokenList[5];
+		String file_name2 = tokenList[6];
+		String file_name3 = tokenList[7];
+		//printf("file_name %s\n", file_name.val());
+		if (ind1 >= num or ind2 >= num or ind3 >= num or ind4 >= num)
+			continue;
+
+		if (numBondAngles >= capacity) {
+			BondAngle* temp = bondAngles;
+			capacity *= 2;
+			bondAngles = new BondAngle[capacity];
+			for (int i = 0; i < numBondAngles; i++)
+				bondAngles[i] = temp[i];
+			delete[] temp;
+		}
+
+		BondAngle a(ind1, ind2, ind3, ind4, file_name1, file_name2, file_name3);
+		bondAngles[numBondAngles++] = a;
+		delete[] tokenList;
+	}
+	std::sort(bondAngles, bondAngles + numBondAngles, compare());
+
+	// for(int i = 0; i < numAngles; i++)
+	// 	angles[i].print();
+}
+
+void Configuration::readProductPotentials() {
+	FILE* inp = fopen(productPotentialFile.val(), "r");
+	char line[256];
+	int capacity = 256;
+	numProductPotentials = 0;
+	productPotentials = new ProductPotentialConf[capacity];
+
+	// If the angle file cannot be found, exit the program
+	if (inp == NULL) {
+		printf("WARNING: Could not open `%s'.\n", productPotentialFile.val());
+		printf("This simulation will not use product potentials.\n");
+		return;
+	}
+	printf("DEBUG: READING PRODUCT POTENTAL FILE\n");
+	std::vector<std::vector<int>> indices;
+	std::vector<int> tmp;
+	std::vector<String> pot_names;
+
+	while(fgets(line, 256, inp)) {
+		if (line[0] == '#') continue;
+		String s(line);
+		int numTokens = s.tokenCount();
+		String* tokenList = new String[numTokens];
+		s.tokenize(tokenList);
+
+		indices.clear();
+		tmp.clear();
+		pot_names.clear();		    
+
+		printf("\rDEBUG: reading line %d",numProductPotentials+1);
+
+		// Legitimate ProductPotential inputs have at least 7 tokens
+		// BONDANGLE | INDEX1 | INDEX2 | INDEX3 | [TYPE1] | POT_FILENAME1 | INDEX4 | INDEX5 | [TYPE2] POT_FILENAME2 ...
+		if (numTokens < 5) {
+		    printf("WARNING: Invalid product potential input line (too few tokens %d): %s\n", numTokens, line);
+			continue;
+		}
+
+		// Discard any empty line
+		if (tokenList == NULL)
+			continue;
+
+		SimplePotentialType type = BOND; // initialize to suppress warning
+		bool type_specified = false;
+		for (int i = 1; i < numTokens; ++i) {
+		    char *end;
+		    // printf("DEBUG: Working on token %d '%s'\n", i, tokenList[i].val());
+
+		    // Try to convert token to integer
+		    int index = (int) strtol(tokenList[i].val(), &end, 10);
+		    if (tokenList[i].val() == end || *end != '\0' || errno == ERANGE) {
+			// Failed to convert token to integer; therefore it must be a potential name or type
+
+			// Try to match a type
+			String n = tokenList[i];
+			n.lower();
+			if (n == "bond") { type = DIHEDRAL; type_specified = true; }
+			else if (n == "angle")  { type = DIHEDRAL; type_specified = true; }
+			else if (n == "dihedral")  { type = DIHEDRAL; type_specified = true; }
+			else if (n == "vecangle") { type = VECANGLE; type_specified = true; }
+			else { // Not a type, therefore a path to a potential
+			    n = tokenList[i];
+			    indices.push_back(tmp);
+			    pot_names.push_back( n );
+			    // TODO: Key should be tuple of (type,n)
+			    std::string n_str = std::string(n.val());
+			    if ( simple_potential_ids.find(n_str) == simple_potential_ids.end() ) {
+				// Could not find fileName in dictionary, so read and add it
+				unsigned int s = tmp.size();
+				if (s < 2 || s > 4) {
+				    printf("WARNING: Invalid product potential input line (indices of potential %d == %d): %s\n", i, s, line);
+				    continue;
+				}
+				simple_potential_ids[ n_str ] = simple_potentials.size();
+				if (not type_specified) type = s==2? BOND: s==3? ANGLE: DIHEDRAL;
+				simple_potentials.push_back( SimplePotential(n.val(), type) );
+			    }
+			    tmp.clear();
+			    type_specified = false;
+
+			}
+		    } else {
+			if (index >= num) {
+			    continue;
+			}
+			tmp.push_back(index);
+		    }
+		}
+
+		if (numProductPotentials >= capacity) {
+			ProductPotentialConf* temp = productPotentials;
+			capacity *= 2;
+			productPotentials = new ProductPotentialConf[capacity];
+			for (int i = 0; i < numProductPotentials; i++)
+				productPotentials[i] = temp[i];
+			delete[] temp;
+		}
+
+		ProductPotentialConf a(indices, pot_names);
+		productPotentials[numProductPotentials++] = a;
+		delete[] tokenList;
+	}
+	printf("\nDEBUG: Sorting\n");
+	std::sort(productPotentials, productPotentials + numProductPotentials, compare());
+
+	// for(int i = 0; i < numAngles; i++)
+	// 	angles[i].print();
+}
+
+
 void Configuration::readRestraints() {
 	FILE* inp = fopen(restraintFile.val(), "r");
 	char line[256];
@@ -2524,3 +2726,34 @@ bool Configuration::compare::operator()(const Dihedral& lhs, const Dihedral& rhs
 		return lhs.ind3 < rhs.ind3;
 	return lhs.ind4 < rhs.ind4;
 }
+
+bool Configuration::compare::operator()(const BondAngle& lhs, const BondAngle& rhs) {
+	int diff = lhs.ind1 - rhs.ind1;
+	if (diff != 0)
+		return lhs.ind1 < rhs.ind1;
+	diff = lhs.ind2 - rhs.ind2;
+	if (diff != 0)
+		return lhs.ind2 < rhs.ind2;
+	diff = lhs.ind3 - rhs.ind3;
+	if (diff != 0) 
+		return lhs.ind3 < rhs.ind3;
+	return lhs.ind4 < rhs.ind4;
+}
+
+bool Configuration::compare::operator()(const ProductPotentialConf& lhs, const ProductPotentialConf& rhs) {
+    int diff = rhs.indices.size() - lhs.indices.size();
+    if (diff != 0) return diff > 0;
+
+    for (unsigned int i = 0; i < lhs.indices.size(); ++i) {
+	diff = rhs.indices[i].size() - lhs.indices[i].size();
+	if (diff != 0) return diff > 0;
+    }
+
+    for (unsigned int i = 0; i < lhs.indices.size(); ++i) {
+	for (unsigned int j = 0; j < lhs.indices[i].size(); ++j) {
+	    diff = rhs.indices[i][j] - lhs.indices[i][j];
+	    if (diff != 0) return diff > 0;
+	}
+    }
+    return true;
+}
diff --git a/src/Configuration.h b/src/Configuration.h
index 6199dd8b24bc019dd7044b2a3a6031312142f13d..7fb589df2b6cca944941d864921b44c4433257b7 100644
--- a/src/Configuration.h
+++ b/src/Configuration.h
@@ -25,6 +25,7 @@
 #include "TrajectoryWriter.h"
 #include "TabulatedPotential.h"
 #include "TabulatedAngle.h"
+#include "ProductPotential.h"
 #include "GPUManager.h"
 #include "RigidBodyType.h"
 #include "RigidBody.h"
@@ -48,6 +49,8 @@ class Configuration {
 		bool operator()(const Exclude& lhs, const Exclude& rhs);
 		bool operator()(const Angle& lhs, const Angle& rhs);
 		bool operator()(const Dihedral& lhs, const Dihedral& rhs);
+		bool operator()(const BondAngle& lhs, const BondAngle& rhs);
+		bool operator()(const ProductPotentialConf& lhs, const ProductPotentialConf& rhs);
 	};
 
 	void setDefaults();
@@ -64,6 +67,7 @@ class Configuration {
 	void buildExcludeMap();
 	void readDihedrals();
 	void readRestraints();
+	void readBondAngles();
 
 
 	bool readTableFile(const String& value, int currTab);
@@ -71,6 +75,8 @@ class Configuration {
 	bool readAngleFile(const String& value, int currAngle);
 	bool readDihedralFile(const String& value, int currDihedral);
 
+	bool readBondAngleFile(const String& value, const String& bondfile1, const String& bondfile2, int currBondAngle);
+
 	// Given the numbers of each particle, populate the type list.
 	void populate();
 
@@ -201,6 +207,7 @@ public:
 	int numExcludes;
 	int numAngles;
 	int numDihedrals;
+	int numBondAngles;
 	int numRestraints;
 	int* numPartsOfType;
 	String partFile;
@@ -209,12 +216,14 @@ public:
 	String angleFile;
 	String dihedralFile;
 	String restraintFile;
+	String bondAngleFile;
 	bool readPartsFromFile;
 	bool readGroupSitesFromFile;
 	bool readBondsFromFile;
 	bool readExcludesFromFile;
 	bool readAnglesFromFile;
 	bool readDihedralsFromFile;
+	bool readBondAnglesFromFile;
 	bool readRestraintsFromFile;
 	//String* partGridFile;
 	String **partGridFile;
@@ -254,8 +263,18 @@ public:
 	String* dihedralTableFile;
 	int numTabDihedralFiles;
 
+	BondAngle* bondAngles;
+
 	Restraint* restraints;
 
+	void readProductPotentials();
+	String productPotentialFile;
+	int numProductPotentials;
+	bool readProductPotentialsFromFile;
+        ProductPotentialConf* productPotentials;
+	XpotMap simple_potential_ids;
+        std::vector<SimplePotential> simple_potentials;
+
         //Han-Yi Chou
         String ParticleDynamicType;
         String RigidBodyDynamicType;
diff --git a/src/GrandBrownTown.cu b/src/GrandBrownTown.cu
index aa5295f4271f2ac169033e139e2fa15418654562..ef6ef5dfd8eb5599b87e32b800a915f649d1ead1 100644
--- a/src/GrandBrownTown.cu
+++ b/src/GrandBrownTown.cu
@@ -204,6 +204,8 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg,
 	partTableIndex0 = c.partTableIndex0;
 	partTableIndex1 = c.partTableIndex1;
 
+	numBondAngles = c.numBondAngles;
+
 	numTabBondFiles = c.numTabBondFiles;
 	bondMap = c.bondMap;
 	// TODO: bondList = c.bondList;
@@ -220,6 +222,8 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg,
 	dihedrals = c.dihedrals;
 	numTabDihedralFiles = c.numTabDihedralFiles;
 
+	bondAngles = c.bondAngles;
+
 	// Device parameters
 	//type_d = c.type_d;
 	part_d = c.part_d;
@@ -256,9 +260,9 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg,
 	 //			    fullLongRange, numBonds, numTabBondFiles, numExcludes, numAngles, numTabAngleFiles,
 	 //			    numDihedrals, numTabDihedralFiles, c.pairlistDistance, numReplicas);
 	internal = new ComputeForce(c, numReplicas);
-	
+
 	//MLog: I did the other halve of the copyToCUDA function from the Configuration class here, keep an eye on any mistakes that may occur due to the location.
-	internal -> copyToCUDA(c.simNum, c.type, c.bonds, c.bondMap, c.excludes, c.excludeMap, c.angles, c.dihedrals, c.restraints);
+	internal -> copyToCUDA(c.simNum, c.type, c.bonds, c.bondMap, c.excludes, c.excludeMap, c.angles, c.dihedrals, c.restraints, c.bondAngles, c.simple_potential_ids, c.simple_potentials, c.productPotentials );
 	if (numGroupSites > 0) init_cuda_group_sites();
 
 	// TODO: check for duplicate potentials 
@@ -283,11 +287,11 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg,
 			if (bondTableFile[p].length() > 0) {
 				//MLog: make sure to add to all GPUs
 			    // printf("...loading %s\n",bondTableFile[p].val());
-				internal->addBondPotential(bondTableFile[p].val(), p, bonds);
+			    internal->addBondPotential(bondTableFile[p].val(), p, bonds, bondAngles);
 				// printf("%s\n",bondTableFile[p].val());
 			} else {
 			    printf("...skipping %s (\n",bondTableFile[p].val());
-			    internal->addBondPotential(bondTableFile[p].val(), p, bonds);
+			    internal->addBondPotential(bondTableFile[p].val(), p, bonds, bondAngles);
 			}
 			    
 	}
@@ -298,7 +302,7 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg,
 			if (angleTableFile[p].length() > 0)
 			{
 				//MLog: make sure to do this for every GPU
-				internal->addAnglePotential(angleTableFile[p].val(), p, angles);
+			    internal->addAnglePotential(angleTableFile[p].val(), p, angles, bondAngles);
 			}
 	}
 
@@ -373,7 +377,32 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg,
 	    }
 	}
 	}
-	internal->copyBondedListsToGPU(bondList,angleList,dihedralList,dihedralPotList);
+
+	if (numBondAngles > 0) {
+	bondAngleList = new int4[ (numBondAngles*2) * numReplicas ];
+	for(int k = 0 ; k < numReplicas; k++) {
+	    for(int i = 0; i < numBondAngles; ++i) {
+			if (bondAngles[i].tabFileIndex1 == -1) {
+				fprintf(stderr,"Error: bondanglefile '%s' was not read with tabulatedAngleFile command.\n", bondAngles[i].angleFileName1.val());
+				exit(1);
+			}
+			if (bondAngles[i].tabFileIndex2 == -1) {
+				fprintf(stderr,"Error: bondanglefile1 '%s' was not read with tabulatedBondFile command.\n", bondAngles[i].bondFileName.val());
+				exit(1);
+			}
+			if (bondAngles[i].tabFileIndex3 == -1) {
+				fprintf(stderr,"Error: bondanglefile2 '%s' was not read with tabulatedBondFile command.\n", bondAngles[i].angleFileName2.val());
+				exit(1);
+			}
+			int idx = i+k*numBondAngles;
+			bondAngleList[idx*2]   = make_int4( bondAngles[i].ind1+k*num, bondAngles[i].ind2+k*num,
+							    bondAngles[i].ind3+k*num, bondAngles[i].ind4+k*num );
+			bondAngleList[idx*2+1] = make_int4( bondAngles[i].tabFileIndex1, bondAngles[i].tabFileIndex2, bondAngles[i].tabFileIndex3, -1 );
+	    }
+	}
+	}
+
+	internal->copyBondedListsToGPU(bondList,angleList,dihedralList,dihedralPotList,bondAngleList);
 	
 	forceInternal = new Vector3[(num+num_rb_attached_particles+numGroupSites)*numReplicas];
 	if (fullLongRange != 0)
diff --git a/src/GrandBrownTown.h b/src/GrandBrownTown.h
index 56d4c5a3b28c7b765542ba87eb5efa2445768c82..ce7f19c29534db17e8b2ef8656f866b701455c67 100644
--- a/src/GrandBrownTown.h
+++ b/src/GrandBrownTown.h
@@ -275,6 +275,10 @@ private:
 	int4 *dihedralList;
 	int  *dihedralPotList;
 
+	int numBondAngles;
+	BondAngle* bondAngles;
+	int4 *bondAngleList;
+
         //Han-Yi Chou
         String particle_dynamic;
         String rigidbody_dynamic;
diff --git a/src/ProductPotential.h b/src/ProductPotential.h
new file mode 100644
index 0000000000000000000000000000000000000000..ff918e609c1b59827b71bfa0af68401ba287079a
--- /dev/null
+++ b/src/ProductPotential.h
@@ -0,0 +1,30 @@
+#pragma once
+
+#ifdef __CUDACC__
+    #define HOST __host__
+    #define DEVICE __device__
+#else
+    #define HOST 
+    #define DEVICE 
+#endif
+
+#include "useful.h"
+#include <cuda.h>
+
+
+class ProductPotentialConf {
+public:
+    ProductPotentialConf() {}
+    ProductPotentialConf( std::vector< std::vector<int> > indices, std::vector<String> potential_names ) :
+	indices(indices), potential_names(potential_names) { }
+
+    std::vector< std::vector<int> > indices; /* indices of particles */
+    std::vector<String> potential_names;
+
+    inline ProductPotentialConf(const ProductPotentialConf& a) : indices(a.indices), potential_names(a.potential_names) { }
+
+    
+	/* String toString(); */
+	/* void print(); */
+};
+
diff --git a/src/TabulatedDihedral.h b/src/TabulatedDihedral.h
index e806e2dab95b4ddc855f6807266cb8715c82d64d..00c816128813d36686b46bd67142c92efc3d1883 100644
--- a/src/TabulatedDihedral.h
+++ b/src/TabulatedDihedral.h
@@ -13,8 +13,6 @@
 // #include <math.h>
 // #define _USING_MATH_DEFINES
 
-constexpr float BD_PI = 3.1415927f;
-
 class TabulatedDihedralPotential {
 public:
 	TabulatedDihedralPotential();
diff --git a/src/TabulatedMethods.cuh b/src/TabulatedMethods.cuh
index d92809c99ce0206d8b4b380a6cc08abf3b692112..c775a6b00acd336a95615a496244b45eee9e3a36 100644
--- a/src/TabulatedMethods.cuh
+++ b/src/TabulatedMethods.cuh
@@ -2,6 +2,14 @@
 
 // Defined elsewhere: constexpr float BD_PI = 3.1415927f;
 
+struct AngleForce {
+    __host__ __device__
+    AngleForce(Vector3 f1, Vector3 f3, float e) : f1(f1), f3(f3), e(e) { }
+    Vector3 f1;
+    Vector3 f3;
+    float e;
+};
+
 __device__ inline void computeAngle(const TabulatedAnglePotential* __restrict__ a, const BaseGrid* __restrict__ sys, Vector3* force, const Vector3* __restrict__ pos,
 				const int& i, const int& j, const int& k, float* energy, bool get_energy) {
 	    
@@ -77,6 +85,108 @@ __device__ inline void computeAngle(const TabulatedAnglePotential* __restrict__
 	atomicAdd( &force[k], force3 );
 }
 
+__device__ inline AngleForce calcAngle(const TabulatedAnglePotential* __restrict__ a, const Vector3 ab, const Vector3 bc, const Vector3 ac) {
+	// // The vectors between each pair of particles
+	// const Vector3 ab = sys->wrapDiff(posa - posb);
+	// const Vector3 bc = sys->wrapDiff(posb - posc);
+	// const Vector3 ac = sys->wrapDiff(posc - posa);
+ 
+	// Find the distance between each pair of particles
+	float distab = ab.length2();
+	float distbc = bc.length2();
+	const float distac2 = ac.length2();
+  
+	// Find the cosine of the angle we want - <ABC	
+	float cos = (distab + distbc - distac2);
+
+	distab = 1.0f/sqrt(distab); //TODO: test other functiosn
+	distbc = 1.0f/sqrt(distbc);
+	cos *= 0.5f * distbc * distab;
+  
+	// If the cosine is illegitimate, set it to 1 or -1 so that acos won't fail
+	if (cos < -1.0f) cos = -1.0f;
+	if (cos > 1.0f) cos = 1.0f;
+
+	// Find the sine while we're at it.
+
+	// Now we can use the cosine to find the actual angle (in radians)		
+	float angle = acos(cos);
+
+	// transform angle to units of tabulated array index
+	angle *= a->angle_step_inv;
+
+	// tableAngle[0] stores the potential at angle_step
+	// tableAngle[1] stores the potential at angle_step * 2, etc.
+	// 'home' is the index after which 'convertedAngle' would appear if it were stored in the table	
+	int home = int(floorf(angle));
+        home =  (home >= a->size) ? (a->size)-1 : home; 
+	//assert(home >= 0);
+	//assert(home+1 < a->size);
+
+	// // Make angle the distance from [0,1) from the first index in the potential array index
+	// angle -= home;
+		
+	// Linearly interpolate the potential	
+	float U0 = a->pot[home];
+	float dUdx = (a->pot[(((home+1)==(a->size)) ? (a->size)-1 : home+1)] - U0) * a->angle_step_inv;
+	float e = ((dUdx * (angle-home)) + U0);
+
+	float sin = sqrtf(1.0f - cos*cos);
+	dUdx /= abs(sin) > 1e-3 ? sin : 1e-3; // avoid singularity 
+
+	// Calculate the forces
+	Vector3 force1 = -(dUdx*distab) * (ab * (cos*distab) + bc * distbc); // force on particle 1
+	Vector3 force3 = (dUdx*distbc) * (bc * (cos*distbc) + ab * distab); // force on particle 3
+
+	return AngleForce(force1,force3,e);
+}
+
+__device__ inline void computeBondAngle(const TabulatedAnglePotential* __restrict__ a1,
+					const TabulatedPotential* __restrict__ b, const TabulatedAnglePotential* __restrict__ a2,
+					const BaseGrid* __restrict__ sys, Vector3* force, const Vector3* __restrict__ pos,
+					const int& i, const int& j, const int& k, const int& l, float* energy, bool get_energy) {
+
+	// Particle's type and position
+	Vector3 posa = pos[i];
+	Vector3 posb = pos[j];
+	Vector3 posc = pos[k];
+	Vector3 posd = pos[l];
+
+	// The vectors between each pair of particles
+	const Vector3 ab = sys->wrapDiff(posb - posa);
+	const Vector3 bc = sys->wrapDiff(posc - posb);
+	const Vector3 ca = sys->wrapDiff(posc - posa);
+	AngleForce fe_a1 = calcAngle(a1, -ab,-bc,ca);
+
+	float distbc = bc.length2();
+	EnergyForce fe_b = b->compute(bc,distbc);
+
+	const Vector3 cd = sys->wrapDiff(posd - posc);
+	const Vector3 db = sys->wrapDiff(posd - posb);
+	AngleForce fe_a2 = calcAngle(a2, -bc,-cd,db);
+
+        if(get_energy)
+        {
+	    float e =  fe_a1.e * fe_b.e * fe_a2.e * 0.25f;
+            atomicAdd( &energy[i], e);
+            atomicAdd( &energy[j], e);
+            atomicAdd( &energy[k], e);
+            atomicAdd( &energy[l], e);
+        }
+	atomicAdd( &force[i], fe_a1.f1 * fe_b.e * fe_a2.e );
+	atomicAdd( &force[j], 
+		   -(fe_a1.f1 + fe_a1.f3) * fe_b.e * fe_a2.e
+		   + fe_b.f * fe_a1.e * fe_a2.e
+		   + fe_a2.f1 * fe_b.e * fe_a1.e 
+	    );
+	atomicAdd( &force[k], 
+		   fe_a1.f3 * fe_b.e * fe_a2.e
+		   - fe_b.f * fe_a1.e * fe_a2.e 
+		   - (fe_a2.f1 + fe_a2.f3) * fe_b.e * fe_a1.e
+	    );
+	atomicAdd( &force[l], fe_a2.f3 * fe_b.e * fe_a1.e );
+}
+
 
 __device__ inline void computeDihedral(const TabulatedDihedralPotential* __restrict__ d,
 				const BaseGrid* __restrict__ sys, Vector3* forces, const Vector3* __restrict__ pos,
diff --git a/src/TabulatedPotential.cu b/src/TabulatedPotential.cu
index 9a378bb67b972a3da538f88ee830a52a0f63d133..75f6fe74a6220ed7b5e1a92c4bfd4d1f223abbfa 100644
--- a/src/TabulatedPotential.cu
+++ b/src/TabulatedPotential.cu
@@ -100,6 +100,16 @@ bool TabulatedPotential::truncate(float switchDist, float cutoff, float value) {
 // 	e0 = v3[n-1] + v2[n-1] + v1[n-1] + v0[n-1];
 // }
 
+// void TabulatedPotential::init(const float* dist, const float* pot, int n0) {
+// 	n = abs(n0);
+// 	dr = dist[1]-dist[0];
+// 	r0 = dist[0];
+// 	r1 = r0 + n*dr;
+
+// 	v0 = new float[n];
+// 	for (int i = 0; i < n; i++) v0[i] = pot[i];
+// }
+
 
 FullTabulatedPotential::FullTabulatedPotential(const char* fileName) : fileName(fileName) {
 	// printf("File: %s\n", fileName);
@@ -180,3 +190,72 @@ int FullTabulatedPotential::countValueLines(const char* fileName) {
 
 	return count;
 }
+
+int countValueLines(const char* fileName) {
+	FILE* inp = fopen(fileName, "r");
+	if (inp == NULL) {
+		printf("SimplePotential::countValueLines Could not open file '%s'\n", fileName);
+		exit(-1);
+	}
+	char line[256];
+	int count = 0;
+
+	while (fgets(line, 256, inp) != NULL) {
+		// Ignore comments.
+		int len = strlen(line);
+		if (line[0] == '#') continue;
+		if (len < 2) continue;
+		count++;
+	}
+	fclose(inp);
+	return count;
+}
+ 
+SimplePotential::SimplePotential(const char* filename, SimplePotentialType type) : type(type) {
+	FILE* inp = fopen(filename, "r");
+	if (inp == NULL) {
+		printf("SimplePotential::SimplePotential Could not open file '%s'\n", filename);
+		exit(-1);
+	}
+	
+	char line[256];
+	
+	size = (unsigned int) countValueLines(filename);
+	float* r = new float[size];
+	pot = new float[size];
+	
+	int count = 0;
+	while (fgets(line, 256, inp) != NULL) {
+		// Ignore comments.
+		int len = strlen(line);
+		if (line[0] == '#') continue;
+		if (len < 2) continue;
+		
+		String s(line);
+		int numTokens = s.tokenCount();
+		if (numTokens != 2) {
+			printf("SimplePotential::SimplePotential Invalid tabulated potential file line: %s\n", line);
+			exit(-1);
+		}
+		
+		String* tokenList = new String[numTokens];
+		s.tokenize(tokenList);
+		if (tokenList == NULL) {
+			printf("SimplePotential::SimplePotential Invalid tabulated potential file line: %s\n", line);
+			exit(-1);
+		}
+		r[count] = (float) strtod(tokenList[0], NULL);
+		pot[count] = (float) strtod(tokenList[1], NULL);
+		count++;
+		
+		delete[] tokenList;
+	}
+	fclose(inp);
+
+	if (type == BOND) {
+	    step_inv = (size-1.0f) / (r[size-1]-r[0]);
+	} else {
+	    step_inv = 57.29578f * (size-1.0f) / (r[size-1]-r[0]);
+	}
+	delete[] r;
+}
diff --git a/src/TabulatedPotential.h b/src/TabulatedPotential.h
index e32068a6fa7dc0b8cbada59cb7f6ea8d40933b22..0df2bcf6cc2439cee863a025eacc3029364edb51 100644
--- a/src/TabulatedPotential.h
+++ b/src/TabulatedPotential.h
@@ -7,11 +7,17 @@
     #define HOST __host__
     #define DEVICE __device__
 #else
-    #define HOST 
-    #define DEVICE 
+    #define HOST
+    #define DEVICE
 #endif
 
 #include "useful.h"
+#include "BaseGrid.h"
+
+#ifdef __CUDA_ARCH__
+#include "CudaUtil.cuh"
+#endif
+
 #include <cuda.h>
 
 #ifndef gpuErrchk
@@ -20,6 +26,7 @@
 	    fprintf(stderr,"CUDA Error: %s %s %d\n", cudaGetErrorString(code), __FILE__, __LINE__); \
 	}}
 #endif
+constexpr float BD_PI = 3.1415927f;
 
 class EnergyForce {
 public:
@@ -153,7 +160,7 @@ public:
 		Vector3 force = (du*drInv/d)*r;
 		return EnergyForce(energy,force);
 	}
-  HOST DEVICE inline Vector3 computef(Vector3 r, float d) {
+  HOST DEVICE inline Vector3 computef(const Vector3 r, float d) const {
 		d = sqrt(d);
 		// float d = r.length();
 		// RBTODO: precompute so that initial blocks are zero; reduce computation here
@@ -177,8 +184,344 @@ private:
   float drInv; //TODO replace with drInv
   float r0;
 };
+
+/* // New unified/simplified classes for working with potentials */
+/* <template int num_indices, int max_integer> */
+/* class BitMaskInts { */
+/*     BitMaskInts(); */
+
+/* private: */
+/*     static_assert( ceil(log2(max_integer)) <= CHAR_BIT ); */
+
+/*     char data[ ceil(num_indices * ceil(log2(max_integer)) / CHAR_BIT) ]; */
+
+/*     HOST DEVICE inline unsigned short int get_int(i) const { */
+/* 	unsigned int first_bit = i * ceil(log2(max_integer)); */
+/* 	unsigned int last_bit  = (i+1) * ceil(log2(max_integer))-1; */
+/* 	char c0 = data[floor(first_bit/CHAR_BIT)]; */
+/* 	char c1 = data[floor(last_bit/CHAR_BIT)]; */
+
+/* 	unsigned short int ret = c0 << (first_bit % CHAR_BIT) /\* shift left *\/ */
+/*     };     */
+
+/* } */
+
+
+enum SimplePotentialType { UNSET, BOND, ANGLE, DIHEDRAL, VECANGLE };
+// enum PotentialTypeAtoms { bond=2, angle=3, dihedral=4 };
+
+
+class SimplePotential {
+public:
+    SimplePotential() { }
+    SimplePotential(const char* filename, SimplePotentialType type);
+    SimplePotential(float* pot, float step_inv, unsigned int size, SimplePotentialType type) :
+	pot(pot), step_inv(step_inv), size(size), type(type) { }
+    
+
+    float* pot;	     // actual potential values
+    float  step_inv; // angular increments of potential file
+    unsigned int size;     // number of data points in the file
+
+    SimplePotentialType type;
+
+    /* float start = 0;  */
+    /* bool is_periodic = false; */
+
+    /* HOST void copy_to_device(SimplePotential* device_addr_p, unsigned int offset=0) { */
+    /* 	/\* Assumes device_addr_p is already allocated, allocates space for pot *\/ */
+    /* 	float* val, tmp; */
+    /* 	gpuErrchk(cudaMalloc(&val, sizeof(float)*size)); // TODO equivalent cudaFree */
+    /* 	gpuErrchk(cudaMemcpyAsync(val, pot, sizeof(float)*size, cudaMemcpyHostToDevice)); */
+    /* 	tmp = pot; */
+    /* 	pot = val; */
+    /* 	gpuErrchk(cudaMemcpyAsync(device_addr_p+offset, this, sizeof(SimplePotential), cudaMemcpyHostToDevice)); */
+    /* 	pot = tmp; */
+    /* 	// return val; */
+    /* } */
+
+    HOST DEVICE inline float compute_value(const Vector3* __restrict__ pos,
+					   const BaseGrid* __restrict__ sys,
+					   const int* __restrict__ particles) const {
+	float val;
+	if (type == BOND)
+	    val = compute_bond(pos, sys, particles[0], particles[1]);
+	else if (type == ANGLE)
+	    val = compute_angle(pos, sys, particles[0], particles[1], particles[2]);
+	else if (type == DIHEDRAL)
+	    val = compute_dihedral(pos, sys, particles[0], particles[1], particles[2], particles[3]);
+	else if (type == VECANGLE)
+	    val = compute_vecangle(pos, sys, particles[0], particles[1], particles[2], particles[3]);
+	return val;
+    }
+
+    HOST DEVICE inline float2 compute_energy_and_deriv(float value) {
+	float2 ret;
+	if (type == DIHEDRAL) {
+	    ret = linearly_interpolate<true>(value, -BD_PI);
+	} else {
+	    ret = linearly_interpolate<false>(value);
+	}
+	return ret;
+    }
+
+    HOST DEVICE inline float compute_bond(const Vector3* __restrict__ pos,
+					      const BaseGrid* __restrict__ sys,
+					      int i, int j) const {
+	return sys->wrapDiff( pos[j] - pos[i] ).length();
+    }
+
+    HOST DEVICE inline float compute_angle(const Vector3* __restrict__ pos,
+					   const BaseGrid* __restrict__ sys,
+					   int i, int j, int k) const {
+	const Vector3 ab = sys->wrapDiff(pos[j] - pos[i]);
+	const Vector3 bc = sys->wrapDiff(pos[k] - pos[j]);
+	const Vector3 ac = sys->wrapDiff(pos[k] - pos[i]);
+	return compute_angle( ab.length2(), bc.length2(), ac.length2() );
+    }
+
+    HOST DEVICE inline float compute_vecangle(const Vector3* __restrict__ pos,
+					      const BaseGrid* __restrict__ sys,
+					      int i, int j, int k, int l) const {
+	const Vector3 ab = sys->wrapDiff(pos[j] - pos[i]);
+	const Vector3 bc = sys->wrapDiff(pos[l] - pos[k]);
+	const Vector3 ac = bc+ab;
+	return compute_angle( ab.length2(), bc.length2(), ac.length2() );
+    }
+
+    HOST DEVICE inline float compute_angle(float distab2, float distbc2, float distac2) const {
+	// Find the cosine of the angle we want - <ABC
+	float cos = (distab2 + distbc2 - distac2);
+
+	distab2 = 1.0f/sqrt(distab2); //TODO: test other functions
+	distbc2 = 1.0f/sqrt(distbc2);
+	cos *= 0.5f * distbc2 * distab2;
+
+	// If the cosine is illegitimate, set it to 1 or -1 so that acos won't fail
+	if (cos < -1.0f) cos = -1.0f;
+	if (cos > 1.0f) cos = 1.0f;
+
+	return acos(cos);
+    }
+
+    HOST DEVICE inline float compute_dihedral(const Vector3* __restrict__ pos,
+					      const BaseGrid* __restrict__ sys,
+					      int i, int j, int k, int l) const {
+	const Vector3 ab = -sys->wrapDiff(pos[j] - pos[i]);
+	const Vector3 bc = -sys->wrapDiff(pos[k] - pos[j]);
+	const Vector3 cd = -sys->wrapDiff(pos[l] - pos[k]);
+
+	const Vector3 crossABC = ab.cross(bc);
+	const Vector3 crossBCD = bc.cross(cd);
+	const Vector3 crossX = bc.cross(crossABC);
+
+	const float cos_phi = crossABC.dot(crossBCD) / (crossABC.length() * crossBCD.length());
+	const float sin_phi = crossX.dot(crossBCD) / (crossX.length() * crossBCD.length());
+
+	return -atan2(sin_phi,cos_phi);
+    }
+
+    template <bool is_periodic>
+	HOST DEVICE inline float2 linearly_interpolate(float x, float start=0.0f) const {
+	float w = (x - start) * step_inv;
+	int home = int( floorf(w) );
+	w = w - home;
+	// if (home < 0) return EnergyForce(v0[0], Vector3(0.0f));
+	if (home < 0) {
+	    if (is_periodic) home += size;
+	    else return make_float2(pot[0],0.0f);
+	}
+	else if (home >= size) {
+	    if (is_periodic) home -= size;
+	    else return make_float2(pot[size-1],0.0f);
+	}
+
+	float u0 = pot[home];
+	float du = home+1 < size ? pot[home+1]-u0 : is_periodic ? pot[0]-u0 : 0;
+
+	return make_float2(du*w+u0, du*step_inv);
+    }
+
+    DEVICE inline void apply_force(const Vector3* __restrict__ pos,
+				   const BaseGrid* __restrict__ sys,
+				   Vector3* __restrict__ forces,
+				   int* particles, float energy_deriv) const {
+	if (type == BOND)
+	    apply_bond_force(pos, sys, forces, particles[0], particles[1], energy_deriv);
+	else if (type == ANGLE)
+	    apply_angle_force(pos, sys, forces, particles[0], particles[1],
+			     particles[2], energy_deriv);
+	else if (type == DIHEDRAL)
+	    apply_dihedral_force(pos, sys, forces, particles[0], particles[1],
+				 particles[2], particles[3], energy_deriv);
+	else if (type == VECANGLE)
+	    apply_vecangle_force(pos, sys, forces, particles[0], particles[1],
+				 particles[2], particles[3], energy_deriv);
+    }
+
+    __device__ inline void apply_bond_force(const Vector3* __restrict__ pos,
+					const BaseGrid* __restrict__ sys,
+					Vector3* __restrict__ force,
+					int i, int j, float energy_deriv) const {
+#ifdef __CUDA_ARCH__
+	Vector3 f = sys->wrapDiff( pos[j] - pos[i] );
+	f = f * energy_deriv / f.length();
+	atomicAdd(&force[i], f);
+	atomicAdd(&force[j], -f);
+#endif
+    }
+
+    struct TwoVector3 {
+	Vector3 v1;
+	Vector3 v2;
+    };
+
+    DEVICE inline TwoVector3 get_angle_force(const Vector3& ab,
+					     const Vector3& bc,
+					     float energy_deriv) const {
+	// Find the distance between each pair of particles
+	float distab = ab.length2();
+	float distbc = bc.length2();
+	const float distac2 = (ab+bc).length2();
+
+	// Find the cosine of the angle we want - <ABC
+	float cos = (distab + distbc - distac2);
+
+	distab = 1.0f/sqrt(distab); //TODO: test other functions
+	distbc = 1.0f/sqrt(distbc);
+	cos *= 0.5f * distbc * distab;
+
+	// If the cosine is illegitimate, set it to 1 or -1 so that acos won't fail
+	if (cos < -1.0f) cos = -1.0f;
+	if (cos > 1.0f) cos = 1.0f;
+
+	float sin = sqrtf(1.0f - cos*cos);
+	energy_deriv /= abs(sin) > 1e-3 ? sin : 1e-3; // avoid singularity
+	if (abs(sin) < 1e-3) {
+	    printf("BAD ANGLE: sin, cos, energy_deriv, distab, distbc, distac2: (%f %f %f %f %f)\n",
+		   sin,cos,energy_deriv,distab,distbc);	}
+
+	// Calculate the forces
+	TwoVector3 force;
+	force.v1 = (energy_deriv*distab) * (ab * (cos*distab) + bc * distbc); // force on 1st particle
+	force.v2 = -(energy_deriv*distbc) * (bc * (cos*distbc) + ab * distab); // force on last particle
+	return force;
+    }
+
+    // DEVICE inline TwoVector3 get_angle_force(const Vector3& ab,
+    // 					     const Vector3& bc,
+    // 					     float energy_deriv) const {
+    // 	// Find the distance between each pair of particles
+    // 	float distab = ab.length2();
+    // 	float distbc = bc.length2();
+
+    // 	float pre = distab*distbc - pow(ab.dot(bc),2);
+    // 	// if (pre < 1e-6) {
+    // 	//     pre = 1e-3;
+    // 	//     printf("BAD ANGLE: pre, energy_deriv, distab, distbc, ab.dot(bc): (%f %f %f %f %f)\n",
+    // 	// 	   pre,energy_deriv,distab,distbc,ab.dot(bc));	
+    // 	// } else pre = sqrt(pre);
+    // 	// if (distab == distbc) {
+    // 	//     printf("GOOD ANGLE: pre, energy_deriv, distab, distbc, ab.dot(bc): (%f %f %f %f %f)\n",
+    // 	// 	   pre,energy_deriv,distab,distbc,ab.dot(bc));	
+    // 	// }
+	    
+    // 	pre = pre > 1e-6 ? sqrt(pre) : 1e-3;
+    // 	energy_deriv /= pre;
+
+    // 	TwoVector3 force;
+    // 	//force.v1 = energy_deriv * Vector3::element_mult( 1-Vector3::element_mult(ab,ab)/distab, bc);
+    // 	//force.v2 = energy_deriv * Vector3::element_mult( 1-Vector3::element_mult(bc,bc)/distbc, ab);
+
+    // 	Vector3 abbc = Vector3::element_mult(-ab,bc);
+    // 	force.v1 = -energy_deriv * (bc-Vector3::element_mult(abbc, -ab/distab));
+    // 	force.v2 = -energy_deriv * (-ab-Vector3::element_mult(abbc, bc/distbc));
+    // 	return force;
+    // }
+
+    DEVICE inline void apply_angle_force(const Vector3* __restrict__ pos,
+					 const BaseGrid* __restrict__ sys,
+					 Vector3* __restrict__ force,
+					 int i, int j, int k, float energy_deriv) const {
+
+#ifdef __CUDA_ARCH__
+	const Vector3 ab = sys->wrapDiff(pos[j] - pos[i]);
+	const Vector3 bc = sys->wrapDiff(pos[k] - pos[j]);
+	// const Vector3 ac = sys->wrapDiff(pos[k] - pos[i]);
+
+	TwoVector3 f = get_angle_force(ab,bc, energy_deriv);
+
+	atomicAdd( &force[i], f.v1 );
+	atomicAdd( &force[j], -(f.v1 + f.v2) );
+	atomicAdd( &force[k], f.v2 );
+#endif
+    }
+
+    DEVICE inline void apply_dihedral_force(const Vector3* __restrict__ pos,
+					    const BaseGrid* __restrict__ sys,
+					    Vector3* __restrict__ force,
+					    int i, int j, int k, int l, float energy_deriv) const {
+#ifdef __CUDA_ARCH__
+	const Vector3 ab = -sys->wrapDiff(pos[j] - pos[i]);
+	const Vector3 bc = -sys->wrapDiff(pos[k] - pos[j]);
+	const Vector3 cd = -sys->wrapDiff(pos[l] - pos[k]);
+
+	const Vector3 crossABC = ab.cross(bc);
+	const Vector3 crossBCD = bc.cross(cd);
+	const Vector3 crossX = bc.cross(crossABC);
+
+	const float cos_phi = crossABC.dot(crossBCD) / (crossABC.length() * crossBCD.length());
+	const float sin_phi = crossX.dot(crossBCD) / (crossX.length() * crossBCD.length());
+
+	// return -atan2(sin_phi,cos_phi);
+	Vector3 f1, f2, f3; // forces
+	float distbc = bc.length();
+
+	f1 = -distbc * crossABC.rLength2() * crossABC;
+	f3 = -distbc * crossBCD.rLength2() * crossBCD;
+	f2 = -(ab.dot(bc) * bc.rLength2()) * f1 - (bc.dot(cd) * bc.rLength2()) * f3;
+
+	// energy_deriv = (ab.length2()*bc.length2()*crossABC.rLength2() > 100.0f || bc.length2()*cd.length2()*crossBCD.rLength2() > 100.0f) ? 0.0f : energy_deriv;
+	/* if ( energy_deriv > 1000.0f ) */
+	/*     energy_deriv = 1000.0f; */
+	/* if ( energy_deriv < -1000.0f ) */
+	/*     energy_deriv = -1000.0f; */
+
+	f1 *= energy_deriv;
+	f2 *= energy_deriv;
+	f3 *= energy_deriv;
+
+	atomicAdd( &force[i], f1 );
+	atomicAdd( &force[j], f2-f1 );
+	atomicAdd( &force[k], f3-f2 );
+	atomicAdd( &force[l], -f3 );
+#endif
+    }
+    DEVICE inline void apply_vecangle_force(const Vector3* __restrict__ pos,
+					    const BaseGrid* __restrict__ sys,
+					    Vector3* __restrict__ force,
+					    int i, int j, int k, int l, float energy_deriv) const {
+
+#ifdef __CUDA_ARCH__
+
+	const Vector3 ab = -sys->wrapDiff(pos[j] - pos[i]);
+	const Vector3 bc = -sys->wrapDiff(pos[k] - pos[j]);
+	// const Vector3 ac = sys->wrapDiff(pos[k] - pos[i]);
+
+	TwoVector3 f = get_angle_force(ab,bc, energy_deriv);
+
+	atomicAdd( &force[i], f.v1 );
+	atomicAdd( &force[j], -f.v1 );
+	atomicAdd( &force[k], -f.v2 );
+	atomicAdd( &force[l], f.v2 );
+#endif
+    }
+
+};
+
 #ifndef delgpuErrchk
 #undef  delgpuErrchk
 #undef  gpuErrchk(code)
 #endif
+
 #endif