diff --git a/ComputeForce.cu b/ComputeForce.cu
index 3f3455a26ceaed3da53e883b21cdd4dab1be385f..ae82baf25f6fc4f0c8a2277cad6466648f14702b 100644
--- a/ComputeForce.cu
+++ b/ComputeForce.cu
@@ -578,23 +578,24 @@ float ComputeForce::computeTabulated(bool get_energy) {
 	// int nb = (1+(decomp.nCells.x * decomp.nCells.y * decomp.nCells.z)) * 75; /* RBTODO: number of pairLists */
 	const int nb = 800;
 	// printf("ComputeTabulated\n");
-	
-	if (get_energy) 
+	gpuErrchk(cudaDeviceSynchronize());
+
+	// RBTODO: get_energy
+	//if (get_energy)
+	if (false) 
 	{
 		clearEnergies<<< nb, numThreads >>>(energies_d,num);
 		gpuErrchk(cudaDeviceSynchronize());
-		computeTabulatedEnergyKernel<<< nb, numThreads >>>(forceInternal_d, pos_d, type_d, tablePot_d, sys_d, energies_d, cutoff2, numPairs_d, pairLists_d, pairTabPotType_d);
+		computeTabulatedEnergyKernel<<< nb, numThreads >>>(forceInternal_d, pos_d, sys_d,
+						cutoff2, numPairs_d, pairLists_d, pairTabPotType_d, tablePot_d, energies_d);
 	}
 	
 	else
 	{
-		computeTabulatedKernel<<< nb, numThreads >>>(forceInternal_d, pos_d, type_d,
-						tablePot_d, tableBond_d, sys_d,
-						bonds_d, bondMap_d,	numBonds, cutoff2,
-						numPairs_d, pairLists_d, pairTabPotType_d);
+		computeTabulatedKernel<<< nb, numThreads >>>(forceInternal_d, pos_d, sys_d,
+						cutoff2, numPairs_d, pairLists_d, pairTabPotType_d, tablePot_d);
 	}
 	/* printPairForceCounter<<<1,32>>>(); */
-	/* gpuErrchk(cudaDeviceSynchronize()); */
 
 	//Mlog: the commented function doesn't use bondList, uncomment for testing.
 	//if(bondMap_d != NULL && tableBond_d != NULL)
@@ -602,21 +603,23 @@ float ComputeForce::computeTabulated(bool get_energy) {
 
 	{
 	    //computeTabulatedBonds <<<numBlocks, numThreads>>> ( force, pos, num, numParts, sys_d, bonds, bondMap_d, numBonds, numReplicas, energies_d, get_energy, tableBond_d);
-	    computeTabulatedBonds <<<numBlocks, numThreads>>> ( forceInternal_d, pos_d, num, numParts, sys_d, bondList_d, (numBonds/2), numReplicas, energies_d, get_energy, tableBond_d);
+	computeTabulatedBonds <<<nb, numThreads>>> ( forceInternal_d, pos_d, sys_d, numReplicas*numBonds/2, bondList_d, tableBond_d);
 	}
+
 	if (angleList_d != NULL && tableAngle_d != NULL)
-		computeTabulatedAngles<<<numBlocks, numThreads>>>(forceInternal_d, pos_d, sys_d, numAngles*numReplicas, angleList_d, tableAngle_d);
-	
+		computeTabulatedAngles<<<nb, numThreads>>>(forceInternal_d, pos_d, sys_d, numAngles*numReplicas, angleList_d, tableAngle_d);
+
 	if (dihedralList_d != NULL && tableDihedral_d != NULL)
-		computeTabulatedDihedrals<<<numBlocks, numThreads>>>(forceInternal_d, pos_d, sys_d, numDihedrals*numReplicas, dihedralList_d, dihedralPotList_d, tableDihedral_d);
-	
+		computeTabulatedDihedrals<<<nb, numThreads>>>(forceInternal_d, pos_d, sys_d, numDihedrals*numReplicas, dihedralList_d, dihedralPotList_d, tableDihedral_d);
+
 
 	// Calculate the energy based on the array created by the kernel
-	if (get_energy) {
-		gpuErrchk(cudaDeviceSynchronize());
-		thrust::device_ptr<float> en_d(energies_d);
-		energy = thrust::reduce(en_d, en_d + num);
-	}
+	// TODO: return energy
+	// if (get_energy) {
+	// 	gpuErrchk(cudaDeviceSynchronize());
+	// 	thrust::device_ptr<float> en_d(energies_d);
+	// 	energy = thrust::reduce(en_d, en_d + num);
+	// }
 
 	return energy;
 }
diff --git a/ComputeForce.cuh b/ComputeForce.cuh
index 90d56bdd61581b9c7999aacd97970976a16e076d..0fa23ba096c8acfd2076299cd7ef22dee8a0818c 100644
--- a/ComputeForce.cuh
+++ b/ComputeForce.cuh
@@ -432,12 +432,9 @@ __global__ void printPairForceCounter() {
 // using cell decomposition
 //
 __global__ void computeTabulatedKernel(
-	Vector3* force, const Vector3* __restrict__ pos, int* type,
-	TabulatedPotential** __restrict__ tablePot, TabulatedPotential** __restrict__ tableBond,
-	const BaseGrid* __restrict__ sys,
-	const Bond* __restrict__ bonds, const int2* __restrict__ bondMap, int numBonds,
-	float cutoff2, const int* __restrict__ g_numPairs,
-	const int2* __restrict__ g_pair, const int* __restrict__ g_pairTabPotType) {
+	Vector3* force, const Vector3* __restrict__ pos,
+	const BaseGrid* __restrict__ sys, float cutoff2,
+	const int* __restrict__ g_numPairs,	const int2* __restrict__ g_pair, const int* __restrict__ g_pairTabPotType, 	TabulatedPotential** __restrict__ tablePot) {
 //remove int* type. remove bond references	
 
 	const int numPairs = *g_numPairs;
@@ -470,10 +467,10 @@ __global__ void clearEnergies(float* g_energies, int num) {
 		g_energies[i] = 0.0f;
 	}
 }
-__global__ void computeTabulatedEnergyKernel(Vector3* force, Vector3* __restrict__ pos,	int* type, TabulatedPotential** __restrict__ tablePot, BaseGrid* __restrict__ sys,  float* g_energies, float cutoff2, int* __restrict__ g_numPairs,	int2* __restrict__ g_pair, int* __restrict__ g_pairTabPotType) {
-	
+__global__ void computeTabulatedEnergyKernel(Vector3* force, const Vector3* __restrict__ pos,
+				const BaseGrid* __restrict__ sys, float cutoff2,
+				const int* __restrict__ g_numPairs,	const int2* __restrict__ g_pair, const int* __restrict__ g_pairTabPotType, 	TabulatedPotential** __restrict__ tablePot, float* g_energies) {
 	const int numPairs = *g_numPairs;
-	// RBTODO: BONDS (handle through an independent kernel call?)
 	for (int i = threadIdx.x+blockIdx.x*blockDim.x; i < numPairs; i+=blockDim.x*gridDim.x) {
 		const int2 pair = g_pair[i];
 		const int ai = pair.x;
@@ -486,7 +483,7 @@ __global__ void computeTabulatedEnergyKernel(Vector3* force, Vector3* __restrict
     // Calculate the force based on the tabulatedPotential file
 		float d2 = dr.length2();
 		/* RBTODO: filter tabpot particles ahead of time */
-		// RBTODO: order pairs according to distance to reduce divergence
+		// RBTODO: order pairs according to distance to reduce divergence // not actually faster
 		
 		if (tablePot[ind] != NULL && d2 <= cutoff2) { 
 			EnergyForce fe = tablePot[ind]->compute(dr,d2);
@@ -695,22 +692,17 @@ void computeAngles(Vector3 force[], Vector3 pos[],
 	}
 }*/
 
-__global__ void computeTabulatedBonds( Vector3* __restrict__ force,		Vector3* __restrict__ pos,					int num,
-				int numParts,		BaseGrid* __restrict__ sys,					int3* __restrict__ bondList_d,
-									   int numBonds,		int numReplicas,				float* __restrict__ g_energies,
-									   bool get_energy,		TabulatedPotential** tableBond)
-{
-	// Thread's unique ID.
-	int currBond = blockIdx.x * blockDim.x + threadIdx.x; // first particle ID
-
+__global__ void computeTabulatedBonds(Vector3* __restrict__ force,
+				Vector3* __restrict__ pos,
+				BaseGrid* __restrict__ sys,
+				int numBonds, int3* __restrict__ bondList_d, TabulatedPotential** tableBond) {
 	// Loop over ALL bonds in ALL replicas
-	if( currBond < (numBonds * numReplicas) ) //numBonds should be divided by 2 in the kernel call
-	{
+	for (int bid = threadIdx.x+blockIdx.x*blockDim.x; bid<numBonds; bid+=blockDim.x*gridDim.x) {
 		// Initialize interaction energy (per particle)
-		float energy_local = 0.0f;
+		// float energy_local = 0.0f;
 		
-		int i = bondList_d[currBond].x;
-		int j = bondList_d[currBond].y;
+		int i = bondList_d[bid].x;
+		int j = bondList_d[bid].y;
 
 		// Particle's type and position
 		//Vector3 posi = pos[i];
@@ -719,17 +711,17 @@ __global__ void computeTabulatedBonds( Vector3* __restrict__ force,		Vector3* __
 		// wrapping this value if necessary
 		const Vector3 dr = sys->wrapDiff(pos[j] - pos[i]);
 
-		Vector3 force_local = tableBond[ bondList_d[currBond].z ]->computef(dr,dr.length2());
+		Vector3 force_local = tableBond[ bondList_d[bid].z ]->computef(dr,dr.length2());
 			
 		atomicAdd( &force[i], force_local );
 		atomicAdd( &force[j], -force_local );
-
-		if (get_energy)
-		{
-			//TODO: clarification on energy computation needed, consider changing.
-			atomicAdd( &g_energies[i], energy_local);
-			//atomicAdd( &g_energies[j], energy_local);
-		}
+		
+		// if (get_energy)
+		// {
+		// 	//TODO: clarification on energy computation needed, consider changing.
+		// 	atomicAdd( &g_energies[i], energy_local);
+		// 	//atomicAdd( &g_energies[j], energy_local);
+		// }
 	}
 }
 
diff --git a/Configuration.cpp b/Configuration.cpp
index ce253af8b71fb2a7fe947ad737586cd8b4d8d5fb..2efe3e77603d7e8a54422e7ce7609efb94111578 100644
--- a/Configuration.cpp
+++ b/Configuration.cpp
@@ -1262,8 +1262,8 @@ void Configuration::readAngles() {
 	}
 	std::sort(angles, angles + numAngles, compare());	
 
-	for(int i = 0; i < numAngles; i++)
-		angles[i].print();
+	// for(int i = 0; i < numAngles; i++)
+	// 	angles[i].print();
 }
 
 void Configuration::readDihedrals() {
@@ -1324,8 +1324,8 @@ void Configuration::readDihedrals() {
 	}
 	std::sort(dihedrals, dihedrals + numDihedrals, compare());	
 
-	for(int i = 0; i < numDihedrals; i++)
-		dihedrals[i].print();
+	// for(int i = 0; i < numDihedrals; i++)
+	// 	dihedrals[i].print();
 }
 
 void Configuration::populate() {
diff --git a/GrandBrownTown.cu b/GrandBrownTown.cu
index 5efb8ff831919169942b2403f015599d96b25d8c..4e53b816b92e0ae09786d90594a57271e089a7e3 100644
--- a/GrandBrownTown.cu
+++ b/GrandBrownTown.cu
@@ -185,21 +185,21 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg,
 
 	// TODO: check for duplicate potentials 
 	if (c.tabulatedPotential) {
-		printf("Loading the tabulated potentials...\n");
+		printf("Loading %d tabulated non-bonded potentials...\n", numParts*numParts);
 		for (int p = 0; p < numParts*numParts; p++) {
 			if (partTableFile[p].length() > 0) {
 				int type0 = partTableIndex0[p];
 				int type1 = partTableIndex1[p];
 
 				internal->addTabulatedPotential(partTableFile[p].val(), type0, type1);
-				printf("Loaded %s for types %s and %s.\n", partTableFile[p].val(),
-						part[type0].name.val(), part[type1].name.val());
+				// printf("  Loaded %s for types %s and %s.\n", partTableFile[p].val(),
+				// 		part[type0].name.val(), part[type1].name.val());
 			}
 		}
 	}
 
 	if (c.readBondsFromFile) {
-		printf("Loading the tabulated bond potentials...\n");
+		printf("Loading %d tabulated bond potentials...\n", numTabBondFiles);
 		for (int p = 0; p < numTabBondFiles; p++)
 			if (bondTableFile[p].length() > 0) {
 				//MLog: make sure to add to all GPUs
@@ -209,7 +209,7 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg,
 	}
 
 	if (c.readAnglesFromFile) {
-		printf("Loading the tabulated angle potentials...\n");
+		printf("Loading %d tabulated angle potentials...\n", numTabAngleFiles);
 		for (int p = 0; p < numTabAngleFiles; p++)
 			if (angleTableFile[p].length() > 0)
 			{
@@ -219,7 +219,7 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg,
 	}
 
 	if (c.readDihedralsFromFile) {
-		printf("Loading the tabulated dihedral potentials...\n");
+		printf("Loading %d tabulated dihedral potentials...\n", numTabDihedralFiles);
 		for (int p = 0; p < numTabDihedralFiles; p++)
 			if (dihedralTableFile[p].length() > 0)
 				internal->addDihedralPotential(dihedralTableFile[p].val(), p, dihedrals);
@@ -571,7 +571,7 @@ void GrandBrownTown::run() {
 		*/
 
 			// Copy positions from GPU to CPU.
-			gpuErrchk(cudaMemcpy(pos, internal -> getPos_d(), sizeof(Vector3) * num * numReplicas,
+			gpuErrchk(cudaMemcpy(pos, internal->getPos_d(), sizeof(Vector3)*num*numReplicas,
 													 cudaMemcpyDeviceToHost));
 
 			// Write restart files for each replica.
@@ -585,7 +585,7 @@ void GrandBrownTown::run() {
 		{
 			int numBlocks = (num * numReplicas) / NUM_THREADS + (num * numReplicas % NUM_THREADS == 0 ? 0 : 1);
 			//MLog: along with calls to internal (ComputeForce class) this function should execute once per GPU.
-			clearInternalForces<<< numBlocks, NUM_THREADS >>>(internal -> getForceInternal_d(), num, numReplicas);
+			clearInternalForces<<< numBlocks, NUM_THREADS >>>(internal->getForceInternal_d(), num*numReplicas);
 		}
 		
 	} // done with all Brownian dynamics steps
diff --git a/GrandBrownTown.cuh b/GrandBrownTown.cuh
index 243d26b84dcb4edcfd246e6c75ddde3cc3db3af6..d5a1d16f55e2a1530ab953ca03038830aeda2330 100644
--- a/GrandBrownTown.cuh
+++ b/GrandBrownTown.cuh
@@ -14,9 +14,9 @@ Vector3 step(Vector3 r0, float kTlocal, Vector3 force, float diffusion,
 						 Random *randoGen, int num);
 
 __global__
-void clearInternalForces(Vector3 forceInternal[], int num, int numReplicas) {
+void clearInternalForces(Vector3* __restrict__ forceInternal, const int num) {
 	const int idx = blockIdx.x * blockDim.x + threadIdx.x;
-	if (idx < num * numReplicas)
+	if (idx < num)
 		forceInternal[idx] = Vector3(0.0f);
 }
 __global__
@@ -90,8 +90,10 @@ void updateKernel(Vector3 pos[], Vector3 forceInternal[],
 		/* printf("atom %d: force: %f %f %f\n", idx, force.x, force.y, force.z); */
 		/* printf("atom %d: kTlocal, diffusion, timestep: %f, %f, %f\n", */
 		/* 			 idx, kTlocal, diffusion, timestep); */
-		
-		pos[idx] = step(p, kTlocal, force, diffusion, -diffGrad, timestep, sys, randoGen, num);
+		Vector3 tmp = step(p, kTlocal, force, diffusion, -diffGrad, timestep, sys, randoGen, num);
+		assert( tmp.length() < 10000.0f );
+		pos[idx] = tmp;
+			
 	}
 }
 
diff --git a/TabulatedMethods.cuh b/TabulatedMethods.cuh
index 8986e1c6d92d05bb363aaa32110609ac7e6bf268..3a07b7c98957bc5a4bb71483b8cac67f3319f981 100644
--- a/TabulatedMethods.cuh
+++ b/TabulatedMethods.cuh
@@ -19,17 +19,16 @@ __device__ inline void computeAngle(const TabulatedAnglePotential* __restrict__
 	// Find the distance between each pair of particles
 	const float distab = ab.length();
 	const float distbc = bc.length();
-	const float distac = ac.length();
+	const float distac2 = ac.length2();
   
 	// Find the cosine of the angle we want - <ABC	
-	float cos = (distbc * distbc + distab * distab - distac * distac) / (2.0f * distbc * distab);
+	float cos = (distbc * distbc + distab * distab - distac2) / (2.0f * distbc * distab);
   
 	// If the cosine is illegitimate, set it to 1 or -1 so that acos won't fail
 	if (cos < -1.0f) cos = -1.0f;
 	if (cos > 1.0f) cos = 1.0f;
 
 	// Find the sine while we're at it.
-	float sin = sqrtf(1.0f - cos*cos);
 
 	// Now we can use the cosine to find the actual angle (in radians)		
 	float angle = acos(cos);
@@ -44,18 +43,23 @@ __device__ inline void computeAngle(const TabulatedAnglePotential* __restrict__
 	assert(home >= 0);
 	assert(home < a->size);
 
-	// Make angle the distance from [0,1) from the first index in the potential array index
-	angle -= home;
+	// // Make angle the distance from [0,1) from the first index in the potential array index
+	// angle -= home;
 		
 	// Linearly interpolate the potential	
 	float U0 = a->pot[home];
 	float dUdx = (a->pot[(home+1)] - U0) / a->angle_step;
 	// float energy = (dUdx * angle) + U0;
-	dUdx /= sqrtf(1.0f-cos*cos);
+	float sin = sqrtf(1.0f - cos*cos);
+	dUdx /= abs(sin) > 1e-4 ? sin : 1e-4; // avoid singularity 
 
 	// Calculate the forces
 	Vector3 force1 = (dUdx/distab) * (ab * (cos/distab) - bc / distbc); // force on particle 1
 	Vector3 force3 = (dUdx/distbc) * (bc * (cos/distbc) - ab / distab); // force on particle 3
+
+	// assert( force1.length() < 10000.0f );
+	// assert( force3.length() < 10000.0f );
+	
 	atomicAdd( &force[i], force1 );
 	atomicAdd( &force[k], force3 );
 	atomicAdd( &force[j], -(force1 + force3) );
@@ -81,7 +85,10 @@ __device__ inline void computeDihedral(const TabulatedDihedralPotential* __restr
 	Vector3 crossABC = ab.cross(bc);
 	Vector3 crossBCD = bc.cross(cd);
 	Vector3 crossX = bc.cross(crossABC);
+	// assert( crossABC.rLength2() <= 1.0f );
+	// assert( crossBCD.rLength2() <= 1.0f );
 
+	
 	const float cos_phi = crossABC.dot(crossBCD) / (crossABC.length() * crossBCD.length());
 	const float sin_phi = crossX.dot(crossBCD) / (crossX.length() * crossBCD.length());
 		
@@ -116,11 +123,15 @@ __device__ inline void computeDihedral(const TabulatedDihedralPotential* __restr
 
 	// avoid singularity when one angle is straight 
 	force = (crossABC.rLength() > 1.0f || crossBCD.rLength() > 1.0f) ? 0.0f : force;
-	    
+	assert( force < 10000.0f );
 	f1 *= force;
 	f2 *= force;
 	f3 *= force;
 
+	// assert( f1.length() < 10000.0f );
+	// assert( f2.length() < 10000.0f );
+	// assert( f3.length() < 10000.0f );
+
 	atomicAdd( &forces[i], f1 );
 	atomicAdd( &forces[j], f2-f1 );
 	atomicAdd( &forces[k], f3-f2 );
diff --git a/TabulatedPotential.cu b/TabulatedPotential.cu
index a1788925b914b99660624d9b0ae9b73d68a79d71..0e9f30f21532899809c90996db0caf31751fd9dd 100644
--- a/TabulatedPotential.cu
+++ b/TabulatedPotential.cu
@@ -18,7 +18,7 @@ TabulatedPotential::TabulatedPotential() {
 }
 
 TabulatedPotential::TabulatedPotential(const char* fileName) : fileName(fileName) {
-	printf("File: %s\n", fileName);
+	// printf("File: %s\n", fileName);
 	FILE* inp = fopen(fileName, "r");
 	if (inp == NULL) {
 		printf("TabulatedPotential:TabulatedPotential Could not open file '%s'\n", fileName);