diff --git a/ComputeForce.cu b/ComputeForce.cu
index 656635466bbb7b412814fef67a192c05f6ecf5a1..d1cfc0bea80ac19cd8c6a06d54341ab068e01c9b 100644
--- a/ComputeForce.cu
+++ b/ComputeForce.cu
@@ -413,7 +413,7 @@ void ComputeForce::decompose(Vector3* pos, int type[]) {
 		int tmp = 0;
 		gpuErrchk(cudaMemcpyAsync(numPairs_d, &tmp,
 															sizeof(int), cudaMemcpyHostToDevice));
-		gpuErrchk(cudaDeviceSynchronize()); /* RBTOOD: maybe unnecessary */
+		gpuErrchk(cudaDeviceSynchronize());
 	}
 
 	
diff --git a/ComputeForce.cuh b/ComputeForce.cuh
index 10454f21af12e6d32909f2ee779ff4776d22faa7..715df18a342d6b82566e3ff11c94628671f191f8 100644
--- a/ComputeForce.cuh
+++ b/ComputeForce.cuh
@@ -286,10 +286,10 @@ void createPairlistsOld(Vector3* __restrict__ pos, int num, int numReplicas,
 
 __global__
 void createPairlists(Vector3* __restrict__ pos, int num, int numReplicas,
-				BaseGrid* sys, CellDecomposition* __restrict__ decomp,
+				const BaseGrid* __restrict__ sys, const CellDecomposition* __restrict__ decomp,
 				const int nCells,
 				int* g_numPairs, int2* g_pair,
-				int numParts, int type[], int* __restrict__ g_pairTabPotType,
+				int numParts, const int* __restrict__ type, int* __restrict__ g_pairTabPotType,
 				float pairlistdist2) {
 	// Loop over threads searching for atom pairs
   //   Each thread has designated values in shared memory as a buffer
@@ -299,7 +299,7 @@ void createPairlists(Vector3* __restrict__ pos, int num, int numReplicas,
 	const int split = 32;					/* numblocks should be divisible by split */
 	/* const int blocksPerCell = gridDim.x/split;  */
 	
-	const CellDecomposition::cell_t* pairs = decomp->getCells();
+	const CellDecomposition::cell_t* __restrict__ pairs = decomp->getCells();
 	for (int cID = 0 + (blockIdx.x % split); cID < nCells; cID += split) {
 	// for (int cID = blockIdx.x/blocksPerCell; cID < nCells; cID += split ) {
 		for (int repID = 0; repID < numReplicas; repID++) {
@@ -314,8 +314,8 @@ void createPairlists(Vector3* __restrict__ pos, int num, int numReplicas,
 				const int ai = pairs[ci].particle;
 				// const CellDecomposition::cell_t celli = decomp->getCellForParticle(ai);
 				const CellDecomposition::cell_t celli = pairs[ci];
-				const Vector3 posi = pos[ai];
-				
+				// Vector3 posi = pos[ai];
+
 				for (int x = -1; x <= 1; ++x) {
 					for (int y = -1; y <= 1; ++y) {
 						for (int z = -1; z <= 1; ++z) {					
@@ -329,7 +329,8 @@ void createPairlists(Vector3* __restrict__ pos, int num, int numReplicas,
 								if (aj <= ai) continue;
 								
 								// skip ones that are too far away
-								float dr = (sys->wrapDiff(pos[aj] - pos[ai])).length2();
+								const float dr = (sys->wrapDiff(pos[aj] - pos[ai])).length2();
+								// const float dr = (sys->wrapDiff(pos[aj] - posi)).length2();
 								if (dr > pairlistdist2) continue;
 								
 								int gid = atomicAggInc( g_numPairs, warpLane );
diff --git a/GPUController.h b/GPUController.h
new file mode 100644
index 0000000000000000000000000000000000000000..088a3ddc2564ce0a81bbb1d89575b571b026b0bc
--- /dev/null
+++ b/GPUController.h
@@ -0,0 +1,44 @@
+// provides interface between main CPU loop and various GPUs
+//   -- holds data for each GPU
+
+#pragma once
+#include "useful.h"
+
+class GPUcontroller {	// interface between the main CPU loop and the GPUs; holds the per-GPU data
+public:
+	GPUcontroller(const Configuration& c, const long int randomSeed,
+			bool debug, int numReplicas = 0);	// numReplicas defaults to 0 when omitted
+	~GPUcontroller();
+	// Class-wide debug flag. NOTE(review): presumably set from the constructor's `debug` argument -- confirm.
+	static bool DEBUG;
+
+private:
+	// NOTE(review): presumably uploads host-side state into the *_d buffers below -- confirm against the definition.
+	void copyToCUDA();
+
+	// Data members
+private:
+	const Configuration& conf;	// reference member: the referenced Configuration must outlive this object
+	int numReplicas;
+
+	// Integrator variables
+	BaseGrid* sys;
+	ComputeForce* internal;
+	Vector3* forceInternal;
+
+	// CUDA device variables (every member in this group carries the _d suffix)
+	Vector3 *pos_d, *forceInternal_d, *force_d;
+	int *type_d;
+	BrownianParticleType **part_d;
+	BaseGrid *sys_d, *kTGrid_d;
+	Random *randoGen_d;
+	Bond* bonds_d;
+	int2* bondMap_d;
+	Exclude* excludes_d;
+	int2* excludeMap_d;
+	Angle* angles_d;
+	Dihedral* dihedrals_d;
+
+};
+	
+