diff --git a/src/ComputeForce.cu b/src/ComputeForce.cu
index 580a73496e52be0476e250f0c097ec318a2bc3e6..fb379874e89e7550f953f0c6a80f81e88270ccab 100644
--- a/src/ComputeForce.cu
+++ b/src/ComputeForce.cu
@@ -27,7 +27,7 @@ ComputeForce::ComputeForce(int num, const BrownianParticleType part[],
 													 int fullLongRange, int numBonds, int numTabBondFiles,
 													 int numExcludes, int numAngles, int numTabAngleFiles,
 													 int numDihedrals, int numTabDihedralFiles,
-													 int numReplicas) :
+				float pairlistDistance, int numReplicas) :
 		num(num), numParts(numParts), sys(g), switchStart(switchStart),
 		switchLen(switchLen), electricConst(electricConst),
 		cutoff2((switchLen + switchStart) * (switchLen + switchStart)),
@@ -39,6 +39,9 @@ ComputeForce::ComputeForce(int num, const BrownianParticleType part[],
 	// Allocate the parameter tables.
 	decomp_d = NULL;
 
+	pairlistdist2 = (sqrt(cutoff2) + pairlistDistance);
+	pairlistdist2 *= pairlistdist2;
+
 	tableEps = new float[numParts * numParts];
 	tableRad6 = new float[numParts * numParts];
 	tableAlpha = new float[numParts * numParts];
@@ -444,17 +447,18 @@ void ComputeForce::decompose() {
 	int tmp = 0;
 	gpuErrchk(cudaMemcpyAsync(numPairs_d, &tmp,	sizeof(int), cudaMemcpyHostToDevice));
 	gpuErrchk(cudaDeviceSynchronize());
-	
-	float pairlistdist2 = (sqrt(cutoff2) + 2.0f);
-	pairlistdist2 = pairlistdist2*pairlistdist2;
-	
+	// printf("Pairlistdist: %f\n",sqrt(pairlistdist2));
+		
 #if __CUDA_ARCH__ >= 300
 	createPairlists<<< 2048, 64 >>>(pos_d, num, numReplicas, sys_d, decomp_d, nCells, numPairs_d, pairLists_d, numParts, type_d, pairTabPotType_d, excludes_d, excludeMap_d, numExcludes, pairlistdist2);
 #else
 	// Use shared memory for warp_bcast function
 	createPairlists<<< 2048, 64, 2048/WARPSIZE >>>(pos_d, num, numReplicas, sys_d, decomp_d, nCells, numPairs_d, pairLists_d, numParts, type_d, pairTabPotType_d, excludes_d, excludeMap_d, numExcludes, pairlistdist2);
 #endif			
-	
+	// DEBUGING
+	// gpuErrchk(cudaMemcpy(&tmp, numPairs_d,	sizeof(int), cudaMemcpyDeviceToHost));
+	// printf("CreatePairlist found %d pairs\n",tmp);
+
 	gpuErrchk(cudaDeviceSynchronize()); /* RBTODO: sync needed here? */
 }
 
@@ -662,6 +666,11 @@ void ComputeForce::copyToCUDA(Vector3* forceInternal, Vector3* pos)
 
 	gpuErrchk(cudaDeviceSynchronize());
 }
+void ComputeForce::setForceInternalOnDevice(Vector3* f) {
+	const size_t tot_num = num * numReplicas;
+	gpuErrchk(cudaMemcpy(forceInternal_d, f, sizeof(Vector3) * tot_num, cudaMemcpyHostToDevice));
+}
+
 
 void ComputeForce::copyToCUDA(int simNum, int *type, Bond* bonds, int2* bondMap, Exclude* excludes, int2* excludeMap, Angle* angles, Dihedral* dihedrals)
 {
diff --git a/src/ComputeForce.cuh b/src/ComputeForce.cuh
index b2bab6e5444c36173e37bb3eb2ff3abc2378c760..be1e975b47fe97747860c1ff658d80d887c57478 100644
--- a/src/ComputeForce.cuh
+++ b/src/ComputeForce.cuh
@@ -221,6 +221,8 @@ void createPairlists(Vector3* __restrict__ pos, const int num, const int numRepl
 				int numParts, const int* __restrict__ type, int* __restrict__ g_pairTabPotType,
 				const Exclude* __restrict__ excludes, const int2* __restrict__ excludeMap, const int numExcludes,
 				float pairlistdist2) {
+	// TODO: loop over all cells with edges within pairlistdist2
+
 	// Loop over threads searching for atom pairs
 	//   Each thread has designated values in shared memory as a buffer
 	//   A sync operation periodically moves data from shared to global
diff --git a/src/ComputeForce.h b/src/ComputeForce.h
index cbe2be8f64013696adcf676b1c8599c66b065633..dc20c4e20637425e2263594bb231f754091fe6ad 100644
--- a/src/ComputeForce.h
+++ b/src/ComputeForce.h
@@ -36,7 +36,7 @@ public:
 			const BaseGrid* g, float switchStart, float switchLen,
 			float electricConst, int fullLongRange, int numBonds,
 			int numTabBondFiles, int numExcludes, int numAngles, int numTabAngleFiles,
-			int numDihedrals, int numTabDihedralFiles, int numReplicas = 1);
+					int numDihedrals, int numTabDihedralFiles, float pairlistDistance, int numReplicas = 1);
 	~ComputeForce();
 
 	void updateNumber(int newNum);
@@ -87,6 +87,7 @@ public:
 	{
 		return forceInternal_d;
 	}
+	void setForceInternalOnDevice(Vector3* f);
 
 	int* getType_d()
 	{
@@ -172,6 +173,7 @@ private:
 	TabulatedDihedralPotential **tableDihedral_d, **tableDihedral_addr;
 
 	// Pairlists
+	float pairlistdist2;
 	int2 *pairLists_d;
 	int *pairTabPotType_d;
 
diff --git a/src/Configuration.cpp b/src/Configuration.cpp
index bc2905ea33cd5713667e678457efa8ce00a5666c..4440e42ab01d11c4d05454b452f15e59919eee3d 100644
--- a/src/Configuration.cpp
+++ b/src/Configuration.cpp
@@ -547,6 +547,7 @@ void Configuration::setDefaults() {
 	cutoff = 10.0f;
 	switchLen = 2.0f;
 	pairlistDistance = 2.0f;
+	imdForceScale = 1.0f;
 	outputPeriod = 200;
 	outputEnergyPeriod = -1;
 	outputFormat = TrajectoryWriter::formatDcd;
@@ -668,6 +669,8 @@ int Configuration::readParameters(const char * config_file) {
 			switchLen = (float) strtod(value.val(), NULL);
 		else if (param == String("pairlistDistance"))
 			pairlistDistance = (float) strtod(value.val(), NULL);
+		else if (param == String("scaleIMDForce"))
+			imdForceScale = (float) strtod(value.val(), NULL);		
 		else if (param == String("outputPeriod"))
 			outputPeriod = atoi(value.val());
 		else if (param == String("outputEnergyPeriod"))
diff --git a/src/Configuration.h b/src/Configuration.h
index aa7e2ee3caa99415d4e0c71c757c971591b407cb..dfd5c379d878fa750fe9e0e5ffeac94b4fc90e2d 100644
--- a/src/Configuration.h
+++ b/src/Configuration.h
@@ -142,6 +142,7 @@ public:
 	float cutoff;
 	float pairlistDistance;
 	float switchLen;
+	float imdForceScale;
 	int outputPeriod;
 	int outputEnergyPeriod;
 	int outputFormat;
diff --git a/src/GrandBrownTown.cu b/src/GrandBrownTown.cu
index fc766edf14e6f79d460746226db2bac6423d69fc..356d854a74fa71e029cc562514f75a7854860034 100644
--- a/src/GrandBrownTown.cu
+++ b/src/GrandBrownTown.cu
@@ -323,6 +323,11 @@ GrandBrownTown::~GrandBrownTown() {
 	//gpuErrchk(cudaFree(forceInternal_d));
 	gpuErrchk(cudaFree(randoGen_d));
 	//gpuErrchk( cudaFree(bondList_d) );
+
+	if (imd_on)
+		delete[] imdForces;
+	
+		
 }
 
 // Run the Brownian Dynamics steps.
@@ -374,6 +379,10 @@ void GrandBrownTown::run() {
 				imd_recv_header(clientsock, &length) != IMD_GO) {
 			clientsock = NULL;
 		}
+		imdForces = new Vector3[num*numReplicas];
+		for (size_t i = 0; i < num; ++i) // clear old forces
+			imdForces[i] = Vector3(0.0f);
+
 	} // endif (imd_on)
 
 	// Start timers
@@ -514,50 +523,99 @@ void GrandBrownTown::run() {
 		
 		Vector3 force0(0.0f);
 
+
+		if (s % outputPeriod == 0) {
+			// Copy particle positions back to CPU
+			// cudaSetDevice(0);
+			gpuErrchk(cudaMemcpy(pos, internal ->  getPos_d(), sizeof(Vector3) * num * numReplicas,
+					cudaMemcpyDeviceToHost));
+		}
+
 		if (imd_on && clientsock && s % outputPeriod == 0) {
+		
+			float* coords = new float[num*3]; // TODO: move allocation out of run loop
+			int* atomIds = new int[num]; // TODO: move allocation out of run loop
+
 			int length;
-			if (vmdsock_selread(clientsock, 0) == 1) {
+
+			bool paused = false;
+			while (vmdsock_selread(clientsock, 0) > 0 || paused) {
 				switch (imd_recv_header(clientsock, &length)) {
-					case IMD_DISCONNECT:
-						printf("[IMD] Disconnecting...\n");
-						imd_disconnect(clientsock);
-						clientsock = NULL;
-						sleep(5);
-						break;
-					case IMD_KILL:
-						printf("[IMD] Killing...\n");
-						imd_disconnect(clientsock);
-						clientsock = NULL;
-						steps = s; // Stop the simulation at this step
-						sleep(5);
-						break;
-					default:
-						printf("[IMD] Something weird happened. Disconnecting..\n");
-						break;
+				case IMD_DISCONNECT:
+					printf("[IMD] Disconnecting...\n");
+					imd_disconnect(clientsock);
+					clientsock = NULL;
+					sleep(5);
+					break;
+				case IMD_KILL:
+					printf("[IMD] Killing...\n");
+					imd_disconnect(clientsock);
+					clientsock = NULL;
+					steps = s; // Stop the simulation at this step
+					sleep(5);
+					break;
+				case IMD_PAUSE:
+					// if (paused) {
+					// 	printf("[IMD] Continuing...\n");
+					// } else {
+					// 	printf("[IMD] Pausing...\n");
+					// }
+					paused = !paused;
+					break;
+				case IMD_GO:
+					printf("[IMD] Caught IMD_GO\n");
+					break;
+
+				case IMD_MDCOMM:					
+					// if (!paused)
+					// 	printf("[IMD] Receiving %d forces...\n",length);
+					for (size_t i = 0; i < num; ++i) // clear old forces
+						imdForces[i] = Vector3(0.0f);
+
+					if (imd_recv_mdcomm(clientsock, length, atomIds, coords)) {
+						printf("[IMD] Error receiving forces\n"); 
+					} else {
+						for (size_t j = 0; j < length; ++j) {
+							int i = atomIds[j];
+							imdForces[i] = Vector3(coords[j*3], coords[j*3+1], coords[j*3+2]) * conf.imdForceScale;
+							// if (!paused)
+							  // printf("[IMD] Applying force (%0.2f, %0.2f, %0.2f) to particle %d\n", imdForces[i].x, imdForces[i].y, imdForces[i].z, i);
+							  //printf("[IMD] Applying %0.1f pN force to particle %d at step %d\n", 69.47694f * imdForces[i].length(), i, s);
+						}
+					}
+					break; 
+
+				default:
+					printf("[IMD] Something weird happened. Disconnecting..\n");
+					break;
 				}
 			}
 			if (clientsock) {
 				// cudaSetDevice(0);
-				gpuErrchk(cudaMemcpy(pos, internal -> getPos_d(), sizeof(Vector3) * num, cudaMemcpyDeviceToHost));
-				float* coords = new float[num * 3];
+				// float* coords = new float[num*3]; // TODO: move allocation out of run loop
 				for (size_t i = 0; i < num; i++) {
-					Vector3 p = pos[i];
+					const Vector3& p = pos[i];
 					coords[3*i] = p.x;
 					coords[3*i+1] = p.y;
 					coords[3*i+2] = p.z;
 				}
 				imd_send_fcoords(clientsock, num, coords);
-				delete[] coords;
 			}
+
+			delete[] coords;
+			delete[] atomIds;
 		}
+		
 
 		// Output trajectories (to files)
 		if (s % outputPeriod == 0) {
-			// Copy particle positions back to CPU
-			// cudaSetDevice(0);
-			gpuErrchk(cudaMemcpy(pos, internal ->  getPos_d(), sizeof(Vector3) * num * numReplicas,
-					cudaMemcpyDeviceToHost));
-
+		  /* // print IMDforces
+		  for (int i=0; i < num * numReplicas; ++i) {
+		    float f = imdForces[i].length() * 69.47694f;
+		    if (f > 0.1f)
+		      printf("[IMD] Applying %0.1f pN force to particle %d at step %d\n", f, i, s);
+		  }
+		  // */
 			// Nanoseconds computed
 			t = s*timestep;
 
@@ -593,7 +651,7 @@ void GrandBrownTown::run() {
 			wkf_timer_stop(timerS);
 
 			// Copy back forces to display (internal only)
-			gpuErrchk(cudaMemcpy(&force0, internal -> getForceInternal_d(), sizeof(Vector3), cudaMemcpyDeviceToHost));
+			// gpuErrchk(cudaMemcpy(&force0, internal -> getForceInternal_d(), sizeof(Vector3), cudaMemcpyDeviceToHost));
 			
 			// Nanoseconds computed
 			t = s * timestep;
@@ -625,7 +683,9 @@ void GrandBrownTown::run() {
 			wkf_timer_start(timerS);
 		} // s % outputEnergyPeriod
 
-		{
+		if (imd_on && clientsock) {
+			internal->setForceInternalOnDevice(imdForces); // TODO ensure replicas are mutually exclusive with IMD
+		} else {
 			int numBlocks = (num * numReplicas) / NUM_THREADS + (num * numReplicas % NUM_THREADS == 0 ? 0 : 1);
 			//MLog: along with calls to internal (ComputeForce class) this function should execute once per GPU.
 			clearInternalForces<<< numBlocks, NUM_THREADS >>>(internal->getForceInternal_d(), num*numReplicas);
diff --git a/src/GrandBrownTown.cuh b/src/GrandBrownTown.cuh
index 671724a55f5417f023a73a57c582177c2c98690b..401d06649c530597494e7fc591bdb8f20bbc737d 100644
--- a/src/GrandBrownTown.cuh
+++ b/src/GrandBrownTown.cuh
@@ -22,6 +22,7 @@ void clearInternalForces(Vector3* __restrict__ forceInternal, const int num) {
 	if (idx < num)
 		forceInternal[idx] = Vector3(0.0f);
 }
+
 __global__
 void updateKernel(Vector3* pos, Vector3* __restrict__ forceInternal,
 									int type[], BrownianParticleType* part[],
diff --git a/src/GrandBrownTown.h b/src/GrandBrownTown.h
index 95857aa1087f85e2db811f6ffb08bc9ed6d42d5c..444349f0e50838f0a08a12cfb2d1234badbba509 100644
--- a/src/GrandBrownTown.h
+++ b/src/GrandBrownTown.h
@@ -40,6 +40,7 @@
 /* #include "RigidBodyType.h" */
 /* #include "RigidBodyGrid.h" */
 #include "RigidBodyController.h"
+#include "WKFUtils.h"
 
 // IMD
 #include "vmdsock.h"
@@ -119,7 +120,8 @@ private:
 	// IMD variables
 	bool imd_on;
 	unsigned int imd_port;
-
+	Vector3* imdForces;
+	
 	// Output variables
 	std::vector<std::string> outCurrFiles;
 	std::vector<std::string> restartFiles;