diff --git a/src/ComputeForce.cu b/src/ComputeForce.cu
index 4f39b3fe3a905fca9703f03423b357a401bef198..f06b4badfea31f23f917aabc7671e166c60aed4f 100644
--- a/src/ComputeForce.cu
+++ b/src/ComputeForce.cu
@@ -40,14 +40,15 @@ void runSort(int2 *d1, int *d2, float *key,
 				unsigned int count);
 
 ComputeForce::ComputeForce(const Configuration& c, const int numReplicas = 1) :
-    num(c.num), numParts(c.numParts), sys(c.sys), switchStart(c.switchStart),
+    num(c.num), numParts(c.numParts), num_rb_attached_particles(c.num_rb_attached_particles),
+    sys(c.sys), switchStart(c.switchStart),
     switchLen(c.switchLen), electricConst(c.coulombConst),
     cutoff2((c.switchLen + c.switchStart) * (c.switchLen + c.switchStart)),
     decomp(c.sys->getBox(), c.sys->getOrigin(), c.switchStart + c.switchLen + c.pairlistDistance, numReplicas),
     numBonds(c.numBonds), numTabBondFiles(c.numTabBondFiles),
     numExcludes(c.numExcludes), numAngles(c.numAngles),
     numTabAngleFiles(c.numTabAngleFiles), numDihedrals(c.numDihedrals),
-    numTabDihedralFiles(c.numTabDihedralFiles), numRestraints(c.numRestraints), 
+    numTabDihedralFiles(c.numTabDihedralFiles), numRestraints(c.numRestraints),
     numGroupSites(c.numGroupSites),
     numReplicas(numReplicas) {
 
@@ -237,7 +238,7 @@ ComputeForce::ComputeForce(const Configuration& c, const int numReplicas = 1) :
 	gridSize =  num / NUM_THREADS + 1;
 
 	// Create and allocate the energy arrays
-	gpuErrchk(cudaMalloc(&energies_d, sizeof(float) * (num+numGroupSites) * numReplicas));
+	gpuErrchk(cudaMalloc(&energies_d, sizeof(float) * (num+num_rb_attached_particles+numGroupSites) * numReplicas));
 	cudaEventCreate(&start);
 	cudaEventCreate(&stop);
 }
@@ -894,7 +895,7 @@ float ComputeForce::computeTabulatedFull(bool get_energy) {
 
 void ComputeForce::copyToCUDA(Vector3* forceInternal, Vector3* pos)
 {
-    const size_t tot_num = (num+numGroupSites) * numReplicas;
+    const size_t tot_num = (num+num_rb_attached_particles+numGroupSites) * numReplicas;
 
 	for (std::size_t i = 0; i < gpuman.gpus.size(); ++i) {
 	    gpuman.use(i);
@@ -965,10 +966,10 @@ void ComputeForce::setForceInternalOnDevice(Vector3* f) {
 void ComputeForce::copyToCUDA(int simNum, int *type, Bond* bonds, int2* bondMap, Exclude* excludes, int2* excludeMap, Angle* angles, Dihedral* dihedrals, const Restraint* const restraints)
 {
     assert(simNum == numReplicas); // Not sure why we have both of these things
-    int tot_num = num * simNum;
+    int tot_num_with_rb = (num+num_rb_attached_particles) * simNum;
 	// type_d
-	gpuErrchk(cudaMalloc(&type_d, sizeof(int) * tot_num));
-	gpuErrchk(cudaMemcpyAsync(type_d, type, sizeof(int) * tot_num, cudaMemcpyHostToDevice));
+	gpuErrchk(cudaMalloc(&type_d, sizeof(int) * tot_num_with_rb));
+	gpuErrchk(cudaMemcpyAsync(type_d, type, sizeof(int) * tot_num_with_rb, cudaMemcpyHostToDevice));
 	
 	if (numBonds > 0)
 	{
diff --git a/src/ComputeForce.h b/src/ComputeForce.h
index 5d602e4a609482a73ba7c639f8a575fc3023b2a7..b3bc258541f9603f387221363f3c39fe7fbae881 100644
--- a/src/ComputeForce.h
+++ b/src/ComputeForce.h
@@ -174,6 +174,7 @@ private:
 	int numReplicas;
 	int num;
 	int numParts;
+	int num_rb_attached_particles;
 	int numBonds;
 	int numExcludes;
 	int numTabBondFiles;
diff --git a/src/Configuration.h b/src/Configuration.h
index 8bc023de4be3c92c07bc45d2e36fa63477475f8a..ff7879faf686ff4b29e2c63be454d99ca2a96cde 100644
--- a/src/Configuration.h
+++ b/src/Configuration.h
@@ -91,9 +91,11 @@ public:
 	~Configuration();
 
     int find_particle_type(const char* s) const {
-	for (int j = 0; j < numParts; j++)
-	    if (s == part[j].name)
+	for (int j = 0; j < numParts; j++) {
+	    printf("Searching particle %d (%s) =? %s\n", j, part[j].name.val(), s);
+	    if (strcmp(s,part[j].name.val()) == 0)
 		return j;
+	}
 	return -1;
     }
 
diff --git a/src/GrandBrownTown.cu b/src/GrandBrownTown.cu
index 339a9f37938bdb0481e8d52f022e7b3d6a288f8a..46f556aa46c85066ec564d48786965b8785690a5 100644
--- a/src/GrandBrownTown.cu
+++ b/src/GrandBrownTown.cu
@@ -613,7 +613,7 @@ void GrandBrownTown::RunNoseHooverLangevin()
 			internal->getPos_d()[0]+i*(num+num_rb_attached_particles),
 			internal->getForceInternal_d()[0]+i*(num+num_rb_attached_particles),
 			internal->getEnergy()+i*(num+num_rb_attached_particles),
-			sys_d);
+			sys_d, num, num_rb_attached_particles, numReplicas);
 		}
 	    }
 
@@ -704,7 +704,7 @@ void GrandBrownTown::RunNoseHooverLangevin()
 				     s,
 				     internal->getEnergy()+i*(num+num_rb_attached_particles),
 				     get_energy,
-				     RigidBodyInterpolationType, sys, sys_d);
+				     RigidBodyInterpolationType, sys, sys_d, num, num_rb_attached_particles);
             if(rigidbody_dynamic == String("Langevin"))
             {
                 #ifdef _OPENMP
@@ -778,7 +778,7 @@ void GrandBrownTown::RunNoseHooverLangevin()
 		    internal->getPos_d()[0]+i*(num+num_rb_attached_particles),
 		    internal->getForceInternal_d()[0]+i*(num+num_rb_attached_particles),
 		    internal->getEnergy()+i*(num+num_rb_attached_particles),
-		    sys_d);
+		    sys_d, num, num_rb_attached_particles, numReplicas);
 	    }
 	}
 	
@@ -954,7 +954,7 @@ void GrandBrownTown::RunNoseHooverLangevin()
         #pragma omp parallel for
         for(int i = 0; i < numReplicas; ++i) // TODO: Use different buffer for RB particle forces to avoid race condition
             RBC[i]->updateForces((internal->getPos_d()[0])+i*(num+num_rb_attached_particles), (internal->getForceInternal_d()[0])+i*(num+num_rb_attached_particles), s, (internal->getEnergy())+i*(num+num_rb_attached_particles), get_energy,
-                                 RigidBodyInterpolationType, sys, sys_d);
+				 RigidBodyInterpolationType, sys, sys_d, num, num_rb_attached_particles);
 
 	if (numGroupSites > 0) {
 	    gpuman.sync();
diff --git a/src/Makefile b/src/Makefile
index 4f6bb5779b9248ff6e4ef7e018387e236b18ab58..68598f462bf925e748a759b91ab9bd5ab09d6afc 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -1,6 +1,7 @@
 ### Paths, libraries, includes, options
 include ./findcudalib.mk
 
+SUFFIX ?= ""
 
 ifeq ($(dbg),1)
 	NV_FLAGS += -g -G -O0
@@ -99,17 +100,17 @@ ifeq ($(omp),1)
 TARGET := $(TARGET)_omp
 endif
 
-all: $(TARGET)
-	@echo "Done ->" $(TARGET)
+all: $(TARGET)$(SUFFIX)
+	@echo "Done ->" $(TARGET)$(SUFFIX)
 
 $(TARGET)_link.o: $(CU_OBJ)
 #	$(NVCC) $(NVLD_FLAGS) $(CU_OBJ) $(CC_OBJ) -o $(TARGET)_link.o
 	$(NVCC) $(NVLD_FLAGS) $(CU_OBJ) -o $(TARGET)_link.o
 
-$(TARGET): $(TARGET)_link.o $(CU_OBJ) $(CC_OBJ) arbd.cpp vmdsock.c imd.c imd.h
+$(TARGET)$(SUFFIX): $(TARGET)_link.o $(CU_OBJ) $(CC_OBJ) arbd.cpp vmdsock.c imd.c imd.h
 #	$(NVCC) $(NVLD_FLAGS) $(CU_OBJ) $(CC_OBJ) -o $(TARGET)_link.o
 #	$(NVCC) $(NVLD_FLAGS) $(CU_OBJ) -o $(TARGET)_link.o
-	$(CC) $(CC_FLAGS) $(EX_FLAGS) arbd.cpp vmdsock.c imd.c $(TARGET)_link.o $(CU_OBJ) $(CC_OBJ) $(LD_FLAGS)  -o $(TARGET)
+	$(CC) $(CC_FLAGS) $(EX_FLAGS) arbd.cpp vmdsock.c imd.c $(TARGET)_link.o $(CU_OBJ) $(CC_OBJ) $(LD_FLAGS)  -o $(TARGET)$(SUFFIX)
 
 
 .SECONDEXPANSION:
diff --git a/src/RigidBody.cu b/src/RigidBody.cu
index abd693bdd3895f19af9027655132825501b52c37..0c9afe316652b93026792abe01f0e39001f0e2c8 100644
--- a/src/RigidBody.cu
+++ b/src/RigidBody.cu
@@ -333,6 +333,20 @@ void RigidBody::callGridParticleForceKernel(Vector3* pos_d, Vector3* force_d, in
 	}
 }
 
+void RigidBody::apply_attached_particle_forces(const Vector3* force) {
+    const auto &rb_pos = t->attached_particle_positions;
+    int num = rb_pos.size();
+    Vector3 total_force = Vector3(0.0f);
+    Vector3 torque = Vector3(0.0f);
+    for (int i = 0; i < num; ++i) {
+	const int j = i + attached_particle_start;
+	torque = torque + (orientation*rb_pos[i]).cross(force[j]);
+	total_force = total_force + force[j];
+    }
+    addForce(total_force);
+    addTorque(torque);
+}
+
 void RigidBody::applyGridParticleForces(BaseGrid* sys, ForceEnergy* forcestorques, const std::vector<int>& forcestorques_offset, int& fto_idx) {
 	// loop over potential grids 
 	for (int i = 0; i < t->numPotGrids; ++i) {
diff --git a/src/RigidBody.h b/src/RigidBody.h
index 636c80e94fd0f17475156bbc84e0cd74409d8e94..e3e81737ae1d7aee45f16e915d44f6aa413e1749 100644
--- a/src/RigidBody.h
+++ b/src/RigidBody.h
@@ -83,6 +83,7 @@ class RigidBody { // host side representation of rigid bodies
         void initializeParticleLists();
 	void updateParticleList(Vector3* pos_d, BaseGrid* sys_d);
 	void callGridParticleForceKernel(Vector3* pos_d, Vector3* force_d, int s, float* energy, bool get_energy, int scheme, BaseGrid* sys, BaseGrid* sys_d, ForceEnergy* forcestorques_d, const std::vector<int>& forcestorques_offset, int& fto_idx);
+	void apply_attached_particle_forces(const Vector3* force);
 	void applyGridParticleForces(BaseGrid* sys, ForceEnergy* forcestorques, const std::vector<int>& forcestorques_offset, int& fto_idx);
 	
 	bool langevin;
diff --git a/src/RigidBodyController.cu b/src/RigidBodyController.cu
index b16eedbe24610071705a3772919ac3561eef8840..087823a0f7e4a0b6cabf0a3238de87cab5b06feb 100644
--- a/src/RigidBodyController.cu
+++ b/src/RigidBodyController.cu
@@ -72,6 +72,7 @@ RigidBodyController::RigidBodyController(const Configuration& c, const char* pre
 		}
 		rigidBodyByType.push_back(tmp);
 	}
+	attached_particle_forces = new Vector3[attached_particle_offset];
 
 	totalParticleForceNumBlocks = 0;
 	for (int i=0; i < particleForceNumBlocks.size(); ++i) {
@@ -98,6 +99,8 @@ RigidBodyController::~RigidBodyController() {
 	for (int i = 0; i < rigidBodyByType.size(); i++)
 		rigidBodyByType[i].clear();
 	rigidBodyByType.clear();
+
+	delete [] attached_particle_forces;
 	delete random;
 }
 
@@ -527,13 +530,17 @@ void RigidBodyController::initializeForcePairs() {
 			
 }
 
-void RigidBodyController::update_attached_particle_positions(Vector3* pos_d, Vector3* force_d, float* energy_d, BaseGrid* sys_d) {
+void RigidBodyController::update_attached_particle_positions(Vector3* pos_d, Vector3* force_d, float* energy_d, BaseGrid* sys_d, int num, int num_rb_attached_particles, int numReplicas) {
     for (int i = 0; i < rigidBodyByType.size(); i++) {
 	for (int j = 0; j < rigidBodyByType[i].size(); j++) {
 	    rigidBodyByType[i][j].update_particle_positions(pos_d, force_d, energy_d);
 	}
     }
-    assert(false); // clear force and energy
+    for (int rep = 0; rep < numReplicas; ++rep) {
+	size_t o = (num+num_rb_attached_particles)*rep;
+	gpuErrchk(cudaMemset((void*)&(force_d[num+o]),0,num_rb_attached_particles*sizeof(Vector3)));
+    }
+    gpuErrchk(cudaMemset((void*)&(energy_d[num]),0,num_rb_attached_particles*sizeof(float)));
 }
 
 
@@ -559,7 +566,7 @@ void RigidBodyController::clearForceAndTorque()
     }
 }
 
-void RigidBodyController::updateForces(Vector3* pos_d, Vector3* force_d, int s, float* energy, bool get_energy, int scheme, BaseGrid* sys, BaseGrid* sys_d) 
+void RigidBodyController::updateForces(Vector3* pos_d, Vector3* force_d, int s, float* energy, bool get_energy, int scheme, BaseGrid* sys, BaseGrid* sys_d, int num, int num_rb_attached_particles)
 {
 	//if (s <= 1)
 		//gpuErrchk( cudaProfilerStart() );
@@ -576,6 +583,7 @@ void RigidBodyController::updateForces(Vector3* pos_d, Vector3* force_d, int s,
 	// RBTODO: launch kernels ahead of time and sync using event and memcpyAsync 
 	gpuErrchk( cudaDeviceSynchronize() );
 	cudaMemcpy(particleForces, particleForces_d, sizeof(ForceEnergy)*2*totalParticleForceNumBlocks, cudaMemcpyDeviceToHost);
+	cudaMemcpyAsync(attached_particle_forces, &force_d[num], sizeof(Vector3)*(num_rb_attached_particles), cudaMemcpyDeviceToHost);
 
 	pfo_idx=0;
 	for (int i = 0; i < rigidBodyByType.size(); i++) {
@@ -586,6 +594,16 @@ void RigidBodyController::updateForces(Vector3* pos_d, Vector3* force_d, int s,
 		}
 	}
 
+	{
+	    gpuErrchk( cudaDeviceSynchronize() );
+	    for (auto &rbv: rigidBodyByType) {
+		for (auto &rb: rbv) {
+		    rb.apply_attached_particle_forces( attached_particle_forces );
+		}
+	    }
+	}
+
+
 	// Grid–Grid forces
 	if ( ((s % conf.rigidBodyGridGridPeriod) == 0 || s == 1 ) && forcePairs.size() > 0) {
 		for (int i=0; i < forcePairs.size(); i++) {
diff --git a/src/RigidBodyController.h b/src/RigidBodyController.h
index bf8e58637d2b9860a8d1fd132f2c34a108ecfcb2..fa2fa4e5d1efd6b8bf43a83e9e34b456d5f67245 100644
--- a/src/RigidBodyController.h
+++ b/src/RigidBodyController.h
@@ -108,9 +108,9 @@ public:
         void SetRandomTorques();
 	void integrate(BaseGrid* sys, int step);
         void integrateDLM(BaseGrid* sys, int step);
-	void updateForces(Vector3* pos_d, Vector3* force_d, int s, float* energy, bool get_energy, int scheme, BaseGrid* sys, BaseGrid* sys_d);
+	void updateForces(Vector3* pos_d, Vector3* force_d, int s, float* energy, bool get_energy, int scheme, BaseGrid* sys, BaseGrid* sys_d, int num, int num_rb_attached_particles);
 	void updateParticleLists(Vector3* pos_d, BaseGrid* sys_d);
-    void update_attached_particle_positions(Vector3* pos_d, Vector3* force_d, float* energy_d, BaseGrid* sys_d);
+    void update_attached_particle_positions(Vector3* pos_d, Vector3* force_d, float* energy_d, BaseGrid* sys_d, int num, int num_rb_attached_particles, int numReplicas);
         void clearForceAndTorque(); 
         void KineticEnergy();
         void print(int step);
@@ -152,7 +152,8 @@ private:
 	void construct_grids();
 	void destruct_grids();
 
-        //float* rb_energy;	
+        //float* rb_energy;
+	Vector3* attached_particle_forces;
 	ForceEnergy* particleForces;
 	ForceEnergy* particleForces_d;
 	std::vector<int> particleForceNumBlocks;
diff --git a/src/RigidBodyType.cu b/src/RigidBodyType.cu
index ec0f8b65c7a25587755326291ce45542af777c44..2137160943aaf63fa2345f48e0b115373a242704 100644
--- a/src/RigidBodyType.cu
+++ b/src/RigidBodyType.cu
@@ -110,7 +110,7 @@ void RigidBodyType::attach_particles() {
 		// Particle_type | x | y | z
 		// A line without exactly six tokens should be discarded.
 		if (numTokens != 4) {
-		    printf("Error: Invalid attached particle file line: %s\n", line);
+		    printf("Error: Invalid attached particle file line: %s\n", tokenList[0].val());
 		    fclose(inp);
 		    exit(-1);
 		}