diff --git a/src/ComputeForce.cu b/src/ComputeForce.cu
index 7b8c42ba37bdd160bef7526a481839a1c99e0f30..8f817614aa94e58923899ed977ee186e8304e0dd 100644
--- a/src/ComputeForce.cu
+++ b/src/ComputeForce.cu
@@ -766,7 +766,7 @@ float ComputeForce::computeTabulated(bool get_energy) {
 
 	// TODO: Sum energy
 	if (restraintIds_d != NULL )
-	    computeHarmonicRestraints<<<1, numThreads, 0, gpuman.get_next_stream()>>>(forceInternal_d[0], pos_d[0], sys_d[0], numRestraints*numReplicas, restraintIds_d, restraintLocs_d, restraintSprings_d);
+	    computeHarmonicRestraints<<<1, numThreads, 0, gpuman.get_next_stream()>>>(forceInternal_d[0], pos_d[0], sys_d[0], numRestraints*numReplicas, restraintList_d, restraintLocs_d, restraintSprings_d);
 	
 
 	// Calculate the energy based on the array created by the kernel
@@ -1079,9 +1079,7 @@ void ComputeForce::copyToCUDA(int simNum, int *type, Bond* bonds, int2* bondMap,
 // 	}
 // }
 
-void ComputeForce::copyBondedListsToGPU(int3 *bondList, int4 *angleList, int4 *dihedralList, int *dihedralPotList, int4* bondAngleList) {
-
-	
+void ComputeForce::copyBondedListsToGPU(int3 *bondList, int4 *angleList, int4 *dihedralList, int *dihedralPotList, int4* bondAngleList, int2* restraintList) {
 	size_t size;
 
 	if (numBonds > 0) {
@@ -1112,4 +1110,10 @@ void ComputeForce::copyBondedListsToGPU(int3 *bondList, int4 *angleList, int4 *d
 	    gpuErrchk( cudaMemcpyAsync( bondAngleList_d, bondAngleList, size, cudaMemcpyHostToDevice) );
 	}
 
+	if (numRestraints > 0) {
+	    size = numRestraints * numReplicas * sizeof(int2);
+	    gpuErrchk( cudaMalloc( &restraintList_d, size ) );
+	    gpuErrchk( cudaMemcpyAsync( restraintList_d, restraintList, size, cudaMemcpyHostToDevice) );
+	}
+
 }
diff --git a/src/ComputeForce.cuh b/src/ComputeForce.cuh
index 63778e529cc2e0f26b65fa31ec2f59267c6845f0..9c05c38c5b85e322f254110a8041d91fdeda2681 100644
--- a/src/ComputeForce.cuh
+++ b/src/ComputeForce.cuh
@@ -1068,14 +1068,16 @@ void computeTabulatedDihedrals(Vector3* force, const Vector3* __restrict__ pos,
 __global__
 void computeHarmonicRestraints(Vector3* force, const Vector3* __restrict__ pos,
 			       const BaseGrid* __restrict__ sys,
-			       int numRestraints, const int* const __restrict__ particleId,
+			       int numRestraints, const int2* const __restrict__ restraintList,
 			       const Vector3* __restrict__ r0, const float* __restrict__ k) {
 
     // Loop over ALL dihedrals in ALL replicas
     for (int i = threadIdx.x+blockIdx.x*blockDim.x; i < numRestraints; i+=blockDim.x*gridDim.x) {
-	const int& id = particleId[i];
-	const Vector3 dr = sys->wrapDiff(pos[id]-r0[i]);
-	Vector3 f = -k[i]*dr;
+	const int& id = restraintList[i].x;
+	const int& rid = restraintList[i].y;
+
+	const Vector3 dr = sys->wrapDiff(pos[id]-r0[rid]);
+	Vector3 f = -k[rid]*dr;
 	atomicAdd( &force[ id ], f );
     }
 }
diff --git a/src/ComputeForce.h b/src/ComputeForce.h
index 39247282970c4b83713afffe9eb3e1a073c9d253..3d0e7041a259834e3179412782f4666d71206b01 100644
--- a/src/ComputeForce.h
+++ b/src/ComputeForce.h
@@ -103,7 +103,7 @@ public:
         void copyToCUDA(Vector3* forceInternal, Vector3* pos, Vector3* mom, float* random);
 	
 	// void createBondList(int3 *bondList);
-	void copyBondedListsToGPU(int3 *bondList, int4 *angleList, int4 *dihedralList, int *dihedralPotList, int4 *bondAngleList);
+    void copyBondedListsToGPU(int3 *bondList, int4 *angleList, int4 *dihedralList, int *dihedralPotList, int4 *bondAngleList, int2 *restraintList);
 	    
 	//MLog: because of the move of a lot of private variables, some functions get starved necessary memory access to these variables, below is a list of functions that return the specified private variable.
     std::vector<Vector3*> getPos_d()
@@ -171,14 +171,16 @@ public:
         }
     
     void clear_force() { 
+	const size_t tot_num = (num+num_rb_attached_particles+numGroupSites) * numReplicas;
 	for (std::size_t i = 0; i < gpuman.gpus.size(); ++i) {
 	    gpuman.use(i);
-	    gpuErrchk(cudaMemsetAsync((void*)(forceInternal_d[i]),0,(num+numGroupSites)*numReplicas*sizeof(Vector3)));
+	    gpuErrchk(cudaMemsetAsync((void*)(forceInternal_d[i]),0,tot_num*sizeof(Vector3)));
 	}
 	gpuman.use(0);		// TODO move to a paradigm where gpu0 is not preferentially treated 
     }
     void clear_energy() { 
-	gpuErrchk(cudaMemsetAsync((void*)(energies_d), 0, sizeof(float)*(num+numGroupSites)*numReplicas)); // TODO make async
+	const size_t tot_num = (num+num_rb_attached_particles+numGroupSites) * numReplicas;
+	gpuErrchk(cudaMemsetAsync((void*)(energies_d), 0, sizeof(float)*tot_num)); // TODO make async
     }
 
 	HOST DEVICE
@@ -288,10 +290,13 @@ private:
 	int4* angleList_d;
 	int4* dihedralList_d;
 	int* dihedralPotList_d;
-
+        int2* restraintList_d;
+    
 	int numRestraints;
 	int* restraintIds_d;
 	Vector3* restraintLocs_d;
 	float* restraintSprings_d;
 
+    
+
 };
diff --git a/src/GrandBrownTown.cu b/src/GrandBrownTown.cu
index 5df60f03a558d739ad3042c4412b9b0c658ad24f..cc8e21720e8ca24e16cc5ee2768e4e15e7a1c712 100644
--- a/src/GrandBrownTown.cu
+++ b/src/GrandBrownTown.cu
@@ -395,14 +395,35 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg,
 				exit(1);
 			}
 			int idx = i+k*numBondAngles;
-			bondAngleList[idx*2]   = make_int4( bondAngles[i].ind1+k*num, bondAngles[i].ind2+k*num,
-							    bondAngles[i].ind3+k*num, bondAngles[i].ind4+k*num );
+			bondAngleList[idx*2]   = make_int4( _get_index(bondAngles[i].ind1,k),
+							    _get_index(bondAngles[i].ind2,k),
+							    _get_index(bondAngles[i].ind3,k),
+							    _get_index(bondAngles[i].ind4,k) );
 			bondAngleList[idx*2+1] = make_int4( bondAngles[i].tabFileIndex1, bondAngles[i].tabFileIndex2, bondAngles[i].tabFileIndex3, -1 );
 	    }
 	}
 	}
 
-	internal->copyBondedListsToGPU(bondList,angleList,dihedralList,dihedralPotList,bondAngleList);
+	// TODO: remove older copy-to-cuda
+	int2* restraintList = NULL;
+	if (c.numRestraints > 0) {
+	    restraintList = new int2[c.numRestraints * numReplicas];
+	    int j = 0;
+	    for(int k = 0 ; k < numReplicas; k++)
+	    {
+		for(int i = 0; i < c.numRestraints; ++i)
+		{
+		    restraintList[j] = make_int2( _get_index(c.restraints[i].id, k), i );
+		    // cout << "Displaying: bondList["<< j <<"].x = " << bondList[j].x << ".\n"
+		    // << "Displaying: bondList["<< j <<"].y = " << bondList[j].y << ".\n"
+		    // << "Displaying: bondList["<< j <<"].z = " << bondList[j].z << ".\n";
+		    ++j;
+		}
+	    }
+	}
+	
+	internal->copyBondedListsToGPU(bondList,angleList,dihedralList,dihedralPotList,bondAngleList,restraintList);
+	if (c.numRestraints > 0) delete[] restraintList;
 	
 	forceInternal = new Vector3[(num+num_rb_attached_particles+numGroupSites)*numReplicas];
 	if (fullLongRange != 0)
@@ -657,11 +678,9 @@ void GrandBrownTown::run()
 	    #ifdef USE_NCCL
 	    if (gpuman.gpus.size() > 1) {
 		gpuman.nccl_broadcast(0, _pos, _pos, (num+num_rb_attached_particles+numGroupSites)*numReplicas, -1);
+		gpuman.sync();
 	    }
 	    #endif
-	    gpuman.sync();
-
-
 
             #ifdef _OPENMP
             omp_set_num_threads(4);
@@ -729,20 +748,21 @@ void GrandBrownTown::run()
                 }
             }//if inter-particle force
 
-	    if (get_energy) {
-		compute_position_dependent_force_for_rb_attached_particles
-		    <<< numBlocks, NUM_THREADS >>> (
-			internal -> getPos_d()[0], internal -> getForceInternal_d()[0],
-			internal -> getType_d(), part_d, electricField, num, num_rb_attached_particles, numReplicas, ParticleInterpolationType);
-	    } else {
-		compute_position_dependent_force_for_rb_attached_particles
-		    <<< numBlocks, NUM_THREADS >>> (
-			internal -> getPos_d()[0],
-			internal -> getForceInternal_d()[0], internal -> getEnergy(),
-			internal -> getType_d(), part_d, electricField, num, num_rb_attached_particles, numReplicas, ParticleInterpolationType);
+	    if (num_rb_attached_particles > 0) {
+		if (get_energy) {
+		    compute_position_dependent_force_for_rb_attached_particles
+			<<< numBlocks, NUM_THREADS >>> (
+			    internal -> getPos_d()[0], internal -> getForceInternal_d()[0],
+			    internal -> getType_d(), part_d, electricField, num, num_rb_attached_particles, numReplicas, ParticleInterpolationType);
+		} else {
+		    compute_position_dependent_force_for_rb_attached_particles
+			<<< numBlocks, NUM_THREADS >>> (
+			    internal -> getPos_d()[0],
+			    internal -> getForceInternal_d()[0], internal -> getEnergy(),
+			    internal -> getType_d(), part_d, electricField, num, num_rb_attached_particles, numReplicas, ParticleInterpolationType);
+		}
 	    }
 
-
             #ifdef _OPENMP
             omp_set_num_threads(4);
             #endif
@@ -1401,7 +1421,6 @@ void GrandBrownTown::init_cuda_group_sites()
     }
 
     // Create GPU-friendly data structure
-    assert(numReplicas == 1);    // TODO make this work for replicas
     int* tmp = new int[numGroupSites+1+num_particles];
     num_particles = 0;
     int i = 0;