diff --git a/src/BrownianParticleType.cpp b/src/BrownianParticleType.cpp
index e514500ec3b6b87058cf58389c2a99ab7ddd506d..226814419a20fef51671d3a17ada2539b445f738 100644
--- a/src/BrownianParticleType.cpp
+++ b/src/BrownianParticleType.cpp
@@ -5,6 +5,7 @@
 //////////////////////////////////////////
 void BrownianParticleType::clear() {
 	if (pmf != NULL) delete [] pmf;
+	if (pmf_scale != NULL) delete [] pmf_scale;
 	if (diffusionGrid != NULL) delete diffusionGrid;
 	if (forceXGrid != NULL) delete forceXGrid;
 	if (forceYGrid != NULL) delete forceYGrid;
@@ -12,6 +13,7 @@ void BrownianParticleType::clear() {
 	if (reservoir != NULL) delete reservoir;
         if (meanPmf != NULL) delete []  meanPmf;
 	pmf = NULL, diffusionGrid = NULL;
+	pmf_scale = NULL;
 	pmf_boundary_conditions = NULL;
 	forceXGrid = NULL, forceYGrid = NULL, forceZGrid = NULL;
 	reservoir = NULL, meanPmf = NULL;
@@ -26,6 +28,7 @@ void BrownianParticleType::copy(const BrownianParticleType& src) {
 	radius = src.radius;
 	eps = src.eps;
         pmf = src.pmf;
+        pmf_scale = src.pmf_scale;
         pmf_boundary_conditions = src.pmf_boundary_conditions;
 	meanPmf = src.meanPmf;
         numPartGridFiles = src.numPartGridFiles;
diff --git a/src/BrownianParticleType.h b/src/BrownianParticleType.h
index 524a5b66be452771f0aa8aec65d76dfddb60b7dd..8af1385a803768eabf9d3efe225ac01a31be38de 100644
--- a/src/BrownianParticleType.h
+++ b/src/BrownianParticleType.h
@@ -30,7 +30,7 @@ class BrownianParticleType {
 		BrownianParticleType(const String& name = "") :
 				name(name), num(0),
 				diffusion(0.0f), radius(1.0f), charge(0.0f), eps(0.0f), meanPmf(NULL),
-				numPartGridFiles(-1), reservoir(NULL), pmf(NULL), pmf_boundary_conditions(NULL),
+				numPartGridFiles(-1), reservoir(NULL), pmf(NULL), pmf_scale(NULL), pmf_boundary_conditions(NULL),
 				diffusionGrid(NULL),
 				forceXGrid(NULL), forceYGrid(NULL), forceZGrid(NULL){ }
 
@@ -62,8 +62,9 @@ public:
                 float mu; //for Nose-Hoover Langevin dynamics
 
 		Reservoir* reservoir;
-		BaseGrid* pmf;
-		BoundaryCondition* pmf_boundary_conditions;
+		BaseGrid** pmf;
+    float* pmf_scale;
+    BoundaryCondition* pmf_boundary_conditions;
 		BaseGrid* diffusionGrid;
 		BaseGrid* forceXGrid;
 		BaseGrid* forceYGrid;
diff --git a/src/Configuration.cpp b/src/Configuration.cpp
index 41b4cf3a1dd9c97bd1bf9b28b2591058627b0287..ce8f1533f296c48878035b056a7bb6ba06e0af09 100644
--- a/src/Configuration.cpp
+++ b/src/Configuration.cpp
@@ -6,6 +6,7 @@
 #include <cassert>
 #include <stdlib.h>     /* srand, rand */
 #include <time.h>       /* time */
+#include <string>
 #include <iostream>
 using namespace std;
 
@@ -257,72 +258,66 @@ Configuration::Configuration(const char* config_file, int simNum, bool debug) :
 	}
 
 	printf("\nFound %d particle types.\n", numParts);
-	// Load the potential grids.
+
 	printf("Loading the potential grids...\n");
+	// First load a single copy of each grid
 	for (int i = 0; i < numParts; i++) 
         {
-            String map = partGridFile[i][0];
-            int len = map.length();
-
-            if (len >= 3 && map[len-3]=='.' && map[len-2]=='d' && map[len-1]=='x') 
-            {
-                part[i].pmf     = new BaseGrid[part[i].numPartGridFiles];
-                part[i].meanPmf = new float[part[i].numPartGridFiles];
-                for(int j = 0; j < part[i].numPartGridFiles; ++j)
-                {
-                    map = partGridFile[i][j];
-                    len = map.length();
-                    if (!(len >= 3 && map[len-3]=='.' && map[len-2]=='d' && map[len-1]=='x'))
-                    {
-                        cout << "currently do not support different format " << endl;
-                        exit(1);
-                    }
-                    // Decide which type of grid is given.
-                    //String map = partGridFile[i];
- 
-	            // A dx file. Load the old-fashioned way.
-	            //part[i].pmf[j] = new BaseGrid(map.val());
-	            BaseGrid tmp(map.val());
-		    part[i].pmf[j] = tmp;
-		    if (partGridFileScale[i][j] != 1.0f) 
-                        part[i].pmf[j].scale(partGridFileScale[i][j]);
-
-			//part[i].meanPmf = part[i].pmf->mean();
-		    part[i].meanPmf[j] = part[i].pmf[j].mean();
-		    printf("Loaded dx potential grid `%s'.\n", map.val());
-		    printf("Grid size %s.\n", part[i].pmf[j].getExtent().toString().val());
-                }
-            } 
-	    else if  (len >= 4 && map[len-4]=='.' && map[len-3]=='d' && map[len-2]=='e' && map[len-1]=='f') 
-            {
-                OverlordGrid* over = new OverlordGrid[part[i].numPartGridFiles];
-                part[i].meanPmf = new float[part[i].numPartGridFiles];
-                for(int j = 0; j < part[i].numPartGridFiles; ++j)
-                {
-                    map = partGridFile[i][j];
-                    len = map.length();
-                    if (!(len >= 4 && map[len-4]=='.' && map[len-3]=='d' && map[len-2]=='e' && map[len-1]=='f'))
-                    {
-                        cout << "currently do not support different format " << endl;
-                        exit(1);
-                    }
-
-                    String rootGrid = OverlordGrid::readDefFirst(map);
-                    over[j] = OverlordGrid(rootGrid.val());
-		    int count = over->readDef(map);
-		    printf("Loaded system def file `%s'.\n", map.val());
-		    printf("Found %d unique grids.\n", over->getUniqueGridNum());
-		    printf("Linked %d subgrids.\n", count);
-                    part[i].meanPmf[j] = part[i].pmf[j].mean();
-                 }
-      		 part[i].pmf = static_cast<BaseGrid*>(over);
-	     } 
-             else 
-             {
-	         printf("WARNING: Unrecognized gridFile extension. Must be *.def or *.dx.\n");
-		 exit(-1);
-	     }
+	    for(int j = 0; j < part[i].numPartGridFiles; ++j)
+	    {
+		std::string fname(partGridFile[i][j].val(), partGridFile[i][j].length());
+		if (part_grid_dictionary.count( fname ) == 0)
+		{
+		    int len = fname.length();
+		    if (len >= 3 && fname[len-3]=='.' && fname[len-2]=='d' && fname[len-1]=='x')
+		    {
+			part_grid_dictionary.insert({fname, BaseGrid(fname.c_str())});
+		    }
+		    else if  (len >= 4 && fname[len-4]=='.' && fname[len-3]=='d' && fname[len-2]=='e' && fname[len-1]=='f')
+		    {
+			assert(1==2); // Throw exception because this implementation needs to be revisited
+/*                OverlordGrid* over = new OverlordGrid[part[i].numPartGridFiles];
+		  part[i].meanPmf = new float[part[i].numPartGridFiles];
+		  for(int j = 0; j < part[i].numPartGridFiles; ++j)
+		  {
+		  map = partGridFile[i][j];
+		  len = map.length();
+		  if (!(len >= 4 && map[len-4]=='.' && map[len-3]=='d' && map[len-2]=='e' && map[len-1]=='f'))
+		  {
+		  cout << "currently do not support different format " << endl;
+		  exit(1);
+		  }
+
+		  String rootGrid = OverlordGrid::readDefFirst(map);
+		  over[j] = OverlordGrid(rootGrid.val());
+		  int count = over->readDef(map);
+		  printf("Loaded system def file `%s'.\n", map.val());
+		  printf("Found %d unique grids.\n", over->getUniqueGridNum());
+		  printf("Linked %d subgrids.\n", count);
+		  part[i].meanPmf[j] = part[i].pmf[j].mean();
+		  }
+		  part[i].pmf = static_cast<BaseGrid*>(over);
+*/
+		    } else {
+			printf("WARNING: Unrecognized gridFile extension. Must be *.def or *.dx.\n");
+			exit(-1);
+		    }
+		}
+	    }
+	}
 
+	// Then assign grid addresses to particles
+	for (int i = 0; i < numParts; i++)
+        {
+	    part[i].pmf     = new BaseGrid*[part[i].numPartGridFiles];
+	    part[i].pmf_scale = new float[part[i].numPartGridFiles];
+	    part[i].meanPmf = new float[part[i].numPartGridFiles];
+	    for(int j = 0; j < part[i].numPartGridFiles; ++j)
+	    {
+		part[i].pmf[j] = &(part_grid_dictionary.find( std::string(partGridFile[i][j]) )->second);
+		part[i].pmf_scale[j] = partGridFileScale[i][j];
+		part[i].meanPmf[j] = part[i].pmf[j]->mean(); // TODO: review how this is used and decide whether to scale
+	    }
 		if (partForceXGridFile[i].length() != 0) {
 			part[i].forceXGrid = new BaseGrid(partForceXGridFile[i].val());
 			printf("Loaded `%s'.\n", partForceXGridFile[i].val());
@@ -385,7 +380,7 @@ Configuration::Configuration(const char* config_file, int simNum, bool debug) :
 	sys = new BaseGrid( Matrix3(basis1,basis2,basis3), origin, 1, 1, 1 );
     } else {
 	// TODO: use largest system in x,y,z
-	sys = part[0].pmf;
+	sys = *part[0].pmf;
     }
     sysDim = sys->getExtent();
 
@@ -594,7 +589,17 @@ Configuration::~Configuration() {
 }
 
 void Configuration::copyToCUDA() {
-	printf("Copying particle data to GPU %d\n", GPUManager::current());
+    printf("Copying particle grids to GPU %d\n", GPUManager::current());
+    for (const auto& pair : part_grid_dictionary)
+    {
+	// Copy PMF
+	const BaseGrid& g = pair.second;
+	BaseGrid *g_d = g.copy_to_cuda();
+	part_grid_dictionary_d.insert({pair.first, g_d});
+	// printf("Assigning grid for %s to %p (originally %p)\n", pair.first.c_str(), (void *) part_grid_dictionary_d[pair.first], (void *) g_d);
+    }
+
+    printf("Copying particle data to GPU %d\n", GPUManager::current());
 
 	BrownianParticleType **part_addr = new BrownianParticleType*[numParts];
 
@@ -604,11 +609,31 @@ void Configuration::copyToCUDA() {
 	
 	for (int i = 0; i < numParts; i++) 
         {
-	        BaseGrid *pmf = NULL, *diffusionGrid = NULL;
 		BrownianParticleType *b = new BrownianParticleType(part[i]);
-		// Copy PMF
+		// Copy PMF pointers
 		if (part[i].pmf != NULL) 
                 {
+		    {
+			BaseGrid** tmp_d = new BaseGrid*[part[i].numPartGridFiles];
+			BaseGrid** tmp   = new BaseGrid*[part[i].numPartGridFiles];
+			for(int j = 0; j < part[i].numPartGridFiles; ++j) {
+			    // printf("Retrieving grid for %s (at %p)\n", partGridFile[i][j].val(), (void *) part_grid_dictionary_d[std::string(partGridFile[i][j])]);
+			    tmp[j] = part_grid_dictionary_d[std::string(partGridFile[i][j])];
+			}
+			gpuErrchk(cudaMalloc(&tmp_d, sizeof(BaseGrid*)*part[i].numPartGridFiles));
+			gpuErrchk(cudaMemcpy(tmp_d, tmp, sizeof(BaseGrid*)*part[i].numPartGridFiles,
+					     cudaMemcpyHostToDevice));
+			b->pmf = tmp_d;
+		    }
+
+		    {
+			float *tmp;
+			gpuErrchk(cudaMalloc(&tmp, sizeof(float)*part[i].numPartGridFiles));
+			gpuErrchk(cudaMemcpy(tmp, part[i].pmf_scale, sizeof(float)*part[i].numPartGridFiles,
+					     cudaMemcpyHostToDevice));
+			b->pmf_scale = tmp;
+		    }
+
 		    {
 			float *tmp;
 			gpuErrchk(cudaMalloc(&tmp, sizeof(float)*part[i].numPartGridFiles));
@@ -624,40 +649,17 @@ void Configuration::copyToCUDA() {
 			gpuErrchk(cudaMemcpy(tmp, part[i].pmf_boundary_conditions, s, cudaMemcpyHostToDevice));
 			b->pmf_boundary_conditions = tmp;
 		    }
-
-		    gpuErrchk(cudaMalloc(&pmf, sizeof(BaseGrid)*part[i].numPartGridFiles));
-                    for(int j = 0; j < part[i].numPartGridFiles; ++j)
-                    { 
-                        float *val = NULL;
-		        size_t sz = sizeof(float) * part[i].pmf[j].getSize();
-		        //gpuErrchk(cudaMalloc(pmf, sizeof(BaseGrid)));
-		        gpuErrchk(cudaMalloc(&val, sz));
-		        gpuErrchk(cudaMemcpyAsync(val, part[i].pmf[j].val, sz, cudaMemcpyHostToDevice));
-		        BaseGrid *pmf_h = new BaseGrid(part[i].pmf[j]);
-	                pmf_h->val = val;
-		        gpuErrchk(cudaMemcpy(pmf+j, pmf_h, sizeof(BaseGrid), cudaMemcpyHostToDevice));
-		        pmf_h->val = NULL;
-                    }
                     
 		}
-		b->pmf = pmf;
 
 		// Copy the diffusion grid
 		if (part[i].diffusionGrid != NULL) {
-			float *val = NULL;
-			size_t sz = sizeof(float) * part[i].diffusionGrid->getSize();
-		  BaseGrid *diffusionGrid_h = new BaseGrid(*part[i].diffusionGrid);
-		  gpuErrchk(cudaMalloc(&diffusionGrid, sizeof(BaseGrid)));
-		  gpuErrchk(cudaMalloc(&val, sz));
-			diffusionGrid_h->val = val;
-			gpuErrchk(cudaMemcpyAsync(diffusionGrid, diffusionGrid_h, sizeof(BaseGrid),
-					cudaMemcpyHostToDevice));
-		  gpuErrchk(cudaMemcpy(val, part[i].diffusionGrid->val, sz, cudaMemcpyHostToDevice));
-			diffusionGrid_h->val = NULL;
+		    b->diffusionGrid = part[i].diffusionGrid->copy_to_cuda();
+		} else {
+		    b->diffusionGrid = NULL;
 		}
 		
 		//b->pmf = pmf;
-		b->diffusionGrid = diffusionGrid;
 		gpuErrchk(cudaMalloc(&part_addr[i], sizeof(BrownianParticleType)));
 		gpuErrchk(cudaMemcpyAsync(part_addr[i], b, sizeof(BrownianParticleType),
 				cudaMemcpyHostToDevice));
diff --git a/src/Configuration.h b/src/Configuration.h
index debf4b9f6ea4d0ce8eaa9c52d70d6c57f1efdcbb..9daf4fe39375351724b81ba3f3eb4e1ec48a6a3b 100644
--- a/src/Configuration.h
+++ b/src/Configuration.h
@@ -11,6 +11,7 @@
 
 #include <algorithm> // sort
 #include <vector>
+#include <map>
 
 #include <cuda.h>
 #include <cuda_runtime.h>
@@ -208,6 +209,8 @@ public:
 	//float* partGridFileScale;
 	float **partGridFileScale;
         //int *numPartGridFiles;
+    std::map<std::string,BaseGrid> part_grid_dictionary;
+    std::map<std::string,BaseGrid*> part_grid_dictionary_d;
 	std::vector< std::vector<String> > partRigidBodyGrid;
 	String* partDiffusionGridFile;
 	String* partForceXGridFile;
diff --git a/src/GrandBrownTown.cu b/src/GrandBrownTown.cu
index a8576e54ab0e07474cdd18db2c6e138f27caf5da..1fb98bc084e49aca00279e5c91611bfbe0a9f32e 100644
--- a/src/GrandBrownTown.cu
+++ b/src/GrandBrownTown.cu
@@ -1664,7 +1664,7 @@ void GrandBrownTown::writeRestart(int repID) const
 /*the center is defined by the first pmf*/
 void GrandBrownTown::initialCondCen() {
 	for (int i = 0; i < num; i++)
-		pos[i] = part[ type[i] ].pmf->getCenter();
+		pos[i] = part[ type[i] ].pmf[0]->getCenter();
 }
 
 
@@ -1688,7 +1688,7 @@ Vector3 GrandBrownTown::findPos(int typ) {
 		const float ry = sysDim.y * randoGen->uniform();
 		const float rz = sysDim.z * randoGen->uniform();
 		r = sys->wrap( Vector3(rx, ry, rz) );
-	} while (pt.pmf->interpolatePotential(r) > *pt.meanPmf);
+	} while (pt.pmf[0]->interpolatePotential(r) > *pt.meanPmf);
 	return r;
 }
 
@@ -1701,7 +1701,7 @@ Vector3 GrandBrownTown::findPos(int typ, float minZ) {
 		const float ry = sysDim.y * randoGen->uniform();
 		const float rz = sysDim.z * randoGen->uniform();
 		r = sys->wrap( Vector3(rx, ry, rz) );
-	} while (pt.pmf->interpolatePotential(r) > *pt.meanPmf and fabs(r.z) > minZ);
+	} while (pt.pmf[0]->interpolatePotential(r) > *pt.meanPmf and fabs(r.z) > minZ);
 	return r;
 }
 
diff --git a/src/GrandBrownTown.cuh b/src/GrandBrownTown.cuh
index 4a2211c6c407f16871f93ab7aa9e33f5defa9a2b..c4c059aea09aafe0f9a77c3f0cd6a6a1f71e7073 100644
--- a/src/GrandBrownTown.cuh
+++ b/src/GrandBrownTown.cuh
@@ -120,10 +120,10 @@ updateKernelNoseHooverLangevin(Vector3* __restrict__ pos, Vector3* __restrict__
             ForceEnergy tmp(0.f,0.f);
             if(!scheme) {
 		const BoundaryCondition& bc = pt.pmf_boundary_conditions[i];
-		INTERPOLATE_FORCE(tmp, pt.pmf[i].interpolateForceDLinearly, bc, r0)
-	    } else 
-                tmp = pt.pmf[i].interpolateForceD(r0);
-            fe.f += tmp.f;
+		INTERPOLATE_FORCE(tmp, pt.pmf[i]->interpolateForceDLinearly, bc, r0)
+	    } else
+                tmp = pt.pmf[i]->interpolateForceD(r0);
+            fe.f += tmp.f * pt.pmf_scale[i];
         }
         //ForceEnergy fe = pt.pmf->interpolateForceDLinearlyPeriodic(r0);
 
@@ -236,10 +236,10 @@ __global__ void updateKernelBAOAB(Vector3* pos, Vector3* momentum, Vector3* __re
             ForceEnergy tmp(0.f, 0.f);
             if(!scheme) {
 		BoundaryCondition bc = pt.pmf_boundary_conditions[i];
-		INTERPOLATE_FORCE(tmp, pt.pmf[i].interpolateForceDLinearly, bc, r0)
+		INTERPOLATE_FORCE(tmp, pt.pmf[i]->interpolateForceDLinearly, bc, r0)
 	    } else 
-                tmp = pt.pmf[i].interpolateForceD(r0);
-            fe.f += tmp.f;
+                tmp = pt.pmf[i]->interpolateForceD(r0);
+            fe.f += tmp.f * pt.pmf_scale[i];
         }
 
 #ifndef FORCEGRIDOFF
@@ -336,10 +336,12 @@ __global__ void LastUpdateKernelBAOAB(Vector3* pos,Vector3* momentum, Vector3* _
             ForceEnergy tmp(0.f, 0.f);
             if(!scheme) {
 		BoundaryCondition bc = pt.pmf_boundary_conditions[i];
-		INTERPOLATE_FORCE(tmp, pt.pmf[i].interpolateForceDLinearly, bc, r0)
+		INTERPOLATE_FORCE(tmp, pt.pmf[i]->interpolateForceDLinearly, bc, r0)
 	    } else 
-                tmp = pt.pmf[i].interpolateForceD(r0);
+                tmp = pt.pmf[i]->interpolateForceD(r0);
 
+	    tmp.f = tmp.f * pt.pmf_scale[i];
+	    tmp.e = tmp.e * pt.pmf_scale[i];
             fe += tmp;
             //fe.e += tmp.e;
         }
@@ -468,9 +470,11 @@ void updateKernel(Vector3* pos, Vector3* __restrict__ forceInternal, int type[],
                     ForceEnergy tmp(0.f,0.f);
 		    if(!scheme) {
 			BoundaryCondition bc = pt.pmf_boundary_conditions[i];
-			INTERPOLATE_FORCE(tmp, pt.pmf[i].interpolateForceDLinearly, bc, p)
+			INTERPOLATE_FORCE(tmp, pt.pmf[i]->interpolateForceDLinearly, bc, p)
 		    } else 
-                        tmp = pt.pmf[i].interpolateForceD(p);
+                        tmp = pt.pmf[i]->interpolateForceD(p);
+		    tmp.f *= pt.pmf_scale[i];
+		    tmp.e *= pt.pmf_scale[i];
                     fe += tmp;
                 }
 #ifndef FORCEGRIDOFF
@@ -550,10 +554,12 @@ void updateKernel(Vector3* pos, Vector3* __restrict__ forceInternal, int type[],
                     float en_local = 0.f;
                     for(int i = 0; i < pt.numPartGridFiles; ++i)
                     {
+			float en_tmp = 0.0f;
                         if(!scheme)
-                            en_local += pt.pmf[i].interpolatePotentialLinearly(tmp);
+                            en_tmp = pt.pmf[i]->interpolatePotentialLinearly(tmp);
                         else
-                            en_local += pt.pmf[i].interpolatePotential(tmp);
+                            en_tmp = pt.pmf[i]->interpolatePotential(tmp);
+			en_tmp *= pt.pmf_scale[i];
                     }
                     energy[idx] += en_local;
                 }