diff --git a/src/BrownianParticleType.cpp b/src/BrownianParticleType.cpp index e514500ec3b6b87058cf58389c2a99ab7ddd506d..226814419a20fef51671d3a17ada2539b445f738 100644 --- a/src/BrownianParticleType.cpp +++ b/src/BrownianParticleType.cpp @@ -5,6 +5,7 @@ ////////////////////////////////////////// void BrownianParticleType::clear() { if (pmf != NULL) delete [] pmf; + if (pmf_scale != NULL) delete [] pmf_scale; if (diffusionGrid != NULL) delete diffusionGrid; if (forceXGrid != NULL) delete forceXGrid; if (forceYGrid != NULL) delete forceYGrid; @@ -12,6 +13,7 @@ void BrownianParticleType::clear() { if (reservoir != NULL) delete reservoir; if (meanPmf != NULL) delete [] meanPmf; pmf = NULL, diffusionGrid = NULL; + pmf_scale = NULL; pmf_boundary_conditions = NULL; forceXGrid = NULL, forceYGrid = NULL, forceZGrid = NULL; reservoir = NULL, meanPmf = NULL; @@ -26,6 +28,7 @@ void BrownianParticleType::copy(const BrownianParticleType& src) { radius = src.radius; eps = src.eps; pmf = src.pmf; + pmf_scale = src.pmf_scale; pmf_boundary_conditions = src.pmf_boundary_conditions; meanPmf = src.meanPmf; numPartGridFiles = src.numPartGridFiles; diff --git a/src/BrownianParticleType.h b/src/BrownianParticleType.h index 524a5b66be452771f0aa8aec65d76dfddb60b7dd..8af1385a803768eabf9d3efe225ac01a31be38de 100644 --- a/src/BrownianParticleType.h +++ b/src/BrownianParticleType.h @@ -30,7 +30,7 @@ class BrownianParticleType { BrownianParticleType(const String& name = "") : name(name), num(0), diffusion(0.0f), radius(1.0f), charge(0.0f), eps(0.0f), meanPmf(NULL), - numPartGridFiles(-1), reservoir(NULL), pmf(NULL), pmf_boundary_conditions(NULL), + numPartGridFiles(-1), reservoir(NULL), pmf(NULL), pmf_scale(NULL), pmf_boundary_conditions(NULL), diffusionGrid(NULL), forceXGrid(NULL), forceYGrid(NULL), forceZGrid(NULL){ } @@ -62,8 +62,9 @@ public: float mu; //for Nose-Hoover Langevin dynamics Reservoir* reservoir; - BaseGrid* pmf; - BoundaryCondition* pmf_boundary_conditions; + BaseGrid** pmf; + float* pmf_scale; + BoundaryCondition* pmf_boundary_conditions; BaseGrid* diffusionGrid; BaseGrid* forceXGrid; BaseGrid* forceYGrid; diff --git a/src/Configuration.cpp b/src/Configuration.cpp index 41b4cf3a1dd9c97bd1bf9b28b2591058627b0287..ce8f1533f296c48878035b056a7bb6ba06e0af09 100644 --- a/src/Configuration.cpp +++ b/src/Configuration.cpp @@ -6,6 +6,7 @@ #include <cassert> #include <stdlib.h> /* srand, rand */ #include <time.h> /* time */ +#include <string> #include <iostream> using namespace std; @@ -257,72 +258,66 @@ Configuration::Configuration(const char* config_file, int simNum, bool debug) : } printf("\nFound %d particle types.\n", numParts); - // Load the potential grids. + printf("Loading the potential grids...\n"); + // First load a single copy of each grid for (int i = 0; i < numParts; i++) { - String map = partGridFile[i][0]; - int len = map.length(); - - if (len >= 3 && map[len-3]=='.' && map[len-2]=='d' && map[len-1]=='x') - { - part[i].pmf = new BaseGrid[part[i].numPartGridFiles]; - part[i].meanPmf = new float[part[i].numPartGridFiles]; - for(int j = 0; j < part[i].numPartGridFiles; ++j) - { - map = partGridFile[i][j]; - len = map.length(); - if (!(len >= 3 && map[len-3]=='.' && map[len-2]=='d' && map[len-1]=='x')) - { - cout << "currently do not support different format " << endl; - exit(1); - } - // Decide which type of grid is given. - //String map = partGridFile[i]; - - // A dx file. Load the old-fashioned way. - //part[i].pmf[j] = new BaseGrid(map.val()); - BaseGrid tmp(map.val()); - part[i].pmf[j] = tmp; - if (partGridFileScale[i][j] != 1.0f) - part[i].pmf[j].scale(partGridFileScale[i][j]); - - //part[i].meanPmf = part[i].pmf->mean(); - part[i].meanPmf[j] = part[i].pmf[j].mean(); - printf("Loaded dx potential grid `%s'.\n", map.val()); - printf("Grid size %s.\n", part[i].pmf[j].getExtent().toString().val()); - } - } - else if (len >= 4 && map[len-4]=='.' && map[len-3]=='d' && map[len-2]=='e' && map[len-1]=='f') - { - OverlordGrid* over = new OverlordGrid[part[i].numPartGridFiles]; - part[i].meanPmf = new float[part[i].numPartGridFiles]; - for(int j = 0; j < part[i].numPartGridFiles; ++j) - { - map = partGridFile[i][j]; - len = map.length(); - if (!(len >= 4 && map[len-4]=='.' && map[len-3]=='d' && map[len-2]=='e' && map[len-1]=='f')) - { - cout << "currently do not support different format " << endl; - exit(1); - } - - String rootGrid = OverlordGrid::readDefFirst(map); - over[j] = OverlordGrid(rootGrid.val()); - int count = over->readDef(map); - printf("Loaded system def file `%s'.\n", map.val()); - printf("Found %d unique grids.\n", over->getUniqueGridNum()); - printf("Linked %d subgrids.\n", count); - part[i].meanPmf[j] = part[i].pmf[j].mean(); - } - part[i].pmf = static_cast<BaseGrid*>(over); - } - else - { - printf("WARNING: Unrecognized gridFile extension. Must be *.def or *.dx.\n"); - exit(-1); - } + for(int j = 0; j < part[i].numPartGridFiles; ++j) + { + std::string fname(partGridFile[i][j].val(), partGridFile[i][j].length()); + if (part_grid_dictionary.count( fname ) == 0) + { + int len = fname.length(); + if (len >= 3 && fname[len-3]=='.' && fname[len-2]=='d' && fname[len-1]=='x') + { + part_grid_dictionary.insert({fname, BaseGrid(fname.c_str())}); + } + else if (len >= 4 && fname[len-4]=='.' && fname[len-3]=='d' && fname[len-2]=='e' && fname[len-1]=='f') + { + assert(1==2); // Throw exception because this implementation needs to be revisited +/* OverlordGrid* over = new OverlordGrid[part[i].numPartGridFiles]; + part[i].meanPmf = new float[part[i].numPartGridFiles]; + for(int j = 0; j < part[i].numPartGridFiles; ++j) + { + map = partGridFile[i][j]; + len = map.length(); + if (!(len >= 4 && map[len-4]=='.' && map[len-3]=='d' && map[len-2]=='e' && map[len-1]=='f')) + { + cout << "currently do not support different format " << endl; + exit(1); + } + + String rootGrid = OverlordGrid::readDefFirst(map); + over[j] = OverlordGrid(rootGrid.val()); + int count = over->readDef(map); + printf("Loaded system def file `%s'.\n", map.val()); + printf("Found %d unique grids.\n", over->getUniqueGridNum()); + printf("Linked %d subgrids.\n", count); + part[i].meanPmf[j] = part[i].pmf[j].mean(); + } + part[i].pmf = static_cast<BaseGrid*>(over); +*/ + } else { + printf("WARNING: Unrecognized gridFile extension. Must be *.def or *.dx.\n"); + exit(-1); + } + } + } + } + // Then assign grid addresses to particles + for (int i = 0; i < numParts; i++) + { + part[i].pmf = new BaseGrid*[part[i].numPartGridFiles]; + part[i].pmf_scale = new float[part[i].numPartGridFiles]; + part[i].meanPmf = new float[part[i].numPartGridFiles]; + for(int j = 0; j < part[i].numPartGridFiles; ++j) + { + part[i].pmf[j] = &(part_grid_dictionary.find( std::string(partGridFile[i][j]) )->second); + part[i].pmf_scale[j] = partGridFileScale[i][j]; + part[i].meanPmf[j] = part[i].pmf[j]->mean(); // TODO: review how this is used and decide whether to scale + } if (partForceXGridFile[i].length() != 0) { part[i].forceXGrid = new BaseGrid(partForceXGridFile[i].val()); printf("Loaded `%s'.\n", partForceXGridFile[i].val()); @@ -385,7 +380,7 @@ Configuration::Configuration(const char* config_file, int simNum, bool debug) : sys = new BaseGrid( Matrix3(basis1,basis2,basis3), origin, 1, 1, 1 ); } else { // TODO: use largest system in x,y,z - sys = part[0].pmf; + sys = *part[0].pmf; } sysDim = sys->getExtent(); @@ -594,7 +589,17 @@ Configuration::~Configuration() { } void Configuration::copyToCUDA() { - printf("Copying particle data to GPU %d\n", GPUManager::current()); + printf("Copying particle grids to GPU %d\n", GPUManager::current()); + for (const auto& pair : part_grid_dictionary) + { + // Copy PMF + const BaseGrid& g = pair.second; + BaseGrid *g_d = g.copy_to_cuda(); + part_grid_dictionary_d.insert({pair.first, g_d}); + // printf("Assigning grid for %s to %p (originally %p)\n", pair.first.c_str(), (void *) part_grid_dictionary_d[pair.first], (void *) g_d); + } + + printf("Copying particle data to GPU %d\n", GPUManager::current()); BrownianParticleType **part_addr = new BrownianParticleType*[numParts]; @@ -604,11 +609,31 @@ void Configuration::copyToCUDA() { for (int i = 0; i < numParts; i++) { - BaseGrid *pmf = NULL, *diffusionGrid = NULL; BrownianParticleType *b = new BrownianParticleType(part[i]); - // Copy PMF + // Copy PMF pointers if (part[i].pmf != NULL) { + { + BaseGrid** tmp_d = new BaseGrid*[part[i].numPartGridFiles]; + BaseGrid** tmp = new BaseGrid*[part[i].numPartGridFiles]; + for(int j = 0; j < part[i].numPartGridFiles; ++j) { + // printf("Retrieving grid for %s (at %p)\n", partGridFile[i][j].val(), (void *) part_grid_dictionary_d[std::string(partGridFile[i][j])]); + tmp[j] = part_grid_dictionary_d[std::string(partGridFile[i][j])]; + } + gpuErrchk(cudaMalloc(&tmp_d, sizeof(BaseGrid*)*part[i].numPartGridFiles)); + gpuErrchk(cudaMemcpy(tmp_d, tmp, sizeof(BaseGrid*)*part[i].numPartGridFiles, + cudaMemcpyHostToDevice)); + b->pmf = tmp_d; + } + + { + float *tmp; + gpuErrchk(cudaMalloc(&tmp, sizeof(float)*part[i].numPartGridFiles)); + gpuErrchk(cudaMemcpy(tmp, part[i].pmf_scale, sizeof(float)*part[i].numPartGridFiles, + cudaMemcpyHostToDevice)); + b->pmf_scale = tmp; + } + { float *tmp; gpuErrchk(cudaMalloc(&tmp, sizeof(float)*part[i].numPartGridFiles)); @@ -624,40 +649,17 @@ void Configuration::copyToCUDA() { gpuErrchk(cudaMemcpy(tmp, part[i].pmf_boundary_conditions, s, cudaMemcpyHostToDevice)); b->pmf_boundary_conditions = tmp; } - - gpuErrchk(cudaMalloc(&pmf, sizeof(BaseGrid)*part[i].numPartGridFiles)); - for(int j = 0; j < part[i].numPartGridFiles; ++j) - { - float *val = NULL; - size_t sz = sizeof(float) * part[i].pmf[j].getSize(); - //gpuErrchk(cudaMalloc(pmf, sizeof(BaseGrid))); - gpuErrchk(cudaMalloc(&val, sz)); - gpuErrchk(cudaMemcpyAsync(val, part[i].pmf[j].val, sz, cudaMemcpyHostToDevice)); - BaseGrid *pmf_h = new BaseGrid(part[i].pmf[j]); - pmf_h->val = val; - gpuErrchk(cudaMemcpy(pmf+j, pmf_h, sizeof(BaseGrid), cudaMemcpyHostToDevice)); - pmf_h->val = NULL; - } } - b->pmf = pmf; // Copy the diffusion grid if (part[i].diffusionGrid != NULL) { - float *val = NULL; - size_t sz = sizeof(float) * part[i].diffusionGrid->getSize(); - BaseGrid *diffusionGrid_h = new BaseGrid(*part[i].diffusionGrid); - gpuErrchk(cudaMalloc(&diffusionGrid, sizeof(BaseGrid))); - gpuErrchk(cudaMalloc(&val, sz)); - diffusionGrid_h->val = val; - gpuErrchk(cudaMemcpyAsync(diffusionGrid, diffusionGrid_h, sizeof(BaseGrid), - cudaMemcpyHostToDevice)); - gpuErrchk(cudaMemcpy(val, part[i].diffusionGrid->val, sz, cudaMemcpyHostToDevice)); - diffusionGrid_h->val = NULL; + b->diffusionGrid = part[i].diffusionGrid->copy_to_cuda(); + } else { + b->diffusionGrid = NULL; } //b->pmf = pmf; - b->diffusionGrid = diffusionGrid; gpuErrchk(cudaMalloc(&part_addr[i], sizeof(BrownianParticleType))); gpuErrchk(cudaMemcpyAsync(part_addr[i], b, sizeof(BrownianParticleType), cudaMemcpyHostToDevice)); diff --git a/src/Configuration.h b/src/Configuration.h index debf4b9f6ea4d0ce8eaa9c52d70d6c57f1efdcbb..9daf4fe39375351724b81ba3f3eb4e1ec48a6a3b 100644 --- a/src/Configuration.h +++ b/src/Configuration.h @@ -11,6 +11,7 @@ #include <algorithm> // sort #include <vector> +#include <map> #include <cuda.h> #include <cuda_runtime.h> @@ -208,6 +209,8 @@ public: //float* partGridFileScale; float **partGridFileScale; //int *numPartGridFiles; + std::map<std::string,BaseGrid> part_grid_dictionary; + std::map<std::string,BaseGrid*> part_grid_dictionary_d; std::vector< std::vector<String> > partRigidBodyGrid; String* partDiffusionGridFile; String* partForceXGridFile; diff --git a/src/GrandBrownTown.cu b/src/GrandBrownTown.cu index a8576e54ab0e07474cdd18db2c6e138f27caf5da..1fb98bc084e49aca00279e5c91611bfbe0a9f32e 100644 --- a/src/GrandBrownTown.cu +++ b/src/GrandBrownTown.cu @@ -1664,7 +1664,7 @@ void GrandBrownTown::writeRestart(int repID) const /*the center is defined by the first pmf*/ void GrandBrownTown::initialCondCen() { for (int i = 0; i < num; i++) - pos[i] = part[ type[i] ].pmf->getCenter(); + pos[i] = part[ type[i] ].pmf[0]->getCenter(); } @@ -1688,7 +1688,7 @@ Vector3 GrandBrownTown::findPos(int typ) { const float ry = sysDim.y * randoGen->uniform(); const float rz = sysDim.z * randoGen->uniform(); r = sys->wrap( Vector3(rx, ry, rz) ); - } while (pt.pmf->interpolatePotential(r) > *pt.meanPmf); + } while (pt.pmf[0]->interpolatePotential(r) > *pt.meanPmf); return r; } @@ -1701,7 +1701,7 @@ Vector3 GrandBrownTown::findPos(int typ, float minZ) { const float ry = sysDim.y * randoGen->uniform(); const float rz = sysDim.z * randoGen->uniform(); r = sys->wrap( Vector3(rx, ry, rz) ); - } while (pt.pmf->interpolatePotential(r) > *pt.meanPmf and fabs(r.z) > minZ); + } while (pt.pmf[0]->interpolatePotential(r) > *pt.meanPmf and fabs(r.z) > minZ); return r; } diff --git a/src/GrandBrownTown.cuh b/src/GrandBrownTown.cuh index 4a2211c6c407f16871f93ab7aa9e33f5defa9a2b..c4c059aea09aafe0f9a77c3f0cd6a6a1f71e7073 100644 --- a/src/GrandBrownTown.cuh +++ b/src/GrandBrownTown.cuh @@ -120,10 +120,10 @@ updateKernelNoseHooverLangevin(Vector3* __restrict__ pos, Vector3* __restrict__ ForceEnergy tmp(0.f,0.f); if(!scheme) { const BoundaryCondition& bc = pt.pmf_boundary_conditions[i]; - INTERPOLATE_FORCE(tmp, pt.pmf[i].interpolateForceDLinearly, bc, r0) - } else - tmp = pt.pmf[i].interpolateForceD(r0); - fe.f += tmp.f; + INTERPOLATE_FORCE(tmp, pt.pmf[i]->interpolateForceDLinearly, bc, r0) + } else + tmp = pt.pmf[i]->interpolateForceD(r0); + fe.f += tmp.f * pt.pmf_scale[i]; } //ForceEnergy fe = pt.pmf->interpolateForceDLinearlyPeriodic(r0); @@ -236,10 +236,10 @@ __global__ void updateKernelBAOAB(Vector3* pos, Vector3* momentum, Vector3* __re ForceEnergy tmp(0.f, 0.f); if(!scheme) { BoundaryCondition bc = pt.pmf_boundary_conditions[i]; - INTERPOLATE_FORCE(tmp, pt.pmf[i].interpolateForceDLinearly, bc, r0) + INTERPOLATE_FORCE(tmp, pt.pmf[i]->interpolateForceDLinearly, bc, r0) } else - tmp = pt.pmf[i].interpolateForceD(r0); - fe.f += tmp.f; + tmp = pt.pmf[i]->interpolateForceD(r0); + fe.f += tmp.f * pt.pmf_scale[i]; } #ifndef FORCEGRIDOFF @@ -336,10 +336,12 @@ __global__ void LastUpdateKernelBAOAB(Vector3* pos,Vector3* momentum, Vector3* _ ForceEnergy tmp(0.f, 0.f); if(!scheme) { BoundaryCondition bc = pt.pmf_boundary_conditions[i]; - INTERPOLATE_FORCE(tmp, pt.pmf[i].interpolateForceDLinearly, bc, r0) + INTERPOLATE_FORCE(tmp, pt.pmf[i]->interpolateForceDLinearly, bc, r0) } else - tmp = pt.pmf[i].interpolateForceD(r0); + tmp = pt.pmf[i]->interpolateForceD(r0); + tmp.f = tmp.f * pt.pmf_scale[i]; + tmp.e = tmp.e * pt.pmf_scale[i]; fe += tmp; //fe.e += tmp.e; } @@ -468,9 +470,11 @@ void updateKernel(Vector3* pos, Vector3* __restrict__ forceInternal, int type[], ForceEnergy tmp(0.f,0.f); if(!scheme) { BoundaryCondition bc = pt.pmf_boundary_conditions[i]; - INTERPOLATE_FORCE(tmp, pt.pmf[i].interpolateForceDLinearly, bc, p) + INTERPOLATE_FORCE(tmp, pt.pmf[i]->interpolateForceDLinearly, bc, p) } else - tmp = pt.pmf[i].interpolateForceD(p); + tmp = pt.pmf[i]->interpolateForceD(p); + tmp.f *= pt.pmf_scale[i]; + tmp.e *= pt.pmf_scale[i]; fe += tmp; } #ifndef FORCEGRIDOFF @@ -550,10 +554,12 @@ void updateKernel(Vector3* pos, Vector3* __restrict__ forceInternal, int type[], float en_local = 0.f; for(int i = 0; i < pt.numPartGridFiles; ++i) { + float en_tmp = 0.0f; if(!scheme) - en_local += pt.pmf[i].interpolatePotentialLinearly(tmp); + en_tmp = pt.pmf[i]->interpolatePotentialLinearly(tmp); else - en_local += pt.pmf[i].interpolatePotential(tmp); + en_tmp = pt.pmf[i]->interpolatePotential(tmp); + en_tmp *= pt.pmf_scale[i]; } energy[idx] += en_local; }