diff --git a/src/ComputeForce.cu b/src/ComputeForce.cu index 5144a55a20abcb1108bdc63caaf65e58f8437fcd..4f39b3fe3a905fca9703f03423b357a401bef198 100644 --- a/src/ComputeForce.cu +++ b/src/ComputeForce.cu @@ -47,7 +47,9 @@ ComputeForce::ComputeForce(const Configuration& c, const int numReplicas = 1) : numBonds(c.numBonds), numTabBondFiles(c.numTabBondFiles), numExcludes(c.numExcludes), numAngles(c.numAngles), numTabAngleFiles(c.numTabAngleFiles), numDihedrals(c.numDihedrals), - numTabDihedralFiles(c.numTabDihedralFiles), numRestraints(c.numRestraints), numReplicas(numReplicas) { + numTabDihedralFiles(c.numTabDihedralFiles), numRestraints(c.numRestraints), + numGroupSites(c.numGroupSites), + numReplicas(numReplicas) { // Grow vectors for per-gpu device pointers for (int i = 0; i < gpuman.gpus.size(); ++i) { @@ -235,7 +237,7 @@ ComputeForce::ComputeForce(const Configuration& c, const int numReplicas = 1) : gridSize = num / NUM_THREADS + 1; // Create and allocate the energy arrays - gpuErrchk(cudaMalloc(&energies_d, sizeof(float) * num * numReplicas)); + gpuErrchk(cudaMalloc(&energies_d, sizeof(float) * (num+numGroupSites) * numReplicas)); cudaEventCreate(&start); cudaEventCreate(&stop); } @@ -674,7 +676,7 @@ float ComputeForce::computeFull(bool get_energy) { if (get_energy) { gpuErrchk(cudaDeviceSynchronize()); thrust::device_ptr<float> en_d(energies_d); - energy = thrust::reduce(en_d, en_d + num); + energy = thrust::reduce(en_d, en_d + num + numGroupSites); } return energy; @@ -742,7 +744,7 @@ float ComputeForce::compute(bool get_energy) { if (get_energy) { gpuErrchk(cudaDeviceSynchronize()); thrust::device_ptr<float> en_d(energies_d); - energy = thrust::reduce(en_d, en_d + num); + energy = thrust::reduce(en_d, en_d + num + numGroupSites); } return energy; @@ -771,7 +773,7 @@ float ComputeForce::computeTabulated(bool get_energy) { { //clearEnergies<<< nb, numThreads >>>(energies_d,num); //gpuErrchk(cudaDeviceSynchronize()); - cudaMemset((void*)energies_d, 0, sizeof(float)*num*numReplicas); + cudaMemset((void*)energies_d, 0, sizeof(float)*(num+numGroupSites)*numReplicas); computeTabulatedEnergyKernel<<< nb, numThreads >>>(forceInternal_d[0], pos_d[0], sys_d[0], cutoff2, numPairs_d[0], pairLists_d[0], pairTabPotType_d[0], tablePot_d[0], energies_d); } @@ -892,7 +894,7 @@ float ComputeForce::computeTabulatedFull(bool get_energy) { void ComputeForce::copyToCUDA(Vector3* forceInternal, Vector3* pos) { - const size_t tot_num = num * numReplicas; + const size_t tot_num = (num+numGroupSites) * numReplicas; for (std::size_t i = 0; i < gpuman.gpus.size(); ++i) { gpuman.use(i); @@ -924,7 +926,7 @@ void ComputeForce::copyToCUDA(Vector3* forceInternal, Vector3* pos) for (std::size_t i = 0; i < gpuman.gpus.size(); ++i) { gpuman.use(i); - gpuErrchk(cudaMalloc(&forceInternal_d[i], sizeof(Vector3) * num * numReplicas)); + gpuErrchk(cudaMalloc(&forceInternal_d[i], sizeof(Vector3) * tot_num)); } gpuman.use(0); gpuErrchk(cudaMemcpyAsync(forceInternal_d[0], forceInternal, sizeof(Vector3) * tot_num, cudaMemcpyHostToDevice)); @@ -933,17 +935,18 @@ void ComputeForce::copyToCUDA(Vector3* forceInternal, Vector3* pos) } void ComputeForce::copyToCUDA(Vector3* forceInternal, Vector3* pos, Vector3* mom) { - const size_t tot_num = num * numReplicas; + const size_t tot_num = (num+numGroupSites) * numReplicas; + const size_t small_tot_num = num * numReplicas; gpuErrchk(cudaMalloc(&mom_d, sizeof(Vector3) * tot_num)); - gpuErrchk(cudaMemcpyAsync(mom_d, mom, sizeof(Vector3) * tot_num, cudaMemcpyHostToDevice)); + gpuErrchk(cudaMemcpyAsync(mom_d, mom, sizeof(Vector3) * small_tot_num, cudaMemcpyHostToDevice)); copyToCUDA(forceInternal,pos); gpuErrchk(cudaDeviceSynchronize()); } void ComputeForce::copyToCUDA(Vector3* forceInternal, Vector3* pos, Vector3* mom, float* random) { - const size_t tot_num = num * numReplicas; + const size_t tot_num = num * numReplicas; gpuErrchk(cudaMalloc(&ran_d, sizeof(float) * tot_num)); gpuErrchk(cudaMemcpyAsync(ran_d, random, sizeof(float) * tot_num, cudaMemcpyHostToDevice)); @@ -953,16 +956,19 @@ void ComputeForce::copyToCUDA(Vector3* forceInternal, Vector3* pos, Vector3* mom } void ComputeForce::setForceInternalOnDevice(Vector3* f) { - const size_t tot_num = num * numReplicas; + // const size_t tot_num = (num+numGroupSites) * numReplicas; + assert(numGroupSites == 0); // IMD, the only feature using this function, is currently incompatible with group sites + const size_t tot_num = num * numReplicas; gpuErrchk(cudaMemcpy(forceInternal_d[0], f, sizeof(Vector3) * tot_num, cudaMemcpyHostToDevice)); } - void ComputeForce::copyToCUDA(int simNum, int *type, Bond* bonds, int2* bondMap, Exclude* excludes, int2* excludeMap, Angle* angles, Dihedral* dihedrals, const Restraint* const restraints) { + assert(simNum == numReplicas); // Not sure why we have both of these things + int tot_num = num * simNum; // type_d - gpuErrchk(cudaMalloc(&type_d, sizeof(int) * num * simNum)); - gpuErrchk(cudaMemcpyAsync(type_d, type, sizeof(int) * num * simNum, cudaMemcpyHostToDevice)); + gpuErrchk(cudaMalloc(&type_d, sizeof(int) * tot_num)); + gpuErrchk(cudaMemcpyAsync(type_d, type, sizeof(int) * tot_num, cudaMemcpyHostToDevice)); if (numBonds > 0) { diff --git a/src/ComputeForce.cuh b/src/ComputeForce.cuh index 9109ab48c94f6bacb7d121e32990c67fa8bf79f9..8735799be098867866ddc865fa39579e3ff77bc5 100644 --- a/src/ComputeForce.cuh +++ b/src/ComputeForce.cuh @@ -116,6 +116,7 @@ void computeFullKernel(Vector3 force[], Vector3 pos[], int type[], } } + __global__ void computeSoftcoreFullKernel(Vector3 force[], Vector3 pos[], int type[], float tableEps[], float tableRad6[], diff --git a/src/ComputeForce.h b/src/ComputeForce.h index fc111068841ff730fc03a043e4320cad161c7d34..5d602e4a609482a73ba7c639f8a575fc3023b2a7 100644 --- a/src/ComputeForce.h +++ b/src/ComputeForce.h @@ -150,12 +150,12 @@ public: void clear_force() { for (std::size_t i = 0; i < gpuman.gpus.size(); ++i) { gpuman.use(i); - gpuErrchk(cudaMemsetAsync((void*)(forceInternal_d[i]),0,num*numReplicas*sizeof(Vector3))); + gpuErrchk(cudaMemsetAsync((void*)(forceInternal_d[i]),0,(num+numGroupSites)*numReplicas*sizeof(Vector3))); } gpuman.use(0); // TODO move to a paradigm where gpu0 is not preferentially treated } void clear_energy() { - gpuErrchk(cudaMemsetAsync((void*)(energies_d), 0, sizeof(float)*num*numReplicas)); // TODO make async + gpuErrchk(cudaMemsetAsync((void*)(energies_d), 0, sizeof(float)*(num+numGroupSites)*numReplicas)); // TODO make async } HOST DEVICE @@ -181,6 +181,11 @@ private: int numTabAngleFiles; int numDihedrals; int numTabDihedralFiles; + + int numGroupSites; + int* comSiteParticles; + int* comSiteParticles_d; + float *tableEps, *tableRad6, *tableAlpha; TabulatedPotential **tablePot; // 100% on Host TabulatedPotential **tableBond; diff --git a/src/Configuration.cpp b/src/Configuration.cpp index 66c420671f8ffdc09eadc97fa5d0e4ed1721597b..41b4cf3a1dd9c97bd1bf9b28b2591058627b0287 100644 --- a/src/Configuration.cpp +++ b/src/Configuration.cpp @@ -96,12 +96,14 @@ Configuration::Configuration(const char* config_file, int simNum, bool debug) : } } // end result: variable "num" is set - // Set the number capacity printf("\n%d particles\n", num); if (numCap <= 0) numCap = numCapFactor*num; // max number of particles if (numCap <= 0) numCap = 20; + if (readGroupSitesFromFile) readGroups(); + printf("%d groups\n", numGroupSites); + // Allocate particle variables. pos = new Vector3[num * simNum]; //Han-Yi Chou @@ -764,6 +766,10 @@ void Configuration::setDefaults() { numPartsFromFile = 0; partsFromFile = NULL; readBondsFromFile = false; + numGroupSites = 0; + readGroupSitesFromFile = false; + + numBonds = 0; bonds = NULL; bondMap = NULL; @@ -1000,6 +1006,13 @@ int Configuration::readParameters(const char * config_file) { readPartsFromFile = true; loadedCoordinates = true; } + } else if (param == String("inputGroups")) { + if (readGroupSitesFromFile) { + printf("WARNING: More than one group file specified. Ignoring new file.\n"); + } else { + groupSiteFile = value; + readGroupSitesFromFile = true; + } } else if (param == String("inputBonds")) { if (readBondsFromFile) { printf("WARNING: More than one bond file specified. Ignoring new bond file.\n"); @@ -1391,6 +1404,62 @@ void Configuration::readAtoms() { } } } +void Configuration::readGroups() { + // Open the file + const size_t line_char = 16384; + FILE* inp = fopen(groupSiteFile.val(), "r"); + char line[line_char]; + + // If the particle file cannot be found, exit the program + if (inp == NULL) { + printf("ERROR: Could not open `%s'.\n", partFile.val()); + exit(1); + } + + // Our particle array has a starting capacity of 256 + // We will expand this later if we need to. + // int capacity = 256; + numGroupSites = 0; + + // partsFromFile = new String[capacity]; + // indices = new int[capacity]; + // indices[0] = 0; + + // Get and process all lines of input + while (fgets(line, line_char, inp) != NULL) { + // Lines in the particle file that begin with # are comments + if (line[0] == '#') continue; + + String s(line); + int numTokens = s.tokenCount(); + + // Break the line down into pieces (tokens) so we can process them individually + String* tokenList = new String[numTokens]; + s.tokenize(tokenList); + + // Legitimate GROUP input lines have at least 3 tokens: + // GROUP | Atom_1_idx | Atom_2_idx | ... + // A line without exactly six tokens should be discarded. + if (numTokens < 3) { + printf("Error: Invalid group file line: %s\n", line); + exit(-1); + } + + // Make sure the index of this particle is unique. + // NOTE: The particle list is sorted by index. + std::vector<int> tmp; + for (int i=1; i < numTokens; ++i) { + const int ai = atoi(tokenList[i].val()); + if (ai >= num) { + printf("Error: Attempted to include invalid particle in group: %s\n", line); + exit(-1); + } + tmp.push_back( ai ); + } + groupSiteData.push_back(tmp); + numGroupSites++; + } +} void Configuration::readBonds() { // Open the file @@ -1441,7 +1510,7 @@ void Configuration::readBonds() { continue; } - if (ind1 < 0 || ind1 >= num || ind2 < 0 || ind2 >=num) { + if (ind1 < 0 || ind1 >= num+numGroupSites || ind2 < 0 || ind2 >= num+numGroupSites) { printf("ERROR: Bond file line '%s' includes invalid index\n", line); exit(1); } @@ -1489,8 +1558,8 @@ void Configuration::readBonds() { * bondMap[i].x is the index in the bonds array where the ith particle's bonds begin * bondMap[i].y is the index in the bonds array where the ith particle's bonds end */ - bondMap = new int2[num]; - for (int i = 0; i < num; i++) { + bondMap = new int2[num+numGroupSites]; + for (int i = 0; i < num+numGroupSites; i++) { bondMap[i].x = -1; bondMap[i].y = -1; } @@ -1651,7 +1720,7 @@ void Configuration::readAngles() { int ind3 = atoi(tokenList[3].val()); String file_name = tokenList[4]; //printf("file_name %s\n", file_name.val()); - if (ind1 >= num or ind2 >= num or ind3 >= num) + if (ind1 >= num+numGroupSites or ind2 >= num+numGroupSites or ind3 >= num+numGroupSites) continue; if (numAngles >= capacity) { @@ -1712,8 +1781,8 @@ void Configuration::readDihedrals() { int ind4 = atoi(tokenList[4].val()); String file_name = tokenList[5]; //printf("file_name %s\n", file_name.val()); - if (ind1 >= num or ind2 >= num - or ind3 >= num or ind4 >= num) + if (ind1 >= num+numGroupSites or ind2 >= num+numGroupSites + or ind3 >= num+numGroupSites or ind4 >= num+numGroupSites) continue; if (numDihedrals >= capacity) { @@ -1772,7 +1841,7 @@ void Configuration::readRestraints() { float y0 = (float) strtod(tokenList[4].val(), NULL); float z0 = (float) strtod(tokenList[5].val(), NULL); - if (id >= num) continue; + if (id >= num+numGroupSites) continue; if (numRestraints >= capacity) { Restraint* temp = restraints; diff --git a/src/Configuration.h b/src/Configuration.h index 0895235c7929bddd22f0598ccb807cce10426740..debf4b9f6ea4d0ce8eaa9c52d70d6c57f1efdcbb 100644 --- a/src/Configuration.h +++ b/src/Configuration.h @@ -56,6 +56,7 @@ class Configuration { int readParameters(const char* config_file); void readAngles(); void readAtoms(); + void readGroups(); void readBonds(); void readExcludes(); void addExclusion(int ind1, int ind2); @@ -196,6 +197,7 @@ public: String dihedralFile; String restraintFile; bool readPartsFromFile; + bool readGroupSitesFromFile; bool readBondsFromFile; bool readExcludesFromFile; bool readAnglesFromFile; @@ -216,6 +218,10 @@ public: int* partTableIndex0; int* partTableIndex1; + String groupSiteFile; + int numGroupSites; + std::vector<std::vector<int>> groupSiteData; + String* bondTableFile; int numTabBondFiles; int2* bondMap; diff --git a/src/GrandBrownTown.cu b/src/GrandBrownTown.cu index 59668438f50436ed25ef0ae401423a134d84b6a7..f1e3b73c071bc3b460f40fb1b7e9bbad78582654 100644 --- a/src/GrandBrownTown.cu +++ b/src/GrandBrownTown.cu @@ -97,9 +97,10 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg, bonds = c.bonds; numCap = c.numCap; // max number of particles num = c.num; // current number of particles + numGroupSites = c.numGroupSites; // Allocate arrays of positions, types and serial numbers - pos = new Vector3[num * numReplicas]; // [HOST] array of particles' positions. + pos = new Vector3[(num+numGroupSites) * numReplicas]; // [HOST] array of particles' positions. // Allocate arrays of momentum Han-Yi Chou if(particle_dynamic == String("Langevin") || particle_dynamic == String("NoseHooverLangevin")) { @@ -254,6 +255,7 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg, //MLog: I did the other halve of the copyToCUDA function from the Configuration class here, keep an eye on any mistakes that may occur due to the location. internal -> copyToCUDA(c.simNum, c.type, c.bonds, c.bondMap, c.excludes, c.excludeMap, c.angles, c.dihedrals, c.restraints); + if (numGroupSites > 0) init_cuda_group_sites(); // TODO: check for duplicate potentials if (c.tabulatedPotential) { @@ -303,6 +305,13 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg, internal->addDihedralPotential(dihedralTableFile[p].val(), p, dihedrals); } + auto _get_index = [this](int idx, int replica) { + // Convenient lambda function to deal with increasingly complicated indexing + auto num = this->num; auto numReplicas = this->numReplicas; auto numGroupSites = this->numGroupSites; + idx = (idx < num) ? idx + replica*num : (idx-num) + numReplicas*num + replica * numGroupSites; + return idx; + }; + //Mlog: this is where we create the bondList. if (numBonds > 0) { bondList = new int3[ (numBonds / 2) * numReplicas ]; @@ -318,8 +327,7 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg, fprintf(stderr,"Error: bondfile '%s' was not read with tabulatedBondFile command.\n", bonds[i].fileName.val()); exit(1); } - - bondList[j] = make_int3( (bonds[i].ind1 + k * num), (bonds[i].ind2 + k * num), bonds[i].tabFileIndex ); + bondList[j] = make_int3( _get_index(bonds[i].ind1, k), _get_index(bonds[i].ind2, k), bonds[i].tabFileIndex ); // cout << "Displaying: bondList["<< j <<"].x = " << bondList[j].x << ".\n" // << "Displaying: bondList["<< j <<"].y = " << bondList[j].y << ".\n" // << "Displaying: bondList["<< j <<"].z = " << bondList[j].z << ".\n"; @@ -338,7 +346,7 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg, fprintf(stderr,"Error: anglefile '%s' was not read with tabulatedAngleFile command.\n", angles[i].fileName.val()); exit(1); } - angleList[i+k*numAngles] = make_int4( angles[i].ind1+k*num, angles[i].ind2+k*num, angles[i].ind3+k*num, angles[i].tabFileIndex ); + angleList[i+k*numAngles] = make_int4( _get_index(angles[i].ind1,k), _get_index(angles[i].ind2,k), _get_index(angles[i].ind3,k), angles[i].tabFileIndex ); } } } @@ -352,14 +360,14 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg, fprintf(stderr,"Error: dihedralfile '%s' was not read with tabulatedDihedralFile command.\n", dihedrals[i].fileName.val()); exit(1); } - dihedralList[i+k*numDihedrals] = make_int4( dihedrals[i].ind1+k*num, dihedrals[i].ind2+k*num, dihedrals[i].ind3+k*num, dihedrals[i].ind4+k*num); + dihedralList[i+k*numDihedrals] = make_int4( _get_index(dihedrals[i].ind1,k), _get_index(dihedrals[i].ind2,k), _get_index(dihedrals[i].ind3,k), _get_index(dihedrals[i].ind4,k) ); dihedralPotList[i+k*numDihedrals] = dihedrals[i].tabFileIndex; } } } internal->copyBondedListsToGPU(bondList,angleList,dihedralList,dihedralPotList); - forceInternal = new Vector3[num * numReplicas]; + forceInternal = new Vector3[(num+numGroupSites)*numReplicas]; if (fullLongRange != 0) printf("No cell decomposition created.\n"); @@ -570,7 +578,7 @@ void GrandBrownTown::RunNoseHooverLangevin() int numBlocks = (num * numReplicas) / NUM_THREADS + ((num * numReplicas) % NUM_THREADS == 0 ? 0 : 1); int tl = temperatureGridFile.length(); Vector3 *force_d; - gpuErrchk(cudaMalloc((void**)&force_d, sizeof(Vector3)*num * numReplicas)); + gpuErrchk(cudaMalloc((void**)&force_d, sizeof(Vector3)*(num+numGroupSites) * numReplicas)); printf("Configuration: %d particles | %d replicas\n", num, numReplicas); for (int i=0; i< gpuman.gpus.size(); ++i) { @@ -593,11 +601,14 @@ void GrandBrownTown::RunNoseHooverLangevin() const std::vector<Vector3*>& _pos = internal->getPos_d(); #ifdef USE_NCCL if (gpuman.gpus.size() > 1) { - gpuman.nccl_broadcast(0, _pos, _pos, num*numReplicas, -1); + gpuman.nccl_broadcast(0, _pos, _pos, (num+numGroupSites)*numReplicas, -1); } #endif gpuman.sync(); + if (numGroupSites > 0) updateGroupSites<<<(numGroupSites/32+1),32>>>(_pos[0], groupSiteData_d, num, numGroupSites, numReplicas); + + #ifdef _OPENMP omp_set_num_threads(4); #endif @@ -686,14 +697,16 @@ void GrandBrownTown::RunNoseHooverLangevin() #ifdef USE_NCCL if (gpuman.gpus.size() > 1) { const std::vector<Vector3*>& _f = internal->getForceInternal_d(); - gpuman.nccl_reduce(0, _f, _f, num*numReplicas, -1); + gpuman.nccl_reduce(0, _f, _f, (num+numGroupSites)*numReplicas, -1); } #endif + }//if step == 1 internal->clear_energy(); gpuman.sync(); + if (numGroupSites > 0) distributeGroupSiteForces<<<(numGroupSites/32+1),32>>>(internal->getForceInternal_d()[0], internal->getPos_d()[0], groupSiteData_d, num, numGroupSites, numReplicas); if(particle_dynamic == String("Langevin")) updateKernelBAOAB<<< numBlocks, NUM_THREADS >>>(internal->getPos_d()[0], internal->getMom_d(), internal->getForceInternal_d()[0], internal->getType_d(), part_d, kT, kTGrid_d, electricField, tl, timestep, num, sys_d, randoGen_d, numReplicas, ParticleInterpolationType); @@ -823,11 +836,13 @@ void GrandBrownTown::RunNoseHooverLangevin() if (gpuman.gpus.size() > 1) { const std::vector<Vector3*>& _p = internal->getPos_d(); nccl_broadcast_streams[0] = gpuman.gpus[0].get_next_stream(); - gpuman.nccl_broadcast(0, _p, _p, num*numReplicas, nccl_broadcast_streams); + gpuman.nccl_broadcast(0, _p, _p, (num+numGroupSites)*numReplicas, nccl_broadcast_streams); } #endif } + if (numGroupSites > 0) updateGroupSites<<<(numGroupSites/32+1),32>>>(internal->getPos_d()[0], groupSiteData_d, num, numGroupSites, numReplicas); + if (interparticleForce) { // 'tabulatedPotential' - determines whether interaction is described with tabulated potentials or formulas @@ -900,6 +915,8 @@ void GrandBrownTown::RunNoseHooverLangevin() RBC[i]->updateForces((internal->getPos_d()[0])+i*num, (internal->getForceInternal_d()[0])+i*num, s, (internal->getEnergy())+i*num, get_energy, RigidBodyInterpolationType, sys, sys_d); + if (numGroupSites > 0) distributeGroupSiteForces<<<(numGroupSites/32+1),32>>>(internal->getForceInternal_d()[0], internal->getPos_d()[0], groupSiteData_d, num, numGroupSites, numReplicas); + if(particle_dynamic == String("Langevin") || particle_dynamic == String("NoseHooverLangevin")) LastUpdateKernelBAOAB<<< numBlocks, NUM_THREADS >>>(internal -> getPos_d()[0], internal -> getMom_d(), internal -> getForceInternal_d()[0], internal -> getType_d(), part_d, kT, kTGrid_d, electricField, tl, timestep, num, sys_d, randoGen_d, numReplicas, internal->getEnergy(), get_energy, @@ -1722,6 +1739,40 @@ void GrandBrownTown::InitNoseHooverBath(int N) } printf("Done in nose-hoover bath\n"); } + +void GrandBrownTown::init_cuda_group_sites() +{ + + // Count the number of particles that form groups + int num_particles = 0; + for (auto it = conf.groupSiteData.begin(); it != conf.groupSiteData.end(); ++it) { + num_particles += it->size(); + } + + // Create GPU-friendly data structure + // TODO make this work for replicas + int* tmp = new int[numGroupSites+1+num_particles]; + num_particles = 0; + int i = 0; + for (auto it = conf.groupSiteData.begin(); it != conf.groupSiteData.end(); ++it) { + tmp[i] = num_particles+numGroupSites; + for (auto it2 = it->begin(); it2 != it->end(); ++it2) { + tmp[num_particles+numGroupSites+1] = *it2; + num_particles++; + } + i++; + } + assert(i == numGroupSites); + tmp[i] = num_particles; + + // Copy data structure to GPU + gpuErrchk(cudaMalloc((void**) &groupSiteData_d, sizeof(int)*(numGroupSites+1+num_particles))); + gpuErrchk(cudaMemcpy(groupSiteData_d, tmp, sizeof(int)*(numGroupSites+1+num_particles), cudaMemcpyHostToDevice)); + // TODO deallocate CUDA + delete[] tmp; + +} + // ----------------------------------------------------------------------------- // Initialize file for recording ionic current void GrandBrownTown::newCurrent(int repID) const { diff --git a/src/GrandBrownTown.cuh b/src/GrandBrownTown.cuh index ba31f468596616e431b35c7bc82998ef69659c92..589fc2d09300b54f8760260eb4237b994e96eaa5 100644 --- a/src/GrandBrownTown.cuh +++ b/src/GrandBrownTown.cuh @@ -607,6 +607,55 @@ inline Vector3 step(Vector3& r0, float kTlocal, Vector3 force, float diffusion, return sys->wrap(r); } +__global__ +void updateGroupSites(Vector3 pos[], int* groupSiteData, int num, int numGroupSites, int numReplicas) { + int i = blockIdx.x * blockDim.x + threadIdx.x; + + // For all threads representing a valid pair of particles + if (i < numGroupSites*numReplicas) { + pos[num*numReplicas + i] = Vector3(0.0f); + } + + // For all threads representing a valid pair of particles + if (i < numGroupSites*numReplicas) { + const int imod = i % numReplicas; + const int rep = i/numReplicas; + const int start = groupSiteData[imod]; + const int finish = groupSiteData[imod+1]; + float weight = 1.0 / (finish-start); + + for (int j = start; j < finish; j++) { + const int aj = groupSiteData[j] + num*rep; + pos[num*numReplicas + i] += weight * pos[aj]; + } + } +} + +__global__ +void distributeGroupSiteForces(Vector3 force[], Vector3 pos[], int* groupSiteData, int num, int numGroupSites, int numReplicas) { + // TODO, handle groupsite energies + int i = blockIdx.x * blockDim.x + threadIdx.x; + + // For all threads representing a valid pair of particles + if (i < numGroupSites*numReplicas) { + pos[num*numReplicas + i] = Vector3(0.0f); + } + + // For all threads representing a valid pair of particles + if (i < numGroupSites*numReplicas) { + const int imod = i % numReplicas; + const int rep = i/numReplicas; + const int start = groupSiteData[imod]; + const int finish = groupSiteData[imod+1]; + float weight = 1.0 / (finish-start); + + for (int j = start; j < finish; j++) { + const int aj = groupSiteData[j] + num*rep; + atomicAdd( force+aj, weight * force[num*numReplicas+i] ); + } + } +} + __global__ void devicePrint(RigidBodyType* rb[]) { // printf("Device printing\n"); int i = 0; diff --git a/src/GrandBrownTown.h b/src/GrandBrownTown.h index bb63a5c580de4a141321c63285003481dfc15c40..c6479a081fb2b4461493f49269c1e67b29eb7323 100644 --- a/src/GrandBrownTown.h +++ b/src/GrandBrownTown.h @@ -102,6 +102,9 @@ private: //Initialize the Nose-Hoover auxilliary variables void InitNoseHooverBath(int N); //curandState_t *randoDevice; + + void init_cuda_group_sites(); + public: // Compute the current in nanoamperes. float current(float t) const; @@ -226,6 +229,10 @@ private: int numExcludes; int numAngles; int numDihedrals; + + int numGroupSites; + int* groupSiteData_d; + String partFile; String bondFile; String excludeFile;