diff --git a/src/ComputeForce.cu b/src/ComputeForce.cu index 74710b86fd0fd131f53c22e372c85e341c809450..727ee61829921996219f25b4b3a539f266da237a 100644 --- a/src/ComputeForce.cu +++ b/src/ComputeForce.cu @@ -515,45 +515,18 @@ void ComputeForce::decompose() { // initializePairlistArrays int nCells = decomp.nCells.x * decomp.nCells.y * decomp.nCells.z; - - // int blocksPerCell = 10; - /* cuMemGetInfo(&free,&total); */ /* printf("Free memory: %zu / %zu\n", free, total); */ - // const int NUMTHREADS = 128; - //const size_t nBlocks = (num * numReplicas) / NUM_THREADS + 1; - // const size_t nBlocks = nCells*blocksPerCell; - - /* clearPairlists<<< 1, 32 >>>(pos, num, numReplicas, sys_d[0], decomp_d); */ - /* gpuErrchk(cudaDeviceSynchronize()); */ - /* pairlistTest<<< nBlocks, NUMTHREADS >>>(pos, num, numReplicas, */ - /* sys_d[0], decomp_d, nCells, blocksPerCell, */ - /* numPairs_d[0], pairListListI_d, pairListListJ_d); */ - /* gpuErrchk(cudaDeviceSynchronize()); */ - int tmp = 0; gpuErrchk(cudaMemcpyAsync(numPairs_d[0], &tmp, sizeof(int), cudaMemcpyHostToDevice)); gpuErrchk(cudaDeviceSynchronize()); - // printf("Pairlistdist: %f\n",sqrt(pairlistdist2)); #ifdef DEBUGEXCLUSIONS initExSum(); gpuErrchk(cudaDeviceSynchronize()); /* RBTODO: sync needed here? */ #endif - //Han-Yi Chou bind texture - //printf("%d\n", sizeof(Vector3)); - //gpuErrchk(cudaBindTexture(0, PosTex, pos_d[0],sizeof(Vector3)*num*numReplicas)); - //gpuErrchk(cudaBindTexture(0,CellsTex, decomp_d->getCells_d(),sizeof(CellDecomposition::cell_t)*num*numReplicas)); - -//#if __CUDA_ARCH__ >= 300 - //createPairlists_debug<<< 2048, 64 >>>(pos_d[0], num, numReplicas, sys_d[0], decomp_d, nCells, numPairs_d[0], pairLists_d[0], numParts, type_d, pairTabPotType_d[0], excludes_d, excludeMap_d, numExcludes, pairlistdist2); - //#ifdef NEW - //for sm52 - //createPairlists<32,64,1><<< dim3(256,128,numReplicas),dim3(32,1,1)>>>(pos_d[0], num, numReplicas, sys_d[0], decomp_d, nCells, numPairs_d[0], - //GTX 980 - //Han-Yi Chou 2017 my code #if __CUDA_ARCH__ >= 520 createPairlists<64,64,8><<<dim3(128,128,numReplicas),dim3(64,1,1)>>>(pos_d[0], num+num_rb_attached_particles, numReplicas, sys_d[0], decomp_d, nCells, numPairs_d[0], @@ -577,77 +550,6 @@ void ComputeForce::decompose() { } gpuman.sync(); #endif - - //createPairlists<64,64><<< dim3(256,128,numReplicas),dim3(64,1,1)>>>(pos_d[0], num, numReplicas, sys_d[0], decomp_d, nCells, numPairs_d[0], - // pairLists_d[0], numParts, type_d, pairTabPotType_d[0], excludes_d, - // excludeMap_d, numExcludes, pairlistdist2); - - //#else - //createPairlists_debug<<< 2048, 64 >>>(pos_d[0], num, numReplicas, sys_d[0], decomp_d, nCells, numPairs_d[0], pairLists_d[0], numParts, type_d, - // pairTabPotType_d[0], excludes_d, excludeMap_d, numExcludes, pairlistdist2); - //#endif -//#else - // Use shared memory for warp_bcast function - //createPairlists<<< 2048, 64, 2048/WARPSIZE >>>(pos_d[0], num, numReplicas, sys_d[0], decomp_d, nCells, numPairs_d[0], pairLists_d[0], numParts, type_d, pairTabPotType_d[0], excludes_d, excludeMap_d, numExcludes, pairlistdist2); - //#ifdef NEW - //for sm52 - //createPairlists<32,64,1><<<dim3(256,128,numReplicas),dim3(32,1,1)>>>(pos_d[0], num, numReplicas, sys_d[0], decomp_d, nCells, numPairs_d[0], - //GTX 980 - //createPairlists<64,64,8><<<dim3(128,128,numReplicas),dim3(64,1,1)>>>(pos_d[0], num, numReplicas, sys_d[0], decomp_d, nCells, numPairs_d[0], - //GTX 680 - //createPairlists<64,64,8><<<dim3(256,256,numReplicas),dim3(64,1,1)>>>(pos_d[0], num, numReplicas, sys_d[0], decomp_d, nCells, numPairs_d[0], - // pairLists_d[0], numParts, type_d, pairTabPotType_d[0], excludes_d, - // excludeMap_d, numExcludes, pairlistdist2); - //createPairlists<64,64><<<dim3(256,128,numReplicas),dim3(64,1,1)>>>(pos_d[0], num, numReplicas, sys_d[0], decomp_d, nCells, numPairs_d[0], - // pairLists_d[0], numParts, type_d, pairTabPotType_d[0], excludes_d, - // excludeMap_d, numExcludes, pairlistdist2); - - //#else - //createPairlists<<< 2048, 64, 2048/WARPSIZE >>>(pos_d[0], num, numReplicas, sys_d[0], decomp_d, nCells, numPairs_d[0], pairLists_d[0], numParts, type_d, - // pairTabPotType_d[0], excludes_d, excludeMap_d, numExcludes, pairlistdist2, CellNeighborsList); - //#endif - -//#endif -#if 0 -//////debug section - // DEBUGING - gpuErrchk(cudaMemcpy(&tmp, numPairs_d[0], sizeof(int), cudaMemcpyDeviceToHost)); - //printf("CreatePairlist found %d pairs\n",tmp); - gpuErrchk(cudaDeviceSynchronize()); - - gpuErrchk( cudaProfilerStart() ); - - // Reset the cell decomposition. - if (decomp_d) - cudaFree(decomp_d); - - decomp.decompose_d(pos_d[0], num); - decomp_d = decomp.copyToCUDA(); - - gpuErrchk(cudaDeviceSynchronize()); /* RBTODO: sync needed here? */ - int tmp1 = 0; - gpuErrchk(cudaMemcpyAsync(numPairs_d[0], &tmp1, sizeof(int), cudaMemcpyHostToDevice)); - gpuErrchk(cudaDeviceSynchronize()); - // printf("Pairlistdist: %f\n",sqrt(pairlistdist2)); - -#ifdef DEBUGEXCLUSIONS - initExSum(); - gpuErrchk(cudaDeviceSynchronize()); /* RBTODO: sync needed here? */ -#endif - #if __CUDA_ARCH__ >= 300 - createPairlists_debug<<< 2048, 64 >>>(pos_d[0], num, numReplicas, sys_d[0], decomp_d, nCells, numPairs_d[0], pairLists_d[0], numParts, type_d, pairTabPotType_d[0], excludes_d, excludeMap_d, numExcludes, pairlistdist2); -#else - // Use shared memory for warp_bcast function - createPairlists_debug<<< 2048, 64, 2048/WARPSIZE >>>(pos_d[0], num, numReplicas, sys_d[0], decomp_d, nCells, numPairs_d[0], pairLists_d[0], numParts, type_d, pairTabPotType_d[0], excludes_d, excludeMap_d, numExcludes, pairlistdist2); -#endif - gpuErrchk(cudaMemcpy(&tmp1, numPairs_d[0], sizeof(int), cudaMemcpyDeviceToHost)); - printf("Difference CreatePairlist found %d pairs\n",tmp-tmp1); - gpuErrchk(cudaDeviceSynchronize()); - -#ifdef DEBUGEXCLUSIONS - printf("Counted %d exclusions\n", getExSum()); -#endif -#endif } IndexList ComputeForce::decompDim() const { diff --git a/src/ComputeForce.cuh b/src/ComputeForce.cuh index 8735799be098867866ddc865fa39579e3ff77bc5..2d6dac72262eb436802303010731b03040de542e 100644 --- a/src/ComputeForce.cuh +++ b/src/ComputeForce.cuh @@ -296,7 +296,6 @@ void createNeighborsList(const int3 *Cells,int* __restrict__ CellNeighborsList) } } } -//#ifdef NEW template<const int BlockSize,const int Size,const int N> __global__ void createPairlists(Vector3* __restrict__ pos, const int num, const int numReplicas, const BaseGrid* __restrict__ sys, const CellDecomposition* __restrict__ decomp, @@ -424,101 +423,6 @@ __global__ void createPairlists(Vector3* __restrict__ pos, const int num, const } } -#if 0 -template<const int BlockSize,const int Size> -__global__ void createPairlists(Vector3* __restrict__ pos, const int num, const int numReplicas, - const BaseGrid* __restrict__ sys, const CellDecomposition* __restrict__ decomp, - const int nCells,int* g_numPairs, int2* g_pair, int numParts, const int* __restrict__ type, - int* __restrict__ g_pairTabPotType, const Exclude* __restrict__ excludes, - const int2* __restrict__ excludeMap, const int numExcludes, float pairlistdist2) -{ - const int TotalBlocks = gridDim.x * gridDim.y; - const int cells = TotalBlocks / Size; - const int cell_start = (blockIdx.x + gridDim.x * blockIdx.y) / Size; - const int pid_start = (blockIdx.x + gridDim.x * blockIdx.y) % Size; - const int tid = threadIdx.x + blockDim.x * threadIdx.y - + blockDim.x * blockDim.y * threadIdx.z; - const int warpLane = tid % WARPSIZE; - const int repID = blockIdx.z; - - const CellDecomposition::cell_t* __restrict__ cellInfo = decomp->getCells_d(); - - for(int cellid_i = cell_start; cellid_i < nCells; cellid_i += cells) - { - CellDecomposition::range_t rangeI = decomp->getRange(cellid_i,repID); - int Ni = rangeI.last-rangeI.first; - - for(int pid_i = pid_start; pid_i < Ni; pid_i += Size) - { - int ai = cellInfo[rangeI.first+pid_i].particle; - - float4 a = tex1Dfetch<float4>(PosTex,ai); - Vector3 A(a.x,a.y,a.z); - - int2 ex_pair = make_int2(-1,-1); - if(numExcludes > 0 && excludeMap != NULL) - { - ex_pair = excludeMap[ai]; - } - - int currEx = ex_pair.x; - int nextEx = (ex_pair.x >= 0) ? excludes[currEx].ind2 : -1; - - //loop over neighbor directions - for(int idx = 0; idx < 27; ++idx) - { - int neighbor_cell = tex1Dfetch(NeighborsTex,idx+27*cellid_i); - - if(neighbor_cell < 0) - { - continue; - } - - CellDecomposition::range_t rangeJ = decomp->getRange(neighbor_cell,repID); - int Nj = rangeJ.last-rangeJ.first; - - // In each neighbor cell, loop over particles - for(int pid_j = tid; pid_j < Nj; pid_j += BlockSize) - { - int aj = cellInfo[pid_j+rangeJ.first].particle; - if(aj <= ai) - { - continue; - } - - while (nextEx >= 0 && nextEx < ( aj - repID * num)) - { - nextEx = (currEx < ex_pair.y - 1) ? excludes[++currEx].ind2 : -1; - } - - if (nextEx == (aj - repID * num)) - { - #ifdef DEBUGEXCLUSIONS - atomicAggInc( exSum, warpLane ); - #endif - nextEx = (currEx < ex_pair.y - 1) ? excludes[++currEx].ind2 : -1; - continue; - } - - float4 b = tex1Dfetch(PosTex,aj); - Vector3 B(b.x,b.y,b.z); - - float dr = (sys->wrapDiff(A-B)).length2(); - - if(dr < pairlistdist2) - { - int gid = atomicAggInc( g_numPairs, warpLane ); - int pairType = type[ai] + type[aj] * numParts; - - g_pair[gid] = make_int2(ai,aj); - g_pairTabPotType[gid] = pairType; - } - } - } - } - } -} -#endif __global__ void createPairlists_debug(Vector3* __restrict__ pos, const int num, const int numReplicas, const BaseGrid* __restrict__ sys, const CellDecomposition* __restrict__ decomp, @@ -597,104 +501,6 @@ void createPairlists_debug(Vector3* __restrict__ pos, const int num, const int n } } -//#else -#if 0 -__global__ -void createPairlists(Vector3* __restrict__ pos, const int num, const int numReplicas, - const BaseGrid* __restrict__ sys, const CellDecomposition* __restrict__ decomp, - const int nCells, - int* g_numPairs, int2* g_pair, - int numParts, const int* __restrict__ type, int* __restrict__ g_pairTabPotType, - const Exclude* __restrict__ excludes, const int2* __restrict__ excludeMap, const int numExcludes, - float pairlistdist2, const int* __restrict__ CellNeighborsList) { - // TODO: loop over all cells with edges within pairlistdist2 - - // Loop over threads searching for atom pairs - // Each thread has designated values in shared memory as a buffer - // A sync operation periodically moves data from shared to global - const int tid = threadIdx.x; - const int warpLane = tid % WARPSIZE; /* RBTODO: optimize */ - const int split = 32; /* numblocks should be divisible by split */ - /* const int blocksPerCell = gridDim.x/split; */ - const CellDecomposition::cell_t* __restrict__ cellInfo = decomp->getCells(); - for (int cID = 0 + (blockIdx.x % split); cID < nCells; cID += split) { - // for (int cID = blockIdx.x/blocksPerCell; cID < nCells; cID += split ) { - for (int repID = 0; repID < numReplicas; repID++) { - const CellDecomposition::range_t rangeI = decomp->getRange(cID, repID); - - for (int ci = rangeI.first + blockIdx.x/split; ci < rangeI.last; ci += gridDim.x/split) { - // ai - index of the particle in the original, unsorted array - const int ai = cellInfo[ci].particle; - // const CellDecomposition::cell_t celli = decomp->getCellForParticle(ai); - const CellDecomposition::cell_t celli = cellInfo[ci]; - // Vector3 posi = pos[ai]; - - // Same as for bonds, but for exclusions now - const int ex_start = (numExcludes > 0 && excludeMap != NULL) ? excludeMap[ai -repID*num].x : -1; - const int ex_end = (numExcludes > 0 && excludeMap != NULL) ? excludeMap[ai -repID*num].y : -1; - /* - for (int x = -1; x <= 1; ++x) { - for (int y = -1; y <= 1; ++y) { - #pragma unroll(3) - for (int z = -1; z <= 1; ++z) { - */ - for(int x = 0; x < 27; ++x) { - //const int nID = decomp->getNeighborID(celli, x, y, z); - const int nID = CellNeighborsList[x+27*cID];//elli.id]; - if (nID < 0) continue; - - // Initialize exclusions - // TODO: optimize exclusion code (and entire kernel) - int currEx = ex_start; - int nextEx = (ex_start >= 0) ? excludes[currEx].ind2 : -1; - int ajLast = -1; // TODO: remove this sanity check - - const CellDecomposition::range_t range = decomp->getRange(nID, repID); - - for (int n = range.first + tid; n < range.last; n+=blockDim.x) { - const int aj = cellInfo[n].particle; - if (aj <= ai) continue; - - // Skip excludes - // Implementation requires that aj increases monotonically - assert( ajLast < aj ); ajLast = aj; // TODO: remove this sanity check - while (nextEx >= 0 && nextEx < (aj - repID * num)) // TODO get rid of this - nextEx = (currEx < ex_end - 1) ? excludes[++currEx].ind2 : -1; - if (nextEx == (aj - repID * num)) { -#ifdef DEBUGEXCLUSIONS - atomicAggInc( exSum, warpLane ); -#endif - nextEx = (currEx < ex_end - 1) ? excludes[++currEx].ind2 : -1; - continue; - } - // TODO: Skip non-interacting types for efficiency - - // Skip ones that are too far away - const float dr = (sys->wrapDiff(pos[aj] - pos[ai])).length2(); - // const float dr = (sys->wrapDiff(pos[aj] - posi)).length2(); - if (dr > pairlistdist2) continue; - - // Add to pairlist - - int gid = atomicAggInc( g_numPairs, warpLane ); - int pairType = type[ai] + type[aj] * numParts; - /* assert( ai >= 0 ); */ - /* assert( aj >= 0 ); */ - - g_pair[gid] = make_int2(ai,aj); - g_pairTabPotType[gid] = pairType; - - // g_pairDists[gid] = dr; - } // atoms J - //} // z - //} // y - } // x - } // atoms I - } // replicas - /* if (tid == 0) printf("Cell%d: found %d pairs\n",cID,g_numPairs[cID]); */ - } -} -#endif // TODO: deprecate? __global__ void computeKernel(Vector3 force[], Vector3 pos[], int type[], @@ -774,42 +580,6 @@ __global__ void printPairForceCounter() { printf("Computed the force for %d pairs\n", pairForceCounter); } -// ============================================================================ -// Kernel computes forces between Brownian particles (ions) -// using cell decomposition -// -#if 0 -__global__ void computeTabulatedKernel( - Vector3* force, const Vector3* __restrict__ pos, - const BaseGrid* __restrict__ sys, float cutoff2, - const int* __restrict__ g_numPairs, const int2* __restrict__ g_pair, const int* __restrict__ g_pairTabPotType, TabulatedPotential** __restrict__ tablePot) { -//remove int* type. remove bond references - - const int numPairs = *g_numPairs; - for (int i = threadIdx.x+blockIdx.x*blockDim.x; i < numPairs; i+=blockDim.x*gridDim.x) { - const int2 pair = g_pair[i]; - const int ai = pair.x; - const int aj = pair.y; - - // Particle's type and position - const int ind = g_pairTabPotType[i]; - - /* printf("tid,bid,pos[ai(%d)]: %d %d %f %f %f\n", ai, threadIdx.x, blockIdx.x, pos[ai].x, pos[ai].y, pos[ai].z); //*/ - - Vector3 dr = pos[aj] - pos[ai]; - dr = sys->wrapDiff(dr); - - // Calculate the force based on the tabulatedPotential file - float d2 = dr.length2(); - if (tablePot[ind] != NULL && d2 <= cutoff2) { - Vector3 f = tablePot[ind]->computef(dr,d2); - atomicAdd( &force[ai], f ); - atomicAdd( &force[aj], -f ); - } - } -} -#endif -//#if 0 template<const int BlockSize> __device__ inline void _computeTabulatedKernel(Vector3* force, const BaseGrid* __restrict__ sys, float cutoff2, const int numPairs, const int2* __restrict__ g_pair, @@ -870,11 +640,7 @@ __global__ void computeTabulatedKernel(Vector3* force, const BaseGrid* __restric cutoff2, numPairs, g_pair+start, g_pairTabPotType+start, tablePot, pairListsTex, PosTex, pairTabPotTypeTex); -} - - -//#endif - +} __global__ void clearEnergies(float* __restrict__ g_energies, int num) { for (int i = threadIdx.x+blockIdx.x*blockDim.x; i < num; i+=blockDim.x*gridDim.x) { diff --git a/src/GrandBrownTown.cu b/src/GrandBrownTown.cu index 0510a0e41881166cabb49556a27b9c2f278130e6..fe55aad463b5b27ea6f095329ea14ed3917d498e 100644 --- a/src/GrandBrownTown.cu +++ b/src/GrandBrownTown.cu @@ -495,11 +495,11 @@ GrandBrownTown::~GrandBrownTown() { } -//temporary test for Nose-Hoover Langevin dynamics + //Nose Hoover is now implement for particles. -void GrandBrownTown::RunNoseHooverLangevin() +void GrandBrownTown::run() { - //comment this out because this is the origin points Han-Yi Chou + // Open the files for recording ionic currents for (int repID = 0; repID < numReplicas; ++repID) { @@ -508,6 +508,7 @@ void GrandBrownTown::RunNoseHooverLangevin() momentum_writers[repID]->newFile((momentum + repID * num), name, 0.0f, num); // 'pos + (repID*num)' == array-to-pointer decay //random_writers[repID]->newFile(random + (repID * num), name, 0.0f, num); } + // Initialize timers (util.*) wkf_timerhandle timer0, timerS; timer0 = wkf_timer_create(); @@ -1177,505 +1178,6 @@ void GrandBrownTown::RunNoseHooverLangevin() gpuErrchk(cudaFree(force_d)); } // GrandBrownTown::run() -// Run the Brownian Dynamics steps. -void GrandBrownTown::run() { - - RunNoseHooverLangevin(); -#if 0 - printf("\n\n"); - Vector3 runningNetForce(0.0f); - - // Open the files for recording ionic currents - for (int repID = 0; repID < numReplicas; ++repID) { - writers[repID]->newFile(pos + (repID * num), name, 0.0f, num); // 'pos + (repID*num)' == array-to-pointer decay - } - //Han-Yi Chou - if(particle_dynamic == String("Langevin")) - { - for (int repID = 0; repID < numReplicas; ++repID) - { - momentum_writers[repID]->newFile(momentum + (repID * num), name, 0.0f, num); // 'pos + (repID*num)' == array-to-pointer decay - //force_writers[repID]->newFile(force + (repID * num), name, 0.0f, num); - } - } - - // Save (remember) particle positions for later (analysis) - remember(0.0f); - - // Initialize timers (util.*) - wkf_timerhandle timer0, timerS; - timer0 = wkf_timer_create(); - timerS = wkf_timer_create(); - - copyToCUDA(); - if(particle_dynamic == String("Langevin")) - internal -> copyToCUDA(forceInternal, pos,momentum); - else - internal -> copyToCUDA(forceInternal, pos); - - // IMD Stuff - void* sock = NULL; - void* clientsock = NULL; - int length; - if (imd_on) { - printf("Setting up incoming socket\n"); - vmdsock_init(); - sock = vmdsock_create(); - clientsock = NULL; - vmdsock_bind(sock, imd_port); - - printf("Waiting for IMD connection on port %d...\n", imd_port); - vmdsock_listen(sock); - while (!clientsock) { - if (vmdsock_selread(sock, 0) > 0) { - clientsock = vmdsock_accept(sock); - if (imd_handshake(clientsock)) - clientsock = NULL; - } - } - sleep(1); - if (vmdsock_selread(clientsock, 0) != 1 || - imd_recv_header(clientsock, &length) != IMD_GO) { - clientsock = NULL; - } - imdForces = new Vector3[num*numReplicas]; - for (size_t i = 0; i < num; ++i) // clear old forces - imdForces[i] = Vector3(0.0f); - - } // endif (imd_on) - - // Start timers - wkf_timer_start(timer0); - wkf_timer_start(timerS); - - // We haven't done any steps yet. - // Do decomposition if we have to - if (fullLongRange == 0) - { - // cudaSetDevice(0); - internal->decompose(); - gpuErrchk(cudaDeviceSynchronize()); - RBC.updateParticleLists( internal->getPos_d() ); - } - - float t; // simulation time - - int numBlocks = (num * numReplicas) / NUM_THREADS + ((num * numReplicas) % NUM_THREADS == 0 ? 0 : 1); - int tl = temperatureGridFile.length(); - Vector3 *force_d; - gpuErrchk(cudaMalloc((void**)&force_d, sizeof(Vector3)*num * numReplicas)); - - printf("Configuration: %d particles | %d replicas\n", num, numReplicas); - - // Main loop over Brownian dynamics steps - for (long int s = 1; s < steps; s++) - { - // Compute the internal forces. Only calculate the energy when we are about to output. - bool get_energy = ((s % outputEnergyPeriod) == 0); - //At the very first time step, the force is computed - if(s == 1) - { - // 'interparticleForce' - determines whether particles interact with each other - gpuErrchk(cudaMemset((void*)(internal->getForceInternal_d()),0,num*numReplicas*sizeof(Vector3))); - RBC.clearForceAndTorque(); //Han-Yi Chou - - if (interparticleForce) - { - //gpuErrchk(cudaMemset((void*)(internal->getForceInternal_d()),0,num*numReplicas*sizeof(Vector3))); - //RBC.clearForceAndTorque(); //Han-Yi Chou - - // 'tabulatedPotential' - determines whether interaction is described with tabulated potentials or formulas - if (tabulatedPotential) - { - // Using tabulated potentials - // 'fullLongRange' - determines whether 'cutoff' is used - // 0 - use cutoff (cell decomposition) [ N*log(N) ] - // 1 - do not use cutoff [ N^2 ] - switch (fullLongRange) - { - case 0: // [ N*log(N) ] interactions, + cutoff | decomposition - //I want to replace this - // by checking how far particles move to decide whether the pair list should be updated - //if(NeedUpdatePairList()) //ToDo, Han-Yi Chou - //{ - //internal-> decompose(); - // RBC.updateParticleLists( internal->getPos_d() ); - //} - if (s % decompPeriod == 0) - { - // cudaSetDevice(0); - internal -> decompose(); - RBC.updateParticleLists( internal->getPos_d() ); - } - - //MLog: added Bond* bondList to the list of passed in variables. - /*energy = internal->computeTabulated(forceInternal_d, pos_d, type_d, bonds_d, bondMap_d, excludes_d, excludeMap_d, angles_d, dihedrals_d, get_energy);*/ - // energy = internal -> computeTabulated(get_energy); - internal -> computeTabulated(get_energy); - break; - default: - // [ N^2 ] interactions, no cutoff | decompositions - internal->computeTabulatedFull(get_energy); - break; - } - } - else - { - // Not using tabulated potentials. - switch (fullLongRange) - { - case 0: // Use cutoff | cell decomposition. - if (s % decompPeriod == 0) - { - // cudaSetDevice(0); - internal->decompose(); - RBC.updateParticleLists( internal->getPos_d() ); - } - internal->compute(get_energy); - break; - - case 1: // Do not use cutoff - internal->computeFull(get_energy); - break; - - case 2: // Compute only softcore forces. - internal->computeSoftcoreFull(get_energy); - break; - - case 3: // Compute only electrostatic forces. - internal->computeElecFull(get_energy); - break; - } - } - }//if inter-particle force - - gpuErrchk(cudaDeviceSynchronize()); - RBC.updateForces(internal->getPos_d(), internal->getForceInternal_d(), s, RigidBodyInterpolationType); - if(rigidbody_dynamic == String("Langevin")) - { - RBC.SetRandomTorques(); - RBC.AddLangevin(); - } - gpuErrchk(cudaDeviceSynchronize()); - }//if step == 1 - - - //Han-Yi Chou - //update the rigid body positions and orientation - //So far only brownian dynamics is used - //RBC.integrate(sys, s); - - if(particle_dynamic == String("Langevin")) - updateKernelBAOAB<<< numBlocks, NUM_THREADS >>>(internal -> getPos_d(), internal -> getMom_d(), internal -> getForceInternal_d(), internal -> getType_d(), part_d, kT, kTGrid_d, electricField, tl, timestep, num, sys_d, randoGen_d, numReplicas); - //For Brownian motion - else - updateKernel<<< numBlocks, NUM_THREADS >>>(internal -> getPos_d(), internal -> getForceInternal_d(), internal -> getType_d(), - part_d, kT, kTGrid_d, electricField, tl, timestep, num, sys_d, randoGen_d, numReplicas); - - - if(rigidbody_dynamic == String("Langevin")) - { - RBC.integrateDLM(sys,0); - RBC.integrateDLM(sys,1); - } - else - RBC.integrate(sys,s); - - - if (s % outputPeriod == 0) { - // Copy particle positions back to CPU - gpuErrchk(cudaDeviceSynchronize()); - gpuErrchk(cudaMemcpy(pos, internal -> getPos_d(), sizeof(Vector3) * num * numReplicas, cudaMemcpyDeviceToHost)); - } - - //compute again the new force with new positions. - //reset the internal force, I hope. Han-Yi Chou - //First clear the old force - if (imd_on && clientsock && s % outputPeriod == 0) - { - float* coords = new float[num*3]; // TODO: move allocation out of run loop - int* atomIds = new int[num]; // TODO: move allocation out of run loop - int length; - - bool paused = false; - while (vmdsock_selread(clientsock, 0) > 0 || paused) - { - switch (imd_recv_header(clientsock, &length)) - { - case IMD_DISCONNECT: - printf("[IMD] Disconnecting...\n"); - imd_disconnect(clientsock); - clientsock = NULL; - sleep(5); - break; - case IMD_KILL: - printf("[IMD] Killing...\n"); - imd_disconnect(clientsock); - clientsock = NULL; - steps = s; // Stop the simulation at this step - sleep(5); - break; - case IMD_PAUSE: - paused = !paused; - break; - case IMD_GO: - printf("[IMD] Caught IMD_GO\n"); - break; - case IMD_MDCOMM: - for (size_t i = 0; i < num; ++i) // clear old forces - imdForces[i] = Vector3(0.0f); - - if (imd_recv_mdcomm(clientsock, length, atomIds, coords)) - { - printf("[IMD] Error receiving forces\n"); - } - else - { - for (size_t j = 0; j < length; ++j) - { - int i = atomIds[j]; - imdForces[i] = Vector3(coords[j*3], coords[j*3+1], coords[j*3+2]) * conf.imdForceScale; - } - } - break; - default: - printf("[IMD] Something weird happened. Disconnecting..\n"); - break; - } - } - if (clientsock) - { - // float* coords = new float[num*3]; // TODO: move allocation out of run loop - for (size_t i = 0; i < num; i++) - { - const Vector3& p = pos[i]; - coords[3*i] = p.x; - coords[3*i+1] = p.y; - coords[3*i+2] = p.z; - } - imd_send_fcoords(clientsock, num, coords); - } - delete[] coords; - delete[] atomIds; - } - if (imd_on && clientsock) - internal->setForceInternalOnDevice(imdForces); // TODO ensure replicas are mutually exclusive with IMD - //else - //{ - //int numBlocks = (num * numReplicas) / NUM_THREADS + (num * numReplicas % NUM_THREADS == 0 ? 0 : 1); - //MLog: along with calls to internal (ComputeForce class) this function should execute once per GPU. - //clearInternalForces<<< numBlocks, NUM_THREADS >>>(internal->getForceInternal_d(), num*numReplicas); - //use cudaMemset instead - //gpuErrchk(cudaMemset((void*)(internal->getForceInternal_d()),0,num*numReplicas*sizeof(Vector3))); - //RBC.clearForceAndTorque(); - //} - //compute the new force for particles - RBC.clearForceAndTorque(); - gpuErrchk(cudaMemset((void*)(internal->getForceInternal_d()),0,num*numReplicas*sizeof(Vector3))); - //RBC.clearForceAndTorque(); - - if (interparticleForce) - { - // 'tabulatedPotential' - determines whether interaction is described with tabulated potentials or formulas - if (tabulatedPotential) - { - switch (fullLongRange) - { - case 0: // [ N*log(N) ] interactions, + cutoff | decomposition - if (s % decompPeriod == 0) - { - internal -> decompose(); - RBC.updateParticleLists( internal->getPos_d() ); - } - internal -> computeTabulated(get_energy); - break; - default: // [ N^2 ] interactions, no cutoff | decompositions - internal->computeTabulatedFull(get_energy); - break; - } - } - else - { - // Not using tabulated potentials. - switch (fullLongRange) - { - case 0: // Use cutoff | cell decomposition. - if (s % decompPeriod == 0) - { - internal->decompose(); - RBC.updateParticleLists( internal->getPos_d() ); - } - internal->compute(get_energy); - break; - case 1: // Do not use cutoff - internal->computeFull(get_energy); - break; - - case 2: // Compute only softcore forces. - internal->computeSoftcoreFull(get_energy); - break; - - case 3: // Compute only electrostatic forces. - internal->computeElecFull(get_energy); - break; - } - } - } - - //compute the force for rigid bodies - RBC.updateForces(internal->getPos_d(), internal->getForceInternal_d(), s, RigidBodyInterpolationType); - //Han-Yi Chou - //For BAOAB, the last update is only to update the momentum - // gpuErrchk(cudaDeviceSynchronize()); - - if(particle_dynamic == String("Langevin")) - LastUpdateKernelBAOAB<<< numBlocks, NUM_THREADS >>>(internal -> getPos_d(), internal -> getMom_d(), internal -> getForceInternal_d(), internal -> getType_d(), part_d, kT, kTGrid_d, electricField, tl, timestep, num, sys_d, randoGen_d, numReplicas); - - //gpuErrchk(cudaDeviceSynchronize()); - - if(rigidbody_dynamic == String("Langevin")) - { - RBC.SetRandomTorques(); - RBC.AddLangevin(); - RBC.integrateDLM(sys,2); - RBC.print(s); - } - - if (s % outputPeriod == 0) - { - if(particle_dynamic == String("Langevin")) - { - // TODO: make async - gpuErrchk(cudaMemcpy(momentum, internal -> getMom_d(), sizeof(Vector3) * num * numReplicas, cudaMemcpyDeviceToHost)); - /* - gpuErrchk(cudaMemcpy(force, force_d, sizeof(Vector3) * num * numReplicas, cudaMemcpyDeviceToHost)); - Vector3 f(0.f), p(0.f), r0(0.f); - double total_mass = 0.; - for(int i = 0; i < num * numReplicas; ++i) - { - f = f + force[i]; - p = p + momentum[i]; - total_mass += part[type[i]].mass; - r0 = r0 + part[type[i]].mass * pos[i]; - } - printf("The COM is %f %f %f\n",r0.x / total_mass, r0.y / total_mass, r0.z / total_mass); - printf("The total momentum is %f %f %f\n",p.x, p.y, p.z); - printf("The total force %f %f %f\n",f.x, f.y, f.z); - */ - // Output trajectories (to files) - } - //RBC.print(s); - t = s*timestep; - - // Loop over all replicas - for (int repID = 0; repID < numReplicas; ++repID) - { - - if (numberFluct == 1) - updateNameList(); // no need for it here if particles stay the same - - // Write the trajectory. - writers[repID]->append(pos + (repID*num), name, serial, t, num); - - if(particle_dynamic == String("Langevin")) - { - momentum_writers[repID]->append(momentum + (repID * num), name, serial, t, num); - //force_writers[repID]->append(force + (repID * num), name, serial, t, num); - } - - } - - // TODO: Currently, not compatible with replicas. Needs a fix. - if (numberFluct == 1) - updateReservoirs(); - - remember(t); - } - // Output energy. - if (get_energy) - { - wkf_timer_stop(timerS); - t = s * timestep; - // Simulation progress and statistics. - float percent = (100.0f * s) / steps; - float msPerStep = wkf_timer_time(timerS) * 1000.0f / outputEnergyPeriod; - float nsPerDay = numReplicas * timestep / msPerStep * 864E5f; - - // Nice thousand separator - setlocale(LC_NUMERIC, ""); - - // Do the output - printf("\rStep %ld [%.2f%% complete | %.3f ms/step | %.3f ns/day]",s, percent, msPerStep, nsPerDay); - - // Copy positions from GPU to CPU. - gpuErrchk(cudaMemcpy(pos, internal->getPos_d(), sizeof(Vector3)*num*numReplicas,cudaMemcpyDeviceToHost)); - if(particle_dynamic == String("Langevin")) - { - //gpuErrchk(cudaMemcpy(momentum, internal->getMom_d(), sizeof(Vector3)*num*numReplicas,cudaMemcpyDeviceToHost)); - gpuErrchk(cudaMemcpy(momentum, internal->getMom_d(), sizeof(Vector3)*num*numReplicas,cudaMemcpyDeviceToHost)); - float e = KineticEnergy(); - printf(" The kinetic energy is %f \n",e*(2.388458509e-1)); - } - if(rigidbody_dynamic == String("Langevin")) - { - //gpuErrchk(cudaMemcpy(momentum, internal->getMom_d(), sizeof(Vector3)*num*numReplicas,cudaMemcpyDeviceToHost)); - float e = RotKineticEnergy(); - printf(" The Rotational kinetic energy is %f \n",e*(2.388458509e-1)); - } - - // Write restart files for each replica. - for (int repID = 0; repID < numReplicas; ++repID) - writeRestart(repID); - - wkf_timer_start(timerS); - } // s % outputEnergyPeriod - } // done with all Brownian dynamics steps - - // If IMD is on & our socket is still open. - if (imd_on and clientsock) - { - if (vmdsock_selread(clientsock, 0) == 1) - { - int length; - switch (imd_recv_header(clientsock, &length)) - { - case IMD_DISCONNECT: - printf("\n[IMD] Disconnecting...\n"); - imd_disconnect(clientsock); - clientsock = NULL; - sleep(5); - break; - case IMD_KILL: - printf("\n[IMD] Killing...\n"); - imd_disconnect(clientsock); - clientsock = NULL; - sleep(5); - break; - default: - printf("\n[IMD] Something weird happened. Disconnecting..\n"); - break; - } - } - } - // Stop the main timer. - wkf_timer_stop(timer0); - - // Compute performance data. - const float elapsed = wkf_timer_time(timer0); // seconds - int tot_hrs = (int) std::fmod(elapsed / 3600.0f, 60.0f); - int tot_min = (int) std::fmod(elapsed / 60.0f, 60.0f); - float tot_sec = std::fmod(elapsed, 60.0f); - - printf("\nFinal Step: %d\n", (int) steps); - - printf("Total Run Time: "); - if (tot_hrs > 0) printf("%dh%dm%.1fs\n", tot_hrs, tot_min, tot_sec); - else if (tot_min > 0) printf("%dm%.1fs\n", tot_min, tot_sec); - else printf("%.2fs\n", tot_sec); - - gpuErrchk(cudaFree(force_d)); -#endif -} // GrandBrownTown::run() // -------------------------------------------- // Populate lists of types and serial numbers. // diff --git a/src/GrandBrownTown.cuh b/src/GrandBrownTown.cuh index a152dd1ddef4f945e5f468c124a685b0b2d23a13..44ffe6f82c1c3dde464d8039ac6fef26dd97f84f 100644 --- a/src/GrandBrownTown.cuh +++ b/src/GrandBrownTown.cuh @@ -60,91 +60,6 @@ ForceEnergy compute_position_dependent_force( } - -//update both position and momentum for Langevin dynamics -//Han-Yi Chou -#if 0 -__global__ void updateKernelABOBA(Vector3* pos, Vector3* momentum, Vector3* __restrict__ forceInternal, - int type[], BrownianParticleType* part[], float kT, BaseGrid* kTGrid, float electricField, - int tGridLength, float timestep, int num, int num_rb_attached_particles, BaseGrid* sys, Random* randoGen, int numReplicas) -{ - int idx = blockIdx.x * blockDim.x + threadIdx.x; - - if (idx < num * numReplicas) - { - idx = (idx % num) + (idx/num) * (num+num_rb_attached_particles); - - int t = type[idx]; - //Vector3 r0(tex1Dfetch(PosTex, idx)); - //Vector3 p0(tex1Dfetch(MomTex, idx)); - Vector3 r0 = pos[idx]; - Vector3 p0 = momentum[idx]; - - const BrownianParticleType& pt = *part[t]; - float mass = pt.mass; - r0 = r0 + (timestep * 5e3 * p0) / mass; - r0 = sys->wrap(r0); - pos[idx] = r0; - -#if 0 - Vector3 forceExternal = Vector3(0.0f, 0.0f, pt.charge * electricField); - - // Compute PMF - // TODO: maybe make periodic and nonPeriodic versions of this kernel - ForceEnergy fe = pt.pmf->interpolateForceDLinearlyPeriodic(r0); - -#ifndef FORCEGRIDOFF - // Add a force defined via 3D FORCE maps (not 3D potential maps) - if (pt.forceXGrid != NULL) fe.f.x += pt.forceXGrid->interpolatePotentialLinearly(r0); - if (pt.forceYGrid != NULL) fe.f.y += pt.forceYGrid->interpolatePotentialLinearly(r0); - if (pt.forceZGrid != NULL) fe.f.z += pt.forceZGrid->interpolatePotentialLinearly(r0); -#endif - Vector3 force = forceInternal[idx] + forceExternal + fe.f; - - // Get local kT value - float kTlocal = (tGridLength == 0) ? kT : kTGrid->interpolatePotentialLinearly(r0); /* periodic */ - - // Update the particle's position using the calculated values for time, force, etc. - float mass = pt.mass; - Vector3 gamma = pt.transDamping; - - if (pt.diffusionGrid != NULL) - { - - Vector3 gridCenter = pt.diffusionGrid->origin + - pt.diffusionGrid->basis.transform( Vector3(0.5*pt.diffusionGrid->nx, 0.5*pt.diffusionGrid->ny, - 0.5*pt.diffusionGrid->nz)); - Vector3 p2 = r0 - gridCenter; - p2 = sys->wrapDiff( p2 ) + gridCenter; - ForceEnergy diff = pt.diffusionGrid->interpolateForceDLinearlyPeriodic(p2); - gamma = Vector3(kTlocal / (mass * diff.e)); - //diffGrad = diff.f; - } - - float tmp = sqrtf(0.50f * kTlocal * mass * timestep); - Vector3 rando = randoGen->gaussian_vector(idx, num * numReplicas); - - #ifdef MDSTEP - force = Vector3(-r0.x, -r0.y, -r0.z); - #endif - - p0.x = (1.0f - 0.50f * timestep * gamma.x) * p0.x + (0.50f * timestep * force.x * Unit1 + tmp * sqrtf(gamma.x) * rando.x * Unit2); - p0.y = (1.0f - 0.50f * timestep * gamma.y) * p0.y + (0.50f * timestep * force.y * Unit1 + tmp * sqrtf(gamma.y) * rando.y * Unit2); - p0.z = (1.0f - 0.50f * timestep * gamma.z) * p0.z + (0.50f * timestep * force.z * Unit1 + tmp * sqrtf(gamma.z) * rando.z * Unit2); - - r0 = r0 + (timestep * 1e4 * p0) / mass; - r0 = sys->wrap(r0); - - //step(r0, p0, kTlocal, force, tmp, -diffGrad, mass, gamma, timestep, sys, randoGen, num * numReplicas); - pos[idx] = r0; - momentum[idx] = p0; -#endif - - } -} -#endif - - ////The kernel is for Nose-Hoover Langevin dynamics __global__ void updateKernelNoseHooverLangevin(Vector3* __restrict__ pos, Vector3* __restrict__ momentum, float* random, @@ -346,69 +261,6 @@ __global__ void LastUpdateKernelBAOAB(Vector3* pos,Vector3* momentum, Vector3* _ momentum[idx] = p0; } } -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//The last step of BBK integrator to update the momentum for Langevin dynamics. -#if 0 -__global__ void LastUpdateKernelABOBA(Vector3* pos, Vector3* momentum, Vector3* __restrict__ forceInternal, - int type[], BrownianParticleType* part[], float kT, BaseGrid* kTGrid, float electricField, - int tGridLength, float timestep, int num, int num_rb_attached_particles, BaseGrid* sys, Random* randoGen, int numReplicas) -{ - int idx = blockIdx.x * blockDim.x + threadIdx.x; - - if (idx < num * numReplicas) - { - idx = (idx % num) + (idx/num) * (num+num_rb_attached_particles); - - int t = type[idx]; - Vector3 r0 = pos[idx]; - Vector3 p0 = momentum[idx]; - - const BrownianParticleType& pt = *part[t]; - Vector3 forceExternal = Vector3(0.0f, 0.0f, pt.charge * electricField); - - ForceEnergy fe = pt.pmf->interpolateForceDLinearlyPeriodic(r0); - -#ifndef FORCEGRIDOFF - // Add a force defined via 3D FORCE maps (not 3D potential maps) - if (pt.forceXGrid != NULL) fe.f.x += pt.forceXGrid->interpolatePotentialLinearly(r0); - if (pt.forceYGrid != NULL) fe.f.y += pt.forceYGrid->interpolatePotentialLinearly(r0); - if (pt.forceZGrid != NULL) fe.f.z += pt.forceZGrid->interpolatePotentialLinearly(r0); -#endif - Vector3 force = forceInternal[idx] + forceExternal + fe.f; - - float mass = pt.mass; - Vector3 gamma = pt.transDamping; - - float kTlocal = (tGridLength == 0) ? kT : kTGrid->interpolatePotentialLinearly(r0); /* periodic */ - - float tmp = sqrtf(0.50f * kTlocal * mass * timestep); - - if (pt.diffusionGrid != NULL) - { - - Vector3 gridCenter = pt.diffusionGrid->origin + - pt.diffusionGrid->basis.transform( Vector3(0.5*pt.diffusionGrid->nx, 0.5*pt.diffusionGrid->ny, - 0.5*pt.diffusionGrid->nz)); - Vector3 p2 = r0 - gridCenter; - p2 = sys->wrapDiff( p2 ) + gridCenter; - ForceEnergy diff = pt.diffusionGrid->interpolateForceDLinearlyPeriodic(p2); - gamma = Vector3(kTlocal / (mass * diff.e)); - } - - Vector3 rando = randoGen->gaussian_vector(idx, num * numReplicas); - #ifdef MDSTEP - force = Vector3(-r0.x, -r0.y, -r0.z); - #endif - - //step(p0, kTlocal, force, tmp, -diffGrad, mass, gamma, timestep, sys, randoGen, num * numReplicas); - p0.x = (p0.x + 0.50 * timestep * force.x * Unit1 + tmp * sqrtf(gamma.x) * rando.x * Unit2) / (1.0f+0.5f*timestep*gamma.x); - p0.y = (p0.y + 0.50 * timestep * force.y * Unit1 + tmp * sqrtf(gamma.y) * rando.y * Unit2) / (1.0f+0.5f*timestep*gamma.y); - p0.z = (p0.z + 0.50 * timestep * force.z * Unit1 + tmp * sqrtf(gamma.z) * rando.z * Unit2) / (1.0f+0.5f*timestep*gamma.z); - - momentum[idx] = p0; - } -} -#endif //Update kernel for Brownian dynamics __global__ diff --git a/src/GrandBrownTown.h b/src/GrandBrownTown.h index 6d8ed7f34c5bcdbed402929d8c5c3b41ffcb5f89..56d4c5a3b28c7b765542ba87eb5efa2445768c82 100644 --- a/src/GrandBrownTown.h +++ b/src/GrandBrownTown.h @@ -62,7 +62,6 @@ public: ~GrandBrownTown(); void run(); - void RunNoseHooverLangevin(); static bool DEBUG; private: