diff --git a/src/ComputeForce.cu b/src/ComputeForce.cu index ff6255045df2846ffd2641744cebdb2d53e2b5ad..d7f055c3d4c495acc01698df6b60304cdac95705 100644 --- a/src/ComputeForce.cu +++ b/src/ComputeForce.cu @@ -123,16 +123,17 @@ ComputeForce::ComputeForce(const Configuration& c, const int numReplicas = 1) : //Han-Yi Chou int nCells = decomp.nCells.x * decomp.nCells.y * decomp.nCells.z; //int* nCells_dev; - int3 *Cells_dev; - - gpuErrchk(cudaMalloc(&CellNeighborsList,sizeof(int)*27*nCells)); - //gpuErrchk(cudaMalloc(&nCells_dev,sizeof(int))); - gpuErrchk(cudaMalloc(&Cells_dev,sizeof(int3))); - //gpuErrchk(cudaMemcpy(nCells_dev,&nCells,1,cudaMemcpyHostToDevice); - gpuErrchk(cudaMemcpy(Cells_dev,&(decomp.nCells),sizeof(int3),cudaMemcpyHostToDevice)); - createNeighborsList<<<256,256>>>(Cells_dev,CellNeighborsList); - gpuErrchk(cudaFree(Cells_dev)); - cudaBindTexture(0, NeighborsTex, CellNeighborsList, 27*nCells*sizeof(int)); + if (nCells < MAX_CELLS_FOR_CELLNEIGHBORLIST) { + int3 *Cells_dev; + gpuErrchk(cudaMalloc(&CellNeighborsList,sizeof(int)*27*nCells)); + //gpuErrchk(cudaMalloc(&nCells_dev,sizeof(int))); + gpuErrchk(cudaMalloc(&Cells_dev,sizeof(int3))); + //gpuErrchk(cudaMemcpy(nCells_dev,&nCells,1,cudaMemcpyHostToDevice); + gpuErrchk(cudaMemcpy(Cells_dev,&(decomp.nCells),sizeof(int3),cudaMemcpyHostToDevice)); + createNeighborsList<<<256,256>>>(Cells_dev,CellNeighborsList); + gpuErrchk(cudaFree(Cells_dev)); + cudaBindTexture(0, NeighborsTex, CellNeighborsList, 27*nCells*sizeof(int)); + } } //Calculate the number of blocks the grid should contain diff --git a/src/ComputeForce.cuh b/src/ComputeForce.cuh index babbe0ad40f7d5d9c387363f3914a6bfc393d817..2f818b89cd244930d35deeaf28c506e0981671d9 100644 --- a/src/ComputeForce.cuh +++ b/src/ComputeForce.cuh @@ -8,6 +8,9 @@ #include "TabulatedMethods.cuh" #define BD_PI 3.1415927f + +#define MAX_CELLS_FOR_CELLNEIGHBORLIST 1<<25 + texture<int, 1, cudaReadModeElementType> NeighborsTex; texture<int, 1, cudaReadModeElementType> pairTabPotTypeTex; texture<int2, 1, cudaReadModeElementType> pairListsTex; @@ -234,6 +237,35 @@ int getExSum() { return tmp; } // +__device__ +int computeCellNeighbor( const int3 cells, const int3 cell_idx, const int dx, const int dy, const int dz ) +{ + int idx = cell_idx.x; + int idy = cell_idx.y; + int idz = cell_idx.z; + + int u = idx + dx; + int v = idy + dy; + int w = idz + dz; + + int nID; + if (cells.x == 1 and u != 0) nID = -1; + else if (cells.y == 1 and v != 0) nID = -1; + else if (cells.z == 1 and w != 0) nID = -1; + else if (cells.x == 2 and (u < 0 || u > 1)) nID = -1; + else if (cells.y == 2 and (v < 0 || v > 1)) nID = -1; + else if (cells.z == 2 and (w < 0 || w > 1)) nID = -1; + else + { + u = (u + cells.x) % cells.x; + v = (v + cells.y) % cells.y; + w = (w + cells.z) % cells.z; + nID = w + cells.z * (v + cells.y * u); + } + + return nID; +} + __global__ void createNeighborsList(const int3 *Cells,int* __restrict__ CellNeighborsList) { @@ -254,25 +286,7 @@ void createNeighborsList(const int3 *Cells,int* __restrict__ CellNeighborsList) for (int dy = -1; dy <= 1; ++dy) { for (int dz = -1; dz <= 1; ++dz) { - int u = idx + dx; - int v = idy + dy; - int w = idz + dz; - - if (cells.x == 1 and u != 0) nID = -1; - else if (cells.y == 1 and v != 0) nID = -1; - else if (cells.z == 1 and w != 0) nID = -1; - else if (cells.x == 2 and (u < 0 || u > 1)) nID = -1; - else if (cells.y == 2 and (v < 0 || v > 1)) nID = -1; - else if (cells.z == 2 and (w < 0 || w > 1)) nID = -1; - else - { - u = (u + cells.x) % cells.x; - v = (v + cells.y) % cells.y; - w = (w + cells.z) % cells.z; - - nID = w + cells.z * (v + cells.y * u); - } - + nID = computeCellNeighbor( cells, make_int3(idx,idy,idz), dx, dy, dz ); CellNeighborsList[size_t(count+27*cID)] = nID; ++count; //__syncthreads(); @@ -342,7 +356,20 @@ __global__ void createPairlists(Vector3* __restrict__ pos, const int num, const int currEx = ex_pair.x; int nextEx = (ex_pair.x >= 0) ? excludes[currEx].ind2 : -1; - int neighbor_cell = tex1Dfetch(NeighborsTex,idx+27*cellid_i); + int neighbor_cell; + if (nCells < MAX_CELLS_FOR_CELLNEIGHBORLIST) { + neighbor_cell = tex1Dfetch(NeighborsTex,idx+27*cellid_i); + } else { + int3 cells = decomp->nCells; + int3 cell_idx = make_int3(cellid_i % cells.z, + cellid_i / cells.z % cells.y, + cellid_i / (cells.z * cells.y)); + + int dz = (idx % 3) - 1; + int dy = ((idx/3) % 3) - 1; + int dx = ((idx/9) % 3) - 1; + neighbor_cell = computeCellNeighbor( decomp->nCells, cell_idx, dx, dy, dz ); + } if(neighbor_cell < 0) {