diff --git a/ComputeForce.cu b/ComputeForce.cu index 656635466bbb7b412814fef67a192c05f6ecf5a1..d1cfc0bea80ac19cd8c6a06d54341ab068e01c9b 100644 --- a/ComputeForce.cu +++ b/ComputeForce.cu @@ -413,7 +413,7 @@ void ComputeForce::decompose(Vector3* pos, int type[]) { int tmp = 0; gpuErrchk(cudaMemcpyAsync(numPairs_d, &tmp, sizeof(int), cudaMemcpyHostToDevice)); - gpuErrchk(cudaDeviceSynchronize()); /* RBTOOD: maybe unnecessary */ + gpuErrchk(cudaDeviceSynchronize()); } diff --git a/ComputeForce.cuh b/ComputeForce.cuh index 10454f21af12e6d32909f2ee779ff4776d22faa7..715df18a342d6b82566e3ff11c94628671f191f8 100644 --- a/ComputeForce.cuh +++ b/ComputeForce.cuh @@ -286,10 +286,10 @@ void createPairlistsOld(Vector3* __restrict__ pos, int num, int numReplicas, __global__ void createPairlists(Vector3* __restrict__ pos, int num, int numReplicas, - BaseGrid* sys, CellDecomposition* __restrict__ decomp, + const BaseGrid* __restrict__ sys, const CellDecomposition* __restrict__ decomp, const int nCells, int* g_numPairs, int2* g_pair, - int numParts, int type[], int* __restrict__ g_pairTabPotType, + int numParts, const int* __restrict__ type, int* __restrict__ g_pairTabPotType, float pairlistdist2) { // Loop over threads searching for atom pairs // Each thread has designated values in shared memory as a buffer @@ -299,7 +299,7 @@ void createPairlists(Vector3* __restrict__ pos, int num, int numReplicas, const int split = 32; /* numblocks should be divisible by split */ /* const int blocksPerCell = gridDim.x/split; */ - const CellDecomposition::cell_t* pairs = decomp->getCells(); + const CellDecomposition::cell_t* __restrict__ pairs = decomp->getCells(); for (int cID = 0 + (blockIdx.x % split); cID < nCells; cID += split) { // for (int cID = blockIdx.x/blocksPerCell; cID < nCells; cID += split ) { for (int repID = 0; repID < numReplicas; repID++) { @@ -314,8 +314,8 @@ void createPairlists(Vector3* __restrict__ pos, int num, int numReplicas, const int ai = pairs[ci].particle; // const 
CellDecomposition::cell_t celli = decomp->getCellForParticle(ai); const CellDecomposition::cell_t celli = pairs[ci]; - const Vector3 posi = pos[ai]; - + // Vector3 posi = pos[ai]; + for (int x = -1; x <= 1; ++x) { for (int y = -1; y <= 1; ++y) { for (int z = -1; z <= 1; ++z) { @@ -329,7 +329,8 @@ void createPairlists(Vector3* __restrict__ pos, int num, int numReplicas, if (aj <= ai) continue; // skip ones that are too far away - float dr = (sys->wrapDiff(pos[aj] - pos[ai])).length2(); + const float dr = (sys->wrapDiff(pos[aj] - pos[ai])).length2(); + // const float dr = (sys->wrapDiff(pos[aj] - posi)).length2(); if (dr > pairlistdist2) continue; int gid = atomicAggInc( g_numPairs, warpLane ); diff --git a/GPUController.h b/GPUController.h new file mode 100644 index 0000000000000000000000000000000000000000..088a3ddc2564ce0a81bbb1d89575b571b026b0bc --- /dev/null +++ b/GPUController.h @@ -0,0 +1,45 @@ +// provides interface between main CPU loop and various GPUs +// -- holds data for each GPU + +#pragma once +#include "useful.h" +// NOTE(review): the types named below (Configuration, BaseGrid, ComputeForce, ...) are not declared in this header and only useful.h is included -- confirm they are declared before this header is included. + +class GPUcontroller { +public: + GPUcontroller(const Configuration& c, const long int randomSeed, + bool debug, int numReplicas = 0); + ~GPUcontroller(); + + static bool DEBUG; + +private: + + void copyToCUDA(); + + +private: + const Configuration& conf; + int numReplicas; + + // Integrator variables + BaseGrid* sys; + ComputeForce* internal; + Vector3* forceInternal; + + // CUDA device variables + Vector3 *pos_d, *forceInternal_d, *force_d; + int *type_d; + BrownianParticleType **part_d; + BaseGrid *sys_d, *kTGrid_d; + Random *randoGen_d; + Bond* bonds_d; + int2* bondMap_d; + Exclude* excludes_d; + int2* excludeMap_d; + Angle* angles_d; + Dihedral* dihedrals_d; + +}; + +