diff --git a/ComputeForce.cu b/ComputeForce.cu index 3f3455a26ceaed3da53e883b21cdd4dab1be385f..ae82baf25f6fc4f0c8a2277cad6466648f14702b 100644 --- a/ComputeForce.cu +++ b/ComputeForce.cu @@ -578,23 +578,24 @@ float ComputeForce::computeTabulated(bool get_energy) { // int nb = (1+(decomp.nCells.x * decomp.nCells.y * decomp.nCells.z)) * 75; /* RBTODO: number of pairLists */ const int nb = 800; // printf("ComputeTabulated\n"); - - if (get_energy) + gpuErrchk(cudaDeviceSynchronize()); + + // RBTODO: get_energy + //if (get_energy) + if (false) { clearEnergies<<< nb, numThreads >>>(energies_d,num); gpuErrchk(cudaDeviceSynchronize()); - computeTabulatedEnergyKernel<<< nb, numThreads >>>(forceInternal_d, pos_d, type_d, tablePot_d, sys_d, energies_d, cutoff2, numPairs_d, pairLists_d, pairTabPotType_d); + computeTabulatedEnergyKernel<<< nb, numThreads >>>(forceInternal_d, pos_d, sys_d, + cutoff2, numPairs_d, pairLists_d, pairTabPotType_d, tablePot_d, energies_d); } else { - computeTabulatedKernel<<< nb, numThreads >>>(forceInternal_d, pos_d, type_d, - tablePot_d, tableBond_d, sys_d, - bonds_d, bondMap_d, numBonds, cutoff2, - numPairs_d, pairLists_d, pairTabPotType_d); + computeTabulatedKernel<<< nb, numThreads >>>(forceInternal_d, pos_d, sys_d, + cutoff2, numPairs_d, pairLists_d, pairTabPotType_d, tablePot_d); } /* printPairForceCounter<<<1,32>>>(); */ - /* gpuErrchk(cudaDeviceSynchronize()); */ //Mlog: the commented function doesn't use bondList, uncomment for testing. //if(bondMap_d != NULL && tableBond_d != NULL) @@ -602,21 +603,23 @@ float ComputeForce::computeTabulated(bool get_energy) { { //computeTabulatedBonds <<<numBlocks, numThreads>>> ( force, pos, num, numParts, sys_d, bonds, bondMap_d, numBonds, numReplicas, energies_d, get_energy, tableBond_d); - computeTabulatedBonds <<<numBlocks, numThreads>>> ( forceInternal_d, pos_d, num, numParts, sys_d, bondList_d, (numBonds/2), numReplicas, energies_d, get_energy, tableBond_d); + computeTabulatedBonds <<<nb, numThreads>>> ( forceInternal_d, pos_d, sys_d, numReplicas*numBonds/2, bondList_d, tableBond_d); } + if (angleList_d != NULL && tableAngle_d != NULL) - computeTabulatedAngles<<<numBlocks, numThreads>>>(forceInternal_d, pos_d, sys_d, numAngles*numReplicas, angleList_d, tableAngle_d); - + computeTabulatedAngles<<<nb, numThreads>>>(forceInternal_d, pos_d, sys_d, numAngles*numReplicas, angleList_d, tableAngle_d); + if (dihedralList_d != NULL && tableDihedral_d != NULL) - computeTabulatedDihedrals<<<numBlocks, numThreads>>>(forceInternal_d, pos_d, sys_d, numDihedrals*numReplicas, dihedralList_d, dihedralPotList_d, tableDihedral_d); - + computeTabulatedDihedrals<<<nb, numThreads>>>(forceInternal_d, pos_d, sys_d, numDihedrals*numReplicas, dihedralList_d, dihedralPotList_d, tableDihedral_d); + // Calculate the energy based on the array created by the kernel - if (get_energy) { - gpuErrchk(cudaDeviceSynchronize()); - thrust::device_ptr<float> en_d(energies_d); - energy = thrust::reduce(en_d, en_d + num); - } + // TODO: return energy + // if (get_energy) { + // gpuErrchk(cudaDeviceSynchronize()); + // thrust::device_ptr<float> en_d(energies_d); + // energy = thrust::reduce(en_d, en_d + num); + // } return energy; } diff --git a/ComputeForce.cuh b/ComputeForce.cuh index 90d56bdd61581b9c7999aacd97970976a16e076d..0fa23ba096c8acfd2076299cd7ef22dee8a0818c 100644 --- a/ComputeForce.cuh +++ b/ComputeForce.cuh @@ -432,12 +432,9 @@ __global__ void printPairForceCounter() { // using cell decomposition // __global__ void computeTabulatedKernel( - Vector3* force, const Vector3* __restrict__ pos, int* type, - TabulatedPotential** __restrict__ tablePot, TabulatedPotential** __restrict__ tableBond, - const BaseGrid* __restrict__ sys, - const Bond* __restrict__ bonds, const int2* __restrict__ bondMap, int numBonds, - float cutoff2, const int* __restrict__ g_numPairs, - const int2* __restrict__ g_pair, const int* __restrict__ g_pairTabPotType) { + Vector3* force, const Vector3* __restrict__ pos, + const BaseGrid* __restrict__ sys, float cutoff2, + const int* __restrict__ g_numPairs, const int2* __restrict__ g_pair, const int* __restrict__ g_pairTabPotType, TabulatedPotential** __restrict__ tablePot) { //remove int* type. remove bond references const int numPairs = *g_numPairs; @@ -470,10 +467,10 @@ __global__ void clearEnergies(float* g_energies, int num) { g_energies[i] = 0.0f; } } -__global__ void computeTabulatedEnergyKernel(Vector3* force, Vector3* __restrict__ pos, int* type, TabulatedPotential** __restrict__ tablePot, BaseGrid* __restrict__ sys, float* g_energies, float cutoff2, int* __restrict__ g_numPairs, int2* __restrict__ g_pair, int* __restrict__ g_pairTabPotType) { - +__global__ void computeTabulatedEnergyKernel(Vector3* force, const Vector3* __restrict__ pos, + const BaseGrid* __restrict__ sys, float cutoff2, + const int* __restrict__ g_numPairs, const int2* __restrict__ g_pair, const int* __restrict__ g_pairTabPotType, TabulatedPotential** __restrict__ tablePot, float* g_energies) { const int numPairs = *g_numPairs; - // RBTODO: BONDS (handle through an independent kernel call?) for (int i = threadIdx.x+blockIdx.x*blockDim.x; i < numPairs; i+=blockDim.x*gridDim.x) { const int2 pair = g_pair[i]; const int ai = pair.x; @@ -486,7 +483,7 @@ __global__ void computeTabulatedEnergyKernel(Vector3* force, Vector3* __restrict // Calculate the force based on the tabulatedPotential file float d2 = dr.length2(); /* RBTODO: filter tabpot particles ahead of time */ - // RBTODO: order pairs according to distance to reduce divergence + // RBTODO: order pairs according to distance to reduce divergence // not actually faster if (tablePot[ind] != NULL && d2 <= cutoff2) { EnergyForce fe = tablePot[ind]->compute(dr,d2); @@ -695,22 +692,17 @@ void computeAngles(Vector3 force[], Vector3 pos[], } }*/ -__global__ void computeTabulatedBonds( Vector3* __restrict__ force, Vector3* __restrict__ pos, int num, - int numParts, BaseGrid* __restrict__ sys, int3* __restrict__ bondList_d, - int numBonds, int numReplicas, float* __restrict__ g_energies, - bool get_energy, TabulatedPotential** tableBond) -{ - // Thread's unique ID. - int currBond = blockIdx.x * blockDim.x + threadIdx.x; // first particle ID - +__global__ void computeTabulatedBonds(Vector3* __restrict__ force, + Vector3* __restrict__ pos, + BaseGrid* __restrict__ sys, + int numBonds, int3* __restrict__ bondList_d, TabulatedPotential** tableBond) { // Loop over ALL bonds in ALL replicas - if( currBond < (numBonds * numReplicas) ) //numBonds should be divided by 2 in the kernel call - { + for (int bid = threadIdx.x+blockIdx.x*blockDim.x; bid<numBonds; bid+=blockDim.x*gridDim.x) { // Initialize interaction energy (per particle) - float energy_local = 0.0f; + // float energy_local = 0.0f; - int i = bondList_d[currBond].x; - int j = bondList_d[currBond].y; + int i = bondList_d[bid].x; + int j = bondList_d[bid].y; // Particle's type and position //Vector3 posi = pos[i]; @@ -719,17 +711,17 @@ __global__ void computeTabulatedBonds( Vector3* __restrict__ force, Vector3* __ // wrapping this value if necessary const Vector3 dr = sys->wrapDiff(pos[j] - pos[i]); - Vector3 force_local = tableBond[ bondList_d[currBond].z ]->computef(dr,dr.length2()); + Vector3 force_local = tableBond[ bondList_d[bid].z ]->computef(dr,dr.length2()); atomicAdd( &force[i], force_local ); atomicAdd( &force[j], -force_local ); - - if (get_energy) - { - //TODO: clarification on energy computation needed, consider changing. - atomicAdd( &g_energies[i], energy_local); - //atomicAdd( &g_energies[j], energy_local); - } + + // if (get_energy) + // { + // //TODO: clarification on energy computation needed, consider changing. + // atomicAdd( &g_energies[i], energy_local); + // //atomicAdd( &g_energies[j], energy_local); + // } } } diff --git a/Configuration.cpp b/Configuration.cpp index ce253af8b71fb2a7fe947ad737586cd8b4d8d5fb..2efe3e77603d7e8a54422e7ce7609efb94111578 100644 --- a/Configuration.cpp +++ b/Configuration.cpp @@ -1262,8 +1262,8 @@ void Configuration::readAngles() { } std::sort(angles, angles + numAngles, compare()); - for(int i = 0; i < numAngles; i++) - angles[i].print(); + // for(int i = 0; i < numAngles; i++) + // angles[i].print(); } void Configuration::readDihedrals() { @@ -1324,8 +1324,8 @@ void Configuration::readDihedrals() { } std::sort(dihedrals, dihedrals + numDihedrals, compare()); - for(int i = 0; i < numDihedrals; i++) - dihedrals[i].print(); + // for(int i = 0; i < numDihedrals; i++) + // dihedrals[i].print(); } void Configuration::populate() { diff --git a/GrandBrownTown.cu b/GrandBrownTown.cu index 5efb8ff831919169942b2403f015599d96b25d8c..4e53b816b92e0ae09786d90594a57271e089a7e3 100644 --- a/GrandBrownTown.cu +++ b/GrandBrownTown.cu @@ -185,21 +185,21 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg, // TODO: check for duplicate potentials if (c.tabulatedPotential) { - printf("Loading the tabulated potentials...\n"); + printf("Loading %d tabulated non-bonded potentials...\n", numParts*numParts); for (int p = 0; p < numParts*numParts; p++) { if (partTableFile[p].length() > 0) { int type0 = partTableIndex0[p]; int type1 = partTableIndex1[p]; internal->addTabulatedPotential(partTableFile[p].val(), type0, type1); - printf("Loaded %s for types %s and %s.\n", partTableFile[p].val(), - part[type0].name.val(), part[type1].name.val()); + // printf(" Loaded %s for types %s and %s.\n", partTableFile[p].val(), + // part[type0].name.val(), part[type1].name.val()); } } } if (c.readBondsFromFile) { - printf("Loading the tabulated bond potentials...\n"); + printf("Loading %d tabulated bond potentials...\n", numTabBondFiles); for (int p = 0; p < numTabBondFiles; p++) if (bondTableFile[p].length() > 0) { //MLog: make sure to add to all GPUs @@ -209,7 +209,7 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg, } if (c.readAnglesFromFile) { - printf("Loading the tabulated angle potentials...\n"); + printf("Loading %d tabulated angle potentials...\n", numTabAngleFiles); for (int p = 0; p < numTabAngleFiles; p++) if (angleTableFile[p].length() > 0) { @@ -219,7 +219,7 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg, } if (c.readDihedralsFromFile) { - printf("Loading the tabulated dihedral potentials...\n"); + printf("Loading %d tabulated dihedral potentials...\n", numTabDihedralFiles); for (int p = 0; p < numTabDihedralFiles; p++) if (dihedralTableFile[p].length() > 0) internal->addDihedralPotential(dihedralTableFile[p].val(), p, dihedrals); @@ -571,7 +571,7 @@ void GrandBrownTown::run() { */ // Copy positions from GPU to CPU. - gpuErrchk(cudaMemcpy(pos, internal -> getPos_d(), sizeof(Vector3) * num * numReplicas, + gpuErrchk(cudaMemcpy(pos, internal->getPos_d(), sizeof(Vector3)*num*numReplicas, cudaMemcpyDeviceToHost)); // Write restart files for each replica. @@ -585,7 +585,7 @@ void GrandBrownTown::run() { { int numBlocks = (num * numReplicas) / NUM_THREADS + (num * numReplicas % NUM_THREADS == 0 ? 0 : 1); //MLog: along with calls to internal (ComputeForce class) this function should execute once per GPU. - clearInternalForces<<< numBlocks, NUM_THREADS >>>(internal -> getForceInternal_d(), num, numReplicas); + clearInternalForces<<< numBlocks, NUM_THREADS >>>(internal->getForceInternal_d(), num*numReplicas); } } // done with all Brownian dynamics steps diff --git a/GrandBrownTown.cuh b/GrandBrownTown.cuh index 243d26b84dcb4edcfd246e6c75ddde3cc3db3af6..d5a1d16f55e2a1530ab953ca03038830aeda2330 100644 --- a/GrandBrownTown.cuh +++ b/GrandBrownTown.cuh @@ -14,9 +14,9 @@ Vector3 step(Vector3 r0, float kTlocal, Vector3 force, float diffusion, Random *randoGen, int num); __global__ -void clearInternalForces(Vector3 forceInternal[], int num, int numReplicas) { +void clearInternalForces(Vector3* __restrict__ forceInternal, const int num) { const int idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx < num * numReplicas) + if (idx < num) forceInternal[idx] = Vector3(0.0f); } __global__ @@ -90,8 +90,10 @@ void updateKernel(Vector3 pos[], Vector3 forceInternal[], /* printf("atom %d: force: %f %f %f\n", idx, force.x, force.y, force.z); */ /* printf("atom %d: kTlocal, diffusion, timestep: %f, %f, %f\n", */ /* idx, kTlocal, diffusion, timestep); */ - - pos[idx] = step(p, kTlocal, force, diffusion, -diffGrad, timestep, sys, randoGen, num); + Vector3 tmp = step(p, kTlocal, force, diffusion, -diffGrad, timestep, sys, randoGen, num); + assert( tmp.length() < 10000.0f ); + pos[idx] = tmp; + } } diff --git a/TabulatedMethods.cuh b/TabulatedMethods.cuh index 8986e1c6d92d05bb363aaa32110609ac7e6bf268..3a07b7c98957bc5a4bb71483b8cac67f3319f981 100644 --- a/TabulatedMethods.cuh +++ b/TabulatedMethods.cuh @@ -19,17 +19,16 @@ __device__ inline void computeAngle(const TabulatedAnglePotential* __restrict__ // Find the distance between each pair of particles const float distab = ab.length(); const float distbc = bc.length(); - const float distac = ac.length(); + const float distac2 = ac.length2(); // Find the cosine of the angle we want - <ABC - float cos = (distbc * distbc + distab * distab - distac * distac) / (2.0f * distbc * distab); + float cos = (distbc * distbc + distab * distab - distac2) / (2.0f * distbc * distab); // If the cosine is illegitimate, set it to 1 or -1 so that acos won't fail if (cos < -1.0f) cos = -1.0f; if (cos > 1.0f) cos = 1.0f; // Find the sine while we're at it. - float sin = sqrtf(1.0f - cos*cos); // Now we can use the cosine to find the actual angle (in radians) float angle = acos(cos); @@ -44,18 +43,23 @@ __device__ inline void computeAngle(const TabulatedAnglePotential* __restrict__ assert(home >= 0); assert(home < a->size); - // Make angle the distance from [0,1) from the first index in the potential array index - angle -= home; + // // Make angle the distance from [0,1) from the first index in the potential array index + // angle -= home; // Linearly interpolate the potential float U0 = a->pot[home]; float dUdx = (a->pot[(home+1)] - U0) / a->angle_step; // float energy = (dUdx * angle) + U0; - dUdx /= sqrtf(1.0f-cos*cos); + float sin = sqrtf(1.0f - cos*cos); + dUdx /= abs(sin) > 1e-4 ? sin : 1e-4; // avoid singularity // Calculate the forces Vector3 force1 = (dUdx/distab) * (ab * (cos/distab) - bc / distbc); // force on particle 1 Vector3 force3 = (dUdx/distbc) * (bc * (cos/distbc) - ab / distab); // force on particle 3 + + // assert( force1.length() < 10000.0f ); + // assert( force3.length() < 10000.0f ); + atomicAdd( &force[i], force1 ); atomicAdd( &force[k], force3 ); atomicAdd( &force[j], -(force1 + force3) ); @@ -81,7 +85,10 @@ __device__ inline void computeDihedral(const TabulatedDihedralPotential* __restr Vector3 crossABC = ab.cross(bc); Vector3 crossBCD = bc.cross(cd); Vector3 crossX = bc.cross(crossABC); + // assert( crossABC.rLength2() <= 1.0f ); + // assert( crossBCD.rLength2() <= 1.0f ); + const float cos_phi = crossABC.dot(crossBCD) / (crossABC.length() * crossBCD.length()); const float sin_phi = crossX.dot(crossBCD) / (crossX.length() * crossBCD.length()); @@ -116,11 +123,15 @@ __device__ inline void computeDihedral(const TabulatedDihedralPotential* __restr // avoid singularity when one angle is straight force = (crossABC.rLength() > 1.0f || crossBCD.rLength() > 1.0f) ? 0.0f : force; - + assert( force < 10000.0f ); f1 *= force; f2 *= force; f3 *= force; + // assert( f1.length() < 10000.0f ); + // assert( f2.length() < 10000.0f ); + // assert( f3.length() < 10000.0f ); + atomicAdd( &forces[i], f1 ); atomicAdd( &forces[j], f2-f1 ); atomicAdd( &forces[k], f3-f2 ); diff --git a/TabulatedPotential.cu b/TabulatedPotential.cu index a1788925b914b99660624d9b0ae9b73d68a79d71..0e9f30f21532899809c90996db0caf31751fd9dd 100644 --- a/TabulatedPotential.cu +++ b/TabulatedPotential.cu @@ -18,7 +18,7 @@ TabulatedPotential::TabulatedPotential() { } TabulatedPotential::TabulatedPotential(const char* fileName) : fileName(fileName) { - printf("File: %s\n", fileName); + // printf("File: %s\n", fileName); FILE* inp = fopen(fileName, "r"); if (inp == NULL) { printf("TabulatedPotential:TabulatedPotential Could not open file '%s'\n", fileName);