diff --git a/src/ComputeForce.cu b/src/ComputeForce.cu index 580a73496e52be0476e250f0c097ec318a2bc3e6..fb379874e89e7550f953f0c6a80f81e88270ccab 100644 --- a/src/ComputeForce.cu +++ b/src/ComputeForce.cu @@ -27,7 +27,7 @@ ComputeForce::ComputeForce(int num, const BrownianParticleType part[], int fullLongRange, int numBonds, int numTabBondFiles, int numExcludes, int numAngles, int numTabAngleFiles, int numDihedrals, int numTabDihedralFiles, - int numReplicas) : + float pairlistDistance, int numReplicas) : num(num), numParts(numParts), sys(g), switchStart(switchStart), switchLen(switchLen), electricConst(electricConst), cutoff2((switchLen + switchStart) * (switchLen + switchStart)), @@ -39,6 +39,9 @@ ComputeForce::ComputeForce(int num, const BrownianParticleType part[], // Allocate the parameter tables. decomp_d = NULL; + pairlistdist2 = (sqrt(cutoff2) + pairlistDistance); + pairlistdist2 *= pairlistdist2; + tableEps = new float[numParts * numParts]; tableRad6 = new float[numParts * numParts]; tableAlpha = new float[numParts * numParts]; @@ -444,17 +447,18 @@ void ComputeForce::decompose() { int tmp = 0; gpuErrchk(cudaMemcpyAsync(numPairs_d, &tmp, sizeof(int), cudaMemcpyHostToDevice)); gpuErrchk(cudaDeviceSynchronize()); - - float pairlistdist2 = (sqrt(cutoff2) + 2.0f); - pairlistdist2 = pairlistdist2*pairlistdist2; - + // printf("Pairlistdist: %f\n",sqrt(pairlistdist2)); + #if __CUDA_ARCH__ >= 300 createPairlists<<< 2048, 64 >>>(pos_d, num, numReplicas, sys_d, decomp_d, nCells, numPairs_d, pairLists_d, numParts, type_d, pairTabPotType_d, excludes_d, excludeMap_d, numExcludes, pairlistdist2); #else // Use shared memory for warp_bcast function createPairlists<<< 2048, 64, 2048/WARPSIZE >>>(pos_d, num, numReplicas, sys_d, decomp_d, nCells, numPairs_d, pairLists_d, numParts, type_d, pairTabPotType_d, excludes_d, excludeMap_d, numExcludes, pairlistdist2); #endif - + // DEBUGING + // gpuErrchk(cudaMemcpy(&tmp, numPairs_d, sizeof(int), cudaMemcpyDeviceToHost)); + // printf("CreatePairlist found %d pairs\n",tmp); + gpuErrchk(cudaDeviceSynchronize()); /* RBTODO: sync needed here? */ } @@ -662,6 +666,11 @@ void ComputeForce::copyToCUDA(Vector3* forceInternal, Vector3* pos) gpuErrchk(cudaDeviceSynchronize()); } +void ComputeForce::setForceInternalOnDevice(Vector3* f) { + const size_t tot_num = num * numReplicas; + gpuErrchk(cudaMemcpy(forceInternal_d, f, sizeof(Vector3) * tot_num, cudaMemcpyHostToDevice)); +} + void ComputeForce::copyToCUDA(int simNum, int *type, Bond* bonds, int2* bondMap, Exclude* excludes, int2* excludeMap, Angle* angles, Dihedral* dihedrals) { diff --git a/src/ComputeForce.cuh b/src/ComputeForce.cuh index b2bab6e5444c36173e37bb3eb2ff3abc2378c760..be1e975b47fe97747860c1ff658d80d887c57478 100644 --- a/src/ComputeForce.cuh +++ b/src/ComputeForce.cuh @@ -221,6 +221,8 @@ void createPairlists(Vector3* __restrict__ pos, const int num, const int numRepl int numParts, const int* __restrict__ type, int* __restrict__ g_pairTabPotType, const Exclude* __restrict__ excludes, const int2* __restrict__ excludeMap, const int numExcludes, float pairlistdist2) { + // TODO: loop over all cells with edges within pairlistdist2 + // Loop over threads searching for atom pairs // Each thread has designated values in shared memory as a buffer // A sync operation periodically moves data from shared to global diff --git a/src/ComputeForce.h b/src/ComputeForce.h index cbe2be8f64013696adcf676b1c8599c66b065633..dc20c4e20637425e2263594bb231f754091fe6ad 100644 --- a/src/ComputeForce.h +++ b/src/ComputeForce.h @@ -36,7 +36,7 @@ public: const BaseGrid* g, float switchStart, float switchLen, float electricConst, int fullLongRange, int numBonds, int numTabBondFiles, int numExcludes, int numAngles, int numTabAngleFiles, - int numDihedrals, int numTabDihedralFiles, int numReplicas = 1); + int numDihedrals, int numTabDihedralFiles, float pairlistDistance, int numReplicas = 1); ~ComputeForce(); void updateNumber(int newNum); @@ -87,6 +87,7 @@ public: { return forceInternal_d; } + void setForceInternalOnDevice(Vector3* f); int* getType_d() { @@ -172,6 +173,7 @@ private: TabulatedDihedralPotential **tableDihedral_d, **tableDihedral_addr; // Pairlists + float pairlistdist2; int2 *pairLists_d; int *pairTabPotType_d; diff --git a/src/Configuration.cpp b/src/Configuration.cpp index bc2905ea33cd5713667e678457efa8ce00a5666c..4440e42ab01d11c4d05454b452f15e59919eee3d 100644 --- a/src/Configuration.cpp +++ b/src/Configuration.cpp @@ -547,6 +547,7 @@ void Configuration::setDefaults() { cutoff = 10.0f; switchLen = 2.0f; pairlistDistance = 2.0f; + imdForceScale = 1.0f; outputPeriod = 200; outputEnergyPeriod = -1; outputFormat = TrajectoryWriter::formatDcd; @@ -668,6 +669,8 @@ int Configuration::readParameters(const char * config_file) { switchLen = (float) strtod(value.val(), NULL); else if (param == String("pairlistDistance")) pairlistDistance = (float) strtod(value.val(), NULL); + else if (param == String("scaleIMDForce")) + imdForceScale = (float) strtod(value.val(), NULL); else if (param == String("outputPeriod")) outputPeriod = atoi(value.val()); else if (param == String("outputEnergyPeriod")) diff --git a/src/Configuration.h b/src/Configuration.h index aa7e2ee3caa99415d4e0c71c757c971591b407cb..dfd5c379d878fa750fe9e0e5ffeac94b4fc90e2d 100644 --- a/src/Configuration.h +++ b/src/Configuration.h @@ -142,6 +142,7 @@ public: float cutoff; float pairlistDistance; float switchLen; + float imdForceScale; int outputPeriod; int outputEnergyPeriod; int outputFormat; diff --git a/src/GrandBrownTown.cu b/src/GrandBrownTown.cu index fc766edf14e6f79d460746226db2bac6423d69fc..356d854a74fa71e029cc562514f75a7854860034 100644 --- a/src/GrandBrownTown.cu +++ b/src/GrandBrownTown.cu @@ -323,6 +323,11 @@ GrandBrownTown::~GrandBrownTown() { //gpuErrchk(cudaFree(forceInternal_d)); gpuErrchk(cudaFree(randoGen_d)); //gpuErrchk( cudaFree(bondList_d) ); + + if (imd_on) + delete[] imdForces; + + } // Run the Brownian Dynamics steps. @@ -374,6 +379,10 @@ void GrandBrownTown::run() { imd_recv_header(clientsock, &length) != IMD_GO) { clientsock = NULL; } + imdForces = new Vector3[num*numReplicas]; + for (size_t i = 0; i < num; ++i) // clear old forces + imdForces[i] = Vector3(0.0f); + } // endif (imd_on) // Start timers @@ -514,50 +523,99 @@ void GrandBrownTown::run() { Vector3 force0(0.0f); + + if (s % outputPeriod == 0) { + // Copy particle positions back to CPU + // cudaSetDevice(0); + gpuErrchk(cudaMemcpy(pos, internal -> getPos_d(), sizeof(Vector3) * num * numReplicas, + cudaMemcpyDeviceToHost)); + } + if (imd_on && clientsock && s % outputPeriod == 0) { + + float* coords = new float[num*3]; // TODO: move allocation out of run loop + int* atomIds = new int[num]; // TODO: move allocation out of run loop + int length; - if (vmdsock_selread(clientsock, 0) == 1) { + + bool paused = false; + while (vmdsock_selread(clientsock, 0) > 0 || paused) { switch (imd_recv_header(clientsock, &length)) { - case IMD_DISCONNECT: - printf("[IMD] Disconnecting...\n"); - imd_disconnect(clientsock); - clientsock = NULL; - sleep(5); - break; - case IMD_KILL: - printf("[IMD] Killing...\n"); - imd_disconnect(clientsock); - clientsock = NULL; - steps = s; // Stop the simulation at this step - sleep(5); - break; - default: - printf("[IMD] Something weird happened. Disconnecting..\n"); - break; + case IMD_DISCONNECT: + printf("[IMD] Disconnecting...\n"); + imd_disconnect(clientsock); + clientsock = NULL; + sleep(5); + break; + case IMD_KILL: + printf("[IMD] Killing...\n"); + imd_disconnect(clientsock); + clientsock = NULL; + steps = s; // Stop the simulation at this step + sleep(5); + break; + case IMD_PAUSE: + // if (paused) { + // printf("[IMD] Continuing...\n"); + // } else { + // printf("[IMD] Pausing...\n"); + // } + paused = !paused; + break; + case IMD_GO: + printf("[IMD] Caught IMD_GO\n"); + break; + + case IMD_MDCOMM: + // if (!paused) + // printf("[IMD] Receiving %d forces...\n",length); + for (size_t i = 0; i < num; ++i) // clear old forces + imdForces[i] = Vector3(0.0f); + + if (imd_recv_mdcomm(clientsock, length, atomIds, coords)) { + printf("[IMD] Error receiving forces\n"); + } else { + for (size_t j = 0; j < length; ++j) { + int i = atomIds[j]; + imdForces[i] = Vector3(coords[j*3], coords[j*3+1], coords[j*3+2]) * conf.imdForceScale; + // if (!paused) + // printf("[IMD] Applying force (%0.2f, %0.2f, %0.2f) to particle %d\n", imdForces[i].x, imdForces[i].y, imdForces[i].z, i); + //printf("[IMD] Applying %0.1f pN force to particle %d at step %d\n", 69.47694f * imdForces[i].length(), i, s); + } + } + break; + + default: + printf("[IMD] Something weird happened. Disconnecting..\n"); + break; } } if (clientsock) { // cudaSetDevice(0); - gpuErrchk(cudaMemcpy(pos, internal -> getPos_d(), sizeof(Vector3) * num, cudaMemcpyDeviceToHost)); - float* coords = new float[num * 3]; + // float* coords = new float[num*3]; // TODO: move allocation out of run loop for (size_t i = 0; i < num; i++) { - Vector3 p = pos[i]; + const Vector3& p = pos[i]; coords[3*i] = p.x; coords[3*i+1] = p.y; coords[3*i+2] = p.z; } imd_send_fcoords(clientsock, num, coords); - delete[] coords; } + + delete[] coords; + delete[] atomIds; } + // Output trajectories (to files) if (s % outputPeriod == 0) { - // Copy particle positions back to CPU - // cudaSetDevice(0); - gpuErrchk(cudaMemcpy(pos, internal -> getPos_d(), sizeof(Vector3) * num * numReplicas, - cudaMemcpyDeviceToHost)); - + /* // print IMDforces + for (int i=0; i < num * numReplicas; ++i) { + float f = imdForces[i].length() * 69.47694f; + if (f > 0.1f) + printf("[IMD] Applying %0.1f pN force to particle %d at step %d\n", f, i, s); + } + // */ // Nanoseconds computed t = s*timestep; @@ -593,7 +651,7 @@ void GrandBrownTown::run() { wkf_timer_stop(timerS); // Copy back forces to display (internal only) - gpuErrchk(cudaMemcpy(&force0, internal -> getForceInternal_d(), sizeof(Vector3), cudaMemcpyDeviceToHost)); + // gpuErrchk(cudaMemcpy(&force0, internal -> getForceInternal_d(), sizeof(Vector3), cudaMemcpyDeviceToHost)); // Nanoseconds computed t = s * timestep; @@ -625,7 +683,9 @@ void GrandBrownTown::run() { wkf_timer_start(timerS); } // s % outputEnergyPeriod - { + if (imd_on && clientsock) { + internal->setForceInternalOnDevice(imdForces); // TODO ensure replicas are mutually exclusive with IMD + } else { int numBlocks = (num * numReplicas) / NUM_THREADS + (num * numReplicas % NUM_THREADS == 0 ? 0 : 1); //MLog: along with calls to internal (ComputeForce class) this function should execute once per GPU. clearInternalForces<<< numBlocks, NUM_THREADS >>>(internal->getForceInternal_d(), num*numReplicas); diff --git a/src/GrandBrownTown.cuh b/src/GrandBrownTown.cuh index 671724a55f5417f023a73a57c582177c2c98690b..401d06649c530597494e7fc591bdb8f20bbc737d 100644 --- a/src/GrandBrownTown.cuh +++ b/src/GrandBrownTown.cuh @@ -22,6 +22,7 @@ void clearInternalForces(Vector3* __restrict__ forceInternal, const int num) { if (idx < num) forceInternal[idx] = Vector3(0.0f); } + __global__ void updateKernel(Vector3* pos, Vector3* __restrict__ forceInternal, int type[], BrownianParticleType* part[], diff --git a/src/GrandBrownTown.h b/src/GrandBrownTown.h index 95857aa1087f85e2db811f6ffb08bc9ed6d42d5c..444349f0e50838f0a08a12cfb2d1234badbba509 100644 --- a/src/GrandBrownTown.h +++ b/src/GrandBrownTown.h @@ -40,6 +40,7 @@ /* #include "RigidBodyType.h" */ /* #include "RigidBodyGrid.h" */ #include "RigidBodyController.h" +#include "WKFUtils.h" // IMD #include "vmdsock.h" @@ -119,7 +120,8 @@ private: // IMD variables bool imd_on; unsigned int imd_port; - + Vector3* imdForces; + // Output variables std::vector<std::string> outCurrFiles; std::vector<std::string> restartFiles;