diff --git a/src/BrownianParticleType.cpp b/src/BrownianParticleType.cpp index 0b56413236549ef34e2a0c381b54437f317f5aa4..50abb69740c2b6fdc4bad1a9083c7ae0425a943d 100644 --- a/src/BrownianParticleType.cpp +++ b/src/BrownianParticleType.cpp @@ -13,7 +13,7 @@ void BrownianParticleType::clear() { if (meanPmf != NULL) delete [] meanPmf; pmf = NULL, diffusionGrid = NULL; forceXGrid = NULL, forceYGrid = NULL, forceZGrid = NULL; - reservoir = NULL; meanPmf = NULL; + reservoir = NULL, meanPmf = NULL; } void BrownianParticleType::copy(const BrownianParticleType& src) { diff --git a/src/BrownianParticleType.h b/src/BrownianParticleType.h index 67e634652c90695bc58ca5736085da3f9ea9c864..68f99a9bb03d7cec972931bbebcb9347fef3f852 100644 --- a/src/BrownianParticleType.h +++ b/src/BrownianParticleType.h @@ -30,8 +30,8 @@ class BrownianParticleType { BrownianParticleType(const String& name = "") : name(name), num(0), diffusion(0.0f), radius(1.0f), charge(0.0f), eps(0.0f), meanPmf(NULL), - reservoir(NULL), pmf(NULL), diffusionGrid(NULL), - forceXGrid(NULL), forceYGrid(NULL), forceZGrid(NULL), numPartGridFiles(-1) { } + numPartGridFiles(-1), reservoir(NULL), pmf(NULL), diffusionGrid(NULL), + forceXGrid(NULL), forceYGrid(NULL), forceZGrid(NULL){ } BrownianParticleType(const BrownianParticleType& src) { copy(src); } diff --git a/src/ComputeForce.cu b/src/ComputeForce.cu index 903b6c22a1ade77d2e2018b86364ddfd8894d9aa..e5d212ce7b6f4dbf598b8de43ea4dbc3246f4ef9 100644 --- a/src/ComputeForce.cu +++ b/src/ComputeForce.cu @@ -6,8 +6,9 @@ #include "ComputeForce.cuh" #include "Configuration.h" #include <cuda_profiler_api.h> - - +#include <thrust/device_ptr.h> +#include <fstream> +#include <iostream> #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true) { if (code != cudaSuccess) { @@ -121,7 +122,7 @@ ComputeForce::ComputeForce(const Configuration& c, const int numReplicas = 1) : gpuErrchk(cudaBindTexture(0, pairTabPotTypeTex, pairTabPotType_d, sizeof(int)*maxPairs)); //Han-Yi //Han-Yi Chou - size_t nCells = decomp.nCells.x * decomp.nCells.y * decomp.nCells.z; + int nCells = decomp.nCells.x * decomp.nCells.y * decomp.nCells.z; //int* nCells_dev; int3 *Cells_dev; @@ -130,7 +131,7 @@ ComputeForce::ComputeForce(const Configuration& c, const int numReplicas = 1) : gpuErrchk(cudaMalloc(&Cells_dev,sizeof(int3))); //gpuErrchk(cudaMemcpy(nCells_dev,&nCells,1,cudaMemcpyHostToDevice); gpuErrchk(cudaMemcpy(Cells_dev,&(decomp.nCells),sizeof(int3),cudaMemcpyHostToDevice)); - createNeighborsList<<<1,64>>>(Cells_dev,CellNeighborsList); + createNeighborsList<<<256,256>>>(Cells_dev,CellNeighborsList); gpuErrchk(cudaFree(Cells_dev)); cudaBindTexture(0, NeighborsTex, CellNeighborsList, 27*nCells*sizeof(int)); } @@ -139,8 +140,7 @@ ComputeForce::ComputeForce(const Configuration& c, const int numReplicas = 1) : gridSize = num / NUM_THREADS + 1; // Create and allocate the energy arrays - gpuErrchk(cudaMalloc(&energies_d, sizeof(float) * num)); - + gpuErrchk(cudaMalloc(&energies_d, sizeof(float) * num * numReplicas)); cudaEventCreate(&start); cudaEventCreate(&stop); } @@ -702,11 +702,12 @@ float ComputeForce::computeTabulated(bool get_energy) { gpuErrchk(cudaDeviceSynchronize()); // RBTODO: get_energy - //if (get_energy) - if (false) + if (get_energy) + //if (false) { - clearEnergies<<< nb, numThreads >>>(energies_d,num); - gpuErrchk(cudaDeviceSynchronize()); + //clearEnergies<<< nb, numThreads >>>(energies_d,num); + //gpuErrchk(cudaDeviceSynchronize()); + cudaMemset((void*)energies_d, 0, sizeof(float)*num*numReplicas); computeTabulatedEnergyKernel<<< nb, numThreads >>>(forceInternal_d, pos_d, sys_d, cutoff2, numPairs_d, pairLists_d, pairTabPotType_d, tablePot_d, energies_d); } @@ -728,27 +729,50 @@ float ComputeForce::computeTabulated(bool get_energy) { { //computeTabulatedBonds <<<numBlocks, numThreads>>> ( force, pos, num, numParts, sys_d, bonds, bondMap_d, numBonds, numReplicas, energies_d, get_energy, tableBond_d); - computeTabulatedBonds <<<nb, numThreads>>> ( forceInternal_d, pos_d, sys_d, numReplicas*numBonds/2, bondList_d, tableBond_d); + //computeTabulatedBonds <<<nb, numThreads>>> ( forceInternal_d, pos_d, sys_d, numReplicas*numBonds/2, bondList_d, tableBond_d); + //if(get_energy) + //cudaMemset(bond_energy_d, 0, sizeof(float)*num); + computeTabulatedBonds <<<nb, numThreads>>> ( forceInternal_d, pos_d, sys_d, numReplicas*numBonds/2, bondList_d, tableBond_d, energies_d, get_energy); } if (angleList_d != NULL && tableAngle_d != NULL) - computeTabulatedAngles<<<nb, numThreads>>>(forceInternal_d, pos_d, sys_d, numAngles*numReplicas, angleList_d, tableAngle_d); - + { + //if(get_energy) + //computeTabulatedAngles<<<nb, numThreads>>>(forceInternal_d, pos_d, sys_d, numAngles*numReplicas, angleList_d, tableAngle_d); + computeTabulatedAngles<<<nb, numThreads>>>(forceInternal_d, pos_d, sys_d, numAngles*numReplicas, angleList_d, tableAngle_d, energies_d, get_energy); + } if (dihedralList_d != NULL && tableDihedral_d != NULL) - computeTabulatedDihedrals<<<nb, numThreads>>>(forceInternal_d, pos_d, sys_d, numDihedrals*numReplicas, dihedralList_d, dihedralPotList_d, tableDihedral_d); - + { + //if(get_energy) + //computeTabulatedDihedrals<<<nb, numThreads>>>(forceInternal_d, pos_d, sys_d, numDihedrals*numReplicas, dihedralList_d, dihedralPotList_d, tableDihedral_d); + computeTabulatedDihedrals<<<nb, numThreads>>>(forceInternal_d, pos_d, sys_d, numDihedrals*numReplicas, + dihedralList_d, dihedralPotList_d, tableDihedral_d, energies_d, get_energy); + } if (restraintIds_d != NULL ) computeHarmonicRestraints<<<1, numThreads>>>(forceInternal_d, pos_d, sys_d, numRestraints*numReplicas, restraintIds_d, restraintLocs_d, restraintSprings_d); // Calculate the energy based on the array created by the kernel // TODO: return energy - // if (get_energy) { - // gpuErrchk(cudaDeviceSynchronize()); - // thrust::device_ptr<float> en_d(energies_d); - // energy = thrust::reduce(en_d, en_d + num); - // } - + if (get_energy) + { + float e = 0.f; + gpuErrchk(cudaDeviceSynchronize()); + thrust::device_ptr<float> en_d(energies_d); + e = (thrust::reduce(en_d, en_d+num*numReplicas)) / numReplicas; + std::fstream energy_file; + energy_file.open("energy_config.txt", std::fstream::out | std::fstream::app); + if(energy_file.is_open()) + { + energy_file << "Configuation Energy: " << e << " kcal/mol " << std::endl; + energy_file.close(); + } + else + { + std::cout << "Error in opening energ files\n"; + } + energy = e; + } return energy; } diff --git a/src/ComputeForce.cuh b/src/ComputeForce.cuh index 8547e602857358731280f735bd97257906a2b2df..d325f9fd221766acea6b766bcd4e7ba7b09f37e3 100644 --- a/src/ComputeForce.cuh +++ b/src/ComputeForce.cuh @@ -987,10 +987,14 @@ void computeAngles(Vector3 force[], Vector3 pos[], } // TODO: add kernels for energy calculations -__global__ void computeTabulatedBonds(Vector3* force, - Vector3* __restrict__ pos, - BaseGrid* __restrict__ sys, - int numBonds, int3* __restrict__ bondList_d, TabulatedPotential** tableBond) { +//__global__ void computeTabulatedBonds(Vector3* force, +// Vector3* __restrict__ pos, +// BaseGrid* __restrict__ sys, +// int numBonds, int3* __restrict__ bondList_d, TabulatedPotential** tableBond) { +__global__ +void computeTabulatedBonds(Vector3* force, Vector3* __restrict__ pos, BaseGrid* __restrict__ sys, +int numBonds, int3* __restrict__ bondList_d, TabulatedPotential** tableBond, float* energy, bool get_energy) +{ // Loop over ALL bonds in ALL replicas for (int bid = threadIdx.x+blockIdx.x*blockDim.x; bid<numBonds; bid+=blockDim.x*gridDim.x) { // Initialize interaction energy (per particle) @@ -1003,17 +1007,19 @@ __global__ void computeTabulatedBonds(Vector3* force, // wrapping this value if necessary const Vector3 dr = sys->wrapDiff(pos[j] - pos[i]); - Vector3 force_local = tableBond[ bondList_d[bid].z ]->computef(dr,dr.length2()); - - atomicAdd( &force[i], force_local ); - atomicAdd( &force[j], -force_local ); - - // if (get_energy) - // { - // //TODO: clarification on energy computation needed, consider changing. - // atomicAdd( &g_energies[i], energy_local); - // //atomicAdd( &g_energies[j], energy_local); - // } + //Vector3 force_local = tableBond[ bondList_d[bid].z ]->computef(dr,dr.length2()); + EnergyForce fe_local = tableBond[ bondList_d[bid].z ]->compute(dr,dr.length2()); + //atomicAdd( &force[i], force_local ); + //atomicAdd( &force[j], -force_local ); + atomicAdd( &force[i], fe_local.f ); + atomicAdd( &force[j], -fe_local.f ); + + if (get_energy) + { + //TODO: clarification on energy computation needed, consider changing. + atomicAdd( &energy[i], fe_local.e*0.5f); + atomicAdd( &energy[j], fe_local.e*0.5f); + } } } @@ -1021,11 +1027,11 @@ __global__ void computeTabulatedAngles(Vector3* force, Vector3* __restrict__ pos, BaseGrid* __restrict__ sys, - int numAngles, int4* __restrict__ angleList_d, TabulatedAnglePotential** tableAngle) { + int numAngles, int4* __restrict__ angleList_d, TabulatedAnglePotential** tableAngle, float* energy, bool get_energy) { // Loop over ALL angles in ALL replicas for (int i = threadIdx.x+blockIdx.x*blockDim.x; i<numAngles; i+=blockDim.x*gridDim.x) { int4& ids = angleList_d[i]; - computeAngle(tableAngle[ ids.w ], sys, force, pos, ids.x, ids.y, ids.z); + computeAngle(tableAngle[ ids.w ], sys, force, pos, ids.x, ids.y, ids.z, energy, get_energy); // if (get_energy) // { // //TODO: clarification on energy computation needed, consider changing. @@ -1128,7 +1134,7 @@ __global__ void computeTabulatedDihedrals(Vector3* force, const Vector3* __restrict__ pos, const BaseGrid* __restrict__ sys, int numDihedrals, const int4* const __restrict__ dihedralList_d, - const int* __restrict__ dihedralPotList_d, TabulatedDihedralPotential** tableDihedral) { + const int* __restrict__ dihedralPotList_d, TabulatedDihedralPotential** tableDihedral, float* energy, bool get_energy) { // int currDihedral = blockIdx.x * blockDim.x + threadIdx.x; // first particle ID @@ -1136,7 +1142,7 @@ void computeTabulatedDihedrals(Vector3* force, const Vector3* __restrict__ pos, for (int i = threadIdx.x+blockIdx.x*blockDim.x; i < numDihedrals; i+=blockDim.x*gridDim.x) { const int4& ids = dihedralList_d[i]; const int& id = dihedralPotList_d[i]; - computeDihedral(tableDihedral[ id ], sys, force, pos, ids.x, ids.y, ids.z, ids.w); + computeDihedral(tableDihedral[ id ], sys, force, pos, ids.x, ids.y, ids.z, ids.w, energy, get_energy); // if (get_energy) // { diff --git a/src/ComputeForce.h b/src/ComputeForce.h index e3101bf63503ef10313586f1e60535e0cd2df482..2b4f7c7d6471c95208f6125c56540160750c3666 100644 --- a/src/ComputeForce.h +++ b/src/ComputeForce.h @@ -141,7 +141,10 @@ public: return bondList_d; } - + float* getEnergy() + { + return energies_d; + } HOST DEVICE static EnergyForce coulombForce(Vector3 r, float alpha,float start, float len); diff --git a/src/ComputeGridGrid.cu b/src/ComputeGridGrid.cu index 1cbba924bcfd0f402e985906816dbd52692795e0..6ba57915b8172e52340dabb967227fa65427134f 100644 --- a/src/ComputeGridGrid.cu +++ b/src/ComputeGridGrid.cu @@ -2,7 +2,6 @@ #include "ComputeGridGrid.cuh" #include "RigidBodyGrid.h" #include "CudaUtil.cuh" - //RBTODO handle periodic boundaries //RBTODO: add __restrict__, benchmark (Q: how to restrict member data?) __global__ @@ -29,17 +28,14 @@ void computeGridGridForce(const RigidBodyGrid* rho, const RigidBodyGrid* u, r_pos = basis_rho.transform( r_pos ) + origin_rho_minus_origin_u; /* real space */ const Vector3 u_ijk_float = basis_u_inv.transform( r_pos ); - // RBTODO: Test for non-unit delta /* Vector3 tmpf = Vector3(0.0f); */ /* float tmpe = 0.0f; */ /* const ForceEnergy fe = ForceEnergy( tmpf, tmpe); */ const ForceEnergy fe = u->interpolateForceDLinearly( u_ijk_float ); /* in coord frame of u */ force[tid] = fe.f; - const float r_val = rho->val[r_id]; /* maybe move to beginning of function? */ force[tid] = basis_u_inv.transpose().transform( r_val*force[tid] ); /* transform to lab frame, with correct scaling factor */ - // Calculate torque about origin_u in the lab frame torque[tid] = r_pos.cross(force[tid]); } diff --git a/src/Configuration.cpp b/src/Configuration.cpp index 8dfbb22f1c43a600facaf55f3c17b3bfac68267e..bbcdebd15c1d06ade7fb3e4953787492442da721 100644 --- a/src/Configuration.cpp +++ b/src/Configuration.cpp @@ -1105,7 +1105,6 @@ int Configuration::readParameters(const char * config_file) { { printf("The grid file name %s\n", value.val()); //partGridFile[currPart] = value; - cout << part[currPart].numPartGridFiles << endl; stringToArray<String>(&value, part[currPart].numPartGridFiles, &partGridFile[currPart]); for(int i = 0; i < part[currPart].numPartGridFiles; ++i) diff --git a/src/GrandBrownTown.cu b/src/GrandBrownTown.cu index 26df336db6a7604757d3ed6b3fd20a3887bfcb96..6e67408324cbe433f90fcc5f369aa03fe996d109 100644 --- a/src/GrandBrownTown.cu +++ b/src/GrandBrownTown.cu @@ -77,7 +77,7 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg, if(particle_dynamic == String("NoseHooverLangevin")) random = new float[num * numReplicas]; } - printf("%d\n",__LINE__); + //printf("%d\n",__LINE__); //for debug //force = new Vector3[num * numReplicas]; @@ -531,7 +531,7 @@ void GrandBrownTown::RunNoseHooverLangevin() gpuErrchk(cudaMalloc((void**)&force_d, sizeof(Vector3)*num * numReplicas)); printf("Configuration: %d particles | %d replicas\n", num, numReplicas); - + //float total_energy = 0.f; // Main loop over Brownian dynamics steps for (long int s = 1; s < steps; s++) { @@ -608,11 +608,12 @@ void GrandBrownTown::RunNoseHooverLangevin() else if(particle_dynamic == String("NoseHooverLangevin")) //kernel for Nose-Hoover Langevin dynamic updateKernelNoseHooverLangevin<<< numBlocks, NUM_THREADS >>>(internal -> getPos_d(), internal -> getMom_d(), - internal -> getRan_d(), internal -> getForceInternal_d(), internal -> getType_d(), part_d, kT, kTGrid_d, electricField, tl, timestep, num, sys_d, randoGen_d, numReplicas); + internal -> getRan_d(), internal -> getForceInternal_d(), internal -> getType_d(), part_d, kT, kTGrid_d, electricField, tl, timestep, num, sys_d, + randoGen_d, numReplicas); //For Brownian motion else updateKernel<<< numBlocks, NUM_THREADS >>>(internal -> getPos_d(), internal -> getForceInternal_d(), internal -> getType_d(), - part_d, kT, kTGrid_d, electricField, tl, timestep, num, sys_d, randoGen_d, numReplicas); + part_d, kT, kTGrid_d, electricField, tl, timestep, num, sys_d, randoGen_d, numReplicas, internal->getEnergy(), get_energy); if(rigidbody_dynamic == String("Langevin")) { @@ -751,7 +752,7 @@ void GrandBrownTown::RunNoseHooverLangevin() gpuErrchk(cudaDeviceSynchronize()); if(particle_dynamic == String("Langevin") || particle_dynamic == String("NoseHooverLangevin")) - LastUpdateKernelBAOAB<<< numBlocks, NUM_THREADS >>>(internal -> getPos_d(), internal -> getMom_d(), internal -> getForceInternal_d(), internal -> getType_d(), part_d, kT, kTGrid_d, electricField, tl, timestep, num, sys_d, randoGen_d, numReplicas); + LastUpdateKernelBAOAB<<< numBlocks, NUM_THREADS >>>(internal -> getPos_d(), internal -> getMom_d(), internal -> getForceInternal_d(), internal -> getType_d(), part_d, kT, kTGrid_d, electricField, tl, timestep, num, sys_d, randoGen_d, numReplicas, internal->getEnergy(), get_energy); //gpuErrchk(cudaDeviceSynchronize()); if(rigidbody_dynamic == String("Langevin")) @@ -802,7 +803,7 @@ void GrandBrownTown::RunNoseHooverLangevin() wkf_timer_stop(timerS); t = s * timestep; // Simulation progress and statistics. - float percent = (100.0f * s) / steps; + float percent = (100.0f * s) / steps; float msPerStep = wkf_timer_time(timerS) * 1000.0f / outputEnergyPeriod; float nsPerDay = numReplicas * timestep / msPerStep * 864E5f; @@ -820,7 +821,19 @@ void GrandBrownTown::RunNoseHooverLangevin() gpuErrchk(cudaMemcpy(momentum, internal->getMom_d(), sizeof(Vector3)*num*numReplicas,cudaMemcpyDeviceToHost)); float e = KineticEnergy(); printf(" The kinetic energy is %f \n",e*(2.388458509e-1)); + std::fstream energy_file; + energy_file.open("energy_config.txt", std::fstream::out | std::fstream::app); + if(energy_file.is_open()) + { + energy_file << "Kinetic Energy: " << e*num*0.5f*(2.388458509e-1) << " (kT) "<< std::endl; + energy_file.close(); + } + else + { + std::cout << "Error in opening energ files\n"; + } } + if(rigidbody_dynamic == String("Langevin")) { //gpuErrchk(cudaMemcpy(momentum, internal->getMom_d(), sizeof(Vector3)*num*numReplicas,cudaMemcpyDeviceToHost)); diff --git a/src/GrandBrownTown.cuh b/src/GrandBrownTown.cuh index 2ff30850f460bd5849fd6a3c6b51985824843b20..9e8ef8bdf904c4a0563f91ee67e6f8e2fb17d6a2 100644 --- a/src/GrandBrownTown.cuh +++ b/src/GrandBrownTown.cuh @@ -272,7 +272,7 @@ __global__ void updateKernelBAOAB(Vector3* pos, Vector3* momentum, Vector3* __re __global__ void LastUpdateKernelBAOAB(Vector3* pos,Vector3* momentum, Vector3* __restrict__ forceInternal, int type[], BrownianParticleType* part[], float kT, BaseGrid* kTGrid, float electricField, int tGridLength, float timestep, int num, - BaseGrid* sys, Random* randoGen, int numReplicas) + BaseGrid* sys, Random* randoGen, int numReplicas, float* __restrict__ energy, bool get_energy) { const int idx = static_cast<int>(blockIdx.x * blockDim.x + threadIdx.x); @@ -291,8 +291,10 @@ __global__ void LastUpdateKernelBAOAB(Vector3* pos,Vector3* momentum, Vector3* _ { ForceEnergy tmp = pt.pmf[i].interpolateForceDLinearlyPeriodic(r0); fe.f += tmp.f; + fe.e += tmp.e; } - + if(get_energy) + energy[idx] += fe.e; #ifndef FORCEGRIDOFF // Add a force defined via 3D FORCE maps (not 3D potential maps) if (pt.forceXGrid != NULL) fe.f.x += pt.forceXGrid->interpolatePotentialLinearly(r0); @@ -379,7 +381,7 @@ __global__ void updateKernel(Vector3* pos, Vector3* __restrict__ forceInternal, int type[], BrownianParticleType* part[],float kT, BaseGrid* kTGrid, float electricField, int tGridLength, float timestep, int num, BaseGrid* sys, - Random* randoGen, int numReplicas) + Random* randoGen, int numReplicas, float* __restrict__ energy, bool get_energy) { // Calculate this thread's ID const int idx = static_cast<int>(blockIdx.x * blockDim.x + threadIdx.x); @@ -405,8 +407,10 @@ void updateKernel(Vector3* pos, Vector3* __restrict__ forceInternal, int type[], { ForceEnergy tmp = pt.pmf[i].interpolateForceDLinearlyPeriodic(p); fe.f += tmp.f; + fe.e += tmp.e; } - + if(get_energy) + energy[idx]+=fe.e; #ifndef FORCEGRIDOFF // Add a force defined via 3D FORCE maps (not 3D potential maps) if (pt.forceXGrid != NULL) fe.f.x += pt.forceXGrid->interpolatePotentialLinearly(p); diff --git a/src/RigidBody.cu b/src/RigidBody.cu index 7c9fdce55165e98811e9b4a42c68a73d5c24c309..bc22c296cf9ed682b25cc092f510762261391f44 100644 --- a/src/RigidBody.cu +++ b/src/RigidBody.cu @@ -231,6 +231,7 @@ void RigidBody::integrateDLM(int startFinishAll) { Vector3 trans; // = *p_trans; //Matrix3 rot = Matrix3(1); // = *p_rot; + if ( isnan(force.x) || isnan(torque.x) ) { // NaN check diff --git a/src/RigidBodyController.cu b/src/RigidBodyController.cu index de54866acc4ef9ceec59b2571fe1f97de0403834..16d3f13c8a0fe48af659f24628d078bd0624d83b 100644 --- a/src/RigidBodyController.cu +++ b/src/RigidBodyController.cu @@ -125,13 +125,21 @@ bool RigidBodyController::loadRBCoordinates(const char* fileName) { delete[] tokenList; - i++; - if (i == imax) { - j++; - i=0; - if (j == jmax) - break; - } + //i++; + //if (i == imax) { + // j++; + // i=0; + // if (j == jmax) + // break; + //} + j++; + if (j == jmax) { + i++; + if (i == imax) + break; + j=0; + jmax = rigidBodyByType[i].size(); + } } fclose(inp); return true; @@ -493,10 +501,8 @@ void RigidBodyForcePair::callGridForceKernel(int pairId, int s) { */ Matrix3 B1 = getBasis1(i); Vector3 c = getOrigin1(i) - getOrigin2(i); - Matrix3 B2 = getBasis2(i).inverse(); - - + // RBTODO: get energy if (!isPmf) { /* pair of RBs */ computeGridGridForce<<< nb, NUMTHREADS, NUMTHREADS*2*sizeof(Vector3), s >>> @@ -527,7 +533,6 @@ void RigidBodyForcePair::retrieveForcesForGrid(const int i) { cudaMemcpyDeviceToHost, s)); gpuErrchk(cudaMemcpyAsync(torques[i], torques_d[i], sizeof(Vector3)*nb, cudaMemcpyDeviceToHost, s)); - } void RigidBodyForcePair::processGPUForces() { diff --git a/src/TabulatedMethods.cuh b/src/TabulatedMethods.cuh index 90a78391914cd22a53e290c4704368f46535f0e4..2f460faca6328f2742d65bc3b2ef6a1df445bbf8 100644 --- a/src/TabulatedMethods.cuh +++ b/src/TabulatedMethods.cuh @@ -3,7 +3,7 @@ #define BD_PI 3.1415927f __device__ inline void computeAngle(const TabulatedAnglePotential* __restrict__ a, const BaseGrid* __restrict__ sys, Vector3* force, const Vector3* __restrict__ pos, - const int& i, const int& j, const int& k) { + const int& i, const int& j, const int& k, float* energy, bool get_energy) { // Particle's type and position @@ -54,7 +54,14 @@ __device__ inline void computeAngle(const TabulatedAnglePotential* __restrict__ // Linearly interpolate the potential float U0 = a->pot[home]; float dUdx = (a->pot[(((home+1)==(a->size)) ? (a->size)-1 : home+1)] - U0) * a->angle_step_inv; - // float energy = (dUdx * angle) + U0; + if(get_energy) + { + float e = ((dUdx * angle) + U0)*0.3333333333; + atomicAdd( &energy[i], e); + atomicAdd( &energy[j], e); + atomicAdd( &energy[k], e); + + } float sin = sqrtf(1.0f - cos*cos); dUdx /= abs(sin) > 1e-3 ? sin : 1e-3; // avoid singularity @@ -73,7 +80,7 @@ __device__ inline void computeAngle(const TabulatedAnglePotential* __restrict__ __device__ inline void computeDihedral(const TabulatedDihedralPotential* __restrict__ d, const BaseGrid* __restrict__ sys, Vector3* forces, const Vector3* __restrict__ pos, - const int& i, const int& j, const int& k, const int& l) { + const int& i, const int& j, const int& k, const int& l, float* energy, bool get_energy) { const Vector3 posa = pos[i]; const Vector3 posb = pos[j]; const Vector3 posc = pos[k]; @@ -128,8 +135,14 @@ __device__ inline void computeDihedral(const TabulatedDihedralPotential* __restr // Linear interpolation float U0 = d->pot[home]; // Potential float dU = d->pot[home1] - U0; // Change in potential - - // energy = dU * t + U0; + if(get_energy) + { + float e_local = (dU * t + U0)*0.25f; + atomicAdd( &energy[i], e_local ); + atomicAdd( &energy[j], e_local ); + atomicAdd( &energy[k], e_local ); + atomicAdd( &energy[l], e_local ); + } force = -dU * d->angle_step_inv; // avoid singularity when one angle is straight diff --git a/src/useful.cu b/src/useful.cu index b729cacf0719b359e0bf39b5ad7c4ba3dbe1c9e9..5e1a2b2a34a4875183133fe137c2fc970ed2f735 100644 --- a/src/useful.cu +++ b/src/useful.cu @@ -409,7 +409,6 @@ Matrix3::Matrix3(const float* d) { ezz = d[8]; setIsDiag(); } - Matrix3 Matrix3::inverse() const { Matrix3 m; if (isDiag) { diff --git a/src/useful.h b/src/useful.h index d3a70fbef6451fe8295dbf1e8ca83050eda2fabd..28417b791eb25ef54db2188ddbb508ba804f62e8 100644 --- a/src/useful.h +++ b/src/useful.h @@ -390,7 +390,9 @@ public: HOST DEVICE Matrix3 inverse() const; - float det() const; + HOST DEVICE + float det() const; + //Han-Yi Chou HOST DEVICE inline Matrix3 normalized() const {