From 8f5693f6d15d57c9e3ce63cfb8909b4fe1982d63 Mon Sep 17 00:00:00 2001 From: Han-Yi Chou <hchou10@illinois.edu> Date: Sat, 27 Jan 2018 18:18:37 -0600 Subject: [PATCH] modify feature of computing energy and make interpolation scheme as an option Conflicts: src/Configuration.cpp src/GrandBrownTown.cu --- src/BaseGrid.h | 45 ++++++++-- src/ComputeForce.cu | 5 +- src/ComputeGridGrid.cu | 76 ++++++++++------ src/ComputeGridGrid.cuh | 5 +- src/Configuration.cpp | 11 ++- src/Configuration.h | 3 +- src/GrandBrownTown.cu | 107 ++++++++++++++-------- src/GrandBrownTown.cuh | 76 ++++++++++------ src/GrandBrownTown.h | 5 +- src/RigidBody.cu | 72 ++++++++++----- src/RigidBody.h | 19 ++-- src/RigidBodyController.cu | 179 ++++++++++++++++++++++++------------- src/RigidBodyController.h | 16 ++-- src/RigidBodyGrid.cu | 14 +-- src/useful.h | 12 +-- 15 files changed, 429 insertions(+), 216 deletions(-) diff --git a/src/BaseGrid.h b/src/BaseGrid.h index 5523284..29c4f7a 100644 --- a/src/BaseGrid.h +++ b/src/BaseGrid.h @@ -35,10 +35,39 @@ public: class ForceEnergy { public: - DEVICE ForceEnergy(Vector3 &f, float &e) : + HOST DEVICE ForceEnergy() : f(0.f), e(0.f) {}; + HOST DEVICE ForceEnergy(Vector3 &f, float &e) : f(f), e(e) {}; - DEVICE ForceEnergy(float f, float e) : + HOST DEVICE explicit ForceEnergy(float e) : f(e), e(e) {}; + HOST DEVICE ForceEnergy(float f, float e) : f(f), e(e) {}; + HOST DEVICE ForceEnergy(const ForceEnergy& src) + { + f = src.f; + e = src.e; + } + HOST DEVICE ForceEnergy& operator=(const ForceEnergy& src) + { + if(&src != this) + { + this->f = src.f; + this->e = src.e; + } + return *this; + } + HOST DEVICE ForceEnergy operator+(const ForceEnergy& src) + { + ForceEnergy fe; + fe.f = this->f + src.f; + fe.e = this->e + src.e; + return fe; + } + HOST DEVICE ForceEnergy& operator+=(ForceEnergy& src) + { + this->f += src.f; + this->e += src.e; + return *this; + } Vector3 f; float e; }; @@ -877,13 +906,13 @@ public: //#define cubic DEVICE inline ForceEnergy interpolateForceDLinearlyPeriodic(const Vector3& pos) const { - #ifdef cubic - return interpolateForceD(pos); - #elif defined(cubic_namd) - return interpolateForceDnamd(pos); - #else + //#ifdef cubic + //return interpolateForceD(pos); + //#elif defined(cubic_namd) + //return interpolateForceDnamd(pos); + //#else return interpolateForceDLinearly(pos); - #endif + //#endif #if 0 const Vector3 l = basisInv.transform(pos - origin); diff --git a/src/ComputeForce.cu b/src/ComputeForce.cu index e5d212c..3610f8f 100644 --- a/src/ComputeForce.cu +++ b/src/ComputeForce.cu @@ -6,7 +6,6 @@ #include "ComputeForce.cuh" #include "Configuration.h" #include <cuda_profiler_api.h> -#include <thrust/device_ptr.h> #include <fstream> #include <iostream> #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } @@ -754,7 +753,7 @@ float ComputeForce::computeTabulated(bool get_energy) { // Calculate the energy based on the array created by the kernel // TODO: return energy - if (get_energy) + /*if (get_energy) { float e = 0.f; gpuErrchk(cudaDeviceSynchronize()); @@ -772,7 +771,7 @@ float ComputeForce::computeTabulated(bool get_energy) { std::cout << "Error in opening energ files\n"; } energy = e; - } + }*/ return energy; } diff --git a/src/ComputeGridGrid.cu b/src/ComputeGridGrid.cu index 6ba5791..7fae6ae 100644 --- a/src/ComputeGridGrid.cu +++ b/src/ComputeGridGrid.cu @@ -5,21 +5,21 @@ //RBTODO handle periodic boundaries //RBTODO: add __restrict__, benchmark (Q: how to restrict member data?) __global__ -void computeGridGridForce(const RigidBodyGrid* rho, const RigidBodyGrid* u, - const Matrix3 basis_rho, const Matrix3 basis_u_inv, - const Vector3 origin_rho_minus_origin_u, - Vector3 * retForce, Vector3 * retTorque) { +void computeGridGridForce(const RigidBodyGrid* rho, const RigidBodyGrid* u, const Matrix3 basis_rho, const Matrix3 basis_u_inv, const Vector3 origin_rho_minus_origin_u, + ForceEnergy* retForce, Vector3 * retTorque, int scheme) +{ - extern __shared__ Vector3 s[]; - Vector3 *force = s; - Vector3 *torque = &s[NUMTHREADS]; + extern __shared__ ForceEnergy s[]; + ForceEnergy *force = s; + //Vector3 *torque = &s[NUMTHREADS]; + ForceEnergy *torque = &s[NUMTHREADS]; // RBTODO: http://devblogs.nvidia.com/parallelforall/cuda-pro-tip-write-flexible-kernels-grid-stride-loops const int tid = threadIdx.x; const int r_id = blockIdx.x * blockDim.x + threadIdx.x; - force[tid] = Vector3(0.0f); - torque[tid] = Vector3(0.0f); + force[tid] = ForceEnergy(0.f,0.f); + torque[tid] = ForceEnergy(0.f,0.f); if (r_id < rho->getSize()) { // skip threads with nothing to do // RBTODO: reduce registers used; // commenting out interpolateForceD still uses ~40 registers @@ -32,12 +32,20 @@ void computeGridGridForce(const RigidBodyGrid* rho, const RigidBodyGrid* u, /* Vector3 tmpf = Vector3(0.0f); */ /* float tmpe = 0.0f; */ /* const ForceEnergy fe = ForceEnergy( tmpf, tmpe); */ - const ForceEnergy fe = u->interpolateForceDLinearly( u_ijk_float ); /* in coord frame of u */ - force[tid] = fe.f; + + ForceEnergy fe; + if(!scheme) + fe = u->interpolateForceDLinearly( u_ijk_float ); /* in coord frame of u */ + else + fe = u->interpolateForceD( u_ijk_float ); + + force[tid] = fe; + //force[tid].e = fe.e; + const float r_val = rho->val[r_id]; /* maybe move to beginning of function? */ - force[tid] = basis_u_inv.transpose().transform( r_val*force[tid] ); /* transform to lab frame, with correct scaling factor */ + force[tid].f = basis_u_inv.transpose().transform( r_val*(force[tid].f) ); /* transform to lab frame, with correct scaling factor */ // Calculate torque about origin_u in the lab frame - torque[tid] = r_pos.cross(force[tid]); + torque[tid].f = r_pos.cross(force[tid].f); } // Reduce force and torques @@ -48,6 +56,8 @@ void computeGridGridForce(const RigidBodyGrid* rho, const RigidBodyGrid* u, for (int offset = blockDim.x/2; offset > 0; offset >>= 1) { if (tid < offset) { int oid = tid + offset; + //if(get_energy) + //force[tid].e = force[tid].e + force[oid].e; force[tid] = force[tid] + force[oid]; torque[tid] = torque[tid] + torque[oid]; } @@ -55,8 +65,9 @@ void computeGridGridForce(const RigidBodyGrid* rho, const RigidBodyGrid* u, } if (tid == 0) { + //retForce[blockIdx.x].e = force[0].e; retForce[blockIdx.x] = force[0]; - retTorque[blockIdx.x] = torque[0]; + retTorque[blockIdx.x] = torque[0].f; } } @@ -65,29 +76,38 @@ void computePartGridForce(const Vector3* __restrict__ pos, Vector3* particleForc const int num, const int* __restrict__ particleIds, const RigidBodyGrid* __restrict__ u, const Matrix3 basis_u_inv, const Vector3 origin_u, - Vector3* __restrict__ retForce, Vector3* __restrict__ retTorque) { + ForceEnergy* __restrict__ retForce, Vector3* __restrict__ retTorque, float* __restrict__ energy, bool get_energy, int scheme) { - extern __shared__ Vector3 s[]; - Vector3 *force = s; - Vector3 *torque = &s[NUMTHREADS]; + extern __shared__ ForceEnergy s[]; + ForceEnergy *force = s; + ForceEnergy *torque = &s[NUMTHREADS]; const int tid = threadIdx.x; const int i = blockIdx.x * blockDim.x + threadIdx.x; - force[tid] = Vector3(0.0f); - torque[tid] = Vector3(0.0f); + force[tid] = ForceEnergy(0.f, 0.f); + torque[tid] = ForceEnergy(0.f,0.f); if (i < num) { - const int& id = particleIds[i]; + const int id = particleIds[i]; Vector3 p = pos[id] - origin_u; // TODO: wrap to center of u const Vector3 u_ijk_float = basis_u_inv.transform( p ); - const ForceEnergy fe = u->interpolateForceDLinearly( u_ijk_float ); /* in coord frame of u */ - force[tid] = fe.f; - force[tid] = basis_u_inv.transpose().transform( force[tid] ); /* transform to lab frame */ - atomicAdd( &particleForce[id], force[tid] ); // apply force to particle + + ForceEnergy fe; + if(!scheme) + fe = u->interpolateForceDLinearly( u_ijk_float ); /* in coord frame of u */ + else + fe = u->interpolateForceD( u_ijk_float ); + + force[tid] = fe; + //force[tid].e = fe.e; + if(get_energy) + atomicAdd(&energy[id], fe.e); + force[tid].f = basis_u_inv.transpose().transform( force[tid].f ); /* transform to lab frame */ + atomicAdd( &particleForce[id], force[tid].f ); // apply force to particle // Calculate torque about origin_u in the lab frame - torque[tid] = p.cross(force[tid]); // RBTODO: test sign + torque[tid].f = p.cross(force[tid].f); // RBTODO: test sign } // Reduce force and torques @@ -96,6 +116,8 @@ void computePartGridForce(const Vector3* __restrict__ pos, Vector3* particleForc for (int offset = blockDim.x/2; offset > 0; offset >>= 1) { if (tid < offset) { int oid = tid + offset; + //if(get_energy) + //force[tid].e = force[tid].e + force[oid].e; force[tid] = force[tid] + force[oid]; torque[tid] = torque[tid] + torque[oid]; } @@ -104,7 +126,7 @@ void computePartGridForce(const Vector3* __restrict__ pos, Vector3* particleForc if (tid == 0) { retForce[blockIdx.x] = force[0]; - retTorque[blockIdx.x] = torque[0]; + retTorque[blockIdx.x] = torque[0].f; } } diff --git a/src/ComputeGridGrid.cuh b/src/ComputeGridGrid.cuh index 0140616..98275a1 100644 --- a/src/ComputeGridGrid.cuh +++ b/src/ComputeGridGrid.cuh @@ -5,19 +5,20 @@ #define WARPSIZE 32 class RigidBodyGrid; +class ForceEnergy; extern __global__ void computeGridGridForce(const RigidBodyGrid* rho, const RigidBodyGrid* u, const Matrix3 basis_rho, const Matrix3 basis_u_inv, const Vector3 origin_rho_minus_origin_u, - Vector3 * retForce, Vector3 * retTorque); + ForceEnergy* retForce, Vector3 * retTorque, int scheme); extern __global__ void computePartGridForce(const Vector3* __restrict__ pos, Vector3* particleForce, const int num, const int* __restrict__ particleIds, const RigidBodyGrid* __restrict__ u, const Matrix3 basis_u_inv, const Vector3 origin_u, - Vector3* __restrict__ retForce, Vector3* __restrict__ retTorque); + ForceEnergy* __restrict__ retForce, Vector3* __restrict__ retTorque, float* energy, bool get_energy, int scheme); extern __global__ void createPartlist(const Vector3* __restrict__ pos, diff --git a/src/Configuration.cpp b/src/Configuration.cpp index bbcdebd..61728da 100644 --- a/src/Configuration.cpp +++ b/src/Configuration.cpp @@ -6,6 +6,9 @@ #include <cassert> #include <stdlib.h> /* srand, rand */ #include <time.h> /* time */ +#include <iostream> +using namespace std; + #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true) { if (code != cudaSuccess) { @@ -776,6 +779,9 @@ void Configuration::setDefaults() { // Hidden parameters // Might be parameters later numCapFactor = 5; + + ParticleInterpolationType = 0; + RigidBodyInterpolationType = 0; } int Configuration::readParameters(const char * config_file) { @@ -925,7 +931,10 @@ int Configuration::readParameters(const char * config_file) { RigidBodyDynamicType = value; else if (param == String("ParticleLangevinIntegrator")) ParticleLangevinIntegrator = value; - + else if (param == String("ParticleInterpolationType")) + ParticleInterpolationType = atoi(value.val()); + else if (param == String("RigidBodyInterpolationType")) + RigidBodyInterpolationType = atoi(value.val()); // PARTICLES else if (param == String("particle")) { part[++currPart] = BrownianParticleType(value); diff --git a/src/Configuration.h b/src/Configuration.h index 6bd6996..0895235 100644 --- a/src/Configuration.h +++ b/src/Configuration.h @@ -242,7 +242,8 @@ public: // RigidBody parameters. RigidBodyType* rigidBody; int numRigidTypes; - + int ParticleInterpolationType; + int RigidBodyInterpolationType; }; #endif diff --git a/src/GrandBrownTown.cu b/src/GrandBrownTown.cu index 6e67408..6c3a766 100644 --- a/src/GrandBrownTown.cu +++ b/src/GrandBrownTown.cu @@ -5,6 +5,8 @@ #include "BrownParticlesKernel.h" #include <stdlib.h> /* srand, rand */ #include <time.h> /* time */ +#include <thrust/device_ptr.h> +#include <fstream> #ifndef gpuErrchk #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } @@ -16,6 +18,16 @@ inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=t } #endif +static void checkEnergyFiles() +{ + std::ifstream part_ifile("energy_config.txt"); + std::ifstream rb_ifile("rb_energy_config.txt"); + if(part_ifile.good()) + rename("energy_config.txt","energy_config.txt.BAK"); + if(rb_ifile.good()) + rename("rb_energy_config.txt","rb_energy_config.txt.BAK"); +} + bool GrandBrownTown::DEBUG; cudaEvent_t START, STOP; @@ -28,6 +40,8 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg, //Determine which dynamic. Han-Yi Chou particle_dynamic = c.ParticleDynamicType; rigidbody_dynamic = c.RigidBodyDynamicType; + ParticleInterpolationType = c.ParticleInterpolationType; + RigidBodyInterpolationType = c.RigidBodyInterpolationType; //particle_langevin_integrator = c.ParticleLangevinIntegrator; printf("%d\n",__LINE__); for (int i = 0; i < numReplicas; ++i) @@ -530,6 +544,8 @@ void GrandBrownTown::RunNoseHooverLangevin() Vector3 *force_d; gpuErrchk(cudaMalloc((void**)&force_d, sizeof(Vector3)*num * numReplicas)); + checkEnergyFiles(); + printf("Configuration: %d particles | %d replicas\n", num, numReplicas); //float total_energy = 0.f; // Main loop over Brownian dynamics steps @@ -541,8 +557,9 @@ void GrandBrownTown::RunNoseHooverLangevin() { // 'interparticleForce' - determines whether particles interact with each other gpuErrchk(cudaMemset((void*)(internal->getForceInternal_d()),0,num*numReplicas*sizeof(Vector3))); + gpuErrchk(cudaMemset((void*)(internal->getEnergy()), 0, sizeof(float)*num*numReplicas)); RBC.clearForceAndTorque(); //Han-Yi Chou - + if (interparticleForce) { if (tabulatedPotential) @@ -593,7 +610,7 @@ void GrandBrownTown::RunNoseHooverLangevin() } }//if inter-particle force gpuErrchk(cudaDeviceSynchronize()); - RBC.updateForces(internal->getPos_d(), internal->getForceInternal_d(), s); + RBC.updateForces(internal->getPos_d(), internal->getForceInternal_d(), s, internal->getEnergy(), get_energy, RigidBodyInterpolationType); if(rigidbody_dynamic == String("Langevin")) { RBC.SetRandomTorques(); @@ -601,19 +618,20 @@ void GrandBrownTown::RunNoseHooverLangevin() } }//if step == 1 - gpuErrchk(cudaDeviceSynchronize()); - + gpuErrchk(cudaMemset((void*)(internal->getEnergy()), 0, sizeof(float)*num*numReplicas)); + if(particle_dynamic == String("Langevin")) - updateKernelBAOAB<<< numBlocks, NUM_THREADS >>>(internal -> getPos_d(), internal -> getMom_d(), internal -> getForceInternal_d(), internal -> getType_d(), part_d, kT, kTGrid_d, electricField, tl, timestep, num, sys_d, randoGen_d, numReplicas); + updateKernelBAOAB<<< numBlocks, NUM_THREADS >>>(internal -> getPos_d(), internal -> getMom_d(), internal -> getForceInternal_d(), internal -> getType_d(), part_d, kT, kTGrid_d, electricField, tl, timestep, num, sys_d, randoGen_d, numReplicas, ParticleInterpolationType); else if(particle_dynamic == String("NoseHooverLangevin")) //kernel for Nose-Hoover Langevin dynamic updateKernelNoseHooverLangevin<<< numBlocks, NUM_THREADS >>>(internal -> getPos_d(), internal -> getMom_d(), internal -> getRan_d(), internal -> getForceInternal_d(), internal -> getType_d(), part_d, kT, kTGrid_d, electricField, tl, timestep, num, sys_d, - randoGen_d, numReplicas); - //For Brownian motion + randoGen_d, numReplicas, ParticleInterpolationType); + ////For Brownian motion else updateKernel<<< numBlocks, NUM_THREADS >>>(internal -> getPos_d(), internal -> getForceInternal_d(), internal -> getType_d(), - part_d, kT, kTGrid_d, electricField, tl, timestep, num, sys_d, randoGen_d, numReplicas, internal->getEnergy(), get_energy); + part_d, kT, kTGrid_d, electricField, tl, timestep, num, sys_d, randoGen_d, numReplicas, + internal->getEnergy(), get_energy, ParticleInterpolationType); if(rigidbody_dynamic == String("Langevin")) { @@ -621,7 +639,10 @@ void GrandBrownTown::RunNoseHooverLangevin() RBC.integrateDLM(1); } else + { RBC.integrate(s); + //RBC.print(s); + } gpuErrchk(cudaDeviceSynchronize()); if (s % outputPeriod == 0) @@ -696,7 +717,7 @@ void GrandBrownTown::RunNoseHooverLangevin() } if (imd_on && clientsock) internal->setForceInternalOnDevice(imdForces); // TODO ensure replicas are mutually exclusive with IMD - + RBC.clearForceAndTorque(); gpuErrchk(cudaMemset((void*)(internal->getForceInternal_d()),0,num*numReplicas*sizeof(Vector3))); if (interparticleForce) @@ -748,11 +769,13 @@ void GrandBrownTown::RunNoseHooverLangevin() } gpuErrchk(cudaDeviceSynchronize()); //compute the force for rigid bodies - RBC.updateForces(internal->getPos_d(), internal->getForceInternal_d(), s); + RBC.updateForces(internal->getPos_d(), internal->getForceInternal_d(), s, internal->getEnergy(), get_energy, RigidBodyInterpolationType); gpuErrchk(cudaDeviceSynchronize()); if(particle_dynamic == String("Langevin") || particle_dynamic == String("NoseHooverLangevin")) - LastUpdateKernelBAOAB<<< numBlocks, NUM_THREADS >>>(internal -> getPos_d(), internal -> getMom_d(), internal -> getForceInternal_d(), internal -> getType_d(), part_d, kT, kTGrid_d, electricField, tl, timestep, num, sys_d, randoGen_d, numReplicas, internal->getEnergy(), get_energy); + LastUpdateKernelBAOAB<<< numBlocks, NUM_THREADS >>>(internal -> getPos_d(), internal -> getMom_d(), internal -> getForceInternal_d(), + internal -> getType_d(), part_d, kT, kTGrid_d, electricField, tl, timestep, num, sys_d, randoGen_d, numReplicas, internal->getEnergy(), get_energy, + ParticleInterpolationType); //gpuErrchk(cudaDeviceSynchronize()); if(rigidbody_dynamic == String("Langevin")) @@ -763,7 +786,6 @@ void GrandBrownTown::RunNoseHooverLangevin() RBC.print(s); } - gpuErrchk(cudaDeviceSynchronize()); if (s % outputPeriod == 0) { if(particle_dynamic == String("Langevin") || particle_dynamic == String("NoseHooverLangevin")) @@ -812,33 +834,46 @@ void GrandBrownTown::RunNoseHooverLangevin() // Do the output printf("\rStep %ld [%.2f%% complete | %.3f ms/step | %.3f ns/day]",s, percent, msPerStep, nsPerDay); + //} + //if (get_energy) + //{ // Copy positions from GPU to CPU. - gpuErrchk(cudaMemcpy(pos, internal->getPos_d(), sizeof(Vector3)*num*numReplicas,cudaMemcpyDeviceToHost)); + //gpuErrchk(cudaMemcpy(pos, internal->getPos_d(), sizeof(Vector3)*num*numReplicas,cudaMemcpyDeviceToHost)); + float e = 0.f; + float V = 0.f; + thrust::device_ptr<float> en_d(internal->getEnergy()); + V = (thrust::reduce(en_d, en_d+num*numReplicas)) / numReplicas; if(particle_dynamic == String("Langevin") || particle_dynamic == String("NoseHooverLangevin")) { - //gpuErrchk(cudaMemcpy(momentum, internal->getMom_d(), sizeof(Vector3)*num*numReplicas,cudaMemcpyDeviceToHost)); gpuErrchk(cudaMemcpy(momentum, internal->getMom_d(), sizeof(Vector3)*num*numReplicas,cudaMemcpyDeviceToHost)); - float e = KineticEnergy(); - printf(" The kinetic energy is %f \n",e*(2.388458509e-1)); - std::fstream energy_file; - energy_file.open("energy_config.txt", std::fstream::out | std::fstream::app); - if(energy_file.is_open()) - { - energy_file << "Kinetic Energy: " << e*num*0.5f*(2.388458509e-1) << " (kT) "<< std::endl; - energy_file.close(); - } - else - { - std::cout << "Error in opening energ files\n"; - } + e = KineticEnergy(); + } + std::fstream energy_file; + energy_file.open("energy_config.txt", std::fstream::out | std::fstream::app); + if(energy_file.is_open()) + { + energy_file << "Kinetic Energy: " << e*num*0.5f*(2.388458509e-1) << " (kT) "<< std::endl; + energy_file << "Potential Energy: " << V << " (kcal/mol) " << std::endl; + energy_file.close(); + } + else + { + std::cout << "Error in opening energ files\n"; } if(rigidbody_dynamic == String("Langevin")) + RBC.KineticEnergy(); + std::fstream rb_energy_file; + rb_energy_file.open("rb_energy_config.txt", std::fstream::out | std::fstream::app); + if(rb_energy_file.is_open()) { - //gpuErrchk(cudaMemcpy(momentum, internal->getMom_d(), sizeof(Vector3)*num*numReplicas,cudaMemcpyDeviceToHost)); - float e = RotKineticEnergy(); - printf(" The Rotational kinetic energy is %f \n",e*(2.388458509e-1)); + RBC.printEnergyData(rb_energy_file); + energy_file.close(); + } + else + { + std::cout << "Error in opening rb energy files\n"; } // Write restart files for each replica. @@ -1067,7 +1102,7 @@ void GrandBrownTown::run() { }//if inter-particle force gpuErrchk(cudaDeviceSynchronize()); - RBC.updateForces(internal->getPos_d(), internal->getForceInternal_d(), s); + RBC.updateForces(internal->getPos_d(), internal->getForceInternal_d(), s, RigidBodyInterpolationType); if(rigidbody_dynamic == String("Langevin")) { RBC.SetRandomTorques(); @@ -1241,7 +1276,7 @@ void GrandBrownTown::run() { gpuErrchk(cudaDeviceSynchronize()); //compute the force for rigid bodies - RBC.updateForces(internal->getPos_d(), internal->getForceInternal_d(), s); + RBC.updateForces(internal->getPos_d(), internal->getForceInternal_d(), s, RigidBodyInterpolationType); //Han-Yi Chou //For BAOAB, the last update is only to update the momentum gpuErrchk(cudaDeviceSynchronize()); @@ -1515,14 +1550,14 @@ float GrandBrownTown::KineticEnergy() return 2. * particle_energy / kT / num / numReplicas; //In the unit of 0.5kT } - -float GrandBrownTown::RotKineticEnergy() +/* +void GrandBrownTown::RotKineticEnergy() { - float e = RBC.KineticEnergy(); + RBC.KineticEnergy(); return 2. * e / numReplicas / kT; //In the unit of 0.5kT } - +*/ void GrandBrownTown::InitNoseHooverBath(int N) { printf("Entering Nose-Hoover Langevin\n"); diff --git a/src/GrandBrownTown.cuh b/src/GrandBrownTown.cuh index 9e8ef8b..f42764d 100644 --- a/src/GrandBrownTown.cuh +++ b/src/GrandBrownTown.cuh @@ -101,9 +101,9 @@ __global__ void updateKernelNoseHooverLangevin(Vector3* __restrict__ pos, Vector3* __restrict__ momentum, float* random, Vector3* __restrict__ forceInternal, int type[], BrownianParticleType* part[], float kT, BaseGrid* kTGrid, float electricField, int tGridLength, float timestep, - int num, BaseGrid* sys, Random* randoGen, int numReplicas) + int num, BaseGrid* sys, Random* randoGen, int numReplicas, int scheme) { - const int idx = static_cast<int>(blockIdx.x * blockDim.x + threadIdx.x); + const int idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx < num * numReplicas) { int t = type[idx]; @@ -117,7 +117,11 @@ updateKernelNoseHooverLangevin(Vector3* __restrict__ pos, Vector3* __restrict__ ForceEnergy fe(0.f, 0.f); for(int i = 0; i < pt.numPartGridFiles; ++i) { - ForceEnergy tmp = pt.pmf[i].interpolateForceDLinearlyPeriodic(r0); + ForceEnergy tmp(0.f,0.f); + if(!scheme) + tmp = pt.pmf[i].interpolateForceDLinearlyPeriodic(r0); + else + tmp = pt.pmf[i].interpolateForceD(r0); fe.f += tmp.f; } //ForceEnergy fe = pt.pmf->interpolateForceDLinearlyPeriodic(r0); @@ -193,9 +197,9 @@ updateKernelNoseHooverLangevin(Vector3* __restrict__ pos, Vector3* __restrict__ __global__ void updateKernelBAOAB(Vector3* pos, Vector3* momentum, Vector3* __restrict__ forceInternal, int type[], BrownianParticleType* part[], float kT, BaseGrid* kTGrid, float electricField,int tGridLength, float timestep, int num, BaseGrid* sys, - Random* randoGen, int numReplicas) + Random* randoGen, int numReplicas, int scheme) { - const int idx = static_cast<int>(blockIdx.x * blockDim.x + threadIdx.x); + const int idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx < num * numReplicas) { @@ -211,7 +215,11 @@ __global__ void updateKernelBAOAB(Vector3* pos, Vector3* momentum, Vector3* __re ForceEnergy fe(0.f, 0.f); for(int i = 0; i < pt.numPartGridFiles; ++i) { - ForceEnergy tmp = pt.pmf[i].interpolateForceDLinearlyPeriodic(r0); + ForceEnergy tmp(0.f, 0.f); + if(!scheme) + tmp = pt.pmf[i].interpolateForceDLinearlyPeriodic(r0); + else + tmp = pt.pmf[i].interpolateForceD(r0); fe.f += tmp.f; } @@ -224,7 +232,7 @@ __global__ void updateKernelBAOAB(Vector3* pos, Vector3* momentum, Vector3* __re Vector3 force = forceInternal[idx] + forceExternal + fe.f; #ifdef Debug forceInternal[idx] = -force; - #endif +#endif // Get local kT value float kTlocal = (tGridLength == 0) ? kT : kTGrid->interpolatePotentialLinearly(r0); /* periodic */ @@ -272,7 +280,7 @@ __global__ void updateKernelBAOAB(Vector3* pos, Vector3* momentum, Vector3* __re __global__ void LastUpdateKernelBAOAB(Vector3* pos,Vector3* momentum, Vector3* __restrict__ forceInternal, int type[], BrownianParticleType* part[], float kT, BaseGrid* kTGrid, float electricField, int tGridLength, float timestep, int num, - BaseGrid* sys, Random* randoGen, int numReplicas, float* __restrict__ energy, bool get_energy) + BaseGrid* sys, Random* randoGen, int numReplicas, float* __restrict__ energy, bool get_energy,int scheme) { const int idx = static_cast<int>(blockIdx.x * blockDim.x + threadIdx.x); @@ -289,9 +297,14 @@ __global__ void LastUpdateKernelBAOAB(Vector3* pos,Vector3* momentum, Vector3* _ ForceEnergy fe(0.f, 0.f); for(int i = 0; i < pt.numPartGridFiles; ++i) { - ForceEnergy tmp = pt.pmf[i].interpolateForceDLinearlyPeriodic(r0); - fe.f += tmp.f; - fe.e += tmp.e; + ForceEnergy tmp(0.f, 0.f); + if(!scheme) + tmp = pt.pmf[i].interpolateForceDLinearlyPeriodic(r0); + else + tmp = pt.pmf[i].interpolateForceD(r0); + + fe += tmp; + //fe.e += tmp.e; } if(get_energy) energy[idx] += fe.e; @@ -304,7 +317,7 @@ __global__ void LastUpdateKernelBAOAB(Vector3* pos,Vector3* momentum, Vector3* _ Vector3 force = forceInternal[idx] + forceExternal + fe.f; #ifdef Debug forceInternal[idx] = -force; - #endif +#endif #ifdef MDSTEP force = Vector3(-r0.x, -r0.y, -r0.z); @@ -381,14 +394,15 @@ __global__ void updateKernel(Vector3* pos, Vector3* __restrict__ forceInternal, int type[], BrownianParticleType* part[],float kT, BaseGrid* kTGrid, float electricField, int tGridLength, float timestep, int num, BaseGrid* sys, - Random* randoGen, int numReplicas, float* __restrict__ energy, bool get_energy) + Random* randoGen, int numReplicas, float* energy, bool get_energy, int scheme) { // Calculate this thread's ID const int idx = static_cast<int>(blockIdx.x * blockDim.x + threadIdx.x); // TODO: Make this a grid-stride loop to make efficient reuse of RNG states // Loop over ALL particles in ALL replicas - if (idx < num * numReplicas) { + if (idx < num * numReplicas) + { const int t = type[idx]; Vector3 p = pos[idx]; @@ -405,12 +419,13 @@ void updateKernel(Vector3* pos, Vector3* __restrict__ forceInternal, int type[], ForceEnergy fe(0.f, 0.f); for(int i = 0; i < pt.numPartGridFiles; ++i) { - ForceEnergy tmp = pt.pmf[i].interpolateForceDLinearlyPeriodic(p); - fe.f += tmp.f; - fe.e += tmp.e; + ForceEnergy tmp(0.f,0.f); + if(!scheme) + tmp = pt.pmf[i].interpolateForceDLinearlyPeriodic(p); + else + tmp = pt.pmf[i].interpolateForceD(p); + fe += tmp; } - if(get_energy) - energy[idx]+=fe.e; #ifndef FORCEGRIDOFF // Add a force defined via 3D FORCE maps (not 3D potential maps) if (pt.forceXGrid != NULL) fe.f.x += pt.forceXGrid->interpolatePotentialLinearly(p); @@ -423,12 +438,10 @@ void updateKernel(Vector3* pos, Vector3* __restrict__ forceInternal, int type[], // External: electric field (now this is basically a constant vector) // forceGrid: ADD force due to PMF or other potentials defined in 3D space Vector3 force = forceInternal[idx] + forceExternal + fe.f; -#ifdef Debug - forceInternal[idx] = -force; - #endif + #ifdef Debug + forceInternal[idx] = -force; + #endif - //if (idx == 0) - //forceInternal[idx] = force; // write it back out for force0 in run() // Get local kT value float kTlocal = (tGridLength == 0) ? kT : kTGrid->interpolatePotentialLinearly(p); /* periodic */ @@ -466,8 +479,19 @@ void updateKernel(Vector3* pos, Vector3* __restrict__ forceInternal, int type[], num * numReplicas); // assert( tmp.length() < 10000.0f ); pos[idx] = tmp; - - + + if(get_energy) + { + float en_local = 0.f; + for(int i = 0; i < pt.numPartGridFiles; ++i) + { + if(!scheme) + en_local += pt.pmf[i].interpolatePotentialLinearly(tmp); + else + en_local += pt.pmf[i].interpolatePotential(tmp); + } + energy[idx] += en_local; + } } } /* diff --git a/src/GrandBrownTown.h b/src/GrandBrownTown.h index 6965ca5..afbd008 100644 --- a/src/GrandBrownTown.h +++ b/src/GrandBrownTown.h @@ -96,7 +96,7 @@ private: //Compute the kinetic energy in general. Han-Yi Chou float KineticEnergy(); - float RotKineticEnergy(); + //float RotKineticEnergy(); //Initialize the Nose-Hoover auxilliary variables void InitNoseHooverBath(int N); @@ -269,7 +269,8 @@ private: String particle_dynamic; String rigidbody_dynamic; String particle_langevin_integrator; - + int ParticleInterpolationType; + int RigidBodyInterpolationType; void updateNameList(); void remember(float t); diff --git a/src/RigidBody.cu b/src/RigidBody.cu index bc22c29..c0db5e7 100644 --- a/src/RigidBody.cu +++ b/src/RigidBody.cu @@ -6,6 +6,10 @@ #include "Configuration.h" #include "ComputeGridGrid.cuh" +#include <gsl/gsl_rng.h> +#include <gsl/gsl_randist.h> +#include <gsl/gsl_math.h> + #include "Debug.h" #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } @@ -39,9 +43,11 @@ void RigidBody::init() { const int& numGrids = t->numPotGrids; numParticles = new int[numGrids]; particles_d = new int*[numGrids]; - particleForces = new Vector3*[numGrids]; + //particleForces = new Vector3*[numGrids]; + particleForces = new ForceEnergy*[numGrids]; particleTorques = new Vector3*[numGrids]; - particleForces_d = new Vector3*[numGrids]; + //particleForces_d = new Vector3*[numGrids]; + particleForces_d = new ForceEnergy*[numGrids]; particleTorques_d = new Vector3*[numGrids]; for (int i = 0; i < numGrids; ++i) { numParticles[i] = -1; @@ -50,9 +56,11 @@ void RigidBody::init() { if (n > 0) { // gpuErrchk(cudaMalloc( &particles_d[i], 0.5*sizeof(int)*n )); // not sure why 0.5 was here; prolly bug gpuErrchk(cudaMalloc( &particles_d[i], sizeof(int)*n )); // TODO: dynamically allocate memory as needed - particleForces[i] = new Vector3[nb]; + //particleForces[i] = new Vector3[nb]; + particleForces[i] = new ForceEnergy[nb]; particleTorques[i] = new Vector3[nb]; - gpuErrchk(cudaMalloc( &particleForces_d[i], sizeof(Vector3)*nb )); + //gpuErrchk(cudaMalloc( &particleForces_d[i], sizeof(Vector3)*nb )); + gpuErrchk(cudaMalloc( &particleForces_d[i], sizeof(ForceEnergy)*nb )); gpuErrchk(cudaMalloc( &particleTorques_d[i], sizeof(Vector3)*nb )); } } @@ -62,13 +70,19 @@ void RigidBody::init() { void RigidBody::Boltzmann() { - Vector3 rando = getRandomGaussVector(); - momentum = sqrt(t->mass*Temp) * 2.046167135 * rando; - rando = getRandomGaussVector(); - angularMomentum.x = sqrt(t->inertia.x*Temp) * 2.046167135 * rando.x; - angularMomentum.y = sqrt(t->inertia.y*Temp) * 2.046167135 * rando.y; - angularMomentum.z = sqrt(t->inertia.z*Temp) * 2.046167135 * rando.z; + gsl_rng *gslcpp_rng = gsl_rng_alloc(gsl_rng_default); + std::srand(time(NULL)); + gsl_rng_set (gslcpp_rng, rand() % 100000); + + double sigma[4] = { sqrt(t->mass*Temp) * 2.046167135,sqrt(t->inertia.x*Temp) * 2.046167135, sqrt(t->inertia.y*Temp) * 2.046167135, sqrt(t->inertia.z*Temp) * 2.046167135 }; + + //Vector3 rando = getRandomGaussVector(); + momentum = Vector3(gsl_ran_gaussian(gslcpp_rng,sigma[0]),gsl_ran_gaussian(gslcpp_rng,sigma[0]), gsl_ran_gaussian(gslcpp_rng,sigma[0])); + angularMomentum.x = gsl_ran_gaussian(gslcpp_rng,sigma[1]); + angularMomentum.y = gsl_ran_gaussian(gslcpp_rng,sigma[2]); + angularMomentum.z = gsl_ran_gaussian(gslcpp_rng,sigma[3]); + printf("%f\n", Temp); printf("%f\n", Temperature()); } @@ -100,7 +114,10 @@ void RigidBody::addForce(Force f) { void RigidBody::addTorque(Force torq) { torque += torq; } - +void RigidBody::addEnergy(float e) +{ + energy += e; +} void RigidBody::updateParticleList(Vector3* pos_d) { for (int i = 0; i < t->numPotGrids; ++i) { numParticles[i] = 0; @@ -131,7 +148,7 @@ void RigidBody::updateParticleList(Vector3* pos_d) { } } -void RigidBody::callGridParticleForceKernel(Vector3* pos_d, Vector3* force_d, int s) { +void RigidBody::callGridParticleForceKernel(Vector3* pos_d, Vector3* force_d, int s, float* energy, bool get_energy, int scheme) { // Apply the force and torque on the rigid body, and forces on particles // RBTODO: performance: consolidate CUDA stream management @@ -157,37 +174,44 @@ void RigidBody::callGridParticleForceKernel(Vector3* pos_d, Vector3* force_d, in // RBTODO: performance: Improve parellism here (use streams/events; overlap with other computations) const int nb = (numParticles[i]/NUMTHREADS)+1; - Vector3* forces = particleForces[i]; + //Vector3* forces = particleForces[i]; + ForceEnergy* forces = particleForces[i]; Vector3* torques = particleTorques[i]; - Vector3* forces_d = particleForces_d[i]; + //Vector3* forces_d = particleForces_d[i]; + ForceEnergy* forces_d = particleForces_d[i]; Vector3* torques_d = particleTorques_d[i]; for (int k=0; k < nb; ++k) { - forces[k] = Vector3(0.0f); + //forces[k] = Vector3(0.0f); + forces[k] = ForceEnergy(0.f); torques[k] = Vector3(0.0f); } - gpuErrchk(cudaMemcpy(forces_d, forces, sizeof(Vector3)*nb, cudaMemcpyHostToDevice)); + //gpuErrchk(cudaMemcpy(forces_d, forces, sizeof(Vector3)*nb, cudaMemcpyHostToDevice)); + gpuErrchk(cudaMemcpy(forces_d, forces, sizeof(ForceEnergy)*nb, cudaMemcpyHostToDevice)); gpuErrchk(cudaMemcpy(torques_d, torques, sizeof(Vector3)*nb, cudaMemcpyHostToDevice)); - computePartGridForce<<< nb, NUMTHREADS, NUMTHREADS*2*sizeof(Vector3) >>>( + computePartGridForce<<< nb, NUMTHREADS,2*NUMTHREADS*sizeof(ForceEnergy)*nb >>>( pos_d, force_d, numParticles[i], particles_d[i], t->rawPotentialGrids_d[i], - B, c, forces_d, torques_d); + B, c, forces_d, torques_d, energy, get_energy, scheme); - gpuErrchk(cudaMemcpy(forces, forces_d, sizeof(Vector3)*nb, cudaMemcpyDeviceToHost)); + //gpuErrchk(cudaMemcpy(forces, forces_d, sizeof(Vector3)*nb, cudaMemcpyDeviceToHost)); + gpuErrchk(cudaMemcpy(forces, forces_d, sizeof(ForceEnergy)*nb, cudaMemcpyDeviceToHost)); gpuErrchk(cudaMemcpy(torques, torques_d, sizeof(Vector3)*nb, cudaMemcpyDeviceToHost)); // Sum and apply forces and torques - Vector3 f = Vector3(0.0f); + //Vector3 f = Vector3(0.0f); + ForceEnergy f = ForceEnergy(0.f,0.f); Vector3 torq = Vector3(0.0f); for (int k = 0; k < nb; ++k) { f = f + forces[k]; torq = torq + torques[k]; } - torq = -torq + (getPosition()-c).cross( f ); - addForce( -f ); + torq = -torq + (getPosition()-c).cross( f.f ); + addForce( -f.f ); addTorque( torq ); + addEnergy( f.e ); } } @@ -282,7 +306,7 @@ void RigidBody::integrate(int startFinishAll) { //if (startFinishAll == 1) return; - Matrix3 rot = Matrix3(1); // = *p_rot; + //Matrix3 rot = Matrix3(1); // = *p_rot; if ( isnan(force.x) || isnan(torque.x) ) { @@ -316,7 +340,7 @@ float RigidBody::Temperature() return (momentum.length2() / t->mass + angularMomentum.x * angularMomentum.x / t->inertia.x + angularMomentum.y * angularMomentum.y / t->inertia.y + - angularMomentum.z * angularMomentum.z / t->inertia.z) * 0.50; + angularMomentum.z * angularMomentum.z / t->inertia.z) * 0.50 / Temp * (2.388458509e-1); } void RigidBody::applyRotation(const Matrix3& R) { diff --git a/src/RigidBody.h b/src/RigidBody.h index 9eaf2b1..9974511 100644 --- a/src/RigidBody.h +++ b/src/RigidBody.h @@ -18,7 +18,7 @@ #include "RigidBodyController.h" class Configuration; - +class ForceEnergy; typedef float BigReal; /* strip this out later */ typedef Vector3 Force; @@ -40,9 +40,11 @@ class RigidBody { // host side representation of rigid bodies HOST DEVICE void addForce(Force f); HOST DEVICE void addTorque(Force t); + HOST DEVICE void addEnergy(float e); HOST DEVICE void addLangevin(Vector3 w1, Vector3 w2); - - HOST DEVICE inline void clearForce() { force = Force(0.0f); } + HOST inline void setKinetic(float e) { kinetic = e; }; + HOST DEVICE inline void clearForce() { force = Force(0.0f); energy = 0.f;} + //HOST DEVICE inline void clearForce() { force = ForceEnergy(0.f, 0.f); } HOST DEVICE inline void clearTorque() { torque = Force(0.0f); } // HOST DEVICE void integrate(Vector3& old_trans, Matrix3& old_rot, int startFinishAll); @@ -61,6 +63,8 @@ class RigidBody { // host side representation of rigid bodies HOST DEVICE inline BigReal getMass() const { return t->mass; } //HOST DEVICE inline Vector3 getVelocity() const { return momentum/t->mass; } HOST DEVICE inline Vector3 getVelocity() const { return momentum; } + HOST DEVICE inline float getEnergy() const { return energy; } + HOST DEVICE inline float getKinetic() const { return kinetic; } //HOST DEVICE inline Vector3 getAngularVelocity() const { // return Vector3( angularMomentum.x / t->inertia.x, // angularMomentum.y / t->inertia.y, @@ -71,7 +75,7 @@ class RigidBody { // host side representation of rigid bodies } void updateParticleList(Vector3* pos_d); - void callGridParticleForceKernel(Vector3* pos_d, Vector3* force_d, int s); + void callGridParticleForceKernel(Vector3* pos_d, Vector3* force_d, int s, float* energy, bool get_energy, int scheme); bool langevin; @@ -110,14 +114,15 @@ private: const RigidBodyType* t; float timestep; Vector3 force; // lab frame - + float energy; //potential energy + float kinetic; bool isFirstStep; int* numParticles; /* particles affected by potential grids */ int** particles_d; - Vector3** particleForces; + ForceEnergy** particleForces; Vector3** particleTorques; - Vector3** particleForces_d; + ForceEnergy** particleForces_d; Vector3** particleTorques_d; diff --git a/src/RigidBodyController.cu b/src/RigidBodyController.cu index 16d3f13..bb49a5e 100644 --- a/src/RigidBodyController.cu +++ b/src/RigidBodyController.cu @@ -1,4 +1,5 @@ /* #include "RigidBody.h" */ +#include <iomanip> #include "RigidBodyController.h" #include "Configuration.h" #include "RigidBodyType.h" @@ -51,14 +52,6 @@ RigidBodyController::RigidBodyController(const Configuration& c, const char* out rigidBodyByType.push_back(tmp); } - if (conf.inputRBCoordinates.length() > 0) - loadRBCoordinates(conf.inputRBCoordinates.val()); - - random = new RandomCPU(conf.seed + 1); /* +1 to avoid using same seed as RandomCUDA */ - - gpuErrchk(cudaDeviceSynchronize()); /* RBTODO: this should be extraneous */ - initializeForcePairs(); - gpuErrchk(cudaDeviceSynchronize()); /* RBTODO: this should be extraneous */ //Boltzmann distribution for (int i = 0; i < rigidBodyByType.size(); i++) { @@ -69,6 +62,15 @@ RigidBodyController::RigidBodyController(const Configuration& c, const char* out rb.Boltzmann(); } } + + if (conf.inputRBCoordinates.length() > 0) + loadRBCoordinates(conf.inputRBCoordinates.val()); + + random = new RandomCPU(conf.seed + 1); /* +1 to avoid using same seed as RandomCUDA */ + + gpuErrchk(cudaDeviceSynchronize()); /* RBTODO: this should be extraneous */ + initializeForcePairs(); + gpuErrchk(cudaDeviceSynchronize()); /* RBTODO: this should be extraneous */ } RigidBodyController::~RigidBodyController() { @@ -100,11 +102,15 @@ bool RigidBodyController::loadRBCoordinates(const char* fileName) { String s(line); int numTokens = s.tokenCount(); - if (numTokens != 3+9) { + if (numTokens < 3+9) { printf("GrandBrownTown: load RB coordinates: Invalid coordinate file line: %s\n", line); fclose(inp); exit(-1); } + if(conf.RigidBodyDynamicType == String("Langevin") && numTokens < 18) + { + std::cout << "Warning the initial momentum set by random number" << std::endl; + } String* tokenList = new String[numTokens]; s.tokenize(tokenList); @@ -122,7 +128,12 @@ bool RigidBodyController::loadRBCoordinates(const char* fileName) { (float) strtod(tokenList[6],NULL), (float) strtod(tokenList[7],NULL), (float) strtod(tokenList[8],NULL), (float) strtod(tokenList[9],NULL), (float) strtod(tokenList[10],NULL), (float) strtod(tokenList[11],NULL)); - + if(conf.RigidBodyDynamicType == String("Langevin") && numTokens >= 18) + { + rb.momentum = Vector3((float)strtod(tokenList[12],NULL), (float) strtod(tokenList[13],NULL), (float) strtod(tokenList[14],NULL)); + rb.angularMomentum = Vector3((float)strtod(tokenList[15],NULL), (float) strtod(tokenList[16],NULL), (float) strtod(tokenList[17],NULL)); + } + delete[] tokenList; //i++; @@ -268,7 +279,7 @@ void RigidBodyController::clearForceAndTorque() } } -void RigidBodyController::updateForces(Vector3* pos_d, Vector3* force_d, int s) { +void RigidBodyController::updateForces(Vector3* pos_d, Vector3* force_d, int s, float* energy, bool get_energy, int scheme) { if (s <= 1) gpuErrchk( cudaProfilerStart() ); @@ -276,7 +287,7 @@ void RigidBodyController::updateForces(Vector3* pos_d, Vector3* force_d, int s) for (int i = 0; i < rigidBodyByType.size(); i++) { for (int j = 0; j < rigidBodyByType[i].size(); j++) { RigidBody& rb = rigidBodyByType[i][j]; - rb.callGridParticleForceKernel( pos_d, force_d, s ); + rb.callGridParticleForceKernel( pos_d, force_d, s, energy, get_energy, scheme ); } } @@ -285,7 +296,7 @@ void RigidBodyController::updateForces(Vector3* pos_d, Vector3* force_d, int s) for (int i=0; i < forcePairs.size(); i++) { // TODO: performance: make this check occur less frequently if (forcePairs[i].isWithinPairlistDist()) - forcePairs[i].callGridForceKernel(i,s); + forcePairs[i].callGridForceKernel(i, s, scheme); } // each kernel call is followed by async memcpy for previous; now get last @@ -353,18 +364,26 @@ void RigidBodyController::integrateDLM(int step) void RigidBodyController::integrate(int step) { // tell RBs to integrate - if ( step % conf.outputPeriod == 0 ) { /* PRINT & INTEGRATE */ - if (step == 0) { // first step so only start this cycle + if ( step % conf.outputPeriod == 0 ) + { /* PRINT & INTEGRATE */ + if (step == 0) + { // first step so only start this cycle print(step); - for (int i = 0; i < rigidBodyByType.size(); i++) { - for (int j = 0; j < rigidBodyByType[i].size(); j++) { + for (int i = 0; i < rigidBodyByType.size(); i++) + { + for (int j = 0; j < rigidBodyByType[i].size(); j++) + { RigidBody& rb = rigidBodyByType[i][j]; rb.integrate(0); } } - } else { // finish last cycle - for (int i = 0; i < rigidBodyByType.size(); i++) { - for (int j = 0; j < rigidBodyByType[i].size(); j++) { + } + else + { // finish last cycle + for (int i = 0; i < rigidBodyByType.size(); i++) + { + for (int j = 0; j < rigidBodyByType[i].size(); j++) + { RigidBody& rb = rigidBodyByType[i][j]; rb.integrate(1); } @@ -372,25 +391,34 @@ void RigidBodyController::integrate(int step) print(step); // start this cycle - for (int i = 0; i < rigidBodyByType.size(); i++) { + /*for (int i = 0; i < rigidBodyByType.size(); i++) { for (int j = 0; j < rigidBodyByType[i].size(); j++) { RigidBody& rb = rigidBodyByType[i][j]; rb.integrate(0); } - } + }*/ } - } else { /* INTEGRATE ONLY */ - if (step == 0) { // first step so only start this cycle + } + else + { /* INTEGRATE ONLY */ + if (step == 0) + { // first step so only start this cycle print(step); - for (int i = 0; i < rigidBodyByType.size(); i++) { - for (int j = 0; j < rigidBodyByType[i].size(); j++) { + for (int i = 0; i < rigidBodyByType.size(); i++) + { + for (int j = 0; j < rigidBodyByType[i].size(); j++) + { RigidBody& rb = rigidBodyByType[i][j]; rb.integrate(0); } } - } else { // integrate end of last step and start of this one - for (int i = 0; i < rigidBodyByType.size(); i++) { - for (int j = 0; j < rigidBodyByType[i].size(); j++) { + } + else + { // integrate end of last step and start of this one + for (int i = 0; i < rigidBodyByType.size(); i++) + { + for (int j = 0; j < rigidBodyByType[i].size(); j++) + { RigidBody& rb = rigidBodyByType[i][j]; rb.integrate(2); } @@ -399,23 +427,26 @@ void RigidBodyController::integrate(int step) } } -float RigidBodyController::KineticEnergy() +void RigidBodyController::KineticEnergy() { - float e = 0.; - int num = 0; + //float e = 0.; + //int num = 0; for (int i = 0; i < rigidBodyByType.size(); i++) { for (int j = 0; j < rigidBodyByType[i].size(); j++) { RigidBody& rb = rigidBodyByType[i][j]; - e += rb.Temperature(); - num += 1; + rb.setKinetic(rb.Temperature()); + //rb.kinetic=tmp; + //e += tmp; + //num += 1; } } - if(num > 0) + //return e; + /*if(num > 0) return e / num; else - return 0.; + return 0.;*/ } // allocate and initialize an array of stream handles @@ -468,7 +499,7 @@ Matrix3 RigidBodyForcePair::getBasis2(const int i) { } // RBTODO: bundle several rigidbodypair evaluations in single kernel call -void RigidBodyForcePair::callGridForceKernel(int pairId, int s) { +void RigidBodyForcePair::callGridForceKernel(int pairId, int s, int scheme) { // get the force/torque between a pair of rigid bodies /* printf(" Updating rbPair forces\n"); */ const int numGrids = gridKeyId1.size(); @@ -505,15 +536,15 @@ void RigidBodyForcePair::callGridForceKernel(int pairId, int s) { // RBTODO: get energy if (!isPmf) { /* pair of RBs */ - computeGridGridForce<<< nb, NUMTHREADS, NUMTHREADS*2*sizeof(Vector3), s >>> + computeGridGridForce<<< nb, NUMTHREADS, 2*sizeof(ForceEnergy)*NUMTHREADS, s>>> (type1->rawDensityGrids_d[k1], type2->rawPotentialGrids_d[k2], B1, B2, c, - forces_d[i], torques_d[i]); + forces_d[i], torques_d[i], scheme); } else { /* RB with a PMF */ - computeGridGridForce<<< nb, NUMTHREADS, NUMTHREADS*2*sizeof(Vector3), s >>> + computeGridGridForce<<< nb, NUMTHREADS, 2*sizeof(ForceEnergy)*NUMTHREADS, s>>> (type1->rawDensityGrids_d[k1], type2->rawPmfs_d[k2], B1, B2, c, - forces_d[i], torques_d[i]); + forces_d[i], torques_d[i], scheme); } // retrieveForcesForGrid(i); // this is slower than approach below, unsure why @@ -529,22 +560,21 @@ void RigidBodyForcePair::retrieveForcesForGrid(const int i) { const cudaStream_t &s = stream[streamID[i]]; const int nb = numBlocks[i]; - gpuErrchk(cudaMemcpyAsync(forces[i], forces_d[i], sizeof(Vector3)*nb, - cudaMemcpyDeviceToHost, s)); - gpuErrchk(cudaMemcpyAsync(torques[i], torques_d[i], sizeof(Vector3)*nb, - cudaMemcpyDeviceToHost, s)); + gpuErrchk(cudaMemcpyAsync(forces[i], forces_d[i], sizeof(ForceEnergy)*nb, cudaMemcpyDeviceToHost, s)); + gpuErrchk(cudaMemcpyAsync(torques[i], torques_d[i], sizeof(Vector3)*nb, cudaMemcpyDeviceToHost, s)); } void RigidBodyForcePair::processGPUForces() { const int numGrids = gridKeyId1.size(); - Vector3 f = Vector3(0.0f); - Vector3 t = Vector3(0.0f); - + Vector3 f = Vector3(0.f); + Vector3 t = Vector3(0.f); + float energy = 0.f; for (int i = 0; i < numGrids; i++) { const int nb = numBlocks[i]; - Vector3 tmpF = Vector3(0.0f); - Vector3 tmpT = Vector3(0.0f); + //Vector3 tmpF = Vector3(0.0f); + ForceEnergy tmpF = ForceEnergy(0.f, 0.f); + Vector3 tmpT = Vector3(0.f); for (int j = 0; j < nb; j++) { tmpF = tmpF + forces[i][j]; @@ -554,10 +584,11 @@ void RigidBodyForcePair::processGPUForces() { // tmpT is the torque calculated about the origin of grid k2 (e.g. c2) // so here we transform torque to be about rb1 Vector3 o2 = getOrigin2(i); - tmpT = tmpT - (rb1->getPosition() - o2).cross( tmpF ); + tmpT = tmpT - (rb1->getPosition() - o2).cross( tmpF.f ); - // sum forces and torques - f = f + tmpF; + // sum energies,forces and torques + energy += tmpF.e; + f = f + tmpF.f; t = t + tmpT; } @@ -566,11 +597,13 @@ void RigidBodyForcePair::processGPUForces() { rb1->addForce( f ); rb1->addTorque( t ); - + rb1->addEnergy( energy ); + if (!isPmf) { const Vector3 t2 = -t + (rb2->getPosition()-rb1->getPosition()).cross( f ); rb2->addForce( -f ); rb2->addTorque( t2 ); + rb2->addEnergy( energy ); } // printf("force: %s\n", f.toString().val()); @@ -611,6 +644,10 @@ void RigidBodyController::print(int step) { printData(step,trajFile); trajFile.flush(); } + if(step % conf.outputEnergyPeriod == 0) + { + + } // Write restart File /* if ( simParams->restartFrequency && */ @@ -665,18 +702,38 @@ void RigidBodyController::printData(int step,std::ofstream &file) { Matrix3 t = rb.getOrientation(); file << step <<" "<< rb.getKey() <<" "<< v.x <<" "<< v.y <<" "<< v.z; - file <<" "<< t.exx <<" "<< t.exy <<" "<< t.exz + file << std::setprecision(10) <<" "<< t.exx <<" "<< t.exy <<" "<< t.exz <<" "<< t.eyx <<" "<< t.eyy <<" "<< t.eyz <<" "<< t.ezx <<" "<< t.ezy <<" "<< t.ezz; v = rb.getVelocity(); - file <<" "<< v.x <<" "<< v.y <<" "<< v.z; + file << std::setprecision(10) <<" "<< v.x <<" "<< v.y <<" "<< v.z; v = rb.getAngularVelocity(); - file <<" "<< v.x <<" "<< v.y <<" "<< v.z + file << std::setprecision(10) <<" "<< v.x <<" "<< v.y <<" "<< v.z << std::endl; } } } +void RigidBodyController::printEnergyData(std::fstream &file) +{ + if(file.is_open()) + { + + for (int i = 0; i < rigidBodyByType.size(); i++) + { + for(int j = 0; j < rigidBodyByType[i].size(); j++) + { + const RigidBody& rb = rigidBodyByType[i][j]; + file << "Kinetic Energy " << rb.getKey() << ": " << rb.getKinetic() << " (kT)" << std::endl; + file << " Potential Energy " << rb.getKey() << ": " << rb.getEnergy() << " (kcal/mol)" << std::endl; + } + } + } + else + { + std::cout << " Error in opening files\n"; + } +} int RigidBodyForcePair::initialize() { // printf(" Initializing (streams for) RB force pair...\n"); @@ -692,15 +749,17 @@ int RigidBodyForcePair::initialize() { nextStreamID++; numBlocks.push_back(nb); - forces.push_back( new Vector3[nb] ); + //forces.push_back( new Vector3[nb] ); + forces.push_back( new ForceEnergy[nb]); torques.push_back( new Vector3[nb] ); - forces_d.push_back( new Vector3[nb] ); // RBTODO: correct? + //forces_d.push_back( new Vector3[nb] ); // RBTODO: correct? + forces_d.push_back( new ForceEnergy[nb]); torques_d.push_back( new Vector3[nb] ); // allocate device memory for numBlocks of torque, etc. // printf(" Allocating device memory for forces/torques\n"); - gpuErrchk(cudaMalloc(&(forces_d[i]), sizeof(Vector3) * nb)); + gpuErrchk(cudaMalloc(&(forces_d[i]), sizeof(ForceEnergy) * nb)); gpuErrchk(cudaMalloc(&(torques_d[i]), sizeof(Vector3) * nb)); } gpuErrchk(cudaDeviceSynchronize()); diff --git a/src/RigidBodyController.h b/src/RigidBodyController.h index f30df42..1aee363 100644 --- a/src/RigidBodyController.h +++ b/src/RigidBodyController.h @@ -14,6 +14,7 @@ class RigidBodyType; class RigidBody; class Configuration; +class ForceEnergy; // class RandomCPU; #include "RandomCPU.h" @@ -64,8 +65,10 @@ private: bool isPmf; - std::vector<Vector3*> forces; - std::vector<Vector3*> forces_d; + //std::vector<Vector3*> forces; + //std::vector<Vector3*> forces_d; + std::vector<ForceEnergy*> forces; + std::vector<ForceEnergy*> forces_d; std::vector<Vector3*> torques; std::vector<Vector3*> torques_d; @@ -78,7 +81,7 @@ private: static RigidBodyForcePair* lastRbForcePair; static int lastRbGridID; - void callGridForceKernel(int pairId, int s); + void callGridForceKernel(int pairId, int s,int scheme); void retrieveForcesForGrid(const int i); void processGPUForces(); Matrix3 getBasis1(const int i); @@ -98,11 +101,12 @@ public: void SetRandomTorques(); void integrate(int step); void integrateDLM(int step); - void updateForces(Vector3* pos_d, Vector3* force_d, int s); + void updateForces(Vector3* pos_d, Vector3* force_d, int s, float* energy, bool get_energy, int scheme); void updateParticleLists(Vector3* pos_d); void clearForceAndTorque(); - float KineticEnergy(); + void KineticEnergy(); void print(int step); + void printEnergyData(std::fstream &file); private: bool loadRBCoordinates(const char* fileName); void initializeForcePairs(); @@ -133,6 +137,6 @@ private: std::vector< RigidBodyForcePair > forcePairs; - + float* rb_energy; }; diff --git a/src/RigidBodyGrid.cu b/src/RigidBodyGrid.cu index d9b32da..06d68ba 100644 --- a/src/RigidBodyGrid.cu +++ b/src/RigidBodyGrid.cu @@ -243,13 +243,13 @@ DEVICE ForceEnergy RigidBodyGrid::interpolateForceD(const Vector3 l) const { return ForceEnergy(f,e); } -#define cubic +//#define cubic DEVICE ForceEnergy RigidBodyGrid::interpolateForceDLinearly(const Vector3& l) const { -#ifdef cubic -return interpolateForceD(l); -#elif defined(cubic_namd) -return interpolateForceDnamd(l); -#else +//#ifdef cubic +//return interpolateForceD(l); +//#elif defined(cubic_namd) +//return interpolateForceDnamd(l); +//#else // Find the home node. const int homeX = int(floor(l.x)); const int homeY = int(floor(l.y)); @@ -293,7 +293,7 @@ return interpolateForceDnamd(l); f.z = - (g3[2][1] - g3[2][0]); float e = wz * (g3[2][1] - g3[2][0]) + g3[2][0]; return ForceEnergy(f,e); -#endif +//#endif } DEVICE ForceEnergy RigidBodyGrid::interpolateForceDnamd(const Vector3& l) const { diff --git a/src/useful.h b/src/useful.h index 28417b7..cc6bbe0 100644 --- a/src/useful.h +++ b/src/useful.h @@ -131,12 +131,12 @@ String operator+(String s1, String s2); // class MY_ALIGN(16) Vector3 { public: - HOST DEVICE inline Vector3() : x(0), y(0), z(0) {} - HOST DEVICE inline Vector3(float s):x(s), y(s), z(s) {} - HOST DEVICE inline Vector3(const Vector3& v):x(v.x), y(v.y), z(v.z) {} - HOST DEVICE inline Vector3(float x0, float y0, float z0) : x(x0), y(y0), z(z0) {} - HOST DEVICE inline Vector3(const float* d) : x(d[0]), y(d[1]), z(d[2]) {} - DEVICE inline Vector3(const float4 a) : x(a.x ), y(a.y ), z(a.z ) {} + HOST DEVICE inline Vector3() : x(0), y(0), z(0), w(0) {} + HOST DEVICE inline Vector3(float s):x(s), y(s), z(s), w(s) {} + HOST DEVICE inline Vector3(const Vector3& v):x(v.x), y(v.y), z(v.z), w(v.w) {} + HOST DEVICE inline Vector3(float x0, float y0, float z0) : x(x0), y(y0), z(z0), w(0) {} + HOST DEVICE inline Vector3(const float* d) : x(d[0]), y(d[1]), z(d[2]), w(0) {} + DEVICE inline Vector3(const float4 a) : x(a.x ), y(a.y ), z(a.z ), w(a.w) {} static Vector3 random(float s); -- GitLab