From 7d2e8acc6621fc199e2a59fe861456f4588ac1ab Mon Sep 17 00:00:00 2001 From: Han-Yi Chou <hchou10@illinois.edu> Date: Sun, 11 Feb 2018 18:40:37 -0600 Subject: [PATCH] implement openMP option for rigid body simulation and fix bug in rigid body replicas, need to be reviewed and tested Conflicts: src/GrandBrownTown.cu src/Makefile --- src/GrandBrownTown.cu | 155 +++++++++++++++++++++++++++---------- src/Makefile | 24 ++++-- src/RigidBodyController.cu | 2 +- 3 files changed, 132 insertions(+), 49 deletions(-) diff --git a/src/GrandBrownTown.cu b/src/GrandBrownTown.cu index e3f95ef..3e2baca 100644 --- a/src/GrandBrownTown.cu +++ b/src/GrandBrownTown.cu @@ -8,7 +8,14 @@ #include <thrust/device_ptr.h> #include <fstream> -#ifndef gpuErrchk +#ifdef _OPENMP +#include <omp.h> +//#else +//typedef int omp_int_t; +//inline omp_int_t omp_get_thread_num() { return 0; } +//inline omp_int_t omp_get_max_threads() { return 1; } +#endif + #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true) { if (code != cudaSuccess) { @@ -16,7 +23,6 @@ inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=t if (abort) exit(code); } } -#endif static void checkEnergyFiles() { @@ -443,7 +449,7 @@ GrandBrownTown::~GrandBrownTown() { gpuErrchk(cudaFree(randoGen_d)); for(std::vector<RigidBodyController*>::iterator iter = RBC.begin(); iter != RBC.end(); ++iter) { - (*iter)->~RigidBodyController(); + //(*iter)->~RigidBodyController(); delete *iter; } RBC.clear(); @@ -548,8 +554,13 @@ void GrandBrownTown::RunNoseHooverLangevin() // cudaSetDevice(0); internal->decompose(); gpuErrchk(cudaDeviceSynchronize()); - for(std::vector<RigidBodyController*>::iterator iter = RBC.begin(); iter != RBC.end(); ++iter) - (*iter)->updateParticleLists( internal->getPos_d(), sys_d); + #ifdef _OPENMP + omp_set_num_threads(4); + #endif + #pragma omp parallel for + for(int i = 0; i < numReplicas; ++i) + RBC[i]->updateParticleLists( (internal->getPos_d())+i*num, sys_d); + gpuErrchk(cudaDeviceSynchronize()); } float t; // simulation time @@ -573,8 +584,12 @@ void GrandBrownTown::RunNoseHooverLangevin() // 'interparticleForce' - determines whether particles interact with each other gpuErrchk(cudaMemset((void*)(internal->getForceInternal_d()),0,num*numReplicas*sizeof(Vector3))); gpuErrchk(cudaMemset((void*)(internal->getEnergy()), 0, sizeof(float)*num*numReplicas)); - for(std::vector<RigidBodyController*>::iterator iter = RBC.begin(); iter != RBC.end(); ++iter) - (*iter)->clearForceAndTorque(); //Han-Yi Chou + #ifdef _OPENMP + omp_set_num_threads(4); + #endif + #pragma omp parallel for + for(int i = 0; i < numReplicas; ++i) + RBC[i]->clearForceAndTorque(); //Han-Yi Chou if (interparticleForce) { @@ -587,8 +602,12 @@ void GrandBrownTown::RunNoseHooverLangevin() { // cudaSetDevice(0); internal -> decompose(); - for(std::vector<RigidBodyController*>::iterator iter = RBC.begin(); iter != RBC.end(); ++iter) - (*iter)->updateParticleLists( internal->getPos_d(), sys_d); + #ifdef _OPENMP + omp_set_num_threads(4); + #endif + #pragma omp parallel for + for(int i = 0; i < numReplicas; ++i) + RBC[i]->updateParticleLists( (internal->getPos_d())+i*num, sys_d); } internal -> computeTabulated(get_energy); break; @@ -607,8 +626,12 @@ void GrandBrownTown::RunNoseHooverLangevin() { // cudaSetDevice(0); internal->decompose(); - for(std::vector<RigidBodyController*>::iterator iter = RBC.begin(); iter != RBC.end(); ++iter) - (*iter)->updateParticleLists( internal->getPos_d(), sys_d); + #ifdef _OPENMP + omp_set_num_threads(4); + #endif + #pragma omp parallel for + for(int i = 0; i < numReplicas; ++i) + RBC[i]->updateParticleLists( (internal->getPos_d())+i*num, sys_d); } internal->compute(get_energy); break; @@ -628,14 +651,25 @@ void GrandBrownTown::RunNoseHooverLangevin() } }//if inter-particle force gpuErrchk(cudaDeviceSynchronize()); - for(std::vector<RigidBodyController*>::iterator iter = RBC.begin(); iter != RBC.end(); ++iter) - (*iter)->updateForces(internal->getPos_d(), internal->getForceInternal_d(), s, internal->getEnergy(), get_energy, RigidBodyInterpolationType, sys, sys_d); + #ifdef _OPENMP + omp_set_num_threads(4); + #endif + #pragma omp parallel for + for(int i = 0; i < numReplicas; ++i) + RBC[i]->updateForces((internal->getPos_d())+i*num, (internal->getForceInternal_d())+i*num, s, (internal->getEnergy())+i*num, get_energy, + RigidBodyInterpolationType, sys, sys_d); + gpuErrchk(cudaDeviceSynchronize()); + if(rigidbody_dynamic == String("Langevin")) { - for(std::vector<RigidBodyController*>::iterator iter = RBC.begin(); iter != RBC.end(); ++iter) + #ifdef _OPENMP + omp_set_num_threads(4); + #endif + #pragma omp parallel for + for(int i = 0; i < numReplicas; ++i) { - (*iter)->SetRandomTorques(); - (*iter)->AddLangevin(); + RBC[i]->SetRandomTorques(); + RBC[i]->AddLangevin(); } } }//if step == 1 @@ -657,17 +691,28 @@ void GrandBrownTown::RunNoseHooverLangevin() if(rigidbody_dynamic == String("Langevin")) { - for(std::vector<RigidBodyController*>::iterator iter = RBC.begin(); iter != RBC.end(); ++iter) + #ifdef _OPENMP + omp_set_num_threads(4); + #endif + #pragma omp parallel for + for(int i = 0; i < numReplicas; ++i) { - (*iter)->integrateDLM(0); - (*iter)->integrateDLM(1); + RBC[i]->integrateDLM(0); + RBC[i]->integrateDLM(1); } } else { - for(std::vector<RigidBodyController*>::iterator iter = RBC.begin(); iter != RBC.end(); ++iter) - (*iter)->integrate(s); - //RBC.print(s); + #ifdef _OPENMP + omp_set_num_threads(4); + #endif + #pragma omp parallel for ordered + for(int i = 0; i < numReplicas; ++i) + { + RBC[i]->integrate(s); + #pragma omp ordered + RBC[i]->print(s); + } } gpuErrchk(cudaDeviceSynchronize()); @@ -743,8 +788,12 @@ void GrandBrownTown::RunNoseHooverLangevin() } if (imd_on && clientsock) internal->setForceInternalOnDevice(imdForces); // TODO ensure replicas are mutually exclusive with IMD - for(std::vector<RigidBodyController*>::iterator iter = RBC.begin(); iter != RBC.end(); ++iter) - (*iter)->clearForceAndTorque(); + #ifdef _OPENMP + omp_set_num_threads(4); + #endif + #pragma omp parallel for + for(int i = 0; i < numReplicas; ++i) + RBC[i]->clearForceAndTorque(); gpuErrchk(cudaMemset((void*)(internal->getForceInternal_d()),0,num*numReplicas*sizeof(Vector3))); if (interparticleForce) { @@ -757,8 +806,12 @@ void GrandBrownTown::RunNoseHooverLangevin() if (s % decompPeriod == 0) { internal -> decompose(); - for(std::vector<RigidBodyController*>::iterator iter = RBC.begin(); iter != RBC.end(); ++iter) - (*iter)->updateParticleLists( internal->getPos_d(), sys_d); + #ifdef _OPENMP + omp_set_num_threads(4); + #endif + #pragma omp parallel for + for(int i = 0; i < numReplicas; ++i) + RBC[i]->updateParticleLists( (internal->getPos_d())+i*num, sys_d); } internal -> computeTabulated(get_energy); break; @@ -776,8 +829,12 @@ void GrandBrownTown::RunNoseHooverLangevin() if (s % decompPeriod == 0) { internal->decompose(); - for(std::vector<RigidBodyController*>::iterator iter = RBC.begin(); iter != RBC.end(); ++iter) - (*iter)->updateParticleLists( internal->getPos_d(), sys_d); + #ifdef _OPENMP + omp_set_num_threads(4); + #endif + #pragma omp parallel for + for(int i = 0; i < numReplicas; ++i) + RBC[i]->updateParticleLists( (internal->getPos_d())+i*num, sys_d); } internal->compute(get_energy); break; @@ -797,8 +854,13 @@ void GrandBrownTown::RunNoseHooverLangevin() } gpuErrchk(cudaDeviceSynchronize()); //compute the force for rigid bodies - for(std::vector<RigidBodyController*>::iterator iter = RBC.begin(); iter != RBC.end(); ++iter) - (*iter)->updateForces(internal->getPos_d(), internal->getForceInternal_d(), s, internal->getEnergy(), get_energy, RigidBodyInterpolationType, sys, sys_d); + #ifdef _OPENMP + omp_set_num_threads(4); + #endif + #pragma omp parallel for + for(int i = 0; i < numReplicas; ++i) + RBC[i]->updateForces((internal->getPos_d())+i*num, (internal->getForceInternal_d())+i*num, s, (internal->getEnergy())+i*num, get_energy, + RigidBodyInterpolationType, sys, sys_d); gpuErrchk(cudaDeviceSynchronize()); if(particle_dynamic == String("Langevin") || particle_dynamic == String("NoseHooverLangevin")) @@ -809,12 +871,17 @@ void GrandBrownTown::RunNoseHooverLangevin() if(rigidbody_dynamic == String("Langevin")) { - for(std::vector<RigidBodyController*>::iterator iter = RBC.begin(); iter != RBC.end(); ++iter) + #ifdef _OPENMP + omp_set_num_threads(4); + #endif + #pragma omp parallel for ordered + for(int i = 0; i < numReplicas; ++i) { - (*iter)->SetRandomTorques(); - (*iter)->AddLangevin(); - (*iter)->integrateDLM(2); - (*iter)->print(s); + RBC[i]->SetRandomTorques(); + RBC[i]->AddLangevin(); + RBC[i]->integrateDLM(2); + #pragma omp ordered + RBC[i]->print(s); } } @@ -896,8 +963,12 @@ void GrandBrownTown::RunNoseHooverLangevin() if(rigidbody_dynamic == String("Langevin")) { - for(std::vector<RigidBodyController*>::iterator iter = RBC.begin(); iter != RBC.end(); ++iter) - (*iter)->KineticEnergy(); + #ifdef _OPENMP + omp_set_num_threads(4); + #endif + #pragma omp parallel for + for(int i = 0; i < numReplicas; ++i) + RBC[i]->KineticEnergy(); } std::fstream rb_energy_file; rb_energy_file.open("rb_energy_config.txt", std::fstream::out | std::fstream::app); @@ -906,12 +977,16 @@ void GrandBrownTown::RunNoseHooverLangevin() float k_tol = 0.f; float v_tol = 0.f; float (RigidBody::*func_ptr)(); - for(std::vector<RigidBodyController*>::iterator iter = RBC.begin(); iter != RBC.end(); ++iter) + #ifdef _OPENMP + omp_set_num_threads(4); + #endif + #pragma omp parallel for private(func_ptr) reduction(+:k_tol,v_tol) + for(int i = 0; i < numReplicas; ++i) { func_ptr = &RigidBody::getKinetic; - k_tol += (*iter)->getEnergy(func_ptr); + k_tol += RBC[i]->getEnergy(func_ptr); func_ptr = &RigidBody::getEnergy; - v_tol += (*iter)->getEnergy(func_ptr); + v_tol += RBC[i]->getEnergy(func_ptr); } rb_energy_file << "Kinetic Energy " << k_tol/numReplicas << " (kT)" << std::endl; rb_energy_file << "Potential Energy " << v_tol/numReplicas << " (kcal/mol)" << std::endl; diff --git a/src/Makefile b/src/Makefile index 02f2afc..759562e 100644 --- a/src/Makefile +++ b/src/Makefile @@ -4,12 +4,18 @@ INCLUDE = $(CUDA_PATH)/include ifeq ($(dbg),1) - NV_FLAGS += -g -G - EX_FLAGS = -g -O0 -m$(OS_SIZE) + NV_FLAGS += -g -G + EX_FLAGS = -g -m$(OS_SIZE) else + NV_FLAGS = -Xptxas -O3 EX_FLAGS = -O3 -m$(OS_SIZE) endif +ifeq ($(omp),1) + NV_FLAGS += -Xcompiler -fopenmp + CC_FLAGS += -fopenmp +endif + ## Determine the version; TODO add tags for versions ifndef (VERSION) ifeq ($(shell git status >& /dev/null && echo true),true) @@ -25,9 +31,9 @@ CC_FLAGS += -DVERSION="\"$(VERSION)\"" CC_FLAGS += -I$(CUDA_PATH)/include -CC_FLAGS += -Wall -Wno-write-strings -std=c++0x -pedantic # TODO: test on Mac OSX and other architectures +CC_FLAGS += -Wall -Wno-write-strings -std=c++0x -pedantic# TODO: test on Mac OSX and other architectures ifeq ($(dbg),1) -NV_FLAGS += -Xcompiler -rdynamic -lineinfo +NV_FLAGS += -lineinfo else NV_FLAGS += -lineinfo endif @@ -69,12 +75,14 @@ CC_OBJ := $(patsubst %.cpp, %.o, $(CC_SRC)) CU_OBJ := $(patsubst %.cu, %.o, $(CU_SRC)) ### Targets -ifeq ($(dbg),1) -TARGET = arbd_dbg -else -# TARGET = arbd_lin_cubic TARGET = arbd +ifeq ($(dbg),1) +TARGET = $(TARGET)_dbg endif +ifeq ($(omp),1) +TARGET = $(TARGET)_omp +endif + all: $(TARGET) @echo "Done ->" $(TARGET) diff --git a/src/RigidBodyController.cu b/src/RigidBodyController.cu index 09178c6..3cab4c6 100644 --- a/src/RigidBodyController.cu +++ b/src/RigidBodyController.cu @@ -401,7 +401,7 @@ void RigidBodyController::integrate(int step) rb.integrate(1); } } - print(step); + //print(step); // start this cycle /*for (int i = 0; i < rigidBodyByType.size(); i++) { -- GitLab