diff --git a/src/Configuration.cpp b/src/Configuration.cpp index ef076168b38c12e1a2713b8c25c538e6d86d3b64..c33274b9e7129fb7cae01373816e62deeb36c468 100644 --- a/src/Configuration.cpp +++ b/src/Configuration.cpp @@ -6,9 +6,7 @@ #include <cassert> #include <stdlib.h> /* srand, rand */ #include <time.h> /* time */ -#include <gsl/gsl_rng.h> -#include <gsl/gsl_randist.h> -#include <gsl/gsl_math.h> + #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true) { @@ -603,7 +601,11 @@ void Configuration::setDefaults() { timestep = 1e-5f; rigidBodyGridGridPeriod = 1; steps = 100; - seed = 0; + + unsigned long int r0 = clock(); + for (int i = 0; i < 4; i++) + r0 *= r0 + 1; + seed = time(NULL) + r0; origin = Vector3(0,0,0); size = Vector3(0,0,0); @@ -2054,9 +2056,7 @@ bool Configuration::Boltzmann(const Vector3& v_com, int N) int count = 0; Vector3 total_momentum = Vector3(0.); - gsl_rng *gslcpp_rng = gsl_rng_alloc(gsl_rng_default); - srand(time(NULL)); - gsl_rng_set (gslcpp_rng, rand() % 100000); + RandomCPU random = RandomCPU(seed + 2); /* +2 to avoid using same seed elsewhere */ for(int i = 0; i < N; ++i) { @@ -2064,7 +2064,7 @@ bool Configuration::Boltzmann(const Vector3& v_com, int N) double M = part[typ].mass; double sigma = sqrt(kT * M) * 2.046167337e4; - Vector3 tmp(gsl_ran_gaussian(gslcpp_rng,sigma),gsl_ran_gaussian(gslcpp_rng,sigma),gsl_ran_gaussian(gslcpp_rng,sigma)); + Vector3 tmp = random.gaussian_vector() * sigma; tmp = tmp * 1e-4; total_momentum += tmp; momentum[(size_t)count] = tmp; @@ -2083,7 +2083,7 @@ bool Configuration::Boltzmann(const Vector3& v_com, int N) } } - gsl_rng_free(gslcpp_rng); + return true; } diff --git a/src/GrandBrownTown.cu b/src/GrandBrownTown.cu index 25dff31ed5194bba52048c0923995c57d35968f8..c2378388ebd8a84dd9a5de4b567cbf6ae7067238 100644 --- a/src/GrandBrownTown.cu +++ b/src/GrandBrownTown.cu @@ -5,10 +5,8 @@ #include "BrownParticlesKernel.h" #include <stdlib.h> /* srand, rand */ #include <time.h> /* time */ -#include <gsl/gsl_rng.h> -#include <gsl/gsl_randist.h> -#include <gsl/gsl_math.h> +#ifndef gpuErrchk #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true) { if (code != cudaSuccess) { @@ -16,13 +14,14 @@ inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=t if (abort) exit(code); } } +#endif bool GrandBrownTown::DEBUG; cudaEvent_t START, STOP; GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg, - const long int randomSeed, bool debug, bool imd_on, unsigned int imd_port, int numReplicas) : + bool debug, bool imd_on, unsigned int imd_port, int numReplicas) : imd_on(imd_on), imd_port(imd_port), numReplicas(numReplicas), conf(c), RBC(RigidBodyController(c,outArg)) { printf("%d\n",__LINE__); @@ -76,7 +75,7 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg, { momentum = new Vector3[num * numReplicas]; //[HOST] array of particles' momentum if(particle_dynamic == String("NoseHooverLangevin")) - random = new float[num * numReplicas]; + random = new float[num * numReplicas]; } printf("%d\n",__LINE__); //for debug @@ -198,22 +197,12 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg, //angles_d = c.angles_d; //dihedrals_d = c.dihedrals_d; + printf("Setting up random number generator with seed %lu\n", seed); + randoGen = new Random(num * numReplicas, seed); + copyRandToCUDA(); + if(particle_dynamic == String("NoseHooverLangevin")) InitNoseHooverBath(num * numReplicas); - printf("%d\n",__LINE__); - // Seed random number generator - long unsigned int r_seed; - if (seed == 0) { - long int r0 = randomSeed; - for (int i = 0; i < 4; i++) - r0 *= r0 + 1; - r_seed = time(NULL) + r0; - } else { - r_seed = seed + randomSeed; - } - printf("Setting up Random Generator\n"); - randoGen = new Random(num * numReplicas, r_seed); - printf("Random Generator Seed: %lu -> %lu\n", randomSeed, r_seed); // "Some geometric stuff that should be gotten rid of." -- Jeff Comer Vector3 buffer = (sys->getCenter() + 2.0f * sys->getOrigin())/3.0f; @@ -223,8 +212,8 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg, if (!c.loadedCoordinates) { //printf("Populating\n"); //populate(); Han-Yi Chou, Actually the list is already populated - initialCond(); - printf("Set random initial conditions.\n"); + initialCond(); + printf("Setting random initial conditions.\n"); } // Prepare internal force computation @@ -1465,13 +1454,12 @@ void GrandBrownTown::initialCond() { } } -#if 0 // A couple old routines for getting particle positions. Vector3 GrandBrownTown::findPos(int typ) { Vector3 r; const BrownianParticleType& pt = part[typ]; do { - const float rx = sysDim.x * randoGen->uniform(); + const float rx = sysDim.x * randoGen->uniform(); const float ry = sysDim.y * randoGen->uniform(); const float rz = sysDim.z * randoGen->uniform(); r = sys->wrap( Vector3(rx, ry, rz) ); @@ -1491,39 +1479,6 @@ Vector3 GrandBrownTown::findPos(int typ, float minZ) { } while (pt.pmf->interpolatePotential(r) > pt.meanPmf and fabs(r.z) > minZ); return r; } -#endif - -Vector3 GrandBrownTown::findPos(int typ) { - Vector3 r; - static gsl_rng *gslcpp_rng = gsl_rng_alloc(gsl_rng_default); - //srand (time(NULL)); - //gsl_rng_set (gslcpp_rng, rand() % 100000); - - const BrownianParticleType& pt = part[typ]; - do { - const float rx = sysDim.x * (float)(gsl_ran_flat(gslcpp_rng,0.,1.)); - const float ry = sysDim.y * (float)(gsl_ran_flat(gslcpp_rng,0.,1.)); - const float rz = sysDim.z * (float)(gsl_ran_flat(gslcpp_rng,0.,1.)); - r = sys->wrap( Vector3(rx, ry, rz) ); - } while (pt.pmf->interpolatePotential(r) > pt.meanPmf); - return r; -} - - -Vector3 GrandBrownTown::findPos(int typ, float minZ) { - Vector3 r; - static gsl_rng *gslcpp_rng = gsl_rng_alloc(gsl_rng_default); - //srand(time(NULL)); - - const BrownianParticleType& pt = part[typ]; - do { - const float rx = sysDim.x * (float)(gsl_ran_flat(gslcpp_rng,0.,1.)); - const float ry = sysDim.y * (float)(gsl_ran_flat(gslcpp_rng,0.,1.)); - const float rz = sysDim.z * (float)(gsl_ran_flat(gslcpp_rng,0.,1.)); - r = sys->wrap( Vector3(rx, ry, rz) ); - } while (pt.pmf->interpolatePotential(r) > pt.meanPmf and fabs(r.z) > minZ); - return r; -} //Compute the kinetic energy of particle and rigid body Han-Yi Chou float GrandBrownTown::KineticEnergy() @@ -1559,9 +1514,6 @@ void GrandBrownTown::InitNoseHooverBath(int N) { printf("Entering Nose-Hoover Langevin\n"); int count = 0; - gsl_rng *gslcpp_rng = gsl_rng_alloc(gsl_rng_default); - srand (time(NULL)); - gsl_rng_set (gslcpp_rng, rand() % 100000); for(int i = 0; i < N; ++i) { @@ -1570,11 +1522,10 @@ void GrandBrownTown::InitNoseHooverBath(int N) double sigma = sqrt(kT / mu); - float tmp = gsl_ran_gaussian(gslcpp_rng,sigma); + float tmp = sigma * randoGen->gaussian(); random[(size_t)count] = tmp; ++count; } - gsl_rng_free(gslcpp_rng); printf("Done in nose-hoover bath\n"); } // ----------------------------------------------------------------------------- @@ -1857,6 +1808,12 @@ void GrandBrownTown::updateReservoirs() { internal->updateNumber(num); } +void GrandBrownTown::copyRandToCUDA() { + gpuErrchk(cudaMalloc((void**)&randoGen_d, sizeof(Random))); + gpuErrchk(cudaMemcpy(&(randoGen_d->states), &(randoGen->states), sizeof(curandState_t*),cudaMemcpyHostToDevice)); +} + + // ----------------------------------------------------------------------------- // Allocate memory on GPU(s) and copy to device void GrandBrownTown::copyToCUDA() { @@ -1869,18 +1826,6 @@ void GrandBrownTown::copyToCUDA() { gpuErrchk(cudaMemcpyAsync(forceInternal_d, forceInternal, sizeof(Vector3) * tot_num, cudaMemcpyHostToDevice));*/ - //gpuErrchk(cudaMalloc(&randoGen_d, sizeof(Random))); - //gpuErrchk(cudaMemcpy(randoGen_d, randoGen, sizeof(Random), - // cudaMemcpyHostToDevice)); - gpuErrchk(cudaMalloc((void**)&randoGen_d, sizeof(Random))); - //gpuErrchk(cudaMemcpy(randoGen_d, randoGen, sizeof(Random), cudaMemcpyHostToDevice)); - - //gpuErrchk(cudaMemcpy(&(randoGen_d->generator), &(randoGen->generator), sizeof(curandGenerator_t),cudaMemcpyHostToDevice)); - gpuErrchk(cudaMemcpy(&(randoGen_d->states), &(randoGen->states), sizeof(curandState_t*),cudaMemcpyHostToDevice)); - gpuErrchk(cudaMemcpy(&(randoGen_d->generator), &(randoGen->generator), sizeof(curandGenerator_t),cudaMemcpyHostToDevice)); - gpuErrchk(cudaMemcpy(&(randoGen_d->integer_d), &(randoGen->integer_d), sizeof(unsigned int*),cudaMemcpyHostToDevice)); - gpuErrchk(cudaMemcpy(&(randoGen_d->uniform_d), &(randoGen->uniform_d), sizeof(float*),cudaMemcpyHostToDevice)); - // gpuErrchk(cudaDeviceSynchronize()); } diff --git a/src/GrandBrownTown.cuh b/src/GrandBrownTown.cuh index 4fc8d0f1d359e4403e09d6c72ea8ec1c75add476..7c5741b3eaf5ff0405c7d18d81d2c4c315c2139f 100644 --- a/src/GrandBrownTown.cuh +++ b/src/GrandBrownTown.cuh @@ -94,6 +94,8 @@ __global__ void updateKernelABOBA(Vector3* pos, Vector3* momentum, Vector3* __re } } #endif + + ////The kernel is for Nose-Hoover Langevin dynamics __global__ void updateKernelNoseHooverLangevin(Vector3* pos, Vector3* momentum, float* random, Vector3* __restrict__ forceInternal, int type[], BrownianParticleType* part[], float kT, BaseGrid* kTGrid, float electricField, diff --git a/src/GrandBrownTown.h b/src/GrandBrownTown.h index b5d8430bf92d8e308bc74e902af8a46486d9a406..6965ca5bc0fc0b84895e6cfec0d5237754f48e56 100644 --- a/src/GrandBrownTown.h +++ b/src/GrandBrownTown.h @@ -54,9 +54,9 @@ class GrandBrownTown { public: - GrandBrownTown(const char* configFile, const char* outArg, const long int randomSeed, + GrandBrownTown(const char* configFile, const char* outArg, bool debug, bool imd_on, unsigned int imd_port, int numReplicas = 0); - GrandBrownTown(const Configuration& c, const char* outArg, const long int randomSeed, + GrandBrownTown(const Configuration& c, const char* outArg, bool debug, bool imd_on, unsigned int imd_port, int numReplicas = 0); ~GrandBrownTown(); @@ -91,6 +91,7 @@ private: void writeCurrentSegment(int repID, float t, float segZ) const; void getDebugForce(); + void copyRandToCUDA(); void copyToCUDA(); //Compute the kinetic energy in general. Han-Yi Chou @@ -186,7 +187,7 @@ private: String outputName; float timestep; long int steps; - long int seed; + unsigned long int seed; String temperatureGridFile; String inputCoordinates; String restartCoordinates; @@ -210,7 +211,6 @@ private: BaseGrid* kTGrid; BaseGrid* tGrid; BaseGrid* sigmaT; - unsigned long randoSeed; // Other parameters. float switchStart; diff --git a/src/Makefile b/src/Makefile index b7971a29aad42a8ac35a589c3c4caeb63996778f..02f2afc9859443e2a79de0dd77f06c9b3cab0050 100644 --- a/src/Makefile +++ b/src/Makefile @@ -37,7 +37,6 @@ ifneq ($(DARWIN),) else LIBRARY = $(CUDA_PATH)/lib64 endif -LIBRARY += -lgsl -lgslcblas # NV_FLAGS += -ftz=true # TODO: test if this preserves accurate simulation $(info $(NVCC)) diff --git a/src/RandomCPU.h b/src/RandomCPU.h index a270f4cda0264de24571a61d2995342eda616447..0f5c79ce10f1d8d0d970beaf3e45e3e5e5259623 100644 --- a/src/RandomCPU.h +++ b/src/RandomCPU.h @@ -53,7 +53,7 @@ class RandomCPU { private: float second_gaussian; - int64 second_gaussian_waiting; + bool second_gaussian_waiting; int64 rand48_seed; int64 rand48_mult; int64 rand48_add; @@ -74,7 +74,7 @@ public: // reinitialize with seed HOST DEVICE inline void init(unsigned long seed) { second_gaussian = 0; - second_gaussian_waiting = 0; + second_gaussian_waiting = false; rand48_seed = seed & INT64_LITERAL(0x00000000ffffffff); rand48_seed = rand48_seed << 16; rand48_seed |= RAND48_SEED & INT64_LITERAL(0x0000ffff); @@ -122,7 +122,7 @@ public: rand48_seed = save_seed; second_gaussian = 0; - second_gaussian_waiting = 0; + second_gaussian_waiting = false; print("END SPLIT\n"); } @@ -151,7 +151,7 @@ public: BigReal fac, r, v1, v2; if (second_gaussian_waiting) { - second_gaussian_waiting = 0; + second_gaussian_waiting = false; return second_gaussian; } else { r = 2.; // r >= 1.523e-8 ensures abs result < 6 @@ -164,7 +164,7 @@ public: fac = sqrt(-2.0 * log(r)/r); // now make the Box-Muller transformation to get two normally // distributed random numbers. Save one and return the other. - second_gaussian_waiting = 1; + second_gaussian_waiting = true; second_gaussian = v1 * fac; return v2 * fac; } diff --git a/src/RandomCUDA.cu b/src/RandomCUDA.cu index 3d974254ce84ecb038ccc162ad597a7355d074c9..fe90992114e41c938d22ceee5630de3d56d89668 100644 --- a/src/RandomCUDA.cu +++ b/src/RandomCUDA.cu @@ -1,21 +1,5 @@ #include "RandomCUDA.h" -#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } -inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true) { - if (code != cudaSuccess) { - fprintf(stderr,"CUDA Error: %s (%s:%d)\n", cudaGetErrorString(code), file, line); - if (abort) exit(code); - } -} - -#define cuRandchk(ans) { cuRandAssert((ans), __FILE__, __LINE__); } -inline void cuRandAssert(curandStatus code, const char *file, int line, bool abort=true) { - if (code != CURAND_STATUS_SUCCESS) { - fprintf(stderr, "CURAND Error: %d (%s:%d)\n", code, file, line); - if (abort) exit(code); - } -} - __global__ void initKernel(unsigned long seed, curandState_t *state, int num); @@ -41,6 +25,11 @@ void Random::init(int num, unsigned long seed) { gpuErrchk(cudaFree(integer_d)); integer_d = NULL; } + if(gaussian_d!=NULL) + { + gpuErrchk(cudaFree(gaussian_d)); + gaussian_d = NULL; + } if(integer_h!=NULL) { delete[] integer_h; @@ -51,12 +40,20 @@ void Random::init(int num, unsigned long seed) { delete[] uniform_h; uniform_h = NULL; } + if(gaussian_h!=NULL) + { + delete[] gaussian_h; + gaussian_h = NULL; + } gpuErrchk(cudaMalloc((void**)&uniform_d, sizeof(float) * RAND_N)); gpuErrchk(cudaMalloc((void**)&integer_d, sizeof(unsigned int) * RAND_N)); + gpuErrchk(cudaMalloc((void**)&gaussian_d, sizeof(float) * RAND_N)); integer_h = new unsigned int[RAND_N]; uniform_h = new float[RAND_N]; + gaussian_h = new float[RAND_N]; uniform_n = 0; integer_n = 0; + gaussian_n = 0; } float Random::uniform() { diff --git a/src/RandomCUDA.h b/src/RandomCUDA.h index ee8b5542cfc7bfc814c4046a8a30e90288d565ba..79740d4de277656a15857385a1111cef4349fe3f 100644 --- a/src/RandomCUDA.h +++ b/src/RandomCUDA.h @@ -21,20 +21,39 @@ #define DEVICE #endif +#ifndef gpuErrchk +#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } +inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true) { + if (code != cudaSuccess) { + fprintf(stderr,"CUDA Error: %s (%s:%d)\n", cudaGetErrorString(code), file, line); + if (abort) exit(code); + } +} +#endif + +#define cuRandchk(ans) { cuRandAssert((ans), __FILE__, __LINE__); } +inline void cuRandAssert(curandStatus code, const char *file, int line, bool abort=true) { + if (code != CURAND_STATUS_SUCCESS) { + fprintf(stderr, "CURAND Error: %d (%s:%d)\n", code, file, line); + if (abort) exit(code); + } +} + class Random { public: - static const size_t RAND_N = 512; // max random numbers stored + static const size_t RAND_N = 1024*4; // max random numbers stored curandState_t *states; curandGenerator_t generator; unsigned int *integer_h, *integer_d; float *uniform_h, *uniform_d; - size_t integer_n, uniform_n; + float *gaussian_h, *gaussian_d; + size_t integer_n, uniform_n, gaussian_n; public: - Random() : states(NULL), integer_h(NULL), integer_d(NULL), uniform_h(NULL), uniform_d(NULL) { } - Random(int num, unsigned long seed=0) : states(NULL), integer_h(NULL), integer_d(NULL), uniform_h(NULL), uniform_d(NULL) { + Random() : states(NULL), integer_h(NULL), integer_d(NULL), uniform_h(NULL), uniform_d(NULL), gaussian_h(NULL), gaussian_d(NULL) { } + Random(int num, unsigned long seed=0) : states(NULL), integer_h(NULL), integer_d(NULL), uniform_h(NULL), uniform_d(NULL), gaussian_h(NULL), gaussian_d(NULL) { init(num, seed); } @@ -70,6 +89,14 @@ public: unsigned int poisson(float lambda); float uniform(); + HOST inline float gaussian() { + if (gaussian_n < 1) { + cuRandchk(curandGenerateNormal(generator, gaussian_d, RAND_N, 0, 1)); + gpuErrchk(cudaMemcpy(gaussian_h, gaussian_d, sizeof(float) * RAND_N, cudaMemcpyDeviceToHost)); + } + return gaussian_h[--gaussian_n]; + } + void reorder(int *a, int n); }; diff --git a/src/arbd.cpp b/src/arbd.cpp index d277fc829e6b4bc7d3c59ab0cbaa33e05ea717e1..a1206407b4053893e6b074325282b7a453f2272c 100644 --- a/src/arbd.cpp +++ b/src/arbd.cpp @@ -111,16 +111,15 @@ int main(int argc, char* argv[]) { } } - long int randomSeed = 1992; char* configFile = NULL; char* outArg = NULL; - if (argc - num_flags >= 4) { - configFile = argv[argc - 3]; - outArg = argv[argc - 2]; - randomSeed = atol(argv[argc - 1]); - } else if (argc - num_flags >= 3) { + if (argc - num_flags == 3) { configFile = argv[argc - 2]; outArg = argv[argc - 1]; + } else { + printf("%s: too many arguments\n", argv[0]); + printf("Try '%s --help' for more information.\n", argv[0]); + return 1; } GPUManager::safe(safe); @@ -135,7 +134,7 @@ int main(int argc, char* argv[]) { config.copyToCUDA(); - GrandBrownTown brown(config, outArg, randomSeed, + GrandBrownTown brown(config, outArg, debug, imd_on, imd_port, replicas); printf("Running on GPU %d...\n", GPUManager::current()); cudaDeviceProp prop = GPUManager::properties[GPUManager::current()];