diff --git a/src/RandomCUDA.cu b/src/RandomCUDA.cu index 0a92583d8795df2add7bf9c7bdaca4bec3c8306a..3d974254ce84ecb038ccc162ad597a7355d074c9 100644 --- a/src/RandomCUDA.cu +++ b/src/RandomCUDA.cu @@ -31,14 +31,28 @@ void Random::init(int num, unsigned long seed) { curandCreateGenerator(&generator, CURAND_RNG_PSEUDO_XORWOW); curandSetPseudoRandomGeneratorSeed(generator, seed); - if (uniform_d != NULL) { - gpuErrchk(cudaFree(uniform_d)); - gpuErrchk(cudaFree(integer_d)); - delete[] integer_h; - delete[] uniform_h; + if (uniform_d != NULL) + { + gpuErrchk(cudaFree(uniform_d)); + uniform_d = NULL; + } + if(integer_d!=NULL) + { + gpuErrchk(cudaFree(integer_d)); + integer_d = NULL; + } + if(integer_h!=NULL) + { + delete[] integer_h; + integer_h = NULL; + } + if(uniform_h!=NULL) + { + delete[] uniform_h; + uniform_h = NULL; } - gpuErrchk(cudaMalloc(&uniform_d, sizeof(float) * RAND_N)); - gpuErrchk(cudaMalloc(&integer_d, sizeof(unsigned int) * RAND_N)); + gpuErrchk(cudaMalloc((void**)&uniform_d, sizeof(float) * RAND_N)); + gpuErrchk(cudaMalloc((void**)&integer_d, sizeof(unsigned int) * RAND_N)); integer_h = new unsigned int[RAND_N]; uniform_h = new float[RAND_N]; uniform_n = 0; @@ -47,7 +61,7 @@ void Random::init(int num, unsigned long seed) { float Random::uniform() { if (uniform_n < 1) { - cuRandchk(curandGenerateUniform(generator, (float*) uniform_d, RAND_N)); + cuRandchk(curandGenerateUniform(generator, uniform_d, RAND_N)); gpuErrchk(cudaMemcpy(uniform_h, uniform_d, sizeof(float) * RAND_N, cudaMemcpyDeviceToHost)); uniform_n = RAND_N; } @@ -86,8 +100,17 @@ void Random::reorder(int a[], int n) { } } -__global__ void initKernel(unsigned long seed, curandState_t *state, int num) { - int idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx < num) - curand_init(seed, idx, 0, &state[idx]); +__global__ +void initKernel(unsigned long seed, curandState_t *state, int num) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + int step = blockDim.x * gridDim.x; + for(int i = idx; i < num; i=i+step) + { + curandState_t local; + // curand_init(clock64()+seed,i,0,&local); + //curand_init(clock64(),i,0,&state[i]); + curand_init(seed,i,0,&local); + state[(size_t)i] = local; + } + } diff --git a/src/RandomCUDA.h b/src/RandomCUDA.h index b3f719ac5b3440c97cf6130c7849487a9f153d7a..ee8b5542cfc7bfc814c4046a8a30e90288d565ba 100644 --- a/src/RandomCUDA.h +++ b/src/RandomCUDA.h @@ -22,7 +22,7 @@ #endif class Random { -private: +public: static const size_t RAND_N = 512; // max random numbers stored curandState_t *states;