From 38af91964eee4787ea52c6466fc0b548e605244a Mon Sep 17 00:00:00 2001 From: Chris Maffeo <cmaffeo2@illinois.edu> Date: Thu, 12 Nov 2015 11:28:10 -0600 Subject: [PATCH] transfer rigid bodies with potential grids + data to GPU --- .gitignore | 1 + Configuration.cpp | 75 +++++++++++++++++++++++++++------------------- GrandBrownTown.cu | 4 ++- GrandBrownTown.cuh | 36 ++++++++++++++++++---- RigidBodyType.cu | 6 ++++ makefile | 3 +- notes.org | 1 + 7 files changed, 89 insertions(+), 37 deletions(-) create mode 100644 notes.org diff --git a/.gitignore b/.gitignore index 942163a..56e5b99 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ *~ .backup TAGS +TAGS.sh .dir-locals.el BD_example* runBrownCUDA \ No newline at end of file diff --git a/Configuration.cpp b/Configuration.cpp index 1fddb08..028b2fc 100644 --- a/Configuration.cpp +++ b/Configuration.cpp @@ -443,10 +443,11 @@ void Configuration::copyToCUDA() { gpuErrchk(cudaMalloc(&part_addr[i], sizeof(BrownianParticleType))); gpuErrchk(cudaMemcpyAsync(part_addr[i], b, sizeof(BrownianParticleType), cudaMemcpyHostToDevice)); - - gpuErrchk(cudaMemcpyAsync(part_d, part_addr, sizeof(BrownianParticleType*) * numParts, - cudaMemcpyHostToDevice)); } + // RBTODO: moved this out of preceding loop; was that correct? + gpuErrchk(cudaMemcpyAsync(part_d, part_addr, sizeof(BrownianParticleType*) * numParts, + cudaMemcpyHostToDevice)); + printf("copying RBs\n"); // Copy rigidbody types @@ -459,49 +460,61 @@ void Configuration::copyToCUDA() { int ng = rb->numPotGrids; // copy rigidbody to device - RigidBodyType *rb_d; - gpuErrchk(cudaMalloc((void **) &rb_d, sizeof(RigidBodyType))); - gpuErrchk(cudaMemcpy(rb_d, rb, sizeof(RigidBodyType), + // RigidBodyType *rb_d; + gpuErrchk(cudaMalloc(&rb_addr[i], sizeof(RigidBodyType))); + gpuErrchk(cudaMemcpy(rb_addr[i], rb, sizeof(RigidBodyType), cudaMemcpyHostToDevice)); - + // copy rb->grid to device BaseGrid * gtmp; - gtmp = new BaseGrid[ng]; + // gtmp = new BaseGrid[ng]; size_t sz = sizeof(BaseGrid)*ng; // allocate grids on device // copy temporary host pointer to device pointer // copy grids to device through temporary host poin gpuErrchk(cudaMalloc((void **) >mp, sz)); - gpuErrchk(cudaMemcpy(&(rb_d->rawPotentialGrids), >mp, + gpuErrchk(cudaMemcpy(&(rb_addr[i]->rawPotentialGrids), >mp, sizeof(BaseGrid*) * ng, cudaMemcpyHostToDevice )); gpuErrchk(cudaMemcpy(gtmp, &(rb->rawPotentialGrids), sizeof(BaseGrid) * ng, cudaMemcpyHostToDevice )); - + for (int gid = 0; gid < ng; gid++) { + gpuErrchk(cudaMemcpy(&(gtmp[gid]), &(rb->rawPotentialGrids[gid]), + sizeof(BaseGrid), cudaMemcpyHostToDevice )); + } // RBTODO: segfault when gtmp is deleted --> why? // delete[] gtmp; + printf(" RigidBodyType %d: numGrids = %d\n", i, ng); - // // copy grid data to device - // for (int gid = 0; gid < ng; gid++) { - // BaseGrid *g = &(rb->rawPotentialGrids[gid]); // convenience - // int len = g->getSize(); - // float *tmpData; - // tmpData = new float[len]; - - // // allocate grid data on device - // // copy temporary host pointer to device pointer - // // copy data to device through temporary host pointer - // sz = sizeof(float*) * len; - // gpuErrchk(cudaMalloc((void **) &tmpData, sz)); - // gpuErrchk(cudaMemcpy( (rb_d->rawPotentialGrids[gid].val), &tmpData, - // sizeof(float*), cudaMemcpyHostToDevice)); - // sz = sizeof(float) * len; - // gpuErrchk(cudaMemcpy( tmpData, g->val, sz, cudaMemcpyHostToDevice)); - // // RBTODO: why can't this be deleted? - // // delete[] tmpData; - // } + + // copy grid data to device + for (int gid = 0; gid < ng; gid++) { + BaseGrid *g = &(rb->rawPotentialGrids[gid]); // convenience + int len = g->getSize(); + float **tmpData; + tmpData = new float*[len]; + + printf(" RigidBodyType %d: potGrid[%d] size: %d\n", i, gid, len); + for (int k = 0; k < len; k++) + printf(" rbType_d[%d]->potGrid[%d].val[%d]: %g\n", + i, gid, k, g->val[k]); + + // allocate grid data on device + // copy temporary host pointer to device pointer + // copy data to device through temporary host pointer + sz = sizeof(float*) * len; + gpuErrchk(cudaMalloc((void **) &tmpData, sz)); + // gpuErrchk(cudaMemcpy( &(rb_addr[i]->rawPotentialGrids[gid].val), &tmpData, + // sizeof(float*), cudaMemcpyHostToDevice)); + gpuErrchk(cudaMemcpy( &(gtmp[gid].val), &tmpData, + sizeof(float*), cudaMemcpyHostToDevice)); + sz = sizeof(float) * len; + gpuErrchk(cudaMemcpy( tmpData, g->val, sz, cudaMemcpyHostToDevice)); + // RBTODO: why can't this be deleted? + // delete[] tmpData; + } @@ -614,7 +627,9 @@ void Configuration::copyToCUDA() { // cudaMemcpyHostToDevice)); // } - } + } + gpuErrchk(cudaMemcpy(rbType_d, rb_addr, sizeof(RigidBodyType*) * numRigidTypes, + cudaMemcpyHostToDevice)); printf("Done copying RBs\n"); // kTGrid_d diff --git a/GrandBrownTown.cu b/GrandBrownTown.cu index 4ded8c3..fdf5036 100644 --- a/GrandBrownTown.cu +++ b/GrandBrownTown.cu @@ -32,7 +32,9 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg, outFilePrefixes.push_back(out_prefix.str()); printf("About to devicePrint\n"); - devicePrint<<<1,1>>>(&(c.rigidBody[0])); + // devicePrint<<<1,1>>>(&(c.rigidBody[0])); + devicePrint<<<1,1>>>(c.rbType_d); + cudaDeviceSynchronize(); printf("Done with devicePrint\n"); } diff --git a/GrandBrownTown.cuh b/GrandBrownTown.cuh index 7989fdd..5f8c1a7 100644 --- a/GrandBrownTown.cuh +++ b/GrandBrownTown.cuh @@ -142,9 +142,35 @@ Vector3 step(Vector3 r0, float kTlocal, Vector3 force, float diffusion, return sys->getBasis().transform(l) + sys->getOrigin(); } -__global__ void devicePrint(RigidBodyType* rb) { - printf("RigidBodyType: numGrids = %d\n", rb->numPotGrids); +__global__ void devicePrint(RigidBodyType* rb[]) { + // printf("Device printing\n"); + int i = 0; + printf("RigidBodyType %d: numGrids = %d\n", i, rb[i]->numPotGrids); + printf(" RigidBodyType %d: potGrid: %p\n", i, rb[i]->rawPotentialGrids); + int j = 0; + printf(" RigidBodyType %d: potGrid[%d]: %p\n", i, j, &(rb[i]->rawPotentialGrids[j])); + printf(" RigidBodyType %d: potGrid[%d] size: %d\n", i, j, rb[i]->rawPotentialGrids[j].getSize()); + // BaseGrid g = rb[i]->rawPotentialGrids[j]; + // for (int k = 0; k < rb[i]->rawPotentialGrids[j].size(); k++) + for (int k = 0; k < rb[i]->rawPotentialGrids[j].getSize(); k++) + printf(" rbType_d[%d]->potGrid[%d].val[%d]: %g\n", + i, j, k, rb[i]->rawPotentialGrids[j].val[k]); + // i, j, k, rb[i]->rawPotentialGrids[j]).val[k]; + } -// __device__ void devicePrint(BaseGrid g) { -// printf("RigidBodyType: numGrids = %d\n", numPotGrids); -// }; + +// __global__ void devicePrint(RigidBodyType* rb[]) { +// // printf("Device printing\n"); +// int i = 0; +// printf("RigidBodyType %d: numGrids = %d\n", i, rb[i]->numPotGrids); +// printf("RigidBodyType %d: potGrid: %p\n", i, rb[i]->rawPotentialGrids); +// int j = 0; +// printf("RigidBodyType %d: potGrid[%d]: %p\n", i, &(rb[i]->rawPotentialGrids[j])); +// BaseGrid g = rb[i]->rawPotentialGrids[j]; +// // for (int k = 0; k < rb[i]->rawPotentialGrids[j].size(); k++) +// for (int k = 0; k < g->getSize(); k++) +// printf("rbType_d[%d]->potGrid[%d].val[%d]: %g\n", +// i, j, k, g.val[k]); +// // i, j, k, rb[i]->rawPotentialGrids[j]).val[k]; + +// } diff --git a/RigidBodyType.cu b/RigidBodyType.cu index f8f1da6..b587174 100644 --- a/RigidBodyType.cu +++ b/RigidBodyType.cu @@ -94,5 +94,11 @@ void RigidBodyType::updateRaw() { rawPotentialGrids = new BaseGrid[numPotGrids]; if (numDenGrids > 0) rawDensityGrids = new BaseGrid[numDenGrids]; + + for (int i=0; i < numPotGrids; i++) + rawPotentialGrids[i] = potentialGrids[i]; + for (int i=0; i < numDenGrids; i++) + rawDensityGrids[i] = densityGrids[i]; + } diff --git a/makefile b/makefile index c5a3a6d..c119d1b 100644 --- a/makefile +++ b/makefile @@ -61,7 +61,8 @@ all: $(TARGET) $(TARGET): $(CU_OBJ) $(CC_OBJ) runBrownTown.cpp vmdsock.c imd.c imd.h $(EXEC) $(CC) $(CC_FLAGS) $(LD_FLAGS) $(EX_FLAGS) runBrownTown.cpp vmdsock.c imd.c $(CU_OBJ) $(CC_OBJ) -o $(TARGET) -$(CU_OBJ): %.o: %.cu %.h +.SECONDEXPANSION: +$(CU_OBJ): %.o: %.cu $$(wildcard %.h) $$(wildcard %.cuh) $(EXEC) $(NVCC) $(NV_FLAGS) $(EX_FLAGS) -c $< -o $@ $(CC_OBJ): %.o: %.cpp %.h diff --git a/notes.org b/notes.org new file mode 100644 index 0000000..9cf5d48 --- /dev/null +++ b/notes.org @@ -0,0 +1 @@ +* Q: vector classes in thrust look like they could simplify code; bad idea? -- GitLab