From a8a60fa427396a8ba2e329392c9aa77fb55c36b5 Mon Sep 17 00:00:00 2001 From: Chris Maffeo <cmaffeo2@illinois.edu> Date: Tue, 10 Nov 2015 19:10:22 -0600 Subject: [PATCH] copying memory for RB to device compiles --- BaseGrid.cu | 10 +++ BaseGrid.h | 4 - Configuration.cpp | 193 ++++++++++++++++++++++++++++++++++++++++----- Configuration.h | 2 +- GrandBrownTown.cu | 9 +++ GrandBrownTown.cuh | 7 ++ RigidBodyType.cu | 21 ++++- RigidBodyType.h | 46 +++++------ makefile | 6 +- runBrownTown.cpp | 2 + 10 files changed, 245 insertions(+), 55 deletions(-) diff --git a/BaseGrid.cu b/BaseGrid.cu index 5ef2390..6db492b 100644 --- a/BaseGrid.cu +++ b/BaseGrid.cu @@ -19,7 +19,17 @@ void BaseGrid::init() { } BaseGrid::BaseGrid() { BaseGrid tmp(Matrix3(),Vector3(),1,1,1); + val = new float[1]; *this = tmp; // TODO: verify that this is OK + + // basis = Matrix3(); + // origin = Vector3(); + // nx = 1; + // ny = 1; + // nz = 1; + + // init(); + // zero(); } // The most obvious of constructors. diff --git a/BaseGrid.h b/BaseGrid.h index b6ff917..9156f95 100644 --- a/BaseGrid.h +++ b/BaseGrid.h @@ -37,15 +37,12 @@ class BaseGrid { private: // Initialize the variables that get used a lot. // Also, allocate the main value array. - HOST DEVICE void init(); public: - HOST DEVICE BaseGrid(); // cmaffeo2 (2015) moved this out of protected, cause I wanted BaseGrid in a struct // The most obvious of constructors. - HOST DEVICE BaseGrid(Matrix3 basis0, Vector3 origin0, int nx0, int ny0, int nz0); // Make an orthogonal grid given the box dimensions and resolution. @@ -91,7 +88,6 @@ public: // Write the valies in a single column. virtual void writePotential(const char* fileName) const; - HOST DEVICE virtual ~BaseGrid(); void zero(); diff --git a/Configuration.cpp b/Configuration.cpp index c1f1a43..1fddb08 100644 --- a/Configuration.cpp +++ b/Configuration.cpp @@ -175,16 +175,17 @@ Configuration::Configuration(const char* config_file, int simNum, bool debug) : currSerial = 0; // RigidBodies... - if (numRBs > 0) { + if (numRigidTypes > 0) { printf("\nCounting rigid bodies specified in the configuration file.\n"); numRB = 0; - for (int i = 0; i < numRBs; i++) num += rigidBody[i].num; + for (int i = 0; i < numRigidTypes; i++) numRB += rigidBody[i].num; // // state data // rbPos = new Vector3[numRB * simNum]; // type = new int[numRB * simNum]; } + printf("Initial RigidBodies: %d\n", numRB); // Create exclusions from the exclude rule, if it was specified in the config file @@ -400,17 +401,13 @@ void Configuration::copyToCUDA() { printf("Copying to GPU %d\n", GPUManager::current()); BrownianParticleType **part_addr = new BrownianParticleType*[numParts]; - RigidBodyType **rb_addr = new RigidBodyType*[numRBs]; + RigidBodyType **rb_addr = new RigidBodyType*[numRigidTypes]; // Copy the BaseGrid objects and their member variables/objects gpuErrchk(cudaMalloc(&part_d, sizeof(BrownianParticleType*) * numParts)); - gpuErrchk(cudaMalloc(&rbType_d, sizeof(RigidBodyType*) * numRBs)); + gpuErrchk(cudaMalloc(&rbType_d, sizeof(RigidBodyType*) * numRigidTypes)); // TODO: The above line fails when there is not enough memory. If it fails, stop. - // TODO: what's going on here? - // pmf_h is made as a copy of *part[i].pmf, which is then asynchronously copied to Device, and - // is not deleted - // seems like bad code, but not 100% sure for (int i = 0; i < numParts; i++) { BaseGrid *pmf = NULL, *diffusionGrid = NULL; BrownianParticleType *b = new BrownianParticleType(part[i]); @@ -451,14 +448,174 @@ void Configuration::copyToCUDA() { cudaMemcpyHostToDevice)); } - // TODO ? utilize Thrust more extensively - for (int i = 0; i < numRBs; i++) { - gpuErrchk(cudaMalloc(&rbType_d, sizeof(RigidBodyType*) * numRBs)); - rigidBody[i].potentialGrids_D = rigidBody[i].potentialGrids; - rigidBody[i].densityGrids_D = rigidBody[i].densityGrids; + printf("copying RBs\n"); + // Copy rigidbody types + // http://stackoverflow.com/questions/16024087/copy-an-object-to-device + for (int i = 0; i < numRigidTypes; i++) { + printf("working on RB %d\n",i); + RigidBodyType *rb = &(rigidBody[i]); // temporary for convenience + rb->updateRaw(); + + int ng = rb->numPotGrids; + + // copy rigidbody to device + RigidBodyType *rb_d; + gpuErrchk(cudaMalloc((void **) &rb_d, sizeof(RigidBodyType))); + gpuErrchk(cudaMemcpy(rb_d, rb, sizeof(RigidBodyType), + cudaMemcpyHostToDevice)); + + // copy rb->grid to device + BaseGrid * gtmp; + gtmp = new BaseGrid[ng]; + size_t sz = sizeof(BaseGrid)*ng; - } - + // allocate grids on device + // copy temporary host pointer to device pointer + // copy grids to device through temporary host poin + gpuErrchk(cudaMalloc((void **) >mp, sz)); + gpuErrchk(cudaMemcpy(&(rb_d->rawPotentialGrids), >mp, + sizeof(BaseGrid*) * ng, cudaMemcpyHostToDevice )); + gpuErrchk(cudaMemcpy(gtmp, &(rb->rawPotentialGrids), + sizeof(BaseGrid) * ng, cudaMemcpyHostToDevice )); + + // RBTODO: segfault when gtmp is deleted --> why? + // delete[] gtmp; + + + // // copy grid data to device + // for (int gid = 0; gid < ng; gid++) { + // BaseGrid *g = &(rb->rawPotentialGrids[gid]); // convenience + // int len = g->getSize(); + // float *tmpData; + // tmpData = new float[len]; + + // // allocate grid data on device + // // copy temporary host pointer to device pointer + // // copy data to device through temporary host pointer + // sz = sizeof(float*) * len; + // gpuErrchk(cudaMalloc((void **) &tmpData, sz)); + // gpuErrchk(cudaMemcpy( (rb_d->rawPotentialGrids[gid].val), &tmpData, + // sizeof(float*), cudaMemcpyHostToDevice)); + // sz = sizeof(float) * len; + // gpuErrchk(cudaMemcpy( tmpData, g->val, sz, cudaMemcpyHostToDevice)); + // // RBTODO: why can't this be deleted? + // // delete[] tmpData; + // } + + + + + // // gpuErrchk(cudaMemcpy(&(rb_d->potentialGrids[i]), >mp, + // // sz, cudaMemcpyHostToDevice )); + + // // size_t sz; + // // for (int gid = 0; gid < ng; gid++) { + // // gpuErrchk(cudaMalloc((void**) &(gtmp[i]), sizeof(BaseGrid))); + // // gpuErrchk(cudaMemcpy(&(gtmp[i]->potentialGrids[i]), &(gtmp[i]), + // // sizeof(BaseGrid*), cudaMemcpyHostToDevice)); + // // } + + + // { + // BaseGrid *tmpData; + // size_t sz = sizeof(BaseGrid)*ng; + + // gpuErrchk(cudaMalloc((void **) &tmpData, sz)); + // gpuErrchk(cudaMemcpy(&(rb_d->potentialGrids[i]), &tmpData, + // sizeof(BaseGrid*), cudaMemcpyHostToDevice )); + // sz = sizeof(float) * ng + // gpuErrchk(cudaMemcpy(tmpData, rbg->val, sz, cudaMemcpyHostToDevice)); + // } + + + // size_t sz = sizeof(float) * rb->potentialGrids[gid]->getSize(); + // // gpuErrchk(cudaMalloc(&g_d, sizeof(BaseGrid))); + + // gpuErrchk(cudaMalloc(&tmpData, sz)); + // gpuErrchk(cudaMemcpyAsync(g_d, rb->potentialGrids[gid], + // sizeof(BaseGrid), cudaMemcpyHostToDevice)); + + // gpuErrchk(cudaMemcpy(val_d, rb->potentialGrids[gid], sz, cudaMemcpyHostToDevice)); + + // set rb pointers appropriately + // rb_d->potentialGrids[gid] = g_d; + // g_d->val = val_d; // hopefully? + + + + // BaseGrid *g_d; + // float *val_d; + // size_t sz = sizeof(float) * rb.potentialGrids[gid]->getSize(); + // gpuErrchk(cudaMalloc(&g_d, sizeof(BaseGrid))); + // gpuErrchk(cudaMalloc(&val_d, sz)); + // gpuErrchk(cudaMemcpyAsync(g_d, rb->potentialGrids[gid], + // sizeof(BaseGrid), cudaMemcpyHostToDevice)); + // gpuErrchk(cudaMemcpy(val_d, rb->potentialGrids[gid], sz, cudaMemcpyHostToDevice)); + + // // set rb pointers appropriately + // rb_d->potentialGrids[gid] = g_d; + // g_d->val = val_d; // hopefully? + + // } + + + // // Copy rigidbody types + // for (int i = 0; i < numRigidTypes; i++) { + // RigidBodyType *rb = rigidBody[i]; // temporary for convenience + // // gpuErrchk(cudaMemcpy(&rb, sizeof(RigidBodyType))); + + // RigidBodyType *rb_d; + + + // for (int gid = 0; i < rb.numPotGrids) { + // BaseGrid *g_d; + // float *val_d; + // size_t sz = sizeof(float) * rb.potentialGrids[gid]->getSize(); + // gpuErrchk(cudaMalloc(&g_d, sizeof(BaseGrid))); + // gpuErrchk(cudaMalloc(&val_d, sz)); + // gpuErrchk(cudaMemcpyAsync(g_d, rb->potentialGrids[gid], + // sizeof(BaseGrid), cudaMemcpyHostToDevice)); + // gpuErrchk(cudaMemcpy(val_d, rb->potentialGrids[gid], sz, cudaMemcpyHostToDevice)); + + // // set rb pointers appropriately + // rb_d->potentialGrids[gid] = g_d; + // g_d->val = val_d; // hopefully? + + // } + // for (int gid = 0; i < rb.numDenGrids) { + // BaseGrid *g_d; + // float *val_d; + // size_t sz = sizeof(float) * rb->densityGrids[gid]->getSize(); + // gpuErrchk(cudaMalloc(&g_d, sizeof(BaseGrid))); + // gpuErrchk(cudaMalloc(&val_d, sz)); + // gpuErrchk(cudaMemcpyAsync(g_d, rb->densityGrids[gid], + // sizeof(BaseGrid), cudaMemcpyHostToDevice)); + // gpuErrchk(cudaMemcpy(val_d, rb->densityGrids[gid], sz, cudaMemcpyHostToDevice)); + + // } + + + // // RBTODO + // // b->pmf = pmf; + // // b->diffusionGrid = diffusionGrid; + // gpuErrchk(cudaMalloc(&rb_addr[i], sizeof(BrownianParticleType))); + // gpuErrchk(cudaMemcpyAsync( rb_addr[i], rb_d, + // sizeof(BrownianParticleType), cudaMemcpyHostToDevice)); + + + // gpuErrchk(cudaMalloc(&rbType_d, sizeof(RigidBodyType*) * numRigidTypes)); + // rigidBody[i].potentialGrids_D = rigidBody[i].potentialGrids; + // rigidBody[i].densityGrids_D = rigidBody[i].densityGrids; + + // gpuErrchk(cudaMemcpyAsync(rb_addr[i], rb, sizeof(RigidBodyType), + // cudaMemcpyHostToDevice)); + + // gpuErrchk(cudaMemcpyAsync(part_d, part_addr, sizeof(BrownianParticleType*) * numParts, + // cudaMemcpyHostToDevice)); + + // } + } + printf("Done copying RBs\n"); // kTGrid_d kTGrid_d = NULL; @@ -574,7 +731,7 @@ int Configuration::readParameters(const char * config_file) { // Get the number of particles. const int numParams = config.length(); numParts = config.countParameter("particle"); - numRBs = config.countParameter("rigidBody"); + numRigidTypes = config.countParameter("rigidBody"); // Allocate the particle variables. part = new BrownianParticleType[numParts]; @@ -591,7 +748,7 @@ int Configuration::readParameters(const char * config_file) { partTableIndex1 = new int[numParts*numParts]; // Allocate rigid body types - rigidBody = new RigidBodyType[numRBs]; + rigidBody = new RigidBodyType[numRigidTypes]; int btfcap = 10; bondTableFile = new String[btfcap]; @@ -765,7 +922,7 @@ int Configuration::readParameters(const char * config_file) { } // RIGID BODY else if (param == String("rigidBody")) { - part[++currPart] = BrownianParticleType(value); + // part[++currPart] = BrownianParticleType(value); rigidBody[++currRB] = RigidBodyType(value); currPartClass = partClassRB; } diff --git a/Configuration.h b/Configuration.h index ecefd16..0734a33 100644 --- a/Configuration.h +++ b/Configuration.h @@ -192,7 +192,7 @@ public: // RigidBody parameters. RigidBodyType* rigidBody; - int numRBs; + int numRigidTypes; }; diff --git a/GrandBrownTown.cu b/GrandBrownTown.cu index b25d8ec..4ded8c3 100644 --- a/GrandBrownTown.cu +++ b/GrandBrownTown.cu @@ -9,6 +9,9 @@ inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true) { } } +#include "RigidBodyType.h" //temporary + + bool GrandBrownTown::DEBUG; cudaEvent_t START, STOP; @@ -27,6 +30,11 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg, outCurrFiles.push_back(curr_file.str()); restartFiles.push_back(restart_file.str()); outFilePrefixes.push_back(out_prefix.str()); + + printf("About to devicePrint\n"); + devicePrint<<<1,1>>>(&(c.rigidBody[0])); + printf("Done with devicePrint\n"); + } GrandBrownTown::DEBUG = debug; @@ -146,6 +154,7 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg, } else { r_seed = seed + randomSeed; } + printf("Setting up Random Generator\n"); randoGen = new Random(num * numReplicas, r_seed); printf("Random Generator Seed: %lu -> %lu\n", randomSeed, r_seed); diff --git a/GrandBrownTown.cuh b/GrandBrownTown.cuh index 64853d3..7989fdd 100644 --- a/GrandBrownTown.cuh +++ b/GrandBrownTown.cuh @@ -141,3 +141,10 @@ Vector3 step(Vector3 r0, float kTlocal, Vector3 force, float diffusion, return sys->getBasis().transform(l) + sys->getOrigin(); } + +__global__ void devicePrint(RigidBodyType* rb) { + printf("RigidBodyType: numGrids = %d\n", rb->numPotGrids); +} +// __device__ void devicePrint(BaseGrid g) { +// printf("RigidBodyType: numGrids = %d\n", numPotGrids); +// }; diff --git a/RigidBodyType.cu b/RigidBodyType.cu index cf2cc13..f8f1da6 100644 --- a/RigidBodyType.cu +++ b/RigidBodyType.cu @@ -10,11 +10,14 @@ void RigidBodyType::clear() { // TODO: make sure that this actually removes grid data potentialGrids.clear(); densityGrids.clear(); - potentialGrids_D.clear(); - densityGrids_D.clear(); + if (numPotGrids > 0) delete[] rawPotentialGrids; + if (numDenGrids > 0) delete[] rawDensityGrids; + rawPotentialGrids = NULL; + rawDensityGrids = NULL; } + // void RigidBodyType::copy(const RigidBodyType& src) { // this = new RigidBodyType(src.name); // num = src.num; @@ -64,6 +67,8 @@ void RigidBodyType::addPotentialGrid(String s) { BaseGrid g(token[1]); potentialGrids.push_back( g ); + // [numPotGrids] = g; + // numPotGrids++; } void RigidBodyType::addDensityGrid(String s) { // tokenize and return @@ -79,3 +84,15 @@ void RigidBodyType::addDensityGrid(String s) { densityGrids.push_back( g ); } + +void RigidBodyType::updateRaw() { + if (numPotGrids > 0) delete[] rawPotentialGrids; + if (numDenGrids > 0) delete[] rawDensityGrids; + numPotGrids = potentialGrids.size(); + numDenGrids = densityGrids.size(); + if (numPotGrids > 0) + rawPotentialGrids = new BaseGrid[numPotGrids]; + if (numDenGrids > 0) + rawDensityGrids = new BaseGrid[numDenGrids]; +} + diff --git a/RigidBodyType.h b/RigidBodyType.h index e1826d4..d20f1e6 100644 --- a/RigidBodyType.h +++ b/RigidBodyType.h @@ -2,13 +2,15 @@ // Author: Chris Maffeo <cmaffeo2@illinois.edu> #pragma once -// #include <vector> -#include <thrust/host_vector.h> -#include <thrust/device_vector.h> +#include <vector> +/* #include <thrust/host_vector.h> */ +/* #include <thrust/device_vector.h> */ #include "Reservoir.h" #include "useful.h" #include "BaseGrid.h" +#include <cstdio> + class RigidBodyType { private: // Deletes all members @@ -25,25 +27,7 @@ public: RigidBodyType(const String& name = "") : name(name), num(0), reservoir(NULL), mass(1.0f), inertia(), transDamping(), - rotDamping() { - /* potentialGrids = *(new thrust::host_vector<BaseGrid>()); */ - /* densityGrids = *(new thrust::host_vector<BaseGrid>()); */ - /* potentialGrids = *(new thrust::host_vector<BaseGrid>()); */ - /* densityGrids = *(new thrust::host_vector<BaseGrid>()); */ - - /* thrust::host_vector<BaseGrid> potentialGrids; */ - /* thrust::host_vector<BaseGrid> densityGrids; */ - /* thrust::device_vector<BaseGrid> potentialGrids_D; */ - /* thrust::device_vector<BaseGrid> densityGrids_D; */ - - /* potentialGrids = thrust::host_vector<BaseGrid>(); */ - /* densityGrids = thrust::host_vector<BaseGrid>(); */ - /* potentialGrids = thrust::host_vector<BaseGrid>(); */ - /* densityGrids = thrust::host_vector<BaseGrid>(); */ - - } - - + rotDamping(), numPotGrids(0), numDenGrids(0) { } /* RigidBodyType(const RigidBodyType& src) { copy(src); } */ ~RigidBodyType() { clear(); } @@ -52,8 +36,10 @@ RigidBodyType(const String& name = "") : void addPotentialGrid(String s); void addDensityGrid(String s); - + void updateRaw(); + public: + String name; int num; // number of particles of this type @@ -64,8 +50,14 @@ public: Vector3 transDamping; Vector3 rotDamping; - thrust::host_vector<BaseGrid> potentialGrids; - thrust::host_vector<BaseGrid> densityGrids; - thrust::device_vector<BaseGrid> potentialGrids_D; - thrust::device_vector<BaseGrid> densityGrids_D; + std::vector<BaseGrid> potentialGrids; + std::vector<BaseGrid> densityGrids; + + // for device + int numPotGrids; + int numDenGrids; + BaseGrid* rawPotentialGrids; + BaseGrid* rawDensityGrids; + }; + diff --git a/makefile b/makefile index 32d4d6d..c64722a 100644 --- a/makefile +++ b/makefile @@ -9,8 +9,9 @@ include ./findcudalib.mk INCLUDE = $(CUDA_PATH)/include -CC_FLAGS = -Wall -Wno-write-strings -I$(INCLUDE) -NV_FLAGS = +DEBUG = -g +CC_FLAGS = -Wall -Wno-write-strings -I$(INCLUDE) $(DEBUG) -std=c++0x -pedantic +NV_FLAGS = $(DEBUG) EX_FLAGS = -O3 -m$(OS_SIZE) ifneq ($(MAVERICKS),) @@ -40,7 +41,6 @@ NV_FLAGS += $(CODE_10) $(CODE_12) $(CODE_20) $(CODE_30) $(CODE_35) ### Sources - CC_SRC := $(wildcard *.cpp) CC_SRC := $(filter-out runBrownTown.cpp, $(CC_SRC)) CU_SRC := $(wildcard *.cu) diff --git a/runBrownTown.cpp b/runBrownTown.cpp index 5de068f..4cd7714 100644 --- a/runBrownTown.cpp +++ b/runBrownTown.cpp @@ -103,7 +103,9 @@ int main(int argc, char* argv[]) { GPUManager::init(); GPUManager::safe(safe); Configuration config(configFile, replicas, debug); + printf("Done with config\n"); GPUManager::set(0); + printf("Done with gpumanager\n"); config.copyToCUDA(); GrandBrownTown brown(config, outArg, randomSeed, -- GitLab