Skip to content
Snippets Groups Projects
Commit a1621ecb authored by cmaffeo2's avatar cmaffeo2
Browse files

cleaned up code

parent 3e0875fb
No related branches found
No related tags found
No related merge requests found
...@@ -197,33 +197,20 @@ void RigidBodyController::updateForces(int s) { ...@@ -197,33 +197,20 @@ void RigidBodyController::updateForces(int s) {
for (int i=0; i < forcePairs.size(); i++) for (int i=0; i < forcePairs.size(); i++)
forcePairs[i].callGridForceKernel(i,s); forcePairs[i].callGridForceKernel(i,s);
/* for (int i=0; i < forcePairs.size(); i++) */ // each kernel call is followed by async memcpy for previous; now get last
/* forcePairs[i].retrieveForces(); */ RigidBodyForcePair* fp = RigidBodyForcePair::lastRbForcePair;
RigidBodyForcePair::lastRigidBodyForcePair->retrieveForcesForGrid( fp->retrieveForcesForGrid( fp->lastRbGridID );
RigidBodyForcePair::lastRigidBodyGridID); fp->lastRbGridID = -1;
RigidBodyForcePair::lastRigidBodyGridID = -1;
// sync streams
for (int i = 0; i < NUMSTREAMS; i++) {
const cudaStream_t &s = fp->stream[i];
gpuErrchk(cudaStreamSynchronize( s ));
}
for (int i=0; i < forcePairs.size(); i++) for (int i=0; i < forcePairs.size(); i++)
forcePairs[i].processForces(); forcePairs[i].processGPUForces();
// RBTODO: see if there is a better way to sync
// gpuErrchk(cudaDeviceSynchronize());
/*/ debug
if (s %10 == 0) {
int tmp = 0;
for (int i = 0; i < rigidBodyByType.size(); i++) {
for (int j = 0; j < rigidBodyByType[i].size(); j++) {
RigidBody& rb = rigidBodyByType[i][j];
tmp++;
Vector3 p = rb.getPosition();
Vector3 t = rb.torque;
printf("RBTORQUE: %d %f %f %f %f %f %f\n", tmp, p.x, p.y, p.z, t.x,t.y,t.z);
}
}
}
*/
} }
void RigidBodyController::integrate(int step) { void RigidBodyController::integrate(int step) {
// tell RBs to integrate // tell RBs to integrate
...@@ -285,11 +272,10 @@ void RigidBodyController::integrate(int step) { ...@@ -285,11 +272,10 @@ void RigidBodyController::integrate(int step) {
// allocate and initialize an array of stream handles // allocate and initialize an array of stream handles
cudaStream_t *RigidBodyForcePair::stream = (cudaStream_t *) malloc(NUMSTREAMS * sizeof(cudaStream_t)); cudaStream_t *RigidBodyForcePair::stream = (cudaStream_t *) malloc(NUMSTREAMS * sizeof(cudaStream_t));
bool *RigidBodyForcePair::isStreamLaunched = (bool *) malloc(NUMSTREAMS * sizeof(bool));
// new cudaStream_t[NUMSTREAMS];
int RigidBodyForcePair::nextStreamID = 0; /* used during stream init */ int RigidBodyForcePair::nextStreamID = 0; /* used during stream init */
int RigidBodyForcePair::lastRigidBodyGridID = -1; /* used to schedule kernel interaction */ int RigidBodyForcePair::lastRbGridID = -1; /* used to schedule kernel interaction */
RigidBodyForcePair* RigidBodyForcePair::lastRigidBodyForcePair = NULL; RigidBodyForcePair* RigidBodyForcePair::lastRbForcePair = NULL;
void RigidBodyForcePair::createStreams() { void RigidBodyForcePair::createStreams() {
gpuErrchk( cudaProfilerStart() ); gpuErrchk( cudaProfilerStart() );
for (int i = 0; i < NUMSTREAMS; i++) for (int i = 0; i < NUMSTREAMS; i++)
...@@ -333,12 +319,8 @@ void RigidBodyForcePair::callGridForceKernel(int pairId, int s) { ...@@ -333,12 +319,8 @@ void RigidBodyForcePair::callGridForceKernel(int pairId, int s) {
const int nb = numBlocks[i]; const int nb = numBlocks[i];
const int k1 = gridKeyId1[i]; const int k1 = gridKeyId1[i];
const int k2 = gridKeyId2[i]; const int k2 = gridKeyId2[i];
const int sid = streamID[i]; const cudaStream_t &s = stream[streamID[i]];
const cudaStream_t &s = stream[sid];
/* if (isStreamLaunched[sid]) */
/* retrieveForcesForGrid(i); */
/* /*
ijk: index of grid value ijk: index of grid value
r: position of point ijk in real space r: position of point ijk in real space
...@@ -374,66 +356,31 @@ void RigidBodyForcePair::callGridForceKernel(int pairId, int s) { ...@@ -374,66 +356,31 @@ void RigidBodyForcePair::callGridForceKernel(int pairId, int s) {
forces_d[i], torques_d[i]); forces_d[i], torques_d[i]);
} }
if (lastRigidBodyGridID >= 0) if (lastRbGridID >= 0)
lastRigidBodyForcePair->retrieveForcesForGrid(lastRigidBodyGridID); lastRbForcePair->retrieveForcesForGrid(lastRbGridID);
lastRigidBodyForcePair = this; lastRbForcePair = this;
lastRigidBodyGridID = i; lastRbGridID = i;
/* isStreamLaunched[sid] = true; */
/* gpuErrchk(cudaMemcpyAsync(forces[i], forces_d[i], sizeof(Vector3)*nb, */
/* cudaMemcpyDeviceToHost, s)); */
/* gpuErrchk(cudaMemcpyAsync(torques[i], torques_d[i], sizeof(Vector3)*nb, */
/* cudaMemcpyDeviceToHost, s)); */
} }
} }
void RigidBodyForcePair::retrieveForcesForGrid(const int i) { void RigidBodyForcePair::retrieveForcesForGrid(const int i) {
// i: grid ID (less than numGrids) // i: grid ID (less than numGrids)
const int sid = streamID[i]; const cudaStream_t &s = stream[streamID[i]];
const cudaStream_t &s = stream[sid];
const int nb = numBlocks[i]; const int nb = numBlocks[i];
gpuErrchk(cudaMemcpyAsync(forces[i], forces_d[i], sizeof(Vector3)*nb, gpuErrchk(cudaMemcpyAsync(forces[i], forces_d[i], sizeof(Vector3)*nb,
cudaMemcpyDeviceToHost, s)); cudaMemcpyDeviceToHost, s));
gpuErrchk(cudaMemcpyAsync(torques[i], torques_d[i], sizeof(Vector3)*nb, gpuErrchk(cudaMemcpyAsync(torques[i], torques_d[i], sizeof(Vector3)*nb,
cudaMemcpyDeviceToHost, s)); cudaMemcpyDeviceToHost, s));
//gpuErrchk(cudaStreamSynchronize( s ));
} }
void RigidBodyForcePair::retrieveForces() { void RigidBodyForcePair::processGPUForces() {
// sum forces + torques
const int numGrids = gridKeyId1.size();
Vector3 f = Vector3(0.0f);
Vector3 t = Vector3(0.0f);
for (int i = 0; i < numGrids; i++)
retrieveForcesForGrid(i);
/* { */
/* const int sid = streamID[i]; */
/* const cudaStream_t &s = stream[sid]; */
/* const int nb = numBlocks[i]; */
/* isStreamLaunched[sid] = false; */
/* gpuErrchk(cudaMemcpyAsync(forces[i], forces_d[i], sizeof(Vector3)*nb, */
/* cudaMemcpyDeviceToHost, s)); */
/* gpuErrchk(cudaMemcpyAsync(torques[i], torques_d[i], sizeof(Vector3)*nb, */
/* cudaMemcpyDeviceToHost, s)); */
/* } */
}
void RigidBodyForcePair::processForces() {
const int numGrids = gridKeyId1.size(); const int numGrids = gridKeyId1.size();
Vector3 f = Vector3(0.0f); Vector3 f = Vector3(0.0f);
Vector3 t = Vector3(0.0f); Vector3 t = Vector3(0.0f);
for (int i = 0; i < numGrids; i++) { for (int i = 0; i < numGrids; i++) {
const int sid = streamID[i];
const cudaStream_t &s = stream[sid];
const int nb = numBlocks[i]; const int nb = numBlocks[i];
isStreamLaunched[sid] = false;
gpuErrchk(cudaStreamSynchronize( s ));
Vector3 tmpF = Vector3(0.0f); Vector3 tmpF = Vector3(0.0f);
Vector3 tmpT = Vector3(0.0f); Vector3 tmpT = Vector3(0.0f);
...@@ -453,13 +400,6 @@ void RigidBodyForcePair::processForces() { ...@@ -453,13 +400,6 @@ void RigidBodyForcePair::processForces() {
t = t + tmpT; t = t + tmpT;
} }
// transform torque from lab-frame origin to rb centers
// add forces to rbs
/* Vector3 tmp; */
/* /\* tmp = rb1->position; *\/ */
/* /\* printf("rb1->position: (%f,%f,%f)\n", tmp.x, tmp.y, tmp.z); *\/ */
/* tmp = rb1->getPosition(); */
/* printf("rb1->getPosition(): (%f,%f,%f)\n", tmp.x, tmp.y, tmp.z); */
rb1->addForce( f ); rb1->addForce( f );
rb1->addTorque( t ); rb1->addTorque( t );
......
...@@ -65,17 +65,15 @@ private: ...@@ -65,17 +65,15 @@ private:
static int nextStreamID; static int nextStreamID;
std::vector<int> streamID; std::vector<int> streamID;
static cudaStream_t* stream; static cudaStream_t* stream;
static bool* isStreamLaunched;
static void createStreams(); static void createStreams();
static int lastStreamID; static int lastStreamID;
static RigidBodyForcePair* lastRigidBodyForcePair; static RigidBodyForcePair* lastRbForcePair;
static int lastRigidBodyGridID; static int lastRbGridID;
void callGridForceKernel(int pairId, int s); void callGridForceKernel(int pairId, int s);
void retrieveForcesForGrid(const int i); void retrieveForcesForGrid(const int i);
void retrieveForces(); void processGPUForces();
void processForces();
Matrix3 getBasis1(const int i); Matrix3 getBasis1(const int i);
Matrix3 getBasis2(const int i); Matrix3 getBasis2(const int i);
Vector3 getOrigin1(const int i); Vector3 getOrigin1(const int i);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment