Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
A
arbd
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
tbgl
tools
arbd
Commits
94128103
Commit
94128103
authored
8 years ago
by
cmaffeo2
Browse files
Options
Downloads
Patches
Plain Diff
made restart file write asynchronous
parent
acd1ecbe
No related branches found
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
GrandBrownTown.cu
+77
-42
77 additions, 42 deletions
GrandBrownTown.cu
makefile
+5
-3
5 additions, 3 deletions
makefile
nvtxEvents.h
+10
-0
10 additions, 0 deletions
nvtxEvents.h
with
92 additions
and
45 deletions
GrandBrownTown.cu
+
77
−
42
View file @
94128103
#include
"GrandBrownTown.h"
#include
"GrandBrownTown.cuh"
#include
"nvtxEvents.h"
/* #include "ComputeGridGrid.cuh" */
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
...
...
@@ -313,7 +315,14 @@ void GrandBrownTown::run() {
printf
(
"Configuration: %d particles | %d replicas
\n
"
,
num
,
numReplicas
);
// Main loop over Brownian dynamics steps
#pragma omp parallel sections
{
/* # pragma omp single nowait */
{
// Main loop over Brownian dynamics steps
for
(
long
int
s
=
1
;
s
<
steps
;
s
++
)
{
// Compute the internal forces. Only calculate the energy when we are about to output.
bool
get_energy
=
((
s
%
outputEnergyPeriod
)
==
0
);
...
...
@@ -322,6 +331,10 @@ void GrandBrownTown::run() {
// Set the timer
rt_timer_start
(
cputimer
);
// compute forces
nvtxRangeId_t
rid0
=
nvtxStart
(
"Compute 'atom' forces"
,
1
);
// 'interparticleForce' - determines whether particles interact with each other
if
(
interparticleForce
)
{
...
...
@@ -345,8 +358,7 @@ void GrandBrownTown::run() {
get_energy
);
break
;
default
:
// [ N^2 ] interactions, no cutoff | decompositions
energy
=
internal
->
computeTabulatedFull
(
forceInternal_d
,
pos_d
,
type_d
,
energy
=
internal
->
computeTabulatedFull
(
forceInternal_d
,
pos_d
,
type_d
,
bonds_d
,
bondMap_d
,
excludes_d
,
excludeMap_d
,
angles_d
,
dihedrals_d
,
...
...
@@ -401,6 +413,8 @@ void GrandBrownTown::run() {
int
numBlocks
=
(
num
*
numReplicas
)
/
NUM_THREADS
+
(
num
*
numReplicas
%
NUM_THREADS
==
0
?
0
:
1
);
int
tl
=
temperatureGrid
.
length
();
nvtxRangeEnd
(
rid0
);
rid0
=
nvtxStart
(
"RigidBody and update"
,
1
);
RBC
.
updateForces
(
s
);
/* update RB forces before update particle positions... */
// Call the kernel to update the positions of each particle
...
...
@@ -421,7 +435,11 @@ void GrandBrownTown::run() {
/* computeGridGridForce<<< numBlocks, NUM_THREADS >>>(grid1_d, grid2_d); */
// int numBlocks = (numRB ) / NUM_THREADS + (num * numReplicas % NUM_THREADS == 0 ? 0 : 1);
nvtxRangeEnd
(
rid0
);
rid0
=
nvtxStart
(
"I/O"
,
1
);
Vector3
force0
(
0.0
f
);
if
(
imd_on
&&
clientsock
&&
s
%
outputPeriod
==
0
)
{
...
...
@@ -460,37 +478,48 @@ void GrandBrownTown::run() {
}
}
bool
posFetched
=
false
;
// Output trajectories (to files)
if
(
s
%
outputPeriod
==
0
)
{
// wait for previous writes before copying new data
#pragma omp taskwait
// Copy particle positions back to CPU
gpuErrchk
(
cudaMemcpy
(
pos
,
pos_d
,
sizeof
(
Vector3
)
*
num
*
numReplicas
,
cudaMemcpyDeviceToHost
));
posFetched
=
true
;
#pragma omp task untied
{
nvtxRangeId_t
rid
=
nvtxStart
(
"Write output"
,
1
);
// Nanoseconds computed
t
=
s
*
timestep
;
gpuErrchk
(
cudaMemcpy
(
pos
,
pos_d
,
sizeof
(
Vector3
)
*
num
*
numReplicas
,
cudaMemcpyDeviceToHost
));
// Nanoseconds computed
t
=
s
*
timestep
;
// Loop over all replicas
for
(
int
repID
=
0
;
repID
<
numReplicas
;
++
repID
)
{
// Loop over all replicas
for
(
int
repID
=
0
;
repID
<
numReplicas
;
++
repID
)
{
// *Analysis*: ionic current
if
(
currentSegmentZ
<=
0.0
f
)
writeCurrent
(
repID
,
t
);
else
writeCurrentSegment
(
repID
,
t
,
currentSegmentZ
);
// *Analysis*: ionic current
if
(
currentSegmentZ
<=
0.0
f
)
writeCurrent
(
repID
,
t
);
else
writeCurrentSegment
(
repID
,
t
,
currentSegmentZ
);
//
if
(
numberFluct
==
1
)
updateNameList
();
// no need for it here if particles stay the same
// TODO: doublecheck
//
if
(
numberFluct
==
1
)
updateNameList
();
// no need for it here if particles stay the same
// TODO: doublecheck
// Write the trajectory.
writers
[
repID
]
->
append
(
pos
+
(
repID
*
num
),
name
,
serial
,
t
,
num
);
}
// Write the trajectory.
writers
[
repID
]
->
append
(
pos
+
(
repID
*
num
),
name
,
serial
,
t
,
num
);
}
// Handle particle fluctuations.
// TODO: Currently, not compatible with replicas. Needs a fix.
if
(
numberFluct
==
1
)
updateReservoirs
();
// Handle particle fluctuations.
// TODO: Currently, not compatible with replicas. Needs a fix.
if
(
numberFluct
==
1
)
updateReservoirs
();
// Store the current positions.
// We must do this after particles have been added.
remember
(
t
);
// Store the current positions.
// We must do this after particles have been added.
remember
(
t
);
nvtxRangeEnd
(
rid
);
}
}
// s % outputPeriod == 0
...
...
@@ -500,7 +529,7 @@ void GrandBrownTown::run() {
rt_timer_stop
(
timerS
);
// Copy back forces to display (internal only)
gpuErrchk
(
cudaMemcpy
(
&
force0
,
forceInternal_d
,
sizeof
(
Vector3
),
cudaMemcpyDeviceToHost
));
//
gpuErrchk(cudaMemcpy(&force0, forceInternal_d, sizeof(Vector3), cudaMemcpyDeviceToHost));
// Nanoseconds computed
t
=
s
*
timestep
;
...
...
@@ -520,25 +549,31 @@ void GrandBrownTown::run() {
t, energy, force0.x, force0.y, force0.z);
*/
// Copy positions from GPU to CPU.
gpuErrchk
(
cudaMemcpy
(
pos
,
pos_d
,
sizeof
(
Vector3
)
*
num
*
numReplicas
,
cudaMemcpyDeviceToHost
));
// Write restart files for each replica.
for
(
int
repID
=
0
;
repID
<
numReplicas
;
++
repID
)
writeRestart
(
repID
);
// Copy positions from GPU to CPU.
#pragma omp taskwait
if
(
!
posFetched
)
{
gpuErrchk
(
cudaMemcpy
(
pos
,
pos_d
,
sizeof
(
Vector3
)
*
num
*
numReplicas
,
cudaMemcpyDeviceToHost
));
}
#pragma omp task untied
{
// Write restart files for each replica.
nvtxRangeId_t
rid
=
nvtxStart
(
"Write Restart"
,
1
);
for
(
int
repID
=
0
;
repID
<
numReplicas
;
++
repID
)
writeRestart
(
repID
);
nvtxRangeEnd
(
rid
);
}
// restart the timer
rt_timer_start
(
timerS
);
}
// s % outputEnergyPeriod
{
int
numBlocks
=
(
num
*
numReplicas
)
/
NUM_THREADS
+
(
num
*
numReplicas
%
NUM_THREADS
==
0
?
0
:
1
);
clearInternalForces
<<<
numBlocks
,
NUM_THREADS
>>>
(
forceInternal_d
,
num
,
numReplicas
);
}
nvtxRangeEnd
(
rid0
);
}
// done with all Brownian dynamics steps
}
// If IMD is on & our socket is still open.
if
(
imd_on
and
clientsock
)
{
if
(
vmdsock_selread
(
clientsock
,
0
)
==
1
)
{
...
...
@@ -562,6 +597,7 @@ void GrandBrownTown::run() {
}
}
}
}
// Stop the main timer.
rt_timer_stop
(
timer0
);
...
...
@@ -612,10 +648,9 @@ void GrandBrownTown::populate() {
void
GrandBrownTown
::
writeRestart
(
int
repID
)
const
{
FILE
*
out
=
fopen
(
restartFiles
[
repID
].
c_str
(),
"w"
);
const
int
offset
=
repID
*
num
;
for
(
int
i
=
0
;
i
<
num
;
++
i
)
{
const
int
ind
=
i
+
offset
;
const
Vector3
&
p
=
pos
[
ind
];
fprintf
(
out
,
"%d %.10g %.10g %.10g
\n
"
,
type
[
ind
],
p
.
x
,
p
.
y
,
p
.
z
);
for
(
int
i
=
offset
;
i
<
offset
+
num
;
++
i
)
{
const
Vector3
&
p
=
pos
[
i
];
fprintf
(
out
,
"%d %.10g %.10g %.10g
\n
"
,
type
[
i
],
p
.
x
,
p
.
y
,
p
.
z
);
}
fclose
(
out
);
}
...
...
This diff is collapsed.
Click to expand it.
makefile
+
5
−
3
View file @
94128103
...
...
@@ -13,9 +13,11 @@ INCLUDE = $(CUDA_PATH)/include
# DEBUG = -g -O0
CC_FLAGS
=
-Wall
-Wno-write-strings
-I
$(
INCLUDE
)
$(
DEBUG
)
-std
=
c++0x
-pedantic
CC_FLAGS
=
-Wall
-Wno-write-strings
-I
$(
INCLUDE
)
$(
DEBUG
)
-std
=
c++0x
-pedantic
-fopenmp
# NV_FLAGS = --maxrregcount 63 -Xptxas -v # -v,-abi=no
NV_FLAGS
=
-Xptxas
-v
# -v,-abi=no
NV_FLAGS
=
-Xptxas
-v
# -v,-abi=no
NV_FLAGS
+=
-Xcompiler
-fopenmp
ifneq
($(DEBUG),)
NV_FLAGS
+=
-g
-G
#debug
EX_FLAGS
=
-O0
-m
$(
OS_SIZE
)
...
...
@@ -56,7 +58,7 @@ NV_FLAGS += -gencode arch=compute_$(SM),code=compute_$(SM)
NVLD_FLAGS
:=
$(
NV_FLAGS
)
--device-link
# NV_FLAGS += -rdc=true
LD_FLAGS
=
-L
$(
LIBRARY
)
-lcurand
-lcudart
-lcudadevrt
-Wl
,-rpath,
$(
LIBRARY
)
LD_FLAGS
=
-L
$(
LIBRARY
)
-lcurand
-lcudart
-lcudadevrt
-lnvToolsExt
-Wl
,-rpath,
$(
LIBRARY
)
LD_FLAGS
+=
-lcuda
...
...
This diff is collapsed.
Click to expand it.
nvtxEvents.h
0 → 100644
+
10
−
0
View file @
94128103
#pragma once
#include
<nvToolsExt.h>
// http://docs.nvidia.com/cuda/profiler-users-guide/index.html#nvtx
nvtxRangeId_t
nvtxStart
(
const
char
*
name
,
int
colorId
)
{
return
nvtxRangeStartA
(
name
);
}
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment