tbgl / tools / arbd · Commits

Commit ea0ddc48, authored 1 year ago by cmaffeo2 (parent f3f3feae)

    Basic Array benchmarking

No related branches, tags, or merge requests found.
Changes: 2 changed files, with 131 additions and 9 deletions

    src/Tests/array.cu   +118  −3
    src/Types/Array.h     +13  −6
src/Tests/array.cu  (+118 −3)

@@ -9,6 +9,7 @@
 #include <nvfunctional>
 #include <catch2/catch_test_macros.hpp>
+#include <catch2/benchmark/catch_benchmark.hpp>
 #include <catch2/matchers/catch_matchers_floating_point.hpp>

 namespace Tests::TestArray {
@@ -114,6 +115,22 @@ namespace Tests::TestArray {
     //     print_enable_if_value_helper<has_copy_to_cuda<T>>(typename has_copy_to_cuda<T>::type{});
     // }

+    template <typename T>
+    __host__ __device__ void print_it(T x) { printf("Unsupported type\n"); }
+    template <>
+    __host__ __device__ void print_it(const int x) { printf("int %d\n", x); }
+    template <>
+    __host__ __device__ void print_it(const long int x) { printf("long int %ld\n", x); }
+    template <>
+    __host__ __device__ void print_it(const float x) { printf("float %f\n", x); }
+    template <>
+    __host__ __device__ void print_it(const double x) { printf("double %lf\n", x); }
+    template <>
+    __host__ __device__ void print_it(const Vector3&& x) { x.print(); }
+    template <>
+    __host__ __device__ void print_it(const Vector3& x) { x.print(); }
+
     template <typename T>
     void print_enable_if_value() {
         if (has_copy_to_cuda<T>::value) {
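Note: print_it dispatches through explicit template specializations, so the output depends on the deduced argument type. A minimal host-side illustration (not part of this commit):

    // Sketch, illustration only:
    print_it(10);     // T = int    -> "int 10"
    print_it(3.5f);   // T = float  -> "float 3.500000"
    print_it(2.0);    // T = double -> "double 2.000000"
    print_it('c');    // no matching specialization -> "Unsupported type"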
@@ -123,15 +140,70 @@ namespace Tests::TestArray {
         }
     }

-    template <typename T>
-    Array<T> create_array(size_t num) {
+    template <typename T>
+    Array<T> allocate_array_host(size_t num) {
         Array<T> arr(num);
         return arr;
     }

+    template <typename T>
+    Array<T>* allocate_array_device(size_t num) {
+        Array<T> arr(num);
+        return arr.copy_to_cuda();
+    }
+
+    template <typename T>
+    T* allocate_plain_array_host(size_t num) {
+        T* arr = new T[num];
+        return arr;
+    }
+
+    template <typename T>
+    T* allocate_plain_array_device(size_t num) {
+        T* arr = allocate_plain_array_host<T>(num);
+        T* arr_d;
+        size_t sz = sizeof(T) * num;
+        gpuErrchk(cudaMalloc(&arr_d, sz));
+        gpuErrchk(cudaMemcpy(arr_d, arr, sz, cudaMemcpyHostToDevice));
+        delete[] arr;
+        return arr_d;
+    }
+
+    template <typename T>
+    HOST DEVICE void inline _copy_helper(size_t& idx, T* __restrict__ out, const T* __restrict__ inp) {
+        out[idx] = inp[idx];
+    }
+    // HOST DEVICE void inline _copy_helper(size_t& idx, float* __restrict__ out, const float* __restrict__ inp) {
+    //     out[idx] = inp[idx];
+    // }
+    template <typename T>
+    HOST DEVICE void inline _copy_helper(size_t& idx, Array<T>* __restrict__ out, const Array<T>* __restrict__ inp) {
+        (*out)[idx] = (*inp)[idx];
+    }
+
+    template <typename T>
+    __global__ void copy_kernel(size_t num, T* __restrict__ out, const T* __restrict__ inp) {
+        for (size_t i = threadIdx.x + blockIdx.x * blockDim.x; i < num; i += blockDim.x * gridDim.x) {
+            _copy_helper(i, out, inp);
+        }
+    }
+
+    template <typename T>
+    void call_copy_kernel(size_t num, T* __restrict__ out, const T* __restrict__ inp, size_t block_size = 256) {
+        copy_kernel<<<block_size, 1, 0>>>(num, out, inp);
+        gpuErrchk(cudaDeviceSynchronize());
+    }
+
+    // Array<T> _copy_array_cuda(size_t num) {
+    //     Array<T> arr(num);
+    //     return arr;
+    // }

     TEST_CASE( "Test Array assignment and copy_to_cuda", "[Array]" ) {
         {
             // Creation and copy assignment
-            Array<Vector3> a = create_array<Vector3>(10);
+            Array<Vector3> a = allocate_array_host<Vector3>(10);
         }
         {
@@ -227,4 +299,47 @@ namespace Tests::TestArray {
             b_d->remove_from_cuda(b_d);
         }
     }
+
+    //Benchmark showing that Array<Vector3> performs similarly to plain array for device copy, at least
+    /*
+    TEST_CASE( "Test performance copying Array vs plain arrays", "[Array]" ) {
+        size_t num = 100000;
+
+        float* inp3 = allocate_plain_array_device<float>(3*num);
+        float* out3 = allocate_plain_array_device<float>(3*num);
+        float* inp4 = allocate_plain_array_device<float>(4*num);
+        float* out4 = allocate_plain_array_device<float>(4*num);
+        float4* inpF4 = allocate_plain_array_device<float4>(num);
+        float4* outF4 = allocate_plain_array_device<float4>(num);
+        Array<Vector3>* inpV = allocate_array_device<Vector3>(num);
+        Array<Vector3>* outV = allocate_array_device<Vector3>(num);
+
+        // call_copy_kernel(3*num, out3, inp3);
+        // call_copy_kernel(4*num, out4, inp4);
+        // call_copy_kernel(num, outV, inpV);
+
+        BENCHMARK("Call 3x num float copy") {
+            call_copy_kernel(3*num, out3, inp3);
+        };
+        BENCHMARK("Call num Vector3 copy") {
+            call_copy_kernel(num, outV, inpV);
+        };
+        BENCHMARK("Call num float4 copy") {
+            call_copy_kernel(num, outF4, inpF4);
+        };
+        BENCHMARK("Call 3x num float copy (repeat)") {
+            call_copy_kernel(3*num, out3, inp3);
+        };
+        BENCHMARK("Call 4x num float copy") {
+            call_copy_kernel(4*num, out4, inp4);
+        };
+        BENCHMARK("Call num Vector3 copy (repeat)") {
+            call_copy_kernel(num, outV, inpV);
+        };
+        BENCHMARK("Call num float4 copy (repeat)") {
+            call_copy_kernel(num, outF4, inpF4);
+        };
+    // */
+    }
 }
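Note: the new allocation helpers and call_copy_kernel rely on a gpuErrchk error-checking macro that is defined elsewhere in the codebase. A typical definition of that idiom, given here only as an assumption for illustration and not necessarily this repository's exact code:

    // Sketch of the common gpuErrchk idiom (assumed, not from this commit):
    #include <cstdio>
    #include <cstdlib>
    #include <cuda_runtime.h>

    #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
    inline void gpuAssert(cudaError_t code, const char* file, int line, bool abort = true) {
        if (code != cudaSuccess) {
            // Report the CUDA error with its source location, optionally aborting.
            fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
            if (abort) exit(code);
        }
    }

The benchmark TEST_CASE itself is committed commented out via the /* ... // */ wrapper; once enabled, Catch2 runs the BENCHMARK blocks as part of the selected test case, and the sampling can be tuned with Catch2's benchmark command-line options.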
src/Types/Array.h  (+13 −6)
@@ -12,7 +12,7 @@
 template<typename T>
 class Array {
 public:
-    HOST inline Array<T>() : num(0), values(nullptr) {} // printf("Creating Array1 %x\n",this);
+    HOST DEVICE inline Array<T>() : num(0), values(nullptr) {} // printf("Creating Array1 %x\n",this);
     HOST inline Array<T>(size_t num) : num(num), values(nullptr) {
         // printf("Constructing Array<%s> %x with values %x\n", type_name<T>().c_str(), this, values);
         host_allocate();
@@ -43,22 +43,28 @@ public:
         a.num = 0; // not needed?
         // printf("Move-constructed Array<T> with values %x\n", values);
     }
-    HOST inline Array<T>& operator=(const Array<T>& a) { // copy assignment operator
+    HOST DEVICE inline Array<T>& operator=(const Array<T>& a) { // copy assignment operator
         num = a.num;
+#ifndef __CUDA_ARCH__
         host_allocate();
+#endif
         for (size_t i = 0; i < num; ++i) {
             values[i] = a[i];
         }
-        printf("Copy-operator for Array<T> %x with values %x\n",this, values);
+        // printf("Copy-operator for Array<T> %x with values %x\n",this, values);
+        printf("Copy-operator for Array<T>\n");
         return *this;
     }
-    HOST inline Array<T>& operator=(Array<T>&& a) { // move assignment operator
+    HOST DEVICE inline Array<T>& operator=(Array<T>&& a) { // move assignment operator
+#ifndef __CUDA_ARCH__
         host_deallocate();
+#endif
         num = a.num;
         values = a.values;
         a.num = 0;
         a.values = nullptr;
-        printf("Move-operator for Array<T> %x with values %x\n",this, values);
+        // printf("Move-operator for Array<T> %x with values %x\n",this, values);
+        printf("Move-operator for Array<T>\n");
         return *this;
     }
     HOST DEVICE inline T& operator[](size_t i) {
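Note: the copy and move assignment operators are now HOST DEVICE, with host_allocate()/host_deallocate() guarded by #ifndef __CUDA_ARCH__ so that host-only memory management is compiled out of the device pass. A minimal sketch of how that guard behaves (illustration only, not part of this commit):

    // Sketch: __CUDA_ARCH__ is defined only while nvcc compiles the device pass,
    // so a single HOST DEVICE function can take different paths on CPU and GPU.
    __host__ __device__ void which_pass() {
    #ifdef __CUDA_ARCH__
        printf("device code path\n");   // compiled for the GPU
    #else
        printf("host code path\n");     // compiled for the CPU
    #endif
    }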
@@ -139,6 +145,7 @@ public:
     template <typename Dummy = void, typename std::enable_if_t<!has_copy_to_cuda<T>::value, Dummy>* = nullptr>
     HOST static Array<T> copy_from_cuda(Array<T>* dev_ptr) {
         // TODO add argument: dest = nullptr
         // Create host object, copy raw device data over
         Array<T> tmp(0);
         if (dev_ptr != nullptr) {
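Note: copy_from_cuda is selected through the enable_if_t/Dummy SFINAE pattern, so types that provide copy_to_cuda get a deep-copy overload while plain types fall back to a raw copy. A self-contained host-only sketch of that selection mechanism, using hypothetical names rather than this repository's actual trait:

    // Sketch (illustration only): selecting between two static functions with a
    // dummy template parameter and enable_if_t, mirroring Array<T>::copy_from_cuda.
    #include <cstdio>
    #include <type_traits>

    template <typename T>
    struct has_copy_to_cuda : std::false_type {};          // default: "plain" type

    struct Fancy {};                                       // pretend this type has copy_to_cuda()
    template <> struct has_copy_to_cuda<Fancy> : std::true_type {};

    template <typename T>
    struct Holder {
        // Chosen when T has no copy_to_cuda: a raw copy suffices.
        template <typename Dummy = void,
                  typename std::enable_if_t<!has_copy_to_cuda<T>::value, Dummy>* = nullptr>
        static void copy_from_cuda() { std::printf("raw copy path\n"); }

        // Chosen when T has copy_to_cuda: each element needs a deep copy.
        template <typename Dummy = void,
                  typename std::enable_if_t<has_copy_to_cuda<T>::value, Dummy>* = nullptr>
        static void copy_from_cuda() { std::printf("element-wise deep copy path\n"); }
    };

    int main() {
        Holder<int>::copy_from_cuda();    // prints "raw copy path"
        Holder<Fancy>::copy_from_cuda();  // prints "element-wise deep copy path"
        return 0;
    }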
@@ -243,5 +250,5 @@ private:
     }

     size_t num;
-    T* values;
+    T* __restrict__ values;
 };
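Note: marking the internal values pointer __restrict__ matches the __restrict__-qualified parameters of the new copy helpers; the qualifier tells the compiler the pointers involved do not alias, which lets it keep loads in registers and emit wider memory operations. A minimal standalone illustration (not part of this commit):

    // Sketch: with __restrict__, the compiler may assume out and in never overlap.
    __global__ void saxpy_like(int n, float a, float* __restrict__ out, const float* __restrict__ in) {
        int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n) out[i] = a * in[i] + out[i];
    }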