llvm / hpvm-release · Commits

Commit 2cdc4519, authored 5 years ago by Hashim Sharif

Removing unused code from tensor_runtime.cu

Parent: d7c6508a
Changes: 2 changed files, with 12 additions and 330 deletions

  llvm/projects/hpvm-tensor-rt/dnn_sources/src/unit_tests.cc         +0   −31
  llvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu  +12  −299
llvm/projects/hpvm-tensor-rt/dnn_sources/src/unit_tests.cc (+0, −31)
@@ -80,37 +80,6 @@ public:
void testTensorGemm(){

  printf("***** TensorSgemm ***** \n\n");

  void* lhs_ptr = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 5, 4, 1, 1);
  struct Tensor* lhs = (struct Tensor*) lhs_ptr;
  fillTensorWithOnes(lhs);

  float* data_arr = (float*) lhs->host_data;
  for(int i = 0; i < lhs->num_elems; i++){
    data_arr[i] = (i / 4) + 1;
  }

  void* rhs = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 1, 4, 3);
  fillTensorWithOnes(rhs);

  void* output = tensorGemmCPU(lhs, rhs);
  printTensorValues(output);

  void* bias_ptr = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 3, 1, 1);
  struct Tensor* bias = (struct Tensor*) bias_ptr;
  fillTensorWithOnes(bias);

  float* bias_arr = (float*) bias->host_data;
  for(int i = 0; i < bias->num_elems; i++){
    bias_arr[i] = i + 1;
  }

  void* output2 = tensorAdd(output, bias);
  printTensorValues(output2);
}
void testTensorHgemm(UnitTestResults& unitTestResults){
...
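For the record, the expected output of this deleted test can be derived by hand: row i of the 5 × 4 lhs is filled with the constant i + 1 (since data_arr[i] = i/4 + 1), and rhs is a 4 × 3 matrix of ones, so every entry in row i of the 5 × 3 product is 4·(i + 1). Assuming tensorAdd broadcasts the (1, 3, 1, 1) bias across the batch dimension, row i of the final result is (4(i+1)+1, 4(i+1)+2, 4(i+1)+3); row 0, for instance, prints 5 6 7.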
llvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu (+12, −299)
@@ -45,9 +45,6 @@
#include "half_precision_api.h"
#include "approx_simulation.h"

//** Potential Improvements:
// 1) Add support for datatypes beyond floats and half
// 2) Support for more CUDNN operations
@@ -62,12 +59,17 @@ void llvm_hpvm_initTensorRt(int gpuid){
  checkCudaErrors(cublasCreate(&cublasHandle));
  checkCUDNN(cudnnCreate(&cudnnHandle));
  printf("CREATED HANDLES %d \n", gpuid);

#ifdef PROMISE_TUNER_ENABLED
  // readOpenTunerFlags("opentuner_flags");
  readOpenTunerFlags("promise_flags");
  initializeAutotuner();
  printf("Read PROMISE FLAGS %d \n", gpuid);
#endif
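The tuner path above is gated at compile time. A minimal sketch of enabling it, assuming the macro is passed straight through compiler flags (the project's actual build wiring is not part of this diff):

  nvcc -DPROMISE_TUNER_ENABLED -c tensor_runtime.cu -o tensor_runtime.o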
@@ -78,7 +80,9 @@ void llvm_hpvm_initTensorRt(int gpuid){
    runtime_initialized = true;
  }

  printf("DONE INITIALIZING GPU %d \n", gpuid);
}
@@ -673,77 +677,9 @@ void* tensorPooling(void* input_ptr,
void* tensorGemmCPU(void* lhs_ptr, void* rhs_ptr){

  INFO("*** TensorGemmCPU \n");

  Tensor* lhs = (Tensor*) lhs_ptr;
  Tensor* rhs = (Tensor*) rhs_ptr;

  // The operation is done on the CPU
  deviceToHostCopy(lhs);
  deviceToHostCopy(rhs);

  if(lhs->data_type != CUDNN_DATA_FLOAT){
    ERROR("Currently only Floating point is supported ");
  }

  profileEvent("tensorGemmCPU");

  INFO("rhs->dims.num_dims = %d \n", rhs->dims.num_dims);
  INFO("lhs->dims.num_dims = %d \n", lhs->dims.num_dims);

  // FIXIT: Need to be more aware of the implications of alpha and beta
  // float alpha = 1.0f;
  // float beta = 0.0f;

  // 'm' holds the batch dimension - assuming NCHW format Tensors
  int m = lhs->dims.dim_sizes[0];
  // The rhs must be a 2D tensor
  int n = rhs->dims.dim_sizes[rhs->dims.num_dims - 1]; // output neurons
  int k = 1;

  // Flattening the dimensions after the batch dimension
  // NOTE: Allowing any number of dimensions > 2 for lhs
  for(int j = 1; j < lhs->dims.num_dims; j++){
    k = k * lhs->dims.dim_sizes[j]; // input neurons
  }

  int rhs_k = rhs->dims.dim_sizes[rhs->dims.num_dims - 2];
  // Dimension-note: Check if k is same across the two tensors
  INFO("m = %d, n = %d, k = %d \n", m, n, k);

  if(rhs_k != k){
    ERROR("rhs=%d and lhs=%d columns/rows don't match", rhs_k, k);
  }

  // NOTE: Creating a 4D tensor to be compatible with later called cuDNN routines
  Tensor* output = (Tensor*) create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, m, n, 1, 1);

  // The result is produced on the CPU, so the output tensor stays on the host
  changeTensorPlacement(output, HOST);

  float* lhs_arr = (float*) lhs->host_data;
  float* rhs_arr = (float*) rhs->host_data;
  float* output_arr = (float*) output->host_data;

  // Naive O(m*n*k) row-major matrix multiply
  for(int i = 0; i < m; i++){
    for(int j = 0; j < n; j++){
      float sum = 0.0;
      for(int l = 0; l < k; l++){
        float mul = lhs_arr[i * k + l] * rhs_arr[l * n + j];
        sum = sum + mul;
      }
      output_arr[i * n + j] = sum;
    }
  }

  profileEvent("tensorGemmCPU_end", true);
  return output;
}
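Note how the flattening loop lets a convolutional feature map feed a fully-connected operation directly: an lhs of shape (N, C, H, W) is treated as an N × (C·H·W) matrix. As an illustrative example (shapes are made up, not from the repository), a (32, 64, 4, 4) activation tensor would yield m = 32 and k = 64·4·4 = 1024, and would require a weight tensor whose second-to-last dimension is 1024.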
The leading comment and signature of tensorGemmGPU change from:

// Reference: https://gist.github.com/peterwittek/6303527
void* tensorGemmGPU(void* lhs_ptr, void* rhs_ptr){ //, void* result_tensor){

to:

/* Reference Implementation based on: https://gist.github.com/peterwittek/6303527 */
void* tensorGemmGPU(void* lhs_ptr, void* rhs_ptr){

  INFO("*** TensorGemmGPU \n");
  profileEvent("Mul");
...
@@ -780,16 +716,7 @@ void* tensorGemmGPU(void* lhs_ptr, void* rhs_ptr ){ //, void* result_tensor){
    DEBUG("Creating new TENSOR * \n");
    output = (Tensor*) create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, m, n, 1, 1);

  /* else{
       DEBUG("Reusing TENSOR *\n");
       // FIXIT: Add Assertion to check for null pointer and dimension matching
       output = (Tensor*) result_tensor;
       // FIXIT: output value is trashing - Is this deallocated?
       INFO("output->num_elems = %lu \n", output->data_type);
     }
  */

  DEBUG("Changing placement * \n");
  // Changing output tensor placement from host to device
  changeTensorPlacement(output, DEVICE);
...
@@ -846,107 +773,6 @@ void* tensorGemmGPU(void* lhs_ptr, void* rhs_ptr ){ //, void* result_tensor){
void* tensorGemm(void* lhs_ptr, void* rhs_ptr){

  INFO("*** TensorGemm \n");
  profileEvent("tensorGemm");

  Tensor* lhs = (Tensor*) lhs_ptr;
  Tensor* rhs = (Tensor*) rhs_ptr;

  INFO("rhs->dims.num_dims = %d \n", rhs->dims.num_dims);
  INFO("lhs->dims.num_dims = %d \n", lhs->dims.num_dims);

  // FIXIT: Need to be more aware of the implications of alpha and beta
  float alpha = 1.0f, beta = 0.0f;

  // 'm' holds the batch dimension - assuming NCHW format Tensors
  int m = lhs->dims.dim_sizes[0];
  // The rhs last dimension must contain the neurons
  int n = rhs->dims.dim_sizes[rhs->dims.num_dims - 1]; // output neurons
  int k = 1;

  // Flattening the dimensions after the batch dimension
  // NOTE: Allowing any number of dimensions > 2 for lhs
  for(int j = 1; j < lhs->dims.num_dims; j++){
    k = k * lhs->dims.dim_sizes[j]; // input neurons
  }

  int rhs_k = rhs->dims.dim_sizes[rhs->dims.num_dims - 2];
  // Dimension-note: Check if k is same across the two tensors
  INFO("m = %d, n = %d, k = %d \n", m, n, k);

  if(rhs_k != k){
    ERROR("rhs=%d and lhs=%d columns/rows don't match", rhs_k, k);
  }

  // NOTE: Creating a 4D tensor to be compatible with later called cuDNN routines
  Tensor* output = (Tensor*) create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, m, n, 1, 1);
  // Changing output tensor placement from host to device
  changeTensorPlacement(output, DEVICE);

  hostToDeviceCopy(lhs);
  hostToDeviceCopy(rhs);

  // NOTE: cuBLAS uses column-major format
  // NOTE: The leading dimension is the FIRST dimension
  // NOTE: The output is N*M in column-major format, M*N in row-major - what cuDNN expects
  checkCudaErrors(cublasSgemm(cublasHandle,
                              CUBLAS_OP_T, CUBLAS_OP_N,
                              n, m, k,
                              &alpha,
                              (float*) rhs->gpu_data, k,
                              (float*) lhs->gpu_data, k,
                              &beta,
                              (float*) output->gpu_data, n));

  profileEvent("tensorGemm_end", true);
  return output;
}
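The cublasSgemm call above is the standard trick for driving column-major cuBLAS from row-major code: since (lhs · rhs)^T = rhs^T · lhs^T, requesting the n × m column-major product of the transposed operands yields a buffer that, with ldc = n, is byte-for-byte the m × n row-major result the surrounding cuDNN-style code expects. As a quick check with m = 2, n = 3: the column-major 3 × 2 output is stored as C(0,0), C(0,1), C(0,2), C(1,0), C(1,1), C(1,2), which is exactly the row-major order of the 2 × 3 product.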
// FIXIT: Add dimension check assertions throughout the code
void* tensorGemmBias(void* input_ptr, void* bias_ptr){

  INFO("*** TensorGemmBias \n");
  profileEvent("tensorGemmBias");

  Tensor* input = (Tensor*) input_ptr;
  Tensor* bias = (Tensor*) bias_ptr;

  // NOTE: beta is set to 1 to append to input
  // C = A * B + beta * C
  float alpha = 1.0f, beta = 1.0f;

  // 'm' holds the batch dimension - assuming NCHW format Tensors
  int m = input->dims.dim_sizes[0];
  // The bias must be a 2D tensor
  int n = bias->dims.dim_sizes[bias->dims.num_dims - 1]; // output neurons

  INFO("m = %d, n = %d \n", m, n);

  hostToDeviceCopy(input);
  hostToDeviceCopy(bias);

  struct Tensor* onevec = (Tensor*) create2DTensor(CUDNN_DATA_FLOAT, m, 1);
  fillOnes(onevec);
  hostToDeviceCopy(onevec);

  // NOTE: cuBLAS uses column-major format
  // NOTE: The leading dimension is just the FIRST dimension
  checkCudaErrors(cublasSgemm(cublasHandle,
                              CUBLAS_OP_N, CUBLAS_OP_N,
                              n, m, 1,
                              &alpha,
                              (float*) bias->gpu_data, n,
                              (float*) onevec->gpu_data, 1,
                              &beta,
                              (float*) input->gpu_data, n));

  profileEvent("tensorGemmBias_end", true);
  return input;
}
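The ones-vector here implements bias broadcast as a rank-1 GEMM: with beta = 1, the call computes input += onevec(m × 1) · bias(1 × n), i.e. input[i][j] += bias[j] for every sample i in the batch, so the same bias row is appended to each row of the input, which is returned updated in place.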
void* tensorRelu(void* input_ptr){

  INFO("*** TensorRelu \n");
...
@@ -1019,17 +845,6 @@ void* tensorSoftmax(void* input_ptr){
__global__ void clipValues(float* A, float min, float max, int n){

  int id = blockIdx.x * blockDim.x + threadIdx.x;

  if(id < n){
    A[id] = fmaxf(min, A[id]);
    A[id] = fminf(max, A[id]);
  }
}
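A minimal launch sketch for this kernel (not part of the diff; the block size is an assumed value, and the exact launch configuration used inside tensorRelu2 is not shown here):

  // Hypothetical launch: clamp all n elements of a device-resident buffer.
  int blockSize = 128;                              // assumed threads per block
  int gridSize = (n + blockSize - 1) / blockSize;   // enough blocks to cover n
  clipValues<<<gridSize, blockSize>>>((float*) input->gpu_data, min, max, n);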
void* tensorRelu2(void* input_ptr, float min, float max){
...
@@ -1184,105 +999,3 @@ void* tensorBatchNorm(void* input_ptr, void* gamma_ptr, void* beta_ptr,
/************* GPU Layer API *************/

void* ConvLayer_GPU(void* input,
                    void* filter,
                    void* bias,
                    int conv_pad_h, int conv_pad_w,
                    int conv_stride_h, int conv_stride_w,
                    int pool_id, int pool_size,
                    int activation_id, // Relu, Tanh, ClipRelu
                    float out_min, float out_max){ // NOTE: min_val, max_val apply to 'ClippedRelu'

  void* conv_out = tensorConvolution(input, filter,
                                     conv_pad_h, conv_pad_w,
                                     conv_stride_h, conv_stride_w,
                                     1, 0);
  void* conv_add;
  if(bias != NULL){
    conv_add = tensorAdd(conv_out, bias);
  }
  else{
    conv_add = conv_out;
  }

  void* activation_out;
  switch(activation_id){
  case -1:
    activation_out = conv_add;
    INFO("NO Activation Function \n");
    break;
  case 0:
    activation_out = tensorTanh(conv_add);
    break;
  case 1:
    activation_out = tensorRelu(conv_add);
    break;
  case 2:
    activation_out = tensorRelu2(conv_add, out_min, out_max);
    break;
  default:
    ERROR("Activation id %d NOT supported \n", activation_id);
    break;
  }

  void* pool_out;
  // NOTE: Skip pooling on negative pool sizes
  if(pool_size > 0){
    // FIXME: Currently only using MaxPooling
    pool_out = tensorPooling(activation_out, 0,
                             pool_size, pool_size,
                             0, 0,
                             pool_size, pool_size);
  }
  else{
    pool_out = activation_out;
  }

  return pool_out;
}
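A hypothetical call sketch for this layer (all argument values below are illustrative, not taken from the repository): a pad-1, stride-1 convolution with bias, followed by ReLU and 2 × 2 pooling:

  void* out = ConvLayer_GPU(input, filter, bias,
                            1, 1,        // conv_pad_h, conv_pad_w
                            1, 1,        // conv_stride_h, conv_stride_w
                            0, 2,        // pool_id, pool_size
                            1,           // activation_id = 1 -> ReLU
                            0.0f, 0.0f); // out_min/out_max: only read by ClippedRelu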
void* FCLayer_GPU(void* input,
                  void* weights,
                  void* bias,
                  int activation_id,
                  float out_min, float out_max){ // NOTE: min_val, max_val apply to 'ClippedRelu'

  void* gemm_out = tensorGemmGPU(input, weights);

  void* gemmbias_out;
  if(bias != NULL){
    gemmbias_out = tensorAdd(gemm_out, bias);
  }
  else{
    gemmbias_out = gemm_out;
  }

  void* activation_out;
  switch(activation_id){
  case -1:
    activation_out = gemmbias_out;
    INFO("No Activation Function \n");
    break;
  case 0:
    activation_out = tensorTanh(gemmbias_out);
    break;
  case 1:
    activation_out = tensorRelu(gemmbias_out);
    break;
  case 2:
    activation_out = tensorRelu2(gemmbias_out, out_min, out_max);
    break;
  default:
    ERROR("Activation id %d NOT supported \n", activation_id);
    break;
  }

  return activation_out;
}
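And a matching hypothetical call for the FC layer, clipping activations to an assumed [0, 6] range:

  void* fc_out = FCLayer_GPU(input, weights, bias,
                             2,            // activation_id = 2 -> ClippedRelu
                             0.0f, 6.0f);  // out_min, out_max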