Skip to content
Snippets Groups Projects
Commit 73c2b68a authored by Hashim Sharif's avatar Hashim Sharif
Browse files

Adding BatchNorm to Tensor Runtime

parent 1f4e4528
No related branches found
No related tags found
No related merge requests found
......@@ -238,6 +238,11 @@ void testTensorConv3(){
}
void testLRN(){
void* input = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 20, 20, 20, 20);
......@@ -317,10 +322,10 @@ void testTensorError(){
void testTensorConv(){
// NOTE: The input channel count value (param2 to Tensor and Filter) must be the same
void* x3 = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 1, 4, 4);
void* x3 = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 2, 4, 4);
// NOTE: Filter descriptors do NOT have batch size
// NOTE: First two dims are output channels (configurable), input channels (MUST match input channels)
void* filter = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 3, 1, 2, 2);
void* filter = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 2, 2, 2, 2);
fillTensorWithOnes(x3);
fillTensorWithOnes(filter);
......@@ -331,11 +336,31 @@ void testTensorConv(){
1, 1, conv_mode, compute_precision);
printTensorValues(conv1);
// NOTE: For cudnnTensorAdd, the only dimension that MUST match is channels
void* bias3 = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 3, 1, 1);
fillTensorWithOnes(bias3);
tensorAdd(conv1, bias3);
}
void testTensorGroupedConv(){

  // Grouped (depthwise-style) convolution test: with conv_groups = 2, the
  // input channel count (dim 2 of the input) must equal
  // groups * filter-input-channels (dim 2 of the filter).
  void* input = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 2, 4, 4);
  // NOTE: Filter descriptors do NOT have a batch dimension; dims are
  // (output channels, input channels per group, kernel H, kernel W)
  void* filter = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 2, 1, 2, 2);

  fillTensorWithOnes(input);
  fillTensorWithOnes(filter);

  int conv_mode = 1;   // 1 => CROSS_CORRELATION
  int conv_groups = 2; // two groups: each output channel sees one input channel

  // Zero padding, unit strides
  void* conv_out = tensorConvolution(input, filter, 0, 0,
				     1, 1, conv_mode, conv_groups);
  printTensorValues(conv_out);
}
......@@ -347,6 +372,28 @@ void testTensorPool(){
}
void testTensorBatchNorm(){

  // 1x3x2x2 input, every element set to 3
  void* input = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 3, 2, 2);
  fillTensorWithVal(input, 3);

  // Per-channel (1xCx1x1) batch-norm parameters:
  // identity scale (gamma = 1), zero shift (beta = 0),
  // running mean = 1, running variance = 1.
  void* scale = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 3, 1, 1);
  fillTensorWithVal(scale, 1);

  void* shift = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 3, 1, 1);
  fillTensorWithVal(shift, 0);

  void* running_mean = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 3, 1, 1);
  fillTensorWithVal(running_mean, 1);

  void* running_var = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 3, 1, 1);
  fillTensorWithVal(running_var, 1);

  void* result = tensorBatchNorm(input, scale, shift, running_mean, running_var, 0.01);
  printTensorValues(result);
}
void testTensorRelu(){
......@@ -438,8 +485,13 @@ int main(){
//testTensorConv();
//testTensorError();
testQuantization();
//testQuantization();
//testTensorConv();
//testTensorGroupedConv();
testTensorBatchNorm();
//testTensorGemm();
//testTensorGemmGPU();
//testTensorGemmBias();
......
......@@ -100,6 +100,11 @@ extern "C"{
// NOTE: In-place operation
void* tensorSoftmax(void* input);
// NOTE: In-place operation
void* tensorBatchNorm(void* input_ptr, void* gamma_ptr, void* beta_ptr,
void* mean_ptr, void* variance_ptr, double epsilon);
/* Error injection API - used for accuracy tuning */
void* tensorAddError(void* x_ptr, int error_scale);
......
......@@ -34,6 +34,7 @@ void dummyFunction(){
void* tensorTanhPtr = (void*) &tensorTanh;
void* tensorHalfTanhPtr = (void*) &tensorHalfTanh;
void* tensorSoftmaxPtr = (void*) &tensorSoftmax;
void* tensorBatchNormPtr = (void*) &tensorBatchNorm;
void* tensorAddErrorPtr = (void*) &tensorAddError;
void* ConvLayer = (void*) &ConvLayer_PROMISE;
void* FCLayer = (void*) &FCLayer_PROMISE;
......
......@@ -1090,6 +1090,58 @@ void* tensorTanh(void* input_ptr){
// In-place batch normalization (inference mode) via cuDNN.
//
// input_ptr:    NCHW input tensor; normalized in place and returned.
// gamma_ptr:    per-channel scale (expected 1xCx1x1 -- TODO confirm caller contract).
// beta_ptr:     per-channel shift.
// mean_ptr:     per-channel running mean.
// variance_ptr: per-channel running variance.
// epsilon:      variance stabilizer; clamped up to cuDNN's documented minimum.
//
// Returns input_ptr (the operation overwrites the input buffer).
void* tensorBatchNorm(void* input_ptr, void* gamma_ptr, void* beta_ptr,
		      void* mean_ptr, void* variance_ptr, double epsilon){

  INFO("*** TensorBatchNorm \n");
  profileEvent("tensorBatchNorm");

  Tensor* input = (Tensor*) input_ptr;
  Tensor* gamma = (Tensor*) gamma_ptr;
  Tensor* beta = (Tensor*) beta_ptr;
  Tensor* mean = (Tensor*) mean_ptr;
  Tensor* variance = (Tensor*) variance_ptr;

  // Blend factors: out = 1 * result + 0 * out (plain overwrite)
  float alpha_val = 1.0f, beta_val = 0.0f;

  hostToDeviceCopy(input);
  hostToDeviceCopy(gamma);
  hostToDeviceCopy(beta);
  hostToDeviceCopy(mean);
  hostToDeviceCopy(variance);

  // cuDNN rejects epsilon values below CUDNN_BN_MIN_EPSILON (1e-5) with
  // CUDNN_STATUS_BAD_PARAM, which would abort inside checkCUDNN. Clamp
  // defensively so small caller-supplied epsilons still work.
  const double min_epsilon = 1e-5; // == CUDNN_BN_MIN_EPSILON
  if(epsilon < min_epsilon){
    INFO("tensorBatchNorm: epsilon below cuDNN minimum - clamping to 1e-5 \n");
    epsilon = min_epsilon;
  }

  // CUDNN_BATCHNORM_SPATIAL: one mean/variance/scale/shift value per channel.
  // Passing the same descriptor/buffer for x and y makes this in-place.
  checkCUDNN(cudnnBatchNormalizationForwardInference(cudnnHandle, CUDNN_BATCHNORM_SPATIAL,
						     &alpha_val, &beta_val,
						     input->tensor_desc, input->gpu_data,
						     input->tensor_desc, input->gpu_data,
						     gamma->tensor_desc, gamma->gpu_data,
						     beta->gpu_data, mean->gpu_data, variance->gpu_data,
						     epsilon));

  profileEvent("tensorBatchNorm_end", true);

#ifdef ERROR_INJECTION_ENABLED
  // Guard against walking past the per-op accuracy table.
  if(op_counter >= total_ops){
    ERROR("No accuracy flag found \n");
  }

  int op_acc = op_accuracies[op_counter];

  void* error_norms = tensorAddError(input, op_acc);
  add_norms(error_norms, "tensorBatchNorm", op_acc);
  add_relu_overheads(input, op_acc);

  op_counter++;
#endif

  return input;
}
/************* GPU Layer API *************/
void* ConvLayer_GPU(void* input,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment