// tests.h -- authored by Hashim Sharif
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <vector>
#include <string.h>
#include "tensor_runtime.h"
#include "tensor_cpu_runtime.h"
#include "tensorUtils.h"
#include "tensor_custom_ops_cpu.h"
using namespace std;
class UnitTestResults {
private:
unsigned int total_tests;
unsigned int failed_tests;
unsigned int passed_tests;
std::vector<string> failed_test_ids;
public:
UnitTestResults() {
total_tests = 0;
failed_tests = 0;
passed_tests = 0;
}
void evalTestResult(Tensor *res, const float *expected_result,
size_t num_elems, float epsilon, string test_name) {
total_tests += 1;
if (res->num_elems != num_elems) {
failed_tests += 1;
failed_test_ids.push_back(test_name);
return;
}
float *data_ptr = (float *)res->host_data;
for (unsigned int i = 0; i < res->num_elems; i++) {
if (std::abs(data_ptr[i] - expected_result[i]) > epsilon) {
failed_tests += 1;
failed_test_ids.push_back(test_name);
return;
}
}
passed_tests += 1;
}
void compareTensors(Tensor *res, Tensor *gold_res, float epsilon,
string test_name) {
const float *expected_result = (float *)gold_res->host_data;
unsigned int num_elems = res->num_elems;
evalTestResult(res, expected_result, num_elems, epsilon, test_name);
}
void printSummary() {
printf("\n\n\n ************* Printing Results Summary ********** \n\n");
printf("-- Total tests := %d \n", total_tests);
printf("-- Tests Passed := %d \n", passed_tests);
printf("-- Tests Failed := %d \n", failed_tests);
printf("\n\n Tests that failed : \n\n");
for (int i = 0; i < failed_test_ids.size(); i++) {
printf("*** Test = %s \n", failed_test_ids[i].c_str());
}
if (failed_test_ids.size() > 0){
printf("Some Tests Failed. Aborting");
exit(1);
}
}
};
void testSampleFilter() {
printf("***** Tensor Sample Filter ***** \n\n");
Tensor *input =
(Tensor *)create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 2, 2, 3, 3);
fillWithOnesAndTwos(input);
Tensor *input2 = (Tensor *)create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW,
3, 2, 32, 32);
fillTensorWithVal(input2, 1);
printTensorValues(input);
void *exact_res = tensorConvolution(input2, input, 0, 0, 1, 1, 1, 1);
printTensorValues(exact_res);
void *res = tensorConvSampSim(input2, input, 0, 0, 1, 1, 1, 1, 4, 0);
printTensorValues(res);
}
/**
 * Runs the perforated-convolution variants on one configuration and records
 * how they compare against tensorConvPerfCuda.
 *
 * For offset = 0 and offset = 1 this prints the results of
 * tensorConvolution, the baseline tensorConvApprox / tensorConvApproxHalf2
 * calls (row = col = 1), tensorConvPerfCuda, and the tensorConvApprox,
 * tensorConvApproxCPU and tensorConvApproxHalf2 calls with the given
 * row/col/offset. It then registers three tests in `unitTestResults`:
 *   PERF_FP32 (tolerance 0.05), PERF_FP16 (0.1), PERF_CPU (0.05).
 *
 * @param input, filter      tensors forwarded unchanged to every call
 * @param pad_h, pad_w       forwarded padding arguments
 * @param stride_h, stride_w forwarded stride arguments
 * @param row, col           forwarded perforation arguments
 *                           (NOTE(review): presumably skip rates -- confirm)
 * @param unitTestResults    collector that receives the comparison results
 */
void testPerforationCalls(void *input, void *filter, int pad_h, int pad_w,
                          int stride_h, int stride_w, int row, int col,
                          UnitTestResults &unitTestResults) {

  for (int offset = 0; offset < 2; offset++) {

    printf("\n\n\n**Test -- pad_h = %d pad_w = %d stride_h = %d stride_w = %d "
           "row = %d col = %d offset= %d \n\n",
           pad_h, pad_w, stride_h, stride_w, row, col, offset);

    void *res_exact = tensorConvolution(input, filter, pad_h, pad_w, stride_h,
                                        stride_w, 1, 1);
    printf("tensorConvolution Result :");
    printTensorValues(res_exact);

    void *res_exact2 = tensorConvApprox(input, filter, pad_h, pad_w, stride_h,
                                        stride_w, 1, 1, 1, 1, 1, 1);
    printf("\nBaseline Result :");
    printTensorValues(res_exact2);

    void *res_exact3 = tensorConvApproxHalf2(
        input, filter, pad_h, pad_w, stride_h, stride_w, 1, 1, 1, 1, 1, 1);
    convertToFP32((struct Tensor *)res_exact3);
    printf("\nFP16_Baseline Result :");
    printTensorValues(res_exact3);

    // Reference result that the three variants below are compared against.
    void *res_sim = tensorConvPerfCuda(input, filter, pad_h, pad_w, stride_h,
                                       stride_w, 1, 1, row, col, offset);
    printf("\nConvPerfCuda Result :");
    printTensorValues(res_sim);

    void *res = tensorConvApprox(input, filter, pad_h, pad_w, stride_h,
                                 stride_w, 1, 1, row, col, 1, offset);
    printf("\nConvApprox Result :");
    printTensorValues(res);

    // The CPU variant is called after requesting both operands on HOST.
    hpvm_request_tensor(input, HOST);
    hpvm_request_tensor(filter, HOST);

    void *res_cpu = tensorConvApproxCPU(input, filter, pad_h, pad_w, stride_h,
                                        stride_w, 1, 1, row, col, 1, offset);
    printf("\nConvApproxCPU Result :");
    printTensorValues(res_cpu);

    void *res_half =
        tensorConvApproxHalf2(input, filter, pad_h, pad_w, stride_h, stride_w,
                              1, 1, row, col, 1, offset);
    convertToFP32((struct Tensor *)res_half);
    printf("\nConvApproxHalf2 Result :");
    printTensorValues(res_half);

    // Configuration description shared by the three test names.
    const std::string suffix =
        " pad_h = " + std::to_string(pad_h) +
        " pad_w = " + std::to_string(pad_w) +
        " stride_h = " + std::to_string(stride_h) +
        " stride_w = " + std::to_string(stride_w) +
        " row = " + std::to_string(row) +
        " col = " + std::to_string(col) +
        " offset = " + std::to_string(offset);

    unitTestResults.compareTensors((Tensor *)res, (Tensor *)res_sim, 0.05,
                                   "PERF_FP32 " + suffix);

    // Looser tolerance for the half-precision path.
    unitTestResults.compareTensors((Tensor *)res_half, (Tensor *)res_sim, 0.1,
                                   "PERF_FP16 " + suffix);

    unitTestResults.compareTensors((Tensor *)res_cpu, (Tensor *)res_sim, 0.05,
                                   "PERF_CPU " + suffix);
  }

  printf("\n\n\n--- End of Test \n\n\n");
}
/**** Tests Perforation for a set of different inputs
 *
 * Builds a 1x3x4x4 input and a 1x3x3x3 filter, both filled with ones, and
 * runs testPerforationCalls on six pad/stride/row/col combinations. All
 * comparison outcomes are recorded in `unitTestResults`.
 */
void testPerforation(UnitTestResults &unitTestResults) {

  // Banner names the perforation test (it previously repeated the
  // sampling-test banner, which mislabelled this section of the log).
  printf("***** Tests Perforation for a set of different inputs ***** \n\n");

  Tensor *input =
      (Tensor *)create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 3, 4, 4);
  fillTensorWithVal(input, 1);

  Tensor *filter =
      (Tensor *)create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 3, 3, 3);
  fillTensorWithVal(filter, 1);

  // Arguments: pad_h, pad_w, stride_h, stride_w, row, col.
  // padding = 0, stride = 1
  testPerforationCalls(input, filter, 0, 0, 1, 1, 1, 2, unitTestResults);
  testPerforationCalls(input, filter, 0, 0, 1, 1, 2, 1, unitTestResults);

  // padding = 1, stride = 1
  testPerforationCalls(input, filter, 1, 1, 1, 1, 1, 3, unitTestResults);
  testPerforationCalls(input, filter, 1, 1, 1, 1, 3, 1, unitTestResults);

  // padding = 1, stride = 2
  testPerforationCalls(input, filter, 1, 1, 2, 2, 1, 4, unitTestResults);
  testPerforationCalls(input, filter, 1, 1, 2, 2, 4, 1, unitTestResults);
}
// Print-only sampling experiment: builds a 1x3x4x4 input of ones and a
// 1x3x3x3 filter whose even-indexed elements are 2 (odd-indexed stay 1),
// then prints the results of several tensorConvApprox / tensorConvSampSim /
// tensorConvApproxHalf2 calls. Nothing is asserted.
void testSampling() {

  printf("***** Testing Sampling ***** \n\n");

  Tensor *input =
      (Tensor *)create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 3, 4, 4);
  fillTensorWithVal(input, 1);

  Tensor *filter =
      (Tensor *)create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 3, 3, 3);
  fillTensorWithVal(filter, 1);

  // Overwrite elements 0, 2, 4, ..., 26 of the filter's host buffer with 2.
  float *weights = (float *)((struct Tensor *)filter)->host_data;
  for (int idx = 0; idx <= 26; idx += 2) {
    weights[idx] = 2;
  }

  // printTensorValues(input);

  // The last two arguments vary between the calls below.
  printTensorValues(
      tensorConvApprox(input, filter, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1));

  printTensorValues(
      tensorConvApprox(input, filter, 0, 0, 1, 1, 1, 1, 1, 1, 2, 1));

  printTensorValues(tensorConvSampSim(input, filter, 0, 0, 1, 1, 1, 1, 2, 0));

  printTensorValues(
      tensorConvApprox(input, filter, 0, 0, 1, 1, 1, 1, 1, 1, 2, 0));

  printTensorValues(
      tensorConvApprox(input, filter, 0, 0, 1, 1, 1, 1, 1, 1, 4, 0));

  // The half-precision result is converted to FP32 before it is printed.
  void *half_result =
      tensorConvApproxHalf2(input, filter, 0, 0, 1, 1, 1, 1, 1, 1, 4, 0);
  convertToFP32((struct Tensor *)half_result);
  printTensorValues(half_result);
}
// Runs the sampled-convolution variants on one configuration, for offset 0
// and 1, prints every intermediate result, and records three comparisons
// against tensorConvSampSim2 in `unitTestResults`:
//   SAMP_FP32 (tolerance 0.05), SAMP_FP16 (0.1), SAMP_CPU (0.05).
// `filter_string` only labels the recorded test names.
void testSamplingCalls(void *input, void *filter, int pad_h, int pad_w,
                       int stride_h, int stride_w, int skip_every,
                       std::string filter_string,
                       UnitTestResults &unitTestResults) {

  const float interp_rate = 1.0;

  for (int offset = 0; offset < 2; offset++) {

    printf("\n\n\n**Test -- pad_h = %d pad_w = %d stride_h = %d stride_w = %d "
           "skip_every = %d offset= %d interpolation_rate = %f \n\n",
           pad_h, pad_w, stride_h, stride_w, skip_every, offset,
           interp_rate);

    // --- Results that are printed but not compared ---
    void *exact_conv = tensorConvolution(input, filter, pad_h, pad_w, stride_h,
                                         stride_w, 1, 1);
    printf("tensorConvolution Result :");
    printTensorValues(exact_conv);

    void *baseline_fp32 = tensorConvApprox(
        input, filter, pad_h, pad_w, stride_h, stride_w, 1, 1, 1, 1, 1, 1);
    printf("\nBaseline Result :");
    printTensorValues(baseline_fp32);

    void *baseline_fp16 = tensorConvApproxHalf2(
        input, filter, pad_h, pad_w, stride_h, stride_w, 1, 1, 1, 1, 1, 1);
    convertToFP32((struct Tensor *)baseline_fp16);
    printf("\nFP16_Baseline Result :");
    printTensorValues(baseline_fp16);

    // --- Reference result used by the comparisons below ---
    void *reference =
        tensorConvSampSim2(input, filter, pad_h, pad_w, stride_h, stride_w, 1,
                           1, skip_every, offset, interp_rate);
    printf("\nConvSampSim Result :");
    printTensorValues(reference);

    // --- Variants under test ---
    void *approx_fp32 =
        tensorConvApprox(input, filter, pad_h, pad_w, stride_h, stride_w, 1, 1,
                         1, 1, skip_every, offset);
    printf("\nConvApprox Result :");
    printTensorValues(approx_fp32);

    // Both operands are requested on HOST before the CPU variant runs.
    hpvm_request_tensor(input, HOST);
    hpvm_request_tensor(filter, HOST);

    void *approx_cpu =
        tensorConvApproxCPU(input, filter, pad_h, pad_w, stride_h, stride_w, 1,
                            1, 1, 1, skip_every, offset);
    printf("\nConvApproxCPU Result :");
    printTensorValues(approx_cpu);

    void *approx_fp16 =
        tensorConvApproxHalf2(input, filter, pad_h, pad_w, stride_h, stride_w,
                              1, 1, 1, 1, skip_every, offset);
    convertToFP32((struct Tensor *)approx_fp16);
    printf("\nConvApproxHalf2 Result :");
    printTensorValues(approx_fp16);

    // Build the configuration label piece by piece.
    std::string label = "filter = ";
    label += filter_string;
    label += " pad_h = ";
    label += std::to_string(pad_h);
    label += " pad_w = ";
    label += std::to_string(pad_w);
    label += " stride_h = ";
    label += std::to_string(stride_h);
    label += " stride_w = ";
    label += std::to_string(stride_w);
    label += " skip_every = ";
    label += std::to_string(skip_every);
    label += " offset = ";
    label += std::to_string(offset);

    unitTestResults.compareTensors((Tensor *)approx_fp32, (Tensor *)reference,
                                   0.05, "SAMP_FP32 " + label);

    unitTestResults.compareTensors((Tensor *)approx_fp16, (Tensor *)reference,
                                   0.1, "SAMP_FP16 " + label);

    unitTestResults.compareTensors((Tensor *)approx_cpu, (Tensor *)reference,
                                   0.05, "SAMP_CPU " + label);
  }

  printf("\n\n\n --- End of Test \n\n\n");
}
/**** Tests Sample for a sample 3 * 3 Filter
 *
 * Uses a 1x3x4x4 input of ones and a 1x3x3x3 filter in which elements 0 and
 * 26 are 10, the other even-indexed elements are 2 and odd-indexed ones
 * are 1. Runs testSamplingCalls for skip_every = 2, 3, 4 under three
 * padding/stride settings.
 */
void testSampling_3_3(UnitTestResults &unitTestResults) {

  printf("***** Tests Sample for a sample 3 * 3 Filter ***** \n\n");

  Tensor *input =
      (Tensor *)create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 3, 4, 4);
  fillTensorWithVal(input, 1);
  // fillWithOnesAndTwos(input);

  Tensor *filter =
      (Tensor *)create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 3, 3, 3);
  fillTensorWithVal(filter, 1);

  float *weights = (float *)((struct Tensor *)filter)->host_data;
  // Interior even indices 2..24 become 2; the first and last become 10.
  for (int idx = 2; idx <= 24; idx += 2) {
    weights[idx] = 2;
  }
  weights[0] = 10;
  weights[26] = 10;

  // {padding, stride} pairs, applied to both the h and w dimension:
  //   padding = 0 stride = 1, padding = 1 stride = 1, padding = 1 stride = 2
  const int pad_stride[3][2] = {{0, 1}, {1, 1}, {1, 2}};

  for (int cfg = 0; cfg < 3; cfg++) {
    const int pad = pad_stride[cfg][0];
    const int stride = pad_stride[cfg][1];

    for (int skip_every = 2; skip_every <= 4; skip_every++) {
      testSamplingCalls(input, filter, pad, pad, stride, stride, skip_every,
                        "3_3", unitTestResults);
    }
  }
}
/**** Tests Sample for a sample 1 * 1 Filter
 *
 * Uses a 1x9x2x2 input and a 4x9x1x1 filter, both filled with the value 2,
 * and runs testSamplingCalls for skip_every = 2, 3, 4 with stride 1 and
 * padding 0, then padding 1.
 */
void testSampling_1_1(UnitTestResults &unitTestResults) {

  Tensor *input =
      (Tensor *)create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 9, 2, 2);
  fillTensorWithVal(input, 2);
  // fillWithOnesAndTwos(input);

  Tensor *filter =
      (Tensor *)create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 4, 9, 1, 1);
  fillTensorWithVal(filter, 2);

  // Outer loop: padding = 0, then padding = 1 (stride stays 1).
  for (int pad = 0; pad <= 1; pad++) {
    for (int skip_every = 2; skip_every <= 4; skip_every++) {
      testSamplingCalls(input, filter, pad, pad, 1, 1, skip_every, "1_1",
                        unitTestResults);
    }
  }
}
// Runs both sampling test groups -- the 3*3-filter tests followed by the
// 1*1-filter tests -- recording every comparison in `unitTestResults`.
void testSampling(UnitTestResults &unitTestResults){
testSampling_3_3(unitTestResults);
testSampling_1_1(unitTestResults);
}