diff --git a/llvm/projects/hpvm-tensor-rt/CMakeLists.txt b/llvm/projects/hpvm-tensor-rt/CMakeLists.txt index e7b6c1e9214236c5735df61d30933dec863efd02..87746c35294cb540998bef0b81a8dd51ad5b6589 100644 --- a/llvm/projects/hpvm-tensor-rt/CMakeLists.txt +++ b/llvm/projects/hpvm-tensor-rt/CMakeLists.txt @@ -48,6 +48,7 @@ endif() target_link_libraries(tensor_cpu_runtime) + # Adding rule for the debugging source add_executable(test_ops dnn_sources/src/test_ops.cc) target_link_libraries(test_ops tensor_runtime) @@ -59,12 +60,6 @@ target_link_libraries(fc2_cpu tensor_cpu_runtime) # Full-Precision versions -add_executable(fc2_clipped dnn_sources/src/fc2_clipped.cc) -target_link_libraries(fc2_clipped tensor_runtime) - -add_executable(fc3_clipped dnn_sources/src/fc3_clipped.cc) -target_link_libraries(fc3_clipped tensor_runtime) - add_executable(fc4_clipped dnn_sources/src/fc4_clipped.cc) target_link_libraries(fc4_clipped tensor_runtime) @@ -78,9 +73,6 @@ target_link_libraries(lenet_keras tensor_runtime) add_executable(alexnet_cifar10 dnn_sources/src/alexnet_cifar10_front.cc) target_link_libraries(alexnet_cifar10 tensor_runtime) -add_executable(alexnet_cifar10_test dnn_sources/src/alexnet_cifar10_test.cc) -target_link_libraries(alexnet_cifar10_test tensor_runtime) - add_executable(alexnet_cifar10_tuner dnn_sources/src/alexnet_cifar10_tuner.cc) target_link_libraries(alexnet_cifar10_tuner tensor_runtime) @@ -132,13 +124,6 @@ target_link_libraries(pipeline_GSME tensor_runtime) #*** Half precision networks - -add_executable(fc2_half dnn_sources/src/half/fc2_half.cc) -target_link_libraries(fc2_half tensor_runtime) - -add_executable(fc3_half dnn_sources/src/half/fc3_half.cc) -target_link_libraries(fc3_half tensor_runtime) - add_executable(fc4_half dnn_sources/src/half/fc4_half.cc) target_link_libraries(fc4_half tensor_runtime) diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h b/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h index 
2aa97bc4d91b616e2fbe2e6c73ef3fa996fa5b35..1d9fed9e2c05eaf35fad010b1daca40eb0828cc7 100644
--- a/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
@@ -88,6 +88,24 @@ void fillWithOnesAndTwos(void* tensor_ptr){
 }
 
 
+// Sets every element of a CUDNN_DATA_FLOAT tensor to target_value.
+// Tensors with any other data_type are left unmodified.
+void fillTensorWithVal(void* tensor_ptr, float target_value){
+
+  struct Tensor* tensor = (struct Tensor*) tensor_ptr;
+
+  hpvm_request_tensor(tensor, 0);
+
+  // initialization is specific to the floating point type
+  if(tensor->data_type == CUDNN_DATA_FLOAT){
+    float* data_arr = (float*) tensor->host_data;
+    for(unsigned int i = 0; i < tensor->num_elems; i++){
+      data_arr[i] = target_value;
+    }
+  }
+}
+
+
 void fillTensorWithNegOnes(void* tensor_ptr){
 
   struct Tensor* tensor = (struct Tensor*) tensor_ptr;
@@ -223,6 +241,55 @@ void* readInputTensor(const char* file_name, int data_type, int dim1_size, int d
 }
 
 
+//*** FIXIT: Move this to CPU-only
+// Reads dim1_size * dim2_size * dim3_size * dim4_size 4-byte elements from the
+// binary file file_name, copies them via initTensorData into a tensor obtained
+// from create4DTensor, and returns it. Aborts if the file or buffer is unavailable.
+struct Tensor* readTrainedWeightsCPU(const char* file_name, int data_type,
+                                     int dim1_size, int dim2_size,
+                                     int dim3_size, int dim4_size){
+
+  // FIXIT: Don't assume floating point types
+  int type_size = 4; // NOTE: Assuming floating point tensors
+  // Widen to long before multiplying so large shapes cannot overflow int
+  long int num_elems = (long int) dim1_size * dim2_size * dim3_size * dim4_size;
+  long int size_in_bytes = type_size * num_elems;
+  // calloc so that a short read leaves zeros, not indeterminate values, in the tail
+  float* tensor_data = (float*) calloc(num_elems, sizeof(float));
+  if(tensor_data == NULL && num_elems > 0){
+    printf("Failed to allocate %ld bytes for %s. Aborting... \n", size_in_bytes, file_name);
+    abort();
+  }
+  int file_header_size = 0;
+
+  FILE* file = fopen(file_name, "rb");
+  if(file == NULL){
+    printf("Data file %s is not found. Aborting... \n", file_name);
+    abort();
+  }
+
+  fseek(file, file_header_size, SEEK_CUR); // Skipping the file header
+  size_t bytes_read = fread(tensor_data, 1, size_in_bytes, file);
+
+  // %ld / %zu match long int / size_t (%lu matched neither type portably)
+  printf("size in bytes = %ld, bytes read = %zu \n", size_in_bytes, bytes_read);
+  if(bytes_read != (size_t) size_in_bytes){
+    printf("WARNING: %s holds fewer bytes than requested; unread weights are zero \n", file_name);
+  }
+
+  fclose(file);
+
+
+  struct Tensor* weights = (struct Tensor*) create4DTensor(data_type, nchw, dim1_size, dim2_size,
+                                                           dim3_size, dim4_size);
+
+  initTensorData(weights, tensor_data, size_in_bytes);
+  //compareValues(weights, tensor_data, num_elems);
+  free(tensor_data);
+
+  return weights;
+}
+
 
 struct Tensor* readTrainedWeights(const char* file_name, int data_type,
                                   long int dim1_size, long int dim2_size,
@@ -416,7 +483,7 @@ float computeAccuracy2(uint8_t* labels, int num_labels, void* result_ptr, unsign
 
   float accuracy = ((batch_dim - num_errors) * 1.0 / batch_dim * 1.0) * 100.0;
   printf("****** Accuracy = %f \n\n", accuracy);
-  printf("****** Zero class labels %d \n", num_zeros);
+  //printf("****** Zero class labels %d \n", num_zeros);
 
   FILE* fp = fopen("final_accuracy", "w+");
   if(fp != NULL){
@@ -519,6 +586,42 @@ void dumpFinalAccuracy(float accuracy){
 }
 
 
+
+// Writes avg_psnr in fixed notation to the file "avg_psnr", replacing its contents.
+// Does nothing when the file cannot be opened.
+void dumpAvgPSNR(float avg_psnr){
+
+  FILE* fp = fopen("avg_psnr", "w+");
+  if(fp != NULL){
+    std::ostringstream ss;
+    ss << std::fixed << avg_psnr;
+    std::string print_str = ss.str();
+    fwrite(print_str.c_str(), 1, print_str.length(), fp);
+    // Close inside the guard: fclose on a null stream is undefined behaviour
+    fclose(fp);
+  }
+}
+
+
+// Writes psnr_std in fixed notation to the file "psnr_std.txt", replacing its contents.
+// Does nothing when the file cannot be opened.
+void dumpPSNRStd(float psnr_std){
+
+  FILE* fp = fopen("psnr_std.txt", "w+");
+  if(fp != NULL){
+    std::ostringstream ss;
+    ss << std::fixed << psnr_std;
+    std::string print_str = ss.str();
+    fwrite(print_str.c_str(), 1, print_str.length(), fp);
+    // Close inside the guard: fclose on a null stream is undefined behaviour
+    fclose(fp);
+  }
+}
+
+
+
+
+
 void dumpExecutionAccuracies(){
 
   FILE* fp = fopen("run_accuracies.txt", "w+");
@@ -538,9 +641,36 @@ void dumpExecutionAccuracies(){
 }
 
 
+// Reads one float from the start of the text file file_name and returns it.
+// Aborts if the file cannot be opened or no float can be parsed from it.
+float readPSNRFromFile(const char* file_name){
+
+  float psnr = 0.0;
+  FILE* pFile = fopen(file_name, "r");
+  if(pFile == NULL){
+    printf("ERROR: %s not found! \n", file_name);
+    abort();
+  }
+
+  // fscanf reports how many items it converted; anything other than 1 means psnr was not set
+  int items_read = fscanf(pFile, "%f", &psnr);
+  fclose(pFile);
+  if(items_read != 1){
+    printf("ERROR: no PSNR value could be parsed from %s \n", file_name);
+    abort();
+  }
+
+  printf("**** PSNR read = %f \n\n", psnr);
+  return psnr;
+}
+
 
 float computePSNRViolation(void* gold_ptr, void* approx_ptr, float PSNR_threshold){
 
+
+  PSNR_threshold = readPSNRFromFile("psnr.txt");
+  std::vector<float> psnr_list;
+
   struct Tensor* gold_tensor = (struct Tensor*) gold_ptr;
   struct Tensor* approx_tensor = (struct Tensor*) approx_ptr;
 
@@ -548,11 +678,14 @@ float computePSNRViolation(void* gold_ptr, void* approx_ptr, float PSNR_threshol
   size_t batch_dim = dim_sizes[0];
   size_t image_size = dim_sizes[1] * dim_sizes[2] * dim_sizes[3];
 
-  printf("batch_dim = %d, image_size = %d \n", batch_dim, image_size);
+  printf("batch_dim = %lu, image_size = %lu \n", batch_dim, image_size);
 
   float* gold_data = (float*) gold_tensor->host_data;
   float* approx_data = (float*) approx_tensor->host_data;
-  
+
+  FILE* fp = fopen("img_psnr.txt", "w+");
+
+  float sum_psnr = 0.0;
   int num_errors = 0;
   for(size_t i = 0; i < batch_dim; i++){
     float mse_sum = 0.0;
@@ -570,17 +703,44 @@ float computePSNRViolation(void* gold_ptr, void* approx_ptr, float PSNR_threshol
     }
 
     mse_sum = mse_sum / image_size;
-    float psnr = 20 * log10(max_val / sqrt(mse_sum));
+    float psnr = 20 * log10(255 / sqrt(mse_sum));
+    sum_psnr += psnr;
 
     if (psnr < PSNR_threshold)
       num_errors += 1;
 
     printf("PSNR value = %f \n", psnr);
-  }  
+    psnr_list.push_back(psnr);
+    std::ostringstream ss;
+    ss << std::fixed << psnr;
+    std::string print_str = ss.str();
+    fwrite(print_str.c_str(), 1, print_str.length(), fp);
+    fwrite("\n", 1, 1, fp);
+  }
 
 
   float violation_rate = (num_errors * 1.0) / batch_dim * 100.0;
   printf("*** violation_rate= %f \n\n", violation_rate);
+
+  float avg_psnr = sum_psnr / batch_dim;
+  printf("*** avg_psnr = %f \n\n", avg_psnr);
+  dumpAvgPSNR(avg_psnr);
+
+  float success_rate = 100.0 - violation_rate;
+  dumpFinalAccuracy(success_rate);
+
+  fclose(fp);
+
+
+  float var = 0.0;
+  for(size_t i = 0; i < batch_dim; i++){
+    var = var + (psnr_list[i] - avg_psnr) * (psnr_list[i] - avg_psnr);
+  }
+
+  var /= batch_dim;
+  float std = sqrt(var);
+
+  dumpPSNRStd(std);
 
   return violation_rate;
 }