diff --git a/llvm/projects/hpvm-tensor-rt/CMakeLists.txt b/llvm/projects/hpvm-tensor-rt/CMakeLists.txt index b3b6cb6e179ee39c82459f13b1734f8c0e626e01..562ca1538dfddc812c32d84ff8b8595473289974 100644 --- a/llvm/projects/hpvm-tensor-rt/CMakeLists.txt +++ b/llvm/projects/hpvm-tensor-rt/CMakeLists.txt @@ -64,6 +64,7 @@ set( tensor_utils.cu wrapper_runtime.cu approx_knobs_utils.cc + init_api.cc ) foreach(FILE ${RUNTIME_SRCS_FILENAME}) list(APPEND RUNTIME_SRCS "tensor_runtime/src/${FILE}") diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/init_api.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/init_api.h new file mode 100644 index 0000000000000000000000000000000000000000..ac742876b054c88d50634ecae306b25d471f5c06 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/init_api.h @@ -0,0 +1,53 @@ +#pragma once + + +#include <stdio.h> +#include <stdarg.h> +#include <cstdio> +#include <cstdlib> +#include <iostream> +#include <map> +#include <sstream> +#include <string> +#include <cuda_runtime.h> +#include <cublas_v2.h> +#include <cudnn.h> +#include <cublas_api.h> +#include <cuda_fp16.h> + +// Tensor runtime header files +#include "tensor_runtime.h" +#include "tensor_utils.h" +#include "debug.h" +#include "profiling.h" +#include "global_data.h" +#include "error.h" +#include "tensor.h" +#include "op_overheads.h" +#include "approx_simulation.h" + + + +void llvm_hpvm_initTensorRt(int gpuid); + +void llvm_hpvm_cleanupTensorRt(); + +void llvm_hpvm_initApproxhpvmRt(int gpuid); + +void llvm_hpvm_cleanupApproxhpvmRt(); + +void dumpAccuracyNorms(); + +// Returns the number of GPUs active on the platform +unsigned int getGPUCount(); + +void clearTensorMap(); + +void startMemTracking(); + +void freeOutputTensors(); + +void clearOpCounter(); + +void freeBatchMemory(); + diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/init_api.cc b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/init_api.cc new file mode 100644 index 
0000000000000000000000000000000000000000..b311f50f99bf6ffc8ec508300d3e92bd9b314796 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/init_api.cc @@ -0,0 +1,157 @@ + + +#include <stdio.h> +#include <stdarg.h> +#include <cstdio> +#include <cstdlib> +#include <iostream> +#include <map> +#include <sstream> +#include <string> +#include <cuda_runtime.h> +#include <cublas_v2.h> +#include <cudnn.h> +#include <cublas_api.h> +#include <cuda_fp16.h> + +// Tensor runtime header files +#include "tensor_runtime.h" +#include "tensor_utils.h" +#include "debug.h" +#include "profiling.h" +#include "global_data.h" +#include "error.h" +#include "tensor.h" +#include "op_overheads.h" +#include "approx_simulation.h" +#include "init_api.h" + + +void llvm_hpvm_initTensorRt(int gpuid){ + + if(!runtime_initialized){ + + printf("INITIALIZING GPU %d \n", gpuid); + // NOTE: Setting the target GPU. Can we use multiple GPUs? + checkCudaErrors(cudaSetDevice(gpuid)); + // Initializing cuDNN and cuBlas handles + checkCudaErrors(cublasCreate(&cublasHandle)); + checkCUDNN(cudnnCreate(&cudnnHandle)); + + printf("CREATED HANDLES %d \n", gpuid); + + +#ifdef PROMISE_TUNER_ENABLED + // readOpenTunerFlags("opentuner_flags"); + + readOpenTunerFlags("promise_flags"); + initializeAutotuner(); + + printf("Read PROMISE FLAGS %d \n", gpuid); + +#endif + + +#ifdef ERROR_INJECTION_ENABLED + readOpenTunerFlags("opentuner_flags"); +#endif + + + runtime_initialized = true; + } + + printf("DONE INITIALIZING GPU %d \n", gpuid); + +} + + +void llvm_hpvm_cleanupTensorRt(){ + DEBUG("**** llvm_hpvm_cleanupTensorRt ***\n"); + dumpAccuracyNorms(); +} + + +void llvm_hpvm_initApproxhpvmRt(int gpuid){ + llvm_hpvm_initTensorRt(gpuid); + approxhpvm_runtime_mode = true; +} + +void llvm_hpvm_cleanupApproxhpvmRt(){ + +} + + + +void dumpAccuracyNorms(){ + + #ifdef ERROR_INJECTION_ENABLED + + + #endif + + dump_result("accuracy_summary"); + +} + + +// Returns the number of GPUs active on the platform +unsigned int 
getGPUCount(){ + int num_gpus; + checkCudaErrors(cudaGetDeviceCount(&num_gpus)); + return num_gpus; +} + + + +void clearTensorMap(){ + tensors_ptr.clear(); + host_ptr.clear(); + obj_ptr.clear(); + tracked_tensors.clear(); +} + + +void startMemTracking(){ + tensors_ptr.clear(); + host_ptr.clear(); + obj_ptr.clear(); + + tracked_tensors.clear(); +} + + +void freeOutputTensors(){ + + DEBUG("**** Freeing Output Tensors *** \n"); + for (void *ptr: tensors_ptr) + cudaFree(ptr); + + for(void *ptr: host_ptr) + free(ptr); + + for(void *ptr: obj_ptr) + free(ptr); + + clearTensorMap(); +} + + + +void clearOpCounter(){ + total_ops = 0; + op_counter = 0; + op_accuracies.clear(); +} + + + +void freeBatchMemory(){ + // Free allocated memory for the current mini-batch + freeOutputTensors(); + // Reinitialize counter for OpenTuner flags - next mini-batch of execution + op_counter = 0; + // Clearing profiling data map + func_counters.clear(); +} + + diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu index 59d3b5618389871d4e0463d4b36ed5bb93f12452..201b75429daf152cdd35989da7ff32bfdc2fb604 100644 --- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu +++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu @@ -35,6 +35,7 @@ // Tensor runtime header files #include "tensor_runtime.h" #include "tensor_utils.h" +#include "init_api.h" #include "debug.h" #include "profiling.h" #include "fp16_conversion.h" @@ -48,135 +49,6 @@ - - -void llvm_hpvm_initTensorRt(int gpuid){ - - if(!runtime_initialized){ - - printf("INITIALIZING GPU %d \n", gpuid); - // NOTE: Setting the target GPU. Can we use multiple GPUs? 
- checkCudaErrors(cudaSetDevice(gpuid)); - // Initializing cuDNN and cuBlas handles - checkCudaErrors(cublasCreate(&cublasHandle)); - checkCUDNN(cudnnCreate(&cudnnHandle)); - - printf("CREATED HANDLES %d \n", gpuid); - - -#ifdef PROMISE_TUNER_ENABLED - // readOpenTunerFlags("opentuner_flags"); - - readOpenTunerFlags("promise_flags"); - initializeAutotuner(); - - printf("Read PROMISE FLAGS %d \n", gpuid); - -#endif - - -#ifdef ERROR_INJECTION_ENABLED - readOpenTunerFlags("opentuner_flags"); -#endif - - - runtime_initialized = true; - } - - printf("DONE INTIALIZING GPU %d \n", gpuid); - -} - - -void llvm_hpvm_cleanupTensorRt(){ - DEBUG("**** llvm_hpvm_cleanupTensorRt ***\n"); - dumpAccuracyNorms(); -} - - -void llvm_hpvm_initApproxhpvmRt(int gpuid){ - llvm_hpvm_initTensorRt(gpuid); - approxhpvm_runtime_mode = true; -} - -void llvm_hpvm_cleanupApproxhpvmRt(){ - -} - - - -void dumpAccuracyNorms(){ - - #ifdef ERROR_INJECTION_ENABLED - - - #endif - - dump_result("accuracy_summary"); - -} - - -// Returns the number of GPUs active on the platform -int getGPUCount(){ - int num_gpus; - checkCudaErrors(cudaGetDeviceCount(&num_gpus)); - return num_gpus; -} - - - -void clearTensorMap(){ - tensors_ptr.clear(); - host_ptr.clear(); - obj_ptr.clear(); - tracked_tensors.clear(); -} - - -void startMemTracking(){ - tensors_ptr.clear(); - host_ptr.clear(); - obj_ptr.clear(); - - tracked_tensors.clear(); -} - - -void freeOutputTensors(){ - - DEBUG("**** Freeing Ouput Tensors *** \n"); - for (void *ptr: tensors_ptr) - cudaFree(ptr); - - for(void *ptr: host_ptr) - free(ptr); - - for(void *ptr: obj_ptr) - free(ptr); - - clearTensorMap(); -} - - - -void clearOpCounter(){ - total_ops = 0; - op_counter = 0; - op_accuracies.clear(); -} - - - -void freeBatchMemory(){ - // Free allocated memory for the current mini-batch - freeOutputTensors(); - // Reinitialize couter for OpenTuner flags - next mini-batch of execution - op_counter = 0; - // Clearing profiling data map - func_counters.clear(); -} 
- - - // FIXIT: Fix any assumptions on the NCHW format // TODO: benchmark split performance and check if it is prohibitively high?