Patch summary (free text ahead of the first "diff --git" header).

  * configuration.h: adds the enumerators PERFORATION_HP and INPUT_SAMPLING_HP
    to GPUNodeConfiguration::APPROX, each directly after its existing
    counterpart.
  * approxhpvm_runtime_utils.h: the new enumerators are stacked as extra case
    labels on the existing PERFORATION / INPUT_SAMPLING branches of
    handleTensorConvApproximationTuples, so they run the same code; the
    selected parameter and the looked-up params fields are now logged via INFO.
  * configuration.cpp: GPUNodeConfiguration::print() emits "perf_fp16" and
    "samp_fp16" for the new enumerators.
  * hpvm-rt-controller.cpp: readConfigurationFile accepts the "perf_fp16" and
    "samp_fp16" tokens (each followed by one integer parameter);
    FrequencyIndexList now starts with count = 1 instead of 0; and
    RuntimeController constructs its FrequencyIndexList with a repetition
    factor of 10 instead of 5.

NOTE(review): the line structure of every hunk below matches its "@@" header
counts, but the leading indentation of the hunk lines was reconstructed, not
preserved. Confirm it against the blobs named in the "index" lines, or apply
with whitespace-insensitive context matching.

diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/approxhpvm_runtime_utils.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/approxhpvm_runtime_utils.h
index 1fc475ec05162199cd5459777fe7fbfaa8f144a4..0b91030b717d257664ef2cb1bf8e06bd2bcc9508 100644
--- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/approxhpvm_runtime_utils.h
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/approxhpvm_runtime_utils.h
@@ -155,8 +155,13 @@ void* handleTensorConvApproximationTuples(
           return t_out;
         }
       case GPUNodeConfiguration::APPROX::PERFORATION :
+      case GPUNodeConfiguration::APPROX::PERFORATION_HP :
         {
           PerfParams params = perfParamSet->getPerfParams(param);
+          //PerfParams params = PerfParamSet().getPerfParams(param);
+          INFO("perforation param = %i\n", param);
+          INFO("params.row = %i, params.col = %i, params.skip_offset = %i\n",
+               params.row, params.col, params.skip_offset);
           void* t_out;
           RC->resume_profiler();
           t_out = tensorConvApproxHalf2(input, filter,
@@ -173,8 +178,13 @@ void* handleTensorConvApproximationTuples(
           return t_out;
         }
       case GPUNodeConfiguration::APPROX::INPUT_SAMPLING :
+      case GPUNodeConfiguration::APPROX::INPUT_SAMPLING_HP :
         {
           SampParams params = sampParamSet->getSampParams(param);
+          //SampParams params = SampParamSet().getSampParams(param);
+          INFO("sampling param = %i\n", param);
+          INFO("params.skip_rate = %i, params.skip_offset = %i\n",
+               params.skip_rate, params.skip_offset);
           void* t_out;
           RC->resume_profiler();
           t_out = tensorConvApproxHalf2(input, filter,
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/configuration.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/configuration.h
index 75e60a1c4a5d6fda8bf32fcaee3e2b9d192cbbcd..2624ea43c12426edc3535471df5dafc0360b9a81 100644
--- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/configuration.h
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/configuration.h
@@ -52,7 +52,9 @@ public:
     FP32,
     FP16,
     PERFORATION,
+    PERFORATION_HP,
     INPUT_SAMPLING,
+    INPUT_SAMPLING_HP,
     REDUCTION_SAMPLING,
     // ADDITIONAL_APPROXIMATION_METHOD
     APPROX_END
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/configuration.cpp b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/configuration.cpp
index 63dd927cd09f25ed995a8dd3ec2fd3f91c1a6032..9efbea07c9a1ef31a87a3266de89cb9d10660621 100644
--- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/configuration.cpp
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/configuration.cpp
@@ -184,9 +184,15 @@ void GPUNodeConfiguration::print() {
       case G_APPROX::PERFORATION:
         printf("perf");
         break;
+      case G_APPROX::PERFORATION_HP:
+        printf("perf_fp16");
+        break;
       case G_APPROX::INPUT_SAMPLING:
         printf("samp");
         break;
+      case G_APPROX::INPUT_SAMPLING_HP:
+        printf("samp_fp16");
+        break;
       case G_APPROX::REDUCTION_SAMPLING:
         printf("red_samp");
         break;
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/hpvm-rt-controller.cpp b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/hpvm-rt-controller.cpp
index cc01caaef64f6685d4bf78be60c5ee86d146e89c..89968f2b094e34d3ec4b8a790ab678fa4d81201d 100644
--- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/hpvm-rt-controller.cpp
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/hpvm-rt-controller.cpp
@@ -65,8 +65,10 @@ bool fileExists(const std::string &file) {
   return (stat(file.c_str(), &buf) == 0);
 }
 
+// There will be no frequency request for the first batch
+// Therefore, we skip the first element by initializing to 1, not 0.
 FrequencyIndexList::FrequencyIndexList(std::vector<int> il, unsigned rf) :
-  idx_list(il), rep_factor(rf), count(0), idx(0) {}
+  idx_list(il), rep_factor(rf), count(1), idx(0) {}
 
 unsigned FrequencyIndexList::getNextIndex() {
   if (count == rep_factor) {
@@ -475,7 +477,7 @@ std::pair<double, double> RuntimeController::conv_profile(
 // Constructor and descructor
 RuntimeController::RuntimeController() {
   configurationIdx = 0;
-  FIL = new FrequencyIndexList({13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}, 5);
+  FIL = new FrequencyIndexList({13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}, 10);
 #ifdef ACTIVE_PROFILING
   PI = new ProfileInfo();
   profiler = new Profiler();
@@ -807,6 +809,13 @@ void RuntimeController::readConfigurationFile(const char *str) {
               NodeConf->pushNewApproximationChoiceForOperation(
                   GPUNodeConfiguration::APPROX::PERFORATION, perf);
               idx += 2;
+            } else if (tokens[idx] == "perf_fp16") {
+              DEBUG("Found perf_fp16 option\n");
+              int perf_fp16 = std::stoi(tokens[idx + 1]);
+              DEBUG("perf_fp16 parameter: %d\n", perf_fp16);
+              NodeConf->pushNewApproximationChoiceForOperation(
+                  GPUNodeConfiguration::APPROX::PERFORATION_HP, perf_fp16);
+              idx += 2;
             } else if (tokens[idx] == "samp") {
               DEBUG("Found samp option\n");
               int samp = std::stoi(tokens[idx + 1]);
@@ -814,6 +823,13 @@ void RuntimeController::readConfigurationFile(const char *str) {
               NodeConf->pushNewApproximationChoiceForOperation(
                   GPUNodeConfiguration::APPROX::INPUT_SAMPLING, samp);
               idx += 2;
+            } else if (tokens[idx] == "samp_fp16") {
+              DEBUG("Found samp_fp16 option\n");
+              int samp_fp16 = std::stoi(tokens[idx + 1]);
+              DEBUG("samp_fp16 parameter: %d\n", samp_fp16);
+              NodeConf->pushNewApproximationChoiceForOperation(
+                  GPUNodeConfiguration::APPROX::INPUT_SAMPLING_HP, samp_fp16);
+              idx += 2;
             } else if (tokens[idx] == "red_samp") {
               DEBUG("Found red_samp option\n");
               int red_samp = std::stoi(tokens[idx + 1]);