diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_cpu_runtime.cc b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_cpu_runtime.cc index dcc6e2e7ce832afc24b560006cb07272f08842cf..700793529f1a7e1e9d8c887c28e7aefbd9afba93 100644 --- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_cpu_runtime.cc +++ b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_cpu_runtime.cc @@ -1026,32 +1026,34 @@ void *tensorSoftmaxCPU(void *input_ptr) { int n = input->dims.dim_sizes[0]; int c = input->dims.dim_sizes[1]; - float max = logits[0]; - for (unsigned int i = 0; i < n * c; i++){ - if (logits[i] > max){ - max = logits[i]; - } - } omp_set_num_threads(4); #pragma omp parallel for for (int i = 0; i < n; i++) { + + float max = logits[i * c]; + for (unsigned int k = i * c; k < c + i * c; k++){ + if (logits[k] > max){ + max = logits[k]; + } + } + double x = 0; - for (int j = i * c; j < c + i * c; j++) { - logits[j] = exp(logits[j] / max ); + for (int j = i * c; j < c + i * c; j++) { + logits[j] = exp( logits[j] - max ); } #pragma omp simd reduction(+ : x) for (int j = i * c; j < i * c + c; j++) { x += logits[j]; } - - //printf("x = %f \n ", x); #pragma omp simd for (int j = i * c; j < i * c + c; j++) { logits[j] /= x; } + + //printf("logits[i * c] = %f \n ", logits[i * c]); } return input;