diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_ops.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_ops.cc index cf8e651357f4053b09449822ebe1ed936cd69cdd..93c7d6cd71b1b8d202cbc06f0c36a4a6188a1688 100644 --- a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_ops.cc +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_ops.cc @@ -1099,7 +1099,7 @@ void testSampling_1_1(){ -void testTensorArgMax(){ +void* testTensorArgMax(){ Tensor* input = (Tensor*) create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 4, 3, 1, 1); @@ -1126,15 +1126,24 @@ void testTensorArgMax(){ host_ptr[11] = 8; void* argmax_out = tensorArgMax(input); - + + // Expect Output of call below to be: + // 1 2 2 0 printTensorValues(argmax_out); - + + return argmax_out; } -void testTensorSelect(){ +void* testTensorSelect(void* argmax_out){ + + void* select_out = tensorSelect(argmax_out, 2); + printf ("***** tensorSelect output \n"); + + printTensorValues(select_out); + return select_out; } @@ -1148,8 +1157,8 @@ void testTensorContract(){ void testNewTensorOps(){ - testTensorArgMax(); - testTensorSelect(); + void* argmax_out = testTensorArgMax(); + testTensorSelect(argmax_out); testTensorContract(); } diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_custom_ops_cpu.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_custom_ops_cpu.h index ac5ee1004e9231a532848ead63af1a0c54212716..502f04639e0eb0a07897ee2d5520bc02f48eca0d 100644 --- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_custom_ops_cpu.h +++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_custom_ops_cpu.h @@ -1,9 +1,10 @@ #include "tensor.h" +#include <stdlib.h> -void* tensorArgMax(Tensor* input_ptr){ +void* tensorArgMax(void* input_ptr){ Tensor* input = (Tensor*) input_ptr; float* host_ptr = (float*) input->host_data; @@ -39,3 +40,35 @@ void* tensorArgMax(Tensor* input_ptr){ return output; } + + + + +void* tensorSelect(void* input_ptr, float target_value){ + + Tensor* input = (Tensor*) input_ptr; + float* host_ptr = (float*) input->host_data; + + int batch_size = input->dims.dim_sizes[0]; + int channels = input->dims.dim_sizes[1]; + + if (channels != 1){ + printf("* Channels dimension must be 1 \n"); + abort(); + } + + Tensor* output = (Tensor *) create4DTensor(0, 0, batch_size, 1, 1, 1); + changeTensorPlacement(output, HOST); + float* out_ptr = (float*) output->host_data; + + for(int i = 0; i < batch_size; i++){ + if (host_ptr[i] == target_value){ + out_ptr[i] = 1; + } + else{ + out_ptr[i] = 0; + } + } + + return output; +}