diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_cpu_runtime.cc b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_cpu_runtime.cc index 89148bd5e05e350c29ac49c2eed0a5b93696d38a..24f8c7d370a2922ba117eb1ec573dfc6af4742cc 100644 --- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_cpu_runtime.cc +++ b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_cpu_runtime.cc @@ -75,17 +75,15 @@ void *tensorRegularConvolutionCPU(void *input_ptr, void *filter_ptr, horizontal_stride); int num_filter_elem = kernel_height * kernel_width * channels; int output_size = output_width * output_height; - printf("--CREATE 4D TENSOR\n"); + Tensor *output = (Tensor *)create4DTensor(0, 0, batch_size, num_filters, output_height, output_width); float *__restrict__ output_data = (float *)output->host_data; - printf("CREATED 4D TENSOR\n"); + long int conv_data_size = sizeof(float) * num_filter_elem * output_height * output_width * batch_size; float *host_data = (float *)malloc(conv_data_size); - printf("host data: %p\n", host_data); - printf("conv_data_size: %d\n", conv_data_size); - printf("number of batches: %d\n", batch_size); + omp_set_num_threads(4); #pragma omp parallel for for (int b = 0; b < batch_size; b++) { @@ -131,8 +129,9 @@ void *tensorRegularConvolutionCPU(void *input_ptr, void *filter_ptr, } } } + free(host_data); - printf("END: %p\n", output); + return output; } @@ -667,19 +666,19 @@ void *tensorConvApproxCPU(void *input_ptr, void *filter_ptr, int vertical_pad, int compute_precision, int row, int col, int skip_every, int start) { if (row > 1) { - printf("ROW PERFORATION\n"); + //printf("ROW PERFORATION\n"); return tensorRowPerfConvolutionCPU( input_ptr, filter_ptr, vertical_pad, horizontal_pad, vertical_stride, horizontal_stride, conv_mode, compute_precision, row, start); } if (col > 1) { - printf("COL PERFORATION\n"); + //printf("COL PERFORATION\n"); return tensorColPerfConvolutionCPU( input_ptr, filter_ptr, vertical_pad, horizontal_pad, vertical_stride, horizontal_stride, conv_mode, compute_precision, col, start); } if (skip_every > 1) { - printf("INPUT FILTERING\n"); + //printf("INPUT FILTERING\n"); Tensor *filter = (Tensor *)filter_ptr; const int kernel_height = filter->dims.dim_sizes[2]; @@ -694,7 +693,7 @@ void *tensorConvApproxCPU(void *input_ptr, void *filter_ptr, int vertical_pad, input_ptr, filter_ptr, vertical_pad, horizontal_pad, vertical_stride, horizontal_stride, conv_mode, compute_precision, skip_every, start); } - printf("---REGULAR CONV\n"); + return tensorRegularConvolutionCPU( input_ptr, filter_ptr, vertical_pad, horizontal_pad, vertical_stride, horizontal_stride, conv_mode, compute_precision); diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/wrapper_runtime.cu b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/wrapper_runtime.cu index c2e116a56fbf038628396eeb611711295a4a9170..79ea02592dace30d9a6be4a0b4001c38f3073968 100644 --- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/wrapper_runtime.cu +++ b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/wrapper_runtime.cu @@ -123,7 +123,6 @@ wrapper_ConvLayer(const char *hpvm_node_id, void *input, void *filter, void *activation_out; switch (activation_id) { case -1: { // No activation - // INFO("No activation Function\n"); activation_out = add_out; } break; case 0: { // TanH activation @@ -204,7 +203,7 @@ wrapper_ConvLayer(const char *hpvm_node_id, void *input, void *filter, std::vector< std::pair< CPUNodeConfiguration::TENSOR_OP, std::vector< std::pair<CPUNodeConfiguration::APPROX, - int> > > > &ApproxChoices = CPUConf->getApproxChoices(); + int> > > > &ApproxChoices = CPUConf->getApproxChoices(); // Check for convolution as first operation CUSTOM_ASSERT((ApproxChoices.size() >= 1) && @@ -230,7 +229,6 @@ wrapper_ConvLayer(const char *hpvm_node_id, void *input, void *filter, switch (activation_id) { case -1: { // No activation - INFO("No activation Function\n"); activation_out = add_out; } break; @@ -333,7 +331,7 @@ void *wrapper_ConvLayer2( // NOTE: out_min, out_max are only relevant for ClippedRelu float out_min, float out_max) { - //INFO("*** ------Conv Layer \n"); + INFO("*** TensorConv \n"); NodeConfiguration *NodeConf = RC->getNodeConfiguration(hpvm_node_id); if (NodeConf->isGPUNodeConfiguration()) { @@ -345,11 +343,6 @@ void *wrapper_ConvLayer2( std::vector<std::pair<GPUNodeConfiguration::APPROX, int>>>> &ApproxChoices = GPUConf->getApproxChoices(); - // printf("*** Convolution \n ApproxChoice = %d \n BatchNorm = %d \n CONV = - // %d \n", ApproxChoices[0].first, - // GPUNodeConfiguration::TENSOR_OP::BATCHNORM, - // GPUNodeConfiguration::TENSOR_OP::CONV); - // Check for convolution as first operation CUSTOM_ASSERT( (ApproxChoices.size() >= 1) && @@ -377,7 +370,6 @@ void *wrapper_ConvLayer2( void *activation_out; switch (activation_id) { case -1: { // No activation - // INFO("No activation Function\n"); activation_out = add_out; } break; case 0: { // TanH activation @@ -628,7 +620,6 @@ wrapper_FCLayer(const char *hpvm_node_id, void *input, void *weights, CUSTOM_ASSERT( (ApproxChoices.size() == 2) && "Incorrect number of operations in provided FC layer configuration"); - // INFO("No activation Function\n"); activation_out = add_out; } break; case 0: { // TanH activation @@ -739,7 +730,7 @@ wrapper_FCLayer(const char *hpvm_node_id, void *input, void *weights, void *wrapper_tensorRelu(const char *hpvm_node_id, void *input_ptr) { - INFO("*** Relu Operation \n"); + INFO("*** TensorRelu \n"); NodeConfiguration *NodeConf = RC->getNodeConfiguration(hpvm_node_id); @@ -893,7 +884,7 @@ void *wrapper_tensorBatchNorm(const char *hpvm_node_id, void *input_ptr, void *gamma_ptr, void *beta_ptr, void *mean_ptr, void *variance_ptr, double epsilon) { - INFO("*** BatchNorm Operation \n"); + INFO("*** TensorBatchNorm \n"); NodeConfiguration *NodeConf = RC->getNodeConfiguration(hpvm_node_id); @@ -981,7 +972,7 @@ void *wrapper_tensorAdd(const char *hpvm_node_id, void *input_ptr, } else if (NodeConf->isCPUNodeConfiguration()) { DEBUG("Add operation: CPU Configuration\n"); // Mapped to CPU - get a CPU configuration - CPUNodeConfiguration *CPUConf = (CPUNodeConfiguration *)NodeConf; + CPUNodeConfiguration *CPUConf = (CPUNodeConfiguration *) NodeConf; std::vector< std::pair< CPUNodeConfiguration::TENSOR_OP, std::vector< std::pair<CPUNodeConfiguration::APPROX, @@ -994,7 +985,7 @@ void *wrapper_tensorAdd(const char *hpvm_node_id, void *input_ptr, "Invalid configuration generated for tensor add wrapper operation"); return handleTensorAddApproximationTuples_CPU(ApproxChoices[0].second, - input_ptr, bias_ptr); + input_ptr, bias_ptr); } else { ERROR("Unsupported Configuration"); abort(); @@ -1008,7 +999,7 @@ void *wrapper_tensorPooling(const char *hpvm_node_id, void *input_ptr, int horizontal_pad, int vertical_stride, int horizontal_stride) { - INFO("*** TensorPooling Operation \n"); + INFO("*** TensorPooling \n"); NodeConfiguration *NodeConf = RC->getNodeConfiguration(hpvm_node_id);