diff --git a/hpvm/test/hpvm-cava/src/cam_pipe.c b/hpvm/test/hpvm-cava/src/cam_pipe.c index cdeaf393320121706d13d423212896e2551142c8..7874ff9d529afebc40d1660637e85b3a1e00f23e 100644 --- a/hpvm/test/hpvm-cava/src/cam_pipe.c +++ b/hpvm/test/hpvm-cava/src/cam_pipe.c @@ -1,11 +1,11 @@ -#include "cam_pipe_utility.h" -#include "dma_interface.h" -#include "load_cam_model.h" -#include "pipe_stages.h" -#include <assert.h> #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <assert.h> +#include "pipe_stages.h" +#include "load_cam_model.h" +#include "cam_pipe_utility.h" +#include "dma_interface.h" #ifdef DMA_MODE #include "gem5_harness.h" #endif @@ -13,7 +13,7 @@ // FIXME: Include gem5/dma_interface.cc/h separately #ifndef DMA_INTERFACE_V3 #define DMA_INTERFACE_V3 -#endif // DMA_INTERFACE_V3 +#endif//DMA_INTERFACE_V3 /////////////////////////////////////////////////////////////// // Camera Model Parameters @@ -71,8 +71,7 @@ void cam_pipe(uint8_t *host_input, uint8_t *host_result, int row_size, uint8_t *acc_input, *acc_result; float *acc_input_scaled, *acc_result_scaled; float *host_TsTw, *host_ctrl_pts, *host_weights, *host_coefs, *host_tone_map; - float *acc_TsTw, *acc_ctrl_pts, *acc_weights, *acc_coefs, *acc_tone_map, - *acc_l2_dist; + float *acc_TsTw, *acc_ctrl_pts, *acc_weights, *acc_coefs, *acc_tone_map, *acc_l2_dist; strcat(cam_model_path, "cam_models/NikonD7000/"); @@ -85,25 +84,20 @@ void cam_pipe(uint8_t *host_input, uint8_t *host_result, int row_size, host_coefs = get_coefs(cam_model_path, num_ctrl_pts); host_tone_map = get_tone_map(cam_model_path); - acc_input = (uint8_t *)malloc_aligned(sizeof(uint8_t) * row_size * col_size * - CHAN_SIZE); - acc_result = (uint8_t *)malloc_aligned(sizeof(uint8_t) * row_size * col_size * - CHAN_SIZE); - acc_input_scaled = - (float *)malloc_aligned(sizeof(float) * row_size * col_size * CHAN_SIZE); - acc_result_scaled = - (float *)malloc_aligned(sizeof(float) * row_size * col_size * CHAN_SIZE); - acc_TsTw = (float *)malloc_aligned(sizeof(float) * 9); - acc_ctrl_pts = - (float *)malloc_aligned(sizeof(float) * num_ctrl_pts * CHAN_SIZE); - acc_weights = - (float *)malloc_aligned(sizeof(float) * num_ctrl_pts * CHAN_SIZE); - acc_coefs = (float *)malloc_aligned(sizeof(float) * 12); - acc_tone_map = (float *)malloc_aligned(sizeof(float) * 256 * CHAN_SIZE); - acc_l2_dist = (float *)malloc_aligned(sizeof(float) * num_ctrl_pts); + acc_input = (uint8_t*) malloc_aligned(sizeof(uint8_t) * row_size * col_size * CHAN_SIZE); + acc_result = (uint8_t*) malloc_aligned(sizeof(uint8_t) * row_size * col_size * CHAN_SIZE); + acc_input_scaled = (float*) malloc_aligned(sizeof(float) * row_size * col_size * CHAN_SIZE); + acc_result_scaled = (float*) malloc_aligned(sizeof(float) * row_size * col_size * CHAN_SIZE); + acc_TsTw = (float*) malloc_aligned(sizeof(float) * 9); + acc_ctrl_pts = (float*) malloc_aligned(sizeof(float) * num_ctrl_pts * CHAN_SIZE); + acc_weights = (float*) malloc_aligned(sizeof(float) * num_ctrl_pts * CHAN_SIZE); + acc_coefs = (float*) malloc_aligned(sizeof(float) * 12); + acc_tone_map = (float*) malloc_aligned(sizeof(float) * 256 * CHAN_SIZE); + acc_l2_dist = (float*) malloc_aligned(sizeof(float) * num_ctrl_pts); // Load camera model parameters for the ISP - MAP_ARRAY_TO_ACCEL(ISP, "host_TsTw", host_TsTw, sizeof(float) * 9); + MAP_ARRAY_TO_ACCEL(ISP, "host_TsTw", host_TsTw, + sizeof(float) * 9); MAP_ARRAY_TO_ACCEL(ISP, "host_ctrl_pts", host_ctrl_pts, sizeof(float) * num_ctrl_pts * CHAN_SIZE); MAP_ARRAY_TO_ACCEL(ISP, "host_weights", host_weights, @@ -142,3 +136,4 @@ void cam_pipe(uint8_t *host_input, uint8_t *host_result, int row_size, free(acc_tone_map); free(acc_l2_dist); } + diff --git a/hpvm/test/hpvm-cava/src/cam_pipe_utility.c b/hpvm/test/hpvm-cava/src/cam_pipe_utility.c index 864f02d5b28f2c4738279cf66cba5f4312c2a3de..f806e9ee1a2e288fabcb8ad658a47c3919fbb661 100644 --- a/hpvm/test/hpvm-cava/src/cam_pipe_utility.c +++ b/hpvm/test/hpvm-cava/src/cam_pipe_utility.c @@ -1,6 +1,6 @@ -#include <assert.h> #include <stdio.h> #include <stdlib.h> +#include <assert.h> #include "cam_pipe_utility.h" //#include "pipe_stages.h" @@ -26,11 +26,10 @@ uint8_t *read_image_from_binary(char *file_path, int *row_size, int *col_size) { return image; } -void write_image_to_binary(char *file_path, uint8_t *image, int row_size, - int col_size) { +void write_image_to_binary(char *file_path, uint8_t *image, int row_size, int col_size) { FILE *fp = fopen(file_path, "w"); - int shape[3] = {row_size, col_size, CHAN_SIZE}; + int shape[3] = { row_size, col_size, CHAN_SIZE }; fwrite(shape, sizeof(int), 3, fp); int size = row_size * col_size * CHAN_SIZE; @@ -41,8 +40,8 @@ void write_image_to_binary(char *file_path, uint8_t *image, int row_size, float *transpose_mat(float *inmat, int width, int height) { // Define vectors float *outmat; - int err = posix_memalign((void **)&outmat, CACHELINE_SIZE, - sizeof(float) * height * width); + int err = + posix_memalign((void **)&outmat, CACHELINE_SIZE, sizeof(float) * height * width); assert(err == 0 && "Failed to allocate memory!"); // Transpose the matrix @@ -72,7 +71,7 @@ void convert_chw_to_hwc(uint8_t *input, int row_size, int col_size, uint8_t **result) { if (*result == NULL) { *result = (uint8_t *)malloc_aligned(row_size * col_size * CHAN_SIZE * - sizeof(uint8_t)); + sizeof(uint8_t)); } ARRAY_3D(uint8_t, _input, input, row_size, col_size); ARRAY_3D(uint8_t, _result, *result, col_size, CHAN_SIZE); diff --git a/hpvm/test/hpvm-cava/src/cam_pipe_utility.h b/hpvm/test/hpvm-cava/src/cam_pipe_utility.h index b61b7cc9b52aa59522f93661895fca960b947f17..b4fb6cde0c438b23c2b596cf0418953aaedca501 100644 --- a/hpvm/test/hpvm-cava/src/cam_pipe_utility.h +++ b/hpvm/test/hpvm-cava/src/cam_pipe_utility.h @@ -1,8 +1,8 @@ #ifndef _CAM_PIPE_UTILITY_H_ #define _CAM_PIPE_UTILITY_H_ -#include "pipe_stages.h" #include "utility.h" +#include "pipe_stages.h" uint8_t *read_image_from_binary(char *file_path, int *row_size, int *col_size); void write_image_to_binary(char *file_path, uint8_t *image, int row_size, diff --git a/hpvm/test/hpvm-cava/src/defs.h b/hpvm/test/hpvm-cava/src/defs.h index 0fa95ef3d2ea55c67a921e0bc5fc8a6ec6ba949f..ccc8acc857c36fd13115670932a38dc3a406dc29 100644 --- a/hpvm/test/hpvm-cava/src/defs.h +++ b/hpvm/test/hpvm-cava/src/defs.h @@ -10,46 +10,46 @@ typedef unsigned long uint64_t; // Debugging message macros. #if DEBUG_LEVEL >= 1 -#define INFO_MSG(args...) printf(args) - -#if DEBUG_LEVEL >= 2 -#define PRINT_MSG(args...) printf(args) -#define PRINT_DEBUG(hid, rows, cols, num_cols) \ - print_debug(hid, rows, cols, num_cols) -#define PRINT_DEBUG4D(hid, rows, cols, height) \ - print_debug4d(hid, rows, cols, height) -#define PRINT_DEBUG4D_FP16(hid, num, height, rows, cols) \ - print_debug4d_fp16(hid, num, height, rows, cols) - -#if DEBUG_LEVEL >= 3 -#define PRINT_DEBUG_V(hid, rows, cols, num_cols) \ - print_debug(hid, rows, cols, num_cols) -#define PRINT_DEBUG4D_V(hid, rows, cols, height) \ - print_debug4d(hid, rows, cols, height) -#define PRINT_MSG_V(args...) printf(args) + #define INFO_MSG(args...) printf(args) + + #if DEBUG_LEVEL >= 2 + #define PRINT_MSG(args...) printf(args) + #define PRINT_DEBUG(hid, rows, cols, num_cols) \ + print_debug(hid, rows, cols, num_cols) + #define PRINT_DEBUG4D(hid, rows, cols, height) \ + print_debug4d(hid, rows, cols, height) + #define PRINT_DEBUG4D_FP16(hid, num, height, rows, cols) \ + print_debug4d_fp16(hid, num, height, rows, cols) + + #if DEBUG_LEVEL >= 3 + #define PRINT_DEBUG_V(hid, rows, cols, num_cols) \ + print_debug(hid, rows, cols, num_cols) + #define PRINT_DEBUG4D_V(hid, rows, cols, height) \ + print_debug4d(hid, rows, cols, height) + #define PRINT_MSG_V(args...) printf(args) + #else + #define PRINT_DEBUG_V(hid, rows, cols, num_cols) + #define PRINT_DEBUG4D_V(hid, rows, cols, height) + #define PRINT_MSG_V(args...) + #endif + #else + #define PRINT_MSG(args...) + #define PRINT_DEBUG(hid, rows, cols, num_cols) + #define PRINT_DEBUG4D(hid, rows, cols, height) + #define PRINT_DEBUG4D_FP16(hid, num, height, rows, cols) + #define PRINT_DEBUG_V(hid, rows, cols, height) + #define PRINT_DEBUG4D_V(hid, rows, cols, height) + #define PRINT_MSG_V(args...) + #endif #else -#define PRINT_DEBUG_V(hid, rows, cols, num_cols) -#define PRINT_DEBUG4D_V(hid, rows, cols, height) -#define PRINT_MSG_V(args...) -#endif -#else -#define PRINT_MSG(args...) -#define PRINT_DEBUG(hid, rows, cols, num_cols) -#define PRINT_DEBUG4D(hid, rows, cols, height) -#define PRINT_DEBUG4D_FP16(hid, num, height, rows, cols) -#define PRINT_DEBUG_V(hid, rows, cols, height) -#define PRINT_DEBUG4D_V(hid, rows, cols, height) -#define PRINT_MSG_V(args...) -#endif -#else -#define INFO_MSG(args...) -#define PRINT_DEBUG(hid, rows, cols, num_cols) -#define PRINT_DEBUG4D(hid, rows, cols, height) -#define PRINT_DEBUG4D_FP16(hid, num, height, rows, cols) -#define PRINT_MSG(args...) -#define PRINT_DEBUG_V(hid, rows, cols, height) -#define PRINT_DEBUG4D_V(hid, rows, cols, height) -#define PRINT_MSG_V(args...) + #define INFO_MSG(args...) + #define PRINT_DEBUG(hid, rows, cols, num_cols) + #define PRINT_DEBUG4D(hid, rows, cols, height) + #define PRINT_DEBUG4D_FP16(hid, num, height, rows, cols) + #define PRINT_MSG(args...) + #define PRINT_DEBUG_V(hid, rows, cols, height) + #define PRINT_DEBUG4D_V(hid, rows, cols, height) + #define PRINT_MSG_V(args...) #endif #define STRING(arg) #arg @@ -72,9 +72,9 @@ typedef unsigned long uint64_t; #define max3(e0, e1, e2) max2(max2(e0, e1), e2) #define max4(e0, e1, e2, e3) max2(max2(e0, e1), max2(e2, e3)) #define max8(e0, e1, e2, e3, e4, e5, e6, e7) \ - max2(max4(e0, e1, e2, e3), max4(e4, e5, e6, e7)) + max2(max4(e0, e1, e2, e3), max4(e4, e5, e6, e7)) #define max9(e0, e1, e2, e3, e4, e5, e6, e7, e8) \ - max2(max8(e0, e1, e2, e3, e4, e5, e6, e7), e8) + max2(max8(e0, e1, e2, e3, e4, e5, e6, e7), e8) #define min2(A, B) (((A) < (B)) ? (A) : (B)) @@ -92,8 +92,7 @@ typedef unsigned long uint64_t; // If GEM5_HARNESS is defined: // // MAP_ARRAY_TO_ACCEL(myReqCode, myArrayName, myArrayPtr, mySize) -// ===> mapArrayToAccelerator(myReqCode, myArrayName, myArrayPtr, -// mySize) +// ===> mapArrayToAccelerator(myReqCode, myArrayName, myArrayPtr, mySize) // // INVOKE_KERNEL(myReqCode, kernelFuncName, args...) // ===> invokeAcceleratorAndBlock(myReqCode) @@ -108,69 +107,69 @@ typedef unsigned long uint64_t; #ifdef GEM5_HARNESS #define MAP_ARRAY_TO_ACCEL(req_code, name, base_addr, size) \ - mapArrayToAccelerator(req_code, name, base_addr, size) + mapArrayToAccelerator(req_code, name, base_addr, size) #define INVOKE_KERNEL(req_code, kernel_ptr, args...) \ - do { \ - UNUSED(kernel_ptr); \ - invokeAcceleratorAndBlock(req_code); \ - } while (0) + do { \ + UNUSED(kernel_ptr); \ + invokeAcceleratorAndBlock(req_code); \ + } while (0) #define INVOKE_KERNEL_NOBLOCK(req_code, finish_flag, kernel_ptr, args...) \ - do { \ - UNUSED(kernel_ptr); \ - invokeAcceleratorAndReturn2(req_code, finish_flag); \ - } while (0) + do { \ + UNUSED(kernel_ptr); \ + invokeAcceleratorAndReturn2(req_code, finish_flag); \ + } while (0) #define INVOKE_DMA_READ_TRAFFIC_GEN(start_addr, size) \ - do { \ - invokeAladdinTrafficGenAndBlock(start_addr, size, false, false); \ - } while (0) + do { \ + invokeAladdinTrafficGenAndBlock(start_addr, size, false, false); \ + } while (0) #define INVOKE_DMA_WRITE_TRAFFIC_GEN(start_addr, size) \ - do { \ - invokeAladdinTrafficGenAndBlock(start_addr, size, true, false); \ - } while (0) + do { \ + invokeAladdinTrafficGenAndBlock(start_addr, size, true, false); \ + } while (0) #define INVOKE_ACP_READ_TRAFFIC_GEN(start_addr, size) \ - do { \ - invokeAladdinTrafficGenAndBlock(start_addr, size, false, true); \ - } while (0) + do { \ + invokeAladdinTrafficGenAndBlock(start_addr, size, false, true); \ + } while (0) #define INVOKE_ACP_WRITE_TRAFFIC_GEN(start_addr, size) \ - do { \ - invokeAladdinTrafficGenAndBlock(start_addr, size, true, true); \ - } while (0) + do { \ + invokeAladdinTrafficGenAndBlock(start_addr, size, true, true); \ + } while (0) #else #define MAP_ARRAY_TO_ACCEL(req_code, name, base_addr, size) \ - do { \ - INFO_MSG("Mapping array %s @ %p, size %d.\n", name, (void *)base_addr, \ - (int)(size)); \ - UNUSED(req_code); \ - UNUSED(name); \ - UNUSED(base_addr); \ - UNUSED(size); \ - } while (0) + do { \ + INFO_MSG("Mapping array %s @ %p, size %d.\n", \ + name, (void*)base_addr, (int)(size)); \ + UNUSED(req_code); \ + UNUSED(name); \ + UNUSED(base_addr); \ + UNUSED(size); \ + } while (0) #define INVOKE_KERNEL(req_code, kernel_ptr, args...) kernel_ptr(args) #define INVOKE_KERNEL_NOBLOCK(req_code, finish_flag, kernel_ptr, args...) \ - kernel_ptr(args) + kernel_ptr(args) #define INVOKE_DMA_READ_TRAFFIC_GEN(start_addr, size) \ - do { \ - UNUSED(start_addr); \ - UNUSED(size); \ - } while (0) + do { \ + UNUSED(start_addr); \ + UNUSED(size); \ + } while (0) #define INVOKE_DMA_WRITE_TRAFFIC_GEN(start_addr, size) \ - do { \ - UNUSED(start_addr); \ - UNUSED(size); \ - } while (0) + do { \ + UNUSED(start_addr); \ + UNUSED(size); \ + } while (0) #define INVOKE_ACP_READ_TRAFFIC_GEN(start_addr, size) \ - do { \ - UNUSED(start_addr); \ - UNUSED(size); \ - } while (0) + do { \ + UNUSED(start_addr); \ + UNUSED(size); \ + } while (0) #define INVOKE_ACP_WRITE_TRAFFIC_GEN(start_addr, size) \ - do { \ - UNUSED(start_addr); \ - UNUSED(size); \ - } while (0) + do { \ + UNUSED(start_addr); \ + UNUSED(size); \ + } while (0) #endif @@ -178,14 +177,14 @@ typedef unsigned long uint64_t; // // This assumes that the current name of the base pointer is also the name of // the array in the top level function of the dynamic trace. THIS IS VERY -// IMPORTANT - if the argument passed to a top level function has been renamed -// in the function, then this WILL NOT WORK! +// IMPORTANT - if the argument passed to a top level function has been renamed in +// the function, then this WILL NOT WORK! // // MAP_ARRAY(myReqCode, myArray, mySize) // ===> MAP_ARRAY_TO_ACCEL(myReqCode, "myArray", myArray, mySize) #define MAP_ARRAY(req_code, name_and_base_addr, size) \ - MAP_ARRAY_TO_ACCEL(req_code, STRING(name_and_base_addr), name_and_base_addr, \ - size) + MAP_ARRAY_TO_ACCEL( \ + req_code, STRING(name_and_base_addr), name_and_base_addr, size) // Use these convenience macros to cast a raw pointer into a multidimensional // variable-length array, which lets us use [] notation inside of the ugly @@ -203,24 +202,23 @@ typedef unsigned long uint64_t; // // And so on... #define ARRAY_1D(TYPE, output_array_name, input_array_name) \ - TYPE *output_array_name = (TYPE *)input_array_name + TYPE* output_array_name = (TYPE*)input_array_name #define ARRAY_2D(TYPE, output_array_name, input_array_name, DIM_1) \ - TYPE(*output_array_name)[DIM_1] = (TYPE(*)[DIM_1])input_array_name + TYPE(*output_array_name)[DIM_1] = (TYPE(*)[DIM_1])input_array_name #define ARRAY_3D(TYPE, output_array_name, input_array_name, DIM_1, DIM_2) \ - TYPE(*output_array_name) \ - [DIM_1][DIM_2] = (TYPE(*)[DIM_1][DIM_2])input_array_name - -#define ARRAY_4D(TYPE, output_array_name, input_array_name, DIM_1, DIM_2, \ - DIM_3) \ - TYPE(*output_array_name) \ - [DIM_1][DIM_2][DIM_3] = (TYPE(*)[DIM_1][DIM_2][DIM_3])input_array_name - -#define ARRAY_5D(TYPE, output_array_name, input_array_name, DIM_1, DIM_2, \ - DIM_3, DIM_4) \ - TYPE(*output_array_name) \ - [DIM_1][DIM_2][DIM_3][DIM_4] = \ - (TYPE(*)[DIM_1][DIM_2][DIM_3][DIM_4])input_array_name + TYPE(*output_array_name)[DIM_1][DIM_2] = \ + (TYPE(*)[DIM_1][DIM_2])input_array_name + +#define ARRAY_4D( \ + TYPE, output_array_name, input_array_name, DIM_1, DIM_2, DIM_3) \ + TYPE(*output_array_name)[DIM_1][DIM_2][DIM_3] = \ + (TYPE(*)[DIM_1][DIM_2][DIM_3])input_array_name + +#define ARRAY_5D( \ + TYPE, output_array_name, input_array_name, DIM_1, DIM_2, DIM_3, DIM_4) \ + TYPE(*output_array_name)[DIM_1][DIM_2][DIM_3][DIM_4] = \ + (TYPE(*)[DIM_1][DIM_2][DIM_3][DIM_4])input_array_name #endif diff --git a/hpvm/test/hpvm-cava/src/dma_interface.c b/hpvm/test/hpvm-cava/src/dma_interface.c index 68698635a4fceb4fe67e323bd0f354bd70bca99d..81bce54469886153170f994a77250a784cc9b7d7 100644 --- a/hpvm/test/hpvm-cava/src/dma_interface.c +++ b/hpvm/test/hpvm-cava/src/dma_interface.c @@ -1,6 +1,6 @@ -#include "dma_interface.h" #include <assert.h> #include <string.h> +#include "dma_interface.h" // All _dmaImplN functions must be always inlined or we'll get extra functions // in the trace. @@ -10,22 +10,22 @@ // Starting with version 3, all versioning will be distinguished by the return // value of the DMA functions. -__attribute__((__always_inline__)) int _dmaImpl3(void *dst_addr, void *src_addr, - size_t size) { +__attribute__((__always_inline__)) +int _dmaImpl3(void* dst_addr, void* src_addr, size_t size) { assert(size > 0); memmove(dst_addr, src_addr, size); return 3; } -int dmaLoad(void *dst_addr, void *src_host_addr, size_t size) { +int dmaLoad(void* dst_addr, void* src_host_addr, size_t size) { return _dmaImpl3(dst_addr, src_host_addr, size); } -int dmaStore(void *dst_host_addr, void *src_addr, size_t size) { +int dmaStore(void* dst_host_addr, void* src_addr, size_t size) { return _dmaImpl3(dst_host_addr, src_addr, size); } -int setReadyBits(void *start_addr, size_t size, unsigned value) { +int setReadyBits(void* start_addr, size_t size, unsigned value) { asm(""); return 0; } @@ -35,37 +35,39 @@ int setReadyBits(void *start_addr, size_t size, unsigned value) { // With version 2 and earlier, we return (void*)NULL and use the number of // function arguments to distinguish the DMA functions. -__attribute__((__always_inline__)) void * -_dmaImpl2(void *base_addr, size_t src_off, size_t dst_off, size_t size) { +__attribute__((__always_inline__)) +void* _dmaImpl2(void* base_addr, size_t src_off, size_t dst_off, size_t size) { assert(size > 0); memmove(base_addr + dst_off, base_addr + src_off, size); return NULL; } -void *dmaLoad(void *base_addr, size_t src_off, size_t dst_off, size_t size) { +void* dmaLoad(void* base_addr, size_t src_off, size_t dst_off, size_t size) { return _dmaImpl2(base_addr, src_off, dst_off, size); } -void *dmaStore(void *base_addr, size_t src_off, size_t dst_off, size_t size) { +void* dmaStore(void* base_addr, size_t src_off, size_t dst_off, size_t size) { return _dmaImpl2(base_addr, src_off, dst_off, size); } #else -__attribute__((__always_inline__)) void *_dmaImpl1(void *base_addr, - size_t offset, size_t size) { +__attribute__((__always_inline__)) +void* _dmaImpl1(void* base_addr, size_t offset, size_t size) { assert(size > 0); asm(""); return NULL; } -void *dmaLoad(void *addr, size_t offset, size_t size) { +void* dmaLoad(void* addr, size_t offset, size_t size) { return _dmaImpl1(addr, offset, size); } -void *dmaStore(void *addr, size_t offset, size_t size) { +void* dmaStore(void* addr, size_t offset, size_t size) { return _dmaImpl1(addr, offset, size); } #endif -void dmaFence() { asm(""); } +void dmaFence() { + asm(""); +} diff --git a/hpvm/test/hpvm-cava/src/dma_interface.h b/hpvm/test/hpvm-cava/src/dma_interface.h index 771ece523824cff5923581aca671ab7d26fae706..f23234eede4df99db84b144646530dfe240c6e62 100644 --- a/hpvm/test/hpvm-cava/src/dma_interface.h +++ b/hpvm/test/hpvm-cava/src/dma_interface.h @@ -10,12 +10,12 @@ // Version 3 of the DMA interface enables memcpy operations from arbitrary // source and destination addresses. -int dmaLoad(void *dst_addr, void *src_host_addr, size_t size); -int dmaStore(void *dst_host_addr, void *src_addr, size_t size); +int dmaLoad(void* dst_addr, void* src_host_addr, size_t size); +int dmaStore(void* dst_host_addr, void* src_addr, size_t size); // The user can explicitly toggle the state of ready bits, if ready mode is // enabled. This requires support from DMA v3. -int setReadyBits(void *start_addr, size_t size, unsigned value); +int setReadyBits(void* start_addr, size_t size, unsigned value); #elif defined(DMA_INTERFACE_V2) @@ -26,18 +26,17 @@ int setReadyBits(void *start_addr, size_t size, unsigned value); // actually copied from source to destination (the memory copy will not show up // in the trace). -void *dmaLoad(void *base_addr, size_t src_off, size_t dst_off, size_t size); -void *dmaStore(void *base_addr, size_t src_off, size_t dst_off, size_t size); +void* dmaLoad(void* base_addr, size_t src_off, size_t dst_off, size_t size); +void* dmaStore(void* base_addr, size_t src_off, size_t dst_off, size_t size); #else #warning "DMA interface v1 is deprecated!" -// Version 1 of the DMA interface is now deprecated and will be removed -// entirely. +// Version 1 of the DMA interface is now deprecated and will be removed entirely. -void *dmaLoad(void *addr, size_t offset, size_t size); -void *dmaStore(void *addr, size_t offset, size_t size); +void* dmaLoad(void* addr, size_t offset, size_t size); +void* dmaStore(void* addr, size_t offset, size_t size); #endif void dmaFence(); diff --git a/hpvm/test/hpvm-cava/src/load_cam_model.c b/hpvm/test/hpvm-cava/src/load_cam_model.c index baec19ad4963e68ddfe2360e53f58dc436fd0da6..124fe0b7d175c2655feac562ecd6e2a5b73cc96a 100644 --- a/hpvm/test/hpvm-cava/src/load_cam_model.c +++ b/hpvm/test/hpvm-cava/src/load_cam_model.c @@ -1,14 +1,13 @@ -#include "load_cam_model.h" -#include "utility.h" -#include <assert.h> #include <stdio.h> #include <stdlib.h> -// clang-format: pipe_stages.h must come after stdlib.h -#include "pipe_stages.h" #include <string.h> +#include <assert.h> +#include "utility.h" +#include "pipe_stages.h" +#include "load_cam_model.h" // Get color space transform -float *get_Ts(char *cam_model_path) { +float* get_Ts(char* cam_model_path) { float *Ts; int err = posix_memalign((void **)&Ts, CACHELINE_SIZE, sizeof(float) * 9); assert(err == 0 && "Failed to allocate memory!"); @@ -33,7 +32,7 @@ float *get_Ts(char *cam_model_path) { str = strtok(line, " \n"); int i = 0; while (str != NULL) { - line_data[i] = atof(str); + line_data[i] = atof(str); str = strtok(NULL, " \n"); i++; } @@ -51,7 +50,7 @@ float *get_Ts(char *cam_model_path) { } // Get white balance transform -float *get_Tw(char *cam_model_path, int wb_index) { +float* get_Tw(char* cam_model_path, int wb_index) { float *Tw; int err = posix_memalign((void **)&Tw, CACHELINE_SIZE, sizeof(float) * 9); assert(err == 0 && "Failed to allocate memory!"); @@ -63,7 +62,7 @@ float *get_Tw(char *cam_model_path, int wb_index) { // Calculate base for the white balance transform selected // For more details see the camera model readme - int wb_base = 8 + 5 * (wb_index - 1); + int wb_base = 8 + 5*(wb_index-1); // Open file for reading // Open file for reading @@ -82,15 +81,15 @@ float *get_Tw(char *cam_model_path, int wb_index) { str = strtok(line, " \n"); int i = 0; while (str != NULL) { - line_data[i] = atof(str); + line_data[i] = atof(str); str = strtok(NULL, " \n"); i++; } if (line_idx == wb_base) { // Convert the white balance vector into a diagaonal matrix - for (int i = 0; i < 3; i++) { - for (int j = 0; j < 3; j++) { + for (int i=0; i<3; i++) { + for (int j=0; j<3; j++) { if (i == j) { Tw[i * 3 + j] = line_data[i]; } else { @@ -106,8 +105,9 @@ float *get_Tw(char *cam_model_path, int wb_index) { return Tw; } + // Get combined transforms for checking -float *get_TsTw(char *cam_model_path, int wb_index) { +float* get_TsTw(char* cam_model_path, int wb_index) { float *TsTw; int err = posix_memalign((void **)&TsTw, CACHELINE_SIZE, sizeof(float) * 9); assert(err == 0 && "Failed to allocate memory!"); @@ -119,7 +119,7 @@ float *get_TsTw(char *cam_model_path, int wb_index) { // Calculate base for the white balance transform selected // For more details see the camera model readme - int wb_base = 5 + 5 * (wb_index - 1); + int wb_base = 5 + 5*(wb_index-1); // Open file for reading char file_name[] = "raw2jpg_transform.txt"; @@ -137,7 +137,7 @@ float *get_TsTw(char *cam_model_path, int wb_index) { str = strtok(line, " \n"); int i = 0; while (str != NULL) { - line_data[i] = atof(str); + line_data[i] = atof(str); str = strtok(NULL, " \n"); i++; } @@ -155,7 +155,7 @@ float *get_TsTw(char *cam_model_path, int wb_index) { } // Get control points -float *get_ctrl_pts(char *cam_model_path, int num_cntrl_pts) { +float* get_ctrl_pts(char* cam_model_path, int num_cntrl_pts) { float *ctrl_pnts; int err = posix_memalign((void **)&ctrl_pnts, CACHELINE_SIZE, sizeof(float) * num_cntrl_pts * 3); @@ -200,7 +200,7 @@ float *get_ctrl_pts(char *cam_model_path, int num_cntrl_pts) { } // Get weights -float *get_weights(char *cam_model_path, int num_cntrl_pts) { +float* get_weights(char* cam_model_path, int num_cntrl_pts) { float *weights; int err = posix_memalign((void **)&weights, CACHELINE_SIZE, sizeof(float) * num_cntrl_pts * 3); @@ -245,7 +245,7 @@ float *get_weights(char *cam_model_path, int num_cntrl_pts) { } // Get coeficients -float *get_coefs(char *cam_model_path, int num_cntrl_pts) { +float* get_coefs(char* cam_model_path, int num_cntrl_pts) { float *coefs; int err = posix_memalign((void **)&coefs, CACHELINE_SIZE, sizeof(float) * 12); assert(err == 0 && "Failed to allocate memory!"); @@ -288,8 +288,9 @@ float *get_coefs(char *cam_model_path, int num_cntrl_pts) { return coefs; } + // Get tone mapping table -float *get_tone_map(char *cam_model_path) { +float* get_tone_map(char* cam_model_path) { float *tone_map; int err = posix_memalign((void **)&tone_map, CACHELINE_SIZE, sizeof(float) * 256 * CHAN_SIZE); diff --git a/hpvm/test/hpvm-cava/src/main.c b/hpvm/test/hpvm-cava/src/main.c index 8e7bd197d026773b47fd0e954b56821cd151c60a..e43bbb4f25c4c97c9907ebae37251c854860c3b5 100644 --- a/hpvm/test/hpvm-cava/src/main.c +++ b/hpvm/test/hpvm-cava/src/main.c @@ -1,14 +1,14 @@ -#include "utility.h" #include <argp.h> -#include <assert.h> -#include <math.h> #include <stdio.h> #include <stdlib.h> +#include <assert.h> #include <string.h> +#include <math.h> +#include "utility.h" #include "cam_pipe_utility.h" -#include "load_cam_model.h" #include "pipe_stages.h" +#include "load_cam_model.h" #include "visc.h" @@ -17,138 +17,120 @@ int NUM_CLASSES; int INPUT_DIM; int NUM_WORKER_THREADS; -// Type of struct holding the return value from the last node. -struct RetStruct { - size_t bytesRet; -}; - // Type of struct that is used to pass arguments to the HPVM dataflow graph // using the hpvm launch operation typedef struct __attribute__((__packed__)) { - uint8_t *input; - size_t bytes_input; - uint8_t *result; - size_t bytes_result; - float *input_scaled; - size_t bytes_input_scaled; - float *result_scaled; - size_t bytes_result_scaled; - float *demosaic_out; - size_t bytes_demosaic_out; - float *denoise_out; - size_t bytes_denoise_out; - float *transform_out; - size_t bytes_transform_out; - float *gamut_out; - size_t bytes_gamut_out; - float *TsTw; - size_t bytes_TsTw; - float *ctrl_pts; - size_t bytes_ctrl_pts; - float *weights; - size_t bytes_weights; - float *coefs; - size_t bytes_coefs; - float *l2_dist; - size_t bytes_l2_dist; - float *tone_map; - size_t bytes_tone_map; - int row_size; - int col_size; - struct RetStruct ret; // Instance of RetStruct holding the return value. -} RootIn; + uint8_t *input; size_t bytes_input; + uint8_t *result; size_t bytes_result; + float *input_scaled; size_t bytes_input_scaled; + float *result_scaled; size_t bytes_result_scaled; + float *demosaic_out; size_t bytes_demosaic_out; + float *denoise_out; size_t bytes_denoise_out; + float *transform_out; size_t bytes_transform_out; + float *gamut_out;size_t bytes_gamut_out; + float *TsTw; size_t bytes_TsTw; + float *ctrl_pts; size_t bytes_ctrl_pts; + float *weights; size_t bytes_weights; + float*coefs; size_t bytes_coefs; + float *l2_dist; size_t bytes_l2_dist; + float *tone_map; size_t bytes_tone_map; + size_t row_size; size_t col_size; +} +RootIn; typedef enum _argnum { - RAW_IMAGE_BIN, - OUTPUT_IMAGE_BIN, - NUM_REQUIRED_ARGS, - DATA_FILE = NUM_REQUIRED_ARGS, - NUM_ARGS, + RAW_IMAGE_BIN, + OUTPUT_IMAGE_BIN, + NUM_REQUIRED_ARGS, + DATA_FILE = NUM_REQUIRED_ARGS, + NUM_ARGS, } argnum; typedef struct _arguments { - char *args[NUM_ARGS]; - int num_inputs; - int num_threads; + char* args[NUM_ARGS]; + int num_inputs; + int num_threads; } arguments; static char prog_doc[] = "\nCamera pipeline on gem5-Aladdin.\n"; static char args_doc[] = "path/to/raw-image-binary path/to/output-image-binary"; static struct argp_option options[] = { - {"num-inputs", 'n', "N", 0, "Number of input images"}, - {0}, - {"data-file", 'f', "F", 0, - "File to read data and weights from (if data-init-mode == READ_FILE or " - "save-params is true). *.txt files are decoded as text files, while " - "*.bin files are decoded as binary files."}, + { "num-inputs", 'n', "N", 0, "Number of input images" }, { 0 }, + { "data-file", 'f', "F", 0, + "File to read data and weights from (if data-init-mode == READ_FILE or " + "save-params is true). *.txt files are decoded as text files, while " + "*.bin files are decoded as binary files." }, }; -static error_t parse_opt(int key, char *arg, struct argp_state *state) { - arguments *args = (arguments *)(state->input); - switch (key) { - case 'n': { - args->num_inputs = strtol(arg, NULL, 10); - break; - } - case 'f': { - args->args[DATA_FILE] = arg; - break; - } - case 't': { - args->num_threads = strtol(arg, NULL, 10); - break; - } - case ARGP_KEY_ARG: { - if (state->arg_num >= NUM_REQUIRED_ARGS) - argp_usage(state); - args->args[state->arg_num] = arg; - break; - } - case ARGP_KEY_END: { - if (state->arg_num < NUM_REQUIRED_ARGS) { - fprintf(stderr, "Not enough arguments! Got %d, require %d.\n", - state->arg_num, NUM_REQUIRED_ARGS); - argp_usage(state); +static error_t parse_opt(int key, char* arg, struct argp_state* state) { + arguments* args = (arguments*)(state->input); + switch (key) { + case 'n': { + args->num_inputs = strtol(arg, NULL, 10); + break; + } + case 'f': { + args->args[DATA_FILE] = arg; + break; + } + case 't': { + args->num_threads = strtol(arg, NULL, 10); + break; + } + case ARGP_KEY_ARG: { + if (state->arg_num >= NUM_REQUIRED_ARGS) + argp_usage(state); + args->args[state->arg_num] = arg; + break; + } + case ARGP_KEY_END: { + if (state->arg_num < NUM_REQUIRED_ARGS) { + fprintf(stderr, + "Not enough arguments! Got %d, require %d.\n", + state->arg_num, + NUM_REQUIRED_ARGS); + argp_usage(state); + } + break; + } + default: + return ARGP_ERR_UNKNOWN; } - break; - } - default: - return ARGP_ERR_UNKNOWN; - } - return 0; + return 0; } -void set_default_args(arguments *args) { - args->num_inputs = 1; - args->num_threads = 0; - for (int i = 0; i < NUM_ARGS; i++) { - args->args[i] = NULL; - } +void set_default_args(arguments* args) { + args->num_inputs = 1; + args->num_threads = 0; + for (int i = 0; i < NUM_ARGS; i++) { + args->args[i] = NULL; + } } -static struct argp parser = {options, parse_opt, args_doc, prog_doc}; +static struct argp parser = { options, parse_opt, args_doc, prog_doc }; // Helper function for printing intermediate results -void descale_cpu(float *input, size_t bytes_input, uint8_t *output, - size_t bytes_result, size_t row_size, size_t col_size) { - +void descale_cpu(float *input, size_t bytes_input, + uint8_t *output, size_t bytes_result, + size_t row_size, size_t col_size) { + for (int chan = 0; chan < CHAN_SIZE; chan++) for (int row = 0; row < row_size; row++) for (int col = 0; col < col_size; col++) { - int index = (chan * row_size + row) * col_size + col; + int index = (chan*row_size + row) * col_size + col; output[index] = min(max(input[index] * 255, 0), 255); } } static void sort(float arr[], int n) { - int i, j; - for (i = 0; i < n - 1; i++) - for (j = 0; j < n - i - 1; j++) - if (arr[j] > arr[j + 1]) { - float temp = arr[j]; - arr[j] = arr[j + 1]; - arr[j + 1] = temp; - } + int i, j; + for (i = 0; i < n - 1; i++) + for (j = 0; j < n - i - 1; j++) + if (arr[j] > arr[j + 1]) { + float temp = arr[j]; + arr[j] = arr[j + 1]; + arr[j + 1] = temp; + } } /**************************************************************/ @@ -158,258 +140,255 @@ static void sort(float arr[], int n) { // In this benchmark, no use of HPVM query intrinsics in the leaf node functions // Leaf HPVM node function for scale -void scale_fxp(uint8_t *input, size_t bytes_input, float *output, - size_t bytes_output, size_t row_size, size_t col_size) { +void scale_fxp(uint8_t *input, size_t bytes_input, + float *output, size_t bytes_output, + size_t row_size, size_t col_size) { - // Specifies compilation target for current node + //Specifies compilation target for current node __visc__hint(CPU_TARGET); // Specifies pointer arguments that will be used as "in" and "out" arguments // - count of "in" arguments // - list of "in" argument , and similar for "out" __visc__attributes(2, input, output, 1, output); - void *thisNode = __visc__getNode(); - int row = __visc__getNodeInstanceID_x(thisNode); + void* thisNode = __visc__getNode(); + int row = __visc__getNodeInstanceID_x(thisNode); for (int chan = 0; chan < CHAN_SIZE; chan++) - // for (int row = 0; row < row_size; row++) - for (int col = 0; col < col_size; col++) { - int index = (chan * row_size + row) * col_size + col; - output[index] = input[index] * 1.0 / 255; - } +// for (int row = 0; row < row_size; row++) + for (int col = 0; col < col_size; col++){ + int index = (chan*row_size + row) * col_size + col; + output[index] = input[index] * 1.0 / 255; + } __visc__return(1, bytes_output); } // Leaf HPVM node function for descale -void descale_fxp(float *input, size_t bytes_input, uint8_t *output, - size_t bytes_result, size_t row_size, size_t col_size) { +void descale_fxp(float *input, size_t bytes_input, + uint8_t *output, size_t bytes_result, + size_t row_size, size_t col_size) { __visc__hint(CPU_TARGET); __visc__attributes(2, input, output, 1, output); - + for (int chan = 0; chan < CHAN_SIZE; chan++) for (int row = 0; row < row_size; row++) for (int col = 0; col < col_size; col++) { - int index = (chan * row_size + row) * col_size + col; + int index = (chan*row_size + row) * col_size + col; output[index] = min(max(input[index] * 255, 0), 255); } __visc__return(1, bytes_result); } // Leaf HPVM node function for demosaicing -void demosaic_fxp(float *input, size_t bytes_input, float *result, - size_t bytes_result, size_t row_size, size_t col_size) { +void demosaic_fxp(float *input, size_t bytes_input, + float *result, size_t bytes_result, + size_t row_size, size_t col_size) { __visc__hint(DEVICE); __visc__attributes(2, input, result, 1, result); - - void *thisNode = __visc__getNode(); - int row = __visc__getNodeInstanceID_x(thisNode); - // for (int row = 1; row < row_size - 1; row++) - for (int col = 1; col < col_size - 1; col++) { - int index_0 = (0 * row_size + row) * col_size + col; - int index_1 = (1 * row_size + row) * col_size + col; - int index_2 = (2 * row_size + row) * col_size + col; - if (row % 2 == 0 && col % 2 == 0) { - // Green pixel - // Getting the R values - float R1 = input[index_0 - 1]; - float R2 = input[index_0 + 1]; - // Getting the B values - float B1 = input[index_2 - col_size]; - float B2 = input[index_2 + col_size]; - // R - result[index_0] = (R1 + R2) / 2; - // G - result[index_1] = input[index_1] * 2; - // B - result[index_2] = (B1 + B2) / 2; - } else if (row % 2 == 0 && col % 2 == 1) { - // Red pixel - // Getting the G values - float G1 = input[index_1 - col_size]; - float G2 = input[index_1 + col_size]; - float G3 = input[index_1 - 1]; - float G4 = input[index_1 + 1]; - // Getting the B values - float B1 = input[index_2 - col_size - 1]; - float B2 = input[index_2 - col_size + 1]; - float B3 = input[index_2 + col_size - 1]; - float B4 = input[index_2 + col_size + 1]; - // R - result[index_0] = input[index_0]; - // G - result[index_1] = (G1 + G2 + G3 + G4) / 2; - // B (center pixel) - result[index_2] = (B1 + B2 + B3 + B4) / 4; - } else if (row % 2 == 1 && col % 2 == 0) { - // Blue pixel - // Getting the R values - float R1 = input[index_0 - col_size - 1]; - float R2 = input[index_0 + col_size - 1]; - float R3 = input[index_0 - col_size + 1]; - float R4 = input[index_0 + col_size + 1]; - // Getting the G values - float G1 = input[index_1 - col_size]; - float G2 = input[index_1 + col_size]; - float G3 = input[index_1 - 1]; - float G4 = input[index_1 + 1]; - // R - result[index_0] = (R1 + R2 + R3 + R4) / 4; - // G - result[index_1] = (G1 + G2 + G3 + G4) / 2; - // B - result[index_2] = input[index_2]; - } else { - // Bottom Green pixel - // Getting the R values - float R1 = input[index_0 - col_size]; - float R2 = input[index_0 + col_size]; - // Getting the B values - float B1 = input[index_2 - 1]; - float B2 = input[index_2 + 1]; - // R - result[index_0] = (R1 + R2) / 2; - // G - result[index_1] = input[index_1] * 2; - // B - result[index_2] = (B1 + B2) / 2; - } - } + + void* thisNode = __visc__getNode(); + int row = __visc__getNodeInstanceID_x(thisNode); +// for (int row = 1; row < row_size - 1; row++) + for (int col = 1; col < col_size - 1; col++) { + int index_0 = (0 * row_size + row) * col_size + col; + int index_1 = (1 * row_size + row) * col_size + col; + int index_2 = (2 * row_size + row) * col_size + col; + if (row % 2 == 0 && col % 2 == 0) { + // Green pixel + // Getting the R values + float R1 = input[index_0 - 1]; + float R2 = input[index_0 + 1]; + // Getting the B values + float B1 = input[index_2 - col_size]; + float B2 = input[index_2 + col_size]; + // R + result[index_0] = (R1 + R2) / 2; + // G + result[index_1] = input[index_1] * 2; + // B + result[index_2] = (B1 + B2) / 2; + } else if (row % 2 == 0 && col % 2 == 1) { + // Red pixel + // Getting the G values + float G1 = input[index_1 - col_size]; + float G2 = input[index_1 + col_size]; + float G3 = input[index_1 - 1]; + float G4 = input[index_1 + 1]; + // Getting the B values + float B1 = input[index_2 - col_size - 1]; + float B2 = input[index_2 - col_size + 1]; + float B3 = input[index_2 + col_size - 1]; + float B4 = input[index_2 + col_size + 1]; + // R + result[index_0] = input[index_0]; + // G + result[index_1] = (G1 + G2 + G3 + G4) / 2; + // B (center pixel) + result[index_2] = (B1 + B2 + B3 + B4) / 4; + } else if (row % 2 == 1 && col % 2 == 0) { + // Blue pixel + // Getting the R values + float R1 = input[index_0 - col_size - 1]; + float R2 = input[index_0 + col_size - 1]; + float R3 = input[index_0 - col_size + 1]; + float R4 = input[index_0 + col_size + 1]; + // Getting the G values + float G1 = input[index_1 - col_size]; + float G2 = input[index_1 + col_size]; + float G3 = input[index_1 - 1]; + float G4 = input[index_1 + 1]; + // R + result[index_0] = (R1 + R2 + R3 + R4) / 4; + // G + result[index_1] = (G1 + G2 + G3 + G4) / 2; + // B + result[index_2] = input[index_2]; + } else { + // Bottom Green pixel + // Getting the R values + float R1 = input[index_0 - col_size]; + float R2 = input[index_0 + col_size]; + // Getting the B values + float B1 = input[index_2 - 1]; + float B2 = input[index_2 + 1]; + // R + result[index_0] = (R1 + R2) / 2; + // G + result[index_1] = input[index_1] * 2; + // B + result[index_2] = (B1 + B2) / 2; + } + } __visc__return(1, bytes_result); } // Leaf HPVM node function for denoise -void denoise_fxp(float *input, size_t bytes_input, float *result, - size_t bytes_result, size_t row_size, size_t col_size) { +void denoise_fxp(float *input, size_t bytes_input, + float *result, size_t bytes_result, + size_t row_size, size_t col_size) { __visc__hint(CPU_TARGET); __visc__attributes(2, input, result, 1, result); - - void *thisNode = __visc__getNode(); - int row = __visc__getNodeInstanceID_x(thisNode); + + void* thisNode = __visc__getNode(); + int row = __visc__getNodeInstanceID_x(thisNode); for (int chan = 0; chan < CHAN_SIZE; chan++) - // for (int row = 0; row < row_size; row++) - for (int col = 0; col < col_size; col++) - if (row >= 1 && row < row_size - 1 && col >= 1 && col < col_size - 1) { - float filter[9]; - for (int i = -1; i < 2; i++) - for (int j = -1; j < 2; j++) { - int index = ((i + row) - row + 1) * 3 + (j + col) - col + 1; - filter[index] = - input[(chan * row_size + (i + row)) * col_size + (j + col)]; - } - sort(filter, 9); - result[(chan * row_size + row) * col_size + col] = filter[4]; - } else { - result[(chan * row_size + row) * col_size + col] = - input[(chan * row_size + row) * col_size + col]; - } +// for (int row = 0; row < row_size; row++) + for (int col = 0; col < col_size; col++) + if (row >= 1 && row < row_size - 1 && col >= 1 && col < col_size - 1) { + float filter[9]; + for (int i = -1; i < 2; i++) + for (int j = -1; j < 2; j++) { + int index = ((i+row) - row + 1) * 3 + (j+col) - col + 1; + filter[index] = input[(chan * row_size + (i + row)) * col_size + (j + col)]; + } + sort(filter, 9); + result[(chan * row_size + row) * col_size + col] = filter[4]; + } else { + result[(chan * row_size + row) * col_size + col] = input[(chan * row_size + row) * col_size + col]; + } __visc__return(1, bytes_result); } // Leaf HPVM node function, for color map and white balance transform -void transform_fxp(float *input, size_t bytes_input, float *result, - size_t bytes_result, float *TsTw_tran, size_t bytes_TsTw, +void transform_fxp(float *input, size_t bytes_input, + float *result, size_t bytes_result, + float *TsTw_tran, size_t bytes_TsTw, size_t row_size, size_t col_size) { __visc__hint(DEVICE); __visc__attributes(3, input, result, TsTw_tran, 1, result); - - void *thisNode = __visc__getNode(); - int row = __visc__getNodeInstanceID_x(thisNode); + + void* thisNode = __visc__getNode(); + int row = __visc__getNodeInstanceID_x(thisNode); for (int chan = 0; chan < CHAN_SIZE; chan++) - // for (int row = 0; row < row_size; row++) - for (int col = 0; col < col_size; col++) { - int index = (chan * row_size + row) * col_size + col; - int index_0 = (0 * row_size + row) * col_size + col; - int index_1 = (1 * row_size + row) * col_size + col; - int index_2 = (2 * row_size + row) * col_size + col; - int index_2d_0 = 0 * CHAN_SIZE + chan; - int index_2d_1 = 1 * CHAN_SIZE + chan; - int index_2d_2 = 2 * CHAN_SIZE + chan; - result[index] = max(input[index_0] * TsTw_tran[index_2d_0] + - input[index_1] * TsTw_tran[index_2d_1] + - input[index_2] * TsTw_tran[index_2d_2], - 0); - } +// for (int row = 0; row < row_size; row++) + for (int col = 0; col < col_size; col++) { + int index = (chan * row_size + row) * col_size + col; + int index_0 = (0 * row_size + row) * col_size + col; + int index_1 = (1 * row_size + row) * col_size + col; + int index_2 = (2 * row_size + row) * col_size + col; + int index_2d_0 = 0 * CHAN_SIZE + chan; + int index_2d_1 = 1 * CHAN_SIZE + chan; + int index_2d_2 = 2 * CHAN_SIZE + chan; + result[index] = + max(input[index_0] * TsTw_tran[index_2d_0] + + input[index_1] * TsTw_tran[index_2d_1] + + input[index_2] * TsTw_tran[index_2d_2], + 0); + } __visc__return(1, bytes_result); } // Leaf HPVM node function, for gamut mapping -void gamut_map_fxp(float *input, size_t bytes_input, float *result, - size_t bytes_result, float *ctrl_pts, size_t bytes_ctrl_pts, - float *weights, size_t bytes_weights, float *coefs, - size_t bytes_coefs, float *l2_dist, size_t bytes_l2_dist, +void gamut_map_fxp(float *input, size_t bytes_input, + float *result, size_t bytes_result, + float *ctrl_pts, size_t bytes_ctrl_pts, + float *weights, size_t bytes_weights, + float *coefs, size_t bytes_coefs, + float *l2_dist, size_t bytes_l2_dist, size_t row_size, size_t col_size) { __visc__hint(CPU_TARGET); - __visc__attributes(6, input, result, ctrl_pts, weights, coefs, l2_dist, 2, - result, l2_dist); - - // First, get the L2 norm from every pixel to the control points, - // Then, sum it and weight it. Finally, add the bias. - void *thisNode = __visc__getNode(); - int row = __visc__getNodeInstanceID_x(thisNode); - // for (int row = 0; row < row_size; row++) - for (int col = 0; col < col_size; col++) { - float chan_val_0 = 0.0; - float chan_val_1 = 0.0; - float chan_val_2 = 0.0; - for (int cp = 0; cp < 3702; cp++) { - int index_0 = (0 * row_size + row) * col_size + col; - int index_1 = (1 * row_size + row) * col_size + col; - int index_2 = (2 * row_size + row) * col_size + col; - float val1 = (input[index_0] - ctrl_pts[cp * 3 + 0]); - float val2 = (input[index_0] - ctrl_pts[cp * 3 + 0]); - float val3 = (input[index_1] - ctrl_pts[cp * 3 + 1]); - float val4 = (input[index_1] - ctrl_pts[cp * 3 + 1]); - float val5 = (input[index_2] - ctrl_pts[cp * 3 + 2]); - float val6 = (input[index_2] - ctrl_pts[cp * 3 + 2]); - float val = val1 * val2 + val3 * val4 + val5 * val6; - float sqrt_val = sqrt(val); - chan_val_0 += sqrt_val * weights[cp * CHAN_SIZE + 0]; - chan_val_1 += sqrt_val * weights[cp * CHAN_SIZE + 1]; - chan_val_2 += sqrt_val * weights[cp * CHAN_SIZE + 2]; + __visc__attributes(6, input, result, ctrl_pts, weights, coefs, l2_dist, 2, result, l2_dist); + + // First, get the L2 norm from every pixel to the control points, + // Then, sum it and weight it. Finally, add the bias. + void* thisNode = __visc__getNode(); + int row = __visc__getNodeInstanceID_x(thisNode); +// for (int row = 0; row < row_size; row++) + for (int col = 0; col < col_size; col++) { + float chan_val_0 = 0.0; + float chan_val_1 = 0.0; + float chan_val_2 = 0.0; + for (int cp = 0; cp < 3702; cp++) { + int index_0 = (0 * row_size + row) * col_size + col; + int index_1 = (1 * row_size + row) * col_size + col; + int index_2 = (2 * row_size + row) * col_size + col; + float val1 = (input[index_0] - ctrl_pts[cp * 3 + 0]); + float val2 = (input[index_0] - ctrl_pts[cp * 3 + 0]); + float val3 = (input[index_1] - ctrl_pts[cp * 3 + 1]); + float val4 = (input[index_1] - ctrl_pts[cp * 3 + 1]); + float val5 = (input[index_2] - ctrl_pts[cp * 3 + 2]); + float val6 = (input[index_2] - ctrl_pts[cp * 3 + 2]); + float val = val1 * val2 + val3 * val4 + val5 * val6; + float sqrt_val = sqrt(val); + chan_val_0 += sqrt_val * weights[cp * CHAN_SIZE + 0]; + chan_val_1 += sqrt_val * weights[cp * CHAN_SIZE + 1]; + chan_val_2 += sqrt_val * weights[cp * CHAN_SIZE + 2]; + } + chan_val_0 += coefs[0 * CHAN_SIZE + 0] + + coefs[1 * CHAN_SIZE + 0] * input[(0 * row_size + row) * col_size + col] + + coefs[2 * CHAN_SIZE + 0] * input[(1 * row_size + row) * col_size + col] + + coefs[3 * CHAN_SIZE + 0] * input[(2 * row_size + row) * col_size + col]; + chan_val_1 += coefs[0 * CHAN_SIZE + 1] + + coefs[1 * CHAN_SIZE + 1] * input[(0 * row_size + row) * col_size + col] + + coefs[2 * CHAN_SIZE + 1] * input[(1 * row_size + row) * col_size + col] + + coefs[3 * CHAN_SIZE + 1] * input[(2 * row_size + row) * col_size + col]; + chan_val_2 += coefs[0 * CHAN_SIZE + 2] + + coefs[1 * CHAN_SIZE + 2] * input[(0 * row_size + row) * col_size + col] + + coefs[2 * CHAN_SIZE + 2] * input[(1 * row_size + row) * col_size + col] + + coefs[3 * CHAN_SIZE + 2] * input[(2 * row_size + row) * col_size + col]; + result[(0 * row_size + row) * col_size + col] = max(chan_val_0, 0); + result[(1 * row_size + row) * col_size + col] = max(chan_val_1, 0); + result[(2 * row_size + row) * col_size + col] = max(chan_val_2, 0); } - chan_val_0 += - coefs[0 * CHAN_SIZE + 0] + - coefs[1 * CHAN_SIZE + 0] * - input[(0 * row_size + row) * col_size + col] + - coefs[2 * CHAN_SIZE + 0] * - input[(1 * row_size + row) * col_size + col] + - coefs[3 * CHAN_SIZE + 0] * input[(2 * row_size + row) * col_size + col]; - chan_val_1 += - coefs[0 * CHAN_SIZE + 1] + - coefs[1 * CHAN_SIZE + 1] * - input[(0 * row_size + row) * col_size + col] + - coefs[2 * CHAN_SIZE + 1] * - input[(1 * row_size + row) * col_size + col] + - coefs[3 * CHAN_SIZE + 1] * input[(2 * row_size + row) * col_size + col]; - chan_val_2 += - coefs[0 * CHAN_SIZE + 2] + - coefs[1 * CHAN_SIZE + 2] * - input[(0 * row_size + row) * col_size + col] + - coefs[2 * CHAN_SIZE + 2] * - input[(1 * row_size + row) * col_size + col] + - coefs[3 * CHAN_SIZE + 2] * input[(2 * row_size + row) * col_size + col]; - result[(0 * row_size + row) * col_size + col] = max(chan_val_0, 0); - result[(1 * row_size + row) * col_size + col] = max(chan_val_1, 0); - result[(2 * row_size + row) * col_size + col] = max(chan_val_2, 0); - } __visc__return(1, bytes_result); } // HPVM leaf node function, for tone mapping -void tone_map_fxp(float *input, size_t bytes_input, float *result, - size_t bytes_result, float *tone_map, size_t bytes_tone_map, +void tone_map_fxp(float *input, size_t bytes_input, + float *result, size_t bytes_result, + float *tone_map, size_t bytes_tone_map, size_t row_size, size_t col_size) { __visc__hint(DEVICE); __visc__attributes(3, input, result, tone_map, 1, result); - - void *thisNode = __visc__getNode(); - int row = __visc__getNodeInstanceID_x(thisNode); + + void* thisNode = __visc__getNode(); + int row = __visc__getNodeInstanceID_x(thisNode); for (int chan = 0; chan < CHAN_SIZE; chan++) - // for (int row = 0; row < row_size; row++) - for (int col = 0; col < col_size; col++) { - int index = (chan * row_size + row) * col_size + col; - uint8_t x = input[index] * 255; - result[index] = tone_map[x * CHAN_SIZE + chan]; - } +// for (int row = 0; row < row_size; row++) + for (int col = 0; col < col_size; col++) { + int index = (chan * row_size + row) * col_size + col; + uint8_t x = input[index] * 255; + result[index] = tone_map[x * CHAN_SIZE + chan]; + } __visc__return(1, bytes_result); } @@ -421,8 +400,9 @@ void tone_map_fxp(float *input, size_t bytes_input, float *result, // requirement for the FPGA backend . The CPU backend also supports this, // so it does not cause a portability issue. -void scale_fxp_wrapper(uint8_t *input, size_t bytes_input, float *result, - size_t bytes_result, size_t row_size, size_t col_size) { +void scale_fxp_wrapper(uint8_t *input, size_t bytes_input, + float *result, size_t bytes_result, + size_t row_size, size_t col_size) { __visc__hint(CPU_TARGET); __visc__attributes(2, input, result, 1, result); @@ -447,9 +427,9 @@ void scale_fxp_wrapper(uint8_t *input, size_t bytes_input, float *result, __visc__bindOut(ScaleNode, 0, 0, 0); } -void descale_fxp_wrapper(float *input, size_t bytes_input, uint8_t *result, - size_t bytes_result, size_t row_size, - size_t col_size) { +void descale_fxp_wrapper(float *input, size_t bytes_input, + uint8_t *result, size_t bytes_result, + size_t row_size, size_t col_size) { __visc__hint(CPU_TARGET); __visc__attributes(2, input, result, 1, result); void *DescaleNode = __visc__createNodeND(1, descale_fxp, row_size); @@ -459,13 +439,13 @@ void descale_fxp_wrapper(float *input, size_t bytes_input, uint8_t *result, __visc__bindIn(DescaleNode, 3, 3, 0); // bind bytes_result __visc__bindIn(DescaleNode, 4, 4, 0); // bind row_size __visc__bindIn(DescaleNode, 5, 5, 0); // bind col_size - + __visc__bindOut(DescaleNode, 0, 0, 0); } -void demosaic_fxp_wrapper(float *input, size_t bytes_input, float *result, - size_t bytes_result, size_t row_size, - size_t col_size) { +void demosaic_fxp_wrapper(float *input, size_t bytes_input, + float *result, size_t bytes_result, + size_t row_size, size_t col_size) { __visc__hint(CPU_TARGET); __visc__attributes(2, input, result, 1, result); void *DemosaicNode = __visc__createNodeND(1, demosaic_fxp, row_size); @@ -475,13 +455,13 @@ void demosaic_fxp_wrapper(float *input, size_t bytes_input, float *result, __visc__bindIn(DemosaicNode, 3, 3, 0); // bind bytes_result __visc__bindIn(DemosaicNode, 4, 4, 0); // bind row_size __visc__bindIn(DemosaicNode, 5, 5, 0); // bind col_size - + __visc__bindOut(DemosaicNode, 0, 0, 0); } -void denoise_fxp_wrapper(float *input, size_t bytes_input, float *result, - size_t bytes_result, size_t row_size, - size_t col_size) { +void denoise_fxp_wrapper(float *input, size_t bytes_input, + float *result, size_t bytes_result, + size_t row_size, size_t col_size) { __visc__hint(CPU_TARGET); __visc__attributes(2, input, result, 1, result); void *DenoiseNode = __visc__createNodeND(1, denoise_fxp, row_size); @@ -491,14 +471,14 @@ void denoise_fxp_wrapper(float *input, size_t bytes_input, float *result, __visc__bindIn(DenoiseNode, 3, 3, 0); // bind bytes_result __visc__bindIn(DenoiseNode, 4, 4, 0); // bind row_size __visc__bindIn(DenoiseNode, 5, 5, 0); // bind col_size - + __visc__bindOut(DenoiseNode, 0, 0, 0); } -void transform_fxp_wrapper(float *input, size_t bytes_input, float *result, - size_t bytes_result, float *TsTw_tran, - size_t bytes_TsTw, size_t row_size, - size_t col_size) { +void transform_fxp_wrapper(float *input, size_t bytes_input, + float *result, size_t bytes_result, + float *TsTw_tran, size_t bytes_TsTw, + size_t row_size, size_t col_size) { __visc__hint(CPU_TARGET); __visc__attributes(3, input, result, TsTw_tran, 1, result); void *TransformNode = __visc__createNodeND(1, transform_fxp, row_size); @@ -510,41 +490,41 @@ void transform_fxp_wrapper(float *input, size_t bytes_input, float *result, __visc__bindIn(TransformNode, 5, 5, 0); // bind bytes_tstw __visc__bindIn(TransformNode, 6, 6, 0); // bind row_size __visc__bindIn(TransformNode, 7, 7, 0); // bind col_size - + __visc__bindOut(TransformNode, 0, 0, 0); } -void gamut_fxp_wrapper(float *input, size_t bytes_input, float *result, - size_t bytes_result, float *ctrl_pts, - size_t bytes_ctrl_pts, float *weights, - size_t bytes_weights, float *coefs, size_t bytes_coefs, - float *l2_dist, size_t bytes_l2_dist, size_t row_size, - size_t col_size) { +void gamut_fxp_wrapper(float *input, size_t bytes_input, + float *result, size_t bytes_result, + float *ctrl_pts, size_t bytes_ctrl_pts, + float *weights, size_t bytes_weights, + float *coefs, size_t bytes_coefs, + float *l2_dist, size_t bytes_l2_dist, + size_t row_size, size_t col_size) { __visc__hint(CPU_TARGET); - __visc__attributes(6, input, result, ctrl_pts, weights, coefs, l2_dist, 1, - result); + __visc__attributes(6, input, result, ctrl_pts, weights, coefs, l2_dist, 1, result); void *GamutNode = __visc__createNodeND(1, gamut_map_fxp, row_size); - __visc__bindIn(GamutNode, 0, 0, 0); // bind input - __visc__bindIn(GamutNode, 1, 1, 0); // bind bytes_input - __visc__bindIn(GamutNode, 2, 2, 0); // bind result - __visc__bindIn(GamutNode, 3, 3, 0); // bind bytes_result - __visc__bindIn(GamutNode, 4, 4, 0); // bind ctrl_pts - __visc__bindIn(GamutNode, 5, 5, 0); // bind bytes_ctrl_pts - __visc__bindIn(GamutNode, 6, 6, 0); // bind weights - __visc__bindIn(GamutNode, 7, 7, 0); // bind bytes_weights - __visc__bindIn(GamutNode, 8, 8, 0); // bind coefs - __visc__bindIn(GamutNode, 9, 9, 0); // bind bytes_coefs + __visc__bindIn(GamutNode, 0, 0, 0); // bind input + __visc__bindIn(GamutNode, 1, 1, 0); // bind bytes_input + __visc__bindIn(GamutNode, 2, 2, 0); // bind result + __visc__bindIn(GamutNode, 3, 3, 0); // bind bytes_result + __visc__bindIn(GamutNode, 4, 4, 0); // bind ctrl_pts + __visc__bindIn(GamutNode, 5, 5, 0); // bind bytes_ctrl_pts + __visc__bindIn(GamutNode, 6, 6, 0); // bind weights + __visc__bindIn(GamutNode, 7, 7, 0); // bind bytes_weights + __visc__bindIn(GamutNode, 8, 8, 0); // bind coefs + __visc__bindIn(GamutNode, 9, 9, 0); // bind bytes_coefs __visc__bindIn(GamutNode, 10, 10, 0); // bind l2_dist __visc__bindIn(GamutNode, 11, 11, 0); // bind bytes_l2_dist __visc__bindIn(GamutNode, 12, 12, 0); // bind row_size __visc__bindIn(GamutNode, 13, 13, 0); // bind col_size - + __visc__bindOut(GamutNode, 0, 0, 0); } -void tone_map_fxp_wrapper(float *input, size_t bytes_input, float *result, - size_t bytes_result, float *tone_map, - size_t bytes_tone_map, size_t row_size, - size_t col_size) { +void tone_map_fxp_wrapper(float *input, size_t bytes_input, + float *result, size_t bytes_result, + float *tone_map, size_t bytes_tone_map, + size_t row_size, size_t col_size) { __visc__hint(CPU_TARGET); __visc__attributes(3, input, result, tone_map, 1, result); @@ -553,52 +533,52 @@ void tone_map_fxp_wrapper(float *input, size_t bytes_input, float *result, __visc__bindIn(ToneMapNode, 1, 1, 0); // bind bytes_input __visc__bindIn(ToneMapNode, 2, 2, 0); // bind result __visc__bindIn(ToneMapNode, 3, 3, 0); // bind bytes_result - __visc__bindIn(ToneMapNode, 4, 4, 0); // bind tone_map + __visc__bindIn(ToneMapNode, 4, 4, 0); // bind tone_map __visc__bindIn(ToneMapNode, 5, 5, 0); // bind bytes_tone_map __visc__bindIn(ToneMapNode, 6, 6, 0); // bind row_size __visc__bindIn(ToneMapNode, 7, 7, 0); // bind col_size - + __visc__bindOut(ToneMapNode, 0, 0, 0); } + /*** ROOT Node - Top Level of the Graph Hierarchy ***/ -void CamPipeRoot(/*0*/ uint8_t *input, /*1*/ size_t bytes_input, - /*2*/ uint8_t *result, /*3*/ size_t bytes_result, - /*4*/ float *input_scaled, /*5*/ size_t bytes_input_scaled, - /*6*/ float *result_scaled, /*7*/ size_t bytes_result_scaled, - /*8*/ float *demosaic_out, /*9*/ size_t bytes_demosaic_out, - /*10*/ float *denoise_out, /*11*/ size_t bytes_denoise_out, - /*12*/ float *transform_out, /*13*/ size_t bytes_transform_out, - /*14*/ float *gamut_out, /*15*/ size_t bytes_gamut_out, - /*16*/ float *TsTw, /*17*/ size_t bytes_TsTw, - /*18*/ float *ctrl_pts, /*19*/ size_t bytes_ctrl_pts, - /*20*/ float *weights, /*21*/ size_t bytes_weights, - /*22*/ float *coefs, /*23*/ size_t bytes_coefs, - /*24*/ float *l2_dist, /*25*/ size_t bytes_l2_dist, - /*26*/ float *tone_map, /*27*/ size_t bytes_tone_map, - /*28*/ size_t row_size, /*29*/ size_t col_size) { - - // Specifies compilation target for current node - __visc__hint(CPU_TARGET); +void CamPipeRoot(/*0*/ uint8_t *input, /*1*/ size_t bytes_input, + /*2*/ uint8_t *result, /*3*/ size_t bytes_result, + /*4*/ float *input_scaled, /*5*/ size_t bytes_input_scaled, + /*6*/ float *result_scaled, /*7*/ size_t bytes_result_scaled, + /*8*/ float *demosaic_out, /*9*/ size_t bytes_demosaic_out, + /*10*/ float *denoise_out, /*11*/ size_t bytes_denoise_out, + /*12*/ float *transform_out, /*13*/ size_t bytes_transform_out, + /*14*/ float *gamut_out, /*15*/ size_t bytes_gamut_out, + /*16*/ float *TsTw, /*17*/ size_t bytes_TsTw, + /*18*/ float *ctrl_pts, /*19*/ size_t bytes_ctrl_pts, + /*20*/ float *weights, /*21*/ size_t bytes_weights, + /*22*/ float*coefs, /*23*/ size_t bytes_coefs, + /*24*/ float *l2_dist, /*25*/ size_t bytes_l2_dist, + /*26*/ float *tone_map, /*27*/ size_t bytes_tone_map, + /*28*/ size_t row_size, /*29*/ size_t col_size) { + + //Specifies compilation target for current node + __visc__hint(CPU_TARGET); // Specifies pointer arguments that will be used as "in" and "out" arguments // - count of "in" arguments // - list of "in" argument , and similar for "out" - __visc__attributes(14, input, result, input_scaled, result_scaled, - demosaic_out, denoise_out, transform_out, gamut_out, TsTw, - ctrl_pts, weights, coefs, tone_map, l2_dist, 5, result, - demosaic_out, denoise_out, transform_out, gamut_out); + __visc__attributes(14, input, result, input_scaled, result_scaled, demosaic_out, denoise_out, + transform_out, gamut_out, TsTw, ctrl_pts, weights, coefs, tone_map, l2_dist, + 5, result, demosaic_out, denoise_out, transform_out, gamut_out); // Create an 0D (specified by 1st argument) HPVM node - so a single node // associated with node function ---_fxp_wrapper - void *ScNode = __visc__createNodeND(0, scale_fxp_wrapper); - void *DmNode = __visc__createNodeND(0, demosaic_fxp_wrapper); - void *DnNode = __visc__createNodeND(0, denoise_fxp_wrapper); - void *TrNode = __visc__createNodeND(0, transform_fxp_wrapper); - void *GmNode = __visc__createNodeND(0, gamut_fxp_wrapper); - void *TnNode = __visc__createNodeND(0, tone_map_fxp_wrapper); - void *DsNode = __visc__createNodeND(0, descale_fxp_wrapper); - + void* ScNode = __visc__createNodeND(0, scale_fxp_wrapper); + void* DmNode = __visc__createNodeND(0, demosaic_fxp_wrapper); + void *DnNode = __visc__createNodeND(0, denoise_fxp_wrapper); + void *TrNode = __visc__createNodeND(0, transform_fxp_wrapper); + void *GmNode = __visc__createNodeND(0, gamut_fxp_wrapper); + void *TnNode = __visc__createNodeND(0, tone_map_fxp_wrapper); + void *DsNode = __visc__createNodeND(0, descale_fxp_wrapper); + // BindIn binds inputs of current node with specified node // - destination node // - argument position in argument list of function of source node @@ -612,283 +592,268 @@ void CamPipeRoot(/*0*/ uint8_t *input, /*1*/ size_t bytes_input, // - destination position (in argument list of destination node) // - streaming (1) or non-streaming (0) - // scale_fxp inputs - __visc__bindIn(ScNode, 0, 0, 0); // input -> ScNode:input - __visc__bindIn(ScNode, 1, 1, 0); // bytes_input -> ScNode:bytes_input - __visc__bindIn(ScNode, 4, 2, 0); // input_scaled -> ScNode:result - __visc__bindIn(ScNode, 5, 3, 0); // bytes_input_scaled -> ScNode:bytes_result - __visc__bindIn(ScNode, 28, 4, 0); // row_size -> ScNode:row_size - __visc__bindIn(ScNode, 29, 5, 0); // col_size -> ScNode:col_size - - // demosaic_fxp inputs - __visc__bindIn(DmNode, 4, 0, 0); // input_scaled -> DmNode:input - __visc__edge(ScNode, DmNode, 1, 0, 1, - 0); // SCNode:bytes_result -> DmNode:bytes_input - __visc__bindIn(DmNode, 8, 2, 0); // demosaic_out -> DmNode:result - __visc__bindIn(DmNode, 9, 3, 0); // bytes_demosaic_out -> DmNode:bytes_result - __visc__bindIn(DmNode, 28, 4, 0); // row_size -> DmNode:row_size - __visc__bindIn(DmNode, 29, 5, 0); // col_size -> DmNode:col_size - - // denoise_fxp inputs - __visc__bindIn(DnNode, 8, 0, 0); // demosaic_out -> DnNode:input - __visc__edge(DmNode, DnNode, 1, 0, 1, - 0); // DMNode:bytes_result -> DnNode:bytes_input - __visc__bindIn(DnNode, 10, 2, 0); // denoise_out -> DnNode:result - __visc__bindIn(DnNode, 11, 3, 0); // bytes_denoise_out -> DnNode:bytes_result - __visc__bindIn(DnNode, 28, 4, 0); // row_size -> DnNode:row_size - __visc__bindIn(DnNode, 29, 5, 0); // col_size -> DnNode:col_size - - // transform_fxp inputs - __visc__bindIn(TrNode, 10, 0, 0); // denoise_out -> TrNode:input - __visc__edge(DnNode, TrNode, 1, 0, 1, - 0); // DnNode:bytes_result -> TrNode:bytes_input - __visc__bindIn(TrNode, 12, 2, 0); // transform_out -> TrNode:result - __visc__bindIn(TrNode, 13, 3, - 0); // bytes_result_scaled -> TrNode:bytes_result - __visc__bindIn(TrNode, 16, 4, 0); // TsTw -> TrNode:TsTw_trann - __visc__bindIn(TrNode, 17, 5, 0); // bytes_TsTw -> TrNode:bytes_TsTw - __visc__bindIn(TrNode, 28, 6, 0); // row_size -> TrNode:row_size - __visc__bindIn(TrNode, 29, 7, 0); // col_size -> TrNode:col_size - - // gamut_fxp inputs - __visc__bindIn(GmNode, 12, 0, 0); // transform_out -> GmNode:input - __visc__edge(TrNode, GmNode, 1, 0, 1, - 0); // TrNode:bytes_result -> GmNode:bytes_input - __visc__bindIn(GmNode, 14, 2, 0); // gamut_out -> GmNode:result - __visc__bindIn(GmNode, 15, 3, 0); // bytes_gamut_out -> GmNode:bytes_result - __visc__bindIn(GmNode, 18, 4, 0); // ctrl_pts -> GmNode:ctrl_pts - __visc__bindIn(GmNode, 19, 5, 0); // bytes_ctrl_pts -> GmNode:bytes_ctrl_pts - __visc__bindIn(GmNode, 20, 6, 0); // weights -> GmNode:weights - __visc__bindIn(GmNode, 21, 7, 0); // bytes_weights -> GmNode:bytes_weights - __visc__bindIn(GmNode, 22, 8, 0); // coefs -> GmNode:coefs - __visc__bindIn(GmNode, 23, 9, 0); // bytes_coefs -> GmNode:bytes_coefs - __visc__bindIn(GmNode, 24, 10, 0); // l2_dist -> GmNode: l2_dist - __visc__bindIn(GmNode, 25, 11, 0); // bytes_l2_dist -> GmNode:bytes_l2_dist - __visc__bindIn(GmNode, 28, 12, 0); // row_size -> GmNode:row_size - __visc__bindIn(GmNode, 29, 13, 0); // col_size -> GmNode:col_size - - // tone_map_fxp inputs - __visc__bindIn(TnNode, 14, 0, 0); // gamut_out -> TnNode:input - __visc__edge(GmNode, TnNode, 1, 0, 1, - 0); // GmNode:bytes_result -> TnNode:bytes_input - __visc__bindIn(TnNode, 6, 2, 0); // result_scaled -> TnNode:result - __visc__bindIn(TnNode, 7, 3, 0); // bytes_result_scaled -> TnNode:bytes_result - __visc__bindIn(TnNode, 26, 4, 0); // tone_map -> TnNode:tone_map - __visc__bindIn(TnNode, 27, 5, 0); // bytes_tone_map -> TnNode:bytes_tone_map - __visc__bindIn(TnNode, 28, 6, 0); // row_size -> TnNode:row_size - __visc__bindIn(TnNode, 29, 7, 0); // col_size -> TnNode:col_size - - // descale_fxp inputs - __visc__bindIn(DsNode, 6, 0, 0); // result_scaled -> DsNode:input - __visc__edge(TnNode, DsNode, 1, 0, 1, - 0); // TnNode:bytes_result -> DsNode:bytes_input - __visc__bindIn(DsNode, 2, 2, 0); // result -> DsNode:result - __visc__bindIn(DsNode, 3, 3, 0); // bytes_result -> DsNode:bytes_result - __visc__bindIn(DsNode, 28, 4, 0); // row_size -> DsNode:row_size - __visc__bindIn(DsNode, 29, 5, 0); // col_size -> DsNode:col_size + // scale_fxp inputs + __visc__bindIn(ScNode, 0, 0, 0); // input -> ScNode:input + __visc__bindIn(ScNode, 1, 1, 0); // bytes_input -> ScNode:bytes_input + __visc__bindIn(ScNode, 4, 2, 0); // input_scaled -> ScNode:result + __visc__bindIn(ScNode, 5, 3, 0); // bytes_input_scaled -> ScNode:bytes_result + __visc__bindIn(ScNode, 28, 4, 0); // row_size -> ScNode:row_size + __visc__bindIn(ScNode, 29, 5, 0); // col_size -> ScNode:col_size + + // demosaic_fxp inputs + __visc__bindIn(DmNode, 4, 0, 0); // input_scaled -> DmNode:input + __visc__edge(ScNode, DmNode, 1, 0, 1, 0); // SCNode:bytes_result -> DmNode:bytes_input + __visc__bindIn(DmNode, 8, 2, 0); // demosaic_out -> DmNode:result + __visc__bindIn(DmNode, 9, 3, 0); // bytes_demosaic_out -> DmNode:bytes_result + __visc__bindIn(DmNode, 28, 4, 0); // row_size -> DmNode:row_size + __visc__bindIn(DmNode, 29, 5, 0); // col_size -> DmNode:col_size + + // denoise_fxp inputs + __visc__bindIn(DnNode, 8, 0, 0); // demosaic_out -> DnNode:input + __visc__edge(DmNode, DnNode, 1, 0, 1, 0); // DMNode:bytes_result -> DnNode:bytes_input + __visc__bindIn(DnNode, 10, 2, 0); // denoise_out -> DnNode:result + __visc__bindIn(DnNode, 11, 3, 0); // bytes_denoise_out -> DnNode:bytes_result + __visc__bindIn(DnNode, 28, 4, 0); // row_size -> DnNode:row_size + __visc__bindIn(DnNode, 29, 5, 0); // col_size -> DnNode:col_size + + // transform_fxp inputs + __visc__bindIn(TrNode, 10, 0, 0); // denoise_out -> TrNode:input + __visc__edge(DnNode, TrNode, 1, 0, 1, 0); // DnNode:bytes_result -> TrNode:bytes_input + __visc__bindIn(TrNode, 12, 2, 0); // transform_out -> TrNode:result + __visc__bindIn(TrNode, 13, 3, 0); // bytes_result_scaled -> TrNode:bytes_result + __visc__bindIn(TrNode, 16, 4, 0); // TsTw -> TrNode:TsTw_trann + __visc__bindIn(TrNode, 17, 5, 0); // bytes_TsTw -> TrNode:bytes_TsTw + __visc__bindIn(TrNode, 28, 6, 0); // row_size -> TrNode:row_size + __visc__bindIn(TrNode, 29, 7, 0); // col_size -> TrNode:col_size + + // gamut_fxp inputs + __visc__bindIn(GmNode, 12, 0, 0); // transform_out -> GmNode:input + __visc__edge(TrNode, GmNode, 1, 0, 1, 0); // TrNode:bytes_result -> GmNode:bytes_input + __visc__bindIn(GmNode, 14, 2, 0); // gamut_out -> GmNode:result + __visc__bindIn(GmNode, 15, 3, 0); // bytes_gamut_out -> GmNode:bytes_result + __visc__bindIn(GmNode, 18, 4, 0); // ctrl_pts -> GmNode:ctrl_pts + __visc__bindIn(GmNode, 19, 5, 0); // bytes_ctrl_pts -> GmNode:bytes_ctrl_pts + __visc__bindIn(GmNode, 20, 6, 0); // weights -> GmNode:weights + __visc__bindIn(GmNode, 21, 7, 0); // bytes_weights -> GmNode:bytes_weights + __visc__bindIn(GmNode, 22, 8, 0); // coefs -> GmNode:coefs + __visc__bindIn(GmNode, 23, 9, 0); // bytes_coefs -> GmNode:bytes_coefs + __visc__bindIn(GmNode, 24, 10, 0); // l2_dist -> GmNode: l2_dist + __visc__bindIn(GmNode, 25, 11, 0); // bytes_l2_dist -> GmNode:bytes_l2_dist + __visc__bindIn(GmNode, 28, 12, 0); // row_size -> GmNode:row_size + __visc__bindIn(GmNode, 29, 13, 0); // col_size -> GmNode:col_size + + // tone_map_fxp inputs + __visc__bindIn(TnNode, 14, 0, 0); // gamut_out -> TnNode:input + __visc__edge(GmNode, TnNode, 1, 0, 1, 0); // GmNode:bytes_result -> TnNode:bytes_input + __visc__bindIn(TnNode, 6, 2, 0); // result_scaled -> TnNode:result + __visc__bindIn(TnNode, 7, 3, 0); // bytes_result_scaled -> TnNode:bytes_result + __visc__bindIn(TnNode, 26, 4, 0); // tone_map -> TnNode:tone_map + __visc__bindIn(TnNode, 27, 5, 0); // bytes_tone_map -> TnNode:bytes_tone_map + __visc__bindIn(TnNode, 28, 6, 0); // row_size -> TnNode:row_size + __visc__bindIn(TnNode, 29, 7, 0); // col_size -> TnNode:col_size + + // descale_fxp inputs + __visc__bindIn(DsNode, 6, 0, 0); // result_scaled -> DsNode:input + __visc__edge(TnNode, DsNode, 1, 0, 1, 0); // TnNode:bytes_result -> DsNode:bytes_input + __visc__bindIn(DsNode, 2, 2, 0); // result -> DsNode:result + __visc__bindIn(DsNode, 3, 3, 0); // bytes_result -> DsNode:bytes_result + __visc__bindIn(DsNode, 28, 4, 0); // row_size -> DsNode:row_size + __visc__bindIn(DsNode, 29, 5, 0); // col_size -> DsNode:col_size // Similar to bindIn, but for the output. Output of a node is a struct, and // we consider the fields in increasing ordering. - __visc__bindOut(DsNode, 0, 0, 0); + __visc__bindOut(DsNode, 0, 0, 0); + } -int main(int argc, char *argv[]) { - // Parse the arguments. - arguments args; - set_default_args(&args); - argp_parse(&parser, argc, argv, 0, 0, &args); - - // Read a raw image. - // NOTE: We deliberately perform this file I/O outside of the kernel. - printf("Reading a raw image from %s\n", args.args[RAW_IMAGE_BIN]); - size_t row_size, col_size; - uint8_t *image_in = - read_image_from_binary(args.args[RAW_IMAGE_BIN], &row_size, &col_size); - - printf("Raw image shape: %d x %d x %d\n", row_size, col_size, CHAN_SIZE); - - // Allocate a buffer for storing the output image data. - // (This is currently the same size as the input image data.) - size_t bytes_image = sizeof(uint8_t) * row_size * col_size * CHAN_SIZE; - size_t bytes_fimage = sizeof(float) * row_size * col_size * CHAN_SIZE; - uint8_t *image_out = (uint8_t *)malloc_aligned(bytes_image); - uint8_t *image_out_gamut = (uint8_t *)malloc_aligned(bytes_image); - uint8_t *image_out_demosaic = (uint8_t *)malloc_aligned(bytes_image); - uint8_t *image_out_denoise = (uint8_t *)malloc_aligned(bytes_image); - uint8_t *image_out_transform = (uint8_t *)malloc_aligned(bytes_image); - - __visc__init(); - - /////////////////////////////////////////////////////////////// - // Camera Model Parameters - /////////////////////////////////////////////////////////////// - // Path to the camera model to be used - // char cam_model_path[100]; - // char cam_model_path = "cam_models/NikonD7000/"; - // White balance index (select white balance from transform file) - // The first white balance in the file has a wb_index of 1 - // For more information on model format see the readme - int wb_index = 6; - - // Number of control points - int num_ctrl_pts = 3702; - uint8_t *input, *result; - float *input_scaled, *result_scaled, *demosaic_out, *denoise_out, - *transform_out, *gamut_out; - float *TsTw, *ctrl_pts, *weights, *coefs, *tone_map, *l2_dist; - - TsTw = get_TsTw("cam_models/NikonD7000/", wb_index); - float *trans = transpose_mat(TsTw, CHAN_SIZE, CHAN_SIZE); - free(TsTw); - TsTw = trans; - ctrl_pts = get_ctrl_pts("cam_models/NikonD7000/", num_ctrl_pts); - weights = get_weights("cam_models/NikonD7000/", num_ctrl_pts); - coefs = get_coefs("cam_models/NikonD7000/", num_ctrl_pts); - tone_map = get_tone_map("cam_models/NikonD7000/"); - - input_scaled = (float *)malloc_aligned(bytes_fimage); - result_scaled = (float *)malloc_aligned(bytes_fimage); - demosaic_out = (float *)malloc_aligned(bytes_fimage); - denoise_out = (float *)malloc_aligned(bytes_fimage); - transform_out = (float *)malloc_aligned(bytes_fimage); - gamut_out = (float *)malloc_aligned(bytes_fimage); - l2_dist = (float *)malloc_aligned(sizeof(float) * num_ctrl_pts); - - // This is host_input in cam_pipe() - input = (uint8_t *)malloc_aligned(bytes_image); - convert_hwc_to_chw(image_in, row_size, col_size, &input); - - // This is host_result in cam_pipe() - result = (uint8_t *)malloc_aligned(bytes_image); - - // Allocate struct to pass DFG inputs - RootIn *rootArgs = (RootIn *)malloc(sizeof(RootIn)); - - // Set up HPVM DFG inputs in the rootArgs struct. - rootArgs->input = input; - rootArgs->bytes_input = bytes_image; - - rootArgs->result = result; - rootArgs->bytes_result = bytes_image; - - rootArgs->input_scaled = input_scaled; - rootArgs->bytes_input_scaled = bytes_fimage; - - rootArgs->result_scaled = result_scaled; - rootArgs->bytes_result_scaled = bytes_fimage; - - rootArgs->demosaic_out = demosaic_out; - rootArgs->bytes_demosaic_out = bytes_fimage; - - rootArgs->denoise_out = denoise_out; - rootArgs->bytes_denoise_out = bytes_fimage; - - rootArgs->transform_out = transform_out; - rootArgs->bytes_transform_out = bytes_fimage; - - rootArgs->gamut_out = gamut_out; - rootArgs->bytes_gamut_out = bytes_fimage; - - rootArgs->TsTw = TsTw; - rootArgs->bytes_TsTw = CHAN_SIZE * CHAN_SIZE * sizeof(float); - - rootArgs->ctrl_pts = ctrl_pts; - rootArgs->bytes_ctrl_pts = num_ctrl_pts * CHAN_SIZE * sizeof(float); - - rootArgs->weights = weights; - rootArgs->bytes_weights = num_ctrl_pts * CHAN_SIZE * sizeof(float); - - rootArgs->coefs = coefs; - rootArgs->bytes_coefs = 4 * CHAN_SIZE * sizeof(float); - - rootArgs->tone_map = tone_map; - rootArgs->bytes_tone_map = 256 * CHAN_SIZE * sizeof(float); - - rootArgs->l2_dist = l2_dist; - rootArgs->bytes_l2_dist = num_ctrl_pts * sizeof(float); - - rootArgs->row_size = row_size; - rootArgs->col_size = col_size; - - // Memory tracking is required for pointer arguments. - // Nodes can be scheduled on different targets, and - // dataflow edge implementation needs to request data. - // The pair (pointer, size) is inserted in memory tracker using this call - llvm_visc_track_mem(input, bytes_image); - llvm_visc_track_mem(result, bytes_image); - llvm_visc_track_mem(input_scaled, bytes_fimage); - llvm_visc_track_mem(result_scaled, bytes_fimage); - llvm_visc_track_mem(demosaic_out, bytes_fimage); - llvm_visc_track_mem(denoise_out, bytes_fimage); - llvm_visc_track_mem(transform_out, bytes_fimage); - llvm_visc_track_mem(gamut_out, bytes_fimage); - llvm_visc_track_mem(TsTw, CHAN_SIZE * CHAN_SIZE * sizeof(float)); - llvm_visc_track_mem(ctrl_pts, num_ctrl_pts * CHAN_SIZE * sizeof(float)); - llvm_visc_track_mem(weights, num_ctrl_pts * CHAN_SIZE * sizeof(float)); - llvm_visc_track_mem(coefs, 4 * CHAN_SIZE * sizeof(float)); - llvm_visc_track_mem(tone_map, 256 * CHAN_SIZE * sizeof(float)); - llvm_visc_track_mem(l2_dist, num_ctrl_pts * sizeof(float)); - - printf("\n\nLaunching CAVA pipeline!\n"); - - void *camPipeDFG = __visc__launch(0, CamPipeRoot, (void *)rootArgs); - __visc__wait(camPipeDFG); - - printf("\n\nPipeline execution completed!\n"); - printf("Pipeline final stage returned %lu; should be %lu\n", - rootArgs->ret.bytesRet, bytes_image); - printf("\n\nRequesting memory!\n"); - - // Request data from graph. - llvm_visc_request_mem(result, bytes_image); - llvm_visc_request_mem(demosaic_out, bytes_fimage); - llvm_visc_request_mem(denoise_out, bytes_fimage); - llvm_visc_request_mem(transform_out, bytes_fimage); - llvm_visc_request_mem(gamut_out, bytes_fimage); - printf("\n\nDone requesting memory!\n"); - - uint8_t *gamut_out_descaled = (uint8_t *)malloc_aligned(bytes_image); - uint8_t *demosaic_out_descaled = (uint8_t *)malloc_aligned(bytes_image); - uint8_t *transform_out_descaled = (uint8_t *)malloc_aligned(bytes_image); - uint8_t *denoise_out_descaled = (uint8_t *)malloc_aligned(bytes_image); - - descale_cpu(demosaic_out, bytes_fimage, demosaic_out_descaled, bytes_image, - row_size, col_size); - descale_cpu(gamut_out, bytes_fimage, gamut_out_descaled, bytes_image, - row_size, col_size); - descale_cpu(denoise_out, bytes_fimage, denoise_out_descaled, bytes_image, - row_size, col_size); - descale_cpu(transform_out, bytes_fimage, transform_out_descaled, bytes_image, - row_size, col_size); - - convert_chw_to_hwc(result, row_size, col_size, &image_out); - convert_chw_to_hwc(gamut_out_descaled, row_size, col_size, &image_out_gamut); - convert_chw_to_hwc(demosaic_out_descaled, row_size, col_size, - &image_out_demosaic); - convert_chw_to_hwc(denoise_out_descaled, row_size, col_size, - &image_out_denoise); - convert_chw_to_hwc(transform_out_descaled, row_size, col_size, - &image_out_transform); - - // Remove tracked pointers. - llvm_visc_untrack_mem(input); - llvm_visc_untrack_mem(result); - llvm_visc_untrack_mem(input_scaled); - llvm_visc_untrack_mem(result_scaled); - llvm_visc_untrack_mem(demosaic_out); - llvm_visc_untrack_mem(denoise_out); - llvm_visc_untrack_mem(transform_out); - llvm_visc_untrack_mem(gamut_out); - - llvm_visc_untrack_mem(TsTw); - llvm_visc_untrack_mem(ctrl_pts); - llvm_visc_untrack_mem(weights); - llvm_visc_untrack_mem(coefs); - llvm_visc_untrack_mem(tone_map); - llvm_visc_untrack_mem(l2_dist); - - // Output the image. - // NOTE: We deliberately perform this file I/O outside of the kernel. +int main(int argc, char* argv[]) { + // Parse the arguments. + arguments args; + set_default_args(&args); + argp_parse(&parser, argc, argv, 0, 0, &args); + + // Read a raw image. + // NOTE: We deliberately perform this file I/O outside of the kernel. + printf("Reading a raw image from %s\n", args.args[RAW_IMAGE_BIN]); + size_t row_size, col_size; + uint8_t *image_in = read_image_from_binary(args.args[RAW_IMAGE_BIN], &row_size, &col_size); + + printf("Raw image shape: %d x %d x %d\n", row_size, col_size, CHAN_SIZE); + + // Allocate a buffer for storing the output image data. + // (This is currently the same size as the input image data.) + size_t bytes_image = sizeof(uint8_t) * row_size * col_size * CHAN_SIZE; + size_t bytes_fimage = sizeof(float) * row_size * col_size * CHAN_SIZE; + uint8_t *image_out = (uint8_t*) malloc_aligned(bytes_image); + uint8_t *image_out_gamut = (uint8_t*) malloc_aligned(bytes_image); + uint8_t *image_out_demosaic = (uint8_t*) malloc_aligned(bytes_image); + uint8_t *image_out_denoise = (uint8_t*) malloc_aligned(bytes_image); + uint8_t *image_out_transform = (uint8_t*) malloc_aligned(bytes_image); + + __visc__init(); + + /////////////////////////////////////////////////////////////// + // Camera Model Parameters + /////////////////////////////////////////////////////////////// + // Path to the camera model to be used +// char cam_model_path[100]; +// char cam_model_path = "cam_models/NikonD7000/"; + // White balance index (select white balance from transform file) + // The first white balance in the file has a wb_index of 1 + // For more information on model format see the readme + int wb_index = 6; + + // Number of control points + int num_ctrl_pts = 3702; + uint8_t *input, *result; + float *input_scaled, *result_scaled, *demosaic_out, *denoise_out, *transform_out, *gamut_out; + float *TsTw, *ctrl_pts, *weights, *coefs, *tone_map, *l2_dist; + + TsTw = get_TsTw("cam_models/NikonD7000/", wb_index); + float *trans = transpose_mat(TsTw, CHAN_SIZE, CHAN_SIZE); + free(TsTw); + TsTw = trans; + ctrl_pts = get_ctrl_pts("cam_models/NikonD7000/", num_ctrl_pts); + weights = get_weights("cam_models/NikonD7000/", num_ctrl_pts); + coefs = get_coefs("cam_models/NikonD7000/", num_ctrl_pts); + tone_map = get_tone_map("cam_models/NikonD7000/"); + + input_scaled = (float*) malloc_aligned(bytes_fimage); + result_scaled = (float*) malloc_aligned(bytes_fimage); + demosaic_out = (float*) malloc_aligned(bytes_fimage); + denoise_out = (float*) malloc_aligned(bytes_fimage); + transform_out = (float*) malloc_aligned(bytes_fimage); + gamut_out = (float*) malloc_aligned(bytes_fimage); + l2_dist = (float*) malloc_aligned(sizeof(float) * num_ctrl_pts); + + // This is host_input in cam_pipe() + input = (uint8_t*) malloc_aligned(bytes_image); + convert_hwc_to_chw(image_in, row_size, col_size, &input); + + // This is host_result in cam_pipe() + result = (uint8_t*) malloc_aligned(bytes_image); + + // Allocate struct to pass DFG inputs + RootIn* rootArgs = (RootIn*) malloc(sizeof(RootIn)); + + // Set up HPVM DFG inputs in the rootArgs struct. + rootArgs->input = input; + rootArgs->bytes_input = bytes_image; + + rootArgs->result = result; + rootArgs->bytes_result = bytes_image; + + rootArgs->input_scaled = input_scaled; + rootArgs->bytes_input_scaled = bytes_fimage; + + rootArgs->result_scaled = result_scaled; + rootArgs->bytes_result_scaled = bytes_fimage; + + rootArgs->demosaic_out = demosaic_out; + rootArgs->bytes_demosaic_out = bytes_fimage; + + rootArgs->denoise_out = denoise_out; + rootArgs->bytes_denoise_out = bytes_fimage; + + rootArgs->transform_out = transform_out; + rootArgs->bytes_transform_out = bytes_fimage; + + rootArgs->gamut_out = gamut_out; + rootArgs->bytes_gamut_out = bytes_fimage; + + rootArgs->TsTw = TsTw; + rootArgs->bytes_TsTw = CHAN_SIZE * CHAN_SIZE * sizeof(float); + + rootArgs->ctrl_pts = ctrl_pts; + rootArgs->bytes_ctrl_pts = num_ctrl_pts * CHAN_SIZE * sizeof(float); + + rootArgs->weights = weights; + rootArgs->bytes_weights = num_ctrl_pts * CHAN_SIZE * sizeof(float); + + rootArgs->coefs = coefs; + rootArgs->bytes_coefs = 4 * CHAN_SIZE * sizeof(float); + + rootArgs->tone_map = tone_map; + rootArgs->bytes_tone_map = 256 * CHAN_SIZE * sizeof(float); + + rootArgs->l2_dist = l2_dist; + rootArgs->bytes_l2_dist = num_ctrl_pts * sizeof(float); + + rootArgs->row_size = row_size; + rootArgs->col_size = col_size; + + // Memory tracking is required for pointer arguments. + // Nodes can be scheduled on different targets, and + // dataflow edge implementation needs to request data. + // The pair (pointer, size) is inserted in memory tracker using this call + llvm_visc_track_mem(input, bytes_image); + llvm_visc_track_mem(result, bytes_image); + llvm_visc_track_mem(input_scaled, bytes_fimage); + llvm_visc_track_mem(result_scaled, bytes_fimage); + llvm_visc_track_mem(demosaic_out, bytes_fimage); + llvm_visc_track_mem(denoise_out, bytes_fimage); + llvm_visc_track_mem(transform_out, bytes_fimage); + llvm_visc_track_mem(gamut_out, bytes_fimage); + llvm_visc_track_mem(TsTw, CHAN_SIZE * CHAN_SIZE * sizeof(float)); + llvm_visc_track_mem(ctrl_pts, num_ctrl_pts * CHAN_SIZE * sizeof(float)); + llvm_visc_track_mem(weights, num_ctrl_pts * CHAN_SIZE * sizeof(float)); + llvm_visc_track_mem(coefs, 4 * CHAN_SIZE *sizeof(float)); + llvm_visc_track_mem(tone_map, 256 * CHAN_SIZE * sizeof(float)); + llvm_visc_track_mem(l2_dist, num_ctrl_pts * sizeof(float)); + + printf("\n\nLaunching CAVA pipeline!\n"); + + void* camPipeDFG = __visc__launch(0, CamPipeRoot, (void*) rootArgs); + __visc__wait(camPipeDFG); + + printf("\n\nPipeline execution completed!\n"); + printf("\n\nRequesting memory!\n"); + + // Request data from graph. + llvm_visc_request_mem(result, bytes_image); + llvm_visc_request_mem(demosaic_out, bytes_fimage); + llvm_visc_request_mem(denoise_out, bytes_fimage); + llvm_visc_request_mem(transform_out, bytes_fimage); + llvm_visc_request_mem(gamut_out, bytes_fimage); + printf("\n\nDone requesting memory!\n"); + + + uint8_t* gamut_out_descaled = (uint8_t*) malloc_aligned(bytes_image); + uint8_t* demosaic_out_descaled = (uint8_t*) malloc_aligned(bytes_image); + uint8_t* transform_out_descaled = (uint8_t*) malloc_aligned(bytes_image); + uint8_t* denoise_out_descaled = (uint8_t*) malloc_aligned(bytes_image); + + descale_cpu(demosaic_out, bytes_fimage, demosaic_out_descaled, bytes_image, row_size, col_size); + descale_cpu(gamut_out, bytes_fimage, gamut_out_descaled, bytes_image, row_size, col_size); + descale_cpu(denoise_out, bytes_fimage, denoise_out_descaled, bytes_image, row_size, col_size); + descale_cpu(transform_out, bytes_fimage, transform_out_descaled, bytes_image, row_size, col_size); + + convert_chw_to_hwc(result, row_size, col_size, &image_out); + convert_chw_to_hwc(gamut_out_descaled, row_size, col_size, &image_out_gamut); + convert_chw_to_hwc(demosaic_out_descaled, row_size, col_size, &image_out_demosaic); + convert_chw_to_hwc(denoise_out_descaled, row_size, col_size, &image_out_denoise); + convert_chw_to_hwc(transform_out_descaled, row_size, col_size, &image_out_transform); + + + // Remove tracked pointers. + llvm_visc_untrack_mem(input); + llvm_visc_untrack_mem(result); + llvm_visc_untrack_mem(input_scaled); + llvm_visc_untrack_mem(result_scaled); + llvm_visc_untrack_mem(demosaic_out); + llvm_visc_untrack_mem(denoise_out); + llvm_visc_untrack_mem(transform_out); + llvm_visc_untrack_mem(gamut_out); + + llvm_visc_untrack_mem(TsTw); + llvm_visc_untrack_mem(ctrl_pts); + llvm_visc_untrack_mem(weights); + llvm_visc_untrack_mem(coefs); + llvm_visc_untrack_mem(tone_map); + llvm_visc_untrack_mem(l2_dist); + + // Output the image. + // NOTE: We deliberately perform this file I/O outside of the kernel. char str[50], base_str[50]; strcpy(base_str, args.args[OUTPUT_IMAGE_BIN]); strcpy(str, base_str); @@ -912,7 +877,8 @@ int main(int argc, char *argv[]) { printf("Writing output image to %s\n", str); write_image_to_binary(str, image_out_transform, row_size, col_size); - __visc__cleanup(); + __visc__cleanup(); - return 0; + return 0; } + diff --git a/hpvm/test/hpvm-cava/src/pipe_stages.c b/hpvm/test/hpvm-cava/src/pipe_stages.c index 253052af872838f6ed363e3497ef64dd288db84e..2ebedec936915b5e7f11881c5001c84b6db26474 100644 --- a/hpvm/test/hpvm-cava/src/pipe_stages.c +++ b/hpvm/test/hpvm-cava/src/pipe_stages.c @@ -1,43 +1,44 @@ +#include <stdio.h> +#include <math.h> #include "pipe_stages.h" #include "cam_pipe_utility.h" -#include <math.h> -#include <stdio.h> -// void scale_fxp(uint8_t *input, int row_size, int col_size, float *output) { -void scale_fxp(uint8_t *input, size_t bytes_input, float *output, - size_t bytes_output, int row_size, int col_size) { +//void scale_fxp(uint8_t *input, int row_size, int col_size, float *output) { +void scale_fxp(uint8_t *input, size_t bytes_input, + float *output, size_t bytes_output, + int row_size, int col_size) { __visc__hint(DEVICE); __visc__attributes(2, input, output, 1, output); - + ARRAY_3D(uint8_t, _input, input, row_size, col_size); ARRAY_3D(float, _output, output, row_size, col_size); -sl_chan: + sl_chan: for (int chan = 0; chan < CHAN_SIZE; chan++) - sl_row: + sl_row: for (int row = 0; row < row_size; row++) - sl_col: + sl_col: for (int col = 0; col < col_size; col++) _output[chan][row][col] = _input[chan][row][col] * 1.0 / 255; __visc__return(1, bytes_output); } -// void descale_fxp(float *input, int row_size, int col_size, uint8_t *output) { -void descale_fxp(float *input, size_t bytes_input, uint8_t *output, - size_t bytes_result, int row_size, int col_size) { +//void descale_fxp(float *input, int row_size, int col_size, uint8_t *output) { +void descale_fxp(float *input, size_t bytes_input, + uint8_t *output, size_t bytes_result, + int row_size, int col_size) { __visc__hint(DEVICE); __visc__attributes(2, input, output, 1, output); - + ARRAY_3D(float, _input, input, row_size, col_size); ARRAY_3D(uint8_t, _output, output, row_size, col_size); -dsl_chan: + dsl_chan: for (int chan = 0; chan < CHAN_SIZE; chan++) - dsl_row: + dsl_row: for (int row = 0; row < row_size; row++) - dsl_col: + dsl_col: for (int col = 0; col < col_size; col++) - _output[chan][row][col] = - min(max(_input[chan][row][col] * 255, 0), 255); + _output[chan][row][col] = min(max(_input[chan][row][col] * 255, 0), 255); __visc__return(1, bytes_output); } @@ -45,125 +46,127 @@ dsl_chan: // Demosaicing stage // G R // B G -// void demosaic_fxp(float *input, int row_size, int col_size, float *result) { -void demosaic_fxp(float *input, size_t bytes_input, float *result, - size_t bytes_result, int row_size, int col_size) { +//void demosaic_fxp(float *input, int row_size, int col_size, float *result) { +void demosaic_fxp(float *input, size_t bytes_input, + float *result, size_t bytes_result, + int row_size, int col_size) { __visc__hint(DEVICE); __visc__attributes(2, input, result, 1, result); - + printf("Demosaicing.\n"); ARRAY_3D(float, _input, input, row_size, col_size); ARRAY_3D(float, _result, result, row_size, col_size); -dm_row: + dm_row: for (int row = 1; row < row_size - 1; row++) - dm_col: + dm_col: for (int col = 1; col < col_size - 1; col++) - if (row % 2 == 0 && col % 2 == 0) { - // Green pixel - // Getting the R values - float R1 = _input[0][row][col - 1]; - float R2 = _input[0][row][col + 1]; - // Getting the B values - float B1 = _input[2][row - 1][col]; - float B2 = _input[2][row + 1][col]; - // R - _result[0][row][col] = (R1 + R2) / 2; - // G - _result[1][row][col] = _input[1][row][col] * 2; - // B - _result[2][row][col] = (B1 + B2) / 2; - } else if (row % 2 == 0 && col % 2 == 1) { - // Red pixel - // Getting the G values - float G1 = _input[1][row - 1][col]; - float G2 = _input[1][row + 1][col]; - float G3 = _input[1][row][col - 1]; - float G4 = _input[1][row][col + 1]; - // Getting the B values - float B1 = _input[2][row - 1][col - 1]; - float B2 = _input[2][row - 1][col + 1]; - float B3 = _input[2][row + 1][col - 1]; - float B4 = _input[2][row + 1][col + 1]; - // R - _result[0][row][col] = _input[0][row][col]; - // G - _result[1][row][col] = (G1 + G2 + G3 + G4) / 2; - // B (center pixel) - _result[2][row][col] = (B1 + B2 + B3 + B4) / 4; - } else if (row % 2 == 1 && col % 2 == 0) { - // Blue pixel - // Getting the R values - float R1 = _input[0][row - 1][col - 1]; - float R2 = _input[0][row + 1][col - 1]; - float R3 = _input[0][row - 1][col + 1]; - float R4 = _input[0][row + 1][col + 1]; - // Getting the G values - float G1 = _input[1][row - 1][col]; - float G2 = _input[1][row + 1][col]; - float G3 = _input[1][row][col - 1]; - float G4 = _input[1][row][col + 1]; - // R - _result[0][row][col] = (R1 + R2 + R3 + R4) / 4; - // G - _result[1][row][col] = (G1 + G2 + G3 + G4) / 2; - // B - _result[2][row][col] = _input[2][row][col]; - } else { - // Bottom Green pixel - // Getting the R values - float R1 = _input[0][row - 1][col]; - float R2 = _input[0][row + 1][col]; - // Getting the B values - float B1 = _input[2][row][col - 1]; - float B2 = _input[2][row][col + 1]; - // R - _result[0][row][col] = (R1 + R2) / 2; - // G - _result[1][row][col] = _input[1][row][col] * 2; - // B - _result[2][row][col] = (B1 + B2) / 2; - } + if (row % 2 == 0 && col % 2 == 0) { + // Green pixel + // Getting the R values + float R1 = _input[0][row][col - 1]; + float R2 = _input[0][row][col + 1]; + // Getting the B values + float B1 = _input[2][row - 1][col]; + float B2 = _input[2][row + 1][col]; + // R + _result[0][row][col] = (R1 + R2) / 2; + // G + _result[1][row][col] = _input[1][row][col] * 2; + // B + _result[2][row][col] = (B1 + B2) / 2; + } else if (row % 2 == 0 && col % 2 == 1) { + // Red pixel + // Getting the G values + float G1 = _input[1][row - 1][col]; + float G2 = _input[1][row + 1][col]; + float G3 = _input[1][row][col - 1]; + float G4 = _input[1][row][col + 1]; + // Getting the B values + float B1 = _input[2][row - 1][col - 1]; + float B2 = _input[2][row - 1][col + 1]; + float B3 = _input[2][row + 1][col - 1]; + float B4 = _input[2][row + 1][col + 1]; + // R + _result[0][row][col] = _input[0][row][col]; + // G + _result[1][row][col] = (G1 + G2 + G3 + G4) / 2; + // B (center pixel) + _result[2][row][col] = (B1 + B2 + B3 + B4) / 4; + } else if (row % 2 == 1 && col % 2 == 0) { + // Blue pixel + // Getting the R values + float R1 = _input[0][row - 1][col - 1]; + float R2 = _input[0][row + 1][col - 1]; + float R3 = _input[0][row - 1][col + 1]; + float R4 = _input[0][row + 1][col + 1]; + // Getting the G values + float G1 = _input[1][row - 1][col]; + float G2 = _input[1][row + 1][col]; + float G3 = _input[1][row][col - 1]; + float G4 = _input[1][row][col + 1]; + // R + _result[0][row][col] = (R1 + R2 + R3 + R4) / 4; + // G + _result[1][row][col] = (G1 + G2 + G3 + G4) / 2; + // B + _result[2][row][col] = _input[2][row][col]; + } else { + // Bottom Green pixel + // Getting the R values + float R1 = _input[0][row - 1][col]; + float R2 = _input[0][row + 1][col]; + // Getting the B values + float B1 = _input[2][row][col - 1]; + float B2 = _input[2][row][col + 1]; + // R + _result[0][row][col] = (R1 + R2) / 2; + // G + _result[1][row][col] = _input[1][row][col] * 2; + // B + _result[2][row][col] = (B1 + B2) / 2; + } __visc__return(1, bytes_result); } static void sort(float arr[], int n) { - int i, j; -dn_sort_i: - for (i = 0; i < n - 1; i++) - dn_sort_j: - for (j = 0; j < n - i - 1; j++) - if (arr[j] > arr[j + 1]) { - float temp = arr[j]; - arr[j] = arr[j + 1]; - arr[j + 1] = temp; - } + int i, j; + dn_sort_i: + for (i = 0; i < n - 1; i++) + dn_sort_j: + for (j = 0; j < n - i - 1; j++) + if (arr[j] > arr[j + 1]) { + float temp = arr[j]; + arr[j] = arr[j + 1]; + arr[j + 1] = temp; + } } // Simple denoise -// void denoise_fxp(float *input, int row_size, int col_size, float *result) { -void denoise_fxp(float *input, size_t bytes_input, float *result, - size_t bytes_result, int row_size, int col_size) { +//void denoise_fxp(float *input, int row_size, int col_size, float *result) { +void denoise_fxp(float *input, size_t bytes_input, + float *result, size_t bytes_result, + int row_size, int col_size) { __visc__hint(DEVICE); __visc__attributes(2, input, result, 1, result); - + printf("Denoising.\n"); ARRAY_3D(float, _input, input, row_size, col_size); ARRAY_3D(float, _result, result, row_size, col_size); -dn_chan: + dn_chan: for (int chan = 0; chan < CHAN_SIZE; chan++) - dn_row: + dn_row: for (int row = 0; row < row_size; row++) - dn_col: + dn_col: for (int col = 0; col < col_size; col++) if (row >= 1 && row < row_size - 1 && col >= 1 && col < col_size - 1) { float filter[9]; - dn_slide_row: - for (int i = row - 1; i < row + 2; i++) - dn_slide_col: - for (int j = col - 1; j < col + 2; j++) { + dn_slide_row: + for (int i = row-1; i < row+2; i++) + dn_slide_col: + for (int j = col-1; j < col+2; j++) { int index = (i - row + 1) * 3 + j - col + 1; filter[index] = _input[chan][i][j]; } @@ -176,24 +179,25 @@ dn_chan: } // Color map and white balance transform -// void transform_fxp(float *input, int row_size, int col_size, float *result, +//void transform_fxp(float *input, int row_size, int col_size, float *result, // float *TsTw_tran) { -void transform_fxp(float *input, size_t bytes_input, float *result, - size_t bytes_result, float *TsTw_tran, size_t bytes_TsTw, +void transform_fxp(float *input, size_t bytes_input, + float *result, size_t bytes_result, + float *TsTw_tran, size_t bytes_TsTw, int row_size, int col_size) { __visc__hint(DEVICE); __visc__attributes(3, input, result, TsTw_tran, 1, result); - + printf("Color mapping.\n"); ARRAY_3D(float, _input, input, row_size, col_size); ARRAY_3D(float, _result, result, row_size, col_size); ARRAY_2D(float, _TsTw_tran, TsTw_tran, 3); -tr_chan: + tr_chan: for (int chan = 0; chan < CHAN_SIZE; chan++) - tr_row: + tr_row: for (int row = 0; row < row_size; row++) - tr_col: + tr_col: for (int col = 0; col < col_size; col++) _result[chan][row][col] = max(_input[0][row][col] * _TsTw_tran[0][chan] + @@ -206,18 +210,18 @@ tr_chan: // // Weighted radial basis function for gamut mapping // -// void gamut_map_fxp(float *input, int row_size, int col_size, float *result, -// float *ctrl_pts, float *weights, float *coefs, float -// *l2_dist) { -void gamut_map_fxp(float *input, size_t bytes_input, float *result, - size_t bytes_result, float *ctrl_pts, size_t bytes_ctrl_pts, - float *weights, size_t bytes_weights, float *coefs, - size_t bytes_coefs, float *l2_dist, size_t bytes_l2_dist, +//void gamut_map_fxp(float *input, int row_size, int col_size, float *result, +// float *ctrl_pts, float *weights, float *coefs, float *l2_dist) { +void gamut_map_fxp(float *input, size_t bytes_input, + float *result, size_t bytes_result, + float *ctrl_pts, size_t bytes_ctrl_pts, + float *weights, size_t bytes_weights, + float *coefs, size_t bytes_coefs, + float *l2_dist, size_t bytes_l2_dist, int row_size, int col_size) { __visc__hint(DEVICE); - __visc__attributes(6, input, result, ctrl_pts, weights, coefs, l2_dist, 1, - result); - + __visc__attributes(6, input, result, ctrl_pts, weights, coefs, l2_dist, 1, result); + printf("Gamut mapping.\n"); ARRAY_3D(float, _input, input, row_size, col_size); ARRAY_3D(float, _result, result, row_size, col_size); @@ -225,25 +229,26 @@ void gamut_map_fxp(float *input, size_t bytes_input, float *result, ARRAY_2D(float, _weights, weights, 3); ARRAY_2D(float, _coefs, coefs, 3); -// First, get the L2 norm from every pixel to the control points, -// Then, sum it and weight it. Finally, add the bias. -gm_rbf_row: + // First, get the L2 norm from every pixel to the control points, + // Then, sum it and weight it. Finally, add the bias. + gm_rbf_row: for (int row = 0; row < row_size; row++) - gm_rbf_col: + gm_rbf_col: for (int col = 0; col < col_size; col++) { - gm_rbf_cp0: + gm_rbf_cp0: for (int cp = 0; cp < num_ctrl_pts; cp++) { - l2_dist[cp] = sqrt((_input[0][row][col] - _ctrl_pts[cp][0]) * - (_input[0][row][col] - _ctrl_pts[cp][0]) + - (_input[1][row][col] - _ctrl_pts[cp][1]) * - (_input[1][row][col] - _ctrl_pts[cp][1]) + - (_input[2][row][col] - _ctrl_pts[cp][2]) * - (_input[2][row][col] - _ctrl_pts[cp][2])); + l2_dist[cp] = + sqrt((_input[0][row][col] - _ctrl_pts[cp][0]) * + (_input[0][row][col] - _ctrl_pts[cp][0]) + + (_input[1][row][col] - _ctrl_pts[cp][1]) * + (_input[1][row][col] - _ctrl_pts[cp][1]) + + (_input[2][row][col] - _ctrl_pts[cp][2]) * + (_input[2][row][col] - _ctrl_pts[cp][2])); } - gm_rbf_chan: + gm_rbf_chan: for (int chan = 0; chan < CHAN_SIZE; chan++) { float chan_val = 0.0; - gm_rbf_cp1: + gm_rbf_cp1: for (int cp = 0; cp < num_ctrl_pts; cp++) { chan_val += l2_dist[cp] * _weights[cp][chan]; } @@ -258,24 +263,25 @@ gm_rbf_row: } // Tone mapping -// void tone_map_fxp(float *input, int row_size, int col_size, float *tone_map, +//void tone_map_fxp(float *input, int row_size, int col_size, float *tone_map, // float *result) { -void tone_map_fxp(float *input, size_t bytes_input, float *result, - size_t bytes_result, float *tone_map, size_t bytes_tone_map, +void tone_map_fxp(float *input, size_t bytes_input, + float *result, size_t bytes_result, + float *tone_map, size_t bytes_tone_map, int row_size, int col_size) { __visc__hint(DEVICE); __visc__attributes(3, input, result, tone_map, 1, result); - + printf("Tone mapping.\n"); ARRAY_3D(float, _input, input, row_size, col_size); ARRAY_3D(float, _result, result, row_size, col_size); ARRAY_2D(float, _tone_map, tone_map, 3); -tm_chan: + tm_chan: for (int chan = 0; chan < CHAN_SIZE; chan++) - tm_row: + tm_row: for (int row = 0; row < row_size; row++) - tm_col: + tm_col: for (int col = 0; col < col_size; col++) { uint8_t x = _input[chan][row][col] * 255; _result[chan][row][col] = _tone_map[x][chan]; diff --git a/hpvm/test/hpvm-cava/src/pipe_stages.h b/hpvm/test/hpvm-cava/src/pipe_stages.h index 4fa24354c73a5792c9d6c344dbee5236f3379aa7..8d98cb65cc8af7353cc1faf08988f3b1a6758046 100644 --- a/hpvm/test/hpvm-cava/src/pipe_stages.h +++ b/hpvm/test/hpvm-cava/src/pipe_stages.h @@ -2,58 +2,59 @@ #define _PIPE_STAGES_H_ #include "defs.h" -#include <stddef.h> #define CHAN_SIZE 3 #define ISP 0x4 -#define max(a, b) \ - ({ \ - __typeof__(a) _a = (a); \ - __typeof__(b) _b = (b); \ - _a > _b ? _a : _b; \ - }) - -#define min(a, b) \ - ({ \ - __typeof__(a) _a = (a); \ - __typeof__(b) _b = (b); \ - _a < _b ? _a : _b; \ - }) - -#define abs(a) \ - ({ \ - __typeof__(a) _a = (a); \ - _a < 0 ? -_a : _a; \ - }) +#define max(a,b) \ + ({ __typeof__ (a) _a = (a); \ + __typeof__ (b) _b = (b); \ + _a > _b ? _a : _b; }) + +#define min(a,b) \ + ({ __typeof__ (a) _a = (a); \ + __typeof__ (b) _b = (b); \ + _a < _b ? _a : _b; }) + +#define abs(a) \ + ({ __typeof__ (a) _a = (a); \ + _a < 0 ? -_a : _a; }) extern int num_ctrl_pts; -void scale_fxp(uint8_t *input, size_t bytes_input, float *output, - size_t bytes_output, size_t row_size, size_t col_size); +void scale_fxp(uint8_t *input, size_t bytes_input, + float *output, size_t bytes_output, + size_t row_size, size_t col_size); -void descale_fxp(float *input, size_t bytes_input, uint8_t *output, - size_t bytes_result, size_t row_size, size_t col_size); +void descale_fxp(float *input, size_t bytes_input, + uint8_t *output, size_t bytes_result, + size_t row_size, size_t col_size); -void demosaic_fxp(float *input, size_t bytes_input, float *result, - size_t bytes_result, size_t row_size, size_t col_size); +void demosaic_fxp(float *input, size_t bytes_input, + float *result, size_t bytes_result, + size_t row_size, size_t col_size); -void denoise_fxp(float *input, size_t bytes_input, float *result, - size_t bytes_result, size_t row_size, size_t col_size); +void denoise_fxp(float *input, size_t bytes_input, + float *result, size_t bytes_result, + size_t row_size, size_t col_size); -void transform_fxp(float *input, size_t bytes_input, float *result, - size_t bytes_result, float *TsTw_tran, size_t bytes_TsTw, +void transform_fxp(float *input, size_t bytes_input, + float *result, size_t bytes_result, + float *TsTw_tran, size_t bytes_TsTw, size_t row_size, size_t col_size); -void gamut_map_fxp(float *input, size_t bytes_input, float *result, - size_t bytes_result, float *ctrl_pts, size_t bytes_ctrl_pts, - float *weights, size_t bytes_weights, float *coefs, - size_t bytes_coefs, float *l2_dist, size_t bytes_l2_dist, +void gamut_map_fxp(float *input, size_t bytes_input, + float *result, size_t bytes_result, + float *ctrl_pts, size_t bytes_ctrl_pts, + float *weights, size_t bytes_weights, + float *coefs, size_t bytes_coefs, + float *l2_dist, size_t bytes_l2_dist, size_t row_size, size_t col_size); -void tone_map_fxp(float *input, size_t bytes_input, float *result, - size_t bytes_result, float *tone_map, size_t bytes_tone_map, +void tone_map_fxp(float *input, size_t bytes_input, + float *result, size_t bytes_result, + float *tone_map, size_t bytes_tone_map, size_t row_size, size_t col_size); void tone_map_approx_fxp(float *input, size_t row_size, size_t col_size, diff --git a/hpvm/test/hpvm-cava/src/utility.c b/hpvm/test/hpvm-cava/src/utility.c index 86bd018183403f637ca8fb7cfb634a09c3ceace8..c1eaee3333c2afffdcae827f956efa4e25705352 100644 --- a/hpvm/test/hpvm-cava/src/utility.c +++ b/hpvm/test/hpvm-cava/src/utility.c @@ -1,7 +1,7 @@ -#include "utility.h" -#include "defs.h" -#include <assert.h> #include <stdlib.h> +#include <assert.h> +#include "defs.h" +#include "utility.h" void *malloc_aligned(size_t size) { void *ptr = NULL;