From 20c65dd282d6baaac81cbe345554f2ff6f445746 Mon Sep 17 00:00:00 2001 From: Adel Ejjeh <aejjeh@hpvmfpga1.cs.illinois.edu> Date: Sun, 19 Jan 2020 13:06:56 -0600 Subject: [PATCH] Modified CAVA to use node replication and fixed spmv and stencil makefile --- .../parboil/benchmarks/hpvm-cava/Makefile | 14 +-- .../benchmarks/hpvm-cava/Makefile.config | 3 +- .../parboil/benchmarks/hpvm-cava/src/main.c | 115 ++++++++++-------- .../benchmarks/hpvm-cava/src/pipe_stages.h | 16 +-- hpvm/test/parboil/benchmarks/spmv/Makefile | 2 +- .../parboil/benchmarks/spmv/src/visc/Makefile | 4 +- hpvm/test/parboil/benchmarks/stencil/Makefile | 2 +- 7 files changed, 86 insertions(+), 70 deletions(-) diff --git a/hpvm/test/parboil/benchmarks/hpvm-cava/Makefile b/hpvm/test/parboil/benchmarks/hpvm-cava/Makefile index 671c4f7c0a..d51743070c 100644 --- a/hpvm/test/parboil/benchmarks/hpvm-cava/Makefile +++ b/hpvm/test/parboil/benchmarks/hpvm-cava/Makefile @@ -87,7 +87,6 @@ APP_CUDALDFLAGS=-lm -lstdc++ APP_CFLAGS= $(INCLUDES) -DDMA_MODE -DDMA_INTERFACE_V3 APP_CXXFLAGS=-ffast-math -O0 -I/opt/opencv/include APP_LDFLAGS=$(LFLAGS) -OPT_FLAGS = -tti -targetlibinfo -tbaa -scoped-noalias -assumption-cache-tracker -profile-summary-info -forceattrs -inferattrs -ipsccp -globalopt -domtree -mem2reg -deadargelim -domtree -basicaa -aa -simplifycfg -pgo-icall-prom -basiccg -globals-aa -prune-eh -always-inline -functionattrs -domtree -sroa -early-cse -lazy-value-info -jump-threading -correlated-propagation -simplifycfg -domtree -basicaa -aa -libcalls-shrinkwrap -tailcallelim -simplifycfg -reassociate -domtree -loops -loop-simplify -lcssa-verification -lcssa -basicaa -aa -scalar-evolution -loop-rotate -licm -loop-unswitch -simplifycfg -domtree -basicaa -aa -loops -loop-simplify -lcssa-verification -lcssa -scalar-evolution -indvars -loop-idiom -loop-deletion -memdep -memcpyopt -sccp -domtree -demanded-bits -bdce -basicaa -aa -lazy-value-info -jump-threading -correlated-propagation -domtree -basicaa -aa -memdep -dse -loops -loop-simplify -lcssa-verification -lcssa -aa -scalar-evolution -licm -postdomtree -adce -simplifycfg -domtree -basicaa -aa -barrier -basiccg -rpo-functionattrs -globals-aa -float2int -domtree -loops -loop-simplify -lcssa-verification -lcssa -basicaa -aa -scalar-evolution -loop-rotate -loop-accesses -lazy-branch-prob -lazy-block-freq -opt-remark-emitter -loop-distribute -loop-simplify -lcssa-verification -lcssa -branch-prob -block-freq -scalar-evolution -basicaa -aa -loop-accesses -demanded-bits -lazy-branch-prob -lazy-block-freq -opt-remark-emitter -loop-vectorize -loop-simplify -scalar-evolution -aa -loop-accesses -loop-load-elim -basicaa -aa -simplifycfg -domtree -basicaa -aa -loops -scalar-evolution -alignment-from-assumptions -strip-dead-prototypes -domtree -loops -branch-prob -block-freq -loop-simplify -lcssa-verification -lcssa -basicaa -aa -scalar-evolution -branch-prob -block-freq -loop-sink -instsimplify CFLAGS = -O1 $(APP_CFLAGS) $(PLATFORM_CFLAGS) OBJS_CFLAGS = -O1 $(APP_CFLAGS) $(PLATFORM_CFLAGS) @@ -188,7 +187,7 @@ else ifeq ($(TARGET),fpga) else KERNEL_LINKED = $(BUILD_DIR)/$(APP).kernels.linked.ll #KERNEL = $(TEST_OBJS).kernels.ll - PTX_ASSEMBLY = $(TEST_OBJS).nvptx.s + KERNEL_OCL = $(TEST_OBJS).kernels.cl endif HOST_LINKED = $(BUILD_DIR)/$(APP).linked.ll @@ -201,14 +200,11 @@ FAILSAFE= endif # Targets -default: $(FAILSAFE) $(BUILD_DIR) $(EXE) -#default: $(FAILSAFE) $(BUILD_DIR) $(PTX_ASSEMBLY) $(SPIR_ASSEMBLY) $(AOC_CL) $(AOCL_ASSEMBLY) $(EXE) +default: $(FAILSAFE) $(BUILD_DIR) $(KERNEL_OCL) $(EXE) +#default: $(FAILSAFE) $(BUILD_DIR) $(KERNEL_OCL) $(SPIR_ASSEMBLY) $(AOC_CL) $(AOCL_ASSEMBLY) $(EXE) -$(PTX_ASSEMBLY) : $(KERNEL_LINKED) - $(CC) $(KERNEL_GEN_FLAGS) -S $< -o $@ - -$(KERNEL_LINKED) : $(KERNEL) - $(LLVM_LINK) $(LIBCLC_NVPTX_LIB) -S $< -o $@ +$(KERNEL_OCL) : $(KERNEL) + $(OCLBE) --debug $< -o $@ $(SPIR_ASSEMBLY) : $(KERNEL) python $(PYTHON_LLVM_40_34) $< $(BUILD_DIR)/kernel_34.ll diff --git a/hpvm/test/parboil/benchmarks/hpvm-cava/Makefile.config b/hpvm/test/parboil/benchmarks/hpvm-cava/Makefile.config index f724aaee7e..9ece12fe0e 100644 --- a/hpvm/test/parboil/benchmarks/hpvm-cava/Makefile.config +++ b/hpvm/test/parboil/benchmarks/hpvm-cava/Makefile.config @@ -5,13 +5,14 @@ CUDA_LIB_PATH=/usr/local/cuda/lib64 OPENCL_PATH=/opt/intel/opencl-sdk/ OPENCL_LIB_PATH=$(OPENCL_PATH)/lib64 -#LLVM_SRC_ROOT=/home/kotsifa2/HPVM/Gitlab/hpvm/llvm +LLVM_SRC_ROOT=/home/aejjeh/work_dir/hpvm-reorg-9/hpvm/llvm/ # NOTE: You may need to configure this based on your root path. VISC_SRC_ROOT=$(LLVM_SRC_ROOT) VISC_BUILD_DIR =$(VISC_SRC_ROOT)/../build CC = $(VISC_BUILD_DIR)/bin/clang PLATFORM_CFLAGS = -I$(LLVM_SRC_ROOT)/include -I$(VISC_BUILD_DIR)/include +OCLBE = $(VISC_BUILD_DIR)/bin/llvm-cbe CXX = $(VISC_BUILD_DIR)/bin/clang++ PLATFORM_CXXFLAGS = -I$(LLVM_SRC_ROOT)/include -I$(VISC_BUILD_DIR)/include diff --git a/hpvm/test/parboil/benchmarks/hpvm-cava/src/main.c b/hpvm/test/parboil/benchmarks/hpvm-cava/src/main.c index ea42ad0bf8..e43bbb4f25 100644 --- a/hpvm/test/parboil/benchmarks/hpvm-cava/src/main.c +++ b/hpvm/test/parboil/benchmarks/hpvm-cava/src/main.c @@ -34,7 +34,7 @@ typedef struct __attribute__((__packed__)) { float*coefs; size_t bytes_coefs; float *l2_dist; size_t bytes_l2_dist; float *tone_map; size_t bytes_tone_map; - int row_size; int col_size; + size_t row_size; size_t col_size; } RootIn; @@ -112,7 +112,7 @@ static struct argp parser = { options, parse_opt, args_doc, prog_doc }; // Helper function for printing intermediate results void descale_cpu(float *input, size_t bytes_input, uint8_t *output, size_t bytes_result, - int row_size, int col_size) { + size_t row_size, size_t col_size) { for (int chan = 0; chan < CHAN_SIZE; chan++) for (int row = 0; row < row_size; row++) @@ -142,18 +142,19 @@ static void sort(float arr[], int n) { // Leaf HPVM node function for scale void scale_fxp(uint8_t *input, size_t bytes_input, float *output, size_t bytes_output, - int row_size, int col_size) { + size_t row_size, size_t col_size) { //Specifies compilation target for current node - __visc__hint(DEVICE); + __visc__hint(CPU_TARGET); // Specifies pointer arguments that will be used as "in" and "out" arguments // - count of "in" arguments // - list of "in" argument , and similar for "out" __visc__attributes(2, input, output, 1, output); - + void* thisNode = __visc__getNode(); + int row = __visc__getNodeInstanceID_x(thisNode); for (int chan = 0; chan < CHAN_SIZE; chan++) - for (int row = 0; row < row_size; row++) +// for (int row = 0; row < row_size; row++) for (int col = 0; col < col_size; col++){ int index = (chan*row_size + row) * col_size + col; output[index] = input[index] * 1.0 / 255; @@ -164,8 +165,8 @@ void scale_fxp(uint8_t *input, size_t bytes_input, // Leaf HPVM node function for descale void descale_fxp(float *input, size_t bytes_input, uint8_t *output, size_t bytes_result, - int row_size, int col_size) { - __visc__hint(DEVICE); + size_t row_size, size_t col_size) { + __visc__hint(CPU_TARGET); __visc__attributes(2, input, output, 1, output); for (int chan = 0; chan < CHAN_SIZE; chan++) @@ -180,11 +181,13 @@ void descale_fxp(float *input, size_t bytes_input, // Leaf HPVM node function for demosaicing void demosaic_fxp(float *input, size_t bytes_input, float *result, size_t bytes_result, - int row_size, int col_size) { + size_t row_size, size_t col_size) { __visc__hint(DEVICE); __visc__attributes(2, input, result, 1, result); - for (int row = 1; row < row_size - 1; row++) + void* thisNode = __visc__getNode(); + int row = __visc__getNodeInstanceID_x(thisNode); +// for (int row = 1; row < row_size - 1; row++) for (int col = 1; col < col_size - 1; col++) { int index_0 = (0 * row_size + row) * col_size + col; int index_1 = (1 * row_size + row) * col_size + col; @@ -261,12 +264,14 @@ void demosaic_fxp(float *input, size_t bytes_input, // Leaf HPVM node function for denoise void denoise_fxp(float *input, size_t bytes_input, float *result, size_t bytes_result, - int row_size, int col_size) { - __visc__hint(DEVICE); + size_t row_size, size_t col_size) { + __visc__hint(CPU_TARGET); __visc__attributes(2, input, result, 1, result); + void* thisNode = __visc__getNode(); + int row = __visc__getNodeInstanceID_x(thisNode); for (int chan = 0; chan < CHAN_SIZE; chan++) - for (int row = 0; row < row_size; row++) +// for (int row = 0; row < row_size; row++) for (int col = 0; col < col_size; col++) if (row >= 1 && row < row_size - 1 && col >= 1 && col < col_size - 1) { float filter[9]; @@ -287,12 +292,14 @@ void denoise_fxp(float *input, size_t bytes_input, void transform_fxp(float *input, size_t bytes_input, float *result, size_t bytes_result, float *TsTw_tran, size_t bytes_TsTw, - int row_size, int col_size) { + size_t row_size, size_t col_size) { __visc__hint(DEVICE); __visc__attributes(3, input, result, TsTw_tran, 1, result); + void* thisNode = __visc__getNode(); + int row = __visc__getNodeInstanceID_x(thisNode); for (int chan = 0; chan < CHAN_SIZE; chan++) - for (int row = 0; row < row_size; row++) +// for (int row = 0; row < row_size; row++) for (int col = 0; col < col_size; col++) { int index = (chan * row_size + row) * col_size + col; int index_0 = (0 * row_size + row) * col_size + col; @@ -317,15 +324,19 @@ void gamut_map_fxp(float *input, size_t bytes_input, float *weights, size_t bytes_weights, float *coefs, size_t bytes_coefs, float *l2_dist, size_t bytes_l2_dist, - int row_size, int col_size) { + size_t row_size, size_t col_size) { __visc__hint(CPU_TARGET); __visc__attributes(6, input, result, ctrl_pts, weights, coefs, l2_dist, 2, result, l2_dist); // First, get the L2 norm from every pixel to the control points, // Then, sum it and weight it. Finally, add the bias. - - for (int row = 0; row < row_size; row++) + void* thisNode = __visc__getNode(); + int row = __visc__getNodeInstanceID_x(thisNode); +// for (int row = 0; row < row_size; row++) for (int col = 0; col < col_size; col++) { + float chan_val_0 = 0.0; + float chan_val_1 = 0.0; + float chan_val_2 = 0.0; for (int cp = 0; cp < 3702; cp++) { int index_0 = (0 * row_size + row) * col_size + col; int index_1 = (1 * row_size + row) * col_size + col; @@ -338,19 +349,25 @@ void gamut_map_fxp(float *input, size_t bytes_input, float val6 = (input[index_2] - ctrl_pts[cp * 3 + 2]); float val = val1 * val2 + val3 * val4 + val5 * val6; float sqrt_val = sqrt(val); - l2_dist[cp] = sqrt_val; - } - for (int chan = 0; chan < CHAN_SIZE; chan++) { - float chan_val = 0.0; - for (int cp = 0; cp < 3702; cp++) { - chan_val += l2_dist[cp] * weights[cp * CHAN_SIZE + chan]; - } - chan_val += coefs[0 * CHAN_SIZE + chan] + - coefs[1 * CHAN_SIZE + chan] * input[(0 * row_size + row) * col_size + col] + - coefs[2 * CHAN_SIZE + chan] * input[(1 * row_size + row) * col_size + col] + - coefs[3 * CHAN_SIZE + chan] * input[(2 * row_size + row) * col_size + col]; - result[(chan * row_size + row) * col_size + col] = max(chan_val, 0); + chan_val_0 += sqrt_val * weights[cp * CHAN_SIZE + 0]; + chan_val_1 += sqrt_val * weights[cp * CHAN_SIZE + 1]; + chan_val_2 += sqrt_val * weights[cp * CHAN_SIZE + 2]; } + chan_val_0 += coefs[0 * CHAN_SIZE + 0] + + coefs[1 * CHAN_SIZE + 0] * input[(0 * row_size + row) * col_size + col] + + coefs[2 * CHAN_SIZE + 0] * input[(1 * row_size + row) * col_size + col] + + coefs[3 * CHAN_SIZE + 0] * input[(2 * row_size + row) * col_size + col]; + chan_val_1 += coefs[0 * CHAN_SIZE + 1] + + coefs[1 * CHAN_SIZE + 1] * input[(0 * row_size + row) * col_size + col] + + coefs[2 * CHAN_SIZE + 1] * input[(1 * row_size + row) * col_size + col] + + coefs[3 * CHAN_SIZE + 1] * input[(2 * row_size + row) * col_size + col]; + chan_val_2 += coefs[0 * CHAN_SIZE + 2] + + coefs[1 * CHAN_SIZE + 2] * input[(0 * row_size + row) * col_size + col] + + coefs[2 * CHAN_SIZE + 2] * input[(1 * row_size + row) * col_size + col] + + coefs[3 * CHAN_SIZE + 2] * input[(2 * row_size + row) * col_size + col]; + result[(0 * row_size + row) * col_size + col] = max(chan_val_0, 0); + result[(1 * row_size + row) * col_size + col] = max(chan_val_1, 0); + result[(2 * row_size + row) * col_size + col] = max(chan_val_2, 0); } __visc__return(1, bytes_result); } @@ -359,12 +376,14 @@ void gamut_map_fxp(float *input, size_t bytes_input, void tone_map_fxp(float *input, size_t bytes_input, float *result, size_t bytes_result, float *tone_map, size_t bytes_tone_map, - int row_size, int col_size) { + size_t row_size, size_t col_size) { __visc__hint(DEVICE); __visc__attributes(3, input, result, tone_map, 1, result); + void* thisNode = __visc__getNode(); + int row = __visc__getNodeInstanceID_x(thisNode); for (int chan = 0; chan < CHAN_SIZE; chan++) - for (int row = 0; row < row_size; row++) +// for (int row = 0; row < row_size; row++) for (int col = 0; col < col_size; col++) { int index = (chan * row_size + row) * col_size + col; uint8_t x = input[index] * 255; @@ -383,13 +402,13 @@ void tone_map_fxp(float *input, size_t bytes_input, void scale_fxp_wrapper(uint8_t *input, size_t bytes_input, float *result, size_t bytes_result, - int row_size, int col_size) { + size_t row_size, size_t col_size) { __visc__hint(CPU_TARGET); __visc__attributes(2, input, result, 1, result); // Create an 1D (specified by 1st argument) HPVM node with 1 dynamic // instance (last argument) associated with node function scale_fxp - void *ScaleNode = __visc__createNodeND(1, scale_fxp, (size_t)1); + void *ScaleNode = __visc__createNodeND(1, scale_fxp, row_size); // Binds inputs of current node with specified node // - destination node @@ -410,10 +429,10 @@ void scale_fxp_wrapper(uint8_t *input, size_t bytes_input, void descale_fxp_wrapper(float *input, size_t bytes_input, uint8_t *result, size_t bytes_result, - int row_size, int col_size) { + size_t row_size, size_t col_size) { __visc__hint(CPU_TARGET); __visc__attributes(2, input, result, 1, result); - void *DescaleNode = __visc__createNodeND(1, descale_fxp, (size_t)1); + void *DescaleNode = __visc__createNodeND(1, descale_fxp, row_size); __visc__bindIn(DescaleNode, 0, 0, 0); // bind input __visc__bindIn(DescaleNode, 1, 1, 0); // bind bytes_input __visc__bindIn(DescaleNode, 2, 2, 0); // bind result @@ -426,10 +445,10 @@ void descale_fxp_wrapper(float *input, size_t bytes_input, void demosaic_fxp_wrapper(float *input, size_t bytes_input, float *result, size_t bytes_result, - int row_size, int col_size) { + size_t row_size, size_t col_size) { __visc__hint(CPU_TARGET); __visc__attributes(2, input, result, 1, result); - void *DemosaicNode = __visc__createNodeND(1, demosaic_fxp, (size_t)1); + void *DemosaicNode = __visc__createNodeND(1, demosaic_fxp, row_size); __visc__bindIn(DemosaicNode, 0, 0, 0); // bind input __visc__bindIn(DemosaicNode, 1, 1, 0); // bind bytes_input __visc__bindIn(DemosaicNode, 2, 2, 0); // bind result @@ -442,10 +461,10 @@ void demosaic_fxp_wrapper(float *input, size_t bytes_input, void denoise_fxp_wrapper(float *input, size_t bytes_input, float *result, size_t bytes_result, - int row_size, int col_size) { + size_t row_size, size_t col_size) { __visc__hint(CPU_TARGET); __visc__attributes(2, input, result, 1, result); - void *DenoiseNode = __visc__createNodeND(1, denoise_fxp, (size_t)1); + void *DenoiseNode = __visc__createNodeND(1, denoise_fxp, row_size); __visc__bindIn(DenoiseNode, 0, 0, 0); // bind input __visc__bindIn(DenoiseNode, 1, 1, 0); // bind bytes_input __visc__bindIn(DenoiseNode, 2, 2, 0); // bind result @@ -459,10 +478,10 @@ void denoise_fxp_wrapper(float *input, size_t bytes_input, void transform_fxp_wrapper(float *input, size_t bytes_input, float *result, size_t bytes_result, float *TsTw_tran, size_t bytes_TsTw, - int row_size, int col_size) { + size_t row_size, size_t col_size) { __visc__hint(CPU_TARGET); __visc__attributes(3, input, result, TsTw_tran, 1, result); - void *TransformNode = __visc__createNodeND(1, transform_fxp, (size_t)1); + void *TransformNode = __visc__createNodeND(1, transform_fxp, row_size); __visc__bindIn(TransformNode, 0, 0, 0); // bind input __visc__bindIn(TransformNode, 1, 1, 0); // bind bytes_input __visc__bindIn(TransformNode, 2, 2, 0); // bind result @@ -481,10 +500,10 @@ void gamut_fxp_wrapper(float *input, size_t bytes_input, float *weights, size_t bytes_weights, float *coefs, size_t bytes_coefs, float *l2_dist, size_t bytes_l2_dist, - int row_size, int col_size) { + size_t row_size, size_t col_size) { __visc__hint(CPU_TARGET); __visc__attributes(6, input, result, ctrl_pts, weights, coefs, l2_dist, 1, result); - void *GamutNode = __visc__createNodeND(1, gamut_map_fxp, (size_t)1); + void *GamutNode = __visc__createNodeND(1, gamut_map_fxp, row_size); __visc__bindIn(GamutNode, 0, 0, 0); // bind input __visc__bindIn(GamutNode, 1, 1, 0); // bind bytes_input __visc__bindIn(GamutNode, 2, 2, 0); // bind result @@ -505,11 +524,11 @@ void gamut_fxp_wrapper(float *input, size_t bytes_input, void tone_map_fxp_wrapper(float *input, size_t bytes_input, float *result, size_t bytes_result, float *tone_map, size_t bytes_tone_map, - int row_size, int col_size) { + size_t row_size, size_t col_size) { __visc__hint(CPU_TARGET); __visc__attributes(3, input, result, tone_map, 1, result); - void *ToneMapNode = __visc__createNodeND(1, tone_map_fxp, (size_t)1); + void *ToneMapNode = __visc__createNodeND(1, tone_map_fxp, row_size); __visc__bindIn(ToneMapNode, 0, 0, 0); // bind input __visc__bindIn(ToneMapNode, 1, 1, 0); // bind bytes_input __visc__bindIn(ToneMapNode, 2, 2, 0); // bind result @@ -538,7 +557,7 @@ void CamPipeRoot(/*0*/ uint8_t *input, /*1*/ size_t bytes_input, /*22*/ float*coefs, /*23*/ size_t bytes_coefs, /*24*/ float *l2_dist, /*25*/ size_t bytes_l2_dist, /*26*/ float *tone_map, /*27*/ size_t bytes_tone_map, - /*28*/ int row_size, /*29*/ int col_size) { + /*28*/ size_t row_size, /*29*/ size_t col_size) { //Specifies compilation target for current node __visc__hint(CPU_TARGET); @@ -656,7 +675,7 @@ int main(int argc, char* argv[]) { // Read a raw image. // NOTE: We deliberately perform this file I/O outside of the kernel. printf("Reading a raw image from %s\n", args.args[RAW_IMAGE_BIN]); - int row_size, col_size; + size_t row_size, col_size; uint8_t *image_in = read_image_from_binary(args.args[RAW_IMAGE_BIN], &row_size, &col_size); printf("Raw image shape: %d x %d x %d\n", row_size, col_size, CHAN_SIZE); diff --git a/hpvm/test/parboil/benchmarks/hpvm-cava/src/pipe_stages.h b/hpvm/test/parboil/benchmarks/hpvm-cava/src/pipe_stages.h index eae4347b99..8d98cb65cc 100644 --- a/hpvm/test/parboil/benchmarks/hpvm-cava/src/pipe_stages.h +++ b/hpvm/test/parboil/benchmarks/hpvm-cava/src/pipe_stages.h @@ -25,24 +25,24 @@ extern int num_ctrl_pts; void scale_fxp(uint8_t *input, size_t bytes_input, float *output, size_t bytes_output, - int row_size, int col_size); + size_t row_size, size_t col_size); void descale_fxp(float *input, size_t bytes_input, uint8_t *output, size_t bytes_result, - int row_size, int col_size); + size_t row_size, size_t col_size); void demosaic_fxp(float *input, size_t bytes_input, float *result, size_t bytes_result, - int row_size, int col_size); + size_t row_size, size_t col_size); void denoise_fxp(float *input, size_t bytes_input, float *result, size_t bytes_result, - int row_size, int col_size); + size_t row_size, size_t col_size); void transform_fxp(float *input, size_t bytes_input, float *result, size_t bytes_result, float *TsTw_tran, size_t bytes_TsTw, - int row_size, int col_size); + size_t row_size, size_t col_size); void gamut_map_fxp(float *input, size_t bytes_input, float *result, size_t bytes_result, @@ -50,14 +50,14 @@ void gamut_map_fxp(float *input, size_t bytes_input, float *weights, size_t bytes_weights, float *coefs, size_t bytes_coefs, float *l2_dist, size_t bytes_l2_dist, - int row_size, int col_size); + size_t row_size, size_t col_size); void tone_map_fxp(float *input, size_t bytes_input, float *result, size_t bytes_result, float *tone_map, size_t bytes_tone_map, - int row_size, int col_size); + size_t row_size, size_t col_size); -void tone_map_approx_fxp(float *input, int row_size, int col_size, +void tone_map_approx_fxp(float *input, size_t row_size, size_t col_size, float *result); #endif diff --git a/hpvm/test/parboil/benchmarks/spmv/Makefile b/hpvm/test/parboil/benchmarks/spmv/Makefile index 71e2246343..23e1d49900 100644 --- a/hpvm/test/parboil/benchmarks/spmv/Makefile +++ b/hpvm/test/parboil/benchmarks/spmv/Makefile @@ -1,4 +1,4 @@ -PARBOIL_ROOT = $(LLVM_SRC_ROOT)/../test/parboil +PARBOIL_ROOT = $(LLVM_SRC_ROOT)/tools/hpvm/test/parboil APP = spmv # Default compile visc diff --git a/hpvm/test/parboil/benchmarks/spmv/src/visc/Makefile b/hpvm/test/parboil/benchmarks/spmv/src/visc/Makefile index efed901e6e..a289d68f34 100644 --- a/hpvm/test/parboil/benchmarks/spmv/src/visc/Makefile +++ b/hpvm/test/parboil/benchmarks/spmv/src/visc/Makefile @@ -5,7 +5,7 @@ TOOLS_SRC=common_src/convert-dataset SRCDIR_OBJS=gpu_info.ll file.ll VISC_OBJS=main.visc.ll APP_CUDALDFLAGS=-lm -APP_CFLAGS=-ffast-math -O3 -I$(TOOLS_SRC) -APP_CXXFLAGS=-ffast-math -O3 -I$(TOOLS_SRC) +APP_CFLAGS=-ffast-math -O1 -I$(TOOLS_SRC) +APP_CXXFLAGS=-ffast-math -O1 -I$(TOOLS_SRC) include $(TOOLS_SRC)/commontools.mk diff --git a/hpvm/test/parboil/benchmarks/stencil/Makefile b/hpvm/test/parboil/benchmarks/stencil/Makefile index eeac0b5f7e..a44dd0dbf0 100644 --- a/hpvm/test/parboil/benchmarks/stencil/Makefile +++ b/hpvm/test/parboil/benchmarks/stencil/Makefile @@ -1,4 +1,4 @@ -PARBOIL_ROOT = $(LLVM_SRC_ROOT)/test/VISC/parboil +PARBOIL_ROOT = $(LLVM_SRC_ROOT)/tools/hpvm/test/parboil APP = stencil # Default compile visc -- GitLab