From 22976cf77148d4ee95bbd4fdee0804fda6e8ae31 Mon Sep 17 00:00:00 2001 From: Hashim Sharif <hsharif3@tyler.cs.illinois.edu> Date: Thu, 28 Nov 2019 01:37:44 -0600 Subject: [PATCH] Adding barebones test case for ApproxHPVM-HPVM merge --- .../benchmarks/cava_test/Makefile | 254 +++++ .../cava_test/src/cam-vision-native | Bin 0 -> 37128 bytes .../benchmarks/cava_test/src/cam_pipe.c | 139 +++ .../benchmarks/cava_test/src/cam_pipe.h | 7 + .../cava_test/src/cam_pipe_utility.c | 82 ++ .../cava_test/src/cam_pipe_utility.h | 16 + .../benchmarks/cava_test/src/defs.h | 224 +++++ .../benchmarks/cava_test/src/dma_interface.c | 73 ++ .../benchmarks/cava_test/src/dma_interface.h | 44 + .../benchmarks/cava_test/src/gem5_harness.h | 10 + .../benchmarks/cava_test/src/load_cam_model.c | 335 +++++++ .../benchmarks/cava_test/src/load_cam_model.h | 25 + .../benchmarks/cava_test/src/main.c | 156 ++++ .../benchmarks/cava_test/src/main_old.c | 865 ++++++++++++++++++ .../benchmarks/cava_test/src/new_main.c | 115 +++ .../benchmarks/cava_test/src/pipe_stages.c | 290 ++++++ .../benchmarks/cava_test/src/pipe_stages.h | 63 ++ .../benchmarks/cava_test/src/rename.sh | 5 + .../benchmarks/cava_test/src/utility.c | 11 + .../benchmarks/cava_test/src/utility.h | 8 + .../benchmarks/cava_test/src/visc.h | 110 +++ 21 files changed, 2832 insertions(+) create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/Makefile create mode 100755 llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/cam-vision-native create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/cam_pipe.c create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/cam_pipe.h create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/cam_pipe_utility.c create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/cam_pipe_utility.h create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/defs.h create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/dma_interface.c create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/dma_interface.h create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/gem5_harness.h create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/load_cam_model.c create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/load_cam_model.h create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/main.c create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/main_old.c create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/new_main.c create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/pipe_stages.c create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/pipe_stages.h create mode 100755 llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/rename.sh create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/utility.c create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/utility.h create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/visc.h diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/Makefile b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/Makefile new file mode 100644 index 0000000000..f6a82b3e3d --- /dev/null +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/Makefile @@ -0,0 +1,254 @@ +# This Makefile compiles the HPVM-CAVA pilot project. +# It builds HPVM-related dependencies, then the native camera pipeline ISP code. +# +# Paths to some dependencies (e.g., HPVM, LLVM) must exist in Makefile.config, +# which can be copied from Makefile.config.example for a start. + +CONFIG_FILE := Makefile.config + +ifeq ($(wildcard $(CONFIG_FILE)),) + $(error $(CONFIG_FILE) not found. See $(CONFIG_FILE).example) +endif +include $(CONFIG_FILE) + +# Compiler Flags + +DLEVEL ?= 0 +#CAM_CFLAGS?=-O0 \ + #-Wno-psabi \ + #-Wno-unused-label -Wno-unused-but-set-variable \ + #-Wno-maybe-uninitialized -DARCHITECTURE=SMV -DTRANSPOSE_WEIGHTS=1 \ + -DDEBUG_LEVEL=$(DLEVEL) +LFLAGS += -lm -lrt + +# Build dirs +ifeq ($(VERSION),) + VERSION = Default +endif +SRC_DIR = src/ +CAM_PIPE_SRC_DIR = $(SRC_DIR) +BUILD_DIR = build/$(TARGET)_$(VERSION) +CURRENT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) + +# Source files for the frontend camera pipeline +COMMON_SRCS = main.c \ + utility.c + + +CAM_PIPE_SRCS = load_cam_model.c \ + cam_pipe_utility.c \ + dma_interface.c # FIXME: This is a hack until external C++ files can be included in build. + + +# NOTE: We have temporarily removed gem5 and other dependencies for simplicity. +SRCS = $(COMMON_SRCS) $(CAM_PIPE_SRCS) +#GEM5_DMA_SRC = gem5/dma_interface.c +#GEM5_SYS_SRCS = gem5/aladdin_sys_connection.cpp gem5/aladdin_sys_constants.cpp +#GEM5_UTIL_SRCS = ../../util/m5/m5op_x86.S + +# NATIVE_FULL_PATH_SRCS contains all the full path source files for the camera vision pipeline. +NATIVE_FULL_PATH_SRCS = $(patsubst %, $(SRC_DIR)/%, $(COMMON_SRCS)) +NATIVE_FULL_PATH_SRCS += $(patsubst %, $(CAM_PIPE_SRC_DIR)/%, $(CAM_PIPE_SRCS)) + +# GEM5_FULL_PATH_SRCS contains all the full path source files for the gem5 files. +#GEM5_FULL_PATH_SRCS = $(patsubst %, $(ALADDIN_HOME)/%, $(GEM5_DMA_SRC) $(GEM5_SYS_SRCS) $(GEM5_UTIL_SRCS)) + +#INCLUDES += -I$(ALADDIN_HOME) \ +# -I$(ALADDIN_HOME)/../../include \ +# -I$(ALADDIN_HOME)/gem5 +INCLUDES += -I$(SRC_DIR) \ + -I$(CAM_PIPE_SRC_DIR) \ + +ifneq ($(CONFUSE_ROOT),) +INCLUDES += -I$(CONFUSE_ROOT)/include +LFLAGS += -L$(CONFUSE_ROOT)/lib +endif + +EXE = cava-visc-$(VERSION)-$(TARGET) + +CAM_CFLAGS += -mf16c -flax-vector-conversions +LFLAGS += -pthread + +#$(DEBUG): $(NATIVE_FULL_PATH_SRCS) $(GEM5_FULL_PATH_SRCS) +# @echo Building benchmark for native machine with debug support. +# #@mkdir -p $(BUILD_DIR) +# @$(CC) $(CAM_CFLAGS) -ggdb3 $(INCLUDES) -DGEM5 -DDMA_MODE -DDMA_INTERFACE_V3 -o $(DEBUG) $^ $(LFLAGS) +# +#clean-native: +# rm -f $(NATIVE) $(DEBUG) + +## BEGIN HPVM MAKEFILE +LANGUAGE=visc +SRCDIR_OBJS= load_cam_model.ll cam_pipe_utility.ll dma_interface.ll utility.ll +OBJS_SRC=src/cam_pipe.c src/pipe_stages.c src/load_cam_model.c src/cam_pipe_utility.c src/dma_interface.c src/utility.c +VISC_OBJS=main.visc.ll +APP = $(EXE) +APP_CUDALDFLAGS=-lm -lstdc++ +APP_CFLAGS= $(INCLUDES) -DDMA_MODE -DDMA_INTERFACE_V3 +APP_CXXFLAGS=-ffast-math -O0 -I/opt/opencv/include +APP_LDFLAGS=$(LFLAGS) +OPT_FLAGS = -tti -targetlibinfo -tbaa -scoped-noalias -assumption-cache-tracker -profile-summary-info -forceattrs -inferattrs -ipsccp -globalopt -domtree -mem2reg -deadargelim -domtree -basicaa -aa -simplifycfg -pgo-icall-prom -basiccg -globals-aa -prune-eh -always-inline -functionattrs -domtree -sroa -early-cse -lazy-value-info -jump-threading -correlated-propagation -simplifycfg -domtree -basicaa -aa -libcalls-shrinkwrap -tailcallelim -simplifycfg -reassociate -domtree -loops -loop-simplify -lcssa-verification -lcssa -basicaa -aa -scalar-evolution -loop-rotate -licm -loop-unswitch -simplifycfg -domtree -basicaa -aa -loops -loop-simplify -lcssa-verification -lcssa -scalar-evolution -indvars -loop-idiom -loop-deletion -memdep -memcpyopt -sccp -domtree -demanded-bits -bdce -basicaa -aa -lazy-value-info -jump-threading -correlated-propagation -domtree -basicaa -aa -memdep -dse -loops -loop-simplify -lcssa-verification -lcssa -aa -scalar-evolution -licm -postdomtree -adce -simplifycfg -domtree -basicaa -aa -barrier -basiccg -rpo-functionattrs -globals-aa -float2int -domtree -loops -loop-simplify -lcssa-verification -lcssa -basicaa -aa -scalar-evolution -loop-rotate -loop-accesses -lazy-branch-prob -lazy-block-freq -opt-remark-emitter -loop-distribute -loop-simplify -lcssa-verification -lcssa -branch-prob -block-freq -scalar-evolution -basicaa -aa -loop-accesses -demanded-bits -lazy-branch-prob -lazy-block-freq -opt-remark-emitter -loop-vectorize -loop-simplify -scalar-evolution -aa -loop-accesses -loop-load-elim -basicaa -aa -simplifycfg -domtree -basicaa -aa -loops -scalar-evolution -alignment-from-assumptions -strip-dead-prototypes -domtree -loops -branch-prob -block-freq -loop-simplify -lcssa-verification -lcssa -basicaa -aa -scalar-evolution -branch-prob -block-freq -loop-sink -instsimplify + +CFLAGS = -O1 $(APP_CFLAGS) $(PLATFORM_CFLAGS) +OBJS_CFLAGS = -O1 $(APP_CFLAGS) $(PLATFORM_CFLAGS) +CXXFLAGS = $(APP_CXXFLAGS) $(PLATFORM_CXXFLAGS) +LDFLAGS= $(APP_LDFLAGS) $(PLATFORM_LDFLAGS) + +LIBCLC_LIB_PATH = $(LLVM_SRC_ROOT)/../libclc/built_libs +VISC_RT_PATH = $(LLVM_SRC_ROOT)/projects/visc-rt + +VISC_RT_LIB = $(VISC_RT_PATH)/visc-rt.ll +#LIBCLC_NVPTX_LIB = $(LIBCLC_LIB_PATH)/nvptx--nvidiacl.bc +LIBCLC_NVPTX_LIB = $(LIBCLC_LIB_PATH)/nvptx64--nvidiacl.bc +#LIBCLC_NVPTX_LIB = nvptx64--nvidiacl.bc + +LLVM_34_AS = $(LLVM_34_ROOT)/build/bin/llvm-as + +TESTGEN_OPTFLAGS = -load LLVMGenVISC.so -genvisc -globaldce +KERNEL_GEN_FLAGS = -O3 -target nvptx64-nvidia-nvcl + +ifeq ($(TARGET),x86) + DEVICE = SPIR_TARGET + VISC_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_SPIR.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -localmem -dfg2llvm-spir -dfg2llvm-x86 -clearDFG + CFLAGS += -DOPENCL_CPU + VISC_OPTFLAGS += -visc-timers-x86 -visc-timers-spir +else ifeq ($(TARGET),seq) + DEVICE = CPU_TARGET + VISC_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG + VISC_OPTFLAGS += -visc-timers-x86 +else ifeq ($(TARGET),fpga) + DEVICE = FPGA_TARGET + VISC_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_FPGA.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -localmem -dfg2llvm-fpga -dfg2llvm-x86 -clearDFG + CFLAGS += -DOPENCL_CPU + VISC_OPTFLAGS += -visc-timers-x86 -visc-timers-fpga +else + DEVICE = GPU_TARGET + VISC_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_NVPTX.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -localmem -dfg2llvm-nvptx -dfg2llvm-x86 -clearDFG + VISC_OPTFLAGS += -visc-timers-x86 -visc-timers-ptx +endif + TESTGEN_OPTFLAGS += -visc-timers-gen + +CFLAGS += -DDEVICE=$(DEVICE) +CXXFLAGS += -DDEVICE=$(DEVICE) + +#ifeq ($(TIMER),x86) +# VISC_OPTFLAGS += -visc-timers-x86 +#else ifeq ($(TIMER),ptx) +# VISC_OPTFLAGS += -visc-timers-ptx +#else ifeq ($(TIMER),gen) +# TESTGEN_OPTFLAGS += -visc-timers-gen +#else ifeq ($(TIMER),spir) +# TESTGEN_OPTFLAGS += -visc-timers-spir +#else ifeq ($(TIMER),fpga) +# TESTGEN_OPTFLAGS += -visc-timers-fpga +#else ifeq ($(TIMER),no) +#else +# ifeq ($(TARGET),x86) +# VISC_OPTFLAGS += -visc-timers-x86 -visc-timers-spir +# else ifeq ($(TARGET),seq) +# VISC_OPTFLAGS += -visc-timers-x86 +# else ifeq ($(TARGET),fpga) +# VISC_OPTFLAGS += -visc-timers-x86 -visc-timers-fpga +# else ifeq ($(TARGET),seqx86) +# VISC_OPTFLAGS += -visc-timers-x86 -visc-timers-spir +# else ifeq ($(TARGET),seqgpu) +# VISC_OPTFLAGS += -visc-timers-x86 -visc-timers-ptx +# else +# VISC_OPTFLAGS += -visc-timers-x86 -visc-timers-ptx +# endif +# TESTGEN_OPTFLAGS += -visc-timers-gen +#endif + +# Add BUILDDIR as a prefix to each element of $1 +INBUILDDIR=$(addprefix $(BUILD_DIR)/,$(1)) + +# Add SRCDIR as a prefix to each element of $1 +#INSRCDIR=$(addprefix $(SRCDIR)/,$(1)) + +PYTHON_LLVM_40_34 = ../llvm-40-34.py + +.PRECIOUS: $(BUILD_DIR)/%.ll + +OBJS = $(call INBUILDDIR,$(SRCDIR_OBJS)) +TEST_OBJS = $(call INBUILDDIR,$(VISC_OBJS)) +KERNEL = $(TEST_OBJS).kernels.ll + +ifeq ($(TARGET),x86) + SPIR_ASSEMBLY = $(TEST_OBJS).kernels.bc +else ifeq ($(TARGET),seq) +else ifeq ($(TARGET),fpga) + AOC_CL = $(TEST_OBJS).kernels.cl + AOCL_ASSEMBLY = $(TEST_OBJS).kernels.aocx + BOARD = a10gx + ifeq ($(EMULATION),1) + EXE = cava-visc-emu + AOC_EMU = -march=emulator + BUILD_DIR = build/$(TARGET)-emu + endif +else + KERNEL_LINKED = $(BUILD_DIR)/$(APP).kernels.linked.ll + #KERNEL = $(TEST_OBJS).kernels.ll + PTX_ASSEMBLY = $(TEST_OBJS).nvptx.s +endif + +HOST_LINKED = $(BUILD_DIR)/$(APP).linked.ll +HOST = $(BUILD_DIR)/$(APP).host.ll + +ifeq ($(OPENCL_PATH),) +FAILSAFE=no_opencl +else +FAILSAFE= +endif + +# Targets +default: $(FAILSAFE) $(BUILD_DIR) $(EXE) +#default: $(FAILSAFE) $(BUILD_DIR) $(PTX_ASSEMBLY) $(SPIR_ASSEMBLY) $(AOC_CL) $(AOCL_ASSEMBLY) $(EXE) + +$(PTX_ASSEMBLY) : $(KERNEL_LINKED) + $(CC) $(KERNEL_GEN_FLAGS) -S $< -o $@ + +$(KERNEL_LINKED) : $(KERNEL) + $(LLVM_LINK) $(LIBCLC_NVPTX_LIB) -S $< -o $@ + +$(SPIR_ASSEMBLY) : $(KERNEL) + python $(PYTHON_LLVM_40_34) $< $(BUILD_DIR)/kernel_34.ll + $(LLVM_34_AS) $(BUILD_DIR)/kernel_34.ll -o $@ + +$(AOCL_ASSEMBLY) : $(AOC_CL) + aoc --report $(AOC_EMU) $(AOC_CL) -o $(AOCL_ASSEMBLY) -board=$(BOARD) + +$(AOC_CL) : $(KERNEL) + llvm-cbe --debug $(KERNEL) + +$(EXE) : $(HOST_LINKED) + $(CXX) -O3 $(LDFLAGS) $< -o $@ + +$(HOST_LINKED) : $(HOST) $(OBJS) $(VISC_RT_LIB) + $(LLVM_LINK) $^ -S -o $@ + +$(VISC_RT_LIB) : $(VISC_RT_PATH)/visc-rt.cpp + make -C $(LLVM_LIB_PATH) + +$(HOST) $(KERNEL): $(BUILD_DIR)/$(VISC_OBJS) + $(OPT) -debug $(VISC_OPTFLAGS) -S $< -o $(HOST) +# mv *.ll $(BUILD_DIR) +# $(OPT) -debug-only=DFG2LLVM_SPIR,DFG2LLVM_X86,DFG2LLVM_FPGA,GENVISC $(VISC_OPTFLAGS) -S $< -o $(HOST) +#$(OBJS): $(OBJS_SRC) +# $(CC) $(OBJS_CFLAGS) -emit-llvm -S -o $@ $< + +$(BUILD_DIR): + mkdir -p $(BUILD_DIR) + +$(BUILD_DIR)/%.ll : $(SRC_DIR)/%.c + $(CC) $(OBJS_CFLAGS) -emit-llvm -S -o $@ $< + +$(BUILD_DIR)/main.ll : $(SRC_DIR)/main.c + $(CC) $(CFLAGS) -emit-llvm -S -o $@ $< + +#$(BUILD_DIR)/main.opt.ll : $(BUILD_DIR)/main.ll +# $(OPT) $(OPT_FLAGS) $< -S -o $@ + +$(BUILD_DIR)/main.visc.ll : $(BUILD_DIR)/main.ll + $(OPT) -debug-only=genvisc $(TESTGEN_OPTFLAGS) $< -S -o $@ + +## END HPVM MAKEFILE diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/cam-vision-native b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/cam-vision-native new file mode 100755 index 0000000000000000000000000000000000000000..938e1b1a15d86e71b9b044e594472dad10fd9bc2 GIT binary patch literal 37128 zcmeHwdtg&lw)e>cC{UW9s8w+c-sltuYRkiE1!s~rffGqe?L&%!Nl8;$?Q7Zu3Rj^{ zTjWd#_Kpt2cdz5^%$=Fb%ymZYTy>C9NNEE_9YuVBcop9%_$a7Q5Xkpi`|OjPq^-`q z-tYVV`A&Pz-h1t})?Rz>wby>0ob`6+0<A_PD3@0Ffk2@HlPnA*KLtUQSdhc=gz<t! zxLTMhj0I#2ZVQVcSN19H5NEBpX&g@nSC2bSD+BVh7UoghsWKQT$d@7^xs+bwvT(2L zb}W;FiYo{v1|T|;b#OTg!u2qZ;&ySSnS{hH6MK?)1WLP;1>t&_M{x(aV2Z2ckMc%$ z8P8vt)`Ad)E#wF`_Dr~hOIPs+{wnf^Z)HKa7UoghWjsB_Rp@R7-8h-#Z+aRzRtZmU zv~n?W`4yKZFWgX9yW++<H`G<ltgCGZw9ahJyK&}?bFy0+vuCk<6Mw>8SX9nZQp(YU z5Xn3RH|YSyuY2nk>wfw4;Z5K9w{zxTqq+6f8LvG|_*dg59;`~_gqbu1&p#PdR0E;m z*CgOyG#o6ni~^qsT+;~nI{+JrPu3`W-W&zrI|_bm6#QpKfxj>c{K`@I-#QBX#!=wb zQQ+T2x+dJI*D(M_DxZ!~;8ml*$BvTjKa7I!7zO`dM!|n?6!?vRn{cOI*`x4(1c)Q$ zciJfUCq}{l-6-%|05{=Iy{3TwWZ?>-&Ff)Nik%VA4(qAe-6*&nWiEG>ui3Y<w#Dyj zE^`&uH8%LlDp%C`1h;!-ePe^W#b4R%ce@4mEvwum9IvphvZck>B9o6u-14Jle>El4 z<gaP=RaQ}QITZHtgu%DT+qA~*t+^X1H8s~Z_^VmK*IMfrsvDbp4MJ06OKq#W-dA5) zSGy9yfFDVcK;F9v)m>d#TL%Ei0ufoL#oz32tP+@Wx9ku~wJL#XZ(U=HPXN=pMz65a z=l3<N0wzkKS*UIF`s;-1)y=hjpHNML69UYYPpE8O+2n4jYzAq|niemp1xQxkxQa@O zGR8})zOuGKMuwn%pS#9a*F>y*!a}E`pwK-ldsg<`xPNx`oVb4uYDtSf`AI%4?49f? za6Ni5Ev%soRWK<`e)-Y}^d|^pPffT3SJaND9<Y^wxHRCp7OrB?{g+d`Ug$tP!KT#K zP9jU&#lteaHcmg}_5e$^u?FUoABvt)w@CI@xiUQ-=B2)di@?Gr6<m>>BC=F)rLQ7* zjtZ{s&+}C9cvlR3iwdsp=R_4e-X?%wtb(IJ;#Y+VJ|PYj1dj?%eSvb-sNm@e5N?wS zjslKfttxm%94ZKHD!5Wt6unUepRB^~Qo+^sW~&N*u?oLi1*g7Mxwfg`mncBE+g0#O zRq!1u_+=`1SOvda1@BkEuTa4csNh$s;D=Ojbq9V_1;0v#FSdW2CI-j2%1wgU8TOBf z{smemw)ds=%1E)f1&E4WYs1rUwFN#3*HGlZ(HO34Z=x`%>c9aOo<(6&(Sa}vUr%8j zg}1ZtH54XQ9q4A^D=17VI?%<!7g3m0bD)idCs3GFa-fNYwG<{*9PqI4*)oJl1qX^* z_#}l%^#&{~e2l`RasxRm{4Rw_)dox~{04<d#RiNle2~JVS_1+LKTBb1x&wn30JwG! zg-OK*j<WDh3X^IL9AM!;QJ7R}Ak4zQr!c9~z;+h?6@{lzxSNIlk;0cyxQm5<N?}r& zfi@QY357{j2AWv-#}v+_u!n`$QkYa`pjeJCH3`*S2(!x>Dl?~v?e8BIrIV2}pM|9h z)>!jUxji?WyMMQ-(u0Y|2iB5OXUMf3s^HmYKR{Kv&;ANgSBXLWwOR0pl0BLmj+|d_ z-yRuS9sO4%BnHd3?|Qb#BJ3uD+_1RM-VKiccN@9UcNx;X+r?1%!(u3~HG1D{K?u4Y z4wi4V?jU^-rQTgbt~i6}zk?tc*cv2sSGOp=9er#ru)O74MX&u~(eT(FF*M&2-3i^2 z*@)6!F?cU9d!v^?fj|gy&~78z+4DggEZ?!~qlApnB1(dZAO-{5K)fS*1X78?<<Tl} z_EeVFfC1|7$qnCOy?wbOv|^|?+Il_~Gh9t0E@bo*hxF2&qVz7xbg;P0u`j$GSrGU2 zSeUb~E!`qSSA*b-YQxnwLD+G>*CUjRGuz;Z?L!$T?p0UPpqDkb$;)?BMC>S#W&X>1 zsg+e{E8$%fUoPJFDPz!cK?eq+G${5&e<*79iZ5O8UlQjwQQ>B|T3FsIr{8w72JJ-% zOfCmuv;hjdeD9b~p)#7!SxUQwJ4vt~3V|`l9|T>T*$E<u9auiF7BmTY+G7pYGKcg< zd8rt@E6dP9dLl|MufMu8X6X1i0`0NtzzR_cWafS%MekT{U4~>(4(&0&|Bm}&&=pOf zFnEh8_mlQB+SUIl25-I{!pI7-vma%+OID}o#VA8DICnl`chcaM>h#60VzJ}DWBi^W zix5x1A~>awc2c9yJr$jPE*6vQ=H&E)1*Y~P?W)VbasUk^&hj3x3>KQ4QdI<X8)vBI zcNi#=9TF#E6Xj7(jcVUQ6G^>qp&3s|iWU_anmn%Wy?ZZTXw;y^K$UoXppj*Y(%D@W zhWL@Hh1tw$x#8#x2;UhFn8cvdoCcjtm)l=ZZw_1wz05>uwt-Z5s%Ei8*hyw@S6eaK zP#%KOx6j66(P!~U$Y)(*z0Dz=ww6<^y^30sF6<;Jozja=sh3F;w3%x}3CTHwesi%i zSY@sdgLP()DCy17kMq&6#_YM(1d0nz>2zp{Mf4sNd*0PXUt&@J%A!6+`KzLyL=?+` zD1Ejw2g!-j3(;RP;CwS>LzLbk-LNjV-f4C36kv`AevIhqE|S+Jy+HDomr5^7e}#ym z80;{UMRNupGLwReaJRGYCUZA)A2yTG5rdn|TgjDBn9?DqRAx3A9*c<59J2^NQbiW_ zIfIXy+koW^{@T2eT;x5^2I-yBGII`<gwY8GSuXEcr*t-YERRqTc1i=%G3hOEh;Gpn z**kLZv*_#SUgEJIMTgLP5Jj2_r3^hl%x^Y1gUif$30!Nb6BR8IwkaA$M^2@d<&usn ztfzhlYR*#q$SIwOJ_8*WgZG$?D8vd#cZX9t6)(qu^$?Y{j^z61h51+MWfW$sDBWW= z?OqLi5hcGlD|-D98d_)A(D6??LD=2x^$0;&z<v7AKg2FvxN!QVY0uvGZW}ItRC);( zw`ah3dQTh5&tQ8=3@z4*2HP3wCE=x=RVYgFzPJAc48;5P5ufN|#Cq2a-pYFSs@fLu zsai~tnrfSTEEQ48;c@kiRiY%OiBgkM+|^+Feoju#4TAWTudd~WqT0I~0XGHb7A`hH zU@<c~;qL@tCqxw-Qr|9eMO4TW*;E4`QN`y(Yfht0&&>b<ODGzJ!UyMGDB@v4%8Pz} z9RUZ_Kuh#3baq@gn^}y$cs;m#UnYWL(Ho&I$Qd$2Pd<vyK|6)(=y_ku+KE{X6KX~D zG9D>KqUrGNM*1XeGzLpT7-^z+Oe0dJ{!HxoNE`i-M;H-tn>ymJh}bPRAe%Wex{HF; zS{%~e=)V(@_vY(>?~wK}K93LxtvH8tB#kw6O3xF$7W44nn6F$&6VZ80g;eczEQ@}V zM_VX5Qx*LY^~0bOVE|Zjj=stv#ZD>4ns2lh5a}&3INxk|>`Sz!RYYKNc@H`z{Fj+) zqQ7J`^UYlQqqKCQY9T*OOKL>_L2W9f5gClE5qYe)fd@65EY@&%`-rw>3IdvrQ#zxx zp0_Y$rWRGBpxldY!x=0yw~A<oHI&ahm?DT6Eo9Q6WksidNwxHLbQ)Zz^jY+B=6)2N zOmI+GqT{$vjGq5Oh80IY=e~;Q0QY&KZ*yN}^i}RNMW5rotY{B>DA4Fr%#B6=oyRnZ z_w|uoN*Kt2Ei7u%4XB{q_mSa8PUW65dIYktAy~8rzOHi7yN5a;m{Hm0@5AWMA-#YR zT))_J6v~PbnFJJjx9HvLFg&&|@|E$bQ!pHnGg;SOklmNzKQ09x6(!e3v@RgGiP8a# zIkXtv?LgAPzD_@e1#;`Nhec^f=H1x}s0|^hvj#pHXzvQzABBP0yaj_aF=*dGLr2&x z<lUb82}Yv!-2I|vkEoMrB>Ps3L+lt&Y_`V<mTz|k%e!s~r4xg#MWLyeIzv+?IyGTY zb4bkJYj`m1%758v*s{l#cEHf#2A#D%c9CI2IeZT3q_zEBgQMq|)^Yj`cy;#tvv$M6 z*O7APK|{w+kRNajyS#@ZXO@H`;UzJYORv-L*z$gB`&o@6G<8FP;Ys}!k*}@~N4~l< zvV%JNNbl`Kk+YNM`HK)|yF$ZA2bb&zg)WXr)1b)bxAy~akv?E%m|7)~I2IKnXZQDM zHt8$7^s;UG%TCQ<QRmuj3zZI9dyeU??RzwK=@o0w`+A$?+vW`VcDOVjyEN~?w{dZ3 z>cjTj_akQ;HP-e%jo98}bhP(c`rciW(;qq0awv^_VLWD^h@5CSOu^av`&O)(9cHnS zvmaiFoM}4sDZ#@Cor?6_c`ma5&M*4jvuB>tM9$xSs;{SYdJDy!0Q?-VPkh?fv!<#4 zQ-*%_!~KLFIdSLnDI^E_de(k8DD!e<?k8URiPxf|AcSyDVd4-b%E*~J`}*DsWQGYT zOh|=;_WV~2KQ=>`g05|v{`}_*9XTj2C_~s8T;A{S?iam>Sb_F0IfWW~3HRZmkojU! z^D;_%>Xj(uUewv1P=&^t|90fL+XwsZTRVBqZw;HHs2tIIgou*}P=^U3EfJvti6|XJ z|1k;?bS6QhBO<5=5zPnDx>1NAa)L;oBA#Ouq9pOgq=+|V6rv>Y#-@n(Hq?2ff=Ci? zT#9(VPY^jYZ=kdnu+oNfeLa6K7r8A|k!DvF-xkAWKbd^gol7ovYHCzz=vZC-i$YT` zc4%I<gU>`YkMYEVO0x&0`G{e24tQ{SHAw{Gs>vo08C5sP=78)B8reu>DH2aqBnDYJ z$o7mv2FcZ_rKboP9EB`J$Vo{;uB%8Eusxi?N+SXpt@-=n=`n?5@~yj0+)4`UfcexM zfQ?EcTqqUUsXcMU-2(I0ZEf$-ICb`Zr>-7Wb021pwtUy4hR&a2zTnh7L6#d6;x<up z?~e9AkagD>ItvjO8Cs30urgLx<}pQ~DcamH#_NVB)72KO2MPCbV>bVOu|14tw{QJn zHjAX0Ep<yy@242uTc0MoS1NknfC?OsM3)>CLo<E>kPta*oYXN(5Q;)`{y<@S?vc(@ z4(Y5z+P|)x&wp~mod?%v9RI$3qaNlitjXWA?t6Vb%}ez^Mu!p^%HFf;13de!JCKps zs>6}BGr|ef^{-mndo>QtVTb0JE&r{+pPZTjr{=V^{bNnwr%Jag&s))*T9-Jzd#$&% zf2xrO(qd?8Cy9klBHVsKBVyLD4I}A~4V#al`GuNeVrU9O5JUQX$ji)dE;>w`ISR$a zVAW>El1QN*6u%Zjb0$ZIvOW+sJ&4}{YQ#IuVKJ0G7WFA=20?z+y&xyD17gT_nVt~( zDR>C%uO!bP*qtNK5c<vbd(6Mq7&d2s7Y0vJn$2zB#Ta84H0Z><?vRM_&kzlQ4zwTD zIHeD0Ak`1@ZV>DcHGg$UpE@<iXk-PwdI1qni|}tlwlP-fabVUcMaAimVVU`;Lv!RQ z%GLlWJk>AK@75SPW&#M=m*}n1>As%3DoXVTrQx=uMUE|fIdZZB_I>SSW9018UO-Fq zkz*AH2{M@;+V;Ju)Ly-#y-zQ;N8pCx?t$Bbn_PnJ1#BPOUbtaBwA(fhnKy){BW)YT zY27r2=)4>CZ<SuOwtu7vT!W-8W)iJ6I?a@3DamBl53B?hn>k_F0KK;&*k@g9EEhQT z4<MFtjydxC3N-t*MhxL2&sKyZYmE~?UoQ0Z6jYo+sDRkH@Tk~>&>ed4n4C$G7%Eis zMD{HG3K{OBEatO(4qU9Hd#VS?K8xikF!&)$w4h=hB^3bkwZ^e5xwFQL5PeHpWKYF< zIT3Ir_J5BM3UbfV^Talf9vWa;^yJeH{DGFeP`Qb^)L@8aGas$)VpZFYIE|=*W@60# zG<m`_&^=1NetJL^p>RNpYVt|bANUzoXOxCwb@wLbn~C*C??Hz&M9rElINCzDUr>!p zkwc72?V-gN9MVC|N-_C+hL(3RBl;OMNst1M%Y#o>7Y3L6?Ays!IHU_~3`!b^QKWY- zjW3;81jM`yYaH@0^^kb_kTawgg7(L0%sC#)AO>AsPU+rl7`$#J=AzUue$S4PEv+)T z`Z4;H*ud2hT8Lt?he?kyG8Das4V!;W`0Zyjh7BLXXA4zoY1Po_J&r*v3MtT^i_YwQ z$Mol6#6H6P*>NY+qfo0B6)>Uimr#};r@=sp-WURI+pn-1yI1ejd`1cli{(z=9XaU^ zQ@!dVCzr$H-V-@HdBHhr{vQ8yYyRPN6WWI~>(JBpuKzu-3K%o6U`WISh=JEJ<kL7c zfqvXr*Id2>Znvmez7=jb-}SiRf$vf223@f4t}ch>#rFE`P`KAg;f7XYF(FND<Jqz& z@(E7hg5i97*o0BG7+PR5Jh(^9--q#b+8#s4<uEK5?`9Y_7~lh9`_Zde4V^yJesr8I z|A5u7a8K^R&akxC(D5LZq_-E9g}Ot<IYLv$i8^~1OenI2(wZz7I<5vE%A~&i0zoW# zLx+}w(%LWR2$W{%_&~NU1?lY<s2D8ihK_y?GPYkBLm;D}<KH-FO8bRz1e#*#+{{6l zJ8AilfSGpbRWbMs^?uUNXwG7t{-rf!AL@zfY$1=PAf*2Qn1a+<M;L<;cx&z{Sl;_V z#1s}isq_r(M4-Gjiou^TuVx=Qp>tV!GGtf@)U%~P+c+b2RVG{hht2N>8N(n;QnSaJ zA2D>~qcTuB%c0qOz^n^X8#G^|{(i`*zduxfo^+34^TmX<H8l?7RK%U8HPc~n7>*+D z$cS;IJciA?M~s8YA?{yBjKdfbarY~6(73LItnC906oDxf;QhI1-(j+g`Yy`IgHT!I z<ly>G21H=(WIH{pk*S@SstE)2<kOSSOFnA$153#VEf5A4lMi($4A|kLcB-;B*dCDk z-N=C@pD?3CV?veD!8DjnhS6EVg1qjK&}u%e5qmzvtT>cKRp@;|3ZP@dZ28`9r-aP} z!()TgLF4{URQeN08H+aQUouyEf;NmCq50^OJ4GUwLKZmAQ2IDW$dXRxO6sIdB<YY# zdXCSKhafmu-H50(M1=}tQfD4KL3=;ht4E-iXmwj%(s`%!u1k6YMvIm{u_g07IWFn6 zQ(E3eTL9ndWHL*gCXf>#w8}*03pZ?AV7nuP1sHg?Vbu=u=tbQVEOEuySu!C*4QLTL z6?i)kB<&^H;jNIpMtF<i<>Ri-BZkc{qNc=PC(CQ_7sLS)qpm}YTIzA?T-%)a(Bsg2 zBj)72V*U#-7}oLdly5g|35)Xr+YAr(jPJKj>uvvPJQQVz#qjgJwrOvP^H{Q&+J%AV zoc9ZsGNDe}ix9O^aB|MG?_l)P23`5jSqu;E74z=}<DT&+I{FPAzhmM(A(H&lPXGfX z;`GxTCk+_)6NdC!`-iYrpK8U^Z?%6oF8?jV!p||PJZ0#36&i*ur{~F3byKyuLg{+c zGio(mOBMhoi?n?_lzJQuI*kY|wWMP_(GL*CEwyCQ7$@@Xo_-jBvL0wYtlOWFE)hCp z^JZK~{WAy|4^C~BVZ+GD6s}b0Wm{-mm^K2v3>!`lyLp*HFB?wdLZmz_eHk{K9!7Q_ z8YYupwz9^BJmX37%dp|}u*j<wdf6l!7fu-=eHk{K9_IVI3VkMuc_9W?!-SV%!-+F@ zu>GR9r4!Rn%Oj2U&(eB6)3xu@NYBt@r{^P`^vryCg3nNEoPN4JHrBA=Ewnf&W0x}+ zqP8w|PQ|mY;NHwMdRWq((pxUg;l4fhX0~REnim|J{bJ9D=-KAaL1SahQul=1NT108 z<)YC1O<<J<FWL`5K<(7*#{!ruG<8$%{>YF~Z=i`<<ng6b(y)J&X)};M-Lo_t`M9DV zOCm=7#J*mDCt_de+|qrKFQ8=4lahT=hH1}aeFkkh*)pi7=`q>ng(z>$+k#K9tTFL5 zgZWQ10cz+NN1e7e@Vw~lM}I@cF3?TYE0PKr1yOT?N?5;%s&~o*q@M)W|Ae4izu!pc zxBh{Us6k`fyqz^4tOKky!jLc~#&j?Ns|Vtm@=cty#Hm{5q~>AdJeUlu#eEYe9dVla zKfy^)oDM=IL@Yb0^7UVrX$*0qHC}LP4`Z%(BEOgN!HVg>%5Gf&_4$L;YcL)itVHf- zMGY@EObqyA7M7cby6QHx+n>kq@t8~VY0&<>Q&ZoD8>5*(H{3Ap?YOt$-XUrNUHSHY zL+9sb(9;FkxY;FLa7iz_q|Y$?A*(BD%C|b_v7XP+X$GD=#H<-M#Ke4+4M~H6jaW-y z<L1a&V>(G`)Ne@10Alp_spLPXQoRirVrVcx?irX(nvHS64mPfsh}{>N?+%zgVc;w5 zkMJ-XV;oO}*+}C^BFx4c&n3c8AYtGy2(zw~_n}gFfEsgGV*W#m1qbD|cc=GMdFd^o zscb^yGCcO0c;ClV=bfi$KQjy;?L_v&rKxm{VRHm&V9i6G0KJSo6F=Ll3zcF>QJjed zipbF=M+lS!KkZtgZ@ve6J(x35Zk(9WrD0j2iIxcRD5_NqwWecbfgJ=ArTJz|0{t|~ zxPh7j0w&syY_$c)yffPbX|NkY3kKLdsxxOfG#DiH<{q?nqH70dY`Lr}!w}ARF>aR- zIkwCgIk`-v?#ie)N6s!|Q~QIFlS}tU&ca?DtJu%Fz>~N3V<l$4;U`0o5o14eh2v3O z=`c_jeI8i<2P;NRBKD1F9RfSAXE1K*)Fah>ROHB2)G0lS0oZfvra@njHs)h&IUpU$ z6nr7(4<Z@#yOYNU-bF0iyMz!E;a^D8V!J@g2EbV|$h%m%+kwo~L!Yf6iJ<%^9QkJh z8)?}=UUKNe(!xHyGyjO8^EAe6nEX!FP(jq7n5NQv*hG&kdgRa}FaI8MlcDo}(fBTP z1E-gEz&y0ST!F-vnJrXB#WV${eQ8^C%Momw1jpPl%OoJD{j@map{l8%&8uQcl@@Jm z$P#D={>;X{9aK6nAXu7mdQZ|Yg)L9r5}Lt=sil~V4mzasY+JxMX$}q<(lVM$>UBxS z9Nu&57P0n*uBY?hx>;xxGQYn2u!i=N$k`cT!;@Ht(pzxfhjjHjXf<0OL|JNV`OmI@ zoh_gpb7+oZ{cNWdn+`6`moCkqwf&qfutixK=<UKnpXmLRW;8t?XiL$>L?efmyss>w z$!maMKj|UtbHkn44cCq-Bi8%QIHczQx}=k$)*8pGfos_OM`Rj$fQ_5YG%j{ZhiJj? ztnuasnZw)_vU^nt+~ePc0$_n@GSeGc5L}gy33eYAl&IF3CfTrb=s@ORU4U?4^<VLf zuDTkEu~$vFdLL@0=XivVgUJw)^6Vp=<G+lTA8pfMQ4DK~w*1!&o#o6N4v3o1XmQcd zISpYd94s!%<xQr7?bD*5*r5=d(D(|5B20A~!Zxhxgh&TF{D4Cu<KCb{BL2U7stHFw zc1{B7t`D#)C?5iO<B<CNhq$A;a`mjcnpV1rTd}-L553C~clett8(OLxo3Q~(nwYo4 zPvwcGpDDs-V5^m_T3YI}{jJrAw?&VFGwC5L4p{9viX=tJNDx??;xxDxsn}@vGc>hY zDxMM*4g<e4jJ;|@$2o9&2J^j`^=UlF2y^|#B*jSlU|&u~Kcy7fhcGsI2>7%HmA30$ zN+z_7&PPxz@;w7HaO_H=rAGhoI(io@dFRF$p0d|=F8Yv+8+Zi@sa-t>yZ6u$v{}^K zWLu{YvZMYHHjC6}$`C9#0Fo7lkW#ltbOTG(^3sNF1rlYlMK8QYzD=xN;)iu$u>xk8 zfH1NUZwR}Gkd5eDz?U~`r$D~w)nLS0p{oB->4PYpFO@!*j|zQ*MtliQD4A(b)sjYF z7gwzj{YW)l-hYcee5%%msuA?T7V`g-PF9NNSP}f~N^zX<Nhz@6aTzRp?kB7SM3o*K zi+)H*Ob?zOK@WDwEYy1Nn^#BCgH1AS;1yf`m;MRu5zX<RvGinZutL%GNP%WGll5i_ zrcsbQ>Ew}>go0Bj$aKYQq;O_DI*Wo+6VayQTNCw32MDqtoUAV|yfUJ`934$x{s??o zU)~3g=w}F~>I*jdW&6QSOhIK58gmFTQPavwvLQ6!fDP#dLuky^PkBgVegvS(hHODX z@;E8ket6>c!=knyOkV&16B<Nhk^L|sK)W8rWIt@7IsZiJQWkxfl>jUS3FcAT5Ru!E zVs1mqm<@TF@VUBZp)LzU=2h0v0&VUm*3)l-V>-9Tq(3I&ndy{{MSn|FNp>VNZbx#W ztujxw{`g)VMSq+!F4LcT=FO`!r>~#u@b)pi$YEM@zcpkt%hqI$Y&-M}B%6~w1q9Wz zw)g1<N?}dVf014oktpd!X7tH}vR+W#sO-p4;5yh5sz$|*V69!YBoBc?){JMtCHg9Y zOf%TQ5Ba<w9rmNHL}nU4$*c4tw0=(ICEFJ|?L&w99tAh3h3TncQj1?9X_Z>6XQ{LS zC?xAeQorK$H`f(6);7fZl~yRm@P5VLlI>?EWUtM<K!~=pBKVu_h~ddH^MZ}Vmq!1I z=y)eXd%V`Csh1(EBFm|=D<`9y2{}nG)cuP`W~J5(*Gr@5#T*$o@Ji<=>u$E?pI@J+ zXob?nB-A3&$q+2Hn^6>_gt{5*Q=E<t9gy`RE&2%ajnKc4UQoRb(~EmRA?wATAbRu} z1fgvSd-QGfV-vVC{jiXJya*9g`r&4&$>lzJKjUrmRio}(<N6`n4fVK#^uiKtV!0nx zC1{{wfsrsqe@=*82WV|Jp#!jg>hZ;9LL9yRWj0Cm|Ke!%FXR57ss9(^_0N3&rTPa2 zuYZUheFni{_0Nv*(PY6YVTKALp$sv;w!`=-_@U;%^HdJi{8z~}?_=rMQtd)gf00t} zBlQ<l@5Rwpmg`ab2&(sDR`35zh`e(+4}ombVD2ZU57=-<6Mcl9x`*U`VKWgV)xWx5 z@W||v>i@5!)xV6(`h|7>!~Me0^K$)XMjv6mO>B|iZ|fKC0R^vrh#q|g!Q}d<@g<!g z{jbb7ehyZs`oDY1L{(4w>#9b77m`TWm(lwL%rbT~`&yb7cpF-hdjmGw#8ANkqu@tx zU=yMNmhn-WzB1K7Ct%x%nZ_1E=LX;`6hSr97_lGNK=?^|pza4MWNvCbus=779?Xz& zG0(W~9KCD&?WP*H$x{uqBkBhRpOy6>ExLvIMz8~;%rjO2U)F+0A$au92=a13wd3uL zQa<o{6&qki)#wnzUknXJ(6C)Lwolfxi$=(F<~fI?kPfpb2E^(9=#>P-I0Bnss+g!A zV4MZf>-<+eWxN$ygY)mguGP?=-CcM@=P;2hf;FhefnmE4nqd*WpAf@0sAWlqMd=uO z6$CRqIy(LMeuBOoeUn@)iX`5Ra7sr5SWxOSvUzp%IUuOl<CvMlix0@P^xUqWAZ_GX zJtD?X9(gW<JXoGVE_&Feqt)=(fAT!yRC&ZE%`ihl(`c12S_9nBlsiOefJsQ_&b<et zC4iFrH!%sp6%AG|zfm+1;$_KQb#L@~#3$N|d`R~k7D<kY-q&adhAAJGNnVw0GJ9KO z_*)`lkro={<xdGzv@_#H%iaPx+>gjlL=^10TSMh^q~Z@i!08D(Xp8;QE<G$l=aF?; z4)04&9Owznc_{awGC#z?R=<U|XtAz=BOzz<Uta$)R!j6&&0$ym0c?)aoE$lupU7FJ zlCxqYtQ<V1e&sw$x&jN57BQGB(#BdV7G<%jWpR4n=sbe60EW%ESZ1Zewm8Xm2fbe4 z#AGv+?m#)xnrI$Yf>7#gCMIeIB4-+}eRC4cX{KV~fffrx`mv&<H;bV~M(oisV3rsX zu`cQ~n?kn?_6*RuAOxH}@95!(p<7~bY>D!xbtHP%mvjnA>%Zd)-DRZX^yoaW4hr5l zhO0Rt=D+9+%^7=3NPnr-@L-=4YkfvG)xoZ#b^P<<JgiSx48Mpt=i&VOBR%5>+RqxW zrx}?xV4L=iEB|%E#a`$mpgc}^*6|-(r#&wxs!K;AlmhlNUHPXihAolt1GEI230kM- zRZ;V$nEx3i>&V|nv9|FaSULs_9Y@hHo%v;W$K{b8XZ{Q`F}9>3g7)dq_-r_s@jhyv ztZj}HE3Be)CifE@5+08+CzfAxu#k##!h3N?a1yK0dbID|9|_OHck3Wd0t<b8@74;h z^u4#naCl<hd$oozo<A}?i)W=F%KR_mxyI0gXRYDj1nfT*5B9xxtKro#*r}R)+aNvd zgP1fqn<ZMqbV~Eh`3Iz)KxtzA2j!lJwbgX2AHD$`Y#33+YK}NH%V=@+MW<$}nf#`F zbP$G4CoQky-51rsm(6>i*<L@Wy#vlrRXvUc%*b|V&N-z)oS>X<CKa=9yl0Gn&^>02 zD>MU3=zmopRc5ViW7QZTRIAB9X4qT{BMe)l=EImM=wHV`OANBU1p^&1_=qz3-;_X4 z41W797>pqXzHh-`EHN;D3kKte!FzOs<iA-4X~f`H--5w-Vo>=l7)&4rCeA<_Oz096 z5qg=VPkxm``k9I*6mlZ*IfPxT;YBgWY~LuOe;tE#V({~C!C(?GxJ_oz-jk8Y3CqPu zLgYWI2&Ux2u=x_kU_VayI`9TnhNwAjGdx)@*f-A5lLsvl3&7Y2M;Jk^W#bIZz&qG% z!_NDB^WxA9<KmEcGJ2HU{d^n)ssMWOh|acgP8tO+Akj$z`C|5sb7*%-gKY}D132G2 z@N;m2ZNaX~bJ7$HH}=qbSKe5(aWCLgU>0>aA~Uc8JC$AX)*Z%3=Spd8BnGF8ouBwu ziFg;#`>8V&(9sz`bU^FbCWT9S#~~eG_k%=#Lg$j5(&2SextyqJ`G7J6f~?l_`FqwM zk8cv3!6pIr-(imNE{8M!%fRoHfegJF*^4th=&fi^PY<CXI+y#;5hw%=f-o|{iA_Dr zgJxg}*&RAd*?G`!h&K1cV)A>|?RcC2sV*8R7-*zmfp2#k9?{3bG%mE^z03If#tHga z-%Q{5b@V3@`3>pcFq}R&jI9xsJgTFmOj5{~DP0KOyzDZQ`t@`+a1D<60iYt@c&DkK z9;3RqcB6tX$pTav-#`uY`3L+%Y)dmSLK}l}&+LtE20rZ`C&~jap`PE3?vzH2Y0(Ra z$DlYk=G9A0Lh$C-@szOm`G%xJVn3otl(i@wx`SThZUYrwy8Atl@bbMhmc;%I0=eOV z1wiHNtKPYt4B>3>=8baXbUCtv$g#gSw-Vss&39AeLC{X1Xmx#I^F@BYH7z*TPPmuM z!!v1ApOqWNb}$<jW$qS<t@KXx#4gez?06&>Ltbz6e#97%>0X0gLk}%Ibo9{EV;nuk z(_<PvuAxT`J#y(`p@)?o1@tJSM=?DX)8kfpl+dG;9%b~nogR14V}+1f-|~7@jE2e1 z(6x^kO?VkTG?QG6=&?!os&o1WOlNz}>!LS2gB+o`nsSdQ^^MhFylQ5XV%FG{H?d`; z5%Z4+j!`wKMi8p`!soqmRTBoh_zf0}P;nJC`c1xu#=y!N6Moz#Q15H-w@foF1oYaf z8K!36j{>#LKKQfof-YSpm8(s)^_44qrk0w@Cf`knGPNoX;4G-E^O^jOpscJiRaN>c zO_dE*rq#aMl{NkrQ+0D=y(z1<n#IhlZK(Cn#7nY1)6F-VO6*pfdx67gH#Ih!S}Irh zX5x2tD(hQJwJj!pbHFz}+jKq6qfOOFp#{SFOjSNFh^tJM!0;(CGfb;%NQUdPSJXC) zjEsoN<~4F$won2|Ya3RYDoxl?<auLqUfVK3Sb`q`qR7U8AHO`rW05_IYY5aM3+xAp ziiDy-{R&^Rsj=F`Kr&{FKqWAfBo-FP<%b^_qTJ?~uDi~3%>w*(5#$Fy_B%xQr63c2 zPN=bY&9rM|Y=Qqmk(>Sak*`W9gdKN_i(U4F4@K<{G&g`xQMuE}t4St5+GtVuFjomP zcYPsXKg2K0`~YqNTnk*l!th%Pn+4MZp{}vA%I(El&QwWt*<K*nYO5N)OO=3H_1E}J zi1jsBniv8r(P==!g<ni^`x_g4?)u6mCCnDeSgah+wSqwipBC8NP56B$Tof*ADO)Xj zxYHt#F!ZLiJK(RatM#veOsg5ax26(5Q&qc`=~AI+EpnGSZnv{iE<{v=ug=6`nO?H$ zCFxa~V7kV1O;w|>g>nb2udJhLA)G2-b!DK=Z(3DZ7w}D6)*x36+hoRXQ&9$KotOB! zS;KG>cel)$i6%dEW5=f(v}4jHW@z-|CQQ1arm^040~EaShLyhhxic&4DyyJ|H?%Z+ zZ&3UcaYI#or5nGC<!i34^!iv8Fn%mEKQQGeDY${>sBfy9jbE{<^y7!TYP~IPEKB+P zH>n|2f%>MKh(>dzZXW#>;Pn)EDi+%Ye|TpsR;<PEKRq3b(cIApvqL|dVuIPs0lXOQ zMz~?P+u%0AVh>Un-JPhzZv*wkV%>0yvCeV;u7EWpBg}0MR#+?Gc4H4_8-?GD#fB*S z7U*z($?`V%!u1?Q&kDEc-B?V(l)C$U#KYbGVJx;4?tv)Qe8@c(i{;_m<AIMsNA9Oc z4{rFgSS&3Kd<H=WH|GT6;SQdT#SX#UJ`{_Y@Y525a4m3iFfj7KZGzhdw+-%AxLt6! z!xiwB*a5f}xI=I~aE)kSZE&;TcEhz$7{f3R+(Ee8;L<fkb_C7Z5<$~y)LfZ9F0D&5 z&PZ_DQ|;Ori(Sug$Qc=55X^`p{Ng`DCe($HVO)@rd5dA<>a;fDhgZzIezy4<K!s(v zZMgIPg4dS`&z@o2q_t&aZqQjXOznE#gp5pUhOr<cZNWrsMLb}ixFqghFtIl7w@qB4 z;DZ+~lHu35qu|fv2D1&?@=0S0exkb-d7Fc{KHN1sQ1&eS^a;W;<5=w~%uRKxTE{w! zI_;_^U4~`?BJ7~B;zf9w?>V?(@U13*EyHxrgp4$i#maf2Lyn}AbVNod$S^*jEy&2+ ztg~mBLi&P?tW9HV895uqS~K$6$CYFhYXcd109Z4!3NlPU1Oakg2GT0<X$2qpt-=xc zSmS)ej5dw-xr~jPTQj;ef6o}&k{OaLxQOr5;Mat<@)_xTLOvwV<x}(BuF>^sGj?bS zGQt||`x$wNOA*zcfr_31R`k|u8b+%RL+1rn-U}v;6(7)T)`!My8teGUxb`%iW9$T! z<-&>cfno!G>%Lg*-{2=XHd8r;sGK(Gk;aBG3o~-s$7+9?kp-2Amkc4x<=g@KDCn=F zd_aF5(AtyBIgt<Dc-=6$86PK}&x6katmM_{i4UrK3Dbi`MuT=$t1$y=V4S2~Rbx!o zuJVv_RE%|!aun-2m~yz{$^mlXHx=>>V)e8WZJ6<s_3CT**<^le$C0uTKO0vk#;*?i za&WMu26D>v#qyb`uY^98qLy?6nh7J-6H{O+cY5Ko_;4)t&+t>ZZPq%d+`*5RyMvYc zy(5)7)$=*fZwGxQ*UL>>E4AB2)NXZSHAxMT+V~8VYxf&?k(JBO>v>^3y#<usEg6GQ zHHc37rLJ%$H}Ua<PYrDGw9)vW9NNbg5ubNP<3siHH24(5mQReACu^UOXPEw@IcatK zzNS2hUpo2#Bl?1illdjudqNLy$!OE+4t)c^W#D)8omlMiZ*KRD-;QtK_bB*vy*G?s zBEJd!Sxou;<Ja(`dN>DumJedFGhDu}sn5TF)rj>;_SXhGF@(PI1Ncd=xgNoyw(Ff4 zIokdS8763!+zrU)S0Mep2Xxy%ip92Yx)9loO*+tRAiL2%rU+KGWdhS8(8~r@MgKJD zjp&;Xj6lDL=oe>Lw7(jep6dG?=(|C`8?M|Qnf@f&W1_wn!W`?CXh&?13#g4#c{(u` zFk&nahM)NH@=UZxrq?zI6&R&7<$mcQ@Yx7Hxx@$W9+aYA;@M{X;p2$k_BHXZAijT; z_$grd1>&QKNAr(g1W$*ZI|?|dgK|xUht6;v0(=DlQ9IO+D*~Abg!n9seei=*yuUzg zwUZsG&d7xP){HbZ&Zq;P1$gS;U8Hn702cwjjOh7A@P`2}2K-V52$$q}9PkRjmlB+c zb8$i*s)tt)pNTP;kK$1e!}@KJ`dg9uZC8e+c_bUl<ef@mFz_LB@c*y>Hzm-WVPOfk zaaWN-(a}`*E{>qU*Ya>Wce^K8m=D`t?0Vc~VeaK}F#bF1_~|fqL6z90=<LvP4-1nX z(xt?Ic_G$F;jLN=IRZ>8yTW5E%q0^;S6FMoQxN!YUHC^Xs4`B-WU+#933nAI-c~&x z22ymgiF>HM(1oVWE~PEyq*+*4feSpW$gND|Xxc%SlKzF5oS*J-76y;;u#zsCAItCU zJf0!_UwA0LW1pJTYrI}R<nAf%j-6;>sF!m0I_}=c-38pejl0#{UB%rE+}*<6-*Wd! z?(XI8Yux>iyQjE2Hl3%>-Rrn}BX<{Y_crcUb2l}Ae{0xN>0zh`l=iYh4tu!!O?-5V zEexfBBWCchLT5|~EAYS7{Vu1k<^;JZ{4$FyU(2eg(qD6fzY!}Kt7o!hkC`jvRW&6- zUNIXvj;$8W^foqB*RGUT$A+afp-^5}!td+h*Y!-a=0m^0ZGxYzmssWX61FVa25O~U z|0CA_>3WpAkA3VC9{c!a+%s`sg?kc3Yr4>Y+Hh05_24eXU5L8?w-vVqcMfh7ZX<31 z_ZP4cG=@Bh`vmSm+(&WK8c7&8&Ck1Wcj0cs-GtkNyBN0xcMfh7ZX<31_aL03xM|!J z#=RYPH|{RnZMd6odvF)yw&2deZNhEDy|A$GCR0}Via>)uV49mfCp%~6jR6+QU6*@f zcFvsa+??rhglSez?u|J&=CEM_NJqVN5Gqe=85U2!KVCK>TqpQPgzMw>SItZZYaAXC zf2^R`=VbhG!bo;a2Wzj`!({yN=m;!o5cBI`<rVv@j?@Vg1*IQKhNlZ*Znu)*lZ2)b z?1E0n5a{=DQ!kxh5R`sR9j8MLhIxON44)kTZbvfwBEeK@QG=LYCtMuwC)JTUjGC1G zF&U2D>`spw#QZwp()jmD)R8*jGC}#B_hdMFC*J=h!_j+Jdek81*QvfgppMiDO7EV4 zQq#~2C+Nso(naG24fVz;^3!-hi}Du~JIkqvGr=t>@N5oG9mmjkRRcLUdOh&5=E>Yr z<{w_3G@c>;-6{MxF#O^5@*BWM%GaZukJ3-DwL8F`WPC0c=%=jd%41KQ0EDX%E*F&k zrHjKO94@TzFvwyKe}Ti5e&>1)KZt@JNuJjkzEPl`;ib#Kp0ceP3I8j^j|8XRrx^); z1>hR!KfU2amogrj&EfQ(j~edeaC)1AF0^oV`8b@8^V7wwI^1;}PRF?EN@q_&_&JBu zAz!*Cu%~RZNgg@@N|(ZCKk!GA|4q(^&i>J*jPJhWa5}g~7c`Px^n+C+@%awmlrC*w z)1}Ds{Za5sMu8*Q5&73K{uc>t%Phn`eywHj;q8bJMv`*_@CDeJRD1Gs#%H+Q=;rv! z_mPO2F8YB%`VvW)=a)is?PYw1+mkl|zX+vBzd}wIBZvDjhldwg2rMX_+Bgi%FJh47 z$CZFjMtjNhdRT;>M`w+Ke-p>wz9Jc)_BV)6lQ$W@jPc1324`9Xy4ac`+`Bk_H|LYa zV)2qR$8WmD!XRvI0lo(~zSHYra7E5vbNHbYd3pdhqI}+QS{T0a*blf#38>ur(9lef zKh-{e!1<SXJq%k(_Y~k%{z9&W!P(j*d=r2`@M2#6D_AVnCIF}Msq%UlMBy_J9+LCR z6geFnzbhqQC4i4qk5!|<>8Cg7`!<_W(!FmKe0rmt<UE>^?(aFDt`t7cFh1x<xctf* z$nzX-G+J1Gm2vbD4nJy;;flTe7;q|w4^ztFEW>9kk{<MvCPv8F=Jh1OzXLdxziF0* z!Py!xeAfa_<?xW#!{A`cu3QGcTv+Dyu&@FzWPBJX>9K_K@0xF6a2oH@RmtHwyxyT6 z>}p{AuMoPu9u}@*PeJ&QwbS9_pL-ep<w82gSLDfn0i=92U20(**q#S`^xL{4>BFA@ zr}DhmCF3(9Ji;7)Jf)oJw|%JIzeuU~&l&s*VXN1}SSa#nF`*#*M^o@80ZwvmcUTyH zm_5<9IQ|r`hd~rR-(&b3N#!iyeA+IuFeuwQf^RY4l&@=2^0l19^(pYXI3Ews?^2#d z8-w$J%6SlQ%GZvRd_BVX44Nzqo9*4e_j`_?=JhbRlCQsT`1}<30l-QA87XqU&*9@! z;3qjemQr6dFd#XfPLXpm;3Mfn&M0t^^9iT$spRl`Qsi#|oaFy*ik#~ie7ODG#QAJX z;lCAd(v$F27FKR-PYu4`aJYrHBMplcgg<gVbzTpHDCOM8@HvvoISjZ-6;`}a&Zj>` zp7VfHz6Mjunbw-AoGZK@#zIN=JAjW=&a*haIR*a*1lL|Jq*cm%7&AN;ar{?O<XOq# zLn-hL44#-DDf0Y`<JY95`v(qxGlkE74nLOyKLI$^%it{*CZf{+o@H<bN_tEgFPF~@ zuZO`Ed9DI{r1HrDoXR;PMSp(C;h8CL7vqEeg~NS3_2r}R{}JF+U-}ez?&t9LQ}X*8 zz)7C|lzM-X@kc*WU|}g9U{CDfb3PxZ@HxWq1<prVhh59`>IxyP%ELLx&S{?R!IX46 zfluYHd?!@tA7h+<Q=Ww}V0(Y?VgHul8+kkWH<`VV2{@I*rW84^1>88od^4BhM^o^v zoPS%ooGv@NfHW3x{Kr%9D;Pc(Lgh4bKAvl3K5VZV@ok*XU<#jq;P5=J2Us-rmapG( z_*Acl0TlS37=IZ@{oKd#O<oTJEBLQ*xG@Dj066KxRVjLSlH&_0_!mzcS<kNnoXS5d z1wVTf`~r@DAf+6Z0&Wt9U-f{KoPSNxpL-eq3?XNZg%Pp6Yxp`i|E-)qnlrn)Io#&; zu&`1-^ce<b2MR0<&h{SR+soh#l=L{l@w+S*2LFJ^p5*X0qYVEehmXO;llZq;W%w2j zzY=gNH;dQ9Ac{P*IKC&x!r=eRo;aY#;5?vmZXE^gBmS_%ZC;NGTM&LU3jQX*shrQI zlymne_)i0FO5!XBUSs&Em&q0eU}tRL`%ezvo?>r50G!g@m6Glt$M5=q%!loXgU@*m z_uMGM*;xa?$6~@x<vGXeK`7CVOdOu$^$<ATzGeVU>Gr0iyM)=F;p>4naX!zd=xr&7 z`@J4wALm~UIMsXCO%?`M#?Q?>-KSF0UBlsTroivx{6(*a@lo_-3x|7B;E!^6aSHrt z1|R<Y&jTF4FC||e08ZuedWt+J0H=JJIQ%ju6+T<S`Lw2_D|nmzE&f1tHI6L0-M18$ zxSfvDGJHZb@u5+-zut|{a5m64MBP=5?v-_oD=O>URsP227I$T!Rq!_AKqEfQS(W|$ zx%qQOMsnjMYOTAnxw&$U8)uiB*Wfdo*g0_Hpkw_Skf=g9Mp^4mLZ#1iLO%9EPj_RJ zU-mT%@M6=Ug}&g~0?B~U+_;i?-0tEMds*31_k!}G!ZOF=BDXtxR^IIV_`y_SN9R*< z3Uj5e1)*VQXh&i(J1;K}!Vc#>J3p5)K2pS-xuZmUf7U1w`6H(>Cui2kY0Sx)lgOCr z#O;Xk%*`1^p1HGT3#o@#N8&K+#+=y#Kl<v%*HzuvA8PP<sV1_$O--p$C}12_XEAQ~ zf)cCC?zR`%=o__6+Z2CNomUBN+tMPd%TWj{RSbAF`vJm7rQLRsa}#YPf_tHJae>w8 zUc6vIslCiyW-V~qDFybaS8q#zX`fKru&VKHpVjO2)!{5O)YsZjRl*Ks)9Gb=S=M4v zfA+OhTwLfbEwdLDI-KmIu*uOyi_6@Fi(SP|dzsxPxU7X@X}P1!?k-qR>b|wyUTzmw zR@Mjnbg<f8-P$DB%3Z}^XDusr+bbMpIQQ(ry|BpY6cUGm6KqNsmz23(_L7A-LJqIJ zLgrppVl7<cC|c;Y;vjx$xl5>@%RjCAHD3o#aCO<-i;9W_<;1tUvI$M5mB}|!=@j6r zyLYSdw$Sc!xeJ_&3l|~FNSTf)Qxn2j@YO3&Q&qlJ_T}H?l49THO$IrNip$H~Hit|8 z>~B?a>_{0FippK?C5uZI*-PAIBE5xBio@JEH@y(2uS=N-@>hc6XYdh+&n2@%;jktv zTS#E0Ozee))X)AVOW^j=8v*=uJ4?FIX)P_am#PY55$Y4SAb+DcnVub!r{5V7AO*d- zu%@6Eg%m%dkB<}kYmzD!g=%#=ZpUZx78gNtQK+g9CnuQCp<J}AuJwCs+}@@Dg_z}P zSyR8Fu`Y?D%~4uxEpd<$bQIa*A(opmJB)mxl`@pjOBnKoRrGacZ_R4F6yUB|?Zx*o z&<M&(99A2IQhmHRArXBcSvl@c;;e3LzT3S5AD?cZ5Ib+rUaUYnu#)*NYgDyB`6JHB z%&bl;q%CqUu%Q!bXspHAeQG~yr6_eQC{xu0Z=~*7vQpwyJ;YzRvJU2bO@o&SMIVWV z6~GG~5XxKQ^WNQrp9t~`j-oRA!V*^MZYmVTXfRWQwp-hfEL`Ht(#h~*d&vTOVVQdg ztd`rki0p>FnD`~yp4DFJ#vLysUjyu%oAgZn8nsYjC*#FT1ae;A*iu>RWm$(=Uf^r+ z`c%voEFjDhhYbypf8sh>pp-9yCzamf0=v^~tB~u5nu*+m6QaUQECg@4an>a>W*Sjk zTv}))J*{eip5WCX6s$UlxS4WVT?IC4Qe0dUNiVI=Qc^9)V#t8^D%^n<ygic;gt7@< zMWw}yOGhz;qP3*NQ94Qryn3Aumvy0imJJGmvMhz^rw@TAzG|MxA${YU<q#<g?8E4Z zIMQ@zPnB;)V5NHnH2ZD};(ZXP8Ebv24u=d4e<>)Lfx499C6OdOY<*dIiQQegw5Uw9 zmm+US1@CwHTD(5Dca0ZCo%~jdnx(7US>|vT6_%HMO?Nko)qE31PD!uxR3U{*G#YAc zcxfu}o{!tTqNRl^sXDRJ+tB}@i3m!Qby<tiF_l=CqI45qaZeNgm9Fx$kM+Y^wX4)# zqN;nPRnNA`HBwqieWLOq_XJa_7FJ~?c3U|c>?IXvX_=#}P^26;RW{Umlj7nPz<Tm> z7yu|E`=0#>>|MM)+?^b!E)b?>=pf56B)Mgky98FS1+N4Z){*689gtFw{@Qw<+`X(; zxBFp3gG5s(Eq2}JE-WdHTWOeQveGct#E4GaBnvjmt4{HOuKeB}th}(ASx5P;C~%Zd z2&rp=lD0dY#Ta`e#tx(=zSdfQa;nvgSu)J+E^`&ehkezfX$G||8c?uqo!cb5x>eii z#@lx6&AJq}G~{XKWsDSPj#5^+0_~%=rHS%|w(i8MajN#iid-4}$mLHjuO&+PHVolP z3LI|k%8FG`Km#8Y8#kKH@Op>+#=C4PW;7HmD0ePm2`O*Uk;!PTCuUT5Dr`35E3w)f z6)GSbaiKXEB#kkPmbi<Sz&c}+)|^}>YN<+PJHM3DT3vZJj0Xk`RKkfNQzGL9sN3R_ z#l>#LBM8|rYyQd=aQ)4)Tch~cn{G`)c0;4zmu)R@%)~T`hgLQOvR4FZ>#AnfRteb@ zs;O+L5wfe+G=QJ%`kUn#j5g@lSW>`^xMp8nB@u96Q=MPPW)?LY-t3i)2-2IRLN?7R zfz-%GYuUaUK98)a0uv=9lgblHnL_d3<pmm&sjP=xAhyIF5E}Yr!{iCsnA*~-wtN!( zEu1SstHG`1Jo2;_=F8&%<$NrUX1I!<+`q$(CD}NAnbv|3-fQCYCKgIOlywLNLhI*h zdcW4fqWqlR%tDDrD!mO+r*MW<k$<b!!jQM7&|{m9UCO#14+=%_U<ocRe^_fl2>XBv zLmdkW*k)pRSLku#fL*uYNhk_>c#MU?X&;d;<y^2Fl6fe8a__`VXL1#K<-FSMR4BMC z?3p-+tl%l`-H4|%zDig**VfJHJ)EB8Cmcn74i@Mf5|yb!FQi%ID2|vThlJGfQ}W+| zc%m=hc*;4s98TZF852FxDCsNon*k&JBl+l3&eL^cJ((_LpFj>J*2$Ex1dp0tId>Q4 z^xG*?bs4!|aUVihO|P8KYr=XVU5dPNXn6P+h)~nFrCS(k8>gRTRVZawNmqgX3p{Fi z<-FhHE*S&Mib)rhwfg!U0z|LWpK>lR%<0c_{Z(jE)BpEW`dcjwHGHc~LGSpguhjH+ zBT}8da?a3HDpOUZWFWOX_X8~hw<4x;?q_hhOrMIM8h#aM1{J+>&S>awq<;srB)^h= z{CpraOkAnyrG`HQ4(YB!ubgX7<Me~P{1p9G=#_fd3R-pg%6Z8lHzIH;_;M%_R^Xo@ zLQNk(SLu-{q6#5*6*|Qo0z8Sn(j#M5%1)c2SaMJ>aaVCJz(c&0{I_{M2npCfV2-d+ zcH&P{+$Vc<NSxBoNr5Z$b5r1iOPAtKhF?jWMexTwo^ZN?$l)nL#wR6<408vUzlI;! f`3|59aVzDf*e_D=k<vGjVa0VNDYxoMP5*xbWg%z> literal 0 HcmV?d00001 diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/cam_pipe.c b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/cam_pipe.c new file mode 100644 index 0000000000..7874ff9d52 --- /dev/null +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/cam_pipe.c @@ -0,0 +1,139 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include "pipe_stages.h" +#include "load_cam_model.h" +#include "cam_pipe_utility.h" +#include "dma_interface.h" +#ifdef DMA_MODE +#include "gem5_harness.h" +#endif + +// FIXME: Include gem5/dma_interface.cc/h separately +#ifndef DMA_INTERFACE_V3 +#define DMA_INTERFACE_V3 +#endif//DMA_INTERFACE_V3 + +/////////////////////////////////////////////////////////////// +// Camera Model Parameters +/////////////////////////////////////////////////////////////// + +// Path to the camera model to be used +char cam_model_path[100]; + +// White balance index (select white balance from transform file) +// The first white balance in the file has a wb_index of 1 +// For more information on model format see the readme +int wb_index = 6; + +// Number of control points +int num_ctrl_pts = 3702; + +void load_cam_params_hw(float *host_TsTw, float *host_ctrl_pts, + float *host_weights, float *host_coefs, + float *host_tone_map, float *acc_TsTw, + float *acc_ctrl_pts, float *acc_weights, + float *acc_coefs, float *acc_tone_map) { + dmaLoad(acc_TsTw, host_TsTw, 9 * sizeof(float)); + dmaLoad(acc_ctrl_pts, host_ctrl_pts, + num_ctrl_pts * CHAN_SIZE * sizeof(float)); + dmaLoad(acc_weights, host_weights, num_ctrl_pts * CHAN_SIZE * sizeof(float)); + dmaLoad(acc_coefs, host_coefs, 4 * CHAN_SIZE * sizeof(float)); + dmaLoad(acc_tone_map, host_tone_map, 256 * CHAN_SIZE * sizeof(float)); +} + +void isp_hw(uint8_t *host_input, uint8_t *host_result, int row_size, + int col_size, uint8_t *acc_input, uint8_t *acc_result, + float *acc_input_scaled, float *acc_result_scaled, float *acc_TsTw, + float *acc_ctrl_pts, float *acc_weights, float *acc_coefs, + float *acc_tone_map, float *acc_l2_dist) { + dmaLoad(acc_input, host_input, + row_size * col_size * CHAN_SIZE * sizeof(uint8_t)); + scale_fxp(acc_input, row_size, col_size, acc_input_scaled); + demosaic_fxp(acc_input_scaled, row_size, col_size, acc_result_scaled); + denoise_fxp(acc_result_scaled, row_size, col_size, acc_input_scaled); + transform_fxp(acc_input_scaled, row_size, col_size, acc_result_scaled, + acc_TsTw); + gamut_map_fxp(acc_result_scaled, row_size, col_size, acc_input_scaled, + acc_ctrl_pts, acc_weights, acc_coefs, acc_l2_dist); + tone_map_fxp(acc_input_scaled, row_size, col_size, acc_tone_map, + acc_result_scaled); + // tone_map_approx_fxp(acc_input_scaled, row_size, col_size, + // acc_result_scaled); + descale_fxp(acc_result_scaled, row_size, col_size, acc_result); + dmaStore(host_result, acc_result, + row_size * col_size * CHAN_SIZE * sizeof(uint8_t)); +} + +void cam_pipe(uint8_t *host_input, uint8_t *host_result, int row_size, + int col_size) { + uint8_t *acc_input, *acc_result; + float *acc_input_scaled, *acc_result_scaled; + float *host_TsTw, *host_ctrl_pts, *host_weights, *host_coefs, *host_tone_map; + float *acc_TsTw, *acc_ctrl_pts, *acc_weights, *acc_coefs, *acc_tone_map, *acc_l2_dist; + + strcat(cam_model_path, "cam_models/NikonD7000/"); + + host_TsTw = get_TsTw(cam_model_path, wb_index); + float *trans = transpose_mat(host_TsTw, CHAN_SIZE, CHAN_SIZE); + free(host_TsTw); + host_TsTw = trans; + host_ctrl_pts = get_ctrl_pts(cam_model_path, num_ctrl_pts); + host_weights = get_weights(cam_model_path, num_ctrl_pts); + host_coefs = get_coefs(cam_model_path, num_ctrl_pts); + host_tone_map = get_tone_map(cam_model_path); + + acc_input = (uint8_t*) malloc_aligned(sizeof(uint8_t) * row_size * col_size * CHAN_SIZE); + acc_result = (uint8_t*) malloc_aligned(sizeof(uint8_t) * row_size * col_size * CHAN_SIZE); + acc_input_scaled = (float*) malloc_aligned(sizeof(float) * row_size * col_size * CHAN_SIZE); + acc_result_scaled = (float*) malloc_aligned(sizeof(float) * row_size * col_size * CHAN_SIZE); + acc_TsTw = (float*) malloc_aligned(sizeof(float) * 9); + acc_ctrl_pts = (float*) malloc_aligned(sizeof(float) * num_ctrl_pts * CHAN_SIZE); + acc_weights = (float*) malloc_aligned(sizeof(float) * num_ctrl_pts * CHAN_SIZE); + acc_coefs = (float*) malloc_aligned(sizeof(float) * 12); + acc_tone_map = (float*) malloc_aligned(sizeof(float) * 256 * CHAN_SIZE); + acc_l2_dist = (float*) malloc_aligned(sizeof(float) * num_ctrl_pts); + + // Load camera model parameters for the ISP + MAP_ARRAY_TO_ACCEL(ISP, "host_TsTw", host_TsTw, + sizeof(float) * 9); + MAP_ARRAY_TO_ACCEL(ISP, "host_ctrl_pts", host_ctrl_pts, + sizeof(float) * num_ctrl_pts * CHAN_SIZE); + MAP_ARRAY_TO_ACCEL(ISP, "host_weights", host_weights, + sizeof(float) * num_ctrl_pts * CHAN_SIZE); + MAP_ARRAY_TO_ACCEL(ISP, "host_coefs", host_coefs, + sizeof(float) * 4 * CHAN_SIZE); + MAP_ARRAY_TO_ACCEL(ISP, "host_tone_map", host_tone_map, + sizeof(float) * 256 * CHAN_SIZE); + INVOKE_KERNEL(ISP, load_cam_params_hw, host_TsTw, host_ctrl_pts, host_weights, + host_coefs, host_tone_map, acc_TsTw, acc_ctrl_pts, acc_weights, + acc_coefs, acc_tone_map); + + // Invoke the ISP + MAP_ARRAY_TO_ACCEL(ISP, "host_input", host_input, + sizeof(uint8_t) * row_size * col_size * CHAN_SIZE); + MAP_ARRAY_TO_ACCEL(ISP, "host_result", host_result, + sizeof(uint8_t) * row_size * col_size * CHAN_SIZE); + INVOKE_KERNEL(ISP, isp_hw, host_input, host_result, row_size, col_size, + acc_input, acc_result, acc_input_scaled, acc_result_scaled, + acc_TsTw, acc_ctrl_pts, acc_weights, acc_coefs, acc_tone_map, + acc_l2_dist); + + free(acc_input); + free(acc_result); + free(acc_input_scaled); + free(acc_result_scaled); + free(host_TsTw); + free(host_ctrl_pts); + free(host_weights); + free(host_coefs); + free(host_tone_map); + free(acc_TsTw); + free(acc_ctrl_pts); + free(acc_weights); + free(acc_coefs); + free(acc_tone_map); + free(acc_l2_dist); +} + diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/cam_pipe.h b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/cam_pipe.h new file mode 100644 index 0000000000..83a77e01a6 --- /dev/null +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/cam_pipe.h @@ -0,0 +1,7 @@ +#ifndef _CAM_PIPE_H_ +#define _CAM_PIPE_H_ + +void cam_pipe(uint8_t *host_input, uint8_t *host_result, int row_size, + int col_size); + +#endif diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/cam_pipe_utility.c b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/cam_pipe_utility.c new file mode 100644 index 0000000000..f806e9ee1a --- /dev/null +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/cam_pipe_utility.c @@ -0,0 +1,82 @@ +#include <stdio.h> +#include <stdlib.h> +#include <assert.h> + +#include "cam_pipe_utility.h" +//#include "pipe_stages.h" + +uint8_t *read_image_from_binary(char *file_path, int *row_size, int *col_size) { + uint8_t *image; + FILE *fp = fopen(file_path, "r"); + int chan_size; + if (fread(row_size, sizeof(int), 1, fp) != 1) + assert("Failed to read row size from binary file!"); + if (fread(col_size, sizeof(int), 1, fp) != 1) + assert("Failed to read col size from binary file!"); + if (fread(&chan_size, sizeof(int), 1, fp) != 1) + assert("Failed to read row size from binary file!"); + assert(chan_size == CHAN_SIZE && "Channel size read from the binary file " + "doesn't equal to the default value!\n"); + + int size = *row_size * *col_size * CHAN_SIZE; + image = malloc_aligned(sizeof(uint8_t) * size); + if (fread(image, sizeof(uint8_t), size, fp) != size) + assert("Failed to read the image from binary file!"); + fclose(fp); + return image; +} + +void write_image_to_binary(char *file_path, uint8_t *image, int row_size, int col_size) { + FILE *fp = fopen(file_path, "w"); + + int shape[3] = { row_size, col_size, CHAN_SIZE }; + fwrite(shape, sizeof(int), 3, fp); + + int size = row_size * col_size * CHAN_SIZE; + fwrite(image, sizeof(uint8_t), size, fp); + fclose(fp); +} + +float *transpose_mat(float *inmat, int width, int height) { + // Define vectors + float *outmat; + int err = + posix_memalign((void **)&outmat, CACHELINE_SIZE, sizeof(float) * height * width); + assert(err == 0 && "Failed to allocate memory!"); + + // Transpose the matrix + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + outmat[j * height + i] = inmat[i * width + j]; + } + } + return outmat; +} + +void convert_hwc_to_chw(uint8_t *input, int row_size, int col_size, + uint8_t **result) { + if (*result == NULL) { + *result = (uint8_t *)malloc_aligned(row_size * col_size * CHAN_SIZE * + sizeof(uint8_t)); + } + ARRAY_3D(uint8_t, _input, input, col_size, CHAN_SIZE); + ARRAY_3D(uint8_t, _result, *result, row_size, col_size); + for (int h = 0; h < row_size; h++) + for (int w = 0; w < col_size; w++) + for (int c = 0; c < CHAN_SIZE; c++) + _result[c][h][w] = _input[h][w][c]; +} + +void convert_chw_to_hwc(uint8_t *input, int row_size, int col_size, + uint8_t **result) { + if (*result == NULL) { + *result = (uint8_t *)malloc_aligned(row_size * col_size * CHAN_SIZE * + sizeof(uint8_t)); + } + ARRAY_3D(uint8_t, _input, input, row_size, col_size); + ARRAY_3D(uint8_t, _result, *result, col_size, CHAN_SIZE); + for (int c = 0; c < CHAN_SIZE; c++) + for (int h = 0; h < row_size; h++) + for (int w = 0; w < col_size; w++) + _result[h][w][c] = _input[c][h][w]; +} diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/cam_pipe_utility.h b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/cam_pipe_utility.h new file mode 100644 index 0000000000..b4fb6cde0c --- /dev/null +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/cam_pipe_utility.h @@ -0,0 +1,16 @@ +#ifndef _CAM_PIPE_UTILITY_H_ +#define _CAM_PIPE_UTILITY_H_ + +#include "utility.h" +#include "pipe_stages.h" + +uint8_t *read_image_from_binary(char *file_path, int *row_size, int *col_size); +void write_image_to_binary(char *file_path, uint8_t *image, int row_size, + int col_size); +float *transpose_mat(float *inmat, int width, int height); +void convert_hwc_to_chw(uint8_t *input, int row_size, int col_size, + uint8_t **result); +void convert_chw_to_hwc(uint8_t *input, int row_size, int col_size, + uint8_t **result); + +#endif diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/defs.h b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/defs.h new file mode 100644 index 0000000000..ccc8acc857 --- /dev/null +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/defs.h @@ -0,0 +1,224 @@ +#ifndef _COMMON_DEFS_H_ +#define _COMMON_DEFS_H_ + +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned int uint32_t; +typedef unsigned long uint64_t; + +#define CACHELINE_SIZE 64 + +// Debugging message macros. +#if DEBUG_LEVEL >= 1 + #define INFO_MSG(args...) printf(args) + + #if DEBUG_LEVEL >= 2 + #define PRINT_MSG(args...) printf(args) + #define PRINT_DEBUG(hid, rows, cols, num_cols) \ + print_debug(hid, rows, cols, num_cols) + #define PRINT_DEBUG4D(hid, rows, cols, height) \ + print_debug4d(hid, rows, cols, height) + #define PRINT_DEBUG4D_FP16(hid, num, height, rows, cols) \ + print_debug4d_fp16(hid, num, height, rows, cols) + + #if DEBUG_LEVEL >= 3 + #define PRINT_DEBUG_V(hid, rows, cols, num_cols) \ + print_debug(hid, rows, cols, num_cols) + #define PRINT_DEBUG4D_V(hid, rows, cols, height) \ + print_debug4d(hid, rows, cols, height) + #define PRINT_MSG_V(args...) printf(args) + #else + #define PRINT_DEBUG_V(hid, rows, cols, num_cols) + #define PRINT_DEBUG4D_V(hid, rows, cols, height) + #define PRINT_MSG_V(args...) + #endif + #else + #define PRINT_MSG(args...) + #define PRINT_DEBUG(hid, rows, cols, num_cols) + #define PRINT_DEBUG4D(hid, rows, cols, height) + #define PRINT_DEBUG4D_FP16(hid, num, height, rows, cols) + #define PRINT_DEBUG_V(hid, rows, cols, height) + #define PRINT_DEBUG4D_V(hid, rows, cols, height) + #define PRINT_MSG_V(args...) + #endif +#else + #define INFO_MSG(args...) + #define PRINT_DEBUG(hid, rows, cols, num_cols) + #define PRINT_DEBUG4D(hid, rows, cols, height) + #define PRINT_DEBUG4D_FP16(hid, num, height, rows, cols) + #define PRINT_MSG(args...) + #define PRINT_DEBUG_V(hid, rows, cols, height) + #define PRINT_DEBUG4D_V(hid, rows, cols, height) + #define PRINT_MSG_V(args...) +#endif + +#define STRING(arg) #arg + +// This is to avoid a ton of spurious unused variable warnings when +// we're not building for gem5. +#define UNUSED(x) (void)(x) + +// Macros for computing the maximum of a group of elements. +// +// Why macros and not functions (or a loop)? A loop takes O(n) cycles to +// compute the maximum, when it could be done in O(log n) time with a tree +// based implementation. But Aladdin regards function calls as a hard +// dependency that it does not optimize across, so we would not get the +// parallelism we expect from the tree. Thus, these must be macros. +// +// I've only implemented a few of these. These are only meant for the pooling +// layers, and we shouldn't need more than a 3x3 pooling layer anyways. +#define max2(A, B) (((A) > (B)) ? (A) : (B)) +#define max3(e0, e1, e2) max2(max2(e0, e1), e2) +#define max4(e0, e1, e2, e3) max2(max2(e0, e1), max2(e2, e3)) +#define max8(e0, e1, e2, e3, e4, e5, e6, e7) \ + max2(max4(e0, e1, e2, e3), max4(e4, e5, e6, e7)) +#define max9(e0, e1, e2, e3, e4, e5, e6, e7, e8) \ + max2(max8(e0, e1, e2, e3, e4, e5, e6, e7), e8) + +#define min2(A, B) (((A) < (B)) ? (A) : (B)) + +#define FRAC_CEIL(A, B) ((A) / (B) + ((A) % (B) != 0)) +// Convenience macros to switch between invoking an accelerator (if building a +// binary for gem5) or just calling the kernel function in software. +// +// Usage: +// +// These macros expand differently based on whether the GEM5_HARNESS macro is +// defined. If so, then this binary is meant to be run under gem5, invoking +// accelerators; if not, this binary should run the pure software version of +// the accelerated kernels. +// +// If GEM5_HARNESS is defined: +// +// MAP_ARRAY_TO_ACCEL(myReqCode, myArrayName, myArrayPtr, mySize) +// ===> mapArrayToAccelerator(myReqCode, myArrayName, myArrayPtr, mySize) +// +// INVOKE_KERNEL(myReqCode, kernelFuncName, args...) +// ===> invokeAcceleratorAndBlock(myReqCode) +// +// Otherwise: +// MAP_ARRAY_TO_ACCEL(myReqCode, myArrayName, myArrayPtr, mySize) +// expands to nothing +// +// INVOKE_KERNEL(myReqCode, kernelFuncName, args...) +// ===> kernelFuncName(args) +// +#ifdef GEM5_HARNESS + +#define MAP_ARRAY_TO_ACCEL(req_code, name, base_addr, size) \ + mapArrayToAccelerator(req_code, name, base_addr, size) +#define INVOKE_KERNEL(req_code, kernel_ptr, args...) \ + do { \ + UNUSED(kernel_ptr); \ + invokeAcceleratorAndBlock(req_code); \ + } while (0) +#define INVOKE_KERNEL_NOBLOCK(req_code, finish_flag, kernel_ptr, args...) \ + do { \ + UNUSED(kernel_ptr); \ + invokeAcceleratorAndReturn2(req_code, finish_flag); \ + } while (0) + +#define INVOKE_DMA_READ_TRAFFIC_GEN(start_addr, size) \ + do { \ + invokeAladdinTrafficGenAndBlock(start_addr, size, false, false); \ + } while (0) +#define INVOKE_DMA_WRITE_TRAFFIC_GEN(start_addr, size) \ + do { \ + invokeAladdinTrafficGenAndBlock(start_addr, size, true, false); \ + } while (0) +#define INVOKE_ACP_READ_TRAFFIC_GEN(start_addr, size) \ + do { \ + invokeAladdinTrafficGenAndBlock(start_addr, size, false, true); \ + } while (0) +#define INVOKE_ACP_WRITE_TRAFFIC_GEN(start_addr, size) \ + do { \ + invokeAladdinTrafficGenAndBlock(start_addr, size, true, true); \ + } while (0) + +#else + +#define MAP_ARRAY_TO_ACCEL(req_code, name, base_addr, size) \ + do { \ + INFO_MSG("Mapping array %s @ %p, size %d.\n", \ + name, (void*)base_addr, (int)(size)); \ + UNUSED(req_code); \ + UNUSED(name); \ + UNUSED(base_addr); \ + UNUSED(size); \ + } while (0) +#define INVOKE_KERNEL(req_code, kernel_ptr, args...) kernel_ptr(args) +#define INVOKE_KERNEL_NOBLOCK(req_code, finish_flag, kernel_ptr, args...) \ + kernel_ptr(args) +#define INVOKE_DMA_READ_TRAFFIC_GEN(start_addr, size) \ + do { \ + UNUSED(start_addr); \ + UNUSED(size); \ + } while (0) +#define INVOKE_DMA_WRITE_TRAFFIC_GEN(start_addr, size) \ + do { \ + UNUSED(start_addr); \ + UNUSED(size); \ + } while (0) +#define INVOKE_ACP_READ_TRAFFIC_GEN(start_addr, size) \ + do { \ + UNUSED(start_addr); \ + UNUSED(size); \ + } while (0) +#define INVOKE_ACP_WRITE_TRAFFIC_GEN(start_addr, size) \ + do { \ + UNUSED(start_addr); \ + UNUSED(size); \ + } while (0) + +#endif + +// Simplified version of MAP_ARRAY_TO_ACCEL. +// +// This assumes that the current name of the base pointer is also the name of +// the array in the top level function of the dynamic trace. THIS IS VERY +// IMPORTANT - if the argument passed to a top level function has been renamed in +// the function, then this WILL NOT WORK! +// +// MAP_ARRAY(myReqCode, myArray, mySize) +// ===> MAP_ARRAY_TO_ACCEL(myReqCode, "myArray", myArray, mySize) +#define MAP_ARRAY(req_code, name_and_base_addr, size) \ + MAP_ARRAY_TO_ACCEL( \ + req_code, STRING(name_and_base_addr), name_and_base_addr, size) + +// Use these convenience macros to cast a raw pointer into a multidimensional +// variable-length array, which lets us use [] notation inside of the ugly +// sub2ind syntax! +// +// Usage: +// If we have an array like array[5][4]: +// ARRAY_2D(TYPE, output_name, array, 4); +// +// If we have an array like array[5][4][3]: +// ARRAY_3D(TYPE, output_name, array, 4, 3); +// +// If we have an array like array[5][4][3][2] +// ARRAY_4D(TYPE, output_name, array, 4, 3, 2); +// +// And so on... +#define ARRAY_1D(TYPE, output_array_name, input_array_name) \ + TYPE* output_array_name = (TYPE*)input_array_name + +#define ARRAY_2D(TYPE, output_array_name, input_array_name, DIM_1) \ + TYPE(*output_array_name)[DIM_1] = (TYPE(*)[DIM_1])input_array_name + +#define ARRAY_3D(TYPE, output_array_name, input_array_name, DIM_1, DIM_2) \ + TYPE(*output_array_name)[DIM_1][DIM_2] = \ + (TYPE(*)[DIM_1][DIM_2])input_array_name + +#define ARRAY_4D( \ + TYPE, output_array_name, input_array_name, DIM_1, DIM_2, DIM_3) \ + TYPE(*output_array_name)[DIM_1][DIM_2][DIM_3] = \ + (TYPE(*)[DIM_1][DIM_2][DIM_3])input_array_name + +#define ARRAY_5D( \ + TYPE, output_array_name, input_array_name, DIM_1, DIM_2, DIM_3, DIM_4) \ + TYPE(*output_array_name)[DIM_1][DIM_2][DIM_3][DIM_4] = \ + (TYPE(*)[DIM_1][DIM_2][DIM_3][DIM_4])input_array_name + +#endif diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/dma_interface.c b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/dma_interface.c new file mode 100644 index 0000000000..81bce54469 --- /dev/null +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/dma_interface.c @@ -0,0 +1,73 @@ +#include <assert.h> +#include <string.h> +#include "dma_interface.h" + +// All _dmaImplN functions must be always inlined or we'll get extra functions +// in the trace. + +#if defined(DMA_INTERFACE_V3) + +// Starting with version 3, all versioning will be distinguished by the return +// value of the DMA functions. + +__attribute__((__always_inline__)) +int _dmaImpl3(void* dst_addr, void* src_addr, size_t size) { + assert(size > 0); + memmove(dst_addr, src_addr, size); + return 3; +} + +int dmaLoad(void* dst_addr, void* src_host_addr, size_t size) { + return _dmaImpl3(dst_addr, src_host_addr, size); +} + +int dmaStore(void* dst_host_addr, void* src_addr, size_t size) { + return _dmaImpl3(dst_host_addr, src_addr, size); +} + +int setReadyBits(void* start_addr, size_t size, unsigned value) { + asm(""); + return 0; +} + +#elif defined(DMA_INTERFACE_V2) + +// With version 2 and earlier, we return (void*)NULL and use the number of +// function arguments to distinguish the DMA functions. + +__attribute__((__always_inline__)) +void* _dmaImpl2(void* base_addr, size_t src_off, size_t dst_off, size_t size) { + assert(size > 0); + memmove(base_addr + dst_off, base_addr + src_off, size); + return NULL; +} + +void* dmaLoad(void* base_addr, size_t src_off, size_t dst_off, size_t size) { + return _dmaImpl2(base_addr, src_off, dst_off, size); +} + +void* dmaStore(void* base_addr, size_t src_off, size_t dst_off, size_t size) { + return _dmaImpl2(base_addr, src_off, dst_off, size); +} + +#else + +__attribute__((__always_inline__)) +void* _dmaImpl1(void* base_addr, size_t offset, size_t size) { + assert(size > 0); + asm(""); + return NULL; +} + +void* dmaLoad(void* addr, size_t offset, size_t size) { + return _dmaImpl1(addr, offset, size); +} + +void* dmaStore(void* addr, size_t offset, size_t size) { + return _dmaImpl1(addr, offset, size); +} +#endif + +void dmaFence() { + asm(""); +} diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/dma_interface.h b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/dma_interface.h new file mode 100644 index 0000000000..f23234eede --- /dev/null +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/dma_interface.h @@ -0,0 +1,44 @@ +#ifndef __DMA_INTERFACE_H__ +#define __DMA_INTERFACE_H__ + +#include <stddef.h> + +#define PAGE_SIZE 4096 + +#if defined(DMA_INTERFACE_V3) + +// Version 3 of the DMA interface enables memcpy operations from arbitrary +// source and destination addresses. + +int dmaLoad(void* dst_addr, void* src_host_addr, size_t size); +int dmaStore(void* dst_host_addr, void* src_addr, size_t size); + +// The user can explicitly toggle the state of ready bits, if ready mode is +// enabled. This requires support from DMA v3. +int setReadyBits(void* start_addr, size_t size, unsigned value); + +#elif defined(DMA_INTERFACE_V2) + +#warning "DMA interface v2 is deprecated!" + +// Version 2 of the DMA interface separates source and destination offsets from +// the base address into different fields, and on the host machine, memory is +// actually copied from source to destination (the memory copy will not show up +// in the trace). + +void* dmaLoad(void* base_addr, size_t src_off, size_t dst_off, size_t size); +void* dmaStore(void* base_addr, size_t src_off, size_t dst_off, size_t size); + +#else + +#warning "DMA interface v1 is deprecated!" + +// Version 1 of the DMA interface is now deprecated and will be removed entirely. + +void* dmaLoad(void* addr, size_t offset, size_t size); +void* dmaStore(void* addr, size_t offset, size_t size); + +#endif +void dmaFence(); + +#endif diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/gem5_harness.h b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/gem5_harness.h new file mode 100644 index 0000000000..36859cfe1b --- /dev/null +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/gem5_harness.h @@ -0,0 +1,10 @@ +#ifndef _GEM5_HARNESS_H_ +#define _GEM5_HARNESS_H_ + +/* One header to include them all. */ + +//#include "aladdin_sys_connection.h" +//#include "aladdin_sys_constants.h" +#include "dma_interface.h" + +#endif diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/load_cam_model.c b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/load_cam_model.c new file mode 100644 index 0000000000..124fe0b7d1 --- /dev/null +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/load_cam_model.c @@ -0,0 +1,335 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include "utility.h" +#include "pipe_stages.h" +#include "load_cam_model.h" + +// Get color space transform +float* get_Ts(char* cam_model_path) { + float *Ts; + int err = posix_memalign((void **)&Ts, CACHELINE_SIZE, sizeof(float) * 9); + assert(err == 0 && "Failed to allocate memory!"); + char *line; + char *str; + float line_data[3]; + size_t len = 0; + int line_idx = 0; + + // Open file for reading + char file_name[] = "raw2jpg_transform.txt"; + char file_path[100]; + strcpy(file_path, cam_model_path); + strcat(file_path, file_name); + FILE *fp = fopen(file_path, "r"); + if (fp == NULL) { + printf("Didn't find the camera model file!\n"); + exit(1); + } + + while (getline(&line, &len, fp) != -1) { + str = strtok(line, " \n"); + int i = 0; + while (str != NULL) { + line_data[i] = atof(str); + str = strtok(NULL, " \n"); + i++; + } + + if (line_idx >= 1 && line_idx <= 3) { + for (int j = 0; j < 3; j++) { + Ts[(line_idx - 1) * 3 + j] = line_data[j]; + } + } + line_idx = line_idx + 1; + } + fclose(fp); + free(line); + return Ts; +} + +// Get white balance transform +float* get_Tw(char* cam_model_path, int wb_index) { + float *Tw; + int err = posix_memalign((void **)&Tw, CACHELINE_SIZE, sizeof(float) * 9); + assert(err == 0 && "Failed to allocate memory!"); + char *line; + char *str; + float line_data[3]; + size_t len = 0; + int line_idx = 0; + + // Calculate base for the white balance transform selected + // For more details see the camera model readme + int wb_base = 8 + 5*(wb_index-1); + + // Open file for reading + // Open file for reading + char file_name[] = "raw2jpg_transform.txt"; + char file_path[100]; + strcpy(file_path, cam_model_path); + strcat(file_path, file_name); + FILE *fp = fopen(file_path, "r"); + if (fp == NULL) { + printf("Didn't find the camera model file!\n"); + exit(1); + } + + // Read a line at a time + while (getline(&line, &len, fp) != -1) { + str = strtok(line, " \n"); + int i = 0; + while (str != NULL) { + line_data[i] = atof(str); + str = strtok(NULL, " \n"); + i++; + } + + if (line_idx == wb_base) { + // Convert the white balance vector into a diagaonal matrix + for (int i=0; i<3; i++) { + for (int j=0; j<3; j++) { + if (i == j) { + Tw[i * 3 + j] = line_data[i]; + } else { + Tw[i * 3 + j] = 0.0; + } + } + } + } + line_idx = line_idx + 1; + } + fclose(fp); + free(line); + return Tw; +} + + +// Get combined transforms for checking +float* get_TsTw(char* cam_model_path, int wb_index) { + float *TsTw; + int err = posix_memalign((void **)&TsTw, CACHELINE_SIZE, sizeof(float) * 9); + assert(err == 0 && "Failed to allocate memory!"); + char *line; + char *str; + float line_data[3]; + size_t len = 0; + int line_idx = 0; + + // Calculate base for the white balance transform selected + // For more details see the camera model readme + int wb_base = 5 + 5*(wb_index-1); + + // Open file for reading + char file_name[] = "raw2jpg_transform.txt"; + char file_path[100]; + strcpy(file_path, cam_model_path); + strcat(file_path, file_name); + FILE *fp = fopen(file_path, "r"); + if (fp == NULL) { + printf("Didn't find the camera model file!\n"); + exit(1); + } + + // Read a line at a time + while (getline(&line, &len, fp) != -1) { + str = strtok(line, " \n"); + int i = 0; + while (str != NULL) { + line_data[i] = atof(str); + str = strtok(NULL, " \n"); + i++; + } + + if (line_idx >= wb_base && line_idx <= (wb_base + 2)) { + for (int j = 0; j < 3; j++) { + TsTw[(line_idx - wb_base) * 3 + j] = line_data[j]; + } + } + line_idx = line_idx + 1; + } + fclose(fp); + free(line); + return TsTw; +} + +// Get control points +float* get_ctrl_pts(char* cam_model_path, int num_cntrl_pts) { + float *ctrl_pnts; + int err = posix_memalign((void **)&ctrl_pnts, CACHELINE_SIZE, + sizeof(float) * num_cntrl_pts * 3); + assert(err == 0 && "Failed to allocate memory!"); + char *line; + char *str; + float line_data[3]; + size_t len = 0; + int line_idx = 0; + + // Open file for reading + char file_name[] = "raw2jpg_ctrlPoints.txt"; + char file_path[100]; + strcpy(file_path, cam_model_path); + strcat(file_path, file_name); + FILE *fp = fopen(file_path, "r"); + if (fp == NULL) { + printf("Didn't find the camera model file!\n"); + exit(1); + } + + // Read a line at a time + while (getline(&line, &len, fp) != -1) { + str = strtok(line, " \n"); + int i = 0; + while (str != NULL) { + line_data[i] = atof(str); + str = strtok(NULL, " \n"); + i++; + } + + if (line_idx >= 1 && line_idx <= num_cntrl_pts) { + for (int j = 0; j < 3; j++) { + ctrl_pnts[(line_idx - 1) * 3 + j] = line_data[j]; + } + } + line_idx = line_idx + 1; + } + fclose(fp); + free(line); + return ctrl_pnts; +} + +// Get weights +float* get_weights(char* cam_model_path, int num_cntrl_pts) { + float *weights; + int err = posix_memalign((void **)&weights, CACHELINE_SIZE, + sizeof(float) * num_cntrl_pts * 3); + assert(err == 0 && "Failed to allocate memory!"); + char *line; + char *str; + float line_data[3]; + size_t len = 0; + int line_idx = 0; + + // Open file for reading + char file_name[] = "raw2jpg_coefs.txt"; + char file_path[100]; + strcpy(file_path, cam_model_path); + strcat(file_path, file_name); + FILE *fp = fopen(file_path, "r"); + if (fp == NULL) { + printf("Didn't find the camera model file!\n"); + exit(1); + } + + // Read a line at a time + while (getline(&line, &len, fp) != -1) { + str = strtok(line, " \n"); + int i = 0; + while (str != NULL) { + line_data[i] = atof(str); + str = strtok(NULL, " \n"); + i++; + } + + if (line_idx >= 1 && line_idx <= num_cntrl_pts) { + for (int j = 0; j < 3; j++) { + weights[(line_idx - 1) * 3 + j] = line_data[j]; + } + } + line_idx = line_idx + 1; + } + fclose(fp); + free(line); + return weights; +} + +// Get coeficients +float* get_coefs(char* cam_model_path, int num_cntrl_pts) { + float *coefs; + int err = posix_memalign((void **)&coefs, CACHELINE_SIZE, sizeof(float) * 12); + assert(err == 0 && "Failed to allocate memory!"); + char *line; + char *str; + float line_data[3]; + size_t len = 0; + int line_idx = 0; + + // Open file for reading + char file_name[] = "raw2jpg_coefs.txt"; + char file_path[100]; + strcpy(file_path, cam_model_path); + strcat(file_path, file_name); + FILE *fp = fopen(file_path, "r"); + if (fp == NULL) { + printf("Didn't find the camera model file!\n"); + exit(1); + } + + // Read a line at a time + while (getline(&line, &len, fp) != -1) { + str = strtok(line, " \n"); + int i = 0; + while (str != NULL) { + line_data[i] = atof(str); + str = strtok(NULL, " \n"); + i++; + } + + if (line_idx >= (num_cntrl_pts + 1) && line_idx <= (num_cntrl_pts + 4)) { + for (int j = 0; j < 3; j++) { + coefs[(line_idx - num_cntrl_pts - 1) * 3 + j] = line_data[j]; + } + } + line_idx = line_idx + 1; + } + fclose(fp); + free(line); + return coefs; +} + + +// Get tone mapping table +float* get_tone_map(char* cam_model_path) { + float *tone_map; + int err = posix_memalign((void **)&tone_map, CACHELINE_SIZE, + sizeof(float) * 256 * CHAN_SIZE); + assert(err == 0 && "Failed to allocate memory!"); + char *line; + char *str; + float line_data[3]; + size_t len = 0; + int line_idx = 0; + + // Open file for reading + char file_name[] = "raw2jpg_respFcns.txt"; + char file_path[100]; + strcpy(file_path, cam_model_path); + strcat(file_path, file_name); + FILE *fp = fopen(file_path, "r"); + if (fp == NULL) { + printf("Didn't find the camera model file!\n"); + exit(1); + } + + // Read a line at a time + while (getline(&line, &len, fp) != -1) { + str = strtok(line, " \n"); + int i = 0; + while (str != NULL) { + line_data[i] = atof(str); + str = strtok(NULL, " \n"); + i++; + } + + if (line_idx >= 1 && line_idx <= 256) { + for (int j = 0; j < 3; j++) { + tone_map[(line_idx - 1) * 3 + j] = line_data[j]; + } + } + line_idx = line_idx + 1; + } + fclose(fp); + free(line); + return tone_map; +} diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/load_cam_model.h b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/load_cam_model.h new file mode 100644 index 0000000000..8e5ee95217 --- /dev/null +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/load_cam_model.h @@ -0,0 +1,25 @@ +#ifndef _LOAD_CAM_MODEL_H_ +#define _LOAD_CAM_MODEL_H_ + +// Get color space transform +float *get_Ts(char *cam_model_path); + +// Get white balance transform +float *get_Tw(char *cam_model_path, int wb_index); + +// Get combined transforms for checking +float *get_TsTw(char *cam_model_path, int wb_index); + +// Get control points +float *get_ctrl_pts(char *cam_model_path, int num_cntrl_pts); + +// Get weights +float *get_weights(char *cam_model_path, int num_cntrl_pts); + +// Get coeficients +float *get_coefs(char *cam_model_path, int num_cntrl_pts); + +// Get tone mapping table +float *get_tone_map(char *cam_model_path); + +#endif diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/main.c b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/main.c new file mode 100644 index 0000000000..cc75beb664 --- /dev/null +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/main.c @@ -0,0 +1,156 @@ + +#include <stdlib.h> +#include "utility.h" +#include "visc.h" +#include "defs.h" + + +typedef struct __attribute__((__packed__)) { + float* input; size_t bytes_input; + float* result; size_t bytes_result; +} +RootIn; + +/* +typedef struct __attribute__((__packed__)) { + uint8_t *input; size_t bytes_input; + uint8_t *result; size_t bytes_result; + float *input_scaled; size_t bytes_input_scaled; + float *result_scaled; size_t bytes_result_scaled; + float *demosaic_out; size_t bytes_demosaic_out; + float *denoise_out; size_t bytes_denoise_out; + float *transform_out; size_t bytes_transform_out; + float *gamut_out;size_t bytes_gamut_out; + float *TsTw; size_t bytes_TsTw; + float *ctrl_pts; size_t bytes_ctrl_pts; + float *weights; size_t bytes_weights; + float*coefs; size_t bytes_coefs; + float *l2_dist; size_t bytes_l2_dist; + float *tone_map; size_t bytes_tone_map; + int row_size; int col_size; +} +RootIn; +*/ + + + +void scale_values(float* input, size_t num_elems) { + + __visc__hint(DEVICE); + __visc__attributes(1, input, 1, input); + + for (int ind = 0; ind < num_elems; ind++){ + input[ind] = input[ind] * 2.0; + } + + __visc__return(1, num_elems); +} + + + + +void graphRoot(/*0*/ float* input, /*1*/ size_t bytes_input, + /*2*/ float* result, /*3*/ size_t bytes_result) { + + //Specifies compilation target for current node + __visc__hint(CPU_TARGET); + + __visc__attributes(2, input, result, 2, input, result); + + // Create an 0D (specified by 1st argument) HPVM node - so a single node + // associated with node function ---_fxp_wrapper + + void* scaleNode = __visc__createNodeND(0, scale_values); + + // BindIn binds inputs of current node with specified node + // - destination node + // - argument position in argument list of function of source node + // - argument position in argument list of function of destination node + // - streaming (1) or non-streaming (0) + + // Edge transfers data between nodes within the same level of hierarchy. + // - source and destination dataflow nodes + // - edge type, all-all (1) or one-one(0) + // - source position (in output struct of source node) + // - destination position (in argument list of destination node) + // - streaming (1) or non-streaming (0) + + // scale_fxp inputs + __visc__bindIn(scaleNode, 0, 0, 0); // input -> ScNode:input + __visc__bindIn(scaleNode, 1, 1, 0); // bytes_input -> ScNode:bytes_input + + // Similar to bindIn, but for the output. Output of a node is a struct, and + // we consider the fields in increasing ordering. + __visc__bindOut(scaleNode, 0, 0, 0); + +} + + + + + +int main(int argc, char* argv[]) { + + size_t input_size = 100; + size_t result_size = 100; + + size_t input_bytes = input_size * sizeof(float); + size_t result_bytes = result_size * sizeof(float); + + // This is host_input in cam_pipe() + float* input = (float*) malloc(input_bytes); + for(unsigned int i = 0; i < input_size; i++){ + input[i] = 1.0; + } + // This is host_result in cam_pipe() + float* result = (float*) malloc(result_bytes); + + + __visc__init(); + + RootIn* rootArgs = (RootIn*) malloc(sizeof(RootIn)); + + // Set up HPVM DFG inputs in the rootArgs struct. + rootArgs->input = input; + rootArgs->bytes_input = input_size; + + printf("input = %d input_bytes = %d \n", input, input_bytes); + + rootArgs->result = result; + rootArgs->bytes_result = result_size; + + + llvm_visc_track_mem(input, input_bytes); + llvm_visc_track_mem(result, result_bytes); + + + void* testDFG = __visc__launch(0, graphRoot, (void*) rootArgs); + __visc__wait(testDFG); + + + printf("input = %d \n", input); + + llvm_visc_request_mem(input, input_bytes); + //llvm_visc_request_mem(result, result_bytes); + + printf("requested mem \n"); + + for(unsigned int i = 0; i < input_size; i++){ + printf("input[%d] = %f \n", i, input[i]); + } + + //llvm_visc_untrack_mem(input); + //llvm_visc_untrack_mem(result); + + printf ("untracked mem \n"); + + __visc__cleanup(); + + printf ("cleaned up visc"); + + return 0; +} + + + + diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/main_old.c b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/main_old.c new file mode 100644 index 0000000000..ea42ad0bf8 --- /dev/null +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/main_old.c @@ -0,0 +1,865 @@ +#include <argp.h> +#include <stdio.h> +#include <stdlib.h> +#include <assert.h> +#include <string.h> +#include <math.h> +#include "utility.h" + +#include "cam_pipe_utility.h" +#include "pipe_stages.h" +#include "load_cam_model.h" + +#include "visc.h" + +int NUM_TEST_CASES; +int NUM_CLASSES; +int INPUT_DIM; +int NUM_WORKER_THREADS; + +// Type of struct that is used to pass arguments to the HPVM dataflow graph +// using the hpvm launch operation +typedef struct __attribute__((__packed__)) { + uint8_t *input; size_t bytes_input; + uint8_t *result; size_t bytes_result; + float *input_scaled; size_t bytes_input_scaled; + float *result_scaled; size_t bytes_result_scaled; + float *demosaic_out; size_t bytes_demosaic_out; + float *denoise_out; size_t bytes_denoise_out; + float *transform_out; size_t bytes_transform_out; + float *gamut_out;size_t bytes_gamut_out; + float *TsTw; size_t bytes_TsTw; + float *ctrl_pts; size_t bytes_ctrl_pts; + float *weights; size_t bytes_weights; + float*coefs; size_t bytes_coefs; + float *l2_dist; size_t bytes_l2_dist; + float *tone_map; size_t bytes_tone_map; + int row_size; int col_size; +} +RootIn; + +typedef enum _argnum { + RAW_IMAGE_BIN, + OUTPUT_IMAGE_BIN, + NUM_REQUIRED_ARGS, + DATA_FILE = NUM_REQUIRED_ARGS, + NUM_ARGS, +} argnum; + +typedef struct _arguments { + char* args[NUM_ARGS]; + int num_inputs; + int num_threads; +} arguments; + +static char prog_doc[] = "\nCamera pipeline on gem5-Aladdin.\n"; +static char args_doc[] = "path/to/raw-image-binary path/to/output-image-binary"; +static struct argp_option options[] = { + { "num-inputs", 'n', "N", 0, "Number of input images" }, { 0 }, + { "data-file", 'f', "F", 0, + "File to read data and weights from (if data-init-mode == READ_FILE or " + "save-params is true). *.txt files are decoded as text files, while " + "*.bin files are decoded as binary files." }, +}; + +static error_t parse_opt(int key, char* arg, struct argp_state* state) { + arguments* args = (arguments*)(state->input); + switch (key) { + case 'n': { + args->num_inputs = strtol(arg, NULL, 10); + break; + } + case 'f': { + args->args[DATA_FILE] = arg; + break; + } + case 't': { + args->num_threads = strtol(arg, NULL, 10); + break; + } + case ARGP_KEY_ARG: { + if (state->arg_num >= NUM_REQUIRED_ARGS) + argp_usage(state); + args->args[state->arg_num] = arg; + break; + } + case ARGP_KEY_END: { + if (state->arg_num < NUM_REQUIRED_ARGS) { + fprintf(stderr, + "Not enough arguments! Got %d, require %d.\n", + state->arg_num, + NUM_REQUIRED_ARGS); + argp_usage(state); + } + break; + } + default: + return ARGP_ERR_UNKNOWN; + } + return 0; +} + +void set_default_args(arguments* args) { + args->num_inputs = 1; + args->num_threads = 0; + for (int i = 0; i < NUM_ARGS; i++) { + args->args[i] = NULL; + } +} + +static struct argp parser = { options, parse_opt, args_doc, prog_doc }; + +// Helper function for printing intermediate results +void descale_cpu(float *input, size_t bytes_input, + uint8_t *output, size_t bytes_result, + int row_size, int col_size) { + + for (int chan = 0; chan < CHAN_SIZE; chan++) + for (int row = 0; row < row_size; row++) + for (int col = 0; col < col_size; col++) { + int index = (chan*row_size + row) * col_size + col; + output[index] = min(max(input[index] * 255, 0), 255); + } +} + +static void sort(float arr[], int n) { + int i, j; + for (i = 0; i < n - 1; i++) + for (j = 0; j < n - i - 1; j++) + if (arr[j] > arr[j + 1]) { + float temp = arr[j]; + arr[j] = arr[j + 1]; + arr[j + 1] = temp; + } +} + +/**************************************************************/ +/*** HPVM Leaf node Functions - Performing the computations ***/ +/**************************************************************/ + +// In this benchmark, no use of HPVM query intrinsics in the leaf node functions + +// Leaf HPVM node function for scale +void scale_fxp(uint8_t *input, size_t bytes_input, + float *output, size_t bytes_output, + int row_size, int col_size) { + + //Specifies compilation target for current node + __visc__hint(DEVICE); + + // Specifies pointer arguments that will be used as "in" and "out" arguments + // - count of "in" arguments + // - list of "in" argument , and similar for "out" + __visc__attributes(2, input, output, 1, output); + + for (int chan = 0; chan < CHAN_SIZE; chan++) + for (int row = 0; row < row_size; row++) + for (int col = 0; col < col_size; col++){ + int index = (chan*row_size + row) * col_size + col; + output[index] = input[index] * 1.0 / 255; + } + __visc__return(1, bytes_output); +} + +// Leaf HPVM node function for descale +void descale_fxp(float *input, size_t bytes_input, + uint8_t *output, size_t bytes_result, + int row_size, int col_size) { + __visc__hint(DEVICE); + __visc__attributes(2, input, output, 1, output); + + for (int chan = 0; chan < CHAN_SIZE; chan++) + for (int row = 0; row < row_size; row++) + for (int col = 0; col < col_size; col++) { + int index = (chan*row_size + row) * col_size + col; + output[index] = min(max(input[index] * 255, 0), 255); + } + __visc__return(1, bytes_result); +} + +// Leaf HPVM node function for demosaicing +void demosaic_fxp(float *input, size_t bytes_input, + float *result, size_t bytes_result, + int row_size, int col_size) { + __visc__hint(DEVICE); + __visc__attributes(2, input, result, 1, result); + + for (int row = 1; row < row_size - 1; row++) + for (int col = 1; col < col_size - 1; col++) { + int index_0 = (0 * row_size + row) * col_size + col; + int index_1 = (1 * row_size + row) * col_size + col; + int index_2 = (2 * row_size + row) * col_size + col; + if (row % 2 == 0 && col % 2 == 0) { + // Green pixel + // Getting the R values + float R1 = input[index_0 - 1]; + float R2 = input[index_0 + 1]; + // Getting the B values + float B1 = input[index_2 - col_size]; + float B2 = input[index_2 + col_size]; + // R + result[index_0] = (R1 + R2) / 2; + // G + result[index_1] = input[index_1] * 2; + // B + result[index_2] = (B1 + B2) / 2; + } else if (row % 2 == 0 && col % 2 == 1) { + // Red pixel + // Getting the G values + float G1 = input[index_1 - col_size]; + float G2 = input[index_1 + col_size]; + float G3 = input[index_1 - 1]; + float G4 = input[index_1 + 1]; + // Getting the B values + float B1 = input[index_2 - col_size - 1]; + float B2 = input[index_2 - col_size + 1]; + float B3 = input[index_2 + col_size - 1]; + float B4 = input[index_2 + col_size + 1]; + // R + result[index_0] = input[index_0]; + // G + result[index_1] = (G1 + G2 + G3 + G4) / 2; + // B (center pixel) + result[index_2] = (B1 + B2 + B3 + B4) / 4; + } else if (row % 2 == 1 && col % 2 == 0) { + // Blue pixel + // Getting the R values + float R1 = input[index_0 - col_size - 1]; + float R2 = input[index_0 + col_size - 1]; + float R3 = input[index_0 - col_size + 1]; + float R4 = input[index_0 + col_size + 1]; + // Getting the G values + float G1 = input[index_1 - col_size]; + float G2 = input[index_1 + col_size]; + float G3 = input[index_1 - 1]; + float G4 = input[index_1 + 1]; + // R + result[index_0] = (R1 + R2 + R3 + R4) / 4; + // G + result[index_1] = (G1 + G2 + G3 + G4) / 2; + // B + result[index_2] = input[index_2]; + } else { + // Bottom Green pixel + // Getting the R values + float R1 = input[index_0 - col_size]; + float R2 = input[index_0 + col_size]; + // Getting the B values + float B1 = input[index_2 - 1]; + float B2 = input[index_2 + 1]; + // R + result[index_0] = (R1 + R2) / 2; + // G + result[index_1] = input[index_1] * 2; + // B + result[index_2] = (B1 + B2) / 2; + } + } + __visc__return(1, bytes_result); +} + +// Leaf HPVM node function for denoise +void denoise_fxp(float *input, size_t bytes_input, + float *result, size_t bytes_result, + int row_size, int col_size) { + __visc__hint(DEVICE); + __visc__attributes(2, input, result, 1, result); + + for (int chan = 0; chan < CHAN_SIZE; chan++) + for (int row = 0; row < row_size; row++) + for (int col = 0; col < col_size; col++) + if (row >= 1 && row < row_size - 1 && col >= 1 && col < col_size - 1) { + float filter[9]; + for (int i = -1; i < 2; i++) + for (int j = -1; j < 2; j++) { + int index = ((i+row) - row + 1) * 3 + (j+col) - col + 1; + filter[index] = input[(chan * row_size + (i + row)) * col_size + (j + col)]; + } + sort(filter, 9); + result[(chan * row_size + row) * col_size + col] = filter[4]; + } else { + result[(chan * row_size + row) * col_size + col] = input[(chan * row_size + row) * col_size + col]; + } + __visc__return(1, bytes_result); +} + +// Leaf HPVM node function, for color map and white balance transform +void transform_fxp(float *input, size_t bytes_input, + float *result, size_t bytes_result, + float *TsTw_tran, size_t bytes_TsTw, + int row_size, int col_size) { + __visc__hint(DEVICE); + __visc__attributes(3, input, result, TsTw_tran, 1, result); + + for (int chan = 0; chan < CHAN_SIZE; chan++) + for (int row = 0; row < row_size; row++) + for (int col = 0; col < col_size; col++) { + int index = (chan * row_size + row) * col_size + col; + int index_0 = (0 * row_size + row) * col_size + col; + int index_1 = (1 * row_size + row) * col_size + col; + int index_2 = (2 * row_size + row) * col_size + col; + int index_2d_0 = 0 * CHAN_SIZE + chan; + int index_2d_1 = 1 * CHAN_SIZE + chan; + int index_2d_2 = 2 * CHAN_SIZE + chan; + result[index] = + max(input[index_0] * TsTw_tran[index_2d_0] + + input[index_1] * TsTw_tran[index_2d_1] + + input[index_2] * TsTw_tran[index_2d_2], + 0); + } + __visc__return(1, bytes_result); +} + +// Leaf HPVM node function, for gamut mapping +void gamut_map_fxp(float *input, size_t bytes_input, + float *result, size_t bytes_result, + float *ctrl_pts, size_t bytes_ctrl_pts, + float *weights, size_t bytes_weights, + float *coefs, size_t bytes_coefs, + float *l2_dist, size_t bytes_l2_dist, + int row_size, int col_size) { + __visc__hint(CPU_TARGET); + __visc__attributes(6, input, result, ctrl_pts, weights, coefs, l2_dist, 2, result, l2_dist); + + // First, get the L2 norm from every pixel to the control points, + // Then, sum it and weight it. Finally, add the bias. + + for (int row = 0; row < row_size; row++) + for (int col = 0; col < col_size; col++) { + for (int cp = 0; cp < 3702; cp++) { + int index_0 = (0 * row_size + row) * col_size + col; + int index_1 = (1 * row_size + row) * col_size + col; + int index_2 = (2 * row_size + row) * col_size + col; + float val1 = (input[index_0] - ctrl_pts[cp * 3 + 0]); + float val2 = (input[index_0] - ctrl_pts[cp * 3 + 0]); + float val3 = (input[index_1] - ctrl_pts[cp * 3 + 1]); + float val4 = (input[index_1] - ctrl_pts[cp * 3 + 1]); + float val5 = (input[index_2] - ctrl_pts[cp * 3 + 2]); + float val6 = (input[index_2] - ctrl_pts[cp * 3 + 2]); + float val = val1 * val2 + val3 * val4 + val5 * val6; + float sqrt_val = sqrt(val); + l2_dist[cp] = sqrt_val; + } + for (int chan = 0; chan < CHAN_SIZE; chan++) { + float chan_val = 0.0; + for (int cp = 0; cp < 3702; cp++) { + chan_val += l2_dist[cp] * weights[cp * CHAN_SIZE + chan]; + } + chan_val += coefs[0 * CHAN_SIZE + chan] + + coefs[1 * CHAN_SIZE + chan] * input[(0 * row_size + row) * col_size + col] + + coefs[2 * CHAN_SIZE + chan] * input[(1 * row_size + row) * col_size + col] + + coefs[3 * CHAN_SIZE + chan] * input[(2 * row_size + row) * col_size + col]; + result[(chan * row_size + row) * col_size + col] = max(chan_val, 0); + } + } + __visc__return(1, bytes_result); +} + +// HPVM leaf node function, for tone mapping +void tone_map_fxp(float *input, size_t bytes_input, + float *result, size_t bytes_result, + float *tone_map, size_t bytes_tone_map, + int row_size, int col_size) { + __visc__hint(DEVICE); + __visc__attributes(3, input, result, tone_map, 1, result); + + for (int chan = 0; chan < CHAN_SIZE; chan++) + for (int row = 0; row < row_size; row++) + for (int col = 0; col < col_size; col++) { + int index = (chan * row_size + row) * col_size + col; + uint8_t x = input[index] * 255; + result[index] = tone_map[x * CHAN_SIZE + chan]; + } + __visc__return(1, bytes_result); +} + +/********************************************************************/ +/*** HPVM Internal node Functions - Determine the graph structure ***/ +/********************************************************************/ + +// We create a wrapper node per leaf node - this is an implementation +// requirement for the FPGA backend . The CPU backend also supports this, +// so it does not cause a portability issue. + +void scale_fxp_wrapper(uint8_t *input, size_t bytes_input, + float *result, size_t bytes_result, + int row_size, int col_size) { + __visc__hint(CPU_TARGET); + __visc__attributes(2, input, result, 1, result); + + // Create an 1D (specified by 1st argument) HPVM node with 1 dynamic + // instance (last argument) associated with node function scale_fxp + void *ScaleNode = __visc__createNodeND(1, scale_fxp, (size_t)1); + + // Binds inputs of current node with specified node + // - destination node + // - argument position in argument list of function of source node + // - argument position in argument list of function of destination node + // - streaming (1) or non-streaming (0) + __visc__bindIn(ScaleNode, 0, 0, 0); // bind input + __visc__bindIn(ScaleNode, 1, 1, 0); // bind bytes_input + __visc__bindIn(ScaleNode, 2, 2, 0); // bind result + __visc__bindIn(ScaleNode, 3, 3, 0); // bind bytes_result + __visc__bindIn(ScaleNode, 4, 4, 0); // bind row_size + __visc__bindIn(ScaleNode, 5, 5, 0); // bind col_size + + // Similar to bindIn, but for the output. Output of a node is a struct, and + // we consider the fields in increasing ordering. + __visc__bindOut(ScaleNode, 0, 0, 0); +} + +void descale_fxp_wrapper(float *input, size_t bytes_input, + uint8_t *result, size_t bytes_result, + int row_size, int col_size) { + __visc__hint(CPU_TARGET); + __visc__attributes(2, input, result, 1, result); + void *DescaleNode = __visc__createNodeND(1, descale_fxp, (size_t)1); + __visc__bindIn(DescaleNode, 0, 0, 0); // bind input + __visc__bindIn(DescaleNode, 1, 1, 0); // bind bytes_input + __visc__bindIn(DescaleNode, 2, 2, 0); // bind result + __visc__bindIn(DescaleNode, 3, 3, 0); // bind bytes_result + __visc__bindIn(DescaleNode, 4, 4, 0); // bind row_size + __visc__bindIn(DescaleNode, 5, 5, 0); // bind col_size + + __visc__bindOut(DescaleNode, 0, 0, 0); +} + +void demosaic_fxp_wrapper(float *input, size_t bytes_input, + float *result, size_t bytes_result, + int row_size, int col_size) { + __visc__hint(CPU_TARGET); + __visc__attributes(2, input, result, 1, result); + void *DemosaicNode = __visc__createNodeND(1, demosaic_fxp, (size_t)1); + __visc__bindIn(DemosaicNode, 0, 0, 0); // bind input + __visc__bindIn(DemosaicNode, 1, 1, 0); // bind bytes_input + __visc__bindIn(DemosaicNode, 2, 2, 0); // bind result + __visc__bindIn(DemosaicNode, 3, 3, 0); // bind bytes_result + __visc__bindIn(DemosaicNode, 4, 4, 0); // bind row_size + __visc__bindIn(DemosaicNode, 5, 5, 0); // bind col_size + + __visc__bindOut(DemosaicNode, 0, 0, 0); +} + +void denoise_fxp_wrapper(float *input, size_t bytes_input, + float *result, size_t bytes_result, + int row_size, int col_size) { + __visc__hint(CPU_TARGET); + __visc__attributes(2, input, result, 1, result); + void *DenoiseNode = __visc__createNodeND(1, denoise_fxp, (size_t)1); + __visc__bindIn(DenoiseNode, 0, 0, 0); // bind input + __visc__bindIn(DenoiseNode, 1, 1, 0); // bind bytes_input + __visc__bindIn(DenoiseNode, 2, 2, 0); // bind result + __visc__bindIn(DenoiseNode, 3, 3, 0); // bind bytes_result + __visc__bindIn(DenoiseNode, 4, 4, 0); // bind row_size + __visc__bindIn(DenoiseNode, 5, 5, 0); // bind col_size + + __visc__bindOut(DenoiseNode, 0, 0, 0); +} + +void transform_fxp_wrapper(float *input, size_t bytes_input, + float *result, size_t bytes_result, + float *TsTw_tran, size_t bytes_TsTw, + int row_size, int col_size) { + __visc__hint(CPU_TARGET); + __visc__attributes(3, input, result, TsTw_tran, 1, result); + void *TransformNode = __visc__createNodeND(1, transform_fxp, (size_t)1); + __visc__bindIn(TransformNode, 0, 0, 0); // bind input + __visc__bindIn(TransformNode, 1, 1, 0); // bind bytes_input + __visc__bindIn(TransformNode, 2, 2, 0); // bind result + __visc__bindIn(TransformNode, 3, 3, 0); // bind bytes_result + __visc__bindIn(TransformNode, 4, 4, 0); // bind tstw + __visc__bindIn(TransformNode, 5, 5, 0); // bind bytes_tstw + __visc__bindIn(TransformNode, 6, 6, 0); // bind row_size + __visc__bindIn(TransformNode, 7, 7, 0); // bind col_size + + __visc__bindOut(TransformNode, 0, 0, 0); +} + +void gamut_fxp_wrapper(float *input, size_t bytes_input, + float *result, size_t bytes_result, + float *ctrl_pts, size_t bytes_ctrl_pts, + float *weights, size_t bytes_weights, + float *coefs, size_t bytes_coefs, + float *l2_dist, size_t bytes_l2_dist, + int row_size, int col_size) { + __visc__hint(CPU_TARGET); + __visc__attributes(6, input, result, ctrl_pts, weights, coefs, l2_dist, 1, result); + void *GamutNode = __visc__createNodeND(1, gamut_map_fxp, (size_t)1); + __visc__bindIn(GamutNode, 0, 0, 0); // bind input + __visc__bindIn(GamutNode, 1, 1, 0); // bind bytes_input + __visc__bindIn(GamutNode, 2, 2, 0); // bind result + __visc__bindIn(GamutNode, 3, 3, 0); // bind bytes_result + __visc__bindIn(GamutNode, 4, 4, 0); // bind ctrl_pts + __visc__bindIn(GamutNode, 5, 5, 0); // bind bytes_ctrl_pts + __visc__bindIn(GamutNode, 6, 6, 0); // bind weights + __visc__bindIn(GamutNode, 7, 7, 0); // bind bytes_weights + __visc__bindIn(GamutNode, 8, 8, 0); // bind coefs + __visc__bindIn(GamutNode, 9, 9, 0); // bind bytes_coefs + __visc__bindIn(GamutNode, 10, 10, 0); // bind l2_dist + __visc__bindIn(GamutNode, 11, 11, 0); // bind bytes_l2_dist + __visc__bindIn(GamutNode, 12, 12, 0); // bind row_size + __visc__bindIn(GamutNode, 13, 13, 0); // bind col_size + + __visc__bindOut(GamutNode, 0, 0, 0); +} +void tone_map_fxp_wrapper(float *input, size_t bytes_input, + float *result, size_t bytes_result, + float *tone_map, size_t bytes_tone_map, + int row_size, int col_size) { + + __visc__hint(CPU_TARGET); + __visc__attributes(3, input, result, tone_map, 1, result); + void *ToneMapNode = __visc__createNodeND(1, tone_map_fxp, (size_t)1); + __visc__bindIn(ToneMapNode, 0, 0, 0); // bind input + __visc__bindIn(ToneMapNode, 1, 1, 0); // bind bytes_input + __visc__bindIn(ToneMapNode, 2, 2, 0); // bind result + __visc__bindIn(ToneMapNode, 3, 3, 0); // bind bytes_result + __visc__bindIn(ToneMapNode, 4, 4, 0); // bind tone_map + __visc__bindIn(ToneMapNode, 5, 5, 0); // bind bytes_tone_map + __visc__bindIn(ToneMapNode, 6, 6, 0); // bind row_size + __visc__bindIn(ToneMapNode, 7, 7, 0); // bind col_size + + __visc__bindOut(ToneMapNode, 0, 0, 0); +} + + +/*** ROOT Node - Top Level of the Graph Hierarchy ***/ +void CamPipeRoot(/*0*/ uint8_t *input, /*1*/ size_t bytes_input, + /*2*/ uint8_t *result, /*3*/ size_t bytes_result, + /*4*/ float *input_scaled, /*5*/ size_t bytes_input_scaled, + /*6*/ float *result_scaled, /*7*/ size_t bytes_result_scaled, + /*8*/ float *demosaic_out, /*9*/ size_t bytes_demosaic_out, + /*10*/ float *denoise_out, /*11*/ size_t bytes_denoise_out, + /*12*/ float *transform_out, /*13*/ size_t bytes_transform_out, + /*14*/ float *gamut_out, /*15*/ size_t bytes_gamut_out, + /*16*/ float *TsTw, /*17*/ size_t bytes_TsTw, + /*18*/ float *ctrl_pts, /*19*/ size_t bytes_ctrl_pts, + /*20*/ float *weights, /*21*/ size_t bytes_weights, + /*22*/ float*coefs, /*23*/ size_t bytes_coefs, + /*24*/ float *l2_dist, /*25*/ size_t bytes_l2_dist, + /*26*/ float *tone_map, /*27*/ size_t bytes_tone_map, + /*28*/ int row_size, /*29*/ int col_size) { + + //Specifies compilation target for current node + __visc__hint(CPU_TARGET); + + // Specifies pointer arguments that will be used as "in" and "out" arguments + // - count of "in" arguments + // - list of "in" argument , and similar for "out" + __visc__attributes(14, input, result, input_scaled, result_scaled, demosaic_out, denoise_out, + transform_out, gamut_out, TsTw, ctrl_pts, weights, coefs, tone_map, l2_dist, + 5, result, demosaic_out, denoise_out, transform_out, gamut_out); + + // Create an 0D (specified by 1st argument) HPVM node - so a single node + // associated with node function ---_fxp_wrapper + void* ScNode = __visc__createNodeND(0, scale_fxp_wrapper); + void* DmNode = __visc__createNodeND(0, demosaic_fxp_wrapper); + void *DnNode = __visc__createNodeND(0, denoise_fxp_wrapper); + void *TrNode = __visc__createNodeND(0, transform_fxp_wrapper); + void *GmNode = __visc__createNodeND(0, gamut_fxp_wrapper); + void *TnNode = __visc__createNodeND(0, tone_map_fxp_wrapper); + void *DsNode = __visc__createNodeND(0, descale_fxp_wrapper); + + // BindIn binds inputs of current node with specified node + // - destination node + // - argument position in argument list of function of source node + // - argument position in argument list of function of destination node + // - streaming (1) or non-streaming (0) + + // Edge transfers data between nodes within the same level of hierarchy. + // - source and destination dataflow nodes + // - edge type, all-all (1) or one-one(0) + // - source position (in output struct of source node) + // - destination position (in argument list of destination node) + // - streaming (1) or non-streaming (0) + + // scale_fxp inputs + __visc__bindIn(ScNode, 0, 0, 0); // input -> ScNode:input + __visc__bindIn(ScNode, 1, 1, 0); // bytes_input -> ScNode:bytes_input + __visc__bindIn(ScNode, 4, 2, 0); // input_scaled -> ScNode:result + __visc__bindIn(ScNode, 5, 3, 0); // bytes_input_scaled -> ScNode:bytes_result + __visc__bindIn(ScNode, 28, 4, 0); // row_size -> ScNode:row_size + __visc__bindIn(ScNode, 29, 5, 0); // col_size -> ScNode:col_size + + // demosaic_fxp inputs + __visc__bindIn(DmNode, 4, 0, 0); // input_scaled -> DmNode:input + __visc__edge(ScNode, DmNode, 1, 0, 1, 0); // SCNode:bytes_result -> DmNode:bytes_input + __visc__bindIn(DmNode, 8, 2, 0); // demosaic_out -> DmNode:result + __visc__bindIn(DmNode, 9, 3, 0); // bytes_demosaic_out -> DmNode:bytes_result + __visc__bindIn(DmNode, 28, 4, 0); // row_size -> DmNode:row_size + __visc__bindIn(DmNode, 29, 5, 0); // col_size -> DmNode:col_size + + // denoise_fxp inputs + __visc__bindIn(DnNode, 8, 0, 0); // demosaic_out -> DnNode:input + __visc__edge(DmNode, DnNode, 1, 0, 1, 0); // DMNode:bytes_result -> DnNode:bytes_input + __visc__bindIn(DnNode, 10, 2, 0); // denoise_out -> DnNode:result + __visc__bindIn(DnNode, 11, 3, 0); // bytes_denoise_out -> DnNode:bytes_result + __visc__bindIn(DnNode, 28, 4, 0); // row_size -> DnNode:row_size + __visc__bindIn(DnNode, 29, 5, 0); // col_size -> DnNode:col_size + + // transform_fxp inputs + __visc__bindIn(TrNode, 10, 0, 0); // denoise_out -> TrNode:input + __visc__edge(DnNode, TrNode, 1, 0, 1, 0); // DnNode:bytes_result -> TrNode:bytes_input + __visc__bindIn(TrNode, 12, 2, 0); // transform_out -> TrNode:result + __visc__bindIn(TrNode, 13, 3, 0); // bytes_result_scaled -> TrNode:bytes_result + __visc__bindIn(TrNode, 16, 4, 0); // TsTw -> TrNode:TsTw_trann + __visc__bindIn(TrNode, 17, 5, 0); // bytes_TsTw -> TrNode:bytes_TsTw + __visc__bindIn(TrNode, 28, 6, 0); // row_size -> TrNode:row_size + __visc__bindIn(TrNode, 29, 7, 0); // col_size -> TrNode:col_size + + // gamut_fxp inputs + __visc__bindIn(GmNode, 12, 0, 0); // transform_out -> GmNode:input + __visc__edge(TrNode, GmNode, 1, 0, 1, 0); // TrNode:bytes_result -> GmNode:bytes_input + __visc__bindIn(GmNode, 14, 2, 0); // gamut_out -> GmNode:result + __visc__bindIn(GmNode, 15, 3, 0); // bytes_gamut_out -> GmNode:bytes_result + __visc__bindIn(GmNode, 18, 4, 0); // ctrl_pts -> GmNode:ctrl_pts + __visc__bindIn(GmNode, 19, 5, 0); // bytes_ctrl_pts -> GmNode:bytes_ctrl_pts + __visc__bindIn(GmNode, 20, 6, 0); // weights -> GmNode:weights + __visc__bindIn(GmNode, 21, 7, 0); // bytes_weights -> GmNode:bytes_weights + __visc__bindIn(GmNode, 22, 8, 0); // coefs -> GmNode:coefs + __visc__bindIn(GmNode, 23, 9, 0); // bytes_coefs -> GmNode:bytes_coefs + __visc__bindIn(GmNode, 24, 10, 0); // l2_dist -> GmNode: l2_dist + __visc__bindIn(GmNode, 25, 11, 0); // bytes_l2_dist -> GmNode:bytes_l2_dist + __visc__bindIn(GmNode, 28, 12, 0); // row_size -> GmNode:row_size + __visc__bindIn(GmNode, 29, 13, 0); // col_size -> GmNode:col_size + + // tone_map_fxp inputs + __visc__bindIn(TnNode, 14, 0, 0); // gamut_out -> TnNode:input + __visc__edge(GmNode, TnNode, 1, 0, 1, 0); // GmNode:bytes_result -> TnNode:bytes_input + __visc__bindIn(TnNode, 6, 2, 0); // result_scaled -> TnNode:result + __visc__bindIn(TnNode, 7, 3, 0); // bytes_result_scaled -> TnNode:bytes_result + __visc__bindIn(TnNode, 26, 4, 0); // tone_map -> TnNode:tone_map + __visc__bindIn(TnNode, 27, 5, 0); // bytes_tone_map -> TnNode:bytes_tone_map + __visc__bindIn(TnNode, 28, 6, 0); // row_size -> TnNode:row_size + __visc__bindIn(TnNode, 29, 7, 0); // col_size -> TnNode:col_size + + // descale_fxp inputs + __visc__bindIn(DsNode, 6, 0, 0); // result_scaled -> DsNode:input + __visc__edge(TnNode, DsNode, 1, 0, 1, 0); // TnNode:bytes_result -> DsNode:bytes_input + __visc__bindIn(DsNode, 2, 2, 0); // result -> DsNode:result + __visc__bindIn(DsNode, 3, 3, 0); // bytes_result -> DsNode:bytes_result + __visc__bindIn(DsNode, 28, 4, 0); // row_size -> DsNode:row_size + __visc__bindIn(DsNode, 29, 5, 0); // col_size -> DsNode:col_size + + // Similar to bindIn, but for the output. Output of a node is a struct, and + // we consider the fields in increasing ordering. + __visc__bindOut(DsNode, 0, 0, 0); + +} + +int main(int argc, char* argv[]) { + // Parse the arguments. + arguments args; + set_default_args(&args); + argp_parse(&parser, argc, argv, 0, 0, &args); + + // Read a raw image. + // NOTE: We deliberately perform this file I/O outside of the kernel. + printf("Reading a raw image from %s\n", args.args[RAW_IMAGE_BIN]); + int row_size, col_size; + uint8_t *image_in = read_image_from_binary(args.args[RAW_IMAGE_BIN], &row_size, &col_size); + + printf("Raw image shape: %d x %d x %d\n", row_size, col_size, CHAN_SIZE); + + // Allocate a buffer for storing the output image data. + // (This is currently the same size as the input image data.) + size_t bytes_image = sizeof(uint8_t) * row_size * col_size * CHAN_SIZE; + size_t bytes_fimage = sizeof(float) * row_size * col_size * CHAN_SIZE; + uint8_t *image_out = (uint8_t*) malloc_aligned(bytes_image); + uint8_t *image_out_gamut = (uint8_t*) malloc_aligned(bytes_image); + uint8_t *image_out_demosaic = (uint8_t*) malloc_aligned(bytes_image); + uint8_t *image_out_denoise = (uint8_t*) malloc_aligned(bytes_image); + uint8_t *image_out_transform = (uint8_t*) malloc_aligned(bytes_image); + + __visc__init(); + + /////////////////////////////////////////////////////////////// + // Camera Model Parameters + /////////////////////////////////////////////////////////////// + // Path to the camera model to be used +// char cam_model_path[100]; +// char cam_model_path = "cam_models/NikonD7000/"; + // White balance index (select white balance from transform file) + // The first white balance in the file has a wb_index of 1 + // For more information on model format see the readme + int wb_index = 6; + + // Number of control points + int num_ctrl_pts = 3702; + uint8_t *input, *result; + float *input_scaled, *result_scaled, *demosaic_out, *denoise_out, *transform_out, *gamut_out; + float *TsTw, *ctrl_pts, *weights, *coefs, *tone_map, *l2_dist; + + TsTw = get_TsTw("cam_models/NikonD7000/", wb_index); + float *trans = transpose_mat(TsTw, CHAN_SIZE, CHAN_SIZE); + free(TsTw); + TsTw = trans; + ctrl_pts = get_ctrl_pts("cam_models/NikonD7000/", num_ctrl_pts); + weights = get_weights("cam_models/NikonD7000/", num_ctrl_pts); + coefs = get_coefs("cam_models/NikonD7000/", num_ctrl_pts); + tone_map = get_tone_map("cam_models/NikonD7000/"); + + input_scaled = (float*) malloc_aligned(bytes_fimage); + result_scaled = (float*) malloc_aligned(bytes_fimage); + demosaic_out = (float*) malloc_aligned(bytes_fimage); + denoise_out = (float*) malloc_aligned(bytes_fimage); + transform_out = (float*) malloc_aligned(bytes_fimage); + gamut_out = (float*) malloc_aligned(bytes_fimage); + l2_dist = (float*) malloc_aligned(sizeof(float) * num_ctrl_pts); + + // This is host_input in cam_pipe() + input = (uint8_t*) malloc_aligned(bytes_image); + convert_hwc_to_chw(image_in, row_size, col_size, &input); + + // This is host_result in cam_pipe() + result = (uint8_t*) malloc_aligned(bytes_image); + + // Allocate struct to pass DFG inputs + RootIn* rootArgs = (RootIn*) malloc(sizeof(RootIn)); + + // Set up HPVM DFG inputs in the rootArgs struct. + rootArgs->input = input; + rootArgs->bytes_input = bytes_image; + + rootArgs->result = result; + rootArgs->bytes_result = bytes_image; + + rootArgs->input_scaled = input_scaled; + rootArgs->bytes_input_scaled = bytes_fimage; + + rootArgs->result_scaled = result_scaled; + rootArgs->bytes_result_scaled = bytes_fimage; + + rootArgs->demosaic_out = demosaic_out; + rootArgs->bytes_demosaic_out = bytes_fimage; + + rootArgs->denoise_out = denoise_out; + rootArgs->bytes_denoise_out = bytes_fimage; + + rootArgs->transform_out = transform_out; + rootArgs->bytes_transform_out = bytes_fimage; + + rootArgs->gamut_out = gamut_out; + rootArgs->bytes_gamut_out = bytes_fimage; + + rootArgs->TsTw = TsTw; + rootArgs->bytes_TsTw = CHAN_SIZE * CHAN_SIZE * sizeof(float); + + rootArgs->ctrl_pts = ctrl_pts; + rootArgs->bytes_ctrl_pts = num_ctrl_pts * CHAN_SIZE * sizeof(float); + + rootArgs->weights = weights; + rootArgs->bytes_weights = num_ctrl_pts * CHAN_SIZE * sizeof(float); + + rootArgs->coefs = coefs; + rootArgs->bytes_coefs = 4 * CHAN_SIZE * sizeof(float); + + rootArgs->tone_map = tone_map; + rootArgs->bytes_tone_map = 256 * CHAN_SIZE * sizeof(float); + + rootArgs->l2_dist = l2_dist; + rootArgs->bytes_l2_dist = num_ctrl_pts * sizeof(float); + + rootArgs->row_size = row_size; + rootArgs->col_size = col_size; + + // Memory tracking is required for pointer arguments. + // Nodes can be scheduled on different targets, and + // dataflow edge implementation needs to request data. + // The pair (pointer, size) is inserted in memory tracker using this call + llvm_visc_track_mem(input, bytes_image); + llvm_visc_track_mem(result, bytes_image); + llvm_visc_track_mem(input_scaled, bytes_fimage); + llvm_visc_track_mem(result_scaled, bytes_fimage); + llvm_visc_track_mem(demosaic_out, bytes_fimage); + llvm_visc_track_mem(denoise_out, bytes_fimage); + llvm_visc_track_mem(transform_out, bytes_fimage); + llvm_visc_track_mem(gamut_out, bytes_fimage); + llvm_visc_track_mem(TsTw, CHAN_SIZE * CHAN_SIZE * sizeof(float)); + llvm_visc_track_mem(ctrl_pts, num_ctrl_pts * CHAN_SIZE * sizeof(float)); + llvm_visc_track_mem(weights, num_ctrl_pts * CHAN_SIZE * sizeof(float)); + llvm_visc_track_mem(coefs, 4 * CHAN_SIZE *sizeof(float)); + llvm_visc_track_mem(tone_map, 256 * CHAN_SIZE * sizeof(float)); + llvm_visc_track_mem(l2_dist, num_ctrl_pts * sizeof(float)); + + printf("\n\nLaunching CAVA pipeline!\n"); + + void* camPipeDFG = __visc__launch(0, CamPipeRoot, (void*) rootArgs); + __visc__wait(camPipeDFG); + + printf("\n\nPipeline execution completed!\n"); + printf("\n\nRequesting memory!\n"); + + // Request data from graph. + llvm_visc_request_mem(result, bytes_image); + llvm_visc_request_mem(demosaic_out, bytes_fimage); + llvm_visc_request_mem(denoise_out, bytes_fimage); + llvm_visc_request_mem(transform_out, bytes_fimage); + llvm_visc_request_mem(gamut_out, bytes_fimage); + printf("\n\nDone requesting memory!\n"); + + + uint8_t* gamut_out_descaled = (uint8_t*) malloc_aligned(bytes_image); + uint8_t* demosaic_out_descaled = (uint8_t*) malloc_aligned(bytes_image); + uint8_t* transform_out_descaled = (uint8_t*) malloc_aligned(bytes_image); + uint8_t* denoise_out_descaled = (uint8_t*) malloc_aligned(bytes_image); + + descale_cpu(demosaic_out, bytes_fimage, demosaic_out_descaled, bytes_image, row_size, col_size); + descale_cpu(gamut_out, bytes_fimage, gamut_out_descaled, bytes_image, row_size, col_size); + descale_cpu(denoise_out, bytes_fimage, denoise_out_descaled, bytes_image, row_size, col_size); + descale_cpu(transform_out, bytes_fimage, transform_out_descaled, bytes_image, row_size, col_size); + + convert_chw_to_hwc(result, row_size, col_size, &image_out); + convert_chw_to_hwc(gamut_out_descaled, row_size, col_size, &image_out_gamut); + convert_chw_to_hwc(demosaic_out_descaled, row_size, col_size, &image_out_demosaic); + convert_chw_to_hwc(denoise_out_descaled, row_size, col_size, &image_out_denoise); + convert_chw_to_hwc(transform_out_descaled, row_size, col_size, &image_out_transform); + + + // Remove tracked pointers. + llvm_visc_untrack_mem(input); + llvm_visc_untrack_mem(result); + llvm_visc_untrack_mem(input_scaled); + llvm_visc_untrack_mem(result_scaled); + llvm_visc_untrack_mem(demosaic_out); + llvm_visc_untrack_mem(denoise_out); + llvm_visc_untrack_mem(transform_out); + llvm_visc_untrack_mem(gamut_out); + + llvm_visc_untrack_mem(TsTw); + llvm_visc_untrack_mem(ctrl_pts); + llvm_visc_untrack_mem(weights); + llvm_visc_untrack_mem(coefs); + llvm_visc_untrack_mem(tone_map); + llvm_visc_untrack_mem(l2_dist); + + // Output the image. + // NOTE: We deliberately perform this file I/O outside of the kernel. + char str[50], base_str[50]; + strcpy(base_str, args.args[OUTPUT_IMAGE_BIN]); + strcpy(str, base_str); + strcat(str, ".bin"); + printf("Writing output image to %s\n", str); + write_image_to_binary(str, image_out, row_size, col_size); + strcpy(str, base_str); + strcat(str, "_gamut.bin"); + printf("Writing output image to %s\n", str); + write_image_to_binary(str, image_out_gamut, row_size, col_size); + strcpy(str, base_str); + strcat(str, "_demosaic.bin"); + printf("Writing output image to %s\n", str); + write_image_to_binary(str, image_out_demosaic, row_size, col_size); + strcpy(str, base_str); + strcat(str, "_denoise.bin"); + printf("Writing output image to %s\n", str); + write_image_to_binary(str, image_out_denoise, row_size, col_size); + strcpy(str, base_str); + strcat(str, "_transform.bin"); + printf("Writing output image to %s\n", str); + write_image_to_binary(str, image_out_transform, row_size, col_size); + + __visc__cleanup(); + + return 0; +} + diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/new_main.c b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/new_main.c new file mode 100644 index 0000000000..e6e36ba1db --- /dev/null +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/new_main.c @@ -0,0 +1,115 @@ + + +#include <stdlib.h> + + +typedef struct __attribute__((__packed__)) { + float* input; size_t bytes_input; + float* result; size_t bytes_result; +} +RootIn; + + + + +void scale_values(float* input, size_t bytes_input) { + + __visc__hint(DEVICE); + __visc__attributes(1, input, 1, input); + + for (int ind = 0; ind < bytes_input; ind++){ + input[ind] = input[ind] * 2.0; + } + + __visc__return(1, bytes_input); +} + + + + +void graphRoot(/*0*/ float* input, /*1*/ size_t bytes_input, + /*2*/ float* result, /*3*/ size_t bytes_result) { + + //Specifies compilation target for current node + __visc__hint(CPU_TARGET); + + __visc__attributes(2, input, result, 2, input, result); + + // Create an 0D (specified by 1st argument) HPVM node - so a single node + // associated with node function ---_fxp_wrapper + + void* scaleNode = __visc__createNodeND(0, scale_values); + + // BindIn binds inputs of current node with specified node + // - destination node + // - argument position in argument list of function of source node + // - argument position in argument list of function of destination node + // - streaming (1) or non-streaming (0) + + // Edge transfers data between nodes within the same level of hierarchy. + // - source and destination dataflow nodes + // - edge type, all-all (1) or one-one(0) + // - source position (in output struct of source node) + // - destination position (in argument list of destination node) + // - streaming (1) or non-streaming (0) + + // scale_fxp inputs + __visc__bindIn(scaleNode, 0, 0, 0); // input -> ScNode:input + __visc__bindIn(scaleNode, 1, 1, 0); // bytes_input -> ScNode:bytes_input + + // Similar to bindIn, but for the output. Output of a node is a struct, and + // we consider the fields in increasing ordering. + __visc__bindOut(scaleNode, 0, 0, 0); + +} + + + + + +int main(int argc, char* argv[]) { + + __visc__init(); + + size_t input_size = 100; + size_t result_size = 100; + + size_t input_bytes = input_size * sizeof(float); + size_t result_bytes = result_size * sizeof(float); + + // This is host_input in cam_pipe() + input = (float*) malloc(input_bytes); + // This is host_result in cam_pipe() + result = (float*) malloc(result_bytes); + + + RootIn* rootArgs = (RootIn*) malloc(sizeof(RootIn)); + + // Set up HPVM DFG inputs in the rootArgs struct. + rootArgs->input = input; + rootArgs->bytes_input = input_bytes; + + rootArgs->result = result; + rootArgs->bytes_result = result_bytes; + + llvm_visc_track_mem(input, input_bytes); + llvm_visc_track_mem(result, result_bytes); + + + void* testDFG = __visc__launch(0, graphRoot, (void*) rootArgs); + __visc__wait(testDFG); + + + llvm_visc_request_mem(result, input_bytes); + llvm_visc_request_mem(result, result_bytes); + + + + __visc__cleanup(); + + return 0; +} + + + + diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/pipe_stages.c b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/pipe_stages.c new file mode 100644 index 0000000000..2ebedec936 --- /dev/null +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/pipe_stages.c @@ -0,0 +1,290 @@ +#include <stdio.h> +#include <math.h> +#include "pipe_stages.h" +#include "cam_pipe_utility.h" + +//void scale_fxp(uint8_t *input, int row_size, int col_size, float *output) { +void scale_fxp(uint8_t *input, size_t bytes_input, + float *output, size_t bytes_output, + int row_size, int col_size) { + __visc__hint(DEVICE); + __visc__attributes(2, input, output, 1, output); + + ARRAY_3D(uint8_t, _input, input, row_size, col_size); + ARRAY_3D(float, _output, output, row_size, col_size); + sl_chan: + for (int chan = 0; chan < CHAN_SIZE; chan++) + sl_row: + for (int row = 0; row < row_size; row++) + sl_col: + for (int col = 0; col < col_size; col++) + _output[chan][row][col] = _input[chan][row][col] * 1.0 / 255; + + __visc__return(1, bytes_output); +} + +//void descale_fxp(float *input, int row_size, int col_size, uint8_t *output) { +void descale_fxp(float *input, size_t bytes_input, + uint8_t *output, size_t bytes_result, + int row_size, int col_size) { + __visc__hint(DEVICE); + __visc__attributes(2, input, output, 1, output); + + ARRAY_3D(float, _input, input, row_size, col_size); + ARRAY_3D(uint8_t, _output, output, row_size, col_size); + dsl_chan: + for (int chan = 0; chan < CHAN_SIZE; chan++) + dsl_row: + for (int row = 0; row < row_size; row++) + dsl_col: + for (int col = 0; col < col_size; col++) + _output[chan][row][col] = min(max(_input[chan][row][col] * 255, 0), 255); + + __visc__return(1, bytes_output); +} + +// Demosaicing stage +// G R +// B G +//void demosaic_fxp(float *input, int row_size, int col_size, float *result) { +void demosaic_fxp(float *input, size_t bytes_input, + float *result, size_t bytes_result, + int row_size, int col_size) { + __visc__hint(DEVICE); + __visc__attributes(2, input, result, 1, result); + + printf("Demosaicing.\n"); + ARRAY_3D(float, _input, input, row_size, col_size); + ARRAY_3D(float, _result, result, row_size, col_size); + + dm_row: + for (int row = 1; row < row_size - 1; row++) + dm_col: + for (int col = 1; col < col_size - 1; col++) + if (row % 2 == 0 && col % 2 == 0) { + // Green pixel + // Getting the R values + float R1 = _input[0][row][col - 1]; + float R2 = _input[0][row][col + 1]; + // Getting the B values + float B1 = _input[2][row - 1][col]; + float B2 = _input[2][row + 1][col]; + // R + _result[0][row][col] = (R1 + R2) / 2; + // G + _result[1][row][col] = _input[1][row][col] * 2; + // B + _result[2][row][col] = (B1 + B2) / 2; + } else if (row % 2 == 0 && col % 2 == 1) { + // Red pixel + // Getting the G values + float G1 = _input[1][row - 1][col]; + float G2 = _input[1][row + 1][col]; + float G3 = _input[1][row][col - 1]; + float G4 = _input[1][row][col + 1]; + // Getting the B values + float B1 = _input[2][row - 1][col - 1]; + float B2 = _input[2][row - 1][col + 1]; + float B3 = _input[2][row + 1][col - 1]; + float B4 = _input[2][row + 1][col + 1]; + // R + _result[0][row][col] = _input[0][row][col]; + // G + _result[1][row][col] = (G1 + G2 + G3 + G4) / 2; + // B (center pixel) + _result[2][row][col] = (B1 + B2 + B3 + B4) / 4; + } else if (row % 2 == 1 && col % 2 == 0) { + // Blue pixel + // Getting the R values + float R1 = _input[0][row - 1][col - 1]; + float R2 = _input[0][row + 1][col - 1]; + float R3 = _input[0][row - 1][col + 1]; + float R4 = _input[0][row + 1][col + 1]; + // Getting the G values + float G1 = _input[1][row - 1][col]; + float G2 = _input[1][row + 1][col]; + float G3 = _input[1][row][col - 1]; + float G4 = _input[1][row][col + 1]; + // R + _result[0][row][col] = (R1 + R2 + R3 + R4) / 4; + // G + _result[1][row][col] = (G1 + G2 + G3 + G4) / 2; + // B + _result[2][row][col] = _input[2][row][col]; + } else { + // Bottom Green pixel + // Getting the R values + float R1 = _input[0][row - 1][col]; + float R2 = _input[0][row + 1][col]; + // Getting the B values + float B1 = _input[2][row][col - 1]; + float B2 = _input[2][row][col + 1]; + // R + _result[0][row][col] = (R1 + R2) / 2; + // G + _result[1][row][col] = _input[1][row][col] * 2; + // B + _result[2][row][col] = (B1 + B2) / 2; + } + + __visc__return(1, bytes_result); +} + +static void sort(float arr[], int n) { + int i, j; + dn_sort_i: + for (i = 0; i < n - 1; i++) + dn_sort_j: + for (j = 0; j < n - i - 1; j++) + if (arr[j] > arr[j + 1]) { + float temp = arr[j]; + arr[j] = arr[j + 1]; + arr[j + 1] = temp; + } +} + +// Simple denoise +//void denoise_fxp(float *input, int row_size, int col_size, float *result) { +void denoise_fxp(float *input, size_t bytes_input, + float *result, size_t bytes_result, + int row_size, int col_size) { + __visc__hint(DEVICE); + __visc__attributes(2, input, result, 1, result); + + printf("Denoising.\n"); + ARRAY_3D(float, _input, input, row_size, col_size); + ARRAY_3D(float, _result, result, row_size, col_size); + + dn_chan: + for (int chan = 0; chan < CHAN_SIZE; chan++) + dn_row: + for (int row = 0; row < row_size; row++) + dn_col: + for (int col = 0; col < col_size; col++) + if (row >= 1 && row < row_size - 1 && col >= 1 && col < col_size - 1) { + float filter[9]; + dn_slide_row: + for (int i = row-1; i < row+2; i++) + dn_slide_col: + for (int j = col-1; j < col+2; j++) { + int index = (i - row + 1) * 3 + j - col + 1; + filter[index] = _input[chan][i][j]; + } + sort(filter, 9); + _result[chan][row][col] = filter[4]; + } else { + _result[chan][row][col] = _input[chan][row][col]; + } + __visc__return(1, bytes_result); +} + +// Color map and white balance transform +//void transform_fxp(float *input, int row_size, int col_size, float *result, +// float *TsTw_tran) { +void transform_fxp(float *input, size_t bytes_input, + float *result, size_t bytes_result, + float *TsTw_tran, size_t bytes_TsTw, + int row_size, int col_size) { + __visc__hint(DEVICE); + __visc__attributes(3, input, result, TsTw_tran, 1, result); + + printf("Color mapping.\n"); + ARRAY_3D(float, _input, input, row_size, col_size); + ARRAY_3D(float, _result, result, row_size, col_size); + ARRAY_2D(float, _TsTw_tran, TsTw_tran, 3); + + tr_chan: + for (int chan = 0; chan < CHAN_SIZE; chan++) + tr_row: + for (int row = 0; row < row_size; row++) + tr_col: + for (int col = 0; col < col_size; col++) + _result[chan][row][col] = + max(_input[0][row][col] * _TsTw_tran[0][chan] + + _input[1][row][col] * _TsTw_tran[1][chan] + + _input[2][row][col] * _TsTw_tran[2][chan], + 0); + __visc__return(1, bytes_result); +} + +// +// Weighted radial basis function for gamut mapping +// +//void gamut_map_fxp(float *input, int row_size, int col_size, float *result, +// float *ctrl_pts, float *weights, float *coefs, float *l2_dist) { +void gamut_map_fxp(float *input, size_t bytes_input, + float *result, size_t bytes_result, + float *ctrl_pts, size_t bytes_ctrl_pts, + float *weights, size_t bytes_weights, + float *coefs, size_t bytes_coefs, + float *l2_dist, size_t bytes_l2_dist, + int row_size, int col_size) { + __visc__hint(DEVICE); + __visc__attributes(6, input, result, ctrl_pts, weights, coefs, l2_dist, 1, result); + + printf("Gamut mapping.\n"); + ARRAY_3D(float, _input, input, row_size, col_size); + ARRAY_3D(float, _result, result, row_size, col_size); + ARRAY_2D(float, _ctrl_pts, ctrl_pts, 3); + ARRAY_2D(float, _weights, weights, 3); + ARRAY_2D(float, _coefs, coefs, 3); + + // First, get the L2 norm from every pixel to the control points, + // Then, sum it and weight it. Finally, add the bias. + gm_rbf_row: + for (int row = 0; row < row_size; row++) + gm_rbf_col: + for (int col = 0; col < col_size; col++) { + gm_rbf_cp0: + for (int cp = 0; cp < num_ctrl_pts; cp++) { + l2_dist[cp] = + sqrt((_input[0][row][col] - _ctrl_pts[cp][0]) * + (_input[0][row][col] - _ctrl_pts[cp][0]) + + (_input[1][row][col] - _ctrl_pts[cp][1]) * + (_input[1][row][col] - _ctrl_pts[cp][1]) + + (_input[2][row][col] - _ctrl_pts[cp][2]) * + (_input[2][row][col] - _ctrl_pts[cp][2])); + } + gm_rbf_chan: + for (int chan = 0; chan < CHAN_SIZE; chan++) { + float chan_val = 0.0; + gm_rbf_cp1: + for (int cp = 0; cp < num_ctrl_pts; cp++) { + chan_val += l2_dist[cp] * _weights[cp][chan]; + } + // Add on the biases for the RBF + chan_val += _coefs[0][chan] + _coefs[1][chan] * _input[0][row][col] + + _coefs[2][chan] * _input[1][row][col] + + _coefs[3][chan] * _input[2][row][col]; + _result[chan][row][col] = max(chan_val, 0); + } + } + __visc__return(1, bytes_result); +} + +// Tone mapping +//void tone_map_fxp(float *input, int row_size, int col_size, float *tone_map, +// float *result) { +void tone_map_fxp(float *input, size_t bytes_input, + float *result, size_t bytes_result, + float *tone_map, size_t bytes_tone_map, + int row_size, int col_size) { + __visc__hint(DEVICE); + __visc__attributes(3, input, result, tone_map, 1, result); + + printf("Tone mapping.\n"); + ARRAY_3D(float, _input, input, row_size, col_size); + ARRAY_3D(float, _result, result, row_size, col_size); + ARRAY_2D(float, _tone_map, tone_map, 3); + + tm_chan: + for (int chan = 0; chan < CHAN_SIZE; chan++) + tm_row: + for (int row = 0; row < row_size; row++) + tm_col: + for (int col = 0; col < col_size; col++) { + uint8_t x = _input[chan][row][col] * 255; + _result[chan][row][col] = _tone_map[x][chan]; + } + __visc__return(1, bytes_result); +} diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/pipe_stages.h b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/pipe_stages.h new file mode 100644 index 0000000000..eae4347b99 --- /dev/null +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/pipe_stages.h @@ -0,0 +1,63 @@ +#ifndef _PIPE_STAGES_H_ +#define _PIPE_STAGES_H_ + +#include "defs.h" + +#define CHAN_SIZE 3 + +#define ISP 0x4 + +#define max(a,b) \ + ({ __typeof__ (a) _a = (a); \ + __typeof__ (b) _b = (b); \ + _a > _b ? _a : _b; }) + +#define min(a,b) \ + ({ __typeof__ (a) _a = (a); \ + __typeof__ (b) _b = (b); \ + _a < _b ? _a : _b; }) + +#define abs(a) \ + ({ __typeof__ (a) _a = (a); \ + _a < 0 ? -_a : _a; }) + +extern int num_ctrl_pts; + +void scale_fxp(uint8_t *input, size_t bytes_input, + float *output, size_t bytes_output, + int row_size, int col_size); + +void descale_fxp(float *input, size_t bytes_input, + uint8_t *output, size_t bytes_result, + int row_size, int col_size); + +void demosaic_fxp(float *input, size_t bytes_input, + float *result, size_t bytes_result, + int row_size, int col_size); + +void denoise_fxp(float *input, size_t bytes_input, + float *result, size_t bytes_result, + int row_size, int col_size); + +void transform_fxp(float *input, size_t bytes_input, + float *result, size_t bytes_result, + float *TsTw_tran, size_t bytes_TsTw, + int row_size, int col_size); + +void gamut_map_fxp(float *input, size_t bytes_input, + float *result, size_t bytes_result, + float *ctrl_pts, size_t bytes_ctrl_pts, + float *weights, size_t bytes_weights, + float *coefs, size_t bytes_coefs, + float *l2_dist, size_t bytes_l2_dist, + int row_size, int col_size); + +void tone_map_fxp(float *input, size_t bytes_input, + float *result, size_t bytes_result, + float *tone_map, size_t bytes_tone_map, + int row_size, int col_size); + +void tone_map_approx_fxp(float *input, int row_size, int col_size, + float *result); + +#endif diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/rename.sh b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/rename.sh new file mode 100755 index 0000000000..3b6faac63a --- /dev/null +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/rename.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +for f in *.cc; do + mv -- "$f" "${f%.cc}.c" +done diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/utility.c b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/utility.c new file mode 100644 index 0000000000..c1eaee3333 --- /dev/null +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/utility.c @@ -0,0 +1,11 @@ +#include <stdlib.h> +#include <assert.h> +#include "defs.h" +#include "utility.h" + +void *malloc_aligned(size_t size) { + void *ptr = NULL; + int err = posix_memalign((void **)&ptr, CACHELINE_SIZE, size); + assert(err == 0 && "Failed to allocate memory!"); + return ptr; +} diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/utility.h b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/utility.h new file mode 100644 index 0000000000..fc407c72a3 --- /dev/null +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/utility.h @@ -0,0 +1,8 @@ +#ifndef _COMMON_UTILITY_H_ +#define _COMMON_UTILITY_H_ + +#include <stddef.h> + +void *malloc_aligned(size_t size); + +#endif diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/visc.h b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/visc.h new file mode 100644 index 0000000000..a263e35252 --- /dev/null +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/visc.h @@ -0,0 +1,110 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2010 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ + +#ifndef DEVICE +#define DEVICE GPU_TARGET +#endif + +#include "llvm/SupportVISC/VISCHint.h" + +#ifdef __cplusplus +extern "C" { +void __visc__hint(visc::Target); +//void __visc__wait(void*); +#else +void __visc__hint(enum Target); +//void __visc__wait(unsigned); +#endif + +#ifdef __cplusplus +//void* __visc__node(...); +//void* __visc__createNode(...); +//void* __visc__createNode1D(...); +//void* __visc__createNode2D(...); +//void* __visc__createNode3D(...); +//void __visc__return(...); +#endif + +void* __visc__createNodeND(unsigned,...); +void __visc__return(unsigned, ...); + +void __visc__attributes(unsigned, ...); +void __visc__init(); +void __visc__cleanup(); + +void __visc__bindIn(void*, unsigned, unsigned, unsigned); +void __visc__bindOut(void*, unsigned, unsigned, unsigned); +void* __visc__edge(void*, void*, unsigned, unsigned, unsigned, unsigned); +void __visc__push(void*, void*); +void* __visc__pop(void*); +void* __visc__launch(unsigned, ...); +void __visc__wait(void*); + +void* __visc__getNode(); +void* __visc__getParentNode(void*); +void __visc__barrier(); +void* __visc__malloc(long); +long __visc__getNodeInstanceID_x(void*); +long __visc__getNodeInstanceID_y(void*); +long __visc__getNodeInstanceID_z(void*); +long __visc__getNumNodeInstances_x(void*); +long __visc__getNumNodeInstances_y(void*); +long __visc__getNumNodeInstances_z(void*); + +// Atomic +// signed int +int __visc__atomic_cmpxchg(int*, int, int); +int __visc__atomic_add(int*, int); +int __visc__atomic_sub(int*, int); +int __visc__atomic_xchg(int*, int); +int __visc__atomic_inc(int*); +int __visc__atomic_dec(int*); +int __visc__atomic_min(int*, int); +int __visc__atomic_max(int*, int); +int __visc__atomic_umax(int*, int); +int __visc__atomic_umin(int*, int); +int __visc__atomic_and(int*, int); +int __visc__atomic_or(int*, int); +int __visc__atomic_xor(int*, int); + +// Special Func +float __visc__floor(float); +float __visc__rsqrt(float); +float __visc__sqrt(float); +float __visc__sin(float); +float __visc__cos(float); +// unsigned int +//unsigned __visc__atomic_cmpxchg(unsigned*, unsigned, unsigned); +//unsigned __visc__atomic_add(unsigned*, unsigned); +//unsigned __visc__atomic_sub(unsigned*, unsigned); +//unsigned __visc__atomic_xchg(unsigned*, unsigned); +//unsigned __visc__atomic_inc(unsigned*); +//unsigned __visc__atomic_dec(unsigned*); +//unsigned __visc__atomic_min(unsigned*, unsigned); +//unsigned __visc__atomic_max(unsigned*, unsigned); +//unsigned __visc__atomic_and(unsigned*, unsigned); +//unsigned __visc__atomic_or(unsigned*, unsigned); +//unsigned __visc__atomic_xor(unsigned*, unsigned); + + +#include <unistd.h> + +long get_global_id(int); +long get_group_id(int); +long get_local_id(int); +long get_local_size(int); + + +void llvm_visc_track_mem(void*, size_t); +void llvm_visc_untrack_mem(void*); +void llvm_visc_request_mem(void*, size_t); + +#ifdef __cplusplus +} +#endif + -- GitLab