From 22976cf77148d4ee95bbd4fdee0804fda6e8ae31 Mon Sep 17 00:00:00 2001
From: Hashim Sharif <hsharif3@tyler.cs.illinois.edu>
Date: Thu, 28 Nov 2019 01:37:44 -0600
Subject: [PATCH] Adding barebones test case for ApproxHPVM-HPVM merge

---
 .../benchmarks/cava_test/Makefile             | 254 +++++
 .../cava_test/src/cam-vision-native           | Bin 0 -> 37128 bytes
 .../benchmarks/cava_test/src/cam_pipe.c       | 139 +++
 .../benchmarks/cava_test/src/cam_pipe.h       |   7 +
 .../cava_test/src/cam_pipe_utility.c          |  82 ++
 .../cava_test/src/cam_pipe_utility.h          |  16 +
 .../benchmarks/cava_test/src/defs.h           | 224 +++++
 .../benchmarks/cava_test/src/dma_interface.c  |  73 ++
 .../benchmarks/cava_test/src/dma_interface.h  |  44 +
 .../benchmarks/cava_test/src/gem5_harness.h   |  10 +
 .../benchmarks/cava_test/src/load_cam_model.c | 335 +++++++
 .../benchmarks/cava_test/src/load_cam_model.h |  25 +
 .../benchmarks/cava_test/src/main.c           | 156 ++++
 .../benchmarks/cava_test/src/main_old.c       | 865 ++++++++++++++++++
 .../benchmarks/cava_test/src/new_main.c       | 115 +++
 .../benchmarks/cava_test/src/pipe_stages.c    | 290 ++++++
 .../benchmarks/cava_test/src/pipe_stages.h    |  63 ++
 .../benchmarks/cava_test/src/rename.sh        |   5 +
 .../benchmarks/cava_test/src/utility.c        |  11 +
 .../benchmarks/cava_test/src/utility.h        |   8 +
 .../benchmarks/cava_test/src/visc.h           | 110 +++
 21 files changed, 2832 insertions(+)
 create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/Makefile
 create mode 100755 llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/cam-vision-native
 create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/cam_pipe.c
 create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/cam_pipe.h
 create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/cam_pipe_utility.c
 create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/cam_pipe_utility.h
 create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/defs.h
 create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/dma_interface.c
 create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/dma_interface.h
 create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/gem5_harness.h
 create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/load_cam_model.c
 create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/load_cam_model.h
 create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/main.c
 create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/main_old.c
 create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/new_main.c
 create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/pipe_stages.c
 create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/pipe_stages.h
 create mode 100755 llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/rename.sh
 create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/utility.c
 create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/utility.h
 create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/visc.h

diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/Makefile b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/Makefile
new file mode 100644
index 0000000000..f6a82b3e3d
--- /dev/null
+++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/Makefile
@@ -0,0 +1,254 @@
+# This Makefile compiles the HPVM-CAVA pilot project. 
+# It builds HPVM-related dependencies, then the native camera pipeline ISP code.
+#
+# Paths to some dependencies (e.g., HPVM, LLVM) must exist in Makefile.config,
+# which can be copied from Makefile.config.example for a start.
+
+CONFIG_FILE := Makefile.config
+
+ifeq ($(wildcard $(CONFIG_FILE)),)
+    $(error $(CONFIG_FILE) not found. See $(CONFIG_FILE).example)
+endif
+include $(CONFIG_FILE)
+
+# Compiler Flags
+
+DLEVEL ?= 0
+#CAM_CFLAGS?=-O0 \
+	#-Wno-psabi \
+        #-Wno-unused-label -Wno-unused-but-set-variable \
+        #-Wno-maybe-uninitialized -DARCHITECTURE=SMV -DTRANSPOSE_WEIGHTS=1 \
+        -DDEBUG_LEVEL=$(DLEVEL)
+LFLAGS += -lm -lrt
+
+# Build dirs
+ifeq ($(VERSION),)
+    VERSION = Default
+endif
+SRC_DIR = src/
+CAM_PIPE_SRC_DIR = $(SRC_DIR)
+BUILD_DIR = build/$(TARGET)_$(VERSION)
+CURRENT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
+
+# Source files for the frontend camera pipeline
+COMMON_SRCS = main.c \
+	      utility.c
+
+
+CAM_PIPE_SRCS = load_cam_model.c \
+        cam_pipe_utility.c \
+	dma_interface.c # FIXME: This is a hack until external C++ files can be included in build.
+
+
+# NOTE: We have temporarily removed gem5 and other dependencies for simplicity.
+SRCS = $(COMMON_SRCS) $(CAM_PIPE_SRCS)
+#GEM5_DMA_SRC = gem5/dma_interface.c
+#GEM5_SYS_SRCS = gem5/aladdin_sys_connection.cpp gem5/aladdin_sys_constants.cpp
+#GEM5_UTIL_SRCS = ../../util/m5/m5op_x86.S
+
+# NATIVE_FULL_PATH_SRCS contains all the full path source files for the camera vision pipeline.
+NATIVE_FULL_PATH_SRCS = $(patsubst %, $(SRC_DIR)/%, $(COMMON_SRCS))
+NATIVE_FULL_PATH_SRCS += $(patsubst %, $(CAM_PIPE_SRC_DIR)/%, $(CAM_PIPE_SRCS))
+
+# GEM5_FULL_PATH_SRCS contains all the full path source files for the gem5 files.
+#GEM5_FULL_PATH_SRCS = $(patsubst %, $(ALADDIN_HOME)/%, $(GEM5_DMA_SRC) $(GEM5_SYS_SRCS) $(GEM5_UTIL_SRCS))
+
+#INCLUDES += -I$(ALADDIN_HOME) \
+#			-I$(ALADDIN_HOME)/../../include \
+#			-I$(ALADDIN_HOME)/gem5 
+INCLUDES +=  -I$(SRC_DIR) \
+            -I$(CAM_PIPE_SRC_DIR) \
+
+ifneq ($(CONFUSE_ROOT),)
+INCLUDES += -I$(CONFUSE_ROOT)/include
+LFLAGS += -L$(CONFUSE_ROOT)/lib
+endif
+
+EXE = cava-visc-$(VERSION)-$(TARGET)
+
+CAM_CFLAGS += -mf16c -flax-vector-conversions
+LFLAGS += -pthread
+
+#$(DEBUG): $(NATIVE_FULL_PATH_SRCS) $(GEM5_FULL_PATH_SRCS)
+#	@echo Building benchmark for native machine with debug support.
+#	#@mkdir -p $(BUILD_DIR)
+#	@$(CC) $(CAM_CFLAGS) -ggdb3 $(INCLUDES) -DGEM5 -DDMA_MODE -DDMA_INTERFACE_V3 -o $(DEBUG) $^ $(LFLAGS)
+#
+#clean-native:
+#	rm -f $(NATIVE) $(DEBUG)
+
+## BEGIN HPVM MAKEFILE
+LANGUAGE=visc
+SRCDIR_OBJS= load_cam_model.ll cam_pipe_utility.ll dma_interface.ll utility.ll
+OBJS_SRC=src/cam_pipe.c src/pipe_stages.c src/load_cam_model.c src/cam_pipe_utility.c src/dma_interface.c src/utility.c
+VISC_OBJS=main.visc.ll
+APP = $(EXE)
+APP_CUDALDFLAGS=-lm -lstdc++
+APP_CFLAGS= $(INCLUDES) -DDMA_MODE -DDMA_INTERFACE_V3
+APP_CXXFLAGS=-ffast-math -O0 -I/opt/opencv/include
+APP_LDFLAGS=$(LFLAGS)
+OPT_FLAGS = -tti -targetlibinfo -tbaa -scoped-noalias -assumption-cache-tracker -profile-summary-info -forceattrs -inferattrs -ipsccp -globalopt -domtree -mem2reg -deadargelim -domtree -basicaa -aa -simplifycfg -pgo-icall-prom -basiccg -globals-aa -prune-eh -always-inline -functionattrs -domtree -sroa -early-cse -lazy-value-info -jump-threading -correlated-propagation -simplifycfg -domtree -basicaa -aa -libcalls-shrinkwrap -tailcallelim -simplifycfg -reassociate -domtree -loops -loop-simplify -lcssa-verification -lcssa -basicaa -aa -scalar-evolution -loop-rotate -licm -loop-unswitch -simplifycfg -domtree -basicaa -aa -loops -loop-simplify -lcssa-verification -lcssa -scalar-evolution -indvars -loop-idiom -loop-deletion -memdep -memcpyopt -sccp -domtree -demanded-bits -bdce -basicaa -aa -lazy-value-info -jump-threading -correlated-propagation -domtree -basicaa -aa -memdep -dse -loops -loop-simplify -lcssa-verification -lcssa -aa -scalar-evolution -licm -postdomtree -adce -simplifycfg -domtree -basicaa -aa -barrier -basiccg -rpo-functionattrs -globals-aa -float2int -domtree -loops -loop-simplify -lcssa-verification -lcssa -basicaa -aa -scalar-evolution -loop-rotate -loop-accesses -lazy-branch-prob -lazy-block-freq -opt-remark-emitter -loop-distribute -loop-simplify -lcssa-verification -lcssa -branch-prob -block-freq -scalar-evolution -basicaa -aa -loop-accesses -demanded-bits -lazy-branch-prob -lazy-block-freq -opt-remark-emitter -loop-vectorize -loop-simplify -scalar-evolution -aa -loop-accesses -loop-load-elim -basicaa -aa -simplifycfg -domtree -basicaa -aa -loops -scalar-evolution -alignment-from-assumptions -strip-dead-prototypes -domtree -loops -branch-prob -block-freq -loop-simplify -lcssa-verification -lcssa -basicaa -aa -scalar-evolution -branch-prob -block-freq -loop-sink -instsimplify 
+
+CFLAGS = -O1 $(APP_CFLAGS) $(PLATFORM_CFLAGS)
+OBJS_CFLAGS = -O1 $(APP_CFLAGS) $(PLATFORM_CFLAGS)
+CXXFLAGS = $(APP_CXXFLAGS) $(PLATFORM_CXXFLAGS)
+LDFLAGS= $(APP_LDFLAGS) $(PLATFORM_LDFLAGS)
+
+LIBCLC_LIB_PATH = $(LLVM_SRC_ROOT)/../libclc/built_libs
+VISC_RT_PATH = $(LLVM_SRC_ROOT)/projects/visc-rt
+
+VISC_RT_LIB = $(VISC_RT_PATH)/visc-rt.ll
+#LIBCLC_NVPTX_LIB = $(LIBCLC_LIB_PATH)/nvptx--nvidiacl.bc
+LIBCLC_NVPTX_LIB = $(LIBCLC_LIB_PATH)/nvptx64--nvidiacl.bc
+#LIBCLC_NVPTX_LIB = nvptx64--nvidiacl.bc
+
+LLVM_34_AS = $(LLVM_34_ROOT)/build/bin/llvm-as
+
+TESTGEN_OPTFLAGS = -load LLVMGenVISC.so -genvisc -globaldce
+KERNEL_GEN_FLAGS = -O3 -target nvptx64-nvidia-nvcl
+
+ifeq ($(TARGET),x86)
+  DEVICE = SPIR_TARGET
+  VISC_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_SPIR.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -localmem -dfg2llvm-spir -dfg2llvm-x86 -clearDFG
+  CFLAGS += -DOPENCL_CPU
+  VISC_OPTFLAGS += -visc-timers-x86 -visc-timers-spir
+else ifeq ($(TARGET),seq)
+  DEVICE = CPU_TARGET
+  VISC_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG
+  VISC_OPTFLAGS += -visc-timers-x86
+else ifeq ($(TARGET),fpga)
+  DEVICE = FPGA_TARGET
+  VISC_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_FPGA.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -localmem -dfg2llvm-fpga -dfg2llvm-x86 -clearDFG
+  CFLAGS += -DOPENCL_CPU
+  VISC_OPTFLAGS += -visc-timers-x86 -visc-timers-fpga
+else
+  DEVICE = GPU_TARGET
+  VISC_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_NVPTX.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -localmem -dfg2llvm-nvptx -dfg2llvm-x86 -clearDFG
+  VISC_OPTFLAGS += -visc-timers-x86 -visc-timers-ptx
+endif
+  TESTGEN_OPTFLAGS += -visc-timers-gen
+
+CFLAGS += -DDEVICE=$(DEVICE)
+CXXFLAGS += -DDEVICE=$(DEVICE)
+
+#ifeq ($(TIMER),x86)
+#  VISC_OPTFLAGS += -visc-timers-x86
+#else ifeq ($(TIMER),ptx)
+#  VISC_OPTFLAGS += -visc-timers-ptx
+#else ifeq ($(TIMER),gen)
+#  TESTGEN_OPTFLAGS += -visc-timers-gen
+#else ifeq ($(TIMER),spir)
+#  TESTGEN_OPTFLAGS += -visc-timers-spir
+#else ifeq ($(TIMER),fpga)
+#  TESTGEN_OPTFLAGS += -visc-timers-fpga
+#else ifeq ($(TIMER),no)
+#else
+#  ifeq ($(TARGET),x86)
+#    VISC_OPTFLAGS += -visc-timers-x86 -visc-timers-spir
+#  else ifeq ($(TARGET),seq)
+#    VISC_OPTFLAGS += -visc-timers-x86
+#  else ifeq ($(TARGET),fpga)
+#    VISC_OPTFLAGS += -visc-timers-x86 -visc-timers-fpga
+#  else ifeq ($(TARGET),seqx86)
+#    VISC_OPTFLAGS += -visc-timers-x86 -visc-timers-spir
+#  else ifeq ($(TARGET),seqgpu)
+#    VISC_OPTFLAGS += -visc-timers-x86 -visc-timers-ptx
+#  else
+#    VISC_OPTFLAGS += -visc-timers-x86 -visc-timers-ptx
+#  endif
+#  TESTGEN_OPTFLAGS += -visc-timers-gen
+#endif
+
+# Add BUILDDIR as a prefix to each element of $1
+INBUILDDIR=$(addprefix $(BUILD_DIR)/,$(1))
+
+# Add SRCDIR as a prefix to each element of $1
+#INSRCDIR=$(addprefix $(SRCDIR)/,$(1))
+
+PYTHON_LLVM_40_34 = ../llvm-40-34.py
+
+.PRECIOUS: $(BUILD_DIR)/%.ll
+
+OBJS = $(call INBUILDDIR,$(SRCDIR_OBJS))
+TEST_OBJS = $(call INBUILDDIR,$(VISC_OBJS))
+KERNEL = $(TEST_OBJS).kernels.ll
+
+ifeq ($(TARGET),x86)
+  SPIR_ASSEMBLY = $(TEST_OBJS).kernels.bc
+else ifeq ($(TARGET),seq)
+else ifeq ($(TARGET),fpga)
+  AOC_CL = $(TEST_OBJS).kernels.cl
+  AOCL_ASSEMBLY = $(TEST_OBJS).kernels.aocx
+  BOARD = a10gx
+  ifeq ($(EMULATION),1)
+    EXE = cava-visc-emu
+    AOC_EMU = -march=emulator
+    BUILD_DIR = build/$(TARGET)-emu
+  endif
+else
+  KERNEL_LINKED = $(BUILD_DIR)/$(APP).kernels.linked.ll
+  #KERNEL = $(TEST_OBJS).kernels.ll
+  PTX_ASSEMBLY = $(TEST_OBJS).nvptx.s
+endif
+
+HOST_LINKED = $(BUILD_DIR)/$(APP).linked.ll
+HOST = $(BUILD_DIR)/$(APP).host.ll
+
+ifeq ($(OPENCL_PATH),)
+FAILSAFE=no_opencl
+else 
+FAILSAFE=
+endif
+
+# Targets
+default: $(FAILSAFE) $(BUILD_DIR) $(EXE)
+#default: $(FAILSAFE) $(BUILD_DIR) $(PTX_ASSEMBLY) $(SPIR_ASSEMBLY) $(AOC_CL) $(AOCL_ASSEMBLY) $(EXE)
+
+$(PTX_ASSEMBLY) : $(KERNEL_LINKED)
+	$(CC) $(KERNEL_GEN_FLAGS) -S $< -o $@
+
+$(KERNEL_LINKED) : $(KERNEL)
+	$(LLVM_LINK) $(LIBCLC_NVPTX_LIB) -S $< -o $@
+
+$(SPIR_ASSEMBLY) : $(KERNEL)
+	python $(PYTHON_LLVM_40_34) $< $(BUILD_DIR)/kernel_34.ll
+	$(LLVM_34_AS) $(BUILD_DIR)/kernel_34.ll -o $@
+
+$(AOCL_ASSEMBLY) : $(AOC_CL)
+	aoc --report $(AOC_EMU) $(AOC_CL) -o $(AOCL_ASSEMBLY) -board=$(BOARD)
+
+$(AOC_CL) : $(KERNEL)
+	llvm-cbe --debug $(KERNEL)
+
+$(EXE) : $(HOST_LINKED)
+	$(CXX) -O3 $(LDFLAGS) $< -o $@
+
+$(HOST_LINKED) : $(HOST) $(OBJS) $(VISC_RT_LIB)
+	$(LLVM_LINK) $^ -S -o $@
+
+$(VISC_RT_LIB) : $(VISC_RT_PATH)/visc-rt.cpp
+	make -C $(LLVM_LIB_PATH)
+
+$(HOST) $(KERNEL): $(BUILD_DIR)/$(VISC_OBJS)
+	$(OPT) -debug $(VISC_OPTFLAGS) -S $< -o $(HOST)
+#	mv *.ll $(BUILD_DIR) 
+#	$(OPT) -debug-only=DFG2LLVM_SPIR,DFG2LLVM_X86,DFG2LLVM_FPGA,GENVISC $(VISC_OPTFLAGS) -S $< -o $(HOST)
+#$(OBJS): $(OBJS_SRC)
+#	$(CC) $(OBJS_CFLAGS) -emit-llvm -S -o $@ $<
+
+$(BUILD_DIR):
+	mkdir -p $(BUILD_DIR)
+
+$(BUILD_DIR)/%.ll : $(SRC_DIR)/%.c
+	$(CC) $(OBJS_CFLAGS) -emit-llvm -S -o $@ $<
+
+$(BUILD_DIR)/main.ll : $(SRC_DIR)/main.c
+	$(CC) $(CFLAGS) -emit-llvm -S -o $@ $<
+
+#$(BUILD_DIR)/main.opt.ll : $(BUILD_DIR)/main.ll
+#	$(OPT) $(OPT_FLAGS) $< -S -o $@
+
+$(BUILD_DIR)/main.visc.ll : $(BUILD_DIR)/main.ll
+	$(OPT) -debug-only=genvisc $(TESTGEN_OPTFLAGS) $< -S -o $@
+
+## END HPVM MAKEFILE
diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/cam-vision-native b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/cam-vision-native
new file mode 100755
index 0000000000000000000000000000000000000000..938e1b1a15d86e71b9b044e594472dad10fd9bc2
GIT binary patch
literal 37128
zcmeHwdtg&lw)e>cC{UW9s8w+c-sltuYRkiE1!s~rffGqe?L&%!Nl8;$?Q7Zu3Rj^{
zTjWd#_Kpt2cdz5^%$=Fb%ymZYTy>C9NNEE_9YuVBcop9%_$a7Q5Xkpi`|OjPq^-`q
z-tYVV`A&Pz-h1t})?Rz>wby>0ob`6+0<A_PD3@0Ffk2@HlPnA*KLtUQSdhc=gz<t!
zxLTMhj0I#2ZVQVcSN19H5NEBpX&g@nSC2bSD+BVh7UoghsWKQT$d@7^xs+bwvT(2L
zb}W;FiYo{v1|T|;b#OTg!u2qZ;&ySSnS{hH6MK?)1WLP;1>t&_M{x(aV2Z2ckMc%$
z8P8vt)`Ad)E#wF`_Dr~hOIPs+{wnf^Z)HKa7UoghWjsB_Rp@R7-8h-#Z+aRzRtZmU
zv~n?W`4yKZFWgX9yW++<H`G<ltgCGZw9ahJyK&}?bFy0+vuCk<6Mw>8SX9nZQp(YU
z5Xn3RH|YSyuY2nk>wfw4;Z5K9w{zxTqq+6f8LvG|_*dg59;`~_gqbu1&p#PdR0E;m
z*CgOyG#o6ni~^qsT+;~nI{+JrPu3`W-W&zrI|_bm6#QpKfxj>c{K`@I-#QBX#!=wb
zQQ+T2x+dJI*D(M_DxZ!~;8ml*$BvTjKa7I!7zO`dM!|n?6!?vRn{cOI*`x4(1c)Q$
zciJfUCq}{l-6-%|05{=Iy{3TwWZ?>-&Ff)Nik%VA4(qAe-6*&nWiEG>ui3Y<w#Dyj
zE^`&uH8%LlDp%C`1h;!-ePe^W#b4R%ce@4mEvwum9IvphvZck>B9o6u-14Jle>El4
z<gaP=RaQ}QITZHtgu%DT+qA~*t+^X1H8s~Z_^VmK*IMfrsvDbp4MJ06OKq#W-dA5)
zSGy9yfFDVcK;F9v)m>d#TL%Ei0ufoL#oz32tP+@Wx9ku~wJL#XZ(U=HPXN=pMz65a
z=l3<N0wzkKS*UIF`s;-1)y=hjpHNML69UYYPpE8O+2n4jYzAq|niemp1xQxkxQa@O
zGR8})zOuGKMuwn%pS#9a*F>y*!a}E`pwK-ldsg<`xPNx`oVb4uYDtSf`AI%4?49f?
za6Ni5Ev%soRWK<`e)-Y}^d|^pPffT3SJaND9<Y^wxHRCp7OrB?{g+d`Ug$tP!KT#K
zP9jU&#lteaHcmg}_5e$^u?FUoABvt)w@CI@xiUQ-=B2)di@?Gr6<m>>BC=F)rLQ7*
zjtZ{s&+}C9cvlR3iwdsp=R_4e-X?%wtb(IJ;#Y+VJ|PYj1dj?%eSvb-sNm@e5N?wS
zjslKfttxm%94ZKHD!5Wt6unUepRB^~Qo+^sW~&N*u?oLi1*g7Mxwfg`mncBE+g0#O
zRq!1u_+=`1SOvda1@BkEuTa4csNh$s;D=Ojbq9V_1;0v#FSdW2CI-j2%1wgU8TOBf
z{smemw)ds=%1E)f1&E4WYs1rUwFN#3*HGlZ(HO34Z=x`%>c9aOo<(6&(Sa}vUr%8j
zg}1ZtH54XQ9q4A^D=17VI?%<!7g3m0bD)idCs3GFa-fNYwG<{*9PqI4*)oJl1qX^*
z_#}l%^#&{~e2l`RasxRm{4Rw_)dox~{04<d#RiNle2~JVS_1+LKTBb1x&wn30JwG!
zg-OK*j<WDh3X^IL9AM!;QJ7R}Ak4zQr!c9~z;+h?6@{lzxSNIlk;0cyxQm5<N?}r&
zfi@QY357{j2AWv-#}v+_u!n`$QkYa`pjeJCH3`*S2(!x>Dl?~v?e8BIrIV2}pM|9h
z)>!jUxji?WyMMQ-(u0Y|2iB5OXUMf3s^HmYKR{Kv&;ANgSBXLWwOR0pl0BLmj+|d_
z-yRuS9sO4%BnHd3?|Qb#BJ3uD+_1RM-VKiccN@9UcNx;X+r?1%!(u3~HG1D{K?u4Y
z4wi4V?jU^-rQTgbt~i6}zk?tc*cv2sSGOp=9er#ru)O74MX&u~(eT(FF*M&2-3i^2
z*@)6!F?cU9d!v^?fj|gy&~78z+4DggEZ?!~qlApnB1(dZAO-{5K)fS*1X78?<<Tl}
z_EeVFfC1|7$qnCOy?wbOv|^|?+Il_~Gh9t0E@bo*hxF2&qVz7xbg;P0u`j$GSrGU2
zSeUb~E!`qSSA*b-YQxnwLD+G>*CUjRGuz;Z?L!$T?p0UPpqDkb$;)?BMC>S#W&X>1
zsg+e{E8$%fUoPJFDPz!cK?eq+G${5&e<*79iZ5O8UlQjwQQ>B|T3FsIr{8w72JJ-%
zOfCmuv;hjdeD9b~p)#7!SxUQwJ4vt~3V|`l9|T>T*$E<u9auiF7BmTY+G7pYGKcg<
zd8rt@E6dP9dLl|MufMu8X6X1i0`0NtzzR_cWafS%MekT{U4~>(4(&0&|Bm}&&=pOf
zFnEh8_mlQB+SUIl25-I{!pI7-vma%+OID}o#VA8DICnl`chcaM>h#60VzJ}DWBi^W
zix5x1A~>awc2c9yJr$jPE*6vQ=H&E)1*Y~P?W)VbasUk^&hj3x3>KQ4QdI<X8)vBI
zcNi#=9TF#E6Xj7(jcVUQ6G^>qp&3s|iWU_anmn%Wy?ZZTXw;y^K$UoXppj*Y(%D@W
zhWL@Hh1tw$x#8#x2;UhFn8cvdoCcjtm)l=ZZw_1wz05>uwt-Z5s%Ei8*hyw@S6eaK
zP#%KOx6j66(P!~U$Y)(*z0Dz=ww6<^y^30sF6<;Jozja=sh3F;w3%x}3CTHwesi%i
zSY@sdgLP()DCy17kMq&6#_YM(1d0nz>2zp{Mf4sNd*0PXUt&@J%A!6+`KzLyL=?+`
zD1Ejw2g!-j3(;RP;CwS>LzLbk-LNjV-f4C36kv`AevIhqE|S+Jy+HDomr5^7e}#ym
z80;{UMRNupGLwReaJRGYCUZA)A2yTG5rdn|TgjDBn9?DqRAx3A9*c<59J2^NQbiW_
zIfIXy+koW^{@T2eT;x5^2I-yBGII`<gwY8GSuXEcr*t-YERRqTc1i=%G3hOEh;Gpn
z**kLZv*_#SUgEJIMTgLP5Jj2_r3^hl%x^Y1gUif$30!Nb6BR8IwkaA$M^2@d<&usn
ztfzhlYR*#q$SIwOJ_8*WgZG$?D8vd#cZX9t6)(qu^$?Y{j^z61h51+MWfW$sDBWW=
z?OqLi5hcGlD|-D98d_)A(D6??LD=2x^$0;&z<v7AKg2FvxN!QVY0uvGZW}ItRC);(
zw`ah3dQTh5&tQ8=3@z4*2HP3wCE=x=RVYgFzPJAc48;5P5ufN|#Cq2a-pYFSs@fLu
zsai~tnrfSTEEQ48;c@kiRiY%OiBgkM+|^+Feoju#4TAWTudd~WqT0I~0XGHb7A`hH
zU@<c~;qL@tCqxw-Qr|9eMO4TW*;E4`QN`y(Yfht0&&>b<ODGzJ!UyMGDB@v4%8Pz}
z9RUZ_Kuh#3baq@gn^}y$cs;m#UnYWL(Ho&I$Qd$2Pd<vyK|6)(=y_ku+KE{X6KX~D
zG9D>KqUrGNM*1XeGzLpT7-^z+Oe0dJ{!HxoNE`i-M;H-tn>ymJh}bPRAe%Wex{HF;
zS{%~e=)V(@_vY(>?~wK}K93LxtvH8tB#kw6O3xF$7W44nn6F$&6VZ80g;eczEQ@}V
zM_VX5Qx*LY^~0bOVE|Zjj=stv#ZD>4ns2lh5a}&3INxk|>`Sz!RYYKNc@H`z{Fj+)
zqQ7J`^UYlQqqKCQY9T*OOKL>_L2W9f5gClE5qYe)fd@65EY@&%`-rw>3IdvrQ#zxx
zp0_Y$rWRGBpxldY!x=0yw~A<oHI&ahm?DT6Eo9Q6WksidNwxHLbQ)Zz^jY+B=6)2N
zOmI+GqT{$vjGq5Oh80IY=e~;Q0QY&KZ*yN}^i}RNMW5rotY{B>DA4Fr%#B6=oyRnZ
z_w|uoN*Kt2Ei7u%4XB{q_mSa8PUW65dIYktAy~8rzOHi7yN5a;m{Hm0@5AWMA-#YR
zT))_J6v~PbnFJJjx9HvLFg&&|@|E$bQ!pHnGg;SOklmNzKQ09x6(!e3v@RgGiP8a#
zIkXtv?LgAPzD_@e1#;`Nhec^f=H1x}s0|^hvj#pHXzvQzABBP0yaj_aF=*dGLr2&x
z<lUb82}Yv!-2I|vkEoMrB>Ps3L+lt&Y_`V<mTz|k%e!s~r4xg#MWLyeIzv+?IyGTY
zb4bkJYj`m1%758v*s{l#cEHf#2A#D%c9CI2IeZT3q_zEBgQMq|)^Yj`cy;#tvv$M6
z*O7APK|{w+kRNajyS#@ZXO@H`;UzJYORv-L*z$gB`&o@6G<8FP;Ys}!k*}@~N4~l<
zvV%JNNbl`Kk+YNM`HK)|yF$ZA2bb&zg)WXr)1b)bxAy~akv?E%m|7)~I2IKnXZQDM
zHt8$7^s;UG%TCQ<QRmuj3zZI9dyeU??RzwK=@o0w`+A$?+vW`VcDOVjyEN~?w{dZ3
z>cjTj_akQ;HP-e%jo98}bhP(c`rciW(;qq0awv^_VLWD^h@5CSOu^av`&O)(9cHnS
zvmaiFoM}4sDZ#@Cor?6_c`ma5&M*4jvuB>tM9$xSs;{SYdJDy!0Q?-VPkh?fv!<#4
zQ-*%_!~KLFIdSLnDI^E_de(k8DD!e<?k8URiPxf|AcSyDVd4-b%E*~J`}*DsWQGYT
zOh|=;_WV~2KQ=>`g05|v{`}_*9XTj2C_~s8T;A{S?iam>Sb_F0IfWW~3HRZmkojU!
z^D;_%>Xj(uUewv1P=&^t|90fL+XwsZTRVBqZw;HHs2tIIgou*}P=^U3EfJvti6|XJ
z|1k;?bS6QhBO<5=5zPnDx>1NAa)L;oBA#Ouq9pOgq=+|V6rv>Y#-@n(Hq?2ff=Ci?
zT#9(VPY^jYZ=kdnu+oNfeLa6K7r8A|k!DvF-xkAWKbd^gol7ovYHCzz=vZC-i$YT`
zc4%I<gU>`YkMYEVO0x&0`G{e24tQ{SHAw{Gs>vo08C5sP=78)B8reu>DH2aqBnDYJ
z$o7mv2FcZ_rKboP9EB`J$Vo{;uB%8Eusxi?N+SXpt@-=n=`n?5@~yj0+)4`UfcexM
zfQ?EcTqqUUsXcMU-2(I0ZEf$-ICb`Zr>-7Wb021pwtUy4hR&a2zTnh7L6#d6;x<up
z?~e9AkagD>ItvjO8Cs30urgLx<}pQ~DcamH#_NVB)72KO2MPCbV>bVOu|14tw{QJn
zHjAX0Ep<yy@242uTc0MoS1NknfC?OsM3)>CLo<E>kPta*oYXN(5Q;)`{y<@S?vc(@
z4(Y5z+P|)x&wp~mod?%v9RI$3qaNlitjXWA?t6Vb%}ez^Mu!p^%HFf;13de!JCKps
zs>6}BGr|ef^{-mndo>QtVTb0JE&r{+pPZTjr{=V^{bNnwr%Jag&s))*T9-Jzd#$&%
zf2xrO(qd?8Cy9klBHVsKBVyLD4I}A~4V#al`GuNeVrU9O5JUQX$ji)dE;>w`ISR$a
zVAW>El1QN*6u%Zjb0$ZIvOW+sJ&4}{YQ#IuVKJ0G7WFA=20?z+y&xyD17gT_nVt~(
zDR>C%uO!bP*qtNK5c<vbd(6Mq7&d2s7Y0vJn$2zB#Ta84H0Z><?vRM_&kzlQ4zwTD
zIHeD0Ak`1@ZV>DcHGg$UpE@<iXk-PwdI1qni|}tlwlP-fabVUcMaAimVVU`;Lv!RQ
z%GLlWJk>AK@75SPW&#M=m*}n1>As%3DoXVTrQx=uMUE|fIdZZB_I>SSW9018UO-Fq
zkz*AH2{M@;+V;Ju)Ly-#y-zQ;N8pCx?t$Bbn_PnJ1#BPOUbtaBwA(fhnKy){BW)YT
zY27r2=)4>CZ<SuOwtu7vT!W-8W)iJ6I?a@3DamBl53B?hn>k_F0KK;&*k@g9EEhQT
z4<MFtjydxC3N-t*MhxL2&sKyZYmE~?UoQ0Z6jYo+sDRkH@Tk~>&>ed4n4C$G7%Eis
zMD{HG3K{OBEatO(4qU9Hd#VS?K8xikF!&)$w4h=hB^3bkwZ^e5xwFQL5PeHpWKYF<
zIT3Ir_J5BM3UbfV^Talf9vWa;^yJeH{DGFeP`Qb^)L@8aGas$)VpZFYIE|=*W@60#
zG<m`_&^=1NetJL^p>RNpYVt|bANUzoXOxCwb@wLbn~C*C??Hz&M9rElINCzDUr>!p
zkwc72?V-gN9MVC|N-_C+hL(3RBl;OMNst1M%Y#o>7Y3L6?Ays!IHU_~3`!b^QKWY-
zjW3;81jM`yYaH@0^^kb_kTawgg7(L0%sC#)AO>AsPU+rl7`$#J=AzUue$S4PEv+)T
z`Z4;H*ud2hT8Lt?he?kyG8Das4V!;W`0Zyjh7BLXXA4zoY1Po_J&r*v3MtT^i_YwQ
z$Mol6#6H6P*>NY+qfo0B6)>Uimr#};r@=sp-WURI+pn-1yI1ejd`1cli{(z=9XaU^
zQ@!dVCzr$H-V-@HdBHhr{vQ8yYyRPN6WWI~>(JBpuKzu-3K%o6U`WISh=JEJ<kL7c
zfqvXr*Id2>Znvmez7=jb-}SiRf$vf223@f4t}ch>#rFE`P`KAg;f7XYF(FND<Jqz&
z@(E7hg5i97*o0BG7+PR5Jh(^9--q#b+8#s4<uEK5?`9Y_7~lh9`_Zde4V^yJesr8I
z|A5u7a8K^R&akxC(D5LZq_-E9g}Ot<IYLv$i8^~1OenI2(wZz7I<5vE%A~&i0zoW#
zLx+}w(%LWR2$W{%_&~NU1?lY<s2D8ihK_y?GPYkBLm;D}<KH-FO8bRz1e#*#+{{6l
zJ8AilfSGpbRWbMs^?uUNXwG7t{-rf!AL@zfY$1=PAf*2Qn1a+<M;L<;cx&z{Sl;_V
z#1s}isq_r(M4-Gjiou^TuVx=Qp>tV!GGtf@)U%~P+c+b2RVG{hht2N>8N(n;QnSaJ
zA2D>~qcTuB%c0qOz^n^X8#G^|{(i`*zduxfo^+34^TmX<H8l?7RK%U8HPc~n7>*+D
z$cS;IJciA?M~s8YA?{yBjKdfbarY~6(73LItnC906oDxf;QhI1-(j+g`Yy`IgHT!I
z<ly>G21H=(WIH{pk*S@SstE)2<kOSSOFnA$153#VEf5A4lMi($4A|kLcB-;B*dCDk
z-N=C@pD?3CV?veD!8DjnhS6EVg1qjK&}u%e5qmzvtT>cKRp@;|3ZP@dZ28`9r-aP}
z!()TgLF4{URQeN08H+aQUouyEf;NmCq50^OJ4GUwLKZmAQ2IDW$dXRxO6sIdB<YY#
zdXCSKhafmu-H50(M1=}tQfD4KL3=;ht4E-iXmwj%(s`%!u1k6YMvIm{u_g07IWFn6
zQ(E3eTL9ndWHL*gCXf>#w8}*03pZ?AV7nuP1sHg?Vbu=u=tbQVEOEuySu!C*4QLTL
z6?i)kB<&^H;jNIpMtF<i<>Ri-BZkc{qNc=PC(CQ_7sLS)qpm}YTIzA?T-%)a(Bsg2
zBj)72V*U#-7}oLdly5g|35)Xr+YAr(jPJKj>uvvPJQQVz#qjgJwrOvP^H{Q&+J%AV
zoc9ZsGNDe}ix9O^aB|MG?_l)P23`5jSqu;E74z=}<DT&+I{FPAzhmM(A(H&lPXGfX
z;`GxTCk+_)6NdC!`-iYrpK8U^Z?%6oF8?jV!p||PJZ0#36&i*ur{~F3byKyuLg{+c
zGio(mOBMhoi?n?_lzJQuI*kY|wWMP_(GL*CEwyCQ7$@@Xo_-jBvL0wYtlOWFE)hCp
z^JZK~{WAy|4^C~BVZ+GD6s}b0Wm{-mm^K2v3>!`lyLp*HFB?wdLZmz_eHk{K9!7Q_
z8YYupwz9^BJmX37%dp|}u*j<wdf6l!7fu-=eHk{K9_IVI3VkMuc_9W?!-SV%!-+F@
zu>GR9r4!Rn%Oj2U&(eB6)3xu@NYBt@r{^P`^vryCg3nNEoPN4JHrBA=Ewnf&W0x}+
zqP8w|PQ|mY;NHwMdRWq((pxUg;l4fhX0~REnim|J{bJ9D=-KAaL1SahQul=1NT108
z<)YC1O<<J<FWL`5K<(7*#{!ruG<8$%{>YF~Z=i`<<ng6b(y)J&X)};M-Lo_t`M9DV
zOCm=7#J*mDCt_de+|qrKFQ8=4lahT=hH1}aeFkkh*)pi7=`q>ng(z>$+k#K9tTFL5
zgZWQ10cz+NN1e7e@Vw~lM}I@cF3?TYE0PKr1yOT?N?5;%s&~o*q@M)W|Ae4izu!pc
zxBh{Us6k`fyqz^4tOKky!jLc~#&j?Ns|Vtm@=cty#Hm{5q~>AdJeUlu#eEYe9dVla
zKfy^)oDM=IL@Yb0^7UVrX$*0qHC}LP4`Z%(BEOgN!HVg>%5Gf&_4$L;YcL)itVHf-
zMGY@EObqyA7M7cby6QHx+n>kq@t8~VY0&<>Q&ZoD8>5*(H{3Ap?YOt$-XUrNUHSHY
zL+9sb(9;FkxY;FLa7iz_q|Y$?A*(BD%C|b_v7XP+X$GD=#H<-M#Ke4+4M~H6jaW-y
z<L1a&V>(G`)Ne@10Alp_spLPXQoRirVrVcx?irX(nvHS64mPfsh}{>N?+%zgVc;w5
zkMJ-XV;oO}*+}C^BFx4c&n3c8AYtGy2(zw~_n}gFfEsgGV*W#m1qbD|cc=GMdFd^o
zscb^yGCcO0c;ClV=bfi$KQjy;?L_v&rKxm{VRHm&V9i6G0KJSo6F=Ll3zcF>QJjed
zipbF=M+lS!KkZtgZ@ve6J(x35Zk(9WrD0j2iIxcRD5_NqwWecbfgJ=ArTJz|0{t|~
zxPh7j0w&syY_$c)yffPbX|NkY3kKLdsxxOfG#DiH<{q?nqH70dY`Lr}!w}ARF>aR-
zIkwCgIk`-v?#ie)N6s!|Q~QIFlS}tU&ca?DtJu%Fz>~N3V<l$4;U`0o5o14eh2v3O
z=`c_jeI8i<2P;NRBKD1F9RfSAXE1K*)Fah>ROHB2)G0lS0oZfvra@njHs)h&IUpU$
z6nr7(4<Z@#yOYNU-bF0iyMz!E;a^D8V!J@g2EbV|$h%m%+kwo~L!Yf6iJ<%^9QkJh
z8)?}=UUKNe(!xHyGyjO8^EAe6nEX!FP(jq7n5NQv*hG&kdgRa}FaI8MlcDo}(fBTP
z1E-gEz&y0ST!F-vnJrXB#WV${eQ8^C%Momw1jpPl%OoJD{j@map{l8%&8uQcl@@Jm
z$P#D={>;X{9aK6nAXu7mdQZ|Yg)L9r5}Lt=sil~V4mzasY+JxMX$}q<(lVM$>UBxS
z9Nu&57P0n*uBY?hx>;xxGQYn2u!i=N$k`cT!;@Ht(pzxfhjjHjXf<0OL|JNV`OmI@
zoh_gpb7+oZ{cNWdn+`6`moCkqwf&qfutixK=<UKnpXmLRW;8t?XiL$>L?efmyss>w
z$!maMKj|UtbHkn44cCq-Bi8%QIHczQx}=k$)*8pGfos_OM`Rj$fQ_5YG%j{ZhiJj?
ztnuasnZw)_vU^nt+~ePc0$_n@GSeGc5L}gy33eYAl&IF3CfTrb=s@ORU4U?4^<VLf
zuDTkEu~$vFdLL@0=XivVgUJw)^6Vp=<G+lTA8pfMQ4DK~w*1!&o#o6N4v3o1XmQcd
zISpYd94s!%<xQr7?bD*5*r5=d(D(|5B20A~!Zxhxgh&TF{D4Cu<KCb{BL2U7stHFw
zc1{B7t`D#)C?5iO<B<CNhq$A;a`mjcnpV1rTd}-L553C~clett8(OLxo3Q~(nwYo4
zPvwcGpDDs-V5^m_T3YI}{jJrAw?&VFGwC5L4p{9viX=tJNDx??;xxDxsn}@vGc>hY
zDxMM*4g<e4jJ;|@$2o9&2J^j`^=UlF2y^|#B*jSlU|&u~Kcy7fhcGsI2>7%HmA30$
zN+z_7&PPxz@;w7HaO_H=rAGhoI(io@dFRF$p0d|=F8Yv+8+Zi@sa-t>yZ6u$v{}^K
zWLu{YvZMYHHjC6}$`C9#0Fo7lkW#ltbOTG(^3sNF1rlYlMK8QYzD=xN;)iu$u>xk8
zfH1NUZwR}Gkd5eDz?U~`r$D~w)nLS0p{oB->4PYpFO@!*j|zQ*MtliQD4A(b)sjYF
z7gwzj{YW)l-hYcee5%%msuA?T7V`g-PF9NNSP}f~N^zX<Nhz@6aTzRp?kB7SM3o*K
zi+)H*Ob?zOK@WDwEYy1Nn^#BCgH1AS;1yf`m;MRu5zX<RvGinZutL%GNP%WGll5i_
zrcsbQ>Ew}>go0Bj$aKYQq;O_DI*Wo+6VayQTNCw32MDqtoUAV|yfUJ`934$x{s??o
zU)~3g=w}F~>I*jdW&6QSOhIK58gmFTQPavwvLQ6!fDP#dLuky^PkBgVegvS(hHODX
z@;E8ket6>c!=knyOkV&16B<Nhk^L|sK)W8rWIt@7IsZiJQWkxfl>jUS3FcAT5Ru!E
zVs1mqm<@TF@VUBZp)LzU=2h0v0&VUm*3)l-V>-9Tq(3I&ndy{{MSn|FNp>VNZbx#W
ztujxw{`g)VMSq+!F4LcT=FO`!r>~#u@b)pi$YEM@zcpkt%hqI$Y&-M}B%6~w1q9Wz
zw)g1<N?}dVf014oktpd!X7tH}vR+W#sO-p4;5yh5sz$|*V69!YBoBc?){JMtCHg9Y
zOf%TQ5Ba<w9rmNHL}nU4$*c4tw0=(ICEFJ|?L&w99tAh3h3TncQj1?9X_Z>6XQ{LS
zC?xAeQorK$H`f(6);7fZl~yRm@P5VLlI>?EWUtM<K!~=pBKVu_h~ddH^MZ}Vmq!1I
z=y)eXd%V`Csh1(EBFm|=D<`9y2{}nG)cuP`W~J5(*Gr@5#T*$o@Ji<=>u$E?pI@J+
zXob?nB-A3&$q+2Hn^6>_gt{5*Q=E<t9gy`RE&2%ajnKc4UQoRb(~EmRA?wATAbRu}
z1fgvSd-QGfV-vVC{jiXJya*9g`r&4&$>lzJKjUrmRio}(<N6`n4fVK#^uiKtV!0nx
zC1{{wfsrsqe@=*82WV|Jp#!jg>hZ;9LL9yRWj0Cm|Ke!%FXR57ss9(^_0N3&rTPa2
zuYZUheFni{_0Nv*(PY6YVTKALp$sv;w!`=-_@U;%^HdJi{8z~}?_=rMQtd)gf00t}
zBlQ<l@5Rwpmg`ab2&(sDR`35zh`e(+4}ombVD2ZU57=-<6Mcl9x`*U`VKWgV)xWx5
z@W||v>i@5!)xV6(`h|7>!~Me0^K$)XMjv6mO>B|iZ|fKC0R^vrh#q|g!Q}d<@g<!g
z{jbb7ehyZs`oDY1L{(4w>#9b77m`TWm(lwL%rbT~`&yb7cpF-hdjmGw#8ANkqu@tx
zU=yMNmhn-WzB1K7Ct%x%nZ_1E=LX;`6hSr97_lGNK=?^|pza4MWNvCbus=779?Xz&
zG0(W~9KCD&?WP*H$x{uqBkBhRpOy6>ExLvIMz8~;%rjO2U)F+0A$au92=a13wd3uL
zQa<o{6&qki)#wnzUknXJ(6C)Lwolfxi$=(F<~fI?kPfpb2E^(9=#>P-I0Bnss+g!A
zV4MZf>-<+eWxN$ygY)mguGP?=-CcM@=P;2hf;FhefnmE4nqd*WpAf@0sAWlqMd=uO
z6$CRqIy(LMeuBOoeUn@)iX`5Ra7sr5SWxOSvUzp%IUuOl<CvMlix0@P^xUqWAZ_GX
zJtD?X9(gW<JXoGVE_&Feqt)=(fAT!yRC&ZE%`ihl(`c12S_9nBlsiOefJsQ_&b<et
zC4iFrH!%sp6%AG|zfm+1;$_KQb#L@~#3$N|d`R~k7D<kY-q&adhAAJGNnVw0GJ9KO
z_*)`lkro={<xdGzv@_#H%iaPx+>gjlL=^10TSMh^q~Z@i!08D(Xp8;QE<G$l=aF?;
z4)04&9Owznc_{awGC#z?R=<U|XtAz=BOzz<Uta$)R!j6&&0$ym0c?)aoE$lupU7FJ
zlCxqYtQ<V1e&sw$x&jN57BQGB(#BdV7G<%jWpR4n=sbe60EW%ESZ1Zewm8Xm2fbe4
z#AGv+?m#)xnrI$Yf>7#gCMIeIB4-+}eRC4cX{KV~fffrx`mv&<H;bV~M(oisV3rsX
zu`cQ~n?kn?_6*RuAOxH}@95!(p<7~bY>D!xbtHP%mvjnA>%Zd)-DRZX^yoaW4hr5l
zhO0Rt=D+9+%^7=3NPnr-@L-=4YkfvG)xoZ#b^P<<JgiSx48Mpt=i&VOBR%5>+RqxW
zrx}?xV4L=iEB|%E#a`$mpgc}^*6|-(r#&wxs!K;AlmhlNUHPXihAolt1GEI230kM-
zRZ;V$nEx3i>&V|nv9|FaSULs_9Y@hHo%v;W$K{b8XZ{Q`F}9>3g7)dq_-r_s@jhyv
ztZj}HE3Be)CifE@5+08+CzfAxu#k##!h3N?a1yK0dbID|9|_OHck3Wd0t<b8@74;h
z^u4#naCl<hd$oozo<A}?i)W=F%KR_mxyI0gXRYDj1nfT*5B9xxtKro#*r}R)+aNvd
zgP1fqn<ZMqbV~Eh`3Iz)KxtzA2j!lJwbgX2AHD$`Y#33+YK}NH%V=@+MW<$}nf#`F
zbP$G4CoQky-51rsm(6>i*<L@Wy#vlrRXvUc%*b|V&N-z)oS>X<CKa=9yl0Gn&^>02
zD>MU3=zmopRc5ViW7QZTRIAB9X4qT{BMe)l=EImM=wHV`OANBU1p^&1_=qz3-;_X4
z41W797>pqXzHh-`EHN;D3kKte!FzOs<iA-4X~f`H--5w-Vo>=l7)&4rCeA<_Oz096
z5qg=VPkxm``k9I*6mlZ*IfPxT;YBgWY~LuOe;tE#V({~C!C(?GxJ_oz-jk8Y3CqPu
zLgYWI2&Ux2u=x_kU_VayI`9TnhNwAjGdx)@*f-A5lLsvl3&7Y2M;Jk^W#bIZz&qG%
z!_NDB^WxA9<KmEcGJ2HU{d^n)ssMWOh|acgP8tO+Akj$z`C|5sb7*%-gKY}D132G2
z@N;m2ZNaX~bJ7$HH}=qbSKe5(aWCLgU>0>aA~Uc8JC$AX)*Z%3=Spd8BnGF8ouBwu
ziFg;#`>8V&(9sz`bU^FbCWT9S#~~eG_k%=#Lg$j5(&2SextyqJ`G7J6f~?l_`FqwM
zk8cv3!6pIr-(imNE{8M!%fRoHfegJF*^4th=&fi^PY<CXI+y#;5hw%=f-o|{iA_Dr
zgJxg}*&RAd*?G`!h&K1cV)A>|?RcC2sV*8R7-*zmfp2#k9?{3bG%mE^z03If#tHga
z-%Q{5b@V3@`3>pcFq}R&jI9xsJgTFmOj5{~DP0KOyzDZQ`t@`+a1D<60iYt@c&DkK
z9;3RqcB6tX$pTav-#`uY`3L+%Y)dmSLK}l}&+LtE20rZ`C&~jap`PE3?vzH2Y0(Ra
z$DlYk=G9A0Lh$C-@szOm`G%xJVn3otl(i@wx`SThZUYrwy8Atl@bbMhmc;%I0=eOV
z1wiHNtKPYt4B>3>=8baXbUCtv$g#gSw-Vss&39AeLC{X1Xmx#I^F@BYH7z*TPPmuM
z!!v1ApOqWNb}$<jW$qS<t@KXx#4gez?06&>Ltbz6e#97%>0X0gLk}%Ibo9{EV;nuk
z(_<PvuAxT`J#y(`p@)?o1@tJSM=?DX)8kfpl+dG;9%b~nogR14V}+1f-|~7@jE2e1
z(6x^kO?VkTG?QG6=&?!os&o1WOlNz}>!LS2gB+o`nsSdQ^^MhFylQ5XV%FG{H?d`;
z5%Z4+j!`wKMi8p`!soqmRTBoh_zf0}P;nJC`c1xu#=y!N6Moz#Q15H-w@foF1oYaf
z8K!36j{>#LKKQfof-YSpm8(s)^_44qrk0w@Cf`knGPNoX;4G-E^O^jOpscJiRaN>c
zO_dE*rq#aMl{NkrQ+0D=y(z1<n#IhlZK(Cn#7nY1)6F-VO6*pfdx67gH#Ih!S}Irh
zX5x2tD(hQJwJj!pbHFz}+jKq6qfOOFp#{SFOjSNFh^tJM!0;(CGfb;%NQUdPSJXC)
zjEsoN<~4F$won2|Ya3RYDoxl?<auLqUfVK3Sb`q`qR7U8AHO`rW05_IYY5aM3+xAp
ziiDy-{R&^Rsj=F`Kr&{FKqWAfBo-FP<%b^_qTJ?~uDi~3%>w*(5#$Fy_B%xQr63c2
zPN=bY&9rM|Y=Qqmk(>Sak*`W9gdKN_i(U4F4@K<{G&g`xQMuE}t4St5+GtVuFjomP
zcYPsXKg2K0`~YqNTnk*l!th%Pn+4MZp{}vA%I(El&QwWt*<K*nYO5N)OO=3H_1E}J
zi1jsBniv8r(P==!g<ni^`x_g4?)u6mCCnDeSgah+wSqwipBC8NP56B$Tof*ADO)Xj
zxYHt#F!ZLiJK(RatM#veOsg5ax26(5Q&qc`=~AI+EpnGSZnv{iE<{v=ug=6`nO?H$
zCFxa~V7kV1O;w|>g>nb2udJhLA)G2-b!DK=Z(3DZ7w}D6)*x36+hoRXQ&9$KotOB!
zS;KG>cel)$i6%dEW5=f(v}4jHW@z-|CQQ1arm^040~EaShLyhhxic&4DyyJ|H?%Z+
zZ&3UcaYI#or5nGC<!i34^!iv8Fn%mEKQQGeDY${>sBfy9jbE{<^y7!TYP~IPEKB+P
zH>n|2f%>MKh(>dzZXW#>;Pn)EDi+%Ye|TpsR;<PEKRq3b(cIApvqL|dVuIPs0lXOQ
zMz~?P+u%0AVh>Un-JPhzZv*wkV%>0yvCeV;u7EWpBg}0MR#+?Gc4H4_8-?GD#fB*S
z7U*z($?`V%!u1?Q&kDEc-B?V(l)C$U#KYbGVJx;4?tv)Qe8@c(i{;_m<AIMsNA9Oc
z4{rFgSS&3Kd<H=WH|GT6;SQdT#SX#UJ`{_Y@Y525a4m3iFfj7KZGzhdw+-%AxLt6!
z!xiwB*a5f}xI=I~aE)kSZE&;TcEhz$7{f3R+(Ee8;L<fkb_C7Z5<$~y)LfZ9F0D&5
z&PZ_DQ|;Ori(Sug$Qc=55X^`p{Ng`DCe($HVO)@rd5dA<>a;fDhgZzIezy4<K!s(v
zZMgIPg4dS`&z@o2q_t&aZqQjXOznE#gp5pUhOr<cZNWrsMLb}ixFqghFtIl7w@qB4
z;DZ+~lHu35qu|fv2D1&?@=0S0exkb-d7Fc{KHN1sQ1&eS^a;W;<5=w~%uRKxTE{w!
zI_;_^U4~`?BJ7~B;zf9w?>V?(@U13*EyHxrgp4$i#maf2Lyn}AbVNod$S^*jEy&2+
ztg~mBLi&P?tW9HV895uqS~K$6$CYFhYXcd109Z4!3NlPU1Oakg2GT0<X$2qpt-=xc
zSmS)ej5dw-xr~jPTQj;ef6o}&k{OaLxQOr5;Mat<@)_xTLOvwV<x}(BuF>^sGj?bS
zGQt||`x$wNOA*zcfr_31R`k|u8b+%RL+1rn-U}v;6(7)T)`!My8teGUxb`%iW9$T!
z<-&>cfno!G>%Lg*-{2=XHd8r;sGK(Gk;aBG3o~-s$7+9?kp-2Amkc4x<=g@KDCn=F
zd_aF5(AtyBIgt<Dc-=6$86PK}&x6katmM_{i4UrK3Dbi`MuT=$t1$y=V4S2~Rbx!o
zuJVv_RE%|!aun-2m~yz{$^mlXHx=>>V)e8WZJ6<s_3CT**<^le$C0uTKO0vk#;*?i
za&WMu26D>v#qyb`uY^98qLy?6nh7J-6H{O+cY5Ko_;4)t&+t>ZZPq%d+`*5RyMvYc
zy(5)7)$=*fZwGxQ*UL>>E4AB2)NXZSHAxMT+V~8VYxf&?k(JBO>v>^3y#<usEg6GQ
zHHc37rLJ%$H}Ua<PYrDGw9)vW9NNbg5ubNP<3siHH24(5mQReACu^UOXPEw@IcatK
zzNS2hUpo2#Bl?1illdjudqNLy$!OE+4t)c^W#D)8omlMiZ*KRD-;QtK_bB*vy*G?s
zBEJd!Sxou;<Ja(`dN>DumJedFGhDu}sn5TF)rj>;_SXhGF@(PI1Ncd=xgNoyw(Ff4
zIokdS8763!+zrU)S0Mep2Xxy%ip92Yx)9loO*+tRAiL2%rU+KGWdhS8(8~r@MgKJD
zjp&;Xj6lDL=oe>Lw7(jep6dG?=(|C`8?M|Qnf@f&W1_wn!W`?CXh&?13#g4#c{(u`
zFk&nahM)NH@=UZxrq?zI6&R&7<$mcQ@Yx7Hxx@$W9+aYA;@M{X;p2$k_BHXZAijT;
z_$grd1>&QKNAr(g1W$*ZI|?|dgK|xUht6;v0(=DlQ9IO+D*~Abg!n9seei=*yuUzg
zwUZsG&d7xP){HbZ&Zq;P1$gS;U8Hn702cwjjOh7A@P`2}2K-V52$$q}9PkRjmlB+c
zb8$i*s)tt)pNTP;kK$1e!}@KJ`dg9uZC8e+c_bUl<ef@mFz_LB@c*y>Hzm-WVPOfk
zaaWN-(a}`*E{>qU*Ya>Wce^K8m=D`t?0Vc~VeaK}F#bF1_~|fqL6z90=<LvP4-1nX
z(xt?Ic_G$F;jLN=IRZ>8yTW5E%q0^;S6FMoQxN!YUHC^Xs4`B-WU+#933nAI-c~&x
z22ymgiF>HM(1oVWE~PEyq*+*4feSpW$gND|Xxc%SlKzF5oS*J-76y;;u#zsCAItCU
zJf0!_UwA0LW1pJTYrI}R<nAf%j-6;>sF!m0I_}=c-38pejl0#{UB%rE+}*<6-*Wd!
z?(XI8Yux>iyQjE2Hl3%>-Rrn}BX<{Y_crcUb2l}Ae{0xN>0zh`l=iYh4tu!!O?-5V
zEexfBBWCchLT5|~EAYS7{Vu1k<^;JZ{4$FyU(2eg(qD6fzY!}Kt7o!hkC`jvRW&6-
zUNIXvj;$8W^foqB*RGUT$A+afp-^5}!td+h*Y!-a=0m^0ZGxYzmssWX61FVa25O~U
z|0CA_>3WpAkA3VC9{c!a+%s`sg?kc3Yr4>Y+Hh05_24eXU5L8?w-vVqcMfh7ZX<31
z_ZP4cG=@Bh`vmSm+(&WK8c7&8&Ck1Wcj0cs-GtkNyBN0xcMfh7ZX<31_aL03xM|!J
z#=RYPH|{RnZMd6odvF)yw&2deZNhEDy|A$GCR0}Via>)uV49mfCp%~6jR6+QU6*@f
zcFvsa+??rhglSez?u|J&=CEM_NJqVN5Gqe=85U2!KVCK>TqpQPgzMw>SItZZYaAXC
zf2^R`=VbhG!bo;a2Wzj`!({yN=m;!o5cBI`<rVv@j?@Vg1*IQKhNlZ*Znu)*lZ2)b
z?1E0n5a{=DQ!kxh5R`sR9j8MLhIxON44)kTZbvfwBEeK@QG=LYCtMuwC)JTUjGC1G
zF&U2D>`spw#QZwp()jmD)R8*jGC}#B_hdMFC*J=h!_j+Jdek81*QvfgppMiDO7EV4
zQq#~2C+Nso(naG24fVz;^3!-hi}Du~JIkqvGr=t>@N5oG9mmjkRRcLUdOh&5=E>Yr
z<{w_3G@c>;-6{MxF#O^5@*BWM%GaZukJ3-DwL8F`WPC0c=%=jd%41KQ0EDX%E*F&k
zrHjKO94@TzFvwyKe}Ti5e&>1)KZt@JNuJjkzEPl`;ib#Kp0ceP3I8j^j|8XRrx^);
z1>hR!KfU2amogrj&EfQ(j~edeaC)1AF0^oV`8b@8^V7wwI^1;}PRF?EN@q_&_&JBu
zAz!*Cu%~RZNgg@@N|(ZCKk!GA|4q(^&i>J*jPJhWa5}g~7c`Px^n+C+@%awmlrC*w
z)1}Ds{Za5sMu8*Q5&73K{uc>t%Phn`eywHj;q8bJMv`*_@CDeJRD1Gs#%H+Q=;rv!
z_mPO2F8YB%`VvW)=a)is?PYw1+mkl|zX+vBzd}wIBZvDjhldwg2rMX_+Bgi%FJh47
z$CZFjMtjNhdRT;>M`w+Ke-p>wz9Jc)_BV)6lQ$W@jPc1324`9Xy4ac`+`Bk_H|LYa
zV)2qR$8WmD!XRvI0lo(~zSHYra7E5vbNHbYd3pdhqI}+QS{T0a*blf#38>ur(9lef
zKh-{e!1<SXJq%k(_Y~k%{z9&W!P(j*d=r2`@M2#6D_AVnCIF}Msq%UlMBy_J9+LCR
z6geFnzbhqQC4i4qk5!|<>8Cg7`!<_W(!FmKe0rmt<UE>^?(aFDt`t7cFh1x<xctf*
z$nzX-G+J1Gm2vbD4nJy;;flTe7;q|w4^ztFEW>9kk{<MvCPv8F=Jh1OzXLdxziF0*
z!Py!xeAfa_<?xW#!{A`cu3QGcTv+Dyu&@FzWPBJX>9K_K@0xF6a2oH@RmtHwyxyT6
z>}p{AuMoPu9u}@*PeJ&QwbS9_pL-ep<w82gSLDfn0i=92U20(**q#S`^xL{4>BFA@
zr}DhmCF3(9Ji;7)Jf)oJw|%JIzeuU~&l&s*VXN1}SSa#nF`*#*M^o@80ZwvmcUTyH
zm_5<9IQ|r`hd~rR-(&b3N#!iyeA+IuFeuwQf^RY4l&@=2^0l19^(pYXI3Ews?^2#d
z8-w$J%6SlQ%GZvRd_BVX44Nzqo9*4e_j`_?=JhbRlCQsT`1}<30l-QA87XqU&*9@!
z;3qjemQr6dFd#XfPLXpm;3Mfn&M0t^^9iT$spRl`Qsi#|oaFy*ik#~ie7ODG#QAJX
z;lCAd(v$F27FKR-PYu4`aJYrHBMplcgg<gVbzTpHDCOM8@HvvoISjZ-6;`}a&Zj>`
zp7VfHz6Mjunbw-AoGZK@#zIN=JAjW=&a*haIR*a*1lL|Jq*cm%7&AN;ar{?O<XOq#
zLn-hL44#-DDf0Y`<JY95`v(qxGlkE74nLOyKLI$^%it{*CZf{+o@H<bN_tEgFPF~@
zuZO`Ed9DI{r1HrDoXR;PMSp(C;h8CL7vqEeg~NS3_2r}R{}JF+U-}ez?&t9LQ}X*8
zz)7C|lzM-X@kc*WU|}g9U{CDfb3PxZ@HxWq1<prVhh59`>IxyP%ELLx&S{?R!IX46
zfluYHd?!@tA7h+<Q=Ww}V0(Y?VgHul8+kkWH<`VV2{@I*rW84^1>88od^4BhM^o^v
zoPS%ooGv@NfHW3x{Kr%9D;Pc(Lgh4bKAvl3K5VZV@ok*XU<#jq;P5=J2Us-rmapG(
z_*Acl0TlS37=IZ@{oKd#O<oTJEBLQ*xG@Dj066KxRVjLSlH&_0_!mzcS<kNnoXS5d
z1wVTf`~r@DAf+6Z0&Wt9U-f{KoPSNxpL-eq3?XNZg%Pp6Yxp`i|E-)qnlrn)Io#&;
zu&`1-^ce<b2MR0<&h{SR+soh#l=L{l@w+S*2LFJ^p5*X0qYVEehmXO;llZq;W%w2j
zzY=gNH;dQ9Ac{P*IKC&x!r=eRo;aY#;5?vmZXE^gBmS_%ZC;NGTM&LU3jQX*shrQI
zlymne_)i0FO5!XBUSs&Em&q0eU}tRL`%ezvo?>r50G!g@m6Glt$M5=q%!loXgU@*m
z_uMGM*;xa?$6~@x<vGXeK`7CVOdOu$^$<ATzGeVU>Gr0iyM)=F;p>4naX!zd=xr&7
z`@J4wALm~UIMsXCO%?`M#?Q?>-KSF0UBlsTroivx{6(*a@lo_-3x|7B;E!^6aSHrt
z1|R<Y&jTF4FC||e08ZuedWt+J0H=JJIQ%ju6+T<S`Lw2_D|nmzE&f1tHI6L0-M18$
zxSfvDGJHZb@u5+-zut|{a5m64MBP=5?v-_oD=O>URsP227I$T!Rq!_AKqEfQS(W|$
zx%qQOMsnjMYOTAnxw&$U8)uiB*Wfdo*g0_Hpkw_Skf=g9Mp^4mLZ#1iLO%9EPj_RJ
zU-mT%@M6=Ug}&g~0?B~U+_;i?-0tEMds*31_k!}G!ZOF=BDXtxR^IIV_`y_SN9R*<
z3Uj5e1)*VQXh&i(J1;K}!Vc#>J3p5)K2pS-xuZmUf7U1w`6H(>Cui2kY0Sx)lgOCr
z#O;Xk%*`1^p1HGT3#o@#N8&K+#+=y#Kl<v%*HzuvA8PP<sV1_$O--p$C}12_XEAQ~
zf)cCC?zR`%=o__6+Z2CNomUBN+tMPd%TWj{RSbAF`vJm7rQLRsa}#YPf_tHJae>w8
zUc6vIslCiyW-V~qDFybaS8q#zX`fKru&VKHpVjO2)!{5O)YsZjRl*Ks)9Gb=S=M4v
zfA+OhTwLfbEwdLDI-KmIu*uOyi_6@Fi(SP|dzsxPxU7X@X}P1!?k-qR>b|wyUTzmw
zR@Mjnbg<f8-P$DB%3Z}^XDusr+bbMpIQQ(ry|BpY6cUGm6KqNsmz23(_L7A-LJqIJ
zLgrppVl7<cC|c;Y;vjx$xl5>@%RjCAHD3o#aCO<-i;9W_<;1tUvI$M5mB}|!=@j6r
zyLYSdw$Sc!xeJ_&3l|~FNSTf)Qxn2j@YO3&Q&qlJ_T}H?l49THO$IrNip$H~Hit|8
z>~B?a>_{0FippK?C5uZI*-PAIBE5xBio@JEH@y(2uS=N-@>hc6XYdh+&n2@%;jktv
zTS#E0Ozee))X)AVOW^j=8v*=uJ4?FIX)P_am#PY55$Y4SAb+DcnVub!r{5V7AO*d-
zu%@6Eg%m%dkB<}kYmzD!g=%#=ZpUZx78gNtQK+g9CnuQCp<J}AuJwCs+}@@Dg_z}P
zSyR8Fu`Y?D%~4uxEpd<$bQIa*A(opmJB)mxl`@pjOBnKoRrGacZ_R4F6yUB|?Zx*o
z&<M&(99A2IQhmHRArXBcSvl@c;;e3LzT3S5AD?cZ5Ib+rUaUYnu#)*NYgDyB`6JHB
z%&bl;q%CqUu%Q!bXspHAeQG~yr6_eQC{xu0Z=~*7vQpwyJ;YzRvJU2bO@o&SMIVWV
z6~GG~5XxKQ^WNQrp9t~`j-oRA!V*^MZYmVTXfRWQwp-hfEL`Ht(#h~*d&vTOVVQdg
ztd`rki0p>FnD`~yp4DFJ#vLysUjyu%oAgZn8nsYjC*#FT1ae;A*iu>RWm$(=Uf^r+
z`c%voEFjDhhYbypf8sh>pp-9yCzamf0=v^~tB~u5nu*+m6QaUQECg@4an>a>W*Sjk
zTv}))J*{eip5WCX6s$UlxS4WVT?IC4Qe0dUNiVI=Qc^9)V#t8^D%^n<ygic;gt7@<
zMWw}yOGhz;qP3*NQ94Qryn3Aumvy0imJJGmvMhz^rw@TAzG|MxA${YU<q#<g?8E4Z
zIMQ@zPnB;)V5NHnH2ZD};(ZXP8Ebv24u=d4e<>)Lfx499C6OdOY<*dIiQQegw5Uw9
zmm+US1@CwHTD(5Dca0ZCo%~jdnx(7US>|vT6_%HMO?Nko)qE31PD!uxR3U{*G#YAc
zcxfu}o{!tTqNRl^sXDRJ+tB}@i3m!Qby<tiF_l=CqI45qaZeNgm9Fx$kM+Y^wX4)#
zqN;nPRnNA`HBwqieWLOq_XJa_7FJ~?c3U|c>?IXvX_=#}P^26;RW{Umlj7nPz<Tm>
z7yu|E`=0#>>|MM)+?^b!E)b?>=pf56B)Mgky98FS1+N4Z){*689gtFw{@Qw<+`X(;
zxBFp3gG5s(Eq2}JE-WdHTWOeQveGct#E4GaBnvjmt4{HOuKeB}th}(ASx5P;C~%Zd
z2&rp=lD0dY#Ta`e#tx(=zSdfQa;nvgSu)J+E^`&ehkezfX$G||8c?uqo!cb5x>eii
z#@lx6&AJq}G~{XKWsDSPj#5^+0_~%=rHS%|w(i8MajN#iid-4}$mLHjuO&+PHVolP
z3LI|k%8FG`Km#8Y8#kKH@Op>+#=C4PW;7HmD0ePm2`O*Uk;!PTCuUT5Dr`35E3w)f
z6)GSbaiKXEB#kkPmbi<Sz&c}+)|^}>YN<+PJHM3DT3vZJj0Xk`RKkfNQzGL9sN3R_
z#l>#LBM8|rYyQd=aQ)4)Tch~cn{G`)c0;4zmu)R@%)~T`hgLQOvR4FZ>#AnfRteb@
zs;O+L5wfe+G=QJ%`kUn#j5g@lSW>`^xMp8nB@u96Q=MPPW)?LY-t3i)2-2IRLN?7R
zfz-%GYuUaUK98)a0uv=9lgblHnL_d3<pmm&sjP=xAhyIF5E}Yr!{iCsnA*~-wtN!(
zEu1SstHG`1Jo2;_=F8&%<$NrUX1I!<+`q$(CD}NAnbv|3-fQCYCKgIOlywLNLhI*h
zdcW4fqWqlR%tDDrD!mO+r*MW<k$<b!!jQM7&|{m9UCO#14+=%_U<ocRe^_fl2>XBv
zLmdkW*k)pRSLku#fL*uYNhk_>c#MU?X&;d;<y^2Fl6fe8a__`VXL1#K<-FSMR4BMC
z?3p-+tl%l`-H4|%zDig**VfJHJ)EB8Cmcn74i@Mf5|yb!FQi%ID2|vThlJGfQ}W+|
zc%m=hc*;4s98TZF852FxDCsNon*k&JBl+l3&eL^cJ((_LpFj>J*2$Ex1dp0tId>Q4
z^xG*?bs4!|aUVihO|P8KYr=XVU5dPNXn6P+h)~nFrCS(k8>gRTRVZawNmqgX3p{Fi
z<-FhHE*S&Mib)rhwfg!U0z|LWpK>lR%<0c_{Z(jE)BpEW`dcjwHGHc~LGSpguhjH+
zBT}8da?a3HDpOUZWFWOX_X8~hw<4x;?q_hhOrMIM8h#aM1{J+>&S>awq<;srB)^h=
z{CpraOkAnyrG`HQ4(YB!ubgX7<Me~P{1p9G=#_fd3R-pg%6Z8lHzIH;_;M%_R^Xo@
zLQNk(SLu-{q6#5*6*|Qo0z8Sn(j#M5%1)c2SaMJ>aaVCJz(c&0{I_{M2npCfV2-d+
zcH&P{+$Vc<NSxBoNr5Z$b5r1iOPAtKhF?jWMexTwo^ZN?$l)nL#wR6<408vUzlI;!
f`3|59aVzDf*e_D=k<vGjVa0VNDYxoMP5*xbWg%z>

literal 0
HcmV?d00001

diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/cam_pipe.c b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/cam_pipe.c
new file mode 100644
index 0000000000..7874ff9d52
--- /dev/null
+++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/cam_pipe.c
@@ -0,0 +1,139 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include "pipe_stages.h"
+#include "load_cam_model.h"
+#include "cam_pipe_utility.h"
+#include "dma_interface.h"
+#ifdef DMA_MODE
+#include "gem5_harness.h"
+#endif
+
+// FIXME: Include gem5/dma_interface.cc/h separately
+#ifndef DMA_INTERFACE_V3
+#define DMA_INTERFACE_V3
+#endif//DMA_INTERFACE_V3
+
+///////////////////////////////////////////////////////////////
+// Camera Model Parameters
+///////////////////////////////////////////////////////////////
+
+// Path to the camera model to be used
+char cam_model_path[100];
+
+// White balance index (select white balance from transform file)
+// The first white balance in the file has a wb_index of 1
+// For more information on model format see the readme
+int wb_index = 6;
+
+// Number of control points
+int num_ctrl_pts = 3702;
+
+void load_cam_params_hw(float *host_TsTw, float *host_ctrl_pts,
+                        float *host_weights, float *host_coefs,
+                        float *host_tone_map, float *acc_TsTw,
+                        float *acc_ctrl_pts, float *acc_weights,
+                        float *acc_coefs, float *acc_tone_map) {
+  dmaLoad(acc_TsTw, host_TsTw, 9 * sizeof(float));
+  dmaLoad(acc_ctrl_pts, host_ctrl_pts,
+          num_ctrl_pts * CHAN_SIZE * sizeof(float));
+  dmaLoad(acc_weights, host_weights, num_ctrl_pts * CHAN_SIZE * sizeof(float));
+  dmaLoad(acc_coefs, host_coefs, 4 * CHAN_SIZE * sizeof(float));
+  dmaLoad(acc_tone_map, host_tone_map, 256 * CHAN_SIZE * sizeof(float));
+}
+
+void isp_hw(uint8_t *host_input, uint8_t *host_result, int row_size,
+            int col_size, uint8_t *acc_input, uint8_t *acc_result,
+            float *acc_input_scaled, float *acc_result_scaled, float *acc_TsTw,
+            float *acc_ctrl_pts, float *acc_weights, float *acc_coefs,
+            float *acc_tone_map, float *acc_l2_dist) {
+  dmaLoad(acc_input, host_input,
+          row_size * col_size * CHAN_SIZE * sizeof(uint8_t));
+  scale_fxp(acc_input, row_size, col_size, acc_input_scaled);
+  demosaic_fxp(acc_input_scaled, row_size, col_size, acc_result_scaled);
+  denoise_fxp(acc_result_scaled, row_size, col_size, acc_input_scaled);
+  transform_fxp(acc_input_scaled, row_size, col_size, acc_result_scaled,
+                acc_TsTw);
+  gamut_map_fxp(acc_result_scaled, row_size, col_size, acc_input_scaled,
+                acc_ctrl_pts, acc_weights, acc_coefs, acc_l2_dist);
+  tone_map_fxp(acc_input_scaled, row_size, col_size, acc_tone_map,
+               acc_result_scaled);
+  // tone_map_approx_fxp(acc_input_scaled, row_size, col_size,
+  // acc_result_scaled);
+  descale_fxp(acc_result_scaled, row_size, col_size, acc_result);
+  dmaStore(host_result, acc_result,
+           row_size * col_size * CHAN_SIZE * sizeof(uint8_t));
+}
+
+void cam_pipe(uint8_t *host_input, uint8_t *host_result, int row_size,
+              int col_size) {
+  uint8_t *acc_input, *acc_result;
+  float *acc_input_scaled, *acc_result_scaled;
+  float *host_TsTw, *host_ctrl_pts, *host_weights, *host_coefs, *host_tone_map;
+  float *acc_TsTw, *acc_ctrl_pts, *acc_weights, *acc_coefs, *acc_tone_map, *acc_l2_dist;
+
+  strcat(cam_model_path, "cam_models/NikonD7000/");
+
+  host_TsTw = get_TsTw(cam_model_path, wb_index);
+  float *trans = transpose_mat(host_TsTw, CHAN_SIZE, CHAN_SIZE);
+  free(host_TsTw);
+  host_TsTw = trans;
+  host_ctrl_pts = get_ctrl_pts(cam_model_path, num_ctrl_pts);
+  host_weights = get_weights(cam_model_path, num_ctrl_pts);
+  host_coefs = get_coefs(cam_model_path, num_ctrl_pts);
+  host_tone_map = get_tone_map(cam_model_path);
+
+  acc_input = (uint8_t*) malloc_aligned(sizeof(uint8_t) * row_size * col_size * CHAN_SIZE);
+  acc_result = (uint8_t*) malloc_aligned(sizeof(uint8_t) * row_size * col_size * CHAN_SIZE);
+  acc_input_scaled = (float*) malloc_aligned(sizeof(float) * row_size * col_size * CHAN_SIZE);
+  acc_result_scaled = (float*) malloc_aligned(sizeof(float) * row_size * col_size * CHAN_SIZE);
+  acc_TsTw = (float*) malloc_aligned(sizeof(float) * 9);
+  acc_ctrl_pts = (float*) malloc_aligned(sizeof(float) * num_ctrl_pts * CHAN_SIZE);
+  acc_weights = (float*) malloc_aligned(sizeof(float) * num_ctrl_pts * CHAN_SIZE);
+  acc_coefs = (float*) malloc_aligned(sizeof(float) * 12);
+  acc_tone_map = (float*) malloc_aligned(sizeof(float) * 256 * CHAN_SIZE);
+  acc_l2_dist = (float*) malloc_aligned(sizeof(float) * num_ctrl_pts);
+
+  // Load camera model parameters for the ISP
+  MAP_ARRAY_TO_ACCEL(ISP, "host_TsTw", host_TsTw,
+                     sizeof(float) * 9);
+  MAP_ARRAY_TO_ACCEL(ISP, "host_ctrl_pts", host_ctrl_pts,
+                     sizeof(float) * num_ctrl_pts * CHAN_SIZE);
+  MAP_ARRAY_TO_ACCEL(ISP, "host_weights", host_weights,
+                     sizeof(float) * num_ctrl_pts * CHAN_SIZE);
+  MAP_ARRAY_TO_ACCEL(ISP, "host_coefs", host_coefs,
+                     sizeof(float) * 4 * CHAN_SIZE);
+  MAP_ARRAY_TO_ACCEL(ISP, "host_tone_map", host_tone_map,
+                     sizeof(float) * 256 * CHAN_SIZE);
+  INVOKE_KERNEL(ISP, load_cam_params_hw, host_TsTw, host_ctrl_pts, host_weights,
+                host_coefs, host_tone_map, acc_TsTw, acc_ctrl_pts, acc_weights,
+                acc_coefs, acc_tone_map);
+
+  // Invoke the ISP
+  MAP_ARRAY_TO_ACCEL(ISP, "host_input", host_input,
+                     sizeof(uint8_t) * row_size * col_size * CHAN_SIZE);
+  MAP_ARRAY_TO_ACCEL(ISP, "host_result", host_result,
+                     sizeof(uint8_t) * row_size * col_size * CHAN_SIZE);
+  INVOKE_KERNEL(ISP, isp_hw, host_input, host_result, row_size, col_size,
+                acc_input, acc_result, acc_input_scaled, acc_result_scaled,
+                acc_TsTw, acc_ctrl_pts, acc_weights, acc_coefs, acc_tone_map,
+                acc_l2_dist);
+
+  free(acc_input);
+  free(acc_result);
+  free(acc_input_scaled);
+  free(acc_result_scaled);
+  free(host_TsTw);
+  free(host_ctrl_pts);
+  free(host_weights);
+  free(host_coefs);
+  free(host_tone_map);
+  free(acc_TsTw);
+  free(acc_ctrl_pts);
+  free(acc_weights);
+  free(acc_coefs);
+  free(acc_tone_map);
+  free(acc_l2_dist);
+}
+
diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/cam_pipe.h b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/cam_pipe.h
new file mode 100644
index 0000000000..83a77e01a6
--- /dev/null
+++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/cam_pipe.h
@@ -0,0 +1,7 @@
+#ifndef _CAM_PIPE_H_
+#define _CAM_PIPE_H_
+
+void cam_pipe(uint8_t *host_input, uint8_t *host_result, int row_size,
+              int col_size);
+
+#endif
diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/cam_pipe_utility.c b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/cam_pipe_utility.c
new file mode 100644
index 0000000000..f806e9ee1a
--- /dev/null
+++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/cam_pipe_utility.c
@@ -0,0 +1,82 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#include "cam_pipe_utility.h"
+//#include "pipe_stages.h"
+
+uint8_t *read_image_from_binary(char *file_path, int *row_size, int *col_size) {
+  uint8_t *image;
+  FILE *fp = fopen(file_path, "r");
+  int chan_size;
+  if (fread(row_size, sizeof(int), 1, fp) != 1)
+    assert("Failed to read row size from binary file!");
+  if (fread(col_size, sizeof(int), 1, fp) != 1)
+    assert("Failed to read col size from binary file!");
+  if (fread(&chan_size, sizeof(int), 1, fp) != 1)
+    assert("Failed to read row size from binary file!");
+  assert(chan_size == CHAN_SIZE && "Channel size read from the binary file "
+                                   "doesn't equal to the default value!\n");
+
+  int size = *row_size * *col_size * CHAN_SIZE;
+  image = malloc_aligned(sizeof(uint8_t) * size);
+  if (fread(image, sizeof(uint8_t), size, fp) != size)
+    assert("Failed to read the image from binary file!");
+  fclose(fp);
+  return image;
+}
+
+void write_image_to_binary(char *file_path, uint8_t *image, int row_size, int col_size) {
+  FILE *fp = fopen(file_path, "w");
+
+  int shape[3] = { row_size, col_size, CHAN_SIZE };
+  fwrite(shape, sizeof(int), 3, fp);
+
+  int size = row_size * col_size * CHAN_SIZE;
+  fwrite(image, sizeof(uint8_t), size, fp);
+  fclose(fp);
+}
+
+float *transpose_mat(float *inmat, int width, int height) {
+  // Define vectors
+  float *outmat;
+  int err =
+      posix_memalign((void **)&outmat, CACHELINE_SIZE, sizeof(float) * height * width);
+  assert(err == 0 && "Failed to allocate memory!");
+
+  // Transpose the matrix
+  for (int i = 0; i < height; i++) {
+    for (int j = 0; j < width; j++) {
+      outmat[j * height + i] = inmat[i * width + j];
+    }
+  }
+  return outmat;
+}
+
+void convert_hwc_to_chw(uint8_t *input, int row_size, int col_size,
+                        uint8_t **result) {
+  if (*result == NULL) {
+    *result = (uint8_t *)malloc_aligned(row_size * col_size * CHAN_SIZE *
+                                        sizeof(uint8_t));
+  }
+  ARRAY_3D(uint8_t, _input, input, col_size, CHAN_SIZE);
+  ARRAY_3D(uint8_t, _result, *result, row_size, col_size);
+  for (int h = 0; h < row_size; h++)
+    for (int w = 0; w < col_size; w++)
+      for (int c = 0; c < CHAN_SIZE; c++)
+        _result[c][h][w] = _input[h][w][c];
+}
+
+void convert_chw_to_hwc(uint8_t *input, int row_size, int col_size,
+                        uint8_t **result) {
+  if (*result == NULL) {
+    *result = (uint8_t *)malloc_aligned(row_size * col_size * CHAN_SIZE *
+                                      sizeof(uint8_t));
+  }
+  ARRAY_3D(uint8_t, _input, input, row_size, col_size);
+  ARRAY_3D(uint8_t, _result, *result, col_size, CHAN_SIZE);
+  for (int c = 0; c < CHAN_SIZE; c++)
+    for (int h = 0; h < row_size; h++)
+      for (int w = 0; w < col_size; w++)
+        _result[h][w][c] = _input[c][h][w];
+}
diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/cam_pipe_utility.h b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/cam_pipe_utility.h
new file mode 100644
index 0000000000..b4fb6cde0c
--- /dev/null
+++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/cam_pipe_utility.h
@@ -0,0 +1,16 @@
+#ifndef _CAM_PIPE_UTILITY_H_
+#define _CAM_PIPE_UTILITY_H_
+
+#include "utility.h"
+#include "pipe_stages.h"
+
+uint8_t *read_image_from_binary(char *file_path, int *row_size, int *col_size);
+void write_image_to_binary(char *file_path, uint8_t *image, int row_size,
+                           int col_size);
+float *transpose_mat(float *inmat, int width, int height);
+void convert_hwc_to_chw(uint8_t *input, int row_size, int col_size,
+                        uint8_t **result);
+void convert_chw_to_hwc(uint8_t *input, int row_size, int col_size,
+                        uint8_t **result);
+
+#endif
diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/defs.h b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/defs.h
new file mode 100644
index 0000000000..ccc8acc857
--- /dev/null
+++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/defs.h
@@ -0,0 +1,224 @@
+#ifndef _COMMON_DEFS_H_
+#define _COMMON_DEFS_H_
+
+typedef unsigned char uint8_t;
+typedef unsigned short uint16_t;
+typedef unsigned int uint32_t;
+typedef unsigned long uint64_t;
+
+#define CACHELINE_SIZE 64
+
+// Debugging message macros.
+#if DEBUG_LEVEL >= 1
+  #define INFO_MSG(args...) printf(args)
+
+  #if DEBUG_LEVEL >= 2
+    #define PRINT_MSG(args...) printf(args)
+    #define PRINT_DEBUG(hid, rows, cols, num_cols)                                 \
+        print_debug(hid, rows, cols, num_cols)
+    #define PRINT_DEBUG4D(hid, rows, cols, height)                                 \
+        print_debug4d(hid, rows, cols, height)
+    #define PRINT_DEBUG4D_FP16(hid, num, height, rows, cols)                       \
+        print_debug4d_fp16(hid, num, height, rows, cols)
+
+    #if DEBUG_LEVEL >= 3
+      #define PRINT_DEBUG_V(hid, rows, cols, num_cols)                               \
+          print_debug(hid, rows, cols, num_cols)
+      #define PRINT_DEBUG4D_V(hid, rows, cols, height)                               \
+          print_debug4d(hid, rows, cols, height)
+      #define PRINT_MSG_V(args...) printf(args)
+    #else
+      #define PRINT_DEBUG_V(hid, rows, cols, num_cols)
+      #define PRINT_DEBUG4D_V(hid, rows, cols, height)
+      #define PRINT_MSG_V(args...)
+    #endif
+  #else
+    #define PRINT_MSG(args...)
+    #define PRINT_DEBUG(hid, rows, cols, num_cols)
+    #define PRINT_DEBUG4D(hid, rows, cols, height)
+    #define PRINT_DEBUG4D_FP16(hid, num, height, rows, cols)
+    #define PRINT_DEBUG_V(hid, rows, cols, height)
+    #define PRINT_DEBUG4D_V(hid, rows, cols, height)
+    #define PRINT_MSG_V(args...)
+  #endif
+#else
+  #define INFO_MSG(args...)
+  #define PRINT_DEBUG(hid, rows, cols, num_cols)
+  #define PRINT_DEBUG4D(hid, rows, cols, height)
+  #define PRINT_DEBUG4D_FP16(hid, num, height, rows, cols)
+  #define PRINT_MSG(args...)
+  #define PRINT_DEBUG_V(hid, rows, cols, height)
+  #define PRINT_DEBUG4D_V(hid, rows, cols, height)
+  #define PRINT_MSG_V(args...)
+#endif
+
+#define STRING(arg) #arg
+
+// This is to avoid a ton of spurious unused variable warnings when
+// we're not building for gem5.
+#define UNUSED(x) (void)(x)
+
+// Macros for computing the maximum of a group of elements.
+//
+// Why macros and not functions (or a loop)? A loop takes O(n) cycles to
+// compute the maximum, when it could be done in O(log n) time with a tree
+// based implementation. But Aladdin regards function calls as a hard
+// dependency that it does not optimize across, so we would not get the
+// parallelism we expect from the tree. Thus, these must be macros.
+//
+// I've only implemented a few of these. These are only meant for the pooling
+// layers, and we shouldn't need more than a 3x3 pooling layer anyways.
+#define max2(A, B) (((A) > (B)) ? (A) : (B))
+#define max3(e0, e1, e2) max2(max2(e0, e1), e2)
+#define max4(e0, e1, e2, e3) max2(max2(e0, e1), max2(e2, e3))
+#define max8(e0, e1, e2, e3, e4, e5, e6, e7)                                   \
+    max2(max4(e0, e1, e2, e3), max4(e4, e5, e6, e7))
+#define max9(e0, e1, e2, e3, e4, e5, e6, e7, e8)                               \
+    max2(max8(e0, e1, e2, e3, e4, e5, e6, e7), e8)
+
+#define min2(A, B) (((A) < (B)) ? (A) : (B))
+
+#define FRAC_CEIL(A, B) ((A) / (B) + ((A) % (B) != 0))
+// Convenience macros to switch between invoking an accelerator (if building a
+// binary for gem5) or just calling the kernel function in software.
+//
+// Usage:
+//
+//  These macros expand differently based on whether the GEM5_HARNESS macro is
+//  defined. If so, then this binary is meant to be run under gem5, invoking
+//  accelerators; if not, this binary should run the pure software version of
+//  the accelerated kernels.
+//
+//  If GEM5_HARNESS is defined:
+//
+//     MAP_ARRAY_TO_ACCEL(myReqCode, myArrayName, myArrayPtr, mySize)
+//        ===>   mapArrayToAccelerator(myReqCode, myArrayName, myArrayPtr, mySize)
+//
+//     INVOKE_KERNEL(myReqCode, kernelFuncName, args...)
+//        ===>   invokeAcceleratorAndBlock(myReqCode)
+//
+//  Otherwise:
+//     MAP_ARRAY_TO_ACCEL(myReqCode, myArrayName, myArrayPtr, mySize)
+//        expands to nothing
+//
+//     INVOKE_KERNEL(myReqCode, kernelFuncName, args...)
+//        ===>  kernelFuncName(args)
+//
+#ifdef GEM5_HARNESS
+
+#define MAP_ARRAY_TO_ACCEL(req_code, name, base_addr, size)                    \
+    mapArrayToAccelerator(req_code, name, base_addr, size)
+#define INVOKE_KERNEL(req_code, kernel_ptr, args...)                           \
+    do {                                                                       \
+        UNUSED(kernel_ptr);                                                    \
+        invokeAcceleratorAndBlock(req_code);                                   \
+    } while (0)
+#define INVOKE_KERNEL_NOBLOCK(req_code, finish_flag, kernel_ptr, args...)      \
+    do {                                                                       \
+        UNUSED(kernel_ptr);                                                    \
+        invokeAcceleratorAndReturn2(req_code, finish_flag);                    \
+    } while (0)
+
+#define INVOKE_DMA_READ_TRAFFIC_GEN(start_addr, size)                          \
+    do {                                                                       \
+        invokeAladdinTrafficGenAndBlock(start_addr, size, false, false);       \
+    } while (0)
+#define INVOKE_DMA_WRITE_TRAFFIC_GEN(start_addr, size)                         \
+    do {                                                                       \
+        invokeAladdinTrafficGenAndBlock(start_addr, size, true, false);        \
+    } while (0)
+#define INVOKE_ACP_READ_TRAFFIC_GEN(start_addr, size)                          \
+    do {                                                                       \
+        invokeAladdinTrafficGenAndBlock(start_addr, size, false, true);        \
+    } while (0)
+#define INVOKE_ACP_WRITE_TRAFFIC_GEN(start_addr, size)                         \
+    do {                                                                       \
+        invokeAladdinTrafficGenAndBlock(start_addr, size, true, true);         \
+    } while (0)
+
+#else
+
+#define MAP_ARRAY_TO_ACCEL(req_code, name, base_addr, size)                    \
+    do {                                                                       \
+        INFO_MSG("Mapping array %s @ %p, size %d.\n",                          \
+                 name, (void*)base_addr, (int)(size));                         \
+        UNUSED(req_code);                                                      \
+        UNUSED(name);                                                          \
+        UNUSED(base_addr);                                                     \
+        UNUSED(size);                                                          \
+    } while (0)
+#define INVOKE_KERNEL(req_code, kernel_ptr, args...) kernel_ptr(args)
+#define INVOKE_KERNEL_NOBLOCK(req_code, finish_flag, kernel_ptr, args...)      \
+    kernel_ptr(args)
+#define INVOKE_DMA_READ_TRAFFIC_GEN(start_addr, size)                          \
+    do {                                                                       \
+        UNUSED(start_addr);                                                    \
+        UNUSED(size);                                                          \
+    } while (0)
+#define INVOKE_DMA_WRITE_TRAFFIC_GEN(start_addr, size)                         \
+    do {                                                                       \
+        UNUSED(start_addr);                                                    \
+        UNUSED(size);                                                          \
+    } while (0)
+#define INVOKE_ACP_READ_TRAFFIC_GEN(start_addr, size)                          \
+    do {                                                                       \
+        UNUSED(start_addr);                                                    \
+        UNUSED(size);                                                          \
+    } while (0)
+#define INVOKE_ACP_WRITE_TRAFFIC_GEN(start_addr, size)                         \
+    do {                                                                       \
+        UNUSED(start_addr);                                                    \
+        UNUSED(size);                                                          \
+    } while (0)
+
+#endif
+
+// Simplified version of MAP_ARRAY_TO_ACCEL.
+//
+// This assumes that the current name of the base pointer is also the name of
+// the array in the top level function of the dynamic trace. THIS IS VERY
+// IMPORTANT - if the argument passed to a top level function has been renamed in
+// the function, then this WILL NOT WORK!
+//
+// MAP_ARRAY(myReqCode, myArray, mySize)
+//    ===>   MAP_ARRAY_TO_ACCEL(myReqCode, "myArray", myArray, mySize)
+#define MAP_ARRAY(req_code, name_and_base_addr, size)                          \
+    MAP_ARRAY_TO_ACCEL(                                                        \
+            req_code, STRING(name_and_base_addr), name_and_base_addr, size)
+
+// Use these convenience macros to cast a raw pointer into a multidimensional
+// variable-length array, which lets us use [] notation inside of the ugly
+// sub2ind syntax!
+//
+// Usage:
+//   If we have an array like array[5][4]:
+//      ARRAY_2D(TYPE, output_name, array, 4);
+//
+//   If we have an array like array[5][4][3]:
+//      ARRAY_3D(TYPE, output_name, array, 4, 3);
+//
+//   If we have an array like array[5][4][3][2]
+//      ARRAY_4D(TYPE, output_name, array, 4, 3, 2);
+//
+//   And so on...
+#define ARRAY_1D(TYPE, output_array_name, input_array_name)                    \
+    TYPE* output_array_name = (TYPE*)input_array_name
+
+#define ARRAY_2D(TYPE, output_array_name, input_array_name, DIM_1)             \
+    TYPE(*output_array_name)[DIM_1] = (TYPE(*)[DIM_1])input_array_name
+
+#define ARRAY_3D(TYPE, output_array_name, input_array_name, DIM_1, DIM_2)      \
+    TYPE(*output_array_name)[DIM_1][DIM_2] =                                   \
+        (TYPE(*)[DIM_1][DIM_2])input_array_name
+
+#define ARRAY_4D(                                                              \
+    TYPE, output_array_name, input_array_name, DIM_1, DIM_2, DIM_3)            \
+        TYPE(*output_array_name)[DIM_1][DIM_2][DIM_3] =                        \
+            (TYPE(*)[DIM_1][DIM_2][DIM_3])input_array_name
+
+#define ARRAY_5D(                                                              \
+    TYPE, output_array_name, input_array_name, DIM_1, DIM_2, DIM_3, DIM_4)     \
+        TYPE(*output_array_name)[DIM_1][DIM_2][DIM_3][DIM_4] =                 \
+            (TYPE(*)[DIM_1][DIM_2][DIM_3][DIM_4])input_array_name
+
+#endif
diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/dma_interface.c b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/dma_interface.c
new file mode 100644
index 0000000000..81bce54469
--- /dev/null
+++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/dma_interface.c
@@ -0,0 +1,73 @@
+#include <assert.h>
+#include <string.h>
+#include "dma_interface.h"
+
+// All _dmaImplN functions must be always inlined or we'll get extra functions
+// in the trace.
+
+#if defined(DMA_INTERFACE_V3)
+
+// Starting with version 3, all versioning will be distinguished by the return
+// value of the DMA functions.
+
+__attribute__((__always_inline__))
+int _dmaImpl3(void* dst_addr, void* src_addr, size_t size) {
+  assert(size > 0);
+  memmove(dst_addr, src_addr, size);
+  return 3;
+}
+
+int dmaLoad(void* dst_addr, void* src_host_addr, size_t size) {
+  return _dmaImpl3(dst_addr, src_host_addr, size);
+}
+
+int dmaStore(void* dst_host_addr, void* src_addr, size_t size) {
+  return _dmaImpl3(dst_host_addr, src_addr, size);
+}
+
+int setReadyBits(void* start_addr, size_t size, unsigned value) {
+  asm("");
+  return 0;
+}
+
+#elif defined(DMA_INTERFACE_V2)
+
+// With version 2 and earlier, we return (void*)NULL and use the number of
+// function arguments to distinguish the DMA functions.
+
+__attribute__((__always_inline__))
+void* _dmaImpl2(void* base_addr, size_t src_off, size_t dst_off, size_t size) {
+  assert(size > 0);
+  memmove(base_addr + dst_off, base_addr + src_off, size);
+  return NULL;
+}
+
+void* dmaLoad(void* base_addr, size_t src_off, size_t dst_off, size_t size) {
+  return _dmaImpl2(base_addr, src_off, dst_off, size);
+}
+
+void* dmaStore(void* base_addr, size_t src_off, size_t dst_off, size_t size) {
+  return _dmaImpl2(base_addr, src_off, dst_off, size);
+}
+
+#else
+
+__attribute__((__always_inline__))
+void* _dmaImpl1(void* base_addr, size_t offset, size_t size) {
+  assert(size > 0);
+  asm("");
+  return NULL;
+}
+
+void* dmaLoad(void* addr, size_t offset, size_t size) {
+  return _dmaImpl1(addr, offset, size);
+}
+
+void* dmaStore(void* addr, size_t offset, size_t size) {
+  return _dmaImpl1(addr, offset, size);
+}
+#endif
+
+void dmaFence() {
+  asm("");
+}
diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/dma_interface.h b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/dma_interface.h
new file mode 100644
index 0000000000..f23234eede
--- /dev/null
+++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/dma_interface.h
@@ -0,0 +1,44 @@
+#ifndef __DMA_INTERFACE_H__
+#define __DMA_INTERFACE_H__
+
+#include <stddef.h>
+
+#define PAGE_SIZE 4096
+
+#if defined(DMA_INTERFACE_V3)
+
+// Version 3 of the DMA interface enables memcpy operations from arbitrary
+// source and destination addresses.
+
+int dmaLoad(void* dst_addr, void* src_host_addr, size_t size);
+int dmaStore(void* dst_host_addr, void* src_addr, size_t size);
+
+// The user can explicitly toggle the state of ready bits, if ready mode is
+// enabled. This requires support from DMA v3.
+int setReadyBits(void* start_addr, size_t size, unsigned value);
+
+#elif defined(DMA_INTERFACE_V2)
+
+#warning "DMA interface v2 is deprecated!"
+
+// Version 2 of the DMA interface separates source and destination offsets from
+// the base address into different fields, and on the host machine, memory is
+// actually copied from source to destination (the memory copy will not show up
+// in the trace).
+
+void* dmaLoad(void* base_addr, size_t src_off, size_t dst_off, size_t size);
+void* dmaStore(void* base_addr, size_t src_off, size_t dst_off, size_t size);
+
+#else
+
+#warning "DMA interface v1 is deprecated!"
+
+// Version 1 of the DMA interface is now deprecated and will be removed entirely.
+
+void* dmaLoad(void* addr, size_t offset, size_t size);
+void* dmaStore(void* addr, size_t offset, size_t size);
+
+#endif
+void dmaFence();
+
+#endif
diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/gem5_harness.h b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/gem5_harness.h
new file mode 100644
index 0000000000..36859cfe1b
--- /dev/null
+++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/gem5_harness.h
@@ -0,0 +1,10 @@
+#ifndef _GEM5_HARNESS_H_
+#define _GEM5_HARNESS_H_
+
+/* One header to include them all. */
+
+//#include "aladdin_sys_connection.h"
+//#include "aladdin_sys_constants.h"
+#include "dma_interface.h"
+
+#endif
diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/load_cam_model.c b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/load_cam_model.c
new file mode 100644
index 0000000000..124fe0b7d1
--- /dev/null
+++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/load_cam_model.c
@@ -0,0 +1,335 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include "utility.h"
+#include "pipe_stages.h"
+#include "load_cam_model.h"
+
+// Get color space transform
+float* get_Ts(char* cam_model_path) {
+  float *Ts;
+  int err = posix_memalign((void **)&Ts, CACHELINE_SIZE, sizeof(float) * 9);
+  assert(err == 0 && "Failed to allocate memory!");
+  char *line;
+  char *str;
+  float line_data[3];
+  size_t len = 0;
+  int line_idx = 0;
+
+  // Open file for reading
+  char file_name[] = "raw2jpg_transform.txt";
+  char file_path[100];
+  strcpy(file_path, cam_model_path);
+  strcat(file_path, file_name);
+  FILE *fp = fopen(file_path, "r");
+  if (fp == NULL) {
+    printf("Didn't find the camera model file!\n");
+    exit(1);
+  }
+
+  while (getline(&line, &len, fp) != -1) {
+    str = strtok(line, " \n");
+    int i = 0;
+    while (str != NULL) {
+      line_data[i] = atof(str); 
+      str = strtok(NULL, " \n");
+      i++;
+    }
+
+    if (line_idx >= 1 && line_idx <= 3) {
+      for (int j = 0; j < 3; j++) {
+        Ts[(line_idx - 1) * 3 + j] = line_data[j];
+      }
+    }
+    line_idx = line_idx + 1;
+  }
+  fclose(fp);
+  free(line);
+  return Ts;
+}
+
+// Get white balance transform
+float* get_Tw(char* cam_model_path, int wb_index) {
+  float *Tw;
+  int err = posix_memalign((void **)&Tw, CACHELINE_SIZE, sizeof(float) * 9);
+  assert(err == 0 && "Failed to allocate memory!");
+  char *line;
+  char *str;
+  float line_data[3];
+  size_t len = 0;
+  int line_idx = 0;
+
+  // Calculate base for the white balance transform selected
+  // For more details see the camera model readme
+  int wb_base  = 8 + 5*(wb_index-1);
+
+  // Open file for reading
+  // Open file for reading
+  char file_name[] = "raw2jpg_transform.txt";
+  char file_path[100];
+  strcpy(file_path, cam_model_path);
+  strcat(file_path, file_name);
+  FILE *fp = fopen(file_path, "r");
+  if (fp == NULL) {
+    printf("Didn't find the camera model file!\n");
+    exit(1);
+  }
+
+  // Read a line at a time
+  while (getline(&line, &len, fp) != -1) {
+    str = strtok(line, " \n");
+    int i = 0;
+    while (str != NULL) {
+      line_data[i] = atof(str); 
+      str = strtok(NULL, " \n");
+      i++;
+    }
+
+    if (line_idx == wb_base) {
+      // Convert the white balance vector into a diagaonal matrix
+      for (int i=0; i<3; i++) {
+        for (int j=0; j<3; j++) {
+          if (i == j) {
+            Tw[i * 3 + j] = line_data[i];
+          } else {
+            Tw[i * 3 + j] = 0.0;
+          }
+        }
+      }
+    }
+    line_idx = line_idx + 1;
+  }
+  fclose(fp);
+  free(line);
+  return Tw;
+}
+
+
+// Get combined transforms for checking
+float* get_TsTw(char* cam_model_path, int wb_index) {
+  float *TsTw;
+  int err = posix_memalign((void **)&TsTw, CACHELINE_SIZE, sizeof(float) * 9);
+  assert(err == 0 && "Failed to allocate memory!");
+  char *line;
+  char *str;
+  float line_data[3];
+  size_t len = 0;
+  int line_idx = 0;
+
+  // Calculate base for the white balance transform selected
+  // For more details see the camera model readme
+  int wb_base  = 5 + 5*(wb_index-1);
+
+  // Open file for reading
+  char file_name[] = "raw2jpg_transform.txt";
+  char file_path[100];
+  strcpy(file_path, cam_model_path);
+  strcat(file_path, file_name);
+  FILE *fp = fopen(file_path, "r");
+  if (fp == NULL) {
+    printf("Didn't find the camera model file!\n");
+    exit(1);
+  }
+
+  // Read a line at a time
+  while (getline(&line, &len, fp) != -1) {
+    str = strtok(line, " \n");
+    int i = 0;
+    while (str != NULL) {
+      line_data[i] = atof(str); 
+      str = strtok(NULL, " \n");
+      i++;
+    }
+
+    if (line_idx >= wb_base && line_idx <= (wb_base + 2)) {
+      for (int j = 0; j < 3; j++) {
+        TsTw[(line_idx - wb_base) * 3 + j] = line_data[j];
+      }
+    }
+    line_idx = line_idx + 1;
+  }
+  fclose(fp);
+  free(line);
+  return TsTw;
+}
+
+// Get control points
+float* get_ctrl_pts(char* cam_model_path, int num_cntrl_pts) {
+  float *ctrl_pnts;
+  int err = posix_memalign((void **)&ctrl_pnts, CACHELINE_SIZE,
+                           sizeof(float) * num_cntrl_pts * 3);
+  assert(err == 0 && "Failed to allocate memory!");
+  char *line;
+  char *str;
+  float line_data[3];
+  size_t len = 0;
+  int line_idx = 0;
+
+  // Open file for reading
+  char file_name[] = "raw2jpg_ctrlPoints.txt";
+  char file_path[100];
+  strcpy(file_path, cam_model_path);
+  strcat(file_path, file_name);
+  FILE *fp = fopen(file_path, "r");
+  if (fp == NULL) {
+    printf("Didn't find the camera model file!\n");
+    exit(1);
+  }
+
+  // Read a line at a time
+  while (getline(&line, &len, fp) != -1) {
+    str = strtok(line, " \n");
+    int i = 0;
+    while (str != NULL) {
+      line_data[i] = atof(str);
+      str = strtok(NULL, " \n");
+      i++;
+    }
+
+    if (line_idx >= 1 && line_idx <= num_cntrl_pts) {
+      for (int j = 0; j < 3; j++) {
+        ctrl_pnts[(line_idx - 1) * 3 + j] = line_data[j];
+      }
+    }
+    line_idx = line_idx + 1;
+  }
+  fclose(fp);
+  free(line);
+  return ctrl_pnts;
+}
+
+// Get weights
+float* get_weights(char* cam_model_path, int num_cntrl_pts) {
+  float *weights;
+  int err = posix_memalign((void **)&weights, CACHELINE_SIZE,
+                           sizeof(float) * num_cntrl_pts * 3);
+  assert(err == 0 && "Failed to allocate memory!");
+  char *line;
+  char *str;
+  float line_data[3];
+  size_t len = 0;
+  int line_idx = 0;
+
+  // Open file for reading
+  char file_name[] = "raw2jpg_coefs.txt";
+  char file_path[100];
+  strcpy(file_path, cam_model_path);
+  strcat(file_path, file_name);
+  FILE *fp = fopen(file_path, "r");
+  if (fp == NULL) {
+    printf("Didn't find the camera model file!\n");
+    exit(1);
+  }
+
+  // Read a line at a time
+  while (getline(&line, &len, fp) != -1) {
+    str = strtok(line, " \n");
+    int i = 0;
+    while (str != NULL) {
+      line_data[i] = atof(str);
+      str = strtok(NULL, " \n");
+      i++;
+    }
+
+    if (line_idx >= 1 && line_idx <= num_cntrl_pts) {
+      for (int j = 0; j < 3; j++) {
+        weights[(line_idx - 1) * 3 + j] = line_data[j];
+      }
+    }
+    line_idx = line_idx + 1;
+  }
+  fclose(fp);
+  free(line);
+  return weights;
+}
+
+// Get coeficients
+float* get_coefs(char* cam_model_path, int num_cntrl_pts) {
+  float *coefs;
+  int err = posix_memalign((void **)&coefs, CACHELINE_SIZE, sizeof(float) * 12);
+  assert(err == 0 && "Failed to allocate memory!");
+  char *line;
+  char *str;
+  float line_data[3];
+  size_t len = 0;
+  int line_idx = 0;
+
+  // Open file for reading
+  char file_name[] = "raw2jpg_coefs.txt";
+  char file_path[100];
+  strcpy(file_path, cam_model_path);
+  strcat(file_path, file_name);
+  FILE *fp = fopen(file_path, "r");
+  if (fp == NULL) {
+    printf("Didn't find the camera model file!\n");
+    exit(1);
+  }
+
+  // Read a line at a time
+  while (getline(&line, &len, fp) != -1) {
+    str = strtok(line, " \n");
+    int i = 0;
+    while (str != NULL) {
+      line_data[i] = atof(str);
+      str = strtok(NULL, " \n");
+      i++;
+    }
+
+    if (line_idx >= (num_cntrl_pts + 1) && line_idx <= (num_cntrl_pts + 4)) {
+      for (int j = 0; j < 3; j++) {
+        coefs[(line_idx - num_cntrl_pts - 1) * 3 + j] = line_data[j];
+      }
+    }
+    line_idx = line_idx + 1;
+  }
+  fclose(fp);
+  free(line);
+  return coefs;
+}
+
+
+// Get tone mapping table
+float* get_tone_map(char* cam_model_path) {
+  float *tone_map;
+  int err = posix_memalign((void **)&tone_map, CACHELINE_SIZE,
+                           sizeof(float) * 256 * CHAN_SIZE);
+  assert(err == 0 && "Failed to allocate memory!");
+  char *line;
+  char *str;
+  float line_data[3];
+  size_t len = 0;
+  int line_idx = 0;
+
+  // Open file for reading
+  char file_name[] = "raw2jpg_respFcns.txt";
+  char file_path[100];
+  strcpy(file_path, cam_model_path);
+  strcat(file_path, file_name);
+  FILE *fp = fopen(file_path, "r");
+  if (fp == NULL) {
+    printf("Didn't find the camera model file!\n");
+    exit(1);
+  }
+
+  // Read a line at a time
+  while (getline(&line, &len, fp) != -1) {
+    str = strtok(line, " \n");
+    int i = 0;
+    while (str != NULL) {
+      line_data[i] = atof(str);
+      str = strtok(NULL, " \n");
+      i++;
+    }
+
+    if (line_idx >= 1 && line_idx <= 256) {
+      for (int j = 0; j < 3; j++) {
+        tone_map[(line_idx - 1) * 3 + j] = line_data[j];
+      }
+    }
+    line_idx = line_idx + 1;
+  }
+  fclose(fp);
+  free(line);
+  return tone_map;
+}
diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/load_cam_model.h b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/load_cam_model.h
new file mode 100644
index 0000000000..8e5ee95217
--- /dev/null
+++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/load_cam_model.h
@@ -0,0 +1,25 @@
+#ifndef _LOAD_CAM_MODEL_H_
+#define _LOAD_CAM_MODEL_H_
+
+// Get color space transform
+float *get_Ts(char *cam_model_path);
+
+// Get white balance transform
+float *get_Tw(char *cam_model_path, int wb_index);
+
+// Get combined transforms for checking
+float *get_TsTw(char *cam_model_path, int wb_index);
+
+// Get control points
+float *get_ctrl_pts(char *cam_model_path, int num_cntrl_pts);
+
+// Get weights
+float *get_weights(char *cam_model_path, int num_cntrl_pts);
+
+// Get coeficients
+float *get_coefs(char *cam_model_path, int num_cntrl_pts);
+
+// Get tone mapping table
+float *get_tone_map(char *cam_model_path);
+
+#endif
diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/main.c b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/main.c
new file mode 100644
index 0000000000..cc75beb664
--- /dev/null
+++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/main.c
@@ -0,0 +1,156 @@
+
+#include <stdlib.h>
+#include "utility.h"
+#include "visc.h"
+#include "defs.h"
+
+
+typedef struct __attribute__((__packed__)) {
+    float* input; size_t bytes_input;
+    float* result; size_t bytes_result;
+} 
+RootIn;
+
+/*
+typedef struct __attribute__((__packed__)) {
+    uint8_t *input; size_t bytes_input;
+    uint8_t *result; size_t bytes_result;
+    float *input_scaled; size_t bytes_input_scaled; 
+    float *result_scaled; size_t bytes_result_scaled;
+    float *demosaic_out; size_t bytes_demosaic_out;
+    float *denoise_out; size_t bytes_denoise_out;
+    float *transform_out; size_t bytes_transform_out;
+    float *gamut_out;size_t bytes_gamut_out;
+    float *TsTw; size_t bytes_TsTw;
+    float *ctrl_pts; size_t bytes_ctrl_pts;
+    float *weights; size_t bytes_weights;
+    float*coefs; size_t bytes_coefs;
+    float *l2_dist; size_t bytes_l2_dist;
+    float *tone_map; size_t bytes_tone_map;
+    int row_size; int col_size;
+} 
+RootIn;
+*/
+
+
+
+void scale_values(float* input, size_t num_elems) {
+
+  __visc__hint(DEVICE);
+  __visc__attributes(1, input, 1, input);
+  
+  for (int ind = 0; ind < num_elems; ind++){
+    input[ind] = input[ind] * 2.0;
+  }
+  
+  __visc__return(1, num_elems);
+}
+
+
+
+
+void graphRoot(/*0*/ float* input, /*1*/ size_t bytes_input, 
+               /*2*/ float* result, /*3*/ size_t bytes_result) {
+
+  //Specifies compilation target for current node
+  __visc__hint(CPU_TARGET);
+
+  __visc__attributes(2, input, result, 2, input, result);
+
+  // Create an 0D (specified by 1st argument) HPVM node - so a single node
+  // associated with node function ---_fxp_wrapper
+
+  void* scaleNode = __visc__createNodeND(0, scale_values);
+    
+  // BindIn binds inputs of current node with specified node
+  // - destination node
+  // - argument position in argument list of function of source node
+  // - argument position in argument list of function of destination node
+  // - streaming (1) or non-streaming (0)
+
+  // Edge transfers data between nodes within the same level of hierarchy.
+  // - source and destination dataflow nodes
+  // - edge type, all-all (1) or one-one(0)
+  // - source position (in output struct of source node)
+  // - destination position (in argument list of destination node)
+  // - streaming (1) or non-streaming (0)
+
+  // scale_fxp inputs
+  __visc__bindIn(scaleNode, 0, 0, 0); // input -> ScNode:input
+  __visc__bindIn(scaleNode, 1, 1, 0); // bytes_input -> ScNode:bytes_input
+    
+  // Similar to bindIn, but for the output. Output of a node is a struct, and
+  // we consider the fields in increasing ordering.
+  __visc__bindOut(scaleNode, 0, 0, 0);
+    
+}
+
+
+
+
+
+int main(int argc, char* argv[]) {
+
+    size_t input_size = 100;
+    size_t result_size = 100;
+
+    size_t input_bytes = input_size * sizeof(float);
+    size_t result_bytes = result_size * sizeof(float);
+
+    // This is host_input in cam_pipe()
+    float* input = (float*) malloc(input_bytes);
+    for(unsigned int i = 0; i < input_size; i++){
+      input[i] = 1.0;
+    }
+    // This is host_result in cam_pipe()
+    float* result = (float*) malloc(result_bytes);
+
+   
+    __visc__init();
+
+    RootIn* rootArgs = (RootIn*) malloc(sizeof(RootIn));
+
+    // Set up HPVM DFG inputs in the rootArgs struct.
+    rootArgs->input = input;
+    rootArgs->bytes_input = input_size;
+
+    printf("input = %d input_bytes = %d \n", input, input_bytes);
+    
+    rootArgs->result = result;
+    rootArgs->bytes_result = result_size;
+
+  
+    llvm_visc_track_mem(input, input_bytes);
+    llvm_visc_track_mem(result, result_bytes);
+
+
+    void* testDFG = __visc__launch(0, graphRoot, (void*) rootArgs);
+    __visc__wait(testDFG);
+
+
+    printf("input = %d \n", input);
+    
+    llvm_visc_request_mem(input, input_bytes);
+    //llvm_visc_request_mem(result, result_bytes);
+
+    printf("requested mem \n");
+
+    for(unsigned int i = 0; i < input_size; i++){
+      printf("input[%d] = %f \n", i, input[i]);
+    }
+
+    //llvm_visc_untrack_mem(input);
+    //llvm_visc_untrack_mem(result);
+
+    printf ("untracked mem \n");
+    
+    __visc__cleanup();
+
+    printf ("cleaned up visc");
+
+    return 0;
+}
+
+
+
+    
diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/main_old.c b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/main_old.c
new file mode 100644
index 0000000000..ea42ad0bf8
--- /dev/null
+++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/main_old.c
@@ -0,0 +1,865 @@
+#include <argp.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+#include <math.h>
+#include "utility.h"
+
+#include "cam_pipe_utility.h"
+#include "pipe_stages.h"
+#include "load_cam_model.h"
+
+#include "visc.h"
+
+int NUM_TEST_CASES;
+int NUM_CLASSES;
+int INPUT_DIM;
+int NUM_WORKER_THREADS;
+
+// Type of struct that is used to pass arguments to the HPVM dataflow graph
+// using the hpvm launch operation
+typedef struct __attribute__((__packed__)) {
+    uint8_t *input; size_t bytes_input;
+    uint8_t *result; size_t bytes_result;
+    float *input_scaled; size_t bytes_input_scaled; 
+    float *result_scaled; size_t bytes_result_scaled;
+    float *demosaic_out; size_t bytes_demosaic_out;
+    float *denoise_out; size_t bytes_denoise_out;
+    float *transform_out; size_t bytes_transform_out;
+    float *gamut_out;size_t bytes_gamut_out;
+    float *TsTw; size_t bytes_TsTw;
+    float *ctrl_pts; size_t bytes_ctrl_pts;
+    float *weights; size_t bytes_weights;
+    float*coefs; size_t bytes_coefs;
+    float *l2_dist; size_t bytes_l2_dist;
+    float *tone_map; size_t bytes_tone_map;
+    int row_size; int col_size;
+} 
+RootIn;
+
+typedef enum _argnum {
+    RAW_IMAGE_BIN,
+    OUTPUT_IMAGE_BIN,
+    NUM_REQUIRED_ARGS,
+    DATA_FILE = NUM_REQUIRED_ARGS,
+    NUM_ARGS,
+} argnum;
+
+typedef struct _arguments {
+    char* args[NUM_ARGS];
+    int num_inputs;
+    int num_threads;
+} arguments;
+
+static char prog_doc[] = "\nCamera pipeline on gem5-Aladdin.\n";
+static char args_doc[] = "path/to/raw-image-binary path/to/output-image-binary";
+static struct argp_option options[] = {
+    { "num-inputs", 'n', "N", 0, "Number of input images" }, { 0 },
+    { "data-file", 'f', "F", 0,
+      "File to read data and weights from (if data-init-mode == READ_FILE or "
+      "save-params is true). *.txt files are decoded as text files, while "
+      "*.bin files are decoded as binary files." },
+};
+
+static error_t parse_opt(int key, char* arg, struct argp_state* state) {
+    arguments* args = (arguments*)(state->input);
+    switch (key) {
+        case 'n': {
+            args->num_inputs = strtol(arg, NULL, 10);
+            break;
+        }
+        case 'f': {
+            args->args[DATA_FILE] = arg;
+            break;
+        }
+        case 't': {
+            args->num_threads = strtol(arg, NULL, 10);
+            break;
+        }
+        case ARGP_KEY_ARG: {
+            if (state->arg_num >= NUM_REQUIRED_ARGS)
+                argp_usage(state);
+            args->args[state->arg_num] = arg;
+            break;
+        }
+        case ARGP_KEY_END: {
+            if (state->arg_num < NUM_REQUIRED_ARGS) {
+                fprintf(stderr,
+                        "Not enough arguments! Got %d, require %d.\n",
+                        state->arg_num,
+                        NUM_REQUIRED_ARGS);
+                argp_usage(state);
+            }
+            break;
+        }
+        default:
+            return ARGP_ERR_UNKNOWN;
+    }
+    return 0;
+}
+
+void set_default_args(arguments* args) {
+    args->num_inputs = 1;
+    args->num_threads = 0;
+    for (int i = 0; i < NUM_ARGS; i++) {
+        args->args[i] = NULL;
+    }
+}
+
+static struct argp parser = { options, parse_opt, args_doc, prog_doc };
+
+// Helper function for printing intermediate results
+void descale_cpu(float *input, size_t bytes_input, 
+                 uint8_t *output, size_t bytes_result,
+                 int row_size, int col_size) {
+  
+  for (int chan = 0; chan < CHAN_SIZE; chan++)
+    for (int row = 0; row < row_size; row++)
+      for (int col = 0; col < col_size; col++) {
+        int index = (chan*row_size + row) * col_size + col;
+        output[index] = min(max(input[index] * 255, 0), 255);
+      }
+}
+
+static void sort(float arr[], int n) {
+    int i, j;
+    for (i = 0; i < n - 1; i++)
+        for (j = 0; j < n - i - 1; j++)
+            if (arr[j] > arr[j + 1]) {
+                float temp = arr[j];
+                arr[j] = arr[j + 1];
+                arr[j + 1] = temp;
+            }
+}
+
+/**************************************************************/
+/*** HPVM Leaf node Functions - Performing the computations ***/
+/**************************************************************/
+
+// In this benchmark, no use of HPVM query intrinsics in the leaf node functions
+
+// Leaf HPVM node function for scale
+void scale_fxp(uint8_t *input, size_t bytes_input, 
+               float *output, size_t bytes_output,
+               int row_size, int col_size) {
+
+  //Specifies compilation target for current node
+  __visc__hint(DEVICE);
+
+  // Specifies pointer arguments that will be used as "in" and "out" arguments
+  // - count of "in" arguments
+  // - list of "in" argument , and similar for "out"
+  __visc__attributes(2, input, output, 1, output);
+  
+  for (int chan = 0; chan < CHAN_SIZE; chan++)
+    for (int row = 0; row < row_size; row++)
+      for (int col = 0; col < col_size; col++){
+        int index = (chan*row_size + row) * col_size + col;
+        output[index] = input[index] * 1.0 / 255;
+      }
+  __visc__return(1, bytes_output);
+}
+
+// Leaf HPVM node function for descale
+void descale_fxp(float *input, size_t bytes_input, 
+                 uint8_t *output, size_t bytes_result,
+                 int row_size, int col_size) {
+  __visc__hint(DEVICE);
+  __visc__attributes(2, input, output, 1, output);
+  
+  for (int chan = 0; chan < CHAN_SIZE; chan++)
+    for (int row = 0; row < row_size; row++)
+      for (int col = 0; col < col_size; col++) {
+        int index = (chan*row_size + row) * col_size + col;
+        output[index] = min(max(input[index] * 255, 0), 255);
+      }
+  __visc__return(1, bytes_result);
+}
+
+// Leaf HPVM node function for demosaicing
+void demosaic_fxp(float *input, size_t bytes_input, 
+                  float *result, size_t bytes_result,
+                  int row_size, int col_size) {
+  __visc__hint(DEVICE);
+  __visc__attributes(2, input, result, 1, result);
+  
+  for (int row = 1; row < row_size - 1; row++)
+    for (int col = 1; col < col_size - 1; col++) {
+        int index_0 = (0 * row_size + row) * col_size + col;
+        int index_1 = (1 * row_size + row) * col_size + col;
+        int index_2 = (2 * row_size + row) * col_size + col;
+        if (row % 2 == 0 && col % 2 == 0) {
+            // Green pixel
+            // Getting the R values
+            float R1 = input[index_0 - 1];
+            float R2 = input[index_0 + 1];
+            // Getting the B values
+            float B1 = input[index_2 - col_size];
+            float B2 = input[index_2 + col_size];
+            // R
+            result[index_0] = (R1 + R2) / 2;
+            // G
+            result[index_1] = input[index_1] * 2;
+            // B
+            result[index_2] = (B1 + B2) / 2;
+        } else if (row % 2 == 0 && col % 2 == 1) {
+            // Red pixel
+            // Getting the G values
+            float G1 = input[index_1 - col_size];
+            float G2 = input[index_1 + col_size];
+            float G3 = input[index_1 - 1];
+            float G4 = input[index_1 + 1];
+            // Getting the B values
+            float B1 = input[index_2 - col_size - 1];
+            float B2 = input[index_2 - col_size + 1];
+            float B3 = input[index_2 + col_size - 1];
+            float B4 = input[index_2 + col_size + 1];
+            // R
+            result[index_0] = input[index_0];
+            // G
+            result[index_1] = (G1 + G2 + G3 + G4) / 2;
+            // B (center pixel)
+            result[index_2] = (B1 + B2 + B3 + B4) / 4;
+        } else if (row % 2 == 1 && col % 2 == 0) {
+            // Blue pixel
+            // Getting the R values
+            float R1 = input[index_0 - col_size - 1];
+            float R2 = input[index_0 + col_size - 1];
+            float R3 = input[index_0 - col_size + 1];
+            float R4 = input[index_0 + col_size + 1];
+            // Getting the G values
+            float G1 = input[index_1 - col_size];
+            float G2 = input[index_1 + col_size];
+            float G3 = input[index_1 - 1];
+            float G4 = input[index_1 + 1];
+            // R
+            result[index_0] = (R1 + R2 + R3 + R4) / 4;
+            // G
+            result[index_1] = (G1 + G2 + G3 + G4) / 2;
+            // B
+            result[index_2] = input[index_2];
+        } else {
+            // Bottom Green pixel
+            // Getting the R values
+            float R1 = input[index_0 - col_size];
+            float R2 = input[index_0 + col_size];
+            // Getting the B values
+            float B1 = input[index_2 - 1];
+            float B2 = input[index_2 + 1];
+            // R
+            result[index_0] = (R1 + R2) / 2;
+            // G
+            result[index_1] = input[index_1] * 2;
+            // B
+            result[index_2] = (B1 + B2) / 2;
+        }
+      }
+  __visc__return(1, bytes_result);
+}
+
+// Leaf HPVM node function for denoise
+void denoise_fxp(float *input, size_t bytes_input, 
+                 float *result, size_t bytes_result,
+                 int row_size, int col_size) {
+  __visc__hint(DEVICE);
+  __visc__attributes(2, input, result, 1, result);
+  
+  for (int chan = 0; chan < CHAN_SIZE; chan++)
+    for (int row = 0; row < row_size; row++)
+      for (int col = 0; col < col_size; col++)
+        if (row >= 1 && row < row_size - 1 && col >= 1 && col < col_size - 1) {
+          float filter[9];
+          for (int i = -1; i < 2; i++)
+            for (int j = -1; j < 2; j++) {
+              int index = ((i+row) - row + 1) * 3 + (j+col) - col + 1;
+              filter[index] = input[(chan * row_size + (i + row)) * col_size + (j + col)];
+            }
+          sort(filter, 9);
+          result[(chan * row_size + row) * col_size + col] = filter[4];
+        } else {
+      result[(chan * row_size + row) * col_size + col] = input[(chan * row_size + row) * col_size + col];
+        }
+  __visc__return(1, bytes_result);
+}
+
+// Leaf HPVM node function, for color map and white balance transform
+void transform_fxp(float *input, size_t bytes_input, 
+                   float *result, size_t bytes_result,
+                   float *TsTw_tran, size_t bytes_TsTw,
+                   int row_size, int col_size) {
+  __visc__hint(DEVICE);
+  __visc__attributes(3, input, result, TsTw_tran, 1, result);
+  
+  for (int chan = 0; chan < CHAN_SIZE; chan++)
+    for (int row = 0; row < row_size; row++)
+      for (int col = 0; col < col_size; col++) {
+        int index = (chan * row_size + row) * col_size + col;
+        int index_0 = (0 * row_size + row) * col_size + col;
+        int index_1 = (1 * row_size + row) * col_size + col;
+        int index_2 = (2 * row_size + row) * col_size + col;
+        int index_2d_0 = 0 * CHAN_SIZE + chan;
+        int index_2d_1 = 1 * CHAN_SIZE + chan;
+        int index_2d_2 = 2 * CHAN_SIZE + chan;
+        result[index] =
+            max(input[index_0] * TsTw_tran[index_2d_0] +
+                input[index_1] * TsTw_tran[index_2d_1] +
+                input[index_2] * TsTw_tran[index_2d_2],
+                0);
+      }
+  __visc__return(1, bytes_result);
+}
+
+// Leaf HPVM node function, for gamut mapping
+void gamut_map_fxp(float *input, size_t bytes_input, 
+                   float *result, size_t bytes_result,
+                   float *ctrl_pts, size_t bytes_ctrl_pts,
+                   float *weights, size_t bytes_weights,
+                   float *coefs, size_t bytes_coefs,
+                   float *l2_dist, size_t bytes_l2_dist,
+                   int row_size, int col_size) {
+  __visc__hint(CPU_TARGET);
+  __visc__attributes(6, input, result, ctrl_pts, weights, coefs, l2_dist, 2, result, l2_dist);
+
+ // First, get the L2 norm from every pixel to the control points,
+ // Then, sum it and weight it. Finally, add the bias.
+
+  for (int row = 0; row < row_size; row++)
+    for (int col = 0; col < col_size; col++) {
+      for (int cp = 0; cp < 3702; cp++) {
+        int index_0 = (0 * row_size + row) * col_size + col;
+        int index_1 = (1 * row_size + row) * col_size + col;
+        int index_2 = (2 * row_size + row) * col_size + col;
+        float val1 = (input[index_0] - ctrl_pts[cp * 3 + 0]); 
+        float val2 = (input[index_0] - ctrl_pts[cp * 3 + 0]);
+        float val3 = (input[index_1] - ctrl_pts[cp * 3 + 1]); 
+        float val4 = (input[index_1] - ctrl_pts[cp * 3 + 1]); 
+        float val5 = (input[index_2] - ctrl_pts[cp * 3 + 2]); 
+        float val6 = (input[index_2] - ctrl_pts[cp * 3 + 2]);
+        float val = val1 * val2 + val3 * val4 + val5 * val6;
+        float sqrt_val = sqrt(val);
+        l2_dist[cp] = sqrt_val;
+      }
+      for (int chan = 0; chan < CHAN_SIZE; chan++) {
+        float chan_val = 0.0;
+        for (int cp = 0; cp < 3702; cp++) {
+          chan_val += l2_dist[cp] * weights[cp * CHAN_SIZE + chan];
+        }
+        chan_val += coefs[0 * CHAN_SIZE + chan] + 
+                    coefs[1 * CHAN_SIZE + chan] * input[(0 * row_size + row) * col_size + col] +
+                    coefs[2 * CHAN_SIZE + chan] * input[(1 * row_size + row) * col_size + col] +
+                    coefs[3 * CHAN_SIZE + chan] * input[(2 * row_size + row) * col_size + col];
+        result[(chan * row_size + row) * col_size + col] = max(chan_val, 0);
+      }
+    }
+  __visc__return(1, bytes_result);
+}
+
+// HPVM leaf node function, for tone mapping
+void tone_map_fxp(float *input, size_t bytes_input, 
+                  float *result, size_t bytes_result,
+                  float *tone_map, size_t bytes_tone_map,
+                  int row_size, int col_size) {
+  __visc__hint(DEVICE);
+  __visc__attributes(3, input, result, tone_map, 1, result);
+  
+  for (int chan = 0; chan < CHAN_SIZE; chan++)
+    for (int row = 0; row < row_size; row++)
+      for (int col = 0; col < col_size; col++) {
+        int index = (chan * row_size + row) * col_size + col;
+        uint8_t x = input[index] * 255;
+        result[index] = tone_map[x * CHAN_SIZE + chan];
+      }
+  __visc__return(1, bytes_result);
+}
+
+/********************************************************************/
+/*** HPVM Internal node Functions - Determine the graph structure ***/
+/********************************************************************/
+
+// We create a wrapper node per leaf node - this is an implementation
+// requirement for the FPGA backend . The CPU backend also supports this,
+// so it does not cause a portability issue.
+
+void scale_fxp_wrapper(uint8_t *input, size_t bytes_input, 
+                       float *result, size_t bytes_result,
+                       int row_size, int col_size) {
+  __visc__hint(CPU_TARGET);
+  __visc__attributes(2, input, result, 1, result);
+
+  // Create an 1D (specified by 1st argument) HPVM node with 1 dynamic
+  // instance (last argument) associated with node function scale_fxp
+  void *ScaleNode = __visc__createNodeND(1, scale_fxp, (size_t)1);
+
+  // Binds inputs of current node with specified node
+  // - destination node
+  // - argument position in argument list of function of source node
+  // - argument position in argument list of function of destination node
+  // - streaming (1) or non-streaming (0)
+  __visc__bindIn(ScaleNode, 0, 0, 0); // bind input
+  __visc__bindIn(ScaleNode, 1, 1, 0); // bind bytes_input
+  __visc__bindIn(ScaleNode, 2, 2, 0); // bind result
+  __visc__bindIn(ScaleNode, 3, 3, 0); // bind bytes_result
+  __visc__bindIn(ScaleNode, 4, 4, 0); // bind row_size
+  __visc__bindIn(ScaleNode, 5, 5, 0); // bind col_size
+
+  // Similar to bindIn, but for the output. Output of a node is a struct, and
+  // we consider the fields in increasing ordering.
+  __visc__bindOut(ScaleNode, 0, 0, 0);
+}
+
+void descale_fxp_wrapper(float *input, size_t bytes_input, 
+                       uint8_t *result, size_t bytes_result,
+                       int row_size, int col_size) {
+  __visc__hint(CPU_TARGET);
+  __visc__attributes(2, input, result, 1, result);
+  void *DescaleNode = __visc__createNodeND(1, descale_fxp, (size_t)1);
+  __visc__bindIn(DescaleNode, 0, 0, 0); // bind input
+  __visc__bindIn(DescaleNode, 1, 1, 0); // bind bytes_input
+  __visc__bindIn(DescaleNode, 2, 2, 0); // bind result
+  __visc__bindIn(DescaleNode, 3, 3, 0); // bind bytes_result
+  __visc__bindIn(DescaleNode, 4, 4, 0); // bind row_size
+  __visc__bindIn(DescaleNode, 5, 5, 0); // bind col_size
+  
+  __visc__bindOut(DescaleNode, 0, 0, 0);
+}
+
+void demosaic_fxp_wrapper(float *input, size_t bytes_input, 
+                       float *result, size_t bytes_result,
+                       int row_size, int col_size) {
+  __visc__hint(CPU_TARGET);
+  __visc__attributes(2, input, result, 1, result);
+  void *DemosaicNode = __visc__createNodeND(1, demosaic_fxp, (size_t)1);
+  __visc__bindIn(DemosaicNode, 0, 0, 0); // bind input
+  __visc__bindIn(DemosaicNode, 1, 1, 0); // bind bytes_input
+  __visc__bindIn(DemosaicNode, 2, 2, 0); // bind result
+  __visc__bindIn(DemosaicNode, 3, 3, 0); // bind bytes_result
+  __visc__bindIn(DemosaicNode, 4, 4, 0); // bind row_size
+  __visc__bindIn(DemosaicNode, 5, 5, 0); // bind col_size
+  
+  __visc__bindOut(DemosaicNode, 0, 0, 0);
+}
+
+void denoise_fxp_wrapper(float *input, size_t bytes_input, 
+                       float *result, size_t bytes_result,
+                       int row_size, int col_size) {
+  __visc__hint(CPU_TARGET);
+  __visc__attributes(2, input, result, 1, result);
+  void *DenoiseNode = __visc__createNodeND(1, denoise_fxp, (size_t)1);
+  __visc__bindIn(DenoiseNode, 0, 0, 0); // bind input
+  __visc__bindIn(DenoiseNode, 1, 1, 0); // bind bytes_input
+  __visc__bindIn(DenoiseNode, 2, 2, 0); // bind result
+  __visc__bindIn(DenoiseNode, 3, 3, 0); // bind bytes_result
+  __visc__bindIn(DenoiseNode, 4, 4, 0); // bind row_size
+  __visc__bindIn(DenoiseNode, 5, 5, 0); // bind col_size
+  
+  __visc__bindOut(DenoiseNode, 0, 0, 0);
+}
+
+void transform_fxp_wrapper(float *input, size_t bytes_input, 
+                       float *result, size_t bytes_result,
+                       float *TsTw_tran, size_t bytes_TsTw,
+                       int row_size, int col_size) {
+  __visc__hint(CPU_TARGET);
+  __visc__attributes(3, input, result, TsTw_tran, 1, result);
+  void *TransformNode = __visc__createNodeND(1, transform_fxp, (size_t)1);
+  __visc__bindIn(TransformNode, 0, 0, 0); // bind input
+  __visc__bindIn(TransformNode, 1, 1, 0); // bind bytes_input
+  __visc__bindIn(TransformNode, 2, 2, 0); // bind result
+  __visc__bindIn(TransformNode, 3, 3, 0); // bind bytes_result
+  __visc__bindIn(TransformNode, 4, 4, 0); // bind tstw
+  __visc__bindIn(TransformNode, 5, 5, 0); // bind bytes_tstw
+  __visc__bindIn(TransformNode, 6, 6, 0); // bind row_size
+  __visc__bindIn(TransformNode, 7, 7, 0); // bind col_size
+  
+  __visc__bindOut(TransformNode, 0, 0, 0);
+}
+
+void gamut_fxp_wrapper(float *input, size_t bytes_input, 
+                       float *result, size_t bytes_result,
+                       float *ctrl_pts, size_t bytes_ctrl_pts,
+                       float *weights, size_t bytes_weights,
+                       float *coefs, size_t bytes_coefs,
+                       float *l2_dist, size_t bytes_l2_dist,
+                       int row_size, int col_size) {
+  __visc__hint(CPU_TARGET);
+  __visc__attributes(6, input, result, ctrl_pts, weights, coefs, l2_dist, 1, result);
+  void *GamutNode = __visc__createNodeND(1, gamut_map_fxp, (size_t)1);
+  __visc__bindIn(GamutNode, 0, 0, 0); // bind input
+  __visc__bindIn(GamutNode, 1, 1, 0); // bind bytes_input
+  __visc__bindIn(GamutNode, 2, 2, 0); // bind result
+  __visc__bindIn(GamutNode, 3, 3, 0); // bind bytes_result
+  __visc__bindIn(GamutNode, 4, 4, 0); // bind ctrl_pts
+  __visc__bindIn(GamutNode, 5, 5, 0); // bind bytes_ctrl_pts
+  __visc__bindIn(GamutNode, 6, 6, 0); // bind weights
+  __visc__bindIn(GamutNode, 7, 7, 0); // bind bytes_weights
+  __visc__bindIn(GamutNode, 8, 8, 0); // bind coefs
+  __visc__bindIn(GamutNode, 9, 9, 0); // bind bytes_coefs
+  __visc__bindIn(GamutNode, 10, 10, 0); // bind l2_dist
+  __visc__bindIn(GamutNode, 11, 11, 0); // bind bytes_l2_dist
+  __visc__bindIn(GamutNode, 12, 12, 0); // bind row_size
+  __visc__bindIn(GamutNode, 13, 13, 0); // bind col_size
+  
+  __visc__bindOut(GamutNode, 0, 0, 0);
+}
+void tone_map_fxp_wrapper(float *input, size_t bytes_input, 
+                       float *result, size_t bytes_result,
+                       float *tone_map, size_t bytes_tone_map,
+                       int row_size, int col_size) {
+
+  __visc__hint(CPU_TARGET);
+  __visc__attributes(3, input, result, tone_map, 1, result);
+  void *ToneMapNode = __visc__createNodeND(1, tone_map_fxp, (size_t)1);
+  __visc__bindIn(ToneMapNode, 0, 0, 0); // bind input
+  __visc__bindIn(ToneMapNode, 1, 1, 0); // bind bytes_input
+  __visc__bindIn(ToneMapNode, 2, 2, 0); // bind result
+  __visc__bindIn(ToneMapNode, 3, 3, 0); // bind bytes_result
+  __visc__bindIn(ToneMapNode, 4, 4, 0); // bind tone_map 
+  __visc__bindIn(ToneMapNode, 5, 5, 0); // bind bytes_tone_map
+  __visc__bindIn(ToneMapNode, 6, 6, 0); // bind row_size
+  __visc__bindIn(ToneMapNode, 7, 7, 0); // bind col_size
+  
+  __visc__bindOut(ToneMapNode, 0, 0, 0);
+}
+
+
+/*** ROOT Node - Top Level of the Graph Hierarchy ***/
+void CamPipeRoot(/*0*/ uint8_t *input,         /*1*/ size_t bytes_input, 
+                 /*2*/ uint8_t *result,        /*3*/ size_t bytes_result,
+                 /*4*/ float *input_scaled,    /*5*/ size_t bytes_input_scaled,
+                 /*6*/ float *result_scaled,   /*7*/ size_t bytes_result_scaled,
+                 /*8*/ float *demosaic_out,    /*9*/ size_t bytes_demosaic_out,
+                 /*10*/ float *denoise_out,    /*11*/ size_t bytes_denoise_out,
+                 /*12*/ float *transform_out,  /*13*/ size_t bytes_transform_out,
+                 /*14*/ float *gamut_out,      /*15*/ size_t bytes_gamut_out,
+                 /*16*/ float *TsTw,           /*17*/ size_t bytes_TsTw,
+                 /*18*/ float *ctrl_pts,       /*19*/ size_t bytes_ctrl_pts,
+                 /*20*/ float *weights,        /*21*/ size_t bytes_weights,
+                 /*22*/ float*coefs,           /*23*/ size_t bytes_coefs,
+                 /*24*/ float *l2_dist,        /*25*/ size_t bytes_l2_dist,
+                 /*26*/ float *tone_map,       /*27*/ size_t bytes_tone_map,
+                 /*28*/ int row_size,          /*29*/ int col_size) {
+
+  //Specifies compilation target for current node
+    __visc__hint(CPU_TARGET);
+
+  // Specifies pointer arguments that will be used as "in" and "out" arguments
+  // - count of "in" arguments
+  // - list of "in" argument , and similar for "out"
+    __visc__attributes(14, input, result, input_scaled, result_scaled, demosaic_out, denoise_out, 
+                       transform_out, gamut_out, TsTw, ctrl_pts, weights, coefs, tone_map, l2_dist, 
+                       5, result, demosaic_out, denoise_out, transform_out, gamut_out);
+
+  // Create an 0D (specified by 1st argument) HPVM node - so a single node
+  // associated with node function ---_fxp_wrapper
+    void* ScNode = __visc__createNodeND(0, scale_fxp_wrapper);
+    void* DmNode = __visc__createNodeND(0, demosaic_fxp_wrapper);
+    void *DnNode = __visc__createNodeND(0, denoise_fxp_wrapper);
+    void *TrNode = __visc__createNodeND(0, transform_fxp_wrapper);
+    void *GmNode = __visc__createNodeND(0, gamut_fxp_wrapper);
+    void *TnNode = __visc__createNodeND(0, tone_map_fxp_wrapper);
+    void *DsNode = __visc__createNodeND(0, descale_fxp_wrapper);
+    
+  // BindIn binds inputs of current node with specified node
+  // - destination node
+  // - argument position in argument list of function of source node
+  // - argument position in argument list of function of destination node
+  // - streaming (1) or non-streaming (0)
+
+  // Edge transfers data between nodes within the same level of hierarchy.
+  // - source and destination dataflow nodes
+  // - edge type, all-all (1) or one-one(0)
+  // - source position (in output struct of source node)
+  // - destination position (in argument list of destination node)
+  // - streaming (1) or non-streaming (0)
+
+    // scale_fxp inputs
+    __visc__bindIn(ScNode, 0, 0, 0); // input -> ScNode:input
+    __visc__bindIn(ScNode, 1, 1, 0); // bytes_input -> ScNode:bytes_input
+    __visc__bindIn(ScNode, 4, 2, 0); // input_scaled -> ScNode:result
+    __visc__bindIn(ScNode, 5, 3, 0); // bytes_input_scaled -> ScNode:bytes_result
+    __visc__bindIn(ScNode, 28, 4, 0); // row_size -> ScNode:row_size
+    __visc__bindIn(ScNode, 29, 5, 0); // col_size -> ScNode:col_size
+
+    // demosaic_fxp inputs
+    __visc__bindIn(DmNode, 4, 0, 0); // input_scaled -> DmNode:input
+    __visc__edge(ScNode, DmNode, 1, 0, 1, 0); // SCNode:bytes_result -> DmNode:bytes_input
+    __visc__bindIn(DmNode, 8, 2, 0); // demosaic_out -> DmNode:result
+    __visc__bindIn(DmNode, 9, 3, 0); // bytes_demosaic_out -> DmNode:bytes_result
+    __visc__bindIn(DmNode, 28, 4, 0); // row_size -> DmNode:row_size 
+    __visc__bindIn(DmNode, 29, 5, 0); // col_size -> DmNode:col_size
+
+    // denoise_fxp inputs
+    __visc__bindIn(DnNode, 8, 0, 0); // demosaic_out -> DnNode:input
+    __visc__edge(DmNode, DnNode, 1, 0, 1, 0); // DMNode:bytes_result -> DnNode:bytes_input
+    __visc__bindIn(DnNode, 10, 2, 0); // denoise_out -> DnNode:result
+    __visc__bindIn(DnNode, 11, 3, 0); // bytes_denoise_out -> DnNode:bytes_result
+    __visc__bindIn(DnNode, 28, 4, 0); // row_size -> DnNode:row_size 
+    __visc__bindIn(DnNode, 29, 5, 0); // col_size -> DnNode:col_size
+    
+    // transform_fxp inputs
+    __visc__bindIn(TrNode, 10, 0, 0); // denoise_out -> TrNode:input
+    __visc__edge(DnNode, TrNode, 1, 0, 1, 0); // DnNode:bytes_result -> TrNode:bytes_input
+    __visc__bindIn(TrNode, 12, 2, 0); // transform_out -> TrNode:result
+    __visc__bindIn(TrNode, 13, 3, 0); // bytes_result_scaled -> TrNode:bytes_result
+    __visc__bindIn(TrNode, 16, 4, 0); // TsTw -> TrNode:TsTw_trann
+    __visc__bindIn(TrNode, 17, 5, 0); // bytes_TsTw -> TrNode:bytes_TsTw
+    __visc__bindIn(TrNode, 28, 6, 0); // row_size -> TrNode:row_size 
+    __visc__bindIn(TrNode, 29, 7, 0); // col_size -> TrNode:col_size
+    
+    // gamut_fxp inputs
+    __visc__bindIn(GmNode, 12, 0, 0); // transform_out -> GmNode:input
+    __visc__edge(TrNode, GmNode, 1, 0, 1, 0); // TrNode:bytes_result -> GmNode:bytes_input
+    __visc__bindIn(GmNode, 14, 2, 0); // gamut_out -> GmNode:result
+    __visc__bindIn(GmNode, 15, 3, 0); // bytes_gamut_out -> GmNode:bytes_result
+    __visc__bindIn(GmNode, 18, 4, 0); // ctrl_pts -> GmNode:ctrl_pts
+    __visc__bindIn(GmNode, 19, 5, 0); // bytes_ctrl_pts -> GmNode:bytes_ctrl_pts
+    __visc__bindIn(GmNode, 20, 6, 0); // weights -> GmNode:weights
+    __visc__bindIn(GmNode, 21, 7, 0); // bytes_weights -> GmNode:bytes_weights
+    __visc__bindIn(GmNode, 22, 8, 0); // coefs -> GmNode:coefs
+    __visc__bindIn(GmNode, 23, 9, 0); // bytes_coefs -> GmNode:bytes_coefs
+    __visc__bindIn(GmNode, 24, 10, 0); // l2_dist -> GmNode: l2_dist
+    __visc__bindIn(GmNode, 25, 11, 0); // bytes_l2_dist -> GmNode:bytes_l2_dist
+    __visc__bindIn(GmNode, 28, 12, 0); // row_size -> GmNode:row_size 
+    __visc__bindIn(GmNode, 29, 13, 0); // col_size -> GmNode:col_size
+    
+    // tone_map_fxp inputs
+    __visc__bindIn(TnNode, 14, 0, 0); // gamut_out -> TnNode:input
+    __visc__edge(GmNode, TnNode, 1, 0, 1, 0); // GmNode:bytes_result -> TnNode:bytes_input
+    __visc__bindIn(TnNode, 6, 2, 0); // result_scaled -> TnNode:result
+    __visc__bindIn(TnNode, 7, 3, 0); // bytes_result_scaled -> TnNode:bytes_result
+    __visc__bindIn(TnNode, 26, 4, 0); // tone_map -> TnNode:tone_map
+    __visc__bindIn(TnNode, 27, 5, 0); // bytes_tone_map -> TnNode:bytes_tone_map
+    __visc__bindIn(TnNode, 28, 6, 0); // row_size -> TnNode:row_size 
+    __visc__bindIn(TnNode, 29, 7, 0); // col_size -> TnNode:col_size
+
+    // descale_fxp inputs
+    __visc__bindIn(DsNode, 6, 0, 0); // result_scaled -> DsNode:input
+    __visc__edge(TnNode, DsNode, 1, 0, 1, 0); // TnNode:bytes_result -> DsNode:bytes_input
+    __visc__bindIn(DsNode, 2, 2, 0); // result -> DsNode:result
+    __visc__bindIn(DsNode, 3, 3, 0); // bytes_result -> DsNode:bytes_result
+    __visc__bindIn(DsNode, 28, 4, 0); // row_size -> DsNode:row_size
+    __visc__bindIn(DsNode, 29, 5, 0); // col_size -> DsNode:col_size
+
+  // Similar to bindIn, but for the output. Output of a node is a struct, and
+  // we consider the fields in increasing ordering.
+    __visc__bindOut(DsNode, 0, 0, 0);
+    
+}
+
+int main(int argc, char* argv[]) {
+    // Parse the arguments.
+    arguments args;
+    set_default_args(&args);
+    argp_parse(&parser, argc, argv, 0, 0, &args);
+
+    // Read a raw image.
+    // NOTE: We deliberately perform this file I/O outside of the kernel.
+    printf("Reading a raw image from %s\n", args.args[RAW_IMAGE_BIN]);
+    int row_size, col_size;
+    uint8_t *image_in = read_image_from_binary(args.args[RAW_IMAGE_BIN], &row_size, &col_size);
+
+    printf("Raw image shape: %d x %d x %d\n", row_size, col_size, CHAN_SIZE);
+
+    // Allocate a buffer for storing the output image data.
+    // (This is currently the same size as the input image data.)
+    size_t bytes_image = sizeof(uint8_t) * row_size * col_size * CHAN_SIZE;
+    size_t bytes_fimage = sizeof(float) * row_size * col_size * CHAN_SIZE;
+    uint8_t *image_out = (uint8_t*) malloc_aligned(bytes_image);
+    uint8_t *image_out_gamut = (uint8_t*) malloc_aligned(bytes_image);
+    uint8_t *image_out_demosaic = (uint8_t*) malloc_aligned(bytes_image);
+    uint8_t *image_out_denoise = (uint8_t*) malloc_aligned(bytes_image);
+    uint8_t *image_out_transform = (uint8_t*) malloc_aligned(bytes_image);
+
+    __visc__init();
+
+    ///////////////////////////////////////////////////////////////
+    // Camera Model Parameters
+    ///////////////////////////////////////////////////////////////
+    // Path to the camera model to be used
+//    char cam_model_path[100];
+//    char cam_model_path = "cam_models/NikonD7000/";
+    // White balance index (select white balance from transform file)
+    // The first white balance in the file has a wb_index of 1
+    // For more information on model format see the readme
+    int wb_index = 6;
+
+    // Number of control points
+    int num_ctrl_pts = 3702;
+    uint8_t *input, *result;
+    float *input_scaled, *result_scaled, *demosaic_out, *denoise_out, *transform_out, *gamut_out;
+    float *TsTw, *ctrl_pts, *weights, *coefs, *tone_map, *l2_dist;
+
+    TsTw = get_TsTw("cam_models/NikonD7000/", wb_index);
+    float *trans = transpose_mat(TsTw, CHAN_SIZE, CHAN_SIZE);
+    free(TsTw);
+    TsTw = trans;
+    ctrl_pts = get_ctrl_pts("cam_models/NikonD7000/", num_ctrl_pts);
+    weights = get_weights("cam_models/NikonD7000/", num_ctrl_pts);
+    coefs = get_coefs("cam_models/NikonD7000/", num_ctrl_pts);
+    tone_map = get_tone_map("cam_models/NikonD7000/");
+    
+    input_scaled = (float*) malloc_aligned(bytes_fimage);
+    result_scaled = (float*) malloc_aligned(bytes_fimage);
+    demosaic_out = (float*) malloc_aligned(bytes_fimage);
+    denoise_out = (float*) malloc_aligned(bytes_fimage);
+    transform_out  = (float*) malloc_aligned(bytes_fimage);
+    gamut_out = (float*) malloc_aligned(bytes_fimage);
+    l2_dist = (float*) malloc_aligned(sizeof(float) * num_ctrl_pts);    
+    
+    // This is host_input in cam_pipe()
+    input = (uint8_t*) malloc_aligned(bytes_image);
+    convert_hwc_to_chw(image_in, row_size, col_size, &input);
+    
+    // This is host_result in cam_pipe()
+    result = (uint8_t*) malloc_aligned(bytes_image);
+
+    // Allocate struct to pass DFG inputs
+    RootIn* rootArgs = (RootIn*) malloc(sizeof(RootIn));
+
+    // Set up HPVM DFG inputs in the rootArgs struct.
+    rootArgs->input = input;
+    rootArgs->bytes_input = bytes_image;
+    
+    rootArgs->result = result;
+    rootArgs->bytes_result = bytes_image;
+    
+    rootArgs->input_scaled = input_scaled;
+    rootArgs->bytes_input_scaled = bytes_fimage;
+    
+    rootArgs->result_scaled = result_scaled;
+    rootArgs->bytes_result_scaled = bytes_fimage;
+    
+    rootArgs->demosaic_out = demosaic_out;
+    rootArgs->bytes_demosaic_out = bytes_fimage;
+    
+    rootArgs->denoise_out = denoise_out;
+    rootArgs->bytes_denoise_out = bytes_fimage;
+    
+    rootArgs->transform_out = transform_out;
+    rootArgs->bytes_transform_out = bytes_fimage;
+
+    rootArgs->gamut_out = gamut_out;
+    rootArgs->bytes_gamut_out = bytes_fimage;
+
+    rootArgs->TsTw = TsTw;
+    rootArgs->bytes_TsTw = CHAN_SIZE * CHAN_SIZE * sizeof(float);
+    
+    rootArgs->ctrl_pts = ctrl_pts;
+    rootArgs->bytes_ctrl_pts = num_ctrl_pts * CHAN_SIZE * sizeof(float);
+    
+    rootArgs->weights = weights;
+    rootArgs->bytes_weights = num_ctrl_pts * CHAN_SIZE * sizeof(float);
+    
+    rootArgs->coefs = coefs;
+    rootArgs->bytes_coefs = 4 * CHAN_SIZE * sizeof(float);
+    
+    rootArgs->tone_map = tone_map;
+    rootArgs->bytes_tone_map = 256 * CHAN_SIZE * sizeof(float);
+    
+    rootArgs->l2_dist = l2_dist;
+    rootArgs->bytes_l2_dist = num_ctrl_pts * sizeof(float);
+    
+    rootArgs->row_size = row_size;
+    rootArgs->col_size = col_size;
+
+    // Memory tracking is required for pointer arguments.
+    // Nodes can be scheduled on different targets, and 
+    // dataflow edge implementation needs to request data.
+    // The pair (pointer, size) is inserted in memory tracker using this call
+    llvm_visc_track_mem(input, bytes_image);
+    llvm_visc_track_mem(result, bytes_image);
+    llvm_visc_track_mem(input_scaled, bytes_fimage);
+    llvm_visc_track_mem(result_scaled, bytes_fimage);
+    llvm_visc_track_mem(demosaic_out, bytes_fimage);
+    llvm_visc_track_mem(denoise_out, bytes_fimage);
+    llvm_visc_track_mem(transform_out, bytes_fimage);
+    llvm_visc_track_mem(gamut_out, bytes_fimage);
+    llvm_visc_track_mem(TsTw, CHAN_SIZE * CHAN_SIZE * sizeof(float)); 
+    llvm_visc_track_mem(ctrl_pts, num_ctrl_pts * CHAN_SIZE * sizeof(float));
+    llvm_visc_track_mem(weights, num_ctrl_pts * CHAN_SIZE * sizeof(float));
+    llvm_visc_track_mem(coefs, 4 * CHAN_SIZE *sizeof(float));
+    llvm_visc_track_mem(tone_map, 256 * CHAN_SIZE * sizeof(float));
+    llvm_visc_track_mem(l2_dist, num_ctrl_pts * sizeof(float));
+    
+    printf("\n\nLaunching CAVA pipeline!\n");
+
+    void* camPipeDFG = __visc__launch(0, CamPipeRoot, (void*) rootArgs);
+    __visc__wait(camPipeDFG);
+
+    printf("\n\nPipeline execution completed!\n");
+    printf("\n\nRequesting memory!\n");
+
+    // Request data from graph.    
+    llvm_visc_request_mem(result, bytes_image);
+    llvm_visc_request_mem(demosaic_out, bytes_fimage);
+    llvm_visc_request_mem(denoise_out, bytes_fimage);
+    llvm_visc_request_mem(transform_out, bytes_fimage);
+    llvm_visc_request_mem(gamut_out, bytes_fimage);
+    printf("\n\nDone requesting memory!\n");
+
+
+    uint8_t* gamut_out_descaled = (uint8_t*) malloc_aligned(bytes_image);
+  uint8_t* demosaic_out_descaled = (uint8_t*) malloc_aligned(bytes_image);
+    uint8_t* transform_out_descaled = (uint8_t*) malloc_aligned(bytes_image);
+    uint8_t* denoise_out_descaled = (uint8_t*) malloc_aligned(bytes_image);
+    
+  descale_cpu(demosaic_out, bytes_fimage, demosaic_out_descaled, bytes_image, row_size, col_size);
+    descale_cpu(gamut_out, bytes_fimage, gamut_out_descaled, bytes_image, row_size, col_size);
+    descale_cpu(denoise_out, bytes_fimage, denoise_out_descaled, bytes_image, row_size, col_size);
+    descale_cpu(transform_out, bytes_fimage, transform_out_descaled, bytes_image, row_size, col_size);
+    
+    convert_chw_to_hwc(result, row_size, col_size, &image_out);
+   convert_chw_to_hwc(gamut_out_descaled, row_size, col_size, &image_out_gamut);
+    convert_chw_to_hwc(demosaic_out_descaled, row_size, col_size, &image_out_demosaic);
+    convert_chw_to_hwc(denoise_out_descaled, row_size, col_size, &image_out_denoise);
+    convert_chw_to_hwc(transform_out_descaled, row_size, col_size, &image_out_transform);
+
+    
+    // Remove tracked pointers.
+    llvm_visc_untrack_mem(input);
+    llvm_visc_untrack_mem(result);
+    llvm_visc_untrack_mem(input_scaled);
+    llvm_visc_untrack_mem(result_scaled);
+    llvm_visc_untrack_mem(demosaic_out);
+    llvm_visc_untrack_mem(denoise_out);
+    llvm_visc_untrack_mem(transform_out);
+    llvm_visc_untrack_mem(gamut_out);
+    
+    llvm_visc_untrack_mem(TsTw); 
+    llvm_visc_untrack_mem(ctrl_pts);
+    llvm_visc_untrack_mem(weights);
+    llvm_visc_untrack_mem(coefs);
+    llvm_visc_untrack_mem(tone_map);
+    llvm_visc_untrack_mem(l2_dist);
+
+    // Output the image.
+    // NOTE: We deliberately perform this file I/O outside of the kernel.
+  char str[50], base_str[50];
+  strcpy(base_str, args.args[OUTPUT_IMAGE_BIN]);
+  strcpy(str, base_str);
+  strcat(str, ".bin");
+  printf("Writing output image to %s\n", str);
+  write_image_to_binary(str, image_out, row_size, col_size);
+  strcpy(str, base_str);
+  strcat(str, "_gamut.bin");
+  printf("Writing output image to %s\n", str);
+  write_image_to_binary(str, image_out_gamut, row_size, col_size);
+  strcpy(str, base_str);
+  strcat(str, "_demosaic.bin");
+  printf("Writing output image to %s\n", str);
+  write_image_to_binary(str, image_out_demosaic, row_size, col_size);
+  strcpy(str, base_str);
+  strcat(str, "_denoise.bin");
+  printf("Writing output image to %s\n", str);
+  write_image_to_binary(str, image_out_denoise, row_size, col_size);
+  strcpy(str, base_str);
+  strcat(str, "_transform.bin");
+  printf("Writing output image to %s\n", str);
+  write_image_to_binary(str, image_out_transform, row_size, col_size);
+
+    __visc__cleanup();
+
+    return 0;
+}
+
diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/new_main.c b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/new_main.c
new file mode 100644
index 0000000000..e6e36ba1db
--- /dev/null
+++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/new_main.c
@@ -0,0 +1,115 @@
+
+
+#include <stdlib.h>
+
+
+typedef struct __attribute__((__packed__)) {
+    float* input; size_t bytes_input;
+    float* result; size_t bytes_result;
+} 
+RootIn;
+
+
+
+
+void scale_values(float* input, size_t bytes_input) {
+
+  __visc__hint(DEVICE);
+  __visc__attributes(1, input, 1, input);
+  
+  for (int ind = 0; ind < bytes_input; ind++){
+    input[ind] = input[ind] * 2.0;
+  }
+  
+  __visc__return(1, bytes_input);
+}
+
+
+
+
+void graphRoot(/*0*/ float* input, /*1*/ size_t bytes_input, 
+               /*2*/ float* result, /*3*/ size_t bytes_result) {
+
+  //Specifies compilation target for current node
+  __visc__hint(CPU_TARGET);
+
+  __visc__attributes(2, input, result, 2, input, result);
+
+  // Create an 0D (specified by 1st argument) HPVM node - so a single node
+  // associated with node function ---_fxp_wrapper
+
+  void* scaleNode = __visc__createNodeND(0, scale_values);
+    
+  // BindIn binds inputs of current node with specified node
+  // - destination node
+  // - argument position in argument list of function of source node
+  // - argument position in argument list of function of destination node
+  // - streaming (1) or non-streaming (0)
+
+  // Edge transfers data between nodes within the same level of hierarchy.
+  // - source and destination dataflow nodes
+  // - edge type, all-all (1) or one-one(0)
+  // - source position (in output struct of source node)
+  // - destination position (in argument list of destination node)
+  // - streaming (1) or non-streaming (0)
+
+  // scale_fxp inputs
+  __visc__bindIn(scaleNode, 0, 0, 0); // input -> ScNode:input
+  __visc__bindIn(scaleNode, 1, 1, 0); // bytes_input -> ScNode:bytes_input
+    
+  // Similar to bindIn, but for the output. Output of a node is a struct, and
+  // we consider the fields in increasing ordering.
+  __visc__bindOut(scaleNode, 0, 0, 0);
+    
+}
+
+
+
+
+
+int main(int argc, char* argv[]) {
+
+    __visc__init();
+
+    size_t input_size = 100;
+    size_t result_size = 100;
+
+    size_t input_bytes = input_size * sizeof(float);
+    size_t result_bytes = result_size * sizeof(float);
+
+    // This is host_input in cam_pipe()
+    input = (float*) malloc(input_bytes);
+    // This is host_result in cam_pipe()
+    result = (float*) malloc(result_bytes);
+
+
+    RootIn* rootArgs = (RootIn*) malloc(sizeof(RootIn));
+
+    // Set up HPVM DFG inputs in the rootArgs struct.
+    rootArgs->input = input;
+    rootArgs->bytes_input = input_bytes;
+    
+    rootArgs->result = result;
+    rootArgs->bytes_result = result_bytes;
+
+    llvm_visc_track_mem(input, input_bytes);
+    llvm_visc_track_mem(result, result_bytes);
+
+
+    void* testDFG = __visc__launch(0, graphRoot, (void*) rootArgs);
+    __visc__wait(testDFG);
+
+
+    llvm_visc_request_mem(result, input_bytes);
+    llvm_visc_request_mem(result, result_bytes);
+
+
+
+    __visc__cleanup();
+
+    return 0;
+}
+
+
+
+    
diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/pipe_stages.c b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/pipe_stages.c
new file mode 100644
index 0000000000..2ebedec936
--- /dev/null
+++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/pipe_stages.c
@@ -0,0 +1,290 @@
+#include <stdio.h>
+#include <math.h>
+#include "pipe_stages.h"
+#include "cam_pipe_utility.h"
+
+//void scale_fxp(uint8_t *input, int row_size, int col_size, float *output) {
+void scale_fxp(uint8_t *input, size_t bytes_input, 
+               float *output, size_t bytes_output,
+               int row_size, int col_size) {
+  __visc__hint(DEVICE);
+  __visc__attributes(2, input, output, 1, output);
+  
+  ARRAY_3D(uint8_t, _input, input, row_size, col_size);
+  ARRAY_3D(float, _output, output, row_size, col_size);
+  sl_chan:
+  for (int chan = 0; chan < CHAN_SIZE; chan++)
+    sl_row:
+    for (int row = 0; row < row_size; row++)
+      sl_col:
+      for (int col = 0; col < col_size; col++)
+        _output[chan][row][col] = _input[chan][row][col] * 1.0 / 255;
+
+  __visc__return(1, bytes_output);
+}
+
+//void descale_fxp(float *input, int row_size, int col_size, uint8_t *output) {
+void descale_fxp(float *input, size_t bytes_input, 
+                 uint8_t *output, size_t bytes_result,
+                 int row_size, int col_size) {
+  __visc__hint(DEVICE);
+  __visc__attributes(2, input, output, 1, output);
+  
+  ARRAY_3D(float, _input, input, row_size, col_size);
+  ARRAY_3D(uint8_t, _output, output, row_size, col_size);
+  dsl_chan:
+  for (int chan = 0; chan < CHAN_SIZE; chan++)
+    dsl_row:
+    for (int row = 0; row < row_size; row++)
+      dsl_col:
+      for (int col = 0; col < col_size; col++)
+        _output[chan][row][col] = min(max(_input[chan][row][col] * 255, 0), 255);
+
+  __visc__return(1, bytes_output);
+}
+
+// Demosaicing stage
+// G R
+// B G
+//void demosaic_fxp(float *input, int row_size, int col_size, float *result) {
+void demosaic_fxp(float *input, size_t bytes_input, 
+                  float *result, size_t bytes_result,
+                  int row_size, int col_size) {
+  __visc__hint(DEVICE);
+  __visc__attributes(2, input, result, 1, result);
+  
+  printf("Demosaicing.\n");
+  ARRAY_3D(float, _input, input, row_size, col_size);
+  ARRAY_3D(float, _result, result, row_size, col_size);
+
+  dm_row:
+  for (int row = 1; row < row_size - 1; row++)
+    dm_col:
+    for (int col = 1; col < col_size - 1; col++)
+        if (row % 2 == 0 && col % 2 == 0) {
+            // Green pixel
+            // Getting the R values
+            float R1 = _input[0][row][col - 1];
+            float R2 = _input[0][row][col + 1];
+            // Getting the B values
+            float B1 = _input[2][row - 1][col];
+            float B2 = _input[2][row + 1][col];
+            // R
+            _result[0][row][col] = (R1 + R2) / 2;
+            // G
+            _result[1][row][col] = _input[1][row][col] * 2;
+            // B
+            _result[2][row][col] = (B1 + B2) / 2;
+        } else if (row % 2 == 0 && col % 2 == 1) {
+            // Red pixel
+            // Getting the G values
+            float G1 = _input[1][row - 1][col];
+            float G2 = _input[1][row + 1][col];
+            float G3 = _input[1][row][col - 1];
+            float G4 = _input[1][row][col + 1];
+            // Getting the B values
+            float B1 = _input[2][row - 1][col - 1];
+            float B2 = _input[2][row - 1][col + 1];
+            float B3 = _input[2][row + 1][col - 1];
+            float B4 = _input[2][row + 1][col + 1];
+            // R
+            _result[0][row][col] = _input[0][row][col];
+            // G
+            _result[1][row][col] = (G1 + G2 + G3 + G4) / 2;
+            // B (center pixel)
+            _result[2][row][col] = (B1 + B2 + B3 + B4) / 4;
+        } else if (row % 2 == 1 && col % 2 == 0) {
+            // Blue pixel
+            // Getting the R values
+            float R1 = _input[0][row - 1][col - 1];
+            float R2 = _input[0][row + 1][col - 1];
+            float R3 = _input[0][row - 1][col + 1];
+            float R4 = _input[0][row + 1][col + 1];
+            // Getting the G values
+            float G1 = _input[1][row - 1][col];
+            float G2 = _input[1][row + 1][col];
+            float G3 = _input[1][row][col - 1];
+            float G4 = _input[1][row][col + 1];
+            // R
+            _result[0][row][col] = (R1 + R2 + R3 + R4) / 4;
+            // G
+            _result[1][row][col] = (G1 + G2 + G3 + G4) / 2;
+            // B
+            _result[2][row][col] = _input[2][row][col];
+        } else {
+            // Bottom Green pixel
+            // Getting the R values
+            float R1 = _input[0][row - 1][col];
+            float R2 = _input[0][row + 1][col];
+            // Getting the B values
+            float B1 = _input[2][row][col - 1];
+            float B2 = _input[2][row][col + 1];
+            // R
+            _result[0][row][col] = (R1 + R2) / 2;
+            // G
+            _result[1][row][col] = _input[1][row][col] * 2;
+            // B
+            _result[2][row][col] = (B1 + B2) / 2;
+        }
+
+  __visc__return(1, bytes_result);
+}
+
+static void sort(float arr[], int n) {
+    int i, j;
+    dn_sort_i:
+    for (i = 0; i < n - 1; i++)
+        dn_sort_j:
+        for (j = 0; j < n - i - 1; j++)
+            if (arr[j] > arr[j + 1]) {
+                float temp = arr[j];
+                arr[j] = arr[j + 1];
+                arr[j + 1] = temp;
+            }
+}
+
+// Simple denoise
+//void denoise_fxp(float *input, int row_size, int col_size, float *result) {
+void denoise_fxp(float *input, size_t bytes_input, 
+                 float *result, size_t bytes_result,
+                 int row_size, int col_size) {
+  __visc__hint(DEVICE);
+  __visc__attributes(2, input, result, 1, result);
+  
+  printf("Denoising.\n");
+  ARRAY_3D(float, _input, input, row_size, col_size);
+  ARRAY_3D(float, _result, result, row_size, col_size);
+
+  dn_chan:
+  for (int chan = 0; chan < CHAN_SIZE; chan++)
+    dn_row:
+    for (int row = 0; row < row_size; row++)
+      dn_col:
+      for (int col = 0; col < col_size; col++)
+        if (row >= 1 && row < row_size - 1 && col >= 1 && col < col_size - 1) {
+          float filter[9];
+          dn_slide_row:
+          for (int i = row-1; i < row+2; i++)
+            dn_slide_col:
+            for (int j = col-1; j < col+2; j++) {
+              int index = (i - row + 1) * 3 + j - col + 1;
+              filter[index] = _input[chan][i][j];
+            }
+          sort(filter, 9);
+          _result[chan][row][col] = filter[4];
+        } else {
+          _result[chan][row][col] = _input[chan][row][col];
+        }
+  __visc__return(1, bytes_result);
+}
+
+// Color map and white balance transform
+//void transform_fxp(float *input, int row_size, int col_size, float *result,
+//                   float *TsTw_tran) {
+void transform_fxp(float *input, size_t bytes_input, 
+                   float *result, size_t bytes_result,
+                   float *TsTw_tran, size_t bytes_TsTw,
+                   int row_size, int col_size) {
+  __visc__hint(DEVICE);
+  __visc__attributes(3, input, result, TsTw_tran, 1, result);
+  
+  printf("Color mapping.\n");
+  ARRAY_3D(float, _input, input, row_size, col_size);
+  ARRAY_3D(float, _result, result, row_size, col_size);
+  ARRAY_2D(float, _TsTw_tran, TsTw_tran, 3);
+
+  tr_chan:
+  for (int chan = 0; chan < CHAN_SIZE; chan++)
+    tr_row:
+    for (int row = 0; row < row_size; row++)
+      tr_col:
+      for (int col = 0; col < col_size; col++)
+        _result[chan][row][col] =
+            max(_input[0][row][col] * _TsTw_tran[0][chan] +
+                    _input[1][row][col] * _TsTw_tran[1][chan] +
+                    _input[2][row][col] * _TsTw_tran[2][chan],
+                0);
+  __visc__return(1, bytes_result);
+}
+
+//
+// Weighted radial basis function for gamut mapping
+//
+//void gamut_map_fxp(float *input, int row_size, int col_size, float *result,
+//                   float *ctrl_pts, float *weights, float *coefs, float *l2_dist) {
+void gamut_map_fxp(float *input, size_t bytes_input, 
+                   float *result, size_t bytes_result,
+                   float *ctrl_pts, size_t bytes_ctrl_pts,
+                   float *weights, size_t bytes_weights,
+                   float *coefs, size_t bytes_coefs,
+                   float *l2_dist, size_t bytes_l2_dist,
+                   int row_size, int col_size) {
+  __visc__hint(DEVICE);
+  __visc__attributes(6, input, result, ctrl_pts, weights, coefs, l2_dist, 1, result);
+  
+  printf("Gamut mapping.\n");
+  ARRAY_3D(float, _input, input, row_size, col_size);
+  ARRAY_3D(float, _result, result, row_size, col_size);
+  ARRAY_2D(float, _ctrl_pts, ctrl_pts, 3);
+  ARRAY_2D(float, _weights, weights, 3);
+  ARRAY_2D(float, _coefs, coefs, 3);
+
+  // First, get the L2 norm from every pixel to the control points,
+  // Then, sum it and weight it. Finally, add the bias.
+  gm_rbf_row:
+  for (int row = 0; row < row_size; row++)
+    gm_rbf_col:
+    for (int col = 0; col < col_size; col++) {
+      gm_rbf_cp0:
+      for (int cp = 0; cp < num_ctrl_pts; cp++) {
+        l2_dist[cp] =
+            sqrt((_input[0][row][col] - _ctrl_pts[cp][0]) *
+                     (_input[0][row][col] - _ctrl_pts[cp][0]) +
+                 (_input[1][row][col] - _ctrl_pts[cp][1]) *
+                     (_input[1][row][col] - _ctrl_pts[cp][1]) +
+                 (_input[2][row][col] - _ctrl_pts[cp][2]) *
+                     (_input[2][row][col] - _ctrl_pts[cp][2]));
+      }
+      gm_rbf_chan:
+      for (int chan = 0; chan < CHAN_SIZE; chan++) {
+        float chan_val = 0.0;
+        gm_rbf_cp1:
+        for (int cp = 0; cp < num_ctrl_pts; cp++) {
+          chan_val += l2_dist[cp] * _weights[cp][chan];
+        }
+        // Add on the biases for the RBF
+        chan_val += _coefs[0][chan] + _coefs[1][chan] * _input[0][row][col] +
+                    _coefs[2][chan] * _input[1][row][col] +
+                    _coefs[3][chan] * _input[2][row][col];
+        _result[chan][row][col] = max(chan_val, 0);
+      }
+    }
+  __visc__return(1, bytes_result);
+}
+
+// Tone mapping
+//void tone_map_fxp(float *input, int row_size, int col_size, float *tone_map,
+//                  float *result) {
+void tone_map_fxp(float *input, size_t bytes_input, 
+                  float *result, size_t bytes_result,
+                  float *tone_map, size_t bytes_tone_map,
+                  int row_size, int col_size) {
+  __visc__hint(DEVICE);
+  __visc__attributes(3, input, result, tone_map, 1, result);
+  
+  printf("Tone mapping.\n");
+  ARRAY_3D(float, _input, input, row_size, col_size);
+  ARRAY_3D(float, _result, result, row_size, col_size);
+  ARRAY_2D(float, _tone_map, tone_map, 3);
+
+  tm_chan:
+  for (int chan = 0; chan < CHAN_SIZE; chan++)
+    tm_row:
+    for (int row = 0; row < row_size; row++)
+      tm_col:
+      for (int col = 0; col < col_size; col++) {
+        uint8_t x = _input[chan][row][col] * 255;
+        _result[chan][row][col] = _tone_map[x][chan];
+      }
+  __visc__return(1, bytes_result);
+}
diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/pipe_stages.h b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/pipe_stages.h
new file mode 100644
index 0000000000..eae4347b99
--- /dev/null
+++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/pipe_stages.h
@@ -0,0 +1,63 @@
+#ifndef _PIPE_STAGES_H_
+#define _PIPE_STAGES_H_
+
+#include "defs.h"
+
+#define CHAN_SIZE 3
+
+#define ISP 0x4
+
+#define max(a,b) \
+  ({ __typeof__ (a) _a = (a); \
+      __typeof__ (b) _b = (b); \
+    _a > _b ? _a : _b; })
+
+#define min(a,b) \
+  ({ __typeof__ (a) _a = (a); \
+      __typeof__ (b) _b = (b); \
+    _a < _b ? _a : _b; })
+
+#define abs(a) \
+  ({ __typeof__ (a) _a = (a); \
+    _a < 0 ? -_a : _a; })
+
+extern int num_ctrl_pts;
+
+void scale_fxp(uint8_t *input, size_t bytes_input, 
+               float *output, size_t bytes_output,
+               int row_size, int col_size);
+
+void descale_fxp(float *input, size_t bytes_input, 
+                 uint8_t *output, size_t bytes_result,
+                 int row_size, int col_size);
+
+void demosaic_fxp(float *input, size_t bytes_input, 
+                  float *result, size_t bytes_result,
+                  int row_size, int col_size);
+
+void denoise_fxp(float *input, size_t bytes_input, 
+                 float *result, size_t bytes_result,
+                 int row_size, int col_size);
+
+void transform_fxp(float *input, size_t bytes_input, 
+                   float *result, size_t bytes_result,
+                   float *TsTw_tran, size_t bytes_TsTw,
+                   int row_size, int col_size);
+
+void gamut_map_fxp(float *input, size_t bytes_input, 
+                   float *result, size_t bytes_result,
+                   float *ctrl_pts, size_t bytes_ctrl_pts,
+                   float *weights, size_t bytes_weights,
+                   float *coefs, size_t bytes_coefs,
+                   float *l2_dist, size_t bytes_l2_dist,
+                   int row_size, int col_size);
+
+void tone_map_fxp(float *input, size_t bytes_input, 
+                  float *result, size_t bytes_result,
+                  float *tone_map, size_t bytes_tone_map,
+                  int row_size, int col_size);
+
+void tone_map_approx_fxp(float *input, int row_size, int col_size,
+                         float *result);
+
+#endif
diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/rename.sh b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/rename.sh
new file mode 100755
index 0000000000..3b6faac63a
--- /dev/null
+++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/rename.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+for f in *.cc; do 
+    mv -- "$f" "${f%.cc}.c"
+done
diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/utility.c b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/utility.c
new file mode 100644
index 0000000000..c1eaee3333
--- /dev/null
+++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/utility.c
@@ -0,0 +1,11 @@
+#include <stdlib.h>
+#include <assert.h>
+#include "defs.h"
+#include "utility.h"
+
+void *malloc_aligned(size_t size) {
+  void *ptr = NULL;
+  int err = posix_memalign((void **)&ptr, CACHELINE_SIZE, size);
+  assert(err == 0 && "Failed to allocate memory!");
+  return ptr;
+}
diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/utility.h b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/utility.h
new file mode 100644
index 0000000000..fc407c72a3
--- /dev/null
+++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/utility.h
@@ -0,0 +1,8 @@
+#ifndef _COMMON_UTILITY_H_
+#define _COMMON_UTILITY_H_
+
+#include <stddef.h>
+
+void *malloc_aligned(size_t size);
+
+#endif
diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/visc.h b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/visc.h
new file mode 100644
index 0000000000..a263e35252
--- /dev/null
+++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/cava_test/src/visc.h
@@ -0,0 +1,110 @@
+/***************************************************************************
+ *cr
+ *cr            (C) Copyright 2010 The Board of Trustees of the
+ *cr                        University of Illinois
+ *cr                         All Rights Reserved
+ *cr
+ ***************************************************************************/
+
+#ifndef DEVICE
+#define DEVICE GPU_TARGET
+#endif
+
+#include "llvm/SupportVISC/VISCHint.h"
+
+#ifdef __cplusplus
+extern "C" {
+void __visc__hint(visc::Target);
+//void __visc__wait(void*);
+#else
+void __visc__hint(enum Target);
+//void __visc__wait(unsigned);
+#endif
+
+#ifdef __cplusplus
+//void* __visc__node(...);
+//void* __visc__createNode(...);
+//void* __visc__createNode1D(...);
+//void* __visc__createNode2D(...);
+//void* __visc__createNode3D(...);
+//void __visc__return(...);
+#endif
+
+void* __visc__createNodeND(unsigned,...);
+void __visc__return(unsigned, ...);
+
+void __visc__attributes(unsigned, ...);
+void __visc__init();
+void __visc__cleanup();
+
+void __visc__bindIn(void*, unsigned, unsigned, unsigned);
+void __visc__bindOut(void*, unsigned, unsigned, unsigned);
+void* __visc__edge(void*, void*, unsigned, unsigned, unsigned, unsigned);
+void __visc__push(void*, void*);
+void* __visc__pop(void*);
+void* __visc__launch(unsigned, ...);
+void __visc__wait(void*);
+
+void* __visc__getNode();
+void* __visc__getParentNode(void*);
+void __visc__barrier();
+void* __visc__malloc(long);
+long __visc__getNodeInstanceID_x(void*);
+long __visc__getNodeInstanceID_y(void*);
+long __visc__getNodeInstanceID_z(void*);
+long __visc__getNumNodeInstances_x(void*);
+long __visc__getNumNodeInstances_y(void*);
+long __visc__getNumNodeInstances_z(void*);
+
+// Atomic
+// signed int
+int __visc__atomic_cmpxchg(int*, int, int);
+int __visc__atomic_add(int*, int);
+int __visc__atomic_sub(int*, int);
+int __visc__atomic_xchg(int*, int);
+int __visc__atomic_inc(int*);
+int __visc__atomic_dec(int*);
+int __visc__atomic_min(int*, int);
+int __visc__atomic_max(int*, int);
+int __visc__atomic_umax(int*, int);
+int __visc__atomic_umin(int*, int);
+int __visc__atomic_and(int*, int);
+int __visc__atomic_or(int*, int);
+int __visc__atomic_xor(int*, int);
+
+// Special Func
+float __visc__floor(float);
+float __visc__rsqrt(float);
+float __visc__sqrt(float);
+float __visc__sin(float);
+float __visc__cos(float);
+// unsigned int
+//unsigned __visc__atomic_cmpxchg(unsigned*, unsigned, unsigned);
+//unsigned __visc__atomic_add(unsigned*, unsigned);
+//unsigned __visc__atomic_sub(unsigned*, unsigned);
+//unsigned __visc__atomic_xchg(unsigned*, unsigned);
+//unsigned __visc__atomic_inc(unsigned*);
+//unsigned __visc__atomic_dec(unsigned*);
+//unsigned __visc__atomic_min(unsigned*, unsigned);
+//unsigned __visc__atomic_max(unsigned*, unsigned);
+//unsigned __visc__atomic_and(unsigned*, unsigned);
+//unsigned __visc__atomic_or(unsigned*, unsigned);
+//unsigned __visc__atomic_xor(unsigned*, unsigned);
+
+
+#include <unistd.h>
+
+long get_global_id(int);
+long get_group_id(int);
+long get_local_id(int);
+long get_local_size(int);
+
+
+void llvm_visc_track_mem(void*, size_t);
+void llvm_visc_untrack_mem(void*);
+void llvm_visc_request_mem(void*, size_t);
+
+#ifdef __cplusplus
+}
+#endif
+
-- 
GitLab