diff --git a/hpvm/test/README.md b/hpvm/test/README.md index e709ef04195c90e0f91c2a4b4a4b1d2f0b716d1f..94103affb668afc29d32e52d85d0d60182bd16d8 100644 --- a/hpvm/test/README.md +++ b/hpvm/test/README.md @@ -22,9 +22,9 @@ TODO ## Pipeline ``` make TARGET={seq, gpu} - ./pipeline-{seq, gpu} datasets/big/input/formula1_scaled.mp4 + ./pipeline-{seq, gpu} datasets/formula1_scaled.mp4 ``` ## Your own project See `template/` for an example Makefile and config. -Include `visc.h` to use HPVM intrinsics, found in the `test/include/visc.h`. +Include `visc.h` to use HPVM C API functions, found in `test/include/visc.h`. diff --git a/hpvm/test/parboil/README.md b/hpvm/test/parboil/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1166e4f10f6a6e29e4f5d40871674c27da975acc --- /dev/null +++ b/hpvm/test/parboil/README.md @@ -0,0 +1,8 @@ +# Current Benchmark Compatibility + +| Benchmark | Version | Supported on CPU | Supported on GPU | +| :-------- | :------ | :--------------: | :--------------: | +| sgemm | visc | ✔ | ✔ | +| stencil | visc | ✔ | ✔ | +| spmv | visc | ✔ | ✘ | +| lbm | visc | ✔ | ✘ | diff --git a/hpvm/test/parboil/common/Makefile.conf.example-ati b/hpvm/test/parboil/common/Makefile.conf.example-ati deleted file mode 100644 index e7dacca07360035cc3d07404ae0358c310ad4507..0000000000000000000000000000000000000000 --- a/hpvm/test/parboil/common/Makefile.conf.example-ati +++ /dev/null @@ -1,3 +0,0 @@ -OPENCL_PATH=/opt/ati/ -OPENCL_LIB_PATH=/opt/ati/lib/x86_64 - diff --git a/hpvm/test/parboil/common/Makefile.conf.example-mcuda b/hpvm/test/parboil/common/Makefile.conf.example-mcuda deleted file mode 100644 index 36c110c015b6cde1aafc01cc3d6623c3bf905887..0000000000000000000000000000000000000000 --- a/hpvm/test/parboil/common/Makefile.conf.example-mcuda +++ /dev/null @@ -1 +0,0 @@ -MCUDA_PATH=/usr/local/mcuda diff --git a/hpvm/test/parboil/common/Makefile.conf.example-nvidia b/hpvm/test/parboil/common/Makefile.conf.example-nvidia deleted file 
mode 100644 index bdf84b6329487cba5c1c38dd845b9f63af4c28b5..0000000000000000000000000000000000000000 --- a/hpvm/test/parboil/common/Makefile.conf.example-nvidia +++ /dev/null @@ -1,5 +0,0 @@ -CUDA_PATH=/usr/local/cuda -CUDA_LIB_PATH=/usr/local/cuda/lib64 -OPENCL_PATH=/usr/local/cuda -OPENCL_LIB_PATH=/usr/lib - diff --git a/hpvm/test/parboil/common/mk/c.mk b/hpvm/test/parboil/common/mk/c.mk deleted file mode 100644 index 9334ba5c8f76e56490b96ec1527bc33e7d8248f0..0000000000000000000000000000000000000000 --- a/hpvm/test/parboil/common/mk/c.mk +++ /dev/null @@ -1,86 +0,0 @@ -# (c) 2007 The Board of Trustees of the University of Illinois. - -# Default language wide options - -LANG_CFLAGS=-I$(PARBOIL_ROOT)/common/include -I/usr/local/cuda/include -LANG_CXXFLAGS=$(LANG_CFLAGS) -LANG_LDFLAGS= - -CFLAGS=$(LANG_CFLAGS) $(PLATFORM_CFLAGS) $(APP_CFLAGS) -CXXFLAGS=$(LANG_CXXFLAGS) $(PLATFORM_CXXFLAGS) $(APP_CXXFLAGS) -LDFLAGS=$(LANG_LDFLAGS) $(PLATFORM_LDFLAGS) $(APP_LDFLAGS) - -# Rules common to all makefiles - -######################################## -# Functions -######################################## - -# Add BUILDDIR as a prefix to each element of $1 -INBUILDDIR=$(addprefix $(BUILDDIR)/,$(1)) - -# Add SRCDIR as a prefix to each element of $1 -INSRCDIR=$(addprefix $(SRCDIR)/,$(1)) - - -######################################## -# Environment variable check -######################################## - -# The second-last directory in the $(BUILDDIR) path -# must have the name "build". This reduces the risk of terrible -# accidents if paths are not set up correctly. 
-ifeq ("$(notdir $(BUILDDIR))", "") -$(error $$BUILDDIR is not set correctly) -endif - -ifneq ("$(notdir $(patsubst %/,%,$(dir $(BUILDDIR))))", "build") -$(error $$BUILDDIR is not set correctly) -endif - -.PHONY: run - -######################################## -# Derived variables -######################################## - -ifeq ($(DEBUGGER),) -DEBUGGER=gdb -endif - -OBJS = $(call INBUILDDIR,$(SRCDIR_OBJS)) - -######################################## -# Rules -######################################## - -default: $(BUILDDIR) $(BIN) - -run: - @$(BIN) $(ARGS) - -debug: - @$(DEBUGGER) --args $(BIN) $(ARGS) - -clean : - rm -f $(BUILDDIR)/* - if [ -d $(BUILDDIR) ]; then rmdir $(BUILDDIR); fi - -$(BIN) : $(OBJS) $(BUILDDIR)/parboil.o - $(CXX) $^ -o $@ $(LDFLAGS) - -$(BUILDDIR) : - mkdir -p $(BUILDDIR) - -$(BUILDDIR)/%.o : $(SRCDIR)/%.c - $(CC) $(CFLAGS) -c $< -o $@ - -$(BUILDDIR)/parboil.o: $(PARBOIL_ROOT)/common/src/parboil.c - $(CC) $(CFLAGS) -c $< -o $@ - -$(BUILDDIR)/%.o : $(SRCDIR)/%.cc - $(CXX) $(CXXFLAGS) -c $< -o $@ - -$(BUILDDIR)/%.o : $(SRCDIR)/%.cpp - $(CXX) $(CXXFLAGS) -c $< -o $@ - diff --git a/hpvm/test/parboil/common/mk/cuda.mk b/hpvm/test/parboil/common/mk/cuda.mk deleted file mode 100644 index 00f205b2bb0dca42ea140f1d5dd4fa5f149fa178..0000000000000000000000000000000000000000 --- a/hpvm/test/parboil/common/mk/cuda.mk +++ /dev/null @@ -1,109 +0,0 @@ -# (c) 2007 The Board of Trustees of the University of Illinois. 
- -# Default language wide options - -# CUDA specific -LANG_CFLAGS=-I$(PARBOIL_ROOT)/common/include -I$(CUDA_PATH)/include -LANG_CXXFLAGS=$(LANG_CFLAGS) -LANG_LDFLAGS=-L$(CUDA_LIB_PATH) - -LANG_CUDACFLAGS=$(LANG_CFLAGS) - -CFLAGS=$(APP_CFLAGS) $(LANG_CFLAGS) $(PLATFORM_CFLAGS) -CXXFLAGS=$(APP_CXXFLAGS) $(LANG_CXXFLAGS) $(PLATFORM_CXXFLAGS) - -CUDACFLAGS=$(LANG_CUDACFLAGS) $(PLATFORM_CUDACFLAGS) $(APP_CUDACFLAGS) -CUDALDFLAGS=$(LANG_LDFLAGS) $(PLATFORM_CUDALDFLAGS) $(APP_CUDALDFLAGS) - -# Rules common to all makefiles - -######################################## -# Functions -######################################## - -# Add BUILDDIR as a prefix to each element of $1 -INBUILDDIR=$(addprefix $(BUILDDIR)/,$(1)) - -# Add SRCDIR as a prefix to each element of $1 -INSRCDIR=$(addprefix $(SRCDIR)/,$(1)) - - -######################################## -# Environment variable check -######################################## - -# The second-last directory in the $(BUILDDIR) path -# must have the name "build". This reduces the risk of terrible -# accidents if paths are not set up correctly. -ifeq ("$(notdir $(BUILDDIR))", "") -$(error $$BUILDDIR is not set correctly) -endif - -ifneq ("$(notdir $(patsubst %/,%,$(dir $(BUILDDIR))))", "build") -$(error $$BUILDDIR is not set correctly) -endif - -.PHONY: run - -ifeq ($(CUDA_PATH),) -FAILSAFE=no_cuda -else -FAILSAFE= -endif - -######################################## -# Derived variables -######################################## - -ifeq ($(DEBUGGER),) -DEBUGGER=gdb -endif - -OBJS = $(call INBUILDDIR,$(SRCDIR_OBJS)) - -######################################## -# Rules -######################################## - -default: $(FAILSAFE) $(BUILDDIR) $(BIN) - -run: - @echo "Resolving CUDA runtime library..." - @$(shell echo $(RUNTIME_ENV)) LD_LIBRARY_PATH=$(CUDA_LIB_PATH) ldd $(BIN) | grep cuda - $(shell echo $(RUNTIME_ENV)) LD_LIBRARY_PATH=$(CUDA_LIB_PATH) ./$(BIN) $(ARGS) - -debug: - @echo "Resolving CUDA runtime library..." 
- @$(shell echo $(RUNTIME_ENV)) LD_LIBRARY_PATH=$(CUDA_LIB_PATH) ldd $(BIN) | grep cuda - @$(shell echo $(RUNTIME_ENV)) LD_LIBRARY_PATH=$(CUDA_LIB_PATH) $(DEBUGGER) --args $(BIN) $(ARGS) - -clean : - rm -rf $(BUILDDIR)/* - if [ -d $(BUILDDIR) ]; then rmdir $(BUILDDIR); fi - -$(BIN) : $(OBJS) $(BUILDDIR)/parboil_cuda.o - $(CUDALINK) $^ -o $@ $(CUDALDFLAGS) - -$(BUILDDIR) : - mkdir -p $(BUILDDIR) - -$(BUILDDIR)/%.o : $(SRCDIR)/%.c - $(CC) $(CFLAGS) -c $< -o $@ - -$(BUILDDIR)/parboil_cuda.o: $(PARBOIL_ROOT)/common/src/parboil_cuda.c - $(CC) $(CFLAGS) -c $< -o $@ - -$(BUILDDIR)/%.o : $(SRCDIR)/%.cc - $(CXX) $(CXXFLAGS) -c $< -o $@ - -$(BUILDDIR)/%.o : $(SRCDIR)/%.cpp - $(CXX) $(CXXFLAGS) -c $< -o $@ - -$(BUILDDIR)/%.o : $(SRCDIR)/%.cu - $(CUDACC) $< $(CUDACFLAGS) -c -o $@ - -no_cuda: - @echo "CUDA_PATH is not set. Open $(CUDA_ROOT)/common/Makefile.conf to set default value." - @echo "You may use $(PLATFORM_MK) if you want a platform specific configurations." - @exit 1 - diff --git a/hpvm/test/parboil/common/mk/opencl.mk b/hpvm/test/parboil/common/mk/opencl.mk deleted file mode 100644 index 35b4d1bc90c6fe776ca7e376e6883dd3bef687c9..0000000000000000000000000000000000000000 --- a/hpvm/test/parboil/common/mk/opencl.mk +++ /dev/null @@ -1,129 +0,0 @@ -# (c) 2007 The Board of Trustees of the University of Illinois. 
- -# Default language wide options - -LANG_CFLAGS=-I$(PARBOIL_ROOT)/common/include -I$(OPENCL_PATH)/include -LANG_CXXFLAGS=$(LANG_CFLAGS) -LANG_LDFLAGS=-lOpenCL -L$(OPENCL_LIB_PATH) -lrt - -CFLAGS=$(LANG_CFLAGS) $(PLATFORM_CFLAGS) $(APP_CFLAGS) -CXXFLAGS=$(LANG_CXXFLAGS) $(PLATFORM_CXXFLAGS) $(APP_CXXFLAGS) -LDFLAGS=$(LANG_LDFLAGS) $(PLATFORM_LDFLAGS) $(APP_LDFLAGS) - - -LLVM_INSTALL:=$(LLVM_SRC_ROOT)/../build -LIBCLC:=$(LLVM_SRC_ROOT)/../../libclc-install -LLVM_CC:=$(LLVM_INSTALL)/bin/clang -LLVM_LINK:=$(LLVM_INSTALL)/bin/llvm-link - - -# Rules common to all makefiles - -######################################## -# Functions -######################################## - -# Add BUILDDIR as a prefix to each element of $1 -INBUILDDIR=$(addprefix $(BUILDDIR)/,$(1)) - -# Add SRCDIR as a prefix to each element of $1 -INSRCDIR=$(addprefix $(SRCDIR)/,$(1)) - - -######################################## -# Environment variable check -######################################## - -# The second-last directory in the $(BUILDDIR) path -# must have the name "build". This reduces the risk of terrible -# accidents if paths are not set up correctly. -ifeq ("$(notdir $(BUILDDIR))", "") -$(error $$BUILDDIR is not set correctly) -endif - -ifneq ("$(notdir $(patsubst %/,%,$(dir $(BUILDDIR))))", "build") -$(error $$BUILDDIR is not set correctly) -endif - -.PHONY: run -.PRECIOUS: $(BUILDDIR)/%.ll - -ifeq ($(OPENCL_PATH),) -FAILSAFE=no_opencl -else -FAILSAFE= -endif - -######################################## -# Derived variables -######################################## - -OBJS = $(call INBUILDDIR,$(SRCDIR_OBJS)) -KERNEL = $(call INBUILDDIR,$(KERNEL_OBJS)) - -ifeq ($(DEBUGGER),) -DEBUGGER=gdb -endif - -######################################## -# Rules -######################################## - -default: $(FAILSAFE) $(BUILDDIR) $(BIN) $(KERNEL) - -run : $(RUNDIR) - echo "Resolving OpenCL library..." 
- $(shell echo $(RUNTIME_ENV)) LD_LIBRARY_PATH=$(OPENCL_LIB_PATH) ldd ./$(BIN) | grep OpenCL - $(shell echo $(RUNTIME_ENV)) LD_LIBRARY_PATH=$(OPENCL_LIB_PATH) ./$(BIN) $(ARGS) - $(TOOL) $(OUTPUT) $(REF_OUTPUT) - -debug: - @echo "Resolving OpenCL library..." - @$(shell echo $(RUNTIME_ENV)) LD_LIBRARY_PATH=$(OPENCL_LIB_PATH) ldd $(BIN) | grep OpenCL - @$(shell echo $(RUNTIME_ENV)) LD_LIBRARY_PATH=$(OPENCL_LIB_PATH) $(DEBUGGER) --args $(BIN) $(ARGS) - -clean : - rm -f $(BUILDDIR)/* - if [ -a $(BIN) ]; then rm $(BIN); fi - if [ -d $(BUILDDIR) ]; then rm -rf $(BUILDDIR); fi - if [ -d $(RUNDIR) ]; then rm -rf $(RUNDIR); fi - -$(BIN) : $(OBJS) $(BUILDDIR)/parboil_opencl.o - $(CXX) $^ -o $@ $(LDFLAGS) - -$(RUNDIR) : - mkdir -p $(RUNDIR) - -$(BUILDDIR) : - mkdir -p $(BUILDDIR) - -$(BUILDDIR)/%.o : $(SRCDIR)/%.c - $(CC) $(CFLAGS) -c $< -o $@ - -$(BUILDDIR)/parboil_opencl.o : $(PARBOIL_ROOT)/common/src/parboil_opencl.c - $(CC) $(CFLAGS) -c $< -o $@ - -$(BUILDDIR)/%.o : $(SRCDIR)/%.cc - $(CXX) $(CXXFLAGS) -c $< -o $@ - -$(BUILDDIR)/%.o : $(SRCDIR)/%.cpp - $(CXX) $(CXXFLAGS) -c $< -o $@ - -$(BUILDDIR)/%.nvptx.s : $(BUILDDIR)/%.linked.bc - $(LLVM_CC) -O3 -target nvptx64-nvidia-nvcl $< -S -o $@ - -$(BUILDDIR)/%.linked.bc : $(BUILDDIR)/%.ll - $(LLVM_LINK) $(LIBCLC)/lib/clc/nvptx64--nvidiacl.bc $< -o $@ - -$(BUILDDIR)/%.ll : $(SRCDIR)/%.cl - $(LLVM_CC) $(CFLAGS) -Dcl_clang_storage_class_specifiers -isystem $(LIBCLC)/include -include clc/clc.h -target nvptx64-nvidia-nvcl $< -O3 -emit-llvm -S -o $@ - -$(BUILDDIR)/%.ir : $(SRCDIR)/%.cl - cd $(SRCDIR); ioc64 -input=kernel.cl -ir=kernel.ir - cp $(SRCDIR)/kernel.ir $@ - -no_opencl: - @echo "OPENCL_PATH is not set. Open $(PARBOIL_ROOT)/common/Makefile.conf to set default value." - @echo "You may use $(PLATFORM_MK) if you want a platform specific configurations." 
- @exit 1 - diff --git a/hpvm/test/parboil/common/platform/c.default.mk b/hpvm/test/parboil/common/platform/c.default.mk deleted file mode 100644 index d9058f11b1d913189a550f060f5d5dc9e08af2c9..0000000000000000000000000000000000000000 --- a/hpvm/test/parboil/common/platform/c.default.mk +++ /dev/null @@ -1,17 +0,0 @@ -# (c) 2007 The Board of Trustees of the University of Illinois. - -# Rules common to all makefiles - -# Commands to build objects from source file using C compiler -# with gcc - -# gcc (default) -CC = gcc -PLATFORM_CFLAGS = - -CXX = g++ -PLATFORM_CXXFLAGS = - -LINKER = g++ -PLATFORM_LDFLAGS = -lm -lpthread - diff --git a/hpvm/test/parboil/common/platform/c.gcc.mk b/hpvm/test/parboil/common/platform/c.gcc.mk deleted file mode 100644 index d9058f11b1d913189a550f060f5d5dc9e08af2c9..0000000000000000000000000000000000000000 --- a/hpvm/test/parboil/common/platform/c.gcc.mk +++ /dev/null @@ -1,17 +0,0 @@ -# (c) 2007 The Board of Trustees of the University of Illinois. - -# Rules common to all makefiles - -# Commands to build objects from source file using C compiler -# with gcc - -# gcc (default) -CC = gcc -PLATFORM_CFLAGS = - -CXX = g++ -PLATFORM_CXXFLAGS = - -LINKER = g++ -PLATFORM_LDFLAGS = -lm -lpthread - diff --git a/hpvm/test/parboil/common/platform/cuda.arch20.mk b/hpvm/test/parboil/common/platform/cuda.arch20.mk deleted file mode 100644 index b08376c93195fda7ad477e89a9a93b0212ca1161..0000000000000000000000000000000000000000 --- a/hpvm/test/parboil/common/platform/cuda.arch20.mk +++ /dev/null @@ -1,23 +0,0 @@ -# (c) 2007 The Board of Trustees of the University of Illinois. 
- -# Cuda-related definitions common to all benchmarks - -######################################## -# Variables -######################################## - -# c.default is the base along with CUDA configuration in this setting -include $(PARBOIL_ROOT)/common/platform/c.default.mk - -# Paths -CUDAHOME=/usr/local/cuda - -# Programs -CUDACC=$(CUDAHOME)/bin/nvcc -CUDALINK=$(CUDAHOME)/bin/nvcc - -# Flags -PLATFORM_CUDACFLAGS=-code=sm_20 -PLATFORM_CUDALDFLAGS=-lm -lpthread - - diff --git a/hpvm/test/parboil/common/platform/cuda.default.mk b/hpvm/test/parboil/common/platform/cuda.default.mk deleted file mode 100644 index 237da5aa3e601ed1fc53cb7fc5fb8c986c738eec..0000000000000000000000000000000000000000 --- a/hpvm/test/parboil/common/platform/cuda.default.mk +++ /dev/null @@ -1,23 +0,0 @@ -# (c) 2007 The Board of Trustees of the University of Illinois. - -# Cuda-related definitions common to all benchmarks - -######################################## -# Variables -######################################## - -# c.default is the base along with CUDA configuration in this setting -include $(PARBOIL_ROOT)/common/platform/c.default.mk - -# Paths -CUDAHOME=/usr/local/cuda - -# Programs -CUDACC=$(CUDAHOME)/bin/nvcc -CUDALINK=$(CUDAHOME)/bin/nvcc - -# Flags -PLATFORM_CUDACFLAGS=-O3 -PLATFORM_CUDALDFLAGS=-lm -lpthread - - diff --git a/hpvm/test/parboil/common/platform/cuda.mcuda.mk b/hpvm/test/parboil/common/platform/cuda.mcuda.mk deleted file mode 100644 index 05a6fee9a9a0328d6ff415afdb4b9acbf91580fa..0000000000000000000000000000000000000000 --- a/hpvm/test/parboil/common/platform/cuda.mcuda.mk +++ /dev/null @@ -1,25 +0,0 @@ -# (c) 2011 The Board of Trustees of the University of Illinois. 
- -# Cuda-related definitions common to all benchmarks - -######################################## -# Variables -######################################## - -# c.default is the base along with CUDA configuration in this setting -include $(PARBOIL_ROOT)/common/platform/c.default.mk - -# Paths -CUDA_PATH=$(MCUDA_PATH)/include - -# Programs -CUDACC=$(MCUDA_PATH)/bin/mcc_xmm -CUDALINK=$(LINKER) - -# Flags -PLATFORM_CUDACFLAGS=-O3 -PLATFORM_CFLAGS=-O3 -I$(MCUDA_PATH)/include -D__MCUDA__ -PLATFORM_CXXFLAGS=-O3 -I$(MCUDA_PATH)/include -D__MCUDA__ -PLATFORM_CUDALDFLAGS=-lm -lpthread -L$(MCUDA_PATH)/lib -lmcuda - - diff --git a/hpvm/test/parboil/common/platform/cuda.nvcc.mk b/hpvm/test/parboil/common/platform/cuda.nvcc.mk deleted file mode 100644 index 237da5aa3e601ed1fc53cb7fc5fb8c986c738eec..0000000000000000000000000000000000000000 --- a/hpvm/test/parboil/common/platform/cuda.nvcc.mk +++ /dev/null @@ -1,23 +0,0 @@ -# (c) 2007 The Board of Trustees of the University of Illinois. - -# Cuda-related definitions common to all benchmarks - -######################################## -# Variables -######################################## - -# c.default is the base along with CUDA configuration in this setting -include $(PARBOIL_ROOT)/common/platform/c.default.mk - -# Paths -CUDAHOME=/usr/local/cuda - -# Programs -CUDACC=$(CUDAHOME)/bin/nvcc -CUDALINK=$(CUDAHOME)/bin/nvcc - -# Flags -PLATFORM_CUDACFLAGS=-O3 -PLATFORM_CUDALDFLAGS=-lm -lpthread - - diff --git a/hpvm/test/parboil/common/platform/opencl.default.mk b/hpvm/test/parboil/common/platform/opencl.default.mk deleted file mode 100644 index a6ac4645dceb8097c304784dd66280d4f53d1f4f..0000000000000000000000000000000000000000 --- a/hpvm/test/parboil/common/platform/opencl.default.mk +++ /dev/null @@ -1,23 +0,0 @@ -# (c) 2007 The Board of Trustees of the University of Illinois. 
- -# Rules common to all makefiles - -# Commands to build objects from source file using C compiler -# with gcc - -# Uncomment below two lines and configure if you want to use a platform -# other than global one - -#OPENCL_PATH=/scr/hskim/ati-stream-sdk-v2.3-lnx64 -#OPENCL_LIB_PATH=$(OPENCL_PATH)/lib/x86_64 - -# gcc (default) -CC = clang -PLATFORM_CFLAGS = - -CXX = clang++ -PLATFORM_CXXFLAGS = - -LINKER = clang++ -PLATFORM_LDFLAGS = -lm -lpthread - diff --git a/hpvm/test/parboil/common/platform/opencl.gcc.mk b/hpvm/test/parboil/common/platform/opencl.gcc.mk deleted file mode 100644 index d9058f11b1d913189a550f060f5d5dc9e08af2c9..0000000000000000000000000000000000000000 --- a/hpvm/test/parboil/common/platform/opencl.gcc.mk +++ /dev/null @@ -1,17 +0,0 @@ -# (c) 2007 The Board of Trustees of the University of Illinois. - -# Rules common to all makefiles - -# Commands to build objects from source file using C compiler -# with gcc - -# gcc (default) -CC = gcc -PLATFORM_CFLAGS = - -CXX = g++ -PLATFORM_CXXFLAGS = - -LINKER = g++ -PLATFORM_LDFLAGS = -lm -lpthread - diff --git a/hpvm/test/parboil/common/src/parboil_cuda.c b/hpvm/test/parboil/common/src/parboil_cuda.c deleted file mode 100644 index 9fd64661643c9afec5cb470beaa516d545017bd3..0000000000000000000000000000000000000000 --- a/hpvm/test/parboil/common/src/parboil_cuda.c +++ /dev/null @@ -1,863 +0,0 @@ -/* - * (c) 2007 The Board of Trustees of the University of Illinois. - */ - -#include <parboil.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#ifndef __MCUDA__ -#include <cuda_runtime_api.h> -#else -#include <mcuda.h> -#endif - -#if _POSIX_VERSION >= 200112L -#include <sys/time.h> -#endif - -#define true 1 - -/* Free an array of owned strings. */ -static void free_string_array(char **string_array) { - char **p; - - if (!string_array) - return; - for (p = string_array; *p; p++) - free(*p); - free(string_array); -} - -/* Parse a comma-delimited list of strings into an - * array of strings. 
*/ -static char **read_string_array(char *in) { - char **ret; - int i; - int count; /* Number of items in the input */ - char *substring; /* Current substring within 'in' */ - - /* Count the number of items in the string */ - count = 1; - for (i = 0; in[i]; i++) - if (in[i] == ',') - count++; - - /* Allocate storage */ - ret = (char **)malloc((count + 1) * sizeof(char *)); - - /* Create copies of the strings from the list */ - substring = in; - for (i = 0; i < count; i++) { - char *substring_end; - int substring_length; - - /* Find length of substring */ - for (substring_end = substring; - (*substring_end != ',') && (*substring_end != 0); substring_end++) - ; - - substring_length = substring_end - substring; - - /* Allocate memory and copy the substring */ - ret[i] = (char *)malloc(substring_length + 1); - memcpy(ret[i], substring, substring_length); - ret[i][substring_length] = 0; - - /* go to next substring */ - substring = substring_end + 1; - } - ret[i] = NULL; /* Write the sentinel value */ - - return ret; -} - -struct argparse { - int argc; /* Number of arguments. Mutable. */ - char **argv; /* Argument values. Immutable. */ - - int argn; /* Current argument number. */ - char **argv_get; /* Argument value being read. */ - char **argv_put; /* Argument value being written. - * argv_put <= argv_get. */ -}; - -static void initialize_argparse(struct argparse *ap, int argc, char **argv) { - ap->argc = argc; - ap->argn = 0; - ap->argv_get = ap->argv_put = ap->argv = argv; -} - -static void finalize_argparse(struct argparse *ap) { - /* Move the remaining arguments */ - for (; ap->argn < ap->argc; ap->argn++) - *ap->argv_put++ = *ap->argv_get++; -} - -/* Delete the current argument. */ -static void delete_argument(struct argparse *ap) { - if (ap->argn >= ap->argc) { - fprintf(stderr, "delete_argument\n"); - } - ap->argc--; - ap->argv_get++; -} - -/* Go to the next argument. Also, move the current argument to its - * final location in argv. 
*/ -static void next_argument(struct argparse *ap) { - if (ap->argn >= ap->argc) { - fprintf(stderr, "next_argument\n"); - } - /* Move argument to its new location. */ - *ap->argv_put++ = *ap->argv_get++; - ap->argn++; -} - -static int is_end_of_arguments(struct argparse *ap) { - return ap->argn == ap->argc; -} - -static char *get_argument(struct argparse *ap) { return *ap->argv_get; } - -static char *consume_argument(struct argparse *ap) { - char *ret = get_argument(ap); - delete_argument(ap); - return ret; -} - -struct pb_Parameters *pb_ReadParameters(int *_argc, char **argv) { - char *err_message; - struct argparse ap; - struct pb_Parameters *ret = - (struct pb_Parameters *)malloc(sizeof(struct pb_Parameters)); - - /* Initialize the parameters structure */ - ret->outFile = NULL; - ret->inpFiles = (char **)malloc(sizeof(char *)); - ret->inpFiles[0] = NULL; - - /* Each argument */ - initialize_argparse(&ap, *_argc, argv); - while (!is_end_of_arguments(&ap)) { - char *arg = get_argument(&ap); - - /* Single-character flag */ - if ((arg[0] == '-') && (arg[1] != 0) && (arg[2] == 0)) { - delete_argument(&ap); /* This argument is consumed here */ - - switch (arg[1]) { - case 'o': /* Output file name */ - if (is_end_of_arguments(&ap)) { - err_message = "Expecting file name after '-o'\n"; - goto error; - } - free(ret->outFile); - ret->outFile = strdup(consume_argument(&ap)); - break; - case 'i': /* Input file name */ - if (is_end_of_arguments(&ap)) { - err_message = "Expecting file name after '-i'\n"; - goto error; - } - ret->inpFiles = read_string_array(consume_argument(&ap)); - break; - case '-': /* End of options */ - goto end_of_options; - default: - err_message = "Unexpected command-line parameter\n"; - goto error; - } - } else { - /* Other parameters are ignored */ - next_argument(&ap); - } - } /* end for each argument */ - -end_of_options: - *_argc = ap.argc; /* Save the modified argc value */ - finalize_argparse(&ap); - - return ret; - -error: - fputs(err_message, 
stderr); - pb_FreeParameters(ret); - return NULL; -} - -void pb_FreeParameters(struct pb_Parameters *p) { - char **cpp; - - free(p->outFile); - free_string_array(p->inpFiles); - free(p); -} - -int pb_Parameters_CountInputs(struct pb_Parameters *p) { - int n; - - for (n = 0; p->inpFiles[n]; n++) - ; - return n; -} - -/*****************************************************************************/ -/* Timer routines */ - -static int is_async(enum pb_TimerID timer) { - return (timer == pb_TimerID_KERNEL) || (timer == pb_TimerID_COPY_ASYNC); -} - -static int is_blocking(enum pb_TimerID timer) { - return (timer == pb_TimerID_COPY) || (timer == pb_TimerID_NONE); -} - -#define INVALID_TIMERID pb_TimerID_LAST - -static int asyncs_outstanding(struct pb_TimerSet *timers) { - return (timers->async_markers != NULL) && - (timers->async_markers->timerID != INVALID_TIMERID); -} - -static struct pb_async_time_marker_list * -get_last_async(struct pb_TimerSet *timers) { - /* Find the last event recorded thus far */ - struct pb_async_time_marker_list *last_event = timers->async_markers; - if (last_event != NULL && last_event->timerID != INVALID_TIMERID) { - while (last_event->next != NULL && - last_event->next->timerID != INVALID_TIMERID) - last_event = last_event->next; - return last_event; - } else - return NULL; -} - -static void insert_marker(struct pb_TimerSet *tset, enum pb_TimerID timer) { - struct pb_async_time_marker_list **new_event = &(tset->async_markers); - - while (*new_event != NULL && (*new_event)->timerID != INVALID_TIMERID) - new_event = &((*new_event)->next); - - if (*new_event == NULL) { - *new_event = (struct pb_async_time_marker_list *)malloc( - sizeof(struct pb_async_time_marker_list)); - (*new_event)->marker = malloc(sizeof(cudaEvent_t)); - cudaEventCreate((*new_event)->marker); - (*new_event)->next = NULL; - } - - /* valid event handle now aquired: insert the event record */ - (*new_event)->label = NULL; - (*new_event)->timerID = timer; - 
cudaEventRecord(*((cudaEvent_t *)((*new_event)->marker)), 0); -} - -static void insert_submarker(struct pb_TimerSet *tset, char *label, - enum pb_TimerID timer) { - struct pb_async_time_marker_list **new_event = &(tset->async_markers); - - while (*new_event != NULL && (*new_event)->timerID != INVALID_TIMERID) - new_event = &((*new_event)->next); - - if (*new_event == NULL) { - *new_event = (struct pb_async_time_marker_list *)malloc( - sizeof(struct pb_async_time_marker_list)); - (*new_event)->marker = malloc(sizeof(cudaEvent_t)); - cudaEventCreate((*new_event)->marker); - - (*new_event)->next = NULL; - } - - /* valid event handle now aquired: insert the event record */ - (*new_event)->label = label; - (*new_event)->timerID = timer; - cudaEventRecord(*((cudaEvent_t *)((*new_event)->marker)), 0); -} - -/* Assumes that all recorded events have completed */ -static pb_Timestamp record_async_times(struct pb_TimerSet *tset) { - struct pb_async_time_marker_list *next_interval = NULL; - struct pb_async_time_marker_list *last_marker = get_last_async(tset); - pb_Timestamp total_async_time = 0; - enum pb_TimerID timer; - for (next_interval = tset->async_markers; next_interval != last_marker; - next_interval = next_interval->next) { - float interval_time_ms; - cudaEventElapsedTime(&interval_time_ms, - *((cudaEvent_t *)next_interval->marker), - *((cudaEvent_t *)next_interval->next->marker)); - pb_Timestamp interval = (pb_Timestamp)(interval_time_ms * 1e3); - tset->timers[next_interval->timerID].elapsed += interval; - if (next_interval->label != NULL) { - struct pb_SubTimer *subtimer = - tset->sub_timer_list[next_interval->timerID]->subtimer_list; - while (subtimer != NULL) { - if (strcmp(subtimer->label, next_interval->label) == 0) { - subtimer->timer.elapsed += interval; - break; - } - subtimer = subtimer->next; - } - } - total_async_time += interval; - next_interval->timerID = INVALID_TIMERID; - } - - if (next_interval != NULL) - next_interval->timerID = INVALID_TIMERID; - - 
return total_async_time; -} - -static void accumulate_time(pb_Timestamp *accum, pb_Timestamp start, - pb_Timestamp end) { -#if _POSIX_VERSION >= 200112L - *accum += end - start; -#else -#error "Timestamps not implemented for this system" -#endif -} - -#if _POSIX_VERSION >= 200112L -static pb_Timestamp get_time() { - struct timeval tv; - gettimeofday(&tv, NULL); - return (pb_Timestamp)(tv.tv_sec * 1000000LL + tv.tv_usec); -} -#else -#error "no supported time libraries are available on this platform" -#endif - -void pb_ResetTimer(struct pb_Timer *timer) { - timer->state = pb_Timer_STOPPED; - -#if _POSIX_VERSION >= 200112L - timer->elapsed = 0; -#else -#error "pb_ResetTimer: not implemented for this system" -#endif -} - -void pb_StartTimer(struct pb_Timer *timer) { - if (timer->state != pb_Timer_STOPPED) { - fputs("Ignoring attempt to start a running timer\n", stderr); - return; - } - - timer->state = pb_Timer_RUNNING; - -#if _POSIX_VERSION >= 200112L - { - struct timeval tv; - gettimeofday(&tv, NULL); - timer->init = tv.tv_sec * 1000000LL + tv.tv_usec; - } -#else -#error "pb_StartTimer: not implemented for this system" -#endif -} - -void pb_StartTimerAndSubTimer(struct pb_Timer *timer, - struct pb_Timer *subtimer) { - - unsigned int numNotStopped = 0x3; // 11 - if (timer->state != pb_Timer_STOPPED) { - fputs("Warning: Timer was not stopped\n", stderr); - numNotStopped &= 0x1; // Zero out 2^1 - } - if (subtimer->state != pb_Timer_STOPPED) { - fputs("Warning: Subtimer was not stopped\n", stderr); - numNotStopped &= 0x2; // Zero out 2^0 - } - if (numNotStopped == 0x0) { - fputs("Ignoring attempt to start running timer and subtimer\n", stderr); - return; - } - - timer->state = pb_Timer_RUNNING; - subtimer->state = pb_Timer_RUNNING; - -#if _POSIX_VERSION >= 200112L - { - struct timeval tv; - gettimeofday(&tv, NULL); - - if (numNotStopped & 0x2) { - timer->init = tv.tv_sec * 1000000LL + tv.tv_usec; - } - - if (numNotStopped & 0x1) { - subtimer->init = tv.tv_sec * 1000000LL 
+ tv.tv_usec; - } - } -#else -#error "pb_StartTimer: not implemented for this system" -#endif -} - -void pb_StopTimer(struct pb_Timer *timer) { - pb_Timestamp fini; - - if (timer->state != pb_Timer_RUNNING) { - fputs("Ignoring attempt to stop a stopped timer\n", stderr); - return; - } - - timer->state = pb_Timer_STOPPED; - -#if _POSIX_VERSION >= 200112L - { - struct timeval tv; - gettimeofday(&tv, NULL); - fini = tv.tv_sec * 1000000LL + tv.tv_usec; - } -#else -#error "pb_StopTimer: not implemented for this system" -#endif - - accumulate_time(&timer->elapsed, timer->init, fini); - timer->init = fini; -} - -void pb_StopTimerAndSubTimer(struct pb_Timer *timer, - struct pb_Timer *subtimer) { - - pb_Timestamp fini; - - unsigned int numNotRunning = 0x3; // 11 - if (timer->state != pb_Timer_RUNNING) { - fputs("Warning: Timer was not running\n", stderr); - numNotRunning &= 0x1; // Zero out 2^1 - } - if (subtimer->state != pb_Timer_RUNNING) { - fputs("Warning: Subtimer was not running\n", stderr); - numNotRunning &= 0x2; // Zero out 2^0 - } - if (numNotRunning == 0x0) { - fputs("Ignoring attempt to stop stopped timer and subtimer\n", stderr); - return; - } - - timer->state = pb_Timer_STOPPED; - subtimer->state = pb_Timer_STOPPED; - -#if _POSIX_VERSION >= 200112L - { - struct timeval tv; - gettimeofday(&tv, NULL); - fini = tv.tv_sec * 1000000LL + tv.tv_usec; - } -#else -#error "pb_StopTimer: not implemented for this system" -#endif - - if (numNotRunning & 0x2) { - accumulate_time(&timer->elapsed, timer->init, fini); - timer->init = fini; - } - - if (numNotRunning & 0x1) { - accumulate_time(&subtimer->elapsed, subtimer->init, fini); - subtimer->init = fini; - } -} - -/* Get the elapsed time in seconds. 
*/ -double pb_GetElapsedTime(struct pb_Timer *timer) { - double ret; - - if (timer->state != pb_Timer_STOPPED) { - fputs("Elapsed time from a running timer is inaccurate\n", stderr); - } - -#if _POSIX_VERSION >= 200112L - ret = timer->elapsed / 1e6; -#else -#error "pb_GetElapsedTime: not implemented for this system" -#endif - return ret; -} - -void pb_InitializeTimerSet(struct pb_TimerSet *timers) { - int n; - - timers->wall_begin = get_time(); - timers->current = pb_TimerID_NONE; - - timers->async_markers = NULL; - - for (n = 0; n < pb_TimerID_LAST; n++) { - pb_ResetTimer(&timers->timers[n]); - timers->sub_timer_list[n] = NULL; - } -} - -void pb_SetOpenCL(void *clContextPtr, void *clCommandQueuePtr) {} - -void pb_AddSubTimer(struct pb_TimerSet *timers, char *label, - enum pb_TimerID pb_Category) { - - struct pb_SubTimer *subtimer = - (struct pb_SubTimer *)malloc(sizeof(struct pb_SubTimer)); - - int len = strlen(label); - - subtimer->label = (char *)malloc(sizeof(char) * (len + 1)); - sprintf(subtimer->label, "%s\0", label); - - pb_ResetTimer(&subtimer->timer); - subtimer->next = NULL; - - struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[pb_Category]; - if (subtimerlist == NULL) { - subtimerlist = - (struct pb_SubTimerList *)malloc(sizeof(struct pb_SubTimerList)); - subtimerlist->subtimer_list = subtimer; - timers->sub_timer_list[pb_Category] = subtimerlist; - } else { - // Append to list - struct pb_SubTimer *element = subtimerlist->subtimer_list; - while (element->next != NULL) { - element = element->next; - } - element->next = subtimer; - } -} - -void pb_SwitchToTimer(struct pb_TimerSet *timers, enum pb_TimerID timer) { - /* Stop the currently running timer */ - if (timers->current != pb_TimerID_NONE) { - struct pb_SubTimerList *subtimerlist = - timers->sub_timer_list[timers->current]; - struct pb_SubTimer *currSubTimer = - (subtimerlist != NULL) ? 
subtimerlist->current : NULL; - - if (!is_async(timers->current)) { - if (timers->current != timer) { - if (currSubTimer != NULL) { - pb_StopTimerAndSubTimer(&timers->timers[timers->current], - &currSubTimer->timer); - } else { - pb_StopTimer(&timers->timers[timers->current]); - } - } else { - if (currSubTimer != NULL) { - pb_StopTimer(&currSubTimer->timer); - } - } - } else { - insert_marker(timers, timer); - if (!is_async(timer)) { // if switching to async too, keep driver going - pb_StopTimer(&timers->timers[pb_TimerID_DRIVER]); - } - } - } - - pb_Timestamp currentTime = get_time(); - - /* The only cases we check for asynchronous task completion is - * when an overlapping CPU operation completes, or the next - * segment blocks on completion of previous async operations */ - if (asyncs_outstanding(timers) && - (!is_async(timers->current) || is_blocking(timer))) { - - struct pb_async_time_marker_list *last_event = get_last_async(timers); - /* cudaSuccess if completed */ - cudaError_t async_done = - cudaEventQuery(*((cudaEvent_t *)last_event->marker)); - - if (is_blocking(timer)) { - /* Async operations completed after previous CPU operations: - * overlapped time is the total CPU time since this set of async - * operations were first issued */ - - // timer to switch to is COPY or NONE - if (async_done != cudaSuccess) - accumulate_time(&(timers->timers[pb_TimerID_OVERLAP].elapsed), - timers->async_begin, currentTime); - - /* Wait on async operation completion */ - cudaEventSynchronize(*((cudaEvent_t *)last_event->marker)); - pb_Timestamp total_async_time = record_async_times(timers); - - /* Async operations completed before previous CPU operations: - * overlapped time is the total async time */ - if (async_done == cudaSuccess) - timers->timers[pb_TimerID_OVERLAP].elapsed += total_async_time; - - } else - /* implies (!is_async(timers->current) && asyncs_outstanding(timers)) */ - // i.e. 
Current Not Async (not KERNEL/COPY_ASYNC) but there are - // outstanding so something is deeper in stack - if (async_done == cudaSuccess) { - /* Async operations completed before previous CPU operations: - * overlapped time is the total async time */ - timers->timers[pb_TimerID_OVERLAP].elapsed += record_async_times(timers); - } - } - - /* Start the new timer */ - if (timer != pb_TimerID_NONE) { - if (!is_async(timer)) { - pb_StartTimer(&timers->timers[timer]); - } else { - // toSwitchTo Is Async (KERNEL/COPY_ASYNC) - if (!asyncs_outstanding(timers)) { - /* No asyncs outstanding, insert a fresh async marker */ - - insert_marker(timers, timer); - timers->async_begin = currentTime; - } else if (!is_async(timers->current)) { - /* Previous asyncs still in flight, but a previous SwitchTo - * already marked the end of the most recent async operation, - * so we can rename that marker as the beginning of this async - * operation */ - - struct pb_async_time_marker_list *last_event = get_last_async(timers); - last_event->label = NULL; - last_event->timerID = timer; - } - if (!is_async(timers->current)) { - pb_StartTimer(&timers->timers[pb_TimerID_DRIVER]); - } - } - } - timers->current = timer; -} - -void pb_SwitchToSubTimer(struct pb_TimerSet *timers, char *label, - enum pb_TimerID category) { - struct pb_SubTimerList *subtimerlist = - timers->sub_timer_list[timers->current]; - struct pb_SubTimer *curr = - (subtimerlist != NULL) ? 
subtimerlist->current : NULL; - - if (timers->current != pb_TimerID_NONE) { - if (!is_async(timers->current)) { - if (timers->current != category) { - if (curr != NULL) { - pb_StopTimerAndSubTimer(&timers->timers[timers->current], - &curr->timer); - } else { - pb_StopTimer(&timers->timers[timers->current]); - } - } else { - if (curr != NULL) { - pb_StopTimer(&curr->timer); - } - } - } else { - insert_submarker(timers, label, category); - if (!is_async(category)) { // if switching to async too, keep driver going - pb_StopTimer(&timers->timers[pb_TimerID_DRIVER]); - } - } - } - - pb_Timestamp currentTime = get_time(); - - /* The only cases we check for asynchronous task completion is - * when an overlapping CPU operation completes, or the next - * segment blocks on completion of previous async operations */ - if (asyncs_outstanding(timers) && - (!is_async(timers->current) || is_blocking(category))) { - - struct pb_async_time_marker_list *last_event = get_last_async(timers); - /* cudaSuccess if completed */ - cudaError_t async_done = - cudaEventQuery(*((cudaEvent_t *)last_event->marker)); - - if (is_blocking(category)) { - /* Async operations completed after previous CPU operations: - * overlapped time is the total CPU time since this set of async - * operations were first issued */ - - // timer to switch to is COPY or NONE - // if it hasn't already finished, then just take now and use that as the - // elapsed time in OVERLAP anything happening after now isn't OVERLAP - // because everything is being stopped to wait for synchronization it - // seems that the extra sync wall time isn't being recorded anywhere - if (async_done != cudaSuccess) - accumulate_time(&(timers->timers[pb_TimerID_OVERLAP].elapsed), - timers->async_begin, currentTime); - - /* Wait on async operation completion */ - cudaEventSynchronize(*((cudaEvent_t *)last_event->marker)); - pb_Timestamp total_async_time = record_async_times(timers); - - /* Async operations completed before previous CPU 
operations: - * overlapped time is the total async time */ - // If it did finish, then accumulate all the async time that did happen - // into OVERLAP the immediately preceding EventSynchronize theoretically - // didn't have any effect since it was already completed. - if (async_done == cudaSuccess) - timers->timers[pb_TimerID_OVERLAP].elapsed += total_async_time; - - } else - /* implies (!is_async(timers->current) && asyncs_outstanding(timers)) */ - // i.e. Current Not Async (not KERNEL/COPY_ASYNC) but there are - // outstanding so something is deeper in stack - if (async_done == cudaSuccess) { - /* Async operations completed before previous CPU operations: - * overlapped time is the total async time */ - timers->timers[pb_TimerID_OVERLAP].elapsed += record_async_times(timers); - } - // else, this isn't blocking, so just check the next time around - } - - subtimerlist = timers->sub_timer_list[category]; - struct pb_SubTimer *subtimer = NULL; - - if (label != NULL) { - subtimer = subtimerlist->subtimer_list; - while (subtimer != NULL) { - if (strcmp(subtimer->label, label) == 0) { - break; - } else { - subtimer = subtimer->next; - } - } - } - - /* Start the new timer */ - if (category != pb_TimerID_NONE) { - if (!is_async(category)) { - - if (subtimerlist != NULL) { - subtimerlist->current = subtimer; - } - - if (category != timers->current && subtimer != NULL) { - pb_StartTimerAndSubTimer(&timers->timers[category], &subtimer->timer); - } else if (subtimer != NULL) { - pb_StartTimer(&subtimer->timer); - } else { - pb_StartTimer(&timers->timers[category]); - } - } else { - if (subtimerlist != NULL) { - subtimerlist->current = subtimer; - } - - // toSwitchTo Is Async (KERNEL/COPY_ASYNC) - if (!asyncs_outstanding(timers)) { - /* No asyncs outstanding, insert a fresh async marker */ - insert_submarker(timers, label, category); - timers->async_begin = currentTime; - } else if (!is_async(timers->current)) { - /* Previous asyncs still in flight, but a previous SwitchTo - 
* already marked the end of the most recent async operation, - * so we can rename that marker as the beginning of this async - * operation */ - - struct pb_async_time_marker_list *last_event = get_last_async(timers); - last_event->timerID = category; - last_event->label = label; - } // else, marker for switchToThis was already inserted - - // toSwitchto is already asynchronous, but if current/prev state is async - // too, then DRIVER is already running - if (!is_async(timers->current)) { - pb_StartTimer(&timers->timers[pb_TimerID_DRIVER]); - } - } - } - - timers->current = category; -} - -void pb_PrintTimerSet(struct pb_TimerSet *timers) { - pb_Timestamp wall_end = get_time(); - - struct pb_Timer *t = timers->timers; - struct pb_SubTimer *sub = NULL; - - int maxSubLength; - - // const char *categories[] = { - // "IO", "Kernel", "Copy", "Driver", "Copy Async", "Compute" - // }; - const char *categories[] = { - "IO", "Kernel", "Copy", "Driver", - "Copy Async", "Compute", "Overlap", "Init_Ctx", - "Clear_Ctx", "Copy_Scalar", "Copy_Ptr", "Mem_Free", - "Read_Output", "Setup", "Mem_Track", "Mem_Untrack", - "Misc", "Pthread_Create", "Arg_Unpack", "Computation", - "Output_Pack", "Output_Unpack"}; - - const int maxCategoryLength = 10; - - int i; - for (i = 1; i < pb_TimerID_LAST; - ++i) { // exclude NONE and OVRELAP from this format - if (pb_GetElapsedTime(&t[i]) != 0 || true) { - - // Print Category Timer - printf("%-*s: %f\n", maxCategoryLength, categories[i - 1], - pb_GetElapsedTime(&t[i])); - - if (timers->sub_timer_list[i] != NULL) { - sub = timers->sub_timer_list[i]->subtimer_list; - maxSubLength = 0; - while (sub != NULL) { - // Find longest SubTimer label - if (strlen(sub->label) > maxSubLength) { - maxSubLength = strlen(sub->label); - } - sub = sub->next; - } - - // Fit to Categories - if (maxSubLength <= maxCategoryLength) { - maxSubLength = maxCategoryLength; - } - - sub = timers->sub_timer_list[i]->subtimer_list; - - // Print SubTimers - while (sub != NULL) { - 
printf(" -%-*s: %f\n", maxSubLength, sub->label, - pb_GetElapsedTime(&sub->timer)); - sub = sub->next; - } - } - } - } - - if (pb_GetElapsedTime(&t[pb_TimerID_OVERLAP]) != 0) - printf("CPU/Kernel Overlap: %f\n", - pb_GetElapsedTime(&t[pb_TimerID_OVERLAP])); - - float walltime = (wall_end - timers->wall_begin) / 1e6; - printf("Timer Wall Time: %f\n", walltime); -} - -void pb_DestroyTimerSet(struct pb_TimerSet *timers) { - /* clean up all of the async event markers */ - struct pb_async_time_marker_list **event = &(timers->async_markers); - while (*event != NULL) { - cudaEventSynchronize(*((cudaEvent_t *)(*event)->marker)); - cudaEventDestroy(*((cudaEvent_t *)(*event)->marker)); - free((*event)->marker); - struct pb_async_time_marker_list **next = &((*event)->next); - free(*event); - (*event) = NULL; - event = next; - } - - int i = 0; - for (i = 0; i < pb_TimerID_LAST; ++i) { - if (timers->sub_timer_list[i] != NULL) { - struct pb_SubTimer *subtimer = timers->sub_timer_list[i]->subtimer_list; - struct pb_SubTimer *prev = NULL; - while (subtimer != NULL) { - free(subtimer->label); - prev = subtimer; - subtimer = subtimer->next; - free(prev); - } - free(timers->sub_timer_list[i]); - } - } -} diff --git a/hpvm/test/parboil/common/src/parboil_opencl.c b/hpvm/test/parboil/common/src/parboil_opencl.c deleted file mode 100644 index d493992acee859186d58330a9988ef7ef2571f73..0000000000000000000000000000000000000000 --- a/hpvm/test/parboil/common/src/parboil_opencl.c +++ /dev/null @@ -1,1051 +0,0 @@ -/* - * (c) 2007 The Board of Trustees of the University of Illinois. - */ - -#include <CL/cl.h> -#include <assert.h> -#include <parboil.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#if _POSIX_VERSION >= 200112L -#include <time.h> -#endif - -#define BILLION 1000000000LL -#define true 1 - -cl_context *clContextPtr; -cl_command_queue *clCommandQueuePtr; - -/* Free an array of owned strings. 
*/ -static void free_string_array(char **string_array) { - char **p; - - if (!string_array) - return; - for (p = string_array; *p; p++) - free(*p); - free(string_array); -} - -/* Parse a comma-delimited list of strings into an - * array of strings. */ -static char **read_string_array(char *in) { - char **ret; - int i; - int count; /* Number of items in the input */ - char *substring; /* Current substring within 'in' */ - - /* Count the number of items in the string */ - count = 1; - for (i = 0; in[i]; i++) - if (in[i] == ',') - count++; - - /* Allocate storage */ - ret = (char **)malloc((count + 1) * sizeof(char *)); - - /* Create copies of the strings from the list */ - substring = in; - for (i = 0; i < count; i++) { - char *substring_end; - int substring_length; - - /* Find length of substring */ - for (substring_end = substring; - (*substring_end != ',') && (*substring_end != 0); substring_end++) - ; - - substring_length = substring_end - substring; - - /* Allocate memory and copy the substring */ - ret[i] = (char *)malloc(substring_length + 1); - memcpy(ret[i], substring, substring_length); - ret[i][substring_length] = 0; - - /* go to next substring */ - substring = substring_end + 1; - } - ret[i] = NULL; /* Write the sentinel value */ - - return ret; -} - -struct argparse { - int argc; /* Number of arguments. Mutable. */ - char **argv; /* Argument values. Immutable. */ - - int argn; /* Current argument number. */ - char **argv_get; /* Argument value being read. */ - char **argv_put; /* Argument value being written. - * argv_put <= argv_get. */ -}; - -static void initialize_argparse(struct argparse *ap, int argc, char **argv) { - ap->argc = argc; - ap->argn = 0; - ap->argv_get = ap->argv_put = ap->argv = argv; -} - -static void finalize_argparse(struct argparse *ap) { - /* Move the remaining arguments */ - for (; ap->argn < ap->argc; ap->argn++) - *ap->argv_put++ = *ap->argv_get++; -} - -/* Delete the current argument. 
*/ -static void delete_argument(struct argparse *ap) { - if (ap->argn >= ap->argc) { - // fprintf(stderr, "delete_argument\n"); - } - ap->argc--; - ap->argv_get++; -} - -/* Go to the next argument. Also, move the current argument to its - * final location in argv. */ -static void next_argument(struct argparse *ap) { - if (ap->argn >= ap->argc) { - // fprintf(stderr, "next_argument\n"); - } - /* Move argument to its new location. */ - *ap->argv_put++ = *ap->argv_get++; - ap->argn++; -} - -static int is_end_of_arguments(struct argparse *ap) { - return ap->argn == ap->argc; -} - -static char *get_argument(struct argparse *ap) { return *ap->argv_get; } - -static char *consume_argument(struct argparse *ap) { - char *ret = get_argument(ap); - delete_argument(ap); - return ret; -} - -struct pb_Parameters *pb_ReadParameters(int *_argc, char **argv) { - char *err_message; - struct argparse ap; - struct pb_Parameters *ret = - (struct pb_Parameters *)malloc(sizeof(struct pb_Parameters)); - - /* Initialize the parameters structure */ - ret->outFile = NULL; - ret->inpFiles = (char **)malloc(sizeof(char *)); - ret->inpFiles[0] = NULL; - - /* Each argument */ - initialize_argparse(&ap, *_argc, argv); - while (!is_end_of_arguments(&ap)) { - char *arg = get_argument(&ap); - - /* Single-character flag */ - if ((arg[0] == '-') && (arg[1] != 0) && (arg[2] == 0)) { - delete_argument(&ap); /* This argument is consumed here */ - - switch (arg[1]) { - case 'o': /* Output file name */ - if (is_end_of_arguments(&ap)) { - err_message = "Expecting file name after '-o'\n"; - goto error; - } - free(ret->outFile); - ret->outFile = strdup(consume_argument(&ap)); - break; - case 'i': /* Input file name */ - if (is_end_of_arguments(&ap)) { - err_message = "Expecting file name after '-i'\n"; - goto error; - } - ret->inpFiles = read_string_array(consume_argument(&ap)); - break; - case '-': /* End of options */ - goto end_of_options; - default: - err_message = "Unexpected command-line parameter\n"; - 
goto error; - } - } else { - /* Other parameters are ignored */ - next_argument(&ap); - } - } /* end for each argument */ - -end_of_options: - *_argc = ap.argc; /* Save the modified argc value */ - finalize_argparse(&ap); - - return ret; - -error: - fputs(err_message, stderr); - pb_FreeParameters(ret); - return NULL; -} - -void pb_FreeParameters(struct pb_Parameters *p) { - char **cpp; - - free(p->outFile); - free_string_array(p->inpFiles); - free(p); -} - -int pb_Parameters_CountInputs(struct pb_Parameters *p) { - int n; - - for (n = 0; p->inpFiles[n]; n++) - ; - return n; -} - -/*****************************************************************************/ -/* Timer routines */ - -static int is_async(enum pb_TimerID timer) { -#ifndef OPENCL_CPU - return (timer == pb_TimerID_KERNEL) || (timer == pb_TimerID_COPY_ASYNC); -#else - return (timer == pb_TimerID_COPY_ASYNC); -#endif -} - -static int is_blocking(enum pb_TimerID timer) { - return (timer == pb_TimerID_COPY) || (timer == pb_TimerID_NONE); -} - -#define INVALID_TIMERID pb_TimerID_LAST - -static int asyncs_outstanding(struct pb_TimerSet *timers) { - return (timers->async_markers != NULL) && - (timers->async_markers->timerID != INVALID_TIMERID); -} - -static struct pb_async_time_marker_list * -get_last_async(struct pb_TimerSet *timers) { - /* Find the last event recorded thus far */ - struct pb_async_time_marker_list *last_event = timers->async_markers; - if (last_event != NULL && last_event->timerID != INVALID_TIMERID) { - while (last_event->next != NULL && - last_event->next->timerID != INVALID_TIMERID) - last_event = last_event->next; - return last_event; - } else - return NULL; -} - -static void insert_marker(struct pb_TimerSet *tset, enum pb_TimerID timer) { - cl_int ciErrNum = CL_SUCCESS; - struct pb_async_time_marker_list **new_event = &(tset->async_markers); - - while (*new_event != NULL && (*new_event)->timerID != INVALID_TIMERID) { - new_event = &((*new_event)->next); - } - - if (*new_event == NULL) { 
- *new_event = (struct pb_async_time_marker_list *)malloc( - sizeof(struct pb_async_time_marker_list)); - (*new_event)->marker = calloc(1, sizeof(cl_event)); - /* - // I don't think this is needed at all. I believe clEnqueueMarker 'creates' -the event #if ( __OPENCL_VERSION__ >= CL_VERSION_1_1 ) fprintf(stderr, "Creating -Marker [%d]\n", timer); - *((cl_event *)((*new_event)->marker)) = clCreateUserEvent(*clContextPtr, -&ciErrNum); if (ciErrNum != CL_SUCCESS) { fprintf(stderr, "Error Creating User -Event Object!\n"); - } - ciErrNum = clSetUserEventStatus(*((cl_event *)((*new_event)->marker)), -CL_QUEUED); if (ciErrNum != CL_SUCCESS) { fprintf(stderr, "Error Setting User -Event Status!\n"); - } -#endif -*/ - (*new_event)->next = NULL; - } - - /* valid event handle now aquired: insert the event record */ - (*new_event)->label = NULL; - (*new_event)->timerID = timer; - ciErrNum = - clEnqueueMarker(*clCommandQueuePtr, (cl_event *)(*new_event)->marker); - if (ciErrNum != CL_SUCCESS) { - fprintf(stderr, "Error Enqueueing Marker!\n"); - } -} - -static void insert_submarker(struct pb_TimerSet *tset, char *label, - enum pb_TimerID timer) { - cl_int ciErrNum = CL_SUCCESS; - struct pb_async_time_marker_list **new_event = &(tset->async_markers); - - while (*new_event != NULL && (*new_event)->timerID != INVALID_TIMERID) { - new_event = &((*new_event)->next); - } - - if (*new_event == NULL) { - *new_event = (struct pb_async_time_marker_list *)malloc( - sizeof(struct pb_async_time_marker_list)); - (*new_event)->marker = calloc(1, sizeof(cl_event)); - /* -#if ( __OPENCL_VERSION__ >= CL_VERSION_1_1 ) -fprintf(stderr, "Creating SubMarker %s[%d]\n", label, timer); - *((cl_event *)((*new_event)->marker)) = clCreateUserEvent(*clContextPtr, -&ciErrNum); if (ciErrNum != CL_SUCCESS) { fprintf(stderr, "Error Creating User -Event Object!\n"); - } - ciErrNum = clSetUserEventStatus(*((cl_event *)((*new_event)->marker)), -CL_QUEUED); if (ciErrNum != CL_SUCCESS) { fprintf(stderr, "Error Setting 
User -Event Status!\n"); - } -#endif -*/ - (*new_event)->next = NULL; - } - - /* valid event handle now aquired: insert the event record */ - (*new_event)->label = label; - (*new_event)->timerID = timer; - ciErrNum = - clEnqueueMarker(*clCommandQueuePtr, (cl_event *)(*new_event)->marker); - if (ciErrNum != CL_SUCCESS) { - fprintf(stderr, "Error Enqueueing Marker!\n"); - } -} - -/* Assumes that all recorded events have completed */ -static pb_Timestamp record_async_times(struct pb_TimerSet *tset) { - struct pb_async_time_marker_list *next_interval = NULL; - struct pb_async_time_marker_list *last_marker = get_last_async(tset); - pb_Timestamp total_async_time = 0; - enum pb_TimerID timer; - - for (next_interval = tset->async_markers; next_interval != last_marker; - next_interval = next_interval->next) { - cl_ulong command_start = 0, command_end = 0; - cl_int ciErrNum = CL_SUCCESS; - - ciErrNum = clGetEventProfilingInfo(*((cl_event *)next_interval->marker), - CL_PROFILING_COMMAND_END, - sizeof(cl_ulong), &command_start, NULL); - if (ciErrNum != CL_SUCCESS) { - fprintf(stderr, "Error getting first EventProfilingInfo: %d\n", ciErrNum); - } - - ciErrNum = clGetEventProfilingInfo( - *((cl_event *)next_interval->next->marker), CL_PROFILING_COMMAND_END, - sizeof(cl_ulong), &command_end, NULL); - if (ciErrNum != CL_SUCCESS) { - fprintf(stderr, "Error getting second EventProfilingInfo: %d\n", - ciErrNum); - } - - pb_Timestamp interval = - (pb_Timestamp)(((double)(command_end - command_start))); - tset->timers[next_interval->timerID].elapsed += interval; - if (next_interval->label != NULL) { - struct pb_SubTimer *subtimer = - tset->sub_timer_list[next_interval->timerID]->subtimer_list; - while (subtimer != NULL) { - if (strcmp(subtimer->label, next_interval->label) == 0) { - subtimer->timer.elapsed += interval; - break; - } - subtimer = subtimer->next; - } - } - total_async_time += interval; - next_interval->timerID = INVALID_TIMERID; - } - - if (next_interval != NULL) - 
next_interval->timerID = INVALID_TIMERID; - - return total_async_time; -} - -static void accumulate_time(pb_Timestamp *accum, pb_Timestamp start, - pb_Timestamp end) { -#if _POSIX_VERSION >= 200112L - *accum += end - start; -#else -#error "Timestamps not implemented for this system" -#endif -} - -#if _POSIX_VERSION >= 200112L -static pb_Timestamp get_time() { - struct timespec tv; - clock_gettime(CLOCK_MONOTONIC, &tv); - return (pb_Timestamp)(tv.tv_sec * BILLION + tv.tv_nsec); -} -#else -#error "no supported time libraries are available on this platform" -#endif - -void pb_ResetTimer(struct pb_Timer *timer) { - timer->state = pb_Timer_STOPPED; - -#if _POSIX_VERSION >= 200112L - timer->elapsed = 0; -#else -#error "pb_ResetTimer: not implemented for this system" -#endif -} - -void pb_StartTimer(struct pb_Timer *timer) { - if (timer->state != pb_Timer_STOPPED) { - fputs("Ignoring attempt to start a running timer\n", stderr); - return; - } - - timer->state = pb_Timer_RUNNING; - -#if _POSIX_VERSION >= 200112L - { - struct timespec tv; - clock_gettime(CLOCK_MONOTONIC, &tv); - timer->init = tv.tv_sec * BILLION + tv.tv_nsec; - } -#else -#error "pb_StartTimer: not implemented for this system" -#endif -} - -void pb_StartTimerAndSubTimer(struct pb_Timer *timer, - struct pb_Timer *subtimer) { - - unsigned int numNotStopped = 0x3; // 11 - if (timer->state != pb_Timer_STOPPED) { - fputs("Warning: Timer was not stopped\n", stderr); - numNotStopped &= 0x1; // Zero out 2^1 - } - if (subtimer->state != pb_Timer_STOPPED) { - fputs("Warning: Subtimer was not stopped\n", stderr); - numNotStopped &= 0x2; // Zero out 2^0 - } - if (numNotStopped == 0x0) { - fputs("Ignoring attempt to start running timer and subtimer\n", stderr); - return; - } - - timer->state = pb_Timer_RUNNING; - subtimer->state = pb_Timer_RUNNING; - -#if _POSIX_VERSION >= 200112L - { - struct timespec tv; - clock_gettime(CLOCK_MONOTONIC, &tv); - - if (numNotStopped & 0x2) { - timer->init = tv.tv_sec * BILLION + 
tv.tv_nsec; - } - - if (numNotStopped & 0x1) { - subtimer->init = tv.tv_sec * BILLION + tv.tv_nsec; - } - } -#else -#error "pb_StartTimer: not implemented for this system" -#endif -} - -void pb_StopTimer(struct pb_Timer *timer) { - pb_Timestamp fini; - - if (timer->state != pb_Timer_RUNNING) { - fputs("Ignoring attempt to stop a stopped timer\n", stderr); - return; - } - - timer->state = pb_Timer_STOPPED; - -#if _POSIX_VERSION >= 200112L - { - struct timespec tv; - clock_gettime(CLOCK_MONOTONIC, &tv); - fini = tv.tv_sec * BILLION + tv.tv_nsec; - } -#else -#error "pb_StopTimer: not implemented for this system" -#endif - - accumulate_time(&timer->elapsed, timer->init, fini); - timer->init = fini; -} - -void pb_StopTimerAndSubTimer(struct pb_Timer *timer, - struct pb_Timer *subtimer) { - - pb_Timestamp fini; - - unsigned int numNotRunning = 0x3; // 11 - if (timer->state != pb_Timer_RUNNING) { - fputs("Warning: Timer was not running\n", stderr); - numNotRunning &= 0x1; // Zero out 2^1 - } - if (subtimer->state != pb_Timer_RUNNING) { - fputs("Warning: Subtimer was not running\n", stderr); - numNotRunning &= 0x2; // Zero out 2^0 - } - if (numNotRunning == 0x0) { - fputs("Ignoring attempt to stop stopped timer and subtimer\n", stderr); - return; - } - - timer->state = pb_Timer_STOPPED; - subtimer->state = pb_Timer_STOPPED; - -#if _POSIX_VERSION >= 200112L - { - struct timespec tv; - clock_gettime(CLOCK_MONOTONIC, &tv); - fini = tv.tv_sec * BILLION + tv.tv_nsec; - } -#else -#error "pb_StopTimer: not implemented for this system" -#endif - - if (numNotRunning & 0x2) { - accumulate_time(&timer->elapsed, timer->init, fini); - timer->init = fini; - } - - if (numNotRunning & 0x1) { - accumulate_time(&subtimer->elapsed, subtimer->init, fini); - subtimer->init = fini; - } -} - -/* Get the elapsed time in seconds. 
*/ -double pb_GetElapsedTime(struct pb_Timer *timer) { - double ret; - - if (timer->state != pb_Timer_STOPPED) { - fputs("Elapsed time from a running timer is inaccurate\n", stderr); - } - -#if _POSIX_VERSION >= 200112L - ret = timer->elapsed / 1e9; -#else -#error "pb_GetElapsedTime: not implemented for this system" -#endif - return ret; -} - -void pb_InitializeTimerSet(struct pb_TimerSet *timers) { - int n; - - timers->wall_begin = get_time(); - timers->current = pb_TimerID_NONE; - - timers->async_markers = NULL; - - for (n = 0; n < pb_TimerID_LAST; n++) { - pb_ResetTimer(&timers->timers[n]); - timers->sub_timer_list[n] = NULL; - } -} - -void pb_SetOpenCL(void *p_clContextPtr, void *p_clCommandQueuePtr) { - clContextPtr = ((cl_context *)p_clContextPtr); - clCommandQueuePtr = ((cl_command_queue *)p_clCommandQueuePtr); -} - -static char *LoadProgSource(const char *Filename, size_t *szFinalLength) { - // locals - FILE *pFileStream = NULL; - size_t szSourceLength; - - // open the OpenCL source code file - pFileStream = fopen(Filename, "rb"); - if (pFileStream == 0) { - return NULL; - } - - // get the length of the source code - fseek(pFileStream, 0, SEEK_END); - szSourceLength = ftell(pFileStream); - fseek(pFileStream, 0, SEEK_SET); - - // allocate a buffer for the source code string and read it in - char *cSourceString = (char *)malloc(szSourceLength + 1); - if (fread((cSourceString), szSourceLength, 1, pFileStream) != 1) { - fclose(pFileStream); - free(cSourceString); - return 0; - } - - // close the file and return the total length of the combined (preamble + - // source) string - fclose(pFileStream); - if (szFinalLength != 0) { - *szFinalLength = szSourceLength; - } - cSourceString[szSourceLength] = '\0'; - - return cSourceString; -} - -static inline void checkErr(cl_int err, cl_int success, const char *name) { - if (err != success) { - printf("ERROR: %s\n", name); - exit(EXIT_FAILURE); - } -} - -void pb_CreateAndBuildKernelFromBinary(const char *file, const char 
*kernel, - void *clContextPtr, void *clDevicePtr, - void *clProgramPtr, void *clKernelPtr) { - size_t kernelLength; - char *programSource = LoadProgSource(file, &kernelLength); - checkErr(programSource != NULL, 1 /*bool true*/, - "Failure to load Program Binary"); - - cl_int binaryStatus; - cl_int errcode; - cl_device_id clDevice = *(cl_device_id *)clDevicePtr; - cl_context clContext = *(cl_context *)clContextPtr; - cl_program clProgram = clCreateProgramWithBinary( - clContext, 1, &clDevice, &kernelLength, - (const unsigned char **)&programSource, &binaryStatus, &errcode); - checkErr(errcode, CL_SUCCESS, "Failure to create program from binary"); - - // printf("Building kernel - %s, from file %s\n", kernel, file); - errcode = clBuildProgram(clProgram, 0, NULL, NULL, NULL, NULL); - // If build fails, get build log from device - if (errcode != CL_SUCCESS) { - printf("ERROR: Failure to build program\n"); - size_t len = 0; - errcode = clGetProgramBuildInfo(clProgram, clDevice, CL_PROGRAM_BUILD_LOG, - 0, NULL, &len); - printf("LOG LENGTH: %lu\n", len); - checkErr(errcode, CL_SUCCESS, - "Failure to collect program build log length"); - char *log = (char *)malloc(len * sizeof(char)); - errcode = clGetProgramBuildInfo(clProgram, clDevice, CL_PROGRAM_BUILD_LOG, - len, log, NULL); - checkErr(errcode, CL_SUCCESS, "Failure to collect program build log"); - - printf("Device Build Log: %s\n", log); - free(log); - exit(EXIT_FAILURE); - } - - cl_kernel clKernel = clCreateKernel(clProgram, kernel, &errcode); - checkErr(errcode, CL_SUCCESS, "Failure to create kernel"); - - *(cl_program *)clProgramPtr = clProgram; - *(cl_kernel *)clKernelPtr = clKernel; - - free(programSource); -} - -void pb_AddSubTimer(struct pb_TimerSet *timers, char *label, - enum pb_TimerID pb_Category) { - - struct pb_SubTimer *subtimer = - (struct pb_SubTimer *)malloc(sizeof(struct pb_SubTimer)); - - int len = strlen(label); - - subtimer->label = (char *)malloc(sizeof(char) * (len + 1)); - 
sprintf(subtimer->label, "%s\0", label); - - pb_ResetTimer(&subtimer->timer); - subtimer->next = NULL; - - struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[pb_Category]; - if (subtimerlist == NULL) { - subtimerlist = - (struct pb_SubTimerList *)calloc(1, sizeof(struct pb_SubTimerList)); - subtimerlist->subtimer_list = subtimer; - timers->sub_timer_list[pb_Category] = subtimerlist; - } else { - // Append to list - struct pb_SubTimer *element = subtimerlist->subtimer_list; - while (element->next != NULL) { - element = element->next; - } - element->next = subtimer; - } -} - -void pb_SwitchToTimer(struct pb_TimerSet *timers, enum pb_TimerID timer) { - /* Stop the currently running timer */ - if (timers->current != pb_TimerID_NONE) { - struct pb_SubTimerList *subtimerlist = - timers->sub_timer_list[timers->current]; - struct pb_SubTimer *currSubTimer = - (subtimerlist != NULL) ? subtimerlist->current : NULL; - - if (!is_async(timers->current)) { - if (timers->current != timer) { - if (currSubTimer != NULL) { - pb_StopTimerAndSubTimer(&timers->timers[timers->current], - &currSubTimer->timer); - } else { - pb_StopTimer(&timers->timers[timers->current]); - } - } else { - if (currSubTimer != NULL) { - pb_StopTimer(&currSubTimer->timer); - } - } - } else { - insert_marker(timers, timer); - if (!is_async(timer)) { // if switching to async too, keep driver going - pb_StopTimer(&timers->timers[pb_TimerID_DRIVER]); - } - } - } - - pb_Timestamp currentTime = get_time(); - - /* The only cases we check for asynchronous task completion is - * when an overlapping CPU operation completes, or the next - * segment blocks on completion of previous async operations */ - if (asyncs_outstanding(timers) && - (!is_async(timers->current) || is_blocking(timer))) { - - struct pb_async_time_marker_list *last_event = get_last_async(timers); - /* CL_COMPLETE if completed */ - - cl_int ciErrNum = CL_SUCCESS; - cl_int async_done = CL_COMPLETE; - - ciErrNum = clGetEventInfo(*((cl_event 
*)last_event->marker), - CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), - &async_done, NULL); - if (ciErrNum != CL_SUCCESS) { - fprintf(stderr, "Error Querying EventInfo!\n"); - } - - if (is_blocking(timer)) { - /* Async operations completed after previous CPU operations: - * overlapped time is the total CPU time since this set of async - * operations were first issued */ - - // timer to switch to is COPY or NONE - if (async_done != CL_COMPLETE) { - accumulate_time(&(timers->timers[pb_TimerID_OVERLAP].elapsed), - timers->async_begin, currentTime); - } - - /* Wait on async operation completion */ - ciErrNum = clWaitForEvents(1, (cl_event *)last_event->marker); - if (ciErrNum != CL_SUCCESS) { - fprintf(stderr, "Error Waiting for Events!\n"); - } - - pb_Timestamp total_async_time = record_async_times(timers); - - /* Async operations completed before previous CPU operations: - * overlapped time is the total async time */ - if (async_done == CL_COMPLETE) { - // fprintf(stderr, "Async_done: total_async_type = %lld\n", - // total_async_time); - timers->timers[pb_TimerID_OVERLAP].elapsed += total_async_time; - } - - } else - /* implies (!is_async(timers->current) && asyncs_outstanding(timers)) */ - // i.e. 
Current Not Async (not KERNEL/COPY_ASYNC) but there are - // outstanding so something is deeper in stack - if (async_done == CL_COMPLETE) { - /* Async operations completed before previous CPU operations: - * overlapped time is the total async time */ - timers->timers[pb_TimerID_OVERLAP].elapsed += record_async_times(timers); - } - } - - /* Start the new timer */ - if (timer != pb_TimerID_NONE) { - if (!is_async(timer)) { - pb_StartTimer(&timers->timers[timer]); - } else { - // toSwitchTo Is Async (KERNEL/COPY_ASYNC) - if (!asyncs_outstanding(timers)) { - /* No asyncs outstanding, insert a fresh async marker */ - - insert_marker(timers, timer); - timers->async_begin = currentTime; - } else if (!is_async(timers->current)) { - /* Previous asyncs still in flight, but a previous SwitchTo - * already marked the end of the most recent async operation, - * so we can rename that marker as the beginning of this async - * operation */ - - struct pb_async_time_marker_list *last_event = get_last_async(timers); - last_event->label = NULL; - last_event->timerID = timer; - } - if (!is_async(timers->current)) { - pb_StartTimer(&timers->timers[pb_TimerID_DRIVER]); - } - } - } - timers->current = timer; -} - -void pb_SwitchToSubTimer(struct pb_TimerSet *timers, char *label, - enum pb_TimerID category) { - struct pb_SubTimerList *subtimerlist = - timers->sub_timer_list[timers->current]; - struct pb_SubTimer *curr = - (subtimerlist != NULL) ? 
subtimerlist->current : NULL; - - if (timers->current != pb_TimerID_NONE) { - if (!is_async(timers->current)) { - if (timers->current != category) { - if (curr != NULL) { - pb_StopTimerAndSubTimer(&timers->timers[timers->current], - &curr->timer); - } else { - pb_StopTimer(&timers->timers[timers->current]); - } - } else { - if (curr != NULL) { - pb_StopTimer(&curr->timer); - } - } - } else { - insert_submarker(timers, label, category); - if (!is_async(category)) { // if switching to async too, keep driver going - pb_StopTimer(&timers->timers[pb_TimerID_DRIVER]); - } - } - } - - pb_Timestamp currentTime = get_time(); - - /* The only cases we check for asynchronous task completion is - * when an overlapping CPU operation completes, or the next - * segment blocks on completion of previous async operations */ - if (asyncs_outstanding(timers) && - (!is_async(timers->current) || is_blocking(category))) { - - struct pb_async_time_marker_list *last_event = get_last_async(timers); - /* CL_COMPLETE if completed */ - - cl_int ciErrNum = CL_SUCCESS; - cl_int async_done = CL_COMPLETE; - - ciErrNum = clGetEventInfo(*((cl_event *)last_event->marker), - CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), - &async_done, NULL); - if (ciErrNum != CL_SUCCESS) { - fprintf(stderr, "Error Querying EventInfo!\n"); - } - - if (is_blocking(category)) { - /* Async operations completed after previous CPU operations: - * overlapped time is the total CPU time since this set of async - * operations were first issued */ - - // timer to switch to is COPY or NONE - // if it hasn't already finished, then just take now and use that as the - // elapsed time in OVERLAP anything happening after now isn't OVERLAP - // because everything is being stopped to wait for synchronization it - // seems that the extra sync wall time isn't being recorded anywhere - if (async_done != CL_COMPLETE) - accumulate_time(&(timers->timers[pb_TimerID_OVERLAP].elapsed), - timers->async_begin, currentTime); - - /* Wait on 
async operation completion */ - ciErrNum = clWaitForEvents(1, (cl_event *)last_event->marker); - if (ciErrNum != CL_SUCCESS) { - fprintf(stderr, "Error Waiting for Events!\n"); - } - pb_Timestamp total_async_time = record_async_times(timers); - - /* Async operations completed before previous CPU operations: - * overlapped time is the total async time */ - // If it did finish, then accumulate all the async time that did happen - // into OVERLAP the immediately preceding EventSynchronize theoretically - // didn't have any effect since it was already completed. - if (async_done == CL_COMPLETE /*cudaSuccess*/) - timers->timers[pb_TimerID_OVERLAP].elapsed += total_async_time; - - } else - /* implies (!is_async(timers->current) && asyncs_outstanding(timers)) */ - // i.e. Current Not Async (not KERNEL/COPY_ASYNC) but there are - // outstanding so something is deeper in stack - if (async_done == CL_COMPLETE /*cudaSuccess*/) { - /* Async operations completed before previous CPU operations: - * overlapped time is the total async time */ - timers->timers[pb_TimerID_OVERLAP].elapsed += record_async_times(timers); - } - // else, this isn't blocking, so just check the next time around - } - - subtimerlist = timers->sub_timer_list[category]; - struct pb_SubTimer *subtimer = NULL; - - if (label != NULL) { - subtimer = subtimerlist->subtimer_list; - while (subtimer != NULL) { - if (strcmp(subtimer->label, label) == 0) { - break; - } else { - subtimer = subtimer->next; - } - } - } - - /* Start the new timer */ - if (category != pb_TimerID_NONE) { - if (!is_async(category)) { - if (subtimerlist != NULL) { - subtimerlist->current = subtimer; - } - - if (category != timers->current && subtimer != NULL) { - pb_StartTimerAndSubTimer(&timers->timers[category], &subtimer->timer); - } else if (subtimer != NULL) { - pb_StartTimer(&subtimer->timer); - } else { - pb_StartTimer(&timers->timers[category]); - } - } else { - if (subtimerlist != NULL) { - subtimerlist->current = subtimer; - } - - 
// toSwitchTo Is Async (KERNEL/COPY_ASYNC) - if (!asyncs_outstanding(timers)) { - /* No asyncs outstanding, insert a fresh async marker */ - insert_submarker(timers, label, category); - timers->async_begin = currentTime; - } else if (!is_async(timers->current)) { - /* Previous asyncs still in flight, but a previous SwitchTo - * already marked the end of the most recent async operation, - * so we can rename that marker as the beginning of this async - * operation */ - - struct pb_async_time_marker_list *last_event = get_last_async(timers); - last_event->timerID = category; - last_event->label = label; - } // else, marker for switchToThis was already inserted - - // toSwitchto is already asynchronous, but if current/prev state is async - // too, then DRIVER is already running - if (!is_async(timers->current)) { - pb_StartTimer(&timers->timers[pb_TimerID_DRIVER]); - } - } - } - - timers->current = category; -} - -void pb_PrintTimerSet(struct pb_TimerSet *timers) { - printf("Printing Parboil Timer: Default\n"); - pb_Timestamp wall_end = get_time(); - - struct pb_Timer *t = timers->timers; - struct pb_SubTimer *sub = NULL; - - int maxSubLength; - - // const char *categories[] = { - // "IO", "Kernel", "Copy", "Driver", "Copy Async", "Compute" - // }; - const char *categories[] = { - "IO", "Kernel", "Copy", "Driver", - "Copy Async", "Compute", "Overlap", "Init_Ctx", - "Clear_Ctx", "Copy_Scalar", "Copy_Ptr", "Mem_Free", - "Read_Output", "Setup", "Mem_Track", "Mem_Untrack", - "Misc", "Pthread_Create", "Arg_Pack", "Arg_Unpack", - "Computation", "Output_Pack", "Output_Unpack"}; - - const int maxCategoryLength = 20; - - int i; - for (i = 1; i < pb_TimerID_LAST; - ++i) { // exclude NONE and OVRELAP from this format - if (pb_GetElapsedTime(&t[i]) != 0 || true) { - - // Print Category Timer - printf("%-*s: %.9f\n", maxCategoryLength, categories[i - 1], - pb_GetElapsedTime(&t[i])); - - if (timers->sub_timer_list[i] != NULL) { - sub = timers->sub_timer_list[i]->subtimer_list; - 
maxSubLength = 0; - while (sub != NULL) { - // Find longest SubTimer label - if (strlen(sub->label) > maxSubLength) { - maxSubLength = strlen(sub->label); - } - sub = sub->next; - } - - // Fit to Categories - if (maxSubLength <= maxCategoryLength) { - maxSubLength = maxCategoryLength; - } - - sub = timers->sub_timer_list[i]->subtimer_list; - - // Print SubTimers - while (sub != NULL) { - printf(" -%-*s: %.9f\n", maxSubLength, sub->label, - pb_GetElapsedTime(&sub->timer)); - sub = sub->next; - } - } - } - } - - if (pb_GetElapsedTime(&t[pb_TimerID_OVERLAP]) != 0) - printf("CPU/Kernel Overlap: %.9f\n", - pb_GetElapsedTime(&t[pb_TimerID_OVERLAP])); - - float walltime = (wall_end - timers->wall_begin) / 1e9; - printf("Timer Wall Time: %.9f\n", walltime); -} - -void pb_DestroyTimerSet(struct pb_TimerSet *timers) { - /* clean up all of the async event markers */ - struct pb_async_time_marker_list *event = timers->async_markers; - while (event != NULL) { - - cl_int ciErrNum = CL_SUCCESS; - ciErrNum = clWaitForEvents(1, (cl_event *)(event)->marker); - if (ciErrNum != CL_SUCCESS) { - // fprintf(stderr, "Error Waiting for Events!\n"); - } - - ciErrNum = clReleaseEvent(*((cl_event *)(event)->marker)); - if (ciErrNum != CL_SUCCESS) { - fprintf(stderr, "Error Release Events!\n"); - } - - free((event)->marker); - struct pb_async_time_marker_list *next = ((event)->next); - - free(event); - - // (*event) = NULL; - event = next; - } - - int i = 0; - for (i = 0; i < pb_TimerID_LAST; ++i) { - if (timers->sub_timer_list[i] != NULL) { - struct pb_SubTimer *subtimer = timers->sub_timer_list[i]->subtimer_list; - struct pb_SubTimer *prev = NULL; - while (subtimer != NULL) { - free(subtimer->label); - prev = subtimer; - subtimer = subtimer->next; - free(prev); - } - free(timers->sub_timer_list[i]); - } - } -}