diff --git a/hpvm/test/README.md b/hpvm/test/README.md
index e709ef04195c90e0f91c2a4b4a4b1d2f0b716d1f..94103affb668afc29d32e52d85d0d60182bd16d8 100644
--- a/hpvm/test/README.md
+++ b/hpvm/test/README.md
@@ -22,9 +22,9 @@ TODO
 ## Pipeline
 ```
 make TARGET={seq, gpu}
- ./pipeline-{seq, gpu} datasets/big/input/formula1_scaled.mp4
+ ./pipeline-{seq, gpu} datasets/formula1_scaled.mp4
 ```
 
 ## Your own project
 See `template/` for an example Makefile and config.
-Include `visc.h` to use HPVM intrinsics, found in the `test/include/visc.h`.
+Include `visc.h` to use the HPVM C API functions; the header is located at `test/include/visc.h`.
diff --git a/hpvm/test/parboil/README.md b/hpvm/test/parboil/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..1166e4f10f6a6e29e4f5d40871674c27da975acc
--- /dev/null
+++ b/hpvm/test/parboil/README.md
@@ -0,0 +1,8 @@
+# Current Benchmark Compatibility
+
+| Benchmark | Version | Supported on CPU | Supported on GPU |
+| :-------- | :------ | :--------------: | :--------------: |
+| sgemm     | visc    | ✔                | ✔                |
+| stencil   | visc    | ✔                | ✔                |
+| spmv      | visc    | ✔                | ✘                |
+| lbm       | visc    | ✔                | ✘                |
diff --git a/hpvm/test/parboil/common/Makefile.conf.example-ati b/hpvm/test/parboil/common/Makefile.conf.example-ati
deleted file mode 100644
index e7dacca07360035cc3d07404ae0358c310ad4507..0000000000000000000000000000000000000000
--- a/hpvm/test/parboil/common/Makefile.conf.example-ati
+++ /dev/null
@@ -1,3 +0,0 @@
-OPENCL_PATH=/opt/ati/
-OPENCL_LIB_PATH=/opt/ati/lib/x86_64
-
diff --git a/hpvm/test/parboil/common/Makefile.conf.example-mcuda b/hpvm/test/parboil/common/Makefile.conf.example-mcuda
deleted file mode 100644
index 36c110c015b6cde1aafc01cc3d6623c3bf905887..0000000000000000000000000000000000000000
--- a/hpvm/test/parboil/common/Makefile.conf.example-mcuda
+++ /dev/null
@@ -1 +0,0 @@
-MCUDA_PATH=/usr/local/mcuda
diff --git a/hpvm/test/parboil/common/Makefile.conf.example-nvidia b/hpvm/test/parboil/common/Makefile.conf.example-nvidia
deleted file mode 100644
index bdf84b6329487cba5c1c38dd845b9f63af4c28b5..0000000000000000000000000000000000000000
--- a/hpvm/test/parboil/common/Makefile.conf.example-nvidia
+++ /dev/null
@@ -1,5 +0,0 @@
-CUDA_PATH=/usr/local/cuda
-CUDA_LIB_PATH=/usr/local/cuda/lib64
-OPENCL_PATH=/usr/local/cuda
-OPENCL_LIB_PATH=/usr/lib
-
diff --git a/hpvm/test/parboil/common/mk/c.mk b/hpvm/test/parboil/common/mk/c.mk
deleted file mode 100644
index 9334ba5c8f76e56490b96ec1527bc33e7d8248f0..0000000000000000000000000000000000000000
--- a/hpvm/test/parboil/common/mk/c.mk
+++ /dev/null
@@ -1,86 +0,0 @@
-# (c) 2007 The Board of Trustees of the University of Illinois.
-
-# Default language wide options
-
-LANG_CFLAGS=-I$(PARBOIL_ROOT)/common/include -I/usr/local/cuda/include
-LANG_CXXFLAGS=$(LANG_CFLAGS)
-LANG_LDFLAGS=
-
-CFLAGS=$(LANG_CFLAGS) $(PLATFORM_CFLAGS) $(APP_CFLAGS)
-CXXFLAGS=$(LANG_CXXFLAGS) $(PLATFORM_CXXFLAGS) $(APP_CXXFLAGS)
-LDFLAGS=$(LANG_LDFLAGS) $(PLATFORM_LDFLAGS) $(APP_LDFLAGS)
-
-# Rules common to all makefiles
-
-########################################
-# Functions
-########################################
-
-# Add BUILDDIR as a prefix to each element of $1
-INBUILDDIR=$(addprefix $(BUILDDIR)/,$(1))
-
-# Add SRCDIR as a prefix to each element of $1
-INSRCDIR=$(addprefix $(SRCDIR)/,$(1))
-
-
-########################################
-# Environment variable check
-########################################
-
-# The second-last directory in the $(BUILDDIR) path
-# must have the name "build".  This reduces the risk of terrible
-# accidents if paths are not set up correctly.
-ifeq ("$(notdir $(BUILDDIR))", "")
-$(error $$BUILDDIR is not set correctly)
-endif
-
-ifneq ("$(notdir $(patsubst %/,%,$(dir $(BUILDDIR))))", "build")
-$(error $$BUILDDIR is not set correctly)
-endif
-
-.PHONY: run
-
-########################################
-# Derived variables
-########################################
-
-ifeq ($(DEBUGGER),)
-DEBUGGER=gdb
-endif
-
-OBJS = $(call INBUILDDIR,$(SRCDIR_OBJS))
-
-########################################
-# Rules
-########################################
-
-default: $(BUILDDIR) $(BIN)
-
-run:
-	@$(BIN) $(ARGS)
-
-debug:
-	@$(DEBUGGER) --args $(BIN) $(ARGS)
-
-clean :
-	rm -f $(BUILDDIR)/*
-	if [ -d $(BUILDDIR) ]; then rmdir $(BUILDDIR); fi
-
-$(BIN) : $(OBJS) $(BUILDDIR)/parboil.o
-	$(CXX) $^ -o $@ $(LDFLAGS)
-
-$(BUILDDIR) :
-	mkdir -p $(BUILDDIR)
-
-$(BUILDDIR)/%.o : $(SRCDIR)/%.c
-	$(CC) $(CFLAGS) -c $< -o $@
-
-$(BUILDDIR)/parboil.o: $(PARBOIL_ROOT)/common/src/parboil.c
-	$(CC) $(CFLAGS) -c $< -o $@
-
-$(BUILDDIR)/%.o : $(SRCDIR)/%.cc
-	$(CXX) $(CXXFLAGS) -c $< -o $@
-
-$(BUILDDIR)/%.o : $(SRCDIR)/%.cpp
-	$(CXX) $(CXXFLAGS) -c $< -o $@
-
diff --git a/hpvm/test/parboil/common/mk/cuda.mk b/hpvm/test/parboil/common/mk/cuda.mk
deleted file mode 100644
index 00f205b2bb0dca42ea140f1d5dd4fa5f149fa178..0000000000000000000000000000000000000000
--- a/hpvm/test/parboil/common/mk/cuda.mk
+++ /dev/null
@@ -1,109 +0,0 @@
-# (c) 2007 The Board of Trustees of the University of Illinois.
-
-# Default language wide options
-
-# CUDA specific
-LANG_CFLAGS=-I$(PARBOIL_ROOT)/common/include -I$(CUDA_PATH)/include
-LANG_CXXFLAGS=$(LANG_CFLAGS)
-LANG_LDFLAGS=-L$(CUDA_LIB_PATH)
-
-LANG_CUDACFLAGS=$(LANG_CFLAGS)
-
-CFLAGS=$(APP_CFLAGS) $(LANG_CFLAGS) $(PLATFORM_CFLAGS)
-CXXFLAGS=$(APP_CXXFLAGS) $(LANG_CXXFLAGS) $(PLATFORM_CXXFLAGS)
-
-CUDACFLAGS=$(LANG_CUDACFLAGS) $(PLATFORM_CUDACFLAGS) $(APP_CUDACFLAGS) 
-CUDALDFLAGS=$(LANG_LDFLAGS) $(PLATFORM_CUDALDFLAGS) $(APP_CUDALDFLAGS)
-
-# Rules common to all makefiles
-
-########################################
-# Functions
-########################################
-
-# Add BUILDDIR as a prefix to each element of $1
-INBUILDDIR=$(addprefix $(BUILDDIR)/,$(1))
-
-# Add SRCDIR as a prefix to each element of $1
-INSRCDIR=$(addprefix $(SRCDIR)/,$(1))
-
-
-########################################
-# Environment variable check
-########################################
-
-# The second-last directory in the $(BUILDDIR) path
-# must have the name "build".  This reduces the risk of terrible
-# accidents if paths are not set up correctly.
-ifeq ("$(notdir $(BUILDDIR))", "")
-$(error $$BUILDDIR is not set correctly)
-endif
-
-ifneq ("$(notdir $(patsubst %/,%,$(dir $(BUILDDIR))))", "build")
-$(error $$BUILDDIR is not set correctly)
-endif
-
-.PHONY: run
-
-ifeq ($(CUDA_PATH),)
-FAILSAFE=no_cuda
-else 
-FAILSAFE=
-endif
-
-########################################
-# Derived variables
-########################################
-
-ifeq ($(DEBUGGER),)
-DEBUGGER=gdb
-endif
-
-OBJS = $(call INBUILDDIR,$(SRCDIR_OBJS))
-
-########################################
-# Rules
-########################################
-
-default: $(FAILSAFE) $(BUILDDIR) $(BIN)
-
-run:
-	@echo "Resolving CUDA runtime library..."
-	@$(shell echo $(RUNTIME_ENV)) LD_LIBRARY_PATH=$(CUDA_LIB_PATH) ldd $(BIN) | grep cuda
-	$(shell echo $(RUNTIME_ENV)) LD_LIBRARY_PATH=$(CUDA_LIB_PATH) ./$(BIN) $(ARGS)
-
-debug:
-	@echo "Resolving CUDA runtime library..."
-	@$(shell echo $(RUNTIME_ENV)) LD_LIBRARY_PATH=$(CUDA_LIB_PATH) ldd $(BIN) | grep cuda
-	@$(shell echo $(RUNTIME_ENV)) LD_LIBRARY_PATH=$(CUDA_LIB_PATH) $(DEBUGGER) --args $(BIN) $(ARGS)
-
-clean :
-	rm -rf $(BUILDDIR)/*
-	if [ -d $(BUILDDIR) ]; then rmdir $(BUILDDIR); fi
-
-$(BIN) : $(OBJS) $(BUILDDIR)/parboil_cuda.o
-	$(CUDALINK) $^ -o $@ $(CUDALDFLAGS)
-
-$(BUILDDIR) :
-	mkdir -p $(BUILDDIR)
-
-$(BUILDDIR)/%.o : $(SRCDIR)/%.c
-	$(CC) $(CFLAGS) -c $< -o $@
-
-$(BUILDDIR)/parboil_cuda.o: $(PARBOIL_ROOT)/common/src/parboil_cuda.c
-	$(CC) $(CFLAGS) -c $< -o $@
-
-$(BUILDDIR)/%.o : $(SRCDIR)/%.cc
-	$(CXX) $(CXXFLAGS) -c $< -o $@
-
-$(BUILDDIR)/%.o : $(SRCDIR)/%.cpp
-	$(CXX) $(CXXFLAGS) -c $< -o $@
-
-$(BUILDDIR)/%.o : $(SRCDIR)/%.cu
-	$(CUDACC) $< $(CUDACFLAGS) -c -o $@
-
-no_cuda:
-	@echo "CUDA_PATH is not set. Open $(CUDA_ROOT)/common/Makefile.conf to set default value."
-	@echo "You may use $(PLATFORM_MK) if you want a platform specific configurations."
-	@exit 1
-
diff --git a/hpvm/test/parboil/common/mk/opencl.mk b/hpvm/test/parboil/common/mk/opencl.mk
deleted file mode 100644
index 35b4d1bc90c6fe776ca7e376e6883dd3bef687c9..0000000000000000000000000000000000000000
--- a/hpvm/test/parboil/common/mk/opencl.mk
+++ /dev/null
@@ -1,129 +0,0 @@
-# (c) 2007 The Board of Trustees of the University of Illinois.
-
-# Default language wide options
-
-LANG_CFLAGS=-I$(PARBOIL_ROOT)/common/include -I$(OPENCL_PATH)/include
-LANG_CXXFLAGS=$(LANG_CFLAGS)
-LANG_LDFLAGS=-lOpenCL -L$(OPENCL_LIB_PATH) -lrt
-
-CFLAGS=$(LANG_CFLAGS) $(PLATFORM_CFLAGS) $(APP_CFLAGS)
-CXXFLAGS=$(LANG_CXXFLAGS) $(PLATFORM_CXXFLAGS) $(APP_CXXFLAGS)
-LDFLAGS=$(LANG_LDFLAGS) $(PLATFORM_LDFLAGS) $(APP_LDFLAGS)
-
-
-LLVM_INSTALL:=$(LLVM_SRC_ROOT)/../build
-LIBCLC:=$(LLVM_SRC_ROOT)/../../libclc-install
-LLVM_CC:=$(LLVM_INSTALL)/bin/clang
-LLVM_LINK:=$(LLVM_INSTALL)/bin/llvm-link
-
-
-# Rules common to all makefiles
-
-########################################
-# Functions
-########################################
-
-# Add BUILDDIR as a prefix to each element of $1
-INBUILDDIR=$(addprefix $(BUILDDIR)/,$(1))
-
-# Add SRCDIR as a prefix to each element of $1
-INSRCDIR=$(addprefix $(SRCDIR)/,$(1))
-
-
-########################################
-# Environment variable check
-########################################
-
-# The second-last directory in the $(BUILDDIR) path
-# must have the name "build".  This reduces the risk of terrible
-# accidents if paths are not set up correctly.
-ifeq ("$(notdir $(BUILDDIR))", "")
-$(error $$BUILDDIR is not set correctly)
-endif
-
-ifneq ("$(notdir $(patsubst %/,%,$(dir $(BUILDDIR))))", "build")
-$(error $$BUILDDIR is not set correctly)
-endif
-
-.PHONY: run
-.PRECIOUS: $(BUILDDIR)/%.ll
-
-ifeq ($(OPENCL_PATH),)
-FAILSAFE=no_opencl
-else 
-FAILSAFE=
-endif
-
-########################################
-# Derived variables
-########################################
-
-OBJS = $(call INBUILDDIR,$(SRCDIR_OBJS))
-KERNEL = $(call INBUILDDIR,$(KERNEL_OBJS))
-
-ifeq ($(DEBUGGER),)
-DEBUGGER=gdb
-endif
-
-########################################
-# Rules
-########################################
-
-default: $(FAILSAFE) $(BUILDDIR) $(BIN) $(KERNEL)
-
-run : $(RUNDIR)
-	echo "Resolving OpenCL library..."
-	$(shell echo $(RUNTIME_ENV)) LD_LIBRARY_PATH=$(OPENCL_LIB_PATH) ldd ./$(BIN) | grep OpenCL
-	$(shell echo $(RUNTIME_ENV)) LD_LIBRARY_PATH=$(OPENCL_LIB_PATH) ./$(BIN) $(ARGS)
-	$(TOOL) $(OUTPUT) $(REF_OUTPUT)
-
-debug:
-	@echo "Resolving OpenCL library..."
-	@$(shell echo $(RUNTIME_ENV)) LD_LIBRARY_PATH=$(OPENCL_LIB_PATH) ldd $(BIN) | grep OpenCL
-	@$(shell echo $(RUNTIME_ENV)) LD_LIBRARY_PATH=$(OPENCL_LIB_PATH) $(DEBUGGER) --args $(BIN) $(ARGS)
-
-clean :
-	rm -f $(BUILDDIR)/*
-	if [ -a $(BIN) ]; then rm $(BIN); fi
-	if [ -d $(BUILDDIR) ]; then rm -rf $(BUILDDIR); fi
-	if [ -d $(RUNDIR) ]; then rm -rf $(RUNDIR); fi
-
-$(BIN) : $(OBJS) $(BUILDDIR)/parboil_opencl.o 
-	$(CXX) $^ -o $@ $(LDFLAGS)
-
-$(RUNDIR) :
-	mkdir -p $(RUNDIR)
-
-$(BUILDDIR) :
-	mkdir -p $(BUILDDIR)
-
-$(BUILDDIR)/%.o : $(SRCDIR)/%.c
-	$(CC) $(CFLAGS) -c $< -o $@
-
-$(BUILDDIR)/parboil_opencl.o : $(PARBOIL_ROOT)/common/src/parboil_opencl.c
-	$(CC) $(CFLAGS) -c $< -o $@
-
-$(BUILDDIR)/%.o : $(SRCDIR)/%.cc
-	$(CXX) $(CXXFLAGS) -c $< -o $@
-
-$(BUILDDIR)/%.o : $(SRCDIR)/%.cpp
-	$(CXX) $(CXXFLAGS) -c $< -o $@
-
-$(BUILDDIR)/%.nvptx.s : $(BUILDDIR)/%.linked.bc
-	$(LLVM_CC) -O3 -target nvptx64-nvidia-nvcl $< -S -o $@
-
-$(BUILDDIR)/%.linked.bc : $(BUILDDIR)/%.ll
-	$(LLVM_LINK) $(LIBCLC)/lib/clc/nvptx64--nvidiacl.bc $< -o $@
-
-$(BUILDDIR)/%.ll : $(SRCDIR)/%.cl
-	$(LLVM_CC) $(CFLAGS) -Dcl_clang_storage_class_specifiers -isystem $(LIBCLC)/include -include clc/clc.h -target nvptx64-nvidia-nvcl $< -O3 -emit-llvm -S -o $@
-
-$(BUILDDIR)/%.ir : $(SRCDIR)/%.cl
-	cd $(SRCDIR); ioc64 -input=kernel.cl -ir=kernel.ir
-	cp $(SRCDIR)/kernel.ir $@
-
-no_opencl:
-	@echo "OPENCL_PATH is not set. Open $(PARBOIL_ROOT)/common/Makefile.conf to set default value."
-	@echo "You may use $(PLATFORM_MK) if you want a platform specific configurations."
-	@exit 1
-
diff --git a/hpvm/test/parboil/common/platform/c.default.mk b/hpvm/test/parboil/common/platform/c.default.mk
deleted file mode 100644
index d9058f11b1d913189a550f060f5d5dc9e08af2c9..0000000000000000000000000000000000000000
--- a/hpvm/test/parboil/common/platform/c.default.mk
+++ /dev/null
@@ -1,17 +0,0 @@
-# (c) 2007 The Board of Trustees of the University of Illinois.
-
-# Rules common to all makefiles
-
-# Commands to build objects from source file using C compiler
-# with gcc
-
-# gcc (default)
-CC = gcc
-PLATFORM_CFLAGS = 
-  
-CXX = g++
-PLATFORM_CXXFLAGS = 
-  
-LINKER = g++
-PLATFORM_LDFLAGS = -lm -lpthread
-
diff --git a/hpvm/test/parboil/common/platform/c.gcc.mk b/hpvm/test/parboil/common/platform/c.gcc.mk
deleted file mode 100644
index d9058f11b1d913189a550f060f5d5dc9e08af2c9..0000000000000000000000000000000000000000
--- a/hpvm/test/parboil/common/platform/c.gcc.mk
+++ /dev/null
@@ -1,17 +0,0 @@
-# (c) 2007 The Board of Trustees of the University of Illinois.
-
-# Rules common to all makefiles
-
-# Commands to build objects from source file using C compiler
-# with gcc
-
-# gcc (default)
-CC = gcc
-PLATFORM_CFLAGS = 
-  
-CXX = g++
-PLATFORM_CXXFLAGS = 
-  
-LINKER = g++
-PLATFORM_LDFLAGS = -lm -lpthread
-
diff --git a/hpvm/test/parboil/common/platform/cuda.arch20.mk b/hpvm/test/parboil/common/platform/cuda.arch20.mk
deleted file mode 100644
index b08376c93195fda7ad477e89a9a93b0212ca1161..0000000000000000000000000000000000000000
--- a/hpvm/test/parboil/common/platform/cuda.arch20.mk
+++ /dev/null
@@ -1,23 +0,0 @@
-# (c) 2007 The Board of Trustees of the University of Illinois.
-
-# Cuda-related definitions common to all benchmarks
-
-########################################
-# Variables
-########################################
-
-# c.default is the base along with CUDA configuration in this setting
-include $(PARBOIL_ROOT)/common/platform/c.default.mk
-
-# Paths
-CUDAHOME=/usr/local/cuda
-
-# Programs
-CUDACC=$(CUDAHOME)/bin/nvcc
-CUDALINK=$(CUDAHOME)/bin/nvcc
-
-# Flags
-PLATFORM_CUDACFLAGS=-code=sm_20
-PLATFORM_CUDALDFLAGS=-lm -lpthread
-
-
diff --git a/hpvm/test/parboil/common/platform/cuda.default.mk b/hpvm/test/parboil/common/platform/cuda.default.mk
deleted file mode 100644
index 237da5aa3e601ed1fc53cb7fc5fb8c986c738eec..0000000000000000000000000000000000000000
--- a/hpvm/test/parboil/common/platform/cuda.default.mk
+++ /dev/null
@@ -1,23 +0,0 @@
-# (c) 2007 The Board of Trustees of the University of Illinois.
-
-# Cuda-related definitions common to all benchmarks
-
-########################################
-# Variables
-########################################
-
-# c.default is the base along with CUDA configuration in this setting
-include $(PARBOIL_ROOT)/common/platform/c.default.mk
-
-# Paths
-CUDAHOME=/usr/local/cuda
-
-# Programs
-CUDACC=$(CUDAHOME)/bin/nvcc
-CUDALINK=$(CUDAHOME)/bin/nvcc
-
-# Flags
-PLATFORM_CUDACFLAGS=-O3
-PLATFORM_CUDALDFLAGS=-lm -lpthread
-
-
diff --git a/hpvm/test/parboil/common/platform/cuda.mcuda.mk b/hpvm/test/parboil/common/platform/cuda.mcuda.mk
deleted file mode 100644
index 05a6fee9a9a0328d6ff415afdb4b9acbf91580fa..0000000000000000000000000000000000000000
--- a/hpvm/test/parboil/common/platform/cuda.mcuda.mk
+++ /dev/null
@@ -1,25 +0,0 @@
-# (c) 2011 The Board of Trustees of the University of Illinois.
-
-# Cuda-related definitions common to all benchmarks
-
-########################################
-# Variables
-########################################
-
-# c.default is the base along with CUDA configuration in this setting
-include $(PARBOIL_ROOT)/common/platform/c.default.mk
-
-# Paths
-CUDA_PATH=$(MCUDA_PATH)/include
-
-# Programs
-CUDACC=$(MCUDA_PATH)/bin/mcc_xmm
-CUDALINK=$(LINKER)
-
-# Flags
-PLATFORM_CUDACFLAGS=-O3 
-PLATFORM_CFLAGS=-O3 -I$(MCUDA_PATH)/include -D__MCUDA__
-PLATFORM_CXXFLAGS=-O3 -I$(MCUDA_PATH)/include -D__MCUDA__
-PLATFORM_CUDALDFLAGS=-lm -lpthread -L$(MCUDA_PATH)/lib -lmcuda
-
-
diff --git a/hpvm/test/parboil/common/platform/cuda.nvcc.mk b/hpvm/test/parboil/common/platform/cuda.nvcc.mk
deleted file mode 100644
index 237da5aa3e601ed1fc53cb7fc5fb8c986c738eec..0000000000000000000000000000000000000000
--- a/hpvm/test/parboil/common/platform/cuda.nvcc.mk
+++ /dev/null
@@ -1,23 +0,0 @@
-# (c) 2007 The Board of Trustees of the University of Illinois.
-
-# Cuda-related definitions common to all benchmarks
-
-########################################
-# Variables
-########################################
-
-# c.default is the base along with CUDA configuration in this setting
-include $(PARBOIL_ROOT)/common/platform/c.default.mk
-
-# Paths
-CUDAHOME=/usr/local/cuda
-
-# Programs
-CUDACC=$(CUDAHOME)/bin/nvcc
-CUDALINK=$(CUDAHOME)/bin/nvcc
-
-# Flags
-PLATFORM_CUDACFLAGS=-O3
-PLATFORM_CUDALDFLAGS=-lm -lpthread
-
-
diff --git a/hpvm/test/parboil/common/platform/opencl.default.mk b/hpvm/test/parboil/common/platform/opencl.default.mk
deleted file mode 100644
index a6ac4645dceb8097c304784dd66280d4f53d1f4f..0000000000000000000000000000000000000000
--- a/hpvm/test/parboil/common/platform/opencl.default.mk
+++ /dev/null
@@ -1,23 +0,0 @@
-# (c) 2007 The Board of Trustees of the University of Illinois.
-
-# Rules common to all makefiles
-
-# Commands to build objects from source file using C compiler
-# with gcc
-
-# Uncomment below two lines and configure if you want to use a platform
-# other than global one
-
-#OPENCL_PATH=/scr/hskim/ati-stream-sdk-v2.3-lnx64
-#OPENCL_LIB_PATH=$(OPENCL_PATH)/lib/x86_64
-
-# gcc (default)
-CC = clang
-PLATFORM_CFLAGS = 
-  
-CXX = clang++
-PLATFORM_CXXFLAGS = 
-  
-LINKER = clang++
-PLATFORM_LDFLAGS = -lm -lpthread
-
diff --git a/hpvm/test/parboil/common/platform/opencl.gcc.mk b/hpvm/test/parboil/common/platform/opencl.gcc.mk
deleted file mode 100644
index d9058f11b1d913189a550f060f5d5dc9e08af2c9..0000000000000000000000000000000000000000
--- a/hpvm/test/parboil/common/platform/opencl.gcc.mk
+++ /dev/null
@@ -1,17 +0,0 @@
-# (c) 2007 The Board of Trustees of the University of Illinois.
-
-# Rules common to all makefiles
-
-# Commands to build objects from source file using C compiler
-# with gcc
-
-# gcc (default)
-CC = gcc
-PLATFORM_CFLAGS = 
-  
-CXX = g++
-PLATFORM_CXXFLAGS = 
-  
-LINKER = g++
-PLATFORM_LDFLAGS = -lm -lpthread
-
diff --git a/hpvm/test/parboil/common/src/parboil_cuda.c b/hpvm/test/parboil/common/src/parboil_cuda.c
deleted file mode 100644
index 9fd64661643c9afec5cb470beaa516d545017bd3..0000000000000000000000000000000000000000
--- a/hpvm/test/parboil/common/src/parboil_cuda.c
+++ /dev/null
@@ -1,863 +0,0 @@
-/*
- * (c) 2007 The Board of Trustees of the University of Illinois.
- */
-
-#include <parboil.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#ifndef __MCUDA__
-#include <cuda_runtime_api.h>
-#else
-#include <mcuda.h>
-#endif
-
-#if _POSIX_VERSION >= 200112L
-#include <sys/time.h>
-#endif
-
-#define true 1
-
-/* Free an array of owned strings. */
-static void free_string_array(char **string_array) {
-  char **p;
-
-  if (!string_array)
-    return;
-  for (p = string_array; *p; p++)
-    free(*p);
-  free(string_array);
-}
-
-/* Parse a comma-delimited list of strings into an
- * array of strings. */
-static char **read_string_array(char *in) {
-  char **ret;
-  int i;
-  int count;       /* Number of items in the input */
-  char *substring; /* Current substring within 'in' */
-
-  /* Count the number of items in the string */
-  count = 1;
-  for (i = 0; in[i]; i++)
-    if (in[i] == ',')
-      count++;
-
-  /* Allocate storage */
-  ret = (char **)malloc((count + 1) * sizeof(char *));
-
-  /* Create copies of the strings from the list */
-  substring = in;
-  for (i = 0; i < count; i++) {
-    char *substring_end;
-    int substring_length;
-
-    /* Find length of substring */
-    for (substring_end = substring;
-         (*substring_end != ',') && (*substring_end != 0); substring_end++)
-      ;
-
-    substring_length = substring_end - substring;
-
-    /* Allocate memory and copy the substring */
-    ret[i] = (char *)malloc(substring_length + 1);
-    memcpy(ret[i], substring, substring_length);
-    ret[i][substring_length] = 0;
-
-    /* go to next substring */
-    substring = substring_end + 1;
-  }
-  ret[i] = NULL; /* Write the sentinel value */
-
-  return ret;
-}
-
-struct argparse {
-  int argc;    /* Number of arguments.  Mutable. */
-  char **argv; /* Argument values.  Immutable. */
-
-  int argn;        /* Current argument number. */
-  char **argv_get; /* Argument value being read. */
-  char **argv_put; /* Argument value being written.
-                    * argv_put <= argv_get. */
-};
-
-static void initialize_argparse(struct argparse *ap, int argc, char **argv) {
-  ap->argc = argc;
-  ap->argn = 0;
-  ap->argv_get = ap->argv_put = ap->argv = argv;
-}
-
-static void finalize_argparse(struct argparse *ap) {
-  /* Move the remaining arguments */
-  for (; ap->argn < ap->argc; ap->argn++)
-    *ap->argv_put++ = *ap->argv_get++;
-}
-
-/* Delete the current argument. */
-static void delete_argument(struct argparse *ap) {
-  if (ap->argn >= ap->argc) {
-    fprintf(stderr, "delete_argument\n");
-  }
-  ap->argc--;
-  ap->argv_get++;
-}
-
-/* Go to the next argument.  Also, move the current argument to its
- * final location in argv. */
-static void next_argument(struct argparse *ap) {
-  if (ap->argn >= ap->argc) {
-    fprintf(stderr, "next_argument\n");
-  }
-  /* Move argument to its new location. */
-  *ap->argv_put++ = *ap->argv_get++;
-  ap->argn++;
-}
-
-static int is_end_of_arguments(struct argparse *ap) {
-  return ap->argn == ap->argc;
-}
-
-static char *get_argument(struct argparse *ap) { return *ap->argv_get; }
-
-static char *consume_argument(struct argparse *ap) {
-  char *ret = get_argument(ap);
-  delete_argument(ap);
-  return ret;
-}
-
-struct pb_Parameters *pb_ReadParameters(int *_argc, char **argv) {
-  char *err_message;
-  struct argparse ap;
-  struct pb_Parameters *ret =
-      (struct pb_Parameters *)malloc(sizeof(struct pb_Parameters));
-
-  /* Initialize the parameters structure */
-  ret->outFile = NULL;
-  ret->inpFiles = (char **)malloc(sizeof(char *));
-  ret->inpFiles[0] = NULL;
-
-  /* Each argument */
-  initialize_argparse(&ap, *_argc, argv);
-  while (!is_end_of_arguments(&ap)) {
-    char *arg = get_argument(&ap);
-
-    /* Single-character flag */
-    if ((arg[0] == '-') && (arg[1] != 0) && (arg[2] == 0)) {
-      delete_argument(&ap); /* This argument is consumed here */
-
-      switch (arg[1]) {
-      case 'o': /* Output file name */
-        if (is_end_of_arguments(&ap)) {
-          err_message = "Expecting file name after '-o'\n";
-          goto error;
-        }
-        free(ret->outFile);
-        ret->outFile = strdup(consume_argument(&ap));
-        break;
-      case 'i': /* Input file name */
-        if (is_end_of_arguments(&ap)) {
-          err_message = "Expecting file name after '-i'\n";
-          goto error;
-        }
-        ret->inpFiles = read_string_array(consume_argument(&ap));
-        break;
-      case '-': /* End of options */
-        goto end_of_options;
-      default:
-        err_message = "Unexpected command-line parameter\n";
-        goto error;
-      }
-    } else {
-      /* Other parameters are ignored */
-      next_argument(&ap);
-    }
-  } /* end for each argument */
-
-end_of_options:
-  *_argc = ap.argc; /* Save the modified argc value */
-  finalize_argparse(&ap);
-
-  return ret;
-
-error:
-  fputs(err_message, stderr);
-  pb_FreeParameters(ret);
-  return NULL;
-}
-
-void pb_FreeParameters(struct pb_Parameters *p) {
-  char **cpp;
-
-  free(p->outFile);
-  free_string_array(p->inpFiles);
-  free(p);
-}
-
-int pb_Parameters_CountInputs(struct pb_Parameters *p) {
-  int n;
-
-  for (n = 0; p->inpFiles[n]; n++)
-    ;
-  return n;
-}
-
-/*****************************************************************************/
-/* Timer routines */
-
-static int is_async(enum pb_TimerID timer) {
-  return (timer == pb_TimerID_KERNEL) || (timer == pb_TimerID_COPY_ASYNC);
-}
-
-static int is_blocking(enum pb_TimerID timer) {
-  return (timer == pb_TimerID_COPY) || (timer == pb_TimerID_NONE);
-}
-
-#define INVALID_TIMERID pb_TimerID_LAST
-
-static int asyncs_outstanding(struct pb_TimerSet *timers) {
-  return (timers->async_markers != NULL) &&
-         (timers->async_markers->timerID != INVALID_TIMERID);
-}
-
-static struct pb_async_time_marker_list *
-get_last_async(struct pb_TimerSet *timers) {
-  /* Find the last event recorded thus far */
-  struct pb_async_time_marker_list *last_event = timers->async_markers;
-  if (last_event != NULL && last_event->timerID != INVALID_TIMERID) {
-    while (last_event->next != NULL &&
-           last_event->next->timerID != INVALID_TIMERID)
-      last_event = last_event->next;
-    return last_event;
-  } else
-    return NULL;
-}
-
-static void insert_marker(struct pb_TimerSet *tset, enum pb_TimerID timer) {
-  struct pb_async_time_marker_list **new_event = &(tset->async_markers);
-
-  while (*new_event != NULL && (*new_event)->timerID != INVALID_TIMERID)
-    new_event = &((*new_event)->next);
-
-  if (*new_event == NULL) {
-    *new_event = (struct pb_async_time_marker_list *)malloc(
-        sizeof(struct pb_async_time_marker_list));
-    (*new_event)->marker = malloc(sizeof(cudaEvent_t));
-    cudaEventCreate((*new_event)->marker);
-    (*new_event)->next = NULL;
-  }
-
-  /* valid event handle now aquired: insert the event record */
-  (*new_event)->label = NULL;
-  (*new_event)->timerID = timer;
-  cudaEventRecord(*((cudaEvent_t *)((*new_event)->marker)), 0);
-}
-
-static void insert_submarker(struct pb_TimerSet *tset, char *label,
-                             enum pb_TimerID timer) {
-  struct pb_async_time_marker_list **new_event = &(tset->async_markers);
-
-  while (*new_event != NULL && (*new_event)->timerID != INVALID_TIMERID)
-    new_event = &((*new_event)->next);
-
-  if (*new_event == NULL) {
-    *new_event = (struct pb_async_time_marker_list *)malloc(
-        sizeof(struct pb_async_time_marker_list));
-    (*new_event)->marker = malloc(sizeof(cudaEvent_t));
-    cudaEventCreate((*new_event)->marker);
-
-    (*new_event)->next = NULL;
-  }
-
-  /* valid event handle now aquired: insert the event record */
-  (*new_event)->label = label;
-  (*new_event)->timerID = timer;
-  cudaEventRecord(*((cudaEvent_t *)((*new_event)->marker)), 0);
-}
-
-/* Assumes that all recorded events have completed */
-static pb_Timestamp record_async_times(struct pb_TimerSet *tset) {
-  struct pb_async_time_marker_list *next_interval = NULL;
-  struct pb_async_time_marker_list *last_marker = get_last_async(tset);
-  pb_Timestamp total_async_time = 0;
-  enum pb_TimerID timer;
-  for (next_interval = tset->async_markers; next_interval != last_marker;
-       next_interval = next_interval->next) {
-    float interval_time_ms;
-    cudaEventElapsedTime(&interval_time_ms,
-                         *((cudaEvent_t *)next_interval->marker),
-                         *((cudaEvent_t *)next_interval->next->marker));
-    pb_Timestamp interval = (pb_Timestamp)(interval_time_ms * 1e3);
-    tset->timers[next_interval->timerID].elapsed += interval;
-    if (next_interval->label != NULL) {
-      struct pb_SubTimer *subtimer =
-          tset->sub_timer_list[next_interval->timerID]->subtimer_list;
-      while (subtimer != NULL) {
-        if (strcmp(subtimer->label, next_interval->label) == 0) {
-          subtimer->timer.elapsed += interval;
-          break;
-        }
-        subtimer = subtimer->next;
-      }
-    }
-    total_async_time += interval;
-    next_interval->timerID = INVALID_TIMERID;
-  }
-
-  if (next_interval != NULL)
-    next_interval->timerID = INVALID_TIMERID;
-
-  return total_async_time;
-}
-
-static void accumulate_time(pb_Timestamp *accum, pb_Timestamp start,
-                            pb_Timestamp end) {
-#if _POSIX_VERSION >= 200112L
-  *accum += end - start;
-#else
-#error "Timestamps not implemented for this system"
-#endif
-}
-
-#if _POSIX_VERSION >= 200112L
-static pb_Timestamp get_time() {
-  struct timeval tv;
-  gettimeofday(&tv, NULL);
-  return (pb_Timestamp)(tv.tv_sec * 1000000LL + tv.tv_usec);
-}
-#else
-#error "no supported time libraries are available on this platform"
-#endif
-
-void pb_ResetTimer(struct pb_Timer *timer) {
-  timer->state = pb_Timer_STOPPED;
-
-#if _POSIX_VERSION >= 200112L
-  timer->elapsed = 0;
-#else
-#error "pb_ResetTimer: not implemented for this system"
-#endif
-}
-
-void pb_StartTimer(struct pb_Timer *timer) {
-  if (timer->state != pb_Timer_STOPPED) {
-    fputs("Ignoring attempt to start a running timer\n", stderr);
-    return;
-  }
-
-  timer->state = pb_Timer_RUNNING;
-
-#if _POSIX_VERSION >= 200112L
-  {
-    struct timeval tv;
-    gettimeofday(&tv, NULL);
-    timer->init = tv.tv_sec * 1000000LL + tv.tv_usec;
-  }
-#else
-#error "pb_StartTimer: not implemented for this system"
-#endif
-}
-
-void pb_StartTimerAndSubTimer(struct pb_Timer *timer,
-                              struct pb_Timer *subtimer) {
-
-  unsigned int numNotStopped = 0x3; // 11
-  if (timer->state != pb_Timer_STOPPED) {
-    fputs("Warning: Timer was not stopped\n", stderr);
-    numNotStopped &= 0x1; // Zero out 2^1
-  }
-  if (subtimer->state != pb_Timer_STOPPED) {
-    fputs("Warning: Subtimer was not stopped\n", stderr);
-    numNotStopped &= 0x2; // Zero out 2^0
-  }
-  if (numNotStopped == 0x0) {
-    fputs("Ignoring attempt to start running timer and subtimer\n", stderr);
-    return;
-  }
-
-  timer->state = pb_Timer_RUNNING;
-  subtimer->state = pb_Timer_RUNNING;
-
-#if _POSIX_VERSION >= 200112L
-  {
-    struct timeval tv;
-    gettimeofday(&tv, NULL);
-
-    if (numNotStopped & 0x2) {
-      timer->init = tv.tv_sec * 1000000LL + tv.tv_usec;
-    }
-
-    if (numNotStopped & 0x1) {
-      subtimer->init = tv.tv_sec * 1000000LL + tv.tv_usec;
-    }
-  }
-#else
-#error "pb_StartTimer: not implemented for this system"
-#endif
-}
-
-void pb_StopTimer(struct pb_Timer *timer) {
-  pb_Timestamp fini;
-
-  if (timer->state != pb_Timer_RUNNING) {
-    fputs("Ignoring attempt to stop a stopped timer\n", stderr);
-    return;
-  }
-
-  timer->state = pb_Timer_STOPPED;
-
-#if _POSIX_VERSION >= 200112L
-  {
-    struct timeval tv;
-    gettimeofday(&tv, NULL);
-    fini = tv.tv_sec * 1000000LL + tv.tv_usec;
-  }
-#else
-#error "pb_StopTimer: not implemented for this system"
-#endif
-
-  accumulate_time(&timer->elapsed, timer->init, fini);
-  timer->init = fini;
-}
-
-void pb_StopTimerAndSubTimer(struct pb_Timer *timer,
-                             struct pb_Timer *subtimer) {
-
-  pb_Timestamp fini;
-
-  unsigned int numNotRunning = 0x3; // 11
-  if (timer->state != pb_Timer_RUNNING) {
-    fputs("Warning: Timer was not running\n", stderr);
-    numNotRunning &= 0x1; // Zero out 2^1
-  }
-  if (subtimer->state != pb_Timer_RUNNING) {
-    fputs("Warning: Subtimer was not running\n", stderr);
-    numNotRunning &= 0x2; // Zero out 2^0
-  }
-  if (numNotRunning == 0x0) {
-    fputs("Ignoring attempt to stop stopped timer and subtimer\n", stderr);
-    return;
-  }
-
-  timer->state = pb_Timer_STOPPED;
-  subtimer->state = pb_Timer_STOPPED;
-
-#if _POSIX_VERSION >= 200112L
-  {
-    struct timeval tv;
-    gettimeofday(&tv, NULL);
-    fini = tv.tv_sec * 1000000LL + tv.tv_usec;
-  }
-#else
-#error "pb_StopTimer: not implemented for this system"
-#endif
-
-  if (numNotRunning & 0x2) {
-    accumulate_time(&timer->elapsed, timer->init, fini);
-    timer->init = fini;
-  }
-
-  if (numNotRunning & 0x1) {
-    accumulate_time(&subtimer->elapsed, subtimer->init, fini);
-    subtimer->init = fini;
-  }
-}
-
-/* Get the elapsed time in seconds. */
-double pb_GetElapsedTime(struct pb_Timer *timer) {
-  double ret;
-
-  if (timer->state != pb_Timer_STOPPED) {
-    fputs("Elapsed time from a running timer is inaccurate\n", stderr);
-  }
-
-#if _POSIX_VERSION >= 200112L
-  ret = timer->elapsed / 1e6;
-#else
-#error "pb_GetElapsedTime: not implemented for this system"
-#endif
-  return ret;
-}
-
-void pb_InitializeTimerSet(struct pb_TimerSet *timers) {
-  int n;
-
-  timers->wall_begin = get_time();
-  timers->current = pb_TimerID_NONE;
-
-  timers->async_markers = NULL;
-
-  for (n = 0; n < pb_TimerID_LAST; n++) {
-    pb_ResetTimer(&timers->timers[n]);
-    timers->sub_timer_list[n] = NULL;
-  }
-}
-
-void pb_SetOpenCL(void *clContextPtr, void *clCommandQueuePtr) {}
-
-void pb_AddSubTimer(struct pb_TimerSet *timers, char *label,
-                    enum pb_TimerID pb_Category) {
-
-  struct pb_SubTimer *subtimer =
-      (struct pb_SubTimer *)malloc(sizeof(struct pb_SubTimer));
-
-  int len = strlen(label);
-
-  subtimer->label = (char *)malloc(sizeof(char) * (len + 1));
-  sprintf(subtimer->label, "%s\0", label);
-
-  pb_ResetTimer(&subtimer->timer);
-  subtimer->next = NULL;
-
-  struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[pb_Category];
-  if (subtimerlist == NULL) {
-    subtimerlist =
-        (struct pb_SubTimerList *)malloc(sizeof(struct pb_SubTimerList));
-    subtimerlist->subtimer_list = subtimer;
-    timers->sub_timer_list[pb_Category] = subtimerlist;
-  } else {
-    // Append to list
-    struct pb_SubTimer *element = subtimerlist->subtimer_list;
-    while (element->next != NULL) {
-      element = element->next;
-    }
-    element->next = subtimer;
-  }
-}
-
-void pb_SwitchToTimer(struct pb_TimerSet *timers, enum pb_TimerID timer) {
-  /* Stop the currently running timer */
-  if (timers->current != pb_TimerID_NONE) {
-    struct pb_SubTimerList *subtimerlist =
-        timers->sub_timer_list[timers->current];
-    struct pb_SubTimer *currSubTimer =
-        (subtimerlist != NULL) ? subtimerlist->current : NULL;
-
-    if (!is_async(timers->current)) {
-      if (timers->current != timer) {
-        if (currSubTimer != NULL) {
-          pb_StopTimerAndSubTimer(&timers->timers[timers->current],
-                                  &currSubTimer->timer);
-        } else {
-          pb_StopTimer(&timers->timers[timers->current]);
-        }
-      } else {
-        if (currSubTimer != NULL) {
-          pb_StopTimer(&currSubTimer->timer);
-        }
-      }
-    } else {
-      insert_marker(timers, timer);
-      if (!is_async(timer)) { // if switching to async too, keep driver going
-        pb_StopTimer(&timers->timers[pb_TimerID_DRIVER]);
-      }
-    }
-  }
-
-  pb_Timestamp currentTime = get_time();
-
-  /* The only cases we check for asynchronous task completion is
-   * when an overlapping CPU operation completes, or the next
-   * segment blocks on completion of previous async operations */
-  if (asyncs_outstanding(timers) &&
-      (!is_async(timers->current) || is_blocking(timer))) {
-
-    struct pb_async_time_marker_list *last_event = get_last_async(timers);
-    /* cudaSuccess if completed */
-    cudaError_t async_done =
-        cudaEventQuery(*((cudaEvent_t *)last_event->marker));
-
-    if (is_blocking(timer)) {
-      /* Async operations completed after previous CPU operations:
-       * overlapped time is the total CPU time since this set of async
-       * operations were first issued */
-
-      // timer to switch to is COPY or NONE
-      if (async_done != cudaSuccess)
-        accumulate_time(&(timers->timers[pb_TimerID_OVERLAP].elapsed),
-                        timers->async_begin, currentTime);
-
-      /* Wait on async operation completion */
-      cudaEventSynchronize(*((cudaEvent_t *)last_event->marker));
-      pb_Timestamp total_async_time = record_async_times(timers);
-
-      /* Async operations completed before previous CPU operations:
-       * overlapped time is the total async time */
-      if (async_done == cudaSuccess)
-        timers->timers[pb_TimerID_OVERLAP].elapsed += total_async_time;
-
-    } else
-        /* implies (!is_async(timers->current) && asyncs_outstanding(timers)) */
-        // i.e. Current Not Async (not KERNEL/COPY_ASYNC) but there are
-        // outstanding so something is deeper in stack
-        if (async_done == cudaSuccess) {
-      /* Async operations completed before previous CPU operations:
-       * overlapped time is the total async time */
-      timers->timers[pb_TimerID_OVERLAP].elapsed += record_async_times(timers);
-    }
-  }
-
-  /* Start the new timer */
-  if (timer != pb_TimerID_NONE) {
-    if (!is_async(timer)) {
-      pb_StartTimer(&timers->timers[timer]);
-    } else {
-      // toSwitchTo Is Async (KERNEL/COPY_ASYNC)
-      if (!asyncs_outstanding(timers)) {
-        /* No asyncs outstanding, insert a fresh async marker */
-
-        insert_marker(timers, timer);
-        timers->async_begin = currentTime;
-      } else if (!is_async(timers->current)) {
-        /* Previous asyncs still in flight, but a previous SwitchTo
-         * already marked the end of the most recent async operation,
-         * so we can rename that marker as the beginning of this async
-         * operation */
-
-        struct pb_async_time_marker_list *last_event = get_last_async(timers);
-        last_event->label = NULL;
-        last_event->timerID = timer;
-      }
-      if (!is_async(timers->current)) {
-        pb_StartTimer(&timers->timers[pb_TimerID_DRIVER]);
-      }
-    }
-  }
-  timers->current = timer;
-}
-
-void pb_SwitchToSubTimer(struct pb_TimerSet *timers, char *label,
-                         enum pb_TimerID category) {
-  struct pb_SubTimerList *subtimerlist =
-      timers->sub_timer_list[timers->current];
-  struct pb_SubTimer *curr =
-      (subtimerlist != NULL) ? subtimerlist->current : NULL;
-
-  if (timers->current != pb_TimerID_NONE) {
-    if (!is_async(timers->current)) {
-      if (timers->current != category) {
-        if (curr != NULL) {
-          pb_StopTimerAndSubTimer(&timers->timers[timers->current],
-                                  &curr->timer);
-        } else {
-          pb_StopTimer(&timers->timers[timers->current]);
-        }
-      } else {
-        if (curr != NULL) {
-          pb_StopTimer(&curr->timer);
-        }
-      }
-    } else {
-      insert_submarker(timers, label, category);
-      if (!is_async(category)) { // if switching to async too, keep driver going
-        pb_StopTimer(&timers->timers[pb_TimerID_DRIVER]);
-      }
-    }
-  }
-
-  pb_Timestamp currentTime = get_time();
-
-  /* The only cases we check for asynchronous task completion is
-   * when an overlapping CPU operation completes, or the next
-   * segment blocks on completion of previous async operations */
-  if (asyncs_outstanding(timers) &&
-      (!is_async(timers->current) || is_blocking(category))) {
-
-    struct pb_async_time_marker_list *last_event = get_last_async(timers);
-    /* cudaSuccess if completed */
-    cudaError_t async_done =
-        cudaEventQuery(*((cudaEvent_t *)last_event->marker));
-
-    if (is_blocking(category)) {
-      /* Async operations completed after previous CPU operations:
-       * overlapped time is the total CPU time since this set of async
-       * operations were first issued */
-
-      // timer to switch to is COPY or NONE
-      // if it hasn't already finished, then just take now and use that as the
-      // elapsed time in OVERLAP anything happening after now isn't OVERLAP
-      // because everything is being stopped to wait for synchronization it
-      // seems that the extra sync wall time isn't being recorded anywhere
-      if (async_done != cudaSuccess)
-        accumulate_time(&(timers->timers[pb_TimerID_OVERLAP].elapsed),
-                        timers->async_begin, currentTime);
-
-      /* Wait on async operation completion */
-      cudaEventSynchronize(*((cudaEvent_t *)last_event->marker));
-      pb_Timestamp total_async_time = record_async_times(timers);
-
-      /* Async operations completed before previous CPU operations:
-       * overlapped time is the total async time */
-      // If it did finish, then accumulate all the async time that did happen
-      // into OVERLAP the immediately preceding EventSynchronize theoretically
-      // didn't have any effect since it was already completed.
-      if (async_done == cudaSuccess)
-        timers->timers[pb_TimerID_OVERLAP].elapsed += total_async_time;
-
-    } else
-        /* implies (!is_async(timers->current) && asyncs_outstanding(timers)) */
-        // i.e. Current Not Async (not KERNEL/COPY_ASYNC) but there are
-        // outstanding so something is deeper in stack
-        if (async_done == cudaSuccess) {
-      /* Async operations completed before previous CPU operations:
-       * overlapped time is the total async time */
-      timers->timers[pb_TimerID_OVERLAP].elapsed += record_async_times(timers);
-    }
-    // else, this isn't blocking, so just check the next time around
-  }
-
-  subtimerlist = timers->sub_timer_list[category];
-  struct pb_SubTimer *subtimer = NULL;
-
-  if (label != NULL) {
-    subtimer = subtimerlist->subtimer_list;
-    while (subtimer != NULL) {
-      if (strcmp(subtimer->label, label) == 0) {
-        break;
-      } else {
-        subtimer = subtimer->next;
-      }
-    }
-  }
-
-  /* Start the new timer */
-  if (category != pb_TimerID_NONE) {
-    if (!is_async(category)) {
-
-      if (subtimerlist != NULL) {
-        subtimerlist->current = subtimer;
-      }
-
-      if (category != timers->current && subtimer != NULL) {
-        pb_StartTimerAndSubTimer(&timers->timers[category], &subtimer->timer);
-      } else if (subtimer != NULL) {
-        pb_StartTimer(&subtimer->timer);
-      } else {
-        pb_StartTimer(&timers->timers[category]);
-      }
-    } else {
-      if (subtimerlist != NULL) {
-        subtimerlist->current = subtimer;
-      }
-
-      // toSwitchTo Is Async (KERNEL/COPY_ASYNC)
-      if (!asyncs_outstanding(timers)) {
-        /* No asyncs outstanding, insert a fresh async marker */
-        insert_submarker(timers, label, category);
-        timers->async_begin = currentTime;
-      } else if (!is_async(timers->current)) {
-        /* Previous asyncs still in flight, but a previous SwitchTo
-         * already marked the end of the most recent async operation,
-         * so we can rename that marker as the beginning of this async
-         * operation */
-
-        struct pb_async_time_marker_list *last_event = get_last_async(timers);
-        last_event->timerID = category;
-        last_event->label = label;
-      } // else, marker for switchToThis was already inserted
-
-      // toSwitchto is already asynchronous, but if current/prev state is async
-      // too, then DRIVER is already running
-      if (!is_async(timers->current)) {
-        pb_StartTimer(&timers->timers[pb_TimerID_DRIVER]);
-      }
-    }
-  }
-
-  timers->current = category;
-}
-
-void pb_PrintTimerSet(struct pb_TimerSet *timers) {
-  pb_Timestamp wall_end = get_time();
-
-  struct pb_Timer *t = timers->timers;
-  struct pb_SubTimer *sub = NULL;
-
-  int maxSubLength;
-
-  //  const char *categories[] = {
-  //    "IO", "Kernel", "Copy", "Driver", "Copy Async", "Compute"
-  //  };
-  const char *categories[] = {
-      "IO",          "Kernel",         "Copy",       "Driver",
-      "Copy Async",  "Compute",        "Overlap",    "Init_Ctx",
-      "Clear_Ctx",   "Copy_Scalar",    "Copy_Ptr",   "Mem_Free",
-      "Read_Output", "Setup",          "Mem_Track",  "Mem_Untrack",
-      "Misc",        "Pthread_Create", "Arg_Unpack", "Computation",
-      "Output_Pack", "Output_Unpack"};
-
-  const int maxCategoryLength = 10;
-
-  int i;
-  for (i = 1; i < pb_TimerID_LAST;
-       ++i) { // exclude NONE and OVRELAP from this format
-    if (pb_GetElapsedTime(&t[i]) != 0 || true) {
-
-      // Print Category Timer
-      printf("%-*s: %f\n", maxCategoryLength, categories[i - 1],
-             pb_GetElapsedTime(&t[i]));
-
-      if (timers->sub_timer_list[i] != NULL) {
-        sub = timers->sub_timer_list[i]->subtimer_list;
-        maxSubLength = 0;
-        while (sub != NULL) {
-          // Find longest SubTimer label
-          if (strlen(sub->label) > maxSubLength) {
-            maxSubLength = strlen(sub->label);
-          }
-          sub = sub->next;
-        }
-
-        // Fit to Categories
-        if (maxSubLength <= maxCategoryLength) {
-          maxSubLength = maxCategoryLength;
-        }
-
-        sub = timers->sub_timer_list[i]->subtimer_list;
-
-        // Print SubTimers
-        while (sub != NULL) {
-          printf(" -%-*s: %f\n", maxSubLength, sub->label,
-                 pb_GetElapsedTime(&sub->timer));
-          sub = sub->next;
-        }
-      }
-    }
-  }
-
-  if (pb_GetElapsedTime(&t[pb_TimerID_OVERLAP]) != 0)
-    printf("CPU/Kernel Overlap: %f\n",
-           pb_GetElapsedTime(&t[pb_TimerID_OVERLAP]));
-
-  float walltime = (wall_end - timers->wall_begin) / 1e6;
-  printf("Timer Wall Time: %f\n", walltime);
-}
-
-void pb_DestroyTimerSet(struct pb_TimerSet *timers) {
-  /* clean up all of the async event markers */
-  struct pb_async_time_marker_list **event = &(timers->async_markers);
-  while (*event != NULL) {
-    cudaEventSynchronize(*((cudaEvent_t *)(*event)->marker));
-    cudaEventDestroy(*((cudaEvent_t *)(*event)->marker));
-    free((*event)->marker);
-    struct pb_async_time_marker_list **next = &((*event)->next);
-    free(*event);
-    (*event) = NULL;
-    event = next;
-  }
-
-  int i = 0;
-  for (i = 0; i < pb_TimerID_LAST; ++i) {
-    if (timers->sub_timer_list[i] != NULL) {
-      struct pb_SubTimer *subtimer = timers->sub_timer_list[i]->subtimer_list;
-      struct pb_SubTimer *prev = NULL;
-      while (subtimer != NULL) {
-        free(subtimer->label);
-        prev = subtimer;
-        subtimer = subtimer->next;
-        free(prev);
-      }
-      free(timers->sub_timer_list[i]);
-    }
-  }
-}
diff --git a/hpvm/test/parboil/common/src/parboil_opencl.c b/hpvm/test/parboil/common/src/parboil_opencl.c
deleted file mode 100644
index d493992acee859186d58330a9988ef7ef2571f73..0000000000000000000000000000000000000000
--- a/hpvm/test/parboil/common/src/parboil_opencl.c
+++ /dev/null
@@ -1,1051 +0,0 @@
-/*
- * (c) 2007 The Board of Trustees of the University of Illinois.
- */
-
-#include <CL/cl.h>
-#include <assert.h>
-#include <parboil.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#if _POSIX_VERSION >= 200112L
-#include <time.h>
-#endif
-
-#define BILLION 1000000000LL
-#define true 1
-
-cl_context *clContextPtr;
-cl_command_queue *clCommandQueuePtr;
-
-/* Free an array of owned strings. */
-static void free_string_array(char **string_array) {
-  char **p;
-
-  if (!string_array)
-    return;
-  for (p = string_array; *p; p++)
-    free(*p);
-  free(string_array);
-}
-
-/* Parse a comma-delimited list of strings into an
- * array of strings. */
-static char **read_string_array(char *in) {
-  char **ret;
-  int i;
-  int count;       /* Number of items in the input */
-  char *substring; /* Current substring within 'in' */
-
-  /* Count the number of items in the string */
-  count = 1;
-  for (i = 0; in[i]; i++)
-    if (in[i] == ',')
-      count++;
-
-  /* Allocate storage */
-  ret = (char **)malloc((count + 1) * sizeof(char *));
-
-  /* Create copies of the strings from the list */
-  substring = in;
-  for (i = 0; i < count; i++) {
-    char *substring_end;
-    int substring_length;
-
-    /* Find length of substring */
-    for (substring_end = substring;
-         (*substring_end != ',') && (*substring_end != 0); substring_end++)
-      ;
-
-    substring_length = substring_end - substring;
-
-    /* Allocate memory and copy the substring */
-    ret[i] = (char *)malloc(substring_length + 1);
-    memcpy(ret[i], substring, substring_length);
-    ret[i][substring_length] = 0;
-
-    /* go to next substring */
-    substring = substring_end + 1;
-  }
-  ret[i] = NULL; /* Write the sentinel value */
-
-  return ret;
-}
-
-struct argparse {
-  int argc;    /* Number of arguments.  Mutable. */
-  char **argv; /* Argument values.  Immutable. */
-
-  int argn;        /* Current argument number. */
-  char **argv_get; /* Argument value being read. */
-  char **argv_put; /* Argument value being written.
-                    * argv_put <= argv_get. */
-};
-
-static void initialize_argparse(struct argparse *ap, int argc, char **argv) {
-  ap->argc = argc;
-  ap->argn = 0;
-  ap->argv_get = ap->argv_put = ap->argv = argv;
-}
-
-static void finalize_argparse(struct argparse *ap) {
-  /* Move the remaining arguments */
-  for (; ap->argn < ap->argc; ap->argn++)
-    *ap->argv_put++ = *ap->argv_get++;
-}
-
-/* Delete the current argument. */
-static void delete_argument(struct argparse *ap) {
-  if (ap->argn >= ap->argc) {
-    // fprintf(stderr, "delete_argument\n");
-  }
-  ap->argc--;
-  ap->argv_get++;
-}
-
-/* Go to the next argument.  Also, move the current argument to its
- * final location in argv. */
-static void next_argument(struct argparse *ap) {
-  if (ap->argn >= ap->argc) {
-    // fprintf(stderr, "next_argument\n");
-  }
-  /* Move argument to its new location. */
-  *ap->argv_put++ = *ap->argv_get++;
-  ap->argn++;
-}
-
-static int is_end_of_arguments(struct argparse *ap) {
-  return ap->argn == ap->argc;
-}
-
-static char *get_argument(struct argparse *ap) { return *ap->argv_get; }
-
-static char *consume_argument(struct argparse *ap) {
-  char *ret = get_argument(ap);
-  delete_argument(ap);
-  return ret;
-}
-
-struct pb_Parameters *pb_ReadParameters(int *_argc, char **argv) {
-  char *err_message;
-  struct argparse ap;
-  struct pb_Parameters *ret =
-      (struct pb_Parameters *)malloc(sizeof(struct pb_Parameters));
-
-  /* Initialize the parameters structure */
-  ret->outFile = NULL;
-  ret->inpFiles = (char **)malloc(sizeof(char *));
-  ret->inpFiles[0] = NULL;
-
-  /* Each argument */
-  initialize_argparse(&ap, *_argc, argv);
-  while (!is_end_of_arguments(&ap)) {
-    char *arg = get_argument(&ap);
-
-    /* Single-character flag */
-    if ((arg[0] == '-') && (arg[1] != 0) && (arg[2] == 0)) {
-      delete_argument(&ap); /* This argument is consumed here */
-
-      switch (arg[1]) {
-      case 'o': /* Output file name */
-        if (is_end_of_arguments(&ap)) {
-          err_message = "Expecting file name after '-o'\n";
-          goto error;
-        }
-        free(ret->outFile);
-        ret->outFile = strdup(consume_argument(&ap));
-        break;
-      case 'i': /* Input file name */
-        if (is_end_of_arguments(&ap)) {
-          err_message = "Expecting file name after '-i'\n";
-          goto error;
-        }
-        ret->inpFiles = read_string_array(consume_argument(&ap));
-        break;
-      case '-': /* End of options */
-        goto end_of_options;
-      default:
-        err_message = "Unexpected command-line parameter\n";
-        goto error;
-      }
-    } else {
-      /* Other parameters are ignored */
-      next_argument(&ap);
-    }
-  } /* end for each argument */
-
-end_of_options:
-  *_argc = ap.argc; /* Save the modified argc value */
-  finalize_argparse(&ap);
-
-  return ret;
-
-error:
-  fputs(err_message, stderr);
-  pb_FreeParameters(ret);
-  return NULL;
-}
-
-void pb_FreeParameters(struct pb_Parameters *p) {
-  char **cpp;
-
-  free(p->outFile);
-  free_string_array(p->inpFiles);
-  free(p);
-}
-
-int pb_Parameters_CountInputs(struct pb_Parameters *p) {
-  int n;
-
-  for (n = 0; p->inpFiles[n]; n++)
-    ;
-  return n;
-}
-
-/*****************************************************************************/
-/* Timer routines */
-
-static int is_async(enum pb_TimerID timer) {
-#ifndef OPENCL_CPU
-  return (timer == pb_TimerID_KERNEL) || (timer == pb_TimerID_COPY_ASYNC);
-#else
-  return (timer == pb_TimerID_COPY_ASYNC);
-#endif
-}
-
-static int is_blocking(enum pb_TimerID timer) {
-  return (timer == pb_TimerID_COPY) || (timer == pb_TimerID_NONE);
-}
-
-#define INVALID_TIMERID pb_TimerID_LAST
-
-static int asyncs_outstanding(struct pb_TimerSet *timers) {
-  return (timers->async_markers != NULL) &&
-         (timers->async_markers->timerID != INVALID_TIMERID);
-}
-
-static struct pb_async_time_marker_list *
-get_last_async(struct pb_TimerSet *timers) {
-  /* Find the last event recorded thus far */
-  struct pb_async_time_marker_list *last_event = timers->async_markers;
-  if (last_event != NULL && last_event->timerID != INVALID_TIMERID) {
-    while (last_event->next != NULL &&
-           last_event->next->timerID != INVALID_TIMERID)
-      last_event = last_event->next;
-    return last_event;
-  } else
-    return NULL;
-}
-
-static void insert_marker(struct pb_TimerSet *tset, enum pb_TimerID timer) {
-  cl_int ciErrNum = CL_SUCCESS;
-  struct pb_async_time_marker_list **new_event = &(tset->async_markers);
-
-  while (*new_event != NULL && (*new_event)->timerID != INVALID_TIMERID) {
-    new_event = &((*new_event)->next);
-  }
-
-  if (*new_event == NULL) {
-    *new_event = (struct pb_async_time_marker_list *)malloc(
-        sizeof(struct pb_async_time_marker_list));
-    (*new_event)->marker = calloc(1, sizeof(cl_event));
-    /*
-    // I don't think this is needed at all. I believe clEnqueueMarker 'creates'
-the event #if ( __OPENCL_VERSION__ >= CL_VERSION_1_1 ) fprintf(stderr, "Creating
-Marker [%d]\n", timer);
-    *((cl_event *)((*new_event)->marker)) = clCreateUserEvent(*clContextPtr,
-&ciErrNum); if (ciErrNum != CL_SUCCESS) { fprintf(stderr, "Error Creating User
-Event Object!\n");
-    }
-    ciErrNum = clSetUserEventStatus(*((cl_event *)((*new_event)->marker)),
-CL_QUEUED); if (ciErrNum != CL_SUCCESS) { fprintf(stderr, "Error Setting User
-Event Status!\n");
-    }
-#endif
-*/
-    (*new_event)->next = NULL;
-  }
-
-  /* valid event handle now aquired: insert the event record */
-  (*new_event)->label = NULL;
-  (*new_event)->timerID = timer;
-  ciErrNum =
-      clEnqueueMarker(*clCommandQueuePtr, (cl_event *)(*new_event)->marker);
-  if (ciErrNum != CL_SUCCESS) {
-    fprintf(stderr, "Error Enqueueing Marker!\n");
-  }
-}
-
-static void insert_submarker(struct pb_TimerSet *tset, char *label,
-                             enum pb_TimerID timer) {
-  cl_int ciErrNum = CL_SUCCESS;
-  struct pb_async_time_marker_list **new_event = &(tset->async_markers);
-
-  while (*new_event != NULL && (*new_event)->timerID != INVALID_TIMERID) {
-    new_event = &((*new_event)->next);
-  }
-
-  if (*new_event == NULL) {
-    *new_event = (struct pb_async_time_marker_list *)malloc(
-        sizeof(struct pb_async_time_marker_list));
-    (*new_event)->marker = calloc(1, sizeof(cl_event));
-    /*
-#if ( __OPENCL_VERSION__ >= CL_VERSION_1_1 )
-fprintf(stderr, "Creating SubMarker %s[%d]\n", label, timer);
-    *((cl_event *)((*new_event)->marker)) = clCreateUserEvent(*clContextPtr,
-&ciErrNum); if (ciErrNum != CL_SUCCESS) { fprintf(stderr, "Error Creating User
-Event Object!\n");
-    }
-    ciErrNum = clSetUserEventStatus(*((cl_event *)((*new_event)->marker)),
-CL_QUEUED); if (ciErrNum != CL_SUCCESS) { fprintf(stderr, "Error Setting User
-Event Status!\n");
-    }
-#endif
-*/
-    (*new_event)->next = NULL;
-  }
-
-  /* valid event handle now aquired: insert the event record */
-  (*new_event)->label = label;
-  (*new_event)->timerID = timer;
-  ciErrNum =
-      clEnqueueMarker(*clCommandQueuePtr, (cl_event *)(*new_event)->marker);
-  if (ciErrNum != CL_SUCCESS) {
-    fprintf(stderr, "Error Enqueueing Marker!\n");
-  }
-}
-
-/* Assumes that all recorded events have completed */
-static pb_Timestamp record_async_times(struct pb_TimerSet *tset) {
-  struct pb_async_time_marker_list *next_interval = NULL;
-  struct pb_async_time_marker_list *last_marker = get_last_async(tset);
-  pb_Timestamp total_async_time = 0;
-  enum pb_TimerID timer;
-
-  for (next_interval = tset->async_markers; next_interval != last_marker;
-       next_interval = next_interval->next) {
-    cl_ulong command_start = 0, command_end = 0;
-    cl_int ciErrNum = CL_SUCCESS;
-
-    ciErrNum = clGetEventProfilingInfo(*((cl_event *)next_interval->marker),
-                                       CL_PROFILING_COMMAND_END,
-                                       sizeof(cl_ulong), &command_start, NULL);
-    if (ciErrNum != CL_SUCCESS) {
-      fprintf(stderr, "Error getting first EventProfilingInfo: %d\n", ciErrNum);
-    }
-
-    ciErrNum = clGetEventProfilingInfo(
-        *((cl_event *)next_interval->next->marker), CL_PROFILING_COMMAND_END,
-        sizeof(cl_ulong), &command_end, NULL);
-    if (ciErrNum != CL_SUCCESS) {
-      fprintf(stderr, "Error getting second EventProfilingInfo: %d\n",
-              ciErrNum);
-    }
-
-    pb_Timestamp interval =
-        (pb_Timestamp)(((double)(command_end - command_start)));
-    tset->timers[next_interval->timerID].elapsed += interval;
-    if (next_interval->label != NULL) {
-      struct pb_SubTimer *subtimer =
-          tset->sub_timer_list[next_interval->timerID]->subtimer_list;
-      while (subtimer != NULL) {
-        if (strcmp(subtimer->label, next_interval->label) == 0) {
-          subtimer->timer.elapsed += interval;
-          break;
-        }
-        subtimer = subtimer->next;
-      }
-    }
-    total_async_time += interval;
-    next_interval->timerID = INVALID_TIMERID;
-  }
-
-  if (next_interval != NULL)
-    next_interval->timerID = INVALID_TIMERID;
-
-  return total_async_time;
-}
-
-static void accumulate_time(pb_Timestamp *accum, pb_Timestamp start,
-                            pb_Timestamp end) {
-#if _POSIX_VERSION >= 200112L
-  *accum += end - start;
-#else
-#error "Timestamps not implemented for this system"
-#endif
-}
-
-#if _POSIX_VERSION >= 200112L
-static pb_Timestamp get_time() {
-  struct timespec tv;
-  clock_gettime(CLOCK_MONOTONIC, &tv);
-  return (pb_Timestamp)(tv.tv_sec * BILLION + tv.tv_nsec);
-}
-#else
-#error "no supported time libraries are available on this platform"
-#endif
-
-void pb_ResetTimer(struct pb_Timer *timer) {
-  timer->state = pb_Timer_STOPPED;
-
-#if _POSIX_VERSION >= 200112L
-  timer->elapsed = 0;
-#else
-#error "pb_ResetTimer: not implemented for this system"
-#endif
-}
-
-void pb_StartTimer(struct pb_Timer *timer) {
-  if (timer->state != pb_Timer_STOPPED) {
-    fputs("Ignoring attempt to start a running timer\n", stderr);
-    return;
-  }
-
-  timer->state = pb_Timer_RUNNING;
-
-#if _POSIX_VERSION >= 200112L
-  {
-    struct timespec tv;
-    clock_gettime(CLOCK_MONOTONIC, &tv);
-    timer->init = tv.tv_sec * BILLION + tv.tv_nsec;
-  }
-#else
-#error "pb_StartTimer: not implemented for this system"
-#endif
-}
-
-void pb_StartTimerAndSubTimer(struct pb_Timer *timer,
-                              struct pb_Timer *subtimer) {
-
-  unsigned int numNotStopped = 0x3; // 11
-  if (timer->state != pb_Timer_STOPPED) {
-    fputs("Warning: Timer was not stopped\n", stderr);
-    numNotStopped &= 0x1; // Zero out 2^1
-  }
-  if (subtimer->state != pb_Timer_STOPPED) {
-    fputs("Warning: Subtimer was not stopped\n", stderr);
-    numNotStopped &= 0x2; // Zero out 2^0
-  }
-  if (numNotStopped == 0x0) {
-    fputs("Ignoring attempt to start running timer and subtimer\n", stderr);
-    return;
-  }
-
-  timer->state = pb_Timer_RUNNING;
-  subtimer->state = pb_Timer_RUNNING;
-
-#if _POSIX_VERSION >= 200112L
-  {
-    struct timespec tv;
-    clock_gettime(CLOCK_MONOTONIC, &tv);
-
-    if (numNotStopped & 0x2) {
-      timer->init = tv.tv_sec * BILLION + tv.tv_nsec;
-    }
-
-    if (numNotStopped & 0x1) {
-      subtimer->init = tv.tv_sec * BILLION + tv.tv_nsec;
-    }
-  }
-#else
-#error "pb_StartTimer: not implemented for this system"
-#endif
-}
-
-void pb_StopTimer(struct pb_Timer *timer) {
-  pb_Timestamp fini;
-
-  if (timer->state != pb_Timer_RUNNING) {
-    fputs("Ignoring attempt to stop a stopped timer\n", stderr);
-    return;
-  }
-
-  timer->state = pb_Timer_STOPPED;
-
-#if _POSIX_VERSION >= 200112L
-  {
-    struct timespec tv;
-    clock_gettime(CLOCK_MONOTONIC, &tv);
-    fini = tv.tv_sec * BILLION + tv.tv_nsec;
-  }
-#else
-#error "pb_StopTimer: not implemented for this system"
-#endif
-
-  accumulate_time(&timer->elapsed, timer->init, fini);
-  timer->init = fini;
-}
-
-void pb_StopTimerAndSubTimer(struct pb_Timer *timer,
-                             struct pb_Timer *subtimer) {
-
-  pb_Timestamp fini;
-
-  unsigned int numNotRunning = 0x3; // 11
-  if (timer->state != pb_Timer_RUNNING) {
-    fputs("Warning: Timer was not running\n", stderr);
-    numNotRunning &= 0x1; // Zero out 2^1
-  }
-  if (subtimer->state != pb_Timer_RUNNING) {
-    fputs("Warning: Subtimer was not running\n", stderr);
-    numNotRunning &= 0x2; // Zero out 2^0
-  }
-  if (numNotRunning == 0x0) {
-    fputs("Ignoring attempt to stop stopped timer and subtimer\n", stderr);
-    return;
-  }
-
-  timer->state = pb_Timer_STOPPED;
-  subtimer->state = pb_Timer_STOPPED;
-
-#if _POSIX_VERSION >= 200112L
-  {
-    struct timespec tv;
-    clock_gettime(CLOCK_MONOTONIC, &tv);
-    fini = tv.tv_sec * BILLION + tv.tv_nsec;
-  }
-#else
-#error "pb_StopTimer: not implemented for this system"
-#endif
-
-  if (numNotRunning & 0x2) {
-    accumulate_time(&timer->elapsed, timer->init, fini);
-    timer->init = fini;
-  }
-
-  if (numNotRunning & 0x1) {
-    accumulate_time(&subtimer->elapsed, subtimer->init, fini);
-    subtimer->init = fini;
-  }
-}
-
-/* Get the elapsed time in seconds. */
-double pb_GetElapsedTime(struct pb_Timer *timer) {
-  double ret;
-
-  if (timer->state != pb_Timer_STOPPED) {
-    fputs("Elapsed time from a running timer is inaccurate\n", stderr);
-  }
-
-#if _POSIX_VERSION >= 200112L
-  ret = timer->elapsed / 1e9;
-#else
-#error "pb_GetElapsedTime: not implemented for this system"
-#endif
-  return ret;
-}
-
-void pb_InitializeTimerSet(struct pb_TimerSet *timers) {
-  int n;
-
-  timers->wall_begin = get_time();
-  timers->current = pb_TimerID_NONE;
-
-  timers->async_markers = NULL;
-
-  for (n = 0; n < pb_TimerID_LAST; n++) {
-    pb_ResetTimer(&timers->timers[n]);
-    timers->sub_timer_list[n] = NULL;
-  }
-}
-
-void pb_SetOpenCL(void *p_clContextPtr, void *p_clCommandQueuePtr) {
-  clContextPtr = ((cl_context *)p_clContextPtr);
-  clCommandQueuePtr = ((cl_command_queue *)p_clCommandQueuePtr);
-}
-
-static char *LoadProgSource(const char *Filename, size_t *szFinalLength) {
-  // locals
-  FILE *pFileStream = NULL;
-  size_t szSourceLength;
-
-  // open the OpenCL source code file
-  pFileStream = fopen(Filename, "rb");
-  if (pFileStream == 0) {
-    return NULL;
-  }
-
-  // get the length of the source code
-  fseek(pFileStream, 0, SEEK_END);
-  szSourceLength = ftell(pFileStream);
-  fseek(pFileStream, 0, SEEK_SET);
-
-  // allocate a buffer for the source code string and read it in
-  char *cSourceString = (char *)malloc(szSourceLength + 1);
-  if (fread((cSourceString), szSourceLength, 1, pFileStream) != 1) {
-    fclose(pFileStream);
-    free(cSourceString);
-    return 0;
-  }
-
-  // close the file and return the total length of the combined (preamble +
-  // source) string
-  fclose(pFileStream);
-  if (szFinalLength != 0) {
-    *szFinalLength = szSourceLength;
-  }
-  cSourceString[szSourceLength] = '\0';
-
-  return cSourceString;
-}
-
-static inline void checkErr(cl_int err, cl_int success, const char *name) {
-  if (err != success) {
-    printf("ERROR: %s\n", name);
-    exit(EXIT_FAILURE);
-  }
-}
-
-void pb_CreateAndBuildKernelFromBinary(const char *file, const char *kernel,
-                                       void *clContextPtr, void *clDevicePtr,
-                                       void *clProgramPtr, void *clKernelPtr) {
-  size_t kernelLength;
-  char *programSource = LoadProgSource(file, &kernelLength);
-  checkErr(programSource != NULL, 1 /*bool true*/,
-           "Failure to load Program Binary");
-
-  cl_int binaryStatus;
-  cl_int errcode;
-  cl_device_id clDevice = *(cl_device_id *)clDevicePtr;
-  cl_context clContext = *(cl_context *)clContextPtr;
-  cl_program clProgram = clCreateProgramWithBinary(
-      clContext, 1, &clDevice, &kernelLength,
-      (const unsigned char **)&programSource, &binaryStatus, &errcode);
-  checkErr(errcode, CL_SUCCESS, "Failure to create program from binary");
-
-  // printf("Building kernel - %s, from file %s\n", kernel, file);
-  errcode = clBuildProgram(clProgram, 0, NULL, NULL, NULL, NULL);
-  // If build fails, get build log from device
-  if (errcode != CL_SUCCESS) {
-    printf("ERROR: Failure to build program\n");
-    size_t len = 0;
-    errcode = clGetProgramBuildInfo(clProgram, clDevice, CL_PROGRAM_BUILD_LOG,
-                                    0, NULL, &len);
-    printf("LOG LENGTH: %lu\n", len);
-    checkErr(errcode, CL_SUCCESS,
-             "Failure to collect program build log length");
-    char *log = (char *)malloc(len * sizeof(char));
-    errcode = clGetProgramBuildInfo(clProgram, clDevice, CL_PROGRAM_BUILD_LOG,
-                                    len, log, NULL);
-    checkErr(errcode, CL_SUCCESS, "Failure to collect program build log");
-
-    printf("Device Build Log: %s\n", log);
-    free(log);
-    exit(EXIT_FAILURE);
-  }
-
-  cl_kernel clKernel = clCreateKernel(clProgram, kernel, &errcode);
-  checkErr(errcode, CL_SUCCESS, "Failure to create kernel");
-
-  *(cl_program *)clProgramPtr = clProgram;
-  *(cl_kernel *)clKernelPtr = clKernel;
-
-  free(programSource);
-}
-
-void pb_AddSubTimer(struct pb_TimerSet *timers, char *label,
-                    enum pb_TimerID pb_Category) {
-
-  struct pb_SubTimer *subtimer =
-      (struct pb_SubTimer *)malloc(sizeof(struct pb_SubTimer));
-
-  int len = strlen(label);
-
-  subtimer->label = (char *)malloc(sizeof(char) * (len + 1));
-  sprintf(subtimer->label, "%s\0", label);
-
-  pb_ResetTimer(&subtimer->timer);
-  subtimer->next = NULL;
-
-  struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[pb_Category];
-  if (subtimerlist == NULL) {
-    subtimerlist =
-        (struct pb_SubTimerList *)calloc(1, sizeof(struct pb_SubTimerList));
-    subtimerlist->subtimer_list = subtimer;
-    timers->sub_timer_list[pb_Category] = subtimerlist;
-  } else {
-    // Append to list
-    struct pb_SubTimer *element = subtimerlist->subtimer_list;
-    while (element->next != NULL) {
-      element = element->next;
-    }
-    element->next = subtimer;
-  }
-}
-
-void pb_SwitchToTimer(struct pb_TimerSet *timers, enum pb_TimerID timer) {
-  /* Stop the currently running timer */
-  if (timers->current != pb_TimerID_NONE) {
-    struct pb_SubTimerList *subtimerlist =
-        timers->sub_timer_list[timers->current];
-    struct pb_SubTimer *currSubTimer =
-        (subtimerlist != NULL) ? subtimerlist->current : NULL;
-
-    if (!is_async(timers->current)) {
-      if (timers->current != timer) {
-        if (currSubTimer != NULL) {
-          pb_StopTimerAndSubTimer(&timers->timers[timers->current],
-                                  &currSubTimer->timer);
-        } else {
-          pb_StopTimer(&timers->timers[timers->current]);
-        }
-      } else {
-        if (currSubTimer != NULL) {
-          pb_StopTimer(&currSubTimer->timer);
-        }
-      }
-    } else {
-      insert_marker(timers, timer);
-      if (!is_async(timer)) { // if switching to async too, keep driver going
-        pb_StopTimer(&timers->timers[pb_TimerID_DRIVER]);
-      }
-    }
-  }
-
-  pb_Timestamp currentTime = get_time();
-
-  /* The only cases we check for asynchronous task completion is
-   * when an overlapping CPU operation completes, or the next
-   * segment blocks on completion of previous async operations */
-  if (asyncs_outstanding(timers) &&
-      (!is_async(timers->current) || is_blocking(timer))) {
-
-    struct pb_async_time_marker_list *last_event = get_last_async(timers);
-    /* CL_COMPLETE if completed */
-
-    cl_int ciErrNum = CL_SUCCESS;
-    cl_int async_done = CL_COMPLETE;
-
-    ciErrNum = clGetEventInfo(*((cl_event *)last_event->marker),
-                              CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int),
-                              &async_done, NULL);
-    if (ciErrNum != CL_SUCCESS) {
-      fprintf(stderr, "Error Querying EventInfo!\n");
-    }
-
-    if (is_blocking(timer)) {
-      /* Async operations completed after previous CPU operations:
-       * overlapped time is the total CPU time since this set of async
-       * operations were first issued */
-
-      // timer to switch to is COPY or NONE
-      if (async_done != CL_COMPLETE) {
-        accumulate_time(&(timers->timers[pb_TimerID_OVERLAP].elapsed),
-                        timers->async_begin, currentTime);
-      }
-
-      /* Wait on async operation completion */
-      ciErrNum = clWaitForEvents(1, (cl_event *)last_event->marker);
-      if (ciErrNum != CL_SUCCESS) {
-        fprintf(stderr, "Error Waiting for Events!\n");
-      }
-
-      pb_Timestamp total_async_time = record_async_times(timers);
-
-      /* Async operations completed before previous CPU operations:
-       * overlapped time is the total async time */
-      if (async_done == CL_COMPLETE) {
-        // fprintf(stderr, "Async_done: total_async_type = %lld\n",
-        // total_async_time);
-        timers->timers[pb_TimerID_OVERLAP].elapsed += total_async_time;
-      }
-
-    } else
-        /* implies (!is_async(timers->current) && asyncs_outstanding(timers)) */
-        // i.e. Current Not Async (not KERNEL/COPY_ASYNC) but there are
-        // outstanding so something is deeper in stack
-        if (async_done == CL_COMPLETE) {
-      /* Async operations completed before previous CPU operations:
-       * overlapped time is the total async time */
-      timers->timers[pb_TimerID_OVERLAP].elapsed += record_async_times(timers);
-    }
-  }
-
-  /* Start the new timer */
-  if (timer != pb_TimerID_NONE) {
-    if (!is_async(timer)) {
-      pb_StartTimer(&timers->timers[timer]);
-    } else {
-      // toSwitchTo Is Async (KERNEL/COPY_ASYNC)
-      if (!asyncs_outstanding(timers)) {
-        /* No asyncs outstanding, insert a fresh async marker */
-
-        insert_marker(timers, timer);
-        timers->async_begin = currentTime;
-      } else if (!is_async(timers->current)) {
-        /* Previous asyncs still in flight, but a previous SwitchTo
-         * already marked the end of the most recent async operation,
-         * so we can rename that marker as the beginning of this async
-         * operation */
-
-        struct pb_async_time_marker_list *last_event = get_last_async(timers);
-        last_event->label = NULL;
-        last_event->timerID = timer;
-      }
-      if (!is_async(timers->current)) {
-        pb_StartTimer(&timers->timers[pb_TimerID_DRIVER]);
-      }
-    }
-  }
-  timers->current = timer;
-}
-
-void pb_SwitchToSubTimer(struct pb_TimerSet *timers, char *label,
-                         enum pb_TimerID category) {
-  struct pb_SubTimerList *subtimerlist =
-      timers->sub_timer_list[timers->current];
-  struct pb_SubTimer *curr =
-      (subtimerlist != NULL) ? subtimerlist->current : NULL;
-
-  if (timers->current != pb_TimerID_NONE) {
-    if (!is_async(timers->current)) {
-      if (timers->current != category) {
-        if (curr != NULL) {
-          pb_StopTimerAndSubTimer(&timers->timers[timers->current],
-                                  &curr->timer);
-        } else {
-          pb_StopTimer(&timers->timers[timers->current]);
-        }
-      } else {
-        if (curr != NULL) {
-          pb_StopTimer(&curr->timer);
-        }
-      }
-    } else {
-      insert_submarker(timers, label, category);
-      if (!is_async(category)) { // if switching to async too, keep driver going
-        pb_StopTimer(&timers->timers[pb_TimerID_DRIVER]);
-      }
-    }
-  }
-
-  pb_Timestamp currentTime = get_time();
-
-  /* The only cases we check for asynchronous task completion is
-   * when an overlapping CPU operation completes, or the next
-   * segment blocks on completion of previous async operations */
-  if (asyncs_outstanding(timers) &&
-      (!is_async(timers->current) || is_blocking(category))) {
-
-    struct pb_async_time_marker_list *last_event = get_last_async(timers);
-    /* CL_COMPLETE if completed */
-
-    cl_int ciErrNum = CL_SUCCESS;
-    cl_int async_done = CL_COMPLETE;
-
-    ciErrNum = clGetEventInfo(*((cl_event *)last_event->marker),
-                              CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int),
-                              &async_done, NULL);
-    if (ciErrNum != CL_SUCCESS) {
-      fprintf(stderr, "Error Querying EventInfo!\n");
-    }
-
-    if (is_blocking(category)) {
-      /* Async operations completed after previous CPU operations:
-       * overlapped time is the total CPU time since this set of async
-       * operations were first issued */
-
-      // timer to switch to is COPY or NONE
-      // if it hasn't already finished, then just take now and use that as the
-      // elapsed time in OVERLAP anything happening after now isn't OVERLAP
-      // because everything is being stopped to wait for synchronization it
-      // seems that the extra sync wall time isn't being recorded anywhere
-      if (async_done != CL_COMPLETE)
-        accumulate_time(&(timers->timers[pb_TimerID_OVERLAP].elapsed),
-                        timers->async_begin, currentTime);
-
-      /* Wait on async operation completion */
-      ciErrNum = clWaitForEvents(1, (cl_event *)last_event->marker);
-      if (ciErrNum != CL_SUCCESS) {
-        fprintf(stderr, "Error Waiting for Events!\n");
-      }
-      pb_Timestamp total_async_time = record_async_times(timers);
-
-      /* Async operations completed before previous CPU operations:
-       * overlapped time is the total async time */
-      // If it did finish, then accumulate all the async time that did happen
-      // into OVERLAP the immediately preceding EventSynchronize theoretically
-      // didn't have any effect since it was already completed.
-      if (async_done == CL_COMPLETE /*cudaSuccess*/)
-        timers->timers[pb_TimerID_OVERLAP].elapsed += total_async_time;
-
-    } else
-        /* implies (!is_async(timers->current) && asyncs_outstanding(timers)) */
-        // i.e. Current Not Async (not KERNEL/COPY_ASYNC) but there are
-        // outstanding so something is deeper in stack
-        if (async_done == CL_COMPLETE /*cudaSuccess*/) {
-      /* Async operations completed before previous CPU operations:
-       * overlapped time is the total async time */
-      timers->timers[pb_TimerID_OVERLAP].elapsed += record_async_times(timers);
-    }
-    // else, this isn't blocking, so just check the next time around
-  }
-
-  subtimerlist = timers->sub_timer_list[category];
-  struct pb_SubTimer *subtimer = NULL;
-
-  if (label != NULL) {
-    subtimer = subtimerlist->subtimer_list;
-    while (subtimer != NULL) {
-      if (strcmp(subtimer->label, label) == 0) {
-        break;
-      } else {
-        subtimer = subtimer->next;
-      }
-    }
-  }
-
-  /* Start the new timer */
-  if (category != pb_TimerID_NONE) {
-    if (!is_async(category)) {
-      if (subtimerlist != NULL) {
-        subtimerlist->current = subtimer;
-      }
-
-      if (category != timers->current && subtimer != NULL) {
-        pb_StartTimerAndSubTimer(&timers->timers[category], &subtimer->timer);
-      } else if (subtimer != NULL) {
-        pb_StartTimer(&subtimer->timer);
-      } else {
-        pb_StartTimer(&timers->timers[category]);
-      }
-    } else {
-      if (subtimerlist != NULL) {
-        subtimerlist->current = subtimer;
-      }
-
-      // toSwitchTo Is Async (KERNEL/COPY_ASYNC)
-      if (!asyncs_outstanding(timers)) {
-        /* No asyncs outstanding, insert a fresh async marker */
-        insert_submarker(timers, label, category);
-        timers->async_begin = currentTime;
-      } else if (!is_async(timers->current)) {
-        /* Previous asyncs still in flight, but a previous SwitchTo
-         * already marked the end of the most recent async operation,
-         * so we can rename that marker as the beginning of this async
-         * operation */
-
-        struct pb_async_time_marker_list *last_event = get_last_async(timers);
-        last_event->timerID = category;
-        last_event->label = label;
-      } // else, marker for switchToThis was already inserted
-
-      // toSwitchto is already asynchronous, but if current/prev state is async
-      // too, then DRIVER is already running
-      if (!is_async(timers->current)) {
-        pb_StartTimer(&timers->timers[pb_TimerID_DRIVER]);
-      }
-    }
-  }
-
-  timers->current = category;
-}
-
-void pb_PrintTimerSet(struct pb_TimerSet *timers) {
-  printf("Printing Parboil Timer: Default\n");
-  pb_Timestamp wall_end = get_time();
-
-  struct pb_Timer *t = timers->timers;
-  struct pb_SubTimer *sub = NULL;
-
-  int maxSubLength;
-
-  //  const char *categories[] = {
-  //    "IO", "Kernel", "Copy", "Driver", "Copy Async", "Compute"
-  //  };
-  const char *categories[] = {
-      "IO",          "Kernel",         "Copy",         "Driver",
-      "Copy Async",  "Compute",        "Overlap",      "Init_Ctx",
-      "Clear_Ctx",   "Copy_Scalar",    "Copy_Ptr",     "Mem_Free",
-      "Read_Output", "Setup",          "Mem_Track",    "Mem_Untrack",
-      "Misc",        "Pthread_Create", "Arg_Pack",     "Arg_Unpack",
-      "Computation", "Output_Pack",    "Output_Unpack"};
-
-  const int maxCategoryLength = 20;
-
-  int i;
-  for (i = 1; i < pb_TimerID_LAST;
-       ++i) { // exclude NONE and OVRELAP from this format
-    if (pb_GetElapsedTime(&t[i]) != 0 || true) {
-
-      // Print Category Timer
-      printf("%-*s: %.9f\n", maxCategoryLength, categories[i - 1],
-             pb_GetElapsedTime(&t[i]));
-
-      if (timers->sub_timer_list[i] != NULL) {
-        sub = timers->sub_timer_list[i]->subtimer_list;
-        maxSubLength = 0;
-        while (sub != NULL) {
-          // Find longest SubTimer label
-          if (strlen(sub->label) > maxSubLength) {
-            maxSubLength = strlen(sub->label);
-          }
-          sub = sub->next;
-        }
-
-        // Fit to Categories
-        if (maxSubLength <= maxCategoryLength) {
-          maxSubLength = maxCategoryLength;
-        }
-
-        sub = timers->sub_timer_list[i]->subtimer_list;
-
-        // Print SubTimers
-        while (sub != NULL) {
-          printf(" -%-*s: %.9f\n", maxSubLength, sub->label,
-                 pb_GetElapsedTime(&sub->timer));
-          sub = sub->next;
-        }
-      }
-    }
-  }
-
-  if (pb_GetElapsedTime(&t[pb_TimerID_OVERLAP]) != 0)
-    printf("CPU/Kernel Overlap: %.9f\n",
-           pb_GetElapsedTime(&t[pb_TimerID_OVERLAP]));
-
-  float walltime = (wall_end - timers->wall_begin) / 1e9;
-  printf("Timer Wall Time: %.9f\n", walltime);
-}
-
-void pb_DestroyTimerSet(struct pb_TimerSet *timers) {
-  /* clean up all of the async event markers */
-  struct pb_async_time_marker_list *event = timers->async_markers;
-  while (event != NULL) {
-
-    cl_int ciErrNum = CL_SUCCESS;
-    ciErrNum = clWaitForEvents(1, (cl_event *)(event)->marker);
-    if (ciErrNum != CL_SUCCESS) {
-      // fprintf(stderr, "Error Waiting for Events!\n");
-    }
-
-    ciErrNum = clReleaseEvent(*((cl_event *)(event)->marker));
-    if (ciErrNum != CL_SUCCESS) {
-      fprintf(stderr, "Error Release Events!\n");
-    }
-
-    free((event)->marker);
-    struct pb_async_time_marker_list *next = ((event)->next);
-
-    free(event);
-
-    // (*event) = NULL;
-    event = next;
-  }
-
-  int i = 0;
-  for (i = 0; i < pb_TimerID_LAST; ++i) {
-    if (timers->sub_timer_list[i] != NULL) {
-      struct pb_SubTimer *subtimer = timers->sub_timer_list[i]->subtimer_list;
-      struct pb_SubTimer *prev = NULL;
-      while (subtimer != NULL) {
-        free(subtimer->label);
-        prev = subtimer;
-        subtimer = subtimer->next;
-        free(prev);
-      }
-      free(timers->sub_timer_list[i]);
-    }
-  }
-}