diff --git a/hpvm/lib/Transforms/DFG2LLVM_OpenCL/CMakeLists.txt b/hpvm/lib/Transforms/DFG2LLVM_OpenCL/CMakeLists.txt index 4041df11ce8d79e39d6f72bdf0a1068eae449300..1e34f44f403e373607d2efc10aca7f796c81b938 100644 --- a/hpvm/lib/Transforms/DFG2LLVM_OpenCL/CMakeLists.txt +++ b/hpvm/lib/Transforms/DFG2LLVM_OpenCL/CMakeLists.txt @@ -10,6 +10,7 @@ add_llvm_library( LLVMDFG2LLVM_OpenCL DEPENDS intrinsics_gen + llvm-cbe # Called within the pass PLUGIN_TOOL opt ) diff --git a/hpvm/lib/Transforms/DFG2LLVM_OpenCL/DFG2LLVM_OpenCL.cpp b/hpvm/lib/Transforms/DFG2LLVM_OpenCL/DFG2LLVM_OpenCL.cpp index 5a58f272b3042a4ebfc2e3c7bb3606b5c19e8d84..286bcdebd2ed3322b134581f3e8fdac286ee325b 100644 --- a/hpvm/lib/Transforms/DFG2LLVM_OpenCL/DFG2LLVM_OpenCL.cpp +++ b/hpvm/lib/Transforms/DFG2LLVM_OpenCL/DFG2LLVM_OpenCL.cpp @@ -47,6 +47,8 @@ #include "llvm/Support/ToolOutputFile.h" #include <sstream> +#include <unistd.h> // ".", nullptr() (POSIX-only) +#include <cstdlib> // std::system #ifndef LLVM_BUILD_DIR #error LLVM_BUILD_DIR is not defined @@ -147,7 +149,7 @@ static void getExecuteNodeParams(Module &M, Value *&, Value *&, Value *&, static Value *genWorkGroupPtr(Module &M, std::vector<Value *>, ValueToValueMapTy &, Instruction *, const Twine &WGName = "WGSize"); -static std::string getPTXFilename(const Module &); +static std::string getPTXFilePath(const Module &); static void changeDataLayout(Module &); static void changeTargetTriple(Module &); static void findReturnInst(Function *, std::vector<ReturnInst *> &); @@ -258,7 +260,7 @@ public: DEBUG(errs() << *KernelM); } - void writeKernelsModule(); + void writeKModCompilePTX(); }; // Initialize the HPVM runtime API. This makes it easier to insert these calls @@ -846,7 +848,7 @@ void CGT_OpenCL::codeGen(DFInternalNode *N) { // Now the remaining nodes to be visited should be ignored KernelLaunchNode = NULL; DEBUG(errs() << "Insert Runtime calls\n"); - insertRuntimeCalls(N, kernel, getPTXFilename(M)); + insertRuntimeCalls(N, kernel, getPTXFilePath(M)); } else { DEBUG(errs() << "Found intermediate node. Getting size parameters.\n"); @@ -1901,7 +1903,7 @@ bool DFG2LLVM_OpenCL::runOnModule(Module &M) { CGTVisitor->visit(rootNode); } - CGTVisitor->writeKernelsModule(); + CGTVisitor->writeKModCompilePTX(); // TODO: Edit module epilogue to remove the HPVM intrinsic declarations delete CGTVisitor; @@ -2010,16 +2012,18 @@ void CGT_OpenCL::addCLMetadata(Function *F) { MDN_annotations->addOperand(MDNvvmAnnotationsNode); } -void CGT_OpenCL::writeKernelsModule() { +void CGT_OpenCL::writeKModCompilePTX() { // In addition to deleting all other functions, we also want to spiff it // up a little bit. Do this now. legacy::PassManager Passes; + auto kmodName = getKernelsModuleName(M); + auto ptxPath = getPTXFilePath(M); DEBUG(errs() << "Writing to File --- "); - DEBUG(errs() << getKernelsModuleName(M).c_str() << "\n"); + DEBUG(errs() << kmodName << "\n"); std::error_code EC; - ToolOutputFile Out(getKernelsModuleName(M).c_str(), EC, sys::fs::F_None); + ToolOutputFile Out(kmodName.c_str(), EC, sys::fs::F_None); if (EC) { DEBUG(errs() << EC.message() << '\n'); } @@ -2030,6 +2034,13 @@ void CGT_OpenCL::writeKernelsModule() { // Declare success. Out.keep(); + + // Starts calling llvm-cbe. + auto llvmCBE = std::string(LLVM_BUILD_DIR_STR) + "/bin/llvm-cbe"; + std::string command = llvmCBE + " " + kmodName + " -o " + ptxPath; + DEBUG(errs() << "Compiling PTX from kernel module:\n"); + DEBUG(errs() << command); + std::system(command.c_str()); } Function *CGT_OpenCL::transformFunctionToVoid(Function *F) { @@ -2346,10 +2357,13 @@ static Value *genWorkGroupPtr(Module &M, std::vector<Value *> WGSize, } // Get generated PTX binary name -static std::string getPTXFilename(const Module &M) { +static std::string getPTXFilePath(const Module &M) { std::string moduleID = M.getModuleIdentifier(); - moduleID.append(".kernels.cl"); - return moduleID; + char *cwd_p = get_current_dir_name(); + std::string cwd(cwd_p); + free(cwd_p); + std::string ptxPath = cwd + "/" + moduleID + ".kernels.cl"; + return ptxPath; } // Changes the data layout of the Module to be compiled with OpenCL backend diff --git a/hpvm/test/CMakeLists.txt b/hpvm/test/CMakeLists.txt index cb548b84dc9944e54c7dfdd0a0d74cf0aed9aafd..07024b6febc0c27dcc40f166262ab441e7f5675d 100644 --- a/hpvm/test/CMakeLists.txt +++ b/hpvm/test/CMakeLists.txt @@ -1,5 +1,13 @@ include(../cmake/TestFile.cmake) # Generation of `.test` files in CMake + +# approxhpvm.py, clang and clang++ are used to compile benchmarks +# in `benchmarks` and `dnn_benchmarks/hpvm-c`. +set(APPROXHPVM_PY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/approxhpvm.py) +set(CLANG_C ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/clang) +set(CLANG_CXX ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/clang++) + add_subdirectory(hpvm_pass) # Passes test suite +add_subdirectory(benchmarks) add_subdirectory(dnn_benchmarks/hpvm-c) # HPVM-C DNN accuracy test suite add_subdirectory(dnn_benchmarks/tensor-rt-src) # tensor_runtime DNN (build only, no tests) add_subdirectory(dnn_benchmarks/profiling) # hpvm-profiler test suite diff --git a/hpvm/test/benchmarks/CMakeLists.txt b/hpvm/test/benchmarks/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..2c226aefd58f20c296db232f1a03c35bed0b7195 --- /dev/null +++ b/hpvm/test/benchmarks/CMakeLists.txt @@ -0,0 +1,38 @@ +macro(hpvm_compile_util_sources compiler_flags util_srcs language_mode) + # "Returns" ${util_bitcodes}, a list of paths to generated bitcode (.ll) files + foreach(src ${util_srcs}) + set(src_path ${CMAKE_CURRENT_SOURCE_DIR}/${src}) + get_filename_component(src_name ${src_path} NAME) + set(output_path ${CMAKE_CURRENT_BINARY_DIR}/${src_name}.ll) + add_custom_command( + OUTPUT ${output_path} + COMMAND ${CLANG_C} -x${language_mode} + ${compiler_flags} ${src_path} -emit-llvm -S -o ${output_path} + DEPENDS ${src_path} + ) + list(APPEND util_bitcodes ${output_path}) + endforeach() +endmacro() + +function(add_hpvm_benchmark + target_name output_bin_name all_flags language_mode + main_src util_bitcodes +) + set(output_bin_path ${CMAKE_CURRENT_BINARY_DIR}/${output_bin_name}) + set(main_src_path ${CMAKE_CURRENT_SOURCE_DIR}/${main_src}) + if(util_bitcodes) + set(bitcodes_arg -b ${util_bitcodes}) + else() + set(bitcodes_arg "") + endif() + add_custom_command( + OUTPUT ${output_bin_path} + COMMAND ${APPROXHPVM_PY} ${all_flags} -x${language_mode} + ${bitcodes_arg} -- ${main_src_path} ${output_bin_path} + DEPENDS ${main_src_path} ${util_bitcodes} approxhpvm.py + ) + add_custom_target(${target_name} DEPENDS ${output_bin_path}) +endfunction(add_hpvm_benchmark) + +add_subdirectory(hpvm-cava) +add_subdirectory(pipeline) diff --git a/hpvm/test/benchmarks/hpvm-cava/CMakeLists.txt b/hpvm/test/benchmarks/hpvm-cava/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..57e3bd3f33c580c9244634608402009804dbc38e --- /dev/null +++ b/hpvm/test/benchmarks/hpvm-cava/CMakeLists.txt @@ -0,0 +1,22 @@ +set(other_srcs src/load_cam_model.c src/cam_pipe_utility.c src/utility.c) +set( + compiler_flags + # Insanity warning: only -O1 works for this benchmark. + -O1 -I ${CMAKE_CURRENT_SOURCE_DIR}/src + -DDMA_MODE -DDMA_INTERFACE_V3 +) + +# Sets ${util_bitcodes} +hpvm_compile_util_sources("${compiler_flags}" "${other_srcs}" "c") + +set(all_flags_cpu ${compiler_flags} "-DDEVICE=CPU_TARGET" "-lpthread") +add_hpvm_benchmark( + "hpvm_cava_cpu" "hpvm-cava-cpu" "${all_flags_cpu}" "c" + src/main.c "${util_bitcodes}" +) + +set(all_flags_gpu ${compiler_flags} "-DDEVICE=GPU_TARGET" "--opencl" "-lpthread") +add_hpvm_benchmark( + "hpvm_cava_gpu" "hpvm-cava-gpu" "${all_flags_gpu}" "c" + src/main.c "${util_bitcodes}" +) diff --git a/hpvm/test/benchmarks/hpvm-cava/Makefile b/hpvm/test/benchmarks/hpvm-cava/Makefile deleted file mode 100644 index 58dfa72aacb172252ea1c13ed3331322fb600861..0000000000000000000000000000000000000000 --- a/hpvm/test/benchmarks/hpvm-cava/Makefile +++ /dev/null @@ -1,131 +0,0 @@ -# This Makefile compiles the HPVM-CAVA pilot project. -# It builds HPVM-related dependencies, then the native camera pipeline ISP code. -# -# Paths to some dependencies (e.g., HPVM, LLVM) must exist in Makefile.config, -# which can be copied from Makefile.config.example for a start. - -CONFIG_FILE := ../include/Makefile.config - -ifeq ($(wildcard $(CONFIG_FILE)),) - $(error $(CONFIG_FILE) not found. See $(CONFIG_FILE).example) -endif -include $(CONFIG_FILE) - -# Compiler Flags - -LFLAGS += -lm -lrt - -ifeq ($(TARGET),) - TARGET = gpu -endif - -# Build dirs -SRC_DIR = src/ -CAM_PIPE_SRC_DIR = $(SRC_DIR) -BUILD_DIR = build/$(TARGET) -CURRENT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) - - -INCLUDES += -I$(SRC_DIR) -I$(CAM_PIPE_SRC_DIR) -ifneq ($(CONFUSE_ROOT),) - INCLUDES += -I$(CONFUSE_ROOT)/include - LFLAGS += -L$(CONFUSE_ROOT)/lib -endif - -EXE = cava-hpvm-$(TARGET) - -LFLAGS += -pthread - -## BEGIN HPVM MAKEFILE -LANGUAGE=hpvm -SRCDIR_OBJS=load_cam_model.ll cam_pipe_utility.ll utility.ll -OBJS_SRC=src/load_cam_model.c src/cam_pipe_utility.c src/utility.c -HPVM_OBJS=main.hpvm.ll -APP = $(EXE) -APP_CUDALDFLAGS=-lm -lstdc++ -APP_CFLAGS= $(INCLUDES) -DDMA_MODE -DDMA_INTERFACE_V3 -APP_CXXFLAGS=-ffast-math -O0 -I/opt/opencv/include -APP_LDFLAGS=$(LFLAGS) - -CFLAGS = -O1 $(APP_CFLAGS) $(PLATFORM_CFLAGS) -OBJS_CFLAGS = -O1 $(APP_CFLAGS) $(PLATFORM_CFLAGS) -CXXFLAGS = $(APP_CXXFLAGS) $(PLATFORM_CXXFLAGS) -LDFLAGS= $(APP_LDFLAGS) $(PLATFORM_LDFLAGS) - -HPVM_RT_PATH = $(LLVM_BUILD_DIR)/tools/hpvm/projects/hpvm-rt - -HPVM_RT_LIB = $(HPVM_RT_PATH)/hpvm-rt.bc - - -TESTGEN_OPTFLAGS = -load LLVMGenHPVM.so -genhpvm -globaldce - -ifeq ($(TARGET),seq) - DEVICE = CPU_TARGET - HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -dfg2llvm-cpu -clearDFG - HPVM_OPTFLAGS += -hpvm-timers-cpu -else - DEVICE = GPU_TARGET - HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -localmem -dfg2llvm-opencl -dfg2llvm-cpu -clearDFG - HPVM_OPTFLAGS += -hpvm-timers-cpu -hpvm-timers-ptx -endif - TESTGEN_OPTFLAGS += -hpvm-timers-gen - -CFLAGS += -DDEVICE=$(DEVICE) -CXXFLAGS += -DDEVICE=$(DEVICE) - -# Add BUILDDIR as a prefix to each element of $1 -INBUILDDIR=$(addprefix $(BUILD_DIR)/,$(1)) - -.PRECIOUS: $(BUILD_DIR)/%.ll - -OBJS = $(call INBUILDDIR,$(SRCDIR_OBJS)) -TEST_OBJS = $(call INBUILDDIR,$(HPVM_OBJS)) -KERNEL = $(TEST_OBJS).kernels.ll - -ifeq ($(TARGET),gpu) - KERNEL_OCL = $(TEST_OBJS).kernels.cl -endif - -HOST_LINKED = $(BUILD_DIR)/$(APP).linked.ll -HOST = $(BUILD_DIR)/$(APP).host.ll - -ifeq ($(OPENCL_PATH),) -FAILSAFE=no_opencl -else -FAILSAFE= -endif - -# Targets - -default: $(FAILSAFE) $(BUILD_DIR) $(KERNEL_OCL) $(EXE) - -clean : - if [ -f $(EXE) ]; then rm $(EXE); fi - if [ -f DataflowGraph.dot ]; then rm DataflowGraph.dot*; fi - if [ -d $(BUILD_DIR) ]; then rm -rf $(BUILD_DIR); fi - -$(KERNEL_OCL) : $(KERNEL) - $(OCLBE) $< -o $@ - -$(EXE) : $(HOST_LINKED) - $(CXX) -O3 $(LDFLAGS) $< -o $@ - -$(HOST_LINKED) : $(HOST) $(OBJS) $(HPVM_RT_LIB) - $(LLVM_LINK) $^ -S -o $@ - -$(HOST) $(KERNEL): $(BUILD_DIR)/$(HPVM_OBJS) - $(OPT) $(HPVM_OPTFLAGS) -S $< -o $(HOST) - -$(BUILD_DIR): - mkdir -p $(BUILD_DIR) - -$(BUILD_DIR)/%.ll : $(SRC_DIR)/%.c - $(CC) $(OBJS_CFLAGS) -emit-llvm -S -o $@ $< - -$(BUILD_DIR)/main.ll : $(SRC_DIR)/main.c - $(CC) $(CFLAGS) -emit-llvm -S -o $@ $< - -$(BUILD_DIR)/main.hpvm.ll : $(BUILD_DIR)/main.ll - $(OPT) $(TESTGEN_OPTFLAGS) $< -S -o $@ - -## END HPVM MAKEFILE diff --git a/hpvm/test/benchmarks/hpvm-cava/src/main.c b/hpvm/test/benchmarks/hpvm-cava/src/main.c index 09430239d7c06a0b2fab2a59c7e6310babe617c6..0d8fb37d0103b55234e59e8d2b66742faed207b6 100644 --- a/hpvm/test/benchmarks/hpvm-cava/src/main.c +++ b/hpvm/test/benchmarks/hpvm-cava/src/main.c @@ -53,6 +53,7 @@ typedef struct __attribute__((__packed__)) { } RootIn; typedef enum _argnum { + CAM_MODEL, RAW_IMAGE_BIN, OUTPUT_IMAGE_BIN, NUM_REQUIRED_ARGS, @@ -67,7 +68,7 @@ typedef struct _arguments { } arguments; static char prog_doc[] = "\nCamera pipeline on gem5-Aladdin.\n"; -static char args_doc[] = "path/to/raw-image-binary path/to/output-image-binary"; +static char args_doc[] = "path/to/cam-model path/to/raw-image-binary path/to/output-image-binary"; static struct argp_option options[] = { {"num-inputs", 'n', "N", 0, "Number of input images"}, {0}, @@ -724,14 +725,14 @@ int main(int argc, char *argv[]) { *transform_out, *gamut_out; float *TsTw, *ctrl_pts, *weights, *coefs, *tone_map, *l2_dist; - TsTw = get_TsTw("cam_models/NikonD7000/", wb_index); + TsTw = get_TsTw(args.args[CAM_MODEL], wb_index); float *trans = transpose_mat(TsTw, CHAN_SIZE, CHAN_SIZE); free(TsTw); TsTw = trans; - ctrl_pts = get_ctrl_pts("cam_models/NikonD7000/", num_ctrl_pts); - weights = get_weights("cam_models/NikonD7000/", num_ctrl_pts); - coefs = get_coefs("cam_models/NikonD7000/", num_ctrl_pts); - tone_map = get_tone_map("cam_models/NikonD7000/"); + ctrl_pts = get_ctrl_pts(args.args[CAM_MODEL], num_ctrl_pts); + weights = get_weights(args.args[CAM_MODEL], num_ctrl_pts); + coefs = get_coefs(args.args[CAM_MODEL], num_ctrl_pts); + tone_map = get_tone_map(args.args[CAM_MODEL]); input_scaled = (float *)malloc_aligned(bytes_fimage); result_scaled = (float *)malloc_aligned(bytes_fimage); diff --git a/hpvm/test/benchmarks/pipeline/CMakeLists.txt b/hpvm/test/benchmarks/pipeline/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..84cb2d85afb4929b2f26854844808dd2493da1e3 --- /dev/null +++ b/hpvm/test/benchmarks/pipeline/CMakeLists.txt @@ -0,0 +1,13 @@ +find_package(OpenCV 2) +if(${OpenCV_FOUND}) + set( + all_flags + -O3 -I${OpenCV_INCLUDE_DIRS} + -ffast-math -fno-lax-vector-conversions -fno-vectorize -fno-slp-vectorize + -lpthread + ) + add_hpvm_benchmark("pipeline_cpu" "pipeline-cpu" "${all_flags}" "c++" src/main.cc "") + add_hpvm_benchmark("pipeline_gpu" "pipeline-gpu" "${all_flags};--opencl" "c++" src/main.cc "") +else() + message(WARNING "opencv-2 not found; not compiling HPVM benchmark 'pipeline'.") +endif() diff --git a/hpvm/test/benchmarks/pipeline/Makefile b/hpvm/test/benchmarks/pipeline/Makefile deleted file mode 100644 index 8a55393f241f30d840cbf85c31488e652c2023a0..0000000000000000000000000000000000000000 --- a/hpvm/test/benchmarks/pipeline/Makefile +++ /dev/null @@ -1,118 +0,0 @@ -# This Makefile compiles the HPVM-pipeline test. -# It builds HPVM-related dependencies, then the native camera pipeline ISP code. -# -# Paths to some dependencies (e.g., HPVM, LLVM) must exist in Makefile.config, -# which can be copied from Makefile.config.example for a start. - -CONFIG_FILE := ../include/Makefile.config - -ifeq ($(wildcard $(CONFIG_FILE)),) - $(error $(CONFIG_FILE) not found. See $(CONFIG_FILE).example) -endif -include $(CONFIG_FILE) - -# Compiler Flags - -LFLAGS += -lm -lrt - -ifeq ($(TARGET),) - TARGET = gpu -endif - -# Build dirs -SRC_DIR = src/ -BUILD_DIR = build/$(TARGET) -CURRENT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) - -EXE = pipeline-$(TARGET) - -INCLUDES += -I$(SRC_DIR) - -## BEGIN HPVM MAKEFILE -SRCDIR_OBJS= -HPVM_OBJS=main.hpvm.ll -APP = $(EXE) -APP_CFLAGS += $(INCLUDES) -ffast-math -O3 -fno-lax-vector-conversions -fno-vectorize -fno-slp-vectorize -APP_CXXFLAGS += $(INCLUDES) -ffast-math -O3 -fno-lax-vector-conversions -fno-vectorize -fno-slp-vectorize -APP_LDFLAGS=`pkg-config opencv --libs` - -CFLAGS = $(APP_CFLAGS) $(PLATFORM_CFLAGS) -OBJS_CFLAGS = $(APP_CFLAGS) $(PLATFORM_CFLAGS) -CXXFLAGS = $(APP_CXXFLAGS) $(PLATFORM_CXXFLAGS) -LDFLAGS= $(APP_LDFLAGS) $(PLATFORM_LDFLAGS) - -HPVM_RT_PATH = $(LLVM_BUILD_DIR)/tools/hpvm/projects/hpvm-rt -HPVM_RT_LIB = $(HPVM_RT_PATH)/hpvm-rt.bc - -TESTGEN_OPTFLAGS = -load LLVMGenHPVM.so -genhpvm -globaldce - -ifeq ($(TARGET),seq) - DEVICE = CPU_TARGET - HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -dfg2llvm-cpu -clearDFG - HPVM_OPTFLAGS += -hpvm-timers-cpu -else - DEVICE = GPU_TARGET - HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -localmem -dfg2llvm-opencl -dfg2llvm-cpu -clearDFG - HPVM_OPTFLAGS += -hpvm-timers-cpu -hpvm-timers-ptx -endif - TESTGEN_OPTFLAGS += -hpvm-timers-gen - -CFLAGS += -DDEVICE=$(DEVICE) -CXXFLAGS += -DDEVICE=$(DEVICE) - -# Add BUILDDIR as a prefix to each element of $1 -INBUILDDIR=$(addprefix $(BUILD_DIR)/,$(1)) - -.PRECIOUS: $(BUILD_DIR)/%.ll - -OBJS = $(call INBUILDDIR,$(SRCDIR_OBJS)) -TEST_OBJS = $(call INBUILDDIR,$(HPVM_OBJS)) -KERNEL = $(TEST_OBJS).kernels.ll - -ifeq ($(TARGET),seq) -else - KERNEL_OCL = $(TEST_OBJS).kernels.cl -endif - -HOST_LINKED = $(BUILD_DIR)/$(APP).linked.ll -HOST = $(BUILD_DIR)/$(APP).host.ll - -ifeq ($(OPENCL_PATH),) -FAILSAFE=no_opencl -else -FAILSAFE= -endif - -# Targets -default: $(FAILSAFE) $(BUILD_DIR) $(KERNEL_OCL) $(EXE) - -clean : - if [ -f $(EXE) ]; then rm $(EXE); fi - if [ -f DataflowGraph.dot ]; then rm DataflowGraph.dot*; fi - if [ -d $(BUILD_DIR) ]; then rm -rf $(BUILD_DIR); fi - -$(KERNEL_OCL) : $(KERNEL) - $(OCLBE) $< -o $@ - -$(EXE) : $(HOST_LINKED) - $(CXX) -O3 $(LDFLAGS) $< -o $@ - -$(HOST_LINKED) : $(HOST) $(OBJS) $(HPVM_RT_LIB) - $(LLVM_LINK) $^ -S -o $@ - -$(HOST) $(KERNEL): $(BUILD_DIR)/$(HPVM_OBJS) - $(OPT) $(HPVM_OPTFLAGS) -S $< -o $(HOST) - -$(BUILD_DIR): - mkdir -p $(BUILD_DIR) - -$(BUILD_DIR)/%.ll : $(SRC_DIR)/%.cc - $(CC) $(OBJS_CFLAGS) -emit-llvm -S -o $@ $< - -$(BUILD_DIR)/main.ll : $(SRC_DIR)/main.cc - $(CC) $(CXXFLAGS) -emit-llvm -S -o $@ $< - -$(BUILD_DIR)/main.hpvm.ll : $(BUILD_DIR)/main.ll - $(OPT) $(TESTGEN_OPTFLAGS) $< -S -o $@ - -## END HPVM MAKEFILE diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/CMakeLists.txt b/hpvm/test/dnn_benchmarks/hpvm-c/CMakeLists.txt index d9cdafa7bc8dcffffb9d6b8bf3fb8d5aa958175f..9f34317d34157d57468c60cb854828b5c54f1cde 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/CMakeLists.txt +++ b/hpvm/test/dnn_benchmarks/hpvm-c/CMakeLists.txt @@ -11,7 +11,7 @@ function(compile_hpvm_c target_name src_filepath codegen_target) OUTPUT ${output_bin_path} DEPENDS ${generated_file_path} hpvm-clang COMMAND hpvm-clang - ${generated_file_path} ${output_bin_path} + ${generated_file_path} ${output_bin_path} -O3 -fno-exceptions -t ${codegen_target} -I ${CMAKE_CURRENT_SOURCE_DIR}/include ${ARGN} ) add_custom_target(${target_name} DEPENDS ${output_bin_path}) diff --git a/hpvm/tools/hpvm-clang/CMakeLists.txt b/hpvm/tools/hpvm-clang/CMakeLists.txt index 53a98d0b2deed5ed0337cce46a75078abfd8c887..217bd39328788a41d7387105b312daf0c6133bb6 100644 --- a/hpvm/tools/hpvm-clang/CMakeLists.txt +++ b/hpvm/tools/hpvm-clang/CMakeLists.txt @@ -27,16 +27,20 @@ set(DIRECT_LINK_LIBS ${OpenCL_LIBRARY} "$<TARGET_FILE:tensor_runtime>") # and does not export its file location. # Keep this in sync with hpvm/projects/hpvm-rt/CMakeLists.txt. set(HPVM_RT_PATH ${LLVM_BUILD_DIR}/tools/hpvm/projects/hpvm-rt/hpvm-rt.bc) +# And this must be manually in sync with `hpvm/lib/Transforms/*`. +# which is fine for because we don't have many passes for now. set( AVAILABLE_PASSES LLVMBuildDFG - LLVMInPlaceDFGAnalysis + LLVMClearDFG LLVMDFG2LLVM_CPU LLVMDFG2LLVM_CUDNN + LLVMDFG2LLVM_OpenCL LLVMDFG2LLVM_WrapperAPI LLVMFuseHPVMTensorNodes - LLVMClearDFG LLVMGenHPVM + LLVMInPlaceDFGAnalysis + LLVMLocalMem ) # ---[ Create package folder structure diff --git a/hpvm/tools/hpvm-clang/main.py.in b/hpvm/tools/hpvm-clang/main.py.in index 5fa256d26ba1312b20acbc86ed2c59befb52da2a..d37f293789787fbe81e5369543d99de97cb989ac 100644 --- a/hpvm/tools/hpvm-clang/main.py.in +++ b/hpvm/tools/hpvm-clang/main.py.in @@ -18,40 +18,70 @@ DIRECT_LINK_LIBS = "@DIRECT_LINK_LIBS@".split(";") AVAILABLE_PASSES = "@AVAILABLE_PASSES@".split(";") HPVM_RT_PATH = "@HPVM_RT_PATH@" -COMPILE_FLAGS = ["fno-exceptions", "std=c++11", "O3"] - def compile_hpvm_c( - input_file: PathLike, + hpvm_src: PathLike, output_file: PathLike, - codegen_target: str = "tensor", + tensor_target: Optional[str], + opencl: bool, + link_bitcode: List[PathLike] = None, include: List[PathLike] = None, + macro: List[str] = None, + flags: List[str] = None, + optim_level: str = "0", # -O0 + is_cpp: bool = True, # otherwise is C + std: str = None, # language std (-std=c++11) + link_dirs: List[PathLike] = None, + link_libs: List[str] = None, working_dir: PathLike = None, conf_file: PathLike = None, ): from subprocess import check_output - codegen_functions = { - "tensor": lambda i, o: opt_codegen_tensor(i, o, conf_file), - "cudnn": opt_codegen_cudnn - } - codegen_f = codegen_functions[codegen_target] + passes = ["LLVMBuildDFG"] + pass_flags = ["buildDFG"] + if tensor_target == "tensor": + if conf_file is None: + raise ValueError("conf_file must be defined when tensor_target=='tensor'.") + passes += ["LLVMInPlaceDFGAnalysis", "LLVMFuseHPVMTensorNodes", "LLVMDFG2LLVM_WrapperAPI"] + pass_flags += [ + "inplace", "hpvm-fuse", "dfg2llvm-wrapperapi", + f"configuration-inputs-filename={conf_file}" + ] + elif tensor_target == "cudnn": + passes += ["LLVMInPlaceDFGAnalysis", "LLVMDFG2LLVM_CUDNN"] + pass_flags += ["inplace", "dfg2llvm-cudnn"] + elif tensor_target is None: + passes += ["LLVMLocalMem"] + pass_flags += ["localmem"] + else: + raise ValueError(f"Tensor target {tensor_target} not recognized") + if opencl: + passes += ["LLVMDFG2LLVM_OpenCL"] + pass_flags += ["dfg2llvm-opencl"] + passes += ["LLVMDFG2LLVM_CPU", "LLVMClearDFG"] + pass_flags += ["dfg2llvm-cpu", "clearDFG"] + working_dir = Path(working_dir or ".") if not working_dir.is_dir(): os.makedirs(working_dir) - name_stem = Path(input_file).stem + # All commands for compiling the main hpvm_c file + name_stem = Path(hpvm_src).stem ll_file = working_dir / f"{name_stem}.ll" hpvm_ll_file = working_dir / f"{name_stem}.hpvm.ll" llvm_ll_file = working_dir / f"{name_stem}.llvm.ll" hpvm_rt_linked_file = working_dir / f"{name_stem}.linked.bc" + link_bitcode_ = [Path(bc) for bc in (link_bitcode or [])] commands = [ - hpvm_c_to_ll(input_file, ll_file, extra_includes=include), + hpvm_c_to_ll(hpvm_src, ll_file, include, macro, flags, optim_level, is_cpp, std), opt_codegen_hpvm(ll_file, hpvm_ll_file), - codegen_f(hpvm_ll_file, llvm_ll_file), - link_hpvm_rt(llvm_ll_file, hpvm_rt_linked_file), - link_binary(hpvm_rt_linked_file, output_file), + _run_opt(hpvm_ll_file, llvm_ll_file, passes, pass_flags), + link_hpvm_rt(link_bitcode_ + [llvm_ll_file], hpvm_rt_linked_file), ] + commands.append( + link_binary(hpvm_rt_linked_file, output_file, link_dirs, link_libs) + ) for command in commands: print(" ".join(command)) check_output(command) @@ -60,14 +90,23 @@ def compile_hpvm_c( def hpvm_c_to_ll( src_file: PathLike, target_file: PathLike, - extra_includes: Optional[List[PathLike]] = None, + extra_includes: List[PathLike] = None, + macros: List[str] = None, flags: List[str] = None, + optim_level: str = "0", # -O0 + is_cpp: bool = True, # otherwise is C + std: str = None, # --std=c++11 ) -> List[str]: - extra_includes = extra_includes or [] - includes = [f"-I{path}" for path in TRT_INCLUDE_DIRS + extra_includes] - flags = [f"-{flg}" for flg in (flags or []) + COMPILE_FLAGS] + includes = (extra_includes or []) + TRT_INCLUDE_DIRS + includes_s = [f"-I{path}" for path in includes] + macros = [f"-D{macro}" for macro in (macros or [])] + flags = [f"-f{flg}" for flg in (flags or [])] + if std: + flags.append(f"-std={std}") + clang = "clang++" if is_cpp else "clang" return [ - str(LLVM_BUILD_BIN / "clang++"), *includes, *flags, "-emit-llvm", "-S", + str(LLVM_BUILD_BIN / clang), *includes_s, *flags, *macros, + f"-O{optim_level}", "-emit-llvm", "-S", str(src_file), "-o", str(target_file) ] @@ -76,63 +115,44 @@ def opt_codegen_hpvm(src_file: PathLike, target_file: PathLike) -> List[str]: return _run_opt(src_file, target_file, ["LLVMGenHPVM"], ["genhpvm", "globaldce"]) -def opt_codegen_cudnn(src_file: PathLike, target_file: PathLike) -> List[str]: - passes = [ - "LLVMBuildDFG", "LLVMInPlaceDFGAnalysis", - "LLVMDFG2LLVM_CUDNN", "LLVMDFG2LLVM_CPU", - "LLVMFuseHPVMTensorNodes", "LLVMClearDFG", "LLVMGenHPVM" - ] - flags = [ - "buildDFG", "inplace", "hpvm-fuse", - "dfg2llvm-cudnn", "dfg2llvm-cpu", "clearDFG" - ] - return _run_opt(src_file, target_file, passes, flags) +def link_hpvm_rt(bitcodes: List[PathLike], target_file: PathLike) -> List[str]: + bitcodes_s = [str(bc) for bc in bitcodes] + return [str(LLVM_BUILD_BIN / "llvm-link"), *bitcodes_s, HPVM_RT_PATH, "-S", "-o", str(target_file)] -def opt_codegen_tensor( - src_file: PathLike, target_file: PathLike, conf_file: PathLike -): - passes = [ - "LLVMBuildDFG", "LLVMInPlaceDFGAnalysis", - "LLVMDFG2LLVM_WrapperAPI", "LLVMDFG2LLVM_CPU", - "LLVMFuseHPVMTensorNodes", "LLVMClearDFG", "LLVMGenHPVM" - ] - flags = [ - "buildDFG", "inplace", "hpvm-fuse", - "dfg2llvm-wrapperapi", - f"configuration-inputs-filename={conf_file}", - "dfg2llvm-cpu", "clearDFG", +def link_binary( + src_file: PathLike, + target_file: PathLike, + extra_link_dirs: List[PathLike] = None, + extra_link_libs: List[str] = None +) -> List[str]: + link_dirs, link_libs = _link_args(extra_link_dirs or [], extra_link_libs or []) + linker_dir_flags = [] + for path in link_dirs: + linker_dir_flags.extend([f"-L{path}", f"-Wl,-rpath={path}"]) + linker_lib_flags = [f"-l{lib}" for lib in link_libs] + return [ + str(LLVM_BUILD_BIN / "clang++"), str(src_file), + "-o", str(target_file), *linker_dir_flags, *linker_lib_flags ] - return _run_opt(src_file, target_file, passes, flags) -def link_hpvm_rt(src_file: PathLike, target_file: PathLike) -> List[str]: - return [str(LLVM_BUILD_BIN / "llvm-link"), str(src_file), HPVM_RT_PATH, "-o", str(target_file)] - - -def link_binary(src_file: PathLike, target_file: PathLike) -> List[str]: +def _link_args(extra_link_dirs: List[PathLike], extra_link_libs: List[str]): def drop_suffix(libname: str): import re match = re.match(r"lib(.*)\.so", libname) return libname if match is None else match.group(1) - link_dirs, link_libnames = [], [] + link_dirs, link_libs = extra_link_dirs.copy(), extra_link_libs.copy() for lib in DIRECT_LINK_LIBS: lib = Path(lib) link_dirs.append(lib.parent) - link_libnames.append(drop_suffix(lib.name)) + link_libs.append(lib.name) link_dirs += TRT_LINK_DIRS - link_libnames += TRT_LINK_LIBS - - linker_dir_flags = [] - for path in link_dirs: - linker_dir_flags.extend([f"-L{path}", f"-Wl,-rpath={path}"]) - linker_lib_flags = [f"-l{drop_suffix(lib)}" for lib in link_libnames] - return [ - str(LLVM_BUILD_BIN / "clang++"), str(src_file), - "-o", str(target_file), *linker_dir_flags, *linker_lib_flags - ] + link_libs += TRT_LINK_LIBS + link_libnames = [drop_suffix(s) for s in link_libs] + return link_dirs, link_libnames def _run_opt( @@ -143,7 +163,7 @@ def _run_opt( ) -> List[str]: unavailable = set(pass_names) - set(AVAILABLE_PASSES) if unavailable: - raise ValueError(f"Passes {unavailable} are unavailable from CMake") + raise ValueError(f"Passes {unavailable} are unavailable for this compilation.") load_passes_strs = [s for pass_ in pass_names for s in ["-load", f"{pass_}.so"]] pass_flags_strs = [f"-{flag}" for flag in pass_flags] return [ @@ -154,37 +174,98 @@ def _run_opt( def parse_args(): parser = argparse.ArgumentParser("hpvm-clang") - parser.add_argument("input_file", type=Path, help="HPVM-C code to compile") + parser.add_argument( + "hpvm_src", type=Path, + help="""HPVM-C code to compile. +HPVM-C code must be single file, but additional bitcode file can be linked together. +See option -b for that.""" + ) parser.add_argument("output_file", type=Path, help="Path to generate binary to") + parser.add_argument( + "-x", type=str, metavar="language", default="c++", + help="Treat input file as having type <language>", + ) + parser.add_argument( + "-b", + "--link-bitcode", + type=Path, + nargs="+", + help="Additional bitcode (.ll/.bc) files to link to", + ) parser.add_argument( "-t", - "--codegen-target", + "--tensor-target", type=str, - required=True, choices=["tensor", "cudnn"], - help="Backend to use", + help="Backend to use for tensor operators", + ) + parser.add_argument( + "--conf-file", type=Path, + help="File to approximation configurations; required for tensor target 'tensor'" + ) + parser.add_argument( + "--opencl", + action="store_true", + help="Use OpenCL support. Requires HPVM built with OpenCL", ) parser.add_argument( "-d", "--working-dir", type=Path, help="Directory to generate temp files in" ) + + # Relaying arguments for clang++ (source -> bitcode stage) parser.add_argument( - "--conf-file", type=Path, - help="File to approximation configurations; required for 'tensor' target" + "-I", "--include", type=Path, action="append", metavar="dir", + help="[clang emit-llvm] Add directory to include search path" + ) + parser.add_argument( + "-D", type=str, action="append", metavar="<macro>=<value>", + help="[clang emit-llvm] Define macro" + ) + parser.add_argument( + "-f", type=str, action="append", metavar="flag", + help="[clang emit-llvm] clang++ flags (such as -ffastmath)" + ) + parser.add_argument( + "-O", type=str, default="0", metavar="level", + help="[clang emit-llvm] Optimization level" + ) + parser.add_argument( + "--std", type=str, + help="[clang emit-llvm] Language standard to compile for. Double dashes (--std, not -std)." + ) + + # Relaying arguments for clang++ (linking stage) + parser.add_argument( + "-L", type=Path, action="append", metavar="dir", + help="[clang linker] Add directory to library search path" ) parser.add_argument( - "-I", "--include", type=Path, action="append", - help="Additional include directories to use" + "-l", type=str, action="append", metavar="name", + help="[clang linker] Link library (such as -lpthread)" ) args = parser.parse_args() - if args.codegen_target == "tensor": + if args.tensor_target == "tensor": if args.conf_file is None: - parser.error('Codegen target "tensor" requires --conf-file argument') + parser.error('Tensor target "tensor" requires --conf-file argument') + if args.x == "c": + args.is_cpp = False + elif args.x == "c++": + args.is_cpp = True + else: + parser.error(f"Language mode {args.x} not supported yet -- only c or c++") return args def main(): - compile_hpvm_c(**vars(parse_args())) + args = vars(parse_args()) + args["macro"] = args.pop("D") + args["flags"] = args.pop("f") + args["optim_level"] = args.pop("O") + args["link_dirs"] = args.pop("L") + args["link_libs"] = args.pop("l") + args.pop("x") + compile_hpvm_c(**args) if __name__ == "__main__":