diff --git a/.gitignore b/.gitignore
index 605cade94d773b32c3d3c79d2340e247aad51886..f3a8c7903b4dab089be74151a36c29481c1ec843 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,4 @@
-hpvm/build/
-hpvm/install/
+hpvm/build*/
 hpvm/llvm/
 hpvm/llvm-*.src.tar.xz
 hpvm/llvm-*.src/
@@ -23,7 +22,6 @@ dist/
 downloads/
 eggs/
 .eggs/
-lib/
 lib64/
 parts/
 sdist/
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
new file mode 100644
index 0000000000000000000000000000000000000000..bd5edbd1a467666f67c66be132b3a9d9bbd2d540
--- /dev/null
+++ b/.gitlab-ci.yml
@@ -0,0 +1,28 @@
+image: hpvm/gitlab-ci
+variables:
+  GIT_SUBMODULE_STRATEGY: recursive
+  # Use a better compressor
+  FF_USE_FASTZIP: "true"
+  # Output upload and download progress every 2 seconds
+  TRANSFER_METER_FREQUENCY: "2s"
+  # Use the fastest (lightest) compression level for the cache
+  CACHE_COMPRESSION_LEVEL: "fastest"
+cache:
+  key: "$CI_COMMIT_REF_SLUG"
+  paths:
+    - hpvm/build/
+    - hpvm/llvm/
+  when: always
+
+build:
+  stage: build
+  tags:
+    - hpvm
+  script:
+    - pwd
+    - source activate hpvm && cd hpvm
+    - ./install.sh -j32 -t "X86" DCMAKE_BUILD_TYPE=Release
+    - cd ..
+  only:
+    - hpvm-release-exp
+    - merge_requests
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..be6d1e0985998a369e994efc434ed3d417b26bc3
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,22 @@
+ARG IMAGE_NAME=nvidia/cuda
+FROM ${IMAGE_NAME}:10.2-cudnn7-devel-ubuntu18.04
+
+# Install dependencies: python 3.6, curl, git, libboost
+RUN apt-get update && apt-get install -y --no-install-recommends python3 curl git libboost-dev
+
+# Install cmake
+RUN curl -L https://github.com/Kitware/CMake/releases/download/v3.20.0/cmake-3.20.0-linux-x86_64.sh -o cmake.sh && \
+    bash ./cmake.sh --skip-license --prefix=/usr && rm cmake.sh
+
+# Install conda
+RUN curl https://repo.anaconda.com/archive/Anaconda3-2020.11-Linux-x86_64.sh -o anaconda.sh && \
+    bash anaconda.sh -b && rm anaconda.sh
+
+# Set PATH to include conda
+ENV PATH="/root/anaconda3/bin:${PATH}"
+
+# Send conda env spec into container
+COPY . /root/hpvm/
+
+# Create conda env named hpvm based on spec
+RUN conda env create -n hpvm -f /root/hpvm/hpvm/env.yaml
diff --git a/README.md b/README.md
index 7fb3736d8b631233cdc150a83b716564499f1e7b..8d1040a8f41d273728de7585d3d2aaa366728bba 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,7 @@
 # The HPVM Compiler Infrastructure
 
 [](https://hpvm.readthedocs.io/en/latest/?badge=latest)
+[](https://gitlab.engr.illinois.edu/llvm/hpvm/-/commits/hpvm-release-exp)
 
 This repository contains the source code and documentation for the HPVM Compiler Infrastructure.
diff --git a/hpvm/CMakeLists.txt b/hpvm/CMakeLists.txt
index 809a30cfa52e16f436dac4e22843f4c5a3add3d9..71e6de5999ad0127da5155d93fe3403c05cb3c7f 100644
--- a/hpvm/CMakeLists.txt
+++ b/hpvm/CMakeLists.txt
@@ -7,11 +7,16 @@ message(STATUS "CUDA Architecture: ${CMAKE_CUDA_ARCHITECTURES}")
 
 # find_package will use the auxillary cmake/Find*.cmake we provide
 list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
-find_package(CUDNN 7 EXACT REQUIRED) # CUDNN_INCLUDE_PATH, CUDNN_LIBRARY_PATH
+find_package(CUDNN 7 EXACT REQUIRED) # CUDNN_INCLUDE_PATH, CUDNN_LIBRARY_DIR and CUDNN::cudnn
+find_package(OpenCL) # Defines ${OpenCL_INCLUDE_DIRS} and ${OpenCL_LIBRARY}
+if(${OpenCL_FOUND})
+  set(HPVM_USE_OPENCL 1)
+else()
+  message(WARNING "OpenCL not found. --opencl flag of hpvm-clang will be unavailable for this build.")
+  set(HPVM_USE_OPENCL 0)
+endif()
 include_directories(./include/)
-# find_package will use the auxillary cmake/Find*.cmake we provide
-list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
 
 # Generate TENSOR_RT_PREFIX into config.h
 set(TENSOR_RT_PREFIX ${CMAKE_LIBRARY_OUTPUT_DIRECTORY})
diff --git a/hpvm/cmake/FindCUDNN.cmake b/hpvm/cmake/FindCUDNN.cmake
index e5a427f0317a6f3b8f7e7b2cc89fd176fd4362dc..bb4918f704a5fe210f0e25e893e1b33335189add 100644
--- a/hpvm/cmake/FindCUDNN.cmake
+++ b/hpvm/cmake/FindCUDNN.cmake
@@ -10,8 +10,9 @@
 # The following are set after configuration is done:
 #  CUDNN_FOUND
 #  CUDNN_INCLUDE_PATH
-#  CUDNN_LIBRARY_PATH
+#  CUDNN_LIBRARY_DIR
 #
+# It also provides the IMPORTED target CUDNN::cudnn.
 
 include(FindPackageHandleStandardArgs)
@@ -45,11 +46,8 @@ endif()
 find_library(CUDNN_LIBRARY_PATH ${CUDNN_LIBNAME}
   PATHS ${CUDNN_LIBRARY}
   PATH_SUFFIXES lib lib64 cuda/lib cuda/lib64 lib/x64)
-# Get director from filename ${CUDNN_LIBRARY_PATH}
-get_filename_component(
-  CUDNN_LIBRARY_PATH
-  "${CUDNN_LIBRARY_PATH}/.." ABSOLUTE
-)
+# Get directory from filename ${CUDNN_LIBRARY_PATH}
+get_filename_component(CUDNN_LIBRARY_DIR "${CUDNN_LIBRARY_PATH}/.." ABSOLUTE)
 
 # This version check is from OpenCV repo: https://github.com/opencv/opencv/blob/master/cmake/FindCUDNN.cmake
 # extract version from the include
@@ -80,4 +78,8 @@ find_package_handle_standard_args(
   VERSION_VAR CUDNN_VERSION
 )
 
+add_library(CUDNN::cudnn IMPORTED INTERFACE)
+target_include_directories(CUDNN::cudnn SYSTEM INTERFACE "${CUDNN_INCLUDE_PATH}")
+target_link_libraries(CUDNN::cudnn INTERFACE "${CUDNN_LIBRARY_PATH}")
+
 mark_as_advanced(CUDNN_ROOT CUDNN_INCLUDE_DIR CUDNN_LIBRARY)
diff --git a/hpvm/docs/install.rst b/hpvm/docs/install.rst
index a4f386ff1b49d48e17e4c1e085fa9c0de41a5c19..1b6ad4c6068d43e21c983d689594698e586a8810 100644
--- a/hpvm/docs/install.rst
+++ b/hpvm/docs/install.rst
@@ -150,14 +150,14 @@ Some common options that can be used with CMake are:
 * ``-DCMAKE_BUILD_TYPE=type`` --- Valid options for type are Debug, Release, RelWithDebInfo, and MinSizeRel. Default is Debug.
 * ``-DLLVM_ENABLE_ASSERTIONS=On`` --- Compile with assertion checks enabled (default is Yes for Debug builds, No for all other build types).
 
-Now, compile the HPVM Compilation Tool ``approxhpvm.py`` using:
+Now, compile the HPVM Compilation Tool ``hpvm-clang`` using:
 
 .. code-block:: shell
 
-   make -j<number of threads> approxhpvm.py
+   make -j<number of threads> hpvm-clang
 
 With all the aforementioned steps, HPVM should be built, installed, tested and ready to use.
-In particular, ``approxhpvm.py`` should be an executable command from your command line.
+In particular, ``hpvm-clang`` should be an executable command from your command line.
 
 Benchmarks and Tests
 --------------------
@@ -166,7 +166,7 @@ We provide a number of general benchmarks, DNN benchmarks, and test cases, writt
 ``make`` targets ``check-hpvm-pass``, ``check-hpvm-dnn``, and ``check-hpvm-profiler``
 tests various components of HPVM and are increasingly time-consuming.
-You can run tests similarly as how ``approxhpvm.py`` is compiled: for example,
+You can run tests the same way ``hpvm-clang`` is compiled: for example,
 
 .. code-block:: shell
diff --git a/hpvm/include/SupportHPVM/HPVMUtils.h b/hpvm/include/SupportHPVM/HPVMUtils.h
index 9a91494a41d6109cda8a8b9b885919fd197fb768..781306956dad0eacc85eadaaf60be4c8ce0e7b21 100644
--- a/hpvm/include/SupportHPVM/HPVMUtils.h
+++ b/hpvm/include/SupportHPVM/HPVMUtils.h
@@ -404,6 +404,7 @@ bool tagIncludesTarget(hpvm::Target Tag, hpvm::Target T) {
     return false;
   default:
     assert(false && "Unknown Target\n");
+    return false; // Unreachable; silences missing-return compiler warnings
   }
 }
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/include/hpvm.h b/hpvm/include/hpvm.h
similarity index 100%
rename from hpvm/test/dnn_benchmarks/hpvm-c/include/hpvm.h
rename to hpvm/include/hpvm.h
diff --git a/hpvm/install.sh b/hpvm/install.sh
index dd737034f043e2022710a94982467e60456d2bd4..3676a61972ff32566102d0eebdb490f00eaccb4b 100755
--- a/hpvm/install.sh
+++ b/hpvm/install.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
-# Run installer script
-# Pass on args to installer that can parse them
+# Run installer script and pass on args to installer that can parse them
 scripts/hpvm_installer.py "$@"
-# Set path.
-export PATH=$BUILD_DIR/bin:$PATH
+ret_code=$?
+echo "Installer returned with code $ret_code"
+exit $ret_code
diff --git a/hpvm/lib/Transforms/BuildDFG/BuildDFG.cpp b/hpvm/lib/Transforms/BuildDFG/BuildDFG.cpp
index 7ebdc39c28bdd49c193c5ae980439439178492a3..e7293a0640b5d7e45614459ed9687768998142a4 100644
--- a/hpvm/lib/Transforms/BuildDFG/BuildDFG.cpp
+++ b/hpvm/lib/Transforms/BuildDFG/BuildDFG.cpp
@@ -58,19 +58,7 @@ bool BuildDFG::runOnModule(Module &M) {
       Roots.push_back(Root);
       BuildGraph(Root, F);
 
-      for (DFGraph::children_iterator i = Root->getChildGraph()->begin(),
-                                      e = Root->getChildGraph()->end();
-           i != e; i++) {
-        DFNode *N = *i;
-        // DEBUG(errs() << "\t" << N->getFuncPointer()->getName() << "\n");
-      }
       Root->getChildGraph()->sortChildren();
-      for (DFGraph::children_iterator i = Root->getChildGraph()->begin(),
-                                      e = Root->getChildGraph()->end();
-           i != e; i++) {
-        DFNode *N = *i;
-        // DEBUG(errs() << "\t" << N->getFuncPointer()->getName() << "\n");
-      }
       viewDFGraph(Root->getChildGraph());
     }
   }
diff --git a/hpvm/lib/Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.cpp b/hpvm/lib/Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.cpp
index a44b79b3fe20cb52383ff7458466a72d041b90fc..d5904bd83c0eadcbdd912a79443bd7126acc36c5 100644
--- a/hpvm/lib/Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.cpp
+++ b/hpvm/lib/Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.cpp
@@ -283,30 +283,6 @@ void CGT_CPU::addWhileLoop(Instruction *CondBlockStart, Instruction *BodyStart,
   ReplaceInstWithInst(WhileBody->getTerminator(), UnconditionalBranch);
 }
 
-Instruction *CGT_CPU::addWhileLoopCounter(BasicBlock *Entry, BasicBlock *Cond,
-                                          BasicBlock *Body) {
-  Module *M = Entry->getParent()->getParent();
-  Type *Int64Ty = Type::getInt64Ty(M->getContext());
-
-  // Insert a PHI instruction at the beginning of the condition block
-  Instruction *IB = Cond->getFirstNonPHI();
-  PHINode *CounterPhi = PHINode::Create(Int64Ty, 2, "cnt", IB);
-
-  ConstantInt *IConst =
-      ConstantInt::get(Type::getInt64Ty(M->getContext()), 1, true);
-  Instruction *CounterIncr =
-      BinaryOperator::CreateNSW(Instruction::BinaryOps::Add, CounterPhi, IConst,
-                                "cnt_incr", Body->getTerminator());
-
-  // Set incoming values for Phi node
-  IConst = ConstantInt::get(Type::getInt64Ty(M->getContext()), 0, true);
-  CounterPhi->addIncoming(IConst, Entry);
-  CounterPhi->addIncoming(CounterIncr, Body);
-
-  // Return the pointer to the created PHI node in the corresponding argument
-  return CounterPhi;
-}
-
 /* Add Loop around the instruction I
  * Algorithm:
  * (1) Split the basic block of instruction I into three parts, where the
diff --git a/hpvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp b/hpvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp
index 4653ad0f8a4c4ddd8d76e984b1750d9f94d813a9..0559e8136da6bff441e3c1fb0b948bcaaeb954ee 100644
--- a/hpvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp
+++ b/hpvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp
@@ -540,9 +540,6 @@ void CGT_CUDNN::codeGen(DFLeafNode *N) {
     case Intrinsic::hpvm_node_id: { /* llvm.hpvm.node.id */
       DEBUG(errs() << F_cudnn->getName() << "\t: Handling Node ID Intrinsic \n");
-      // Get uint32 argument
-      Value *Op = II->getOperand(0);
-
       // Argument list for the runtime call
       std::vector<Value *> Args;
       Args.push_back(II->getOperand(0));
diff --git a/hpvm/lib/Transforms/DFG2LLVM_OpenCL/CMakeLists.txt b/hpvm/lib/Transforms/DFG2LLVM_OpenCL/CMakeLists.txt
index 4041df11ce8d79e39d6f72bdf0a1068eae449300..1e34f44f403e373607d2efc10aca7f796c81b938 100644
--- a/hpvm/lib/Transforms/DFG2LLVM_OpenCL/CMakeLists.txt
+++ b/hpvm/lib/Transforms/DFG2LLVM_OpenCL/CMakeLists.txt
@@ -10,6 +10,7 @@ add_llvm_library(
   LLVMDFG2LLVM_OpenCL
   DEPENDS
   intrinsics_gen
+  llvm-cbe # Called within the pass
   PLUGIN_TOOL
   opt
 )
diff --git a/hpvm/lib/Transforms/DFG2LLVM_OpenCL/DFG2LLVM_OpenCL.cpp b/hpvm/lib/Transforms/DFG2LLVM_OpenCL/DFG2LLVM_OpenCL.cpp
index 5a58f272b3042a4ebfc2e3c7bb3606b5c19e8d84..286bcdebd2ed3322b134581f3e8fdac286ee325b 100644
--- a/hpvm/lib/Transforms/DFG2LLVM_OpenCL/DFG2LLVM_OpenCL.cpp
+++ b/hpvm/lib/Transforms/DFG2LLVM_OpenCL/DFG2LLVM_OpenCL.cpp
@@ -47,6 +47,8 @@
 #include "llvm/Support/ToolOutputFile.h"
 
 #include <sstream>
+#include <unistd.h> // get_current_dir_name() (POSIX/GNU-only)
+#include <cstdlib>  // std::system
 
 #ifndef LLVM_BUILD_DIR
 #error LLVM_BUILD_DIR is not defined
@@ -147,7 +149,7 @@ static void getExecuteNodeParams(Module &M, Value *&, Value *&, Value *&,
 static Value *genWorkGroupPtr(Module &M, std::vector<Value *>,
                               ValueToValueMapTy &, Instruction *,
                               const Twine &WGName = "WGSize");
-static std::string getPTXFilename(const Module &);
+static std::string getPTXFilePath(const Module &);
 static void changeDataLayout(Module &);
 static void changeTargetTriple(Module &);
 static void findReturnInst(Function *, std::vector<ReturnInst *> &);
@@ -258,7 +260,7 @@ public:
     DEBUG(errs() << *KernelM);
   }
 
-  void writeKernelsModule();
+  void writeKModCompilePTX();
 };
 
 // Initialize the HPVM runtime API. This makes it easier to insert these calls
@@ -846,7 +848,7 @@ void CGT_OpenCL::codeGen(DFInternalNode *N) {
     // Now the remaining nodes to be visited should be ignored
     KernelLaunchNode = NULL;
     DEBUG(errs() << "Insert Runtime calls\n");
-    insertRuntimeCalls(N, kernel, getPTXFilename(M));
+    insertRuntimeCalls(N, kernel, getPTXFilePath(M));
   } else {
     DEBUG(errs() << "Found intermediate node. Getting size parameters.\n");
@@ -1901,7 +1903,7 @@ bool DFG2LLVM_OpenCL::runOnModule(Module &M) {
     CGTVisitor->visit(rootNode);
   }
 
-  CGTVisitor->writeKernelsModule();
+  CGTVisitor->writeKModCompilePTX();
 
   // TODO: Edit module epilogue to remove the HPVM intrinsic declarations
   delete CGTVisitor;
@@ -2010,16 +2012,18 @@ void CGT_OpenCL::addCLMetadata(Function *F) {
   MDN_annotations->addOperand(MDNvvmAnnotationsNode);
 }
 
-void CGT_OpenCL::writeKernelsModule() {
+void CGT_OpenCL::writeKModCompilePTX() {
 
   // In addition to deleting all other functions, we also want to spiff it
   // up a little bit. Do this now.
   legacy::PassManager Passes;
 
+  auto kmodName = getKernelsModuleName(M);
+  auto ptxPath = getPTXFilePath(M);
   DEBUG(errs() << "Writing to File --- ");
-  DEBUG(errs() << getKernelsModuleName(M).c_str() << "\n");
+  DEBUG(errs() << kmodName << "\n");
   std::error_code EC;
-  ToolOutputFile Out(getKernelsModuleName(M).c_str(), EC, sys::fs::F_None);
+  ToolOutputFile Out(kmodName.c_str(), EC, sys::fs::F_None);
   if (EC) {
     DEBUG(errs() << EC.message() << '\n');
   }
@@ -2030,6 +2034,13 @@
 
   // Declare success.
   Out.keep();
+
+  // Call llvm-cbe on the kernels module to produce the PTX file.
+  auto llvmCBE = std::string(LLVM_BUILD_DIR_STR) + "/bin/llvm-cbe";
+  std::string command = llvmCBE + " " + kmodName + " -o " + ptxPath;
+  DEBUG(errs() << "Compiling PTX from kernel module:\n");
+  DEBUG(errs() << command);
+  std::system(command.c_str());
 }
 
 Function *CGT_OpenCL::transformFunctionToVoid(Function *F) {
@@ -2346,10 +2357,13 @@ static Value *genWorkGroupPtr(Module &M, std::vector<Value *> WGSize,
 }
 
 // Get generated PTX binary name
-static std::string getPTXFilename(const Module &M) {
+static std::string getPTXFilePath(const Module &M) {
   std::string moduleID = M.getModuleIdentifier();
-  moduleID.append(".kernels.cl");
-  return moduleID;
+  char *cwd_p = get_current_dir_name();
+  std::string cwd(cwd_p);
+  free(cwd_p);
+  std::string ptxPath = cwd + "/" + moduleID + ".kernels.cl";
+  return ptxPath;
 }
 
 // Changes the data layout of the Module to be compiled with OpenCL backend
diff --git a/hpvm/projects/hpvm-profiler/hpvm_profiler/__init__.py b/hpvm/projects/hpvm-profiler/hpvm_profiler/__init__.py
index 14d0f491b8dccb41f7136087f4146f64e0470693..4e91fbbe4a4af2c16b7583443360a09d88b0ac61 100644
--- a/hpvm/projects/hpvm-profiler/hpvm_profiler/__init__.py
+++ b/hpvm/projects/hpvm-profiler/hpvm_profiler/__init__.py
@@ -50,7 +50,7 @@ def profile_configs(
     # Run binary_path binary,
     # which generates `profile_filename` and `qos_filename` file in cwd.
     try:
-        check_call([str(binary_path), "-c", str(temp_file.name)], stdout=PIPE)
+        check_call([str(binary_path), "-c", str(temp_file.name)])
     except CalledProcessError as e:
         print("Output from the program:")
         print(e.output)
@@ -158,9 +158,12 @@ def read_hpvm_configs(config_file: PathLike) -> Tuple[str, List[Config]]:
 
 
 def write_hpvm_config(header: str, configs: Iterable[Config], to_file: PathLike):
+    text_segs = [header] + [str(config) for config in configs]
     with open(to_file, "w") as f:
         f.write("\n".join(text_segs))
+        f.flush()
+
 
 def _read_profile_file(profile_file_path: Path):
diff --git a/hpvm/projects/hpvm-rt/CMakeLists.txt b/hpvm/projects/hpvm-rt/CMakeLists.txt
index 6efd8d3d0a9d86236adc87657fb68b782f3daaa0..3147d00e892f8d89b71ebbe7e04aa0fa33210534 100644
--- a/hpvm/projects/hpvm-rt/CMakeLists.txt
+++ b/hpvm/projects/hpvm-rt/CMakeLists.txt
@@ -3,16 +3,18 @@ add_definitions(-DNUM_CORES=8)
 SET(CMAKE_C_COMPILER ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/clang)
 SET(CMAKE_CXX_COMPILER ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/clang++)
 SET(CMAKE_CXX_STANDARD 11)
-# Defines ${OpenCL_INCLUDE_DIRS} and ${OpenCL_LIBRARY} if found
-find_package(OpenCL REQUIRED)
 
 # This puts libhpvm-rt.a in lib/ which we don't care about
 # we want ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/hpvm-rt.dir/hpvm-rt.cpp.o
 # which is a LLVM Bitcode file because of the -flto below.
 add_llvm_library(hpvm-rt hpvm-rt.cpp DEPENDS clang)
-target_compile_options(hpvm-rt PUBLIC -flto)
-target_include_directories(hpvm-rt PRIVATE ${OpenCL_INCLUDE_DIRS})
-target_link_directories(hpvm-rt PUBLIC ${OpenCL_LIBRARY})
+target_compile_options(hpvm-rt PUBLIC -flto -DHPVM_USE_OPENCL=${HPVM_USE_OPENCL})
+if(${HPVM_USE_OPENCL})
+  target_include_directories(hpvm-rt PRIVATE ${OpenCL_INCLUDE_DIRS})
+  target_link_directories(hpvm-rt PUBLIC ${OpenCL_LIBRARY})
+else()
+  message(STATUS "hpvm-rt.bc is not using OpenCL.")
+endif()
 
 # Move and rename hpvm-rt.cpp.o to be an actual bc code
 add_custom_command(
@@ -22,4 +24,4 @@ add_custom_command(
   ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/hpvm-rt.dir/hpvm-rt.cpp.o
   ${CMAKE_CURRENT_BINARY_DIR}/hpvm-rt.bc
 )
-add_custom_target(hpvm-rt.bc ALL DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/hpvm-rt.bc")
+add_custom_target(hpvm-rt-bc ALL DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/hpvm-rt.bc")
diff --git a/hpvm/projects/hpvm-rt/hpvm-rt.cpp b/hpvm/projects/hpvm-rt/hpvm-rt.cpp
index dff4f0e4a21e8e0eafad36bd7b14805c584cd10c..f0716378fe8c62639555396a3d268aae051534ba 100644
--- a/hpvm/projects/hpvm-rt/hpvm-rt.cpp
+++ b/hpvm/projects/hpvm-rt/hpvm-rt.cpp
@@ -1,4 +1,6 @@
-#include <CL/cl.h>
+
+//#define HPVM_USE_OPENCL 1
+
 #include <algorithm>
 #include <cassert>
 #include <cstdio>
@@ -8,9 +10,14 @@
 #include <map>
 #include <pthread.h>
 #include <string>
-
 #include <unistd.h>
 
+#ifdef HPVM_USE_OPENCL
+
+#include <CL/cl.h>
+
+#endif
+
 #if _POSIX_VERSION >= 200112L
 #include <sys/time.h>
 #endif
@@ -40,6 +47,9 @@ typedef struct {
   std::vector<CircularBuffer<uint64_t> *> *isLastInputBuffers;
 } DFNodeContext_CPU;
 
+
+#ifdef HPVM_USE_OPENCL
+
 typedef struct {
   cl_context clOCLContext;
   cl_command_queue clCommandQue;
@@ -51,6 +61,9 @@ cl_context globalOCLContext;
 cl_device_id *clDevices;
 cl_command_queue globalCommandQue;
 
+#endif
+
+
 MemTracker MTracker;
 vector<DFGDepth> DStack;
 // Mutex to prevent concurrent access by multiple thereads in pipeline
@@ -59,6 +72,8 @@ pthread_mutex_t ocl_mtx;
 #define NUM_TESTS 1
 hpvm_TimerSet kernel_timer;
 
+#ifdef HPVM_USE_OPENCL
+
 static const char *getErrorString(cl_int error) {
   switch (error) {
   // run-time and JIT compiler errors
@@ -209,6 +224,15 @@ static inline void checkErr(cl_int err, cl_int success, const char *name) {
   }
 }
 
+#endif
+
+
+void openCLAbort() {
+  cout << " ERROR: OpenCL not found! Please recompile with OpenCL - make sure OpenCL is available on the system.\n";
+  abort();
+}
+
+
 /************************* Depth Stack Routines ***************************/
 
 void llvm_hpvm_cpu_dstack_push(unsigned n, uint64_t limitX, uint64_t iX,
@@ -260,6 +284,9 @@ uint64_t llvm_hpvm_cpu_getDimInstance(unsigned level, unsigned dim) {
 
 /********************** Memory Tracking Routines **************************/
 
 void llvm_hpvm_track_mem(void *ptr, size_t size) {
+
+#ifdef HPVM_USE_OPENCL
+
   DEBUG(cout << "Start tracking memory: " << ptr << flush << "\n");
   MemTrackerEntry *MTE = MTracker.lookup(ptr);
   if (MTE != NULL) {
@@ -269,9 +296,19 @@
   DEBUG(cout << "Inserting ID " << ptr << " in the MemTracker Table\n");
   MTracker.insert(ptr, size, MemTrackerEntry::HOST, ptr);
   DEBUG(MTracker.print());
+
+#else
+
+  openCLAbort();
+
+#endif
+
 }
 
 void llvm_hpvm_untrack_mem(void *ptr) {
+
+#ifdef HPVM_USE_OPENCL
+
   DEBUG(cout << "Stop tracking memory: " << ptr << flush << "\n");
   MemTrackerEntry *MTE = MTracker.lookup(ptr);
   if (MTE == NULL) {
@@ -284,11 +321,22 @@
   clReleaseMemObject((cl_mem)MTE->getAddress());
   MTracker.remove(ptr);
   DEBUG(MTracker.print());
+
+#else
+
+  openCLAbort();
+
+#endif
+
 }
 
+
+#ifdef HPVM_USE_OPENCL
+
 static void *llvm_hpvm_ocl_request_mem(void *ptr, size_t size,
                                        DFNodeContext_OCL *Context,
                                        bool isInput, bool isOutput) {
+
   pthread_mutex_lock(&ocl_mtx);
   DEBUG(cout << "[OCL] Request memory: " << ptr
              << " for context: " << Context->clOCLContext << flush << "\n");
@@ -343,13 +391,20 @@ static void *llvm_hpvm_ocl_request_mem(void *ptr, size_t size,
   DEBUG(MTracker.print());
   pthread_mutex_unlock(&ocl_mtx);
   return d_input;
+
 }
 
+#endif
+
+
 void *llvm_hpvm_cpu_argument_ptr(void *ptr, size_t size) {
   return llvm_hpvm_request_mem(ptr, size);
 }
 
 void *llvm_hpvm_request_mem(void *ptr, size_t size) {
+
+#ifdef HPVM_USE_OPENCL
+
   pthread_mutex_lock(&ocl_mtx);
   DEBUG(cout << "[CPU] Request memory: " << ptr << flush << "\n");
   MemTrackerEntry *MTE = MTracker.lookup(ptr);
@@ -386,6 +441,13 @@
   DEBUG(MTracker.print());
   pthread_mutex_unlock(&ocl_mtx);
   return ptr;
+
+#else
+
+  openCLAbort();
+
+#endif
+
 }
 
 /*************************** Timer Routines **********************************/
@@ -419,6 +481,9 @@ get_last_async(struct hpvm_TimerSet *timers) {
 }
 
 static void insert_marker(struct hpvm_TimerSet *tset, enum hpvm_TimerID timer) {
+
+#ifdef HPVM_USE_OPENCL
+
   cl_int ciErrNum = CL_SUCCESS;
   struct hpvm_async_time_marker_list **new_event = &(tset->async_markers);
@@ -441,10 +506,20 @@
   if (ciErrNum != CL_SUCCESS) {
     fprintf(stderr, "Error Enqueueing Marker!\n");
   }
+
+#else
+
+  openCLAbort();
+
+#endif
+
 }
 
 static void insert_submarker(struct hpvm_TimerSet *tset, char *label,
                              enum hpvm_TimerID timer) {
+
+#ifdef HPVM_USE_OPENCL
+
   cl_int ciErrNum = CL_SUCCESS;
   struct hpvm_async_time_marker_list **new_event = &(tset->async_markers);
@@ -467,10 +542,20 @@
   if (ciErrNum != CL_SUCCESS) {
     fprintf(stderr, "Error Enqueueing Marker!\n");
   }
+
+#else
+
+  openCLAbort();
+
+#endif
+
 }
 
 /* Assumes that all recorded events have completed */
 static hpvm_Timestamp record_async_times(struct hpvm_TimerSet *tset) {
+
+#ifdef HPVM_USE_OPENCL
+
   struct hpvm_async_time_marker_list *next_interval = NULL;
   struct hpvm_async_time_marker_list *last_marker =
       get_last_async(tset);
   hpvm_Timestamp total_async_time = 0;
@@ -517,6 +602,13 @@
   next_interval->timerID = INVALID_TIMERID;
 
   return total_async_time;
+
+#else
+
+  openCLAbort();
+
+#endif
+
 }
 
 static void accumulate_time(hpvm_Timestamp *accum, hpvm_Timestamp start,
@@ -733,6 +825,9 @@ void hpvm_AddSubTimer(struct hpvm_TimerSet *timers, char *label,
 }
 
 void hpvm_SwitchToTimer(struct hpvm_TimerSet *timers, enum hpvm_TimerID timer) {
+
+#ifdef HPVM_USE_OPENCL
+
   // cerr << "Switch to timer: " << timer << flush << "\n";
   /* Stop the currently running timer */
   if (timers->current != hpvm_TimerID_NONE) {
@@ -849,10 +944,21 @@
     }
   }
   timers->current = timer;
+
+#else
+
+  openCLAbort();
+
+#endif
+
+
 }
 
 void hpvm_SwitchToSubTimer(struct hpvm_TimerSet *timers, char *label,
                            enum hpvm_TimerID category) {
+
+#ifdef HPVM_USE_OPENCL
+
   struct hpvm_SubTimerList *subtimerlist =
       timers->sub_timer_list[timers->current];
   struct hpvm_SubTimer *curr =
@@ -1001,6 +1107,13 @@
   }
 
   timers->current = category;
+
+#else
+
+  openCLAbort();
+
+#endif
+
 }
 
 void hpvm_PrintTimerSet(struct hpvm_TimerSet *timers) {
@@ -1069,6 +1182,9 @@
 }
 
 void hpvm_DestroyTimerSet(struct hpvm_TimerSet *timers) {
+
+#ifdef HPVM_USE_OPENCL
+
   /* clean up all of the async event markers */
   struct hpvm_async_time_marker_list *event = timers->async_markers;
   while (event != NULL) {
@@ -1106,6 +1222,13 @@
       free(timers->sub_timer_list[i]);
     }
   }
+
+#else
+
+  openCLAbort();
+
+#endif
+
 }
 
 /**************************** Pipeline API ************************************/
@@ -1304,10 +1427,14 @@ void llvm_hpvm_cpu_wait(void *graphID) {
   DEBUG(cout << "\t... pthread Done!\n");
 }
 
+
+#ifdef HPVM_USE_OPENCL
+
 // Returns the platform name.
 std::string getPlatformName(cl_platform_id pid) {
+
   cl_int status;
   size_t sz;
   status = clGetPlatformInfo(pid, CL_PLATFORM_NAME, 0, NULL, &sz);
   checkErr(status, CL_SUCCESS, "Query for platform name size failed");
@@ -1318,12 +1444,18 @@
   const auto &tmp = std::string(name, name + sz);
   delete[] name;
-  return tmp; 
+  return tmp;
 }
 
+#endif
+
+
+#ifdef HPVM_USE_OPENCL
+
 // Searches all platforms for the first platform whose name
 // contains the search string (case-insensitive).
 cl_platform_id findPlatform(const char *platform_name_search) {
+
   cl_int status;
   std::string search = platform_name_search;
@@ -1360,7 +1492,13 @@
   assert(false && "No matching platform found!");
 }
 
+#endif
+
+
 void *llvm_hpvm_ocl_initContext(enum hpvm::Target T) {
+
+#ifdef HPVM_USE_OPENCL
+
   pthread_mutex_lock(&ocl_mtx);
   DEBUG(std::string Target = T == hpvm::GPU_TARGET ? "GPU" : "SPIR");
"GPU" : "SPIR"); DEBUG(cout << "Initializing Context for " << Target << " device\n"); @@ -1450,9 +1588,19 @@ void *llvm_hpvm_ocl_initContext(enum hpvm::Target T) { pthread_mutex_unlock(&ocl_mtx); return globalOCLContext; + +#else + + openCLAbort(); + +#endif + } void llvm_hpvm_ocl_clearContext(void *graphID) { + +#ifdef HPVM_USE_OPENCL + pthread_mutex_lock(&ocl_mtx); DEBUG(cout << "Clear Context\n"); DFNodeContext_OCL *Context = (DFNodeContext_OCL *)graphID; @@ -1464,9 +1612,19 @@ void llvm_hpvm_ocl_clearContext(void *graphID) { cout << "Printing HPVM Timer: KernelTimer\n"; hpvm_PrintTimerSet(&kernel_timer); pthread_mutex_unlock(&ocl_mtx); + +#else + + openCLAbort(); + +#endif + } void llvm_hpvm_ocl_argument_shared(void *graphID, int arg_index, size_t size) { + +#ifdef HPVM_USE_OPENCL + pthread_mutex_lock(&ocl_mtx); DEBUG(cout << "Set Shared Memory Input:"); DEBUG(cout << "\tArgument Index = " << arg_index << ", Size = " << size @@ -1477,10 +1635,20 @@ void llvm_hpvm_ocl_argument_shared(void *graphID, int arg_index, size_t size) { cl_int errcode = clSetKernelArg(Context->clKernel, arg_index, size, NULL); checkErr(errcode, CL_SUCCESS, "Failure to set shared memory argument"); pthread_mutex_unlock(&ocl_mtx); + +#else + + openCLAbort(); + +#endif + } void llvm_hpvm_ocl_argument_scalar(void *graphID, void *input, int arg_index, size_t size) { + +#ifdef HPVM_USE_OPENCL + pthread_mutex_lock(&ocl_mtx); DEBUG(cout << "Set Scalar Input:"); DEBUG(cout << "\tArgument Index = " << arg_index << ", Size = " << size @@ -1491,10 +1659,20 @@ void llvm_hpvm_ocl_argument_scalar(void *graphID, void *input, int arg_index, cl_int errcode = clSetKernelArg(Context->clKernel, arg_index, size, input); checkErr(errcode, CL_SUCCESS, "Failure to set constant input argument"); pthread_mutex_unlock(&ocl_mtx); + +#else + + openCLAbort(); + +#endif + } void *llvm_hpvm_ocl_argument_ptr(void *graphID, void *input, int arg_index, size_t size, bool isInput, bool isOutput) { + +#ifdef HPVM_USE_OPENCL + pthread_mutex_lock(&ocl_mtx); DEBUG(cout << "Set Pointer Input:"); DEBUG(cout << "\tArgument Index = " << arg_index << ", Ptr = " << input @@ -1518,9 +1696,19 @@ void *llvm_hpvm_ocl_argument_ptr(void *graphID, void *input, int arg_index, DEBUG(cout << "\tDevicePtr = " << d_input << flush << "\n"); pthread_mutex_unlock(&ocl_mtx); return d_input; + +#else + + openCLAbort(); + +#endif + } void *llvm_hpvm_ocl_output_ptr(void *graphID, int arg_index, size_t size) { + +#ifdef HPVM_USE_OPENCL + pthread_mutex_lock(&ocl_mtx); DEBUG(cout << "Set device memory for Output Struct:"); DEBUG(cout << "\tArgument Index = " << arg_index << ", Size = " << size @@ -1536,12 +1724,22 @@ void *llvm_hpvm_ocl_output_ptr(void *graphID, int arg_index, size_t size) { DEBUG(cout << "\tDevicePtr = " << d_output << flush << "\n"); pthread_mutex_unlock(&ocl_mtx); return d_output; + +#else + + openCLAbort(); + +#endif + } void llvm_hpvm_ocl_free(void *ptr) {} void *llvm_hpvm_ocl_getOutput(void *graphID, void *h_output, void *d_output, size_t size) { + +#ifdef HPVM_USE_OPENCL + pthread_mutex_lock(&ocl_mtx); DEBUG(cout << "Get Output:\n"); DEBUG(cout << "\tHostPtr = " << h_output << ", DevicePtr = " << d_output @@ -1555,11 +1753,21 @@ void *llvm_hpvm_ocl_getOutput(void *graphID, void *h_output, void *d_output, checkErr(errcode, CL_SUCCESS, "[getOutput] Failure to read output"); pthread_mutex_unlock(&ocl_mtx); return h_output; + +#else + + openCLAbort(); + +#endif + } void *llvm_hpvm_ocl_executeNode(void *graphID, unsigned workDim, const size_t *localWorkSize, 
                                 const size_t *globalWorkSize) {
+
+#ifdef HPVM_USE_OPENCL
+
   pthread_mutex_lock(&ocl_mtx);
 
   size_t GlobalWG[3];
@@ -1610,6 +1818,13 @@
 
   pthread_mutex_unlock(&ocl_mtx);
   return NULL;
+
+#else
+
+  openCLAbort();
+
+#endif
+
 }
 
 //////////////////////////////////////////////////////////////////////////////
@@ -1656,6 +1871,9 @@ static char *LoadProgSource(const char *Filename, size_t *szFinalLength) {
 }
 
 void *llvm_hpvm_ocl_launch(const char *FileName, const char *KernelName) {
+
+#ifdef HPVM_USE_OPENCL
+
   pthread_mutex_lock(&ocl_mtx);
   DEBUG(cout << "Launch OCL Kernel\n");
   // Initialize OpenCL
@@ -1717,14 +1935,31 @@
 
   pthread_mutex_unlock(&ocl_mtx);
   return Context;
+
+#else
+
+  openCLAbort();
+
+#endif
+
 }
 
 void llvm_hpvm_ocl_wait(void *graphID) {
+
+#ifdef HPVM_USE_OPENCL
+
   pthread_mutex_lock(&ocl_mtx);
   DEBUG(cout << "Wait\n");
   DFNodeContext_OCL *Context = (DFNodeContext_OCL *)graphID;
   clFinish(Context->clCommandQue);
   pthread_mutex_unlock(&ocl_mtx);
+
+#else
+
+  openCLAbort();
+
+#endif
+
 }
 
 void llvm_hpvm_switchToTimer(void **timerSet, enum hpvm_TimerID timer) {
@@ -1749,3 +1984,5 @@
   pthread_mutex_unlock(&ocl_mtx);
   return TS;
 }
+
+
diff --git a/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt b/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt
index a142d524b69cb605b85c496aa140c806ad258dfd..cbabc8bbe0111a0ec6c99520176a8b37a530a4fb 100644
--- a/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt
+++ b/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt
@@ -24,10 +24,8 @@ configure_file(
 
 # -- Default include directories
 set(
   INCLUDES
-  ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
-  ${CUDNN_INCLUDE_PATH}
   ./tensor_runtime/include
   ${CMAKE_CURRENT_BINARY_DIR}/tensor_runtime/include
-  ./dnn_sources/include
+  ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} ${CUDNN_INCLUDE_PATH}
 )
 
 # Build gpu_profiler and soc_simulator (dependencies)
@@ -39,13 +37,43 @@ add_library(soc_simulator SHARED soc_simulator/promise_timing_model.cpp)
 target_include_directories(soc_simulator PUBLIC soc_simulator/)
 
 # -- Link libraries
-find_package(OpenMP REQUIRED)  # Provides ${OpenMP_CXX_FLAGS}
-set(LINK_DIR CUDNN_LIBRARY_PATH)
-set(LINK_LIBS gpu_profiler soc_simulator stdc++fs cudnn curand cublas)
+find_package(OpenMP REQUIRED)  # Provides ${OpenMP_CXX_FLAGS} and OpenMP::OpenMP_CXX
+# This will use the CUDA found by CUDA language support in the root CMake,
+# but it exports the CUDA::* targets (used below) so we can freely add libraries to link to.
+find_package(CUDAToolkit REQUIRED)
+set(
+  LINK_LIBS
+  gpu_profiler soc_simulator
+  CUDA::cublas CUDA::curand CUDNN::cudnn
+  OpenMP::OpenMP_CXX
+)
 if(USE_GFLAGS)
   list(APPEND LINK_LIBS gflags)
 endif()
 
+# -- Manually list the directories (TRT_LINK_DIRS) and libraries (TRT_LINK_LIBS)
+# tensor_runtime links to, because CMake doesn't help us do this.
+# This is needed by both hpvm-clang and the RPATH setting step (below).
+# First, take a guess at the paths to the libraries that are used here,
+# by inspecting each imported target's INTERFACE_LINK_LIBRARIES.
+foreach(interface_lib ${LINK_LIBS})
+  get_target_property(actual_libs ${interface_lib} INTERFACE_LINK_LIBRARIES)
+  foreach(actual_lib ${actual_libs})
+    # ${actual_lib} may not be a path; in that case taking its directory returns "".
+    get_filename_component(libdir ${actual_lib} DIRECTORY)
+    get_filename_component(libname ${actual_lib} NAME)
+    if(NOT ${libdir} STREQUAL "")
+      list(APPEND TRT_LINK_DIRS ${libdir})
+    endif()
+    if(NOT ${libname} STREQUAL "" AND NOT ${libname} STREQUAL "actual_libs-NOTFOUND")
+      list(APPEND TRT_LINK_LIBS ${libname})
+    endif()
+  endforeach()
+endforeach()
+# Dedup, just for shorter compiler arguments.
+list(REMOVE_DUPLICATES TRT_LINK_DIRS)
+list(REMOVE_DUPLICATES TRT_LINK_LIBS)
+
 # -- Definitions
 set(DEFS -DPROMISE_TUNER_ENABLED -DSIMULATION_MODE=true)
 if(USE_GFLAGS)
@@ -77,16 +105,27 @@ endforeach()
 
 # -- Adding tensor_runtime targets
 function(add_tensor_runtime target_name)
   add_library(${target_name} SHARED ${RUNTIME_SRCS})
-  set_property(TARGET ${target_name} PROPERTY CUDA_ARCHITECTURES 60)
+  set_target_properties(${target_name} PROPERTIES CUDA_ARCHITECTURES 60)
   target_compile_options(
     ${target_name} PRIVATE
     --expt-relaxed-constexpr -maxrregcount 32 -Xcompiler=${OpenMP_CXX_FLAGS}
     $<$<CONFIG:DEBUG>:-lineinfo -Xcompiler=-ggdb>
   )
   target_include_directories(${target_name} PUBLIC ${INCLUDES})
-  target_link_directories(${target_name} PUBLIC ${LINK_DIR})
-  target_link_libraries(${target_name} PUBLIC ${LINK_LIBS} ${OpenMP_CXX_FLAGS})
+  target_link_libraries(${target_name} PUBLIC ${LINK_LIBS})
   target_compile_definitions(${target_name} PRIVATE ${DEFS} ${ARGN})
+
+  # Set RPATH manually; CMake doesn't propagate it for these libraries.
+  foreach(libdir ${TRT_LINK_DIRS})
+    target_link_options(${target_name} PRIVATE "-Wl,-rpath,${libdir}")
+  endforeach()
+  # Also attach TRT_LINK_DIRS and TRT_LINK_LIBS to this target
+  # so that hpvm-clang can read them. (These are custom properties.)
+  set_target_properties(
+    ${target_name} PROPERTIES
+    TRT_LINK_DIRS "${TRT_LINK_DIRS}"
+    TRT_LINK_LIBS "${TRT_LINK_LIBS}"
+  )
 endfunction(add_tensor_runtime)
 
 # Adding new rule for building a cuDNN runtime library
@@ -101,8 +140,8 @@
 add_tensor_runtime(tensor_runtime_online -DONLINE_PROFILING=true -DFP16_tuning=false)
 add_dependencies(tensor_runtime_online tensor_runtime)
 
 # Adding rule for the debugging source
-add_executable(unit_tests dnn_sources/src/unit_tests.cc)
-target_link_libraries(unit_tests tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB} ${OpenMP_CXX_FLAGS})
+add_executable(unit_tests tests/unit_tests.cc)
+target_link_libraries(unit_tests tensor_runtime_online)
 
 # -- Compile tensor_runtime.ll if possible
 if(INDEP_BUILD)
@@ -126,6 +165,8 @@
 endif()
 # If some clang-9 is found, create a tensor_runtime.ll from tensor_signatures.cc
 if(CLANG_NAME)
   message(STATUS "Creating tensor_runtime.ll in ${TENSOR_RT_LL_PREFIX}")
+  # Manually add cuda includes because add_custom_command doesn't handle them
+  # (unlike add_library, which has CUDA-language support).
   foreach(dir ${INCLUDES})
     list(APPEND INCLUDE_COMPILER_STRINGS "-I${dir}")
   endforeach()
@@ -136,59 +177,3 @@
       -o ${TENSOR_RT_LL_PREFIX}/tensor_runtime.ll
   )
 endif()
-
-
-#**************** FP32 TensorRT Source Builds ***********
-
-add_executable(lenet_mnist_fp32 dnn_sources/src/fp32/lenet_mnist.cc)
-target_link_libraries(lenet_mnist_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(alexnet_cifar10_fp32 dnn_sources/src/fp32/alexnet_cifar10.cc)
-target_link_libraries(alexnet_cifar10_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(alexnet2_cifar10_fp32 dnn_sources/src/fp32/alexnet2_cifar10.cc)
-target_link_libraries(alexnet2_cifar10_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(vgg16_cifar10_fp32 dnn_sources/src/fp32/vgg16_cifar10.cc)
-target_link_libraries(vgg16_cifar10_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(resnet18_cifar10_fp32 dnn_sources/src/fp32/resnet18_cifar10.cc)
-target_link_libraries(resnet18_cifar10_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(vgg16_cifar100_fp32 dnn_sources/src/fp32/vgg16_cifar100.cc)
-target_link_libraries(vgg16_cifar100_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(mobilenet_cifar10_fp32 dnn_sources/src/fp32/mobilenet.cc)
-target_link_libraries(mobilenet_cifar10_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(alexnet_imagenet_fp32 dnn_sources/src/fp32/alexnet_imagenet.cc)
-target_link_libraries(alexnet_imagenet_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(vgg16_imagenet_fp32 dnn_sources/src/fp32/vgg16_imagenet.cc)
-target_link_libraries(vgg16_imagenet_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(resnet50_imagenet_fp32 dnn_sources/src/fp32/resnet50_imagenet.cc)
-target_link_libraries(resnet50_imagenet_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-#********* FP16 TensorRT Source Builds ******
-
-add_executable(lenet_mnist_fp16 dnn_sources/src/fp16/lenet_mnist_half.cc)
-target_link_libraries(lenet_mnist_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(alexnet_cifar10_fp16 dnn_sources/src/fp16/alexnet_cifar10_half.cc)
-target_link_libraries(alexnet_cifar10_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(alexnet2_cifar10_fp16 dnn_sources/src/fp16/alexnet2_cifar10_half.cc)
-target_link_libraries(alexnet2_cifar10_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(resnet18_cifar10_fp16 dnn_sources/src/fp16/resnet18_cifar10_half.cc)
-target_link_libraries(resnet18_cifar10_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(vgg16_cifar10_fp16 dnn_sources/src/fp16/vgg16_cifar10_half.cc)
-target_link_libraries(vgg16_cifar10_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(vgg16_cifar100_fp16 dnn_sources/src/fp16/vgg16_cifar100_half.cc)
-target_link_libraries(vgg16_cifar100_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(mobilenet_cifar10_fp16 dnn_sources/src/fp16/mobilenet_half.cc)
-target_link_libraries(mobilenet_cifar10_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
diff --git a/hpvm/projects/hpvm-tensor-rt/soc_simulator/promise_timing_model.cpp b/hpvm/projects/hpvm-tensor-rt/soc_simulator/promise_timing_model.cpp
index 87150f45a3a967443a947cf1b95b0e8d6fbae842..5bcd8173a5966888d89238cd94c50484cd719a69 100644
--- a/hpvm/projects/hpvm-tensor-rt/soc_simulator/promise_timing_model.cpp
+++ b/hpvm/projects/hpvm-tensor-rt/soc_simulator/promise_timing_model.cpp
@@ -27,7 +27,7 @@ Scratchpad::Scratchpad(const bool enable,
                        const unsigned size,
                        const double dram_latency,
                        const double dram_bandwidth)
-    : enable_(enable), dram_(dram_latency, dram_bandwidth) {
+    : dram_(dram_latency, dram_bandwidth), enable_(enable) {
 
   num_lines_ = size / line_size;
   lines_.resize(num_lines_);
@@ -78,7 +78,7 @@ std::pair<double, double> Scratchpad::access(const unsigned address,
 
   // Keep reading line by line until everything is read
   while (num_bytes_remaining > 0) {
-    if (lines_[getIndex(addr)] == address) {
+    if ((unsigned) lines_[getIndex(addr)] == address) {
       // Hit
       hits++;
     } else {
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensorUtils.h
similarity index 50%
rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
rename to hpvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensorUtils.h
index 61fd362afcc665e21a7ba8636c8df778ac95184e..d4961d19b9326daa4571d066dfe2b3177f6a78d4 100644
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
+++ b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensorUtils.h
@@ -3,23 +3,35 @@
 #ifndef UTILS_HEADER
 #define UTILS_HEADER
 
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
 #include <sstream>
 #include <vector>
 #include <bits/stdc++.h>
 #include <tensor_runtime.h>
 #include <tensor.h>
 #include <cmath>
-#include <string.h>
+
 
 std::vector<float> run_accuracies;
 std::string model_params_path = "../../test/dnn_benchmarks/model_params/";
 
+
+void printTensorInfo(void *tensor_ptr) {
+
+  struct Tensor *tensor = (struct Tensor *)tensor_ptr;
+
+  if (tensor->gpu_data != NULL) {
+    printf("Successful cudaMalloc \n");
+  }
+
+  printf("tensor dims = %d \n", tensor->dims.num_dims);
+  printf("dim1_size = %lu \n", tensor->dims.dim_sizes[0]);
+  printf("dim2_size = %lu \n", tensor->dims.dim_sizes[1]);
+  printf("num_elems = %lu \n", tensor->num_elems);
+}
+
 // FIXIT: Move this to debug.h and include in all files
-void dumpWeightsToFile(const char *file_name, void *weights_ptr) {
+void dumpWeightsToFile(char *file_name, void *weights_ptr) {
+
   struct Tensor *weights = (Tensor *)weights_ptr;
   // Move data back to host
   hpvm_request_tensor(weights, 0);
@@ -31,6 +43,10 @@
     abort();
   }
 
+  // printf("size_in_bytes = %lu \n", weights->size_in_bytes);
+  size_t bytes_written =
+      fwrite(weights->host_data, 1, weights->size_in_bytes, fp);
+  // printf("bytes_written = %lu \n", bytes_written);
+
   fclose(fp);
 }
@@ -58,21 +74,13 @@ void fillWithOnesAndTwos(void *tensor_ptr) {
 
   // initialization is specific to the floating point type
   if (tensor->data_type == CUDNN_DATA_FLOAT) {
     float *data_arr = (float *)tensor->host_data;
-
-    for (unsigned int i = 0; i < tensor->num_elems; i++) {
-      if (i % 2 == 0)
-        data_arr[i] = 1.0;
-      else
-        data_arr[i] = 2.0;
-    }
-
-    /*for(unsigned int i = 0; i < tensor->num_elems/2; i++){
+    for (unsigned int i = 0; i < tensor->num_elems / 2; i++) {
       data_arr[i] = 1.0;
     }
-    for(unsigned int i = tensor->num_elems/2; i < tensor->num_elems; i++){
+    for (unsigned int i = tensor->num_elems / 2; i < tensor->num_elems; i++) {
       data_arr[i] = 2.0;
-    }*/
+    }
   }
 }
 
@@ -106,6 +114,18 @@ void fillTensorWithNegOnes(void *tensor_ptr) {
   }
 }
 
+void fillTensorVals(void *tensor_ptr) {
+
+  struct Tensor *tensor = (struct Tensor *)tensor_ptr;
+  // initialization is specific to the floating point type
+  if (tensor->data_type == CUDNN_DATA_FLOAT) {
+    float *data_arr = (float *)tensor->host_data;
+    for (unsigned int i = 0; i < tensor->num_elems; i++) {
+      data_arr[i] = i + 1;
+    }
+  }
+}
+
 void printTensorValues(void *tensor_ptr) {
 
   struct Tensor *tensor = (struct Tensor *)tensor_ptr;
@@ -113,14 +133,11 @@
   hpvm_request_tensor(tensor, 0);
 
   // printing is specific to the floating point type
-  if (tensor->data_type != CUDNN_DATA_FLOAT) {
-    // printf("\n WARNING: The tensor is non-float type tensor \n\n");
-  }
-
-  float *data_arr = (float *)tensor->host_data;
-
-  for (unsigned int i = 0; i < tensor->num_elems; i++) {
-    printf("%f,", data_arr[i]);
+  if (tensor->data_type == CUDNN_DATA_FLOAT) {
+    float *data_arr = (float *)tensor->host_data;
+    for (unsigned int i = 0; i < tensor->num_elems; i++) {
+      printf("%f,", data_arr[i]);
+    }
   }
 
   printf("\n");
@@ -131,11 +148,49 @@ void printTensorDims(void *tensor_ptr) {
 
   struct Tensor *tensor = (struct Tensor *)tensor_ptr;
   printf("Num_elems = %lu \n", tensor->num_elems);
-  for (int i = 0; i < tensor->dims.num_dims; i++) {
+  for (unsigned int i = 0; i < tensor->dims.num_dims; i++) {
     printf("dim[%d] = %lu \n", i, tensor->dims.dim_sizes[i]);
   }
 }
 
+void compareTensors(void *tensor1_ptr, void *tensor2_ptr) {
+
+  struct Tensor *tensor1 = (struct Tensor *)tensor1_ptr;
+  struct Tensor *tensor2 = (struct Tensor *)tensor2_ptr;
+
+  hpvm_request_tensor(tensor1, 0);
+  hpvm_request_tensor(tensor2, 0);
+
+  float *tensor_data1 = (float *)tensor1->host_data;
+  float *tensor_data2 = (float *)tensor2->host_data;
+
+  for (unsigned int i = 0; i < tensor1->num_elems; i++) {
+    if (tensor_data1[i] != tensor_data2[i]) {
+      printf("Tensor data mismatch at index %d \n", i);
+      abort();
+    }
+  }
+}
+
+void compareValues(void *tensor_ptr, float *data, size_t num_elems) {
+
+  struct Tensor *tensor = (struct Tensor *)tensor_ptr;
+
+  hpvm_request_tensor(tensor, 0);
+
+  float *tensor_data = (float *)tensor->host_data;
+  for (unsigned int i = 0; i < num_elems; i++) {
+    if (tensor_data[i] != data[i]) {
+      printf("Tensor data mismatch");
+      abort();
+    }
+  }
+}
+
+
+
+
 struct Tensor *readTrainedWeights(const char *file_name, int data_type,
                                   long int dim1_size, long int dim2_size,
                                   long int dim3_size, long int dim4_size) {
@@ -146,7 +201,7 @@
   long int size_in_bytes =
       type_size * dim1_size * dim2_size * dim3_size * dim4_size;
   float *tensor_data = (float *)malloc(sizeof(float) * num_elems);
-  // printf("size_in_bytes = %lu \n", size_in_bytes);
+  printf("size_in_bytes = %lu \n", size_in_bytes);
 
   int file_header_size = 0;
 
@@ -157,7 +212,11 @@
   }
 
   fseek(file, file_header_size, SEEK_CUR); // Skipping the file header
-  fread(tensor_data, 1, size_in_bytes, file);
+  size_t bytes_read = fread(tensor_data, 1, size_in_bytes, file);
+
+  // printf("size in bytes = %lu, bytes read = %lu \n", size_in_bytes,
+  //        bytes_read);
+
   fclose(file);
 
   struct Tensor *weights = (struct Tensor *)create4DTensor(
@@ -170,9 +229,9 @@
   return weights;
 }
 
-struct Tensor *readInputBatch(const char *file_name, int data_type,
-                              long int start, long int end, long int dim2_size,
-                              long int dim3_size, long int dim4_size) {
+struct Tensor *readInputBatch(const char *file_name, long data_type,
+                              long start, long end,
+                              long dim2_size, long dim3_size, long dim4_size) {
 
   long int dim1_size = end - start;
   // FIXIT: Don't assume floating point types
@@ -191,9 +250,12 @@
   }
 
   fseek(file, file_header_size, SEEK_SET); // Skipping the file header
-  fread(tensor_data, 1, size_in_bytes, file);
+  size_t bytes_read = fread(tensor_data, 1, size_in_bytes, file);
+
   fclose(file);
 
+  // printf ("FIXED input BATCH read \n");
+
   struct Tensor *weights = (struct Tensor *)create4DTensor(
       data_type, nchw, dim1_size, dim2_size, dim3_size, dim4_size);
 
@@ -203,10 +265,7 @@
   return weights;
 }
 
-uint8_t *readLabelsBatch(const char *labels_file, int start, int end) {
-
-  int num_labels = end - start;
-  int file_header_size = sizeof(uint8_t) * start;
+uint8_t *readLabels(const char *labels_file, int num_labels) {
 
   uint8_t *labels = (uint8_t *)malloc(sizeof(uint8_t) * num_labels);
   FILE *file = fopen(labels_file, "rb");
@@ -215,14 +274,30 @@
     abort();
   }
 
-  fseek(file, file_header_size, SEEK_SET); // Skipping the file header
-  fread(labels, 1, sizeof(uint8_t) * num_labels, file);
+  size_t bytes_read = fread(labels, 1, sizeof(uint8_t) * num_labels, file);
+
+  fclose(file);
+
+  return labels;
+}
+
+uint32_t *readLabels3(const char *labels_file, int num_labels) {
+
+  uint32_t *labels = (uint32_t *)malloc(sizeof(uint32_t) * num_labels);
+  FILE *file = fopen(labels_file, "rb");
+  if (file == NULL) {
+    printf("Data file %s is not found. Aborting...\n", labels_file);
Aborting...\n", labels_file); + abort(); + } + + size_t bytes_read = fread(labels, 1, sizeof(uint32_t) * num_labels, file); + fclose(file); - // printf("--labels bytes_read = %lu \n", bytes_read); return labels; } + uint32_t *readLabelsBatch3(const char *labels_file, int start, int end) { int num_labels = end - start; @@ -236,29 +311,31 @@ uint32_t *readLabelsBatch3(const char *labels_file, int start, int end) { } fseek(file, file_header_size, SEEK_SET); // Skipping the file header - fread(labels, 1, sizeof(uint32_t) * num_labels, file); + + size_t bytes_read = fread(labels, 1, sizeof(uint32_t) * num_labels, file); + fclose(file); return labels; } -// NOTE: batch_size and num_classes are Unused arguments -float computeAccuracy2(uint8_t *labels, int batch_size, void *result_ptr, - size_t num_classes = 10) { + + +float computeAccuracy3(uint32_t *labels, void *result_ptr) { struct Tensor *result = (struct Tensor *)result_ptr; size_t batch_dim = result->dims.dim_sizes[0]; - num_classes = result->dims.dim_sizes[1]; + size_t num_classes = result->dims.dim_sizes[1]; float *data = (float *)result->host_data; int num_errors = 0; - printf("batch_dim = %lu, channels = %lu \n", batch_dim, num_classes); + printf("batch_dim = %lu, num_classes = %lu \n", batch_dim, num_classes); for (unsigned int i = 0; i < batch_dim; i++) { int chosen = 0; - for (size_t id = 1; id < num_classes; ++id) { + for (unsigned int id = 1; id < num_classes; ++id) { if (data[i * num_classes + chosen] < data[i * num_classes + id]) chosen = id; } @@ -285,27 +362,49 @@ float computeAccuracy2(uint8_t *labels, int batch_size, void *result_ptr, return accuracy; } -float computeAccuracy3(uint32_t *labels, void *result_ptr) { +struct ClassProb { + float prob; + int index; +}; + +bool descendFloatComp(ClassProb obj1, ClassProb obj2) { + return obj1.prob > obj2.prob; +} + +float computeTop5Accuracy(uint8_t *labels, int num_labels, void *result_ptr, + unsigned num_classes = 10) { struct Tensor *result = (struct Tensor *)result_ptr; size_t batch_dim = result->dims.dim_sizes[0]; - size_t num_classes = result->dims.dim_sizes[1]; + size_t channels = result->dims.dim_sizes[1]; float *data = (float *)result->host_data; int num_errors = 0; - printf("batch_dim = %lu, num_classes = %lu \n", batch_dim, num_classes); + printf("batch_dim = %lu, channels = %lu \n", batch_dim, channels); - for (size_t i = 0; i < batch_dim; i++) { + for (unsigned int i = 0; i < num_labels; i++) { - uint32_t chosen = 0; - for (size_t id = 1; id < num_classes; ++id) { - if (data[i * num_classes + chosen] < data[i * num_classes + id]) - chosen = id; + std::vector<ClassProb> elem_probs; + for (unsigned int id = 0; id < num_classes; ++id) { + ClassProb cProb; + cProb.prob = data[i * channels + id]; + cProb.index = id; + elem_probs.push_back(cProb); } - if (chosen != labels[i]) - num_errors++; + std: + sort(elem_probs.begin(), elem_probs.end(), descendFloatComp); + // Check if any of top-5 predictions matches + bool matched = false; + for (int j = 0; j < 5; j++) { + ClassProb cProb = elem_probs[j]; + if (cProb.index == labels[i]) + matched = true; + } + + if (!matched) + num_errors += 1; } float accuracy = ((batch_dim - num_errors) * 1.0 / batch_dim * 1.0) * 100.0; @@ -344,11 +443,38 @@ void dumpFinalAccuracy(float accuracy) { run_accuracies.push_back(accuracy); } +void dumpAvgPSNR(float avg_psnr) { + + FILE *fp = fopen("avg_psnr", "w+"); + if (fp != NULL) { + std::ostringstream ss; + ss << std::fixed << avg_psnr; + std::string print_str = ss.str(); + fwrite(print_str.c_str(), 
+    fclose(fp);
+  }
+
+}
+
+void dumpPSNRStd(float psnr_std) {
+
+  FILE *fp = fopen("psnr_std.txt", "w+");
+  if (fp != NULL) {
+    std::ostringstream ss;
+    ss << std::fixed << psnr_std;
+    std::string print_str = ss.str();
+    fwrite(print_str.c_str(), 1, print_str.length(), fp);
+    fclose(fp);
+  }
+
+}
+
+
 void dumpExecutionAccuracies() {
 
   FILE *fp = fopen("run_accuracies.txt", "w+");
   if (fp != NULL) {
-    for (size_t i = 0; i < run_accuracies.size(); i++) {
+    for (unsigned int i = 0; i < run_accuracies.size(); i++) {
       float accuracy = run_accuracies[i];
       std::ostringstream ss;
       ss << std::fixed << accuracy;
@@ -360,4 +486,110 @@
 
   fclose(fp);
 }
 
+
+float readPSNRFromFile(const char *file_name) {
+
+  float psnr;
+  FILE *pFile = fopen(file_name, "r");
+  if (pFile == NULL) {
+    printf("ERROR: psnr.txt not found! \n");
+    abort();
+  }
+
+  fscanf(pFile, "%f", &psnr);
+  printf("**** PSNR read = %f \n\n", psnr);
+  return psnr;
+}
+
+float computePSNRViolation(void *gold_ptr, void *approx_ptr,
+                           float PSNR_threshold) {
+
+  PSNR_threshold = readPSNRFromFile("psnr.txt");
+  std::vector<float> psnr_list;
+
+  struct Tensor *gold_tensor = (struct Tensor *)gold_ptr;
+  struct Tensor *approx_tensor = (struct Tensor *)approx_ptr;
+
+  size_t *dim_sizes = gold_tensor->dims.dim_sizes;
+  size_t batch_dim = dim_sizes[0];
+  size_t image_size = dim_sizes[1] * dim_sizes[2] * dim_sizes[3];
+
+  printf("batch_dim = %lu, image_size = %lu \n", batch_dim, image_size);
+
+  float *gold_data = (float *)gold_tensor->host_data;
+  float *approx_data = (float *)approx_tensor->host_data;
+
+  FILE *fp = fopen("img_psnr.txt", "w+");
+
+  float sum_psnr = 0.0;
+  int num_errors = 0;
+  for (size_t i = 0; i < batch_dim; i++) {
+    float mse_sum = 0.0;
+    float max_val = -999999;
+    size_t offset = i * image_size;
+
+    for (size_t j = 0; j < image_size; j++) {
+      float diff = gold_data[offset + j] - approx_data[offset + j];
+      float diff_square = diff * diff;
+      mse_sum += diff_square;
+
+      if (max_val < gold_data[offset + j]) {
+        max_val = gold_data[offset + j];
+      }
+    }
+
+    mse_sum = mse_sum / image_size;
+    float psnr = 20 * log10(255 / sqrt(mse_sum));
+
+    sum_psnr += psnr;
+    if (psnr < PSNR_threshold)
+      num_errors += 1;
+
+    printf("PSNR value = %f \n", psnr);
+    psnr_list.push_back(psnr);
+
+    std::ostringstream ss;
+    ss << std::fixed << psnr;
+    std::string print_str = ss.str();
+    fwrite(print_str.c_str(), 1, print_str.length(), fp);
+    fwrite("\n", 1, 1, fp);
+  }
+
+  float violation_rate = (num_errors * 1.0) / batch_dim * 100.0;
+  printf("*** violation_rate= %f \n\n", violation_rate);
+
+  float avg_psnr = sum_psnr / batch_dim;
+  printf("*** avg_psnr =  %f \n\n", avg_psnr);
+  dumpAvgPSNR(avg_psnr);
+
+  float success_rate = 100.0 - violation_rate;
+  dumpFinalAccuracy(success_rate);
+
+  fclose(fp);
+
+  float var = 0.0;
+  for (size_t i = 0; i < batch_dim; i++) {
+    var = var + (psnr_list[i] - avg_psnr) * (psnr_list[i] - avg_psnr);
+  }
+
+  var /= batch_dim;
+  float std = sqrt(var);
+
+  dumpPSNRStd(std);
+
+  return violation_rate;
+}
+
+void dumpOutput(void *output_ptr, const char *file_name) {
+
+  struct Tensor *out_tensor = (struct Tensor *)output_ptr;
+  size_t size_in_bytes = out_tensor->size_in_bytes;
+  printf("** Output size = %lu \n", size_in_bytes);
+
+  float *host_data = (float *)out_tensor->host_data;
+  FILE *fd = fopen(file_name, "w+");
+  fwrite(host_data, 1, size_in_bytes, fd);
+  fclose(fd);
+}
+
 #endif
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/unit_tests.cc b/hpvm/projects/hpvm-tensor-rt/tests/unit_tests.cc
similarity index 99%
rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/unit_tests.cc
rename to hpvm/projects/hpvm-tensor-rt/tests/unit_tests.cc
index 746f62bce19b25c3b74bec4908cdc3c87bee034a..ffb4c3a809b3e936f6c27ebd7c11aef5c4460104 100644
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/unit_tests.cc
+++ b/hpvm/projects/hpvm-tensor-rt/tests/unit_tests.cc
@@ -6,7 +6,7 @@
 #include <string.h>
 #include "tensor_runtime.h"
 #include "tensor_cpu_runtime.h"
-#include "utils.h"
+#include "tensorUtils.h"
 #include "tensor_custom_ops_cpu.h"
 
 using namespace std;
diff --git a/hpvm/projects/torch2hpvm/torch2hpvm/compile.py b/hpvm/projects/torch2hpvm/torch2hpvm/compile.py
index 922b6795ade457ba4c961af4d2e70ce150e22e92..172448a60d4f65fc4aafc09c9a76d9cb492ff7b0 100644
--- a/hpvm/projects/torch2hpvm/torch2hpvm/compile.py
+++ b/hpvm/projects/torch2hpvm/torch2hpvm/compile.py
@@ -172,7 +172,7 @@ class ModelExporter:
         from subprocess import run
 
         args = [
-            "approxhpvm.py",
+            "hpvm-clang",
             str(self.codefile),
             str(output_binary),
             *self.compile_args,
diff --git a/hpvm/scripts/hpvm_installer.py b/hpvm/scripts/hpvm_installer.py
index 26166d6b58443f65deb37b2c5a8370c35b91eeff..cce8b3f07928d9ab096df3166bd02f4e6f8e1f5d 100755
--- a/hpvm/scripts/hpvm_installer.py
+++ b/hpvm/scripts/hpvm_installer.py
@@ -1,13 +1,13 @@
 #!/usr/bin/env python3
-from argparse import ArgumentParser, Namespace
+from argparse import ArgumentParser
 from os import chdir, environ, makedirs
 from pathlib import Path
 from subprocess import CalledProcessError, check_call
-from typing import List
+from typing import List, Union
 
 VERSION = "9.0.0"
 URL = "http://releases.llvm.org"
-WGET = "wget"
+DOWNLOADER = "curl"
 CLANG_DIR = f"cfe-{VERSION}.src"
 CLANG_TARBALL = f"{CLANG_DIR}.tar.xz"
 LLVM_DIR = f"llvm-{VERSION}.src"
@@ -30,7 +30,7 @@ LINKS = [
     "test",
     "tools",
 ]
-MAKE_TARGETS = ["approxhpvm.py"]
+MAKE_TARGETS = ["hpvm-clang"]
 MAKE_TEST_TARGETS = ["check-hpvm-dnn", "check-hpvm-pass"]
 
 # Relative to project root which is __file__.parent.parent
@@ -79,6 +79,11 @@ def parse_args(args=None):
         "Supported targets: AArch64, AMDGPU, ARM, BPF, Hexagon, Mips, MSP430, NVPTX, PowerPC, "
         "Sparc, SystemZ, X86, XCore.",
     )
+    parser.add_argument(
+        "--ninja",
+        action="store_true",
+        help="Use Ninja to build HPVM. Uses 'make' otherwise.",
+    )
     parser.add_argument(
         "-r", "--run-tests", action="store_true", help="Build and run test cases"
     )
@@ -150,7 +155,8 @@
 Example: "DCMAKE_BUILD_TYPE=Release DCMAKE_INSTALL_PREFIX=install". Arguments: """
     )
     args.cmake_args = input()
-    args.cmake_args = [f"-{arg}" for arg in args.cmake_args.split(" ")]
+    if args.cmake_args.strip() != "":
+        args.cmake_args = [f"-{arg}" for arg in args.cmake_args.split(" ")]
 
     args.no_params = not input_with_check(
         "Download DNN weights (recommended)? [y/n]: ", parse_yn, "Please enter y or n"
[y/n]: ", parse_yn, "Please enter y or n" @@ -165,6 +171,8 @@ def print_args(args): print("Running with the following options:") print(f" Automated build: {not args.no_build}") print(f" Build directory: {args.build_dir}") + build_sys = "ninja" if args.ninja else "make" + print(f" Build system: {build_sys}") print(f" Threads: {args.parallel}") print(f" Targets: {args.targets}") print(f" Download DNN weights: {not args.no_params}") @@ -181,7 +189,7 @@ def check_download_llvm_clang(): else: print(f"Downloading {LLVM_TARBALL}...") print(f"=============================") - check_call([WGET, f"{URL}/{VERSION}/{LLVM_TARBALL}"]) + download(f"{URL}/{VERSION}/{LLVM_TARBALL}", LLVM_TARBALL) check_call(["tar", "xf", LLVM_TARBALL]) check_call(["mv", LLVM_DIR, "llvm"]) tools = Path("llvm/tools") @@ -196,7 +204,7 @@ def check_download_llvm_clang(): chdir(tools) print(f"Downloading {CLANG_TARBALL}...") print(f"=============================") - check_call([WGET, f"{URL}/{VERSION}/{CLANG_TARBALL}"]) + download(f"{URL}/{VERSION}/{CLANG_TARBALL}", CLANG_TARBALL) check_call(["tar", "xf", CLANG_TARBALL]) check_call(["mv", CLANG_DIR, "clang"]) assert Path("clang/").is_dir(), "Problem with clang download. Exiting!" @@ -214,7 +222,7 @@ def check_download_model_params(): else: print(f"Downloading DNN model parameters: {MODEL_PARAMS_TAR}...") print(f"=============================") - check_call([WGET, MODEL_PARAMS_LINK, "-O", MODEL_PARAMS_TAR]) + download(MODEL_PARAMS_LINK, MODEL_PARAMS_TAR) print( f"Extracting DNN model parameters {MODEL_PARAMS_TAR} => {MODEL_PARAMS_DIR}..." ) @@ -255,7 +263,7 @@ def build( build_dir: Path, nthreads: int, targets: str, - build_test_targets: bool, + use_ninja: bool, cmake_additional_args: List[str], ): print("Now building...") @@ -271,13 +279,17 @@ def build( f"-DLLVM_TARGETS_TO_BUILD={targets}", *cmake_additional_args, ] + if use_ninja: + cmake_args.append("-GNinja") print(f"CMake: {' '.join(cmake_args)}") print(f"=============================") check_call(cmake_args) - make_args = ["make", f"-j{nthreads}", *MAKE_TARGETS] - print(f"Make: {' '.join(make_args)}") + + build_sys = "ninja" if use_ninja else "make" + build_args = [build_sys, f"-j{nthreads}", *MAKE_TARGETS] + print(f"Build system ({build_sys}): {' '.join(build_args)}") print(f"=============================") - check_call(make_args) + check_call(build_args) chdir(ROOT_DIR) @@ -291,12 +303,13 @@ def install_py_packages(): check_call([sys.executable, "-m", "pip", "install", str(package_home)]) -def run_tests(build_dir: Path, nthreads: int): +def run_tests(build_dir: Path, use_ninja: bool, nthreads: int): chdir(build_dir) - make_args = ["make", f"-j{nthreads}", *MAKE_TEST_TARGETS] - print(f"Tests: {' '.join(make_args)}") + build_sys = "ninja" if use_ninja else "make" + build_args = [build_sys, f"-j{nthreads}", *MAKE_TEST_TARGETS] + print(f"Tests: {' '.join(build_args)}") print(f"=============================") - check_call(make_args) + check_call(build_args) chdir(ROOT_DIR) @@ -310,6 +323,10 @@ def input_with_check(prompt: str, parse, prompt_when_invalid: str): return value +def download(link: str, output: Union[Path, str]): + check_call([DOWNLOADER, "-L", link, "-o", str(output)]) + + def main(): from sys import argv @@ -328,21 +345,16 @@ HPVM not installed. To complete installation, follow these instructions: - Create and navigate to a folder "./build" - Run "cmake ../llvm [options]". Find potential options in README.md.
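+- If you passed --ninja to this script, use "ninja" in place of "make" in the steps below.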
-- Run "make -j<number of threads> approxhpvm.py" and then "make install" +- Run "make -j<number of threads> hpvm-clang" and then "make install" For more details refer to README.md. """ ) return else: - build( - args.build_dir, - args.parallel, - args.targets, - args.run_tests, - args.cmake_args, - ) + build(args.build_dir, args.parallel, args.targets, args.ninja, args.cmake_args) if args.run_tests: - run_tests(args.build_dir, args.parallel) + run_tests(args.build_dir, args.ninja, args.parallel) else: print("Skipping tests.") diff --git a/hpvm/test/CMakeLists.txt b/hpvm/test/CMakeLists.txt index 660003538fe72d45e3dbfc1178fd296cdc7156b5..3c4f26472317f511edaab98c5e4a4f8ed7ba2dfb 100644 --- a/hpvm/test/CMakeLists.txt +++ b/hpvm/test/CMakeLists.txt @@ -1,4 +1,12 @@ include(../cmake/TestFile.cmake) # Generation of `.test` files in CMake + +# clang and clang++ are used to compile benchmarks +# in `benchmarks` and `dnn_benchmarks/hpvm-c`. +set(CLANG_C ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/clang) +set(CLANG_CXX ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/clang++) + add_subdirectory(hpvm_pass) # Passes test suite -add_subdirectory(dnn_benchmarks/hpvm-c) # DNN accuracy test suite +add_subdirectory(benchmarks) +add_subdirectory(dnn_benchmarks/hpvm-c) # HPVM-C DNN accuracy test suite +add_subdirectory(dnn_benchmarks/tensor-rt-src) # tensor_runtime DNN (build only, no tests) add_subdirectory(dnn_benchmarks/profiling) # hpvm-profiler test suite diff --git a/hpvm/test/benchmarks/CMakeLists.txt b/hpvm/test/benchmarks/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..4d4f0691fc0e70cd9b532947688da264b2ca4e7b --- /dev/null +++ b/hpvm/test/benchmarks/CMakeLists.txt @@ -0,0 +1,38 @@ +macro(hpvm_compile_util_sources compiler_flags util_srcs language_mode) + # "Returns" ${util_bitcodes}, a list of paths to generated bitcode (.ll) files + foreach(src ${util_srcs}) + set(src_path ${CMAKE_CURRENT_SOURCE_DIR}/${src}) + get_filename_component(src_name ${src_path} NAME) + set(output_path ${CMAKE_CURRENT_BINARY_DIR}/${src_name}.ll) + add_custom_command( + OUTPUT ${output_path} + COMMAND ${CLANG_C} -x${language_mode} + ${compiler_flags} ${src_path} -emit-llvm -S -o ${output_path} + DEPENDS ${src_path} + ) + list(APPEND util_bitcodes ${output_path}) + endforeach() +endmacro() + +function(add_hpvm_benchmark + target_name output_bin_name all_flags language_mode + main_src util_bitcodes +) + set(output_bin_path ${CMAKE_CURRENT_BINARY_DIR}/${output_bin_name}) + set(main_src_path ${CMAKE_CURRENT_SOURCE_DIR}/${main_src}) + if(util_bitcodes) + set(bitcodes_arg -b ${util_bitcodes}) + else() + set(bitcodes_arg "") + endif() + add_custom_command( + OUTPUT ${output_bin_path} + COMMAND hpvm-clang ${all_flags} -x${language_mode} + ${bitcodes_arg} -- ${main_src_path} ${output_bin_path} + DEPENDS ${main_src_path} ${util_bitcodes} hpvm-clang + ) + add_custom_target(${target_name} DEPENDS ${output_bin_path}) +endfunction(add_hpvm_benchmark) + +add_subdirectory(hpvm-cava) +add_subdirectory(pipeline) diff --git a/hpvm/test/benchmarks/hpvm-cava/CMakeLists.txt b/hpvm/test/benchmarks/hpvm-cava/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..d89a01a05669a0c49fecf1ccbd25029fce14143b --- /dev/null +++ b/hpvm/test/benchmarks/hpvm-cava/CMakeLists.txt @@ -0,0 +1,24 @@ +set(other_srcs src/load_cam_model.c src/cam_pipe_utility.c src/utility.c) +set( + compiler_flags + # Insanity warning: only -O1 works for this benchmark. 
+ -O1 -I ${CMAKE_CURRENT_SOURCE_DIR}/src + -DDMA_MODE -DDMA_INTERFACE_V3 +) + +# Sets ${util_bitcodes} +hpvm_compile_util_sources("${compiler_flags}" "${other_srcs}" "c") + +set(all_flags_cpu ${compiler_flags} "-DDEVICE=CPU_TARGET" "-lpthread") +add_hpvm_benchmark( + "hpvm_cava_cpu" "hpvm-cava-cpu" "${all_flags_cpu}" "c" + src/main.c "${util_bitcodes}" +) + +if(HPVM_USE_OPENCL) + set(all_flags_gpu ${compiler_flags} "-DDEVICE=GPU_TARGET" "--opencl" "-lpthread") + add_hpvm_benchmark( + "hpvm_cava_gpu" "hpvm-cava-gpu" "${all_flags_gpu}" "c" + src/main.c "${util_bitcodes}" + ) +endif() diff --git a/hpvm/test/benchmarks/hpvm-cava/Makefile b/hpvm/test/benchmarks/hpvm-cava/Makefile deleted file mode 100644 index 58dfa72aacb172252ea1c13ed3331322fb600861..0000000000000000000000000000000000000000 --- a/hpvm/test/benchmarks/hpvm-cava/Makefile +++ /dev/null @@ -1,131 +0,0 @@ -# This Makefile compiles the HPVM-CAVA pilot project. -# It builds HPVM-related dependencies, then the native camera pipeline ISP code. -# -# Paths to some dependencies (e.g., HPVM, LLVM) must exist in Makefile.config, -# which can be copied from Makefile.config.example for a start. - -CONFIG_FILE := ../include/Makefile.config - -ifeq ($(wildcard $(CONFIG_FILE)),) - $(error $(CONFIG_FILE) not found. See $(CONFIG_FILE).example) -endif -include $(CONFIG_FILE) - -# Compiler Flags - -LFLAGS += -lm -lrt - -ifeq ($(TARGET),) - TARGET = gpu -endif - -# Build dirs -SRC_DIR = src/ -CAM_PIPE_SRC_DIR = $(SRC_DIR) -BUILD_DIR = build/$(TARGET) -CURRENT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) - - -INCLUDES += -I$(SRC_DIR) -I$(CAM_PIPE_SRC_DIR) -ifneq ($(CONFUSE_ROOT),) - INCLUDES += -I$(CONFUSE_ROOT)/include - LFLAGS += -L$(CONFUSE_ROOT)/lib -endif - -EXE = cava-hpvm-$(TARGET) - -LFLAGS += -pthread - -## BEGIN HPVM MAKEFILE -LANGUAGE=hpvm -SRCDIR_OBJS=load_cam_model.ll cam_pipe_utility.ll utility.ll -OBJS_SRC=src/load_cam_model.c src/cam_pipe_utility.c src/utility.c -HPVM_OBJS=main.hpvm.ll -APP = $(EXE) -APP_CUDALDFLAGS=-lm -lstdc++ -APP_CFLAGS= $(INCLUDES) -DDMA_MODE -DDMA_INTERFACE_V3 -APP_CXXFLAGS=-ffast-math -O0 -I/opt/opencv/include -APP_LDFLAGS=$(LFLAGS) - -CFLAGS = -O1 $(APP_CFLAGS) $(PLATFORM_CFLAGS) -OBJS_CFLAGS = -O1 $(APP_CFLAGS) $(PLATFORM_CFLAGS) -CXXFLAGS = $(APP_CXXFLAGS) $(PLATFORM_CXXFLAGS) -LDFLAGS= $(APP_LDFLAGS) $(PLATFORM_LDFLAGS) - -HPVM_RT_PATH = $(LLVM_BUILD_DIR)/tools/hpvm/projects/hpvm-rt - -HPVM_RT_LIB = $(HPVM_RT_PATH)/hpvm-rt.bc - - -TESTGEN_OPTFLAGS = -load LLVMGenHPVM.so -genhpvm -globaldce - -ifeq ($(TARGET),seq) - DEVICE = CPU_TARGET - HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -dfg2llvm-cpu -clearDFG - HPVM_OPTFLAGS += -hpvm-timers-cpu -else - DEVICE = GPU_TARGET - HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -localmem -dfg2llvm-opencl -dfg2llvm-cpu -clearDFG - HPVM_OPTFLAGS += -hpvm-timers-cpu -hpvm-timers-ptx -endif - TESTGEN_OPTFLAGS += -hpvm-timers-gen - -CFLAGS += -DDEVICE=$(DEVICE) -CXXFLAGS += -DDEVICE=$(DEVICE) - -# Add BUILDDIR as a prefix to each element of $1 -INBUILDDIR=$(addprefix $(BUILD_DIR)/,$(1)) - -.PRECIOUS: $(BUILD_DIR)/%.ll - -OBJS = $(call INBUILDDIR,$(SRCDIR_OBJS)) -TEST_OBJS = $(call INBUILDDIR,$(HPVM_OBJS)) -KERNEL = $(TEST_OBJS).kernels.ll - -ifeq ($(TARGET),gpu) - KERNEL_OCL = $(TEST_OBJS).kernels.cl -endif - -HOST_LINKED = $(BUILD_DIR)/$(APP).linked.ll -HOST = $(BUILD_DIR)/$(APP).host.ll - -ifeq ($(OPENCL_PATH),) -FAILSAFE=no_opencl 
-else -FAILSAFE= -endif - -# Targets - -default: $(FAILSAFE) $(BUILD_DIR) $(KERNEL_OCL) $(EXE) - -clean : - if [ -f $(EXE) ]; then rm $(EXE); fi - if [ -f DataflowGraph.dot ]; then rm DataflowGraph.dot*; fi - if [ -d $(BUILD_DIR) ]; then rm -rf $(BUILD_DIR); fi - -$(KERNEL_OCL) : $(KERNEL) - $(OCLBE) $< -o $@ - -$(EXE) : $(HOST_LINKED) - $(CXX) -O3 $(LDFLAGS) $< -o $@ - -$(HOST_LINKED) : $(HOST) $(OBJS) $(HPVM_RT_LIB) - $(LLVM_LINK) $^ -S -o $@ - -$(HOST) $(KERNEL): $(BUILD_DIR)/$(HPVM_OBJS) - $(OPT) $(HPVM_OPTFLAGS) -S $< -o $(HOST) - -$(BUILD_DIR): - mkdir -p $(BUILD_DIR) - -$(BUILD_DIR)/%.ll : $(SRC_DIR)/%.c - $(CC) $(OBJS_CFLAGS) -emit-llvm -S -o $@ $< - -$(BUILD_DIR)/main.ll : $(SRC_DIR)/main.c - $(CC) $(CFLAGS) -emit-llvm -S -o $@ $< - -$(BUILD_DIR)/main.hpvm.ll : $(BUILD_DIR)/main.ll - $(OPT) $(TESTGEN_OPTFLAGS) $< -S -o $@ - -## END HPVM MAKEFILE diff --git a/hpvm/test/benchmarks/hpvm-cava/src/main.c b/hpvm/test/benchmarks/hpvm-cava/src/main.c index 09430239d7c06a0b2fab2a59c7e6310babe617c6..0d8fb37d0103b55234e59e8d2b66742faed207b6 100644 --- a/hpvm/test/benchmarks/hpvm-cava/src/main.c +++ b/hpvm/test/benchmarks/hpvm-cava/src/main.c @@ -53,6 +53,7 @@ typedef struct __attribute__((__packed__)) { } RootIn; typedef enum _argnum { + CAM_MODEL, RAW_IMAGE_BIN, OUTPUT_IMAGE_BIN, NUM_REQUIRED_ARGS, @@ -67,7 +68,7 @@ typedef struct _arguments { } arguments; static char prog_doc[] = "\nCamera pipeline on gem5-Aladdin.\n"; -static char args_doc[] = "path/to/raw-image-binary path/to/output-image-binary"; +static char args_doc[] = "path/to/cam-model path/to/raw-image-binary path/to/output-image-binary"; static struct argp_option options[] = { {"num-inputs", 'n', "N", 0, "Number of input images"}, {0}, @@ -724,14 +725,14 @@ int main(int argc, char *argv[]) { *transform_out, *gamut_out; float *TsTw, *ctrl_pts, *weights, *coefs, *tone_map, *l2_dist; - TsTw = get_TsTw("cam_models/NikonD7000/", wb_index); + TsTw = get_TsTw(args.args[CAM_MODEL], wb_index); float *trans = transpose_mat(TsTw, CHAN_SIZE, CHAN_SIZE); free(TsTw); TsTw = trans; - ctrl_pts = get_ctrl_pts("cam_models/NikonD7000/", num_ctrl_pts); - weights = get_weights("cam_models/NikonD7000/", num_ctrl_pts); - coefs = get_coefs("cam_models/NikonD7000/", num_ctrl_pts); - tone_map = get_tone_map("cam_models/NikonD7000/"); + ctrl_pts = get_ctrl_pts(args.args[CAM_MODEL], num_ctrl_pts); + weights = get_weights(args.args[CAM_MODEL], num_ctrl_pts); + coefs = get_coefs(args.args[CAM_MODEL], num_ctrl_pts); + tone_map = get_tone_map(args.args[CAM_MODEL]); input_scaled = (float *)malloc_aligned(bytes_fimage); result_scaled = (float *)malloc_aligned(bytes_fimage); diff --git a/hpvm/test/benchmarks/pipeline/CMakeLists.txt b/hpvm/test/benchmarks/pipeline/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..db2b8eeab1647f4ac72c05127877ecec1f949c90 --- /dev/null +++ b/hpvm/test/benchmarks/pipeline/CMakeLists.txt @@ -0,0 +1,15 @@ +find_package(OpenCV 2) +if(${OpenCV_FOUND}) + set( + all_flags + -O3 -I${OpenCV_INCLUDE_DIRS} + -ffast-math -fno-lax-vector-conversions -fno-vectorize -fno-slp-vectorize + -lpthread + ) + add_hpvm_benchmark("pipeline_cpu" "pipeline-cpu" "${all_flags}" "c++" src/main.cc "") + if(HPVM_USE_OPENCL) + add_hpvm_benchmark("pipeline_gpu" "pipeline-gpu" "${all_flags};--opencl" "c++" src/main.cc "") + endif() +else() + message(WARNING "opencv-2 not found; not compiling HPVM benchmark 'pipeline'.") +endif() diff --git a/hpvm/test/benchmarks/pipeline/Makefile b/hpvm/test/benchmarks/pipeline/Makefile deleted file 
mode 100644 index 8a55393f241f30d840cbf85c31488e652c2023a0..0000000000000000000000000000000000000000 --- a/hpvm/test/benchmarks/pipeline/Makefile +++ /dev/null @@ -1,118 +0,0 @@ -# This Makefile compiles the HPVM-pipeline test. -# It builds HPVM-related dependencies, then the native camera pipeline ISP code. -# -# Paths to some dependencies (e.g., HPVM, LLVM) must exist in Makefile.config, -# which can be copied from Makefile.config.example for a start. - -CONFIG_FILE := ../include/Makefile.config - -ifeq ($(wildcard $(CONFIG_FILE)),) - $(error $(CONFIG_FILE) not found. See $(CONFIG_FILE).example) -endif -include $(CONFIG_FILE) - -# Compiler Flags - -LFLAGS += -lm -lrt - -ifeq ($(TARGET),) - TARGET = gpu -endif - -# Build dirs -SRC_DIR = src/ -BUILD_DIR = build/$(TARGET) -CURRENT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) - -EXE = pipeline-$(TARGET) - -INCLUDES += -I$(SRC_DIR) - -## BEGIN HPVM MAKEFILE -SRCDIR_OBJS= -HPVM_OBJS=main.hpvm.ll -APP = $(EXE) -APP_CFLAGS += $(INCLUDES) -ffast-math -O3 -fno-lax-vector-conversions -fno-vectorize -fno-slp-vectorize -APP_CXXFLAGS += $(INCLUDES) -ffast-math -O3 -fno-lax-vector-conversions -fno-vectorize -fno-slp-vectorize -APP_LDFLAGS=`pkg-config opencv --libs` - -CFLAGS = $(APP_CFLAGS) $(PLATFORM_CFLAGS) -OBJS_CFLAGS = $(APP_CFLAGS) $(PLATFORM_CFLAGS) -CXXFLAGS = $(APP_CXXFLAGS) $(PLATFORM_CXXFLAGS) -LDFLAGS= $(APP_LDFLAGS) $(PLATFORM_LDFLAGS) - -HPVM_RT_PATH = $(LLVM_BUILD_DIR)/tools/hpvm/projects/hpvm-rt -HPVM_RT_LIB = $(HPVM_RT_PATH)/hpvm-rt.bc - -TESTGEN_OPTFLAGS = -load LLVMGenHPVM.so -genhpvm -globaldce - -ifeq ($(TARGET),seq) - DEVICE = CPU_TARGET - HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -dfg2llvm-cpu -clearDFG - HPVM_OPTFLAGS += -hpvm-timers-cpu -else - DEVICE = GPU_TARGET - HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -localmem -dfg2llvm-opencl -dfg2llvm-cpu -clearDFG - HPVM_OPTFLAGS += -hpvm-timers-cpu -hpvm-timers-ptx -endif - TESTGEN_OPTFLAGS += -hpvm-timers-gen - -CFLAGS += -DDEVICE=$(DEVICE) -CXXFLAGS += -DDEVICE=$(DEVICE) - -# Add BUILDDIR as a prefix to each element of $1 -INBUILDDIR=$(addprefix $(BUILD_DIR)/,$(1)) - -.PRECIOUS: $(BUILD_DIR)/%.ll - -OBJS = $(call INBUILDDIR,$(SRCDIR_OBJS)) -TEST_OBJS = $(call INBUILDDIR,$(HPVM_OBJS)) -KERNEL = $(TEST_OBJS).kernels.ll - -ifeq ($(TARGET),seq) -else - KERNEL_OCL = $(TEST_OBJS).kernels.cl -endif - -HOST_LINKED = $(BUILD_DIR)/$(APP).linked.ll -HOST = $(BUILD_DIR)/$(APP).host.ll - -ifeq ($(OPENCL_PATH),) -FAILSAFE=no_opencl -else -FAILSAFE= -endif - -# Targets -default: $(FAILSAFE) $(BUILD_DIR) $(KERNEL_OCL) $(EXE) - -clean : - if [ -f $(EXE) ]; then rm $(EXE); fi - if [ -f DataflowGraph.dot ]; then rm DataflowGraph.dot*; fi - if [ -d $(BUILD_DIR) ]; then rm -rf $(BUILD_DIR); fi - -$(KERNEL_OCL) : $(KERNEL) - $(OCLBE) $< -o $@ - -$(EXE) : $(HOST_LINKED) - $(CXX) -O3 $(LDFLAGS) $< -o $@ - -$(HOST_LINKED) : $(HOST) $(OBJS) $(HPVM_RT_LIB) - $(LLVM_LINK) $^ -S -o $@ - -$(HOST) $(KERNEL): $(BUILD_DIR)/$(HPVM_OBJS) - $(OPT) $(HPVM_OPTFLAGS) -S $< -o $(HOST) - -$(BUILD_DIR): - mkdir -p $(BUILD_DIR) - -$(BUILD_DIR)/%.ll : $(SRC_DIR)/%.cc - $(CC) $(OBJS_CFLAGS) -emit-llvm -S -o $@ $< - -$(BUILD_DIR)/main.ll : $(SRC_DIR)/main.cc - $(CC) $(CXXFLAGS) -emit-llvm -S -o $@ $< - -$(BUILD_DIR)/main.hpvm.ll : $(BUILD_DIR)/main.ll - $(OPT) $(TESTGEN_OPTFLAGS) $< -S -o $@ - -## END HPVM MAKEFILE diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/CMakeLists.txt 
b/hpvm/test/dnn_benchmarks/hpvm-c/CMakeLists.txt index 3a0c6534e02ce82fdfd02f483f71c6be1a9ab433..9f34317d34157d57468c60cb854828b5c54f1cde 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/CMakeLists.txt +++ b/hpvm/test/dnn_benchmarks/hpvm-c/CMakeLists.txt @@ -1,53 +1,21 @@ -# First get approxhpvm.py which we then use to compile benchmarks. -get_filename_component(APPROXHPVM_PY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/approxhpvm.py REALPATH) - -# Configure config.h which tells the benchmarks where's the model parameter directory. -# We can also use the one in tensor_runtime, but we're avoiding that so as to -# decouple things. +# Each source file contains a @MODEL_PARAMS_DIR@ waiting to be filled in. set(MODEL_PARAMS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../model_params/") -configure_file( - "include/config.h.in" - "${CMAKE_CURRENT_BINARY_DIR}/include/config.h" -) -# This will be an extra include directory (specific to these benchmarks) -# and we'll give this to approxhpvm.py -set(CONFIG_INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/include") - -# --[ llvm-lit test setup -# lit.cfg.py looks for tests in CMAKE_CURRENT_BINARY_DIR (see lit.cfg.py) -# as most of the tests require some kind of compilation / generation -# which is best done over there. -configure_lit_site_cfg( - ../../lit.site.cfg.py.in - ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg.py - MAIN_CONFIG - ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py -) -add_lit_testsuite(check-hpvm-dnn "Running HPVM DNNs" - ${CMAKE_CURRENT_BINARY_DIR} - DEPENDS dnn_benchmarks # Compile all dnn benchmarks to run them - ARGS "-j1" # Run DNN benchmarks sequentially -) -# Install an accuracy comparator under build/bin -set(BIN_DIR ${LLVM_BINARY_DIR}/${LLVM_TOOLS_INSTALL_DIR}) -add_custom_command( - OUTPUT ${BIN_DIR}/check_dnn_acc.py - COMMAND cp ${CMAKE_CURRENT_SOURCE_DIR}/check_dnn_acc.py ${BIN_DIR} - COMMAND chmod +x ${BIN_DIR}/check_dnn_acc.py - DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/check_dnn_acc.py -) - set(test_compile_targets "") -function(compile_hpvm_c bin_filename src_filepath codegen_target) +function(compile_hpvm_c target_name src_filepath codegen_target) + set(generated_file_path "${CMAKE_CURRENT_BINARY_DIR}/${target_name}.cpp") + set(output_bin_path "${CMAKE_CURRENT_BINARY_DIR}/hpvm_${target_name}") + configure_file(${src_filepath} ${generated_file_path}) + # Add an "hpvm_" prefix here because the Ninja generator doesn't allow + # the output file and the custom target to share the same name. add_custom_command( - OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${bin_filename} - DEPENDS ${src_filepath} approxhpvm.py - COMMAND ${APPROXHPVM_PY} - ${src_filepath} ${CMAKE_CURRENT_BINARY_DIR}/${bin_filename} - -t ${codegen_target} -I ${CONFIG_INCLUDE_DIR} ${ARGV} + OUTPUT ${output_bin_path} + DEPENDS ${generated_file_path} hpvm-clang + COMMAND hpvm-clang + ${generated_file_path} ${output_bin_path} -O3 -fno-exceptions + -t ${codegen_target} -I ${CMAKE_CURRENT_SOURCE_DIR}/include ${ARGN} ) - add_custom_target(${bin_filename} DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${bin_filename}) - set(test_compile_targets ${test_compile_targets} ${bin_filename} PARENT_SCOPE) + add_custom_target(${target_name} DEPENDS ${output_bin_path}) + set(test_compile_targets ${test_compile_targets} ${target_name} PARENT_SCOPE) endfunction(compile_hpvm_c) @@ -61,7 +29,9 @@ function(hpvm_add_dnn_test benchmark_target) # Removes the final_accuracy file llvm_test_run(EXECUTABLE rm final_accuracy) # llvm_add_test creates .test file to given output path for given binary.
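+ # (llvm-lit later discovers these generated .test files and executes them.)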
- llvm_add_test(${benchmark_target}.test ${CMAKE_CURRENT_BINARY_DIR}/${benchmark_target}) + # Also add "hpvm_" prefix here because compile_hpvm_c() did. + llvm_add_test(${benchmark_target}.test ${CMAKE_CURRENT_BINARY_DIR}/hpvm_${benchmark_target}) # TODO: add_dependencies endfunction(hpvm_add_dnn_test) @@ -79,6 +48,31 @@ foreach(dir ${entries}) hpvm_add_dnn_test(${dirname}_cudnn) endforeach(dir) +# Install an accuracy comparator under build/bin for test suite. +set(BIN_DIR ${LLVM_BINARY_DIR}/${LLVM_TOOLS_INSTALL_DIR}) +add_custom_command( + OUTPUT ${BIN_DIR}/check_dnn_acc.py + COMMAND cp ${CMAKE_CURRENT_SOURCE_DIR}/check_dnn_acc.py ${BIN_DIR} + COMMAND chmod +x ${BIN_DIR}/check_dnn_acc.py + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/check_dnn_acc.py +) + message(STATUS "List of HPVM-C DNN benchmarks: ${test_compile_targets}") add_custom_target(dnn_benchmarks DEPENDS ${test_compile_targets} ${BIN_DIR}/check_dnn_acc.py) message(STATUS "Target name for compiling all DNN benchmarks: dnn_benchmarks") + +# --[ llvm-lit test setup +# lit.cfg.py looks for tests in CMAKE_CURRENT_BINARY_DIR (see lit.cfg.py) +# as most of the tests require some kind of compilation / generation +# which is best done over there. +configure_lit_site_cfg( + ../../lit.site.cfg.py.in + ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg.py + MAIN_CONFIG + ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py +) +add_lit_testsuite(check-hpvm-dnn "Running HPVM DNNs" + ${CMAKE_CURRENT_BINARY_DIR} + DEPENDS dnn_benchmarks # Compile all dnn benchmarks to run them + ARGS "-j1" # Run DNN benchmarks sequentially +) diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10.cpp index 860e3b6423bc78d073096a981f765bed10fb73a7..39f49784d76470c4e0bab213127369806e1e2531 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10.cpp @@ -429,7 +429,7 @@ int main(int argc, char *argv[]) { } } - std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/alexnet2_cifar10/"; + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/alexnet2_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10_cudnn.cpp index f44e19dece121cb01a1f3e6a8bf9e27ea945e6ce..dafd1a6ae084c4e1bf819ce1ac94e667c696eb24 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10_cudnn.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10_cudnn.cpp @@ -434,7 +434,7 @@ int main(int argc, char *argv[]) { } } - std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/alexnet2_cifar10/"; + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/alexnet2_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10.cpp index 
6d8973ad982b1aa3b206a0cf40ee1888c37e293f..64350c590bb181fa4eaab4b2bf5fb37f69e11c09 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10.cpp @@ -380,7 +380,7 @@ int main(int argc, char *argv[]) { } } - std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/alexnet_cifar10/"; + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/alexnet_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); uint8_t *labels = readLabels(labels_path.c_str(), 5000); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10_cudnn.cpp index b2a940d501d8b1c2e29dbe7240012ace8197bbb4..72af2ff4a1b33aabac427d203101c32c4a7403c7 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10_cudnn.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10_cudnn.cpp @@ -386,7 +386,7 @@ int main(int argc, char *argv[]) { } } - std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/alexnet_cifar10/"; + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/alexnet_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); uint32_t *labels = readLabels3(labels_path.c_str(), 5000); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp index 474ab64cadf3eac158d39e6e1e6686765c3bac36..37e7a34a51a14b6903d549f271d3c0c83822fec8 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp @@ -478,7 +478,7 @@ int main(int argc, char *argv[]) { } } - std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/alexnet_imagenet/"; + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/alexnet_imagenet/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet_cudnn.cpp index 10e95202f2e2188a9dcd1c12a168a612f897fcf9..1206d7bac4b9dcff2b4cfd7183f4a3e5f65d73d9 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet_cudnn.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet_cudnn.cpp @@ -483,7 +483,7 @@ int main(int argc, char *argv[]) { } } - std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/alexnet_imagenet/"; + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/alexnet_imagenet/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/data/tuner_confs.txt b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/data/tuner_confs.txt index 
32a9642d38ab816246b9e5cca01c6efcec3a2d8d..b8224da41c52f093bd61a23bae05eb09f39148a0 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/data/tuner_confs.txt +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/data/tuner_confs.txt @@ -9,400 +9,400 @@ conf1 1 1 98.7 0.0 ----- +++++ conf2 1.828613181003043 2.071721708828981 98.65 0.04999999999999716 -1 gpu conv perf_fp16 156 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 261 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf_fp16 156 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 261 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf3 1.8936889628815377 2.139779619692146 98.65 0.04999999999999716 -1 gpu conv perf_fp16 152 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf_fp16 152 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf4 1.8936889628815377 2.139779619692146 98.65 0.04999999999999716 -1 gpu conv perf_fp16 152 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf_fp16 152 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf5 1.8936889628815377 2.139779619692146 98.65 0.04999999999999716 -1 gpu conv perf_fp16 152 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf_fp16 152 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf6 1.8247639611533713 2.0227145446958756 98.64 0.060000000000002274 -1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 261 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv fp16 11 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 261 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf7 1.8247639611533713 2.0227145446958756 98.64 0.060000000000002274 -1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 261 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv fp16 11 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 261 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf8 1.8406161850501603 2.037849502542524 98.64 0.060000000000002274 -1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv fp16 11 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf9 1.8406161850501603 2.037849502542524 98.64 0.060000000000002274 -1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv 
samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv fp16 11 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf10 1.8406161850501603 2.037849502542524 98.64 0.060000000000002274 -1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv fp16 11 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf11 1.8663357888260776 2.115790921611576 98.64 0.060000000000002274 -1 gpu conv perf_fp16 155 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf_fp16 155 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf12 1.8663357888260776 2.115790921611576 98.64 0.060000000000002274 -1 gpu conv perf_fp16 155 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf_fp16 155 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf13 1.8663357888260776 2.115790921611576 98.64 0.060000000000002274 -1 gpu conv perf_fp16 155 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf_fp16 155 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf14 1.8645645142051612 2.1037012333044935 98.61999999999999 0.0800000000000125 -1 gpu conv perf_fp16 167 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf_fp16 167 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf15 1.8645645142051612 2.1037012333044935 98.61999999999999 0.0800000000000125 -1 gpu conv perf_fp16 167 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf_fp16 167 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf16 1.8645645142051612 2.1037012333044935 98.61999999999999 0.0800000000000125 -1 gpu conv perf_fp16 167 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf_fp16 167 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf17 2.2168527051833635 2.453341076720038 98.61999999999999 0.0800000000000125 -1 gpu 
conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv samp_fp16 264 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf18 2.2168527051833635 2.453341076720038 98.61999999999999 0.0800000000000125 -1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv samp_fp16 264 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf19 2.2168527051833635 2.453341076720038 98.61999999999999 0.0800000000000125 -1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv samp_fp16 264 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf20 1.8406161850501603 2.037849502542524 98.6 0.10000000000000853 -1 gpu conv fp16 12 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv fp16 12 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf21 1.8406161850501603 2.037849502542524 98.6 0.10000000000000853 -1 gpu conv fp16 12 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv fp16 12 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf22 1.8406161850501603 2.037849502542524 98.6 0.10000000000000853 -1 gpu conv fp16 12 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv fp16 12 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf23 1.8406161850501603 2.037849502542524 98.6 0.10000000000000853 -1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv fp16 11 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf24 1.8406161850501603 2.037849502542524 98.6 0.10000000000000853 -1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv fp16 11 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf25 1.8406161850501603 2.037849502542524 98.6 0.10000000000000853 -1 gpu conv 
fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv fp16 11 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf26 2.200653361151419 2.425091789360736 98.6 0.10000000000000853 -1 gpu conv samp_fp16 266 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv samp_fp16 266 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf27 2.200653361151419 2.425091789360736 98.6 0.10000000000000853 -1 gpu conv samp_fp16 266 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv samp_fp16 266 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf28 1.8406161850501603 2.037849502542524 98.58 0.12000000000000455 -1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv fp16 11 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf29 1.8406161850501603 2.037849502542524 98.58 0.12000000000000455 -1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv fp16 11 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf30 1.8406161850501603 2.037849502542524 98.58 0.12000000000000455 -1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv fp16 11 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf31 1.8445326456180258 2.087601822059355 98.58 0.12000000000000455 -1 gpu conv perf_fp16 156 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf_fp16 156 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf32 1.8445326456180258 2.087601822059355 98.58 0.12000000000000455 -1 gpu conv perf_fp16 156 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf_fp16 156 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf33 1.8445326456180258 2.087601822059355 98.58 0.12000000000000455 -1 gpu conv perf_fp16 156 
add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf_fp16 156 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf34 1.8916677984300285 2.155437579874673 98.58 0.12000000000000455 -1 gpu conv perf_fp16 158 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf_fp16 158 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf35 1.8916677984300285 2.155437579874673 98.58 0.12000000000000455 -1 gpu conv perf_fp16 158 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf_fp16 158 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf36 1.8916677984300285 2.155437579874673 98.58 0.12000000000000455 -1 gpu conv perf_fp16 158 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf_fp16 158 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf37 1.8649226857257986 2.1076025277601325 98.56 0.14000000000000057 -1 gpu conv perf_fp16 168 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf_fp16 168 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf38 1.8649226857257986 2.1076025277601325 98.56 0.14000000000000057 -1 gpu conv perf_fp16 168 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf_fp16 168 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf39 1.8649226857257986 2.1076025277601325 98.56 0.14000000000000057 -1 gpu conv perf_fp16 168 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf_fp16 168 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf40 1.8463058650555446 2.067271423078985 98.56 0.14000000000000057 -1 gpu conv perf_fp16 157 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf_fp16 157 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf41 1.8463058650555446 2.067271423078985 98.56 
0.14000000000000057 -1 gpu conv perf_fp16 157 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf_fp16 157 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf42 1.8463058650555446 2.067271423078985 98.56 0.14000000000000057 -1 gpu conv perf_fp16 157 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf_fp16 157 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf43 1.9234076467497994 2.1864740913112275 98.56 0.14000000000000057 -1 gpu conv perf_fp16 153 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf_fp16 153 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf44 1.9234076467497994 2.1864740913112275 98.56 0.14000000000000057 -1 gpu conv perf_fp16 153 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf_fp16 153 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf45 1.9234076467497994 2.1864740913112275 98.56 0.14000000000000057 -1 gpu conv perf_fp16 153 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf_fp16 153 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf46 1.8698191484268973 2.13979218727595 98.54 0.1599999999999966 -1 gpu conv perf_fp16 159 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf_fp16 159 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf47 1.8698191484268973 2.13979218727595 98.54 0.1599999999999966 -1 gpu conv perf_fp16 159 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf_fp16 159 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf48 1.8575043605938137 2.092057786757256 98.52 0.18000000000000682 -1 gpu conv perf_fp16 165 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf_fp16 165 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf49 
1.8575043605938137 2.092057786757256 98.52 0.18000000000000682 -1 gpu conv perf_fp16 165 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf_fp16 165 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf50 1.8575043605938137 2.092057786757256 98.52 0.18000000000000682 -1 gpu conv perf_fp16 165 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf_fp16 165 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf51 1.8534621507951072 2.1231113105788597 98.44000000000001 0.2599999999999909 -1 gpu conv perf_fp16 159 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 261 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf_fp16 159 add fp16 1 tanh fp16 12 pool_max fp16 12 +2 gpu conv samp_fp16 261 add fp16 1 tanh fp16 12 pool_max fp16 12 3 gpu mul fp16 12 add fp16 1 tanh fp16 1 4 gpu mul fp16 12 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist.cpp index 5c42f6953cfd9256cea73b39868a7ec571f18565..d7ab4238ebac5598b92c432aced85a602bb5ce89 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist.cpp @@ -282,7 +282,7 @@ int main(int argc, char *argv[]) { } } - std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/lenet_mnist/"; + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/lenet_mnist/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist_cudnn.cpp index 0c2568f81b701cb474a257b190be61b4bba45f3e..26acc65a99287ea9f20e037dd996635315d76e48 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist_cudnn.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist_cudnn.cpp @@ -287,7 +287,7 @@ int main(int argc, char *argv[]) { } } - std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/lenet_mnist/"; + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/lenet_mnist/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10.cpp index 01d027341686291c83e605bdeee1bbcffa68d6e9..5f8c63dbfbfb800dc6f60f9ed9a6108dee0a9a48 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10.cpp @@ -1984,7 +1984,7 @@ int main(int argc, char *argv[]) { } 
std::string dir_prefix = - std::string(MODEL_PARAMS_DIR) + "/mobilenet_cifar10/"; + std::string("@MODEL_PARAMS_DIR@") + "/mobilenet_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10_cudnn.cpp index e51e85dd980dd910389ec4415174e6e005f75c41..2070089053ef0b6e7e0ca33c2c6cc4cea17b8e29 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10_cudnn.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10_cudnn.cpp @@ -1989,7 +1989,7 @@ int main(int argc, char *argv[]) { } std::string dir_prefix = - std::string(MODEL_PARAMS_DIR) + "/mobilenet_cifar10/"; + std::string("@MODEL_PARAMS_DIR@") + "/mobilenet_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10.cpp index fa83c534d0639241205758018f8f7c37401e6b22..5b580f26821e67cc96c8347e485b792f40105176 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10.cpp @@ -1318,7 +1318,7 @@ int main(int argc, char *argv[]) { } } - std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/resnet18_cifar10/"; + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/resnet18_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10_cudnn.cpp index c7b789c2343a8dfd1e847652af2bd1d6adfd51f1..735e2c9abab91f00560faa5496e234321027b82c 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10_cudnn.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10_cudnn.cpp @@ -1249,7 +1249,7 @@ int main(int argc, char *argv[]) { } } - std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/resnet18_cifar10/"; + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/resnet18_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); uint32_t *labels = readLabels3(labels_path.c_str(), 5000); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp index 91d07e30469e675fd2027f29290e35a0db888174..160563064cc47effd463c4915b0c7f0d93bff56f 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp @@ -5151,7 +5151,7 @@ int main(int argc, char *argv[]) { } std::string dir_prefix = - std::string(MODEL_PARAMS_DIR) + 
"/resnet50_imagenet/"; + std::string("@MODEL_PARAMS_DIR@") + "/resnet50_imagenet/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet_cudnn.cpp index 932580e03e7ccc4495d8d76be2f7147369e36d68..c5cf2cb3a0177a5cce9ad0cf460484e63ded0ecd 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet_cudnn.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet_cudnn.cpp @@ -4927,7 +4927,7 @@ int main(int argc, char *argv[]) { } std::string dir_prefix = - std::string(MODEL_PARAMS_DIR) + "/resnet50_imagenet/"; + std::string("@MODEL_PARAMS_DIR@") + "/resnet50_imagenet/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp index 195c676c11d53b19e0d18ed4908198a929d188aa..bec6139c2d089e90d09fa239e1b15c9a835fd4ea 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp @@ -845,7 +845,7 @@ int main(int argc, char *argv[]) { } } - std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/vgg16_cifar10/"; + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/vgg16_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10_cudnn.cpp index c304237ea57ba15d48cff0773860cdc469fc2a04..4fa7d5c121bacff122821fe983ed443e3c6db249 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10_cudnn.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10_cudnn.cpp @@ -850,7 +850,7 @@ int main(int argc, char *argv[]) { } } - std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/vgg16_cifar10/"; + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/vgg16_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp index 4cd5c134293d85983146352175e278915ab1d2ba..8666030fba4390d29d9324f5a5c7d60324325f05 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp @@ -845,7 +845,7 @@ int main(int argc, char *argv[]) { } } - std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/vgg16_cifar100/"; + std::string dir_prefix = 
std::string("@MODEL_PARAMS_DIR@") + "/vgg16_cifar100/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100_cudnn.cpp index 532fca6b856f296624c21e9a18421763c4b70f48..6d01caa3b7c0875cff4f3e16131ddd09195e92b7 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100_cudnn.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100_cudnn.cpp @@ -850,7 +850,7 @@ int main(int argc, char *argv[]) { } } - std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/vgg16_cifar100/"; + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/vgg16_cifar100/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp index 8e299f40e6ddd04a3ce9f8d9dffff49b1de36189..b1b2b4f2e312b6372e10a2fce3ef12eab2dddded 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp @@ -893,7 +893,7 @@ int main(int argc, char *argv[]) { } } - std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/vgg16_imagenet/"; + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/vgg16_imagenet/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet_cudnn.cpp index 930a33e43c706e6e91475fc97671c39c23f63387..eb29e45805671072428318412f27b05d0da90199 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet_cudnn.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet_cudnn.cpp @@ -898,7 +898,7 @@ int main(int argc, char *argv[]) { } } - std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/vgg16_imagenet/"; + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/vgg16_imagenet/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/include/config.h.in b/hpvm/test/dnn_benchmarks/hpvm-c/include/config.h.in deleted file mode 100644 index 0eb8f3f24d0e51f2aaa12f1fd672043599490082..0000000000000000000000000000000000000000 --- a/hpvm/test/dnn_benchmarks/hpvm-c/include/config.h.in +++ /dev/null @@ -1 +0,0 @@ -#define MODEL_PARAMS_DIR "@MODEL_PARAMS_DIR@" diff --git a/hpvm/test/dnn_benchmarks/keras/Benchmark.py b/hpvm/test/dnn_benchmarks/keras/Benchmark.py index c225ff97661aef8f4fceb123b79190cb7c819dd7..f3d8e9e6b2268618dc835e3d27374a8f7d738a86 100644 --- a/hpvm/test/dnn_benchmarks/keras/Benchmark.py +++ 
b/hpvm/test/dnn_benchmarks/keras/Benchmark.py @@ -52,20 +52,23 @@ class Benchmark: try: subprocess.run([ - "approxhpvm.py", + "hpvm-clang", "-h" ], check=True, stdout=FNULL) except: - print ("\n\n ERROR: Could not find approxhpvm.py (HPVM compile script)!! \n\n") - print ("To Compile, Must set PATH to include approxhpvm.py script. Do the following: ") - print ("**** export PATH=${PATH_TO_YOUR_HPVM_INSTALLATION}/build/bin/:$PATH *****") + print(""" + +ERROR: Could not find hpvm-clang (HPVM compile script)!! + +hpvm-clang is installed into the Python environment that was used to compile HPVM. +Please try rerunning 'make -j hpvm-clang'.""") sys.exit(1) try: subprocess.run([ - "approxhpvm.py", src_file, target_binary, + "hpvm-clang", src_file, target_binary, "-t", "tensor", "--conf-file", approx_conf_file ], check=True) except: diff --git a/hpvm/test/dnn_benchmarks/profiling/test_hpvm_c_profiling.py b/hpvm/test/dnn_benchmarks/profiling/test_hpvm_c_profiling.py index 5f4a96740cedb05295e4fcde0c5dfa65a0be34cc..230fdf8b73dfd7959cfaa98fe06eafe6a75087b1 100755 --- a/hpvm/test/dnn_benchmarks/profiling/test_hpvm_c_profiling.py +++ b/hpvm/test/dnn_benchmarks/profiling/test_hpvm_c_profiling.py @@ -14,7 +14,7 @@ benchmarks_srcdir = Path(__file__).parent / "../hpvm-c/benchmarks" # So we know where the benchmark binaries are due to source directory structure, # and this is not hardcoding. dnn = argv[1] -bench_bin_file = benchmarks_bindir / dnn +bench_bin_file = benchmarks_bindir / f"hpvm_{dnn}" config_file = benchmarks_srcdir / dnn / "data/tuner_confs.txt" out_config_file = f"./{dnn}.txt" profile_configs(bench_bin_file, config_file, out_config_file) diff --git a/hpvm/test/dnn_benchmarks/tensor-rt-src/CMakeLists.txt b/hpvm/test/dnn_benchmarks/tensor-rt-src/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..6e22eba67471855971005bf9e57ed0aa38dafff8 --- /dev/null +++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/CMakeLists.txt @@ -0,0 +1,40 @@ +#**************** FP32 and FP16 TensorRT Source Builds *********** +# Don't put binaries in build/bin. This doesn't affect the global setting.
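+# The fp32/ and fp16/ sources below are templates: they reference @MODEL_PARAMS_DIR@ as a +# placeholder, and add_trt_source() below runs each of them through configure_file() so that +# the compilable copy generated into the build tree has the real path substituted in.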
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + +set(MODEL_PARAMS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../model_params/") +set(test_compile_targets "") +function(add_trt_source target_name filepath) + set(generated_file_path "${CMAKE_CURRENT_BINARY_DIR}/${target_name}.cpp") + configure_file(${filepath} ${generated_file_path}) + add_executable(${target_name} ${generated_file_path}) + target_link_libraries(${target_name} tensor_runtime_online) + set(test_compile_targets ${test_compile_targets} ${target_name} PARENT_SCOPE) +endfunction(add_trt_source) + +set( + DNN_NAMES + alexnet_cifar10 + alexnet2_cifar10 + vgg16_cifar10 + resnet18_cifar10 + vgg16_cifar100 + mobilenet_cifar10 + alexnet_imagenet + vgg16_imagenet + resnet50_imagenet +) +foreach(dnn_name ${DNN_NAMES}) + # FP32 version + if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/fp32/${dnn_name}.cc") + add_trt_source(${dnn_name}_fp32 "${CMAKE_CURRENT_SOURCE_DIR}/fp32/${dnn_name}.cc") + endif() + # FP16 version + if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/fp16/${dnn_name}_half.cc") + add_trt_source(${dnn_name}_fp16 "${CMAKE_CURRENT_SOURCE_DIR}/fp16/${dnn_name}_half.cc") + endif() +endforeach() + +message(STATUS "List of tensor_runtime DNN benchmarks: ${test_compile_targets}") +add_custom_target(trt_dnn_benchmarks DEPENDS ${test_compile_targets}) +message(STATUS "Target name for compiling all DNN benchmarks: trt_dnn_benchmarks") diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/alexnet2_cifar10_half.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/alexnet2_cifar10_half.cc similarity index 96% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/alexnet2_cifar10_half.cc rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/alexnet2_cifar10_half.cc index 0b344035296bdbab2744e32604f3a8881feb6230..ab80718fd33d0b9787be4a0f183e3a7a65dc76e7 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/alexnet2_cifar10_half.cc +++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/alexnet2_cifar10_half.cc @@ -1,7 +1,8 @@ -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" +#include "tensor_runtime.h" +#include "tensorUtils.h" + /* NOTE: Reference Architecture to use for profiling */ void testCifarNet() { @@ -9,8 +10,7 @@ void testCifarNet() { printf("********* Alexnet2 CIFAR-10 DNN ********** \n"); std::string dir_prefix = - model_params_path + std::string("/alexnet2_cifar10/"); - std::string input_path = dir_prefix + std::string("test_input.bin"); + std::string("@MODEL_PARAMS_DIR@") + "/alexnet2_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/alexnet_cifar10_half.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/alexnet_cifar10_half.cc similarity index 95% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/alexnet_cifar10_half.cc rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/alexnet_cifar10_half.cc index 7d493b8720ab701f87fdd53b315da7eafecf6637..b3b69d6b695eca9286b90685f3e071e234887d27 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/alexnet_cifar10_half.cc +++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/alexnet_cifar10_half.cc @@ -1,13 +1,14 @@ -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" +#include "tensor_runtime.h" +#include "tensorUtils.h" + int main() { 
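+  // Shared pattern across these tensor-rt sources: initialize the tensor runtime, then load +  // inputs and weights from the directory substituted for @MODEL_PARAMS_DIR@ at configure time +  // (see tensor-rt-src/CMakeLists.txt).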
llvm_hpvm_initTensorRt(0); - std::string dir_prefix = model_params_path + std::string("/alexnet_cifar10/"); + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/alexnet_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/lenet_mnist_half.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/lenet_mnist_half.cc similarity index 97% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/lenet_mnist_half.cc rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/lenet_mnist_half.cc index 03dc905bbfcb07ad9a266fc153cd1a6a0db9837e..44b78b9169707fd6c7b9ff6503a4a9aa8d2ec947 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/lenet_mnist_half.cc +++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/lenet_mnist_half.cc @@ -1,7 +1,8 @@ #include "tensor_runtime.h" -#include "utils.h" +#include "tensorUtils.h" + /* NOTE: Reference Architecture to use for profiling */ void testLenetTanh() { @@ -11,7 +12,7 @@ void testLenetTanh() { int test_batch_size = 5000; - std::string dir_prefix = model_params_path + std::string("/lenet_mnist/"); + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/lenet_mnist/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/mobilenet_half.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/mobilenet_cifar10_half.cc similarity index 99% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/mobilenet_half.cc rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/mobilenet_cifar10_half.cc index d6eaef755743ce961d3d9c2f013eef26a77579f7..d4423bf4345756e72ad46b140ae8cafc26eae264 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/mobilenet_half.cc +++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/mobilenet_cifar10_half.cc @@ -1,15 +1,15 @@ -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" +#include "tensor_runtime.h" +#include "tensorUtils.h" + int main() { llvm_hpvm_initTensorRt(0); std::string dir_prefix = - model_params_path + std::string("/mobilenet_cifar10/"); - + std::string("@MODEL_PARAMS_DIR@") + "/mobilenet_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/resnet18_cifar10_half.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/resnet18_cifar10_half.cc similarity index 98% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/resnet18_cifar10_half.cc rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/resnet18_cifar10_half.cc index 40e128eb8a80f6e080c090589a3e91b80ffa082f..76dea5ef08713d22fe7086b678bb3274378d0fd9 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/resnet18_cifar10_half.cc +++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/resnet18_cifar10_half.cc @@ -1,15 +1,15 @@ -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" +#include "tensor_runtime.h" +#include "tensorUtils.h" + int main() { llvm_hpvm_initTensorRt(0); std::string dir_prefix = - model_params_path + std::string("/resnet18_cifar10/"); - + std::string("@MODEL_PARAMS_DIR@") + "/resnet18_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string 
labels_path = dir_prefix + std::string("test_labels.bin"); diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/vgg16_cifar100_half.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/vgg16_cifar100_half.cc similarity index 98% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/vgg16_cifar100_half.cc rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/vgg16_cifar100_half.cc index eb3275b83009ec4300e9cb713f3b182727661db4..2772fd3da42d50aa2ff5391d1e3c85c610a4960a 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/vgg16_cifar100_half.cc +++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/vgg16_cifar100_half.cc @@ -1,13 +1,14 @@ -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" +#include "tensor_runtime.h" +#include "tensorUtils.h" + int main() { llvm_hpvm_initTensorRt(0); - std::string dir_prefix = model_params_path + std::string("/vgg16_cifar100/"); + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/vgg16_cifar100/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/vgg16_cifar10_half.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/vgg16_cifar10_half.cc similarity index 98% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/vgg16_cifar10_half.cc rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/vgg16_cifar10_half.cc index 2a4b5a6a914698f621284e0f5b19843b817068df..954f6778b899d2cefb2b28d68a32fad33d52f70c 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/vgg16_cifar10_half.cc +++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/vgg16_cifar10_half.cc @@ -1,13 +1,14 @@ -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" +#include "tensor_runtime.h" +#include "tensorUtils.h" + int main() { llvm_hpvm_initTensorRt(0); - std::string dir_prefix = model_params_path + std::string("/vgg16_cifar10/"); + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/vgg16_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet2_cifar10.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/alexnet2_cifar10.cc similarity index 96% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet2_cifar10.cc rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/alexnet2_cifar10.cc index 396e9f11cae92c2f6613b5acb799caecbf025a59..e7431234d705449efa0fc5aafe23238e89be1d30 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet2_cifar10.cc +++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/alexnet2_cifar10.cc @@ -1,7 +1,8 @@ +#include "tensor_runtime.h" +#include "tensorUtils.h" + -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" /* NOTE: Reference Architecture to use for profiling */ void testCifarNet() { @@ -9,8 +10,7 @@ void testCifarNet() { printf("********* Alexnet2 CIFAR-10 DNN ********** \n"); std::string dir_prefix = - model_params_path + std::string("/alexnet2_cifar10/"); - std::string input_path = dir_prefix + std::string("test_input.bin"); + std::string("@MODEL_PARAMS_DIR@") + "/alexnet2_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + 
std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet_cifar10.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/alexnet_cifar10.cc similarity index 96% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet_cifar10.cc rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/alexnet_cifar10.cc index 600512078563baf850f440ea97e78cb2d73be170..12c304c9b401c586a0da4658b092f2b791268983 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet_cifar10.cc +++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/alexnet_cifar10.cc @@ -1,13 +1,14 @@ -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" +#include "tensor_runtime.h" +#include "tensorUtils.h" + int main() { llvm_hpvm_initTensorRt(0); - std::string dir_prefix = model_params_path + std::string("/alexnet_cifar10/"); + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/alexnet_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); // std::string labels_path = dir_prefix + std::string("labels.bin"); diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet_imagenet.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/alexnet_imagenet.cc similarity index 98% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet_imagenet.cc rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/alexnet_imagenet.cc index 29909e5938ca0f700c4ee22165ae2ad354e53a32..b57e60c0fef41b283ad57a7b203759a8f014252d 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet_imagenet.cc +++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/alexnet_imagenet.cc @@ -1,15 +1,15 @@ #include "tensor_runtime.h" -#include "utils.h" +#include "tensorUtils.h" + int main() { llvm_hpvm_initTensorRt(0); std::string dir_prefix = - model_params_path + std::string("/alexnet_imagenet/"); - + std::string("@MODEL_PARAMS_DIR@") + "/alexnet_imagenet/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/lenet_mnist.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/lenet_mnist.cc similarity index 97% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/lenet_mnist.cc rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/lenet_mnist.cc index 61a0eeb441458ff6f91af8bc76ecc17a33428aec..9777670722b69c8b23a82a77312d17386f2d5c3f 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/lenet_mnist.cc +++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/lenet_mnist.cc @@ -1,7 +1,8 @@ #include "tensor_runtime.h" -#include "../include/utils.h" +#include "tensorUtils.h" + int total_runs = 1; @@ -10,7 +11,7 @@ void testLenetTanh() { int test_batch_size = 5000; - std::string dir_prefix = model_params_path + std::string("/lenet_mnist/"); + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/lenet_mnist/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/mobilenet.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/mobilenet_cifar10.cc similarity index 99% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/mobilenet.cc rename to 
hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/mobilenet_cifar10.cc index 85849126cf164693d12fb08aba8326033ca61b82..3e37bf7feb6641af3afdeb8fb9f3a65fdfcbdce3 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/mobilenet.cc +++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/mobilenet_cifar10.cc @@ -1,15 +1,14 @@ -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" +#include "tensor_runtime.h" +#include "tensorUtils.h" int main() { llvm_hpvm_initTensorRt(0); std::string dir_prefix = - model_params_path + std::string("/mobilenet_cifar10/"); - std::string input_path = dir_prefix + std::string("test_input.bin"); + std::string("@MODEL_PARAMS_DIR@") + "/mobilenet_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); void *conv2d_1_w = diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/resnet18_cifar10.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/resnet18_cifar10.cc similarity index 98% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/resnet18_cifar10.cc rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/resnet18_cifar10.cc index bd3dd7dc1ea23f3cb8ad91e8632b347dd51a848b..c8a99419a81d19b374642c21c977a511413f9ae2 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/resnet18_cifar10.cc +++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/resnet18_cifar10.cc @@ -1,15 +1,15 @@ -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" +#include "tensor_runtime.h" +#include "tensorUtils.h" + int main() { llvm_hpvm_initTensorRt(0); std::string dir_prefix = - model_params_path + std::string("/resnet18_cifar10/"); - + std::string("@MODEL_PARAMS_DIR@") + "/resnet18_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/resnet50_imagenet.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/resnet50_imagenet.cc similarity index 99% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/resnet50_imagenet.cc rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/resnet50_imagenet.cc index 0cccb124b0dca81d45887df50c4a9bcaf2a21db5..3aeabc22736e6955a9ad5ad07144fc38057616ea 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/resnet50_imagenet.cc +++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/resnet50_imagenet.cc @@ -1,15 +1,15 @@ -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" +#include "tensor_runtime.h" +#include "tensorUtils.h" + int main() { llvm_hpvm_initTensorRt(0); std::string dir_prefix = - model_params_path + std::string("/resnet50_imagenet/"); - + std::string("@MODEL_PARAMS_DIR@") + "/resnet50_imagenet/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_cifar10.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/vgg16_cifar10.cc similarity index 98% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_cifar10.cc rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/vgg16_cifar10.cc index 813874f0ed888ed5889b0574de454ca2720c944d..f7fffadfc36ba0fd248371efb35a1b7dfede68d3 
100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_cifar10.cc +++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/vgg16_cifar10.cc @@ -1,13 +1,13 @@ -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" +#include "tensor_runtime.h" +#include "tensorUtils.h" int main() { llvm_hpvm_initTensorRt(0); - std::string dir_prefix = model_params_path + std::string("/vgg16_cifar10/"); + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/vgg16_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_cifar100.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/vgg16_cifar100.cc similarity index 98% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_cifar100.cc rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/vgg16_cifar100.cc index 84164bf24bbff855b7a0975d7df2883e674b34c8..d3949c7cc568063f3b344d8497551fa1f4f4102c 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_cifar100.cc +++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/vgg16_cifar100.cc @@ -1,13 +1,14 @@ -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" +#include "tensor_runtime.h" +#include "tensorUtils.h" + int main() { llvm_hpvm_initTensorRt(0); - std::string dir_prefix = model_params_path + std::string("/vgg16_cifar100/"); + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/vgg16_cifar100/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_imagenet.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/vgg16_imagenet.cc similarity index 98% rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_imagenet.cc rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/vgg16_imagenet.cc index eca833b08ff374e208f0cafabbf598cd0f7b5d90..2bb1be2821a8d33062bf1cfd83bb978f59884fa9 100644 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_imagenet.cc +++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/vgg16_imagenet.cc @@ -1,13 +1,14 @@ #include "tensor_runtime.h" -#include "utils.h" +#include "tensorUtils.h" + int main() { llvm_hpvm_initTensorRt(0); - std::string dir_prefix = model_params_path + std::string("/vgg16_imagenet/"); + std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/vgg16_imagenet/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); diff --git a/hpvm/test/hpvm_pass/CMakeLists.txt b/hpvm/test/hpvm_pass/CMakeLists.txt index fe929f4d6316b54e4b4b19db1221ae146534ee0b..a2be31fd4fc27dcdb83c5ed7424c6577eefb3d5d 100644 --- a/hpvm/test/hpvm_pass/CMakeLists.txt +++ b/hpvm/test/hpvm_pass/CMakeLists.txt @@ -7,7 +7,7 @@ configure_lit_site_cfg( set( HPVM_TEST_DEPENDS - opt hpvm-rt.bc + opt hpvm-rt-bc # Passes: LLVMGenHPVM LLVMBuildDFG LLVMLocalMem LLVMClearDFG LLVMDFG2LLVM_CPU LLVMDFG2LLVM_OpenCL diff --git a/hpvm/tools/CMakeLists.txt b/hpvm/tools/CMakeLists.txt index 495348404f3594d8bca46b87acd8be2d08d2c8a4..4758dde6a3f1564739135225f6f8673b5d34843d 100644 --- a/hpvm/tools/CMakeLists.txt +++ b/hpvm/tools/CMakeLists.txt @@ -1,2 +1,2 @@ add_llvm_tool_subdirectory(hpvm-config) 
-add_llvm_tool_subdirectory(py-approxhpvm) \ No newline at end of file +add_llvm_tool_subdirectory(hpvm-clang) \ No newline at end of file diff --git a/hpvm/tools/hpvm-clang/CMakeLists.txt b/hpvm/tools/hpvm-clang/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..3564b00d0aa480996c152c1f027fa6fc39cd144c --- /dev/null +++ b/hpvm/tools/hpvm-clang/CMakeLists.txt @@ -0,0 +1,87 @@ +# This file is very tightly coupled with main.py.in. +# Watch out and keep them in sync. + +# CMake fills in some variables in main.py.in and generates it into a Python package: +# `hpvmpy`, which is the main entry point and Python API for HPVM. + +# ---[ Define variables for main.py.in +# main.py.in requires the following variables: +# LLVM_PROJECT_DIR, LLVM_BUILD_DIR +# TRT_INCLUDE_DIRS, TRT_LINK_DIRS, TRT_LINK_LIBS +# DIRECT_LINK_LIBS, HPVM_USE_OPENCL (defined globally) +# AVAILABLE_PASSES, HPVM_RT_PATH + +set(LLVM_PROJECT_DIR ${CMAKE_SOURCE_DIR}) +set(LLVM_BUILD_DIR ${CMAKE_BINARY_DIR}) + +get_target_property(TRT_INCLUDE_DIRS tensor_runtime INCLUDE_DIRECTORIES) +get_target_property(TRT_LINK_DIRS tensor_runtime TRT_LINK_DIRS) +get_target_property(TRT_LINK_LIBS tensor_runtime TRT_LINK_LIBS) + +set(DIRECT_LINK_LIBS "$<TARGET_FILE:tensor_runtime>") +if(${HPVM_USE_OPENCL}) + # We need to link to OpenCL libs when hpvm uses opencl + # because OpenCL functions may be injected by the OpenCL pass. + list(APPEND DIRECT_LINK_LIBS ${OpenCL_LIBRARY}) +else() + message(STATUS "hpvm-clang is not using OpenCL (--opencl flag not available).") +endif() + +# The hpvm-rt runtime +# This has to be explicitly set as hpvm-rt.bc is created in a custom_target +# and does not export its file location. +# Keep this in sync with hpvm/projects/hpvm-rt/CMakeLists.txt. +set(HPVM_RT_PATH ${LLVM_BUILD_DIR}/tools/hpvm/projects/hpvm-rt/hpvm-rt.bc) +# And this must be kept manually in sync with `hpvm/lib/Transforms/*`, +# which is fine for now because we don't have many passes. +set( + AVAILABLE_PASSES + LLVMBuildDFG + LLVMClearDFG + LLVMDFG2LLVM_CPU + LLVMDFG2LLVM_CUDNN + LLVMDFG2LLVM_OpenCL + LLVMDFG2LLVM_WrapperAPI + LLVMFuseHPVMTensorNodes + LLVMGenHPVM + LLVMInPlaceDFGAnalysis + LLVMLocalMem +) + +# ---[ Create package folder structure +# This sounds crazy, but since main.py.in is generated into another file under the build/ dir, +# to make a Python package around it, we'll have to generate the whole package structure +# in build/ as well. +# Target dir structure: +# ${CMAKE_CURRENT_BINARY_DIR} +# hpvmpy/ +# __init__.py <- generated from main.py.in +# setup.py <- copied from setup.py +file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/hpvmpy) +file(COPY setup.py DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) + +# ---[ Generate main.py.in to hpvmpy/__init__.py +set(init_path ${CMAKE_CURRENT_BINARY_DIR}/hpvmpy/__init__.py) +# First resolve all `@symbol@` by configuring the file +configure_file(main.py.in ${CMAKE_CURRENT_BINARY_DIR}/main.py.conf) +# Then resolve all generator expressions we configured into the previous file +file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/main.py INPUT ${CMAKE_CURRENT_BINARY_DIR}/main.py.conf) +# Deliberately create an extra step of moving the file, +# which is carried out at build time (as a target) +# so we can set these dependencies on it.
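+# For example, main.py.in contains the line `DIRECT_LINK_LIBS = "@DIRECT_LINK_LIBS@".split(";")`: +# configure_file() above resolves @DIRECT_LINK_LIBS@, but the value it inserts still contains the +# generator expression $<TARGET_FILE:tensor_runtime>, which only file(GENERATE) can evaluate -- +# hence the two-stage generation.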
+set(DEPS tensor_runtime hpvm-rt-bc clang opt llvm-link ${AVAILABLE_PASSES}) +add_custom_command( + OUTPUT ${init_path} + COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/main.py ${init_path} + DEPENDS ${DEPS} ${CMAKE_CURRENT_BINARY_DIR}/main.py +) + +# ---[ Call python3 -m pip to install this package. +add_custom_target( + hpvmpy + COMMAND python3 -m pip install ./ + DEPENDS ${init_path} setup.py +) +# hpvm-clang is the name of the compiler binary (see setup.py) +# We provide this alias for convenience +add_custom_target(hpvm-clang DEPENDS hpvmpy) diff --git a/hpvm/tools/hpvm-clang/main.py.in b/hpvm/tools/hpvm-clang/main.py.in new file mode 100644 index 0000000000000000000000000000000000000000..e2bc5cbafa23bd64094a3198ad8466f682f6bbdc --- /dev/null +++ b/hpvm/tools/hpvm-clang/main.py.in @@ -0,0 +1,275 @@ +#!/usr/bin/env python3 +import argparse +import os +from pathlib import Path +from typing import List, Union, Optional + +PathLike = Union[Path, str] + +HPVM_PROJECT_DIR = Path("@LLVM_PROJECT_DIR@") / "tools/hpvm" +LLVM_BUILD_BIN = Path("@LLVM_BUILD_DIR@") / "bin" + +# Directories to include +TRT_INCLUDE_DIRS = "@TRT_INCLUDE_DIRS@".split(";") +TRT_LINK_DIRS = [Path(s) for s in "@TRT_LINK_DIRS@".split(";")] +TRT_LINK_LIBS = "@TRT_LINK_LIBS@".split(";") +DIRECT_LINK_LIBS = "@DIRECT_LINK_LIBS@".split(";") +HPVM_USE_OPENCL = int("@HPVM_USE_OPENCL@") + +AVAILABLE_PASSES = "@AVAILABLE_PASSES@".split(";") +HPVM_RT_PATH = "@HPVM_RT_PATH@" + + +def compile_hpvm_c( + hpvm_src: PathLike, + output_file: PathLike, + tensor_target: Optional[str], + opencl: bool, + link_bitcode: List[PathLike] = None, + include: List[PathLike] = None, + macro: List[str] = None, + flags: List[str] = None, + optim_level: str = "0", # -O0 + is_cpp: bool = True, # otherwise is C + std: str = None, # language std (-std=c++11) + link_dirs: List[PathLike] = None, + link_libs: List[str] = None, + working_dir: PathLike = None, + conf_file: PathLike = None, +): + from subprocess import check_output + + passes = ["LLVMBuildDFG"] + pass_flags = ["buildDFG"] + if tensor_target == "tensor": + if conf_file is None: + raise ValueError("conf_file must be defined when tensor_target=='tensor'.") + passes += ["LLVMInPlaceDFGAnalysis", "LLVMFuseHPVMTensorNodes", "LLVMDFG2LLVM_WrapperAPI"] + pass_flags += [ + "inplace", "hpvm-fuse", "dfg2llvm-wrapperapi", + f"configuration-inputs-filename={conf_file}" + ] + elif tensor_target == "cudnn": + passes += ["LLVMInPlaceDFGAnalysis", "LLVMDFG2LLVM_CUDNN"] + pass_flags += ["inplace", "dfg2llvm-cudnn"] + elif tensor_target is None: + passes += ["LLVMLocalMem"] + pass_flags += ["localmem"] + else: + raise ValueError(f"Tensor target {tensor_target} not recognized") + if opencl: + passes += ["LLVMDFG2LLVM_OpenCL"] + pass_flags += ["dfg2llvm-opencl"] + passes += ["LLVMDFG2LLVM_CPU", "LLVMClearDFG"] + pass_flags += ["dfg2llvm-cpu", "clearDFG"] + + working_dir = Path(working_dir or ".") + if not working_dir.is_dir(): + os.makedirs(working_dir) + + # All commands for compiling the main hpvm_c file + name_stem = Path(hpvm_src).stem + ll_file = working_dir / f"{name_stem}.ll" + hpvm_ll_file = working_dir / f"{name_stem}.hpvm.ll" + llvm_ll_file = working_dir / f"{name_stem}.llvm.ll" + hpvm_rt_linked_file = working_dir / f"{name_stem}.linked.bc" + link_bitcode_ = [Path(bc) for bc in (link_bitcode or [])] + commands = [ + hpvm_c_to_ll(hpvm_src, ll_file, include, macro, flags, optim_level, is_cpp, std), + opt_codegen_hpvm(ll_file, hpvm_ll_file), + _run_opt(hpvm_ll_file, llvm_ll_file, passes, pass_flags), + 
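+        # The three stages above take HPVM-C to LLVM IR (clang), raise it to HPVM IR (GenHPVM), and lower it back to plain LLVM IR with the selected backend passes; the two stages below link in hpvm-rt.bc (plus any user-supplied bitcode) and produce the final executable.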
link_hpvm_rt(link_bitcode_ + [llvm_ll_file], hpvm_rt_linked_file), + ] + commands.append( + link_binary(hpvm_rt_linked_file, output_file, link_dirs, link_libs) + ) + for command in commands: + print(" ".join(command)) + check_output(command) + + +def hpvm_c_to_ll( + src_file: PathLike, + target_file: PathLike, + extra_includes: List[PathLike] = None, + macros: List[str] = None, + flags: List[str] = None, + optim_level: str = "0", # -O0 + is_cpp: bool = True, # otherwise is C + std: str = None, # --std=c++11 +) -> List[str]: + includes = (extra_includes or []) + TRT_INCLUDE_DIRS + includes_s = [f"-I{path}" for path in includes] + macros = [f"-D{macro}" for macro in (macros or [])] + flags = [f"-f{flg}" for flg in (flags or [])] + if std: + flags.append(f"-std={std}") + clang = "clang++" if is_cpp else "clang" + return [ + str(LLVM_BUILD_BIN / clang), *includes_s, *flags, *macros, + f"-O{optim_level}", "-emit-llvm", "-S", + str(src_file), "-o", str(target_file) + ] + + +def opt_codegen_hpvm(src_file: PathLike, target_file: PathLike) -> List[str]: + return _run_opt(src_file, target_file, ["LLVMGenHPVM"], ["genhpvm", "globaldce"]) + + +def link_hpvm_rt(bitcodes: List[PathLike], target_file: PathLike) -> List[str]: + bitcodes_s = [str(bc) for bc in bitcodes] + return [str(LLVM_BUILD_BIN / "llvm-link"), *bitcodes_s, HPVM_RT_PATH, "-S", "-o", str(target_file)] + + +def link_binary( + src_file: PathLike, + target_file: PathLike, + extra_link_dirs: List[PathLike] = None, + extra_link_libs: List[str] = None +) -> List[str]: + link_dirs, link_libs = _link_args(extra_link_dirs or [], extra_link_libs or []) + linker_dir_flags = [] + for path in link_dirs: + linker_dir_flags.extend([f"-L{path}", f"-Wl,-rpath={path}"]) + linker_lib_flags = [f"-l{lib}" for lib in link_libs] + return [ + str(LLVM_BUILD_BIN / "clang++"), str(src_file), + "-o", str(target_file), *linker_dir_flags, *linker_lib_flags + ] + + +def _link_args(extra_link_dirs: List[PathLike], extra_link_libs: List[str]): + def drop_suffix(libname: str): + import re + + match = re.match(r"lib(.*)\.so", libname) + return libname if match is None else match.group(1) + + link_dirs, link_libs = extra_link_dirs.copy(), extra_link_libs.copy() + for lib in DIRECT_LINK_LIBS: + lib = Path(lib) + link_dirs.append(lib.parent) + link_libs.append(lib.name) + link_dirs += TRT_LINK_DIRS + link_libs += TRT_LINK_LIBS + link_libnames = [drop_suffix(s) for s in link_libs] + return link_dirs, link_libnames + + +def _run_opt( + src_file: PathLike, + target_file: PathLike, + pass_names: List[str], + pass_flags: List[str], +) -> List[str]: + unavailable = set(pass_names) - set(AVAILABLE_PASSES) + if unavailable: + raise ValueError(f"Passes {unavailable} are unavailable for this compilation.") + load_passes_strs = [s for pass_ in pass_names for s in ["-load", f"{pass_}.so"]] + pass_flags_strs = [f"-{flag}" for flag in pass_flags] + return [ + str(LLVM_BUILD_BIN / "opt"), *load_passes_strs, *pass_flags_strs, + "-S", str(src_file), "-o", str(target_file) + ] + + +def parse_args(): + parser = argparse.ArgumentParser("hpvm-clang") + parser.add_argument( + "hpvm_src", type=Path, + help="""HPVM-C code to compile. +HPVM-C code must be a single file, but additional bitcode files can be linked in.
+See option -b for that.""" + ) + parser.add_argument("output_file", type=Path, help="Path to generate the binary to") + parser.add_argument( + "-x", type=str, metavar="language", default="c++", + help="Treat input file as having type <language>", + ) + parser.add_argument( + "-b", + "--link-bitcode", + type=Path, + nargs="+", + help="Additional bitcode (.ll/.bc) files to link in", + ) + parser.add_argument( + "-t", + "--tensor-target", + type=str, + choices=["tensor", "cudnn"], + help="Backend to use for tensor operators", + ) + parser.add_argument( + "--conf-file", type=Path, + help="File of approximation configurations; required for tensor target 'tensor'" + ) + parser.add_argument( + "--opencl", + action="store_true", + help="Use OpenCL support. Requires HPVM to be built with OpenCL", + ) + parser.add_argument( + "-d", "--working-dir", type=Path, help="Directory to generate temp files in" + ) + + # Relaying arguments for clang++ (source -> bitcode stage) + parser.add_argument( + "-I", "--include", type=Path, action="append", metavar="dir", + help="[clang emit-llvm] Add directory to include search path" + ) + parser.add_argument( + "-D", type=str, action="append", metavar="<macro>=<value>", + help="[clang emit-llvm] Define macro" + ) + parser.add_argument( + "-f", type=str, action="append", metavar="flag", + help="[clang emit-llvm] clang++ flags (such as -ffast-math)" + ) + parser.add_argument( + "-O", type=str, default="0", metavar="level", + help="[clang emit-llvm] Optimization level" + ) + parser.add_argument( + "--std", type=str, + help="[clang emit-llvm] Language standard to compile for. Note the double dashes (--std, not -std)." + ) + + # Relaying arguments for clang++ (linking stage) + parser.add_argument( + "-L", type=Path, action="append", metavar="dir", + help="[clang linker] Add directory to library search path" + ) + parser.add_argument( + "-l", type=str, action="append", metavar="name", + help="[clang linker] Link library (such as -lpthread)" + ) + + args = parser.parse_args() + if args.tensor_target == "tensor": + if args.conf_file is None: + parser.error('Tensor target "tensor" requires --conf-file argument') + if args.x == "c": + args.is_cpp = False + elif args.x == "c++": + args.is_cpp = True + else: + parser.error(f"Language mode {args.x} not supported yet -- only c or c++") + if not HPVM_USE_OPENCL and args.opencl: + parser.error("OpenCL is disabled for this build of HPVM.") + return args + + +def main(): + args = vars(parse_args()) + args["macro"] = args.pop("D") + args["flags"] = args.pop("f") + args["optim_level"] = args.pop("O") + args["link_dirs"] = args.pop("L") + args["link_libs"] = args.pop("l") + args.pop("x") + compile_hpvm_c(**args) + + +if __name__ == "__main__": + main() diff --git a/hpvm/tools/hpvm-clang/setup.py b/hpvm/tools/hpvm-clang/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..75262a069de32e6f5de4bff89738e0011dd91326 --- /dev/null +++ b/hpvm/tools/hpvm-clang/setup.py @@ -0,0 +1,15 @@ +import setuptools + +setuptools.setup( + name="hpvmpy", + version="1.0", + author="Yifan Zhao", + author_email="yifanz16@illinois.edu", + description="HPVM Python API", + packages=["hpvmpy"], + entry_points={ + "console_scripts": [ + "hpvm-clang = hpvmpy:main", + ], + }, +) diff --git a/hpvm/tools/py-approxhpvm/CMakeLists.txt b/hpvm/tools/py-approxhpvm/CMakeLists.txt deleted file mode 100644 index f93f96dd0578a67630cc859bba2e24e071b39299..0000000000000000000000000000000000000000 --- a/hpvm/tools/py-approxhpvm/CMakeLists.txt +++ /dev/null @@ -1,53 +0,0 @@
-# This file is very tightly coupled with main.py.in. -# Watch out and keep them in sync. - -set(LLVM_PROJECT_DIR ${CMAKE_SOURCE_DIR}) -set(LLVM_BUILD_DIR ${CMAKE_BINARY_DIR}) -set(LIB_DIR ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}) -# The hpvm-rt runtime -# This has to be explicitly set as hpvm-rt.bc is created in a custom_target -# and does not export its file location. -# Keep this in sync with hpvm/projects/hpvm-rt/CMakeLists.txt. -set(HPVM_RT_PATH ${LLVM_BUILD_DIR}/tools/hpvm/projects/hpvm-rt/hpvm-rt.bc) -set(TENSOR_RUNTIME_LIB "$<TARGET_FILE:tensor_runtime>") -set( - AVAILABLE_PASSES - LLVMBuildDFG - LLVMInPlaceDFGAnalysis - LLVMDFG2LLVM_CPU - LLVMDFG2LLVM_CUDNN - LLVMDFG2LLVM_WrapperAPI - LLVMFuseHPVMTensorNodes - LLVMClearDFG - LLVMGenHPVM -) -# CUDA_TOOLKIT_ROOT_DIR and CUDNN_LIBRARY_PATH has been defined globally -set(CUDNN_DIR ${CUDNN_LIBRARY_PATH}) -# First resolve all `@symbol@` by configuring the file -configure_file(main.py.in ${CMAKE_CURRENT_BINARY_DIR}/main.py.conf) -# Then resolve all generator expressions we configured into the previous file -file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/main.py INPUT ${CMAKE_CURRENT_BINARY_DIR}/main.py.conf) -# Delibrately create an extra step of moving file -# which is carried out at build time (as a target) -# so we can set these dependencies on it -set( - DEPS - tensor_runtime - LLVMBuildDFG - LLVMInPlaceDFGAnalysis - LLVMDFG2LLVM_CPU - LLVMDFG2LLVM_CUDNN - LLVMDFG2LLVM_WrapperAPI - LLVMFuseHPVMTensorNodes - LLVMClearDFG - LLVMGenHPVM - hpvm-rt.bc - clang opt llvm-link -) -add_custom_command( - OUTPUT ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/approxhpvm.py - COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/main.py ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/approxhpvm.py - COMMAND chmod +x ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/approxhpvm.py - DEPENDS ${DEPS} ${CMAKE_CURRENT_BINARY_DIR}/main.py -) -add_custom_target(approxhpvm.py ALL DEPENDS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/approxhpvm.py) diff --git a/hpvm/tools/py-approxhpvm/main.py.in b/hpvm/tools/py-approxhpvm/main.py.in deleted file mode 100644 index af706a1eaa7a53879e525d87dd5034caf814db38..0000000000000000000000000000000000000000 --- a/hpvm/tools/py-approxhpvm/main.py.in +++ /dev/null @@ -1,180 +0,0 @@ -#!/usr/bin/env python3 -import argparse -import os -from pathlib import Path -from typing import List, Union, Optional - -PathLike = Union[Path, str] - -HPVM_PROJECT_DIR = Path("@LLVM_PROJECT_DIR@") / "tools/hpvm" -LLVM_BUILD_DIR = Path("@LLVM_BUILD_DIR@") / "bin" -CUDA_TOOLKIT_ROOT_DIR = Path("@CUDA_TOOLKIT_ROOT_DIR@") -TENSOR_RUNTIME_LIB = Path("@TENSOR_RUNTIME_LIB@") -AVAILABLE_PASSES = "@AVAILABLE_PASSES@".split(";") -HPVM_RT_PATH = "@HPVM_RT_PATH@" -CUDNN_DIR = "@CUDNN_DIR@" - -# Directories to include -INCLUDE_DIRS = [ - HPVM_PROJECT_DIR / "include", # HPVM include dir - # Tensor runtime include dir - HPVM_PROJECT_DIR / "projects/hpvm-tensor-rt/tensor_runtime/include", - HPVM_PROJECT_DIR / "test/dnn_benchmarks/hpvm-c/include", # hpvm-c intrinsics decl dir - CUDA_TOOLKIT_ROOT_DIR / "include", # CUDA include dir -] -LINK_DIRS = [CUDA_TOOLKIT_ROOT_DIR / "lib64", CUDNN_DIR, TENSOR_RUNTIME_LIB.parent] -LINK_LIBS = [ - "pthread", "cudart", "curand", "cudnn", "cublas", "cufft", "OpenCL", "stdc++fs", "omp", "m" -] -COMPILE_FLAGS = ["fno-exceptions", "std=c++11", "O3"] - - -def compile_hpvm_c( - input_file: PathLike, - output_file: PathLike, - codegen_target: str = "tensor", - include: List[PathLike] = None, - working_dir: PathLike = None, - conf_file: PathLike = None, -): - from subprocess import check_output - - 
codegen_functions = { - "tensor": lambda i, o: opt_codegen_tensor(i, o, conf_file), - "cudnn": opt_codegen_cudnn - } - codegen_f = codegen_functions[codegen_target] - working_dir = Path(working_dir or ".") - if not working_dir.is_dir(): - os.makedirs(working_dir) - name_stem = Path(input_file).stem - - ll_file = working_dir / f"{name_stem}.ll" - hpvm_ll_file = working_dir / f"{name_stem}.hpvm.ll" - llvm_ll_file = working_dir / f"{name_stem}.llvm.ll" - hpvm_rt_linked_file = working_dir / f"{name_stem}.linked.bc" - commands = [ - hpvm_c_to_ll(input_file, ll_file, extra_includes=include), - opt_codegen_hpvm(ll_file, hpvm_ll_file), - codegen_f(hpvm_ll_file, llvm_ll_file), - link_hpvm_rt(llvm_ll_file, hpvm_rt_linked_file), - link_binary(hpvm_rt_linked_file, output_file), - ] - for command in commands: - print(" ".join(command)) - check_output(command) - - -def hpvm_c_to_ll( - src_file: PathLike, - target_file: PathLike, - extra_includes: Optional[List[PathLike]] = None, - flags: List[str] = None, -) -> List[str]: - extra_includes = extra_includes or [] - includes = [f"-I{path}" for path in INCLUDE_DIRS + extra_includes] - flags = [f"-{flg}" for flg in (flags or []) + COMPILE_FLAGS] - return [ - str(LLVM_BUILD_DIR / "clang++"), *includes, *flags, "-emit-llvm", "-S", - str(src_file), "-o", str(target_file) - ] - - -def opt_codegen_hpvm(src_file: PathLike, target_file: PathLike) -> List[str]: - return _run_opt(src_file, target_file, ["LLVMGenHPVM"], ["genhpvm", "globaldce"]) - - -def opt_codegen_cudnn(src_file: PathLike, target_file: PathLike) -> List[str]: - passes = [ - "LLVMBuildDFG", "LLVMInPlaceDFGAnalysis", - "LLVMDFG2LLVM_CUDNN", "LLVMDFG2LLVM_CPU", - "LLVMFuseHPVMTensorNodes", "LLVMClearDFG", "LLVMGenHPVM" - ] - flags = [ - "buildDFG", "inplace", "hpvm-fuse", - "dfg2llvm-cudnn", "dfg2llvm-cpu", "clearDFG" - ] - return _run_opt(src_file, target_file, passes, flags) - - -def opt_codegen_tensor( - src_file: PathLike, target_file: PathLike, conf_file: PathLike -): - passes = [ - "LLVMBuildDFG", "LLVMInPlaceDFGAnalysis", - "LLVMDFG2LLVM_WrapperAPI", "LLVMDFG2LLVM_CPU", - "LLVMFuseHPVMTensorNodes", "LLVMClearDFG", "LLVMGenHPVM" - ] - flags = [ - "buildDFG", "inplace", "hpvm-fuse", - "dfg2llvm-wrapperapi", - f"configuration-inputs-filename={conf_file}", - "dfg2llvm-cpu", "clearDFG", - ] - return _run_opt(src_file, target_file, passes, flags) - - -def link_hpvm_rt(src_file: PathLike, target_file: PathLike) -> List[str]: - return [str(LLVM_BUILD_DIR / "llvm-link"), str(src_file), HPVM_RT_PATH, "-o", str(target_file)] - - -def link_binary(src_file: PathLike, target_file: PathLike) -> List[str]: - linker_dir_flags = [] - for path in LINK_DIRS: - linker_dir_flags.extend([f"-L{path}", f"-Wl,-rpath={path}"]) - linker_lib_flags = [f"-l{lib}" for lib in LINK_LIBS] - return [ - str(LLVM_BUILD_DIR / "clang++"), str(src_file), str(TENSOR_RUNTIME_LIB), "-o", str(target_file), - *linker_dir_flags, *linker_lib_flags - ] - - -def _run_opt( - src_file: PathLike, - target_file: PathLike, - pass_names: List[str], - pass_flags: List[str], -) -> List[str]: - unavailable = set(pass_names) - set(AVAILABLE_PASSES) - if unavailable: - raise ValueError(f"Passes {unavailable} are unavailable from CMake") - load_passes_strs = [s for pass_ in pass_names for s in ["-load", f"{pass_}.so"]] - pass_flags_strs = [f"-{flag}" for flag in pass_flags] - return [ - str(LLVM_BUILD_DIR / "opt"), *load_passes_strs, *pass_flags_strs, - "-S", str(src_file), "-o", str(target_file) - ] - - -def parse_args(): - parser = 
argparse.ArgumentParser("approxhpvm") - parser.add_argument("input_file", type=Path, help="HPVM-C code to compile") - parser.add_argument("output_file", type=Path, help="Path to generate binary to") - parser.add_argument( - "-t", - "--codegen-target", - type=str, - choices=["tensor", "cudnn"], - help="Backend to use", - ) - parser.add_argument( - "-d", "--working-dir", type=Path, help="Directory to generate temp files in" - ) - parser.add_argument( - "--conf-file", type=Path, - help="File to approximation configurations; required for 'tensor' target" - ) - parser.add_argument( - "-I", "--include", type=Path, nargs="+", - help="Additional include directories to use" - ) - - args = parser.parse_args() - if args.codegen_target == "tensor": - if args.conf_file is None: - parser.error('Codegen target "tensor" requires --conf-file argument') - return args - - -if __name__ == "__main__": - compile_hpvm_c(**vars(parse_args()))
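With this change, approxhpvm.py is gone: the compilation pipeline now lives in the generated `hpvmpy` package, and `hpvm-clang` (declared in setup.py) is just its console entry point. For illustration, a minimal sketch of driving the same pipeline from Python; the source, config, and output paths below are hypothetical placeholders, not files in the repository:

# Minimal usage sketch of the hpvmpy API generated from main.py.in.
# All file paths below are illustrative placeholders.
from hpvmpy import compile_hpvm_c

compile_hpvm_c(
    hpvm_src="mobilenet_cifar10.cpp",  # a single HPVM-C source file
    output_file="mobilenet_cifar10",   # executable to produce
    tensor_target="tensor",            # "tensor", "cudnn", or None for the plain CPU path
    opencl=False,                      # --opencl requires an HPVM build with OpenCL
    conf_file="data/tuner_confs.txt",  # required when tensor_target == "tensor"
    working_dir="tmp",                 # intermediate .ll/.bc files are kept here
)

# Equivalent console invocation, as the Keras benchmarks use it:
#   hpvm-clang mobilenet_cifar10.cpp mobilenet_cifar10 -t tensor --conf-file data/tuner_confs.txt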