diff --git a/hpvm/CMakeLists.txt b/hpvm/CMakeLists.txt index fcfaf264a64d52bfe13e0023fe92ad12b7cf2016..71e6de5999ad0127da5155d93fe3403c05cb3c7f 100644 --- a/hpvm/CMakeLists.txt +++ b/hpvm/CMakeLists.txt @@ -8,7 +8,13 @@ message(STATUS "CUDA Architecture: ${CMAKE_CUDA_ARCHITECTURES}") # find_package will use the auxillary cmake/Find*.cmake we provide list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) find_package(CUDNN 7 EXACT REQUIRED) # CUDNN_INCLUDE_PATH, CUDNN_LIBRARY_DIR and CUDNN::cudnn -find_package(OpenCL REQUIRED) # Defines ${OpenCL_INCLUDE_DIRS} and ${OpenCL_LIBRARY} +find_package(OpenCL) # Defines ${OpenCL_INCLUDE_DIRS} and ${OpenCL_LIBRARY} +if(${OpenCL_FOUND}) + set(HPVM_USE_OPENCL 1) +else() + message(WARNING "OpenCL not found. --opencl flag of hpvm-clang will be unavailable for this build.") + set(HPVM_USE_OPENCL 0) +endif() include_directories(./include/) diff --git a/hpvm/docs/install.rst b/hpvm/docs/install.rst index fa4a3f747b24501537903edc8f7b09da8928571b..9dd8365ae71277f70e72abe6f3c1f6725351eb1f 100644 --- a/hpvm/docs/install.rst +++ b/hpvm/docs/install.rst @@ -143,14 +143,14 @@ Some common options that can be used with CMake are: * ``-DCMAKE_BUILD_TYPE=type`` --- Valid options for type are Debug, Release, RelWithDebInfo, and MinSizeRel. Default is Debug. * ``-DLLVM_ENABLE_ASSERTIONS=On`` --- Compile with assertion checks enabled (default is Yes for Debug builds, No for all other build types). -Now, compile the HPVM Compilation Tool ``approxhpvm.py`` using: +Now, compile the HPVM Compilation Tool ``hpvm-clang`` using: .. code-block:: shell - make -j<number of threads> approxhpvm.py + make -j<number of threads> hpvm-clang With all the aforementioned steps, HPVM should be built, installed, tested and ready to use. -In particular, ``approxhpvm.py`` should be an executable command from your command line. +In particular, ``hpvm-clang`` should be an executable command from your command line. Benchmarks and Tests -------------------- @@ -159,7 +159,7 @@ We provide a number of general benchmarks, DNN benchmarks, and test cases, writt ``make`` targets ``check-hpvm-pass``, ``check-hpvm-dnn``, and ``check-hpvm-profiler`` tests various components of HPVM and are increasingly time-consuming. -You can run tests similarly as how ``approxhpvm.py`` is compiled: for example, +You can run tests similarly as how ``hpvm-clang`` is compiled: for example, .. code-block:: shell diff --git a/hpvm/lib/Transforms/DFG2LLVM_OpenCL/CMakeLists.txt b/hpvm/lib/Transforms/DFG2LLVM_OpenCL/CMakeLists.txt index 4041df11ce8d79e39d6f72bdf0a1068eae449300..1e34f44f403e373607d2efc10aca7f796c81b938 100644 --- a/hpvm/lib/Transforms/DFG2LLVM_OpenCL/CMakeLists.txt +++ b/hpvm/lib/Transforms/DFG2LLVM_OpenCL/CMakeLists.txt @@ -10,6 +10,7 @@ add_llvm_library( LLVMDFG2LLVM_OpenCL DEPENDS intrinsics_gen + llvm-cbe # Called within the pass PLUGIN_TOOL opt ) diff --git a/hpvm/lib/Transforms/DFG2LLVM_OpenCL/DFG2LLVM_OpenCL.cpp b/hpvm/lib/Transforms/DFG2LLVM_OpenCL/DFG2LLVM_OpenCL.cpp index 5a58f272b3042a4ebfc2e3c7bb3606b5c19e8d84..286bcdebd2ed3322b134581f3e8fdac286ee325b 100644 --- a/hpvm/lib/Transforms/DFG2LLVM_OpenCL/DFG2LLVM_OpenCL.cpp +++ b/hpvm/lib/Transforms/DFG2LLVM_OpenCL/DFG2LLVM_OpenCL.cpp @@ -47,6 +47,8 @@ #include "llvm/Support/ToolOutputFile.h" #include <sstream> +#include <unistd.h> // ".", nullptr() (POSIX-only) +#include <cstdlib> // std::system #ifndef LLVM_BUILD_DIR #error LLVM_BUILD_DIR is not defined @@ -147,7 +149,7 @@ static void getExecuteNodeParams(Module &M, Value *&, Value *&, Value *&, static Value *genWorkGroupPtr(Module &M, std::vector<Value *>, ValueToValueMapTy &, Instruction *, const Twine &WGName = "WGSize"); -static std::string getPTXFilename(const Module &); +static std::string getPTXFilePath(const Module &); static void changeDataLayout(Module &); static void changeTargetTriple(Module &); static void findReturnInst(Function *, std::vector<ReturnInst *> &); @@ -258,7 +260,7 @@ public: DEBUG(errs() << *KernelM); } - void writeKernelsModule(); + void writeKModCompilePTX(); }; // Initialize the HPVM runtime API. This makes it easier to insert these calls @@ -846,7 +848,7 @@ void CGT_OpenCL::codeGen(DFInternalNode *N) { // Now the remaining nodes to be visited should be ignored KernelLaunchNode = NULL; DEBUG(errs() << "Insert Runtime calls\n"); - insertRuntimeCalls(N, kernel, getPTXFilename(M)); + insertRuntimeCalls(N, kernel, getPTXFilePath(M)); } else { DEBUG(errs() << "Found intermediate node. Getting size parameters.\n"); @@ -1901,7 +1903,7 @@ bool DFG2LLVM_OpenCL::runOnModule(Module &M) { CGTVisitor->visit(rootNode); } - CGTVisitor->writeKernelsModule(); + CGTVisitor->writeKModCompilePTX(); // TODO: Edit module epilogue to remove the HPVM intrinsic declarations delete CGTVisitor; @@ -2010,16 +2012,18 @@ void CGT_OpenCL::addCLMetadata(Function *F) { MDN_annotations->addOperand(MDNvvmAnnotationsNode); } -void CGT_OpenCL::writeKernelsModule() { +void CGT_OpenCL::writeKModCompilePTX() { // In addition to deleting all other functions, we also want to spiff it // up a little bit. Do this now. legacy::PassManager Passes; + auto kmodName = getKernelsModuleName(M); + auto ptxPath = getPTXFilePath(M); DEBUG(errs() << "Writing to File --- "); - DEBUG(errs() << getKernelsModuleName(M).c_str() << "\n"); + DEBUG(errs() << kmodName << "\n"); std::error_code EC; - ToolOutputFile Out(getKernelsModuleName(M).c_str(), EC, sys::fs::F_None); + ToolOutputFile Out(kmodName.c_str(), EC, sys::fs::F_None); if (EC) { DEBUG(errs() << EC.message() << '\n'); } @@ -2030,6 +2034,13 @@ void CGT_OpenCL::writeKernelsModule() { // Declare success. Out.keep(); + + // Starts calling llvm-cbe. + auto llvmCBE = std::string(LLVM_BUILD_DIR_STR) + "/bin/llvm-cbe"; + std::string command = llvmCBE + " " + kmodName + " -o " + ptxPath; + DEBUG(errs() << "Compiling PTX from kernel module:\n"); + DEBUG(errs() << command); + std::system(command.c_str()); } Function *CGT_OpenCL::transformFunctionToVoid(Function *F) { @@ -2346,10 +2357,13 @@ static Value *genWorkGroupPtr(Module &M, std::vector<Value *> WGSize, } // Get generated PTX binary name -static std::string getPTXFilename(const Module &M) { +static std::string getPTXFilePath(const Module &M) { std::string moduleID = M.getModuleIdentifier(); - moduleID.append(".kernels.cl"); - return moduleID; + char *cwd_p = get_current_dir_name(); + std::string cwd(cwd_p); + free(cwd_p); + std::string ptxPath = cwd + "/" + moduleID + ".kernels.cl"; + return ptxPath; } // Changes the data layout of the Module to be compiled with OpenCL backend diff --git a/hpvm/projects/hpvm-rt/CMakeLists.txt b/hpvm/projects/hpvm-rt/CMakeLists.txt index 48cf16282661862a86a254b6e40097a830cc2d43..3147d00e892f8d89b71ebbe7e04aa0fa33210534 100644 --- a/hpvm/projects/hpvm-rt/CMakeLists.txt +++ b/hpvm/projects/hpvm-rt/CMakeLists.txt @@ -8,9 +8,13 @@ SET(CMAKE_CXX_STANDARD 11) # we want ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/hpvm-rt.dir/hpvm-rt.cpp.o # which is a LLVM Bitcode file because of the -flto below. add_llvm_library(hpvm-rt hpvm-rt.cpp DEPENDS clang) -target_compile_options(hpvm-rt PUBLIC -flto) -target_include_directories(hpvm-rt PRIVATE ${OpenCL_INCLUDE_DIRS}) -target_link_directories(hpvm-rt PUBLIC ${OpenCL_LIBRARY}) +target_compile_options(hpvm-rt PUBLIC -flto -DHPVM_USE_OPENCL=${HPVM_USE_OPENCL}) +if(${HPVM_USE_OPENCL}) + target_include_directories(hpvm-rt PRIVATE ${OpenCL_INCLUDE_DIRS}) + target_link_directories(hpvm-rt PUBLIC ${OpenCL_LIBRARY}) +else() + message(STATUS "hpvm-rt.bc is not using OpenCL.") +endif() # Move and rename hpvm-rt.cpp.o to be an actual bc code add_custom_command( diff --git a/hpvm/projects/hpvm-rt/hpvm-rt.cpp b/hpvm/projects/hpvm-rt/hpvm-rt.cpp index dff4f0e4a21e8e0eafad36bd7b14805c584cd10c..f0716378fe8c62639555396a3d268aae051534ba 100644 --- a/hpvm/projects/hpvm-rt/hpvm-rt.cpp +++ b/hpvm/projects/hpvm-rt/hpvm-rt.cpp @@ -1,4 +1,6 @@ -#include <CL/cl.h> + +//#define HPVM_USE_OPENCL 1 + #include <algorithm> #include <cassert> #include <cstdio> @@ -8,9 +10,14 @@ #include <map> #include <pthread.h> #include <string> - #include <unistd.h> +#ifdef HPVM_USE_OPENCL + +#include <CL/cl.h> + +#endif + #if _POSIX_VERSION >= 200112L #include <sys/time.h> #endif @@ -40,6 +47,9 @@ typedef struct { std::vector<CircularBuffer<uint64_t> *> *isLastInputBuffers; } DFNodeContext_CPU; + +#ifdef HPVM_USE_OPENCL + typedef struct { cl_context clOCLContext; cl_command_queue clCommandQue; @@ -51,6 +61,9 @@ cl_context globalOCLContext; cl_device_id *clDevices; cl_command_queue globalCommandQue; +#endif + + MemTracker MTracker; vector<DFGDepth> DStack; // Mutex to prevent concurrent access by multiple thereads in pipeline @@ -59,6 +72,8 @@ pthread_mutex_t ocl_mtx; #define NUM_TESTS 1 hpvm_TimerSet kernel_timer; +#ifdef HPVM_USE_OPENCL + static const char *getErrorString(cl_int error) { switch (error) { // run-time and JIT compiler errors @@ -209,6 +224,15 @@ static inline void checkErr(cl_int err, cl_int success, const char *name) { } } +#endif + + +void openCLAbort(){ + cout <<" ERROR: OpenCL NOT found!. Please Recompile with OpenCL - Make sure to have OpenCL on System \n "; + abort(); +} + + /************************* Depth Stack Routines ***************************/ void llvm_hpvm_cpu_dstack_push(unsigned n, uint64_t limitX, uint64_t iX, @@ -260,6 +284,9 @@ uint64_t llvm_hpvm_cpu_getDimInstance(unsigned level, unsigned dim) { /********************** Memory Tracking Routines **************************/ void llvm_hpvm_track_mem(void *ptr, size_t size) { + +#ifdef HPVM_USE_OPENCL + DEBUG(cout << "Start tracking memory: " << ptr << flush << "\n"); MemTrackerEntry *MTE = MTracker.lookup(ptr); if (MTE != NULL) { @@ -269,9 +296,19 @@ void llvm_hpvm_track_mem(void *ptr, size_t size) { DEBUG(cout << "Inserting ID " << ptr << " in the MemTracker Table\n"); MTracker.insert(ptr, size, MemTrackerEntry::HOST, ptr); DEBUG(MTracker.print()); + +#else + + openCLAbort(); + +#endif + } void llvm_hpvm_untrack_mem(void *ptr) { + +#ifdef HPVM_USE_OPENCL + DEBUG(cout << "Stop tracking memory: " << ptr << flush << "\n"); MemTrackerEntry *MTE = MTracker.lookup(ptr); if (MTE == NULL) { @@ -284,11 +321,22 @@ void llvm_hpvm_untrack_mem(void *ptr) { clReleaseMemObject((cl_mem)MTE->getAddress()); MTracker.remove(ptr); DEBUG(MTracker.print()); + +#else + + openCLAbort(); + +#endif + } + +#ifdef HPVM_USE_OPENCL + static void *llvm_hpvm_ocl_request_mem(void *ptr, size_t size, DFNodeContext_OCL *Context, bool isInput, bool isOutput) { + pthread_mutex_lock(&ocl_mtx); DEBUG(cout << "[OCL] Request memory: " << ptr << " for context: " << Context->clOCLContext << flush << "\n"); @@ -343,13 +391,20 @@ static void *llvm_hpvm_ocl_request_mem(void *ptr, size_t size, DEBUG(MTracker.print()); pthread_mutex_unlock(&ocl_mtx); return d_input; + } +#endif + + void *llvm_hpvm_cpu_argument_ptr(void *ptr, size_t size) { return llvm_hpvm_request_mem(ptr, size); } void *llvm_hpvm_request_mem(void *ptr, size_t size) { + +#ifdef HPVM_USE_OPENCL + pthread_mutex_lock(&ocl_mtx); DEBUG(cout << "[CPU] Request memory: " << ptr << flush << "\n"); MemTrackerEntry *MTE = MTracker.lookup(ptr); @@ -386,6 +441,13 @@ void *llvm_hpvm_request_mem(void *ptr, size_t size) { DEBUG(MTracker.print()); pthread_mutex_unlock(&ocl_mtx); return ptr; + +#else + + openCLAbort(); + +#endif + } /*************************** Timer Routines **********************************/ @@ -419,6 +481,9 @@ get_last_async(struct hpvm_TimerSet *timers) { } static void insert_marker(struct hpvm_TimerSet *tset, enum hpvm_TimerID timer) { + +#ifdef HPVM_USE_OPENCL + cl_int ciErrNum = CL_SUCCESS; struct hpvm_async_time_marker_list **new_event = &(tset->async_markers); @@ -441,10 +506,20 @@ static void insert_marker(struct hpvm_TimerSet *tset, enum hpvm_TimerID timer) { if (ciErrNum != CL_SUCCESS) { fprintf(stderr, "Error Enqueueing Marker!\n"); } + +#else + + openCLAbort(); + +#endif + } static void insert_submarker(struct hpvm_TimerSet *tset, char *label, enum hpvm_TimerID timer) { + +#ifdef HPVM_USE_OPENCL + cl_int ciErrNum = CL_SUCCESS; struct hpvm_async_time_marker_list **new_event = &(tset->async_markers); @@ -467,10 +542,20 @@ static void insert_submarker(struct hpvm_TimerSet *tset, char *label, if (ciErrNum != CL_SUCCESS) { fprintf(stderr, "Error Enqueueing Marker!\n"); } + +#else + + openCLAbort(); + +#endif + } /* Assumes that all recorded events have completed */ static hpvm_Timestamp record_async_times(struct hpvm_TimerSet *tset) { + +#ifdef HPVM_USE_OPENCL + struct hpvm_async_time_marker_list *next_interval = NULL; struct hpvm_async_time_marker_list *last_marker = get_last_async(tset); hpvm_Timestamp total_async_time = 0; @@ -517,6 +602,13 @@ static hpvm_Timestamp record_async_times(struct hpvm_TimerSet *tset) { next_interval->timerID = INVALID_TIMERID; return total_async_time; + +#else + + openCLAbort(); + +#endif + } static void accumulate_time(hpvm_Timestamp *accum, hpvm_Timestamp start, @@ -733,6 +825,9 @@ void hpvm_AddSubTimer(struct hpvm_TimerSet *timers, char *label, } void hpvm_SwitchToTimer(struct hpvm_TimerSet *timers, enum hpvm_TimerID timer) { + +#ifdef HPVM_USE_OPENCL + // cerr << "Switch to timer: " << timer << flush << "\n"; /* Stop the currently running timer */ if (timers->current != hpvm_TimerID_NONE) { @@ -849,10 +944,21 @@ void hpvm_SwitchToTimer(struct hpvm_TimerSet *timers, enum hpvm_TimerID timer) { } } timers->current = timer; + +#else + + openCLAbort(); + +#endif + + } void hpvm_SwitchToSubTimer(struct hpvm_TimerSet *timers, char *label, enum hpvm_TimerID category) { + +#ifdef HPVM_USE_OPENCL + struct hpvm_SubTimerList *subtimerlist = timers->sub_timer_list[timers->current]; struct hpvm_SubTimer *curr = @@ -1001,6 +1107,13 @@ void hpvm_SwitchToSubTimer(struct hpvm_TimerSet *timers, char *label, } timers->current = category; + +#else + + openCLAbort(); + +#endif + } void hpvm_PrintTimerSet(struct hpvm_TimerSet *timers) { @@ -1069,6 +1182,9 @@ void hpvm_PrintTimerSet(struct hpvm_TimerSet *timers) { } void hpvm_DestroyTimerSet(struct hpvm_TimerSet *timers) { + +#ifdef HPVM_USE_OPENCL + /* clean up all of the async event markers */ struct hpvm_async_time_marker_list *event = timers->async_markers; while (event != NULL) { @@ -1106,6 +1222,13 @@ void hpvm_DestroyTimerSet(struct hpvm_TimerSet *timers) { free(timers->sub_timer_list[i]); } } + +#else + + openCLAbort(); + +#endif + } /**************************** Pipeline API ************************************/ @@ -1304,10 +1427,13 @@ void llvm_hpvm_cpu_wait(void *graphID) { DEBUG(cout << "\t... pthread Done!\n"); } + +#ifdef HPVM_USE_OPENCL + // Returns the platform name. std::string getPlatformName(cl_platform_id pid) { + cl_int status; - size_t sz; status = clGetPlatformInfo(pid, CL_PLATFORM_NAME, 0, NULL, &sz); checkErr(status, CL_SUCCESS, "Query for platform name size failed"); @@ -1318,12 +1444,18 @@ std::string getPlatformName(cl_platform_id pid) { const auto &tmp = std::string(name, name + sz); delete[] name; - return tmp; + return tmp; } +#endif + + +#ifdef HPVM_USE_OPENCL + // Searches all platforms for the first platform whose name // contains the search string (case-insensitive). cl_platform_id findPlatform(const char *platform_name_search) { + cl_int status; std::string search = platform_name_search; @@ -1360,7 +1492,13 @@ cl_platform_id findPlatform(const char *platform_name_search) { assert(false && "No matching platform found!"); } +#endif + + void *llvm_hpvm_ocl_initContext(enum hpvm::Target T) { + +#ifdef HPVM_USE_OPENCL + pthread_mutex_lock(&ocl_mtx); DEBUG(std::string Target = T == hpvm::GPU_TARGET ? "GPU" : "SPIR"); DEBUG(cout << "Initializing Context for " << Target << " device\n"); @@ -1450,9 +1588,19 @@ void *llvm_hpvm_ocl_initContext(enum hpvm::Target T) { pthread_mutex_unlock(&ocl_mtx); return globalOCLContext; + +#else + + openCLAbort(); + +#endif + } void llvm_hpvm_ocl_clearContext(void *graphID) { + +#ifdef HPVM_USE_OPENCL + pthread_mutex_lock(&ocl_mtx); DEBUG(cout << "Clear Context\n"); DFNodeContext_OCL *Context = (DFNodeContext_OCL *)graphID; @@ -1464,9 +1612,19 @@ void llvm_hpvm_ocl_clearContext(void *graphID) { cout << "Printing HPVM Timer: KernelTimer\n"; hpvm_PrintTimerSet(&kernel_timer); pthread_mutex_unlock(&ocl_mtx); + +#else + + openCLAbort(); + +#endif + } void llvm_hpvm_ocl_argument_shared(void *graphID, int arg_index, size_t size) { + +#ifdef HPVM_USE_OPENCL + pthread_mutex_lock(&ocl_mtx); DEBUG(cout << "Set Shared Memory Input:"); DEBUG(cout << "\tArgument Index = " << arg_index << ", Size = " << size @@ -1477,10 +1635,20 @@ void llvm_hpvm_ocl_argument_shared(void *graphID, int arg_index, size_t size) { cl_int errcode = clSetKernelArg(Context->clKernel, arg_index, size, NULL); checkErr(errcode, CL_SUCCESS, "Failure to set shared memory argument"); pthread_mutex_unlock(&ocl_mtx); + +#else + + openCLAbort(); + +#endif + } void llvm_hpvm_ocl_argument_scalar(void *graphID, void *input, int arg_index, size_t size) { + +#ifdef HPVM_USE_OPENCL + pthread_mutex_lock(&ocl_mtx); DEBUG(cout << "Set Scalar Input:"); DEBUG(cout << "\tArgument Index = " << arg_index << ", Size = " << size @@ -1491,10 +1659,20 @@ void llvm_hpvm_ocl_argument_scalar(void *graphID, void *input, int arg_index, cl_int errcode = clSetKernelArg(Context->clKernel, arg_index, size, input); checkErr(errcode, CL_SUCCESS, "Failure to set constant input argument"); pthread_mutex_unlock(&ocl_mtx); + +#else + + openCLAbort(); + +#endif + } void *llvm_hpvm_ocl_argument_ptr(void *graphID, void *input, int arg_index, size_t size, bool isInput, bool isOutput) { + +#ifdef HPVM_USE_OPENCL + pthread_mutex_lock(&ocl_mtx); DEBUG(cout << "Set Pointer Input:"); DEBUG(cout << "\tArgument Index = " << arg_index << ", Ptr = " << input @@ -1518,9 +1696,19 @@ void *llvm_hpvm_ocl_argument_ptr(void *graphID, void *input, int arg_index, DEBUG(cout << "\tDevicePtr = " << d_input << flush << "\n"); pthread_mutex_unlock(&ocl_mtx); return d_input; + +#else + + openCLAbort(); + +#endif + } void *llvm_hpvm_ocl_output_ptr(void *graphID, int arg_index, size_t size) { + +#ifdef HPVM_USE_OPENCL + pthread_mutex_lock(&ocl_mtx); DEBUG(cout << "Set device memory for Output Struct:"); DEBUG(cout << "\tArgument Index = " << arg_index << ", Size = " << size @@ -1536,12 +1724,22 @@ void *llvm_hpvm_ocl_output_ptr(void *graphID, int arg_index, size_t size) { DEBUG(cout << "\tDevicePtr = " << d_output << flush << "\n"); pthread_mutex_unlock(&ocl_mtx); return d_output; + +#else + + openCLAbort(); + +#endif + } void llvm_hpvm_ocl_free(void *ptr) {} void *llvm_hpvm_ocl_getOutput(void *graphID, void *h_output, void *d_output, size_t size) { + +#ifdef HPVM_USE_OPENCL + pthread_mutex_lock(&ocl_mtx); DEBUG(cout << "Get Output:\n"); DEBUG(cout << "\tHostPtr = " << h_output << ", DevicePtr = " << d_output @@ -1555,11 +1753,21 @@ void *llvm_hpvm_ocl_getOutput(void *graphID, void *h_output, void *d_output, checkErr(errcode, CL_SUCCESS, "[getOutput] Failure to read output"); pthread_mutex_unlock(&ocl_mtx); return h_output; + +#else + + openCLAbort(); + +#endif + } void *llvm_hpvm_ocl_executeNode(void *graphID, unsigned workDim, const size_t *localWorkSize, const size_t *globalWorkSize) { + +#ifdef HPVM_USE_OPENCL + pthread_mutex_lock(&ocl_mtx); size_t GlobalWG[3]; @@ -1610,6 +1818,13 @@ void *llvm_hpvm_ocl_executeNode(void *graphID, unsigned workDim, pthread_mutex_unlock(&ocl_mtx); return NULL; + +#else + + openCLAbort(); + +#endif + } ////////////////////////////////////////////////////////////////////////////// @@ -1656,6 +1871,9 @@ static char *LoadProgSource(const char *Filename, size_t *szFinalLength) { } void *llvm_hpvm_ocl_launch(const char *FileName, const char *KernelName) { + +#ifdef HPVM_USE_OPENCL + pthread_mutex_lock(&ocl_mtx); DEBUG(cout << "Launch OCL Kernel\n"); // Initialize OpenCL @@ -1717,14 +1935,31 @@ void *llvm_hpvm_ocl_launch(const char *FileName, const char *KernelName) { pthread_mutex_unlock(&ocl_mtx); return Context; + +#else + + openCLAbort(); + +#endif + } void llvm_hpvm_ocl_wait(void *graphID) { + +#ifdef HPVM_USE_OPENCL + pthread_mutex_lock(&ocl_mtx); DEBUG(cout << "Wait\n"); DFNodeContext_OCL *Context = (DFNodeContext_OCL *)graphID; clFinish(Context->clCommandQue); pthread_mutex_unlock(&ocl_mtx); + +#else + + openCLAbort(); + +#endif + } void llvm_hpvm_switchToTimer(void **timerSet, enum hpvm_TimerID timer) { @@ -1749,3 +1984,5 @@ void *llvm_hpvm_initializeTimerSet() { pthread_mutex_unlock(&ocl_mtx); return TS; } + + diff --git a/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt b/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt index f6fed2ac296f93bc060fe09b3b889b42ee8c4a1a..cbabc8bbe0111a0ec6c99520176a8b37a530a4fb 100644 --- a/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt +++ b/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt @@ -53,7 +53,7 @@ endif() # -- Manually list the directories (TRT_LINK_DIRS) and libraries (TRT_LINK_LIBS) # tensor_runtime links to, because CMake doesn't help us do this. -# This is needed by both approxhpvm.py and the RPATH setting step (below). +# This is needed by both hpvm-clang and the RPATH setting step (below). # First, take a guess at the paths to the libraries that are used here. # (CMake, why do you make this so difficult?) foreach(interface_lib ${LINK_LIBS}) @@ -120,7 +120,7 @@ function(add_tensor_runtime target_name) target_link_options(${target_name} PRIVATE "-Wl,-rpath,${libdir}") endforeach() # Also slap TRT_LINK_DIRS and TRT_LINK_LIBS on this target - # so that approxhpvm.py can read them. (we'll create our own properties.) + # so that hpvm-clang can read them. (we'll create our own properties.) set_target_properties( ${target_name} PROPERTIES TRT_LINK_DIRS "${TRT_LINK_DIRS}" diff --git a/hpvm/projects/torch2hpvm/torch2hpvm/compile.py b/hpvm/projects/torch2hpvm/torch2hpvm/compile.py index 922b6795ade457ba4c961af4d2e70ce150e22e92..172448a60d4f65fc4aafc09c9a76d9cb492ff7b0 100644 --- a/hpvm/projects/torch2hpvm/torch2hpvm/compile.py +++ b/hpvm/projects/torch2hpvm/torch2hpvm/compile.py @@ -172,7 +172,7 @@ class ModelExporter: from subprocess import run args = [ - "approxhpvm.py", + "hpvm-clang", str(self.codefile), str(output_binary), *self.compile_args, diff --git a/hpvm/scripts/hpvm_installer.py b/hpvm/scripts/hpvm_installer.py index 255b2864ea3f5df6a0cae9604502c715e5fdc6f1..cce8b3f07928d9ab096df3166bd02f4e6f8e1f5d 100755 --- a/hpvm/scripts/hpvm_installer.py +++ b/hpvm/scripts/hpvm_installer.py @@ -1,9 +1,9 @@ #!/usr/bin/env python3 -from argparse import ArgumentParser, Namespace +from argparse import ArgumentParser from os import chdir, environ, makedirs from pathlib import Path from subprocess import CalledProcessError, check_call -from typing import List +from typing import List, Union VERSION = "9.0.0" URL = "http://releases.llvm.org" @@ -30,7 +30,7 @@ LINKS = [ "test", "tools", ] -MAKE_TARGETS = ["approxhpvm.py"] +MAKE_TARGETS = ["hpvm-clang"] MAKE_TEST_TARGETS = ["check-hpvm-dnn", "check-hpvm-pass"] # Relative to project root which is __file__.parent.parent @@ -79,6 +79,11 @@ def parse_args(args=None): "Supported targets: AArch64, AMDGPU, ARM, BPF, Hexagon, Mips, MSP430, NVPTX, PowerPC, " "Sparc, SystemZ, X86, XCore.", ) + parser.add_argument( + "--ninja", + action="store_true", + help="Use Ninja to build HPVM. Uses 'make' otherwise.", + ) parser.add_argument( "-r", "--run-tests", action="store_true", help="Build and run test cases" ) @@ -166,6 +171,8 @@ def print_args(args): print("Running with the following options:") print(f" Automated build: {not args.no_build}") print(f" Build directory: {args.build_dir}") + build_sys = "ninja" if args.ninja else "make" + print(f" Build system: {build_sys}") print(f" Threads: {args.parallel}") print(f" Targets: {args.targets}") print(f" Download DNN weights: {not args.no_params}") @@ -256,7 +263,7 @@ def build( build_dir: Path, nthreads: int, targets: str, - build_test_targets: bool, + use_ninja: bool, cmake_additional_args: List[str], ): print("Now building...") @@ -272,13 +279,17 @@ def build( f"-DLLVM_TARGETS_TO_BUILD={targets}", *cmake_additional_args, ] + if use_ninja: + cmake_args.append("-GNinja") print(f"CMake: {' '.join(cmake_args)}") print(f"=============================") check_call(cmake_args) - make_args = ["make", f"-j{nthreads}", *MAKE_TARGETS] - print(f"Make: {' '.join(make_args)}") + + build_sys = "ninja" if use_ninja else "make" + build_args = [build_sys, f"-j{nthreads}", *MAKE_TARGETS] + print(f"Build system ({build_sys}): {' '.join(build_args)}") print(f"=============================") - check_call(make_args) + check_call(build_args) chdir(ROOT_DIR) @@ -292,12 +303,13 @@ def install_py_packages(): check_call([sys.executable, "-m", "pip", "install", str(package_home)]) -def run_tests(build_dir: Path, nthreads: int): +def run_tests(build_dir: Path, use_ninja: bool, nthreads: int): chdir(build_dir) - make_args = ["make", f"-j{nthreads}", *MAKE_TEST_TARGETS] - print(f"Tests: {' '.join(make_args)}") + build_sys = "ninja" if use_ninja else "make" + build_args = [build_sys, f"-j{nthreads}", *MAKE_TARGETS] + print(f"Tests: {' '.join(build_args)}") print(f"=============================") - check_call(make_args) + check_call(build_args) chdir(ROOT_DIR) @@ -311,8 +323,8 @@ def input_with_check(prompt: str, parse, prompt_when_invalid: str): return value -def download(link: str, output: Path): - check_call(["curl", "-L", link, "-o", output]) +def download(link: str, output: Union[Path, str]): + check_call(["curl", "-L", link, "-o", str(output)]) def main(): @@ -333,21 +345,15 @@ HPVM not installed. To complete installation, follow these instructions: - Create and navigate to a folder "./build" - Run "cmake ../llvm [options]". Find potential options in README.md. -- Run "make -j<number of threads> approxhpvm.py" and then "make install" +- Run "make -j<number of threads> hpvm-clang" and then "make install" For more details refer to README.md. """ ) return else: - build( - args.build_dir, - args.parallel, - args.targets, - args.run_tests, - args.cmake_args, - ) + build(args.build_dir, args.parallel, args.targets, args.ninja, args.cmake_args) if args.run_tests: - run_tests(args.build_dir, args.parallel) + run_tests(args.build_dir, args.ninja, args.parallel) else: print("Skipping tests.") diff --git a/hpvm/test/CMakeLists.txt b/hpvm/test/CMakeLists.txt index cb548b84dc9944e54c7dfdd0a0d74cf0aed9aafd..3c4f26472317f511edaab98c5e4a4f8ed7ba2dfb 100644 --- a/hpvm/test/CMakeLists.txt +++ b/hpvm/test/CMakeLists.txt @@ -1,5 +1,12 @@ include(../cmake/TestFile.cmake) # Generation of `.test` files in CMake + +# clang and clang++ are used to compile benchmarks +# in `benchmarks` and `dnn_benchmarks/hpvm-c`. +set(CLANG_C ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/clang) +set(CLANG_CXX ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/clang++) + add_subdirectory(hpvm_pass) # Passes test suite +add_subdirectory(benchmarks) add_subdirectory(dnn_benchmarks/hpvm-c) # HPVM-C DNN accuracy test suite add_subdirectory(dnn_benchmarks/tensor-rt-src) # tensor_runtime DNN (build only, no tests) add_subdirectory(dnn_benchmarks/profiling) # hpvm-profiler test suite diff --git a/hpvm/test/benchmarks/CMakeLists.txt b/hpvm/test/benchmarks/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..4d4f0691fc0e70cd9b532947688da264b2ca4e7b --- /dev/null +++ b/hpvm/test/benchmarks/CMakeLists.txt @@ -0,0 +1,38 @@ +macro(hpvm_compile_util_sources compiler_flags util_srcs language_mode) + # "Returns" ${util_bitcodes}, a list of paths to generated bitcode (.ll) files + foreach(src ${util_srcs}) + set(src_path ${CMAKE_CURRENT_SOURCE_DIR}/${src}) + get_filename_component(src_name ${src_path} NAME) + set(output_path ${CMAKE_CURRENT_BINARY_DIR}/${src_name}.ll) + add_custom_command( + OUTPUT ${output_path} + COMMAND ${CLANG_C} -x${language_mode} + ${compiler_flags} ${src_path} -emit-llvm -S -o ${output_path} + DEPENDS ${src_path} + ) + list(APPEND util_bitcodes ${output_path}) + endforeach() +endmacro() + +function(add_hpvm_benchmark + target_name output_bin_name all_flags language_mode + main_src util_bitcodes +) + set(output_bin_path ${CMAKE_CURRENT_BINARY_DIR}/${output_bin_name}) + set(main_src_path ${CMAKE_CURRENT_SOURCE_DIR}/${main_src}) + if(util_bitcodes) + set(bitcodes_arg -b ${util_bitcodes}) + else() + set(bitcodes_arg "") + endif() + add_custom_command( + OUTPUT ${output_bin_path} + COMMAND hpvm-clang ${all_flags} -x${language_mode} + ${bitcodes_arg} -- ${main_src_path} ${output_bin_path} + DEPENDS ${main_src_path} ${util_bitcodes} hpvm-clang + ) + add_custom_target(${target_name} DEPENDS ${output_bin_path}) +endfunction(add_hpvm_benchmark) + +add_subdirectory(hpvm-cava) +add_subdirectory(pipeline) diff --git a/hpvm/test/benchmarks/hpvm-cava/CMakeLists.txt b/hpvm/test/benchmarks/hpvm-cava/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..d89a01a05669a0c49fecf1ccbd25029fce14143b --- /dev/null +++ b/hpvm/test/benchmarks/hpvm-cava/CMakeLists.txt @@ -0,0 +1,24 @@ +set(other_srcs src/load_cam_model.c src/cam_pipe_utility.c src/utility.c) +set( + compiler_flags + # Insanity warning: only -O1 works for this benchmark. + -O1 -I ${CMAKE_CURRENT_SOURCE_DIR}/src + -DDMA_MODE -DDMA_INTERFACE_V3 +) + +# Sets ${util_bitcodes} +hpvm_compile_util_sources("${compiler_flags}" "${other_srcs}" "c") + +set(all_flags_cpu ${compiler_flags} "-DDEVICE=CPU_TARGET" "-lpthread") +add_hpvm_benchmark( + "hpvm_cava_cpu" "hpvm-cava-cpu" "${all_flags_cpu}" "c" + src/main.c "${util_bitcodes}" +) + +if(HPVM_USE_OPENCL) + set(all_flags_gpu ${compiler_flags} "-DDEVICE=GPU_TARGET" "--opencl" "-lpthread") + add_hpvm_benchmark( + "hpvm_cava_gpu" "hpvm-cava-gpu" "${all_flags_gpu}" "c" + src/main.c "${util_bitcodes}" + ) +endif() diff --git a/hpvm/test/benchmarks/hpvm-cava/Makefile b/hpvm/test/benchmarks/hpvm-cava/Makefile deleted file mode 100644 index 58dfa72aacb172252ea1c13ed3331322fb600861..0000000000000000000000000000000000000000 --- a/hpvm/test/benchmarks/hpvm-cava/Makefile +++ /dev/null @@ -1,131 +0,0 @@ -# This Makefile compiles the HPVM-CAVA pilot project. -# It builds HPVM-related dependencies, then the native camera pipeline ISP code. -# -# Paths to some dependencies (e.g., HPVM, LLVM) must exist in Makefile.config, -# which can be copied from Makefile.config.example for a start. - -CONFIG_FILE := ../include/Makefile.config - -ifeq ($(wildcard $(CONFIG_FILE)),) - $(error $(CONFIG_FILE) not found. See $(CONFIG_FILE).example) -endif -include $(CONFIG_FILE) - -# Compiler Flags - -LFLAGS += -lm -lrt - -ifeq ($(TARGET),) - TARGET = gpu -endif - -# Build dirs -SRC_DIR = src/ -CAM_PIPE_SRC_DIR = $(SRC_DIR) -BUILD_DIR = build/$(TARGET) -CURRENT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) - - -INCLUDES += -I$(SRC_DIR) -I$(CAM_PIPE_SRC_DIR) -ifneq ($(CONFUSE_ROOT),) - INCLUDES += -I$(CONFUSE_ROOT)/include - LFLAGS += -L$(CONFUSE_ROOT)/lib -endif - -EXE = cava-hpvm-$(TARGET) - -LFLAGS += -pthread - -## BEGIN HPVM MAKEFILE -LANGUAGE=hpvm -SRCDIR_OBJS=load_cam_model.ll cam_pipe_utility.ll utility.ll -OBJS_SRC=src/load_cam_model.c src/cam_pipe_utility.c src/utility.c -HPVM_OBJS=main.hpvm.ll -APP = $(EXE) -APP_CUDALDFLAGS=-lm -lstdc++ -APP_CFLAGS= $(INCLUDES) -DDMA_MODE -DDMA_INTERFACE_V3 -APP_CXXFLAGS=-ffast-math -O0 -I/opt/opencv/include -APP_LDFLAGS=$(LFLAGS) - -CFLAGS = -O1 $(APP_CFLAGS) $(PLATFORM_CFLAGS) -OBJS_CFLAGS = -O1 $(APP_CFLAGS) $(PLATFORM_CFLAGS) -CXXFLAGS = $(APP_CXXFLAGS) $(PLATFORM_CXXFLAGS) -LDFLAGS= $(APP_LDFLAGS) $(PLATFORM_LDFLAGS) - -HPVM_RT_PATH = $(LLVM_BUILD_DIR)/tools/hpvm/projects/hpvm-rt - -HPVM_RT_LIB = $(HPVM_RT_PATH)/hpvm-rt.bc - - -TESTGEN_OPTFLAGS = -load LLVMGenHPVM.so -genhpvm -globaldce - -ifeq ($(TARGET),seq) - DEVICE = CPU_TARGET - HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -dfg2llvm-cpu -clearDFG - HPVM_OPTFLAGS += -hpvm-timers-cpu -else - DEVICE = GPU_TARGET - HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -localmem -dfg2llvm-opencl -dfg2llvm-cpu -clearDFG - HPVM_OPTFLAGS += -hpvm-timers-cpu -hpvm-timers-ptx -endif - TESTGEN_OPTFLAGS += -hpvm-timers-gen - -CFLAGS += -DDEVICE=$(DEVICE) -CXXFLAGS += -DDEVICE=$(DEVICE) - -# Add BUILDDIR as a prefix to each element of $1 -INBUILDDIR=$(addprefix $(BUILD_DIR)/,$(1)) - -.PRECIOUS: $(BUILD_DIR)/%.ll - -OBJS = $(call INBUILDDIR,$(SRCDIR_OBJS)) -TEST_OBJS = $(call INBUILDDIR,$(HPVM_OBJS)) -KERNEL = $(TEST_OBJS).kernels.ll - -ifeq ($(TARGET),gpu) - KERNEL_OCL = $(TEST_OBJS).kernels.cl -endif - -HOST_LINKED = $(BUILD_DIR)/$(APP).linked.ll -HOST = $(BUILD_DIR)/$(APP).host.ll - -ifeq ($(OPENCL_PATH),) -FAILSAFE=no_opencl -else -FAILSAFE= -endif - -# Targets - -default: $(FAILSAFE) $(BUILD_DIR) $(KERNEL_OCL) $(EXE) - -clean : - if [ -f $(EXE) ]; then rm $(EXE); fi - if [ -f DataflowGraph.dot ]; then rm DataflowGraph.dot*; fi - if [ -d $(BUILD_DIR) ]; then rm -rf $(BUILD_DIR); fi - -$(KERNEL_OCL) : $(KERNEL) - $(OCLBE) $< -o $@ - -$(EXE) : $(HOST_LINKED) - $(CXX) -O3 $(LDFLAGS) $< -o $@ - -$(HOST_LINKED) : $(HOST) $(OBJS) $(HPVM_RT_LIB) - $(LLVM_LINK) $^ -S -o $@ - -$(HOST) $(KERNEL): $(BUILD_DIR)/$(HPVM_OBJS) - $(OPT) $(HPVM_OPTFLAGS) -S $< -o $(HOST) - -$(BUILD_DIR): - mkdir -p $(BUILD_DIR) - -$(BUILD_DIR)/%.ll : $(SRC_DIR)/%.c - $(CC) $(OBJS_CFLAGS) -emit-llvm -S -o $@ $< - -$(BUILD_DIR)/main.ll : $(SRC_DIR)/main.c - $(CC) $(CFLAGS) -emit-llvm -S -o $@ $< - -$(BUILD_DIR)/main.hpvm.ll : $(BUILD_DIR)/main.ll - $(OPT) $(TESTGEN_OPTFLAGS) $< -S -o $@ - -## END HPVM MAKEFILE diff --git a/hpvm/test/benchmarks/hpvm-cava/src/main.c b/hpvm/test/benchmarks/hpvm-cava/src/main.c index 09430239d7c06a0b2fab2a59c7e6310babe617c6..0d8fb37d0103b55234e59e8d2b66742faed207b6 100644 --- a/hpvm/test/benchmarks/hpvm-cava/src/main.c +++ b/hpvm/test/benchmarks/hpvm-cava/src/main.c @@ -53,6 +53,7 @@ typedef struct __attribute__((__packed__)) { } RootIn; typedef enum _argnum { + CAM_MODEL, RAW_IMAGE_BIN, OUTPUT_IMAGE_BIN, NUM_REQUIRED_ARGS, @@ -67,7 +68,7 @@ typedef struct _arguments { } arguments; static char prog_doc[] = "\nCamera pipeline on gem5-Aladdin.\n"; -static char args_doc[] = "path/to/raw-image-binary path/to/output-image-binary"; +static char args_doc[] = "path/to/cam-model path/to/raw-image-binary path/to/output-image-binary"; static struct argp_option options[] = { {"num-inputs", 'n', "N", 0, "Number of input images"}, {0}, @@ -724,14 +725,14 @@ int main(int argc, char *argv[]) { *transform_out, *gamut_out; float *TsTw, *ctrl_pts, *weights, *coefs, *tone_map, *l2_dist; - TsTw = get_TsTw("cam_models/NikonD7000/", wb_index); + TsTw = get_TsTw(args.args[CAM_MODEL], wb_index); float *trans = transpose_mat(TsTw, CHAN_SIZE, CHAN_SIZE); free(TsTw); TsTw = trans; - ctrl_pts = get_ctrl_pts("cam_models/NikonD7000/", num_ctrl_pts); - weights = get_weights("cam_models/NikonD7000/", num_ctrl_pts); - coefs = get_coefs("cam_models/NikonD7000/", num_ctrl_pts); - tone_map = get_tone_map("cam_models/NikonD7000/"); + ctrl_pts = get_ctrl_pts(args.args[CAM_MODEL], num_ctrl_pts); + weights = get_weights(args.args[CAM_MODEL], num_ctrl_pts); + coefs = get_coefs(args.args[CAM_MODEL], num_ctrl_pts); + tone_map = get_tone_map(args.args[CAM_MODEL]); input_scaled = (float *)malloc_aligned(bytes_fimage); result_scaled = (float *)malloc_aligned(bytes_fimage); diff --git a/hpvm/test/benchmarks/pipeline/CMakeLists.txt b/hpvm/test/benchmarks/pipeline/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..db2b8eeab1647f4ac72c05127877ecec1f949c90 --- /dev/null +++ b/hpvm/test/benchmarks/pipeline/CMakeLists.txt @@ -0,0 +1,15 @@ +find_package(OpenCV 2) +if(${OpenCV_FOUND}) + set( + all_flags + -O3 -I${OpenCV_INCLUDE_DIRS} + -ffast-math -fno-lax-vector-conversions -fno-vectorize -fno-slp-vectorize + -lpthread + ) + add_hpvm_benchmark("pipeline_cpu" "pipeline-cpu" "${all_flags}" "c++" src/main.cc "") + if(HPVM_USE_OPENCL) + add_hpvm_benchmark("pipeline_gpu" "pipeline-gpu" "${all_flags};--opencl" "c++" src/main.cc "") + endif() +else() + message(WARNING "opencv-2 not found; not compiling HPVM benchmark 'pipeline'.") +endif() diff --git a/hpvm/test/benchmarks/pipeline/Makefile b/hpvm/test/benchmarks/pipeline/Makefile deleted file mode 100644 index 8a55393f241f30d840cbf85c31488e652c2023a0..0000000000000000000000000000000000000000 --- a/hpvm/test/benchmarks/pipeline/Makefile +++ /dev/null @@ -1,118 +0,0 @@ -# This Makefile compiles the HPVM-pipeline test. -# It builds HPVM-related dependencies, then the native camera pipeline ISP code. -# -# Paths to some dependencies (e.g., HPVM, LLVM) must exist in Makefile.config, -# which can be copied from Makefile.config.example for a start. - -CONFIG_FILE := ../include/Makefile.config - -ifeq ($(wildcard $(CONFIG_FILE)),) - $(error $(CONFIG_FILE) not found. See $(CONFIG_FILE).example) -endif -include $(CONFIG_FILE) - -# Compiler Flags - -LFLAGS += -lm -lrt - -ifeq ($(TARGET),) - TARGET = gpu -endif - -# Build dirs -SRC_DIR = src/ -BUILD_DIR = build/$(TARGET) -CURRENT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) - -EXE = pipeline-$(TARGET) - -INCLUDES += -I$(SRC_DIR) - -## BEGIN HPVM MAKEFILE -SRCDIR_OBJS= -HPVM_OBJS=main.hpvm.ll -APP = $(EXE) -APP_CFLAGS += $(INCLUDES) -ffast-math -O3 -fno-lax-vector-conversions -fno-vectorize -fno-slp-vectorize -APP_CXXFLAGS += $(INCLUDES) -ffast-math -O3 -fno-lax-vector-conversions -fno-vectorize -fno-slp-vectorize -APP_LDFLAGS=`pkg-config opencv --libs` - -CFLAGS = $(APP_CFLAGS) $(PLATFORM_CFLAGS) -OBJS_CFLAGS = $(APP_CFLAGS) $(PLATFORM_CFLAGS) -CXXFLAGS = $(APP_CXXFLAGS) $(PLATFORM_CXXFLAGS) -LDFLAGS= $(APP_LDFLAGS) $(PLATFORM_LDFLAGS) - -HPVM_RT_PATH = $(LLVM_BUILD_DIR)/tools/hpvm/projects/hpvm-rt -HPVM_RT_LIB = $(HPVM_RT_PATH)/hpvm-rt.bc - -TESTGEN_OPTFLAGS = -load LLVMGenHPVM.so -genhpvm -globaldce - -ifeq ($(TARGET),seq) - DEVICE = CPU_TARGET - HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -dfg2llvm-cpu -clearDFG - HPVM_OPTFLAGS += -hpvm-timers-cpu -else - DEVICE = GPU_TARGET - HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -localmem -dfg2llvm-opencl -dfg2llvm-cpu -clearDFG - HPVM_OPTFLAGS += -hpvm-timers-cpu -hpvm-timers-ptx -endif - TESTGEN_OPTFLAGS += -hpvm-timers-gen - -CFLAGS += -DDEVICE=$(DEVICE) -CXXFLAGS += -DDEVICE=$(DEVICE) - -# Add BUILDDIR as a prefix to each element of $1 -INBUILDDIR=$(addprefix $(BUILD_DIR)/,$(1)) - -.PRECIOUS: $(BUILD_DIR)/%.ll - -OBJS = $(call INBUILDDIR,$(SRCDIR_OBJS)) -TEST_OBJS = $(call INBUILDDIR,$(HPVM_OBJS)) -KERNEL = $(TEST_OBJS).kernels.ll - -ifeq ($(TARGET),seq) -else - KERNEL_OCL = $(TEST_OBJS).kernels.cl -endif - -HOST_LINKED = $(BUILD_DIR)/$(APP).linked.ll -HOST = $(BUILD_DIR)/$(APP).host.ll - -ifeq ($(OPENCL_PATH),) -FAILSAFE=no_opencl -else -FAILSAFE= -endif - -# Targets -default: $(FAILSAFE) $(BUILD_DIR) $(KERNEL_OCL) $(EXE) - -clean : - if [ -f $(EXE) ]; then rm $(EXE); fi - if [ -f DataflowGraph.dot ]; then rm DataflowGraph.dot*; fi - if [ -d $(BUILD_DIR) ]; then rm -rf $(BUILD_DIR); fi - -$(KERNEL_OCL) : $(KERNEL) - $(OCLBE) $< -o $@ - -$(EXE) : $(HOST_LINKED) - $(CXX) -O3 $(LDFLAGS) $< -o $@ - -$(HOST_LINKED) : $(HOST) $(OBJS) $(HPVM_RT_LIB) - $(LLVM_LINK) $^ -S -o $@ - -$(HOST) $(KERNEL): $(BUILD_DIR)/$(HPVM_OBJS) - $(OPT) $(HPVM_OPTFLAGS) -S $< -o $(HOST) - -$(BUILD_DIR): - mkdir -p $(BUILD_DIR) - -$(BUILD_DIR)/%.ll : $(SRC_DIR)/%.cc - $(CC) $(OBJS_CFLAGS) -emit-llvm -S -o $@ $< - -$(BUILD_DIR)/main.ll : $(SRC_DIR)/main.cc - $(CC) $(CXXFLAGS) -emit-llvm -S -o $@ $< - -$(BUILD_DIR)/main.hpvm.ll : $(BUILD_DIR)/main.ll - $(OPT) $(TESTGEN_OPTFLAGS) $< -S -o $@ - -## END HPVM MAKEFILE diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/CMakeLists.txt b/hpvm/test/dnn_benchmarks/hpvm-c/CMakeLists.txt index d429bca23cd56cafc70a84fe79a35fed38912bec..9f34317d34157d57468c60cb854828b5c54f1cde 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/CMakeLists.txt +++ b/hpvm/test/dnn_benchmarks/hpvm-c/CMakeLists.txt @@ -1,6 +1,3 @@ -# First get approxhpvm.py which we then use to compile benchmarks. -get_filename_component(APPROXHPVM_PY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/approxhpvm.py REALPATH) - # Each source file contains a @MODEL_PARAMS_DIR@ waiting to be filled in. set(MODEL_PARAMS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../model_params/") set(test_compile_targets "") @@ -12,9 +9,9 @@ function(compile_hpvm_c target_name src_filepath codegen_target) # the name of output file and custom target to clash. add_custom_command( OUTPUT ${output_bin_path} - DEPENDS ${generated_file_path} approxhpvm.py - COMMAND ${APPROXHPVM_PY} - ${generated_file_path} ${output_bin_path} + DEPENDS ${generated_file_path} hpvm-clang + COMMAND hpvm-clang + ${generated_file_path} ${output_bin_path} -O3 -fno-exceptions -t ${codegen_target} -I ${CMAKE_CURRENT_SOURCE_DIR}/include ${ARGN} ) add_custom_target(${target_name} DEPENDS ${output_bin_path}) diff --git a/hpvm/test/dnn_benchmarks/keras/Benchmark.py b/hpvm/test/dnn_benchmarks/keras/Benchmark.py index c225ff97661aef8f4fceb123b79190cb7c819dd7..f3d8e9e6b2268618dc835e3d27374a8f7d738a86 100644 --- a/hpvm/test/dnn_benchmarks/keras/Benchmark.py +++ b/hpvm/test/dnn_benchmarks/keras/Benchmark.py @@ -52,20 +52,23 @@ class Benchmark: try: subprocess.run([ - "approxhpvm.py", + "hpvm-clang", "-h" ], check=True, stdout=FNULL) except: - print ("\n\n ERROR: Could not find approxhpvm.py (HPVM compile script)!! \n\n") - print ("To Compile, Must set PATH to include approxhpvm.py script. Do the following: ") - print ("**** export PATH=${PATH_TO_YOUR_HPVM_INSTALLATION}/build/bin/:$PATH *****") + print(""" + +ERROR: Could not find hpvm-clang (HPVM compile script)!! + +hpvm-clang is installed to the python environment used when compiling HPVM. +Please try rerunning 'make -j hpvm-clang'.""") sys.exit(1) try: subprocess.run([ - "approxhpvm.py", src_file, target_binary, + "hpvm-clang", src_file, target_binary, "-t", "tensor", "--conf-file", approx_conf_file ], check=True) except: diff --git a/hpvm/tools/CMakeLists.txt b/hpvm/tools/CMakeLists.txt index 495348404f3594d8bca46b87acd8be2d08d2c8a4..4758dde6a3f1564739135225f6f8673b5d34843d 100644 --- a/hpvm/tools/CMakeLists.txt +++ b/hpvm/tools/CMakeLists.txt @@ -1,2 +1,2 @@ add_llvm_tool_subdirectory(hpvm-config) -add_llvm_tool_subdirectory(py-approxhpvm) \ No newline at end of file +add_llvm_tool_subdirectory(hpvm-clang) \ No newline at end of file diff --git a/hpvm/tools/hpvm-clang/CMakeLists.txt b/hpvm/tools/hpvm-clang/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..3564b00d0aa480996c152c1f027fa6fc39cd144c --- /dev/null +++ b/hpvm/tools/hpvm-clang/CMakeLists.txt @@ -0,0 +1,87 @@ +# This file is very tightly coupled with main.py.in. +# Watch out and keep them in sync. + +# CMake fills in some variables in main.py.in and generate it into a python package: +# `hpvmpy`, which is the main entry point and Python API for HPVM. + +# ---[ Define variables for main.py.in +# main.py.in requires the following variables: +# LLVM_PROJECT_DIR, LLVM_BUILD_DIR +# TRT_PATH, TRT_INCLUDE_DIRS, TRT_LINK_DIRS, TRT_LINK_LIBS +# DIRECT_LINK_LIBS, HPVM_USE_OPENCL (defined globally) +# AVAILABLE_PASSES, HPVM_RT_PATH + +set(LLVM_PROJECT_DIR ${CMAKE_SOURCE_DIR}) +set(LLVM_BUILD_DIR ${CMAKE_BINARY_DIR}) + +get_target_property(TRT_INCLUDE_DIRS tensor_runtime INCLUDE_DIRECTORIES) +get_target_property(TRT_LINK_DIRS tensor_runtime TRT_LINK_DIRS) +get_target_property(TRT_LINK_LIBS tensor_runtime TRT_LINK_LIBS) + +set(DIRECT_LINK_LIBS "$<TARGET_FILE:tensor_runtime>") +if(${HPVM_USE_OPENCL}) + # We need to link to OpenCL libs when hpvm uses opencl + # because OpenCL functions may be injected by the OpenCL pass. + list(APPEND DIRECT_LINK_LIBS ${OpenCL_LIBRARY}) +else() + message(STATUS "hpvm-clang is not using OpenCL (--opencl flag not available).") +endif() + +# The hpvm-rt runtime +# This has to be explicitly set as hpvm-rt.bc is created in a custom_target +# and does not export its file location. +# Keep this in sync with hpvm/projects/hpvm-rt/CMakeLists.txt. +set(HPVM_RT_PATH ${LLVM_BUILD_DIR}/tools/hpvm/projects/hpvm-rt/hpvm-rt.bc) +# And this must be manually in sync with `hpvm/lib/Transforms/*`. +# which is fine for because we don't have many passes for now. +set( + AVAILABLE_PASSES + LLVMBuildDFG + LLVMClearDFG + LLVMDFG2LLVM_CPU + LLVMDFG2LLVM_CUDNN + LLVMDFG2LLVM_OpenCL + LLVMDFG2LLVM_WrapperAPI + LLVMFuseHPVMTensorNodes + LLVMGenHPVM + LLVMInPlaceDFGAnalysis + LLVMLocalMem +) + +# ---[ Create package folder structure +# This sounds crazy but since main.py.in is generated into another file under build/ dir, +# to make a python package around it, we'll have to generate the whole package structure +# in build/ as well. +# Target dir structure: +# ${CMAKE_CURRENT_BINARY_DIR} +# hpvmpy/ +# __init__.py <- generated from main.py.in +# setup.py <- copied from setup.py +file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/hpvmpy) +file(COPY setup.py DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) + +# ---[ Generate main.py.in to hpvmpy/__init__.py +set(init_path ${CMAKE_CURRENT_BINARY_DIR}/hpvmpy/__init__.py) +# First resolve all `@symbol@` by configuring the file +configure_file(main.py.in ${CMAKE_CURRENT_BINARY_DIR}/main.py.conf) +# Then resolve all generator expressions we configured into the previous file +file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/main.py INPUT ${CMAKE_CURRENT_BINARY_DIR}/main.py.conf) +# Delibrately create an extra step of moving file +# which is carried out at build time (as a target) +# so we can set these dependencies on it. +set(DEPS tensor_runtime hpvm-rt-bc clang opt llvm-link ${AVAILABLE_PASSES}) +add_custom_command( + OUTPUT ${init_path} + COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/main.py ${init_path} + DEPENDS ${DEPS} ${CMAKE_CURRENT_BINARY_DIR}/main.py +) + +# ---[ Call python3 -m pip to install this package. +add_custom_target( + hpvmpy + COMMAND python3 -m pip install ./ + DEPENDS ${init_path} setup.py +) +# hpvm-clang is the name of the compiler binary (see setup.py) +# We provide this alias for convenience +add_custom_target(hpvm-clang DEPENDS hpvmpy) diff --git a/hpvm/tools/hpvm-clang/main.py.in b/hpvm/tools/hpvm-clang/main.py.in new file mode 100644 index 0000000000000000000000000000000000000000..e2bc5cbafa23bd64094a3198ad8466f682f6bbdc --- /dev/null +++ b/hpvm/tools/hpvm-clang/main.py.in @@ -0,0 +1,275 @@ +#!/usr/bin/env python3 +import argparse +import os +from pathlib import Path +from typing import List, Union, Optional + +PathLike = Union[Path, str] + +HPVM_PROJECT_DIR = Path("@LLVM_PROJECT_DIR@") / "tools/hpvm" +LLVM_BUILD_BIN = Path("@LLVM_BUILD_DIR@") / "bin" + +# Directories to include +TRT_INCLUDE_DIRS = "@TRT_INCLUDE_DIRS@".split(";") +TRT_LINK_DIRS = [Path(s) for s in "@TRT_LINK_DIRS@".split(";")] +TRT_LINK_LIBS = "@TRT_LINK_LIBS@".split(";") +DIRECT_LINK_LIBS = "@DIRECT_LINK_LIBS@".split(";") +HPVM_USE_OPENCL = int("@HPVM_USE_OPENCL@") + +AVAILABLE_PASSES = "@AVAILABLE_PASSES@".split(";") +HPVM_RT_PATH = "@HPVM_RT_PATH@" + + +def compile_hpvm_c( + hpvm_src: PathLike, + output_file: PathLike, + tensor_target: Optional[str], + opencl: bool, + link_bitcode: List[PathLike] = None, + include: List[PathLike] = None, + macro: List[str] = None, + flags: List[str] = None, + optim_level: str = "0", # -O0 + is_cpp: bool = True, # otherwise is C + std: str = None, # language std (-std=c++11) + link_dirs: List[PathLike] = None, + link_libs: List[str] = None, + working_dir: PathLike = None, + conf_file: PathLike = None, +): + from subprocess import check_output + + passes = ["LLVMBuildDFG"] + pass_flags = ["buildDFG"] + if tensor_target == "tensor": + if conf_file is None: + raise ValueError("conf_file must be defined when tensor_target=='tensor'.") + passes += ["LLVMInPlaceDFGAnalysis", "LLVMFuseHPVMTensorNodes", "LLVMDFG2LLVM_WrapperAPI"] + pass_flags += [ + "inplace", "hpvm-fuse", "dfg2llvm-wrapperapi", + f"configuration-inputs-filename={conf_file}" + ] + elif tensor_target == "cudnn": + passes += ["LLVMInPlaceDFGAnalysis", "LLVMDFG2LLVM_CUDNN"] + pass_flags += ["inplace", "dfg2llvm-cudnn"] + elif tensor_target is None: + passes += ["LLVMLocalMem"] + pass_flags += ["localmem"] + else: + raise ValueError(f"Tensor target {tensor_target} not recognized") + if opencl: + passes += ["LLVMDFG2LLVM_OpenCL"] + pass_flags += ["dfg2llvm-opencl"] + passes += ["LLVMDFG2LLVM_CPU", "LLVMClearDFG"] + pass_flags += ["dfg2llvm-cpu", "clearDFG"] + + working_dir = Path(working_dir or ".") + if not working_dir.is_dir(): + os.makedirs(working_dir) + + # All commands for compiling the main hpvm_c file + name_stem = Path(hpvm_src).stem + ll_file = working_dir / f"{name_stem}.ll" + hpvm_ll_file = working_dir / f"{name_stem}.hpvm.ll" + llvm_ll_file = working_dir / f"{name_stem}.llvm.ll" + hpvm_rt_linked_file = working_dir / f"{name_stem}.linked.bc" + link_bitcode_ = [Path(bc) for bc in (link_bitcode or [])] + commands = [ + hpvm_c_to_ll(hpvm_src, ll_file, include, macro, flags, optim_level, is_cpp, std), + opt_codegen_hpvm(ll_file, hpvm_ll_file), + _run_opt(hpvm_ll_file, llvm_ll_file, passes, pass_flags), + link_hpvm_rt(link_bitcode_ + [llvm_ll_file], hpvm_rt_linked_file), + ] + commands.append( + link_binary(hpvm_rt_linked_file, output_file, link_dirs, link_libs) + ) + for command in commands: + print(" ".join(command)) + check_output(command) + + +def hpvm_c_to_ll( + src_file: PathLike, + target_file: PathLike, + extra_includes: List[PathLike] = None, + macros: List[str] = None, + flags: List[str] = None, + optim_level: str = "0", # -O0 + is_cpp: bool = True, # otherwise is C + std: str = None, # --std=c++11 +) -> List[str]: + includes = (extra_includes or []) + TRT_INCLUDE_DIRS + includes_s = [f"-I{path}" for path in includes] + macros = [f"-D{macro}" for macro in (macros or [])] + flags = [f"-f{flg}" for flg in (flags or [])] + if std: + flags.append(f"-std={std}") + clang = "clang++" if is_cpp else "clang" + return [ + str(LLVM_BUILD_BIN / clang), *includes_s, *flags, *macros, + f"-O{optim_level}", "-emit-llvm", "-S", + str(src_file), "-o", str(target_file) + ] + + +def opt_codegen_hpvm(src_file: PathLike, target_file: PathLike) -> List[str]: + return _run_opt(src_file, target_file, ["LLVMGenHPVM"], ["genhpvm", "globaldce"]) + + +def link_hpvm_rt(bitcodes: List[PathLike], target_file: PathLike) -> List[str]: + bitcodes_s = [str(bc) for bc in bitcodes] + return [str(LLVM_BUILD_BIN / "llvm-link"), *bitcodes_s, HPVM_RT_PATH, "-S", "-o", str(target_file)] + + +def link_binary( + src_file: PathLike, + target_file: PathLike, + extra_link_dirs: List[PathLike] = None, + extra_link_libs: List[str] = None +) -> List[str]: + link_dirs, link_libs = _link_args(extra_link_dirs or [], extra_link_libs or []) + linker_dir_flags = [] + for path in link_dirs: + linker_dir_flags.extend([f"-L{path}", f"-Wl,-rpath={path}"]) + linker_lib_flags = [f"-l{lib}" for lib in link_libs] + return [ + str(LLVM_BUILD_BIN / "clang++"), str(src_file), + "-o", str(target_file), *linker_dir_flags, *linker_lib_flags + ] + + +def _link_args(extra_link_dirs: List[PathLike], extra_link_libs: List[str]): + def drop_suffix(libname: str): + import re + + match = re.match(r"lib(.*)\.so", libname) + return libname if match is None else match.group(1) + + link_dirs, link_libs = extra_link_dirs.copy(), extra_link_libs.copy() + for lib in DIRECT_LINK_LIBS: + lib = Path(lib) + link_dirs.append(lib.parent) + link_libs.append(lib.name) + link_dirs += TRT_LINK_DIRS + link_libs += TRT_LINK_LIBS + link_libnames = [drop_suffix(s) for s in link_libs] + return link_dirs, link_libnames + + +def _run_opt( + src_file: PathLike, + target_file: PathLike, + pass_names: List[str], + pass_flags: List[str], +) -> List[str]: + unavailable = set(pass_names) - set(AVAILABLE_PASSES) + if unavailable: + raise ValueError(f"Passes {unavailable} are unavailable for this compilation.") + load_passes_strs = [s for pass_ in pass_names for s in ["-load", f"{pass_}.so"]] + pass_flags_strs = [f"-{flag}" for flag in pass_flags] + return [ + str(LLVM_BUILD_BIN / "opt"), *load_passes_strs, *pass_flags_strs, + "-S", str(src_file), "-o", str(target_file) + ] + + +def parse_args(): + parser = argparse.ArgumentParser("hpvm-clang") + parser.add_argument( + "hpvm_src", type=Path, + help="""HPVM-C code to compile. +HPVM-C code must be single file, but additional bitcode file can be linked together. +See option -b for that.""" + ) + parser.add_argument("output_file", type=Path, help="Path to generate binary to") + parser.add_argument( + "-x", type=str, metavar="language", default="c++", + help="Treat input file as having type <language>", + ) + parser.add_argument( + "-b", + "--link-bitcode", + type=Path, + nargs="+", + help="Additional bitcode (.ll/.bc) files to link to", + ) + parser.add_argument( + "-t", + "--tensor-target", + type=str, + choices=["tensor", "cudnn"], + help="Backend to use for tensor operators", + ) + parser.add_argument( + "--conf-file", type=Path, + help="File to approximation configurations; required for tensor target 'tensor'" + ) + parser.add_argument( + "--opencl", + action="store_true", + help="Use OpenCL support. Requires HPVM built with OpenCL", + ) + parser.add_argument( + "-d", "--working-dir", type=Path, help="Directory to generate temp files in" + ) + + # Relaying arguments for clang++ (source -> bitcode stage) + parser.add_argument( + "-I", "--include", type=Path, action="append", metavar="dir", + help="[clang emit-llvm] Add directory to include search path" + ) + parser.add_argument( + "-D", type=str, action="append", metavar="<macro>=<value>", + help="[clang emit-llvm] Define macro" + ) + parser.add_argument( + "-f", type=str, action="append", metavar="flag", + help="[clang emit-llvm] clang++ flags (such as -ffastmath)" + ) + parser.add_argument( + "-O", type=str, default="0", metavar="level", + help="[clang emit-llvm] Optimization level" + ) + parser.add_argument( + "--std", type=str, + help="[clang emit-llvm] Language standard to compile for. Double dashes (--std, not -std)." + ) + + # Relaying arguments for clang++ (linking stage) + parser.add_argument( + "-L", type=Path, action="append", metavar="dir", + help="[clang linker] Add directory to library search path" + ) + parser.add_argument( + "-l", type=str, action="append", metavar="name", + help="[clang linker] Link library (such as -lpthread)" + ) + + args = parser.parse_args() + if args.tensor_target == "tensor": + if args.conf_file is None: + parser.error('Tensor target "tensor" requires --conf-file argument') + if args.x == "c": + args.is_cpp = False + elif args.x == "c++": + args.is_cpp = True + else: + parser.error(f"Language mode {args.x} not supported yet -- only c or c++") + if not HPVM_USE_OPENCL and args.opencl: + parser.error(f"OpenCL is disabled for this build of HPVM.") + return args + + +def main(): + args = vars(parse_args()) + args["macro"] = args.pop("D") + args["flags"] = args.pop("f") + args["optim_level"] = args.pop("O") + args["link_dirs"] = args.pop("L") + args["link_libs"] = args.pop("l") + args.pop("x") + compile_hpvm_c(**args) + + +if __name__ == "__main__": + main() diff --git a/hpvm/tools/hpvm-clang/setup.py b/hpvm/tools/hpvm-clang/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..75262a069de32e6f5de4bff89738e0011dd91326 --- /dev/null +++ b/hpvm/tools/hpvm-clang/setup.py @@ -0,0 +1,15 @@ +import setuptools + +setuptools.setup( + name="hpvmpy", + version="1.0", + author="Yifan Zhao", + author_email="yifanz16@illinois.edu", + description="HPVM Python API", + packages=["hpvmpy"], + entry_points={ + "console_scripts": [ + "hpvm-clang = hpvmpy:main", + ], + }, +) diff --git a/hpvm/tools/py-approxhpvm/CMakeLists.txt b/hpvm/tools/py-approxhpvm/CMakeLists.txt deleted file mode 100644 index 1e5eeb891d35fc028d1aa85c5e5e679902a4dad7..0000000000000000000000000000000000000000 --- a/hpvm/tools/py-approxhpvm/CMakeLists.txt +++ /dev/null @@ -1,54 +0,0 @@ -# This file is very tightly coupled with main.py.in. -# Watch out and keep them in sync. -# main.py.in (to become approxhpvm.py) requires the following variables: -# LLVM_PROJECT_DIR, LLVM_BUILD_DIR -# TRT_PATH, TRT_INCLUDE_DIRS, TRT_LINK_DIRS, TRT_LINK_LIBS -# DIRECT_LINK_LIBS -# AVAILABLE_PASSES, HPVM_RT_PATH - -set(LLVM_PROJECT_DIR ${CMAKE_SOURCE_DIR}) -set(LLVM_BUILD_DIR ${CMAKE_BINARY_DIR}) - -get_target_property(TRT_INCLUDE_DIRS tensor_runtime INCLUDE_DIRECTORIES) -get_target_property(TRT_LINK_DIRS tensor_runtime TRT_LINK_DIRS) -get_target_property(TRT_LINK_LIBS tensor_runtime TRT_LINK_LIBS) - -# This is defined globally. We need to manually link to this -# because OpenCL functions are injected by HPVM Passes. -set(DIRECT_LINK_LIBS ${OpenCL_LIBRARY} "$<TARGET_FILE:tensor_runtime>") - -# The hpvm-rt runtime -# This has to be explicitly set as hpvm-rt.bc is created in a custom_target -# and does not export its file location. -# Keep this in sync with hpvm/projects/hpvm-rt/CMakeLists.txt. -set(HPVM_RT_PATH ${LLVM_BUILD_DIR}/tools/hpvm/projects/hpvm-rt/hpvm-rt.bc) -set( - AVAILABLE_PASSES - LLVMBuildDFG - LLVMInPlaceDFGAnalysis - LLVMDFG2LLVM_CPU - LLVMDFG2LLVM_CUDNN - LLVMDFG2LLVM_WrapperAPI - LLVMFuseHPVMTensorNodes - LLVMClearDFG - LLVMGenHPVM -) - -# First resolve all `@symbol@` by configuring the file -configure_file(main.py.in ${CMAKE_CURRENT_BINARY_DIR}/main.py.conf) -# Then resolve all generator expressions we configured into the previous file -file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/main.py INPUT ${CMAKE_CURRENT_BINARY_DIR}/main.py.conf) -# Delibrately create an extra step of moving file -# which is carried out at build time (as a target) -# so we can set these dependencies on it -set( - DEPS - tensor_runtime hpvm-rt-bc clang opt llvm-link ${AVAILABLE_PASSES} -) -add_custom_command( - OUTPUT ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/approxhpvm.py - COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/main.py ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/approxhpvm.py - COMMAND chmod +x ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/approxhpvm.py - DEPENDS ${DEPS} ${CMAKE_CURRENT_BINARY_DIR}/main.py -) -add_custom_target(approxhpvm.py ALL DEPENDS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/approxhpvm.py) diff --git a/hpvm/tools/py-approxhpvm/main.py.in b/hpvm/tools/py-approxhpvm/main.py.in deleted file mode 100644 index 7b211911643c64d8bf2c34ef8a43e3ac98cdd88a..0000000000000000000000000000000000000000 --- a/hpvm/tools/py-approxhpvm/main.py.in +++ /dev/null @@ -1,187 +0,0 @@ -#!/usr/bin/env python3 -import argparse -import os -from pathlib import Path -from typing import List, Union, Optional - -PathLike = Union[Path, str] - -HPVM_PROJECT_DIR = Path("@LLVM_PROJECT_DIR@") / "tools/hpvm" -LLVM_BUILD_BIN = Path("@LLVM_BUILD_DIR@") / "bin" - -# Directories to include -TRT_INCLUDE_DIRS = "@TRT_INCLUDE_DIRS@".split(";") -TRT_LINK_DIRS = [Path(s) for s in "@TRT_LINK_DIRS@".split(";")] -TRT_LINK_LIBS = "@TRT_LINK_LIBS@".split(";") -DIRECT_LINK_LIBS = "@DIRECT_LINK_LIBS@".split(";") - -AVAILABLE_PASSES = "@AVAILABLE_PASSES@".split(";") -HPVM_RT_PATH = "@HPVM_RT_PATH@" - -COMPILE_FLAGS = ["fno-exceptions", "std=c++11", "O3"] - - -def compile_hpvm_c( - input_file: PathLike, - output_file: PathLike, - codegen_target: str = "tensor", - include: List[PathLike] = None, - working_dir: PathLike = None, - conf_file: PathLike = None, -): - from subprocess import check_output - - codegen_functions = { - "tensor": lambda i, o: opt_codegen_tensor(i, o, conf_file), - "cudnn": opt_codegen_cudnn - } - codegen_f = codegen_functions[codegen_target] - working_dir = Path(working_dir or ".") - if not working_dir.is_dir(): - os.makedirs(working_dir) - name_stem = Path(input_file).stem - - ll_file = working_dir / f"{name_stem}.ll" - hpvm_ll_file = working_dir / f"{name_stem}.hpvm.ll" - llvm_ll_file = working_dir / f"{name_stem}.llvm.ll" - hpvm_rt_linked_file = working_dir / f"{name_stem}.linked.bc" - commands = [ - hpvm_c_to_ll(input_file, ll_file, extra_includes=include), - opt_codegen_hpvm(ll_file, hpvm_ll_file), - codegen_f(hpvm_ll_file, llvm_ll_file), - link_hpvm_rt(llvm_ll_file, hpvm_rt_linked_file), - link_binary(hpvm_rt_linked_file, output_file), - ] - for command in commands: - print(" ".join(command)) - check_output(command) - - -def hpvm_c_to_ll( - src_file: PathLike, - target_file: PathLike, - extra_includes: Optional[List[PathLike]] = None, - flags: List[str] = None, -) -> List[str]: - extra_includes = extra_includes or [] - includes = [f"-I{path}" for path in TRT_INCLUDE_DIRS + extra_includes] - flags = [f"-{flg}" for flg in (flags or []) + COMPILE_FLAGS] - return [ - str(LLVM_BUILD_BIN / "clang++"), *includes, *flags, "-emit-llvm", "-S", - str(src_file), "-o", str(target_file) - ] - - -def opt_codegen_hpvm(src_file: PathLike, target_file: PathLike) -> List[str]: - return _run_opt(src_file, target_file, ["LLVMGenHPVM"], ["genhpvm", "globaldce"]) - - -def opt_codegen_cudnn(src_file: PathLike, target_file: PathLike) -> List[str]: - passes = [ - "LLVMBuildDFG", "LLVMInPlaceDFGAnalysis", - "LLVMDFG2LLVM_CUDNN", "LLVMDFG2LLVM_CPU", - "LLVMFuseHPVMTensorNodes", "LLVMClearDFG", "LLVMGenHPVM" - ] - flags = [ - "buildDFG", "inplace", "hpvm-fuse", - "dfg2llvm-cudnn", "dfg2llvm-cpu", "clearDFG" - ] - return _run_opt(src_file, target_file, passes, flags) - - -def opt_codegen_tensor( - src_file: PathLike, target_file: PathLike, conf_file: PathLike -): - passes = [ - "LLVMBuildDFG", "LLVMInPlaceDFGAnalysis", - "LLVMDFG2LLVM_WrapperAPI", "LLVMDFG2LLVM_CPU", - "LLVMFuseHPVMTensorNodes", "LLVMClearDFG", "LLVMGenHPVM" - ] - flags = [ - "buildDFG", "inplace", "hpvm-fuse", - "dfg2llvm-wrapperapi", - f"configuration-inputs-filename={conf_file}", - "dfg2llvm-cpu", "clearDFG", - ] - return _run_opt(src_file, target_file, passes, flags) - - -def link_hpvm_rt(src_file: PathLike, target_file: PathLike) -> List[str]: - return [str(LLVM_BUILD_BIN / "llvm-link"), str(src_file), HPVM_RT_PATH, "-o", str(target_file)] - - -def link_binary(src_file: PathLike, target_file: PathLike) -> List[str]: - def drop_suffix(libname: str): - import re - - match = re.match(r"lib(.*)\.so", libname) - return libname if match is None else match.group(1) - - link_dirs, link_libnames = [], [] - for lib in DIRECT_LINK_LIBS: - lib = Path(lib) - link_dirs.append(lib.parent) - link_libnames.append(drop_suffix(lib.name)) - link_dirs += TRT_LINK_DIRS - link_libnames += TRT_LINK_LIBS - - linker_dir_flags = [] - for path in link_dirs: - linker_dir_flags.extend([f"-L{path}", f"-Wl,-rpath={path}"]) - linker_lib_flags = [f"-l{drop_suffix(lib)}" for lib in link_libnames] - return [ - str(LLVM_BUILD_BIN / "clang++"), str(src_file), - "-o", str(target_file), *linker_dir_flags, *linker_lib_flags - ] - - -def _run_opt( - src_file: PathLike, - target_file: PathLike, - pass_names: List[str], - pass_flags: List[str], -) -> List[str]: - unavailable = set(pass_names) - set(AVAILABLE_PASSES) - if unavailable: - raise ValueError(f"Passes {unavailable} are unavailable from CMake") - load_passes_strs = [s for pass_ in pass_names for s in ["-load", f"{pass_}.so"]] - pass_flags_strs = [f"-{flag}" for flag in pass_flags] - return [ - str(LLVM_BUILD_BIN / "opt"), *load_passes_strs, *pass_flags_strs, - "-S", str(src_file), "-o", str(target_file) - ] - - -def parse_args(): - parser = argparse.ArgumentParser("approxhpvm") - parser.add_argument("input_file", type=Path, help="HPVM-C code to compile") - parser.add_argument("output_file", type=Path, help="Path to generate binary to") - parser.add_argument( - "-t", - "--codegen-target", - type=str, - required=True, - choices=["tensor", "cudnn"], - help="Backend to use", - ) - parser.add_argument( - "-d", "--working-dir", type=Path, help="Directory to generate temp files in" - ) - parser.add_argument( - "--conf-file", type=Path, - help="File to approximation configurations; required for 'tensor' target" - ) - parser.add_argument( - "-I", "--include", type=Path, action="append", - help="Additional include directories to use" - ) - - args = parser.parse_args() - if args.codegen_target == "tensor": - if args.conf_file is None: - parser.error('Codegen target "tensor" requires --conf-file argument') - return args - - -if __name__ == "__main__": - compile_hpvm_c(**vars(parse_args()))